mirror of
https://github.com/gnosygnu/xowa.git
synced 2026-03-02 03:49:30 +00:00
Make: Add xomp_stats to track time per page (and other attributes) [#456]
This commit is contained in:
@@ -0,0 +1,106 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||
|
||||
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||
or alternatively under the terms of the Apache License Version 2.0.
|
||||
|
||||
You may use XOWA according to either of these licenses as is most appropriate
|
||||
for your project on a case-by-case basis.
|
||||
|
||||
The terms of each license can be found in the source code repository:
|
||||
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.addons.bldrs.mass_parses.dbs; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.mass_parses.*;
|
||||
import gplx.dbs.*;
|
||||
import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.hzips.*;
|
||||
import gplx.xowa.wikis.pages.*;
|
||||
import gplx.xowa.parsers.logs.stats.*;
|
||||
public class Xomp_stat_tbl implements Rls_able {
|
||||
private static final String tbl_name = "xomp_stats"; private static final Dbmeta_fld_list flds = new Dbmeta_fld_list();
|
||||
private static final String
|
||||
fld_page_id = flds.Add_int_pkey("page_id"), fld_wkr_uid = flds.Add_int("wkr_uid")
|
||||
, fld_wtxt_len = flds.Add_int("wtxt_len"), fld_html_len = flds.Add_int("html_len"), fld_zip_len = flds.Add_int("zip_len")
|
||||
, fld_page_time = flds.Add_long("page_time"), fld_tidy_time = flds.Add_long("tidy_time"), fld_fulltext_time = flds.Add_long("fulltext_time")
|
||||
, fld_scrib_time = flds.Add_long("scrib_time"), fld_scrib_count = flds.Add_int("scrib_count"), fld_scrib_depth = flds.Add_int("scrib_depth")
|
||||
, fld_image_count = flds.Add_int("image_count"), fld_audio_count = flds.Add_int("audio_count"), fld_video_count = flds.Add_int("video_count"), fld_media_count = flds.Add_int("media_count")
|
||||
, fld_lnki_count = flds.Add_int("lnki_count"), fld_lnke_count = flds.Add_int("lnke_count"), fld_hdr_count = flds.Add_int("hdr_count")
|
||||
, fld_math_count = flds.Add_int("math_count"), fld_imap_count = flds.Add_int("imap_count"), fld_hiero_count = flds.Add_int("hiero_count")
|
||||
, fld_gallery_count = flds.Add_int("gallery_count"), fld_gallery_packed_count = flds.Add_int("gallery_packed_count")
|
||||
;
|
||||
private final Db_conn conn; private Db_stmt stmt_insert;
|
||||
public Xomp_stat_tbl(Db_conn conn) {
|
||||
this.conn = conn;
|
||||
this.Create_tbl();
|
||||
conn.Stmt_delete(tbl_name).Exec_delete(); // always zap table
|
||||
conn.Rls_reg(this);
|
||||
}
|
||||
public void Create_tbl() {conn.Meta_tbl_create(Dbmeta_tbl_itm.New(tbl_name, flds, Dbmeta_idx_itm.new_unique_by_tbl(tbl_name, "pkey", fld_page_id)));}
|
||||
public void Rls() {
|
||||
stmt_insert = Db_stmt_.Rls(stmt_insert);
|
||||
}
|
||||
public void Stmt_new() {
|
||||
stmt_insert = conn.Stmt_insert(tbl_name, flds);
|
||||
}
|
||||
public void Insert_by_copy(Db_rdr rdr) {
|
||||
stmt_insert.Clear()
|
||||
.Val_int (fld_page_id , rdr.Read_int(fld_page_id))
|
||||
.Val_int (fld_wkr_uid , rdr.Read_int(fld_wkr_uid))
|
||||
.Val_int (fld_wtxt_len , rdr.Read_int(fld_wtxt_len))
|
||||
.Val_int (fld_html_len , rdr.Read_int(fld_html_len))
|
||||
.Val_int (fld_zip_len , rdr.Read_int(fld_zip_len))
|
||||
.Val_long(fld_page_time , rdr.Read_long(fld_page_time))
|
||||
.Val_long(fld_tidy_time , rdr.Read_long(fld_tidy_time))
|
||||
.Val_long(fld_fulltext_time , rdr.Read_long(fld_fulltext_time))
|
||||
.Val_long(fld_scrib_time , rdr.Read_long(fld_scrib_time))
|
||||
.Val_int (fld_scrib_count , rdr.Read_int (fld_scrib_count))
|
||||
.Val_int (fld_scrib_depth , rdr.Read_int (fld_scrib_depth))
|
||||
.Val_int (fld_image_count , rdr.Read_int (fld_image_count))
|
||||
.Val_int (fld_audio_count , rdr.Read_int (fld_audio_count))
|
||||
.Val_int (fld_video_count , rdr.Read_int (fld_video_count))
|
||||
.Val_int (fld_media_count , rdr.Read_int (fld_media_count))
|
||||
.Val_int (fld_lnki_count , rdr.Read_int (fld_lnki_count))
|
||||
.Val_int (fld_lnke_count , rdr.Read_int (fld_lnke_count))
|
||||
.Val_int (fld_hdr_count , rdr.Read_int (fld_hdr_count))
|
||||
.Val_int (fld_math_count , rdr.Read_int (fld_math_count))
|
||||
.Val_int (fld_imap_count , rdr.Read_int (fld_imap_count))
|
||||
.Val_int (fld_hiero_count , rdr.Read_int (fld_hiero_count))
|
||||
.Val_int (fld_gallery_count , rdr.Read_int (fld_gallery_count))
|
||||
.Val_int (fld_gallery_packed_count , rdr.Read_int (fld_gallery_packed_count))
|
||||
.Exec_insert();
|
||||
}
|
||||
public void Insert(Xoae_page wpg, Xoh_page hpg, int wkr_uid, long page_time, long fulltext_time) {
|
||||
Xop_log_stat stat = wpg.Stat_itm();
|
||||
stmt_insert.Clear()
|
||||
.Val_int(fld_page_id , hpg.Page_id())
|
||||
.Val_int(fld_wkr_uid , wkr_uid)
|
||||
.Val_int(fld_wtxt_len , Len_or_0(wpg.Root().Root_src()))
|
||||
.Val_int(fld_html_len , Len_or_0(hpg.Db().Html().Html_bry()))
|
||||
.Val_int(fld_zip_len , hpg.Db().Html().Zip_len())
|
||||
.Val_long(fld_page_time , page_time)
|
||||
.Val_long(fld_tidy_time , stat.Tidy_time)
|
||||
.Val_long(fld_fulltext_time , fulltext_time)
|
||||
.Val_long(fld_scrib_time , stat.Scrib().Time())
|
||||
.Val_int (fld_scrib_count , stat.Scrib().Count())
|
||||
.Val_int (fld_scrib_depth , stat.Scrib().Depth_max())
|
||||
.Val_int (fld_image_count , stat.Image_count)
|
||||
.Val_int (fld_audio_count , stat.Audio_count)
|
||||
.Val_int (fld_video_count , stat.Video_count)
|
||||
.Val_int (fld_media_count , stat.Media_count)
|
||||
.Val_int (fld_lnki_count , stat.Lnki_count)
|
||||
.Val_int (fld_lnke_count , stat.Lnke_count)
|
||||
.Val_int (fld_hdr_count , stat.Hdr_count)
|
||||
.Val_int (fld_math_count , stat.Math_count)
|
||||
.Val_int (fld_imap_count , stat.Imap_count)
|
||||
.Val_int (fld_hiero_count , stat.Hiero_count)
|
||||
.Val_int (fld_gallery_count , stat.Gallery_count)
|
||||
.Val_int (fld_gallery_packed_count , stat.Gallery_packed_count)
|
||||
.Exec_insert();
|
||||
}
|
||||
public void Stmt_rls() {
|
||||
stmt_insert = Db_stmt_.Rls(stmt_insert);
|
||||
}
|
||||
private static int Len_or_0(byte[] bry) {return bry == null ? 0 : bry.length;}
|
||||
}
|
||||
@@ -20,8 +20,12 @@ public class Xomp_make_cmd extends Xob_cmd__base {
|
||||
public Xomp_make_cmd(Xob_bldr bldr, Xowe_wiki wiki) {super(bldr, wiki);}
|
||||
@Override public void Cmd_run() {
|
||||
wiki.Init_assert();
|
||||
new Xomp_make_html().Exec(wiki, cfg);
|
||||
new Xomp_make_lnki().Exec(wiki, cfg, 10000);
|
||||
if (cfg.Mode().Has("html"))
|
||||
new Xomp_make_html().Exec(wiki, cfg);
|
||||
if (cfg.Mode().Has("lnki"))
|
||||
new Xomp_make_lnki().Exec(wiki, cfg, 10000);
|
||||
if (cfg.Mode().Has("stat"))
|
||||
new Xomp_make_stat().Exec(wiki, cfg);
|
||||
}
|
||||
@Override public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
|
||||
if (ctx.Match(k, Invk__cfg)) return cfg;
|
||||
|
||||
@@ -15,10 +15,12 @@ Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.addons.bldrs.mass_parses.makes; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.mass_parses.*;
|
||||
public class Xomp_make_cmd_cfg implements Gfo_invk {
|
||||
// hash of enabled modes; defaults to the keys "html", "lnki" and "stat"; replaced wholesale when Invk receives "mode_"
public Hash_adp Mode() {return mode;} private Hash_adp mode = Hash_adp_.New().Add_and_more("html", "html").Add_and_more("lnki", "lnki").Add_and_more("stat", "stat");
|
||||
// defaults to true; set by Invk "delete_html_dbs_" from the yes/no arg "v"; NOTE(review): presumably controls deletion of the html dbs after the make step -- confirm against caller
public boolean Delete_html_dbs() {return delete_html_dbs;} private boolean delete_html_dbs = true;
|
||||
// ordered hash that starts empty; NOTE(review): appears to be filled by Invk "merger_wkrs_" from the "|"-delimited arg "k" -- confirm against the full Invk body
public Ordered_hash Merger_wkrs() {return merger_wkrs;} private final Ordered_hash merger_wkrs = Ordered_hash_.New();
|
||||
public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
|
||||
if (ctx.Match(k, Invk__delete_html_dbs_)) delete_html_dbs = m.ReadYn("v");
|
||||
if (ctx.Match(k, Invk__mode_)) mode = GfoMsg_.Read_str_ary_as_hash(m, "v");
|
||||
else if (ctx.Match(k, Invk__delete_html_dbs_)) delete_html_dbs = m.ReadYn("v");
|
||||
else if (ctx.Match(k, Invk__merger_wkrs_)) {
|
||||
String[] ary = m.ReadStrAry("k", "|");
|
||||
for (String itm : ary)
|
||||
@@ -27,5 +29,5 @@ public class Xomp_make_cmd_cfg implements Gfo_invk {
|
||||
else return Gfo_invk_.Rv_unhandled;
|
||||
return this;
|
||||
}
|
||||
private static final String Invk__delete_html_dbs_ = "delete_html_dbs_", Invk__merger_wkrs_ = "merger_wkrs_";
|
||||
private static final String Invk__mode_ = "mode_", Invk__delete_html_dbs_ = "delete_html_dbs_", Invk__merger_wkrs_ = "merger_wkrs_";
|
||||
}
|
||||
|
||||
@@ -0,0 +1,51 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||
|
||||
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||
or alternatively under the terms of the Apache License Version 2.0.
|
||||
|
||||
You may use XOWA according to either of these licenses as is most appropriate
|
||||
for your project on a case-by-case basis.
|
||||
|
||||
The terms of each license can be found in the source code repository:
|
||||
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.addons.bldrs.mass_parses.makes; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.mass_parses.*;
|
||||
import gplx.core.brys.*;
|
||||
import gplx.dbs.*; import gplx.xowa.htmls.core.dbs.*; import gplx.xowa.addons.bldrs.mass_parses.dbs.*;
|
||||
class Xomp_make_stat {
|
||||
public void Exec(Xowe_wiki wiki, Xomp_make_cmd_cfg cfg) {
|
||||
// init mgr_db and mgr_tbl
|
||||
Xomp_mgr_db mgr_db = Xomp_mgr_db.New__load(wiki);
|
||||
Db_conn mgr_conn = mgr_db.Conn();
|
||||
Xomp_stat_tbl mgr_tbl = new Xomp_stat_tbl(mgr_conn);
|
||||
mgr_conn.Txn_bgn("xomp_stats");
|
||||
mgr_tbl.Stmt_new();
|
||||
|
||||
// loop wkrs
|
||||
String sql = String_.Format("SELECT * FROM xomp_stats;");
|
||||
int wkrs_len = mgr_db.Tbl__wkr().Select_count();
|
||||
for (int i = 0; i < wkrs_len; ++i) {
|
||||
int count = 0;
|
||||
Xomp_wkr_db wkr_db = Xomp_wkr_db.New(mgr_db.Dir(), i);
|
||||
Db_rdr rdr = wkr_db.Conn().Stmt_sql(sql).Exec_select__rls_auto(); // ANSI.Y
|
||||
try {
|
||||
while (rdr.Move_next()) {
|
||||
mgr_tbl.Insert_by_copy(rdr);
|
||||
if (++count % 10000 == 0) {
|
||||
Gfo_usr_dlg_.Instance.Prog_many("", "", "xomp.stat.insert: db=~{0} count=~{1}", Int_.To_str_pad_bgn_space(i, 3), Int_.To_str_pad_bgn_space(count, 8));
|
||||
mgr_conn.Txn_sav();
|
||||
}
|
||||
}
|
||||
} finally {rdr.Rls();}
|
||||
}
|
||||
|
||||
// cleanup
|
||||
mgr_tbl.Stmt_rls();
|
||||
mgr_conn.Txn_end();
|
||||
mgr_conn.Rls_conn();
|
||||
}
|
||||
}
|
||||
@@ -40,7 +40,9 @@ public class Xomp_parse_wkr implements Gfo_invk {
|
||||
private final Xob_hdump_bldr hdump_bldr = new Xob_hdump_bldr();
|
||||
private final int uid;
|
||||
private Xomp_wkr_db wkr_db;
|
||||
private Xomp_stat_tbl stat_tbl;
|
||||
|
||||
// indexer vars
|
||||
private final Xofulltext_indexer_wkr indexer;
|
||||
|
||||
private final List_adp list = List_adp_.New(); private int list_idx = 0, list_len = 0;
|
||||
@@ -64,9 +66,9 @@ public class Xomp_parse_wkr implements Gfo_invk {
|
||||
// wkr-specific vars
|
||||
this.wiki = wiki; this.uid = uid;
|
||||
this.wkr_db = Xomp_wkr_db.New(Xomp_mgr_db.New__url(wiki), uid);
|
||||
this.stat_tbl = new Xomp_stat_tbl(wkr_db.Conn());
|
||||
}
|
||||
public void Exec() {
|
||||
// init
|
||||
Xow_parser_mgr parser_mgr = wiki.Parser_mgr();
|
||||
|
||||
// disable file download
|
||||
@@ -86,7 +88,7 @@ public class Xomp_parse_wkr implements Gfo_invk {
|
||||
logger.Bgn();
|
||||
}
|
||||
|
||||
// init log_mgr / property_wkr
|
||||
// init log_mgr / property_wkr / stats
|
||||
Xop_log_wkr_factory wkr_factory = new Xop_log_wkr_factory(wkr_db.Conn());
|
||||
if (cfg.Log_math()) wiki.Parser_mgr().Math__core().Log_wkr_(wkr_factory);
|
||||
|
||||
@@ -94,6 +96,7 @@ public class Xomp_parse_wkr implements Gfo_invk {
|
||||
hdump_bldr.Enabled_(cfg.Hdump_enabled()).Hzip_enabled_(cfg.Hzip_enabled()).Hzip_diff_(cfg.Hdiff_enabled()).Zip_tid_(cfg.Zip_tid());
|
||||
hdump_bldr.Init(wiki, wkr_db.Conn(), new Xob_hdump_tbl_retriever__xomp(wkr_db.Html_tbl()));
|
||||
wkr_db.Conn().Txn_bgn("xomp");
|
||||
stat_tbl.Stmt_new();
|
||||
|
||||
// set status to running
|
||||
mgr_db.Tbl__wkr().Update_status(uid, Xomp_wkr_tbl.Status__running);
|
||||
@@ -110,8 +113,9 @@ public class Xomp_parse_wkr implements Gfo_invk {
|
||||
if (ppg.Text() == null) continue; // some pages have no text; ignore them else null ref; PAGE: it.d:miercuri DATE:2015-12-05
|
||||
|
||||
try {
|
||||
// init page
|
||||
long done_bgn = gplx.core.envs.System_.Ticks();
|
||||
|
||||
// get ns / ttl
|
||||
int cur_ns = ppg.Ns_id();
|
||||
Xoa_ttl ttl = wiki.Ttl_parse(cur_ns, ppg.Ttl_bry());
|
||||
// if ns changed and prv_ns is main
|
||||
@@ -120,10 +124,13 @@ public class Xomp_parse_wkr implements Gfo_invk {
|
||||
wiki.Cache_mgr().Free_mem__all(); // NOTE: clears page and wbase cache only; needed else OutOfMemory error for en.w in 25th hour; DATE:2017-01-11
|
||||
prv_ns = cur_ns;
|
||||
}
|
||||
|
||||
// init page
|
||||
Xoae_page wpg = Xoae_page.New(wiki, ttl);
|
||||
wpg.Bldr__ns_ord_(ns_ord_mgr.Get_ord_by_ns_id(cur_ns)); // NOTE: must set ns_id for tier_id in lnki_temp; DATE:2016-09-19
|
||||
wpg.Db().Text().Text_bry_(ppg.Text());
|
||||
wpg.Db().Page().Init_by_mp(ppg.Id(), ppg.Page_score());
|
||||
wpg.Stat_itm().Init(uid);
|
||||
|
||||
// parse page
|
||||
Xop_ctx pctx = parser_mgr.Ctx();
|
||||
@@ -134,16 +141,22 @@ public class Xomp_parse_wkr implements Gfo_invk {
|
||||
hdump_bldr.Insert(pctx, wpg);
|
||||
|
||||
// index
|
||||
if (indexer != null) indexer.Index(wpg);
|
||||
long fulltext_time = 0;
|
||||
if (indexer != null) {
|
||||
fulltext_time = gplx.core.envs.System_.Ticks();
|
||||
indexer.Index(wpg);
|
||||
fulltext_time = gplx.core.envs.System_.Ticks__elapsed_in_frac(fulltext_time);
|
||||
}
|
||||
|
||||
// mark done for sake of progress
|
||||
prog_mgr.Mark_done(ppg.Id());
|
||||
|
||||
// update stats
|
||||
long time_cur = gplx.core.envs.System_.Ticks();
|
||||
done_time += time_cur - done_bgn;
|
||||
done_bgn = time_cur;
|
||||
long page_time = time_cur - done_bgn;
|
||||
done_time += page_time;
|
||||
++done_count;
|
||||
stat_tbl.Insert(wpg, hdump_bldr.Tmp_hpg(), uid, page_time, fulltext_time);
|
||||
|
||||
// cleanup
|
||||
// ctx.App().Utl__bfr_mkr().Clear_fail_check(); // make sure all bfrs are released
|
||||
@@ -165,6 +178,7 @@ public class Xomp_parse_wkr implements Gfo_invk {
|
||||
if (logger != null) logger.End();
|
||||
wkr_db.Conn().Txn_end();
|
||||
wkr_db.Conn().Rls_conn();
|
||||
stat_tbl.Stmt_rls();
|
||||
mgr.Wkrs_done_add_1(); // NOTE: must release latch last else thread errors
|
||||
}
|
||||
public void Bld_stats(Bry_bfr bfr) {
|
||||
|
||||
Reference in New Issue
Block a user