1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2026-03-02 03:49:30 +00:00

Make: Add xomp_stats to track time per page (and other attributes) [#456]

This commit is contained in:
gnosygnu
2019-05-08 06:35:10 -04:00
parent 5db81504fb
commit cf94f252e9
34 changed files with 336 additions and 175 deletions

View File

@@ -0,0 +1,106 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.addons.bldrs.mass_parses.dbs; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.mass_parses.*;
import gplx.dbs.*;
import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.hzips.*;
import gplx.xowa.wikis.pages.*;
import gplx.xowa.parsers.logs.stats.*;
/**
 * Table wrapper for "xomp_stats": one row per page holding the worker uid, text / html / zip lengths,
 * page / tidy / fulltext / scribunto timings, and per-page element counts.
 * Rows are written through a single reusable insert statement, which must be created with Stmt_new() before
 * Insert() or Insert_by_copy() is called.
 */
public class Xomp_stat_tbl implements Rls_able {
	private static final String tbl_name = "xomp_stats";
	// NOTE: flds must be declared before the fld_* constants; static initializers run in textual order and each Add_* call below is made against flds
	private static final Dbmeta_fld_list flds = new Dbmeta_fld_list();

	// identity
	private static final String fld_page_id              = flds.Add_int_pkey("page_id");
	private static final String fld_wkr_uid              = flds.Add_int("wkr_uid");
	// lengths
	private static final String fld_wtxt_len             = flds.Add_int("wtxt_len");
	private static final String fld_html_len             = flds.Add_int("html_len");
	private static final String fld_zip_len              = flds.Add_int("zip_len");
	// timings
	private static final String fld_page_time            = flds.Add_long("page_time");
	private static final String fld_tidy_time            = flds.Add_long("tidy_time");
	private static final String fld_fulltext_time        = flds.Add_long("fulltext_time");
	// scribunto
	private static final String fld_scrib_time           = flds.Add_long("scrib_time");
	private static final String fld_scrib_count          = flds.Add_int("scrib_count");
	private static final String fld_scrib_depth          = flds.Add_int("scrib_depth");
	// element counts
	private static final String fld_image_count          = flds.Add_int("image_count");
	private static final String fld_audio_count          = flds.Add_int("audio_count");
	private static final String fld_video_count          = flds.Add_int("video_count");
	private static final String fld_media_count          = flds.Add_int("media_count");
	private static final String fld_lnki_count           = flds.Add_int("lnki_count");
	private static final String fld_lnke_count           = flds.Add_int("lnke_count");
	private static final String fld_hdr_count            = flds.Add_int("hdr_count");
	private static final String fld_math_count           = flds.Add_int("math_count");
	private static final String fld_imap_count           = flds.Add_int("imap_count");
	private static final String fld_hiero_count          = flds.Add_int("hiero_count");
	private static final String fld_gallery_count        = flds.Add_int("gallery_count");
	private static final String fld_gallery_packed_count = flds.Add_int("gallery_packed_count");

	private final Db_conn conn;
	private Db_stmt stmt_insert;

	/**
	 * Creates the table on the given connection, deletes any rows already in it, and registers this object
	 * with the connection through Rls_reg.
	 * @param conn connection of the database which holds (or will hold) the xomp_stats table
	 */
	public Xomp_stat_tbl(Db_conn conn) {
		this.conn = conn;
		this.Create_tbl();
		conn.Stmt_delete(tbl_name).Exec_delete(); // always zap table
		conn.Rls_reg(this);
	}

	/** Issues the create-table call for xomp_stats, with a unique "pkey" index on page_id. */
	public void Create_tbl() {
		conn.Meta_tbl_create
			( Dbmeta_tbl_itm.New
				( tbl_name
				, flds
				, Dbmeta_idx_itm.new_unique_by_tbl(tbl_name, "pkey", fld_page_id)
				)
			);
	}

	/** Rls_able implementation: releases the insert statement and stores the result of Db_stmt_.Rls back into the field. */
	public void Rls() {
		stmt_insert = Db_stmt_.Rls(stmt_insert);
	}

	/** Builds the insert statement used by Insert() and Insert_by_copy(). */
	public void Stmt_new() {
		stmt_insert = conn.Stmt_insert(tbl_name, flds);
	}

	/**
	 * Inserts one row by reading every column of the current row of rdr under the same column name.
	 * @param rdr reader positioned on a row which exposes all xomp_stats columns
	 */
	public void Insert_by_copy(Db_rdr rdr) {
		stmt_insert.Clear()
			.Val_int (fld_page_id              , rdr.Read_int (fld_page_id))
			.Val_int (fld_wkr_uid              , rdr.Read_int (fld_wkr_uid))
			.Val_int (fld_wtxt_len             , rdr.Read_int (fld_wtxt_len))
			.Val_int (fld_html_len             , rdr.Read_int (fld_html_len))
			.Val_int (fld_zip_len              , rdr.Read_int (fld_zip_len))
			.Val_long(fld_page_time            , rdr.Read_long(fld_page_time))
			.Val_long(fld_tidy_time            , rdr.Read_long(fld_tidy_time))
			.Val_long(fld_fulltext_time        , rdr.Read_long(fld_fulltext_time))
			.Val_long(fld_scrib_time           , rdr.Read_long(fld_scrib_time))
			.Val_int (fld_scrib_count          , rdr.Read_int (fld_scrib_count))
			.Val_int (fld_scrib_depth          , rdr.Read_int (fld_scrib_depth))
			.Val_int (fld_image_count          , rdr.Read_int (fld_image_count))
			.Val_int (fld_audio_count          , rdr.Read_int (fld_audio_count))
			.Val_int (fld_video_count          , rdr.Read_int (fld_video_count))
			.Val_int (fld_media_count          , rdr.Read_int (fld_media_count))
			.Val_int (fld_lnki_count           , rdr.Read_int (fld_lnki_count))
			.Val_int (fld_lnke_count           , rdr.Read_int (fld_lnke_count))
			.Val_int (fld_hdr_count            , rdr.Read_int (fld_hdr_count))
			.Val_int (fld_math_count           , rdr.Read_int (fld_math_count))
			.Val_int (fld_imap_count           , rdr.Read_int (fld_imap_count))
			.Val_int (fld_hiero_count          , rdr.Read_int (fld_hiero_count))
			.Val_int (fld_gallery_count        , rdr.Read_int (fld_gallery_count))
			.Val_int (fld_gallery_packed_count , rdr.Read_int (fld_gallery_packed_count))
			.Exec_insert();
	}

	/**
	 * Inserts one row for a page which has just been parsed.
	 * @param wpg            parsed page; supplies the wikitext source and the Xop_log_stat item
	 * @param hpg            html page; supplies the page id, html bytes and zip length
	 * @param wkr_uid        uid of the worker which parsed the page
	 * @param page_time      value stored in page_time, as measured by the caller
	 * @param fulltext_time  value stored in fulltext_time, as measured by the caller
	 */
	public void Insert(Xoae_page wpg, Xoh_page hpg, int wkr_uid, long page_time, long fulltext_time) {
		Xop_log_stat page_stat = wpg.Stat_itm();
		stmt_insert.Clear()
			.Val_int (fld_page_id              , hpg.Page_id())
			.Val_int (fld_wkr_uid              , wkr_uid)
			.Val_int (fld_wtxt_len             , Len_or_0(wpg.Root().Root_src()))       // null source is stored as 0
			.Val_int (fld_html_len             , Len_or_0(hpg.Db().Html().Html_bry()))  // null html is stored as 0
			.Val_int (fld_zip_len              , hpg.Db().Html().Zip_len())
			.Val_long(fld_page_time            , page_time)
			.Val_long(fld_tidy_time            , page_stat.Tidy_time)
			.Val_long(fld_fulltext_time        , fulltext_time)
			.Val_long(fld_scrib_time           , page_stat.Scrib().Time())
			.Val_int (fld_scrib_count          , page_stat.Scrib().Count())
			.Val_int (fld_scrib_depth          , page_stat.Scrib().Depth_max())
			.Val_int (fld_image_count          , page_stat.Image_count)
			.Val_int (fld_audio_count          , page_stat.Audio_count)
			.Val_int (fld_video_count          , page_stat.Video_count)
			.Val_int (fld_media_count          , page_stat.Media_count)
			.Val_int (fld_lnki_count           , page_stat.Lnki_count)
			.Val_int (fld_lnke_count           , page_stat.Lnke_count)
			.Val_int (fld_hdr_count            , page_stat.Hdr_count)
			.Val_int (fld_math_count           , page_stat.Math_count)
			.Val_int (fld_imap_count           , page_stat.Imap_count)
			.Val_int (fld_hiero_count          , page_stat.Hiero_count)
			.Val_int (fld_gallery_count        , page_stat.Gallery_count)
			.Val_int (fld_gallery_packed_count , page_stat.Gallery_packed_count)
			.Exec_insert();
	}

	/** Releases the insert statement; same effect as Rls(). */
	public void Stmt_rls() {
		this.Rls();
	}

	/** Returns the length of bry, or 0 when bry is null. */
	private static int Len_or_0(byte[] bry) {
		if (bry == null) return 0;
		return bry.length;
	}
}

View File

@@ -20,8 +20,12 @@ public class Xomp_make_cmd extends Xob_cmd__base {
// Passes bldr and wiki straight through to the Xob_cmd__base constructor; no other setup happens here
public Xomp_make_cmd(Xob_bldr bldr, Xowe_wiki wiki) {
	super(bldr, wiki);
}
/**
 * Runs the make stages selected by cfg.Mode().
 * Each stage ("html", "lnki", "stat") runs at most once, and only when its key is present in the mode hash.
 * The former unconditional html / lnki calls are gone: they ran those stages regardless of mode,
 * and a second time whenever the mode also listed them.
 */
@Override public void Cmd_run() {
	wiki.Init_assert();
	if (cfg.Mode().Has("html"))
		new Xomp_make_html().Exec(wiki, cfg);
	if (cfg.Mode().Has("lnki"))
		new Xomp_make_lnki().Exec(wiki, cfg, 10000);
	if (cfg.Mode().Has("stat"))
		new Xomp_make_stat().Exec(wiki, cfg);
}
@Override public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
if (ctx.Match(k, Invk__cfg)) return cfg;

View File

@@ -15,10 +15,12 @@ Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.addons.bldrs.mass_parses.makes; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.mass_parses.*;
public class Xomp_make_cmd_cfg implements Gfo_invk {
// make stages to run; the initial hash holds all three keys: "html", "lnki", "stat"
private Hash_adp mode = Hash_adp_.New().Add_and_more("html", "html").Add_and_more("lnki", "lnki").Add_and_more("stat", "stat");
public Hash_adp Mode() {
	return mode;
}
// initial value is true
private boolean delete_html_dbs = true;
public boolean Delete_html_dbs() {
	return delete_html_dbs;
}
// starts empty; never reassigned
private final Ordered_hash merger_wkrs = Ordered_hash_.New();
public Ordered_hash Merger_wkrs() {
	return merger_wkrs;
}
public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
if (ctx.Match(k, Invk__delete_html_dbs_)) delete_html_dbs = m.ReadYn("v");
if (ctx.Match(k, Invk__mode_)) mode = GfoMsg_.Read_str_ary_as_hash(m, "v");
else if (ctx.Match(k, Invk__delete_html_dbs_)) delete_html_dbs = m.ReadYn("v");
else if (ctx.Match(k, Invk__merger_wkrs_)) {
String[] ary = m.ReadStrAry("k", "|");
for (String itm : ary)
@@ -27,5 +29,5 @@ public class Xomp_make_cmd_cfg implements Gfo_invk {
else return Gfo_invk_.Rv_unhandled;
return this;
}
// keys matched in Invk; declared exactly once (a second declaration of Invk__delete_html_dbs_ / Invk__merger_wkrs_ does not compile)
private static final String Invk__mode_ = "mode_", Invk__delete_html_dbs_ = "delete_html_dbs_", Invk__merger_wkrs_ = "merger_wkrs_";
}

View File

@@ -0,0 +1,51 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.addons.bldrs.mass_parses.makes; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.mass_parses.*;
import gplx.core.brys.*;
import gplx.dbs.*; import gplx.xowa.htmls.core.dbs.*; import gplx.xowa.addons.bldrs.mass_parses.dbs.*;
/**
 * Make stage which copies the rows of every worker database's xomp_stats table into the
 * xomp_stats table of the manager database.
 */
class Xomp_make_stat {
	// number of copied rows per worker db between progress messages / Txn_sav calls
	private static final int Rows_per_save = 10000;

	/**
	 * Copies all worker xomp_stats rows into the manager database.
	 * The manager statement, transaction and connection are released in a finally block so that a failure
	 * while copying one worker does not leave them open.
	 * @param wiki wiki whose mass-parse manager database is loaded
	 * @param cfg  make config; not read by this stage
	 */
	public void Exec(Xowe_wiki wiki, Xomp_make_cmd_cfg cfg) {
		// init mgr_db and mgr_tbl
		Xomp_mgr_db mgr_db = Xomp_mgr_db.New__load(wiki);
		Db_conn mgr_conn = mgr_db.Conn();
		Xomp_stat_tbl mgr_tbl = new Xomp_stat_tbl(mgr_conn);
		mgr_conn.Txn_bgn("xomp_stats");
		mgr_tbl.Stmt_new();

		try {
			// loop wkrs
			String sql = "SELECT * FROM xomp_stats;";
			int wkrs_len = mgr_db.Tbl__wkr().Select_count();
			for (int i = 0; i < wkrs_len; ++i) {
				int count = 0;	// rows copied from this worker db
				Xomp_wkr_db wkr_db = Xomp_wkr_db.New(mgr_db.Dir(), i);
				// NOTE(review): the worker connection is not released here; confirm whether it is pooled / released elsewhere
				Db_rdr rdr = wkr_db.Conn().Stmt_sql(sql).Exec_select__rls_auto(); // ANSI.Y
				try {
					while (rdr.Move_next()) {
						mgr_tbl.Insert_by_copy(rdr);
						if (++count % Rows_per_save == 0) {
							Gfo_usr_dlg_.Instance.Prog_many("", "", "xomp.stat.insert: db=~{0} count=~{1}", Int_.To_str_pad_bgn_space(i, 3), Int_.To_str_pad_bgn_space(count, 8));
							mgr_conn.Txn_sav();
						}
					}
				} finally {rdr.Rls();}
			}
		} finally {
			// cleanup; same calls and order as before, but now also reached when the copy loop throws
			mgr_tbl.Stmt_rls();
			mgr_conn.Txn_end();
			mgr_conn.Rls_conn();
		}
	}
}

View File

@@ -40,7 +40,9 @@ public class Xomp_parse_wkr implements Gfo_invk {
private final Xob_hdump_bldr hdump_bldr = new Xob_hdump_bldr();
private final int uid;
private Xomp_wkr_db wkr_db;
private Xomp_stat_tbl stat_tbl;
// indexer vars
private final Xofulltext_indexer_wkr indexer;
private final List_adp list = List_adp_.New(); private int list_idx = 0, list_len = 0;
@@ -64,9 +66,9 @@ public class Xomp_parse_wkr implements Gfo_invk {
// wkr-specific vars
this.wiki = wiki; this.uid = uid;
this.wkr_db = Xomp_wkr_db.New(Xomp_mgr_db.New__url(wiki), uid);
this.stat_tbl = new Xomp_stat_tbl(wkr_db.Conn());
}
public void Exec() {
// init
Xow_parser_mgr parser_mgr = wiki.Parser_mgr();
// disable file download
@@ -86,7 +88,7 @@ public class Xomp_parse_wkr implements Gfo_invk {
logger.Bgn();
}
// init log_mgr / property_wkr
// init log_mgr / property_wkr / stats
Xop_log_wkr_factory wkr_factory = new Xop_log_wkr_factory(wkr_db.Conn());
if (cfg.Log_math()) wiki.Parser_mgr().Math__core().Log_wkr_(wkr_factory);
@@ -94,6 +96,7 @@ public class Xomp_parse_wkr implements Gfo_invk {
hdump_bldr.Enabled_(cfg.Hdump_enabled()).Hzip_enabled_(cfg.Hzip_enabled()).Hzip_diff_(cfg.Hdiff_enabled()).Zip_tid_(cfg.Zip_tid());
hdump_bldr.Init(wiki, wkr_db.Conn(), new Xob_hdump_tbl_retriever__xomp(wkr_db.Html_tbl()));
wkr_db.Conn().Txn_bgn("xomp");
stat_tbl.Stmt_new();
// set status to running
mgr_db.Tbl__wkr().Update_status(uid, Xomp_wkr_tbl.Status__running);
@@ -110,8 +113,9 @@ public class Xomp_parse_wkr implements Gfo_invk {
if (ppg.Text() == null) continue; // some pages have no text; ignore them else null ref; PAGE: it.d:miercuri DATE:2015-12-05
try {
// init page
long done_bgn = gplx.core.envs.System_.Ticks();
// get ns / ttl
int cur_ns = ppg.Ns_id();
Xoa_ttl ttl = wiki.Ttl_parse(cur_ns, ppg.Ttl_bry());
// if ns changed and prv_ns is main
@@ -120,10 +124,13 @@ public class Xomp_parse_wkr implements Gfo_invk {
wiki.Cache_mgr().Free_mem__all(); // NOTE: clears page and wbase cache only; needed else OutOfMemory error for en.w in 25th hour; DATE:2017-01-11
prv_ns = cur_ns;
}
// init page
Xoae_page wpg = Xoae_page.New(wiki, ttl);
wpg.Bldr__ns_ord_(ns_ord_mgr.Get_ord_by_ns_id(cur_ns)); // NOTE: must set ns_id for tier_id in lnki_temp; DATE:2016-09-19
wpg.Db().Text().Text_bry_(ppg.Text());
wpg.Db().Page().Init_by_mp(ppg.Id(), ppg.Page_score());
wpg.Stat_itm().Init(uid);
// parse page
Xop_ctx pctx = parser_mgr.Ctx();
@@ -134,16 +141,22 @@ public class Xomp_parse_wkr implements Gfo_invk {
hdump_bldr.Insert(pctx, wpg);
// index
if (indexer != null) indexer.Index(wpg);
long fulltext_time = 0;
if (indexer != null) {
fulltext_time = gplx.core.envs.System_.Ticks();
indexer.Index(wpg);
fulltext_time = gplx.core.envs.System_.Ticks__elapsed_in_frac(fulltext_time);
}
// mark done for sake of progress
prog_mgr.Mark_done(ppg.Id());
// update stats
long time_cur = gplx.core.envs.System_.Ticks();
done_time += time_cur - done_bgn;
done_bgn = time_cur;
long page_time = time_cur - done_bgn;
done_time += page_time;
++done_count;
stat_tbl.Insert(wpg, hdump_bldr.Tmp_hpg(), uid, page_time, fulltext_time);
// cleanup
// ctx.App().Utl__bfr_mkr().Clear_fail_check(); // make sure all bfrs are released
@@ -165,6 +178,7 @@ public class Xomp_parse_wkr implements Gfo_invk {
if (logger != null) logger.End();
wkr_db.Conn().Txn_end();
wkr_db.Conn().Rls_conn();
stat_tbl.Stmt_rls();
mgr.Wkrs_done_add_1(); // NOTE: must release latch last else thread errors
}
public void Bld_stats(Bry_bfr bfr) {