1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2024-10-27 20:34:16 +00:00

Mass_parse: Add error_filter to ignore logging certain errors [#793]

This commit is contained in:
gnosygnu 2020-09-01 09:02:47 -04:00
parent 1b6324938c
commit f19228c886
3 changed files with 378 additions and 328 deletions

View File

@ -1,6 +1,6 @@
/* /*
XOWA: the XOWA Offline Wiki Application XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com Copyright (C) 2012-2020 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3, XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0. or alternatively under the terms of the Apache License Version 2.0.
@ -13,42 +13,54 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/ */
package gplx.xowa.addons.bldrs.mass_parses.parses.mgrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.mass_parses.*; import gplx.xowa.addons.bldrs.mass_parses.parses.*; package gplx.xowa.addons.bldrs.mass_parses.parses.mgrs;
import gplx.core.ios.streams.*;
import gplx.Datetime_now;
import gplx.GfoMsg;
import gplx.Gfo_invk;
import gplx.Gfo_invk_;
import gplx.GfsCtx;
import gplx.Io_mgr;
import gplx.Io_url;
import gplx.core.ios.streams.Io_stream_tid_;
import gplx.xowa.Xowe_wiki;
import gplx.xowa.parsers.logs.Xop_log_invoke_wkr;
import gplx.xowa.xtns.scribunto.Scrib_err_filter_mgr;
public class Xomp_parse_mgr_cfg implements Gfo_invk { public class Xomp_parse_mgr_cfg implements Gfo_invk {
public int Num_wkrs() {return num_wkrs;} private int num_wkrs = -1; public int Num_wkrs() {return num_wkrs;} private int num_wkrs = -1;
public int Num_pages_in_pool() {return num_pages_in_pool;} private int num_pages_in_pool = -1; public int Num_pages_in_pool() {return num_pages_in_pool;} private int num_pages_in_pool = -1;
public int Num_pages_per_wkr() {return num_pages_per_wkr;} private int num_pages_per_wkr = 1000; public int Num_pages_per_wkr() {return num_pages_per_wkr;} private int num_pages_per_wkr = 1000;
public int Progress_interval() {return progress_interval;} private int progress_interval = 1000; public int Progress_interval() {return progress_interval;} private int progress_interval = 1000;
public int Perf_interval() {return perf_interval;} private int perf_interval = 10000; public int Perf_interval() {return perf_interval;} private int perf_interval = 10000;
public int Commit_interval() {return commit_interval;} private int commit_interval = 10000; public int Commit_interval() {return commit_interval;} private int commit_interval = 10000;
public int Cleanup_interval() {return cleanup_interval;} private int cleanup_interval = 50; // setting at 1000 uses lots of memory public int Cleanup_interval() {return cleanup_interval;} private int cleanup_interval = 50; // setting at 1000 uses lots of memory
public boolean Hdump_enabled() {return hdump_enabled;} private boolean hdump_enabled = true; public boolean Hdump_enabled() {return hdump_enabled;} private boolean hdump_enabled = true;
public boolean Hdump_catboxs() {return hdump_catboxs;} private boolean hdump_catboxs = false; public boolean Hdump_catboxs() {return hdump_catboxs;} private boolean hdump_catboxs = false;
public boolean Hzip_enabled() {return hzip_enabled;} private boolean hzip_enabled = true; public boolean Hzip_enabled() {return hzip_enabled;} private boolean hzip_enabled = true;
public boolean Hdiff_enabled() {return hdiff_enabled;} private boolean hdiff_enabled = true; public boolean Hdiff_enabled() {return hdiff_enabled;} private boolean hdiff_enabled = true;
public boolean Log_file_lnkis() {return log_file_lnkis;} private boolean log_file_lnkis = true; public boolean Log_file_lnkis() {return log_file_lnkis;} private boolean log_file_lnkis = true;
public boolean Load_all_templates() {return load_all_templates;} private boolean load_all_templates = true; public boolean Load_all_templates() {return load_all_templates;} private boolean load_all_templates = true;
public boolean Load_all_imglinks() {return load_all_imglinks;} private boolean load_all_imglinks = true; public boolean Load_all_imglinks() {return load_all_imglinks;} private boolean load_all_imglinks = true;
public String Load_ifexists_ns() {return load_ifexists_ns;} private String load_ifexists_ns = null; public String Load_ifexists_ns() {return load_ifexists_ns;} private String load_ifexists_ns = null;
public boolean Log_math() {return log_math;} private boolean log_math = false; public boolean Log_math() {return log_math;} private boolean log_math = false;
public byte Zip_tid() {return zip_tid;} private byte zip_tid = Io_stream_tid_.Tid__gzip; public byte Zip_tid() {return zip_tid;} private byte zip_tid = Io_stream_tid_.Tid__gzip;
public Io_url Mgr_url() {return mgr_url;} private Io_url mgr_url; public Io_url Mgr_url() {return mgr_url;} private Io_url mgr_url;
public String Wkr_machine_name() {return wkr_machine_name;} private String wkr_machine_name; public String Wkr_machine_name() {return wkr_machine_name;} private String wkr_machine_name;
public boolean Show_msg__fetched_pool() {return show_msg__fetched_pool;} private boolean show_msg__fetched_pool; public boolean Show_msg__fetched_pool() {return show_msg__fetched_pool;} private boolean show_msg__fetched_pool;
public boolean Indexer_enabled() {return indexer_enabled;} private boolean indexer_enabled; public boolean Indexer_enabled() {return indexer_enabled;} private boolean indexer_enabled;
public String Indexer_opt() {return indexer_opt;} private String indexer_opt = gplx.gflucene.indexers.Gflucene_idx_opt.Docs_and_freqs.Key(); public String Indexer_opt() {return indexer_opt;} private String indexer_opt = gplx.gflucene.indexers.Gflucene_idx_opt.Docs_and_freqs.Key();
public String Wbase_cache_mru_type() {return wbase_cache_mru_type;} private String wbase_cache_mru_type = "mru"; public String Wbase_cache_mru_type() {return wbase_cache_mru_type;} private String wbase_cache_mru_type = "mru";
public long Wbase_cache_mru_size() {return wbase_cache_mru_size;} private long wbase_cache_mru_size = 100; public long Wbase_cache_mru_size() {return wbase_cache_mru_size;} private long wbase_cache_mru_size = 100;
public long Wbase_cache_mru_weight() {return wbase_cache_mru_weight;} private long wbase_cache_mru_weight = 10; public long Wbase_cache_mru_weight() {return wbase_cache_mru_weight;} private long wbase_cache_mru_weight = 10;
public long Wbase_cache_mru_compress_size() {return wbase_cache_mru_compress_size;} private long wbase_cache_mru_compress_size = 70; public long Wbase_cache_mru_compress_size() {return wbase_cache_mru_compress_size;} private long wbase_cache_mru_compress_size = 70;
public long Page_cache_min() {return page_cache_min;} private long page_cache_min = 1500 * Io_mgr.Len_mb_long; public long Page_cache_min() {return page_cache_min;} private long page_cache_min = 1500 * Io_mgr.Len_mb_long;
public long Page_cache_max() {return page_cache_max;} private long page_cache_max = 2000 * Io_mgr.Len_mb_long; public long Page_cache_max() {return page_cache_max;} private long page_cache_max = 2000 * Io_mgr.Len_mb_long;
public void Init(Xowe_wiki wiki) { public void Init(Xowe_wiki wiki) {
if (num_wkrs == -1) num_wkrs = gplx.core.envs.Runtime_.Cpu_count(); if (num_wkrs == -1) num_wkrs = gplx.core.envs.Runtime_.Cpu_count();
if (num_pages_in_pool == -1) num_pages_in_pool = num_wkrs * 1000; if (num_pages_in_pool == -1) num_pages_in_pool = num_wkrs * 1000;
if (mgr_url == null) mgr_url = wiki.Fsys_mgr().Root_dir().GenSubDir_nest("tmp", "xomp"); if (mgr_url == null) mgr_url = wiki.Fsys_mgr().Root_dir().GenSubDir_nest("tmp", "xomp");
if (wkr_machine_name == null) wkr_machine_name = gplx.core.envs.System_.Env__machine_name(); if (wkr_machine_name == null) wkr_machine_name = gplx.core.envs.System_.Env__machine_name();
} }
public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) { public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
@ -80,9 +92,11 @@ public class Xomp_parse_mgr_cfg implements Gfo_invk {
else if (ctx.Match(k, "wbase_cache_mru_compress_size_")) wbase_cache_mru_compress_size = m.ReadLong("v"); else if (ctx.Match(k, "wbase_cache_mru_compress_size_")) wbase_cache_mru_compress_size = m.ReadLong("v");
else if (ctx.Match(k, "page_cache_min_")) page_cache_min = gplx.core.ios.Io_size_.parse_or(m.ReadStr("v"), page_cache_min); else if (ctx.Match(k, "page_cache_min_")) page_cache_min = gplx.core.ios.Io_size_.parse_or(m.ReadStr("v"), page_cache_min);
else if (ctx.Match(k, "page_cache_max_")) page_cache_max = gplx.core.ios.Io_size_.parse_or(m.ReadStr("v"), page_cache_max); else if (ctx.Match(k, "page_cache_max_")) page_cache_max = gplx.core.ios.Io_size_.parse_or(m.ReadStr("v"), page_cache_max);
else if (ctx.Match(k, Invk__err_filter)) return Scrib_err_filter_mgr.INSTANCE;
else return Gfo_invk_.Rv_unhandled; else return Gfo_invk_.Rv_unhandled;
return this; return this;
} }
private Xop_log_invoke_wkr invoke_wkr;
private static final String private static final String
Invk__num_wkrs_ = "num_wkrs_", Invk__num_pages_in_pool_ = "num_pages_in_pool_", Invk__num_pages_per_wkr_ = "num_pages_per_wkr_" Invk__num_wkrs_ = "num_wkrs_", Invk__num_pages_in_pool_ = "num_pages_in_pool_", Invk__num_pages_per_wkr_ = "num_pages_per_wkr_"
, Invk__progress_interval_ = "progress_interval_", Invk__commit_interval_ = "commit_interval_", Invk__cleanup_interval_ = "cleanup_interval_" , Invk__progress_interval_ = "progress_interval_", Invk__commit_interval_ = "commit_interval_", Invk__cleanup_interval_ = "cleanup_interval_"
@ -92,5 +106,6 @@ public class Xomp_parse_mgr_cfg implements Gfo_invk {
, Invk__log_math_ = "log_math_" , Invk__log_math_ = "log_math_"
, Invk__mgr_url_ = "mgr_url_", Invk__wkr_machine_name_ = "wkr_machine_name_" , Invk__mgr_url_ = "mgr_url_", Invk__wkr_machine_name_ = "wkr_machine_name_"
, Invk__show_msg__fetched_pool_ = "show_msg__fetched_pool_" , Invk__show_msg__fetched_pool_ = "show_msg__fetched_pool_"
, Invk__err_filter = "err_filter"
; ;
} }

View File

@ -1,6 +1,6 @@
/* /*
XOWA: the XOWA Offline Wiki Application XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com Copyright (C) 2012-2020 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3, XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0. or alternatively under the terms of the Apache License Version 2.0.
@ -13,11 +13,25 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/ */
package gplx.xowa.xtns.scribunto; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; package gplx.xowa.xtns.scribunto;
import gplx.Bry_bfr;
import gplx.Bry_bfr_;
import gplx.GfoMsg;
import gplx.Gfo_invk;
import gplx.Gfo_invk_;
import gplx.GfsCtx;
import gplx.List_adp;
import gplx.List_adp_;
import gplx.Ordered_hash;
import gplx.Ordered_hash_;
import gplx.String_;
public class Scrib_err_filter_mgr implements Gfo_invk { public class Scrib_err_filter_mgr implements Gfo_invk {
private final Ordered_hash hash_by_mod = Ordered_hash_.New(); private final Object thread_lock = new Object();
private final Ordered_hash hash_by_mod = Ordered_hash_.New();
public void Clear() {hash_by_mod.Clear();} public void Clear() {hash_by_mod.Clear();}
public boolean Count_eq_0() {return hash_by_mod.Count() == 0;} public boolean Empty() {return empty;} private boolean empty = true;
public boolean Match(String mod, String fnc, String err) { public boolean Match(String mod, String fnc, String err) {
List_adp itms = Get_itms_or_null(mod, fnc); if (itms == null) return false; List_adp itms = Get_itms_or_null(mod, fnc); if (itms == null) return false;
int itms_len = itms.Count(); int itms_len = itms.Count();
@ -33,9 +47,12 @@ public class Scrib_err_filter_mgr implements Gfo_invk {
return match; return match;
} }
public void Add(int count_expd, String mod, String fnc, String err, String comment) { public void Add(int count_expd, String mod, String fnc, String err, String comment) {
List_adp itms = Get_itms_or_null(mod, fnc); synchronized (thread_lock) {
if (itms == null) itms = New_itms(mod, fnc); empty = false;
itms.Add(new Scrib_err_filter_itm(count_expd, mod, fnc, err, comment)); List_adp itms = Get_itms_or_null(mod, fnc);
if (itms == null) itms = New_itms(mod, fnc);
itms.Add(new Scrib_err_filter_itm(count_expd, mod, fnc, err, comment));
}
} }
public String Print() { public String Print() {
Bry_bfr bfr = Bry_bfr_.New_w_size(8); Bry_bfr bfr = Bry_bfr_.New_w_size(8);
@ -79,14 +96,17 @@ public class Scrib_err_filter_mgr implements Gfo_invk {
else return Gfo_invk_.Rv_unhandled; else return Gfo_invk_.Rv_unhandled;
return this; return this;
} private static final String Invk_add = "add"; } private static final String Invk_add = "add";
// 2020-09-01: singleton b/c xomp instantiates multiple wikis; previous implementation was `((Scrib_xtn_mgr)(wiki.Xtn_mgr().Get_or_fail(Scrib_xtn_mgr.XTN_KEY))).Invoke_wkr();` which doesn't multi-thread
public static final Scrib_err_filter_mgr INSTANCE = new Scrib_err_filter_mgr();
} }
class Scrib_err_filter_itm { class Scrib_err_filter_itm {
public Scrib_err_filter_itm(int count_expd, String mod, String fnc, String err, String comment) {this.count_expd = count_expd; this.mod = mod; this.err = err; this.fnc = fnc; this.comment = comment;} public Scrib_err_filter_itm(int count_expd, String mod, String fnc, String err, String comment) {this.count_expd = count_expd; this.mod = mod; this.err = err; this.fnc = fnc; this.comment = comment;}
public String Mod() {return mod;} private final String mod; public String Mod() {return mod;} private final String mod;
public String Fnc() {return fnc;} private final String fnc; public String Fnc() {return fnc;} private final String fnc;
public String Err() {return err;} private final String err; public String Err() {return err;} private final String err;
public String Comment() {return comment;} private final String comment; public String Comment() {return comment;} private final String comment;
public int Count_expd() {return count_expd;} private final int count_expd; public int Count_expd() {return count_expd;} private final int count_expd;
public int Count_actl() {return count_actl;} private int count_actl; public int Count_actl() {return count_actl;} private int count_actl;
public void Count_actl_add_1() {++count_actl;} public void Count_actl_add_1() {++count_actl;}
} }

View File

@ -1,6 +1,6 @@
/* /*
XOWA: the XOWA Offline Wiki Application XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com Copyright (C) 2012-2020 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3, XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0. or alternatively under the terms of the Apache License Version 2.0.
@ -13,17 +13,33 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/ */
package gplx.xowa.xtns.scribunto; import gplx.*; package gplx.xowa.xtns.scribunto;
import gplx.Bry_;
import gplx.Bry_bfr;
import gplx.Err;
import gplx.Err_;
import gplx.GfoMsg;
import gplx.Gfo_invk;
import gplx.GfsCtx;
import gplx.String_;
import gplx.core.brys.fmtrs.Bry_fmtr;
import gplx.core.envs.System_;
import gplx.core.threads.Thread_adp; import gplx.core.threads.Thread_adp;
import gplx.core.threads.Thread_adp_; import gplx.core.threads.Thread_adp_;
import gplx.xowa.*; import gplx.xowa.xtns.*; import gplx.xowa.Xoa_ttl;
import gplx.core.brys.fmtrs.*; import gplx.core.envs.*; import gplx.xowa.Xowe_wiki;
import gplx.langs.htmls.*; import gplx.xowa.langs.kwds.Xol_kwd_grp_;
import gplx.xowa.langs.kwds.*; import gplx.xowa.langs.msgs.*; import gplx.xowa.langs.msgs.Xol_msg_itm_;
import gplx.xowa.wikis.nss.*; import gplx.xowa.langs.msgs.Xow_msg_mgr;
import gplx.xowa.htmls.*; import gplx.xowa.parsers.Xop_ctx;
import gplx.xowa.parsers.*; import gplx.xowa.parsers.logs.*; import gplx.xowa.parsers.tmpls.*; import gplx.xowa.parsers.logs.Xop_log_invoke_wkr;
import gplx.xowa.xtns.pfuncs.*; import gplx.xowa.parsers.tmpls.Xot_invk;
import gplx.xowa.wikis.nss.Xow_ns;
import gplx.xowa.wikis.nss.Xow_ns_;
import gplx.xowa.xtns.pfuncs.Pf_func;
import gplx.xowa.xtns.pfuncs.Pf_func_;
import gplx.xowa.xtns.pfuncs.Pf_func_base;
import gplx.xowa.xtns.scribunto.cfgs.ScribCfg; import gplx.xowa.xtns.scribunto.cfgs.ScribCfg;
import gplx.xowa.xtns.scribunto.cfgs.ScribCfgResolver; import gplx.xowa.xtns.scribunto.cfgs.ScribCfgResolver;
@ -106,9 +122,8 @@ public class Scrib_invoke_func extends Pf_func_base {
catch (Throwable e) { catch (Throwable e) {
Err err = Err_.Cast_or_make(e); Err err = Err_.Cast_or_make(e);
Error(bfr, wiki.Msg_mgr(), err); Error(bfr, wiki.Msg_mgr(), err);
Scrib_err_filter_mgr err_filter_mgr = invoke_wkr == null ? null : invoke_wkr.Err_filter_mgr(); Scrib_err_filter_mgr err_filter_mgr = Scrib_err_filter_mgr.INSTANCE;
if ( err_filter_mgr == null // no err_filter_mgr defined; if ( err_filter_mgr.Empty() // err_filter_mgr exists, but no definitions
|| err_filter_mgr.Count_eq_0() // err_filter_mgr exists, but no definitions
|| !err_filter_mgr.Match(String_.new_u8(mod_name), String_.new_u8(fnc_name), err.To_str__msg_only())) // NOTE: must be To_str__msg_only; err_filter_mgr has defintion and it doesn't match current; print warn; DATE:2015-07-24 || !err_filter_mgr.Match(String_.new_u8(mod_name), String_.new_u8(fnc_name), err.To_str__msg_only())) // NOTE: must be To_str__msg_only; err_filter_mgr has defintion and it doesn't match current; print warn; DATE:2015-07-24
ctx.App().Usr_dlg().Warn_many("", "", "invoke failed: ~{0} ~{1} ~{2}", ctx.Page().Ttl().Raw(), Bry_.Replace_nl_w_tab(src, self.Src_bgn(), self.Src_end()), err.To_str__log()); ctx.App().Usr_dlg().Warn_many("", "", "invoke failed: ~{0} ~{1} ~{2}", ctx.Page().Ttl().Raw(), Bry_.Replace_nl_w_tab(src, self.Src_bgn(), self.Src_end()), err.To_str__log());
wiki.Parser_mgr().Scrib().Terminate_when_page_changes_y_(); // NOTE: terminate core when page changes; not terminating now, else page with many errors will be very slow due to multiple remakes of core; PAGE:th.d:all; DATE:2014-10-03 wiki.Parser_mgr().Scrib().Terminate_when_page_changes_y_(); // NOTE: terminate core when page changes; not terminating now, else page with many errors will be very slow due to multiple remakes of core; PAGE:th.d:all; DATE:2014-10-03