Mass_parse: Add error_filter to ignore logging certain errors [#793]

staging
gnosygnu 4 years ago
parent 1b6324938c
commit f19228c886

@ -1,6 +1,6 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
Copyright (C) 2012-2020 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
@ -13,42 +13,54 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.addons.bldrs.mass_parses.parses.mgrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.mass_parses.*; import gplx.xowa.addons.bldrs.mass_parses.parses.*;
import gplx.core.ios.streams.*;
package gplx.xowa.addons.bldrs.mass_parses.parses.mgrs;
import gplx.Datetime_now;
import gplx.GfoMsg;
import gplx.Gfo_invk;
import gplx.Gfo_invk_;
import gplx.GfsCtx;
import gplx.Io_mgr;
import gplx.Io_url;
import gplx.core.ios.streams.Io_stream_tid_;
import gplx.xowa.Xowe_wiki;
import gplx.xowa.parsers.logs.Xop_log_invoke_wkr;
import gplx.xowa.xtns.scribunto.Scrib_err_filter_mgr;
public class Xomp_parse_mgr_cfg implements Gfo_invk {
// Configuration values for the mass-parse manager: each line pairs a public getter with its private backing field and the field's default.
// num_wkrs and num_pages_in_pool default to -1; mgr_url and wkr_machine_name have no initializer (null). Init() replaces these unset values.
// NOTE(review): the same run of declarations appears twice below; this text looks like a diff rendering that shows the pre-change and post-change copies together -- confirm before treating it as compilable source.
public int Num_wkrs() {return num_wkrs;} private int num_wkrs = -1;
public int Num_pages_in_pool() {return num_pages_in_pool;} private int num_pages_in_pool = -1;
public int Num_pages_per_wkr() {return num_pages_per_wkr;} private int num_pages_per_wkr = 1000;
public int Progress_interval() {return progress_interval;} private int progress_interval = 1000;
public int Perf_interval() {return perf_interval;} private int perf_interval = 10000;
public int Commit_interval() {return commit_interval;} private int commit_interval = 10000;
public int Cleanup_interval() {return cleanup_interval;} private int cleanup_interval = 50; // setting at 1000 uses lots of memory
public boolean Hdump_enabled() {return hdump_enabled;} private boolean hdump_enabled = true;
public boolean Hdump_catboxs() {return hdump_catboxs;} private boolean hdump_catboxs = false;
public boolean Hzip_enabled() {return hzip_enabled;} private boolean hzip_enabled = true;
public boolean Hdiff_enabled() {return hdiff_enabled;} private boolean hdiff_enabled = true;
public boolean Log_file_lnkis() {return log_file_lnkis;} private boolean log_file_lnkis = true;
public boolean Load_all_templates() {return load_all_templates;} private boolean load_all_templates = true;
public boolean Load_all_imglinks() {return load_all_imglinks;} private boolean load_all_imglinks = true;
public String Load_ifexists_ns() {return load_ifexists_ns;} private String load_ifexists_ns = null;
public boolean Log_math() {return log_math;} private boolean log_math = false;
public byte Zip_tid() {return zip_tid;} private byte zip_tid = Io_stream_tid_.Tid__gzip;
public Io_url Mgr_url() {return mgr_url;} private Io_url mgr_url;
public String Wkr_machine_name() {return wkr_machine_name;} private String wkr_machine_name;
public boolean Show_msg__fetched_pool() {return show_msg__fetched_pool;} private boolean show_msg__fetched_pool;
public boolean Indexer_enabled() {return indexer_enabled;} private boolean indexer_enabled;
public String Indexer_opt() {return indexer_opt;} private String indexer_opt = gplx.gflucene.indexers.Gflucene_idx_opt.Docs_and_freqs.Key();
public String Wbase_cache_mru_type() {return wbase_cache_mru_type;} private String wbase_cache_mru_type = "mru";
public long Wbase_cache_mru_size() {return wbase_cache_mru_size;} private long wbase_cache_mru_size = 100;
public long Wbase_cache_mru_weight() {return wbase_cache_mru_weight;} private long wbase_cache_mru_weight = 10;
public long Wbase_cache_mru_compress_size() {return wbase_cache_mru_compress_size;} private long wbase_cache_mru_compress_size = 70;
// page cache bounds are expressed as a multiple of Io_mgr.Len_mb_long (presumably bytes per MB -- TODO confirm)
public long Page_cache_min() {return page_cache_min;} private long page_cache_min = 1500 * Io_mgr.Len_mb_long;
public long Page_cache_max() {return page_cache_max;} private long page_cache_max = 2000 * Io_mgr.Len_mb_long;
// second copy of the declarations above (see NOTE at the top of this run)
public int Num_wkrs() {return num_wkrs;} private int num_wkrs = -1;
public int Num_pages_in_pool() {return num_pages_in_pool;} private int num_pages_in_pool = -1;
public int Num_pages_per_wkr() {return num_pages_per_wkr;} private int num_pages_per_wkr = 1000;
public int Progress_interval() {return progress_interval;} private int progress_interval = 1000;
public int Perf_interval() {return perf_interval;} private int perf_interval = 10000;
public int Commit_interval() {return commit_interval;} private int commit_interval = 10000;
public int Cleanup_interval() {return cleanup_interval;} private int cleanup_interval = 50; // setting at 1000 uses lots of memory
public boolean Hdump_enabled() {return hdump_enabled;} private boolean hdump_enabled = true;
public boolean Hdump_catboxs() {return hdump_catboxs;} private boolean hdump_catboxs = false;
public boolean Hzip_enabled() {return hzip_enabled;} private boolean hzip_enabled = true;
public boolean Hdiff_enabled() {return hdiff_enabled;} private boolean hdiff_enabled = true;
public boolean Log_file_lnkis() {return log_file_lnkis;} private boolean log_file_lnkis = true;
public boolean Load_all_templates() {return load_all_templates;} private boolean load_all_templates = true;
public boolean Load_all_imglinks() {return load_all_imglinks;} private boolean load_all_imglinks = true;
public String Load_ifexists_ns() {return load_ifexists_ns;} private String load_ifexists_ns = null;
public boolean Log_math() {return log_math;} private boolean log_math = false;
public byte Zip_tid() {return zip_tid;} private byte zip_tid = Io_stream_tid_.Tid__gzip;
public Io_url Mgr_url() {return mgr_url;} private Io_url mgr_url;
public String Wkr_machine_name() {return wkr_machine_name;} private String wkr_machine_name;
public boolean Show_msg__fetched_pool() {return show_msg__fetched_pool;} private boolean show_msg__fetched_pool;
public boolean Indexer_enabled() {return indexer_enabled;} private boolean indexer_enabled;
public String Indexer_opt() {return indexer_opt;} private String indexer_opt = gplx.gflucene.indexers.Gflucene_idx_opt.Docs_and_freqs.Key();
public String Wbase_cache_mru_type() {return wbase_cache_mru_type;} private String wbase_cache_mru_type = "mru";
public long Wbase_cache_mru_size() {return wbase_cache_mru_size;} private long wbase_cache_mru_size = 100;
public long Wbase_cache_mru_weight() {return wbase_cache_mru_weight;} private long wbase_cache_mru_weight = 10;
public long Wbase_cache_mru_compress_size() {return wbase_cache_mru_compress_size;} private long wbase_cache_mru_compress_size = 70;
public long Page_cache_min() {return page_cache_min;} private long page_cache_min = 1500 * Io_mgr.Len_mb_long;
public long Page_cache_max() {return page_cache_max;} private long page_cache_max = 2000 * Io_mgr.Len_mb_long;
// Fills in settings that are still at their unset value (-1 or null); settings that already hold a value are left untouched.
// wiki: only used to build the default mgr_url from its file-system root dir.
// NOTE(review): the four statements appear twice; this text looks like a diff rendering showing the pre-change and post-change copies together. The second copy is a no-op at runtime since each guard is already false by then.
public void Init(Xowe_wiki wiki) {
if (num_wkrs == -1) num_wkrs = gplx.core.envs.Runtime_.Cpu_count(); // default worker count comes from Runtime_.Cpu_count()
if (num_pages_in_pool == -1) num_pages_in_pool = num_wkrs * 1000; // must run after num_wkrs is resolved: pool size is derived from it
if (mgr_url == null) mgr_url = wiki.Fsys_mgr().Root_dir().GenSubDir_nest("tmp", "xomp"); // default: "tmp" / "xomp" nested under the wiki root dir
if (wkr_machine_name == null) wkr_machine_name = gplx.core.envs.System_.Env__machine_name(); // default: machine name reported by System_
if (num_wkrs == -1) num_wkrs = gplx.core.envs.Runtime_.Cpu_count();
if (num_pages_in_pool == -1) num_pages_in_pool = num_wkrs * 1000;
if (mgr_url == null) mgr_url = wiki.Fsys_mgr().Root_dir().GenSubDir_nest("tmp", "xomp");
if (wkr_machine_name == null) wkr_machine_name = gplx.core.envs.System_.Env__machine_name();
}
public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
@ -80,9 +92,11 @@ public class Xomp_parse_mgr_cfg implements Gfo_invk {
else if (ctx.Match(k, "wbase_cache_mru_compress_size_")) wbase_cache_mru_compress_size = m.ReadLong("v");
else if (ctx.Match(k, "page_cache_min_")) page_cache_min = gplx.core.ios.Io_size_.parse_or(m.ReadStr("v"), page_cache_min);
else if (ctx.Match(k, "page_cache_max_")) page_cache_max = gplx.core.ios.Io_size_.parse_or(m.ReadStr("v"), page_cache_max);
else if (ctx.Match(k, Invk__err_filter)) return Scrib_err_filter_mgr.INSTANCE;
else return Gfo_invk_.Rv_unhandled;
return this;
}
private Xop_log_invoke_wkr invoke_wkr;
private static final String
Invk__num_wkrs_ = "num_wkrs_", Invk__num_pages_in_pool_ = "num_pages_in_pool_", Invk__num_pages_per_wkr_ = "num_pages_per_wkr_"
, Invk__progress_interval_ = "progress_interval_", Invk__commit_interval_ = "commit_interval_", Invk__cleanup_interval_ = "cleanup_interval_"
@ -92,5 +106,6 @@ public class Xomp_parse_mgr_cfg implements Gfo_invk {
, Invk__log_math_ = "log_math_"
, Invk__mgr_url_ = "mgr_url_", Invk__wkr_machine_name_ = "wkr_machine_name_"
, Invk__show_msg__fetched_pool_ = "show_msg__fetched_pool_"
, Invk__err_filter = "err_filter"
;
}

@ -1,6 +1,6 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
Copyright (C) 2012-2020 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
@ -13,11 +13,25 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.xtns.scribunto; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*;
package gplx.xowa.xtns.scribunto;
import gplx.Bry_bfr;
import gplx.Bry_bfr_;
import gplx.GfoMsg;
import gplx.Gfo_invk;
import gplx.Gfo_invk_;
import gplx.GfsCtx;
import gplx.List_adp;
import gplx.List_adp_;
import gplx.Ordered_hash;
import gplx.Ordered_hash_;
import gplx.String_;
public class Scrib_err_filter_mgr implements Gfo_invk {
private final Ordered_hash hash_by_mod = Ordered_hash_.New();
private final Object thread_lock = new Object();
private final Ordered_hash hash_by_mod = Ordered_hash_.New();
// Removes every entry from hash_by_mod.
// NOTE(review): does not reset `empty`, so Empty() keeps returning false after Clear() -- confirm that is intended.
public void Clear() {hash_by_mod.Clear();}
// True when hash_by_mod currently holds no entries.
public boolean Count_eq_0() {return hash_by_mod.Count() == 0;}
// True until the first Add(). The flag is volatile b/c Add() writes it inside synchronized (thread_lock) while Empty() reads it without taking the lock; a plain field gives other threads no guarantee of ever seeing that write.
public boolean Empty() {return empty;} private volatile boolean empty = true;
public boolean Match(String mod, String fnc, String err) {
List_adp itms = Get_itms_or_null(mod, fnc); if (itms == null) return false;
int itms_len = itms.Count();
@ -33,9 +47,12 @@ public class Scrib_err_filter_mgr implements Gfo_invk {
return match;
}
// Registers one filter entry (expected count, module, function, error text, comment) in the list kept for (mod, fnc).
// NOTE(review): the three statements directly below repeat the body of the synchronized block; this text looks like a diff rendering that shows the pre-change (unsynchronized) and post-change (synchronized) bodies together -- confirm before treating it as compilable source.
public void Add(int count_expd, String mod, String fnc, String err, String comment) {
List_adp itms = Get_itms_or_null(mod, fnc);
if (itms == null) itms = New_itms(mod, fnc);
itms.Add(new Scrib_err_filter_itm(count_expd, mod, fnc, err, comment));
synchronized (thread_lock) { // only one thread at a time may run the registration below
empty = false; // flag returned by Empty(); set before the entry is actually stored
List_adp itms = Get_itms_or_null(mod, fnc); // existing list for (mod, fnc), or null
if (itms == null) itms = New_itms(mod, fnc); // no list yet: obtain a new one from New_itms
itms.Add(new Scrib_err_filter_itm(count_expd, mod, fnc, err, comment));
}
}
public String Print() {
Bry_bfr bfr = Bry_bfr_.New_w_size(8);
@ -79,14 +96,17 @@ public class Scrib_err_filter_mgr implements Gfo_invk {
else return Gfo_invk_.Rv_unhandled;
return this;
} private static final String Invk_add = "add";
// 2020-09-01: singleton b/c xomp instantiates multiple wikis; previous implementation was `((Scrib_xtn_mgr)(wiki.Xtn_mgr().Get_or_fail(Scrib_xtn_mgr.XTN_KEY))).Invoke_wkr();` which doesn't multi-thread
public static final Scrib_err_filter_mgr INSTANCE = new Scrib_err_filter_mgr();
}
// One error-filter definition. count_expd, mod, fnc, err and comment are fixed at construction; count_actl is the only mutable field.
// NOTE(review): the accessor lines for mod / fnc / err / comment / count_expd appear twice; this text looks like a diff rendering showing the pre-change and post-change copies together -- confirm before treating it as compilable source.
class Scrib_err_filter_itm {
// Stores all five arguments as given; no validation is performed.
public Scrib_err_filter_itm(int count_expd, String mod, String fnc, String err, String comment) {this.count_expd = count_expd; this.mod = mod; this.err = err; this.fnc = fnc; this.comment = comment;}
public String Mod() {return mod;} private final String mod;
public String Fnc() {return fnc;} private final String fnc;
public String Err() {return err;} private final String err;
public String Comment() {return comment;} private final String comment;
public int Count_expd() {return count_expd;} private final int count_expd;
public String Mod() {return mod;} private final String mod;
public String Fnc() {return fnc;} private final String fnc;
public String Err() {return err;} private final String err;
public String Comment() {return comment;} private final String comment;
public int Count_expd() {return count_expd;} private final int count_expd;
// Running counter; starts at 0 (no initializer) and is only changed by Count_actl_add_1.
public int Count_actl() {return count_actl;} private int count_actl;
// NOTE(review): ++ on a plain int is not atomic; if items are reached from several threads through the shared Scrib_err_filter_mgr.INSTANCE, increments could be lost -- confirm against callers.
public void Count_actl_add_1() {++count_actl;}
}

@ -1,6 +1,6 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
Copyright (C) 2012-2020 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
@ -13,17 +13,33 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.xtns.scribunto; import gplx.*;
package gplx.xowa.xtns.scribunto;
import gplx.Bry_;
import gplx.Bry_bfr;
import gplx.Err;
import gplx.Err_;
import gplx.GfoMsg;
import gplx.Gfo_invk;
import gplx.GfsCtx;
import gplx.String_;
import gplx.core.brys.fmtrs.Bry_fmtr;
import gplx.core.envs.System_;
import gplx.core.threads.Thread_adp;
import gplx.core.threads.Thread_adp_;
import gplx.xowa.*; import gplx.xowa.xtns.*;
import gplx.core.brys.fmtrs.*; import gplx.core.envs.*;
import gplx.langs.htmls.*;
import gplx.xowa.langs.kwds.*; import gplx.xowa.langs.msgs.*;
import gplx.xowa.wikis.nss.*;
import gplx.xowa.htmls.*;
import gplx.xowa.parsers.*; import gplx.xowa.parsers.logs.*; import gplx.xowa.parsers.tmpls.*;
import gplx.xowa.xtns.pfuncs.*;
import gplx.xowa.Xoa_ttl;
import gplx.xowa.Xowe_wiki;
import gplx.xowa.langs.kwds.Xol_kwd_grp_;
import gplx.xowa.langs.msgs.Xol_msg_itm_;
import gplx.xowa.langs.msgs.Xow_msg_mgr;
import gplx.xowa.parsers.Xop_ctx;
import gplx.xowa.parsers.logs.Xop_log_invoke_wkr;
import gplx.xowa.parsers.tmpls.Xot_invk;
import gplx.xowa.wikis.nss.Xow_ns;
import gplx.xowa.wikis.nss.Xow_ns_;
import gplx.xowa.xtns.pfuncs.Pf_func;
import gplx.xowa.xtns.pfuncs.Pf_func_;
import gplx.xowa.xtns.pfuncs.Pf_func_base;
import gplx.xowa.xtns.scribunto.cfgs.ScribCfg;
import gplx.xowa.xtns.scribunto.cfgs.ScribCfgResolver;
@ -106,9 +122,8 @@ public class Scrib_invoke_func extends Pf_func_base {
catch (Throwable e) {
Err err = Err_.Cast_or_make(e);
Error(bfr, wiki.Msg_mgr(), err);
Scrib_err_filter_mgr err_filter_mgr = invoke_wkr == null ? null : invoke_wkr.Err_filter_mgr();
if ( err_filter_mgr == null // no err_filter_mgr defined;
|| err_filter_mgr.Count_eq_0() // err_filter_mgr exists, but no definitions
Scrib_err_filter_mgr err_filter_mgr = Scrib_err_filter_mgr.INSTANCE;
if ( err_filter_mgr.Empty() // err_filter_mgr exists, but no definitions
|| !err_filter_mgr.Match(String_.new_u8(mod_name), String_.new_u8(fnc_name), err.To_str__msg_only())) // NOTE: must be To_str__msg_only; err_filter_mgr has defintion and it doesn't match current; print warn; DATE:2015-07-24
ctx.App().Usr_dlg().Warn_many("", "", "invoke failed: ~{0} ~{1} ~{2}", ctx.Page().Ttl().Raw(), Bry_.Replace_nl_w_tab(src, self.Src_bgn(), self.Src_end()), err.To_str__log());
wiki.Parser_mgr().Scrib().Terminate_when_page_changes_y_(); // NOTE: terminate core when page changes; not terminating now, else page with many errors will be very slow due to multiple remakes of core; PAGE:th.d:all; DATE:2014-10-03

Loading…
Cancel
Save