Dansguardian: Skip specified namespaces

pull/620/head
gnosygnu 8 years ago
parent 7dd01b6e23
commit 45d355454e

@ -31,7 +31,7 @@ public class Xoa_app_ {
}
public static final String Name = "xowa";
public static final int Version_id = 517;
public static final String Version = "4.3.1.1702";
public static final String Version = "4.3.2.1703";
public static String Build_date = "2012-12-30 00:00:00";
public static String Build_date_fmt = "yyyy-MM-dd HH:mm:ss";
public static String Op_sys_str;

@ -17,7 +17,7 @@ package gplx.xowa.bldrs.filters.core; import gplx.*; import gplx.xowa.*; import
import gplx.xowa.wikis.ttls.*;
public class Xob_ttl_filter_mgr {
private boolean exclude_is_empty = true, include_is_empty = true;
private final Xob_ttl_filter_mgr_srl srl = new Xob_ttl_filter_mgr_srl();
private final Xob_ttl_filter_mgr_srl srl = new Xob_ttl_filter_mgr_srl();
private Hash_adp_bry exclude_hash = Hash_adp_bry.cs(), include_hash = Hash_adp_bry.cs();
public void Clear() {
exclude_hash.Clear();

@ -25,12 +25,14 @@ public class Dg_match_mgr {
private final Ordered_hash rule_group_hash = Ordered_hash_.New_bry(), rule_tally_hash = Ordered_hash_.New_bry();
private final Dg_parser parser = new Dg_parser();
private final Xob_ttl_filter_mgr ttl_filter_mgr = new Xob_ttl_filter_mgr();
private final Dg_ns_skip_mgr ns_skip_mgr = new Dg_ns_skip_mgr();
private final Dg_log_mgr log_mgr = new Dg_log_mgr();
public Dg_match_mgr(Io_url root_dir, int score_init, int score_fail, boolean case_match, boolean log_enabled, Io_url log_url) {
this.score_init = score_init; this.score_fail = score_fail; this.case_match = case_match; this.log_enabled = log_enabled;
if (log_enabled) log_mgr.Init(log_url);
ttl_filter_mgr.Load(Bool_.N, root_dir.GenSubFil("xowa.title.include.txt"));
ttl_filter_mgr.Load(Bool_.Y, root_dir.GenSubFil("xowa.title.exclude.txt"));
ns_skip_mgr.Load(root_dir.GenSubFil("xowa.ns.skip.txt"));
Io_url dg_root_url = root_dir.GenSubDir("dansguardian");
Dg_file[] files = parser.Parse_dir(dg_root_url); Gfo_usr_dlg_.Instance.Plog_many("", "", "import.dg.rules: url=~{0} files=~{1}", dg_root_url, files.length);
Init_by_files(files);
@ -77,6 +79,10 @@ public class Dg_match_mgr {
return rv;
}
public boolean Match(int log_tid, int page_id, int page_ns, byte[] page_ttl, byte[] page_ttl_db, Xol_lang_itm lang, byte[] src) {
// if ns is in skip_mgr, ignore; needed to skip Template and Module
if (ns_skip_mgr.Has(page_ns))
return false;
int src_len = src.length;
int clude_type = 0;
if (ttl_filter_mgr.Match_include(page_ttl_db)) clude_type = -1;

@ -0,0 +1,40 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs.filters.dansguardians; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.filters.*;
import gplx.core.lists.hashs.*;
/**
 * Tracks the set of namespace ids that the Dansguardian filter should skip.
 * Ids are read from a text file, one candidate id per line; {@link #Has(int)}
 * then reports whether a given namespace id was registered.
 */
class Dg_ns_skip_mgr {
  private final Hash_adp__int ns_hash = new Hash_adp__int();
  private boolean is_empty = true;	// stays true until at least one id has been registered

  /**
   * Reports whether {@code ns} was registered by a prior {@link #Load(Io_url)}.
   *
   * @param ns namespace id to look up
   * @return true if the id is in the skip set; false otherwise (including when nothing was loaded)
   */
  public boolean Has(int ns) {
    // short-circuit: avoid the hash lookup entirely when no ids were ever added
    if (is_empty) {
      return false;
    }
    return ns_hash.Get_by_or_null(ns) != null;
  }

  /**
   * Reads the file at {@code url}, splits it into lines, and registers every
   * line that parses as an int as a namespace id to skip.
   *
   * @param url location of the ns.skip file
   */
  public void Load(Io_url url) {
    Gfo_usr_dlg_.Instance.Log_many("", "", "loading ns.skip file; url=~{0}", url.Raw());

    // NOTE(review): "loose" load presumably tolerates a missing file -- confirm against Io_mgr
    byte[] file_bry = Io_mgr.Instance.LoadFilBry_loose(url);
    byte[][] rows = Bry_split_.Split_lines(file_bry);

    int rows_len = rows.length;
    for (int i = 0; i < rows_len; i++) {
      byte[] row = rows[i];

      // Int_.Max_value is passed as the fallback value and doubles as the "not a usable id" marker
      int parsed_ns = Bry_.To_int_or(row, Int_.Max_value);
      if (parsed_ns == Int_.Max_value) {
        continue;
      }

      Gfo_usr_dlg_.Instance.Log_many("", "", "adding ns; ns_id=~{0}", parsed_ns);
      ns_hash.Add_if_dupe_use_1st(parsed_ns, row);
      is_empty = false;
    }
  }
}
Loading…
Cancel
Save