1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2026-03-02 03:49:30 +00:00

v2.12.1.1

This commit is contained in:
gnosygnu
2015-12-06 23:12:52 -05:00
parent 097e6c7f80
commit 9509363f46
337 changed files with 3473 additions and 1917 deletions

View File

@@ -16,8 +16,8 @@ You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.bldrs; import gplx.*; import gplx.xowa.*;
import gplx.core.tests.*;
import gplx.core.ios.*; import gplx.dbs.*; import gplx.xowa.wikis.tdbs.*; import gplx.xowa.wikis.data.tbls.*; import gplx.xowa.bldrs.cmds.texts.tdbs.*;
import gplx.core.tests.*; import gplx.core.ios.*; import gplx.core.times.*;
import gplx.dbs.*; import gplx.xowa.wikis.tdbs.*; import gplx.xowa.wikis.data.tbls.*; import gplx.xowa.bldrs.cmds.texts.tdbs.*;
public class Xob_fxt {
public Xob_fxt Ctor_mem() {
Io_mgr.Instance.InitEngine_mem();

View File

@@ -140,8 +140,10 @@ public abstract class Xob_dump_mgr_base extends Xob_itm_basic_base implements Xo
usr_dlg.Prog_many("", "", "parsing: ns=~{0} db=~{1} pg=~{2} count=~{3} time=~{4} rate=~{5} ttl=~{6}"
, ns.Id(), db_id, page.Id(), exec_count
, Env_.TickCount_elapsed_in_sec(time_bgn), rate_mgr.Rate_as_str(), String_.new_u8(page.Ttl_page_db()));
ctx.Clear();
Exec_pg_itm_hook(ns_ord, ns, page, page.Text());
ctx.Clear_all();
byte[] page_src = page.Text();
if (page_src != null) // some pages have no text; ignore them else null ref; PAGE: it.d:miercuri DATE:2015-12-05
Exec_pg_itm_hook(ns_ord, ns, page, page_src);
ctx.App().Utl__bfr_mkr().Clear_fail_check(); // make sure all bfrs are released
if (ctx.Wiki().Cache_mgr().Tmpl_result_cache().Count() > 50000)
ctx.Wiki().Cache_mgr().Tmpl_result_cache().Clear();
@@ -155,7 +157,7 @@ public abstract class Xob_dump_mgr_base extends Xob_itm_basic_base implements Xo
Free();
}
catch (Exception exc) {
bldr.Usr_dlg().Warn_many(GRP_KEY, "parse", "failed to parse ~{0} error ~{1}", String_.new_u8(page.Ttl_page_db()), Err_.Message_lang(exc));
bldr.Usr_dlg().Warn_many("", "", "parse failed: wiki=~{0} ttl=~{1} err=~{2}", wiki.Domain_str(), page.Ttl_full_db(), Err_.Message_gplx_log(exc));
ctx.App().Utl__bfr_mkr().Clear();
this.Free();
}
@@ -216,7 +218,6 @@ public abstract class Xob_dump_mgr_base extends Xob_itm_basic_base implements Xo
, Invk_poll_mgr = "poll_mgr", Invk_reset_db_ = "reset_db_"
, Invk_exec_count_max_ = "exec_count_max_", Invk_exit_now_ = "exit_now_", Invk_exit_after_commit_ = "exit_after_commit_"
;
private static final String GRP_KEY = "xowa.bldr.parse";
}
class Xob_dump_mgr_base_ {
public static void Load_all_tmpls(Gfo_usr_dlg usr_dlg, Xowe_wiki wiki, Xob_dump_src_id page_src) {

View File

@@ -16,8 +16,8 @@ You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.bldrs.cmds.ctgs; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.cmds.*;
import gplx.core.brys.*; import gplx.core.ios.*; import gplx.xowa.wikis.ctgs.*;
import gplx.xowa.bldrs.wkrs.*; import gplx.xowa.bldrs.sqls.*;
import gplx.core.brys.*; import gplx.core.ios.*; import gplx.core.times.*;
import gplx.xowa.wikis.ctgs.*; import gplx.xowa.bldrs.wkrs.*; import gplx.xowa.bldrs.sqls.*;
public abstract class Xob_categorylinks_base extends Xob_sql_dump_base implements Sql_file_parser_cmd {
private DateAdp_parser date_parser = DateAdp_parser.new_(); private Sql_file_parser sql_parser; Uca_trie trie; private Bry_bfr uca_bfr = Bry_bfr.reset_(255);
public abstract Io_sort_cmd Make_sort_cmd(Sql_file_parser sql_parser);

View File

@@ -25,7 +25,7 @@ import gplx.fsdb.data.*; import gplx.fsdb.meta.*;
public class Xob_fsdb_make_cmd extends Xob_itm_basic_base implements Xob_cmd {
private Db_conn bldr_conn; private Db_cfg_tbl bldr_cfg_tbl;
private Xof_bin_mgr src_bin_mgr; private Xof_bin_wkr__fsdb_sql src_fsdb_wkr; private boolean src_bin_mgr__cache_enabled = Bool_.N; private String src_bin_mgr__fsdb_version; private String[] src_bin_mgr__fsdb_skip_wkrs; private boolean src_bin_mgr__wmf_enabled;
private Fsm_mnt_itm trg_mnt_itm; private Fsm_cfg_mgr trg_cfg_mgr; private Fsm_atr_fil trg_atr_fil; private Fsm_bin_fil trg_bin_fil; private long trg_bin_db_max;
private Fsm_mnt_itm trg_mnt_itm; private Fsm_cfg_mgr trg_cfg_mgr; private Fsm_atr_fil trg_atr_fil; private Fsm_bin_fil trg_bin_fil; private long trg_bin_db_max; private String trg_bin_mgr__fsdb_version;
private final Xof_bin_updater trg_bin_updater = new Xof_bin_updater(); private Xob_bin_db_mgr bin_db_mgr; private int[] ns_ids; private int prv_lnki_tier_id = -1;
private long download_size_max = Io_mgr.Len_mb_long * 5; private int[] download_keep_tier_ids = Int_.Ary(0);
private Xobu_poll_mgr poll_mgr; private int poll_interval; private long time_bgn;
@@ -66,7 +66,8 @@ public class Xob_fsdb_make_cmd extends Xob_itm_basic_base implements Xob_cmd {
}
// trg_mnt_itm
this.trg_bin_db_max = app.Api_root().Bldr().Wiki().Import().File_db_max();
Fsdb_db_mgr trg_db_mgr = Fsdb_db_mgr_.new_detect(wiki, wiki.Fsys_mgr().Root_dir(), wiki.Fsys_mgr().File_dir());
Io_url trg_file_dir_v1 = String_.Eq(trg_bin_mgr__fsdb_version, "v1") ? wiki.Fsys_mgr().File_dir().GenNewNameOnly(wiki.Domain_str() + "-prv") : wiki.Fsys_mgr().File_dir(); // NOTE: convoluted way of setting trg to -prv if trg_bin_mgr__fsdb_version_v1 is set; otherwise set to "en.wikipedia.org" which will noop; DATE:2015-12-02
Fsdb_db_mgr trg_db_mgr = Fsdb_db_mgr_.new_detect(wiki, wiki.Fsys_mgr().Root_dir(), trg_file_dir_v1);
if (trg_db_mgr == null) trg_db_mgr = Fsdb_db_mgr__v2_bldr.Instance.Get_or_make(wiki, Bool_.Y);
Fsm_mnt_mgr trg_mnt_mgr = new Fsm_mnt_mgr(); trg_mnt_mgr.Ctor_by_load(trg_db_mgr);
trg_mnt_mgr.Mnts__get_insert_idx_(Fsm_mnt_mgr.Mnt_idx_main); // NOTE: do not delete; mnt_mgr default to Mnt_idx_user; DATE:2014-04-25
@@ -317,6 +318,7 @@ public class Xob_fsdb_make_cmd extends Xob_itm_basic_base implements Xob_cmd {
else if (ctx.Match(k, Invk_src_bin_mgr__fsdb_skip_wkrs_)) src_bin_mgr__fsdb_skip_wkrs = m.ReadStrAry("v", "|");
else if (ctx.Match(k, Invk_src_bin_mgr__wmf_enabled_)) src_bin_mgr__wmf_enabled = m.ReadYn("v");
else if (ctx.Match(k, Invk_src_bin_mgr__cache_enabled_)) src_bin_mgr__cache_enabled = m.ReadYn("v");
else if (ctx.Match(k, Invk_trg_bin_mgr__fsdb_version_)) trg_bin_mgr__fsdb_version = m.ReadStr("v");
else if (ctx.Match(k, Invk_poll_mgr)) return poll_mgr;
else if (ctx.Match(k, Invk_download_keep_tier_ids)) download_keep_tier_ids = Int_.Ary_parse(m.ReadStr("v"), "|");
else if (ctx.Match(k, Invk_download_size_max)) download_size_max = Io_size_.To_long_by_msg_mb(m, download_size_max);
@@ -331,6 +333,7 @@ public class Xob_fsdb_make_cmd extends Xob_itm_basic_base implements Xob_cmd {
, Invk_src_bin_mgr__fsdb_version_ = "src_bin_mgr__fsdb_version_", Invk_src_bin_mgr__fsdb_skip_wkrs_ = "src_bin_mgr__fsdb_skip_wkrs_"
, Invk_src_bin_mgr__wmf_enabled_ = "src_bin_mgr__wmf_enabled_"
, Invk_src_bin_mgr__cache_enabled_ = "src_bin_mgr__cache_enabled_", Invk_ns_ids_ = "ns_ids_"
, Invk_trg_bin_mgr__fsdb_version_ = "trg_bin_mgr__fsdb_version_"
, Invk_download_size_max = "download_size_max", Invk_download_keep_tier_ids = "download_keep_tier_ids"
;
public static Fsdb_db_mgr new_src_bin_db_mgr(Xow_wiki wiki, String version) {

View File

@@ -92,7 +92,7 @@ public class Xob_lnki_temp_wkr extends Xob_dump_mgr_base implements Xopg_redlink
byte page_tid = Xow_page_tid.Identify(wiki.Domain_tid(), ns.Id(), ttl_bry);
if (page_tid != Xow_page_tid.Tid_wikitext) return; // ignore js, css, lua, json
Xoae_page page = ctx.Cur_page();
page.Clear();
page.Clear_all();
page.Bldr__ns_ord_(ns_ord);
page.Ttl_(ttl).Revision_data().Id_(db_page.Id());
page.Redlink_lnki_list().Clear();

View File

@@ -68,7 +68,7 @@ class Dg_file_tbl {
.Exec_insert();
}
}
class Dg_rule_tbl implements RlsAble {
class Dg_rule_tbl implements Rls_able {
private String tbl_name = "dg_rule"; private final Db_meta_fld_list flds = Db_meta_fld_list.new_();
private String fld_file_id, fld_rule_id, fld_rule_idx, fld_rule_score, fld_rule_text;
private Db_conn conn; private Db_stmt stmt_insert;
@@ -101,7 +101,7 @@ class Dg_rule_tbl implements RlsAble {
.Exec_insert();
}
}
class Dg_page_score_tbl implements RlsAble {
class Dg_page_score_tbl implements Rls_able {
private String tbl_name = "dg_page_score"; private final Db_meta_fld_list flds = Db_meta_fld_list.new_();
private String fld_log_tid, fld_page_id, fld_page_ns, fld_page_ttl, fld_page_len, fld_page_score, fld_page_rule_count, fld_clude_type;
private Db_conn conn; private Db_stmt stmt_insert;
@@ -141,7 +141,7 @@ class Dg_page_score_tbl implements RlsAble {
.Exec_insert();
}
}
class Dg_page_rule_tbl implements RlsAble {
class Dg_page_rule_tbl implements Rls_able {
private String tbl_name = "dg_page_rule"; private final Db_meta_fld_list flds = Db_meta_fld_list.new_();
private String fld_log_tid, fld_page_id, fld_rule_id, fld_rule_score_total;
private Db_conn conn; private Db_stmt stmt_insert;

View File

@@ -16,8 +16,8 @@ You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.bldrs.xmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
import gplx.core.btries.*; import gplx.core.ios.*; import gplx.xowa.wikis.data.tbls.*;
import gplx.xowa.wikis.nss.*;
import gplx.core.btries.*; import gplx.core.ios.*; import gplx.core.times.*;
import gplx.xowa.wikis.data.tbls.*; import gplx.xowa.wikis.nss.*;
public class Xob_xml_parser {
Btrie_fast_mgr trie = Xob_xml_parser_.trie_(); Bry_bfr data_bfr = Bry_bfr.new_(); DateAdp_parser date_parser = DateAdp_parser.new_();
public Xob_xml_parser Tag_len_max_(int v) {tag_len_max = v; return this;} private int tag_len_max = 255; // max size of any (a) xml tag, (b) int or (c) date; everything else goes into a data_bfr

View File

@@ -16,7 +16,8 @@ You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.bldrs.xmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
import org.junit.*; import gplx.core.ios.*; import gplx.xowa.wikis.nss.*; import gplx.xowa.wikis.data.tbls.*;
import org.junit.*; import gplx.core.ios.*; import gplx.core.times.*;
import gplx.xowa.wikis.nss.*; import gplx.xowa.wikis.data.tbls.*;
public class Xob_xml_parser_tst {
@Before public void init() {
Io_mgr.Instance.InitEngine_mem();