1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2026-03-02 03:49:30 +00:00

'v3.4.3.1'

This commit is contained in:
gnosygnu
2016-04-17 23:47:45 -04:00
parent 14471ca656
commit ad140a93fe
102 changed files with 1278 additions and 382 deletions

View File

@@ -68,12 +68,12 @@ public class Xob_base_fxt {
cmd.Cmd_end();
}
/**
 * Test helper: drives a single page worker through its begin step, one run call per
 * entry of {@code page_ary} (in array order), and then its end step.
 *
 * NOTE(review): this span comes from a diff rendering where removed and added lines are
 * interleaved without +/- markers. Each {@code Wkr_*} call looks like the pre-rename line and
 * the {@code Page_wkr__*} call directly under it like its replacement -- keep only one of each
 * pair before compiling.
 */
public static void Run_wkr(Xob_bldr bldr, Xob_page_wkr wkr, Xowd_page_itm[] page_ary) {
wkr.Wkr_bgn(bldr);
// renamed begin step; unlike Wkr_bgn it receives no bldr argument
wkr.Page_wkr__bgn();
int page_ary_len = page_ary.length;
for (int i = 0; i < page_ary_len; i++) {
Xowd_page_itm page = page_ary[i];
wkr.Wkr_run(page);
wkr.Page_wkr__run(page);
}
wkr.Wkr_end();
wkr.Page_wkr__end();
}
}

View File

@@ -62,7 +62,7 @@ public class Xob_fxt {
return rv;
}
public Xob_fxt Run_ctg() {
Xobd_parser parser = new Xobd_parser();
Xobd_parser parser = new Xobd_parser(bldr);
gplx.xowa.bldrs.cmds.ctgs.Xob_ctg_v1_base ctg_wkr = new gplx.xowa.bldrs.cmds.ctgs.Xob_ctg_v1_txt().Ctor(bldr, wiki);
byte[] bry = Bry_.new_a7("[[Category:");
ctg_wkr.Wkr_hooks().Add(bry, bry);
@@ -75,12 +75,12 @@ public class Xob_fxt {
return this;
}
/**
 * Runs one page worker over every entry of the fixture's {@code doc_ary}: begin step first,
 * then one run call per page in array order, then the end step.
 *
 * NOTE(review): this span comes from a diff rendering where removed and added lines are
 * interleaved without +/- markers. Each {@code Wkr_*} call looks like the pre-rename line and
 * the {@code Page_wkr__*} call directly under it like its replacement -- keep only one of each
 * pair before compiling.
 */
private void Run_wkr(Xob_page_wkr wkr) {
wkr.Wkr_bgn(bldr);
// renamed begin step; unlike Wkr_bgn it receives no bldr argument
wkr.Page_wkr__bgn();
for (int i = 0; i < doc_ary.length; i++) {
Xowd_page_itm page = doc_ary[i];
wkr.Wkr_run(page);
wkr.Page_wkr__run(page);
}
wkr.Wkr_end();
wkr.Page_wkr__end();
}
private void tst_fils(Io_url[] ary) {
Io_fil[] actls = Get_actl(ary);
@@ -105,7 +105,7 @@ public class Xob_fxt {
}
/** Builds a fresh {@code Xob_page_txt} worker from this fixture's bldr and wiki and hands it to {@code Run}. */
public Xob_fxt Run_page_title() {
	gplx.xowa.bldrs.cmds.texts.tdbs.Xob_page_txt page_txt_wkr = new gplx.xowa.bldrs.cmds.texts.tdbs.Xob_page_txt(bldr, wiki);
	return Run(page_txt_wkr);
}
public Xob_fxt Run(Xobd_parser_wkr... wkrs) {
Xobd_parser parser_wkr = new Xobd_parser();
Xobd_parser parser_wkr = new Xobd_parser(bldr);
int len = wkrs.length;
for (int i = 0; i < len; i++)
parser_wkr.Wkr_add(wkrs[i]);
@@ -116,12 +116,12 @@ public class Xob_fxt {
int doc_ary_len = doc_ary.length;
for (int j = 0; j < wkrs.length; j++) {
Xob_page_wkr wkr = wkrs[j];
wkr.Wkr_bgn(bldr);
wkr.Page_wkr__bgn();
for (int i = 0; i < doc_ary_len; i++) {
Xowd_page_itm page = doc_ary[i];
wkr.Wkr_run(page);
wkr.Page_wkr__run(page);
}
wkr.Wkr_end();
wkr.Page_wkr__end();
}
Test_expd_files();
return this;

View File

@@ -1,25 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.bldrs; import gplx.*; import gplx.xowa.*;
import gplx.xowa.wikis.data.tbls.*;
/**
 * Package-private contract for a page command: a key accessor plus begin, run and end hooks.
 * Judging by the signatures, the begin hook receives the builder and the run hook receives one page.
 */
interface Xowd_page_cmd {
	/** Returns the key of this command. */
	String Cmd_key();

	/** Begin hook; receives the builder. */
	void Cmd_bgn(Xob_bldr bldr);

	/** Run hook; receives a single page item. */
	void Cmd_run(Xowd_page_itm page);

	/** End hook; takes no arguments. */
	void Cmd_end();
}

View File

@@ -27,7 +27,7 @@ public class Xob_page_wkr_cmd implements Xob_cmd {
public void Cmd_run() {
Xob_page_wkr[] wkr_ary = (Xob_page_wkr[])wkrs.To_ary(Xob_page_wkr.class); int wkr_ary_len = wkr_ary.length;
for (int i = 0; i < wkr_ary_len; i++)
wkr_ary[i].Wkr_bgn(bldr);
wkr_ary[i].Page_wkr__bgn();
Io_buffer_rdr fil = Io_buffer_rdr.Null; Xowd_page_itm page = new Xowd_page_itm(); Xow_ns_mgr ns_mgr = wiki.Ns_mgr();
Xob_xml_parser parser = bldr.Dump_parser().Data_bfr_len_(Io_mgr.Len_mb);
long fil_len = 0;
@@ -46,7 +46,7 @@ public class Xob_page_wkr_cmd implements Xob_cmd {
prv_pos = cur_pos;
try {
for (int i = 0; i < wkr_ary_len; i++)
wkr_ary[i].Wkr_run(page);
wkr_ary[i].Page_wkr__run(page);
}
catch (Exception e) {
Err_.Noop(e);
@@ -57,6 +57,8 @@ public class Xob_page_wkr_cmd implements Xob_cmd {
Console_adp__sys.Instance.Write_str_w_nl(msg);
}
}
for (int i = wkr_ary_len - 1; i > -1; --i) // NOTE: release in reverse order; needed to make sure txns are released correctly
wkr_ary[i].Page_wkr__run_cleanup();
}
catch (Exception e) {
String msg = Err_.Message_lang(e);
@@ -67,17 +69,17 @@ public class Xob_page_wkr_cmd implements Xob_cmd {
finally {fil.Rls();}
bldr.Usr_dlg().Prog_none("", "", "reading completed: performing post-processing clean-up");
for (int i = wkr_ary_len - 1; i > -1; --i) // NOTE: release in reverse order; needed to make sure txns are released correctly
wkr_ary[i].Wkr_end();
wkr_ary[i].Page_wkr__end();
}
/** No-op: this command does nothing in its begin step. */
public void Cmd_bgn(Xob_bldr bldr) {
}
/** No-op: this command does nothing in its init step. */
public void Cmd_init(Xob_bldr bldr) {
}
/** No-op: this command does nothing in its end step. */
public void Cmd_end() {
}
/** No-op: this command does nothing in its term step. */
public void Cmd_term() {
}
// Registers a page worker in the ordered hash `wkrs`, keyed by the worker's own key.
// NOTE(review): the two lines below look like the before/after forms of the same declaration
// from a diff rendering (key accessor renamed from Wkr_key to Page_wkr__key); both also declare
// the `wkrs` field, so only one of them can remain in a compilable file.
public void Wkr_add(Xob_page_wkr wkr) {wkrs.Add(wkr.Wkr_key(), wkr);} private Ordered_hash wkrs = Ordered_hash_.New();
public void Wkr_add(Xob_page_wkr wkr) {wkrs.Add(wkr.Page_wkr__key(), wkr);} private Ordered_hash wkrs = Ordered_hash_.New();
/** Looks up {@code key} in the registered workers and returns the entry cast to {@code Xob_page_wkr}. */
public Xob_page_wkr Wkr_get(String key) {
	Object found = wkrs.Get_by(key);
	return (Xob_page_wkr)found;
}
public Xobd_parser Page_parser_assert() {
if (page_parser == null) {
page_parser = new Xobd_parser();
page_parser = new Xobd_parser(bldr);
this.Wkr_add(page_parser);
}
return page_parser;

View File

@@ -16,18 +16,20 @@ You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.bldrs; import gplx.*; import gplx.xowa.*;
import gplx.core.btries.*;
import gplx.core.btries.*; import gplx.xowa.bldrs.wkrs.*;
import gplx.xowa.wikis.data.tbls.*;
public class Xobd_parser implements Xob_page_wkr {
// Parser state (hook trie, list of parser workers) and the key accessor, which returns the constant KEY ("page_parser").
// NOTE(review): diff rendering with removed and added lines interleaved. The first three lines
// look like the removed forms (non-final fields, Wkr_key); the four lines after them look like
// the replacements (new final `bldr` field, final trie / wkr_list, Page_wkr__key). Keeping both
// sets would declare trie, wkr_list and KEY twice.
private Btrie_slim_mgr trie = Btrie_slim_mgr.ci_a7(); // NOTE:ci.ascii:MW_const.en; ctg.v1 assumes [[Category:
private List_adp wkr_list = List_adp_.new_();
public String Wkr_key() {return KEY;} static final String KEY = "page_parser";
private final Xob_bldr bldr;
private final Btrie_slim_mgr trie = Btrie_slim_mgr.ci_a7(); // NOTE:ci.ascii:MW_const.en; ctg.v1 assumes [[Category:
private final List_adp wkr_list = List_adp_.new_();
public String Page_wkr__key() {return KEY;} static final String KEY = "page_parser";
/** Creates a parser that stores {@code bldr} in its {@code bldr} field. */
public Xobd_parser(Xob_bldr bldr) {
	this.bldr = bldr;
}
/** Appends {@code wkr} to this parser's worker list. */
public void Wkr_add(Xobd_parser_wkr wkr) {
	wkr_list.Add(wkr);
}
public void Wkr_bgn(Xob_bldr app) {
public void Page_wkr__bgn() {
int wkr_list_len = wkr_list.Count();
for (int i = 0; i < wkr_list_len; i++) {
Xobd_parser_wkr wkr = (Xobd_parser_wkr)wkr_list.Get_at(i);
wkr.Wkr_bgn(app);
wkr.Wkr_bgn(bldr);
int hooks_len = wkr.Wkr_hooks().Count();
for (int j = 0; j < hooks_len; j++) {
byte[] bry = (byte[])wkr.Wkr_hooks().Get_at(j);
@@ -35,7 +37,7 @@ public class Xobd_parser implements Xob_page_wkr {
}
}
}
public void Wkr_run(Xowd_page_itm page) {
public void Page_wkr__run(Xowd_page_itm page) {
byte[] src = page.Text(); int src_len = src.length;
int pos = 0;
while (true) {
@@ -49,7 +51,8 @@ public class Xobd_parser implements Xob_page_wkr {
}
}
}
public void Wkr_end() {
/** No-op: the parser does nothing in the run-cleanup step. */
public void Page_wkr__run_cleanup() {
}
public void Page_wkr__end() {
int wkr_list_len = wkr_list.Count();
for (int i = 0; i < wkr_list_len; i++) {
Xobd_parser_wkr wkr = (Xobd_parser_wkr)wkr_list.Get_at(i);

View File

@@ -32,17 +32,17 @@ class Xowd_page_wkr_ctg_fxt {
byte[] src;
public Xowd_page_wkr_ctg_fxt ini_(String s) {src = Bry_.new_u8(s); return this;}
public Xowd_page_wkr_ctg_fxt tst_(String... expd) {
Xobd_parser mgr = new Xobd_parser();
Xoae_app app = Xoa_app_fxt.Make__app__edit();
Xowe_wiki wiki = Xoa_app_fxt.Make__wiki__edit(app);
Xob_bldr bldr = Xoa_app_fxt.bldr_(app);
Xobd_parser mgr = new Xobd_parser(bldr);
Xobd_parser_wkr_ctg_tstr wkr = (Xobd_parser_wkr_ctg_tstr)new Xobd_parser_wkr_ctg_tstr().Ctor(bldr, wiki);
byte[] bry = Bry_.new_a7("[[Category:");
wkr.Wkr_hooks().Add(bry, bry);
mgr.Wkr_add(wkr);
Xowd_page_itm page = new Xowd_page_itm().Text_(src);//.Ttl_(Bry_.new_a7("Test"), new Xow_ns_mgr());
mgr.Wkr_bgn(bldr);
mgr.Wkr_run(page);
mgr.Page_wkr__bgn();
mgr.Page_wkr__run(page);
byte[][] ttl = (byte[][])wkr.Found().To_ary(byte[].class);
String[] actl = new String[ttl.length];
for (int i = 0; i < actl.length; i++) {

View File

@@ -28,8 +28,8 @@ public class Xob_page_cmd extends Xob_itm_basic_base implements Xob_page_wkr, Gf
private DateAdp modified_latest = DateAdp_.MinValue; private int page_count_all, page_count_main = 0; private int commit_interval = 100000; // 100 k
private Dg_match_mgr dg_match_mgr; private Xob_ns_to_db_mgr ns_to_db_mgr;
/** Creates the command by forwarding {@code bldr} and {@code wiki} to {@code Cmd_ctor}. */
public Xob_page_cmd(Xob_bldr bldr, Xowe_wiki wiki) {
	this.Cmd_ctor(bldr, wiki);
}
public String Wkr_key() {return Xob_cmd_keys.Key_text_page;}
public void Wkr_bgn(Xob_bldr bldr) {
public String Page_wkr__key() {return Xob_cmd_keys.Key_text_page;}
public void Page_wkr__bgn() {
Xoae_app app = wiki.Appe();
Xoapi_import import_cfg = app.Api_root().Bldr().Wiki().Import();
this.redirect_mgr = wiki.Redirect_mgr();
@@ -50,7 +50,7 @@ public class Xob_page_cmd extends Xob_itm_basic_base implements Xob_page_wkr, Gf
page_core_tbl.Insert_bgn();
usr_dlg.Prog_many("", "", "import.page.bgn");
}
public void Wkr_run(Xowd_page_itm page) {
public void Page_wkr__run(Xowd_page_itm page) {
int id = page.Id();
DateAdp modified = page.Modified_on(); if (modified.compareTo(modified_latest) == CompareAble_.More) modified_latest = modified;
byte[] text_raw = page.Text(); int text_raw_len = page.Text_len();
@@ -77,9 +77,12 @@ public class Xob_page_cmd extends Xob_itm_basic_base implements Xob_page_wkr, Gf
if (dg_match_mgr != null) dg_match_mgr.Commit();
}
}
// Run-cleanup step: logs that inserting is done (with the total page count), then calls
// ns_to_db_mgr.Rls_all() followed by page_core_tbl.Insert_end().
// NOTE(review): diff rendering with removed and added lines interleaved. The `Wkr_end` header
// and the single line `page_core_tbl.Insert_end(); ns_to_db_mgr.Rls_all();` look like the
// removed lines; the `Page_wkr__run_cleanup` header and the two separate calls (Rls_all first,
// i.e. the opposite order) look like their replacements. Keep only one form of each.
public void Wkr_end() {
public void Page_wkr__run_cleanup() {
usr_dlg.Log_many("", "", "import.page: insert done; committing pages; pages=~{0}", page_count_all);
page_core_tbl.Insert_end(); ns_to_db_mgr.Rls_all();
ns_to_db_mgr.Rls_all();
page_core_tbl.Insert_end();
}
public void Page_wkr__end() {
if (dg_match_mgr != null) dg_match_mgr.Rls();
usr_dlg.Log_many("", "", "import.page: updating core stats");
Xow_ns_mgr ns_mgr = wiki.Ns_mgr();

View File

@@ -20,17 +20,17 @@ import gplx.core.primitives.*; import gplx.core.ios.*;
import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.wkrs.*; import gplx.xowa.bldrs.wtrs.*;
import gplx.xowa.langs.*; import gplx.xowa.wikis.nss.*; import gplx.xowa.wikis.dbs.*; import gplx.xowa.wikis.tdbs.*; import gplx.xowa.wikis.data.tbls.*;
public abstract class Srch_bldr_wkr_base extends Xob_itm_dump_base implements Xob_page_wkr {
// Worker state, the abstract key accessor, and the begin step.
// The begin step: points make_dir at the wiki's tdb namespace dir, initializes the dump under
// this worker's key, captures the wiki's language, creates the temp-writer manager, and -- only
// when the wiki's db manager is the sql type -- stores the wiki's main namespace in ns_main.
// NOTE(review): diff rendering with removed and added lines interleaved. The first three lines
// (list/lang fields, Wkr_key, Wkr_bgn header) look like the removed forms of the three lines
// after them; likewise the Init_dump call using Wkr_key() looks replaced by the one using
// Page_wkr__key(). Keep only one form of each.
private final Ordered_hash list = Ordered_hash_.New(); private Xol_lang_itm lang;
public abstract String Wkr_key();
public void Wkr_bgn(Xob_bldr bldr) {
private final Ordered_hash list = Ordered_hash_.New(); private Xol_lang_itm lang;
public abstract String Page_wkr__key();
public void Page_wkr__bgn() {
make_dir = wiki.Tdb_fsys_mgr().Ns_dir();
this.Init_dump(this.Wkr_key(), make_dir);
this.Init_dump(this.Page_wkr__key(), make_dir);
lang = wiki.Lang(); // wiki.Appe().Lang_mgr().Lang_en(); // NOTE: was .Lang_en which is wrong (should match lang of wiki); DATE:2013-05-11
tmp_wtr_mgr = new Xob_tmp_wtr_mgr(new Xob_tmp_wtr_wkr__ttl(temp_dir, dump_fil_len));
if (wiki.Db_mgr().Tid() == Xodb_mgr_sql.Tid_sql) // if sqlite, hard-code to ns_main; aggregates all ns into one
ns_main = wiki.Ns_mgr().Ns_main();
} private Xob_tmp_wtr_mgr tmp_wtr_mgr; private Xow_ns ns_main;
public void Wkr_run(Xowd_page_itm page) {
public void Page_wkr__run(Xowd_page_itm page) {
// if (page.Ns_id() != Xow_ns_.Tid__main) return; // limit to main ns for now
try {
byte[] ttl = page.Ttl_page_db();
@@ -51,7 +51,8 @@ public abstract class Srch_bldr_wkr_base extends Xob_itm_dump_base implements Xo
}
} catch (Exception e) {bldr.Usr_dlg().Warn_many("", "", "search_index:fatal error: err=~{0}", Err_.Message_gplx_full(e));} // never let single page crash entire import
}
public void Wkr_end() {
/** No-op: this worker does nothing in the run-cleanup step. */
public void Page_wkr__run_cleanup() {
}
public void Page_wkr__end() {
tmp_wtr_mgr.Flush_all(bldr.Usr_dlg());
dump_bfr.ClearAndReset();
Xobdc_merger.Ns(bldr.Usr_dlg(), tmp_wtr_mgr.Regy(), Xotdb_dir_info_.Name_search_ttl, temp_dir, make_dir, sort_mem_len, Io_line_rdr_key_gen_.first_pipe, this.Make_cmd_site());

View File

@@ -20,16 +20,17 @@ import gplx.core.ios.*; import gplx.xowa.wikis.tdbs.*; import gplx.xowa.wikis.da
import gplx.xowa.bldrs.wkrs.*;
public class Xob_make_id_wkr extends Xob_itm_dump_base implements Xob_page_wkr, GfoInvkAble {
/** Creates the worker by forwarding {@code bldr} and {@code wiki} to {@code Cmd_ctor}. */
public Xob_make_id_wkr(Xob_bldr bldr, Xowe_wiki wiki) {
	this.Cmd_ctor(bldr, wiki);
}
// Key accessor (returns the constant KEY, "core.make_id") and the begin step, which initializes
// the dump under KEY in the Name_id sub-directory of the wiki's tdb site dir.
// NOTE(review): diff rendering with removed and added lines interleaved. The Wkr_key / Wkr_bgn
// lines look like the removed forms of the Page_wkr__key / Page_wkr__bgn lines below them; both
// key lines also declare KEY, so only one of each pair can remain.
public String Wkr_key() {return KEY;} public static final String KEY = "core.make_id";
public void Wkr_bgn(Xob_bldr bldr) {
public String Page_wkr__key() {return KEY;} public static final String KEY = "core.make_id";
public void Page_wkr__bgn() {
this.Init_dump(KEY, wiki.Tdb_fsys_mgr().Site_dir().GenSubDir(Xotdb_dir_info_.Name_id));
}
// Per-page step: saves the page's id row into dump_bfr. Before saving, if the buffer length plus
// row_fixed_len plus the title length would exceed dump_fil_len, the buffer is first appended to
// the next dump file url.
// NOTE(review): diff rendering; the Wkr_run header looks like the removed form of the
// Page_wkr__run header directly below it -- keep only one.
public void Wkr_run(Xowd_page_itm page) {
public void Page_wkr__run(Xowd_page_itm page) {
byte[] ttl = page.Ttl_page_db();
if (dump_bfr.Len() + row_fixed_len + ttl.length > dump_fil_len) Io_mgr.Instance.AppendFilBfr(dump_url_gen.Nxt_url(), dump_bfr);
Xotdb_page_itm_.Txt_id_save(dump_bfr, page);
}
// Run-cleanup step (empty) and end step. The end step terminates the dump with a site make-cmd
// built from the builder's usr_dlg, make_dir and make_fil_len, then deletes temp_dir recursively
// when delete_temp is set.
// NOTE(review): diff rendering; the Wkr_end header looks like the removed form of the
// Page_wkr__end header two lines below it, and the Page_wkr__run_cleanup line between them is a
// separate, complete method. Keep only one of the two end headers.
public void Wkr_end() {
public void Page_wkr__run_cleanup() {}
public void Page_wkr__end() {
this.Term_dump(new Xob_make_cmd_site(bldr.Usr_dlg(), make_dir, make_fil_len));
if (delete_temp) Io_mgr.Instance.DeleteDirDeep(temp_dir);
}

View File

@@ -23,8 +23,8 @@ import gplx.xowa.parsers.utils.*;
import gplx.xowa.wikis.tdbs.*; import gplx.xowa.wikis.tdbs.xdats.*; import gplx.xowa.wikis.tdbs.stats.*;
public class Xob_page_txt extends Xob_itm_dump_base implements Xob_page_wkr, GfoInvkAble {
/** Creates the worker by forwarding {@code bldr} and {@code wiki} to {@code Cmd_ctor}. */
public Xob_page_txt(Xob_bldr bldr, Xowe_wiki wiki) {
	this.Cmd_ctor(bldr, wiki);
}
public String Wkr_key() {return Xob_cmd_keys.Key_tdb_make_page;}
public void Wkr_bgn(Xob_bldr bldr) {
public String Page_wkr__key() {return Xob_cmd_keys.Key_tdb_make_page;}
public void Page_wkr__bgn() {
redirect_mgr = wiki.Redirect_mgr(); page_storage_type = wiki.Appe().Setup_mgr().Dump_mgr().Data_storage_format();
fsys_mgr = wiki.Tdb_fsys_mgr();
make_dir = fsys_mgr.Ns_dir();
@@ -36,7 +36,7 @@ public class Xob_page_txt extends Xob_itm_dump_base implements Xob_page_wkr, Gfo
int page_file_len = 512 * Io_mgr.Len_kb, title_file_len = 64 * Io_mgr.Len_kb; Xob_tmp_wtr_mgr ttl_wtr_mgr;
Xob_xdat_file_wtr[] page_wtr_regy = new Xob_xdat_file_wtr[Ns_ordinal_max]; static final int Ns_ordinal_max = Xow_ns_mgr_.Ordinal_max; // ASSUME: no more than 128 ns in a wiki
Xob_stat_type data_rpt_typ; Xob_stat_mgr stat_mgr = new Xob_stat_mgr(); byte page_storage_type;
public void Wkr_run(Xowd_page_itm page) {
public void Page_wkr__run(Xowd_page_itm page) {
int id = page.Id(); byte[] ttl_wo_ns = page.Ttl_page_db(), text = page.Text(); int ttl_len = ttl_wo_ns.length, text_len = text.length; Xow_ns ns = page.Ns();
boolean redirect = redirect_mgr.Is_redirect(text, text_len);
page.Redirected_(redirect);
@@ -54,7 +54,8 @@ public class Xob_page_txt extends Xob_itm_dump_base implements Xob_page_wkr, Gfo
if (ttl_wtr.FlushNeeded(Xotdb_page_itm_.Txt_ttl_len__fixed + ttl_len)) ttl_wtr.Flush(bldr.Usr_dlg());
Xotdb_page_itm_.Txt_ttl_save(ttl_wtr.Bfr(), id, file_idx, row_idx, redirect, text_len, ttl_wo_ns);
}
public void Wkr_end() {
/** No-op: this worker does nothing in the run-cleanup step. */
public void Page_wkr__run_cleanup() {
}
public void Page_wkr__end() {
Flush_page(page_wtr_regy);
ttl_wtr_mgr.Flush_all(bldr.Usr_dlg());
Xobdc_merger.Ns(bldr.Usr_dlg(), ttl_wtr_mgr.Regy(), Xotdb_dir_info_.Name_title, temp_dir, make_dir, sort_mem_len, Io_line_rdr_key_gen_.last_pipe, new Io_sort_cmd_ns(bldr.Usr_dlg()));

View File

@@ -20,16 +20,17 @@ import gplx.core.ios.*; import gplx.xowa.wikis.data.tbls.*; import gplx.xowa.wik
import gplx.xowa.bldrs.wkrs.*;
/**
 * Page worker that writes template-namespace pages into a dump buffer, flushing the buffer
 * whenever the next row would push it past {@code dump_fil_len}.
 *
 * NOTE(review): this span comes from a diff rendering where removed and added lines are
 * interleaved without +/- markers. Each {@code Wkr_*} line looks like the pre-rename form of the
 * {@code Page_wkr__*} line that follows it; the two key lines both declare {@code KEY}. Keep
 * only one of each pair before compiling.
 */
public class Xob_parse_dump_templates_cmd extends Xob_itm_dump_base implements Xob_page_wkr, GfoInvkAble {
// forwards bldr and wiki to Cmd_ctor
public Xob_parse_dump_templates_cmd(Xob_bldr bldr, Xowe_wiki wiki) {this.Cmd_ctor(bldr, wiki);}
// key accessor: returns the constant KEY
public String Wkr_key() {return KEY;} public static final String KEY = "parse.dump_templates";
public String Page_wkr__key() {return KEY;} public static final String KEY = "parse.dump_templates";
public static final int FixedLen_page = 1 + 5 + 1 + 5 + 1 + 1 + 1; // \tid|date|title|text\n
// begin step: initializes the dump under KEY
public void Wkr_bgn(Xob_bldr bldr) {
public void Page_wkr__bgn() {
Init_dump(KEY);
}
// per-page step
public void Wkr_run(Xowd_page_itm page) {
public void Page_wkr__run(Xowd_page_itm page) {
// only pages whose namespace id is the template namespace are dumped
if (page.Ns_id() != Xow_ns_.Tid__template) return;
int id = page.Id(); byte[] title = page.Ttl_page_db(), text = page.Text(); int title_len = title.length, text_len = text.length;
// flush first when fixed overhead + title + text + current buffer length would exceed the dump file length
if (FixedLen_page + title_len + text_len + dump_bfr.Len() > dump_fil_len) super.Flush_dump();
Xotdb_page_itm_.Txt_page_save(dump_bfr, id, page.Modified_on(), title, text, true);
}
// end step: flushes whatever is still in the dump buffer; the run-cleanup step is empty
public void Wkr_end() {super.Flush_dump();}
public void Page_wkr__run_cleanup() {}
public void Page_wkr__end() {super.Flush_dump();}
}

View File

@@ -19,7 +19,7 @@ package gplx.xowa.bldrs.cmds.texts.tdbs; import gplx.*; import gplx.xowa.*; impo
import gplx.xowa.addons.apps.searchs.bldrs.*;
public class Xob_search_tdb extends Srch_bldr_wkr_base {
/** Creates the worker by forwarding {@code bldr} and {@code wiki} to {@code Cmd_ctor}. */
public Xob_search_tdb(Xob_bldr bldr, Xowe_wiki wiki) {
	this.Cmd_ctor(bldr, wiki);
}
// Key accessor override: returns Xob_cmd_keys.Key_tdb_make_search_title.
// NOTE(review): diff rendering; the Wkr_key line looks like the removed form of the
// Page_wkr__key line below it -- keep only the one that matches the base class's abstract method.
@Override public String Wkr_key() {return Xob_cmd_keys.Key_tdb_make_search_title;}
@Override public String Page_wkr__key() {return Xob_cmd_keys.Key_tdb_make_search_title;}
/** Returns a new site make-cmd built from the builder's usr_dlg plus this worker's make_dir and make_fil_len. */
@Override public gplx.core.ios.Io_make_cmd Make_cmd_site() {
	gplx.core.ios.Io_make_cmd site_cmd = new Xob_make_cmd_site(bldr.Usr_dlg(), this.make_dir, this.make_fil_len);
	return site_cmd;
}

View File

@@ -15,10 +15,11 @@ GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.bldrs; import gplx.*; import gplx.xowa.*;
package gplx.xowa.bldrs.wkrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
/**
 * Contract for a page worker: a key accessor plus begin, per-page run, run-cleanup and end hooks.
 *
 * NOTE(review): this span comes from a diff rendering where removed and added lines are
 * interleaved without +/- markers. The four {@code Wkr_*} members look like the removed
 * declarations and the five {@code Page_wkr__*} members like their replacements (the begin hook
 * no longer takes a bldr, and a run-cleanup hook is new). Keep only one set.
 */
public interface Xob_page_wkr extends GfoInvkAble {
String Wkr_key();
void Wkr_bgn(Xob_bldr bldr);
void Wkr_run(gplx.xowa.wikis.data.tbls.Xowd_page_itm page);
void Wkr_end();
// key of this worker
String Page_wkr__key();
// begin hook; takes no arguments
void Page_wkr__bgn();
// run hook; receives a single page item
void Page_wkr__run(gplx.xowa.wikis.data.tbls.Xowd_page_itm page);
void Page_wkr__run_cleanup(); // close txns opened during Page_wkr__run
// end hook; takes no arguments
void Page_wkr__end();
}