1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2026-03-02 03:49:30 +00:00

Embeddable: Create core dbs in proper subdirectory

This commit is contained in:
gnosygnu
2017-10-23 20:50:22 -04:00
parent dc22c15895
commit 1336d44f34
4537 changed files with 0 additions and 311750 deletions

View File

@@ -13,24 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs; import gplx.*; import gplx.xowa.*;
/**
 * Marker type with exactly three shared instances (skip / bgn / end), each wrapping an int type id.
 * Callers obtain an instance from the static Itm_* fields or by key through {@link #Xto_itm(String)}.
 */
public class Db_idx_mode {
	// type ids wrapped by the three shared instances
	private static final int Tid_skip = 0, Tid_bgn = 1, Tid_end = 2;
	// keys accepted by Xto_itm
	private static final String Key_skip = "skip", Key_bgn = "bgn", Key_end = "end";

	public static final Db_idx_mode Itm_skip = new Db_idx_mode(Tid_skip);
	public static final Db_idx_mode Itm_bgn = new Db_idx_mode(Tid_bgn);
	public static final Db_idx_mode Itm_end = new Db_idx_mode(Tid_end);

	private int tid;

	Db_idx_mode(int tid) {
		this.tid = tid;
	}

	/** @return true when this instance wraps the "bgn" type id */
	public boolean Tid_is_bgn() {
		return Tid_bgn == tid;
	}

	/** @return true when this instance wraps the "end" type id */
	public boolean Tid_is_end() {
		return Tid_end == tid;
	}

	/** @return true when this instance wraps the "skip" type id */
	public boolean Tid_is_skip() {
		return Tid_skip == tid;
	}

	/**
	 * Maps a key ("skip", "bgn" or "end") to its shared instance.
	 * Any other key is reported through Err_.new_unhandled.
	 */
	public static Db_idx_mode Xto_itm(String key) {
		if (String_.Eq(key, Key_skip)) return Itm_skip;
		if (String_.Eq(key, Key_bgn)) return Itm_bgn;
		if (String_.Eq(key, Key_end)) return Itm_end;
		throw Err_.new_unhandled(key);
	}
}

View File

@@ -13,80 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs; import gplx.*; import gplx.xowa.*;
import gplx.core.primitives.*; import gplx.core.strings.*;
import gplx.dbs.*; import gplx.dbs.qrys.*; import gplx.xowa.wikis.dbs.*; import gplx.xowa.addons.wikis.ctgs.*;
import gplx.xowa.wikis.nss.*;
import gplx.xowa.wikis.data.*; import gplx.xowa.wikis.data.tbls.*;
import gplx.xowa.bldrs.wkrs.*; import gplx.xowa.bldrs.infos.*;
import gplx.xowa.addons.wikis.ctgs.htmls.catpages.doms.*;
public class Db_mgr_fxt {
public Db_mgr_fxt Ctor_fsys() {bldr_fxt = new Xob_fxt().Ctor(Xoa_test_.Url_root().GenSubDir("root")); return this;}
public Db_mgr_fxt Ctor_mem() {bldr_fxt = new Xob_fxt().Ctor_mem(); return this;} private Xob_fxt bldr_fxt;
public Xowd_page_itm page_(int id, String modified_on, boolean type_redirect, int text_len) {return new Xowd_page_itm().Id_(id).Modified_on_(DateAdp_.parse_gplx(modified_on)).Redirected_(type_redirect).Text_len_(text_len);}
public Xowe_wiki Wiki() {return bldr_fxt.Wiki();}
public Xob_bldr Bldr() {return bldr_fxt.Bldr();}
public Db_mgr_fxt doc_ary_(Xowd_page_itm... v) {bldr_fxt.doc_ary_(v); return this;}
public Xowd_page_itm doc_(int id, String date, String title, String text) {return bldr_fxt.doc_(id, date, title, text);}
public Xowd_page_itm doc_wo_date_(int id, String title, String text) {return bldr_fxt.doc_(id, "2012-01-02 03:04", title, text);}
public Xowd_page_itm doc_ttl_(int id, String title) {return bldr_fxt.doc_(id, "2012-01-02 03:04", title, "IGNORE");}
public Db_mgr_fxt Init_fil(String url, String raw) {return Init_fil(Io_url_.new_fil_(url), raw);}
public Db_mgr_fxt Init_fil(Io_url url, String raw) {Io_mgr.Instance.SaveFilStr(url, raw); return this;}
public Db_mgr_fxt Exec_run(Xob_page_wkr wkr) {bldr_fxt.Run(wkr); return this;}
public Db_mgr_fxt Exec_run(Xob_cmd cmd) {bldr_fxt.Run_cmds(cmd); return this;}
public Db_mgr_fxt Exec_run(Xobd_parser_wkr wkr) {bldr_fxt.Run(wkr); return this;}
public void Init_page_insert(Int_obj_ref page_id_next, int ns_id, String[] ttls) {
Xowe_wiki wiki = this.Wiki();
int len = ttls.length;
DateAdp modified_on = Datetime_now.Dflt_add_min_(0);
Xowd_page_tbl tbl_page = wiki.Db_mgr_as_sql().Core_data_mgr().Tbl__page();
tbl_page.Insert_bgn();
for (int i = 0; i < len; i++) {
String ttl = ttls[i];
int page_id = page_id_next.Val();
tbl_page.Insert_cmd_by_batch(page_id, ns_id, Bry_.new_u8(ttl), false, modified_on, 0, page_id, 0, 0, -1);
page_id_next.Val_add(1);
}
tbl_page.Insert_end();
}
public void Test_load_ttl(int ns_id, String ttl_str, Xowd_page_itm expd) {
Xowe_wiki wiki = bldr_fxt.Wiki();
Xow_ns ns = wiki.Ns_mgr().Ids_get_or_null(ns_id);
byte[] ttl_bry = Bry_.new_a7(ttl_str);
wiki.Db_mgr_as_sql().Load_mgr().Load_by_ttl(actl, ns, ttl_bry);
Tfds.Eq(expd.Id(), actl.Id());
Tfds.Eq_date(expd.Modified_on(), actl.Modified_on());
Tfds.Eq(expd.Redirected(), actl.Redirected());
Tfds.Eq(expd.Text_len(), actl.Text_len());
} private Xowd_page_itm actl = new Xowd_page_itm();
public void Test_load_page(int ns_id, int page_id, String expd) {
Xowe_wiki wiki = bldr_fxt.Wiki();
Xow_ns ns = wiki.Ns_mgr().Ids_get_or_null(ns_id);
wiki.Db_mgr_as_sql().Load_mgr().Load_page(actl.Id_(page_id), ns);
Tfds.Eq(expd, String_.new_a7(actl.Text()));
}
int[] Xto_int_ary(List_adp rslts) {
int len = rslts.Count();
int[] rv = new int[len];
for (int i = 0; i < len; i++) {
Xowd_page_itm page = (Xowd_page_itm)rslts.Get_at(i);
rv[i] = page.Id();
}
return rv;
}
public void Test_file(String url, String expd) {
String actl = Io_mgr.Instance.LoadFilStr(url);
Tfds.Eq_str_lines(expd, actl);
}
public void Init_db_sqlite() {
Xowe_wiki wiki = this.Wiki();
Db_conn_pool.Instance.Rls_all();
Db_conn_bldr.Instance.Reg_default_sqlite();
Io_mgr.Instance.DeleteDir_cmd(wiki.Fsys_mgr().Root_dir()).MissingIgnored_().Exec();
wiki.Db_mgr_create_as_sql().Core_data_mgr().Init_by_make(Xowd_core_db_props.Test, Xob_info_session.Test);
Io_mgr.Instance.SaveFilStr(wiki.Import_cfg().Src_dir().GenSubFil("a.xml"), "<test/>");
}
public void Rls() {
this.Wiki().Db_mgr_as_sql().Core_data_mgr().Rls();
}
}

View File

@@ -13,65 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs; import gplx.*; import gplx.xowa.*;
import gplx.core.ios.*;
import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.wkrs.*; import gplx.xowa.wikis.data.tbls.*;
/**
 * Base test fixture for builder tests: lazily creates an app / wiki / bldr trio and offers helpers
 * to create page items, seed and check files, and run a builder command or page worker.
 */
public class Xob_base_fxt {
	private Xoae_app app;
	private Xob_bldr bldr;
	private Xowe_wiki wiki;
	Gfo_invk bldr_itm;

	/** Creates app, wiki and bldr on first use, re-applies Init_ and then calls Clear_hook. */
	public Xob_base_fxt Clear() {
		boolean first_use = app == null;
		if (first_use) {
			app = Xoa_app_fxt.Make__app__edit();
			wiki = Xoa_app_fxt.Make__wiki__edit(app);
			bldr = Xoa_app_fxt.bldr_(app);
		}
		this.Init_(bldr, wiki);
		Clear_hook();
		return this;
	}

	/** Extension point called at the end of Clear; does nothing here. */
	@gplx.Virtual public void Clear_hook() {}

	public Xob_base_fxt Init_(Xob_bldr bldr, Xowe_wiki wiki) {
		this.bldr = bldr;
		this.wiki = wiki;
		return this;
	}

	public Xoae_app App() {
		return app;
	}

	public Xob_bldr Bldr() {
		return bldr;
	}

	public Xowe_wiki Wiki() {
		return wiki;
	}

	/** @return the command created by the most recent Exec_cmd call */
	public Gfo_invk Bldr_itm() {
		return bldr_itm;
	}

	public Xowd_page_itm page_(String ttl) {
		return page_(ttl, "");
	}

	/** Builds a page item with the given title (resolved against the wiki's ns mgr) and text. */
	public Xowd_page_itm page_(String ttl, String text) {
		return new Xowd_page_itm()
			.Ttl_(Bry_.new_u8(ttl), wiki.Ns_mgr())
			.Text_(Bry_.new_u8(text));
	}

	public Io_fil_chkr meta_(String url, String data) {
		return new Io_fil_chkr(Io_url_.mem_fil_(url), data);
	}

	/** Applies the same bldr and wiki to every fixture in fxt_ary. */
	public void Init_fxts(Xob_bldr bldr, Xowe_wiki wiki, Xob_base_fxt... fxt_ary) {
		for (Xob_base_fxt fxt : fxt_ary)
			fxt.Init_(bldr, wiki);
	}

	public Xob_base_fxt Init_fil(String url, String raw) {
		return Init_fil(Io_url_.new_fil_(url), raw);
	}

	/** Saves raw as the content of the file at url. */
	public Xob_base_fxt Init_fil(Io_url url, String raw) {
		Io_mgr.Instance.SaveFilStr(url, raw);
		return this;
	}

	/**
	 * Adds the command registered under cmd_key to the bldr's cmd mgr, passes each msg to the
	 * command's Invk and then runs the command through Run_cmd.
	 */
	public Xob_base_fxt Exec_cmd(String cmd_key, GfoMsg... msgs) {
		Xob_cmd new_cmd = (Xob_cmd)bldr.Cmd_mgr().Add_cmd(wiki, cmd_key);
		this.bldr_itm = new_cmd;
		int msg_count = msgs.length;
		GfsCtx invk_ctx = GfsCtx.new_();
		for (int idx = 0; idx < msg_count; idx++) {
			GfoMsg cur_msg = msgs[idx];
			new_cmd.Invk(invk_ctx, GfsCtx.Ikey_null, cur_msg.Key(), cur_msg);
		}
		Run_cmd(bldr, new_cmd);
		return this;
	}

	public Xob_base_fxt Test_fil(String url, String expd) {
		return Test_fil(Io_url_.new_fil_(url), expd);
	}

	/** Compares the content of the file at url with expd, line by line. */
	public Xob_base_fxt Test_fil(Io_url url, String expd) {
		String actl = Io_mgr.Instance.LoadFilStr(url);
		Tfds.Eq_str_lines(expd, actl);
		return this;
	}

	/** Runs a command through its bgn / run / end steps. */
	public static void Run_cmd(Xob_bldr bldr, Xob_cmd cmd) {
		cmd.Cmd_bgn(bldr);
		cmd.Cmd_run();
		cmd.Cmd_end();
	}

	/** Runs a page worker over every page in page_ary, bracketed by its bgn / end steps. */
	public static void Run_wkr(Xob_bldr bldr, Xob_page_wkr wkr, Xowd_page_itm[] page_ary) {
		wkr.Page_wkr__bgn();
		for (Xowd_page_itm cur_page : page_ary)
			wkr.Page_wkr__run(cur_page);
		wkr.Page_wkr__end();
	}
}

View File

@@ -13,155 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs; import gplx.*; import gplx.xowa.*;
import gplx.core.consoles.*; import gplx.core.envs.*;
import gplx.xowa.apps.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.xmls.*; import gplx.xowa.langs.bldrs.*;
import gplx.xowa.bldrs.wkrs.*;
import gplx.langs.jsons.*;
import gplx.xowa.addons.bldrs.app_cfgs.*;
/**
 * Builder entry point: owns the command registry (prototype commands by key) and the command
 * manager (ordered list of commands to execute), exposes size settings read by commands, and
 * runs the queued commands through their init / bgn / run / end / term steps.
 * Can be driven by a JSON script (Exec_json) or by Gfo_invk messages (Invk).
 */
public class Xob_bldr implements Gfo_invk {
private boolean pause_at_end = false; private long prv_prog_time; private Xob_xml_parser dump_parser;
public Xob_bldr(Xoae_app app) {
this.app = app;
this.cmd_mgr = new Xob_cmd_mgr(this, cmd_regy);
this.import_marker = new Xob_import_marker();
this.wiki_cfg_bldr = new Xob_wiki_cfg_bldr(this);
}
public Xoae_app App() {return app;} private final Xoae_app app;
public Xob_cmd_regy Cmd_regy() {return cmd_regy;} private final Xob_cmd_regy cmd_regy = new Xob_cmd_regy();
public Xob_cmd_mgr Cmd_mgr() {return cmd_mgr;} private final Xob_cmd_mgr cmd_mgr;
public Gfo_usr_dlg Usr_dlg() {return app.Usr_dlg();}
// size settings; defaults are 16 * Len_mb, 1 * Len_mb and 64 * Len_kb; see the block comment after this class for their meaning
public int Sort_mem_len() {return sort_mem_len;} public Xob_bldr Sort_mem_len_(int v) {sort_mem_len = v; return this;} private int sort_mem_len = 16 * Io_mgr.Len_mb;
public int Dump_fil_len() {return dump_fil_len;} public Xob_bldr Dump_fil_len_(int v) {dump_fil_len = v; return this;} private int dump_fil_len = 1 * Io_mgr.Len_mb;
public int Make_fil_len() {return make_fil_len;} public Xob_bldr Make_fil_len_(int v) {make_fil_len = v; return this;} private int make_fil_len = 64 * Io_mgr.Len_kb;
// the xml parser is created on first request and then reused
public Xob_xml_parser Dump_parser() {if (dump_parser == null) this.dump_parser = new Xob_xml_parser(); return dump_parser;}
public Xob_import_marker Import_marker() {return import_marker;} private Xob_import_marker import_marker;
public Xob_wiki_cfg_bldr Wiki_cfg_bldr() {return wiki_cfg_bldr;} private Xob_wiki_cfg_bldr wiki_cfg_bldr;
public void Pause_at_end_(boolean v) {this.pause_at_end = v;}
// Emits a progress message through the app's Usr_dlg, throttled by tick count.
// When pct_idx > -1, ary[pct_idx] is overwritten with the cur / end percentage formatted as "00.00".
public void Print_prog_msg(long cur, long end, int pct_idx, String fmt, Object... ary) {
// throttle: skip when fewer than 100 ticks have elapsed since the last emitted message (tick unit presumably milliseconds - TODO confirm against System_.Ticks)
long now = System_.Ticks(); if (now - prv_prog_time < 100) return;
this.prv_prog_time = now;
if (pct_idx > -1) ary[pct_idx] = Decimal_adp_.CalcPctStr(cur, end, "00.00");
app.Usr_dlg().Prog_many("", "", fmt, ary);
}
// Replaces the queued commands with the ones described by a JSON script and then starts a run.
// The script root is read as an array; each node needs a "key" attribute naming a registered command,
// may have a "wiki" attribute (otherwise app.Usere().Wiki() is used), and every other attribute
// is applied to the cloned command by invoking attribute-name + Gfo_invk_.Mutator_suffix.
// Any exception raised here is shown through the gui kit's Ask_ok instead of being rethrown.
public Xob_bldr Exec_json(String script) {
try {
this.cmd_mgr.Clear();
Json_parser jdoc_parser = new Json_parser();
Json_doc jdoc = jdoc_parser.Parse(script);
Json_ary cmds = jdoc.Root_ary();
int cmds_len = cmds.Len();
for (int i = 0; i < cmds_len; ++i) {
Json_nde cmd = cmds.Get_at_as_nde(i);
byte[] key = cmd.Get_bry_or_null("key");
// prime is the registered prototype; it is never queued itself, only its clone
Xob_cmd prime = cmd_regy.Get_or_null(String_.new_u8(key));
if (prime == null) throw Err_.new_("bldr", "bldr.cmd does not exists: cmd={0}", key);
byte[] wiki_key = cmd.Get_bry_or_null("wiki");
Xowe_wiki wiki = wiki_key == null ? app.Usere().Wiki() : app.Wiki_mgr().Get_by_or_make(wiki_key);
Xob_cmd clone = prime.Cmd_clone(this, wiki);
int atrs_len = cmd.Len();
for (int j = 0; j < atrs_len; ++j) {
Json_kv atr_kv = cmd.Get_at_as_kv(j);
String atr_key = atr_kv.Key_as_str();
// "key" and "wiki" were consumed above; they are not command properties
if ( String_.Eq(atr_key, "key")
|| String_.Eq(atr_key, "wiki")) continue;
byte[] atr_val = atr_kv.Val_as_bry();
Gfo_invk_.Invk_by_val(clone, atr_key + Gfo_invk_.Mutator_suffix, String_.new_u8(atr_val));
}
cmd_mgr.Add(clone);
}
// hands Invk_run_by_kit to Thread_adp_ under the key "bldr_by_json" (presumably runs on a separate thread - TODO confirm)
gplx.core.threads.Thread_adp_.Start_by_key("bldr_by_json", this, Invk_run_by_kit);
} catch (Exception e) {
app.Gui_mgr().Kit().Ask_ok("", "", "error: ~{0}", Err_.Message_gplx_log(e));
}
return this;
}
private void Run_by_kit() { // same as Run, but shows exception; don't want to change backward compatibility on Run
try {this.Run();}
catch (Exception e) {
String log_msg = Err_.Message_gplx_log(e);
Xoa_app_.Usr_dlg().Log_many("", "", log_msg);
app.Gui_mgr().Kit().Ask_ok("", "", "error: ~{0}", Err_.Message_gplx_full(e));
}
}
// Runs every queued command:
// 1) Cmd_init on all commands; 2) Cmd_bgn / Cmd_run / Cmd_end per command, with notes and timing; 3) Cmd_term on all commands.
// Afterwards the queue is cleared. A failure in any command is wrapped with the command key and rethrown,
// then wrapped again by the outer catch.
// NOTE(review): Bldr__running_(false) is only called on the failure path; after a successful run the flag stays true - confirm this is intended.
public void Run() {
try {
app.Bldr__running_(true);
app.Launch(); // HACK: bldr will be called by a gfs file which embeds "bldr.run" inside it; need to call Launch though before Run; DATE:2013-03-23
long time_bgn = System_.Ticks();
int cmd_mgr_len = cmd_mgr.Len();
for (int i = 0; i < cmd_mgr_len; i++) {
Xob_cmd cmd = cmd_mgr.Get_at(i);
cmd.Cmd_init(this);
}
cmd_mgr_len = cmd_mgr.Len(); // NOTE: refresh len b/c other cmds may have added new ones in Cmd_init
for (int i = 0; i < cmd_mgr_len; i++) {
Xob_cmd cmd = cmd_mgr.Get_at(i);
app.Usr_dlg().Note_many("", "", "cmd bgn: ~{0}", cmd.Cmd_key());
long time_cur = System_.Ticks();
try {
cmd.Cmd_bgn(this);
cmd.Cmd_run();
cmd.Cmd_end();
} catch (Exception e) {
throw Err_.new_exc(e, "bldr", "unknown error", "key", cmd.Cmd_key());
}
System_.Garbage_collect();
app.Usr_dlg().Note_many("", "", "cmd end: ~{0} ~{1}", cmd.Cmd_key(), Time_span_.from_(time_cur).XtoStrUiAbbrv());
}
for (int i = 0; i < cmd_mgr_len; i++) {
Xob_cmd cmd = cmd_mgr.Get_at(i);
cmd.Cmd_term();
}
app.Usr_dlg().Note_many("", "", "bldr done: ~{0}", Time_span_.from_(time_bgn).XtoStrUiAbbrv());
cmd_mgr.Clear();
// the console prompt is skipped when Env_.Mode_testing() is true
if (pause_at_end && !Env_.Mode_testing()) {Console_adp__sys.Instance.Read_line("press enter to continue");}
}
catch (Exception e) {
app.Bldr__running_(false);
throw Err_.new_exc(e, "bldr", "unknown error");
}
}
// Calls Cmd_end on every queued command; reachable through the "cancel" message in Invk.
private void Cancel() {
int cmd_mgr_len = cmd_mgr.Len();
for (int i = 0; i < cmd_mgr_len; i++) {
Xob_cmd cmd = cmd_mgr.Get_at(i);
cmd.Cmd_end();
}
}
// Message dispatch: "cmds" and "wiki_cfg_bldr" return the corresponding member; the other handled keys
// set a value or trigger an action and return this; unknown keys return Gfo_invk_.Rv_unhandled.
public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
if (ctx.Match(k, Invk_pause_at_end_)) pause_at_end = m.ReadBoolOrTrue("val");
else if (ctx.Match(k, Invk_cmds)) return cmd_mgr;
else if (ctx.Match(k, Invk_wiki_cfg_bldr)) return wiki_cfg_bldr;
else if (ctx.Match(k, Invk_sort_mem_len_)) sort_mem_len = gplx.core.ios.Io_size_.Load_int_(m);
else if (ctx.Match(k, Invk_dump_fil_len_)) dump_fil_len = gplx.core.ios.Io_size_.Load_int_(m);
else if (ctx.Match(k, Invk_make_fil_len_)) make_fil_len = gplx.core.ios.Io_size_.Load_int_(m);
else if (ctx.Match(k, Invk_run)) Run();
else if (ctx.Match(k, Invk_run_by_kit)) Run_by_kit();
else if (ctx.Match(k, Invk_cancel)) Cancel();
else return Gfo_invk_.Rv_unhandled;
return this;
}
// message keys handled by Invk
private static final String
Invk_cmds = "cmds", Invk_wiki_cfg_bldr = "wiki_cfg_bldr"
, Invk_pause_at_end_ = "pause_at_end_", Invk_sort_mem_len_ = "sort_mem_len_", Invk_dump_fil_len_ = "dump_fil_len_", Invk_make_fil_len_ = "make_fil_len_"
, Invk_cancel = "cancel"
, Invk_run_by_kit = "run_by_kit"
;
public static final String Invk_run = "run";
}
/*
. make_fil_len: max size of made file; EX: /id/..../0000000001.csv will have max len of 64 KB
. dump_fil_len: max size of temp file; EX: /tmp/.../0000000001.csv will have max len of 1 MB
. sort_mem_len: max size of memory for external merge process; note the following
.. a contiguous range of memory of that size will be needed: "Bry_bfr_.New(sort_mem_len)" will be called
.. large sort_mem_len will result in smaller number of merge files
... EX: 16 MB will take en.wikipedia.org's 640 MB title files and generate 40 temp files of 8 MB each
.. number of merge files is number of open file channels during merge process
... 40 is a "reasonable" number; the 1st max is 512 (for older windows OS's) and 2048 for Windows XP; Linux seems to be about 7000
.. small sort_mem_len will use smaller buffer; 16 MB / 40 files -> 400 kb buffer for each file
... do not go under max page size for a given row
... for example, a 100 b buffer will fail if a given row is > 100 b (the entire row won't be loaded in memory)
.. smaller buffer will mean more refills which will require more I/O
... EX: 400 kb buffer will require at least 20 refills to read the entire 8 MB file
*/

View File

@@ -13,37 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs; import gplx.*; import gplx.xowa.*;
/**
 * String keys that identify builder commands.
 * The quoted strings in trailing comments appear to be earlier names of the same keys (TODO confirm).
 */
public class Xob_cmd_keys {
	// text.*
	public static final String Key_text_init = "text.init";	// "import.sql.init"
	public static final String Key_text_page = "text.page";	// "import.sql.page"
	public static final String Key_text_css = "text.css";
	public static final String Key_text_search_cmd = "text.search.cmd";	// "import.sql.search_title.cmd"
	public static final String Key_text_search_wkr = "text.search";	// "import.sql.search_title.wkr"
	public static final String Key_text_term = "text.term";	// "import.sql.term"
	// html.*
	public static final String Key_html_redlinks = "html.redlinks";
	// util.*
	public static final String Key_util_cleanup = "util.cleanup";	// "core.cleanup"
	public static final String Key_util_download = "util.download";	// "file.download"
	public static final String Key_util_xml_dump = "util.xml_dump";
	public static final String Key_util_random = "util.random";
	public static final String Key_util_delete = "util.delete";
	// wbase.*
	public static final String Key_wbase_qid = "wbase.qid";	// "text.wdata.qid"
	public static final String Key_wbase_pid = "wbase.pid";	// "text.wdata.pid"
	public static final String Key_wbase_db = "wbase.db";	// "wiki.wdata_db"
	public static final String Key_site_meta = "util.site_meta";
	// diff.*
	public static final String Key_diff_build = "diff.build";
	public static final String Key_diff_merge = "diff.merge";
	public static final String Key_text_delete_page = "text.delete_page";
	// tdb.* / core.*
	public static final String Key_tdb_text_init = "tdb.text.init";	// "core.init"
	public static final String Key_tdb_make_page = "tdb.text.page";	// "core.make_page"
	public static final String Key_tdb_make_id = "core.make_id";
	public static final String Key_tdb_calc_stats = "core.calc_stats";
	public static final String Key_tdb_text_wdata_qid = "tdb.text.wdata.qid";
	public static final String Key_tdb_text_wdata_pid = "tdb.text.wdata.pid";
	// misc
	public static final String Key_exec_sql = "import.sql.exec_sql";
	public static final String Key_decompress_bz2 = "core.decompress_bz2";
}

View File

@@ -13,118 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs; import gplx.*; import gplx.xowa.*;
import gplx.core.primitives.*;
import gplx.xowa.wikis.*; import gplx.xowa.xtns.wbases.imports.*;
import gplx.xowa.bldrs.wkrs.*; import gplx.xowa.bldrs.cmds.texts.*; import gplx.xowa.bldrs.cmds.texts.sqls.*; import gplx.xowa.bldrs.cmds.texts.tdbs.*; import gplx.xowa.addons.bldrs.files.*; import gplx.xowa.addons.wikis.ctgs.bldrs.*; import gplx.xowa.bldrs.cmds.utils.*;
import gplx.xowa.bldrs.cmds.diffs.*;
import gplx.xowa.files.origs.*; import gplx.xowa.htmls.core.bldrs.*;
import gplx.xowa.addons.wikis.searchs.bldrs.*;
import gplx.xowa.addons.bldrs.files.cmds.*; import gplx.xowa.addons.wikis.htmls.css.bldrs.*;
/**
 * Ordered list of builder commands to execute, plus the factory logic that turns a command key
 * into a command instance for a wiki. Page workers are not queued directly: they are attached to
 * one shared Xob_page_wkr_cmd per wiki, and that command is what gets queued.
 */
public class Xob_cmd_mgr implements Gfo_invk {
private final Xob_bldr bldr;
public final Xob_cmd_regy cmd_regy;
public Xob_cmd_mgr(Xob_bldr bldr, Xob_cmd_regy cmd_regy) {this.bldr = bldr; this.cmd_regy = cmd_regy;}
// clears both the queued commands and the per-wiki page-worker commands
public void Clear() {list.Clear(); dump_rdrs.Clear();}
public int Len() {return list.Count();} private final List_adp list = List_adp_.New();
public Xob_cmd Get_at(int i) {return (Xob_cmd)list.Get_at(i);}
public Xob_cmd Add(Xob_cmd cmd) {list.Add(cmd); return cmd;}
// Creates the command (or page worker) for cmd_key and registers it.
// The registry is consulted first: a registered prototype is cloned for the wiki and queued.
// Otherwise the key is matched against the hard-coded keys below; an unmatched key throws.
// Commands are queued through Add; page workers go through Xml_rdr_direct_add.
public Gfo_invk Add_cmd(Xowe_wiki wiki, String cmd_key) {
Xob_cmd prime = cmd_regy.Get_or_null(cmd_key);
if (prime != null) {
Xob_cmd clone = prime.Cmd_clone(bldr, wiki);
Add(clone);
return clone;
}
if (String_.Eq(cmd_key, Xob_cmd_keys.Key_text_init)) return Add(new Xob_init_cmd(bldr, wiki));
else if (String_.Eq(cmd_key, Xob_cmd_keys.Key_text_page)) return Xml_rdr_direct_add(wiki, new Xob_page_cmd(bldr, wiki));
else if (String_.Eq(cmd_key, Xob_cmd_keys.Key_text_css)) return Add(new Xob_css_cmd(bldr, wiki));
else if (String_.Eq(cmd_key, Xob_cmd_keys.Key_text_search_wkr)) return Xml_rdr_direct_add(wiki, new gplx.xowa.addons.wikis.searchs.bldrs.Srch_bldr_wkr(bldr, wiki));
else if (String_.Eq(cmd_key, Xob_cmd_keys.Key_text_search_cmd)) return Add(new Srch_bldr_cmd(bldr, wiki));
else if (String_.Eq(cmd_key, Xob_cmd_keys.Key_text_term)) return Add(new Xob_term_cmd(bldr, wiki));
else if (String_.Eq(cmd_key, Xob_cmd_keys.Key_text_delete_page)) return Add(new Xob_page_delete_cmd(bldr, wiki));
else if (String_.Eq(cmd_key, Xob_cmd_keys.Key_html_redlinks)) return Add(new Xob_redlink_mkr_cmd(bldr, wiki));
else if (String_.Eq(cmd_key, Xob_cmd_keys.Key_util_cleanup)) return Add(new Xob_cleanup_cmd(bldr, wiki));
else if (String_.Eq(cmd_key, Xob_cmd_keys.Key_util_delete)) return Add(new Xob_delete_cmd(bldr, wiki));
else if (String_.Eq(cmd_key, Xob_cmd_keys.Key_util_download)) return Add(new Xob_download_cmd(bldr, wiki));
else if (String_.Eq(cmd_key, Xob_cmd_keys.Key_util_xml_dump)) return Add(new Xob_xml_dumper_cmd(bldr, wiki));
else if (String_.Eq(cmd_key, Xob_cmd_keys.Key_wbase_qid)) return Xml_rdr_direct_add(wiki, new Xob_wdata_qid_sql().Ctor(bldr, wiki));
else if (String_.Eq(cmd_key, Xob_cmd_keys.Key_wbase_pid)) return Xml_rdr_direct_add(wiki, new Xob_wdata_pid_sql().Ctor(bldr, wiki));
else if (String_.Eq(cmd_key, Xob_cmd_keys.Key_wbase_db)) return Add(new Xob_wdata_db_cmd(bldr, wiki));
else if (String_.Eq(cmd_key, Xob_cmd_keys.Key_site_meta)) return Add(new Xob_site_meta_cmd(bldr, wiki));
else if (String_.Eq(cmd_key, Xob_cmd_keys.Key_tdb_text_init)) return Add(new Xob_init_tdb(bldr, wiki));
else if (String_.Eq(cmd_key, Xob_cmd_keys.Key_tdb_make_id)) return Xml_rdr_direct_add(wiki, new Xob_make_id_wkr(bldr, wiki));
else if (String_.Eq(cmd_key, Xob_cmd_keys.Key_tdb_calc_stats)) return Add(new Xob_calc_stats_cmd(bldr, wiki));
else if (String_.Eq(cmd_key, Xob_cmd_keys.Key_tdb_text_wdata_qid)) return Xml_rdr_direct_add(wiki, new Xob_wdata_qid_txt().Ctor(bldr, wiki));
else if (String_.Eq(cmd_key, Xob_cmd_keys.Key_tdb_text_wdata_pid)) return Xml_rdr_direct_add(wiki, new Xob_wdata_pid_txt().Ctor(bldr, wiki));
else if (String_.Eq(cmd_key, Xob_cmd_keys.Key_diff_build)) return Add(new Xob_diff_build_cmd(bldr, wiki));
else if (String_.Eq(cmd_key, Xob_cmd_keys.Key_exec_sql)) return Add(new Xob_exec_sql_cmd(bldr, wiki));
else if (String_.Eq(cmd_key, Xob_cmd_keys.Key_decompress_bz2)) return Add(new Xob_decompress_bz2_cmd(bldr, wiki));
else throw Err_.new_unimplemented_w_msg("builder command is not supported: " + cmd_key);
}
// attaches a page worker to the wiki's shared page-worker command and returns the worker (not the command)
private Xob_page_wkr Xml_rdr_direct_add(Xowe_wiki wiki, Xob_page_wkr wkr) {
Xob_page_wkr_cmd dump_rdr = Xml_rdr_get(wiki);
dump_rdr.Wkr_add(wkr);
return wkr;
}
// Returns the page-worker command for the wiki, keyed by wiki.Domain_bry().
// On first request for a wiki, the command is created, cached in dump_rdrs and queued via Add.
// The lookup reuses the dump_rdrs_ref field as the probe key; a fresh Bry_obj_ref is stored as the hash key.
private Xob_page_wkr_cmd Xml_rdr_get(Xowe_wiki wiki) {
byte[] wiki_key = wiki.Domain_bry();
Xob_page_wkr_cmd rv = (Xob_page_wkr_cmd)dump_rdrs.Get_by(dump_rdrs_ref.Val_(wiki_key));
if (rv == null) {
rv = new Xob_page_wkr_cmd(bldr, wiki);
dump_rdrs.Add(Bry_obj_ref.New(wiki_key), rv);
this.Add(rv);
}
return rv;
}
private Hash_adp dump_rdrs = Hash_adp_.New(); private Bry_obj_ref dump_rdrs_ref = Bry_obj_ref.New_empty();
// Message dispatch for "add", "add_many", "get_first" and "new_batch"; unknown keys return Gfo_invk_.Rv_unhandled.
// NOTE(review): for "add", Wiki_get_or_make(m) and m.ReadStr("v") both read an arg named "v";
// presumably GfoMsg reads advance through the args in order - confirm against GfoMsg.
public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
if (ctx.Match(k, Invk_add)) return Add_cmd(Wiki_get_or_make(m), m.ReadStr("v"));
else if (ctx.Match(k, Invk_add_many)) return Add_many(m);
else if (ctx.Match(k, Invk_get_first)) return Get_first(m);
else if (ctx.Match(k, Invk_new_batch)) return new Xob_core_batch_utl(bldr, m.ReadBry("v"));
else return Gfo_invk_.Rv_unhandled;
}
private static final String Invk_add = "add", Invk_add_many = "add_many", Invk_new_batch = "new_batch", Invk_get_first = "get_first";
// returns the first queued command whose Cmd_key equals the "v" arg; throws when none matches
private Object Get_first(GfoMsg m) {
String cmd_key = m.ReadStr("v");
int cmds_len = list.Count();
for (int i = 0;i < cmds_len; i++) {
Xob_cmd cmd = (Xob_cmd)list.Get_at(i);
if (String_.Eq(cmd.Cmd_key(), cmd_key)) return cmd;
}
throw Err_.new_wo_type("cmd not found", "key", cmd_key);
}
// message form of Add_many: arg 0 is the wiki, args 1..n are command keys
private Object Add_many(GfoMsg m) {
Xowe_wiki wiki = Wiki_get_or_make(m);
wiki.Lang().Init_by_load_assert(); // NOTE: must check that lang is loaded; else case_mgr will not initialize; DATE:2013-05-11
int args_len = m.Args_count();
String[] cmds = new String[args_len - 1]; // -1 b/c 1st arg is wiki
for (int i = 1; i < args_len; i++) {
Keyval kv = m.Args_getAt(i);
cmds[i - 1] = kv.Val_to_str_or_empty();
}
return Add_many(wiki, cmds);
}
// adds each command key through Add_cmd and returns the result of the last call; throws when cmds is empty
public Object Add_many(Xowe_wiki wiki, String... cmds) {
int len = cmds.length; if (len == 0) throw Err_.new_wo_type("add_many cannot have 0 cmds");
Object rv = null;
for (int i = 0; i < len; i++)
rv = Add_cmd(wiki, cmds[i]);
return rv;
}
// queues already-constructed commands in the given order
public void Add_cmd_ary(Xob_cmd... cmds_ary) {
int cmds_len = cmds_ary.length;
for (int i = 0; i < cmds_len; ++i)
this.Add(cmds_ary[i]);
}
// resolves the wiki named by the "v" arg through the app's wiki mgr and calls Init_by_load on its lang
private Xowe_wiki Wiki_get_or_make(GfoMsg m) {
byte[] wiki_key = m.ReadBry("v");
Xoae_wiki_mgr wiki_mgr = bldr.App().Wiki_mgr();
Xowe_wiki rv = wiki_mgr.Get_by_or_make(wiki_key);
rv.Lang().Init_by_load();
return rv;
}
public static final String GRP_KEY = "xowa.bldr.cmds";
}

View File

@@ -13,16 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs; import gplx.*; import gplx.xowa.*;
import gplx.xowa.bldrs.wkrs.*;
/**
 * Registry of builder commands keyed by their Cmd_key.
 */
public class Xob_cmd_regy {
	private final Ordered_hash regy = Ordered_hash_.New();

	/** @return the command stored under key, cast to Xob_cmd; whatever the hash yields for an unknown key is passed through */
	public Xob_cmd Get_or_null(String key) {
		Object found = regy.Get_by(key);
		return (Xob_cmd)found;
	}

	/** Registers every command in ary under its own Cmd_key. */
	public void Add_many(Xob_cmd... ary) {
		for (Xob_cmd cmd : ary) {
			regy.Add(cmd.Cmd_key(), cmd);
		}
	}
}

View File

@@ -13,42 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs; import gplx.*; import gplx.xowa.*;
import gplx.dbs.*; import gplx.dbs.cfgs.*;
/**
 * A builder database file: its url, its connection and a cfg table bound to that connection.
 * Instances are created through the static New* factories.
 */
public class Xob_db_file {
	// file names used by the directory-based factories
	public static final String Name__wiki_image = "xowa.wiki.image.sqlite3";
	public static final String Name__wiki_redirect = "xowa.wiki.redirect.sqlite3";
	public static final String Name__file_make = "xowa.file.make.sqlite3";
	public static final String Name__temp_log = "xowa.temp.log.sqlite3";
	public static final String Name__page_regy = "xowa.file.page_regy.sqlite3";
	public static final String Name__redlink = "xowa.temp.redlink.sqlite3";
	public static final String Name__page_link = "xowa.wiki.pagelinks.sqlite3";

	private final Io_url url;
	private final Db_conn conn;
	private final Db_cfg_tbl tbl__cfg;

	Xob_db_file(Io_url url, Db_conn conn) {
		this.url = url;
		this.conn = conn;
		this.tbl__cfg = gplx.xowa.wikis.data.Xowd_cfg_tbl_.New(conn);
	}

	public Io_url Url() {
		return url;
	}

	public Db_conn Conn() {
		return conn;
	}

	public Db_cfg_tbl Tbl__cfg() {
		return tbl__cfg;
	}

	// factories for files that live in a caller-supplied directory
	public static Xob_db_file New__file_make(Io_url dir) {
		return New(dir, Name__file_make);
	}

	public static Xob_db_file New__page_regy(Io_url dir) {
		return New(dir, Name__page_regy);
	}

	public static Xob_db_file New__wiki_image(Io_url dir) {
		return New(dir, Name__wiki_image);
	}

	public static Xob_db_file New__wiki_redirect(Io_url dir) {
		return New(dir, Name__wiki_redirect);
	}

	public static Xob_db_file New__temp_log(Io_url dir) {
		return New(dir, Name__temp_log);
	}

	public static Xob_db_file New__redlink(Io_url dir) {
		return New(dir, Name__redlink);
	}

	// factories for files that live in the wiki's root dir
	public static Xob_db_file New__page_link(Xow_wiki wiki) {
		return New(wiki.Fsys_mgr().Root_dir(), Name__page_link);
	}

	public static Xob_db_file New__page_file_map(Xow_wiki wiki) {
		return New(wiki.Fsys_mgr().Root_dir(), wiki.Domain_str() + "-file-page_map.xowa");
	}

	public static Xob_db_file New__img_link(Xow_wiki wiki) {
		return New(wiki.Fsys_mgr().Root_dir(), "xowa.wiki.imglinks.sqlite3");
	}

	/** The file name combines the wiki domain with the current time formatted as "yyyy.MM". */
	public static Xob_db_file New__deletion_db(Xow_wiki wiki) {
		String deletion_name = String_.Format
			( "{0}-file-deletion-{1}.xowa"
			, wiki.Domain_str()
			, Datetime_now.Get().XtoStr_fmt("yyyy.MM")
			);
		return New(wiki.Fsys_mgr().Root_dir(), deletion_name);
	}

	/**
	 * Gets or creates the connection for dir/name and wraps it.
	 * The cfg table is created only when the connection data reports that it was just created.
	 */
	public static Xob_db_file New(Io_url dir, String name) {
		Io_url file_url = dir.GenSubFil(name);
		Db_conn_bldr_data bldr_data = Db_conn_bldr.Instance.Get_or_new(file_url);
		Xob_db_file db_file = new Xob_db_file(file_url, bldr_data.Conn());
		if (bldr_data.Created()) {
			db_file.Tbl__cfg().Create_tbl();
		}
		return db_file;
	}
}

View File

@@ -13,140 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs; import gplx.*; import gplx.xowa.*;
import gplx.core.tests.*; import gplx.core.ios.*; import gplx.core.times.*;
import gplx.dbs.*; import gplx.xowa.wikis.tdbs.*; import gplx.xowa.wikis.data.tbls.*; import gplx.xowa.bldrs.cmds.texts.tdbs.*;
import gplx.xowa.bldrs.wkrs.*;
public class Xob_fxt {
public Xob_fxt Ctor_mem() {
Io_mgr.Instance.InitEngine_mem();
return Ctor(Io_url_.mem_dir_("mem/xowa/"));
}
public Xob_fxt Ctor(Io_url root_dir) {
Db_conn_bldr.Instance.Reg_default_sqlite();
app = Xoa_app_fxt.Make__app__edit("linux", root_dir);
wiki = Xoa_app_fxt.Make__wiki__edit(app);
bldr = Xoa_app_fxt.bldr_(app);
return this;
}
public Xoae_app App() {return app;} private Xoae_app app;
public Xob_bldr Bldr() {return bldr;} private Xob_bldr bldr;
public Xowe_wiki Wiki() {return wiki;} private Xowe_wiki wiki;
public Io_url fil_ns_title(int ns_id, int idx) {return wiki.Tdb_fsys_mgr().Url_ns_fil(Xotdb_dir_info_.Tid_ttl, ns_id, idx);}
public Io_url fil_ns_page(int ns_id, int idx) {return wiki.Tdb_fsys_mgr().Url_ns_fil(Xotdb_dir_info_.Tid_page, ns_id, idx);}
public Io_url fil_ns_sttl(int ns_id, int idx) {return wiki.Tdb_fsys_mgr().Url_ns_fil(Xotdb_dir_info_.Tid_search_ttl, ns_id, idx);}
public Io_url fil_site(byte tid, int idx) {return wiki.Tdb_fsys_mgr().Url_site_fil(tid, idx);}
public Io_url fil_site_ctg(int idx) {return wiki.Tdb_fsys_mgr().Url_site_fil(Xotdb_dir_info_.Tid_category, idx);}
public Io_url fil_site_id(int idx) {return wiki.Tdb_fsys_mgr().Url_site_fil(Xotdb_dir_info_.Tid_id, idx);}
public Io_url fil_reg(byte tid) {return wiki.Tdb_fsys_mgr().Url_site_reg(tid);}
public Io_url fil_reg(int ns_id, byte tid) {return wiki.Tdb_fsys_mgr().Url_ns_reg(Int_.To_str_pad_bgn_zero(ns_id, 3), tid);}
public Xob_fxt Fil_expd(Io_url url, String... expd) {
String text = String_.Concat_lines_nl_skip_last(expd); // skipLast b/c if trailing line wanted, easier to pass in extra argument for ""
expd_list.Add(new Io_fil_chkr(url, text));
return this;
} List_adp expd_list = List_adp_.New();
public Xob_fxt Fil_skip(Io_url... urls) {
for (int i = 0; i < urls.length; i++)
skip_list.Add(urls[i]);
return this;
} List_adp skip_list = List_adp_.New();
public Xob_fxt doc_ary_(Xowd_page_itm... v) {doc_ary = v; return this;} private Xowd_page_itm[] doc_ary;
public Xowd_page_itm doc_wo_date_(int id, String title, String text) {return doc_(id, "2012-01-02 13:14", title, text);}
public Xowd_page_itm doc_(int id, String date, String title, String text) {
Xowd_page_itm rv = new Xowd_page_itm().Id_(id).Ttl_(Bry_.new_u8(title), wiki.Ns_mgr()).Text_(Bry_.new_u8(text));
int[] modified_on = new int[7];
dateParser.Parse_iso8651_like(modified_on, date);
rv.Modified_on_(DateAdp_.seg_(modified_on));
return rv;
}
public Xob_fxt Run_id() {
Xob_make_id_wkr wkr = new Xob_make_id_wkr(bldr, wiki);
Run(wkr);
return this;
}
private void Run_wkr(Xob_page_wkr wkr) {
wkr.Page_wkr__bgn();
for (int i = 0; i < doc_ary.length; i++) {
Xowd_page_itm page = doc_ary[i];
wkr.Page_wkr__run(page);
}
wkr.Page_wkr__end();
}
private void tst_fils(Io_url[] ary) {
Io_fil[] actls = Get_actl(ary);
Io_fil_chkr[] expds = (Io_fil_chkr[])expd_list.To_ary(Io_fil_chkr.class);
tst_mgr.Tst_ary("all", expds, actls);
}
Io_fil[] Get_actl(Io_url[] ary) {
int len = ary.length;
Io_fil[] rv = new Io_fil[len];
for (int i = 0; i < len; i++) {
Io_url url = ary[i];
String data = Io_mgr.Instance.LoadFilStr(url);
rv[i] = new Io_fil(url, data);
}
return rv;
}
public Xob_fxt Run_tmpl_dump() {
Xob_parse_dump_templates_cmd wkr = new Xob_parse_dump_templates_cmd(bldr, wiki);
Run_wkr(wkr);
tst_fils(wkr.Dump_url_gen().Prv_urls());
return this;
}
/** Wraps the given parser workers in a new {@code Xobd_parser} and runs that parser as a page worker. */
public Xob_fxt Run(Xobd_parser_wkr... wkrs) {
	Xobd_parser page_parser = new Xobd_parser(bldr);
	for (Xobd_parser_wkr wkr : wkrs) {
		page_parser.Wkr_add(wkr);
	}
	Run(page_parser);
	return this;
}
/**
 * Runs each page worker in turn (begin, every page of {@code doc_ary}, end),
 * then verifies the expected files once all workers have finished.
 */
public Xob_fxt Run(Xob_page_wkr... wkrs) {
	int page_count = doc_ary.length;
	for (Xob_page_wkr wkr : wkrs) {
		wkr.Page_wkr__bgn();
		int page_idx = 0;
		while (page_idx < page_count) {
			wkr.Page_wkr__run(doc_ary[page_idx]);
			page_idx++;
		}
		wkr.Page_wkr__end();
	}
	Test_expd_files();
	return this;
}
/** Executes each command's begin / run / end sequence in order, then verifies the expected files. */
public Xob_fxt Run_cmds(Xob_cmd... cmds) {
	for (Xob_cmd cmd : cmds) {
		cmd.Cmd_bgn(bldr);
		cmd.Cmd_run();
		cmd.Cmd_end();
	}
	Test_expd_files();
	return this;
}
/** Compares the queued expectations against the wiki's files on disk; does nothing when no expectation was queued. */
private void Test_expd_files() {
	if (expd_list.Count() <= 0) return;
	Io_fil_chkr[] expected = (Io_fil_chkr[])expd_list.To_ary(Io_fil_chkr.class);
	Io_fil[] actual = wiki_();
	tst_mgr.Tst_ary("all", expected, actual);
}
/** Collects every file under the wiki's ns dir and site dir (in that order), sorts the combined list and returns it as an array. */
Io_fil[] wiki_() {
	List_adp fils = List_adp_.New();
	Io_url ns_dir = wiki.Tdb_fsys_mgr().Ns_dir();
	wiki_fil_add(fils, ns_dir);
	Io_url site_dir = wiki.Tdb_fsys_mgr().Site_dir();
	wiki_fil_add(fils, site_dir);
	fils.Sort();
	return (Io_fil[])fils.To_ary(Io_fil.class);
}
/** Recursively lists {@code root_dir} and appends one {@code Io_fil} per url to {@code list}; the load is issued with the missing-ignored option. */
private void wiki_fil_add(List_adp list, Io_url root_dir) {
	Io_url[] found = Io_mgr.Instance.QueryDir_args(root_dir).Recur_().ExecAsUrlAry();
	for (Io_url url : found) {
		String data = Io_mgr.Instance.LoadFilStr_args(url).MissingIgnored_().Exec();
		list.Add(new Io_fil(url, data));
	}
}
// comparer used by tst_fils / Test_expd_files to check expected files against actual files
Tst_mgr tst_mgr = new Tst_mgr();
// parser used by doc_ to split a date string into the int segments passed to DateAdp_.seg_
DateAdp_parser dateParser = DateAdp_parser.new_();
}

View File

@@ -13,75 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs; import gplx.*; import gplx.xowa.*;
import gplx.xowa.wikis.data.*; import gplx.xowa.wikis.data.tbls.*;
import gplx.xowa.bldrs.cmds.*;
/**
 * Chooses the database file that receives a page's data during a build and tracks
 * which database files currently have their tables initialized by the worker.
 */
public class Xob_ns_to_db_mgr {
	private final Xob_ns_to_db_wkr wkr;
	private final Xow_db_mgr db_mgr;
	private final long db_max;
	private final Ordered_hash db_list = Ordered_hash_.New();	// dbs for which Tbl_init has run and Tbl_term has not
	private boolean one_file_conn_init = true;					// true until the single shared db (core or html) has been set up
	public Xob_ns_to_db_mgr(Xob_ns_to_db_wkr wkr, Xow_db_mgr db_mgr, long db_max) {
		this.wkr = wkr;
		this.db_mgr = db_mgr;
		this.db_max = db_max;
	}
	/**
	 * Returns the db that should store {@code data_len} more bytes for the given ns entry,
	 * and adds {@code data_len} to that db's tracked length.
	 */
	public Xow_db_file Get_by_ns(Xob_ns_file_itm ns_file_itm, int data_len) {
		Xow_db_file db;
		if (db_mgr.Props().Layout_text().Tid_is_all())
			db = Get_core_db();
		else if (wkr.Db_tid() == Xow_db_file_.Tid__html_data && db_mgr.Props().Layout_html().Tid_is_all_or_few())
			db = Get_html_db();
		else
			db = Get_ns_db(ns_file_itm, data_len);
		db.File_len_add(data_len);
		return db;
	}
	/** "all" text layout: everything goes to the core db; its tables are initialized on first use only. */
	private Xow_db_file Get_core_db() {
		Xow_db_file core = db_mgr.Db__core();
		if (one_file_conn_init) {
			one_file_conn_init = false;
			Init_tbl(core);
		}
		return core;
	}
	/** html data with an all-or-few html layout: the first call makes the db; later calls return the html db. */
	private Xow_db_file Get_html_db() {
		if (!one_file_conn_init) return db_mgr.Db__html();
		one_file_conn_init = false;
		Xow_db_file html = db_mgr.Dbs__make_by_tid(wkr.Db_tid());
		Init_tbl(html);
		return html;
	}
	/** per-ns layout: use the db assigned to the ns, creating one if unassigned, and roll over to a new db when the current one is "full". */
	private Xow_db_file Get_ns_db(Xob_ns_file_itm ns_file_itm, int data_len) {
		int db_id = ns_file_itm.Nth_db_id();
		Xow_db_file db;
		if (db_id != Xob_ns_file_itm.Nth_db_id_null)
			db = db_mgr.Dbs__get_by_id_or_fail(db_id);
		else										// ns not assigned yet to db
			db = Init_db(ns_file_itm);
		long file_len = db.File_len();
		if (file_len + data_len > db_max) {			// file is "full"
			Term_tbl(db);
			db = Init_db(ns_file_itm);
		}
		return db;
	}
	/** Makes a new db for the ns entry, records its id on the entry and initializes its tables. */
	private Xow_db_file Init_db(Xob_ns_file_itm ns_file_itm) {
		Xow_db_file db = db_mgr.Dbs__make_by_tid
			( ns_file_itm.Db_file_tid()
			, Int_ary_.To_str("|", ns_file_itm.Ns_ids())
			, ns_file_itm.Nth_db_idx()
			, ns_file_itm.Make_file_name()
			);
		ns_file_itm.Nth_db_id_(db.Id());
		Init_tbl(db);
		return db;
	}
	private void Init_tbl(Xow_db_file db) {
		wkr.Tbl_init(db);
		db_list.Add(db.Id(), db);
	}
	private void Term_tbl(Xow_db_file db) {
		wkr.Tbl_term(db);
		db_list.Del(db.Id());
	}
	/** Terminates the tables of every tracked db. */
	public void Rls_all() {
		// iterate over a snapshot because Term_tbl removes entries from db_list
		Xow_db_file[] tracked = (Xow_db_file[])db_list.To_ary(Xow_db_file.class);
		for (Xow_db_file db : tracked) {
			Term_tbl(db); // SQLITE:1_TXN; may call close on db where txn is already closed
		}
	}
	/** Saves the open transaction on the connection of every tracked db. */
	public void Commit() {
		int count = db_list.Count();
		int idx = 0;
		while (idx < count) {
			Xow_db_file db = (Xow_db_file)db_list.Get_at(idx);
			db.Conn().Txn_sav();
			++idx;
		}
	}
}

View File

@@ -13,10 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs; import gplx.*; import gplx.xowa.*;
import gplx.xowa.wikis.data.*;
/** Callbacks that Xob_ns_to_db_mgr invokes as it starts and stops using database files. */
public interface Xob_ns_to_db_wkr {
// db type id of this worker; Xob_ns_to_db_mgr compares it with Xow_db_file_.Tid__html_data and passes it to Dbs__make_by_tid
byte Db_tid();
// called by Xob_ns_to_db_mgr when it starts tracking a db (first use of the core / html db, or a newly made per-ns db)
void Tbl_init(Xow_db_file db);
// called by Xob_ns_to_db_mgr when it stops tracking a db (the db exceeded the max size, or Rls_all was called)
void Tbl_term(Xow_db_file db);
}

View File

@@ -13,84 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs; import gplx.*; import gplx.xowa.*;
import gplx.core.consoles.*; import gplx.core.ios.*;
import gplx.xowa.wikis.nss.*; import gplx.xowa.wikis.data.tbls.*; import gplx.xowa.parsers.tmpls.*;
import gplx.xowa.bldrs.wkrs.*; import gplx.xowa.bldrs.xmls.*;
/**
 * Build command that reads the wiki's import source, parses it one page at a time
 * and hands every parsed page to each registered {@code Xob_page_wkr}.
 */
public class Xob_page_wkr_cmd implements Xob_cmd {
	public static final String KEY = "dump_mgr";
	static final String GRP_KEY = "xowa.bldr.rdr";
	private final Xob_bldr bldr;
	private final Xowe_wiki wiki;
	private Ordered_hash wkrs = Ordered_hash_.New();
	private Xobd_parser page_parser;
	int optRdrBfrSize = 8 * Io_mgr.Len_mb;
	String optRdrFillFmt = "reading ~{0} MB: ~{1} ~{2}";
	public Xob_page_wkr_cmd(Xob_bldr bldr, Xowe_wiki wiki) {
		this.bldr = bldr;
		this.wiki = wiki;
	}
	public String Cmd_key() {return KEY;}
	public Xob_cmd Cmd_clone(Xob_bldr bldr, Xowe_wiki wiki) {return null;}
	public void Cmd_init(Xob_bldr bldr) {}
	public void Cmd_bgn(Xob_bldr bldr) {}
	/**
	 * Begins all workers, streams pages from the import source to them, runs their cleanup
	 * and finally ends them. A failure inside a worker for one page is logged and the loop
	 * moves on; a failure while reading the source is logged and rethrown.
	 */
	public void Cmd_run() {
		Xob_page_wkr[] wkr_ary = (Xob_page_wkr[])wkrs.To_ary(Xob_page_wkr.class);
		int last_wkr_idx = wkr_ary.length - 1;
		for (Xob_page_wkr wkr : wkr_ary)
			wkr.Page_wkr__bgn();
		Io_buffer_rdr fil = Io_buffer_rdr.Null;
		Xowd_page_itm page = new Xowd_page_itm();
		Xow_ns_mgr ns_mgr = wiki.Ns_mgr();
		Xob_xml_parser parser = bldr.Dump_parser().Data_bfr_len_(Io_mgr.Len_mb);
		long fil_len = 0;
		Gfo_usr_dlg usr_dlg = bldr.App().Usr_dlg();
		try {
			gplx.core.ios.streams.Io_stream_rdr src_rdr = wiki.Import_cfg().Src_rdr();
			fil = Io_buffer_rdr.new_(src_rdr, optRdrBfrSize);
			fil_len = fil.Fil_len();
			if (src_rdr.Tid() == gplx.core.ios.streams.Io_stream_tid_.Tid__bzip2)
				fil_len = (fil_len * 100) / 18;	// HACK: no way to get actual file progress; assume 18% compression
			int prv_pos = 0;
			for (;;) {
				int cur_pos = parser.Parse_page(page, usr_dlg, fil, fil.Bfr(), prv_pos, ns_mgr);
				if (cur_pos == Bry_find_.Not_found) break;
				if (cur_pos < prv_pos) {	// position moved backwards: report progress
					bldr.Print_prog_msg
						( fil.Fil_pos(), fil_len, 1, optRdrFillFmt
						, Int_.To_str_pad_bgn_zero((int)(fil.Fil_pos() / Io_mgr.Len_mb), Int_.DigitCount((int)(fil.Fil_len() / Io_mgr.Len_mb)))
						, ""
						, String_.new_u8(page.Ttl_full_db())
						);
				}
				prv_pos = cur_pos;
				try {
					for (Xob_page_wkr wkr : wkr_ary)
						wkr.Page_wkr__run(page);
				}
				catch (Exception e) {
					Err_.Noop(e);
					long dividend = fil.Fil_pos();
					if (dividend >= fil_len) dividend = fil_len - 1; // prevent % from going over 100
					Log_msg(Decimal_adp_.CalcPctStr(dividend, fil_len, "00.00") + "|" + String_.new_u8(page.Ttl_full_db()) + "|" + Err_.Message_gplx_log(e));
				}
			}
			// NOTE: release in reverse order; needed to make sure txns are released correctly
			for (int i = last_wkr_idx; i >= 0; i--)
				wkr_ary[i].Page_wkr__run_cleanup();
		}
		catch (Exception e) {
			Log_msg(Err_.Message_lang(e));
			throw Err_.new_exc(e, "xo", "error while reading dump");
		}
		finally {
			fil.Rls();
		}
		bldr.Usr_dlg().Prog_none("", "", "reading completed: performing post-processing clean-up");
		// NOTE: release in reverse order; needed to make sure txns are released correctly
		for (int i = last_wkr_idx; i >= 0; i--)
			wkr_ary[i].Page_wkr__end();
	}
	/** Writes the message to the session log and to the console. */
	private void Log_msg(String msg) {
		bldr.Usr_dlg().Log_wkr().Log_to_session(msg);
		Console_adp__sys.Instance.Write_str_w_nl(msg);
	}
	public void Cmd_end() {}
	public void Cmd_term() {}
	/** Registers a worker under its own key. */
	public void Wkr_add(Xob_page_wkr wkr) {
		wkrs.Add(wkr.Page_wkr__key(), wkr);
	}
	public Xob_page_wkr Wkr_get(String key) {
		return (Xob_page_wkr)wkrs.Get_by(key);
	}
	/** Returns the shared page parser, creating and registering it as a worker on first call. */
	public Xobd_parser Page_parser_assert() {
		if (page_parser != null) return page_parser;
		page_parser = new Xobd_parser(bldr);
		this.Wkr_add(page_parser);
		return page_parser;
	}
	/** Returns the last file in {@code dir} matching {@code filter}, or null when nothing matches. */
	public static Io_url Find_fil_by(Io_url dir, String filter) {
		Io_url[] matches = Io_mgr.Instance.QueryDir_args(dir).FilPath_(filter).ExecAsUrlAry();
		if (matches.length == 0) return null;
		return matches[matches.length - 1];
	}
	public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
		throw Err_.new_unimplemented();
	}
}

View File

@@ -13,52 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs; import gplx.*; import gplx.xowa.*;
import gplx.core.btries.*; import gplx.xowa.bldrs.wkrs.*;
import gplx.xowa.wikis.data.tbls.*;
/**
 * Page worker that scans each page's text for byte sequences ("hooks") registered by
 * its parser workers and dispatches to the owning worker whenever a hook matches.
 */
public class Xobd_parser implements Xob_page_wkr {
	static final String KEY = "page_parser";
	private final Xob_bldr bldr;
	private final List_adp wkr_list = List_adp_.New();
	private final Btrie_slim_mgr trie = Btrie_slim_mgr.ci_a7();	// NOTE:ci.ascii:MW_const.en; ctg.v1 assumes [[Category:
	private final Btrie_rv trv = new Btrie_rv();
	public Xobd_parser(Xob_bldr bldr) {
		this.bldr = bldr;
	}
	public String Page_wkr__key() {return KEY;}
	public void Wkr_add(Xobd_parser_wkr wkr) {
		wkr_list.Add(wkr);
	}
	/** Begins every worker and adds each of its hooks to the trie, mapped to that worker. */
	public void Page_wkr__bgn() {
		int wkr_count = wkr_list.Count();
		for (int wkr_idx = 0; wkr_idx < wkr_count; wkr_idx++) {
			Xobd_parser_wkr wkr = (Xobd_parser_wkr)wkr_list.Get_at(wkr_idx);
			wkr.Wkr_bgn(bldr);
			int hook_count = wkr.Wkr_hooks().Count();
			int hook_idx = 0;
			while (hook_idx < hook_count) {
				byte[] hook = (byte[])wkr.Wkr_hooks().Get_at(hook_idx);
				trie.Add_obj(hook, wkr);
				hook_idx++;
			}
		}
	}
	/** Walks the page text; at each position either advances by one byte or lets the matched worker decide the next position. */
	public void Page_wkr__run(Xowd_page_itm page) {
		byte[] src = page.Text();
		int src_len = src.length;
		for (int pos = 0; pos != src_len; ) {
			Object match = trie.Match_at(trv, src, pos, src_len);
			if (match != null)
				pos = ((Xobd_parser_wkr)match).Wkr_run(page, src, src_len, pos, trv.Pos());
			else
				++pos;
		}
	}
	public void Page_wkr__run_cleanup() {}
	/** Ends every worker in registration order. */
	public void Page_wkr__end() {
		int wkr_count = wkr_list.Count();
		int wkr_idx = 0;
		while (wkr_idx < wkr_count) {
			((Xobd_parser_wkr)wkr_list.Get_at(wkr_idx)).Wkr_end();
			wkr_idx++;
		}
	}
	public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
		throw Err_.new_unimplemented();
	}
}

View File

@@ -13,11 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs; import gplx.*; import gplx.xowa.*;
import gplx.xowa.wikis.data.tbls.*;
/** Worker that Xobd_parser calls whenever one of the worker's hook byte sequences is found in a page's text. */
public interface Xobd_parser_wkr extends Gfo_invk {
// byte[] sequences to watch for; Xobd_parser adds each one to its trie, mapped to this worker
Ordered_hash Wkr_hooks();
// called once by Xobd_parser.Page_wkr__bgn, before the hooks are read
void Wkr_bgn(Xob_bldr bldr);
// called when a hook matches at bgn; end is the position reported by the trie match; the returned int is the position at which Xobd_parser resumes scanning
int Wkr_run(Xowd_page_itm page, byte[] src, int src_len, int bgn, int end);
// called once by Xobd_parser.Page_wkr__end
void Wkr_end();
}

View File

@@ -13,29 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs; import gplx.*; import gplx.xowa.*;
import gplx.core.ios.*; import gplx.core.lists.*;
import gplx.xowa.wikis.nss.*;
import gplx.xowa.bldrs.wtrs.*;
/** Helpers that split dump files into sorted chunks and merge the chunks through a make-command. */
public class Xobdc_merger {
	/** Same as the 7-argument overload, using {@code Io_sort_split_itm_sorter.Instance} as the row comparer. */
	public static void Basic(Gfo_usr_dlg usr_dlg, Io_url_gen dump_url_gen, Io_url sort_dir, int memory_max, Io_line_rdr_key_gen key_gen, Io_sort_cmd make_cmd) {
		Basic(usr_dlg, dump_url_gen, sort_dir, memory_max, Io_sort_split_itm_sorter.Instance, key_gen, make_cmd);
	}
	/** Splits the dump files into sort files under {@code sort_dir}, then merges those sort files into {@code make_cmd}. */
	public static void Basic(Gfo_usr_dlg usr_dlg, Io_url_gen dump_url_gen, Io_url sort_dir, int memory_max, ComparerAble row_comparer, Io_line_rdr_key_gen key_gen, Io_sort_cmd make_cmd) {
		Io_url_gen sort_url_gen = Io_url_gen_.dir_(sort_dir);
		Io_sort sorter = new Io_sort().Memory_max_(memory_max);
		sorter.Split(usr_dlg, dump_url_gen, sort_url_gen, row_comparer, key_gen);
		sorter.Merge(usr_dlg, sort_url_gen.Prv_urls(), row_comparer, key_gen, make_cmd);
	}
	/**
	 * Runs {@code Basic} once per non-null writer: the make dir is {@code make_root/<ns>/<type>}
	 * and the sort dir is {@code tmp_root/<ns>/sort}.
	 */
	public static void Ns(Gfo_usr_dlg usr_dlg, Xob_tmp_wtr[] ttl_wtrs, String type, Io_url tmp_root, Io_url make_root, int memory_max, Io_line_rdr_key_gen key_gen, Io_make_cmd make_cmd) {
		for (Xob_tmp_wtr ttl_wtr : ttl_wtrs) {
			if (ttl_wtr != null) {
				Xow_ns ns = ttl_wtr.Ns_itm();
				make_cmd.Make_dir_(make_root.GenSubDir_nest(ns.Num_str(), type));
				Basic(usr_dlg, ttl_wtr.Url_gen(), tmp_root.GenSubDir_nest(ns.Num_str(), "sort"), memory_max, key_gen, make_cmd);
			}
		}
	}
}

View File

@@ -13,39 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs; import gplx.*; import gplx.xowa.*;
import gplx.core.ios.*; import gplx.core.lists.*;
import gplx.xowa.wikis.tdbs.*;
/** Accumulates one registry line per file in the form {@code idx|first_key|last_key|count}; example: 123|bgn|end|1 */
class Io_sort_filCmd_reg implements Io_sort_filCmd {
	private Bry_bfr bfr = Bry_bfr_.New();
	byte[] prv_key = new byte[1024];	// copy of the most recently added key
	int prv_key_len = 0;
	int fil_idx = 0;
	int itm_count = 0;
	public Io_sort_filCmd_reg() {}
	/** Counts the item and remembers its key so that Fil_end can write it as the file's last key. */
	public void Bfr_add(Io_line_rdr stream) {
		itm_count++;
		int key_bgn = stream.Key_pos_bgn();
		int key_end = stream.Key_pos_end();
		Bry_.Copy_by_pos(stream.Bfr(), key_bgn, key_end, prv_key, 0);
		prv_key_len = key_end - key_bgn;
	}
	/** Starts a registry line: file index, pipe, current key, pipe. */
	public void Fil_bgn(Io_line_rdr stream) {
		int cur_idx = fil_idx++;
		bfr.Add_int_variable(cur_idx).Add_byte(Byte_ascii.Pipe);
		bfr.Add_mid(stream.Bfr(), stream.Key_pos_bgn(), stream.Key_pos_end()).Add_byte(Byte_ascii.Pipe);
	}
	/** Finishes the registry line: last key, pipe, item count, newline; then resets the item count. */
	public void Fil_end() {
		bfr.Add_mid(prv_key, 0, prv_key_len).Add_byte(Byte_ascii.Pipe);
		bfr.Add_int_variable(itm_count).Add_byte(Byte_ascii.Nl);
		itm_count = 0;
	}
	/** Saves the accumulated registry bytes to {@code fil}. */
	public void Flush(Io_url fil) {
		Io_mgr.Instance.SaveFilBry(fil, bfr.Bfr(), bfr.Len());
	}
}
/** Url generator that issues sequentially numbered file urls under {@code root_dir} via {@code Xotdb_fsys_mgr.Url_fil}. */
class Io_url_gen_nest implements gplx.core.ios.Io_url_gen {
	Io_url root_dir; byte[] ext; int fil_idx;
	Io_url cur_url;
	public Io_url_gen_nest(Io_url root_dir, String ext) {this.root_dir = root_dir; this.ext = Bry_.new_u8(ext);}
	/** Returns the url most recently produced by {@code Nxt_url}; null if none was produced yet. */
	public Io_url Cur_url() {return cur_url;}
	/** Produces the url for the next file index and advances the index. */
	public Io_url Nxt_url() {cur_url = Xotdb_fsys_mgr.Url_fil(root_dir, fil_idx++, ext); return cur_url;}
	/**
	 * Returns the urls for every index issued so far (0 to fil_idx - 1), in order.
	 * Does not modify the generator's state.
	 */
	public Io_url[] Prv_urls() {
		Io_url[] rv = new Io_url[fil_idx];
		for (int i = 0; i < fil_idx; i++) {
			// use the loop index: the previous code passed fil_idx++, which grew the loop bound on every pass,
			// regenerated the wrong indexes and ran past the end of rv
			rv[i] = Xotdb_fsys_mgr.Url_fil(root_dir, i, ext);
		}
		return rv;
	}
	/** Deletes {@code root_dir} and everything below it, if the directory exists. */
	public void Del_all() {if (Io_mgr.Instance.ExistsDir(root_dir)) Io_mgr.Instance.DeleteDirDeep(root_dir);}
}

View File

@@ -13,49 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs; import gplx.*; import gplx.xowa.*;
import gplx.core.ios.*;
import gplx.xowa.wikis.data.*;
/** Static accessors for builder settings (zip modes, max db sizes, layout thresholds) plus factories that derive db layout from the dump-file size. */
public class Xobldr_cfg {
	public static final byte[] Ns_file_map__each = Bry_.new_a7("<each>");
	private static long layout_all_max = 0;					// disable by default; may set to 200 MB in future
	private static boolean hzip_enabled = Bool_.Y;
	private static boolean hzip_mode_is_b256 = Bool_.Y;

	public static byte Zip_mode__text(Xoa_app app) {return Zip_mode(app, "xowa.bldr.db.zip_mode.text");}	// CFG: Cfg__
	public static byte Zip_mode__html(Xoa_app app) {return Zip_mode(app, "xowa.bldr.db.zip_mode.html");}	// CFG: Cfg__
	/** Reads the zip-mode name from cfg (falling back to "gzip") and converts it to a stream type id. */
	private static byte Zip_mode(Xoa_app app, String key) {
		return gplx.core.ios.streams.Io_stream_tid_.To_tid(app.Cfg().Get_str_app_or(key, "gzip"));
	}

	public static long Max_size__text(Xoa_app app) {return Max_size(app, "xowa.bldr.db.max_size.text");}	// CFG: Cfg__
	public static long Max_size__html(Xoa_app app) {return Max_size(app, "xowa.bldr.db.max_size.html");}	// CFG: Cfg__
	public static long Max_size__file(Xoa_app app) {return Max_size(app, "xowa.bldr.db.max_size.file");}	// CFG: Cfg__
	/**
	 * Reads the cfg value and multiplies it by {@code Io_mgr.Len_mb}.
	 * NOTE(review): the fallback is produced by Io_size_.To_long_by_int_mb(1500) and is then multiplied by Len_mb as well; confirm the intended unit of the fallback.
	 */
	private static long Max_size(Xoa_app app, String key) {
		long cfg_val = app.Cfg().Get_long_app_or(key, Io_size_.To_long_by_int_mb(1500));
		return cfg_val * Io_mgr.Len_mb;
	}

	public static long Layout_size__text(Xoa_app app) {return Layout_size(app, "xowa.bldr.db.layout_size.text");}	// CFG: Cfg__
	public static long Layout_size__html(Xoa_app app) {return Layout_size(app, "xowa.bldr.db.layout_size.html");}	// CFG: Cfg__
	public static long Layout_size__file(Xoa_app app) {return Layout_size(app, "xowa.bldr.db.layout_size.file");}	// CFG: Cfg__
	/**
	 * Reads the cfg value and multiplies it by {@code Io_mgr.Len_mb}.
	 * NOTE(review): same fallback / unit question as Max_size; confirm.
	 */
	private static long Layout_size(Xoa_app app, String key) {
		long cfg_val = app.Cfg().Get_long_app_or(key, Io_size_.To_long_by_int_mb(1500));
		return cfg_val * Io_mgr.Len_mb;
	}

	/** Returns the "few" ns-file map when the dump is smaller than the text layout size, else the "each" map. */
	public static byte[] New_ns_file_map(Xoa_app app, long dump_file_size) {
		if (dump_file_size < Layout_size__text(app))
			return gplx.xowa.bldrs.cmds.Xob_ns_file_itm_parser.Ns_file_map__few;
		return gplx.xowa.bldrs.cmds.Xob_ns_file_itm_parser.Ns_file_map__each;	// DB.FEW: DATE:2016-06-07
	}
	/**
	 * Builds core-db props for a new wiki: every layout is "all" when the dump is below {@code layout_all_max};
	 * otherwise each layout is "few" or "lot" depending on its own layout-size threshold.
	 */
	public static Xowd_core_db_props New_props(Xoa_app app, String domain_str, long dump_file_size) {
		boolean use_all = dump_file_size < layout_all_max;
		Xow_db_layout layout_text = use_all ? Xow_db_layout.Itm_all : Few_or_lot(dump_file_size, Layout_size__text(app));
		Xow_db_layout layout_html = use_all ? Xow_db_layout.Itm_all : Few_or_lot(dump_file_size, Layout_size__html(app));
		Xow_db_layout layout_file = use_all ? Xow_db_layout.Itm_all : Few_or_lot(dump_file_size, Layout_size__file(app));
		return new Xowd_core_db_props(2, layout_text, layout_html, layout_file, Zip_mode__text(app), Zip_mode__html(app), hzip_enabled, hzip_mode_is_b256);
	}
	/** "few" when the dump is smaller than the threshold, else "lot". */
	private static Xow_db_layout Few_or_lot(long dump_file_size, long layout_size) {
		return dump_file_size < layout_size ? Xow_db_layout.Itm_few : Xow_db_layout.Itm_lot;
	}
}

View File

@@ -13,37 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs.aria2; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
import gplx.core.envs.*;
import gplx.xowa.apps.fsys.*; import gplx.xowa.bldrs.wms.dumps.*;
/** Holds the process definition for the bundled aria2c executable and exposes it to scripts under the key "lib". */
public class Aria2_lib_mgr implements Gfo_invk {
	private static final String Invk_lib = "lib";
	private static final String Lib_args_fmt = String_.Concat
	( "--max-connection-per-server=2"
	, " --max-concurrent-downloads=20"
	, " --split=4"
	, " --file-allocation=prealloc"
	, " --remote-time=true"
	, " --server-stat-of=serverstats.txt"
	, " ftp://ftpmirror.your.org/pub/wikimedia/dumps/~{wiki_abrv}/~{wiki_date}/~{wiki_abrv}-~{wiki_date}-~{wiki_type}.bz2"
	, " https://dumps.wikimedia.org/~{wiki_abrv}/~{wiki_date}/~{wiki_abrv}-~{wiki_date}-~{wiki_type}.xml.bz2"
	);
	private Process_adp lib = new Process_adp();
	public Process_adp Lib() {return lib;}
	/** Configures {@code lib} to run aria2c from the platform bin dir, with the argument format keyed by wiki_abrv / wiki_date / wiki_type. */
	public void Init_by_app(Xoae_app app) {
		Xoa_fsys_eval cmd_eval = app.Url_cmd_eval();
		String exe_fmt = "~{<>bin_plat_dir<>}aria2" + Op_sys.Cur().Fsys_dir_spr_str() + "aria2c";
		Process_adp.ini_
			( this, app.Usr_dlg(), lib, cmd_eval, Process_adp.Run_mode_sync_block, Int_.Max_value
			, exe_fmt
			, Lib_args_fmt
			, "wiki_abrv", "wiki_date", "wiki_type"
			);
	}
	/** Currently a no-op: the download invocation for {@code dump_file} is not implemented. */
	public void Exec(Xowm_dump_file dump_file) {
	}
	public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
		if (ctx.Match(k, Invk_lib)) return lib;
		return Gfo_invk_.Rv_unhandled;
	}
}

View File

@@ -13,26 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs.aria2; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
import gplx.gfui.*;
/** Placeholder for a window that would run an external process asynchronously. */
class Gfui_process_win {
	/** Currently a no-op: none of the arguments are used. */
	public void Exec_async(String process, String args, Gfo_invk_cmd done_cbk) {
		// not implemented
	}
}
/** Mutable description of an external process: command path, command arguments and run mode. Callback and stream-writer support is not implemented. */
class Gfo_process {
	private String cmd_path;
	private String cmd_args;
	private byte mode;
	public String Cmd_path() {return cmd_path;}
	public String Cmd_args() {return cmd_args;}
	public byte Mode() {return mode;}
	/** Sets the command path and arguments; returns this for chaining. */
	public Gfo_process Init_cmd_(String cmd_path, String cmd_args) {
		this.cmd_path = cmd_path;
		this.cmd_args = cmd_args;
		return this;
	}
	/** Sets the mode to {@code Gfo_process_.Mode_async}; returns this for chaining. */
	public Gfo_process Init_mode_async_() {
		this.mode = Gfo_process_.Mode_async;
		return this;
	}
}
/** Empty placeholder for a process output writer. */
class Gfo_process_wtr {
}
/** Empty placeholder for a process input reader. */
class Gfo_process_rdr {
}
/** Run-mode constants for Gfo_process. */
class Gfo_process_ {
	public static final byte Mode_async = 0;
	public static final byte Mode_sync = 1;
	public static final byte Mode_sync_timeout = 2;
}

View File

@@ -13,313 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs.cmds; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
import gplx.core.envs.*;
import gplx.dbs.*; import gplx.xowa.wikis.caches.*; import gplx.xowa.addons.bldrs.files.*; import gplx.xowa.files.origs.*;
import gplx.xowa.bldrs.wkrs.*;
import gplx.xowa.wikis.nss.*;
import gplx.xowa.wikis.data.*; import gplx.xowa.wikis.dbs.*; import gplx.xowa.wikis.data.tbls.*;
import gplx.xowa.addons.bldrs.files.utls.*;
import gplx.xowa.parsers.*; import gplx.xowa.parsers.tmpls.*;
/**
 * Base class for build commands that walk stored pages in three nested loops
 * (namespace, then text database, then page id) and hand each page's text to Exec_pg_itm_hook.
 * Every commit_interval pages the subclass is asked to commit (Exec_commit_hook) and the current
 * ns / db / pg ids are written to a cfg file by Xob_dump_bmk_mgr. That file uses the same keys as the
 * ns_bgn_ / db_bgn_ / pg_bgn_ handlers in Invk and is run against this object in Cmd_bgn.
 */
public abstract class Xob_dump_mgr_base extends Xob_itm_basic_base implements Xob_cmd, Gfo_invk {
private Xob_dump_src_id page_src;
private Xow_db_mgr db_fsys_mgr; protected Xop_parser parser; protected Xop_ctx ctx; protected Xop_root_tkn root;
private int[] ns_ary; private Xow_db_file[] db_ary;
// start markers: -1 means "not set"; while set, the ns / db loops skip items until the marker is reached
private int ns_bgn = -1, db_bgn = -1, pg_bgn = -1;
// end markers: processing stops (exit_now) once the matching ns / db is finished or the page id is reached
private int ns_end = -1, db_end = -1, pg_end = Int_.Max_value;
private int commit_interval = 1000, progress_interval = 250, cleanup_interval = 2500, select_size = 10 * Io_mgr.Len_mb;
private int exec_count, exec_count_max = Int_.Max_value;
private boolean reset_db = false, exit_after_commit = false, exit_now = false;
private boolean load_tmpls;
private Xob_dump_bmk_mgr bmk_mgr = new Xob_dump_bmk_mgr();
private Xobu_poll_mgr poll_mgr; private int poll_interval = 5000;
private Xob_rate_mgr rate_mgr = new Xob_rate_mgr();
public abstract String Cmd_key();
@Override protected void Cmd_ctor_end(Xob_bldr bldr, Xowe_wiki wiki) {
poll_mgr = new Xobu_poll_mgr(bldr.App()); // init in ctor so gfs can invoke methods
}
// Prepares parser / ctx / root, (re)loads the wiki's db manager, builds the list of text dbs and namespaces,
// lets the subclass open its db, optionally resets bookmark + db, then loads the bookmark cfg file.
public void Cmd_bgn(Xob_bldr bldr) {
parser = wiki.Parser_mgr().Main();
ctx = wiki.Parser_mgr().Ctx();
root = ctx.Tkn_mkr().Root(Bry_.Empty);
wiki.Init_assert(); // NOTE: must init wiki for db_mgr_as_sql
// assert by calling Db_mgr_as_sql
wiki.Db_mgr_as_sql().Core_data_mgr();
// load db_mgr
Xow_db_mgr.Init_by_load(wiki, gplx.xowa.wikis.data.Xow_db_file__core_.Find_core_fil_or_null(wiki)); // NOTE: must reinit providers as previous steps may have rls'd (and left member variable conn which is closed)
wiki.File__orig_mgr().Wkrs__del(Xof_orig_wkr_.Tid_wmf_api);
db_fsys_mgr = wiki.Db_mgr_as_sql().Core_data_mgr();
db_ary = Xob_dump_mgr_base_.Init_text_files_ary(db_fsys_mgr);
poll_interval = poll_mgr.Poll_interval();
page_src = new Xob_dump_src_id().Init(wiki, this.Init_redirect(), select_size);
ns_ary = Init_ns_ary();
Db_conn conn = Init_db_file();
Io_url wiki_dir = wiki.Fsys_mgr().Root_dir();
bmk_mgr.Cfg_url_(wiki_dir.GenSubFil("xowa.file.make.cfg.gfs"));
rate_mgr.Log_file_(wiki_dir.GenSubFil("xowa.file.make.log.csv"));
if (reset_db) {
bmk_mgr.Reset();
Init_reset(conn);
}
// runs the bookmark cfg file against this object; its entries use the *_bgn_ keys handled in Invk
bmk_mgr.Load(wiki.Appe(), this);
Cmd_bgn_end();
}
protected abstract void Cmd_bgn_end();
public abstract byte Init_redirect();
public abstract int[] Init_ns_ary();
protected abstract void Init_reset(Db_conn p);
protected abstract Db_conn Init_db_file();
private long time_bgn;
public void Cmd_run() {Exec_ns_ary();}
// Outer loop: iterates the namespaces, skipping until ns_bgn when it is set, and commits once at the end.
private void Exec_ns_ary() {
// Cmd_end sets pg_bgn to Int_.Max_value when a run finished without exit_now; in that state there is nothing to do
if (pg_bgn == Int_.Max_value) return;
if (load_tmpls) Xob_dump_mgr_base_.Load_all_tmpls(usr_dlg, wiki, page_src);
time_bgn = System_.Ticks();
Xob_dump_bmk dump_bmk = new Xob_dump_bmk();
rate_mgr.Init();
int ns_ary_len = ns_ary.length;
for (int i = 0; i < ns_ary_len; i++) {
int ns_id = ns_ary[i];
if (ns_bgn != -1) { // ns_bgn set
if (ns_id == ns_bgn) // ns_id is ns_bgn; null out ns_bgn and continue
ns_bgn = -1;
else // ns_id is not ns_bgn; keep looking
continue;
}
dump_bmk.Ns_id_(ns_id);
Exec_db_ary(i, dump_bmk, ns_id);
if (ns_id == ns_end) exit_now = true; // ns_end set; exit
if (exit_now) break; // exit_now b/c of pg_bgn, db_bgn or something else
}
Exec_commit(dump_bmk.Ns_id(), dump_bmk.Db_id(), dump_bmk.Pg_id(), Bry_.Empty);
}
// Middle loop: iterates the text dbs for one namespace, skipping until db_bgn when it is set.
private void Exec_db_ary(int ns_ord, Xob_dump_bmk dump_bmk, int ns_id) {
int db_ary_len = db_ary.length;
for (int i = 0; i < db_ary_len; i++) {
int db_id = db_ary[i].Id();
if (db_bgn != -1) { // db_bgn set
if (db_id == db_bgn) // db_id is db_bgn; null out db_bgn and continue
db_bgn = -1;
else // db_id is not db_bgn; keep looking
continue;
}
dump_bmk.Db_id_(db_id);
Exec_db_itm(dump_bmk, ns_ord, ns_id, db_id);
if (db_id == db_end) exit_now = true; // db_end set; exit;
if (exit_now) return; // exit_now b/c of pg_bgn, db_bgn or something else
}
}
// Inner loop: repeatedly fetches a batch of pages for (db_id, ns_id) starting from pg_id and processes each page;
// pg_id advances to the id of the page just processed, so the next fetch is keyed on the last page handled.
private void Exec_db_itm(Xob_dump_bmk dump_bmk, int ns_ord, int ns_id, int db_id) {
List_adp pages = List_adp_.New();
Xow_ns ns = wiki.Ns_mgr().Ids_get_or_null(ns_id);
int pg_id = pg_bgn;
while (true) {
page_src.Get_pages(pages, db_id, ns_id, pg_id);
int pages_len = pages.Count();
if (pages_len == 0) { // no more pages in db;
if (pg_id > pg_bgn) // reset pg_bgn to 0 only if pg_bgn seen;
pg_bgn = 0;
return;
}
usr_dlg.Prog_many("", "", "fetched pages: ~{0}", pages_len);
for (int i = 0; i < pages_len; i++) {
Xowd_page_itm page = (Xowd_page_itm)pages.Get_at(i);
// note: the bookmark receives pg_id before it is advanced, i.e. the id of the previously processed page
dump_bmk.Pg_id_(pg_id);
Exec_pg_itm(ns_ord, ns, db_id, page);
if ( pg_id >= pg_end
|| exec_count >= exec_count_max) {
exit_now = true;
}
if (exit_now) return;
pg_id = page.Id();
}
}
}
// Processes one page: reports progress, clears the parse ctx, calls the subclass hook when the page has text,
// then runs the periodic poll / commit / cleanup steps. Any exception is logged as a warning and processing continues.
private void Exec_pg_itm(int ns_ord, Xow_ns ns, int db_id, Xowd_page_itm page) {
try {
if ((exec_count % progress_interval) == 0)
usr_dlg.Prog_many("", "", "parsing: ns=~{0} db=~{1} pg=~{2} count=~{3} time=~{4} rate=~{5} ttl=~{6}"
, ns.Id(), db_id, page.Id(), exec_count
, System_.Ticks__elapsed_in_sec(time_bgn), rate_mgr.Rate_as_str(), String_.new_u8(page.Ttl_page_db()));
ctx.Clear_all();
// local byte[] named page_src shadows the page_src field within this method
byte[] page_src = page.Text();
if (page_src != null) // some pages have no text; ignore them else null ref; PAGE: it.d:miercuri DATE:2015-12-05
Exec_pg_itm_hook(ns_ord, ns, page, page_src);
ctx.Wiki().Utl__bfr_mkr().Clear_fail_check(); // make sure all bfrs are released
// keep the template-result cache bounded: clear it once it holds more than 50000 entries
if (ctx.Wiki().Cache_mgr().Tmpl_result_cache().Count() > 50000)
ctx.Wiki().Cache_mgr().Tmpl_result_cache().Clear();
++exec_count;
rate_mgr.Increment();
if ((exec_count % poll_interval) == 0)
poll_mgr.Poll();
if ((exec_count % commit_interval) == 0)
Exec_commit(ns.Id(), db_id, page.Id(), page.Ttl_page_db());
if ((exec_count % cleanup_interval) == 0)
Free();
}
catch (Exception exc) {
bldr.Usr_dlg().Warn_many("", "", "parse failed: wiki=~{0} ttl=~{1} err=~{2}", wiki.Domain_str(), page.Ttl_full_db(), Err_.Message_gplx_log(exc));
ctx.Wiki().Utl__bfr_mkr().Clear();
this.Free();
}
}
// subclass hook: called once per page that has non-null text
public abstract void Exec_pg_itm_hook(int ns_ord, Xow_ns ns, Xowd_page_itm page, byte[] page_text);
// Reports the commit, lets the subclass commit its work, then saves the ns / db / pg bookmark.
private void Exec_commit(int ns_id, int db_id, int pg_id, byte[] ttl) {
usr_dlg.Prog_many("", "", "committing: ns=~{0} db=~{1} pg=~{2} count=~{3} ttl=~{4}", ns_id, db_id, pg_id, exec_count, String_.new_u8(ttl));
Exec_commit_hook();
bmk_mgr.Save(ns_id, db_id, pg_id);
if (exit_after_commit) exit_now = true;
}
public abstract void Exec_commit_hook();
public abstract void Exec_end_hook();
public void Cmd_init(Xob_bldr bldr) {}
public void Cmd_term() {}
// Final step: marks the run as finished when it was not cut short, saves a bookmark of -1 / -1 / -1,
// calls the subclass end hook, releases memory and prints the page count and pages-per-second rate.
public void Cmd_end() {
if (!exit_now)
pg_bgn = Int_.Max_value;
Exec_commit(-1, -1, -1, Bry_.Empty);
Exec_end_hook();
Free();
usr_dlg.Note_many("", "", "done: ~{0} ~{1}", exec_count, Decimal_adp_.divide_safe_(exec_count, System_.Ticks__elapsed_in_sec(time_bgn)).To_str("#,###.000"));
}
private void Free() {
Xowe_wiki_.Rls_mem(wiki, true);
}
protected void Reset_db_y_() {this.reset_db = true;}
// Script entry point: each key sets the matching option from the message's "v" argument; unknown keys return Rv_unhandled.
@Override public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
if (ctx.Match(k, Invk_commit_interval_)) commit_interval = m.ReadInt("v");
else if (ctx.Match(k, Invk_progress_interval_)) progress_interval = m.ReadInt("v");
else if (ctx.Match(k, Invk_rate_interval_)) rate_mgr.Reset_interval_(m.ReadInt("v"));
else if (ctx.Match(k, Invk_cleanup_interval_)) cleanup_interval = m.ReadInt("v");
else if (ctx.Match(k, Invk_select_size_)) select_size = m.ReadInt("v") * Io_mgr.Len_mb;
else if (ctx.Match(k, Invk_ns_bgn_)) {ns_bgn = m.ReadInt("v"); Notify_restoring("ns", ns_bgn);}
else if (ctx.Match(k, Invk_db_bgn_)) {db_bgn = m.ReadInt("v"); Notify_restoring("db", db_bgn);}
else if (ctx.Match(k, Invk_pg_bgn_)) {pg_bgn = m.ReadInt("v"); Notify_restoring("pg", pg_bgn);}
else if (ctx.Match(k, Invk_ns_end_)) ns_end = m.ReadInt("v");
else if (ctx.Match(k, Invk_db_end_)) db_end = m.ReadInt("v");
else if (ctx.Match(k, Invk_pg_end_)) pg_end = m.ReadInt("v");
else if (ctx.Match(k, Invk_load_tmpls_)) load_tmpls = m.ReadYn("v");
else if (ctx.Match(k, Invk_poll_mgr)) return poll_mgr;
else if (ctx.Match(k, Invk_reset_db_)) reset_db = m.ReadYn("v");
else if (ctx.Match(k, Invk_exec_count_max_)) exec_count_max = m.ReadInt("v");
else if (ctx.Match(k, Invk_exit_now_)) exit_now = m.ReadYn("v");
else if (ctx.Match(k, Invk_exit_after_commit_)) exit_after_commit = m.ReadYn("v");
else if (ctx.Match(k, Invk__manual_now_)) Datetime_now.Manual_and_freeze_(m.ReadDate("v"));
else return Gfo_invk_.Rv_unhandled;
return this;
}
private void Notify_restoring(String itm, int val) {
usr_dlg.Note_many("", "", "restoring: itm=~{0} val=~{1}", itm, val);
}
// script keys handled by Invk; the *_bgn_ keys are also written to the bookmark file by Xob_dump_bmk_mgr.Save
public static final String
Invk_progress_interval_ = "progress_interval_", Invk_commit_interval_ = "commit_interval_", Invk_cleanup_interval_ = "cleanup_interval_", Invk_rate_interval_ = "rate_interval_"
, Invk_select_size_ = "select_size_"
, Invk_ns_bgn_ = "ns_bgn_", Invk_db_bgn_ = "db_bgn_", Invk_pg_bgn_ = "pg_bgn_"
, Invk_ns_end_ = "ns_end_", Invk_db_end_ = "db_end_", Invk_pg_end_ = "pg_end_"
, Invk_load_tmpls_ = "load_tmpls_"
, Invk_poll_mgr = "poll_mgr", Invk_reset_db_ = "reset_db_"
, Invk_exec_count_max_ = "exec_count_max_", Invk_exit_now_ = "exit_now_", Invk_exit_after_commit_ = "exit_after_commit_"
, Invk__manual_now_ = "manual_now_"
;
}
/** Static helpers for Xob_dump_mgr_base: template pre-loading and selection of the text db files. */
class Xob_dump_mgr_base_ {
	/** Reads every template page in batches and adds a new template definition for each to the wiki's defn cache. */
	public static void Load_all_tmpls(Gfo_usr_dlg usr_dlg, Xowe_wiki wiki, Xob_dump_src_id page_src) {
		List_adp batch = List_adp_.New();
		Xow_ns ns_tmpl = wiki.Ns_mgr().Ns_template();
		Xow_defn_cache defn_cache = wiki.Cache_mgr().Defn_cache();
		int last_page_id = -1;
		int loaded = 0;
		usr_dlg.Note_many("", "", "tmpl_load init");
		for (;;) {
			page_src.Get_pages(batch, 0, Xow_ns_.Tid__template, last_page_id);	// 0 is always template db
			int batch_len = batch.Count();
			if (batch_len == 0) break;											// no more pages in db;
			Xowd_page_itm page = null;
			int idx = 0;
			while (idx < batch_len) {
				page = (Xowd_page_itm)batch.Get_at(idx);
				Xot_defn_tmpl defn = new Xot_defn_tmpl();
				defn.Init_by_new(ns_tmpl, ns_tmpl.Gen_ttl(page.Ttl_page_db()), page.Text(), null, false);	// NOTE: passing null, false; will be overriden later when Parse is called
				defn_cache.Add(defn, ns_tmpl.Case_match());
				loaded++;
				if ((loaded % 10000) == 0)
					usr_dlg.Prog_many("", "", "tmpl_loading: ~{0}", loaded);
				idx++;
			}
			last_page_id = page.Id();	// the next batch is keyed on the last page of this batch
		}
		usr_dlg.Note_many("", "", "tmpl_load done: ~{0}", loaded);
	}
	/** Returns the db files of type text / text_solo; when the manager has exactly one db, returns just that db. */
	public static Xow_db_file[] Init_text_files_ary(Xow_db_mgr core_data_mgr) {
		List_adp text_files = List_adp_.New();
		int db_count = core_data_mgr.Dbs__len();
		if (db_count == 1) {
			// single file: return core; note that there are no Tid = Text
			return new Xow_db_file[] {core_data_mgr.Dbs__get_at(0)};
		}
		for (int i = 0; i < db_count; i++) {
			Xow_db_file file = core_data_mgr.Dbs__get_at(i);
			int tid = file.Tid();
			if (tid == Xow_db_file_.Tid__text || tid == Xow_db_file_.Tid__text_solo)
				text_files.Add(file);
		}
		return (Xow_db_file[])text_files.To_ary_and_clear(Xow_db_file.class);
	}
}
/**
 * Persists and restores the position (ns / db / page) of a dump run through a small script file at cfg_url.
 * Each saved value is written as one line of the form key('value'); using the Invk_*_bgn_ keys of Xob_dump_mgr_base.
 */
class Xob_dump_bmk_mgr {
	private Io_url cfg_url;
	private Bry_bfr save_bfr = Bry_bfr_.Reset(1024);
	/** Location of the bookmark file. */
	public Io_url Cfg_url() {return cfg_url;}
	public Xob_dump_bmk_mgr Cfg_url_(Io_url v) {
		this.cfg_url = v;
		return this;
	}
	/** Deletes the bookmark file. */
	public void Reset() {
		Io_mgr.Instance.DeleteFil(cfg_url);
	}
	/** Runs the bookmark file against dump_mgr through the app's Gfs_mgr. */
	public void Load(Xoae_app app, Xob_dump_mgr_base dump_mgr) {
		app.Gfs_mgr().Run_url_for(dump_mgr, cfg_url);
	}
	/**
	 * Writes the three bookmark lines (ns, db, page) to the bookmark file.
	 * NOTE(review): relies on SaveFilBfr consuming save_bfr so that lines do not accumulate across calls -- confirm.
	 */
	public void Save(int ns_id, int db_id, int pg_id) {
		Save_itm(save_bfr, Xob_dump_mgr_base.Invk_ns_bgn_, ns_id);
		Save_itm(save_bfr, Xob_dump_mgr_base.Invk_db_bgn_, db_id);
		Save_itm(save_bfr, Xob_dump_mgr_base.Invk_pg_bgn_, pg_id);
		Io_mgr.Instance.SaveFilBfr(cfg_url, save_bfr);
	}
	/** Appends one key('val'); line to bfr. */
	private void Save_itm(Bry_bfr bfr, String key, int val) {
		bfr.Add_str_u8(String_.Format("{0}('{1}');\n", key, val));
	}
}
/**
 * Tracks how many items are processed per time window and appends one log line per window to log_file.
 * Call Init once, then Increment per item; every reset_interval items a line "rate|count|seconds" is written
 * and the window restarts.
 */
class Xob_rate_mgr {
	private long time_bgn;
	private int item_len;
	private Bry_bfr save_bfr = Bry_bfr_.Reset(255);
	/** Number of items per logged window; defaults to 10000. A value of 0 makes Increment fail with ArithmeticException (modulo by zero). */
	public int Reset_interval() {return reset_interval;} public Xob_rate_mgr Reset_interval_(int v) {reset_interval = v; return this;} private int reset_interval = 10000;
	/** File that receives the per-window log lines. */
	public Io_url Log_file_url() {return log_file;} public Xob_rate_mgr Log_file_(Io_url v) {log_file = v; return this;} private Io_url log_file;
	/** Starts the first window at the current tick count. */
	public void Init() {time_bgn = System_.Ticks();}
	/** Counts one item; when the window is full, logs it and starts a new window at the current tick count. */
	public void Increment() {
		++item_len;
		if (item_len % reset_interval == 0) {
			long time_end = System_.Ticks();
			Save(item_len, time_bgn, time_end);
			time_bgn = time_end;
			item_len = 0;
		}
	}
	/**
	 * Appends "rate|count|dif\n" to log_file, where dif is the tick difference divided by 1000.
	 * NOTE(review): dividing by 1000 suggests ticks are milliseconds and dif is seconds -- confirm against System_.Ticks.
	 */
	private void Save(int count, long bgn, long end) {
		// divide as long first, then narrow: the previous "(int)(end - bgn) / 1000" cast the raw difference to int
		// before dividing, which truncates tick differences that do not fit in an int
		int dif = (int)((end - bgn) / 1000);
		Decimal_adp rate = Decimal_adp_.divide_safe_(count, dif);
		save_bfr
			.Add_str_a7(rate.To_str("#,##0.000")).Add_byte_pipe()
			.Add_int_variable(count).Add_byte_pipe()
			.Add_int_variable(dif).Add_byte_nl()
			;
		Io_mgr.Instance.AppendFilByt(log_file, save_bfr.To_bry_and_clear());
	}
	/** Current-window rate formatted as an integer string. */
	public String Rate_as_str() {return Int_.To_str(Rate());}
	/** Items counted in the current window divided by the seconds elapsed since the window began (via Math_.Div_safe_as_int). */
	public int Rate() {
		int elapsed = System_.Ticks__elapsed_in_sec(time_bgn);
		return Math_.Div_safe_as_int(item_len, elapsed);
	}
}
/** Mutable holder for a dump position: namespace id, database id and page id. Setters return this for chaining. */
class Xob_dump_bmk {
	private int ns_id;
	private int db_id;
	private int pg_id;

	/** Namespace id of the position. */
	public int Ns_id() {
		return this.ns_id;
	}
	public Xob_dump_bmk Ns_id_(int v) {
		this.ns_id = v;
		return this;
	}

	/** Database id of the position. */
	public int Db_id() {
		return this.db_id;
	}
	public Xob_dump_bmk Db_id_(int v) {
		this.db_id = v;
		return this;
	}

	/** Page id of the position. */
	public int Pg_id() {
		return this.pg_id;
	}
	public Xob_dump_bmk Pg_id_(int v) {
		this.pg_id = v;
		return this;
	}
}

View File

@@ -13,50 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs.cmds; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
import gplx.xowa.wikis.nss.*;
import gplx.xowa.wikis.data.*;
/**
 * Describes one group of namespaces that share a database file, and generates the successive file-name
 * suffixes for that group as more databases are created.
 */
public class Xob_ns_file_itm {
	public static final int Nth_db_id_null = -1;
	private final byte db_file_tid;
	private final String file_name;
	private final int[] ns_ids;
	private int nth_db_id;
	private int nth_db_idx;
	/**
	 * @param db_file_tid type id of the database file; converted to its key when building the file name
	 * @param file_name   name segment for this group (EX: "ns.000"); empty means no name segment
	 * @param ns_ids      namespace ids covered by this group; null for the shared default created by Init_ns_bldr_data
	 */
	public Xob_ns_file_itm(byte db_file_tid, String file_name, int[] ns_ids) {
		this.db_file_tid = db_file_tid;
		this.file_name = file_name;
		this.ns_ids = ns_ids;
		this.nth_db_id = Nth_db_id_null;
		this.nth_db_idx = 1;
	}
	public byte Db_file_tid() {return db_file_tid;}
	public String File_name() {return file_name;}
	public int[] Ns_ids() {return ns_ids;}
	/** Database id currently associated with this group; starts as Nth_db_id_null. */
	public int Nth_db_id() {return nth_db_id;}
	public void Nth_db_id_(int v) {this.nth_db_id = v;}
	/** 1-based index of the file name that the next Make_file_name call will produce. */
	public int Nth_db_idx() {return nth_db_idx;}
	/**
	 * Builds the next file-name suffix and advances the index.
	 * The first call omits the "-db.NNN" segment (EX: "-text-ns.000.xowa"); later calls add it (EX: "-text-ns.000-db.002.xowa").
	 * An empty file_name drops the name segment as well (EX: "-text.xowa").
	 */
	public String Make_file_name() {
		String name_seg = String_.Len_eq_0(file_name) ? "" : "-" + file_name;
		String nth_seg = "";
		if (nth_db_idx != 1)
			nth_seg = "-db." + Int_.To_str_pad_bgn_zero(nth_db_idx, 3);
		String rv = String_.Format("-{0}{1}{2}.xowa", Xow_db_file_.To_key(db_file_tid), name_seg, nth_seg);
		nth_db_idx += 1;
		return rv;
	}
	/**
	 * Assigns a Xob_ns_file_itm to every namespace in ns_mgr.
	 * All namespaces first receive one shared, unnamed default; then each itm parsed from ns_file_map
	 * replaces the default for the namespaces it lists.
	 */
	public static void Init_ns_bldr_data(byte db_file_tid, Xow_ns_mgr ns_mgr, byte[] ns_file_map) {
		int ns_count = ns_mgr.Ords_len();
		Xob_ns_file_itm fallback = new Xob_ns_file_itm(db_file_tid, "", null);
		for (int ord = 0; ord < ns_count; ord++)
			ns_mgr.Ords_get_at(ord).Bldr_data_(fallback);

		Xob_ns_file_itm_parser parser = new Xob_ns_file_itm_parser();
		parser.Ctor(db_file_tid, ns_mgr);
		for (Xob_ns_file_itm mapped : parser.To_ary(ns_file_map)) {
			for (int ns_id : mapped.Ns_ids()) {
				Xow_ns ns = ns_mgr.Ids_get_or_null(ns_id);
				// some dumps may not have ns; for example, pre-2013 dumps won't have Module (828)
				if (ns != null) ns.Bldr_data_(mapped);
			}
		}
	}
}

View File

@@ -13,79 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs.cmds; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
import gplx.langs.dsvs.*;
import gplx.xowa.wikis.nss.*;
/**
 * Parses a namespace-to-file map into Xob_ns_file_itm entries.
 * Each row has two fields: the namespace ids and an optional file name.
 * The ids field is one of: the Ns_file_map__each marker, the Ns_file_map__few marker, "*" (all namespaces),
 * or a comma-separated list of ids.
 */
public class Xob_ns_file_itm_parser extends Dsv_wkr_base {
	// per-row state filled by Write_bry and consumed by Commit_itm
	private byte[] ns_ids_bry; private String name; private final List_adp rslts = List_adp_.New();
	private Xow_ns_mgr ns_mgr; private byte db_file_tid; private boolean mode_each = false;
	/** (Re)initializes the parser for a db file type and namespace manager; clears earlier results and the each-mode flag. */
	public void Ctor(byte db_file_tid, Xow_ns_mgr ns_mgr) {
		this.db_file_tid = db_file_tid; this.ns_mgr = ns_mgr;
		this.mode_each = false; rslts.Clear();
	}
	@Override public Dsv_fld_parser[] Fld_parsers() {return new Dsv_fld_parser[] {Dsv_fld_parser_.Bry_parser, Dsv_fld_parser_.Bry_parser};}
	/** Stores field 0 as the raw ns ids and field 1 as the name; any other field index is rejected by returning false. */
	@Override public boolean Write_bry(Dsv_tbl_parser parser, int fld_idx, byte[] src, int bgn, int end) {
		switch (fld_idx) {
			case 0: ns_ids_bry	= Bry_.Mid(src, bgn, end); return true;
			case 1: name		= String_.new_u8(src, bgn, end); return true;
			default: return false;
		}
	}
	/**
	 * Converts the fields of the current row into result itms.
	 * Fails when the row supplied no ns ids, or when the ids parse to an empty list.
	 */
	@Override public void Commit_itm(Dsv_tbl_parser parser, int pos) {
		if (ns_ids_bry == null) throw parser.Err_row_bgn("ns_itm missing ns_ids", pos);
		if (mode_each) return;	// a previous <each> row already produced one itm per ns; ignore all later rows
		// mode is <each>; create map with each ns in separate file
		if (Bry_.Eq(ns_ids_bry, Ns_file_map__each)) {
			mode_each = true;
			int len = ns_mgr.Ords_len();
			for (int i = 0; i < len; ++i) {
				Xow_ns ns = ns_mgr.Ords_get_at(i);
				int ns_id = ns.Id();
				rslts.Add(new Xob_ns_file_itm(db_file_tid, "ns." + Int_.To_str_pad_bgn_zero(ns_id, 3), Int_ary_.New(ns_id)));
			}
			return;
		}
		// mode is <few>; create map with each ns in one file; // DB.FEW: DATE:2016-06-07
		else if (Bry_.Eq(ns_ids_bry, Ns_file_map__few)) {
			int len = ns_mgr.Ords_len();
			int[] ns_ary_for_few = new int[len];
			for (int i = 0; i < len; ++i) {
				ns_ary_for_few[i] = ns_mgr.Ords_get_at(i).Id();
			}
			rslts.Add(new Xob_ns_file_itm(db_file_tid, String_.Empty, ns_ary_for_few));
			return;
		}
		int[] ns_ids = null;
		if (ns_ids_bry.length == 1 && ns_ids_bry[0] == Byte_ascii.Star) {	// "*"
			int len = ns_mgr.Ords_len();
			ns_ids = new int[len];
			for (int i = 0; i < len; ++i)
				ns_ids[i] = ns_mgr.Ords_get_at(i).Id();
		}
		else
			ns_ids = Int_ary_.Parse(String_.new_u8(ns_ids_bry), ",");
		if (ns_ids.length == 0) throw Err_.new_wo_type("map.invalid.ns_missing", "src", this.Src());
		if (String_.Len_eq_0(name)) {								// no name; auto-generate
			int ns_id_1st = ns_ids[0];								// take 1st ns_id
			name = "ns." + Int_.To_str_pad_bgn_zero(ns_id_1st, 3);	// EX: ns.000
		}
		rslts.Add(new Xob_ns_file_itm(db_file_tid, name, ns_ids));
		// reset the per-row FIELDS so the next row cannot silently reuse this row's values;
		// previously only the local ns_ids was cleared, which left ns_ids_bry stale and disabled the null check above
		ns_ids_bry = null; name = null;
	}
	/** Parses bry and returns every itm accumulated since the last Ctor call. */
	public Xob_ns_file_itm[] To_ary(byte[] bry) {
		this.Load_by_bry(bry);
		return (Xob_ns_file_itm[])rslts.To_ary(Xob_ns_file_itm.class);
	}
	// NOTE(review): the "few" marker has no angle brackets while the "each" marker is "<each>"; left as-is because callers may depend on the exact bytes -- confirm
	public static final byte[] Ns_file_map__few = Bry_.new_a7("few"), Ns_file_map__each = Bry_.new_a7("<each>");
	/*
	"" -> no rules; return "default"; generates "text-001" and lumps all ns into it
	"*|<id>|3700|2" -> auto-generate per ns
	<single-file>
	<all>||gzip
	<each>||gzip
	*/
}

View File

@@ -13,79 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs.cmds; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
import gplx.core.stores.*;
import gplx.xowa.wikis.data.*; import gplx.xowa.wikis.dbs.*; import gplx.dbs.*; import gplx.dbs.qrys.*; import gplx.xowa.wikis.data.tbls.*;
/**
 * Reads pages together with their text from a text database, one namespace at a time, in ascending page_id order.
 * The select statement is prepared once per text database and reused until a different database is requested.
 */
class Xob_dump_src_id {
	private Xodb_mgr_sql db_mgr; private byte redirect;
	private String page_db_url; private int size_max;
	private Db_stmt text_stmt; int cur_text_db_idx = -1;
	/**
	 * @param wiki     wiki whose sql db_mgr is read
	 * @param redirect Bool_.Y_byte to read only redirects, Bool_.N_byte to read only non-redirects, Bool_.__byte for no redirect filter
	 * @param size_max cumulative text length after which Get_pages stops filling a batch
	 */
	public Xob_dump_src_id Init(Xowe_wiki wiki, byte redirect, int size_max) {
		this.db_mgr = wiki.Db_mgr_as_sql(); this.redirect = redirect;
		this.size_max = size_max;
		this.page_db_url = db_mgr.Core_data_mgr().Db__core().Url().Raw();
		return this;
	}
	/**
	 * Clears list and fills it with pages of namespace cur_ns whose id is greater than prv_id.
	 * Reading stops when rows run out, or right after the page that pushes the batch's total text length above size_max
	 * (so a batch always holds at least one page when any row exists). The reader is always released.
	 */
	public void Get_pages(List_adp list, int text_db_idx, int cur_ns, int prv_id) {
		DataRdr rdr = DataRdr_.Null;
		int size_len = 0;
		list.Clear();
		try {
			rdr = New_rdr(db_mgr, page_db_url, text_db_idx, cur_ns, prv_id, redirect);
			while (rdr.MoveNextPeer()) {
				Xowd_page_itm page = New_page(db_mgr, cur_ns, rdr);
				list.Add(page);
				size_len += page.Text_len();
				if (size_len > size_max)
					break;
			}
		}
		finally {rdr.Rls();}
	}
	/**
	 * Executes the select for (prv_id, cur_ns), preparing a new statement first when text_db_idx differs from the cached one.
	 * NOTE(review): page_db_url is accepted but not used here, and a replaced text_stmt is not released -- confirm whether either matters.
	 */
	private DataRdr New_rdr(Xodb_mgr_sql db_mgr, String page_db_url, int text_db_idx, int cur_ns, int prv_id, byte redirect) {
		if (cur_text_db_idx != text_db_idx) {
			Xow_db_file text_db = db_mgr.Core_data_mgr().Dbs__get_by_id_or_fail(text_db_idx);
			Db_conn conn = text_db.Conn();
			String sql = String_.Format(Sql_select_clause, New_rdr__redirect_clause(redirect));
			text_stmt = conn.Stmt_sql(sql);
			// record the db as current only after its statement exists; if any step above throws,
			// the cached index and statement still describe the same (previous) db and a retry rebuilds cleanly
			cur_text_db_idx = text_db_idx;
		}
		return text_stmt.Clear().Val_int(prv_id).Val_int(cur_ns).Exec_select();
	}
	/** Builds a page from the current row: id, title and text (unzipped with the zip type from the core db props); ns comes from the caller. */
	private static Xowd_page_itm New_page(Xodb_mgr_sql db_mgr, int ns_id, DataRdr rdr) {
		Xowd_page_tbl page_core_tbl = db_mgr.Core_data_mgr().Tbl__page();
		Xowd_page_itm rv = new Xowd_page_itm();
		rv.Id_(rdr.ReadInt(page_core_tbl.Fld_page_id()));
		rv.Ns_id_(ns_id);
		rv.Ttl_page_db_(rdr.ReadBryByStr(page_core_tbl.Fld_page_title()));
		String text_data_name = db_mgr.Core_data_mgr().Db__core().Tbl__text().Fld_text_data();
		byte[] text_data = rdr.ReadBry(text_data_name);
		text_data = db_mgr.Wiki().Appe().Zip_mgr().Unzip(db_mgr.Core_data_mgr().Props().Zip_tid_text(), text_data);
		rv.Text_(text_data);
		return rv;
	}
	/** Maps the redirect flag to the extra WHERE fragment spliced into the select; any other flag value fails. */
	private static String New_rdr__redirect_clause(byte redirect) {
		switch (redirect) {
			case Bool_.Y_byte:	return Sql_select__redirect_y;
			case Bool_.N_byte:	return Sql_select__redirect_n;
			case Bool_.__byte:	return Sql_select__redirect__;
			default:			throw Err_.new_unhandled(redirect);
		}
	}
	// {0} receives the redirect fragment; the two "?" are bound to prv_id and cur_ns, in that order
	private static final String Sql_select_clause = String_.Concat_lines_nl
	( "SELECT  p.page_id"
	, ",       p.page_title"
	, ",       t.text_data"
	, "FROM    page_dump p"
	, "        JOIN text t ON t.page_id = p.page_id"
	, "WHERE   p.page_id > ?"
	, "AND     p.page_namespace = ?{0}"
	, "ORDER BY p.page_id"
	);
	private static final String
	  Sql_select__redirect_y = "\nAND     p.page_is_redirect = 1"
	, Sql_select__redirect_n = "\nAND     p.page_is_redirect = 0"
	, Sql_select__redirect__ = ""
	;
}

View File

@@ -13,31 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs.cmds.diffs; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.cmds.*;
import gplx.core.brys.*; import gplx.xowa.wikis.*;
/** Format argument ".dump_dir": writes the raw bytes of the wiki's root directory. EX: "/xowa/wiki/en.wikipedia.org/" */
class Bfr_arg__dump_dir implements Bfr_arg {
	private final Xow_wiki wiki;
	public Bfr_arg__dump_dir(Xow_wiki wiki) {
		this.wiki = wiki;
	}
	public void Bfr_arg__add(Bry_bfr bfr) {
		byte[] root_dir_raw = wiki.Fsys_mgr().Root_dir().RawBry();
		bfr.Add(root_dir_raw);
	}
}
/** Format argument ".dump_core": writes the file name (with extension) of the wiki's core database. EX: "en.wikipedia.org-core.xowa" */
class Bfr_arg__dump_core implements Bfr_arg {
	private final Xow_wiki wiki;
	public Bfr_arg__dump_core(Xow_wiki wiki) {
		this.wiki = wiki;
	}
	public void Bfr_arg__add(Bry_bfr bfr) {
		String core_file_name = wiki.Data__core_mgr().Db__core().Url().NameAndExt();
		bfr.Add_str_u8(core_file_name);
	}
}
/** Format argument ".dump_domain": writes the wiki's domain bytes. EX: en.wikipedia.org */
class Bfr_arg__dump_domain implements Bfr_arg {
	private final Xow_wiki wiki;
	public Bfr_arg__dump_domain(Xow_wiki wiki) {
		this.wiki = wiki;
	}
	public void Bfr_arg__add(Bry_bfr bfr) {
		byte[] domain = wiki.Domain_bry();
		bfr.Add(domain);
	}
}
/** Format argument ".dir_spr": writes the directory-separator byte of the current operating system. EX: "/" */
class Bfr_arg__dir_spr implements Bfr_arg {
	public void Bfr_arg__add(Bry_bfr bfr) {
		byte dir_spr = gplx.core.envs.Op_sys.Cur().Fsys_dir_spr_byte();
		bfr.Add_byte(dir_spr);
	}
}

View File

@@ -13,32 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs.cmds.diffs; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.cmds.*;
import gplx.xowa.bldrs.wkrs.*;
/**
 * Builder command that collects the settings for a database diff (urls, commit interval, db ids, build name)
 * and, when run, hands them to Xob_diff_build_wkr.
 */
public class Xob_diff_build_cmd implements Xob_cmd {
	private static final String
	  Invk__prev_url_ = "prev_url_", Invk__curr_url_ = "curr_url_", Invk__diff_url_ = "diff_url_"
	, Invk__commit_interval_ = "commit_interval_", Invk__db_ids_ = "db_ids_", Invk__bld_name_ = "bld_name_";
	private final Xob_bldr bldr;
	private final Xowe_wiki wiki;
	private String prev_url, curr_url, diff_url;
	private int commit_interval;
	private int[] db_ids = Int_ary_.Empty;
	private String bld_name = "all";
	public Xob_diff_build_cmd(Xob_bldr bldr, Xowe_wiki wiki) {
		this.bldr = bldr;
		this.wiki = wiki;
	}
	public String Cmd_key() {return Xob_cmd_keys.Key_diff_build;}
	public Xob_cmd Cmd_clone(Xob_bldr bldr, Xowe_wiki wiki) {return null;}
	public void Cmd_init(Xob_bldr bldr) {}
	public void Cmd_bgn(Xob_bldr bldr) {}
	/** Creates the table mapper and the worker from the current settings, then executes the worker. */
	public void Cmd_run() {
		Xowd_tbl_mapr mapr = new Xowd_tbl_mapr(bld_name, db_ids);
		Xob_diff_build_wkr wkr = new Xob_diff_build_wkr(bldr, wiki, prev_url, curr_url, diff_url, commit_interval, mapr);
		wkr.Exec();
	}
	public void Cmd_end() {}
	public void Cmd_term() {}
	/** Applies one setting read from arg "v"; db_ids_ takes a "|"-separated list of ints. Unknown keys return Rv_unhandled. */
	public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
		if (ctx.Match(k, Invk__prev_url_)) {
			prev_url = m.ReadStr("v");
			return this;
		}
		if (ctx.Match(k, Invk__curr_url_)) {
			curr_url = m.ReadStr("v");
			return this;
		}
		if (ctx.Match(k, Invk__diff_url_)) {
			diff_url = m.ReadStr("v");
			return this;
		}
		if (ctx.Match(k, Invk__commit_interval_)) {
			commit_interval = m.ReadInt("v");
			return this;
		}
		if (ctx.Match(k, Invk__db_ids_)) {
			db_ids = Int_ary_.Parse(m.ReadStr("v"), "|");
			return this;
		}
		if (ctx.Match(k, Invk__bld_name_)) {
			bld_name = m.ReadStr("v");
			return this;
		}
		return Gfo_invk_.Rv_unhandled;
	}
}

View File

@@ -13,81 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs.cmds.diffs; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.cmds.*;
import gplx.core.brys.*; import gplx.core.brys.fmts.*;
import gplx.dbs.*; import gplx.dbs.metas.*; import gplx.dbs.diffs.*; import gplx.dbs.diffs.builds.*; import gplx.dbs.diffs.itms.*;
import gplx.xowa.wikis.data.*; import gplx.xowa.wikis.data.tbls.*;
/**
 * Compares an old and a new wiki database and records the differences in a diff database.
 * The three database urls are format strings; see New_url_args for the available ~{...} arguments.
 */
class Xob_diff_build_wkr {
	private final Gfdb_diff_bldr dif_bldr = new Gfdb_diff_bldr();
	private final Xowe_wiki wiki;
	private Db_conn old_conn, new_conn, dif_conn;
	private final Xowd_tbl_mapr tbl_mapr;
	/**
	 * Initializes the wiki and opens the three connections; only the diff database is opened with autocreate on.
	 * NOTE(review): bldr and commit_interval are accepted but not used by this class (Exec passes a fixed 1000 to Init_conn) -- confirm intent.
	 */
	public Xob_diff_build_wkr(Xob_bldr bldr, Xowe_wiki wiki, String old_url, String new_url, String dif_url, int commit_interval, Xowd_tbl_mapr tbl_mapr) {
		this.wiki = wiki;
		wiki.Init_by_wiki();
		Bry_fmt url_fmt = Bry_fmt.New("").Args_(New_url_args(wiki, tbl_mapr.Name));
		Bry_bfr tmp_bfr = Bry_bfr_.New();
		old_conn = New_conn(tmp_bfr, wiki, url_fmt, Bool_.N, old_url);
		new_conn = New_conn(tmp_bfr, wiki, url_fmt, Bool_.N, new_url);
		dif_conn = New_conn(tmp_bfr, wiki, url_fmt, Bool_.Y, dif_url);
		this.tbl_mapr = tbl_mapr;
	}
	/** Creates a diff job named "domain|mapr-name|diffs|yyyyMMdd" and runs Compare once for every db file that the table mapper accepts. */
	public void Exec() {
		Gdif_core dif_core = new Gdif_core(dif_conn);
		String name = String_.Format("{0}|{1}|diffs|{2}", wiki.Domain_str(), tbl_mapr.Name, wiki.Props().Modified_latest().XtoStr_fmt(DateAdp_.Fmt__yyyyMMdd));	// EX: "simple.wikipedia.org|text|diffs|20160112"
		String made_by = wiki.App().User().Key();
		Gdif_job_itm job_itm = dif_core.New_job(name, made_by);
		Gdif_bldr_ctx ctx = new Gdif_bldr_ctx().Init(dif_core, job_itm);
		Gfdb_diff_wkr__db dif_wkr = new Gfdb_diff_wkr__db();
		Gdif_db dif_db = dif_core.Db();
		dif_wkr.Init_conn(dif_db, 1000);
		dif_bldr.Init(dif_wkr);
		// wiki.Data__core_mgr().Db__core().Conn().Conn_info();
		Xow_db_file[] db_file_ary = wiki.Data__core_mgr().Db__core().Tbl__db().Select_all(wiki.Data__core_mgr().Props(), Io_url_.Empty);
		int db_files_len = db_file_ary.length;
		for (int i = 0; i < db_files_len; ++i) {
			Xow_db_file db_file = db_file_ary[i];
			if (tbl_mapr.Db_ids__has(db_file.Tid()))
				Compare(ctx);
		}
		// int old_tbl_len = old_tbl_mgr.Len();
		// for (int i = 0; i < old_tbl_len; ++i) {
		//	Dbmeta_tbl_itm old_tbl = old_tbl_mgr.Get_at(i);
		//	Dbmeta_tbl_itm new_tbl = new_tbl_mgr.Get_by(old_tbl.Name());
		//	if (new_tbl == null) {
		//		// delete all
		//	}
		// }
	}
	/** Diffs every table of the new database that also exists (by name) in the old database; tables missing from the old database are skipped. */
	private void Compare(Gdif_bldr_ctx ctx) {
		Dbmeta_tbl_mgr old_tbl_mgr = old_conn.Meta_mgr();
		// table list must come from the NEW database; it was previously read from old_conn (copy-paste error),
		// which made the old/new lookup below compare the old database against itself
		Dbmeta_tbl_mgr new_tbl_mgr = new_conn.Meta_mgr();
		int new_tbl_len = new_tbl_mgr.Len();
		for (int i = 0; i < new_tbl_len; ++i) {
			Dbmeta_tbl_itm new_tbl = new_tbl_mgr.Get_at(i);
			Dbmeta_tbl_itm old_tbl = old_tbl_mgr.Get_by(new_tbl.Name()); if (old_tbl == null) continue;
			Gfdb_diff_tbl dif_tbl = Gfdb_diff_tbl.New(new_tbl);
			dif_bldr.Compare(ctx, dif_tbl, old_conn, new_conn);
			// save txn
		}
	}
	/** Expands url_fmt with fmtr's arguments and opens (or, when autocreate is true, creates) the database at the resulting url. */
	public static Db_conn New_conn(Bry_bfr tmp_bfr, Xow_wiki wiki, Bry_fmt fmtr, boolean autocreate, String url_fmt) {
		fmtr.Fmt_(url_fmt).Bld_many(tmp_bfr);
		return Db_conn_bldr.Instance.Get_or_autocreate(autocreate, Io_url_.new_any_(tmp_bfr.To_str_and_clear()));
	}
	/** Arguments available inside the url format strings: .dump_dir, .dump_core, .dump_domain, .dir_spr and .dif_name. */
	private static Bfr_fmt_arg[] New_url_args(Xow_wiki wiki, String db_mapr_name) {
		Bfr_fmt_arg[] rv = new Bfr_fmt_arg[]
		{ new Bfr_fmt_arg(Bry_.new_a7(".dump_dir"), new Bfr_arg__dump_dir(wiki))
		, new Bfr_fmt_arg(Bry_.new_a7(".dump_core"), new Bfr_arg__dump_core(wiki))
		, new Bfr_fmt_arg(Bry_.new_a7(".dump_domain"), new Bfr_arg__dump_domain(wiki))
		, new Bfr_fmt_arg(Bry_.new_a7(".dir_spr"), new Bfr_arg__dir_spr())
		, new Bfr_fmt_arg(Bry_.new_a7(".dif_name"), Bfr_arg_.New_bry(db_mapr_name))
		};
		return rv;
	}
	//old_url='~{.dump_dir}-prev/~{.dump_core}';
	//new_url='~{.dump_dir}/~{.dump_core}';
	//dif_url='~{.dump_dir}/~{.dump_domain}-{.dif_name}-diff.xowa';
	// old_conn='data source="~{.dump_dir}/~{.dump_core}";url='
	// dif_conn='gplx_key=sqlite;url='
}

View File

@@ -13,35 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs.cmds.diffs; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.cmds.*;
import gplx.dbs.*; import gplx.dbs.metas.*; import gplx.dbs.diffs.*;
/** Placeholder for a diff-manifest parser; the parsing logic is commented out and nothing is parsed yet. */
class Xob_diff_manifest {
// page|page_id|*
/**
 * Not implemented: ignores both arguments and always returns null.
 * NOTE(review): the comment above ("page|page_id|*") and the disabled code suggest pipe-delimited rows whose first item is a table name -- confirm before implementing.
 */
public static Gfdb_diff_tbl[] Parse(Db_conn conn, String src_str) {
// byte[][] rows_ary = Bry_split_.Split_lines(Bry_.new_u8(src_str));
// int rows_len = rows_ary.length;
// for (int i = 0; i < rows_len; ++i) {
// byte[] row = rows_ary[i];
// byte[][] itms_ary = Bry_split_.Split(row, Byte_ascii.Pipe);
// byte[] tbl_name = itms_ary[0];
// conn.Meta_tbl_exists
// int itms_len = itms_ary.length;
// for (int j = 0; j < itms_len; ++j) {
// byte[] itm = itms_ary[j];
// Tfds.Dbg(itm);
// }
// Gfdb_diff_tbl tbl = new Gfdb_diff_tbl(String_.new_u8(itms_ary[0]),keys, vals, Db_rdr_.Empty);
// }
return null;
}
}
/*
class Wkr {
public void Make() {
sdif_db_mgr sdif_db = new Sdif_db_mgr(conn);
for (int i = 0; i < rhs_tbl_len; ++i) {
}
}
}
*/

View File

@@ -13,20 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs.cmds.diffs; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.cmds.*;
import gplx.xowa.wikis.data.*;
/** Names a diff build and carries the list of db ids it was configured with. */
class Xowd_tbl_mapr {
public Xowd_tbl_mapr(String name, int[] db_ids) {
this.Name = name;
this.Db_ids = db_ids;
}
// build name; used by Xob_diff_build_wkr in the job name and as the ".dif_name" url argument
public final String Name;
// configured db ids; NOTE: not consulted by Db_ids__has below
public final int[] Db_ids;
/**
 * Stub: always returns true, regardless of id and of Db_ids, so every db is treated as selected.
 * NOTE(review): presumably meant to test membership in Db_ids -- confirm the intended semantics (including what an empty Db_ids should mean) before implementing.
 */
public boolean Db_ids__has(int id) {return true;}
// private static List_adp Fill_tbl_names(List_adp rv, int db_tid) {
// switch (db_tid) {
// case Xow_db_file_.Tid__cat:
// return
// break;
// }
}

View File

@@ -13,53 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs.cmds.texts; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.cmds.*;
import gplx.xowa.xtns.wbases.*;
import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.wkrs.*; import gplx.xowa.bldrs.xmls.*; import gplx.xowa.bldrs.cmds.texts.xmls.*;
import gplx.xowa.bldrs.css.*; import gplx.xowa.wikis.domains.*;
import gplx.xowa.wikis.data.*;
/**
 * Base class for the "init" step of a wiki import: marks the import as begun, reads the site info from the
 * dump header, and finishes by resetting the wiki list, deleting the wiki css file and saving cfg data.
 * Subclasses supply the command key, the wikidata commands and the action to run after site info is parsed.
 */
public abstract class Xob_init_base implements Xob_cmd, Gfo_invk {
	private static final String Invk_src_xml_fil_ = "src_xml_fil_", Invk_src_bz2_fil_ = "src_bz2_fil_", Invk_owner = "owner", Invk_wdata_enabled_ = "wdata_enabled_";
	private Xob_bldr bldr;
	private Xowe_wiki wiki;
	private Gfo_usr_dlg usr_dlg;
	private byte wbase_enabled = Bool_.__byte;	// tri-state: stays __byte until set explicitly or resolved in Cmd_init
	public Xob_init_base Ctor(Xob_bldr bldr, Xowe_wiki wiki) {
		this.bldr = bldr;
		this.wiki = wiki;
		this.usr_dlg = wiki.Appe().Usr_dlg();
		return this;
	}
	public abstract String Cmd_key();
	public Xob_cmd Cmd_clone(Xob_bldr bldr, Xowe_wiki wiki) {return null;}
	/** Adds the wikidata-specific commands to the builder. */
	public abstract void Cmd_ini_wdata(Xob_bldr bldr, Xowe_wiki wiki);
	/** Called by Cmd_run after the site info has been parsed. */
	public abstract void Cmd_run_end(Xowe_wiki wiki);
	/** Begins the import marker and, when wikidata support is enabled, adds other cmds; EX: wikidata */
	@gplx.Virtual public void Cmd_init(Xob_bldr bldr) {
		bldr.Import_marker().Bgn(wiki);
		if (wbase_enabled == Bool_.__byte) {
			// if wbase_enabled not explicitly set, set it to y if wiki is "www.wikidata.org"
			boolean is_wikidata = wiki.Domain_tid() == Xow_domain_tid_.Tid__wikidata;
			wbase_enabled = is_wikidata ? Bool_.Y_byte : Bool_.N_byte;
		}
		if (wbase_enabled == Bool_.Y_byte) {
			// if wbase_enabled, auto-add wdata_wkrs bldr
			this.Cmd_ini_wdata(bldr, wiki);
		}
	}
	public void Cmd_bgn(Xob_bldr bldr) {}
	/** Parses the site info out of the dump header and then lets the subclass save it. */
	public void Cmd_run() {
		gplx.core.ios.streams.Io_stream_rdr src_rdr = wiki.Import_cfg().Src_rdr();
		usr_dlg.Plog_many("", "", "reading dump header: ~{0}", src_rdr.Url().Raw());
		Xob_siteinfo_parser_.Parse(Xob_siteinfo_parser_.Extract(src_rdr), wiki);
		this.Cmd_run_end(wiki);	// save site info
	}
	public void Cmd_end() {
		// dirty wiki list so that next refresh will load itm
		wiki.Appe().Gui_mgr().Html_mgr().Portal_mgr().Wikis().Itms_reset();
		// NOTE: css deletion used to be an option (wiki.Appe().Setup_mgr().Dump_mgr().Css_wiki_update()), but was no longer being set; may need to reinstate; DATE:2016-12-21
		Io_url css_url = wiki.Appe().Fsys_mgr().Wiki_css_dir(wiki.Domain_str()).GenSubFil(Xoa_css_extractor.Css_wiki_name);
		usr_dlg.Log_many("", "", "deleting css: ~{0}", css_url.Raw());
		Io_mgr.Instance.DeleteFil_args(css_url).MissingFails_off().Exec();
		// always save xowa_cfg data at end of init step, not term step; else, other builder commands will load empty cfg and import data will be null; DATE:2017-02-20
		if (gplx.core.envs.Env_.Mode_testing()) return;	// need else Xob_init_base_tst fails; DATE:2017-02-20
		Xowd_cfg_tbl_.Upsert__import(wiki);
		Xowd_cfg_tbl_.Upsert__create(wiki);
	}
	@gplx.Virtual public void Cmd_term() {}
	/** Handles the source-file and wikidata settings (value read from arg "v"); "owner" returns the builder's cmd mgr. */
	@gplx.Virtual public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
		if (ctx.Match(k, Invk_src_xml_fil_)) {
			wiki.Import_cfg().Src_fil_xml_(m.ReadIoUrl("v"));
			return this;
		}
		if (ctx.Match(k, Invk_src_bz2_fil_)) {
			wiki.Import_cfg().Src_fil_bz2_(m.ReadIoUrl("v"));
			return this;
		}
		if (ctx.Match(k, Invk_wdata_enabled_)) {
			wbase_enabled = m.ReadYn("v") ? Bool_.Y_byte : Bool_.N_byte;
			return this;
		}
		if (ctx.Match(k, Invk_owner))
			return bldr.Cmd_mgr();
		return Gfo_invk_.Rv_unhandled;
	}
}

View File

@@ -13,43 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs.cmds.texts; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.cmds.*;
import gplx.xowa.bldrs.wkrs.*; import gplx.xowa.bldrs.xmls.*; import gplx.xowa.xtns.wbases.*; import gplx.xowa.wikis.data.tbls.*; import gplx.xowa.wikis.dbs.*;
/**
 * Base class for the "term" step of a wiki import. All of the work happens in Cmd_end, which finalizes the wiki
 * (wiki list, caches, main page, import marker, re-init) and then calls the subclass hook.
 */
public abstract class Xob_term_base implements Xob_cmd, Gfo_invk {
// NOTE: only wiki is stored; the bldr argument is not kept
public Xob_term_base Ctor(Xob_bldr bldr, Xowe_wiki wiki) {this.wiki = wiki; return this;} private Xowe_wiki wiki;
public abstract String Cmd_key();
public Xob_cmd Cmd_clone(Xob_bldr bldr, Xowe_wiki wiki) {return null;}
public void Cmd_init(Xob_bldr bldr) {}
public void Cmd_bgn(Xob_bldr bldr) {}
public void Cmd_run() {}
/** Runs the finalization steps below in order, then Cmd_end_hook. */
public void Cmd_end() {
Xoae_app app = wiki.Appe();
// dirty wiki list so that next refresh will load wiki
app.Gui_mgr().Html_mgr().Portal_mgr().Wikis().Itms_reset();
// clear cache, else import will load new page with old items from cache; DATE:2013-11-21
app.Free_mem(false);
// update main page
byte[] new_main_page = gplx.xowa.langs.msgs.Xow_mainpage_finder.Find_or(wiki, wiki.Props().Siteinfo_mainpage()); // get new main_page from mainpage_finder
wiki.Props().Main_page_(new_main_page);
// persist the main page in the core db's cfg table
wiki.Data__core_mgr().Db__core().Tbl__cfg().Upsert_bry(gplx.xowa.wikis.data.Xowd_cfg_key_.Grp__wiki_init, gplx.xowa.wikis.data.Xowd_cfg_key_.Key__init__main_page , new_main_page);
// remove import marker
app.Bldr().Import_marker().End(wiki);
// flag init_needed prior to show; dir_info will show page_txt instead of page_gz;
wiki.Init_needed_(true);
// force load; needed to pick up MediaWiki ns for MediaWiki:mainpage
wiki.Init_assert();
Cmd_end_hook();
}
/** Subclass hook; called as the last step of Cmd_end. */
public abstract void Cmd_end_hook();
public void Cmd_term() {}
// accepts every key without doing anything: always returns this
public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
return this;
}
}

View File

@@ -13,29 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs.cmds.texts.sqls; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.cmds.*; import gplx.xowa.bldrs.cmds.texts.*;
import gplx.xowa.bldrs.*; import gplx.xowa.apps.apis.xowa.bldrs.imports.*;
import gplx.xowa.xtns.wbases.imports.*;
/** Init command for text imports: adds the wbase qid / pid commands when wikidata is enabled and creates the wiki once site info is parsed. */
public class Xob_init_cmd extends Xob_init_base {
	public Xob_init_cmd(Xob_bldr bldr, Xowe_wiki wiki) {
		this.Ctor(bldr, wiki);
	}
	@Override public String Cmd_key() {
		return Xob_cmd_keys.Key_text_init;
	}
	@Override public void Cmd_ini_wdata(Xob_bldr bldr, Xowe_wiki wiki) {
		bldr.Cmd_mgr().Add_cmd(wiki, Xob_cmd_keys.Key_wbase_qid);
		bldr.Cmd_mgr().Add_cmd(wiki, Xob_cmd_keys.Key_wbase_pid);
	}
	@Override public void Cmd_init(Xob_bldr bldr) {
		super.Cmd_init(bldr);
		// gplx.dbs.qrys.bats.Db_batch__journal_wal.Batch__init(gplx.dbs.Db_conn_pool.Instance.Batch_mgr());
	}
	/** Fails when a core db file is already found for the wiki; otherwise creates the wiki from the import source's length and name. */
	@Override public void Cmd_run_end(Xowe_wiki wiki) {
		boolean core_fil_exists = gplx.xowa.wikis.data.Xow_db_file__core_.Find_core_fil_or_null(wiki) != null;
		if (core_fil_exists) {
			throw wiki.Appe().Bldr().Usr_dlg().Fail_many("", "", "directory must not contain any .xowa or .sqlite3 files: dir=~{0}", wiki.Fsys_mgr().Root_dir().Raw());
		}
		Xowe_wiki_.Create(wiki, wiki.Import_cfg().Src_rdr_len(), wiki.Import_cfg().Src_fil().NameOnly());
	}
	@Override public void Cmd_term() {
		super.Cmd_term();
		// gplx.dbs.qrys.bats.Db_batch__journal_wal.Batch__term(gplx.dbs.Db_conn_pool.Instance.Batch_mgr());
		// gplx.dbs.Db_conn_pool.Instance.Rls_all();
	}
}

View File

@@ -13,16 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs.cmds.texts.sqls; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.cmds.*; import gplx.xowa.bldrs.cmds.texts.*;
import gplx.xowa.wikis.data.*; import gplx.xowa.wikis.data.tbls.*;
/** Xob_ns_to_db_wkr for text databases: creates the text table and brackets its inserts. */
public class Xob_ns_to_db_wkr__text implements Xob_ns_to_db_wkr {
	public byte Db_tid() {
		return Xow_db_file_.Tid__text;
	}
	/** Creates the text table in db and begins inserting into it. */
	public void Tbl_init(Xow_db_file db) {
		Xowd_text_tbl text_tbl = db.Tbl__text();
		text_tbl.Create_tbl();
		text_tbl.Insert_bgn();
	}
	/** Ends inserting into db's text table. */
	public void Tbl_term(Xow_db_file db) {
		Xowd_text_tbl text_tbl = db.Tbl__text();
		text_tbl.Insert_end();
	}
}

View File

@@ -13,97 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs.cmds.texts.sqls; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.cmds.*; import gplx.xowa.bldrs.cmds.texts.*;
import gplx.dbs.*; import gplx.core.ios.*; import gplx.xowa.bldrs.cmds.*;
import gplx.xowa.bldrs.wkrs.*;
import gplx.xowa.wikis.nss.*;
import gplx.xowa.wikis.data.*; import gplx.xowa.wikis.data.tbls.*; import gplx.xowa.wikis.dbs.*;
import gplx.xowa.wikis.*; import gplx.xowa.bldrs.filters.dansguardians.*; import gplx.xowa.apps.apis.xowa.bldrs.imports.*;
import gplx.xowa.parsers.utils.*; import gplx.xowa.addons.bldrs.files.cmds.*; import gplx.xowa.addons.bldrs.files.dbs.*;
public class Xob_page_cmd extends Xob_itm_basic_base implements Xob_page_wkr, Gfo_invk {
private Xow_db_mgr db_mgr; private Db_idx_mode idx_mode = Db_idx_mode.Itm_end; private Xowd_page_tbl page_core_tbl; private Io_stream_zip_mgr text_zip_mgr; private byte text_zip_tid;
private Xop_redirect_mgr redirect_mgr; private Xob_redirect_tbl redirect_tbl; private boolean redirect_id_enabled;
private DateAdp modified_latest = DateAdp_.MinValue; private int page_count_all, page_count_main = 0; private int commit_interval = 100000; // 100 k
private Dg_match_mgr dg_match_mgr; private Xob_ns_to_db_mgr ns_to_db_mgr;
/** Creates the command by passing bldr and wiki to Cmd_ctor. */
public Xob_page_cmd(Xob_bldr bldr, Xowe_wiki wiki) {this.Cmd_ctor(bldr, wiki);}
/** Key identifying this page worker: Xob_cmd_keys.Key_text_page. */
public String Page_wkr__key() {return Xob_cmd_keys.Key_text_page;}
/**
 * Prepares all state needed before pages are processed: caches the wiki's redirect / db / zip helpers,
 * rebuilds the namespace manager, sets up the ns-to-db mapping and the optional dansguardian matcher and redirect table,
 * assigns a file itm to every namespace, and begins the page-table insert.
 */
public void Page_wkr__bgn() {
Xoae_app app = wiki.Appe();
this.redirect_mgr = wiki.Redirect_mgr();
this.db_mgr = wiki.Db_mgr_as_sql().Core_data_mgr();
this.page_core_tbl = db_mgr.Tbl__page();
this.text_zip_mgr = wiki.Utl__zip_mgr();
this.text_zip_tid = Xobldr_cfg.Zip_mode__text(app);
// NOTE: rebuild needed to add canonical namespaces as templates; else, redirects to English namespaces won't work in non-English wikis; EX: gu.w and #REDIRECT [[Template:COLON]]; DATE:2017-02-20
Xow_ns_mgr_.rebuild_(wiki.Lang(), wiki.Ns_mgr());
this.ns_to_db_mgr = new Xob_ns_to_db_mgr(new Xob_ns_to_db_wkr__text(), db_mgr, Xobldr_cfg.Max_size__text(app));
// dg_match_mgr may be null; every use is null-checked
this.dg_match_mgr = Dg_match_mgr.New_mgr(app, wiki);
if (dg_match_mgr != null) redirect_id_enabled = true; // always enable redirect_id if dg_match_mgr enabled; DATE:2016-01-04
if (redirect_id_enabled) {
// create the redirect table under the wiki's root dir and open a transaction on its connection
this.redirect_tbl = new Xob_redirect_tbl(wiki.Fsys_mgr().Root_dir(), gplx.langs.htmls.encoders.Gfo_url_encoder_.Http_url_ttl).Create_table();
redirect_tbl.Conn().Txn_bgn("bldr__page__redirect");
}
app.Bldr().Dump_parser().Trie_tab_del_(); // disable swapping &#09; for \t
byte[] ns_file_map = Xobldr_cfg.New_ns_file_map(app, wiki.Import_cfg().Src_rdr_len());
Xob_ns_file_itm.Init_ns_bldr_data(Xow_db_file_.Tid__text, wiki.Ns_mgr(), ns_file_map);
// idx_mode "bgn" creates the page index before any rows are inserted
if (idx_mode.Tid_is_bgn()) page_core_tbl.Create_idx();
page_core_tbl.Insert_bgn();
usr_dlg.Prog_many("", "", "import.page.bgn");
}
public void Page_wkr__run(Xowd_page_itm page) {
int id = page.Id();
DateAdp modified = page.Modified_on(); if (modified.compareTo(modified_latest) == CompareAble_.More) modified_latest = modified;
byte[] text_raw = page.Text(); int text_raw_len = page.Text_len();
Xoa_ttl redirect_ttl = redirect_mgr.Extract_redirect(text_raw, text_raw_len); boolean redirect = redirect_ttl != null;
page.Redirected_(redirect);
Xow_ns ns = page.Ns();
int random_int = ns.Count() + 1; ns.Count_(random_int);
if (dg_match_mgr != null) {
if (dg_match_mgr.Match(1, id, ns.Id(), page.Ttl_page_db(), page.Ttl_full_db(), wiki.Lang(), text_raw)) return;
}
byte[] text_zip = text_zip_mgr.Zip(text_zip_tid, text_raw);
Xow_db_file text_db = ns_to_db_mgr.Get_by_ns(ns.Bldr_data(), text_zip.length);
try {db_mgr.Create_page(page_core_tbl, text_db.Tbl__text(), id, page.Ns_id(), page.Ttl_page_db(), redirect, modified, text_zip, text_raw_len, random_int, text_db.Id(), -1);}
catch (Exception e) {
throw Err_.new_exc(e, "bldr", "create page in db failed; skipping page", "id", id, "ns", page.Ns_id(), "name", page.Ttl_page_db(), "redirect", redirect, "modified", modified, "text_len", text_raw_len, "text_db_id", text_db.Id());
}
if (redirect && redirect_id_enabled)
redirect_tbl.Insert(id, page.Ttl_page_db(), redirect_ttl);
++page_count_all;
if (ns.Id_is_main() && !page.Redirected()) ++page_count_main;
if (page_count_all % commit_interval == 0) {
page_core_tbl.Conn().Txn_sav(); text_db.Conn().Txn_sav();
if (redirect_id_enabled) redirect_tbl.Conn().Txn_sav();
if (dg_match_mgr != null) dg_match_mgr.Commit();
}
}
public void Page_wkr__run_cleanup() {
usr_dlg.Log_many("", "", "import.page: insert done; committing pages; pages=~{0}", page_count_all);
ns_to_db_mgr.Rls_all();
page_core_tbl.Insert_end();
}
public void Page_wkr__end() {
if (dg_match_mgr != null) dg_match_mgr.Rls();
usr_dlg.Log_many("", "", "import.page: updating core stats");
Xow_ns_mgr ns_mgr = wiki.Ns_mgr();
Xow_db_file db_core = db_mgr.Db__core();
db_core.Tbl__site_stats().Update(page_count_main, page_count_all, ns_mgr.Ns_file().Count()); // save page stats
db_core.Tbl__ns().Insert(ns_mgr); // save ns
if (idx_mode.Tid_is_end()) page_core_tbl.Create_idx();
if (redirect_id_enabled) {
redirect_tbl.Conn().Txn_end();
redirect_tbl.Update_trg_redirect_id(db_core.Url(), 1);
redirect_tbl.Update_src_redirect_id(db_core.Url(), page_core_tbl.Conn());
}
}
@Override public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
if (ctx.Match(k, Invk_commit_interval_)) commit_interval = m.ReadInt("v");
else if (ctx.Match(k, Invk_idx_mode_)) idx_mode = Db_idx_mode.Xto_itm(m.ReadStr("v"));
else if (ctx.Match(k, Invk_redirect_id_enabled_)) redirect_id_enabled = m.ReadYn("v");
else return super.Invk(ctx, ikey, k, m);
return this;
}
private static final String Invk_commit_interval_ = "commit_interval_", Invk_idx_mode_ = "idx_mode_", Invk_redirect_id_enabled_ = "redirect_id_enabled_";
}

View File

@@ -13,77 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs.cmds.texts.sqls; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.cmds.*; import gplx.xowa.bldrs.cmds.texts.*;
import gplx.dbs.*; import gplx.xowa.wikis.data.*;
import gplx.xowa.bldrs.wkrs.*;
/**
 * Bldr cmd that deletes redirect pages whose target is missing (or whose redirect id is -1).
 * The affected page ids are collected in a "page_filter" table of the core db; rows are then removed
 * from the text / cat_link / search_link tables of the attached data dbs and finally from "page".
 */
public class Xob_page_delete_cmd extends Xob_cmd_base {
	private final Xow_wiki wiki;
	public Xob_page_delete_cmd(Xob_bldr bldr, Xow_wiki wiki) {
		this.wiki = wiki;
	}
	@Override public String Cmd_key() {return Xob_cmd_keys.Key_text_delete_page;}
	@Override public void Cmd_run() {
		wiki.Init_by_wiki();
		Xow_db_file core_db = wiki.Data__core_mgr().Db__core();
		Db_conn conn = core_db.Conn();
		Gfo_usr_dlg dlg = Gfo_usr_dlg_.Instance;

		dlg.Plog_many("", "", "creating page_filter");
		Create_page_filter(conn);
		Fill_page_filter(conn);
		Delete_from_data_dbs(core_db, conn);

		conn.Exec_sql_plog_ntx("deleting from table: page", "DELETE FROM page WHERE page_id IN (SELECT page_id FROM page_filter);");
		// core_db_conn.Meta_tbl_delete("page_filter");
		conn.Env_vacuum();
		dlg.Plog_many("", "", "");
	}
	/** Creates the page_filter table (with its two indexes) unless it already exists. */
	private void Create_page_filter(Db_conn conn) {
		if (conn.Meta_tbl_exists("page_filter")) return;
		Dbmeta_fld_itm[] flds = new Dbmeta_fld_itm[]
		{ Dbmeta_fld_itm.new_int("page_id").Primary_y_()
		, Dbmeta_fld_itm.new_int("page_text_db_id")
		};
		conn.Meta_tbl_create(Dbmeta_tbl_itm.New
		( "page_filter"
		, flds
		, Dbmeta_idx_itm.new_normal_by_tbl("page_filter", "db_id__page", "page_text_db_id", "page_id")
		, Dbmeta_idx_itm.new_normal_by_tbl("page_filter", "page_id", "page_id")
		));
	}
	/** Inserts into page_filter every redirect page whose target page is absent or whose redirect id is -1. */
	private void Fill_page_filter(Db_conn conn) {
		String sql = String_.Concat_lines_nl_skip_last
		( "INSERT INTO page_filter (page_id, page_text_db_id)"
		, "SELECT  ptr.page_id, ptr.page_text_db_id"
		, "FROM    page ptr"
		, "        LEFT JOIN page orig ON ptr.page_redirect_id = orig.page_id"
		, "WHERE   ptr.page_is_redirect = 1"
		, "AND     orig.page_id IS NULL"
		, "UNION"
		, "SELECT  ptr.page_id, ptr.page_text_db_id"
		, "FROM    page ptr"
		, "WHERE   ptr.page_is_redirect = 1"
		, "AND     ptr.page_redirect_id = -1"
		, ";"
		);
		conn.Exec_sql_plog_ntx("finding missing redirects", sql);
	}
	/**
	 * Walks all db files registered in the core db and deletes filtered rows from each relevant table.
	 * Any exception is logged as a warning (with the url of the db being processed) and ends the walk.
	 */
	private void Delete_from_data_dbs(Xow_db_file core_db, Db_conn conn) {
		String cur_url = "";
		try {
			Xow_db_file[] files = core_db.Tbl__db().Select_all(wiki.Data__core_mgr().Props(), wiki.Fsys_mgr().Root_dir());
			for (Xow_db_file file : files) {
				boolean has_text = Bool_.N, has_cat = Bool_.N, has_search = Bool_.N;
				switch (file.Tid()) {
					case Xow_db_file_.Tid__core: case Xow_db_file_.Tid__wiki_solo: case Xow_db_file_.Tid__text_solo:
						// if mode is lot, then "core" db does not have cat, search; skip; DATE:2016-01-31
						if (wiki.Data__core_mgr().Props().Layout_text().Tid_is_lot()) continue;
						has_cat = has_search = Bool_.Y;	// do not set text flag to true; DATE:2016-10-18
						break;
					case Xow_db_file_.Tid__text:
						has_text = Bool_.Y;
						break;
					case Xow_db_file_.Tid__cat:
						has_cat = Bool_.Y;
						break;
					case Xow_db_file_.Tid__search_link:	// changed from search_data to search_link; DATE:2016-10-19
						has_search = Bool_.Y;
						break;
				}
				cur_url = file.Url().Raw();
				int file_id = file.Id();
				if (has_text)
					Run_sql(conn, file.Url(), file_id, "deleting text: "   + file_id, "DELETE FROM <data_db>text WHERE page_id IN (SELECT page_id FROM page_filter WHERE page_text_db_id = {0});");
				if (has_cat)
					Run_sql(conn, file.Url(), file_id, "deleting cat: "    + file_id, "DELETE FROM <data_db>cat_link WHERE cl_from IN (SELECT page_id FROM page_filter);");
				if (has_search)
					Run_sql(conn, file.Url(), file_id, "deleting search:"  + file_id, "DELETE FROM <data_db>search_link WHERE page_id IN (SELECT page_id FROM page_filter);");
				if (has_text || has_cat || has_search)
					file.Conn().Env_vacuum();
			}
		}
		catch (Exception e) {
			Gfo_usr_dlg_.Instance.Warn_many("", "", "fatal error during page deletion: cur=~{0} err=~{1}", cur_url, Err_.Message_gplx_log(e));
		}
	}
	/** Runs sql through a Db_attach_mgr built on the core connection with db_url registered under the key "data_db". */
	private void Run_sql(Db_conn core_db_conn, Io_url db_url, int db_id, String prog_msg, String sql) {
		Db_attach_mgr attach_mgr = new Db_attach_mgr(core_db_conn, new Db_attach_itm("data_db", db_url));
		attach_mgr.Exec_sql_w_msg(prog_msg, sql, db_id);
	}
}

View File

@@ -13,23 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs.cmds.texts.sqls; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.cmds.*; import gplx.xowa.bldrs.cmds.texts.*;
import gplx.dbs.cfgs.*; import gplx.xowa.wikis.dbs.*; import gplx.xowa.wikis.*;
import gplx.xowa.wikis.data.*;
/** Terminating cmd of the sql text import: cleans up temp files, builds the fsdb and optionally runs the page-delete cmd. */
public class Xob_term_cmd extends Xob_term_base {
	public static final String KEY = "text.term";
	private Xowe_wiki wiki;
	public Xob_term_cmd(Xob_bldr bldr, Xowe_wiki wiki) {
		this.Ctor(bldr, wiki);
		this.wiki = wiki;
	}
	@Override public String Cmd_key() {return KEY;}
	@Override public void Cmd_end_hook() {
		// delete wiki's temp dir
		Io_url tmp_dir = wiki.Fsys_mgr().Tmp_dir();
		Io_mgr.Instance.DeleteDirDeep(tmp_dir);

		// build fsdb; always build file.user db; DATE:2015-05-12
		gplx.fsdb.Fsdb_db_mgr__v2_bldr.Get_or_make(wiki, false);

		// dansguardian: when enabled for this wiki, run the page-delete cmd
		if (wiki.App().Cfg().Get_bool_wiki_or(wiki, gplx.xowa.bldrs.filters.dansguardians.Dg_match_mgr.Cfg__enabled, false)) {
			Xob_page_delete_cmd delete_cmd = new Xob_page_delete_cmd(wiki.Appe().Bldr(), wiki);
			delete_cmd.Cmd_run();
		}

		wiki.Data__core_mgr().Rls();
	}
}

View File

@@ -13,47 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs.cmds.texts.tdbs; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.cmds.*; import gplx.xowa.bldrs.cmds.texts.*;
import gplx.core.ios.*;
import gplx.xowa.wikis.tdbs.*; import gplx.xowa.wikis.tdbs.xdats.*;
/**
 * Io_make_cmd that copies sorted lines into xdat files and records one registry line per flushed file
 * in the form: file_idx|first_key|last_key|item_count
 */
public class Io_sort_cmd_ns implements Io_make_cmd {
	Xob_xdat_file_wtr fil_wtr;
	Bry_bfr reg_bfr = Bry_bfr_.New(), key_bfr_0 = Bry_bfr_.New_w_size(512), key_bfr_n = Bry_bfr_.New_w_size(512);
	int fil_count = 0, itm_count = 0;
	Gfo_usr_dlg usr_dlg;
	private int trg_fil_max = 65 * Io_mgr.Len_kb;
	Io_url reg_url;
	Io_url make_dir;
	public Io_sort_cmd_ns(Gfo_usr_dlg usr_dlg) {
		this.usr_dlg = usr_dlg;
	}
	public int Trg_fil_max() {return trg_fil_max;}
	public Io_sort_cmd_ns Trg_fil_max_(int v) {
		trg_fil_max = v;
		return this;
	}
	public Io_sort_cmd Make_dir_(Io_url v) {
		make_dir = v;
		return this;
	}
	/** Resets the counters and creates the file writer and registry url under make_dir. */
	public void Sort_bgn() {
		fil_count = itm_count = 0;
		fil_wtr = Xob_xdat_file_wtr.new_file_(trg_fil_max, make_dir);
		reg_url = make_dir.GenSubFil(Xotdb_dir_info_.Name_reg_fil);
	}
	/** Appends the reader's current item to the file writer, flushing first if the writer reports it is needed. */
	public void Sort_do(Io_line_rdr rdr) {
		int itm_bgn = rdr.Itm_pos_bgn();
		int itm_end = rdr.Itm_pos_end();
		int key_bgn = rdr.Key_pos_bgn();
		int key_end = rdr.Key_pos_end();
		if (fil_wtr.FlushNeeded(itm_end - itm_bgn))
			Flush();
		byte[] src = rdr.Bfr();
		if (key_bfr_0.Len() == 0)	// first key since last flush
			key_bfr_0.Add_mid(src, key_bgn, key_end);
		key_bfr_n.Clear().Add_mid(src, key_bgn, key_end);	// last key is always the current one
		fil_wtr.Bfr().Add_mid(rdr.Bfr(), itm_bgn, itm_end);
		fil_wtr.Add_idx(Byte_ascii.Null);
		++itm_count;
	}
	/** Flushes the pending file and appends the accumulated registry lines to reg_url. */
	public void Sort_end() {
		Flush();
		Io_mgr.Instance.AppendFilBfr(reg_url, reg_bfr);
		//fil_wtr.Rls(); reg_bfr.Rls(); key_bfr_0.Rls(); key_bfr_n.Rls();
	}
	/** Writes the registry line for the current file, resets per-file state, and flushes the file writer. */
	private void Flush() {
		reg_bfr.Add_int_variable(fil_count++).Add_byte(Byte_ascii.Pipe).Add_bfr_and_preserve(key_bfr_0).Add_byte(Byte_ascii.Pipe).Add_bfr_and_preserve(key_bfr_n).Add_byte(Byte_ascii.Pipe).Add_int_variable(itm_count).Add_byte(Byte_ascii.Nl);
		itm_count = 0;
		key_bfr_0.Clear();
		boolean report_prog = fil_wtr.Fil_idx() % 10 == 0;	// only report every 10th file
		if (report_prog)
			usr_dlg.Prog_many("cmd_ns", "prog", "saving: ~{0} ~{1}", reg_url.OwnerDir().OwnerDir().NameOnly(), fil_wtr.Fil_url().NameOnly());
		fil_wtr.Flush(usr_dlg);
	}
}

View File

@@ -13,93 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs.cmds.texts.tdbs; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.cmds.*; import gplx.xowa.bldrs.cmds.texts.*;
import gplx.core.primitives.*; import gplx.core.ios.*;
import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.wkrs.*; import gplx.xowa.bldrs.wtrs.*;
import gplx.xowa.langs.*; import gplx.xowa.wikis.nss.*; import gplx.xowa.wikis.dbs.*; import gplx.xowa.wikis.tdbs.*; import gplx.xowa.wikis.data.tbls.*;
public abstract class Srch_bldr_wkr_base extends Xob_itm_dump_base implements Xob_page_wkr {
private final Ordered_hash list = Ordered_hash_.New(); private Xol_lang_itm lang;
public abstract String Page_wkr__key();
public void Page_wkr__bgn() {
make_dir = wiki.Tdb_fsys_mgr().Ns_dir();
this.Init_dump(this.Page_wkr__key(), make_dir);
lang = wiki.Lang(); // wiki.Appe().Lang_mgr().Lang_en(); // NOTE: was .Lang_en which is wrong (should match lang of wiki); DATE:2013-05-11
tmp_wtr_mgr = new Xob_tmp_wtr_mgr(new Xob_tmp_wtr_wkr__ttl(temp_dir, dump_fil_len));
if (wiki.Db_mgr().Tid() == Xodb_mgr_sql.Tid_sql) // if sqlite, hard-code to ns_main; aggregates all ns into one
ns_main = wiki.Ns_mgr().Ns_main();
} private Xob_tmp_wtr_mgr tmp_wtr_mgr; private Xow_ns ns_main;
public void Page_wkr__run(Xowd_page_itm page) {
// if (page.Ns_id() != Xow_ns_.Tid__main) return; // limit to main ns for now
try {
byte[] ttl = page.Ttl_page_db();
byte[][] words = Split_ttl_into_words(lang, list, dump_bfr, ttl);
Xob_tmp_wtr wtr = tmp_wtr_mgr.Get_or_new(ns_main == null ? page.Ns() : ns_main);
int words_len = words.length;
int row_len = 0;
for (int i = 0; i < words_len; i++) {
byte[] word = words[i];
row_len += word.length + 13; // 13=5(id) + 5(page_len) + 3(dlms)
}
if (wtr.FlushNeeded(row_len)) wtr.Flush(bldr.Usr_dlg());
for (int i = 0; i < words_len; i++) {
byte[] word = words[i];
wtr.Bfr() .Add(word) .Add_byte(Byte_ascii.Pipe)
.Add_base85_len_5(page.Id()) .Add_byte(Byte_ascii.Semic)
.Add_base85_len_5(page.Text().length) .Add_byte(Byte_ascii.Nl);
}
} catch (Exception e) {bldr.Usr_dlg().Warn_many("", "", "search_index:fatal error: err=~{0}", Err_.Message_gplx_full(e));} // never let single page crash entire import
}
public void Page_wkr__run_cleanup() {}
public void Page_wkr__end() {
tmp_wtr_mgr.Flush_all(bldr.Usr_dlg());
dump_bfr.ClearAndReset();
Xobdc_merger.Ns(bldr.Usr_dlg(), tmp_wtr_mgr.Regy(), Xotdb_dir_info_.Name_search_ttl, temp_dir, make_dir, sort_mem_len, Io_line_rdr_key_gen_.first_pipe, this.Make_cmd_site());
tmp_wtr_mgr.Rls_all();
if (delete_temp) Io_mgr.Instance.DeleteDirDeep(temp_dir);
}
public abstract Io_make_cmd Make_cmd_site();
public static byte[][] Split_ttl_into_words(Xol_lang_itm lang, Ordered_hash list, Bry_bfr bfr, byte[] ttl) {
if (lang != null) // null lang passed in by searcher
ttl = lang.Case_mgr().Case_build_lower(ttl);
int ttl_len = ttl.length; Bry_obj_ref word_ref = Bry_obj_ref.New(Bry_.Empty);
int i = 0; boolean word_done = false;
while (true) {
if (word_done || i == ttl_len) {
if (bfr.Len() > 0) {
byte[] word = bfr.To_bry_and_clear();
word_ref.Val_(word);
if (!list.Has(word_ref)) list.Add(word_ref, word); // don't add same word twice; EX: Title of "Can Can" should only have "Can" in index
}
if (i == ttl_len) break;
word_done = false;
}
byte b = ttl[i];
switch (b) {
case Byte_ascii.Underline: // underline is word-breaking; EX: A_B -> A, B
case Byte_ascii.Space: // should not occur, but just in case (only underscores)
case Byte_ascii.Tab: case Byte_ascii.Nl: case Byte_ascii.Cr: // should not occur in titles, but just in case
case Byte_ascii.Dash: // treat hypenated words separately
case Byte_ascii.Dot: // treat abbreviations as separate words; EX: A.B.C.
case Byte_ascii.Bang: case Byte_ascii.Hash: case Byte_ascii.Dollar: case Byte_ascii.Percent:
case Byte_ascii.Amp: case Byte_ascii.Paren_bgn: case Byte_ascii.Paren_end: case Byte_ascii.Star:
case Byte_ascii.Comma: case Byte_ascii.Slash:
case Byte_ascii.Colon: case Byte_ascii.Semic: case Byte_ascii.Gt:
case Byte_ascii.Question: case Byte_ascii.At: case Byte_ascii.Brack_bgn: case Byte_ascii.Brack_end:
case Byte_ascii.Pow: case Byte_ascii.Tick:
case Byte_ascii.Curly_bgn: case Byte_ascii.Pipe: case Byte_ascii.Curly_end: case Byte_ascii.Tilde:
case Byte_ascii.Quote: case Byte_ascii.Apos: // FUTURE: apos will split "Earth's" to Earth and s; should remove latter
++i;
word_done = true;
break;
default:
bfr.Add_byte(b);
++i;
break;
}
}
byte[][] rv = (byte[][])list.To_ary(byte[].class);
list.Clear(); list.Resize_bounds(16);
return rv;
}
}

View File

@@ -13,101 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs.cmds.texts.tdbs; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.cmds.*; import gplx.xowa.bldrs.cmds.texts.*;
import gplx.xowa.bldrs.wkrs.*;
import gplx.xowa.wikis.nss.*;
import gplx.xowa.wikis.data.site_stats.*; import gplx.xowa.wikis.data.tbls.*;
import gplx.xowa.wikis.tdbs.*; import gplx.xowa.wikis.tdbs.hives.*; import gplx.xowa.wikis.tdbs.xdats.*;
public class Xob_calc_stats_cmd extends Xob_itm_basic_base implements Xob_cmd {
public Xob_calc_stats_cmd(Xob_bldr bldr, Xowe_wiki wiki) {this.Cmd_ctor(bldr, wiki);}
public String Cmd_key() {return Xob_cmd_keys.Key_tdb_calc_stats;}
public void Cmd_init(Xob_bldr bldr) {}
public void Cmd_bgn(Xob_bldr bldr) {}
public void Cmd_run() {Exec();}
public void Cmd_end() {}
public void Cmd_term() {}
private void Exec() {
int ns_len = wiki.Ns_mgr().Ords_len();
int total = 0;
for (int i = 0; i < ns_len; i++) {
Xow_ns ns = wiki.Ns_mgr().Ords_ary()[i];
int ns_count = Calc_counts(ns);
ns.Count_(ns_count);
total += ns_count;
}
int count_main = Calc_count_articles(wiki.Ns_mgr().Ns_main());
int count_file = Calc_count_articles(wiki.Ns_mgr().Ns_file());
Bry_bfr bfr = Bry_bfr_.New();
Gen_call(Bool_.Y, bfr, Xowe_wiki.Invk_stats);
Gen_call(Bool_.N, bfr, Xowd_site_stats_mgr.Invk_number_of_articles_, count_main);
Gen_call(Bool_.N, bfr, Xowd_site_stats_mgr.Invk_number_of_files_, count_file);
Gen_call(Bool_.N, bfr, Xowd_site_stats_mgr.Invk_number_of_pages_, total);
for (int i = 0; i < ns_len; i++) {
Xow_ns ns = wiki.Ns_mgr().Ords_ary()[i];
if (ns.Id() < 0) continue;
bfr.Add_byte_nl();
Gen_call(Bool_.N, bfr, Xowd_site_stats_mgr.Invk_number_of_articles_in_ns_, ns.Num_str(), Int_.To_str_pad_bgn_zero(ns.Count(), 10));
}
bfr.Add_byte_nl().Add_byte(Byte_ascii.Semic).Add_byte_nl();
Io_url wiki_gfs = Wiki_gfs_url(wiki);
Io_mgr.Instance.SaveFilBfr(wiki_gfs, bfr);
}
private void Gen_call(boolean first, Bry_bfr bfr, String key, Object... vals) {
if (!first) bfr.Add_byte(Byte_ascii.Dot);
bfr.Add_str_u8(key);
int len = vals.length;
if (len > 0) {
bfr.Add_byte(Byte_ascii.Paren_bgn);
for (int i = 0; i < len; i++) {
if (i != 0) bfr.Add_byte(Byte_ascii.Comma).Add_byte(Byte_ascii.Space);
Object val = vals[i];
bfr.Add_str_u8(Object_.Xto_str_strict_or_null_mark(val));
}
bfr.Add_byte(Byte_ascii.Paren_end);
}
}
int Calc_counts(Xow_ns ns) {
Io_url reg_url = wiki.Tdb_fsys_mgr().Url_ns_reg(ns.Num_str(), Xotdb_dir_info_.Tid_ttl);
Xowd_regy_mgr reg_mgr = new Xowd_regy_mgr(reg_url);
int files_ary_len = reg_mgr.Files_ary().length;
int count = 0;
for (int i = 0; i < files_ary_len; i++) {
count += reg_mgr.Files_ary()[i].Count();
}
return count;
}
int Calc_count_articles(Xow_ns ns) {
Io_url hive_dir = wiki.Fsys_mgr().Root_dir().GenSubDir_nest(Xotdb_dir_info_.Name_ns, ns.Num_str(), Xotdb_dir_info_.Name_title);
return Calc_count_articles_dir(ns, hive_dir);
}
int Calc_count_articles_dir(Xow_ns ns, Io_url dir) {
Io_url[] subs = Io_mgr.Instance.QueryDir_args(dir).DirInclude_().ExecAsUrlAry();
int count = 0;
int subs_len = subs.length;
bldr.Usr_dlg().Prog_one(GRP_KEY, "count", "calculating: ~{0}", dir.Raw());
for (int i = 0; i < subs_len; i++) {
Io_url sub = subs[i];
if (sub.Type_dir())
count += Calc_count_articles_dir(ns, sub);
else
count += Calc_count_articles_fil(ns, sub);
}
return count;
}
int Calc_count_articles_fil(Xow_ns ns, Io_url fil) {
if (String_.Eq(fil.NameAndExt(), Xotdb_dir_info_.Name_reg_fil)) return 0;
int rv = 0;
byte[] bry = Io_mgr.Instance.LoadFilBry(fil);
Xob_xdat_file xdat_file = new Xob_xdat_file().Parse(bry, bry.length, fil);
Xowd_page_itm page = Xowd_page_itm.new_tmp();
int count = xdat_file.Count();
for (int i = 0; i < count; i++) {
byte[] ttl_bry = xdat_file.Get_bry(i);
Xotdb_page_itm_.Txt_ttl_load(page, ttl_bry);
rv += page.Redirected() ? 0 : 1;
}
return rv;
}
static final String GRP_KEY = "xowa.bldr.calc_stats";
public static Io_url Wiki_gfs_url(Xowe_wiki wiki) {return wiki.Fsys_mgr().Root_dir().GenSubFil_nest("cfg", "wiki_stats.gfs");}
}

View File

@@ -13,29 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs.cmds.texts.tdbs; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.cmds.*; import gplx.xowa.bldrs.cmds.texts.*;
import org.junit.*; import gplx.xowa.htmls.portal.*; import gplx.xowa.wikis.xwikis.*;
/** Checks that running the "init" cmd refreshes the html list of available wikis. */
public class Xob_init_base_tst {
	private Xob_init_base_fxt fxt = new Xob_init_base_fxt();
	@Before public void init() {
		fxt.Clear();
	}
	@Test public void Dirty_wiki_itms() {
		Xoae_app app = fxt.App();
		Xowe_wiki wiki = fxt.Wiki();
		Xoa_available_wikis_mgr available_wikis = fxt.App().Gui_mgr().Html_mgr().Portal_mgr().Wikis();

		// nothing registered yet
		Tfds.Eq("", available_wikis.Itms_as_html());	// assert

		// register an offline xwiki; list is not refreshed by the registration itself
		Xow_xwiki_itm en_wiki_itm = app.Usere().Wiki().Xwiki_mgr().Add_by_atrs("en.wikipedia.org", "en.wikipedia.org");
		en_wiki_itm.Offline_(Bool_.Y);	// simulate add via Available_from_fsys; DATE:2014-09-21
		Tfds.Eq("", available_wikis.Itms_as_html());	// still empty

		// "init" cmd ends: list now shows the wiki
		Xob_init_tdb init_cmd = new Xob_init_tdb(app.Bldr(), wiki);
		init_cmd.Cmd_end();	// mock "init" task
		Tfds.Eq("\n <li><a href=\"/site/en.wikipedia.org/\" class='xowa-hover-off'>en.wikipedia.org</a></li>", available_wikis.Itms_as_html());	// no longer empty
	}
}
/** Test fixture that creates the app / wiki once and resets the Io engine to memory on every Clear. */
class Xob_init_base_fxt {
	private Xoae_app app;
	private Xowe_wiki wiki;
	public void Clear() {
		boolean first_call = app == null;
		if (first_call) {	// app and wiki are created once and then reused
			app = Xoa_app_fxt.Make__app__edit();
			wiki = Xoa_app_fxt.Make__wiki__edit(app);
		}
		Io_mgr.Instance.InitEngine_mem();
	}
	public Xoae_app App() {return app;}
	public Xowe_wiki Wiki() {return wiki;}
}

View File

@@ -13,14 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs.cmds.texts.tdbs; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.cmds.*; import gplx.xowa.bldrs.cmds.texts.*;
import gplx.xowa.xtns.wbases.imports.*;
/** tdb variant of the "init" cmd: registers the tdb wikidata cmds and does nothing at the end of the run. */
public class Xob_init_tdb extends Xob_init_base {
	public Xob_init_tdb(Xob_bldr bldr, Xowe_wiki wiki) {
		this.Ctor(bldr, wiki);
	}
	@Override public String Cmd_key() {
		return Xob_cmd_keys.Key_tdb_text_init;
	}
	/** Adds the qid cmd first and the pid cmd second to the bldr's cmd mgr. */
	@Override public void Cmd_ini_wdata(Xob_bldr bldr, Xowe_wiki wiki) {
		String[] wdata_cmd_keys = new String[] {Xob_cmd_keys.Key_tdb_text_wdata_qid, Xob_cmd_keys.Key_tdb_text_wdata_pid};
		for (String wdata_cmd_key : wdata_cmd_keys)
			bldr.Cmd_mgr().Add_cmd(wiki, wdata_cmd_key);
	}
	@Override public void Cmd_run_end(Xowe_wiki wiki) {
		// nothing to do for tdb
	}
}

View File

@@ -13,124 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs.cmds.texts.tdbs; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.cmds.*; import gplx.xowa.bldrs.cmds.texts.*;
import gplx.core.ios.*; import gplx.core.ios.streams.*;
import gplx.xowa.wikis.tdbs.*; import gplx.xowa.wikis.tdbs.xdats.*;
/**
 * Io_make_cmd that groups incoming lines by key and writes them through an Xob_xdat_file_wtr.
 * Lines whose key equals the key of the pending item (held in cur_bfr) are appended to that item;
 * a line with a different key moves the pending item into the file writer and becomes the new pending item.
 * For every flushed file, a registry line "fil_count|first_key|last_key|itm_count" is collected in reg_bfr
 * and appended to reg_url by Sort_end.
 */
public class Xob_make_cmd_site implements Io_make_cmd {
// cur_bfr: pending item; reg_bfr: registry lines; reg_key_0 / reg_key_n: first / last key recorded for the registry line of the current file
Xob_xdat_file_wtr fil_wtr; Bry_bfr cur_bfr = Bry_bfr_.New(), reg_bfr = Bry_bfr_.New(), reg_key_0 = Bry_bfr_.New_w_size(512), reg_key_n = Bry_bfr_.New_w_size(512);
// itm_key_end: end of the key inside cur_bfr (used as the end pos when comparing / copying the pending key)
int make_fil_max = 65 * Io_mgr.Len_kb, fil_count = 0, itm_count = 0, itm_key_end = 0; Io_url reg_url;
public Xob_make_cmd_site(Gfo_usr_dlg usr_dlg, Io_url make_dir, int make_fil_max) {this.usr_dlg = usr_dlg; this.make_dir = make_dir; this.make_fil_max = make_fil_max;} Gfo_usr_dlg usr_dlg;
public Io_sort_cmd Make_dir_(Io_url v) {make_dir = v; return this;} Io_url make_dir;
// line_dlm defaults to Byte_ascii.Null; Sort_do replaces that default with the reader's Line_dlm on its first call
public byte Line_dlm() {return line_dlm;} public Xob_make_cmd_site Line_dlm_(byte v) {line_dlm = v; return this;} private byte line_dlm = Byte_ascii.Null;
/** Resets counters and creates the registry url and file writer under make_dir. */
public void Sort_bgn() {
fil_count = itm_count = itm_key_end = 0;
reg_url = make_dir.GenSubFil(Xotdb_dir_info_.Name_reg_fil);
fil_wtr = Xob_xdat_file_wtr.new_file_(make_fil_max, make_dir);
}
/** Processes the reader's current line: merges it into the pending item if the key is the same; else starts a new pending item. */
public void Sort_do(Io_line_rdr rdr) {
if (line_dlm == Byte_ascii.Null) line_dlm = rdr.Line_dlm();
int rdr_key_bgn = rdr.Key_pos_bgn(), rdr_key_end = rdr.Key_pos_end();
int rdr_key_len = rdr_key_end - rdr_key_bgn;
int rdr_val_bgn = rdr_key_end, /* NOTE: no +1: want to include fld_dlm for below*/ rdr_val_end = rdr.Itm_pos_end() - 1; // -1: ignore rdr_dlm
if (Bry_.Match(cur_bfr.Bfr(), 0, itm_key_end, rdr.Bfr(), rdr_key_bgn, rdr_key_end)) // key is same; add rest of line as val
cur_bfr.Add_mid(rdr.Bfr(), rdr_val_bgn, rdr_val_end);
else {
if (fil_wtr.FlushNeeded(cur_bfr.Len() + rdr_key_len)) Flush();
byte[] bfr = rdr.Bfr();
// first key of the file: the incoming key if nothing is pending; else the key of the pending item
if (reg_key_0.Len() == 0) {
if (cur_bfr.Len() == 0)
reg_key_0.Add_mid(bfr, rdr_key_bgn, rdr_key_end);
else
reg_key_0.Add_mid(cur_bfr.Bfr(), 0, itm_key_end);
}
// move the pending item into the file writer and record its key as the last key of the file
if (cur_bfr.Len() > 0) {
reg_key_n.Clear().Add_mid(cur_bfr.Bfr(), 0, itm_key_end);
fil_wtr.Bfr().Add_bfr_and_clear(cur_bfr);
fil_wtr.Add_idx(line_dlm);
}
cur_bfr.Add_mid(rdr.Bfr(), rdr.Itm_pos_bgn(), rdr.Itm_pos_end() - 1); // -1 to ignore closing newline
itm_key_end = rdr_key_len; // NOTE: must be set last
++itm_count;
}
}
/**
 * Same grouping as Sort_do, but for a caller-supplied byte array and positions instead of an Io_line_rdr.
 * Items longer than 100 mb are not buffered; they are written directly by Flush_large.
 */
public void Do_bry(byte[] bry, int key_bgn, int key_end, int itm_bgn, int itm_end) {
int val_bgn = key_end, /* NOTE: no +1: want to include fld_dlm for below*/ val_end = itm_end - 1; // -1: ignore rdr_dlm
if (Bry_.Match(cur_bfr.Bfr(), 0, itm_key_end, bry, key_bgn, key_end)) // key is same; add rest of line as val
cur_bfr.Add_mid(bry, val_bgn, val_end);
else { // key changed;
int itm_len = itm_end - itm_bgn;
if (cur_bfr.Len() > 0) { // pending itm
fil_wtr.Bfr().Add_bfr_and_clear(cur_bfr); // add cur_bfr to fil_bfr
fil_wtr.Add_idx(line_dlm); // add cur_itm to hdr
// NOTE(review): cur_bfr was passed to Add_bfr_and_clear just above, so cur_bfr.Len() is presumably 0 here -- confirm
if (fil_wtr.FlushNeeded(cur_bfr.Len() + itm_len))
Flush();
}
if (reg_key_0.Len() == 0) // regy.key_0 bfr is empty
reg_key_0.Add_mid(bry, key_bgn, key_end); // update reg_0key_0
reg_key_n.Clear().Add_mid(bry, key_bgn, key_end); // always update reg_key_n
if (itm_len > 100 * Io_mgr.Len_mb)
Flush_large(bry, itm_bgn, itm_end, itm_len);
else {
cur_bfr.Add_mid(bry, itm_bgn, itm_end - 1); // add incoming itm; -1 to ignore closing newline
// NOTE(review): Sort_do stores the key length (key_end - key_bgn) here, whereas this stores key_end; the two only agree when key_bgn is 0 -- confirm against callers
itm_key_end = key_end; // NOTE: must be set last
++itm_count;
}
}
}
/** Moves the pending item into the file writer, flushes the last file, and appends the registry lines to reg_url. */
public void Sort_end() {
reg_key_n.Clear().Add_mid(cur_bfr.Bfr(), 0, itm_key_end);
fil_wtr.Bfr().Add_bfr_and_clear(cur_bfr);
fil_wtr.Add_idx(line_dlm);
Flush();
Io_mgr.Instance.AppendFilBfr(reg_url, reg_bfr);
//fil_wtr.Rls(); cur_bfr.Rls(); fil_wtr.Rls(); reg_bfr.Rls(); reg_key_0.Rls(); reg_key_n.Rls();
}
// NOTE: commented-out IoStream-based variant of Flush_large; the active version below uses Io_stream_wtr instead
// private void Flush_large(byte[] bry, int itm_bgn, int itm_end, int itm_len) {
// ++itm_count;
// this.Flush_reg();
// fil_wtr.Add_idx_direct(itm_len, Byte_.Zero);
// IoStream stream = IoStream_.Null;
// try {
// stream = Io_mgr.Instance.OpenStreamWrite(fil_wtr.Fil_url());
// fil_wtr.FlushIdx(stream);
// stream.Write_and_flush(bry, itm_bgn, itm_end);
// fil_wtr.Clear();
// fil_wtr.Url_gen_add();
// }
// finally {stream.Rls();}
// }
/**
 * Writes a single large item straight to the file writer's current url through a raw stream writer,
 * instead of copying it into cur_bfr; the stream writer is released even if the write fails.
 */
private void Flush_large(byte[] bry, int itm_bgn, int itm_end, int itm_len) {
++itm_count;
this.Flush_reg();
fil_wtr.Add_idx_direct(itm_len, Byte_.Zero);
Io_stream_wtr wtr = null;
try {
wtr = Io_stream_wtr_.New__raw(fil_wtr.Fil_url());
wtr.Open();
fil_wtr.FlushIdx(wtr);
wtr.Write(bry, itm_bgn, itm_end);
wtr.Flush();
fil_wtr.Clear();
fil_wtr.Url_gen_add();
}
finally {if (wtr != null) wtr.Rls();}
}
/** Writes the registry line for the current file and then flushes the file itself. */
private void Flush() {
Flush_reg();
Flush_fil();
}
/** Appends "fil_count|reg_key_0|reg_key_n|itm_count\n" to reg_bfr, then resets itm_count and reg_key_0 for the next file. */
private void Flush_reg() {
reg_bfr
.Add_int_variable(fil_count++).Add_byte(Byte_ascii.Pipe)
.Add_bfr_and_preserve(reg_key_0).Add_byte(Byte_ascii.Pipe)
.Add_bfr_and_preserve(reg_key_n).Add_byte(Byte_ascii.Pipe)
.Add_int_variable(itm_count).Add_byte(Byte_ascii.Nl);
itm_count = 0;
reg_key_0.Clear();
}
/** Flushes the file writer; progress is reported only when the writer's file idx is a multiple of 10. */
private void Flush_fil() {
if (fil_wtr.Fil_idx() % 10 == 0)
usr_dlg.Prog_many("cmd_site", "prog", "saving: ~{0} ~{1}", reg_url.OwnerDir().NameOnly(), fil_wtr.Fil_url().NameOnly());
fil_wtr.Flush(usr_dlg);
}
}

View File

@@ -13,24 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs.cmds.texts.tdbs; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.cmds.*; import gplx.xowa.bldrs.cmds.texts.*;
import gplx.core.ios.*; import gplx.xowa.wikis.tdbs.*; import gplx.xowa.wikis.data.tbls.*;
import gplx.xowa.bldrs.wkrs.*;
/** Page worker that dumps one id row per page and, at the end, builds the site's id dir from the dump. */
public class Xob_make_id_wkr extends Xob_itm_dump_base implements Xob_page_wkr, Gfo_invk {
	public static final String KEY = "core.make_id";
	static final int row_fixed_len = 25 + 1 + 7; // 25=5 base_85 flds; 1=Redirect; 7=dlm
	public Xob_make_id_wkr(Xob_bldr bldr, Xowe_wiki wiki) {
		this.Cmd_ctor(bldr, wiki);
	}
	public String Page_wkr__key() {return KEY;}
	public void Page_wkr__bgn() {
		Io_url id_dir = wiki.Tdb_fsys_mgr().Site_dir().GenSubDir(Xotdb_dir_info_.Name_id);
		this.Init_dump(KEY, id_dir);
	}
	/** Appends the page's id row to the dump bfr; the bfr is written to the next dump file first if the row would exceed the file limit. */
	public void Page_wkr__run(Xowd_page_itm page) {
		byte[] ttl = page.Ttl_page_db();
		boolean bfr_is_full = dump_bfr.Len() + row_fixed_len + ttl.length > dump_fil_len;
		if (bfr_is_full)
			Io_mgr.Instance.AppendFilBfr(dump_url_gen.Nxt_url(), dump_bfr);
		Xotdb_page_itm_.Txt_id_save(dump_bfr, page);
	}
	public void Page_wkr__run_cleanup() {}
	public void Page_wkr__end() {
		Xob_make_cmd_site make_cmd = new Xob_make_cmd_site(bldr.Usr_dlg(), make_dir, make_fil_len);
		this.Term_dump(make_cmd);
		if (delete_temp)
			Io_mgr.Instance.DeleteDirDeep(temp_dir);
	}
}

View File

@@ -13,22 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs.cmds.texts.tdbs; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.cmds.*; import gplx.xowa.bldrs.cmds.texts.*;
import gplx.core.ios.*; import gplx.xowa.wikis.data.tbls.*; import gplx.xowa.wikis.tdbs.*; import gplx.xowa.wikis.nss.*;
import gplx.xowa.bldrs.wkrs.*;
/** Page worker that dumps the id, date, title and text of every page in the template namespace. */
public class Xob_parse_dump_templates_cmd extends Xob_itm_dump_base implements Xob_page_wkr, Gfo_invk {
	public static final String KEY = "parse.dump_templates";
	public static final int FixedLen_page = 1 + 5 + 1 + 5 + 1 + 1 + 1; // \tid|date|title|text\n
	public Xob_parse_dump_templates_cmd(Xob_bldr bldr, Xowe_wiki wiki) {
		this.Cmd_ctor(bldr, wiki);
	}
	public String Page_wkr__key() {return KEY;}
	public void Page_wkr__bgn() {
		Init_dump(KEY);
	}
	/** Ignores pages outside the template ns; else appends the page to the dump bfr, flushing first if it would overflow the dump file size. */
	public void Page_wkr__run(Xowd_page_itm page) {
		boolean page_is_template = page.Ns_id() == Xow_ns_.Tid__template;
		if (!page_is_template) return;
		int page_id = page.Id();
		byte[] ttl_bry = page.Ttl_page_db();
		byte[] text_bry = page.Text();
		int row_len = FixedLen_page + ttl_bry.length + text_bry.length;
		if (row_len + dump_bfr.Len() > dump_fil_len)
			super.Flush_dump();
		Xotdb_page_itm_.Txt_page_save(dump_bfr, page_id, page.Modified_on(), ttl_bry, text_bry, true);
	}
	public void Page_wkr__run_cleanup() {}
	public void Page_wkr__end() {
		super.Flush_dump();
	}
}

View File

@@ -13,35 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs.cmds.texts.xmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.cmds.*; import gplx.xowa.bldrs.cmds.texts.*;
import gplx.xowa.wikis.nss.*;
// PURPOSE: immutable holder for the values read from a dump's <siteinfo> node; To_bfr serializes them as pipe-delimited lines
public class Xob_siteinfo_nde {
	private final String site_name, db_name, generator, case_dflt;
	private final byte[] main_page;
	private final Xow_ns_mgr ns_mgr;
	public Xob_siteinfo_nde(String site_name, String db_name, byte[] main_page, String generator, String case_dflt, Xow_ns_mgr ns_mgr) {
		this.site_name = site_name;
		this.db_name = db_name;
		this.main_page = main_page;
		this.generator = generator;
		this.case_dflt = case_dflt;
		this.ns_mgr = ns_mgr;
	}
	public String		Site_name()	{return site_name;}
	public String		Db_name()	{return db_name;}
	public byte[]		Main_page()	{return main_page;}
	public String		Generator()	{return generator;}
	public String		Case_dflt()	{return case_dflt;}
	public Xow_ns_mgr	Ns_mgr()	{return ns_mgr;}
	// writes one header line "main_page|case|site_name|db_name|generator", then one "id|case|name" line per ns in ord order
	public void To_bfr(Bry_bfr bfr) {
		// header line
		bfr.Add(main_page).Add_byte_pipe();
		bfr.Add_str_u8(case_dflt).Add_byte_pipe();
		bfr.Add_str_u8(site_name).Add_byte_pipe();
		bfr.Add_str_u8(db_name).Add_byte_pipe();
		bfr.Add_str_u8(generator).Add_byte_nl();

		// one line per ns
		int ns_count = ns_mgr.Count();
		int ns_idx = 0;
		while (ns_idx < ns_count) {
			Xow_ns cur_ns = ns_mgr.Ords_get_at(ns_idx);
			String case_str = Xow_ns_case_.To_str(cur_ns.Case_match());
			bfr.Add_int_variable(cur_ns.Id()).Add_byte_pipe();
			bfr.Add_str_u8(case_str).Add_byte_pipe();
			bfr.Add(cur_ns.Name_ui()).Add_byte_nl();
			++ns_idx;
		}
	}
}

View File

@@ -13,67 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs.cmds.texts.xmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.cmds.*; import gplx.xowa.bldrs.cmds.texts.*;
import gplx.core.ios.*; import gplx.core.ios.streams.*; import gplx.langs.xmls.*; // NOTE: gplx.langs.xmls does not support Android; DATE:2013-01-17
import gplx.xowa.wikis.nss.*;
// PURPOSE: locates the <siteinfo> node in a wiki xml dump and parses it into wiki props and namespaces
public class Xob_siteinfo_parser_ {
	private static final byte[] Bry_siteinfo_bgn = Bry_.new_a7("<siteinfo>"), Bry_siteinfo_end = Bry_.new_a7("</siteinfo>");

	// returns the slice of the buffered dump delimited by "<siteinfo>" and "</siteinfo>"; throws if either tag is not found; always releases the buffered rdr
	public static byte[] Extract(Io_stream_rdr src_rdr) {
		Io_buffer_rdr bfr_rdr = Io_buffer_rdr.Null;
		try {
			bfr_rdr = Io_buffer_rdr.new_(src_rdr, Io_mgr.Len_mb); // ASSUME: siteInfo is fully contained in the 1st MB of the src_xml
			byte[] xml = bfr_rdr.Bfr();
			int siteinfo_bgn = Bry_find_.Find_fwd(xml, Bry_siteinfo_bgn, 0);
			if (siteinfo_bgn == Bry_find_.Not_found)
				throw Err_.new_("Xob_siteinfo_parser_", "could not find <siteinfo>", "src", xml);
			int siteinfo_end = Bry_find_.Move_fwd(xml, Bry_siteinfo_end, siteinfo_bgn);
			if (siteinfo_end == Bry_find_.Not_found)
				throw Err_.new_("Xob_siteinfo_parser_", "could not find </siteinfo>", "src", xml);
			return Bry_.Mid(xml, siteinfo_bgn, siteinfo_end);
		}
		finally {
			bfr_rdr.Rls();
		}
	}

	// parses siteinfo_bry (which also repopulates wiki.Ns_mgr()) and copies the results into wiki.Props()
	public static void Parse(byte[] siteinfo_bry, Xowe_wiki wiki) {
		String siteinfo_str = String_.new_u8(siteinfo_bry);
		Xob_siteinfo_nde nde = Parse(siteinfo_str, wiki.Ns_mgr());
		wiki.Props().Bldr_version_(Bry_.new_a7(Xoa_app_.Version));
		byte[] main_page = nde.Main_page();
		wiki.Props().Main_page_(main_page);
		wiki.Props().Siteinfo_mainpage_(main_page);

		// misc props are stored as "site_name|generator|case|"
		Bry_bfr misc_bfr = Bry_bfr_.New();
		misc_bfr.Add_str_u8(nde.Site_name()).Add_byte_pipe();
		misc_bfr.Add_str_u8(nde.Generator()).Add_byte_pipe();
		misc_bfr.Add_str_u8(nde.Case_dflt()).Add_byte_pipe();
		wiki.Props().Siteinfo_misc_(misc_bfr.To_bry_and_clear());
	}

	// walks the direct children of the root node; unknown children are ignored; fields without a matching child keep their defaults
	public static Xob_siteinfo_nde Parse(String xdoc_src, Xow_ns_mgr ns_mgr) {
		XmlNde root = XmlDoc_.parse(xdoc_src).Root();
		String site_name = "", db_name = "", generator = "";
		String case_dflt = Xow_ns_case_.Key__1st;
		byte[] main_page = Xoa_page_.Main_page_bry;
		int subs_len = root.SubNdes().Count();
		for (int idx = 0; idx < subs_len; ++idx) {
			XmlNde sub = root.SubNdes().Get_at(idx);
			String tag = sub.Name();
			if (String_.Eq(tag, "sitename"))	{site_name = sub.Text_inner(); continue;}
			if (String_.Eq(tag, "generator"))	{generator = sub.Text_inner(); continue;}
			if (String_.Eq(tag, "case"))		{case_dflt = sub.Text_inner(); continue;}
			if (String_.Eq(tag, "dbname"))		{db_name = sub.Text_inner(); continue;}
			if (String_.Eq(tag, "base"))		{main_page = Parse_base(Bry_.new_u8(sub.Text_inner())); continue;}
			if (String_.Eq(tag, "namespaces"))	{Parse_namespaces(sub, ns_mgr, case_dflt); continue;}	// NOTE: uses the case_dflt read so far
			// JAVA.XML.#text: ignore unexpected #text nodes (and any other unrecognized node)
		}
		return new Xob_siteinfo_nde(site_name, db_name, main_page, generator, case_dflt, ns_mgr);
	}

	// extracts the page name from the main page url; EX: "http://en.wikipedia.org/wiki/Main_Page" -> "Main_Page"
	private static byte[] Parse_base(byte[] url) {
		int wiki_pos = Bry_find_.Find_fwd(url, gplx.xowa.htmls.hrefs.Xoh_href_.Bry__wiki, 0);
		int page_bgn;
		if (wiki_pos != Bry_find_.Not_found) {	// "/wiki/" found
			page_bgn = wiki_pos + gplx.xowa.htmls.hrefs.Xoh_href_.Len__wiki;	// position bgn after "/wiki/"
		}
		else {									// "/wiki/" not found; EX: "http://mywiki/My_main_page"
			int slash_pos = Bry_find_.Find_bwd(url, Byte_ascii.Slash);	// ASSUME last segment is page
			if (slash_pos == Bry_find_.Not_found)
				throw Err_.new_("Xob_siteinfo_parser_", "could not parse main page url", "url", url);
			page_bgn = slash_pos + 1;	// add 1 to position after slash
		}
		return Bry_.Mid(url, page_bgn, url.length);	// extract everything after "page_bgn"
	}

	// rebuilds ns_mgr from the <namespace> children of grp_nde
	private static void Parse_namespaces(XmlNde grp_nde, Xow_ns_mgr ns_mgr, String case_dflt) {
		ns_mgr.Clear(); // NOTE: wipe out any preexisting ns; use siteinfo.xml as definitive list
		int itms_len = grp_nde.SubNdes().Count();
		for (int idx = 0; idx < itms_len; ++idx) {
			XmlNde ns_nde = grp_nde.SubNdes().Get_at(idx);
			if (ns_nde.Atrs().Count() == 0) continue; // JAVA.XML.#text: ignore unexpected #text nodes
			String key = ns_nde.Atrs().FetchValOr("key", null);
			if (key == null)
				throw Err_.new_("Xob_siteinfo_parser_", "missing key for ns", "ns_xml", ns_nde.Text_inner());
			String case_str = ns_nde.Atrs().FetchValOr("case", case_dflt); // NOTE: some dumps can omit "case"; EX: https://dumps.wikimedia.org/sep11wiki; DATE:2015-11-01
			String ns_name = ns_nde.Text_inner();
			ns_mgr.Add_new(Int_.Parse(key), Bry_.new_u8(ns_name), Xow_ns_case_.To_tid(case_str), false);
		}
		ns_mgr.Init_w_defaults();
	}
}

View File

@@ -13,96 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs.cmds.texts.xmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.cmds.*; import gplx.xowa.bldrs.cmds.texts.*;
import org.junit.*; import gplx.xowa.wikis.nss.*;
// PURPOSE: tests Xob_siteinfo_parser_.Parse by serializing the parsed node with To_bfr and comparing lines
public class Xob_siteinfo_parser__tst {
	private final Xob_siteinfo_parser__fxt fxt = new Xob_siteinfo_parser__fxt();
	@Test public void Basic__simplewikt() { // PURPOSE: basic test of siteinfo parse; DATE:2015-11-01
		fxt.Test__parse(String_.Concat_lines_nl_skip_last
		( " <siteinfo>"
		, " <sitename>Wiktionary</sitename>"
		, " <dbname>simplewiktionary</dbname>"
		, " <base>https://simple.wiktionary.org/wiki/Main_Page</base>" // NOTE: must be <base>; parser only reads main page url from a "base" node; any other name silently falls back to the default main page
		, " <generator>MediaWiki 1.27.0-wmf.3</generator>"
		, " <case>case-sensitive</case>"
		, " <namespaces>"
		, " <namespace key=\"-2\" case=\"case-sensitive\">Media</namespace>"
		, " <namespace key=\"-1\" case=\"first-letter\">Special</namespace>"
		, " <namespace key=\"0\" case=\"case-sensitive\" />"
		, " <namespace key=\"1\" case=\"case-sensitive\">Talk</namespace>"
		, " <namespace key=\"2\" case=\"first-letter\">User</namespace>"
		, " <namespace key=\"3\" case=\"first-letter\">User talk</namespace>"
		, " <namespace key=\"4\" case=\"case-sensitive\">Wiktionary</namespace>"
		, " <namespace key=\"5\" case=\"case-sensitive\">Wiktionary talk</namespace>"
		, " <namespace key=\"6\" case=\"case-sensitive\">File</namespace>"
		, " <namespace key=\"7\" case=\"case-sensitive\">File talk</namespace>"
		, " <namespace key=\"8\" case=\"first-letter\">MediaWiki</namespace>"
		, " <namespace key=\"9\" case=\"first-letter\">MediaWiki talk</namespace>"
		, " <namespace key=\"10\" case=\"case-sensitive\">Template</namespace>"
		, " <namespace key=\"11\" case=\"case-sensitive\">Template talk</namespace>"
		, " <namespace key=\"12\" case=\"case-sensitive\">Help</namespace>"
		, " <namespace key=\"13\" case=\"case-sensitive\">Help talk</namespace>"
		, " <namespace key=\"14\" case=\"case-sensitive\">Category</namespace>"
		, " <namespace key=\"15\" case=\"case-sensitive\">Category talk</namespace>"
		, " <namespace key=\"828\" case=\"case-sensitive\">Module</namespace>"
		, " <namespace key=\"829\" case=\"case-sensitive\">Module talk</namespace>"
		, " <namespace key=\"2300\" case=\"case-sensitive\">Gadget</namespace>"
		, " <namespace key=\"2301\" case=\"case-sensitive\">Gadget talk</namespace>"
		, " <namespace key=\"2302\" case=\"case-sensitive\">Gadget definition</namespace>"
		, " <namespace key=\"2303\" case=\"case-sensitive\">Gadget definition talk</namespace>"
		, " <namespace key=\"2600\" case=\"first-letter\">Topic</namespace>"
		, " </namespaces>"
		, " </siteinfo>"
		), String_.Concat_lines_nl
		( "Main_Page|case-sensitive|Wiktionary|simplewiktionary|MediaWiki 1.27.0-wmf.3"
		, "-2|case-sensitive|Media"
		, "-1|first-letter|Special"
		, "0|case-sensitive|"
		, "1|case-sensitive|Talk"
		, "2|first-letter|User"
		, "3|first-letter|User talk"
		, "4|case-sensitive|Wiktionary"
		, "5|case-sensitive|Wiktionary talk"
		, "6|case-sensitive|File"
		, "7|case-sensitive|File talk"
		, "8|first-letter|MediaWiki"
		, "9|first-letter|MediaWiki talk"
		, "10|case-sensitive|Template"
		, "11|case-sensitive|Template talk"
		, "12|case-sensitive|Help"
		, "13|case-sensitive|Help talk"
		, "14|case-sensitive|Category"
		, "15|case-sensitive|Category talk"
		, "828|case-sensitive|Module"
		, "829|case-sensitive|Module talk"
		, "2300|case-sensitive|Gadget"
		, "2301|case-sensitive|Gadget talk"
		, "2302|case-sensitive|Gadget definition"
		, "2303|case-sensitive|Gadget definition talk"
		, "2600|first-letter|Topic"
		, "2601|first-letter|2601" // NOTE: Topic_talk doesn't exist in <siteinfo>, but added by XOWA b/c every subj ns must have a talk ns
		));
	}
	@Test public void Case_dflt() { // PURPOSE: missing case should use dflt DATE:2015-11-01
		fxt.Test__parse(String_.Concat_lines_nl_skip_last
		( " <siteinfo>"
		, " <case>case-sensitive</case>"
		, " <namespaces>"
		, " <namespace key=\"-2\">Media</namespace>"
		, " </namespaces>"
		, " </siteinfo>"
		), String_.Concat_lines_nl
		( "Main_Page|case-sensitive|||"	// no <base> / <sitename> / <dbname> / <generator>: main page and strings fall back to parser defaults
		, "-2|case-sensitive|Media"
		));
	}
}
// PURPOSE: test fixture; parses siteinfo xml with a reusable ns_mgr and compares the To_bfr output line-by-line
class Xob_siteinfo_parser__fxt {
	private final Xow_ns_mgr ns_mgr = new Xow_ns_mgr(gplx.xowa.langs.cases.Xol_case_mgr_.U8());
	private final Bry_bfr bfr = Bry_bfr_.New();
	public void Test__parse(String src_str, String expd) {
		Xob_siteinfo_parser_.Parse(src_str, ns_mgr).To_bfr(bfr);
		String actl = bfr.To_str_and_clear();	// clear so the bfr can be reused by the next call
		Tfds.Eq_str_lines(expd, actl);
	}
}

View File

@@ -13,26 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs.cmds.utils; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.cmds.*;
import gplx.xowa.bldrs.wkrs.*;
import gplx.gfui.*; import gplx.gfui.kits.core.*;
// PURPOSE: bldr cmd ("ui.alert") that shows a message in an ok-dialog and echoes it to the progress log; only acts when the gui kit is SWT
public class Xob_alert_cmd extends Xob_cmd__base implements Xob_cmd {
	public static final String BLDR_CMD_KEY = "ui.alert";
	public static final Xob_cmd Prototype = new Xob_alert_cmd(null, null);
	private static final String Invk__text_ = "text_";
	private String msg = "no message specified";
	public Xob_alert_cmd(Xob_bldr bldr, Xowe_wiki wiki) {super(bldr, wiki);}
	public Xob_alert_cmd Msg_(String v) {
		this.msg = v;
		return this;
	}
	@Override public String Cmd_key() {return BLDR_CMD_KEY;}
	@Override public Xob_cmd Cmd_clone(Xob_bldr bldr, Xowe_wiki wiki) {return new Xob_alert_cmd(bldr, wiki);}
	@Override public void Cmd_run() {
		Gfui_kit gui_kit = app.Gui_mgr().Kit();
		if (gui_kit.Tid() == Gfui_kit_.Swt_tid) {	// any other kit: do nothing
			gui_kit.Ask_ok("", "", msg);
			Xoa_app_.Usr_dlg().Prog_many("", "", msg);
		}
	}
	// "text_" sets the message; every other key is reported as unhandled
	@Override public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
		if (!ctx.Match(k, Invk__text_)) return Gfo_invk_.Rv_unhandled;
		this.msg = m.ReadStr("v");
		return this;
	}
}

View File

@@ -13,119 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs.cmds.utils; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.cmds.*;
import gplx.core.criterias.*;
import gplx.xowa.bldrs.wkrs.*;
// PURPOSE: bldr cmd that removes build by-products from a wiki's root dir: the source bz2, *.xml, tdb dirs, sqlite dbs, files matching patterns, and the "tmp" dir
// Each action is opt-in and is enabled either through the fluent setters or through Invk keys.
public class Xob_cleanup_cmd extends Xob_itm_basic_base implements Xob_cmd {
	private String bz2_cmd;
	private boolean delete_all, delete_tmp;
	private Criteria_ioMatch[] delete_by_match_ary;
	public Xob_cleanup_cmd(Xob_bldr bldr, Xowe_wiki wiki) {this.Cmd_ctor(bldr, wiki);}
	public String Cmd_key() {return Xob_cmd_keys.Key_util_cleanup;}
	public Xob_cleanup_cmd Delete_sqlite3_(boolean v){delete_sqlite3 = v; return this;} private boolean delete_sqlite3;
	public Xob_cleanup_cmd Delete_xml_(boolean v) {delete_xml = v; return this;} private boolean delete_xml;
	public Xob_cleanup_cmd Delete_tdb_(boolean v) {delete_tdb = v; return this;} private boolean delete_tdb;
	public void Bz2_fil_(Io_url v) {bz2_fil = v;} private Io_url bz2_fil;
	// runs every enabled cleanup action in a fixed order: bz2, xml, tdb, sqlite3, all, by_match, tmp
	public void Cmd_run() {
		Io_url wiki_root_dir = wiki.Fsys_mgr().Root_dir();
		if (bz2_fil != null) {
			// bz2_cmd chooses what happens to the source dump; any other value (including unset) leaves the file alone
			if (String_.Eq(bz2_cmd, "delete"))
				Io_mgr.Instance.DeleteFil(bz2_fil);
			else if (String_.Eq(bz2_cmd, "move"))
				Io_mgr.Instance.MoveFil(bz2_fil, bz2_fil.OwnerDir().OwnerDir().GenSubFil_nest("done", bz2_fil.NameAndExt())); // move to sibling "done" dir of the file's owner dir
		}
		if (delete_xml) Io_mgr.Instance.DeleteFil(Xob_page_wkr_cmd.Find_fil_by(wiki_root_dir, "*.xml"));
		if (delete_tdb) {
			usr_dlg.Note_many("", "", "bldr.wiki:deleting tdb wiki");
			Delete_tdb(wiki_root_dir);
		}
		if (delete_sqlite3)
			Delete_wiki_sql(wiki);
		if (delete_all) {
			Io_mgr.Instance.DeleteDir_cmd(wiki_root_dir).Exec(); // do not delete subdirs; needed to support "/prv" for fsdb; DATE:2015-04-01
			Io_mgr.Instance.DeleteDirDeep(app.Usere().Fsys_mgr().Wiki_root_dir().GenSubDir(wiki.Domain_str())); // delete css dir; DATE:2015-07-06
		}
		if (delete_by_match_ary != null)
			Delete_by_match(wiki_root_dir, delete_by_match_ary);
		if (delete_tmp)
			Io_mgr.Instance.DeleteDirDeep(wiki_root_dir.GenSubDir("tmp"));
	}
	public void Cmd_init(Xob_bldr bldr) {}
	public void Cmd_bgn(Xob_bldr bldr) {}
	public void Cmd_end() {}
	public void Cmd_term() {}
	@Override public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
		if (ctx.Match(k, Invk_bz2_cmd_)) bz2_cmd = m.ReadStr("v");
		else if (ctx.Match(k, Invk_delete_xml_)) delete_xml = m.ReadYn("v");
		else if (ctx.Match(k, Invk_delete_wiki_)) delete_tdb = m.ReadYn("v");	// NOTE: "delete_wiki_" maps to the tdb flag
		else if (ctx.Match(k, Invk_delete_sqlite3_)) delete_sqlite3 = m.ReadYn("v");
		else if (ctx.Match(k, Invk_delete_all_)) delete_all = m.ReadYn("v");
		else if (ctx.Match(k, Invk_bz2_fil_)) bz2_fil = m.ReadIoUrl("v");
		else if (ctx.Match(k, Invk_delete_by_match_)) delete_by_match_ary = Delete_by_match_parse(m.ReadStr("v"));
		else if (ctx.Match(k, Invk_delete_tmp_)) delete_tmp = m.ReadYn("v");
		else return super.Invk(ctx, ikey, k, m);
		return this;
	}
	private static final String Invk_bz2_cmd_ = "bz2_cmd_", Invk_bz2_fil_ = "bz2_fil_"
	, Invk_delete_xml_ = "delete_xml_", Invk_delete_wiki_ = "delete_wiki_", Invk_delete_sqlite3_ = "delete_sqlite3_"
	, Invk_delete_all_ = "delete_all_"
	, Invk_delete_tmp_ = "delete_tmp_"
	, Invk_delete_by_match_ = "delete_by_match"	// NOTE: no trailing "_" unlike sibling keys; left as-is so existing scripts keep working
	;
	// splits a "|"-delimited list of patterns into one matcher per pattern; EX: "*.xml|*.txt"
	private static Criteria_ioMatch[] Delete_by_match_parse(String raw) {
		String[] match_ary = String_.Split(raw, '|');
		int match_ary_len = match_ary.length;
		Criteria_ioMatch[] rv = new Criteria_ioMatch[match_ary_len];
		for (int i = 0; i < match_ary_len; i++)
			rv[i] = Criteria_ioMatch.parse(true, match_ary[i], false);
		return rv;
	}
	// deletes each file directly under dir that matches at least one of the patterns
	private static void Delete_by_match(Io_url dir, Criteria_ioMatch[] match_ary) {
		int match_len = match_ary.length;
		Io_url[] subs = Io_mgr.Instance.QueryDir_fils(dir);
		int subs_len = subs.length;
		for (int i = 0; i < subs_len; i++) {
			Io_url sub = subs[i];
			for (int j = 0; j < match_len; j++) {
				if (!match_ary[j].Matches(sub)) continue;
				if (sub.Type_fil())
					Io_mgr.Instance.DeleteFil(sub);
				break;	// stop at 1st match; otherwise a file matching several patterns would be passed to DeleteFil again after it is already gone
			}
		}
	}
	// deletes (recursively) every subdir of the wiki root whose name is recognized as a tdb dir
	private static void Delete_tdb(Io_url wiki_root_dir) {
		Io_url[] dirs = Io_mgr.Instance.QueryDir_args(wiki_root_dir).DirOnly_().DirInclude_().ExecAsUrlAry();
		int dirs_len = dirs.length;
		for (int i = 0; i < dirs_len; i++) {
			Io_url dir = dirs[i];
			if (gplx.xowa.wikis.tdbs.Xotdb_dir_info_.Dir_name_is_tdb(dir.NameOnly()))
				Io_mgr.Instance.DeleteDirDeep(dir);
		}
	}
	// deletes the ".xowa" / ".sqlite3" files in the wiki root, except the "-file" and "-html" dbs; logs the number deleted
	public static void Delete_wiki_sql(Xowe_wiki wiki) {
		Gfo_usr_dlg usr_dlg = wiki.Appe().Usr_dlg(); Io_url wiki_root_dir = wiki.Fsys_mgr().Root_dir();
		if (wiki.Db_mgr().Tid() == gplx.xowa.wikis.dbs.Xodb_mgr_sql.Tid_sql) // NOTE: must check; if empty dir (or text db) than db_mgr will be txt
			wiki.Db_mgr_as_sql().Core_data_mgr().Rls(); // NOTE: if sqlite files, must rls;
		Io_url[] files = Io_mgr.Instance.QueryDir_fils(wiki_root_dir);
		int files_len = files.length;
		int deleted = 0;
		String file_prefix = wiki.Domain_str() + "-file"; // NOTE: skip anything with "-file"; EX: "en.wikipedia.org-file.xowa"
		String html_prefix = wiki.Domain_str() + "-html"; // NOTE: skip anything with "-html"; EX: "en.wikipedia.org-html-ns.000-db.002.xowa"
		for (int i = 0; i < files_len; i++) {
			Io_url url = files[i];
			if ( !String_.Eq(url.Ext(), ".xowa")
				&& !String_.Eq(url.Ext(), ".sqlite3"))
				continue;	// not a db file
			if ( String_.Has_at_bgn(url.NameAndExt(), file_prefix)
				|| String_.Has_at_bgn(url.NameAndExt(), html_prefix)
				) continue; // skip
			Io_mgr.Instance.DeleteFil(url);
			deleted++;
		}
		usr_dlg.Note_many("", "delete_wiki", "deleting sqlite3 files: ~{0} ~{1}", deleted, wiki_root_dir.Raw());
	}
}

View File

@@ -13,31 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs.cmds.utils; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.cmds.*;
import gplx.core.brys.fmtrs.*;
import gplx.xowa.wikis.domains.*;
import gplx.xowa.bldrs.wms.dumps.*;
// PURPOSE: runs one gfs script per dump file found in the "<wiki_dir>/#dump/todo" dir; the script is built from a template with keys "bz2_fil" and "wiki_key"
public class Xob_core_batch_utl implements Gfo_invk {
	public static String Dir_dump = "#dump";
	private static final String Invk_owner = "owner", Invk_run = "run";
	private final Xob_bldr bldr;
	private final Bry_fmtr fmtr = Bry_fmtr.keys_("bz2_fil", "wiki_key");
	public Xob_core_batch_utl(Xob_bldr bldr, byte[] raw) {
		this.bldr = bldr;
		fmtr.Fmt_(raw);	// raw is the script template
	}
	private void Run() {
		Io_url todo_dir = bldr.App().Fsys_mgr().Wiki_dir().GenSubDir_nest(Dir_dump, "todo");
		Io_url[] dump_urls = Io_mgr.Instance.QueryDir_fils(todo_dir);
		Bry_bfr script_bfr = Bry_bfr_.Reset(Io_mgr.Len_kb);
		for (Io_url dump_url : dump_urls) {
			// file name (without ext) is parsed to get the wiki's domain
			byte[] dump_name = Bry_.new_u8(dump_url.NameOnly());
			String domain_str = Xowm_dump_file_.parse(dump_name).Domain_itm().Domain_str();
			fmtr.Bld_bfr_many(script_bfr, dump_url.Raw(), domain_str);
			bldr.Usr_dlg().Note_many("", "", "starting script for ~{0}", domain_str);
			bldr.App().Gfs_mgr().Run_str(script_bfr.To_str_and_clear());
		}
	}
	// "owner" returns the bldr's cmd_mgr; "run" executes the batch; NOTE: unknown keys fall through and return this
	public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
		if (ctx.Match(k, Invk_owner)) return bldr.Cmd_mgr();
		if (ctx.Match(k, Invk_run)) Run();
		return this;
	}
}

View File

@@ -13,41 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs.cmds.utils; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.cmds.*;
import gplx.core.ios.*; import gplx.core.threads.*; import gplx.core.envs.*;
import gplx.xowa.bldrs.wkrs.*;
// PURPOSE: bldr cmd that decompresses a bz2 dump into the wiki's dir by running the app's configured bz2 decompress process
public class Xob_decompress_bz2_cmd extends Xob_itm_basic_base implements Xob_cmd {
	public Xob_decompress_bz2_cmd(Xob_bldr bldr, Xowe_wiki wiki) {this.Cmd_ctor(bldr, wiki);}
	public String Cmd_key() {return Xob_cmd_keys.Key_decompress_bz2;}
	public void Cmd_init(Xob_bldr bldr) {}
	public void Cmd_bgn(Xob_bldr bldr) {}
	// NOTE: src / trg are only set by the "src_" invk; they are not defaulted here
	public void Cmd_run() {
		if (Io_mgr.Instance.ExistsFil(trg)) return; // file already exists; don't decompress again
		usr_dlg.Note_many(GRP_KEY, "bgn", "decompressing ~{0} to ~{1}", src.Raw(), trg.Raw()); // fmt now has a placeholder for each arg; trg was passed but never shown
		Decompress(bldr.App(), src.Raw(), trg);
	}
	public void Cmd_end() {}
	public void Cmd_term() {}
	@Override public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
		if (ctx.Match(k, Invk_src_)) this.Src_(m.ReadIoUrl("v"));
		else return super.Invk(ctx, ikey, k, m);
		return this;
	} private static final String Invk_src_ = "src_";
	// sets src and derives trg as "<wiki_dir>/<domain>/<src name without last ext>"
	private void Src_(Io_url v) {
		src = v;
		trg = bldr.App().Fsys_mgr().Wiki_dir().GenSubFil_nest(wiki.Domain_str(), v.NameOnly()); // NOTE: NameOnly() will take "enwiki.xml.bz2" and make it "enwiki.xml"
	} Io_url src, trg;
	static final String GRP_KEY = "xowa.bldr.cmd.decompress_bz2";
	// starts the decompress process in async mode, then polls once per second, reporting the size of trg_fil, until the exit code leaves Exit_init
	// NOTE: always returns true; the process's final exit code is not inspected
	public static boolean Decompress(Xoae_app app, String src_fil, Io_url trg_fil) {
		Io_mgr.Instance.CreateDirIfAbsent(trg_fil.OwnerDir()); // 7zip will fail if dir does not exist
		Process_adp decompress = app.Prog_mgr().App_decompress_bz2();
		decompress.Prog_dlg_(app.Usr_dlg()).Run_mode_(Process_adp.Run_mode_async);
		decompress.Run(src_fil, trg_fil, trg_fil.OwnerDir().Xto_api());
		while (decompress.Exit_code() == Process_adp.Exit_init) {
			String size = gplx.core.ios.Io_size_.To_str(Io_mgr.Instance.QueryFil(trg_fil).Size());
			app.Usr_dlg().Prog_many(GRP_KEY, "decompress", "decompressing: ~{0}", size);
			Thread_adp_.Sleep(1000);
		}
		return true;
	}
}

View File

@@ -13,35 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs.cmds.utils; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.cmds.*;
import gplx.dbs.*; import gplx.core.ios.*; import gplx.core.envs.*;
import gplx.xowa.bldrs.wkrs.*; import gplx.xowa.bldrs.wms.dumps.*;
// PURPOSE: bldr cmd that deletes files in the wiki's root dir (non-recursive) whose names match any of the given patterns
public class Xob_delete_cmd extends Xob_cmd__base implements Xob_cmd {
	private String[] patterns_ary = String_.Ary_empty;
	public Xob_delete_cmd(Xob_bldr bldr, Xowe_wiki wiki) {super(bldr, wiki);}
	public Xob_delete_cmd Patterns_ary_(String... v) {this.patterns_ary = v; return this;}
	@Override public String Cmd_key() {return Xob_cmd_keys.Key_util_delete;}
	@Override public void Cmd_run() {
		int len = patterns_ary.length; if (len == 0) return;	// nothing to match; exit early
		// build filter EX: '*.xml|*.txt'
		Bry_bfr bfr = Bry_bfr_.New();
		for (int i = 0; i < len; ++i) {
			String pattern = patterns_ary[i];
			if (i != 0) bfr.Add_byte_pipe();
			bfr.Add_str_u8(pattern);
		}
		// get files; iterate and delete
		String file_pattern = bfr.To_str_and_clear();
		Io_url[] files = Io_mgr.Instance.QueryDir_args(wiki.Fsys_mgr().Root_dir()).Recur_(Bool_.N).FilPath_(file_pattern).ExecAsUrlAry();
		int files_len = files.length;
		for (int i = 0; i < files_len; ++i) {
			Io_url file = files[i];
			// release any open conn to a sqlite3 db before deleting its file
			// NOTE: compare by value; "==" compared String references, so the release was effectively never triggered
			if (String_.Eq(file.Ext(), ".sqlite3"))
				Db_conn_bldr.Instance.Get_or_noop(file).Rls_conn();
			Io_mgr.Instance.DeleteFil(file);
		}
	}
	@Override public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {return Gfo_invk_.Noop;}	// no script-settable props
}

View File

@@ -13,73 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs.cmds.utils; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.cmds.*;
import gplx.dbs.*; import gplx.core.ios.*; import gplx.core.envs.*;
import gplx.xowa.bldrs.wkrs.*; import gplx.xowa.bldrs.wms.dumps.*;
// PURPOSE: bldr cmd that downloads one dump file (EX: "page_props") for the wiki and optionally unzips it
public class Xob_download_cmd extends Xob_cmd__base implements Xob_cmd {
	private static final String
	  Invk_dump_date_ = "dump_date_", Invk_dump_type_ = "dump_type_", Invk_unzip_ = "unzip_"
	, Invk_dump_src_ = "dump_src_", Invk_dump_trg_zip_ = "dump_trg_zip_", Invk_dump_trg_bin_ = "dump_trg_bin_";
	private String dump_date = "latest", dump_type = null, dump_src = null;
	private Io_url dump_trg_zip = null, dump_trg_bin = null;
	private boolean unzip = true;
	public Xob_download_cmd(Xob_bldr bldr, Xowe_wiki wiki) {super(bldr, wiki);}
	public Xob_download_cmd Dump_type_(String v) {
		dump_type = v;
		return this;
	}
	@Override public String Cmd_key() {return Xob_cmd_keys.Key_util_download;}
	@Override public void Cmd_run() {
		// validate; exit quietly if web access is disabled
		if (dump_type == null) throw Err_.new_("bldr", "dump_type must be specified");
		if (!gplx.core.ios.IoEngine_system.Web_access_enabled) return;

		// fill in any src / trg that was not set explicitly
		Xowm_dump_file dump_file = new Xowm_dump_file(wiki.Domain_str(), dump_date, dump_type);
		if (dump_src == null) {
			dump_file.Server_url_(gplx.xowa.bldrs.installs.Xoi_dump_mgr.Server_urls(app)[0]);	// use 1st server in list
			dump_src = dump_file.File_url();
		}
		if (dump_trg_zip == null) {
			dump_trg_zip = wiki.Fsys_mgr().Root_dir().GenSubFil(dump_file.File_name());
		}
		if (unzip && dump_trg_bin == null) {
			dump_trg_bin = dump_trg_zip.GenNewNameAndExt(dump_trg_zip.NameOnly()); // convert a.sql.gz -> a.sql
		}

		// download; a failure is only warned about
		usr_dlg.Note_many("", "", "downloading file: now=~{0} src=~{1} trg=~{2}", Datetime_now.Get().XtoStr_fmt_yyyyMMdd_HHmmss(), dump_src, dump_trg_zip.OwnerDir());
		IoEngine_xrg_downloadFil download_wkr = app.Wmf_mgr().Download_wkr().Download_xrg();
		download_wkr.Src_last_modified_query_(false).Init(dump_src, dump_trg_zip);
		boolean download_ok = download_wkr.Exec();
		if (!download_ok) {
			usr_dlg.Warn_many("", "", "download failed: src=~{0} trg=~{1} err=~{2}", dump_src, dump_trg_zip.Raw(), Err_.Message_gplx_full(download_wkr.Rslt_err()));
		}

		// unzip
		if (!unzip) return;
		// parsing unzipped file is faster, but takes up more storage space
		usr_dlg.Note_many("", "", "unzipping file: now=~{0} trg=~{1}", Datetime_now.Get().XtoStr_fmt_yyyyMMdd_HHmmss(), dump_trg_bin.Raw());
		Xob_unzip_wkr unzip_wkr = new Xob_unzip_wkr().Init(app).Process_run_mode_(Process_adp.Run_mode_sync_block);
		unzip_wkr.Decompress(dump_trg_zip, dump_trg_bin);
	}
	@Override public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
		if (String_.Eq(k, Invk_dump_date_))		{dump_date = m.ReadStr("v"); return this;}
		if (String_.Eq(k, Invk_dump_type_))		{dump_type = m.ReadStr("v"); return this;}
		if (String_.Eq(k, Invk_dump_src_))		{dump_src = m.ReadStr("v"); return this;}
		if (String_.Eq(k, Invk_dump_trg_zip_))	{dump_trg_zip = m.ReadIoUrl("v"); return this;}
		if (String_.Eq(k, Invk_dump_trg_bin_))	{dump_trg_bin = m.ReadIoUrl("v"); return this;}
		if (String_.Eq(k, Invk_unzip_))			{unzip = m.ReadYn("v"); return this;}
		return Gfo_invk_.Rv_unhandled;
	}
	// adds a download cmd to the bldr for each dump_type that has no matching ".sql" file in the wiki's root dir
	public static void Add_if_not_found_many(Xob_bldr bldr, Xowe_wiki wiki, String... dump_types) {
		IoItmHash itm_hash = Io_mgr.Instance.QueryDir_args(wiki.Fsys_mgr().Root_dir()).ExecAsItmHash();
		int types_len = dump_types.length;
		for (int i = 0; i < types_len; i++)
			Add_if_not_found(bldr, wiki, itm_hash, dump_types[i]);
	}
	private static void Add_if_not_found(Xob_bldr bldr, Xowe_wiki wiki, IoItmHash itm_hash, String dump_type) {
		if (Found(itm_hash, dump_type)) return;
		bldr.Cmd_mgr().Add(new Xob_download_cmd(bldr, wiki).Dump_type_(dump_type));
	}
	// true if any item's file name contains "<dump_type>.sql"
	private static boolean Found(IoItmHash hash, String dump_type) {
		String match = String_.Format("{0}.sql", dump_type); // EX: "page_props.sql"
		for (int i = 0, len = hash.Count(); i < len; i++) {
			IoItm_base fil = (IoItm_base)hash.Get_at(i);
			String fil_name = fil.Url().NameAndExt();
			if (String_.Has(fil_name, match)) return true;
		}
		return false;
	}
}

View File

@@ -13,31 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs.cmds.utils; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.cmds.*;
import gplx.xowa.bldrs.wkrs.*;
import gplx.xowa.wikis.data.*; import gplx.xowa.wikis.dbs.*;
// PURPOSE: bldr cmd that executes one sql statement against one of the wiki's db files, selected by file_idx
public class Xob_exec_sql_cmd implements Xob_cmd {
	private static final String Invk_file_idx_ = "file_idx_", Invk_sql_ = "sql_";
	private Xowe_wiki wiki;
	private int file_idx = -1;
	private String sql;
	public Xob_exec_sql_cmd(Xob_bldr bldr, Xowe_wiki wiki) {this.wiki = wiki;}
	public Xob_cmd Cmd_clone(Xob_bldr bldr, Xowe_wiki wiki) {return null;}
	public String Cmd_key() {return Xob_cmd_keys.Key_exec_sql;}
	public void Cmd_init(Xob_bldr bldr) {}
	public void Cmd_bgn(Xob_bldr bldr) {}
	public void Cmd_run() {
		Xoae_app owner_app = wiki.Appe();
		wiki.Init_assert(); // force load; needed to pick up MediaWiki ns for MediaWiki:mainpage
		Xow_db_mgr core_mgr = wiki.Db_mgr_as_sql().Core_data_mgr();
		Xow_db_file db_file = core_mgr.Dbs__get_by_id_or_fail(file_idx);
		owner_app.Usr_dlg().Plog_many("", "", "exec_sql: running sql; file_idx=~{0} sql=~{1}", file_idx, sql);
		db_file.Conn().Exec_sql(sql);
	}
	public void Cmd_end() {}
	public void Cmd_term() {}
	// "file_idx_" and "sql_" set the corresponding field; NOTE: unknown keys are ignored and still return this
	public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
		if (ctx.Match(k, Invk_file_idx_)) {
			file_idx = m.ReadInt("v");
		}
		else if (ctx.Match(k, Invk_sql_)) {
			sql = m.ReadStr("v");
		}
		return this;
	}
}

View File

@@ -13,79 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs.cmds.utils; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.cmds.*;
import gplx.core.net.*;
import gplx.dbs.*;
import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.wkrs.*; import gplx.xowa.bldrs.wms.*; import gplx.xowa.bldrs.wms.sites.*;
import gplx.xowa.wikis.domains.*; import gplx.xowa.apps.site_cfgs.*;
public class Xob_site_meta_cmd implements Xob_cmd {
private final Xob_bldr bldr;
private String[] wikis; private Io_url db_url; private DateAdp cutoff_time;
public Xob_site_meta_cmd(Xob_bldr bldr, Xow_wiki wiki) {this.bldr = bldr;}
public Xob_cmd Cmd_clone(Xob_bldr bldr, Xowe_wiki wiki) {return null;}
public String Cmd_key() {return Xob_cmd_keys.Key_site_meta;}
public void Cmd_run() {
Xoa_app app = bldr.App();
if (wikis == null) wikis = Xow_domain_regy.All;
if (db_url == null) db_url = app.Fsys_mgr().Cfg_site_meta_fil();
if (cutoff_time == null) cutoff_time = Datetime_now.Get().Add_day(-1);
Load_all(app, db_url, wikis, cutoff_time);
}
private void Load_all(Xoa_app app, Io_url db_url, String[] reqd_ary, DateAdp cutoff) {
Site_json_parser site_parser = new Site_json_parser(app.Utl__json_parser());
Gfo_usr_dlg usr_dlg = app.Usr_dlg();
Gfo_inet_conn inet_conn = app.Utl__inet_conn();
Ordered_hash reqd_hash = Ordered_hash_.New();
int reqd_len = reqd_ary.length;
for (int i = 0; i < reqd_len; ++i)
reqd_hash.Add_as_key_and_val(reqd_ary[i]);
Site_core_db json_db = new Site_core_db(db_url);
Site_core_itm[] actl_ary = json_db.Tbl__core().Select_all_downloaded(cutoff);
int actl_len = actl_ary.length;
for (int i = 0; i < actl_len; ++i) { // remove items that have been completed after cutoff date
Site_core_itm actl_itm = actl_ary[i];
reqd_hash.Del(String_.new_u8(actl_itm.Site_domain()));
}
reqd_len = reqd_hash.Count();
for (int i = 0; i < reqd_len; ++i) {
String domain_str = (String)reqd_hash.Get_at(i);
DateAdp json_date = Datetime_now.Get();
byte[] json_text = null;
for (int j = 0; j < 5; ++j) {
json_text = gplx.xowa.bldrs.wms.Xowm_api_mgr.Call_by_qarg(usr_dlg, inet_conn, domain_str, Xoa_site_cfg_loader__inet.Qarg__all);
if (json_text == null)
gplx.core.threads.Thread_adp_.Sleep(1000);
else
break;
}
byte[] domain_bry = Bry_.new_u8(domain_str);
byte[] site_abrv = Xow_abrv_xo_.To_bry(domain_bry);
json_db.Tbl__core().Insert(site_abrv, domain_bry, Bool_.N, json_date, json_text);
}
reqd_len = reqd_ary.length;
for (int i = 0; i < reqd_len; ++i) {
String domain_str = reqd_ary[i];
byte[] site_abrv = Xow_abrv_xo_.To_bry(Bry_.new_u8(domain_str));
Site_core_itm core_itm = json_db.Tbl__core().Select_itm(site_abrv);
if (core_itm.Json_completed()) continue;
Site_meta_itm meta_itm = new Site_meta_itm();
site_parser.Parse_root(meta_itm, String_.new_u8(core_itm.Site_domain()), core_itm.Json_text());
json_db.Save(meta_itm, site_abrv);
}
}
// Builder life-cycle hooks; intentionally empty in this command
public void Cmd_init(Xob_bldr bldr) {}
public void Cmd_bgn(Xob_bldr bldr) {}
public void Cmd_end() {}
public void Cmd_term() {}
// Script entry point: sets one option per message and returns this; unknown keys return Gfo_invk_.Rv_unhandled
//   db_url_      : url of the site-meta db
//   wikis_       : newline-delimited list of wiki domains
//   cutoff_time_ : date read from the message and stored as the cutoff
public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
	if		(ctx.Match(k, Invk_db_url_))			this.db_url = m.ReadIoUrl("v");
	else if	(ctx.Match(k, Invk_wikis_))				this.wikis = m.ReadStrAry("v", "\n");
	else if	(ctx.Match(k, Invk_cutoff_time_))		this.cutoff_time = m.ReadDate("v");
	else	return Gfo_invk_.Rv_unhandled;
	return this;
}
// keys are constants; declared final so they cannot be reassigned at runtime
private static final String Invk_db_url_ = "db_url_", Invk_wikis_ = "wikis_", Invk_cutoff_time_ = "cutoff_time_";
}

View File

@@ -13,28 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs.cmds.utils; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.cmds.*;
import gplx.core.envs.*;
// Decompresses an archive by delegating to the external process registered for its extension (bz2, zip, gz)
public class Xob_unzip_wkr {
	private Process_adp decompress_bz2, decompress_zip, decompress_gz;
	private Process_adp process;	// process chosen by the most recent Decompress call
	private byte process_run_mode = Process_adp.Run_mode_async;

	// exit code of the process used by the most recent Decompress call
	public int Process_exit_code() {
		return process.Exit_code();
	}
	public byte Process_run_mode() {
		return process_run_mode;
	}
	public Xob_unzip_wkr Process_run_mode_(byte v) {
		this.process_run_mode = v;
		return this;
	}
	// takes the three decompress apps from the app's program manager
	public Xob_unzip_wkr Init(Xoae_app app) {
		return Init(app.Prog_mgr().App_decompress_bz2(), app.Prog_mgr().App_decompress_zip(), app.Prog_mgr().App_decompress_gz());
	}
	public Xob_unzip_wkr Init(Process_adp decompress_bz2, Process_adp decompress_zip, Process_adp decompress_gz) {
		this.decompress_bz2 = decompress_bz2;
		this.decompress_zip = decompress_zip;
		this.decompress_gz = decompress_gz;
		return this;
	}
	// runs the matching decompress process on src, writing to trg; trg's owner dir is created if absent
	public void Decompress(Io_url src, Io_url trg) {
		this.process = Select_process(src.Ext());
		Io_url trg_dir = trg.OwnerDir();
		Io_mgr.Instance.CreateDirIfAbsent(trg_dir);
		process.Run_mode_(process_run_mode);
		process.Run(src, trg, trg_dir.Xto_api());
	}
	// maps a file extension to its decompress process; throws for any other extension
	private Process_adp Select_process(String ext) {
		if (String_.Eq(ext, gplx.core.ios.streams.Io_stream_tid_.Ext__bz2)) return decompress_bz2;
		if (String_.Eq(ext, gplx.core.ios.streams.Io_stream_tid_.Ext__zip)) return decompress_zip;
		if (String_.Eq(ext, gplx.core.ios.streams.Io_stream_tid_.Ext__gz)) return decompress_gz;
		throw Err_.new_unhandled(ext);
	}
}

View File

@@ -13,52 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs.cmds.utils; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.cmds.*;
import gplx.xowa.bldrs.wkrs.*; import gplx.xowa.bldrs.xmls.*;
import gplx.xowa.wikis.dbs.*; import gplx.xowa.wikis.data.tbls.*;
// Builder command that writes every page of a wiki to "<domain>-dump.xml" in the wiki's root dir
public class Xob_xml_dumper_cmd implements Xob_cmd {
	private static final String Invk_commit_interval_ = "commit_interval_";
	private final Xowe_wiki wiki;
	private final Gfo_usr_dlg usr_dlg;
	private final Xob_xml_dumper xml_dumper = new Xob_xml_dumper();
	private int commit_interval = 1000;	// number of pages between appends to the dump file
	private Io_url dump_url;
	public Xob_xml_dumper_cmd(Xob_bldr bldr, Xowe_wiki wiki) {
		this.wiki = wiki;
		this.usr_dlg = wiki.Appe().Usr_dlg();
	}
	public Xob_cmd Cmd_clone(Xob_bldr bldr, Xowe_wiki wiki) {
		return null;
	}
	public String Cmd_key() {
		return Xob_cmd_keys.Key_util_xml_dump;
	}
	// resolves the dump file and deletes any previous copy
	public void Cmd_init(Xob_bldr bldr) {
		this.dump_url = wiki.Fsys_mgr().Root_dir().GenSubFil(wiki.Domain_str() + "-dump.xml");
		Io_mgr.Instance.DeleteFil(dump_url);
	}
	public void Cmd_bgn(Xob_bldr bldr) {}
	// writes root header, all pages, root footer; appends to the dump file every commit_interval pages and once at the end
	public void Cmd_run() {
		usr_dlg.Plog_many("", "", Cmd_key() + ":bgn;");
		String main_page = String_.Format("https://{0}/wiki/{1}", wiki.Domain_str(), String_.new_u8(wiki.Props().Main_page()));
		String wiki_abrv = "";
		String ns_case = "first-letter"; // TODO_OLD:
		xml_dumper.Write_root_bgn(wiki.Ns_mgr(), wiki.Domain_itm(), wiki_abrv, main_page, ns_case, "XOWA " + Xoa_app_.Version);
		Write_pages();
		xml_dumper.Write_root_end();
		Commit();
		usr_dlg.Plog_many("", "", Cmd_key() + ":end;");
	}
	public void Cmd_end() {}
	public void Cmd_term() {}
	public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
		if (!ctx.Match(k, Invk_commit_interval_)) return Gfo_invk_.Rv_unhandled;
		this.commit_interval = m.ReadInt("v");
		return this;
	}
	// reads every page from the wiki's page reader and writes it to the dumper; the reader is always released
	private void Write_pages() {
		Xodb_page_rdr page_rdr = wiki.Db_mgr().Load_mgr().Get_page_rdr(wiki);
		Xowd_page_itm page = new Xowd_page_itm();
		int page_count = 0;
		try {
			while (page_rdr.Move_next()) {
				page_rdr.Read(page);
				page.Ttl_(wiki.Ttl_parse(page.Ns_id(), page.Ttl_page_db()));
				xml_dumper.Write_page(page);
				++page_count;
				if (page_count % commit_interval == 0)
					Commit();
			}
		}
		catch (Exception e) {
			throw Err_.new_exc(e, "xo", "xml_dumper failed");
		}
		finally {
			page_rdr.Rls();
		}
	}
	// appends the dumper's current string to the dump file
	private void Commit() {
		Io_mgr.Instance.AppendFilStr(dump_url, xml_dumper.Bld_str());
	}
}

View File

@@ -13,283 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs.css; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
import gplx.core.brys.fmtrs.*; import gplx.core.ios.*; import gplx.core.envs.*;
import gplx.xowa.htmls.*; import gplx.langs.htmls.encoders.*;
import gplx.xowa.wikis.nss.*;
import gplx.xowa.wikis.*; import gplx.xowa.wikis.domains.*; import gplx.xowa.wikis.data.*;
import gplx.xowa.files.downloads.*;
import gplx.core.net.*;
import gplx.xowa.addons.wikis.htmls.css.bldrs.*; import gplx.xowa.addons.wikis.htmls.css.mgrs.*;
import gplx.xowa.wikis.data.fetchers.*;
public class Xoa_css_extractor {
// dir copied into the wiki's html dir during common-css failover; only set by Init_by_app (null in tests)
private Io_url home_css_dir;
// download helper used for the main page, stylesheets and logo
public IoEngine_xrg_downloadFil Download_xrg() {return download_xrg;} private IoEngine_xrg_downloadFil download_xrg = Io_mgr.Instance.DownloadFil_args("", Io_url_.Empty);
// fluent setters below return this so that calls can be chained; each is followed by its backing field
public Xoa_css_extractor Wiki_domain_(byte[] v) {wiki_domain = v; return this;} private byte[] wiki_domain;
public Xoa_css_extractor Usr_dlg_(Gfo_usr_dlg v) {usr_dlg = v; return this;} private Gfo_usr_dlg usr_dlg;
// dir holding the fallback css / logo files that are copied when download or generation fails
public Xoa_css_extractor Failover_dir_(Io_url v) {failover_dir = v; return this;} private Io_url failover_dir;
// target dir for xowa_common.css, xowa_wiki.css and logo.png
public Xoa_css_extractor Wiki_html_dir_(Io_url v) {wiki_html_dir = v; return this;} private Io_url wiki_html_dir;
public Xoa_css_extractor Mainpage_url_(String v) {mainpage_url = v; return this;} private String mainpage_url;
public Xoa_css_extractor Protocol_prefix_(String v) {protocol_prefix = v; return this;} private String protocol_prefix = "https:";// NOTE: changed from http to https; DATE:2015-02-17
// fetches MediaWiki: namespace pages (Common.css / Vector.css) for xowa_wiki.css
public Xoa_css_extractor Page_fetcher_(Xow_page_fetcher v) {page_fetcher = v; return this;} private Xow_page_fetcher page_fetcher;
public Xoa_css_extractor Css_img_downloader_(Xoa_css_img_downloader v) {this.css_img_downloader = v; return this;} private Xoa_css_img_downloader css_img_downloader;
// true: scrape common css from the main page; false: copy the failover css
public Xoa_css_extractor Opt_download_css_common_(boolean v) {opt_download_css_common = v; return this;} private boolean opt_download_css_common;
public Xoa_css_extractor Url_encoder_(Gfo_url_encoder v) {url_encoder = v; return this;} private Gfo_url_encoder url_encoder;
// used to look up a wiki-specific override css; EX: xowa_common_enwiki.css
public Xoa_css_extractor Wiki_code_(byte[] v) {this.wiki_code = v; return this;} private byte[] wiki_code = null;
// downloaded main page html (null if the download failed); lang_is_ltr selects the ltr / rtl failover css
private byte[] mainpage_html; private boolean lang_is_ltr = true;
private final Gfo_url_parser url_parser = new Gfo_url_parser();
// Wires the extractor to an app: user dialog, home css dir, download helpers, failover dir and url encoder
public void Init_by_app(Xoae_app app) {
	this.usr_dlg = app.Usr_dlg();
	this.home_css_dir = app.Usere().Fsys_mgr().Wiki_html_dir("home").GenSubDir("html");
	Xof_download_wkr download_wkr = app.Wmf_mgr().Download_wkr();
	this.download_xrg = download_wkr.Download_xrg();
	// image downloader prefixes stylesheet urls with the current protocol_prefix
	byte[] protocol_prefix_bry = Bry_.new_u8(protocol_prefix);
	Xoa_css_img_downloader img_downloader = new Xoa_css_img_downloader();
	this.css_img_downloader = img_downloader.Ctor(usr_dlg, download_wkr, protocol_prefix_bry);
	this.failover_dir = app.Fsys_mgr().Bin_xowa_dir().GenSubDir_nest("html", "css", "failover");
	this.url_encoder = gplx.langs.htmls.encoders.Gfo_url_encoder_.Http_url;
}
// Ensures the wiki's css files exist, generating them if needed.
//   wiki    : wiki whose css dir is checked / populated
//   css_key : key for loading css from the wiki's db; if null, the db is skipped
// Order: (1) register css urls with the html writer; (2) exit for home wiki, test mode, or existing xowa_wiki.css;
// (3) exit if another install is in progress; (4) try db, then (edit wikis only) download from the wmf site.
// Any exception fails over to the bundled xowa_common.css.
// The "installing" flag is cleared on every exit path taken after it was set.
public void Install(Xow_wiki wiki, String css_key) {
	try {
		this.wiki_html_dir = wiki.App().Fsys_mgr().Wiki_css_dir(wiki.Domain_str());	// EX: /xowa/user/anonymous/wiki/en.wikipedia.org
		Io_url css_comm_fil = wiki_html_dir.GenSubFil(Css_common_name);
		Io_url css_wiki_fil = wiki_html_dir.GenSubFil(Css_wiki_name);
		wiki.Html__wtr_mgr().Init_css_urls(wiki.App(), wiki.Domain_str(), css_comm_fil, css_wiki_fil);
		if (wiki.Domain_tid() == Xow_domain_tid_.Tid__home || Env_.Mode_testing()) return;		// NOTE: do not download if home_wiki; also needed for TEST
		if (Io_mgr.Instance.ExistsFil(css_wiki_fil)) return;									// css file exists; nothing to generate
		if (wiki.Html__css_installing()) return;												// install already in progress; do not touch the flag
		wiki.Html__css_installing_(true);
		wiki.App().Usr_dlg().Log_many("", "", "generating css for '~{0}'", wiki.Domain_str());
		if (css_key != null) {
			if (Install_by_db(wiki, wiki_html_dir, css_key)) {
				wiki.Html__css_installing_(false);	// FIX: flag was previously left set after a successful db install
				return;
			}
		}
		if (wiki.Type_is_edit())
			this.Install_by_wmf((Xowe_wiki)wiki, wiki_html_dir);
		wiki.Html__css_installing_(false);
	}
	catch (Exception e) {	// if error, failover; paranoia catch for outliers like bad network connectivity fail, or MediaWiki: message not existing; DATE:2013-11-21
		wiki.App().Usr_dlg().Warn_many("", "", "failed to get css; failing over; wiki='~{0}' err=~{1}", wiki.Domain_str(), Err_.Message_gplx_full(e));
		try {
			Css_common_failover();	// only failover xowa_common.css; xowa_wiki.css comes from MediaWiki:Common.css / Vector.css
		}
		finally {
			wiki.Html__css_installing_(false);	// clear flag even if the failover copy itself throws
		}
	}
}
// Generates css by scraping the wiki's live site: common css, wiki css, then logo
private void Install_by_wmf(Xowe_wiki wiki, Io_url wiki_html_dir) {
	boolean cfg_download = wiki.Appe().Cfg().Get_bool_app_or("xowa.bldr.import.download_xowa_common", true);	// CFG: Cfg__
	// do not download css if web_access disabled or wiki is other; DATE:2017-02-25
	boolean wiki_is_other = wiki.Domain_tid() == Xow_domain_tid_.Tid__other;
	this.opt_download_css_common = cfg_download && gplx.core.ios.IoEngine_system.Web_access_enabled && !wiki_is_other;

	this.wiki_domain = wiki.Domain_bry();
	// NOTE: cannot reuse protocol_prefix b/c "//" needs to be added manually; protocol_prefix is used for logo and images which have form of "//domain/image.png"; changed to https; DATE:2015-02-17
	this.mainpage_url = "https://" + wiki.Domain_str();
	if (page_fetcher == null) {
		page_fetcher = new Xow_page_fetcher_wiki();
	}
	page_fetcher.Wiki_(wiki);
	this.wiki_html_dir = wiki_html_dir;
	this.lang_is_ltr = wiki.Lang().Dir_ltr();
	this.wiki_code = wiki.Domain_abrv();

	// get mainpage; do not download css if wiki is other; DATE:2017-02-25
	if (wiki_is_other)
		this.mainpage_html = Bry_.Empty;
	else
		this.mainpage_html = Mainpage_download_html();

	// generate css
	Css_common_setup();
	Css_wiki_setup();
	Logo_setup();
}
// Loads css from the wiki's core db into wiki_html_dir; returns false (with a warning) if the db has no css table
private boolean Install_by_db(Xow_wiki wiki, Io_url wiki_html_dir, String css_key) {
	Xow_db_mgr core_db_mgr = wiki.Data__core_mgr();
	// css is only available when the db mgr exists, has props, is not schema 1, and the css_core table is flagged in cfg
	boolean css_db_exists
		=  core_db_mgr != null
		&& core_db_mgr.Props() != null
		&& !core_db_mgr.Props().Schema_is_1()
		&& core_db_mgr.Tbl__cfg().Select_yn_or(Xowd_cfg_key_.Grp__wiki_schema, Xow_db_file_schema_props.Key__tbl_css_core, Bool_.N);
	if (css_db_exists) {
		Xow_db_file core_db = core_db_mgr.Db__core();
		return Xowd_css_core_mgr.Get(core_db.Tbl__css_core(), core_db.Tbl__css_file(), wiki_html_dir, css_key);
	}
	Xoa_app_.Usr_dlg().Warn_many("", "", "css.db not found; wiki=~{0} css_dir=~{1}", wiki.Domain_str(), wiki_html_dir.Raw());
	return false;
}
// Creates xowa_common.css: scraped from the main page if downloading is enabled; else copied from failover
public void Css_common_setup() {
	if (!opt_download_css_common) {
		Css_common_failover();
		return;
	}
	Css_common_download();
}
// Copies the bundled css into the wiki's html dir as xowa_common.css
private void Css_common_failover() {
	Io_url common_css_fil = wiki_html_dir.GenSubFil(Css_common_name);
	if (home_css_dir != null) {	// TEST:
		// NOTE: copy dir first b/c xowa_commons.css will be replaced below
		Io_mgr.Instance.CopyDirDeep(home_css_dir, common_css_fil.OwnerDir());
	}
	Io_url failover_fil = Css_common_failover_url();
	Io_mgr.Instance.CopyFil(failover_fil, common_css_fil, true);
}
// Scrapes xowa_common.css from the main page; on failure copies the failover css; on success checks its images
private void Css_common_download() {
	Io_url common_css_fil = wiki_html_dir.GenSubFil(Css_common_name);
	boolean scraped = Css_scrape_setup();
	if (scraped) {
		css_img_downloader.Chk(wiki_domain, common_css_fil);
	}
	else {
		Io_mgr.Instance.CopyFil(Css_common_failover_url(), common_css_fil, true);
	}
}
// Returns the failover css to copy: the wiki-specific override if it exists; else the generic ltr / rtl file
private Io_url Css_common_failover_url() {
	Io_url override_fil = failover_dir.GenSubDir("xowa_common_override").GenSubFil_ary("xowa_common_", String_.new_u8(wiki_code), ".css");
	if (!Io_mgr.Instance.ExistsFil(override_fil)) {
		String generic_name;
		if (lang_is_ltr)
			generic_name = Css_common_name_ltr;
		else
			generic_name = Css_common_name_rtl;
		return failover_dir.GenSubFil(generic_name);
	}
	return override_fil;	// specific css exists for wiki; use it; EX: xowa_common_wiki_mediawikiwiki.css
}
// Creates xowa_wiki.css unless it already exists; generated from MediaWiki: pages, with failover if generation fails
public void Css_wiki_setup() {
	Io_url wiki_css_fil = wiki_html_dir.GenSubFil(Css_wiki_name);
	if (Io_mgr.Instance.ExistsFil(wiki_css_fil)) return;	// don't download if already there
	if (Css_wiki_generate(wiki_css_fil))
		css_img_downloader.Chk(wiki_domain, wiki_css_fil);
	else
		Failover(wiki_css_fil);
}
// Concatenates MediaWiki:Common.css and MediaWiki:Vector.css, swaps tab entities for tabs, and saves to trg_fil; always returns true
private boolean Css_wiki_generate(Io_url trg_fil) {
	Bry_bfr bfr = Bry_bfr_.New();
	Css_wiki_generate_section(bfr, Ttl_common_css);
	Css_wiki_generate_section(bfr, Ttl_vector_css);
	byte[] css = Bry_.Replace(bfr.To_bry_and_clear(), gplx.xowa.bldrs.xmls.Xob_xml_parser_.Bry_tab_ent, gplx.xowa.bldrs.xmls.Xob_xml_parser_.Bry_tab);
	Io_mgr.Instance.SaveFilBry(trg_fil, css);
	return true;
}	private static final byte[] Ttl_common_css = Bry_.new_a7("Common.css"), Ttl_vector_css = Bry_.new_a7("Vector.css");
// Appends one MediaWiki: css page to bfr, preceded by a "/*XOWA:MediaWiki:<ttl>*/" header; returns false if the page does not exist
private boolean Css_wiki_generate_section(Bry_bfr bfr, byte[] ttl) {
	byte[] page = page_fetcher.Get_by(Xow_ns_.Tid__mediawiki, ttl);
	if (page != null) {
		boolean is_first_section = bfr.Len() == 0;
		if (!is_first_section)
			bfr.Add_byte_nl().Add_byte_nl();			// add "\n\n" between sections
		Css_wiki_section_hdr.Bld_bfr_many(bfr, ttl);	// add "/*XOWA:MediaWiki:Common.css*/\n"
		bfr.Add(page);									// add page
		return true;
	}
	return false;
}	private static final Bry_fmtr Css_wiki_section_hdr = Bry_fmtr.new_("/*XOWA:MediaWiki:~{ttl}*/\n", "ttl");
// Creates logo.png unless it already exists; downloaded if possible, else copied from failover
public void Logo_setup() {
	Io_url logo_fil = wiki_html_dir.GenSubFil("logo.png");
	if (Io_mgr.Instance.ExistsFil(logo_fil)) return;	// don't download if already there
	boolean downloaded = Logo_download(logo_fil);
	if (!downloaded) {
		Failover(logo_fil);
	}
}
// Gets the logo: downloads the url found in the main page html; if none found, copies the file referenced by .mw-wiki-logo in xowa_common.css
private boolean Logo_download(Io_url trg_fil) {
	String src_fil = Logo_find_src();
	if (src_fil != null) {
		String log_msg = usr_dlg.Prog_many("", "", "downloading logo: '~{0}'", src_fil);
		boolean downloaded = download_xrg.Prog_fmt_hdr_(log_msg).Src_(src_fil).Trg_(trg_fil).Exec();
		if (!downloaded) {
			usr_dlg.Warn_many("", "", "failed to download logo: src_url=~{0};", src_fil);
		}
		return downloaded;
	}
	// no logo url in main page; try the css
	if (Logo_copy_from_css(trg_fil)) {
		return true;
	}
	usr_dlg.Warn_many("", "", "failed to extract logo: trg_fil=~{0};", trg_fil.Raw());
	return false;
}
// Finds the .mw-wiki-logo background url inside xowa_common.css and copies that local file to trg_fil; returns false if the rule is not found
private boolean Logo_copy_from_css(Io_url trg_fil) {
	byte[] css_src = Io_mgr.Instance.LoadFilBry(wiki_html_dir.GenSubFil(Css_common_name));
	int url_bgn = Bry_find_.Find_fwd(css_src, Bry_mw_wiki_logo);
	if (url_bgn == Bry_find_.Not_found) return false;
	url_bgn += Bry_mw_wiki_logo.length;		// position after the opening quote
	int url_end = Bry_find_.Find_fwd(css_src, Byte_ascii.Quote, url_bgn + 1);
	if (url_end == Bry_find_.Not_found) return false;

	byte[] raw_url = Bry_.Mid(css_src, url_bgn, url_end);
	byte[] rel_path = Xob_url_fixer.Fix(wiki_domain, raw_url, raw_url.length);
	boolean dir_spr_is_backslash = wiki_html_dir.Info().DirSpr_byte() == Byte_ascii.Backslash;
	if (dir_spr_is_backslash) {
		rel_path = Bry_.Replace(rel_path, Byte_ascii.Slash, Byte_ascii.Backslash);
	}
	Io_url logo_src_fil = wiki_html_dir.GenSubFil(String_.new_u8(rel_path));
	Io_mgr.Instance.CopyFil(logo_src_fil, trg_fil, true);
	return true;
}	private static final byte[] Bry_mw_wiki_logo = Bry_.new_a7(".mw-wiki-logo{background-image:url(\"");
// Extracts the logo url from the main page html.
// Looks for '<div id="p-logo"', then the next 'background-image: url(' and takes everything up to the closing ")".
// Returns protocol_prefix + url, or null if the main page is missing or any of the three markers is not found.
private String Logo_find_src() {
	if (mainpage_html == null) return null;
	int main_page_html_len = mainpage_html.length;
	int logo_bgn = Bry_find_.Find_fwd(mainpage_html, Logo_find_bgn, 0);									if (logo_bgn == Bry_find_.Not_found) return null;
	logo_bgn += Logo_find_bgn.length;
	logo_bgn = Bry_find_.Find_fwd(mainpage_html, Logo_find_end, logo_bgn);								if (logo_bgn == Bry_find_.Not_found) return null;
	logo_bgn += Logo_find_end.length;
	// FIX: check logo_end (was logo_bgn); an unclosed "url(" previously passed Not_found to Bry_.Mid
	int logo_end = Bry_find_.Find_fwd(mainpage_html, Byte_ascii.Paren_end, logo_bgn, main_page_html_len);	if (logo_end == Bry_find_.Not_found) return null;
	byte[] logo_bry = Bry_.Mid(mainpage_html, logo_bgn, logo_end);
	return protocol_prefix + String_.new_u8(logo_bry);
}
private static final byte[] Logo_find_bgn = Bry_.new_a7("<div id=\"p-logo\""), Logo_find_end = Bry_.new_a7("background-image: url(");
// Downloads the main page html and caches it; returns true if the download returned content
public boolean Mainpage_download() {
	this.mainpage_html = Mainpage_download_html();
	boolean downloaded = this.mainpage_html != null;
	return downloaded;
}
// Downloads the html of the main page (or of /wiki/Q2 for wikidata); returns null and warns if the download fails
private byte[] Mainpage_download_html() {
	// if wikidata, download css for a Q* page; Main_Page has less css; DATE:2014-09-30
	boolean is_wikidata = Bry_.Eq(wiki_domain, Xow_domain_itm_.Bry__wikidata);
	String src_url = is_wikidata ? mainpage_url + "/wiki/Q2" : mainpage_url;
	String log_msg = usr_dlg.Prog_many("", "main_page.download", "downloading main page for '~{0}'", src_url);
	byte[] html = download_xrg.Prog_fmt_hdr_(log_msg).Exec_as_bry(src_url);
	if (html == null) {
		usr_dlg.Warn_many("", "", "failed to download main_page: src_url=~{0};", src_url);
	}
	return html;
}
// Copies the same-named file from the failover dir over trg_fil
private void Failover(Io_url trg_fil) {
	usr_dlg.Note_many("", "", "copying failover file: trg_fil=~{0};", trg_fil.Raw());
	Io_url failover_fil = failover_dir.GenSubFil(trg_fil.NameAndExt());
	Io_mgr.Instance.CopyFil(failover_fil, trg_fil, true);
}
// Builds xowa_common.css from the stylesheets linked by the main page; returns false (after copying the failover css) if nothing could be scraped
public boolean Css_scrape_setup() {
	Io_url common_css_fil = wiki_html_dir.GenSubFil(Css_common_name);
	// if (Io_mgr.Instance.ExistsFil(trg_fil)) return;	// don't download if already there; DELETED: else main_page is not scraped for all stylesheet links; simple.d: fails; DATE:2014-02-11
	byte[] scraped_css = Css_scrape();
	if (scraped_css != null) {
		Io_mgr.Instance.SaveFilBry(common_css_fil, scraped_css);
		css_img_downloader.Chk(wiki_domain, common_css_fil);
		return true;
	}
	Css_common_failover();
	return false;
}
// Returns the concatenated css of all stylesheets linked by the main page; null if there is no main page or no links
private byte[] Css_scrape() {
	if (mainpage_html == null) return null;
	String[] css_urls = Css_scrape_urls(mainpage_html);
	return css_urls.length == 0
		? null
		: Css_scrape_download(css_urls);
}
// Scans raw html for every '<link rel="stylesheet" href="' and returns the href values as download-ready urls
// Each href is unescaped (&amp; -> &), url-decoded, made absolute if root-relative ("/w/..."), and prefixed with protocol_prefix if protocol-relative
// Scanning stops at the first link whose closing quote cannot be found
private String[] Css_scrape_urls(byte[] raw) {
List_adp rv = List_adp_.New();
int raw_len = raw.length;
int prv_pos = 0;
int css_find_bgn_len = Css_find_bgn.length;
byte[] protocol_prefix_bry = Bry_.new_u8(protocol_prefix);
while (true) {
int url_bgn = Bry_find_.Find_fwd(raw, Css_find_bgn, prv_pos); if (url_bgn == Bry_find_.Not_found) break; // nothing left; stop
url_bgn += css_find_bgn_len;
// href value runs up to the next double-quote
int url_end = Bry_find_.Find_fwd(raw, Byte_ascii.Quote, url_bgn, raw_len); if (url_end == Bry_find_.Not_found) {usr_dlg.Warn_many("", "main_page.css_parse", "could not find css; pos='~{0}' text='~{1}'", url_bgn, String_.new_u8__by_len(raw, url_bgn, url_bgn + 32)); break;}
byte[] css_url_bry = Bry_.Mid(raw, url_bgn, url_end);
css_url_bry = Bry_.Replace(css_url_bry, Css_amp_find, Css_amp_repl); // &amp; -> &
css_url_bry = url_encoder.Decode(css_url_bry); // %2C -> %7C -> |
css_url_bry = Xoa_css_extractor.Url_root_fix(wiki_domain, css_url_bry);
Gfo_url gfo_url = url_parser.Parse(css_url_bry, 0, css_url_bry.length);
if ( gfo_url.Protocol_tid() == Gfo_protocol_itm.Tid_relative_1 // if rel url, add protocol_prefix DATE:2015-08-01
|| (Env_.Mode_testing() && gfo_url.Protocol_tid() == Gfo_protocol_itm.Tid_unknown)) // TEST:
css_url_bry = Bry_.Add(protocol_prefix_bry, css_url_bry);
rv.Add(String_.new_u8(css_url_bry));
// resume the search from the end of this href
prv_pos = url_end;
}
return rv.To_str_ary();
} private static final byte[] Css_find_bgn = Bry_.new_a7("<link rel=\"stylesheet\" href=\""), Css_amp_find = Bry_.new_a7("&amp;"), Css_amp_repl = Bry_.new_a7("&");
// Downloads each stylesheet url and concatenates the results.
// Each downloaded stylesheet is preceded by a comment line holding its url and followed by a blank line.
// Urls that fail to download are skipped.
// Returns null if no stylesheet could be downloaded, so that the caller fails over instead of saving an empty xowa_common.css.
private byte[] Css_scrape_download(String[] css_urls) {
	int css_urls_len = css_urls.length;
	Bry_bfr tmp_bfr = Bry_bfr_.New();
	for (int i = 0; i < css_urls_len; i++) {
		String css_url = css_urls[i];
		usr_dlg.Prog_many("", "main_page.css_download", "downloading css for '~{0}'", css_url);
		download_xrg.Prog_fmt_hdr_(css_url);
		byte[] css_bry = download_xrg.Exec_as_bry(css_url); if (css_bry == null) continue;	// css not found; continue
		tmp_bfr.Add(Xoa_css_img_downloader.Bry_comment_bgn).Add_str_u8(css_url).Add(Xoa_css_img_downloader.Bry_comment_end).Add_byte_nl();
		tmp_bfr.Add(css_bry).Add_byte_nl().Add_byte_nl();
	}
	if (tmp_bfr.Len() == 0) return null;	// FIX: every download failed; previously returned an empty array which was saved as the stylesheet
	return tmp_bfr.To_bry_and_clear();
}
// Converts a root-relative url to an absolute https url by prepending the domain; all other urls are returned unchanged; DATE:2015-09-20
private static byte[] Url_root_fix(byte[] domain, byte[] url) {
	if (url.length < 3) return url;						// urls shorter than 3 bytes are returned as is
	boolean is_root_relative
		=  url[0] == Byte_ascii.Slash					// starts with "/" EX: "/w/api.php"
		&& url[1] != Byte_ascii.Slash;					// but not "//"; EX: "//en.wikipedia.org"
	return is_root_relative
		? Bry_.Add(gplx.xowa.htmls.hrefs.Xoh_href_.Bry__https, domain, url)
		: url;
}
// File names: the two css files generated in the wiki's html dir, and the generic ltr / rtl failover files looked up in failover_dir
public static final String Css_common_name = "xowa_common.css", Css_wiki_name = "xowa_wiki.css"
, Css_common_name_ltr = "xowa_common_ltr.css", Css_common_name_rtl = "xowa_common_rtl.css";
}

View File

@@ -13,65 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs.css; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
import org.junit.*; import gplx.core.ios.*; import gplx.langs.htmls.encoders.*; import gplx.xowa.wikis.data.*; import gplx.xowa.files.downloads.*;
import gplx.xowa.wikis.data.fetchers.*;
// Tests logo and common-css extraction against the in-memory file system set up by Xoa_css_extractor_fxt
public class Xoa_css_extractor_basic_tst {
@Before public void init() {fxt.Clear();} private Xoa_css_extractor_fxt fxt = new Xoa_css_extractor_fxt();
// logo url is taken from the main page html and downloaded
@Test public void Logo_download() {
fxt.Init_fil("mem/http/en.wikipedia.org" , Xoa_css_extractor_fxt.Main_page_html);
fxt.Init_fil("mem/http/wiki.png" , "download");
fxt.Exec_logo_setup();
fxt.Test_fil("mem/xowa/user/anonymous/wiki/en.wikipedia.org/html/logo.png", "download");
}
// main page has no logo; logo is copied from the file referenced by .mw-wiki-logo in xowa_common.css
@Test public void Logo_download_mw_wiki_logo() {
fxt.Init_fil("mem/http/en.wikipedia.org" , "");
fxt.Init_fil("mem/xowa/user/anonymous/wiki/en.wikipedia.org/html/a/wiki.png" , "download");
fxt.Init_fil("mem/xowa/user/anonymous/wiki/en.wikipedia.org/html/xowa_common.css" , ".mw-wiki-logo{background-image:url(\"//a/wiki.png\");");
fxt.Exec_logo_setup();
fxt.Test_fil("mem/xowa/user/anonymous/wiki/en.wikipedia.org/html/logo.png" , "download");
}
// no main page exists; logo comes from the failover dir
@Test public void Logo_failover() {
fxt.Init_fil("mem/xowa/bin/any/html/xowa/import/logo.png" , "failover");
fxt.Exec_logo_setup();
fxt.Test_fil("mem/xowa/user/anonymous/wiki/en.wikipedia.org/html/logo.png", "failover");
}
// download is enabled but no main page exists; failover css is used
@Test public void Css_common_download_failover() {
fxt.Css_installer().Opt_download_css_common_(true);
fxt.Init_fil("mem/xowa/bin/any/html/xowa/import/xowa_common_ltr.css", "failover");
fxt.Exec_css_common_setup();
fxt.Test_fil("mem/xowa/user/anonymous/wiki/en.wikipedia.org/html/xowa_common.css", "failover");
}
// download is disabled; failover css is copied
@Test public void Css_common_copy() {
fxt.Css_installer().Opt_download_css_common_(false);
fxt.Init_fil("mem/xowa/bin/any/html/xowa/import/xowa_common_ltr.css", "failover");
fxt.Exec_css_common_setup();
fxt.Test_fil("mem/xowa/user/anonymous/wiki/en.wikipedia.org/html/xowa_common.css", "failover");
}
@Test public void Css_common_copy_specific_wiki() { // PURPOSE: css for specific wiki
fxt.Css_installer().Opt_download_css_common_(false).Wiki_code_(Bry_.new_a7("enwiki"));
fxt.Init_fil("mem/xowa/bin/any/html/xowa/import/xowa_common_override/xowa_common_enwiki.css", "failover");
fxt.Exec_css_common_setup();
fxt.Test_fil("mem/xowa/user/anonymous/wiki/en.wikipedia.org/html/xowa_common.css", "failover");
}
// both stylesheets linked by the main page are downloaded and concatenated; hrefs are unescaped and url-decoded first
@Test public void Css_scrape_download() {
fxt.Css_installer().Url_encoder_(Gfo_url_encoder_.Http_url);
fxt.Init_fil("mem/http/en.wikipedia.org" , Xoa_css_extractor_fxt.Main_page_html);
fxt.Init_fil("mem/http/en.wikipedia.org/common.css" , "download");
fxt.Init_fil("mem/http/www/a&0|b,c" , "data=css_0");
fxt.Init_fil("mem/http/www/a&1|b,c" , "data=css_1");
fxt.Exec_css_mainpage_setup();
fxt.Test_fil("mem/xowa/user/anonymous/wiki/en.wikipedia.org/html/xowa_common.css", String_.Concat_lines_nl
( "/*XOWA:mem/http/www/a&0|b,c*/"
, "data=css_0"
, ""
, "/*XOWA:mem/http/www/a&1|b,c*/"
, "data=css_1"
));
}
// no main page exists; scrape falls back to the failover css
@Test public void Css_scrape_failover() {
fxt.Init_fil("mem/xowa/bin/any/html/xowa/import/xowa_common_ltr.css", "failover");
fxt.Exec_css_mainpage_setup();
fxt.Test_fil("mem/xowa/user/anonymous/wiki/en.wikipedia.org/html/xowa_common.css", "failover");
}
}

View File

@@ -13,59 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs.css; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
import gplx.core.ios.*;
import gplx.xowa.wikis.data.fetchers.*;
import gplx.xowa.files.downloads.*;
// Test fixture: builds a Xoa_css_extractor wired to the in-memory io engine, with "mem/http/" standing in for the web
public class Xoa_css_extractor_fxt {
// resets the in-memory file system and creates a fresh extractor for en.wikipedia.org
public void Clear() {
Io_mgr.Instance.InitEngine_mem();
Gfo_usr_dlg usr_dlg = Gfo_usr_dlg_.Test();
css_installer = new Xoa_css_extractor();
css_installer.Download_xrg().Trg_engine_key_(IoEngine_.MemKey);
css_installer
.Usr_dlg_(usr_dlg)
.Wiki_domain_(Bry_.new_a7("en.wikipedia.org"))
.Protocol_prefix_("mem/http/")
.Mainpage_url_("mem/http/en.wikipedia.org")
.Failover_dir_(Io_url_.new_any_("mem/xowa/bin/any/html/xowa/import/")) // "mem/xowa/user/anonymous/wiki/home/html/"
.Wiki_html_dir_(Io_url_.new_any_("mem/xowa/user/anonymous/wiki/en.wikipedia.org/html/"))
;
page_fetcher = new Xow_page_fetcher_test();
css_installer.Page_fetcher_(page_fetcher);
Xoa_css_img_downloader css_img_downloader = new Xoa_css_img_downloader();
css_img_downloader.Ctor(usr_dlg, new Xof_download_wkr_test(), Bry_.new_a7("mem/http/"));
css_installer.Css_img_downloader_(css_img_downloader);
} private Xow_page_fetcher_test page_fetcher;
public Xoa_css_extractor Css_installer() {return css_installer;} private Xoa_css_extractor css_installer;
// registers a wiki page with the test page fetcher
public void Init_page(int ns_id, String ttl, String text) {
page_fetcher.Add(ns_id, Bry_.new_a7(ttl), Bry_.new_a7(text));
}
public void Init_fil_empty(String url) {Init_fil(url, "");}
// creates a file in the in-memory file system
public void Init_fil(String url, String text) {Io_mgr.Instance.SaveFilStr(url, text);}
// asserts that the file at url has the expected text
public void Test_fil(String url, String expd) {Tfds.Eq_str_lines(expd, Io_mgr.Instance.LoadFilStr(Io_url_.new_any_(url)));}
// each Exec_* below downloads the main page first (except Exec_css_wiki_setup) and then runs one setup step
public void Exec_logo_setup() {
css_installer.Mainpage_download();
css_installer.Logo_setup();
}
public void Exec_css_common_setup() {
css_installer.Mainpage_download();
css_installer.Css_common_setup();
}
public void Exec_css_wiki_setup() {css_installer.Css_wiki_setup();}
public void Exec_css_mainpage_setup() {
css_installer.Mainpage_download();
css_installer.Css_scrape_setup();
}
// sample main page: two stylesheet links with escaped / encoded hrefs, and a p-logo div with a background-image url
public static String Main_page_html = String_.Concat_lines_nl
( "<html>"
, " <head>"
, " <link rel=\"stylesheet\" href=\"www/a&amp;0%7Cb%2Cc\" />"
, " <link rel=\"stylesheet\" href=\"www/a&amp;1%7Cb%2Cc\" />"
, " </head>"
, " <body>"
, " <div id=\"p-logo\" role=\"banner\"><a style=\"background-image: url(wiki.png);\""
, " </body>"
, "</html>"
);
}

View File

@@ -13,32 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs.css; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
import org.junit.*; import gplx.core.ios.*; import gplx.xowa.wikis.nss.*;
// Tests generation of xowa_wiki.css from the MediaWiki:Common.css and MediaWiki:Vector.css pages
public class Xoa_css_extractor_wiki_tst {
@Before public void init() {fxt.Clear();} private Xoa_css_extractor_fxt fxt = new Xoa_css_extractor_fxt();
// both pages exist; each gets a header comment and sections are separated by a blank line
@Test public void Css_wiki_generate() {
fxt.Init_page(Xow_ns_.Tid__mediawiki, "Common.css" , "css_0");
fxt.Init_page(Xow_ns_.Tid__mediawiki, "Vector.css" , "css_1");
fxt.Exec_css_wiki_setup();
fxt.Test_fil("mem/xowa/user/anonymous/wiki/en.wikipedia.org/html/xowa_wiki.css", String_.Concat_lines_nl
( "/*XOWA:MediaWiki:Common.css*/"
, "css_0"
, ""
, "/*XOWA:MediaWiki:Vector.css*/"
, "css_1"
));
}
// neither page exists; an empty xowa_wiki.css is written
@Test public void Css_wiki_missing() {
fxt.Exec_css_wiki_setup();
fxt.Test_fil("mem/xowa/user/anonymous/wiki/en.wikipedia.org/html/xowa_wiki.css", "");
}
@Test public void Css_wiki_tab() { // PURPOSE: swap out &#09; for xdat files
fxt.Init_page(Xow_ns_.Tid__mediawiki, "Common.css" , "a&#09;b");
fxt.Exec_css_wiki_setup();
fxt.Test_fil("mem/xowa/user/anonymous/wiki/en.wikipedia.org/html/xowa_wiki.css", String_.Concat_lines_nl
( "/*XOWA:MediaWiki:Common.css*/"
, "a\tb"
));
}
}

View File

@@ -13,177 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs.css; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
import gplx.xowa.files.downloads.*; import gplx.core.envs.*;
public class Xoa_css_img_downloader {
private byte[] wiki_domain;
// Initializer (used in place of a constructor): stores the user dialog, download worker and stylesheet prefix; returns this for chaining
public Xoa_css_img_downloader Ctor(Gfo_usr_dlg usr_dlg, Xof_download_wkr download_wkr, byte[] stylesheet_prefix) {
	this.usr_dlg = usr_dlg;
	this.download_wkr = download_wkr;
	this.stylesheet_prefix = stylesheet_prefix;
	return this;
}
private Gfo_usr_dlg usr_dlg;
private Xof_download_wkr download_wkr;
// Fluent setter for the prefix that Import_url_build adds to stylesheet urls not starting with the http protocol
public Xoa_css_img_downloader Stylesheet_prefix_(byte[] v) {stylesheet_prefix = v; return this;} private byte[] stylesheet_prefix; // TEST: setter exposed b/c tests can handle "mem/" but not "//mem"
// Rewrites a css file in place: converts its url(...) references to local paths and downloads the referenced files into the css file's dir
public void Chk(byte[] wiki_domain, Io_url css_fil) {
	this.wiki_domain = wiki_domain;
	List_adp img_urls = List_adp_.New();
	byte[] css_old = Io_mgr.Instance.LoadFilBry(css_fil);
	// relative urls are resolved against "//" + wiki_domain
	byte[] rel_url_prefix = Bry_.Add(Bry_fwd_slashes, wiki_domain);
	byte[] css_new = Convert_to_local_urls(rel_url_prefix, css_old, img_urls);
	// download first; save the rewritten css last
	Io_url css_dir = css_fil.OwnerDir();
	String[] img_url_ary = img_urls.To_str_ary();
	Download_fils(css_dir, img_url_ary);
	Io_mgr.Instance.SaveFilBry(css_fil, css_new);
}
// Scans css for "url(...)" and returns a copy in which each url has been replaced by its cleaned local form
//   rel_url_prefix : passed to Import_url_chk for @import handling; also used in the failure warning
//   src            : css text
//   list           : receives each unique cleaned url (as String); the caller downloads these
// Urls that were not quoted are emitted inside double-quotes; data-image urls, empty urls and urls that cannot be cleaned are copied through unchanged
// If any exception occurs, a warning is logged and src is returned unmodified (list may already hold items)
public byte[] Convert_to_local_urls(byte[] rel_url_prefix, byte[] src, List_adp list) {
try {
int src_len = src.length;
// prv_pos marks the first byte of src not yet copied to bfr
int prv_pos = 0;
Bry_bfr bfr = Bry_bfr_.New_w_size(src_len);
Hash_adp img_hash = Hash_adp_bry.cs();
while (true) {
int url_pos = Bry_find_.Find_fwd(src, Bry_url, prv_pos);
if (url_pos == Bry_find_.Not_found) {bfr.Add_mid(src, prv_pos, src_len); break;} // no more "url("; exit;
int bgn_pos = url_pos + Bry_url_len; // set bgn_pos after "url("
// NOTE(review): no bounds check; if src ends right after "url(" this index looks out of range and would be handled by the catch below -- confirm
byte bgn_byte = src[bgn_pos];
byte end_byte = Byte_ascii.Null;
boolean quoted = true;
switch (bgn_byte) { // find end_byte
case Byte_ascii.Quote: case Byte_ascii.Apos: // quoted; end_byte is ' or "
end_byte = bgn_byte;
++bgn_pos;
break;
default: // not quoted; end byte is ")"
end_byte = Byte_ascii.Paren_end;
quoted = false;
break;
}
int end_pos = Bry_find_.Find_fwd(src, end_byte, bgn_pos, src_len);
if (end_pos == Bry_find_.Not_found) { // unclosed "url("; exit since nothing else will be found
usr_dlg.Warn_many(GRP_KEY, "parse.invalid_url.end_missing", "could not find end_sequence for 'url(': bgn='~{0}' end='~{1}'", prv_pos, String_.new_u8__by_len(src, prv_pos, prv_pos + 25));
bfr.Add_mid(src, prv_pos, src_len);
break;
}
if (end_pos - bgn_pos == 0) { // empty; "url()"; ignore
usr_dlg.Warn_many(GRP_KEY, "parse.invalid_url.empty", "'url(' is empty: bgn='~{0}' end='~{1}'", prv_pos, String_.new_u8__by_len(src, prv_pos, prv_pos + 25));
bfr.Add_mid(src, prv_pos, bgn_pos);
prv_pos = bgn_pos;
continue;
}
byte[] img_raw = Bry_.Mid(src, bgn_pos, end_pos); int img_raw_len = img_raw.length;
if (Bry_.Has_at_bgn(img_raw, Bry_data_image, 0, img_raw_len)) { // base64
bfr.Add_mid(src, prv_pos, end_pos); // nothing to download; just add entire String
prv_pos = end_pos;
continue;
}
int import_url_end = Import_url_chk(rel_url_prefix, src, src_len, prv_pos, url_pos, img_raw, bfr); // check for embedded stylesheets via @import tag
if (import_url_end != Bry_find_.Not_found) {
prv_pos = import_url_end;
continue;
}
byte[] img_cleaned = Xob_url_fixer.Fix(wiki_domain, img_raw, img_raw_len);
if (img_cleaned == null) { // could not clean img
usr_dlg.Warn_many(GRP_KEY, "parse.invalid_url.clean_failed", "could not extract valid http src: bgn='~{0}' end='~{1}'", prv_pos, String_.new_u8(img_raw));
bfr.Add_mid(src, prv_pos, bgn_pos); prv_pos = bgn_pos; continue;
}
if (!img_hash.Has(img_cleaned)) {// only add unique items for download;
img_hash.Add_as_key_and_val(img_cleaned);
list.Add(String_.new_u8(img_cleaned));
}
img_cleaned = Replace_invalid_chars(Bry_.Copy(img_cleaned)); // NOTE: must call ByteAry.Copy else img_cleaned will change *inside* hash
// copy everything up to the url, then the cleaned url; wrap in quotes if the original was not quoted
bfr.Add_mid(src, prv_pos, bgn_pos);
if (!quoted) bfr.Add_byte(Byte_ascii.Quote);
bfr.Add(img_cleaned);
if (!quoted) bfr.Add_byte(Byte_ascii.Quote);
// resume at the terminator, which is copied on the next pass
prv_pos = end_pos;
}
return bfr.To_bry_and_clear();
}
catch (Exception e) {
usr_dlg.Warn_many("", "", "failed to convert local_urls: ~{0} ~{1}", String_.new_u8(rel_url_prefix), Err_.Message_gplx_full(e));
return src;
}
}
/**
 * Builds the url used to download an imported stylesheet.
 *
 * @param stylesheet_prefix bytes prepended to css_url when it does not start with "http"
 * @param rel_url_prefix    not used by this method
 * @param css_url           url taken from the "@import url(...)" reference
 * @return css_url itself when it starts with "http"; else stylesheet_prefix + css_url
 */
public static byte[] Import_url_build(byte[] stylesheet_prefix, byte[] rel_url_prefix, byte[] css_url) {
	// css_url already starts with "http"; return self; PAGE:tr.n:Main_Page; DATE:2014-06-04
	if (Bry_.Has_at_bgn(css_url, Bry_http_protocol)) {
		return css_url;
	}
	return Bry_.Add(stylesheet_prefix, css_url);
}
/**
 * Checks whether the "url(" at find_bgn is preceded by "@import "; if so, downloads the
 * referenced stylesheet and writes it into bfr in place of the @import statement.
 *
 * @param rel_url_prefix prefix added to a css url that starts with a single "/"
 * @param src            css source
 * @param src_len        length of src
 * @param old_pos        position up to which src has already been written to bfr
 * @param find_bgn       position of "url(" in src
 * @param url_raw        raw url bytes extracted by the caller
 * @param bfr            output buffer
 * @return Bry_find_.Not_found if this is not an @import or the download returned null;
 *         else the position in src at which the caller should resume
 */
private int Import_url_chk(byte[] rel_url_prefix, byte[] src, int src_len, int old_pos, int find_bgn, byte[] url_raw, Bry_bfr bfr) {
	if (find_bgn < Bry_import_len) return Bry_find_.Not_found;	// not enough room before "url(" for "@import "
	if (!Bry_.Match(src, find_bgn - Bry_import_len, find_bgn, Bry_import)) return Bry_find_.Not_found;
	byte[] css_url = url_raw; int css_url_len = css_url.length;
	if (css_url_len > 0 && css_url[0] == Byte_ascii.Slash) {		// css_url starts with "/"; EX: "/page" or "//site/page" DATE:2014-02-03
		if (css_url_len > 1 && css_url[1] != Byte_ascii.Slash)	// skip if css_url starts with "//"; EX: "//site/page"
			css_url = Bry_.Add(rel_url_prefix, css_url);			// "/w/a.css" -> "//en.wikipedia.org/w/a.css"
	}
	css_url = Bry_.Replace(css_url, Byte_ascii.Space, Byte_ascii.Underline);	// NOTE: must replace spaces with underlines else download will fail; EX:https://it.wikivoyage.org/w/index.php?title=MediaWiki:Container e Infobox.css&action=raw&ctype=text/css; DATE:2015-03-08
	byte[] css_src_bry = Import_url_build(stylesheet_prefix, rel_url_prefix, css_url);
	String css_src_str = String_.new_u8(css_src_bry);
	download_wkr.Download_xrg().Prog_fmt_hdr_(usr_dlg.Log_many(GRP_KEY, "logo.download", "downloading import for '~{0}'", css_src_str));
	byte[] css_trg_bry = download_wkr.Download_xrg().Exec_as_bry(css_src_str);
	if (css_trg_bry == null) {
		usr_dlg.Warn_many("", "", "could not import css: url=~{0}", css_src_str);
		return Bry_find_.Not_found;	// css not found
	}
	bfr.Add_mid(src, old_pos, find_bgn - Bry_import_len).Add_byte_nl();
	bfr.Add(Bry_comment_bgn).Add(css_url).Add(Bry_comment_end).Add_byte_nl();
	if (Bry_find_.Find_fwd(css_url, Wikisource_dynimg_ttl) != -1) css_trg_bry = Bry_.Replace(css_trg_bry, Wikisource_dynimg_find, Wikisource_dynimg_repl);	// FreedImg hack; PAGE:en.s:Page:Notes_on_Osteology_of_Baptanodon._With_a_Description_of_a_New_Species.pdf/3 DATE:2014-09-06
	bfr.Add(css_trg_bry).Add_byte_nl();
	bfr.Add_byte_nl();
	int search_bgn = find_bgn + url_raw.length;
	int semic_pos = Bry_find_.Find_fwd(src, Byte_ascii.Semic, search_bgn, src_len);
	if (semic_pos != Bry_find_.Not_found) return semic_pos + Byte_ascii.Len_1;
	// FIX: no ";" after the @import; previously "Not_found + Len_1" was returned, which the caller
	// accepted as a valid position and which rewound the scan (re-processing the same import endlessly).
	// Resume after the closing ")" instead, or at end of src if there is none.
	int paren_pos = Bry_find_.Find_fwd(src, Byte_ascii.Paren_end, search_bgn, src_len);
	return paren_pos == Bry_find_.Not_found ? src_len : paren_pos + Byte_ascii.Len_1;
}
// Constants for the "FreedImg hack" in Import_url_chk: when the imported css url contains
// Wikisource_dynimg_ttl, every occurrence of Wikisource_dynimg_find in the downloaded css is
// replaced with Wikisource_dynimg_repl, which adds a selector for img[src*="wikimedia"].
private static final byte[]
Wikisource_dynimg_ttl = Bry_.new_a7("en.wikisource.org/w/index.php?title=MediaWiki:Dynimg.css")
, Wikisource_dynimg_find = Bry_.new_a7(".freedImg img[src*=\"wikipedia\"], .freedImg img[src*=\"wikisource\"], .freedImg img[src*=\"score\"], .freedImg img[src*=\"math\"] {")
, Wikisource_dynimg_repl = Bry_.new_a7(".freedImg img[src*=\"wikipedia\"], .freedImg img[src*=\"wikisource\"], /*XOWA:handle file:// paths which will have /commons.wikimedia.org/ but not /wikipedia/ */ .freedImg img[src*=\"wikimedia\"], .freedImg img[src*=\"score\"], .freedImg img[src*=\"math\"] {")
;
/**
 * Strips a leading "//" or "http://" and any query string from an image url.
 *
 * EX: "//site/a/b.jpg?key=val" -> "site/a/b.jpg"
 *
 * @param raw     raw url bytes
 * @param raw_len number of bytes of raw to consider
 * @return the cleaned url, or null when there is no "/" after the site name
 *         or when that "/" is the last byte (EX: "//site" or "//site/")
 */
public byte[] Clean_img_url(byte[] raw, int raw_len) {
	int pos_bgn = 0;
	if (Bry_.Has_at_bgn(raw, Bry_fwd_slashes, 0, raw_len)) pos_bgn = Bry_fwd_slashes.length;
	if (Bry_.Has_at_bgn(raw, Bry_http, 0, raw_len)) pos_bgn = Bry_http.length;
	int pos_slash = Bry_find_.Find_fwd(raw, Byte_ascii.Slash, pos_bgn, raw_len);
	if (pos_slash == Bry_find_.Not_found) return null; // first segment is site_name; at least one slash must be present for image name; EX: site.org/img_name.jpg
	if (pos_slash == raw_len - 1) return null; // "site.org/" is invalid
	int pos_end = raw_len;
	// FIX: the query starts at the FIRST "?"; searching backwards left part of the query
	// in the result when the query itself contained a "?"; EX: "a.jpg?x=1?y" -> "a.jpg?x=1"
	int pos_question = Bry_find_.Find_fwd(raw, Byte_ascii.Question, pos_bgn, raw_len);
	if (pos_question != Bry_find_.Not_found)
		pos_end = pos_question; // remove query params; EX: img_name?key=val
	return Bry_.Mid(raw, pos_bgn, pos_end);
}
/**
 * Downloads each url in ary into css_dir, skipping files that already exist.
 * A warning is logged when the downloaded file has a size of 0.
 *
 * @param css_dir directory under which the files are saved
 * @param ary     urls without protocol; "https://" is prepended for the download
 */
private void Download_fils(Io_url css_dir, String[] ary) {
	for (int idx = 0, len = ary.length; idx < len; idx++) {
		String img_src = ary[idx];
		Io_url img_trg = css_dir.GenSubFil_nest(Op_sys.Cur().Fsys_http_frag_to_url_str(Replace_invalid_chars_str(img_src)));
		if (!Io_mgr.Instance.ExistsFil(img_trg)) {
			download_wkr.Download(true, "https://" + img_src, img_trg, "download: " + img_src); // ILN
			// warn if 0 byte files downloaded; DATE:2015-07-06
			boolean is_empty = Io_mgr.Instance.QueryFil(img_trg).Size() == 0;
			if (is_empty) {
				Xoa_app_.Usr_dlg().Warn_many("", "", "css.download; 0 byte file downloaded; file=~{0}", img_trg.Raw());
			}
		}
	}
}
/** String variant of Replace_invalid_chars: converts to utf8 bytes, replaces, and converts back. */
String Replace_invalid_chars_str(String raw_str) {
	byte[] raw_bry = Bry_.new_u8(raw_str);
	byte[] cleaned_bry = Replace_invalid_chars(raw_bry);
	return String_.new_u8(cleaned_bry);
}
/**
 * Replaces the bytes \ : * ? " < > | with an underline.
 * NOTE: the array is modified in place and the same array is returned. "/" is not replaced.
 */
byte[] Replace_invalid_chars(byte[] raw_bry) {
	for (int idx = 0; idx < raw_bry.length; idx++) {	// convert invalid wnt chars to underscores
		byte cur = raw_bry[idx];
		boolean invalid
			=  cur == Byte_ascii.Backslash || cur == Byte_ascii.Colon || cur == Byte_ascii.Star || cur == Byte_ascii.Question
			|| cur == Byte_ascii.Quote     || cur == Byte_ascii.Lt    || cur == Byte_ascii.Gt   || cur == Byte_ascii.Pipe;
		if (invalid) {
			raw_bry[idx] = Byte_ascii.Underline;
		}
	}
	return raw_bry;
}
// Byte sequences matched against the css source / urls while converting.
private static final byte[]
Bry_url = Bry_.new_a7("url("), Bry_data_image = Bry_.new_a7("data:image/")
, Bry_http = Bry_.new_a7("http://"), Bry_fwd_slashes = Bry_.new_a7("//"), Bry_import = Bry_.new_a7("@import ")
, Bry_http_protocol = Bry_.new_a7("http")
;
// Delimiters of the "/*XOWA:<url>*/" comment written before each imported stylesheet.
public static final byte[]
Bry_comment_bgn = Bry_.new_a7("/*XOWA:"), Bry_comment_end = Bry_.new_a7("*/");
// Cached lengths of Bry_url and Bry_import.
private static final int Bry_url_len = Bry_url.length, Bry_import_len = Bry_import.length;
// Group key passed to usr_dlg when logging / warning.
static final String GRP_KEY = "xowa.wikis.init.css";
}

View File

@@ -13,169 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs.css; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
import org.junit.*; import gplx.xowa.files.downloads.*;
// Tests for Xoa_css_img_downloader: url conversion (Test_css_convert), url cleaning (Test_clean_img_url) and @import handling.
// Test_css_convert args: raw css, expected css, expected list of image urls queued for download.
public class Xoa_css_img_downloader_tst {
@Before public void init() {fxt.Clear();} private Xoa_css_img_downloader_fxt fxt = new Xoa_css_img_downloader_fxt();
@Test public void Basic() {
fxt.Test_css_convert
( "x {url(\"//site/a.jpg\")} y {url(\"//site/b.jpg\")}"
, "x {url(\"site/a.jpg\")} y {url(\"site/b.jpg\")}"
, "site/a.jpg"
, "site/b.jpg"
);
}
@Test public void Unquoted() { // unquoted url is written back with double quotes
fxt.Test_css_convert
( "x {url(//site/a.jpg)}"
, "x {url(\"site/a.jpg\")}"
, "site/a.jpg"
);
}
@Test public void Http() {
fxt.Test_css_convert
( "x {url(http://site/a.jpg)}"
, "x {url(\"site/a.jpg\")}"
, "site/a.jpg"
);
}
@Test public void Base64() { // "data:image/" urls are left as-is and not queued for download
fxt.Test_css_convert
( "x {url(\"//site/a.jpg\")} y {url(\"data:image/png;base64,BASE64DATA;ABC=\")} z {}"
, "x {url(\"site/a.jpg\")} y {url(\"data:image/png;base64,BASE64DATA;ABC=\")} z {}"
, "site/a.jpg"
);
}
@Test public void Exc_missing_quote() { // unclosed quote: remainder of css is copied unchanged
fxt.Test_css_convert
( "x {url(\"//site/a.jpg\")} y {url(\"//site/b.jpg} z {}"
, "x {url(\"site/a.jpg\")} y {url(\"//site/b.jpg} z {}"
, "site/a.jpg"
);
}
@Test public void Exc_empty() { // empty url is left unchanged
fxt.Test_css_convert
( "x {url(\"//site/a.jpg\")} y {url(\"\"} z {}"
, "x {url(\"site/a.jpg\")} y {url(\"\"} z {}"
, "site/a.jpg"
);
}
// @Test public void Exc_name_only() { // COMMENTED: not sure how to handle "b.jpg" (automatically add "current" path?); RESTORE: when example found
// fxt.Test_css_convert
// ( "x {url(\"//site/a.jpg\")} y {url(\"b.jpg\"} z {}"
// , "x {url(\"site/a.jpg\")} y {url(\"b.jpg\"} z {}"
// , "site/a.jpg"
// );
// }
@Test public void Repeat() {// PURPOSE.fix: exact same item was being added literally
fxt.Test_css_convert
( "x {url(\"//site/a.jpg?a=b\")} y {url(\"//site/a.jpg?a=b\"}"
, "x {url(\"site/a.jpg\")} y {url(\"site/a.jpg\"}"
, "site/a.jpg"
);
}
@Test public void Clean_basic() {fxt.Test_clean_img_url("//site/a.jpg" , "site/a.jpg");}
@Test public void Clean_query() {fxt.Test_clean_img_url("//site/a.jpg?key=val" , "site/a.jpg");}
@Test public void Clean_dir() {fxt.Test_clean_img_url("//site/a/b/c.jpg?key=val" , "site/a/b/c.jpg");}
@Test public void Clean_exc_site_only() {fxt.Test_clean_img_url("//site" , null);}
@Test public void Clean_exc_site_only_2() {fxt.Test_clean_img_url("//site/" , null);}
@Test public void Import_url() { // @import statement is replaced by a "/*XOWA:url*/" comment plus the imported css
Io_mgr.Instance.InitEngine_mem();
Io_mgr.Instance.SaveFilStr("mem/www/b.css", "imported_css");
fxt.Test_css_convert
( "x @import url(\"mem/www/b.css\") screen; z"
, String_.Concat_lines_nl
( "x "
, "/*XOWA:mem/www/b.css*/"
, "imported_css"
, ""
, " z"
)
);
}
@Test public void Import_url_make() {
fxt.Test_import_url("a.org/b" , "http:a.org/b"); // add "stylesheet_prefix"
fxt.Test_import_url("http://a.org" , "http://a.org"); // unless it starts with http
fxt.Test_import_url("https://a.org" , "https://a.org"); // unless starts with https EX:: handle @import(https://...); PAGE:tr.n:Main_Page; DATE:2014-06-04
}
@Test public void Import_url_relative() { // PURPOSE: if directory, add domain; "/a/b.css" -> "//domain/a/b.css"; DATE:2014-02-03
Io_mgr.Instance.InitEngine_mem();
Io_mgr.Instance.SaveFilStr("mem/en.wikipedia.org/www/b.css", "imported_css");
fxt.Test_css_convert
( "x @import url(\"/www/b.css\") screen; z" // starts with "/"
, String_.Concat_lines_nl
( "x "
, "/*XOWA:mem/en.wikipedia.org/www/b.css*/"
, "imported_css"
, ""
, " z"
)
);
}
@Test public void Import_url_relative_skip() { // PURPOSE: if rel path, skip; "//site/a/b.css"; DATE:2014-02-03
fxt.Downloader().Stylesheet_prefix_(Bry_.new_a7("mem")); // stylesheet prefix defaults to ""; set to "mem", else test will try to retrieve "//url" which will fail
Io_mgr.Instance.InitEngine_mem();
Io_mgr.Instance.SaveFilStr("mem//en.wikipedia.org/a/b.css", "imported_css");
fxt.Test_css_convert
( "x @import url(\"//en.wikipedia.org/a/b.css\") screen; z" // starts with "//"
, String_.Concat_lines_nl
( "x "
, "/*XOWA://en.wikipedia.org/a/b.css*/"
, "imported_css"
, ""
, " z"
)
);
}
@Test public void Import_url_space() { // PURPOSE: some css has spaces; replace with underlines else fails when downloaded; EX: https://it.wikivoyage.org/w/index.php?title=MediaWiki:Container e Infobox.css&action=raw&ctype=text/css; DATE:2015-03-08
Io_mgr.Instance.InitEngine_mem();
Io_mgr.Instance.SaveFilStr("mem/www/b_c.css", "imported_css");
fxt.Test_css_convert
( "x @import url(\"mem/www/b c.css\") screen; z"
, String_.Concat_lines_nl
( "x "
, "/*XOWA:mem/www/b_c.css*/"
, "imported_css"
, ""
, " z"
)
);
}
@Test public void Wikisource_freedimg() { // PURPOSE: check that "wikimedia" is replaced for FreedImg hack; PAGE:en.s:Page:Notes_on_Osteology_of_Baptanodon._With_a_Description_of_a_New_Species.pdf/3 DATE:2014-09-06
fxt.Downloader().Stylesheet_prefix_(Bry_.new_a7("mem")); // stylesheet prefix defaults to ""; set to "mem", else test will try to retrieve "//url" which will fail
Io_mgr.Instance.InitEngine_mem();
Io_mgr.Instance.SaveFilStr("mem//en.wikisource.org/w/index.php?title=MediaWiki:Dynimg.css", ".freedImg img[src*=\"wikipedia\"], .freedImg img[src*=\"wikisource\"], .freedImg img[src*=\"score\"], .freedImg img[src*=\"math\"] {");
fxt.Test_css_convert
( "x @import url(\"//en.wikisource.org/w/index.php?title=MediaWiki:Dynimg.css\") screen; z" // starts with "//"
, String_.Concat_lines_nl
( "x "
, "/*XOWA://en.wikisource.org/w/index.php?title=MediaWiki:Dynimg.css*/"
, ".freedImg img[src*=\"wikipedia\"], .freedImg img[src*=\"wikisource\"], /*XOWA:handle file:// paths which will have /commons.wikimedia.org/ but not /wikipedia/ */ .freedImg img[src*=\"wikimedia\"], .freedImg img[src*=\"score\"], .freedImg img[src*=\"math\"] {"
, ""
, " z"
)
);
}
}
/** Test fixture wrapping a Xoa_css_img_downloader that is rebuilt on every Clear(). */
class Xoa_css_img_downloader_fxt {
	private Xoa_css_img_downloader downloader;
	public Xoa_css_img_downloader Downloader() {return downloader;}
	/** Creates a fresh downloader with a test usr_dlg, a test download worker and an empty third Ctor arg. */
	public void Clear() {
		this.downloader = new Xoa_css_img_downloader();
		this.downloader.Ctor(Gfo_usr_dlg_.Test(), new Xof_download_wkr_test(), Bry_.Empty);
	}
	/** Converts raw and checks both the converted css and the list of collected image urls. */
	public void Test_css_convert(String raw, String expd, String... expd_img_ary) {
		byte[] rel_url_prefix = Bry_.new_a7("mem/en.wikipedia.org");
		List_adp img_list = List_adp_.New();
		byte[] converted = downloader.Convert_to_local_urls(rel_url_prefix, Bry_.new_u8(raw), img_list);
		Tfds.Eq_str_lines(expd, String_.new_u8(converted));
		Tfds.Eq_ary_str(expd_img_ary, img_list.To_str_ary());
	}
	/** Checks Clean_img_url; a null expd means the url is expected to be rejected. */
	public void Test_clean_img_url(String raw_str, String expd) {
		byte[] raw_bry = Bry_.new_a7(raw_str);
		byte[] cleaned = downloader.Clean_img_url(raw_bry, raw_bry.length);
		String cleaned_str = cleaned == null ? null : String_.new_a7(cleaned);
		Tfds.Eq(expd, cleaned_str);
	}
	/** Checks Import_url_build using "http:" as stylesheet_prefix and "//en.wikipedia.org" as rel_url_prefix. */
	public void Test_import_url(String raw, String expd) {
		byte[] stylesheet_prefix = Bry_.new_a7("http:");
		byte[] rel_url_prefix = Bry_.new_a7("//en.wikipedia.org");
		byte[] built = Xoa_css_img_downloader.Import_url_build(stylesheet_prefix, rel_url_prefix, Bry_.new_u8(raw));
		Tfds.Eq(expd, String_.new_u8(built));
	}
}

View File

@@ -13,43 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs.css; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
import gplx.core.btries.*; import gplx.core.primitives.*;
/**
 * Scans css for "@import" and " url(" tokens (matched through a trie built with ci_a7),
 * delegates each match to the corresponding sub-parser, and copies all other bytes to an internal buffer.
 */
class Xob_css_parser {
	private static final byte Tkn_import = 1, Tkn_url = 2;
	private static final Btrie_slim_mgr tkns_trie = Btrie_slim_mgr.ci_a7()
		.Add_str_byte("@import"	, Tkn_import)
		.Add_str_byte(" url("	, Tkn_url)
		;
	private final Xob_mirror_mgr mgr;
	private final Xob_css_parser__url url_parser;
	private final Xob_css_parser__import import_parser;
	private final Bry_bfr bfr = Bry_bfr_.New_w_size(255);
	private final Btrie_rv trv = new Btrie_rv();
	public Xob_css_parser(Xob_mirror_mgr mgr) {
		this.mgr = mgr;
		this.url_parser = new Xob_css_parser__url(mgr.Site_url());
		this.import_parser = new Xob_css_parser__import(url_parser);
	}
	/** Parses src: each recognized token is processed against mgr and written to the buffer; parsing resumes at the position returned by the token's Write. */
	public void Parse(byte[] src) {
		int src_len = src.length;
		for (int pos = 0; pos < src_len; ) {
			byte cur_byte = src[pos];
			Object match = tkns_trie.Match_at_w_b0(trv, cur_byte, src, pos, src_len);
			if (match == null) {	// not a token; copy byte and move on
				bfr.Add_byte(cur_byte);
				++pos;
				continue;
			}
			byte tkn_tid = ((Byte_obj_val)match).Val();
			int match_pos = trv.Pos();
			Xob_css_tkn__base tkn = null;
			if		(tkn_tid == Tkn_url)	tkn = url_parser.Parse(src, src_len, pos, match_pos);
			else if	(tkn_tid == Tkn_import)	tkn = import_parser.Parse(src, src_len, pos, match_pos);
			tkn.Process(mgr);
			pos = tkn.Write(bfr, src);
		}
	}
}

View File

@@ -13,29 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs.css; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
import gplx.xowa.files.downloads.*;
/** Parses an "@import url(...)" statement into an import token, or a warn token when it is malformed. */
class Xob_css_parser__import {
	// // "//id.wikibooks.org/w/index.php?title=MediaWiki:Common.css&oldid=43393&action=raw&ctype=text/css";
	private final Xob_css_parser__url url_parser;
	public Xob_css_parser__import(Xob_css_parser__url url_parser) {this.url_parser = url_parser;}
	/**
	 * @param src     css source
	 * @param src_len length of src
	 * @param tkn_bgn position where "@import" starts
	 * @param tkn_end position right after "@import"
	 * @return an import token ending after the statement's ";" (or after the url when no ";" follows);
	 *         a warn token when src ends after "@import", "url(" is missing, or the url is not valid
	 */
	public Xob_css_tkn__base Parse(byte[] src, int src_len, int tkn_bgn, int tkn_end) {	// " @import"
		int bgn_pos = Bry_find_.Find_fwd_while_ws(src, tkn_end, src_len);	// skip any ws after " @import"
		if (bgn_pos == src_len) return Xob_css_tkn__warn.new_(tkn_bgn, tkn_end, "mirror.parser.import:EOS after import; bgn=~{0}", tkn_bgn);
		if (!Bry_.Has_at_bgn(src, Tkn_url_bry, bgn_pos, src_len)) return Xob_css_tkn__warn.new_(tkn_bgn, tkn_end, "mirror.parser.import:url missing; bgn=~{0}", tkn_bgn);
		tkn_end = bgn_pos + Tkn_url_bry.length;
		Xob_css_tkn__base frag = url_parser.Parse(src, src_len, bgn_pos, tkn_end);
		if (frag.Tid() != Xob_css_tkn__url.Tid_url) return Xob_css_tkn__warn.new_(tkn_bgn, frag.Pos_end(), "mirror.parser.import:url invalid; bgn=~{0}", tkn_bgn);
		Xob_css_tkn__url url_frag = (Xob_css_tkn__url)frag;
		byte[] src_url = url_frag.Src_url();
		src_url = Bry_.Replace(src_url, Byte_ascii.Space, Byte_ascii.Underline);	// NOTE: must replace spaces with underlines else download will fail; EX:https://it.wikivoyage.org/w/index.php?title=MediaWiki:Container e Infobox.css&action=raw&ctype=text/css; DATE:2015-03-08
		int semic_pos = Bry_find_.Find_fwd(src, Byte_ascii.Semic, frag.Pos_end(), src_len);
		// FIX: when no ";" follows, "semic_pos + 1" was used as the token end, which sent
		// Xob_css_parser.Parse back to the start of src; end the token after the url instead
		int import_end = semic_pos == Bry_find_.Not_found ? frag.Pos_end() : semic_pos + 1;
		return Xob_css_tkn__import.new_(tkn_bgn, import_end, src_url, url_frag.Trg_url(), url_frag.Quote_byte());
	}
	private static final byte[] Tkn_url_bry = Bry_.new_a7("url(");
	public static final byte[]
	  Wikisource_dynimg_ttl		= Bry_.new_a7("en.wikisource.org/w/index.php?title=MediaWiki:Dynimg.css")
	, Wikisource_dynimg_find	= Bry_.new_a7(".freedImg img[src*=\"wikipedia\"], .freedImg img[src*=\"wikisource\"], .freedImg img[src*=\"score\"], .freedImg img[src*=\"math\"] {")
	, Wikisource_dynimg_repl	= Bry_.new_a7(".freedImg img[src*=\"wikipedia\"], .freedImg img[src*=\"wikisource\"], /*XOWA:handle file:// paths which will have /commons.wikimedia.org/ but not /wikipedia/ */ .freedImg img[src*=\"wikimedia\"], .freedImg img[src*=\"score\"], .freedImg img[src*=\"math\"] {")
	;
}

View File

@@ -13,24 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs.css; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
import org.junit.*;
/** Tests for Xob_css_parser__import; each case is (source, expected written text[, expected fragment of warning]). */
public class Xob_css_parser__import_tst {
	private Xob_css_parser__import_fxt fxt = new Xob_css_parser__import_fxt();
	@Before public void init() {
		fxt.Clear();
	}
	@Test public void Basic() {
		fxt.Test_parse_import(" @import url(//site/a.png)", " @import url('site/a.png')");
	}
	@Test public void Warn_eos() {
		fxt.Test_parse_warn(" @import", " @import", "EOS");
	}
	@Test public void Warn_missing() {	// no "url("
		fxt.Test_parse_warn(" @import ('//site/a.png')", " @import", "missing");
	}
	@Test public void Warn_invalid() {	// invalid
		fxt.Test_parse_warn(" @import url('//site')", " @import url('//site')", "invalid");
	}
}
/** Fixture for import tests; reuses the url fixture but routes parsing through Xob_css_parser__import. */
class Xob_css_parser__import_fxt extends Xob_css_parser__url_fxt {
	private Xob_css_parser__import import_parser;
	@Override public void Clear() {
		super.Clear();
		import_parser = new Xob_css_parser__import(url_parser);
	}
	@Override protected void Exec_parse_hook() {
		int src_len = src_bry.length;
		cur_frag = import_parser.Parse(src_bry, src_len, 0, 8); // 8=" @import".length
	}
	/** Parses src_str and expects an import token whose written form equals expd. */
	public void Test_parse_import(String src_str, String expd) {
		Exec_parse(src_str, Xob_css_tkn__base.Tid_import, expd);
	}
}

View File

@@ -13,44 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs.css; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
/** Parses a single " url(...)" reference into a url token, a base64 token, or a warn token. */
class Xob_css_parser__url {
	private static final byte[] Bry_data_image = Bry_.new_a7("data:image/");
	private final byte[] site;
	public Xob_css_parser__url(byte[] site) {this.site = site;}
	/**
	 * @param src     css source
	 * @param src_len length of src
	 * @param tkn_bgn position where the token starts
	 * @param tkn_end position right after "url("
	 * @return url token on success; base64 token for "data:image/" urls; warn token when the
	 *         reference is at end of src, unterminated, empty, missing ")" after a closing quote, or not fixable
	 */
	public Xob_css_tkn__base Parse(byte[] src, int src_len, int tkn_bgn, int tkn_end) {	// " url"
		int url_bgn = Bry_find_.Find_fwd_while_ws(src, tkn_end, src_len);	// skip any ws after " url("
		if (url_bgn == src_len)
			return Xob_css_tkn__warn.new_(tkn_bgn, tkn_end, "mirror.parser.url:EOS; bgn=~{0}", tkn_bgn);

		// first non-ws byte decides how the url ends
		byte first_byte = src[url_bgn];
		boolean quoted = first_byte == Byte_ascii.Quote || first_byte == Byte_ascii.Apos;
		byte end_byte, quote_byte;
		if (quoted) {	// quoted; url ends at matching quote; EX: ' url("a.png")'
			end_byte = first_byte;
			quote_byte = first_byte;
			++url_bgn;
		}
		else {			// not quoted; url ends at ")"; EX: ' url(a.png)'
			end_byte = Byte_ascii.Paren_end;
			quote_byte = Byte_ascii.Null;
		}

		int url_end = Bry_find_.Find_fwd(src, end_byte, url_bgn, src_len);
		if (url_end == Bry_find_.Not_found)	// unclosed "url("; exit since nothing else will be found
			return Xob_css_tkn__warn.new_(tkn_bgn, tkn_end, "mirror.parser.url:dangling; bgn=~{0} excerpt=~{1}", url_bgn, String_.new_u8__by_len(src, tkn_bgn, tkn_bgn + 128));
		if (url_end == url_bgn)				// empty; "url()"; ignore
			return Xob_css_tkn__warn.new_(tkn_bgn, tkn_end, "mirror.parser.url:empty; bgn=~{0} excerpt=~{1}", url_bgn, String_.new_u8__by_len(src, tkn_bgn, tkn_bgn + 128));

		byte[] url_orig = Bry_.Mid(src, url_bgn, url_end);
		int url_orig_len = url_orig.length;
		int rv_end = url_end + 1;	// token ends after end_byte
		if (quoted) {				// closing quote must be followed directly by ")"
			boolean paren_follows = rv_end < src_len && src[rv_end] == Byte_ascii.Paren_end;
			if (!paren_follows)
				return Xob_css_tkn__warn.new_(tkn_bgn, rv_end, "mirror.parser.url:base64 dangling; bgn=~{0} excerpt=~{1}", url_bgn, String_.new_u8(url_orig));
			++rv_end;
		}
		if (Bry_.Has_at_bgn(url_orig, Bry_data_image))	// base64
			return Xob_css_tkn__base64.new_(tkn_bgn, rv_end);

		byte[] src_url = Xob_url_fixer.Fix(site, url_orig, url_orig_len);
		if (src_url == null)	// could not convert
			return Xob_css_tkn__warn.new_(tkn_bgn, rv_end, "mirror.parser.url:invalid url; bgn=~{0} excerpt=~{1}", tkn_bgn, String_.new_u8(url_orig));
		return Xob_css_tkn__url.new_(tkn_bgn, rv_end, src_url, quote_byte);
	}
}

View File

@@ -13,46 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs.css; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
import org.junit.*;
/** Tests for Xob_css_parser__url; each case is (source, expected written text[, expected fragment of warning]). */
public class Xob_css_parser__url_tst {
	private Xob_css_parser__url_fxt fxt = new Xob_css_parser__url_fxt();
	@Before public void init() {
		fxt.Clear();
	}
	@Test public void Quote_none() {
		fxt.Test_parse_url(" url(//site/A.png) b", " url('site/A.png')");
	}
	@Test public void Quote_apos() {
		fxt.Test_parse_url(" url('//site/A.png') b", " url('site/A.png')");
	}
	@Test public void Quote_quote() {
		fxt.Test_parse_url(" url(\"//site/A.png\") b", " url(\"site/A.png\")");
	}
	@Test public void Base64() {
		fxt.Test_parse_base64(" url('data:image/png;base64,BASE64DATA;ABC=') b", " url('data:image/png;base64,BASE64DATA;ABC=')");
	}
	@Test public void Base64_dangling() {
		fxt.Test_parse_warn(" url('data:image/png;base64,BASE64DATA;ABC=' ", " url('data:image/png;base64,BASE64DATA;ABC='", "base64 dangling");
	}
	@Test public void Warn_eos() {
		fxt.Test_parse_warn(" url(", " url(", "EOS");
	}
	@Test public void Warn_dangling() {
		fxt.Test_parse_warn(" url(a", " url(", "dangling");
	}
	@Test public void Warn_empty() {
		fxt.Test_parse_warn(" url()", " url(", "empty");
	}
	@Test public void Warn_site() {
		fxt.Test_parse_warn(" url('//site')", " url('//site')", "invalid");
	}
}
/** Fixture for url-parser tests: parses a source string, writes the resulting token and compares tid and text. */
class Xob_css_parser__url_fxt {
	private final Bry_bfr bfr = Bry_bfr_.New_w_size(32);
	protected Xob_css_parser__url url_parser;
	protected Xob_css_tkn__base cur_frag;
	protected byte[] src_bry;
	@gplx.Virtual public void Clear() {
		this.url_parser = new Xob_css_parser__url(Bry_.new_a7("site"));
	}
	/** Parses src_str via Exec_parse_hook, then checks the token's tid and its written form. */
	protected void Exec_parse(String src_str, int expd_tid, String expd_str) {
		src_bry = Bry_.new_u8(src_str);
		Exec_parse_hook();
		cur_frag.Write(bfr, src_bry);
		String written = bfr.To_str_and_clear();
		int actl_tid = cur_frag.Tid();
		Tfds.Eq(expd_tid, actl_tid, "wrong tid; expd={0}, actl={1}", expd_tid, cur_frag.Tid());
		Tfds.Eq(expd_str, written);
	}
	/** Runs the parser under test and stores the result in cur_frag; subclasses override to swap the parser. */
	@gplx.Virtual protected void Exec_parse_hook() {
		int src_len = src_bry.length;
		cur_frag = url_parser.Parse(src_bry, src_len, 0, 5); // 5=" url(".length
	}
	public void Test_parse_url(String src_str, String expd) {
		Exec_parse(src_str, Xob_css_tkn__base.Tid_url, expd);
	}
	public void Test_parse_base64(String src_str, String expd) {
		Exec_parse(src_str, Xob_css_tkn__base.Tid_base64, expd);
	}
	/** Expects a warn token whose Fail_msg contains warn. */
	public void Test_parse_warn(String src_str, String expd, String warn) {
		Exec_parse(src_str, Xob_css_tkn__base.Tid_warn, expd);
		Xob_css_tkn__warn warn_frag = (Xob_css_tkn__warn)cur_frag;
		boolean has_warn = String_.Has(warn_frag.Fail_msg(), warn);
		Tfds.Eq(true, has_warn);
	}
}

View File

@@ -13,104 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs.css; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
import gplx.core.envs.*;
/** Base class of all css tokens: holds the token type and its begin / end positions in the source. */
abstract class Xob_css_tkn__base {
	public static final int Tid_warn = 1, Tid_base64 = 2, Tid_url = 3, Tid_import = 4;
	protected int tid;
	protected int pos_bgn;
	protected int pos_end;
	public void Init(int tid, int pos_bgn, int pos_end) {
		this.tid = tid;
		this.pos_bgn = pos_bgn;
		this.pos_end = pos_end;
	}
	public int Tid()		{return tid;}
	public int Pos_bgn()	{return pos_bgn;}
	public int Pos_end()	{return pos_end;}
	/** Hook called after parsing; does nothing by default. */
	@gplx.Virtual public void Process(Xob_mirror_mgr mgr) {}
	/** Writes the token to bfr and returns a position in src (the implementations in this file return pos_end). */
	public abstract int Write(Bry_bfr bfr, byte[] src);
}
/** Token for a span that could not be parsed: the source bytes are copied unchanged and a warning is emitted. */
class Xob_css_tkn__warn extends Xob_css_tkn__base {
	private String fail_msg;
	public String Fail_msg() {return fail_msg;}
	/** Builds a warn token whose message is fmt formatted with fmt_args. */
	public static Xob_css_tkn__warn new_(int pos_bgn, int pos_end, String fmt, Object... fmt_args) {
		Xob_css_tkn__warn warn = new Xob_css_tkn__warn();
		warn.Init(Tid_warn, pos_bgn, pos_end);
		warn.fail_msg = String_.Format(fmt, fmt_args);
		return warn;
	}
	@Override public void Process(Xob_mirror_mgr mgr) {
		Gfo_usr_dlg usr_dlg = mgr.Usr_dlg();
		usr_dlg.Warn_many("", "", fail_msg);
	}
	@Override public int Write(Bry_bfr bfr, byte[] src) {
		bfr.Add_mid(src, this.pos_bgn, this.pos_end);	// copy original bytes as-is
		return this.pos_end;
	}
}
/** Token for an embedded "data:image/" url: the source bytes are copied unchanged. */
class Xob_css_tkn__base64 extends Xob_css_tkn__base {
	public static Xob_css_tkn__base64 new_(int pos_bgn, int pos_end) {
		Xob_css_tkn__base64 tkn = new Xob_css_tkn__base64();
		tkn.Init(Tid_base64, pos_bgn, pos_end);
		return tkn;
	}
	@Override public int Write(Bry_bfr bfr, byte[] src) {
		bfr.Add_mid(src, this.pos_bgn, this.pos_end);	// nothing to convert
		return this.pos_end;
	}
}
/** Token for a " url(...)" reference: registers the file for download and writes the url in its file-system form. */
class Xob_css_tkn__url extends Xob_css_tkn__base {
	private byte quote_byte;
	private byte[] src_url;
	private byte[] trg_url;
	public byte Quote_byte()	{return quote_byte;}
	public byte[] Src_url()		{return src_url;}
	public byte[] Trg_url()		{return trg_url;}
	/** Creates the token; trg_url is derived from src_url via To_fsys. */
	public static Xob_css_tkn__url new_(int pos_bgn, int pos_end, byte[] src_url, byte quote_byte) {
		Xob_css_tkn__url tkn = new Xob_css_tkn__url();
		tkn.Init(Tid_url, pos_bgn, pos_end);
		tkn.src_url = src_url;
		tkn.trg_url = To_fsys(src_url);
		tkn.quote_byte = quote_byte;
		return tkn;
	}
	@Override public void Process(Xob_mirror_mgr mgr) {
		Xobc_download_itm itm = new Xobc_download_itm(Xobc_download_itm.Tid_file, String_.new_u8(src_url), trg_url);
		mgr.File_hash().Add_if_dupe_use_1st(src_url, itm);
	}
	@Override public int Write(Bry_bfr bfr, byte[] src) {
		byte quote = quote_byte == Byte_ascii.Null ? Byte_ascii.Apos : quote_byte;	// unquoted urls are written with apos
		bfr.Add_str_a7(" url(");				// EX: ' url('
		bfr.Add_byte(quote);					// EX: '"'
		bfr.Add(trg_url);						// EX: 'a.png'
		bfr.Add_byte(quote);					// EX: '"'
		bfr.Add_byte(Byte_ascii.Paren_end);		// EX: ')'
		return pos_end;
	}
	/**
	 * Converts a url to a file-system name: when Op_sys.Cur().Tid_is_wnt() is true, returns a copy in which
	 * the bytes < > : | ? * " are replaced with underlines; otherwise returns src itself.
	 */
	public static byte[] To_fsys(byte[] src) {
		if (!Op_sys.Cur().Tid_is_wnt()) return src;
		byte[] rv = Bry_.Copy(src);	// NOTE: must call ByteAry.Copy else url_actl will change *inside* bry
		for (int idx = 0; idx < rv.length; ++idx) {
			byte cur = rv[idx];
			boolean invalid
				=  cur == Byte_ascii.Lt   || cur == Byte_ascii.Gt       || cur == Byte_ascii.Colon || cur == Byte_ascii.Pipe
				|| cur == Byte_ascii.Question || cur == Byte_ascii.Star || cur == Byte_ascii.Quote;
			if (invalid) rv[idx] = Byte_ascii.Underline;	// all other bytes, including "/" and "\", are kept
		}
		return rv;
	}
}
/** Token for an "@import url(...)" statement: registers the stylesheet with the mirror mgr and writes the import with its target url. */
class Xob_css_tkn__import extends Xob_css_tkn__base {
	private byte quote_byte;
	private byte[] src_url;
	private byte[] trg_url;
	public byte Quote_byte()	{return quote_byte;}
	public byte[] Src_url()		{return src_url;}
	public byte[] Trg_url()		{return trg_url;}
	public static Xob_css_tkn__import new_(int pos_bgn, int pos_end, byte[] src_url, byte[] trg_url, byte quote_byte) {
		Xob_css_tkn__import tkn = new Xob_css_tkn__import();
		tkn.Init(Tid_import, pos_bgn, pos_end);
		tkn.src_url = src_url;
		tkn.trg_url = trg_url;
		tkn.quote_byte = quote_byte;
		return tkn;
	}
	@Override public void Process(Xob_mirror_mgr mgr) {
		mgr.Code_add(this.src_url);
	}
	@Override public int Write(Bry_bfr bfr, byte[] src) {
		byte quote = quote_byte == Byte_ascii.Null ? Byte_ascii.Apos : quote_byte;	// unquoted urls are written with apos
		bfr.Add_str_a7(" @import url(");		// EX: ' @import url('
		bfr.Add_byte(quote);					// EX: '"'
		bfr.Add(trg_url);						// EX: 'a.png'
		bfr.Add_byte(quote);					// EX: '"'
		bfr.Add_byte(Byte_ascii.Paren_end);		// EX: ')'
		return pos_end;
	}
}

View File

@@ -13,45 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs.css; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
import gplx.core.ios.*; import gplx.xowa.files.downloads.*;
/**
 * Mirrors a page's css: downloads the root page, parses it for "@import" / "url(" references,
 * downloads and parses imported stylesheets until no new ones are found, then downloads the referenced files.
 */
public class Xob_mirror_mgr {
	private final Xof_download_wkr download_wkr; private final Xob_css_parser css_parser;
	private final byte[] page_url; private final Io_url fsys_root;
	public Xob_mirror_mgr(Gfo_usr_dlg usr_dlg, Xof_download_wkr download_wkr, byte[] site_url, byte[] page_url, Io_url fsys_root) {
		this.usr_dlg = usr_dlg; this.download_wkr = download_wkr;
		this.site_url = site_url; this.page_url = page_url; this.fsys_root = fsys_root;
		this.css_parser = new Xob_css_parser(this);
	}
	public Gfo_usr_dlg Usr_dlg() {return usr_dlg;} private final Gfo_usr_dlg usr_dlg;
	public byte[] Site_url() {return site_url;} private final byte[] site_url;
	/** Queues a stylesheet for download; a url that is already queued keeps its first entry. */
	public void Code_add(byte[] src_url) {
		byte[] trg_url = Xob_css_tkn__url.To_fsys(src_url);
		code_hash.Add_if_dupe_use_1st(src_url, new Xobc_download_itm(Xobc_download_itm.Tid_css, String_.new_u8(src_url), trg_url));
	}
	public Ordered_hash Code_hash() {return code_hash;} private final Ordered_hash code_hash = Ordered_hash_.New();
	public Ordered_hash File_hash() {return file_hash;} private final Ordered_hash file_hash = Ordered_hash_.New();
	/** Runs the mirror; a download that returns null is warned about and skipped instead of being parsed. */
	public void Exec() {
		usr_dlg.Plog_many("", "", "html_mirror:download.root_page; url=~{0}", page_url);
		IoEngine_xrg_downloadFil download_xrg = download_wkr.Download_xrg();
		byte[] page_src = download_xrg.Exec_as_bry(String_.new_u8(page_url));
		if (page_src == null) {	// FIX: download failed; previously null was passed to the parser
			usr_dlg.Warn_many("", "", "html_mirror:download.root_page failed; url=~{0}", String_.new_u8(page_url));
			return;
		}
		css_parser.Parse(page_src);
		while (true) {	// parsing a stylesheet may queue more stylesheets; repeat until queue is empty
			Xobc_download_itm[] code_ary = (Xobc_download_itm[])code_hash.To_ary_and_clear(Xobc_download_itm.class);
			int code_ary_len = code_ary.length;
			if (code_ary_len == 0) break;
			for (int i = 0; i < code_ary_len; ++i) {
				Xobc_download_itm code = code_ary[i];
				byte[] code_src = download_xrg.Exec_as_bry(code.Http_str());
				if (code_src == null) {	// FIX: download failed; skip instead of saving / parsing null
					usr_dlg.Warn_many("", "", "html_mirror:download.code failed; url=~{0}", code.Http_str());
					continue;
				}
				Io_mgr.Instance.SaveFilBry(fsys_root.Gen_sub_path_for_os(String_.new_u8(code.Fsys_url())), code_src);
				css_parser.Parse(code_src);
			}
		}
		Xobc_download_itm[] file_ary = (Xobc_download_itm[])file_hash.To_ary_and_clear(Xobc_download_itm.class);
		int file_ary_len = file_ary.length;
		for (int i = 0; i < file_ary_len; ++i) {
			Xobc_download_itm file = file_ary[i];
			download_xrg.Init(file.Http_str(), Io_url_.new_fil_(fsys_root.Gen_sub_path_for_os(String_.new_u8(file.Fsys_url()))));
			download_xrg.Exec();
		}
	}
}

View File

@@ -13,49 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs.css; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
import org.junit.*;
import gplx.xowa.files.downloads.*;
// Test scaffold for the html mirror manager.
// NOTE: every assertion below is commented out, so Download_1 currently only seeds two in-memory files and verifies nothing.
public class Xob_mirror_mgr_tst {
@Before public void init() {fxt.Clear();} private Xob_mirror_mgr_fxt fxt = new Xob_mirror_mgr_fxt();
@Test public void Download_1() {
// seed the image file; its text defaults to the file name ("a.png")
fxt.Fsys().Init_fil("mem/http/enwiki/file/a.png");
// seed the root page with one css url(...) reference
fxt.Fsys().Init_fil("mem/http/enwiki/wiki/Main_Page", "url('//enwiki/wiki/a.png')");
// fxt.Test_css();
// fxt.Fsys().Test_fil("url('//enwiki/wiki/a.png')", "url('enwiki/wiki/a.png')"); // remove "//"
// fxt.Fsys().Test_fil("mem/fsys/enwiki/file/a.png");
}
}
// Fixture for Xob_mirror_mgr_tst.
// NOTE: the mirror_mgr field and all code using it are commented out; only the in-memory file-system fixture is live.
class Xob_mirror_mgr_fxt {
// private Xob_mirror_mgr mirror_mgr;
// Accessor for the file-system fixture used to seed and check files.
public Io_fsys_fxt Fsys() {return fsys;} private final Io_fsys_fxt fsys = new Io_fsys_fxt();
// Resets the file-system fixture; the mirror manager construction is disabled.
public void Clear() {
fsys.Clear();
// mirror_mgr = new Xob_mirror_mgr(Gfo_usr_dlg_.Noop, new Xof_download_wkr_test(), Bry_.new_a7("mem/http/enwiki"), Bry_.new_a7("mem/http/enwiki/wiki/Main_Page"), Io_url_.new_dir_("mem/fsys"));
}
// Currently a no-op: both parameters are ignored because the body is commented out.
public void Test_css(String raw, String expd) {
// byte[] raw_bry = Bry_.new_u8(raw);
// mirror_mgr.Exec();
}
}
// Test helper around Io_mgr: (re)initializes the in-memory engine and seeds / verifies file text.
class Io_fsys_fxt {
	// Re-initializes Io_mgr's in-memory engine.
	public void Clear() {Io_mgr.Instance.InitEngine_mem();}

	// Creates a file whose text is its own name-and-extension; EX: ".../a.png" -> "a.png"
	public void Init_fil(String url_str) {
		Io_url fil = Io_url_.new_fil_(url_str);
		this.Init_fil(fil, fil.NameAndExt());
	}
	// Creates a file at url_str with the given text.
	public void Init_fil(String url_str, String text) {
		Io_url fil = Io_url_.new_fil_(url_str);
		this.Init_fil(fil, text);
	}
	// Creates a file at url with the given text.
	public void Init_fil(Io_url url, String text) {Io_mgr.Instance.SaveFilStr(url, text);}

	// Asserts that the file's text equals its own name-and-extension.
	public void Test_fil(String url_str) {
		Io_url fil = Io_url_.new_fil_(url_str);
		this.Test_fil(fil, fil.NameAndExt());
	}
	// Asserts that the file at url has the expected text.
	public void Test_fil(String url, String expd) {
		Io_url fil = Io_url_.new_fil_(url);
		this.Test_fil(fil, expd);
	}
	// Loads the file and compares it to expd with Tfds.Eq_str_lines.
	public void Test_fil(Io_url url, String expd) {
		String actl = Io_mgr.Instance.LoadFilStr(url);
		Tfds.Eq_str_lines(expd, actl);
	}
}

View File

@@ -13,85 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs.css; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
import gplx.core.primitives.*; import gplx.core.btries.*;
// Normalizes a url found in css / html into a relative "site/path" form suitable for the local file system.
class Xob_url_fixer {
// site    : bytes prepended when src starts with a single "/"
// src     : raw url bytes; only the first src_len bytes are examined
// returns : the normalized url, or null when the scanned url contains no "/" separator
public static byte[] Fix(byte[] site, byte[] src, int src_len) { // return "site/img.png" if "//site/img.png" or "http://site/img.png"; also, handle "img.png?key=val"
int bgn = 0; int bgn_tkn_tid = 0;
// step 1: look for a known prefix ("/", "//", "http://", "https://") at the start of src
Object o = Xob_url_fixer_tkn.Bgn_trie().Match_bgn(src, bgn, src_len);
if (o != null) {
Xob_url_fixer_tkn tkn = (Xob_url_fixer_tkn)o;
bgn_tkn_tid = tkn.Tid();
switch (bgn_tkn_tid) {
case Xob_url_fixer_tkn.Tid_bgn_slash_2:
case Xob_url_fixer_tkn.Tid_bgn_http:
case Xob_url_fixer_tkn.Tid_bgn_https:
bgn = tkn.Raw_len(); // remove "//", "http://", "https://"
break;
case Xob_url_fixer_tkn.Tid_bgn_slash_1: // convert "/a" to "site/a"
src = Bry_.Add(site, src);
src_len = src.length;
break;
}
}
// step 2: scan the remainder for "/", "?", "/../" and "/./"
int pos = bgn, end = src_len; boolean no_slashes = true;
Btrie_slim_mgr mid_trie = Xob_url_fixer_tkn.Mid_trie();
int[] seg_ary = new int[gplx.xowa.xtns.pfuncs.ttls.Pfunc_rel2abs.Ttl_max];
while (pos < src_len) {
byte b = src[pos];
o = mid_trie.Match_bgn_w_byte(b, src, pos, src_len);
if (o != null) {
Xob_url_fixer_tkn tkn = (Xob_url_fixer_tkn)o;
switch (tkn.Tid()) {
case Xob_url_fixer_tkn.Tid_mid_slash: if (no_slashes) no_slashes = false; break;
// "?": cut the result at the "?" and force the loop to exit (pos is set to src_len, then incremented below)
case Xob_url_fixer_tkn.Tid_mid_question: end = pos; pos = src_len; break;
case Xob_url_fixer_tkn.Tid_mid_rel_1:
case Xob_url_fixer_tkn.Tid_mid_rel_2:
// relative segment: split src at pos and let Rel2abs rebuild it; presumably it resolves the "/../" and "/./" segments -- see Pfunc_rel2abs
Bry_bfr tmp_bfr = Bry_bfr_.New_w_size(src_len);
byte[] to_rel_root = Bry_.Mid(src, bgn, pos);
byte[] to_rel_qry = Bry_.Mid(src, pos, src_len);
src = gplx.xowa.xtns.pfuncs.ttls.Pfunc_rel2abs.Rel2abs(tmp_bfr, seg_ary, to_rel_qry, to_rel_root, Int_obj_ref.New_neg1());
// src was replaced (prefix already dropped by the Mid above), so reset all indexes and rescan;
// the "++pos" below means the rescan resumes at index 1
bgn = pos = 0;
end = src_len = src.length;
no_slashes = true;
break;
}
}
++pos;
}
if (no_slashes) return null; // invalid; EX: "//site"
return Bry_.Mid(src, bgn, end);
}
}
// Token stored in the url-fixer tries: a type id plus the raw text that it matches.
// Also owns the two lazily-built tries (url prefixes and mid-url separators).
class Xob_url_fixer_tkn {
	private int tid;
	private byte[] raw;
	private int raw_len;
	public Xob_url_fixer_tkn(int tid, byte[] raw) {
		this.tid = tid;
		this.raw = raw;
		this.raw_len = raw.length;
	}
	public int Tid()		{return tid;}		// one of the Tid_bgn_* / Tid_mid_* constants
	public byte[] Raw()		{return raw;}		// matched text
	public int Raw_len()	{return raw_len;}	// length of matched text in bytes

	// Creates a token from a String by converting it to bytes with Bry_.new_u8.
	public static Xob_url_fixer_tkn new_(int tid, String raw) {
		byte[] raw_bry = Bry_.new_u8(raw);
		return new Xob_url_fixer_tkn(tid, raw_bry);
	}
	// Registers text in the trie, mapped to a new token of the given type.
	private static void trie_add(Btrie_slim_mgr trie, int tid, String s) {
		Xob_url_fixer_tkn tkn = new_(tid, s);
		trie.Add_obj(s, tkn);
	}

	// token ids for url prefixes
	public static final int Tid_bgn_slash_1 = 1, Tid_bgn_slash_2 = 2, Tid_bgn_http = 3, Tid_bgn_https = 4;
	private static Btrie_slim_mgr bgn_trie;
	// Returns the prefix trie, building it on first use.
	public static Btrie_slim_mgr Bgn_trie() {
		if (bgn_trie != null) return bgn_trie;
		bgn_trie = Btrie_slim_mgr.ci_a7();
		trie_add(bgn_trie, Tid_bgn_slash_1, "/");
		trie_add(bgn_trie, Tid_bgn_slash_2, "//");
		trie_add(bgn_trie, Tid_bgn_http, "http://");
		trie_add(bgn_trie, Tid_bgn_https, "https://");
		return bgn_trie;
	}

	// token ids for separators found while scanning the rest of the url
	public static final int Tid_mid_rel_1 = 1, Tid_mid_rel_2 = 2, Tid_mid_slash = 3, Tid_mid_question = 4;
	private static Btrie_slim_mgr mid_trie;
	// Returns the mid-url trie, building it on first use.
	public static Btrie_slim_mgr Mid_trie() {
		if (mid_trie != null) return mid_trie;
		mid_trie = Btrie_slim_mgr.ci_a7();
		trie_add(mid_trie, Tid_mid_rel_1, "/../");
		trie_add(mid_trie, Tid_mid_rel_2, "/./");
		trie_add(mid_trie, Tid_mid_slash, "/");
		trie_add(mid_trie, Tid_mid_question, "?");
		return mid_trie;
	}
}

View File

@@ -13,28 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs.css; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
import org.junit.*;
// Unit tests for Xob_url_fixer.Fix; the fixture uses "site" as the site name.
public class Xob_url_fixer_tst {
	private Xob_url_fixer_fxt fxt = new Xob_url_fixer_fxt();
	@Before public void init() {
		fxt.Clear();
	}
	// protocol-relative prefix is removed
	@Test public void Slash2() {
		fxt.Test_fix("//site/a.png", "site/a.png");
	}
	// "http://" prefix is removed
	@Test public void Http() {
		fxt.Test_fix("http://site/a.png", "site/a.png");
	}
	// "https://" prefix is removed
	@Test public void Https() {
		fxt.Test_fix("https://site/a.png", "site/a.png");
	}
	// query string is dropped
	@Test public void Qarg() {
		fxt.Test_fix("//site/a.png?key=val", "site/a.png");
	}
	// query string is dropped for nested directories too
	@Test public void Qarg_dir() {
		fxt.Test_fix("//site/a/b/c.png?key=val", "site/a/b/c.png");
	}
	// root-relative url gets the site prepended; EX:/static/images/project-logos/wikivoyage.png; DATE:2015-05-09
	@Test public void Root() {
		fxt.Test_fix("/a/b.png", "site/a/b.png");
	}
	// "/../" removes the preceding segment; DATE:2015-05-09
	@Test public void Rel_dot2() {
		fxt.Test_fix("//site/a/../b/c.png", "site/b/c.png");
	}
	// multiple "/../" segments; DATE:2015-05-09
	@Test public void Rel_dot2_mult() {
		fxt.Test_fix("//site/a/../b/../c/d.png", "site/c/d.png");
	}
	// "/./" is collapsed; DATE:2015-05-09
	@Test public void Rel_dot1() {
		fxt.Test_fix("//site/a/./b/c.png", "site/a/b/c.png");
	}
	// url with no path is invalid and yields null
	@Test public void Site_only() {
		fxt.Test_fix("//site", null);
	}
}
// Fixture for Xob_url_fixer_tst: holds the site name and runs Fix against an expected String.
class Xob_url_fixer_fxt {
	private byte[] site_bry;
	// Sets the site name passed to Fix.
	public void Site_(String v) {
		site_bry = Bry_.new_u8(v);
	}
	// Resets the site name to "site".
	public void Clear() {
		this.Site_("site");
	}
	// Runs Fix on raw and compares the result, converted to a String, against expd.
	public void Test_fix(String raw, String expd) {
		byte[] src = Bry_.new_u8(raw);
		byte[] actl = Xob_url_fixer.Fix(site_bry, src, src.length);
		Tfds.Eq(expd, String_.new_u8(actl));
	}
}

View File

@@ -13,11 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs.css; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
// Immutable description of one pending download: a type id, the http url to fetch, and the file-system url to save to.
class Xobc_download_itm {
	public static final int Tid_file = 1, Tid_html = 2, Tid_css = 3;

	private final int tid;
	private final String http_str;
	private final byte[] fsys_url;

	public Xobc_download_itm(int tid, String http_str, byte[] fsys_url) {
		this.tid = tid;
		this.http_str = http_str;
		this.fsys_url = fsys_url;
	}

	// type id; one of the Tid_* constants
	public int Tid() {
		return tid;
	}
	// source url as a String
	public String Http_str() {
		return http_str;
	}
	// target path as bytes; the array passed to the constructor is returned as-is (no copy)
	public byte[] Fsys_url() {
		return fsys_url;
	}
}

View File

@@ -13,29 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs.filters.core; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.filters.*;
import gplx.xowa.wikis.ttls.*;
// Holds two title lists, "include" and "exclude", and answers whether a title is in either.
// Each list is loaded from a text source by Xob_ttl_filter_mgr_srl.
public class Xob_ttl_filter_mgr {
	private final Xob_ttl_filter_mgr_srl srl = new Xob_ttl_filter_mgr_srl();
	private Hash_adp_bry exclude_hash = Hash_adp_bry.cs(), include_hash = Hash_adp_bry.cs();
	private boolean exclude_is_empty = true, include_is_empty = true;	// lets Match_* skip the hash lookup when nothing is loaded

	// Empties both lists.
	public void Clear() {
		exclude_hash.Clear();
		include_hash.Clear();
		exclude_is_empty = true;
		include_is_empty = true;
	}
	// True if src is in the include list.
	public boolean Match_include(byte[] src) {
		return !include_is_empty && include_hash.Has(src);
	}
	// True if src is in the exclude list.
	public boolean Match_exclude(byte[] src) {
		return !exclude_is_empty && exclude_hash.Has(src);
	}
	// Loads a list from a file; does nothing when the loaded bytes fail Bry_.Len_gt_0.
	public void Load(boolean exclude, Io_url url) {
		byte[] src = Io_mgr.Instance.LoadFilBry_loose(url);
		if (!Bry_.Len_gt_0(src)) return;
		Load(exclude, src);
	}
	// Parses src into the exclude list (exclude == true) or the include list, then refreshes that list's empty flag.
	public void Load(boolean exclude, byte[] src) {
		if (exclude) {
			srl.Init(exclude_hash).Load_by_bry(src);
			exclude_is_empty = exclude_hash.Count() == 0;
		}
		else {
			srl.Init(include_hash).Load_by_bry(src);
			include_is_empty = include_hash.Count() == 0;
		}
	}
}

View File

@@ -13,25 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs.filters.core; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.filters.*;
import gplx.langs.dsvs.*;
// Dsv worker that reads one title per line into a hash.
// Blank lines and lines starting with "|" are ignored.
class Xob_ttl_filter_mgr_srl extends Dsv_wkr_base {
	private Hash_adp_bry hash;
	private byte[] ttl;	// title captured by Write_bry for the current line; null if the line was skipped

	// Sets the hash that receives parsed titles; returns this for chaining.
	public Xob_ttl_filter_mgr_srl Init(Hash_adp_bry hash) {
		this.hash = hash;
		return this;
	}
	@Override public Dsv_fld_parser[] Fld_parsers() {
		return new Dsv_fld_parser[] {Dsv_fld_parser_.Line_parser__comment_is_pipe};
	}
	// Captures field 0 as the title; any other field index is rejected by returning false.
	@Override public boolean Write_bry(Dsv_tbl_parser parser, int fld_idx, byte[] src, int bgn, int end) {
		if (fld_idx != 0) return false;
		boolean is_blank = end - bgn == 0;							// ignore blank lines
		if (is_blank) return true;
		boolean is_comment = src[bgn] == Byte_ascii.Pipe;			// ignore lines starting with pipe; EX: "| some comment"
		if (is_comment) return true;
		ttl = Bry_.Mid(src, bgn, end);
		return true;
	}
	// Adds the captured title (if any) to the hash and resets it for the next line.
	@Override public void Commit_itm(Dsv_tbl_parser parser, int pos) {
		if (ttl != null) {
			hash.Add(ttl, ttl);
			ttl = null;
		}
	}
}

View File

@@ -13,40 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs.filters.core; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.filters.*;
import org.junit.*;
// Unit tests for Xob_ttl_filter_mgr_srl: each case gives raw text, the expected item count, and the expected titles.
public class Xob_ttl_filter_mgr_srl_tst {
	private final Xob_ttl_filter_mgr_srl_fxt fxt = new Xob_ttl_filter_mgr_srl_fxt();
	@Before public void init() {
		fxt.Clear();
	}
	@Test public void One() {
		fxt.Test_parse("a", 1, "a");
	}
	@Test public void Two() {
		fxt.Test_parse("a\nb", 2, "a", "b");
	}
	// line starting with pipe is a comment
	@Test public void Comment() {
		fxt.Test_parse("|x", 0);
	}
	// extra pipes inside a comment line do not create items
	@Test public void Comment_many() {
		fxt.Test_parse("|x||", 0);
	}
	// blank line creates no item
	@Test public void Blank() {
		fxt.Test_parse("\n", 0);
	}
	// comments, blank lines and titles interleaved
	@Test public void Mix() {
		String src = String_.Concat_lines_nl_skip_last
		( "|comment 1"
		, "a"
		, ""
		, "|comment 2"
		, "b"
		);
		fxt.Test_parse(src, 2, "a", "b");
	}
}
// Fixture for Xob_ttl_filter_mgr_srl_tst: parses text into a hash and checks count and membership.
class Xob_ttl_filter_mgr_srl_fxt {
	private final Hash_adp_bry hash = Hash_adp_bry.cs();
	private final Xob_ttl_filter_mgr_srl mgr = new Xob_ttl_filter_mgr_srl();
	// Empties the hash between tests.
	public void Clear() {hash.Clear();}
	// Parses src, then asserts the hash holds expd_count items and contains each of expd_itms.
	public void Test_parse(String src, int expd_count, String... expd_itms) {
		mgr.Init(hash).Load_by_bry(Bry_.new_u8(src));
		Tfds.Eq(expd_count, hash.Count());
		for (String expd_itm : expd_itms) {
			byte[] expd_bry = Bry_.new_u8(expd_itm);
			Tfds.Eq_true(hash.Has(expd_bry));
		}
	}
}

View File

@@ -13,37 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs.filters.core; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.filters.*;
import org.junit.*;
// Unit test for Xob_ttl_filter_mgr: the include and exclude lists are independent of each other.
public class Xob_ttl_filter_mgr_tst {
@Before public void init() {fxt.Clear();} private final Xob_ttl_filter_mgr_fxt fxt = new Xob_ttl_filter_mgr_fxt();
@Test public void One() {
// load "A" into the exclude list and "B" into the include list
fxt.Init_load_exclude("A");
fxt.Init_load_include("B");
// exclude matches only "A"
fxt.Test_match_exclude_y("A");
fxt.Test_match_exclude_n("B", "C");
// include matches only "B"
fxt.Test_match_include_y("B");
fxt.Test_match_include_n("A", "C");
}
}
// Fixture for Xob_ttl_filter_mgr_tst: loads titles into either list and asserts match results.
class Xob_ttl_filter_mgr_fxt {
	private final Xob_ttl_filter_mgr mgr = new Xob_ttl_filter_mgr();
	public void Clear() {mgr.Clear();}
	// Loads raw text into the exclude list.
	public void Init_load_exclude(String itm) {
		mgr.Load(Bool_.Y, Bry_.new_u8(itm));
	}
	// Loads raw text into the include list.
	public void Init_load_include(String itm) {
		mgr.Load(Bool_.N, Bry_.new_u8(itm));
	}
	public void Test_match_exclude_y(String... itms) {Test_match(Bool_.Y, Bool_.Y, itms);}
	public void Test_match_exclude_n(String... itms) {Test_match(Bool_.Y, Bool_.N, itms);}
	public void Test_match_include_y(String... itms) {Test_match(Bool_.N, Bool_.Y, itms);}
	public void Test_match_include_n(String... itms) {Test_match(Bool_.N, Bool_.N, itms);}
	// Checks every item against the exclude list (exclude == true) or the include list; the item is the failure message.
	private void Test_match(boolean exclude, boolean expd, String... itms) {
		for (String itm : itms) {
			byte[] itm_bry = Bry_.new_u8(itm);
			boolean actl = exclude ? mgr.Match_exclude(itm_bry) : mgr.Match_include(itm_bry);
			Tfds.Eq(expd, actl, itm);
		}
	}
}

View File

@@ -13,33 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs.filters.dansguardians; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.filters.*;
import gplx.core.criterias.*;
// Criteria that matches when the compared byte[] is equal to any entry of a fixed byte[][].
// When "negated" is set, the result is inverted -- except for an empty array, which never matches.
class Crt__match_exact implements Criteria {
	// Type id reported by Tid(). Declared static final: it is a constant, and the earlier
	// public mutable instance field let any caller change one instance's type id.
	public static final byte Tid_match_exact = 12;

	private boolean negated;
	protected byte[][] ary; protected int ary_len;

	public Crt__match_exact(boolean negated, byte[][] ary) {this.negated = negated; Val_as_bry_ary_(ary);}
	public byte Tid() {return Tid_match_exact;}
	public String To_str_name() {return "MATCH_EXACT";}
	// comp_obj must be a byte[]; returns true if it equals one of the values (inverted when negated)
	public boolean Matches(Object comp_obj) {
		if (ary_len == 0) return false; // empty array never matches
		byte[] comp = (byte[])comp_obj;
		boolean found = false;
		for (int i = 0; i < ary_len; i++) {
			if (Bry_.Eq(ary[i], comp)) {
				found = true;
				break;
			}
		}
		return negated ? !found : found;
	}
	public boolean Negated() {return negated;}
	// values to compare against; the internal array is returned as-is
	public byte[][] Val_as_bry_ary() {return ary;}
	// replaces the values and caches their count
	protected void Val_as_bry_ary_(byte[][] v) {
		this.ary = v;
		ary_len = v.length;
	}
	// v must be a byte[][]
	public void Val_as_obj_(Object v) {Val_as_bry_ary_((byte[][])v);}
	public void Val_from_args(Hash_adp args) {throw Err_.new_unimplemented();}
	public String To_str() {return String_.Concat_any(this.To_str_name(), " ", String_.Ary(ary));}
}

View File

@@ -13,71 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs.filters.dansguardians; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.filters.*;
import gplx.core.primitives.*;
// One parsed dansguardian file: an id, its relative path, and its parsed lines.
class Dg_file {
	private final int id;
	private final String rel_path;
	private final Dg_rule[] lines;
	public Dg_file(int id, String rel_path, Dg_rule[] lines) {
		this.id = id;
		this.rel_path = rel_path;
		this.lines = lines;
	}
	public int Id() {return id;}
	// EX: goodphrases/weighted_general
	public String Rel_path() {return rel_path;}
	// one Dg_rule per line; the internal array is returned as-is
	public Dg_rule[] Lines() {return lines;}
}
// One line of a dansguardian file: a score and the words it applies to.
class Dg_rule {// EX: < wikipedia ><-30>
	public static final int
	  Tid_rule		= 0
	, Tid_comment	= 1
	, Tid_blank		= 3
	, Tid_invalid	= 4
	;
	public static final int Score_banned = 0;
	// shared placeholder instances for lines that are not rules; they have no key and no words
	public static final Dg_rule
	  Itm_comment	= new Dg_rule(-1, -1, -1, Tid_comment, null, -1, null)
	, Itm_blank		= new Dg_rule(-1, -1, -1, Tid_blank, null, -1, null)
	, Itm_invalid	= new Dg_rule(-1, -1, -1, Tid_invalid, null, -1, null)
	;

	private final int file_id, id, idx, tid, score;
	private final byte[] key;
	private final Dg_word[] words;
	private final Hash_adp_bry word_idx_hash = Hash_adp_bry.cs();	// word bytes -> index of that word in "words"

	public Dg_rule(int file_id, int id, int idx, int tid, byte[] key, int score, Dg_word[] words) {
		this.file_id = file_id;
		this.id = id;
		this.idx = idx;
		this.tid = tid;
		this.key = key;
		this.score = score;
		this.words = words;
		if (words == null) return;	// static rules will have null byte[][]
		for (int i = 0; i < words.length; ++i)
			word_idx_hash.Add_bry_obj(words[i].Raw(), Int_obj_ref.New(i));
	}
	public int File_id()				{return file_id;}
	public int Id()						{return id;}
	public int Idx()					{return idx;}
	public int Tid()					{return tid;}
	public byte[] Key()					{return key;}
	public Dg_word[] Words()			{return words;}
	public Hash_adp_bry Word_idx_hash()	{return word_idx_hash;}
	public int Score()					{return score;}
}
// One word of a rule, stored as raw bytes.
class Dg_word {
	private final byte[] raw;
	public Dg_word(byte[] raw) {this.raw = raw;}
	public byte[] Raw() {return raw;}

	// Joins the words with dlm and wraps the result in apostrophes; EX: 'a~b'
	// Returns String_.Empty for a null or empty array. bfr is used as scratch space and is cleared on return.
	public static String Ary_concat(Dg_word[] ary, Bry_bfr bfr, byte dlm) {
		if (ary == null || ary.length == 0) return String_.Empty;
		bfr.Add_byte_apos();
		bfr.Add(ary[0].Raw());
		for (int i = 1; i < ary.length; ++i) {
			bfr.Add_byte(dlm);
			bfr.Add(ary[i].Raw());
		}
		bfr.Add_byte_apos();
		return bfr.To_str_and_clear();
	}
	// Converts each String to a Dg_word (bytes via Bry_.new_u8), preserving order.
	public static Dg_word[] Ary_new_by_str_ary(String[] ary) {
		Dg_word[] words = new Dg_word[ary.length];
		int i = 0;
		for (String str : ary)
			words[i++] = new Dg_word(Bry_.new_u8(str));
		return words;
	}
}

View File

@@ -13,161 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs.filters.dansguardians; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.filters.*;
import gplx.dbs.*;
// Writes dansguardian files, rules and per-page results to a log database through four table wrappers.
// Init must be called before any other method: it supplies the connection that every other method uses.
class Dg_log_mgr {
	private final Bry_bfr tmp_bfr = Bry_bfr_.Reset(16);	// scratch buffer for concatenating rule words
	private final Dg_file_tbl tbl_file = new Dg_file_tbl();
	private final Dg_rule_tbl tbl_rule = new Dg_rule_tbl();
	private final Dg_page_score_tbl tbl_page_score = new Dg_page_score_tbl();
	private final Dg_page_rule_tbl tbl_page_rule = new Dg_page_rule_tbl();
	private Db_conn conn;

	// Opens (or creates) the database at db_url, binds all four tables to it, and begins a transaction.
	public void Init(Io_url db_url) {
		Db_conn_bldr_data bldr_data = Db_conn_bldr.Instance.Get_or_new(db_url);
		conn = bldr_data.Conn();
		boolean is_new = bldr_data.Created();
		tbl_file.Conn_(conn, is_new);
		tbl_rule.Conn_(conn, is_new);
		tbl_page_score.Conn_(conn, is_new);
		tbl_page_rule.Conn_(conn, is_new);
		conn.Txn_bgn("dansguardian");
	}
	// Logs one file: id, relative path and number of lines.
	public void Insert_file(Dg_file file) {
		tbl_file.Insert(file.Id(), file.Rel_path(), file.Lines().length);
	}
	// Logs one rule; its words are stored as one tilde-delimited String.
	public void Insert_rule(Dg_rule rule) {
		tbl_rule.Insert(rule.File_id(), rule.Id(), rule.Idx(), rule.Score(), Dg_word.Ary_concat(rule.Words(), tmp_bfr, Byte_ascii.Tilde));
	}
	// Logs the overall result for one page.
	public void Insert_page_score(int log_tid, int page_id, int page_ns, byte[] page_ttl, int page_len, int page_score, int page_rule_count, int clude_type) {
		tbl_page_score.Insert(log_tid, page_id, page_ns, page_ttl, page_len, page_score, page_rule_count, clude_type);
	}
	// Logs the score contributed by one rule on one page.
	public void Insert_page_rule(int log_tid, int page_id, int rule_id, int rule_score_total) {
		tbl_page_rule.Insert(log_tid, page_id, rule_id, rule_score_total);
	}
	public void Commit() {
		conn.Txn_sav();
	}
	public void Rls() {
		conn.Txn_end();
	}
}
// Table wrapper for "dg_file": one row per dansguardian file.
class Dg_file_tbl {
	private final Dbmeta_fld_list flds = new Dbmeta_fld_list();
	private String tbl_name = "dg_file";
	private String fld_file_id, fld_file_path, fld_rule_count;
	private Db_conn conn;
	private Db_stmt stmt_insert;	// created on first Insert; dropped whenever Conn_ is called

	// Binds the table to a connection and rebuilds the field list; creates the table (with a unique index on file_id) when "created" is true.
	public void Conn_(Db_conn new_conn, boolean created) {
		this.conn = new_conn;
		flds.Clear();
		fld_file_id		= flds.Add_int("file_id");
		fld_file_path	= flds.Add_str("file_path", 512);
		fld_rule_count	= flds.Add_int("rule_count");
		if (created) {
			conn.Meta_tbl_create(Dbmeta_tbl_itm.New(tbl_name, flds
			, Dbmeta_idx_itm.new_unique_by_tbl(tbl_name, "file_id", fld_file_id)
			));
		}
		stmt_insert = null;
	}
	// Inserts one row.
	public void Insert(int file_id, String file_path, int rule_count) {
		if (stmt_insert == null) {
			stmt_insert = conn.Stmt_insert(tbl_name, flds);
		}
		stmt_insert.Clear()
			.Val_int(fld_file_id, file_id)
			.Val_str(fld_file_path, file_path)
			.Val_int(fld_rule_count, rule_count)
			.Exec_insert();
	}
}
// Table wrapper for "dg_rule": one row per dansguardian rule.
class Dg_rule_tbl implements Rls_able {
	private String tbl_name = "dg_rule"; private final Dbmeta_fld_list flds = new Dbmeta_fld_list();
	private String fld_file_id, fld_rule_id, fld_rule_idx, fld_rule_score, fld_rule_text;
	private Db_conn conn; private Db_stmt stmt_insert;
	// Binds the table to a connection and rebuilds the field list; creates the table (with a unique index on rule_id) when "created" is true.
	public void Conn_(Db_conn new_conn, boolean created) {
		this.conn = new_conn; flds.Clear();
		fld_file_id			= flds.Add_int("file_id");
		fld_rule_id			= flds.Add_int("rule_id");
		fld_rule_idx		= flds.Add_int("rule_idx");
		fld_rule_score		= flds.Add_int("rule_score");
		fld_rule_text		= flds.Add_str("rule_text", 1024);
		if (created) {
			Dbmeta_tbl_itm meta = Dbmeta_tbl_itm.New(tbl_name, flds
			, Dbmeta_idx_itm.new_unique_by_tbl(tbl_name, "pkey", fld_rule_id)
			);
			conn.Meta_tbl_create(meta);
		}
		// drop any statement cached for a previous connection so that Insert rebuilds it against new_conn;
		// the other Dg_*_tbl classes already do this
		stmt_insert = null;
		conn.Rls_reg(this);
	}
	// Releases the cached insert statement.
	public void Rls() {
		stmt_insert = Db_stmt_.Rls(stmt_insert);
	}
	// Inserts one row; the insert statement is created on first use.
	public void Insert(int file_id, int rule_id, int rule_idx, int rule_score, String rule_text) {
		if (stmt_insert == null) stmt_insert = conn.Stmt_insert(tbl_name, flds);
		stmt_insert.Clear()
		.Val_int(fld_file_id		, file_id)
		.Val_int(fld_rule_id		, rule_id)
		.Val_int(fld_rule_idx		, rule_idx)
		.Val_int(fld_rule_score		, rule_score)
		.Val_str(fld_rule_text		, rule_text)
		.Exec_insert();
	}
}
// Table wrapper for "dg_page_score": one row per (log_tid, page_id) with the page's overall result.
class Dg_page_score_tbl implements Rls_able {
	private String tbl_name = "dg_page_score"; private final Dbmeta_fld_list flds = new Dbmeta_fld_list();
	private String fld_log_tid, fld_page_id, fld_page_ns, fld_page_ttl, fld_page_len, fld_page_score, fld_page_rule_count, fld_clude_type;
	private Db_conn conn; private Db_stmt stmt_insert;
	// Binds the table to a connection and rebuilds the field list; creates the table (with a unique index on log_tid + page_id) when "created" is true.
	public void Conn_(Db_conn new_conn, boolean created) {
		this.conn = new_conn; flds.Clear();
		fld_log_tid			= flds.Add_int("log_tid");	// title or text
		fld_page_id			= flds.Add_int("page_id");
		fld_page_ns			= flds.Add_int("page_ns");
		// page_ttl holds the title text (Insert writes it with Val_bry_as_str), so it is declared as a string field.
		// It was declared with Add_int, and an integer-typed column can coerce numeric-looking titles ("007" -> 7).
		// 255 is an assumed width, chosen to match MediaWiki's page_title limit.
		// NOTE: this only affects databases created from now on.
		fld_page_ttl		= flds.Add_str("page_ttl", 255);
		fld_page_len		= flds.Add_int("page_len");
		fld_page_score		= flds.Add_int("page_score");
		fld_page_rule_count	= flds.Add_int("page_rule_count");
		fld_clude_type		= flds.Add_int("page_clude_type");
		if (created) {
			Dbmeta_tbl_itm meta = Dbmeta_tbl_itm.New(tbl_name, flds
			, Dbmeta_idx_itm.new_unique_by_tbl(tbl_name, "pkey", fld_log_tid, fld_page_id)
			);
			conn.Meta_tbl_create(meta);
		}
		stmt_insert = null;		// force Insert to rebuild the statement against the new connection
		conn.Rls_reg(this);
	}
	// Releases the cached insert statement.
	public void Rls() {
		stmt_insert = Db_stmt_.Rls(stmt_insert);
	}
	// Inserts one row; the insert statement is created on first use.
	public void Insert(int log_tid, int page_id, int page_ns, byte[] page_ttl, int page_len, int page_score, int page_rule_count, int clude_type) {
		if (stmt_insert == null) stmt_insert = conn.Stmt_insert(tbl_name, flds);
		stmt_insert.Clear()
		.Val_int(fld_log_tid		, log_tid)
		.Val_int(fld_page_id		, page_id)
		.Val_int(fld_page_ns		, page_ns)
		.Val_bry_as_str(fld_page_ttl, page_ttl)
		.Val_int(fld_page_len		, page_len)
		.Val_int(fld_page_score		, page_score)
		.Val_int(fld_page_rule_count, page_rule_count)
		.Val_int(fld_clude_type		, clude_type)
		.Exec_insert();
	}
}
// Table wrapper for "dg_page_rule": one row per (log_tid, page_id, rule_id) with that rule's total score on the page.
class Dg_page_rule_tbl implements Rls_able {
	private final Dbmeta_fld_list flds = new Dbmeta_fld_list();
	private String tbl_name = "dg_page_rule";
	private String fld_log_tid, fld_page_id, fld_rule_id, fld_rule_score_total;
	private Db_conn conn;
	private Db_stmt stmt_insert;	// created on first Insert; dropped whenever Conn_ is called

	// Binds the table to a connection and rebuilds the field list; creates the table (with a unique index on all three key fields) when "created" is true.
	public void Conn_(Db_conn new_conn, boolean created) {
		this.conn = new_conn;
		flds.Clear();
		fld_log_tid				= flds.Add_int("log_tid");	// title or text
		fld_page_id				= flds.Add_int("page_id");
		fld_rule_id				= flds.Add_int("rule_id");
		fld_rule_score_total	= flds.Add_int("rule_score_total");
		if (created) {
			conn.Meta_tbl_create(Dbmeta_tbl_itm.New(tbl_name, flds
			, Dbmeta_idx_itm.new_unique_by_tbl(tbl_name, "pkey", fld_log_tid, fld_page_id, fld_rule_id)
			));
		}
		stmt_insert = null;
		conn.Rls_reg(this);
	}
	// Releases the cached insert statement.
	public void Rls() {
		stmt_insert = Db_stmt_.Rls(stmt_insert);
	}
	// Inserts one row.
	public void Insert(int log_tid, int page_id, int rule_id, int rule_score_total) {
		if (stmt_insert == null) {
			stmt_insert = conn.Stmt_insert(tbl_name, flds);
		}
		stmt_insert.Clear()
			.Val_int(fld_log_tid, log_tid)
			.Val_int(fld_page_id, page_id)
			.Val_int(fld_rule_id, rule_id)
			.Val_int(fld_rule_score_total, rule_score_total)
			.Exec_insert();
	}
}

View File

@@ -13,176 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs.filters.dansguardians; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.filters.*;
import gplx.core.primitives.*; import gplx.core.btries.*;
import gplx.xowa.addons.apps.cfgs.*;
import gplx.xowa.langs.*;
import gplx.xowa.bldrs.filters.core.*;
// Scores page text against dansguardian word rules and reports whether the page matches the filter.
// Rules are loaded from "dansguardian" under root_dir; titles listed in xowa.title.include.txt / xowa.title.exclude.txt
// short-circuit the scoring, and namespaces listed in xowa.ns.skip.txt are never matched.
// When log_enabled is true, files, rules and results are also written to a log database.
public class Dg_match_mgr {
	private int score_init, score_fail; private boolean log_enabled, case_match;
	private final Btrie_slim_mgr btrie = Btrie_slim_mgr.cs();			// word -> Dg_rule_group
	private final Ordered_hash rules = Ordered_hash_.New_bry();
	private final Ordered_hash rule_group_hash = Ordered_hash_.New_bry(), rule_tally_hash = Ordered_hash_.New_bry();
	private final Dg_parser parser = new Dg_parser();
	private final Xob_ttl_filter_mgr ttl_filter_mgr = new Xob_ttl_filter_mgr();
	private final Dg_ns_skip_mgr ns_skip_mgr = new Dg_ns_skip_mgr();
	private final Dg_log_mgr log_mgr = new Dg_log_mgr();				// only initialized when log_enabled; never call it otherwise
	// root_dir    : directory holding the include / exclude / ns-skip files and the "dansguardian" rule directory
	// score_init  : starting score for every page
	// score_fail  : a page matches when its final score is greater than this
	// case_match  : when false, page text is lower-cased before matching
	// log_enabled : when true, log_url is opened and all results are logged
	public Dg_match_mgr(Io_url root_dir, int score_init, int score_fail, boolean case_match, boolean log_enabled, Io_url log_url) {
		this.score_init = score_init; this.score_fail = score_fail; this.case_match = case_match; this.log_enabled = log_enabled;
		if (log_enabled) log_mgr.Init(log_url);
		ttl_filter_mgr.Load(Bool_.N, root_dir.GenSubFil("xowa.title.include.txt"));
		ttl_filter_mgr.Load(Bool_.Y, root_dir.GenSubFil("xowa.title.exclude.txt"));
		ns_skip_mgr.Load(root_dir.GenSubFil("xowa.ns.skip.txt"));
		Io_url dg_root_url = root_dir.GenSubDir("dansguardian");
		Dg_file[] files = parser.Parse_dir(dg_root_url); Gfo_usr_dlg_.Instance.Plog_many("", "", "import.dg.rules: url=~{0} files=~{1}", dg_root_url, files.length);
		Init_by_files(files);
		if (log_enabled) log_mgr.Commit();
	}
	// Removes all loaded rules and tallies; the title and namespace filters are left untouched.
	public void Clear() {
		btrie.Clear();
		rules.Clear();
		rule_group_hash.Clear();
		rule_tally_hash.Clear();
	}
	// Registers every rule of every file (and logs the file when logging is enabled).
	private void Init_by_files(Dg_file[] files) {
		for (Dg_file file : files) {
			Dg_rule[] file_rules = file.Lines();	// named file_rules so it does not shadow the "rules" field
			if (log_enabled) log_mgr.Insert_file(file);
			for (Dg_rule rule : file_rules)
				Init_by_rule(rule);
		}
	}
	// Registers one rule: each of its words is mapped (via the trie) to a group that lists the rule.
	// Lines that are not rules (comment / blank / invalid) are ignored.
	@gplx.Internal protected void Init_by_rule(Dg_rule rule) {
		if (rule.Tid() != Dg_rule.Tid_rule) return;
		if (log_enabled) log_mgr.Insert_rule(rule);
		Dg_word[] words = rule.Words();
		for (Dg_word word : words) {
			Dg_rule_group rule_group = Get_rule_group_or_new(word.Raw());
			rule_group.Rules_list().Add(rule);
			btrie.Add_obj(word.Raw(), rule_group);
		}
	}
	private Dg_rule_group Get_rule_group_or_new(byte[] word) {
		Dg_rule_group rv = (Dg_rule_group)rule_group_hash.Get_by(word);
		if (rv == null) {
			rv = new Dg_rule_group(word);
			rule_group_hash.Add(word, rv);
		}
		return rv;
	}
	private Dg_rule_tally Get_rule_tally_or_new(byte[] key, Dg_rule rule) {
		Dg_rule_tally rv = (Dg_rule_tally)rule_tally_hash.Get_by(key);
		if (rv == null) {
			rv = new Dg_rule_tally(rule);
			rule_tally_hash.Add(key, rv);
		}
		return rv;
	}
	// Returns true when the page matches the filter:
	// - false if page_ns is in the skip list
	// - false if page_ttl_db is in the include list; true if it is in the exclude list
	// - otherwise, true when score_init plus the scores of all fully-matched rules is greater than score_fail
	// lang is only used when case_match is false (to lower-case src).
	public boolean Match(int log_tid, int page_id, int page_ns, byte[] page_ttl, byte[] page_ttl_db, Xol_lang_itm lang, byte[] src) {
		// if ns is in skip_mgr, ignore; needed to skip Template and Module
		if (ns_skip_mgr.Has(page_ns))
			return false;
		int src_len = src.length;
		int clude_type = 0;
		if		(ttl_filter_mgr.Match_include(page_ttl_db))	clude_type = -1;
		else if (ttl_filter_mgr.Match_exclude(page_ttl_db))	clude_type = 1;
		if (clude_type != 0) {
			// log only when enabled: log_mgr has no connection otherwise and the insert would fail with a null reference
			if (log_enabled) log_mgr.Insert_page_score(log_tid, page_id, page_ns, page_ttl, src_len, 0, 0, clude_type);
			return clude_type == 1;
		}
		if (!case_match) {
			src = lang.Case_mgr().Case_build_lower(src);
			src_len = src.length;
		}
		rules.Clear();
		rule_tally_hash.Clear();
		int score_cur = score_init;
		// try to match a word at every byte position; each hit is tallied against every rule that uses the word
		for (int pos = 0; pos < src_len; ++pos) {
			Object o = btrie.Match_bgn(src, pos, src_len);
			if (o == null) continue;
			Dg_rule_group rule_group = (Dg_rule_group)o;
			Dg_rule[] rules_ary = rule_group.Rules_ary();
			for (Dg_rule rule : rules_ary) {
				Dg_rule_tally rule_tally = Get_rule_tally_or_new(rule.Key(), rule);
				rule_tally.Process(rule_group.Word());
			}
		}
		int rule_tally_len = rule_tally_hash.Count(); if (rule_tally_len == 0) return false;
		int rule_match_count = 0;
		for (int i = 0; i < rule_tally_len; ++i) {
			Dg_rule_tally rule_tally = (Dg_rule_tally)rule_tally_hash.Get_at(i);
			int min_results = rule_tally.Results_pass_count();	// number of times ALL words of the rule were found
			if (min_results > 0) {
				int rule_score = rule_tally.Rule().Score();
				int rule_score_total = rule_score * min_results;
				if (log_enabled) log_mgr.Insert_page_rule(log_tid, page_id, rule_tally.Rule().Id(), rule_score_total);
				if (rule_score == Dg_rule.Score_banned) {score_cur = Int_.Max_value; break;}	// banned rule: force a match
				score_cur += rule_score_total;
				++rule_match_count;
			}
		}
		boolean rv = score_cur > score_fail;
		if (rv && log_enabled) log_mgr.Insert_page_score(log_tid, page_id, page_ns, page_ttl, src_len, score_cur, rule_match_count, 0);
		return rv;
	}
	// Ends the log transaction; no-op when logging is disabled (log_mgr has no connection in that case).
	public void Rls() {if (log_enabled) log_mgr.Rls();}
	public void Commit() {if (log_enabled) log_mgr.Commit();}
	// Registers the default for the root_dir cfg: bin/xowa "cfg/bldr/filter".
	public static void Cfg__reg(Xoa_app app) {
		app.Cfg().Dflt_mgr().Add(Cfg__root_dir, app.Fsys_mgr().Bin_xowa_dir().GenSubDir_nest("cfg", "bldr", "filter").Raw());
	}
	// Creates a manager for the wiki from cfg values; returns null when the filter is not enabled for the wiki.
	public static Dg_match_mgr New_mgr(Xoa_app app, Xow_wiki wiki) {
		Xocfg_mgr cfg_mgr = app.Cfg();
		if (!cfg_mgr.Get_bool_wiki_or(wiki, Cfg__enabled, false)) return null;
		String ctx = cfg_mgr.To_ctx(wiki);
		return new Dg_match_mgr
		( cfg_mgr.Get_url_or(ctx, Cfg__root_dir, app.Fsys_mgr().Bin_xowa_dir().GenSubDir_nest("cfg", "bldr", "filter")).GenSubDir(wiki.Domain_str())
		, cfg_mgr.Get_int_or(ctx, "xowa.bldr.dansguardian.score_init", 0)
		, cfg_mgr.Get_int_or(ctx, "xowa.bldr.dansguardian.score_fail", 0)
		, cfg_mgr.Get_bool_or(ctx, "xowa.bldr.dansguardian.case_match", false)
		, cfg_mgr.Get_bool_or(ctx, "xowa.bldr.dansguardian.log_enabled", true)
		, wiki.Fsys_mgr().Root_dir().GenSubFil("dansguardian_log.sqlite3")
		);
	}
	public static final String Cfg__enabled = "xowa.bldr.dansguardian.enabled";
	private static final String Cfg__root_dir = "xowa.bldr.dansguardian.root_dir";
}
// All rules that contain a given word.
// Rules are collected in a list and converted to an array once, on the first Rules_ary call.
class Dg_rule_group {
	private final byte[] word;
	private final List_adp rules_list = List_adp_.New();
	private Dg_rule[] rules_ary;	// built on first Rules_ary call, then reused
	public Dg_rule_group(byte[] word) {
		this.word = word;
	}
	public byte[] Word() {return word;}
	// list used to add rules to the group
	public List_adp Rules_list() {return rules_list;}
	// Returns the rules as an array. The first call moves them out of the list (the list is cleared);
	// later calls return that same array.
	public Dg_rule[] Rules_ary() {
		if (rules_ary != null) return rules_ary;
		rules_ary = (Dg_rule[])rules_list.To_ary_and_clear(Dg_rule.class);
		return rules_ary;
	}
}
// Per-page hit counter for one rule: one counter per word of the rule.
class Dg_rule_tally {
	private final Dg_rule rule;
	private final int[] results;	// results[i] = number of hits for the rule's i-th word
	private final int results_len;
	public Dg_rule_tally(Dg_rule rule) {
		this.rule = rule;
		this.results_len = rule.Words().length;
		this.results = new int[results_len];
	}
	public Dg_rule Rule() {return rule;}
	public int[] Results() {return results;}
	// Records one hit for word; word must be one of the rule's words.
	public void Process(byte[] word) {
		Int_obj_ref word_idx = (Int_obj_ref)rule.Word_idx_hash().Get_by_bry(word);
		results[word_idx.Val()] += 1;
	}
	// Returns the smallest hit count across the rule's words, i.e. how many times every word was found;
	// 0 means at least one word is missing.
	public int Results_pass_count() {
		int min = Int_.Max_value;
		for (int i = 0; i < results_len; ++i)
			min = Math.min(min, results[i]);
		return min;
	}
}

View File

@@ -13,44 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs.filters.dansguardians; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.filters.*;
import org.junit.*; import gplx.dbs.*;
public class Dg_match_mgr_tst {
	private Dg_match_mgr_fxt fxt = new Dg_match_mgr_fxt();
	@Before public void init() {
		fxt.Clear();
	}
	// single-word rule "a": expected to match any text containing "a" and nothing else
	@Test public void One() {
		fxt.Init_line(100, "a");
		fxt.Test_match_many_y("a", "ab", "ba", "abc");
		fxt.Test_match_many_n("b");
	}
}
// Test fixture: builds a Dg_match_mgr over in-memory urls and checks Match results for lists of words
class Dg_match_mgr_fxt {
	private final List_adp rule_list = List_adp_.New();
	private Dg_match_mgr match_mgr;
	public void Clear() {
		Db_conn_bldr.Instance.Reg_default_mem();
		Io_url root_dir = Io_url_.mem_dir_("mem/dg/");
		Io_url words_dir = root_dir.GenSubDir("words");
		match_mgr = new Dg_match_mgr(words_dir, 1, 0, Bool_.Y, Bool_.Y, root_dir.GenSubDir("log"));
		rule_list.Clear();
	}
	// registers one rule with the given score and words; ids are dummy values (-1)
	public void Init_line(int score, String... words) {
		Dg_word[] word_ary = Dg_word.Ary_new_by_str_ary(words);
		rule_list.Add(new Dg_rule(-1, -1, -1, Dg_rule.Tid_rule, Bry_.new_a7("key"), score, word_ary));
	}
	public void Test_match_many_y(String... words) {
		Test_match_many(Bool_.Y, words);
	}
	public void Test_match_many_n(String... words) {
		Test_match_many(Bool_.N, words);
	}
	public void Test_match_many(boolean expd, String... words) {
		for (String word : words) {
			Test_match_one(expd, word);
		}
	}
	// resets the mgr, reloads every registered rule, then matches the single word
	public void Test_match_one(boolean expd, String word_str) {
		match_mgr.Clear();
		int len = rule_list.Count();
		int idx = 0;
		while (idx < len) {
			match_mgr.Init_by_rule((Dg_rule)rule_list.Get_at(idx));
			++idx;
		}
		byte[] word_bry = Bry_.new_u8(word_str);
		Tfds.Eq(expd, match_mgr.Match(1, 101, 0, Bry_.Empty, Bry_.Empty, null, word_bry), (expd ? "pass:" : "fail:") + word_str);
	}
}

View File

@@ -13,28 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs.filters.dansguardians; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.filters.*;
import gplx.core.lists.hashs.*;
// Tracks namespace ids listed in an "ns.skip" file
class Dg_ns_skip_mgr {
	private final Hash_adp__int ns_hash = new Hash_adp__int();
	private boolean is_empty = true;	// stays true until at least one ns id is loaded; lets Has() skip the hash lookup
	public boolean Has(int ns) {
		if (is_empty) return false;
		return ns_hash.Get_by_or_null(ns) != null;
	}
	// Reads the file at url and registers every line that parses as an int; other lines are ignored
	public void Load(Io_url url) {
		Gfo_usr_dlg_.Instance.Log_many("", "", "loading ns.skip file; url=~{0}", url.Raw());
		byte[][] lines = Bry_split_.Split_lines(Io_mgr.Instance.LoadFilBry_loose(url));
		int lines_len = lines.length;
		for (int i = 0; i < lines_len; ++i) {
			byte[] line = lines[i];
			int ns_id = Bry_.To_int_or(line, Int_.Max_value);	// Max_value is the "not an int" sentinel
			if (ns_id == Int_.Max_value) continue;
			Gfo_usr_dlg_.Instance.Log_many("", "", "adding ns; ns_id=~{0}", ns_id);
			ns_hash.Add_if_dupe_use_1st(ns_id, line);
			is_empty = false;
		}
	}
}

View File

@@ -13,81 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs.filters.dansguardians; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.filters.*;
// Parses dansguardian-style rule files into Dg_file / Dg_rule objects
// Line format handled by Parse_line: one or more "<word>" terms separated by commas, optionally followed by "<score>"; EX: "<a>,<b><-123>"
class Dg_parser {
	private Gfo_usr_dlg usr_dlg = Gfo_usr_dlg_.Instance; private final Bry_bfr key_bldr = Bry_bfr_.Reset(32);
	private final List_adp files = List_adp_.New(), lines = List_adp_.New(), words = List_adp_.New();
	private int next_id = 0;	// shared counter for file ids and rule ids
	// Parses every file under dir (recursively) and returns one Dg_file per file
	public Dg_file[] Parse_dir(Io_url dir) {
		Io_url[] fil_urls = Io_mgr.Instance.QueryDir_args(dir).Recur_(true).ExecAsUrlAry();
		this.usr_dlg = Gfo_usr_dlg_.Instance;	// re-read the current instance on each parse
		files.Clear();
		int len = fil_urls.length;
		for (int i = 0; i < len; ++i) {
			Io_url fil_url = fil_urls[i];
			byte[] fil_src = Io_mgr.Instance.LoadFilBry_loose(fil_url);
			Dg_file file = Parse_fil(i, fil_url.GenRelUrl_orEmpty(dir), fil_src);
			if (file != null) files.Add(file);
		}
		return (Dg_file[])files.To_ary_and_clear(Dg_file.class);
	}
	// Splits src on "\n" and parses each line; blank, comment and invalid lines are dropped (they all carry Tid_invalid, per the check below)
	private Dg_file Parse_fil(int file_idx, String rel_path, byte[] src) {
		int line_idx = 0; int line_bgn = 0; int src_len = src.length;
		lines.Clear();
		int file_id = ++next_id;
		while (line_bgn < src_len) {
			++line_idx;
			int line_end = Bry_find_.Find_fwd(src, Byte_ascii.Nl, line_bgn); if (line_end == Bry_find_.Not_found) line_end = src_len;
			Dg_rule line = Parse_line(rel_path, file_id, line_idx, src, line_bgn, line_end);
			if (line.Tid() != Dg_rule.Tid_invalid)
				lines.Add(line);
			line_bgn = line_end + 1;
		}
		return new Dg_file(file_id, rel_path, (Dg_rule[])lines.To_ary_and_clear(Dg_rule.class));
	}
	// Parses src[line_bgn..line_end) into one rule
	// Returns Dg_rule.Itm_blank / Itm_comment / Itm_invalid for lines that do not produce a rule
	// A missing or malformed score is only warned about; the rule keeps Dg_rule.Score_banned
	public Dg_rule Parse_line(String rel_path, int file_id, int line_idx, byte[] src, int line_bgn, int line_end) {
		words.Clear();													// drop terms left behind by an earlier line that was abandoned as invalid after collecting some words; else they leak into this rule
		int score = Dg_rule.Score_banned;
		int brack_bgn = line_bgn;
		if (line_end - line_bgn <= 1) return Dg_rule.Itm_blank;			// ignore blank lines; EX: ""; NOTE: also ignores 1-char lines
		if (src[line_bgn] == Byte_ascii.Hash) return Dg_rule.Itm_comment;	// ignore lines starting with hash; EX: "# comment"
		while (brack_bgn < line_end) {									// look for terms bracketed by "<>"
			if (src[brack_bgn] != Byte_ascii.Lt) {Warn("dg.invalid_line.term must start with angle_bgn", rel_path, line_idx, src, line_bgn, line_end); return Dg_rule.Itm_invalid;}
			int brack_end = Find_gt_in_line(src, brack_bgn, line_end);
			if (brack_end == Bry_find_.Not_found) {Warn("dg.invalid_line.angle_end not found", rel_path, line_idx, src, line_bgn, line_end); return Dg_rule.Itm_invalid;}
			byte[] word = Bry_.Mid(src, brack_bgn + 1, brack_end);
			words.Add(word);
			int next_pos = brack_end + 1;
			if (next_pos == line_end) {									// line ends after term; no score; EX: "<a>"
				score = Dg_rule.Score_banned;
				break;
			}
			byte next = src[next_pos];
			if (next == Byte_ascii.Comma)								// another term follows; EX: "<a>,<b>"
				brack_bgn = brack_end + 2;
			else {														// anything else must be the score; EX: "<a><123>"
				brack_bgn = brack_end + 1;
				if (src[brack_bgn] != Byte_ascii.Lt) {Warn("dg.invalid_line.wrong_term_dlm", rel_path, line_idx, src, line_bgn, line_end); break;}
				brack_end = Find_gt_in_line(src, brack_bgn, line_end);
				if (brack_end == Bry_find_.Not_found) {Warn("dg.invalid_line.score not found", rel_path, line_idx, src, line_bgn, line_end); break;}
				int parse_score = Bry_.To_int_or(src, brack_bgn + 1, brack_end, Int_.Min_value);
				if (parse_score == Int_.Min_value) {Warn("dg.invalid_line.score is invalid", rel_path, line_idx, src, line_bgn, line_end); break;}
				score = parse_score;
				break;
			}
		}
		byte[] key = key_bldr.Add_int_variable(file_id).Add_byte_dot().Add_int_variable(line_idx).To_bry_and_clear();	// key is "file_id.line_idx"
		return new Dg_rule(file_id, ++next_id, line_idx, Dg_rule.Tid_rule, key, score, Ary_new_by_ary((byte[][])words.To_ary_and_clear(byte[].class)));
	}
	// Finds the next ">" at or after bgn, but only within the current line; a ">" at or beyond line_end belongs to a later line and is treated as not found
	private static int Find_gt_in_line(byte[] src, int bgn, int line_end) {
		int rv = Bry_find_.Find_fwd(src, Byte_ascii.Gt, bgn);
		return (rv == Bry_find_.Not_found || rv >= line_end) ? Bry_find_.Not_found : rv;
	}
	private static Dg_word[] Ary_new_by_ary(byte[][] ary) {
		int ary_len = ary.length;
		Dg_word[] rv = new Dg_word[ary_len];
		for (int i = 0; i < ary_len; ++i) {
			byte[] raw = ary[i];
			rv[i] = new Dg_word(raw);
		}
		return rv;
	}
	private void Warn(String err_msg, String rel_path, int line_idx, byte[] src, int line_bgn, int line_end) {
		usr_dlg.Warn_many("", "", err_msg + "; file=~{0} line_idx=~{1} line=~{2}", rel_path, line_idx, String_.new_u8(src, line_bgn, line_end));
	}
}

View File

@@ -13,43 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs.filters.dansguardians; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.filters.*;
import org.junit.*;
public class Dg_parser_tst {
	private Dg_parser_fxt fxt = new Dg_parser_fxt();
	@Before public void init() {
		fxt.Init();
	}
	// valid lines
	@Test public void One() {
		fxt.Test_parse_line("<a><123>", fxt.Make_line(123, "a"));
	}
	@Test public void Many() {
		fxt.Test_parse_line("<a>,<b>,<c><-123>", fxt.Make_line(-123, "a", "b", "c"));
	}
	@Test public void Score_0() {
		fxt.Test_parse_line("<a><0>", fxt.Make_line(Dg_rule.Score_banned, "a"));
	}
	@Test public void Noscore() {
		fxt.Test_parse_line("<a>", fxt.Make_line(Dg_rule.Score_banned, "a"));
	}
	@Test public void Noscore_2() {
		fxt.Test_parse_line("<a>,<b>", fxt.Make_line(Dg_rule.Score_banned, "a", "b"));
	}
	// lines that produce no rule
	@Test public void Comment() {
		fxt.Test_parse_line("# comment", Dg_rule.Itm_comment);
	}
	@Test public void Blank() {
		fxt.Test_parse_line("", Dg_rule.Itm_blank);
	}
	@Test public void Invalid_line_bgn() {
		fxt.Test_parse_line(" <a><1>", Dg_rule.Itm_invalid);
	}
	@Test public void Dangling_word() {
		fxt.Test_parse_line("<a", Dg_rule.Itm_invalid);
	}
	// malformed score / delimiter: words parsed so far are kept and score falls back to banned
	@Test public void Dangling_score() {
		fxt.Test_parse_line("<a><12", fxt.Make_line(Dg_rule.Score_banned, "a"));
	}
	@Test public void Invalid_dlm() {
		fxt.Test_parse_line("<a> <1>", fxt.Make_line(Dg_rule.Score_banned, "a"));
	}
	@Test public void Invalid_dlm_2() {
		fxt.Test_parse_line("<a>,<b><c><2>", fxt.Make_line(Dg_rule.Score_banned, "a", "b"));
	}
	@Test public void Invalid_score() {
		fxt.Test_parse_line("<a><1a>", fxt.Make_line(Dg_rule.Score_banned, "a"));
	}
//		@Test  public void Parse_dir() {
//			Dg_parser parser = new Dg_parser();
//			Gfo_usr_dlg_.I = Xoa_app_.New__usr_dlg__console();
//			parser.Parse_dir(Io_url_.new_dir_("C:\\xowa\\bin\\any\\xowa\\bldr\\filters\simple.wikipedia.org\\Dansguardian\\\\"));
//		}
}
// Test fixture: parses a single line and compares score and words against an expected rule
class Dg_parser_fxt {
	private final Dg_parser parser = new Dg_parser();
	private final Bry_bfr bfr = Bry_bfr_.Reset(32);
	private final Bry_bfr tmp_bfr = Bry_bfr_.Reset(16);
	public void Init() {}
	// builds an expected rule; ids are dummy values (-1) and key is null since only score and words are compared
	public Dg_rule Make_line(int score, String... words) {
		Dg_word[] word_ary = Dg_word.Ary_new_by_str_ary(words);
		return new Dg_rule(-1, -1, -1, Dg_rule.Tid_rule, null, score, word_ary);
	}
	public void Test_parse_line(String str, Dg_rule expd) {
		byte[] src = Bry_.new_u8(str);
		Dg_rule actl = parser.Parse_line("rel_path", 0, 0, src, 0, src.length);
		String expd_str = Xto_str(bfr, expd);
		String actl_str = Xto_str(bfr, actl);
		Tfds.Eq_str_lines(expd_str, actl_str);
	}
	// serializes a rule as two lines: "score=..." and "words=..." (words joined by ";")
	private String Xto_str(Bry_bfr bfr, Dg_rule line) {
		bfr.Add_str_a7("score=").Add_int_variable(line.Score()).Add_byte_nl();
		bfr.Add_str_a7("words=").Add_str_u8(String_.Concat_with_str(";", Dg_word.Ary_concat(line.Words(), tmp_bfr, Byte_ascii.Tick))).Add_byte_nl();
		return bfr.To_str_and_clear();
	}
}

View File

@@ -13,56 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs.infos; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
import gplx.dbs.*; import gplx.dbs.cfgs.*;
// Value object describing one builder db file; persisted to / restored from a Db_cfg_tbl under the Grp__bldr_db group
public class Xob_info_file {
	private final int id;
	private final String type;
	private final String ns_ids;
	private final int part_id;
	private final Guid_adp guid;
	private final int schema_version;
	private final String core_file_name;
	private final String orig_file_name;
	public Xob_info_file(int id, String type, String ns_ids, int part_id, Guid_adp guid, int schema_version, String core_file_name, String orig_file_name) {
		this.id = id;
		this.type = type;
		this.ns_ids = ns_ids;
		this.part_id = part_id;
		this.guid = guid;
		this.schema_version = schema_version;
		this.core_file_name = core_file_name;
		this.orig_file_name = orig_file_name;
	}
	public int			Id()				{return id;}
	public String		Type()				{return type;}
	public String		Ns_ids()			{return ns_ids;}
	public int			Part_id()			{return part_id;}
	public Guid_adp		Guid()				{return guid;}
	public int			Schema_version()	{return schema_version;}
	public String		Core_file_name()	{return core_file_name;}
	public String		Orig_file_name()	{return orig_file_name;}
	// Writes all fields as cfg rows inside one txn
	public void Save(Db_cfg_tbl tbl) {
		tbl.Conn().Txn_bgn("make__info__file");
		tbl.Insert_int(Cfg_grp, Cfg_key__id, id);
		tbl.Insert_str(Cfg_grp, Cfg_key__type, type);
		tbl.Insert_str(Cfg_grp, Cfg_key__ns_ids, ns_ids);
		tbl.Insert_int(Cfg_grp, Cfg_key__part_id, part_id);
		tbl.Insert_guid(Cfg_grp, Cfg_key__guid, guid);
		tbl.Insert_int(Cfg_grp, Cfg_key__schema_version, schema_version);
		tbl.Insert_str(Cfg_grp, Cfg_key__core_file_name, core_file_name);
		tbl.Insert_str(Cfg_grp, Cfg_key__orig_file_name, orig_file_name);
		tbl.Conn().Txn_end();
	}
	// Reads all fields from the cfg group; each getter is given a fallback (-1, "unknown", "", empty guid, schema_version 2)
	public static Xob_info_file Load(Db_cfg_tbl tbl) {
		Db_cfg_hash hash = tbl.Select_as_hash(Cfg_grp);
		int id = hash.Get_by(Cfg_key__id).To_int_or(-1);
		String type = hash.Get_by(Cfg_key__type).To_str_or("unknown");
		String ns_ids = hash.Get_by(Cfg_key__ns_ids).To_str_or("");
		int part_id = hash.Get_by(Cfg_key__part_id).To_int_or(-1);
		Guid_adp guid = hash.Get_by(Cfg_key__guid).To_guid_or(Guid_adp_.Empty);
		int schema_version = hash.Get_by(Cfg_key__schema_version).To_int_or(2);
		String core_file_name = hash.Get_by(Cfg_key__core_file_name).To_str_or("");
		String orig_file_name = hash.Get_by(Cfg_key__orig_file_name).To_str_or("");
		return new Xob_info_file(id, type, ns_ids, part_id, guid, schema_version, core_file_name, orig_file_name);
	}
	private static final String Cfg_grp = gplx.xowa.wikis.data.Xowd_cfg_key_.Grp__bldr_db;
	private static final String Cfg_key__id = "id";								// EX: 1
	private static final String Cfg_key__type = "type";							// EX: core
	private static final String Cfg_key__ns_ids = "ns_ids";						// EX: 0
	private static final String Cfg_key__part_id = "part_id";					// EX: 0
	private static final String Cfg_key__guid = "guid";							// EX: 00000000-0000-0000-0000-000000000000
	private static final String Cfg_key__schema_version = "schema_version";		// EX: 2
	private static final String Cfg_key__core_file_name = "core_file_name";		// EX: en.wikipedia.org-text.xowa
	private static final String Cfg_key__orig_file_name = "orig_file_name";		// EX: en.wikipedia.org-text-ns.000-db.002.xowa
	public static final String Ns_ids_empty = "";
	public static final int Part_id_1st = 1;
}

View File

@@ -13,47 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs.infos; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
import gplx.dbs.cfgs.*;
// Value object describing one build session; persisted to / restored from a Db_cfg_tbl under the Grp__bldr_session group
public class Xob_info_session {
	private final String user;
	private final String version;
	private final String wiki_domain;
	private final String dump_name;
	private final DateAdp time;
	private final Guid_adp guid;
	Xob_info_session(String user, String version, String wiki_domain, String dump_name, DateAdp time, Guid_adp guid) {
		this.user = user;
		this.version = version;
		this.wiki_domain = wiki_domain;
		this.dump_name = dump_name;
		this.time = time;
		this.guid = guid;
	}
	public String		User()			{return user;}
	public String		Version()		{return version;}
	public String		Wiki_domain()	{return wiki_domain;}
	public String		Dump_name()		{return dump_name;}
	public DateAdp		Time()			{return time;}
	public Guid_adp		Uuid()			{return guid;}
	// Writes all fields as cfg rows inside one txn
	public void Save(Db_cfg_tbl tbl) {
		tbl.Conn().Txn_bgn("make__info__session");
		tbl.Insert_str(Cfg_grp, Cfg_key__user, user);
		tbl.Insert_str(Cfg_grp, Cfg_key__version, version);
		tbl.Insert_str(Cfg_grp, Cfg_key__wiki_domain, wiki_domain);
		tbl.Insert_str(Cfg_grp, Cfg_key__dump_name, dump_name);
		tbl.Insert_date(Cfg_grp, Cfg_key__time, time);
		tbl.Insert_guid(Cfg_grp, Cfg_key__guid, guid);
		tbl.Conn().Txn_end();
	}
	// Reads all fields from the cfg group; each getter is given a fallback ("", DateAdp_.MinValue, empty guid)
	public static Xob_info_session Load(Db_cfg_tbl tbl) {
		Db_cfg_hash hash = tbl.Select_as_hash(Cfg_grp);
		String user = hash.Get_by(Cfg_key__user).To_str_or("");
		String version = hash.Get_by(Cfg_key__version).To_str_or("");
		String wiki_domain = hash.Get_by(Cfg_key__wiki_domain).To_str_or("");
		String dump_name = hash.Get_by(Cfg_key__dump_name).To_str_or("");
		DateAdp time = hash.Get_by(Cfg_key__time).To_date_or(DateAdp_.MinValue);
		Guid_adp guid = hash.Get_by(Cfg_key__guid).To_guid_or(Guid_adp_.Empty);
		return new Xob_info_session(user, version, wiki_domain, dump_name, time, guid);
	}
	public static final String Cfg_grp = gplx.xowa.wikis.data.Xowd_cfg_key_.Grp__bldr_session;
	public static final String Cfg_key__user = "user";					// EX: anonymous
	public static final String Cfg_key__version = "version";			// EX: 2.3.1.4
	public static final String Cfg_key__wiki_domain = "wiki_domain";	// EX: en.wikipedia.org
	public static final String Cfg_key__dump_name = "dump_name";		// EX: enwiki-latest-pages-articles
	public static final String Cfg_key__time = "time";					// EX: 20150102 030405
	public static final String Cfg_key__guid = "guid";					// EX: 00000000-0000-0000-0000-000000000000
	// Creates a session stamped with the app version, the current time and a new guid
	public static Xob_info_session new_(String user, String wiki_domain, String dump_name) {
		return new Xob_info_session(user, Xoa_app_.Version, wiki_domain, dump_name, Datetime_now.Get(), Guid_adp_.New());
	}
	public static final Xob_info_session Test = new_("anonymous", "en.wikipedia.org", "enwiki-latest-pages-articles");
}

View File

@@ -13,89 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs.installs; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
import gplx.core.threads.*; import gplx.xowa.bldrs.*;
import gplx.xowa.wikis.domains.*;
import gplx.xowa.bldrs.wms.dumps.*;
// Base class for install cmds that run the wiki bldr on a separate thread
// Subclasses supply Async_key and the Process_async_init / Process_async_done hooks
abstract class Xoi_cmd_base implements Gfo_thread_cmd {
	public void Ctor(Xoi_setup_mgr install_mgr, String wiki_key) {
		this.install_mgr = install_mgr; this.wiki_key = wiki_key;
		this.Owner_(install_mgr);
	}	private Xoi_setup_mgr install_mgr; String wiki_key;
	@gplx.Virtual public void Cmd_ctor() {}
	public abstract String Async_key();
	public int Async_sleep_interval()				{return Gfo_thread_cmd_.Async_sleep_interval_1_second;}
	public boolean Async_prog_enabled()				{return false;}
	public void Async_prog_run(int async_sleep_sum) {}
	public byte Async_init()						{return Gfo_thread_cmd_.Init_ok;}
	public boolean Async_term()						{return true;}
	public Gfo_invk Owner() {return owner;} public Xoi_cmd_base Owner_(Gfo_invk v) {owner = v; return this;} Gfo_invk owner;
	public Gfo_thread_cmd Async_next_cmd() {return next_cmd;} public void Async_next_cmd_(Gfo_thread_cmd v) {next_cmd = v;} Gfo_thread_cmd next_cmd;
	// Marks the cmd as running and starts Process_async via Invk on a thread named by Async_key
	public void Async_run() {
		running = true;
		Thread_adp_.Start_by_key(this.Async_key(), this, Invk_process_async);
	}
	// NOTE: volatile b/c the flag is set on the worker thread but polled from the thread that waits on Async_running()
	public boolean Async_running() {return running;} private volatile boolean running;
	// Runs the bldr for wiki_key: init hook -> bldr.Run -> done hook
	// A failure in bldr.Run clears the cmd_mgr's working flag and is rethrown wrapped in an Err
	public void Process_async() {
		try {
			Xoae_app app = install_mgr.App();
			Xob_bldr bldr = app.Bldr();
			Xowe_wiki wiki = app.Wiki_mgr().Get_by_or_make(Bry_.new_a7(wiki_key));
			wiki.Init_assert();
			bldr.Cmd_mgr().Clear();
			Process_async_init(app, wiki, bldr);
			bldr.Pause_at_end_(false);
			try {bldr.Run();}
			catch (Exception e) {
				running = false;
				install_mgr.Cmd_mgr().Working_(Bool_.N);
				throw Err_.new_exc(e, "xo", "error during import");
			}
			app.Usr_dlg().Prog_none("", "clear", "");
			app.Usr_dlg().Note_none("", "clear", "");
			Process_async_done(app, wiki, bldr);
		}
		finally {
			running = false;	// always reset, even if an init / done hook throws; else anything polling Async_running() waits forever
		}
	}
	public abstract void Process_async_init(Xoae_app app, Xowe_wiki wiki, Xob_bldr bldr);
	public abstract void Process_async_done(Xoae_app app, Xowe_wiki wiki, Xob_bldr bldr);
	public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
		if		(ctx.Match(k, Invk_process_async))				Process_async();
		else if	(ctx.Match(k, Invk_owner))						return owner;
		else	return Gfo_invk_.Rv_unhandled;
		return this;
	}	private static final String Invk_process_async = "run_async", Invk_owner = "owner";
}
// Download cmd for the page_props dump (".sql.gz")
class Xoi_cmd_category2_page_props extends Xoi_cmd_wiki_download {
	public static final String KEY_category2 = "wiki.category2.download.page_props";
	public Xoi_cmd_category2_page_props(Xoi_setup_mgr install_mgr, String wiki_key, String dump_date) {
		this.Ctor_download_(install_mgr, wiki_key, dump_date, Xowm_dump_type_.Str__page_props);
	}
	@Override public String Download_file_ext() {
		return ".sql.gz";
	}
}
// Download cmd for the categorylinks dump (".sql.gz")
class Xoi_cmd_category2_categorylinks extends Xoi_cmd_wiki_download {
	public static final String KEY_category2 = "wiki.category2.download.categorylinks";
	public Xoi_cmd_category2_categorylinks(Xoi_setup_mgr install_mgr, String wiki_key, String dump_date) {
		this.Ctor_download_(install_mgr, wiki_key, dump_date, Xowm_dump_type_.Str__categorylinks);
	}
	@Override public String Download_file_ext() {
		return ".sql.gz";
	}
}
// Builds category (version 2) data by running the pageprop and catlink bldr cmds
class Xoi_cmd_category2_build extends Xoi_cmd_base {
	public static final String KEY = "wiki.category2.build";
	private Xoae_app app;
	private String wiki_key;	// NOTE: separate from the wiki_key field of Xoi_cmd_base; both are set to the same value by the ctor
	public Xoi_cmd_category2_build(Xoi_setup_mgr install_mgr, String wiki_key) {
		this.Ctor(install_mgr, wiki_key);
		this.app = install_mgr.App();
		this.wiki_key = wiki_key;
	}
	// marks the wiki's import cfg as category version 2
	@Override public void Cmd_ctor() {
		Xowe_wiki wiki = app.Wiki_mgr().Get_by_or_make(Bry_.new_u8(wiki_key));
		wiki.Import_cfg().Category_version_(gplx.xowa.addons.wikis.ctgs.Xoa_ctg_mgr.Version_2);
	}
	@Override public String Async_key() {
		return KEY;
	}
	@Override public void Process_async_init(Xoae_app app, Xowe_wiki wiki, Xob_bldr bldr) {
		wiki.Db_mgr_as_sql().Category_version_update(false);
		bldr.Cmd_mgr().Add_many
		( wiki
		, gplx.xowa.addons.wikis.ctgs.bldrs.Xob_pageprop_cmd.BLDR_CMD_KEY
		, gplx.xowa.addons.wikis.ctgs.bldrs.Xob_catlink_cmd.BLDR_CMD_KEY
		);
	}
	@Override public void Process_async_done(Xoae_app app, Xowe_wiki wiki, Xob_bldr bldr) {
		app.Usr_dlg().Prog_many("", "", "category2 setup done");
	}
}
// Builds search (version 2) data via Srch_bldr_mgr_.Setup
class Xoi_cmd_search2_build extends Xoi_cmd_base {
	public static final String KEY = "wiki.search2.build";
	public Xoi_cmd_search2_build(Xoi_setup_mgr install_mgr, String wiki_key) {
		this.Ctor(install_mgr, wiki_key);
	}
	@Override public String Async_key() {
		return KEY;
	}
	@Override public void Process_async_init(Xoae_app app, Xowe_wiki wiki, Xob_bldr bldr) {
		wiki.Db_mgr_as_sql().Category_version_update(false);
		gplx.xowa.addons.wikis.searchs.bldrs.Srch_bldr_mgr_.Setup(wiki);
	}
	@Override public void Process_async_done(Xoae_app app, Xowe_wiki wiki, Xob_bldr bldr) {
		app.Usr_dlg().Prog_many("", "", "search2 setup done");
		// wiki.Db_mgr().Search_version_refresh();
	}
}

View File

@@ -13,46 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs.installs; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
import gplx.core.threads.*;
// Handles import from a user-selected dump file (".bz2" or ".xml")
// Parse_msg works out which urls to use; Exec queues the matching cmds on the cmd_mgr
class Xoi_cmd_dumpfile {
	public byte[] Domain() {return domain;} private byte[] domain;
	public Io_url Bz2_url() {return bz2_url;} Io_url bz2_url;
	public Io_url Xml_url() {return xml_url;} Io_url xml_url;
	public boolean Bz2_unzip() {return bz2_unzip;} private boolean bz2_unzip;
	public void Clear() {domain = null; bz2_url = xml_url = null; bz2_unzip = false;}
	// Reads "url", "domain" and "args" from the msg
	//   ".bz2" + args "unzip": bz2_url = file; xml_url = same path with ".xml" ext
	//   ".bz2" otherwise     : bz2_url = file; xml_url = null
	//   ".xml"               : xml_url = file; bz2_url = null; unzip arg ignored
	// domain defaults to the name of the file's owner dir when not supplied
	public Xoi_cmd_dumpfile Parse_msg(GfoMsg m) {
		bz2_url = null; xml_url = null;									// reset urls from any earlier parse; else a reused instance keeps a stale xml_url and Exec imports the wrong file
		Io_url dump_url = m.ReadIoUrl("url");
		domain = m.ReadBry("domain");
		if (Bry_.Len_eq_0(domain)) domain = Bry_.new_u8(dump_url.OwnerDir().NameOnly());
		bz2_unzip = String_.Eq(m.ReadStr("args"), "unzip");
		String dump_ext = dump_url.Ext();
		if		(String_.Eq(dump_ext, ".bz2")) {
			bz2_url = dump_url;
			if (bz2_unzip) {
				xml_url = bz2_url.GenNewExt("");						// remove .bz2 extension (new file path should be .xml)
				if (!String_.Eq(xml_url.Ext(), ".xml"))
					xml_url = xml_url.GenNewExt(".xml");
			}
		}
		else if (String_.Eq(dump_ext, ".xml")) {						// user selected xml file;
			bz2_url = null;
			xml_url = dump_url;
			bz2_unzip = false;											// ignore unzip arg
		}
		return this;
	}
	// Queues an unzip cmd when requested, points the wiki's import cfg at the xml file (if known) or the bz2 file,
	// then adds the custom cmd chain in dump-file mode
	public Gfo_thread_cmd Exec(Xoi_cmd_mgr cmd_mgr) {
		Xowe_wiki wiki = cmd_mgr.App().Wiki_mgr().Get_by_or_make(domain);
		if (bz2_unzip) {	// unzip requested; add unzip cmd
			GfoMsg unzip_msg = GfoMsg_.new_parse_(Gfo_thread_cmd_unzip.KEY).Add("v", Gfo_thread_cmd_unzip.KEY).Add("src", bz2_url.Raw()).Add("trg", xml_url.Raw());
			Gfo_thread_cmd_unzip unzip_cmd = (Gfo_thread_cmd_unzip)cmd_mgr.Cmd_add(unzip_msg);
			unzip_cmd.Term_cmd_for_src_(Gfo_thread_cmd_unzip.Term_cmd_for_src_noop);	// don't do anything with bz2 after unzip
		}
		if (xml_url == null)
			wiki.Import_cfg().Src_fil_bz2_(bz2_url);
		else
			wiki.Import_cfg().Src_fil_xml_(xml_url);
		return cmd_mgr.Dump_add_many_custom(String_.new_u8(domain), "", "", true);
	}
}

View File

@@ -13,62 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs.installs; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
import org.junit.*;
public class Xoi_cmd_dumpfile_tst {
	@Before public void init() {fxt.Clear();} private Xoi_cmd_dumpfile_fxt fxt = new Xoi_cmd_dumpfile_fxt();
	@Test  public void Bz2__unzip() {
		fxt	.Exec_parse_msg("mem/en.wikipedia.org/fil.xml.bz2", "", "unzip")
			.Test_domain("en.wikipedia.org")
			.Test_vals("mem/en.wikipedia.org/fil.xml.bz2", "mem/en.wikipedia.org/fil.xml", true)
			;
	}
	@Test  public void Bz2__unzip__assert_xml_ext() {	// xml ext relies on removing ".bz2" from ".xml.bz2"; if just ".bz2" add an ".xml"
		fxt	.Exec_parse_msg("mem/en.wikipedia.org/fil.bz2", "", "unzip")
			.Test_vals("mem/en.wikipedia.org/fil.bz2", "mem/en.wikipedia.org/fil.xml", true)
			;
	}
	@Test  public void Bz2__direct() {
		fxt	.Exec_parse_msg("mem/en.wikipedia.org/fil.bz2", "", "")
			.Test_vals("mem/en.wikipedia.org/fil.bz2", null, false)
			;
	}
	@Test  public void Xml__unzip_n() {
		fxt	.Exec_parse_msg("mem/en.wikipedia.org/fil.xml", "", "")
			.Test_vals(null, "mem/en.wikipedia.org/fil.xml", false)
			;
	}
	@Test  public void Xml__unzip_y() {	// "unzip" requested for an xml file; arg must be ignored (was identical to Xml__unzip_n and never passed "unzip")
		fxt	.Exec_parse_msg("mem/en.wikipedia.org/fil.xml", "", "unzip")
			.Test_vals(null, "mem/en.wikipedia.org/fil.xml", false)
			;
	}
}
// Test fixture: feeds a msg to Xoi_cmd_dumpfile.Parse_msg and checks the resulting urls / flags
class Xoi_cmd_dumpfile_fxt {
	private Xoi_cmd_dumpfile dumpfile = new Xoi_cmd_dumpfile();
	public void Clear() {
		dumpfile.Clear();
	}
	public Xoi_cmd_dumpfile_fxt Exec_parse_msg(String url, String domain, String args) {
		GfoMsg msg = GfoMsg_.new_parse_("").Add("url", url).Add("domain", domain).Add("args", args);
		dumpfile.Parse_msg(msg);
		return this;
	}
	public Xoi_cmd_dumpfile_fxt Test_vals(String expd_bz2, String expd_xml, boolean expd_unzip) {
		Eq_url(expd_bz2, dumpfile.Bz2_url());
		Eq_url(expd_xml, dumpfile.Xml_url());
		Tfds.Eq(expd_unzip, dumpfile.Bz2_unzip());
		return this;
	}
	public Xoi_cmd_dumpfile_fxt Test_domain(String expd_domain) {
		String actl_domain = String_.new_u8(dumpfile.Domain());
		Tfds.Eq(expd_domain, actl_domain);
		return this;
	}
	// compares an expected raw url to an actual Io_url; both null is a pass; exactly one null is an error
	private void Eq_url(String expd, Io_url actl) {
		boolean expd_is_null = expd == null;
		boolean actl_is_null = actl == null;
		if (expd_is_null && actl_is_null) return;
		if (!expd_is_null && !actl_is_null) {
			Tfds.Eq(expd, actl.Raw());
			return;
		}
		if (expd_is_null) throw Err_.new_wo_type("actl should be null", "expd", expd);
		throw Err_.new_wo_type("actl should not be null", "expd", expd);
	}
}

View File

@@ -13,35 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs.installs; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
import gplx.gfui.*; import gplx.gfui.kits.core.*;
import gplx.core.threads.*;
// Downloads a fixed ImageMagick windows zip to trg
// NOTE: the commented-out code below once scraped the binaries page for the current zip name; the url is now hard-coded
class Xoi_cmd_imageMagick_download extends Gfo_thread_cmd_download implements Gfo_thread_cmd {//		private static final    byte[] Bry_windows_zip = Bry_.new_a7("-windows.zip");
//		static final String Src_imageMagick = "ftp://ftp.sunet.se/pub/multimedia/graphics/ImageMagick/binaries/";
	public static final String KEY_imageMagick = "download.imageMagick";
	static final String GRP_KEY = "xowa.install.cmds.download.imageMagick";
	Io_url trg;
	public Xoi_cmd_imageMagick_download(Gfo_usr_dlg usr_dlg, Gfui_kit kit, Io_url trg) {
		this.Ctor(usr_dlg, kit);
		this.trg = trg;
	}
	@Override public byte Async_init() {	// <a href="ImageMagick-6.8.1-9-Q16-x86-windows.zip">
//			byte[] raw = xrg.Exec_as_bry(Src_imageMagick);
//			int find_pos = Bry_find_.Find_fwd(raw, Bry_windows_zip); if (find_pos == Bry_find_.Not_found) return Fail();
//			int bgn_pos = Bry_find_.Find_bwd(raw, Byte_ascii.Quote, find_pos); if (bgn_pos == Bry_find_.Not_found) return Fail();
//			++bgn_pos;
//			int end_pos = Bry_find_.Find_fwd(raw, Byte_ascii.Quote, bgn_pos); if (end_pos == Bry_find_.Not_found) return Fail();
//			String src = Src_imageMagick + String_.new_a7(Bry_.Mid(raw, bgn_pos, end_pos));
		String src = "http://ftp.sunet.se/pub/multimedia/graphics/ImageMagick/binaries/ImageMagick-6.8.8-1-Q16-x86-windows.zip";
		this.Init("downloading", src, trg);
		return super.Async_init();
	}
	// tells the user to download manually and cancels this step; only referenced by the commented-out code above
	byte Fail() {
		kit.Ask_ok(GRP_KEY, "windows_not_found", "Could not find Windows binary. Please download ImageMagick directly from the site.");
		return Gfo_thread_cmd_.Init_cancel_step;
	}
}
// Cmd whose only effect is to show msg in an ok dialog when it terminates
class Xoi_cmd_msg_ok extends Gfo_thread_cmd_base implements Gfo_thread_cmd {
	public static final String KEY = "msg.ok";
	private String msg;
	public Xoi_cmd_msg_ok(Gfo_usr_dlg usr_dlg, Gfui_kit kit, String msg) {
		this.msg = msg;
		this.Ctor(usr_dlg, kit);
	}
	@Override public boolean Async_term() {
		kit.Ask_ok("msg_ok", "msg", msg);
		return true;
	}
}

View File

@@ -13,133 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs.installs; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
import gplx.core.brys.fmtrs.*; import gplx.core.threads.*;
public class Xoi_cmd_mgr implements Gfo_invk {
List_adp cmds = List_adp_.New();	// queued Gfo_thread_cmd items; chained and started by Cmds_run
private Xoae_app app; Xoi_setup_mgr install_mgr;
// NOTE: volatile b/c these flags are set through public methods but read while cmds are being processed (canceled is polled in the Process_async wait loop); presumably on different threads
private volatile boolean canceled = false;
private volatile boolean working;
public Xoi_cmd_mgr(Xoi_setup_mgr install_mgr) {this.app = install_mgr.App(); this.install_mgr = install_mgr;}
public Xoae_app App() {return app;}
// requests that the cmd currently being waited on be abandoned
public void Canceled_y_() {canceled = true;}
public boolean Working() {return working;}
// sets the working flag and mirrors it to the app via Bldr__running_
public void Working_(boolean v) {
	working = v;
	app.Bldr__running_(v);
}
// Runs one cmd to completion, then either starts the next cmd in the chain or clears the working flag
//   - cmd is only run when Async_init returns Init_ok; Async_term is called either way
//   - the chain continues only if there is a next cmd, init did not return Init_cancel_all, and Async_term passed
private void Process_async(Gfo_thread_cmd cmd) {
	byte init_rslt = cmd.Async_init();
	if (init_rslt == Gfo_thread_cmd_.Init_ok) {
		cmd.Async_run();
		int sleep_interval = cmd.Async_sleep_interval();
		boolean prog_enabled = cmd.Async_prog_enabled();
		// poll until cmd reports that it is no longer running
		for (int slept_sum = 0; cmd.Async_running(); slept_sum += sleep_interval) {	// NOTE: slept_sum is not exact
			if (canceled) {
				this.Working_(Bool_.N);
				return;
			}
			if (prog_enabled) cmd.Async_prog_run(slept_sum);
			Thread_adp_.Sleep(sleep_interval);
		}
	}
	boolean term_pass = cmd.Async_term();
	boolean run_next = cmd.Async_next_cmd() != null && init_rslt != Gfo_thread_cmd_.Init_cancel_all && term_pass;
	if (!run_next) {
		this.Working_(Bool_.N);
		return;
	}
	Run_async(cmd.Async_next_cmd());
}
// Starts a thread (named by the cmd's Async_key) that invokes Invk_process_async on this mgr with cmd as the value
private void Run_async(Gfo_thread_cmd cmd) {
	String thread_key = cmd.Async_key();
	Thread_adp_.Start_by_val(thread_key, this, Invk_process_async, cmd);
}
// Chains the queued cmds (each cmd's next = the following cmd), empties the queue, and starts the first cmd asynchronously
// Does nothing but show a dialog if an import is already in progress
private void Cmds_run() {
	if (working) {
		app.Gui_mgr().Kit().Ask_ok("", "", "An import is in progress. Please wait for it to complete. If you want to do multiple imports at once, see Dashboard/Import/Offline."); // HOME
		return;
	}
	int cmds_len = cmds.Count();
	if (cmds_len == 0) return;
	// NOTE(review): loop stops before the last cmd, so Cmd_ctor is never called on the last cmd; confirm this is intended
	int last_idx = cmds_len - 1;
	int idx = 0;
	while (idx < last_idx) {
		Gfo_thread_cmd cur_cmd = (Gfo_thread_cmd)cmds.Get_at(idx);
		Gfo_thread_cmd nxt_cmd = (Gfo_thread_cmd)cmds.Get_at(idx + 1);
		cur_cmd.Cmd_ctor();
		cur_cmd.Async_next_cmd_(nxt_cmd);
		idx++;
	}
	Gfo_thread_cmd first_cmd = (Gfo_thread_cmd)cmds.Get_at(0);
	cmds.Clear();
	this.Working_(Bool_.Y);
	app.Bldr__running_(true);
	this.Run_async(first_cmd);
}
// Queues one cmd per cmd key in the msg
//   args: 0=wiki name; 1=wiki date; 2=dump type; 3+=cmd keys
//   the "wiki.custom" key short-circuits: the custom chain is queued and its result returned immediately
// Returns the last cmd queued
Object Dump_add_many(GfoMsg m) {
	int args_len = m.Args_count();
	if (args_len < 4) throw Err_.new_wo_type("Please provide the following: wiki name, wiki date, dump_type, and one command; EX: ('simple.wikipedia.org', 'latest', 'pages-articles', 'wiki.download')");
	String wiki_key = m.Args_getAt(0).Val_to_str_or_empty();
	String wiki_date = m.Args_getAt(1).Val_to_str_or_empty();
	String dump_type = m.Args_getAt(2).Val_to_str_or_empty();
	Gfo_thread_cmd last_cmd = null;
	int arg_idx = 3;
	while (arg_idx < args_len) {
		Keyval kv = m.Args_getAt(arg_idx);
		String kv_val = kv.Val_to_str_or_empty();
		if (String_.Eq(kv_val, Wiki_cmd_custom)) {
			return Dump_add_many_custom(wiki_key, wiki_date, dump_type, false);
		}
		last_cmd = Dump_cmd_new(wiki_key, wiki_date, dump_type, kv.Val_to_str_or_empty());
		cmds.Add(last_cmd);
		arg_idx++;
	}
	return last_cmd;
}
// Queues the standard chain: download -> (unzip, if "xowa.bldr.import.unzip_bz2_file" is set) -> import
// When dumpfile_cmd is true (user supplied a local dump file):
//   - download is skipped
//   - unzip is skipped if the wiki's import cfg already has an xml file
//   - import is told not to move the bz2 to done
// Returns the last cmd created
public Gfo_thread_cmd Dump_add_many_custom(String wiki_key, String wiki_date, String dump_type, boolean dumpfile_cmd) {
	String[] custom_cmds;
	if (app.Cfg().Get_bool_app_or("xowa.bldr.import.unzip_bz2_file", false)) {	// CFG: Cfg__
		custom_cmds = String_.Ary(Xoi_cmd_wiki_download.Key_wiki_download, Xoi_cmd_wiki_unzip.KEY_dump, Xoi_cmd_wiki_import.KEY);
	}
	else {
		custom_cmds = String_.Ary(Xoi_cmd_wiki_download.Key_wiki_download, Xoi_cmd_wiki_import.KEY);
	}
	int custom_cmds_len = custom_cmds.length;
	Gfo_thread_cmd cmd = null;
	for (String custom_cmd : custom_cmds) {
		cmd = Dump_cmd_new(wiki_key, wiki_date, dump_type, custom_cmd);
		if (dumpfile_cmd) {
			if (String_.Eq(cmd.Async_key(), Xoi_cmd_wiki_download.Key_wiki_download)) {
				continue;	// skip download if wiki.dump_file
			}
			if (String_.Eq(cmd.Async_key(), Xoi_cmd_wiki_unzip.KEY_dump)) {
				Xowe_wiki wiki = app.Wiki_mgr().Get_by_or_make(Bry_.new_u8(wiki_key));
				if (wiki.Import_cfg().Src_fil_xml() != null) continue;	// skip unzip if xml exists
			}
			else if (String_.Eq(cmd.Async_key(), Xoi_cmd_wiki_import.KEY)) {
				((Xoi_cmd_wiki_import)cmd).Import_move_bz2_to_done_(false);
			}
		}
		cmds.Add(cmd);
	}
	return cmd;
}
// Factory for dump-related cmds: maps cmd_key to a new cmd instance owned by this object.
// Throws Err_.new_unhandled for an unrecognized cmd_key.
Gfo_thread_cmd Dump_cmd_new(String wiki_key, String wiki_date, String dump_type, String cmd_key) {
  // core dump pipeline: download -> unzip -> import
  if (String_.Eq(cmd_key, Xoi_cmd_wiki_download.Key_wiki_download)) {
    Xoi_cmd_wiki_download download_cmd = new Xoi_cmd_wiki_download().Ctor_download_(install_mgr, wiki_key, wiki_date, dump_type);
    return download_cmd.Owner_(this);
  }
  if (String_.Eq(cmd_key, Xoi_cmd_wiki_unzip.KEY_dump)) {
    return new Xoi_cmd_wiki_unzip(install_mgr, wiki_key, wiki_date, dump_type).Owner_(this);
  }
  if (String_.Eq(cmd_key, Xoi_cmd_wiki_import.KEY)) {
    return new Xoi_cmd_wiki_import(install_mgr, wiki_key, wiki_date, dump_type).Owner_(this);
  }
  // category cmds
  if (String_.Eq(cmd_key, Xoi_cmd_category2_build.KEY)) {
    return new Xoi_cmd_category2_build(install_mgr, wiki_key).Owner_(this);
  }
  if (String_.Eq(cmd_key, Xoi_cmd_category2_page_props.KEY_category2)) {
    return new Xoi_cmd_category2_page_props(install_mgr, wiki_key, wiki_date).Owner_(this);
  }
  if (String_.Eq(cmd_key, Xoi_cmd_category2_categorylinks.KEY_category2)) {
    return new Xoi_cmd_category2_categorylinks(install_mgr, wiki_key, wiki_date).Owner_(this);
  }
  // search cmd
  if (String_.Eq(cmd_key, Xoi_cmd_search2_build.KEY)) {
    return new Xoi_cmd_search2_build(install_mgr, wiki_key).Owner_(this);
  }
  throw Err_.new_unhandled(cmd_key);
}
public static final String Wiki_cmd_custom = "wiki.custom", Wiki_cmd_dump_file = "wiki.dump_file";
// Builds a cmd from the msg (see Cmd_clone), appends it to the pending queue, and returns it.
public Gfo_thread_cmd Cmd_add(GfoMsg m) {
  Gfo_thread_cmd new_cmd = Cmd_clone(m);
  cmds.Add(new_cmd);
  return new_cmd;
}
// Builds a new cmd from a msg: reads "v" as the cmd key, then constructs the matching cmd,
// pulling any further settings ("src", "trg", "fil", "v") from the same msg.
// Url-like args are passed through Bry_fmtr_eval_mgr_.Eval_url with the app's Url_cmd_eval.
// Throws Err_.new_unhandled when the cmd key matches none of the known keys.
// NOTE(review): the goto_page and msg_ok branches call m.ReadStr("v") a second time; whether that
// returns the cmd key again or a following arg depends on GfoMsg.ReadStr, which is not visible
// here -- confirm before replacing those calls with cmd_key.
Gfo_thread_cmd Cmd_clone(GfoMsg m) {
String cmd_key = m.ReadStr("v");
// generic file cmds: download / unzip / replace
if (String_.Eq(cmd_key, Gfo_thread_cmd_download.KEY)) return new Gfo_thread_cmd_download().Init("downloading", m.ReadStr("src"), Bry_fmtr_eval_mgr_.Eval_url(app.Url_cmd_eval(), m.ReadBry("trg"))).Url_eval_mgr_(app.Url_cmd_eval()).Owner_(this).Ctor(app.Usr_dlg(), app.Gui_mgr().Kit());
else if (String_.Eq(cmd_key, Gfo_thread_cmd_unzip.KEY)) return new Gfo_thread_cmd_unzip().Url_eval_mgr_(app.Url_cmd_eval()).Owner_(this).Init(app.Usr_dlg(), app.Gui_mgr().Kit(), app.Prog_mgr().App_decompress_bz2(), app.Prog_mgr().App_decompress_zip(), app.Prog_mgr().App_decompress_gz(), Bry_fmtr_eval_mgr_.Eval_url(app.Url_cmd_eval(), m.ReadBry("src")), Bry_fmtr_eval_mgr_.Eval_url(app.Url_cmd_eval(), m.ReadBry("trg")));
else if (String_.Eq(cmd_key, Gfo_thread_cmd_replace.KEY)) return new Gfo_thread_cmd_replace().Url_eval_mgr_(app.Url_cmd_eval()).Owner_(this).Init(app.Usr_dlg(), app.Gui_mgr().Kit(), Bry_fmtr_eval_mgr_.Eval_url(app.Url_cmd_eval(), m.ReadBry("fil")));
// gui cmds: navigate to a page / show a message
else if (String_.Eq(cmd_key, Xoi_cmd_wiki_goto_page.KEY)) return new Xoi_cmd_wiki_goto_page(app, m.ReadStr("v")).Owner_(this);
else if (String_.Eq(cmd_key, Xoi_cmd_msg_ok.KEY)) return new Xoi_cmd_msg_ok(app.Usr_dlg(), app.Gui_mgr().Kit(), m.ReadStr("v")).Owner_(this);
else if (String_.Eq(cmd_key, Xoi_cmd_imageMagick_download.KEY_imageMagick)) return new Xoi_cmd_imageMagick_download(app.Usr_dlg(), app.Gui_mgr().Kit(), Bry_fmtr_eval_mgr_.Eval_url(app.Url_cmd_eval(), m.ReadBry("trg"))).Owner_(this);
// wiki.dump_file: delegated to Wiki_cmd_dump_file_make, which parses the rest of the msg itself
else if (String_.Eq(cmd_key, Wiki_cmd_dump_file)) return Wiki_cmd_dump_file_make(m);
else throw Err_.new_unhandled(cmd_key);
}
// Parses the msg into a Xoi_cmd_dumpfile and executes it against this object, returning the resulting cmd.
// note: might be used directly in home-wiki pages to download files
Gfo_thread_cmd Wiki_cmd_dump_file_make(GfoMsg m) {
  return new Xoi_cmd_dumpfile().Parse_msg(m).Exec(this);
}
// Message dispatcher. Keys are tested in this order: process_async, dump_add_many, cmd_add, run.
// dump_add_many and cmd_add return the created cmd; process_async and run return this;
// any other key returns Gfo_invk_.Rv_unhandled.
public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
  if (ctx.Match(k, Invk_process_async)) {
    Process_async((Gfo_thread_cmd)m.CastObj("v"));
    return this;
  }
  if (ctx.Match(k, Invk_dump_add_many)) return Dump_add_many(m);
  if (ctx.Match(k, Invk_cmd_add)) return Cmd_add(m);
  if (ctx.Match(k, Invk_run)) {
    Cmds_run();
    return this;
  }
  return Gfo_invk_.Rv_unhandled;
}
private static final String Invk_process_async = "process_async", Invk_dump_add_many = "dump_add_many", Invk_run = "run", Invk_cmd_add = "cmd_add";
static final String GRP_KEY = "xowa.install_mgr.cmd_mgr";
}

View File

@@ -13,41 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs.installs; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
import gplx.gfui.*;
import gplx.core.threads.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.cmds.utils.*;
import gplx.xowa.bldrs.wms.dumps.*;
// Download cmd ("wiki.download") for a wiki dump file.
// Async_init picks a dump server, works out the target file inside the wiki's root dir,
// configures the base download cmd with them, and then defers to the base class.
class Xoi_cmd_wiki_download extends Gfo_thread_cmd_download implements Gfo_thread_cmd {
  public static final String Key_wiki_download = "wiki.download";
  // process-wide flag so that the "all dump servers are offline" dialog is only shown once
  private static boolean Dump_servers_offline_msg_shown = false;

  private Xoi_setup_mgr install_mgr;
  private String wiki_key, dump_date, dump_type;

  // Stores the dump coordinates, sets install_mgr as the owner, and returns this for chaining.
  public Xoi_cmd_wiki_download Ctor_download_(Xoi_setup_mgr install_mgr, String wiki_key, String dump_date, String dump_type) {
    this.install_mgr = install_mgr;
    this.wiki_key = wiki_key;
    this.dump_date = dump_date;
    this.dump_type = dump_type;
    this.Owner_(install_mgr);
    return this;
  }

  // wiki.download is primarily used for dump files; default to .xml.bz2; NOTE: changed from ".xml"; DATE:2013-11-07
  @gplx.Virtual public String Download_file_ext() {
    return ".xml.bz2";
  }

  @Override public String Async_key() {
    return Key_wiki_download;
  }

  @Override public byte Async_init() {
    Xoae_app app = install_mgr.App();
    Xowm_dump_file dump_file = new Xowm_dump_file(wiki_key, dump_date, dump_type);

    // try the configured servers; note the url on success, warn (once) on failure
    // the download is still set up below even when no server connected
    String[] server_urls = gplx.xowa.bldrs.installs.Xoi_dump_mgr.Server_urls(app);
    if (Xowm_dump_file_.Connect_first(dump_file, server_urls)) {
      app.Usr_dlg().Note_many("", "", "url: ~{0}", dump_file.File_url());
    }
    else if (!Dump_servers_offline_msg_shown) {
      app.Gui_mgr().Kit().Ask_ok("", "", "all dump servers are offline: ~{0}", String_.AryXtoStr(server_urls));
      Dump_servers_offline_msg_shown = true;
    }

    // target: reuse the first file in the wiki root matching the dump type + ext; else use the dump's own file name
    Xowe_wiki wiki = app.Wiki_mgr().Get_by_or_make(dump_file.Domain_itm().Domain_bry());
    Io_url root_dir = wiki.Fsys_mgr().Root_dir();
    String fil_filter = "*." + dump_type + Download_file_ext() + "*";
    Io_url[] existing_fils = Io_mgr.Instance.QueryDir_args(root_dir).FilPath_(fil_filter).ExecAsUrlAry();
    Io_url trg;
    if (existing_fils.length == 0)
      trg = root_dir.GenSubFil(dump_file.File_name());
    else
      trg = existing_fils[0];

    this.Ctor(app.Usr_dlg(), app.Gui_mgr().Kit());
    this.Init("download", dump_file.File_url(), trg);
    return super.Async_init();
  }
}

View File

@@ -13,16 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs.installs; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
import gplx.core.threads.*;
// Cmd ("wiki.goto_page") that navigates the browser window to a page.
// Async_run routes the navigation through kit.New_cmd_sync so that it arrives back at Invk as "goto_page".
class Xoi_cmd_wiki_goto_page extends Gfo_thread_cmd_base implements Gfo_thread_cmd {
  public static final String KEY = "wiki.goto_page";
  private static final String Invk_goto_page = "goto_page";

  private Xoae_app app;
  String page;

  public Xoi_cmd_wiki_goto_page(Xoae_app app, String page) {
    this.app = app;
    this.page = page;
    this.Ctor(app.Usr_dlg(), app.Gui_mgr().Kit());
  }

  @Override public void Async_run() {
    kit.New_cmd_sync(this).Invk(GfsCtx.new_(), 0, Invk_goto_page, GfoMsg_.Null);
  }

  // Handles "goto_page" by navigating the browser window to the stored page; all other keys go to the base class.
  @Override public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
    if (!ctx.Match(k, Invk_goto_page)) return super.Invk(ctx, ikey, k, m);
    app.Gui_mgr().Browser_win().Page__navigate_by_url_bar(page);
    return this;
  }
}

View File

@@ -13,99 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs.installs; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
import gplx.core.threads.*;
import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.cmds.utils.*;
import gplx.xowa.guis.views.*;
import gplx.xowa.wikis.domains.*;
import gplx.xowa.htmls.hrefs.*;
import gplx.xowa.addons.wikis.ctgs.bldrs.*;
// Cmd ("wiki.import") that builds a wiki from a downloaded dump.
// Async_run marks the cmd as running and starts a thread (via Thread_adp_.Start_by_key) that calls
// Process_async. Process_async sets up and runs the builder, cleans up the source file, clears the
// running flag, and finally asks the gui kit to open the wiki's main page.
class Xoi_cmd_wiki_import implements Gfo_thread_cmd {
  // Set in Async_run, cleared on the import thread in Process_async, and read through Async_running().
  // volatile so that a thread polling Async_running() is guaranteed to see the import thread's update.
  private volatile boolean running;
  private Xowe_wiki wiki; // assigned in Process_async; read by Open_wiki
  private Xoi_setup_mgr install_mgr; String wiki_key, wiki_date, dump_type;
  public Xoi_cmd_wiki_import(Xoi_setup_mgr install_mgr, String wiki_key, String wiki_date, String dump_type) {
    this.install_mgr = install_mgr;
    this.Owner_(install_mgr);
    this.wiki_key = wiki_key;
    this.wiki_date = wiki_date;
    this.dump_type = dump_type;
  }
  public static final String KEY = "wiki.import";
  public void Cmd_ctor() {}
  public String Async_key() {return KEY;}
  public int Async_sleep_interval() {return Gfo_thread_cmd_.Async_sleep_interval_1_second;}
  public boolean Async_prog_enabled() {return false;}
  public void Async_prog_run(int async_sleep_sum) {}
  public byte Async_init() {return Gfo_thread_cmd_.Init_ok;}
  // Logs "import.end" and always reports success.
  public boolean Async_term() {
    install_mgr.App().Usr_dlg().Log_many(GRP_KEY, "import.end", "import.end ~{0} ~{1} ~{2}", wiki_key, wiki_date, dump_type);
    return true;
  }
  public Gfo_invk Owner() {return owner;} public Xoi_cmd_wiki_import Owner_(Gfo_invk v) {owner = v; return this;} Gfo_invk owner;
  public Gfo_thread_cmd Async_next_cmd() {return next_cmd;} public void Async_next_cmd_(Gfo_thread_cmd v) {next_cmd = v;} Gfo_thread_cmd next_cmd;
  // Marks the cmd as running, logs "import.bgn", and starts the import on its own thread.
  public void Async_run() {
    running = true; // set before the thread starts so Async_running() is true as soon as this method returns
    install_mgr.App().Usr_dlg().Log_many(GRP_KEY, "import.bgn", "import.bgn ~{0} ~{1} ~{2}", wiki_key, wiki_date, dump_type);
    Thread_adp_.Start_by_key(this.Async_key(), this, Invk_process_async);
  }
  public boolean Async_running() {
    return running;
  }
  // Whether a .bz2 source is moved to the "#dump/done" folder after import; defaults to true.
  public boolean Import_move_bz2_to_done() {return import_move_bz2_to_done;} public Xoi_cmd_wiki_import Import_move_bz2_to_done_(boolean v) {import_move_bz2_to_done = v; return this;} private boolean import_move_bz2_to_done = true;
  // Body of the import thread.
  // NOTE(review): if anything here throws, running is never reset to false -- confirm how callers recover.
  private void Process_async() {
    Xoae_app app = install_mgr.App();
    app.Usr_dlg().Prog_one("", "", "preparing import: ~{0}", wiki_key);
    Xob_bldr bldr = app.Bldr();
    wiki = app.Wiki_mgr().Get_by_or_make(Bry_.new_a7(wiki_key));
    wiki.Init_assert();
    // rebuild the builder's cmd list from scratch for this import
    bldr.Cmd_mgr().Clear();
    bldr.Pause_at_end_(false);
    Io_url src_url = wiki.Import_cfg().Src_rdr().Url();
    Process_sql(bldr, src_url);
    bldr.Run();
    app.Usr_dlg().Prog_none(GRP_KEY, "clear", ""); app.Usr_dlg().Note_none(GRP_KEY, "clear", "");
    app.Usere().Available_from_fsys();
    // force the wiki to re-initialize now that its data has been built
    wiki.Init_needed_(true);
    wiki.Html_mgr().Page_wtr_mgr().Init_(true);
    wiki.Init_assert();
    // source-file cleanup: delete an unzipped .xml, or move a .bz2 to the done folder
    if (String_.Eq(src_url.Ext(), ".xml")) {
      if ( app.Cfg().Get_bool_app_or("xowa.bldr.import.delete_xml_file", true) // CFG: Cfg__
        && Io_mgr.Instance.ExistsFil(src_url.GenNewExt(".bz2")) // only delete the file if there is a corresponding bz2 file; BUG.GH:#124; DATE:2017-02-02
        )
        Io_mgr.Instance.DeleteFil(src_url);
    }
    else if (String_.Eq(src_url.Ext(), ".bz2")) {
      Io_url trg_fil = app.Fsys_mgr().Wiki_dir().GenSubFil_nest("#dump", "done", src_url.NameAndExt());
      if (import_move_bz2_to_done)
        Io_mgr.Instance.MoveFil_args(src_url, trg_fil, true).Exec();
    }
    running = false;
    wiki.Import_cfg().Src_fil_xml_(null).Src_fil_bz2_(null); // reset file else error when going from Import/Script to Import/List
    app.Gui_mgr().Kit().New_cmd_sync(this).Invk(GfsCtx.new_(), 0, Invk_open_wiki, GfoMsg_.Null);
  }
  // Queues the builder cmds for the import: cleanup, text init/page/css, search setup, text term,
  // and (for non-"other" domains) the category downloads plus pageprop / catlink cmds.
  private void Process_sql(Xob_bldr bldr, Io_url dump_url) {
    // setup wiki
    ((Xob_cleanup_cmd)bldr.Cmd_mgr().Add_cmd(wiki, Xob_cmd_keys.Key_util_cleanup)).Delete_tdb_(true).Delete_sqlite3_(true);
    bldr.Cmd_mgr().Add_cmd(wiki, Xob_cmd_keys.Key_text_init);
    bldr.Cmd_mgr().Add_cmd(wiki, Xob_cmd_keys.Key_text_page);
    bldr.Cmd_mgr().Add_cmd(wiki, Xob_cmd_keys.Key_text_css);
    // the version check below is disabled, so search setup always runs
    // if (wiki.Appe().Setup_mgr().Dump_mgr().Search_version() == gplx.xowa.addons.wikis.searchs.specials.Srch_special_page.Version_2)
    gplx.xowa.addons.wikis.searchs.bldrs.Srch_bldr_mgr_.Setup(wiki);
    bldr.Cmd_mgr().Add_cmd(wiki, Xob_cmd_keys.Key_text_term);
    // setup category
    if (wiki.Domain_itm().Domain_type_id() != Xow_domain_tid_.Tid__other) { // do not add category if not wmf; note that wikia wikis will not have category dumps; DATE:2016-10-22
      Xob_download_cmd.Add_if_not_found_many(bldr, wiki, Xob_catlink_cmd.Dump_file_name, Xob_pageprop_cmd.Dump_file_name);
      bldr.Cmd_mgr().Add(new gplx.xowa.addons.wikis.ctgs.bldrs.Xob_pageprop_cmd(bldr, wiki).Src_dir_manual_(dump_url.OwnerDir()));
      bldr.Cmd_mgr().Add(new gplx.xowa.addons.wikis.ctgs.bldrs.Xob_catlink_cmd(bldr, wiki).Src_dir_manual_(dump_url.OwnerDir()));
    }
  }
  // Navigates the browser window to the imported wiki's main page.
  // The wiki_key parameter is unused; the url is built from the wiki field set in Process_async.
  private void Open_wiki(String wiki_key) {
    Xog_win_itm main_win = install_mgr.App().Gui_mgr().Browser_win();
    if (main_win.Active_page() == null) return; // will be null when invoked through cmd-line
    byte[] url = Bry_.Add(wiki.Domain_bry(), Xoh_href_.Bry__wiki, wiki.Props().Main_page());
    main_win.Page__navigate_by_url_bar(String_.new_u8(url));
  }
  // Message dispatcher: "run_async" runs the import, "owner" returns the owner, "open_wiki" opens the main page.
  public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
    if (ctx.Match(k, Invk_process_async)) Process_async();
    else if (ctx.Match(k, Invk_owner)) return owner;
    else if (ctx.Match(k, Invk_open_wiki)) Open_wiki(wiki_key);
    else return Gfo_invk_.Rv_unhandled;
    return this;
  } private static final String Invk_process_async = "run_async", Invk_owner = "owner", Invk_open_wiki = "open_wiki";
  static final String GRP_KEY = "xowa.thread.op.build";
}

View File

@@ -13,116 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs.installs; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
import org.junit.*;
import gplx.core.consoles.*;
import gplx.core.brys.args.*; import gplx.core.threads.*; import gplx.xowa.bldrs.setups.maints.*; import gplx.xowa.xtns.wbases.imports.*;
import gplx.xowa.wikis.domains.*;
import gplx.xowa.bldrs.wms.*; import gplx.xowa.bldrs.wms.dumps.*;
// Maintenance harness rather than a real unit test: the only @Test method has its body commented out.
// Bld_import_list and Bld_cfg_files are meant to be enabled by hand; both reach out over the network
// and write their output to hard-coded Windows paths.
public class Xoi_cmd_wiki_tst {
// Intentionally a no-op unless one of the lines below is uncommented.
@Test public void Run() { // MAINT:2017-03-28
// Bld_import_list(Xow_domain_regy.All);
// Bld_cfg_files(Xow_domain_regy.All); // NOTE: remember to carry over the wikisource / page / index commands from the existing xowa_build_cfg.gfs; also, only run the xowa_build_cfg.gfs once; DATE:2013-10-15; last run: DATE:2014-09-09
}
// Builds one pipe-delimited row per wiki domain in ary and saves them all to C:\xowa\user\temp.txt.
public void Bld_import_list(String... ary) {
int ary_len = ary.length;
Bry_bfr bfr = Bry_bfr_.Reset(255);
Wmf_latest_parser parser = new Wmf_latest_parser();
Bfr_arg__time time_fmtr = new Bfr_arg__time();
for (int i = 0; i < ary_len; i++)
Bld_import_list_itm2(bfr, parser, time_fmtr, ary, i);
Io_mgr.Instance.SaveFilStr("C:\\xowa\\user\\temp.txt", bfr.To_str());
}
// Appends the row for ary[i] to bfr.
// Downloads the wiki's "latest" listing from dumps.wikimedia.org (up to 5 attempts), parses it, and
// looks up the "-latest-pages-articles.xml.bz2" entry. Nothing is appended if all downloads fail or
// the entry is missing.
// Row layout: domain | file url | domain type | size in bytes | size as text | formatted time | entry date | dump date
private void Bld_import_list_itm2(Bry_bfr bfr, Wmf_latest_parser parser, Bfr_arg__time time_fmtr, String[] ary, int i) {
String domain_str = ary[i];
byte[] domain_bry = Bry_.new_a7(domain_str);
Xow_domain_itm domain_itm = Xow_domain_itm_.parse(domain_bry);
// wmf key is the domain abbreviation with dashes replaced by underscores
byte[] wmf_key_bry = Bry_.Replace(Xow_abrv_wm_.To_abrv(domain_itm), Byte_ascii.Dash, Byte_ascii.Underline);
String wmf_key = String_.new_u8(wmf_key_bry);
String url = "https://dumps.wikimedia.org/" + wmf_key + "/latest";
byte[] latest_html = null;
// retry loop: stop at the first non-null download; give up (return) after the 5th failure
for (int j = 0; j < 5; ++j) {
latest_html = Io_mgr.Instance.DownloadFil_args("", Io_url_.Empty).Exec_as_bry(url);
if (latest_html != null) break;
Tfds.Dbg("fail|" + domain_str + "|" + url);
if (j == 4) return;
}
parser.Parse(latest_html);
Xowm_dump_file dump_file = new Xowm_dump_file(domain_str, "latest", Xowm_dump_type_.Str__pages_articles);
dump_file.Server_url_(Xowm_dump_file_.Server_wmf_https);
byte[] pages_articles_key = Bry_.new_a7(wmf_key + "-latest-pages-articles.xml.bz2");
Wmf_latest_itm latest_itm = parser.Get_by(pages_articles_key);
if (latest_itm == null) {Tfds.Dbg("missing|" + domain_str + "|" + url); return;} // NOTE: commonswiki missing entry for commonswiki-latest-pages-articles.xml.bz2 DATE:2016-05-01
Tfds.Dbg("pass|" + domain_str + "|" + url);
bfr.Add(domain_bry).Add_byte_pipe();
bfr.Add_str_u8(dump_file.File_url()).Add_byte_pipe();
bfr.Add(Xow_domain_tid_.Get_type_as_bry(domain_itm.Domain_type_id())).Add_byte_pipe();
long src_size = latest_itm.Size();
bfr.Add_long_variable(src_size).Add_byte_pipe();
bfr.Add_str_a7(gplx.core.ios.Io_size_.To_str(src_size)).Add_byte_pipe();
// seconds = size / 1,000,000; presumably a download-time estimate at ~1 MB/s -- TODO confirm
time_fmtr.Seconds_(Math_.Div_safe_as_long(src_size, 1000000)).Bfr_arg__add(bfr);
bfr.Add_byte_pipe();
bfr.Add_str_a7(latest_itm.Date().XtoStr_fmt_yyyy_MM_dd_HH_mm());
bfr.Add_byte_pipe();
bfr.Add_str_a7(dump_file.Dump_date());
bfr.Add_byte_nl();
}
// Legacy implementation kept for reference; disabled.
/*
private void Bld_import_list_itm(Bry_bfr bfr, Xowm_dump_file dump_file, Bry_fmtr_arg_time time_fmtr, String[] ary, int i) {
String itm = ary[i];
dump_file.Ctor(itm, "latest", Xowm_dump_type_.Str__pages_articles);
int count = 0;
while (count++ < 1) {
dump_file.Server_url_(Xowm_dump_file_.Server_wmf);
if (dump_file.Connect()) break;
Tfds.WriteText(String_.Format("retrying: {0} {1}\n", count, dump_file.File_modified()));
Thread_adp_.Sleep(15000); // wait for connection to reset
}
if (count == 10) {
Tfds.WriteText(String_.Format("failed: {0}\n", dump_file.File_url()));
return;
}
else
Tfds.WriteText(String_.Format("passed: {0}\n", itm));
bfr.Add_str(itm).Add_byte_pipe();
bfr.Add_str(dump_file.File_url()).Add_byte_pipe();
bfr.Add(Xow_domain_tid_.Get_type_as_bry(dump_file.Wiki_type().Wiki_tid())).Add_byte_pipe();
// Xol_lang_stub lang_itm = Xol_lang_stub_.Get_by_key(wiki_type.Lang_key());
// if (lang_itm == null) lang_itm = Xol_lang_stub_.Get_by_key(Xol_lang_itm_.Key_en); // commons, species, meta, etc will have no lang
// bfr.Add(lang_itm.Local_name()).Add_byte_pipe();
// bfr.Add(lang_itm.Canonical_name()).Add_byte_pipe();
long src_size = dump_file.File_len();
bfr.Add_long_variable(src_size).Add_byte_pipe();
bfr.Add_str(gplx.core.ios.Io_size_.To_str(src_size)).Add_byte_pipe();
time_fmtr.Seconds_(Math_.Div_safe_as_long(src_size, 1000000)).XferAry(bfr, 0);
bfr.Add_byte_pipe();
bfr.Add_str(dump_file.File_modified().XtoStr_fmt_yyyy_MM_dd_HH_mm());
bfr.Add_byte_pipe();
// bfr.Add_str(String_.Concat_with_obj(",", (Object[])dump_file.Dump_available_dates()));
// bfr.Add_byte_pipe();
bfr.Add_str(dump_file.Dump_date());
bfr.Add_byte_nl();
Thread_adp_.Sleep(1000);
}
*/
// For each wiki domain: fetches its props through Xoi_wiki_props_api, parses them, and appends the
// generated cfg to a buffer. A failure for one domain is written to the console and the loop continues.
// The buffer, followed by a final "app.bldr.wiki_cfg_bldr.run;" line, is saved to C:\user\xowa_build_cfg.gfs.
public void Bld_cfg_files(String... ary) {
Bry_bfr bfr = Bry_bfr_.Reset(255);
gplx.xowa.bldrs.wiki_cfgs.Xoi_wiki_props_api api = new gplx.xowa.bldrs.wiki_cfgs.Xoi_wiki_props_api();
// a single props object is reused across all domains
gplx.xowa.bldrs.wiki_cfgs.Xoi_wiki_props_wiki wiki = new gplx.xowa.bldrs.wiki_cfgs.Xoi_wiki_props_wiki();
int ary_len = ary.length;
for (int i = 0; i < ary_len; i++) {
String wiki_domain = ary[i];
try {
byte[] xml = api.Exec_api(api.Api_src(wiki_domain));
wiki.Wiki_domain_(Bry_.new_a7(wiki_domain));
api.Parse(wiki, String_.new_u8(xml));
api.Build_cfg(bfr, wiki);
}
catch (Exception e) {
Console_adp__sys.Instance.Write_str_w_nl(Err_.Message_gplx_full(e));
}
}
bfr.Add_str_a7("app.bldr.wiki_cfg_bldr.run;").Add_byte_nl();
Io_mgr.Instance.SaveFilStr("C:\\user\\xowa_build_cfg.gfs", bfr.To_str());
}
}

View File

@@ -13,36 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs.installs; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
import gplx.gfui.*; import gplx.gfui.kits.core.*;
import gplx.core.threads.*;
// Unzip cmd ("wiki.unzip") for a wiki dump.
// Async_init locates a *.xml.bz2 file in the wiki's import source dir, configures the base unzip cmd
// to decompress it into the wiki's dir, and arranges for the source to be moved to "#dump/done".
class Xoi_cmd_wiki_unzip extends Gfo_thread_cmd_unzip implements Gfo_thread_cmd {
  public static final String KEY_dump = "wiki.unzip";
  static final String GRP_KEY = "xowa.thread.dump.unzip";

  private Xoi_setup_mgr install_mgr;
  String wiki_key, dump_date, dump_type;

  public Xoi_cmd_wiki_unzip(Xoi_setup_mgr install_mgr, String wiki_key, String dump_date, String dump_type) {
    this.install_mgr = install_mgr;
    this.Owner_(install_mgr);
    this.wiki_key = wiki_key;
    this.dump_date = dump_date;
    this.dump_type = dump_type;
  }

  @Override public String Async_key() {
    return KEY_dump;
  }

  // Returns Init_cancel_step if no dump file is found or the user declines to delete an existing target;
  // Init_cancel_all if the user cancels; otherwise Init_ok.
  @Override public byte Async_init() {
    Xoae_app app = install_mgr.App();
    Gfui_kit gui_kit = app.Gui_mgr().Kit();
    Xowe_wiki wiki = app.Wiki_mgr().Get_by_or_make(Bry_.new_u8(wiki_key));

    // find candidate dump files (non-recursive)
    Io_url src_dir = wiki.Import_cfg().Src_dir();
    Io_url[] bz2_fils = Io_mgr.Instance.QueryDir_args(src_dir).Recur_(false).FilPath_("*.xml.bz2").ExecAsUrlAry();
    int bz2_fils_len = bz2_fils.length;
    if (bz2_fils_len == 0) {
      gui_kit.Ask_ok(GRP_KEY, "dump.unzip_latest.file_missing", "Could not find a dump file for ~{0} in ~{1}", wiki_key, src_dir.Raw());
      return Gfo_thread_cmd_.Init_cancel_step;
    }

    // the last entry of the query result is used as the source
    Io_url src = bz2_fils[bz2_fils_len - 1];
    Io_url trg = app.Fsys_mgr().Wiki_dir().GenSubFil_nest(wiki_key, src.NameOnly()); // NOTE: NameOnly() will strip trailing .bz2; EX: a.xml.bz2 -> a.xml
    super.Init(app.Usr_dlg(), app.Gui_mgr().Kit(), app.Prog_mgr().App_decompress_bz2(), app.Prog_mgr().App_decompress_zip(), app.Prog_mgr().App_decompress_gz(), src, trg);
    this.Term_cmd_for_src_(Term_cmd_for_src_move);
    this.Term_cmd_for_src_url_(app.Fsys_mgr().Wiki_dir().GenSubFil_nest("#dump", "done", src.NameAndExt()));

    if (!Io_mgr.Instance.ExistsFil(trg)) return Gfo_thread_cmd_.Init_ok;

    // target already exists: let the user decide
    int answer = gui_kit.Ask_yes_no_cancel(GRP_KEY, "target_exists", "Target file already exists: '~{0}'.\nDo you want to delete it?", trg.Raw());
    if (answer == Gfui_dlg_msg_.Btn_yes) {
      Io_mgr.Instance.DeleteFil(trg);
      return Gfo_thread_cmd_.Init_ok;
    }
    else if (answer == Gfui_dlg_msg_.Btn_no) {
      return Gfo_thread_cmd_.Init_cancel_step;
    }
    else if (answer == Gfui_dlg_msg_.Btn_cancel) {
      return Gfo_thread_cmd_.Init_cancel_all;
    }
    throw Err_.new_unhandled(answer);
  }
}

View File

@@ -13,12 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs.installs; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
import gplx.xowa.bldrs.wms.dumps.*;
// Static accessors for import-related app cfg values.
public class Xoi_dump_mgr {
  // Reads cfg "xowa.bldr.import.apps.bz2_stdout.enabled"; defaults to true.
  public static boolean Import_bz2_by_stdout(Xoa_app app) {
    return app.Cfg().Get_bool_app_or("xowa.bldr.import.apps.bz2_stdout.enabled", true); // CFG: Cfg__
  }
  // Reads cfg "xowa.bldr.import.dump_servers" as a comma-separated list; falls back to the built-in server list.
  public static String[] Server_urls(Xoa_app app) {
    // promote your.org to primary url; DATE:2016-08-07
    String[] dflt_urls = String_.Ary
    ( Xowm_dump_file_.Server_your_org
    , Xowm_dump_file_.Server_wmf_https
    , Xowm_dump_file_.Server_c3sl
    , Xowm_dump_file_.Server_masaryk
    );
    return app.Cfg().Get_strary_app_or("xowa.bldr.import.dump_servers", ",", dflt_urls); // CFG: Cfg__
  }
}

View File

@@ -13,33 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs.installs; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
// Extracts directory names from the html of a dump mirror's directory listing.
public class Xoi_mirror_parser {
  private static final byte[]
    CONST_href_bgn = Bry_.new_a7("<a href=\"")
  , CONST_href_end = Bry_.new_a7("\"")
  , CONST_date_parent_dir = Bry_.new_a7("../")
  ;

  // Returns the value of every <a href="..."> in raw_str, in document order, with "../" entries
  // dropped and one trailing "/" trimmed.
  // Returns an empty array if raw_str is empty, if an href has no closing quote, or if an href is empty.
  public String[] Parse(String raw_str) {
    if (String_.Len_eq_0(raw_str)) return String_.Ary_empty;
    byte[] src = Bry_.new_u8(raw_str);
    List_adp found = List_adp_.New();
    int href_pos = Bry_find_.Find_fwd(src, CONST_href_bgn, 0);
    while (href_pos != Bry_find_.Not_found) {
      // isolate the text between the quotes
      int val_bgn = href_pos + CONST_href_bgn.length;
      int val_end = Bry_find_.Find_fwd(src, CONST_href_end, val_bgn);
      if (val_end == Bry_find_.Not_found) return String_.Ary_empty;
      byte[] val = Bry_.Mid(src, val_bgn, val_end);

      if (!Bry_.Match(val, CONST_date_parent_dir)) {
        int val_last = val.length - 1;
        if (val_last == -1) return String_.Ary_empty;
        if (val[val_last] == Byte_ascii.Slash) val = Bry_.Mid(val, 0, val_last); // trim trailing /; EX: "20130101/" -> "20130101"
        found.Add(String_.new_u8(val));
      }

      // resume the search just past the closing quote
      href_pos = Bry_find_.Find_fwd(src, CONST_href_bgn, val_end + CONST_href_end.length);
    }
    return found.To_str_ary();
  }

  // Assuming ary is sorted, returns the last entry that is less than or equal to comp; "" if there is none.
  public static String Find_last_lte(String[] ary, String comp) {
    int idx = ary.length;
    while (--idx >= 0) {
      String candidate = ary[idx];
      if (CompareAble_.Is(CompareAble_.Less_or_same, candidate, comp)) return candidate;
    }
    return "";
  }
}

View File

@@ -13,46 +13,3 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs.installs; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
import org.junit.*;
// Tests for Xoi_mirror_parser: parsing a mirror's directory listing, and picking the latest date at or before a given date.
public class Xoi_mirror_parser_tst {
  // A lighttpd-style listing with a parent-dir link, three dated dirs and "latest".
  @Test public void Basic() {
    String listing_html = String_.Concat_lines_nl
    ( "<?xml version=\"1.0\" encoding=\"UTF-8\"?>"
    , "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.1//EN\" \"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd\">"
    , "<html xmlns=\"http://www.w3.org/1999/xhtml\" xml:lang=\"en\">"
    , "<head>"
    , "<title>Index of /simplewiki/</title>"
    , "<link rel=\"stylesheet\" type=\"text/css\" href=\"/pub/misc/lighttpd-white-dir.css\" />"
    , "</head>"
    , "<body>"
    , "<h2>Index of /simplewiki/</h2>"
    , "<div class=\"list\">"
    , "<table summary=\"Directory Listing\" cellpadding=\"0\" cellspacing=\"0\">"
    , "<thead><tr><th class=\"n\">Name</th><th class=\"m\">Last Modified</th><th class=\"s\">Size</th><th class=\"t\">Type</th></tr></thead>"
    , "<tbody>"
    , "<tr><td class=\"n\"><a href=\"../\">Parent Directory</a>/</td><td class=\"m\">&nbsp;</td><td class=\"s\">- &nbsp;</td><td class=\"t\">Directory</td></tr>"
    , "<tr><td class=\"n\"><a href=\"20120516/\">20120516</a>/</td><td class=\"m\">2012-May-17 01:04:39</td><td class=\"s\">- &nbsp;</td><td class=\"t\">Directory</td></tr>"
    , "<tr><td class=\"n\"><a href=\"20121220/\">20121220</a>/</td><td class=\"m\">2012-Dec-20 20:15:55</td><td class=\"s\">- &nbsp;</td><td class=\"t\">Directory</td></tr>"
    , "<tr><td class=\"n\"><a href=\"20130214/\">20130214</a>/</td><td class=\"m\">2013-Feb-14 06:28:41</td><td class=\"s\">- &nbsp;</td><td class=\"t\">Directory</td></tr>"
    , "<tr><td class=\"n\"><a href=\"latest/\">latest</a>/</td><td class=\"m\">2013-Feb-14 06:28:41</td><td class=\"s\">- &nbsp;</td><td class=\"t\">Directory</td></tr>"
    , "</tbody>"
    , "</table>"
    , "</div>"
    , "<div class=\"foot\">lighttpd</div>"
    , "</body>"
    , "</html>"
    );
    String[] expd_dirs = String_.Ary("20120516", "20121220", "20130214", "latest");
    Tst_parse(listing_html, expd_dirs);
  }
  // First case: comp falls between two dates, so the earlier one is returned.
  // Second case: comp precedes every entry, so "" is returned.
  @Test public void Find_last_lte() {
    String[] dirs = String_.Ary("20120516", "20121220", "20130214", "latest");
    Tst_find_last_lte(dirs, "20130101", "20121220");
    Tst_find_last_lte(dirs, "20120101", "");
  }
  private void Tst_parse(String raw, String[] expd) {
    String[] actl = new Xoi_mirror_parser().Parse(raw);
    Tfds.Eq_ary_str(expd, actl);
  }
  private void Tst_find_last_lte(String[] ary, String comp, String expd) {
    String actl = Xoi_mirror_parser.Find_last_lte(ary, comp);
    Tfds.Eq(expd, actl);
  }
}

Some files were not shown because too many files have changed in this diff Show More