mirror of
https://github.com/gnosygnu/xowa.git
synced 2026-03-02 03:49:30 +00:00
Embeddable: Create core dbs in proper subdirectory
This commit is contained in:
@@ -13,3 +13,24 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs; import gplx.*; import gplx.xowa.*;
|
||||
public class Db_idx_mode {
|
||||
private int tid;
|
||||
Db_idx_mode(int tid) {this.tid = tid;}
|
||||
public boolean Tid_is_bgn() {return tid == Tid_bgn;}
|
||||
public boolean Tid_is_end() {return tid == Tid_end;}
|
||||
public boolean Tid_is_skip() {return tid == Tid_skip;}
|
||||
private static final int Tid_skip = 0, Tid_bgn = 1, Tid_end = 2;
|
||||
private static final String Key_skip = "skip", Key_bgn = "bgn", Key_end = "end";
|
||||
public static final Db_idx_mode
|
||||
Itm_skip = new Db_idx_mode(Tid_skip)
|
||||
, Itm_bgn = new Db_idx_mode(Tid_bgn)
|
||||
, Itm_end = new Db_idx_mode(Tid_end)
|
||||
;
|
||||
public static Db_idx_mode Xto_itm(String key) {
|
||||
if (String_.Eq(key, Key_skip)) return Itm_skip;
|
||||
else if (String_.Eq(key, Key_bgn)) return Itm_bgn;
|
||||
else if (String_.Eq(key, Key_end)) return Itm_end;
|
||||
else throw Err_.new_unhandled(key);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,80 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs; import gplx.*; import gplx.xowa.*;
|
||||
import gplx.core.primitives.*; import gplx.core.strings.*;
|
||||
import gplx.dbs.*; import gplx.dbs.qrys.*; import gplx.xowa.wikis.dbs.*; import gplx.xowa.addons.wikis.ctgs.*;
|
||||
import gplx.xowa.wikis.nss.*;
|
||||
import gplx.xowa.wikis.data.*; import gplx.xowa.wikis.data.tbls.*;
|
||||
import gplx.xowa.bldrs.wkrs.*; import gplx.xowa.bldrs.infos.*;
|
||||
import gplx.xowa.addons.wikis.ctgs.htmls.catpages.doms.*;
|
||||
public class Db_mgr_fxt {
|
||||
public Db_mgr_fxt Ctor_fsys() {bldr_fxt = new Xob_fxt().Ctor(Xoa_test_.Url_root().GenSubDir("root")); return this;}
|
||||
public Db_mgr_fxt Ctor_mem() {bldr_fxt = new Xob_fxt().Ctor_mem(); return this;} private Xob_fxt bldr_fxt;
|
||||
public Xowd_page_itm page_(int id, String modified_on, boolean type_redirect, int text_len) {return new Xowd_page_itm().Id_(id).Modified_on_(DateAdp_.parse_gplx(modified_on)).Redirected_(type_redirect).Text_len_(text_len);}
|
||||
public Xowe_wiki Wiki() {return bldr_fxt.Wiki();}
|
||||
public Xob_bldr Bldr() {return bldr_fxt.Bldr();}
|
||||
public Db_mgr_fxt doc_ary_(Xowd_page_itm... v) {bldr_fxt.doc_ary_(v); return this;}
|
||||
public Xowd_page_itm doc_(int id, String date, String title, String text) {return bldr_fxt.doc_(id, date, title, text);}
|
||||
public Xowd_page_itm doc_wo_date_(int id, String title, String text) {return bldr_fxt.doc_(id, "2012-01-02 03:04", title, text);}
|
||||
public Xowd_page_itm doc_ttl_(int id, String title) {return bldr_fxt.doc_(id, "2012-01-02 03:04", title, "IGNORE");}
|
||||
public Db_mgr_fxt Init_fil(String url, String raw) {return Init_fil(Io_url_.new_fil_(url), raw);}
|
||||
public Db_mgr_fxt Init_fil(Io_url url, String raw) {Io_mgr.Instance.SaveFilStr(url, raw); return this;}
|
||||
public Db_mgr_fxt Exec_run(Xob_page_wkr wkr) {bldr_fxt.Run(wkr); return this;}
|
||||
public Db_mgr_fxt Exec_run(Xob_cmd cmd) {bldr_fxt.Run_cmds(cmd); return this;}
|
||||
public Db_mgr_fxt Exec_run(Xobd_parser_wkr wkr) {bldr_fxt.Run(wkr); return this;}
|
||||
public void Init_page_insert(Int_obj_ref page_id_next, int ns_id, String[] ttls) {
|
||||
Xowe_wiki wiki = this.Wiki();
|
||||
int len = ttls.length;
|
||||
DateAdp modified_on = Datetime_now.Dflt_add_min_(0);
|
||||
Xowd_page_tbl tbl_page = wiki.Db_mgr_as_sql().Core_data_mgr().Tbl__page();
|
||||
tbl_page.Insert_bgn();
|
||||
for (int i = 0; i < len; i++) {
|
||||
String ttl = ttls[i];
|
||||
int page_id = page_id_next.Val();
|
||||
tbl_page.Insert_cmd_by_batch(page_id, ns_id, Bry_.new_u8(ttl), false, modified_on, 0, page_id, 0, 0, -1);
|
||||
page_id_next.Val_add(1);
|
||||
}
|
||||
tbl_page.Insert_end();
|
||||
}
|
||||
public void Test_load_ttl(int ns_id, String ttl_str, Xowd_page_itm expd) {
|
||||
Xowe_wiki wiki = bldr_fxt.Wiki();
|
||||
Xow_ns ns = wiki.Ns_mgr().Ids_get_or_null(ns_id);
|
||||
byte[] ttl_bry = Bry_.new_a7(ttl_str);
|
||||
wiki.Db_mgr_as_sql().Load_mgr().Load_by_ttl(actl, ns, ttl_bry);
|
||||
Tfds.Eq(expd.Id(), actl.Id());
|
||||
Tfds.Eq_date(expd.Modified_on(), actl.Modified_on());
|
||||
Tfds.Eq(expd.Redirected(), actl.Redirected());
|
||||
Tfds.Eq(expd.Text_len(), actl.Text_len());
|
||||
} private Xowd_page_itm actl = new Xowd_page_itm();
|
||||
public void Test_load_page(int ns_id, int page_id, String expd) {
|
||||
Xowe_wiki wiki = bldr_fxt.Wiki();
|
||||
Xow_ns ns = wiki.Ns_mgr().Ids_get_or_null(ns_id);
|
||||
wiki.Db_mgr_as_sql().Load_mgr().Load_page(actl.Id_(page_id), ns);
|
||||
Tfds.Eq(expd, String_.new_a7(actl.Text()));
|
||||
}
|
||||
int[] Xto_int_ary(List_adp rslts) {
|
||||
int len = rslts.Count();
|
||||
int[] rv = new int[len];
|
||||
for (int i = 0; i < len; i++) {
|
||||
Xowd_page_itm page = (Xowd_page_itm)rslts.Get_at(i);
|
||||
rv[i] = page.Id();
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
public void Test_file(String url, String expd) {
|
||||
String actl = Io_mgr.Instance.LoadFilStr(url);
|
||||
Tfds.Eq_str_lines(expd, actl);
|
||||
}
|
||||
public void Init_db_sqlite() {
|
||||
Xowe_wiki wiki = this.Wiki();
|
||||
Db_conn_pool.Instance.Rls_all();
|
||||
Db_conn_bldr.Instance.Reg_default_sqlite();
|
||||
Io_mgr.Instance.DeleteDir_cmd(wiki.Fsys_mgr().Root_dir()).MissingIgnored_().Exec();
|
||||
wiki.Db_mgr_create_as_sql().Core_data_mgr().Init_by_make(Xowd_core_db_props.Test, Xob_info_session.Test);
|
||||
Io_mgr.Instance.SaveFilStr(wiki.Import_cfg().Src_dir().GenSubFil("a.xml"), "<test/>");
|
||||
}
|
||||
public void Rls() {
|
||||
this.Wiki().Db_mgr_as_sql().Core_data_mgr().Rls();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,65 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs; import gplx.*; import gplx.xowa.*;
|
||||
import gplx.core.ios.*;
|
||||
import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.wkrs.*; import gplx.xowa.wikis.data.tbls.*;
|
||||
public class Xob_base_fxt {
|
||||
public Xob_base_fxt Clear() {
|
||||
if (app == null) {
|
||||
app = Xoa_app_fxt.Make__app__edit();
|
||||
wiki = Xoa_app_fxt.Make__wiki__edit(app);
|
||||
bldr = Xoa_app_fxt.bldr_(app);
|
||||
}
|
||||
this.Init_(bldr, wiki);
|
||||
Clear_hook();
|
||||
return this;
|
||||
}
|
||||
@gplx.Virtual public void Clear_hook() {}
|
||||
public Xob_base_fxt Init_(Xob_bldr bldr, Xowe_wiki wiki) {this.bldr = bldr; this.wiki = wiki; return this;}
|
||||
public Xoae_app App() {return app;} private Xoae_app app;
|
||||
public Xob_bldr Bldr() {return bldr;} private Xob_bldr bldr;
|
||||
public Xowe_wiki Wiki() {return wiki;} private Xowe_wiki wiki;
|
||||
public Gfo_invk Bldr_itm() {return bldr_itm;} Gfo_invk bldr_itm;
|
||||
public Xowd_page_itm page_(String ttl) {return page_(ttl, "");}
|
||||
public Xowd_page_itm page_(String ttl, String text) {return new Xowd_page_itm().Ttl_(Bry_.new_u8(ttl), wiki.Ns_mgr()).Text_(Bry_.new_u8(text));}
|
||||
public Io_fil_chkr meta_(String url, String data) {return new Io_fil_chkr(Io_url_.mem_fil_(url), data);}
|
||||
public void Init_fxts(Xob_bldr bldr, Xowe_wiki wiki, Xob_base_fxt... fxt_ary) {
|
||||
int fxt_ary_len = fxt_ary.length;
|
||||
for (int i = 0; i < fxt_ary_len; i++)
|
||||
fxt_ary[i].Init_(bldr, wiki);
|
||||
}
|
||||
public Xob_base_fxt Init_fil(String url, String raw) {return Init_fil(Io_url_.new_fil_(url), raw);}
|
||||
public Xob_base_fxt Init_fil(Io_url url, String raw) {Io_mgr.Instance.SaveFilStr(url, raw); return this;}
|
||||
public Xob_base_fxt Exec_cmd(String cmd_key, GfoMsg... msgs) {
|
||||
Xob_cmd cmd = (Xob_cmd)bldr.Cmd_mgr().Add_cmd(wiki, cmd_key);
|
||||
this.bldr_itm = cmd;
|
||||
int len = msgs.length;
|
||||
GfsCtx ctx = GfsCtx.new_();
|
||||
for (int i = 0; i < len; i++) {
|
||||
GfoMsg msg = msgs[i];
|
||||
cmd.Invk(ctx, GfsCtx.Ikey_null, msg.Key(), msg);
|
||||
}
|
||||
Run_cmd(bldr, cmd);
|
||||
return this;
|
||||
}
|
||||
public Xob_base_fxt Test_fil(String url, String expd) {return Test_fil(Io_url_.new_fil_(url), expd);}
|
||||
public Xob_base_fxt Test_fil(Io_url url, String expd) {
|
||||
Tfds.Eq_str_lines(expd, Io_mgr.Instance.LoadFilStr(url));
|
||||
return this;
|
||||
}
|
||||
public static void Run_cmd(Xob_bldr bldr, Xob_cmd cmd) {
|
||||
cmd.Cmd_bgn(bldr);
|
||||
cmd.Cmd_run();
|
||||
cmd.Cmd_end();
|
||||
}
|
||||
public static void Run_wkr(Xob_bldr bldr, Xob_page_wkr wkr, Xowd_page_itm[] page_ary) {
|
||||
wkr.Page_wkr__bgn();
|
||||
int page_ary_len = page_ary.length;
|
||||
for (int i = 0; i < page_ary_len; i++) {
|
||||
Xowd_page_itm page = page_ary[i];
|
||||
wkr.Page_wkr__run(page);
|
||||
}
|
||||
wkr.Page_wkr__end();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,155 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs; import gplx.*; import gplx.xowa.*;
|
||||
import gplx.core.consoles.*; import gplx.core.envs.*;
|
||||
import gplx.xowa.apps.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.xmls.*; import gplx.xowa.langs.bldrs.*;
|
||||
import gplx.xowa.bldrs.wkrs.*;
|
||||
import gplx.langs.jsons.*;
|
||||
import gplx.xowa.addons.bldrs.app_cfgs.*;
|
||||
public class Xob_bldr implements Gfo_invk {
|
||||
private boolean pause_at_end = false; private long prv_prog_time; private Xob_xml_parser dump_parser;
|
||||
public Xob_bldr(Xoae_app app) {
|
||||
this.app = app;
|
||||
this.cmd_mgr = new Xob_cmd_mgr(this, cmd_regy);
|
||||
this.import_marker = new Xob_import_marker();
|
||||
this.wiki_cfg_bldr = new Xob_wiki_cfg_bldr(this);
|
||||
}
|
||||
public Xoae_app App() {return app;} private final Xoae_app app;
|
||||
public Xob_cmd_regy Cmd_regy() {return cmd_regy;} private final Xob_cmd_regy cmd_regy = new Xob_cmd_regy();
|
||||
public Xob_cmd_mgr Cmd_mgr() {return cmd_mgr;} private final Xob_cmd_mgr cmd_mgr;
|
||||
public Gfo_usr_dlg Usr_dlg() {return app.Usr_dlg();}
|
||||
public int Sort_mem_len() {return sort_mem_len;} public Xob_bldr Sort_mem_len_(int v) {sort_mem_len = v; return this;} private int sort_mem_len = 16 * Io_mgr.Len_mb;
|
||||
public int Dump_fil_len() {return dump_fil_len;} public Xob_bldr Dump_fil_len_(int v) {dump_fil_len = v; return this;} private int dump_fil_len = 1 * Io_mgr.Len_mb;
|
||||
public int Make_fil_len() {return make_fil_len;} public Xob_bldr Make_fil_len_(int v) {make_fil_len = v; return this;} private int make_fil_len = 64 * Io_mgr.Len_kb;
|
||||
public Xob_xml_parser Dump_parser() {if (dump_parser == null) this.dump_parser = new Xob_xml_parser(); return dump_parser;}
|
||||
public Xob_import_marker Import_marker() {return import_marker;} private Xob_import_marker import_marker;
|
||||
public Xob_wiki_cfg_bldr Wiki_cfg_bldr() {return wiki_cfg_bldr;} private Xob_wiki_cfg_bldr wiki_cfg_bldr;
|
||||
public void Pause_at_end_(boolean v) {this.pause_at_end = v;}
|
||||
public void Print_prog_msg(long cur, long end, int pct_idx, String fmt, Object... ary) {
|
||||
long now = System_.Ticks(); if (now - prv_prog_time < 100) return;
|
||||
this.prv_prog_time = now;
|
||||
if (pct_idx > -1) ary[pct_idx] = Decimal_adp_.CalcPctStr(cur, end, "00.00");
|
||||
app.Usr_dlg().Prog_many("", "", fmt, ary);
|
||||
}
|
||||
public Xob_bldr Exec_json(String script) {
|
||||
try {
|
||||
this.cmd_mgr.Clear();
|
||||
Json_parser jdoc_parser = new Json_parser();
|
||||
Json_doc jdoc = jdoc_parser.Parse(script);
|
||||
Json_ary cmds = jdoc.Root_ary();
|
||||
int cmds_len = cmds.Len();
|
||||
for (int i = 0; i < cmds_len; ++i) {
|
||||
Json_nde cmd = cmds.Get_at_as_nde(i);
|
||||
byte[] key = cmd.Get_bry_or_null("key");
|
||||
Xob_cmd prime = cmd_regy.Get_or_null(String_.new_u8(key));
|
||||
if (prime == null) throw Err_.new_("bldr", "bldr.cmd does not exists: cmd={0}", key);
|
||||
byte[] wiki_key = cmd.Get_bry_or_null("wiki");
|
||||
Xowe_wiki wiki = wiki_key == null ? app.Usere().Wiki() : app.Wiki_mgr().Get_by_or_make(wiki_key);
|
||||
Xob_cmd clone = prime.Cmd_clone(this, wiki);
|
||||
int atrs_len = cmd.Len();
|
||||
for (int j = 0; j < atrs_len; ++j) {
|
||||
Json_kv atr_kv = cmd.Get_at_as_kv(j);
|
||||
String atr_key = atr_kv.Key_as_str();
|
||||
if ( String_.Eq(atr_key, "key")
|
||||
|| String_.Eq(atr_key, "wiki")) continue;
|
||||
byte[] atr_val = atr_kv.Val_as_bry();
|
||||
Gfo_invk_.Invk_by_val(clone, atr_key + Gfo_invk_.Mutator_suffix, String_.new_u8(atr_val));
|
||||
}
|
||||
cmd_mgr.Add(clone);
|
||||
}
|
||||
gplx.core.threads.Thread_adp_.Start_by_key("bldr_by_json", this, Invk_run_by_kit);
|
||||
} catch (Exception e) {
|
||||
app.Gui_mgr().Kit().Ask_ok("", "", "error: ~{0}", Err_.Message_gplx_log(e));
|
||||
}
|
||||
return this;
|
||||
}
|
||||
private void Run_by_kit() { // same as Run, but shows exception; don't want to change backward compatibility on Run
|
||||
try {this.Run();}
|
||||
catch (Exception e) {
|
||||
String log_msg = Err_.Message_gplx_log(e);
|
||||
Xoa_app_.Usr_dlg().Log_many("", "", log_msg);
|
||||
app.Gui_mgr().Kit().Ask_ok("", "", "error: ~{0}", Err_.Message_gplx_full(e));
|
||||
}
|
||||
}
|
||||
public void Run() {
|
||||
try {
|
||||
app.Bldr__running_(true);
|
||||
app.Launch(); // HACK: bldr will be called by a gfs file which embeds "bldr.run" inside it; need to call Launch though before Run; DATE:2013-03-23
|
||||
long time_bgn = System_.Ticks();
|
||||
int cmd_mgr_len = cmd_mgr.Len();
|
||||
for (int i = 0; i < cmd_mgr_len; i++) {
|
||||
Xob_cmd cmd = cmd_mgr.Get_at(i);
|
||||
cmd.Cmd_init(this);
|
||||
}
|
||||
cmd_mgr_len = cmd_mgr.Len(); // NOTE: refresh len b/c other cmds may have added new ones in Cmd_init
|
||||
for (int i = 0; i < cmd_mgr_len; i++) {
|
||||
Xob_cmd cmd = cmd_mgr.Get_at(i);
|
||||
app.Usr_dlg().Note_many("", "", "cmd bgn: ~{0}", cmd.Cmd_key());
|
||||
long time_cur = System_.Ticks();
|
||||
try {
|
||||
cmd.Cmd_bgn(this);
|
||||
cmd.Cmd_run();
|
||||
cmd.Cmd_end();
|
||||
} catch (Exception e) {
|
||||
throw Err_.new_exc(e, "bldr", "unknown error", "key", cmd.Cmd_key());
|
||||
}
|
||||
System_.Garbage_collect();
|
||||
app.Usr_dlg().Note_many("", "", "cmd end: ~{0} ~{1}", cmd.Cmd_key(), Time_span_.from_(time_cur).XtoStrUiAbbrv());
|
||||
}
|
||||
for (int i = 0; i < cmd_mgr_len; i++) {
|
||||
Xob_cmd cmd = cmd_mgr.Get_at(i);
|
||||
cmd.Cmd_term();
|
||||
}
|
||||
app.Usr_dlg().Note_many("", "", "bldr done: ~{0}", Time_span_.from_(time_bgn).XtoStrUiAbbrv());
|
||||
cmd_mgr.Clear();
|
||||
if (pause_at_end && !Env_.Mode_testing()) {Console_adp__sys.Instance.Read_line("press enter to continue");}
|
||||
}
|
||||
catch (Exception e) {
|
||||
app.Bldr__running_(false);
|
||||
throw Err_.new_exc(e, "bldr", "unknown error");
|
||||
}
|
||||
}
|
||||
private void Cancel() {
|
||||
int cmd_mgr_len = cmd_mgr.Len();
|
||||
for (int i = 0; i < cmd_mgr_len; i++) {
|
||||
Xob_cmd cmd = cmd_mgr.Get_at(i);
|
||||
cmd.Cmd_end();
|
||||
}
|
||||
}
|
||||
public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
|
||||
if (ctx.Match(k, Invk_pause_at_end_)) pause_at_end = m.ReadBoolOrTrue("val");
|
||||
else if (ctx.Match(k, Invk_cmds)) return cmd_mgr;
|
||||
else if (ctx.Match(k, Invk_wiki_cfg_bldr)) return wiki_cfg_bldr;
|
||||
else if (ctx.Match(k, Invk_sort_mem_len_)) sort_mem_len = gplx.core.ios.Io_size_.Load_int_(m);
|
||||
else if (ctx.Match(k, Invk_dump_fil_len_)) dump_fil_len = gplx.core.ios.Io_size_.Load_int_(m);
|
||||
else if (ctx.Match(k, Invk_make_fil_len_)) make_fil_len = gplx.core.ios.Io_size_.Load_int_(m);
|
||||
else if (ctx.Match(k, Invk_run)) Run();
|
||||
else if (ctx.Match(k, Invk_run_by_kit)) Run_by_kit();
|
||||
else if (ctx.Match(k, Invk_cancel)) Cancel();
|
||||
else return Gfo_invk_.Rv_unhandled;
|
||||
return this;
|
||||
}
|
||||
private static final String
|
||||
Invk_cmds = "cmds", Invk_wiki_cfg_bldr = "wiki_cfg_bldr"
|
||||
, Invk_pause_at_end_ = "pause_at_end_", Invk_sort_mem_len_ = "sort_mem_len_", Invk_dump_fil_len_ = "dump_fil_len_", Invk_make_fil_len_ = "make_fil_len_"
|
||||
, Invk_cancel = "cancel"
|
||||
, Invk_run_by_kit = "run_by_kit"
|
||||
;
|
||||
public static final String Invk_run = "run";
|
||||
}
|
||||
/*
|
||||
. make_fil_len: max size of made file; EX: /id/..../0000000001.csv will have max len of 64 KB
|
||||
. dump_fil_len: max size of temp file; EX: /tmp/.../0000000001.csv will have max len of 1 MB
|
||||
. sort_mem_len: max size of memory for external merge process; note the following
|
||||
.. a contiguous range of memory of that size will be needed: "Bry_bfr_.New(sort_mem_len)" will be called
|
||||
.. large sort_mem_len will result in smaller number of merge files
|
||||
... EX: 16 MB will take en.wikipedia.org's 640 MB title files and generate 40 temp files of 8 MB each
|
||||
.. number of merge files is number of open file channels during merge process
|
||||
... 40 is a "reasonable" number; the 1st max is 512 (for older windows OS's) and 2048 for Windows XP; Linux seems to be about 7000
|
||||
.. small sort_mem_len will use smaller buffer; 16 MB / 40 files -> 400 kb buffer for each file
|
||||
... do not go under max page size for a given row
|
||||
... for example, a 100 b buffer will fail if a given row is > 100 b (the entire row won't be loaded in memory)
|
||||
.. smaller buffer will mean more refills which will require more I/O
|
||||
... EX: 400 kb buffer will require at least 20 refills to read the entire 8 MB file
|
||||
*/
|
||||
|
||||
@@ -13,3 +13,37 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs; import gplx.*; import gplx.xowa.*;
|
||||
/**
 * String keys that identify builder commands; Xob_cmd_mgr.Add_cmd compares an incoming key against these.
 * The quoted text in the trailing comments is carried over unchanged from the original source.
 * NOTE(review): those quoted values look like earlier names for the same key; confirm before relying on them.
 */
public class Xob_cmd_keys {
  public static final String Key_text_init = "text.init";             // "import.sql.init"
  public static final String Key_text_page = "text.page";             // "import.sql.page"
  public static final String Key_text_css = "text.css";
  public static final String Key_text_search_cmd = "text.search.cmd"; // "import.sql.search_title.cmd"
  public static final String Key_text_search_wkr = "text.search";     // "import.sql.search_title.wkr"
  public static final String Key_text_term = "text.term";             // "import.sql.term"
  public static final String Key_html_redlinks = "html.redlinks";
  public static final String Key_util_cleanup = "util.cleanup";       // "core.cleanup"
  public static final String Key_util_download = "util.download";     // "file.download"
  public static final String Key_util_xml_dump = "util.xml_dump";
  public static final String Key_util_random = "util.random";
  public static final String Key_util_delete = "util.delete";
  public static final String Key_wbase_qid = "wbase.qid";             // "text.wdata.qid"
  public static final String Key_wbase_pid = "wbase.pid";             // "text.wdata.pid"
  public static final String Key_wbase_db = "wbase.db";               // "wiki.wdata_db"
  public static final String Key_site_meta = "util.site_meta";
  public static final String Key_diff_build = "diff.build";
  public static final String Key_diff_merge = "diff.merge";
  public static final String Key_text_delete_page = "text.delete_page";

  // second group, separated by a blank line in the original declaration
  public static final String Key_tdb_text_init = "tdb.text.init";     // "core.init"
  public static final String Key_tdb_make_page = "tdb.text.page";     // "core.make_page"
  public static final String Key_tdb_make_id = "core.make_id";
  public static final String Key_tdb_calc_stats = "core.calc_stats";
  public static final String Key_tdb_text_wdata_qid = "tdb.text.wdata.qid";
  public static final String Key_tdb_text_wdata_pid = "tdb.text.wdata.pid";
  public static final String Key_exec_sql = "import.sql.exec_sql";
  public static final String Key_decompress_bz2 = "core.decompress_bz2";
}
|
||||
|
||||
@@ -13,3 +13,118 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs; import gplx.*; import gplx.xowa.*;
|
||||
import gplx.core.primitives.*;
|
||||
import gplx.xowa.wikis.*; import gplx.xowa.xtns.wbases.imports.*;
|
||||
import gplx.xowa.bldrs.wkrs.*; import gplx.xowa.bldrs.cmds.texts.*; import gplx.xowa.bldrs.cmds.texts.sqls.*; import gplx.xowa.bldrs.cmds.texts.tdbs.*; import gplx.xowa.addons.bldrs.files.*; import gplx.xowa.addons.wikis.ctgs.bldrs.*; import gplx.xowa.bldrs.cmds.utils.*;
|
||||
import gplx.xowa.bldrs.cmds.diffs.*;
|
||||
import gplx.xowa.files.origs.*; import gplx.xowa.htmls.core.bldrs.*;
|
||||
import gplx.xowa.addons.wikis.searchs.bldrs.*;
|
||||
import gplx.xowa.addons.bldrs.files.cmds.*; import gplx.xowa.addons.wikis.htmls.css.bldrs.*;
|
||||
public class Xob_cmd_mgr implements Gfo_invk {
|
||||
private final Xob_bldr bldr;
|
||||
public final Xob_cmd_regy cmd_regy;
|
||||
public Xob_cmd_mgr(Xob_bldr bldr, Xob_cmd_regy cmd_regy) {this.bldr = bldr; this.cmd_regy = cmd_regy;}
|
||||
public void Clear() {list.Clear(); dump_rdrs.Clear();}
|
||||
public int Len() {return list.Count();} private final List_adp list = List_adp_.New();
|
||||
public Xob_cmd Get_at(int i) {return (Xob_cmd)list.Get_at(i);}
|
||||
public Xob_cmd Add(Xob_cmd cmd) {list.Add(cmd); return cmd;}
|
||||
public Gfo_invk Add_cmd(Xowe_wiki wiki, String cmd_key) {
|
||||
Xob_cmd prime = cmd_regy.Get_or_null(cmd_key);
|
||||
if (prime != null) {
|
||||
Xob_cmd clone = prime.Cmd_clone(bldr, wiki);
|
||||
Add(clone);
|
||||
return clone;
|
||||
}
|
||||
if (String_.Eq(cmd_key, Xob_cmd_keys.Key_text_init)) return Add(new Xob_init_cmd(bldr, wiki));
|
||||
else if (String_.Eq(cmd_key, Xob_cmd_keys.Key_text_page)) return Xml_rdr_direct_add(wiki, new Xob_page_cmd(bldr, wiki));
|
||||
else if (String_.Eq(cmd_key, Xob_cmd_keys.Key_text_css)) return Add(new Xob_css_cmd(bldr, wiki));
|
||||
else if (String_.Eq(cmd_key, Xob_cmd_keys.Key_text_search_wkr)) return Xml_rdr_direct_add(wiki, new gplx.xowa.addons.wikis.searchs.bldrs.Srch_bldr_wkr(bldr, wiki));
|
||||
else if (String_.Eq(cmd_key, Xob_cmd_keys.Key_text_search_cmd)) return Add(new Srch_bldr_cmd(bldr, wiki));
|
||||
else if (String_.Eq(cmd_key, Xob_cmd_keys.Key_text_term)) return Add(new Xob_term_cmd(bldr, wiki));
|
||||
else if (String_.Eq(cmd_key, Xob_cmd_keys.Key_text_delete_page)) return Add(new Xob_page_delete_cmd(bldr, wiki));
|
||||
else if (String_.Eq(cmd_key, Xob_cmd_keys.Key_html_redlinks)) return Add(new Xob_redlink_mkr_cmd(bldr, wiki));
|
||||
else if (String_.Eq(cmd_key, Xob_cmd_keys.Key_util_cleanup)) return Add(new Xob_cleanup_cmd(bldr, wiki));
|
||||
else if (String_.Eq(cmd_key, Xob_cmd_keys.Key_util_delete)) return Add(new Xob_delete_cmd(bldr, wiki));
|
||||
else if (String_.Eq(cmd_key, Xob_cmd_keys.Key_util_download)) return Add(new Xob_download_cmd(bldr, wiki));
|
||||
else if (String_.Eq(cmd_key, Xob_cmd_keys.Key_util_xml_dump)) return Add(new Xob_xml_dumper_cmd(bldr, wiki));
|
||||
else if (String_.Eq(cmd_key, Xob_cmd_keys.Key_wbase_qid)) return Xml_rdr_direct_add(wiki, new Xob_wdata_qid_sql().Ctor(bldr, wiki));
|
||||
else if (String_.Eq(cmd_key, Xob_cmd_keys.Key_wbase_pid)) return Xml_rdr_direct_add(wiki, new Xob_wdata_pid_sql().Ctor(bldr, wiki));
|
||||
else if (String_.Eq(cmd_key, Xob_cmd_keys.Key_wbase_db)) return Add(new Xob_wdata_db_cmd(bldr, wiki));
|
||||
else if (String_.Eq(cmd_key, Xob_cmd_keys.Key_site_meta)) return Add(new Xob_site_meta_cmd(bldr, wiki));
|
||||
|
||||
else if (String_.Eq(cmd_key, Xob_cmd_keys.Key_tdb_text_init)) return Add(new Xob_init_tdb(bldr, wiki));
|
||||
else if (String_.Eq(cmd_key, Xob_cmd_keys.Key_tdb_make_id)) return Xml_rdr_direct_add(wiki, new Xob_make_id_wkr(bldr, wiki));
|
||||
else if (String_.Eq(cmd_key, Xob_cmd_keys.Key_tdb_calc_stats)) return Add(new Xob_calc_stats_cmd(bldr, wiki));
|
||||
else if (String_.Eq(cmd_key, Xob_cmd_keys.Key_tdb_text_wdata_qid)) return Xml_rdr_direct_add(wiki, new Xob_wdata_qid_txt().Ctor(bldr, wiki));
|
||||
else if (String_.Eq(cmd_key, Xob_cmd_keys.Key_tdb_text_wdata_pid)) return Xml_rdr_direct_add(wiki, new Xob_wdata_pid_txt().Ctor(bldr, wiki));
|
||||
else if (String_.Eq(cmd_key, Xob_cmd_keys.Key_diff_build)) return Add(new Xob_diff_build_cmd(bldr, wiki));
|
||||
else if (String_.Eq(cmd_key, Xob_cmd_keys.Key_exec_sql)) return Add(new Xob_exec_sql_cmd(bldr, wiki));
|
||||
else if (String_.Eq(cmd_key, Xob_cmd_keys.Key_decompress_bz2)) return Add(new Xob_decompress_bz2_cmd(bldr, wiki));
|
||||
else throw Err_.new_unimplemented_w_msg("builder command is not supported: " + cmd_key);
|
||||
}
|
||||
private Xob_page_wkr Xml_rdr_direct_add(Xowe_wiki wiki, Xob_page_wkr wkr) {
|
||||
Xob_page_wkr_cmd dump_rdr = Xml_rdr_get(wiki);
|
||||
dump_rdr.Wkr_add(wkr);
|
||||
return wkr;
|
||||
}
|
||||
private Xob_page_wkr_cmd Xml_rdr_get(Xowe_wiki wiki) {
|
||||
byte[] wiki_key = wiki.Domain_bry();
|
||||
Xob_page_wkr_cmd rv = (Xob_page_wkr_cmd)dump_rdrs.Get_by(dump_rdrs_ref.Val_(wiki_key));
|
||||
if (rv == null) {
|
||||
rv = new Xob_page_wkr_cmd(bldr, wiki);
|
||||
dump_rdrs.Add(Bry_obj_ref.New(wiki_key), rv);
|
||||
this.Add(rv);
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
private Hash_adp dump_rdrs = Hash_adp_.New(); private Bry_obj_ref dump_rdrs_ref = Bry_obj_ref.New_empty();
|
||||
public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
|
||||
if (ctx.Match(k, Invk_add)) return Add_cmd(Wiki_get_or_make(m), m.ReadStr("v"));
|
||||
else if (ctx.Match(k, Invk_add_many)) return Add_many(m);
|
||||
else if (ctx.Match(k, Invk_get_first)) return Get_first(m);
|
||||
else if (ctx.Match(k, Invk_new_batch)) return new Xob_core_batch_utl(bldr, m.ReadBry("v"));
|
||||
else return Gfo_invk_.Rv_unhandled;
|
||||
}
|
||||
private static final String Invk_add = "add", Invk_add_many = "add_many", Invk_new_batch = "new_batch", Invk_get_first = "get_first";
|
||||
private Object Get_first(GfoMsg m) {
|
||||
String cmd_key = m.ReadStr("v");
|
||||
int cmds_len = list.Count();
|
||||
for (int i = 0;i < cmds_len; i++) {
|
||||
Xob_cmd cmd = (Xob_cmd)list.Get_at(i);
|
||||
if (String_.Eq(cmd.Cmd_key(), cmd_key)) return cmd;
|
||||
}
|
||||
throw Err_.new_wo_type("cmd not found", "key", cmd_key);
|
||||
}
|
||||
private Object Add_many(GfoMsg m) {
|
||||
Xowe_wiki wiki = Wiki_get_or_make(m);
|
||||
wiki.Lang().Init_by_load_assert(); // NOTE: must check that lang is loaded; else case_mgr will not initialize; DATE:2013-05-11
|
||||
int args_len = m.Args_count();
|
||||
String[] cmds = new String[args_len - 1]; // -1 b/c 1st arg is wiki
|
||||
for (int i = 1; i < args_len; i++) {
|
||||
Keyval kv = m.Args_getAt(i);
|
||||
cmds[i - 1] = kv.Val_to_str_or_empty();
|
||||
}
|
||||
return Add_many(wiki, cmds);
|
||||
}
|
||||
public Object Add_many(Xowe_wiki wiki, String... cmds) {
|
||||
int len = cmds.length; if (len == 0) throw Err_.new_wo_type("add_many cannot have 0 cmds");
|
||||
Object rv = null;
|
||||
for (int i = 0; i < len; i++)
|
||||
rv = Add_cmd(wiki, cmds[i]);
|
||||
return rv;
|
||||
}
|
||||
public void Add_cmd_ary(Xob_cmd... cmds_ary) {
|
||||
int cmds_len = cmds_ary.length;
|
||||
for (int i = 0; i < cmds_len; ++i)
|
||||
this.Add(cmds_ary[i]);
|
||||
}
|
||||
private Xowe_wiki Wiki_get_or_make(GfoMsg m) {
|
||||
byte[] wiki_key = m.ReadBry("v");
|
||||
Xoae_wiki_mgr wiki_mgr = bldr.App().Wiki_mgr();
|
||||
Xowe_wiki rv = wiki_mgr.Get_by_or_make(wiki_key);
|
||||
rv.Lang().Init_by_load();
|
||||
return rv;
|
||||
}
|
||||
public static final String GRP_KEY = "xowa.bldr.cmds";
|
||||
}
|
||||
|
||||
@@ -13,3 +13,16 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs; import gplx.*; import gplx.xowa.*;
|
||||
import gplx.xowa.bldrs.wkrs.*;
|
||||
public class Xob_cmd_regy {
|
||||
private final Ordered_hash regy = Ordered_hash_.New();
|
||||
public Xob_cmd Get_or_null(String key) {return (Xob_cmd)regy.Get_by(key);}
|
||||
public void Add_many(Xob_cmd... ary) {
|
||||
int len = ary.length;
|
||||
for (int i = 0; i < len; ++i) {
|
||||
Xob_cmd cmd = ary[i];
|
||||
regy.Add(cmd.Cmd_key(), cmd);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,42 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs; import gplx.*; import gplx.xowa.*;
|
||||
import gplx.dbs.*; import gplx.dbs.cfgs.*;
|
||||
public class Xob_db_file {
|
||||
Xob_db_file(Io_url url, Db_conn conn) {
|
||||
this.url = url; this.conn = conn;
|
||||
this.tbl__cfg = gplx.xowa.wikis.data.Xowd_cfg_tbl_.New(conn);
|
||||
}
|
||||
public Io_url Url() {return url;} private final Io_url url;
|
||||
public Db_conn Conn() {return conn;} private final Db_conn conn;
|
||||
public Db_cfg_tbl Tbl__cfg() {return tbl__cfg;} private final Db_cfg_tbl tbl__cfg;
|
||||
public static Xob_db_file New__file_make(Io_url dir) {return New(dir, Name__file_make);}
|
||||
public static Xob_db_file New__page_regy(Io_url dir) {return New(dir, Name__page_regy);}
|
||||
public static Xob_db_file New__wiki_image(Io_url dir) {return New(dir, Name__wiki_image);}
|
||||
public static Xob_db_file New__wiki_redirect(Io_url dir) {return New(dir, Name__wiki_redirect);}
|
||||
public static Xob_db_file New__temp_log(Io_url dir) {return New(dir, Name__temp_log);}
|
||||
public static Xob_db_file New__redlink(Io_url dir) {return New(dir, Name__redlink);}
|
||||
public static Xob_db_file New__page_link(Xow_wiki wiki) {return New(wiki.Fsys_mgr().Root_dir(), Name__page_link);}
|
||||
public static Xob_db_file New__page_file_map(Xow_wiki wiki) {return New(wiki.Fsys_mgr().Root_dir(), wiki.Domain_str() + "-file-page_map.xowa");}
|
||||
public static Xob_db_file New__img_link(Xow_wiki wiki) {return New(wiki.Fsys_mgr().Root_dir(), "xowa.wiki.imglinks.sqlite3");}
|
||||
public static Xob_db_file New__deletion_db(Xow_wiki wiki) {
|
||||
String name = String_.Format("{0}-file-deletion-{1}.xowa", wiki.Domain_str(), Datetime_now.Get().XtoStr_fmt("yyyy.MM"));
|
||||
return New(wiki.Fsys_mgr().Root_dir(), name);
|
||||
}
|
||||
public static Xob_db_file New(Io_url dir, String name) {
|
||||
Io_url url = dir.GenSubFil(name);
|
||||
Db_conn_bldr_data conn_data = Db_conn_bldr.Instance.Get_or_new(url);
|
||||
Db_conn conn = conn_data.Conn();
|
||||
Xob_db_file rv = new Xob_db_file(url, conn);
|
||||
if (conn_data.Created())
|
||||
rv.Tbl__cfg().Create_tbl();
|
||||
return rv;
|
||||
}
|
||||
public static final String
|
||||
Name__wiki_image = "xowa.wiki.image.sqlite3", Name__wiki_redirect = "xowa.wiki.redirect.sqlite3"
|
||||
, Name__file_make = "xowa.file.make.sqlite3", Name__temp_log = "xowa.temp.log.sqlite3"
|
||||
, Name__page_regy = "xowa.file.page_regy.sqlite3", Name__redlink = "xowa.temp.redlink.sqlite3"
|
||||
, Name__page_link = "xowa.wiki.pagelinks.sqlite3"
|
||||
;
|
||||
}
|
||||
|
||||
@@ -13,3 +13,140 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs; import gplx.*; import gplx.xowa.*;
|
||||
import gplx.core.tests.*; import gplx.core.ios.*; import gplx.core.times.*;
|
||||
import gplx.dbs.*; import gplx.xowa.wikis.tdbs.*; import gplx.xowa.wikis.data.tbls.*; import gplx.xowa.bldrs.cmds.texts.tdbs.*;
|
||||
import gplx.xowa.bldrs.wkrs.*;
|
||||
public class Xob_fxt {
|
||||
public Xob_fxt Ctor_mem() {
|
||||
Io_mgr.Instance.InitEngine_mem();
|
||||
return Ctor(Io_url_.mem_dir_("mem/xowa/"));
|
||||
}
|
||||
public Xob_fxt Ctor(Io_url root_dir) {
|
||||
Db_conn_bldr.Instance.Reg_default_sqlite();
|
||||
app = Xoa_app_fxt.Make__app__edit("linux", root_dir);
|
||||
wiki = Xoa_app_fxt.Make__wiki__edit(app);
|
||||
bldr = Xoa_app_fxt.bldr_(app);
|
||||
return this;
|
||||
}
|
||||
public Xoae_app App() {return app;} private Xoae_app app;
|
||||
public Xob_bldr Bldr() {return bldr;} private Xob_bldr bldr;
|
||||
public Xowe_wiki Wiki() {return wiki;} private Xowe_wiki wiki;
|
||||
public Io_url fil_ns_title(int ns_id, int idx) {return wiki.Tdb_fsys_mgr().Url_ns_fil(Xotdb_dir_info_.Tid_ttl, ns_id, idx);}
|
||||
public Io_url fil_ns_page(int ns_id, int idx) {return wiki.Tdb_fsys_mgr().Url_ns_fil(Xotdb_dir_info_.Tid_page, ns_id, idx);}
|
||||
public Io_url fil_ns_sttl(int ns_id, int idx) {return wiki.Tdb_fsys_mgr().Url_ns_fil(Xotdb_dir_info_.Tid_search_ttl, ns_id, idx);}
|
||||
public Io_url fil_site(byte tid, int idx) {return wiki.Tdb_fsys_mgr().Url_site_fil(tid, idx);}
|
||||
public Io_url fil_site_ctg(int idx) {return wiki.Tdb_fsys_mgr().Url_site_fil(Xotdb_dir_info_.Tid_category, idx);}
|
||||
public Io_url fil_site_id(int idx) {return wiki.Tdb_fsys_mgr().Url_site_fil(Xotdb_dir_info_.Tid_id, idx);}
|
||||
public Io_url fil_reg(byte tid) {return wiki.Tdb_fsys_mgr().Url_site_reg(tid);}
|
||||
public Io_url fil_reg(int ns_id, byte tid) {return wiki.Tdb_fsys_mgr().Url_ns_reg(Int_.To_str_pad_bgn_zero(ns_id, 3), tid);}
|
||||
public Xob_fxt Fil_expd(Io_url url, String... expd) {
|
||||
String text = String_.Concat_lines_nl_skip_last(expd); // skipLast b/c if trailing line wanted, easier to pass in extra argument for ""
|
||||
expd_list.Add(new Io_fil_chkr(url, text));
|
||||
return this;
|
||||
} List_adp expd_list = List_adp_.New();
|
||||
public Xob_fxt Fil_skip(Io_url... urls) {
|
||||
for (int i = 0; i < urls.length; i++)
|
||||
skip_list.Add(urls[i]);
|
||||
return this;
|
||||
} List_adp skip_list = List_adp_.New();
|
||||
public Xob_fxt doc_ary_(Xowd_page_itm... v) {doc_ary = v; return this;} private Xowd_page_itm[] doc_ary;
|
||||
public Xowd_page_itm doc_wo_date_(int id, String title, String text) {return doc_(id, "2012-01-02 13:14", title, text);}
|
||||
public Xowd_page_itm doc_(int id, String date, String title, String text) {
|
||||
Xowd_page_itm rv = new Xowd_page_itm().Id_(id).Ttl_(Bry_.new_u8(title), wiki.Ns_mgr()).Text_(Bry_.new_u8(text));
|
||||
int[] modified_on = new int[7];
|
||||
dateParser.Parse_iso8651_like(modified_on, date);
|
||||
rv.Modified_on_(DateAdp_.seg_(modified_on));
|
||||
return rv;
|
||||
}
|
||||
public Xob_fxt Run_id() {
|
||||
Xob_make_id_wkr wkr = new Xob_make_id_wkr(bldr, wiki);
|
||||
Run(wkr);
|
||||
return this;
|
||||
}
|
||||
private void Run_wkr(Xob_page_wkr wkr) {
|
||||
wkr.Page_wkr__bgn();
|
||||
for (int i = 0; i < doc_ary.length; i++) {
|
||||
Xowd_page_itm page = doc_ary[i];
|
||||
wkr.Page_wkr__run(page);
|
||||
}
|
||||
wkr.Page_wkr__end();
|
||||
}
|
||||
private void tst_fils(Io_url[] ary) {
|
||||
Io_fil[] actls = Get_actl(ary);
|
||||
Io_fil_chkr[] expds = (Io_fil_chkr[])expd_list.To_ary(Io_fil_chkr.class);
|
||||
tst_mgr.Tst_ary("all", expds, actls);
|
||||
}
|
||||
Io_fil[] Get_actl(Io_url[] ary) {
|
||||
int len = ary.length;
|
||||
Io_fil[] rv = new Io_fil[len];
|
||||
for (int i = 0; i < len; i++) {
|
||||
Io_url url = ary[i];
|
||||
String data = Io_mgr.Instance.LoadFilStr(url);
|
||||
rv[i] = new Io_fil(url, data);
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
public Xob_fxt Run_tmpl_dump() {
|
||||
Xob_parse_dump_templates_cmd wkr = new Xob_parse_dump_templates_cmd(bldr, wiki);
|
||||
Run_wkr(wkr);
|
||||
tst_fils(wkr.Dump_url_gen().Prv_urls());
|
||||
return this;
|
||||
}
|
||||
public Xob_fxt Run(Xobd_parser_wkr... wkrs) {
|
||||
Xobd_parser parser_wkr = new Xobd_parser(bldr);
|
||||
int len = wkrs.length;
|
||||
for (int i = 0; i < len; i++)
|
||||
parser_wkr.Wkr_add(wkrs[i]);
|
||||
Run(parser_wkr);
|
||||
return this;
|
||||
}
|
||||
public Xob_fxt Run(Xob_page_wkr... wkrs) {
|
||||
int doc_ary_len = doc_ary.length;
|
||||
for (int j = 0; j < wkrs.length; j++) {
|
||||
Xob_page_wkr wkr = wkrs[j];
|
||||
wkr.Page_wkr__bgn();
|
||||
for (int i = 0; i < doc_ary_len; i++) {
|
||||
Xowd_page_itm page = doc_ary[i];
|
||||
wkr.Page_wkr__run(page);
|
||||
}
|
||||
wkr.Page_wkr__end();
|
||||
}
|
||||
Test_expd_files();
|
||||
return this;
|
||||
}
|
||||
public Xob_fxt Run_cmds(Xob_cmd... cmds) {
|
||||
for (int j = 0; j < cmds.length; j++) {
|
||||
Xob_cmd cmd = cmds[j];
|
||||
cmd.Cmd_bgn(bldr);
|
||||
cmd.Cmd_run();
|
||||
cmd.Cmd_end();
|
||||
}
|
||||
Test_expd_files();
|
||||
return this;
|
||||
}
|
||||
private void Test_expd_files() {
|
||||
if (expd_list.Count() > 0) {
|
||||
Io_fil_chkr[] expd = (Io_fil_chkr[])expd_list.To_ary(Io_fil_chkr.class);
|
||||
Io_fil[] actl = wiki_();
|
||||
tst_mgr.Tst_ary("all", expd, actl);
|
||||
}
|
||||
}
|
||||
Io_fil[] wiki_() {
|
||||
List_adp rv = List_adp_.New();
|
||||
wiki_fil_add(rv, wiki.Tdb_fsys_mgr().Ns_dir());
|
||||
wiki_fil_add(rv, wiki.Tdb_fsys_mgr().Site_dir());
|
||||
rv.Sort();
|
||||
return (Io_fil[])rv.To_ary(Io_fil.class);
|
||||
}
|
||||
private void wiki_fil_add(List_adp list, Io_url root_dir) {
|
||||
Io_url[] ary = Io_mgr.Instance.QueryDir_args(root_dir).Recur_().ExecAsUrlAry();
|
||||
for (int i = 0; i < ary.length; i++) {
|
||||
Io_url url = ary[i];
|
||||
Io_fil fil = new Io_fil(url, Io_mgr.Instance.LoadFilStr_args(url).MissingIgnored_().Exec());
|
||||
list.Add(fil);
|
||||
}
|
||||
}
|
||||
Tst_mgr tst_mgr = new Tst_mgr();
|
||||
DateAdp_parser dateParser = DateAdp_parser.new_();
|
||||
}
|
||||
|
||||
@@ -13,3 +13,75 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs; import gplx.*; import gplx.xowa.*;
|
||||
import gplx.xowa.wikis.data.*; import gplx.xowa.wikis.data.tbls.*;
|
||||
import gplx.xowa.bldrs.cmds.*;
|
||||
public class Xob_ns_to_db_mgr {
|
||||
private final Xob_ns_to_db_wkr wkr; private final Xow_db_mgr db_mgr; private final long db_max; private boolean one_file_conn_init = true;
|
||||
private final Ordered_hash db_list = Ordered_hash_.New();
|
||||
public Xob_ns_to_db_mgr(Xob_ns_to_db_wkr wkr, Xow_db_mgr db_mgr, long db_max) {
|
||||
this.wkr = wkr; this.db_mgr = db_mgr; this.db_max = db_max;
|
||||
}
|
||||
public Xow_db_file Get_by_ns(Xob_ns_file_itm ns_file_itm, int data_len) {
|
||||
Xow_db_file rv = null;
|
||||
if (db_mgr.Props().Layout_text().Tid_is_all()) {
|
||||
rv = db_mgr.Db__core();
|
||||
if (one_file_conn_init) {
|
||||
one_file_conn_init = false;
|
||||
Init_tbl(rv);
|
||||
}
|
||||
}
|
||||
else if (wkr.Db_tid() == Xow_db_file_.Tid__html_data && db_mgr.Props().Layout_html().Tid_is_all_or_few()) {
|
||||
if (one_file_conn_init) {
|
||||
one_file_conn_init = false;
|
||||
rv = db_mgr.Dbs__make_by_tid(wkr.Db_tid());
|
||||
Init_tbl(rv);
|
||||
}
|
||||
else
|
||||
rv = db_mgr.Db__html();
|
||||
}
|
||||
else {
|
||||
int db_id = ns_file_itm.Nth_db_id();
|
||||
if (db_id == Xob_ns_file_itm.Nth_db_id_null) // ns not assigned yet to db
|
||||
rv = Init_db(ns_file_itm);
|
||||
else
|
||||
rv = db_mgr.Dbs__get_by_id_or_fail(db_id);
|
||||
long file_len = rv.File_len();
|
||||
if (file_len + data_len > db_max) { // file is "full"
|
||||
Term_tbl(rv);
|
||||
rv = Init_db(ns_file_itm);
|
||||
}
|
||||
}
|
||||
rv.File_len_add(data_len);
|
||||
return rv;
|
||||
}
|
||||
private Xow_db_file Init_db(Xob_ns_file_itm ns_file_itm) {
|
||||
Xow_db_file rv = db_mgr.Dbs__make_by_tid(ns_file_itm.Db_file_tid(), Int_ary_.To_str("|", ns_file_itm.Ns_ids()), ns_file_itm.Nth_db_idx(), ns_file_itm.Make_file_name());
|
||||
ns_file_itm.Nth_db_id_(rv.Id());
|
||||
Init_tbl(rv);
|
||||
return rv;
|
||||
}
|
||||
private void Init_tbl(Xow_db_file db) {
|
||||
wkr.Tbl_init(db);
|
||||
db_list.Add(db.Id(), db);
|
||||
}
|
||||
private void Term_tbl(Xow_db_file db) {
|
||||
wkr.Tbl_term(db);
|
||||
db_list.Del(db.Id());
|
||||
}
|
||||
public void Rls_all() {
|
||||
Xow_db_file[] ary = (Xow_db_file[])db_list.To_ary(Xow_db_file.class);
|
||||
int len = ary.length;
|
||||
for (int i = 0; i < len; ++i) {
|
||||
Xow_db_file db = (Xow_db_file)ary[i];
|
||||
Term_tbl(db); // SQLITE:1_TXN; may call close on db where txn is already closed
|
||||
}
|
||||
}
|
||||
public void Commit() {
|
||||
int len = db_list.Count();
|
||||
for (int i = 0; i < len; ++i) {
|
||||
Xow_db_file db = (Xow_db_file)db_list.Get_at(i);
|
||||
db.Conn().Txn_sav();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,10 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs; import gplx.*; import gplx.xowa.*;
|
||||
import gplx.xowa.wikis.data.*;
|
||||
public interface Xob_ns_to_db_wkr {
|
||||
byte Db_tid();
|
||||
void Tbl_init(Xow_db_file db);
|
||||
void Tbl_term(Xow_db_file db);
|
||||
}
|
||||
|
||||
@@ -13,3 +13,84 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs; import gplx.*; import gplx.xowa.*;
|
||||
import gplx.core.consoles.*; import gplx.core.ios.*;
|
||||
import gplx.xowa.wikis.nss.*; import gplx.xowa.wikis.data.tbls.*; import gplx.xowa.parsers.tmpls.*;
|
||||
import gplx.xowa.bldrs.wkrs.*; import gplx.xowa.bldrs.xmls.*;
|
||||
public class Xob_page_wkr_cmd implements Xob_cmd {
|
||||
private final Xob_bldr bldr; private final Xowe_wiki wiki;
|
||||
public Xob_page_wkr_cmd(Xob_bldr bldr, Xowe_wiki wiki) {this.bldr = bldr; this.wiki = wiki;}
|
||||
public String Cmd_key() {return KEY;} public static final String KEY = "dump_mgr";
|
||||
public Xob_cmd Cmd_clone(Xob_bldr bldr, Xowe_wiki wiki) {return null;}
|
||||
public void Cmd_run() {
|
||||
Xob_page_wkr[] wkr_ary = (Xob_page_wkr[])wkrs.To_ary(Xob_page_wkr.class); int wkr_ary_len = wkr_ary.length;
|
||||
for (int i = 0; i < wkr_ary_len; i++)
|
||||
wkr_ary[i].Page_wkr__bgn();
|
||||
Io_buffer_rdr fil = Io_buffer_rdr.Null; Xowd_page_itm page = new Xowd_page_itm(); Xow_ns_mgr ns_mgr = wiki.Ns_mgr();
|
||||
Xob_xml_parser parser = bldr.Dump_parser().Data_bfr_len_(Io_mgr.Len_mb);
|
||||
long fil_len = 0;
|
||||
Gfo_usr_dlg usr_dlg = bldr.App().Usr_dlg();
|
||||
try {
|
||||
gplx.core.ios.streams.Io_stream_rdr src_rdr = wiki.Import_cfg().Src_rdr();
|
||||
fil = Io_buffer_rdr.new_(src_rdr, optRdrBfrSize);
|
||||
fil_len = fil.Fil_len();
|
||||
if (src_rdr.Tid() == gplx.core.ios.streams.Io_stream_tid_.Tid__bzip2) fil_len = (fil_len * 100) / 18; // HACK: no way to get actual file progress; assume 18% compression
|
||||
// fil.Seek(bldr.Opts().ResumeAt());
|
||||
int prv_pos = 0;
|
||||
while (true) {
|
||||
int cur_pos = parser.Parse_page(page, usr_dlg, fil, fil.Bfr(), prv_pos, ns_mgr); if (cur_pos == Bry_find_.Not_found) break;
|
||||
if (cur_pos < prv_pos)
|
||||
bldr.Print_prog_msg(fil.Fil_pos(), fil_len, 1, optRdrFillFmt, Int_.To_str_pad_bgn_zero((int)(fil.Fil_pos() / Io_mgr.Len_mb), Int_.DigitCount((int)(fil.Fil_len() / Io_mgr.Len_mb))), "", String_.new_u8(page.Ttl_full_db()));
|
||||
prv_pos = cur_pos;
|
||||
try {
|
||||
for (int i = 0; i < wkr_ary_len; i++)
|
||||
wkr_ary[i].Page_wkr__run(page);
|
||||
}
|
||||
catch (Exception e) {
|
||||
Err_.Noop(e);
|
||||
long dividend = fil.Fil_pos();
|
||||
if (dividend >= fil_len) dividend = fil_len - 1; // prevent % from going over 100
|
||||
String msg = Decimal_adp_.CalcPctStr(dividend, fil_len, "00.00") + "|" + String_.new_u8(page.Ttl_full_db()) + "|" + Err_.Message_gplx_log(e);
|
||||
bldr.Usr_dlg().Log_wkr().Log_to_session(msg);
|
||||
Console_adp__sys.Instance.Write_str_w_nl(msg);
|
||||
}
|
||||
}
|
||||
for (int i = wkr_ary_len - 1; i > -1; --i) // NOTE: release in reverse order; needed to make sure txns are released correctly
|
||||
wkr_ary[i].Page_wkr__run_cleanup();
|
||||
}
|
||||
catch (Exception e) {
|
||||
String msg = Err_.Message_lang(e);
|
||||
bldr.Usr_dlg().Log_wkr().Log_to_session(msg);
|
||||
Console_adp__sys.Instance.Write_str_w_nl(msg);
|
||||
throw Err_.new_exc(e, "xo", "error while reading dump");
|
||||
}
|
||||
finally {fil.Rls();}
|
||||
bldr.Usr_dlg().Prog_none("", "", "reading completed: performing post-processing clean-up");
|
||||
for (int i = wkr_ary_len - 1; i > -1; --i) // NOTE: release in reverse order; needed to make sure txns are released correctly
|
||||
wkr_ary[i].Page_wkr__end();
|
||||
}
|
||||
public void Cmd_bgn(Xob_bldr bldr) {}
|
||||
public void Cmd_init(Xob_bldr bldr) {}
|
||||
public void Cmd_end() {}
|
||||
public void Cmd_term() {}
|
||||
public void Wkr_add(Xob_page_wkr wkr) {wkrs.Add(wkr.Page_wkr__key(), wkr);} private Ordered_hash wkrs = Ordered_hash_.New();
|
||||
public Xob_page_wkr Wkr_get(String key) {return (Xob_page_wkr)wkrs.Get_by(key);}
|
||||
public Xobd_parser Page_parser_assert() {
|
||||
if (page_parser == null) {
|
||||
page_parser = new Xobd_parser(bldr);
|
||||
this.Wkr_add(page_parser);
|
||||
}
|
||||
return page_parser;
|
||||
} private Xobd_parser page_parser;
|
||||
public static Io_url Find_fil_by(Io_url dir, String filter) {
|
||||
Io_url[] fil_ary = Io_mgr.Instance.QueryDir_args(dir).FilPath_(filter).ExecAsUrlAry();
|
||||
int fil_ary_len = fil_ary.length;
|
||||
return fil_ary_len == 0 ? null : fil_ary[fil_ary_len - 1]; // return last
|
||||
}
|
||||
int optRdrBfrSize = 8 * Io_mgr.Len_mb;
|
||||
String optRdrFillFmt = "reading ~{0} MB: ~{1} ~{2}";
|
||||
static final String GRP_KEY = "xowa.bldr.rdr";
|
||||
public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
|
||||
throw Err_.new_unimplemented();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,52 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs; import gplx.*; import gplx.xowa.*;
|
||||
import gplx.core.btries.*; import gplx.xowa.bldrs.wkrs.*;
|
||||
import gplx.xowa.wikis.data.tbls.*;
|
||||
public class Xobd_parser implements Xob_page_wkr {
|
||||
private final Xob_bldr bldr;
|
||||
private final Btrie_slim_mgr trie = Btrie_slim_mgr.ci_a7(); // NOTE:ci.ascii:MW_const.en; ctg.v1 assumes [[Category:
|
||||
private final Btrie_rv trv = new Btrie_rv();
|
||||
private final List_adp wkr_list = List_adp_.New();
|
||||
public String Page_wkr__key() {return KEY;} static final String KEY = "page_parser";
|
||||
public Xobd_parser(Xob_bldr bldr) {this.bldr = bldr;}
|
||||
public void Wkr_add(Xobd_parser_wkr wkr) {wkr_list.Add(wkr);}
|
||||
public void Page_wkr__bgn() {
|
||||
int wkr_list_len = wkr_list.Count();
|
||||
for (int i = 0; i < wkr_list_len; i++) {
|
||||
Xobd_parser_wkr wkr = (Xobd_parser_wkr)wkr_list.Get_at(i);
|
||||
wkr.Wkr_bgn(bldr);
|
||||
int hooks_len = wkr.Wkr_hooks().Count();
|
||||
for (int j = 0; j < hooks_len; j++) {
|
||||
byte[] bry = (byte[])wkr.Wkr_hooks().Get_at(j);
|
||||
trie.Add_obj(bry, wkr);
|
||||
}
|
||||
}
|
||||
}
|
||||
public void Page_wkr__run(Xowd_page_itm page) {
|
||||
byte[] src = page.Text(); int src_len = src.length;
|
||||
int pos = 0;
|
||||
while (true) {
|
||||
if (pos == src_len) break;
|
||||
Object o = trie.Match_at(trv, src, pos, src_len);
|
||||
if (o == null)
|
||||
++pos;
|
||||
else {
|
||||
Xobd_parser_wkr wkr = (Xobd_parser_wkr)o;
|
||||
pos = wkr.Wkr_run(page, src, src_len, pos, trv.Pos());
|
||||
}
|
||||
}
|
||||
}
|
||||
public void Page_wkr__run_cleanup() {}
|
||||
public void Page_wkr__end() {
|
||||
int wkr_list_len = wkr_list.Count();
|
||||
for (int i = 0; i < wkr_list_len; i++) {
|
||||
Xobd_parser_wkr wkr = (Xobd_parser_wkr)wkr_list.Get_at(i);
|
||||
wkr.Wkr_end();
|
||||
}
|
||||
}
|
||||
public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
|
||||
throw Err_.new_unimplemented();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,11 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs; import gplx.*; import gplx.xowa.*;
|
||||
import gplx.xowa.wikis.data.tbls.*;
|
||||
public interface Xobd_parser_wkr extends Gfo_invk {
|
||||
Ordered_hash Wkr_hooks();
|
||||
void Wkr_bgn(Xob_bldr bldr);
|
||||
int Wkr_run(Xowd_page_itm page, byte[] src, int src_len, int bgn, int end);
|
||||
void Wkr_end();
|
||||
}
|
||||
|
||||
@@ -13,3 +13,29 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs; import gplx.*; import gplx.xowa.*;
|
||||
import gplx.core.ios.*; import gplx.core.lists.*;
|
||||
import gplx.xowa.wikis.nss.*;
|
||||
import gplx.xowa.bldrs.wtrs.*;
|
||||
public class Xobdc_merger {
|
||||
public static void Basic(Gfo_usr_dlg usr_dlg, Io_url_gen dump_url_gen, Io_url sort_dir, int memory_max, Io_line_rdr_key_gen key_gen, Io_sort_cmd make_cmd) {Basic(usr_dlg, dump_url_gen, sort_dir, memory_max, Io_sort_split_itm_sorter.Instance, key_gen, make_cmd);}
|
||||
public static void Basic(Gfo_usr_dlg usr_dlg, Io_url_gen dump_url_gen, Io_url sort_dir, int memory_max, ComparerAble row_comparer, Io_line_rdr_key_gen key_gen, Io_sort_cmd make_cmd) {
|
||||
Io_sort sort = new Io_sort().Memory_max_(memory_max);
|
||||
Io_url_gen sort_url_gen = Io_url_gen_.dir_(sort_dir);
|
||||
sort.Split(usr_dlg, dump_url_gen, sort_url_gen, row_comparer, key_gen);
|
||||
sort.Merge(usr_dlg, sort_url_gen.Prv_urls(), row_comparer, key_gen, make_cmd);
|
||||
}
|
||||
public static void Ns(Gfo_usr_dlg usr_dlg, Xob_tmp_wtr[] ttl_wtrs, String type, Io_url tmp_root, Io_url make_root, int memory_max, Io_line_rdr_key_gen key_gen, Io_make_cmd make_cmd) {
|
||||
int len = ttl_wtrs.length;
|
||||
for (int i = 0; i < len; i++) {
|
||||
Xob_tmp_wtr ttl_wtr = ttl_wtrs[i]; if (ttl_wtr == null) continue;
|
||||
Xow_ns ns = ttl_wtr.Ns_itm();
|
||||
Io_url make_dir = make_root.GenSubDir_nest(ns.Num_str(), type);
|
||||
make_cmd.Make_dir_(make_dir);
|
||||
Basic(usr_dlg
|
||||
, ttl_wtr.Url_gen()
|
||||
, tmp_root.GenSubDir_nest(ns.Num_str(), "sort")
|
||||
, memory_max, key_gen, make_cmd);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,39 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs; import gplx.*; import gplx.xowa.*;
|
||||
import gplx.core.ios.*; import gplx.core.lists.*;
|
||||
import gplx.xowa.wikis.tdbs.*;
|
||||
class Io_sort_filCmd_reg implements Io_sort_filCmd { // 123|bgn|end|1
|
||||
public Io_sort_filCmd_reg() {}
|
||||
public void Bfr_add(Io_line_rdr stream) {
|
||||
++itm_count;
|
||||
int key_bgn = stream.Key_pos_bgn(), key_end = stream.Key_pos_end();
|
||||
Bry_.Copy_by_pos(stream.Bfr(), key_bgn, key_end, prv_key, 0); prv_key_len = key_end - key_bgn;
|
||||
} byte[] prv_key = new byte[1024]; int prv_key_len = 0;
|
||||
public void Fil_bgn(Io_line_rdr stream) {
|
||||
bfr.Add_int_variable(fil_idx++).Add_byte(Byte_ascii.Pipe);
|
||||
bfr.Add_mid(stream.Bfr(), stream.Key_pos_bgn(), stream.Key_pos_end()).Add_byte(Byte_ascii.Pipe);
|
||||
}
|
||||
public void Fil_end() {
|
||||
bfr.Add_mid(prv_key, 0, prv_key_len).Add_byte(Byte_ascii.Pipe)
|
||||
.Add_int_variable(itm_count).Add_byte(Byte_ascii.Nl);
|
||||
itm_count = 0;
|
||||
}
|
||||
public void Flush(Io_url fil) {
|
||||
Io_mgr.Instance.SaveFilBry(fil, bfr.Bfr(), bfr.Len());
|
||||
} private Bry_bfr bfr = Bry_bfr_.New(); int fil_idx = 0; int itm_count = 0;
|
||||
}
|
||||
class Io_url_gen_nest implements gplx.core.ios.Io_url_gen {
|
||||
public Io_url Cur_url() {return cur_url;} Io_url cur_url;
|
||||
public Io_url Nxt_url() {cur_url = Xotdb_fsys_mgr.Url_fil(root_dir, fil_idx++, ext); return cur_url;}
|
||||
public Io_url[] Prv_urls() {
|
||||
Io_url[] rv = new Io_url[fil_idx];
|
||||
for (int i = 0; i < fil_idx; i++) {
|
||||
rv[i] = Xotdb_fsys_mgr.Url_fil(root_dir, fil_idx++, ext);
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
public void Del_all() {if (Io_mgr.Instance.ExistsDir(root_dir)) Io_mgr.Instance.DeleteDirDeep(root_dir);}
|
||||
public Io_url_gen_nest(Io_url root_dir, String ext) {this.root_dir = root_dir; this.ext = Bry_.new_u8(ext);} Io_url root_dir; byte[] ext; int fil_idx;
|
||||
}
|
||||
|
||||
@@ -13,3 +13,49 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs; import gplx.*; import gplx.xowa.*;
|
||||
import gplx.core.ios.*;
|
||||
import gplx.xowa.wikis.data.*;
|
||||
public class Xobldr_cfg {
|
||||
private static long layout_all_max = 0; // disable by default; may set to 200 MB in future
|
||||
private static boolean hzip_enabled = Bool_.Y;
|
||||
private static boolean hzip_mode_is_b256 = Bool_.Y;
|
||||
|
||||
public static byte Zip_mode__text(Xoa_app app) {return Zip_mode(app, "xowa.bldr.db.zip_mode.text");} // CFG: Cfg__
|
||||
public static byte Zip_mode__html(Xoa_app app) {return Zip_mode(app, "xowa.bldr.db.zip_mode.html");} // CFG: Cfg__
|
||||
private static byte Zip_mode(Xoa_app app, String key) {
|
||||
String val = app.Cfg().Get_str_app_or(key, "gzip");
|
||||
return gplx.core.ios.streams.Io_stream_tid_.To_tid(val);
|
||||
}
|
||||
public static long Max_size__text(Xoa_app app) {return Max_size(app, "xowa.bldr.db.max_size.text");} // CFG: Cfg__
|
||||
public static long Max_size__html(Xoa_app app) {return Max_size(app, "xowa.bldr.db.max_size.html");} // CFG: Cfg__
|
||||
public static long Max_size__file(Xoa_app app) {return Max_size(app, "xowa.bldr.db.max_size.file");} // CFG: Cfg__
|
||||
private static long Max_size(Xoa_app app, String key) {
|
||||
long rv = app.Cfg().Get_long_app_or(key, Io_size_.To_long_by_int_mb(1500));
|
||||
return rv * Io_mgr.Len_mb;
|
||||
}
|
||||
public static long Layout_size__text(Xoa_app app) {return Layout_size(app, "xowa.bldr.db.layout_size.text");} // CFG: Cfg__
|
||||
public static long Layout_size__html(Xoa_app app) {return Layout_size(app, "xowa.bldr.db.layout_size.html");} // CFG: Cfg__
|
||||
public static long Layout_size__file(Xoa_app app) {return Layout_size(app, "xowa.bldr.db.layout_size.file");} // CFG: Cfg__
|
||||
private static long Layout_size(Xoa_app app, String key) {
|
||||
long rv = app.Cfg().Get_long_app_or(key, Io_size_.To_long_by_int_mb(1500));
|
||||
return rv * Io_mgr.Len_mb;
|
||||
}
|
||||
public static byte[] New_ns_file_map(Xoa_app app, long dump_file_size) {
|
||||
return dump_file_size < Layout_size__text(app)
|
||||
? gplx.xowa.bldrs.cmds.Xob_ns_file_itm_parser.Ns_file_map__few
|
||||
: gplx.xowa.bldrs.cmds.Xob_ns_file_itm_parser.Ns_file_map__each; // DB.FEW: DATE:2016-06-07
|
||||
}
|
||||
public static Xowd_core_db_props New_props(Xoa_app app, String domain_str, long dump_file_size) {
|
||||
Xow_db_layout layout_text, layout_html, layout_file;
|
||||
if (dump_file_size < layout_all_max)
|
||||
layout_text = layout_html = layout_file = Xow_db_layout.Itm_all;
|
||||
else {
|
||||
layout_text = dump_file_size < Layout_size__text(app) ? Xow_db_layout.Itm_few : Xow_db_layout.Itm_lot;
|
||||
layout_html = dump_file_size < Layout_size__html(app) ? Xow_db_layout.Itm_few : Xow_db_layout.Itm_lot;
|
||||
layout_file = dump_file_size < Layout_size__file(app) ? Xow_db_layout.Itm_few : Xow_db_layout.Itm_lot;
|
||||
}
|
||||
return new Xowd_core_db_props(2, layout_text, layout_html, layout_file, Zip_mode__text(app), Zip_mode__html(app), hzip_enabled, hzip_mode_is_b256);
|
||||
}
|
||||
public static final byte[] Ns_file_map__each = Bry_.new_a7("<each>");
|
||||
}
|
||||
|
||||
@@ -13,3 +13,37 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs.aria2; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
|
||||
import gplx.core.envs.*;
|
||||
import gplx.xowa.apps.fsys.*; import gplx.xowa.bldrs.wms.dumps.*;
|
||||
public class Aria2_lib_mgr implements Gfo_invk {
|
||||
public Process_adp Lib() {return lib;} private Process_adp lib = new Process_adp();
|
||||
public void Init_by_app(Xoae_app app) {
|
||||
Xoa_fsys_eval cmd_eval = app.Url_cmd_eval();
|
||||
Process_adp.ini_(this, app.Usr_dlg(), lib, cmd_eval, Process_adp.Run_mode_sync_block, Int_.Max_value
|
||||
, "~{<>bin_plat_dir<>}aria2" + Op_sys.Cur().Fsys_dir_spr_str() + "aria2c"
|
||||
, Lib_args_fmt
|
||||
, "wiki_abrv", "wiki_date", "wiki_type");
|
||||
}
|
||||
// private Bry_bfr tmp_bfr = Bry_bfr_.Reset(255);
|
||||
public void Exec(Xowm_dump_file dump_file) {
|
||||
// byte[] args_bry = lib.Args_fmtr().Bld_bry_many(tmp_bfr, dump_file.Wiki_alias(), dump_file.Dump_date(), dump_file.Dump_file_type());
|
||||
// Process_adp process = new Process_adp().Exe_url_(lib.Exe_url()).Args_str_(String_.new_u8(args_bry));
|
||||
// process.Run_wait();
|
||||
}
|
||||
public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
|
||||
if (ctx.Match(k, Invk_lib)) return lib;
|
||||
else return Gfo_invk_.Rv_unhandled;
|
||||
}
|
||||
private static final String Invk_lib = "lib";
|
||||
private static final String Lib_args_fmt = String_.Concat
|
||||
( "--max-connection-per-server=2"
|
||||
, " --max-concurrent-downloads=20"
|
||||
, " --split=4"
|
||||
, " --file-allocation=prealloc"
|
||||
, " --remote-time=true"
|
||||
, " --server-stat-of=serverstats.txt"
|
||||
, " ftp://ftpmirror.your.org/pub/wikimedia/dumps/~{wiki_abrv}/~{wiki_date}/~{wiki_abrv}-~{wiki_date}-~{wiki_type}.bz2"
|
||||
, " https://dumps.wikimedia.org/~{wiki_abrv}/~{wiki_date}/~{wiki_abrv}-~{wiki_date}-~{wiki_type}.xml.bz2"
|
||||
);
|
||||
}
|
||||
|
||||
@@ -13,3 +13,26 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs.aria2; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
|
||||
import gplx.gfui.*;
|
||||
class Gfui_process_win {
|
||||
public void Exec_async(String process, String args, Gfo_invk_cmd done_cbk) {
|
||||
// Gfo_process process = new Gfo_process().Init_process_(process, args).Init_async_(done_cbk).Init_strm_out_err_(output_box).Exec();
|
||||
}
|
||||
}
|
||||
class Gfo_process {
|
||||
// private Gfo_invk_cmd done_cbk;
|
||||
// private Gfo_process_wtr out_wtr, err_wtr;
|
||||
public String Cmd_path() {return cmd_path;} private String cmd_path;
|
||||
public String Cmd_args() {return cmd_args;} private String cmd_args;
|
||||
public byte Mode() {return mode;} private byte mode;
|
||||
public Gfo_process Init_cmd_(String cmd_path, String cmd_args) {this.cmd_path = cmd_path; this.cmd_args = cmd_args; return this;}
|
||||
public Gfo_process Init_mode_async_() {mode = Gfo_process_.Mode_async; return this;}
|
||||
// public Gfo_process Init_mode_async_(Gfo_invk_cmd done_cbk) {this.done_cbk = done_cbk; return this.Init_mode_async_();}
|
||||
// public Gfo_process Init_wtr_out_err_(Gfo_process_wtr wtr) {out_wtr = err_wtr = wtr; return this;}
|
||||
}
|
||||
/** Empty placeholder; referenced only by commented-out code in Gfo_process. */
class Gfo_process_wtr {
}
|
||||
/** Empty placeholder class. */
class Gfo_process_rdr {
}
|
||||
/** Run-mode constants for Gfo_process. */
class Gfo_process_ {
	public static final byte Mode_async        = 0;
	public static final byte Mode_sync         = 1;
	public static final byte Mode_sync_timeout = 2;
}
|
||||
|
||||
@@ -13,3 +13,313 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs.cmds; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
|
||||
import gplx.core.envs.*;
|
||||
import gplx.dbs.*; import gplx.xowa.wikis.caches.*; import gplx.xowa.addons.bldrs.files.*; import gplx.xowa.files.origs.*;
|
||||
import gplx.xowa.bldrs.wkrs.*;
|
||||
import gplx.xowa.wikis.nss.*;
|
||||
import gplx.xowa.wikis.data.*; import gplx.xowa.wikis.dbs.*; import gplx.xowa.wikis.data.tbls.*;
|
||||
import gplx.xowa.addons.bldrs.files.utls.*;
|
||||
import gplx.xowa.parsers.*; import gplx.xowa.parsers.tmpls.*;
|
||||
public abstract class Xob_dump_mgr_base extends Xob_itm_basic_base implements Xob_cmd, Gfo_invk {
|
||||
private Xob_dump_src_id page_src;
|
||||
private Xow_db_mgr db_fsys_mgr; protected Xop_parser parser; protected Xop_ctx ctx; protected Xop_root_tkn root;
|
||||
private int[] ns_ary; private Xow_db_file[] db_ary;
|
||||
private int ns_bgn = -1, db_bgn = -1, pg_bgn = -1;
|
||||
private int ns_end = -1, db_end = -1, pg_end = Int_.Max_value;
|
||||
private int commit_interval = 1000, progress_interval = 250, cleanup_interval = 2500, select_size = 10 * Io_mgr.Len_mb;
|
||||
private int exec_count, exec_count_max = Int_.Max_value;
|
||||
private boolean reset_db = false, exit_after_commit = false, exit_now = false;
|
||||
private boolean load_tmpls;
|
||||
private Xob_dump_bmk_mgr bmk_mgr = new Xob_dump_bmk_mgr();
|
||||
private Xobu_poll_mgr poll_mgr; private int poll_interval = 5000;
|
||||
private Xob_rate_mgr rate_mgr = new Xob_rate_mgr();
|
||||
/** Key of the cmd; supplied by each subclass. */
public abstract String Cmd_key();
|
||||
@Override protected void Cmd_ctor_end(Xob_bldr bldr, Xowe_wiki wiki) {
|
||||
poll_mgr = new Xobu_poll_mgr(bldr.App()); // init in ctor so gfs can invoke methods
|
||||
}
|
||||
public void Cmd_bgn(Xob_bldr bldr) {
|
||||
parser = wiki.Parser_mgr().Main();
|
||||
ctx = wiki.Parser_mgr().Ctx();
|
||||
root = ctx.Tkn_mkr().Root(Bry_.Empty);
|
||||
wiki.Init_assert(); // NOTE: must init wiki for db_mgr_as_sql
|
||||
|
||||
// assert by calling Db_mgr_as_sql
|
||||
wiki.Db_mgr_as_sql().Core_data_mgr();
|
||||
|
||||
// load db_mgr
|
||||
Xow_db_mgr.Init_by_load(wiki, gplx.xowa.wikis.data.Xow_db_file__core_.Find_core_fil_or_null(wiki)); // NOTE: must reinit providers as previous steps may have rls'd (and left member variable conn which is closed)
|
||||
|
||||
wiki.File__orig_mgr().Wkrs__del(Xof_orig_wkr_.Tid_wmf_api);
|
||||
db_fsys_mgr = wiki.Db_mgr_as_sql().Core_data_mgr();
|
||||
db_ary = Xob_dump_mgr_base_.Init_text_files_ary(db_fsys_mgr);
|
||||
poll_interval = poll_mgr.Poll_interval();
|
||||
|
||||
page_src = new Xob_dump_src_id().Init(wiki, this.Init_redirect(), select_size);
|
||||
ns_ary = Init_ns_ary();
|
||||
Db_conn conn = Init_db_file();
|
||||
Io_url wiki_dir = wiki.Fsys_mgr().Root_dir();
|
||||
bmk_mgr.Cfg_url_(wiki_dir.GenSubFil("xowa.file.make.cfg.gfs"));
|
||||
rate_mgr.Log_file_(wiki_dir.GenSubFil("xowa.file.make.log.csv"));
|
||||
if (reset_db) {
|
||||
bmk_mgr.Reset();
|
||||
Init_reset(conn);
|
||||
}
|
||||
bmk_mgr.Load(wiki.Appe(), this);
|
||||
Cmd_bgn_end();
|
||||
}
|
||||
// subclass hook; invoked as the last statement of Cmd_bgn
protected abstract void Cmd_bgn_end();
|
||||
// subclass hook; the returned byte is passed to Xob_dump_src_id.Init as the redirect filter
public abstract byte Init_redirect();
|
||||
// subclass hook; returns the namespace ids that Exec_ns_ary iterates, in order
public abstract int[] Init_ns_ary();
|
||||
// subclass hook; called from Cmd_bgn only when reset_db is set, with the conn returned by Init_db_file
protected abstract void Init_reset(Db_conn p);
|
||||
// subclass hook; called once from Cmd_bgn to obtain the conn that is later handed to Init_reset
protected abstract Db_conn Init_db_file();
|
||||
// tick count captured at the start of Exec_ns_ary; used for elapsed-time reporting
private long time_bgn;
|
||||
// Runs the command by walking every configured namespace.
public void Cmd_run() {
	Exec_ns_ary();
}
|
||||
private void Exec_ns_ary() {
|
||||
if (pg_bgn == Int_.Max_value) return;
|
||||
if (load_tmpls) Xob_dump_mgr_base_.Load_all_tmpls(usr_dlg, wiki, page_src);
|
||||
time_bgn = System_.Ticks();
|
||||
Xob_dump_bmk dump_bmk = new Xob_dump_bmk();
|
||||
rate_mgr.Init();
|
||||
int ns_ary_len = ns_ary.length;
|
||||
for (int i = 0; i < ns_ary_len; i++) {
|
||||
int ns_id = ns_ary[i];
|
||||
if (ns_bgn != -1) { // ns_bgn set
|
||||
if (ns_id == ns_bgn) // ns_id is ns_bgn; null out ns_bgn and continue
|
||||
ns_bgn = -1;
|
||||
else // ns_id is not ns_bgn; keep looking
|
||||
continue;
|
||||
}
|
||||
dump_bmk.Ns_id_(ns_id);
|
||||
Exec_db_ary(i, dump_bmk, ns_id);
|
||||
if (ns_id == ns_end) exit_now = true; // ns_end set; exit
|
||||
if (exit_now) break; // exit_now b/c of pg_bgn, db_bgn or something else
|
||||
}
|
||||
Exec_commit(dump_bmk.Ns_id(), dump_bmk.Db_id(), dump_bmk.Pg_id(), Bry_.Empty);
|
||||
}
|
||||
private void Exec_db_ary(int ns_ord, Xob_dump_bmk dump_bmk, int ns_id) {
|
||||
int db_ary_len = db_ary.length;
|
||||
for (int i = 0; i < db_ary_len; i++) {
|
||||
int db_id = db_ary[i].Id();
|
||||
if (db_bgn != -1) { // db_bgn set
|
||||
if (db_id == db_bgn) // db_id is db_bgn; null out db_bgn and continue
|
||||
db_bgn = -1;
|
||||
else // db_id is not db_bgn; keep looking
|
||||
continue;
|
||||
}
|
||||
dump_bmk.Db_id_(db_id);
|
||||
Exec_db_itm(dump_bmk, ns_ord, ns_id, db_id);
|
||||
if (db_id == db_end) exit_now = true; // db_end set; exit;
|
||||
if (exit_now) return; // exit_now b/c of pg_bgn, db_bgn or something else
|
||||
}
|
||||
}
|
||||
private void Exec_db_itm(Xob_dump_bmk dump_bmk, int ns_ord, int ns_id, int db_id) {
|
||||
List_adp pages = List_adp_.New();
|
||||
Xow_ns ns = wiki.Ns_mgr().Ids_get_or_null(ns_id);
|
||||
int pg_id = pg_bgn;
|
||||
while (true) {
|
||||
page_src.Get_pages(pages, db_id, ns_id, pg_id);
|
||||
int pages_len = pages.Count();
|
||||
if (pages_len == 0) { // no more pages in db;
|
||||
if (pg_id > pg_bgn) // reset pg_bgn to 0 only if pg_bgn seen;
|
||||
pg_bgn = 0;
|
||||
return;
|
||||
}
|
||||
usr_dlg.Prog_many("", "", "fetched pages: ~{0}", pages_len);
|
||||
for (int i = 0; i < pages_len; i++) {
|
||||
Xowd_page_itm page = (Xowd_page_itm)pages.Get_at(i);
|
||||
dump_bmk.Pg_id_(pg_id);
|
||||
Exec_pg_itm(ns_ord, ns, db_id, page);
|
||||
if ( pg_id >= pg_end
|
||||
|| exec_count >= exec_count_max) {
|
||||
exit_now = true;
|
||||
}
|
||||
if (exit_now) return;
|
||||
pg_id = page.Id();
|
||||
}
|
||||
}
|
||||
}
|
||||
private void Exec_pg_itm(int ns_ord, Xow_ns ns, int db_id, Xowd_page_itm page) {
|
||||
try {
|
||||
if ((exec_count % progress_interval) == 0)
|
||||
usr_dlg.Prog_many("", "", "parsing: ns=~{0} db=~{1} pg=~{2} count=~{3} time=~{4} rate=~{5} ttl=~{6}"
|
||||
, ns.Id(), db_id, page.Id(), exec_count
|
||||
, System_.Ticks__elapsed_in_sec(time_bgn), rate_mgr.Rate_as_str(), String_.new_u8(page.Ttl_page_db()));
|
||||
ctx.Clear_all();
|
||||
byte[] page_src = page.Text();
|
||||
if (page_src != null) // some pages have no text; ignore them else null ref; PAGE: it.d:miercuri DATE:2015-12-05
|
||||
Exec_pg_itm_hook(ns_ord, ns, page, page_src);
|
||||
ctx.Wiki().Utl__bfr_mkr().Clear_fail_check(); // make sure all bfrs are released
|
||||
if (ctx.Wiki().Cache_mgr().Tmpl_result_cache().Count() > 50000)
|
||||
ctx.Wiki().Cache_mgr().Tmpl_result_cache().Clear();
|
||||
++exec_count;
|
||||
rate_mgr.Increment();
|
||||
if ((exec_count % poll_interval) == 0)
|
||||
poll_mgr.Poll();
|
||||
if ((exec_count % commit_interval) == 0)
|
||||
Exec_commit(ns.Id(), db_id, page.Id(), page.Ttl_page_db());
|
||||
if ((exec_count % cleanup_interval) == 0)
|
||||
Free();
|
||||
}
|
||||
catch (Exception exc) {
|
||||
bldr.Usr_dlg().Warn_many("", "", "parse failed: wiki=~{0} ttl=~{1} err=~{2}", wiki.Domain_str(), page.Ttl_full_db(), Err_.Message_gplx_log(exc));
|
||||
ctx.Wiki().Utl__bfr_mkr().Clear();
|
||||
this.Free();
|
||||
}
|
||||
}
|
||||
// subclass hook; called by Exec_pg_itm once per page whose text is not null
public abstract void Exec_pg_itm_hook(int ns_ord, Xow_ns ns, Xowd_page_itm page, byte[] page_text);
|
||||
private void Exec_commit(int ns_id, int db_id, int pg_id, byte[] ttl) {
|
||||
usr_dlg.Prog_many("", "", "committing: ns=~{0} db=~{1} pg=~{2} count=~{3} ttl=~{4}", ns_id, db_id, pg_id, exec_count, String_.new_u8(ttl));
|
||||
Exec_commit_hook();
|
||||
bmk_mgr.Save(ns_id, db_id, pg_id);
|
||||
if (exit_after_commit) exit_now = true;
|
||||
}
|
||||
// subclass hook; called by Exec_commit before the bookmark is saved
public abstract void Exec_commit_hook();
|
||||
// subclass hook; called by Cmd_end after its final Exec_commit
public abstract void Exec_end_hook();
|
||||
// intentionally empty: this command does nothing in Cmd_init
public void Cmd_init(Xob_bldr bldr) {}
|
||||
// intentionally empty: this command does nothing in Cmd_term
public void Cmd_term() {}
|
||||
public void Cmd_end() {
|
||||
if (!exit_now)
|
||||
pg_bgn = Int_.Max_value;
|
||||
Exec_commit(-1, -1, -1, Bry_.Empty);
|
||||
Exec_end_hook();
|
||||
Free();
|
||||
usr_dlg.Note_many("", "", "done: ~{0} ~{1}", exec_count, Decimal_adp_.divide_safe_(exec_count, System_.Ticks__elapsed_in_sec(time_bgn)).To_str("#,###.000"));
|
||||
}
|
||||
private void Free() {
|
||||
Xowe_wiki_.Rls_mem(wiki, true);
|
||||
}
|
||||
// Lets subclasses force the reset branch of Cmd_bgn (bookmark reset + Init_reset).
protected void Reset_db_y_() {
	this.reset_db = true;
}
|
||||
@Override public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
|
||||
if (ctx.Match(k, Invk_commit_interval_)) commit_interval = m.ReadInt("v");
|
||||
else if (ctx.Match(k, Invk_progress_interval_)) progress_interval = m.ReadInt("v");
|
||||
else if (ctx.Match(k, Invk_rate_interval_)) rate_mgr.Reset_interval_(m.ReadInt("v"));
|
||||
else if (ctx.Match(k, Invk_cleanup_interval_)) cleanup_interval = m.ReadInt("v");
|
||||
else if (ctx.Match(k, Invk_select_size_)) select_size = m.ReadInt("v") * Io_mgr.Len_mb;
|
||||
else if (ctx.Match(k, Invk_ns_bgn_)) {ns_bgn = m.ReadInt("v"); Notify_restoring("ns", ns_bgn);}
|
||||
else if (ctx.Match(k, Invk_db_bgn_)) {db_bgn = m.ReadInt("v"); Notify_restoring("db", db_bgn);}
|
||||
else if (ctx.Match(k, Invk_pg_bgn_)) {pg_bgn = m.ReadInt("v"); Notify_restoring("pg", pg_bgn);}
|
||||
else if (ctx.Match(k, Invk_ns_end_)) ns_end = m.ReadInt("v");
|
||||
else if (ctx.Match(k, Invk_db_end_)) db_end = m.ReadInt("v");
|
||||
else if (ctx.Match(k, Invk_pg_end_)) pg_end = m.ReadInt("v");
|
||||
else if (ctx.Match(k, Invk_load_tmpls_)) load_tmpls = m.ReadYn("v");
|
||||
else if (ctx.Match(k, Invk_poll_mgr)) return poll_mgr;
|
||||
else if (ctx.Match(k, Invk_reset_db_)) reset_db = m.ReadYn("v");
|
||||
else if (ctx.Match(k, Invk_exec_count_max_)) exec_count_max = m.ReadInt("v");
|
||||
else if (ctx.Match(k, Invk_exit_now_)) exit_now = m.ReadYn("v");
|
||||
else if (ctx.Match(k, Invk_exit_after_commit_)) exit_after_commit = m.ReadYn("v");
|
||||
else if (ctx.Match(k, Invk__manual_now_)) Datetime_now.Manual_and_freeze_(m.ReadDate("v"));
|
||||
else return Gfo_invk_.Rv_unhandled;
|
||||
return this;
|
||||
}
|
||||
private void Notify_restoring(String itm, int val) {
|
||||
usr_dlg.Note_many("", "", "restoring: itm=~{0} val=~{1}", itm, val);
|
||||
}
|
||||
public static final String
|
||||
Invk_progress_interval_ = "progress_interval_", Invk_commit_interval_ = "commit_interval_", Invk_cleanup_interval_ = "cleanup_interval_", Invk_rate_interval_ = "rate_interval_"
|
||||
, Invk_select_size_ = "select_size_"
|
||||
, Invk_ns_bgn_ = "ns_bgn_", Invk_db_bgn_ = "db_bgn_", Invk_pg_bgn_ = "pg_bgn_"
|
||||
, Invk_ns_end_ = "ns_end_", Invk_db_end_ = "db_end_", Invk_pg_end_ = "pg_end_"
|
||||
, Invk_load_tmpls_ = "load_tmpls_"
|
||||
, Invk_poll_mgr = "poll_mgr", Invk_reset_db_ = "reset_db_"
|
||||
, Invk_exec_count_max_ = "exec_count_max_", Invk_exit_now_ = "exit_now_", Invk_exit_after_commit_ = "exit_after_commit_"
|
||||
, Invk__manual_now_ = "manual_now_"
|
||||
;
|
||||
}
|
||||
class Xob_dump_mgr_base_ {
|
||||
public static void Load_all_tmpls(Gfo_usr_dlg usr_dlg, Xowe_wiki wiki, Xob_dump_src_id page_src) {
|
||||
List_adp pages = List_adp_.New();
|
||||
Xow_ns ns_tmpl = wiki.Ns_mgr().Ns_template();
|
||||
Xow_defn_cache defn_cache = wiki.Cache_mgr().Defn_cache();
|
||||
int cur_page_id = -1;
|
||||
int load_count = 0;
|
||||
usr_dlg.Note_many("", "", "tmpl_load init");
|
||||
while (true) {
|
||||
page_src.Get_pages(pages, 0, Xow_ns_.Tid__template, cur_page_id); // 0 is always template db
|
||||
int page_count = pages.Count();
|
||||
if (page_count == 0) break; // no more pages in db;
|
||||
Xowd_page_itm page = null;
|
||||
for (int i = 0; i < page_count; i++) {
|
||||
page = (Xowd_page_itm)pages.Get_at(i);
|
||||
Xot_defn_tmpl defn = new Xot_defn_tmpl();
|
||||
defn.Init_by_new(ns_tmpl, ns_tmpl.Gen_ttl(page.Ttl_page_db()), page.Text(), null, false); // NOTE: passing null, false; will be overriden later when Parse is called
|
||||
defn_cache.Add(defn, ns_tmpl.Case_match());
|
||||
++load_count;
|
||||
if ((load_count % 10000) == 0) usr_dlg.Prog_many("", "", "tmpl_loading: ~{0}", load_count);
|
||||
}
|
||||
cur_page_id = page.Id();
|
||||
}
|
||||
usr_dlg.Note_many("", "", "tmpl_load done: ~{0}", load_count);
|
||||
}
|
||||
public static Xow_db_file[] Init_text_files_ary(Xow_db_mgr core_data_mgr) {
|
||||
List_adp text_files_list = List_adp_.New();
|
||||
int len = core_data_mgr.Dbs__len();
|
||||
if (len == 1) return new Xow_db_file[] {core_data_mgr.Dbs__get_at(0)}; // single file: return core; note that there are no Tid = Text
|
||||
for (int i = 0; i < len; i++) {
|
||||
Xow_db_file file = core_data_mgr.Dbs__get_at(i);
|
||||
switch (file.Tid()) {
|
||||
case Xow_db_file_.Tid__text:
|
||||
case Xow_db_file_.Tid__text_solo:
|
||||
text_files_list.Add(file);
|
||||
break;
|
||||
}
|
||||
}
|
||||
return (Xow_db_file[])text_files_list.To_ary_and_clear(Xow_db_file.class);
|
||||
}
|
||||
}
|
||||
class Xob_dump_bmk_mgr {
|
||||
private Bry_bfr save_bfr = Bry_bfr_.Reset(1024);
|
||||
public Io_url Cfg_url() {return cfg_url;} public Xob_dump_bmk_mgr Cfg_url_(Io_url v) {cfg_url = v; return this;} private Io_url cfg_url;
|
||||
public void Reset() {Io_mgr.Instance.DeleteFil(cfg_url);}
|
||||
public void Load(Xoae_app app, Xob_dump_mgr_base dump_mgr) {
|
||||
app.Gfs_mgr().Run_url_for(dump_mgr, cfg_url);
|
||||
}
|
||||
public void Save(int ns_id, int db_id, int pg_id) {
|
||||
Save_itm(save_bfr, Xob_dump_mgr_base.Invk_ns_bgn_, ns_id);
|
||||
Save_itm(save_bfr, Xob_dump_mgr_base.Invk_db_bgn_, db_id);
|
||||
Save_itm(save_bfr, Xob_dump_mgr_base.Invk_pg_bgn_, pg_id);
|
||||
Io_mgr.Instance.SaveFilBfr(cfg_url, save_bfr);
|
||||
}
|
||||
private void Save_itm(Bry_bfr save_bfr, String key, int val) {
|
||||
String fmt = "{0}('{1}');\n";
|
||||
String str = String_.Format(fmt, key, val);
|
||||
save_bfr.Add_str_u8(str);
|
||||
}
|
||||
}
|
||||
class Xob_rate_mgr {
|
||||
private long time_bgn;
|
||||
private int item_len;
|
||||
private Bry_bfr save_bfr = Bry_bfr_.Reset(255);
|
||||
public int Reset_interval() {return reset_interval;} public Xob_rate_mgr Reset_interval_(int v) {reset_interval = v; return this;} private int reset_interval = 10000;
|
||||
public Io_url Log_file_url() {return log_file;} public Xob_rate_mgr Log_file_(Io_url v) {log_file = v; return this;} private Io_url log_file;
|
||||
public void Init() {time_bgn = System_.Ticks();}
|
||||
public void Increment() {
|
||||
++item_len;
|
||||
if (item_len % reset_interval == 0) {
|
||||
long time_end = System_.Ticks();
|
||||
Save(item_len, time_bgn, time_end);
|
||||
time_bgn = time_end;
|
||||
item_len = 0;
|
||||
}
|
||||
}
|
||||
private void Save(int count, long bgn, long end) {
|
||||
int dif = (int)(end - bgn) / 1000;
|
||||
Decimal_adp rate = Decimal_adp_.divide_safe_(count, dif);
|
||||
save_bfr
|
||||
.Add_str_a7(rate.To_str("#,##0.000")).Add_byte_pipe()
|
||||
.Add_int_variable(count).Add_byte_pipe()
|
||||
.Add_int_variable(dif).Add_byte_nl()
|
||||
;
|
||||
Io_mgr.Instance.AppendFilByt(log_file, save_bfr.To_bry_and_clear());
|
||||
}
|
||||
public String Rate_as_str() {return Int_.To_str(Rate());}
|
||||
public int Rate() {
|
||||
int elapsed = System_.Ticks__elapsed_in_sec(time_bgn);
|
||||
return Math_.Div_safe_as_int(item_len, elapsed);
|
||||
}
|
||||
}
|
||||
// Mutable bookmark holding the namespace id, db id and page id of the dump position; setters return this for chaining.
class Xob_dump_bmk {
	private int ns_id;
	private int db_id;
	private int pg_id;

	public int Ns_id() {
		return ns_id;
	}
	public Xob_dump_bmk Ns_id_(int v) {
		this.ns_id = v;
		return this;
	}
	public int Db_id() {
		return db_id;
	}
	public Xob_dump_bmk Db_id_(int v) {
		this.db_id = v;
		return this;
	}
	public int Pg_id() {
		return pg_id;
	}
	public Xob_dump_bmk Pg_id_(int v) {
		this.pg_id = v;
		return this;
	}
}
|
||||
|
||||
@@ -13,3 +13,50 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs.cmds; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
|
||||
import gplx.xowa.wikis.nss.*;
|
||||
import gplx.xowa.wikis.data.*;
|
||||
public class Xob_ns_file_itm {
|
||||
public Xob_ns_file_itm(byte db_file_tid, String file_name, int[] ns_ids) {
|
||||
this.db_file_tid = db_file_tid; this.file_name = file_name; this.ns_ids = ns_ids;
|
||||
this.nth_db_id = Nth_db_id_null; this.nth_db_idx = 1;
|
||||
}
|
||||
public byte Db_file_tid() {return db_file_tid;} private final byte db_file_tid;
|
||||
public String File_name() {return file_name;} private final String file_name;
|
||||
public int[] Ns_ids() {return ns_ids;} private final int[] ns_ids;
|
||||
public int Nth_db_id() {return nth_db_id;} public void Nth_db_id_(int v) {nth_db_id = v;} private int nth_db_id;
|
||||
public int Nth_db_idx() {return nth_db_idx;} private int nth_db_idx;
|
||||
public String Make_file_name() { // EX: en.wikipedia.org-text-ns.000-001.xowa
|
||||
String rv = String_.Format("-{0}{1}{2}.xowa" // EX: -text-ns.000-db.001.xowa
|
||||
, Xow_db_file_.To_key(db_file_tid) // text
|
||||
, String_.Len_eq_0(file_name) ? "" : "-" + file_name // if empty, don't add "ns.000" segment; produces en.wikipedia.org-text-001.xowa
|
||||
, nth_db_idx == 1 ? "" : "-db." + Int_.To_str_pad_bgn_zero(nth_db_idx, 3) // "-db.001"
|
||||
);
|
||||
++nth_db_idx;
|
||||
return rv;
|
||||
}
|
||||
public static final int Nth_db_id_null = -1;
|
||||
|
||||
public static void Init_ns_bldr_data(byte db_file_tid, Xow_ns_mgr ns_mgr, byte[] ns_file_map) {
|
||||
int ns_len = ns_mgr.Ords_len();
|
||||
Xob_ns_file_itm ns_file_itm_default = new Xob_ns_file_itm(db_file_tid, "", null);
|
||||
for (int i = 0; i < ns_len; ++i) {
|
||||
Xow_ns ns = ns_mgr.Ords_get_at(i);
|
||||
ns.Bldr_data_(ns_file_itm_default);
|
||||
}
|
||||
Xob_ns_file_itm_parser ns_itm_parser = new Xob_ns_file_itm_parser();
|
||||
ns_itm_parser.Ctor(db_file_tid, ns_mgr);
|
||||
Xob_ns_file_itm[] ns_itm_ary = ns_itm_parser.To_ary(ns_file_map);
|
||||
int ns_itm_ary_len = ns_itm_ary.length;
|
||||
for (int i = 0; i < ns_itm_ary_len; ++i) {
|
||||
Xob_ns_file_itm itm = ns_itm_ary[i];
|
||||
int[] ns_ids = itm.Ns_ids();
|
||||
int ns_ids_len = ns_ids.length;
|
||||
for (int j = 0; j < ns_ids_len; j++) {
|
||||
int ns_id = ns_ids[j];
|
||||
Xow_ns ns = ns_mgr.Ids_get_or_null(ns_id); if (ns == null) continue; // some dumps may not have ns; for example, pre-2013 dumps won't have Module (828)
|
||||
ns.Bldr_data_(itm);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,79 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs.cmds; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
|
||||
import gplx.langs.dsvs.*;
|
||||
import gplx.xowa.wikis.nss.*;
|
||||
public class Xob_ns_file_itm_parser extends Dsv_wkr_base {
|
||||
private byte[] ns_ids_bry; private String name; private final List_adp rslts = List_adp_.New();
|
||||
private Xow_ns_mgr ns_mgr; private byte db_file_tid; private boolean mode_each = false;
|
||||
public void Ctor(byte db_file_tid, Xow_ns_mgr ns_mgr) {
|
||||
this.db_file_tid = db_file_tid; this.ns_mgr = ns_mgr;
|
||||
this.mode_each = false; rslts.Clear();
|
||||
}
|
||||
@Override public Dsv_fld_parser[] Fld_parsers() {return new Dsv_fld_parser[] {Dsv_fld_parser_.Bry_parser, Dsv_fld_parser_.Bry_parser};}
|
||||
@Override public boolean Write_bry(Dsv_tbl_parser parser, int fld_idx, byte[] src, int bgn, int end) {
|
||||
switch (fld_idx) {
|
||||
case 0: ns_ids_bry = Bry_.Mid(src, bgn, end); return true;
|
||||
case 1: name = String_.new_u8(src, bgn, end); return true;
|
||||
default: return false;
|
||||
}
|
||||
}
|
||||
@Override public void Commit_itm(Dsv_tbl_parser parser, int pos) {
|
||||
if (ns_ids_bry == null) throw parser.Err_row_bgn("ns_itm missing ns_ids", pos);
|
||||
if (mode_each) return;
|
||||
|
||||
// mode is <each>; create map with each ns in separate file
|
||||
if (Bry_.Eq(ns_ids_bry, Ns_file_map__each)) {
|
||||
mode_each = true;
|
||||
int len = ns_mgr.Ords_len();
|
||||
for (int i = 0; i < len; ++i) {
|
||||
Xow_ns ns = ns_mgr.Ords_get_at(i);
|
||||
int ns_id = ns.Id();
|
||||
rslts.Add(new Xob_ns_file_itm(db_file_tid, "ns." + Int_.To_str_pad_bgn_zero(ns_id, 3), Int_ary_.New(ns_id)));
|
||||
}
|
||||
return;
|
||||
}
|
||||
// mode is <few>; create map with each ns in one file; // DB.FEW: DATE:2016-06-07
|
||||
else if (Bry_.Eq(ns_ids_bry, Ns_file_map__few)) {
|
||||
int len = ns_mgr.Ords_len();
|
||||
int[] ns_ary_for_few = new int[len];
|
||||
for (int i = 0; i < len; ++i) {
|
||||
ns_ary_for_few[i] = ns_mgr.Ords_get_at(i).Id();
|
||||
}
|
||||
rslts.Add(new Xob_ns_file_itm(db_file_tid, String_.Empty, ns_ary_for_few));
|
||||
return;
|
||||
}
|
||||
|
||||
int[] ns_ids = null;
|
||||
if (ns_ids_bry.length == 1 && ns_ids_bry[0] == Byte_ascii.Star) { // "*"
|
||||
int len = ns_mgr.Ords_len();
|
||||
ns_ids = new int[len];
|
||||
for (int i = 0; i < len; ++i)
|
||||
ns_ids[i] = ns_mgr.Ords_get_at(i).Id();
|
||||
}
|
||||
else
|
||||
ns_ids = Int_ary_.Parse(String_.new_u8(ns_ids_bry), ",");
|
||||
if (ns_ids.length == 0) throw Err_.new_wo_type("map.invalid.ns_missing", "src", this.Src());
|
||||
if (String_.Len_eq_0(name)) { // no name; auto-generate
|
||||
int ns_id_1st = ns_ids[0]; // take 1st ns_id
|
||||
name = "ns." + Int_.To_str_pad_bgn_zero(ns_id_1st, 3); // EX: ns.000
|
||||
}
|
||||
Xob_ns_file_itm ns_itm = new Xob_ns_file_itm(db_file_tid, name, ns_ids);
|
||||
rslts.Add(ns_itm);
|
||||
ns_itm.toString();
|
||||
ns_ids = null; name = null;
|
||||
}
|
||||
public Xob_ns_file_itm[] To_ary(byte[] bry) {
|
||||
this.Load_by_bry(bry);
|
||||
return (Xob_ns_file_itm[])rslts.To_ary(Xob_ns_file_itm.class);
|
||||
}
|
||||
public static final byte[] Ns_file_map__few = Bry_.new_a7("few"), Ns_file_map__each = Bry_.new_a7("<each>");
|
||||
/*
|
||||
"" -> no rules; return "default"; generates "text-001" and lumps all ns into it
|
||||
"*|<id>|3700|2" -> auto-generate per ns
|
||||
<single-file>
|
||||
<all>||gzip
|
||||
<each>||gzip
|
||||
*/
|
||||
}
|
||||
|
||||
@@ -13,3 +13,79 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs.cmds; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
|
||||
import gplx.core.stores.*;
|
||||
import gplx.xowa.wikis.data.*; import gplx.xowa.wikis.dbs.*; import gplx.dbs.*; import gplx.dbs.qrys.*; import gplx.xowa.wikis.data.tbls.*;
|
||||
class Xob_dump_src_id {
|
||||
private Xodb_mgr_sql db_mgr; private byte redirect;
|
||||
private String page_db_url; private int size_max;
|
||||
private Db_stmt text_stmt; int cur_text_db_idx = -1;
|
||||
public Xob_dump_src_id Init(Xowe_wiki wiki, byte redirect, int size_max) {
|
||||
this.db_mgr = wiki.Db_mgr_as_sql(); this.redirect = redirect;
|
||||
this.size_max = size_max;
|
||||
this.page_db_url = db_mgr.Core_data_mgr().Db__core().Url().Raw();
|
||||
return this;
|
||||
}
|
||||
public void Get_pages(List_adp list, int text_db_idx, int cur_ns, int prv_id) {
|
||||
DataRdr rdr = DataRdr_.Null;
|
||||
int size_len = 0;
|
||||
list.Clear();
|
||||
try {
|
||||
rdr = New_rdr(db_mgr, page_db_url, text_db_idx, cur_ns, prv_id, redirect);
|
||||
while (rdr.MoveNextPeer()) {
|
||||
Xowd_page_itm page = New_page(db_mgr, cur_ns, rdr);
|
||||
list.Add(page);
|
||||
size_len += page.Text_len();
|
||||
if (size_len > size_max)
|
||||
break;
|
||||
}
|
||||
}
|
||||
finally {rdr.Rls();}
|
||||
}
|
||||
private DataRdr New_rdr(Xodb_mgr_sql db_mgr, String page_db_url, int text_db_idx, int cur_ns, int prv_id, byte redirect) {
|
||||
if (cur_text_db_idx != text_db_idx) {
|
||||
cur_text_db_idx = text_db_idx;
|
||||
Xow_db_file text_db = db_mgr.Core_data_mgr().Dbs__get_by_id_or_fail(text_db_idx);
|
||||
Db_conn conn = text_db.Conn();
|
||||
String sql = String_.Format(Sql_select_clause, New_rdr__redirect_clause(redirect));
|
||||
text_stmt = conn.Stmt_sql(sql);
|
||||
}
|
||||
return text_stmt.Clear().Val_int(prv_id).Val_int(cur_ns).Exec_select();
|
||||
}
|
||||
private static Xowd_page_itm New_page(Xodb_mgr_sql db_mgr, int ns_id, DataRdr rdr) {
|
||||
Xowd_page_tbl page_core_tbl = db_mgr.Core_data_mgr().Tbl__page();
|
||||
Xowd_page_itm rv = new Xowd_page_itm();
|
||||
rv.Id_(rdr.ReadInt(page_core_tbl.Fld_page_id()));
|
||||
rv.Ns_id_(ns_id);
|
||||
rv.Ttl_page_db_(rdr.ReadBryByStr(page_core_tbl.Fld_page_title()));
|
||||
|
||||
String text_data_name = db_mgr.Core_data_mgr().Db__core().Tbl__text().Fld_text_data();
|
||||
byte[] text_data = rdr.ReadBry(text_data_name);
|
||||
text_data = db_mgr.Wiki().Appe().Zip_mgr().Unzip(db_mgr.Core_data_mgr().Props().Zip_tid_text(), text_data);
|
||||
rv.Text_(text_data);
|
||||
return rv;
|
||||
}
|
||||
private static String New_rdr__redirect_clause(byte redirect) {
|
||||
switch (redirect) {
|
||||
case Bool_.Y_byte: return Sql_select__redirect_y;
|
||||
case Bool_.N_byte: return Sql_select__redirect_n;
|
||||
case Bool_.__byte: return Sql_select__redirect__;
|
||||
default: throw Err_.new_unhandled(redirect);
|
||||
}
|
||||
}
|
||||
private static final String Sql_select_clause = String_.Concat_lines_nl
|
||||
( "SELECT p.page_id"
|
||||
, ", p.page_title"
|
||||
, ", t.text_data"
|
||||
, "FROM page_dump p"
|
||||
, " JOIN text t ON t.page_id = p.page_id"
|
||||
, "WHERE p.page_id > ?"
|
||||
, "AND p.page_namespace = ?{0}"
|
||||
, "ORDER BY p.page_id"
|
||||
);
|
||||
private static final String
|
||||
Sql_select__redirect_y = "\nAND p.page_is_redirect = 1"
|
||||
, Sql_select__redirect_n = "\nAND p.page_is_redirect = 0"
|
||||
, Sql_select__redirect__ = ""
|
||||
;
|
||||
}
|
||||
|
||||
@@ -13,3 +13,31 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs.cmds.diffs; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.cmds.*;
|
||||
import gplx.core.brys.*; import gplx.xowa.wikis.*;
|
||||
class Bfr_arg__dump_dir implements Bfr_arg { // .dump_dir = "/xowa/wiki/en.wikipedia.org/"
|
||||
private final Xow_wiki wiki;
|
||||
public Bfr_arg__dump_dir(Xow_wiki wiki) {this.wiki = wiki;}
|
||||
public void Bfr_arg__add(Bry_bfr bfr) {
|
||||
bfr.Add(wiki.Fsys_mgr().Root_dir().RawBry());
|
||||
}
|
||||
}
|
||||
class Bfr_arg__dump_core implements Bfr_arg {// .dump_core = "en.wikipedia.org-core.xowa"
|
||||
private final Xow_wiki wiki;
|
||||
public Bfr_arg__dump_core(Xow_wiki wiki) {this.wiki = wiki;}
|
||||
public void Bfr_arg__add(Bry_bfr bfr) {
|
||||
bfr.Add_str_u8(wiki.Data__core_mgr().Db__core().Url().NameAndExt());
|
||||
}
|
||||
}
|
||||
class Bfr_arg__dump_domain implements Bfr_arg {// .dump_domain = en.wikipedia.org
|
||||
private final Xow_wiki wiki;
|
||||
public Bfr_arg__dump_domain(Xow_wiki wiki) {this.wiki = wiki;}
|
||||
public void Bfr_arg__add(Bry_bfr bfr) {
|
||||
bfr.Add(wiki.Domain_bry());
|
||||
}
|
||||
}
|
||||
class Bfr_arg__dir_spr implements Bfr_arg {// .dir_spr = "/"
|
||||
public void Bfr_arg__add(Bry_bfr bfr) {
|
||||
bfr.Add_byte(gplx.core.envs.Op_sys.Cur().Fsys_dir_spr_byte());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,32 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs.cmds.diffs; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.cmds.*;
|
||||
import gplx.xowa.bldrs.wkrs.*;
|
||||
public class Xob_diff_build_cmd implements Xob_cmd {
|
||||
private final Xob_bldr bldr; private final Xowe_wiki wiki;
|
||||
private String prev_url, curr_url, diff_url; private int commit_interval;
|
||||
private int[] db_ids = Int_ary_.Empty; private String bld_name = "all";
|
||||
public Xob_diff_build_cmd(Xob_bldr bldr, Xowe_wiki wiki) {this.bldr = bldr; this.wiki = wiki;}
|
||||
public String Cmd_key() {return Xob_cmd_keys.Key_diff_build;}
|
||||
public Xob_cmd Cmd_clone(Xob_bldr bldr, Xowe_wiki wiki) {return null;}
|
||||
public void Cmd_run() {
|
||||
new Xob_diff_build_wkr(bldr, wiki, prev_url, curr_url, diff_url, commit_interval, new Xowd_tbl_mapr(bld_name, db_ids)).Exec();
|
||||
}
|
||||
public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
|
||||
if (ctx.Match(k, Invk__prev_url_)) prev_url = m.ReadStr("v");
|
||||
else if (ctx.Match(k, Invk__curr_url_)) curr_url = m.ReadStr("v");
|
||||
else if (ctx.Match(k, Invk__diff_url_)) diff_url = m.ReadStr("v");
|
||||
else if (ctx.Match(k, Invk__commit_interval_)) commit_interval = m.ReadInt("v");
|
||||
else if (ctx.Match(k, Invk__db_ids_)) db_ids = Int_ary_.Parse(m.ReadStr("v"), "|");
|
||||
else if (ctx.Match(k, Invk__bld_name_)) bld_name = m.ReadStr("v");
|
||||
else return Gfo_invk_.Rv_unhandled;
|
||||
return this;
|
||||
}
|
||||
public void Cmd_init(Xob_bldr bldr) {}
|
||||
public void Cmd_bgn(Xob_bldr bldr) {}
|
||||
public void Cmd_end() {}
|
||||
public void Cmd_term() {}
|
||||
private static final String Invk__prev_url_ = "prev_url_", Invk__curr_url_ = "curr_url_", Invk__diff_url_ = "diff_url_"
|
||||
, Invk__commit_interval_ = "commit_interval_", Invk__db_ids_ = "db_ids_", Invk__bld_name_ = "bld_name_";
|
||||
}
|
||||
|
||||
@@ -13,3 +13,81 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs.cmds.diffs; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.cmds.*;
|
||||
import gplx.core.brys.*; import gplx.core.brys.fmts.*;
|
||||
import gplx.dbs.*; import gplx.dbs.metas.*; import gplx.dbs.diffs.*; import gplx.dbs.diffs.builds.*; import gplx.dbs.diffs.itms.*;
|
||||
import gplx.xowa.wikis.data.*; import gplx.xowa.wikis.data.tbls.*;
|
||||
class Xob_diff_build_wkr {
|
||||
private final Gfdb_diff_bldr dif_bldr = new Gfdb_diff_bldr();
|
||||
private final Xowe_wiki wiki;
|
||||
private Db_conn old_conn, new_conn, dif_conn;
|
||||
private final Xowd_tbl_mapr tbl_mapr;
|
||||
public Xob_diff_build_wkr(Xob_bldr bldr, Xowe_wiki wiki, String old_url, String new_url, String dif_url, int commit_interval, Xowd_tbl_mapr tbl_mapr) {
|
||||
this.wiki = wiki;
|
||||
wiki.Init_by_wiki();
|
||||
Bry_fmt url_fmt = Bry_fmt.New("").Args_(New_url_args(wiki, tbl_mapr.Name));
|
||||
Bry_bfr tmp_bfr = Bry_bfr_.New();
|
||||
old_conn = New_conn(tmp_bfr, wiki, url_fmt, Bool_.N, old_url);
|
||||
new_conn = New_conn(tmp_bfr, wiki, url_fmt, Bool_.N, new_url);
|
||||
dif_conn = New_conn(tmp_bfr, wiki, url_fmt, Bool_.Y, dif_url);
|
||||
this.tbl_mapr = tbl_mapr;
|
||||
}
|
||||
public void Exec() {
|
||||
Gdif_core dif_core = new Gdif_core(dif_conn);
|
||||
String name = String_.Format("{0}|{1}|diffs|{2}", wiki.Domain_str(), tbl_mapr.Name, wiki.Props().Modified_latest().XtoStr_fmt(DateAdp_.Fmt__yyyyMMdd)); // EX: "simple.wikipedia.org|text|diffs|20160112"
|
||||
String made_by = wiki.App().User().Key();
|
||||
Gdif_job_itm job_itm = dif_core.New_job(name, made_by);
|
||||
Gdif_bldr_ctx ctx = new Gdif_bldr_ctx().Init(dif_core, job_itm);
|
||||
Gfdb_diff_wkr__db dif_wkr = new Gfdb_diff_wkr__db();
|
||||
Gdif_db dif_db = dif_core.Db();
|
||||
dif_wkr.Init_conn(dif_db, 1000);
|
||||
dif_bldr.Init(dif_wkr);
|
||||
// wiki.Data__core_mgr().Db__core().Conn().Conn_info();
|
||||
Xow_db_file[] db_file_ary = wiki.Data__core_mgr().Db__core().Tbl__db().Select_all(wiki.Data__core_mgr().Props(), Io_url_.Empty);
|
||||
int db_files_len = db_file_ary.length;
|
||||
for (int i = 0; i < db_files_len; ++i) {
|
||||
Xow_db_file db_file = db_file_ary[i];
|
||||
if (tbl_mapr.Db_ids__has(db_file.Tid()))
|
||||
Compare(ctx);
|
||||
}
|
||||
// int old_tbl_len = old_tbl_mgr.Len();
|
||||
// for (int i = 0; i < old_tbl_len; ++i) {
|
||||
// Dbmeta_tbl_itm old_tbl = old_tbl_mgr.Get_at(i);
|
||||
// Dbmeta_tbl_itm new_tbl = new_tbl_mgr.Get_by(old_tbl.Name());
|
||||
// if (new_tbl == null) {
|
||||
// // delete all
|
||||
// }
|
||||
// }
|
||||
}
|
||||
private void Compare(Gdif_bldr_ctx ctx) {
|
||||
Dbmeta_tbl_mgr old_tbl_mgr = old_conn.Meta_mgr();
|
||||
Dbmeta_tbl_mgr new_tbl_mgr = old_conn.Meta_mgr();
|
||||
int new_tbl_len = new_tbl_mgr.Len();
|
||||
for (int i = 0; i < new_tbl_len; ++i) {
|
||||
Dbmeta_tbl_itm new_tbl = new_tbl_mgr.Get_at(i);
|
||||
Dbmeta_tbl_itm old_tbl = old_tbl_mgr.Get_by(new_tbl.Name()); if (old_tbl == null) continue;
|
||||
Gfdb_diff_tbl dif_tbl = Gfdb_diff_tbl.New(new_tbl);
|
||||
dif_bldr.Compare(ctx, dif_tbl, old_conn, new_conn);
|
||||
// save txn
|
||||
}
|
||||
}
|
||||
public static Db_conn New_conn(Bry_bfr tmp_bfr, Xow_wiki wiki, Bry_fmt fmtr, boolean autocreate, String url_fmt) {
|
||||
fmtr.Fmt_(url_fmt).Bld_many(tmp_bfr);
|
||||
return Db_conn_bldr.Instance.Get_or_autocreate(autocreate, Io_url_.new_any_(tmp_bfr.To_str_and_clear()));
|
||||
}
|
||||
private static Bfr_fmt_arg[] New_url_args(Xow_wiki wiki, String db_mapr_name) {
|
||||
Bfr_fmt_arg[] rv = new Bfr_fmt_arg[]
|
||||
{ new Bfr_fmt_arg(Bry_.new_a7(".dump_dir"), new Bfr_arg__dump_dir(wiki))
|
||||
, new Bfr_fmt_arg(Bry_.new_a7(".dump_core"), new Bfr_arg__dump_core(wiki))
|
||||
, new Bfr_fmt_arg(Bry_.new_a7(".dump_domain"), new Bfr_arg__dump_domain(wiki))
|
||||
, new Bfr_fmt_arg(Bry_.new_a7(".dir_spr"), new Bfr_arg__dir_spr())
|
||||
, new Bfr_fmt_arg(Bry_.new_a7(".dif_name"), Bfr_arg_.New_bry(db_mapr_name))
|
||||
};
|
||||
return rv;
|
||||
}
|
||||
//old_url='~{.dump_dir}-prev/~{.dump_core}';
|
||||
//new_url='~{.dump_dir}/~{.dump_core}';
|
||||
//dif_url='~{.dump_dir}/~{.dump_domain}-{.dif_name}-diff.xowa';
|
||||
// old_conn='data source="~{.dump_dir}/~{.dump_core}";url='
|
||||
// dif_conn='gplx_key=sqlite;url='
|
||||
}
|
||||
|
||||
@@ -13,3 +13,35 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs.cmds.diffs; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.cmds.*;
|
||||
import gplx.dbs.*; import gplx.dbs.metas.*; import gplx.dbs.diffs.*;
|
||||
class Xob_diff_manifest {
|
||||
// page|page_id|*
|
||||
public static Gfdb_diff_tbl[] Parse(Db_conn conn, String src_str) {
|
||||
// byte[][] rows_ary = Bry_split_.Split_lines(Bry_.new_u8(src_str));
|
||||
// int rows_len = rows_ary.length;
|
||||
// for (int i = 0; i < rows_len; ++i) {
|
||||
// byte[] row = rows_ary[i];
|
||||
// byte[][] itms_ary = Bry_split_.Split(row, Byte_ascii.Pipe);
|
||||
// byte[] tbl_name = itms_ary[0];
|
||||
// conn.Meta_tbl_exists
|
||||
// int itms_len = itms_ary.length;
|
||||
// for (int j = 0; j < itms_len; ++j) {
|
||||
// byte[] itm = itms_ary[j];
|
||||
// Tfds.Dbg(itm);
|
||||
// }
|
||||
// Gfdb_diff_tbl tbl = new Gfdb_diff_tbl(String_.new_u8(itms_ary[0]),keys, vals, Db_rdr_.Empty);
|
||||
// }
|
||||
return null;
|
||||
}
|
||||
}
|
||||
/*
|
||||
class Wkr {
|
||||
public void Make() {
|
||||
sdif_db_mgr sdif_db = new Sdif_db_mgr(conn);
|
||||
for (int i = 0; i < rhs_tbl_len; ++i) {
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
@@ -13,3 +13,20 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs.cmds.diffs; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.cmds.*;
|
||||
import gplx.xowa.wikis.data.*;
|
||||
/**
 * Pairs a table name with the ids of the databases associated with it.
 * Both members are set once in the constructor and exposed as public final fields.
 */
class Xowd_tbl_mapr {
	/**
	 * @param name   table name; stored as-is in {@link #Name}
	 * @param db_ids database ids; the array is stored by reference (not copied) in {@link #Db_ids}
	 */
	public Xowd_tbl_mapr(String name, int[] db_ids) {
		this.Name = name;
		this.Db_ids = db_ids;
	}
	public final String Name;
	public final int[] Db_ids;

	/**
	 * Reports whether {@code id} is one of the ids in {@link #Db_ids}.
	 * Previously this was a stub that ignored {@code id} and always answered true.
	 *
	 * @param id database id to look for
	 * @return true if {@code id} occurs in Db_ids; false if it does not, or if Db_ids is null or empty
	 */
	public boolean Db_ids__has(int id) {
		if (Db_ids == null) return false;	// constructor does not reject null, so guard here
		int len = Db_ids.length;
		for (int i = 0; i < len; ++i) {
			if (Db_ids[i] == id) return true;
		}
		return false;
	}

	// disabled draft (incomplete): fill table names by db type
	//	private static List_adp Fill_tbl_names(List_adp rv, int db_tid) {
	//		switch (db_tid) {
	//			case Xow_db_file_.Tid__cat:
	//				return
	//				break;
	//		}
}
|
||||
|
||||
@@ -13,3 +13,53 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs.cmds.texts; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.cmds.*;
|
||||
import gplx.xowa.xtns.wbases.*;
|
||||
import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.wkrs.*; import gplx.xowa.bldrs.xmls.*; import gplx.xowa.bldrs.cmds.texts.xmls.*;
|
||||
import gplx.xowa.bldrs.css.*; import gplx.xowa.wikis.domains.*;
|
||||
import gplx.xowa.wikis.data.*;
|
||||
public abstract class Xob_init_base implements Xob_cmd, Gfo_invk {
|
||||
private Xob_bldr bldr; private Xowe_wiki wiki; private Gfo_usr_dlg usr_dlg;
|
||||
private byte wbase_enabled = Bool_.__byte;
|
||||
public Xob_init_base Ctor(Xob_bldr bldr, Xowe_wiki wiki) {this.bldr = bldr; this.wiki = wiki; this.usr_dlg = wiki.Appe().Usr_dlg(); return this;}
|
||||
public abstract String Cmd_key();
|
||||
public Xob_cmd Cmd_clone(Xob_bldr bldr, Xowe_wiki wiki) {return null;}
|
||||
public abstract void Cmd_ini_wdata(Xob_bldr bldr, Xowe_wiki wiki);
|
||||
public abstract void Cmd_run_end(Xowe_wiki wiki);
|
||||
@gplx.Virtual public void Cmd_init(Xob_bldr bldr) { // add other cmds; EX: wikidata
|
||||
bldr.Import_marker().Bgn(wiki);
|
||||
if (wbase_enabled == Bool_.__byte) wbase_enabled = wiki.Domain_tid() == Xow_domain_tid_.Tid__wikidata ? Bool_.Y_byte : Bool_.N_byte; // if wbase_enabled not explicitly set, set it to y if wiki is "www.wikidata.org"
|
||||
if (wbase_enabled == Bool_.Y_byte) // if wbase_enabled, auto-add wdata_wkrs bldr
|
||||
this.Cmd_ini_wdata(bldr, wiki);
|
||||
}
|
||||
public void Cmd_bgn(Xob_bldr bldr) {}
|
||||
public void Cmd_run() { // parse site_info
|
||||
gplx.core.ios.streams.Io_stream_rdr src_rdr = wiki.Import_cfg().Src_rdr(); usr_dlg.Plog_many("", "", "reading dump header: ~{0}", src_rdr.Url().Raw());
|
||||
Xob_siteinfo_parser_.Parse(Xob_siteinfo_parser_.Extract(src_rdr), wiki);
|
||||
this.Cmd_run_end(wiki); // save site info
|
||||
}
|
||||
public void Cmd_end() {
|
||||
wiki.Appe().Gui_mgr().Html_mgr().Portal_mgr().Wikis().Itms_reset(); // dirty wiki list so that next refresh will load itm
|
||||
|
||||
// if (wiki.Appe().Setup_mgr().Dump_mgr().Css_wiki_update()) { // NOTE: used to be option, but was no longer being set; may need to reinstate; DATE:2016-12-21
|
||||
Io_url url = wiki.Appe().Fsys_mgr().Wiki_css_dir(wiki.Domain_str()).GenSubFil(Xoa_css_extractor.Css_wiki_name);
|
||||
usr_dlg.Log_many("", "", "deleting css: ~{0}", url.Raw());
|
||||
Io_mgr.Instance.DeleteFil_args(url).MissingFails_off().Exec();
|
||||
// }
|
||||
|
||||
// always save xowa_cfg data at end of init step, not term step; else, other builder commands will load empty cfg and import data will be null; DATE:2017-02-20
|
||||
if (!gplx.core.envs.Env_.Mode_testing()) { // need else Xob_init_base_tst fails; DATE:2017-02-20
|
||||
Xowd_cfg_tbl_.Upsert__import(wiki);
|
||||
Xowd_cfg_tbl_.Upsert__create(wiki);
|
||||
}
|
||||
}
|
||||
@gplx.Virtual public void Cmd_term() {}
|
||||
@gplx.Virtual public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
|
||||
if (ctx.Match(k, Invk_src_xml_fil_)) wiki.Import_cfg().Src_fil_xml_(m.ReadIoUrl("v"));
|
||||
else if (ctx.Match(k, Invk_src_bz2_fil_)) wiki.Import_cfg().Src_fil_bz2_(m.ReadIoUrl("v"));
|
||||
else if (ctx.Match(k, Invk_wdata_enabled_)) wbase_enabled = m.ReadYn("v") ? Bool_.Y_byte : Bool_.N_byte;
|
||||
else if (ctx.Match(k, Invk_owner)) return bldr.Cmd_mgr();
|
||||
else return Gfo_invk_.Rv_unhandled;
|
||||
return this;
|
||||
} private static final String Invk_src_xml_fil_ = "src_xml_fil_", Invk_src_bz2_fil_ = "src_bz2_fil_", Invk_owner = "owner", Invk_wdata_enabled_ = "wdata_enabled_";
|
||||
}
|
||||
|
||||
@@ -13,3 +13,43 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs.cmds.texts; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.cmds.*;
|
||||
import gplx.xowa.bldrs.wkrs.*; import gplx.xowa.bldrs.xmls.*; import gplx.xowa.xtns.wbases.*; import gplx.xowa.wikis.data.tbls.*; import gplx.xowa.wikis.dbs.*;
|
||||
public abstract class Xob_term_base implements Xob_cmd, Gfo_invk {
|
||||
public Xob_term_base Ctor(Xob_bldr bldr, Xowe_wiki wiki) {this.wiki = wiki; return this;} private Xowe_wiki wiki;
|
||||
public abstract String Cmd_key();
|
||||
public Xob_cmd Cmd_clone(Xob_bldr bldr, Xowe_wiki wiki) {return null;}
|
||||
public void Cmd_init(Xob_bldr bldr) {}
|
||||
public void Cmd_bgn(Xob_bldr bldr) {}
|
||||
public void Cmd_run() {}
|
||||
public void Cmd_end() {
|
||||
Xoae_app app = wiki.Appe();
|
||||
|
||||
// dirty wiki list so that next refresh will load wiki
|
||||
app.Gui_mgr().Html_mgr().Portal_mgr().Wikis().Itms_reset();
|
||||
|
||||
// clear cache, else import will load new page with old items from cache; DATE:2013-11-21
|
||||
app.Free_mem(false);
|
||||
|
||||
// update main page
|
||||
byte[] new_main_page = gplx.xowa.langs.msgs.Xow_mainpage_finder.Find_or(wiki, wiki.Props().Siteinfo_mainpage()); // get new main_page from mainpage_finder
|
||||
wiki.Props().Main_page_(new_main_page);
|
||||
wiki.Data__core_mgr().Db__core().Tbl__cfg().Upsert_bry(gplx.xowa.wikis.data.Xowd_cfg_key_.Grp__wiki_init, gplx.xowa.wikis.data.Xowd_cfg_key_.Key__init__main_page , new_main_page);
|
||||
|
||||
// remove import marker
|
||||
app.Bldr().Import_marker().End(wiki);
|
||||
|
||||
// flag init_needed prior to show; dir_info will show page_txt instead of page_gz;
|
||||
wiki.Init_needed_(true);
|
||||
|
||||
// force load; needed to pick up MediaWiki ns for MediaWiki:mainpage
|
||||
wiki.Init_assert();
|
||||
|
||||
Cmd_end_hook();
|
||||
}
|
||||
public abstract void Cmd_end_hook();
|
||||
public void Cmd_term() {}
|
||||
public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,29 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs.cmds.texts.sqls; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.cmds.*; import gplx.xowa.bldrs.cmds.texts.*;
|
||||
import gplx.xowa.bldrs.*; import gplx.xowa.apps.apis.xowa.bldrs.imports.*;
|
||||
import gplx.xowa.xtns.wbases.imports.*;
|
||||
public class Xob_init_cmd extends Xob_init_base {
|
||||
public Xob_init_cmd(Xob_bldr bldr, Xowe_wiki wiki) {this.Ctor(bldr, wiki);}
|
||||
@Override public String Cmd_key() {return Xob_cmd_keys.Key_text_init;}
|
||||
@Override public void Cmd_ini_wdata(Xob_bldr bldr, Xowe_wiki wiki) {
|
||||
bldr.Cmd_mgr().Add_cmd(wiki, Xob_cmd_keys.Key_wbase_qid);
|
||||
bldr.Cmd_mgr().Add_cmd(wiki, Xob_cmd_keys.Key_wbase_pid);
|
||||
}
|
||||
@Override public void Cmd_init(Xob_bldr bldr) {
|
||||
super.Cmd_init(bldr);
|
||||
// gplx.dbs.qrys.bats.Db_batch__journal_wal.Batch__init(gplx.dbs.Db_conn_pool.Instance.Batch_mgr());
|
||||
}
|
||||
|
||||
@Override public void Cmd_run_end(Xowe_wiki wiki) {
|
||||
if (gplx.xowa.wikis.data.Xow_db_file__core_.Find_core_fil_or_null(wiki) != null)
|
||||
throw wiki.Appe().Bldr().Usr_dlg().Fail_many("", "", "directory must not contain any .xowa or .sqlite3 files: dir=~{0}", wiki.Fsys_mgr().Root_dir().Raw());
|
||||
Xowe_wiki_.Create(wiki, wiki.Import_cfg().Src_rdr_len(), wiki.Import_cfg().Src_fil().NameOnly());
|
||||
}
|
||||
@Override public void Cmd_term() {
|
||||
super.Cmd_term();
|
||||
// gplx.dbs.qrys.bats.Db_batch__journal_wal.Batch__term(gplx.dbs.Db_conn_pool.Instance.Batch_mgr());
|
||||
// gplx.dbs.Db_conn_pool.Instance.Rls_all();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,16 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs.cmds.texts.sqls; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.cmds.*; import gplx.xowa.bldrs.cmds.texts.*;
|
||||
import gplx.xowa.wikis.data.*; import gplx.xowa.wikis.data.tbls.*;
|
||||
public class Xob_ns_to_db_wkr__text implements Xob_ns_to_db_wkr {
|
||||
public byte Db_tid() {return Xow_db_file_.Tid__text;}
|
||||
public void Tbl_init(Xow_db_file db) {
|
||||
Xowd_text_tbl tbl = db.Tbl__text();
|
||||
tbl.Create_tbl();
|
||||
tbl.Insert_bgn();
|
||||
}
|
||||
public void Tbl_term(Xow_db_file db) {
|
||||
db.Tbl__text().Insert_end();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,97 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs.cmds.texts.sqls; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.cmds.*; import gplx.xowa.bldrs.cmds.texts.*;
|
||||
import gplx.dbs.*; import gplx.core.ios.*; import gplx.xowa.bldrs.cmds.*;
|
||||
import gplx.xowa.bldrs.wkrs.*;
|
||||
import gplx.xowa.wikis.nss.*;
|
||||
import gplx.xowa.wikis.data.*; import gplx.xowa.wikis.data.tbls.*; import gplx.xowa.wikis.dbs.*;
|
||||
import gplx.xowa.wikis.*; import gplx.xowa.bldrs.filters.dansguardians.*; import gplx.xowa.apps.apis.xowa.bldrs.imports.*;
|
||||
import gplx.xowa.parsers.utils.*; import gplx.xowa.addons.bldrs.files.cmds.*; import gplx.xowa.addons.bldrs.files.dbs.*;
|
||||
public class Xob_page_cmd extends Xob_itm_basic_base implements Xob_page_wkr, Gfo_invk {
|
||||
private Xow_db_mgr db_mgr; private Db_idx_mode idx_mode = Db_idx_mode.Itm_end; private Xowd_page_tbl page_core_tbl; private Io_stream_zip_mgr text_zip_mgr; private byte text_zip_tid;
|
||||
private Xop_redirect_mgr redirect_mgr; private Xob_redirect_tbl redirect_tbl; private boolean redirect_id_enabled;
|
||||
private DateAdp modified_latest = DateAdp_.MinValue; private int page_count_all, page_count_main = 0; private int commit_interval = 100000; // 100 k
|
||||
private Dg_match_mgr dg_match_mgr; private Xob_ns_to_db_mgr ns_to_db_mgr;
|
||||
public Xob_page_cmd(Xob_bldr bldr, Xowe_wiki wiki) {this.Cmd_ctor(bldr, wiki);}
|
||||
public String Page_wkr__key() {return Xob_cmd_keys.Key_text_page;}
|
||||
public void Page_wkr__bgn() {
|
||||
Xoae_app app = wiki.Appe();
|
||||
this.redirect_mgr = wiki.Redirect_mgr();
|
||||
this.db_mgr = wiki.Db_mgr_as_sql().Core_data_mgr();
|
||||
this.page_core_tbl = db_mgr.Tbl__page();
|
||||
this.text_zip_mgr = wiki.Utl__zip_mgr();
|
||||
this.text_zip_tid = Xobldr_cfg.Zip_mode__text(app);
|
||||
|
||||
// NOTE: rebuild needed to add canonical namespaces as templates; else, redirects to English namespaces won't work in non-English wikis; EX: gu.w and #REDIRECT [[Template:COLON]]; DATE:2017-02-20
|
||||
Xow_ns_mgr_.rebuild_(wiki.Lang(), wiki.Ns_mgr());
|
||||
|
||||
this.ns_to_db_mgr = new Xob_ns_to_db_mgr(new Xob_ns_to_db_wkr__text(), db_mgr, Xobldr_cfg.Max_size__text(app));
|
||||
this.dg_match_mgr = Dg_match_mgr.New_mgr(app, wiki);
|
||||
if (dg_match_mgr != null) redirect_id_enabled = true; // always enable redirect_id if dg_match_mgr enabled; DATE:2016-01-04
|
||||
if (redirect_id_enabled) {
|
||||
this.redirect_tbl = new Xob_redirect_tbl(wiki.Fsys_mgr().Root_dir(), gplx.langs.htmls.encoders.Gfo_url_encoder_.Http_url_ttl).Create_table();
|
||||
redirect_tbl.Conn().Txn_bgn("bldr__page__redirect");
|
||||
}
|
||||
app.Bldr().Dump_parser().Trie_tab_del_(); // disable swapping 	 for \t
|
||||
byte[] ns_file_map = Xobldr_cfg.New_ns_file_map(app, wiki.Import_cfg().Src_rdr_len());
|
||||
Xob_ns_file_itm.Init_ns_bldr_data(Xow_db_file_.Tid__text, wiki.Ns_mgr(), ns_file_map);
|
||||
if (idx_mode.Tid_is_bgn()) page_core_tbl.Create_idx();
|
||||
page_core_tbl.Insert_bgn();
|
||||
usr_dlg.Prog_many("", "", "import.page.bgn");
|
||||
}
|
||||
public void Page_wkr__run(Xowd_page_itm page) {
|
||||
int id = page.Id();
|
||||
DateAdp modified = page.Modified_on(); if (modified.compareTo(modified_latest) == CompareAble_.More) modified_latest = modified;
|
||||
byte[] text_raw = page.Text(); int text_raw_len = page.Text_len();
|
||||
Xoa_ttl redirect_ttl = redirect_mgr.Extract_redirect(text_raw, text_raw_len); boolean redirect = redirect_ttl != null;
|
||||
page.Redirected_(redirect);
|
||||
Xow_ns ns = page.Ns();
|
||||
int random_int = ns.Count() + 1; ns.Count_(random_int);
|
||||
if (dg_match_mgr != null) {
|
||||
if (dg_match_mgr.Match(1, id, ns.Id(), page.Ttl_page_db(), page.Ttl_full_db(), wiki.Lang(), text_raw)) return;
|
||||
}
|
||||
byte[] text_zip = text_zip_mgr.Zip(text_zip_tid, text_raw);
|
||||
Xow_db_file text_db = ns_to_db_mgr.Get_by_ns(ns.Bldr_data(), text_zip.length);
|
||||
try {db_mgr.Create_page(page_core_tbl, text_db.Tbl__text(), id, page.Ns_id(), page.Ttl_page_db(), redirect, modified, text_zip, text_raw_len, random_int, text_db.Id(), -1);}
|
||||
catch (Exception e) {
|
||||
throw Err_.new_exc(e, "bldr", "create page in db failed; skipping page", "id", id, "ns", page.Ns_id(), "name", page.Ttl_page_db(), "redirect", redirect, "modified", modified, "text_len", text_raw_len, "text_db_id", text_db.Id());
|
||||
}
|
||||
if (redirect && redirect_id_enabled)
|
||||
redirect_tbl.Insert(id, page.Ttl_page_db(), redirect_ttl);
|
||||
++page_count_all;
|
||||
if (ns.Id_is_main() && !page.Redirected()) ++page_count_main;
|
||||
if (page_count_all % commit_interval == 0) {
|
||||
page_core_tbl.Conn().Txn_sav(); text_db.Conn().Txn_sav();
|
||||
if (redirect_id_enabled) redirect_tbl.Conn().Txn_sav();
|
||||
if (dg_match_mgr != null) dg_match_mgr.Commit();
|
||||
}
|
||||
}
|
||||
public void Page_wkr__run_cleanup() {
|
||||
usr_dlg.Log_many("", "", "import.page: insert done; committing pages; pages=~{0}", page_count_all);
|
||||
ns_to_db_mgr.Rls_all();
|
||||
page_core_tbl.Insert_end();
|
||||
}
|
||||
public void Page_wkr__end() {
|
||||
if (dg_match_mgr != null) dg_match_mgr.Rls();
|
||||
usr_dlg.Log_many("", "", "import.page: updating core stats");
|
||||
Xow_ns_mgr ns_mgr = wiki.Ns_mgr();
|
||||
Xow_db_file db_core = db_mgr.Db__core();
|
||||
db_core.Tbl__site_stats().Update(page_count_main, page_count_all, ns_mgr.Ns_file().Count()); // save page stats
|
||||
db_core.Tbl__ns().Insert(ns_mgr); // save ns
|
||||
if (idx_mode.Tid_is_end()) page_core_tbl.Create_idx();
|
||||
if (redirect_id_enabled) {
|
||||
redirect_tbl.Conn().Txn_end();
|
||||
redirect_tbl.Update_trg_redirect_id(db_core.Url(), 1);
|
||||
redirect_tbl.Update_src_redirect_id(db_core.Url(), page_core_tbl.Conn());
|
||||
}
|
||||
}
|
||||
@Override public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
|
||||
if (ctx.Match(k, Invk_commit_interval_)) commit_interval = m.ReadInt("v");
|
||||
else if (ctx.Match(k, Invk_idx_mode_)) idx_mode = Db_idx_mode.Xto_itm(m.ReadStr("v"));
|
||||
else if (ctx.Match(k, Invk_redirect_id_enabled_)) redirect_id_enabled = m.ReadYn("v");
|
||||
else return super.Invk(ctx, ikey, k, m);
|
||||
return this;
|
||||
}
|
||||
private static final String Invk_commit_interval_ = "commit_interval_", Invk_idx_mode_ = "idx_mode_", Invk_redirect_id_enabled_ = "redirect_id_enabled_";
|
||||
}
|
||||
|
||||
@@ -13,3 +13,77 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs.cmds.texts.sqls; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.cmds.*; import gplx.xowa.bldrs.cmds.texts.*;
|
||||
import gplx.dbs.*; import gplx.xowa.wikis.data.*;
|
||||
import gplx.xowa.bldrs.wkrs.*;
|
||||
public class Xob_page_delete_cmd extends Xob_cmd_base {
|
||||
private final Xow_wiki wiki;
|
||||
public Xob_page_delete_cmd(Xob_bldr bldr, Xow_wiki wiki) {this.wiki = wiki;}
|
||||
@Override public String Cmd_key() {return Xob_cmd_keys.Key_text_delete_page;}
|
||||
@Override public void Cmd_run() {
|
||||
wiki.Init_by_wiki();
|
||||
Xow_db_file core_db = wiki.Data__core_mgr().Db__core();
|
||||
Db_conn core_db_conn = core_db.Conn();
|
||||
Gfo_usr_dlg usr_dlg = Gfo_usr_dlg_.Instance;
|
||||
usr_dlg.Plog_many("", "", "creating page_filter");
|
||||
if (!core_db_conn.Meta_tbl_exists("page_filter")) {
|
||||
core_db_conn.Meta_tbl_create
|
||||
( Dbmeta_tbl_itm.New("page_filter", new Dbmeta_fld_itm[]
|
||||
{ Dbmeta_fld_itm.new_int("page_id").Primary_y_()
|
||||
, Dbmeta_fld_itm.new_int("page_text_db_id")
|
||||
}
|
||||
, Dbmeta_idx_itm.new_normal_by_tbl("page_filter", "db_id__page", "page_text_db_id", "page_id")
|
||||
, Dbmeta_idx_itm.new_normal_by_tbl("page_filter", "page_id", "page_id")
|
||||
));
|
||||
}
|
||||
core_db_conn.Exec_sql_plog_ntx("finding missing redirects", String_.Concat_lines_nl_skip_last
|
||||
( "INSERT INTO page_filter (page_id, page_text_db_id)"
|
||||
, "SELECT ptr.page_id, ptr.page_text_db_id"
|
||||
, "FROM page ptr"
|
||||
, " LEFT JOIN page orig ON ptr.page_redirect_id = orig.page_id"
|
||||
, "WHERE ptr.page_is_redirect = 1"
|
||||
, "AND orig.page_id IS NULL"
|
||||
, "UNION"
|
||||
, "SELECT ptr.page_id, ptr.page_text_db_id"
|
||||
, "FROM page ptr"
|
||||
, "WHERE ptr.page_is_redirect = 1"
|
||||
, "AND ptr.page_redirect_id = -1"
|
||||
, ";"
|
||||
));
|
||||
|
||||
String db_file_cur = "";
|
||||
try {
|
||||
Xow_db_file[] db_file_ary = core_db.Tbl__db().Select_all(wiki.Data__core_mgr().Props(), wiki.Fsys_mgr().Root_dir());
|
||||
int len = db_file_ary.length;
|
||||
for (int i = 0; i < len; ++i) {
|
||||
boolean db_file_is_text = Bool_.N, db_file_is_cat = Bool_.N, db_file_is_search = Bool_.N;
|
||||
Xow_db_file db_file = db_file_ary[i];
|
||||
switch (db_file.Tid()) {
|
||||
case Xow_db_file_.Tid__core: case Xow_db_file_.Tid__wiki_solo: case Xow_db_file_.Tid__text_solo:
|
||||
// if mode is lot, then "core" db does not have cat, search; skip; DATE:2016-01-31
|
||||
if (wiki.Data__core_mgr().Props().Layout_text().Tid_is_lot()) continue;
|
||||
db_file_is_cat = db_file_is_search = Bool_.Y; // do not set db_file_is_text to true; DATE:2016-10-18
|
||||
break;
|
||||
case Xow_db_file_.Tid__text: db_file_is_text = Bool_.Y; break;
|
||||
case Xow_db_file_.Tid__cat: db_file_is_cat = Bool_.Y; break;
|
||||
case Xow_db_file_.Tid__search_link: db_file_is_search = Bool_.Y; break; // changed from search_data to search_link; DATE:2016-10-19
|
||||
}
|
||||
db_file_cur = db_file.Url().Raw();
|
||||
int db_id = db_file.Id();
|
||||
if (db_file_is_text) Run_sql(core_db_conn, db_file.Url(), db_id, "deleting text: " + db_id, "DELETE FROM <data_db>text WHERE page_id IN (SELECT page_id FROM page_filter WHERE page_text_db_id = {0});");
|
||||
if (db_file_is_cat) Run_sql(core_db_conn, db_file.Url(), db_id, "deleting cat: " + db_id, "DELETE FROM <data_db>cat_link WHERE cl_from IN (SELECT page_id FROM page_filter);");
|
||||
if (db_file_is_search) Run_sql(core_db_conn, db_file.Url(), db_id, "deleting search:" + db_id, "DELETE FROM <data_db>search_link WHERE page_id IN (SELECT page_id FROM page_filter);");
|
||||
if (db_file_is_text || db_file_is_cat || db_file_is_search)
|
||||
db_file.Conn().Env_vacuum();
|
||||
}
|
||||
} catch (Exception e) {Gfo_usr_dlg_.Instance.Warn_many("", "", "fatal error during page deletion: cur=~{0} err=~{1}", db_file_cur, Err_.Message_gplx_log(e));}
|
||||
core_db_conn.Exec_sql_plog_ntx("deleting from table: page", "DELETE FROM page WHERE page_id IN (SELECT page_id FROM page_filter);");
|
||||
// core_db_conn.Meta_tbl_delete("page_filter");
|
||||
core_db_conn.Env_vacuum();
|
||||
usr_dlg.Plog_many("", "", "");
|
||||
}
|
||||
private void Run_sql(Db_conn core_db_conn, Io_url db_url, int db_id, String prog_msg, String sql) {
|
||||
new Db_attach_mgr(core_db_conn, new Db_attach_itm("data_db", db_url))
|
||||
.Exec_sql_w_msg(prog_msg , sql, db_id);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,23 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs.cmds.texts.sqls; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.cmds.*; import gplx.xowa.bldrs.cmds.texts.*;
|
||||
import gplx.dbs.cfgs.*; import gplx.xowa.wikis.dbs.*; import gplx.xowa.wikis.*;
|
||||
import gplx.xowa.wikis.data.*;
|
||||
public class Xob_term_cmd extends Xob_term_base {
|
||||
public Xob_term_cmd(Xob_bldr bldr, Xowe_wiki wiki) {this.Ctor(bldr, wiki); this.wiki = wiki;} private Xowe_wiki wiki;
|
||||
@Override public String Cmd_key() {return KEY;} public static final String KEY = "text.term";
|
||||
@Override public void Cmd_end_hook() {
|
||||
// delete wiki's temp dir
|
||||
Io_mgr.Instance.DeleteDirDeep(wiki.Fsys_mgr().Tmp_dir());
|
||||
|
||||
// build fsdb
|
||||
gplx.fsdb.Fsdb_db_mgr__v2_bldr.Get_or_make(wiki, false);// always build file.user db; DATE:2015-05-12
|
||||
|
||||
// dansguardian
|
||||
if (wiki.App().Cfg().Get_bool_wiki_or(wiki, gplx.xowa.bldrs.filters.dansguardians.Dg_match_mgr.Cfg__enabled, false))
|
||||
new Xob_page_delete_cmd(wiki.Appe().Bldr(), wiki).Cmd_run();
|
||||
|
||||
wiki.Data__core_mgr().Rls();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,47 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs.cmds.texts.tdbs; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.cmds.*; import gplx.xowa.bldrs.cmds.texts.*;
|
||||
import gplx.core.ios.*;
|
||||
import gplx.xowa.wikis.tdbs.*; import gplx.xowa.wikis.tdbs.xdats.*;
|
||||
public class Io_sort_cmd_ns implements Io_make_cmd {
|
||||
Xob_xdat_file_wtr fil_wtr; Bry_bfr reg_bfr = Bry_bfr_.New(), key_bfr_0 = Bry_bfr_.New_w_size(512), key_bfr_n = Bry_bfr_.New_w_size(512);
|
||||
int fil_count = 0, itm_count = 0;
|
||||
public Io_sort_cmd_ns(Gfo_usr_dlg usr_dlg) {this.usr_dlg = usr_dlg;} Gfo_usr_dlg usr_dlg;
|
||||
public int Trg_fil_max() {return trg_fil_max;} public Io_sort_cmd_ns Trg_fil_max_(int v) {trg_fil_max = v; return this;} private int trg_fil_max = 65 * Io_mgr.Len_kb;
|
||||
Io_url reg_url;
|
||||
public Io_sort_cmd Make_dir_(Io_url v) {make_dir = v; return this;} Io_url make_dir;
|
||||
public void Sort_bgn() {
|
||||
fil_count = itm_count = 0;
|
||||
fil_wtr = Xob_xdat_file_wtr.new_file_(trg_fil_max, make_dir);
|
||||
reg_url = make_dir.GenSubFil(Xotdb_dir_info_.Name_reg_fil);
|
||||
}
|
||||
public void Sort_do(Io_line_rdr rdr) {
|
||||
int itm_bgn = rdr.Itm_pos_bgn(), itm_end = rdr.Itm_pos_end(), key_bgn = rdr.Key_pos_bgn(), key_end = rdr.Key_pos_end();
|
||||
int itm_len = itm_end - itm_bgn;
|
||||
if (fil_wtr.FlushNeeded(itm_len)) Flush();
|
||||
byte[] bfr = rdr.Bfr();
|
||||
if (key_bfr_0.Len() == 0) {key_bfr_0.Add_mid(bfr, key_bgn, key_end);}
|
||||
key_bfr_n.Clear().Add_mid(bfr, key_bgn, key_end);
|
||||
fil_wtr.Bfr().Add_mid(rdr.Bfr(), itm_bgn, itm_end);
|
||||
fil_wtr.Add_idx(Byte_ascii.Null);
|
||||
++itm_count;
|
||||
}
|
||||
public void Sort_end() {
|
||||
Flush();
|
||||
Io_mgr.Instance.AppendFilBfr(reg_url, reg_bfr);
|
||||
//fil_wtr.Rls(); reg_bfr.Rls(); key_bfr_0.Rls(); key_bfr_n.Rls();
|
||||
}
|
||||
private void Flush() {
|
||||
reg_bfr
|
||||
.Add_int_variable(fil_count++).Add_byte(Byte_ascii.Pipe)
|
||||
.Add_bfr_and_preserve(key_bfr_0).Add_byte(Byte_ascii.Pipe)
|
||||
.Add_bfr_and_preserve(key_bfr_n).Add_byte(Byte_ascii.Pipe)
|
||||
.Add_int_variable(itm_count).Add_byte(Byte_ascii.Nl);
|
||||
itm_count = 0;
|
||||
key_bfr_0.Clear();
|
||||
if (fil_wtr.Fil_idx() % 10 == 0)
|
||||
usr_dlg.Prog_many("cmd_ns", "prog", "saving: ~{0} ~{1}", reg_url.OwnerDir().OwnerDir().NameOnly(), fil_wtr.Fil_url().NameOnly());
|
||||
fil_wtr.Flush(usr_dlg);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,93 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs.cmds.texts.tdbs; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.cmds.*; import gplx.xowa.bldrs.cmds.texts.*;
|
||||
import gplx.core.primitives.*; import gplx.core.ios.*;
|
||||
import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.wkrs.*; import gplx.xowa.bldrs.wtrs.*;
|
||||
import gplx.xowa.langs.*; import gplx.xowa.wikis.nss.*; import gplx.xowa.wikis.dbs.*; import gplx.xowa.wikis.tdbs.*; import gplx.xowa.wikis.data.tbls.*;
|
||||
public abstract class Srch_bldr_wkr_base extends Xob_itm_dump_base implements Xob_page_wkr {
|
||||
private final Ordered_hash list = Ordered_hash_.New(); private Xol_lang_itm lang;
|
||||
public abstract String Page_wkr__key();
|
||||
public void Page_wkr__bgn() {
|
||||
make_dir = wiki.Tdb_fsys_mgr().Ns_dir();
|
||||
this.Init_dump(this.Page_wkr__key(), make_dir);
|
||||
lang = wiki.Lang(); // wiki.Appe().Lang_mgr().Lang_en(); // NOTE: was .Lang_en which is wrong (should match lang of wiki); DATE:2013-05-11
|
||||
tmp_wtr_mgr = new Xob_tmp_wtr_mgr(new Xob_tmp_wtr_wkr__ttl(temp_dir, dump_fil_len));
|
||||
if (wiki.Db_mgr().Tid() == Xodb_mgr_sql.Tid_sql) // if sqlite, hard-code to ns_main; aggregates all ns into one
|
||||
ns_main = wiki.Ns_mgr().Ns_main();
|
||||
} private Xob_tmp_wtr_mgr tmp_wtr_mgr; private Xow_ns ns_main;
|
||||
public void Page_wkr__run(Xowd_page_itm page) {
|
||||
// if (page.Ns_id() != Xow_ns_.Tid__main) return; // limit to main ns for now
|
||||
try {
|
||||
byte[] ttl = page.Ttl_page_db();
|
||||
byte[][] words = Split_ttl_into_words(lang, list, dump_bfr, ttl);
|
||||
Xob_tmp_wtr wtr = tmp_wtr_mgr.Get_or_new(ns_main == null ? page.Ns() : ns_main);
|
||||
int words_len = words.length;
|
||||
int row_len = 0;
|
||||
for (int i = 0; i < words_len; i++) {
|
||||
byte[] word = words[i];
|
||||
row_len += word.length + 13; // 13=5(id) + 5(page_len) + 3(dlms)
|
||||
}
|
||||
if (wtr.FlushNeeded(row_len)) wtr.Flush(bldr.Usr_dlg());
|
||||
for (int i = 0; i < words_len; i++) {
|
||||
byte[] word = words[i];
|
||||
wtr.Bfr() .Add(word) .Add_byte(Byte_ascii.Pipe)
|
||||
.Add_base85_len_5(page.Id()) .Add_byte(Byte_ascii.Semic)
|
||||
.Add_base85_len_5(page.Text().length) .Add_byte(Byte_ascii.Nl);
|
||||
}
|
||||
} catch (Exception e) {bldr.Usr_dlg().Warn_many("", "", "search_index:fatal error: err=~{0}", Err_.Message_gplx_full(e));} // never let single page crash entire import
|
||||
}
|
||||
public void Page_wkr__run_cleanup() {}
|
||||
public void Page_wkr__end() {
|
||||
tmp_wtr_mgr.Flush_all(bldr.Usr_dlg());
|
||||
dump_bfr.ClearAndReset();
|
||||
Xobdc_merger.Ns(bldr.Usr_dlg(), tmp_wtr_mgr.Regy(), Xotdb_dir_info_.Name_search_ttl, temp_dir, make_dir, sort_mem_len, Io_line_rdr_key_gen_.first_pipe, this.Make_cmd_site());
|
||||
tmp_wtr_mgr.Rls_all();
|
||||
if (delete_temp) Io_mgr.Instance.DeleteDirDeep(temp_dir);
|
||||
}
|
||||
public abstract Io_make_cmd Make_cmd_site();
|
||||
public static byte[][] Split_ttl_into_words(Xol_lang_itm lang, Ordered_hash list, Bry_bfr bfr, byte[] ttl) {
|
||||
if (lang != null) // null lang passed in by searcher
|
||||
ttl = lang.Case_mgr().Case_build_lower(ttl);
|
||||
int ttl_len = ttl.length; Bry_obj_ref word_ref = Bry_obj_ref.New(Bry_.Empty);
|
||||
int i = 0; boolean word_done = false;
|
||||
while (true) {
|
||||
if (word_done || i == ttl_len) {
|
||||
if (bfr.Len() > 0) {
|
||||
byte[] word = bfr.To_bry_and_clear();
|
||||
word_ref.Val_(word);
|
||||
if (!list.Has(word_ref)) list.Add(word_ref, word); // don't add same word twice; EX: Title of "Can Can" should only have "Can" in index
|
||||
}
|
||||
if (i == ttl_len) break;
|
||||
word_done = false;
|
||||
}
|
||||
byte b = ttl[i];
|
||||
switch (b) {
|
||||
case Byte_ascii.Underline: // underline is word-breaking; EX: A_B -> A, B
|
||||
case Byte_ascii.Space: // should not occur, but just in case (only underscores)
|
||||
case Byte_ascii.Tab: case Byte_ascii.Nl: case Byte_ascii.Cr: // should not occur in titles, but just in case
|
||||
|
||||
case Byte_ascii.Dash: // treat hypenated words separately
|
||||
case Byte_ascii.Dot: // treat abbreviations as separate words; EX: A.B.C.
|
||||
case Byte_ascii.Bang: case Byte_ascii.Hash: case Byte_ascii.Dollar: case Byte_ascii.Percent:
|
||||
case Byte_ascii.Amp: case Byte_ascii.Paren_bgn: case Byte_ascii.Paren_end: case Byte_ascii.Star:
|
||||
case Byte_ascii.Comma: case Byte_ascii.Slash:
|
||||
case Byte_ascii.Colon: case Byte_ascii.Semic: case Byte_ascii.Gt:
|
||||
case Byte_ascii.Question: case Byte_ascii.At: case Byte_ascii.Brack_bgn: case Byte_ascii.Brack_end:
|
||||
case Byte_ascii.Pow: case Byte_ascii.Tick:
|
||||
case Byte_ascii.Curly_bgn: case Byte_ascii.Pipe: case Byte_ascii.Curly_end: case Byte_ascii.Tilde:
|
||||
case Byte_ascii.Quote: case Byte_ascii.Apos: // FUTURE: apos will split "Earth's" to Earth and s; should remove latter
|
||||
++i;
|
||||
word_done = true;
|
||||
break;
|
||||
default:
|
||||
bfr.Add_byte(b);
|
||||
++i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
byte[][] rv = (byte[][])list.To_ary(byte[].class);
|
||||
list.Clear(); list.Resize_bounds(16);
|
||||
return rv;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,101 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs.cmds.texts.tdbs; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.cmds.*; import gplx.xowa.bldrs.cmds.texts.*;
|
||||
import gplx.xowa.bldrs.wkrs.*;
|
||||
import gplx.xowa.wikis.nss.*;
|
||||
import gplx.xowa.wikis.data.site_stats.*; import gplx.xowa.wikis.data.tbls.*;
|
||||
import gplx.xowa.wikis.tdbs.*; import gplx.xowa.wikis.tdbs.hives.*; import gplx.xowa.wikis.tdbs.xdats.*;
|
||||
public class Xob_calc_stats_cmd extends Xob_itm_basic_base implements Xob_cmd {
|
||||
public Xob_calc_stats_cmd(Xob_bldr bldr, Xowe_wiki wiki) {this.Cmd_ctor(bldr, wiki);}
|
||||
public String Cmd_key() {return Xob_cmd_keys.Key_tdb_calc_stats;}
|
||||
public void Cmd_init(Xob_bldr bldr) {}
|
||||
public void Cmd_bgn(Xob_bldr bldr) {}
|
||||
public void Cmd_run() {Exec();}
|
||||
public void Cmd_end() {}
|
||||
public void Cmd_term() {}
|
||||
private void Exec() {
|
||||
int ns_len = wiki.Ns_mgr().Ords_len();
|
||||
int total = 0;
|
||||
for (int i = 0; i < ns_len; i++) {
|
||||
Xow_ns ns = wiki.Ns_mgr().Ords_ary()[i];
|
||||
int ns_count = Calc_counts(ns);
|
||||
ns.Count_(ns_count);
|
||||
total += ns_count;
|
||||
}
|
||||
int count_main = Calc_count_articles(wiki.Ns_mgr().Ns_main());
|
||||
int count_file = Calc_count_articles(wiki.Ns_mgr().Ns_file());
|
||||
Bry_bfr bfr = Bry_bfr_.New();
|
||||
Gen_call(Bool_.Y, bfr, Xowe_wiki.Invk_stats);
|
||||
Gen_call(Bool_.N, bfr, Xowd_site_stats_mgr.Invk_number_of_articles_, count_main);
|
||||
Gen_call(Bool_.N, bfr, Xowd_site_stats_mgr.Invk_number_of_files_, count_file);
|
||||
Gen_call(Bool_.N, bfr, Xowd_site_stats_mgr.Invk_number_of_pages_, total);
|
||||
for (int i = 0; i < ns_len; i++) {
|
||||
Xow_ns ns = wiki.Ns_mgr().Ords_ary()[i];
|
||||
if (ns.Id() < 0) continue;
|
||||
bfr.Add_byte_nl();
|
||||
Gen_call(Bool_.N, bfr, Xowd_site_stats_mgr.Invk_number_of_articles_in_ns_, ns.Num_str(), Int_.To_str_pad_bgn_zero(ns.Count(), 10));
|
||||
}
|
||||
bfr.Add_byte_nl().Add_byte(Byte_ascii.Semic).Add_byte_nl();
|
||||
Io_url wiki_gfs = Wiki_gfs_url(wiki);
|
||||
Io_mgr.Instance.SaveFilBfr(wiki_gfs, bfr);
|
||||
}
|
||||
private void Gen_call(boolean first, Bry_bfr bfr, String key, Object... vals) {
|
||||
if (!first) bfr.Add_byte(Byte_ascii.Dot);
|
||||
bfr.Add_str_u8(key);
|
||||
int len = vals.length;
|
||||
if (len > 0) {
|
||||
bfr.Add_byte(Byte_ascii.Paren_bgn);
|
||||
for (int i = 0; i < len; i++) {
|
||||
if (i != 0) bfr.Add_byte(Byte_ascii.Comma).Add_byte(Byte_ascii.Space);
|
||||
Object val = vals[i];
|
||||
bfr.Add_str_u8(Object_.Xto_str_strict_or_null_mark(val));
|
||||
}
|
||||
bfr.Add_byte(Byte_ascii.Paren_end);
|
||||
}
|
||||
}
|
||||
int Calc_counts(Xow_ns ns) {
|
||||
Io_url reg_url = wiki.Tdb_fsys_mgr().Url_ns_reg(ns.Num_str(), Xotdb_dir_info_.Tid_ttl);
|
||||
Xowd_regy_mgr reg_mgr = new Xowd_regy_mgr(reg_url);
|
||||
int files_ary_len = reg_mgr.Files_ary().length;
|
||||
int count = 0;
|
||||
for (int i = 0; i < files_ary_len; i++) {
|
||||
count += reg_mgr.Files_ary()[i].Count();
|
||||
}
|
||||
return count;
|
||||
}
|
||||
int Calc_count_articles(Xow_ns ns) {
|
||||
Io_url hive_dir = wiki.Fsys_mgr().Root_dir().GenSubDir_nest(Xotdb_dir_info_.Name_ns, ns.Num_str(), Xotdb_dir_info_.Name_title);
|
||||
return Calc_count_articles_dir(ns, hive_dir);
|
||||
}
|
||||
int Calc_count_articles_dir(Xow_ns ns, Io_url dir) {
|
||||
Io_url[] subs = Io_mgr.Instance.QueryDir_args(dir).DirInclude_().ExecAsUrlAry();
|
||||
int count = 0;
|
||||
int subs_len = subs.length;
|
||||
bldr.Usr_dlg().Prog_one(GRP_KEY, "count", "calculating: ~{0}", dir.Raw());
|
||||
for (int i = 0; i < subs_len; i++) {
|
||||
Io_url sub = subs[i];
|
||||
if (sub.Type_dir())
|
||||
count += Calc_count_articles_dir(ns, sub);
|
||||
else
|
||||
count += Calc_count_articles_fil(ns, sub);
|
||||
}
|
||||
return count;
|
||||
}
|
||||
int Calc_count_articles_fil(Xow_ns ns, Io_url fil) {
|
||||
if (String_.Eq(fil.NameAndExt(), Xotdb_dir_info_.Name_reg_fil)) return 0;
|
||||
int rv = 0;
|
||||
byte[] bry = Io_mgr.Instance.LoadFilBry(fil);
|
||||
Xob_xdat_file xdat_file = new Xob_xdat_file().Parse(bry, bry.length, fil);
|
||||
Xowd_page_itm page = Xowd_page_itm.new_tmp();
|
||||
int count = xdat_file.Count();
|
||||
for (int i = 0; i < count; i++) {
|
||||
byte[] ttl_bry = xdat_file.Get_bry(i);
|
||||
Xotdb_page_itm_.Txt_ttl_load(page, ttl_bry);
|
||||
rv += page.Redirected() ? 0 : 1;
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
static final String GRP_KEY = "xowa.bldr.calc_stats";
|
||||
public static Io_url Wiki_gfs_url(Xowe_wiki wiki) {return wiki.Fsys_mgr().Root_dir().GenSubFil_nest("cfg", "wiki_stats.gfs");}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,29 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs.cmds.texts.tdbs; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.cmds.*; import gplx.xowa.bldrs.cmds.texts.*;
|
||||
import org.junit.*; import gplx.xowa.htmls.portal.*; import gplx.xowa.wikis.xwikis.*;
|
||||
public class Xob_init_base_tst {
|
||||
@Before public void init() {fxt.Clear();} private Xob_init_base_fxt fxt = new Xob_init_base_fxt();
|
||||
@Test public void Dirty_wiki_itms() {
|
||||
Xoae_app app = fxt.App(); Xowe_wiki wiki = fxt.Wiki();
|
||||
Xoa_available_wikis_mgr wikis_list = fxt.App().Gui_mgr().Html_mgr().Portal_mgr().Wikis();
|
||||
Tfds.Eq("", wikis_list.Itms_as_html()); // assert
|
||||
Xow_xwiki_itm xwiki_itm = app.Usere().Wiki().Xwiki_mgr().Add_by_atrs("en.wikipedia.org", "en.wikipedia.org");
|
||||
xwiki_itm.Offline_(Bool_.Y); // simulate add via Available_from_fsys; DATE:2014-09-21
|
||||
Tfds.Eq("", wikis_list.Itms_as_html()); // still empty
|
||||
new Xob_init_tdb(app.Bldr(), wiki).Cmd_end(); // mock "init" task
|
||||
Tfds.Eq("\n <li><a href=\"/site/en.wikipedia.org/\" class='xowa-hover-off'>en.wikipedia.org</a></li>", wikis_list.Itms_as_html()); // no longer empty
|
||||
}
|
||||
}
|
||||
class Xob_init_base_fxt {
|
||||
public void Clear() {
|
||||
if (app == null) {
|
||||
app = Xoa_app_fxt.Make__app__edit();
|
||||
wiki = Xoa_app_fxt.Make__wiki__edit(app);
|
||||
}
|
||||
Io_mgr.Instance.InitEngine_mem();
|
||||
}
|
||||
public Xoae_app App() {return app;} private Xoae_app app;
|
||||
public Xowe_wiki Wiki() {return wiki;} private Xowe_wiki wiki;
|
||||
}
|
||||
|
||||
@@ -13,3 +13,14 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs.cmds.texts.tdbs; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.cmds.*; import gplx.xowa.bldrs.cmds.texts.*;
|
||||
import gplx.xowa.xtns.wbases.imports.*;
|
||||
public class Xob_init_tdb extends Xob_init_base {
|
||||
public Xob_init_tdb(Xob_bldr bldr, Xowe_wiki wiki) {this.Ctor(bldr, wiki);}
|
||||
@Override public String Cmd_key() {return Xob_cmd_keys.Key_tdb_text_init;}
|
||||
@Override public void Cmd_ini_wdata(Xob_bldr bldr, Xowe_wiki wiki) {
|
||||
bldr.Cmd_mgr().Add_cmd(wiki, Xob_cmd_keys.Key_tdb_text_wdata_qid);
|
||||
bldr.Cmd_mgr().Add_cmd(wiki, Xob_cmd_keys.Key_tdb_text_wdata_pid);
|
||||
}
|
||||
@Override public void Cmd_run_end(Xowe_wiki wiki) {}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,124 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs.cmds.texts.tdbs; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.cmds.*; import gplx.xowa.bldrs.cmds.texts.*;
|
||||
import gplx.core.ios.*; import gplx.core.ios.streams.*;
|
||||
import gplx.xowa.wikis.tdbs.*; import gplx.xowa.wikis.tdbs.xdats.*;
|
||||
public class Xob_make_cmd_site implements Io_make_cmd {
|
||||
Xob_xdat_file_wtr fil_wtr; Bry_bfr cur_bfr = Bry_bfr_.New(), reg_bfr = Bry_bfr_.New(), reg_key_0 = Bry_bfr_.New_w_size(512), reg_key_n = Bry_bfr_.New_w_size(512);
|
||||
int make_fil_max = 65 * Io_mgr.Len_kb, fil_count = 0, itm_count = 0, itm_key_end = 0; Io_url reg_url;
|
||||
public Xob_make_cmd_site(Gfo_usr_dlg usr_dlg, Io_url make_dir, int make_fil_max) {this.usr_dlg = usr_dlg; this.make_dir = make_dir; this.make_fil_max = make_fil_max;} Gfo_usr_dlg usr_dlg;
|
||||
public Io_sort_cmd Make_dir_(Io_url v) {make_dir = v; return this;} Io_url make_dir;
|
||||
public byte Line_dlm() {return line_dlm;} public Xob_make_cmd_site Line_dlm_(byte v) {line_dlm = v; return this;} private byte line_dlm = Byte_ascii.Null;
|
||||
public void Sort_bgn() {
|
||||
fil_count = itm_count = itm_key_end = 0;
|
||||
reg_url = make_dir.GenSubFil(Xotdb_dir_info_.Name_reg_fil);
|
||||
fil_wtr = Xob_xdat_file_wtr.new_file_(make_fil_max, make_dir);
|
||||
}
|
||||
public void Sort_do(Io_line_rdr rdr) {
|
||||
if (line_dlm == Byte_ascii.Null) line_dlm = rdr.Line_dlm();
|
||||
int rdr_key_bgn = rdr.Key_pos_bgn(), rdr_key_end = rdr.Key_pos_end();
|
||||
int rdr_key_len = rdr_key_end - rdr_key_bgn;
|
||||
int rdr_val_bgn = rdr_key_end, /* NOTE: no +1: want to include fld_dlm for below*/ rdr_val_end = rdr.Itm_pos_end() - 1; // -1: ignore rdr_dlm
|
||||
if (Bry_.Match(cur_bfr.Bfr(), 0, itm_key_end, rdr.Bfr(), rdr_key_bgn, rdr_key_end)) // key is same; add rest of line as val
|
||||
cur_bfr.Add_mid(rdr.Bfr(), rdr_val_bgn, rdr_val_end);
|
||||
else {
|
||||
if (fil_wtr.FlushNeeded(cur_bfr.Len() + rdr_key_len)) Flush();
|
||||
byte[] bfr = rdr.Bfr();
|
||||
if (reg_key_0.Len() == 0) {
|
||||
if (cur_bfr.Len() == 0)
|
||||
reg_key_0.Add_mid(bfr, rdr_key_bgn, rdr_key_end);
|
||||
else
|
||||
reg_key_0.Add_mid(cur_bfr.Bfr(), 0, itm_key_end);
|
||||
}
|
||||
if (cur_bfr.Len() > 0) {
|
||||
reg_key_n.Clear().Add_mid(cur_bfr.Bfr(), 0, itm_key_end);
|
||||
fil_wtr.Bfr().Add_bfr_and_clear(cur_bfr);
|
||||
fil_wtr.Add_idx(line_dlm);
|
||||
}
|
||||
cur_bfr.Add_mid(rdr.Bfr(), rdr.Itm_pos_bgn(), rdr.Itm_pos_end() - 1); // -1 to ignore closing newline
|
||||
itm_key_end = rdr_key_len; // NOTE: must be set last
|
||||
++itm_count;
|
||||
}
|
||||
}
|
||||
public void Do_bry(byte[] bry, int key_bgn, int key_end, int itm_bgn, int itm_end) {
|
||||
int val_bgn = key_end, /* NOTE: no +1: want to include fld_dlm for below*/ val_end = itm_end - 1; // -1: ignore rdr_dlm
|
||||
if (Bry_.Match(cur_bfr.Bfr(), 0, itm_key_end, bry, key_bgn, key_end)) // key is same; add rest of line as val
|
||||
cur_bfr.Add_mid(bry, val_bgn, val_end);
|
||||
else { // key changed;
|
||||
int itm_len = itm_end - itm_bgn;
|
||||
if (cur_bfr.Len() > 0) { // pending itm
|
||||
fil_wtr.Bfr().Add_bfr_and_clear(cur_bfr); // add cur_bfr to fil_bfr
|
||||
fil_wtr.Add_idx(line_dlm); // add cur_itm to hdr
|
||||
if (fil_wtr.FlushNeeded(cur_bfr.Len() + itm_len))
|
||||
Flush();
|
||||
}
|
||||
if (reg_key_0.Len() == 0) // regy.key_0 bfr is empty
|
||||
reg_key_0.Add_mid(bry, key_bgn, key_end); // update reg_0key_0
|
||||
reg_key_n.Clear().Add_mid(bry, key_bgn, key_end); // always update reg_key_n
|
||||
if (itm_len > 100 * Io_mgr.Len_mb)
|
||||
Flush_large(bry, itm_bgn, itm_end, itm_len);
|
||||
else {
|
||||
cur_bfr.Add_mid(bry, itm_bgn, itm_end - 1); // add incoming itm; -1 to ignore closing newline
|
||||
itm_key_end = key_end; // NOTE: must be set last
|
||||
++itm_count;
|
||||
}
|
||||
}
|
||||
}
|
||||
public void Sort_end() {
|
||||
reg_key_n.Clear().Add_mid(cur_bfr.Bfr(), 0, itm_key_end);
|
||||
fil_wtr.Bfr().Add_bfr_and_clear(cur_bfr);
|
||||
fil_wtr.Add_idx(line_dlm);
|
||||
Flush();
|
||||
Io_mgr.Instance.AppendFilBfr(reg_url, reg_bfr);
|
||||
//fil_wtr.Rls(); cur_bfr.Rls(); fil_wtr.Rls(); reg_bfr.Rls(); reg_key_0.Rls(); reg_key_n.Rls();
|
||||
}
|
||||
// private void Flush_large(byte[] bry, int itm_bgn, int itm_end, int itm_len) {
|
||||
// ++itm_count;
|
||||
// this.Flush_reg();
|
||||
// fil_wtr.Add_idx_direct(itm_len, Byte_.Zero);
|
||||
// IoStream stream = IoStream_.Null;
|
||||
// try {
|
||||
// stream = Io_mgr.Instance.OpenStreamWrite(fil_wtr.Fil_url());
|
||||
// fil_wtr.FlushIdx(stream);
|
||||
// stream.Write_and_flush(bry, itm_bgn, itm_end);
|
||||
// fil_wtr.Clear();
|
||||
// fil_wtr.Url_gen_add();
|
||||
// }
|
||||
// finally {stream.Rls();}
|
||||
// }
|
||||
private void Flush_large(byte[] bry, int itm_bgn, int itm_end, int itm_len) {
|
||||
++itm_count;
|
||||
this.Flush_reg();
|
||||
fil_wtr.Add_idx_direct(itm_len, Byte_.Zero);
|
||||
Io_stream_wtr wtr = null;
|
||||
try {
|
||||
wtr = Io_stream_wtr_.New__raw(fil_wtr.Fil_url());
|
||||
wtr.Open();
|
||||
fil_wtr.FlushIdx(wtr);
|
||||
wtr.Write(bry, itm_bgn, itm_end);
|
||||
wtr.Flush();
|
||||
fil_wtr.Clear();
|
||||
fil_wtr.Url_gen_add();
|
||||
}
|
||||
finally {if (wtr != null) wtr.Rls();}
|
||||
}
|
||||
private void Flush() {
|
||||
Flush_reg();
|
||||
Flush_fil();
|
||||
}
|
||||
private void Flush_reg() {
|
||||
reg_bfr
|
||||
.Add_int_variable(fil_count++).Add_byte(Byte_ascii.Pipe)
|
||||
.Add_bfr_and_preserve(reg_key_0).Add_byte(Byte_ascii.Pipe)
|
||||
.Add_bfr_and_preserve(reg_key_n).Add_byte(Byte_ascii.Pipe)
|
||||
.Add_int_variable(itm_count).Add_byte(Byte_ascii.Nl);
|
||||
itm_count = 0;
|
||||
reg_key_0.Clear();
|
||||
}
|
||||
private void Flush_fil() {
|
||||
if (fil_wtr.Fil_idx() % 10 == 0)
|
||||
usr_dlg.Prog_many("cmd_site", "prog", "saving: ~{0} ~{1}", reg_url.OwnerDir().NameOnly(), fil_wtr.Fil_url().NameOnly());
|
||||
fil_wtr.Flush(usr_dlg);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,24 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs.cmds.texts.tdbs; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.cmds.*; import gplx.xowa.bldrs.cmds.texts.*;
|
||||
import gplx.core.ios.*; import gplx.xowa.wikis.tdbs.*; import gplx.xowa.wikis.data.tbls.*;
|
||||
import gplx.xowa.bldrs.wkrs.*;
|
||||
public class Xob_make_id_wkr extends Xob_itm_dump_base implements Xob_page_wkr, Gfo_invk {
|
||||
public Xob_make_id_wkr(Xob_bldr bldr, Xowe_wiki wiki) {this.Cmd_ctor(bldr, wiki);}
|
||||
public String Page_wkr__key() {return KEY;} public static final String KEY = "core.make_id";
|
||||
public void Page_wkr__bgn() {
|
||||
this.Init_dump(KEY, wiki.Tdb_fsys_mgr().Site_dir().GenSubDir(Xotdb_dir_info_.Name_id));
|
||||
}
|
||||
public void Page_wkr__run(Xowd_page_itm page) {
|
||||
byte[] ttl = page.Ttl_page_db();
|
||||
if (dump_bfr.Len() + row_fixed_len + ttl.length > dump_fil_len) Io_mgr.Instance.AppendFilBfr(dump_url_gen.Nxt_url(), dump_bfr);
|
||||
Xotdb_page_itm_.Txt_id_save(dump_bfr, page);
|
||||
}
|
||||
public void Page_wkr__run_cleanup() {}
|
||||
public void Page_wkr__end() {
|
||||
this.Term_dump(new Xob_make_cmd_site(bldr.Usr_dlg(), make_dir, make_fil_len));
|
||||
if (delete_temp) Io_mgr.Instance.DeleteDirDeep(temp_dir);
|
||||
}
|
||||
static final int row_fixed_len = 25 + 1 + 7; // 25=5 base_85 flds; 1=Redirect; 7=dlm
|
||||
}
|
||||
|
||||
@@ -13,3 +13,22 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs.cmds.texts.tdbs; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.cmds.*; import gplx.xowa.bldrs.cmds.texts.*;
|
||||
import gplx.core.ios.*; import gplx.xowa.wikis.data.tbls.*; import gplx.xowa.wikis.tdbs.*; import gplx.xowa.wikis.nss.*;
|
||||
import gplx.xowa.bldrs.wkrs.*;
|
||||
public class Xob_parse_dump_templates_cmd extends Xob_itm_dump_base implements Xob_page_wkr, Gfo_invk {
|
||||
public Xob_parse_dump_templates_cmd(Xob_bldr bldr, Xowe_wiki wiki) {this.Cmd_ctor(bldr, wiki);}
|
||||
public String Page_wkr__key() {return KEY;} public static final String KEY = "parse.dump_templates";
|
||||
public static final int FixedLen_page = 1 + 5 + 1 + 5 + 1 + 1 + 1; // \tid|date|title|text\n
|
||||
public void Page_wkr__bgn() {
|
||||
Init_dump(KEY);
|
||||
}
|
||||
public void Page_wkr__run(Xowd_page_itm page) {
|
||||
if (page.Ns_id() != Xow_ns_.Tid__template) return;
|
||||
int id = page.Id(); byte[] title = page.Ttl_page_db(), text = page.Text(); int title_len = title.length, text_len = text.length;
|
||||
if (FixedLen_page + title_len + text_len + dump_bfr.Len() > dump_fil_len) super.Flush_dump();
|
||||
Xotdb_page_itm_.Txt_page_save(dump_bfr, id, page.Modified_on(), title, text, true);
|
||||
}
|
||||
public void Page_wkr__run_cleanup() {}
|
||||
public void Page_wkr__end() {super.Flush_dump();}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,35 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs.cmds.texts.xmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.cmds.*; import gplx.xowa.bldrs.cmds.texts.*;
|
||||
import gplx.xowa.wikis.nss.*;
|
||||
public class Xob_siteinfo_nde {
|
||||
public Xob_siteinfo_nde(String site_name, String db_name, byte[] main_page, String generator, String case_dflt, Xow_ns_mgr ns_mgr) {
|
||||
this.site_name = site_name;
|
||||
this.db_name = db_name;
|
||||
this.main_page = main_page;
|
||||
this.generator = generator;
|
||||
this.case_dflt = case_dflt;
|
||||
this.ns_mgr = ns_mgr;
|
||||
}
|
||||
public String Site_name() {return site_name;} private final String site_name;
|
||||
public String Db_name() {return db_name;} private final String db_name;
|
||||
public byte[] Main_page() {return main_page;} private final byte[] main_page;
|
||||
public String Generator() {return generator;} private final String generator;
|
||||
public String Case_dflt() {return case_dflt;} private final String case_dflt;
|
||||
public Xow_ns_mgr Ns_mgr() {return ns_mgr;} private final Xow_ns_mgr ns_mgr;
|
||||
public void To_bfr(Bry_bfr bfr) {
|
||||
bfr.Add (main_page).Add_byte_pipe();
|
||||
bfr.Add_str_u8(case_dflt).Add_byte_pipe();
|
||||
bfr.Add_str_u8(site_name).Add_byte_pipe();
|
||||
bfr.Add_str_u8(db_name).Add_byte_pipe();
|
||||
bfr.Add_str_u8(generator).Add_byte_nl();
|
||||
int len = ns_mgr.Count();
|
||||
for (int i = 0; i < len; ++i) {
|
||||
Xow_ns ns = ns_mgr.Ords_get_at(i);
|
||||
bfr.Add_int_variable(ns.Id()).Add_byte_pipe();
|
||||
bfr.Add_str_u8(Xow_ns_case_.To_str(ns.Case_match())).Add_byte_pipe();
|
||||
bfr.Add(ns.Name_ui()).Add_byte_nl();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,67 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs.cmds.texts.xmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.cmds.*; import gplx.xowa.bldrs.cmds.texts.*;
|
||||
import gplx.core.ios.*; import gplx.core.ios.streams.*; import gplx.langs.xmls.*; // NOTE: gplx.langs.xmls does not support Android; DATE:2013-01-17
|
||||
import gplx.xowa.wikis.nss.*;
|
||||
public class Xob_siteinfo_parser_ {
|
||||
public static byte[] Extract(Io_stream_rdr src_rdr) {
|
||||
Io_buffer_rdr rdr = Io_buffer_rdr.Null;
|
||||
try {
|
||||
rdr = Io_buffer_rdr.new_(src_rdr, Io_mgr.Len_mb); // ASSUME: siteInfo is fully contained in the 1st MB of the src_xml
|
||||
byte[] src = rdr.Bfr();
|
||||
int bgn = Bry_find_.Find_fwd(src, Bry_siteinfo_bgn, 0) ; if (bgn == Bry_find_.Not_found) throw Err_.new_("Xob_siteinfo_parser_", "could not find <siteinfo>", "src", src);
|
||||
int end = Bry_find_.Move_fwd(src, Bry_siteinfo_end, bgn); if (end == Bry_find_.Not_found) throw Err_.new_("Xob_siteinfo_parser_", "could not find </siteinfo>", "src", src);
|
||||
return Bry_.Mid(src, bgn, end);
|
||||
}
|
||||
finally {rdr.Rls();}
|
||||
}
|
||||
public static void Parse(byte[] siteinfo_bry, Xowe_wiki wiki) {
|
||||
Xob_siteinfo_nde nde = Parse(String_.new_u8(siteinfo_bry), wiki.Ns_mgr());
|
||||
wiki.Props().Bldr_version_(Bry_.new_a7(Xoa_app_.Version));
|
||||
wiki.Props().Main_page_(nde.Main_page());
|
||||
wiki.Props().Siteinfo_mainpage_(nde.Main_page());
|
||||
Bry_bfr bfr = Bry_bfr_.New().Add_str_u8(nde.Site_name()).Add_byte_pipe().Add_str_u8(nde.Generator()).Add_byte_pipe().Add_str_u8(nde.Case_dflt()).Add_byte_pipe();
|
||||
wiki.Props().Siteinfo_misc_(bfr.To_bry_and_clear());
|
||||
}
|
||||
public static Xob_siteinfo_nde Parse(String xdoc_src, Xow_ns_mgr ns_mgr) {
|
||||
XmlDoc xdoc = XmlDoc_.parse(xdoc_src); XmlNde root = xdoc.Root();
|
||||
String site_name = "", db_name = "", generator = "", case_dflt = Xow_ns_case_.Key__1st; byte[] main_page = Xoa_page_.Main_page_bry;
|
||||
int root_len = root.SubNdes().Count();
|
||||
for (int i = 0; i < root_len; ++i) {
|
||||
XmlNde sub_nde = root.SubNdes().Get_at(i); String sub_name = sub_nde.Name();
|
||||
if (String_.Eq(sub_name, "sitename")) site_name = sub_nde.Text_inner();
|
||||
else if (String_.Eq(sub_name, "generator")) generator = sub_nde.Text_inner();
|
||||
else if (String_.Eq(sub_name, "case")) case_dflt = sub_nde.Text_inner();
|
||||
else if (String_.Eq(sub_name, "dbname")) db_name = sub_nde.Text_inner();
|
||||
else if (String_.Eq(sub_name, "base")) main_page = Parse_base(Bry_.new_u8(sub_nde.Text_inner()));
|
||||
else if (String_.Eq(sub_name, "namespaces")) Parse_namespaces(sub_nde, ns_mgr, case_dflt);
|
||||
else if (String_.Eq(sub_name, "#text")) {} // JAVA.XML.#text: ignore unexpected #text nodes
|
||||
}
|
||||
return new Xob_siteinfo_nde(site_name, db_name, main_page, generator, case_dflt, ns_mgr);
|
||||
}
|
||||
private static byte[] Parse_base(byte[] url) {
|
||||
int page_bgn = Bry_find_.Find_fwd(url, gplx.xowa.htmls.hrefs.Xoh_href_.Bry__wiki, 0);
|
||||
if (page_bgn == Bry_find_.Not_found) { // "/wiki/" not found; EX: "http://mywiki/My_main_page"
|
||||
page_bgn = Bry_find_.Find_bwd(url, Byte_ascii.Slash); // ASSUME last segment is page
|
||||
if (page_bgn == Bry_find_.Not_found) throw Err_.new_("Xob_siteinfo_parser_", "could not parse main page url", "url", url);
|
||||
++page_bgn; // add 1 to position after slash
|
||||
}
|
||||
else // "/wiki/" found
|
||||
page_bgn += gplx.xowa.htmls.hrefs.Xoh_href_.Len__wiki; // position bgn after "/wiki/"
|
||||
return Bry_.Mid(url, page_bgn, url.length); // extract everything after "page_bgn"; EX: "http://en.wikipedia.org/wiki/Main_Page" -> "Main_Page"
|
||||
}
|
||||
private static void Parse_namespaces(XmlNde grp_nde, Xow_ns_mgr ns_mgr, String case_dflt) {
|
||||
ns_mgr.Clear(); // NOTE: wipe out any preexisting ns; use siteinfo.xml as definitive list
|
||||
int grp_len = grp_nde.SubNdes().Count();
|
||||
for (int i = 0; i < grp_len; ++i) {
|
||||
XmlNde itm_nde = grp_nde.SubNdes().Get_at(i); if (itm_nde.Atrs().Count() == 0) continue; // JAVA.XML.#text: ignore unexpected #text nodes
|
||||
String ns_id = itm_nde.Atrs().FetchValOr("key", null); if (ns_id == null) throw Err_.new_("Xob_siteinfo_parser_", "missing key for ns", "ns_xml", itm_nde.Text_inner());
|
||||
String case_match = itm_nde.Atrs().FetchValOr("case", case_dflt); // NOTE: some dumps can omit "case"; EX: https://dumps.wikimedia.org/sep11wiki; DATE:2015-11-01
|
||||
String name = itm_nde.Text_inner();
|
||||
ns_mgr.Add_new(Int_.Parse(ns_id), Bry_.new_u8(name), Xow_ns_case_.To_tid(case_match), false);
|
||||
}
|
||||
ns_mgr.Init_w_defaults();
|
||||
}
|
||||
private static final byte[] Bry_siteinfo_bgn = Bry_.new_a7("<siteinfo>"), Bry_siteinfo_end = Bry_.new_a7("</siteinfo>");
|
||||
}
|
||||
|
||||
@@ -13,3 +13,96 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs.cmds.texts.xmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.cmds.*; import gplx.xowa.bldrs.cmds.texts.*;
|
||||
import org.junit.*; import gplx.xowa.wikis.nss.*;
|
||||
public class Xob_siteinfo_parser__tst {
|
||||
private final Xob_siteinfo_parser__fxt fxt = new Xob_siteinfo_parser__fxt();
|
||||
@Test public void Basic__simplewikt() { // PURPOSE: basic test of siteinfo parse; DATE:2015-11-01
|
||||
fxt.Test__parse(String_.Concat_lines_nl_skip_last
|
||||
( " <siteinfo>"
|
||||
, " <sitename>Wiktionary</sitename>"
|
||||
, " <dbname>simplewiktionary</dbname>"
|
||||
, " <super>https://simple.wiktionary.org/wiki/Main_Page</super>"
|
||||
, " <generator>MediaWiki 1.27.0-wmf.3</generator>"
|
||||
, " <case>case-sensitive</case>"
|
||||
, " <namespaces>"
|
||||
, " <namespace key=\"-2\" case=\"case-sensitive\">Media</namespace>"
|
||||
, " <namespace key=\"-1\" case=\"first-letter\">Special</namespace>"
|
||||
, " <namespace key=\"0\" case=\"case-sensitive\" />"
|
||||
, " <namespace key=\"1\" case=\"case-sensitive\">Talk</namespace>"
|
||||
, " <namespace key=\"2\" case=\"first-letter\">User</namespace>"
|
||||
, " <namespace key=\"3\" case=\"first-letter\">User talk</namespace>"
|
||||
, " <namespace key=\"4\" case=\"case-sensitive\">Wiktionary</namespace>"
|
||||
, " <namespace key=\"5\" case=\"case-sensitive\">Wiktionary talk</namespace>"
|
||||
, " <namespace key=\"6\" case=\"case-sensitive\">File</namespace>"
|
||||
, " <namespace key=\"7\" case=\"case-sensitive\">File talk</namespace>"
|
||||
, " <namespace key=\"8\" case=\"first-letter\">MediaWiki</namespace>"
|
||||
, " <namespace key=\"9\" case=\"first-letter\">MediaWiki talk</namespace>"
|
||||
, " <namespace key=\"10\" case=\"case-sensitive\">Template</namespace>"
|
||||
, " <namespace key=\"11\" case=\"case-sensitive\">Template talk</namespace>"
|
||||
, " <namespace key=\"12\" case=\"case-sensitive\">Help</namespace>"
|
||||
, " <namespace key=\"13\" case=\"case-sensitive\">Help talk</namespace>"
|
||||
, " <namespace key=\"14\" case=\"case-sensitive\">Category</namespace>"
|
||||
, " <namespace key=\"15\" case=\"case-sensitive\">Category talk</namespace>"
|
||||
, " <namespace key=\"828\" case=\"case-sensitive\">Module</namespace>"
|
||||
, " <namespace key=\"829\" case=\"case-sensitive\">Module talk</namespace>"
|
||||
, " <namespace key=\"2300\" case=\"case-sensitive\">Gadget</namespace>"
|
||||
, " <namespace key=\"2301\" case=\"case-sensitive\">Gadget talk</namespace>"
|
||||
, " <namespace key=\"2302\" case=\"case-sensitive\">Gadget definition</namespace>"
|
||||
, " <namespace key=\"2303\" case=\"case-sensitive\">Gadget definition talk</namespace>"
|
||||
, " <namespace key=\"2600\" case=\"first-letter\">Topic</namespace>"
|
||||
, " </namespaces>"
|
||||
, " </siteinfo>"
|
||||
), String_.Concat_lines_nl
|
||||
( "Main_Page|case-sensitive|Wiktionary|simplewiktionary|MediaWiki 1.27.0-wmf.3"
|
||||
, "-2|case-sensitive|Media"
|
||||
, "-1|first-letter|Special"
|
||||
, "0|case-sensitive|"
|
||||
, "1|case-sensitive|Talk"
|
||||
, "2|first-letter|User"
|
||||
, "3|first-letter|User talk"
|
||||
, "4|case-sensitive|Wiktionary"
|
||||
, "5|case-sensitive|Wiktionary talk"
|
||||
, "6|case-sensitive|File"
|
||||
, "7|case-sensitive|File talk"
|
||||
, "8|first-letter|MediaWiki"
|
||||
, "9|first-letter|MediaWiki talk"
|
||||
, "10|case-sensitive|Template"
|
||||
, "11|case-sensitive|Template talk"
|
||||
, "12|case-sensitive|Help"
|
||||
, "13|case-sensitive|Help talk"
|
||||
, "14|case-sensitive|Category"
|
||||
, "15|case-sensitive|Category talk"
|
||||
, "828|case-sensitive|Module"
|
||||
, "829|case-sensitive|Module talk"
|
||||
, "2300|case-sensitive|Gadget"
|
||||
, "2301|case-sensitive|Gadget talk"
|
||||
, "2302|case-sensitive|Gadget definition"
|
||||
, "2303|case-sensitive|Gadget definition talk"
|
||||
, "2600|first-letter|Topic"
|
||||
, "2601|first-letter|2601" // NOTE: Topic_talk doesn't exist in <siteinfo>, but added by XOWA b/c every subj ns must have a talk ns
|
||||
));
|
||||
}
|
||||
@Test public void Case_dflt() { // PURPOSE: missing case should use dflt DATE:2015-11-01
|
||||
fxt.Test__parse(String_.Concat_lines_nl_skip_last
|
||||
( " <siteinfo>"
|
||||
, " <case>case-sensitive</case>"
|
||||
, " <namespaces>"
|
||||
, " <namespace key=\"-2\">Media</namespace>"
|
||||
, " </namespaces>"
|
||||
, " </siteinfo>"
|
||||
), String_.Concat_lines_nl
|
||||
( "Main_Page|case-sensitive|||"
|
||||
, "-2|case-sensitive|Media"
|
||||
));
|
||||
}
|
||||
}
|
||||
class Xob_siteinfo_parser__fxt {
|
||||
private final Xow_ns_mgr ns_mgr = new Xow_ns_mgr(gplx.xowa.langs.cases.Xol_case_mgr_.U8());
|
||||
private final Bry_bfr bfr = Bry_bfr_.New();
|
||||
public void Test__parse(String src_str, String expd) {
|
||||
Xob_siteinfo_nde nde = Xob_siteinfo_parser_.Parse(src_str, ns_mgr);
|
||||
nde.To_bfr(bfr);
|
||||
Tfds.Eq_str_lines(expd, bfr.To_str_and_clear());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,26 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs.cmds.utils; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.cmds.*;
|
||||
import gplx.xowa.bldrs.wkrs.*;
|
||||
import gplx.gfui.*; import gplx.gfui.kits.core.*;
|
||||
public class Xob_alert_cmd extends Xob_cmd__base implements Xob_cmd {
|
||||
public Xob_alert_cmd(Xob_bldr bldr, Xowe_wiki wiki) {super(bldr, wiki);}
|
||||
public Xob_alert_cmd Msg_(String v) {this.msg = v; return this;} private String msg = "no message specified";
|
||||
@Override public void Cmd_run() {
|
||||
Gfui_kit kit = app.Gui_mgr().Kit();
|
||||
if (kit.Tid() != Gfui_kit_.Swt_tid) return;
|
||||
kit.Ask_ok("", "", msg);
|
||||
Xoa_app_.Usr_dlg().Prog_many("", "", msg);
|
||||
}
|
||||
@Override public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
|
||||
if (ctx.Match(k, Invk__text_)) this.msg = m.ReadStr("v");
|
||||
else return Gfo_invk_.Rv_unhandled;
|
||||
return this;
|
||||
} private static final String Invk__text_ = "text_";
|
||||
|
||||
public static final String BLDR_CMD_KEY = "ui.alert";
|
||||
@Override public String Cmd_key() {return BLDR_CMD_KEY;}
|
||||
public static final Xob_cmd Prototype = new Xob_alert_cmd(null, null);
|
||||
@Override public Xob_cmd Cmd_clone(Xob_bldr bldr, Xowe_wiki wiki) {return new Xob_alert_cmd(bldr, wiki);}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,119 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs.cmds.utils; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.cmds.*;
|
||||
import gplx.core.criterias.*;
|
||||
import gplx.xowa.bldrs.wkrs.*;
|
||||
public class Xob_cleanup_cmd extends Xob_itm_basic_base implements Xob_cmd {
|
||||
private String bz2_cmd;
|
||||
private boolean delete_all, delete_tmp;
|
||||
private Criteria_ioMatch[] delete_by_match_ary;
|
||||
public Xob_cleanup_cmd(Xob_bldr bldr, Xowe_wiki wiki) {this.Cmd_ctor(bldr, wiki);}
|
||||
public String Cmd_key() {return Xob_cmd_keys.Key_util_cleanup;}
|
||||
public Xob_cleanup_cmd Delete_sqlite3_(boolean v){delete_sqlite3 = v; return this;} private boolean delete_sqlite3;
|
||||
public Xob_cleanup_cmd Delete_xml_(boolean v) {delete_xml = v; return this;} private boolean delete_xml;
|
||||
public Xob_cleanup_cmd Delete_tdb_(boolean v) {delete_tdb = v; return this;} private boolean delete_tdb;
|
||||
public void Bz2_fil_(Io_url v) {bz2_fil = v;} private Io_url bz2_fil;
|
||||
public void Cmd_run() {
|
||||
Io_url wiki_root_dir = wiki.Fsys_mgr().Root_dir();
|
||||
if (bz2_fil != null) {
|
||||
if (String_.Eq(bz2_cmd, "delete"))
|
||||
Io_mgr.Instance.DeleteFil(bz2_fil);
|
||||
else if (String_.Eq(bz2_cmd, "move"))
|
||||
Io_mgr.Instance.MoveFil(bz2_fil, bz2_fil.OwnerDir().OwnerDir().GenSubFil_nest("done", bz2_fil.NameAndExt()));
|
||||
}
|
||||
if (delete_xml) Io_mgr.Instance.DeleteFil(Xob_page_wkr_cmd.Find_fil_by(wiki_root_dir, "*.xml"));
|
||||
if (delete_tdb) {
|
||||
usr_dlg.Note_many("", "", "bldr.wiki:deleting tdb wiki");
|
||||
Delete_tdb(wiki_root_dir);
|
||||
}
|
||||
if (delete_sqlite3)
|
||||
Delete_wiki_sql(wiki);
|
||||
if (delete_all) {
|
||||
Io_mgr.Instance.DeleteDir_cmd(wiki_root_dir).Exec(); // do not delete subdirs; needed to support "/prv" for fsdb; DATE:2015-04-01
|
||||
Io_mgr.Instance.DeleteDirDeep(app.Usere().Fsys_mgr().Wiki_root_dir().GenSubDir(wiki.Domain_str())); // delete css dir; DATE:2015-07-06
|
||||
}
|
||||
if (delete_by_match_ary != null)
|
||||
Delete_by_match(wiki_root_dir, delete_by_match_ary);
|
||||
if (delete_tmp)
|
||||
Io_mgr.Instance.DeleteDirDeep(wiki_root_dir.GenSubDir("tmp"));
|
||||
}
|
||||
public void Cmd_init(Xob_bldr bldr) {}
|
||||
public void Cmd_bgn(Xob_bldr bldr) {}
|
||||
public void Cmd_end() {}
|
||||
public void Cmd_term() {}
|
||||
@Override public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
|
||||
if (ctx.Match(k, Invk_bz2_cmd_)) bz2_cmd = m.ReadStr("v");
|
||||
else if (ctx.Match(k, Invk_delete_xml_)) delete_xml = m.ReadYn("v");
|
||||
else if (ctx.Match(k, Invk_delete_wiki_)) delete_tdb = m.ReadYn("v");
|
||||
else if (ctx.Match(k, Invk_delete_sqlite3_)) delete_sqlite3 = m.ReadYn("v");
|
||||
else if (ctx.Match(k, Invk_delete_all_)) delete_all = m.ReadYn("v");
|
||||
else if (ctx.Match(k, Invk_bz2_fil_)) bz2_fil = m.ReadIoUrl("v");
|
||||
else if (ctx.Match(k, Invk_delete_by_match_)) delete_by_match_ary = Delete_by_match_parse(m.ReadStr("v"));
|
||||
else if (ctx.Match(k, Invk_delete_tmp_)) delete_tmp = m.ReadYn("v");
|
||||
else return super.Invk(ctx, ikey, k, m);
|
||||
return this;
|
||||
}
|
||||
private static final String Invk_bz2_cmd_ = "bz2_cmd_", Invk_bz2_fil_ = "bz2_fil_"
|
||||
, Invk_delete_xml_ = "delete_xml_", Invk_delete_wiki_ = "delete_wiki_", Invk_delete_sqlite3_ = "delete_sqlite3_"
|
||||
, Invk_delete_all_ = "delete_all_"
|
||||
, Invk_delete_tmp_ = "delete_tmp_"
|
||||
, Invk_delete_by_match_ = "delete_by_match"
|
||||
;
|
||||
private static Criteria_ioMatch[] Delete_by_match_parse(String raw) {
|
||||
String[] match_ary = String_.Split(raw, '|');
|
||||
int match_ary_len = match_ary.length;
|
||||
Criteria_ioMatch[] rv = new Criteria_ioMatch[match_ary_len];
|
||||
for (int i = 0; i < rv.length; i++) {
|
||||
String match = match_ary[i];
|
||||
rv[i] = Criteria_ioMatch.parse(true, match, false);
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
private static void Delete_by_match(Io_url dir, Criteria_ioMatch[] match_ary) {
|
||||
int match_len = match_ary.length;
|
||||
Io_url[] subs = Io_mgr.Instance.QueryDir_fils(dir);
|
||||
int subs_len = subs.length;
|
||||
for (int i = 0; i < subs_len; i++) {
|
||||
Io_url sub = subs[i];
|
||||
for (int j = 0; j < match_len; j++) {
|
||||
Criteria_ioMatch match = match_ary[j];
|
||||
if (match.Matches(sub)) {
|
||||
if (sub.Type_fil())
|
||||
Io_mgr.Instance.DeleteFil(sub);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
private static void Delete_tdb(Io_url wiki_root_dir) {
|
||||
Io_url[] dirs = Io_mgr.Instance.QueryDir_args(wiki_root_dir).DirOnly_().DirInclude_().ExecAsUrlAry();
|
||||
int dirs_len = dirs.length;
|
||||
for (int i = 0; i < dirs_len; i++) {
|
||||
Io_url dir = dirs[i];
|
||||
if (gplx.xowa.wikis.tdbs.Xotdb_dir_info_.Dir_name_is_tdb(dir.NameOnly()))
|
||||
Io_mgr.Instance.DeleteDirDeep(dir);
|
||||
}
|
||||
}
|
||||
public static void Delete_wiki_sql(Xowe_wiki wiki) {
|
||||
Gfo_usr_dlg usr_dlg = wiki.Appe().Usr_dlg(); Io_url wiki_root_dir = wiki.Fsys_mgr().Root_dir();
|
||||
if (wiki.Db_mgr().Tid() == gplx.xowa.wikis.dbs.Xodb_mgr_sql.Tid_sql) // NOTE: must check; if empty dir (or text db) than db_mgr will be txt
|
||||
wiki.Db_mgr_as_sql().Core_data_mgr().Rls(); // NOTE: if sqlite files, must rls;
|
||||
Io_url[] files = Io_mgr.Instance.QueryDir_fils(wiki_root_dir);
|
||||
int files_len = files.length;
|
||||
int deleted = 0;
|
||||
String file_prefix = wiki.Domain_str() + "-file"; // NOTE: skip anything with "-file"; EX: "en.wikipedia.org-file.xowa"
|
||||
String html_prefix = wiki.Domain_str() + "-html"; // NOTE: skip anything with "-html"; EX: "en.wikipedia.org-html-ns.000-db.002.xowa"
|
||||
for (int i = 0; i < files_len; i++) {
|
||||
Io_url url = files[i];
|
||||
if ( !String_.Eq(url.Ext(), ".xowa")
|
||||
&& !String_.Eq(url.Ext(), ".sqlite3"))
|
||||
continue;
|
||||
if ( String_.Has_at_bgn(url.NameAndExt(), file_prefix)
|
||||
|| String_.Has_at_bgn(url.NameAndExt(), html_prefix)
|
||||
) continue; // skip
|
||||
Io_mgr.Instance.DeleteFil(url);
|
||||
deleted++;
|
||||
}
|
||||
usr_dlg.Note_many("", "delete_wiki", "deleting sqlite3 files: ~{0} ~{1}", deleted, wiki_root_dir.Raw());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,31 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs.cmds.utils; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.cmds.*;
|
||||
import gplx.core.brys.fmtrs.*;
|
||||
import gplx.xowa.wikis.domains.*;
|
||||
import gplx.xowa.bldrs.wms.dumps.*;
|
||||
public class Xob_core_batch_utl implements Gfo_invk {
|
||||
private final Xob_bldr bldr;
|
||||
private final Bry_fmtr fmtr = Bry_fmtr.keys_("bz2_fil", "wiki_key");
|
||||
public Xob_core_batch_utl(Xob_bldr bldr, byte[] raw) {this.bldr = bldr; fmtr.Fmt_(raw);}
|
||||
private void Run() {
|
||||
Io_url[] bz2_fils = Io_mgr.Instance.QueryDir_fils(bldr.App().Fsys_mgr().Wiki_dir().GenSubDir_nest(Dir_dump, "todo"));
|
||||
Bry_bfr bfr = Bry_bfr_.Reset(Io_mgr.Len_kb);
|
||||
int bz2_fils_len = bz2_fils.length;
|
||||
for (int i = 0; i < bz2_fils_len; i++) {
|
||||
Io_url bz2_fil_url = bz2_fils[i];
|
||||
Xowm_dump_file dump_file = Xowm_dump_file_.parse(Bry_.new_u8(bz2_fil_url.NameOnly()));
|
||||
String domain_str = dump_file.Domain_itm().Domain_str();
|
||||
fmtr.Bld_bfr_many(bfr, bz2_fil_url.Raw(), domain_str);
|
||||
bldr.Usr_dlg().Note_many("", "", "starting script for ~{0}", domain_str);
|
||||
bldr.App().Gfs_mgr().Run_str(bfr.To_str_and_clear());
|
||||
}
|
||||
}
|
||||
public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
|
||||
if (ctx.Match(k, Invk_owner)) return bldr.Cmd_mgr();
|
||||
else if (ctx.Match(k, Invk_run)) Run();
|
||||
return this;
|
||||
} private static final String Invk_owner = "owner", Invk_run = "run";
|
||||
public static String Dir_dump = "#dump";
|
||||
}
|
||||
|
||||
@@ -13,3 +13,41 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs.cmds.utils; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.cmds.*;
|
||||
import gplx.core.ios.*; import gplx.core.threads.*; import gplx.core.envs.*;
|
||||
import gplx.xowa.bldrs.wkrs.*;
|
||||
public class Xob_decompress_bz2_cmd extends Xob_itm_basic_base implements Xob_cmd {
|
||||
public Xob_decompress_bz2_cmd(Xob_bldr bldr, Xowe_wiki wiki) {this.Cmd_ctor(bldr, wiki);}
|
||||
public String Cmd_key() {return Xob_cmd_keys.Key_decompress_bz2;}
|
||||
public void Cmd_init(Xob_bldr bldr) {}
|
||||
public void Cmd_bgn(Xob_bldr bldr) {}
|
||||
public void Cmd_run() {
|
||||
if (Io_mgr.Instance.ExistsFil(trg)) return; // file already exists; don't decompress again
|
||||
usr_dlg.Note_many(GRP_KEY, "bgn", "decompressing ~{0}", src.Raw(), trg.Raw());
|
||||
Decompress(bldr.App(), src.Raw(), trg);
|
||||
}
|
||||
public void Cmd_end() {}
|
||||
public void Cmd_term() {}
|
||||
@Override public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
|
||||
if (ctx.Match(k, Invk_src_)) this.Src_(m.ReadIoUrl("v"));
|
||||
else return super.Invk(ctx, ikey, k, m);
|
||||
return this;
|
||||
} private static final String Invk_src_ = "src_";
|
||||
private void Src_(Io_url v) {
|
||||
src = v;
|
||||
trg = bldr.App().Fsys_mgr().Wiki_dir().GenSubFil_nest(wiki.Domain_str(), v.NameOnly()); // NOTE: NameOnly() will take "enwiki.xml.bz2" and make it "enwiki.xml"
|
||||
} Io_url src, trg;
|
||||
static final String GRP_KEY = "xowa.bldr.cmd.decompress_bz2";
|
||||
public static boolean Decompress(Xoae_app app, String src_fil, Io_url trg_fil) {
|
||||
Io_mgr.Instance.CreateDirIfAbsent(trg_fil.OwnerDir()); // 7zip will fail if dir does not exist
|
||||
Process_adp decompress = app.Prog_mgr().App_decompress_bz2();
|
||||
decompress.Prog_dlg_(app.Usr_dlg()).Run_mode_(Process_adp.Run_mode_async);
|
||||
decompress.Run(src_fil, trg_fil, trg_fil.OwnerDir().Xto_api());
|
||||
while (decompress.Exit_code() == Process_adp.Exit_init) {
|
||||
String size = gplx.core.ios.Io_size_.To_str(Io_mgr.Instance.QueryFil(trg_fil).Size());
|
||||
app.Usr_dlg().Prog_many(GRP_KEY, "decompress", "decompressing: ~{0}", size);
|
||||
Thread_adp_.Sleep(1000);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,35 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs.cmds.utils; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.cmds.*;
|
||||
import gplx.dbs.*; import gplx.core.ios.*; import gplx.core.envs.*;
|
||||
import gplx.xowa.bldrs.wkrs.*; import gplx.xowa.bldrs.wms.dumps.*;
|
||||
public class Xob_delete_cmd extends Xob_cmd__base implements Xob_cmd {
|
||||
private String[] patterns_ary = String_.Ary_empty;
|
||||
public Xob_delete_cmd(Xob_bldr bldr, Xowe_wiki wiki) {super(bldr, wiki);}
|
||||
public Xob_delete_cmd Patterns_ary_(String... v) {this.patterns_ary = v; return this;}
|
||||
@Override public String Cmd_key() {return Xob_cmd_keys.Key_util_delete;}
|
||||
@Override public void Cmd_run() {
|
||||
int len = patterns_ary.length; if (len == 0) return;
|
||||
|
||||
// build filter EX: '*.xml|*.txt'
|
||||
Bry_bfr bfr = Bry_bfr_.New();
|
||||
for (int i = 0; i < len; ++i) {
|
||||
String pattern = patterns_ary[i];
|
||||
if (i != 0) bfr.Add_byte_pipe();
|
||||
bfr.Add_str_u8(pattern);
|
||||
}
|
||||
|
||||
// get files; iterate and delete
|
||||
String file_pattern = bfr.To_str_and_clear();
|
||||
Io_url[] files = Io_mgr.Instance.QueryDir_args(wiki.Fsys_mgr().Root_dir()).Recur_(Bool_.N).FilPath_(file_pattern).ExecAsUrlAry();
|
||||
int files_len = files.length;
|
||||
for (int i = 0; i < files_len; ++i) {
|
||||
Io_url file = files[i];
|
||||
if (file.Ext() == ".sqlite3")
|
||||
Db_conn_bldr.Instance.Get_or_noop(file).Rls_conn();
|
||||
Io_mgr.Instance.DeleteFil(file);
|
||||
}
|
||||
}
|
||||
@Override public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {return Gfo_invk_.Noop;}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,73 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs.cmds.utils; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.cmds.*;
|
||||
import gplx.dbs.*; import gplx.core.ios.*; import gplx.core.envs.*;
|
||||
import gplx.xowa.bldrs.wkrs.*; import gplx.xowa.bldrs.wms.dumps.*;
|
||||
public class Xob_download_cmd extends Xob_cmd__base implements Xob_cmd {
|
||||
private String dump_date = "latest", dump_type = null, dump_src = null;
|
||||
private Io_url dump_trg_zip = null, dump_trg_bin = null;
|
||||
private boolean unzip = true;
|
||||
public Xob_download_cmd(Xob_bldr bldr, Xowe_wiki wiki) {super(bldr, wiki);}
|
||||
public Xob_download_cmd Dump_type_(String v) {dump_type = v; return this;}
|
||||
@Override public String Cmd_key() {return Xob_cmd_keys.Key_util_download;}
|
||||
@Override public void Cmd_run() {
|
||||
// init vars; if no explicit values, calc defaults;
|
||||
if (dump_type == null) throw Err_.new_("bldr", "dump_type must be specified");
|
||||
if (!gplx.core.ios.IoEngine_system.Web_access_enabled) return;
|
||||
Xowm_dump_file dump_file = new Xowm_dump_file(wiki.Domain_str(), dump_date, dump_type);
|
||||
if (dump_src == null) {
|
||||
dump_file.Server_url_(gplx.xowa.bldrs.installs.Xoi_dump_mgr.Server_urls(app)[0]);
|
||||
dump_src = dump_file.File_url();
|
||||
}
|
||||
if (dump_trg_zip == null)
|
||||
dump_trg_zip = wiki.Fsys_mgr().Root_dir().GenSubFil(dump_file.File_name());
|
||||
if (dump_trg_bin == null && unzip)
|
||||
dump_trg_bin = dump_trg_zip.GenNewNameAndExt(dump_trg_zip.NameOnly()); // convert a.sql.gz -> a.sql
|
||||
|
||||
// download
|
||||
usr_dlg.Note_many("", "", "downloading file: now=~{0} src=~{1} trg=~{2}", Datetime_now.Get().XtoStr_fmt_yyyyMMdd_HHmmss(), dump_src, dump_trg_zip.OwnerDir());
|
||||
IoEngine_xrg_downloadFil download_wkr = app.Wmf_mgr().Download_wkr().Download_xrg();
|
||||
download_wkr.Src_last_modified_query_(false).Init(dump_src, dump_trg_zip);
|
||||
if (!download_wkr.Exec())
|
||||
usr_dlg.Warn_many("", "", "download failed: src=~{0} trg=~{1} err=~{2}", dump_src, dump_trg_zip.Raw(), Err_.Message_gplx_full(download_wkr.Rslt_err()));
|
||||
if (unzip) { // parsing unzipped file is faster, but takes up more storage space
|
||||
usr_dlg.Note_many("", "", "unzipping file: now=~{0} trg=~{1}", Datetime_now.Get().XtoStr_fmt_yyyyMMdd_HHmmss(), dump_trg_bin.Raw());
|
||||
Xob_unzip_wkr unzip_wkr = new Xob_unzip_wkr().Init(app).Process_run_mode_(Process_adp.Run_mode_sync_block);
|
||||
unzip_wkr.Decompress(dump_trg_zip, dump_trg_bin);
|
||||
}
|
||||
}
|
||||
@Override public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
|
||||
if (String_.Eq(k, Invk_dump_date_)) dump_date = m.ReadStr("v");
|
||||
else if (String_.Eq(k, Invk_dump_type_)) dump_type = m.ReadStr("v");
|
||||
else if (String_.Eq(k, Invk_dump_src_)) dump_src = m.ReadStr("v");
|
||||
else if (String_.Eq(k, Invk_dump_trg_zip_)) dump_trg_zip = m.ReadIoUrl("v");
|
||||
else if (String_.Eq(k, Invk_dump_trg_bin_)) dump_trg_bin = m.ReadIoUrl("v");
|
||||
else if (String_.Eq(k, Invk_unzip_)) unzip = m.ReadYn("v");
|
||||
else return Gfo_invk_.Rv_unhandled;
|
||||
return this;
|
||||
}
|
||||
private static final String
|
||||
Invk_dump_date_ = "dump_date_", Invk_dump_type_ = "dump_type_", Invk_unzip_ = "unzip_"
|
||||
, Invk_dump_src_ = "dump_src_", Invk_dump_trg_zip_ = "dump_trg_zip_", Invk_dump_trg_bin_ = "dump_trg_bin_";
|
||||
|
||||
public static void Add_if_not_found_many(Xob_bldr bldr, Xowe_wiki wiki, String... dump_types) {
|
||||
IoItmHash itm_hash = Io_mgr.Instance.QueryDir_args(wiki.Fsys_mgr().Root_dir()).ExecAsItmHash();
|
||||
for (String dump_type : dump_types)
|
||||
Add_if_not_found(bldr, wiki, itm_hash, dump_type);
|
||||
}
|
||||
private static void Add_if_not_found(Xob_bldr bldr, Xowe_wiki wiki, IoItmHash itm_hash, String dump_type) {
|
||||
if (!Found(itm_hash, dump_type))
|
||||
bldr.Cmd_mgr().Add(new Xob_download_cmd(bldr, wiki).Dump_type_(dump_type));
|
||||
}
|
||||
private static boolean Found(IoItmHash hash, String dump_type) {
|
||||
String match = String_.Format("{0}.sql", dump_type); // EX: "page_props.sql"
|
||||
int len = hash.Count();
|
||||
for (int i = 0; i < len; i++) {
|
||||
IoItm_base fil = (IoItm_base)hash.Get_at(i);
|
||||
if (String_.Has(fil.Url().NameAndExt(), match))
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,31 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs.cmds.utils; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.cmds.*;
|
||||
import gplx.xowa.bldrs.wkrs.*;
|
||||
import gplx.xowa.wikis.data.*; import gplx.xowa.wikis.dbs.*;
|
||||
public class Xob_exec_sql_cmd implements Xob_cmd {
|
||||
private Xowe_wiki wiki; private int file_idx = -1; private String sql;
|
||||
public Xob_exec_sql_cmd(Xob_bldr bldr, Xowe_wiki wiki) {this.wiki = wiki;}
|
||||
public Xob_cmd Cmd_clone(Xob_bldr bldr, Xowe_wiki wiki) {return null;}
|
||||
public String Cmd_key() {return Xob_cmd_keys.Key_exec_sql;}
|
||||
public void Cmd_run() {
|
||||
Xoae_app app = wiki.Appe();
|
||||
wiki.Init_assert(); // force load; needed to pick up MediaWiki ns for MediaWiki:mainpage
|
||||
Xodb_mgr_sql db_mgr = wiki.Db_mgr_as_sql();
|
||||
Xow_db_mgr fsys_mgr = db_mgr.Core_data_mgr();
|
||||
Xow_db_file file = fsys_mgr.Dbs__get_by_id_or_fail(file_idx);
|
||||
app.Usr_dlg().Plog_many("", "", "exec_sql: running sql; file_idx=~{0} sql=~{1}", file_idx, sql);
|
||||
file.Conn().Exec_sql(sql);
|
||||
}
|
||||
public void Cmd_init(Xob_bldr bldr) {}
|
||||
public void Cmd_bgn(Xob_bldr bldr) {}
|
||||
public void Cmd_end() {}
|
||||
public void Cmd_term() {}
|
||||
public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
|
||||
if (ctx.Match(k, Invk_file_idx_)) file_idx = m.ReadInt("v");
|
||||
else if (ctx.Match(k, Invk_sql_)) sql = m.ReadStr("v");
|
||||
return this;
|
||||
}
|
||||
private static final String Invk_file_idx_ = "file_idx_", Invk_sql_ = "sql_";
|
||||
}
|
||||
|
||||
@@ -13,3 +13,79 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs.cmds.utils; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.cmds.*;
|
||||
import gplx.core.net.*;
|
||||
import gplx.dbs.*;
|
||||
import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.wkrs.*; import gplx.xowa.bldrs.wms.*; import gplx.xowa.bldrs.wms.sites.*;
|
||||
import gplx.xowa.wikis.domains.*; import gplx.xowa.apps.site_cfgs.*;
|
||||
public class Xob_site_meta_cmd implements Xob_cmd {
|
||||
private final Xob_bldr bldr;
|
||||
private String[] wikis; private Io_url db_url; private DateAdp cutoff_time;
|
||||
public Xob_site_meta_cmd(Xob_bldr bldr, Xow_wiki wiki) {this.bldr = bldr;}
|
||||
public Xob_cmd Cmd_clone(Xob_bldr bldr, Xowe_wiki wiki) {return null;}
|
||||
public String Cmd_key() {return Xob_cmd_keys.Key_site_meta;}
|
||||
public void Cmd_run() {
|
||||
Xoa_app app = bldr.App();
|
||||
if (wikis == null) wikis = Xow_domain_regy.All;
|
||||
if (db_url == null) db_url = app.Fsys_mgr().Cfg_site_meta_fil();
|
||||
if (cutoff_time == null) cutoff_time = Datetime_now.Get().Add_day(-1);
|
||||
Load_all(app, db_url, wikis, cutoff_time);
|
||||
}
|
||||
private void Load_all(Xoa_app app, Io_url db_url, String[] reqd_ary, DateAdp cutoff) {
|
||||
Site_json_parser site_parser = new Site_json_parser(app.Utl__json_parser());
|
||||
Gfo_usr_dlg usr_dlg = app.Usr_dlg();
|
||||
Gfo_inet_conn inet_conn = app.Utl__inet_conn();
|
||||
Ordered_hash reqd_hash = Ordered_hash_.New();
|
||||
int reqd_len = reqd_ary.length;
|
||||
for (int i = 0; i < reqd_len; ++i)
|
||||
reqd_hash.Add_as_key_and_val(reqd_ary[i]);
|
||||
|
||||
Site_core_db json_db = new Site_core_db(db_url);
|
||||
Site_core_itm[] actl_ary = json_db.Tbl__core().Select_all_downloaded(cutoff);
|
||||
int actl_len = actl_ary.length;
|
||||
for (int i = 0; i < actl_len; ++i) { // remove items that have been completed after cutoff date
|
||||
Site_core_itm actl_itm = actl_ary[i];
|
||||
reqd_hash.Del(String_.new_u8(actl_itm.Site_domain()));
|
||||
}
|
||||
|
||||
reqd_len = reqd_hash.Count();
|
||||
for (int i = 0; i < reqd_len; ++i) {
|
||||
String domain_str = (String)reqd_hash.Get_at(i);
|
||||
DateAdp json_date = Datetime_now.Get();
|
||||
byte[] json_text = null;
|
||||
for (int j = 0; j < 5; ++j) {
|
||||
json_text = gplx.xowa.bldrs.wms.Xowm_api_mgr.Call_by_qarg(usr_dlg, inet_conn, domain_str, Xoa_site_cfg_loader__inet.Qarg__all);
|
||||
if (json_text == null)
|
||||
gplx.core.threads.Thread_adp_.Sleep(1000);
|
||||
else
|
||||
break;
|
||||
}
|
||||
byte[] domain_bry = Bry_.new_u8(domain_str);
|
||||
byte[] site_abrv = Xow_abrv_xo_.To_bry(domain_bry);
|
||||
json_db.Tbl__core().Insert(site_abrv, domain_bry, Bool_.N, json_date, json_text);
|
||||
}
|
||||
|
||||
reqd_len = reqd_ary.length;
|
||||
for (int i = 0; i < reqd_len; ++i) {
|
||||
String domain_str = reqd_ary[i];
|
||||
byte[] site_abrv = Xow_abrv_xo_.To_bry(Bry_.new_u8(domain_str));
|
||||
Site_core_itm core_itm = json_db.Tbl__core().Select_itm(site_abrv);
|
||||
if (core_itm.Json_completed()) continue;
|
||||
Site_meta_itm meta_itm = new Site_meta_itm();
|
||||
site_parser.Parse_root(meta_itm, String_.new_u8(core_itm.Site_domain()), core_itm.Json_text());
|
||||
json_db.Save(meta_itm, site_abrv);
|
||||
}
|
||||
}
|
||||
public void Cmd_init(Xob_bldr bldr) {}
|
||||
public void Cmd_bgn(Xob_bldr bldr) {}
|
||||
public void Cmd_end() {}
|
||||
public void Cmd_term() {}
|
||||
public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
|
||||
if (ctx.Match(k, Invk_db_url_)) this.db_url = m.ReadIoUrl("v");
|
||||
else if (ctx.Match(k, Invk_wikis_)) this.wikis = m.ReadStrAry("v", "\n");
|
||||
else if (ctx.Match(k, Invk_cutoff_time_)) this.cutoff_time = m.ReadDate("v");
|
||||
else return Gfo_invk_.Rv_unhandled;
|
||||
return this;
|
||||
}
|
||||
private static String Invk_db_url_ = "db_url_", Invk_wikis_ = "wikis_", Invk_cutoff_time_ = "cutoff_time_";
|
||||
}
|
||||
|
||||
@@ -13,3 +13,28 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs.cmds.utils; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.cmds.*;
|
||||
import gplx.core.envs.*;
|
||||
public class Xob_unzip_wkr {
|
||||
private Process_adp decompress_bz2, decompress_zip, decompress_gz, process;
|
||||
public int Process_exit_code() {return process.Exit_code();}
|
||||
public byte Process_run_mode() {return process_run_mode;} public Xob_unzip_wkr Process_run_mode_(byte v) {process_run_mode = v; return this;} private byte process_run_mode = Process_adp.Run_mode_async;
|
||||
public Xob_unzip_wkr Init(Xoae_app app) {return Init(app.Prog_mgr().App_decompress_bz2(), app.Prog_mgr().App_decompress_zip(), app.Prog_mgr().App_decompress_gz());}
|
||||
public Xob_unzip_wkr Init(Process_adp decompress_bz2, Process_adp decompress_zip, Process_adp decompress_gz) {
|
||||
this.decompress_bz2 = decompress_bz2;
|
||||
this.decompress_zip = decompress_zip;
|
||||
this.decompress_gz = decompress_gz;
|
||||
return this;
|
||||
}
|
||||
public void Decompress(Io_url src, Io_url trg) {
|
||||
String src_ext = src.Ext();
|
||||
if (String_.Eq(src_ext, gplx.core.ios.streams.Io_stream_tid_.Ext__bz2)) process = decompress_bz2;
|
||||
else if (String_.Eq(src_ext, gplx.core.ios.streams.Io_stream_tid_.Ext__zip)) process = decompress_zip;
|
||||
else if (String_.Eq(src_ext, gplx.core.ios.streams.Io_stream_tid_.Ext__gz)) process = decompress_gz;
|
||||
else throw Err_.new_unhandled(src_ext);
|
||||
Io_url trg_owner_dir = trg.OwnerDir();
|
||||
Io_mgr.Instance.CreateDirIfAbsent(trg_owner_dir);
|
||||
process.Run_mode_(process_run_mode);
|
||||
process.Run(src, trg, trg_owner_dir.Xto_api());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,52 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs.cmds.utils; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.cmds.*;
|
||||
import gplx.xowa.bldrs.wkrs.*; import gplx.xowa.bldrs.xmls.*;
|
||||
import gplx.xowa.wikis.dbs.*; import gplx.xowa.wikis.data.tbls.*;
|
||||
public class Xob_xml_dumper_cmd implements Xob_cmd {
|
||||
private final Xowe_wiki wiki; private final Gfo_usr_dlg usr_dlg;
|
||||
private final Xob_xml_dumper xml_dumper = new Xob_xml_dumper(); private int commit_interval = 1000;
|
||||
private Io_url dump_url;
|
||||
public Xob_xml_dumper_cmd(Xob_bldr bldr, Xowe_wiki wiki) {this.wiki = wiki; this.usr_dlg = wiki.Appe().Usr_dlg();}
|
||||
public Xob_cmd Cmd_clone(Xob_bldr bldr, Xowe_wiki wiki) {return null;}
|
||||
public String Cmd_key() {return Xob_cmd_keys.Key_util_xml_dump;}
|
||||
public void Cmd_init(Xob_bldr bldr) {
|
||||
dump_url = wiki.Fsys_mgr().Root_dir().GenSubFil(wiki.Domain_str() + "-dump.xml");
|
||||
Io_mgr.Instance.DeleteFil(dump_url);
|
||||
}
|
||||
public void Cmd_run() {
|
||||
usr_dlg.Plog_many("", "", Cmd_key() + ":bgn;");
|
||||
String wiki_abrv = "";
|
||||
String main_page = String_.Format("https://{0}/wiki/{1}", wiki.Domain_str(), String_.new_u8(wiki.Props().Main_page()));
|
||||
String ns_case = "first-letter"; // TODO_OLD:
|
||||
xml_dumper.Write_root_bgn(wiki.Ns_mgr(), wiki.Domain_itm(), wiki_abrv, main_page, ns_case, "XOWA " + Xoa_app_.Version);
|
||||
Xodb_page_rdr page_rdr = wiki.Db_mgr().Load_mgr().Get_page_rdr(wiki);
|
||||
Xowd_page_itm page = new Xowd_page_itm();
|
||||
int page_count = 0;
|
||||
try {
|
||||
while (page_rdr.Move_next()) {
|
||||
page_rdr.Read(page);
|
||||
page.Ttl_(wiki.Ttl_parse(page.Ns_id(), page.Ttl_page_db()));
|
||||
xml_dumper.Write_page(page);
|
||||
if ((++page_count % commit_interval) == 0) Commit();
|
||||
}
|
||||
}
|
||||
catch (Exception e) {throw Err_.new_exc(e, "xo", "xml_dumper failed");}
|
||||
finally {page_rdr.Rls();}
|
||||
xml_dumper.Write_root_end();
|
||||
this.Commit();
|
||||
usr_dlg.Plog_many("", "", Cmd_key() + ":end;");
|
||||
}
|
||||
private void Commit() {
|
||||
Io_mgr.Instance.AppendFilStr(dump_url, xml_dumper.Bld_str());
|
||||
}
|
||||
public void Cmd_bgn(Xob_bldr bldr) {}
|
||||
public void Cmd_end() {}
|
||||
public void Cmd_term() {}
|
||||
public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
|
||||
if (ctx.Match(k, Invk_commit_interval_)) commit_interval = m.ReadInt("v");
|
||||
else return Gfo_invk_.Rv_unhandled;
|
||||
return this;
|
||||
} private static final String Invk_commit_interval_ = "commit_interval_";
|
||||
}
|
||||
|
||||
@@ -13,3 +13,283 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs.css; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
|
||||
import gplx.core.brys.fmtrs.*; import gplx.core.ios.*; import gplx.core.envs.*;
|
||||
import gplx.xowa.htmls.*; import gplx.langs.htmls.encoders.*;
|
||||
import gplx.xowa.wikis.nss.*;
|
||||
import gplx.xowa.wikis.*; import gplx.xowa.wikis.domains.*; import gplx.xowa.wikis.data.*;
|
||||
import gplx.xowa.files.downloads.*;
|
||||
import gplx.core.net.*;
|
||||
import gplx.xowa.addons.wikis.htmls.css.bldrs.*; import gplx.xowa.addons.wikis.htmls.css.mgrs.*;
|
||||
import gplx.xowa.wikis.data.fetchers.*;
|
||||
public class Xoa_css_extractor {
|
||||
private Io_url home_css_dir;
|
||||
public IoEngine_xrg_downloadFil Download_xrg() {return download_xrg;} private IoEngine_xrg_downloadFil download_xrg = Io_mgr.Instance.DownloadFil_args("", Io_url_.Empty);
|
||||
public Xoa_css_extractor Wiki_domain_(byte[] v) {wiki_domain = v; return this;} private byte[] wiki_domain;
|
||||
public Xoa_css_extractor Usr_dlg_(Gfo_usr_dlg v) {usr_dlg = v; return this;} private Gfo_usr_dlg usr_dlg;
|
||||
public Xoa_css_extractor Failover_dir_(Io_url v) {failover_dir = v; return this;} private Io_url failover_dir;
|
||||
public Xoa_css_extractor Wiki_html_dir_(Io_url v) {wiki_html_dir = v; return this;} private Io_url wiki_html_dir;
|
||||
public Xoa_css_extractor Mainpage_url_(String v) {mainpage_url = v; return this;} private String mainpage_url;
|
||||
public Xoa_css_extractor Protocol_prefix_(String v) {protocol_prefix = v; return this;} private String protocol_prefix = "https:";// NOTE: changed from http to https; DATE:2015-02-17
|
||||
public Xoa_css_extractor Page_fetcher_(Xow_page_fetcher v) {page_fetcher = v; return this;} private Xow_page_fetcher page_fetcher;
|
||||
public Xoa_css_extractor Css_img_downloader_(Xoa_css_img_downloader v) {this.css_img_downloader = v; return this;} private Xoa_css_img_downloader css_img_downloader;
|
||||
public Xoa_css_extractor Opt_download_css_common_(boolean v) {opt_download_css_common = v; return this;} private boolean opt_download_css_common;
|
||||
public Xoa_css_extractor Url_encoder_(Gfo_url_encoder v) {url_encoder = v; return this;} private Gfo_url_encoder url_encoder;
|
||||
public Xoa_css_extractor Wiki_code_(byte[] v) {this.wiki_code = v; return this;} private byte[] wiki_code = null;
|
||||
private byte[] mainpage_html; private boolean lang_is_ltr = true;
|
||||
private final Gfo_url_parser url_parser = new Gfo_url_parser();
|
||||
public void Init_by_app(Xoae_app app) {
|
||||
this.usr_dlg = app.Usr_dlg();
|
||||
this.home_css_dir = app.Usere().Fsys_mgr().Wiki_html_dir("home").GenSubDir("html");
|
||||
Xof_download_wkr download_wkr = app.Wmf_mgr().Download_wkr();
|
||||
this.download_xrg = download_wkr.Download_xrg();
|
||||
css_img_downloader = new Xoa_css_img_downloader().Ctor(usr_dlg, download_wkr, Bry_.new_u8(protocol_prefix));
|
||||
failover_dir = app.Fsys_mgr().Bin_xowa_dir().GenSubDir_nest("html", "css", "failover");
|
||||
url_encoder = gplx.langs.htmls.encoders.Gfo_url_encoder_.Http_url;
|
||||
}
|
||||
public void Install(Xow_wiki wiki, String css_key) {
|
||||
try {
|
||||
this.wiki_html_dir = wiki.App().Fsys_mgr().Wiki_css_dir(wiki.Domain_str()); // EX: /xowa/user/anonymous/wiki/en.wikipedia.org
|
||||
Io_url css_comm_fil = wiki_html_dir.GenSubFil(Css_common_name);
|
||||
Io_url css_wiki_fil = wiki_html_dir.GenSubFil(Css_wiki_name);
|
||||
wiki.Html__wtr_mgr().Init_css_urls(wiki.App(), wiki.Domain_str(), css_comm_fil, css_wiki_fil);
|
||||
if (wiki.Domain_tid() == Xow_domain_tid_.Tid__home || Env_.Mode_testing()) return; // NOTE: do not download if home_wiki; also needed for TEST
|
||||
if (Io_mgr.Instance.ExistsFil(css_wiki_fil)) return; // css file exists; nothing to generate
|
||||
if (wiki.Html__css_installing()) return;
|
||||
wiki.Html__css_installing_(true);
|
||||
wiki.App().Usr_dlg().Log_many("", "", "generating css for '~{0}'", wiki.Domain_str());
|
||||
if (css_key != null) {
|
||||
if (Install_by_db(wiki, wiki_html_dir, css_key)) return;
|
||||
}
|
||||
if (wiki.Type_is_edit())
|
||||
this.Install_by_wmf((Xowe_wiki)wiki, wiki_html_dir);
|
||||
wiki.Html__css_installing_(false);
|
||||
}
|
||||
catch (Exception e) { // if error, failover; paranoia catch for outliers like bad network connectivity fail, or MediaWiki: message not existing; DATE:2013-11-21
|
||||
wiki.App().Usr_dlg().Warn_many("", "", "failed to get css; failing over; wiki='~{0}' err=~{1}", wiki.Domain_str(), Err_.Message_gplx_full(e));
|
||||
Css_common_failover(); // only failover xowa_common.css; xowa_wiki.css comes from MediaWiki:Common.css / Vector.css
|
||||
wiki.Html__css_installing_(false);
|
||||
}
|
||||
}
|
||||
private void Install_by_wmf(Xowe_wiki wiki, Io_url wiki_html_dir) {
|
||||
opt_download_css_common = wiki.Appe().Cfg().Get_bool_app_or("xowa.bldr.import.download_xowa_common", true); // CFG: Cfg__
|
||||
|
||||
// do not download css if web_access disabled or wiki is other; DATE:2017-02-25
|
||||
boolean wiki_is_other = wiki.Domain_tid() == Xow_domain_tid_.Tid__other;
|
||||
if ( !gplx.core.ios.IoEngine_system.Web_access_enabled
|
||||
|| wiki_is_other)
|
||||
opt_download_css_common = false; // if !web_access_enabled, don't download
|
||||
|
||||
this.wiki_domain = wiki.Domain_bry();
|
||||
mainpage_url = "https://" + wiki.Domain_str(); // NOTE: cannot reuse protocol_prefix b/c "//" needs to be added manually; protocol_prefix is used for logo and images which have form of "//domain/image.png"; changed to https; DATE:2015-02-17
|
||||
if (page_fetcher == null) page_fetcher = new Xow_page_fetcher_wiki();
|
||||
page_fetcher.Wiki_(wiki);
|
||||
this.wiki_html_dir = wiki_html_dir;
|
||||
this.lang_is_ltr = wiki.Lang().Dir_ltr();
|
||||
this.wiki_code = wiki.Domain_abrv();
|
||||
|
||||
// get mainpage; do not download css if wiki is other; DATE:2017-02-25
|
||||
mainpage_html = wiki_is_other ? Bry_.Empty : Mainpage_download_html();
|
||||
|
||||
// generate css
|
||||
Css_common_setup();
|
||||
Css_wiki_setup();
|
||||
Logo_setup();
|
||||
}
|
||||
private boolean Install_by_db(Xow_wiki wiki, Io_url wiki_html_dir, String css_key) {
|
||||
Xow_db_mgr core_db_mgr = wiki.Data__core_mgr();
|
||||
if ( core_db_mgr == null
|
||||
|| core_db_mgr.Props() == null
|
||||
|| core_db_mgr.Props().Schema_is_1()
|
||||
|| !core_db_mgr.Tbl__cfg().Select_yn_or(Xowd_cfg_key_.Grp__wiki_schema, Xow_db_file_schema_props.Key__tbl_css_core, Bool_.N)
|
||||
) {
|
||||
Xoa_app_.Usr_dlg().Warn_many("", "", "css.db not found; wiki=~{0} css_dir=~{1}", wiki.Domain_str(), wiki_html_dir.Raw());
|
||||
return false;
|
||||
}
|
||||
Xow_db_file core_db = core_db_mgr.Db__core();
|
||||
return Xowd_css_core_mgr.Get(core_db.Tbl__css_core(), core_db.Tbl__css_file(), wiki_html_dir, css_key);
|
||||
}
|
||||
public void Css_common_setup() {
|
||||
if (opt_download_css_common)
|
||||
Css_common_download();
|
||||
else
|
||||
Css_common_failover();
|
||||
}
|
||||
private void Css_common_failover() {
|
||||
Io_url trg_fil = wiki_html_dir.GenSubFil(Css_common_name);
|
||||
if (home_css_dir != null) // TEST:
|
||||
Io_mgr.Instance.CopyDirDeep(home_css_dir, trg_fil.OwnerDir()); // NOTE: copy dir first b/c xowa_commons.css will be replaced below
|
||||
Io_mgr.Instance.CopyFil(Css_common_failover_url(), trg_fil, true);
|
||||
}
|
||||
private void Css_common_download() {
|
||||
boolean css_stylesheet_common_missing = true;
|
||||
Io_url trg_fil = wiki_html_dir.GenSubFil(Css_common_name);
|
||||
css_stylesheet_common_missing = !Css_scrape_setup();
|
||||
if (css_stylesheet_common_missing)
|
||||
Io_mgr.Instance.CopyFil(Css_common_failover_url(), trg_fil, true);
|
||||
else
|
||||
css_img_downloader.Chk(wiki_domain, trg_fil);
|
||||
}
|
||||
private Io_url Css_common_failover_url() {
|
||||
Io_url css_commons_url = failover_dir.GenSubDir("xowa_common_override").GenSubFil_ary("xowa_common_", String_.new_u8(wiki_code), ".css");
|
||||
if (Io_mgr.Instance.ExistsFil(css_commons_url)) return css_commons_url; // specific css exists for wiki; use it; EX: xowa_common_wiki_mediawikiwiki.css
|
||||
return failover_dir.GenSubFil(lang_is_ltr ? Css_common_name_ltr : Css_common_name_rtl);
|
||||
}
|
||||
public void Css_wiki_setup() {
|
||||
boolean css_stylesheet_wiki_missing = true;
|
||||
Io_url trg_fil = wiki_html_dir.GenSubFil(Css_wiki_name);
|
||||
if (Io_mgr.Instance.ExistsFil(trg_fil)) return; // don't download if already there
|
||||
css_stylesheet_wiki_missing = !Css_wiki_generate(trg_fil);
|
||||
if (css_stylesheet_wiki_missing)
|
||||
Failover(trg_fil);
|
||||
else
|
||||
css_img_downloader.Chk(wiki_domain, trg_fil);
|
||||
}
|
||||
private boolean Css_wiki_generate(Io_url trg_fil) {
|
||||
Bry_bfr bfr = Bry_bfr_.New();
|
||||
Css_wiki_generate_section(bfr, Ttl_common_css);
|
||||
Css_wiki_generate_section(bfr, Ttl_vector_css);
|
||||
byte[] bry = bfr.To_bry_and_clear();
|
||||
bry = Bry_.Replace(bry, gplx.xowa.bldrs.xmls.Xob_xml_parser_.Bry_tab_ent, gplx.xowa.bldrs.xmls.Xob_xml_parser_.Bry_tab);
|
||||
Io_mgr.Instance.SaveFilBry(trg_fil, bry);
|
||||
return true;
|
||||
} private static final byte[] Ttl_common_css = Bry_.new_a7("Common.css"), Ttl_vector_css = Bry_.new_a7("Vector.css");
|
||||
private boolean Css_wiki_generate_section(Bry_bfr bfr, byte[] ttl) {
|
||||
byte[] page = page_fetcher.Get_by(Xow_ns_.Tid__mediawiki, ttl);
|
||||
if (page == null) return false;
|
||||
if (bfr.Len() != 0) bfr.Add_byte_nl().Add_byte_nl(); // add "\n\n" between sections; !=0 checks against first
|
||||
Css_wiki_section_hdr.Bld_bfr_many(bfr, ttl); // add "/*XOWA:MediaWiki:Common.css*/\n"
|
||||
bfr.Add(page); // add page
|
||||
return true;
|
||||
} private static final Bry_fmtr Css_wiki_section_hdr = Bry_fmtr.new_("/*XOWA:MediaWiki:~{ttl}*/\n", "ttl");
|
||||
public void Logo_setup() {
|
||||
boolean logo_missing = true;
|
||||
Io_url logo_url = wiki_html_dir.GenSubFil("logo.png");
|
||||
if (Io_mgr.Instance.ExistsFil(logo_url)) return; // don't download if already there
|
||||
logo_missing = !Logo_download(logo_url);
|
||||
if (logo_missing)
|
||||
Failover(logo_url);
|
||||
}
|
||||
private boolean Logo_download(Io_url trg_fil) {
|
||||
String src_fil = Logo_find_src();
|
||||
if (src_fil == null) {
|
||||
if (Logo_copy_from_css(trg_fil)) return true;
|
||||
usr_dlg.Warn_many("", "", "failed to extract logo: trg_fil=~{0};", trg_fil.Raw());
|
||||
return false;
|
||||
}
|
||||
String log_msg = usr_dlg.Prog_many("", "", "downloading logo: '~{0}'", src_fil);
|
||||
boolean rv = download_xrg.Prog_fmt_hdr_(log_msg).Src_(src_fil).Trg_(trg_fil).Exec();
|
||||
if (!rv)
|
||||
usr_dlg.Warn_many("", "", "failed to download logo: src_url=~{0};", src_fil);
|
||||
return rv;
|
||||
}
|
||||
private boolean Logo_copy_from_css(Io_url trg_fil) {
|
||||
Io_url commons_file = wiki_html_dir.GenSubFil(Css_common_name);
|
||||
byte[] commons_src = Io_mgr.Instance.LoadFilBry(commons_file);
|
||||
int bgn_pos = Bry_find_.Find_fwd(commons_src, Bry_mw_wiki_logo); if (bgn_pos == Bry_find_.Not_found) return false;
|
||||
bgn_pos += Bry_mw_wiki_logo.length;
|
||||
int end_pos = Bry_find_.Find_fwd(commons_src, Byte_ascii.Quote, bgn_pos + 1); if (end_pos == Bry_find_.Not_found) return false;
|
||||
byte[] src_bry = Bry_.Mid(commons_src, bgn_pos, end_pos);
|
||||
src_bry = Xob_url_fixer.Fix(wiki_domain, src_bry, src_bry.length);
|
||||
if (wiki_html_dir.Info().DirSpr_byte() == Byte_ascii.Backslash)
|
||||
src_bry = Bry_.Replace(src_bry, Byte_ascii.Slash, Byte_ascii.Backslash);
|
||||
Io_url src_fil = wiki_html_dir.GenSubFil(String_.new_u8(src_bry));
|
||||
Io_mgr.Instance.CopyFil(src_fil, trg_fil, true);
|
||||
return true;
|
||||
} private static final byte[] Bry_mw_wiki_logo = Bry_.new_a7(".mw-wiki-logo{background-image:url(\"");
|
||||
private String Logo_find_src() {
|
||||
if (mainpage_html == null) return null;
|
||||
int main_page_html_len = mainpage_html.length;
|
||||
int logo_bgn = Bry_find_.Find_fwd(mainpage_html, Logo_find_bgn, 0); if (logo_bgn == Bry_find_.Not_found) return null;
|
||||
logo_bgn += Logo_find_bgn.length;
|
||||
logo_bgn = Bry_find_.Find_fwd(mainpage_html, Logo_find_end, logo_bgn); if (logo_bgn == Bry_find_.Not_found) return null;
|
||||
logo_bgn += Logo_find_end.length;
|
||||
int logo_end = Bry_find_.Find_fwd(mainpage_html, Byte_ascii.Paren_end, logo_bgn, main_page_html_len); if (logo_bgn == Bry_find_.Not_found) return null;
|
||||
byte[] logo_bry = Bry_.Mid(mainpage_html, logo_bgn, logo_end);
|
||||
return protocol_prefix + String_.new_u8(logo_bry);
|
||||
}
|
||||
private static final byte[] Logo_find_bgn = Bry_.new_a7("<div id=\"p-logo\""), Logo_find_end = Bry_.new_a7("background-image: url(");
|
||||
public boolean Mainpage_download() {
|
||||
mainpage_html = Mainpage_download_html();
|
||||
return mainpage_html != null;
|
||||
}
|
||||
private byte[] Mainpage_download_html() {
|
||||
String main_page_url_temp = mainpage_url;
|
||||
if (Bry_.Eq(wiki_domain, Xow_domain_itm_.Bry__wikidata)) // if wikidata, download css for a Q* page; Main_Page has less css; DATE:2014-09-30
|
||||
main_page_url_temp = main_page_url_temp + "/wiki/Q2";
|
||||
String log_msg = usr_dlg.Prog_many("", "main_page.download", "downloading main page for '~{0}'", main_page_url_temp);
|
||||
byte[] main_page_html = download_xrg.Prog_fmt_hdr_(log_msg).Exec_as_bry(main_page_url_temp);
|
||||
if (main_page_html == null) usr_dlg.Warn_many("", "", "failed to download main_page: src_url=~{0};", main_page_url_temp);
|
||||
return main_page_html;
|
||||
}
|
||||
private void Failover(Io_url trg_fil) {
|
||||
usr_dlg.Note_many("", "", "copying failover file: trg_fil=~{0};", trg_fil.Raw());
|
||||
Io_mgr.Instance.CopyFil(failover_dir.GenSubFil(trg_fil.NameAndExt()), trg_fil, true);
|
||||
}
|
||||
public boolean Css_scrape_setup() {
|
||||
Io_url trg_fil = wiki_html_dir.GenSubFil(Css_common_name);
|
||||
// if (Io_mgr.Instance.ExistsFil(trg_fil)) return; // don't download if already there; DELETED: else main_page is not scraped for all stylesheet links; simple.d: fails; DATE:2014-02-11
|
||||
byte[] css_url = Css_scrape();
|
||||
if (css_url == null) {
|
||||
Css_common_failover();
|
||||
return false;
|
||||
}
|
||||
else {
|
||||
Io_mgr.Instance.SaveFilBry(trg_fil, css_url);
|
||||
css_img_downloader.Chk(wiki_domain, trg_fil);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
private byte[] Css_scrape() {
|
||||
if (mainpage_html == null) return null;
|
||||
String[] css_urls = Css_scrape_urls(mainpage_html); if (css_urls.length == 0) return null;
|
||||
return Css_scrape_download(css_urls);
|
||||
}
|
||||
private String[] Css_scrape_urls(byte[] raw) {
|
||||
List_adp rv = List_adp_.New();
|
||||
int raw_len = raw.length;
|
||||
int prv_pos = 0;
|
||||
int css_find_bgn_len = Css_find_bgn.length;
|
||||
byte[] protocol_prefix_bry = Bry_.new_u8(protocol_prefix);
|
||||
while (true) {
|
||||
int url_bgn = Bry_find_.Find_fwd(raw, Css_find_bgn, prv_pos); if (url_bgn == Bry_find_.Not_found) break; // nothing left; stop
|
||||
url_bgn += css_find_bgn_len;
|
||||
int url_end = Bry_find_.Find_fwd(raw, Byte_ascii.Quote, url_bgn, raw_len); if (url_end == Bry_find_.Not_found) {usr_dlg.Warn_many("", "main_page.css_parse", "could not find css; pos='~{0}' text='~{1}'", url_bgn, String_.new_u8__by_len(raw, url_bgn, url_bgn + 32)); break;}
|
||||
byte[] css_url_bry = Bry_.Mid(raw, url_bgn, url_end);
|
||||
css_url_bry = Bry_.Replace(css_url_bry, Css_amp_find, Css_amp_repl); // & -> &
|
||||
css_url_bry = url_encoder.Decode(css_url_bry); // %2C -> %7C -> |
|
||||
css_url_bry = Xoa_css_extractor.Url_root_fix(wiki_domain, css_url_bry);
|
||||
Gfo_url gfo_url = url_parser.Parse(css_url_bry, 0, css_url_bry.length);
|
||||
if ( gfo_url.Protocol_tid() == Gfo_protocol_itm.Tid_relative_1 // if rel url, add protocol_prefix DATE:2015-08-01
|
||||
|| (Env_.Mode_testing() && gfo_url.Protocol_tid() == Gfo_protocol_itm.Tid_unknown)) // TEST:
|
||||
css_url_bry = Bry_.Add(protocol_prefix_bry, css_url_bry);
|
||||
rv.Add(String_.new_u8(css_url_bry));
|
||||
prv_pos = url_end;
|
||||
}
|
||||
return rv.To_str_ary();
|
||||
} private static final byte[] Css_find_bgn = Bry_.new_a7("<link rel=\"stylesheet\" href=\""), Css_amp_find = Bry_.new_a7("&"), Css_amp_repl = Bry_.new_a7("&");
|
||||
private byte[] Css_scrape_download(String[] css_urls) {
|
||||
int css_urls_len = css_urls.length;
|
||||
Bry_bfr tmp_bfr = Bry_bfr_.New();
|
||||
for (int i = 0; i < css_urls_len; i++) {
|
||||
String css_url = css_urls[i];
|
||||
usr_dlg.Prog_many("", "main_page.css_download", "downloading css for '~{0}'", css_url);
|
||||
download_xrg.Prog_fmt_hdr_(css_url);
|
||||
byte[] css_bry = download_xrg.Exec_as_bry(css_url); if (css_bry == null) continue; // css not found; continue
|
||||
tmp_bfr.Add(Xoa_css_img_downloader.Bry_comment_bgn).Add_str_u8(css_url).Add(Xoa_css_img_downloader.Bry_comment_end).Add_byte_nl();
|
||||
tmp_bfr.Add(css_bry).Add_byte_nl().Add_byte_nl();
|
||||
}
|
||||
return tmp_bfr.To_bry_and_clear();
|
||||
}
|
||||
private static byte[] Url_root_fix(byte[] domain, byte[] url) {// DATE:2015-09-20
|
||||
if (url.length < 3) return url; // need at least 2 chars
|
||||
if ( url[0] == Byte_ascii.Slash // starts with "/" EX: "/w/api.php"
|
||||
&& url[1] != Byte_ascii.Slash // but not "//"; EX: "//en.wikipedia.org"
|
||||
)
|
||||
return Bry_.Add(gplx.xowa.htmls.hrefs.Xoh_href_.Bry__https, domain, url);
|
||||
else
|
||||
return url;
|
||||
}
|
||||
public static final String Css_common_name = "xowa_common.css", Css_wiki_name = "xowa_wiki.css"
|
||||
, Css_common_name_ltr = "xowa_common_ltr.css", Css_common_name_rtl = "xowa_common_rtl.css";
|
||||
}
|
||||
@@ -13,3 +13,65 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs.css; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
|
||||
import org.junit.*; import gplx.core.ios.*; import gplx.langs.htmls.encoders.*; import gplx.xowa.wikis.data.*; import gplx.xowa.files.downloads.*;
|
||||
import gplx.xowa.wikis.data.fetchers.*;
|
||||
public class Xoa_css_extractor_basic_tst {
|
||||
@Before public void init() {fxt.Clear();} private Xoa_css_extractor_fxt fxt = new Xoa_css_extractor_fxt();
|
||||
@Test public void Logo_download() {
|
||||
fxt.Init_fil("mem/http/en.wikipedia.org" , Xoa_css_extractor_fxt.Main_page_html);
|
||||
fxt.Init_fil("mem/http/wiki.png" , "download");
|
||||
fxt.Exec_logo_setup();
|
||||
fxt.Test_fil("mem/xowa/user/anonymous/wiki/en.wikipedia.org/html/logo.png", "download");
|
||||
}
|
||||
@Test public void Logo_download_mw_wiki_logo() {
|
||||
fxt.Init_fil("mem/http/en.wikipedia.org" , "");
|
||||
fxt.Init_fil("mem/xowa/user/anonymous/wiki/en.wikipedia.org/html/a/wiki.png" , "download");
|
||||
fxt.Init_fil("mem/xowa/user/anonymous/wiki/en.wikipedia.org/html/xowa_common.css" , ".mw-wiki-logo{background-image:url(\"//a/wiki.png\");");
|
||||
fxt.Exec_logo_setup();
|
||||
fxt.Test_fil("mem/xowa/user/anonymous/wiki/en.wikipedia.org/html/logo.png" , "download");
|
||||
}
|
||||
@Test public void Logo_failover() {
|
||||
fxt.Init_fil("mem/xowa/bin/any/html/xowa/import/logo.png" , "failover");
|
||||
fxt.Exec_logo_setup();
|
||||
fxt.Test_fil("mem/xowa/user/anonymous/wiki/en.wikipedia.org/html/logo.png", "failover");
|
||||
}
|
||||
@Test public void Css_common_download_failover() {
|
||||
fxt.Css_installer().Opt_download_css_common_(true);
|
||||
fxt.Init_fil("mem/xowa/bin/any/html/xowa/import/xowa_common_ltr.css", "failover");
|
||||
fxt.Exec_css_common_setup();
|
||||
fxt.Test_fil("mem/xowa/user/anonymous/wiki/en.wikipedia.org/html/xowa_common.css", "failover");
|
||||
}
|
||||
@Test public void Css_common_copy() {
|
||||
fxt.Css_installer().Opt_download_css_common_(false);
|
||||
fxt.Init_fil("mem/xowa/bin/any/html/xowa/import/xowa_common_ltr.css", "failover");
|
||||
fxt.Exec_css_common_setup();
|
||||
fxt.Test_fil("mem/xowa/user/anonymous/wiki/en.wikipedia.org/html/xowa_common.css", "failover");
|
||||
}
|
||||
@Test public void Css_common_copy_specific_wiki() { // PURPOSE: css for specific wiki
|
||||
fxt.Css_installer().Opt_download_css_common_(false).Wiki_code_(Bry_.new_a7("enwiki"));
|
||||
fxt.Init_fil("mem/xowa/bin/any/html/xowa/import/xowa_common_override/xowa_common_enwiki.css", "failover");
|
||||
fxt.Exec_css_common_setup();
|
||||
fxt.Test_fil("mem/xowa/user/anonymous/wiki/en.wikipedia.org/html/xowa_common.css", "failover");
|
||||
}
|
||||
@Test public void Css_scrape_download() {
|
||||
fxt.Css_installer().Url_encoder_(Gfo_url_encoder_.Http_url);
|
||||
fxt.Init_fil("mem/http/en.wikipedia.org" , Xoa_css_extractor_fxt.Main_page_html);
|
||||
fxt.Init_fil("mem/http/en.wikipedia.org/common.css" , "download");
|
||||
fxt.Init_fil("mem/http/www/a&0|b,c" , "data=css_0");
|
||||
fxt.Init_fil("mem/http/www/a&1|b,c" , "data=css_1");
|
||||
fxt.Exec_css_mainpage_setup();
|
||||
fxt.Test_fil("mem/xowa/user/anonymous/wiki/en.wikipedia.org/html/xowa_common.css", String_.Concat_lines_nl
|
||||
( "/*XOWA:mem/http/www/a&0|b,c*/"
|
||||
, "data=css_0"
|
||||
, ""
|
||||
, "/*XOWA:mem/http/www/a&1|b,c*/"
|
||||
, "data=css_1"
|
||||
));
|
||||
}
|
||||
@Test public void Css_scrape_failover() {
|
||||
fxt.Init_fil("mem/xowa/bin/any/html/xowa/import/xowa_common_ltr.css", "failover");
|
||||
fxt.Exec_css_mainpage_setup();
|
||||
fxt.Test_fil("mem/xowa/user/anonymous/wiki/en.wikipedia.org/html/xowa_common.css", "failover");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,59 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs.css; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
|
||||
import gplx.core.ios.*;
|
||||
import gplx.xowa.wikis.data.fetchers.*;
|
||||
import gplx.xowa.files.downloads.*;
|
||||
public class Xoa_css_extractor_fxt {
|
||||
public void Clear() {
|
||||
Io_mgr.Instance.InitEngine_mem();
|
||||
Gfo_usr_dlg usr_dlg = Gfo_usr_dlg_.Test();
|
||||
css_installer = new Xoa_css_extractor();
|
||||
css_installer.Download_xrg().Trg_engine_key_(IoEngine_.MemKey);
|
||||
css_installer
|
||||
.Usr_dlg_(usr_dlg)
|
||||
.Wiki_domain_(Bry_.new_a7("en.wikipedia.org"))
|
||||
.Protocol_prefix_("mem/http/")
|
||||
.Mainpage_url_("mem/http/en.wikipedia.org")
|
||||
.Failover_dir_(Io_url_.new_any_("mem/xowa/bin/any/html/xowa/import/")) // "mem/xowa/user/anonymous/wiki/home/html/"
|
||||
.Wiki_html_dir_(Io_url_.new_any_("mem/xowa/user/anonymous/wiki/en.wikipedia.org/html/"))
|
||||
;
|
||||
page_fetcher = new Xow_page_fetcher_test();
|
||||
css_installer.Page_fetcher_(page_fetcher);
|
||||
Xoa_css_img_downloader css_img_downloader = new Xoa_css_img_downloader();
|
||||
css_img_downloader.Ctor(usr_dlg, new Xof_download_wkr_test(), Bry_.new_a7("mem/http/"));
|
||||
css_installer.Css_img_downloader_(css_img_downloader);
|
||||
} private Xow_page_fetcher_test page_fetcher;
|
||||
public Xoa_css_extractor Css_installer() {return css_installer;} private Xoa_css_extractor css_installer;
|
||||
public void Init_page(int ns_id, String ttl, String text) {
|
||||
page_fetcher.Add(ns_id, Bry_.new_a7(ttl), Bry_.new_a7(text));
|
||||
}
|
||||
public void Init_fil_empty(String url) {Init_fil(url, "");}
|
||||
public void Init_fil(String url, String text) {Io_mgr.Instance.SaveFilStr(url, text);}
|
||||
public void Test_fil(String url, String expd) {Tfds.Eq_str_lines(expd, Io_mgr.Instance.LoadFilStr(Io_url_.new_any_(url)));}
|
||||
public void Exec_logo_setup() {
|
||||
css_installer.Mainpage_download();
|
||||
css_installer.Logo_setup();
|
||||
}
|
||||
public void Exec_css_common_setup() {
|
||||
css_installer.Mainpage_download();
|
||||
css_installer.Css_common_setup();
|
||||
}
|
||||
public void Exec_css_wiki_setup() {css_installer.Css_wiki_setup();}
|
||||
public void Exec_css_mainpage_setup() {
|
||||
css_installer.Mainpage_download();
|
||||
css_installer.Css_scrape_setup();
|
||||
}
|
||||
public static String Main_page_html = String_.Concat_lines_nl
|
||||
( "<html>"
|
||||
, " <head>"
|
||||
, " <link rel=\"stylesheet\" href=\"www/a&0%7Cb%2Cc\" />"
|
||||
, " <link rel=\"stylesheet\" href=\"www/a&1%7Cb%2Cc\" />"
|
||||
, " </head>"
|
||||
, " <body>"
|
||||
, " <div id=\"p-logo\" role=\"banner\"><a style=\"background-image: url(wiki.png);\""
|
||||
, " </body>"
|
||||
, "</html>"
|
||||
);
|
||||
}
|
||||
|
||||
@@ -13,3 +13,32 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs.css; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
|
||||
import org.junit.*; import gplx.core.ios.*; import gplx.xowa.wikis.nss.*;
|
||||
public class Xoa_css_extractor_wiki_tst {
|
||||
@Before public void init() {fxt.Clear();} private Xoa_css_extractor_fxt fxt = new Xoa_css_extractor_fxt();
|
||||
@Test public void Css_wiki_generate() {
|
||||
fxt.Init_page(Xow_ns_.Tid__mediawiki, "Common.css" , "css_0");
|
||||
fxt.Init_page(Xow_ns_.Tid__mediawiki, "Vector.css" , "css_1");
|
||||
fxt.Exec_css_wiki_setup();
|
||||
fxt.Test_fil("mem/xowa/user/anonymous/wiki/en.wikipedia.org/html/xowa_wiki.css", String_.Concat_lines_nl
|
||||
( "/*XOWA:MediaWiki:Common.css*/"
|
||||
, "css_0"
|
||||
, ""
|
||||
, "/*XOWA:MediaWiki:Vector.css*/"
|
||||
, "css_1"
|
||||
));
|
||||
}
|
||||
@Test public void Css_wiki_missing() {
|
||||
fxt.Exec_css_wiki_setup();
|
||||
fxt.Test_fil("mem/xowa/user/anonymous/wiki/en.wikipedia.org/html/xowa_wiki.css", "");
|
||||
}
|
||||
@Test public void Css_wiki_tab() { // PURPOSE: swap out 	 for xdat files
|
||||
fxt.Init_page(Xow_ns_.Tid__mediawiki, "Common.css" , "a	b");
|
||||
fxt.Exec_css_wiki_setup();
|
||||
fxt.Test_fil("mem/xowa/user/anonymous/wiki/en.wikipedia.org/html/xowa_wiki.css", String_.Concat_lines_nl
|
||||
( "/*XOWA:MediaWiki:Common.css*/"
|
||||
, "a\tb"
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,177 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs.css; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
|
||||
import gplx.xowa.files.downloads.*; import gplx.core.envs.*;
|
||||
public class Xoa_css_img_downloader {
|
||||
private byte[] wiki_domain;
|
||||
public Xoa_css_img_downloader Ctor(Gfo_usr_dlg usr_dlg, Xof_download_wkr download_wkr, byte[] stylesheet_prefix) {
|
||||
this.usr_dlg = usr_dlg; this.download_wkr = download_wkr; this.stylesheet_prefix = stylesheet_prefix;
|
||||
return this;
|
||||
} private Gfo_usr_dlg usr_dlg; private Xof_download_wkr download_wkr;
|
||||
// prefix prepended to imported stylesheet urls that do not already start with the http protocol
private byte[] stylesheet_prefix;
// TEST: setter exposed b/c tests can handle "mem/" but not "//mem"
public Xoa_css_img_downloader Stylesheet_prefix_(byte[] v) {
  this.stylesheet_prefix = v;
  return this;
}
|
||||
public void Chk(byte[] wiki_domain, Io_url css_fil) {
|
||||
this.wiki_domain = wiki_domain;
|
||||
List_adp img_list = List_adp_.New();
|
||||
byte[] old_bry = Io_mgr.Instance.LoadFilBry(css_fil);
|
||||
byte[] rel_url_prefix = Bry_.Add(Bry_fwd_slashes, wiki_domain);
|
||||
byte[] new_bry = Convert_to_local_urls(rel_url_prefix, old_bry, img_list);
|
||||
Io_url img_dir = css_fil.OwnerDir();
|
||||
Download_fils(img_dir, img_list.To_str_ary());
|
||||
Io_mgr.Instance.SaveFilBry(css_fil, new_bry);
|
||||
}
|
||||
public byte[] Convert_to_local_urls(byte[] rel_url_prefix, byte[] src, List_adp list) {
|
||||
try {
|
||||
int src_len = src.length;
|
||||
int prv_pos = 0;
|
||||
Bry_bfr bfr = Bry_bfr_.New_w_size(src_len);
|
||||
Hash_adp img_hash = Hash_adp_bry.cs();
|
||||
while (true) {
|
||||
int url_pos = Bry_find_.Find_fwd(src, Bry_url, prv_pos);
|
||||
if (url_pos == Bry_find_.Not_found) {bfr.Add_mid(src, prv_pos, src_len); break;} // no more "url("; exit;
|
||||
int bgn_pos = url_pos + Bry_url_len; // set bgn_pos after "url("
|
||||
byte bgn_byte = src[bgn_pos];
|
||||
byte end_byte = Byte_ascii.Null;
|
||||
boolean quoted = true;
|
||||
switch (bgn_byte) { // find end_byte
|
||||
case Byte_ascii.Quote: case Byte_ascii.Apos: // quoted; end_byte is ' or "
|
||||
end_byte = bgn_byte;
|
||||
++bgn_pos;
|
||||
break;
|
||||
default: // not quoted; end byte is ")"
|
||||
end_byte = Byte_ascii.Paren_end;
|
||||
quoted = false;
|
||||
break;
|
||||
}
|
||||
int end_pos = Bry_find_.Find_fwd(src, end_byte, bgn_pos, src_len);
|
||||
if (end_pos == Bry_find_.Not_found) { // unclosed "url("; exit since nothing else will be found
|
||||
usr_dlg.Warn_many(GRP_KEY, "parse.invalid_url.end_missing", "could not find end_sequence for 'url(': bgn='~{0}' end='~{1}'", prv_pos, String_.new_u8__by_len(src, prv_pos, prv_pos + 25));
|
||||
bfr.Add_mid(src, prv_pos, src_len);
|
||||
break;
|
||||
}
|
||||
if (end_pos - bgn_pos == 0) { // empty; "url()"; ignore
|
||||
usr_dlg.Warn_many(GRP_KEY, "parse.invalid_url.empty", "'url(' is empty: bgn='~{0}' end='~{1}'", prv_pos, String_.new_u8__by_len(src, prv_pos, prv_pos + 25));
|
||||
bfr.Add_mid(src, prv_pos, bgn_pos);
|
||||
prv_pos = bgn_pos;
|
||||
continue;
|
||||
}
|
||||
byte[] img_raw = Bry_.Mid(src, bgn_pos, end_pos); int img_raw_len = img_raw.length;
|
||||
if (Bry_.Has_at_bgn(img_raw, Bry_data_image, 0, img_raw_len)) { // base64
|
||||
bfr.Add_mid(src, prv_pos, end_pos); // nothing to download; just add entire String
|
||||
prv_pos = end_pos;
|
||||
continue;
|
||||
}
|
||||
int import_url_end = Import_url_chk(rel_url_prefix, src, src_len, prv_pos, url_pos, img_raw, bfr); // check for embedded stylesheets via @import tag
|
||||
if (import_url_end != Bry_find_.Not_found) {
|
||||
prv_pos = import_url_end;
|
||||
continue;
|
||||
}
|
||||
byte[] img_cleaned = Xob_url_fixer.Fix(wiki_domain, img_raw, img_raw_len);
|
||||
if (img_cleaned == null) { // could not clean img
|
||||
usr_dlg.Warn_many(GRP_KEY, "parse.invalid_url.clean_failed", "could not extract valid http src: bgn='~{0}' end='~{1}'", prv_pos, String_.new_u8(img_raw));
|
||||
bfr.Add_mid(src, prv_pos, bgn_pos); prv_pos = bgn_pos; continue;
|
||||
}
|
||||
if (!img_hash.Has(img_cleaned)) {// only add unique items for download;
|
||||
img_hash.Add_as_key_and_val(img_cleaned);
|
||||
list.Add(String_.new_u8(img_cleaned));
|
||||
}
|
||||
img_cleaned = Replace_invalid_chars(Bry_.Copy(img_cleaned)); // NOTE: must call ByteAry.Copy else img_cleaned will change *inside* hash
|
||||
bfr.Add_mid(src, prv_pos, bgn_pos);
|
||||
if (!quoted) bfr.Add_byte(Byte_ascii.Quote);
|
||||
bfr.Add(img_cleaned);
|
||||
if (!quoted) bfr.Add_byte(Byte_ascii.Quote);
|
||||
prv_pos = end_pos;
|
||||
}
|
||||
return bfr.To_bry_and_clear();
|
||||
}
|
||||
catch (Exception e) {
|
||||
usr_dlg.Warn_many("", "", "failed to convert local_urls: ~{0} ~{1}", String_.new_u8(rel_url_prefix), Err_.Message_gplx_full(e));
|
||||
return src;
|
||||
}
|
||||
}
|
||||
public static byte[] Import_url_build(byte[] stylesheet_prefix, byte[] rel_url_prefix, byte[] css_url) {
|
||||
return Bry_.Has_at_bgn(css_url, Bry_http_protocol) // css_url already starts with "http"; return self; PAGE:tr.n:Main_Page; DATE:2014-06-04
|
||||
? css_url
|
||||
: Bry_.Add(stylesheet_prefix, css_url)
|
||||
;
|
||||
}
|
||||
private int Import_url_chk(byte[] rel_url_prefix, byte[] src, int src_len, int old_pos, int find_bgn, byte[] url_raw, Bry_bfr bfr) {
|
||||
if (find_bgn < Bry_import_len) return Bry_find_.Not_found;
|
||||
if (!Bry_.Match(src, find_bgn - Bry_import_len, find_bgn, Bry_import)) return Bry_find_.Not_found;
|
||||
byte[] css_url = url_raw; int css_url_len = css_url.length;
|
||||
if (css_url_len > 0 && css_url[0] == Byte_ascii.Slash) { // css_url starts with "/"; EX: "/page" or "//site/page" DATE:2014-02-03
|
||||
if (css_url_len > 1 && css_url[1] != Byte_ascii.Slash) // skip if css_url starts with "//"; EX: "//site/page"
|
||||
css_url = Bry_.Add(rel_url_prefix, css_url); // "/w/a.css" -> "//en.wikipedia.org/w/a.css"
|
||||
}
|
||||
css_url = Bry_.Replace(css_url, Byte_ascii.Space, Byte_ascii.Underline); // NOTE: must replace spaces with underlines else download will fail; EX:https://it.wikivoyage.org/w/index.php?title=MediaWiki:Container e Infobox.css&action=raw&ctype=text/css; DATE:2015-03-08
|
||||
byte[] css_src_bry = Import_url_build(stylesheet_prefix, rel_url_prefix, css_url);
|
||||
String css_src_str = String_.new_u8(css_src_bry);
|
||||
download_wkr.Download_xrg().Prog_fmt_hdr_(usr_dlg.Log_many(GRP_KEY, "logo.download", "downloading import for '~{0}'", css_src_str));
|
||||
byte[] css_trg_bry = download_wkr.Download_xrg().Exec_as_bry(css_src_str);
|
||||
if (css_trg_bry == null) {
|
||||
usr_dlg.Warn_many("", "", "could not import css: url=~{0}", css_src_str);
|
||||
return Bry_find_.Not_found; // css not found
|
||||
}
|
||||
bfr.Add_mid(src, old_pos, find_bgn - Bry_import_len).Add_byte_nl();
|
||||
bfr.Add(Bry_comment_bgn).Add(css_url).Add(Bry_comment_end).Add_byte_nl();
|
||||
if (Bry_find_.Find_fwd(css_url, Wikisource_dynimg_ttl) != -1) css_trg_bry = Bry_.Replace(css_trg_bry, Wikisource_dynimg_find, Wikisource_dynimg_repl); // FreedImg hack; PAGE:en.s:Page:Notes_on_Osteology_of_Baptanodon._With_a_Description_of_a_New_Species.pdf/3 DATE:2014-09-06
|
||||
bfr.Add(css_trg_bry).Add_byte_nl();
|
||||
bfr.Add_byte_nl();
|
||||
int semic_pos = Bry_find_.Find_fwd(src, Byte_ascii.Semic, find_bgn + url_raw.length, src_len);
|
||||
return semic_pos + Byte_ascii.Len_1;
|
||||
}
|
||||
private static final byte[]
|
||||
Wikisource_dynimg_ttl = Bry_.new_a7("en.wikisource.org/w/index.php?title=MediaWiki:Dynimg.css")
|
||||
, Wikisource_dynimg_find = Bry_.new_a7(".freedImg img[src*=\"wikipedia\"], .freedImg img[src*=\"wikisource\"], .freedImg img[src*=\"score\"], .freedImg img[src*=\"math\"] {")
|
||||
, Wikisource_dynimg_repl = Bry_.new_a7(".freedImg img[src*=\"wikipedia\"], .freedImg img[src*=\"wikisource\"], /*XOWA:handle file:// paths which will have /commons.wikimedia.org/ but not /wikipedia/ */ .freedImg img[src*=\"wikimedia\"], .freedImg img[src*=\"score\"], .freedImg img[src*=\"math\"] {")
|
||||
;
|
||||
public byte[] Clean_img_url(byte[] raw, int raw_len) {
|
||||
int pos_bgn = 0;
|
||||
if (Bry_.Has_at_bgn(raw, Bry_fwd_slashes, 0, raw_len)) pos_bgn = Bry_fwd_slashes.length;
|
||||
if (Bry_.Has_at_bgn(raw, Bry_http, 0, raw_len)) pos_bgn = Bry_http.length;
|
||||
int pos_slash = Bry_find_.Find_fwd(raw, Byte_ascii.Slash, pos_bgn, raw_len);
|
||||
if (pos_slash == Bry_find_.Not_found) return null; // first segment is site_name; at least one slash must be present for image name; EX: site.org/img_name.jpg
|
||||
if (pos_slash == raw_len - 1) return null; // "site.org/" is invalid
|
||||
int pos_end = raw_len;
|
||||
int pos_question = Bry_find_.Find_bwd(raw, Byte_ascii.Question);
|
||||
if (pos_question != Bry_find_.Not_found)
|
||||
pos_end = pos_question; // remove query params; EX: img_name?key=val
|
||||
return Bry_.Mid(raw, pos_bgn, pos_end);
|
||||
}
|
||||
private void Download_fils(Io_url css_dir, String[] ary) {
|
||||
int ary_len = ary.length;
|
||||
for (int i = 0; i < ary_len; i++) {
|
||||
String src = ary[i];
|
||||
Io_url trg = css_dir.GenSubFil_nest(Op_sys.Cur().Fsys_http_frag_to_url_str(Replace_invalid_chars_str(src)));
|
||||
if (Io_mgr.Instance.ExistsFil(trg)) continue;
|
||||
download_wkr.Download(true, "https://" + src, trg, "download: " + src); // ILN
|
||||
if (Io_mgr.Instance.QueryFil(trg).Size() == 0) { // warn if 0 byte files downloaded; DATE:2015-07-06
|
||||
Xoa_app_.Usr_dlg().Warn_many("", "", "css.download; 0 byte file downloaded; file=~{0}", trg.Raw());
|
||||
}
|
||||
}
|
||||
}
|
||||
// String wrapper for Replace_invalid_chars: encodes to utf8 bytes, replaces, and decodes back
String Replace_invalid_chars_str(String raw_str) {
	byte[] raw_bry = Bry_.new_u8(raw_str);
	byte[] clean_bry = Replace_invalid_chars(raw_bry);
	return String_.new_u8(clean_bry);
}
|
||||
byte[] Replace_invalid_chars(byte[] raw_bry) {
|
||||
int raw_len = raw_bry.length;
|
||||
for (int i = 0; i < raw_len; i++) { // convert invalid wnt chars to underscores
|
||||
byte b = raw_bry[i];
|
||||
switch (b) {
|
||||
//case Byte_ascii.Slash:
|
||||
case Byte_ascii.Backslash: case Byte_ascii.Colon: case Byte_ascii.Star: case Byte_ascii.Question:
|
||||
case Byte_ascii.Quote: case Byte_ascii.Lt: case Byte_ascii.Gt: case Byte_ascii.Pipe:
|
||||
raw_bry[i] = Byte_ascii.Underline;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return raw_bry;
|
||||
}
|
||||
private static final byte[]
|
||||
Bry_url = Bry_.new_a7("url("), Bry_data_image = Bry_.new_a7("data:image/")
|
||||
, Bry_http = Bry_.new_a7("http://"), Bry_fwd_slashes = Bry_.new_a7("//"), Bry_import = Bry_.new_a7("@import ")
|
||||
, Bry_http_protocol = Bry_.new_a7("http")
|
||||
;
|
||||
public static final byte[]
|
||||
Bry_comment_bgn = Bry_.new_a7("/*XOWA:"), Bry_comment_end = Bry_.new_a7("*/");
|
||||
private static final int Bry_url_len = Bry_url.length, Bry_import_len = Bry_import.length;
|
||||
static final String GRP_KEY = "xowa.wikis.init.css";
|
||||
}
|
||||
|
||||
@@ -13,3 +13,169 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs.css; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
|
||||
import org.junit.*; import gplx.xowa.files.downloads.*;
|
||||
public class Xoa_css_img_downloader_tst {
|
||||
@Before public void init() {fxt.Clear();} private Xoa_css_img_downloader_fxt fxt = new Xoa_css_img_downloader_fxt();
|
||||
@Test public void Basic() {
|
||||
fxt.Test_css_convert
|
||||
( "x {url(\"//site/a.jpg\")} y {url(\"//site/b.jpg\")}"
|
||||
, "x {url(\"site/a.jpg\")} y {url(\"site/b.jpg\")}"
|
||||
, "site/a.jpg"
|
||||
, "site/b.jpg"
|
||||
);
|
||||
}
|
||||
@Test public void Unquoted() {
|
||||
fxt.Test_css_convert
|
||||
( "x {url(//site/a.jpg)}"
|
||||
, "x {url(\"site/a.jpg\")}"
|
||||
, "site/a.jpg"
|
||||
);
|
||||
}
|
||||
@Test public void Http() {
|
||||
fxt.Test_css_convert
|
||||
( "x {url(http://site/a.jpg)}"
|
||||
, "x {url(\"site/a.jpg\")}"
|
||||
, "site/a.jpg"
|
||||
);
|
||||
}
|
||||
@Test public void Base64() {
|
||||
fxt.Test_css_convert
|
||||
( "x {url(\"//site/a.jpg\")} y {url(\"data:image/png;base64,BASE64DATA;ABC=\")} z {}"
|
||||
, "x {url(\"site/a.jpg\")} y {url(\"data:image/png;base64,BASE64DATA;ABC=\")} z {}"
|
||||
, "site/a.jpg"
|
||||
);
|
||||
}
|
||||
@Test public void Exc_missing_quote() {
|
||||
fxt.Test_css_convert
|
||||
( "x {url(\"//site/a.jpg\")} y {url(\"//site/b.jpg} z {}"
|
||||
, "x {url(\"site/a.jpg\")} y {url(\"//site/b.jpg} z {}"
|
||||
, "site/a.jpg"
|
||||
);
|
||||
}
|
||||
@Test public void Exc_empty() {
|
||||
fxt.Test_css_convert
|
||||
( "x {url(\"//site/a.jpg\")} y {url(\"\"} z {}"
|
||||
, "x {url(\"site/a.jpg\")} y {url(\"\"} z {}"
|
||||
, "site/a.jpg"
|
||||
);
|
||||
}
|
||||
// @Test public void Exc_name_only() { // COMMENTED: not sure how to handle "b.jpg" (automatically add "current" path?); RESTORE: when example found
|
||||
// fxt.Test_css_convert
|
||||
// ( "x {url(\"//site/a.jpg\")} y {url(\"b.jpg\"} z {}"
|
||||
// , "x {url(\"site/a.jpg\")} y {url(\"b.jpg\"} z {}"
|
||||
// , "site/a.jpg"
|
||||
// );
|
||||
// }
|
||||
@Test public void Repeat() {// PURPOSE.fix: exact same item was being added literally
|
||||
fxt.Test_css_convert
|
||||
( "x {url(\"//site/a.jpg?a=b\")} y {url(\"//site/a.jpg?a=b\"}"
|
||||
, "x {url(\"site/a.jpg\")} y {url(\"site/a.jpg\"}"
|
||||
, "site/a.jpg"
|
||||
);
|
||||
}
|
||||
@Test public void Clean_basic() {fxt.Test_clean_img_url("//site/a.jpg" , "site/a.jpg");}
|
||||
@Test public void Clean_query() {fxt.Test_clean_img_url("//site/a.jpg?key=val" , "site/a.jpg");}
|
||||
@Test public void Clean_dir() {fxt.Test_clean_img_url("//site/a/b/c.jpg?key=val" , "site/a/b/c.jpg");}
|
||||
@Test public void Clean_exc_site_only() {fxt.Test_clean_img_url("//site" , null);}
|
||||
@Test public void Clean_exc_site_only_2() {fxt.Test_clean_img_url("//site/" , null);}
|
||||
@Test public void Import_url() {
|
||||
Io_mgr.Instance.InitEngine_mem();
|
||||
Io_mgr.Instance.SaveFilStr("mem/www/b.css", "imported_css");
|
||||
fxt.Test_css_convert
|
||||
( "x @import url(\"mem/www/b.css\") screen; z"
|
||||
, String_.Concat_lines_nl
|
||||
( "x "
|
||||
, "/*XOWA:mem/www/b.css*/"
|
||||
, "imported_css"
|
||||
, ""
|
||||
, " z"
|
||||
)
|
||||
);
|
||||
}
|
||||
@Test public void Import_url_make() {
|
||||
fxt.Test_import_url("a.org/b" , "http:a.org/b"); // add "stylesheet_prefix"
|
||||
fxt.Test_import_url("http://a.org" , "http://a.org"); // unless it starts with http
|
||||
fxt.Test_import_url("https://a.org" , "https://a.org"); // unless starts with https EX:: handle @import(https://...); PAGE:tr.n:Main_Page; DATE:2014-06-04
|
||||
}
|
||||
@Test public void Import_url_relative() { // PURPOSE: if directory, add domain; "/a/b.css" -> "//domain/a/b.css"; DATE:2014-02-03
|
||||
Io_mgr.Instance.InitEngine_mem();
|
||||
Io_mgr.Instance.SaveFilStr("mem/en.wikipedia.org/www/b.css", "imported_css");
|
||||
fxt.Test_css_convert
|
||||
( "x @import url(\"/www/b.css\") screen; z" // starts with "/"
|
||||
, String_.Concat_lines_nl
|
||||
( "x "
|
||||
, "/*XOWA:mem/en.wikipedia.org/www/b.css*/"
|
||||
, "imported_css"
|
||||
, ""
|
||||
, " z"
|
||||
)
|
||||
);
|
||||
}
|
||||
@Test public void Import_url_relative_skip() { // PURPOSE: if rel path, skip; "//site/a/b.css"; DATE:2014-02-03
|
||||
fxt.Downloader().Stylesheet_prefix_(Bry_.new_a7("mem")); // stylesheet prefix prefix defaults to ""; set to "mem", else test will try to retrieve "//url" which will fail
|
||||
Io_mgr.Instance.InitEngine_mem();
|
||||
Io_mgr.Instance.SaveFilStr("mem//en.wikipedia.org/a/b.css", "imported_css");
|
||||
fxt.Test_css_convert
|
||||
( "x @import url(\"//en.wikipedia.org/a/b.css\") screen; z" // starts with "//"
|
||||
, String_.Concat_lines_nl
|
||||
( "x "
|
||||
, "/*XOWA://en.wikipedia.org/a/b.css*/"
|
||||
, "imported_css"
|
||||
, ""
|
||||
, " z"
|
||||
)
|
||||
);
|
||||
}
|
||||
@Test public void Import_url_space() { // PURPOSE: some css has spaces; replace with underlines else fails when downloaded; EX: https://it.wikivoyage.org/w/index.php?title=MediaWiki:Container e Infobox.css&action=raw&ctype=text/css; DATE:2015-03-08
|
||||
Io_mgr.Instance.InitEngine_mem();
|
||||
Io_mgr.Instance.SaveFilStr("mem/www/b_c.css", "imported_css");
|
||||
fxt.Test_css_convert
|
||||
( "x @import url(\"mem/www/b c.css\") screen; z"
|
||||
, String_.Concat_lines_nl
|
||||
( "x "
|
||||
, "/*XOWA:mem/www/b_c.css*/"
|
||||
, "imported_css"
|
||||
, ""
|
||||
, " z"
|
||||
)
|
||||
);
|
||||
}
|
||||
@Test public void Wikisource_freedimg() { // PURPOSE: check that "wikimedia" is replaced for FreedImg hack; PAGE:en.s:Page:Notes_on_Osteology_of_Baptanodon._With_a_Description_of_a_New_Species.pdf/3 DATE:2014-09-06
|
||||
fxt.Downloader().Stylesheet_prefix_(Bry_.new_a7("mem")); // stylesheet prefix prefix defaults to ""; set to "mem", else test will try to retrieve "//url" which will fail
|
||||
Io_mgr.Instance.InitEngine_mem();
|
||||
Io_mgr.Instance.SaveFilStr("mem//en.wikisource.org/w/index.php?title=MediaWiki:Dynimg.css", ".freedImg img[src*=\"wikipedia\"], .freedImg img[src*=\"wikisource\"], .freedImg img[src*=\"score\"], .freedImg img[src*=\"math\"] {");
|
||||
fxt.Test_css_convert
|
||||
( "x @import url(\"//en.wikisource.org/w/index.php?title=MediaWiki:Dynimg.css\") screen; z" // starts with "//"
|
||||
, String_.Concat_lines_nl
|
||||
( "x "
|
||||
, "/*XOWA://en.wikisource.org/w/index.php?title=MediaWiki:Dynimg.css*/"
|
||||
, ".freedImg img[src*=\"wikipedia\"], .freedImg img[src*=\"wikisource\"], /*XOWA:handle file:// paths which will have /commons.wikimedia.org/ but not /wikipedia/ */ .freedImg img[src*=\"wikimedia\"], .freedImg img[src*=\"score\"], .freedImg img[src*=\"math\"] {"
|
||||
, ""
|
||||
, " z"
|
||||
)
|
||||
);
|
||||
}
|
||||
}
|
||||
class Xoa_css_img_downloader_fxt {
|
||||
public Xoa_css_img_downloader Downloader() {return downloader;} private Xoa_css_img_downloader downloader;
|
||||
public void Clear() {
|
||||
downloader = new Xoa_css_img_downloader();
|
||||
downloader.Ctor(Gfo_usr_dlg_.Test(), new Xof_download_wkr_test(), Bry_.Empty);
|
||||
}
|
||||
public void Test_css_convert(String raw, String expd, String... expd_img_ary) {
|
||||
List_adp actl_img_list = List_adp_.New();
|
||||
byte[] actl_bry = downloader.Convert_to_local_urls(Bry_.new_a7("mem/en.wikipedia.org"), Bry_.new_u8(raw), actl_img_list);
|
||||
Tfds.Eq_str_lines(expd, String_.new_u8(actl_bry));
|
||||
Tfds.Eq_ary_str(expd_img_ary, actl_img_list.To_str_ary());
|
||||
}
|
||||
public void Test_clean_img_url(String raw_str, String expd) {
|
||||
byte[] raw = Bry_.new_a7(raw_str);
|
||||
byte[] actl = downloader.Clean_img_url(raw, raw.length);
|
||||
Tfds.Eq(expd, actl == null ? null : String_.new_a7(actl));
|
||||
}
|
||||
public void Test_import_url(String raw, String expd) {
|
||||
byte[] actl = Xoa_css_img_downloader.Import_url_build(Bry_.new_a7("http:"), Bry_.new_a7("//en.wikipedia.org"), Bry_.new_u8(raw));
|
||||
Tfds.Eq(expd, String_.new_u8(actl));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,43 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs.css; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
|
||||
import gplx.core.btries.*; import gplx.core.primitives.*;
|
||||
class Xob_css_parser {
|
||||
private final Bry_bfr bfr = Bry_bfr_.New_w_size(255);
|
||||
private final Xob_mirror_mgr mgr;
|
||||
private final Xob_css_parser__url url_parser; private final Xob_css_parser__import import_parser;
|
||||
private final Btrie_rv trv = new Btrie_rv();
|
||||
public Xob_css_parser(Xob_mirror_mgr mgr) {
|
||||
this.mgr = mgr;
|
||||
this.url_parser = new Xob_css_parser__url(mgr.Site_url());
|
||||
this.import_parser = new Xob_css_parser__import(url_parser);
|
||||
}
|
||||
public void Parse(byte[] src) {
|
||||
int src_len = src.length; int pos = 0;
|
||||
while (pos < src_len) {
|
||||
byte b = src[pos];
|
||||
Object o = tkns_trie.Match_at_w_b0(trv, b, src, pos, src_len);
|
||||
if (o == null) {
|
||||
bfr.Add_byte(b);
|
||||
++pos;
|
||||
}
|
||||
else {
|
||||
byte tkn_tid = ((Byte_obj_val)o).Val();
|
||||
int match_pos = trv.Pos();
|
||||
Xob_css_tkn__base tkn = null;
|
||||
switch (tkn_tid) {
|
||||
case Tkn_url: tkn = url_parser.Parse(src, src_len, pos, match_pos); break;
|
||||
case Tkn_import: tkn = import_parser.Parse(src, src_len, pos, match_pos); break;
|
||||
}
|
||||
tkn.Process(mgr);
|
||||
pos = tkn.Write(bfr, src);
|
||||
}
|
||||
}
|
||||
}
|
||||
private static final byte Tkn_import = 1, Tkn_url = 2;
|
||||
private static final Btrie_slim_mgr tkns_trie = Btrie_slim_mgr.ci_a7()
|
||||
.Add_str_byte("@import" , Tkn_import)
|
||||
.Add_str_byte(" url(" , Tkn_url)
|
||||
;
|
||||
}
|
||||
|
||||
@@ -13,3 +13,29 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs.css; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
|
||||
import gplx.xowa.files.downloads.*;
|
||||
class Xob_css_parser__import {
|
||||
// // "//id.wikibooks.org/w/index.php?title=MediaWiki:Common.css&oldid=43393&action=raw&ctype=text/css";
|
||||
private final Xob_css_parser__url url_parser;
|
||||
public Xob_css_parser__import(Xob_css_parser__url url_parser) {this.url_parser = url_parser;}
|
||||
public Xob_css_tkn__base Parse(byte[] src, int src_len, int tkn_bgn, int tkn_end) { // " @import"
|
||||
int bgn_pos = Bry_find_.Find_fwd_while_ws(src, tkn_end, src_len); // skip any ws after " @import"
|
||||
if (bgn_pos == src_len) return Xob_css_tkn__warn.new_(tkn_bgn, tkn_end, "mirror.parser.import:EOS after import; bgn=~{0}", tkn_bgn);
|
||||
if (!Bry_.Has_at_bgn(src, Tkn_url_bry, bgn_pos, src_len)) return Xob_css_tkn__warn.new_(tkn_bgn, tkn_end, "mirror.parser.import:url missing; bgn=~{0}", tkn_bgn);
|
||||
tkn_end = bgn_pos + Tkn_url_bry.length;
|
||||
Xob_css_tkn__base frag = url_parser.Parse(src, src_len, bgn_pos, tkn_end);
|
||||
if (frag.Tid() != Xob_css_tkn__url.Tid_url) return Xob_css_tkn__warn.new_(tkn_bgn, frag.Pos_end(), "mirror.parser.import:url invalid; bgn=~{0}", tkn_bgn);
|
||||
Xob_css_tkn__url url_frag = (Xob_css_tkn__url)frag;
|
||||
byte[] src_url = url_frag.Src_url();
|
||||
src_url = Bry_.Replace(src_url, Byte_ascii.Space, Byte_ascii.Underline); // NOTE: must replace spaces with underlines else download will fail; EX:https://it.wikivoyage.org/w/index.php?title=MediaWiki:Container e Infobox.css&action=raw&ctype=text/css; DATE:2015-03-08
|
||||
int semic_pos = Bry_find_.Find_fwd(src, Byte_ascii.Semic, frag.Pos_end(), src_len);
|
||||
return Xob_css_tkn__import.new_(tkn_bgn, semic_pos + 1, src_url, url_frag.Trg_url(), url_frag.Quote_byte());
|
||||
}
|
||||
private static final byte[] Tkn_url_bry = Bry_.new_a7("url(");
|
||||
public static final byte[]
|
||||
Wikisource_dynimg_ttl = Bry_.new_a7("en.wikisource.org/w/index.php?title=MediaWiki:Dynimg.css")
|
||||
, Wikisource_dynimg_find = Bry_.new_a7(".freedImg img[src*=\"wikipedia\"], .freedImg img[src*=\"wikisource\"], .freedImg img[src*=\"score\"], .freedImg img[src*=\"math\"] {")
|
||||
, Wikisource_dynimg_repl = Bry_.new_a7(".freedImg img[src*=\"wikipedia\"], .freedImg img[src*=\"wikisource\"], /*XOWA:handle file:// paths which will have /commons.wikimedia.org/ but not /wikipedia/ */ .freedImg img[src*=\"wikimedia\"], .freedImg img[src*=\"score\"], .freedImg img[src*=\"math\"] {")
|
||||
;
|
||||
}
|
||||
|
||||
@@ -13,3 +13,24 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs.css; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
|
||||
import org.junit.*;
|
||||
public class Xob_css_parser__import_tst {
|
||||
@Before public void init() {fxt.Clear();} private Xob_css_parser__import_fxt fxt = new Xob_css_parser__import_fxt();
|
||||
@Test public void Basic() {fxt.Test_parse_import (" @import url(//site/a.png)" , " @import url('site/a.png')");}
|
||||
@Test public void Warn_eos() {fxt.Test_parse_warn (" @import" , " @import" , "EOS");}
|
||||
@Test public void Warn_missing() {fxt.Test_parse_warn (" @import ('//site/a.png')" , " @import" , "missing");} // no "url("
|
||||
@Test public void Warn_invalid() {fxt.Test_parse_warn (" @import url('//site')" , " @import url('//site')" , "invalid");} // invalid
|
||||
}
|
||||
class Xob_css_parser__import_fxt extends Xob_css_parser__url_fxt { private Xob_css_parser__import import_parser;
|
||||
@Override public void Clear() {
|
||||
super.Clear();
|
||||
this.import_parser = new Xob_css_parser__import(url_parser);
|
||||
}
|
||||
@Override protected void Exec_parse_hook() {
|
||||
this.cur_frag = import_parser.Parse(src_bry, src_bry.length, 0, 8); // 8=" @import".length
|
||||
}
|
||||
public void Test_parse_import(String src_str, String expd) {
|
||||
Exec_parse(src_str, Xob_css_tkn__base.Tid_import, expd);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,44 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs.css; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
|
||||
class Xob_css_parser__url {
|
||||
private final byte[] site;
|
||||
public Xob_css_parser__url(byte[] site) {this.site = site;}
|
||||
public Xob_css_tkn__base Parse(byte[] src, int src_len, int tkn_bgn, int tkn_end) { // " url"
|
||||
int bgn_pos = Bry_find_.Find_fwd_while_ws(src, tkn_end, src_len); // skip any ws after " url("
|
||||
if (bgn_pos == src_len) return Xob_css_tkn__warn.new_(tkn_bgn, tkn_end, "mirror.parser.url:EOS; bgn=~{0}", tkn_bgn);
|
||||
byte end_byte = src[bgn_pos]; // note that first non-ws byte should determine end_byte
|
||||
byte quote_byte = end_byte;
|
||||
switch (end_byte) {
|
||||
case Byte_ascii.Quote: case Byte_ascii.Apos: // quoted; increment position; EX: ' url("a.png")'
|
||||
++bgn_pos;
|
||||
break;
|
||||
default: // not quoted; end byte is ")"; EX: ' url(a.png)'
|
||||
end_byte = Byte_ascii.Paren_end;
|
||||
quote_byte = Byte_ascii.Null;
|
||||
break;
|
||||
}
|
||||
int end_pos = Bry_find_.Find_fwd(src, end_byte, bgn_pos, src_len);
|
||||
if (end_pos == Bry_find_.Not_found) // unclosed "url("; exit since nothing else will be found
|
||||
return Xob_css_tkn__warn.new_(tkn_bgn, tkn_end, "mirror.parser.url:dangling; bgn=~{0} excerpt=~{1}", bgn_pos, String_.new_u8__by_len(src, tkn_bgn, tkn_bgn + 128));
|
||||
if (end_pos - bgn_pos == 0) // empty; "url()"; ignore
|
||||
return Xob_css_tkn__warn.new_(tkn_bgn, tkn_end, "mirror.parser.url:empty; bgn=~{0} excerpt=~{1}", bgn_pos, String_.new_u8__by_len(src, tkn_bgn, tkn_bgn + 128));
|
||||
byte[] url_orig = Bry_.Mid(src, bgn_pos, end_pos); int url_orig_len = url_orig.length;
|
||||
++end_pos; // increment end_pos so rv will be after it;
|
||||
if ( end_byte != Byte_ascii.Paren_end) { // end_byte is apos / quote
|
||||
if ( end_pos < src_len
|
||||
&& src[end_pos] == Byte_ascii.Paren_end)
|
||||
++end_pos;
|
||||
else
|
||||
return Xob_css_tkn__warn.new_(tkn_bgn, end_pos, "mirror.parser.url:base64 dangling; bgn=~{0} excerpt=~{1}", bgn_pos, String_.new_u8(url_orig));
|
||||
}
|
||||
if (Bry_.Has_at_bgn(url_orig, Bry_data_image)) // base64
|
||||
return Xob_css_tkn__base64.new_(tkn_bgn, end_pos);
|
||||
byte[] src_url = Xob_url_fixer.Fix(site, url_orig, url_orig_len);
|
||||
if (src_url == null) // could not convert
|
||||
return Xob_css_tkn__warn.new_(tkn_bgn, end_pos, "mirror.parser.url:invalid url; bgn=~{0} excerpt=~{1}", tkn_bgn, String_.new_u8(url_orig));
|
||||
return Xob_css_tkn__url.new_(tkn_bgn, end_pos, src_url, quote_byte);
|
||||
}
|
||||
private static final byte[] Bry_data_image = Bry_.new_a7("data:image/");
|
||||
}
|
||||
|
||||
@@ -13,3 +13,46 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs.css; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
|
||||
import org.junit.*;
|
||||
public class Xob_css_parser__url_tst {
|
||||
@Before public void init() {fxt.Clear();} private Xob_css_parser__url_fxt fxt = new Xob_css_parser__url_fxt();
|
||||
@Test public void Quote_none() {fxt.Test_parse_url(" url(//site/A.png) b" , " url('site/A.png')");}
|
||||
@Test public void Quote_apos() {fxt.Test_parse_url(" url('//site/A.png') b" , " url('site/A.png')");}
|
||||
@Test public void Quote_quote() {fxt.Test_parse_url(" url(\"//site/A.png\") b" , " url(\"site/A.png\")");}
|
||||
@Test public void Base64() {fxt.Test_parse_base64(" url('data:image/png;base64,BASE64DATA;ABC=') b", " url('data:image/png;base64,BASE64DATA;ABC=')");}
|
||||
@Test public void Base64_dangling() {fxt.Test_parse_warn(" url('data:image/png;base64,BASE64DATA;ABC=' ", " url('data:image/png;base64,BASE64DATA;ABC='", "base64 dangling");}
|
||||
@Test public void Warn_eos() {fxt.Test_parse_warn(" url(" , " url(" , "EOS");}
|
||||
@Test public void Warn_dangling() {fxt.Test_parse_warn(" url(a" , " url(" , "dangling");}
|
||||
@Test public void Warn_empty() {fxt.Test_parse_warn(" url()" , " url(" , "empty");}
|
||||
@Test public void Warn_site() {fxt.Test_parse_warn(" url('//site')" , " url('//site')" , "invalid");}
|
||||
}
|
||||
class Xob_css_parser__url_fxt {
|
||||
protected Xob_css_parser__url url_parser; private final Bry_bfr bfr = Bry_bfr_.New_w_size(32);
|
||||
protected Xob_css_tkn__base cur_frag; protected byte[] src_bry;
|
||||
@gplx.Virtual public void Clear() {
|
||||
url_parser = new Xob_css_parser__url(Bry_.new_a7("site"));
|
||||
}
|
||||
protected void Exec_parse(String src_str, int expd_tid, String expd_str) {
|
||||
this.src_bry = Bry_.new_u8(src_str);
|
||||
this.Exec_parse_hook();
|
||||
cur_frag.Write(bfr, src_bry);
|
||||
String actl_str = bfr.To_str_and_clear();
|
||||
Tfds.Eq(expd_tid, cur_frag.Tid(), "wrong tid; expd={0}, actl={1}", expd_tid, cur_frag.Tid());
|
||||
Tfds.Eq(expd_str, actl_str);
|
||||
}
|
||||
@gplx.Virtual protected void Exec_parse_hook() {
|
||||
this.cur_frag = url_parser.Parse(src_bry, src_bry.length, 0, 5); // 5=" url(".length
|
||||
}
|
||||
public void Test_parse_url(String src_str, String expd) {
|
||||
Exec_parse(src_str, Xob_css_tkn__base.Tid_url, expd);
|
||||
}
|
||||
public void Test_parse_base64(String src_str, String expd) {
|
||||
Exec_parse(src_str, Xob_css_tkn__base.Tid_base64, expd);
|
||||
}
|
||||
public void Test_parse_warn(String src_str, String expd, String warn) {
|
||||
Exec_parse(src_str, Xob_css_tkn__base.Tid_warn, expd);
|
||||
Xob_css_tkn__warn sub_frag = (Xob_css_tkn__warn)cur_frag;
|
||||
Tfds.Eq(true, String_.Has(sub_frag.Fail_msg(), warn));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,104 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs.css; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
|
||||
import gplx.core.envs.*;
|
||||
abstract class Xob_css_tkn__base {
|
||||
public void Init(int tid, int pos_bgn, int pos_end) {
|
||||
this.tid = tid; this.pos_bgn = pos_bgn; this.pos_end = pos_end;
|
||||
}
|
||||
public int Tid() {return tid;} protected int tid;
|
||||
public int Pos_bgn() {return pos_bgn;} protected int pos_bgn;
|
||||
public int Pos_end() {return pos_end;} protected int pos_end;
|
||||
@gplx.Virtual public void Process(Xob_mirror_mgr mgr) {}
|
||||
public abstract int Write(Bry_bfr bfr, byte[] src);
|
||||
public static final int Tid_warn = 1, Tid_base64 = 2, Tid_url = 3, Tid_import = 4;
|
||||
}
|
||||
class Xob_css_tkn__warn extends Xob_css_tkn__base {
|
||||
public String Fail_msg() {return fail_msg;} private String fail_msg;
|
||||
@Override public void Process(Xob_mirror_mgr mgr) {
|
||||
mgr.Usr_dlg().Warn_many("", "", fail_msg);
|
||||
}
|
||||
@Override public int Write(Bry_bfr bfr, byte[] src) {
|
||||
bfr.Add_mid(src, pos_bgn, pos_end);
|
||||
return pos_end;
|
||||
}
|
||||
public static Xob_css_tkn__warn new_(int pos_bgn, int pos_end, String fmt, Object... fmt_args) {
|
||||
Xob_css_tkn__warn rv = new Xob_css_tkn__warn();
|
||||
rv.Init(Tid_warn, pos_bgn, pos_end);
|
||||
rv.fail_msg = String_.Format(fmt, fmt_args);
|
||||
return rv;
|
||||
}
|
||||
}
|
||||
class Xob_css_tkn__base64 extends Xob_css_tkn__base {
|
||||
@Override public int Write(Bry_bfr bfr, byte[] src) {
|
||||
bfr.Add_mid(src, pos_bgn, pos_end);
|
||||
return pos_end;
|
||||
}
|
||||
public static Xob_css_tkn__base64 new_(int pos_bgn, int pos_end) {
|
||||
Xob_css_tkn__base64 rv = new Xob_css_tkn__base64();
|
||||
rv.Init(Tid_base64, pos_bgn, pos_end);
|
||||
return rv;
|
||||
}
|
||||
}
|
||||
class Xob_css_tkn__url extends Xob_css_tkn__base {
|
||||
public byte Quote_byte() {return quote_byte;} private byte quote_byte;
|
||||
public byte[] Src_url() {return src_url;} private byte[] src_url;
|
||||
public byte[] Trg_url() {return trg_url;} private byte[] trg_url;
|
||||
@Override public void Process(Xob_mirror_mgr mgr) {
|
||||
mgr.File_hash().Add_if_dupe_use_1st(src_url, new Xobc_download_itm(Xobc_download_itm.Tid_file, String_.new_u8(src_url), trg_url));
|
||||
}
|
||||
@Override public int Write(Bry_bfr bfr, byte[] src) {
|
||||
byte quote = quote_byte; if (quote == Byte_ascii.Null) quote = Byte_ascii.Apos;
|
||||
bfr.Add_str_a7(" url("); // EX: ' url('
|
||||
bfr.Add_byte(quote).Add(trg_url).Add_byte(quote); // EX: '"a.png"'
|
||||
bfr.Add_byte(Byte_ascii.Paren_end); // EX: ')'
|
||||
return pos_end;
|
||||
}
|
||||
public static Xob_css_tkn__url new_(int pos_bgn, int pos_end, byte[] src_url, byte quote_byte) {
|
||||
Xob_css_tkn__url rv = new Xob_css_tkn__url();
|
||||
rv.Init(Tid_url, pos_bgn, pos_end);
|
||||
rv.src_url = src_url; rv.trg_url = To_fsys(src_url); rv.quote_byte = quote_byte;
|
||||
return rv;
|
||||
}
|
||||
public static byte[] To_fsys(byte[] src) {
|
||||
if (!Op_sys.Cur().Tid_is_wnt()) return src;
|
||||
src = Bry_.Copy(src); // NOTE: must call ByteAry.Copy else url_actl will change *inside* bry
|
||||
int len = src.length;
|
||||
for (int i = 0; i < len; ++i) {
|
||||
byte b = src[i];
|
||||
switch (b) {
|
||||
case Byte_ascii.Slash:
|
||||
case Byte_ascii.Backslash:
|
||||
break;
|
||||
case Byte_ascii.Lt: case Byte_ascii.Gt: case Byte_ascii.Colon: case Byte_ascii.Pipe: case Byte_ascii.Question: case Byte_ascii.Star: case Byte_ascii.Quote:
|
||||
src[i] = Byte_ascii.Underline;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
return src;
|
||||
}
|
||||
}
|
||||
class Xob_css_tkn__import extends Xob_css_tkn__base {
|
||||
public byte Quote_byte() {return quote_byte;} private byte quote_byte;
|
||||
public byte[] Src_url() {return src_url;} private byte[] src_url;
|
||||
public byte[] Trg_url() {return trg_url;} private byte[] trg_url;
|
||||
@Override public void Process(Xob_mirror_mgr mgr) {
|
||||
mgr.Code_add(src_url);
|
||||
}
|
||||
@Override public int Write(Bry_bfr bfr, byte[] src) {
|
||||
byte quote = quote_byte; if (quote == Byte_ascii.Null) quote = Byte_ascii.Apos;
|
||||
bfr.Add_str_a7(" @import url("); // EX: ' @import url('
|
||||
bfr.Add_byte(quote).Add(trg_url).Add_byte(quote); // EX: '"a.png"'
|
||||
bfr.Add_byte(Byte_ascii.Paren_end); // EX: ')'
|
||||
return pos_end;
|
||||
}
|
||||
public static Xob_css_tkn__import new_(int pos_bgn, int pos_end, byte[] src_url, byte[] trg_url, byte quote_byte) {
|
||||
Xob_css_tkn__import rv = new Xob_css_tkn__import();
|
||||
rv.Init(Tid_import, pos_bgn, pos_end);
|
||||
rv.src_url = src_url; rv.trg_url = trg_url; rv.quote_byte = quote_byte;
|
||||
return rv;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,45 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs.css; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
|
||||
import gplx.core.ios.*; import gplx.xowa.files.downloads.*;
|
||||
public class Xob_mirror_mgr {
|
||||
private final Xof_download_wkr download_wkr; private final Xob_css_parser css_parser;
|
||||
private final byte[] page_url; private final Io_url fsys_root;
|
||||
public Xob_mirror_mgr(Gfo_usr_dlg usr_dlg, Xof_download_wkr download_wkr, byte[] site_url, byte[] page_url, Io_url fsys_root) {
|
||||
this.usr_dlg = usr_dlg; this.download_wkr = download_wkr;
|
||||
this.site_url = site_url; this.page_url = page_url; this.fsys_root = fsys_root;
|
||||
this.css_parser = new Xob_css_parser(this);
|
||||
}
|
||||
public Gfo_usr_dlg Usr_dlg() {return usr_dlg;} private final Gfo_usr_dlg usr_dlg;
|
||||
public byte[] Site_url() {return site_url;} private final byte[] site_url;
|
||||
public void Code_add(byte[] src_url) {
|
||||
byte[] trg_url = Xob_css_tkn__url.To_fsys(src_url);
|
||||
code_hash.Add_if_dupe_use_1st(src_url, new Xobc_download_itm(Xobc_download_itm.Tid_css, String_.new_u8(src_url), trg_url));
|
||||
}
|
||||
public Ordered_hash Code_hash() {return code_hash;} private final Ordered_hash code_hash = Ordered_hash_.New();
|
||||
public Ordered_hash File_hash() {return file_hash;} private final Ordered_hash file_hash = Ordered_hash_.New();
|
||||
public void Exec() {
|
||||
usr_dlg.Plog_many("", "", "html_mirror:download.root_page; url=~{0}", page_url);
|
||||
IoEngine_xrg_downloadFil download_xrg = download_wkr.Download_xrg();
|
||||
css_parser.Parse(download_xrg.Exec_as_bry(String_.new_u8(page_url)));
|
||||
while (true) {
|
||||
Xobc_download_itm[] code_ary = (Xobc_download_itm[])code_hash.To_ary_and_clear(Xobc_download_itm.class);
|
||||
int code_ary_len = code_ary.length;
|
||||
if (code_ary_len == 0) break;
|
||||
for (int i = 0; i < code_ary_len; ++i) {
|
||||
Xobc_download_itm code = code_ary[i];
|
||||
byte[] code_src = download_xrg.Exec_as_bry(code.Http_str());
|
||||
Io_mgr.Instance.SaveFilBry(fsys_root.Gen_sub_path_for_os(String_.new_u8(code.Fsys_url())), code_src);
|
||||
css_parser.Parse(code_src);
|
||||
}
|
||||
}
|
||||
Xobc_download_itm[] file_ary = (Xobc_download_itm[])file_hash.To_ary_and_clear(Xobc_download_itm.class);
|
||||
int file_ary_len = file_ary.length;
|
||||
for (int i = 0; i < file_ary_len; ++i) {
|
||||
Xobc_download_itm file = file_ary[i];
|
||||
download_xrg.Init(file.Http_str(), Io_url_.new_fil_(fsys_root.Gen_sub_path_for_os(String_.new_u8(file.Fsys_url()))));
|
||||
download_xrg.Exec();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,49 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs.css; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
|
||||
import org.junit.*;
|
||||
import gplx.xowa.files.downloads.*;
|
||||
public class Xob_mirror_mgr_tst {
|
||||
@Before public void init() {fxt.Clear();} private Xob_mirror_mgr_fxt fxt = new Xob_mirror_mgr_fxt();
|
||||
@Test public void Download_1() {
|
||||
fxt.Fsys().Init_fil("mem/http/enwiki/file/a.png");
|
||||
fxt.Fsys().Init_fil("mem/http/enwiki/wiki/Main_Page", "url('//enwiki/wiki/a.png')");
|
||||
// fxt.Test_css();
|
||||
// fxt.Fsys().Test_fil("url('//enwiki/wiki/a.png')", "url('enwiki/wiki/a.png')"); // remove "//"
|
||||
// fxt.Fsys().Test_fil("mem/fsys/enwiki/file/a.png");
|
||||
}
|
||||
}
|
||||
class Xob_mirror_mgr_fxt {
|
||||
// private Xob_mirror_mgr mirror_mgr;
|
||||
public Io_fsys_fxt Fsys() {return fsys;} private final Io_fsys_fxt fsys = new Io_fsys_fxt();
|
||||
public void Clear() {
|
||||
fsys.Clear();
|
||||
// mirror_mgr = new Xob_mirror_mgr(Gfo_usr_dlg_.Noop, new Xof_download_wkr_test(), Bry_.new_a7("mem/http/enwiki"), Bry_.new_a7("mem/http/enwiki/wiki/Main_Page"), Io_url_.new_dir_("mem/fsys"));
|
||||
}
|
||||
public void Test_css(String raw, String expd) {
|
||||
// byte[] raw_bry = Bry_.new_u8(raw);
|
||||
// mirror_mgr.Exec();
|
||||
}
|
||||
}
|
||||
class Io_fsys_fxt {
|
||||
public void Clear() {
|
||||
Io_mgr.Instance.InitEngine_mem();
|
||||
}
|
||||
public void Init_fil(String url_str) {
|
||||
Io_url url = Io_url_.new_fil_(url_str);
|
||||
Init_fil(url, url.NameAndExt());
|
||||
}
|
||||
public void Init_fil(String url_str, String text) {Init_fil(Io_url_.new_fil_(url_str), text);}
|
||||
public void Init_fil(Io_url url, String text) {
|
||||
Io_mgr.Instance.SaveFilStr(url, text);
|
||||
}
|
||||
public void Test_fil(String url_str) {
|
||||
Io_url url = Io_url_.new_fil_(url_str);
|
||||
Test_fil(url, url.NameAndExt());
|
||||
}
|
||||
public void Test_fil(String url, String expd) {Test_fil(Io_url_.new_fil_(url), expd);}
|
||||
public void Test_fil(Io_url url, String expd) {
|
||||
Tfds.Eq_str_lines(expd, Io_mgr.Instance.LoadFilStr(url));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,85 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs.css; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
|
||||
import gplx.core.primitives.*; import gplx.core.btries.*;
|
||||
class Xob_url_fixer {
|
||||
public static byte[] Fix(byte[] site, byte[] src, int src_len) { // return "site/img.png" if "//site/img.png" or "http://site/img.png"; also, handle "img.png?key=val"
|
||||
int bgn = 0; int bgn_tkn_tid = 0;
|
||||
Object o = Xob_url_fixer_tkn.Bgn_trie().Match_bgn(src, bgn, src_len);
|
||||
if (o != null) {
|
||||
Xob_url_fixer_tkn tkn = (Xob_url_fixer_tkn)o;
|
||||
bgn_tkn_tid = tkn.Tid();
|
||||
switch (bgn_tkn_tid) {
|
||||
case Xob_url_fixer_tkn.Tid_bgn_slash_2:
|
||||
case Xob_url_fixer_tkn.Tid_bgn_http:
|
||||
case Xob_url_fixer_tkn.Tid_bgn_https:
|
||||
bgn = tkn.Raw_len(); // remove "//", "http://", "https://"
|
||||
break;
|
||||
case Xob_url_fixer_tkn.Tid_bgn_slash_1: // convert "/a" to "site/a"
|
||||
src = Bry_.Add(site, src);
|
||||
src_len = src.length;
|
||||
break;
|
||||
}
|
||||
}
|
||||
int pos = bgn, end = src_len; boolean no_slashes = true;
|
||||
Btrie_slim_mgr mid_trie = Xob_url_fixer_tkn.Mid_trie();
|
||||
int[] seg_ary = new int[gplx.xowa.xtns.pfuncs.ttls.Pfunc_rel2abs.Ttl_max];
|
||||
while (pos < src_len) {
|
||||
byte b = src[pos];
|
||||
o = mid_trie.Match_bgn_w_byte(b, src, pos, src_len);
|
||||
if (o != null) {
|
||||
Xob_url_fixer_tkn tkn = (Xob_url_fixer_tkn)o;
|
||||
switch (tkn.Tid()) {
|
||||
case Xob_url_fixer_tkn.Tid_mid_slash: if (no_slashes) no_slashes = false; break;
|
||||
case Xob_url_fixer_tkn.Tid_mid_question: end = pos; pos = src_len; break;
|
||||
case Xob_url_fixer_tkn.Tid_mid_rel_1:
|
||||
case Xob_url_fixer_tkn.Tid_mid_rel_2:
|
||||
Bry_bfr tmp_bfr = Bry_bfr_.New_w_size(src_len);
|
||||
byte[] to_rel_root = Bry_.Mid(src, bgn, pos);
|
||||
byte[] to_rel_qry = Bry_.Mid(src, pos, src_len);
|
||||
src = gplx.xowa.xtns.pfuncs.ttls.Pfunc_rel2abs.Rel2abs(tmp_bfr, seg_ary, to_rel_qry, to_rel_root, Int_obj_ref.New_neg1());
|
||||
bgn = pos = 0;
|
||||
end = src_len = src.length;
|
||||
no_slashes = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
++pos;
|
||||
}
|
||||
if (no_slashes) return null; // invalid; EX: "//site"
|
||||
return Bry_.Mid(src, bgn, end);
|
||||
}
|
||||
}
|
||||
class Xob_url_fixer_tkn {
|
||||
public Xob_url_fixer_tkn(int tid, byte[] raw) {this.tid = tid; this.raw = raw; this.raw_len = raw.length;}
|
||||
public int Tid() {return tid;} private int tid;
|
||||
public byte[] Raw() {return raw;} private byte[] raw;
|
||||
public int Raw_len() {return raw_len;} private int raw_len;
|
||||
public static Xob_url_fixer_tkn new_(int tid, String raw) {return new Xob_url_fixer_tkn(tid, Bry_.new_u8(raw));}
|
||||
private static void trie_add(Btrie_slim_mgr trie, int tid, String s) {trie.Add_obj(s, new_(tid, s));}
|
||||
public static final int Tid_bgn_slash_1 = 1, Tid_bgn_slash_2 = 2, Tid_bgn_http = 3, Tid_bgn_https = 4;
|
||||
private static Btrie_slim_mgr bgn_trie;
|
||||
public static Btrie_slim_mgr Bgn_trie() {
|
||||
if (bgn_trie == null) {
|
||||
bgn_trie = Btrie_slim_mgr.ci_a7();
|
||||
trie_add(bgn_trie, Tid_bgn_slash_1 , "/");
|
||||
trie_add(bgn_trie, Tid_bgn_slash_2 , "//");
|
||||
trie_add(bgn_trie, Tid_bgn_http , "http://");
|
||||
trie_add(bgn_trie, Tid_bgn_https , "https://");
|
||||
}
|
||||
return bgn_trie;
|
||||
}
|
||||
public static final int Tid_mid_rel_1 = 1, Tid_mid_rel_2 = 2, Tid_mid_slash = 3, Tid_mid_question = 4;
|
||||
private static Btrie_slim_mgr mid_trie;
|
||||
public static Btrie_slim_mgr Mid_trie() {
|
||||
if (mid_trie == null) {
|
||||
mid_trie = Btrie_slim_mgr.ci_a7();
|
||||
trie_add(mid_trie, Tid_mid_rel_1 , "/../");
|
||||
trie_add(mid_trie, Tid_mid_rel_2 , "/./");
|
||||
trie_add(mid_trie, Tid_mid_slash , "/");
|
||||
trie_add(mid_trie, Tid_mid_question , "?");
|
||||
}
|
||||
return mid_trie;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,28 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs.css; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
|
||||
import org.junit.*;
|
||||
public class Xob_url_fixer_tst {
|
||||
@Before public void init() {fxt.Clear();} private Xob_url_fixer_fxt fxt = new Xob_url_fixer_fxt();
|
||||
@Test public void Slash2() {fxt.Test_fix("//site/a.png" , "site/a.png");}
|
||||
@Test public void Http() {fxt.Test_fix("http://site/a.png" , "site/a.png");}
|
||||
@Test public void Https() {fxt.Test_fix("https://site/a.png" , "site/a.png");}
|
||||
@Test public void Qarg() {fxt.Test_fix("//site/a.png?key=val" , "site/a.png");}
|
||||
@Test public void Qarg_dir() {fxt.Test_fix("//site/a/b/c.png?key=val" , "site/a/b/c.png");}
|
||||
@Test public void Root() {fxt.Test_fix("/a/b.png" , "site/a/b.png");} // EX:/static/images/project-logos/wikivoyage.png; DATE:2015-05-09
|
||||
@Test public void Rel_dot2() {fxt.Test_fix("//site/a/../b/c.png" , "site/b/c.png");} // DATE:2015-05-09
|
||||
@Test public void Rel_dot2_mult() {fxt.Test_fix("//site/a/../b/../c/d.png" , "site/c/d.png");} // DATE:2015-05-09
|
||||
@Test public void Rel_dot1() {fxt.Test_fix("//site/a/./b/c.png" , "site/a/b/c.png");} // DATE:2015-05-09
|
||||
@Test public void Site_only() {fxt.Test_fix("//site" , null);}
|
||||
}
|
||||
class Xob_url_fixer_fxt {
|
||||
public void Site_(String v) {site_bry = Bry_.new_u8(v);} private byte[] site_bry;
|
||||
public void Clear() {
|
||||
this.Site_("site");
|
||||
}
|
||||
public void Test_fix(String raw, String expd) {
|
||||
byte[] raw_bry = Bry_.new_u8(raw);
|
||||
Tfds.Eq(expd, String_.new_u8(Xob_url_fixer.Fix(site_bry, raw_bry, raw_bry.length)));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,11 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs.css; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
|
||||
/** Value holder for one download: a type id, the http url to fetch, and the file-system url as bytes. */
class Xobc_download_itm {
	public static final int Tid_file = 1;
	public static final int Tid_html = 2;
	public static final int Tid_css = 3;

	private final int tid;
	private final String http_str;
	private final byte[] fsys_url;

	public Xobc_download_itm(int tid, String http_str, byte[] fsys_url) {
		this.tid = tid;
		this.http_str = http_str;
		this.fsys_url = fsys_url;
	}
	/** One of the Tid_ constants. */
	public int Tid() {
		return tid;
	}
	public String Http_str() {
		return http_str;
	}
	/** Returns the same array instance that was passed to the constructor; no copy is made. */
	public byte[] Fsys_url() {
		return fsys_url;
	}
}
|
||||
|
||||
@@ -13,3 +13,29 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs.filters.core; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.filters.*;
|
||||
import gplx.xowa.wikis.ttls.*;
|
||||
public class Xob_ttl_filter_mgr {
|
||||
private boolean exclude_is_empty = true, include_is_empty = true;
|
||||
private final Xob_ttl_filter_mgr_srl srl = new Xob_ttl_filter_mgr_srl();
|
||||
private Hash_adp_bry exclude_hash = Hash_adp_bry.cs(), include_hash = Hash_adp_bry.cs();
|
||||
public void Clear() {
|
||||
exclude_hash.Clear();
|
||||
include_hash.Clear();
|
||||
exclude_is_empty = include_is_empty = true;
|
||||
}
|
||||
public boolean Match_include(byte[] src) {return include_is_empty ? false : include_hash.Has(src);}
|
||||
public boolean Match_exclude(byte[] src) {return exclude_is_empty ? false : exclude_hash.Has(src);}
|
||||
public void Load(boolean exclude, Io_url url) {
|
||||
byte[] src = Io_mgr.Instance.LoadFilBry_loose(url);
|
||||
if (Bry_.Len_gt_0(src)) Load(exclude, src);
|
||||
}
|
||||
public void Load(boolean exclude, byte[] src) {
|
||||
Hash_adp_bry hash = exclude ? exclude_hash : include_hash;
|
||||
srl.Init(hash).Load_by_bry(src);
|
||||
if (exclude)
|
||||
exclude_is_empty = exclude_hash.Count() == 0;
|
||||
else
|
||||
include_is_empty = include_hash.Count() == 0;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,25 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs.filters.core; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.filters.*;
|
||||
import gplx.langs.dsvs.*;
|
||||
class Xob_ttl_filter_mgr_srl extends Dsv_wkr_base {
|
||||
private byte[] ttl; private Hash_adp_bry hash;
|
||||
public Xob_ttl_filter_mgr_srl Init(Hash_adp_bry hash) {this.hash = hash; return this;}
|
||||
@Override public Dsv_fld_parser[] Fld_parsers() {return new Dsv_fld_parser[] {Dsv_fld_parser_.Line_parser__comment_is_pipe};}
|
||||
@Override public boolean Write_bry(Dsv_tbl_parser parser, int fld_idx, byte[] src, int bgn, int end) {
|
||||
switch (fld_idx) {
|
||||
case 0:
|
||||
if (end - bgn == 0) return true; // ignore blank lines
|
||||
if (src[bgn] == Byte_ascii.Pipe) return true; // ignore lines starting with pipe; EX: "| some comment"
|
||||
ttl = Bry_.Mid(src, bgn, end);
|
||||
return true;
|
||||
default: return false;
|
||||
}
|
||||
}
|
||||
@Override public void Commit_itm(Dsv_tbl_parser parser, int pos) {
|
||||
if (ttl == null) return;
|
||||
hash.Add(ttl, ttl);
|
||||
ttl = null;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,40 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs.filters.core; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.filters.*;
|
||||
import org.junit.*;
|
||||
public class Xob_ttl_filter_mgr_srl_tst {
|
||||
@Before public void init() {fxt.Clear();} private final Xob_ttl_filter_mgr_srl_fxt fxt = new Xob_ttl_filter_mgr_srl_fxt();
|
||||
@Test public void One() {fxt.Test_parse("a" , 1, "a");}
|
||||
@Test public void Two() {fxt.Test_parse("a\nb" , 2, "a", "b");}
|
||||
@Test public void Comment() {fxt.Test_parse("|x" , 0);}
|
||||
@Test public void Comment_many() {fxt.Test_parse("|x||" , 0);}
|
||||
@Test public void Blank() {fxt.Test_parse("\n" , 0);}
|
||||
@Test public void Mix() {
|
||||
fxt.Test_parse(String_.Concat_lines_nl_skip_last
|
||||
( "|comment 1"
|
||||
, "a"
|
||||
, ""
|
||||
, "|comment 2"
|
||||
, "b"
|
||||
)
|
||||
, 2, "a", "b")
|
||||
;}
|
||||
}
|
||||
class Xob_ttl_filter_mgr_srl_fxt {
|
||||
private final Xob_ttl_filter_mgr_srl mgr = new Xob_ttl_filter_mgr_srl();
|
||||
private final Hash_adp_bry hash = Hash_adp_bry.cs();
|
||||
public void Clear() {
|
||||
hash.Clear();
|
||||
}
|
||||
public void Test_parse(String src, int expd_count, String... expd_itms) {
|
||||
mgr.Init(hash);
|
||||
mgr.Load_by_bry(Bry_.new_u8(src));
|
||||
Tfds.Eq(expd_count, hash.Count());
|
||||
int expd_len = expd_itms.length;
|
||||
for (int i = 0; i < expd_len; ++i) {
|
||||
String expd_itm = expd_itms[i];
|
||||
Tfds.Eq_true(hash.Has(Bry_.new_u8(expd_itm)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,37 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs.filters.core; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.filters.*;
|
||||
import org.junit.*;
|
||||
public class Xob_ttl_filter_mgr_tst {
|
||||
@Before public void init() {fxt.Clear();} private final Xob_ttl_filter_mgr_fxt fxt = new Xob_ttl_filter_mgr_fxt();
|
||||
@Test public void One() {
|
||||
fxt.Init_load_exclude("A");
|
||||
fxt.Init_load_include("B");
|
||||
fxt.Test_match_exclude_y("A");
|
||||
fxt.Test_match_exclude_n("B", "C");
|
||||
fxt.Test_match_include_y("B");
|
||||
fxt.Test_match_include_n("A", "C");
|
||||
}
|
||||
}
|
||||
class Xob_ttl_filter_mgr_fxt {
|
||||
private final Xob_ttl_filter_mgr mgr = new Xob_ttl_filter_mgr();
|
||||
public void Clear() {
|
||||
mgr.Clear();
|
||||
}
|
||||
public void Init_load_exclude(String itm) {mgr.Load(Bool_.Y, Bry_.new_u8(itm));}
|
||||
public void Init_load_include(String itm) {mgr.Load(Bool_.N, Bry_.new_u8(itm));}
|
||||
public void Test_match_exclude_y(String... itms) {Test_match(Bool_.Y, Bool_.Y, itms);}
|
||||
public void Test_match_exclude_n(String... itms) {Test_match(Bool_.Y, Bool_.N, itms);}
|
||||
public void Test_match_include_y(String... itms) {Test_match(Bool_.N, Bool_.Y, itms);}
|
||||
public void Test_match_include_n(String... itms) {Test_match(Bool_.N, Bool_.N, itms);}
|
||||
private void Test_match(boolean exclude, boolean expd, String... itms) {
|
||||
for (String itm : itms) {
|
||||
byte[] itm_bry = Bry_.new_u8(itm);
|
||||
if (exclude)
|
||||
Tfds.Eq(expd, mgr.Match_exclude(itm_bry), itm);
|
||||
else
|
||||
Tfds.Eq(expd, mgr.Match_include(itm_bry), itm);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,33 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs.filters.dansguardians; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.filters.*;
|
||||
import gplx.core.criterias.*;
|
||||
class Crt__match_exact implements Criteria {
|
||||
public Crt__match_exact(boolean negated, byte[][] ary) {this.negated = negated; Val_as_bry_ary_(ary);}
|
||||
public byte Tid() {return Tid_match_exact;}
|
||||
public String To_str_name() {return "MATCH_EXACT";}
|
||||
public boolean Matches(Object comp_obj) {
|
||||
if (ary_len == 0) return false; // empty array never matches
|
||||
byte[] comp = (byte[])comp_obj;
|
||||
boolean rv = false;
|
||||
for (int i = 0; i < ary_len; i++) {
|
||||
byte[] val = ary[i];
|
||||
if (Bry_.Eq(val, comp)) {
|
||||
rv = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return negated ? !rv : rv;
|
||||
}
|
||||
public boolean Negated() {return negated;} private boolean negated;
|
||||
public byte[][] Val_as_bry_ary() {return ary;} protected byte[][] ary; protected int ary_len;
|
||||
protected void Val_as_bry_ary_(byte[][] v) {
|
||||
this.ary = v;
|
||||
ary_len = v.length;
|
||||
}
|
||||
public void Val_as_obj_(Object v) {Val_as_bry_ary_((byte[][])v);}
|
||||
public void Val_from_args(Hash_adp args) {throw Err_.new_unimplemented();}
|
||||
public String To_str() {return String_.Concat_any(this.To_str_name(), " ", String_.Ary(ary));}
|
||||
public byte Tid_match_exact = 12;
|
||||
}
|
||||
|
||||
@@ -13,3 +13,71 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs.filters.dansguardians; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.filters.*;
|
||||
import gplx.core.primitives.*;
|
||||
class Dg_file {
|
||||
public Dg_file(int id, String rel_path, Dg_rule[] lines) {this.id = id; this.rel_path = rel_path; this.lines = lines;}
|
||||
public int Id() {return id;} private final int id;
|
||||
public String Rel_path() {return rel_path;} private final String rel_path; // EX: goodphrases/weighted_general
|
||||
public Dg_rule[] Lines() {return lines;} private final Dg_rule[] lines;
|
||||
}
|
||||
class Dg_rule {// EX: < wikipedia ><-30>
|
||||
private final Hash_adp_bry word_idx_hash = Hash_adp_bry.cs();
|
||||
public Dg_rule(int file_id, int id, int idx, int tid, byte[] key, int score, Dg_word[] words) {
|
||||
this.file_id = file_id;
|
||||
this.id = id; this.idx = idx; this.tid = tid; this.key = key; this.score = score; this.words = words;
|
||||
if (words != null) { // static rules will have null byte[][]
|
||||
int words_len = words.length;
|
||||
for (int i = 0; i < words_len; ++i) {
|
||||
Dg_word word = words[i];
|
||||
word_idx_hash.Add_bry_obj(word.Raw(), Int_obj_ref.New(i));
|
||||
}
|
||||
}
|
||||
}
|
||||
public int File_id() {return file_id;} private final int file_id;
|
||||
public int Id() {return id;} private final int id;
|
||||
public int Idx() {return idx;} private final int idx;
|
||||
public int Tid() {return tid;} private final int tid;
|
||||
public byte[] Key() {return key;} private final byte[] key;
|
||||
public Dg_word[] Words() {return words;} private final Dg_word[] words;
|
||||
public Hash_adp_bry Word_idx_hash() {return word_idx_hash;}
|
||||
public int Score() {return score;} private final int score;
|
||||
public static final int
|
||||
Tid_rule = 0
|
||||
, Tid_comment = 1
|
||||
, Tid_blank = 3
|
||||
, Tid_invalid = 4
|
||||
;
|
||||
public static final Dg_rule
|
||||
Itm_comment = new Dg_rule(-1, -1, -1, Tid_comment, null, -1, null)
|
||||
, Itm_blank = new Dg_rule(-1, -1, -1, Tid_blank, null, -1, null)
|
||||
, Itm_invalid = new Dg_rule(-1, -1, -1, Tid_invalid, null, -1, null)
|
||||
;
|
||||
public static final int Score_banned = 0;
|
||||
}
|
||||
class Dg_word {
|
||||
public Dg_word(byte[] raw) {this.raw = raw;}
|
||||
public byte[] Raw() {return raw;} private final byte[] raw;
|
||||
public static String Ary_concat(Dg_word[] ary, Bry_bfr bfr, byte dlm) {
|
||||
if (ary == null) return String_.Empty;
|
||||
int len = ary.length;
|
||||
if (len == 0) return String_.Empty;
|
||||
bfr.Add_byte_apos();
|
||||
for (int i = 0; i < len; ++i) {
|
||||
Dg_word itm = ary[i];
|
||||
if (i != 0) bfr.Add_byte(dlm);
|
||||
bfr.Add(itm.Raw());
|
||||
}
|
||||
bfr.Add_byte_apos();
|
||||
return bfr.To_str_and_clear();
|
||||
}
|
||||
public static Dg_word[] Ary_new_by_str_ary(String[] ary) {
|
||||
int ary_len = ary.length;
|
||||
Dg_word[] rv = new Dg_word[ary_len];
|
||||
for (int i = 0; i < ary_len; ++i) {
|
||||
String raw = ary[i];
|
||||
rv[i] = new Dg_word(Bry_.new_u8(raw));
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,161 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs.filters.dansguardians; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.filters.*;
|
||||
import gplx.dbs.*;
|
||||
class Dg_log_mgr {
|
||||
private Db_conn conn;
|
||||
private final Dg_file_tbl tbl_file = new Dg_file_tbl();
|
||||
private final Dg_rule_tbl tbl_rule = new Dg_rule_tbl();
|
||||
private final Dg_page_score_tbl tbl_page_score = new Dg_page_score_tbl();
|
||||
private final Dg_page_rule_tbl tbl_page_rule = new Dg_page_rule_tbl();
|
||||
private final Bry_bfr tmp_bfr = Bry_bfr_.Reset(16);
|
||||
public void Init(Io_url db_url) {
|
||||
Db_conn_bldr_data conn_data = Db_conn_bldr.Instance.Get_or_new(db_url);
|
||||
conn = conn_data.Conn(); boolean created = conn_data.Created();
|
||||
tbl_file.Conn_(conn, created);
|
||||
tbl_rule.Conn_(conn, created);
|
||||
tbl_page_score.Conn_(conn, created);
|
||||
tbl_page_rule.Conn_(conn, created);
|
||||
conn.Txn_bgn("dansguardian");
|
||||
}
|
||||
public void Insert_file(Dg_file file) {tbl_file.Insert(file.Id(), file.Rel_path(), file.Lines().length);}
|
||||
public void Insert_rule(Dg_rule rule) {tbl_rule.Insert(rule.File_id(), rule.Id(), rule.Idx(), rule.Score(), Dg_word.Ary_concat(rule.Words(), tmp_bfr, Byte_ascii.Tilde));}
|
||||
public void Insert_page_score(int log_tid, int page_id, int page_ns, byte[] page_ttl, int page_len, int page_score, int page_rule_count, int clude_type) {
|
||||
tbl_page_score.Insert(log_tid, page_id, page_ns, page_ttl, page_len, page_score, page_rule_count, clude_type);
|
||||
}
|
||||
public void Insert_page_rule(int log_tid, int page_id, int rule_id, int rule_score_total) {tbl_page_rule.Insert(log_tid, page_id, rule_id, rule_score_total);}
|
||||
public void Commit() {conn.Txn_sav();}
|
||||
public void Rls() {conn.Txn_end();}
|
||||
}
|
||||
class Dg_file_tbl {
|
||||
private String tbl_name = "dg_file"; private final Dbmeta_fld_list flds = new Dbmeta_fld_list();
|
||||
private String fld_file_id, fld_file_path, fld_rule_count;
|
||||
private Db_conn conn; private Db_stmt stmt_insert;
|
||||
public void Conn_(Db_conn new_conn, boolean created) {
|
||||
this.conn = new_conn; flds.Clear();
|
||||
fld_file_id = flds.Add_int("file_id");
|
||||
fld_file_path = flds.Add_str("file_path", 512);
|
||||
fld_rule_count = flds.Add_int("rule_count");
|
||||
if (created) {
|
||||
Dbmeta_tbl_itm meta = Dbmeta_tbl_itm.New(tbl_name, flds
|
||||
, Dbmeta_idx_itm.new_unique_by_tbl(tbl_name, "file_id", fld_file_id)
|
||||
);
|
||||
conn.Meta_tbl_create(meta);
|
||||
}
|
||||
stmt_insert = null;
|
||||
}
|
||||
public void Insert(int file_id, String file_path, int rule_count) {
|
||||
if (stmt_insert == null) stmt_insert = conn.Stmt_insert(tbl_name, flds);
|
||||
stmt_insert.Clear()
|
||||
.Val_int(fld_file_id , file_id)
|
||||
.Val_str(fld_file_path , file_path)
|
||||
.Val_int(fld_rule_count , rule_count)
|
||||
.Exec_insert();
|
||||
}
|
||||
}
|
||||
class Dg_rule_tbl implements Rls_able {
|
||||
private String tbl_name = "dg_rule"; private final Dbmeta_fld_list flds = new Dbmeta_fld_list();
|
||||
private String fld_file_id, fld_rule_id, fld_rule_idx, fld_rule_score, fld_rule_text;
|
||||
private Db_conn conn; private Db_stmt stmt_insert;
|
||||
public void Conn_(Db_conn new_conn, boolean created) {
|
||||
this.conn = new_conn; flds.Clear();
|
||||
fld_file_id = flds.Add_int("file_id");
|
||||
fld_rule_id = flds.Add_int("rule_id");
|
||||
fld_rule_idx = flds.Add_int("rule_idx");
|
||||
fld_rule_score = flds.Add_int("rule_score");
|
||||
fld_rule_text = flds.Add_str("rule_text", 1024);
|
||||
if (created) {
|
||||
Dbmeta_tbl_itm meta = Dbmeta_tbl_itm.New(tbl_name, flds
|
||||
, Dbmeta_idx_itm.new_unique_by_tbl(tbl_name, "pkey", fld_rule_id)
|
||||
);
|
||||
conn.Meta_tbl_create(meta);
|
||||
}
|
||||
conn.Rls_reg(this);
|
||||
}
|
||||
public void Rls() {
|
||||
stmt_insert = Db_stmt_.Rls(stmt_insert);
|
||||
}
|
||||
public void Insert(int file_id, int rule_id, int rule_idx, int rule_score, String rule_text) {
|
||||
if (stmt_insert == null) stmt_insert = conn.Stmt_insert(tbl_name, flds);
|
||||
stmt_insert.Clear()
|
||||
.Val_int(fld_file_id , file_id)
|
||||
.Val_int(fld_rule_id , rule_id)
|
||||
.Val_int(fld_rule_idx , rule_idx)
|
||||
.Val_int(fld_rule_score , rule_score)
|
||||
.Val_str(fld_rule_text , rule_text)
|
||||
.Exec_insert();
|
||||
}
|
||||
}
|
||||
class Dg_page_score_tbl implements Rls_able {
|
||||
private String tbl_name = "dg_page_score"; private final Dbmeta_fld_list flds = new Dbmeta_fld_list();
|
||||
private String fld_log_tid, fld_page_id, fld_page_ns, fld_page_ttl, fld_page_len, fld_page_score, fld_page_rule_count, fld_clude_type;
|
||||
private Db_conn conn; private Db_stmt stmt_insert;
|
||||
public void Conn_(Db_conn new_conn, boolean created) {
|
||||
this.conn = new_conn; flds.Clear();
|
||||
fld_log_tid = flds.Add_int("log_tid"); // title or text
|
||||
fld_page_id = flds.Add_int("page_id");
|
||||
fld_page_ns = flds.Add_int("page_ns");
|
||||
fld_page_ttl = flds.Add_int("page_ttl");
|
||||
fld_page_len = flds.Add_int("page_len");
|
||||
fld_page_score = flds.Add_int("page_score");
|
||||
fld_page_rule_count = flds.Add_int("page_rule_count");
|
||||
fld_clude_type = flds.Add_int("page_clude_type");
|
||||
if (created) {
|
||||
Dbmeta_tbl_itm meta = Dbmeta_tbl_itm.New(tbl_name, flds
|
||||
, Dbmeta_idx_itm.new_unique_by_tbl(tbl_name, "pkey", fld_log_tid, fld_page_id)
|
||||
);
|
||||
conn.Meta_tbl_create(meta);
|
||||
}
|
||||
stmt_insert = null;
|
||||
conn.Rls_reg(this);
|
||||
}
|
||||
public void Rls() {
|
||||
stmt_insert = Db_stmt_.Rls(stmt_insert);
|
||||
}
|
||||
public void Insert(int log_tid, int page_id, int page_ns, byte[] page_ttl, int page_len, int page_score, int page_rule_count, int clude_type) {
|
||||
if (stmt_insert == null) stmt_insert = conn.Stmt_insert(tbl_name, flds);
|
||||
stmt_insert.Clear()
|
||||
.Val_int(fld_log_tid , log_tid)
|
||||
.Val_int(fld_page_id , page_id)
|
||||
.Val_int(fld_page_ns , page_ns)
|
||||
.Val_bry_as_str(fld_page_ttl, page_ttl)
|
||||
.Val_int(fld_page_len , page_len)
|
||||
.Val_int(fld_page_score , page_score)
|
||||
.Val_int(fld_page_rule_count, page_rule_count)
|
||||
.Val_int(fld_clude_type , clude_type)
|
||||
.Exec_insert();
|
||||
}
|
||||
}
|
||||
class Dg_page_rule_tbl implements Rls_able {
|
||||
private String tbl_name = "dg_page_rule"; private final Dbmeta_fld_list flds = new Dbmeta_fld_list();
|
||||
private String fld_log_tid, fld_page_id, fld_rule_id, fld_rule_score_total;
|
||||
private Db_conn conn; private Db_stmt stmt_insert;
|
||||
public void Conn_(Db_conn new_conn, boolean created) {
|
||||
this.conn = new_conn; flds.Clear();
|
||||
fld_log_tid = flds.Add_int("log_tid"); // title or text
|
||||
fld_page_id = flds.Add_int("page_id");
|
||||
fld_rule_id = flds.Add_int("rule_id");
|
||||
fld_rule_score_total = flds.Add_int("rule_score_total");
|
||||
if (created) {
|
||||
Dbmeta_tbl_itm meta = Dbmeta_tbl_itm.New(tbl_name, flds
|
||||
, Dbmeta_idx_itm.new_unique_by_tbl(tbl_name, "pkey", fld_log_tid, fld_page_id, fld_rule_id)
|
||||
);
|
||||
conn.Meta_tbl_create(meta);
|
||||
}
|
||||
stmt_insert = null;
|
||||
conn.Rls_reg(this);
|
||||
}
|
||||
public void Rls() {
|
||||
stmt_insert = Db_stmt_.Rls(stmt_insert);
|
||||
}
|
||||
public void Insert(int log_tid, int page_id, int rule_id, int rule_score_total) {
|
||||
if (stmt_insert == null) stmt_insert = conn.Stmt_insert(tbl_name, flds);
|
||||
stmt_insert.Clear()
|
||||
.Val_int(fld_log_tid , log_tid)
|
||||
.Val_int(fld_page_id , page_id)
|
||||
.Val_int(fld_rule_id , rule_id)
|
||||
.Val_int(fld_rule_score_total , rule_score_total)
|
||||
.Exec_insert();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,176 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs.filters.dansguardians; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.filters.*;
|
||||
import gplx.core.primitives.*; import gplx.core.btries.*;
|
||||
import gplx.xowa.addons.apps.cfgs.*;
|
||||
import gplx.xowa.langs.*;
|
||||
import gplx.xowa.bldrs.filters.core.*;
|
||||
public class Dg_match_mgr {
|
||||
private int score_init, score_fail; private boolean log_enabled, case_match;
|
||||
private final Btrie_slim_mgr btrie = Btrie_slim_mgr.cs();
|
||||
private final Ordered_hash rules = Ordered_hash_.New_bry();
|
||||
private final Ordered_hash rule_group_hash = Ordered_hash_.New_bry(), rule_tally_hash = Ordered_hash_.New_bry();
|
||||
private final Dg_parser parser = new Dg_parser();
|
||||
private final Xob_ttl_filter_mgr ttl_filter_mgr = new Xob_ttl_filter_mgr();
|
||||
private final Dg_ns_skip_mgr ns_skip_mgr = new Dg_ns_skip_mgr();
|
||||
private final Dg_log_mgr log_mgr = new Dg_log_mgr();
|
||||
public Dg_match_mgr(Io_url root_dir, int score_init, int score_fail, boolean case_match, boolean log_enabled, Io_url log_url) {
|
||||
this.score_init = score_init; this.score_fail = score_fail; this.case_match = case_match; this.log_enabled = log_enabled;
|
||||
if (log_enabled) log_mgr.Init(log_url);
|
||||
ttl_filter_mgr.Load(Bool_.N, root_dir.GenSubFil("xowa.title.include.txt"));
|
||||
ttl_filter_mgr.Load(Bool_.Y, root_dir.GenSubFil("xowa.title.exclude.txt"));
|
||||
ns_skip_mgr.Load(root_dir.GenSubFil("xowa.ns.skip.txt"));
|
||||
Io_url dg_root_url = root_dir.GenSubDir("dansguardian");
|
||||
Dg_file[] files = parser.Parse_dir(dg_root_url); Gfo_usr_dlg_.Instance.Plog_many("", "", "import.dg.rules: url=~{0} files=~{1}", dg_root_url, files.length);
|
||||
Init_by_files(files);
|
||||
if (log_enabled) log_mgr.Commit();
|
||||
}
|
||||
public void Clear() {
|
||||
btrie.Clear();
|
||||
rules.Clear();
|
||||
rule_group_hash.Clear();
|
||||
rule_tally_hash.Clear();
|
||||
}
|
||||
private void Init_by_files(Dg_file[] files) {
|
||||
for (Dg_file file : files) {
|
||||
Dg_rule[] rules = file.Lines();
|
||||
if (log_enabled) log_mgr.Insert_file(file);
|
||||
for (Dg_rule rule : rules)
|
||||
Init_by_rule(rule);
|
||||
}
|
||||
}
|
||||
@gplx.Internal protected void Init_by_rule(Dg_rule rule) {
|
||||
if (rule.Tid() != Dg_rule.Tid_rule) return;
|
||||
if (log_enabled) log_mgr.Insert_rule(rule);
|
||||
Dg_word[] words = rule.Words();
|
||||
for (Dg_word word : words) {
|
||||
Dg_rule_group rule_group = Get_rule_group_or_new(word.Raw());
|
||||
rule_group.Rules_list().Add(rule);
|
||||
btrie.Add_obj(word.Raw(), rule_group);
|
||||
}
|
||||
}
|
||||
private Dg_rule_group Get_rule_group_or_new(byte[] word) {
|
||||
Dg_rule_group rv = (Dg_rule_group)rule_group_hash.Get_by(word);
|
||||
if (rv == null) {
|
||||
rv = new Dg_rule_group(word);
|
||||
rule_group_hash.Add(word, rv);
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
private Dg_rule_tally Get_rule_tally_or_new(byte[] key, Dg_rule rule) {
|
||||
Dg_rule_tally rv = (Dg_rule_tally)rule_tally_hash.Get_by(key);
|
||||
if (rv == null) {
|
||||
rv = new Dg_rule_tally(rule);
|
||||
rule_tally_hash.Add(key, rv);
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
public boolean Match(int log_tid, int page_id, int page_ns, byte[] page_ttl, byte[] page_ttl_db, Xol_lang_itm lang, byte[] src) {
|
||||
// if ns is in skip_mgr, ignore; needed to skip Template and Module
|
||||
if (ns_skip_mgr.Has(page_ns))
|
||||
return false;
|
||||
|
||||
int src_len = src.length;
|
||||
int clude_type = 0;
|
||||
if (ttl_filter_mgr.Match_include(page_ttl_db)) clude_type = -1;
|
||||
else if (ttl_filter_mgr.Match_exclude(page_ttl_db)) clude_type = 1;
|
||||
if (clude_type != 0) {
|
||||
log_mgr.Insert_page_score(log_tid, page_id, page_ns, page_ttl, src_len, 0, 0, clude_type);
|
||||
return clude_type == 1;
|
||||
}
|
||||
if (!case_match) {
|
||||
src = lang.Case_mgr().Case_build_lower(src);
|
||||
src_len = src.length;
|
||||
}
|
||||
rules.Clear();
|
||||
rule_tally_hash.Clear();
|
||||
int pos = 0;
|
||||
int score_cur = score_init;
|
||||
while (pos < src_len) {
|
||||
Object o = btrie.Match_bgn(src, pos, src_len);
|
||||
if (o == null)
|
||||
++pos;
|
||||
else {
|
||||
Dg_rule_group rule_group = (Dg_rule_group)o;
|
||||
Dg_rule[] rules_ary = rule_group.Rules_ary();
|
||||
for (Dg_rule rule : rules_ary) {
|
||||
Dg_rule_tally rule_tally = Get_rule_tally_or_new(rule.Key(), rule);
|
||||
rule_tally.Process(rule_group.Word());
|
||||
}
|
||||
++pos;
|
||||
}
|
||||
}
|
||||
int rule_tally_len = rule_tally_hash.Count(); if (rule_tally_len == 0) return false;
|
||||
int rule_match_count = 0;
|
||||
for (int i = 0; i < rule_tally_len; ++i) {
|
||||
Dg_rule_tally rule_tally = (Dg_rule_tally)rule_tally_hash.Get_at(i);
|
||||
int min_results = rule_tally.Results_pass_count();
|
||||
if (min_results > 0) {
|
||||
int rule_score = rule_tally.Rule().Score();
|
||||
int rule_score_total = rule_score * min_results;
|
||||
if (log_enabled) log_mgr.Insert_page_rule(log_tid, page_id, rule_tally.Rule().Id(), rule_score_total);
|
||||
if (rule_score == Dg_rule.Score_banned) {score_cur = Int_.Max_value; break;}
|
||||
score_cur += rule_score_total;
|
||||
++rule_match_count;
|
||||
}
|
||||
}
|
||||
boolean rv = score_cur > score_fail;
|
||||
if (rv && log_enabled) log_mgr.Insert_page_score(log_tid, page_id, page_ns, page_ttl, src_len, score_cur, rule_match_count, 0);
|
||||
return rv;
|
||||
}
|
||||
// Releases the log manager.
public void Rls() {
	log_mgr.Rls();
}
|
||||
// Commits pending log entries; does nothing when logging is disabled.
public void Commit() {
	if (!log_enabled) return;
	log_mgr.Commit();
}
|
||||
|
||||
public static void Cfg__reg(Xoa_app app) {
|
||||
app.Cfg().Dflt_mgr().Add(Cfg__root_dir, app.Fsys_mgr().Bin_xowa_dir().GenSubDir_nest("cfg", "bldr", "filter").Raw());
|
||||
}
|
||||
public static Dg_match_mgr New_mgr(Xoa_app app, Xow_wiki wiki) {
|
||||
Xocfg_mgr cfg_mgr = app.Cfg();
|
||||
if (!cfg_mgr.Get_bool_wiki_or(wiki, Cfg__enabled, false)) return null;
|
||||
String ctx = cfg_mgr.To_ctx(wiki);
|
||||
return new Dg_match_mgr
|
||||
( cfg_mgr.Get_url_or(ctx, Cfg__root_dir, app.Fsys_mgr().Bin_xowa_dir().GenSubDir_nest("cfg", "bldr", "filter")).GenSubDir(wiki.Domain_str())
|
||||
, cfg_mgr.Get_int_or(ctx, "xowa.bldr.dansguardian.score_init", 0)
|
||||
, cfg_mgr.Get_int_or(ctx, "xowa.bldr.dansguardian.score_fail", 0)
|
||||
, cfg_mgr.Get_bool_or(ctx, "xowa.bldr.dansguardian.case_match", false)
|
||||
, cfg_mgr.Get_bool_or(ctx, "xowa.bldr.dansguardian.log_enabled", true)
|
||||
, wiki.Fsys_mgr().Root_dir().GenSubFil("dansguardian_log.sqlite3")
|
||||
);
|
||||
}
|
||||
public static final String Cfg__enabled = "xowa.bldr.dansguardian.enabled";
|
||||
private static final String Cfg__root_dir = "xowa.bldr.dansguardian.root_dir";
|
||||
}
|
||||
class Dg_rule_group {
|
||||
public Dg_rule_group(byte[] word) {this.word = word;}
|
||||
public byte[] Word() {return word;} private final byte[] word;
|
||||
public List_adp Rules_list() {return rules_list;} private final List_adp rules_list = List_adp_.New();
|
||||
public Dg_rule[] Rules_ary() {
|
||||
if (rules_ary == null)
|
||||
rules_ary = (Dg_rule[])rules_list.To_ary_and_clear(Dg_rule.class);
|
||||
return rules_ary;
|
||||
} private Dg_rule[] rules_ary;
|
||||
}
|
||||
class Dg_rule_tally {
|
||||
public Dg_rule_tally(Dg_rule rule) {
|
||||
this.rule = rule;
|
||||
Dg_word[] words = rule.Words();
|
||||
this.results_len = words.length;
|
||||
this.results = new int[results_len];
|
||||
}
|
||||
public Dg_rule Rule() {return rule;} private final Dg_rule rule;
|
||||
public int[] Results() {return results;} private final int[] results; private final int results_len;
|
||||
public void Process(byte[] word) {
|
||||
Int_obj_ref idx = (Int_obj_ref)rule.Word_idx_hash().Get_by_bry(word);
|
||||
int idx_val = idx.Val();
|
||||
results[idx_val] = results[idx_val] + 1;
|
||||
}
|
||||
public int Results_pass_count() {
|
||||
int rv = Int_.Max_value;
|
||||
for (int i = 0; i < results_len; ++i) {
|
||||
int result = results[i];
|
||||
if (rv > result) rv = result;
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,44 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs.filters.dansguardians; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.filters.*;
|
||||
import org.junit.*; import gplx.dbs.*;
|
||||
public class Dg_match_mgr_tst {
|
||||
@Before public void init() {fxt.Clear();} private Dg_match_mgr_fxt fxt = new Dg_match_mgr_fxt();
|
||||
@Test public void One() {
|
||||
fxt.Init_line(100, "a");
|
||||
fxt.Test_match_many_y("a", "ab", "ba", "abc");
|
||||
fxt.Test_match_many_n("b");
|
||||
}
|
||||
}
|
||||
class Dg_match_mgr_fxt {
|
||||
private Dg_match_mgr match_mgr;
|
||||
private final List_adp rule_list = List_adp_.New();
|
||||
public void Clear() {
|
||||
Db_conn_bldr.Instance.Reg_default_mem();
|
||||
Io_url root_dir = Io_url_.mem_dir_("mem/dg/");
|
||||
match_mgr = new Dg_match_mgr(root_dir.GenSubDir("words"), 1, 0, Bool_.Y, Bool_.Y, root_dir.GenSubDir("log"));
|
||||
rule_list.Clear();
|
||||
}
|
||||
public void Init_line(int score, String... words) {
|
||||
Dg_rule line = new Dg_rule(-1, -1, -1, Dg_rule.Tid_rule, Bry_.new_a7("key"), score, Dg_word.Ary_new_by_str_ary(words));
|
||||
rule_list.Add(line);
|
||||
}
|
||||
public void Test_match_many_y(String... words) {Test_match_many(Bool_.Y, words);}
|
||||
public void Test_match_many_n(String... words) {Test_match_many(Bool_.N, words);}
|
||||
public void Test_match_many(boolean expd, String... words) {
|
||||
int words_len = words.length;
|
||||
for (int i = 0; i < words_len; ++i)
|
||||
Test_match_one(expd, words[i]);
|
||||
}
|
||||
public void Test_match_one(boolean expd, String word_str) {
|
||||
match_mgr.Clear();
|
||||
int rule_list_len = rule_list.Count();
|
||||
for (int j = 0; j < rule_list_len; ++j) {
|
||||
Dg_rule rule = (Dg_rule)rule_list.Get_at(j);
|
||||
match_mgr.Init_by_rule(rule);
|
||||
}
|
||||
byte[] word_bry = Bry_.new_u8(word_str);
|
||||
Tfds.Eq(expd, match_mgr.Match(1, 101, 0, Bry_.Empty, Bry_.Empty, null, word_bry), (expd ? "pass:" : "fail:") + word_str);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,28 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs.filters.dansguardians; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.filters.*;
|
||||
import gplx.core.lists.hashs.*;
|
||||
class Dg_ns_skip_mgr {
|
||||
private final Hash_adp__int ns_hash = new Hash_adp__int();
|
||||
private boolean is_empty = true;
|
||||
public boolean Has(int ns) {return is_empty ? false : ns_hash.Get_by_or_null(ns) != null;}
|
||||
public void Load(Io_url url) {
|
||||
// load from file
|
||||
Gfo_usr_dlg_.Instance.Log_many("", "", "loading ns.skip file; url=~{0}", url.Raw());
|
||||
byte[] src = Io_mgr.Instance.LoadFilBry_loose(url);
|
||||
|
||||
// parse to lines
|
||||
byte[][] lines = Bry_split_.Split_lines(src);
|
||||
|
||||
// add to hash
|
||||
for (byte[] line : lines) {
|
||||
int ns_id = Bry_.To_int_or(line, Int_.Max_value);
|
||||
if (ns_id != Int_.Max_value) {
|
||||
Gfo_usr_dlg_.Instance.Log_many("", "", "adding ns; ns_id=~{0}", ns_id);
|
||||
ns_hash.Add_if_dupe_use_1st(ns_id, line);
|
||||
is_empty = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,81 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs.filters.dansguardians; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.filters.*;
|
||||
class Dg_parser {
|
||||
private Gfo_usr_dlg usr_dlg = Gfo_usr_dlg_.Instance; private final Bry_bfr key_bldr = Bry_bfr_.Reset(32);
|
||||
private final List_adp files = List_adp_.New(), lines = List_adp_.New(), words = List_adp_.New();
|
||||
private int next_id = 0;
|
||||
public Dg_file[] Parse_dir(Io_url dir) {
|
||||
Io_url[] fil_urls = Io_mgr.Instance.QueryDir_args(dir).Recur_(true).ExecAsUrlAry();
|
||||
this.usr_dlg = Gfo_usr_dlg_.Instance;
|
||||
files.Clear();
|
||||
int len = fil_urls.length;
|
||||
for (int i = 0; i < len; ++i) {
|
||||
Io_url fil_url = fil_urls[i];
|
||||
byte[] fil_src = Io_mgr.Instance.LoadFilBry_loose(fil_url);
|
||||
Dg_file file = Parse_fil(i, fil_url.GenRelUrl_orEmpty(dir), fil_src);
|
||||
if (file != null) files.Add(file);
|
||||
}
|
||||
return (Dg_file[])files.To_ary_and_clear(Dg_file.class);
|
||||
}
|
||||
private Dg_file Parse_fil(int file_idx, String rel_path, byte[] src) {
|
||||
int line_idx = 0; int line_bgn = 0; int src_len = src.length;
|
||||
lines.Clear();
|
||||
int file_id = ++next_id;
|
||||
while (line_bgn < src_len) {
|
||||
++line_idx;
|
||||
int line_end = Bry_find_.Find_fwd(src, Byte_ascii.Nl, line_bgn); if (line_end == Bry_find_.Not_found) line_end = src_len;
|
||||
Dg_rule line = Parse_line(rel_path, file_id, line_idx, src, line_bgn, line_end);
|
||||
if (line.Tid() != Dg_rule.Tid_invalid)
|
||||
lines.Add(line);
|
||||
line_bgn = line_end + 1;
|
||||
}
|
||||
return new Dg_file(file_id, rel_path, (Dg_rule[])lines.To_ary_and_clear(Dg_rule.class));
|
||||
}
|
||||
public Dg_rule Parse_line(String rel_path, int file_id, int line_idx, byte[] src, int line_bgn, int line_end) {
|
||||
int score = Dg_rule.Score_banned;
|
||||
int brack_bgn = line_bgn;
|
||||
if (line_end - line_bgn <= 1) return Dg_rule.Itm_blank; // ignore blank lines; EX: ""
|
||||
if (src[line_bgn] == Byte_ascii.Hash) return Dg_rule.Itm_comment; // ignore lines starting with hash; EX: "# comment"
|
||||
while (brack_bgn < line_end) { // look for terms bracketed by "<>"
|
||||
if (src[brack_bgn] != Byte_ascii.Lt) {Warn("dg.invalid_line.term must start with angle_bgn", rel_path, line_idx, src, line_bgn, line_end); return Dg_rule.Itm_invalid;}
|
||||
int brack_end = Bry_find_.Find_fwd(src, Byte_ascii.Gt, brack_bgn);
|
||||
if (brack_end == Bry_find_.Not_found) {Warn("dg.invalid_line.angle_end not found", rel_path, line_idx, src, line_bgn, line_end); return Dg_rule.Itm_invalid;}
|
||||
byte[] word = Bry_.Mid(src, brack_bgn + 1, brack_end);
|
||||
words.Add(word);
|
||||
int next_pos = brack_end + 1;
|
||||
if (next_pos == line_end) {
|
||||
score = Dg_rule.Score_banned;
|
||||
break;
|
||||
}
|
||||
byte next = src[next_pos];
|
||||
if (next == Byte_ascii.Comma)
|
||||
brack_bgn = brack_end + 2;
|
||||
else {
|
||||
brack_bgn = brack_end + 1;
|
||||
if (src[brack_bgn] != Byte_ascii.Lt) {Warn("dg.invalid_line.wrong_term_dlm", rel_path, line_idx, src, line_bgn, line_end); break;}
|
||||
brack_end = Bry_find_.Find_fwd(src, Byte_ascii.Gt, brack_bgn);
|
||||
if (brack_end == Bry_find_.Not_found) {Warn("dg.invalid_line.score not found", rel_path, line_idx, src, line_bgn, line_end); break;}
|
||||
int parse_score = Bry_.To_int_or(src, brack_bgn + 1, brack_end, Int_.Min_value);
|
||||
if (parse_score == Int_.Min_value) {Warn("dg.invalid_line.score is invalid", rel_path, line_idx, src, line_bgn, line_end); break;}
|
||||
score = parse_score;
|
||||
break;
|
||||
}
|
||||
}
|
||||
byte[] key = key_bldr.Add_int_variable(file_id).Add_byte_dot().Add_int_variable(line_idx).To_bry_and_clear();
|
||||
return new Dg_rule(file_id, ++next_id, line_idx, Dg_rule.Tid_rule, key, score, Ary_new_by_ary((byte[][])words.To_ary_and_clear(byte[].class)));
|
||||
}
|
||||
private static Dg_word[] Ary_new_by_ary(byte[][] ary) {
|
||||
int ary_len = ary.length;
|
||||
Dg_word[] rv = new Dg_word[ary_len];
|
||||
for (int i = 0; i < ary_len; ++i) {
|
||||
byte[] raw = ary[i];
|
||||
rv[i] = new Dg_word(raw);
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
private void Warn(String err_msg, String rel_path, int line_idx, byte[] src, int line_bgn, int line_end) {
|
||||
usr_dlg.Warn_many("", "", err_msg + "; file=~{0} line_idx=~{1} line=~{2}", rel_path, line_idx, String_.new_u8(src, line_bgn, line_end));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,43 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs.filters.dansguardians; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.filters.*;
|
||||
import org.junit.*;
|
||||
public class Dg_parser_tst {
|
||||
@Before public void init() {fxt.Init();} private Dg_parser_fxt fxt = new Dg_parser_fxt();
|
||||
@Test public void One() {fxt.Test_parse_line("<a><123>", fxt.Make_line(123, "a"));}
|
||||
@Test public void Many() {fxt.Test_parse_line("<a>,<b>,<c><-123>", fxt.Make_line(-123, "a", "b", "c"));}
|
||||
@Test public void Score_0() {fxt.Test_parse_line("<a><0>", fxt.Make_line(Dg_rule.Score_banned, "a"));}
|
||||
@Test public void Noscore() {fxt.Test_parse_line("<a>", fxt.Make_line(Dg_rule.Score_banned, "a"));}
|
||||
@Test public void Noscore_2() {fxt.Test_parse_line("<a>,<b>", fxt.Make_line(Dg_rule.Score_banned, "a", "b"));}
|
||||
@Test public void Comment() {fxt.Test_parse_line("# comment", Dg_rule.Itm_comment);}
|
||||
@Test public void Blank() {fxt.Test_parse_line("", Dg_rule.Itm_blank);}
|
||||
@Test public void Invalid_line_bgn() {fxt.Test_parse_line(" <a><1>", Dg_rule.Itm_invalid);}
|
||||
@Test public void Dangling_word() {fxt.Test_parse_line("<a", Dg_rule.Itm_invalid);}
|
||||
@Test public void Dangling_score() {fxt.Test_parse_line("<a><12", fxt.Make_line(Dg_rule.Score_banned, "a"));}
|
||||
@Test public void Invalid_dlm() {fxt.Test_parse_line("<a> <1>", fxt.Make_line(Dg_rule.Score_banned, "a"));}
|
||||
@Test public void Invalid_dlm_2() {fxt.Test_parse_line("<a>,<b><c><2>", fxt.Make_line(Dg_rule.Score_banned, "a", "b"));}
|
||||
@Test public void Invalid_score() {fxt.Test_parse_line("<a><1a>", fxt.Make_line(Dg_rule.Score_banned, "a"));}
|
||||
// @Test public void Parse_dir() {
|
||||
// Dg_parser parser = new Dg_parser();
|
||||
// Gfo_usr_dlg_.I = Xoa_app_.New__usr_dlg__console();
|
||||
// parser.Parse_dir(Io_url_.new_dir_("C:\\xowa\\bin\\any\\xowa\\bldr\\filters\simple.wikipedia.org\\Dansguardian\\\\"));
|
||||
// }
|
||||
}
|
||||
class Dg_parser_fxt {
|
||||
private final Dg_parser parser = new Dg_parser(); private final Bry_bfr bfr = Bry_bfr_.Reset(32);
|
||||
private final Bry_bfr tmp_bfr = Bry_bfr_.Reset(16);
|
||||
public void Init() {}
|
||||
public Dg_rule Make_line(int score, String... words) {return new Dg_rule(-1, -1, -1, Dg_rule.Tid_rule, null, score, Dg_word.Ary_new_by_str_ary(words));}
|
||||
public void Test_parse_line(String str, Dg_rule expd) {
|
||||
byte[] src = Bry_.new_u8(str);
|
||||
Dg_rule actl = parser.Parse_line("rel_path", 0, 0, src, 0, src.length);
|
||||
Tfds.Eq_str_lines(Xto_str(bfr, expd), Xto_str(bfr, actl));
|
||||
}
|
||||
private String Xto_str(Bry_bfr bfr, Dg_rule line) {
|
||||
bfr .Add_str_a7("score=").Add_int_variable(line.Score()).Add_byte_nl()
|
||||
.Add_str_a7("words=").Add_str_u8(String_.Concat_with_str(";", Dg_word.Ary_concat(line.Words(), tmp_bfr, Byte_ascii.Tick))).Add_byte_nl()
|
||||
;
|
||||
return bfr.To_str_and_clear();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,56 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs.infos; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
|
||||
import gplx.dbs.*; import gplx.dbs.cfgs.*;
|
||||
public class Xob_info_file {
|
||||
public Xob_info_file(int id, String type, String ns_ids, int part_id, Guid_adp guid, int schema_version, String core_file_name, String orig_file_name) {
|
||||
this.id = id; this.type = type; this.ns_ids = ns_ids; this.part_id = part_id; this.guid = guid;
|
||||
this.schema_version = schema_version; this.core_file_name = core_file_name; this.orig_file_name = orig_file_name;
|
||||
}
|
||||
public int Id() {return id;} private final int id;
|
||||
public String Type() {return type;} private final String type;
|
||||
public String Ns_ids() {return ns_ids;} private final String ns_ids;
|
||||
public int Part_id() {return part_id;} private final int part_id;
|
||||
public Guid_adp Guid() {return guid;} private final Guid_adp guid;
|
||||
public int Schema_version() {return schema_version;} private final int schema_version;
|
||||
public String Core_file_name() {return core_file_name;} private final String core_file_name;
|
||||
public String Orig_file_name() {return orig_file_name;} private final String orig_file_name;
|
||||
public void Save(Db_cfg_tbl tbl) {
|
||||
tbl.Conn().Txn_bgn("make__info__file");
|
||||
tbl.Insert_int (Cfg_grp, Cfg_key__id , id);
|
||||
tbl.Insert_str (Cfg_grp, Cfg_key__type , type);
|
||||
tbl.Insert_str (Cfg_grp, Cfg_key__ns_ids , ns_ids);
|
||||
tbl.Insert_int (Cfg_grp, Cfg_key__part_id , part_id);
|
||||
tbl.Insert_guid (Cfg_grp, Cfg_key__guid , guid);
|
||||
tbl.Insert_int (Cfg_grp, Cfg_key__schema_version , schema_version);
|
||||
tbl.Insert_str (Cfg_grp, Cfg_key__core_file_name , core_file_name);
|
||||
tbl.Insert_str (Cfg_grp, Cfg_key__orig_file_name , orig_file_name);
|
||||
tbl.Conn().Txn_end();
|
||||
}
|
||||
public static Xob_info_file Load(Db_cfg_tbl tbl) {
|
||||
Db_cfg_hash hash = tbl.Select_as_hash(Cfg_grp);
|
||||
return new Xob_info_file
|
||||
( hash.Get_by(Cfg_key__id ).To_int_or(-1)
|
||||
, hash.Get_by(Cfg_key__type ).To_str_or("unknown")
|
||||
, hash.Get_by(Cfg_key__ns_ids ).To_str_or("")
|
||||
, hash.Get_by(Cfg_key__part_id ).To_int_or(-1)
|
||||
, hash.Get_by(Cfg_key__guid ).To_guid_or(Guid_adp_.Empty)
|
||||
, hash.Get_by(Cfg_key__schema_version ).To_int_or(2)
|
||||
, hash.Get_by(Cfg_key__core_file_name ).To_str_or("")
|
||||
, hash.Get_by(Cfg_key__orig_file_name ).To_str_or("")
|
||||
);
|
||||
}
|
||||
private static final String Cfg_grp = gplx.xowa.wikis.data.Xowd_cfg_key_.Grp__bldr_db
|
||||
, Cfg_key__id = "id" // EX: 1
|
||||
, Cfg_key__type = "type" // EX: core
|
||||
, Cfg_key__ns_ids = "ns_ids" // EX: 0
|
||||
, Cfg_key__part_id = "part_id" // EX: 0
|
||||
, Cfg_key__guid = "guid" // EX: 00000000-0000-0000-0000-000000000000
|
||||
, Cfg_key__schema_version = "schema_version" // EX: 2
|
||||
, Cfg_key__core_file_name = "core_file_name" // EX: en.wikipedia.org-text.xowa
|
||||
, Cfg_key__orig_file_name = "orig_file_name" // EX: en.wikipedia.org-text-ns.000-db.002.xowa
|
||||
;
|
||||
public static final String Ns_ids_empty = "";
|
||||
public static final int Part_id_1st = 1;
|
||||
}
|
||||
|
||||
@@ -13,3 +13,47 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs.infos; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
|
||||
import gplx.dbs.cfgs.*;
|
||||
public class Xob_info_session {
|
||||
Xob_info_session(String user, String version, String wiki_domain, String dump_name, DateAdp time, Guid_adp guid) {
|
||||
this.user = user; this.version = version; this.wiki_domain = wiki_domain; this.dump_name = dump_name; this.time = time; this.guid = guid;
|
||||
}
|
||||
public String User() {return user;} private final String user;
|
||||
public String Version() {return version;} private final String version;
|
||||
public String Wiki_domain() {return wiki_domain;} private final String wiki_domain;
|
||||
public String Dump_name() {return dump_name;} private final String dump_name;
|
||||
public DateAdp Time() {return time;} private final DateAdp time;
|
||||
public Guid_adp Uuid() {return guid;} private final Guid_adp guid;
|
||||
public void Save(Db_cfg_tbl tbl) {
|
||||
tbl.Conn().Txn_bgn("make__info__session");
|
||||
tbl.Insert_str (Cfg_grp, Cfg_key__user , user);
|
||||
tbl.Insert_str (Cfg_grp, Cfg_key__version , version);
|
||||
tbl.Insert_str (Cfg_grp, Cfg_key__wiki_domain , wiki_domain);
|
||||
tbl.Insert_str (Cfg_grp, Cfg_key__dump_name , dump_name);
|
||||
tbl.Insert_date (Cfg_grp, Cfg_key__time , time);
|
||||
tbl.Insert_guid (Cfg_grp, Cfg_key__guid , guid);
|
||||
tbl.Conn().Txn_end();
|
||||
}
|
||||
public static Xob_info_session Load(Db_cfg_tbl tbl) {
|
||||
Db_cfg_hash hash = tbl.Select_as_hash(Cfg_grp);
|
||||
return new Xob_info_session
|
||||
( hash.Get_by(Cfg_key__user).To_str_or("")
|
||||
, hash.Get_by(Cfg_key__version).To_str_or("")
|
||||
, hash.Get_by(Cfg_key__wiki_domain).To_str_or("")
|
||||
, hash.Get_by(Cfg_key__dump_name).To_str_or("")
|
||||
, hash.Get_by(Cfg_key__time).To_date_or(DateAdp_.MinValue)
|
||||
, hash.Get_by(Cfg_key__guid).To_guid_or(Guid_adp_.Empty)
|
||||
);
|
||||
}
|
||||
public static final String Cfg_grp = gplx.xowa.wikis.data.Xowd_cfg_key_.Grp__bldr_session
|
||||
, Cfg_key__user = "user" // EX: anonymous
|
||||
, Cfg_key__version = "version" // EX: 2.3.1.4
|
||||
, Cfg_key__wiki_domain = "wiki_domain" // EX: en.wikipedia.org
|
||||
, Cfg_key__dump_name = "dump_name" // EX: enwiki-latest-pages-articles
|
||||
, Cfg_key__time = "time" // EX: 20150102 030405
|
||||
, Cfg_key__guid = "guid" // EX: 00000000-0000-0000-0000-000000000000
|
||||
;
|
||||
public static Xob_info_session new_(String user, String wiki_domain, String dump_name) {return new Xob_info_session(user, Xoa_app_.Version, wiki_domain, dump_name, Datetime_now.Get(), Guid_adp_.New());}
|
||||
public static final Xob_info_session Test = new_("anonymous", "en.wikipedia.org", "enwiki-latest-pages-articles");
|
||||
}
|
||||
|
||||
@@ -13,3 +13,89 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs.installs; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
|
||||
import gplx.core.threads.*; import gplx.xowa.bldrs.*;
|
||||
import gplx.xowa.wikis.domains.*;
|
||||
import gplx.xowa.bldrs.wms.dumps.*;
|
||||
abstract class Xoi_cmd_base implements Gfo_thread_cmd {
|
||||
public void Ctor(Xoi_setup_mgr install_mgr, String wiki_key) {
|
||||
this.install_mgr = install_mgr; this.wiki_key = wiki_key;
|
||||
this.Owner_(install_mgr);
|
||||
} private Xoi_setup_mgr install_mgr; String wiki_key;
|
||||
@gplx.Virtual public void Cmd_ctor() {}
|
||||
public abstract String Async_key();
|
||||
public int Async_sleep_interval() {return Gfo_thread_cmd_.Async_sleep_interval_1_second;}
|
||||
public boolean Async_prog_enabled() {return false;}
|
||||
public void Async_prog_run(int async_sleep_sum) {}
|
||||
public byte Async_init() {return Gfo_thread_cmd_.Init_ok;}
|
||||
public boolean Async_term() {return true;}
|
||||
public Gfo_invk Owner() {return owner;} public Xoi_cmd_base Owner_(Gfo_invk v) {owner = v; return this;} Gfo_invk owner;
|
||||
public Gfo_thread_cmd Async_next_cmd() {return next_cmd;} public void Async_next_cmd_(Gfo_thread_cmd v) {next_cmd = v;} Gfo_thread_cmd next_cmd;
|
||||
public void Async_run() {
|
||||
running = true;
|
||||
Thread_adp_.Start_by_key(this.Async_key(), this, Invk_process_async);
|
||||
}
|
||||
public boolean Async_running() {return running;} private boolean running;
|
||||
public void Process_async() {
|
||||
Xoae_app app = install_mgr.App();
|
||||
Xob_bldr bldr = app.Bldr();
|
||||
Xowe_wiki wiki = app.Wiki_mgr().Get_by_or_make(Bry_.new_a7(wiki_key));
|
||||
wiki.Init_assert();
|
||||
bldr.Cmd_mgr().Clear();
|
||||
Process_async_init(app, wiki, bldr);
|
||||
bldr.Pause_at_end_(false);
|
||||
try {bldr.Run();}
|
||||
catch (Exception e) {
|
||||
running = false;
|
||||
install_mgr.Cmd_mgr().Working_(Bool_.N);
|
||||
throw Err_.new_exc(e, "xo", "error during import");
|
||||
}
|
||||
app.Usr_dlg().Prog_none("", "clear", "");
|
||||
app.Usr_dlg().Note_none("", "clear", "");
|
||||
Process_async_done(app, wiki, bldr);
|
||||
running = false;
|
||||
}
|
||||
public abstract void Process_async_init(Xoae_app app, Xowe_wiki wiki, Xob_bldr bldr);
|
||||
public abstract void Process_async_done(Xoae_app app, Xowe_wiki wiki, Xob_bldr bldr);
|
||||
public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
|
||||
if (ctx.Match(k, Invk_process_async)) Process_async();
|
||||
else if (ctx.Match(k, Invk_owner)) return owner;
|
||||
else return Gfo_invk_.Rv_unhandled;
|
||||
return this;
|
||||
} private static final String Invk_process_async = "run_async", Invk_owner = "owner";
|
||||
}
|
||||
class Xoi_cmd_category2_page_props extends Xoi_cmd_wiki_download { public Xoi_cmd_category2_page_props(Xoi_setup_mgr install_mgr, String wiki_key, String dump_date) {this.Ctor_download_(install_mgr, wiki_key, dump_date, Xowm_dump_type_.Str__page_props);}
|
||||
@Override public String Download_file_ext() {return ".sql.gz";}
|
||||
public static final String KEY_category2 = "wiki.category2.download.page_props";
|
||||
}
|
||||
class Xoi_cmd_category2_categorylinks extends Xoi_cmd_wiki_download { public Xoi_cmd_category2_categorylinks(Xoi_setup_mgr install_mgr, String wiki_key, String dump_date) {this.Ctor_download_(install_mgr, wiki_key, dump_date, Xowm_dump_type_.Str__categorylinks);}
|
||||
@Override public String Download_file_ext() {return ".sql.gz";}
|
||||
public static final String KEY_category2 = "wiki.category2.download.categorylinks";
|
||||
}
|
||||
class Xoi_cmd_category2_build extends Xoi_cmd_base {
|
||||
public Xoi_cmd_category2_build(Xoi_setup_mgr install_mgr, String wiki_key) {this.Ctor(install_mgr, wiki_key); this.app = install_mgr.App(); this.wiki_key = wiki_key;} private Xoae_app app; private String wiki_key;
|
||||
@Override public void Cmd_ctor() {
|
||||
Xowe_wiki wiki = app.Wiki_mgr().Get_by_or_make(Bry_.new_u8(wiki_key));
|
||||
wiki.Import_cfg().Category_version_(gplx.xowa.addons.wikis.ctgs.Xoa_ctg_mgr.Version_2);
|
||||
}
|
||||
@Override public String Async_key() {return KEY;} public static final String KEY = "wiki.category2.build";
|
||||
@Override public void Process_async_init(Xoae_app app, Xowe_wiki wiki, Xob_bldr bldr) {
|
||||
wiki.Db_mgr_as_sql().Category_version_update(false);
|
||||
bldr.Cmd_mgr().Add_many(wiki, gplx.xowa.addons.wikis.ctgs.bldrs.Xob_pageprop_cmd.BLDR_CMD_KEY, gplx.xowa.addons.wikis.ctgs.bldrs.Xob_catlink_cmd.BLDR_CMD_KEY);
|
||||
}
|
||||
@Override public void Process_async_done(Xoae_app app, Xowe_wiki wiki, Xob_bldr bldr) {
|
||||
app.Usr_dlg().Prog_many("", "", "category2 setup done");
|
||||
}
|
||||
}
|
||||
class Xoi_cmd_search2_build extends Xoi_cmd_base {
|
||||
public Xoi_cmd_search2_build(Xoi_setup_mgr install_mgr, String wiki_key) {this.Ctor(install_mgr, wiki_key);}
|
||||
@Override public String Async_key() {return KEY;} public static final String KEY = "wiki.search2.build";
|
||||
@Override public void Process_async_init(Xoae_app app, Xowe_wiki wiki, Xob_bldr bldr) {
|
||||
wiki.Db_mgr_as_sql().Category_version_update(false);
|
||||
gplx.xowa.addons.wikis.searchs.bldrs.Srch_bldr_mgr_.Setup(wiki);
|
||||
}
|
||||
@Override public void Process_async_done(Xoae_app app, Xowe_wiki wiki, Xob_bldr bldr) {
|
||||
app.Usr_dlg().Prog_many("", "", "search2 setup done");
|
||||
// wiki.Db_mgr().Search_version_refresh();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,46 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs.installs; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
|
||||
import gplx.core.threads.*;
|
||||
class Xoi_cmd_dumpfile {
|
||||
public byte[] Domain() {return domain;} private byte[] domain;
|
||||
public Io_url Bz2_url() {return bz2_url;} Io_url bz2_url;
|
||||
public Io_url Xml_url() {return xml_url;} Io_url xml_url;
|
||||
public boolean Bz2_unzip() {return bz2_unzip;} private boolean bz2_unzip;
|
||||
public void Clear() {domain = null; bz2_url = xml_url = null; bz2_unzip = false;}
|
||||
public Xoi_cmd_dumpfile Parse_msg(GfoMsg m) {
|
||||
Io_url dump_url = m.ReadIoUrl("url");
|
||||
domain = m.ReadBry("domain");
|
||||
if (Bry_.Len_eq_0(domain)) domain = Bry_.new_u8(dump_url.OwnerDir().NameOnly());
|
||||
bz2_unzip = String_.Eq(m.ReadStr("args"), "unzip");
|
||||
String dump_ext = dump_url.Ext();
|
||||
if (String_.Eq(dump_ext, ".bz2")) {
|
||||
bz2_url = dump_url;
|
||||
if (bz2_unzip) {
|
||||
xml_url = bz2_url.GenNewExt(""); // remove .bz2 extension (new file path should be .xml)
|
||||
if (!String_.Eq(xml_url.Ext(), ".xml"))
|
||||
xml_url = xml_url.GenNewExt(".xml");
|
||||
}
|
||||
}
|
||||
else if (String_.Eq(dump_ext, ".xml")) { // user selected xml file;
|
||||
bz2_url = null;
|
||||
xml_url = dump_url;
|
||||
bz2_unzip = false; // ignore unzip arge
|
||||
}
|
||||
return this;
|
||||
}
|
||||
public Gfo_thread_cmd Exec(Xoi_cmd_mgr cmd_mgr) {
|
||||
Xowe_wiki wiki = cmd_mgr.App().Wiki_mgr().Get_by_or_make(domain);
|
||||
if (bz2_unzip) { // unzip requested; add unzip cmd
|
||||
GfoMsg unzip_msg = GfoMsg_.new_parse_(Gfo_thread_cmd_unzip.KEY).Add("v", Gfo_thread_cmd_unzip.KEY).Add("src", bz2_url.Raw()).Add("trg", xml_url.Raw());
|
||||
Gfo_thread_cmd_unzip unzip_cmd = (Gfo_thread_cmd_unzip)cmd_mgr.Cmd_add(unzip_msg);
|
||||
unzip_cmd.Term_cmd_for_src_(Gfo_thread_cmd_unzip.Term_cmd_for_src_noop); // don't do anything with bz2 after unzip
|
||||
}
|
||||
if (xml_url == null)
|
||||
wiki.Import_cfg().Src_fil_bz2_(bz2_url);
|
||||
else
|
||||
wiki.Import_cfg().Src_fil_xml_(xml_url);
|
||||
return cmd_mgr.Dump_add_many_custom(String_.new_u8(domain), "", "", true);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,62 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs.installs; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
|
||||
import org.junit.*;
|
||||
public class Xoi_cmd_dumpfile_tst {
|
||||
@Before public void init() {fxt.Clear();} private Xoi_cmd_dumpfile_fxt fxt = new Xoi_cmd_dumpfile_fxt();
|
||||
@Test public void Bz2__unzip() {
|
||||
fxt .Exec_parse_msg("mem/en.wikipedia.org/fil.xml.bz2", "", "unzip")
|
||||
.Test_domain("en.wikipedia.org")
|
||||
.Test_vals("mem/en.wikipedia.org/fil.xml.bz2", "mem/en.wikipedia.org/fil.xml", true)
|
||||
;
|
||||
}
|
||||
@Test public void Bz2__unzip__assert_xml_ext() { // xml ext relies on removing ".bz2" from ".xml.bz2"; if just ".bz2" add an ".xml"
|
||||
fxt .Exec_parse_msg("mem/en.wikipedia.org/fil.bz2", "", "unzip")
|
||||
.Test_vals("mem/en.wikipedia.org/fil.bz2", "mem/en.wikipedia.org/fil.xml", true)
|
||||
;
|
||||
}
|
||||
@Test public void Bz2__direct() {
|
||||
fxt .Exec_parse_msg("mem/en.wikipedia.org/fil.bz2", "", "")
|
||||
.Test_vals("mem/en.wikipedia.org/fil.bz2", null, false)
|
||||
;
|
||||
}
|
||||
@Test public void Xml__unzip_n() {
|
||||
fxt .Exec_parse_msg("mem/en.wikipedia.org/fil.xml", "", "")
|
||||
.Test_vals(null, "mem/en.wikipedia.org/fil.xml", false)
|
||||
;
|
||||
}
|
||||
@Test public void Xml__unzip_y() {
|
||||
fxt .Exec_parse_msg("mem/en.wikipedia.org/fil.xml", "", "")
|
||||
.Test_vals(null, "mem/en.wikipedia.org/fil.xml", false)
|
||||
;
|
||||
}
|
||||
}
|
||||
class Xoi_cmd_dumpfile_fxt {
|
||||
public void Clear() {
|
||||
dumpfile.Clear();
|
||||
} private Xoi_cmd_dumpfile dumpfile = new Xoi_cmd_dumpfile();
|
||||
public Xoi_cmd_dumpfile_fxt Exec_parse_msg(String url, String domain, String args) {
|
||||
GfoMsg m = GfoMsg_.new_parse_("").Add("url", url).Add("domain", domain).Add("args", args);
|
||||
dumpfile.Parse_msg(m);
|
||||
return this;
|
||||
}
|
||||
public Xoi_cmd_dumpfile_fxt Test_vals(String expd_bz2, String expd_xml, boolean expd_unzip) {
|
||||
Eq_url(expd_bz2, dumpfile.Bz2_url());
|
||||
Eq_url(expd_xml, dumpfile.Xml_url());
|
||||
Tfds.Eq(expd_unzip, dumpfile.Bz2_unzip());
|
||||
return this;
|
||||
}
|
||||
public Xoi_cmd_dumpfile_fxt Test_domain(String expd_domain) {
|
||||
Tfds.Eq(expd_domain, String_.new_u8(dumpfile.Domain()));
|
||||
return this;
|
||||
}
|
||||
private void Eq_url(String expd, Io_url actl) {
|
||||
if (expd == null && actl == null) return;
|
||||
else if (expd != null && actl != null) {
|
||||
Tfds.Eq(expd, actl.Raw());
|
||||
}
|
||||
else if (expd == null) throw Err_.new_wo_type("actl should be null", "expd", expd);
|
||||
else if (actl == null) throw Err_.new_wo_type("actl should not be null", "expd", expd);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,35 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs.installs; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
|
||||
import gplx.gfui.*; import gplx.gfui.kits.core.*;
|
||||
import gplx.core.threads.*;
|
||||
class Xoi_cmd_imageMagick_download extends Gfo_thread_cmd_download implements Gfo_thread_cmd {// private static final byte[] Bry_windows_zip = Bry_.new_a7("-windows.zip");
|
||||
// static final String Src_imageMagick = "ftp://ftp.sunet.se/pub/multimedia/graphics/ImageMagick/binaries/";
|
||||
public Xoi_cmd_imageMagick_download(Gfo_usr_dlg usr_dlg, Gfui_kit kit, Io_url trg) {this.Ctor(usr_dlg, kit); this.trg = trg;} Io_url trg;
|
||||
@Override public byte Async_init() { // <a href="ImageMagick-6.8.1-9-Q16-x86-windows.zip">
|
||||
// byte[] raw = xrg.Exec_as_bry(Src_imageMagick);
|
||||
// int find_pos = Bry_find_.Find_fwd(raw, Bry_windows_zip); if (find_pos == Bry_find_.Not_found) return Fail();
|
||||
// int bgn_pos = Bry_find_.Find_bwd(raw, Byte_ascii.Quote, find_pos); if (bgn_pos == Bry_find_.Not_found) return Fail();
|
||||
// ++bgn_pos;
|
||||
// int end_pos = Bry_find_.Find_fwd(raw, Byte_ascii.Quote, bgn_pos); if (end_pos == Bry_find_.Not_found) return Fail();
|
||||
// String src = Src_imageMagick + String_.new_a7(Bry_.Mid(raw, bgn_pos, end_pos));
|
||||
String src = "http://ftp.sunet.se/pub/multimedia/graphics/ImageMagick/binaries/ImageMagick-6.8.8-1-Q16-x86-windows.zip";
|
||||
this.Init("downloading", src, trg);
|
||||
return super.Async_init();
|
||||
}
|
||||
byte Fail() {
|
||||
kit.Ask_ok(GRP_KEY, "windows_not_found", "Could not find Windows binary. Please download ImageMagick directly from the site.");
|
||||
return Gfo_thread_cmd_.Init_cancel_step;
|
||||
}
|
||||
public static final String KEY_imageMagick = "download.imageMagick";
|
||||
static final String GRP_KEY = "xowa.install.cmds.download.imageMagick";
|
||||
}
|
||||
class Xoi_cmd_msg_ok extends Gfo_thread_cmd_base implements Gfo_thread_cmd {
|
||||
public Xoi_cmd_msg_ok(Gfo_usr_dlg usr_dlg, Gfui_kit kit, String msg) {this.msg = msg; this.Ctor(usr_dlg, kit);} private String msg;
|
||||
@Override public boolean Async_term() {
|
||||
kit.Ask_ok("msg_ok", "msg", msg);
|
||||
return true;
|
||||
}
|
||||
public static final String KEY = "msg.ok";
|
||||
}
|
||||
|
||||
@@ -13,3 +13,133 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs.installs; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
|
||||
import gplx.core.brys.fmtrs.*; import gplx.core.threads.*;
|
||||
public class Xoi_cmd_mgr implements Gfo_invk {
|
||||
List_adp cmds = List_adp_.New();
|
||||
public Xoi_cmd_mgr(Xoi_setup_mgr install_mgr) {this.app = install_mgr.App(); this.install_mgr = install_mgr;} private Xoae_app app; Xoi_setup_mgr install_mgr;
|
||||
public Xoae_app App() {return app;}
|
||||
public void Canceled_y_() {canceled = true;} private boolean canceled = false;
|
||||
public boolean Working() {return working;} private boolean working;
|
||||
public void Working_(boolean v) {
|
||||
working = v;
|
||||
app.Bldr__running_(v);
|
||||
}
|
||||
private void Process_async(Gfo_thread_cmd cmd) {
|
||||
byte init_rslt = cmd.Async_init();
|
||||
if (init_rslt == Gfo_thread_cmd_.Init_ok) {
|
||||
cmd.Async_run();
|
||||
int async_sleep_interval = cmd.Async_sleep_interval();
|
||||
boolean async_prog_enabled = cmd.Async_prog_enabled();
|
||||
int async_sleep_sum = 0;
|
||||
while (cmd.Async_running()) {
|
||||
if (canceled) {this.Working_(Bool_.N); return;}
|
||||
if (async_prog_enabled) cmd.Async_prog_run(async_sleep_sum);
|
||||
Thread_adp_.Sleep(async_sleep_interval);
|
||||
async_sleep_sum += async_sleep_interval; // NOTE: this is not exact
|
||||
}
|
||||
}
|
||||
boolean term_pass = cmd.Async_term();
|
||||
if (cmd.Async_next_cmd() != null && init_rslt != Gfo_thread_cmd_.Init_cancel_all && term_pass)
|
||||
Run_async(cmd.Async_next_cmd());
|
||||
else
|
||||
this.Working_(Bool_.N);
|
||||
}
|
||||
private void Run_async(Gfo_thread_cmd cmd) {Thread_adp_.Start_by_val(cmd.Async_key(), this, Invk_process_async, cmd);}
|
||||
private void Cmds_run() {
|
||||
if (working) {
|
||||
app.Gui_mgr().Kit().Ask_ok("", "", "An import is in progress. Please wait for it to complete. If you want to do multiple imports at once, see Dashboard/Import/Offline."); // HOME
|
||||
return;
|
||||
}
|
||||
int cmds_len = cmds.Count();
|
||||
if (cmds_len == 0) return;
|
||||
for (int i = 0; i < cmds_len - 1; i++) {
|
||||
Gfo_thread_cmd cur_cmd = (Gfo_thread_cmd)cmds.Get_at(i);
|
||||
Gfo_thread_cmd nxt_cmd = (Gfo_thread_cmd)cmds.Get_at(i + 1);
|
||||
cur_cmd.Cmd_ctor();
|
||||
cur_cmd.Async_next_cmd_(nxt_cmd);
|
||||
}
|
||||
Gfo_thread_cmd cmd = (Gfo_thread_cmd)cmds.Get_at(0);
|
||||
cmds.Clear();
|
||||
this.Working_(Bool_.Y);
|
||||
app.Bldr__running_(true);
|
||||
this.Run_async(cmd);
|
||||
}
|
||||
Object Dump_add_many(GfoMsg m) {
|
||||
int args_len = m.Args_count();
|
||||
if (args_len < 4) throw Err_.new_wo_type("Please provide the following: wiki name, wiki date, dump_type, and one command; EX: ('simple.wikipedia.org', 'latest', 'pages-articles', 'wiki.download')");
|
||||
String wiki_key = m.Args_getAt(0).Val_to_str_or_empty();
|
||||
String wiki_date = m.Args_getAt(1).Val_to_str_or_empty();
|
||||
String dump_type = m.Args_getAt(2).Val_to_str_or_empty();
|
||||
Gfo_thread_cmd cmd = null;
|
||||
for (int i = 3; i < args_len; i++) {
|
||||
Keyval kv = m.Args_getAt(i);
|
||||
String kv_val = kv.Val_to_str_or_empty();
|
||||
if (String_.Eq(kv_val, Wiki_cmd_custom))
|
||||
return Dump_add_many_custom(wiki_key, wiki_date, dump_type, false);
|
||||
else {
|
||||
cmd = Dump_cmd_new(wiki_key, wiki_date, dump_type, kv.Val_to_str_or_empty());
|
||||
cmds.Add(cmd);
|
||||
}
|
||||
}
|
||||
return cmd; // return last cmd
|
||||
}
|
||||
public Gfo_thread_cmd Dump_add_many_custom(String wiki_key, String wiki_date, String dump_type, boolean dumpfile_cmd) {
|
||||
String[] custom_cmds = (app.Cfg().Get_bool_app_or("xowa.bldr.import.unzip_bz2_file", false)) // CFG: Cfg__
|
||||
? String_.Ary(Xoi_cmd_wiki_download.Key_wiki_download, Xoi_cmd_wiki_unzip.KEY_dump, Xoi_cmd_wiki_import.KEY)
|
||||
: String_.Ary(Xoi_cmd_wiki_download.Key_wiki_download, Xoi_cmd_wiki_import.KEY);
|
||||
int custom_cmds_len = custom_cmds.length;
|
||||
Gfo_thread_cmd cmd = null;
|
||||
for (int j = 0; j < custom_cmds_len; j++) {
|
||||
cmd = Dump_cmd_new(wiki_key, wiki_date, dump_type, custom_cmds[j]);
|
||||
if (dumpfile_cmd) {
|
||||
if (String_.Eq(cmd.Async_key(), Xoi_cmd_wiki_download.Key_wiki_download)) continue; // skip download if wiki.dump_file
|
||||
else if (String_.Eq(cmd.Async_key(), Xoi_cmd_wiki_unzip.KEY_dump)) {
|
||||
Xowe_wiki wiki = app.Wiki_mgr().Get_by_or_make(Bry_.new_u8(wiki_key));
|
||||
if (wiki.Import_cfg().Src_fil_xml() != null) continue; // skip unzip if xml exists
|
||||
}
|
||||
else if (String_.Eq(cmd.Async_key(), Xoi_cmd_wiki_import.KEY)) {
|
||||
((Xoi_cmd_wiki_import)cmd).Import_move_bz2_to_done_(false);
|
||||
}
|
||||
}
|
||||
cmds.Add(cmd);
|
||||
}
|
||||
return cmd;
|
||||
}
|
||||
Gfo_thread_cmd Dump_cmd_new(String wiki_key, String wiki_date, String dump_type, String cmd_key) {
|
||||
if (String_.Eq(cmd_key, Xoi_cmd_wiki_download.Key_wiki_download)) return new Xoi_cmd_wiki_download().Ctor_download_(install_mgr, wiki_key, wiki_date, dump_type).Owner_(this);
|
||||
else if (String_.Eq(cmd_key, Xoi_cmd_wiki_unzip.KEY_dump)) return new Xoi_cmd_wiki_unzip(install_mgr, wiki_key, wiki_date, dump_type).Owner_(this);
|
||||
else if (String_.Eq(cmd_key, Xoi_cmd_wiki_import.KEY)) return new Xoi_cmd_wiki_import(install_mgr, wiki_key, wiki_date, dump_type).Owner_(this);
|
||||
else if (String_.Eq(cmd_key, Xoi_cmd_category2_build.KEY)) return new Xoi_cmd_category2_build(install_mgr, wiki_key).Owner_(this);
|
||||
else if (String_.Eq(cmd_key, Xoi_cmd_category2_page_props.KEY_category2)) return new Xoi_cmd_category2_page_props(install_mgr, wiki_key, wiki_date).Owner_(this);
|
||||
else if (String_.Eq(cmd_key, Xoi_cmd_category2_categorylinks.KEY_category2)) return new Xoi_cmd_category2_categorylinks(install_mgr, wiki_key, wiki_date).Owner_(this);
|
||||
else if (String_.Eq(cmd_key, Xoi_cmd_search2_build.KEY)) return new Xoi_cmd_search2_build(install_mgr, wiki_key).Owner_(this);
|
||||
else throw Err_.new_unhandled(cmd_key);
|
||||
}
|
||||
public static final String Wiki_cmd_custom = "wiki.custom", Wiki_cmd_dump_file = "wiki.dump_file";
|
||||
public Gfo_thread_cmd Cmd_add(GfoMsg m) {Gfo_thread_cmd rv = Cmd_clone(m); cmds.Add(rv); return rv;}
|
||||
Gfo_thread_cmd Cmd_clone(GfoMsg m) {
|
||||
String cmd_key = m.ReadStr("v");
|
||||
if (String_.Eq(cmd_key, Gfo_thread_cmd_download.KEY)) return new Gfo_thread_cmd_download().Init("downloading", m.ReadStr("src"), Bry_fmtr_eval_mgr_.Eval_url(app.Url_cmd_eval(), m.ReadBry("trg"))).Url_eval_mgr_(app.Url_cmd_eval()).Owner_(this).Ctor(app.Usr_dlg(), app.Gui_mgr().Kit());
|
||||
else if (String_.Eq(cmd_key, Gfo_thread_cmd_unzip.KEY)) return new Gfo_thread_cmd_unzip().Url_eval_mgr_(app.Url_cmd_eval()).Owner_(this).Init(app.Usr_dlg(), app.Gui_mgr().Kit(), app.Prog_mgr().App_decompress_bz2(), app.Prog_mgr().App_decompress_zip(), app.Prog_mgr().App_decompress_gz(), Bry_fmtr_eval_mgr_.Eval_url(app.Url_cmd_eval(), m.ReadBry("src")), Bry_fmtr_eval_mgr_.Eval_url(app.Url_cmd_eval(), m.ReadBry("trg")));
|
||||
else if (String_.Eq(cmd_key, Gfo_thread_cmd_replace.KEY)) return new Gfo_thread_cmd_replace().Url_eval_mgr_(app.Url_cmd_eval()).Owner_(this).Init(app.Usr_dlg(), app.Gui_mgr().Kit(), Bry_fmtr_eval_mgr_.Eval_url(app.Url_cmd_eval(), m.ReadBry("fil")));
|
||||
else if (String_.Eq(cmd_key, Xoi_cmd_wiki_goto_page.KEY)) return new Xoi_cmd_wiki_goto_page(app, m.ReadStr("v")).Owner_(this);
|
||||
else if (String_.Eq(cmd_key, Xoi_cmd_msg_ok.KEY)) return new Xoi_cmd_msg_ok(app.Usr_dlg(), app.Gui_mgr().Kit(), m.ReadStr("v")).Owner_(this);
|
||||
else if (String_.Eq(cmd_key, Xoi_cmd_imageMagick_download.KEY_imageMagick)) return new Xoi_cmd_imageMagick_download(app.Usr_dlg(), app.Gui_mgr().Kit(), Bry_fmtr_eval_mgr_.Eval_url(app.Url_cmd_eval(), m.ReadBry("trg"))).Owner_(this);
|
||||
else if (String_.Eq(cmd_key, Wiki_cmd_dump_file)) return Wiki_cmd_dump_file_make(m);
|
||||
else throw Err_.new_unhandled(cmd_key);
|
||||
}
|
||||
Gfo_thread_cmd Wiki_cmd_dump_file_make(GfoMsg m) { // note: might be used directly in home-wiki pages to download files
|
||||
Xoi_cmd_dumpfile dumpfile = new Xoi_cmd_dumpfile().Parse_msg(m);
|
||||
return dumpfile.Exec(this);
|
||||
}
|
||||
public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
|
||||
if (ctx.Match(k, Invk_process_async)) Process_async((Gfo_thread_cmd)m.CastObj("v"));
|
||||
else if (ctx.Match(k, Invk_dump_add_many)) return Dump_add_many(m);
|
||||
else if (ctx.Match(k, Invk_cmd_add)) return Cmd_add(m);
|
||||
else if (ctx.Match(k, Invk_run)) Cmds_run();
|
||||
else return Gfo_invk_.Rv_unhandled;
|
||||
return this;
|
||||
} private static final String Invk_process_async = "process_async", Invk_dump_add_many = "dump_add_many", Invk_run = "run", Invk_cmd_add = "cmd_add";
|
||||
static final String GRP_KEY = "xowa.install_mgr.cmd_mgr";
|
||||
}
|
||||
|
||||
@@ -13,3 +13,41 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs.installs; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
|
||||
import gplx.gfui.*;
|
||||
import gplx.core.threads.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.cmds.utils.*;
|
||||
import gplx.xowa.bldrs.wms.dumps.*;
|
||||
class Xoi_cmd_wiki_download extends Gfo_thread_cmd_download implements Gfo_thread_cmd { private Xoi_setup_mgr install_mgr; private String wiki_key, dump_date, dump_type;
|
||||
public Xoi_cmd_wiki_download Ctor_download_(Xoi_setup_mgr install_mgr, String wiki_key, String dump_date, String dump_type) {
|
||||
this.install_mgr = install_mgr;
|
||||
this.wiki_key = wiki_key;
|
||||
this.dump_date = dump_date;
|
||||
this.dump_type = dump_type;
|
||||
this.Owner_(install_mgr);
|
||||
return this;
|
||||
}
|
||||
@gplx.Virtual public String Download_file_ext() {return ".xml.bz2";} // wiki.download is primarily used for dump files; default to .xml.bz2; NOTE: changed from ".xml"; DATE:2013-11-07
|
||||
@Override public String Async_key() {return Key_wiki_download;} public static final String Key_wiki_download = "wiki.download";
|
||||
@Override public byte Async_init() {
|
||||
Xoae_app app = install_mgr.App();
|
||||
Xowm_dump_file dump_file = new Xowm_dump_file(wiki_key, dump_date, dump_type);
|
||||
String[] server_urls = gplx.xowa.bldrs.installs.Xoi_dump_mgr.Server_urls(app);
|
||||
boolean connected = Xowm_dump_file_.Connect_first(dump_file, server_urls);
|
||||
if (connected)
|
||||
app.Usr_dlg().Note_many("", "", "url: ~{0}", dump_file.File_url());
|
||||
else {
|
||||
if (!Dump_servers_offline_msg_shown) {
|
||||
app.Gui_mgr().Kit().Ask_ok("", "", "all dump servers are offline: ~{0}", String_.AryXtoStr(server_urls));
|
||||
Dump_servers_offline_msg_shown = true;
|
||||
}
|
||||
}
|
||||
Xowe_wiki wiki = app.Wiki_mgr().Get_by_or_make(dump_file.Domain_itm().Domain_bry());
|
||||
Io_url root_dir = wiki.Fsys_mgr().Root_dir();
|
||||
Io_url[] trg_fil_ary = Io_mgr.Instance.QueryDir_args(root_dir).FilPath_("*." + dump_type + Download_file_ext() + "*").ExecAsUrlAry();
|
||||
Io_url trg = trg_fil_ary.length == 0 ? root_dir.GenSubFil(dump_file.File_name()) : trg_fil_ary[0];
|
||||
this.Ctor(app.Usr_dlg(), app.Gui_mgr().Kit());
|
||||
this.Init("download", dump_file.File_url(), trg);
|
||||
return super.Async_init();
|
||||
}
|
||||
private static boolean Dump_servers_offline_msg_shown = false;
|
||||
}
|
||||
|
||||
@@ -13,3 +13,16 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs.installs; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
|
||||
import gplx.core.threads.*;
|
||||
class Xoi_cmd_wiki_goto_page extends Gfo_thread_cmd_base implements Gfo_thread_cmd {
|
||||
public Xoi_cmd_wiki_goto_page(Xoae_app app, String page) {this.app = app; this.page = page; this.Ctor(app.Usr_dlg(), app.Gui_mgr().Kit());} private Xoae_app app; String page;
|
||||
@Override public void Async_run() {kit.New_cmd_sync(this).Invk(GfsCtx.new_(), 0, Invk_goto_page, GfoMsg_.Null);}
|
||||
private void Goto_page(String page) {app.Gui_mgr().Browser_win().Page__navigate_by_url_bar(page);}
|
||||
@Override public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
|
||||
if (ctx.Match(k, Invk_goto_page)) Goto_page(page);
|
||||
else return super.Invk(ctx, ikey, k, m);
|
||||
return this;
|
||||
} private static final String Invk_goto_page = "goto_page";
|
||||
public static final String KEY = "wiki.goto_page";
|
||||
}
|
||||
|
||||
@@ -13,3 +13,99 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs.installs; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
|
||||
import gplx.core.threads.*;
|
||||
import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.cmds.utils.*;
|
||||
import gplx.xowa.guis.views.*;
|
||||
import gplx.xowa.wikis.domains.*;
|
||||
import gplx.xowa.htmls.hrefs.*;
|
||||
import gplx.xowa.addons.wikis.ctgs.bldrs.*;
|
||||
class Xoi_cmd_wiki_import implements Gfo_thread_cmd {
|
||||
private boolean running;
|
||||
private Xowe_wiki wiki;
|
||||
public Xoi_cmd_wiki_import(Xoi_setup_mgr install_mgr, String wiki_key, String wiki_date, String dump_type) {this.install_mgr = install_mgr; this.Owner_(install_mgr); this.wiki_key = wiki_key; this.wiki_date = wiki_date; this.dump_type = dump_type;} private Xoi_setup_mgr install_mgr; String wiki_key, wiki_date, dump_type;
|
||||
public static final String KEY = "wiki.import";
|
||||
public void Cmd_ctor() {}
|
||||
public String Async_key() {return KEY;}
|
||||
public int Async_sleep_interval() {return Gfo_thread_cmd_.Async_sleep_interval_1_second;}
|
||||
public boolean Async_prog_enabled() {return false;}
|
||||
public void Async_prog_run(int async_sleep_sum) {}
|
||||
public byte Async_init() {return Gfo_thread_cmd_.Init_ok;}
|
||||
public boolean Async_term() {
|
||||
install_mgr.App().Usr_dlg().Log_many(GRP_KEY, "import.end", "import.end ~{0} ~{1} ~{2}", wiki_key, wiki_date, dump_type);
|
||||
return true;
|
||||
}
|
||||
public Gfo_invk Owner() {return owner;} public Xoi_cmd_wiki_import Owner_(Gfo_invk v) {owner = v; return this;} Gfo_invk owner;
|
||||
public Gfo_thread_cmd Async_next_cmd() {return next_cmd;} public void Async_next_cmd_(Gfo_thread_cmd v) {next_cmd = v;} Gfo_thread_cmd next_cmd;
|
||||
public void Async_run() {
|
||||
running = true;
|
||||
install_mgr.App().Usr_dlg().Log_many(GRP_KEY, "import.bgn", "import.bgn ~{0} ~{1} ~{2}", wiki_key, wiki_date, dump_type);
|
||||
Thread_adp_.Start_by_key(this.Async_key(), this, Invk_process_async);
|
||||
}
|
||||
public boolean Async_running() {
|
||||
return running;
|
||||
}
|
||||
public boolean Import_move_bz2_to_done() {return import_move_bz2_to_done;} public Xoi_cmd_wiki_import Import_move_bz2_to_done_(boolean v) {import_move_bz2_to_done = v; return this;} private boolean import_move_bz2_to_done = true;
|
||||
private void Process_async() {
|
||||
Xoae_app app = install_mgr.App();
|
||||
app.Usr_dlg().Prog_one("", "", "preparing import: ~{0}", wiki_key);
|
||||
Xob_bldr bldr = app.Bldr();
|
||||
wiki = app.Wiki_mgr().Get_by_or_make(Bry_.new_a7(wiki_key));
|
||||
wiki.Init_assert();
|
||||
bldr.Cmd_mgr().Clear();
|
||||
bldr.Pause_at_end_(false);
|
||||
Io_url src_url = wiki.Import_cfg().Src_rdr().Url();
|
||||
Process_sql(bldr, src_url);
|
||||
bldr.Run();
|
||||
app.Usr_dlg().Prog_none(GRP_KEY, "clear", ""); app.Usr_dlg().Note_none(GRP_KEY, "clear", "");
|
||||
app.Usere().Available_from_fsys();
|
||||
wiki.Init_needed_(true);
|
||||
wiki.Html_mgr().Page_wtr_mgr().Init_(true);
|
||||
wiki.Init_assert();
|
||||
if (String_.Eq(src_url.Ext(), ".xml")) {
|
||||
if ( app.Cfg().Get_bool_app_or("xowa.bldr.import.delete_xml_file", true) // CFG: Cfg__
|
||||
&& Io_mgr.Instance.ExistsFil(src_url.GenNewExt(".bz2")) // only delete the file if there is a corresponding bz2 file; BUG.GH:#124; DATE:2017-02-02
|
||||
)
|
||||
Io_mgr.Instance.DeleteFil(src_url);
|
||||
}
|
||||
else if (String_.Eq(src_url.Ext(), ".bz2")) {
|
||||
Io_url trg_fil = app.Fsys_mgr().Wiki_dir().GenSubFil_nest("#dump", "done", src_url.NameAndExt());
|
||||
if (import_move_bz2_to_done)
|
||||
Io_mgr.Instance.MoveFil_args(src_url, trg_fil, true).Exec();
|
||||
}
|
||||
running = false;
|
||||
wiki.Import_cfg().Src_fil_xml_(null).Src_fil_bz2_(null); // reset file else error when going from Import/Script to Import/List
|
||||
app.Gui_mgr().Kit().New_cmd_sync(this).Invk(GfsCtx.new_(), 0, Invk_open_wiki, GfoMsg_.Null);
|
||||
}
|
||||
private void Process_sql(Xob_bldr bldr, Io_url dump_url) {
|
||||
// setup wiki
|
||||
((Xob_cleanup_cmd)bldr.Cmd_mgr().Add_cmd(wiki, Xob_cmd_keys.Key_util_cleanup)).Delete_tdb_(true).Delete_sqlite3_(true);
|
||||
bldr.Cmd_mgr().Add_cmd(wiki, Xob_cmd_keys.Key_text_init);
|
||||
bldr.Cmd_mgr().Add_cmd(wiki, Xob_cmd_keys.Key_text_page);
|
||||
bldr.Cmd_mgr().Add_cmd(wiki, Xob_cmd_keys.Key_text_css);
|
||||
// if (wiki.Appe().Setup_mgr().Dump_mgr().Search_version() == gplx.xowa.addons.wikis.searchs.specials.Srch_special_page.Version_2)
|
||||
gplx.xowa.addons.wikis.searchs.bldrs.Srch_bldr_mgr_.Setup(wiki);
|
||||
bldr.Cmd_mgr().Add_cmd(wiki, Xob_cmd_keys.Key_text_term);
|
||||
|
||||
// setup category
|
||||
if (wiki.Domain_itm().Domain_type_id() != Xow_domain_tid_.Tid__other) { // do not add category if not wmf; note that wikia wikis will not have category dumps; DATE:2016-10-22
|
||||
Xob_download_cmd.Add_if_not_found_many(bldr, wiki, Xob_catlink_cmd.Dump_file_name, Xob_pageprop_cmd.Dump_file_name);
|
||||
bldr.Cmd_mgr().Add(new gplx.xowa.addons.wikis.ctgs.bldrs.Xob_pageprop_cmd(bldr, wiki).Src_dir_manual_(dump_url.OwnerDir()));
|
||||
bldr.Cmd_mgr().Add(new gplx.xowa.addons.wikis.ctgs.bldrs.Xob_catlink_cmd(bldr, wiki).Src_dir_manual_(dump_url.OwnerDir()));
|
||||
}
|
||||
}
|
||||
private void Open_wiki(String wiki_key) {
|
||||
Xog_win_itm main_win = install_mgr.App().Gui_mgr().Browser_win();
|
||||
if (main_win.Active_page() == null) return; // will be null when invoked through cmd-line
|
||||
byte[] url = Bry_.Add(wiki.Domain_bry(), Xoh_href_.Bry__wiki, wiki.Props().Main_page());
|
||||
main_win.Page__navigate_by_url_bar(String_.new_u8(url));
|
||||
}
|
||||
public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
|
||||
if (ctx.Match(k, Invk_process_async)) Process_async();
|
||||
else if (ctx.Match(k, Invk_owner)) return owner;
|
||||
else if (ctx.Match(k, Invk_open_wiki)) Open_wiki(wiki_key);
|
||||
else return Gfo_invk_.Rv_unhandled;
|
||||
return this;
|
||||
} private static final String Invk_process_async = "run_async", Invk_owner = "owner", Invk_open_wiki = "open_wiki";
|
||||
static final String GRP_KEY = "xowa.thread.op.build";
|
||||
}
|
||||
|
||||
@@ -13,3 +13,116 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs.installs; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
|
||||
import org.junit.*;
|
||||
import gplx.core.consoles.*;
|
||||
import gplx.core.brys.args.*; import gplx.core.threads.*; import gplx.xowa.bldrs.setups.maints.*; import gplx.xowa.xtns.wbases.imports.*;
|
||||
import gplx.xowa.wikis.domains.*;
|
||||
import gplx.xowa.bldrs.wms.*; import gplx.xowa.bldrs.wms.dumps.*;
|
||||
public class Xoi_cmd_wiki_tst {
|
||||
@Test public void Run() { // MAINT:2017-03-28
|
||||
// Bld_import_list(Xow_domain_regy.All);
|
||||
// Bld_cfg_files(Xow_domain_regy.All); // NOTE: remember to carry over the wikisource / page / index commands from the existing xowa_build_cfg.gfs; also, only run the xowa_build_cfg.gfs once; DATE:2013-10-15; last run: DATE:2014-09-09
|
||||
}
|
||||
public void Bld_import_list(String... ary) {
|
||||
int ary_len = ary.length;
|
||||
Bry_bfr bfr = Bry_bfr_.Reset(255);
|
||||
Wmf_latest_parser parser = new Wmf_latest_parser();
|
||||
Bfr_arg__time time_fmtr = new Bfr_arg__time();
|
||||
for (int i = 0; i < ary_len; i++)
|
||||
Bld_import_list_itm2(bfr, parser, time_fmtr, ary, i);
|
||||
Io_mgr.Instance.SaveFilStr("C:\\xowa\\user\\temp.txt", bfr.To_str());
|
||||
}
|
||||
private void Bld_import_list_itm2(Bry_bfr bfr, Wmf_latest_parser parser, Bfr_arg__time time_fmtr, String[] ary, int i) {
|
||||
String domain_str = ary[i];
|
||||
byte[] domain_bry = Bry_.new_a7(domain_str);
|
||||
Xow_domain_itm domain_itm = Xow_domain_itm_.parse(domain_bry);
|
||||
byte[] wmf_key_bry = Bry_.Replace(Xow_abrv_wm_.To_abrv(domain_itm), Byte_ascii.Dash, Byte_ascii.Underline);
|
||||
String wmf_key = String_.new_u8(wmf_key_bry);
|
||||
String url = "https://dumps.wikimedia.org/" + wmf_key + "/latest";
|
||||
byte[] latest_html = null;
|
||||
for (int j = 0; j < 5; ++j) {
|
||||
latest_html = Io_mgr.Instance.DownloadFil_args("", Io_url_.Empty).Exec_as_bry(url);
|
||||
if (latest_html != null) break;
|
||||
Tfds.Dbg("fail|" + domain_str + "|" + url);
|
||||
if (j == 4) return;
|
||||
}
|
||||
parser.Parse(latest_html);
|
||||
Xowm_dump_file dump_file = new Xowm_dump_file(domain_str, "latest", Xowm_dump_type_.Str__pages_articles);
|
||||
dump_file.Server_url_(Xowm_dump_file_.Server_wmf_https);
|
||||
byte[] pages_articles_key = Bry_.new_a7(wmf_key + "-latest-pages-articles.xml.bz2");
|
||||
Wmf_latest_itm latest_itm = parser.Get_by(pages_articles_key);
|
||||
if (latest_itm == null) {Tfds.Dbg("missing|" + domain_str + "|" + url); return;} // NOTE: commonswiki missing entry for commonswiki-latest-pages-articles.xml.bz2 DATE:2016-05-01
|
||||
Tfds.Dbg("pass|" + domain_str + "|" + url);
|
||||
bfr.Add(domain_bry).Add_byte_pipe();
|
||||
bfr.Add_str_u8(dump_file.File_url()).Add_byte_pipe();
|
||||
bfr.Add(Xow_domain_tid_.Get_type_as_bry(domain_itm.Domain_type_id())).Add_byte_pipe();
|
||||
long src_size = latest_itm.Size();
|
||||
bfr.Add_long_variable(src_size).Add_byte_pipe();
|
||||
bfr.Add_str_a7(gplx.core.ios.Io_size_.To_str(src_size)).Add_byte_pipe();
|
||||
time_fmtr.Seconds_(Math_.Div_safe_as_long(src_size, 1000000)).Bfr_arg__add(bfr);
|
||||
bfr.Add_byte_pipe();
|
||||
bfr.Add_str_a7(latest_itm.Date().XtoStr_fmt_yyyy_MM_dd_HH_mm());
|
||||
bfr.Add_byte_pipe();
|
||||
bfr.Add_str_a7(dump_file.Dump_date());
|
||||
bfr.Add_byte_nl();
|
||||
}
|
||||
/*
|
||||
private void Bld_import_list_itm(Bry_bfr bfr, Xowm_dump_file dump_file, Bry_fmtr_arg_time time_fmtr, String[] ary, int i) {
|
||||
String itm = ary[i];
|
||||
dump_file.Ctor(itm, "latest", Xowm_dump_type_.Str__pages_articles);
|
||||
int count = 0;
|
||||
while (count++ < 1) {
|
||||
dump_file.Server_url_(Xowm_dump_file_.Server_wmf);
|
||||
if (dump_file.Connect()) break;
|
||||
Tfds.WriteText(String_.Format("retrying: {0} {1}\n", count, dump_file.File_modified()));
|
||||
Thread_adp_.Sleep(15000); // wait for connection to reset
|
||||
}
|
||||
if (count == 10) {
|
||||
Tfds.WriteText(String_.Format("failed: {0}\n", dump_file.File_url()));
|
||||
return;
|
||||
}
|
||||
else
|
||||
Tfds.WriteText(String_.Format("passed: {0}\n", itm));
|
||||
bfr.Add_str(itm).Add_byte_pipe();
|
||||
bfr.Add_str(dump_file.File_url()).Add_byte_pipe();
|
||||
bfr.Add(Xow_domain_tid_.Get_type_as_bry(dump_file.Wiki_type().Wiki_tid())).Add_byte_pipe();
|
||||
// Xol_lang_stub lang_itm = Xol_lang_stub_.Get_by_key(wiki_type.Lang_key());
|
||||
// if (lang_itm == null) lang_itm = Xol_lang_stub_.Get_by_key(Xol_lang_itm_.Key_en); // commons, species, meta, etc will have no lang
|
||||
// bfr.Add(lang_itm.Local_name()).Add_byte_pipe();
|
||||
// bfr.Add(lang_itm.Canonical_name()).Add_byte_pipe();
|
||||
long src_size = dump_file.File_len();
|
||||
bfr.Add_long_variable(src_size).Add_byte_pipe();
|
||||
bfr.Add_str(gplx.core.ios.Io_size_.To_str(src_size)).Add_byte_pipe();
|
||||
time_fmtr.Seconds_(Math_.Div_safe_as_long(src_size, 1000000)).XferAry(bfr, 0);
|
||||
bfr.Add_byte_pipe();
|
||||
bfr.Add_str(dump_file.File_modified().XtoStr_fmt_yyyy_MM_dd_HH_mm());
|
||||
bfr.Add_byte_pipe();
|
||||
// bfr.Add_str(String_.Concat_with_obj(",", (Object[])dump_file.Dump_available_dates()));
|
||||
// bfr.Add_byte_pipe();
|
||||
bfr.Add_str(dump_file.Dump_date());
|
||||
bfr.Add_byte_nl();
|
||||
Thread_adp_.Sleep(1000);
|
||||
}
|
||||
*/
|
||||
public void Bld_cfg_files(String... ary) {
|
||||
Bry_bfr bfr = Bry_bfr_.Reset(255);
|
||||
gplx.xowa.bldrs.wiki_cfgs.Xoi_wiki_props_api api = new gplx.xowa.bldrs.wiki_cfgs.Xoi_wiki_props_api();
|
||||
gplx.xowa.bldrs.wiki_cfgs.Xoi_wiki_props_wiki wiki = new gplx.xowa.bldrs.wiki_cfgs.Xoi_wiki_props_wiki();
|
||||
int ary_len = ary.length;
|
||||
for (int i = 0; i < ary_len; i++) {
|
||||
String wiki_domain = ary[i];
|
||||
try {
|
||||
byte[] xml = api.Exec_api(api.Api_src(wiki_domain));
|
||||
wiki.Wiki_domain_(Bry_.new_a7(wiki_domain));
|
||||
api.Parse(wiki, String_.new_u8(xml));
|
||||
api.Build_cfg(bfr, wiki);
|
||||
}
|
||||
catch (Exception e) {
|
||||
Console_adp__sys.Instance.Write_str_w_nl(Err_.Message_gplx_full(e));
|
||||
}
|
||||
}
|
||||
bfr.Add_str_a7("app.bldr.wiki_cfg_bldr.run;").Add_byte_nl();
|
||||
Io_mgr.Instance.SaveFilStr("C:\\user\\xowa_build_cfg.gfs", bfr.To_str());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,36 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs.installs; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
|
||||
import gplx.gfui.*; import gplx.gfui.kits.core.*;
|
||||
import gplx.core.threads.*;
|
||||
class Xoi_cmd_wiki_unzip extends Gfo_thread_cmd_unzip implements Gfo_thread_cmd { public static final String KEY_dump = "wiki.unzip";
|
||||
public Xoi_cmd_wiki_unzip(Xoi_setup_mgr install_mgr, String wiki_key, String dump_date, String dump_type) {this.install_mgr = install_mgr; this.Owner_(install_mgr); this.wiki_key = wiki_key; this.dump_date = dump_date; this.dump_type = dump_type;} private Xoi_setup_mgr install_mgr; String wiki_key, dump_date, dump_type;
|
||||
@Override public String Async_key() {return KEY_dump;}
|
||||
@Override public byte Async_init() {
|
||||
Xoae_app app = install_mgr.App(); Gfui_kit kit = app.Gui_mgr().Kit();
|
||||
Xowe_wiki wiki = app.Wiki_mgr().Get_by_or_make(Bry_.new_u8(wiki_key));
|
||||
Io_url wiki_dir = wiki.Import_cfg().Src_dir();
|
||||
Io_url[] urls = Io_mgr.Instance.QueryDir_args(wiki_dir).Recur_(false).FilPath_("*.xml.bz2").ExecAsUrlAry();
|
||||
if (urls.length == 0) {
|
||||
kit.Ask_ok(GRP_KEY, "dump.unzip_latest.file_missing", "Could not find a dump file for ~{0} in ~{1}", wiki_key, wiki_dir.Raw());
|
||||
return Gfo_thread_cmd_.Init_cancel_step;
|
||||
}
|
||||
Io_url src = urls[urls.length - 1];
|
||||
Io_url trg = app.Fsys_mgr().Wiki_dir().GenSubFil_nest(wiki_key, src.NameOnly()); // NOTE: NameOnly() will strip trailing .bz2; EX: a.xml.bz2 -> a.xml
|
||||
super.Init(app.Usr_dlg(), app.Gui_mgr().Kit(), app.Prog_mgr().App_decompress_bz2(), app.Prog_mgr().App_decompress_zip(), app.Prog_mgr().App_decompress_gz(), src, trg);
|
||||
this.Term_cmd_for_src_(Term_cmd_for_src_move);
|
||||
this.Term_cmd_for_src_url_(app.Fsys_mgr().Wiki_dir().GenSubFil_nest("#dump", "done", src.NameAndExt()));
|
||||
if (Io_mgr.Instance.ExistsFil(trg)) {
|
||||
int rslt = kit.Ask_yes_no_cancel(GRP_KEY, "target_exists", "Target file already exists: '~{0}'.\nDo you want to delete it?", trg.Raw());
|
||||
switch (rslt) {
|
||||
case Gfui_dlg_msg_.Btn_yes: Io_mgr.Instance.DeleteFil(trg); break;
|
||||
case Gfui_dlg_msg_.Btn_no: return Gfo_thread_cmd_.Init_cancel_step;
|
||||
case Gfui_dlg_msg_.Btn_cancel: return Gfo_thread_cmd_.Init_cancel_all;
|
||||
default: throw Err_.new_unhandled(rslt);
|
||||
}
|
||||
}
|
||||
return Gfo_thread_cmd_.Init_ok;
|
||||
}
|
||||
static final String GRP_KEY = "xowa.thread.dump.unzip";
|
||||
}
|
||||
|
||||
@@ -13,3 +13,12 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs.installs; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
|
||||
import gplx.xowa.bldrs.wms.dumps.*;
|
||||
public class Xoi_dump_mgr {
|
||||
public static boolean Import_bz2_by_stdout(Xoa_app app) {return app.Cfg().Get_bool_app_or("xowa.bldr.import.apps.bz2_stdout.enabled", true);} // CFG: Cfg__
|
||||
public static String[] Server_urls(Xoa_app app) {
|
||||
String[] or = String_.Ary(Xowm_dump_file_.Server_your_org, Xowm_dump_file_.Server_wmf_https, Xowm_dump_file_.Server_c3sl, Xowm_dump_file_.Server_masaryk); // promote your.org to primary url; DATE:2016-08-07
|
||||
return app.Cfg().Get_strary_app_or("xowa.bldr.import.dump_servers", ",", or); // CFG: Cfg__
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,33 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs.installs; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
|
||||
public class Xoi_mirror_parser {
|
||||
public String[] Parse(String raw_str) {
|
||||
if (String_.Len_eq_0(raw_str)) return String_.Ary_empty;
|
||||
byte[] raw = Bry_.new_u8(raw_str);
|
||||
List_adp rv = List_adp_.New();
|
||||
int pos = 0;
|
||||
while (true) {
|
||||
int bgn = Bry_find_.Find_fwd(raw, CONST_href_bgn, pos); if (bgn == Bry_find_.Not_found) break;
|
||||
bgn += CONST_href_bgn.length;
|
||||
int end = Bry_find_.Find_fwd(raw, CONST_href_end, bgn); if (end == Bry_find_.Not_found) return String_.Ary_empty;
|
||||
byte[] date = Bry_.Mid(raw, bgn, end);
|
||||
pos = end + CONST_href_end.length;
|
||||
if (Bry_.Match(date, CONST_date_parent_dir)) continue;
|
||||
int date_pos_last = date.length - 1;
|
||||
if (date_pos_last == -1) return String_.Ary_empty;
|
||||
if (date[date_pos_last] == Byte_ascii.Slash) date = Bry_.Mid(date, 0, date_pos_last); // trim trailing /; EX: "20130101/" -> "20130101"
|
||||
rv.Add(String_.new_u8(date));
|
||||
}
|
||||
return rv.To_str_ary();
|
||||
} private static final byte[] CONST_href_bgn = Bry_.new_a7("<a href=\""), CONST_href_end = Bry_.new_a7("\""), CONST_date_parent_dir = Bry_.new_a7("../");
|
||||
public static String Find_last_lte(String[] ary, String comp) { // assuming sorted ary, find last entry that is lte comp
|
||||
int len = ary.length;
|
||||
for (int i = len - 1; i > -1; i--) {
|
||||
String itm = ary[i];
|
||||
if (CompareAble_.Is(CompareAble_.Less_or_same, itm, comp)) return itm;
|
||||
}
|
||||
return "";
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,46 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.bldrs.installs; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*;
|
||||
import org.junit.*;
|
||||
public class Xoi_mirror_parser_tst {
|
||||
@Test public void Basic() {
|
||||
Tst_parse(String_.Concat_lines_nl
|
||||
( "<?xml version=\"1.0\" encoding=\"UTF-8\"?>"
|
||||
, "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.1//EN\" \"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd\">"
|
||||
, "<html xmlns=\"http://www.w3.org/1999/xhtml\" xml:lang=\"en\">"
|
||||
, "<head>"
|
||||
, "<title>Index of /simplewiki/</title>"
|
||||
, "<link rel=\"stylesheet\" type=\"text/css\" href=\"/pub/misc/lighttpd-white-dir.css\" />"
|
||||
, "</head>"
|
||||
, "<body>"
|
||||
, "<h2>Index of /simplewiki/</h2>"
|
||||
, "<div class=\"list\">"
|
||||
, "<table summary=\"Directory Listing\" cellpadding=\"0\" cellspacing=\"0\">"
|
||||
, "<thead><tr><th class=\"n\">Name</th><th class=\"m\">Last Modified</th><th class=\"s\">Size</th><th class=\"t\">Type</th></tr></thead>"
|
||||
, "<tbody>"
|
||||
, "<tr><td class=\"n\"><a href=\"../\">Parent Directory</a>/</td><td class=\"m\"> </td><td class=\"s\">- </td><td class=\"t\">Directory</td></tr>"
|
||||
, "<tr><td class=\"n\"><a href=\"20120516/\">20120516</a>/</td><td class=\"m\">2012-May-17 01:04:39</td><td class=\"s\">- </td><td class=\"t\">Directory</td></tr>"
|
||||
, "<tr><td class=\"n\"><a href=\"20121220/\">20121220</a>/</td><td class=\"m\">2012-Dec-20 20:15:55</td><td class=\"s\">- </td><td class=\"t\">Directory</td></tr>"
|
||||
, "<tr><td class=\"n\"><a href=\"20130214/\">20130214</a>/</td><td class=\"m\">2013-Feb-14 06:28:41</td><td class=\"s\">- </td><td class=\"t\">Directory</td></tr>"
|
||||
, "<tr><td class=\"n\"><a href=\"latest/\">latest</a>/</td><td class=\"m\">2013-Feb-14 06:28:41</td><td class=\"s\">- </td><td class=\"t\">Directory</td></tr>"
|
||||
, "</tbody>"
|
||||
, "</table>"
|
||||
, "</div>"
|
||||
, "<div class=\"foot\">lighttpd</div>"
|
||||
, "</body>"
|
||||
, "</html>"
|
||||
), String_.Ary("20120516", "20121220", "20130214", "latest"));
|
||||
}
|
||||
@Test public void Find_last_lte() {
|
||||
Tst_find_last_lte(String_.Ary("20120516", "20121220", "20130214", "latest"), "20130101", "20121220");
|
||||
Tst_find_last_lte(String_.Ary("20120516", "20121220", "20130214", "latest"), "20120101", "");
|
||||
}
|
||||
private void Tst_parse(String raw, String[] expd) {
|
||||
Xoi_mirror_parser parser = new Xoi_mirror_parser();
|
||||
Tfds.Eq_ary_str(expd, parser.Parse(raw));
|
||||
}
|
||||
private void Tst_find_last_lte(String[] ary, String comp, String expd) {
|
||||
Tfds.Eq(expd, Xoi_mirror_parser.Find_last_lte(ary, comp));
|
||||
}
|
||||
}
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user