1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2026-03-02 03:49:30 +00:00

General: Remove text database support

This commit is contained in:
gnosygnu
2018-07-11 07:53:27 -04:00
parent fe0d1abde5
commit be3979c5af
44 changed files with 663 additions and 1954 deletions

View File

@@ -1,92 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs; import gplx.*; import gplx.xowa.*;
import gplx.core.primitives.*; import gplx.core.strings.*;
import gplx.dbs.*; import gplx.dbs.qrys.*; import gplx.xowa.wikis.dbs.*; import gplx.xowa.addons.wikis.ctgs.*;
import gplx.xowa.wikis.nss.*;
import gplx.xowa.wikis.data.*; import gplx.xowa.wikis.data.tbls.*;
import gplx.xowa.bldrs.wkrs.*; import gplx.xowa.bldrs.infos.*;
import gplx.xowa.addons.wikis.ctgs.htmls.catpages.doms.*;
/**
 * Test fixture layered over {@link Xob_fxt}: it builds page items, runs builder workers / commands
 * through the wrapped fixture, and checks what ended up in the wiki's sql database or on disk.
 */
public class Db_mgr_fxt {
  /** Wrapped builder fixture; assigned by {@link #Ctor_fsys()} or {@link #Ctor_mem()}. */
  private Xob_fxt bldr_fxt;
  /** Scratch page that the Test_load_* checks load into and then compare against. */
  private Xowd_page_itm actl = new Xowd_page_itm();

  /** Backs this fixture with a builder fixture rooted at the "root" subdirectory of the test url root. */
  public Db_mgr_fxt Ctor_fsys() {
    Xob_fxt fxt = new Xob_fxt();
    bldr_fxt = fxt.Ctor(Xoa_test_.Url_root().GenSubDir("root"));
    return this;
  }
  /** Backs this fixture with a builder fixture created through {@code Xob_fxt.Ctor_mem()}. */
  public Db_mgr_fxt Ctor_mem() {
    Xob_fxt fxt = new Xob_fxt();
    bldr_fxt = fxt.Ctor_mem();
    return this;
  }

  /** Wiki owned by the wrapped builder fixture. */
  public Xowe_wiki Wiki() {
    return bldr_fxt.Wiki();
  }
  /** Builder owned by the wrapped builder fixture. */
  public Xob_bldr Bldr() {
    return bldr_fxt.Bldr();
  }

  /** Creates an expected-page item carrying only id, modified date (parsed with {@code DateAdp_.parse_gplx}), redirect flag and text length. */
  public Xowd_page_itm page_(int id, String modified_on, boolean type_redirect, int text_len) {
    return new Xowd_page_itm()
      .Id_(id)
      .Modified_on_(DateAdp_.parse_gplx(modified_on))
      .Redirected_(type_redirect)
      .Text_len_(text_len);
  }
  /** Sets the pages that later Exec_run calls feed to page workers. */
  public Db_mgr_fxt doc_ary_(Xowd_page_itm... v) {
    bldr_fxt.doc_ary_(v);
    return this;
  }
  /** Creates a page item through the wrapped fixture. */
  public Xowd_page_itm doc_(int id, String date, String title, String text) {
    return bldr_fxt.doc_(id, date, title, text);
  }
  /** Same as {@link #doc_} but with the fixed date "2012-01-02 03:04". */
  public Xowd_page_itm doc_wo_date_(int id, String title, String text) {
    return bldr_fxt.doc_(id, "2012-01-02 03:04", title, text);
  }
  /** Same as {@link #doc_} but with the fixed date "2012-01-02 03:04" and the placeholder text "IGNORE". */
  public Xowd_page_itm doc_ttl_(int id, String title) {
    return bldr_fxt.doc_(id, "2012-01-02 03:04", title, "IGNORE");
  }

  /** Writes {@code raw} to the file named by the raw url string. */
  public Db_mgr_fxt Init_fil(String url, String raw) {
    Io_url fil_url = Io_url_.new_fil_(url);
    return Init_fil(fil_url, raw);
  }
  /** Writes {@code raw} to {@code url}. */
  public Db_mgr_fxt Init_fil(Io_url url, String raw) {
    Io_mgr.Instance.SaveFilStr(url, raw);
    return this;
  }

  /** Runs one page worker over the configured pages. */
  public Db_mgr_fxt Exec_run(Xob_page_wkr wkr) {
    bldr_fxt.Run_page_wkrs(wkr);
    return this;
  }
  /** Runs one builder command. */
  public Db_mgr_fxt Exec_run(Xob_cmd cmd) {
    bldr_fxt.Run_cmds(cmd);
    return this;
  }
  /** Runs one parser worker over the configured pages. */
  public Db_mgr_fxt Exec_run(Xobd_parser_wkr wkr) {
    bldr_fxt.Run(wkr);
    return this;
  }

  /**
   * Batch-inserts one page row per title into the wiki's page table.
   * Ids are taken from {@code page_id_next}, which is advanced by one for every inserted row.
   */
  public void Init_page_insert(Int_obj_ref page_id_next, int ns_id, String[] ttls) {
    final Xowe_wiki wiki = this.Wiki();
    final int ttl_count = ttls.length;  // read before the batch is opened
    final DateAdp now = Datetime_now.Dflt_add_min_(0);
    final Xowd_page_tbl page_tbl = wiki.Db_mgr_as_sql().Core_data_mgr().Tbl__page();
    page_tbl.Insert_bgn();
    int idx = 0;
    while (idx < ttl_count) {
      String ttl = ttls[idx];
      int new_id = page_id_next.Val();
      page_tbl.Insert_cmd_by_batch(new_id, ns_id, Bry_.new_u8(ttl), false, now, 0, new_id, 0, 0, -1);
      page_id_next.Val_add(1);
      idx++;
    }
    page_tbl.Insert_end();
  }

  /** Loads a page by namespace + title and compares id, modified date, redirect flag and text length with {@code expd}. */
  public void Test_load_ttl(int ns_id, String ttl_str, Xowd_page_itm expd) {
    Xowe_wiki wiki = bldr_fxt.Wiki();
    Xow_ns ns = wiki.Ns_mgr().Ids_get_or_null(ns_id);
    byte[] ttl_bry = Bry_.new_a7(ttl_str);
    wiki.Db_mgr_as_sql().Load_mgr().Load_by_ttl(actl, ns, ttl_bry);

    Tfds.Eq(expd.Id(), actl.Id());
    Tfds.Eq_date(expd.Modified_on(), actl.Modified_on());
    Tfds.Eq(expd.Redirected(), actl.Redirected());
    Tfds.Eq(expd.Text_len(), actl.Text_len());
  }
  /** Loads a page by id and compares its text with {@code expd}. */
  public void Test_load_page(int ns_id, int page_id, String expd) {
    Xowe_wiki wiki = bldr_fxt.Wiki();
    Xow_ns ns = wiki.Ns_mgr().Ids_get_or_null(ns_id);
    wiki.Db_mgr_as_sql().Load_mgr().Load_page(actl.Id_(page_id), ns);
    String loaded_text = String_.new_a7(actl.Text());
    Tfds.Eq(expd, loaded_text);
  }
  /** Extracts the page ids from a list of {@code Xowd_page_itm}. */
  int[] Xto_int_ary(List_adp rslts) {
    final int count = rslts.Count();
    int[] ids = new int[count];
    for (int pos = 0; pos < count; pos++) {
      Xowd_page_itm itm = (Xowd_page_itm)rslts.Get_at(pos);
      ids[pos] = itm.Id();
    }
    return ids;
  }
  /** Compares the content of the file at {@code url} with {@code expd}, line by line. */
  public void Test_file(String url, String expd) {
    String loaded = Io_mgr.Instance.LoadFilStr(url);
    Tfds.Eq_str_lines(expd, loaded);
  }

  /**
   * Prepares a fresh sqlite-backed wiki: releases pooled connections, registers the default sqlite
   * builder, deletes the wiki root dir (missing dir is ignored), creates the core database with the
   * test props, and writes a stub "a.xml" into the import source dir.
   */
  public void Init_db_sqlite() {
    final Xowe_wiki wiki = this.Wiki();
    Db_conn_pool.Instance.Rls_all();
    Db_conn_bldr.Instance.Reg_default_sqlite();
    Io_mgr.Instance
      .DeleteDir_cmd(wiki.Fsys_mgr().Root_dir())
      .MissingIgnored_()
      .Exec();
    wiki.Db_mgr_create_as_sql()
      .Core_data_mgr()
      .Init_by_make(Xowd_core_db_props.Test, Xob_info_session.Test);
    Io_mgr.Instance.SaveFilStr(wiki.Import_cfg().Src_dir().GenSubFil("a.xml"), "<test/>");
  }
  /** Releases the wiki's core data manager. */
  public void Rls() {
    this.Wiki().Db_mgr_as_sql().Core_data_mgr().Rls();
  }
}

View File

@@ -36,12 +36,6 @@ public class Xob_cmd_keys {
, Key_diff_merge = "diff.merge"
, Key_text_delete_page = "text.delete_page"
, Key_tdb_text_init = "tdb.text.init" // "core.init"
, Key_tdb_make_page = "tdb.text.page" // "core.make_page"
, Key_tdb_make_id = "core.make_id"
, Key_tdb_calc_stats = "core.calc_stats"
, Key_tdb_text_wdata_qid = "tdb.text.wdata.qid"
, Key_tdb_text_wdata_pid = "tdb.text.wdata.pid"
, Key_exec_sql = "import.sql.exec_sql"
, Key_decompress_bz2 = "core.decompress_bz2"
;

View File

@@ -49,16 +49,11 @@ public class Xob_cmd_mgr implements Gfo_invk {
else if (String_.Eq(cmd_key, Xob_cmd_keys.Key_util_delete)) return Add(new Xob_delete_cmd(bldr, wiki));
else if (String_.Eq(cmd_key, Xob_cmd_keys.Key_util_download)) return Add(new Xob_download_cmd(bldr, wiki));
else if (String_.Eq(cmd_key, Xob_cmd_keys.Key_util_xml_dump)) return Add(new Xob_xml_dumper_cmd(bldr, wiki));
else if (String_.Eq(cmd_key, Xob_cmd_keys.Key_wbase_qid)) return Xml_rdr_direct_add(wiki, new Xob_wdata_qid_sql().Ctor(bldr, wiki));
else if (String_.Eq(cmd_key, Xob_cmd_keys.Key_wbase_pid)) return Xml_rdr_direct_add(wiki, new Xob_wdata_pid_sql(null, wiki).Ctor(bldr, wiki));
else if (String_.Eq(cmd_key, Xob_cmd_keys.Key_wbase_qid)) return Xml_rdr_direct_add(wiki, new Xob_wdata_qid(null).Ctor(bldr, wiki));
else if (String_.Eq(cmd_key, Xob_cmd_keys.Key_wbase_pid)) return Xml_rdr_direct_add(wiki, new Xob_wdata_pid(null).Ctor(bldr, wiki));
else if (String_.Eq(cmd_key, Xob_cmd_keys.Key_wbase_db)) return Add(new Xob_wdata_db_cmd(bldr, wiki));
else if (String_.Eq(cmd_key, Xob_cmd_keys.Key_site_meta)) return Add(new Xob_site_meta_cmd(bldr, wiki));
else if (String_.Eq(cmd_key, Xob_cmd_keys.Key_tdb_text_init)) return Add(new Xob_init_tdb(bldr, wiki));
else if (String_.Eq(cmd_key, Xob_cmd_keys.Key_tdb_make_id)) return Xml_rdr_direct_add(wiki, new Xob_make_id_wkr(bldr, wiki));
else if (String_.Eq(cmd_key, Xob_cmd_keys.Key_tdb_calc_stats)) return Add(new Xob_calc_stats_cmd(bldr, wiki));
else if (String_.Eq(cmd_key, Xob_cmd_keys.Key_tdb_text_wdata_qid)) return Xml_rdr_direct_add(wiki, new Xob_wdata_qid_txt().Ctor(bldr, wiki));
else if (String_.Eq(cmd_key, Xob_cmd_keys.Key_tdb_text_wdata_pid)) return Xml_rdr_direct_add(wiki, new Xob_wdata_pid_txt().Ctor(bldr, wiki));
else if (String_.Eq(cmd_key, Xob_cmd_keys.Key_diff_build)) return Add(new Xob_diff_build_cmd(bldr, wiki));
else if (String_.Eq(cmd_key, Xob_cmd_keys.Key_exec_sql)) return Add(new Xob_exec_sql_cmd(bldr, wiki));
else if (String_.Eq(cmd_key, Xob_cmd_keys.Key_decompress_bz2)) return Add(new Xob_decompress_bz2_cmd(bldr, wiki));

View File

@@ -1,154 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs; import gplx.*; import gplx.xowa.*;
import gplx.core.tests.*; import gplx.core.ios.*; import gplx.core.times.*;
import gplx.dbs.*; import gplx.xowa.wikis.tdbs.*; import gplx.xowa.wikis.data.tbls.*; import gplx.xowa.bldrs.cmds.texts.tdbs.*;
import gplx.xowa.bldrs.wkrs.*;
/**
 * Builder test fixture: creates an app / wiki / builder trio, feeds page items to page workers and
 * commands, and compares the files they generate against registered expectations.
 */
public class Xob_fxt {
  private final Tst_mgr tst_mgr = new Tst_mgr();
  private final DateAdp_parser dateParser = DateAdp_parser.new_();
  /** Expected files ({@code Io_fil_chkr}) registered through {@link #Fil_expd}. */
  List_adp expd_list = List_adp_.New();
  /** Urls registered through {@link #Fil_skip}. */
  List_adp skip_list = List_adp_.New();
  private Xoae_app app;
  private Xob_bldr bldr;
  private Xowe_wiki wiki;
  /** Pages fed to workers by the Run* methods; set through {@link #doc_ary_}. */
  private Xowd_page_itm[] doc_ary;

  /** Switches the io manager to its in-memory engine and initializes under "mem/xowa/". */
  public Xob_fxt Ctor_mem() {
    Io_mgr.Instance.InitEngine_mem();
    Io_url mem_root = Io_url_.mem_dir_("mem/xowa/");
    return Ctor(mem_root);
  }
  /** Registers the default sqlite connection builder and creates the app, wiki and builder under {@code root_dir}. */
  public Xob_fxt Ctor(Io_url root_dir) {
    Db_conn_bldr.Instance.Reg_default_sqlite();
    this.app = Xoa_app_fxt.Make__app__edit("linux", root_dir);
    this.wiki = Xoa_app_fxt.Make__wiki__edit(app);
    this.bldr = Xoa_app_fxt.bldr_(app);
    return this;
  }
  public Xoae_app App() {
    return app;
  }
  public Xob_bldr Bldr() {
    return bldr;
  }
  public Xowe_wiki Wiki() {
    return wiki;
  }

  /** Registers an expected file; {@code expd} lines are joined with newlines, without a trailing one. */
  public Xob_fxt Fil_expd(Io_url url, String... expd) {
    // no trailing newline: a caller that wants one passes an extra "" argument
    String joined = String_.Concat_lines_nl_skip_last(expd);
    expd_list.Add(new Io_fil_chkr(url, joined));
    return this;
  }
  /** Adds every url to {@code skip_list}. */
  public Xob_fxt Fil_skip(Io_url... urls) {
    for (Io_url url : urls)
      skip_list.Add(url);
    return this;
  }

  /** Sets the pages that the Run* methods iterate over. */
  public Xob_fxt doc_ary_(Xowd_page_itm... v) {
    this.doc_ary = v;
    return this;
  }
  /** Same as {@link #doc_} but with the fixed date "2012-01-02 13:14". */
  public Xowd_page_itm doc_wo_date_(int id, String title, String text) {
    return doc_(id, "2012-01-02 13:14", title, text);
  }
  /** Creates a page item with the given id, title (resolved against the wiki's namespaces), text and parsed modified date. */
  public Xowd_page_itm doc_(int id, String date, String title, String text) {
    Xowd_page_itm page = new Xowd_page_itm()
      .Id_(id)
      .Ttl_(Bry_.new_u8(title), wiki.Ns_mgr())
      .Text_(Bry_.new_u8(text));
    int[] date_segs = new int[7];
    dateParser.Parse_iso8651_like(date_segs, date);
    page.Modified_on_(DateAdp_.seg_(date_segs));
    return page;
  }

  /** Runs a fresh {@code Xob_make_id_wkr} over the configured pages. */
  public Xob_fxt Run_id() {
    Run_page_wkrs(new Xob_make_id_wkr(bldr, wiki));
    return this;
  }
  /** Runs a template-dump command over the configured pages, then checks its dump files against the expectations. */
  public Xob_fxt Run_tmpl_dump() {
    Xob_parse_dump_templates_cmd dump_cmd = new Xob_parse_dump_templates_cmd(bldr, wiki);
    Run_wkr(dump_cmd);
    tst_fils(dump_cmd.Dump_url_gen().Prv_urls());
    return this;
  }
  /** Wraps the parser workers in one {@code Xobd_parser} and runs it over the configured pages. */
  public Xob_fxt Run(Xobd_parser_wkr... wkrs) {
    Xobd_parser parser = new Xobd_parser(bldr);
    for (Xobd_parser_wkr wkr : wkrs)
      parser.Wkr_add(wkr);
    Run_page_wkrs(parser);
    return this;
  }
  /** Runs each worker (bgn, every configured page, end) in turn, then checks expected files. */
  public Xob_fxt Run_page_wkrs(Xob_page_wkr... wkrs) {
    final int page_count = doc_ary.length;  // read up front, before any worker is started
    for (int wkr_idx = 0; wkr_idx < wkrs.length; wkr_idx++) {
      Xob_page_wkr cur = wkrs[wkr_idx];
      cur.Page_wkr__bgn();
      int page_idx = 0;
      while (page_idx < page_count) {
        cur.Page_wkr__run(doc_ary[page_idx]);
        page_idx++;
      }
      cur.Page_wkr__end();
    }
    Test_expd_files();
    return this;
  }
  /** Runs each command through bgn / run / end, then checks expected files. */
  public Xob_fxt Run_cmds(Xob_cmd... cmds) {
    for (Xob_cmd cur : cmds) {
      cur.Cmd_bgn(bldr);
      cur.Cmd_run();
      cur.Cmd_end();
    }
    Test_expd_files();
    return this;
  }
  /** Runs one worker (bgn, every given page, end) without any file check. */
  public void Run_page_wkr(Xob_page_wkr wkr, Xowd_page_itm... pages) {
    final int page_count = pages.length;  // read before the worker is started
    wkr.Page_wkr__bgn();
    int page_idx = 0;
    while (page_idx < page_count) {
      wkr.Page_wkr__run(pages[page_idx]);
      page_idx++;
    }
    wkr.Page_wkr__end();
  }

  /** Runs one worker over the configured pages; unlike {@link #Run_page_wkrs} no file check follows. */
  private void Run_wkr(Xob_page_wkr wkr) {
    wkr.Page_wkr__bgn();
    for (int page_idx = 0; page_idx < doc_ary.length; page_idx++)
      wkr.Page_wkr__run(doc_ary[page_idx]);
    wkr.Page_wkr__end();
  }
  /** Loads the given urls and compares them with the registered expectations. */
  private void tst_fils(Io_url[] ary) {
    Io_fil[] loaded = Get_actl(ary);
    Io_fil_chkr[] expected = (Io_fil_chkr[])expd_list.To_ary(Io_fil_chkr.class);
    tst_mgr.Tst_ary("all", expected, loaded);
  }
  /** Loads each url's content into an {@code Io_fil}, keeping the input order. */
  Io_fil[] Get_actl(Io_url[] ary) {
    final int count = ary.length;
    Io_fil[] loaded = new Io_fil[count];
    for (int pos = 0; pos < count; pos++) {
      Io_url cur_url = ary[pos];
      loaded[pos] = new Io_fil(cur_url, Io_mgr.Instance.LoadFilStr(cur_url));
    }
    return loaded;
  }
  /** Compares the wiki's generated files with the expectations; does nothing when none are registered. */
  private void Test_expd_files() {
    if (expd_list.Count() <= 0) return;
    Io_fil_chkr[] expected = (Io_fil_chkr[])expd_list.To_ary(Io_fil_chkr.class);
    Io_fil[] generated = wiki_();
    tst_mgr.Tst_ary("all", expected, generated);
  }
  /** Collects every file under the wiki's tdb ns dir and site dir, sorted. */
  Io_fil[] wiki_() {
    List_adp found = List_adp_.New();
    wiki_fil_add(found, wiki.Tdb_fsys_mgr().Ns_dir());
    wiki_fil_add(found, wiki.Tdb_fsys_mgr().Site_dir());
    found.Sort();
    return (Io_fil[])found.To_ary(Io_fil.class);
  }
  /** Recursively lists {@code root_dir} and adds each entry, with its content, to {@code list}; missing files are ignored on load. */
  private void wiki_fil_add(List_adp list, Io_url root_dir) {
    Io_url[] found_urls = Io_mgr.Instance.QueryDir_args(root_dir).Recur_().ExecAsUrlAry();
    for (Io_url cur_url : found_urls) {
      String content = Io_mgr.Instance.LoadFilStr_args(cur_url).MissingIgnored_().Exec();
      list.Add(new Io_fil(cur_url, content));
    }
  }
}

View File

@@ -0,0 +1,53 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs; import gplx.*; import gplx.xowa.*;
import gplx.core.tests.*; import gplx.core.times.*;
import gplx.dbs.*; import gplx.xowa.wikis.data.tbls.*;
import gplx.xowa.bldrs.wkrs.*;
/** Slim builder test fixture: creates an in-memory app / wiki / builder trio and runs page workers over explicit pages. */
public class Xobldr_fxt {
  private final DateAdp_parser dateParser = DateAdp_parser.new_();
  private Xoae_app app;
  private Xob_bldr bldr;
  private Xowe_wiki wiki;

  public Xoae_app App() {
    return app;
  }
  public Xob_bldr Bldr() {
    return bldr;
  }
  public Xowe_wiki Wiki() {
    return wiki;
  }

  /** Switches the io manager to its in-memory engine and initializes under "mem/xowa/". */
  public Xobldr_fxt Ctor_mem() {
    Io_mgr.Instance.InitEngine_mem();
    Io_url mem_root = Io_url_.mem_dir_("mem/xowa/");
    return Ctor(mem_root);
  }
  /** Registers the default sqlite connection builder and creates the app, wiki and builder under {@code root_dir}. */
  private Xobldr_fxt Ctor(Io_url root_dir) {
    Db_conn_bldr.Instance.Reg_default_sqlite();
    this.app = Xoa_app_fxt.Make__app__edit("linux", root_dir);
    this.wiki = Xoa_app_fxt.Make__wiki__edit(app);
    this.bldr = Xoa_app_fxt.bldr_(app);
    return this;
  }

  /** Same as {@link #New_page} but with the fixed date "2012-01-02 13:14". */
  public Xowd_page_itm New_page_wo_date(int id, String title, String text) {
    return New_page(id, "2012-01-02 13:14", title, text);
  }
  /** Creates a page item with the given id, title (resolved against the wiki's namespaces), text and parsed modified date. */
  public Xowd_page_itm New_page(int id, String date, String title, String text) {
    Xowd_page_itm page = new Xowd_page_itm()
      .Id_(id)
      .Ttl_(Bry_.new_u8(title), wiki.Ns_mgr())
      .Text_(Bry_.new_u8(text));
    int[] date_segs = new int[7];
    dateParser.Parse_iso8651_like(date_segs, date);
    page.Modified_on_(DateAdp_.seg_(date_segs));
    return page;
  }
  /** Runs one worker: bgn, then every given page in order, then end. */
  public void Run_page_wkr(Xob_page_wkr wkr, Xowd_page_itm... pages) {
    final int page_count = pages.length;  // read before the worker is started
    wkr.Page_wkr__bgn();
    int page_idx = 0;
    while (page_idx < page_count) {
      wkr.Page_wkr__run(pages[page_idx]);
      page_idx++;
    }
    wkr.Page_wkr__end();
  }
}

View File

@@ -1,105 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs.cmds.texts.tdbs; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.cmds.*; import gplx.xowa.bldrs.cmds.texts.*;
import gplx.core.primitives.*; import gplx.core.ios.*;
import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.wkrs.*; import gplx.xowa.bldrs.wtrs.*;
import gplx.xowa.langs.*; import gplx.xowa.wikis.nss.*; import gplx.xowa.wikis.dbs.*; import gplx.xowa.wikis.tdbs.*; import gplx.xowa.wikis.data.tbls.*;
/**
 * Base page worker for building a title-word search index in the text database.
 * For every page it splits the title into words and appends one row per word, laid out as
 * {@code word|<base85 page id>;<base85 text length>\n}, to a temp writer; the temp output is
 * handed to {@code Xobdc_merger.Ns} when the run ends.
 * NOTE(review): make_dir, temp_dir, dump_bfr, dump_fil_len, sort_mem_len and delete_temp are not
 * declared here; presumably they are inherited from Xob_itm_dump_base -- confirm.
 */
public abstract class Srch_bldr_wkr_base extends Xob_itm_dump_base implements Xob_page_wkr {
// "list" is the reusable scratch hash handed to Split_ttl_into_words for de-duplicating words
private final Ordered_hash list = Ordered_hash_.New(); private Xol_lang_itm lang;
/** Key identifying the concrete worker; also passed to Init_dump in Page_wkr__bgn. */
public abstract String Page_wkr__key();
/** Points the dump at the tdb ns dir, captures the wiki language, and creates the temp-writer manager. */
public void Page_wkr__bgn() {
make_dir = wiki.Tdb_fsys_mgr().Ns_dir();
this.Init_dump(this.Page_wkr__key(), make_dir);
lang = wiki.Lang(); // wiki.Appe().Lang_mgr().Lang_en(); // NOTE: was .Lang_en which is wrong (should match lang of wiki); DATE:2013-05-11
tmp_wtr_mgr = new Xob_tmp_wtr_mgr(new Xob_tmp_wtr_wkr__ttl(temp_dir, dump_fil_len));
if (wiki.Db_mgr().Tid() == Xodb_mgr_sql.Tid_sql) // if sqlite, hard-code to ns_main; aggregates all ns into one
ns_main = wiki.Ns_mgr().Ns_main();
} private Xob_tmp_wtr_mgr tmp_wtr_mgr; private Xow_ns ns_main;
/**
 * Writes one index row per distinct title word of {@code page}.
 * Any exception is logged as a warning and swallowed so that one bad page cannot abort the import.
 */
public void Page_wkr__run(Xowd_page_itm page) {
// if (page.Ns_id() != Xow_ns_.Tid__main) return; // limit to main ns for now
try {
byte[] ttl = page.Ttl_page_db();
byte[][] words = Split_ttl_into_words(lang, list, dump_bfr, ttl);
// ns_main is only set for sql wikis (see Page_wkr__bgn); otherwise rows go to the page's own ns writer
Xob_tmp_wtr wtr = tmp_wtr_mgr.Get_or_new(ns_main == null ? page.Ns() : ns_main);
int words_len = words.length;
// first pass: total size of all rows for this page, so the writer can be flushed once up front
int row_len = 0;
for (int i = 0; i < words_len; i++) {
byte[] word = words[i];
row_len += word.length + 13; // 13=5(id) + 5(page_len) + 3(dlms)
}
if (wtr.FlushNeeded(row_len)) wtr.Flush(bldr.Usr_dlg());
// second pass: emit "word|id;text_len\n" for each word
for (int i = 0; i < words_len; i++) {
byte[] word = words[i];
wtr.Bfr() .Add(word) .Add_byte(Byte_ascii.Pipe)
.Add_base85_len_5(page.Id()) .Add_byte(Byte_ascii.Semic)
.Add_base85_len_5(page.Text().length) .Add_byte(Byte_ascii.Nl);
}
} catch (Exception e) {bldr.Usr_dlg().Warn_many("", "", "search_index:fatal error: err=~{0}", Err_.Message_gplx_full(e));} // never let single page crash entire import
}
/** No per-page cleanup is needed. */
public void Page_wkr__run_cleanup() {}
/**
 * Flushes all temp writers, passes their registry to {@code Xobdc_merger.Ns} together with the
 * temp / make dirs and {@link #Make_cmd_site()}, releases the writers, and deletes the temp dir
 * when delete_temp is set.
 */
public void Page_wkr__end() {
tmp_wtr_mgr.Flush_all(bldr.Usr_dlg());
dump_bfr.ClearAndReset();
Xobdc_merger.Ns(bldr.Usr_dlg(), tmp_wtr_mgr.Regy(), Xotdb_dir_info_.Name_search_ttl, temp_dir, make_dir, sort_mem_len, Io_line_rdr_key_gen_.first_pipe, this.Make_cmd_site());
tmp_wtr_mgr.Rls_all();
if (delete_temp) Io_mgr.Instance.DeleteDirDeep(temp_dir);
}
/** Supplies the make command handed to the merger in Page_wkr__end. */
public abstract Io_make_cmd Make_cmd_site();
/**
 * Splits a title into its distinct words.
 * The title is lower-cased through {@code lang}'s case manager when {@code lang} is not null, then
 * cut at every word-breaking byte listed in the switch below (underscore, whitespace, dash, dot and
 * assorted ASCII punctuation). Empty words are dropped and a word is only recorded the first time
 * it is seen.
 *
 * @param lang language used for lower-casing; may be null, in which case the title is used as-is
 * @param list scratch hash used for de-duplication; it is cleared and resized to 16 before returning
 * @param bfr  scratch buffer in which the current word is accumulated.
 *             NOTE(review): bfr is not cleared on entry, so any bytes already in it would become
 *             part of the first word -- callers presumably pass an empty buffer; confirm.
 * @param ttl  title bytes to split
 * @return the words held by {@code list} at the end of the scan
 */
public static byte[][] Split_ttl_into_words(Xol_lang_itm lang, Ordered_hash list, Bry_bfr bfr, byte[] ttl) {
if (lang != null) // null lang passed in by searcher
ttl = lang.Case_mgr().Case_build_lower(ttl);
int ttl_len = ttl.length; Bry_obj_ref word_ref = Bry_obj_ref.New(Bry_.Empty);
int i = 0; boolean word_done = false;
while (true) {
// a word ends either at a delimiter (word_done set by the previous iteration) or at end of title
if (word_done || i == ttl_len) {
if (bfr.Len() > 0) {
byte[] word = bfr.To_bry_and_clear();
word_ref.Val_(word);
if (!list.Has(word_ref)) list.Add(word_ref, word); // don't add same word twice; EX: Title of "Can Can" should only have "Can" in index
}
if (i == ttl_len) break;
word_done = false;
}
byte b = ttl[i];
switch (b) {
case Byte_ascii.Underline: // underline is word-breaking; EX: A_B -> A, B
case Byte_ascii.Space: // should not occur, but just in case (only underscores)
case Byte_ascii.Tab: case Byte_ascii.Nl: case Byte_ascii.Cr: // should not occur in titles, but just in case
case Byte_ascii.Dash: // treat hypenated words separately
case Byte_ascii.Dot: // treat abbreviations as separate words; EX: A.B.C.
case Byte_ascii.Bang: case Byte_ascii.Hash: case Byte_ascii.Dollar: case Byte_ascii.Percent:
case Byte_ascii.Amp: case Byte_ascii.Paren_bgn: case Byte_ascii.Paren_end: case Byte_ascii.Star:
case Byte_ascii.Comma: case Byte_ascii.Slash:
case Byte_ascii.Colon: case Byte_ascii.Semic: case Byte_ascii.Gt:
case Byte_ascii.Question: case Byte_ascii.At: case Byte_ascii.Brack_bgn: case Byte_ascii.Brack_end:
case Byte_ascii.Pow: case Byte_ascii.Tick:
case Byte_ascii.Curly_bgn: case Byte_ascii.Pipe: case Byte_ascii.Curly_end: case Byte_ascii.Tilde:
case Byte_ascii.Quote: case Byte_ascii.Apos: // FUTURE: apos will split "Earth's" to Earth and s; should remove latter
// delimiter: skip it and let the next iteration close the current word
++i;
word_done = true;
break;
default:
// ordinary byte: append to the current word
bfr.Add_byte(b);
++i;
break;
}
}
byte[][] rv = (byte[][])list.To_ary(byte[].class);
// reset the shared scratch hash so the next call starts empty
list.Clear(); list.Resize_bounds(16);
return rv;
}
}

View File

@@ -1,113 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs.cmds.texts.tdbs; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.cmds.*; import gplx.xowa.bldrs.cmds.texts.*;
import gplx.xowa.bldrs.wkrs.*;
import gplx.xowa.wikis.nss.*;
import gplx.xowa.wikis.data.site_stats.*; import gplx.xowa.wikis.data.tbls.*;
import gplx.xowa.wikis.tdbs.*; import gplx.xowa.wikis.tdbs.hives.*; import gplx.xowa.wikis.tdbs.xdats.*;
/**
 * Builder command that tallies page counts per namespace from the text database and writes them,
 * as a chain of script calls, to {@code cfg/wiki_stats.gfs} under the wiki root.
 */
public class Xob_calc_stats_cmd extends Xob_itm_basic_base implements Xob_cmd {
  static final String GRP_KEY = "xowa.bldr.calc_stats";

  public Xob_calc_stats_cmd(Xob_bldr bldr, Xowe_wiki wiki) {
    this.Cmd_ctor(bldr, wiki);
  }
  /** Url of the generated stats file: "cfg/wiki_stats.gfs" below the wiki's root dir. */
  public static Io_url Wiki_gfs_url(Xowe_wiki wiki) {
    return wiki.Fsys_mgr().Root_dir().GenSubFil_nest("cfg", "wiki_stats.gfs");
  }

  public String Cmd_key() {
    return Xob_cmd_keys.Key_tdb_calc_stats;
  }
  public void Cmd_init(Xob_bldr bldr) {}
  public void Cmd_bgn(Xob_bldr bldr) {}
  /** All work happens here; the other lifecycle hooks are no-ops. */
  public void Cmd_run() {
    Exec();
  }
  public void Cmd_end() {}
  public void Cmd_term() {}

  /** Counts pages for every namespace, counts non-redirect titles for the main and file namespaces, and saves the stats script. */
  private void Exec() {
    final int ns_len = wiki.Ns_mgr().Ords_len();

    // pass 1: store each namespace's page count on the namespace and accumulate the grand total
    int page_total = 0;
    for (int ord = 0; ord < ns_len; ord++) {
      Xow_ns cur_ns = wiki.Ns_mgr().Ords_ary()[ord];
      int cur_count = Calc_counts(cur_ns);
      cur_ns.Count_(cur_count);
      page_total += cur_count;
    }
    int article_count = Calc_count_articles(wiki.Ns_mgr().Ns_main());
    int file_count = Calc_count_articles(wiki.Ns_mgr().Ns_file());

    // pass 2: emit the call chain; only the first call is written without a leading dot
    Bry_bfr script = Bry_bfr_.New();
    Gen_call(Bool_.Y, script, Xowe_wiki.Invk_stats);
    Gen_call(Bool_.N, script, Xowd_site_stats_mgr.Invk_number_of_articles_, article_count);
    Gen_call(Bool_.N, script, Xowd_site_stats_mgr.Invk_number_of_files_, file_count);
    Gen_call(Bool_.N, script, Xowd_site_stats_mgr.Invk_number_of_pages_, page_total);
    for (int ord = 0; ord < ns_len; ord++) {
      Xow_ns cur_ns = wiki.Ns_mgr().Ords_ary()[ord];
      if (cur_ns.Id() >= 0) {  // namespaces with a negative id are left out
        script.Add_byte_nl();
        Gen_call(Bool_.N, script, Xowd_site_stats_mgr.Invk_number_of_articles_in_ns_, cur_ns.Num_str(), Int_.To_str_pad_bgn_zero(cur_ns.Count(), 10));
      }
    }
    script.Add_byte_nl().Add_byte(Byte_ascii.Semic).Add_byte_nl();

    Io_url stats_url = Wiki_gfs_url(wiki);
    Io_mgr.Instance.SaveFilBfr(stats_url, script);
  }
  /**
   * Appends one call to {@code bfr}: an optional leading dot, the key, and -- only when values are
   * given -- a parenthesized list of the values separated by ", ".
   */
  private void Gen_call(boolean first, Bry_bfr bfr, String key, Object... vals) {
    if (!first)
      bfr.Add_byte(Byte_ascii.Dot);
    bfr.Add_str_u8(key);
    final int val_count = vals.length;
    if (val_count <= 0) return;
    bfr.Add_byte(Byte_ascii.Paren_bgn);
    for (int pos = 0; pos < val_count; pos++) {
      if (pos != 0)
        bfr.Add_byte(Byte_ascii.Comma).Add_byte(Byte_ascii.Space);
      bfr.Add_str_u8(Object_.Xto_str_strict_or_null_mark(vals[pos]));
    }
    bfr.Add_byte(Byte_ascii.Paren_end);
  }
  /** Sums the per-file counts recorded in the namespace's title registry. */
  int Calc_counts(Xow_ns ns) {
    Io_url reg_url = wiki.Tdb_fsys_mgr().Url_ns_reg(ns.Num_str(), Xotdb_dir_info_.Tid_ttl);
    Xowd_regy_mgr regy = new Xowd_regy_mgr(reg_url);
    final int file_count = regy.Files_ary().length;
    int sum = 0;
    for (int pos = 0; pos < file_count; pos++)
      sum += regy.Files_ary()[pos].Count();
    return sum;
  }
  /** Counts non-redirect titles in the namespace's title hive directory. */
  int Calc_count_articles(Xow_ns ns) {
    Io_url hive_dir = wiki.Fsys_mgr().Root_dir().GenSubDir_nest(Xotdb_dir_info_.Name_ns, ns.Num_str(), Xotdb_dir_info_.Name_title);
    return Calc_count_articles_dir(ns, hive_dir);
  }
  /** Recursively counts non-redirect titles below {@code dir}, reporting progress once per directory. */
  int Calc_count_articles_dir(Xow_ns ns, Io_url dir) {
    Io_url[] entries = Io_mgr.Instance.QueryDir_args(dir).DirInclude_().ExecAsUrlAry();
    int sum = 0;
    final int entry_count = entries.length;
    bldr.Usr_dlg().Prog_one(GRP_KEY, "count", "calculating: ~{0}", dir.Raw());
    for (int pos = 0; pos < entry_count; pos++) {
      Io_url entry = entries[pos];
      sum += entry.Type_dir()
        ? Calc_count_articles_dir(ns, entry)
        : Calc_count_articles_fil(ns, entry);
    }
    return sum;
  }
  /** Counts the non-redirect titles in one xdat file; the registry file itself contributes 0. */
  int Calc_count_articles_fil(Xow_ns ns, Io_url fil) {
    boolean is_reg_fil = String_.Eq(fil.NameAndExt(), Xotdb_dir_info_.Name_reg_fil);
    if (is_reg_fil) return 0;
    int non_redirects = 0;
    byte[] raw = Io_mgr.Instance.LoadFilBry(fil);
    Xob_xdat_file xdat = new Xob_xdat_file().Parse(raw, raw.length, fil);
    Xowd_page_itm tmp_page = Xowd_page_itm.new_tmp();
    final int row_count = xdat.Count();
    for (int row = 0; row < row_count; row++) {
      byte[] row_bry = xdat.Get_bry(row);
      Xotdb_page_itm_.Txt_ttl_load(tmp_page, row_bry);
      if (!tmp_page.Redirected())
        non_redirects += 1;
    }
    return non_redirects;
  }
}

View File

@@ -15,6 +15,7 @@ Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs.cmds.texts.tdbs; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.cmds.*; import gplx.xowa.bldrs.cmds.texts.*;
import org.junit.*; import gplx.xowa.htmls.portal.*; import gplx.xowa.wikis.xwikis.*;
import gplx.xowa.bldrs.cmds.texts.sqls.*;
public class Xob_init_base_tst {
@Before public void init() {fxt.Clear();} private Xob_init_base_fxt fxt = new Xob_init_base_fxt();
@Test public void Dirty_wiki_itms() {
@@ -24,7 +25,7 @@ public class Xob_init_base_tst {
Xow_xwiki_itm xwiki_itm = app.Usere().Wiki().Xwiki_mgr().Add_by_atrs("en.wikipedia.org", "en.wikipedia.org");
xwiki_itm.Offline_(Bool_.Y); // simulate add via Available_from_fsys; DATE:2014-09-21
Tfds.Eq("", wikis_list.Itms_as_html()); // still empty
new Xob_init_tdb(app.Bldr(), wiki).Cmd_end(); // mock "init" task
new Xob_init_cmd(app.Bldr(), wiki).Cmd_end(); // mock "init" task
Tfds.Eq("\n <li><a href=\"/site/en.wikipedia.org/\" class='xowa-hover-off'>en.wikipedia.org</a></li>", wikis_list.Itms_as_html()); // no longer empty
}
}

View File

@@ -1,26 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs.cmds.texts.tdbs; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.cmds.*; import gplx.xowa.bldrs.cmds.texts.*;
import gplx.xowa.xtns.wbases.imports.*;
/** Init command variant registered under {@code Xob_cmd_keys.Key_tdb_text_init}. */
public class Xob_init_tdb extends Xob_init_base {
  public Xob_init_tdb(Xob_bldr bldr, Xowe_wiki wiki) {
    this.Ctor(bldr, wiki);
  }
  @Override public String Cmd_key() {
    return Xob_cmd_keys.Key_tdb_text_init;
  }
  /** Queues the two tdb wikidata commands (qid first, then pid) on the builder's command manager. */
  @Override public void Cmd_ini_wdata(Xob_bldr bldr, Xowe_wiki wiki) {
    String[] wdata_keys = {
      Xob_cmd_keys.Key_tdb_text_wdata_qid,
      Xob_cmd_keys.Key_tdb_text_wdata_pid,
    };
    for (String wdata_key : wdata_keys)
      bldr.Cmd_mgr().Add_cmd(wiki, wdata_key);
  }
  /** Nothing to do at the end of the run for this variant. */
  @Override public void Cmd_run_end(Xowe_wiki wiki) {
  }
}

View File

@@ -1,36 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.bldrs.cmds.texts.tdbs; import gplx.*; import gplx.xowa.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.cmds.*; import gplx.xowa.bldrs.cmds.texts.*;
import gplx.core.ios.*; import gplx.xowa.wikis.tdbs.*; import gplx.xowa.wikis.data.tbls.*;
import gplx.xowa.bldrs.wkrs.*;
/**
 * Page worker that serializes one id row per page into the dump buffer and spills the buffer to
 * dump files under the tdb site dir.
 */
public class Xob_make_id_wkr extends Xob_itm_dump_base implements Xob_page_wkr, Gfo_invk {
  public static final String KEY = "core.make_id";
  static final int row_fixed_len = 25 + 1 + 7; // 25=5 base_85 flds; 1=Redirect; 7=dlm

  public Xob_make_id_wkr(Xob_bldr bldr, Xowe_wiki wiki) {
    this.Cmd_ctor(bldr, wiki);
  }
  public String Page_wkr__key() {
    return KEY;
  }
  /** Initializes the dump under the id subdirectory of the tdb site dir. */
  public void Page_wkr__bgn() {
    this.Init_dump(KEY, wiki.Tdb_fsys_mgr().Site_dir().GenSubDir(Xotdb_dir_info_.Name_id));
  }
  /**
   * Saves the page's id row into the dump buffer. When the buffered bytes plus this row (fixed part
   * plus title length) would exceed dump_fil_len, the buffer is first appended to the next dump file.
   */
  public void Page_wkr__run(Xowd_page_itm page) {
    byte[] ttl = page.Ttl_page_db();
    boolean exceeds_fil_len = dump_bfr.Len() + row_fixed_len + ttl.length > dump_fil_len;
    if (exceeds_fil_len) {
      // NOTE(review): presumably AppendFilBfr also empties dump_bfr -- confirm
      Io_mgr.Instance.AppendFilBfr(dump_url_gen.Nxt_url(), dump_bfr);
    }
    Xotdb_page_itm_.Txt_id_save(dump_bfr, page);
  }
  /** No per-page cleanup is needed. */
  public void Page_wkr__run_cleanup() {
  }
  /** Finishes the dump with a site make command, then deletes the temp dir when delete_temp is set. */
  public void Page_wkr__end() {
    this.Term_dump(new Xob_make_cmd_site(bldr.Usr_dlg(), make_dir, make_fil_len));
    if (delete_temp)
      Io_mgr.Instance.DeleteDirDeep(temp_dir);
  }
}