diff --git a/400_xowa/src/gplx/xowa/xtns/wbases/Wdata_wiki_mgr.java b/400_xowa/src/gplx/xowa/xtns/wbases/Wdata_wiki_mgr.java index abcf3fd4b..39dc22734 100644 --- a/400_xowa/src/gplx/xowa/xtns/wbases/Wdata_wiki_mgr.java +++ b/400_xowa/src/gplx/xowa/xtns/wbases/Wdata_wiki_mgr.java @@ -106,7 +106,6 @@ public class Wdata_wiki_mgr implements Gfo_evt_itm, Gfo_invk { } return wdata_wiki; } private Xowe_wiki wdata_wiki; - public Json_parser Jdoc_parser() {return jdoc_parser;} private Json_parser jdoc_parser = new Json_parser(); public void Init_by_app() {} public Wdata_doc_parser Wdoc_parser(Json_doc jdoc) { Json_kv itm_0 = Json_kv.Cast(jdoc.Root_nde().Get_at(0)); // get 1st node diff --git a/400_xowa/src/gplx/xowa/xtns/wbases/Wdata_wiki_mgr_fxt.java b/400_xowa/src/gplx/xowa/xtns/wbases/Wdata_wiki_mgr_fxt.java index bb637a940..9e4c9ebde 100644 --- a/400_xowa/src/gplx/xowa/xtns/wbases/Wdata_wiki_mgr_fxt.java +++ b/400_xowa/src/gplx/xowa/xtns/wbases/Wdata_wiki_mgr_fxt.java @@ -29,6 +29,7 @@ import gplx.Tfds; import gplx.core.primitives.Gfo_number_parser; import gplx.core.primitives.Int_obj_ref; import gplx.langs.jsons.Json_doc; +import gplx.langs.jsons.Json_parser; import gplx.xowa.Xoa_ttl; import gplx.xowa.Xoae_app; import gplx.xowa.Xoae_page; @@ -62,6 +63,7 @@ public class Wdata_wiki_mgr_fxt { private Xoae_app app; private Xowe_wiki wiki; private Wdata_doc_bldr wdoc_bldr; private final Wdata_xwiki_link_wtr wdata_lang_wtr = new Wdata_xwiki_link_wtr(); private final Bry_bfr tmp_time_bfr = Bry_bfr_.New(); + private final Json_parser jsonParser = new Json_parser(); public Xowe_wiki Wiki() {return parser_fxt.Wiki();} public Wdata_wiki_mgr_fxt Init() {return Init(new Xop_fxt(), true);} public Wdata_wiki_mgr_fxt Init(Xop_fxt parser_fxt, boolean reset) { @@ -224,7 +226,7 @@ public class Wdata_wiki_mgr_fxt { byte[] raw_bry = Bry_.new_a7(raw_str); raw_bry = Bry_.new_u8(Json_doc.Make_str_by_apos(raw_str)); Bry_bfr bfr = wiki.Utl__bfr_mkr().Get_b512(); - 
Wdata_wiki_mgr.Write_json_as_html(wdata_mgr.Jdoc_parser(), bfr, raw_bry); + Wdata_wiki_mgr.Write_json_as_html(jsonParser, bfr, raw_bry); Tfds.Eq(expd, bfr.To_str_and_rls()); } public static String New_json(String entity_id, String grp_key, String[] grp_vals) { diff --git a/400_xowa/src/gplx/xowa/xtns/wbases/imports/Xob_wdata_db_cmd.java b/400_xowa/src/gplx/xowa/xtns/wbases/imports/Xob_wdata_db_cmd.java index 77255227b..d0e063595 100644 --- a/400_xowa/src/gplx/xowa/xtns/wbases/imports/Xob_wdata_db_cmd.java +++ b/400_xowa/src/gplx/xowa/xtns/wbases/imports/Xob_wdata_db_cmd.java @@ -1,6 +1,6 @@ /* XOWA: the XOWA Offline Wiki Application -Copyright (C) 2012-2017 gnosygnu@gmail.com +Copyright (C) 2012-2020 gnosygnu@gmail.com XOWA is licensed under the terms of the General Public License (GPL) Version 3, or alternatively under the terms of the Apache License Version 2.0. @@ -13,414 +13,451 @@ The terms of each license can be found in the source code repository: GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt */ -package gplx.xowa.xtns.wbases.imports; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.wbases.*; -import gplx.dbs.*; import gplx.dbs.cfgs.*; import gplx.dbs.engines.sqlite.*; import gplx.xowa.bldrs.*; import gplx.xowa.files.fsdb.*; import gplx.xowa.files.origs.*; -import gplx.xowa.bldrs.wkrs.*; -import gplx.langs.jsons.*; -import gplx.xowa.langs.*; -import gplx.xowa.wikis.nss.*; -import gplx.xowa.bldrs.cmds.*; import gplx.xowa.wikis.data.tbls.*; -import gplx.xowa.xtns.wbases.*; import gplx.xowa.xtns.wbases.core.*; import gplx.xowa.xtns.wbases.claims.*; import gplx.xowa.xtns.wbases.claims.itms.*; -public class Xob_wdata_db_cmd extends Xob_dump_mgr_base implements Xob_cmd { - private Wdata_tbl_mgr tbl_mgr = new Wdata_tbl_mgr(); - private Wdata_wiki_mgr wdata_mgr; private Json_parser json_parser; - private byte[] lang_key = 
Xol_lang_itm_.Key_en; - public Xob_wdata_db_cmd(Xob_bldr bldr, Xowe_wiki wiki) {this.Cmd_ctor(bldr, wiki);} - @Override public String Cmd_key() {return Xob_cmd_keys.Key_wbase_db;} - @Override public byte Init_redirect() {return Bool_.N_byte;} // json will never be found in a redirect - @Override public int[] Init_ns_ary() {return Int_ary_.New(Xow_ns_.Tid__main, Wdata_wiki_mgr.Ns_property);} - @Override protected void Init_reset(Db_conn conn) { - Db_cfg_tbl cfg_tbl = gplx.xowa.wikis.data.Xowd_cfg_tbl_.New(conn); - cfg_tbl.Delete_all(); - } - @Override protected Db_conn Init_db_file() { - Xob_db_file tbl_file = Xob_db_file.New(wiki.Fsys_mgr().Root_dir(), "wdata_db.sqlite3"); - Db_conn conn = tbl_file.Conn(); - tbl_mgr.Init(conn); - return conn; - } - @Override protected void Cmd_bgn_end() { - wdata_mgr = bldr.App().Wiki_mgr().Wdata_mgr(); - json_parser = wdata_mgr.Jdoc_parser(); - tbl_mgr.Conn().Txn_bgn("bldr__wdata_db"); - } - @Override public void Exec_pg_itm_hook(int ns_ord, Xow_ns ns, Xowd_page_itm page, byte[] page_src) { - Json_doc jdoc = json_parser.Parse(page_src); if (jdoc == null) return; // not a json document - Wdata_doc wdoc = new Wdata_doc(wdata_mgr, jdoc, page.Ttl_page_db()); - tbl_mgr.Exec_insert_by_wdoc(lang_key, wdata_mgr, page.Id(), wdoc); - } - @Override public void Exec_commit_hook() { - tbl_mgr.Conn().Txn_sav(); - } - @Override public void Exec_end_hook() { - tbl_mgr.Term(usr_dlg); - } -} -class Wdata_tbl_mgr { - private Wdata_tbl_base[] tbls; private int tbls_len; - public Wdata_tbl_mgr() { - tbls = new Wdata_tbl_base[] {label_tbl, alias_tbl, description_tbl, link_tbl, claim_tbl, claim_time_tbl, claim_geo_tbl}; - tbls_len = tbls.length; - } - public Db_conn Conn() {return conn;} private Db_conn conn; - public Wdata_label_tbl Label_tbl() {return label_tbl;} private Wdata_label_tbl label_tbl = new Wdata_label_tbl(); - public Wdata_alias_tbl Alias_tbl() {return alias_tbl;} private Wdata_alias_tbl alias_tbl = new Wdata_alias_tbl(); - public 
Wdata_description_tbl Description_tbl() {return description_tbl;} private Wdata_description_tbl description_tbl = new Wdata_description_tbl(); - public Wdata_link_tbl Link_tbl() {return link_tbl;} private Wdata_link_tbl link_tbl = new Wdata_link_tbl(); - public Wbase_claim_tbl Claim_tbl() {return claim_tbl;} private Wbase_claim_tbl claim_tbl = new Wbase_claim_tbl(); - public Wbase_claim_time_tbl Claim_time_tbl() {return claim_time_tbl;} private Wbase_claim_time_tbl claim_time_tbl = new Wbase_claim_time_tbl(); - public Wbase_claim_geo_tbl Claim_geo_tbl() {return claim_geo_tbl;} private Wbase_claim_geo_tbl claim_geo_tbl = new Wbase_claim_geo_tbl(); - public void Init(Db_conn conn) { - this.conn = conn; - for (int i = 0; i < tbls_len; i++) - tbls[i].Init(conn); - } - public void Exec_insert_by_wdoc(byte[] lang_key, Wdata_wiki_mgr wdata_mgr, int page_id, Wdata_doc wdoc) { - for (int i = 0; i < tbls_len; i++) - tbls[i].Exec_insert_by_wdoc(lang_key, wdata_mgr, page_id, wdoc); - } - public void Term(Gfo_usr_dlg usr_dlg) { - conn.Txn_end(); - for (int i = 0; i < tbls_len; i++) - tbls[i].Make_idxs(usr_dlg, conn); - } -} -abstract class Wdata_tbl_base { - public abstract String Tbl_name(); - public abstract String Tbl_create_sql(); - public abstract Db_idx_itm[] Idx_ary(); - public abstract String[] Fld_ary(); - @gplx.Virtual public void Exec_insert_by_wdoc(byte[] lang_key, Wdata_wiki_mgr wdata_mgr, int page_id, Wdata_doc wdoc) {} - public void Make_tbl(Db_conn p) {Sqlite_engine_.Tbl_create(p, this.Tbl_name(), this.Tbl_create_sql());} - public void Make_idxs(Gfo_usr_dlg usr_dlg, Db_conn p) { - Sqlite_engine_.Idx_create(usr_dlg, p, this.Tbl_name(), this.Idx_ary()); - } - public Db_stmt Make_insert_stmt(Db_conn p) {return Db_stmt_.new_insert_(p, this.Tbl_name(), this.Fld_ary());} - public Db_stmt Insert_stmt() {return insert_stmt;} private Db_stmt insert_stmt; - public void Init(Db_conn conn) { - this.Make_tbl(conn); - insert_stmt = this.Make_insert_stmt(conn); - } - public 
static void Exec_insert_kvs(Db_stmt stmt, int page_id, Ordered_hash hash) { - int len = hash.Count(); - for (int i = 0; i < len; i++) { - Json_kv kv = (Json_kv)hash.Get_at(i); - stmt.Clear() - .Val_int(page_id) - .Val_bry_as_str(kv.Key().Data_bry()) - .Val_bry_as_str(kv.Val().Data_bry()) - .Exec_insert(); - } - } -} -class Wdata_label_tbl extends Wdata_tbl_base { - @Override public String Tbl_name() {return "wdata_label";} - @Override public String Tbl_create_sql() { - return String_.Concat_lines_nl - ( "CREATE TABLE IF NOT EXISTS wdata_label" - , "( page_id integer NOT NULL" - , ", lang_key varchar(16) NOT NULL" - , ", val varchar(255) NOT NULL" - , ");" - ); - } - @Override public Db_idx_itm[] Idx_ary() {return new Db_idx_itm[] {Db_idx_itm.sql_("CREATE INDEX IF NOT EXISTS wdata_label__main ON wdata_label (page_id, lang_key);")};} - @Override public String[] Fld_ary() {return new String[] {Fld_page_id, Fld_lang_key, Fld_val};} - @Override public void Exec_insert_by_wdoc(byte[] lang_key, Wdata_wiki_mgr wdata_mgr, int page_id, Wdata_doc wdoc) {Exec_insert_kvs(this.Insert_stmt(), page_id, wdoc.Label_list());} - private static final String Fld_page_id = "page_id", Fld_lang_key = "lang_key", Fld_val = "val"; -} -class Wdata_alias_tbl extends Wdata_tbl_base { - @Override public String Tbl_name() {return "wdata_alias";} - @Override public String Tbl_create_sql() { - return String_.Concat_lines_nl - ( "CREATE TABLE IF NOT EXISTS wdata_alias" - , "( page_id integer NOT NULL" - , ", lang_key varchar(16) NOT NULL" - , ", val varchar(255) NOT NULL" - , ");" - ); - } - @Override public Db_idx_itm[] Idx_ary() {return new Db_idx_itm[] {Db_idx_itm.sql_("CREATE INDEX IF NOT EXISTS wdata_alias__main ON wdata_alias (page_id, lang_key);")};} - @Override public String[] Fld_ary() {return new String[] {Fld_page_id, Fld_lang_key, Fld_val};} - @Override public void Exec_insert_by_wdoc(byte[] lang_key, Wdata_wiki_mgr wdata_mgr, int page_id, Wdata_doc wdoc) { - Ordered_hash hash = 
wdoc.Alias_list(); - int len = hash.Count(); - Db_stmt insert_stmt = this.Insert_stmt(); - for (int i = 0; i < len; i++) { - Json_kv kv = (Json_kv)hash.Get_at(i); - byte[] key = kv.Key().Data_bry(); - Json_grp val_grp = (Json_grp)kv.Val(); - int val_grp_len = val_grp.Len(); - for (int j = 0; j < val_grp_len; j++) { - Json_itm val_itm = val_grp.Get_at(j); - byte[] val = Bry_.Empty; - if (val_itm.Tid() == Json_itm_.Tid__str) - val = val_itm.Data_bry(); - else if (val_itm.Tid() == Json_itm_.Tid__kv) { // EX: q80 and de aliases - val = ((Json_kv)val_itm).Val().Data_bry(); - } - insert_stmt.Clear() - .Val_int(page_id) - .Val_bry_as_str(key) - .Val_bry_as_str(val) - .Exec_insert(); - } - } - } - private static final String Fld_page_id = "page_id", Fld_lang_key = "lang_key", Fld_val = "val"; -} -class Wdata_description_tbl extends Wdata_tbl_base { - @Override public String Tbl_name() {return "wdata_description";} - @Override public String Tbl_create_sql() { - return String_.Concat_lines_nl - ( "CREATE TABLE IF NOT EXISTS wdata_description" - , "( page_id integer NOT NULL" - , ", lang_key varchar(16) NOT NULL" - , ", val varchar(255) NOT NULL" - , ");" - ); - } - @Override public Db_idx_itm[] Idx_ary() {return new Db_idx_itm[] {Db_idx_itm.sql_("CREATE INDEX IF NOT EXISTS wdata_description__main ON wdata_description (page_id, lang_key);")};} - @Override public void Exec_insert_by_wdoc(byte[] lang_key, Wdata_wiki_mgr wdata_mgr, int page_id, Wdata_doc wdoc) {Exec_insert_kvs(this.Insert_stmt(), page_id, wdoc.Descr_list());} - @Override public String[] Fld_ary() {return new String[] {Fld_page_id, Fld_lang_key, Fld_val};} - private static final String Fld_page_id = "page_id", Fld_lang_key = "lang_key", Fld_val = "val"; -} -class Wdata_link_tbl extends Wdata_tbl_base { - @Override public String Tbl_name() {return "wdata_link";} - @Override public String Tbl_create_sql() { - return String_.Concat_lines_nl - ( "CREATE TABLE IF NOT EXISTS wdata_link" - , "( page_id integer NOT NULL" 
- , ", wiki_key varchar(255) NOT NULL" - , ", val varchar(255) NOT NULL" - , ");" - ); - } - @Override public Db_idx_itm[] Idx_ary() {return new Db_idx_itm[] {Db_idx_itm.sql_("CREATE INDEX IF NOT EXISTS wdata_link__main ON wdata_link (page_id, wiki_key);")};} - @Override public String[] Fld_ary() {return new String[] {Fld_page_id, Fld_wiki_key, Fld_val};} - @Override public void Exec_insert_by_wdoc(byte[] lang_key, Wdata_wiki_mgr wdata_mgr, int page_id, Wdata_doc wdoc) { - Ordered_hash hash = wdoc.Slink_list(); - int len = hash.Count(); - Db_stmt insert_stmt = this.Insert_stmt(); - for (int i = 0; i < len; i++) { - Json_kv kv = (Json_kv)hash.Get_at(i); - byte[] key = kv.Key().Data_bry(); - Json_itm kv_val = kv.Val(); - byte[] val = Bry_.Empty; - if (kv_val.Tid() == Json_itm_.Tid__str) - val = kv_val.Data_bry(); - else { - Json_nde val_nde = (Json_nde)kv.Val(); - Json_kv val_name_kv = (Json_kv)val_nde.Get_at(0); // ASSUME: 1st item is always "name" kv; EX: "name":"Earth" - val = val_name_kv.Val().Data_bry(); - } - insert_stmt.Clear() - .Val_int(page_id) - .Val_bry_as_str(key) - .Val_bry_as_str(val) - .Exec_insert(); - } - } - private static final String Fld_page_id = "page_id", Fld_wiki_key = "wiki_key", Fld_val = "val"; -} -class Wbase_claim_tbl extends Wdata_tbl_base { - @Override public String Tbl_name() {return "wdata_claim";} - @Override public String Tbl_create_sql() { - return String_.Concat_lines_nl - ( "CREATE TABLE IF NOT EXISTS wdata_claim" - , "( claim_id integer NOT NULL" - , ", page_id integer NOT NULL" - , ", prop_id integer NOT NULL" // 60; P60 - , ", val_tid smallint NOT NULL" // String;wikibase-entity-id;time;globecoordinate - , ", entity_tid smallint NOT NULL" // null;item - , ", entity_id integer NOT NULL" // null;123 - , ", val_text varchar(255) NOT NULL" - , ", guid varchar(64) NOT NULL" - , ", rank integer NOT NULL" - , ", ref_count integer NOT NULL" - , ", qual_count integer NOT NULL" - , ");" - ); - } - @Override public Db_idx_itm[] 
Idx_ary() { - return new Db_idx_itm[] - { Db_idx_itm.sql_("CREATE INDEX IF NOT EXISTS wdata_claim__main ON wdata_claim (page_id, prop_id, val_tid, entity_tid);") - }; - } - @Override public String[] Fld_ary() {return new String[] {Fld_claim_id, Fld_page_id, Fld_prop_id, Fld_val_tid, Fld_entity_tid, Fld_entity_id, Fld_val_text, Fld_guid, Fld_rank, Fld_ref_count, Fld_qual_count};} - private int next_claim_id = 0; - private Xob_wdata_db_visitor visitor; - @Override public void Exec_insert_by_wdoc(byte[] lang_key, Wdata_wiki_mgr wdata_mgr, int page_id, Wdata_doc wdoc) { - if (visitor == null) visitor = new Xob_wdata_db_visitor(wdata_mgr); - visitor.Init(lang_key); - Ordered_hash list = wdoc.Claim_list(); - int list_len = list.Count(); - for (int i = 0; i < list_len; i++) { - Wbase_claim_grp claim_grp = (Wbase_claim_grp)list.Get_at(i); - int itms_len = claim_grp.Len(); - int entity_id = -1; - byte[] claim_val = Bry_.Empty; - for (int j = 0; j < itms_len; j++) { - Wbase_claim_base claim = claim_grp.Get_at(j); - claim.Welcome(visitor); - claim_val = visitor.Rv(); - Exec_insert(++next_claim_id, page_id, claim_grp.Id(), claim.Val_tid(), claim.Snak_tid(), entity_id, claim_val, claim.Wguid(), claim.Rank_tid(), 0, 0); - } - } - } - public void Exec_insert(int claim_id, int page_id, int prop_id, byte val_tid, byte entity_tid, int entity_id, byte[] val_text, byte[] guid, int rank, int ref_count, int qual_count) { - if (val_text == null) val_text = Bry_.Empty; - if (guid == null) guid = Bry_.Empty; - this.Insert_stmt().Clear() - .Val_int(claim_id) - .Val_int(page_id) - .Val_int(prop_id) - .Val_byte(val_tid) - .Val_byte(entity_tid) - .Val_int(entity_id) - .Val_bry_as_str(val_text) - .Val_bry_as_str(guid) - .Val_int(rank) - .Val_int(ref_count) - .Val_int(qual_count) - .Exec_insert(); - } - private static final String Fld_claim_id = "claim_id", Fld_page_id = "page_id", Fld_prop_id = "prop_id", Fld_val_tid = "val_tid", Fld_entity_tid = "entity_tid", Fld_entity_id = "entity_id", 
Fld_val_text = "val_text" - , Fld_guid = "guid", Fld_rank = "rank", Fld_ref_count = "ref_count", Fld_qual_count = "qual_count" - ; -} -class Wbase_claim_time_tbl extends Wdata_tbl_base { - @Override public String Tbl_name() {return "wdata_claim_time";} - @Override public String Tbl_create_sql() { - return String_.Concat_lines_nl - ( "CREATE TABLE IF NOT EXISTS wdata_claim_time" - , "( claim_id integer NOT NULL" - , ", time_val varchar(64) NOT NULL" // -04540000000-01-01T00:00:00Z - , ", time_tz integer NOT NULL" // 0 - , ", time_before integer NOT NULL" // 0 - , ", time_after integer NOT NULL" // 0 - , ", time_precision integer NOT NULL" // 2; number of digits - , ", time_model varchar(64) NOT NULL" // http:\/\/www.wikidata.org\/entity\/Q1985727 - , ");" - ); - } - @Override public Db_idx_itm[] Idx_ary() { - return new Db_idx_itm[] { - Db_idx_itm.sql_("CREATE INDEX IF NOT EXISTS wdata_claim_time__main ON wdata_claim_time (claim_id);") - }; - } - @Override public String[] Fld_ary() {return new String[] {Fld_claim_id, Fld_time_val, Fld_time_tz, Fld_time_before, Fld_time_after, Fld_time_precision, Fld_time_model};} - public void Insert(Db_stmt stmt, int claim_id, byte[] time_val, int tz, int before, int after, int precision, byte[] model) { - stmt.Clear() - .Val_int(claim_id) - .Val_bry_as_str(time_val) - .Val_int(tz) - .Val_int(before) - .Val_int(after) - .Val_int(precision) - .Val_bry_as_str(model) - .Exec_insert(); - } - private static final String Fld_claim_id = "claim_id", Fld_time_val = "time_val", Fld_time_tz = "time_tz", Fld_time_before = "time_before", Fld_time_after = "time_after", Fld_time_precision = "time_precision", Fld_time_model = "time_model"; -} -class Wbase_claim_geo_tbl extends Wdata_tbl_base { - @Override public String Tbl_name() {return "wdata_claim_geo";} - @Override public String Tbl_create_sql() { - return String_.Concat_lines_nl - ( "CREATE TABLE IF NOT EXISTS wdata_claim_geo" - , "( claim_id integer NOT NULL" - , ", geo_latitude double NOT 
NULL" // 41.590833333333 - , ", geo_longitude double NOT NULL" // -93.620833333333 - , ", geo_altitude varchar(255) NOT NULL" // null - , ", geo_precision double NOT NULL" // 0.00027777777777778 - , ", geo_globe integer NOT NULL" // http:\/\/www.wikidata.org\/entity\/Q2 - , ");" - ); - } - @Override public Db_idx_itm[] Idx_ary() { - return new Db_idx_itm[] - { Db_idx_itm.sql_("CREATE INDEX IF NOT EXISTS wdata_claim_geo__main ON wdata_claim_geo (claim_id);") - }; - } - public void Insert(Db_stmt stmt, int claim_id, double latitude, double longitude, byte[] altitude, double precision, byte[] globe) { - stmt.Clear() - .Val_int(claim_id) - .Val_double(latitude) - .Val_double(longitude) - .Val_bry_as_str(altitude) - .Val_double(precision) - .Val_bry_as_str(globe) - .Exec_insert(); - } - @Override public String[] Fld_ary() {return new String[] {Fld_claim_id, Fld_geo_latitude, Fld_geo_longitude, Fld_geo_altitude, Fld_geo_precision, Fld_geo_globe};} - private static final String Fld_claim_id = "claim_id", Fld_geo_latitude = "geo_latitude", Fld_geo_longitude = "geo_longitude", Fld_geo_altitude = "geo_altitude", Fld_geo_precision = "geo_precision", Fld_geo_globe = "geo_globe"; -} -class Wdata_ref_tbl extends Wdata_tbl_base { - @Override public String Tbl_name() {return "wdata_ref";} - @Override public String Tbl_create_sql() { - return String_.Concat_lines_nl - ( "CREATE TABLE IF NOT EXISTS wdata_ref" - , "( ref_id integer NOT NULL" - , ", page_id integer NOT NULL" - , ", prop_id integer NOT NULL" // 60; P60 - , ", val_tid smallint NOT NULL" // String;wikibase-entity-id;time;globecoordinate - , ", entity_tid smallint NOT NULL" // null;item - , ", entity_id integer NOT NULL" // null;123 - , ", val_text varchar(255) NOT NULL" - , ");" - ); - } - @Override public Db_idx_itm[] Idx_ary() { - return new Db_idx_itm[] { - Db_idx_itm.sql_("CREATE INDEX IF NOT EXISTS wdata_ref__main ON wdata_ref (page_id, prop_id, val_tid, entity_tid);") - }; - } - @Override public String[] Fld_ary() 
{return new String[] {Fld_ref_id, Fld_page_id, Fld_prop_id, Fld_val_tid, Fld_entity_tid, Fld_entity_id, Fld_val_text};} - private static final String Fld_ref_id = "ref_id", Fld_page_id = "page_id", Fld_prop_id = "prop_id", Fld_val_tid = "val_tid", Fld_entity_tid = "entity_tid", Fld_entity_id = "entity_id", Fld_val_text = "val_ext"; -} -class Wdata_qual_tbl extends Wdata_tbl_base { - @Override public String Tbl_name() {return "wdata_qual";} - @Override public String Tbl_create_sql() { - return String_.Concat_lines_nl - ( "CREATE TABLE IF NOT EXISTS wdata_qual" - , "( qual_id integer NOT NULL" - , ", page_id integer NOT NULL" - , ", val_text varchar(4096) NOT NULL" - , ");" - ); - } - @Override public Db_idx_itm[] Idx_ary() { - return new Db_idx_itm[] { - Db_idx_itm.sql_("CREATE INDEX IF NOT EXISTS wdata_qual__main ON wdata_ref (qual_id, page_id);") - }; - } - @Override public String[] Fld_ary() {return new String[] {Fld_qual_id, Fld_page_id, Fld_val_text};} - public void Insert(Db_stmt stmt, int qual_id, int page_id, byte[] val_text) { - stmt.Clear() - .Val_int(qual_id) - .Val_int(page_id) - .Val_bry_as_str(val_text) - .Exec_insert(); - } - private static final String Fld_qual_id = "qual_id", Fld_page_id = "page_id", Fld_val_text = "val_text"; -} -class Xob_wdata_db_visitor implements Wbase_claim_visitor { - private final Wdata_wiki_mgr wdata_mgr; private byte[] lang_key; - public Xob_wdata_db_visitor(Wdata_wiki_mgr wdata_mgr) {this.wdata_mgr = wdata_mgr;} - public void Init(byte[] lang_key) {this.lang_key = lang_key;} - public byte[] Rv() {return rv;} private byte[] rv; - public void Visit_str(Wbase_claim_string itm) {rv = itm.Val_bry();} - public void Visit_monolingualtext(Wbase_claim_monolingualtext itm) {rv = Bry_.Add_w_dlm(Byte_ascii.Pipe, itm.Lang(), itm.Text());} - public void Visit_quantity(Wbase_claim_quantity itm) {rv = itm.Amount();} - public void Visit_time(Wbase_claim_time itm) {rv = itm.Time();} - public void 
Visit_globecoordinate(Wbase_claim_globecoordinate itm) {rv = Bry_.Add_w_dlm(Byte_ascii.Comma, itm.Lat(), itm.Lng());} - public void Visit_system(Wbase_claim_value itm) {rv = Bry_.Empty;} - public void Visit_entity(Wbase_claim_entity itm) { - Wdata_doc entity_doc = wdata_mgr.Doc_mgr.Get_by_xid_or_null(itm.Page_ttl_db()); - if (entity_doc != null) { - rv = entity_doc.Get_label_bry_or_null(lang_key); - } - if (rv == null) // can be null if entity_doc is null or if label is null; - rv = Bry_.Empty; - } -} +package gplx.xowa.xtns.wbases.imports; + +import gplx.Bool_; +import gplx.Bry_; +import gplx.Byte_ascii; +import gplx.Gfo_usr_dlg; +import gplx.Int_ary_; +import gplx.Ordered_hash; +import gplx.String_; +import gplx.dbs.Db_conn; +import gplx.dbs.Db_idx_itm; +import gplx.dbs.Db_stmt; +import gplx.dbs.Db_stmt_; +import gplx.dbs.cfgs.Db_cfg_tbl; +import gplx.dbs.engines.sqlite.Sqlite_engine_; +import gplx.langs.jsons.Json_doc; +import gplx.langs.jsons.Json_grp; +import gplx.langs.jsons.Json_itm; +import gplx.langs.jsons.Json_itm_; +import gplx.langs.jsons.Json_kv; +import gplx.langs.jsons.Json_nde; +import gplx.langs.jsons.Json_parser; +import gplx.xowa.Xowe_wiki; +import gplx.xowa.bldrs.Xob_bldr; +import gplx.xowa.bldrs.Xob_cmd_keys; +import gplx.xowa.bldrs.Xob_db_file; +import gplx.xowa.bldrs.cmds.Xob_dump_mgr_base; +import gplx.xowa.bldrs.wkrs.Xob_cmd; +import gplx.xowa.langs.Xol_lang_itm_; +import gplx.xowa.wikis.data.tbls.Xowd_page_itm; +import gplx.xowa.wikis.nss.Xow_ns; +import gplx.xowa.wikis.nss.Xow_ns_; +import gplx.xowa.xtns.wbases.Wdata_doc; +import gplx.xowa.xtns.wbases.Wdata_wiki_mgr; +import gplx.xowa.xtns.wbases.claims.Wbase_claim_grp; +import gplx.xowa.xtns.wbases.claims.Wbase_claim_visitor; +import gplx.xowa.xtns.wbases.claims.itms.Wbase_claim_base; +import gplx.xowa.xtns.wbases.claims.itms.Wbase_claim_entity; +import gplx.xowa.xtns.wbases.claims.itms.Wbase_claim_globecoordinate; +import gplx.xowa.xtns.wbases.claims.itms.Wbase_claim_monolingualtext; 
+import gplx.xowa.xtns.wbases.claims.itms.Wbase_claim_quantity; +import gplx.xowa.xtns.wbases.claims.itms.Wbase_claim_string; +import gplx.xowa.xtns.wbases.claims.itms.Wbase_claim_time; +import gplx.xowa.xtns.wbases.claims.itms.Wbase_claim_value; + +public class Xob_wdata_db_cmd extends Xob_dump_mgr_base implements Xob_cmd { + private Wdata_tbl_mgr tbl_mgr = new Wdata_tbl_mgr(); + private Wdata_wiki_mgr wdata_mgr; + private byte[] lang_key = Xol_lang_itm_.Key_en; + private final Json_parser json_parser = new Json_parser(); + public Xob_wdata_db_cmd(Xob_bldr bldr, Xowe_wiki wiki) {this.Cmd_ctor(bldr, wiki);} + @Override public String Cmd_key() {return Xob_cmd_keys.Key_wbase_db;} + @Override public byte Init_redirect() {return Bool_.N_byte;} // json will never be found in a redirect + @Override public int[] Init_ns_ary() {return Int_ary_.New(Xow_ns_.Tid__main, Wdata_wiki_mgr.Ns_property);} + @Override protected void Init_reset(Db_conn conn) { + Db_cfg_tbl cfg_tbl = gplx.xowa.wikis.data.Xowd_cfg_tbl_.New(conn); + cfg_tbl.Delete_all(); + } + @Override protected Db_conn Init_db_file() { + Xob_db_file tbl_file = Xob_db_file.New(wiki.Fsys_mgr().Root_dir(), "wdata_db.sqlite3"); + Db_conn conn = tbl_file.Conn(); + tbl_mgr.Init(conn); + return conn; + } + @Override protected void Cmd_bgn_end() { + wdata_mgr = bldr.App().Wiki_mgr().Wdata_mgr(); + tbl_mgr.Conn().Txn_bgn("bldr__wdata_db"); + } + @Override public void Exec_pg_itm_hook(int ns_ord, Xow_ns ns, Xowd_page_itm page, byte[] page_src) { + Json_doc jdoc = json_parser.Parse(page_src); if (jdoc == null) return; // not a json document + Wdata_doc wdoc = new Wdata_doc(wdata_mgr, jdoc, page.Ttl_page_db()); + tbl_mgr.Exec_insert_by_wdoc(lang_key, wdata_mgr, page.Id(), wdoc); + } + @Override public void Exec_commit_hook() { + tbl_mgr.Conn().Txn_sav(); + } + @Override public void Exec_end_hook() { + tbl_mgr.Term(usr_dlg); + } +} +class Wdata_tbl_mgr { + private Wdata_tbl_base[] tbls; private int tbls_len; + public 
Wdata_tbl_mgr() { + tbls = new Wdata_tbl_base[] {label_tbl, alias_tbl, description_tbl, link_tbl, claim_tbl, claim_time_tbl, claim_geo_tbl}; + tbls_len = tbls.length; + } + public Db_conn Conn() {return conn;} private Db_conn conn; + public Wdata_label_tbl Label_tbl() {return label_tbl;} private Wdata_label_tbl label_tbl = new Wdata_label_tbl(); + public Wdata_alias_tbl Alias_tbl() {return alias_tbl;} private Wdata_alias_tbl alias_tbl = new Wdata_alias_tbl(); + public Wdata_description_tbl Description_tbl() {return description_tbl;} private Wdata_description_tbl description_tbl = new Wdata_description_tbl(); + public Wdata_link_tbl Link_tbl() {return link_tbl;} private Wdata_link_tbl link_tbl = new Wdata_link_tbl(); + public Wbase_claim_tbl Claim_tbl() {return claim_tbl;} private Wbase_claim_tbl claim_tbl = new Wbase_claim_tbl(); + public Wbase_claim_time_tbl Claim_time_tbl() {return claim_time_tbl;} private Wbase_claim_time_tbl claim_time_tbl = new Wbase_claim_time_tbl(); + public Wbase_claim_geo_tbl Claim_geo_tbl() {return claim_geo_tbl;} private Wbase_claim_geo_tbl claim_geo_tbl = new Wbase_claim_geo_tbl(); + public void Init(Db_conn conn) { + this.conn = conn; + for (int i = 0; i < tbls_len; i++) + tbls[i].Init(conn); + } + public void Exec_insert_by_wdoc(byte[] lang_key, Wdata_wiki_mgr wdata_mgr, int page_id, Wdata_doc wdoc) { + for (int i = 0; i < tbls_len; i++) + tbls[i].Exec_insert_by_wdoc(lang_key, wdata_mgr, page_id, wdoc); + } + public void Term(Gfo_usr_dlg usr_dlg) { + conn.Txn_end(); + for (int i = 0; i < tbls_len; i++) + tbls[i].Make_idxs(usr_dlg, conn); + } +} +abstract class Wdata_tbl_base { + public abstract String Tbl_name(); + public abstract String Tbl_create_sql(); + public abstract Db_idx_itm[] Idx_ary(); + public abstract String[] Fld_ary(); + @gplx.Virtual public void Exec_insert_by_wdoc(byte[] lang_key, Wdata_wiki_mgr wdata_mgr, int page_id, Wdata_doc wdoc) {} + public void Make_tbl(Db_conn p) {Sqlite_engine_.Tbl_create(p, this.Tbl_name(), 
this.Tbl_create_sql());} + public void Make_idxs(Gfo_usr_dlg usr_dlg, Db_conn p) { + Sqlite_engine_.Idx_create(usr_dlg, p, this.Tbl_name(), this.Idx_ary()); + } + public Db_stmt Make_insert_stmt(Db_conn p) {return Db_stmt_.new_insert_(p, this.Tbl_name(), this.Fld_ary());} + public Db_stmt Insert_stmt() {return insert_stmt;} private Db_stmt insert_stmt; + public void Init(Db_conn conn) { + this.Make_tbl(conn); + insert_stmt = this.Make_insert_stmt(conn); + } + public static void Exec_insert_kvs(Db_stmt stmt, int page_id, Ordered_hash hash) { + int len = hash.Count(); + for (int i = 0; i < len; i++) { + Json_kv kv = (Json_kv)hash.Get_at(i); + stmt.Clear() + .Val_int(page_id) + .Val_bry_as_str(kv.Key().Data_bry()) + .Val_bry_as_str(kv.Val().Data_bry()) + .Exec_insert(); + } + } +} +class Wdata_label_tbl extends Wdata_tbl_base { + @Override public String Tbl_name() {return "wdata_label";} + @Override public String Tbl_create_sql() { + return String_.Concat_lines_nl + ( "CREATE TABLE IF NOT EXISTS wdata_label" + , "( page_id integer NOT NULL" + , ", lang_key varchar(16) NOT NULL" + , ", val varchar(255) NOT NULL" + , ");" + ); + } + @Override public Db_idx_itm[] Idx_ary() {return new Db_idx_itm[] {Db_idx_itm.sql_("CREATE INDEX IF NOT EXISTS wdata_label__main ON wdata_label (page_id, lang_key);")};} + @Override public String[] Fld_ary() {return new String[] {Fld_page_id, Fld_lang_key, Fld_val};} + @Override public void Exec_insert_by_wdoc(byte[] lang_key, Wdata_wiki_mgr wdata_mgr, int page_id, Wdata_doc wdoc) {Exec_insert_kvs(this.Insert_stmt(), page_id, wdoc.Label_list());} + private static final String Fld_page_id = "page_id", Fld_lang_key = "lang_key", Fld_val = "val"; +} +class Wdata_alias_tbl extends Wdata_tbl_base { + @Override public String Tbl_name() {return "wdata_alias";} + @Override public String Tbl_create_sql() { + return String_.Concat_lines_nl + ( "CREATE TABLE IF NOT EXISTS wdata_alias" + , "( page_id integer NOT NULL" + , ", lang_key varchar(16) NOT 
NULL" + , ", val varchar(255) NOT NULL" + , ");" + ); + } + @Override public Db_idx_itm[] Idx_ary() {return new Db_idx_itm[] {Db_idx_itm.sql_("CREATE INDEX IF NOT EXISTS wdata_alias__main ON wdata_alias (page_id, lang_key);")};} + @Override public String[] Fld_ary() {return new String[] {Fld_page_id, Fld_lang_key, Fld_val};} + @Override public void Exec_insert_by_wdoc(byte[] lang_key, Wdata_wiki_mgr wdata_mgr, int page_id, Wdata_doc wdoc) { + Ordered_hash hash = wdoc.Alias_list(); + int len = hash.Count(); + Db_stmt insert_stmt = this.Insert_stmt(); + for (int i = 0; i < len; i++) { + Json_kv kv = (Json_kv)hash.Get_at(i); + byte[] key = kv.Key().Data_bry(); + Json_grp val_grp = (Json_grp)kv.Val(); + int val_grp_len = val_grp.Len(); + for (int j = 0; j < val_grp_len; j++) { + Json_itm val_itm = val_grp.Get_at(j); + byte[] val = Bry_.Empty; + if (val_itm.Tid() == Json_itm_.Tid__str) + val = val_itm.Data_bry(); + else if (val_itm.Tid() == Json_itm_.Tid__kv) { // EX: q80 and de aliases + val = ((Json_kv)val_itm).Val().Data_bry(); + } + insert_stmt.Clear() + .Val_int(page_id) + .Val_bry_as_str(key) + .Val_bry_as_str(val) + .Exec_insert(); + } + } + } + private static final String Fld_page_id = "page_id", Fld_lang_key = "lang_key", Fld_val = "val"; +} +class Wdata_description_tbl extends Wdata_tbl_base { + @Override public String Tbl_name() {return "wdata_description";} + @Override public String Tbl_create_sql() { + return String_.Concat_lines_nl + ( "CREATE TABLE IF NOT EXISTS wdata_description" + , "( page_id integer NOT NULL" + , ", lang_key varchar(16) NOT NULL" + , ", val varchar(255) NOT NULL" + , ");" + ); + } + @Override public Db_idx_itm[] Idx_ary() {return new Db_idx_itm[] {Db_idx_itm.sql_("CREATE INDEX IF NOT EXISTS wdata_description__main ON wdata_description (page_id, lang_key);")};} + @Override public void Exec_insert_by_wdoc(byte[] lang_key, Wdata_wiki_mgr wdata_mgr, int page_id, Wdata_doc wdoc) {Exec_insert_kvs(this.Insert_stmt(), page_id, 
wdoc.Descr_list());} + @Override public String[] Fld_ary() {return new String[] {Fld_page_id, Fld_lang_key, Fld_val};} + private static final String Fld_page_id = "page_id", Fld_lang_key = "lang_key", Fld_val = "val"; +} +class Wdata_link_tbl extends Wdata_tbl_base { + @Override public String Tbl_name() {return "wdata_link";} + @Override public String Tbl_create_sql() { + return String_.Concat_lines_nl + ( "CREATE TABLE IF NOT EXISTS wdata_link" + , "( page_id integer NOT NULL" + , ", wiki_key varchar(255) NOT NULL" + , ", val varchar(255) NOT NULL" + , ");" + ); + } + @Override public Db_idx_itm[] Idx_ary() {return new Db_idx_itm[] {Db_idx_itm.sql_("CREATE INDEX IF NOT EXISTS wdata_link__main ON wdata_link (page_id, wiki_key);")};} + @Override public String[] Fld_ary() {return new String[] {Fld_page_id, Fld_wiki_key, Fld_val};} + @Override public void Exec_insert_by_wdoc(byte[] lang_key, Wdata_wiki_mgr wdata_mgr, int page_id, Wdata_doc wdoc) { + Ordered_hash hash = wdoc.Slink_list(); + int len = hash.Count(); + Db_stmt insert_stmt = this.Insert_stmt(); + for (int i = 0; i < len; i++) { + Json_kv kv = (Json_kv)hash.Get_at(i); + byte[] key = kv.Key().Data_bry(); + Json_itm kv_val = kv.Val(); + byte[] val = Bry_.Empty; + if (kv_val.Tid() == Json_itm_.Tid__str) + val = kv_val.Data_bry(); + else { + Json_nde val_nde = (Json_nde)kv.Val(); + Json_kv val_name_kv = (Json_kv)val_nde.Get_at(0); // ASSUME: 1st item is always "name" kv; EX: "name":"Earth" + val = val_name_kv.Val().Data_bry(); + } + insert_stmt.Clear() + .Val_int(page_id) + .Val_bry_as_str(key) + .Val_bry_as_str(val) + .Exec_insert(); + } + } + private static final String Fld_page_id = "page_id", Fld_wiki_key = "wiki_key", Fld_val = "val"; +} +class Wbase_claim_tbl extends Wdata_tbl_base { + @Override public String Tbl_name() {return "wdata_claim";} + @Override public String Tbl_create_sql() { + return String_.Concat_lines_nl + ( "CREATE TABLE IF NOT EXISTS wdata_claim" + , "( claim_id integer NOT NULL" + , ", 
page_id integer NOT NULL" + , ", prop_id integer NOT NULL" // 60; P60 + , ", val_tid smallint NOT NULL" // String;wikibase-entity-id;time;globecoordinate + , ", entity_tid smallint NOT NULL" // null;item + , ", entity_id integer NOT NULL" // null;123 + , ", val_text varchar(255) NOT NULL" + , ", guid varchar(64) NOT NULL" + , ", rank integer NOT NULL" + , ", ref_count integer NOT NULL" + , ", qual_count integer NOT NULL" + , ");" + ); + } + @Override public Db_idx_itm[] Idx_ary() { + return new Db_idx_itm[] + { Db_idx_itm.sql_("CREATE INDEX IF NOT EXISTS wdata_claim__main ON wdata_claim (page_id, prop_id, val_tid, entity_tid);") + }; + } + @Override public String[] Fld_ary() {return new String[] {Fld_claim_id, Fld_page_id, Fld_prop_id, Fld_val_tid, Fld_entity_tid, Fld_entity_id, Fld_val_text, Fld_guid, Fld_rank, Fld_ref_count, Fld_qual_count};} + private int next_claim_id = 0; + private Xob_wdata_db_visitor visitor; + @Override public void Exec_insert_by_wdoc(byte[] lang_key, Wdata_wiki_mgr wdata_mgr, int page_id, Wdata_doc wdoc) { + if (visitor == null) visitor = new Xob_wdata_db_visitor(wdata_mgr); + visitor.Init(lang_key); + Ordered_hash list = wdoc.Claim_list(); + int list_len = list.Count(); + for (int i = 0; i < list_len; i++) { + Wbase_claim_grp claim_grp = (Wbase_claim_grp)list.Get_at(i); + int itms_len = claim_grp.Len(); + int entity_id = -1; + byte[] claim_val = Bry_.Empty; + for (int j = 0; j < itms_len; j++) { + Wbase_claim_base claim = claim_grp.Get_at(j); + claim.Welcome(visitor); + claim_val = visitor.Rv(); + Exec_insert(++next_claim_id, page_id, claim_grp.Id(), claim.Val_tid(), claim.Snak_tid(), entity_id, claim_val, claim.Wguid(), claim.Rank_tid(), 0, 0); + } + } + } + public void Exec_insert(int claim_id, int page_id, int prop_id, byte val_tid, byte entity_tid, int entity_id, byte[] val_text, byte[] guid, int rank, int ref_count, int qual_count) { + if (val_text == null) val_text = Bry_.Empty; + if (guid == null) guid = Bry_.Empty; + 
this.Insert_stmt().Clear() + .Val_int(claim_id) + .Val_int(page_id) + .Val_int(prop_id) + .Val_byte(val_tid) + .Val_byte(entity_tid) + .Val_int(entity_id) + .Val_bry_as_str(val_text) + .Val_bry_as_str(guid) + .Val_int(rank) + .Val_int(ref_count) + .Val_int(qual_count) + .Exec_insert(); + } + private static final String Fld_claim_id = "claim_id", Fld_page_id = "page_id", Fld_prop_id = "prop_id", Fld_val_tid = "val_tid", Fld_entity_tid = "entity_tid", Fld_entity_id = "entity_id", Fld_val_text = "val_text" + , Fld_guid = "guid", Fld_rank = "rank", Fld_ref_count = "ref_count", Fld_qual_count = "qual_count" + ; +} +class Wbase_claim_time_tbl extends Wdata_tbl_base { + @Override public String Tbl_name() {return "wdata_claim_time";} + @Override public String Tbl_create_sql() { + return String_.Concat_lines_nl + ( "CREATE TABLE IF NOT EXISTS wdata_claim_time" + , "( claim_id integer NOT NULL" + , ", time_val varchar(64) NOT NULL" // -04540000000-01-01T00:00:00Z + , ", time_tz integer NOT NULL" // 0 + , ", time_before integer NOT NULL" // 0 + , ", time_after integer NOT NULL" // 0 + , ", time_precision integer NOT NULL" // 2; number of digits + , ", time_model varchar(64) NOT NULL" // http:\/\/www.wikidata.org\/entity\/Q1985727 + , ");" + ); + } + @Override public Db_idx_itm[] Idx_ary() { + return new Db_idx_itm[] { + Db_idx_itm.sql_("CREATE INDEX IF NOT EXISTS wdata_claim_time__main ON wdata_claim_time (claim_id);") + }; + } + @Override public String[] Fld_ary() {return new String[] {Fld_claim_id, Fld_time_val, Fld_time_tz, Fld_time_before, Fld_time_after, Fld_time_precision, Fld_time_model};} + public void Insert(Db_stmt stmt, int claim_id, byte[] time_val, int tz, int before, int after, int precision, byte[] model) { + stmt.Clear() + .Val_int(claim_id) + .Val_bry_as_str(time_val) + .Val_int(tz) + .Val_int(before) + .Val_int(after) + .Val_int(precision) + .Val_bry_as_str(model) + .Exec_insert(); + } + private static final String Fld_claim_id = "claim_id", Fld_time_val = 
"time_val", Fld_time_tz = "time_tz", Fld_time_before = "time_before", Fld_time_after = "time_after", Fld_time_precision = "time_precision", Fld_time_model = "time_model"; +} +class Wbase_claim_geo_tbl extends Wdata_tbl_base { + @Override public String Tbl_name() {return "wdata_claim_geo";} + @Override public String Tbl_create_sql() { + return String_.Concat_lines_nl + ( "CREATE TABLE IF NOT EXISTS wdata_claim_geo" + , "( claim_id integer NOT NULL" + , ", geo_latitude double NOT NULL" // 41.590833333333 + , ", geo_longitude double NOT NULL" // -93.620833333333 + , ", geo_altitude varchar(255) NOT NULL" // null + , ", geo_precision double NOT NULL" // 0.00027777777777778 + , ", geo_globe integer NOT NULL" // http:\/\/www.wikidata.org\/entity\/Q2 + , ");" + ); + } + @Override public Db_idx_itm[] Idx_ary() { + return new Db_idx_itm[] + { Db_idx_itm.sql_("CREATE INDEX IF NOT EXISTS wdata_claim_geo__main ON wdata_claim_geo (claim_id);") + }; + } + public void Insert(Db_stmt stmt, int claim_id, double latitude, double longitude, byte[] altitude, double precision, byte[] globe) { + stmt.Clear() + .Val_int(claim_id) + .Val_double(latitude) + .Val_double(longitude) + .Val_bry_as_str(altitude) + .Val_double(precision) + .Val_bry_as_str(globe) + .Exec_insert(); + } + @Override public String[] Fld_ary() {return new String[] {Fld_claim_id, Fld_geo_latitude, Fld_geo_longitude, Fld_geo_altitude, Fld_geo_precision, Fld_geo_globe};} + private static final String Fld_claim_id = "claim_id", Fld_geo_latitude = "geo_latitude", Fld_geo_longitude = "geo_longitude", Fld_geo_altitude = "geo_altitude", Fld_geo_precision = "geo_precision", Fld_geo_globe = "geo_globe"; +} +class Wdata_ref_tbl extends Wdata_tbl_base { + @Override public String Tbl_name() {return "wdata_ref";} + @Override public String Tbl_create_sql() { + return String_.Concat_lines_nl + ( "CREATE TABLE IF NOT EXISTS wdata_ref" + , "( ref_id integer NOT NULL" + , ", page_id integer NOT NULL" + , ", prop_id integer NOT NULL" // 
60; P60 + , ", val_tid smallint NOT NULL" // String;wikibase-entity-id;time;globecoordinate + , ", entity_tid smallint NOT NULL" // null;item + , ", entity_id integer NOT NULL" // null;123 + , ", val_text varchar(255) NOT NULL" + , ");" + ); + } + @Override public Db_idx_itm[] Idx_ary() { + return new Db_idx_itm[] { + Db_idx_itm.sql_("CREATE INDEX IF NOT EXISTS wdata_ref__main ON wdata_ref (page_id, prop_id, val_tid, entity_tid);") + }; + } + @Override public String[] Fld_ary() {return new String[] {Fld_ref_id, Fld_page_id, Fld_prop_id, Fld_val_tid, Fld_entity_tid, Fld_entity_id, Fld_val_text};} + private static final String Fld_ref_id = "ref_id", Fld_page_id = "page_id", Fld_prop_id = "prop_id", Fld_val_tid = "val_tid", Fld_entity_tid = "entity_tid", Fld_entity_id = "entity_id", Fld_val_text = "val_text"; // names must match the columns declared in Tbl_create_sql +} +class Wdata_qual_tbl extends Wdata_tbl_base { + @Override public String Tbl_name() {return "wdata_qual";} + @Override public String Tbl_create_sql() { + return String_.Concat_lines_nl + ( "CREATE TABLE IF NOT EXISTS wdata_qual" + , "( qual_id integer NOT NULL" + , ", page_id integer NOT NULL" + , ", val_text varchar(4096) NOT NULL" + , ");" + ); + } + @Override public Db_idx_itm[] Idx_ary() { + return new Db_idx_itm[] { + Db_idx_itm.sql_("CREATE INDEX IF NOT EXISTS wdata_qual__main ON wdata_qual (qual_id, page_id);") // index targets this table; wdata_ref has no qual_id column + }; + } + @Override public String[] Fld_ary() {return new String[] {Fld_qual_id, Fld_page_id, Fld_val_text};} + public void Insert(Db_stmt stmt, int qual_id, int page_id, byte[] val_text) { + stmt.Clear() + .Val_int(qual_id) + .Val_int(page_id) + .Val_bry_as_str(val_text) + .Exec_insert(); + } + private static final String Fld_qual_id = "qual_id", Fld_page_id = "page_id", Fld_val_text = "val_text"; +} +class Xob_wdata_db_visitor implements Wbase_claim_visitor { + private final Wdata_wiki_mgr wdata_mgr; private byte[] lang_key; + public Xob_wdata_db_visitor(Wdata_wiki_mgr wdata_mgr) {this.wdata_mgr = wdata_mgr;} + public void Init(byte[] lang_key)
{this.lang_key = lang_key;} + public byte[] Rv() {return rv;} private byte[] rv; + public void Visit_str(Wbase_claim_string itm) {rv = itm.Val_bry();} + public void Visit_monolingualtext(Wbase_claim_monolingualtext itm) {rv = Bry_.Add_w_dlm(Byte_ascii.Pipe, itm.Lang(), itm.Text());} + public void Visit_quantity(Wbase_claim_quantity itm) {rv = itm.Amount();} + public void Visit_time(Wbase_claim_time itm) {rv = itm.Time();} + public void Visit_globecoordinate(Wbase_claim_globecoordinate itm) {rv = Bry_.Add_w_dlm(Byte_ascii.Comma, itm.Lat(), itm.Lng());} + public void Visit_system(Wbase_claim_value itm) {rv = Bry_.Empty;} + public void Visit_entity(Wbase_claim_entity itm) { + Wdata_doc entity_doc = wdata_mgr.Doc_mgr.Get_by_xid_or_null(itm.Page_ttl_db()); + rv = entity_doc == null // always reassign: rv is a field and would otherwise keep the value set by the previous visit + ? null + : entity_doc.Get_label_bry_or_null(lang_key); + if (rv == null) // can be null if entity_doc is null or if label is null; + rv = Bry_.Empty; + } +} diff --git a/400_xowa/src/gplx/xowa/xtns/wbases/imports/Xob_wdata_pid.java b/400_xowa/src/gplx/xowa/xtns/wbases/imports/Xob_wdata_pid.java index 04931cf0a..87df7b44a 100644 --- a/400_xowa/src/gplx/xowa/xtns/wbases/imports/Xob_wdata_pid.java +++ b/400_xowa/src/gplx/xowa/xtns/wbases/imports/Xob_wdata_pid.java @@ -1,6 +1,6 @@ /* XOWA: the XOWA Offline Wiki Application -Copyright (C) 2012-2017 gnosygnu@gmail.com +Copyright (C) 2012-2020 gnosygnu@gmail.com XOWA is licensed under the terms of the General Public License (GPL) Version 3, or alternatively under the terms of the Apache License Version 2.0.
@@ -13,82 +13,102 @@ The terms of each license can be found in the source code repository: GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt */ -package gplx.xowa.xtns.wbases.imports; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.wbases.*; -import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.wkrs.*; -import gplx.xowa.wikis.data.*; import gplx.dbs.*; import gplx.xowa.wikis.dbs.*; import gplx.xowa.wikis.data.tbls.*; -import gplx.xowa.xtns.wbases.core.*; import gplx.xowa.xtns.wbases.claims.enums.*; import gplx.xowa.xtns.wbases.parsers.*; import gplx.xowa.xtns.wbases.dbs.*; -import gplx.langs.jsons.*; -public class Xob_wdata_pid extends Xob_itm_dump_base implements Xob_page_wkr, Gfo_invk { - private Db_conn conn; - private Wbase_pid_tbl tbl__pid; - private Xowb_prop_tbl tbl__prop; - private Json_parser jdoc_parser; - private final Ordered_hash datatype_hash = Ordered_hash_.New_bry(); - public Xob_wdata_pid(Db_conn conn) { - this.conn = conn; - } - public Xob_wdata_pid Ctor(Xob_bldr bldr, Xowe_wiki wiki) { - this.Cmd_ctor(bldr, wiki); - this.jdoc_parser = bldr.App().Wiki_mgr().Wdata_mgr().Jdoc_parser(); - return this; - } - public String Page_wkr__key() {return gplx.xowa.bldrs.Xob_cmd_keys.Key_wbase_pid;} - public void Page_wkr__bgn() {this.Pid__bgn();} - public void Page_wkr__run(Xowd_page_itm page) { - if (page.Ns_id() != Wdata_wiki_mgr.Ns_property) return; - - Json_doc jdoc = jdoc_parser.Parse(page.Text()); - if (jdoc == null) { - bldr.Usr_dlg().Warn_many(GRP_KEY, "json.invalid", "json is invalid: ns=~{0} id=~{1}", page.Ns_id(), String_.new_u8(page.Ttl_page_db())); - return; - } - Pid__run(jdoc); - } - public void Page_wkr__run_cleanup() {} - public void Page_wkr__end() {this.Pid__end();} - public void Pid__bgn() { - if (conn == null) // conn will be null unless test - conn = wiki.Data__core_mgr().Db__wbase().Conn(); - - // 
init datatype_hash - Wbase_enum_hash enum_hash = Wbase_claim_type_.Reg; - byte len = (byte)enum_hash.Len(); - for (byte i = 0; i < len; i++) { - Wbase_claim_type claim_type = (Wbase_claim_type)enum_hash.Get_itm_or(i, null); - datatype_hash.Add(Bry_.new_u8(claim_type.Key_for_scrib()), claim_type); - } - - // init wbase_pid - tbl__pid = Wbase_pid_tbl.New_make(conn); - tbl__pid.Create_tbl(); - tbl__pid.Insert_bgn(); - - // init wbase_prop - tbl__prop = new Xowb_prop_tbl(conn); - tbl__prop.Create_tbl(); - tbl__prop.Insert_bgn(); - } - public void Pid__run(Json_doc jdoc) { - Wdata_doc_parser wdoc_parser = app.Wiki_mgr().Wdata_mgr().Wdoc_parser(jdoc); - byte[] pid = wdoc_parser.Parse_qid(jdoc); - - // add datatype - byte[] datatype = jdoc.Root_nde().Get_as_bry(Wdata_dict_mainsnak.Itm__datatype.Key_str()); - Wbase_claim_type claim_type = (Wbase_claim_type)datatype_hash.Get_by_or_fail(datatype); - tbl__prop.Insert_cmd_by_batch(pid, claim_type.Tid()); - - // add langs - Ordered_hash list = wdoc_parser.Parse_langvals(pid, jdoc, Bool_.Y); - int len = list.Count(); - for (int i = 0; i < len; ++i) { - Wdata_langtext_itm label = (Wdata_langtext_itm)list.Get_at(i); - tbl__pid.Insert_cmd_by_batch(label.Lang(), label.Text(), pid); - } - } - public void Pid__end() { - tbl__pid.Insert_end(); - tbl__pid.Create_idx(); - tbl__prop.Insert_end(); - } - private static final String GRP_KEY = "xowa.wdata.pid_wkr"; -} +package gplx.xowa.xtns.wbases.imports; + +import gplx.Bool_; +import gplx.Bry_; +import gplx.Gfo_invk; +import gplx.Ordered_hash; +import gplx.Ordered_hash_; +import gplx.String_; +import gplx.dbs.Db_conn; +import gplx.langs.jsons.Json_doc; +import gplx.langs.jsons.Json_parser; +import gplx.xowa.Xowe_wiki; +import gplx.xowa.bldrs.Xob_bldr; +import gplx.xowa.bldrs.wkrs.Xob_itm_dump_base; +import gplx.xowa.bldrs.wkrs.Xob_page_wkr; +import gplx.xowa.wikis.data.tbls.Xowd_page_itm; +import gplx.xowa.xtns.wbases.Wdata_wiki_mgr; +import 
gplx.xowa.xtns.wbases.claims.enums.Wbase_claim_type; +import gplx.xowa.xtns.wbases.claims.enums.Wbase_claim_type_; +import gplx.xowa.xtns.wbases.claims.enums.Wbase_enum_hash; +import gplx.xowa.xtns.wbases.core.Wdata_dict_mainsnak; +import gplx.xowa.xtns.wbases.core.Wdata_langtext_itm; +import gplx.xowa.xtns.wbases.dbs.Wbase_pid_tbl; +import gplx.xowa.xtns.wbases.dbs.Xowb_prop_tbl; +import gplx.xowa.xtns.wbases.parsers.Wdata_doc_parser; + +public class Xob_wdata_pid extends Xob_itm_dump_base implements Xob_page_wkr, Gfo_invk { + private Db_conn conn; + private Wbase_pid_tbl tbl__pid; + private Xowb_prop_tbl tbl__prop; + private final Json_parser jdoc_parser = new Json_parser(); + private final Ordered_hash datatype_hash = Ordered_hash_.New_bry(); + public Xob_wdata_pid(Db_conn conn) { + this.conn = conn; + } + public Xob_wdata_pid Ctor(Xob_bldr bldr, Xowe_wiki wiki) { + this.Cmd_ctor(bldr, wiki); + return this; + } + public String Page_wkr__key() {return gplx.xowa.bldrs.Xob_cmd_keys.Key_wbase_pid;} + public void Page_wkr__bgn() {this.Pid__bgn();} + public void Page_wkr__run(Xowd_page_itm page) { + if (page.Ns_id() != Wdata_wiki_mgr.Ns_property) return; + + Json_doc jdoc = jdoc_parser.Parse(page.Text()); + if (jdoc == null) { + bldr.Usr_dlg().Warn_many(GRP_KEY, "json.invalid", "json is invalid: ns=~{0} id=~{1}", page.Ns_id(), String_.new_u8(page.Ttl_page_db())); + return; + } + Pid__run(jdoc); + } + public void Page_wkr__run_cleanup() {} + public void Page_wkr__end() {this.Pid__end();} + public void Pid__bgn() { + if (conn == null) // conn will be null unless test + conn = wiki.Data__core_mgr().Db__wbase().Conn(); + + // init datatype_hash + Wbase_enum_hash enum_hash = Wbase_claim_type_.Reg; + byte len = (byte)enum_hash.Len(); + for (byte i = 0; i < len; i++) { + Wbase_claim_type claim_type = (Wbase_claim_type)enum_hash.Get_itm_or(i, null); + datatype_hash.Add(Bry_.new_u8(claim_type.Key_for_scrib()), claim_type); + } + + // init wbase_pid + tbl__pid = 
Wbase_pid_tbl.New_make(conn); + tbl__pid.Create_tbl(); + tbl__pid.Insert_bgn(); + + // init wbase_prop + tbl__prop = new Xowb_prop_tbl(conn); + tbl__prop.Create_tbl(); + tbl__prop.Insert_bgn(); + } + public void Pid__run(Json_doc jdoc) { + Wdata_doc_parser wdoc_parser = app.Wiki_mgr().Wdata_mgr().Wdoc_parser(jdoc); + byte[] pid = wdoc_parser.Parse_qid(jdoc); + + // add datatype + byte[] datatype = jdoc.Root_nde().Get_as_bry(Wdata_dict_mainsnak.Itm__datatype.Key_str()); + Wbase_claim_type claim_type = (Wbase_claim_type)datatype_hash.Get_by_or_fail(datatype); + tbl__prop.Insert_cmd_by_batch(pid, claim_type.Tid()); + + // add langs + Ordered_hash list = wdoc_parser.Parse_langvals(pid, jdoc, Bool_.Y); + int len = list.Count(); + for (int i = 0; i < len; ++i) { + Wdata_langtext_itm label = (Wdata_langtext_itm)list.Get_at(i); + tbl__pid.Insert_cmd_by_batch(label.Lang(), label.Text(), pid); + } + } + public void Pid__end() { + tbl__pid.Insert_end(); + tbl__pid.Create_idx(); + tbl__prop.Insert_end(); + } + private static final String GRP_KEY = "xowa.wdata.pid_wkr"; +} diff --git a/400_xowa/src/gplx/xowa/xtns/wbases/imports/Xob_wdata_qid.java b/400_xowa/src/gplx/xowa/xtns/wbases/imports/Xob_wdata_qid.java index 29156429d..bcb7ef6b1 100644 --- a/400_xowa/src/gplx/xowa/xtns/wbases/imports/Xob_wdata_qid.java +++ b/400_xowa/src/gplx/xowa/xtns/wbases/imports/Xob_wdata_qid.java @@ -1,6 +1,6 @@ /* XOWA: the XOWA Offline Wiki Application -Copyright (C) 2012-2017 gnosygnu@gmail.com +Copyright (C) 2012-2020 gnosygnu@gmail.com XOWA is licensed under the terms of the General Public License (GPL) Version 3, or alternatively under the terms of the Apache License Version 2.0. 
@@ -13,74 +13,93 @@ The terms of each license can be found in the source code repository: GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt */ -package gplx.xowa.xtns.wbases.imports; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.wbases.*; -import gplx.langs.jsons.*; -import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.wkrs.*; -import gplx.xowa.wikis.data.*; import gplx.dbs.*; import gplx.xowa.wikis.dbs.*; import gplx.xowa.wikis.data.tbls.*; -import gplx.xowa.wikis.nss.*; -import gplx.xowa.xtns.wbases.core.*; import gplx.xowa.xtns.wbases.dbs.*; import gplx.xowa.xtns.wbases.parsers.*; -public class Xob_wdata_qid extends Xob_itm_dump_base implements Xob_page_wkr, Gfo_invk { - private Db_conn conn; - private Wbase_qid_tbl tbl; - private final Object thread_lock = new Object(); - private Json_parser parser; private Xob_wbase_ns_parser ns_parser; private final Xob_wbase_ns_parser_rslt ns_parser_rslt = new Xob_wbase_ns_parser_rslt(); - public Xob_wdata_qid(Db_conn conn) { - this.conn = conn; - } - public String Page_wkr__key() {return gplx.xowa.bldrs.Xob_cmd_keys.Key_wbase_qid;} - public Xob_wdata_qid Ctor(Xob_bldr bldr, Xowe_wiki wiki) {this.Cmd_ctor(bldr, wiki); return this;} - public void Page_wkr__bgn() { - this.parser = bldr.App().Wiki_mgr().Wdata_mgr().Jdoc_parser(); - this.ns_parser = new Xob_wbase_ns_parser(bldr.App().Fsys_mgr().Cfg_site_meta_fil()); - this.Qid__bgn(); - } - public void Page_wkr__run(Xowd_page_itm page) { - if (page.Ns_id() != Xow_ns_.Tid__main) return; // qid pages are only in the Main Srch_rslt_cbk - Json_doc jdoc = parser.Parse(page.Text()); - if (jdoc == null) {bldr.Usr_dlg().Warn_many("", "", "json is invalid: ns=~{0} id=~{1}", page.Ns_id(), String_.new_u8(page.Ttl_page_db())); return;} - this.Qid__run(jdoc); - } - public void Page_wkr__run_cleanup() {} - public void Page_wkr__end() {this.Qid__end();} - 
public void Qid__bgn() { - if (conn == null) { - Xow_db_file wbase_db = Make_wbase_db(wiki.Db_mgr_as_sql().Core_data_mgr()); - conn = wbase_db.Conn(); - } - tbl = Wbase_qid_tbl.New_make(conn, false); - tbl.Create_tbl(); - tbl.Insert_bgn(); - } - public void Qid__run(Json_doc jdoc) { - synchronized (thread_lock) { - Wdata_doc_parser wdoc_parser = app.Wiki_mgr().Wdata_mgr().Wdoc_parser(jdoc); - byte[] qid = wdoc_parser.Parse_qid(jdoc); - Bry_bfr tmp_bfr = Bry_bfr_.Reset(255); - Ordered_hash sitelinks = wdoc_parser.Parse_sitelinks(qid, jdoc); - int sitelinks_len = sitelinks.Count(); if (sitelinks_len == 0) return; // no subs; return; - for (int i = 0; i < sitelinks_len; i++) { // iterate sitelinks - Wdata_sitelink_itm sitelink = (Wdata_sitelink_itm)sitelinks.Get_at(i); - byte[] sitelink_site = sitelink.Site(), sitelink_ttl = sitelink.Name(); - ns_parser.Find(ns_parser_rslt, sitelink_site, sitelink_ttl); - int sitelink_ns = ns_parser_rslt.Ns_id(); - if (sitelink_ns != Xow_ns_.Tid__main) // ttl not in main; chop off ns portion; EX:Aide:French_title -> French_title - sitelink_ttl = Bry_.Mid(sitelink_ttl, ns_parser_rslt.Ttl_bgn(), sitelink_ttl.length); - sitelink_ttl = wiki.Lang().Case_mgr().Case_build_1st_upper(tmp_bfr, sitelink_ttl, 0, sitelink_ttl.length); - tbl.Insert_cmd_by_batch(sitelink.Site(), sitelink_ns, Xoa_ttl.Replace_spaces(sitelink_ttl), qid); // NOTE: always convert spaces to underscores; EX: "A B" -> "A_B" DATE:2015-04-21 - } - } - } - public void Qid__end() { - tbl.Insert_end(); - tbl.Create_idx(); - } - public static Xow_db_file Make_wbase_db(Xow_db_mgr db_mgr) { - boolean db_is_all_or_few = db_mgr.Props().Layout_text().Tid_is_all_or_few(); - Xow_db_file wbase_db = db_is_all_or_few - ? 
db_mgr.Db__core() - : db_mgr.Dbs__make_by_tid(Xow_db_file_.Tid__wbase); - if (db_is_all_or_few) - db_mgr.Db__wbase_(wbase_db); - return wbase_db; - } -} +package gplx.xowa.xtns.wbases.imports; + +import gplx.Bry_; +import gplx.Bry_bfr; +import gplx.Bry_bfr_; +import gplx.Gfo_invk; +import gplx.Ordered_hash; +import gplx.String_; +import gplx.dbs.Db_conn; +import gplx.langs.jsons.Json_doc; +import gplx.langs.jsons.Json_parser; +import gplx.xowa.Xoa_ttl; +import gplx.xowa.Xowe_wiki; +import gplx.xowa.bldrs.Xob_bldr; +import gplx.xowa.bldrs.wkrs.Xob_itm_dump_base; +import gplx.xowa.bldrs.wkrs.Xob_page_wkr; +import gplx.xowa.wikis.data.Xow_db_file; +import gplx.xowa.wikis.data.Xow_db_file_; +import gplx.xowa.wikis.data.Xow_db_mgr; +import gplx.xowa.wikis.data.tbls.Xowd_page_itm; +import gplx.xowa.wikis.nss.Xow_ns_; +import gplx.xowa.xtns.wbases.core.Wdata_sitelink_itm; +import gplx.xowa.xtns.wbases.dbs.Wbase_qid_tbl; +import gplx.xowa.xtns.wbases.parsers.Wdata_doc_parser; + +public class Xob_wdata_qid extends Xob_itm_dump_base implements Xob_page_wkr, Gfo_invk { + private Db_conn conn; + private Wbase_qid_tbl tbl; + private final Object thread_lock = new Object(); + private final Json_parser parser = new Json_parser(); + private Xob_wbase_ns_parser ns_parser; private final Xob_wbase_ns_parser_rslt ns_parser_rslt = new Xob_wbase_ns_parser_rslt(); + public Xob_wdata_qid(Db_conn conn) { + this.conn = conn; + } + public String Page_wkr__key() {return gplx.xowa.bldrs.Xob_cmd_keys.Key_wbase_qid;} + public Xob_wdata_qid Ctor(Xob_bldr bldr, Xowe_wiki wiki) {this.Cmd_ctor(bldr, wiki); return this;} + public void Page_wkr__bgn() { + this.ns_parser = new Xob_wbase_ns_parser(bldr.App().Fsys_mgr().Cfg_site_meta_fil()); + this.Qid__bgn(); + } + public void Page_wkr__run(Xowd_page_itm page) { + if (page.Ns_id() != Xow_ns_.Tid__main) return; // qid pages are only in the Main Srch_rslt_cbk + Json_doc jdoc = parser.Parse(page.Text()); + if (jdoc == null) {bldr.Usr_dlg().Warn_many("", 
"", "json is invalid: ns=~{0} id=~{1}", page.Ns_id(), String_.new_u8(page.Ttl_page_db())); return;} + this.Qid__run(jdoc); + } + public void Page_wkr__run_cleanup() {} + public void Page_wkr__end() {this.Qid__end();} + public void Qid__bgn() { + if (conn == null) { + Xow_db_file wbase_db = Make_wbase_db(wiki.Db_mgr_as_sql().Core_data_mgr()); + conn = wbase_db.Conn(); + } + tbl = Wbase_qid_tbl.New_make(conn, false); + tbl.Create_tbl(); + tbl.Insert_bgn(); + } + public void Qid__run(Json_doc jdoc) { + synchronized (thread_lock) { + Wdata_doc_parser wdoc_parser = app.Wiki_mgr().Wdata_mgr().Wdoc_parser(jdoc); + byte[] qid = wdoc_parser.Parse_qid(jdoc); + Bry_bfr tmp_bfr = Bry_bfr_.Reset(255); + Ordered_hash sitelinks = wdoc_parser.Parse_sitelinks(qid, jdoc); + int sitelinks_len = sitelinks.Count(); if (sitelinks_len == 0) return; // no subs; return; + for (int i = 0; i < sitelinks_len; i++) { // iterate sitelinks + Wdata_sitelink_itm sitelink = (Wdata_sitelink_itm)sitelinks.Get_at(i); + byte[] sitelink_site = sitelink.Site(), sitelink_ttl = sitelink.Name(); + ns_parser.Find(ns_parser_rslt, sitelink_site, sitelink_ttl); + int sitelink_ns = ns_parser_rslt.Ns_id(); + if (sitelink_ns != Xow_ns_.Tid__main) // ttl not in main; chop off ns portion; EX:Aide:French_title -> French_title + sitelink_ttl = Bry_.Mid(sitelink_ttl, ns_parser_rslt.Ttl_bgn(), sitelink_ttl.length); + sitelink_ttl = wiki.Lang().Case_mgr().Case_build_1st_upper(tmp_bfr, sitelink_ttl, 0, sitelink_ttl.length); + tbl.Insert_cmd_by_batch(sitelink.Site(), sitelink_ns, Xoa_ttl.Replace_spaces(sitelink_ttl), qid); // NOTE: always convert spaces to underscores; EX: "A B" -> "A_B" DATE:2015-04-21 + } + } + } + public void Qid__end() { + tbl.Insert_end(); + tbl.Create_idx(); + } + public static Xow_db_file Make_wbase_db(Xow_db_mgr db_mgr) { + boolean db_is_all_or_few = db_mgr.Props().Layout_text().Tid_is_all_or_few(); + Xow_db_file wbase_db = db_is_all_or_few + ? 
db_mgr.Db__core() + : db_mgr.Dbs__make_by_tid(Xow_db_file_.Tid__wbase); + if (db_is_all_or_few) + db_mgr.Db__wbase_(wbase_db); + return wbase_db; + } +} diff --git a/400_xowa/src/gplx/xowa/xtns/wbases/imports/json/Xowb_json_dump_db.java b/400_xowa/src/gplx/xowa/xtns/wbases/imports/json/Xowb_json_dump_db.java index 96a326cd7..166eb55c7 100644 --- a/400_xowa/src/gplx/xowa/xtns/wbases/imports/json/Xowb_json_dump_db.java +++ b/400_xowa/src/gplx/xowa/xtns/wbases/imports/json/Xowb_json_dump_db.java @@ -1,6 +1,6 @@ /* XOWA: the XOWA Offline Wiki Application -Copyright (C) 2012-2017 gnosygnu@gmail.com +Copyright (C) 2012-2020 gnosygnu@gmail.com XOWA is licensed under the terms of the General Public License (GPL) Version 3, or alternatively under the terms of the Apache License Version 2.0. @@ -13,90 +13,114 @@ The terms of each license can be found in the source code repository: GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt */ -package gplx.xowa.xtns.wbases.imports.json; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.wbases.*; import gplx.xowa.xtns.wbases.imports.*; -import gplx.core.ios.*; -import gplx.langs.jsons.*; -import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.cmds.*; import gplx.xowa.bldrs.cmds.texts.sqls.*; import gplx.xowa.apps.apis.xowa.bldrs.imports.*; -import gplx.xowa.wikis.*; import gplx.xowa.wikis.nss.*; import gplx.xowa.wikis.data.*; import gplx.xowa.wikis.data.tbls.*; -import gplx.xowa.xtns.wbases.core.*; import gplx.xowa.xtns.wbases.parsers.*; -class Xowb_json_dump_db { - private final Xoae_app app; private final Gfo_usr_dlg usr_dlg; private final Xowe_wiki wiki; private final Xob_bldr bldr; - private final Json_parser json_parser; - private final Xob_wdata_pid pid_cmd; private final Xob_wdata_qid qid_cmd; - private Xow_ns_mgr ns_mgr; private Xow_db_mgr db_mgr; - private Xowd_page_tbl page_tbl; private 
Xob_ns_to_db_mgr ns_to_db_mgr; - private Io_stream_zip_mgr text_zip_mgr; private byte text_zip_tid; - private DateAdp page_modified_on; - private int page_id = 0, page_count_main = 0; - public Xowb_json_dump_db(Xob_bldr bldr, Xowe_wiki wiki) { - this.app = bldr.App(); this.usr_dlg = app.Usr_dlg(); this.wiki = wiki; this.bldr = bldr; - this.json_parser = bldr.App().Wiki_mgr().Wdata_mgr().Jdoc_parser(); - this.ns_mgr = wiki.Ns_mgr(); - this.pid_cmd = new Xob_wdata_pid(wiki.Data__core_mgr().Db__wbase().Conn()); - this.qid_cmd = new Xob_wdata_qid(wiki.Data__core_mgr().Db__wbase().Conn()); - } - public void Parse_all_bgn(long src_fil_len, String src_fil_name) { - // load wiki - Xowe_wiki_.Create(wiki, src_fil_len, src_fil_name); - this.db_mgr = wiki.Data__core_mgr(); - this.page_tbl = db_mgr.Tbl__page(); - pid_cmd.Cmd_ctor(bldr, wiki); qid_cmd.Cmd_ctor(bldr, wiki); - - // create ns_mgr - wiki.Ns_mgr().Add_defaults(); - wiki.Ns_mgr().Add_new(Wdata_wiki_mgr.Ns_property, Wdata_wiki_mgr.Ns_property_name); - wiki.Ns_mgr().Init(); - - // init ns_map - this.ns_to_db_mgr = new Xob_ns_to_db_mgr(new Xob_ns_to_db_wkr__text(), db_mgr, Xobldr_cfg.Max_size__text(app)); - byte[] ns_file_map = Xobldr_cfg.New_ns_file_map(app, src_fil_len); - Xob_ns_file_itm.Init_ns_bldr_data(Xow_db_file_.Tid__text, wiki.Ns_mgr(), ns_file_map); - - // start import - this.text_zip_mgr = wiki.Utl__zip_mgr(); - this.text_zip_tid = Xobldr_cfg.Zip_mode__text(app); - this.page_modified_on = Datetime_now.Get(); - page_tbl.Insert_bgn(); - qid_cmd.Page_wkr__bgn(); - pid_cmd.Pid__bgn(); - } - public void Parse_doc(byte[] json_bry) { - // parse to jdoc - Json_doc jdoc = json_parser.Parse(json_bry); - if (jdoc == null) {usr_dlg.Warn_many("", "", "wbase.json_dump:json is invalid: json=~{0}", json_bry); return;} - - // extract xid - byte[] id = jdoc.Get_val_as_bry_or(Bry__id_key, null); - if (id == null) {usr_dlg.Warn_many("", "", "wbase.json_dump:id is invalid: json=~{0}", json_bry); return;} - boolean jdoc_is_qid = 
Bry_.Has_at_bgn(id, Byte_ascii.Ltr_Q, 0); - Xow_ns ns = jdoc_is_qid ? ns_mgr.Ns_main() : ns_mgr.Ids_get_or_null(Wdata_wiki_mgr.Ns_property); - - // create page entry - int random_int = ns.Count() + 1; ns.Count_(random_int); - byte[] json_zip = text_zip_mgr.Zip(text_zip_tid, json_bry); - Xow_db_file text_db = ns_to_db_mgr.Get_by_ns(ns.Bldr_data(), json_zip.length); - db_mgr.Create_page(page_tbl, text_db.Tbl__text(), ++page_id, ns.Id(), id, Bool_.N, page_modified_on, json_zip, json_bry.length, random_int, text_db.Id(), -1); - - // insert text - if (jdoc_is_qid) { - qid_cmd.Qid__run(jdoc); - ++page_count_main; - } - else - pid_cmd.Pid__run(jdoc); - } - public void Parse_all_end() { - page_tbl.Insert_end(); - page_tbl.Create_idx(); - qid_cmd.Qid__end(); - pid_cmd.Pid__end(); - ns_to_db_mgr.Rls_all(); - - // cleanup core - Xow_db_file db_core = db_mgr.Db__core(); - db_core.Tbl__site_stats().Update(page_count_main, page_id, ns_mgr.Ns_file().Count()); // save page stats - db_core.Tbl__ns().Insert(ns_mgr); // save ns - db_mgr.Tbl__cfg().Insert_str(Xowd_cfg_key_.Grp__wiki_init, Xowd_cfg_key_.Key__init__modified_latest, page_modified_on.XtoStr_fmt(DateAdp_.Fmt_iso8561_date_time)); - } - private static final byte[] Bry__id_key = Bry_.new_a7("id"); -} +package gplx.xowa.xtns.wbases.imports.json; + +import gplx.Bool_; +import gplx.Bry_; +import gplx.Byte_ascii; +import gplx.DateAdp; +import gplx.DateAdp_; +import gplx.Datetime_now; +import gplx.Gfo_usr_dlg; +import gplx.core.ios.Io_stream_zip_mgr; +import gplx.langs.jsons.Json_doc; +import gplx.langs.jsons.Json_parser; +import gplx.xowa.Xoae_app; +import gplx.xowa.Xowe_wiki; +import gplx.xowa.Xowe_wiki_; +import gplx.xowa.bldrs.Xob_bldr; +import gplx.xowa.bldrs.Xob_ns_to_db_mgr; +import gplx.xowa.bldrs.Xobldr_cfg; +import gplx.xowa.bldrs.cmds.Xob_ns_file_itm; +import gplx.xowa.bldrs.cmds.texts.sqls.Xob_ns_to_db_wkr__text; +import gplx.xowa.wikis.data.Xow_db_file; +import gplx.xowa.wikis.data.Xow_db_file_; +import 
gplx.xowa.wikis.data.Xow_db_mgr; +import gplx.xowa.wikis.data.Xowd_cfg_key_; +import gplx.xowa.wikis.data.tbls.Xowd_page_tbl; +import gplx.xowa.wikis.nss.Xow_ns; +import gplx.xowa.wikis.nss.Xow_ns_mgr; +import gplx.xowa.xtns.wbases.Wdata_wiki_mgr; +import gplx.xowa.xtns.wbases.imports.Xob_wdata_pid; +import gplx.xowa.xtns.wbases.imports.Xob_wdata_qid; + +class Xowb_json_dump_db { + private final Xoae_app app; private final Gfo_usr_dlg usr_dlg; private final Xowe_wiki wiki; private final Xob_bldr bldr; + private final Json_parser json_parser = new Json_parser(); + private final Xob_wdata_pid pid_cmd; private final Xob_wdata_qid qid_cmd; + private Xow_ns_mgr ns_mgr; private Xow_db_mgr db_mgr; + private Xowd_page_tbl page_tbl; private Xob_ns_to_db_mgr ns_to_db_mgr; + private Io_stream_zip_mgr text_zip_mgr; private byte text_zip_tid; + private DateAdp page_modified_on; + private int page_id = 0, page_count_main = 0; + public Xowb_json_dump_db(Xob_bldr bldr, Xowe_wiki wiki) { + this.app = bldr.App(); this.usr_dlg = app.Usr_dlg(); this.wiki = wiki; this.bldr = bldr; + this.ns_mgr = wiki.Ns_mgr(); + this.pid_cmd = new Xob_wdata_pid(wiki.Data__core_mgr().Db__wbase().Conn()); + this.qid_cmd = new Xob_wdata_qid(wiki.Data__core_mgr().Db__wbase().Conn()); + } + public void Parse_all_bgn(long src_fil_len, String src_fil_name) { + // load wiki + Xowe_wiki_.Create(wiki, src_fil_len, src_fil_name); + this.db_mgr = wiki.Data__core_mgr(); + this.page_tbl = db_mgr.Tbl__page(); + pid_cmd.Cmd_ctor(bldr, wiki); qid_cmd.Cmd_ctor(bldr, wiki); + + // create ns_mgr + wiki.Ns_mgr().Add_defaults(); + wiki.Ns_mgr().Add_new(Wdata_wiki_mgr.Ns_property, Wdata_wiki_mgr.Ns_property_name); + wiki.Ns_mgr().Init(); + + // init ns_map + this.ns_to_db_mgr = new Xob_ns_to_db_mgr(new Xob_ns_to_db_wkr__text(), db_mgr, Xobldr_cfg.Max_size__text(app)); + byte[] ns_file_map = Xobldr_cfg.New_ns_file_map(app, src_fil_len); + Xob_ns_file_itm.Init_ns_bldr_data(Xow_db_file_.Tid__text, wiki.Ns_mgr(), ns_file_map); 
+ + // start import + this.text_zip_mgr = wiki.Utl__zip_mgr(); + this.text_zip_tid = Xobldr_cfg.Zip_mode__text(app); + this.page_modified_on = Datetime_now.Get(); + page_tbl.Insert_bgn(); + qid_cmd.Page_wkr__bgn(); + pid_cmd.Pid__bgn(); + } + public void Parse_doc(byte[] json_bry) { + // parse to jdoc + Json_doc jdoc = json_parser.Parse(json_bry); + if (jdoc == null) {usr_dlg.Warn_many("", "", "wbase.json_dump:json is invalid: json=~{0}", json_bry); return;} + + // extract xid + byte[] id = jdoc.Get_val_as_bry_or(Bry__id_key, null); + if (id == null) {usr_dlg.Warn_many("", "", "wbase.json_dump:id is invalid: json=~{0}", json_bry); return;} + boolean jdoc_is_qid = Bry_.Has_at_bgn(id, Byte_ascii.Ltr_Q, 0); + Xow_ns ns = jdoc_is_qid ? ns_mgr.Ns_main() : ns_mgr.Ids_get_or_null(Wdata_wiki_mgr.Ns_property); + + // create page entry + int random_int = ns.Count() + 1; ns.Count_(random_int); + byte[] json_zip = text_zip_mgr.Zip(text_zip_tid, json_bry); + Xow_db_file text_db = ns_to_db_mgr.Get_by_ns(ns.Bldr_data(), json_zip.length); + db_mgr.Create_page(page_tbl, text_db.Tbl__text(), ++page_id, ns.Id(), id, Bool_.N, page_modified_on, json_zip, json_bry.length, random_int, text_db.Id(), -1); + + // insert text + if (jdoc_is_qid) { + qid_cmd.Qid__run(jdoc); + ++page_count_main; + } + else + pid_cmd.Pid__run(jdoc); + } + public void Parse_all_end() { + page_tbl.Insert_end(); + page_tbl.Create_idx(); + qid_cmd.Qid__end(); + pid_cmd.Pid__end(); + ns_to_db_mgr.Rls_all(); + + // cleanup core + Xow_db_file db_core = db_mgr.Db__core(); + db_core.Tbl__site_stats().Update(page_count_main, page_id, ns_mgr.Ns_file().Count()); // save page stats + db_core.Tbl__ns().Insert(ns_mgr); // save ns + db_mgr.Tbl__cfg().Insert_str(Xowd_cfg_key_.Grp__wiki_init, Xowd_cfg_key_.Key__init__modified_latest, page_modified_on.XtoStr_fmt(DateAdp_.Fmt_iso8561_date_time)); + } + private static final byte[] Bry__id_key = Bry_.new_a7("id"); +} diff --git 
a/400_xowa/src/gplx/xowa/xtns/wbases/stores/Wbase_doc_mgr.java b/400_xowa/src/gplx/xowa/xtns/wbases/stores/Wbase_doc_mgr.java index ace2bedf2..efdf300f0 100644 --- a/400_xowa/src/gplx/xowa/xtns/wbases/stores/Wbase_doc_mgr.java +++ b/400_xowa/src/gplx/xowa/xtns/wbases/stores/Wbase_doc_mgr.java @@ -1,6 +1,6 @@ /* XOWA: the XOWA Offline Wiki Application -Copyright (C) 2012-2017 gnosygnu@gmail.com +Copyright (C) 2012-2020 gnosygnu@gmail.com XOWA is licensed under the terms of the General Public License (GPL) Version 3, or alternatively under the terms of the Apache License Version 2.0. @@ -13,147 +13,164 @@ The terms of each license can be found in the source code repository: GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt */ -package gplx.xowa.xtns.wbases.stores; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.wbases.*; -import gplx.core.logs.*; import gplx.core.primitives.*; -import gplx.langs.jsons.*; -import gplx.xowa.wikis.pages.*; -import gplx.xowa.xtns.wbases.core.*; -public class Wbase_doc_mgr { - private final Wdata_wiki_mgr wbase_mgr; - private final Wbase_qid_mgr qid_mgr; - private Wbase_doc_cache doc_cache; - private final Object thread_lock = new Object(); - private final Ordered_hash wbase_db_hash = Ordered_hash_.New_bry(); - private final Gfo_log_wtr wbase_db_log; - public Wbase_doc_mgr(Wdata_wiki_mgr wbase_mgr, Wbase_qid_mgr qid_mgr) { - this.wbase_mgr = wbase_mgr; - this.qid_mgr = qid_mgr; - this.doc_cache = new Wbase_doc_cache__hash(); - this.wbase_db_log = Gfo_log_wtr.New_dflt("wbase", "db_log_{0}.csv"); - } - public void Enabled_(boolean v) {this.enabled = v;} private boolean enabled; - public void Cache__init(String cache_type, long cache_max, long compress_size, long used_weight) { - if (String_.Eq(cache_type, "null")) doc_cache = new Wbase_doc_cache__null(); - else if (String_.Eq(cache_type, "hash")) 
doc_cache = new Wbase_doc_cache__hash(); - else if (String_.Eq(cache_type, "mru" )) doc_cache = new Wbase_doc_cache__mru(cache_max, compress_size, used_weight); - else throw Err_.new_unhandled_default(cache_type); - } - public void Cleanup() { - doc_cache.Term(); - wbase_db_log__flush(); - } - private void wbase_db_log__flush() { - int len = wbase_db_hash.Len(); - Bry_bfr tmp_bfr = Bry_bfr_.New(); - for (int i = 0; i < len; i++) { - Wbase_db_log_itm itm = (Wbase_db_log_itm)wbase_db_hash.Get_at(i); - tmp_bfr.Add(itm.Ttl()); - tmp_bfr.Add_byte_pipe().Add_int_variable(itm.Count()); - tmp_bfr.Add_byte_pipe().Add_int_variable(itm.Elapsed()); - tmp_bfr.Add_byte_nl(); - wbase_db_log.Write(tmp_bfr); - } - wbase_db_log.Flush(); - } - public void Clear() { - synchronized (thread_lock) { // LOCK:app-level - doc_cache.Clear(); - } - } - public Wdata_doc Get_by_ttl_or_null(Xowe_wiki wiki, Xoa_ttl ttl) { // "enwiki", "Earth" -> "Q2" wdoc - byte[] qid_bry = qid_mgr.Get_qid_or_null(wiki, ttl); // EX: "enwiki", "Earth" -> "Q2" - return qid_bry == null ? 
null : this.Get_by_exact_id_or_null(qid_bry); - } - public Wdata_doc Get_by_xid_or_null(byte[] xid) {return Get_by_loose_id_or_null(Wbase_pid.Prepend_property_if_needed(xid));}// scribunto passes either p1 or q1; convert p1 to "Property:p1" - public Wdata_doc Get_by_loose_id_or_null(byte[] ttl_bry) { - return Get_by_exact_id_or_null(ttl_bry); - } - public Wdata_doc Get_by_exact_id_or_null(byte[] ttl_bry) {// must correct case and ns; EX:"Q2" or "Property:P1"; not "q2" or "P2" - // load from cache - Wdata_doc rv = null; - synchronized (thread_lock) { - rv = doc_cache.Get_or_null(ttl_bry); - if (rv == null) { - // load from db - rv = Load_wdoc_or_null(ttl_bry); - if (rv == null) return null; // page not found - Add(ttl_bry, rv);// NOTE: use ttl_bry, not rv.Qid; allows subsequent lookups to skip this redirect cycle - } - } - return rv; - } - private Wdata_doc Load_wdoc_or_null(byte[] ttl_bry) { // EX:"Q2" or "Property:P1" - if (!enabled) return null; - - // loggging - Wbase_db_log_itm wbase_db_itm = (Wbase_db_log_itm)wbase_db_hash.Get_by(ttl_bry); - if (wbase_db_itm == null) { - wbase_db_itm = new Wbase_db_log_itm(ttl_bry); - wbase_db_hash.Add(ttl_bry, wbase_db_itm); - } - long time_bgn = gplx.core.envs.System_.Ticks(); - - Wdata_doc rv = null; - synchronized (thread_lock) { // LOCK:app-level; jdoc_parser; moved synchronized higher up; DATE:2016-09-03 - byte[] cur_ttl_bry = ttl_bry; - int load_count = -1; - while (load_count < 2) { // limit to 2 tries (i.e.: 1 redirect) - // parse ttl; note that "q2" will get parsed to "Q2" b/c of ns casing - Xoa_ttl cur_ttl = wbase_mgr.Wdata_wiki().Ttl_parse(cur_ttl_bry); - if (cur_ttl == null) { - Gfo_usr_dlg_.Instance.Warn_many("", "", "invalid wbase ttl: orig=~{0} cur=~{1}", ttl_bry, cur_ttl_bry); - break; - } - - // get page - Xoae_page page = wbase_mgr.Wdata_wiki().Data_mgr().Load_page_by_ttl(cur_ttl); - if (!page.Db().Page().Exists()) break; - - // parse jdoc - Json_doc jdoc = 
wbase_mgr.Jdoc_parser().Parse(page.Db().Text().Text_bry()); - if (jdoc == null) { - Gfo_usr_dlg_.Instance.Warn_many("", "", "invalid jdoc for ttl: orig=~{0} cur=~{1}", ttl_bry, cur_ttl_bry); - break; - } - - // check for redirect; EX: {"entity":"Q22350516","redirect":"Q21006972"}; PAGE:fr.w:Tour_du_Táchira_2016; DATE:2016-08-13 - Json_nde jdoc_root = jdoc.Root_nde(); - byte[] redirect_ttl = jdoc_root.Get_as_bry_or(Bry__redirect, null); - if (redirect_ttl != null) { - cur_ttl_bry = redirect_ttl; - load_count++; - continue; - } - - // is json doc, and not a redirect; return - rv = new Wdata_doc(wbase_mgr, jdoc, cur_ttl_bry); - break; - } - if (rv == null && load_count >= 2) - Gfo_usr_dlg_.Instance.Warn_many("", "", "too many redirects for ttl: orig=~{0} cur=~{1}", ttl_bry, cur_ttl_bry); - } - - wbase_db_itm.Update(gplx.core.envs.System_.Ticks__elapsed_in_frac(time_bgn)); - return rv; - } - private static final byte[] Bry__redirect = Bry_.new_a7("redirect"); - - public void Add(byte[] full_db, Wdata_doc page) { // TEST: - synchronized (thread_lock) { // LOCK:app-level - if (doc_cache.Get_or_null(full_db) == null) - doc_cache.Add(full_db, page); - } - } -} -class Wbase_db_log_itm { - public Wbase_db_log_itm(byte[] ttl) { - this.ttl = ttl; - } - public byte[] Ttl() {return ttl;} private final byte[] ttl; - public int Count() {return count;} private int count; - public int Elapsed() {return elapsed;} private int elapsed; - public void Update(int elapsed_diff) { - count++; - this.elapsed += elapsed_diff; - } -} +package gplx.xowa.xtns.wbases.stores; + +import gplx.Bry_; +import gplx.Bry_bfr; +import gplx.Bry_bfr_; +import gplx.Err_; +import gplx.Gfo_usr_dlg_; +import gplx.Ordered_hash; +import gplx.Ordered_hash_; +import gplx.String_; +import gplx.core.logs.Gfo_log_wtr; +import gplx.langs.jsons.Json_doc; +import gplx.langs.jsons.Json_nde; +import gplx.langs.jsons.Json_parser; +import gplx.xowa.Xoa_ttl; +import gplx.xowa.Xoae_page; +import gplx.xowa.Xowe_wiki; +import 
gplx.xowa.xtns.wbases.Wdata_doc; +import gplx.xowa.xtns.wbases.Wdata_wiki_mgr; +import gplx.xowa.xtns.wbases.core.Wbase_pid; + +public class Wbase_doc_mgr { + private final Wdata_wiki_mgr wbase_mgr; + private final Wbase_qid_mgr qid_mgr; + private Wbase_doc_cache doc_cache; + private final Object thread_lock = new Object(); + private final Ordered_hash wbase_db_hash = Ordered_hash_.New_bry(); + private final Gfo_log_wtr wbase_db_log; + private final Json_parser jsonParser = new Json_parser(); + public Wbase_doc_mgr(Wdata_wiki_mgr wbase_mgr, Wbase_qid_mgr qid_mgr) { + this.wbase_mgr = wbase_mgr; + this.qid_mgr = qid_mgr; + this.doc_cache = new Wbase_doc_cache__hash(); + this.wbase_db_log = Gfo_log_wtr.New_dflt("wbase", "db_log_{0}.csv"); + } + public void Enabled_(boolean v) {this.enabled = v;} private boolean enabled; + public void Cache__init(String cache_type, long cache_max, long compress_size, long used_weight) { + if (String_.Eq(cache_type, "null")) doc_cache = new Wbase_doc_cache__null(); + else if (String_.Eq(cache_type, "hash")) doc_cache = new Wbase_doc_cache__hash(); + else if (String_.Eq(cache_type, "mru" )) doc_cache = new Wbase_doc_cache__mru(cache_max, compress_size, used_weight); + else throw Err_.new_unhandled_default(cache_type); + } + public void Cleanup() { + doc_cache.Term(); + wbase_db_log__flush(); + } + private void wbase_db_log__flush() { + int len = wbase_db_hash.Len(); + Bry_bfr tmp_bfr = Bry_bfr_.New(); + for (int i = 0; i < len; i++) { + Wbase_db_log_itm itm = (Wbase_db_log_itm)wbase_db_hash.Get_at(i); + tmp_bfr.Add(itm.Ttl()); + tmp_bfr.Add_byte_pipe().Add_int_variable(itm.Count()); + tmp_bfr.Add_byte_pipe().Add_int_variable(itm.Elapsed()); + tmp_bfr.Add_byte_nl(); + wbase_db_log.Write(tmp_bfr); + } + wbase_db_log.Flush(); + } + public void Clear() { + synchronized (thread_lock) { // LOCK:app-level + doc_cache.Clear(); + } + } + public Wdata_doc Get_by_ttl_or_null(Xowe_wiki wiki, Xoa_ttl ttl) { // "enwiki", "Earth" -> "Q2" wdoc + byte[] 
qid_bry = qid_mgr.Get_qid_or_null(wiki, ttl); // EX: "enwiki", "Earth" -> "Q2" + return qid_bry == null ? null : this.Get_by_exact_id_or_null(qid_bry); + } + public Wdata_doc Get_by_xid_or_null(byte[] xid) {return Get_by_loose_id_or_null(Wbase_pid.Prepend_property_if_needed(xid));}// scribunto passes either p1 or q1; convert p1 to "Property:p1" + public Wdata_doc Get_by_loose_id_or_null(byte[] ttl_bry) { + return Get_by_exact_id_or_null(ttl_bry); + } + public Wdata_doc Get_by_exact_id_or_null(byte[] ttl_bry) {// must correct case and ns; EX:"Q2" or "Property:P1"; not "q2" or "P2" + // load from cache + Wdata_doc rv = null; + synchronized (thread_lock) { + rv = doc_cache.Get_or_null(ttl_bry); + if (rv == null) { + // load from db + rv = Load_wdoc_or_null(ttl_bry); + if (rv == null) return null; // page not found + Add(ttl_bry, rv);// NOTE: use ttl_bry, not rv.Qid; allows subsequent lookups to skip this redirect cycle + } + } + return rv; + } + private Wdata_doc Load_wdoc_or_null(byte[] ttl_bry) { // EX:"Q2" or "Property:P1" + if (!enabled) return null; + + // loggging + Wbase_db_log_itm wbase_db_itm = (Wbase_db_log_itm)wbase_db_hash.Get_by(ttl_bry); + if (wbase_db_itm == null) { + wbase_db_itm = new Wbase_db_log_itm(ttl_bry); + wbase_db_hash.Add(ttl_bry, wbase_db_itm); + } + long time_bgn = gplx.core.envs.System_.Ticks(); + + Wdata_doc rv = null; + synchronized (thread_lock) { // LOCK:app-level; jdoc_parser; moved synchronized higher up; DATE:2016-09-03 + byte[] cur_ttl_bry = ttl_bry; + int load_count = -1; + while (load_count < 2) { // limit to 2 tries (i.e.: 1 redirect) + // parse ttl; note that "q2" will get parsed to "Q2" b/c of ns casing + Xoa_ttl cur_ttl = wbase_mgr.Wdata_wiki().Ttl_parse(cur_ttl_bry); + if (cur_ttl == null) { + Gfo_usr_dlg_.Instance.Warn_many("", "", "invalid wbase ttl: orig=~{0} cur=~{1}", ttl_bry, cur_ttl_bry); + break; + } + + // get page + Xoae_page page = wbase_mgr.Wdata_wiki().Data_mgr().Load_page_by_ttl(cur_ttl); + if 
(!page.Db().Page().Exists()) break; + + // parse jdoc + Json_doc jdoc = jsonParser.Parse(page.Db().Text().Text_bry()); + if (jdoc == null) { + Gfo_usr_dlg_.Instance.Warn_many("", "", "invalid jdoc for ttl: orig=~{0} cur=~{1}", ttl_bry, cur_ttl_bry); + break; + } + + // check for redirect; EX: {"entity":"Q22350516","redirect":"Q21006972"}; PAGE:fr.w:Tour_du_Táchira_2016; DATE:2016-08-13 + Json_nde jdoc_root = jdoc.Root_nde(); + byte[] redirect_ttl = jdoc_root.Get_as_bry_or(Bry__redirect, null); + if (redirect_ttl != null) { + cur_ttl_bry = redirect_ttl; + load_count++; + continue; + } + + // is json doc, and not a redirect; return + rv = new Wdata_doc(wbase_mgr, jdoc, cur_ttl_bry); + break; + } + if (rv == null && load_count >= 2) + Gfo_usr_dlg_.Instance.Warn_many("", "", "too many redirects for ttl: orig=~{0} cur=~{1}", ttl_bry, cur_ttl_bry); + } + + wbase_db_itm.Update(gplx.core.envs.System_.Ticks__elapsed_in_frac(time_bgn)); + return rv; + } + private static final byte[] Bry__redirect = Bry_.new_a7("redirect"); + + public void Add(byte[] full_db, Wdata_doc page) { // TEST: + synchronized (thread_lock) { // LOCK:app-level + if (doc_cache.Get_or_null(full_db) == null) + doc_cache.Add(full_db, page); + } + } +} +class Wbase_db_log_itm { + public Wbase_db_log_itm(byte[] ttl) { + this.ttl = ttl; + } + public byte[] Ttl() {return ttl;} private final byte[] ttl; + public int Count() {return count;} private int count; + public int Elapsed() {return elapsed;} private int elapsed; + public void Update(int elapsed_diff) { + count++; + this.elapsed += elapsed_diff; + } +}