mirror of
https://github.com/gnosygnu/xowa.git
synced 2025-06-01 06:54:20 +00:00
Mass_parse: Do not reuse json_parser else threading issues with itwiki [#818]
This commit is contained in:
parent
541f5f6524
commit
845253af79
@ -106,7 +106,6 @@ public class Wdata_wiki_mgr implements Gfo_evt_itm, Gfo_invk {
|
||||
}
|
||||
return wdata_wiki;
|
||||
} private Xowe_wiki wdata_wiki;
|
||||
public Json_parser Jdoc_parser() {return jdoc_parser;} private Json_parser jdoc_parser = new Json_parser();
|
||||
public void Init_by_app() {}
|
||||
public Wdata_doc_parser Wdoc_parser(Json_doc jdoc) {
|
||||
Json_kv itm_0 = Json_kv.Cast(jdoc.Root_nde().Get_at(0)); // get 1st node
|
||||
|
@ -29,6 +29,7 @@ import gplx.Tfds;
|
||||
import gplx.core.primitives.Gfo_number_parser;
|
||||
import gplx.core.primitives.Int_obj_ref;
|
||||
import gplx.langs.jsons.Json_doc;
|
||||
import gplx.langs.jsons.Json_parser;
|
||||
import gplx.xowa.Xoa_ttl;
|
||||
import gplx.xowa.Xoae_app;
|
||||
import gplx.xowa.Xoae_page;
|
||||
@ -62,6 +63,7 @@ public class Wdata_wiki_mgr_fxt {
|
||||
private Xoae_app app; private Xowe_wiki wiki; private Wdata_doc_bldr wdoc_bldr;
|
||||
private final Wdata_xwiki_link_wtr wdata_lang_wtr = new Wdata_xwiki_link_wtr();
|
||||
private final Bry_bfr tmp_time_bfr = Bry_bfr_.New();
|
||||
private final Json_parser jsonParser = new Json_parser();
|
||||
public Xowe_wiki Wiki() {return parser_fxt.Wiki();}
|
||||
public Wdata_wiki_mgr_fxt Init() {return Init(new Xop_fxt(), true);}
|
||||
public Wdata_wiki_mgr_fxt Init(Xop_fxt parser_fxt, boolean reset) {
|
||||
@ -224,7 +226,7 @@ public class Wdata_wiki_mgr_fxt {
|
||||
byte[] raw_bry = Bry_.new_a7(raw_str);
|
||||
raw_bry = Bry_.new_u8(Json_doc.Make_str_by_apos(raw_str));
|
||||
Bry_bfr bfr = wiki.Utl__bfr_mkr().Get_b512();
|
||||
Wdata_wiki_mgr.Write_json_as_html(wdata_mgr.Jdoc_parser(), bfr, raw_bry);
|
||||
Wdata_wiki_mgr.Write_json_as_html(jsonParser, bfr, raw_bry);
|
||||
Tfds.Eq(expd, bfr.To_str_and_rls());
|
||||
}
|
||||
public static String New_json(String entity_id, String grp_key, String[] grp_vals) {
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||
Copyright (C) 2012-2020 gnosygnu@gmail.com
|
||||
|
||||
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||
or alternatively under the terms of the Apache License Version 2.0.
|
||||
@ -13,414 +13,451 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.xtns.wbases.imports; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.wbases.*;
|
||||
import gplx.dbs.*; import gplx.dbs.cfgs.*; import gplx.dbs.engines.sqlite.*; import gplx.xowa.bldrs.*; import gplx.xowa.files.fsdb.*; import gplx.xowa.files.origs.*;
|
||||
import gplx.xowa.bldrs.wkrs.*;
|
||||
import gplx.langs.jsons.*;
|
||||
import gplx.xowa.langs.*;
|
||||
import gplx.xowa.wikis.nss.*;
|
||||
import gplx.xowa.bldrs.cmds.*; import gplx.xowa.wikis.data.tbls.*;
|
||||
import gplx.xowa.xtns.wbases.*; import gplx.xowa.xtns.wbases.core.*; import gplx.xowa.xtns.wbases.claims.*; import gplx.xowa.xtns.wbases.claims.itms.*;
|
||||
public class Xob_wdata_db_cmd extends Xob_dump_mgr_base implements Xob_cmd {
|
||||
private Wdata_tbl_mgr tbl_mgr = new Wdata_tbl_mgr();
|
||||
private Wdata_wiki_mgr wdata_mgr; private Json_parser json_parser;
|
||||
private byte[] lang_key = Xol_lang_itm_.Key_en;
|
||||
public Xob_wdata_db_cmd(Xob_bldr bldr, Xowe_wiki wiki) {this.Cmd_ctor(bldr, wiki);}
|
||||
@Override public String Cmd_key() {return Xob_cmd_keys.Key_wbase_db;}
|
||||
@Override public byte Init_redirect() {return Bool_.N_byte;} // json will never be found in a redirect
|
||||
@Override public int[] Init_ns_ary() {return Int_ary_.New(Xow_ns_.Tid__main, Wdata_wiki_mgr.Ns_property);}
|
||||
@Override protected void Init_reset(Db_conn conn) {
|
||||
Db_cfg_tbl cfg_tbl = gplx.xowa.wikis.data.Xowd_cfg_tbl_.New(conn);
|
||||
cfg_tbl.Delete_all();
|
||||
}
|
||||
@Override protected Db_conn Init_db_file() {
|
||||
Xob_db_file tbl_file = Xob_db_file.New(wiki.Fsys_mgr().Root_dir(), "wdata_db.sqlite3");
|
||||
Db_conn conn = tbl_file.Conn();
|
||||
tbl_mgr.Init(conn);
|
||||
return conn;
|
||||
}
|
||||
@Override protected void Cmd_bgn_end() {
|
||||
wdata_mgr = bldr.App().Wiki_mgr().Wdata_mgr();
|
||||
json_parser = wdata_mgr.Jdoc_parser();
|
||||
tbl_mgr.Conn().Txn_bgn("bldr__wdata_db");
|
||||
}
|
||||
@Override public void Exec_pg_itm_hook(int ns_ord, Xow_ns ns, Xowd_page_itm page, byte[] page_src) {
|
||||
Json_doc jdoc = json_parser.Parse(page_src); if (jdoc == null) return; // not a json document
|
||||
Wdata_doc wdoc = new Wdata_doc(wdata_mgr, jdoc, page.Ttl_page_db());
|
||||
tbl_mgr.Exec_insert_by_wdoc(lang_key, wdata_mgr, page.Id(), wdoc);
|
||||
}
|
||||
@Override public void Exec_commit_hook() {
|
||||
tbl_mgr.Conn().Txn_sav();
|
||||
}
|
||||
@Override public void Exec_end_hook() {
|
||||
tbl_mgr.Term(usr_dlg);
|
||||
}
|
||||
}
|
||||
class Wdata_tbl_mgr {
|
||||
private Wdata_tbl_base[] tbls; private int tbls_len;
|
||||
public Wdata_tbl_mgr() {
|
||||
tbls = new Wdata_tbl_base[] {label_tbl, alias_tbl, description_tbl, link_tbl, claim_tbl, claim_time_tbl, claim_geo_tbl};
|
||||
tbls_len = tbls.length;
|
||||
}
|
||||
public Db_conn Conn() {return conn;} private Db_conn conn;
|
||||
public Wdata_label_tbl Label_tbl() {return label_tbl;} private Wdata_label_tbl label_tbl = new Wdata_label_tbl();
|
||||
public Wdata_alias_tbl Alias_tbl() {return alias_tbl;} private Wdata_alias_tbl alias_tbl = new Wdata_alias_tbl();
|
||||
public Wdata_description_tbl Description_tbl() {return description_tbl;} private Wdata_description_tbl description_tbl = new Wdata_description_tbl();
|
||||
public Wdata_link_tbl Link_tbl() {return link_tbl;} private Wdata_link_tbl link_tbl = new Wdata_link_tbl();
|
||||
public Wbase_claim_tbl Claim_tbl() {return claim_tbl;} private Wbase_claim_tbl claim_tbl = new Wbase_claim_tbl();
|
||||
public Wbase_claim_time_tbl Claim_time_tbl() {return claim_time_tbl;} private Wbase_claim_time_tbl claim_time_tbl = new Wbase_claim_time_tbl();
|
||||
public Wbase_claim_geo_tbl Claim_geo_tbl() {return claim_geo_tbl;} private Wbase_claim_geo_tbl claim_geo_tbl = new Wbase_claim_geo_tbl();
|
||||
public void Init(Db_conn conn) {
|
||||
this.conn = conn;
|
||||
for (int i = 0; i < tbls_len; i++)
|
||||
tbls[i].Init(conn);
|
||||
}
|
||||
public void Exec_insert_by_wdoc(byte[] lang_key, Wdata_wiki_mgr wdata_mgr, int page_id, Wdata_doc wdoc) {
|
||||
for (int i = 0; i < tbls_len; i++)
|
||||
tbls[i].Exec_insert_by_wdoc(lang_key, wdata_mgr, page_id, wdoc);
|
||||
}
|
||||
public void Term(Gfo_usr_dlg usr_dlg) {
|
||||
conn.Txn_end();
|
||||
for (int i = 0; i < tbls_len; i++)
|
||||
tbls[i].Make_idxs(usr_dlg, conn);
|
||||
}
|
||||
}
|
||||
abstract class Wdata_tbl_base {
|
||||
public abstract String Tbl_name();
|
||||
public abstract String Tbl_create_sql();
|
||||
public abstract Db_idx_itm[] Idx_ary();
|
||||
public abstract String[] Fld_ary();
|
||||
@gplx.Virtual public void Exec_insert_by_wdoc(byte[] lang_key, Wdata_wiki_mgr wdata_mgr, int page_id, Wdata_doc wdoc) {}
|
||||
public void Make_tbl(Db_conn p) {Sqlite_engine_.Tbl_create(p, this.Tbl_name(), this.Tbl_create_sql());}
|
||||
public void Make_idxs(Gfo_usr_dlg usr_dlg, Db_conn p) {
|
||||
Sqlite_engine_.Idx_create(usr_dlg, p, this.Tbl_name(), this.Idx_ary());
|
||||
}
|
||||
public Db_stmt Make_insert_stmt(Db_conn p) {return Db_stmt_.new_insert_(p, this.Tbl_name(), this.Fld_ary());}
|
||||
public Db_stmt Insert_stmt() {return insert_stmt;} private Db_stmt insert_stmt;
|
||||
public void Init(Db_conn conn) {
|
||||
this.Make_tbl(conn);
|
||||
insert_stmt = this.Make_insert_stmt(conn);
|
||||
}
|
||||
public static void Exec_insert_kvs(Db_stmt stmt, int page_id, Ordered_hash hash) {
|
||||
int len = hash.Count();
|
||||
for (int i = 0; i < len; i++) {
|
||||
Json_kv kv = (Json_kv)hash.Get_at(i);
|
||||
stmt.Clear()
|
||||
.Val_int(page_id)
|
||||
.Val_bry_as_str(kv.Key().Data_bry())
|
||||
.Val_bry_as_str(kv.Val().Data_bry())
|
||||
.Exec_insert();
|
||||
}
|
||||
}
|
||||
}
|
||||
class Wdata_label_tbl extends Wdata_tbl_base {
|
||||
@Override public String Tbl_name() {return "wdata_label";}
|
||||
@Override public String Tbl_create_sql() {
|
||||
return String_.Concat_lines_nl
|
||||
( "CREATE TABLE IF NOT EXISTS wdata_label"
|
||||
, "( page_id integer NOT NULL"
|
||||
, ", lang_key varchar(16) NOT NULL"
|
||||
, ", val varchar(255) NOT NULL"
|
||||
, ");"
|
||||
);
|
||||
}
|
||||
@Override public Db_idx_itm[] Idx_ary() {return new Db_idx_itm[] {Db_idx_itm.sql_("CREATE INDEX IF NOT EXISTS wdata_label__main ON wdata_label (page_id, lang_key);")};}
|
||||
@Override public String[] Fld_ary() {return new String[] {Fld_page_id, Fld_lang_key, Fld_val};}
|
||||
@Override public void Exec_insert_by_wdoc(byte[] lang_key, Wdata_wiki_mgr wdata_mgr, int page_id, Wdata_doc wdoc) {Exec_insert_kvs(this.Insert_stmt(), page_id, wdoc.Label_list());}
|
||||
private static final String Fld_page_id = "page_id", Fld_lang_key = "lang_key", Fld_val = "val";
|
||||
}
|
||||
class Wdata_alias_tbl extends Wdata_tbl_base {
|
||||
@Override public String Tbl_name() {return "wdata_alias";}
|
||||
@Override public String Tbl_create_sql() {
|
||||
return String_.Concat_lines_nl
|
||||
( "CREATE TABLE IF NOT EXISTS wdata_alias"
|
||||
, "( page_id integer NOT NULL"
|
||||
, ", lang_key varchar(16) NOT NULL"
|
||||
, ", val varchar(255) NOT NULL"
|
||||
, ");"
|
||||
);
|
||||
}
|
||||
@Override public Db_idx_itm[] Idx_ary() {return new Db_idx_itm[] {Db_idx_itm.sql_("CREATE INDEX IF NOT EXISTS wdata_alias__main ON wdata_alias (page_id, lang_key);")};}
|
||||
@Override public String[] Fld_ary() {return new String[] {Fld_page_id, Fld_lang_key, Fld_val};}
|
||||
@Override public void Exec_insert_by_wdoc(byte[] lang_key, Wdata_wiki_mgr wdata_mgr, int page_id, Wdata_doc wdoc) {
|
||||
Ordered_hash hash = wdoc.Alias_list();
|
||||
int len = hash.Count();
|
||||
Db_stmt insert_stmt = this.Insert_stmt();
|
||||
for (int i = 0; i < len; i++) {
|
||||
Json_kv kv = (Json_kv)hash.Get_at(i);
|
||||
byte[] key = kv.Key().Data_bry();
|
||||
Json_grp val_grp = (Json_grp)kv.Val();
|
||||
int val_grp_len = val_grp.Len();
|
||||
for (int j = 0; j < val_grp_len; j++) {
|
||||
Json_itm val_itm = val_grp.Get_at(j);
|
||||
byte[] val = Bry_.Empty;
|
||||
if (val_itm.Tid() == Json_itm_.Tid__str)
|
||||
val = val_itm.Data_bry();
|
||||
else if (val_itm.Tid() == Json_itm_.Tid__kv) { // EX: q80 and de aliases
|
||||
val = ((Json_kv)val_itm).Val().Data_bry();
|
||||
}
|
||||
insert_stmt.Clear()
|
||||
.Val_int(page_id)
|
||||
.Val_bry_as_str(key)
|
||||
.Val_bry_as_str(val)
|
||||
.Exec_insert();
|
||||
}
|
||||
}
|
||||
}
|
||||
private static final String Fld_page_id = "page_id", Fld_lang_key = "lang_key", Fld_val = "val";
|
||||
}
|
||||
class Wdata_description_tbl extends Wdata_tbl_base {
|
||||
@Override public String Tbl_name() {return "wdata_description";}
|
||||
@Override public String Tbl_create_sql() {
|
||||
return String_.Concat_lines_nl
|
||||
( "CREATE TABLE IF NOT EXISTS wdata_description"
|
||||
, "( page_id integer NOT NULL"
|
||||
, ", lang_key varchar(16) NOT NULL"
|
||||
, ", val varchar(255) NOT NULL"
|
||||
, ");"
|
||||
);
|
||||
}
|
||||
@Override public Db_idx_itm[] Idx_ary() {return new Db_idx_itm[] {Db_idx_itm.sql_("CREATE INDEX IF NOT EXISTS wdata_description__main ON wdata_description (page_id, lang_key);")};}
|
||||
@Override public void Exec_insert_by_wdoc(byte[] lang_key, Wdata_wiki_mgr wdata_mgr, int page_id, Wdata_doc wdoc) {Exec_insert_kvs(this.Insert_stmt(), page_id, wdoc.Descr_list());}
|
||||
@Override public String[] Fld_ary() {return new String[] {Fld_page_id, Fld_lang_key, Fld_val};}
|
||||
private static final String Fld_page_id = "page_id", Fld_lang_key = "lang_key", Fld_val = "val";
|
||||
}
|
||||
class Wdata_link_tbl extends Wdata_tbl_base {
|
||||
@Override public String Tbl_name() {return "wdata_link";}
|
||||
@Override public String Tbl_create_sql() {
|
||||
return String_.Concat_lines_nl
|
||||
( "CREATE TABLE IF NOT EXISTS wdata_link"
|
||||
, "( page_id integer NOT NULL"
|
||||
, ", wiki_key varchar(255) NOT NULL"
|
||||
, ", val varchar(255) NOT NULL"
|
||||
, ");"
|
||||
);
|
||||
}
|
||||
@Override public Db_idx_itm[] Idx_ary() {return new Db_idx_itm[] {Db_idx_itm.sql_("CREATE INDEX IF NOT EXISTS wdata_link__main ON wdata_link (page_id, wiki_key);")};}
|
||||
@Override public String[] Fld_ary() {return new String[] {Fld_page_id, Fld_wiki_key, Fld_val};}
|
||||
@Override public void Exec_insert_by_wdoc(byte[] lang_key, Wdata_wiki_mgr wdata_mgr, int page_id, Wdata_doc wdoc) {
|
||||
Ordered_hash hash = wdoc.Slink_list();
|
||||
int len = hash.Count();
|
||||
Db_stmt insert_stmt = this.Insert_stmt();
|
||||
for (int i = 0; i < len; i++) {
|
||||
Json_kv kv = (Json_kv)hash.Get_at(i);
|
||||
byte[] key = kv.Key().Data_bry();
|
||||
Json_itm kv_val = kv.Val();
|
||||
byte[] val = Bry_.Empty;
|
||||
if (kv_val.Tid() == Json_itm_.Tid__str)
|
||||
val = kv_val.Data_bry();
|
||||
else {
|
||||
Json_nde val_nde = (Json_nde)kv.Val();
|
||||
Json_kv val_name_kv = (Json_kv)val_nde.Get_at(0); // ASSUME: 1st item is always "name" kv; EX: "name":"Earth"
|
||||
val = val_name_kv.Val().Data_bry();
|
||||
}
|
||||
insert_stmt.Clear()
|
||||
.Val_int(page_id)
|
||||
.Val_bry_as_str(key)
|
||||
.Val_bry_as_str(val)
|
||||
.Exec_insert();
|
||||
}
|
||||
}
|
||||
private static final String Fld_page_id = "page_id", Fld_wiki_key = "wiki_key", Fld_val = "val";
|
||||
}
|
||||
class Wbase_claim_tbl extends Wdata_tbl_base {
|
||||
@Override public String Tbl_name() {return "wdata_claim";}
|
||||
@Override public String Tbl_create_sql() {
|
||||
return String_.Concat_lines_nl
|
||||
( "CREATE TABLE IF NOT EXISTS wdata_claim"
|
||||
, "( claim_id integer NOT NULL"
|
||||
, ", page_id integer NOT NULL"
|
||||
, ", prop_id integer NOT NULL" // 60; P60
|
||||
, ", val_tid smallint NOT NULL" // String;wikibase-entity-id;time;globecoordinate
|
||||
, ", entity_tid smallint NOT NULL" // null;item
|
||||
, ", entity_id integer NOT NULL" // null;123
|
||||
, ", val_text varchar(255) NOT NULL"
|
||||
, ", guid varchar(64) NOT NULL"
|
||||
, ", rank integer NOT NULL"
|
||||
, ", ref_count integer NOT NULL"
|
||||
, ", qual_count integer NOT NULL"
|
||||
, ");"
|
||||
);
|
||||
}
|
||||
@Override public Db_idx_itm[] Idx_ary() {
|
||||
return new Db_idx_itm[]
|
||||
{ Db_idx_itm.sql_("CREATE INDEX IF NOT EXISTS wdata_claim__main ON wdata_claim (page_id, prop_id, val_tid, entity_tid);")
|
||||
};
|
||||
}
|
||||
@Override public String[] Fld_ary() {return new String[] {Fld_claim_id, Fld_page_id, Fld_prop_id, Fld_val_tid, Fld_entity_tid, Fld_entity_id, Fld_val_text, Fld_guid, Fld_rank, Fld_ref_count, Fld_qual_count};}
|
||||
private int next_claim_id = 0;
|
||||
private Xob_wdata_db_visitor visitor;
|
||||
@Override public void Exec_insert_by_wdoc(byte[] lang_key, Wdata_wiki_mgr wdata_mgr, int page_id, Wdata_doc wdoc) {
|
||||
if (visitor == null) visitor = new Xob_wdata_db_visitor(wdata_mgr);
|
||||
visitor.Init(lang_key);
|
||||
Ordered_hash list = wdoc.Claim_list();
|
||||
int list_len = list.Count();
|
||||
for (int i = 0; i < list_len; i++) {
|
||||
Wbase_claim_grp claim_grp = (Wbase_claim_grp)list.Get_at(i);
|
||||
int itms_len = claim_grp.Len();
|
||||
int entity_id = -1;
|
||||
byte[] claim_val = Bry_.Empty;
|
||||
for (int j = 0; j < itms_len; j++) {
|
||||
Wbase_claim_base claim = claim_grp.Get_at(j);
|
||||
claim.Welcome(visitor);
|
||||
claim_val = visitor.Rv();
|
||||
Exec_insert(++next_claim_id, page_id, claim_grp.Id(), claim.Val_tid(), claim.Snak_tid(), entity_id, claim_val, claim.Wguid(), claim.Rank_tid(), 0, 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
public void Exec_insert(int claim_id, int page_id, int prop_id, byte val_tid, byte entity_tid, int entity_id, byte[] val_text, byte[] guid, int rank, int ref_count, int qual_count) {
|
||||
if (val_text == null) val_text = Bry_.Empty;
|
||||
if (guid == null) guid = Bry_.Empty;
|
||||
this.Insert_stmt().Clear()
|
||||
.Val_int(claim_id)
|
||||
.Val_int(page_id)
|
||||
.Val_int(prop_id)
|
||||
.Val_byte(val_tid)
|
||||
.Val_byte(entity_tid)
|
||||
.Val_int(entity_id)
|
||||
.Val_bry_as_str(val_text)
|
||||
.Val_bry_as_str(guid)
|
||||
.Val_int(rank)
|
||||
.Val_int(ref_count)
|
||||
.Val_int(qual_count)
|
||||
.Exec_insert();
|
||||
}
|
||||
private static final String Fld_claim_id = "claim_id", Fld_page_id = "page_id", Fld_prop_id = "prop_id", Fld_val_tid = "val_tid", Fld_entity_tid = "entity_tid", Fld_entity_id = "entity_id", Fld_val_text = "val_text"
|
||||
, Fld_guid = "guid", Fld_rank = "rank", Fld_ref_count = "ref_count", Fld_qual_count = "qual_count"
|
||||
;
|
||||
}
|
||||
class Wbase_claim_time_tbl extends Wdata_tbl_base {
|
||||
@Override public String Tbl_name() {return "wdata_claim_time";}
|
||||
@Override public String Tbl_create_sql() {
|
||||
return String_.Concat_lines_nl
|
||||
( "CREATE TABLE IF NOT EXISTS wdata_claim_time"
|
||||
, "( claim_id integer NOT NULL"
|
||||
, ", time_val varchar(64) NOT NULL" // -04540000000-01-01T00:00:00Z
|
||||
, ", time_tz integer NOT NULL" // 0
|
||||
, ", time_before integer NOT NULL" // 0
|
||||
, ", time_after integer NOT NULL" // 0
|
||||
, ", time_precision integer NOT NULL" // 2; number of digits
|
||||
, ", time_model varchar(64) NOT NULL" // http:\/\/www.wikidata.org\/entity\/Q1985727
|
||||
, ");"
|
||||
);
|
||||
}
|
||||
@Override public Db_idx_itm[] Idx_ary() {
|
||||
return new Db_idx_itm[] {
|
||||
Db_idx_itm.sql_("CREATE INDEX IF NOT EXISTS wdata_claim_time__main ON wdata_claim_time (claim_id);")
|
||||
};
|
||||
}
|
||||
@Override public String[] Fld_ary() {return new String[] {Fld_claim_id, Fld_time_val, Fld_time_tz, Fld_time_before, Fld_time_after, Fld_time_precision, Fld_time_model};}
|
||||
public void Insert(Db_stmt stmt, int claim_id, byte[] time_val, int tz, int before, int after, int precision, byte[] model) {
|
||||
stmt.Clear()
|
||||
.Val_int(claim_id)
|
||||
.Val_bry_as_str(time_val)
|
||||
.Val_int(tz)
|
||||
.Val_int(before)
|
||||
.Val_int(after)
|
||||
.Val_int(precision)
|
||||
.Val_bry_as_str(model)
|
||||
.Exec_insert();
|
||||
}
|
||||
private static final String Fld_claim_id = "claim_id", Fld_time_val = "time_val", Fld_time_tz = "time_tz", Fld_time_before = "time_before", Fld_time_after = "time_after", Fld_time_precision = "time_precision", Fld_time_model = "time_model";
|
||||
}
|
||||
class Wbase_claim_geo_tbl extends Wdata_tbl_base {
|
||||
@Override public String Tbl_name() {return "wdata_claim_geo";}
|
||||
@Override public String Tbl_create_sql() {
|
||||
return String_.Concat_lines_nl
|
||||
( "CREATE TABLE IF NOT EXISTS wdata_claim_geo"
|
||||
, "( claim_id integer NOT NULL"
|
||||
, ", geo_latitude double NOT NULL" // 41.590833333333
|
||||
, ", geo_longitude double NOT NULL" // -93.620833333333
|
||||
, ", geo_altitude varchar(255) NOT NULL" // null
|
||||
, ", geo_precision double NOT NULL" // 0.00027777777777778
|
||||
, ", geo_globe integer NOT NULL" // http:\/\/www.wikidata.org\/entity\/Q2
|
||||
, ");"
|
||||
);
|
||||
}
|
||||
@Override public Db_idx_itm[] Idx_ary() {
|
||||
return new Db_idx_itm[]
|
||||
{ Db_idx_itm.sql_("CREATE INDEX IF NOT EXISTS wdata_claim_geo__main ON wdata_claim_geo (claim_id);")
|
||||
};
|
||||
}
|
||||
public void Insert(Db_stmt stmt, int claim_id, double latitude, double longitude, byte[] altitude, double precision, byte[] globe) {
|
||||
stmt.Clear()
|
||||
.Val_int(claim_id)
|
||||
.Val_double(latitude)
|
||||
.Val_double(longitude)
|
||||
.Val_bry_as_str(altitude)
|
||||
.Val_double(precision)
|
||||
.Val_bry_as_str(globe)
|
||||
.Exec_insert();
|
||||
}
|
||||
@Override public String[] Fld_ary() {return new String[] {Fld_claim_id, Fld_geo_latitude, Fld_geo_longitude, Fld_geo_altitude, Fld_geo_precision, Fld_geo_globe};}
|
||||
private static final String Fld_claim_id = "claim_id", Fld_geo_latitude = "geo_latitude", Fld_geo_longitude = "geo_longitude", Fld_geo_altitude = "geo_altitude", Fld_geo_precision = "geo_precision", Fld_geo_globe = "geo_globe";
|
||||
}
|
||||
class Wdata_ref_tbl extends Wdata_tbl_base {
|
||||
@Override public String Tbl_name() {return "wdata_ref";}
|
||||
@Override public String Tbl_create_sql() {
|
||||
return String_.Concat_lines_nl
|
||||
( "CREATE TABLE IF NOT EXISTS wdata_ref"
|
||||
, "( ref_id integer NOT NULL"
|
||||
, ", page_id integer NOT NULL"
|
||||
, ", prop_id integer NOT NULL" // 60; P60
|
||||
, ", val_tid smallint NOT NULL" // String;wikibase-entity-id;time;globecoordinate
|
||||
, ", entity_tid smallint NOT NULL" // null;item
|
||||
, ", entity_id integer NOT NULL" // null;123
|
||||
, ", val_text varchar(255) NOT NULL"
|
||||
, ");"
|
||||
);
|
||||
}
|
||||
@Override public Db_idx_itm[] Idx_ary() {
|
||||
return new Db_idx_itm[] {
|
||||
Db_idx_itm.sql_("CREATE INDEX IF NOT EXISTS wdata_ref__main ON wdata_ref (page_id, prop_id, val_tid, entity_tid);")
|
||||
};
|
||||
}
|
||||
@Override public String[] Fld_ary() {return new String[] {Fld_ref_id, Fld_page_id, Fld_prop_id, Fld_val_tid, Fld_entity_tid, Fld_entity_id, Fld_val_text};}
|
||||
private static final String Fld_ref_id = "ref_id", Fld_page_id = "page_id", Fld_prop_id = "prop_id", Fld_val_tid = "val_tid", Fld_entity_tid = "entity_tid", Fld_entity_id = "entity_id", Fld_val_text = "val_ext";
|
||||
}
|
||||
class Wdata_qual_tbl extends Wdata_tbl_base {
|
||||
@Override public String Tbl_name() {return "wdata_qual";}
|
||||
@Override public String Tbl_create_sql() {
|
||||
return String_.Concat_lines_nl
|
||||
( "CREATE TABLE IF NOT EXISTS wdata_qual"
|
||||
, "( qual_id integer NOT NULL"
|
||||
, ", page_id integer NOT NULL"
|
||||
, ", val_text varchar(4096) NOT NULL"
|
||||
, ");"
|
||||
);
|
||||
}
|
||||
@Override public Db_idx_itm[] Idx_ary() {
|
||||
return new Db_idx_itm[] {
|
||||
Db_idx_itm.sql_("CREATE INDEX IF NOT EXISTS wdata_qual__main ON wdata_ref (qual_id, page_id);")
|
||||
};
|
||||
}
|
||||
@Override public String[] Fld_ary() {return new String[] {Fld_qual_id, Fld_page_id, Fld_val_text};}
|
||||
public void Insert(Db_stmt stmt, int qual_id, int page_id, byte[] val_text) {
|
||||
stmt.Clear()
|
||||
.Val_int(qual_id)
|
||||
.Val_int(page_id)
|
||||
.Val_bry_as_str(val_text)
|
||||
.Exec_insert();
|
||||
}
|
||||
private static final String Fld_qual_id = "qual_id", Fld_page_id = "page_id", Fld_val_text = "val_text";
|
||||
}
|
||||
class Xob_wdata_db_visitor implements Wbase_claim_visitor {
|
||||
private final Wdata_wiki_mgr wdata_mgr; private byte[] lang_key;
|
||||
public Xob_wdata_db_visitor(Wdata_wiki_mgr wdata_mgr) {this.wdata_mgr = wdata_mgr;}
|
||||
public void Init(byte[] lang_key) {this.lang_key = lang_key;}
|
||||
public byte[] Rv() {return rv;} private byte[] rv;
|
||||
public void Visit_str(Wbase_claim_string itm) {rv = itm.Val_bry();}
|
||||
public void Visit_monolingualtext(Wbase_claim_monolingualtext itm) {rv = Bry_.Add_w_dlm(Byte_ascii.Pipe, itm.Lang(), itm.Text());}
|
||||
public void Visit_quantity(Wbase_claim_quantity itm) {rv = itm.Amount();}
|
||||
public void Visit_time(Wbase_claim_time itm) {rv = itm.Time();}
|
||||
public void Visit_globecoordinate(Wbase_claim_globecoordinate itm) {rv = Bry_.Add_w_dlm(Byte_ascii.Comma, itm.Lat(), itm.Lng());}
|
||||
public void Visit_system(Wbase_claim_value itm) {rv = Bry_.Empty;}
|
||||
public void Visit_entity(Wbase_claim_entity itm) {
|
||||
Wdata_doc entity_doc = wdata_mgr.Doc_mgr.Get_by_xid_or_null(itm.Page_ttl_db());
|
||||
if (entity_doc != null) {
|
||||
rv = entity_doc.Get_label_bry_or_null(lang_key);
|
||||
}
|
||||
if (rv == null) // can be null if entity_doc is null or if label is null;
|
||||
rv = Bry_.Empty;
|
||||
}
|
||||
}
|
||||
package gplx.xowa.xtns.wbases.imports;
|
||||
|
||||
import gplx.Bool_;
|
||||
import gplx.Bry_;
|
||||
import gplx.Byte_ascii;
|
||||
import gplx.Gfo_usr_dlg;
|
||||
import gplx.Int_ary_;
|
||||
import gplx.Ordered_hash;
|
||||
import gplx.String_;
|
||||
import gplx.dbs.Db_conn;
|
||||
import gplx.dbs.Db_idx_itm;
|
||||
import gplx.dbs.Db_stmt;
|
||||
import gplx.dbs.Db_stmt_;
|
||||
import gplx.dbs.cfgs.Db_cfg_tbl;
|
||||
import gplx.dbs.engines.sqlite.Sqlite_engine_;
|
||||
import gplx.langs.jsons.Json_doc;
|
||||
import gplx.langs.jsons.Json_grp;
|
||||
import gplx.langs.jsons.Json_itm;
|
||||
import gplx.langs.jsons.Json_itm_;
|
||||
import gplx.langs.jsons.Json_kv;
|
||||
import gplx.langs.jsons.Json_nde;
|
||||
import gplx.langs.jsons.Json_parser;
|
||||
import gplx.xowa.Xowe_wiki;
|
||||
import gplx.xowa.bldrs.Xob_bldr;
|
||||
import gplx.xowa.bldrs.Xob_cmd_keys;
|
||||
import gplx.xowa.bldrs.Xob_db_file;
|
||||
import gplx.xowa.bldrs.cmds.Xob_dump_mgr_base;
|
||||
import gplx.xowa.bldrs.wkrs.Xob_cmd;
|
||||
import gplx.xowa.langs.Xol_lang_itm_;
|
||||
import gplx.xowa.wikis.data.tbls.Xowd_page_itm;
|
||||
import gplx.xowa.wikis.nss.Xow_ns;
|
||||
import gplx.xowa.wikis.nss.Xow_ns_;
|
||||
import gplx.xowa.xtns.wbases.Wdata_doc;
|
||||
import gplx.xowa.xtns.wbases.Wdata_wiki_mgr;
|
||||
import gplx.xowa.xtns.wbases.claims.Wbase_claim_grp;
|
||||
import gplx.xowa.xtns.wbases.claims.Wbase_claim_visitor;
|
||||
import gplx.xowa.xtns.wbases.claims.itms.Wbase_claim_base;
|
||||
import gplx.xowa.xtns.wbases.claims.itms.Wbase_claim_entity;
|
||||
import gplx.xowa.xtns.wbases.claims.itms.Wbase_claim_globecoordinate;
|
||||
import gplx.xowa.xtns.wbases.claims.itms.Wbase_claim_monolingualtext;
|
||||
import gplx.xowa.xtns.wbases.claims.itms.Wbase_claim_quantity;
|
||||
import gplx.xowa.xtns.wbases.claims.itms.Wbase_claim_string;
|
||||
import gplx.xowa.xtns.wbases.claims.itms.Wbase_claim_time;
|
||||
import gplx.xowa.xtns.wbases.claims.itms.Wbase_claim_value;
|
||||
|
||||
public class Xob_wdata_db_cmd extends Xob_dump_mgr_base implements Xob_cmd {
|
||||
private Wdata_tbl_mgr tbl_mgr = new Wdata_tbl_mgr();
|
||||
private Wdata_wiki_mgr wdata_mgr;
|
||||
private byte[] lang_key = Xol_lang_itm_.Key_en;
|
||||
private final Json_parser json_parser = new Json_parser();
|
||||
public Xob_wdata_db_cmd(Xob_bldr bldr, Xowe_wiki wiki) {this.Cmd_ctor(bldr, wiki);}
|
||||
@Override public String Cmd_key() {return Xob_cmd_keys.Key_wbase_db;}
|
||||
@Override public byte Init_redirect() {return Bool_.N_byte;} // json will never be found in a redirect
|
||||
@Override public int[] Init_ns_ary() {return Int_ary_.New(Xow_ns_.Tid__main, Wdata_wiki_mgr.Ns_property);}
|
||||
@Override protected void Init_reset(Db_conn conn) {
|
||||
Db_cfg_tbl cfg_tbl = gplx.xowa.wikis.data.Xowd_cfg_tbl_.New(conn);
|
||||
cfg_tbl.Delete_all();
|
||||
}
|
||||
@Override protected Db_conn Init_db_file() {
|
||||
Xob_db_file tbl_file = Xob_db_file.New(wiki.Fsys_mgr().Root_dir(), "wdata_db.sqlite3");
|
||||
Db_conn conn = tbl_file.Conn();
|
||||
tbl_mgr.Init(conn);
|
||||
return conn;
|
||||
}
|
||||
@Override protected void Cmd_bgn_end() {
|
||||
wdata_mgr = bldr.App().Wiki_mgr().Wdata_mgr();
|
||||
tbl_mgr.Conn().Txn_bgn("bldr__wdata_db");
|
||||
}
|
||||
@Override public void Exec_pg_itm_hook(int ns_ord, Xow_ns ns, Xowd_page_itm page, byte[] page_src) {
|
||||
Json_doc jdoc = json_parser.Parse(page_src); if (jdoc == null) return; // not a json document
|
||||
Wdata_doc wdoc = new Wdata_doc(wdata_mgr, jdoc, page.Ttl_page_db());
|
||||
tbl_mgr.Exec_insert_by_wdoc(lang_key, wdata_mgr, page.Id(), wdoc);
|
||||
}
|
||||
@Override public void Exec_commit_hook() {
|
||||
tbl_mgr.Conn().Txn_sav();
|
||||
}
|
||||
@Override public void Exec_end_hook() {
|
||||
tbl_mgr.Term(usr_dlg);
|
||||
}
|
||||
}
|
||||
class Wdata_tbl_mgr {
|
||||
private Wdata_tbl_base[] tbls; private int tbls_len;
|
||||
public Wdata_tbl_mgr() {
|
||||
tbls = new Wdata_tbl_base[] {label_tbl, alias_tbl, description_tbl, link_tbl, claim_tbl, claim_time_tbl, claim_geo_tbl};
|
||||
tbls_len = tbls.length;
|
||||
}
|
||||
public Db_conn Conn() {return conn;} private Db_conn conn;
|
||||
public Wdata_label_tbl Label_tbl() {return label_tbl;} private Wdata_label_tbl label_tbl = new Wdata_label_tbl();
|
||||
public Wdata_alias_tbl Alias_tbl() {return alias_tbl;} private Wdata_alias_tbl alias_tbl = new Wdata_alias_tbl();
|
||||
public Wdata_description_tbl Description_tbl() {return description_tbl;} private Wdata_description_tbl description_tbl = new Wdata_description_tbl();
|
||||
public Wdata_link_tbl Link_tbl() {return link_tbl;} private Wdata_link_tbl link_tbl = new Wdata_link_tbl();
|
||||
public Wbase_claim_tbl Claim_tbl() {return claim_tbl;} private Wbase_claim_tbl claim_tbl = new Wbase_claim_tbl();
|
||||
public Wbase_claim_time_tbl Claim_time_tbl() {return claim_time_tbl;} private Wbase_claim_time_tbl claim_time_tbl = new Wbase_claim_time_tbl();
|
||||
public Wbase_claim_geo_tbl Claim_geo_tbl() {return claim_geo_tbl;} private Wbase_claim_geo_tbl claim_geo_tbl = new Wbase_claim_geo_tbl();
|
||||
public void Init(Db_conn conn) {
|
||||
this.conn = conn;
|
||||
for (int i = 0; i < tbls_len; i++)
|
||||
tbls[i].Init(conn);
|
||||
}
|
||||
public void Exec_insert_by_wdoc(byte[] lang_key, Wdata_wiki_mgr wdata_mgr, int page_id, Wdata_doc wdoc) {
|
||||
for (int i = 0; i < tbls_len; i++)
|
||||
tbls[i].Exec_insert_by_wdoc(lang_key, wdata_mgr, page_id, wdoc);
|
||||
}
|
||||
public void Term(Gfo_usr_dlg usr_dlg) {
|
||||
conn.Txn_end();
|
||||
for (int i = 0; i < tbls_len; i++)
|
||||
tbls[i].Make_idxs(usr_dlg, conn);
|
||||
}
|
||||
}
|
||||
abstract class Wdata_tbl_base {
|
||||
public abstract String Tbl_name();
|
||||
public abstract String Tbl_create_sql();
|
||||
public abstract Db_idx_itm[] Idx_ary();
|
||||
public abstract String[] Fld_ary();
|
||||
@gplx.Virtual public void Exec_insert_by_wdoc(byte[] lang_key, Wdata_wiki_mgr wdata_mgr, int page_id, Wdata_doc wdoc) {}
|
||||
public void Make_tbl(Db_conn p) {Sqlite_engine_.Tbl_create(p, this.Tbl_name(), this.Tbl_create_sql());}
|
||||
public void Make_idxs(Gfo_usr_dlg usr_dlg, Db_conn p) {
|
||||
Sqlite_engine_.Idx_create(usr_dlg, p, this.Tbl_name(), this.Idx_ary());
|
||||
}
|
||||
public Db_stmt Make_insert_stmt(Db_conn p) {return Db_stmt_.new_insert_(p, this.Tbl_name(), this.Fld_ary());}
|
||||
public Db_stmt Insert_stmt() {return insert_stmt;} private Db_stmt insert_stmt;
|
||||
public void Init(Db_conn conn) {
|
||||
this.Make_tbl(conn);
|
||||
insert_stmt = this.Make_insert_stmt(conn);
|
||||
}
|
||||
public static void Exec_insert_kvs(Db_stmt stmt, int page_id, Ordered_hash hash) {
|
||||
int len = hash.Count();
|
||||
for (int i = 0; i < len; i++) {
|
||||
Json_kv kv = (Json_kv)hash.Get_at(i);
|
||||
stmt.Clear()
|
||||
.Val_int(page_id)
|
||||
.Val_bry_as_str(kv.Key().Data_bry())
|
||||
.Val_bry_as_str(kv.Val().Data_bry())
|
||||
.Exec_insert();
|
||||
}
|
||||
}
|
||||
}
|
||||
class Wdata_label_tbl extends Wdata_tbl_base {
|
||||
@Override public String Tbl_name() {return "wdata_label";}
|
||||
@Override public String Tbl_create_sql() {
|
||||
return String_.Concat_lines_nl
|
||||
( "CREATE TABLE IF NOT EXISTS wdata_label"
|
||||
, "( page_id integer NOT NULL"
|
||||
, ", lang_key varchar(16) NOT NULL"
|
||||
, ", val varchar(255) NOT NULL"
|
||||
, ");"
|
||||
);
|
||||
}
|
||||
@Override public Db_idx_itm[] Idx_ary() {return new Db_idx_itm[] {Db_idx_itm.sql_("CREATE INDEX IF NOT EXISTS wdata_label__main ON wdata_label (page_id, lang_key);")};}
|
||||
@Override public String[] Fld_ary() {return new String[] {Fld_page_id, Fld_lang_key, Fld_val};}
|
||||
@Override public void Exec_insert_by_wdoc(byte[] lang_key, Wdata_wiki_mgr wdata_mgr, int page_id, Wdata_doc wdoc) {Exec_insert_kvs(this.Insert_stmt(), page_id, wdoc.Label_list());}
|
||||
private static final String Fld_page_id = "page_id", Fld_lang_key = "lang_key", Fld_val = "val";
|
||||
}
|
||||
class Wdata_alias_tbl extends Wdata_tbl_base {
|
||||
@Override public String Tbl_name() {return "wdata_alias";}
|
||||
@Override public String Tbl_create_sql() {
|
||||
return String_.Concat_lines_nl
|
||||
( "CREATE TABLE IF NOT EXISTS wdata_alias"
|
||||
, "( page_id integer NOT NULL"
|
||||
, ", lang_key varchar(16) NOT NULL"
|
||||
, ", val varchar(255) NOT NULL"
|
||||
, ");"
|
||||
);
|
||||
}
|
||||
@Override public Db_idx_itm[] Idx_ary() {return new Db_idx_itm[] {Db_idx_itm.sql_("CREATE INDEX IF NOT EXISTS wdata_alias__main ON wdata_alias (page_id, lang_key);")};}
|
||||
@Override public String[] Fld_ary() {return new String[] {Fld_page_id, Fld_lang_key, Fld_val};}
|
||||
@Override public void Exec_insert_by_wdoc(byte[] lang_key, Wdata_wiki_mgr wdata_mgr, int page_id, Wdata_doc wdoc) {
|
||||
Ordered_hash hash = wdoc.Alias_list();
|
||||
int len = hash.Count();
|
||||
Db_stmt insert_stmt = this.Insert_stmt();
|
||||
for (int i = 0; i < len; i++) {
|
||||
Json_kv kv = (Json_kv)hash.Get_at(i);
|
||||
byte[] key = kv.Key().Data_bry();
|
||||
Json_grp val_grp = (Json_grp)kv.Val();
|
||||
int val_grp_len = val_grp.Len();
|
||||
for (int j = 0; j < val_grp_len; j++) {
|
||||
Json_itm val_itm = val_grp.Get_at(j);
|
||||
byte[] val = Bry_.Empty;
|
||||
if (val_itm.Tid() == Json_itm_.Tid__str)
|
||||
val = val_itm.Data_bry();
|
||||
else if (val_itm.Tid() == Json_itm_.Tid__kv) { // EX: q80 and de aliases
|
||||
val = ((Json_kv)val_itm).Val().Data_bry();
|
||||
}
|
||||
insert_stmt.Clear()
|
||||
.Val_int(page_id)
|
||||
.Val_bry_as_str(key)
|
||||
.Val_bry_as_str(val)
|
||||
.Exec_insert();
|
||||
}
|
||||
}
|
||||
}
|
||||
private static final String Fld_page_id = "page_id", Fld_lang_key = "lang_key", Fld_val = "val";
|
||||
}
|
||||
class Wdata_description_tbl extends Wdata_tbl_base {
|
||||
@Override public String Tbl_name() {return "wdata_description";}
|
||||
@Override public String Tbl_create_sql() {
|
||||
return String_.Concat_lines_nl
|
||||
( "CREATE TABLE IF NOT EXISTS wdata_description"
|
||||
, "( page_id integer NOT NULL"
|
||||
, ", lang_key varchar(16) NOT NULL"
|
||||
, ", val varchar(255) NOT NULL"
|
||||
, ");"
|
||||
);
|
||||
}
|
||||
@Override public Db_idx_itm[] Idx_ary() {return new Db_idx_itm[] {Db_idx_itm.sql_("CREATE INDEX IF NOT EXISTS wdata_description__main ON wdata_description (page_id, lang_key);")};}
|
||||
@Override public void Exec_insert_by_wdoc(byte[] lang_key, Wdata_wiki_mgr wdata_mgr, int page_id, Wdata_doc wdoc) {Exec_insert_kvs(this.Insert_stmt(), page_id, wdoc.Descr_list());}
|
||||
@Override public String[] Fld_ary() {return new String[] {Fld_page_id, Fld_lang_key, Fld_val};}
|
||||
private static final String Fld_page_id = "page_id", Fld_lang_key = "lang_key", Fld_val = "val";
|
||||
}
|
||||
class Wdata_link_tbl extends Wdata_tbl_base {
|
||||
@Override public String Tbl_name() {return "wdata_link";}
|
||||
@Override public String Tbl_create_sql() {
|
||||
return String_.Concat_lines_nl
|
||||
( "CREATE TABLE IF NOT EXISTS wdata_link"
|
||||
, "( page_id integer NOT NULL"
|
||||
, ", wiki_key varchar(255) NOT NULL"
|
||||
, ", val varchar(255) NOT NULL"
|
||||
, ");"
|
||||
);
|
||||
}
|
||||
@Override public Db_idx_itm[] Idx_ary() {return new Db_idx_itm[] {Db_idx_itm.sql_("CREATE INDEX IF NOT EXISTS wdata_link__main ON wdata_link (page_id, wiki_key);")};}
|
||||
@Override public String[] Fld_ary() {return new String[] {Fld_page_id, Fld_wiki_key, Fld_val};}
|
||||
@Override public void Exec_insert_by_wdoc(byte[] lang_key, Wdata_wiki_mgr wdata_mgr, int page_id, Wdata_doc wdoc) {
|
||||
Ordered_hash hash = wdoc.Slink_list();
|
||||
int len = hash.Count();
|
||||
Db_stmt insert_stmt = this.Insert_stmt();
|
||||
for (int i = 0; i < len; i++) {
|
||||
Json_kv kv = (Json_kv)hash.Get_at(i);
|
||||
byte[] key = kv.Key().Data_bry();
|
||||
Json_itm kv_val = kv.Val();
|
||||
byte[] val = Bry_.Empty;
|
||||
if (kv_val.Tid() == Json_itm_.Tid__str)
|
||||
val = kv_val.Data_bry();
|
||||
else {
|
||||
Json_nde val_nde = (Json_nde)kv.Val();
|
||||
Json_kv val_name_kv = (Json_kv)val_nde.Get_at(0); // ASSUME: 1st item is always "name" kv; EX: "name":"Earth"
|
||||
val = val_name_kv.Val().Data_bry();
|
||||
}
|
||||
insert_stmt.Clear()
|
||||
.Val_int(page_id)
|
||||
.Val_bry_as_str(key)
|
||||
.Val_bry_as_str(val)
|
||||
.Exec_insert();
|
||||
}
|
||||
}
|
||||
private static final String Fld_page_id = "page_id", Fld_wiki_key = "wiki_key", Fld_val = "val";
|
||||
}
|
||||
class Wbase_claim_tbl extends Wdata_tbl_base {
|
||||
@Override public String Tbl_name() {return "wdata_claim";}
|
||||
@Override public String Tbl_create_sql() {
|
||||
return String_.Concat_lines_nl
|
||||
( "CREATE TABLE IF NOT EXISTS wdata_claim"
|
||||
, "( claim_id integer NOT NULL"
|
||||
, ", page_id integer NOT NULL"
|
||||
, ", prop_id integer NOT NULL" // 60; P60
|
||||
, ", val_tid smallint NOT NULL" // String;wikibase-entity-id;time;globecoordinate
|
||||
, ", entity_tid smallint NOT NULL" // null;item
|
||||
, ", entity_id integer NOT NULL" // null;123
|
||||
, ", val_text varchar(255) NOT NULL"
|
||||
, ", guid varchar(64) NOT NULL"
|
||||
, ", rank integer NOT NULL"
|
||||
, ", ref_count integer NOT NULL"
|
||||
, ", qual_count integer NOT NULL"
|
||||
, ");"
|
||||
);
|
||||
}
|
||||
@Override public Db_idx_itm[] Idx_ary() {
|
||||
return new Db_idx_itm[]
|
||||
{ Db_idx_itm.sql_("CREATE INDEX IF NOT EXISTS wdata_claim__main ON wdata_claim (page_id, prop_id, val_tid, entity_tid);")
|
||||
};
|
||||
}
|
||||
@Override public String[] Fld_ary() {return new String[] {Fld_claim_id, Fld_page_id, Fld_prop_id, Fld_val_tid, Fld_entity_tid, Fld_entity_id, Fld_val_text, Fld_guid, Fld_rank, Fld_ref_count, Fld_qual_count};}
|
||||
private int next_claim_id = 0;
|
||||
private Xob_wdata_db_visitor visitor;
|
||||
@Override public void Exec_insert_by_wdoc(byte[] lang_key, Wdata_wiki_mgr wdata_mgr, int page_id, Wdata_doc wdoc) {
|
||||
if (visitor == null) visitor = new Xob_wdata_db_visitor(wdata_mgr);
|
||||
visitor.Init(lang_key);
|
||||
Ordered_hash list = wdoc.Claim_list();
|
||||
int list_len = list.Count();
|
||||
for (int i = 0; i < list_len; i++) {
|
||||
Wbase_claim_grp claim_grp = (Wbase_claim_grp)list.Get_at(i);
|
||||
int itms_len = claim_grp.Len();
|
||||
int entity_id = -1;
|
||||
byte[] claim_val = Bry_.Empty;
|
||||
for (int j = 0; j < itms_len; j++) {
|
||||
Wbase_claim_base claim = claim_grp.Get_at(j);
|
||||
claim.Welcome(visitor);
|
||||
claim_val = visitor.Rv();
|
||||
Exec_insert(++next_claim_id, page_id, claim_grp.Id(), claim.Val_tid(), claim.Snak_tid(), entity_id, claim_val, claim.Wguid(), claim.Rank_tid(), 0, 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
public void Exec_insert(int claim_id, int page_id, int prop_id, byte val_tid, byte entity_tid, int entity_id, byte[] val_text, byte[] guid, int rank, int ref_count, int qual_count) {
|
||||
if (val_text == null) val_text = Bry_.Empty;
|
||||
if (guid == null) guid = Bry_.Empty;
|
||||
this.Insert_stmt().Clear()
|
||||
.Val_int(claim_id)
|
||||
.Val_int(page_id)
|
||||
.Val_int(prop_id)
|
||||
.Val_byte(val_tid)
|
||||
.Val_byte(entity_tid)
|
||||
.Val_int(entity_id)
|
||||
.Val_bry_as_str(val_text)
|
||||
.Val_bry_as_str(guid)
|
||||
.Val_int(rank)
|
||||
.Val_int(ref_count)
|
||||
.Val_int(qual_count)
|
||||
.Exec_insert();
|
||||
}
|
||||
private static final String Fld_claim_id = "claim_id", Fld_page_id = "page_id", Fld_prop_id = "prop_id", Fld_val_tid = "val_tid", Fld_entity_tid = "entity_tid", Fld_entity_id = "entity_id", Fld_val_text = "val_text"
|
||||
, Fld_guid = "guid", Fld_rank = "rank", Fld_ref_count = "ref_count", Fld_qual_count = "qual_count"
|
||||
;
|
||||
}
|
||||
class Wbase_claim_time_tbl extends Wdata_tbl_base {
|
||||
@Override public String Tbl_name() {return "wdata_claim_time";}
|
||||
@Override public String Tbl_create_sql() {
|
||||
return String_.Concat_lines_nl
|
||||
( "CREATE TABLE IF NOT EXISTS wdata_claim_time"
|
||||
, "( claim_id integer NOT NULL"
|
||||
, ", time_val varchar(64) NOT NULL" // -04540000000-01-01T00:00:00Z
|
||||
, ", time_tz integer NOT NULL" // 0
|
||||
, ", time_before integer NOT NULL" // 0
|
||||
, ", time_after integer NOT NULL" // 0
|
||||
, ", time_precision integer NOT NULL" // 2; number of digits
|
||||
, ", time_model varchar(64) NOT NULL" // http:\/\/www.wikidata.org\/entity\/Q1985727
|
||||
, ");"
|
||||
);
|
||||
}
|
||||
@Override public Db_idx_itm[] Idx_ary() {
|
||||
return new Db_idx_itm[] {
|
||||
Db_idx_itm.sql_("CREATE INDEX IF NOT EXISTS wdata_claim_time__main ON wdata_claim_time (claim_id);")
|
||||
};
|
||||
}
|
||||
@Override public String[] Fld_ary() {return new String[] {Fld_claim_id, Fld_time_val, Fld_time_tz, Fld_time_before, Fld_time_after, Fld_time_precision, Fld_time_model};}
|
||||
public void Insert(Db_stmt stmt, int claim_id, byte[] time_val, int tz, int before, int after, int precision, byte[] model) {
|
||||
stmt.Clear()
|
||||
.Val_int(claim_id)
|
||||
.Val_bry_as_str(time_val)
|
||||
.Val_int(tz)
|
||||
.Val_int(before)
|
||||
.Val_int(after)
|
||||
.Val_int(precision)
|
||||
.Val_bry_as_str(model)
|
||||
.Exec_insert();
|
||||
}
|
||||
private static final String Fld_claim_id = "claim_id", Fld_time_val = "time_val", Fld_time_tz = "time_tz", Fld_time_before = "time_before", Fld_time_after = "time_after", Fld_time_precision = "time_precision", Fld_time_model = "time_model";
|
||||
}
|
||||
class Wbase_claim_geo_tbl extends Wdata_tbl_base {
|
||||
@Override public String Tbl_name() {return "wdata_claim_geo";}
|
||||
@Override public String Tbl_create_sql() {
|
||||
return String_.Concat_lines_nl
|
||||
( "CREATE TABLE IF NOT EXISTS wdata_claim_geo"
|
||||
, "( claim_id integer NOT NULL"
|
||||
, ", geo_latitude double NOT NULL" // 41.590833333333
|
||||
, ", geo_longitude double NOT NULL" // -93.620833333333
|
||||
, ", geo_altitude varchar(255) NOT NULL" // null
|
||||
, ", geo_precision double NOT NULL" // 0.00027777777777778
|
||||
, ", geo_globe integer NOT NULL" // http:\/\/www.wikidata.org\/entity\/Q2
|
||||
, ");"
|
||||
);
|
||||
}
|
||||
@Override public Db_idx_itm[] Idx_ary() {
|
||||
return new Db_idx_itm[]
|
||||
{ Db_idx_itm.sql_("CREATE INDEX IF NOT EXISTS wdata_claim_geo__main ON wdata_claim_geo (claim_id);")
|
||||
};
|
||||
}
|
||||
public void Insert(Db_stmt stmt, int claim_id, double latitude, double longitude, byte[] altitude, double precision, byte[] globe) {
|
||||
stmt.Clear()
|
||||
.Val_int(claim_id)
|
||||
.Val_double(latitude)
|
||||
.Val_double(longitude)
|
||||
.Val_bry_as_str(altitude)
|
||||
.Val_double(precision)
|
||||
.Val_bry_as_str(globe)
|
||||
.Exec_insert();
|
||||
}
|
||||
@Override public String[] Fld_ary() {return new String[] {Fld_claim_id, Fld_geo_latitude, Fld_geo_longitude, Fld_geo_altitude, Fld_geo_precision, Fld_geo_globe};}
|
||||
private static final String Fld_claim_id = "claim_id", Fld_geo_latitude = "geo_latitude", Fld_geo_longitude = "geo_longitude", Fld_geo_altitude = "geo_altitude", Fld_geo_precision = "geo_precision", Fld_geo_globe = "geo_globe";
|
||||
}
|
||||
class Wdata_ref_tbl extends Wdata_tbl_base {
|
||||
@Override public String Tbl_name() {return "wdata_ref";}
|
||||
@Override public String Tbl_create_sql() {
|
||||
return String_.Concat_lines_nl
|
||||
( "CREATE TABLE IF NOT EXISTS wdata_ref"
|
||||
, "( ref_id integer NOT NULL"
|
||||
, ", page_id integer NOT NULL"
|
||||
, ", prop_id integer NOT NULL" // 60; P60
|
||||
, ", val_tid smallint NOT NULL" // String;wikibase-entity-id;time;globecoordinate
|
||||
, ", entity_tid smallint NOT NULL" // null;item
|
||||
, ", entity_id integer NOT NULL" // null;123
|
||||
, ", val_text varchar(255) NOT NULL"
|
||||
, ");"
|
||||
);
|
||||
}
|
||||
@Override public Db_idx_itm[] Idx_ary() {
|
||||
return new Db_idx_itm[] {
|
||||
Db_idx_itm.sql_("CREATE INDEX IF NOT EXISTS wdata_ref__main ON wdata_ref (page_id, prop_id, val_tid, entity_tid);")
|
||||
};
|
||||
}
|
||||
@Override public String[] Fld_ary() {return new String[] {Fld_ref_id, Fld_page_id, Fld_prop_id, Fld_val_tid, Fld_entity_tid, Fld_entity_id, Fld_val_text};}
|
||||
private static final String Fld_ref_id = "ref_id", Fld_page_id = "page_id", Fld_prop_id = "prop_id", Fld_val_tid = "val_tid", Fld_entity_tid = "entity_tid", Fld_entity_id = "entity_id", Fld_val_text = "val_ext";
|
||||
}
|
||||
class Wdata_qual_tbl extends Wdata_tbl_base {
|
||||
@Override public String Tbl_name() {return "wdata_qual";}
|
||||
@Override public String Tbl_create_sql() {
|
||||
return String_.Concat_lines_nl
|
||||
( "CREATE TABLE IF NOT EXISTS wdata_qual"
|
||||
, "( qual_id integer NOT NULL"
|
||||
, ", page_id integer NOT NULL"
|
||||
, ", val_text varchar(4096) NOT NULL"
|
||||
, ");"
|
||||
);
|
||||
}
|
||||
@Override public Db_idx_itm[] Idx_ary() {
|
||||
return new Db_idx_itm[] {
|
||||
Db_idx_itm.sql_("CREATE INDEX IF NOT EXISTS wdata_qual__main ON wdata_ref (qual_id, page_id);")
|
||||
};
|
||||
}
|
||||
@Override public String[] Fld_ary() {return new String[] {Fld_qual_id, Fld_page_id, Fld_val_text};}
|
||||
public void Insert(Db_stmt stmt, int qual_id, int page_id, byte[] val_text) {
|
||||
stmt.Clear()
|
||||
.Val_int(qual_id)
|
||||
.Val_int(page_id)
|
||||
.Val_bry_as_str(val_text)
|
||||
.Exec_insert();
|
||||
}
|
||||
private static final String Fld_qual_id = "qual_id", Fld_page_id = "page_id", Fld_val_text = "val_text";
|
||||
}
|
||||
class Xob_wdata_db_visitor implements Wbase_claim_visitor {
|
||||
private final Wdata_wiki_mgr wdata_mgr; private byte[] lang_key;
|
||||
public Xob_wdata_db_visitor(Wdata_wiki_mgr wdata_mgr) {this.wdata_mgr = wdata_mgr;}
|
||||
public void Init(byte[] lang_key) {this.lang_key = lang_key;}
|
||||
public byte[] Rv() {return rv;} private byte[] rv;
|
||||
public void Visit_str(Wbase_claim_string itm) {rv = itm.Val_bry();}
|
||||
public void Visit_monolingualtext(Wbase_claim_monolingualtext itm) {rv = Bry_.Add_w_dlm(Byte_ascii.Pipe, itm.Lang(), itm.Text());}
|
||||
public void Visit_quantity(Wbase_claim_quantity itm) {rv = itm.Amount();}
|
||||
public void Visit_time(Wbase_claim_time itm) {rv = itm.Time();}
|
||||
public void Visit_globecoordinate(Wbase_claim_globecoordinate itm) {rv = Bry_.Add_w_dlm(Byte_ascii.Comma, itm.Lat(), itm.Lng());}
|
||||
public void Visit_system(Wbase_claim_value itm) {rv = Bry_.Empty;}
|
||||
public void Visit_entity(Wbase_claim_entity itm) {
|
||||
Wdata_doc entity_doc = wdata_mgr.Doc_mgr.Get_by_xid_or_null(itm.Page_ttl_db());
|
||||
if (entity_doc != null) {
|
||||
rv = entity_doc.Get_label_bry_or_null(lang_key);
|
||||
}
|
||||
if (rv == null) // can be null if entity_doc is null or if label is null;
|
||||
rv = Bry_.Empty;
|
||||
}
|
||||
}
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||
Copyright (C) 2012-2020 gnosygnu@gmail.com
|
||||
|
||||
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||
or alternatively under the terms of the Apache License Version 2.0.
|
||||
@ -13,82 +13,102 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.xtns.wbases.imports; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.wbases.*;
|
||||
import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.wkrs.*;
|
||||
import gplx.xowa.wikis.data.*; import gplx.dbs.*; import gplx.xowa.wikis.dbs.*; import gplx.xowa.wikis.data.tbls.*;
|
||||
import gplx.xowa.xtns.wbases.core.*; import gplx.xowa.xtns.wbases.claims.enums.*; import gplx.xowa.xtns.wbases.parsers.*; import gplx.xowa.xtns.wbases.dbs.*;
|
||||
import gplx.langs.jsons.*;
|
||||
public class Xob_wdata_pid extends Xob_itm_dump_base implements Xob_page_wkr, Gfo_invk {
|
||||
private Db_conn conn;
|
||||
private Wbase_pid_tbl tbl__pid;
|
||||
private Xowb_prop_tbl tbl__prop;
|
||||
private Json_parser jdoc_parser;
|
||||
private final Ordered_hash datatype_hash = Ordered_hash_.New_bry();
|
||||
public Xob_wdata_pid(Db_conn conn) {
|
||||
this.conn = conn;
|
||||
}
|
||||
public Xob_wdata_pid Ctor(Xob_bldr bldr, Xowe_wiki wiki) {
|
||||
this.Cmd_ctor(bldr, wiki);
|
||||
this.jdoc_parser = bldr.App().Wiki_mgr().Wdata_mgr().Jdoc_parser();
|
||||
return this;
|
||||
}
|
||||
public String Page_wkr__key() {return gplx.xowa.bldrs.Xob_cmd_keys.Key_wbase_pid;}
|
||||
public void Page_wkr__bgn() {this.Pid__bgn();}
|
||||
public void Page_wkr__run(Xowd_page_itm page) {
|
||||
if (page.Ns_id() != Wdata_wiki_mgr.Ns_property) return;
|
||||
|
||||
Json_doc jdoc = jdoc_parser.Parse(page.Text());
|
||||
if (jdoc == null) {
|
||||
bldr.Usr_dlg().Warn_many(GRP_KEY, "json.invalid", "json is invalid: ns=~{0} id=~{1}", page.Ns_id(), String_.new_u8(page.Ttl_page_db()));
|
||||
return;
|
||||
}
|
||||
Pid__run(jdoc);
|
||||
}
|
||||
public void Page_wkr__run_cleanup() {}
|
||||
public void Page_wkr__end() {this.Pid__end();}
|
||||
public void Pid__bgn() {
|
||||
if (conn == null) // conn will be null unless test
|
||||
conn = wiki.Data__core_mgr().Db__wbase().Conn();
|
||||
|
||||
// init datatype_hash
|
||||
Wbase_enum_hash enum_hash = Wbase_claim_type_.Reg;
|
||||
byte len = (byte)enum_hash.Len();
|
||||
for (byte i = 0; i < len; i++) {
|
||||
Wbase_claim_type claim_type = (Wbase_claim_type)enum_hash.Get_itm_or(i, null);
|
||||
datatype_hash.Add(Bry_.new_u8(claim_type.Key_for_scrib()), claim_type);
|
||||
}
|
||||
|
||||
// init wbase_pid
|
||||
tbl__pid = Wbase_pid_tbl.New_make(conn);
|
||||
tbl__pid.Create_tbl();
|
||||
tbl__pid.Insert_bgn();
|
||||
|
||||
// init wbase_prop
|
||||
tbl__prop = new Xowb_prop_tbl(conn);
|
||||
tbl__prop.Create_tbl();
|
||||
tbl__prop.Insert_bgn();
|
||||
}
|
||||
public void Pid__run(Json_doc jdoc) {
|
||||
Wdata_doc_parser wdoc_parser = app.Wiki_mgr().Wdata_mgr().Wdoc_parser(jdoc);
|
||||
byte[] pid = wdoc_parser.Parse_qid(jdoc);
|
||||
|
||||
// add datatype
|
||||
byte[] datatype = jdoc.Root_nde().Get_as_bry(Wdata_dict_mainsnak.Itm__datatype.Key_str());
|
||||
Wbase_claim_type claim_type = (Wbase_claim_type)datatype_hash.Get_by_or_fail(datatype);
|
||||
tbl__prop.Insert_cmd_by_batch(pid, claim_type.Tid());
|
||||
|
||||
// add langs
|
||||
Ordered_hash list = wdoc_parser.Parse_langvals(pid, jdoc, Bool_.Y);
|
||||
int len = list.Count();
|
||||
for (int i = 0; i < len; ++i) {
|
||||
Wdata_langtext_itm label = (Wdata_langtext_itm)list.Get_at(i);
|
||||
tbl__pid.Insert_cmd_by_batch(label.Lang(), label.Text(), pid);
|
||||
}
|
||||
}
|
||||
public void Pid__end() {
|
||||
tbl__pid.Insert_end();
|
||||
tbl__pid.Create_idx();
|
||||
tbl__prop.Insert_end();
|
||||
}
|
||||
private static final String GRP_KEY = "xowa.wdata.pid_wkr";
|
||||
}
|
||||
package gplx.xowa.xtns.wbases.imports;
|
||||
|
||||
import gplx.Bool_;
|
||||
import gplx.Bry_;
|
||||
import gplx.Gfo_invk;
|
||||
import gplx.Ordered_hash;
|
||||
import gplx.Ordered_hash_;
|
||||
import gplx.String_;
|
||||
import gplx.dbs.Db_conn;
|
||||
import gplx.langs.jsons.Json_doc;
|
||||
import gplx.langs.jsons.Json_parser;
|
||||
import gplx.xowa.Xowe_wiki;
|
||||
import gplx.xowa.bldrs.Xob_bldr;
|
||||
import gplx.xowa.bldrs.wkrs.Xob_itm_dump_base;
|
||||
import gplx.xowa.bldrs.wkrs.Xob_page_wkr;
|
||||
import gplx.xowa.wikis.data.tbls.Xowd_page_itm;
|
||||
import gplx.xowa.xtns.wbases.Wdata_wiki_mgr;
|
||||
import gplx.xowa.xtns.wbases.claims.enums.Wbase_claim_type;
|
||||
import gplx.xowa.xtns.wbases.claims.enums.Wbase_claim_type_;
|
||||
import gplx.xowa.xtns.wbases.claims.enums.Wbase_enum_hash;
|
||||
import gplx.xowa.xtns.wbases.core.Wdata_dict_mainsnak;
|
||||
import gplx.xowa.xtns.wbases.core.Wdata_langtext_itm;
|
||||
import gplx.xowa.xtns.wbases.dbs.Wbase_pid_tbl;
|
||||
import gplx.xowa.xtns.wbases.dbs.Xowb_prop_tbl;
|
||||
import gplx.xowa.xtns.wbases.parsers.Wdata_doc_parser;
|
||||
|
||||
public class Xob_wdata_pid extends Xob_itm_dump_base implements Xob_page_wkr, Gfo_invk {
|
||||
private Db_conn conn;
|
||||
private Wbase_pid_tbl tbl__pid;
|
||||
private Xowb_prop_tbl tbl__prop;
|
||||
private final Json_parser jdoc_parser = new Json_parser();
|
||||
private final Ordered_hash datatype_hash = Ordered_hash_.New_bry();
|
||||
public Xob_wdata_pid(Db_conn conn) {
|
||||
this.conn = conn;
|
||||
}
|
||||
public Xob_wdata_pid Ctor(Xob_bldr bldr, Xowe_wiki wiki) {
|
||||
this.Cmd_ctor(bldr, wiki);
|
||||
return this;
|
||||
}
|
||||
public String Page_wkr__key() {return gplx.xowa.bldrs.Xob_cmd_keys.Key_wbase_pid;}
|
||||
public void Page_wkr__bgn() {this.Pid__bgn();}
|
||||
public void Page_wkr__run(Xowd_page_itm page) {
|
||||
if (page.Ns_id() != Wdata_wiki_mgr.Ns_property) return;
|
||||
|
||||
Json_doc jdoc = jdoc_parser.Parse(page.Text());
|
||||
if (jdoc == null) {
|
||||
bldr.Usr_dlg().Warn_many(GRP_KEY, "json.invalid", "json is invalid: ns=~{0} id=~{1}", page.Ns_id(), String_.new_u8(page.Ttl_page_db()));
|
||||
return;
|
||||
}
|
||||
Pid__run(jdoc);
|
||||
}
|
||||
public void Page_wkr__run_cleanup() {}
|
||||
public void Page_wkr__end() {this.Pid__end();}
|
||||
public void Pid__bgn() {
|
||||
if (conn == null) // conn will be null unless test
|
||||
conn = wiki.Data__core_mgr().Db__wbase().Conn();
|
||||
|
||||
// init datatype_hash
|
||||
Wbase_enum_hash enum_hash = Wbase_claim_type_.Reg;
|
||||
byte len = (byte)enum_hash.Len();
|
||||
for (byte i = 0; i < len; i++) {
|
||||
Wbase_claim_type claim_type = (Wbase_claim_type)enum_hash.Get_itm_or(i, null);
|
||||
datatype_hash.Add(Bry_.new_u8(claim_type.Key_for_scrib()), claim_type);
|
||||
}
|
||||
|
||||
// init wbase_pid
|
||||
tbl__pid = Wbase_pid_tbl.New_make(conn);
|
||||
tbl__pid.Create_tbl();
|
||||
tbl__pid.Insert_bgn();
|
||||
|
||||
// init wbase_prop
|
||||
tbl__prop = new Xowb_prop_tbl(conn);
|
||||
tbl__prop.Create_tbl();
|
||||
tbl__prop.Insert_bgn();
|
||||
}
|
||||
public void Pid__run(Json_doc jdoc) {
|
||||
Wdata_doc_parser wdoc_parser = app.Wiki_mgr().Wdata_mgr().Wdoc_parser(jdoc);
|
||||
byte[] pid = wdoc_parser.Parse_qid(jdoc);
|
||||
|
||||
// add datatype
|
||||
byte[] datatype = jdoc.Root_nde().Get_as_bry(Wdata_dict_mainsnak.Itm__datatype.Key_str());
|
||||
Wbase_claim_type claim_type = (Wbase_claim_type)datatype_hash.Get_by_or_fail(datatype);
|
||||
tbl__prop.Insert_cmd_by_batch(pid, claim_type.Tid());
|
||||
|
||||
// add langs
|
||||
Ordered_hash list = wdoc_parser.Parse_langvals(pid, jdoc, Bool_.Y);
|
||||
int len = list.Count();
|
||||
for (int i = 0; i < len; ++i) {
|
||||
Wdata_langtext_itm label = (Wdata_langtext_itm)list.Get_at(i);
|
||||
tbl__pid.Insert_cmd_by_batch(label.Lang(), label.Text(), pid);
|
||||
}
|
||||
}
|
||||
public void Pid__end() {
|
||||
tbl__pid.Insert_end();
|
||||
tbl__pid.Create_idx();
|
||||
tbl__prop.Insert_end();
|
||||
}
|
||||
private static final String GRP_KEY = "xowa.wdata.pid_wkr";
|
||||
}
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||
Copyright (C) 2012-2020 gnosygnu@gmail.com
|
||||
|
||||
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||
or alternatively under the terms of the Apache License Version 2.0.
|
||||
@ -13,74 +13,93 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.xtns.wbases.imports; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.wbases.*;
|
||||
import gplx.langs.jsons.*;
|
||||
import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.wkrs.*;
|
||||
import gplx.xowa.wikis.data.*; import gplx.dbs.*; import gplx.xowa.wikis.dbs.*; import gplx.xowa.wikis.data.tbls.*;
|
||||
import gplx.xowa.wikis.nss.*;
|
||||
import gplx.xowa.xtns.wbases.core.*; import gplx.xowa.xtns.wbases.dbs.*; import gplx.xowa.xtns.wbases.parsers.*;
|
||||
public class Xob_wdata_qid extends Xob_itm_dump_base implements Xob_page_wkr, Gfo_invk {
|
||||
private Db_conn conn;
|
||||
private Wbase_qid_tbl tbl;
|
||||
private final Object thread_lock = new Object();
|
||||
private Json_parser parser; private Xob_wbase_ns_parser ns_parser; private final Xob_wbase_ns_parser_rslt ns_parser_rslt = new Xob_wbase_ns_parser_rslt();
|
||||
public Xob_wdata_qid(Db_conn conn) {
|
||||
this.conn = conn;
|
||||
}
|
||||
public String Page_wkr__key() {return gplx.xowa.bldrs.Xob_cmd_keys.Key_wbase_qid;}
|
||||
public Xob_wdata_qid Ctor(Xob_bldr bldr, Xowe_wiki wiki) {this.Cmd_ctor(bldr, wiki); return this;}
|
||||
public void Page_wkr__bgn() {
|
||||
this.parser = bldr.App().Wiki_mgr().Wdata_mgr().Jdoc_parser();
|
||||
this.ns_parser = new Xob_wbase_ns_parser(bldr.App().Fsys_mgr().Cfg_site_meta_fil());
|
||||
this.Qid__bgn();
|
||||
}
|
||||
public void Page_wkr__run(Xowd_page_itm page) {
|
||||
if (page.Ns_id() != Xow_ns_.Tid__main) return; // qid pages are only in the Main Srch_rslt_cbk
|
||||
Json_doc jdoc = parser.Parse(page.Text());
|
||||
if (jdoc == null) {bldr.Usr_dlg().Warn_many("", "", "json is invalid: ns=~{0} id=~{1}", page.Ns_id(), String_.new_u8(page.Ttl_page_db())); return;}
|
||||
this.Qid__run(jdoc);
|
||||
}
|
||||
public void Page_wkr__run_cleanup() {}
|
||||
public void Page_wkr__end() {this.Qid__end();}
|
||||
public void Qid__bgn() {
|
||||
if (conn == null) {
|
||||
Xow_db_file wbase_db = Make_wbase_db(wiki.Db_mgr_as_sql().Core_data_mgr());
|
||||
conn = wbase_db.Conn();
|
||||
}
|
||||
tbl = Wbase_qid_tbl.New_make(conn, false);
|
||||
tbl.Create_tbl();
|
||||
tbl.Insert_bgn();
|
||||
}
|
||||
public void Qid__run(Json_doc jdoc) {
|
||||
synchronized (thread_lock) {
|
||||
Wdata_doc_parser wdoc_parser = app.Wiki_mgr().Wdata_mgr().Wdoc_parser(jdoc);
|
||||
byte[] qid = wdoc_parser.Parse_qid(jdoc);
|
||||
Bry_bfr tmp_bfr = Bry_bfr_.Reset(255);
|
||||
Ordered_hash sitelinks = wdoc_parser.Parse_sitelinks(qid, jdoc);
|
||||
int sitelinks_len = sitelinks.Count(); if (sitelinks_len == 0) return; // no subs; return;
|
||||
for (int i = 0; i < sitelinks_len; i++) { // iterate sitelinks
|
||||
Wdata_sitelink_itm sitelink = (Wdata_sitelink_itm)sitelinks.Get_at(i);
|
||||
byte[] sitelink_site = sitelink.Site(), sitelink_ttl = sitelink.Name();
|
||||
ns_parser.Find(ns_parser_rslt, sitelink_site, sitelink_ttl);
|
||||
int sitelink_ns = ns_parser_rslt.Ns_id();
|
||||
if (sitelink_ns != Xow_ns_.Tid__main) // ttl not in main; chop off ns portion; EX:Aide:French_title -> French_title
|
||||
sitelink_ttl = Bry_.Mid(sitelink_ttl, ns_parser_rslt.Ttl_bgn(), sitelink_ttl.length);
|
||||
sitelink_ttl = wiki.Lang().Case_mgr().Case_build_1st_upper(tmp_bfr, sitelink_ttl, 0, sitelink_ttl.length);
|
||||
tbl.Insert_cmd_by_batch(sitelink.Site(), sitelink_ns, Xoa_ttl.Replace_spaces(sitelink_ttl), qid); // NOTE: always convert spaces to underscores; EX: "A B" -> "A_B" DATE:2015-04-21
|
||||
}
|
||||
}
|
||||
}
|
||||
public void Qid__end() {
|
||||
tbl.Insert_end();
|
||||
tbl.Create_idx();
|
||||
}
|
||||
public static Xow_db_file Make_wbase_db(Xow_db_mgr db_mgr) {
|
||||
boolean db_is_all_or_few = db_mgr.Props().Layout_text().Tid_is_all_or_few();
|
||||
Xow_db_file wbase_db = db_is_all_or_few
|
||||
? db_mgr.Db__core()
|
||||
: db_mgr.Dbs__make_by_tid(Xow_db_file_.Tid__wbase);
|
||||
if (db_is_all_or_few)
|
||||
db_mgr.Db__wbase_(wbase_db);
|
||||
return wbase_db;
|
||||
}
|
||||
}
|
||||
package gplx.xowa.xtns.wbases.imports;
|
||||
|
||||
import gplx.Bry_;
|
||||
import gplx.Bry_bfr;
|
||||
import gplx.Bry_bfr_;
|
||||
import gplx.Gfo_invk;
|
||||
import gplx.Ordered_hash;
|
||||
import gplx.String_;
|
||||
import gplx.dbs.Db_conn;
|
||||
import gplx.langs.jsons.Json_doc;
|
||||
import gplx.langs.jsons.Json_parser;
|
||||
import gplx.xowa.Xoa_ttl;
|
||||
import gplx.xowa.Xowe_wiki;
|
||||
import gplx.xowa.bldrs.Xob_bldr;
|
||||
import gplx.xowa.bldrs.wkrs.Xob_itm_dump_base;
|
||||
import gplx.xowa.bldrs.wkrs.Xob_page_wkr;
|
||||
import gplx.xowa.wikis.data.Xow_db_file;
|
||||
import gplx.xowa.wikis.data.Xow_db_file_;
|
||||
import gplx.xowa.wikis.data.Xow_db_mgr;
|
||||
import gplx.xowa.wikis.data.tbls.Xowd_page_itm;
|
||||
import gplx.xowa.wikis.nss.Xow_ns_;
|
||||
import gplx.xowa.xtns.wbases.core.Wdata_sitelink_itm;
|
||||
import gplx.xowa.xtns.wbases.dbs.Wbase_qid_tbl;
|
||||
import gplx.xowa.xtns.wbases.parsers.Wdata_doc_parser;
|
||||
|
||||
public class Xob_wdata_qid extends Xob_itm_dump_base implements Xob_page_wkr, Gfo_invk {
|
||||
private Db_conn conn;
|
||||
private Wbase_qid_tbl tbl;
|
||||
private final Object thread_lock = new Object();
|
||||
private final Json_parser parser = new Json_parser();
|
||||
private Xob_wbase_ns_parser ns_parser; private final Xob_wbase_ns_parser_rslt ns_parser_rslt = new Xob_wbase_ns_parser_rslt();
|
||||
public Xob_wdata_qid(Db_conn conn) {
|
||||
this.conn = conn;
|
||||
}
|
||||
public String Page_wkr__key() {return gplx.xowa.bldrs.Xob_cmd_keys.Key_wbase_qid;}
|
||||
public Xob_wdata_qid Ctor(Xob_bldr bldr, Xowe_wiki wiki) {this.Cmd_ctor(bldr, wiki); return this;}
|
||||
public void Page_wkr__bgn() {
|
||||
this.ns_parser = new Xob_wbase_ns_parser(bldr.App().Fsys_mgr().Cfg_site_meta_fil());
|
||||
this.Qid__bgn();
|
||||
}
|
||||
public void Page_wkr__run(Xowd_page_itm page) {
|
||||
if (page.Ns_id() != Xow_ns_.Tid__main) return; // qid pages are only in the Main Srch_rslt_cbk
|
||||
Json_doc jdoc = parser.Parse(page.Text());
|
||||
if (jdoc == null) {bldr.Usr_dlg().Warn_many("", "", "json is invalid: ns=~{0} id=~{1}", page.Ns_id(), String_.new_u8(page.Ttl_page_db())); return;}
|
||||
this.Qid__run(jdoc);
|
||||
}
|
||||
public void Page_wkr__run_cleanup() {}
|
||||
public void Page_wkr__end() {this.Qid__end();}
|
||||
public void Qid__bgn() {
|
||||
if (conn == null) {
|
||||
Xow_db_file wbase_db = Make_wbase_db(wiki.Db_mgr_as_sql().Core_data_mgr());
|
||||
conn = wbase_db.Conn();
|
||||
}
|
||||
tbl = Wbase_qid_tbl.New_make(conn, false);
|
||||
tbl.Create_tbl();
|
||||
tbl.Insert_bgn();
|
||||
}
|
||||
public void Qid__run(Json_doc jdoc) {
|
||||
synchronized (thread_lock) {
|
||||
Wdata_doc_parser wdoc_parser = app.Wiki_mgr().Wdata_mgr().Wdoc_parser(jdoc);
|
||||
byte[] qid = wdoc_parser.Parse_qid(jdoc);
|
||||
Bry_bfr tmp_bfr = Bry_bfr_.Reset(255);
|
||||
Ordered_hash sitelinks = wdoc_parser.Parse_sitelinks(qid, jdoc);
|
||||
int sitelinks_len = sitelinks.Count(); if (sitelinks_len == 0) return; // no subs; return;
|
||||
for (int i = 0; i < sitelinks_len; i++) { // iterate sitelinks
|
||||
Wdata_sitelink_itm sitelink = (Wdata_sitelink_itm)sitelinks.Get_at(i);
|
||||
byte[] sitelink_site = sitelink.Site(), sitelink_ttl = sitelink.Name();
|
||||
ns_parser.Find(ns_parser_rslt, sitelink_site, sitelink_ttl);
|
||||
int sitelink_ns = ns_parser_rslt.Ns_id();
|
||||
if (sitelink_ns != Xow_ns_.Tid__main) // ttl not in main; chop off ns portion; EX:Aide:French_title -> French_title
|
||||
sitelink_ttl = Bry_.Mid(sitelink_ttl, ns_parser_rslt.Ttl_bgn(), sitelink_ttl.length);
|
||||
sitelink_ttl = wiki.Lang().Case_mgr().Case_build_1st_upper(tmp_bfr, sitelink_ttl, 0, sitelink_ttl.length);
|
||||
tbl.Insert_cmd_by_batch(sitelink.Site(), sitelink_ns, Xoa_ttl.Replace_spaces(sitelink_ttl), qid); // NOTE: always convert spaces to underscores; EX: "A B" -> "A_B" DATE:2015-04-21
|
||||
}
|
||||
}
|
||||
}
|
||||
public void Qid__end() {
|
||||
tbl.Insert_end();
|
||||
tbl.Create_idx();
|
||||
}
|
||||
public static Xow_db_file Make_wbase_db(Xow_db_mgr db_mgr) {
|
||||
boolean db_is_all_or_few = db_mgr.Props().Layout_text().Tid_is_all_or_few();
|
||||
Xow_db_file wbase_db = db_is_all_or_few
|
||||
? db_mgr.Db__core()
|
||||
: db_mgr.Dbs__make_by_tid(Xow_db_file_.Tid__wbase);
|
||||
if (db_is_all_or_few)
|
||||
db_mgr.Db__wbase_(wbase_db);
|
||||
return wbase_db;
|
||||
}
|
||||
}
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||
Copyright (C) 2012-2020 gnosygnu@gmail.com
|
||||
|
||||
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||
or alternatively under the terms of the Apache License Version 2.0.
|
||||
@ -13,90 +13,114 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.xtns.wbases.imports.json; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.wbases.*; import gplx.xowa.xtns.wbases.imports.*;
|
||||
import gplx.core.ios.*;
|
||||
import gplx.langs.jsons.*;
|
||||
import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.cmds.*; import gplx.xowa.bldrs.cmds.texts.sqls.*; import gplx.xowa.apps.apis.xowa.bldrs.imports.*;
|
||||
import gplx.xowa.wikis.*; import gplx.xowa.wikis.nss.*; import gplx.xowa.wikis.data.*; import gplx.xowa.wikis.data.tbls.*;
|
||||
import gplx.xowa.xtns.wbases.core.*; import gplx.xowa.xtns.wbases.parsers.*;
|
||||
class Xowb_json_dump_db {
|
||||
private final Xoae_app app; private final Gfo_usr_dlg usr_dlg; private final Xowe_wiki wiki; private final Xob_bldr bldr;
|
||||
private final Json_parser json_parser;
|
||||
private final Xob_wdata_pid pid_cmd; private final Xob_wdata_qid qid_cmd;
|
||||
private Xow_ns_mgr ns_mgr; private Xow_db_mgr db_mgr;
|
||||
private Xowd_page_tbl page_tbl; private Xob_ns_to_db_mgr ns_to_db_mgr;
|
||||
private Io_stream_zip_mgr text_zip_mgr; private byte text_zip_tid;
|
||||
private DateAdp page_modified_on;
|
||||
private int page_id = 0, page_count_main = 0;
|
||||
public Xowb_json_dump_db(Xob_bldr bldr, Xowe_wiki wiki) {
|
||||
this.app = bldr.App(); this.usr_dlg = app.Usr_dlg(); this.wiki = wiki; this.bldr = bldr;
|
||||
this.json_parser = bldr.App().Wiki_mgr().Wdata_mgr().Jdoc_parser();
|
||||
this.ns_mgr = wiki.Ns_mgr();
|
||||
this.pid_cmd = new Xob_wdata_pid(wiki.Data__core_mgr().Db__wbase().Conn());
|
||||
this.qid_cmd = new Xob_wdata_qid(wiki.Data__core_mgr().Db__wbase().Conn());
|
||||
}
|
||||
public void Parse_all_bgn(long src_fil_len, String src_fil_name) {
|
||||
// load wiki
|
||||
Xowe_wiki_.Create(wiki, src_fil_len, src_fil_name);
|
||||
this.db_mgr = wiki.Data__core_mgr();
|
||||
this.page_tbl = db_mgr.Tbl__page();
|
||||
pid_cmd.Cmd_ctor(bldr, wiki); qid_cmd.Cmd_ctor(bldr, wiki);
|
||||
|
||||
// create ns_mgr
|
||||
wiki.Ns_mgr().Add_defaults();
|
||||
wiki.Ns_mgr().Add_new(Wdata_wiki_mgr.Ns_property, Wdata_wiki_mgr.Ns_property_name);
|
||||
wiki.Ns_mgr().Init();
|
||||
|
||||
// init ns_map
|
||||
this.ns_to_db_mgr = new Xob_ns_to_db_mgr(new Xob_ns_to_db_wkr__text(), db_mgr, Xobldr_cfg.Max_size__text(app));
|
||||
byte[] ns_file_map = Xobldr_cfg.New_ns_file_map(app, src_fil_len);
|
||||
Xob_ns_file_itm.Init_ns_bldr_data(Xow_db_file_.Tid__text, wiki.Ns_mgr(), ns_file_map);
|
||||
|
||||
// start import
|
||||
this.text_zip_mgr = wiki.Utl__zip_mgr();
|
||||
this.text_zip_tid = Xobldr_cfg.Zip_mode__text(app);
|
||||
this.page_modified_on = Datetime_now.Get();
|
||||
page_tbl.Insert_bgn();
|
||||
qid_cmd.Page_wkr__bgn();
|
||||
pid_cmd.Pid__bgn();
|
||||
}
|
||||
public void Parse_doc(byte[] json_bry) {
|
||||
// parse to jdoc
|
||||
Json_doc jdoc = json_parser.Parse(json_bry);
|
||||
if (jdoc == null) {usr_dlg.Warn_many("", "", "wbase.json_dump:json is invalid: json=~{0}", json_bry); return;}
|
||||
|
||||
// extract xid
|
||||
byte[] id = jdoc.Get_val_as_bry_or(Bry__id_key, null);
|
||||
if (id == null) {usr_dlg.Warn_many("", "", "wbase.json_dump:id is invalid: json=~{0}", json_bry); return;}
|
||||
boolean jdoc_is_qid = Bry_.Has_at_bgn(id, Byte_ascii.Ltr_Q, 0);
|
||||
Xow_ns ns = jdoc_is_qid ? ns_mgr.Ns_main() : ns_mgr.Ids_get_or_null(Wdata_wiki_mgr.Ns_property);
|
||||
|
||||
// create page entry
|
||||
int random_int = ns.Count() + 1; ns.Count_(random_int);
|
||||
byte[] json_zip = text_zip_mgr.Zip(text_zip_tid, json_bry);
|
||||
Xow_db_file text_db = ns_to_db_mgr.Get_by_ns(ns.Bldr_data(), json_zip.length);
|
||||
db_mgr.Create_page(page_tbl, text_db.Tbl__text(), ++page_id, ns.Id(), id, Bool_.N, page_modified_on, json_zip, json_bry.length, random_int, text_db.Id(), -1);
|
||||
|
||||
// insert text
|
||||
if (jdoc_is_qid) {
|
||||
qid_cmd.Qid__run(jdoc);
|
||||
++page_count_main;
|
||||
}
|
||||
else
|
||||
pid_cmd.Pid__run(jdoc);
|
||||
}
|
||||
public void Parse_all_end() {
|
||||
page_tbl.Insert_end();
|
||||
page_tbl.Create_idx();
|
||||
qid_cmd.Qid__end();
|
||||
pid_cmd.Pid__end();
|
||||
ns_to_db_mgr.Rls_all();
|
||||
|
||||
// cleanup core
|
||||
Xow_db_file db_core = db_mgr.Db__core();
|
||||
db_core.Tbl__site_stats().Update(page_count_main, page_id, ns_mgr.Ns_file().Count()); // save page stats
|
||||
db_core.Tbl__ns().Insert(ns_mgr); // save ns
|
||||
db_mgr.Tbl__cfg().Insert_str(Xowd_cfg_key_.Grp__wiki_init, Xowd_cfg_key_.Key__init__modified_latest, page_modified_on.XtoStr_fmt(DateAdp_.Fmt_iso8561_date_time));
|
||||
}
|
||||
private static final byte[] Bry__id_key = Bry_.new_a7("id");
|
||||
}
|
||||
package gplx.xowa.xtns.wbases.imports.json;
|
||||
|
||||
import gplx.Bool_;
|
||||
import gplx.Bry_;
|
||||
import gplx.Byte_ascii;
|
||||
import gplx.DateAdp;
|
||||
import gplx.DateAdp_;
|
||||
import gplx.Datetime_now;
|
||||
import gplx.Gfo_usr_dlg;
|
||||
import gplx.core.ios.Io_stream_zip_mgr;
|
||||
import gplx.langs.jsons.Json_doc;
|
||||
import gplx.langs.jsons.Json_parser;
|
||||
import gplx.xowa.Xoae_app;
|
||||
import gplx.xowa.Xowe_wiki;
|
||||
import gplx.xowa.Xowe_wiki_;
|
||||
import gplx.xowa.bldrs.Xob_bldr;
|
||||
import gplx.xowa.bldrs.Xob_ns_to_db_mgr;
|
||||
import gplx.xowa.bldrs.Xobldr_cfg;
|
||||
import gplx.xowa.bldrs.cmds.Xob_ns_file_itm;
|
||||
import gplx.xowa.bldrs.cmds.texts.sqls.Xob_ns_to_db_wkr__text;
|
||||
import gplx.xowa.wikis.data.Xow_db_file;
|
||||
import gplx.xowa.wikis.data.Xow_db_file_;
|
||||
import gplx.xowa.wikis.data.Xow_db_mgr;
|
||||
import gplx.xowa.wikis.data.Xowd_cfg_key_;
|
||||
import gplx.xowa.wikis.data.tbls.Xowd_page_tbl;
|
||||
import gplx.xowa.wikis.nss.Xow_ns;
|
||||
import gplx.xowa.wikis.nss.Xow_ns_mgr;
|
||||
import gplx.xowa.xtns.wbases.Wdata_wiki_mgr;
|
||||
import gplx.xowa.xtns.wbases.imports.Xob_wdata_pid;
|
||||
import gplx.xowa.xtns.wbases.imports.Xob_wdata_qid;
|
||||
|
||||
class Xowb_json_dump_db {
|
||||
private final Xoae_app app; private final Gfo_usr_dlg usr_dlg; private final Xowe_wiki wiki; private final Xob_bldr bldr;
|
||||
private final Json_parser json_parser = new Json_parser();
|
||||
private final Xob_wdata_pid pid_cmd; private final Xob_wdata_qid qid_cmd;
|
||||
private Xow_ns_mgr ns_mgr; private Xow_db_mgr db_mgr;
|
||||
private Xowd_page_tbl page_tbl; private Xob_ns_to_db_mgr ns_to_db_mgr;
|
||||
private Io_stream_zip_mgr text_zip_mgr; private byte text_zip_tid;
|
||||
private DateAdp page_modified_on;
|
||||
private int page_id = 0, page_count_main = 0;
|
||||
public Xowb_json_dump_db(Xob_bldr bldr, Xowe_wiki wiki) {
|
||||
this.app = bldr.App(); this.usr_dlg = app.Usr_dlg(); this.wiki = wiki; this.bldr = bldr;
|
||||
this.ns_mgr = wiki.Ns_mgr();
|
||||
this.pid_cmd = new Xob_wdata_pid(wiki.Data__core_mgr().Db__wbase().Conn());
|
||||
this.qid_cmd = new Xob_wdata_qid(wiki.Data__core_mgr().Db__wbase().Conn());
|
||||
}
|
||||
public void Parse_all_bgn(long src_fil_len, String src_fil_name) {
|
||||
// load wiki
|
||||
Xowe_wiki_.Create(wiki, src_fil_len, src_fil_name);
|
||||
this.db_mgr = wiki.Data__core_mgr();
|
||||
this.page_tbl = db_mgr.Tbl__page();
|
||||
pid_cmd.Cmd_ctor(bldr, wiki); qid_cmd.Cmd_ctor(bldr, wiki);
|
||||
|
||||
// create ns_mgr
|
||||
wiki.Ns_mgr().Add_defaults();
|
||||
wiki.Ns_mgr().Add_new(Wdata_wiki_mgr.Ns_property, Wdata_wiki_mgr.Ns_property_name);
|
||||
wiki.Ns_mgr().Init();
|
||||
|
||||
// init ns_map
|
||||
this.ns_to_db_mgr = new Xob_ns_to_db_mgr(new Xob_ns_to_db_wkr__text(), db_mgr, Xobldr_cfg.Max_size__text(app));
|
||||
byte[] ns_file_map = Xobldr_cfg.New_ns_file_map(app, src_fil_len);
|
||||
Xob_ns_file_itm.Init_ns_bldr_data(Xow_db_file_.Tid__text, wiki.Ns_mgr(), ns_file_map);
|
||||
|
||||
// start import
|
||||
this.text_zip_mgr = wiki.Utl__zip_mgr();
|
||||
this.text_zip_tid = Xobldr_cfg.Zip_mode__text(app);
|
||||
this.page_modified_on = Datetime_now.Get();
|
||||
page_tbl.Insert_bgn();
|
||||
qid_cmd.Page_wkr__bgn();
|
||||
pid_cmd.Pid__bgn();
|
||||
}
|
||||
public void Parse_doc(byte[] json_bry) {
|
||||
// parse to jdoc
|
||||
Json_doc jdoc = json_parser.Parse(json_bry);
|
||||
if (jdoc == null) {usr_dlg.Warn_many("", "", "wbase.json_dump:json is invalid: json=~{0}", json_bry); return;}
|
||||
|
||||
// extract xid
|
||||
byte[] id = jdoc.Get_val_as_bry_or(Bry__id_key, null);
|
||||
if (id == null) {usr_dlg.Warn_many("", "", "wbase.json_dump:id is invalid: json=~{0}", json_bry); return;}
|
||||
boolean jdoc_is_qid = Bry_.Has_at_bgn(id, Byte_ascii.Ltr_Q, 0);
|
||||
Xow_ns ns = jdoc_is_qid ? ns_mgr.Ns_main() : ns_mgr.Ids_get_or_null(Wdata_wiki_mgr.Ns_property);
|
||||
|
||||
// create page entry
|
||||
int random_int = ns.Count() + 1; ns.Count_(random_int);
|
||||
byte[] json_zip = text_zip_mgr.Zip(text_zip_tid, json_bry);
|
||||
Xow_db_file text_db = ns_to_db_mgr.Get_by_ns(ns.Bldr_data(), json_zip.length);
|
||||
db_mgr.Create_page(page_tbl, text_db.Tbl__text(), ++page_id, ns.Id(), id, Bool_.N, page_modified_on, json_zip, json_bry.length, random_int, text_db.Id(), -1);
|
||||
|
||||
// insert text
|
||||
if (jdoc_is_qid) {
|
||||
qid_cmd.Qid__run(jdoc);
|
||||
++page_count_main;
|
||||
}
|
||||
else
|
||||
pid_cmd.Pid__run(jdoc);
|
||||
}
|
||||
public void Parse_all_end() {
|
||||
page_tbl.Insert_end();
|
||||
page_tbl.Create_idx();
|
||||
qid_cmd.Qid__end();
|
||||
pid_cmd.Pid__end();
|
||||
ns_to_db_mgr.Rls_all();
|
||||
|
||||
// cleanup core
|
||||
Xow_db_file db_core = db_mgr.Db__core();
|
||||
db_core.Tbl__site_stats().Update(page_count_main, page_id, ns_mgr.Ns_file().Count()); // save page stats
|
||||
db_core.Tbl__ns().Insert(ns_mgr); // save ns
|
||||
db_mgr.Tbl__cfg().Insert_str(Xowd_cfg_key_.Grp__wiki_init, Xowd_cfg_key_.Key__init__modified_latest, page_modified_on.XtoStr_fmt(DateAdp_.Fmt_iso8561_date_time));
|
||||
}
|
||||
private static final byte[] Bry__id_key = Bry_.new_a7("id");
|
||||
}
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||
Copyright (C) 2012-2020 gnosygnu@gmail.com
|
||||
|
||||
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||
or alternatively under the terms of the Apache License Version 2.0.
|
||||
@ -13,147 +13,164 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.xtns.wbases.stores; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.wbases.*;
|
||||
import gplx.core.logs.*; import gplx.core.primitives.*;
|
||||
import gplx.langs.jsons.*;
|
||||
import gplx.xowa.wikis.pages.*;
|
||||
import gplx.xowa.xtns.wbases.core.*;
|
||||
public class Wbase_doc_mgr {
|
||||
private final Wdata_wiki_mgr wbase_mgr;
|
||||
private final Wbase_qid_mgr qid_mgr;
|
||||
private Wbase_doc_cache doc_cache;
|
||||
private final Object thread_lock = new Object();
|
||||
private final Ordered_hash wbase_db_hash = Ordered_hash_.New_bry();
|
||||
private final Gfo_log_wtr wbase_db_log;
|
||||
public Wbase_doc_mgr(Wdata_wiki_mgr wbase_mgr, Wbase_qid_mgr qid_mgr) {
|
||||
this.wbase_mgr = wbase_mgr;
|
||||
this.qid_mgr = qid_mgr;
|
||||
this.doc_cache = new Wbase_doc_cache__hash();
|
||||
this.wbase_db_log = Gfo_log_wtr.New_dflt("wbase", "db_log_{0}.csv");
|
||||
}
|
||||
public void Enabled_(boolean v) {this.enabled = v;} private boolean enabled;
|
||||
public void Cache__init(String cache_type, long cache_max, long compress_size, long used_weight) {
|
||||
if (String_.Eq(cache_type, "null")) doc_cache = new Wbase_doc_cache__null();
|
||||
else if (String_.Eq(cache_type, "hash")) doc_cache = new Wbase_doc_cache__hash();
|
||||
else if (String_.Eq(cache_type, "mru" )) doc_cache = new Wbase_doc_cache__mru(cache_max, compress_size, used_weight);
|
||||
else throw Err_.new_unhandled_default(cache_type);
|
||||
}
|
||||
public void Cleanup() {
|
||||
doc_cache.Term();
|
||||
wbase_db_log__flush();
|
||||
}
|
||||
private void wbase_db_log__flush() {
|
||||
int len = wbase_db_hash.Len();
|
||||
Bry_bfr tmp_bfr = Bry_bfr_.New();
|
||||
for (int i = 0; i < len; i++) {
|
||||
Wbase_db_log_itm itm = (Wbase_db_log_itm)wbase_db_hash.Get_at(i);
|
||||
tmp_bfr.Add(itm.Ttl());
|
||||
tmp_bfr.Add_byte_pipe().Add_int_variable(itm.Count());
|
||||
tmp_bfr.Add_byte_pipe().Add_int_variable(itm.Elapsed());
|
||||
tmp_bfr.Add_byte_nl();
|
||||
wbase_db_log.Write(tmp_bfr);
|
||||
}
|
||||
wbase_db_log.Flush();
|
||||
}
|
||||
public void Clear() {
|
||||
synchronized (thread_lock) { // LOCK:app-level
|
||||
doc_cache.Clear();
|
||||
}
|
||||
}
|
||||
public Wdata_doc Get_by_ttl_or_null(Xowe_wiki wiki, Xoa_ttl ttl) { // "enwiki", "Earth" -> "Q2" wdoc
|
||||
byte[] qid_bry = qid_mgr.Get_qid_or_null(wiki, ttl); // EX: "enwiki", "Earth" -> "Q2"
|
||||
return qid_bry == null ? null : this.Get_by_exact_id_or_null(qid_bry);
|
||||
}
|
||||
public Wdata_doc Get_by_xid_or_null(byte[] xid) {return Get_by_loose_id_or_null(Wbase_pid.Prepend_property_if_needed(xid));}// scribunto passes either p1 or q1; convert p1 to "Property:p1"
|
||||
public Wdata_doc Get_by_loose_id_or_null(byte[] ttl_bry) {
|
||||
return Get_by_exact_id_or_null(ttl_bry);
|
||||
}
|
||||
public Wdata_doc Get_by_exact_id_or_null(byte[] ttl_bry) {// must correct case and ns; EX:"Q2" or "Property:P1"; not "q2" or "P2"
|
||||
// load from cache
|
||||
Wdata_doc rv = null;
|
||||
synchronized (thread_lock) {
|
||||
rv = doc_cache.Get_or_null(ttl_bry);
|
||||
if (rv == null) {
|
||||
// load from db
|
||||
rv = Load_wdoc_or_null(ttl_bry);
|
||||
if (rv == null) return null; // page not found
|
||||
Add(ttl_bry, rv);// NOTE: use ttl_bry, not rv.Qid; allows subsequent lookups to skip this redirect cycle
|
||||
}
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
private Wdata_doc Load_wdoc_or_null(byte[] ttl_bry) { // EX:"Q2" or "Property:P1"
|
||||
if (!enabled) return null;
|
||||
|
||||
// loggging
|
||||
Wbase_db_log_itm wbase_db_itm = (Wbase_db_log_itm)wbase_db_hash.Get_by(ttl_bry);
|
||||
if (wbase_db_itm == null) {
|
||||
wbase_db_itm = new Wbase_db_log_itm(ttl_bry);
|
||||
wbase_db_hash.Add(ttl_bry, wbase_db_itm);
|
||||
}
|
||||
long time_bgn = gplx.core.envs.System_.Ticks();
|
||||
|
||||
Wdata_doc rv = null;
|
||||
synchronized (thread_lock) { // LOCK:app-level; jdoc_parser; moved synchronized higher up; DATE:2016-09-03
|
||||
byte[] cur_ttl_bry = ttl_bry;
|
||||
int load_count = -1;
|
||||
while (load_count < 2) { // limit to 2 tries (i.e.: 1 redirect)
|
||||
// parse ttl; note that "q2" will get parsed to "Q2" b/c of ns casing
|
||||
Xoa_ttl cur_ttl = wbase_mgr.Wdata_wiki().Ttl_parse(cur_ttl_bry);
|
||||
if (cur_ttl == null) {
|
||||
Gfo_usr_dlg_.Instance.Warn_many("", "", "invalid wbase ttl: orig=~{0} cur=~{1}", ttl_bry, cur_ttl_bry);
|
||||
break;
|
||||
}
|
||||
|
||||
// get page
|
||||
Xoae_page page = wbase_mgr.Wdata_wiki().Data_mgr().Load_page_by_ttl(cur_ttl);
|
||||
if (!page.Db().Page().Exists()) break;
|
||||
|
||||
// parse jdoc
|
||||
Json_doc jdoc = wbase_mgr.Jdoc_parser().Parse(page.Db().Text().Text_bry());
|
||||
if (jdoc == null) {
|
||||
Gfo_usr_dlg_.Instance.Warn_many("", "", "invalid jdoc for ttl: orig=~{0} cur=~{1}", ttl_bry, cur_ttl_bry);
|
||||
break;
|
||||
}
|
||||
|
||||
// check for redirect; EX: {"entity":"Q22350516","redirect":"Q21006972"}; PAGE:fr.w:Tour_du_Táchira_2016; DATE:2016-08-13
|
||||
Json_nde jdoc_root = jdoc.Root_nde();
|
||||
byte[] redirect_ttl = jdoc_root.Get_as_bry_or(Bry__redirect, null);
|
||||
if (redirect_ttl != null) {
|
||||
cur_ttl_bry = redirect_ttl;
|
||||
load_count++;
|
||||
continue;
|
||||
}
|
||||
|
||||
// is json doc, and not a redirect; return
|
||||
rv = new Wdata_doc(wbase_mgr, jdoc, cur_ttl_bry);
|
||||
break;
|
||||
}
|
||||
if (rv == null && load_count >= 2)
|
||||
Gfo_usr_dlg_.Instance.Warn_many("", "", "too many redirects for ttl: orig=~{0} cur=~{1}", ttl_bry, cur_ttl_bry);
|
||||
}
|
||||
|
||||
wbase_db_itm.Update(gplx.core.envs.System_.Ticks__elapsed_in_frac(time_bgn));
|
||||
return rv;
|
||||
}
|
||||
private static final byte[] Bry__redirect = Bry_.new_a7("redirect");
|
||||
|
||||
public void Add(byte[] full_db, Wdata_doc page) { // TEST:
|
||||
synchronized (thread_lock) { // LOCK:app-level
|
||||
if (doc_cache.Get_or_null(full_db) == null)
|
||||
doc_cache.Add(full_db, page);
|
||||
}
|
||||
}
|
||||
}
|
||||
/** Per-title log entry: counts how often {@code Update} was called and sums the elapsed values passed to it. */
class Wbase_db_log_itm {
    private final byte[] ttl;
    private int count;
    private int elapsed;

    /** @param ttl title bytes this entry is keyed by; stored as-is (no copy) */
    public Wbase_db_log_itm(byte[] ttl) {
        this.ttl = ttl;
    }

    public byte[] Ttl() {
        return ttl;
    }

    public int Count() {
        return count;
    }

    public int Elapsed() {
        return elapsed;
    }

    /** Records one more occurrence and adds {@code elapsed_diff} to the running total. */
    public void Update(int elapsed_diff) {
        elapsed = elapsed + elapsed_diff;
        count = count + 1;
    }
}
|
||||
package gplx.xowa.xtns.wbases.stores;
|
||||
|
||||
import gplx.Bry_;
|
||||
import gplx.Bry_bfr;
|
||||
import gplx.Bry_bfr_;
|
||||
import gplx.Err_;
|
||||
import gplx.Gfo_usr_dlg_;
|
||||
import gplx.Ordered_hash;
|
||||
import gplx.Ordered_hash_;
|
||||
import gplx.String_;
|
||||
import gplx.core.logs.Gfo_log_wtr;
|
||||
import gplx.langs.jsons.Json_doc;
|
||||
import gplx.langs.jsons.Json_nde;
|
||||
import gplx.langs.jsons.Json_parser;
|
||||
import gplx.xowa.Xoa_ttl;
|
||||
import gplx.xowa.Xoae_page;
|
||||
import gplx.xowa.Xowe_wiki;
|
||||
import gplx.xowa.xtns.wbases.Wdata_doc;
|
||||
import gplx.xowa.xtns.wbases.Wdata_wiki_mgr;
|
||||
import gplx.xowa.xtns.wbases.core.Wbase_pid;
|
||||
|
||||
public class Wbase_doc_mgr {
|
||||
private final Wdata_wiki_mgr wbase_mgr;
|
||||
private final Wbase_qid_mgr qid_mgr;
|
||||
private Wbase_doc_cache doc_cache;
|
||||
private final Object thread_lock = new Object();
|
||||
private final Ordered_hash wbase_db_hash = Ordered_hash_.New_bry();
|
||||
private final Gfo_log_wtr wbase_db_log;
|
||||
private final Json_parser jsonParser = new Json_parser();
|
||||
public Wbase_doc_mgr(Wdata_wiki_mgr wbase_mgr, Wbase_qid_mgr qid_mgr) {
|
||||
this.wbase_mgr = wbase_mgr;
|
||||
this.qid_mgr = qid_mgr;
|
||||
this.doc_cache = new Wbase_doc_cache__hash();
|
||||
this.wbase_db_log = Gfo_log_wtr.New_dflt("wbase", "db_log_{0}.csv");
|
||||
}
|
||||
public void Enabled_(boolean v) {this.enabled = v;} private boolean enabled;
|
||||
public void Cache__init(String cache_type, long cache_max, long compress_size, long used_weight) {
|
||||
if (String_.Eq(cache_type, "null")) doc_cache = new Wbase_doc_cache__null();
|
||||
else if (String_.Eq(cache_type, "hash")) doc_cache = new Wbase_doc_cache__hash();
|
||||
else if (String_.Eq(cache_type, "mru" )) doc_cache = new Wbase_doc_cache__mru(cache_max, compress_size, used_weight);
|
||||
else throw Err_.new_unhandled_default(cache_type);
|
||||
}
|
||||
public void Cleanup() {
|
||||
doc_cache.Term();
|
||||
wbase_db_log__flush();
|
||||
}
|
||||
private void wbase_db_log__flush() {
|
||||
int len = wbase_db_hash.Len();
|
||||
Bry_bfr tmp_bfr = Bry_bfr_.New();
|
||||
for (int i = 0; i < len; i++) {
|
||||
Wbase_db_log_itm itm = (Wbase_db_log_itm)wbase_db_hash.Get_at(i);
|
||||
tmp_bfr.Add(itm.Ttl());
|
||||
tmp_bfr.Add_byte_pipe().Add_int_variable(itm.Count());
|
||||
tmp_bfr.Add_byte_pipe().Add_int_variable(itm.Elapsed());
|
||||
tmp_bfr.Add_byte_nl();
|
||||
wbase_db_log.Write(tmp_bfr);
|
||||
}
|
||||
wbase_db_log.Flush();
|
||||
}
|
||||
public void Clear() {
|
||||
synchronized (thread_lock) { // LOCK:app-level
|
||||
doc_cache.Clear();
|
||||
}
|
||||
}
|
||||
public Wdata_doc Get_by_ttl_or_null(Xowe_wiki wiki, Xoa_ttl ttl) { // "enwiki", "Earth" -> "Q2" wdoc
|
||||
byte[] qid_bry = qid_mgr.Get_qid_or_null(wiki, ttl); // EX: "enwiki", "Earth" -> "Q2"
|
||||
return qid_bry == null ? null : this.Get_by_exact_id_or_null(qid_bry);
|
||||
}
|
||||
public Wdata_doc Get_by_xid_or_null(byte[] xid) {return Get_by_loose_id_or_null(Wbase_pid.Prepend_property_if_needed(xid));}// scribunto passes either p1 or q1; convert p1 to "Property:p1"
|
||||
public Wdata_doc Get_by_loose_id_or_null(byte[] ttl_bry) {
|
||||
return Get_by_exact_id_or_null(ttl_bry);
|
||||
}
|
||||
public Wdata_doc Get_by_exact_id_or_null(byte[] ttl_bry) {// must correct case and ns; EX:"Q2" or "Property:P1"; not "q2" or "P2"
|
||||
// load from cache
|
||||
Wdata_doc rv = null;
|
||||
synchronized (thread_lock) {
|
||||
rv = doc_cache.Get_or_null(ttl_bry);
|
||||
if (rv == null) {
|
||||
// load from db
|
||||
rv = Load_wdoc_or_null(ttl_bry);
|
||||
if (rv == null) return null; // page not found
|
||||
Add(ttl_bry, rv);// NOTE: use ttl_bry, not rv.Qid; allows subsequent lookups to skip this redirect cycle
|
||||
}
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
private Wdata_doc Load_wdoc_or_null(byte[] ttl_bry) { // EX:"Q2" or "Property:P1"
|
||||
if (!enabled) return null;
|
||||
|
||||
// loggging
|
||||
Wbase_db_log_itm wbase_db_itm = (Wbase_db_log_itm)wbase_db_hash.Get_by(ttl_bry);
|
||||
if (wbase_db_itm == null) {
|
||||
wbase_db_itm = new Wbase_db_log_itm(ttl_bry);
|
||||
wbase_db_hash.Add(ttl_bry, wbase_db_itm);
|
||||
}
|
||||
long time_bgn = gplx.core.envs.System_.Ticks();
|
||||
|
||||
Wdata_doc rv = null;
|
||||
synchronized (thread_lock) { // LOCK:app-level; jdoc_parser; moved synchronized higher up; DATE:2016-09-03
|
||||
byte[] cur_ttl_bry = ttl_bry;
|
||||
int load_count = -1;
|
||||
while (load_count < 2) { // limit to 2 tries (i.e.: 1 redirect)
|
||||
// parse ttl; note that "q2" will get parsed to "Q2" b/c of ns casing
|
||||
Xoa_ttl cur_ttl = wbase_mgr.Wdata_wiki().Ttl_parse(cur_ttl_bry);
|
||||
if (cur_ttl == null) {
|
||||
Gfo_usr_dlg_.Instance.Warn_many("", "", "invalid wbase ttl: orig=~{0} cur=~{1}", ttl_bry, cur_ttl_bry);
|
||||
break;
|
||||
}
|
||||
|
||||
// get page
|
||||
Xoae_page page = wbase_mgr.Wdata_wiki().Data_mgr().Load_page_by_ttl(cur_ttl);
|
||||
if (!page.Db().Page().Exists()) break;
|
||||
|
||||
// parse jdoc
|
||||
Json_doc jdoc = jsonParser.Parse(page.Db().Text().Text_bry());
|
||||
if (jdoc == null) {
|
||||
Gfo_usr_dlg_.Instance.Warn_many("", "", "invalid jdoc for ttl: orig=~{0} cur=~{1}", ttl_bry, cur_ttl_bry);
|
||||
break;
|
||||
}
|
||||
|
||||
// check for redirect; EX: {"entity":"Q22350516","redirect":"Q21006972"}; PAGE:fr.w:Tour_du_Táchira_2016; DATE:2016-08-13
|
||||
Json_nde jdoc_root = jdoc.Root_nde();
|
||||
byte[] redirect_ttl = jdoc_root.Get_as_bry_or(Bry__redirect, null);
|
||||
if (redirect_ttl != null) {
|
||||
cur_ttl_bry = redirect_ttl;
|
||||
load_count++;
|
||||
continue;
|
||||
}
|
||||
|
||||
// is json doc, and not a redirect; return
|
||||
rv = new Wdata_doc(wbase_mgr, jdoc, cur_ttl_bry);
|
||||
break;
|
||||
}
|
||||
if (rv == null && load_count >= 2)
|
||||
Gfo_usr_dlg_.Instance.Warn_many("", "", "too many redirects for ttl: orig=~{0} cur=~{1}", ttl_bry, cur_ttl_bry);
|
||||
}
|
||||
|
||||
wbase_db_itm.Update(gplx.core.envs.System_.Ticks__elapsed_in_frac(time_bgn));
|
||||
return rv;
|
||||
}
|
||||
private static final byte[] Bry__redirect = Bry_.new_a7("redirect");
|
||||
|
||||
public void Add(byte[] full_db, Wdata_doc page) { // TEST:
|
||||
synchronized (thread_lock) { // LOCK:app-level
|
||||
if (doc_cache.Get_or_null(full_db) == null)
|
||||
doc_cache.Add(full_db, page);
|
||||
}
|
||||
}
|
||||
}
|
||||
/** Log entry for one title: number of {@code Update} calls and the sum of the elapsed values supplied. */
class Wbase_db_log_itm {
    private final byte[] ttl;
    private int count;
    private int elapsed;

    /** @param ttl title bytes for this entry; the array is kept by reference */
    public Wbase_db_log_itm(byte[] ttl) {
        this.ttl = ttl;
    }

    public byte[] Ttl() {
        return ttl;
    }

    public int Count() {
        return count;
    }

    public int Elapsed() {
        return elapsed;
    }

    /** Increments the call count and adds {@code elapsed_diff} to the accumulated total. */
    public void Update(int elapsed_diff) {
        elapsed = elapsed + elapsed_diff;
        count = count + 1;
    }
}
|
||||
|
Loading…
Reference in New Issue
Block a user