From 2a0b5769ad73ded91ccaeaf554cb261555298787 Mon Sep 17 00:00:00 2001 From: gnosygnu Date: Thu, 1 Dec 2016 14:35:47 -0500 Subject: [PATCH] Wikibase: Uniquefy items in wbase_prop table --- .../xowa/parsers/vnts/Vnt_convert_lang__html__tst.java | 2 +- .../src/gplx/xowa/parsers/vnts/Vnt_html_doc_wkr.java | 8 ++++---- .../src/gplx/xowa/xtns/wbases/dbs/Xowb_prop_tbl.java | 4 ++-- .../xowa/xtns/wbases/imports/Xob_wdata_pid_base.java | 10 ++++++++-- .../xowa/xtns/wbases/imports/Xob_wdata_pid_sql.java | 5 +++-- .../xowa/xtns/wbases/imports/Xob_wdata_pid_txt.java | 3 ++- 6 files changed, 20 insertions(+), 12 deletions(-) diff --git a/400_xowa/src/gplx/xowa/parsers/vnts/Vnt_convert_lang__html__tst.java b/400_xowa/src/gplx/xowa/parsers/vnts/Vnt_convert_lang__html__tst.java index 4ebb131ab..bb22b82a5 100644 --- a/400_xowa/src/gplx/xowa/parsers/vnts/Vnt_convert_lang__html__tst.java +++ b/400_xowa/src/gplx/xowa/parsers/vnts/Vnt_convert_lang__html__tst.java @@ -18,7 +18,7 @@ along with this program. If not, see . package gplx.xowa.parsers.vnts; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import org.junit.*; import gplx.xowa.langs.vnts.*; import gplx.xowa.langs.vnts.converts.*; public class Vnt_convert_lang__html__tst { // REF: https://www.mediawiki.org/wiki/Writing_systems/Syntax - private final Vnt_convert_lang_fxt fxt = new Vnt_convert_lang_fxt(); + private final Vnt_convert_lang_fxt fxt = new Vnt_convert_lang_fxt(); private String rule; @Before public void init() { rule = "-{H|zh-cn:cn;zh-hk:hk;zh-tw:tw}-"; diff --git a/400_xowa/src/gplx/xowa/parsers/vnts/Vnt_html_doc_wkr.java b/400_xowa/src/gplx/xowa/parsers/vnts/Vnt_html_doc_wkr.java index edab1888d..6078898f2 100644 --- a/400_xowa/src/gplx/xowa/parsers/vnts/Vnt_html_doc_wkr.java +++ b/400_xowa/src/gplx/xowa/parsers/vnts/Vnt_html_doc_wkr.java @@ -21,8 +21,8 @@ import gplx.xowa.parsers.htmls.*; import gplx.xowa.parsers.xndes.*; import gplx. import gplx.xowa.langs.vnts.*; import gplx.xowa.langs.vnts.converts.*; import gplx.xowa.htmls.*; class Vnt_html_doc_wkr implements Mwh_doc_wkr { - private final Hash_adp_bry atr_hash = Hash_adp_bry.ci_a7(); - private final Xol_convert_mgr convert_mgr; private final Xol_vnt_regy vnt_regy; + private final Hash_adp_bry atr_hash = Hash_adp_bry.ci_a7(); + private final Xol_convert_mgr convert_mgr; private final Xol_vnt_regy vnt_regy; private Vnt_convert_lang atr_converter; private Xol_vnt_itm vnt_itm; private int convert_vnt_idx; private Bry_bfr bfr; @@ -30,7 +30,7 @@ class Vnt_html_doc_wkr implements Mwh_doc_wkr { this.convert_mgr = convert_mgr; this.vnt_regy = vnt_regy; atr_hash.Add_many_str("title", "alt"); } - public Hash_adp_bry Nde_regy() {return nde_regy;} private final Hash_adp_bry nde_regy = Mwh_doc_wkr_.Nde_regy__mw(); + public Hash_adp_bry Nde_regy() {return nde_regy;} private final Hash_adp_bry nde_regy = Mwh_doc_wkr_.Nde_regy__mw(); public void Init(Bry_bfr bfr, Xol_vnt_itm vnt_itm) {this.bfr = bfr; this.vnt_itm = vnt_itm; this.convert_vnt_idx = vnt_itm.Idx();} public void On_atr_each (Mwh_atr_parser mgr, byte[] src, int nde_tid, boolean valid, boolean repeated, boolean key_exists, byte[] key_bry, byte[] val_bry_manual, int[] itm_ary, int itm_idx) { boolean literal = true; @@ -78,5 +78,5 @@ class Vnt_html_doc_wkr implements Mwh_doc_wkr { public void On_nde_tail_end (Mwh_doc_parser mgr, byte[] src, int nde_tid, int itm_bgn, int itm_end) {bfr.Add_mid(src, itm_bgn, itm_end);} public void On_comment_end (Mwh_doc_parser mgr, byte[] src, int nde_tid, int itm_bgn, int itm_end) {bfr.Add_mid(src, itm_bgn, itm_end);} public void On_entity_end (Mwh_doc_parser mgr, byte[] src, int nde_tid, int itm_bgn, int itm_end) {bfr.Add_mid(src, itm_bgn, itm_end);} - private static final byte[] Bry__url_frag = Bry_.new_a7("://"); // REF.MW: if ( !strpos( $attr, '://' ) ) { + private static final byte[] Bry__url_frag = Bry_.new_a7("://"); // REF.MW: if ( !strpos( $attr, '://' ) ) { } diff --git a/400_xowa/src/gplx/xowa/xtns/wbases/dbs/Xowb_prop_tbl.java b/400_xowa/src/gplx/xowa/xtns/wbases/dbs/Xowb_prop_tbl.java index 266dd85da..115869d87 100644 --- a/400_xowa/src/gplx/xowa/xtns/wbases/dbs/Xowb_prop_tbl.java +++ b/400_xowa/src/gplx/xowa/xtns/wbases/dbs/Xowb_prop_tbl.java @@ -26,7 +26,7 @@ public class Xowb_prop_tbl implements Db_tbl { public Xowb_prop_tbl(Db_conn conn) { this.conn = conn; this.tbl_name = "wbase_prop"; - this.fld__wbp_pid = flds.Add_str("wbp_pid", 16); // EX: "p1"; NOTE: String, not int to conform to wbase_pid + this.fld__wbp_pid = flds.Add_str_pkey("wbp_pid", 16); // EX: "p1"; NOTE: String, not int to conform to wbase_pid this.fld__wbp_datatype = flds.Add_int("wbp_datatype"); // EX: 12=commonsMedia; SEE:Wbase_claim_type_ conn.Rls_reg(this); } @@ -56,7 +56,7 @@ public class Xowb_prop_tbl implements Db_tbl { Gfo_usr_dlg_.Instance.Warn_many("", "", "wbase:invalid prop datatype_id; pid=~{0} datatype=~{1}", pid, datatype_id); datatype_itm = Wbase_claim_type_.Itm__string; } - hash.Add_if_dupe_use_1st(pid, datatype_itm.Key_str()); + hash.Add(pid, datatype_itm.Key_str()); } public void Rls() {} } diff --git a/400_xowa/src/gplx/xowa/xtns/wbases/imports/Xob_wdata_pid_base.java b/400_xowa/src/gplx/xowa/xtns/wbases/imports/Xob_wdata_pid_base.java index b6018374c..7c8bc5182 100644 --- a/400_xowa/src/gplx/xowa/xtns/wbases/imports/Xob_wdata_pid_base.java +++ b/400_xowa/src/gplx/xowa/xtns/wbases/imports/Xob_wdata_pid_base.java @@ -23,7 +23,8 @@ public abstract class Xob_wdata_pid_base extends Xob_itm_dump_base implements Xo public Xob_wdata_pid_base Ctor(Xob_bldr bldr, Xowe_wiki wiki) {this.Cmd_ctor(bldr, wiki); return this;} public abstract String Page_wkr__key(); public abstract void Pid_bgn(); - public abstract void Pid_add(byte[] src_lang, byte[] src_ttl, byte[] trg_ttl, byte[] datatype); + public abstract void Pid_add(byte[] src_lang, byte[] src_ttl, byte[] trg_ttl); + public abstract void Pid_datatype(byte[] pid, byte[] datatype_bry); public abstract void Pid_end(); public void Page_wkr__bgn() { this.Init_dump(this.Page_wkr__key(), wiki.Tdb_fsys_mgr().Site_dir().GenSubDir_nest("data", "pid")); // NOTE: must pass in correct make_dir in order to delete earlier version (else make_dirs will append) @@ -43,12 +44,17 @@ public abstract class Xob_wdata_pid_base extends Xob_itm_dump_base implements Xo public void Parse_jdoc(Json_doc jdoc) { Wdata_doc_parser wdoc_parser = app.Wiki_mgr().Wdata_mgr().Wdoc_parser(jdoc); byte[] qid = wdoc_parser.Parse_qid(jdoc); + + // add datatype byte[] datatype = jdoc.Root_nde().Get_as_bry(Wdata_dict_mainsnak.Itm__datatype.Key_str()); + this.Pid_datatype(qid, datatype); + + // add langs Ordered_hash list = wdoc_parser.Parse_langvals(qid, jdoc, Bool_.Y); int len = list.Count(); for (int i = 0; i < len; ++i) { Wdata_langtext_itm label = (Wdata_langtext_itm)list.Get_at(i); - this.Pid_add(label.Lang(), label.Text(), qid, datatype); + this.Pid_add(label.Lang(), label.Text(), qid); } } public void Page_wkr__end() {this.Pid_end();} diff --git a/400_xowa/src/gplx/xowa/xtns/wbases/imports/Xob_wdata_pid_sql.java b/400_xowa/src/gplx/xowa/xtns/wbases/imports/Xob_wdata_pid_sql.java index e762e918a..311b784cf 100644 --- a/400_xowa/src/gplx/xowa/xtns/wbases/imports/Xob_wdata_pid_sql.java +++ b/400_xowa/src/gplx/xowa/xtns/wbases/imports/Xob_wdata_pid_sql.java @@ -44,9 +44,10 @@ public class Xob_wdata_pid_sql extends Xob_wdata_pid_base { tbl__prop.Create_tbl(); tbl__prop.Insert_bgn(); } - @Override public void Pid_add(byte[] lang_key, byte[] ttl, byte[] pid, byte[] datatype_bry) { + @Override public void Pid_add(byte[] lang_key, byte[] ttl, byte[] pid) { tbl__pid.Insert_cmd_by_batch(lang_key, ttl, pid); - + } + @Override public void Pid_datatype(byte[] pid, byte[] datatype_bry) { Wbase_claim_type claim_type = (Wbase_claim_type)datatype_hash.Get_by_or_fail(datatype_bry); tbl__prop.Insert_cmd_by_batch(pid, claim_type.Tid()); } diff --git a/400_xowa/src/gplx/xowa/xtns/wbases/imports/Xob_wdata_pid_txt.java b/400_xowa/src/gplx/xowa/xtns/wbases/imports/Xob_wdata_pid_txt.java index 1e4744c17..27814a60e 100644 --- a/400_xowa/src/gplx/xowa/xtns/wbases/imports/Xob_wdata_pid_txt.java +++ b/400_xowa/src/gplx/xowa/xtns/wbases/imports/Xob_wdata_pid_txt.java @@ -22,9 +22,10 @@ public class Xob_wdata_pid_txt extends Xob_wdata_pid_base { @Override public void Pid_bgn() { pid_bldr = new Wdata_idx_bldr_pid(this, bldr, wiki, dump_fil_len); } Wdata_idx_bldr_pid pid_bldr; - @Override public void Pid_add(byte[] lang_key, byte[] prop_key, byte[] qid, byte[] datatype) { + @Override public void Pid_add(byte[] lang_key, byte[] prop_key, byte[] qid) { pid_bldr.Add(lang_key, prop_key, qid); } + @Override public void Pid_datatype(byte[] pid, byte[] datatype_bry) {} @Override public void Pid_end() { pid_bldr.Flush(); pid_bldr.Make();