1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2025-05-31 22:44:34 +00:00

Wikibase: Uniquefy items in wbase_prop table

This commit is contained in:
gnosygnu 2016-12-01 14:35:47 -05:00
parent da18f05d9f
commit 2a0b5769ad
6 changed files with 20 additions and 12 deletions

View File

@ -18,7 +18,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
package gplx.xowa.parsers.vnts; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
import org.junit.*; import gplx.xowa.langs.vnts.*; import gplx.xowa.langs.vnts.converts.*;
public class Vnt_convert_lang__html__tst { // REF: https://www.mediawiki.org/wiki/Writing_systems/Syntax
private final Vnt_convert_lang_fxt fxt = new Vnt_convert_lang_fxt();
private final Vnt_convert_lang_fxt fxt = new Vnt_convert_lang_fxt();
private String rule;
@Before public void init() {
rule = "-{H|zh-cn:cn;zh-hk:hk;zh-tw:tw}-";

View File

@ -21,8 +21,8 @@ import gplx.xowa.parsers.htmls.*; import gplx.xowa.parsers.xndes.*; import gplx.
import gplx.xowa.langs.vnts.*; import gplx.xowa.langs.vnts.converts.*;
import gplx.xowa.htmls.*;
class Vnt_html_doc_wkr implements Mwh_doc_wkr {
private final Hash_adp_bry atr_hash = Hash_adp_bry.ci_a7();
private final Xol_convert_mgr convert_mgr; private final Xol_vnt_regy vnt_regy;
private final Hash_adp_bry atr_hash = Hash_adp_bry.ci_a7();
private final Xol_convert_mgr convert_mgr; private final Xol_vnt_regy vnt_regy;
private Vnt_convert_lang atr_converter;
private Xol_vnt_itm vnt_itm; private int convert_vnt_idx;
private Bry_bfr bfr;
@ -30,7 +30,7 @@ class Vnt_html_doc_wkr implements Mwh_doc_wkr {
this.convert_mgr = convert_mgr; this.vnt_regy = vnt_regy;
atr_hash.Add_many_str("title", "alt");
}
public Hash_adp_bry Nde_regy() {return nde_regy;} private final Hash_adp_bry nde_regy = Mwh_doc_wkr_.Nde_regy__mw();
public Hash_adp_bry Nde_regy() {return nde_regy;} private final Hash_adp_bry nde_regy = Mwh_doc_wkr_.Nde_regy__mw();
/** Stores the output buffer and the variant item on this worker, and caches the variant item's index. */
public void Init(Bry_bfr bfr, Xol_vnt_itm vnt_itm) {
	this.bfr = bfr;
	this.vnt_itm = vnt_itm;
	this.convert_vnt_idx = vnt_itm.Idx();
}
public void On_atr_each (Mwh_atr_parser mgr, byte[] src, int nde_tid, boolean valid, boolean repeated, boolean key_exists, byte[] key_bry, byte[] val_bry_manual, int[] itm_ary, int itm_idx) {
boolean literal = true;
@ -78,5 +78,5 @@ class Vnt_html_doc_wkr implements Mwh_doc_wkr {
/** Node-tail callback: forwards the src span (itm_bgn, itm_end) to the output buffer via Add_mid; mgr and nde_tid are unused. */
public void On_nde_tail_end (Mwh_doc_parser mgr, byte[] src, int nde_tid, int itm_bgn, int itm_end) {
	bfr.Add_mid(src, itm_bgn, itm_end);
}
/** Comment callback: forwards the src span (itm_bgn, itm_end) to the output buffer via Add_mid; mgr and nde_tid are unused. */
public void On_comment_end (Mwh_doc_parser mgr, byte[] src, int nde_tid, int itm_bgn, int itm_end) {
	bfr.Add_mid(src, itm_bgn, itm_end);
}
/** Entity callback: forwards the src span (itm_bgn, itm_end) to the output buffer via Add_mid; mgr and nde_tid are unused. */
public void On_entity_end (Mwh_doc_parser mgr, byte[] src, int nde_tid, int itm_bgn, int itm_end) {
	bfr.Add_mid(src, itm_bgn, itm_end);
}
private static final byte[] Bry__url_frag = Bry_.new_a7("://"); // REF.MW: if ( !strpos( $attr, '://' ) ) {
private static final byte[] Bry__url_frag = Bry_.new_a7("://"); // REF.MW: if ( !strpos( $attr, '://' ) ) {
}

View File

@ -26,7 +26,7 @@ public class Xowb_prop_tbl implements Db_tbl {
public Xowb_prop_tbl(Db_conn conn) {
this.conn = conn;
this.tbl_name = "wbase_prop";
this.fld__wbp_pid = flds.Add_str("wbp_pid", 16); // EX: "p1"; NOTE: String, not int to conform to wbase_pid
this.fld__wbp_pid = flds.Add_str_pkey("wbp_pid", 16); // EX: "p1"; NOTE: String, not int to conform to wbase_pid
this.fld__wbp_datatype = flds.Add_int("wbp_datatype"); // EX: 12=commonsMedia; SEE:Wbase_claim_type_
conn.Rls_reg(this);
}
@ -56,7 +56,7 @@ public class Xowb_prop_tbl implements Db_tbl {
Gfo_usr_dlg_.Instance.Warn_many("", "", "wbase:invalid prop datatype_id; pid=~{0} datatype=~{1}", pid, datatype_id);
datatype_itm = Wbase_claim_type_.Itm__string;
}
hash.Add_if_dupe_use_1st(pid, datatype_itm.Key_str());
hash.Add(pid, datatype_itm.Key_str());
}
public void Rls() {}
}

View File

@ -23,7 +23,8 @@ public abstract class Xob_wdata_pid_base extends Xob_itm_dump_base implements Xo
/** Fluent initializer: delegates to Cmd_ctor with the given builder and wiki, then returns this instance for chaining. */
public Xob_wdata_pid_base Ctor(Xob_bldr bldr, Xowe_wiki wiki) {
	this.Cmd_ctor(bldr, wiki);
	return this;
}
public abstract String Page_wkr__key();
public abstract void Pid_bgn();
public abstract void Pid_add(byte[] src_lang, byte[] src_ttl, byte[] trg_ttl, byte[] datatype);
public abstract void Pid_add(byte[] src_lang, byte[] src_ttl, byte[] trg_ttl);
public abstract void Pid_datatype(byte[] pid, byte[] datatype_bry);
public abstract void Pid_end();
public void Page_wkr__bgn() {
this.Init_dump(this.Page_wkr__key(), wiki.Tdb_fsys_mgr().Site_dir().GenSubDir_nest("data", "pid")); // NOTE: must pass in correct make_dir in order to delete earlier version (else make_dirs will append)
@ -43,12 +44,17 @@ public abstract class Xob_wdata_pid_base extends Xob_itm_dump_base implements Xo
// Handles one JSON document: reports its datatype through Pid_datatype, then reports each parsed language label through Pid_add.
public void Parse_jdoc(Json_doc jdoc) {
Wdata_doc_parser wdoc_parser = app.Wiki_mgr().Wdata_mgr().Wdoc_parser(jdoc);
// id obtained from Parse_qid; it is passed below as the pid / trg_ttl argument
byte[] qid = wdoc_parser.Parse_qid(jdoc);
// add datatype: read once from the root node and reported once per document (not once per label)
byte[] datatype = jdoc.Root_nde().Get_as_bry(Wdata_dict_mainsnak.Itm__datatype.Key_str());
this.Pid_datatype(qid, datatype);
// add langs: one Pid_add call per entry returned by Parse_langvals
Ordered_hash list = wdoc_parser.Parse_langvals(qid, jdoc, Bool_.Y);
int len = list.Count();
for (int i = 0; i < len; ++i) {
Wdata_langtext_itm label = (Wdata_langtext_itm)list.Get_at(i);
// NOTE(review): the next two calls look like the before/after versions of one changed line; confirm that only the 3-argument form should remain
this.Pid_add(label.Lang(), label.Text(), qid, datatype);
this.Pid_add(label.Lang(), label.Text(), qid);
}
}
/** End-of-run hook: hands off to the subclass-defined Pid_end. */
public void Page_wkr__end() {
	this.Pid_end();
}

View File

@ -44,9 +44,10 @@ public class Xob_wdata_pid_sql extends Xob_wdata_pid_base {
tbl__prop.Create_tbl();
tbl__prop.Insert_bgn();
}
@Override public void Pid_add(byte[] lang_key, byte[] ttl, byte[] pid, byte[] datatype_bry) {
/** Queues one (lang_key, ttl, pid) row on tbl__pid through its batch-insert command. */
@Override
public void Pid_add(byte[] lang_key, byte[] ttl, byte[] pid) {
	tbl__pid.Insert_cmd_by_batch(lang_key, ttl, pid);
}
/** Looks up the claim type for datatype_bry in datatype_hash and queues a (pid, type id) row on tbl__prop. */
@Override
public void Pid_datatype(byte[] pid, byte[] datatype_bry) {
	// NOTE(review): Get_by_or_fail presumably throws when datatype_bry is not registered in datatype_hash; confirm
	Wbase_claim_type type_itm = (Wbase_claim_type)datatype_hash.Get_by_or_fail(datatype_bry);
	tbl__prop.Insert_cmd_by_batch(pid, type_itm.Tid());
}

View File

@ -22,9 +22,10 @@ public class Xob_wdata_pid_txt extends Xob_wdata_pid_base {
/** Creates the pid index builder used by the other Pid_* callbacks of this class. */
@Override
public void Pid_bgn() {
	pid_bldr = new Wdata_idx_bldr_pid(this, bldr, wiki, dump_fil_len);
}
Wdata_idx_bldr_pid pid_bldr;
@Override public void Pid_add(byte[] lang_key, byte[] prop_key, byte[] qid, byte[] datatype) {
/** Adds one (lang_key, prop_key, qid) entry to the pid index builder. */
@Override
public void Pid_add(byte[] lang_key, byte[] prop_key, byte[] qid) {
	pid_bldr.Add(lang_key, prop_key, qid);
}
/** No-op: this implementation does nothing with the reported datatype. */
@Override
public void Pid_datatype(byte[] pid, byte[] datatype_bry) {
}
@Override public void Pid_end() {
pid_bldr.Flush();
pid_bldr.Make();