mirror of
https://github.com/gnosygnu/xowa.git
synced 2025-05-31 22:44:34 +00:00
Wikibase: Uniquefy items in wbase_prop table
This commit is contained in:
parent
da18f05d9f
commit
2a0b5769ad
@ -18,7 +18,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
package gplx.xowa.parsers.vnts; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
|
||||
import org.junit.*; import gplx.xowa.langs.vnts.*; import gplx.xowa.langs.vnts.converts.*;
|
||||
public class Vnt_convert_lang__html__tst { // REF: https://www.mediawiki.org/wiki/Writing_systems/Syntax
|
||||
private final Vnt_convert_lang_fxt fxt = new Vnt_convert_lang_fxt();
|
||||
private final Vnt_convert_lang_fxt fxt = new Vnt_convert_lang_fxt();
|
||||
private String rule;
|
||||
@Before public void init() {
|
||||
rule = "-{H|zh-cn:cn;zh-hk:hk;zh-tw:tw}-";
|
||||
|
@ -21,8 +21,8 @@ import gplx.xowa.parsers.htmls.*; import gplx.xowa.parsers.xndes.*; import gplx.
|
||||
import gplx.xowa.langs.vnts.*; import gplx.xowa.langs.vnts.converts.*;
|
||||
import gplx.xowa.htmls.*;
|
||||
class Vnt_html_doc_wkr implements Mwh_doc_wkr {
|
||||
private final Hash_adp_bry atr_hash = Hash_adp_bry.ci_a7();
|
||||
private final Xol_convert_mgr convert_mgr; private final Xol_vnt_regy vnt_regy;
|
||||
private final Hash_adp_bry atr_hash = Hash_adp_bry.ci_a7();
|
||||
private final Xol_convert_mgr convert_mgr; private final Xol_vnt_regy vnt_regy;
|
||||
private Vnt_convert_lang atr_converter;
|
||||
private Xol_vnt_itm vnt_itm; private int convert_vnt_idx;
|
||||
private Bry_bfr bfr;
|
||||
@ -30,7 +30,7 @@ class Vnt_html_doc_wkr implements Mwh_doc_wkr {
|
||||
this.convert_mgr = convert_mgr; this.vnt_regy = vnt_regy;
|
||||
atr_hash.Add_many_str("title", "alt");
|
||||
}
|
||||
public Hash_adp_bry Nde_regy() {return nde_regy;} private final Hash_adp_bry nde_regy = Mwh_doc_wkr_.Nde_regy__mw();
|
||||
public Hash_adp_bry Nde_regy() {return nde_regy;} private final Hash_adp_bry nde_regy = Mwh_doc_wkr_.Nde_regy__mw();
|
||||
public void Init(Bry_bfr bfr, Xol_vnt_itm vnt_itm) {this.bfr = bfr; this.vnt_itm = vnt_itm; this.convert_vnt_idx = vnt_itm.Idx();}
|
||||
public void On_atr_each (Mwh_atr_parser mgr, byte[] src, int nde_tid, boolean valid, boolean repeated, boolean key_exists, byte[] key_bry, byte[] val_bry_manual, int[] itm_ary, int itm_idx) {
|
||||
boolean literal = true;
|
||||
@ -78,5 +78,5 @@ class Vnt_html_doc_wkr implements Mwh_doc_wkr {
|
||||
public void On_nde_tail_end (Mwh_doc_parser mgr, byte[] src, int nde_tid, int itm_bgn, int itm_end) {bfr.Add_mid(src, itm_bgn, itm_end);}
|
||||
public void On_comment_end (Mwh_doc_parser mgr, byte[] src, int nde_tid, int itm_bgn, int itm_end) {bfr.Add_mid(src, itm_bgn, itm_end);}
|
||||
public void On_entity_end (Mwh_doc_parser mgr, byte[] src, int nde_tid, int itm_bgn, int itm_end) {bfr.Add_mid(src, itm_bgn, itm_end);}
|
||||
private static final byte[] Bry__url_frag = Bry_.new_a7("://"); // REF.MW: if ( !strpos( $attr, '://' ) ) {
|
||||
private static final byte[] Bry__url_frag = Bry_.new_a7("://"); // REF.MW: if ( !strpos( $attr, '://' ) ) {
|
||||
}
|
||||
|
@ -26,7 +26,7 @@ public class Xowb_prop_tbl implements Db_tbl {
|
||||
public Xowb_prop_tbl(Db_conn conn) {
|
||||
this.conn = conn;
|
||||
this.tbl_name = "wbase_prop";
|
||||
this.fld__wbp_pid = flds.Add_str("wbp_pid", 16); // EX: "p1"; NOTE: String, not int to conform to wbase_pid
|
||||
this.fld__wbp_pid = flds.Add_str_pkey("wbp_pid", 16); // EX: "p1"; NOTE: String, not int to conform to wbase_pid
|
||||
this.fld__wbp_datatype = flds.Add_int("wbp_datatype"); // EX: 12=commonsMedia; SEE:Wbase_claim_type_
|
||||
conn.Rls_reg(this);
|
||||
}
|
||||
@ -56,7 +56,7 @@ public class Xowb_prop_tbl implements Db_tbl {
|
||||
Gfo_usr_dlg_.Instance.Warn_many("", "", "wbase:invalid prop datatype_id; pid=~{0} datatype=~{1}", pid, datatype_id);
|
||||
datatype_itm = Wbase_claim_type_.Itm__string;
|
||||
}
|
||||
hash.Add_if_dupe_use_1st(pid, datatype_itm.Key_str());
|
||||
hash.Add(pid, datatype_itm.Key_str());
|
||||
}
|
||||
public void Rls() {}
|
||||
}
|
||||
|
@ -23,7 +23,8 @@ public abstract class Xob_wdata_pid_base extends Xob_itm_dump_base implements Xo
|
||||
public Xob_wdata_pid_base Ctor(Xob_bldr bldr, Xowe_wiki wiki) {this.Cmd_ctor(bldr, wiki); return this;}
|
||||
public abstract String Page_wkr__key();
|
||||
public abstract void Pid_bgn();
|
||||
public abstract void Pid_add(byte[] src_lang, byte[] src_ttl, byte[] trg_ttl, byte[] datatype);
|
||||
public abstract void Pid_add(byte[] src_lang, byte[] src_ttl, byte[] trg_ttl);
|
||||
public abstract void Pid_datatype(byte[] pid, byte[] datatype_bry);
|
||||
public abstract void Pid_end();
|
||||
public void Page_wkr__bgn() {
|
||||
this.Init_dump(this.Page_wkr__key(), wiki.Tdb_fsys_mgr().Site_dir().GenSubDir_nest("data", "pid")); // NOTE: must pass in correct make_dir in order to delete earlier version (else make_dirs will append)
|
||||
@ -43,12 +44,17 @@ public abstract class Xob_wdata_pid_base extends Xob_itm_dump_base implements Xo
|
||||
public void Parse_jdoc(Json_doc jdoc) {
|
||||
Wdata_doc_parser wdoc_parser = app.Wiki_mgr().Wdata_mgr().Wdoc_parser(jdoc);
|
||||
byte[] qid = wdoc_parser.Parse_qid(jdoc);
|
||||
|
||||
// add datatype
|
||||
byte[] datatype = jdoc.Root_nde().Get_as_bry(Wdata_dict_mainsnak.Itm__datatype.Key_str());
|
||||
this.Pid_datatype(qid, datatype);
|
||||
|
||||
// add langs
|
||||
Ordered_hash list = wdoc_parser.Parse_langvals(qid, jdoc, Bool_.Y);
|
||||
int len = list.Count();
|
||||
for (int i = 0; i < len; ++i) {
|
||||
Wdata_langtext_itm label = (Wdata_langtext_itm)list.Get_at(i);
|
||||
this.Pid_add(label.Lang(), label.Text(), qid, datatype);
|
||||
this.Pid_add(label.Lang(), label.Text(), qid);
|
||||
}
|
||||
}
|
||||
public void Page_wkr__end() {this.Pid_end();}
|
||||
|
@ -44,9 +44,10 @@ public class Xob_wdata_pid_sql extends Xob_wdata_pid_base {
|
||||
tbl__prop.Create_tbl();
|
||||
tbl__prop.Insert_bgn();
|
||||
}
|
||||
@Override public void Pid_add(byte[] lang_key, byte[] ttl, byte[] pid, byte[] datatype_bry) {
|
||||
@Override public void Pid_add(byte[] lang_key, byte[] ttl, byte[] pid) {
|
||||
tbl__pid.Insert_cmd_by_batch(lang_key, ttl, pid);
|
||||
|
||||
}
|
||||
@Override public void Pid_datatype(byte[] pid, byte[] datatype_bry) {
|
||||
Wbase_claim_type claim_type = (Wbase_claim_type)datatype_hash.Get_by_or_fail(datatype_bry);
|
||||
tbl__prop.Insert_cmd_by_batch(pid, claim_type.Tid());
|
||||
}
|
||||
|
@ -22,9 +22,10 @@ public class Xob_wdata_pid_txt extends Xob_wdata_pid_base {
|
||||
@Override public void Pid_bgn() {
|
||||
pid_bldr = new Wdata_idx_bldr_pid(this, bldr, wiki, dump_fil_len);
|
||||
} Wdata_idx_bldr_pid pid_bldr;
|
||||
@Override public void Pid_add(byte[] lang_key, byte[] prop_key, byte[] qid, byte[] datatype) {
|
||||
@Override public void Pid_add(byte[] lang_key, byte[] prop_key, byte[] qid) {
|
||||
pid_bldr.Add(lang_key, prop_key, qid);
|
||||
}
|
||||
@Override public void Pid_datatype(byte[] pid, byte[] datatype_bry) {}
|
||||
@Override public void Pid_end() {
|
||||
pid_bldr.Flush();
|
||||
pid_bldr.Make();
|
||||
|
Loading…
Reference in New Issue
Block a user