Wikibase: Implement lexeme, form, and sense [#771]

staging
gnosygnu 4 years ago
parent a0c0b5b2fd
commit 7b6b3ed09a

@ -1,6 +1,6 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
Copyright (C) 2012-2020 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
@ -14,6 +14,7 @@ GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx;
public class Type_ {//RF:2017-10-08
// Returns the runtime class of o; a null o raises a NullPointerException (no null-guard here)
public static Class<?> Type_by_obj(Object o) {
	Class<?> type = o.getClass();
	return type;
}
public static Class<?> Type_by_primitive(Object o) {
@ -45,6 +46,11 @@ public class Type_ {//RF:2017-10-08
return type.getName();
}
// Returns the simple class name of obj, or String_.Null_mark when obj is null
public static String SimpleName_by_obj(Object obj) {
	if (obj == null) {
		return String_.Null_mark;
	}
	return SimpleName(Type_by_obj(obj));
}
// Returns the simple (unqualified) name of the given class; EX: java.lang.String -> "String"
public static String SimpleName(Class<?> type) {
	String simple_name = type.getSimpleName();
	return simple_name;
}
// Returns true when t represents an array class; EX: int[].class -> true; String.class -> false
public static boolean Is_array(Class<?> t) {
	boolean is_array = t.isArray();
	return is_array;
}

@ -1,6 +1,6 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
Copyright (C) 2012-2020 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
@ -13,21 +13,50 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.xtns.scribunto.libs; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.scribunto.*;
import gplx.xowa.xtns.wbases.*; import gplx.xowa.xtns.wbases.core.*; import gplx.xowa.xtns.wbases.claims.*; import gplx.xowa.xtns.wbases.claims.enums.*; import gplx.xowa.xtns.wbases.claims.itms.*; import gplx.xowa.xtns.wbases.parsers.*; import gplx.xowa.xtns.wbases.stores.*;
package gplx.xowa.xtns.scribunto.libs;
import gplx.Bry_;
import gplx.Int_;
import gplx.Keyval;
import gplx.Keyval_;
import gplx.List_adp;
import gplx.List_adp_;
import gplx.Ordered_hash;
import gplx.String_;
import gplx.xowa.xtns.wbases.Wdata_doc;
import gplx.xowa.xtns.wbases.Wdata_wiki_mgr;
import gplx.xowa.xtns.wbases.claims.Wbase_claim_grp;
import gplx.xowa.xtns.wbases.claims.Wbase_claim_grp_list;
import gplx.xowa.xtns.wbases.claims.Wbase_references_grp;
import gplx.xowa.xtns.wbases.claims.enums.Wbase_claim_entity_type_;
import gplx.xowa.xtns.wbases.claims.enums.Wbase_claim_rank_;
import gplx.xowa.xtns.wbases.claims.enums.Wbase_claim_type_;
import gplx.xowa.xtns.wbases.claims.enums.Wbase_claim_value_type_;
import gplx.xowa.xtns.wbases.claims.enums.Wbase_enum_itm;
import gplx.xowa.xtns.wbases.claims.itms.Wbase_claim_base;
import gplx.xowa.xtns.wbases.core.Wdata_alias_itm;
import gplx.xowa.xtns.wbases.core.Wdata_dict_claim;
import gplx.xowa.xtns.wbases.core.Wdata_dict_claim_v1;
import gplx.xowa.xtns.wbases.core.Wdata_dict_langtext;
import gplx.xowa.xtns.wbases.core.Wdata_dict_sitelink;
import gplx.xowa.xtns.wbases.core.Wdata_langtext_itm;
import gplx.xowa.xtns.wbases.core.Wdata_sitelink_itm;
import gplx.xowa.xtns.wbases.parsers.Wdata_doc_parser_v2;
import gplx.xowa.xtns.wbases.stores.Wbase_prop_mgr;
public class Scrib_lib_wikibase_srl {
public static Keyval[] Srl(Wbase_prop_mgr prop_mgr, Wdata_doc wdoc, boolean header_enabled, boolean legacy_style, byte[] page_url) {// REF.MW:/Wikibase/lib/includes/serializers/EntitySerializer.php!getSerialized; http://www.mediawiki.org/wiki/Extension:Wikibase_Client/Lua
int base_adj = legacy_style ? 0 : 1;
List_adp rv = List_adp_.New();
if (header_enabled) {
byte[] qid = wdoc.Qid();
boolean doc_is_qid = Bry_.Has_at_bgn(qid, Byte_ascii.Ltr_q) || Bry_.Has_at_bgn(qid, Byte_ascii.Ltr_Q);
Wbase_enum_itm entity_itm = Wbase_claim_entity_type_.ToTid(qid);
rv.Add(Keyval_.new_("id", qid));
rv.Add(Keyval_.new_("type", doc_is_qid ? Wbase_claim_entity_type_.Itm__item.Key_str() : Wbase_claim_entity_type_.Itm__property.Key_str())); // type should be "property"; PAGE:ru.w:Викитека:Проект:Викиданные DATE:2016-11-23
rv.Add(Keyval_.new_("type", entity_itm.Key_str())); // type should be "property"; PAGE:ru.w:Викитека:Проект:Викиданные DATE:2016-11-23
rv.Add(Keyval_.new_("schemaVersion", base_adj + 1)); // NOTE: needed by mw.wikibase.lua
// for Property pages, add a "datatype" property PAGE:ru.w:Маргарян,_Андраник_Наапетович; wd:Property:P18; DATE:2017-03-27
if (!doc_is_qid) {
if (entity_itm == Wbase_claim_entity_type_.Itm__property) {
String pid_name = String_.new_u8(Bry_.Mid(qid, Wdata_wiki_mgr.Ns_property_name_bry.length + 1));// +1 for ":" in "Property:"
rv.Add(Keyval_.new_("datatype", prop_mgr.Get_or_null(pid_name, page_url)));
}
@ -153,7 +182,7 @@ public class Scrib_lib_wikibase_srl {
Wbase_claim_base itm = grp.Get_at(j);
pid_list.Add(Keyval_.int_(j + base_adj, Srl_claims_prop_itm_core(prop_mgr, visitor, itm_pid, itm, page_url))); // NOTE: was originally "+ 1"; changed to base_adj; PAGE:ru.w:Tor ru.w:Кактусовые DATE:2014-10-25
}
rv.Add(Keyval_.new_(itm_pid, (Keyval[])pid_list.To_ary_and_clear(Keyval.class)));
rv.Add(Keyval_.new_(itm_pid, pid_list.To_ary_and_clear(Keyval.class)));
}
return (Keyval[])rv.To_ary_and_clear(Keyval.class);
}

@ -1,6 +1,6 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
Copyright (C) 2012-2020 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
@ -13,10 +13,41 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.xtns.wbases; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*;
import gplx.core.brys.fmtrs.*;
import gplx.xowa.langs.*;
import gplx.xowa.xtns.wbases.core.*; import gplx.xowa.xtns.wbases.claims.*; import gplx.xowa.xtns.wbases.claims.itms.*; import gplx.xowa.xtns.wbases.hwtrs.*; import gplx.xowa.xtns.wbases.claims.itms.times.*;
package gplx.xowa.xtns.wbases;
import gplx.Bool_;
import gplx.Bry_;
import gplx.Bry_bfr;
import gplx.Bry_bfr_;
import gplx.Bry_find_;
import gplx.Byte_ascii;
import gplx.Decimal_adp;
import gplx.Decimal_adp_;
import gplx.Err_;
import gplx.Gfo_usr_dlg_;
import gplx.Math_;
import gplx.Object_;
import gplx.String_;
import gplx.core.brys.fmtrs.Bry_fmtr;
import gplx.xowa.Xoae_app;
import gplx.xowa.langs.Xol_lang_itm;
import gplx.xowa.langs.Xol_lang_itm_;
import gplx.xowa.xtns.wbases.claims.Wbase_claim_visitor;
import gplx.xowa.xtns.wbases.claims.itms.Wbase_claim_entity;
import gplx.xowa.xtns.wbases.claims.itms.Wbase_claim_globecoordinate;
import gplx.xowa.xtns.wbases.claims.itms.Wbase_claim_monolingualtext;
import gplx.xowa.xtns.wbases.claims.itms.Wbase_claim_quantity;
import gplx.xowa.xtns.wbases.claims.itms.Wbase_claim_string;
import gplx.xowa.xtns.wbases.claims.itms.Wbase_claim_time;
import gplx.xowa.xtns.wbases.claims.itms.Wbase_claim_value;
import gplx.xowa.xtns.wbases.claims.itms.times.Wbase_date;
import gplx.xowa.xtns.wbases.claims.itms.times.Wbase_date_;
import gplx.xowa.xtns.wbases.core.Wdata_langtext_itm;
import gplx.xowa.xtns.wbases.hwtrs.Wdata_hwtr_mgr;
import gplx.xowa.xtns.wbases.hwtrs.Wdata_hwtr_msgs;
import gplx.xowa.xtns.wbases.hwtrs.Wdata_lbl_itm;
import gplx.xowa.xtns.wbases.hwtrs.Wdata_lbl_mgr;
public class Wdata_prop_val_visitor implements Wbase_claim_visitor { // THREAD.UNSAFE; callers must do synchronized
private Wdata_wiki_mgr wdata_mgr; private Xoae_app app; private Bry_bfr bfr;
private Xol_lang_itm lang;

@ -1,6 +1,6 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
Copyright (C) 2012-2020 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
@ -13,15 +13,55 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.xtns.wbases; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*;
import gplx.core.primitives.*;
import gplx.xowa.langs.msgs.*; import gplx.langs.jsons.*;
import gplx.xowa.wikis.nss.*;
import gplx.xowa.langs.*;
import gplx.xowa.parsers.*;
import gplx.xowa.wikis.domains.*; import gplx.xowa.htmls.*; import gplx.xowa.parsers.logs.*; import gplx.xowa.apps.apis.xowa.xtns.*; import gplx.xowa.apps.apis.xowa.html.*; import gplx.xowa.users.*;
import gplx.xowa.xtns.wbases.core.*; import gplx.xowa.xtns.wbases.claims.*; import gplx.xowa.xtns.wbases.claims.enums.*; import gplx.xowa.xtns.wbases.claims.itms.*; import gplx.xowa.xtns.wbases.parsers.*; import gplx.xowa.xtns.wbases.pfuncs.*; import gplx.xowa.xtns.wbases.hwtrs.*; import gplx.xowa.xtns.wbases.stores.*;
import gplx.xowa.mediawiki.extensions.Wikibase.client.includes.dataAccess.scribunto.*;
package gplx.xowa.xtns.wbases;
import gplx.Bool_;
import gplx.Bry_;
import gplx.Bry_bfr;
import gplx.Bry_bfr_;
import gplx.GfoMsg;
import gplx.Gfo_evt_itm;
import gplx.Gfo_evt_mgr;
import gplx.Gfo_evt_mgr_;
import gplx.Gfo_invk;
import gplx.Gfo_invk_;
import gplx.GfsCtx;
import gplx.String_;
import gplx.Yn;
import gplx.langs.jsons.Json_doc;
import gplx.langs.jsons.Json_kv;
import gplx.langs.jsons.Json_parser;
import gplx.xowa.Xoa_ttl;
import gplx.xowa.Xoae_app;
import gplx.xowa.Xoae_page;
import gplx.xowa.Xowe_wiki;
import gplx.xowa.apps.apis.xowa.html.Xoapi_toggle_mgr;
import gplx.xowa.apps.apis.xowa.xtns.Xoapi_wikibase;
import gplx.xowa.htmls.Xoh_consts;
import gplx.xowa.langs.Xol_lang_itm;
import gplx.xowa.langs.msgs.Xol_msg_itm_;
import gplx.xowa.parsers.Xop_ctx;
import gplx.xowa.parsers.logs.Xop_log_property_wkr;
import gplx.xowa.users.Xoue_user;
import gplx.xowa.wikis.domains.Xow_domain_itm;
import gplx.xowa.wikis.domains.Xow_domain_tid_;
import gplx.xowa.wikis.nss.Xow_ns_;
import gplx.xowa.xtns.wbases.claims.Wbase_claim_grp;
import gplx.xowa.xtns.wbases.claims.enums.Wbase_claim_rank_;
import gplx.xowa.xtns.wbases.claims.enums.Wbase_claim_value_type_;
import gplx.xowa.xtns.wbases.claims.itms.Wbase_claim_base;
import gplx.xowa.xtns.wbases.hwtrs.Wdata_hwtr_mgr;
import gplx.xowa.xtns.wbases.hwtrs.Wdata_hwtr_msgs;
import gplx.xowa.xtns.wbases.hwtrs.Wdata_lbl_wkr_wiki;
import gplx.xowa.xtns.wbases.parsers.Wdata_doc_parser;
import gplx.xowa.xtns.wbases.parsers.Wdata_doc_parser_v1;
import gplx.xowa.xtns.wbases.parsers.Wdata_doc_parser_v2;
import gplx.xowa.xtns.wbases.stores.Wbase_doc_mgr;
import gplx.xowa.xtns.wbases.stores.Wbase_pid_mgr;
import gplx.xowa.xtns.wbases.stores.Wbase_prop_mgr;
import gplx.xowa.xtns.wbases.stores.Wbase_prop_mgr_loader_;
import gplx.xowa.xtns.wbases.stores.Wbase_qid_mgr;
public class Wdata_wiki_mgr implements Gfo_evt_itm, Gfo_invk {
private final Xoae_app app;
private final Wdata_prop_val_visitor prop_val_visitor;
@ -180,19 +220,22 @@ public class Wdata_wiki_mgr implements Gfo_evt_itm, Gfo_invk {
public static final int Ns_property = 120;
public static final String Ns_property_name = "Property";
public static final byte[] Ns_property_name_bry = Bry_.new_a7(Ns_property_name);
public static final byte[] Bry_q = Bry_.new_a7("q");
public static final byte[]
Ttl_prefix_qid_bry_db = Bry_.new_a7("q") // NOTE: for historical reasons this is standardized as lowercase q not Q; DATE:2015-06-12
, Ttl_prefix_qid_bry_gui = Bry_.new_a7("Q") // NOTE: use uppercase Q for writing html; DATE:2015-06-12
, Ttl_prefix_pid_bry = Bry_.new_a7("Property:P")
;
public static final int Ns_lexeme = 146;
public static final String Ns_lexeme_name = "Lexeme";
public static final byte[] Ns_lexeme_name_bry = Bry_.new_a7(Ns_lexeme_name);
public static final byte[] Html_json_id = Bry_.new_a7("xowa-wikidata-json");
public static boolean Wiki_page_is_json(int wiki_tid, int ns_id) {
switch (wiki_tid) {
case Xow_domain_tid_.Tid__wikidata:
if (ns_id == Xow_ns_.Tid__main || ns_id == gplx.xowa.xtns.wbases.Wdata_wiki_mgr.Ns_property)
switch (ns_id) {
case Xow_ns_.Tid__main:
case Wdata_wiki_mgr.Ns_property:
case Wdata_wiki_mgr.Ns_lexeme:
return true;
break;
default:
return false;
}
case Xow_domain_tid_.Tid__home:
if (ns_id == gplx.xowa.xtns.wbases.Wdata_wiki_mgr.Ns_property)
return true;

@ -1,6 +1,6 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
Copyright (C) 2012-2020 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
@ -13,8 +13,16 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.xtns.wbases.claims; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.wbases.*;
import gplx.xowa.xtns.wbases.claims.itms.*;
package gplx.xowa.xtns.wbases.claims;
import gplx.xowa.xtns.wbases.claims.itms.Wbase_claim_entity;
import gplx.xowa.xtns.wbases.claims.itms.Wbase_claim_globecoordinate;
import gplx.xowa.xtns.wbases.claims.itms.Wbase_claim_monolingualtext;
import gplx.xowa.xtns.wbases.claims.itms.Wbase_claim_quantity;
import gplx.xowa.xtns.wbases.claims.itms.Wbase_claim_string;
import gplx.xowa.xtns.wbases.claims.itms.Wbase_claim_time;
import gplx.xowa.xtns.wbases.claims.itms.Wbase_claim_value;
public interface Wbase_claim_visitor {
void Visit_str (Wbase_claim_string itm);
void Visit_entity (Wbase_claim_entity itm);

@ -1,6 +1,6 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
Copyright (C) 2012-2020 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
@ -13,17 +13,54 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.xtns.wbases.claims.enums; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.wbases.*; import gplx.xowa.xtns.wbases.claims.*;
package gplx.xowa.xtns.wbases.claims.enums;
import gplx.Bry_;
import gplx.Byte_ascii;
import gplx.Err_;
// NOTE: could not find definitive list, so using these links for now
// REF.MW:https://github.com/Wikidata/Wikidata-Toolkit/blob/master/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/implementation/EntityIdValueImpl.java
// REF.MW:https://github.com/wikimedia/wikibase-property-suggester-scripts/blob/1d25e76f894796bfd57dd107102cf39088885138/propertysuggester/parser/JsonReader.py
public class Wbase_claim_entity_type_ {
public static final byte
Tid__item = 0
, Tid__property = 1
, Tid__lexeme = 2
, Tid__sense = 3
, Tid__form = 4
;
public static final Wbase_enum_hash Reg = new Wbase_enum_hash("claim.entity_type", 3);
public static final Wbase_enum_hash Reg = new Wbase_enum_hash("claim.entity_type", 5);
public static final Wbase_enum_itm
Itm__item = Reg.Add(Tid__item , "item")
, Itm__property = Reg.Add(Tid__property , "property")
, Itm__lexeme = Reg.Add(Tid__lexeme , "lexeme")
, Itm__sense = Reg.Add(Tid__sense , "sense")
, Itm__form = Reg.Add(Tid__form , "form")
;
// Resolves the entity type from the 1st byte of id, ignoring ASCII case
// EX: 'Q' / 'q' -> item; 'P' / 'p' -> property; 'L' / 'l' -> lexeme
// Fails with Err_.new_unhandled_default if id is null / 0-length or if the 1st byte is none of the above
public static Wbase_enum_itm ToTid(byte[] id) {
	// fail if null or 0-length
	if (Bry_.Len_eq_0(id))
		throw Err_.new_unhandled_default(id);

	// uppercase the 1st byte: anything above 'Z' is shifted down by 32, which maps 'a'-'z' onto 'A'-'Z'
	byte first = id[0];
	byte upper = first > Byte_ascii.Ltr_Z ? (byte)(first - 32) : first;

	// NOTE: only doing types which have namespaces (i.e.: there is no Sense:S1 or Form:F1)
	if (upper == Byte_ascii.Ltr_Q)
		return Wbase_claim_entity_type_.Itm__item;
	else if (upper == Byte_ascii.Ltr_P)
		return Wbase_claim_entity_type_.Itm__property;
	else if (upper == Byte_ascii.Ltr_L)
		return Wbase_claim_entity_type_.Itm__lexeme;

	throw Err_.new_unhandled_default(id);
}
}

@ -1,6 +1,6 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
Copyright (C) 2012-2020 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
@ -13,7 +13,10 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.xtns.wbases.claims.enums; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.wbases.*; import gplx.xowa.xtns.wbases.claims.*;
package gplx.xowa.xtns.wbases.claims.enums;
import gplx.Bry_;
public class Wbase_claim_type_ {
public static final byte // SERIALIZED:wbase_prop|datatype; REF:https://www.wikidata.org/wiki/Help:Data_type
Tid__unknown = 0
@ -33,8 +36,10 @@ public class Wbase_claim_type_ {
, Tid__geo_shape = 13
, Tid__tabular_data = 14
, Tid__lexeme = 15
, Tid__form = 16
, Tid__sense = 17
;
public static final Wbase_enum_hash Reg = new Wbase_enum_hash("claim.data_type", 16);
public static final Wbase_enum_hash Reg = new Wbase_enum_hash("claim.data_type", 18);
public static final Wbase_enum_itm
Itm__unknown = New(Tid__unknown , "unknown")
, Itm__bad = New(Tid__bad , "bad") // NOTE: wikidata identifies several entries as "bad"; Q1615351|'s-Graveland, Q107538|Baco; DATE:2013-10-20
@ -51,7 +56,9 @@ public class Wbase_claim_type_ {
, Itm__math = New(Tid__math , "math") // EX:wd:Property:P2534
, Itm__geo_shape = New(Tid__geo_shape , "geo-shape") // EX:wd:Property:P3896
, Itm__tabular_data = New(Tid__tabular_data , "tabular-data") // EX:wd:Property:P4179
, Itm__lexeme = New(Tid__lexeme , "lexeme") // EX:wd:Property:P5188
, Itm__lexeme = New(Tid__lexeme , "lexeme") // EX:wd:Lexeme:L2
, Itm__form = New(Tid__form , "form") // EX:wd:Lexeme:L2 P5830
, Itm__sense = New(Tid__sense , "sense") // EX:wd:Lexeme:L2 P6072
;
// Registers a data type whose scrib value is identical to its key
private static Wbase_enum_itm New(byte tid, String key) {
	return New(tid, key, key);
}
// Builds a Wbase_claim_type from tid / key / scrib and adds it to Reg, returning what Reg.Add returns
private static Wbase_enum_itm New(byte tid, String key, String scrib) {
	Wbase_claim_type itm = new Wbase_claim_type(tid, key, scrib);
	return Reg.Add(itm);
}

@ -1,6 +1,6 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
Copyright (C) 2012-2020 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
@ -13,38 +13,78 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.xtns.wbases.claims.itms; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.wbases.*; import gplx.xowa.xtns.wbases.claims.*;
import gplx.xowa.xtns.wbases.claims.enums.*;
package gplx.xowa.xtns.wbases.claims.itms;
import gplx.Bry_;
import gplx.Byte_ascii;
import gplx.Err_;
import gplx.Int_;
import gplx.String_;
import gplx.xowa.xtns.wbases.claims.Wbase_claim_visitor;
import gplx.xowa.xtns.wbases.claims.enums.Wbase_claim_entity_type_;
import gplx.xowa.xtns.wbases.claims.enums.Wbase_claim_type_;
import gplx.xowa.xtns.wbases.claims.enums.Wbase_claim_value_type_;
public class Wbase_claim_entity extends Wbase_claim_base {
public Wbase_claim_entity(int pid, byte snak_tid, byte entity_tid, byte[] entity_id_bry) {super(pid, snak_tid);
this.entity_tid = entity_tid;
this.entity_id_bry = entity_id_bry;
this.entity_id = Bry_.To_int(entity_id_bry);
public Wbase_claim_entity(int pid, byte snak_tid, byte entityType, byte[] numericIdBry) {
this(pid, snak_tid, entityType, numericIdBry, null);
}
@Override public byte Val_tid() {return Wbase_claim_type_.Tid__entity;}
public int Entity_id() {return entity_id;} private final int entity_id;
public byte[] Entity_id_bry() {return entity_id_bry;} private final byte[] entity_id_bry;
public byte Entity_tid() {return entity_tid;} private final byte entity_tid;
public boolean Entity_tid_is_qid() {return entity_tid == Wbase_claim_entity_type_.Tid__item;}
public String Entity_tid_str() {return Wbase_claim_entity_type_.Reg.Get_str_or_fail(entity_tid);}
public byte[] Entity_tid_bry() {return Wbase_claim_entity_type_.Reg.Get_bry_or_fail(entity_tid);}
public byte[] Page_ttl_db() {return To_xid__db(entity_tid, entity_id_bry);}
public byte[] Page_ttl_gui() {
return entity_tid == Wbase_claim_entity_type_.Tid__item
? Bry_.Add(Wdata_wiki_mgr.Ttl_prefix_qid_bry_gui, entity_id_bry)
: Bry_.Add(Wdata_wiki_mgr.Ttl_prefix_pid_bry, entity_id_bry)
;
// Full ctor: takes both the numeric-id bytes and an explicit id; either may be null
// - numericIdBry == null: numericId is never assigned (stays at the int default of 0)
// - id == null: the id is derived from entityType + numericIdBry via ToId (which fails for form / sense)
public Wbase_claim_entity(int pid, byte snak_tid, byte entityType, byte[] numericIdBry, byte[] id) {
	super(pid, snak_tid);
	this.entityType = entityType;
	this.numericIdBry = numericIdBry;

	// NOTE: form and sense claims do not have `numeric-id`; DATE:2020-07-27
	if (numericIdBry != null) {
		this.numericId = Bry_.To_int(numericIdBry);
	}

	// NOTE: item, property, lexeme do not have an id (Make_claims calls don't pass them)
	if (id != null) {
		this.id = id;
	}
	else {
		this.id = ToId(entityType, numericIdBry);
	}
}
@Override public byte Val_tid() {return Wbase_claim_type_.Tid__entity;}
public byte[] Id() {return id;} private final byte[] id; // EX: Q123
public int Entity_id() {return numericId;} private int numericId; // EX: 123
public byte[] Entity_id_bry() {return numericIdBry;} private final byte[] numericIdBry;
public byte Entity_tid() {return entityType;} private final byte entityType;
public boolean Entity_tid_is_qid() {return entityType == Wbase_claim_entity_type_.Tid__item;}
public String Entity_tid_str() {return Wbase_claim_entity_type_.Reg.Get_str_or_fail(entityType);}
public byte[] Entity_tid_bry() {return Wbase_claim_entity_type_.Reg.Get_bry_or_fail(entityType);}
public byte[] Page_ttl_db() {return To_xid__db(entityType, numericIdBry);}
public byte[] Page_ttl_gui() {return Bry_.Add(ToTtlPrefix(entityType), numericIdBry);}
@Override public void Welcome(Wbase_claim_visitor visitor) {visitor.Visit_entity(this);}
@Override public String toString() {// TEST:
return String_.Concat_with_str("|", Wbase_claim_value_type_.Reg.Get_str_or_fail(this.Snak_tid()), Wbase_claim_type_.Reg.Get_str_or_fail(this.Val_tid()), this.Entity_tid_str(), Int_.To_str(entity_id));
return String_.Concat_with_str("|", Wbase_claim_value_type_.Reg.Get_str_or_fail(this.Snak_tid()), Wbase_claim_type_.Reg.Get_str_or_fail(this.Val_tid()), this.Entity_tid_str(), Int_.To_str(numericId), String_.new_u8(id));
}
public static byte[] To_xid__db(byte tid, byte[] bry) { // EX: 'item,2' -> q2; 'property,2' -> Property:P2
return tid == Wbase_claim_entity_type_.Tid__item
? Bry_.Add(Wdata_wiki_mgr.Ttl_prefix_qid_bry_gui, bry)
: Bry_.Add(Wdata_wiki_mgr.Ttl_prefix_pid_bry, bry)
;
public static byte[] To_xid__db(byte tid, byte[] bry) {return Bry_.Add(ToTtlPrefix(tid), bry);} // EX: 'item,2' -> Q2; 'property,2' -> Property:P2
// Returns the page-title prefix for an entity type; EX: item -> "Q"; property -> "Property:P"; lexeme -> "Lexeme:L"
// Any other entity type (including form and sense) fails with Err_.new_unhandled_default
private static byte[] ToTtlPrefix(byte entityType) {
	if (entityType == Wbase_claim_entity_type_.Tid__item)
		return TTL_PREFIX_QID;
	if (entityType == Wbase_claim_entity_type_.Tid__property)
		return TTL_PREFIX_PID;
	if (entityType == Wbase_claim_entity_type_.Tid__lexeme)
		return TTL_PREFIX_LID;
	throw Err_.new_unhandled_default(entityType);
}
// Builds an entity id by prepending the type letter to the numeric id; EX: item + "123" -> "Q123"
// Only item (Q), property (P) and lexeme (L) are supported; form, sense and anything else fail with Err_.new_unhandled_default
private static byte[] ToId(byte entityType, byte[] numericId) {
	byte prefix;
	if (entityType == Wbase_claim_entity_type_.Tid__item)
		prefix = Byte_ascii.Ltr_Q;
	else if (entityType == Wbase_claim_entity_type_.Tid__property)
		prefix = Byte_ascii.Ltr_P;
	else if (entityType == Wbase_claim_entity_type_.Tid__lexeme)
		prefix = Byte_ascii.Ltr_L;
	else // Tid__form, Tid__sense and unknown types cannot be derived from a numeric id
		throw Err_.new_unhandled_default(entityType);
	return Bry_.Add(prefix, numericId);
}
private static final byte[]
TTL_PREFIX_QID = Bry_.new_a7("Q") // NOTE: use uppercase Q for writing html; DATE:2015-06-12
, TTL_PREFIX_PID = Bry_.new_a7("Property:P")
, TTL_PREFIX_LID = Bry_.new_a7("Lexeme:L")
// TOMBSTONE: TTL_PREFIX_QID_OLD = Bry_.new_a7("q") // NOTE: for historical reasons this is standardized as lowercase q not Q; DATE:2015-06-12
;
}

@ -1,6 +1,6 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
Copyright (C) 2012-2020 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
@ -13,9 +13,28 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.xtns.wbases.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.wbases.*;
import gplx.langs.jsons.*;
import gplx.xowa.xtns.wbases.core.*; import gplx.xowa.xtns.wbases.claims.*; import gplx.xowa.xtns.wbases.claims.enums.*; import gplx.xowa.xtns.wbases.claims.itms.*;
package gplx.xowa.xtns.wbases.parsers;
import gplx.Byte_;
import gplx.Err_;
import gplx.langs.jsons.Json_itm;
import gplx.langs.jsons.Json_kv;
import gplx.langs.jsons.Json_nde;
import gplx.xowa.xtns.wbases.claims.enums.Wbase_claim_entity_type_;
import gplx.xowa.xtns.wbases.claims.enums.Wbase_claim_type_;
import gplx.xowa.xtns.wbases.claims.itms.Wbase_claim_base;
import gplx.xowa.xtns.wbases.claims.itms.Wbase_claim_entity;
import gplx.xowa.xtns.wbases.claims.itms.Wbase_claim_entity_;
import gplx.xowa.xtns.wbases.claims.itms.Wbase_claim_globecoordinate;
import gplx.xowa.xtns.wbases.claims.itms.Wbase_claim_globecoordinate_;
import gplx.xowa.xtns.wbases.claims.itms.Wbase_claim_monolingualtext;
import gplx.xowa.xtns.wbases.claims.itms.Wbase_claim_monolingualtext_;
import gplx.xowa.xtns.wbases.claims.itms.Wbase_claim_quantity;
import gplx.xowa.xtns.wbases.claims.itms.Wbase_claim_quantity_;
import gplx.xowa.xtns.wbases.claims.itms.Wbase_claim_string;
import gplx.xowa.xtns.wbases.claims.itms.Wbase_claim_time;
import gplx.xowa.xtns.wbases.claims.itms.Wbase_claim_time_;
public class Wbase_claim_factory {
public Wbase_claim_base Parse(byte[] qid, int pid, byte snak_tid, Json_nde nde, byte value_tid, Json_itm value_itm) {
switch (value_tid) {
@ -30,19 +49,22 @@ public class Wbase_claim_factory {
}
// Builds a Wbase_claim_entity from the key/value pairs of an entity datavalue node.
// Each sub key is resolved through Wbase_claim_entity_.Reg; keys that resolve to Byte_.Max_value_127 are skipped.
// NOTE(review): this span shows the pre-change and post-change statements of the commit interleaved:
//   entity_tid / entity_id_bry and the 4-arg ctor call are the old version;
//   entityType / numericId / id and the 5-arg ctor call are the new version.
private Wbase_claim_entity Parse_datavalue_entity(byte[] qid, int pid, byte snak_tid, Json_nde nde) {
int len = nde.Len();
byte entity_tid = Byte_.Max_value_127;
byte[] entity_id_bry = null;
// entityType stays at Byte_.Max_value_127 and numericId / id stay null when the matching key is absent from nde
byte entityType = Byte_.Max_value_127;
byte[] numericId = null;
byte[] id = null;
for (int i = 0; i < len; ++i) {
Json_kv sub = Json_kv.cast(nde.Get_at(i));
byte tid = Wbase_claim_entity_.Reg.Get_tid_or_max_and_log(qid, sub.Key().Data_bry()); if (tid == Byte_.Max_value_127) continue;
byte[] subValBry = sub.Val().Data_bry();
switch (tid) {
case Wbase_claim_entity_.Tid__entity_type: entity_tid = Wbase_claim_entity_type_.Reg.Get_tid_or_fail(sub.Val().Data_bry()); break;
case Wbase_claim_entity_.Tid__numeric_id: entity_id_bry = sub.Val().Data_bry(); break;
case Wbase_claim_entity_.Tid__id: break; // ignore
case Wbase_claim_entity_.Tid__entity_type: entityType = Wbase_claim_entity_type_.Reg.Get_tid_or_fail(subValBry); break;
case Wbase_claim_entity_.Tid__numeric_id: numericId = subValBry; break;
case Wbase_claim_entity_.Tid__id: id = subValBry; break; // needed for sense and form
}
}
if (entity_id_bry == null) throw Err_.new_wo_type("pid is invalid entity", "pid", pid);
return new Wbase_claim_entity(pid, snak_tid, entity_tid, entity_id_bry);
// TOMBSTONE:senses and forms do not have "numeric-id"; EX:wd:Lexeme:L2 and p6072 has a value of `{"entity-type":"form", "id":"L2-F3"}`; DATE:2020-07-27
// if (numericId == null) throw Err_.new_wo_type("pid is invalid entity", "pid", pid);
// NOTE(review): with the check above tombstoned, nothing in this method rejects a value that has neither "numeric-id" nor "id";
// presumably the ctor then fails less descriptively -- TODO confirm and consider throwing the "pid is invalid entity" error when both are null
return new Wbase_claim_entity(pid, snak_tid, entityType, numericId, id);
}
private Wbase_claim_monolingualtext Parse_datavalue_monolingualtext(byte[] qid, int pid, byte snak_tid, Json_nde nde) {
int len = nde.Len();

@ -1,6 +1,6 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
Copyright (C) 2012-2020 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
@ -13,8 +13,43 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.xtns.wbases.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.wbases.*;
import gplx.langs.jsons.*; import gplx.xowa.xtns.wbases.core.*; import gplx.xowa.xtns.wbases.claims.*; import gplx.xowa.xtns.wbases.claims.enums.*; import gplx.xowa.xtns.wbases.claims.itms.*;
package gplx.xowa.xtns.wbases.parsers;
import gplx.Bool_;
import gplx.Bry_;
import gplx.Bry_bfr;
import gplx.Bry_bfr_;
import gplx.Int_;
import gplx.List_adp;
import gplx.List_adp_;
import gplx.Object_;
import gplx.Ordered_hash;
import gplx.String_;
import gplx.Tfds;
import gplx.langs.jsons.Json_ary;
import gplx.langs.jsons.Json_doc;
import gplx.langs.jsons.Json_kv;
import gplx.langs.jsons.Json_nde;
import gplx.langs.jsons.Json_parser;
import gplx.xowa.xtns.wbases.claims.Wbase_claim_grp;
import gplx.xowa.xtns.wbases.claims.Wbase_claim_grp_list;
import gplx.xowa.xtns.wbases.claims.Wbase_references_grp;
import gplx.xowa.xtns.wbases.claims.enums.Wbase_claim_entity_type_;
import gplx.xowa.xtns.wbases.claims.enums.Wbase_claim_type_;
import gplx.xowa.xtns.wbases.claims.enums.Wbase_claim_value_type_;
import gplx.xowa.xtns.wbases.claims.itms.Wbase_claim_base;
import gplx.xowa.xtns.wbases.claims.itms.Wbase_claim_entity;
import gplx.xowa.xtns.wbases.claims.itms.Wbase_claim_globecoordinate;
import gplx.xowa.xtns.wbases.claims.itms.Wbase_claim_monolingualtext;
import gplx.xowa.xtns.wbases.claims.itms.Wbase_claim_quantity;
import gplx.xowa.xtns.wbases.claims.itms.Wbase_claim_string;
import gplx.xowa.xtns.wbases.claims.itms.Wbase_claim_time;
import gplx.xowa.xtns.wbases.claims.itms.Wbase_claim_time_;
import gplx.xowa.xtns.wbases.claims.itms.Wbase_claim_value;
import gplx.xowa.xtns.wbases.core.Wdata_alias_itm;
import gplx.xowa.xtns.wbases.core.Wdata_langtext_itm;
import gplx.xowa.xtns.wbases.core.Wdata_sitelink_itm;
abstract class Wdata_doc_parser_fxt_base {
protected Wdata_doc_parser wdoc_parser;
private final Json_parser json_parser = new Json_parser();
@ -29,6 +64,9 @@ abstract class Wdata_doc_parser_fxt_base {
// Fixture helper: creates a string claim with snak type "value" for the given pid
public Wbase_claim_base Make_claim_string(int pid, String val) {
	byte[] val_bry = Bry_.new_u8(val);
	return new Wbase_claim_string(pid, Wbase_claim_value_type_.Tid__value, val_bry);
}
// Fixture helper: creates an entity claim of type item whose numeric id is eid
public Wbase_claim_base Make_claim_entity_qid(int pid, int eid) {
	byte[] numeric_id = Int_.To_bry(eid);
	return new Wbase_claim_entity(pid, Wbase_claim_value_type_.Tid__value, Wbase_claim_entity_type_.Tid__item, numeric_id);
}
// Fixture helper: creates an entity claim of type property whose numeric id is eid
public Wbase_claim_base Make_claim_entity_pid(int pid, int eid) {
	byte[] numeric_id = Int_.To_bry(eid);
	return new Wbase_claim_entity(pid, Wbase_claim_value_type_.Tid__value, Wbase_claim_entity_type_.Tid__property, numeric_id);
}
// Fixture helper: creates an entity claim of type lexeme whose numeric id is eid
public Wbase_claim_base Make_claim_entity_lid(int pid, int eid) {
	byte[] numeric_id = Int_.To_bry(eid);
	return new Wbase_claim_entity(pid, Wbase_claim_value_type_.Tid__value, Wbase_claim_entity_type_.Tid__lexeme, numeric_id);
}
// Fixture helper: creates an entity claim of type form; forms are passed by full id with a null numeric id
public Wbase_claim_base Make_claim_entity_fid(int pid, String id) {
	byte[] form_id = Bry_.new_u8(id);
	return new Wbase_claim_entity(pid, Wbase_claim_value_type_.Tid__value, Wbase_claim_entity_type_.Tid__form, null, form_id);
}
// Fixture helper: creates an entity claim of type sense; senses are passed by full id with a null numeric id
public Wbase_claim_base Make_claim_entity_sid(int pid, String id) {
	byte[] sense_id = Bry_.new_u8(id);
	return new Wbase_claim_entity(pid, Wbase_claim_value_type_.Tid__value, Wbase_claim_entity_type_.Tid__sense, null, sense_id);
}
// Creates the expected monolingualtext claim; `lang` and `text` are each converted via Bry_.new_u8 and passed in that order.
public Wbase_claim_base Make_claim_monolingualtext(int pid, String lang, String text) {
  byte[] lang_bry = Bry_.new_u8(lang);
  byte[] text_bry = Bry_.new_u8(text);
  return new Wbase_claim_monolingualtext(pid, Wbase_claim_value_type_.Tid__value, lang_bry, text_bry);
}
// Creates the expected globecoordinate claim from `lat`, `lng` and `prc`.
// The argument between lng and prc is always Object_.Bry__null, and the last argument is always the hard-coded entity URL below.
public Wbase_claim_base Make_claim_globecoordinate(int pid, String lat, String lng, String prc) {
  byte[] lat_bry = Bry_.new_u8(lat);
  byte[] lng_bry = Bry_.new_u8(lng);
  byte[] prc_bry = Bry_.new_u8(prc);
  byte[] glb_bry = Bry_.new_a7("http://www.wikidata.org/entity/Q2");
  return new Wbase_claim_globecoordinate(pid, Wbase_claim_value_type_.Tid__value, lat_bry, lng_bry, Object_.Bry__null, prc_bry, glb_bry);
}
// Creates the expected quantity claim; each int is rendered with Int_.To_str and then converted via Bry_.new_u8.
// Values are handed to the constructor in the order: val, unit, ubound, lbound.
public Wbase_claim_base Make_claim_quantity(int pid, int val, int unit, int ubound, int lbound) {
  byte[] val_bry    = Bry_.new_u8(Int_.To_str(val));
  byte[] unit_bry   = Bry_.new_u8(Int_.To_str(unit));
  byte[] ubound_bry = Bry_.new_u8(Int_.To_str(ubound));
  byte[] lbound_bry = Bry_.new_u8(Int_.To_str(lbound));
  return new Wbase_claim_quantity(pid, Wbase_claim_value_type_.Tid__value, val_bry, unit_bry, ubound_bry, lbound_bry);
}
@ -38,22 +76,22 @@ abstract class Wdata_doc_parser_fxt_base {
// Parses `raw` with Parse_by_apos, extracts the id with Parse_qid, and asserts that its String form equals `expd`.
public void Test_entity(String raw, String expd) {
  Json_doc jdoc = json_parser.Parse_by_apos(raw);
  String actl = String_.new_u8(wdoc_parser.Parse_qid(jdoc));
  Tfds.Eq(expd, actl);
}
// Parses the sitelinks in `raw` (via Parse_by_apos) for Q1_bry and asserts that the parsed items match `expd`.
public void Test_sitelinks(String raw, Wdata_sitelink_itm... expd) {
  Ordered_hash actl_hash = wdoc_parser.Parse_sitelinks(Q1_bry, json_parser.Parse_by_apos(raw));
  // one comparison only: the same assertion used to run twice, once through a redundant (Wdata_sitelink_itm[]) cast
  Tfds.Eq_ary_str(actl_hash.To_ary(Wdata_sitelink_itm.class), expd);
}
// Delegates to Test_langvals with labels_or_descriptions = Bool_.Y
public void Test_labels(String raw, Wdata_langtext_itm... expd) {
  Test_langvals(raw, Bool_.Y, expd);
}
// Delegates to Test_langvals with labels_or_descriptions = Bool_.N
public void Test_descriptions(String raw, Wdata_langtext_itm... expd) {
  Test_langvals(raw, Bool_.N, expd);
}
// Shared body of Test_labels (flag = Bool_.Y) and Test_descriptions (flag = Bool_.N):
// parses the langvals in `raw` for Q1_bry, forwarding the flag to Parse_langvals, and asserts that the parsed items match `expd`.
private void Test_langvals(String raw, boolean labels_or_descriptions, Wdata_langtext_itm... expd) {
  Ordered_hash actl_hash = wdoc_parser.Parse_langvals(Q1_bry, json_parser.Parse_by_apos(raw), labels_or_descriptions);
  // one comparison only: the same assertion used to run twice, once through a redundant (Wdata_langtext_itm[]) cast
  Tfds.Eq_ary_str(actl_hash.To_ary(Wdata_langtext_itm.class), expd);
}
// Parses the aliases in `raw` (via Parse_by_apos) for Q1_bry and asserts that the parsed items match `expd`.
public void Test_aliases(String raw, Wdata_alias_itm... expd) {
  Ordered_hash actl_hash = wdoc_parser.Parse_aliases(Q1_bry, json_parser.Parse_by_apos(raw));
  // one comparison only: the same assertion used to run twice, once through a redundant (Wdata_alias_itm[]) cast
  Tfds.Eq_ary_str(actl_hash.To_ary(Wdata_alias_itm.class), expd);
}
// Parses the claims in `raw` (via Parse_by_apos) for Q1_bry, converts the resulting hash to a list with Wbase_claim_grp.Xto_list,
// and asserts that the listed claims match `expd`.
public void Test_claims(String raw, Wbase_claim_base... expd) {
  Ordered_hash actl_hash = wdoc_parser.Parse_claims(Q1_bry, json_parser.Parse_by_apos(raw));
  List_adp actl_list = Wbase_claim_grp.Xto_list(actl_hash);
  // one comparison only: the same assertion used to run twice, once through a redundant (Wbase_claim_base[]) cast
  Tfds.Eq_ary_str(actl_list.To_ary(Wbase_claim_base.class), expd);
}
public void Test_claims_data(String raw, Wbase_claim_base expd) {
Json_doc jdoc = json_parser.Parse_by_apos(raw);

@ -1,6 +1,6 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
Copyright (C) 2012-2020 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
@ -13,9 +13,12 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.xtns.wbases.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.wbases.*;
import org.junit.*;
import gplx.langs.jsons.*; import gplx.xowa.xtns.wbases.core.*;
package gplx.xowa.xtns.wbases.parsers;
import gplx.String_;
import org.junit.Before;
import org.junit.Test;
public class Wdata_doc_parser_v2__claims__tst {
// fixture shared by the tests in this class; Init() is invoked on it before each test
private Wdata_doc_parser_v2_fxt fxt = new Wdata_doc_parser_v2_fxt();
@Before public void init() {
  fxt.Init();
}
@Test public void Full__string() {
@ -187,5 +190,41 @@ public class Wdata_doc_parser_v2__claims__tst {
, fxt.Make_claim_string(1, "a^2+b^2=c^2")
);
}
// www.wikidata.org/wiki/Q11518
@Test public void ValueLexeme() {// EX:wd:L2 P46028; DATE:2020-07-28
  // entityid value whose entity-type is 'lexeme'; carries both 'numeric-id' and 'id'
  String raw = String_.Concat_lines_nl_skip_last
  ( "{ 'value':"
  , " { 'entity-type':'lexeme'"
  , " , 'numeric-id':'123'"
  , " , 'id':'L123'"
  , " }"
  , ", 'type':'wikibase-entityid'"
  , "}"
  );
  // expected: lexeme claim built from the numeric id 123
  fxt.Test_claims_data(raw, fxt.Make_claim_entity_lid(1, 123));
}
@Test public void ValueForm() {// EX:wd:L2 L2-F3; DATE:2020-07-28
  // entityid value whose entity-type is 'form'; only 'id' is present (no 'numeric-id')
  String raw = String_.Concat_lines_nl_skip_last
  ( "{ 'value':"
  , " { 'entity-type':'form'"
  , " , 'id':'L2-F3'"
  , " }"
  , ", 'type':'wikibase-entityid'"
  , "}"
  );
  // expected: form claim built from the full String id
  fxt.Test_claims_data(raw, fxt.Make_claim_entity_fid(1, "L2-F3"));
}
@Test public void ValueSense() {// EX:wd:L2 L2-S1; DATE:2020-07-28
  // entityid value whose entity-type is 'sense'; only 'id' is present (no 'numeric-id')
  String raw = String_.Concat_lines_nl_skip_last
  ( "{ 'value':"
  , " { 'entity-type':'sense'"
  , " , 'id':'L2-S1'"
  , " }"
  , ", 'type':'wikibase-entityid'"
  , "}"
  );
  // expected: sense claim built from the full String id
  fxt.Test_claims_data(raw, fxt.Make_claim_entity_sid(1, "L2-S1"));
}
}

Loading…
Cancel
Save