From 7b6b3ed09a00cca0efef71434e1baad5823f0ead Mon Sep 17 00:00:00 2001 From: gnosygnu Date: Fri, 31 Jul 2020 07:44:00 -0400 Subject: [PATCH] Wikibase: Implement lexeme, form, and sense [#771] --- 100_core/src/gplx/Type_.java | 90 ++-- .../libs/Scrib_lib_wikibase_srl.java | 425 +++++++++-------- .../xtns/wbases/Wdata_prop_val_visitor.java | 367 ++++++++------- .../gplx/xowa/xtns/wbases/Wdata_wiki_mgr.java | 443 ++++++++++-------- .../wbases/claims/Wbase_claim_visitor.java | 32 +- .../enums/Wbase_claim_entity_type_.java | 65 ++- .../claims/enums/Wbase_claim_type_.java | 101 ++-- .../claims/itms/Wbase_claim_entity.java | 112 +++-- .../wbases/parsers/Wbase_claim_factory.java | 226 +++++---- .../parsers/Wdata_doc_parser_fxt_base.java | 210 +++++---- .../Wdata_doc_parser_v2__claims__tst.java | 393 +++++++++------- 11 files changed, 1382 insertions(+), 1082 deletions(-) diff --git a/100_core/src/gplx/Type_.java b/100_core/src/gplx/Type_.java index 47ebce803..589772756 100644 --- a/100_core/src/gplx/Type_.java +++ b/100_core/src/gplx/Type_.java @@ -1,6 +1,6 @@ /* XOWA: the XOWA Offline Wiki Application -Copyright (C) 2012-2017 gnosygnu@gmail.com +Copyright (C) 2012-2020 gnosygnu@gmail.com XOWA is licensed under the terms of the General Public License (GPL) Version 3, or alternatively under the terms of the Apache License Version 2.0. @@ -13,44 +13,50 @@ The terms of each license can be found in the source code repository: GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt */ -package gplx; -public class Type_ {//RF:2017-10-08 - public static Class Type_by_obj(Object o) {return o.getClass();} - public static Class Type_by_primitive(Object o) { - Class rv = o.getClass(); - if (rv == Integer.class) rv = int.class; - else if (rv == Long.class) rv = long.class; - else if (rv == Byte.class) rv = byte.class; - else if (rv == Short.class) rv = short.class; - return rv; - } - - public static boolean Eq_by_obj(Object lhs_obj, Class rhs_type) { - Class lhs_type = lhs_obj == null ? null : lhs_obj.getClass(); - return Type_.Eq(lhs_type, rhs_type); - } - public static boolean Eq(Class lhs, Class rhs) {// DUPE_FOR_TRACKING: same as Object_.Eq - if (lhs == null && rhs == null) return true; - else if (lhs == null || rhs == null) return false; - else return lhs.equals(rhs); - } - - public static String Canonical_name_by_obj(Object o) {return Canonical_name(o.getClass());} - public static String Canonical_name(Class type) { - return type.getCanonicalName(); - } - - public static String Name_by_obj(Object obj) {return obj == null ? String_.Null_mark : Name(Type_by_obj(obj));} - public static String Name(Class type) { - return type.getName(); - } - - public static boolean Is_array(Class t) { - return t.isArray(); - } - - public static boolean Is_assignable_from_by_obj(Object o, Class generic) {return o == null ? false : Is_assignable_from(generic, o.getClass());} - public static boolean Is_assignable_from(Class generic, Class specific) { - return generic.isAssignableFrom(specific); - } -} +package gplx; + +public class Type_ {//RF:2017-10-08 + public static Class Type_by_obj(Object o) {return o.getClass();} + public static Class Type_by_primitive(Object o) { + Class rv = o.getClass(); + if (rv == Integer.class) rv = int.class; + else if (rv == Long.class) rv = long.class; + else if (rv == Byte.class) rv = byte.class; + else if (rv == Short.class) rv = short.class; + return rv; + } + + public static boolean Eq_by_obj(Object lhs_obj, Class rhs_type) { + Class lhs_type = lhs_obj == null ? null : lhs_obj.getClass(); + return Type_.Eq(lhs_type, rhs_type); + } + public static boolean Eq(Class lhs, Class rhs) {// DUPE_FOR_TRACKING: same as Object_.Eq + if (lhs == null && rhs == null) return true; + else if (lhs == null || rhs == null) return false; + else return lhs.equals(rhs); + } + + public static String Canonical_name_by_obj(Object o) {return Canonical_name(o.getClass());} + public static String Canonical_name(Class type) { + return type.getCanonicalName(); + } + + public static String Name_by_obj(Object obj) {return obj == null ? String_.Null_mark : Name(Type_by_obj(obj));} + public static String Name(Class type) { + return type.getName(); + } + + public static String SimpleName_by_obj(Object obj) {return obj == null ? String_.Null_mark : SimpleName(Type_by_obj(obj));} + public static String SimpleName(Class type) { + return type.getSimpleName(); + } + + public static boolean Is_array(Class t) { + return t.isArray(); + } + + public static boolean Is_assignable_from_by_obj(Object o, Class generic) {return o == null ? false : Is_assignable_from(generic, o.getClass());} + public static boolean Is_assignable_from(Class generic, Class specific) { + return generic.isAssignableFrom(specific); + } +} diff --git a/400_xowa/src/gplx/xowa/xtns/scribunto/libs/Scrib_lib_wikibase_srl.java b/400_xowa/src/gplx/xowa/xtns/scribunto/libs/Scrib_lib_wikibase_srl.java index 89fb198e5..9fb778ebd 100644 --- a/400_xowa/src/gplx/xowa/xtns/scribunto/libs/Scrib_lib_wikibase_srl.java +++ b/400_xowa/src/gplx/xowa/xtns/scribunto/libs/Scrib_lib_wikibase_srl.java @@ -1,6 +1,6 @@ /* XOWA: the XOWA Offline Wiki Application -Copyright (C) 2012-2017 gnosygnu@gmail.com +Copyright (C) 2012-2020 gnosygnu@gmail.com XOWA is licensed under the terms of the General Public License (GPL) Version 3, or alternatively under the terms of the Apache License Version 2.0. @@ -13,200 +13,229 @@ The terms of each license can be found in the source code repository: GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt */ -package gplx.xowa.xtns.scribunto.libs; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.scribunto.*; -import gplx.xowa.xtns.wbases.*; import gplx.xowa.xtns.wbases.core.*; import gplx.xowa.xtns.wbases.claims.*; import gplx.xowa.xtns.wbases.claims.enums.*; import gplx.xowa.xtns.wbases.claims.itms.*; import gplx.xowa.xtns.wbases.parsers.*; import gplx.xowa.xtns.wbases.stores.*; -public class Scrib_lib_wikibase_srl { - public static Keyval[] Srl(Wbase_prop_mgr prop_mgr, Wdata_doc wdoc, boolean header_enabled, boolean legacy_style, byte[] page_url) {// REF.MW:/Wikibase/lib/includes/serializers/EntitySerializer.php!getSerialized; http://www.mediawiki.org/wiki/Extension:Wikibase_Client/Lua - int base_adj = legacy_style ? 0 : 1; - List_adp rv = List_adp_.New(); - if (header_enabled) { - byte[] qid = wdoc.Qid(); - boolean doc_is_qid = Bry_.Has_at_bgn(qid, Byte_ascii.Ltr_q) || Bry_.Has_at_bgn(qid, Byte_ascii.Ltr_Q); - rv.Add(Keyval_.new_("id", qid)); - rv.Add(Keyval_.new_("type", doc_is_qid ? Wbase_claim_entity_type_.Itm__item.Key_str() : Wbase_claim_entity_type_.Itm__property.Key_str())); // type should be "property"; PAGE:ru.w:Викитека:Проект:Викиданные DATE:2016-11-23 - rv.Add(Keyval_.new_("schemaVersion", base_adj + 1)); // NOTE: needed by mw.wikibase.lua - - // for Property pages, add a "datatype" property PAGE:ru.w:Маргарян,_Андраник_Наапетович; wd:Property:P18; DATE:2017-03-27 - if (!doc_is_qid) { - String pid_name = String_.new_u8(Bry_.Mid(qid, Wdata_wiki_mgr.Ns_property_name_bry.length + 1));// +1 for ":" in "Property:" - rv.Add(Keyval_.new_("datatype", prop_mgr.Get_or_null(pid_name, page_url))); - } - } - Srl_root(rv, Wdata_doc_parser_v2.Str_labels , Srl_langtexts (Wdata_dict_langtext.Itm__language.Key_str(), Wdata_dict_langtext.Itm__value.Key_str(), wdoc.Label_list())); - Srl_root(rv, Wdata_doc_parser_v2.Str_descriptions , Srl_langtexts (Wdata_dict_langtext.Itm__language.Key_str(), Wdata_dict_langtext.Itm__value.Key_str(), wdoc.Descr_list())); - Srl_root(rv, Wdata_doc_parser_v2.Str_sitelinks , Srl_sitelinks (Wdata_dict_sitelink.Itm__site.Key_str() , Wdata_dict_sitelink.Itm__title.Key_str(), wdoc.Slink_list(), base_adj)); - Srl_root(rv, Wdata_doc_parser_v2.Str_aliases , Srl_aliases (base_adj, wdoc.Alias_list())); - Srl_root(rv, Wdata_doc_parser_v2.Str_claims , Srl_claims (base_adj, legacy_style, prop_mgr, wdoc.Claim_list(), page_url)); - return (Keyval[])rv.To_ary(Keyval.class); - } - private static void Srl_root(List_adp rv, String label, Keyval[] ary) { - if (ary == null) return; // don't add node if empty; EX: labels:{} should not add "labels" kv - rv.Add(Keyval_.new_(label, ary)); - } - private static Keyval[] Srl_langtexts(String lang_label, String text_label, Ordered_hash list) { - int len = list.Count(); if (len == 0) return null; - Keyval[] rv = new Keyval[len]; - for (int i = 0; i < len; i++) { - Wdata_langtext_itm itm = (Wdata_langtext_itm)list.Get_at(i); - String lang = String_.new_u8(itm.Lang()); - String text = String_.new_u8(itm.Text()); - rv[i] = Keyval_.new_(lang, Keyval_.Ary(Keyval_.new_(lang_label, lang), Keyval_.new_(text_label, text))); - } - return rv; - } - private static Keyval[] Srl_sitelinks(String key_label, String val_label, Ordered_hash list, int base_adj) { - int len = list.Count(); if (len == 0) return null; - Keyval[] rv = new Keyval[len]; - for (int i = 0; i < len; i++) { - Wdata_sitelink_itm itm = (Wdata_sitelink_itm)list.Get_at(i); - String site = String_.new_u8(itm.Site()); - String name = String_.new_u8(itm.Name()); - rv[i] = Keyval_.new_(site, Keyval_.Ary(Keyval_.new_(key_label, site), Keyval_.new_(val_label, name), Srl_sitelinks_badges(itm.Badges(), base_adj))); - } - return rv; - } - private static Keyval Srl_sitelinks_badges(byte[][] badges, int base_adj) { // DATE:2014-11-13 - if (badges == null) badges = Bry_.Ary_empty; // null badges -> badges:[] - int len = badges.length; - Keyval[] kvs = len == 0 ? Keyval_.Ary_empty : new Keyval[len]; - for (int i = 0; i < len; i++) { - byte[] badge = badges[i]; - kvs[i] = Keyval_.int_(i + base_adj, String_.new_u8(badge)); - } - return Keyval_.new_("badges", kvs); - } - private static Keyval[] Srl_aliases(int base_adj, Ordered_hash list) { - int len = list.Count(); if (len == 0) return null; - Keyval[] rv = new Keyval[len]; - for (int i = 0; i < len; i++) { - Wdata_alias_itm itm = (Wdata_alias_itm)list.Get_at(i); - String lang = String_.new_u8(itm.Lang()); - rv[i] = Keyval_.new_(lang, Srl_aliases_langs(base_adj, lang, itm.Vals())); - } - return rv; - } - private static Keyval[] Srl_aliases_langs(int base_adj, String lang, byte[][] ary) { - int len = ary.length; - Keyval[] rv = new Keyval[len]; - for (int i = 0; i < len; i++) { - byte[] itm = ary[i]; - rv[i] = Keyval_.int_(i + base_adj, Keyval_.Ary(Keyval_.new_(Wdata_dict_langtext.Itm__language.Key_str(), lang), Keyval_.new_(Wdata_dict_langtext.Itm__value.Key_str(), String_.new_u8(itm)))); // NOTE: using same base_adj logic as claims - } - return rv; - } - private static Keyval[] Srl_claims(int base_adj, boolean legacy_style, Wbase_prop_mgr prop_mgr, Ordered_hash claim_grps, byte[] page_url) { - int len = claim_grps.Count(); if (len == 0) return null; - Scrib_lib_wikibase_srl_visitor visitor = new Scrib_lib_wikibase_srl_visitor(); - int rv_len = legacy_style ? len * 2 : len; // NOTE: legacyStyle returns 2 sets of properties: official "P" and legacy "p"; DATE:2014-05-11 - Keyval[] rv = new Keyval[rv_len]; - for (int i = 0; i < len; i++) { - Wbase_claim_grp grp = (Wbase_claim_grp)claim_grps.Get_at(i); - String pid_str = Int_.To_str(grp.Id()); - Keyval[] grp_val = Srl_claims_prop_grp(prop_mgr, visitor, "P" + pid_str, grp, base_adj, page_url); - rv[i] = Keyval_.new_("P" + pid_str, grp_val); - if (legacy_style) - rv[i + len] = Keyval_.new_("p" + pid_str, grp_val); // SEE:WikibaseLuaBindings.php; This is a B/C hack to allow existing lua code to use hardcoded IDs in both lower (legacy) and upper case.; DATE:2014-05-11 - } - return rv; - } - private static Keyval[] Srl_claims_prop_grp(Wbase_prop_mgr prop_mgr, Scrib_lib_wikibase_srl_visitor visitor, String pid, Wbase_claim_grp grp, int base_adj, byte[] page_url) { - int len = grp.Len(); - Keyval[] rv = new Keyval[len]; - for (int i = 0; i < len; i++) { - Wbase_claim_base itm = grp.Get_at(i); - rv[i] = Keyval_.int_(i + base_adj, Srl_claims_prop_itm(prop_mgr, visitor, pid, itm, base_adj, page_url)); // NOTE: must be super 0 or super 1; DATE:2014-05-09 - } - return rv; - } - public static Keyval[] Srl_claims_prop_ary(Wbase_prop_mgr prop_mgr, String pid, Wbase_claim_base[] itms, int base_adj, byte[] page_url) { - Scrib_lib_wikibase_srl_visitor visitor = new Scrib_lib_wikibase_srl_visitor(); - int len = itms.length; - Keyval[] rv = new Keyval[len]; - for (int i = 0; i < len; i++) { - Wbase_claim_base itm = itms[i]; - rv[i] = Keyval_.int_(i + base_adj, Srl_claims_prop_itm(prop_mgr, visitor, pid, itm, base_adj, page_url)); // NOTE: must be super 0 or super 1; DATE:2014-05-09 - } - return rv; - } - private static Keyval[] Srl_claims_prop_itm(Wbase_prop_mgr prop_mgr, Scrib_lib_wikibase_srl_visitor visitor, String pid, Wbase_claim_base itm, int base_adj, byte[] page_url) { - List_adp list = List_adp_.New(); - list.Add(Keyval_.new_("id", pid)); - list.Add(Keyval_.new_("mainsnak", Srl_claims_prop_itm_core(prop_mgr, visitor, pid, itm, page_url))); - list.Add(Keyval_.new_(Wdata_dict_claim_v1.Str_rank, Wbase_claim_rank_.Reg.Get_str_or_fail(itm.Rank_tid()))); - list.Add(Keyval_.new_("type", itm.Prop_type())); - Srl_root(list, Wdata_dict_claim.Itm__qualifiers.Key_str(), Srl_qualifiers(prop_mgr, visitor, itm.Qualifiers(), base_adj, page_url)); - Srl_root(list, Wdata_dict_claim.Itm__qualifiers_order.Key_str(), Srl_qualifiers_order(prop_mgr, visitor, itm.Qualifiers_order(), base_adj, page_url)); - Srl_root(list, Wdata_dict_claim.Itm__references.Key_str(), Srl_references(prop_mgr, visitor, itm.References(), base_adj, page_url)); - return (Keyval[])list.To_ary_and_clear(Keyval.class); - } - private static Keyval[] Srl_qualifiers(Wbase_prop_mgr prop_mgr, Scrib_lib_wikibase_srl_visitor visitor, Wbase_claim_grp_list list, int base_adj, byte[] page_url) { - if (list == null) return null; - int list_len = list.Len(); if (list_len == 0) return Keyval_.Ary_empty; - List_adp rv = List_adp_.New(); - List_adp pid_list = List_adp_.New(); - for (int i = 0; i < list_len; ++i) { - Wbase_claim_grp grp = list.Get_at(i); - int grp_len = grp.Len(); - pid_list.Clear(); - String itm_pid = grp.Id_str(); - for (int j = 0; j < grp_len; ++j) { - Wbase_claim_base itm = grp.Get_at(j); - pid_list.Add(Keyval_.int_(j + base_adj, Srl_claims_prop_itm_core(prop_mgr, visitor, itm_pid, itm, page_url))); // NOTE: was originally "+ 1"; changed to base_adj; PAGE:ru.w:Tor ru.w:Кактусовые DATE:2014-10-25 - } - rv.Add(Keyval_.new_(itm_pid, (Keyval[])pid_list.To_ary_and_clear(Keyval.class))); - } - return (Keyval[])rv.To_ary_and_clear(Keyval.class); - } - private static Keyval[] Srl_qualifiers_order(Wbase_prop_mgr prop_mgr, Scrib_lib_wikibase_srl_visitor visitor, int[] list, int base_adj, byte[] page_url) { - if (list == null) return null; - int list_len = list.length; if (list_len == 0) return Keyval_.Ary_empty; - List_adp rv = List_adp_.New(); - for (int i = 0; i < list_len; ++i) { - String itm_pid = "P" + Int_.To_str(list[i]); - rv.Add(Keyval_.int_(i + base_adj, itm_pid)); - } - return (Keyval[])rv.To_ary_and_clear(Keyval.class); - } - private static Keyval[] Srl_references(Wbase_prop_mgr prop_mgr, Scrib_lib_wikibase_srl_visitor visitor, Wbase_references_grp[] list, int base_adj, byte[] page_url) { - if (list == null) return null; - int list_len = list.length; if (list_len == 0) return Keyval_.Ary_empty; - List_adp rv = List_adp_.New(); - for (int i = 0; i < list_len; ++i) { - Wbase_references_grp references_grp = list[i]; - Keyval[] references_kvs = new Keyval[3]; - references_kvs[0] = Keyval_.new_("hash", references_grp.Hash()); - references_kvs[1] = Keyval_.new_("snaks", Srl_qualifiers(prop_mgr, visitor, references_grp.Snaks(), base_adj, page_url)); - references_kvs[2] = Keyval_.new_("snaks-order", Srl_qualifiers_order(prop_mgr, visitor, references_grp.Snaks_order(), base_adj, page_url)); - rv.Add(Keyval_.int_(i + base_adj, references_kvs)); - } - return (Keyval[])rv.To_ary_and_clear(Keyval.class); - } - private static Keyval[] Srl_claims_prop_itm_core(Wbase_prop_mgr prop_mgr, Scrib_lib_wikibase_srl_visitor visitor, String pid, Wbase_claim_base itm, byte[] page_url) { - boolean snak_is_valued = itm.Snak_tid() == Wbase_claim_value_type_.Tid__value; // PURPOSE: was != Wbase_claim_value_type_.Tid__novalue; PAGE:it.s:Autore:Anonimo DATE:2015-12-06 - int snak_is_valued_adj = snak_is_valued ? 1 : 0; - Keyval[] rv = new Keyval[3 + snak_is_valued_adj]; - if (snak_is_valued) // NOTE: novalue must not return slot (no datavalue node in json); PAGE:ru.w:Лимонов,_Эдуард_Вениаминович; DATE:2015-02-16; ALSO: sv.w:Joseph_Jaquet; DATE:2015-07-31 - rv[0] = Keyval_.new_("datavalue", Srl_claims_prop_itm_core_val(visitor, itm)); - rv[0 + snak_is_valued_adj] = Keyval_.new_("property", pid); - rv[1 + snak_is_valued_adj] = Keyval_.new_("snaktype", Wbase_claim_value_type_.Reg.Get_str_or_fail(itm.Snak_tid())); - - // get prop datatype; NOTE: datatype needed for Modules; PAGE:eo.w:WikidataKoord; DATE:2015-11-08 - String datatype = prop_mgr.Get_or_null(pid, page_url); - if (datatype == null) // if null, fallback to value based on tid; needed for (a) tests and (b) old wbase dbs that don't have wbase_prop tbl; DATE:2016-12-01 - datatype = Wbase_claim_type_.Get_scrib_or_unknown(itm.Val_tid()); - rv[2 + snak_is_valued_adj] = Keyval_.new_("datatype", datatype); - return rv; - } - private static Keyval[] Srl_claims_prop_itm_core_val(Scrib_lib_wikibase_srl_visitor visitor, Wbase_claim_base itm) { - switch (itm.Snak_tid()) { - case Wbase_claim_value_type_.Tid__somevalue: return Datavalue_somevalue; - case Wbase_claim_value_type_.Tid__novalue: return Datavalue_novalue; // TODO_OLD: throw exc - default: - itm.Welcome(visitor); - return visitor.Rv(); - } - } - public static final String Key_type = "type", Key_value = "value"; - private static final Keyval[] Datavalue_somevalue = Keyval_.Ary_empty; // changed to not return value-node; PAGE:it.s:Autore:Anonimo DATE:2015-12-06 // new Keyval[] {Keyval_.new_(Key_type, ""), Keyval_.new_(Key_value, "")}; // NOTE: must return ""; null fails; EX:w:Joseph-François_Malgaigne; DATE:2014-04-07 - private static final Keyval[] Datavalue_novalue = Keyval_.Ary_empty; -} +package gplx.xowa.xtns.scribunto.libs; + +import gplx.Bry_; +import gplx.Int_; +import gplx.Keyval; +import gplx.Keyval_; +import gplx.List_adp; +import gplx.List_adp_; +import gplx.Ordered_hash; +import gplx.String_; +import gplx.xowa.xtns.wbases.Wdata_doc; +import gplx.xowa.xtns.wbases.Wdata_wiki_mgr; +import gplx.xowa.xtns.wbases.claims.Wbase_claim_grp; +import gplx.xowa.xtns.wbases.claims.Wbase_claim_grp_list; +import gplx.xowa.xtns.wbases.claims.Wbase_references_grp; +import gplx.xowa.xtns.wbases.claims.enums.Wbase_claim_entity_type_; +import gplx.xowa.xtns.wbases.claims.enums.Wbase_claim_rank_; +import gplx.xowa.xtns.wbases.claims.enums.Wbase_claim_type_; +import gplx.xowa.xtns.wbases.claims.enums.Wbase_claim_value_type_; +import gplx.xowa.xtns.wbases.claims.enums.Wbase_enum_itm; +import gplx.xowa.xtns.wbases.claims.itms.Wbase_claim_base; +import gplx.xowa.xtns.wbases.core.Wdata_alias_itm; +import gplx.xowa.xtns.wbases.core.Wdata_dict_claim; +import gplx.xowa.xtns.wbases.core.Wdata_dict_claim_v1; +import gplx.xowa.xtns.wbases.core.Wdata_dict_langtext; +import gplx.xowa.xtns.wbases.core.Wdata_dict_sitelink; +import gplx.xowa.xtns.wbases.core.Wdata_langtext_itm; +import gplx.xowa.xtns.wbases.core.Wdata_sitelink_itm; +import gplx.xowa.xtns.wbases.parsers.Wdata_doc_parser_v2; +import gplx.xowa.xtns.wbases.stores.Wbase_prop_mgr; + +public class Scrib_lib_wikibase_srl { + public static Keyval[] Srl(Wbase_prop_mgr prop_mgr, Wdata_doc wdoc, boolean header_enabled, boolean legacy_style, byte[] page_url) {// REF.MW:/Wikibase/lib/includes/serializers/EntitySerializer.php!getSerialized; http://www.mediawiki.org/wiki/Extension:Wikibase_Client/Lua + int base_adj = legacy_style ? 0 : 1; + List_adp rv = List_adp_.New(); + if (header_enabled) { + byte[] qid = wdoc.Qid(); + Wbase_enum_itm entity_itm = Wbase_claim_entity_type_.ToTid(qid); + rv.Add(Keyval_.new_("id", qid)); + rv.Add(Keyval_.new_("type", entity_itm.Key_str())); // type should be "property"; PAGE:ru.w:Викитека:Проект:Викиданные DATE:2016-11-23 + rv.Add(Keyval_.new_("schemaVersion", base_adj + 1)); // NOTE: needed by mw.wikibase.lua + + // for Property pages, add a "datatype" property PAGE:ru.w:Маргарян,_Андраник_Наапетович; wd:Property:P18; DATE:2017-03-27 + if (entity_itm == Wbase_claim_entity_type_.Itm__property) { + String pid_name = String_.new_u8(Bry_.Mid(qid, Wdata_wiki_mgr.Ns_property_name_bry.length + 1));// +1 for ":" in "Property:" + rv.Add(Keyval_.new_("datatype", prop_mgr.Get_or_null(pid_name, page_url))); + } + } + Srl_root(rv, Wdata_doc_parser_v2.Str_labels , Srl_langtexts (Wdata_dict_langtext.Itm__language.Key_str(), Wdata_dict_langtext.Itm__value.Key_str(), wdoc.Label_list())); + Srl_root(rv, Wdata_doc_parser_v2.Str_descriptions , Srl_langtexts (Wdata_dict_langtext.Itm__language.Key_str(), Wdata_dict_langtext.Itm__value.Key_str(), wdoc.Descr_list())); + Srl_root(rv, Wdata_doc_parser_v2.Str_sitelinks , Srl_sitelinks (Wdata_dict_sitelink.Itm__site.Key_str() , Wdata_dict_sitelink.Itm__title.Key_str(), wdoc.Slink_list(), base_adj)); + Srl_root(rv, Wdata_doc_parser_v2.Str_aliases , Srl_aliases (base_adj, wdoc.Alias_list())); + Srl_root(rv, Wdata_doc_parser_v2.Str_claims , Srl_claims (base_adj, legacy_style, prop_mgr, wdoc.Claim_list(), page_url)); + return (Keyval[])rv.To_ary(Keyval.class); + } + private static void Srl_root(List_adp rv, String label, Keyval[] ary) { + if (ary == null) return; // don't add node if empty; EX: labels:{} should not add "labels" kv + rv.Add(Keyval_.new_(label, ary)); + } + private static Keyval[] Srl_langtexts(String lang_label, String text_label, Ordered_hash list) { + int len = list.Count(); if (len == 0) return null; + Keyval[] rv = new Keyval[len]; + for (int i = 0; i < len; i++) { + Wdata_langtext_itm itm = (Wdata_langtext_itm)list.Get_at(i); + String lang = String_.new_u8(itm.Lang()); + String text = String_.new_u8(itm.Text()); + rv[i] = Keyval_.new_(lang, Keyval_.Ary(Keyval_.new_(lang_label, lang), Keyval_.new_(text_label, text))); + } + return rv; + } + private static Keyval[] Srl_sitelinks(String key_label, String val_label, Ordered_hash list, int base_adj) { + int len = list.Count(); if (len == 0) return null; + Keyval[] rv = new Keyval[len]; + for (int i = 0; i < len; i++) { + Wdata_sitelink_itm itm = (Wdata_sitelink_itm)list.Get_at(i); + String site = String_.new_u8(itm.Site()); + String name = String_.new_u8(itm.Name()); + rv[i] = Keyval_.new_(site, Keyval_.Ary(Keyval_.new_(key_label, site), Keyval_.new_(val_label, name), Srl_sitelinks_badges(itm.Badges(), base_adj))); + } + return rv; + } + private static Keyval Srl_sitelinks_badges(byte[][] badges, int base_adj) { // DATE:2014-11-13 + if (badges == null) badges = Bry_.Ary_empty; // null badges -> badges:[] + int len = badges.length; + Keyval[] kvs = len == 0 ? Keyval_.Ary_empty : new Keyval[len]; + for (int i = 0; i < len; i++) { + byte[] badge = badges[i]; + kvs[i] = Keyval_.int_(i + base_adj, String_.new_u8(badge)); + } + return Keyval_.new_("badges", kvs); + } + private static Keyval[] Srl_aliases(int base_adj, Ordered_hash list) { + int len = list.Count(); if (len == 0) return null; + Keyval[] rv = new Keyval[len]; + for (int i = 0; i < len; i++) { + Wdata_alias_itm itm = (Wdata_alias_itm)list.Get_at(i); + String lang = String_.new_u8(itm.Lang()); + rv[i] = Keyval_.new_(lang, Srl_aliases_langs(base_adj, lang, itm.Vals())); + } + return rv; + } + private static Keyval[] Srl_aliases_langs(int base_adj, String lang, byte[][] ary) { + int len = ary.length; + Keyval[] rv = new Keyval[len]; + for (int i = 0; i < len; i++) { + byte[] itm = ary[i]; + rv[i] = Keyval_.int_(i + base_adj, Keyval_.Ary(Keyval_.new_(Wdata_dict_langtext.Itm__language.Key_str(), lang), Keyval_.new_(Wdata_dict_langtext.Itm__value.Key_str(), String_.new_u8(itm)))); // NOTE: using same base_adj logic as claims + } + return rv; + } + private static Keyval[] Srl_claims(int base_adj, boolean legacy_style, Wbase_prop_mgr prop_mgr, Ordered_hash claim_grps, byte[] page_url) { + int len = claim_grps.Count(); if (len == 0) return null; + Scrib_lib_wikibase_srl_visitor visitor = new Scrib_lib_wikibase_srl_visitor(); + int rv_len = legacy_style ? len * 2 : len; // NOTE: legacyStyle returns 2 sets of properties: official "P" and legacy "p"; DATE:2014-05-11 + Keyval[] rv = new Keyval[rv_len]; + for (int i = 0; i < len; i++) { + Wbase_claim_grp grp = (Wbase_claim_grp)claim_grps.Get_at(i); + String pid_str = Int_.To_str(grp.Id()); + Keyval[] grp_val = Srl_claims_prop_grp(prop_mgr, visitor, "P" + pid_str, grp, base_adj, page_url); + rv[i] = Keyval_.new_("P" + pid_str, grp_val); + if (legacy_style) + rv[i + len] = Keyval_.new_("p" + pid_str, grp_val); // SEE:WikibaseLuaBindings.php; This is a B/C hack to allow existing lua code to use hardcoded IDs in both lower (legacy) and upper case.; DATE:2014-05-11 + } + return rv; + } + private static Keyval[] Srl_claims_prop_grp(Wbase_prop_mgr prop_mgr, Scrib_lib_wikibase_srl_visitor visitor, String pid, Wbase_claim_grp grp, int base_adj, byte[] page_url) { + int len = grp.Len(); + Keyval[] rv = new Keyval[len]; + for (int i = 0; i < len; i++) { + Wbase_claim_base itm = grp.Get_at(i); + rv[i] = Keyval_.int_(i + base_adj, Srl_claims_prop_itm(prop_mgr, visitor, pid, itm, base_adj, page_url)); // NOTE: must be super 0 or super 1; DATE:2014-05-09 + } + return rv; + } + public static Keyval[] Srl_claims_prop_ary(Wbase_prop_mgr prop_mgr, String pid, Wbase_claim_base[] itms, int base_adj, byte[] page_url) { + Scrib_lib_wikibase_srl_visitor visitor = new Scrib_lib_wikibase_srl_visitor(); + int len = itms.length; + Keyval[] rv = new Keyval[len]; + for (int i = 0; i < len; i++) { + Wbase_claim_base itm = itms[i]; + rv[i] = Keyval_.int_(i + base_adj, Srl_claims_prop_itm(prop_mgr, visitor, pid, itm, base_adj, page_url)); // NOTE: must be super 0 or super 1; DATE:2014-05-09 + } + return rv; + } + private static Keyval[] Srl_claims_prop_itm(Wbase_prop_mgr prop_mgr, Scrib_lib_wikibase_srl_visitor visitor, String pid, Wbase_claim_base itm, int base_adj, byte[] page_url) { + List_adp list = List_adp_.New(); + list.Add(Keyval_.new_("id", pid)); + list.Add(Keyval_.new_("mainsnak", Srl_claims_prop_itm_core(prop_mgr, visitor, pid, itm, page_url))); + list.Add(Keyval_.new_(Wdata_dict_claim_v1.Str_rank, Wbase_claim_rank_.Reg.Get_str_or_fail(itm.Rank_tid()))); + list.Add(Keyval_.new_("type", itm.Prop_type())); + Srl_root(list, Wdata_dict_claim.Itm__qualifiers.Key_str(), Srl_qualifiers(prop_mgr, visitor, itm.Qualifiers(), base_adj, page_url)); + Srl_root(list, Wdata_dict_claim.Itm__qualifiers_order.Key_str(), Srl_qualifiers_order(prop_mgr, visitor, itm.Qualifiers_order(), base_adj, page_url)); + Srl_root(list, Wdata_dict_claim.Itm__references.Key_str(), Srl_references(prop_mgr, visitor, itm.References(), base_adj, page_url)); + return (Keyval[])list.To_ary_and_clear(Keyval.class); + } + private static Keyval[] Srl_qualifiers(Wbase_prop_mgr prop_mgr, Scrib_lib_wikibase_srl_visitor visitor, Wbase_claim_grp_list list, int base_adj, byte[] page_url) { + if (list == null) return null; + int list_len = list.Len(); if (list_len == 0) return Keyval_.Ary_empty; + List_adp rv = List_adp_.New(); + List_adp pid_list = List_adp_.New(); + for (int i = 0; i < list_len; ++i) { + Wbase_claim_grp grp = list.Get_at(i); + int grp_len = grp.Len(); + pid_list.Clear(); + String itm_pid = grp.Id_str(); + for (int j = 0; j < grp_len; ++j) { + Wbase_claim_base itm = grp.Get_at(j); + pid_list.Add(Keyval_.int_(j + base_adj, Srl_claims_prop_itm_core(prop_mgr, visitor, itm_pid, itm, page_url))); // NOTE: was originally "+ 1"; changed to base_adj; PAGE:ru.w:Tor ru.w:Кактусовые DATE:2014-10-25 + } + rv.Add(Keyval_.new_(itm_pid, pid_list.To_ary_and_clear(Keyval.class))); + } + return (Keyval[])rv.To_ary_and_clear(Keyval.class); + } + private static Keyval[] Srl_qualifiers_order(Wbase_prop_mgr prop_mgr, Scrib_lib_wikibase_srl_visitor visitor, int[] list, int base_adj, byte[] page_url) { + if (list == null) return null; + int list_len = list.length; if (list_len == 0) return Keyval_.Ary_empty; + List_adp rv = List_adp_.New(); + for (int i = 0; i < list_len; ++i) { + String itm_pid = "P" + Int_.To_str(list[i]); + rv.Add(Keyval_.int_(i + base_adj, itm_pid)); + } + return (Keyval[])rv.To_ary_and_clear(Keyval.class); + } + private static Keyval[] Srl_references(Wbase_prop_mgr prop_mgr, Scrib_lib_wikibase_srl_visitor visitor, Wbase_references_grp[] list, int base_adj, byte[] page_url) { + if (list == null) return null; + int list_len = list.length; if (list_len == 0) return Keyval_.Ary_empty; + List_adp rv = List_adp_.New(); + for (int i = 0; i < list_len; ++i) { + Wbase_references_grp references_grp = list[i]; + Keyval[] references_kvs = new Keyval[3]; + references_kvs[0] = Keyval_.new_("hash", references_grp.Hash()); + references_kvs[1] = Keyval_.new_("snaks", Srl_qualifiers(prop_mgr, visitor, references_grp.Snaks(), base_adj, page_url)); + references_kvs[2] = Keyval_.new_("snaks-order", Srl_qualifiers_order(prop_mgr, visitor, references_grp.Snaks_order(), base_adj, page_url)); + rv.Add(Keyval_.int_(i + base_adj, references_kvs)); + } + return (Keyval[])rv.To_ary_and_clear(Keyval.class); + } + private static Keyval[] Srl_claims_prop_itm_core(Wbase_prop_mgr prop_mgr, Scrib_lib_wikibase_srl_visitor visitor, String pid, Wbase_claim_base itm, byte[] page_url) { + boolean snak_is_valued = itm.Snak_tid() == Wbase_claim_value_type_.Tid__value; // PURPOSE: was != Wbase_claim_value_type_.Tid__novalue; PAGE:it.s:Autore:Anonimo DATE:2015-12-06 + int snak_is_valued_adj = snak_is_valued ? 1 : 0; + Keyval[] rv = new Keyval[3 + snak_is_valued_adj]; + if (snak_is_valued) // NOTE: novalue must not return slot (no datavalue node in json); PAGE:ru.w:Лимонов,_Эдуард_Вениаминович; DATE:2015-02-16; ALSO: sv.w:Joseph_Jaquet; DATE:2015-07-31 + rv[0] = Keyval_.new_("datavalue", Srl_claims_prop_itm_core_val(visitor, itm)); + rv[0 + snak_is_valued_adj] = Keyval_.new_("property", pid); + rv[1 + snak_is_valued_adj] = Keyval_.new_("snaktype", Wbase_claim_value_type_.Reg.Get_str_or_fail(itm.Snak_tid())); + + // get prop datatype; NOTE: datatype needed for Modules; PAGE:eo.w:WikidataKoord; DATE:2015-11-08 + String datatype = prop_mgr.Get_or_null(pid, page_url); + if (datatype == null) // if null, fallback to value based on tid; needed for (a) tests and (b) old wbase dbs that don't have wbase_prop tbl; DATE:2016-12-01 + datatype = Wbase_claim_type_.Get_scrib_or_unknown(itm.Val_tid()); + rv[2 + snak_is_valued_adj] = Keyval_.new_("datatype", datatype); + return rv; + } + private static Keyval[] Srl_claims_prop_itm_core_val(Scrib_lib_wikibase_srl_visitor visitor, Wbase_claim_base itm) { + switch (itm.Snak_tid()) { + case Wbase_claim_value_type_.Tid__somevalue: return Datavalue_somevalue; + case Wbase_claim_value_type_.Tid__novalue: return Datavalue_novalue; // TODO_OLD: throw exc + default: + itm.Welcome(visitor); + return visitor.Rv(); + } + } + public static final String Key_type = "type", Key_value = "value"; + private static final Keyval[] Datavalue_somevalue = Keyval_.Ary_empty; // changed to not return value-node; PAGE:it.s:Autore:Anonimo DATE:2015-12-06 // new Keyval[] {Keyval_.new_(Key_type, ""), Keyval_.new_(Key_value, "")}; // NOTE: must return ""; null fails; EX:w:Joseph-François_Malgaigne; DATE:2014-04-07 + private static final Keyval[] Datavalue_novalue = Keyval_.Ary_empty; +} diff --git a/400_xowa/src/gplx/xowa/xtns/wbases/Wdata_prop_val_visitor.java b/400_xowa/src/gplx/xowa/xtns/wbases/Wdata_prop_val_visitor.java index 87424820e..b015064a2 100644 --- a/400_xowa/src/gplx/xowa/xtns/wbases/Wdata_prop_val_visitor.java +++ b/400_xowa/src/gplx/xowa/xtns/wbases/Wdata_prop_val_visitor.java @@ -1,6 +1,6 @@ /* XOWA: the XOWA Offline Wiki Application -Copyright (C) 2012-2017 gnosygnu@gmail.com +Copyright (C) 2012-2020 gnosygnu@gmail.com XOWA is licensed under the terms of the General Public License (GPL) Version 3, or alternatively under the terms of the Apache License Version 2.0. @@ -13,170 +13,201 @@ The terms of each license can be found in the source code repository: GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt */ -package gplx.xowa.xtns.wbases; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; -import gplx.core.brys.fmtrs.*; -import gplx.xowa.langs.*; -import gplx.xowa.xtns.wbases.core.*; import gplx.xowa.xtns.wbases.claims.*; import gplx.xowa.xtns.wbases.claims.itms.*; import gplx.xowa.xtns.wbases.hwtrs.*; import gplx.xowa.xtns.wbases.claims.itms.times.*; -public class Wdata_prop_val_visitor implements Wbase_claim_visitor { // THREAD.UNSAFE; callers must do synchronized - private Wdata_wiki_mgr wdata_mgr; private Xoae_app app; private Bry_bfr bfr; - private Xol_lang_itm lang; - private final Bry_bfr tmp_time_bfr = Bry_bfr_.Reset(255); private final Bry_fmtr tmp_time_fmtr = Bry_fmtr.new_(); - private Wdata_hwtr_msgs msgs; - private boolean mode_is_statements; - public Wdata_prop_val_visitor(Xoae_app app, Wdata_wiki_mgr wdata_mgr) { - this.app = app; this.wdata_mgr = wdata_mgr; - } - public void Init(Bry_bfr bfr, Wdata_hwtr_msgs msgs, byte[] lang_key, boolean mode_is_statements) { - // init some member variables; - this.bfr = bfr; this.msgs = msgs; - this.lang = app.Lang_mgr().Get_by_or_null(lang_key); - if (lang == null) lang = app.Lang_mgr().Lang_en(); // TEST: needed for one test; DATE:2016-10-20 - this.mode_is_statements = mode_is_statements; - } - public void Visit_str(Wbase_claim_string itm) {Write_str(bfr, itm.Val_bry());} - public static void Write_str(Bry_bfr bfr, byte[] bry) {bfr.Add(bry);} - public void Visit_time(Wbase_claim_time itm) { - Write_time(bfr, tmp_time_bfr, tmp_time_fmtr, msgs, Bry_.Empty, -1, itm.Time_as_date()); // for now, don't bother passing ttl; only used for error msg; DATE:2015-08-03 - } - public static void Write_time(Bry_bfr bfr, Bry_bfr tmp_bfr, Bry_fmtr tmp_fmtr, Wdata_hwtr_msgs msgs, byte[] page_url, int pid, Wbase_date date) { - try { - Wbase_date_.To_bfr(bfr, tmp_fmtr, tmp_bfr, msgs, date); - if (date.Calendar_is_julian()) bfr.Add_byte_space().Add(msgs.Time_julian()); - } catch (Exception e) { - Gfo_usr_dlg_.Instance.Warn_many("", "", "failed to write time; ttl=~{0} pid=~{1} err=~{2}", page_url, pid, Err_.Message_gplx_log(e)); - } - } - public void Visit_monolingualtext(Wbase_claim_monolingualtext itm) {Write_langtext(bfr, itm.Text());} - public static void Write_langtext(Bry_bfr bfr, byte[] text) {bfr.Add(text);} // phrase only; PAGE:en.w:Alberta; EX: {{#property:motto}} -> "Fortis et libre"; DATE:2014-08-28 - public void Visit_entity(Wbase_claim_entity itm) {Write_entity(bfr, wdata_mgr, lang.Key_bry(), itm.Page_ttl_db(), mode_is_statements);} - public static void Write_entity(Bry_bfr bfr, Wdata_wiki_mgr wdata_mgr, byte[] lang_key, byte[] entity_ttl_db, boolean mode_is_statements) { - // get entity - Wdata_doc entity_doc = wdata_mgr.Doc_mgr.Get_by_xid_or_null(entity_ttl_db); - - // NOTE: wiki may refer to entity that no longer exists; EX: {{#property:p1}} which links to Q1, but p1 links to Q2 and Q2 was deleted; DATE:2014-02-01 - if (entity_doc == null) - return; - - // get label - byte[] label = entity_doc.Get_label_bry_or_null(lang_key); - - // NOTE: some properties may not exist in language of wiki; default to english; DATE:2013-12-19 - if (label == null && !Bry_.Eq(lang_key, Xol_lang_itm_.Key_en)) - label = entity_doc.Get_label_bry_or_null(Xol_lang_itm_.Key_en); - - // if label is still not found, don't add null reference - if (label != null) { - // if statements, add "[[entity_val]]"; DATE:2017-04-04 - if (mode_is_statements) { - bfr.Add(gplx.xowa.parsers.tmpls.Xop_tkn_.Lnki_bgn); - bfr.Add(label); - bfr.Add(gplx.xowa.parsers.tmpls.Xop_tkn_.Lnki_end); - } - // else, just add "entity_val" - else - bfr.Add(label); - } - } - public void Visit_quantity(Wbase_claim_quantity itm) {Write_quantity(bfr, wdata_mgr, lang, itm.Amount(), itm.Lbound(), itm.Ubound(), itm.Unit());} - public static void Write_quantity(Bry_bfr bfr, Wdata_wiki_mgr wdata_mgr, Xol_lang_itm lang, byte[] val_bry, byte[] lo_bry, byte[] hi_bry, byte[] unit) { - // get val, lo, hi; NOTE: must handle large numbers; EX:{{#property:P1082}} PAGE:en.w:Earth; DATE:2015-08-02; NOTE: must handle decimals; PAGE:en.w:Malinao,_Aklan; DATE:2016-11-08 - Decimal_adp val = Decimal__parse_or(val_bry, null); if (val == null) throw Err_.new_wo_type("wbase:quanity val can not be null"); - Decimal_adp lo = Decimal__parse_or(lo_bry, val); - Decimal_adp hi = Decimal__parse_or(hi_bry, val); - - // fmt val - if (lo.Eq(hi) && hi.Eq(val))// lo, hi, val are same; print val only; - bfr.Add(lang.Num_mgr().Format_num_by_decimal(val)); // amount; EX: 1,234 - else { - Wdata_hwtr_msgs msgs = wdata_mgr.Hwtr_mgr().Msgs(); - Decimal_adp lo_dif = val.Subtract(lo); - Decimal_adp hi_dif = hi.Subtract(val); - if (lo_dif.Eq(hi_dif)) { // lo_dif, hi_dif are same; print val±dif - bfr.Add(lang.Num_mgr().Format_num_by_decimal(val)); // amount; EX: 1,234 - bfr.Add(msgs.Sym_plusminus()); // symbol: EX: ± - bfr.Add(lang.Num_mgr().Format_num_by_decimal(lo_dif)); // amount; EX: 4 - } - else { // lo_dif, hi_dif are diff; print lo - hi; this may not be what MW does - bfr.Add(lang.Num_mgr().Format_num_by_decimal(lo)); // lo; EX: 1,230 - bfr.Add_byte(Byte_ascii.Dash); // dash: EX: - - bfr.Add(lang.Num_mgr().Format_num_by_decimal(hi)); // hi; EX: 1,238 - } - } - - // output unit - int unit_qid_bgn = unit == null ? Bry_find_.Not_found : Bry_find_.Find_fwd(unit, Wikidata_url); - if (unit_qid_bgn == Bry_find_.Not_found) {} // entity missing; output nothing; EX:"unit":"1"; PAGE:en.w:Malinao,_Aklan DATE:2016-11-08 - else { // entity exists; EX:"http://www.wikidata.org/entity/Q11573" (meter) - bfr.Add_byte_space(); - byte[] xid = Bry_.Mid(unit, Wikidata_url.length); - Wdata_doc entity_doc = wdata_mgr.Doc_mgr.Get_by_xid_or_null(xid); - if (entity_doc != null) { - Wdata_langtext_itm label = entity_doc.Get_label_itm_or_null(lang); - if (label != null) - bfr.Add(label.Text()); - } - } - } - private static Decimal_adp Decimal__parse_or(byte[] bry, Decimal_adp or) { // handle missing lbound / ubound; DATE:2016-12-03 - return bry == null ? or : Decimal_adp_.parse(String_.new_u8(Normalize_for_decimal(bry))); - } - public static byte[] Normalize_for_decimal(byte[] bry) { // remove leading "+" and any commas; was Bry_.To_long_or(val_bry, Byte_ascii.Comma_bry, 0, val_bry.length, 0) - if (bry == null) return null; - Bry_bfr bfr = null; - int len = bry.length; - for (int i = 0; i < len; i++) { - byte b = bry[i]; - switch (b) { - case Byte_ascii.Plus: - if (i == 0) { - if (bfr == null) bfr = Bry_bfr_.New(); - } - else { - throw Err_.new_wo_type("invalid decimal format; plus must be at start of String", "raw", bry); - } - break; - case Byte_ascii.Comma: - if (bfr == null) { - bfr = Bry_bfr_.New(); - bfr.Add_mid(bry, 0, i); - } - break; - default: - if (bfr != null) - bfr.Add_byte(b); - break; - } - } - return bfr == null ? bry : bfr.To_bry_and_clear(); - } - public void Visit_globecoordinate(Wbase_claim_globecoordinate itm) {Write_geo(Bool_.N, bfr, wdata_mgr.Hwtr_mgr().Lbl_mgr(), msgs, itm.Lat(), itm.Lng(), itm.Alt(), itm.Prc(), itm.Glb());} - public static void Write_geo(boolean wikidata_page, Bry_bfr bfr, Wdata_lbl_mgr lbl_mgr, Wdata_hwtr_msgs msgs, byte[] lat, byte[] lng, byte[] alt, byte[] prc, byte[] glb) { - // get precision - int precision_int = -1; - if (Bry_.Eq(prc, Object_.Bry__null) || Bry_.Eq(prc, Byte_ascii.Num_0_bry)) // "null" or "0" should be 1; PAGE:ru.w:Лысково_(Калужская_область) DATE:2016-11-24 - precision_int = 1; - else { - Decimal_adp precision_frac = Decimal_adp_.parse(String_.new_a7(prc)); - precision_int = Math_.Log10(Decimal_adp_.One.Divide(precision_frac).To_int()); // convert precision to log10 integer; EX: .00027777 -> 3600 -> 3 - } - - // build String - gplx.xowa.xtns.mapSources.Map_dd2dms_func.Deg_to_dms(bfr, Bool_.Y, Bool_.N, lat, precision_int); - bfr.Add_byte_comma().Add_byte_space(); - gplx.xowa.xtns.mapSources.Map_dd2dms_func.Deg_to_dms(bfr, Bool_.Y, Bool_.Y, lng, precision_int); - - // write globe if any - if (wikidata_page) { - byte[] glb_ttl = Wdata_lbl_itm.Extract_ttl(glb); - if (glb_ttl != null) { - byte[] glb_lbl = lbl_mgr.Get_text__ttl(glb_ttl, glb); - bfr.Add_byte_space().Add_byte(Byte_ascii.Paren_bgn); - Wdata_hwtr_mgr.Write_link_wikidata(bfr, glb_ttl, glb_lbl); - bfr.Add_byte(Byte_ascii.Paren_end); - } - } - } - - private static final byte[] Wikidata_url = Bry_.new_a7("http://www.wikidata.org/entity/"); - public void Visit_system(Wbase_claim_value itm) {} -} +package gplx.xowa.xtns.wbases; + +import gplx.Bool_; +import gplx.Bry_; +import gplx.Bry_bfr; +import gplx.Bry_bfr_; +import gplx.Bry_find_; +import gplx.Byte_ascii; +import gplx.Decimal_adp; +import gplx.Decimal_adp_; +import gplx.Err_; +import gplx.Gfo_usr_dlg_; +import gplx.Math_; +import gplx.Object_; +import gplx.String_; +import gplx.core.brys.fmtrs.Bry_fmtr; +import gplx.xowa.Xoae_app; +import gplx.xowa.langs.Xol_lang_itm; +import gplx.xowa.langs.Xol_lang_itm_; +import gplx.xowa.xtns.wbases.claims.Wbase_claim_visitor; +import gplx.xowa.xtns.wbases.claims.itms.Wbase_claim_entity; +import gplx.xowa.xtns.wbases.claims.itms.Wbase_claim_globecoordinate; +import gplx.xowa.xtns.wbases.claims.itms.Wbase_claim_monolingualtext; +import gplx.xowa.xtns.wbases.claims.itms.Wbase_claim_quantity; +import gplx.xowa.xtns.wbases.claims.itms.Wbase_claim_string; +import gplx.xowa.xtns.wbases.claims.itms.Wbase_claim_time; +import gplx.xowa.xtns.wbases.claims.itms.Wbase_claim_value; +import gplx.xowa.xtns.wbases.claims.itms.times.Wbase_date; +import gplx.xowa.xtns.wbases.claims.itms.times.Wbase_date_; +import gplx.xowa.xtns.wbases.core.Wdata_langtext_itm; +import gplx.xowa.xtns.wbases.hwtrs.Wdata_hwtr_mgr; +import gplx.xowa.xtns.wbases.hwtrs.Wdata_hwtr_msgs; +import gplx.xowa.xtns.wbases.hwtrs.Wdata_lbl_itm; +import gplx.xowa.xtns.wbases.hwtrs.Wdata_lbl_mgr; + +public class Wdata_prop_val_visitor implements Wbase_claim_visitor { // THREAD.UNSAFE; callers must do synchronized + private Wdata_wiki_mgr wdata_mgr; private Xoae_app app; private Bry_bfr bfr; + private Xol_lang_itm lang; + private final Bry_bfr tmp_time_bfr = Bry_bfr_.Reset(255); private final Bry_fmtr tmp_time_fmtr = Bry_fmtr.new_(); + private Wdata_hwtr_msgs msgs; + private boolean mode_is_statements; + public Wdata_prop_val_visitor(Xoae_app app, Wdata_wiki_mgr wdata_mgr) { + this.app = app; this.wdata_mgr = wdata_mgr; + } + public void Init(Bry_bfr bfr, Wdata_hwtr_msgs msgs, byte[] lang_key, boolean mode_is_statements) { + // init some member variables; + this.bfr = bfr; this.msgs = msgs; + this.lang = app.Lang_mgr().Get_by_or_null(lang_key); + if (lang == null) lang = app.Lang_mgr().Lang_en(); // TEST: needed for one test; DATE:2016-10-20 + this.mode_is_statements = mode_is_statements; + } + public void Visit_str(Wbase_claim_string itm) {Write_str(bfr, itm.Val_bry());} + public static void Write_str(Bry_bfr bfr, byte[] bry) {bfr.Add(bry);} + public void Visit_time(Wbase_claim_time itm) { + Write_time(bfr, tmp_time_bfr, tmp_time_fmtr, msgs, Bry_.Empty, -1, itm.Time_as_date()); // for now, don't bother passing ttl; only used for error msg; DATE:2015-08-03 + } + public static void Write_time(Bry_bfr bfr, Bry_bfr tmp_bfr, Bry_fmtr tmp_fmtr, Wdata_hwtr_msgs msgs, byte[] page_url, int pid, Wbase_date date) { + try { + Wbase_date_.To_bfr(bfr, tmp_fmtr, tmp_bfr, msgs, date); + if (date.Calendar_is_julian()) bfr.Add_byte_space().Add(msgs.Time_julian()); + } catch (Exception e) { + Gfo_usr_dlg_.Instance.Warn_many("", "", "failed to write time; ttl=~{0} pid=~{1} err=~{2}", page_url, pid, Err_.Message_gplx_log(e)); + } + } + public void Visit_monolingualtext(Wbase_claim_monolingualtext itm) {Write_langtext(bfr, itm.Text());} + public static void Write_langtext(Bry_bfr bfr, byte[] text) {bfr.Add(text);} // phrase only; PAGE:en.w:Alberta; EX: {{#property:motto}} -> "Fortis et libre"; DATE:2014-08-28 + public void Visit_entity(Wbase_claim_entity itm) {Write_entity(bfr, wdata_mgr, lang.Key_bry(), itm.Page_ttl_db(), mode_is_statements);} + public static void Write_entity(Bry_bfr bfr, Wdata_wiki_mgr wdata_mgr, byte[] lang_key, byte[] entity_ttl_db, boolean mode_is_statements) { + // get entity + Wdata_doc entity_doc = wdata_mgr.Doc_mgr.Get_by_xid_or_null(entity_ttl_db); + + // NOTE: wiki may refer to entity that no longer exists; EX: {{#property:p1}} which links to Q1, but p1 links to Q2 and Q2 was deleted; DATE:2014-02-01 + if (entity_doc == null) + return; + + // get label + byte[] label = entity_doc.Get_label_bry_or_null(lang_key); + + // NOTE: some properties may not exist in language of wiki; default to english; DATE:2013-12-19 + if (label == null && !Bry_.Eq(lang_key, Xol_lang_itm_.Key_en)) + label = entity_doc.Get_label_bry_or_null(Xol_lang_itm_.Key_en); + + // if label is still not found, don't add null reference + if (label != null) { + // if statements, add "[[entity_val]]"; DATE:2017-04-04 + if (mode_is_statements) { + bfr.Add(gplx.xowa.parsers.tmpls.Xop_tkn_.Lnki_bgn); + bfr.Add(label); + bfr.Add(gplx.xowa.parsers.tmpls.Xop_tkn_.Lnki_end); + } + // else, just add "entity_val" + else + bfr.Add(label); + } + } + public void Visit_quantity(Wbase_claim_quantity itm) {Write_quantity(bfr, wdata_mgr, lang, itm.Amount(), itm.Lbound(), itm.Ubound(), itm.Unit());} + public static void Write_quantity(Bry_bfr bfr, Wdata_wiki_mgr wdata_mgr, Xol_lang_itm lang, byte[] val_bry, byte[] lo_bry, byte[] hi_bry, byte[] unit) { + // get val, lo, hi; NOTE: must handle large numbers; EX:{{#property:P1082}} PAGE:en.w:Earth; DATE:2015-08-02; NOTE: must handle decimals; PAGE:en.w:Malinao,_Aklan; DATE:2016-11-08 + Decimal_adp val = Decimal__parse_or(val_bry, null); if (val == null) throw Err_.new_wo_type("wbase:quanity val can not be null"); + Decimal_adp lo = Decimal__parse_or(lo_bry, val); + Decimal_adp hi = Decimal__parse_or(hi_bry, val); + + // fmt val + if (lo.Eq(hi) && hi.Eq(val))// lo, hi, val are same; print val only; + bfr.Add(lang.Num_mgr().Format_num_by_decimal(val)); // amount; EX: 1,234 + else { + Wdata_hwtr_msgs msgs = wdata_mgr.Hwtr_mgr().Msgs(); + Decimal_adp lo_dif = val.Subtract(lo); + Decimal_adp hi_dif = hi.Subtract(val); + if (lo_dif.Eq(hi_dif)) { // lo_dif, hi_dif are same; print val±dif + bfr.Add(lang.Num_mgr().Format_num_by_decimal(val)); // amount; EX: 1,234 + bfr.Add(msgs.Sym_plusminus()); // symbol: EX: ± + bfr.Add(lang.Num_mgr().Format_num_by_decimal(lo_dif)); // amount; EX: 4 + } + else { // lo_dif, hi_dif are diff; print lo - hi; this may not be what MW does + bfr.Add(lang.Num_mgr().Format_num_by_decimal(lo)); // lo; EX: 1,230 + bfr.Add_byte(Byte_ascii.Dash); // dash: EX: - + bfr.Add(lang.Num_mgr().Format_num_by_decimal(hi)); // hi; EX: 1,238 + } + } + + // output unit + int unit_qid_bgn = unit == null ? Bry_find_.Not_found : Bry_find_.Find_fwd(unit, Wikidata_url); + if (unit_qid_bgn == Bry_find_.Not_found) {} // entity missing; output nothing; EX:"unit":"1"; PAGE:en.w:Malinao,_Aklan DATE:2016-11-08 + else { // entity exists; EX:"http://www.wikidata.org/entity/Q11573" (meter) + bfr.Add_byte_space(); + byte[] xid = Bry_.Mid(unit, Wikidata_url.length); + Wdata_doc entity_doc = wdata_mgr.Doc_mgr.Get_by_xid_or_null(xid); + if (entity_doc != null) { + Wdata_langtext_itm label = entity_doc.Get_label_itm_or_null(lang); + if (label != null) + bfr.Add(label.Text()); + } + } + } + private static Decimal_adp Decimal__parse_or(byte[] bry, Decimal_adp or) { // handle missing lbound / ubound; DATE:2016-12-03 + return bry == null ? or : Decimal_adp_.parse(String_.new_u8(Normalize_for_decimal(bry))); + } + public static byte[] Normalize_for_decimal(byte[] bry) { // remove leading "+" and any commas; was Bry_.To_long_or(val_bry, Byte_ascii.Comma_bry, 0, val_bry.length, 0) + if (bry == null) return null; + Bry_bfr bfr = null; + int len = bry.length; + for (int i = 0; i < len; i++) { + byte b = bry[i]; + switch (b) { + case Byte_ascii.Plus: + if (i == 0) { + if (bfr == null) bfr = Bry_bfr_.New(); + } + else { + throw Err_.new_wo_type("invalid decimal format; plus must be at start of String", "raw", bry); + } + break; + case Byte_ascii.Comma: + if (bfr == null) { + bfr = Bry_bfr_.New(); + bfr.Add_mid(bry, 0, i); + } + break; + default: + if (bfr != null) + bfr.Add_byte(b); + break; + } + } + return bfr == null ? bry : bfr.To_bry_and_clear(); + } + public void Visit_globecoordinate(Wbase_claim_globecoordinate itm) {Write_geo(Bool_.N, bfr, wdata_mgr.Hwtr_mgr().Lbl_mgr(), msgs, itm.Lat(), itm.Lng(), itm.Alt(), itm.Prc(), itm.Glb());} + public static void Write_geo(boolean wikidata_page, Bry_bfr bfr, Wdata_lbl_mgr lbl_mgr, Wdata_hwtr_msgs msgs, byte[] lat, byte[] lng, byte[] alt, byte[] prc, byte[] glb) { + // get precision + int precision_int = -1; + if (Bry_.Eq(prc, Object_.Bry__null) || Bry_.Eq(prc, Byte_ascii.Num_0_bry)) // "null" or "0" should be 1; PAGE:ru.w:Лысково_(Калужская_область) DATE:2016-11-24 + precision_int = 1; + else { + Decimal_adp precision_frac = Decimal_adp_.parse(String_.new_a7(prc)); + precision_int = Math_.Log10(Decimal_adp_.One.Divide(precision_frac).To_int()); // convert precision to log10 integer; EX: .00027777 -> 3600 -> 3 + } + + // build String + gplx.xowa.xtns.mapSources.Map_dd2dms_func.Deg_to_dms(bfr, Bool_.Y, Bool_.N, lat, precision_int); + bfr.Add_byte_comma().Add_byte_space(); + gplx.xowa.xtns.mapSources.Map_dd2dms_func.Deg_to_dms(bfr, Bool_.Y, Bool_.Y, lng, precision_int); + + // write globe if any + if (wikidata_page) { + byte[] glb_ttl = Wdata_lbl_itm.Extract_ttl(glb); + if (glb_ttl != null) { + byte[] glb_lbl = lbl_mgr.Get_text__ttl(glb_ttl, glb); + bfr.Add_byte_space().Add_byte(Byte_ascii.Paren_bgn); + Wdata_hwtr_mgr.Write_link_wikidata(bfr, glb_ttl, glb_lbl); + bfr.Add_byte(Byte_ascii.Paren_end); + } + } + } + + private static final byte[] Wikidata_url = Bry_.new_a7("http://www.wikidata.org/entity/"); + public void Visit_system(Wbase_claim_value itm) {} +} diff --git a/400_xowa/src/gplx/xowa/xtns/wbases/Wdata_wiki_mgr.java b/400_xowa/src/gplx/xowa/xtns/wbases/Wdata_wiki_mgr.java index d8dbd8627..df09ae753 100644 --- a/400_xowa/src/gplx/xowa/xtns/wbases/Wdata_wiki_mgr.java +++ b/400_xowa/src/gplx/xowa/xtns/wbases/Wdata_wiki_mgr.java @@ -1,6 +1,6 @@ /* XOWA: the XOWA Offline Wiki Application -Copyright (C) 2012-2017 gnosygnu@gmail.com +Copyright (C) 2012-2020 gnosygnu@gmail.com XOWA is licensed under the terms of the General Public License (GPL) Version 3, or alternatively under the terms of the Apache License Version 2.0. @@ -13,202 +13,245 @@ The terms of each license can be found in the source code repository: GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt */ -package gplx.xowa.xtns.wbases; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; -import gplx.core.primitives.*; -import gplx.xowa.langs.msgs.*; import gplx.langs.jsons.*; -import gplx.xowa.wikis.nss.*; -import gplx.xowa.langs.*; -import gplx.xowa.parsers.*; -import gplx.xowa.wikis.domains.*; import gplx.xowa.htmls.*; import gplx.xowa.parsers.logs.*; import gplx.xowa.apps.apis.xowa.xtns.*; import gplx.xowa.apps.apis.xowa.html.*; import gplx.xowa.users.*; -import gplx.xowa.xtns.wbases.core.*; import gplx.xowa.xtns.wbases.claims.*; import gplx.xowa.xtns.wbases.claims.enums.*; import gplx.xowa.xtns.wbases.claims.itms.*; import gplx.xowa.xtns.wbases.parsers.*; import gplx.xowa.xtns.wbases.pfuncs.*; import gplx.xowa.xtns.wbases.hwtrs.*; import gplx.xowa.xtns.wbases.stores.*; -import gplx.xowa.mediawiki.extensions.Wikibase.client.includes.dataAccess.scribunto.*; -public class Wdata_wiki_mgr implements Gfo_evt_itm, Gfo_invk { - private final Xoae_app app; - private final Wdata_prop_val_visitor prop_val_visitor; - private final Wdata_doc_parser wdoc_parser_v1 = new Wdata_doc_parser_v1(), wdoc_parser_v2 = new Wdata_doc_parser_v2(); - private final Object thread_lock = new Object(); - private final Bry_bfr tmp_bfr = Bry_bfr_.New_w_size(32); - public Wdata_wiki_mgr(Xoae_app app) { - this.app = app; - this.evt_mgr = new Gfo_evt_mgr(this); - this.Qid_mgr = new Wbase_qid_mgr(this); - this.Pid_mgr = new Wbase_pid_mgr(this); - this.Doc_mgr = new Wbase_doc_mgr(this, this.Qid_mgr); - this.prop_mgr = new Wbase_prop_mgr(Wbase_prop_mgr_loader_.New_db(this)); - this.prop_val_visitor = new Wdata_prop_val_visitor(app, this); - this.Enabled_(true); - } - public Gfo_evt_mgr Evt_mgr() {return evt_mgr;} private final Gfo_evt_mgr evt_mgr; - public final Wbase_qid_mgr Qid_mgr; - public final Wbase_pid_mgr Pid_mgr; - public final Wbase_doc_mgr Doc_mgr; - public Wbase_prop_mgr Prop_mgr() {return prop_mgr;} private final Wbase_prop_mgr prop_mgr; - public boolean Enabled() {return enabled;} private boolean enabled; - public void Enabled_(boolean v) { - this.enabled = v; - Qid_mgr.Enabled_(v); - Pid_mgr.Enabled_(v); - Doc_mgr.Enabled_(v); - } - public byte[] Domain() {return domain;} public void Domain_(byte[] v) {domain = v;} private byte[] domain = Bry_.new_a7("www.wikidata.org"); - public Wdata_hwtr_mgr Hwtr_mgr() { - if (hwtr_mgr == null) - Hwtr_mgr_assert(); - return hwtr_mgr; - } private Wdata_hwtr_mgr hwtr_mgr; - public Xowe_wiki Wdata_wiki() { - if (wdata_wiki == null) { - synchronized (thread_lock) { // LOCK:must synchronized b/c multiple threads may init wdata_mgr at same time; - Xowe_wiki tmp_wdata_wiki = app.Wiki_mgr().Get_by_or_make(domain).Init_assert(); - if (wdata_wiki == null) // synchronized is not around "if (wdata_wiki == null)", so multiple threads may try to set; only set if null; DATE:2016-09-12 - wdata_wiki = tmp_wdata_wiki; - } - } - return wdata_wiki; - } private Xowe_wiki wdata_wiki; - public Json_parser Jdoc_parser() {return jdoc_parser;} private Json_parser jdoc_parser = new Json_parser(); - public void Init_by_app() {} - public Wdata_doc_parser Wdoc_parser(Json_doc jdoc) { - Json_kv itm_0 = Json_kv.cast(jdoc.Root_nde().Get_at(0)); // get 1st node - return Bry_.Eq(itm_0.Key().Data_bry(), Wdata_doc_parser_v2.Bry_type) - || Bry_.Eq(itm_0.Key().Data_bry(), Wdata_doc_parser_v2.Bry_id) - ? wdoc_parser_v2 : wdoc_parser_v1; // if "type", must be v2 - } - public Xop_log_property_wkr Property_wkr() {return property_wkr;} private Xop_log_property_wkr property_wkr; - public void Clear() { - synchronized (thread_lock) { // LOCK:app-level - Qid_mgr.Clear(); - Pid_mgr.Clear(); - Doc_mgr.Clear(); - } - } - public byte[] Get_claim_or(Xow_domain_itm domain, Xoa_ttl page_ttl, int pid, byte[] or) { - byte[] qid = this.Qid_mgr.Get_qid_or_null(domain.Abrv_wm(), page_ttl); if (qid == null) return or; - Wdata_doc wdoc = Doc_mgr.Get_by_loose_id_or_null(qid); if (wdoc == null) return or; - Wbase_claim_grp claim_grp = wdoc.Get_claim_grp_or_null(pid); - if (claim_grp == null || claim_grp.Len() == 0) return or; - Wbase_claim_base claim_itm = claim_grp.Get_at(0); - Resolve_claim(tmp_bfr, domain, claim_itm); - return tmp_bfr.To_bry_and_clear(); - } - public void Resolve_claim(Bry_bfr rv, Xow_domain_itm domain, Wbase_claim_base claim_itm) { - synchronized (thread_lock) { // LOCK:must synchronized b/c prop_val_visitor has member bfr which can get overwritten; DATE:2016-07-06 - if (hwtr_mgr == null) Hwtr_mgr_assert(); - prop_val_visitor.Init(rv, hwtr_mgr.Msgs(), domain.Lang_orig_key(), Bool_.N); - claim_itm.Welcome(prop_val_visitor); - } - } - public void Resolve_to_bfr(Bry_bfr bfr, Xowe_wiki wiki, Wbase_claim_grp prop_grp, byte[] lang_key, boolean mode_is_statements) { - synchronized (thread_lock) { // LOCK:must synchronized b/c prop_val_visitor has member bfr which can get overwritten; DATE:2016-07-06 - if (hwtr_mgr == null) Hwtr_mgr_assert(); - int len = prop_grp.Len(); - Wbase_claim_base selected = null; - for (int i = 0; i < len; i++) { // NOTE: multiple props possible; EX: {{#property:P1082}}; PAGE:en.w:Earth DATE:2015-08-02 - Wbase_claim_base prop = prop_grp.Get_at(i); - if (selected == null) selected = prop; // if selected not set, set it; will always set to 1st prop - if (prop.Rank_tid() == Wbase_claim_rank_.Tid__preferred) { // if prop is preferred, select it and exit; - selected = prop; - break; - } - } - switch (selected.Snak_tid()) { // SEE:NOTE:novalue/somevalue - case Wbase_claim_value_type_.Tid__novalue: - bfr.Add(wiki.Msg_mgr().Val_by_id(Xol_msg_itm_.Id_xowa_wikidata_novalue)); - break; - case Wbase_claim_value_type_.Tid__somevalue: - bfr.Add(wiki.Msg_mgr().Val_by_id(Xol_msg_itm_.Id_xowa_wikidata_somevalue)); - break; - default: { - prop_val_visitor.Init(bfr, hwtr_mgr.Msgs(), lang_key, mode_is_statements); - selected.Welcome(prop_val_visitor); - break; - } - } - } - } - public byte[] Popup_text(Xoae_page page) { - Hwtr_mgr_assert(); - Wdata_doc wdoc = Doc_mgr.Get_by_exact_id_or_null(page.Ttl().Full_db()); - if (wdoc == null) return Bry_.Empty; - return hwtr_mgr.Popup(wdoc); - } - public void Write_json_as_html(Bry_bfr bfr, Xoa_ttl page_ttl, byte[] data_raw) { - Hwtr_mgr_assert(); - Wdata_doc wdoc = Doc_mgr.Get_by_exact_id_or_null(page_ttl.Full_db()); - if (wdoc == null) return; - hwtr_mgr.Init_by_wdoc(wdoc); - bfr.Add(hwtr_mgr.Write(wdoc)); - } - private void Hwtr_mgr_assert() { - if (hwtr_mgr != null) return; - Xoapi_toggle_mgr toggle_mgr = app.Api_root().Html().Page().Toggle_mgr(); - Xoapi_wikibase wikibase_api = app.Api_root().Xtns().Wikibase(); - hwtr_mgr = new Wdata_hwtr_mgr(); - hwtr_mgr.Init_by_ctor(wikibase_api, this, new Wdata_lbl_wkr_wiki(wikibase_api, this), gplx.langs.htmls.encoders.Gfo_url_encoder_.Href, toggle_mgr, app.Usere().Wiki().Xwiki_mgr()); - this.Hwtr_msgs_make(); - Gfo_evt_mgr_.Sub_same_many(app.Usere(), this, Xoue_user.Evt_lang_changed); - } - private void Hwtr_msgs_make() { - // if (!app.Wiki_mgr().Wiki_regy().Has(Xow_domain_itm_.Bry__wikidata)) return; // DELETE: don't know why guard is needed; breaks test; DATE:2016-10-20 - Xol_lang_itm new_lang = app.Usere().Lang(); - Xowe_wiki cur_wiki = this.Wdata_wiki(); - cur_wiki.Xtn_mgr().Xtn_wikibase().Load_msgs(cur_wiki, new_lang); - Wdata_hwtr_msgs hwtr_msgs = Wdata_hwtr_msgs.new_(cur_wiki.Msg_mgr()); - hwtr_mgr.Init_by_lang(new_lang, hwtr_msgs); - } - public static void Write_json_as_html(Json_parser jdoc_parser, Bry_bfr bfr, byte[] data_raw) { - bfr.Add(Xoh_consts.Span_bgn_open).Add(Xoh_consts.Id_atr).Add(Html_json_id).Add(Xoh_consts.__end_quote); // - Json_doc json = jdoc_parser.Parse(data_raw); - json.Root_nde().Print_as_json(bfr, 0); - bfr.Add(Xoh_consts.Span_end); - } - public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) { - if (ctx.Match(k, Invk_enabled)) return Yn.To_str(enabled); - else if (ctx.Match(k, Invk_enabled_)) enabled = m.ReadYn("v"); - else if (ctx.Match(k, Invk_domain)) return String_.new_u8(domain); - else if (ctx.Match(k, Invk_domain_)) domain = m.ReadBry("v"); - else if (ctx.Match(k, Invk_property_wkr)) return m.ReadYnOrY("v") ? Property_wkr_or_new() : Gfo_invk_.Noop; - else if (ctx.Match(k, Xoue_user.Evt_lang_changed)) Hwtr_msgs_make(); - else return Gfo_invk_.Rv_unhandled; - return this; - } - private static final String Invk_enabled = "enabled", Invk_enabled_ = "enabled_", Invk_domain = "domain", Invk_domain_ = "domain_", Invk_property_wkr = "property_wkr"; - public Xop_log_property_wkr Property_wkr_or_new() { - if (property_wkr == null) property_wkr = app.Log_mgr().Make_wkr_property(); - return property_wkr; - } - public static final int Ns_property = 120; - public static final String Ns_property_name = "Property"; - public static final byte[] Ns_property_name_bry = Bry_.new_a7(Ns_property_name); - public static final byte[] Bry_q = Bry_.new_a7("q"); - public static final byte[] - Ttl_prefix_qid_bry_db = Bry_.new_a7("q") // NOTE: for historical reasons this is standardized as lowercase q not Q; DATE:2015-06-12 - , Ttl_prefix_qid_bry_gui = Bry_.new_a7("Q") // NOTE: use uppercase Q for writing html; DATE:2015-06-12 - , Ttl_prefix_pid_bry = Bry_.new_a7("Property:P") - ; - public static final byte[] Html_json_id = Bry_.new_a7("xowa-wikidata-json"); - public static boolean Wiki_page_is_json(int wiki_tid, int ns_id) { - switch (wiki_tid) { - case Xow_domain_tid_.Tid__wikidata: - if (ns_id == Xow_ns_.Tid__main || ns_id == gplx.xowa.xtns.wbases.Wdata_wiki_mgr.Ns_property) - return true; - break; - case Xow_domain_tid_.Tid__home: - if (ns_id == gplx.xowa.xtns.wbases.Wdata_wiki_mgr.Ns_property) - return true; - break; - } - return false; - } - public static void Log_missing_qid(Xop_ctx ctx, String type, byte[] id) { - if (id == null) id = Bry_.Empty; - ctx.Wiki().Appe().Usr_dlg().Log_many("", "", "Unknown id in wikidata; type=~{0} id=~{1} page=~{2}", type, id, ctx.Page().Url_bry_safe()); - } -} -/* -NOTE:novalue/somevalue -Rough approximation of wikibase logic which is more involved with its different SnakFormatters -* https://github.com/wikimedia/mediawiki-extensions-Wikibase/blob/master/lib/includes/Formatters/OutputFormatSnakFormatterFactory.php: formatter factory; note lines for somevalue / novalue -* https://github.com/wikimedia/mediawiki-extensions-Wikibase/blob/master/lib/includes/Formatters/MessageSnakFormatter.php: formatter definition -* https://github.com/wikimedia/mediawiki-extensions-Wikibase/blob/master/repo/i18n/en.json: message definitions -*/ +package gplx.xowa.xtns.wbases; + +import gplx.Bool_; +import gplx.Bry_; +import gplx.Bry_bfr; +import gplx.Bry_bfr_; +import gplx.GfoMsg; +import gplx.Gfo_evt_itm; +import gplx.Gfo_evt_mgr; +import gplx.Gfo_evt_mgr_; +import gplx.Gfo_invk; +import gplx.Gfo_invk_; +import gplx.GfsCtx; +import gplx.String_; +import gplx.Yn; +import gplx.langs.jsons.Json_doc; +import gplx.langs.jsons.Json_kv; +import gplx.langs.jsons.Json_parser; +import gplx.xowa.Xoa_ttl; +import gplx.xowa.Xoae_app; +import gplx.xowa.Xoae_page; +import gplx.xowa.Xowe_wiki; +import gplx.xowa.apps.apis.xowa.html.Xoapi_toggle_mgr; +import gplx.xowa.apps.apis.xowa.xtns.Xoapi_wikibase; +import gplx.xowa.htmls.Xoh_consts; +import gplx.xowa.langs.Xol_lang_itm; +import gplx.xowa.langs.msgs.Xol_msg_itm_; +import gplx.xowa.parsers.Xop_ctx; +import gplx.xowa.parsers.logs.Xop_log_property_wkr; +import gplx.xowa.users.Xoue_user; +import gplx.xowa.wikis.domains.Xow_domain_itm; +import gplx.xowa.wikis.domains.Xow_domain_tid_; +import gplx.xowa.wikis.nss.Xow_ns_; +import gplx.xowa.xtns.wbases.claims.Wbase_claim_grp; +import gplx.xowa.xtns.wbases.claims.enums.Wbase_claim_rank_; +import gplx.xowa.xtns.wbases.claims.enums.Wbase_claim_value_type_; +import gplx.xowa.xtns.wbases.claims.itms.Wbase_claim_base; +import gplx.xowa.xtns.wbases.hwtrs.Wdata_hwtr_mgr; +import gplx.xowa.xtns.wbases.hwtrs.Wdata_hwtr_msgs; +import gplx.xowa.xtns.wbases.hwtrs.Wdata_lbl_wkr_wiki; +import gplx.xowa.xtns.wbases.parsers.Wdata_doc_parser; +import gplx.xowa.xtns.wbases.parsers.Wdata_doc_parser_v1; +import gplx.xowa.xtns.wbases.parsers.Wdata_doc_parser_v2; +import gplx.xowa.xtns.wbases.stores.Wbase_doc_mgr; +import gplx.xowa.xtns.wbases.stores.Wbase_pid_mgr; +import gplx.xowa.xtns.wbases.stores.Wbase_prop_mgr; +import gplx.xowa.xtns.wbases.stores.Wbase_prop_mgr_loader_; +import gplx.xowa.xtns.wbases.stores.Wbase_qid_mgr; + +public class Wdata_wiki_mgr implements Gfo_evt_itm, Gfo_invk { + private final Xoae_app app; + private final Wdata_prop_val_visitor prop_val_visitor; + private final Wdata_doc_parser wdoc_parser_v1 = new Wdata_doc_parser_v1(), wdoc_parser_v2 = new Wdata_doc_parser_v2(); + private final Object thread_lock = new Object(); + private final Bry_bfr tmp_bfr = Bry_bfr_.New_w_size(32); + public Wdata_wiki_mgr(Xoae_app app) { + this.app = app; + this.evt_mgr = new Gfo_evt_mgr(this); + this.Qid_mgr = new Wbase_qid_mgr(this); + this.Pid_mgr = new Wbase_pid_mgr(this); + this.Doc_mgr = new Wbase_doc_mgr(this, this.Qid_mgr); + this.prop_mgr = new Wbase_prop_mgr(Wbase_prop_mgr_loader_.New_db(this)); + this.prop_val_visitor = new Wdata_prop_val_visitor(app, this); + this.Enabled_(true); + } + public Gfo_evt_mgr Evt_mgr() {return evt_mgr;} private final Gfo_evt_mgr evt_mgr; + public final Wbase_qid_mgr Qid_mgr; + public final Wbase_pid_mgr Pid_mgr; + public final Wbase_doc_mgr Doc_mgr; + public Wbase_prop_mgr Prop_mgr() {return prop_mgr;} private final Wbase_prop_mgr prop_mgr; + public boolean Enabled() {return enabled;} private boolean enabled; + public void Enabled_(boolean v) { + this.enabled = v; + Qid_mgr.Enabled_(v); + Pid_mgr.Enabled_(v); + Doc_mgr.Enabled_(v); + } + public byte[] Domain() {return domain;} public void Domain_(byte[] v) {domain = v;} private byte[] domain = Bry_.new_a7("www.wikidata.org"); + public Wdata_hwtr_mgr Hwtr_mgr() { + if (hwtr_mgr == null) + Hwtr_mgr_assert(); + return hwtr_mgr; + } private Wdata_hwtr_mgr hwtr_mgr; + public Xowe_wiki Wdata_wiki() { + if (wdata_wiki == null) { + synchronized (thread_lock) { // LOCK:must synchronized b/c multiple threads may init wdata_mgr at same time; + Xowe_wiki tmp_wdata_wiki = app.Wiki_mgr().Get_by_or_make(domain).Init_assert(); + if (wdata_wiki == null) // synchronized is not around "if (wdata_wiki == null)", so multiple threads may try to set; only set if null; DATE:2016-09-12 + wdata_wiki = tmp_wdata_wiki; + } + } + return wdata_wiki; + } private Xowe_wiki wdata_wiki; + public Json_parser Jdoc_parser() {return jdoc_parser;} private Json_parser jdoc_parser = new Json_parser(); + public void Init_by_app() {} + public Wdata_doc_parser Wdoc_parser(Json_doc jdoc) { + Json_kv itm_0 = Json_kv.cast(jdoc.Root_nde().Get_at(0)); // get 1st node + return Bry_.Eq(itm_0.Key().Data_bry(), Wdata_doc_parser_v2.Bry_type) + || Bry_.Eq(itm_0.Key().Data_bry(), Wdata_doc_parser_v2.Bry_id) + ? wdoc_parser_v2 : wdoc_parser_v1; // if "type", must be v2 + } + public Xop_log_property_wkr Property_wkr() {return property_wkr;} private Xop_log_property_wkr property_wkr; + public void Clear() { + synchronized (thread_lock) { // LOCK:app-level + Qid_mgr.Clear(); + Pid_mgr.Clear(); + Doc_mgr.Clear(); + } + } + public byte[] Get_claim_or(Xow_domain_itm domain, Xoa_ttl page_ttl, int pid, byte[] or) { + byte[] qid = this.Qid_mgr.Get_qid_or_null(domain.Abrv_wm(), page_ttl); if (qid == null) return or; + Wdata_doc wdoc = Doc_mgr.Get_by_loose_id_or_null(qid); if (wdoc == null) return or; + Wbase_claim_grp claim_grp = wdoc.Get_claim_grp_or_null(pid); + if (claim_grp == null || claim_grp.Len() == 0) return or; + Wbase_claim_base claim_itm = claim_grp.Get_at(0); + Resolve_claim(tmp_bfr, domain, claim_itm); + return tmp_bfr.To_bry_and_clear(); + } + public void Resolve_claim(Bry_bfr rv, Xow_domain_itm domain, Wbase_claim_base claim_itm) { + synchronized (thread_lock) { // LOCK:must synchronized b/c prop_val_visitor has member bfr which can get overwritten; DATE:2016-07-06 + if (hwtr_mgr == null) Hwtr_mgr_assert(); + prop_val_visitor.Init(rv, hwtr_mgr.Msgs(), domain.Lang_orig_key(), Bool_.N); + claim_itm.Welcome(prop_val_visitor); + } + } + public void Resolve_to_bfr(Bry_bfr bfr, Xowe_wiki wiki, Wbase_claim_grp prop_grp, byte[] lang_key, boolean mode_is_statements) { + synchronized (thread_lock) { // LOCK:must synchronized b/c prop_val_visitor has member bfr which can get overwritten; DATE:2016-07-06 + if (hwtr_mgr == null) Hwtr_mgr_assert(); + int len = prop_grp.Len(); + Wbase_claim_base selected = null; + for (int i = 0; i < len; i++) { // NOTE: multiple props possible; EX: {{#property:P1082}}; PAGE:en.w:Earth DATE:2015-08-02 + Wbase_claim_base prop = prop_grp.Get_at(i); + if (selected == null) selected = prop; // if selected not set, set it; will always set to 1st prop + if (prop.Rank_tid() == Wbase_claim_rank_.Tid__preferred) { // if prop is preferred, select it and exit; + selected = prop; + break; + } + } + switch (selected.Snak_tid()) { // SEE:NOTE:novalue/somevalue + case Wbase_claim_value_type_.Tid__novalue: + bfr.Add(wiki.Msg_mgr().Val_by_id(Xol_msg_itm_.Id_xowa_wikidata_novalue)); + break; + case Wbase_claim_value_type_.Tid__somevalue: + bfr.Add(wiki.Msg_mgr().Val_by_id(Xol_msg_itm_.Id_xowa_wikidata_somevalue)); + break; + default: { + prop_val_visitor.Init(bfr, hwtr_mgr.Msgs(), lang_key, mode_is_statements); + selected.Welcome(prop_val_visitor); + break; + } + } + } + } + public byte[] Popup_text(Xoae_page page) { + Hwtr_mgr_assert(); + Wdata_doc wdoc = Doc_mgr.Get_by_exact_id_or_null(page.Ttl().Full_db()); + if (wdoc == null) return Bry_.Empty; + return hwtr_mgr.Popup(wdoc); + } + public void Write_json_as_html(Bry_bfr bfr, Xoa_ttl page_ttl, byte[] data_raw) { + Hwtr_mgr_assert(); + Wdata_doc wdoc = Doc_mgr.Get_by_exact_id_or_null(page_ttl.Full_db()); + if (wdoc == null) return; + hwtr_mgr.Init_by_wdoc(wdoc); + bfr.Add(hwtr_mgr.Write(wdoc)); + } + private void Hwtr_mgr_assert() { + if (hwtr_mgr != null) return; + Xoapi_toggle_mgr toggle_mgr = app.Api_root().Html().Page().Toggle_mgr(); + Xoapi_wikibase wikibase_api = app.Api_root().Xtns().Wikibase(); + hwtr_mgr = new Wdata_hwtr_mgr(); + hwtr_mgr.Init_by_ctor(wikibase_api, this, new Wdata_lbl_wkr_wiki(wikibase_api, this), gplx.langs.htmls.encoders.Gfo_url_encoder_.Href, toggle_mgr, app.Usere().Wiki().Xwiki_mgr()); + this.Hwtr_msgs_make(); + Gfo_evt_mgr_.Sub_same_many(app.Usere(), this, Xoue_user.Evt_lang_changed); + } + private void Hwtr_msgs_make() { + // if (!app.Wiki_mgr().Wiki_regy().Has(Xow_domain_itm_.Bry__wikidata)) return; // DELETE: don't know why guard is needed; breaks test; DATE:2016-10-20 + Xol_lang_itm new_lang = app.Usere().Lang(); + Xowe_wiki cur_wiki = this.Wdata_wiki(); + cur_wiki.Xtn_mgr().Xtn_wikibase().Load_msgs(cur_wiki, new_lang); + Wdata_hwtr_msgs hwtr_msgs = Wdata_hwtr_msgs.new_(cur_wiki.Msg_mgr()); + hwtr_mgr.Init_by_lang(new_lang, hwtr_msgs); + } + public static void Write_json_as_html(Json_parser jdoc_parser, Bry_bfr bfr, byte[] data_raw) { + bfr.Add(Xoh_consts.Span_bgn_open).Add(Xoh_consts.Id_atr).Add(Html_json_id).Add(Xoh_consts.__end_quote); // + Json_doc json = jdoc_parser.Parse(data_raw); + json.Root_nde().Print_as_json(bfr, 0); + bfr.Add(Xoh_consts.Span_end); + } + public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) { + if (ctx.Match(k, Invk_enabled)) return Yn.To_str(enabled); + else if (ctx.Match(k, Invk_enabled_)) enabled = m.ReadYn("v"); + else if (ctx.Match(k, Invk_domain)) return String_.new_u8(domain); + else if (ctx.Match(k, Invk_domain_)) domain = m.ReadBry("v"); + else if (ctx.Match(k, Invk_property_wkr)) return m.ReadYnOrY("v") ? Property_wkr_or_new() : Gfo_invk_.Noop; + else if (ctx.Match(k, Xoue_user.Evt_lang_changed)) Hwtr_msgs_make(); + else return Gfo_invk_.Rv_unhandled; + return this; + } + private static final String Invk_enabled = "enabled", Invk_enabled_ = "enabled_", Invk_domain = "domain", Invk_domain_ = "domain_", Invk_property_wkr = "property_wkr"; + public Xop_log_property_wkr Property_wkr_or_new() { + if (property_wkr == null) property_wkr = app.Log_mgr().Make_wkr_property(); + return property_wkr; + } + public static final int Ns_property = 120; + public static final String Ns_property_name = "Property"; + public static final byte[] Ns_property_name_bry = Bry_.new_a7(Ns_property_name); + public static final int Ns_lexeme = 146; + public static final String Ns_lexeme_name = "Lexeme"; + public static final byte[] Ns_lexeme_name_bry = Bry_.new_a7(Ns_lexeme_name); + + public static final byte[] Html_json_id = Bry_.new_a7("xowa-wikidata-json"); + public static boolean Wiki_page_is_json(int wiki_tid, int ns_id) { + switch (wiki_tid) { + case Xow_domain_tid_.Tid__wikidata: + switch (ns_id) { + case Xow_ns_.Tid__main: + case Wdata_wiki_mgr.Ns_property: + case Wdata_wiki_mgr.Ns_lexeme: + return true; + default: + return false; + } + case Xow_domain_tid_.Tid__home: + if (ns_id == gplx.xowa.xtns.wbases.Wdata_wiki_mgr.Ns_property) + return true; + break; + } + return false; + } + public static void Log_missing_qid(Xop_ctx ctx, String type, byte[] id) { + if (id == null) id = Bry_.Empty; + ctx.Wiki().Appe().Usr_dlg().Log_many("", "", "Unknown id in wikidata; type=~{0} id=~{1} page=~{2}", type, id, ctx.Page().Url_bry_safe()); + } +} +/* +NOTE:novalue/somevalue +Rough approximation of wikibase logic which is more involved with its different SnakFormatters +* https://github.com/wikimedia/mediawiki-extensions-Wikibase/blob/master/lib/includes/Formatters/OutputFormatSnakFormatterFactory.php: formatter factory; note lines for somevalue / novalue +* https://github.com/wikimedia/mediawiki-extensions-Wikibase/blob/master/lib/includes/Formatters/MessageSnakFormatter.php: formatter definition +* https://github.com/wikimedia/mediawiki-extensions-Wikibase/blob/master/repo/i18n/en.json: message definitions +*/ diff --git a/400_xowa/src/gplx/xowa/xtns/wbases/claims/Wbase_claim_visitor.java b/400_xowa/src/gplx/xowa/xtns/wbases/claims/Wbase_claim_visitor.java index 8fc7dca0c..2f100222a 100644 --- a/400_xowa/src/gplx/xowa/xtns/wbases/claims/Wbase_claim_visitor.java +++ b/400_xowa/src/gplx/xowa/xtns/wbases/claims/Wbase_claim_visitor.java @@ -1,6 +1,6 @@ /* XOWA: the XOWA Offline Wiki Application -Copyright (C) 2012-2017 gnosygnu@gmail.com +Copyright (C) 2012-2020 gnosygnu@gmail.com XOWA is licensed under the terms of the General Public License (GPL) Version 3, or alternatively under the terms of the Apache License Version 2.0. @@ -13,14 +13,22 @@ The terms of each license can be found in the source code repository: GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt */ -package gplx.xowa.xtns.wbases.claims; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.wbases.*; -import gplx.xowa.xtns.wbases.claims.itms.*; -public interface Wbase_claim_visitor { - void Visit_str (Wbase_claim_string itm); - void Visit_entity (Wbase_claim_entity itm); - void Visit_monolingualtext (Wbase_claim_monolingualtext itm); - void Visit_quantity (Wbase_claim_quantity itm); - void Visit_time (Wbase_claim_time itm); - void Visit_globecoordinate (Wbase_claim_globecoordinate itm); - void Visit_system (Wbase_claim_value itm); -} +package gplx.xowa.xtns.wbases.claims; + +import gplx.xowa.xtns.wbases.claims.itms.Wbase_claim_entity; +import gplx.xowa.xtns.wbases.claims.itms.Wbase_claim_globecoordinate; +import gplx.xowa.xtns.wbases.claims.itms.Wbase_claim_monolingualtext; +import gplx.xowa.xtns.wbases.claims.itms.Wbase_claim_quantity; +import gplx.xowa.xtns.wbases.claims.itms.Wbase_claim_string; +import gplx.xowa.xtns.wbases.claims.itms.Wbase_claim_time; +import gplx.xowa.xtns.wbases.claims.itms.Wbase_claim_value; + +public interface Wbase_claim_visitor { + void Visit_str (Wbase_claim_string itm); + void Visit_entity (Wbase_claim_entity itm); + void Visit_monolingualtext (Wbase_claim_monolingualtext itm); + void Visit_quantity (Wbase_claim_quantity itm); + void Visit_time (Wbase_claim_time itm); + void Visit_globecoordinate (Wbase_claim_globecoordinate itm); + void Visit_system (Wbase_claim_value itm); +} diff --git a/400_xowa/src/gplx/xowa/xtns/wbases/claims/enums/Wbase_claim_entity_type_.java b/400_xowa/src/gplx/xowa/xtns/wbases/claims/enums/Wbase_claim_entity_type_.java index 85cf3e950..5f013971c 100644 --- a/400_xowa/src/gplx/xowa/xtns/wbases/claims/enums/Wbase_claim_entity_type_.java +++ b/400_xowa/src/gplx/xowa/xtns/wbases/claims/enums/Wbase_claim_entity_type_.java @@ -1,6 +1,6 @@ /* XOWA: the XOWA Offline Wiki Application -Copyright (C) 2012-2017 gnosygnu@gmail.com +Copyright (C) 2012-2020 gnosygnu@gmail.com XOWA is licensed under the terms of the General Public License (GPL) Version 3, or alternatively under the terms of the Apache License Version 2.0. @@ -13,17 +13,54 @@ The terms of each license can be found in the source code repository: GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt */ -package gplx.xowa.xtns.wbases.claims.enums; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.wbases.*; import gplx.xowa.xtns.wbases.claims.*; -public class Wbase_claim_entity_type_ { - public static final byte - Tid__item = 0 - , Tid__property = 1 - , Tid__lexeme = 2 - ; - public static final Wbase_enum_hash Reg = new Wbase_enum_hash("claim.entity_type", 3); - public static final Wbase_enum_itm - Itm__item = Reg.Add(Tid__item , "item") - , Itm__property = Reg.Add(Tid__property , "property") - , Itm__lexeme = Reg.Add(Tid__lexeme , "lexeme") - ; +package gplx.xowa.xtns.wbases.claims.enums; + +import gplx.Bry_; +import gplx.Byte_ascii; +import gplx.Err_; + +// NOTE: could not find definitive list, so using these links for now +// REF.MW:https://github.com/Wikidata/Wikidata-Toolkit/blob/master/wdtk-datamodel/src/main/java/org/wikidata/wdtk/datamodel/implementation/EntityIdValueImpl.java +// REF.MW:https://github.com/wikimedia/wikibase-property-suggester-scripts/blob/1d25e76f894796bfd57dd107102cf39088885138/propertysuggester/parser/JsonReader.py +public class Wbase_claim_entity_type_ { + public static final byte + Tid__item = 0 + , Tid__property = 1 + , Tid__lexeme = 2 + , Tid__sense = 3 + , Tid__form = 4 + ; + public static final Wbase_enum_hash Reg = new Wbase_enum_hash("claim.entity_type", 5); + public static final Wbase_enum_itm + Itm__item = Reg.Add(Tid__item , "item") + , Itm__property = Reg.Add(Tid__property , "property") + , Itm__lexeme = Reg.Add(Tid__lexeme , "lexeme") + , Itm__sense = Reg.Add(Tid__sense , "sense") + , Itm__form = Reg.Add(Tid__form , "form") + ; + + public static Wbase_enum_itm ToTid(byte[] id) { + // fail if null or 0-length + if (Bry_.Len_eq_0(id)) { + throw Err_.new_unhandled_default(id); + } + + // get 1st byte and uppercase it + byte b0 = id[0]; + if (b0 > Byte_ascii.Ltr_Z) { + b0 -= 32; // uppercases + } + + // return item; NOTE: only doing types which have namespaces (i.e.: there is no Sense:S1 or Form:F1) + switch (b0) { + case Byte_ascii.Ltr_Q: + return Wbase_claim_entity_type_.Itm__item; + case Byte_ascii.Ltr_P: + return Wbase_claim_entity_type_.Itm__property; + case Byte_ascii.Ltr_L: + return Wbase_claim_entity_type_.Itm__lexeme; + default: + throw Err_.new_unhandled_default(id); + } + } } \ No newline at end of file diff --git a/400_xowa/src/gplx/xowa/xtns/wbases/claims/enums/Wbase_claim_type_.java b/400_xowa/src/gplx/xowa/xtns/wbases/claims/enums/Wbase_claim_type_.java index 458f6ccd3..4aa3952e4 100644 --- a/400_xowa/src/gplx/xowa/xtns/wbases/claims/enums/Wbase_claim_type_.java +++ b/400_xowa/src/gplx/xowa/xtns/wbases/claims/enums/Wbase_claim_type_.java @@ -1,6 +1,6 @@ /* XOWA: the XOWA Offline Wiki Application -Copyright (C) 2012-2017 gnosygnu@gmail.com +Copyright (C) 2012-2020 gnosygnu@gmail.com XOWA is licensed under the terms of the General Public License (GPL) Version 3, or alternatively under the terms of the Apache License Version 2.0. @@ -13,50 +13,57 @@ The terms of each license can be found in the source code repository: GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt */ -package gplx.xowa.xtns.wbases.claims.enums; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.wbases.*; import gplx.xowa.xtns.wbases.claims.*; -public class Wbase_claim_type_ { - public static final byte // SERIALIZED:wbase_prop|datatype; REF:https://www.wikidata.org/wiki/Help:Data_type - Tid__unknown = 0 - , Tid__value = 1 - , Tid__bad = 1 - , Tid__string = 2 - , Tid__quantity = 3 - , Tid__time = 4 - , Tid__globecoordinate = 5 - , Tid__monolingualtext = 6 - , Tid__entity = 7 - , Tid__property = 8 - , Tid__math = 9 - , Tid__url = 10 - , Tid__externalid = 11 - , Tid__commonsmedia = 12 - , Tid__geo_shape = 13 - , Tid__tabular_data = 14 - , Tid__lexeme = 15 - ; - public static final Wbase_enum_hash Reg = new Wbase_enum_hash("claim.data_type", 16); - public static final Wbase_enum_itm - Itm__unknown = New(Tid__unknown , "unknown") - , Itm__bad = New(Tid__bad , "bad") // NOTE: wikidata identifies several entries as "bad"; Q1615351|'s-Graveland, Q107538|Baco; DATE:2013-10-20 - , Itm__string = New(Tid__string , "string") // EX:wd:Property:P1030 - , Itm__quantity = New(Tid__quantity , "quantity") - , Itm__time = New(Tid__time , "time") - , Itm__globecoordinate = New(Tid__globecoordinate , "globecoordinate" , "globe-coordinate") - , Itm__monolingualtext = New(Tid__monolingualtext , "monolingualtext") - , Itm__entity = New(Tid__entity , "wikibase-entityid" , "wikibase-item") - , Itm__property = New(Tid__property , "wikibase-property") // EX:wd:Property:P1646 - , Itm__url = New(Tid__url , "url") // EX:wd:Property:P1019 - , Itm__commonsmedia = New(Tid__commonsmedia , "commonsMedia") // EX:wd:Property:P14 - , Itm__externalid = New(Tid__externalid , "external-id") // EX:wd:Property:P1003 - , Itm__math = New(Tid__math , "math") // EX:wd:Property:P2534 - , Itm__geo_shape = New(Tid__geo_shape , "geo-shape") // EX:wd:Property:P3896 - , Itm__tabular_data = New(Tid__tabular_data , "tabular-data") // EX:wd:Property:P4179 - , Itm__lexeme = New(Tid__lexeme , "lexeme") // EX:wd:Property:P5188 - ; - private static Wbase_enum_itm New(byte tid, String key) {return New(tid, key, key);} - private static Wbase_enum_itm New(byte tid, String key, String scrib) {return Reg.Add(new Wbase_claim_type(tid, key, scrib));} - public static String Get_scrib_or_unknown(byte tid) {return ((Wbase_claim_type)Reg.Get_itm_or(tid, Itm__unknown)).Key_for_scrib();} - public static byte Get_tid_or_unknown(String key) {return Get_tid_or_unknown(Bry_.new_u8(key));} - public static byte Get_tid_or_unknown(byte[] key) {return Get_tid_or_unknown(key, 0, key.length);} - public static byte Get_tid_or_unknown(byte[] key, int bgn, int end) {return Reg.Get_tid_or(key, bgn, end, Tid__unknown);} +package gplx.xowa.xtns.wbases.claims.enums; + +import gplx.Bry_; + +public class Wbase_claim_type_ { + public static final byte // SERIALIZED:wbase_prop|datatype; REF:https://www.wikidata.org/wiki/Help:Data_type + Tid__unknown = 0 + , Tid__value = 1 + , Tid__bad = 1 + , Tid__string = 2 + , Tid__quantity = 3 + , Tid__time = 4 + , Tid__globecoordinate = 5 + , Tid__monolingualtext = 6 + , Tid__entity = 7 + , Tid__property = 8 + , Tid__math = 9 + , Tid__url = 10 + , Tid__externalid = 11 + , Tid__commonsmedia = 12 + , Tid__geo_shape = 13 + , Tid__tabular_data = 14 + , Tid__lexeme = 15 + , Tid__form = 16 + , Tid__sense = 17 + ; + public static final Wbase_enum_hash Reg = new Wbase_enum_hash("claim.data_type", 18); + public static final Wbase_enum_itm + Itm__unknown = New(Tid__unknown , "unknown") + , Itm__bad = New(Tid__bad , "bad") // NOTE: wikidata identifies several entries as "bad"; Q1615351|'s-Graveland, Q107538|Baco; DATE:2013-10-20 + , Itm__string = New(Tid__string , "string") // EX:wd:Property:P1030 + , Itm__quantity = New(Tid__quantity , "quantity") + , Itm__time = New(Tid__time , "time") + , Itm__globecoordinate = New(Tid__globecoordinate , "globecoordinate" , "globe-coordinate") + , Itm__monolingualtext = New(Tid__monolingualtext , "monolingualtext") + , Itm__entity = New(Tid__entity , "wikibase-entityid" , "wikibase-item") + , Itm__property = New(Tid__property , "wikibase-property") // EX:wd:Property:P1646 + , Itm__url = New(Tid__url , "url") // EX:wd:Property:P1019 + , Itm__commonsmedia = New(Tid__commonsmedia , "commonsMedia") // EX:wd:Property:P14 + , Itm__externalid = New(Tid__externalid , "external-id") // EX:wd:Property:P1003 + , Itm__math = New(Tid__math , "math") // EX:wd:Property:P2534 + , Itm__geo_shape = New(Tid__geo_shape , "geo-shape") // EX:wd:Property:P3896 + , Itm__tabular_data = New(Tid__tabular_data , "tabular-data") // EX:wd:Property:P4179 + , Itm__lexeme = New(Tid__lexeme , "lexeme") // EX:wd:Lexeme:L2 + , Itm__form = New(Tid__form , "form") // EX:wd:Lexeme:L2 P5830 + , Itm__sense = New(Tid__sense , "sense") // EX:wd:Lexeme:L2 P6072 + ; + private static Wbase_enum_itm New(byte tid, String key) {return New(tid, key, key);} + private static Wbase_enum_itm New(byte tid, String key, String scrib) {return Reg.Add(new Wbase_claim_type(tid, key, scrib));} + public static String Get_scrib_or_unknown(byte tid) {return ((Wbase_claim_type)Reg.Get_itm_or(tid, Itm__unknown)).Key_for_scrib();} + public static byte Get_tid_or_unknown(String key) {return Get_tid_or_unknown(Bry_.new_u8(key));} + public static byte Get_tid_or_unknown(byte[] key) {return Get_tid_or_unknown(key, 0, key.length);} + public static byte Get_tid_or_unknown(byte[] key, int bgn, int end) {return Reg.Get_tid_or(key, bgn, end, Tid__unknown);} } \ No newline at end of file diff --git a/400_xowa/src/gplx/xowa/xtns/wbases/claims/itms/Wbase_claim_entity.java b/400_xowa/src/gplx/xowa/xtns/wbases/claims/itms/Wbase_claim_entity.java index ec9ba0cc4..97e4b769f 100644 --- a/400_xowa/src/gplx/xowa/xtns/wbases/claims/itms/Wbase_claim_entity.java +++ b/400_xowa/src/gplx/xowa/xtns/wbases/claims/itms/Wbase_claim_entity.java @@ -1,6 +1,6 @@ /* XOWA: the XOWA Offline Wiki Application -Copyright (C) 2012-2017 gnosygnu@gmail.com +Copyright (C) 2012-2020 gnosygnu@gmail.com XOWA is licensed under the terms of the General Public License (GPL) Version 3, or alternatively under the terms of the Apache License Version 2.0. @@ -13,38 +13,78 @@ The terms of each license can be found in the source code repository: GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt */ -package gplx.xowa.xtns.wbases.claims.itms; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.wbases.*; import gplx.xowa.xtns.wbases.claims.*; -import gplx.xowa.xtns.wbases.claims.enums.*; -public class Wbase_claim_entity extends Wbase_claim_base { - public Wbase_claim_entity(int pid, byte snak_tid, byte entity_tid, byte[] entity_id_bry) {super(pid, snak_tid); - this.entity_tid = entity_tid; - this.entity_id_bry = entity_id_bry; - this.entity_id = Bry_.To_int(entity_id_bry); - } - @Override public byte Val_tid() {return Wbase_claim_type_.Tid__entity;} - public int Entity_id() {return entity_id;} private final int entity_id; - public byte[] Entity_id_bry() {return entity_id_bry;} private final byte[] entity_id_bry; - public byte Entity_tid() {return entity_tid;} private final byte entity_tid; - public boolean Entity_tid_is_qid() {return entity_tid == Wbase_claim_entity_type_.Tid__item;} - public String Entity_tid_str() {return Wbase_claim_entity_type_.Reg.Get_str_or_fail(entity_tid);} - public byte[] Entity_tid_bry() {return Wbase_claim_entity_type_.Reg.Get_bry_or_fail(entity_tid);} - - public byte[] Page_ttl_db() {return To_xid__db(entity_tid, entity_id_bry);} - public byte[] Page_ttl_gui() { - return entity_tid == Wbase_claim_entity_type_.Tid__item - ? Bry_.Add(Wdata_wiki_mgr.Ttl_prefix_qid_bry_gui, entity_id_bry) - : Bry_.Add(Wdata_wiki_mgr.Ttl_prefix_pid_bry, entity_id_bry) - ; - } - @Override public void Welcome(Wbase_claim_visitor visitor) {visitor.Visit_entity(this);} - @Override public String toString() {// TEST: - return String_.Concat_with_str("|", Wbase_claim_value_type_.Reg.Get_str_or_fail(this.Snak_tid()), Wbase_claim_type_.Reg.Get_str_or_fail(this.Val_tid()), this.Entity_tid_str(), Int_.To_str(entity_id)); - } - - public static byte[] To_xid__db(byte tid, byte[] bry) { // EX: 'item,2' -> q2; 'property,2' -> Property:P2 - return tid == Wbase_claim_entity_type_.Tid__item - ? Bry_.Add(Wdata_wiki_mgr.Ttl_prefix_qid_bry_gui, bry) - : Bry_.Add(Wdata_wiki_mgr.Ttl_prefix_pid_bry, bry) - ; - } -} +package gplx.xowa.xtns.wbases.claims.itms; + +import gplx.Bry_; +import gplx.Byte_ascii; +import gplx.Err_; +import gplx.Int_; +import gplx.String_; +import gplx.xowa.xtns.wbases.claims.Wbase_claim_visitor; +import gplx.xowa.xtns.wbases.claims.enums.Wbase_claim_entity_type_; +import gplx.xowa.xtns.wbases.claims.enums.Wbase_claim_type_; +import gplx.xowa.xtns.wbases.claims.enums.Wbase_claim_value_type_; + +public class Wbase_claim_entity extends Wbase_claim_base { + public Wbase_claim_entity(int pid, byte snak_tid, byte entityType, byte[] numericIdBry) { + this(pid, snak_tid, entityType, numericIdBry, null); + } + public Wbase_claim_entity(int pid, byte snak_tid, byte entityType, byte[] numericIdBry, byte[] id) { + super(pid, snak_tid); + this.entityType = entityType; + this.numericIdBry = numericIdBry; + // NOTE: form and sense claims do not have `numeric-id`; DATE:2020-07-27 + if (numericIdBry != null) + this.numericId = Bry_.To_int(numericIdBry); + // NOTE: item, property, lexeme do not have an id (Make_claims calls don't pass them) + this.id = id == null ? ToId(entityType, numericIdBry) : id; + } + @Override public byte Val_tid() {return Wbase_claim_type_.Tid__entity;} + public byte[] Id() {return id;} private final byte[] id; // EX: Q123 + public int Entity_id() {return numericId;} private int numericId; // EX: 123 + public byte[] Entity_id_bry() {return numericIdBry;} private final byte[] numericIdBry; + public byte Entity_tid() {return entityType;} private final byte entityType; + public boolean Entity_tid_is_qid() {return entityType == Wbase_claim_entity_type_.Tid__item;} + public String Entity_tid_str() {return Wbase_claim_entity_type_.Reg.Get_str_or_fail(entityType);} + public byte[] Entity_tid_bry() {return Wbase_claim_entity_type_.Reg.Get_bry_or_fail(entityType);} + public byte[] Page_ttl_db() {return To_xid__db(entityType, numericIdBry);} + public byte[] Page_ttl_gui() {return Bry_.Add(ToTtlPrefix(entityType), numericIdBry);} + @Override public void Welcome(Wbase_claim_visitor visitor) {visitor.Visit_entity(this);} + @Override public String toString() {// TEST: + return String_.Concat_with_str("|", Wbase_claim_value_type_.Reg.Get_str_or_fail(this.Snak_tid()), Wbase_claim_type_.Reg.Get_str_or_fail(this.Val_tid()), this.Entity_tid_str(), Int_.To_str(numericId), String_.new_u8(id)); + } + + public static byte[] To_xid__db(byte tid, byte[] bry) {return Bry_.Add(ToTtlPrefix(tid), bry);} // EX: 'item,2' -> Q2; 'property,2' -> Property:P2 + private static byte[] ToTtlPrefix(byte entityType) { + switch (entityType) { + case Wbase_claim_entity_type_.Tid__item: + return TTL_PREFIX_QID; + case Wbase_claim_entity_type_.Tid__property: + return TTL_PREFIX_PID; + case Wbase_claim_entity_type_.Tid__lexeme: + return TTL_PREFIX_LID; + default: + throw Err_.new_unhandled_default(entityType); + } + } + private static byte[] ToId(byte entityType, byte[] numericId) { + switch (entityType) { + case Wbase_claim_entity_type_.Tid__item: + return Bry_.Add(Byte_ascii.Ltr_Q, numericId); + case Wbase_claim_entity_type_.Tid__property: + return Bry_.Add(Byte_ascii.Ltr_P, numericId); + case Wbase_claim_entity_type_.Tid__lexeme: + return Bry_.Add(Byte_ascii.Ltr_L, numericId); + case Wbase_claim_entity_type_.Tid__form: + case Wbase_claim_entity_type_.Tid__sense: + default: + throw Err_.new_unhandled_default(entityType); + } + } + private static final byte[] + TTL_PREFIX_QID = Bry_.new_a7("Q") // NOTE: use uppercase Q for writing html; DATE:2015-06-12 + , TTL_PREFIX_PID = Bry_.new_a7("Property:P") + , TTL_PREFIX_LID = Bry_.new_a7("Lexeme:L") + // TOMBSTONE: TTL_PREFIX_QID_OLD = Bry_.new_a7("q") // NOTE: for historical reasons this is standardized as lowercase q not Q; DATE:2015-06-12 + ; +} diff --git a/400_xowa/src/gplx/xowa/xtns/wbases/parsers/Wbase_claim_factory.java b/400_xowa/src/gplx/xowa/xtns/wbases/parsers/Wbase_claim_factory.java index 797efd16b..6e85a2e34 100644 --- a/400_xowa/src/gplx/xowa/xtns/wbases/parsers/Wbase_claim_factory.java +++ b/400_xowa/src/gplx/xowa/xtns/wbases/parsers/Wbase_claim_factory.java @@ -1,6 +1,6 @@ /* XOWA: the XOWA Offline Wiki Application -Copyright (C) 2012-2017 gnosygnu@gmail.com +Copyright (C) 2012-2020 gnosygnu@gmail.com XOWA is licensed under the terms of the General Public License (GPL) Version 3, or alternatively under the terms of the Apache License Version 2.0. @@ -13,104 +13,126 @@ The terms of each license can be found in the source code repository: GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt */ -package gplx.xowa.xtns.wbases.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.wbases.*; -import gplx.langs.jsons.*; -import gplx.xowa.xtns.wbases.core.*; import gplx.xowa.xtns.wbases.claims.*; import gplx.xowa.xtns.wbases.claims.enums.*; import gplx.xowa.xtns.wbases.claims.itms.*; -public class Wbase_claim_factory { - public Wbase_claim_base Parse(byte[] qid, int pid, byte snak_tid, Json_nde nde, byte value_tid, Json_itm value_itm) { - switch (value_tid) { - case Wbase_claim_type_.Tid__string: return new Wbase_claim_string(pid, snak_tid, value_itm.Data_bry()); - case Wbase_claim_type_.Tid__entity: return Parse_datavalue_entity (qid, pid, snak_tid, Json_nde.cast(value_itm)); - case Wbase_claim_type_.Tid__time: return Parse_datavalue_time (qid, pid, snak_tid, Json_nde.cast(value_itm)); - case Wbase_claim_type_.Tid__quantity: return Parse_datavalue_quantity (qid, pid, snak_tid, Json_nde.cast(value_itm)); - case Wbase_claim_type_.Tid__globecoordinate: return Parse_datavalue_globecoordinate (qid, pid, snak_tid, Json_nde.cast(value_itm)); - case Wbase_claim_type_.Tid__monolingualtext: return Parse_datavalue_monolingualtext (qid, pid, snak_tid, Json_nde.cast(value_itm)); - default: throw Err_.new_unhandled_default(value_tid); - } - } - private Wbase_claim_entity Parse_datavalue_entity(byte[] qid, int pid, byte snak_tid, Json_nde nde) { - int len = nde.Len(); - byte entity_tid = Byte_.Max_value_127; - byte[] entity_id_bry = null; - for (int i = 0; i < len; ++i) { - Json_kv sub = Json_kv.cast(nde.Get_at(i)); - byte tid = Wbase_claim_entity_.Reg.Get_tid_or_max_and_log(qid, sub.Key().Data_bry()); if (tid == Byte_.Max_value_127) continue; - switch (tid) { - case Wbase_claim_entity_.Tid__entity_type: entity_tid = Wbase_claim_entity_type_.Reg.Get_tid_or_fail(sub.Val().Data_bry()); break; - case Wbase_claim_entity_.Tid__numeric_id: entity_id_bry = sub.Val().Data_bry(); break; - case Wbase_claim_entity_.Tid__id: break; // ignore - } - } - if (entity_id_bry == null) throw Err_.new_wo_type("pid is invalid entity", "pid", pid); - return new Wbase_claim_entity(pid, snak_tid, entity_tid, entity_id_bry); - } - private Wbase_claim_monolingualtext Parse_datavalue_monolingualtext(byte[] qid, int pid, byte snak_tid, Json_nde nde) { - int len = nde.Len(); - byte[] lang = null, text = null; - for (int i = 0; i < len; ++i) { - Json_kv sub = Json_kv.cast(nde.Get_at(i)); - byte tid = Wbase_claim_monolingualtext_.Reg.Get_tid_or_max_and_log(qid, sub.Key().Data_bry()); if (tid == Byte_.Max_value_127) continue; - byte[] sub_val_bry = sub.Val().Data_bry(); - switch (tid) { - case Wbase_claim_monolingualtext_.Tid__text: text = sub_val_bry; break; - case Wbase_claim_monolingualtext_.Tid__language: lang = sub_val_bry; break; - } - } - if (lang == null || text == null) throw Err_.new_wo_type("pid is invalid monolingualtext", "pid", pid); - return new Wbase_claim_monolingualtext(pid, snak_tid, lang, text); - } - private Wbase_claim_globecoordinate Parse_datavalue_globecoordinate(byte[] qid, int pid, byte snak_tid, Json_nde nde) { - int len = nde.Len(); - byte[] lat = null, lng = null, alt = null, prc = null, glb = null; - for (int i = 0; i < len; ++i) { - Json_kv sub = Json_kv.cast(nde.Get_at(i)); - byte tid = Wbase_claim_globecoordinate_.Reg.Get_tid_or_max_and_log(qid, sub.Key().Data_bry()); if (tid == Byte_.Max_value_127) continue; - byte[] sub_val_bry = sub.Val().Data_bry(); - switch (tid) { - case Wbase_claim_globecoordinate_.Tid__latitude: lat = sub_val_bry; break; - case Wbase_claim_globecoordinate_.Tid__longitude: lng = sub_val_bry; break; - case Wbase_claim_globecoordinate_.Tid__altitude: alt = sub_val_bry; break; - case Wbase_claim_globecoordinate_.Tid__precision: prc = sub_val_bry; break; - case Wbase_claim_globecoordinate_.Tid__globe: glb = sub_val_bry; break; - } - } - if (lat == null || lng == null) throw Err_.new_wo_type("pid is invalid globecoordinate", "pid", pid); - return new Wbase_claim_globecoordinate(pid, snak_tid, lat, lng, alt, prc, glb); - } - private Wbase_claim_quantity Parse_datavalue_quantity(byte[] qid, int pid, byte snak_tid, Json_nde nde) { - int len = nde.Len(); - byte[] amount = null, unit = null, ubound = null, lbound = null; - for (int i = 0; i < len; ++i) { - Json_kv sub = Json_kv.cast(nde.Get_at(i)); - byte tid = Wbase_claim_quantity_.Reg.Get_tid_or_max_and_log(qid, sub.Key().Data_bry()); if (tid == Byte_.Max_value_127) continue; - byte[] sub_val_bry = sub.Val().Data_bry(); - switch (tid) { - case Wbase_claim_quantity_.Tid__amount: amount = sub_val_bry; break; - case Wbase_claim_quantity_.Tid__unit: unit = sub_val_bry; break; - case Wbase_claim_quantity_.Tid__upperbound: ubound = sub_val_bry; break; - case Wbase_claim_quantity_.Tid__lowerbound: lbound = sub_val_bry; break; - } - } - if (amount == null) throw Err_.new_wo_type("pid is invalid quantity", "pid", pid); - return new Wbase_claim_quantity(pid, snak_tid, amount, unit, ubound, lbound); - } - private Wbase_claim_time Parse_datavalue_time(byte[] qid, int pid, byte snak_tid, Json_nde nde) { - int len = nde.Len(); - byte[] time = null, timezone = null, before = null, after = null, precision = null, calendarmodel = null; - for (int i = 0; i < len; ++i) { - Json_kv sub = Json_kv.cast(nde.Get_at(i)); - byte tid = Wbase_claim_time_.Reg.Get_tid_or_max_and_log(qid, sub.Key().Data_bry()); if (tid == Byte_.Max_value_127) continue; - byte[] sub_val_bry = sub.Val().Data_bry(); - switch (tid) { - case Wbase_claim_time_.Tid__time: time = sub_val_bry; break; - case Wbase_claim_time_.Tid__timezone: timezone = sub_val_bry; break; - case Wbase_claim_time_.Tid__before: before = sub_val_bry; break; - case Wbase_claim_time_.Tid__after: after = sub_val_bry; break; - case Wbase_claim_time_.Tid__precision: precision = sub_val_bry; break; - case Wbase_claim_time_.Tid__calendarmodel: calendarmodel = sub_val_bry; break; - } - } - if (time == null) throw Err_.new_wo_type("pid is invalid time", "pid", pid); - return new Wbase_claim_time(pid, snak_tid, time, timezone, before, after, precision, calendarmodel); - } -} +package gplx.xowa.xtns.wbases.parsers; + +import gplx.Byte_; +import gplx.Err_; +import gplx.langs.jsons.Json_itm; +import gplx.langs.jsons.Json_kv; +import gplx.langs.jsons.Json_nde; +import gplx.xowa.xtns.wbases.claims.enums.Wbase_claim_entity_type_; +import gplx.xowa.xtns.wbases.claims.enums.Wbase_claim_type_; +import gplx.xowa.xtns.wbases.claims.itms.Wbase_claim_base; +import gplx.xowa.xtns.wbases.claims.itms.Wbase_claim_entity; +import gplx.xowa.xtns.wbases.claims.itms.Wbase_claim_entity_; +import gplx.xowa.xtns.wbases.claims.itms.Wbase_claim_globecoordinate; +import gplx.xowa.xtns.wbases.claims.itms.Wbase_claim_globecoordinate_; +import gplx.xowa.xtns.wbases.claims.itms.Wbase_claim_monolingualtext; +import gplx.xowa.xtns.wbases.claims.itms.Wbase_claim_monolingualtext_; +import gplx.xowa.xtns.wbases.claims.itms.Wbase_claim_quantity; +import gplx.xowa.xtns.wbases.claims.itms.Wbase_claim_quantity_; +import gplx.xowa.xtns.wbases.claims.itms.Wbase_claim_string; +import gplx.xowa.xtns.wbases.claims.itms.Wbase_claim_time; +import gplx.xowa.xtns.wbases.claims.itms.Wbase_claim_time_; + +public class Wbase_claim_factory { + public Wbase_claim_base Parse(byte[] qid, int pid, byte snak_tid, Json_nde nde, byte value_tid, Json_itm value_itm) { + switch (value_tid) { + case Wbase_claim_type_.Tid__string: return new Wbase_claim_string(pid, snak_tid, value_itm.Data_bry()); + case Wbase_claim_type_.Tid__entity: return Parse_datavalue_entity (qid, pid, snak_tid, Json_nde.cast(value_itm)); + case Wbase_claim_type_.Tid__time: return Parse_datavalue_time (qid, pid, snak_tid, Json_nde.cast(value_itm)); + case Wbase_claim_type_.Tid__quantity: return Parse_datavalue_quantity (qid, pid, snak_tid, Json_nde.cast(value_itm)); + case Wbase_claim_type_.Tid__globecoordinate: return Parse_datavalue_globecoordinate (qid, pid, snak_tid, Json_nde.cast(value_itm)); + case Wbase_claim_type_.Tid__monolingualtext: return Parse_datavalue_monolingualtext (qid, pid, snak_tid, Json_nde.cast(value_itm)); + default: throw Err_.new_unhandled_default(value_tid); + } + } + private Wbase_claim_entity Parse_datavalue_entity(byte[] qid, int pid, byte snak_tid, Json_nde nde) { + int len = nde.Len(); + byte entityType = Byte_.Max_value_127; + byte[] numericId = null; + byte[] id = null; + for (int i = 0; i < len; ++i) { + Json_kv sub = Json_kv.cast(nde.Get_at(i)); + byte tid = Wbase_claim_entity_.Reg.Get_tid_or_max_and_log(qid, sub.Key().Data_bry()); if (tid == Byte_.Max_value_127) continue; + byte[] subValBry = sub.Val().Data_bry(); + switch (tid) { + case Wbase_claim_entity_.Tid__entity_type: entityType = Wbase_claim_entity_type_.Reg.Get_tid_or_fail(subValBry); break; + case Wbase_claim_entity_.Tid__numeric_id: numericId = subValBry; break; + case Wbase_claim_entity_.Tid__id: id = subValBry; break; // needed for sense and form + } + } + // TOMBSTONE:senses and forms do not have "numeric-id"; EX:wd:Lexeme:L2 and p6072 has a value of `{"entity-type":"form", "id":"L2-F3"}`; DATE:2020-07-27 + // if (numericId == null) throw Err_.new_wo_type("pid is invalid entity", "pid", pid); + return new Wbase_claim_entity(pid, snak_tid, entityType, numericId, id); + } + private Wbase_claim_monolingualtext Parse_datavalue_monolingualtext(byte[] qid, int pid, byte snak_tid, Json_nde nde) { + int len = nde.Len(); + byte[] lang = null, text = null; + for (int i = 0; i < len; ++i) { + Json_kv sub = Json_kv.cast(nde.Get_at(i)); + byte tid = Wbase_claim_monolingualtext_.Reg.Get_tid_or_max_and_log(qid, sub.Key().Data_bry()); if (tid == Byte_.Max_value_127) continue; + byte[] sub_val_bry = sub.Val().Data_bry(); + switch (tid) { + case Wbase_claim_monolingualtext_.Tid__text: text = sub_val_bry; break; + case Wbase_claim_monolingualtext_.Tid__language: lang = sub_val_bry; break; + } + } + if (lang == null || text == null) throw Err_.new_wo_type("pid is invalid monolingualtext", "pid", pid); + return new Wbase_claim_monolingualtext(pid, snak_tid, lang, text); + } + private Wbase_claim_globecoordinate Parse_datavalue_globecoordinate(byte[] qid, int pid, byte snak_tid, Json_nde nde) { + int len = nde.Len(); + byte[] lat = null, lng = null, alt = null, prc = null, glb = null; + for (int i = 0; i < len; ++i) { + Json_kv sub = Json_kv.cast(nde.Get_at(i)); + byte tid = Wbase_claim_globecoordinate_.Reg.Get_tid_or_max_and_log(qid, sub.Key().Data_bry()); if (tid == Byte_.Max_value_127) continue; + byte[] sub_val_bry = sub.Val().Data_bry(); + switch (tid) { + case Wbase_claim_globecoordinate_.Tid__latitude: lat = sub_val_bry; break; + case Wbase_claim_globecoordinate_.Tid__longitude: lng = sub_val_bry; break; + case Wbase_claim_globecoordinate_.Tid__altitude: alt = sub_val_bry; break; + case Wbase_claim_globecoordinate_.Tid__precision: prc = sub_val_bry; break; + case Wbase_claim_globecoordinate_.Tid__globe: glb = sub_val_bry; break; + } + } + if (lat == null || lng == null) throw Err_.new_wo_type("pid is invalid globecoordinate", "pid", pid); + return new Wbase_claim_globecoordinate(pid, snak_tid, lat, lng, alt, prc, glb); + } + private Wbase_claim_quantity Parse_datavalue_quantity(byte[] qid, int pid, byte snak_tid, Json_nde nde) { + int len = nde.Len(); + byte[] amount = null, unit = null, ubound = null, lbound = null; + for (int i = 0; i < len; ++i) { + Json_kv sub = Json_kv.cast(nde.Get_at(i)); + byte tid = Wbase_claim_quantity_.Reg.Get_tid_or_max_and_log(qid, sub.Key().Data_bry()); if (tid == Byte_.Max_value_127) continue; + byte[] sub_val_bry = sub.Val().Data_bry(); + switch (tid) { + case Wbase_claim_quantity_.Tid__amount: amount = sub_val_bry; break; + case Wbase_claim_quantity_.Tid__unit: unit = sub_val_bry; break; + case Wbase_claim_quantity_.Tid__upperbound: ubound = sub_val_bry; break; + case Wbase_claim_quantity_.Tid__lowerbound: lbound = sub_val_bry; break; + } + } + if (amount == null) throw Err_.new_wo_type("pid is invalid quantity", "pid", pid); + return new Wbase_claim_quantity(pid, snak_tid, amount, unit, ubound, lbound); + } + private Wbase_claim_time Parse_datavalue_time(byte[] qid, int pid, byte snak_tid, Json_nde nde) { + int len = nde.Len(); + byte[] time = null, timezone = null, before = null, after = null, precision = null, calendarmodel = null; + for (int i = 0; i < len; ++i) { + Json_kv sub = Json_kv.cast(nde.Get_at(i)); + byte tid = Wbase_claim_time_.Reg.Get_tid_or_max_and_log(qid, sub.Key().Data_bry()); if (tid == Byte_.Max_value_127) continue; + byte[] sub_val_bry = sub.Val().Data_bry(); + switch (tid) { + case Wbase_claim_time_.Tid__time: time = sub_val_bry; break; + case Wbase_claim_time_.Tid__timezone: timezone = sub_val_bry; break; + case Wbase_claim_time_.Tid__before: before = sub_val_bry; break; + case Wbase_claim_time_.Tid__after: after = sub_val_bry; break; + case Wbase_claim_time_.Tid__precision: precision = sub_val_bry; break; + case Wbase_claim_time_.Tid__calendarmodel: calendarmodel = sub_val_bry; break; + } + } + if (time == null) throw Err_.new_wo_type("pid is invalid time", "pid", pid); + return new Wbase_claim_time(pid, snak_tid, time, timezone, before, after, precision, calendarmodel); + } +} diff --git a/400_xowa/src/gplx/xowa/xtns/wbases/parsers/Wdata_doc_parser_fxt_base.java b/400_xowa/src/gplx/xowa/xtns/wbases/parsers/Wdata_doc_parser_fxt_base.java index d5614a0c1..7942dac84 100644 --- a/400_xowa/src/gplx/xowa/xtns/wbases/parsers/Wdata_doc_parser_fxt_base.java +++ b/400_xowa/src/gplx/xowa/xtns/wbases/parsers/Wdata_doc_parser_fxt_base.java @@ -1,6 +1,6 @@ /* XOWA: the XOWA Offline Wiki Application -Copyright (C) 2012-2017 gnosygnu@gmail.com +Copyright (C) 2012-2020 gnosygnu@gmail.com XOWA is licensed under the terms of the General Public License (GPL) Version 3, or alternatively under the terms of the Apache License Version 2.0. @@ -13,88 +13,126 @@ The terms of each license can be found in the source code repository: GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt */ -package gplx.xowa.xtns.wbases.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.wbases.*; -import gplx.langs.jsons.*; import gplx.xowa.xtns.wbases.core.*; import gplx.xowa.xtns.wbases.claims.*; import gplx.xowa.xtns.wbases.claims.enums.*; import gplx.xowa.xtns.wbases.claims.itms.*; -abstract class Wdata_doc_parser_fxt_base { - protected Wdata_doc_parser wdoc_parser; - private final Json_parser json_parser = new Json_parser(); - private final Bry_bfr tmp_time_bfr = Bry_bfr_.New(); - public void Init() { - if (wdoc_parser == null) wdoc_parser = Make_parser(); - } - public abstract Wdata_doc_parser Make_parser(); - public Wdata_sitelink_itm Make_sitelink(String site, String name, String... badges) {return new Wdata_sitelink_itm(Bry_.new_u8(site), Bry_.new_u8(name), Bry_.Ary(badges));} - public Wdata_langtext_itm Make_langval(String lang, String text) {return new Wdata_langtext_itm(Bry_.new_u8(lang), Bry_.new_u8(text));} - public Wdata_alias_itm Make_alias(String lang, String... vals) {return new Wdata_alias_itm(Bry_.new_u8(lang), Bry_.Ary(vals));} - public Wbase_claim_base Make_claim_string (int pid, String val) {return new Wbase_claim_string(pid, Wbase_claim_value_type_.Tid__value, Bry_.new_u8(val));} - public Wbase_claim_base Make_claim_entity_qid (int pid, int eid) {return new Wbase_claim_entity(pid, Wbase_claim_value_type_.Tid__value, Wbase_claim_entity_type_.Tid__item, Int_.To_bry(eid));} - public Wbase_claim_base Make_claim_entity_pid (int pid, int eid) {return new Wbase_claim_entity(pid, Wbase_claim_value_type_.Tid__value, Wbase_claim_entity_type_.Tid__property, Int_.To_bry(eid));} - public Wbase_claim_base Make_claim_monolingualtext (int pid, String lang, String text) {return new Wbase_claim_monolingualtext(pid, Wbase_claim_value_type_.Tid__value, Bry_.new_u8(lang), Bry_.new_u8(text));} - public Wbase_claim_base Make_claim_globecoordinate (int pid, String lat, String lng, String prc) {return new Wbase_claim_globecoordinate(pid, Wbase_claim_value_type_.Tid__value, Bry_.new_u8(lat), Bry_.new_u8(lng), Object_.Bry__null, Bry_.new_u8(prc), Bry_.new_a7("http://www.wikidata.org/entity/Q2"));} - public Wbase_claim_base Make_claim_quantity (int pid, int val, int unit, int ubound, int lbound) {return new Wbase_claim_quantity(pid, Wbase_claim_value_type_.Tid__value, Bry_.new_u8(Int_.To_str(val)), Bry_.new_u8(Int_.To_str(unit)), Bry_.new_u8(Int_.To_str(ubound)), Bry_.new_u8(Int_.To_str(lbound)));} - public Wbase_claim_base Make_claim_time (int pid, String val) {return new Wbase_claim_time(pid, Wbase_claim_value_type_.Tid__value, Wbase_claim_time_.To_bry(tmp_time_bfr, val), Wbase_claim_time_.Dflt__timezone.Val_bry(), Wbase_claim_time_.Dflt__before.Val_bry(), Wbase_claim_time_.Dflt__after.Val_bry(), Wbase_claim_time_.Dflt__precision.Val_bry(), Wbase_claim_time_.Dflt__calendarmodel.Val_bry());} - public Wbase_claim_base Make_claim_novalue (int pid) {return new Wbase_claim_value(pid, Wbase_claim_type_.Tid__unknown, Wbase_claim_value_type_.Tid__novalue);} - - public void Test_entity(String raw, String expd) {Tfds.Eq(expd, String_.new_u8(wdoc_parser.Parse_qid(json_parser.Parse_by_apos(raw))));} - public void Test_sitelinks(String raw, Wdata_sitelink_itm... expd) { - Ordered_hash actl_hash = wdoc_parser.Parse_sitelinks(Q1_bry, json_parser.Parse_by_apos(raw)); - Tfds.Eq_ary_str((Wdata_sitelink_itm[])actl_hash.To_ary(Wdata_sitelink_itm.class), expd); - } - public void Test_labels(String raw, Wdata_langtext_itm... expd) {Test_langvals(raw, Bool_.Y, expd);} - public void Test_descriptions(String raw, Wdata_langtext_itm... expd) {Test_langvals(raw, Bool_.N, expd);} - private void Test_langvals(String raw, boolean labels_or_descriptions, Wdata_langtext_itm... expd) { - Ordered_hash actl_hash = wdoc_parser.Parse_langvals(Q1_bry, json_parser.Parse_by_apos(raw), labels_or_descriptions); - Tfds.Eq_ary_str((Wdata_langtext_itm[])actl_hash.To_ary(Wdata_langtext_itm.class), expd); - } - public void Test_aliases(String raw, Wdata_alias_itm... expd) { - Ordered_hash actl_hash = wdoc_parser.Parse_aliases(Q1_bry, json_parser.Parse_by_apos(raw)); - Tfds.Eq_ary_str((Wdata_alias_itm[])actl_hash.To_ary(Wdata_alias_itm.class), expd); - } - public void Test_claims(String raw, Wbase_claim_base... expd) { - Ordered_hash actl_hash = wdoc_parser.Parse_claims(Q1_bry, json_parser.Parse_by_apos(raw)); - List_adp actl_list = Wbase_claim_grp.Xto_list(actl_hash); - Tfds.Eq_ary_str((Wbase_claim_base[])actl_list.To_ary(Wbase_claim_base.class), expd); - } - public void Test_claims_data(String raw, Wbase_claim_base expd) { - Json_doc jdoc = json_parser.Parse_by_apos(raw); - Wbase_claim_base actl = wdoc_parser.Parse_claims_data(Q1_bry, 1, Wbase_claim_value_type_.Tid__value, jdoc.Root_nde()); - Tfds.Eq(expd.toString(), actl.toString()); - } - public void Test_qualifiers(String raw, Wbase_claim_base... expd_itms) { - Json_doc jdoc = json_parser.Parse_by_apos(raw); - Json_nde qualifiers_nde = Json_nde.cast(Json_kv.cast(jdoc.Root_nde().Get_at(0)).Val()); - Wbase_claim_grp_list actl = wdoc_parser.Parse_qualifiers(Q1_bry, qualifiers_nde); - Tfds.Eq_ary_str(expd_itms, To_ary(actl)); - } - public void Test_references(String raw, int[] expd_order, Wbase_claim_base... expd_itms) { - Json_doc jdoc = json_parser.Parse_by_apos(raw); - Json_ary owner = Json_ary.cast_or_null(Json_kv.cast(jdoc.Root_nde().Get_at(0)).Val()); - Wbase_references_grp[] actl = wdoc_parser.Parse_references(Q1_bry, owner); - Wbase_references_grp actl_grp = actl[0]; - Tfds.Eq_ary(expd_order, actl_grp.Snaks_order()); - Tfds.Eq_ary_str(expd_itms, To_ary(actl_grp.Snaks())); - } - public void Test_pid_order(String raw, int... expd) { - Json_doc jdoc = json_parser.Parse_by_apos(raw); - Json_ary nde = Json_ary.cast_or_null(Json_kv.cast(jdoc.Root_nde().Get_at(0)).Val()); - int[] actl = wdoc_parser.Parse_pid_order(Q1_bry, nde); - Tfds.Eq_ary(expd, actl); - } - Wbase_claim_base[] To_ary(Wbase_claim_grp_list list) { - List_adp rv = List_adp_.New(); - int list_len = list.Len(); - for (int i = 0; i < list_len; ++i) { - Wbase_claim_grp grp = list.Get_at(i); - int grp_len = grp.Len(); - for (int j = 0; j < grp_len; ++j) { - Wbase_claim_base itm = grp.Get_at(j); - rv.Add(itm); - } - } - return (Wbase_claim_base[])rv.To_ary_and_clear(Wbase_claim_base.class); - } - private static final byte[] Q1_bry = Bry_.new_a7("Q1"); -} -class Wdata_doc_parser_v2_fxt extends Wdata_doc_parser_fxt_base { - @Override public Wdata_doc_parser Make_parser() {return new Wdata_doc_parser_v2();} -} +package gplx.xowa.xtns.wbases.parsers; + +import gplx.Bool_; +import gplx.Bry_; +import gplx.Bry_bfr; +import gplx.Bry_bfr_; +import gplx.Int_; +import gplx.List_adp; +import gplx.List_adp_; +import gplx.Object_; +import gplx.Ordered_hash; +import gplx.String_; +import gplx.Tfds; +import gplx.langs.jsons.Json_ary; +import gplx.langs.jsons.Json_doc; +import gplx.langs.jsons.Json_kv; +import gplx.langs.jsons.Json_nde; +import gplx.langs.jsons.Json_parser; +import gplx.xowa.xtns.wbases.claims.Wbase_claim_grp; +import gplx.xowa.xtns.wbases.claims.Wbase_claim_grp_list; +import gplx.xowa.xtns.wbases.claims.Wbase_references_grp; +import gplx.xowa.xtns.wbases.claims.enums.Wbase_claim_entity_type_; +import gplx.xowa.xtns.wbases.claims.enums.Wbase_claim_type_; +import gplx.xowa.xtns.wbases.claims.enums.Wbase_claim_value_type_; +import gplx.xowa.xtns.wbases.claims.itms.Wbase_claim_base; +import gplx.xowa.xtns.wbases.claims.itms.Wbase_claim_entity; +import gplx.xowa.xtns.wbases.claims.itms.Wbase_claim_globecoordinate; +import gplx.xowa.xtns.wbases.claims.itms.Wbase_claim_monolingualtext; +import gplx.xowa.xtns.wbases.claims.itms.Wbase_claim_quantity; +import gplx.xowa.xtns.wbases.claims.itms.Wbase_claim_string; +import gplx.xowa.xtns.wbases.claims.itms.Wbase_claim_time; +import gplx.xowa.xtns.wbases.claims.itms.Wbase_claim_time_; +import gplx.xowa.xtns.wbases.claims.itms.Wbase_claim_value; +import gplx.xowa.xtns.wbases.core.Wdata_alias_itm; +import gplx.xowa.xtns.wbases.core.Wdata_langtext_itm; +import gplx.xowa.xtns.wbases.core.Wdata_sitelink_itm; + +abstract class Wdata_doc_parser_fxt_base { + protected Wdata_doc_parser wdoc_parser; + private final Json_parser json_parser = new Json_parser(); + private final Bry_bfr tmp_time_bfr = Bry_bfr_.New(); + public void Init() { + if (wdoc_parser == null) wdoc_parser = Make_parser(); + } + public abstract Wdata_doc_parser Make_parser(); + public Wdata_sitelink_itm Make_sitelink(String site, String name, String... badges) {return new Wdata_sitelink_itm(Bry_.new_u8(site), Bry_.new_u8(name), Bry_.Ary(badges));} + public Wdata_langtext_itm Make_langval(String lang, String text) {return new Wdata_langtext_itm(Bry_.new_u8(lang), Bry_.new_u8(text));} + public Wdata_alias_itm Make_alias(String lang, String... vals) {return new Wdata_alias_itm(Bry_.new_u8(lang), Bry_.Ary(vals));} + public Wbase_claim_base Make_claim_string (int pid, String val) {return new Wbase_claim_string(pid, Wbase_claim_value_type_.Tid__value, Bry_.new_u8(val));} + public Wbase_claim_base Make_claim_entity_qid (int pid, int eid) {return new Wbase_claim_entity(pid, Wbase_claim_value_type_.Tid__value, Wbase_claim_entity_type_.Tid__item, Int_.To_bry(eid));} + public Wbase_claim_base Make_claim_entity_pid (int pid, int eid) {return new Wbase_claim_entity(pid, Wbase_claim_value_type_.Tid__value, Wbase_claim_entity_type_.Tid__property, Int_.To_bry(eid));} + public Wbase_claim_base Make_claim_entity_lid (int pid, int eid) {return new Wbase_claim_entity(pid, Wbase_claim_value_type_.Tid__value, Wbase_claim_entity_type_.Tid__lexeme, Int_.To_bry(eid));} + public Wbase_claim_base Make_claim_entity_fid (int pid, String id) {return new Wbase_claim_entity(pid, Wbase_claim_value_type_.Tid__value, Wbase_claim_entity_type_.Tid__form, null, Bry_.new_u8(id));} + public Wbase_claim_base Make_claim_entity_sid (int pid, String id) {return new Wbase_claim_entity(pid, Wbase_claim_value_type_.Tid__value, Wbase_claim_entity_type_.Tid__sense, null, Bry_.new_u8(id));} + public Wbase_claim_base Make_claim_monolingualtext (int pid, String lang, String text) {return new Wbase_claim_monolingualtext(pid, Wbase_claim_value_type_.Tid__value, Bry_.new_u8(lang), Bry_.new_u8(text));} + public Wbase_claim_base Make_claim_globecoordinate (int pid, String lat, String lng, String prc) {return new Wbase_claim_globecoordinate(pid, Wbase_claim_value_type_.Tid__value, Bry_.new_u8(lat), Bry_.new_u8(lng), Object_.Bry__null, Bry_.new_u8(prc), Bry_.new_a7("http://www.wikidata.org/entity/Q2"));} + public Wbase_claim_base Make_claim_quantity (int pid, int val, int unit, int ubound, int lbound) {return new Wbase_claim_quantity(pid, Wbase_claim_value_type_.Tid__value, Bry_.new_u8(Int_.To_str(val)), Bry_.new_u8(Int_.To_str(unit)), Bry_.new_u8(Int_.To_str(ubound)), Bry_.new_u8(Int_.To_str(lbound)));} + public Wbase_claim_base Make_claim_time (int pid, String val) {return new Wbase_claim_time(pid, Wbase_claim_value_type_.Tid__value, Wbase_claim_time_.To_bry(tmp_time_bfr, val), Wbase_claim_time_.Dflt__timezone.Val_bry(), Wbase_claim_time_.Dflt__before.Val_bry(), Wbase_claim_time_.Dflt__after.Val_bry(), Wbase_claim_time_.Dflt__precision.Val_bry(), Wbase_claim_time_.Dflt__calendarmodel.Val_bry());} + public Wbase_claim_base Make_claim_novalue (int pid) {return new Wbase_claim_value(pid, Wbase_claim_type_.Tid__unknown, Wbase_claim_value_type_.Tid__novalue);} + + public void Test_entity(String raw, String expd) {Tfds.Eq(expd, String_.new_u8(wdoc_parser.Parse_qid(json_parser.Parse_by_apos(raw))));} + public void Test_sitelinks(String raw, Wdata_sitelink_itm... expd) { + Ordered_hash actl_hash = wdoc_parser.Parse_sitelinks(Q1_bry, json_parser.Parse_by_apos(raw)); + Tfds.Eq_ary_str(actl_hash.To_ary(Wdata_sitelink_itm.class), expd); + } + public void Test_labels(String raw, Wdata_langtext_itm... expd) {Test_langvals(raw, Bool_.Y, expd);} + public void Test_descriptions(String raw, Wdata_langtext_itm... expd) {Test_langvals(raw, Bool_.N, expd);} + private void Test_langvals(String raw, boolean labels_or_descriptions, Wdata_langtext_itm... expd) { + Ordered_hash actl_hash = wdoc_parser.Parse_langvals(Q1_bry, json_parser.Parse_by_apos(raw), labels_or_descriptions); + Tfds.Eq_ary_str(actl_hash.To_ary(Wdata_langtext_itm.class), expd); + } + public void Test_aliases(String raw, Wdata_alias_itm... expd) { + Ordered_hash actl_hash = wdoc_parser.Parse_aliases(Q1_bry, json_parser.Parse_by_apos(raw)); + Tfds.Eq_ary_str(actl_hash.To_ary(Wdata_alias_itm.class), expd); + } + public void Test_claims(String raw, Wbase_claim_base... expd) { + Ordered_hash actl_hash = wdoc_parser.Parse_claims(Q1_bry, json_parser.Parse_by_apos(raw)); + List_adp actl_list = Wbase_claim_grp.Xto_list(actl_hash); + Tfds.Eq_ary_str(actl_list.To_ary(Wbase_claim_base.class), expd); + } + public void Test_claims_data(String raw, Wbase_claim_base expd) { + Json_doc jdoc = json_parser.Parse_by_apos(raw); + Wbase_claim_base actl = wdoc_parser.Parse_claims_data(Q1_bry, 1, Wbase_claim_value_type_.Tid__value, jdoc.Root_nde()); + Tfds.Eq(expd.toString(), actl.toString()); + } + public void Test_qualifiers(String raw, Wbase_claim_base... expd_itms) { + Json_doc jdoc = json_parser.Parse_by_apos(raw); + Json_nde qualifiers_nde = Json_nde.cast(Json_kv.cast(jdoc.Root_nde().Get_at(0)).Val()); + Wbase_claim_grp_list actl = wdoc_parser.Parse_qualifiers(Q1_bry, qualifiers_nde); + Tfds.Eq_ary_str(expd_itms, To_ary(actl)); + } + public void Test_references(String raw, int[] expd_order, Wbase_claim_base... expd_itms) { + Json_doc jdoc = json_parser.Parse_by_apos(raw); + Json_ary owner = Json_ary.cast_or_null(Json_kv.cast(jdoc.Root_nde().Get_at(0)).Val()); + Wbase_references_grp[] actl = wdoc_parser.Parse_references(Q1_bry, owner); + Wbase_references_grp actl_grp = actl[0]; + Tfds.Eq_ary(expd_order, actl_grp.Snaks_order()); + Tfds.Eq_ary_str(expd_itms, To_ary(actl_grp.Snaks())); + } + public void Test_pid_order(String raw, int... expd) { + Json_doc jdoc = json_parser.Parse_by_apos(raw); + Json_ary nde = Json_ary.cast_or_null(Json_kv.cast(jdoc.Root_nde().Get_at(0)).Val()); + int[] actl = wdoc_parser.Parse_pid_order(Q1_bry, nde); + Tfds.Eq_ary(expd, actl); + } + Wbase_claim_base[] To_ary(Wbase_claim_grp_list list) { + List_adp rv = List_adp_.New(); + int list_len = list.Len(); + for (int i = 0; i < list_len; ++i) { + Wbase_claim_grp grp = list.Get_at(i); + int grp_len = grp.Len(); + for (int j = 0; j < grp_len; ++j) { + Wbase_claim_base itm = grp.Get_at(j); + rv.Add(itm); + } + } + return (Wbase_claim_base[])rv.To_ary_and_clear(Wbase_claim_base.class); + } + private static final byte[] Q1_bry = Bry_.new_a7("Q1"); +} +class Wdata_doc_parser_v2_fxt extends Wdata_doc_parser_fxt_base { + @Override public Wdata_doc_parser Make_parser() {return new Wdata_doc_parser_v2();} +} diff --git a/400_xowa/src/gplx/xowa/xtns/wbases/parsers/Wdata_doc_parser_v2__claims__tst.java b/400_xowa/src/gplx/xowa/xtns/wbases/parsers/Wdata_doc_parser_v2__claims__tst.java index c00489c7d..be2315bf9 100644 --- a/400_xowa/src/gplx/xowa/xtns/wbases/parsers/Wdata_doc_parser_v2__claims__tst.java +++ b/400_xowa/src/gplx/xowa/xtns/wbases/parsers/Wdata_doc_parser_v2__claims__tst.java @@ -1,6 +1,6 @@ /* XOWA: the XOWA Offline Wiki Application -Copyright (C) 2012-2017 gnosygnu@gmail.com +Copyright (C) 2012-2020 gnosygnu@gmail.com XOWA is licensed under the terms of the General Public License (GPL) Version 3, or alternatively under the terms of the Apache License Version 2.0. @@ -13,179 +13,218 @@ The terms of each license can be found in the source code repository: GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt */ -package gplx.xowa.xtns.wbases.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.wbases.*; -import org.junit.*; -import gplx.langs.jsons.*; import gplx.xowa.xtns.wbases.core.*; -public class Wdata_doc_parser_v2__claims__tst { - @Before public void init() {fxt.Init();} private Wdata_doc_parser_v2_fxt fxt = new Wdata_doc_parser_v2_fxt(); - @Test public void Full__string() { - fxt.Test_claims(String_.Concat_lines_nl_skip_last - ( "{ 'claims':" - , " { 'P1':" - , " [" - , " { 'mainsnak':" - , " { 'snaktype':'value'" - , " , 'property':'P1'" - , " , 'hash':'84487fc3f93b4f74ab1cc5a47d78f596f0b49390'" - , " , 'datavalue':" - , " { 'value':'abc'" - , " , 'type':'string'" - , " }" - , " }" - , " , 'type':'statement'" - , " , 'id':'Q2$e8ba1188-4aec-9e37-a75e-f79466c1913e'" - , " , 'rank':'normal'" - , " }" - , " ]" - , " }" - , "}" - ) - , fxt.Make_claim_string(1, "abc") - ); - } - @Test public void Full__novalue() { - fxt.Test_claims(String_.Concat_lines_nl_skip_last - ( "{ 'claims':" - , " { 'P1':" - , " [" - , " { 'mainsnak':" - , " { 'snaktype':'novalue'" - , " , 'property':'P1'" - , " , 'hash':'84487fc3f93b4f74ab1cc5a47d78f596f0b49390'" - , " }" - , " }" - , " ]" - , " }" - , "}" - ) - , fxt.Make_claim_novalue(1) - ); - } - @Test public void Data__string() { - fxt.Test_claims_data(String_.Concat_lines_nl_skip_last - ( "{ 'value':'abc'" - , ", 'type':'string'" - , "}" - ) - , fxt.Make_claim_string(1, "abc") - ); - } - @Test public void Data__item() { - fxt.Test_claims_data(String_.Concat_lines_nl_skip_last - ( "{ 'value':" - , " { 'entity-type':'item'" - , " , 'numeric-id':'123'" - , " }" - , ", 'type':'wikibase-entityid'" - , "}" - ) - , fxt.Make_claim_entity_qid(1, 123) - ); - } - @Test public void Data__property() { - fxt.Test_claims_data(String_.Concat_lines_nl_skip_last - ( "{ 'value':" - , " { 'entity-type':'property'" - , " , 'numeric-id':'398'" - , " }" - , ", 'type':'wikibase-entityid'" - , "}" - ) - , fxt.Make_claim_entity_pid(1, 398) - ); - } - @Test public void Data__monolingualtext() { - fxt.Test_claims_data(String_.Concat_lines_nl_skip_last - ( "{ 'value':" - , " { 'text':'en_text'" - , " , 'language':'en'" - , " }" - , ", 'type':'monolingualtext'" - , "}" - ) - , fxt.Make_claim_monolingualtext(1, "en", "en_text") - ); - } - @Test public void Data__globecoordinate() { - fxt.Test_claims_data(String_.Concat_lines_nl_skip_last - ( "{ 'value':" - , " { 'latitude':1.2" - , " , 'longitude':3.4" - , " , 'altitude':null" - , " , 'precision':0.0002" - , " , 'globe':'http:\\/\\/www.wikidata.org\\/entity\\/Q2'" - , " }" - , ", 'type':'globecoordinate'" - , "}" - ) - , fxt.Make_claim_globecoordinate(1, "1.2", "3.4", "0.0002") - ); - } - @Test public void Data__quantity() { - fxt.Test_claims_data(String_.Concat_lines_nl_skip_last - ( "{ 'value':" - , " { 'amount':'123'" - , " , 'unit':'2'" - , " , 'upperBound':'125'" - , " , 'lowerBound':'121'" - , " }" - , ", 'type':'quantity'" - , "}" - ) - , fxt.Make_claim_quantity(1, 123, 2, 125, 121) - ); - } - @Test public void Data__time() { - fxt.Test_claims_data(String_.Concat_lines_nl_skip_last - ( "{ 'value':" - , " { 'time':'+00000002001-02-03T04:05:06Z'" - , " , 'timezone':0" - , " , 'before':0" - , " , 'after':0" - , " , 'precision':11" - , " , 'calendarmodel':'http:\\/\\/www.wikidata.org\\/entity\\/Q1985727'" - , " }" - , ", 'type':'time'" - , "}" - ) - , fxt.Make_claim_time(1, "2001-02-03 04:05:06") - ); - } - @Test public void Data__url() { // NOTE:has "String" property-type; EX:wd:Q23548; DATE:2016-07-28 - fxt.Test_claims_data(String_.Concat_lines_nl_skip_last - ( "{ 'value':'http:\\/\\/www.nasa.gov\\/rss\\/dyn\\/breaking_news.rss'" - , ", 'type':'string'" - , "}" - ) - , fxt.Make_claim_string(1, "http://www.nasa.gov/rss/dyn/breaking_news.rss") - ); - } - @Test public void Data__commonsMedia() { // NOTE:has "String" property-type; EX:wd:Q327162; DATE:2016-07-28 - fxt.Test_claims_data(String_.Concat_lines_nl_skip_last - ( "{ 'value':'Tabliczka E40.svg'" - , ", 'type':'string'" - , "}" - ) - , fxt.Make_claim_string(1, "Tabliczka E40.svg") - ); - } - @Test public void Data__externalid() { // NOTE:has "String" property-type; EX:wd:Q77177; DATE:2016-07-28 - fxt.Test_claims_data(String_.Concat_lines_nl_skip_last - ( "{ 'value':'000331371'" - , ", 'type':'string'" - , "}" - ) - , fxt.Make_claim_string(1, "000331371") - ); - } - @Test public void Data__math() { // NOTE:has "String" property-type; EX:wd:Q11518; DATE:2016-07-28 - fxt.Test_claims_data(String_.Concat_lines_nl_skip_last - ( "{ 'value':'a^2+b^2=c^2'" - , ", 'type':'string'" - , "}" - ) - , fxt.Make_claim_string(1, "a^2+b^2=c^2") - ); - } - // www.wikidata.org/wiki/Q11518 -} +package gplx.xowa.xtns.wbases.parsers; + +import gplx.String_; +import org.junit.Before; +import org.junit.Test; + +public class Wdata_doc_parser_v2__claims__tst { + @Before public void init() {fxt.Init();} private Wdata_doc_parser_v2_fxt fxt = new Wdata_doc_parser_v2_fxt(); + @Test public void Full__string() { + fxt.Test_claims(String_.Concat_lines_nl_skip_last + ( "{ 'claims':" + , " { 'P1':" + , " [" + , " { 'mainsnak':" + , " { 'snaktype':'value'" + , " , 'property':'P1'" + , " , 'hash':'84487fc3f93b4f74ab1cc5a47d78f596f0b49390'" + , " , 'datavalue':" + , " { 'value':'abc'" + , " , 'type':'string'" + , " }" + , " }" + , " , 'type':'statement'" + , " , 'id':'Q2$e8ba1188-4aec-9e37-a75e-f79466c1913e'" + , " , 'rank':'normal'" + , " }" + , " ]" + , " }" + , "}" + ) + , fxt.Make_claim_string(1, "abc") + ); + } + @Test public void Full__novalue() { + fxt.Test_claims(String_.Concat_lines_nl_skip_last + ( "{ 'claims':" + , " { 'P1':" + , " [" + , " { 'mainsnak':" + , " { 'snaktype':'novalue'" + , " , 'property':'P1'" + , " , 'hash':'84487fc3f93b4f74ab1cc5a47d78f596f0b49390'" + , " }" + , " }" + , " ]" + , " }" + , "}" + ) + , fxt.Make_claim_novalue(1) + ); + } + @Test public void Data__string() { + fxt.Test_claims_data(String_.Concat_lines_nl_skip_last + ( "{ 'value':'abc'" + , ", 'type':'string'" + , "}" + ) + , fxt.Make_claim_string(1, "abc") + ); + } + @Test public void Data__item() { + fxt.Test_claims_data(String_.Concat_lines_nl_skip_last + ( "{ 'value':" + , " { 'entity-type':'item'" + , " , 'numeric-id':'123'" + , " }" + , ", 'type':'wikibase-entityid'" + , "}" + ) + , fxt.Make_claim_entity_qid(1, 123) + ); + } + @Test public void Data__property() { + fxt.Test_claims_data(String_.Concat_lines_nl_skip_last + ( "{ 'value':" + , " { 'entity-type':'property'" + , " , 'numeric-id':'398'" + , " }" + , ", 'type':'wikibase-entityid'" + , "}" + ) + , fxt.Make_claim_entity_pid(1, 398) + ); + } + @Test public void Data__monolingualtext() { + fxt.Test_claims_data(String_.Concat_lines_nl_skip_last + ( "{ 'value':" + , " { 'text':'en_text'" + , " , 'language':'en'" + , " }" + , ", 'type':'monolingualtext'" + , "}" + ) + , fxt.Make_claim_monolingualtext(1, "en", "en_text") + ); + } + @Test public void Data__globecoordinate() { + fxt.Test_claims_data(String_.Concat_lines_nl_skip_last + ( "{ 'value':" + , " { 'latitude':1.2" + , " , 'longitude':3.4" + , " , 'altitude':null" + , " , 'precision':0.0002" + , " , 'globe':'http:\\/\\/www.wikidata.org\\/entity\\/Q2'" + , " }" + , ", 'type':'globecoordinate'" + , "}" + ) + , fxt.Make_claim_globecoordinate(1, "1.2", "3.4", "0.0002") + ); + } + @Test public void Data__quantity() { + fxt.Test_claims_data(String_.Concat_lines_nl_skip_last + ( "{ 'value':" + , " { 'amount':'123'" + , " , 'unit':'2'" + , " , 'upperBound':'125'" + , " , 'lowerBound':'121'" + , " }" + , ", 'type':'quantity'" + , "}" + ) + , fxt.Make_claim_quantity(1, 123, 2, 125, 121) + ); + } + @Test public void Data__time() { + fxt.Test_claims_data(String_.Concat_lines_nl_skip_last + ( "{ 'value':" + , " { 'time':'+00000002001-02-03T04:05:06Z'" + , " , 'timezone':0" + , " , 'before':0" + , " , 'after':0" + , " , 'precision':11" + , " , 'calendarmodel':'http:\\/\\/www.wikidata.org\\/entity\\/Q1985727'" + , " }" + , ", 'type':'time'" + , "}" + ) + , fxt.Make_claim_time(1, "2001-02-03 04:05:06") + ); + } + @Test public void Data__url() { // NOTE:has "String" property-type; EX:wd:Q23548; DATE:2016-07-28 + fxt.Test_claims_data(String_.Concat_lines_nl_skip_last + ( "{ 'value':'http:\\/\\/www.nasa.gov\\/rss\\/dyn\\/breaking_news.rss'" + , ", 'type':'string'" + , "}" + ) + , fxt.Make_claim_string(1, "http://www.nasa.gov/rss/dyn/breaking_news.rss") + ); + } + @Test public void Data__commonsMedia() { // NOTE:has "String" property-type; EX:wd:Q327162; DATE:2016-07-28 + fxt.Test_claims_data(String_.Concat_lines_nl_skip_last + ( "{ 'value':'Tabliczka E40.svg'" + , ", 'type':'string'" + , "}" + ) + , fxt.Make_claim_string(1, "Tabliczka E40.svg") + ); + } + @Test public void Data__externalid() { // NOTE:has "String" property-type; EX:wd:Q77177; DATE:2016-07-28 + fxt.Test_claims_data(String_.Concat_lines_nl_skip_last + ( "{ 'value':'000331371'" + , ", 'type':'string'" + , "}" + ) + , fxt.Make_claim_string(1, "000331371") + ); + } + @Test public void Data__math() { // NOTE:has "String" property-type; EX:wd:Q11518; DATE:2016-07-28 + fxt.Test_claims_data(String_.Concat_lines_nl_skip_last + ( "{ 'value':'a^2+b^2=c^2'" + , ", 'type':'string'" + , "}" + ) + , fxt.Make_claim_string(1, "a^2+b^2=c^2") + ); + } + @Test public void ValueLexeme() {// EX:wd:L2 P46028; DATE:2020-07-28 + fxt.Test_claims_data(String_.Concat_lines_nl_skip_last + ( "{ 'value':" + , " { 'entity-type':'lexeme'" + , " , 'numeric-id':'123'" + , " , 'id':'L123'" + , " }" + , ", 'type':'wikibase-entityid'" + , "}" + ) + , fxt.Make_claim_entity_lid(1, 123) + ); + } + @Test public void ValueForm() {// EX:wd:L2 L2-F3; DATE:2020-07-28 + fxt.Test_claims_data(String_.Concat_lines_nl_skip_last + ( "{ 'value':" + , " { 'entity-type':'form'" + , " , 'id':'L2-F3'" + , " }" + , ", 'type':'wikibase-entityid'" + , "}" + ) + , fxt.Make_claim_entity_fid(1, "L2-F3") + ); + } + @Test public void ValueSense() {// EX:wd:L2 L2-S1; DATE:2020-07-28 + fxt.Test_claims_data(String_.Concat_lines_nl_skip_last + ( "{ 'value':" + , " { 'entity-type':'sense'" + , " , 'id':'L2-S1'" + , " }" + , ", 'type':'wikibase-entityid'" + , "}" + ) + , fxt.Make_claim_entity_sid(1, "L2-S1") + ); + } +}