mirror of
https://github.com/gnosygnu/xowa.git
synced 2026-03-02 03:49:30 +00:00
v2.7.3.2
This commit is contained in:
@@ -49,7 +49,7 @@ public class Scrib_lib_ustring__lib_tst {
|
||||
Exec_match("aaa" , "(a)" , 1, "a;a;a"); // should return all matches
|
||||
Exec_match("a b" , "%S" , 1, "a"); // %S was returning every match instead of 1st; PAGE:en.w:Bertrand_Russell; DATE:2014-04-02
|
||||
Exec_match(1 , "a" , 1, String_.Null_mark); // Module can pass raw ints; PAGE:en.w:Budget_of_the_European_Union; DATE:2015-01-22
|
||||
Exec_match("" , "a?" , 1, ""); // no results with ? should return "" not nil; PAGE:en.d:民; DATE:2015-01-30
|
||||
Exec_match("" , "a?" , 1, ""); // no results with ? should return "" not nil; PAGE:en.d:民; DATE:2015-01-30
|
||||
}
|
||||
@Test public void Match_args_out_of_order() {
|
||||
fxt.Test_scrib_proc_empty(lib, Scrib_lib_ustring.Invk_match, KeyVal_.Ary(KeyVal_.int_(2, "[a]")));
|
||||
@@ -116,6 +116,11 @@ public class Scrib_lib_ustring__lib_tst {
|
||||
, " 1=2"
|
||||
));
|
||||
}
|
||||
// NOTE: every statement in this test body is commented out, so the test currently runs no assertions.
// The disabled lines show the intended check: gsub with the frontier pattern "%f[%a]%u+%f[%A]".
@Test public void Gsub_frontier_pattern() { // PURPOSE: handle frontier pattern; EX:"%f[%a]"; NOTE:test will fail if run in 1.6 environment; DATE:2015-07-20
|
||||
// fxt.Init_cbk(Scrib_core.Key_mw_interface, fxt.Core().Lib_ustring(), Scrib_lib_ustring.Invk_gsub);
|
||||
// //Exec_gsub_regx("THE QUICK brOWN FOx JUMPS", "%f[%a]%u+%f[%A]", 1, "", "THE;1;QUICK;2;JUMPS;3;");
|
||||
// Exec_gsub_regx("thE QUICK brOWN FOx JUMPS", "%f[%a]%u+%f[%A]", 1, "", "THE;1;QUICK;2;JUMPS;3;");
|
||||
}
|
||||
// @Test public void Match_viwiktionary() {
|
||||
// fxt.Init_cbk(Scrib_core.Key_mw_interface, fxt.Core().Lib_ustring(), Scrib_lib_ustring.Invk_match);
|
||||
// Exec_match("tr" , "()(r)", 1, ";"); // should return all matches
|
||||
|
||||
@@ -71,7 +71,7 @@ public class Scrib_regx_converter {
|
||||
switch (nxt) {
|
||||
case Byte_ascii.Ltr_b: // EX: "%b()"
|
||||
i += 2;
|
||||
if (i >= len) throw Err_.new_wo_type("malformed pattern (missing arguments to \'%b\')");
|
||||
if (i >= len) throw Err_.new_wo_type("malformed pattern (missing arguments to '%b')");
|
||||
byte char_0 = src[i - 1];
|
||||
byte char_1 = src[i];
|
||||
if (char_0 == char_1) { // same char: easier regex; REF.MW: $bfr .= "{$d1}[^$d1]*$d1";
|
||||
@@ -94,6 +94,22 @@ public class Scrib_regx_converter {
|
||||
}
|
||||
}
|
||||
break;
|
||||
// case Byte_ascii.Ltr_f: { // EX: "%f[%a]"
|
||||
// ++i;
|
||||
// if (i + 1 >= len || src[i] != Byte_ascii.Brack_bgn) throw Err_.new_("scribunto", "missing '[' after %f in pattern at pattern character $ii");
|
||||
// Bry_bfr tmp_bfr = Xoa_app_.Utl__bfr_mkr().Get_b128();
|
||||
// i = bracketedCharSetToRegex(tmp_bfr, src, i, len);
|
||||
// byte[] bracketed_regx = tmp_bfr.To_bry_and_rls();
|
||||
// int j = 1;
|
||||
// bfr.Add_str_a7("(?<!").Add(bracketed_regx).Add_str_a7(")(?=$").Add(bracketed_regx).Add_str_a7(")");
|
||||
//// if ( preg_match( "/$re2/us", "\0" ) ) {
|
||||
//// $re .= "(?<!^)(?<!$re2)(?=$re2|$)";
|
||||
//// } else {
|
||||
//// $re .= "(?<!$re2)(?=$re2)";
|
||||
//// }
|
||||
//
|
||||
// break;
|
||||
// }
|
||||
case Byte_ascii.Num_0: case Byte_ascii.Num_1: case Byte_ascii.Num_2: case Byte_ascii.Num_3: case Byte_ascii.Num_4:
|
||||
case Byte_ascii.Num_5: case Byte_ascii.Num_6: case Byte_ascii.Num_7: case Byte_ascii.Num_8: case Byte_ascii.Num_9:
|
||||
grps_len = nxt - Byte_ascii.Num_0;
|
||||
@@ -109,52 +125,7 @@ public class Scrib_regx_converter {
|
||||
}
|
||||
break;
|
||||
case Byte_ascii.Brack_bgn:
|
||||
bfr.Add_byte(Byte_ascii.Brack_bgn);
|
||||
++i;
|
||||
if (i < len && src[i] == Byte_ascii.Pow) { // ^
|
||||
bfr.Add_byte(Byte_ascii.Pow);
|
||||
++i;
|
||||
}
|
||||
boolean stop = false;
|
||||
for (; i < len; i++) {
|
||||
byte tmp_b = src[i];
|
||||
switch (tmp_b) {
|
||||
case Byte_ascii.Brack_end:
|
||||
stop = true;
|
||||
break;
|
||||
case Byte_ascii.Percent:
|
||||
++i;
|
||||
if (i >= len)
|
||||
stop = true;
|
||||
else {
|
||||
Object brack_obj = brack_hash.Get_by_mid(src, i, i + 1);
|
||||
if (brack_obj != null)
|
||||
bfr.Add((byte[])brack_obj);
|
||||
else
|
||||
Regx_quote(bfr, src[i]);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
boolean normal = true;
|
||||
if (i + 2 < len) {
|
||||
byte dash_1 = src[i + 1];
|
||||
byte dash_2 = src[i + 2];
|
||||
if (dash_1 == Byte_ascii.Dash && dash_2 != Byte_ascii.Brack_end) {
|
||||
Regx_quote(bfr, tmp_b);
|
||||
bfr.Add_byte(Byte_ascii.Dash);
|
||||
Regx_quote(bfr, dash_2);
|
||||
i += 2;
|
||||
normal = false;
|
||||
}
|
||||
}
|
||||
if (normal)
|
||||
Regx_quote(bfr, src[i]);
|
||||
break;
|
||||
}
|
||||
if (stop) break;
|
||||
}
|
||||
if (i >= len) throw Err_.new_wo_type("Missing close-bracket for character set beginning at pattern character $nxt_pos");
|
||||
bfr.Add_byte(Byte_ascii.Brack_end);
|
||||
i = bracketedCharSetToRegex(bfr, src, i, len);
|
||||
q_flag = true;
|
||||
break;
|
||||
case Byte_ascii.Brack_end: throw Err_.new_wo_type("Unmatched close-bracket at pattern character " + Int_.Xto_str(i));
|
||||
@@ -188,6 +159,55 @@ public class Scrib_regx_converter {
|
||||
regx = bfr.Xto_str_and_clear();
|
||||
return regx;
|
||||
} private Bry_bfr bfr = Bry_bfr.new_();
|
||||
private int bracketedCharSetToRegex(Bry_bfr bfr, byte[] src, int i, int len) {
|
||||
bfr.Add_byte(Byte_ascii.Brack_bgn);
|
||||
++i;
|
||||
if (i < len && src[i] == Byte_ascii.Pow) { // ^
|
||||
bfr.Add_byte(Byte_ascii.Pow);
|
||||
++i;
|
||||
}
|
||||
boolean stop = false;
|
||||
for (; i < len; i++) {
|
||||
byte tmp_b = src[i];
|
||||
switch (tmp_b) {
|
||||
case Byte_ascii.Brack_end:
|
||||
stop = true;
|
||||
break;
|
||||
case Byte_ascii.Percent:
|
||||
++i;
|
||||
if (i >= len)
|
||||
stop = true;
|
||||
else {
|
||||
Object brack_obj = brack_hash.Get_by_mid(src, i, i + 1);
|
||||
if (brack_obj != null)
|
||||
bfr.Add((byte[])brack_obj);
|
||||
else
|
||||
Regx_quote(bfr, src[i]);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
boolean normal = true;
|
||||
if (i + 2 < len) {
|
||||
byte dash_1 = src[i + 1];
|
||||
byte dash_2 = src[i + 2];
|
||||
if (dash_1 == Byte_ascii.Dash && dash_2 != Byte_ascii.Brack_end) {
|
||||
Regx_quote(bfr, tmp_b);
|
||||
bfr.Add_byte(Byte_ascii.Dash);
|
||||
Regx_quote(bfr, dash_2);
|
||||
i += 2;
|
||||
normal = false;
|
||||
}
|
||||
}
|
||||
if (normal)
|
||||
Regx_quote(bfr, src[i]);
|
||||
break;
|
||||
}
|
||||
if (stop) break;
|
||||
}
|
||||
if (i >= len) throw Err_.new_wo_type("Missing close-bracket for character set beginning at pattern character $nxt_pos");
|
||||
bfr.Add_byte(Byte_ascii.Brack_end);
|
||||
return i;
|
||||
}
|
||||
boolean grps_open_Has(List_adp list, int v) {
|
||||
int len = list.Count();
|
||||
for (int i = 0; i < len; i++) {
|
||||
|
||||
@@ -40,7 +40,7 @@ public class Wdata_wiki_mgr implements GfoEvObj, GfoInvkAble {
|
||||
public Json_parser Jdoc_parser() {return jdoc_parser;} private Json_parser jdoc_parser = new Json_parser();
|
||||
public void Init_by_app() {}
|
||||
public Wdata_doc_parser Wdoc_parser(Json_doc jdoc) {
|
||||
Json_itm_kv itm_0 = Json_itm_kv.cast_(jdoc.Root().Get_at(0)); // get 1st node
|
||||
Json_kv itm_0 = Json_kv.cast_(jdoc.Root().Get_at(0)); // get 1st node
|
||||
return Bry_.Eq(itm_0.Key().Data_bry(), Wdata_doc_parser_v2.Bry_type)
|
||||
|| Bry_.Eq(itm_0.Key().Data_bry(), Wdata_doc_parser_v2.Bry_id)
|
||||
? wdoc_parser_v2 : wdoc_parser_v1; // if "type", must be v2
|
||||
|
||||
@@ -102,7 +102,7 @@ abstract class Wdata_tbl_base {
|
||||
public static void Exec_insert_kvs(Db_stmt stmt, int page_id, Ordered_hash hash) {
|
||||
int len = hash.Count();
|
||||
for (int i = 0; i < len; i++) {
|
||||
Json_itm_kv kv = (Json_itm_kv)hash.Get_at(i);
|
||||
Json_kv kv = (Json_kv)hash.Get_at(i);
|
||||
stmt.Clear()
|
||||
.Val_int(page_id)
|
||||
.Val_bry_as_str(kv.Key().Data_bry())
|
||||
@@ -145,7 +145,7 @@ class Wdata_alias_tbl extends Wdata_tbl_base {
|
||||
int len = hash.Count();
|
||||
Db_stmt insert_stmt = this.Insert_stmt();
|
||||
for (int i = 0; i < len; i++) {
|
||||
Json_itm_kv kv = (Json_itm_kv)hash.Get_at(i);
|
||||
Json_kv kv = (Json_kv)hash.Get_at(i);
|
||||
byte[] key = kv.Key().Data_bry();
|
||||
Json_grp val_grp = (Json_grp)kv.Val();
|
||||
int val_grp_len = val_grp.Len();
|
||||
@@ -155,7 +155,7 @@ class Wdata_alias_tbl extends Wdata_tbl_base {
|
||||
if (val_itm.Tid() == Json_itm_.Tid_string)
|
||||
val = val_itm.Data_bry();
|
||||
else if (val_itm.Tid() == Json_itm_.Tid_kv) { // EX: q80 and de aliases
|
||||
val = ((Json_itm_kv)val_itm).Val().Data_bry();
|
||||
val = ((Json_kv)val_itm).Val().Data_bry();
|
||||
}
|
||||
insert_stmt.Clear()
|
||||
.Val_int(page_id)
|
||||
@@ -201,7 +201,7 @@ class Wdata_link_tbl extends Wdata_tbl_base {
|
||||
int len = hash.Count();
|
||||
Db_stmt insert_stmt = this.Insert_stmt();
|
||||
for (int i = 0; i < len; i++) {
|
||||
Json_itm_kv kv = (Json_itm_kv)hash.Get_at(i);
|
||||
Json_kv kv = (Json_kv)hash.Get_at(i);
|
||||
byte[] key = kv.Key().Data_bry();
|
||||
Json_itm kv_val = kv.Val();
|
||||
byte[] val = Bry_.Empty;
|
||||
@@ -209,7 +209,7 @@ class Wdata_link_tbl extends Wdata_tbl_base {
|
||||
val = kv_val.Data_bry();
|
||||
else {
|
||||
Json_nde val_nde = (Json_nde)kv.Val();
|
||||
Json_itm_kv val_name_kv = (Json_itm_kv)val_nde.Get_at(0); // ASSUME: 1st item is always "name" kv; EX: "name":"Earth"
|
||||
Json_kv val_name_kv = (Json_kv)val_nde.Get_at(0); // ASSUME: 1st item is always "name" kv; EX: "name":"Earth"
|
||||
val = val_name_kv.Val().Data_bry();
|
||||
}
|
||||
insert_stmt.Clear()
|
||||
|
||||
@@ -19,8 +19,8 @@ package gplx.xowa.xtns.wdatas.parsers; import gplx.*; import gplx.xowa.*; import
|
||||
import gplx.core.primitives.*;
|
||||
import gplx.core.json.*; import gplx.xowa.xtns.wdatas.core.*;
|
||||
class Wdata_claims_parser_v2 {
|
||||
public void Make_claim_itms(byte[] qid, List_adp claim_itms_list, byte[] src, Json_itm_kv claim_grp) {
|
||||
Json_itm_ary claim_itms_ary = Json_itm_ary.cast_(claim_grp.Val());
|
||||
public void Make_claim_itms(byte[] qid, List_adp claim_itms_list, byte[] src, Json_kv claim_grp) {
|
||||
Json_ary claim_itms_ary = Json_ary.cast_or_null(claim_grp.Val());
|
||||
int claim_itms_len = claim_itms_ary.Len();
|
||||
int pid = Parse_pid(claim_grp.Key().Data_bry());
|
||||
for (int i = 0; i < claim_itms_len; ++i) {
|
||||
@@ -36,14 +36,14 @@ class Wdata_claims_parser_v2 {
|
||||
byte rank_tid = Wdata_dict_rank.Tid_unknown;
|
||||
Wdata_claim_itm_core claim_itm = null; Wdata_claim_grp_list qualifiers = null; int[] qualifiers_order = null; Wdata_references_grp[] snaks_grp = null;
|
||||
for (int i = 0; i < len; ++i) {
|
||||
Json_itm_kv sub = Json_itm_kv.cast_(nde.Get_at(i));
|
||||
Json_kv sub = Json_kv.cast_(nde.Get_at(i));
|
||||
byte tid = Wdata_dict_utl.Get_tid_or_invalid(qid, dict, sub.Key().Data_bry()); if (tid == Wdata_dict_utl.Tid_invalid) continue;
|
||||
switch (tid) {
|
||||
case Wdata_dict_claim.Tid_mainsnak: claim_itm = Parse_mainsnak(qid, Json_nde.cast_(sub.Val()), pid); break;
|
||||
case Wdata_dict_claim.Tid_rank: rank_tid = Wdata_dict_rank.Xto_tid(sub.Val().Data_bry()); break;
|
||||
case Wdata_dict_claim.Tid_references: snaks_grp = Parse_references(qid, Json_itm_ary.cast_(sub.Val())); break;
|
||||
case Wdata_dict_claim.Tid_references: snaks_grp = Parse_references(qid, Json_ary.cast_or_null(sub.Val())); break;
|
||||
case Wdata_dict_claim.Tid_qualifiers: qualifiers = Parse_qualifiers(qid, Json_nde.cast_(sub.Val())); break;
|
||||
case Wdata_dict_claim.Tid_qualifiers_order: qualifiers_order = Parse_pid_order(Json_itm_ary.cast_(sub.Val())); break;
|
||||
case Wdata_dict_claim.Tid_qualifiers_order: qualifiers_order = Parse_pid_order(Json_ary.cast_or_null(sub.Val())); break;
|
||||
case Wdata_dict_claim.Tid_type: break; // ignore: "statement"
|
||||
case Wdata_dict_claim.Tid_id: break; // ignore: "Q2$F909BD1C-D34D-423F-9ED2-3493663321AF"
|
||||
}
|
||||
@@ -56,7 +56,7 @@ class Wdata_claims_parser_v2 {
|
||||
}
|
||||
return claim_itm;
|
||||
}
|
||||
public Wdata_references_grp[] Parse_references(byte[] qid, Json_itm_ary owner) {
|
||||
public Wdata_references_grp[] Parse_references(byte[] qid, Json_ary owner) {
|
||||
int len = owner.Len();
|
||||
Wdata_references_grp[] rv = new Wdata_references_grp[len];
|
||||
for (int i = 0; i < len; ++i) {
|
||||
@@ -70,12 +70,12 @@ class Wdata_claims_parser_v2 {
|
||||
Hash_adp_bry dict = Wdata_dict_reference.Dict;
|
||||
Wdata_claim_grp_list snaks = null; int[] snaks_order = null;
|
||||
for (int i = 0; i < len; ++i) {
|
||||
Json_itm_kv sub = Json_itm_kv.cast_(owner.Get_at(i));
|
||||
Json_kv sub = Json_kv.cast_(owner.Get_at(i));
|
||||
byte tid = Wdata_dict_utl.Get_tid_or_invalid(qid, dict, sub.Key().Data_bry()); if (tid == Wdata_dict_utl.Tid_invalid) continue;
|
||||
switch (tid) {
|
||||
case Wdata_dict_reference.Tid_hash: break; // ignore: "b923b0d68beb300866b87ead39f61e63ec30d8af"
|
||||
case Wdata_dict_reference.Tid_snaks: snaks = Parse_qualifiers(qid, Json_nde.cast_(sub.Val())); break;
|
||||
case Wdata_dict_reference.Tid_snaks_order: snaks_order = Parse_pid_order(Json_itm_ary.cast_(sub.Val())); break;
|
||||
case Wdata_dict_reference.Tid_snaks_order: snaks_order = Parse_pid_order(Json_ary.cast_or_null(sub.Val())); break;
|
||||
}
|
||||
}
|
||||
return new Wdata_references_grp(snaks, snaks_order);
|
||||
@@ -85,14 +85,14 @@ class Wdata_claims_parser_v2 {
|
||||
if (qualifiers_nde == null) return rv; // NOTE:sometimes references can have 0 snaks; return back an empty Wdata_claim_grp_list, not null; PAGE:Птичкин,_Евгений_Николаевич; DATE:2015-02-16
|
||||
int len = qualifiers_nde.Len();
|
||||
for (int i = 0; i < len; ++i) {
|
||||
Json_itm_kv qualifier_kv = Json_itm_kv.cast_(qualifiers_nde.Get_at(i));
|
||||
Json_kv qualifier_kv = Json_kv.cast_(qualifiers_nde.Get_at(i));
|
||||
int pid = Parse_pid(qualifier_kv.Key().Data_bry());
|
||||
Wdata_claim_grp claims_grp = Parse_props_grp(qid, pid, Json_itm_ary.cast_(qualifier_kv.Val()));
|
||||
Wdata_claim_grp claims_grp = Parse_props_grp(qid, pid, Json_ary.cast_or_null(qualifier_kv.Val()));
|
||||
rv.Add(claims_grp);
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
public int[] Parse_pid_order(Json_itm_ary ary) {
|
||||
public int[] Parse_pid_order(Json_ary ary) {
|
||||
int len = ary.Len();
|
||||
int[] rv = new int[len];
|
||||
for (int i = 0; i < len; ++i) {
|
||||
@@ -101,7 +101,7 @@ class Wdata_claims_parser_v2 {
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
private Wdata_claim_grp Parse_props_grp(byte[] qid, int pid, Json_itm_ary props_ary) {
|
||||
private Wdata_claim_grp Parse_props_grp(byte[] qid, int pid, Json_ary props_ary) {
|
||||
List_adp list = List_adp_.new_();
|
||||
int len = props_ary.Len();
|
||||
for (int i = 0; i < len; ++i) {
|
||||
@@ -116,7 +116,7 @@ class Wdata_claims_parser_v2 {
|
||||
Hash_adp_bry dict = Wdata_dict_mainsnak.Dict;
|
||||
byte snak_tid = Byte_.Max_value_127;
|
||||
for (int i = 0; i < len; ++i) {
|
||||
Json_itm_kv sub = Json_itm_kv.cast_(nde.Get_at(i));
|
||||
Json_kv sub = Json_kv.cast_(nde.Get_at(i));
|
||||
byte tid = Wdata_dict_utl.Get_tid_or_invalid(qid, dict, sub.Key().Data_bry()); if (tid == Wdata_dict_utl.Tid_invalid) continue;
|
||||
switch (tid) {
|
||||
case Wdata_dict_mainsnak.Tid_snaktype: snak_tid = Wdata_dict_snak_tid.Xto_tid(sub.Val().Data_bry()); break;
|
||||
@@ -133,7 +133,7 @@ class Wdata_claims_parser_v2 {
|
||||
Hash_adp_bry dict = Wdata_dict_datavalue.Dict;
|
||||
Json_itm value_itm = null; byte value_tid = Wdata_dict_val_tid.Tid_unknown;
|
||||
for (int i = 0; i < len; ++i) {
|
||||
Json_itm_kv sub = Json_itm_kv.cast_(nde.Get_at(i));
|
||||
Json_kv sub = Json_kv.cast_(nde.Get_at(i));
|
||||
byte tid = Wdata_dict_utl.Get_tid_or_invalid(qid, dict, sub.Key().Data_bry()); if (tid == Wdata_dict_utl.Tid_invalid) continue;
|
||||
switch (tid) {
|
||||
case Wdata_dict_datavalue.Tid_type: value_tid = Wdata_dict_val_tid.Xto_tid(sub.Val().Data_bry()); break;
|
||||
@@ -157,7 +157,7 @@ class Wdata_claims_parser_v2 {
|
||||
byte entity_tid = Byte_.Max_value_127;
|
||||
byte[] entity_id_bry = null;
|
||||
for (int i = 0; i < len; ++i) {
|
||||
Json_itm_kv sub = Json_itm_kv.cast_(nde.Get_at(i));
|
||||
Json_kv sub = Json_kv.cast_(nde.Get_at(i));
|
||||
byte tid = Wdata_dict_utl.Get_tid_or_invalid(qid, dict, sub.Key().Data_bry()); if (tid == Wdata_dict_utl.Tid_invalid) continue;
|
||||
switch (tid) {
|
||||
case Wdata_dict_value_entity.Tid_entity_type: entity_tid = Wdata_dict_value_entity_tid.Xto_tid(sub.Val().Data_bry()); break;
|
||||
@@ -172,7 +172,7 @@ class Wdata_claims_parser_v2 {
|
||||
int len = nde.Len();
|
||||
byte[] lang = null, text = null;
|
||||
for (int i = 0; i < len; ++i) {
|
||||
Json_itm_kv sub = Json_itm_kv.cast_(nde.Get_at(i));
|
||||
Json_kv sub = Json_kv.cast_(nde.Get_at(i));
|
||||
byte tid = Wdata_dict_utl.Get_tid_or_invalid(qid, dict, sub.Key().Data_bry()); if (tid == Wdata_dict_utl.Tid_invalid) continue;
|
||||
byte[] sub_val_bry = sub.Val().Data_bry();
|
||||
switch (tid) {
|
||||
@@ -188,7 +188,7 @@ class Wdata_claims_parser_v2 {
|
||||
int len = nde.Len();
|
||||
byte[] lat = null, lng = null, alt = null, prc = null, glb = null;
|
||||
for (int i = 0; i < len; ++i) {
|
||||
Json_itm_kv sub = Json_itm_kv.cast_(nde.Get_at(i));
|
||||
Json_kv sub = Json_kv.cast_(nde.Get_at(i));
|
||||
byte tid = Wdata_dict_utl.Get_tid_or_invalid(qid, dict, sub.Key().Data_bry()); if (tid == Wdata_dict_utl.Tid_invalid) continue;
|
||||
byte[] sub_val_bry = sub.Val().Data_bry();
|
||||
switch (tid) {
|
||||
@@ -207,7 +207,7 @@ class Wdata_claims_parser_v2 {
|
||||
int len = nde.Len();
|
||||
byte[] amount = null, unit = null, ubound = null, lbound = null;
|
||||
for (int i = 0; i < len; ++i) {
|
||||
Json_itm_kv sub = Json_itm_kv.cast_(nde.Get_at(i));
|
||||
Json_kv sub = Json_kv.cast_(nde.Get_at(i));
|
||||
byte tid = Wdata_dict_utl.Get_tid_or_invalid(qid, dict, sub.Key().Data_bry()); if (tid == Wdata_dict_utl.Tid_invalid) continue;
|
||||
byte[] sub_val_bry = sub.Val().Data_bry();
|
||||
switch (tid) {
|
||||
@@ -225,7 +225,7 @@ class Wdata_claims_parser_v2 {
|
||||
int len = nde.Len();
|
||||
byte[] time = null, timezone = null, before = null, after = null, precision = null, calendarmodel = null;
|
||||
for (int i = 0; i < len; ++i) {
|
||||
Json_itm_kv sub = Json_itm_kv.cast_(nde.Get_at(i));
|
||||
Json_kv sub = Json_kv.cast_(nde.Get_at(i));
|
||||
byte tid = Wdata_dict_utl.Get_tid_or_invalid(qid, dict, sub.Key().Data_bry()); if (tid == Wdata_dict_utl.Tid_invalid) continue;
|
||||
byte[] sub_val_bry = sub.Val().Data_bry();
|
||||
switch (tid) {
|
||||
|
||||
@@ -25,6 +25,6 @@ public interface Wdata_doc_parser {
|
||||
Ordered_hash Parse_claims(byte[] qid, Json_doc doc);
|
||||
Wdata_claim_itm_base Parse_claims_data(byte[] qid, int pid, byte snak_tid, Json_nde nde);
|
||||
Wdata_claim_grp_list Parse_qualifiers(byte[] qid, Json_nde nde);
|
||||
int[] Parse_pid_order(byte[] qid, Json_itm_ary ary);
|
||||
Wdata_references_grp[] Parse_references(byte[] qid, Json_itm_ary owner);
|
||||
int[] Parse_pid_order(byte[] qid, Json_ary ary);
|
||||
Wdata_references_grp[] Parse_references(byte[] qid, Json_ary owner);
|
||||
}
|
||||
|
||||
@@ -61,13 +61,13 @@ abstract class Wdata_doc_parser_fxt_base {
|
||||
}
|
||||
// Test helper: builds a Json_doc from raw, takes the value of the root's first key-value entry as the
// qualifiers node, runs parser.Parse_qualifiers on it and compares the result with expd_itms.
// NOTE(review): this is a diff rendering; the two qualifiers_nde declarations below appear to be the
// pre-change (Json_itm_kv) and post-change (Json_kv) versions of the same line, not two statements.
public void Test_qualifiers(String raw, Wdata_claim_itm_base... expd_itms) {
|
||||
Json_doc jdoc = Json_doc.new_apos_(raw);
|
||||
Json_nde qualifiers_nde = Json_nde.cast_(Json_itm_kv.cast_(jdoc.Root().Get_at(0)).Val());
|
||||
Json_nde qualifiers_nde = Json_nde.cast_(Json_kv.cast_(jdoc.Root().Get_at(0)).Val());
|
||||
Wdata_claim_grp_list actl = parser.Parse_qualifiers(Q1_bry, qualifiers_nde);
|
||||
Tfds.Eq_ary_str(expd_itms, To_ary(actl));
|
||||
}
|
||||
public void Test_references(String raw, int[] expd_order, Wdata_claim_itm_base... expd_itms) {
|
||||
Json_doc jdoc = Json_doc.new_apos_(raw);
|
||||
Json_itm_ary owner = Json_itm_ary.cast_(Json_itm_kv.cast_(jdoc.Root().Get_at(0)).Val());
|
||||
Json_ary owner = Json_ary.cast_or_null(Json_kv.cast_(jdoc.Root().Get_at(0)).Val());
|
||||
Wdata_references_grp[] actl = parser.Parse_references(Q1_bry, owner);
|
||||
Wdata_references_grp actl_grp = actl[0];
|
||||
Tfds.Eq_ary(expd_order, actl_grp.References_order());
|
||||
@@ -75,7 +75,7 @@ abstract class Wdata_doc_parser_fxt_base {
|
||||
}
|
||||
// Test helper: builds a Json_doc from raw, takes the value of the root's first key-value entry as an
// array, runs parser.Parse_pid_order on it and compares the returned ints with expd.
// NOTE(review): this is a diff rendering; the two nde declarations below appear to be the pre-change
// (Json_itm_ary / Json_itm_kv) and post-change (Json_ary / Json_kv) versions of the same line.
public void Test_pid_order(String raw, int... expd) {
|
||||
Json_doc jdoc = Json_doc.new_apos_(raw);
|
||||
Json_itm_ary nde = Json_itm_ary.cast_(Json_itm_kv.cast_(jdoc.Root().Get_at(0)).Val());
|
||||
Json_ary nde = Json_ary.cast_or_null(Json_kv.cast_(jdoc.Root().Get_at(0)).Val());
|
||||
int[] actl = parser.Parse_pid_order(Q1_bry, nde);
|
||||
Tfds.Eq_ary(expd, actl);
|
||||
}
|
||||
|
||||
@@ -27,7 +27,7 @@ public class Wdata_doc_parser_v1 implements Wdata_doc_parser {
|
||||
case Json_itm_.Tid_string: // "entity":"q1"
|
||||
return kv_val.Data_bry();
|
||||
case Json_itm_.Tid_array: // "entity":["item",1]
|
||||
Json_itm_ary kv_val_as_ary = (Json_itm_ary)kv_val;
|
||||
Json_ary kv_val_as_ary = (Json_ary)kv_val;
|
||||
Json_itm entity_id = kv_val_as_ary.Get_at(1);
|
||||
return Bry_.Add(Byte_ascii.Ltr_q, entity_id.Data_bry());
|
||||
default:
|
||||
@@ -41,16 +41,16 @@ public class Wdata_doc_parser_v1 implements Wdata_doc_parser {
|
||||
Ordered_hash rv = Ordered_hash_.new_bry_();
|
||||
int list_len = list_nde.Len();
|
||||
for (int i = 0; i < list_len; ++i) {
|
||||
Json_itm_kv wiki_kv = Json_itm_kv.cast_(list_nde.Get_at(i));
|
||||
Json_kv wiki_kv = Json_kv.cast_(list_nde.Get_at(i));
|
||||
byte[] site_bry = wiki_kv.Key().Data_bry();
|
||||
byte[] title_bry = null; byte[][] badges_bry_ary = null;
|
||||
if (wiki_kv.Val().Tid() == Json_itm_.Tid_nde) { // v1.2: "enwiki":{name:"Earth", badges:[]}
|
||||
Json_nde val_nde = Json_nde.cast_(wiki_kv.Val());
|
||||
Json_itm_kv name_kv = Json_itm_kv.cast_(val_nde.Get_at(0));
|
||||
Json_kv name_kv = Json_kv.cast_(val_nde.Get_at(0));
|
||||
title_bry = name_kv.Val().Data_bry();
|
||||
Json_itm_kv badges_kv = Json_itm_kv.cast_(val_nde.Get_at(1));
|
||||
Json_kv badges_kv = Json_kv.cast_(val_nde.Get_at(1));
|
||||
if (badges_kv != null) {// TEST:some tests do not define a badges nde; ignore if null; DATE:2014-09-19
|
||||
Json_itm_ary badges_ary = Json_itm_ary.cast_(badges_kv.Val());
|
||||
Json_ary badges_ary = Json_ary.cast_or_null(badges_kv.Val());
|
||||
badges_bry_ary = badges_ary.Xto_bry_ary();
|
||||
}
|
||||
}
|
||||
@@ -70,7 +70,7 @@ public class Wdata_doc_parser_v1 implements Wdata_doc_parser {
|
||||
Ordered_hash rv = Ordered_hash_.new_bry_();
|
||||
int list_len = list_nde.Len();
|
||||
for (int i = 0; i < list_len; ++i) {
|
||||
Json_itm_kv data_kv = Json_itm_kv.cast_(list_nde.Get_at(i));
|
||||
Json_kv data_kv = Json_kv.cast_(list_nde.Get_at(i));
|
||||
byte[] lang_bry = data_kv.Key().Data_bry();
|
||||
Wdata_langtext_itm itm = new Wdata_langtext_itm(lang_bry, data_kv.Val().Data_bry());
|
||||
rv.Add(lang_bry, itm);
|
||||
@@ -84,13 +84,13 @@ public class Wdata_doc_parser_v1 implements Wdata_doc_parser {
|
||||
Ordered_hash rv = Ordered_hash_.new_bry_();
|
||||
int list_len = list_nde.Len();
|
||||
for (int i = 0; i < list_len; ++i) {
|
||||
Json_itm_kv data_kv = Json_itm_kv.cast_(list_nde.Get_at(i));
|
||||
Json_kv data_kv = Json_kv.cast_(list_nde.Get_at(i));
|
||||
byte[] lang_bry = data_kv.Key().Data_bry();
|
||||
byte[][] vals_bry_ary = null;
|
||||
Json_itm data_val = data_kv.Val();
|
||||
switch (data_val.Tid()) {
|
||||
case Json_itm_.Tid_array: // EX: 'en':['en_val_1','en_val_2']
|
||||
Json_itm_ary vals_ary = Json_itm_ary.cast_(data_val);
|
||||
Json_ary vals_ary = Json_ary.cast_or_null(data_val);
|
||||
vals_bry_ary = vals_ary.Xto_bry_ary();
|
||||
break;
|
||||
case Json_itm_.Tid_nde: // EX: 'en':{'0:en_val_1','1:en_val_2'}; PAGE:wd.q:621080 DATE:2014-09-21
|
||||
@@ -98,7 +98,7 @@ public class Wdata_doc_parser_v1 implements Wdata_doc_parser {
|
||||
int vals_len = vals_nde.Len();
|
||||
vals_bry_ary = new byte[vals_len][];
|
||||
for (int j = 0; j < vals_len; ++j) {
|
||||
Json_itm_kv vals_sub_kv = Json_itm_kv.cast_(vals_nde.Get_at(j));
|
||||
Json_kv vals_sub_kv = Json_kv.cast_(vals_nde.Get_at(j));
|
||||
vals_bry_ary[j] = vals_sub_kv.Val().Data_bry();
|
||||
}
|
||||
break;
|
||||
@@ -112,7 +112,7 @@ public class Wdata_doc_parser_v1 implements Wdata_doc_parser {
|
||||
}
|
||||
public Ordered_hash Parse_claims(byte[] qid, Json_doc doc) {
|
||||
try {
|
||||
Json_itm_ary list_nde = Json_itm_ary.cast_(doc.Get_grp(Bry_claims)); if (list_nde == null) return Empty_ordered_hash_generic;
|
||||
Json_ary list_nde = Json_ary.cast_or_null(doc.Get_grp(Bry_claims)); if (list_nde == null) return Empty_ordered_hash_generic;
|
||||
List_adp temp_list = List_adp_.new_();
|
||||
byte[] src = doc.Src();
|
||||
int len = list_nde.Len();
|
||||
@@ -151,13 +151,13 @@ public class Wdata_doc_parser_v1 implements Wdata_doc_parser {
|
||||
int len = prop_nde.Len(); // should have 5 (m, q, g, rank, refs), but don't enforce (can rely on keys)
|
||||
Wdata_claim_itm_core rv = null;
|
||||
for (int i = 0; i < len; i++) {
|
||||
Json_itm_kv kv = Json_itm_kv.cast_(prop_nde.Get_at(i));
|
||||
Json_kv kv = Json_kv.cast_(prop_nde.Get_at(i));
|
||||
Json_itm kv_key = kv.Key();
|
||||
Byte_obj_val bv = (Byte_obj_val)Prop_key_hash.Get_by_mid(src, kv_key.Src_bgn(), kv_key.Src_end());
|
||||
if (bv == null) {Warn("invalid prop node: ~{0}", String_.new_u8(src, kv_key.Src_bgn(), kv_key.Src_end())); return null;}
|
||||
switch (bv.Val()) {
|
||||
case Prop_tid_m:
|
||||
rv = New_prop_by_m(src, Json_itm_ary.cast_(kv.Val()));
|
||||
rv = New_prop_by_m(src, Json_ary.cast_or_null(kv.Val()));
|
||||
if (rv == null) return null;
|
||||
break;
|
||||
case Prop_tid_g:
|
||||
@@ -175,7 +175,7 @@ public class Wdata_doc_parser_v1 implements Wdata_doc_parser {
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
private Wdata_claim_itm_core New_prop_by_m(byte[] src, Json_itm_ary ary) {
|
||||
private Wdata_claim_itm_core New_prop_by_m(byte[] src, Json_ary ary) {
|
||||
byte snak_tid = Wdata_dict_snak_tid.Xto_tid(ary.Get_at(0).Data_bry());
|
||||
int pid = Json_itm_int.cast_(ary.Get_at(1)).Data_as_int();
|
||||
switch (snak_tid) {
|
||||
@@ -187,13 +187,13 @@ public class Wdata_doc_parser_v1 implements Wdata_doc_parser {
|
||||
return Make_itm(pid, snak_tid, val_tid, ary);
|
||||
}
|
||||
|
||||
private Wdata_claim_itm_core Make_itm(int pid, byte snak_tid, byte val_tid, Json_itm_ary ary) {
|
||||
private Wdata_claim_itm_core Make_itm(int pid, byte snak_tid, byte val_tid, Json_ary ary) {
|
||||
switch (val_tid) {
|
||||
case Wdata_dict_val_tid.Tid_string:
|
||||
return new Wdata_claim_itm_str(pid, snak_tid, ary.Get_at(3).Data_bry());
|
||||
case Wdata_dict_val_tid.Tid_entity: {
|
||||
Json_nde sub_nde = Json_nde.cast_(ary.Get_at(3));
|
||||
Json_itm_kv entity_kv = Json_itm_kv.cast_(sub_nde.Get_at(1));
|
||||
Json_kv entity_kv = Json_kv.cast_(sub_nde.Get_at(1));
|
||||
return new Wdata_claim_itm_entity(pid, snak_tid, Wdata_dict_value_entity_tid.Tid_item, entity_kv.Val().Data_bry());
|
||||
}
|
||||
case Wdata_dict_val_tid.Tid_time: {
|
||||
@@ -216,7 +216,7 @@ public class Wdata_doc_parser_v1 implements Wdata_doc_parser {
|
||||
}
|
||||
}
|
||||
// Returns the Data_bry() of the value held by the i-th entry of sub_nde, treating that entry as a key-value item.
// NOTE(review): this is a diff rendering; the two kv declarations below appear to be the pre-change
// (Json_itm_kv) and post-change (Json_kv) versions of the same line.
private static byte[] Get_val(Json_nde sub_nde, int i) {
|
||||
Json_itm_kv kv = Json_itm_kv.cast_(sub_nde.Get_at(i));
|
||||
Json_kv kv = Json_kv.cast_(sub_nde.Get_at(i));
|
||||
return kv.Val().Data_bry();
|
||||
}
|
||||
private void Warn(String fmt, Object... args) {usr_dlg.Warn_many("", "", fmt, args);}
|
||||
@@ -233,15 +233,15 @@ public class Wdata_doc_parser_v1 implements Wdata_doc_parser {
|
||||
Ordered_hash rv = Ordered_hash_.new_bry_();
|
||||
int len = nde.Len();
|
||||
for (int i = 0; i < len; i++) {
|
||||
Json_itm_kv kv = Json_itm_kv.cast_(nde.Get_at(i));
|
||||
Json_kv kv = Json_kv.cast_(nde.Get_at(i));
|
||||
byte[] kv_key = kv.Key().Data_bry();
|
||||
rv.Add(kv_key, kv);
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
public Wdata_claim_grp_list Parse_qualifiers(byte[] qid, Json_nde nde) {throw Err_.new_unimplemented();}
|
||||
public Wdata_references_grp[] Parse_references(byte[] qid, Json_itm_ary owner) {throw Err_.new_unimplemented();}
|
||||
public int[] Parse_pid_order(byte[] qid, Json_itm_ary ary) {throw Err_.new_unimplemented();}
|
||||
public Wdata_references_grp[] Parse_references(byte[] qid, Json_ary owner) {throw Err_.new_unimplemented();}
|
||||
public int[] Parse_pid_order(byte[] qid, Json_ary ary) {throw Err_.new_unimplemented();}
|
||||
public static final String
|
||||
Str_entity = "entity"
|
||||
, Str_id = "id"
|
||||
|
||||
@@ -32,17 +32,17 @@ public class Wdata_doc_parser_v2 implements Wdata_doc_parser {
|
||||
int list_len = list_nde.Len();
|
||||
Hash_adp_bry dict = Wdata_dict_sitelink.Dict;
|
||||
for (int i = 0; i < list_len; ++i) {
|
||||
Json_itm_kv data_kv = Json_itm_kv.cast_(list_nde.Get_at(i));
|
||||
Json_kv data_kv = Json_kv.cast_(list_nde.Get_at(i));
|
||||
Json_nde data_nde = Json_nde.cast_(data_kv.Val());
|
||||
int data_nde_len = data_nde.Len();
|
||||
Json_itm_kv site_kv = null, name_kv = null; Json_itm_ary badges_ary = null;
|
||||
Json_kv site_kv = null, name_kv = null; Json_ary badges_ary = null;
|
||||
for (int j = 0; j < data_nde_len; ++j) {
|
||||
Json_itm_kv sub = Json_itm_kv.cast_(data_nde.Get_at(j));
|
||||
Json_kv sub = Json_kv.cast_(data_nde.Get_at(j));
|
||||
byte tid = Wdata_dict_utl.Get_tid_or_invalid(qid, dict, sub.Key().Data_bry()); if (tid == Wdata_dict_utl.Tid_invalid) continue;
|
||||
switch (tid) {
|
||||
case Wdata_dict_sitelink.Tid_site: site_kv = Json_itm_kv.cast_(sub); break;
|
||||
case Wdata_dict_sitelink.Tid_title: name_kv = Json_itm_kv.cast_(sub); break;
|
||||
case Wdata_dict_sitelink.Tid_badges: badges_ary = Json_itm_ary.cast_(Json_itm_kv.cast_(sub).Val()); break;
|
||||
case Wdata_dict_sitelink.Tid_site: site_kv = Json_kv.cast_(sub); break;
|
||||
case Wdata_dict_sitelink.Tid_title: name_kv = Json_kv.cast_(sub); break;
|
||||
case Wdata_dict_sitelink.Tid_badges: badges_ary = Json_ary.cast_or_null(Json_kv.cast_(sub).Val()); break;
|
||||
}
|
||||
}
|
||||
byte[] site_bry = site_kv.Val().Data_bry();
|
||||
@@ -60,16 +60,16 @@ public class Wdata_doc_parser_v2 implements Wdata_doc_parser {
|
||||
int list_len = list_nde.Len();
|
||||
Hash_adp_bry dict = Wdata_dict_langtext.Dict;
|
||||
for (int i = 0; i < list_len; ++i) {
|
||||
Json_itm_kv data_kv = Json_itm_kv.cast_(list_nde.Get_at(i));
|
||||
Json_kv data_kv = Json_kv.cast_(list_nde.Get_at(i));
|
||||
Json_nde data_nde = Json_nde.cast_(data_kv.Val());
|
||||
Json_itm_kv text_kv = null;
|
||||
Json_kv text_kv = null;
|
||||
int data_nde_len = data_nde.Len();
|
||||
for (int j = 0; j < data_nde_len; ++j) {
|
||||
Json_itm_kv sub = Json_itm_kv.cast_(data_nde.Get_at(j));
|
||||
Json_kv sub = Json_kv.cast_(data_nde.Get_at(j));
|
||||
byte tid = Wdata_dict_utl.Get_tid_or_invalid(qid, dict, sub.Key().Data_bry()); if (tid == Wdata_dict_utl.Tid_invalid) continue;
|
||||
switch (tid) {
|
||||
case Wdata_dict_langtext.Tid_language: break;
|
||||
case Wdata_dict_langtext.Tid_value: text_kv = Json_itm_kv.cast_(sub); break;
|
||||
case Wdata_dict_langtext.Tid_value: text_kv = Json_kv.cast_(sub); break;
|
||||
}
|
||||
}
|
||||
byte[] lang_bry = data_kv.Key().Data_bry();
|
||||
@@ -86,15 +86,15 @@ public class Wdata_doc_parser_v2 implements Wdata_doc_parser {
|
||||
int list_len = list_nde.Len();
|
||||
Hash_adp_bry dict = Wdata_dict_langtext.Dict;
|
||||
for (int i = 0; i < list_len; ++i) {
|
||||
Json_itm_kv data_kv = Json_itm_kv.cast_(list_nde.Get_at(i));
|
||||
Json_itm_ary vals_ary = Json_itm_ary.cast_(data_kv.Val());
|
||||
Json_kv data_kv = Json_kv.cast_(list_nde.Get_at(i));
|
||||
Json_ary vals_ary = Json_ary.cast_or_null(data_kv.Val());
|
||||
int vals_len = vals_ary.Len();
|
||||
byte[][] vals = new byte[vals_len][];
|
||||
for (int j = 0; j < vals_len; ++j) {
|
||||
Json_nde lang_nde = Json_nde.cast_(vals_ary.Get_at(j));
|
||||
int k_len = lang_nde.Len();
|
||||
for (int k = 0; k < k_len; ++k) {
|
||||
Json_itm_kv sub = Json_itm_kv.cast_(lang_nde.Get_at(k));
|
||||
Json_kv sub = Json_kv.cast_(lang_nde.Get_at(k));
|
||||
byte tid = Wdata_dict_utl.Get_tid_or_invalid(qid, dict, sub.Key().Data_bry()); if (tid == Wdata_dict_utl.Tid_invalid) continue;
|
||||
switch (tid) {
|
||||
case Wdata_dict_langtext.Tid_language: break;
|
||||
@@ -116,7 +116,7 @@ public class Wdata_doc_parser_v2 implements Wdata_doc_parser {
|
||||
byte[] src = doc.Src();
|
||||
int len = list_nde.Len();
|
||||
for (int i = 0; i < len; i++) {
|
||||
Json_itm_kv claim_nde = Json_itm_kv.cast_(list_nde.Get_at(i));
|
||||
Json_kv claim_nde = Json_kv.cast_(list_nde.Get_at(i));
|
||||
claims_parser.Make_claim_itms(qid, temp_list, src, claim_nde);
|
||||
}
|
||||
return Wdata_doc_parser_v1.Claims_list_to_hash(temp_list);
|
||||
@@ -124,8 +124,8 @@ public class Wdata_doc_parser_v2 implements Wdata_doc_parser {
|
||||
}
|
||||
public Wdata_claim_itm_base Parse_claims_data(byte[] qid, int pid, byte snak_tid, Json_nde nde) {return claims_parser.Parse_datavalue(qid, pid, snak_tid, nde);}
|
||||
public Wdata_claim_grp_list Parse_qualifiers(byte[] qid, Json_nde nde) {return claims_parser.Parse_qualifiers(qid, nde);}
|
||||
public Wdata_references_grp[] Parse_references(byte[] qid, Json_itm_ary owner) {return claims_parser.Parse_references(qid, owner);}
|
||||
public int[] Parse_pid_order(byte[] qid, Json_itm_ary ary) {return claims_parser.Parse_pid_order(ary);}
|
||||
public Wdata_references_grp[] Parse_references(byte[] qid, Json_ary owner) {return claims_parser.Parse_references(qid, owner);}
|
||||
public int[] Parse_pid_order(byte[] qid, Json_ary ary) {return claims_parser.Parse_pid_order(ary);}
|
||||
public static final String
|
||||
Str_id = "id"
|
||||
, Str_sitelinks = "sitelinks"
|
||||
|
||||
Reference in New Issue
Block a user