Wikidata: Trim whitespace from start and end of property keys [#361]

pull/620/head
gnosygnu 5 years ago
parent 298c83a806
commit 21309d66be

@ -184,6 +184,7 @@ public class Wdata_pf_property__basic__tst {
fxt.Test_parse("{{#property:p1|from=P2}}", "a");
fxt.Test_parse("{{#property:p1|from=}}", "");
fxt.Test_parse("{{#property:p1| from = P2 }}", "a"); // PURPOSE: trim ws; ISSUE#:361; DATE:2019-02-11
}
@Test public void Pid_as_name() {
fxt.Init__docs__add(fxt.Wdoc("Q2")

@ -35,6 +35,12 @@ public class Wdata_pf_property_data {
Arg_itm_tkn nde_key = nde.Key_tkn();
int key_bgn = nde_key.Src_bgn(), key_end = nde_key.Src_end();
if (key_bgn == key_end && key_bgn == -1) continue; // null arg; ignore, else will throw warning below; EX: {{#property:p1|}}; DATE:2013-11-15
// trim ws; ISSUE#:361; DATE:2019-02-11
key_bgn = Bry_find_.Find_fwd_while_space_or_tab(src, key_bgn, key_end);
key_end = Bry_find_.Find_bwd__while_space_or_tab(src, key_end, key_bgn);
// get key_tid
byte key_tid = atrs_hash.Get_as_byte_or(src, key_bgn, key_end, Byte_.Max_value_127);
switch (key_tid) {
case Byte_.Max_value_127:

@ -1,239 +0,0 @@
commit cd56234e281079c95afad775cb4da2ba70c03783
Author: gnosygnu <gnosygnu@gmail.com>
Date: Sun Feb 3 23:48:23 2019 -0500
HTML Databases: Show redlinks for htxt [#320]
diff --git a/400_xowa/src/gplx/xowa/htmls/core/wkrs/Xoh_hdoc_wkr__make.java b/400_xowa/src/gplx/xowa/htmls/core/wkrs/Xoh_hdoc_wkr__make.java
index 1323bc1..44d4766 100644
--- a/400_xowa/src/gplx/xowa/htmls/core/wkrs/Xoh_hdoc_wkr__make.java
+++ b/400_xowa/src/gplx/xowa/htmls/core/wkrs/Xoh_hdoc_wkr__make.java
@@ -14,21 +14,54 @@ GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.htmls.core.wkrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*;
-import gplx.langs.htmls.docs.*;
-import gplx.xowa.wikis.ttls.*;
-import gplx.xowa.htmls.core.hzips.*;
-import gplx.xowa.htmls.core.wkrs.hdrs.*; import gplx.xowa.htmls.core.wkrs.imgs.*;
+import gplx.langs.htmls.docs.*; import gplx.langs.htmls.encoders.*; 
+import gplx.xowa.htmls.core.hzips.*; import gplx.xowa.htmls.core.wkrs.hdrs.*; import gplx.xowa.htmls.core.wkrs.imgs.*; import gplx.xowa.htmls.core.wkrs.lnkis.*; import gplx.xowa.htmls.core.wkrs.lnkis.anchs.*; 
+import gplx.xowa.wikis.ttls.*; 
public class Xoh_hdoc_wkr__make implements Xoh_hdoc_wkr {
private Xoh_hzip_bfr bfr; private Xoh_page hpg; private Xoh_hdoc_ctx hctx; private byte[] src;
private final Xoh_hdr_wtr wkr__hdr = new Xoh_hdr_wtr();
- private final Xoh_img_wtr wkr__img = new Xoh_img_wtr(); 
+ private final Xoh_img_wtr wkr__img = new Xoh_img_wtr(); 
+ private int html_uid; 
public void On_new_page(Xoh_hzip_bfr bfr, Xoh_page hpg, Xoh_hdoc_ctx hctx, byte[] src, int src_bgn, int src_end) { // stores bfr / hpg / hctx / src on this worker for the other On_* callbacks; src_bgn and src_end are not read here
this.bfr = bfr; this.hpg = hpg; this.hctx = hctx; this.src = src;
+ this.html_uid = 0; // restart the link uid counter for every new page
+ } 
+ public void On_txt(int rng_bgn, int rng_end) { 
+ // text; just add it: the rng_bgn..rng_end span of src is copied to bfr with no changes 
+ bfr.Add_mid(src, rng_bgn, rng_end); 
+ } 
+ public void On_escape(gplx.xowa.htmls.core.wkrs.escapes.Xoh_escape_data data) { 
+ // hzip escape byte ((byte)27); should never happen but if it does, add it (data.Hook() is written to bfr as-is) 
+ bfr.Add(data.Hook()); 
+ } 
+ public void On_xnde(gplx.xowa.htmls.core.wkrs.xndes.Xoh_xnde_parser data) { 
+ // regular xml node; just add it 
+ bfr.Add_mid(src, data.Src_bgn(), data.Src_end()); 
+ } 
+ public void On_lnki(gplx.xowa.htmls.core.wkrs.lnkis.Xoh_lnki_data data) { 
+ // <a> node: registers the link title through Lnki_redlink_reg and rewrites the leading "<a " to "<a id="xolnki_{html_uid}" "; the rest of the node is copied from src unchanged 
+ // handle "#": hrefs whose Tid is Tid__anch are copied through as-is and are never registered 
+ if (data.Href_itm().Tid() == Xoh_anch_href_data.Tid__anch) { 
+ bfr.Add_mid(src, data.Src_bgn(), data.Src_end()); 
+ return; 
+ } 
+ 
+ // increment html_uid and add "id=xolnki_"; the new html_uid is whatever Lnki_redlink_reg returns 
+ byte[] ttl_bry = data.Href_itm().Ttl_page_db(); 
+ this.html_uid = Lnki_redlink_reg(hpg, hctx, ttl_bry, html_uid); // NOTE(review): Lnki_redlink_reg hands back the same html_uid when it skips (diff mode) or fails, yet the id attribute below is still written -- confirm that reusing an earlier uid is acceptable
+ int src_bgn_lhs = data.Src_bgn(); 
+ int src_bgn_rhs = src_bgn_lhs + 3; // +3 to skip over "<a " 
+ if (Bry_.Match(src, src_bgn_lhs, src_bgn_rhs, Bry__a__bgn)) { // only rewrite when the first 3 bytes of the node really are "<a "
+ bfr.Add(Bry__a__id); 
+ bfr.Add_int_variable(html_uid); 
+ bfr.Add_byte_quote().Add_byte_space(); // closes the id value and separates it from the original attributes
+ bfr.Add_mid(src, src_bgn_rhs, data.Src_end()); // remainder of the original node, starting right after "<a "
+ } 
+ else { // unexpected start of node: write it unchanged and log a warning; note the title has already been passed to Lnki_redlink_reg above
+ bfr.Add_mid(src, data.Src_bgn(), data.Src_end()); 
+ Gfo_usr_dlg_.Instance.Warn_many("", "", "anchor hook should start with <a; url=~{0}", hpg.Url_bry_safe()); 
+ } 
}
- public void On_txt (int rng_bgn, int rng_end) {bfr.Add_mid(src, rng_bgn, rng_end);}
- public void On_escape (gplx.xowa.htmls.core.wkrs.escapes.Xoh_escape_data data) {bfr.Add(data.Hook());}
- public void On_xnde (gplx.xowa.htmls.core.wkrs.xndes.Xoh_xnde_parser data) {bfr.Add_mid(src, data.Src_bgn(), data.Src_end());}
- public void On_lnki (gplx.xowa.htmls.core.wkrs.lnkis.Xoh_lnki_data data) {bfr.Add_mid(src, data.Src_bgn(), data.Src_end());}
public void On_thm (gplx.xowa.htmls.core.wkrs.thms.Xoh_thm_data data) {
Xoh_img_data img_data = (gplx.xowa.htmls.core.wkrs.imgs.Xoh_img_data)data.Img_data();
bfr.Add_mid(src, data.Src_bgn(), img_data.Src_bgn()); 
@@ -50,4 +83,18 @@ public class Xoh_hdoc_wkr__make implements Xoh_hdoc_wkr {
}
return true;
}
+ public static int Lnki_redlink_reg(Xoh_page hpg, Xoh_hdoc_ctx hctx, byte[] href_bry, int html_uid) { // adds the title parsed from href_bry to hpg's redlink list; returns the new item's Html_uid(), or the html_uid argument unchanged when skipped or on failure
+ if (hctx.Mode_is_diff()) return html_uid; // PERF: don't do redlinks during hzip_diff 
+ try { 
+ Xoa_ttl ttl = hpg.Wiki().Ttl_parse(Gfo_url_encoder_.Href.Decode(href_bry)); // decode the href bytes first, then parse the result as a title of hpg's wiki
+ Xopg_lnki_itm__hdump lnki_itm = new Xopg_lnki_itm__hdump(ttl); 
+ hpg.Html_data().Redlink_list().Add(lnki_itm); 
+ return lnki_itm.Html_uid(); // NOTE(review): the uid is presumably assigned when the item is added to the list -- confirm in the list implementation
+ } 
+ catch (Exception e) { // best-effort: any failure is logged as a warning and the caller keeps its current html_uid
+ Gfo_log_.Instance.Warn("failed to add lnki to redlinks", "page", hpg.Url_bry_safe(), "href_bry", href_bry, "e", Err_.Message_gplx_log(e)); 
+ return html_uid; 
+ } 
+ } 
+ private static final byte[] Bry__a__bgn = Bry_.new_a7("<a "), Bry__a__id = Bry_.new_a7("<a id=\"xolnki_"); // Bry__a__bgn: the 3 bytes an anchor node is expected to start with; Bry__a__id: replacement prefix that opens the id attribute (the uid and closing quote are appended by On_lnki)
}
diff --git a/400_xowa/src/gplx/xowa/htmls/core/wkrs/Xoh_htxt_fxt.java b/400_xowa/src/gplx/xowa/htmls/core/wkrs/Xoh_htxt_fxt.java
new file mode 100644
index 0000000..d9393ec
--- /dev/null
+++ b/400_xowa/src/gplx/xowa/htmls/core/wkrs/Xoh_htxt_fxt.java
@@ -0,0 +1,58 @@
+/*
+XOWA: the XOWA Offline Wiki Application
+Copyright (C) 2012-2017 gnosygnu@gmail.com
+
+XOWA is licensed under the terms of the General Public License (GPL) Version 3,
+or alternatively under the terms of the Apache License Version 2.0.
+
+You may use XOWA according to either of these licenses as is most appropriate
+for your project on a case-by-case basis.
+
+The terms of each license can be found in the source code repository:
+
+GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
+Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
+*/
+package gplx.xowa.htmls.core.wkrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*; 
+import gplx.core.tests.*; 
+import gplx.xowa.files.caches.*; 
+import gplx.langs.htmls.*; import gplx.xowa.htmls.core.hzips.*; import gplx.xowa.htmls.core.makes.*; import gplx.xowa.htmls.core.htmls.*; import gplx.xowa.wikis.pages.*; 
+import gplx.xowa.wikis.pages.lnkis.*; 
+public class Xoh_htxt_fxt { // test fixture: runs htxt through Xoh_make_mgr.Parse against an in-fixture page and checks the produced html and the page's redlink list
+ private final Xowe_wiki wiki; 
+ private final Xop_fxt parser_fxt = new Xop_fxt(); 
+ private final Xoh_page hpg = new Xoh_page(); 
+ private final Xoh_make_mgr make_mgr = new Xoh_make_mgr(); 
+ public Xoh_htxt_fxt() { 
+ this.wiki = parser_fxt.Wiki(); 
+ Xoa_app_fxt.repo2_(parser_fxt.App(), wiki); // needed else will be old "mem/wiki/repo/trg/thumb/" instead of standard "mem/file/en.wikipedia.org/thumb/" 
+ wiki.Html__hdump_mgr().Init_by_db(parser_fxt.Wiki()); 
+ parser_fxt.Hctx_(Xoh_wtr_ctx.Hdump_by_hzip_tid(Xoh_hzip_dict_.Hzip__none)); // parser ctx is set to an hdump ctx built with the Hzip__none tid
+ hpg.Ctor_by_hview(wiki, Xoa_url.blank(), parser_fxt.Wiki().Ttl_parse(Xoa_page_.Main_page_bry), 1); // page under test uses a blank url and the Main_page title
+ } 
+ public Xow_wiki Wiki() {return wiki;} 
+ public Xoa_page Page() {return hpg;} 
+ public void Clear() {hpg.Clear();} 
+ public void Test__decode(String htxt) {Test__decode(htxt, htxt);} // shortcut: expected html is identical to the input htxt
+ public void Test__decode(String htxt, String html) { // both strings go through Gfh_utl.Replace_apos before comparing; presumably this lets tests write ' in place of " -- confirm in Gfh_utl
+ htxt = Gfh_utl.Replace_apos(htxt); 
+ html = Gfh_utl.Replace_apos(html); 
+ Test__decode__raw(htxt, html); 
+ } 
+ public void Test__decode__raw(String htxt, String expd) { // parses htxt as-is (no apos replacement) and compares the result to expd
+ hpg.Section_mgr().Clear(); // sections are cleared before every parse
+ byte[] actl = make_mgr.Parse(Bry_.new_u8(htxt), hpg, hpg.Wiki()); 
+ Tfds.Eq_str_lines(expd, String_.new_u8(actl)); 
+ } 
+ public void Test__hpg__redlinks(String... expd_ttls) { // compares expd_ttls with the Full_db_as_str() title of every item currently in hpg's redlink list, in list order
+ Xopg_lnki_list actl_list = hpg.Html_data().Redlink_list(); 
+ int len = actl_list.Len(); 
+ String[] actl_ttls = new String[len]; 
+ for (int i = 0; i < len; i++) { 
+ Xopg_lnki_itm actl_itm = actl_list.Get_at(i); 
+ actl_ttls[i] = actl_itm.Ttl().Full_db_as_str(); 
+ } 
+ Gftest.Eq__ary(expd_ttls, actl_ttls); 
+ } 
+ public static String Escape(String v) {return String_.Replace(v, "~", "");} // as written, removes every "~" from v; NOTE(review): the replacement literal appears empty here -- confirm it was not meant to hold a non-printing character
+} 
diff --git a/400_xowa/src/gplx/xowa/htmls/core/wkrs/escapes/Xoh_escape_hzip.java b/400_xowa/src/gplx/xowa/htmls/core/wkrs/escapes/Xoh_escape_hzip.java
index ded5b82..531e06d 100644
--- a/400_xowa/src/gplx/xowa/htmls/core/wkrs/escapes/Xoh_escape_hzip.java
+++ b/400_xowa/src/gplx/xowa/htmls/core/wkrs/escapes/Xoh_escape_hzip.java
@@ -22,6 +22,7 @@ public class Xoh_escape_hzip implements Xoh_hzip_wkr, Gfo_poolable_itm {
public byte[] Hook() {return hook;} private byte[] hook;
public String Key() {return Xoh_hzip_dict_.Key__escape;}
public Gfo_poolable_itm Encode1(Xoh_hzip_bfr bfr, Xoh_hdoc_wkr hdoc_wkr, Xoh_hdoc_ctx hctx, Xoh_page hpg, boolean wkr_is_root, byte[] src, Object data_obj) {
+ // escapes an escape byte; should not happen, since (byte)27 doesn't exist in an html document, but just to be sure 
Xoh_escape_data data = (Xoh_escape_data)data_obj;
bfr.Add(hook); // EX: 1,0
bfr.Add(data.Hook()); // EX: 2
diff --git a/400_xowa/src/gplx/xowa/htmls/core/wkrs/lnkis/Xoh_lnki_htxt__tst.java b/400_xowa/src/gplx/xowa/htmls/core/wkrs/lnkis/Xoh_lnki_htxt__tst.java
new file mode 100644
index 0000000..d70bd37
--- /dev/null
+++ b/400_xowa/src/gplx/xowa/htmls/core/wkrs/lnkis/Xoh_lnki_htxt__tst.java
@@ -0,0 +1,32 @@
+/*
+XOWA: the XOWA Offline Wiki Application
+Copyright (C) 2012-2017 gnosygnu@gmail.com
+
+XOWA is licensed under the terms of the General Public License (GPL) Version 3,
+or alternatively under the terms of the Apache License Version 2.0.
+
+You may use XOWA according to either of these licenses as is most appropriate
+for your project on a case-by-case basis.
+
+The terms of each license can be found in the source code repository:
+
+GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
+Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
+*/
+package gplx.xowa.htmls.core.wkrs.lnkis; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*; import gplx.xowa.htmls.core.wkrs.*; 
+import org.junit.*; 
+public class Xoh_lnki_htxt__tst { // tests for <a> handling when htxt is decoded: id injection and redlink registration
+ private final Xoh_htxt_fxt fxt = new Xoh_htxt_fxt(); 
+ @Test public void Redlink__basic() { // each page link gets an id='xolnki_N' attribute and its title is added to the page's redlink list
+ fxt.Test__decode 
+ ( "<a href='/wiki/A'>a</a> <a href='/wiki/B'>b</a>" 
+ , "<a id='xolnki_2' href='/wiki/A'>a</a> <a id='xolnki_3' href='/wiki/B'>b</a>"); // NOTE(review): the expected uids start at 2; the reason is not visible in this test -- confirm against the redlink list implementation
+ fxt.Test__hpg__redlinks("A", "B"); 
+ } 
+ @Test public void Redlink__anchor() { // a "#"-only href is left untouched and nothing is added to the redlink list
+ fxt.Test__decode 
+ ( "<a href='#A'>a</a>" 
+ , "<a href='#A'>a</a>"); 
+ fxt.Test__hpg__redlinks(); 
+ } 
+} 
diff --git a/400_xowa/src/gplx/xowa/htmls/core/wkrs/lnkis/Xoh_lnki_hzip.java b/400_xowa/src/gplx/xowa/htmls/core/wkrs/lnkis/Xoh_lnki_hzip.java
index 889f2bb..41186df 100644
--- a/400_xowa/src/gplx/xowa/htmls/core/wkrs/lnkis/Xoh_lnki_hzip.java
+++ b/400_xowa/src/gplx/xowa/htmls/core/wkrs/lnkis/Xoh_lnki_hzip.java
@@ -124,14 +124,7 @@ public class Xoh_lnki_hzip implements Xoh_hzip_wkr, Gfo_poolable_itm {
href_bry = tmp_bfr.To_bry_and_clear();

// generate stub for redlink
- if ( !hctx.Mode_is_diff()) { // PERF: don't do redlinks during hzip_diff
- try {
- Xoa_ttl ttl = hpg.Wiki().Ttl_parse(Gfo_url_encoder_.Href.Decode(href_bry));
- Xopg_lnki_itm__hdump lnki_itm = new Xopg_lnki_itm__hdump(ttl);
- hpg.Html_data().Redlink_list().Add(lnki_itm);
- html_uid = lnki_itm.Html_uid();
- } catch (Exception e) {Gfo_log_.Instance.Warn("failed to add lnki to redlinks", "page", hpg.Url_bry_safe(), "href_bry", href_bry, "e", Err_.Message_gplx_log(e));}
- }
+ html_uid = Xoh_hdoc_wkr__make.Lnki_redlink_reg(hpg, hctx, href_bry, html_uid); 
break;
}
byte[] capt_bry = Xoh_lnki_hzip_.Bld_capt(tmp_bfr, href_type, text_type, capt_has_ns, capt_cs0_tid, ns_bry, src, text_0_bgn, text_0_end, src, text_1_bgn, text_1_end);
diff --git a/400_xowa/src/gplx/xowa/htmls/core/wkrs/lnkis/Xopg_lnki_itm__hdump.java b/400_xowa/src/gplx/xowa/htmls/core/wkrs/lnkis/Xopg_lnki_itm__hdump.java
index 2af9d2f..dc8fb60 100644
--- a/400_xowa/src/gplx/xowa/htmls/core/wkrs/lnkis/Xopg_lnki_itm__hdump.java
+++ b/400_xowa/src/gplx/xowa/htmls/core/wkrs/lnkis/Xopg_lnki_itm__hdump.java
@@ -15,7 +15,7 @@ Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.htmls.core.wkrs.lnkis; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*; import gplx.xowa.htmls.core.wkrs.*;
import gplx.xowa.wikis.pages.lnkis.*;
-class Xopg_lnki_itm__hdump implements Xopg_lnki_itm {
+public class Xopg_lnki_itm__hdump implements Xopg_lnki_itm { 
public Xopg_lnki_itm__hdump(Xoa_ttl ttl) {this.ttl = ttl;}
public Xoa_ttl Ttl() {return
Loading…
Cancel
Save