From 87cb6b5a01556bc17b446c52648cbebc21c7b033 Mon Sep 17 00:00:00 2001 From: gnosygnu Date: Thu, 27 Dec 2018 09:41:00 -0500 Subject: [PATCH] Html: Do not hzip anchors with invalid href [#311] --- .../src/gplx/xowa/htmls/core/wkrs/lnkis/Xoh_lnki_data.java | 6 ++++-- .../htmls/core/wkrs/lnkis/Xoh_lnki_hzip__anch__tst.java | 5 ++++- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/400_xowa/src/gplx/xowa/htmls/core/wkrs/lnkis/Xoh_lnki_data.java b/400_xowa/src/gplx/xowa/htmls/core/wkrs/lnkis/Xoh_lnki_data.java index 42f45286e..7f6b8071b 100644 --- a/400_xowa/src/gplx/xowa/htmls/core/wkrs/lnkis/Xoh_lnki_data.java +++ b/400_xowa/src/gplx/xowa/htmls/core/wkrs/lnkis/Xoh_lnki_data.java @@ -56,20 +56,21 @@ public class Xoh_lnki_data { this.src_bgn = anch_head.Src_bgn(); rdr.Init_by_wkr(tag_rdr.Err_wkr(), "lnki", src_bgn, src.length); Gfh_atr title_atr = anch_head.Atrs__get_by_or_empty(Gfh_atr_.Bry__title); - Parse_href(hctx, anch_head); + if (!Parse_href(hctx, anch_head)) return false; Parse_cls(anch_head); Parse_capt(tag_rdr, anch_head); Parse_title(title_atr); hdoc_wkr.On_lnki(this); return true; } - private void Parse_href(Xoh_hdoc_ctx hctx, Gfh_tag anch_head) { + private boolean Parse_href(Xoh_hdoc_ctx hctx, Gfh_tag anch_head) { href_itm.Parse(rdr.Err_wkr(), hctx, src, anch_head); this.href_bgn = href_itm.Ttl_bgn(); this.href_end = href_itm.Ttl_end(); switch (href_itm.Tid()) { case Xoh_anch_href_data.Tid__wiki: case Xoh_anch_href_data.Tid__site: this.href_ns_id = href_itm.Ttl_ns_id(); this.href_src = href_itm.Ttl_full_txt(); + if (this.href_src == null) return false; // NOTE: handle invalid href such as embedded quotes; ISSUE#:311; PAGE:en.v:Research_in_programming_Wikidata/Banks DATE:2018-12-27 this.href_bgn = 0; this.href_end = href_src.length; if (href_ns_id != Xow_ns_.Tid__main) { // not main; try to remove template name; @@ -79,6 +80,7 @@ public class Xoh_lnki_data { } break; } + return true; } private void Parse_cls(Gfh_tag anch_head) { byte[] cls_bry = anch_head.Atrs__get_as_bry(Gfh_atr_.Bry__class); if (Bry_.Len_eq_0(cls_bry)) return; diff --git a/400_xowa/src/gplx/xowa/htmls/core/wkrs/lnkis/Xoh_lnki_hzip__anch__tst.java b/400_xowa/src/gplx/xowa/htmls/core/wkrs/lnkis/Xoh_lnki_hzip__anch__tst.java index f4ccb877f..d77e8695d 100644 --- a/400_xowa/src/gplx/xowa/htmls/core/wkrs/lnkis/Xoh_lnki_hzip__anch__tst.java +++ b/400_xowa/src/gplx/xowa/htmls/core/wkrs/lnkis/Xoh_lnki_hzip__anch__tst.java @@ -16,7 +16,7 @@ Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt package gplx.xowa.htmls.core.wkrs.lnkis; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*; import gplx.xowa.htmls.core.wkrs.*; import org.junit.*; public class Xoh_lnki_hzip__anch__tst { - private final Xoh_hzip_fxt fxt = new Xoh_hzip_fxt().Init_mode_diff_y_(); + private final Xoh_hzip_fxt fxt = new Xoh_hzip_fxt().Init_mode_diff_y_(); @Test public void Basic() { // EX: [[#a]] fxt.Test__bicode("~$Ba~#a~", "#a"); } @@ -26,6 +26,9 @@ public class Xoh_lnki_hzip__anch__tst { @Test public void Capt_similar() { // EX: [[#a|a]] fxt.Test__bicode("~$Ba~a~", "a"); } + @Test public void Quote() { // PURPOSE: handle invalid href such as embedded quotes; ISSUE#:311; PAGE:en.v:Research_in_programming_Wikidata/Banks DATE:2018-12-27 + fxt.Test__encode("", ""); + } @Test public void Error() { // EX: [[#a|b]]; make sure bad title character does not cause error fxt.Test__bicode("~$Ba|b~#a|b~", "#a|b"); // NOTE: the "|" should be url-encoded }