1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2024-09-28 22:40:50 +00:00

Html: Do not hzip anchors with invalid href [#311]

This commit is contained in:
gnosygnu 2018-12-27 09:41:00 -05:00
parent 1f2b73699b
commit 87cb6b5a01
2 changed files with 8 additions and 3 deletions

View File

@ -56,20 +56,21 @@ public class Xoh_lnki_data {
this.src_bgn = anch_head.Src_bgn();
rdr.Init_by_wkr(tag_rdr.Err_wkr(), "lnki", src_bgn, src.length);
Gfh_atr title_atr = anch_head.Atrs__get_by_or_empty(Gfh_atr_.Bry__title);
Parse_href(hctx, anch_head);
if (!Parse_href(hctx, anch_head)) return false;
Parse_cls(anch_head);
Parse_capt(tag_rdr, anch_head);
Parse_title(title_atr);
hdoc_wkr.On_lnki(this);
return true;
}
private void Parse_href(Xoh_hdoc_ctx hctx, Gfh_tag anch_head) {
private boolean Parse_href(Xoh_hdoc_ctx hctx, Gfh_tag anch_head) {
href_itm.Parse(rdr.Err_wkr(), hctx, src, anch_head);
this.href_bgn = href_itm.Ttl_bgn(); this.href_end = href_itm.Ttl_end();
switch (href_itm.Tid()) {
case Xoh_anch_href_data.Tid__wiki: case Xoh_anch_href_data.Tid__site:
this.href_ns_id = href_itm.Ttl_ns_id();
this.href_src = href_itm.Ttl_full_txt();
if (this.href_src == null) return false; // NOTE: handle invalid href such as embedded quotes; ISSUE#:311; PAGE:en.v:Research_in_programming_Wikidata/Banks DATE:2018-12-27
this.href_bgn = 0;
this.href_end = href_src.length;
if (href_ns_id != Xow_ns_.Tid__main) { // not main; try to remove template name;
@ -79,6 +80,7 @@ public class Xoh_lnki_data {
}
break;
}
return true;
}
private void Parse_cls(Gfh_tag anch_head) {
byte[] cls_bry = anch_head.Atrs__get_as_bry(Gfh_atr_.Bry__class); if (Bry_.Len_eq_0(cls_bry)) return;

View File

@ -16,7 +16,7 @@ Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
package gplx.xowa.htmls.core.wkrs.lnkis; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.*; import gplx.xowa.htmls.core.wkrs.*;
import org.junit.*;
public class Xoh_lnki_hzip__anch__tst {
private final Xoh_hzip_fxt fxt = new Xoh_hzip_fxt().Init_mode_diff_y_();
private final Xoh_hzip_fxt fxt = new Xoh_hzip_fxt().Init_mode_diff_y_();
// EX: [[#a]]; anchor-only link whose caption text matches its href
@Test public void Basic() {
  String hzip = "~$Ba~#a~";
  String html = "<a href='#a'>#a</a>";
  fxt.Test__bicode(hzip, html);
}
@ -26,6 +26,9 @@ public class Xoh_lnki_hzip__anch__tst {
// EX: [[#a|a]]; caption is the href without the leading "#"
@Test public void Capt_similar() {
  String hzip = "~$Ba~a~";
  String html = "<a href='#a'>a</a>";
  fxt.Test__bicode(hzip, html);
}
// PURPOSE: handle invalid href such as embedded quotes; ISSUE#:311; PAGE:en.v:Research_in_programming_Wikidata/Banks DATE:2018-12-27
@Test public void Quote() {
  // Both arguments are the same markup: the anchor with the quote-laden href
  // is expected to come out of the encode check unchanged.
  String html = "<a href=\"#a\"b\"c\"></a>";
  fxt.Test__encode(html, html);
}
@Test public void Error() { // EX: [[#a|b]]; make sure bad title character does not cause error
fxt.Test__bicode("~$Ba|b~#a|b~", "<a href='#a|b'>#a|b</a>"); // NOTE: the "|" should be url-encoded
}