mirror of
https://github.com/gnosygnu/xowa.git
synced 2024-10-27 20:34:16 +00:00
Table_of_contents: Skip whitespace after slash; Add <mark> as valid HTML tag [#542]
This commit is contained in:
parent
bc65e9fa61
commit
ed010f76a8
@ -81,8 +81,8 @@ public class Gfh_tag_ { // NOTE: not serialized; used by tag_rdr
|
||||
, Id__rp = 59
|
||||
, Id__rt = 60
|
||||
, Id__form = 61
|
||||
, Id__mark = 62
|
||||
;
|
||||
// private static final int Id__ary_max = 60;
|
||||
public static final byte[]
|
||||
Bry__a = Bry_.new_a7("a")
|
||||
, Bry__ul = Bry_.new_a7("ul")
|
||||
@ -167,6 +167,7 @@ public class Gfh_tag_ { // NOTE: not serialized; used by tag_rdr
|
||||
.Add_str_int("rp" , Id__rp)
|
||||
.Add_str_int("rt" , Id__rt)
|
||||
.Add_str_int("form" , Id__form)
|
||||
.Add_str_int("mark" , Id__mark)
|
||||
;
|
||||
public static String To_str(int tid) {
|
||||
switch (tid) {
|
||||
@ -235,6 +236,7 @@ public class Gfh_tag_ { // NOTE: not serialized; used by tag_rdr
|
||||
case Id__rp: return "rp";
|
||||
case Id__rt: return "rt";
|
||||
case Id__form: return "form";
|
||||
case Id__mark: return "mark";
|
||||
default: throw Err_.new_unhandled(tid);
|
||||
}
|
||||
}
|
||||
|
@ -25,6 +25,7 @@ public class Gfh_tag_rdr {
|
||||
public byte[] Src() {return src;} private byte[] src;
|
||||
public int Src_end() {return src_end;} private int src_end;
|
||||
public Bry_err_wkr Err_wkr() {return err_wkr;} private final Bry_err_wkr err_wkr = new Bry_err_wkr();
|
||||
public Gfh_tag_rdr Skip_ws_after_slash_y_() {skip_ws_after_slash_y = true; return this;} private boolean skip_ws_after_slash_y;
|
||||
public Gfh_tag_rdr Reg(String tag_name, int tag_id) {name_hash.Add_str_int(tag_name, tag_id); return this;}
|
||||
public Gfh_tag_rdr Init(byte[] ctx_name, byte[] src, int src_bgn, int src_end) {
|
||||
this.src = src; this.pos = src_bgn; this.src_end = src_end;
|
||||
@ -205,6 +206,9 @@ public class Gfh_tag_rdr {
|
||||
case Byte_ascii.Slash: // EX: "<a/>"
|
||||
name_end = name_pos;
|
||||
tag_end = name_pos + 1; if (tag_end == src_end) return Tag__eos(tag_bgn);// EX: "<a/EOS"
|
||||
if (skip_ws_after_slash_y) {// skip ws after slash; EX:"<br/ >"; ISSUE#:542: DATE:2020-03-09
|
||||
tag_end = Bry_find_.Find_fwd_while_ws(src, tag_end, src_end);
|
||||
}
|
||||
if (src[tag_end] == Byte_ascii.Angle_end) {
|
||||
atrs_end = name_end;
|
||||
inline = true;
|
||||
@ -267,6 +271,9 @@ public class Gfh_tag_rdr {
|
||||
case Byte_ascii.Slash: // EX: "<a/>"
|
||||
name_end = name_pos;
|
||||
tag_end = name_pos + 1; if (tag_end == src_end) return Tag__eos(tag_bgn);// EX: "<a/EOS"
|
||||
if (skip_ws_after_slash_y) {// skip ws after slash; EX:"<br/ >"; ISSUE#:542: DATE:2020-03-09
|
||||
tag_end = Bry_find_.Find_fwd_while_ws(src, tag_end, src_end);
|
||||
}
|
||||
if (src[tag_end] == Byte_ascii.Angle_end) {
|
||||
atrs_end = name_end;
|
||||
inline = true;
|
||||
|
@ -17,7 +17,7 @@ package gplx.xowa.addons.htmls.tocs; import gplx.*; import gplx.xowa.*; import g
|
||||
import gplx.langs.htmls.*; import gplx.langs.htmls.docs.*; import gplx.langs.htmls.encoders.*; import gplx.xowa.htmls.core.htmls.tidy.*;
|
||||
import gplx.xowa.parsers.amps.*; import gplx.core.primitives.*;
|
||||
class Xoh_toc_wkr__txt {
|
||||
private final Gfh_tag_rdr tag_rdr = Gfh_tag_rdr.New__html();
|
||||
private final Gfh_tag_rdr tag_rdr = Gfh_tag_rdr.New__html().Skip_ws_after_slash_y_();
|
||||
private final Bry_bfr anch_bfr = Bry_bfr_.New(), text_bfr = Bry_bfr_.New();
|
||||
private final Gfo_url_encoder anch_encoder = Gfo_url_encoder_.New__html_id().Make();
|
||||
private final Xop_amp_mgr amp_mgr = Xop_amp_mgr.Instance;
|
||||
@ -105,6 +105,7 @@ class Xoh_toc_wkr__txt {
|
||||
case Gfh_tag_.Id__i:
|
||||
case Gfh_tag_.Id__b:
|
||||
case Gfh_tag_.Id__bdi:
|
||||
case Gfh_tag_.Id__mark:// include mark; ISSUE#:542: DATE:2020-03-09
|
||||
print_tag = true;
|
||||
break;
|
||||
case Gfh_tag_.Id__span: // print span only if it has a dir attribute
|
||||
|
@ -67,6 +67,9 @@ class Xoh_toc_wkr__txt__fxt {
|
||||
if (expd_anch != null) Gftest.Eq__str(expd_anch, itm.Anch(), "anch");
|
||||
if (expd_text != null) Gftest.Eq__str(expd_text, itm.Text(), "text");
|
||||
}
|
||||
public void Test__both2(String html, String expd) {
|
||||
Test__both(html, expd, expd);
|
||||
}
|
||||
public void Test__remove_comment(String html, String expd) {
|
||||
byte[] html_bry = Bry_.new_u8(html);
|
||||
Gftest.Eq__str(expd, Gfh_utl.Del_comments(tmp, html_bry, 0, html_bry.length));
|
||||
|
@ -23,6 +23,7 @@ public class Xoh_toc_wkr__txt__xnde__tst {
|
||||
@Test public void Sup() {fxt.Test__both("<sup>a</sup>" , "a", "<sup>a</sup>");}
|
||||
@Test public void Sub() {fxt.Test__both("<sub>a</sub>" , "a", "<sub>a</sub>");}
|
||||
@Test public void Bdi() {fxt.Test__both("<bdi>a</bdi>" , "a", "<bdi>a</bdi>");}
|
||||
@Test public void Mark() {fxt.Test__both("<mark>a</mark>" , "a", "<mark>a</mark>");}
|
||||
@Test public void Span() {fxt.Test__both("<span>a</span>" , "a", "a");}
|
||||
@Test public void Span__id() {fxt.Test__both("<span id='1'>a</span>" , "a", "a");}
|
||||
@Test public void Span__dir() {fxt.Test__both("<span dir=\"ltr\">a</span>" , "a", "<span dir=\"ltr\">a</span>");}
|
||||
@ -31,6 +32,7 @@ public class Xoh_toc_wkr__txt__xnde__tst {
|
||||
@Test public void A() {fxt.Test__both("<a href=\"/wiki/A\">b</a>" , "b");}
|
||||
@Test public void A__nest() {fxt.Test__both("<a href=\"/wiki/A\">b<i>c</i>d</a>" , "bcd", "b<i>c</i>d");}
|
||||
@Test public void Br() {fxt.Test__both("a<br/>b" , "ab");}
|
||||
@Test public void Br__ws() {fxt.Test__both2("a<br/ >b", "ab");}
|
||||
@Test public void Br__dangling() {fxt.Test__both("a<br>b" , "ab");}
|
||||
@Test public void Wbr__dangling() {fxt.Test__both("a<wbr>b" , "ab");}
|
||||
@Test public void H2() {fxt.Test__both("a<h2>b</h2>c" , "abc");} // NOTE: not a valid test; MW actually generates "ab" b/c of tidy; see corresponding edit test; DATE:2016-06-28
|
||||
@ -38,6 +40,9 @@ public class Xoh_toc_wkr__txt__xnde__tst {
|
||||
@Test public void Table() {fxt.Test__text("a<table><tr><td>b</td></tr></table>c" , "abc");}
|
||||
@Test public void Unknown__i() {fxt.Test__both("a<unknown>b<i>c</i>d</unknown>e" , "abcde", "a<unknown>b<i>c</i>d</unknown>e");} // NOTE: technically, anch should be href_encoded a<unknown>b<i>c</i>d</unknown>e b/c <unknown> is not a valid tag; compare with known tags like <li> / <table> which are just stripped
|
||||
@Test public void Unknown__a() {fxt.Test__both("a<unknown>b<a>c</a>d</unknown>e" , "abcde", "a<unknown>bcd</unknown>e");}
|
||||
// @Test public void Br_w_space() {
|
||||
// fxt.Test__remove_comment("1<!--2-->3<!--4->5", "13");
|
||||
// }
|
||||
@Test public void Fail() {
|
||||
String html = "<i><a href='b'>c</i></a>";
|
||||
fxt.Init__tidy(html, "<i><a href='b'>c</a></i>");
|
||||
|
Loading…
Reference in New Issue
Block a user