1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2026-03-02 03:49:30 +00:00

Table_of_contents: Skip whitespace after slash; Add <mark> as valid HTML tag [#542]

This commit is contained in:
gnosygnu
2020-03-09 07:56:21 -04:00
parent bc65e9fa61
commit ed010f76a8
5 changed files with 20 additions and 2 deletions

View File

@@ -17,7 +17,7 @@ package gplx.xowa.addons.htmls.tocs; import gplx.*; import gplx.xowa.*; import g
import gplx.langs.htmls.*; import gplx.langs.htmls.docs.*; import gplx.langs.htmls.encoders.*; import gplx.xowa.htmls.core.htmls.tidy.*;
import gplx.xowa.parsers.amps.*; import gplx.core.primitives.*;
class Xoh_toc_wkr__txt {
private final Gfh_tag_rdr tag_rdr = Gfh_tag_rdr.New__html();
private final Gfh_tag_rdr tag_rdr = Gfh_tag_rdr.New__html().Skip_ws_after_slash_y_();
private final Bry_bfr anch_bfr = Bry_bfr_.New(), text_bfr = Bry_bfr_.New();
private final Gfo_url_encoder anch_encoder = Gfo_url_encoder_.New__html_id().Make();
private final Xop_amp_mgr amp_mgr = Xop_amp_mgr.Instance;
@@ -105,6 +105,7 @@ class Xoh_toc_wkr__txt {
case Gfh_tag_.Id__i:
case Gfh_tag_.Id__b:
case Gfh_tag_.Id__bdi:
case Gfh_tag_.Id__mark:// include mark; ISSUE#:542: DATE:2020-03-09
print_tag = true;
break;
case Gfh_tag_.Id__span: // print span only if it has a dir attribute

View File

@@ -67,6 +67,9 @@ class Xoh_toc_wkr__txt__fxt {
if (expd_anch != null) Gftest.Eq__str(expd_anch, itm.Anch(), "anch");
if (expd_text != null) Gftest.Eq__str(expd_text, itm.Text(), "text");
}
// Convenience wrapper: forwards one expected String to the 3-arg Test__both so the same value is used for both expectations.
public void Test__both2(String html, String expd) {
	String expd_for_both = expd;
	Test__both(html, expd_for_both, expd_for_both);
}
public void Test__remove_comment(String html, String expd) {
byte[] html_bry = Bry_.new_u8(html);
Gftest.Eq__str(expd, Gfh_utl.Del_comments(tmp, html_bry, 0, html_bry.length));

View File

@@ -23,6 +23,7 @@ public class Xoh_toc_wkr__txt__xnde__tst {
// Inline-tag cases. NOTE(review): the 3-arg Test__both appears to take (html, expd_anch, expd_text) -- confirm against the fixture.
// sup / sub / bdi / mark: 2nd expected value is the bare content; 3rd expected value keeps the tag
@Test public void Sup() {fxt.Test__both("<sup>a</sup>" , "a", "<sup>a</sup>");}
@Test public void Sub() {fxt.Test__both("<sub>a</sub>" , "a", "<sub>a</sub>");}
@Test public void Bdi() {fxt.Test__both("<bdi>a</bdi>" , "a", "<bdi>a</bdi>");}
@Test public void Mark() {fxt.Test__both("<mark>a</mark>" , "a", "<mark>a</mark>");}
// span: dropped from both expected values when bare or when it only has an id; kept in the 3rd expected value when it has a dir attribute
@Test public void Span() {fxt.Test__both("<span>a</span>" , "a", "a");}
@Test public void Span__id() {fxt.Test__both("<span id='1'>a</span>" , "a", "a");}
@Test public void Span__dir() {fxt.Test__both("<span dir=\"ltr\">a</span>" , "a", "<span dir=\"ltr\">a</span>");}
@@ -31,6 +32,7 @@ public class Xoh_toc_wkr__txt__xnde__tst {
// anchor tags: the <a> wrapper is absent from every expected value; a nested <i> survives only in the 3rd expected value
@Test public void A() {fxt.Test__both("<a href=\"/wiki/A\">b</a>" , "b");}
@Test public void A__nest() {fxt.Test__both("<a href=\"/wiki/A\">b<i>c</i>d</a>" , "bcd", "b<i>c</i>d");}
// line-break tags: <br/>, <br/ > (whitespace after the slash), <br> and <wbr> are all expected to vanish from the output
@Test public void Br() {fxt.Test__both("a<br/>b" , "ab");}
@Test public void Br__ws() {fxt.Test__both2("a<br/ >b", "ab");}
@Test public void Br__dangling() {fxt.Test__both("a<br>b" , "ab");}
@Test public void Wbr__dangling() {fxt.Test__both("a<wbr>b" , "ab");}
@Test public void H2() {fxt.Test__both("a<h2>b</h2>c" , "abc");} // NOTE: not a valid test; MW actually generates "ab" b/c of tidy; see corresponding edit test; DATE:2016-06-28
@@ -38,6 +40,9 @@ public class Xoh_toc_wkr__txt__xnde__tst {
// table markup: checked through Test__text (not Test__both); expected value has all table tags removed
@Test public void Table() {fxt.Test__text("a<table><tr><td>b</td></tr></table>c" , "abc");}
// unknown tags: <unknown> is kept in the 3rd expected value; a nested <i> is kept there too, while a nested <a> is removed
@Test public void Unknown__i() {fxt.Test__both("a<unknown>b<i>c</i>d</unknown>e" , "abcde", "a<unknown>b<i>c</i>d</unknown>e");} // NOTE: technically, anch should be href_encoded a<unknown>b<i>c</i>d</unknown>e b/c <unknown> is not a valid tag; compare with known tags like <li> / <table> which are just stripped
@Test public void Unknown__a() {fxt.Test__both("a<unknown>b<a>c</a>d</unknown>e" , "abcde", "a<unknown>bcd</unknown>e");}
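// NOTE: disabled test; despite the name Br_w_space, the body exercises Test__remove_comment with an unterminated trailing comment ("<!--4->5")
// @Test public void Br_w_space() {
// fxt.Test__remove_comment("1<!--2-->3<!--4->5", "13");
// }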
@Test public void Fail() {
String html = "<i><a href='b'>c</i></a>";
fxt.Init__tidy(html, "<i><a href='b'>c</a></i>");