Parser: Change encoding of html id to encode fewer characters [#462]

pull/620/head
gnosygnu 5 years ago
parent ba35901865
commit c07e6e25b5

@ -127,3 +127,11 @@ public class Byte_ascii {
, Num_1_bry = new byte[] {Byte_ascii.Num_1}
;
}
/*
SYMBOLS
-------
first const | last const | byte range | characters
Byte_ascii.Bang | Byte_ascii.Slash | 33 -> 47 | !"#$%&'()*+,-./
Byte_ascii.Colon | Byte_ascii.At | 58 -> 64 | :;<=>?@
Byte_ascii.Brack_bgn | Byte_ascii.Tick | 91 -> 96 | [\]^_`
Byte_ascii.Curly_bgn | Byte_ascii.Tilde | 123 -> 126 | {|}~
*/

@ -17,12 +17,14 @@ package gplx.langs.htmls.encoders; import gplx.*; import gplx.langs.*; import gp
import gplx.core.btries.*;
import gplx.langs.htmls.entitys.*;
public class Gfo_url_encoder_ {
// convenience factory: returns the finished html-id encoder by calling New__html_id().Make()
public static Gfo_url_encoder New__id() {return Gfo_url_encoder_.New__html_id().Make();}
// Returns the maker (not yet Make()'d) for the encoder applied to html id values; "." is passed as the hex-escape mark.
public static Gfo_url_encoder_mkr New__html_id() { // EX: "<a id='a<>b'>" -> "<a id='a.C3.A9b'>"
return new Gfo_url_encoder_mkr().Init(Byte_ascii.Dot).Init_common(Bool_.Y)
public static Gfo_url_encoder_mkr New__html_id() { // EX: "<a id='a<>b'>" -> "<a id='a.C3.A9b'>"; NOTE(review): ".C3.A9" is the utf8 hex of "é", not of "<>" -- example looks garbled or stale; confirm
return new Gfo_url_encoder_mkr()
.Init(Byte_ascii.Dot) // "." becomes the bicode mark; Init starts with every byte set to hex-encode
.Init__same__rng(0, 255) // clear everything and set to do-not-encode
.Init__encode_hex(Byte_ascii.Angle_bgn, Byte_ascii.Angle_end) // NOTE: should not be encoded, but will break existing tests; EX:{{#tag:pre|a|id='<br/>'}}; DATE:2019-05-12
.Init__decode_mark(Byte_ascii.Dot)
.Init__diff__one(Byte_ascii.Space, Byte_ascii.Underline) // space is written as underline
.Init__html_ent(Byte_ascii.Amp, Gfh_entity_trie.Instance);
.Init__html_ent(Byte_ascii.Amp, Gfh_entity_trie.Instance, false); // false: an "&" that matches no entity in the trie is added as a literal "&" instead of being hex-encoded
}
public static Gfo_url_encoder_mkr New__html_href_mw(boolean use_anchor_encoder) { // EX: "<a href='^#^'>" -> "<a href='%5E#.5E'>"; REF.MW: ";:@$!*(),/"
return new Gfo_url_encoder_mkr().Init(Byte_ascii.Percent).Init_common(Bool_.Y)
@ -84,9 +86,10 @@ public class Gfo_url_encoder_ {
.Init__same__many(Byte_ascii.Paren_bgn, Byte_ascii.Paren_end, Byte_ascii.Apos, Byte_ascii.Semic);
}
// Returns the maker for the "mw_ttl" encoder (presumably MediaWiki titles -- confirm); "%" is passed as the hex-escape mark.
public static Gfo_url_encoder_mkr New__mw_ttl() {
return new Gfo_url_encoder_mkr().Init(Byte_ascii.Percent)
return new Gfo_url_encoder_mkr()
.Init(Byte_ascii.Percent)
.Init__same__rng(0, 255)
.Init__diff__many(Byte_ascii.Percent, Byte_ascii.Amp, Byte_ascii.Apos, Byte_ascii.Eq, Byte_ascii.Plus)
.Init__diff__many(Byte_ascii.Amp, Byte_ascii.Apos, Byte_ascii.Eq, Byte_ascii.Plus) // listed bytes are hex-encoded and hex-decoded; the mark is no longer passed here because Init already stored it
.Init__diff__one(Byte_ascii.Space, Byte_ascii.Underline)
;
}

@ -76,7 +76,11 @@ class Gfo_url_encoder_itm_hex implements Gfo_url_encoder_itm {
}
class Gfo_url_encoder_itm_html_ent implements Gfo_url_encoder_itm {
private final Btrie_slim_mgr amp_trie; // trie that Encode matches against the bytes following "&"
public Gfo_url_encoder_itm_html_ent(Btrie_slim_mgr amp_trie) {this.amp_trie = amp_trie;}
private final boolean encode_unknown_amp; // true: an "&" with no trie match is hex-encoded using "." as the mark; false: it is added to the buffer as a literal "&"
// amp_trie: entity lookup used by Encode; encode_unknown_amp: see field comment
public Gfo_url_encoder_itm_html_ent(Btrie_slim_mgr amp_trie, boolean encode_unknown_amp) {
this.encode_unknown_amp = encode_unknown_amp;
this.amp_trie = amp_trie;
}
public int Encode(Bry_bfr bfr, byte[] src, int end, int idx, byte b) {
++idx; // b is &; get next character afterwards
if (idx == end) { // & is last char; return
@ -86,7 +90,10 @@ class Gfo_url_encoder_itm_html_ent implements Gfo_url_encoder_itm {
b = src[idx];
Object o = amp_trie.Match_bgn_w_byte(b, src, idx, end);
if (o == null) { // unknown entity (EX:&unknown;); return &;
Gfo_url_encoder_itm_hex.Encode_byte(Byte_ascii.Amp, bfr, Byte_ascii.Dot);
if (encode_unknown_amp)
Gfo_url_encoder_itm_hex.Encode_byte(Byte_ascii.Amp, bfr, Byte_ascii.Dot);
else
bfr.Add_byte(Byte_ascii.Amp);
return 0;
}
else {

@ -16,15 +16,26 @@ Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
package gplx.langs.htmls.encoders; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
import gplx.core.btries.*;
public class Gfo_url_encoder_mkr {
private Gfo_url_encoder_itm[] encode_ary, decode_ary; private Gfo_url_encoder anchor_encoder;
private Gfo_url_encoder_itm[] encode_ary, decode_ary;
private Gfo_url_encoder anchor_encoder;
private Gfo_url_encoder_itm_hex encoder_hex;
private byte bicode_mark;
// Resets both 256-entry lookup tables: every byte encodes through the hex item built from bicode_mark and decodes to itself;
// the bicode_mark byte itself is then set to decode through the hex item. Returns this for chaining.
public Gfo_url_encoder_mkr Init(byte bicode_mark) {
this.bicode_mark = bicode_mark;
encode_ary = new Gfo_url_encoder_itm[256]; decode_ary = new Gfo_url_encoder_itm[256];
Gfo_url_encoder_itm_hex hex = new Gfo_url_encoder_itm_hex(bicode_mark);
encoder_hex = new Gfo_url_encoder_itm_hex(bicode_mark); // kept in a field so the other Init__ methods reuse the same hex item
for (int i = 0; i < 256; ++i) {
encode_ary[i] = hex; // default encode to hex
decode_ary[i] = Gfo_url_encoder_itm_same.Instance; // default decode to same; needed for files; EX: A!%21.png -> A!!.png;
encode_ary[i] = encoder_hex; // default encode to hex
decode_ary[i] = Gfo_url_encoder_itm_same.Instance; // default decode to same; needed for files; EX: A!%21.png -> A!!.png;
}
decode_ary[bicode_mark] = encoder_hex; // NOTE(review): raw byte used as index, whereas Init__decode_mark masks with 0xff -- confirm the mark is always < 128
return this;
}
// Sets the encode entry of each listed byte value to the shared hex item. Returns this for chaining.
public Gfo_url_encoder_mkr Init__encode_hex(int... ary) {
for (int i = 0; i < ary.length; i++) {
int idx = ary[i];
encode_ary[idx] = encoder_hex;
}
decode_ary[bicode_mark] = hex;
return this;
}
public Gfo_url_encoder_mkr Init__same__rng(int bgn, int end) {
@ -49,7 +60,7 @@ public class Gfo_url_encoder_mkr {
return this;
}
// Sets the decode entry for decode_mark to the hex item, so that byte is handled by the hex item when decoding. Returns this for chaining.
// NOTE(review): the shared encoder_hex is built from the mark passed to Init, so decode_mark is only used as the table index here -- confirm callers always pass the same mark to both
public Gfo_url_encoder_mkr Init__decode_mark(byte decode_mark) {
decode_ary[decode_mark & 0xff] = new Gfo_url_encoder_itm_hex(decode_mark);// PATCH.JAVA:need to convert to unsigned byte
decode_ary[decode_mark & 0xff] = encoder_hex;// PATCH.JAVA:need to convert to unsigned byte
return this;
}
public Gfo_url_encoder_mkr Init__diff__one(byte src, byte trg) {
@ -57,18 +68,17 @@ public class Gfo_url_encoder_mkr {
encode_ary[src] = decode_ary[trg] = itm;
return this;
}
// Sets both the encode and the decode entry of each listed byte value to the hex item, then sets the mark byte's decode entry to the hex item as well.
// Returns this for chaining.
public Gfo_url_encoder_mkr Init__diff__many(byte bicode_mark, int... ary) {
Gfo_url_encoder_itm_hex hex = new Gfo_url_encoder_itm_hex(bicode_mark);
public Gfo_url_encoder_mkr Init__diff__many(int... ary) {
int len = ary.length;
for (int i = 0; i < len; i++) {
int idx = ary[i];
encode_ary[idx] = decode_ary[idx] = hex;
encode_ary[idx] = decode_ary[idx] = encoder_hex;
}
decode_ary[bicode_mark] = hex;
decode_ary[bicode_mark] = encoder_hex; // in the form without a mark parameter, bicode_mark is the field stored by Init
return this;
}
// Sets the encode entry for byte src to an html-entity item backed by trie; only encode_ary is changed. Returns this for chaining.
public Gfo_url_encoder_mkr Init__html_ent(byte src, Btrie_slim_mgr trie) {
Gfo_url_encoder_itm_html_ent itm = new Gfo_url_encoder_itm_html_ent(trie);
public Gfo_url_encoder_mkr Init__html_ent(byte src, Btrie_slim_mgr trie, boolean encode_unknown_amp) {
Gfo_url_encoder_itm_html_ent itm = new Gfo_url_encoder_itm_html_ent(trie, encode_unknown_amp); // encode_unknown_amp is forwarded unchanged to the item
encode_ary[src] = itm;
return this;
}

@ -17,13 +17,29 @@ package gplx.langs.htmls.encoders; import gplx.*; import gplx.langs.*; import gp
import org.junit.*;
public class Gfo_url_encoder_tst {
private final Gfo_url_encoder_fxt fxt = new Gfo_url_encoder_fxt();
@Test public void Id__nums() {fxt.Encoder_id().Test__bicode("0123456789" , "0123456789");}
@Test public void Id__ltrs_lower() {fxt.Encoder_id().Test__bicode("abcdefghijklmnopqrstuvwxyz" , "abcdefghijklmnopqrstuvwxyz");}
@Test public void Id__ltrs_upper() {fxt.Encoder_id().Test__bicode("ABCDEFGHIJKLMNOPQRSTUVWXYZ" , "ABCDEFGHIJKLMNOPQRSTUVWXYZ");}
@Test public void Id__syms() {fxt.Encoder_id().Test__encode("!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", ".21.22.23.24.25.26.27.28.29.2A.2B.2C-..2F:.3B.3C.3D.3E.3F.40.5B.5C.5D.5E_.60.7B.7C.7D.7E");} // NOTE: not reversible since "." is encode_marker but not encoded
@Test public void Id__foreign() {fxt.Encoder_id().Test__bicode("aéb", "a.C3.A9b");}
@Test public void Id__nbsp() {fxt.Encoder_id().Test__encode("a&nbsp;b", "a.C2.A0b");} // NOTE: not just .A0 (160) but utf8-encoded .C2.A0
@Test public void Id__nums() {fxt.Encoder_id().Test__bicode("0123456789");}
@Test public void Id__ltrs_lower() {fxt.Encoder_id().Test__bicode("abcdefghijklmnopqrstuvwxyz");}
@Test public void Id__ltrs_upper() {fxt.Encoder_id().Test__bicode("ABCDEFGHIJKLMNOPQRSTUVWXYZ");}
@Test public void Id__syms_0() {fxt.Encoder_id().Test__bicode("!\"#$%&'()*+,-./");} // ISSUE#:462; DATE:2019-05-12
@Test public void Id__syms_1() {fxt.Encoder_id().Test__bicode(":;=?@");} // ISSUE#:462; DATE:2019-05-12
@Test public void Id__syms_2() {fxt.Encoder_id().Test__bicode("[\\]^`");} // ISSUE#:462; DATE:2019-05-12
@Test public void Id__syms_3() {fxt.Encoder_id().Test__bicode("{|}~");}// ISSUE#:462; DATE:2019-05-12
@Test public void Id__foreign() {fxt.Encoder_id().Test__bicode("aéb");}
@Test public void Id__space() {fxt.Encoder_id().Test__bicode("a b", "a_b");}
@Test public void Id__syms_1_angles() { // NOTE:should not be encoded, but will break existing tests; EX:{{#tag:pre|a|id='<br/>'}}; DATE:2019-05-12;
fxt.Encoder_id().Test__encode("<", ".3C");
fxt.Encoder_id().Test__decode(".3C", "<");
fxt.Encoder_id().Test__encode(">", ".3E");
fxt.Encoder_id().Test__decode(".3E", ">");
}
@Test public void Id__syms_2_lodash() { // ISSUE#:462; DATE:2019-05-12
fxt.Encoder_id().Test__encode("_", "_");
fxt.Encoder_id().Test__decode("_", " ");
}
@Test public void Id__nbsp() {
fxt.Encoder_id().Test__encode("a&nbsp;b", "a.C2.A0b"); // NOTE: not just .A0 (160) but utf8-encoded .C2.A0
fxt.Encoder_id().Test__decode("a.C2.A0b", "a b"); // WS is nbsp
}
@Test public void Id__err() {
byte[] raw = Bry_.new_a7("0%.jpg");
Bry_bfr tmp_bfr = Bry_bfr_.New();
@ -39,7 +55,7 @@ public class Gfo_url_encoder_tst {
fxt.Encoder_href().Test__encode("a b", "a_b");
}
@Test public void Href__special_and_anchor() { // PURPOSE: MediaWiki encodes with % for ttls, but . for anchors; REF:Title.php!(before-anchor)getLocalUrl;wfUrlencode (after-anchor)escapeFragmentForURL
fxt.Encoder_href().Test__bicode("^#^", "%5E#.5E");
fxt.Encoder_href().Test__bicode("^#^", "%5E#^");
fxt.Encoder_href().Test__encode("A#", "A#");
}
@Test public void Href__invalid() { // PURPOSE: check that invalid url decodings are rendered literally; DATE:2014-04-10
@ -56,6 +72,7 @@ class Gfo_url_encoder_fxt {
public Gfo_url_encoder_fxt Encoder_url() {encoder = Gfo_url_encoder_.Http_url; return this;}
public Gfo_url_encoder_fxt Encoder_ttl() {encoder = Gfo_url_encoder_.Mw_ttl; return this;}
public Gfo_url_encoder_fxt Encoder_fsys_safe() {encoder = Gfo_url_encoder_.New__fsys_wnt().Make(); return this;}
public void Test__bicode(String raw) {Test__bicode(raw, raw);}
public void Test__bicode(String raw, String encoded) {
Test__encode(raw, encoded);
Test__decode(encoded, raw);

@ -490,7 +490,7 @@ public class Xop_fxt {
*/
Bry_bfr bfr = Bry_bfr_.New();
bfr.Add_str_a7("|-\n");
bfr.Add_str_u8("| {{#ifeq:" + expd + "|" + expr + "|<span style='color:green'>pass</span>|<span style='color:red'>fail</span>}}\n");
bfr.Add_str_u8("| {{#ifeq:" + String_.Replace(expd, "|", "{{!}}") + "|" + String_.Replace(expr, "|", "{{!}}") + "|<span style='color:green'>pass</span>|<span style='color:red'>fail</span>}}\n");
bfr.Add_str_u8("| " + expd + "\n");
bfr.Add_str_u8("| " + expr + "\n");
bfr.Add_str_u8("| <nowiki>" + expr + "</nowiki>\n");

@ -19,7 +19,7 @@ import gplx.xowa.parsers.amps.*; import gplx.core.primitives.*;
class Xoh_toc_wkr__txt {
private final Gfh_tag_rdr tag_rdr = Gfh_tag_rdr.New__html();
private final Bry_bfr anch_bfr = Bry_bfr_.New(), text_bfr = Bry_bfr_.New();
private final Gfo_url_encoder anch_encoder = Gfo_url_encoder_.New__id();
private final Gfo_url_encoder anch_encoder = Gfo_url_encoder_.New__html_id().Make();
private final Xop_amp_mgr amp_mgr = Xop_amp_mgr.Instance;
private final Hash_adp anch_hash = Hash_adp_bry.ci_u8(gplx.xowa.langs.cases.Xol_case_mgr_.U8());
private Xow_tidy_mgr_interface tidy_mgr;

@ -30,10 +30,10 @@ public class Xoh_toc_wkr__txt__basic__tst {
fxt.Test__both("", "", "");
}
@Test public void Amp__ncr() {
fxt.Test__both("&#91;a&#93;", ".5Ba.5D", "&#91;a&#93;");
fxt.Test__both("&#91;a&#93;", "[a]", "&#91;a&#93;");// ISSUE#:462; DATE:2019-05-12
}
@Test public void Encode() {
fxt.Test__both("a+b", "a.2Bb", "a+b");
fxt.Test__both("a+b", "a+b", "a+b");// ISSUE#:462; DATE:2019-05-12
}
@Test public void Comment() {
fxt.Test__text("a<!--b-->c", "ac");

@ -178,7 +178,7 @@ public class Xowe_hdr_bldr__tst {
, "==a+b=="
), fxt.toc_tbl_nl_y
( " <ul>"
, " <li class=\"toclevel-1 tocsection-1\"><a href=\"#a.2Bb\"><span class=\"tocnumber\">1</span> <span class=\"toctext\">a+b</span></a>"
, " <li class=\"toclevel-1 tocsection-1\"><a href=\"#a+b\"><span class=\"tocnumber\">1</span> <span class=\"toctext\">a+b</span></a>" // ISSUE#:462; DATE:2019-05-12
, " </li>"
, " </ul>"
));
@ -403,11 +403,11 @@ public class Xowe_hdr_bldr__tst {
, String_.Concat_lines_nl
( fxt.toc_tbl_nl_n
( " <ul>"
, " <li class=\"toclevel-1 tocsection-1\"><a href=\"#.5Ba.5D\"><span class=\"tocnumber\">1</span> <span class=\"toctext\">&#91;a&#93;</span></a>"
, " <li class=\"toclevel-1 tocsection-1\"><a href=\"#[a]\"><span class=\"tocnumber\">1</span> <span class=\"toctext\">&#91;a&#93;</span></a>" // ISSUE#:462; DATE:2019-05-12
, " </li>"
, " </ul>"
)
, "<h2><span class='mw-headline' id='.5Ba.5D'>&#91;a&#93;</span></h2>"
, "<h2><span class='mw-headline' id='[a]'>&#91;a&#93;</span></h2>"// ISSUE#:462; DATE:2019-05-12
));
}
@Test public void Fix_large_before_small() { // PURPOSE.fix: "===a===\n===b===\n" followed by "==c==" causes improper formatting; DATE:2013-05-16
@ -469,11 +469,11 @@ public class Xowe_hdr_bldr__tst {
, String_.Concat_lines_nl
( fxt.toc_tbl_nl_n
( " <ul>"
, " <li class=\"toclevel-1 tocsection-1\"><a href=\"#a.5B1.5D\"><span class=\"tocnumber\">1</span> <span class=\"toctext\">a<sup>[1]</sup></span></a>"
, " <li class=\"toclevel-1 tocsection-1\"><a href=\"#a[1]\"><span class=\"tocnumber\">1</span> <span class=\"toctext\">a<sup>[1]</sup></span></a>" // ISSUE#:462; DATE:2019-05-12
, " </li>"
, " </ul>"
)
, "<h2><span class='mw-headline' id='a.5B1.5D'>a<sup id=\"cite_ref-0\" class=\"reference\"><a href=\"#cite_note-0\">[1]</a></sup></span></h2>"
, "<h2><span class='mw-headline' id='a[1]'>a<sup id=\"cite_ref-0\" class=\"reference\"><a href=\"#cite_note-0\">[1]</a></sup></span></h2>"// ISSUE#:462; DATE:2019-05-12
));
}
@Test public void Category() { // PURPOSE: Category should not show in in TOC; DATE:2013-12-09
@ -529,11 +529,11 @@ public class Xowe_hdr_bldr__tst {
, String_.Concat_lines_nl
( fxt.toc_tbl_nl_n
( " <ul>"
, " <li class=\"toclevel-1 tocsection-1\"><a href=\"#.5B.5B.5D.5D\"><span class=\"tocnumber\">1</span> <span class=\"toctext\">[[]]</span></a>"
, " <li class=\"toclevel-1 tocsection-1\"><a href=\"#[[]]\"><span class=\"tocnumber\">1</span> <span class=\"toctext\">[[]]</span></a>" // ISSUE#:462; DATE:2019-05-12
, " </li>"
, " </ul>"
)
, "<h2><span class='mw-headline' id='.5B.5B.5D.5D'>[[]]</span></h2>"
, "<h2><span class='mw-headline' id='[[]]'>[[]]</span></h2>" // ISSUE#:462; DATE:2019-05-12
));
}
@Test public void File_in_tbl() { // PURPOSE: two issues (a) don't show file if in tbl; (b) if v2, file inside tbl fails; PAGE:en.w:Holmes County,_Mississippi; DATE:2014-06-22

@ -29,13 +29,13 @@ public class Srch_html_page_bldr_tst {
@Test public void Rows() {
fxt.Test_rows(new Srch_rslt_row[] {fxt.Make_row(10, "A"), fxt.Make_row(20, "B")}, String_.Concat_lines_nl_skip_last
( ""
, " <tr id='w.7C1'>"
, " <tr id='w|1'>" // ISSUE#:462; DATE:2019-05-12
, " <td style='padding-right:5px; vertical-align:top; text-align:right;'>10"
, " </td>"
, " <td style='padding-left:5px; vertical-align:top;'><a href='/site/w/wiki/A' title='A'>A</a>"
, " </td>"
, " </tr>"
, " <tr id='w.7C2'>"
, " <tr id='w|2'>" // ISSUE#:462; DATE:2019-05-12
, " <td style='padding-right:5px; vertical-align:top; text-align:right;'>20"
, " </td>"
, " <td style='padding-left:5px; vertical-align:top;'><a href='/site/w/wiki/B' title='B'>B</a>"

@ -17,15 +17,15 @@ package gplx.xowa.addons.wikis.searchs.specials.htmls; import gplx.*; import gpl
import org.junit.*; import gplx.xowa.htmls.core.htmls.utls.*; import gplx.xowa.guis.cbks.js.*; import gplx.xowa.addons.wikis.searchs.searchers.rslts.*;
public class Srch_rslt_cbk_tst {
@Before public void init() {fxt.Clear();} private Srch_rslt_cbk_fxt fxt = new Srch_rslt_cbk_fxt();
@Test public void Basic() {
fxt.Test_add(fxt.Make_rslt(50, "L"), fxt.Make_args_append("xowa_insert_w" , "w.7CL")); // insert new
fxt.Test_add(fxt.Make_rslt(30, "N"), fxt.Make_args_append("xowa_insert_w" , "w.7CN")); // insert below last
fxt.Test_add(fxt.Make_rslt(70, "J"), fxt.Make_args_append("w.7CL" , "w.7CJ")); // insert above first
fxt.Test_add(fxt.Make_rslt(60, "K"), fxt.Make_args_append("w.7CL" , "w.7CK")); // insert above mid
fxt.Test_add(fxt.Make_rslt(40, "M"), fxt.Make_args_append("w.7CN" , "w.7CM")); // insert below mid
fxt.Test_add(fxt.Make_rslt(10, "P")); // insert noop
fxt.Test_add(fxt.Make_rslt(80, "I"), fxt.Make_args_append("w.7CJ" , "w.7CI") , fxt.Make_args_replace("w.7CN")); // insert displace all
fxt.Test_add(fxt.Make_rslt(61, "K1"), fxt.Make_args_append("w.7CK" , "w.7CK1"), fxt.Make_args_replace("w.7CM")); // insert displace mid
@Test public void Basic() {// ISSUE#:462; DATE:2019-05-12
fxt.Test_add(fxt.Make_rslt(50, "L"), fxt.Make_args_append("xowa_insert_w" , "w|L")); // insert new
fxt.Test_add(fxt.Make_rslt(30, "N"), fxt.Make_args_append("xowa_insert_w" , "w|N")); // insert below last
fxt.Test_add(fxt.Make_rslt(70, "J"), fxt.Make_args_append("w|L" , "w|J")); // insert above first
fxt.Test_add(fxt.Make_rslt(60, "K"), fxt.Make_args_append("w|L" , "w|K")); // insert above mid
fxt.Test_add(fxt.Make_rslt(40, "M"), fxt.Make_args_append("w|N" , "w|M")); // insert below mid
fxt.Test_add(fxt.Make_rslt(10, "P")); // insert noop
fxt.Test_add(fxt.Make_rslt(80, "I"), fxt.Make_args_append("w|J" , "w|I") , fxt.Make_args_replace("w|N")); // insert displace all
fxt.Test_add(fxt.Make_rslt(61, "K1"), fxt.Make_args_append("w|K" , "w|K1"), fxt.Make_args_replace("w|M")); // insert displace mid
}
}
class Srch_rslt_cbk_fxt {

@ -40,7 +40,7 @@ public class Xow_url_parser__qarg__tst {
fxt.Exec__parse("A?B").Test__wiki("en.wikipedia.org").Test__page("A?B").Test__qargs("");
}
@Test public void Question_is_anchor() {
fxt.Exec__parse("A#b?c").Test__wiki("en.wikipedia.org").Test__page("A").Test__anch("b.3Fc");
fxt.Exec__parse("A#b?c").Test__wiki("en.wikipedia.org").Test__page("A").Test__anch("b?c");// ISSUE#:462; DATE:2019-05-12
}
@Test public void Title_remove_w() { // PURPOSE: fix /w/ showing up as seg; DATE:2014-05-30
fxt.Exec__parse("http://en.wikipedia.org/w/index.php?title=A").Test__wiki("en.wikipedia.org").Test__page("A");

@ -30,7 +30,7 @@ public class Xow_url_parser__ttl_tst {
tstr.Exec__parse("A#b").Test__wiki("en.wikipedia.org").Test__page("A").Test__anch("b");
}
@Test public void Anch_w_slash() { // PURPOSE: A/b#c/d was not parsing correctly; PAGE:en.w:Enlightenment_Spain#Enlightened_despotism_.281759%E2%80%931788.29
tstr.Exec__parse("A/b#c/d").Test__page("A/b").Test__anch("c.2Fd");
tstr.Exec__parse("A/b#c/d").Test__page("A/b").Test__anch("c/d");// ISSUE#:462; DATE:2019-05-12
}
@Test public void Ns_category() {
tstr.Exec__parse("Category:A").Test__wiki("en.wikipedia.org").Test__page("Category:A");

@ -315,7 +315,7 @@ public class Xoh_html_wtr_tst {
fxt.Test_parse_page_wiki_str("<div/>", "<div></div>");
}
@Test public void Xnde_id_encode() { // PURPOSE: id should be url-encoded; DATE: 2013-11-13;
fxt.Test_parse_page_wiki_str("<div id='a*'></div>", "<div id='a.2A'></div>");
fxt.Test_parse_page_wiki_str("<div id='a*'></div>", "<div id='a*'></div>"); // ISSUE#:462; DATE:2019-05-12
fxt.Test_parse_page_wiki_str("<div id='a b'></div>", "<div id='a_b'></div>");
}
@Test public void Timeline() {// PURPOSE: embed timeline contents in pre; DATE:2014-05-22

@ -87,7 +87,7 @@ public class Xop_section_list__slice__tst {
, "== Hdr 2 =="
, "Para 2"
);
fxt.Test__slice_bry_or_null(".5Cdelta", String_.Concat_lines_nl_skip_last
fxt.Test__slice_bry_or_null("\\delta", String_.Concat_lines_nl_skip_last // ISSUE#:462; DATE:2019-05-12
( "== <math>\\delta</math> =="
, "Para 1"
));

@ -246,7 +246,7 @@ public class Xop_lnki_wkr__basic_tst {
fxt.Test_parse_page_all_str("[[B|=]]", "<a href=\"/wiki/B\">=</a>");
}
@Test public void Href_encode_anchor() { // PURPOSE: test separate encoding for ttl (%) and anchor (.)
fxt.Test_parse_page_all_str("[[^#^]]", "<a href=\"/wiki/%5E#.5E\">^#^</a>");
fxt.Test_parse_page_all_str("[[^#^]]", "<a href=\"/wiki/%5E#^\">^#^</a>"); // ISSUE#:462; DATE:2019-05-12
}
@Test public void Href_question() { // PURPOSE.fix: ttl with ? at end should not be considered qarg; DATE:2013-02-08
fxt.Test_parse_page_all_str("[[A?]]", "<a href=\"/wiki/A%3F\">A?</a>");

@ -22,14 +22,14 @@ public class Pfunc_tag_tst {
@Test public void Val_apos() {fxt.Test_html_full_str("{{#tag:pre|a|id='b'}}" , "<pre id=\"b\">a</pre>");}
@Test public void Val_quote() {fxt.Test_html_full_str("{{#tag:pre|a|id=\"b\"}}" , "<pre id=\"b\">a</pre>");}
@Test public void Val_empty() {fxt.Test_html_full_str("{{#tag:pre|a|id=}}" , "<pre>a</pre>");} // PURPOSE: ignore atrs with no val; EX:{{#ref||group=}} PAGE:ru.w:Колчак,_Александр_Васильевич; DATE:2014-07-03
@Test public void Val_multiple() {fxt.Test_html_full_str("{{#tag:pre|c|id='a'b'}}" , "<pre id=\"a.27b\">c</pre>");} // PURPOSE: multiple quotes should use 1st and nth; DATE:2018-12-24
@Test public void Val_quote_w_apos() {fxt.Test_html_full_str("{{#tag:pre|c|id=\"a'b\"}}" , "<pre id=\"a.27b\">c</pre>");} // PURPOSE.fix: tag was not handling apos within quotes; PAGE:en.s:The_formative_period_in_Colby%27s_history DATE:2016-06-23
@Test public void Val_multiple() {fxt.Test_html_full_str("{{#tag:pre|c|id='a'b'}}" , "<pre id=\"a'b\">c</pre>");} // PURPOSE: multiple quotes should use 1st and nth; DATE:2018-12-24 // ISSUE#:462; DATE:2019-05-12
@Test public void Val_quote_w_apos() {fxt.Test_html_full_str("{{#tag:pre|c|id=\"a'b\"}}" , "<pre id=\"a'b\">c</pre>");} // PURPOSE.fix: tag was not handling apos within quotes; PAGE:en.s:The_formative_period_in_Colby%27s_history DATE:2016-06-23; // ISSUE#:462; DATE:2019-05-12
@Test public void Val_mismatched() {fxt.Test_html_full_str("{{#tag:pre|c|id=\"a'}}" , "<pre id=\"a\">c</pre>");} // PURPOSE: emulate MW behavior; DATE:2018-12-24
@Test public void Tmpl() {fxt.Test_html_full_str("{{#tag:pre|a|{{#switch:a|a=id}}=c}}" , "<pre id=\"c\">a</pre>");} // PURPOSE: args must be evaluated
@Test public void Ws_all() {fxt.Test_html_full_str("{{#tag:pre|a| id = b }}" , "<pre id=\"b\">a</pre>");}
@Test public void Ws_quoted() {fxt.Test_html_full_str("{{#tag:pre|a| id = ' b ' }}" , "<pre id=\"_b_\">a</pre>");}
@Test public void Err_bad_key() {fxt.Test_html_full_str("{{#tag:pre|a|id=val|b}}" , "<pre id=\"val\">a</pre>");} // PURPOSE: b was failing b/c id was larger and key_end set to 4 (whereas b was len=1)
@Test public void Html_is_escaped() {fxt.Test_html_full_str("{{#tag:pre|a|id='<br/>'}}" , "<pre id=\".3Cbr.2F.3E\">a</pre>");} // PURPOSE: escape html in atrs; PAGE:fr.w:France; DATE:2017-06-01
@Test public void Err_bad_key() {fxt.Test_html_full_str("{{#tag:pre|a|id=val|b}}" , "<pre id=\"val\">a</pre>");} // PURPOSE: b was failing b/c id was larger and key_end set to 4 (whereas b was len=1)
@Test public void Html_is_escaped() {fxt.Test_html_full_str("{{#tag:pre|a|id='<br/>'}}" , "<pre id=\".3Cbr/.3E\">a</pre>");} // PURPOSE: escape html in atrs; PAGE:fr.w:France; DATE:2017-06-01; // ISSUE#:462; DATE:2019-05-12
@Test public void Nested_tmpl() { // PURPOSE: nested template must get re-evaluated; EX:de.wikipedia.org/wiki/Freiburg_im_Breisgau; DATE:2013-12-18;
fxt.Init_page_create("Template:!", "|");
fxt.Init_page_create("Template:A", "{{#ifeq:{{{1}}}|expd|pass|fail}}");

@ -20,16 +20,16 @@ public class Pfunc_anchorencode_tst {
private final Pfunc_anchorenchode_fxt fxt = new Pfunc_anchorenchode_fxt(Bool_.N);
@Before public void init() {fxt.Reset();}
@Test public void Text_apos() {
fxt.Test("{{anchorencode:a 'b c}}", "a_.27b_c");
fxt.Test("{{anchorencode:a 'b c}}", "a_'b_c");
}
@Test public void Apos_bold() {
fxt.Test("{{anchorencode:a ''b'' c}}", "a_b_c");
}
@Test public void Html_ncr() {
fxt.Test("{{anchorencode:a &#34; b}}", "a_.22_b");
fxt.Test("{{anchorencode:a &#34; b}}", "a_&quot;_b");
}
@Test public void Html_ref() {
fxt.Test("{{anchorencode:a &quot; b}}", "a_.22_b");
fxt.Test("{{anchorencode:a &quot; b}}", "a_&quot;_b");
}
@Test public void Lnke() {
fxt.Test("{{anchorencode:[irc://a b c]}}", "b_c");
@ -41,7 +41,7 @@ public class Pfunc_anchorencode_tst {
fxt.Test("{{anchorencode:a [[b|c]] c}}", "a_c_c");
}
@Test public void Lnki_file() {
fxt.Test("{{anchorencode:a [[Image:b|thumb|123px|c]] d}}", "a_thumb.7C123px.7Cc_d");
fxt.Test("{{anchorencode:a [[Image:b|thumb|123px|c]] d}}", "a_thumb|123px|c_d");
}
@Test public void Lnki_trailing() {
fxt.Test("{{anchorencode:a [[b]]c d}}", "a_bc_d");
@ -59,7 +59,7 @@ public class Pfunc_anchorencode_tst {
fxt.Test("{{anchorencode:{{xowa_na}}}}", "Template:xowa_na");
}
@Test public void Tmpl_missing_colon() {
fxt.Test("{{anchorencode:{{:a}}}}", "a"); // NOTE: changed from "Template:A" to "a"; DATE:2016-06-24
fxt.Test("{{anchorencode:{{:xowa_na}}}}", "xowa_na"); // NOTE: changed from "Template:A" to "a"; DATE:2016-06-24
}
}
class Pfunc_anchorenchode_fxt {
@ -73,6 +73,6 @@ class Pfunc_anchorenchode_fxt {
}
public void Test(String raw, String expd) {
if (dbg) Console_adp__sys.Instance.Write_str(fxt.Make__test_string(raw, expd));
fxt.Test__parse__tmpl_to_html(raw, expd);
fxt.Test_str_full(raw, expd, fxt.Exec_parse_page_all_as_str(raw));
}
}

@ -76,7 +76,7 @@ public class Scrib_lib_mw__invoke_tst {
}
@Test public void CallParserFunction_tag() {
fxt.Init_page("{{#invoke:Mod_0|Prc_0}}");
fxt.Test_lib_proc_kv(lib, Scrib_lib_mw.Invk_callParserFunction, Scrib_kv_utl_.flat_many_(1, "current", 2, "#tag", 3, Scrib_kv_utl_.flat_many_("3", "id=1", "2", "text", "1", "pre")), "<pre 3=\"id.3D1\">2=text</pre>");// named: sort args; NOTE: keys should probably be stripped
fxt.Test_lib_proc_kv(lib, Scrib_lib_mw.Invk_callParserFunction, Scrib_kv_utl_.flat_many_(1, "current", 2, "#tag", 3, Scrib_kv_utl_.flat_many_("3", "id=1", "2", "text", "1", "pre")), "<pre 3=\"id=1\">2=text</pre>");// named: sort args; NOTE: keys should probably be stripped; // ISSUE#:462; DATE:2019-05-12
}
@Test public void CallParserFunction__no_args() { // PURPOSE.fix: 0 args should not fail
fxt.Init_page("{{#invoke:Mod_0|Prc_0}}");

Loading…
Cancel
Save