1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2026-03-02 03:49:30 +00:00

Parser: Parse html in internal link captions [#460]

This commit is contained in:
gnosygnu
2019-05-12 20:09:51 -04:00
parent 4d9072830c
commit ba35901865
10 changed files with 179 additions and 93 deletions

View File

@@ -21,12 +21,12 @@ public class Pfunc_anchorencode extends Pf_func_base { // EX: {{anchorencode:a b
/** Reports Xol_kwd_grp_.Id_url_anchorencode as the id of this function. */
@Override
public int Id() {
    return Xol_kwd_grp_.Id_url_anchorencode;
}
/** Builds a new Pfunc_anchorencode and gives it the supplied name; the id argument is not read. */
@Override
public Pf_func New(int id, byte[] name) {
    Pfunc_anchorencode created = new Pfunc_anchorencode();
    return created.Name_(name);
}
// Evaluates the argument of {{anchorencode:...}} with Eval_argx and passes the resulting bytes to Anchor_encode,
// which writes the encoded anchor into bfr.
// NOTE(review): this span comes from a diff rendering whose +/- markers were lost, so two variants of the body
// appear back to back: one returns early when raw_bry == Bry_.Empty (a reference comparison), the other only
// encodes when Bry_.Len_gt_0(raw_bry). Presumably the Len_gt_0 variant is the post-change code -- confirm
// against the repository before treating either as current.
@Override public void Func_evaluate(Bry_bfr bfr, Xop_ctx ctx, Xot_invk caller, Xot_invk self, byte[] src) {
byte[] raw_bry = Eval_argx(ctx, src, caller, self); if (raw_bry == Bry_.Empty) return;
Anchor_encode(bfr, ctx, raw_bry);
byte[] raw_bry = Eval_argx(ctx, src, caller, self);
if (Bry_.Len_gt_0(raw_bry))
Anchor_encode(bfr, ctx, raw_bry);
}
// Encodes raw as an anchor and writes the result to bfr by delegating to Pfunc_anchorencode_mgr.Encode_anchor.
// NOTE(review): diff markers were stripped, so two variants are interleaved here:
//   - one obtains a mgr from Parser_mgr().Anchor_encoder_mgr__dflt_or_new(ctx), calls Encode_anchor(bfr, ctx, raw)
//     and always calls mgr.Used_(Bool_.N) in a finally block;
//   - the other constructs a new Pfunc_anchorencode_mgr for this call from Anchor_encoder_parser_or_new(), ctx and raw,
//     then calls Encode_anchor(bfr).
// Presumably the second variant is the post-change code -- confirm against the repository.
public static void Anchor_encode(Bry_bfr bfr, Xop_ctx ctx, byte[] raw) {
Pfunc_anchorencode_mgr mgr = ctx.Wiki().Parser_mgr().Anchor_encoder_mgr__dflt_or_new(ctx);
try {mgr.Encode_anchor(bfr, ctx, raw);}
finally {mgr.Used_(Bool_.N);}
Pfunc_anchorencode_mgr mgr = new Pfunc_anchorencode_mgr(ctx.Wiki().Parser_mgr().Anchor_encoder_parser_or_new(), ctx, raw);
mgr.Encode_anchor(bfr);
}
}

View File

@@ -17,44 +17,60 @@ package gplx.xowa.xtns.pfuncs.ttls; import gplx.*; import gplx.xowa.*; import gp
import gplx.core.brys.*; import gplx.core.btries.*;
import gplx.langs.htmls.encoders.*;
import gplx.xowa.parsers.*; import gplx.xowa.parsers.amps.*; import gplx.xowa.parsers.lnkes.*; import gplx.xowa.parsers.lnkis.*; import gplx.xowa.parsers.xndes.*; import gplx.xowa.parsers.tmpls.*;
public class Pfunc_anchorencode_mgr { // TS
public class Pfunc_anchorencode_mgr {
private final Xop_parser parser; // create a special-parser for handling wikitext inside {{anchorencode:}}
private final Bry_bfr tmp_bfr = Bry_bfr_.Reset(255);
// Constructor of Pfunc_anchorencode_mgr.
// NOTE(review): diff markers were stripped, so two constructor variants -- and the field declarations
// (tmp_bfr, ctx, src) that accompany the second one -- are interleaved in this span:
//   - the Xowe_wiki variant builds its own Xop_parser with Xop_parser.new_(...) using
//     Xop_lxr_mgr.new_anchor_encoder(), then initializes it by wiki and by lang;
//   - the (parser, owner_ctx, src) variant stores the supplied parser and src, and creates its own ctx with
//     Xop_ctx.New__sub__reuse_page(owner_ctx), on which it calls Lnki().Build_args_list_(true).
// Presumably Build_args_list_(true) is what makes lnki.Args_list() usable in Lnki() -- confirm.
public Pfunc_anchorencode_mgr(Xowe_wiki wiki) {
this.parser = Xop_parser.new_(wiki, wiki.Parser_mgr().Main().Tmpl_lxr_mgr(), Xop_lxr_mgr.new_anchor_encoder());
parser.Init_by_wiki(wiki);
parser.Init_by_lang(wiki.Lang());
private final Bry_bfr tmp_bfr = Bry_bfr_.New();
private final Xop_ctx ctx;
private final byte[] src;
public Pfunc_anchorencode_mgr(Xop_parser parser, Xop_ctx owner_ctx, byte[] src) {
this.parser = parser;
this.ctx = Xop_ctx.New__sub__reuse_page(owner_ctx);
this.ctx.Lnki().Build_args_list_(true);
this.src = src;
}
// Getter and setter for the boolean "used" flag (the backing field is declared on the getter's line).
// In the visible code the setter is called from one Anchor_encode variant's finally block as mgr.Used_(Bool_.N).
public boolean Used() {return used;} private boolean used;
public void Used_(boolean v) {used = v;}
// Parses src into a token tree (tkn_mkr.Root + parser.Parse_wtxt_to_wdom), serializes every direct child of the
// root into tmp_bfr through Tkn(...), then runs the collected bytes through Gfo_url_encoder_.Id and moves the
// encoded result into bfr.
// NOTE(review): diff markers were stripped, so two variants are interleaved:
//   - Encode_anchor(bfr, ctx, src) takes ctx/src as arguments, disables ctx.Para() while parsing and restores the
//     previous enabled state in a finally block;
//   - Encode_anchor(bfr) uses the ctx/src fields and has no para toggling.
// Presumably the one-argument variant is the post-change code -- confirm against the repository.
public void Encode_anchor(Bry_bfr bfr, Xop_ctx ctx, byte[] src) {
// parse {{anchorencode:}}; note that wikitext inside anchorencode gets serialized by different rules
public void Encode_anchor(Bry_bfr bfr) {
// parse
Xop_tkn_mkr tkn_mkr = ctx.Tkn_mkr();
boolean para_enabled = ctx.Para().Enabled();
ctx.Para().Enabled_n_(); // HACK: disable para
try {
Xop_root_tkn root = tkn_mkr.Root(src);
parser.Parse_wtxt_to_wdom(root, ctx, tkn_mkr, src, Xop_parser_.Doc_bgn_bos);
int subs_len = root.Subs_len();
for (int i = 0; i < subs_len; i++) {
Xop_tkn_itm sub = root.Subs_get(i);
Tkn(ctx, src, sub, root, i, tmp_bfr);
}
} finally {ctx.Para().Enabled_(para_enabled);}
Xop_root_tkn root = tkn_mkr.Root(src);
parser.Parse_wtxt_to_wdom(root, ctx, tkn_mkr, src, Xop_parser_.Doc_bgn_bos);
int subs_len = root.Subs_len();
for (int i = 0; i < subs_len; i++) {
Xop_tkn_itm sub = root.Subs_get(i);
Tkn(sub);
}
// write to bfr and encode it
// tmp_bfr is reused: it is drained into "unencoded", then serves as the encoder's output, then is moved into bfr
byte[] unencoded = tmp_bfr.To_bry_and_clear();
Gfo_url_encoder_.Id.Encode(tmp_bfr, unencoded);
bfr.Add_bfr_and_clear(tmp_bfr);
}
// Serializes one parsed token into tmp_bfr, dispatching on sub.Tkn_tid():
//   apos tokens write nothing; html_ncr / html_ref tokens are written with Add_u8_int from Val() / Char_int();
//   lnke, xnde and lnki tokens are handled through their sub-tokens; any other token is copied from src by position.
// NOTE(review): diff markers were stripped, so two variants are interleaved:
//   - a static Tkn(ctx, src, sub, grp, sub_idx, tmp_bfr) that delegates to the Lnke/Lnki/Xnde helpers and uses
//     Src_bgn_grp/Src_end_grp in its default case;
//   - an instance Tkn(sub) that handles lnke/xnde inline, recurses for xnde children, and uses Src_bgn/Src_end.
// The "@@" line below is a hunk boundary: part of the Tid_tmpl_invk case (including where name_ary is set) is not
// visible in this view.
private static void Tkn(Xop_ctx ctx, byte[] src, Xop_tkn_itm sub, Xop_tkn_grp grp, int sub_idx, Bry_bfr tmp_bfr) {
private void Tkn(Xop_tkn_itm sub) {
switch (sub.Tkn_tid()) {
case Xop_tkn_itm_.Tid_lnke: Lnke(src, (Xop_lnke_tkn)sub, tmp_bfr); break; // FUTURE: need to move number to lnke_tkn so that number will be correct/consistent?
case Xop_tkn_itm_.Tid_lnki: Lnki(src, (Xop_lnki_tkn)sub, tmp_bfr); break;
case Xop_tkn_itm_.Tid_apos: break; // noop
case Xop_tkn_itm_.Tid_xnde: Xnde(ctx, src, (Xop_xnde_tkn)sub, tmp_bfr); break;
case Xop_tkn_itm_.Tid_html_ncr: tmp_bfr.Add_u8_int(((Xop_amp_tkn_num)sub).Val()); break;
case Xop_tkn_itm_.Tid_html_ref: tmp_bfr.Add_u8_int(((Xop_amp_tkn_ent)sub).Char_int()); break;
case Xop_tkn_itm_.Tid_apos: // noop
break;
case Xop_tkn_itm_.Tid_html_ncr:
tmp_bfr.Add_u8_int(((Xop_amp_tkn_num)sub).Val());
break;
case Xop_tkn_itm_.Tid_html_ref:
tmp_bfr.Add_u8_int(((Xop_amp_tkn_ent)sub).Char_int());
break;
case Xop_tkn_itm_.Tid_lnke: { // FUTURE: need to move number to lnke_tkn so that number will be correct/consistent?
// copy the source text of each lnke sub-token verbatim
Xop_lnke_tkn lnke = (Xop_lnke_tkn)sub;
int subs_len = lnke.Subs_len();
for (int i = 0; i < subs_len; i++) {
Xop_tkn_itm lnke_sub = lnke.Subs_get(i);
tmp_bfr.Add_mid(src, lnke_sub.Src_bgn(), lnke_sub.Src_end());
}
break;
}
case Xop_tkn_itm_.Tid_xnde: {
// the xnde token itself writes nothing; only its children are serialized (recursively)
Xop_xnde_tkn xnde = (Xop_xnde_tkn)sub;
int subs_len = xnde.Subs_len();
for (int i = 0; i < subs_len; i++) {
Tkn(xnde.Subs_get(i));
}
break;
}
case Xop_tkn_itm_.Tid_tmpl_invk:
Xot_invk_tkn invk_tkn = (Xot_invk_tkn)sub;
Arg_itm_tkn name_tkn = invk_tkn.Name_tkn().Key_tkn();
@@ -66,36 +82,44 @@ public class Pfunc_anchorencode_mgr { // TS
else // regular tmpl; EX: {{a}}
tmp_bfr.Add(ctx.Wiki().Ns_mgr().Ns_template().Gen_ttl(name_ary));
break;
default: tmp_bfr.Add_mid(src, sub.Src_bgn_grp(grp, sub_idx), sub.Src_end_grp(grp, sub_idx)); break;
case Xop_tkn_itm_.Tid_lnki:
Lnki((Xop_lnki_tkn)sub);
break;
default:
tmp_bfr.Add_mid(src, sub.Src_bgn(), sub.Src_end());
break;
}
}
/**
 * Copies the source text of each sub-token of lnke into tmp_bfr, in sub-token order.
 * Start and end positions come from Src_bgn_grp / Src_end_grp, called with lnke and the sub-token's index.
 */
private static void Lnke(byte[] src, Xop_lnke_tkn lnke, Bry_bfr tmp_bfr) {
    for (int idx = 0, count = lnke.Subs_len(); idx < count; idx++) {
        Xop_tkn_itm child = lnke.Subs_get(idx);
        tmp_bfr.Add_mid(
            src,
            child.Src_bgn_grp(lnke, idx),
            child.Src_end_grp(lnke, idx)
        );
    }
}
// Serializes an internal link token into tmp_bfr:
//   - no pipes ([[A]]): the target text, skipping the leading colon when Ttl().ForceLiteralLink() is true;
//   - with pipes ([[A|b]], [[File:A.png|thumb|caption]]): the text after the target;
//   - in both cases, the link tail (EX: "b" in [[A]]b) when Tail_bgn() != -1.
// NOTE(review): diff markers were stripped, so two variants are interleaved (which is why "++trg_bgn;" and the
// Add_mid for the target each appear twice):
//   - a static Lnki(src, lnki, tmp_bfr) that, for piped links, copies src from one past trg_end up to Brack_end_pos();
//   - an instance Lnki(lnki) that walks lnki.Args_list(), writes a pipe between args, serializes the caption arg by
//     calling Tkn on each sub-token of lnki.Caption_val_tkn(), and copies every other arg verbatim from src.
// Presumably the Args_list variant is the post-change code that lets html inside captions be parsed -- confirm.
private static void Lnki(byte[] src, Xop_lnki_tkn lnki, Bry_bfr tmp_bfr) {
int trg_end = lnki.Trg_tkn().Src_end(); // pos after last trg char; EX: "]" in "[[A]]"; "|" in "[[A|b]]"
private void Lnki(Xop_lnki_tkn lnki) {
if (lnki.Pipe_count_is_zero()) { // trg only; EX: [[A]]
int trg_bgn = lnki.Trg_tkn().Src_bgn();
if (lnki.Ttl().ForceLiteralLink()) // literal link; skip colon; EX: [[:a]] -> a
++trg_bgn;
// add trg only
tmp_bfr.Add_mid(src, trg_bgn, trg_end);
++trg_bgn;
tmp_bfr.Add_mid(src, trg_bgn, lnki.Trg_tkn().Src_end()); // pos after last trg char; EX: "]" in "[[A]]"
}
else { // trg + caption + other; EX: [[A|b]]; [[File:A.png|thumb|caption]]
tmp_bfr.Add_mid(src, trg_end + 1, lnki.Brack_end_pos()); //+1 is len of pipe
List_adp args_list = lnki.Args_list();
int len = args_list.Len();
for (int i = 0; i < len; i++) {
if (i != 0) tmp_bfr.Add_byte_pipe();
Arg_nde_tkn arg = (Arg_nde_tkn)args_list.Get_at(i);
switch (arg.Arg_tid()) {
case Xop_lnki_arg_parser.Tid_caption:
// the caption is read from lnki.Caption_val_tkn(), not from arg; its sub-tokens go back through Tkn
Xop_tkn_itm caption_tkn = lnki.Caption_val_tkn();
int caption_subs_len = caption_tkn.Subs_len();
for (int j = 0; j < caption_subs_len; j++) {
Tkn(caption_tkn.Subs_get(j));
}
break;
default:
tmp_bfr.Add_mid(src, arg.Src_bgn(), arg.Src_end());
break;
}
}
}
// add tail; EX: [[A]]b
if (lnki.Tail_bgn() != -1)
tmp_bfr.Add_mid(src, lnki.Tail_bgn(), lnki.Tail_end());
}
/**
 * Serializes the children of an xml-node token: each sub-token of xnde is passed to Tkn together with
 * xnde (as the owning group) and the sub-token's index. Nothing is written for the xnde token itself.
 */
private static void Xnde(Xop_ctx ctx, byte[] src, Xop_xnde_tkn xnde, Bry_bfr tmp_bfr) {
    final int child_count = xnde.Subs_len();
    int child_idx = 0;
    while (child_idx < child_count) {
        Xop_tkn_itm child = xnde.Subs_get(child_idx);
        Tkn(ctx, src, child, xnde, child_idx, tmp_bfr);
        child_idx++;
    }
}
}

View File

@@ -15,20 +15,64 @@ Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.xtns.pfuncs.ttls; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.pfuncs.*;
import org.junit.*;
import gplx.core.consoles.*;
// JUnit tests for the {{anchorencode:}} parser function. Each test supplies wikitext and the expected encoded
// anchor (EX: spaces become "_", a double quote becomes ".22", a pipe becomes ".7C").
// NOTE(review): diff markers were stripped, so two generations of this class body are interleaved (hence the two
// "fxt" fields and two "init" methods):
//   - one-line tests that call Xop_fxt.Test_parse_tmpl_str_test(tmpl, "{{test}}", expd);
//   - multi-line tests that call Pfunc_anchorenchode_fxt.Test(raw, expd), including Lnki_caption_html (ISSUE#:460).
// Presumably the multi-line tests are the post-change code -- confirm against the repository.
public class Pfunc_anchorencode_tst {
private final Xop_fxt fxt = new Xop_fxt();
@Before public void init() {fxt.Reset();}
@Test public void Lnke() {fxt.Test_parse_tmpl_str_test("{{anchorencode:[irc://a b c]}}" , "{{test}}" , "b_c");}
@Test public void Apos_bold() {fxt.Test_parse_tmpl_str_test("{{anchorencode:a ''b'' c}}" , "{{test}}" , "a_b_c");}
@Test public void Apos_1() {fxt.Test_parse_tmpl_str_test("{{anchorencode:a 'b c}}" , "{{test}}" , "a_.27b_c");}
@Test public void Lnki_trg() {fxt.Test_parse_tmpl_str_test("{{anchorencode:a [[b]] c}}" , "{{test}}" , "a_b_c");}
@Test public void Lnki_caption() {fxt.Test_parse_tmpl_str_test("{{anchorencode:a [[b|c]] c}}" , "{{test}}" , "a_c_c");}
@Test public void Lnki_file() {fxt.Test_parse_tmpl_str_test("{{anchorencode:a [[Image:b|thumb|c]] d}}" , "{{test}}" , "a_thumb.7Cc_d");}
@Test public void Lnki_trailing() {fxt.Test_parse_tmpl_str_test("{{anchorencode:a [[b]]c d}}" , "{{test}}" , "a_bc_d");}
@Test public void Xnde() {fxt.Test_parse_tmpl_str_test("{{anchorencode:a <i>b</i> c}}" , "{{test}}" , "a_b_c");}
@Test public void Html_ncr() {fxt.Test_parse_tmpl_str_test("{{anchorencode:a &#34; b}}" , "{{test}}" , "a_.22_b");}
@Test public void Html_ref() {fxt.Test_parse_tmpl_str_test("{{anchorencode:a &quot; b}}" , "{{test}}" , "a_.22_b");}
@Test public void Tmpl_missing_basic() {fxt.Test_parse_tmpl_str_test("{{anchorencode:{{a}}}}" , "{{test}}" , "Template:a");}
@Test public void Tmpl_missing_colon() {fxt.Test_parse_tmpl_str_test("{{anchorencode:{{:a}}}}" , "{{test}}" , "a");} // NOTE: changed from "Template:A" to "a"; DATE:2016-06-24
@Test public void Lnki_literal() {fxt.Test_parse_tmpl_str_test("{{anchorencode:[[:a]]}}" , "{{test}}" , "a");}
private final Pfunc_anchorenchode_fxt fxt = new Pfunc_anchorenchode_fxt(Bool_.N);
@Before public void init() {fxt.Reset();}
// a single apostrophe is kept and encoded
@Test public void Text_apos() {
fxt.Test("{{anchorencode:a 'b c}}", "a_.27b_c");
}
// doubled apostrophes are dropped
@Test public void Apos_bold() {
fxt.Test("{{anchorencode:a ''b'' c}}", "a_b_c");
}
@Test public void Html_ncr() {
fxt.Test("{{anchorencode:a &#34; b}}", "a_.22_b");
}
@Test public void Html_ref() {
fxt.Test("{{anchorencode:a &quot; b}}", "a_.22_b");
}
@Test public void Lnke() {
fxt.Test("{{anchorencode:[irc://a b c]}}", "b_c");
}
@Test public void Lnki_trg() {
fxt.Test("{{anchorencode:a [[b]] c}}", "a_b_c");
}
@Test public void Lnki_caption() {
fxt.Test("{{anchorencode:a [[b|c]] c}}", "a_c_c");
}
// all args after the target are kept, joined by an encoded pipe
@Test public void Lnki_file() {
fxt.Test("{{anchorencode:a [[Image:b|thumb|123px|c]] d}}", "a_thumb.7C123px.7Cc_d");
}
@Test public void Lnki_trailing() {
fxt.Test("{{anchorencode:a [[b]]c d}}", "a_bc_d");
}
@Test public void Lnki_literal() {
fxt.Test("{{anchorencode:[[:a]]}}", "a");
}
// html inside a caption contributes only its inner text
@Test public void Lnki_caption_html() { // ISSUE#:460
fxt.Test("{{anchorencode:[[a|<span style=\"color:red\">b</span>]]}}", "b");
}
@Test public void Xnde() {
fxt.Test("{{anchorencode:a <i>b</i> c}}", "a_b_c");
}
@Test public void Tmpl_missing_basic() {
fxt.Test("{{anchorencode:{{xowa_na}}}}", "Template:xowa_na");
}
@Test public void Tmpl_missing_colon() {
fxt.Test("{{anchorencode:{{:a}}}}", "a"); // NOTE: changed from "Template:A" to "a"; DATE:2016-06-24
}
}
/**
 * Test fixture used by the anchorencode tests; wraps an Xop_fxt.
 * When constructed with dbg == true, Test() also writes the string produced by
 * Xop_fxt.Make__test_string(raw, expd) to the system console before running the check.
 */
class Pfunc_anchorenchode_fxt {
    private final boolean debug_enabled;
    private final Xop_fxt parser_fxt;

    public Pfunc_anchorenchode_fxt(boolean dbg) {
        this.parser_fxt = new Xop_fxt();
        this.debug_enabled = dbg;
    }

    /** Resets the wrapped Xop_fxt. */
    public void Reset() {
        this.parser_fxt.Reset();
    }

    /** Checks raw against expd through Xop_fxt.Test__parse__tmpl_to_html. */
    public void Test(String raw, String expd) {
        if (this.debug_enabled) {
            Console_adp__sys.Instance.Write_str(this.parser_fxt.Make__test_string(raw, expd));
        }
        this.parser_fxt.Test__parse__tmpl_to_html(raw, expd);
    }
}

View File

@@ -87,19 +87,8 @@ class Scrib_lib_ustring__find__fxt {
fxt.Test__proc__kvps__flat(lib, Scrib_lib_ustring.Invk_find, Scrib_kv_utl_.base1_many_(text, regx, bgn, plain), expd);
}
// Builds a wikitext string for checking a ustring_find call: "invk" is an
// {{#invoke:Sandbox/Gnosygnu|ustring_find|text|regx|bgn|plain}} call assembled from the arguments.
// NOTE(review): diff markers were stripped, so two variants of the body are interleaved (hence two return statements):
//   - one assembles a wikitable row by hand in a Bry_bfr (an #ifeq pass/fail cell, expd, the invocation, and the
//     invocation wrapped in nowiki);
//   - the other delegates to fxt.Parser_fxt().Make__test_string(invk, expd).
// Presumably the delegating variant is the post-change code -- confirm against the repository.
private String Bld_test_string(Object text, String regx, int bgn, boolean plain, String expd) {
/*
{| class=wikitable
! rslt !! expd !! actl !! code
|}
*/
String invk = "{{" + String_.Format("#invoke:Sandbox/Gnosygnu|ustring_find|{0}|{1}|{2}|{3}", Object_.Xto_str_strict_or_empty(text), regx, bgn, plain ? Bool_.True_str : Bool_.False_str) + "}}";
Bry_bfr bfr = Bry_bfr_.New();
bfr.Add_str_a7("|-\n");
bfr.Add_str_u8("| {{#ifeq:" + invk + "|" + expd + "|<span style='color:green'>pass</span>|<span style='color:red'>fail</span>}}\n");
bfr.Add_str_u8("| " + expd + "\n");
bfr.Add_str_u8("| " + invk + "\n");
bfr.Add_str_u8("| <nowiki>" + invk + "</nowiki>\n");
return bfr.To_str();
return fxt.Parser_fxt().Make__test_string(invk, expd);
}
}
/*