1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2024-10-27 20:34:16 +00:00

Parser: Parse html in internal link captions [#460]

This commit is contained in:
gnosygnu 2019-05-12 20:09:51 -04:00
parent 4d9072830c
commit ba35901865
10 changed files with 179 additions and 93 deletions

View File

@ -482,6 +482,21 @@ public class Xop_fxt {
// Test helper: runs raw through Exec__parse_to_html_w_skin and asserts that the result matches expd using Tfds.Eq_str_lines.
// raw: wikitext passed to the Exec helper
// expd: expected output; passed as the first (expected) arg of the comparison
public void Test__parse_to_html_w_skin(String raw, String expd) {
Tfds.Eq_str_lines(expd, Exec__parse_to_html_w_skin(raw));
}
// Builds one wikitext table row ("|-" plus four cells) for checking expr against expd on a wiki page.
// The cells line up with a table declared as:
//   {| class=wikitable
//   ! rslt !! expd !! actl !! code
//   |}
// expr: wikitext expression under test; emitted live (actl column) and inside <nowiki> (code column)
// expd: expected result; also used as the left side of the {{#ifeq:}} pass / fail check
public String Make__test_string(String expr, String expd) {
	// one String per cell, in column order
	String rslt_cell = "| {{#ifeq:" + expd + "|" + expr + "|<span style='color:green'>pass</span>|<span style='color:red'>fail</span>}}\n";
	String expd_cell = "| " + expd + "\n";
	String actl_cell = "| " + expr + "\n";
	String code_cell = "| <nowiki>" + expr + "</nowiki>\n";

	// row separator first, then the four cells
	Bry_bfr row = Bry_bfr_.New();
	row.Add_str_a7("|-\n");
	row.Add_str_u8(rslt_cell);
	row.Add_str_u8(expd_cell);
	row.Add_str_u8(actl_cell);
	row.Add_str_u8(code_cell);
	return row.To_str();
}
public static Xop_fxt New_app_html() {
Xop_fxt fxt = new Xop_fxt();
fxt.Wiki().Html_mgr().Page_wtr_mgr().Page_read_fmtr().Fmt_("~{page_data}");

View File

@ -20,6 +20,7 @@ import gplx.xowa.files.*;
import gplx.xowa.xtns.scribunto.*; import gplx.xowa.xtns.wbases.hwtrs.*; import gplx.xowa.xtns.pfuncs.ifs.*; import gplx.xowa.xtns.pfuncs.times.*; import gplx.xowa.xtns.pfuncs.ttls.*;
import gplx.xowa.xtns.math.*; import gplx.xowa.parsers.uniqs.*; import gplx.xowa.parsers.hdrs.sections.*;
public class Xow_parser_mgr {
private final Object thread_lock = new Object();
private final Xowe_wiki wiki; private final Xop_tkn_mkr tkn_mkr;
public Xow_parser_mgr(Xowe_wiki wiki) {
this.wiki = wiki; this.tkn_mkr = wiki.Appe().Parser_mgr().Tkn_mkr();
@ -64,17 +65,16 @@ public class Xow_parser_mgr {
tmpl_stack_ary_len = new_len;
return true;
} private byte[][] tmpl_stack_ary = Bry_.Ary_empty; private int tmpl_stack_ary_len = 0, tmpl_stack_ary_max = 0;
// Returns a Pfunc_anchorencode_mgr for the caller and flags it as used before handing it out.
// The cached member instance is returned when its Used() flag is off; otherwise a new instance is created and returned without being cached.
// calling_ctx is not read by this method.
public Pfunc_anchorencode_mgr Anchor_encoder_mgr__dflt_or_new(Xop_ctx calling_ctx) {
// lazy-instantiate anchor_encoder_mgr
if (anchor_encoder_mgr == null) anchor_encoder_mgr = new Pfunc_anchorencode_mgr(wiki);
// default to member instance
Pfunc_anchorencode_mgr rv = anchor_encoder_mgr;
// if used, create a new one; only occurs if {{anchorencode}} is nested
if (rv.Used()) rv = new Pfunc_anchorencode_mgr(wiki);
// flag whichever instance is returned; this method never clears the flag, so resetting it is left to the caller
rv.Used_(Bool_.Y);
return rv;
} private Pfunc_anchorencode_mgr anchor_encoder_mgr;
// Returns the parser used for wikitext inside {{anchorencode:}}; it is built on the first request and the same instance is returned afterwards.
// Both the null-check and the creation run while holding thread_lock.
public Xop_parser Anchor_encoder_parser_or_new() {
	synchronized (thread_lock) {
		// already built: hand back the cached instance
		if (anchor_encoder_parser != null) return anchor_encoder_parser;

		// first request: create from the main parser's tmpl lxr_mgr plus the anchor-encoder lxr_mgr, then init for this wiki and its lang
		anchor_encoder_parser = Xop_parser.new_(wiki, wiki.Parser_mgr().Main().Tmpl_lxr_mgr(), Xop_lxr_mgr.new_anchor_encoder());
		anchor_encoder_parser.Init_by_wiki(wiki);
		anchor_encoder_parser.Init_by_lang(wiki.Lang());
		return anchor_encoder_parser;
	}
}
private Xop_parser anchor_encoder_parser;
public void Init_by_wiki() {
math__core.Init_by_wiki(wiki);
hdr__section_editable__mgr.Init_by_wiki(wiki);

View File

@ -50,6 +50,13 @@ public class Xop_lnki_tkn extends Xop_tkn_itm_base implements gplx.xowa.wikis.pa
public int Brack_end_pos() {return brack_end_pos;} public void Brack_end_pos_(int v) {this.brack_end_pos = v;} private int brack_end_pos;
public boolean Xtn_sites_link() {return xtn_sites_link;} public void Xtn_sites_link_(boolean v) {xtn_sites_link = v;} private boolean xtn_sites_link;
public Xoh_file_fmtr Lnki_file_wkr() {return lnki_file_wkr;} public void Lnki_file_wkr_(Xoh_file_fmtr v) {lnki_file_wkr = v;} private Xoh_file_fmtr lnki_file_wkr;
// Returns the args recorded for this lnki through Args_list_add; never null.
// The list is created on first access, so a caller can iterate it directly (zero items) even when no arg was ever recorded,
// rather than failing with a NullPointerException on the returned value.
public List_adp Args_list() {
	if (args_list == null)
		args_list = List_adp_.New();
	return args_list;
}
// Records one parsed arg on this lnki.
// arg: the arg node; it is tagged with arg_tid (via Arg_tid_) before being appended
// arg_tid: the arg's type id as passed by the caller
public void Args_list_add(Arg_nde_tkn arg, int arg_tid) {
	arg.Arg_tid_(arg_tid);
	Args_list().Add(arg); // goes through the getter so the lazy creation lives in one place
} private List_adp args_list;
public byte[] Target;
public byte[] Ttl_ary() {
return ttl.ForceLiteralLink() || ns_id != Xow_ns_.Tid__main // if [[:]] or non-main (Category, Template)

View File

@ -28,6 +28,7 @@ public class Xop_lnki_wkr implements Xop_ctx_wkr, Xop_arg_wkr {
}
public void Page_end(Xop_ctx ctx, Xop_root_tkn root, byte[] src, int src_len) {}
public Xop_file_logger File_logger() {return lnki_logger;} public Xop_lnki_wkr File_logger_(Xop_file_logger v) {lnki_logger = v; return this;} private Xop_file_logger lnki_logger = Xop_file_logger_.Noop;
// Getter / setter for the build_args_list flag. When the flag is on, this worker's arg handling also records each arg on the lnki through Args_list_add (used by anchorencode; ISSUE#:460).
// The backing field has no initializer, so the flag is false until Build_args_list_ is called.
public boolean Build_args_list() {return build_args_list;} public void Build_args_list_(boolean v) {build_args_list = v;} private boolean build_args_list;
public void Auto_close(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, int bgn_pos, int cur_pos, Xop_tkn_itm tkn) {
Xop_lnki_tkn lnki = (Xop_lnki_tkn)tkn;
lnki.Tkn_tid_to_txt();
@ -196,6 +197,11 @@ public class Xop_lnki_wkr implements Xop_ctx_wkr, Xop_arg_wkr {
case Xop_lnki_arg_parser.Tid_page: Xop_lnki_wkr_.Page_parse(ctx, src, number_parser, lnki, arg); break;
case Xop_lnki_arg_parser.Tid_thumbtime: Xop_lnki_wkr_.Thumbtime_parse(ctx, src, number_parser, lnki, arg); break;
}
// anchorencode uses build_args_list; ISSUE#:460 DATE:2019-05-12
if (build_args_list) {
lnki.Args_list_add(arg, arg_tid);
}
}
return true;
} catch (Exception e) {

View File

@ -24,6 +24,7 @@ public class Arg_nde_tkn extends Xop_tkn_itm_base {
public Arg_itm_tkn Val_tkn() {return val_tkn;} public Arg_nde_tkn Val_tkn_(Arg_itm_tkn v) {val_tkn = v; return this;} Arg_itm_tkn val_tkn = Arg_itm_tkn_null.Null_arg_itm;
@gplx.Virtual public boolean KeyTkn_exists() {return key_tkn != Arg_itm_tkn_null.Null_arg_itm;}
public Xop_tkn_itm Eq_tkn() {return eq_tkn;} public Arg_nde_tkn Eq_tkn_(Xop_tkn_itm v) {eq_tkn = v; return this;} private Xop_tkn_itm eq_tkn = Xop_tkn_null.Null_tkn;
// Getter / setter for this arg's type id. The field starts as Int_.Max_value (presumably meaning "not assigned yet" -- confirm) and only changes when Arg_tid_ is called.
public int Arg_tid() {return arg_tid;} public void Arg_tid_(int v) {arg_tid = v;} private int arg_tid = Int_.Max_value;
@Override public void Tmpl_fmt(Xop_ctx ctx, byte[] src, Xot_fmtr fmtr) {fmtr.Reg_arg(ctx, src, arg_idx, this);}
@Override public void Tmpl_compile(Xop_ctx ctx, byte[] src, Xot_compile_data prep_data) {
key_tkn.Tmpl_compile(ctx, src, prep_data);

View File

@ -21,12 +21,12 @@ public class Pfunc_anchorencode extends Pf_func_base { // EX: {{anchorencode:a b
@Override public int Id() {return Xol_kwd_grp_.Id_url_anchorencode;}
@Override public Pf_func New(int id, byte[] name) {return new Pfunc_anchorencode().Name_(name);}
@Override public void Func_evaluate(Bry_bfr bfr, Xop_ctx ctx, Xot_invk caller, Xot_invk self, byte[] src) {
byte[] raw_bry = Eval_argx(ctx, src, caller, self); if (raw_bry == Bry_.Empty) return;
byte[] raw_bry = Eval_argx(ctx, src, caller, self);
if (Bry_.Len_gt_0(raw_bry))
Anchor_encode(bfr, ctx, raw_bry);
}
public static void Anchor_encode(Bry_bfr bfr, Xop_ctx ctx, byte[] raw) {
Pfunc_anchorencode_mgr mgr = ctx.Wiki().Parser_mgr().Anchor_encoder_mgr__dflt_or_new(ctx);
try {mgr.Encode_anchor(bfr, ctx, raw);}
finally {mgr.Used_(Bool_.N);}
Pfunc_anchorencode_mgr mgr = new Pfunc_anchorencode_mgr(ctx.Wiki().Parser_mgr().Anchor_encoder_parser_or_new(), ctx, raw);
mgr.Encode_anchor(bfr);
}
}

View File

@ -17,44 +17,60 @@ package gplx.xowa.xtns.pfuncs.ttls; import gplx.*; import gplx.xowa.*; import gp
import gplx.core.brys.*; import gplx.core.btries.*;
import gplx.langs.htmls.encoders.*;
import gplx.xowa.parsers.*; import gplx.xowa.parsers.amps.*; import gplx.xowa.parsers.lnkes.*; import gplx.xowa.parsers.lnkis.*; import gplx.xowa.parsers.xndes.*; import gplx.xowa.parsers.tmpls.*;
public class Pfunc_anchorencode_mgr { // TS
public class Pfunc_anchorencode_mgr {
private final Xop_parser parser; // create a special-parser for handling wikitext inside {{anchorencode:}}
private final Bry_bfr tmp_bfr = Bry_bfr_.Reset(255);
public Pfunc_anchorencode_mgr(Xowe_wiki wiki) {
this.parser = Xop_parser.new_(wiki, wiki.Parser_mgr().Main().Tmpl_lxr_mgr(), Xop_lxr_mgr.new_anchor_encoder());
parser.Init_by_wiki(wiki);
parser.Init_by_lang(wiki.Lang());
private final Bry_bfr tmp_bfr = Bry_bfr_.New();
private final Xop_ctx ctx;
private final byte[] src;
// Sets up an encoder for one piece of {{anchorencode:}} wikitext.
// parser: parser to run over src
// owner_ctx: caller's ctx; only used to derive the sub ctx this instance works with
// src: raw wikitext bytes to encode
public Pfunc_anchorencode_mgr(Xop_parser parser, Xop_ctx owner_ctx, byte[] src) {
	// derive a sub ctx from the caller's ctx and switch on args-list building for its lnki worker
	Xop_ctx sub_ctx = Xop_ctx.New__sub__reuse_page(owner_ctx);
	sub_ctx.Lnki().Build_args_list_(true);

	this.parser = parser;
	this.src = src;
	this.ctx = sub_ctx;
}
public boolean Used() {return used;} private boolean used;
public void Used_(boolean v) {used = v;}
public void Encode_anchor(Bry_bfr bfr, Xop_ctx ctx, byte[] src) {
// parse {{anchorencode:}}; note that wikitext inside anchorencode gets serialized by different rules
public void Encode_anchor(Bry_bfr bfr) {
// parse
Xop_tkn_mkr tkn_mkr = ctx.Tkn_mkr();
boolean para_enabled = ctx.Para().Enabled();
ctx.Para().Enabled_n_(); // HACK: disable para
try {
Xop_root_tkn root = tkn_mkr.Root(src);
parser.Parse_wtxt_to_wdom(root, ctx, tkn_mkr, src, Xop_parser_.Doc_bgn_bos);
int subs_len = root.Subs_len();
for (int i = 0; i < subs_len; i++) {
Xop_tkn_itm sub = root.Subs_get(i);
Tkn(ctx, src, sub, root, i, tmp_bfr);
Tkn(sub);
}
} finally {ctx.Para().Enabled_(para_enabled);}
// write to bfr and encode it
byte[] unencoded = tmp_bfr.To_bry_and_clear();
Gfo_url_encoder_.Id.Encode(tmp_bfr, unencoded);
bfr.Add_bfr_and_clear(tmp_bfr);
}
private static void Tkn(Xop_ctx ctx, byte[] src, Xop_tkn_itm sub, Xop_tkn_grp grp, int sub_idx, Bry_bfr tmp_bfr) {
private void Tkn(Xop_tkn_itm sub) {
switch (sub.Tkn_tid()) {
case Xop_tkn_itm_.Tid_lnke: Lnke(src, (Xop_lnke_tkn)sub, tmp_bfr); break; // FUTURE: need to move number to lnke_tkn so that number will be correct/consistent?
case Xop_tkn_itm_.Tid_lnki: Lnki(src, (Xop_lnki_tkn)sub, tmp_bfr); break;
case Xop_tkn_itm_.Tid_apos: break; // noop
case Xop_tkn_itm_.Tid_xnde: Xnde(ctx, src, (Xop_xnde_tkn)sub, tmp_bfr); break;
case Xop_tkn_itm_.Tid_html_ncr: tmp_bfr.Add_u8_int(((Xop_amp_tkn_num)sub).Val()); break;
case Xop_tkn_itm_.Tid_html_ref: tmp_bfr.Add_u8_int(((Xop_amp_tkn_ent)sub).Char_int()); break;
case Xop_tkn_itm_.Tid_apos: // noop
break;
case Xop_tkn_itm_.Tid_html_ncr:
tmp_bfr.Add_u8_int(((Xop_amp_tkn_num)sub).Val());
break;
case Xop_tkn_itm_.Tid_html_ref:
tmp_bfr.Add_u8_int(((Xop_amp_tkn_ent)sub).Char_int());
break;
case Xop_tkn_itm_.Tid_lnke: { // FUTURE: need to move number to lnke_tkn so that number will be correct/consistent?
Xop_lnke_tkn lnke = (Xop_lnke_tkn)sub;
int subs_len = lnke.Subs_len();
for (int i = 0; i < subs_len; i++) {
Xop_tkn_itm lnke_sub = lnke.Subs_get(i);
tmp_bfr.Add_mid(src, lnke_sub.Src_bgn(), lnke_sub.Src_end());
}
break;
}
case Xop_tkn_itm_.Tid_xnde: {
Xop_xnde_tkn xnde = (Xop_xnde_tkn)sub;
int subs_len = xnde.Subs_len();
for (int i = 0; i < subs_len; i++) {
Tkn(xnde.Subs_get(i));
}
break;
}
case Xop_tkn_itm_.Tid_tmpl_invk:
Xot_invk_tkn invk_tkn = (Xot_invk_tkn)sub;
Arg_itm_tkn name_tkn = invk_tkn.Name_tkn().Key_tkn();
@ -66,36 +82,44 @@ public class Pfunc_anchorencode_mgr { // TS
else // regular tmpl; EX: {{a}}
tmp_bfr.Add(ctx.Wiki().Ns_mgr().Ns_template().Gen_ttl(name_ary));
break;
default: tmp_bfr.Add_mid(src, sub.Src_bgn_grp(grp, sub_idx), sub.Src_end_grp(grp, sub_idx)); break;
case Xop_tkn_itm_.Tid_lnki:
Lnki((Xop_lnki_tkn)sub);
break;
default:
tmp_bfr.Add_mid(src, sub.Src_bgn(), sub.Src_end());
break;
}
}
private static void Lnke(byte[] src, Xop_lnke_tkn lnke, Bry_bfr tmp_bfr) {
int subs_len = lnke.Subs_len();
for (int i = 0; i < subs_len; i++) {
Xop_tkn_itm lnke_sub = lnke.Subs_get(i);
tmp_bfr.Add_mid(src, lnke_sub.Src_bgn_grp(lnke, i), lnke_sub.Src_end_grp(lnke, i));
}
}
private static void Lnki(byte[] src, Xop_lnki_tkn lnki, Bry_bfr tmp_bfr) {
int trg_end = lnki.Trg_tkn().Src_end(); // pos after last trg char; EX: "]" in "[[A]]"; "|" in "[[A|b]]"
private void Lnki(Xop_lnki_tkn lnki) {
if (lnki.Pipe_count_is_zero()) { // trg only; EX: [[A]]
int trg_bgn = lnki.Trg_tkn().Src_bgn();
if (lnki.Ttl().ForceLiteralLink()) // literal link; skip colon; EX: [[:a]] -> a
++trg_bgn;
// add trg only
tmp_bfr.Add_mid(src, trg_bgn, trg_end);
tmp_bfr.Add_mid(src, trg_bgn, lnki.Trg_tkn().Src_end()); // pos after last trg char; EX: "]" in "[[A]]"
}
else { // trg + caption + other; EX: [[A|b]]; [[File:A.png|thumb|caption]]
tmp_bfr.Add_mid(src, trg_end + 1, lnki.Brack_end_pos()); //+1 is len of pipe
List_adp args_list = lnki.Args_list();
int len = args_list.Len();
for (int i = 0; i < len; i++) {
if (i != 0) tmp_bfr.Add_byte_pipe();
Arg_nde_tkn arg = (Arg_nde_tkn)args_list.Get_at(i);
switch (arg.Arg_tid()) {
case Xop_lnki_arg_parser.Tid_caption:
Xop_tkn_itm caption_tkn = lnki.Caption_val_tkn();
int caption_subs_len = caption_tkn.Subs_len();
for (int j = 0; j < caption_subs_len; j++) {
Tkn(caption_tkn.Subs_get(j));
}
break;
default:
tmp_bfr.Add_mid(src, arg.Src_bgn(), arg.Src_end());
break;
}
}
}
// add tail; EX: [[A]]b
if (lnki.Tail_bgn() != -1)
tmp_bfr.Add_mid(src, lnki.Tail_bgn(), lnki.Tail_end());
}
private static void Xnde(Xop_ctx ctx, byte[] src, Xop_xnde_tkn xnde, Bry_bfr tmp_bfr) {
int subs_len = xnde.Subs_len();
for (int i = 0; i < subs_len; i++) {
Tkn(ctx, src, xnde.Subs_get(i), xnde, i, tmp_bfr);
}
}
}

View File

@ -15,20 +15,64 @@ Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.xtns.pfuncs.ttls; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.pfuncs.*;
import org.junit.*;
import gplx.core.consoles.*;
public class Pfunc_anchorencode_tst {
private final Xop_fxt fxt = new Xop_fxt();
private final Pfunc_anchorenchode_fxt fxt = new Pfunc_anchorenchode_fxt(Bool_.N);
@Before public void init() {fxt.Reset();}
@Test public void Lnke() {fxt.Test_parse_tmpl_str_test("{{anchorencode:[irc://a b c]}}" , "{{test}}" , "b_c");}
@Test public void Apos_bold() {fxt.Test_parse_tmpl_str_test("{{anchorencode:a ''b'' c}}" , "{{test}}" , "a_b_c");}
@Test public void Apos_1() {fxt.Test_parse_tmpl_str_test("{{anchorencode:a 'b c}}" , "{{test}}" , "a_.27b_c");}
@Test public void Lnki_trg() {fxt.Test_parse_tmpl_str_test("{{anchorencode:a [[b]] c}}" , "{{test}}" , "a_b_c");}
@Test public void Lnki_caption() {fxt.Test_parse_tmpl_str_test("{{anchorencode:a [[b|c]] c}}" , "{{test}}" , "a_c_c");}
@Test public void Lnki_file() {fxt.Test_parse_tmpl_str_test("{{anchorencode:a [[Image:b|thumb|c]] d}}" , "{{test}}" , "a_thumb.7Cc_d");}
@Test public void Lnki_trailing() {fxt.Test_parse_tmpl_str_test("{{anchorencode:a [[b]]c d}}" , "{{test}}" , "a_bc_d");}
@Test public void Xnde() {fxt.Test_parse_tmpl_str_test("{{anchorencode:a <i>b</i> c}}" , "{{test}}" , "a_b_c");}
@Test public void Html_ncr() {fxt.Test_parse_tmpl_str_test("{{anchorencode:a &#34; b}}" , "{{test}}" , "a_.22_b");}
@Test public void Html_ref() {fxt.Test_parse_tmpl_str_test("{{anchorencode:a &quot; b}}" , "{{test}}" , "a_.22_b");}
@Test public void Tmpl_missing_basic() {fxt.Test_parse_tmpl_str_test("{{anchorencode:{{a}}}}" , "{{test}}" , "Template:a");}
@Test public void Tmpl_missing_colon() {fxt.Test_parse_tmpl_str_test("{{anchorencode:{{:a}}}}" , "{{test}}" , "a");} // NOTE: changed from "Template:A" to "a"; DATE:2016-06-24
@Test public void Lnki_literal() {fxt.Test_parse_tmpl_str_test("{{anchorencode:[[:a]]}}" , "{{test}}" , "a");}
@Test public void Text_apos() {
fxt.Test("{{anchorencode:a 'b c}}", "a_.27b_c");
}
@Test public void Apos_bold() {
fxt.Test("{{anchorencode:a ''b'' c}}", "a_b_c");
}
@Test public void Html_ncr() {
fxt.Test("{{anchorencode:a &#34; b}}", "a_.22_b");
}
@Test public void Html_ref() {
fxt.Test("{{anchorencode:a &quot; b}}", "a_.22_b");
}
@Test public void Lnke() {
fxt.Test("{{anchorencode:[irc://a b c]}}", "b_c");
}
@Test public void Lnki_trg() {
fxt.Test("{{anchorencode:a [[b]] c}}", "a_b_c");
}
@Test public void Lnki_caption() {
fxt.Test("{{anchorencode:a [[b|c]] c}}", "a_c_c");
}
@Test public void Lnki_file() {
fxt.Test("{{anchorencode:a [[Image:b|thumb|123px|c]] d}}", "a_thumb.7C123px.7Cc_d");
}
@Test public void Lnki_trailing() {
fxt.Test("{{anchorencode:a [[b]]c d}}", "a_bc_d");
}
@Test public void Lnki_literal() {
fxt.Test("{{anchorencode:[[:a]]}}", "a");
}
@Test public void Lnki_caption_html() { // ISSUE#:460
fxt.Test("{{anchorencode:[[a|<span style=\"color:red\">b</span>]]}}", "b");
}
@Test public void Xnde() {
fxt.Test("{{anchorencode:a <i>b</i> c}}", "a_b_c");
}
@Test public void Tmpl_missing_basic() {
fxt.Test("{{anchorencode:{{xowa_na}}}}", "Template:xowa_na");
}
@Test public void Tmpl_missing_colon() {
fxt.Test("{{anchorencode:{{:a}}}}", "a"); // NOTE: changed from "Template:A" to "a"; DATE:2016-06-24
}
}
// Test fixture for {{anchorencode}} cases: delegates to an Xop_fxt and, when dbg is on, first writes a wikitext table row for the case to the system console.
class Pfunc_anchorenchode_fxt {
	private final boolean dbg;
	private final Xop_fxt fxt = new Xop_fxt();
	// dbg: when true, Test() also prints the row produced by Make__test_string
	public Pfunc_anchorenchode_fxt(boolean dbg) {this.dbg = dbg;}
	// Resets the wrapped parser fixture
	public void Reset() {fxt.Reset();}
	// Asserts that raw evaluates to expd through Test__parse__tmpl_to_html
	public void Test(String raw, String expd) {
		if (dbg) {
			String row = fxt.Make__test_string(raw, expd);
			Console_adp__sys.Instance.Write_str(row);
		}
		fxt.Test__parse__tmpl_to_html(raw, expd);
	}
}

View File

@ -87,19 +87,8 @@ class Scrib_lib_ustring__find__fxt {
fxt.Test__proc__kvps__flat(lib, Scrib_lib_ustring.Invk_find, Scrib_kv_utl_.base1_many_(text, regx, bgn, plain), expd);
}
private String Bld_test_string(Object text, String regx, int bgn, boolean plain, String expd) {
/*
{| class=wikitable
! rslt !! expd !! actl !! code
|}
*/
String invk = "{{" + String_.Format("#invoke:Sandbox/Gnosygnu|ustring_find|{0}|{1}|{2}|{3}", Object_.Xto_str_strict_or_empty(text), regx, bgn, plain ? Bool_.True_str : Bool_.False_str) + "}}";
Bry_bfr bfr = Bry_bfr_.New();
bfr.Add_str_a7("|-\n");
bfr.Add_str_u8("| {{#ifeq:" + invk + "|" + expd + "|<span style='color:green'>pass</span>|<span style='color:red'>fail</span>}}\n");
bfr.Add_str_u8("| " + expd + "\n");
bfr.Add_str_u8("| " + invk + "\n");
bfr.Add_str_u8("| <nowiki>" + invk + "</nowiki>\n");
return bfr.To_str();
return fxt.Parser_fxt().Make__test_string(invk, expd);
}
}
/*