1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2026-03-02 03:49:30 +00:00

Parser: Fix inconsistent html-encoding of {{#tag}} calls [#312]

This commit is contained in:
gnosygnu
2018-12-25 12:27:23 -05:00
parent 54ad1d697d
commit 1d54b8a756
13 changed files with 294 additions and 163 deletions

View File

@@ -16,12 +16,23 @@ Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
package gplx.xowa.xtns.pfuncs; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*;
import gplx.core.primitives.*;
import gplx.xowa.langs.*; import gplx.xowa.langs.msgs.*; import gplx.xowa.langs.kwds.*;
import gplx.xowa.xtns.pfuncs.ifs.*; import gplx.xowa.xtns.pfuncs.times.*; import gplx.xowa.xtns.pfuncs.numbers.*; import gplx.xowa.xtns.pfuncs.ttls.*; import gplx.xowa.xtns.pfuncs.langs.*; import gplx.xowa.xtns.pfuncs.strings.*; import gplx.xowa.xtns.pfuncs.stringutils.*; import gplx.xowa.xtns.pfuncs.pages.*; import gplx.xowa.xtns.pfuncs.wikis.*;
import gplx.xowa.xtns.pfuncs.ifs.*; import gplx.xowa.xtns.pfuncs.times.*; import gplx.xowa.xtns.pfuncs.numbers.*; import gplx.xowa.xtns.pfuncs.ttls.*; import gplx.xowa.xtns.pfuncs.langs.*; import gplx.xowa.xtns.pfuncs.strings.*; import gplx.xowa.xtns.pfuncs.tags.*; import gplx.xowa.xtns.pfuncs.stringutils.*; import gplx.xowa.xtns.pfuncs.pages.*; import gplx.xowa.xtns.pfuncs.wikis.*;
import gplx.xowa.parsers.*; import gplx.xowa.parsers.tmpls.*;
import gplx.xowa.wikis.domains.*;
public class Pf_func_ {
// Convenience overload of Eval_arg_or that supplies Bry_.Empty as the "or" value.
public static byte[] Eval_arg_or_empty(Xop_ctx ctx, byte[] src, Xot_invk caller, Xot_invk self, int self_args_len, int i) {
	byte[] fallback = Bry_.Empty;
	return Eval_arg_or(ctx, src, caller, self, self_args_len, i, fallback);
}
// ASCII colon as a byte constant; NOTE(review): presumably the delimiter between a parser-function name and its first arg (EX: "#tag:ref") -- confirm against callers
public static final byte Name_dlm = Byte_ascii.Colon;
// Evaluates the key token and the val token of arg `i` separately and stores the results in rslt[0] (key) and rslt[1] (val);
// each result is read out of tmp_bfr with To_bry_and_clear_and_trim.
// Returns false (leaving rslt untouched) when `i` is not less than self_args_len; returns true otherwise.
// NOTE: must call Tmpl_evaluate; don't try to parse key / val by hand; EX:{{#tag:pre|a|{{#switch:a|a=id}}=c}}
public static boolean Eval_arg_to_kvp(byte[][] rslt, Xop_ctx ctx, byte[] src, Xot_invk caller, Xot_invk self, int self_args_len, Bry_bfr tmp_bfr, int i) {
	boolean arg_exists = i < self_args_len;
	if (arg_exists) {
		Arg_nde_tkn arg = self.Args_get_by_idx(i);

		// tmp_bfr is shared by both parts, so the key must be read out before the val is evaluated
		arg.Key_tkn().Tmpl_evaluate(ctx, src, caller, tmp_bfr);
		byte[] key = tmp_bfr.To_bry_and_clear_and_trim();
		rslt[0] = key;

		arg.Val_tkn().Tmpl_evaluate(ctx, src, caller, tmp_bfr);
		byte[] val = tmp_bfr.To_bry_and_clear_and_trim();
		rslt[1] = val;
	}
	return arg_exists;
}
// Convenience overload of Eval_arg_or that supplies Bry_.Empty as the "or" value.
public static byte[] Eval_arg_or_empty(Xop_ctx ctx, byte[] src, Xot_invk caller, Xot_invk self, int self_args_len, int i) {
	byte[] fallback = Bry_.Empty;
	return Eval_arg_or(ctx, src, caller, self, self_args_len, i, fallback);
}
public static byte[] Eval_arg_or(Xop_ctx ctx, byte[] src, Xot_invk caller, Xot_invk self, int self_args_len, int i, byte[] or) {
if (i >= self_args_len) return or;
Arg_nde_tkn nde = self.Args_get_by_idx(i);

View File

@@ -1,60 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.xtns.pfuncs.strings; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.pfuncs.*;
import gplx.xowa.langs.*; import gplx.xowa.langs.kwds.*;
import gplx.xowa.parsers.*; import gplx.xowa.parsers.xndes.*; import gplx.xowa.parsers.tmpls.*;
public class Pfunc_tag extends Pf_func_base {// REF:CoreParserFunctions.php
	@Override public int Id() {
		return Xol_kwd_grp_.Id_misc_tag;
	}
	@Override public Pf_func New(int id, byte[] name) {
		return new Pfunc_tag().Name_(name);
	}
	@Override public boolean Func_require_colon_arg() {
		return true;
	}
	// Builds an <xnde> string from a {{#tag}} call, registers it with the wiki's Uniq_mgr and appends the returned key to bfr.
	// EX: {{#tag:ref|a|name=1}} -> <ref name='1'>a</ref>
	@Override public void Func_evaluate(Bry_bfr bfr, Xop_ctx ctx, Xot_invk caller, Xot_invk self, byte[] src) {
		Bry_bfr html_bfr = ctx.Wiki().Utl__bfr_mkr().Get_b512();
		try {
			byte[] tag_name = Eval_argx(ctx, src, caller, self);
			if (tag_name.length == 0) return; // nothing to build; the finally below still releases html_bfr

			// open tag; EX: "<ref"
			html_bfr.Add_byte(Byte_ascii.Lt).Add(tag_name);

			// attributes; EX: "|a=1|b=2" -> "a='1' b='2'"
			int args_len = self.Args_len();
			Write_atrs(html_bfr, ctx, src, caller, self, args_len);

			// end of open tag; EX: ">"
			html_bfr.Add_byte(Byte_ascii.Gt);

			// innerText is arg 0; there is none when no args were passed; EX: "{{#tag:ref}}" -> "<ref></ref>"
			if (args_len > 0) {
				byte[] inner_text = Pf_func_.Eval_arg_or_empty(ctx, src, caller, self, args_len, 0);
				html_bfr.Add(inner_text);
			}

			// close tag; EX: "</ref>"
			html_bfr.Add_byte(Byte_ascii.Lt).Add_byte(Byte_ascii.Slash).Add(tag_name).Add_byte(Byte_ascii.Gt);

			// add to UNIQ hash; DATE:2017-03-31
			byte[] html = html_bfr.To_bry_and_clear();
			bfr.Add(ctx.Wiki().Parser_mgr().Uniq_mgr().Add(Bool_.Y, tag_name, html));
		}
		finally {
			html_bfr.Mkr_rls();
		}
	}
	// Writes every non-empty arg after the first as " key="val""; arg 0 is the innerText and is never written here.
	private static void Write_atrs(Bry_bfr html_bfr, Xop_ctx ctx, byte[] src, Xot_invk caller, Xot_invk self, int args_len) {
		if (args_len <= 1) return; // NOTE: 1 b/c 0 is innerText
		Pfunc_tag_kvp_wtr atr_wtr = new Pfunc_tag_kvp_wtr();
		for (int idx = 1; idx < args_len; idx++) {
			// NOTE: must evaluate the whole arg; don't try to parse arg_tkn's key / val separately; EX:{{#tag:pre|a|{{#switch:a|a=id}}=c}}
			byte[] atr = Pf_func_.Eval_arg_or_empty(ctx, src, caller, self, args_len, idx);
			if (atr.length != 0) {                        // empty atrs are skipped
				html_bfr.Add_byte(Byte_ascii.Space);      // space between html_args
				atr_wtr.Write_as_html_atr(html_bfr, atr); // html_arg itself
			}
		}
	}
}

View File

@@ -1,76 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.xtns.pfuncs.strings; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.pfuncs.*;
class Pfunc_tag_kvp_wtr {
	// positions inside the most recently parsed kvp; -1 means the part was not found
	private int key_bgn, key_end;
	private int val_bgn, val_end;
	// Parses kvp_bry as key=val and appends it to tmp as key="val", with the val html-escaped.
	// Nothing is written when no val was found; when only a val was found it is written as "val" with no key or "=".
	public void Write_as_html_atr(Bry_bfr tmp, byte[] kvp_bry) {
		Parse(kvp_bry);
		if (val_bgn == -1) return; // ignore atrs with empty vals: EX:{{#tag:ref||group=}} PAGE:ru.w:Колчак,_Александр_Васильевич DATE:2014-07-03

		boolean has_key = key_bgn != -1;
		if (has_key) {
			tmp.Add(Bry_.Mid(kvp_bry, key_bgn, key_end));
			tmp.Add_byte(Byte_ascii.Eq);
		}
		tmp.Add_byte(Byte_ascii.Quote);
		// PURPOSE: escape html in atrs; PAGE:fr.w:France; DATE:2017-06-01
		// previously: gplx.langs.htmls.encoders.Gfo_url_encoder_.Id.Encode(tmp, kvp_bry, val_bgn, val_end);
		gplx.langs.htmls.Gfh_utl.Escape_html_to_bfr(tmp, kvp_bry, val_bgn, val_end, true, true, true, true, true);
		tmp.Add_byte(Byte_ascii.Quote);
	}
	// Single pass over src that fills key_bgn / key_end / val_bgn / val_end.
	// The first "=" closes the key; whatever item is open at the end of src becomes the val.
	private void Parse(byte[] src) {
		// NOTE: must clear; DATE:2014-07-20
		key_bgn = -1; key_end = -1;
		val_bgn = -1; val_end = -1;

		final int len = src.length;
		int bgn = -1, end = -1;               // bounds of the item currently being scanned
		byte open_quote = Byte_ascii.Null;    // quote char that opened the current item
		boolean in_key = true;
		for (int pos = 0; pos < len; ++pos) {
			byte cur = src[pos];
			if (cur == Byte_ascii.Eq) {
				if (!in_key) continue;        // only the first "=" splits key from val
				in_key = false;
				key_bgn = bgn;
				key_end = end == -1 ? pos : end;
				bgn = -1;
				end = -1;
			}
			// quote-char encountered ...
			// NOTE: quotes cannot be escaped; also, in case of multiple quotes (a="b"c") regx uses first two quotes; REF:MW:CoreParserFunctions.php|tagObj
			else if (cur == Byte_ascii.Quote || cur == Byte_ascii.Apos) {
				if (bgn == -1) {              // ... quote hasn't started; start quote
					bgn = pos + 1;
					open_quote = cur;
				}
				// ... quote has started and hasn't ended; note that this ends quote immediately; EX: 'id="a"b"' -> 'id=a' x> 'id=a"b'
				// only the same quote char ends it; EX: id="a'b" -> id=a'b x> id=a; PAGE:en.s:The_formative_period_in_Colby%27s_history; DATE:2016-06-23
				else if (end == -1 && cur == open_quote) {
					end = pos;
				}
			}
			else if (cur == Byte_ascii.Space || cur == Byte_ascii.Tab || cur == Byte_ascii.Nl) {
				// NOTE: do not need to handle ws, b/c argBldr will trim it EX: {{#tag|a| b = c }}; " b " and " c " are automatically trimmed
			}
			else if (bgn == -1) {
				bgn = pos;                    // first non-ws, non-quote byte starts the item
			}
		}
		val_bgn = bgn;
		val_end = end == -1 ? len : end;
	}
}

View File

@@ -0,0 +1,103 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.xtns.pfuncs.tags; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.pfuncs.*;
import gplx.xowa.langs.*; import gplx.xowa.langs.kwds.*;
import gplx.xowa.parsers.*; import gplx.xowa.parsers.xndes.*; import gplx.xowa.parsers.tmpls.*;
public class Pfunc_tag extends Pf_func_base {// REF:/includes/parser/CoreParserFunctions.php|tagObj
	@Override public int Id() {return Xol_kwd_grp_.Id_misc_tag;}
	@Override public Pf_func New(int id, byte[] name) {return new Pfunc_tag().Name_(name);}
	@Override public boolean Func_require_colon_arg() {return true;}
	// make <xnde> based on {{#tag}}; EX: {{#tag:ref|a|name=1}} -> <ref name='1'>a</ref>
	// The html is produced by a Tag_html_wkr, stored in the wiki's Uniq_mgr, and only the returned key is appended to bfr.
	// Nothing is appended when the tag name evaluates to empty.
	@Override public void Func_evaluate(Bry_bfr bfr, Xop_ctx ctx, Xot_invk caller, Xot_invk self, byte[] src) {
		// get tag_name
		byte[] tag_name = Eval_argx(ctx, src, caller, self);
		if (tag_name.length == 0) return; // EX: {{#tag}}

		// get html_mkr; similar to MW "call_user_func_array"
		// use the tag's own Html_mkr when the tag is found in the registry and supplies one; else fall back to Tag_html_mkr_.Basic(true)
		Tag_html_mkr html_mkr = null;
		Xop_xnde_tag xnde_tkn = ctx.Xnde_tag_regy().Get_tag_in_tmpl(tag_name);
		if (xnde_tkn != null) html_mkr = xnde_tkn.Html_mkr();
		if (html_mkr == null) html_mkr = Tag_html_mkr_.Basic(true);

		// build html
		Xowe_wiki wiki = ctx.Wiki();
		Tag_html_wkr html_wkr = html_mkr.Tag__create(wiki, ctx);
		try {
			// process name
			html_wkr.Tag__process_name(tag_name);

			// process args; EX: "|a=1|b=2" -> "a='1' b='2'"
			int args_len = self.Args_len();
			Eval_attrs(ctx, wiki, caller, self, src, args_len, html_wkr);

			// process body; arg 0 is the innerText; empty when no args were passed
			byte[] body = args_len == 0
				? Bry_.Empty
				: Pf_func_.Eval_arg_or_empty(ctx, src, caller, self, args_len, 0);
			html_wkr.Tag__process_body(body);

			// add to UNIQ hash; DATE:2017-03-31
			byte[] val = html_wkr.Tag__build(ctx.Wiki(), ctx);
			byte[] key = wiki.Parser_mgr().Uniq_mgr().Add(Bool_.Y, tag_name, val);
			bfr.Add(key);
		}
		finally {
			html_wkr.Tag__rls(); // always release the wkr, even if evaluation throws
		}
	}
	// Evaluates args 1..n as key / val pairs, strips flanking quotes from each val and passes the pair to html_wkr.
	private void Eval_attrs(Xop_ctx ctx, Xowe_wiki wiki, Xot_invk caller, Xot_invk self, byte[] src, int args_len, Tag_html_wkr html_wkr) {
		if (args_len <= 1) return; // NOTE: 1 b/c 0 is innerText
		Bry_bfr atr_bfr = wiki.Utl__bfr_mkr().Get_b512();
		try {
			byte[][] kvp = new byte[2][]; // reused for every arg; [0]=key, [1]=val
			for (int i = 1; i < args_len; i++) {
				// extract kv; Eval_arg_to_kvp returns false only when i is past the last arg, so this guard is purely defensive
				if (!Pf_func_.Eval_arg_to_kvp(kvp, ctx, src, caller, self, args_len, atr_bfr, i))
					continue;
				kvp[1] = Strip_flanking_quotes(kvp[1]);
				// process attr
				html_wkr.Tag__process_attr(kvp[0], kvp[1]);
			}
		} finally {
			atr_bfr.Mkr_rls();
		}
	}
	// strip flanking quotes; EX: "'abc'" -> "abc"; REF.MW:preg_match( '/^(?:["\'](.+)["\']|""|\'\')$/s', $value, $m )
	// Per that regex:
	// * 3+ bytes: the first and last byte must each be a quote or an apos, but need not be the same char; EX: "a' -> a
	// * 2 bytes : only "" and '' are stripped (to empty); the mixed pairs "' and '" do not match and are returned as is
	// * anything else is returned as is
	private static byte[] Strip_flanking_quotes(byte[] val) {
		int len = Bry_.Len(val);
		if (len < 2) return val;
		byte first = val[0];
		byte last = val[len - 1];
		if (!Is_quote(first) || !Is_quote(last)) return val;
		if (len == 2 && first != last) return val; // (.+) needs at least one byte between mismatched quotes
		return Bry_.Mid(val, 1, len - 1);
	}
	// true for the two quote chars recognized by the MW regex: " and '
	private static boolean Is_quote(byte b) {
		return b == Byte_ascii.Quote || b == Byte_ascii.Apos;
	}
}

View File

@@ -13,7 +13,7 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.xtns.pfuncs.strings; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.pfuncs.*;
package gplx.xowa.xtns.pfuncs.tags; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.pfuncs.*;
import org.junit.*;
public class Pfunc_tag_tst {
// shared parser fixture; reset before every test
private final Xop_fxt fxt = new Xop_fxt();
@Before public void init() {
	fxt.Reset();
}
@@ -22,16 +22,14 @@ public class Pfunc_tag_tst {
// val wrapped in apos: the apos are dropped and the val is emitted in quotes
@Test public void Val_apos() {
	String wikitext = "{{#tag:pre|a|id='b'}}";
	String expd_html = "<pre id=\"b\">a</pre>";
	fxt.Test_html_full_str(wikitext, expd_html);
}
// val wrapped in quotes: emitted once in quotes, not double-wrapped
@Test public void Val_quote() {
	String wikitext = "{{#tag:pre|a|id=\"b\"}}";
	String expd_html = "<pre id=\"b\">a</pre>";
	fxt.Test_html_full_str(wikitext, expd_html);
}
// PURPOSE: ignore atrs with no val; EX:{{#ref||group=}} PAGE:ru.w:Колчак,_Александр_Васильевич; DATE:2014-07-03
@Test public void Val_empty() {
	String wikitext = "{{#tag:pre|a|id=}}";
	String expd_html = "<pre>a</pre>";
	fxt.Test_html_full_str(wikitext, expd_html);
}
// PURPOSE: multiple quotes should use 1st and nth; DATE:2018-12-24
@Test public void Val_multiple() {
	String wikitext = "{{#tag:pre|c|id='a'b'}}";
	String expd_html = "<pre id=\"a.27b\">c</pre>";
	fxt.Test_html_full_str(wikitext, expd_html);
}
// PURPOSE.fix: tag was not handling apos within quotes; PAGE:en.s:The_formative_period_in_Colby%27s_history DATE:2016-06-23
@Test public void Val_quote_w_apos() {
	String wikitext = "{{#tag:pre|c|id=\"a'b\"}}";
	String expd_html = "<pre id=\"a.27b\">c</pre>";
	fxt.Test_html_full_str(wikitext, expd_html);
}
// PURPOSE: emulate MW behavior; DATE:2018-12-24
// opening quote and closing apos are both stripped even though they differ
@Test public void Val_mismatched() {
	String wikitext = "{{#tag:pre|c|id=\"a'}}";
	String expd_html = "<pre id=\"a\">c</pre>";
	fxt.Test_html_full_str(wikitext, expd_html);
}
// PURPOSE: args must be evaluated; here the atr key comes from a nested {{#switch}}
@Test public void Tmpl() {
	String wikitext = "{{#tag:pre|a|{{#switch:a|a=id}}=c}}";
	String expd_html = "<pre id=\"c\">a</pre>";
	fxt.Test_html_full_str(wikitext, expd_html);
}
// whitespace around an unquoted key and val does not reach the output
@Test public void Ws_all() {
	String wikitext = "{{#tag:pre|a| id = b }}";
	String expd_html = "<pre id=\"b\">a</pre>";
	fxt.Test_html_full_str(wikitext, expd_html);
}
// whitespace inside the quotes is kept as part of the val; it shows up as "_" in the expected id
@Test public void Ws_quoted() {
	String wikitext = "{{#tag:pre|a| id = ' b ' }}";
	String expd_html = "<pre id=\"_b_\">a</pre>";
	fxt.Test_html_full_str(wikitext, expd_html);
}
// PURPOSE: b was failing b/c id was larger and key_end set to 4 (whereas b was len=1)
// the trailing key-less arg "b" does not appear in the output
@Test public void Err_bad_key() {
	String wikitext = "{{#tag:pre|a|id=val|b}}";
	String expd_html = "<pre id=\"val\">a</pre>";
	fxt.Test_html_full_str(wikitext, expd_html);
}
// PURPOSE: escape html in atrs; PAGE:fr.w:France; DATE:2017-06-01
@Test public void Html_is_escaped() {
	String wikitext = "{{#tag:pre|a|id='<br/>'}}";
	String expd_html = "<pre id=\".3Cbr.2F.3E\">a</pre>";
	fxt.Test_html_full_str(wikitext, expd_html);
}
// @Test public void Missing_val() {fxt.ini_Msg(Mwl_tag_rsc.Instance.Invalid).Test_parse_tmpl_str_test("{{#tag:pre|a|id=}}" , "{{test}}" , "");} // see {{Reflist|colwidth=30em}} -> <ref group=a>a</ref>{{#tag:references||group=}} -> ""
// @Test public void Err() {
// fxt.Test_parse_tmpl_str_test("{{#tag:ref|George Robertson announced in January 2003 that he would be stepping down in December.<ref> {{cite news|title =NATO Secretary General to Leave His Post in December After 4 Years |first = Craig | last = Smith | work = The New York Times | date = January 23, 2003| url = http://www.nytimes.com/2003/01/23/world/nato-secretary-general-to-leave-his-post-in-december-after-4-years.html?scp=2&sq=lord+robertson&st=nyt|accessdate = 2009-03-29}}</ref> Jaap de Hoop Scheffer was selected as his successor, but could not assume the office until January 2004 because of his commitment in the Dutch Parliament.<ref> {{cite news|title = Jaap de Hoop Scheffer | work = Newsmakers | issue = 1 | publisher = Thomson Gale | date = January 1, 2005}}</ref> Robertson was asked to extend his term until Scheffer was ready, but declined, so Minuto-Rizzo, the Deputy Secretary General, took over in the interim.<ref name =\"ncsd\" /> |group=N|}}"
// , "{{test}}" , "<pre id=\" b \">a</pre>");}
@Test public void Nested_tmpl() { // PURPOSE: nested template must get re-evaluated; EX:de.wikipedia.org/wiki/Freiburg_im_Breisgau; DATE:2013-12-18;
fxt.Init_page_create("Template:!", "|");
fxt.Init_page_create("Template:A", "{{#ifeq:{{{1}}}|expd|pass|fail}}");

View File

@@ -30,7 +30,6 @@ public class Template_styles_nde implements Xox_xnde, Mwh_atr_itm_owner2 {
public void Xtn_parse(Xowe_wiki wiki, Xop_ctx ctx, Xop_root_tkn root, byte[] src, Xop_xnde_tkn xnde) {
ctx.Para().Process_block__xnde(xnde.Tag(), Xop_xnde_tag.Block_bgn);
Xox_xnde_.Parse_xatrs(wiki, this, xatrs_hash, src, xnde);
// get css_ttl
css_ttl = wiki.Ttl_parse(css_ttl_bry);
if (css_ttl == null) {

View File

@@ -43,6 +43,13 @@ public class Template_styles_nde_tst {
, Style_red
);
}
// PURPOSE: {{#tag}}; a templatestyles tag created through {{#tag}} is expected to produce Style_red, given a css page holding Css_red
@Test public void Tag() {
	String css_page = "Module:A/Test.css";
	fxt.Init__page(css_page, Css_red);

	String wikitext = "{{#tag:templatestyles||src='Module:A/Test.css'}}";
	fxt.Test__parse(wikitext, Style_red);
}
@Test public void Error__invalid_title() {
fxt.Test__parse
( "<templatestyles src='A|b.css'/>"