mirror of
https://github.com/gnosygnu/xowa.git
synced 2024-10-27 20:34:16 +00:00
Mw_parse: Add equivalent of htmlspecialchar
This commit is contained in:
parent
6ee274efd6
commit
25f74d7d80
@ -17,6 +17,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx;
|
||||
import gplx.core.primitives.*; import gplx.core.brys.*; import gplx.core.encoders.*;
|
||||
import gplx.langs.htmls.entitys.*;
|
||||
public class Bry_bfr {
|
||||
private Bry_bfr_mkr_mgr mkr_mgr; private int reset;
|
||||
public byte[] Bfr() {return bfr;} private byte[] bfr;
|
||||
@ -293,7 +294,38 @@ public class Bry_bfr {
|
||||
}
|
||||
}
|
||||
if (clean)
|
||||
Add(val);
|
||||
Add_mid(val, bgn, end);
|
||||
return this;
|
||||
}
|
||||
// Whole-array overload: forwards to the ranged version with bgn = 0 and end = val.length.
public Bry_bfr Add_bry_escape_html(byte[] val) {
	int val_len = val.length;
	return Add_bry_escape_html(val, 0, val_len);
}
|
||||
public Bry_bfr Add_bry_escape_html(byte[] val, int bgn, int end) { // uses PHP rules for htmlspecialchars; REF.PHP:http://php.net/manual/en/function.htmlspecialchars.php
|
||||
boolean clean = true;
|
||||
for (int i = bgn; i < end; ++i) {
|
||||
byte[] escaped = null;
|
||||
byte b = val[i];
|
||||
switch (b) {
|
||||
case Byte_ascii.Amp: escaped = Gfh_entity_.Amp_bry; break;
|
||||
case Byte_ascii.Quote: escaped = Gfh_entity_.Quote_bry; break;
|
||||
case Byte_ascii.Apos: escaped = Gfh_entity_.Apos_num_bry; break;
|
||||
case Byte_ascii.Lt: escaped = Gfh_entity_.Lt_bry; break;
|
||||
case Byte_ascii.Gt: escaped = Gfh_entity_.Gt_bry; break;
|
||||
}
|
||||
if (escaped == null && clean) {
|
||||
continue;
|
||||
}
|
||||
else {
|
||||
if (clean) {
|
||||
clean = false;
|
||||
this.Add_mid(val, bgn, i);
|
||||
}
|
||||
if (escaped == null)
|
||||
this.Add_byte(b);
|
||||
else
|
||||
this.Add(escaped);
|
||||
}
|
||||
}
|
||||
if (clean)
|
||||
Add_mid(val, bgn, end);
|
||||
return this;
|
||||
}
|
||||
// Calls Add_str_u8(s), then returns the result of Add_byte_nl().
public Bry_bfr Add_str_u8_w_nl(String s) {
	this.Add_str_u8(s);
	return this.Add_byte_nl();
}
|
||||
|
@ -16,7 +16,7 @@ You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx;
|
||||
import org.junit.*;
|
||||
import org.junit.*; import gplx.core.tests.*;
|
||||
public class Bry_bfr_tst {
|
||||
private Bry_bfr bb = Bry_bfr_.New();
|
||||
@Before public void setup() {bb.Clear();} private ByteAryBfr_fxt fxt = new ByteAryBfr_fxt();
|
||||
@ -187,9 +187,16 @@ public class Bry_bfr_tst {
|
||||
fxt.Test_Add_int_pad_bgn(Byte_ascii.Num_0, 3, 1000, "1000");
|
||||
}
|
||||
@Test public void Add_bry_escape() {
|
||||
fxt.Test_Add_bry_escape("abc" , "abc"); // nothing to escape
|
||||
fxt.Test_Add_bry_escape("a'bc" , "a''bc"); // single escape (code handles first quote differently)
|
||||
fxt.Test_Add_bry_escape("a'b'c" , "a''b''c"); // double escape (code handles subsequent quotes different than first)
|
||||
fxt.Test__add_bry_escape("abc" , "abc"); // nothing to escape
|
||||
fxt.Test__add_bry_escape("a'bc" , "a''bc"); // single escape (code handles first quote differently)
|
||||
fxt.Test__add_bry_escape("a'b'c" , "a''b''c"); // double escape (code handles subsequent quotes different than first)
|
||||
fxt.Test__add_bry_escape("abc", 1, 2 , "b"); // nothing to escape
|
||||
}
|
||||
@Test public void Add_bry_escape_html() {
|
||||
fxt.Test__add_bry_escape_html("abc" , "abc"); // escape=none
|
||||
fxt.Test__add_bry_escape_html("a&\"'<>b" , "a&"'<>b"); // escape=all; code handles first escape differently
|
||||
fxt.Test__add_bry_escape_html("a&b&c" , "a&b&c"); // staggered; code handles subsequent escapes differently
|
||||
fxt.Test__add_bry_escape_html("abc", 1, 2 , "b"); // by index; fixes bug in initial implementation
|
||||
}
|
||||
@Test public void Insert_at() {
|
||||
fxt.Test_Insert_at("abcd", 0, "xyz" , "xyzabcd"); // bgn
|
||||
@ -217,13 +224,15 @@ public class Bry_bfr_tst {
|
||||
}
|
||||
class ByteAryBfr_fxt {
|
||||
private final Bry_bfr bfr = Bry_bfr_.Reset(16);
|
||||
public Bry_bfr Bfr() {return bfr;}
|
||||
public void Clear() {
|
||||
bfr.ClearAndReset();
|
||||
}
|
||||
public void Test_Add_int_pad_bgn(byte pad_byte, int str_len, int val, String expd) {Tfds.Eq(expd, bfr.Add_int_pad_bgn(pad_byte, str_len, val).To_str_and_clear());}
|
||||
public void Test_Add_bry_escape(String val, String expd) {
|
||||
byte[] val_bry = Bry_.new_u8(val);
|
||||
Tfds.Eq(expd, bfr.Add_bry_escape(Byte_ascii.Apos, Byte_.Ary(Byte_ascii.Apos, Byte_ascii.Apos), val_bry, 0, val_bry.length).To_str_and_clear());
|
||||
public void Test__add_bry_escape(String src, String expd) {Test__add_bry_escape(src, 0, String_.Len(src), expd);}
|
||||
public void Test__add_bry_escape(String src, int src_bgn, int src_end, String expd) {
|
||||
byte[] val_bry = Bry_.new_u8(src);
|
||||
Tfds.Eq(expd, bfr.Add_bry_escape(Byte_ascii.Apos, Byte_.Ary(Byte_ascii.Apos, Byte_ascii.Apos), val_bry, src_bgn, src_end).To_str_and_clear());
|
||||
}
|
||||
public void Test_Insert_at(String init, int pos, String val, String expd) {Tfds.Eq(expd, bfr.Add_str_u8(init).Insert_at(pos, Bry_.new_u8(val)).To_str_and_clear());}
|
||||
public void Test_Insert_at(String init, int pos, String val, int val_bgn, int val_end, String expd) {Tfds.Eq(expd, bfr.Add_str_u8(init).Insert_at(pos, Bry_.new_u8(val), val_bgn, val_end).To_str_and_clear());}
|
||||
@ -233,4 +242,8 @@ class ByteAryBfr_fxt {
|
||||
public void Test__to_bry_ary_and_clear(String bfr_str, String... expd) {
|
||||
Tfds.Eq_ary(expd, String_.Ary(bfr.Add_str_u8(bfr_str).To_bry_ary_and_clear()));
|
||||
}
|
||||
// Convenience overload: checks the whole of src, i.e. the range 0 to String_.Len(src).
public void Test__add_bry_escape_html(String src, String expd) {
	int src_end = String_.Len(src);
	Test__add_bry_escape_html(src, 0, src_end, expd);
}
|
||||
public void Test__add_bry_escape_html(String src, int src_bgn, int src_end, String expd) {
|
||||
Gftest.Eq__bry(Bry_.new_u8(expd), bfr.Add_bry_escape_html(Bry_.new_u8(src), src_bgn, src_end).To_bry_and_clear());
|
||||
}
|
||||
}
|
||||
|
@ -111,7 +111,7 @@ public class Xomp_parse_wkr implements Gfo_invk {
|
||||
// if ns changed and prv_ns is main
|
||||
if (cur_ns != prv_ns) {
|
||||
if (prv_ns == gplx.xowa.wikis.nss.Xow_ns_.Tid__main)
|
||||
wiki.Cache_mgr().Free_mem__all(); // NOTE: clears page and wbase cache only; needed else OutOfMemory error for en.w in 25th hour; DATE:2017-01-07
|
||||
wiki.Cache_mgr().Free_mem__all(); // NOTE: clears page and wbase cache only; needed else OutOfMemory error for en.w in 25th hour; DATE:2017-01-11
|
||||
prv_ns = cur_ns;
|
||||
}
|
||||
Xoae_page wpg = Xoae_page.New(wiki, ttl);
|
||||
|
@ -132,11 +132,11 @@ public class Xomw_prepro_wkr { // TS.UNSAFE:caching for repeated calls
|
||||
int start_pos = Bry_find_.Find_fwd(src, Bry__only_include_bgn, i, src_len);
|
||||
if (start_pos == Bry_find_.Not_found) {
|
||||
// Ignored section runs to the end
|
||||
accum.Add_str_a7("<ignore>").Add(htmlspecialchars(Bry_.Mid(src, i))).Add_str_a7("</ignore>");
|
||||
accum.Add_str_a7("<ignore>").Add_bry_escape_html(src, i, src_len).Add_str_a7("</ignore>");
|
||||
break;
|
||||
}
|
||||
int tag_end_pos = start_pos + Bry__only_include_bgn.length; // past-the-end
|
||||
accum.Add_str_a7("<ignore>").Add(htmlspecialchars(Bry_.Mid(src, i, tag_end_pos))).Add_str_a7("</ignore>");
|
||||
accum.Add_str_a7("<ignore>").Add_bry_escape_html(src, i, tag_end_pos).Add_str_a7("</ignore>");
|
||||
i = tag_end_pos;
|
||||
find_only_include = false;
|
||||
}
|
||||
@ -205,7 +205,7 @@ public class Xomw_prepro_wkr { // TS.UNSAFE:caching for repeated calls
|
||||
literal_len++;
|
||||
}
|
||||
if (literal_len > 0) {
|
||||
accum.Add(htmlspecialchars(Bry_.Mid(src, i, i + literal_len)));
|
||||
accum.Add_bry_escape_html(src, i, i + literal_len);
|
||||
i += literal_len;
|
||||
}
|
||||
if (i >= src_len) {
|
||||
@ -277,7 +277,7 @@ public class Xomw_prepro_wkr { // TS.UNSAFE:caching for repeated calls
|
||||
int end_pos = Bry_find_.Find_fwd(src, Bry__comment_end, i + 4, src_len);
|
||||
if (end_pos == Bry_find_.Not_found) {
|
||||
// Unclosed comment in input, runs to end
|
||||
accum.Add_str_a7("<comment>").Add(htmlspecialchars(Bry_.Mid(src, i))).Add_str_a7("</comment>");
|
||||
accum.Add_str_a7("<comment>").Add_bry_escape_html(src, i, src_len).Add_str_a7("</comment>");
|
||||
i = src_len;
|
||||
}
|
||||
else {
|
||||
@ -331,7 +331,7 @@ public class Xomw_prepro_wkr { // TS.UNSAFE:caching for repeated calls
|
||||
break;
|
||||
}
|
||||
inner = Bry_.Mid(src, bgn_pos, end_pos);
|
||||
accum.Add_str_a7("<comment>").Add(htmlspecialchars(inner)).Add_str_a7("</comment>");
|
||||
accum.Add_str_a7("<comment>").Add_bry_escape_html(inner).Add_str_a7("</comment>");
|
||||
}
|
||||
|
||||
// Do a line-start run next time to look for headings after the comment
|
||||
@ -353,7 +353,7 @@ public class Xomw_prepro_wkr { // TS.UNSAFE:caching for repeated calls
|
||||
}
|
||||
i = end_pos + 1;
|
||||
inner = Bry_.Mid(src, bgn_pos, end_pos + 1);
|
||||
accum.Add_str_a7("<comment>").Add(htmlspecialchars(inner)).Add_str_a7("</comment>");
|
||||
accum.Add_str_a7("<comment>").Add_bry_escape_html(inner).Add_str_a7("</comment>");
|
||||
continue;
|
||||
}
|
||||
}
|
||||
@ -375,7 +375,7 @@ public class Xomw_prepro_wkr { // TS.UNSAFE:caching for repeated calls
|
||||
|
||||
// Handle ignored tags
|
||||
if (ignored_tags.Has(name)) {
|
||||
accum.Add_str_a7("<ignore>").Add(htmlspecialchars(Bry_.Mid(src, i, tag_end_pos + 1))).Add_str_a7("</ignore>");
|
||||
accum.Add_str_a7("<ignore>").Add_bry_escape_html(src, i, tag_end_pos + 1).Add_str_a7("</ignore>");
|
||||
i = tag_end_pos + 1;
|
||||
continue;
|
||||
}
|
||||
@ -426,7 +426,7 @@ public class Xomw_prepro_wkr { // TS.UNSAFE:caching for repeated calls
|
||||
&& elem_end_found) {
|
||||
inner = Bry_.Mid(src, tag_end_pos + 1, elem_end_lhs);
|
||||
i = elem_end_rhs;
|
||||
tmp_bfr.Add_str_a7("<close>").Add(htmlspecialchars(Bry_.Mid(src, elem_end_lhs, elem_end_rhs))).Add_str_a7("</close>");
|
||||
tmp_bfr.Add_str_a7("<close>").Add_bry_escape_html(src, elem_end_lhs, elem_end_rhs).Add_str_a7("</close>");
|
||||
close = tmp_bfr.To_bry_and_clear();
|
||||
}
|
||||
else {
|
||||
@ -440,7 +440,7 @@ public class Xomw_prepro_wkr { // TS.UNSAFE:caching for repeated calls
|
||||
else {
|
||||
// Don't match the tag, treat opening tag as literal and resume parsing.
|
||||
i = tag_end_pos + 1;
|
||||
accum.Add(htmlspecialchars(Bry_.Mid(src, tag_bgn_pos, tag_end_pos + 1)));
|
||||
accum.Add_bry_escape_html(src, tag_bgn_pos, tag_end_pos + 1);
|
||||
// Cache results, otherwise we have O(N^2) performance for input like <foo><foo><foo>...
|
||||
no_more_closing_tag.Add_if_dupe_use_nth(name, name);
|
||||
continue;
|
||||
@ -450,18 +450,26 @@ public class Xomw_prepro_wkr { // TS.UNSAFE:caching for repeated calls
|
||||
|
||||
// <includeonly> and <noinclude> just become <ignore> tags
|
||||
if (ignored_elements.Has(name)) {
|
||||
accum.Add_str_a7("<ignore>").Add(htmlspecialchars(Bry_.Mid(src, tag_bgn_pos, i))).Add_str_a7("</ignore>");
|
||||
accum.Add_str_a7("<ignore>").Add_bry_escape_html(src, tag_bgn_pos, i).Add_str_a7("</ignore>");
|
||||
continue;
|
||||
}
|
||||
|
||||
accum.Add_str_a7("<ext>");
|
||||
byte[] atr_bry = atr_end <= atr_bgn ? Bry_.Empty : Bry_.Mid(src, atr_bgn, atr_end);
|
||||
// PORTED:
|
||||
// if ( $attrEnd <= $attrStart ) {
|
||||
// $attr = '';
|
||||
// } else {
|
||||
// $attr = substr( $text, $attrStart, $attrEnd - $attrStart );
|
||||
// }
|
||||
accum.Add_str_a7("<name>").Add(name).Add_str_a7("</name>");
|
||||
// Note that the attr element contains the whitespace between name and attribute,
|
||||
// this is necessary for precise reconstruction during pre-save transform.
|
||||
accum.Add_str_a7("<attr>").Add(htmlspecialchars(atr_bry)).Add_str_a7("</attr>");
|
||||
accum.Add_str_a7("<attr>");
|
||||
if (atr_end > atr_bgn)
|
||||
accum.Add_bry_escape_html(src, atr_bgn, atr_end);
|
||||
accum.Add_str_a7("</attr>");
|
||||
if (inner != null) {
|
||||
accum.Add_str_a7("<inner>").Add(htmlspecialchars(inner)).Add_str_a7("</inner>");
|
||||
accum.Add_str_a7("<inner>").Add_bry_escape_html(inner).Add_str_a7("</inner>");
|
||||
}
|
||||
accum.Add(close).Add_str_a7("</ext>");
|
||||
}
|
||||
@ -582,7 +590,8 @@ public class Xomw_prepro_wkr { // TS.UNSAFE:caching for repeated calls
|
||||
}
|
||||
else {
|
||||
// Add literal brace(s)
|
||||
accum.Add(htmlspecialchars(Bry_.Repeat_bry(cur_char, count)));
|
||||
for (int j = 0; j < count; j++)
|
||||
accum.Add_bry_escape_html(cur_char);
|
||||
}
|
||||
i += count;
|
||||
}
|
||||
@ -613,7 +622,8 @@ public class Xomw_prepro_wkr { // TS.UNSAFE:caching for repeated calls
|
||||
if (matching_count <= 0) {
|
||||
// No matching element found in callback array
|
||||
// Output a literal closing brace and continue
|
||||
accum.Add(htmlspecialchars(Bry_.Repeat_bry(cur_char, count)));
|
||||
for (int j = 0; j < count; j++)
|
||||
accum.Add_bry_escape_html(cur_char);
|
||||
i += count;
|
||||
continue;
|
||||
}
|
||||
@ -718,15 +728,6 @@ public class Xomw_prepro_wkr { // TS.UNSAFE:caching for repeated calls
|
||||
root_accum.Add_str_a7("</root>");
|
||||
return root_accum.To_bry_and_clear();
|
||||
}
|
||||
private byte[] htmlspecialchars(byte[] bry) {
|
||||
// http://php.net/manual/en/function.htmlspecialchars.php
|
||||
//& (ampersand) &
|
||||
//" (double quote) ", unless ENT_NOQUOTES is set
|
||||
//' (single quote) ' (for ENT_HTML401) or ' (for ENT_XML1, ENT_XHTML or ENT_HTML5), but only when ENT_QUOTES is set
|
||||
//< (less than) <
|
||||
//> (greater than) >
|
||||
return bry;
|
||||
}
|
||||
private Xomw_prepro_rule Get_rule(byte[] bry) {
|
||||
if (Bry_.Eq(bry, rule_curly.bgn)) return rule_curly;
|
||||
else if (Bry_.Eq(bry, rule_brack.bgn)) return rule_brack;
|
||||
|
@ -32,15 +32,13 @@ public class Xomw_prepro_wkr__tst {
|
||||
fxt.Test__parse("a{{{b}}}c", "<root>a<tplarg lineStart=\"1\"><title>b</title></tplarg>c</root>");
|
||||
}
|
||||
@Test public void Comment() {
|
||||
fxt.Test__parse("a<!--b-->c", "<root>a<comment><!--b--></comment>c</root>");
|
||||
fxt.Test__parse("a<!--b-->c", "<root>a<comment><!--b--></comment>c</root>");
|
||||
}
|
||||
@Test public void Ext__pre() {
|
||||
fxt.Test__parse("a<pre id=\"1\">b</pre>c", "<root>a<ext><name>pre</name><attr> id=\"1\"</attr><inner>b</inner><close></pre></close></ext>c</root>");
|
||||
fxt.Test__parse("a<pre id=\"1\">b</pre>c", "<root>a<ext><name>pre</name><attr> id="1"</attr><inner>b</inner><close></pre></close></ext>c</root>");
|
||||
}
|
||||
/*
|
||||
TODO:
|
||||
* htmlspecialchars
|
||||
|
||||
* for_inclusion; <onlyinclude> in String
|
||||
* heading.general
|
||||
* heading.EOS: "==a" (no closing ==)
|
||||
|
Loading…
Reference in New Issue
Block a user