mirror of
https://github.com/gnosygnu/xowa.git
synced 2024-10-27 20:34:16 +00:00
Mw_parse: Add basic implementation of Mw_preprocessor
This commit is contained in:
parent
006c14db4e
commit
d15630c433
@ -164,6 +164,17 @@ public class Bry_ {
|
||||
rv[i] = b;
|
||||
return rv;
|
||||
}
|
||||
public static byte[] Repeat_bry(byte[] bry, int len) {
|
||||
int bry_len = bry.length;
|
||||
int rv_len = len * bry_len;
|
||||
byte[] rv = new byte[rv_len];
|
||||
for (int i = 0; i < len; i++) {
|
||||
for (int j = 0; j < bry_len; j++) {
|
||||
rv[(i * bry_len) + j] = bry[j];
|
||||
}
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
public static byte[] Add(byte[] src, byte b) {
|
||||
int src_len = src.length;
|
||||
byte[] rv = new byte[src_len + 1];
|
||||
|
@ -16,7 +16,7 @@ You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx;
|
||||
import org.junit.*; import gplx.core.primitives.*; import gplx.core.brys.*;
|
||||
import org.junit.*; import gplx.core.primitives.*; import gplx.core.brys.*; import gplx.core.tests.*;
|
||||
public class Bry__tst {
|
||||
private final Bry__fxt fxt = new Bry__fxt();
|
||||
@Test public void new_ascii_() {
|
||||
@ -276,6 +276,9 @@ public class Bry__tst {
|
||||
fxt.Test__new_u8_nl_apos(String_.Ary("a", "b"), "a\nb");
|
||||
fxt.Test__new_u8_nl_apos(String_.Ary("a", "b'c", "d"), "a\nb\"c\nd");
|
||||
}
|
||||
@Test public void Repeat_bry() {
|
||||
fxt.Test__repeat_bry("abc" , 3, "abcabcabc");
|
||||
}
|
||||
}
|
||||
class Bry__fxt {
|
||||
public void Test_trim_end(String raw, byte trim, String expd) {
|
||||
@ -292,4 +295,7 @@ class Bry__fxt {
|
||||
public void Test__new_u8_nl_apos(String[] ary, String expd) {
|
||||
Tfds.Eq_str_lines(expd, String_.new_u8(Bry_.New_u8_nl_apos(ary)));
|
||||
}
|
||||
public void Test__repeat_bry(String s, int count, String expd) {
|
||||
Gftest.Eq__str(expd, Bry_.Repeat_bry(Bry_.new_u8(s), count));
|
||||
}
|
||||
}
|
||||
|
@ -158,6 +158,19 @@ public class Bry_find_ {
|
||||
}
|
||||
return Bry_find_.Not_found;
|
||||
}
|
||||
public static int Find_bwd__while_space_or_tab(byte[] src, int cur, int end) { // get pos of 1st char that is not \t or \s
|
||||
if (cur >= src.length) return Bry_find_.Not_found;
|
||||
for (int i = cur; i >= end; i--) {
|
||||
byte b = src[i];
|
||||
switch (b) {
|
||||
case Byte_ascii.Space: case Byte_ascii.Tab:
|
||||
break;
|
||||
default:
|
||||
return i;
|
||||
}
|
||||
}
|
||||
return Bry_find_.Not_found;
|
||||
}
|
||||
public static int Find_bwd_non_ws_or_end(byte[] src, int cur, int end) {
|
||||
if (cur >= src.length) return Bry_find_.Not_found;
|
||||
for (int i = cur; i >= end; i--) {
|
||||
|
@ -32,8 +32,8 @@ public class Xoa_app_ {
|
||||
}
|
||||
}
|
||||
public static final String Name = "xowa";
|
||||
public static final int Version_id = 513;
|
||||
public static final String Version = "4.0.1.1701"; // RELEASE:2017-01-03 20:30
|
||||
public static final int Version_id = 512;
|
||||
public static final String Version = "4.1.0.1701"; // RELEASE:2017-01-03 20:30
|
||||
public static String Build_date = "2012-12-30 00:00:00";
|
||||
public static String Build_date_fmt = "yyyy-MM-dd HH:mm:ss";
|
||||
public static String Op_sys_str;
|
||||
|
@ -0,0 +1,155 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.mws.prepros; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.mws.*;
|
||||
class Xomw_prepro_stack {
|
||||
public List_adp stack = List_adp_.New();
|
||||
public Xomw_prepro_piece top;
|
||||
private Bry_bfr accum = Bry_bfr_.New(), root_accum = Bry_bfr_.New();
|
||||
private final Xomw_prepro_flags flags = new Xomw_prepro_flags();
|
||||
|
||||
public int Count() {return stack.Len();}
|
||||
public Bry_bfr Get_accum() {return accum;}
|
||||
public Bry_bfr Get_root_accum() {return root_accum;}
|
||||
|
||||
public Xomw_prepro_part Get_current_part() {
|
||||
if (top == null) {
|
||||
return null;
|
||||
} else {
|
||||
return top.Get_current_part();
|
||||
}
|
||||
}
|
||||
|
||||
public void Push(Xomw_prepro_piece item) {
|
||||
stack.Add(item);
|
||||
this.top = (Xomw_prepro_piece)stack.Get_at(stack.Len() - 1);
|
||||
accum.Clear().Add(top.Get_accum());
|
||||
}
|
||||
|
||||
public Xomw_prepro_piece Pop() {
|
||||
int len = stack.Count();
|
||||
if (len == 0) {
|
||||
throw Err_.new_wo_type("Xomw_prepro_stack: no elements remaining");
|
||||
}
|
||||
|
||||
Xomw_prepro_piece rv = (Xomw_prepro_piece)stack.Get_at(len - 1);
|
||||
stack.Del_at(len - 1);
|
||||
|
||||
if (len > 0) {
|
||||
this.top = (Xomw_prepro_piece)stack.Get_at(stack.Len() - 1);
|
||||
accum.Clear().Add(top.Get_accum());
|
||||
} else {
|
||||
this.top = null;
|
||||
this.accum = root_accum;
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
|
||||
public void Add_part(byte[] bry) {
|
||||
top.Add_part(bry);
|
||||
accum.Clear().Add(top.Get_accum());
|
||||
}
|
||||
|
||||
public Xomw_prepro_flags Get_flags() {
|
||||
if (stack.Count() == 0) {
|
||||
flags.Find_eq = false;
|
||||
flags.Find_pipe = false;
|
||||
flags.In_heading = false;
|
||||
return flags;
|
||||
}
|
||||
else {
|
||||
top.Set_flags(flags);
|
||||
return flags;
|
||||
}
|
||||
}
|
||||
}
|
||||
/** Mutable holder for the scanner flags that Xomw_prepro_stack.Get_flags fills in. */
class Xomw_prepro_flags {
	public boolean
	  Find_pipe		// true to take notice of pipe characters
	, Find_eq		// true to find equals signs in arguments
	, In_heading	// true if the piece was opened by a newline (possible heading)
	;
}
|
||||
class Xomw_prepro_piece {
|
||||
public final byte[] open; // Opening character (\n for heading)
|
||||
public final byte[] close; // Matching closing char;
|
||||
public int count; // Number of opening characters found (number of "=" for heading)
|
||||
public final boolean line_start; // True if the open char appeared at the start of the input line; Not set for headings.
|
||||
public final int start_pos;
|
||||
public List_adp parts = List_adp_.New();
|
||||
public Xomw_prepro_piece(byte[] open, byte[] close, int count, int start_pos, boolean line_start) {
|
||||
this.open = open;
|
||||
this.close = close;
|
||||
this.count = count;
|
||||
this.start_pos = start_pos;
|
||||
this.line_start = line_start;
|
||||
}
|
||||
public Xomw_prepro_part Get_current_part() {
|
||||
return (Xomw_prepro_part)parts.Get_at(parts.Len() - 1);
|
||||
}
|
||||
public byte[] Get_accum() {
|
||||
return Get_current_part().bry;
|
||||
}
|
||||
public void Add_part(byte[] bry) {
|
||||
parts.Add(new Xomw_prepro_part(bry));
|
||||
}
|
||||
public static final byte[] Brack_bgn_bry = Bry_.new_a7("[");
|
||||
public void Set_flags(Xomw_prepro_flags flags) {
|
||||
int parts_len = parts.Len();
|
||||
boolean open_is_nl = Bry_.Eq(open, Byte_ascii.Nl_bry);
|
||||
boolean find_pipe = !open_is_nl && Bry_.Eq(open, Brack_bgn_bry);
|
||||
flags.Find_pipe = find_pipe;
|
||||
flags.Find_eq = find_pipe && parts_len > 1 && ((Xomw_prepro_part)parts.Get_at(parts_len - 1)).Eqpos != -1;
|
||||
flags.In_heading = open_is_nl;
|
||||
}
|
||||
// Get the output String that would result if the close is not found.
|
||||
public byte[] Break_syntax(Bry_bfr tmp_bfr, int opening_count) {
|
||||
byte[] rv = Bry_.Empty;
|
||||
if (Bry_.Eq(open, Byte_ascii.Nl_bry)) {
|
||||
rv = ((Xomw_prepro_part)parts.Get_at(0)).bry;
|
||||
}
|
||||
else {
|
||||
if (opening_count == -1) {
|
||||
opening_count = count;
|
||||
}
|
||||
rv = Bry_.Repeat_bry(open, opening_count);
|
||||
|
||||
// concat parts with "|"
|
||||
boolean first = true;
|
||||
int len = parts.Len();
|
||||
for (int i = 0; i < len; i++) {
|
||||
Xomw_prepro_part part = (Xomw_prepro_part)parts.Get_at(i);
|
||||
if (first) {
|
||||
first = false;
|
||||
}
|
||||
else {
|
||||
tmp_bfr.Add_byte_pipe();
|
||||
}
|
||||
tmp_bfr.Add(part.bry);
|
||||
}
|
||||
rv = tmp_bfr.To_bry_and_clear();
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
}
|
||||
/** One part of an open piece: its bytes plus positional markers that all start out as -1 (not set). */
class Xomw_prepro_part {
	public final byte[] bry;	// content of this part
	public int
	  Eqpos = -1
	, comment_end = -1
	, visual_end = -1
	;
	public Xomw_prepro_part(byte[] bry) {this.bry = bry;}
}
|
715
400_xowa/src/gplx/xowa/parsers/mws/prepros/Xomw_prepro_wkr.java
Normal file
715
400_xowa/src/gplx/xowa/parsers/mws/prepros/Xomw_prepro_wkr.java
Normal file
@ -0,0 +1,715 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.parsers.mws.prepros; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.mws.*;
|
||||
import gplx.core.btries.*;
|
||||
public class Xomw_prepro_wkr {
|
||||
private static final Xomw_prepro_rule
|
||||
rule_curly = new Xomw_prepro_rule(Bry_.new_a7("{"), Bry_.new_a7("}") , 2, 3, new int[] {Xomw_prepro_rule.Name__invalid, Xomw_prepro_rule.Name__invalid, Xomw_prepro_rule.Name__tmpl, Xomw_prepro_rule.Name__targ})
|
||||
, rule_brack = new Xomw_prepro_rule(Bry_.new_a7("["), Bry_.new_a7("]") , 2, 2, new int[] {Xomw_prepro_rule.Name__invalid, Xomw_prepro_rule.Name__invalid, Xomw_prepro_rule.Name__null})
|
||||
, rule_langv = new Xomw_prepro_rule(Bry_.new_a7("-{"), Bry_.new_a7("}-"), 1, 1, new int[] {Xomw_prepro_rule.Name__invalid, Xomw_prepro_rule.Name__null})
|
||||
;
|
||||
private static final byte[]
|
||||
Bry__only_include_bgn = Bry_.new_a7("<onlyinclude>")
|
||||
, Bry__only_include_end = Bry_.new_a7("</onlyinclude>")
|
||||
, Bry__comment_bgn = Bry_.new_a7("<!--")
|
||||
, Bry__comment_end = Bry_.new_a7("-->")
|
||||
, Bry__escaped_lt = Bry_.new_a7("<")
|
||||
, Bry__includeonly = Bry_.new_a7("includeonly")
|
||||
, Bry__noinclude = Bry_.new_a7("noinclude")
|
||||
, Bry__onlyinclude = Bry_.new_a7("onlyinclude")
|
||||
;
|
||||
private static final int Len__only_include_end = Bry__only_include_end.length;
|
||||
private static final int
|
||||
Found__line_bgn = 0
|
||||
, Found__line_end = 1
|
||||
, Found__pipe = 2
|
||||
, Found__equals = 3
|
||||
, Found__angle = 4
|
||||
, Found__close = 5
|
||||
, Found__open = 6
|
||||
;
|
||||
|
||||
private Bry_bfr accum = Bry_bfr_.New(), tmp_bfr = Bry_bfr_.New();
|
||||
private List_adp comments_list = List_adp_.New();
|
||||
private byte[] htmlspecialchars(byte[] bry) {
|
||||
return bry;
|
||||
}
|
||||
private Xomw_prepro_rule Get_rule(byte[] bry) {
|
||||
if (Bry_.Eq(bry, rule_curly.bgn)) return rule_curly;
|
||||
else if (Bry_.Eq(bry, rule_brack.bgn)) return rule_brack;
|
||||
else if (Bry_.Eq(bry, rule_langv.bgn)) return rule_langv;
|
||||
else throw Err_.new_unhandled(bry);
|
||||
}
|
||||
public byte[] Preprocess_to_xml(byte[] src, boolean for_inclusion) {
|
||||
Hash_adp_bry xmlish_elems = Hash_adp_bry.ci_a7(); // parser->getStripList();
|
||||
|
||||
Hash_adp_bry xmlish_allow_missing_end_tag = Hash_adp_bry.cs();
|
||||
xmlish_allow_missing_end_tag.Add_as_key_and_val(Bry__includeonly);
|
||||
xmlish_allow_missing_end_tag.Add_as_key_and_val(Bry__noinclude);
|
||||
xmlish_allow_missing_end_tag.Add_as_key_and_val(Bry__onlyinclude);
|
||||
|
||||
boolean enable_only_include = false;
|
||||
|
||||
Hash_adp_bry ignored_tags = Hash_adp_bry.cs();
|
||||
Hash_adp_bry ignored_elements = Hash_adp_bry.cs();
|
||||
if (for_inclusion) {
|
||||
ignored_tags.Add_many_str("includeonly", "/includeonly");
|
||||
ignored_elements.Add_many_str("noinclude");
|
||||
xmlish_elems.Add_many_str("noinclude");
|
||||
if ( Bry_.Has(src, Bry__only_include_bgn)
|
||||
&& Bry_.Has(src, Bry__only_include_end)) {
|
||||
enable_only_include = true;
|
||||
}
|
||||
}
|
||||
else {
|
||||
ignored_tags.Add_many_str("noinclude", "/noinclude", "onlyinclude", "/onlyinclude");
|
||||
ignored_elements.Add_many_str("includeonly");
|
||||
xmlish_elems.Add_many_str("includeonly");
|
||||
}
|
||||
|
||||
// $xmlishRegex = implode( '|', array_merge( $xmlishElements, $ignoredTags ) );
|
||||
// Use "A" modifier (anchored) instead of "^", because ^ doesn't work with an offset
|
||||
// $elementsRegex = "~($xmlishRegex)(?:\s|\/>|>)|(!--)~iA";
|
||||
|
||||
Xomw_prepro_stack stack = new Xomw_prepro_stack();
|
||||
// $searchBase = "[{<\n"; # }
|
||||
|
||||
// Input pointer, starts out pointing to a pseudo-newline before the start
|
||||
int i = 0;
|
||||
|
||||
// Current accumulator
|
||||
accum = stack.Get_accum();
|
||||
accum.Add_str_a7("<root>");
|
||||
|
||||
// True to find equals signs in arguments
|
||||
boolean find_equals = false;
|
||||
|
||||
// True to take notice of pipe characters
|
||||
boolean find_pipe = false;
|
||||
int heading_index = 1;
|
||||
|
||||
// True if $i is inside a possible heading
|
||||
boolean in_heading = false;
|
||||
|
||||
// True if there are no more greater-than (>) signs right of $i
|
||||
boolean no_more_gt = false;
|
||||
|
||||
// Map of tag name => true if there are no more closing tags of given type right of $i
|
||||
Hash_adp_bry no_more_closing_tag = Hash_adp_bry.cs();
|
||||
|
||||
// True to ignore all input up to the next <onlyinclude>
|
||||
boolean find_only_include = enable_only_include;
|
||||
|
||||
// Do a line-start run without outputting an LF character
|
||||
boolean fake_line_start = true;
|
||||
|
||||
int src_len = src.length;
|
||||
int found = -1;
|
||||
byte[] cur_closing = Bry_.Empty;
|
||||
|
||||
Btrie_slim_mgr elements_trie = Btrie_slim_mgr.ci_a7();
|
||||
Btrie_slim_mgr elements_end_trie = Btrie_slim_mgr.ci_a7();
|
||||
Btrie_rv elements_trv = new Btrie_rv();
|
||||
|
||||
Btrie_slim_mgr cur_char_trie = Btrie_slim_mgr.ci_a7();
|
||||
byte[] inner = null;
|
||||
|
||||
while (true) {
|
||||
if (find_only_include) {
|
||||
// Ignore all input up to the next <onlyinclude>
|
||||
int start_pos = Bry_find_.Find_fwd(src, Bry__only_include_bgn, i, src_len);
|
||||
if (start_pos == Bry_find_.Not_found) {
|
||||
// Ignored section runs to the end
|
||||
accum.Add_str_a7("<ignore>").Add(htmlspecialchars(Bry_.Mid(src, i))).Add_str_a7("</ignore>");
|
||||
break;
|
||||
}
|
||||
int tag_end_pos = start_pos + Bry__only_include_bgn.length; // past-the-end
|
||||
accum.Add_str_a7("<ignore>").Add(htmlspecialchars(Bry_.Mid(src, i, tag_end_pos))).Add_str_a7("</ignore>");
|
||||
i = tag_end_pos;
|
||||
find_only_include = false;
|
||||
}
|
||||
|
||||
byte[] cur_char = Bry_.Empty;
|
||||
if (fake_line_start) {
|
||||
found = Found__line_bgn;
|
||||
cur_char = Bry_.Empty;
|
||||
}
|
||||
|
||||
// Find next opening brace, closing brace or pipe
|
||||
// $search = $searchBase;
|
||||
if (stack.top == null) {
|
||||
cur_closing = Bry_.Empty;
|
||||
}
|
||||
else {
|
||||
cur_closing = stack.top.close;
|
||||
// $search .= $currentClosing;
|
||||
}
|
||||
if (find_pipe) {
|
||||
// $search .= '|';
|
||||
}
|
||||
if (find_equals) {
|
||||
// First equals will be for the template
|
||||
// $search .= '=';
|
||||
}
|
||||
Xomw_prepro_rule rule = null;
|
||||
|
||||
// Output literal section, advance input counter
|
||||
int literal_len = 0; // strcspn(src, $search, i);
|
||||
if (literal_len > 0) {
|
||||
accum.Add(htmlspecialchars(Bry_.Mid(src, i, i + literal_len)));
|
||||
i += literal_len;
|
||||
}
|
||||
|
||||
if (i >= src_len) {
|
||||
if (Bry_.Eq(cur_closing, Byte_ascii.Nl_bry)) {
|
||||
// Do a past-the-end run to finish off the heading
|
||||
cur_char = Byte_ascii.Nl_bry;
|
||||
found = Found__line_end;
|
||||
}
|
||||
else {
|
||||
// All done
|
||||
break;
|
||||
}
|
||||
}
|
||||
else {
|
||||
Xomw_prepro_curchar_itm cur_char_itm = (Xomw_prepro_curchar_itm)cur_char_trie.Match_at(elements_trv, src, i, src_len);
|
||||
cur_char = cur_char_itm.sequence;
|
||||
switch (cur_char_itm.type) {
|
||||
case Byte_ascii.Pipe: found = Found__pipe; break;
|
||||
case Byte_ascii.Eq: found = Found__equals; break;
|
||||
case Byte_ascii.Angle_bgn: found = Found__angle; break;
|
||||
case Byte_ascii.Nl: found = in_heading ? Found__line_end : Found__line_bgn; break;
|
||||
case Byte_ascii.Curly_bgn: {
|
||||
found = Found__open;
|
||||
rule = rule_curly;
|
||||
break;
|
||||
}
|
||||
case Byte_ascii.Brack_bgn: {
|
||||
found = Found__open;
|
||||
rule = rule_brack;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
if (cur_char_itm.type == Byte_ascii.Dash) {
|
||||
int nxt_pos = i + 1;
|
||||
if (nxt_pos < src_len) {
|
||||
if (src[i + 1] == Byte_ascii.Curly_bgn) {
|
||||
found = Found__open;
|
||||
rule = rule_langv;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (Bry_.Eq(cur_char, cur_closing)) {
|
||||
found = Found__close;
|
||||
}
|
||||
else {
|
||||
i++;
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (found == Found__angle) {
|
||||
// Handle </onlyinclude>
|
||||
if ( enable_only_include
|
||||
&& Bry_.Eq(src, i, i + Len__only_include_end, Bry__only_include_end)) {
|
||||
find_only_include = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Determine element name; $elementsRegex = "~($xmlishRegex)(?:\s|\/>|>)|(!--)~iA"; EX: "(span|div)(?:\s|\/>|>)|(!--)
|
||||
Xomw_prepro_elem element = (Xomw_prepro_elem)elements_trie.Match_at(elements_trv, src, i + 1, src_len);
|
||||
if (element == null) {// Element name missing or not listed
|
||||
accum.Add(Bry__escaped_lt);
|
||||
i++;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Handle comments
|
||||
if (element.type == Xomw_prepro_elem.Type__comment) {
|
||||
// To avoid leaving blank lines,
|
||||
// when a sequence of space-separated comments is both preceded and followed by a newline (ignoring spaces),
|
||||
// then trim leading and trailing spaces and the trailing newline.
|
||||
|
||||
// Find the end
|
||||
int comment_end_pos = Bry_find_.Find_fwd(src, Bry__comment_end, i + 4, src_len);
|
||||
if (comment_end_pos == Bry_find_.Not_found) {
|
||||
// Unclosed comment in input, runs to end
|
||||
accum.Add_str_a7("<comment>").Add(htmlspecialchars(Bry_.Mid(src, i))).Add_str_a7("</comment>");
|
||||
i = src_len;
|
||||
}
|
||||
else {
|
||||
// Search backwards for leading whitespace
|
||||
int ws_bgn = i > 0 ? i - Bry_find_.Find_bwd__while_space_or_tab(src, i, 0) : 0;
|
||||
|
||||
// Search forwards for trailing whitespace
|
||||
// $wsEnd will be the position of the last space (or the '>' if there's none)
|
||||
int ws_end = comment_end_pos + 2 + Bry_find_.Find_fwd_while_space_or_tab(src, comment_end_pos + 3, src_len);
|
||||
|
||||
// Keep looking forward as long as we're finding more comments.
|
||||
comments_list.Clear();
|
||||
comments_list.Add(new int[] {ws_bgn, ws_end});
|
||||
while (Bry_.Eq(src, ws_end + 1, ws_end + 5, Bry__comment_bgn)) {
|
||||
int cur_char_pos = Bry_find_.Find_fwd(src, Bry__comment_end, ws_end + 4);
|
||||
if (cur_char_pos == Bry_find_.Not_found) {
|
||||
break;
|
||||
}
|
||||
cur_char_pos = cur_char_pos + 2 + Bry_find_.Find_fwd_while_space_or_tab(src, cur_char_pos + 3, src_len);
|
||||
comments_list.Add(new int[] {ws_end + 1, cur_char_pos});
|
||||
ws_end = cur_char_pos;
|
||||
}
|
||||
|
||||
// Eat the line if possible
|
||||
// TODO: This could theoretically be done if $wsStart == 0, i.e. for comments at the overall start.
|
||||
// That's not how Sanitizer::removeHTMLcomments() did it, but it's a possible beneficial b/c break.
|
||||
int comment_bgn_pos = -1;
|
||||
if ( ws_bgn > 0
|
||||
&& Bry_.Eq(src, ws_bgn - 1, ws_bgn , Byte_ascii.Nl_bry)
|
||||
&& Bry_.Eq(src, ws_end + 1, ws_end + 2, Byte_ascii.Nl_bry)
|
||||
) {
|
||||
// Remove leading whitespace from the end of the accumulator
|
||||
// Sanity check first though
|
||||
int ws_len = i - ws_bgn;
|
||||
if ( ws_len > 0
|
||||
&& Bry_find_.Find_fwd_while_space_or_tab(accum.To_bry(), -ws_len, src_len) == ws_len
|
||||
) {
|
||||
accum.Clear().Add(Bry_.Mid(accum.To_bry(), 0, -ws_len));
|
||||
}
|
||||
|
||||
// Dump all but the last comment to the accumulator
|
||||
int comments_list_len = comments_list.Len();
|
||||
for (int j = 0; j < comments_list_len; j++) {
|
||||
int[] com = (int[])comments_list.Get_at(j);
|
||||
comment_bgn_pos = com[0];
|
||||
comment_end_pos = com[1] + 1;
|
||||
if (j == comments_list_len - 1) {
|
||||
break;
|
||||
}
|
||||
inner = Bry_.Mid(src, comment_bgn_pos, comment_end_pos);
|
||||
accum.Add_str_a7("<comment>").Add(htmlspecialchars(inner)).Add_str_a7("</comment>");
|
||||
}
|
||||
|
||||
// Do a line-start run next time to look for headings after the comment
|
||||
fake_line_start = true;
|
||||
}
|
||||
else {
|
||||
// No line to eat, just take the comment itself
|
||||
comment_bgn_pos = i;
|
||||
comment_end_pos += 2;
|
||||
}
|
||||
|
||||
if (stack.top != null) {
|
||||
Xomw_prepro_part part = stack.top.Get_current_part();
|
||||
if (!(part.comment_end == ws_end - 1)) {
|
||||
part.visual_end = ws_bgn;
|
||||
}
|
||||
// Else comments abutting, no change in visual end
|
||||
part.comment_end = comment_end_pos;
|
||||
}
|
||||
i = comment_end_pos + 1;
|
||||
inner = Bry_.Mid(src, comment_bgn_pos, comment_end_pos + 1);
|
||||
accum.Add_str_a7("<comment>").Add(htmlspecialchars(inner)).Add_str_a7("</comment>");
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
byte[] name = element.name;
|
||||
int atr_bgn = i + name.length + 1;
|
||||
|
||||
// Find end of tag
|
||||
int tag_end_pos = no_more_gt ? Bry_find_.Not_found : Bry_find_.Find_fwd(src, Byte_ascii.Angle_end, atr_bgn);
|
||||
if (tag_end_pos == Bry_find_.Not_found) {
|
||||
// Infinite backtrack; Disable tag search to prevent worst-case O(N^2) performance
|
||||
no_more_gt = true;
|
||||
accum.Add(Bry__escaped_lt);
|
||||
i++;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (ignored_tags.Has(name)) {
|
||||
accum.Add_str_a7("<ignore>").Add(htmlspecialchars(Bry_.Mid(src, i, tag_end_pos - i + 1))).Add_str_a7("</ignore>");
|
||||
i = tag_end_pos + 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
int tag_bgn_pos = i;
|
||||
int atr_end = -1;
|
||||
byte[] close = null;
|
||||
if (src[tag_end_pos - 1] == Byte_ascii.Slash) {
|
||||
atr_end = tag_end_pos - 1;
|
||||
inner = null;
|
||||
i = tag_end_pos + 1;
|
||||
close = null;
|
||||
}
|
||||
else {
|
||||
atr_end = tag_end_pos;
|
||||
// Find closing tag
|
||||
// FIXME: need to search forward
|
||||
Xomw_prepro_elem elem_end = (Xomw_prepro_elem)elements_end_trie.Match_at(elements_trv, src, tag_end_pos + 1, src_len); // preg_match( "/<\/" . preg_quote( $name, '/' ) . "\s*>/i",
|
||||
int elem_end_lhs = elements_trv.Pos();
|
||||
int elem_end_rhs = elements_trv.Pos();
|
||||
// check for "\s*>"
|
||||
if (elem_end != null) {
|
||||
elem_end_rhs = Bry_find_.Find_fwd_while(src, elem_end_rhs, src_len, Byte_ascii.Space);
|
||||
if (elem_end_rhs == src_len) {
|
||||
elem_end = null;
|
||||
}
|
||||
else {
|
||||
if (src[elem_end_rhs] == Byte_ascii.Gt)
|
||||
elem_end_rhs = elem_end_rhs + 1;
|
||||
else
|
||||
elem_end = null;
|
||||
}
|
||||
}
|
||||
if ( !no_more_closing_tag.Has(name)
|
||||
&& elem_end != null) {
|
||||
inner = Bry_.Mid(src, tag_end_pos + 1, elem_end_lhs);
|
||||
i = elem_end_rhs;
|
||||
tmp_bfr.Add_str_a7("<close>").Add(htmlspecialchars(Bry_.Mid(src, elem_end_lhs, elem_end_rhs))).Add_str_a7("</close>");
|
||||
close = tmp_bfr.To_bry_and_clear();
|
||||
}
|
||||
else {
|
||||
// No end tag
|
||||
if (xmlish_allow_missing_end_tag.Has(name)) {
|
||||
// Let it run out to the end of the src.
|
||||
inner = Bry_.Mid(src, tag_end_pos + 1);
|
||||
i = src_len;
|
||||
close = Bry_.Empty;
|
||||
}
|
||||
else {
|
||||
// Don't match the tag, treat opening tag as literal and resume parsing.
|
||||
i = tag_end_pos + 1;
|
||||
accum.Add(htmlspecialchars(Bry_.Mid(src, tag_bgn_pos, tag_end_pos + 1)));
|
||||
// Cache results, otherwise we have O(N^2) performance for input like <foo><foo><foo>...
|
||||
no_more_closing_tag.Add_if_dupe_use_nth(name, name);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// <includeonly> and <noinclude> just become <ignore> tags
|
||||
if (ignored_elements.Has(name)) {
|
||||
accum.Add_str_a7("<ignore>").Add(htmlspecialchars(Bry_.Mid(src, tag_bgn_pos, i))).Add_str_a7("</ignore>");
|
||||
continue;
|
||||
}
|
||||
|
||||
accum.Add_str_a7("<ext>");
|
||||
byte[] atr_bry = atr_end <= atr_bgn ? Bry_.Empty : Bry_.Mid(src, atr_bgn, atr_end);
|
||||
accum.Add_str_a7("<name>").Add(name).Add_str_a7("</name>");
|
||||
// Note that the attr element contains the whitespace between name and attribute,
|
||||
// this is necessary for precise reconstruction during pre-save transform.
|
||||
accum.Add_str_a7("<attr>").Add(htmlspecialchars(atr_bry)).Add_str_a7("</attr>");
|
||||
if (inner != null) {
|
||||
accum.Add_str_a7("<inner>").Add(htmlspecialchars(inner)).Add_str_a7("</inner>");
|
||||
}
|
||||
accum.Add(close).Add_str_a7("</ext>");
|
||||
}
|
||||
else if (found == Found__line_bgn) {
|
||||
// Is this the start of a heading?; Line break belongs before the heading element in any case
|
||||
if (fake_line_start) {
|
||||
fake_line_start = false;
|
||||
} else {
|
||||
accum.Add(cur_char);
|
||||
i++;
|
||||
}
|
||||
|
||||
int eq_end = Bry_find_.Find_fwd_while(src, i, i + 6, Byte_ascii.Eq); // strspn( $src, '=', $i, 6 );
|
||||
int count = i - eq_end;
|
||||
if (count == 1 && find_equals) {
|
||||
// DWIM: This looks kind of like a name/value separator.
|
||||
// Let's let the equals handler have it and break the potential heading.
|
||||
// This is heuristic, but AFAICT the methods for completely correct disambiguation are very complex.
|
||||
}
|
||||
else if (count > 0) {
|
||||
Xomw_prepro_piece piece = new Xomw_prepro_piece(Byte_ascii.Nl_bry, Byte_ascii.Nl_bry, count, i, false);
|
||||
piece.Add_part(Bry_.Repeat(Byte_ascii.Eq, count));
|
||||
stack.Push(piece);
|
||||
accum = stack.Get_accum();
|
||||
Xomw_prepro_flags flags = stack.Get_flags();
|
||||
find_pipe = flags.Find_pipe;
|
||||
find_equals = flags.Find_eq;
|
||||
in_heading = flags.In_heading;
|
||||
i += count;
|
||||
}
|
||||
}
|
||||
else if (found == Found__line_end) {
|
||||
Xomw_prepro_piece piece = stack.top;
|
||||
// A heading must be open, otherwise \n wouldn't have been in the search list
|
||||
if (!Bry_.Eq(piece.open, Byte_ascii.Nl_bry)) throw Err_.new_wo_type("assertion:piece must start with \\n");
|
||||
Xomw_prepro_part part = piece.Get_current_part();
|
||||
|
||||
// Search back through the input to see if it has a proper close.
|
||||
// Do this using the reversed String since the other solutions (end anchor, etc.) are inefficient.
|
||||
int search_bgn = Bry_find_.Find_bwd__while_space_or_tab(src, i, 0);
|
||||
if (part.comment_end != -1 && search_bgn -1 == part.comment_end) {
|
||||
// Comment found at line end; Search for equals signs before the comment
|
||||
search_bgn = part.visual_end;
|
||||
search_bgn -= Bry_find_.Find_bwd__while_space_or_tab(src, search_bgn, 0);
|
||||
}
|
||||
int count = piece.count;
|
||||
int eq_len = Bry_find_.Find_bwd_while(src, search_bgn, 0, Byte_ascii.Eq);
|
||||
|
||||
byte[] element = Bry_.Empty;
|
||||
if (eq_len > 0) {
|
||||
if (search_bgn - eq_len == piece.start_pos) {
|
||||
// This is just a single String of equals signs on its own line
|
||||
// Replicate the doHeadings behavior /={count}(.+)={count}/
|
||||
// First find out how many equals signs there really are (don't stop at 6)
|
||||
count = eq_len;
|
||||
if (count < 3) {
|
||||
count = 0;
|
||||
} else {
|
||||
count = (count - 1) / 2;
|
||||
if (count > 6) count = 6;
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (eq_len < count)
|
||||
count = eq_len;
|
||||
}
|
||||
if (count > 0) {
|
||||
// Normal match, output <h>
|
||||
element = tmp_bfr.Add_str_a7("<h level=\"").Add_int_variable(count).Add_str_a7("\" i=\"").Add_int_variable(heading_index).Add_str_a7("\">").Add_bfr_and_preserve(accum).Add_str_a7("</h>").To_bry_and_clear();
|
||||
heading_index++;
|
||||
} else {
|
||||
// Single equals sign on its own line, count=0
|
||||
element = accum.To_bry();
|
||||
}
|
||||
}
|
||||
else {
|
||||
// No match, no <h>, just pass down the inner src
|
||||
element = accum.To_bry();
|
||||
}
|
||||
|
||||
// Unwind the stack
|
||||
stack.Pop();
|
||||
accum = stack.Get_accum();
|
||||
|
||||
Xomw_prepro_flags flags = stack.Get_flags();
|
||||
find_pipe = flags.Find_pipe;
|
||||
find_equals = flags.Find_eq;
|
||||
in_heading = flags.In_heading;
|
||||
|
||||
// Append the result to the enclosing accumulator
|
||||
accum.Add(element);
|
||||
// Note that we do NOT increment the input pointer. This is because the closing linebreak could be the opening linebreak of another heading.
|
||||
// Infinite loops are avoided because the next iteration MUST hit the heading open case above, which unconditionally increments the input pointer.
|
||||
}
|
||||
else if (found == Found__open) {
|
||||
// count opening brace characters
|
||||
int count = Bry_find_.Find_fwd(src, cur_char, i, src_len);
|
||||
|
||||
// we need to add to stack only if opening brace count is enough for one of the rules
|
||||
if (count >= rule.min) {
|
||||
// Add it to the stack
|
||||
Xomw_prepro_piece piece = new Xomw_prepro_piece(cur_char, rule.end, count, -1, i > 0 && src[i - 1] == Byte_ascii.Nl);
|
||||
stack.Push(piece);
|
||||
accum = stack.Get_accum();
|
||||
Xomw_prepro_flags flags = stack.Get_flags();
|
||||
find_pipe = flags.Find_pipe;
|
||||
find_equals = flags.Find_eq;
|
||||
in_heading = flags.In_heading;
|
||||
}
|
||||
else {
|
||||
// Add literal brace(s)
|
||||
accum.Add(htmlspecialchars(Bry_.Repeat_bry(cur_char, count)));
|
||||
}
|
||||
i += count;
|
||||
}
|
||||
else if (found == Found__close) {
|
||||
Xomw_prepro_piece piece = stack.top;
|
||||
// lets check if there are enough characters for closing brace
|
||||
int count = Bry_find_.Find_fwd(src, cur_char, i, src_len);
|
||||
int max_count = piece.count;
|
||||
if (count > max_count) count = max_count;
|
||||
|
||||
// check for maximum matching characters (if there are 5 closing characters, we will probably need only 3 - depending on the rules)
|
||||
rule = Get_rule(piece.open);
|
||||
int matching_count = -1;
|
||||
if (count > rule.max) {
|
||||
// The specified maximum exists in the callback array, unless the caller has made an error
|
||||
matching_count = rule.max;
|
||||
}
|
||||
else {
|
||||
// Count is less than the maximum
|
||||
// Skip any gaps in the callback array to find the true largest match
|
||||
// Need to use array_key_exists not isset because the callback can be null
|
||||
matching_count = count;
|
||||
while (matching_count > 0 && !rule.Names_exist(matching_count)) {
|
||||
matching_count--;
|
||||
}
|
||||
}
|
||||
|
||||
if (matching_count <= 0) {
|
||||
// No matching element found in callback array
|
||||
// Output a literal closing brace and continue
|
||||
accum.Add(htmlspecialchars(Bry_.Repeat_bry(cur_char, count)));
|
||||
i += count;
|
||||
continue;
|
||||
}
|
||||
int name_type = rule.names[matching_count];
|
||||
byte[] element = null;
|
||||
if (name_type == Xomw_prepro_rule.Name__null) {
|
||||
// No element, just literal text
|
||||
piece.Break_syntax(tmp_bfr, matching_count);
|
||||
element = tmp_bfr.Add(Bry_.Repeat_bry(rule.end, matching_count)).To_bry_and_clear();
|
||||
}
|
||||
else {
|
||||
// Create XML element; Note: $parts is already XML, does not need to be encoded further
|
||||
List_adp parts = piece.parts;
|
||||
byte[] title = ((Xomw_prepro_part)parts.Get_at(0)).bry;
|
||||
parts.Del_at(0);
|
||||
|
||||
// The invocation is at the start of the line if lineStart is set in the stack, and all opening brackets are used up.
|
||||
byte[] attr = null;
|
||||
if (max_count == matching_count && !piece.line_start) {
|
||||
attr = Bry_.new_a7(" lineStart=\"1\"");
|
||||
}
|
||||
else {
|
||||
attr = Bry_.Empty;
|
||||
}
|
||||
|
||||
byte[] name_bry = Xomw_prepro_rule.Name(name_type);
|
||||
tmp_bfr.Add_str_a7("<").Add(name_bry).Add(attr).Add_str_a7(">");
|
||||
tmp_bfr.Add_str_a7("<title>").Add(title).Add_str_a7("</title>");
|
||||
|
||||
int arg_idx = 1;
|
||||
int parts_len = parts.Len();
|
||||
for (int j = 0; j < parts_len; j++) {
|
||||
Xomw_prepro_part part = (Xomw_prepro_part)parts.Get_at(j);
|
||||
if (part.Eqpos != -1) {
|
||||
byte[] arg_key = Bry_.Mid(part.bry, 0, part.Eqpos);
|
||||
byte[] arg_val = Bry_.Mid(part.bry, part.Eqpos + 1);
|
||||
tmp_bfr.Add_str_a7("<part><name>").Add(arg_key).Add_str_a7("</name>=<value>").Add(arg_val).Add_str_a7("</value></part>");
|
||||
}
|
||||
else {
|
||||
tmp_bfr.Add_str_a7("<part><name index=\"").Add_int_variable(arg_idx).Add_str_a7("\" /><value>{").Add(part.bry).Add_str_a7("}</value></part>");
|
||||
arg_idx++;
|
||||
}
|
||||
}
|
||||
tmp_bfr.Add_str_a7("</").Add(name_bry).Add_str_a7(">");
|
||||
element = tmp_bfr.To_bry_and_clear();
|
||||
}
|
||||
|
||||
// Advance input pointer
|
||||
i += matching_count;
|
||||
|
||||
// Unwind the stack
|
||||
stack.Pop();
|
||||
accum = stack.Get_accum();
|
||||
|
||||
// Re-add the old stack element if it still has unmatched opening characters remaining
|
||||
if (matching_count < piece.count) {
|
||||
piece.parts.Clear(); // piece.parts = [ new PPDPart ];
|
||||
piece.count -= matching_count;
|
||||
|
||||
// do we still qualify for any callback with remaining count?
|
||||
int min = Get_rule(piece.open).min;
|
||||
if (piece.count >= min) {
|
||||
stack.Push(piece);
|
||||
accum = stack.Get_accum();
|
||||
}
|
||||
else {
|
||||
accum.Add(Bry_.Repeat_bry(piece.open, piece.count));
|
||||
}
|
||||
}
|
||||
|
||||
Xomw_prepro_flags flags = stack.Get_flags();
|
||||
find_pipe = flags.Find_pipe;
|
||||
find_equals = flags.Find_eq;
|
||||
in_heading = flags.In_heading;
|
||||
|
||||
// Add XML element to the enclosing accumulator
|
||||
accum.Add(element);
|
||||
}
|
||||
else if (found == Found__pipe) {
|
||||
find_equals = true; // shortcut for getFlags()
|
||||
stack.Add_part(Bry_.Empty);
|
||||
accum = stack.Get_accum();
|
||||
i++;
|
||||
}
|
||||
else if (found == Found__equals) {
|
||||
find_equals = false; // shortcut for getFlags()
|
||||
stack.Get_current_part().Eqpos = accum.Len();
|
||||
accum.Add_byte(Byte_ascii.Eq);
|
||||
i++;
|
||||
}
|
||||
}
|
||||
|
||||
// Output any remaining unclosed brackets
|
||||
Bry_bfr root_accum = stack.Get_root_accum();
|
||||
int stack_len = stack.stack.Len();
|
||||
for (int j = 0; j < stack_len; j++) {
|
||||
Xomw_prepro_piece piece = (Xomw_prepro_piece)stack.stack.Get_at(j);
|
||||
root_accum.Add(piece.Break_syntax(tmp_bfr, -1));
|
||||
}
|
||||
root_accum.Add_str_a7("</root>");
|
||||
return root_accum.To_bry_and_clear();
|
||||
}
|
||||
}
|
||||
class Xomw_prepro_rule {
|
||||
public Xomw_prepro_rule(byte[] bgn, byte[] end, int min, int max, int[] names) {
|
||||
this.bgn = bgn;
|
||||
this.end = end;
|
||||
this.min = min;
|
||||
this.max = max;
|
||||
this.names = names;
|
||||
}
|
||||
public final byte[] bgn;
|
||||
public final byte[] end;
|
||||
public final int min;
|
||||
public final int max;
|
||||
public final int[] names;
|
||||
public boolean Names_exist(int idx) {
|
||||
return idx < names.length && names[idx] != Name__invalid;
|
||||
}
|
||||
private static final byte[] Name__tmpl_bry = Bry_.new_a7("template"), Name__targ_bry = Bry_.new_a7("tplarg");
|
||||
public static final int Name__invalid = -1, Name__null = 0, Name__tmpl = 1, Name__targ = 2;
|
||||
public static byte[] Name(int type) {
|
||||
switch (type) {
|
||||
case Name__tmpl: return Name__tmpl_bry;
|
||||
case Name__targ: return Name__targ_bry;
|
||||
default:
|
||||
case Name__invalid: return null;
|
||||
case Name__null: return null;
|
||||
}
|
||||
}
|
||||
}
|
||||
class Xomw_prepro_elem {
|
||||
private static final byte[] Bry__tag_end = Bry_.new_a7("</");
|
||||
public Xomw_prepro_elem(int type, byte[] name) {
|
||||
this.type = type;
|
||||
this.name = name;
|
||||
this.tag_end_lhs = Bry_.Add(Bry__tag_end, name);
|
||||
}
|
||||
public final int type;
|
||||
public final byte[] name;
|
||||
public final byte[] tag_end_lhs;
|
||||
public static final int Type__comment = 0;
|
||||
}
|
||||
/**
 * Simple mutable pair of a numeric type code and the byte sequence associated
 * with it.
 */
class Xomw_prepro_curchar_itm {
	/** Numeric type code supplied at construction. */
	public int type;
	/** Byte sequence supplied at construction; stored by reference, not copied. */
	public byte[] sequence;

	/**
	 * @param type     numeric type code
	 * @param sequence byte sequence to associate with the type
	 */
	public Xomw_prepro_curchar_itm(int type, byte[] sequence) {
		this.sequence = sequence;
		this.type = type;
	}
}
|
@ -38,6 +38,6 @@ public class Arg_nde_tkn extends Xop_tkn_itm_base {
|
||||
val_tkn.Tmpl_evaluate(ctx, src, caller, bfr);
|
||||
return true;
|
||||
}
|
||||
public static final Arg_nde_tkn[] Ary_empty = new Arg_nde_tkn[0];
|
||||
public static final Arg_nde_tkn Null = new Arg_nde_tkn(-1, -1);
|
||||
public static final Arg_nde_tkn[] Ary_empty = new Arg_nde_tkn[0];
|
||||
public static final Arg_nde_tkn Null = new Arg_nde_tkn(-1, -1);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user