diff --git a/100_core/src/gplx/Bry_.java b/100_core/src/gplx/Bry_.java
index 864d8652f..67444bbc7 100644
--- a/100_core/src/gplx/Bry_.java
+++ b/100_core/src/gplx/Bry_.java
@@ -164,6 +164,17 @@ public class Bry_ {
rv[i] = b;
return rv;
}
+ public static byte[] Repeat_bry(byte[] bry, int len) { // concat bry to itself len times; EX: ("abc", 3) -> "abcabcabc"
+   // bry: bytes to repeat (not modified); len: number of copies
+   // returns a new array of (len * bry.length) bytes; empty when bry is empty or len is 0
+   // a negative len with a non-empty bry fails in the array allocation (NegativeArraySizeException)
+   int bry_len = bry.length;
+   byte[] rv = new byte[len * bry_len];
+   // bulk-copy one whole unit per iteration; the loop never runs when rv is empty, so a 0-length bry cannot spin forever
+   for (int pos = 0; pos < rv.length; pos += bry_len) {
+     System.arraycopy(bry, 0, rv, pos, bry_len);
+   }
+   return rv;
+ }
public static byte[] Add(byte[] src, byte b) {
int src_len = src.length;
byte[] rv = new byte[src_len + 1];
diff --git a/100_core/src/gplx/Bry__tst.java b/100_core/src/gplx/Bry__tst.java
index 79d381a15..c02d5f75f 100644
--- a/100_core/src/gplx/Bry__tst.java
+++ b/100_core/src/gplx/Bry__tst.java
@@ -16,7 +16,7 @@ You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx;
-import org.junit.*; import gplx.core.primitives.*; import gplx.core.brys.*;
+import org.junit.*; import gplx.core.primitives.*; import gplx.core.brys.*; import gplx.core.tests.*;
public class Bry__tst {
private final Bry__fxt fxt = new Bry__fxt();
@Test public void new_ascii_() {
@@ -276,6 +276,9 @@ public class Bry__tst {
fxt.Test__new_u8_nl_apos(String_.Ary("a", "b"), "a\nb");
fxt.Test__new_u8_nl_apos(String_.Ary("a", "b'c", "d"), "a\nb\"c\nd");
}
+ @Test public void Repeat_bry() { // covers the basic case plus the boundaries (1 copy, 0 copies, empty input)
+   fxt.Test__repeat_bry("abc" , 3, "abcabcabc");	// multi-byte unit repeated
+   fxt.Test__repeat_bry("a"   , 1, "a");			// single copy has the same content as the input
+   fxt.Test__repeat_bry("abc" , 0, "");			// zero copies -> empty
+   fxt.Test__repeat_bry(""    , 3, "");			// empty unit -> empty, regardless of count
+ }
}
class Bry__fxt {
public void Test_trim_end(String raw, byte trim, String expd) {
@@ -292,4 +295,7 @@ class Bry__fxt {
public void Test__new_u8_nl_apos(String[] ary, String expd) {
Tfds.Eq_str_lines(expd, String_.new_u8(Bry_.New_u8_nl_apos(ary)));
}
+ public void Test__repeat_bry(String s, int count, String expd) {
+ Gftest.Eq__str(expd, Bry_.Repeat_bry(Bry_.new_u8(s), count));
+ }
}
diff --git a/100_core/src/gplx/Bry_find_.java b/100_core/src/gplx/Bry_find_.java
index e0b4ed4ec..1be0b0a88 100644
--- a/100_core/src/gplx/Bry_find_.java
+++ b/100_core/src/gplx/Bry_find_.java
@@ -158,6 +158,19 @@ public class Bry_find_ {
}
return Bry_find_.Not_found;
}
+ public static int Find_bwd__while_space_or_tab(byte[] src, int cur, int end) { // scan backward from cur down to end (both inclusive); get pos of 1st byte that is not \t or \s
+   if (src.length <= cur) return Bry_find_.Not_found; // start pos lies past the last byte
+   int pos = cur;
+   while (pos >= end) {
+     byte b = src[pos];
+     if (b != Byte_ascii.Space && b != Byte_ascii.Tab) return pos;
+     pos--;
+   }
+   return Bry_find_.Not_found; // range was empty or held only spaces / tabs
+ }
public static int Find_bwd_non_ws_or_end(byte[] src, int cur, int end) {
if (cur >= src.length) return Bry_find_.Not_found;
for (int i = cur; i >= end; i--) {
diff --git a/400_xowa/src/gplx/xowa/Xoa_app_.java b/400_xowa/src/gplx/xowa/Xoa_app_.java
index 682f53f54..76407230d 100644
--- a/400_xowa/src/gplx/xowa/Xoa_app_.java
+++ b/400_xowa/src/gplx/xowa/Xoa_app_.java
@@ -32,8 +32,8 @@ public class Xoa_app_ {
}
}
public static final String Name = "xowa";
- public static final int Version_id = 513;
- public static final String Version = "4.0.1.1701"; // RELEASE:2017-01-03 20:30
+ public static final int Version_id = 512; // NOTE(review): the replaced line had 513, so the id goes down while Version goes up (4.0.1 -> 4.1.0); confirm the intended value
+ public static final String Version = "4.1.0.1701"; // RELEASE:2017-01-03 20:30; NOTE(review): release stamp is identical to the replaced 4.0.1.1701 line -- confirm
public static String Build_date = "2012-12-30 00:00:00";
public static String Build_date_fmt = "yyyy-MM-dd HH:mm:ss";
public static String Op_sys_str;
diff --git a/400_xowa/src/gplx/xowa/parsers/mws/prepros/Xomw_prepro_stack.java b/400_xowa/src/gplx/xowa/parsers/mws/prepros/Xomw_prepro_stack.java
new file mode 100644
index 000000000..c442bd3d6
--- /dev/null
+++ b/400_xowa/src/gplx/xowa/parsers/mws/prepros/Xomw_prepro_stack.java
@@ -0,0 +1,155 @@
+/*
+XOWA: the XOWA Offline Wiki Application
+Copyright (C) 2012 gnosygnu@gmail.com
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as
+published by the Free Software Foundation, either version 3 of the
+License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+package gplx.xowa.parsers.mws.prepros; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.mws.*;
+class Xomw_prepro_stack { // stack of open preprocessor pieces, plus the buffer that mirrors the output of the innermost piece
+  public List_adp stack = List_adp_.New();	// open pieces; the last item is the innermost one
+  public Xomw_prepro_piece top;				// innermost piece; null once the last piece has been popped
+  private Bry_bfr accum = Bry_bfr_.New(), root_accum = Bry_bfr_.New();
+  private final Xomw_prepro_flags flags = new Xomw_prepro_flags(); // one instance, overwritten and returned by every Get_flags call
+
+  public int Count() {return stack.Len();}
+  public Bry_bfr Get_accum() {return accum;}
+  public Bry_bfr Get_root_accum() {return root_accum;}
+
+  // Returns the current part of the innermost piece, or null when no piece is open.
+  public Xomw_prepro_part Get_current_part() {
+    return top == null ? null : top.Get_current_part();
+  }
+
+  // Pushes item, makes it the innermost piece, and reloads accum with a copy of that piece's current part.
+  // NOTE(review): after Pop has emptied the stack, accum is the same object as root_accum, so the Clear() here also clears root_accum -- confirm this is intended.
+  public void Push(Xomw_prepro_piece item) {
+    stack.Add(item);
+    this.top = (Xomw_prepro_piece)stack.Get_at(stack.Len() - 1);
+    accum.Clear().Add(top.Get_accum());
+  }
+
+  // Removes and returns the innermost piece; throws when the stack is already empty.
+  // Afterwards top / accum describe the new innermost piece, or the root accumulator when none is left.
+  public Xomw_prepro_piece Pop() {
+    int len = stack.Count();
+    if (len == 0) {
+      throw Err_.new_wo_type("Xomw_prepro_stack: no elements remaining");
+    }
+
+    int last_idx = len - 1;
+    Xomw_prepro_piece rv = (Xomw_prepro_piece)stack.Get_at(last_idx);
+    stack.Del_at(last_idx);
+
+    // must test the size AFTER the removal: the pre-removal size is always > 0 at this point,
+    // which would index item -1 when the last piece is popped and would never reach the root branch
+    if (stack.Len() > 0) {
+      this.top = (Xomw_prepro_piece)stack.Get_at(stack.Len() - 1);
+      accum.Clear().Add(top.Get_accum());
+    } else {
+      this.top = null;
+      this.accum = root_accum;
+    }
+    return rv;
+  }
+
+  // Starts a new part on the innermost piece and reloads accum from it; requires an open piece (top != null).
+  public void Add_part(byte[] bry) {
+    top.Add_part(bry);
+    accum.Clear().Add(top.Get_accum());
+  }
+
+  // Returns the shared flags object: all false when no piece is open; else as set by the innermost piece.
+  public Xomw_prepro_flags Get_flags() {
+    if (stack.Count() == 0) {
+      flags.Find_eq = false;
+      flags.Find_pipe = false;
+      flags.In_heading = false;
+    }
+    else {
+      top.Set_flags(flags);
+    }
+    return flags;
+  }
+}
+class Xomw_prepro_flags {
+ public boolean Find_pipe;
+ public boolean Find_eq;
+ public boolean In_heading;
+}
+class Xomw_prepro_piece { // one open construct on the preprocessor stack, together with the parts collected for it so far
+  public final byte[] open; // Opening character (\n for heading)
+  public final byte[] close; // Matching closing char
+  public int count; // Number of opening characters found (number of "=" for heading)
+  public final boolean line_start; // True if the open char appeared at the start of the input line; Not set for headings.
+  public final int start_pos; // input pos supplied by the creator of the piece
+  public List_adp parts = List_adp_.New(); // Xomw_prepro_part items; empty until Add_part is called
+  public Xomw_prepro_piece(byte[] open, byte[] close, int count, int start_pos, boolean line_start) {
+    this.open = open;
+    this.close = close;
+    this.count = count;
+    this.start_pos = start_pos;
+    this.line_start = line_start;
+  }
+  // Returns the most recently added part; requires at least one prior Add_part call.
+  public Xomw_prepro_part Get_current_part() {
+    return (Xomw_prepro_part)parts.Get_at(parts.Len() - 1);
+  }
+  // Returns the bytes of the current (last) part.
+  public byte[] Get_accum() {
+    return Get_current_part().bry;
+  }
+  // Appends a new part holding bry; it becomes the current part.
+  public void Add_part(byte[] bry) {
+    parts.Add(new Xomw_prepro_part(bry));
+  }
+  public static final byte[] Brack_bgn_bry = Bry_.new_a7("[");
+  // Fills flags for this piece (mirrors MediaWiki PPDStackElement::getFlags):
+  //   Find_pipe  : open is neither "\n" (heading) nor "["
+  //   Find_eq    : Find_pipe, and there is more than one part, and the last part has no "=" recorded yet
+  //   In_heading : open is "\n"
+  public void Set_flags(Xomw_prepro_flags flags) {
+    int parts_len = parts.Len();
+    boolean open_is_nl = Bry_.Eq(open, Byte_ascii.Nl_bry);
+    // pipes are ignored inside headings and inside "[" constructs, so both comparisons must be negated
+    boolean find_pipe = !open_is_nl && !Bry_.Eq(open, Brack_bgn_bry);
+    flags.Find_pipe = find_pipe;
+    // Eqpos == -1 means "no equals sign recorded yet"; only then is it worth looking for one
+    flags.Find_eq = find_pipe && parts_len > 1 && ((Xomw_prepro_part)parts.Get_at(parts_len - 1)).Eqpos == -1;
+    flags.In_heading = open_is_nl;
+  }
+  // Get the output String that would result if the close is not found.
+  //   tmp_bfr       : scratch buffer; expected to be empty on entry and is left empty on exit
+  //   opening_count : number of opening sequences to emit; -1 means "use this.count"
+  // For a heading the bytes of the 1st part are returned as-is; else: open repeated opening_count times, followed by the parts joined with "|"
+  public byte[] Break_syntax(Bry_bfr tmp_bfr, int opening_count) {
+    if (Bry_.Eq(open, Byte_ascii.Nl_bry)) {
+      return ((Xomw_prepro_part)parts.Get_at(0)).bry;
+    }
+    if (opening_count == -1) {
+      opening_count = count;
+    }
+    // the opening chars go into the same buffer as the parts; otherwise they are dropped from the result
+    tmp_bfr.Add(Bry_.Repeat_bry(open, opening_count));
+
+    // concat parts with "|"
+    int len = parts.Len();
+    for (int i = 0; i < len; i++) {
+      if (i != 0) {
+        tmp_bfr.Add_byte_pipe();
+      }
+      tmp_bfr.Add(((Xomw_prepro_part)parts.Get_at(i)).bry);
+    }
+    return tmp_bfr.To_bry_and_clear();
+  }
+}
+class Xomw_prepro_part { // one part of a piece: its bytes plus three positions that all start at -1 (unset)
+  public final byte[] bry;		// bytes of the part; stored by reference, not copied
+  public int Eqpos = -1;		// -1 until an equals sign is recorded for the part
+  public int comment_end = -1;	// -1 until a comment is recorded for the part
+  public int visual_end = -1;	// -1 until a comment is recorded for the part
+  public Xomw_prepro_part(byte[] bry) {this.bry = bry;}
+}
diff --git a/400_xowa/src/gplx/xowa/parsers/mws/prepros/Xomw_prepro_wkr.java b/400_xowa/src/gplx/xowa/parsers/mws/prepros/Xomw_prepro_wkr.java
new file mode 100644
index 000000000..e81eb6c82
--- /dev/null
+++ b/400_xowa/src/gplx/xowa/parsers/mws/prepros/Xomw_prepro_wkr.java
@@ -0,0 +1,715 @@
+/*
+XOWA: the XOWA Offline Wiki Application
+Copyright (C) 2012 gnosygnu@gmail.com
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as
+published by the Free Software Foundation, either version 3 of the
+License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+package gplx.xowa.parsers.mws.prepros; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.mws.*;
+import gplx.core.btries.*;
+public class Xomw_prepro_wkr {
+ private static final Xomw_prepro_rule // ctor args are (bgn, end, min, max, names); names is indexed by the number of matched chars
+ rule_curly = new Xomw_prepro_rule(Bry_.new_a7("{"), Bry_.new_a7("}") , 2, 3, new int[] {Xomw_prepro_rule.Name__invalid, Xomw_prepro_rule.Name__invalid, Xomw_prepro_rule.Name__tmpl, Xomw_prepro_rule.Name__targ}) // 2 chars -> template; 3 chars -> tplarg
+ , rule_brack = new Xomw_prepro_rule(Bry_.new_a7("["), Bry_.new_a7("]") , 2, 2, new int[] {Xomw_prepro_rule.Name__invalid, Xomw_prepro_rule.Name__invalid, Xomw_prepro_rule.Name__null}) // 2 chars -> Name__null (literal text, no element)
+ , rule_langv = new Xomw_prepro_rule(Bry_.new_a7("-{"), Bry_.new_a7("}-"), 1, 1, new int[] {Xomw_prepro_rule.Name__invalid, Xomw_prepro_rule.Name__null}) // 1 sequence -> Name__null (literal text, no element)
+ ;
+ private static final byte[] // tag / comment literals searched for in the source; values follow MediaWiki's Preprocessor_DOM
+   Bry__only_include_bgn = Bry_.new_a7("<onlyinclude>")
+ , Bry__only_include_end = Bry_.new_a7("</onlyinclude>")
+ , Bry__comment_bgn = Bry_.new_a7("<!--")	// 4 bytes; matches the 4-byte window compared against it
+ , Bry__comment_end = Bry_.new_a7("-->")
+ , Bry__escaped_lt = Bry_.new_a7("&lt;")		// emitted in place of a "<" that does not start a known element
+ , Bry__includeonly = Bry_.new_a7("includeonly")
+ , Bry__noinclude = Bry_.new_a7("noinclude")
+ , Bry__onlyinclude = Bry_.new_a7("onlyinclude")
+ ;
+ private static final int Len__only_include_end = Bry__only_include_end.length; // cached length of the closing-tag literal
+ private static final int // values assigned to the "found" local in Preprocess_to_xml to select the branch to run
+ Found__line_bgn = 0
+ , Found__line_end = 1
+ , Found__pipe = 2
+ , Found__equals = 3
+ , Found__angle = 4
+ , Found__close = 5
+ , Found__open = 6
+ ;
+
+ private Bry_bfr accum = Bry_bfr_.New(), tmp_bfr = Bry_bfr_.New(); // accum is re-pointed at the stack's accumulator during parsing; tmp_bfr is scratch space
+ private List_adp comments_list = List_adp_.New(); // holds int[] {bgn, end} pairs for a run of adjacent comments
+ private byte[] htmlspecialchars(byte[] bry) { // pass-through for now: returns bry unchanged; no escaping is applied
+ return bry;
+ }
+ private Xomw_prepro_rule Get_rule(byte[] bry) { // maps an opening sequence to its rule (curly, brack, langv; checked in that order); throws for anything else
+   Xomw_prepro_rule[] candidates = new Xomw_prepro_rule[] {rule_curly, rule_brack, rule_langv};
+   for (int idx = 0; idx < candidates.length; idx++) {
+     Xomw_prepro_rule candidate = candidates[idx];
+     if (Bry_.Eq(bry, candidate.bgn)) {
+       return candidate;
+     }
+   }
+   throw Err_.new_unhandled(bry);
+ }
+ public byte[] Preprocess_to_xml(byte[] src, boolean for_inclusion) { // scans src and returns the bytes collected in the stack's root accumulator; for_inclusion selects which include-tags are ignored vs. treated as elements. NOTE(review): many Add_str_a7("") calls below append an empty literal; presumably tag text was lost -- confirm against the MediaWiki original
+ Hash_adp_bry xmlish_elems = Hash_adp_bry.ci_a7(); // parser->getStripList();
+
+ Hash_adp_bry xmlish_allow_missing_end_tag = Hash_adp_bry.cs();
+ xmlish_allow_missing_end_tag.Add_as_key_and_val(Bry__includeonly);
+ xmlish_allow_missing_end_tag.Add_as_key_and_val(Bry__noinclude);
+ xmlish_allow_missing_end_tag.Add_as_key_and_val(Bry__onlyinclude);
+
+ boolean enable_only_include = false;
+
+ Hash_adp_bry ignored_tags = Hash_adp_bry.cs();
+ Hash_adp_bry ignored_elements = Hash_adp_bry.cs();
+ if (for_inclusion) {
+ ignored_tags.Add_many_str("includeonly", "/includeonly");
+ ignored_elements.Add_many_str("noinclude");
+ xmlish_elems.Add_many_str("noinclude");
+ if ( Bry_.Has(src, Bry__only_include_bgn)
+ && Bry_.Has(src, Bry__only_include_end)) {
+ enable_only_include = true;
+ }
+ }
+ else {
+ ignored_tags.Add_many_str("noinclude", "/noinclude", "onlyinclude", "/onlyinclude");
+ ignored_elements.Add_many_str("includeonly");
+ xmlish_elems.Add_many_str("includeonly");
+ }
+
+ // $xmlishRegex = implode( '|', array_merge( $xmlishElements, $ignoredTags ) );
+ // Use "A" modifier (anchored) instead of "^", because ^ doesn't work with an offset
+ // $elementsRegex = "~($xmlishRegex)(?:\s|\/>|>)|(!--)~iA";
+
+ Xomw_prepro_stack stack = new Xomw_prepro_stack();
+// $searchBase = "[{<\n"; # }
+
+ // Input pointer, starts out pointing to a pseudo-newline before the start
+ int i = 0;
+
+ // Current accumulator
+ accum = stack.Get_accum();
+ accum.Add_str_a7("");
+
+ // True to find equals signs in arguments
+ boolean find_equals = false;
+
+ // True to take notice of pipe characters
+ boolean find_pipe = false;
+ int heading_index = 1;
+
+ // True if $i is inside a possible heading
+ boolean in_heading = false;
+
+ // True if there are no more greater-than (>) signs right of $i
+ boolean no_more_gt = false;
+
+ // Map of tag name => true if there are no more closing tags of given type right of $i
+ Hash_adp_bry no_more_closing_tag = Hash_adp_bry.cs();
+
+ // True to ignore all input up to the next <onlyinclude>
+ boolean find_only_include = enable_only_include;
+
+ // Do a line-start run without outputting an LF character
+ boolean fake_line_start = true;
+
+ int src_len = src.length;
+ int found = -1;
+ byte[] cur_closing = Bry_.Empty;
+
+ Btrie_slim_mgr elements_trie = Btrie_slim_mgr.ci_a7(); // NOTE(review): nothing is added to elements_trie / elements_end_trie in this method; confirm how they are meant to be filled
+ Btrie_slim_mgr elements_end_trie = Btrie_slim_mgr.ci_a7();
+ Btrie_rv elements_trv = new Btrie_rv();
+
+ Btrie_slim_mgr cur_char_trie = Btrie_slim_mgr.ci_a7(); // NOTE(review): nothing is added to this trie in this method either -- confirm
+ byte[] inner = null;
+
+ while (true) {
+ if (find_only_include) {
+ // Ignore all input up to the next <onlyinclude>
+ int start_pos = Bry_find_.Find_fwd(src, Bry__only_include_bgn, i, src_len);
+ if (start_pos == Bry_find_.Not_found) {
+ // Ignored section runs to the end
+ accum.Add_str_a7("").Add(htmlspecialchars(Bry_.Mid(src, i))).Add_str_a7("");
+ break;
+ }
+ int tag_end_pos = start_pos + Bry__only_include_bgn.length; // past-the-end
+ accum.Add_str_a7("").Add(htmlspecialchars(Bry_.Mid(src, i, tag_end_pos))).Add_str_a7("");
+ i = tag_end_pos;
+ find_only_include = false;
+ }
+
+ byte[] cur_char = Bry_.Empty;
+ if (fake_line_start) {
+ found = Found__line_bgn;
+ cur_char = Bry_.Empty;
+ }
+
+ // Find next opening brace, closing brace or pipe
+// $search = $searchBase;
+ if (stack.top == null) {
+ cur_closing = Bry_.Empty;
+ }
+ else {
+ cur_closing = stack.top.close;
+ // $search .= $currentClosing;
+ }
+ if (find_pipe) {
+ // $search .= '|';
+ }
+ if (find_equals) {
+ // First equals will be for the template
+ // $search .= '=';
+ }
+ Xomw_prepro_rule rule = null;
+
+ // Output literal section, advance input counter
+ int literal_len = 0; // strcspn(src, $search, i); always 0 for now, so the branch below never runs
+ if (literal_len > 0) {
+ accum.Add(htmlspecialchars(Bry_.Mid(src, i, i + literal_len)));
+ i += literal_len;
+ }
+
+ if (i >= src_len) {
+ if (Bry_.Eq(cur_closing, Byte_ascii.Nl_bry)) {
+ // Do a past-the-end run to finish off the heading
+ cur_char = Byte_ascii.Nl_bry;
+ found = Found__line_end;
+ }
+ else {
+ // All done
+ break;
+ }
+ }
+ else {
+ Xomw_prepro_curchar_itm cur_char_itm = (Xomw_prepro_curchar_itm)cur_char_trie.Match_at(elements_trv, src, i, src_len); // NOTE(review): result is dereferenced on the next line without a null check; confirm Match_at cannot return null here
+ cur_char = cur_char_itm.sequence;
+ switch (cur_char_itm.type) {
+ case Byte_ascii.Pipe: found = Found__pipe; break;
+ case Byte_ascii.Eq: found = Found__equals; break;
+ case Byte_ascii.Angle_bgn: found = Found__angle; break;
+ case Byte_ascii.Nl: found = in_heading ? Found__line_end : Found__line_bgn; break;
+ case Byte_ascii.Curly_bgn: {
+ found = Found__open;
+ rule = rule_curly;
+ break;
+ }
+ case Byte_ascii.Brack_bgn: {
+ found = Found__open;
+ rule = rule_brack;
+ break;
+ }
+ default:
+ if (cur_char_itm.type == Byte_ascii.Dash) {
+ int nxt_pos = i + 1;
+ if (nxt_pos < src_len) {
+ if (src[i + 1] == Byte_ascii.Curly_bgn) {
+ found = Found__open;
+ rule = rule_langv;
+ continue; // NOTE(review): restarts the while loop without advancing i and discards found / rule; presumably a break out of the switch was intended -- confirm
+ }
+ }
+ }
+
+ if (Bry_.Eq(cur_char, cur_closing)) {
+ found = Found__close;
+ }
+ else {
+ i++;
+ continue;
+ }
+ break;
+ }
+ }
+
+ if (found == Found__angle) {
+ // Handle </onlyinclude>
+ if ( enable_only_include
+ && Bry_.Eq(src, i, i + Len__only_include_end, Bry__only_include_end)) {
+ find_only_include = true;
+ continue;
+ }
+
+ // Determine element name; $elementsRegex = "~($xmlishRegex)(?:\s|\/>|>)|(!--)~iA"; EX: "(span|div)(?:\s|\/>|>)|(!--)
+ Xomw_prepro_elem element = (Xomw_prepro_elem)elements_trie.Match_at(elements_trv, src, i + 1, src_len);
+ if (element == null) {// Element name missing or not listed
+ accum.Add(Bry__escaped_lt);
+ i++;
+ continue;
+ }
+
+ // Handle comments
+ if (element.type == Xomw_prepro_elem.Type__comment) {
+ // To avoid leaving blank lines,
+ // when a sequence of space-separated comments is both preceded and followed by a newline (ignoring spaces),
+ // then trim leading and trailing spaces and the trailing newline.
+
+ // Find the end
+ int comment_end_pos = Bry_find_.Find_fwd(src, Bry__comment_end, i + 4, src_len);
+ if (comment_end_pos == Bry_find_.Not_found) {
+ // Unclosed comment in input, runs to end
+ accum.Add_str_a7("").Add(htmlspecialchars(Bry_.Mid(src, i))).Add_str_a7("");
+ i = src_len;
+ }
+ else {
+ // Search backwards for leading whitespace
+ int ws_bgn = i > 0 ? i - Bry_find_.Find_bwd__while_space_or_tab(src, i, 0) : 0; // NOTE(review): the helper returns a position (or Not_found), but it is subtracted here as if it were a length -- confirm
+
+ // Search forwards for trailing whitespace
+ // $wsEnd will be the position of the last space (or the '>' if there's none)
+ int ws_end = comment_end_pos + 2 + Bry_find_.Find_fwd_while_space_or_tab(src, comment_end_pos + 3, src_len); // NOTE(review): presumably the helper returns a position, not a length; confirm before adding it to an offset
+
+ // Keep looking forward as long as we're finding more comments.
+ comments_list.Clear();
+ comments_list.Add(new int[] {ws_bgn, ws_end});
+ while (Bry_.Eq(src, ws_end + 1, ws_end + 5, Bry__comment_bgn)) {
+ int cur_char_pos = Bry_find_.Find_fwd(src, Bry__comment_end, ws_end + 4);
+ if (cur_char_pos == Bry_find_.Not_found) {
+ break;
+ }
+ cur_char_pos = cur_char_pos + 2 + Bry_find_.Find_fwd_while_space_or_tab(src, cur_char_pos + 3, src_len);
+ comments_list.Add(new int[] {ws_end + 1, cur_char_pos});
+ ws_end = cur_char_pos;
+ }
+
+ // Eat the line if possible
+ // TODO: This could theoretically be done if $wsStart == 0, i.e. for comments at the overall start.
+ // That's not how Sanitizer::removeHTMLcomments() did it, but it's a possible beneficial b/c break.
+ int comment_bgn_pos = -1;
+ if ( ws_bgn > 0
+ && Bry_.Eq(src, ws_bgn - 1, ws_bgn , Byte_ascii.Nl_bry)
+ && Bry_.Eq(src, ws_end + 1, ws_end + 2, Byte_ascii.Nl_bry)
+ ) {
+ // Remove leading whitespace from the end of the accumulator
+ // Sanity check first though
+ int ws_len = i - ws_bgn;
+ if ( ws_len > 0
+ && Bry_find_.Find_fwd_while_space_or_tab(accum.To_bry(), -ws_len, src_len) == ws_len // NOTE(review): a negative start pos (-ws_len) and src_len as the end are passed for the accum bytes; confirm the helper supports this
+ ) {
+ accum.Clear().Add(Bry_.Mid(accum.To_bry(), 0, -ws_len)); // NOTE(review): negative end pos; confirm Bry_.Mid treats it as "from the end"
+ }
+
+ // Dump all but the last comment to the accumulator
+ int comments_list_len = comments_list.Len();
+ for (int j = 0; j < comments_list_len; j++) {
+ int[] com = (int[])comments_list.Get_at(j);
+ comment_bgn_pos = com[0];
+ comment_end_pos = com[1] + 1;
+ if (j == comments_list_len - 1) {
+ break;
+ }
+ inner = Bry_.Mid(src, comment_bgn_pos, comment_end_pos);
+ accum.Add_str_a7("").Add(htmlspecialchars(inner)).Add_str_a7("");
+ }
+
+ // Do a line-start run next time to look for headings after the comment
+ fake_line_start = true;
+ }
+ else {
+ // No line to eat, just take the comment itself
+ comment_bgn_pos = i;
+ comment_end_pos += 2;
+ }
+
+ if (stack.top != null) {
+ Xomw_prepro_part part = stack.top.Get_current_part();
+ if (!(part.comment_end == ws_end - 1)) {
+ part.visual_end = ws_bgn;
+ }
+ // Else comments abutting, no change in visual end
+ part.comment_end = comment_end_pos;
+ }
+ i = comment_end_pos + 1;
+ inner = Bry_.Mid(src, comment_bgn_pos, comment_end_pos + 1);
+ accum.Add_str_a7("").Add(htmlspecialchars(inner)).Add_str_a7("");
+ continue;
+ }
+ }
+
+ byte[] name = element.name;
+ int atr_bgn = i + name.length + 1;
+
+ // Find end of tag
+ int tag_end_pos = no_more_gt ? Bry_find_.Not_found : Bry_find_.Find_fwd(src, Byte_ascii.Angle_end, atr_bgn);
+ if (tag_end_pos == Bry_find_.Not_found) {
+ // Infinite backtrack; Disable tag search to prevent worst-case O(N^2) performance
+ no_more_gt = true;
+ accum.Add(Bry__escaped_lt);
+ i++;
+ continue;
+ }
+
+ if (ignored_tags.Has(name)) {
+ accum.Add_str_a7("").Add(htmlspecialchars(Bry_.Mid(src, i, tag_end_pos - i + 1))).Add_str_a7(""); // NOTE(review): a length (tag_end_pos - i + 1) is passed here, while other Bry_.Mid calls in this method pass an end pos -- confirm
+ i = tag_end_pos + 1;
+ continue;
+ }
+
+ int tag_bgn_pos = i;
+ int atr_end = -1;
+ byte[] close = null;
+ if (src[tag_end_pos - 1] == Byte_ascii.Slash) {
+ atr_end = tag_end_pos - 1;
+ inner = null;
+ i = tag_end_pos + 1;
+ close = null;
+ }
+ else {
+ atr_end = tag_end_pos;
+ // Find closing tag
+ // FIXME: need to search forward
+ Xomw_prepro_elem elem_end = (Xomw_prepro_elem)elements_end_trie.Match_at(elements_trv, src, tag_end_pos + 1, src_len); // preg_match( "/<\/" . preg_quote( $name, '/' ) . "\s*>/i",
+ int elem_end_lhs = elements_trv.Pos(); // NOTE(review): lhs and rhs are both read from the same Pos() call result; confirm which side of the match Pos() reports
+ int elem_end_rhs = elements_trv.Pos();
+ // check for "\s*>"
+ if (elem_end != null) {
+ elem_end_rhs = Bry_find_.Find_fwd_while(src, elem_end_rhs, src_len, Byte_ascii.Space);
+ if (elem_end_rhs == src_len) {
+ elem_end = null;
+ }
+ else {
+ if (src[elem_end_rhs] == Byte_ascii.Gt)
+ elem_end_rhs = elem_end_rhs + 1;
+ else
+ elem_end = null;
+ }
+ }
+ if ( !no_more_closing_tag.Has(name)
+ && elem_end != null) {
+ inner = Bry_.Mid(src, tag_end_pos + 1, elem_end_lhs);
+ i = elem_end_rhs;
+ tmp_bfr.Add_str_a7("").Add(htmlspecialchars(Bry_.Mid(src, elem_end_lhs, elem_end_rhs))).Add_str_a7("");
+ close = tmp_bfr.To_bry_and_clear();
+ }
+ else {
+ // No end tag
+ if (xmlish_allow_missing_end_tag.Has(name)) {
+ // Let it run out to the end of the src.
+ inner = Bry_.Mid(src, tag_end_pos + 1);
+ i = src_len;
+ close = Bry_.Empty;
+ }
+ else {
+ // Don't match the tag, treat opening tag as literal and resume parsing.
+ i = tag_end_pos + 1;
+ accum.Add(htmlspecialchars(Bry_.Mid(src, tag_bgn_pos, tag_end_pos + 1)));
+ // Cache results, otherwise we have O(N^2) performance for input like <foo><foo><foo>...
+ no_more_closing_tag.Add_if_dupe_use_nth(name, name);
+ continue;
+ }
+ }
+ }
+
+ // <includeonly> and <noinclude> just become <ignore> tags
+ if (ignored_elements.Has(name)) {
+ accum.Add_str_a7("").Add(htmlspecialchars(Bry_.Mid(src, tag_bgn_pos, i))).Add_str_a7("");
+ continue;
+ }
+
+ accum.Add_str_a7("");
+ byte[] atr_bry = atr_end <= atr_bgn ? Bry_.Empty : Bry_.Mid(src, atr_bgn, atr_end);
+ accum.Add_str_a7("").Add(name).Add_str_a7("");
+ // Note that the attr element contains the whitespace between name and attribute,
+ // this is necessary for precise reconstruction during pre-save transform.
+ accum.Add_str_a7("").Add(htmlspecialchars(atr_bry)).Add_str_a7("");
+ if (inner != null) {
+ accum.Add_str_a7("").Add(htmlspecialchars(inner)).Add_str_a7("");
+ }
+ accum.Add(close).Add_str_a7(""); // NOTE(review): close is null for a self-closing tag (set in the Slash branch above); confirm Add accepts null
+ }
+ else if (found == Found__line_bgn) {
+ // Is this the start of a heading?; Line break belongs before the heading element in any case
+ if (fake_line_start) {
+ fake_line_start = false;
+ } else {
+ accum.Add(cur_char);
+ i++;
+ }
+
+ int eq_end = Bry_find_.Find_fwd_while(src, i, i + 6, Byte_ascii.Eq); // strspn( $src, '=', $i, 6 );
+ int count = i - eq_end; // NOTE(review): if eq_end is a position at or after i, this is never positive; presumably eq_end - i was intended -- confirm
+ if (count == 1 && find_equals) {
+ // DWIM: This looks kind of like a name/value separator.
+ // Let's let the equals handler have it and break the potential heading.
+ // This is heuristic, but AFAICT the methods for completely correct disambiguation are very complex.
+ }
+ else if (count > 0) {
+ Xomw_prepro_piece piece = new Xomw_prepro_piece(Byte_ascii.Nl_bry, Byte_ascii.Nl_bry, count, i, false);
+ piece.Add_part(Bry_.Repeat(Byte_ascii.Eq, count));
+ stack.Push(piece);
+ accum = stack.Get_accum();
+ Xomw_prepro_flags flags = stack.Get_flags();
+ find_pipe = flags.Find_pipe;
+ find_equals = flags.Find_eq;
+ in_heading = flags.In_heading;
+ i += count;
+ }
+ }
+ else if (found == Found__line_end) {
+ Xomw_prepro_piece piece = stack.top;
+ // A heading must be open, otherwise \n wouldn't have been in the search list
+ if (!Bry_.Eq(piece.open, Byte_ascii.Nl_bry)) throw Err_.new_wo_type("assertion:piece must start with \\n");
+ Xomw_prepro_part part = piece.Get_current_part();
+
+ // Search back through the input to see if it has a proper close.
+ // Do this using the reversed String since the other solutions (end anchor, etc.) are inefficient.
+ int search_bgn = Bry_find_.Find_bwd__while_space_or_tab(src, i, 0); // NOTE(review): returns Not_found when i >= src.length, which is the case on the past-the-end run -- confirm
+ if (part.comment_end != -1 && search_bgn -1 == part.comment_end) {
+ // Comment found at line end; Search for equals signs before the comment
+ search_bgn = part.visual_end;
+ search_bgn -= Bry_find_.Find_bwd__while_space_or_tab(src, search_bgn, 0);
+ }
+ int count = piece.count;
+ int eq_len = Bry_find_.Find_bwd_while(src, search_bgn, 0, Byte_ascii.Eq); // NOTE(review): used below as a length; confirm the helper does not return a position
+
+ byte[] element = Bry_.Empty;
+ if (eq_len > 0) {
+ if (search_bgn - eq_len == piece.start_pos) {
+ // This is just a single String of equals signs on its own line
+ // Replicate the doHeadings behavior /={count}(.+)={count}/
+ // First find out how many equals signs there really are (don't stop at 6)
+ count = eq_len;
+ if (count < 3) {
+ count = 0;
+ } else {
+ count = (count - 1) / 2;
+ if (count > 6) count = 6;
+ }
+ }
+ else {
+ if (eq_len < count)
+ count = eq_len;
+ }
+ if (count > 0) {
+ // Normal match, output <h>
+ element = tmp_bfr.Add_str_a7("").Add_bfr_and_preserve(accum).Add_str_a7("").To_bry_and_clear();
+ heading_index++;
+ } else {
+ // Single equals sign on its own line, count=0
+ element = accum.To_bry();
+ }
+ }
+ else {
+ // No match, no <h>, just pass down the inner src
+ element = accum.To_bry();
+ }
+
+ // Unwind the stack
+ stack.Pop();
+ accum = stack.Get_accum();
+
+ Xomw_prepro_flags flags = stack.Get_flags();
+ find_pipe = flags.Find_pipe;
+ find_equals = flags.Find_eq;
+ in_heading = flags.In_heading;
+
+ // Append the result to the enclosing accumulator
+ accum.Add(element);
+ // Note that we do NOT increment the input pointer. This is because the closing linebreak could be the opening linebreak of another heading.
+ // Infinite loops are avoided because the next iteration MUST hit the heading open case above, which unconditionally increments the input pointer.
+ }
+ else if (found == Found__open) {
+ // count opening brace characters
+ int count = Bry_find_.Find_fwd(src, cur_char, i, src_len); // NOTE(review): Find_fwd is used elsewhere in this method as a position search; here its result is treated as a count -- confirm
+
+ // we need to add to stack only if opening brace count is enough for one of the rules
+ if (count >= rule.min) {
+ // Add it to the stack
+ Xomw_prepro_piece piece = new Xomw_prepro_piece(cur_char, rule.end, count, -1, i > 0 && src[i - 1] == Byte_ascii.Nl);
+ stack.Push(piece); // NOTE(review): piece has no parts yet, and Push reads the piece's current part -- confirm an initial part is not required first
+ accum = stack.Get_accum();
+ Xomw_prepro_flags flags = stack.Get_flags();
+ find_pipe = flags.Find_pipe;
+ find_equals = flags.Find_eq;
+ in_heading = flags.In_heading;
+ }
+ else {
+ // Add literal brace(s)
+ accum.Add(htmlspecialchars(Bry_.Repeat_bry(cur_char, count)));
+ }
+ i += count;
+ }
+ else if (found == Found__close) {
+ Xomw_prepro_piece piece = stack.top;
+ // lets check if there are enough characters for closing brace
+ int count = Bry_find_.Find_fwd(src, cur_char, i, src_len); // NOTE(review): same position-vs-count concern as in the open branch -- confirm
+ int max_count = piece.count;
+ if (count > max_count) count = max_count;
+
+ // check for maximum matching characters (if there are 5 closing characters, we will probably need only 3 - depending on the rules)
+ rule = Get_rule(piece.open);
+ int matching_count = -1;
+ if (count > rule.max) {
+ // The specified maximum exists in the callback array, unless the caller has made an error
+ matching_count = rule.max;
+ }
+ else {
+ // Count is less than the maximum
+ // Skip any gaps in the callback array to find the true largest match
+ // Need to use array_key_exists not isset because the callback can be null
+ matching_count = count;
+ while (matching_count > 0 && !rule.Names_exist(matching_count)) {
+ matching_count--;
+ }
+ }
+
+ if (matching_count <= 0) {
+ // No matching element found in callback array
+ // Output a literal closing brace and continue
+ accum.Add(htmlspecialchars(Bry_.Repeat_bry(cur_char, count)));
+ i += count;
+ continue;
+ }
+ int name_type = rule.names[matching_count];
+ byte[] element = null;
+ if (name_type == Xomw_prepro_rule.Name__null) {
+ // No element, just literal text
+ piece.Break_syntax(tmp_bfr, matching_count); // NOTE(review): the returned bytes are discarded and Break_syntax clears tmp_bfr, so element below only holds the repeated closing chars -- confirm
+ element = tmp_bfr.Add(Bry_.Repeat_bry(rule.end, matching_count)).To_bry_and_clear();
+ }
+ else {
+ // Create XML element; Note: $parts is already XML, does not need to be encoded further
+ List_adp parts = piece.parts;
+ byte[] title = ((Xomw_prepro_part)parts.Get_at(0)).bry;
+ parts.Del_at(0);
+
+ // The invocation is at the start of the line if lineStart is set in the stack, and all opening brackets are used up.
+ byte[] attr = null;
+ if (max_count == matching_count && !piece.line_start) { // NOTE(review): the comment above says the attribute applies when lineStart IS set, but the condition tests !line_start -- confirm
+ attr = Bry_.new_a7(" lineStart=\"1\"");
+ }
+ else {
+ attr = Bry_.Empty;
+ }
+
+ byte[] name_bry = Xomw_prepro_rule.Name(name_type);
+ tmp_bfr.Add_str_a7("<").Add(name_bry).Add(attr).Add_str_a7(">");
+ tmp_bfr.Add_str_a7("").Add(title).Add_str_a7("");
+
+ int arg_idx = 1; // incremented for each part without "="; not otherwise read in this method
+ int parts_len = parts.Len();
+ for (int j = 0; j < parts_len; j++) {
+ Xomw_prepro_part part = (Xomw_prepro_part)parts.Get_at(j);
+ if (part.Eqpos != -1) {
+ byte[] arg_key = Bry_.Mid(part.bry, 0, part.Eqpos);
+ byte[] arg_val = Bry_.Mid(part.bry, part.Eqpos + 1);
+ tmp_bfr.Add_str_a7("").Add(arg_key).Add_str_a7("=").Add(arg_val).Add_str_a7("");
+ }
+ else {
+ tmp_bfr.Add_str_a7("{").Add(part.bry).Add_str_a7("}");
+ arg_idx++;
+ }
+ }
+ tmp_bfr.Add_str_a7("").Add(name_bry).Add_str_a7(">");
+ element = tmp_bfr.To_bry_and_clear();
+ }
+
+ // Advance input pointer
+ i += matching_count;
+
+ // Unwind the stack
+ stack.Pop();
+ accum = stack.Get_accum();
+
+ // Re-add the old stack element if it still has unmatched opening characters remaining
+ if (matching_count < piece.count) {
+ piece.parts.Clear(); // piece.parts = [ new PPDPart ];
+ piece.count -= matching_count;
+
+ // do we still qualify for any callback with remaining count?
+ int min = Get_rule(piece.open).min;
+ if (piece.count >= min) {
+ stack.Push(piece);
+ accum = stack.Get_accum();
+ }
+ else {
+ accum.Add(Bry_.Repeat_bry(piece.open, piece.count));
+ }
+ }
+
+ Xomw_prepro_flags flags = stack.Get_flags();
+ find_pipe = flags.Find_pipe;
+ find_equals = flags.Find_eq;
+ in_heading = flags.In_heading;
+
+ // Add XML element to the enclosing accumulator
+ accum.Add(element);
+ }
+ else if (found == Found__pipe) {
+ find_equals = true; // shortcut for getFlags()
+ stack.Add_part(Bry_.Empty);
+ accum = stack.Get_accum();
+ i++;
+ }
+ else if (found == Found__equals) {
+ find_equals = false; // shortcut for getFlags()
+ stack.Get_current_part().Eqpos = accum.Len();
+ accum.Add_byte(Byte_ascii.Eq);
+ i++;
+ }
+ }
+
+ // Output any remaining unclosed brackets
+ Bry_bfr root_accum = stack.Get_root_accum();
+ int stack_len = stack.stack.Len();
+ for (int j = 0; j < stack_len; j++) {
+ Xomw_prepro_piece piece = (Xomw_prepro_piece)stack.stack.Get_at(j);
+ root_accum.Add(piece.Break_syntax(tmp_bfr, -1));
+ }
+ root_accum.Add_str_a7("");
+ return root_accum.To_bry_and_clear();
+ }
+}
+class Xomw_prepro_rule { // immutable description of one bracket rule: opening / closing sequence, allowed match counts, and the element type per count
+  public static final int Name__invalid = -1, Name__null = 0, Name__tmpl = 1, Name__targ = 2;
+  private static final byte[] Name__tmpl_bry = Bry_.new_a7("template"), Name__targ_bry = Bry_.new_a7("tplarg");
+  public final byte[] bgn;		// opening sequence
+  public final byte[] end;		// closing sequence
+  public final int min;			// smallest count that qualifies for the rule
+  public final int max;			// largest count that is matched at once
+  public final int[] names;		// Name__* value per count; the index is the count
+  public Xomw_prepro_rule(byte[] bgn, byte[] end, int min, int max, int[] names) {
+    this.bgn = bgn; this.end = end;
+    this.min = min; this.max = max;
+    this.names = names;
+  }
+  // True if idx is below names.length and its entry is something other than Name__invalid.
+  public boolean Names_exist(int idx) {
+    if (idx >= names.length) return false;
+    return names[idx] != Name__invalid;
+  }
+  // Element name for a Name__* type: "template" or "tplarg"; null for every other value (Name__invalid, Name__null, unknown).
+  public static byte[] Name(int type) {
+    if (type == Name__tmpl) return Name__tmpl_bry;
+    if (type == Name__targ) return Name__targ_bry;
+    return null;
+  }
+}
+class Xomw_prepro_elem { // an xml-ish element known to the preprocessor: its type, its name, and the left-hand side of its closing tag
+  // prefix of a closing tag; an empty prefix would make tag_end_lhs identical to name
+  private static final byte[] Bry__tag_end = Bry_.new_a7("</");
+  public Xomw_prepro_elem(int type, byte[] name) {
+    this.type = type;
+    this.name = name;
+    this.tag_end_lhs = Bry_.Add(Bry__tag_end, name);
+  }
+  public final int type;				// Type__* value
+  public final byte[] name;			// element name
+  public final byte[] tag_end_lhs;	// "</" + name; EX: "</noinclude"
+  public static final int Type__comment = 0;
+}
+class Xomw_prepro_curchar_itm { // mutable pair: a type code (switched on by the scanner) and the byte sequence it stands for
+  public int type;
+  public byte[] sequence;		// stored by reference, not copied
+  public Xomw_prepro_curchar_itm(int type, byte[] sequence) {
+    this.sequence = sequence;
+    this.type = type;
+  }
+}
diff --git a/400_xowa/src/gplx/xowa/parsers/tmpls/Arg_nde_tkn.java b/400_xowa/src/gplx/xowa/parsers/tmpls/Arg_nde_tkn.java
index 19b78c91d..d6f058b32 100644
--- a/400_xowa/src/gplx/xowa/parsers/tmpls/Arg_nde_tkn.java
+++ b/400_xowa/src/gplx/xowa/parsers/tmpls/Arg_nde_tkn.java
@@ -38,6 +38,6 @@ public class Arg_nde_tkn extends Xop_tkn_itm_base {
val_tkn.Tmpl_evaluate(ctx, src, caller, bfr);
return true;
}
- public static final Arg_nde_tkn[] Ary_empty = new Arg_nde_tkn[0];
- public static final Arg_nde_tkn Null = new Arg_nde_tkn(-1, -1);
+ public static final Arg_nde_tkn[] Ary_empty = new Arg_nde_tkn[0]; // shared zero-length array
+ public static final Arg_nde_tkn Null = new Arg_nde_tkn(-1, -1); // shared placeholder instance, constructed with (-1, -1)
}