diff --git a/100_core/src/gplx/Bry_.java b/100_core/src/gplx/Bry_.java index 864d8652f..67444bbc7 100644 --- a/100_core/src/gplx/Bry_.java +++ b/100_core/src/gplx/Bry_.java @@ -164,6 +164,17 @@ public class Bry_ { rv[i] = b; return rv; } + public static byte[] Repeat_bry(byte[] bry, int len) { + int bry_len = bry.length; + int rv_len = len * bry_len; + byte[] rv = new byte[rv_len]; + for (int i = 0; i < len; i++) { + for (int j = 0; j < bry_len; j++) { + rv[(i * bry_len) + j] = bry[j]; + } + } + return rv; + } public static byte[] Add(byte[] src, byte b) { int src_len = src.length; byte[] rv = new byte[src_len + 1]; diff --git a/100_core/src/gplx/Bry__tst.java b/100_core/src/gplx/Bry__tst.java index 79d381a15..c02d5f75f 100644 --- a/100_core/src/gplx/Bry__tst.java +++ b/100_core/src/gplx/Bry__tst.java @@ -16,7 +16,7 @@ You should have received a copy of the GNU Affero General Public License along with this program. If not, see . */ package gplx; -import org.junit.*; import gplx.core.primitives.*; import gplx.core.brys.*; +import org.junit.*; import gplx.core.primitives.*; import gplx.core.brys.*; import gplx.core.tests.*; public class Bry__tst { private final Bry__fxt fxt = new Bry__fxt(); @Test public void new_ascii_() { @@ -276,6 +276,9 @@ public class Bry__tst { fxt.Test__new_u8_nl_apos(String_.Ary("a", "b"), "a\nb"); fxt.Test__new_u8_nl_apos(String_.Ary("a", "b'c", "d"), "a\nb\"c\nd"); } + @Test public void Repeat_bry() { + fxt.Test__repeat_bry("abc" , 3, "abcabcabc"); + } } class Bry__fxt { public void Test_trim_end(String raw, byte trim, String expd) { @@ -292,4 +295,7 @@ class Bry__fxt { public void Test__new_u8_nl_apos(String[] ary, String expd) { Tfds.Eq_str_lines(expd, String_.new_u8(Bry_.New_u8_nl_apos(ary))); } + public void Test__repeat_bry(String s, int count, String expd) { + Gftest.Eq__str(expd, Bry_.Repeat_bry(Bry_.new_u8(s), count)); + } } diff --git a/100_core/src/gplx/Bry_find_.java b/100_core/src/gplx/Bry_find_.java index e0b4ed4ec..1be0b0a88 100644 --- a/100_core/src/gplx/Bry_find_.java +++ b/100_core/src/gplx/Bry_find_.java @@ -158,6 +158,19 @@ public class Bry_find_ { } return Bry_find_.Not_found; } + public static int Find_bwd__while_space_or_tab(byte[] src, int cur, int end) { // get pos of 1st char that is not \t or \s + if (cur >= src.length) return Bry_find_.Not_found; + for (int i = cur; i >= end; i--) { + byte b = src[i]; + switch (b) { + case Byte_ascii.Space: case Byte_ascii.Tab: + break; + default: + return i; + } + } + return Bry_find_.Not_found; + } public static int Find_bwd_non_ws_or_end(byte[] src, int cur, int end) { if (cur >= src.length) return Bry_find_.Not_found; for (int i = cur; i >= end; i--) { diff --git a/400_xowa/src/gplx/xowa/Xoa_app_.java b/400_xowa/src/gplx/xowa/Xoa_app_.java index 682f53f54..76407230d 100644 --- a/400_xowa/src/gplx/xowa/Xoa_app_.java +++ b/400_xowa/src/gplx/xowa/Xoa_app_.java @@ -32,8 +32,8 @@ public class Xoa_app_ { } } public static final String Name = "xowa"; - public static final int Version_id = 513; - public static final String Version = "4.0.1.1701"; // RELEASE:2017-01-03 20:30 + public static final int Version_id = 512; + public static final String Version = "4.1.0.1701"; // RELEASE:2017-01-03 20:30 public static String Build_date = "2012-12-30 00:00:00"; public static String Build_date_fmt = "yyyy-MM-dd HH:mm:ss"; public static String Op_sys_str; diff --git a/400_xowa/src/gplx/xowa/parsers/mws/prepros/Xomw_prepro_stack.java b/400_xowa/src/gplx/xowa/parsers/mws/prepros/Xomw_prepro_stack.java new file mode 100644 index 000000000..c442bd3d6 --- /dev/null +++ b/400_xowa/src/gplx/xowa/parsers/mws/prepros/Xomw_prepro_stack.java @@ -0,0 +1,155 @@ +/* +XOWA: the XOWA Offline Wiki Application +Copyright (C) 2012 gnosygnu@gmail.com + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as +published by the Free Software Foundation, either version 3 of the +License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see . +*/ +package gplx.xowa.parsers.mws.prepros; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.mws.*; +class Xomw_prepro_stack { + public List_adp stack = List_adp_.New(); + public Xomw_prepro_piece top; + private Bry_bfr accum = Bry_bfr_.New(), root_accum = Bry_bfr_.New(); + private final Xomw_prepro_flags flags = new Xomw_prepro_flags(); + + public int Count() {return stack.Len();} + public Bry_bfr Get_accum() {return accum;} + public Bry_bfr Get_root_accum() {return root_accum;} + + public Xomw_prepro_part Get_current_part() { + if (top == null) { + return null; + } else { + return top.Get_current_part(); + } + } + + public void Push(Xomw_prepro_piece item) { + stack.Add(item); + this.top = (Xomw_prepro_piece)stack.Get_at(stack.Len() - 1); + accum.Clear().Add(top.Get_accum()); + } + + public Xomw_prepro_piece Pop() { + int len = stack.Count(); + if (len == 0) { + throw Err_.new_wo_type("Xomw_prepro_stack: no elements remaining"); + } + + Xomw_prepro_piece rv = (Xomw_prepro_piece)stack.Get_at(len - 1); + stack.Del_at(len - 1); + + if (len > 0) { + this.top = (Xomw_prepro_piece)stack.Get_at(stack.Len() - 1); + accum.Clear().Add(top.Get_accum()); + } else { + this.top = null; + this.accum = root_accum; + } + return rv; + } + + public void Add_part(byte[] bry) { + top.Add_part(bry); + accum.Clear().Add(top.Get_accum()); + } + + public Xomw_prepro_flags Get_flags() { + if (stack.Count() == 0) { + flags.Find_eq = false; + flags.Find_pipe = false; + flags.In_heading = false; + return flags; + } + else { + top.Set_flags(flags); + return flags; + } + } +} +class Xomw_prepro_flags { + public boolean Find_pipe; + public boolean Find_eq; + public boolean In_heading; +} +class Xomw_prepro_piece { + public final byte[] open; // Opening character (\n for heading) + public final byte[] close; // Matching closing char; + public int count; // Number of opening characters found (number of "=" for heading) + public final boolean line_start; // True if the open char appeared at the start of the input line; Not set for headings. + public final int start_pos; + public List_adp parts = List_adp_.New(); + public Xomw_prepro_piece(byte[] open, byte[] close, int count, int start_pos, boolean line_start) { + this.open = open; + this.close = close; + this.count = count; + this.start_pos = start_pos; + this.line_start = line_start; + } + public Xomw_prepro_part Get_current_part() { + return (Xomw_prepro_part)parts.Get_at(parts.Len() - 1); + } + public byte[] Get_accum() { + return Get_current_part().bry; + } + public void Add_part(byte[] bry) { + parts.Add(new Xomw_prepro_part(bry)); + } + public static final byte[] Brack_bgn_bry = Bry_.new_a7("["); + public void Set_flags(Xomw_prepro_flags flags) { + int parts_len = parts.Len(); + boolean open_is_nl = Bry_.Eq(open, Byte_ascii.Nl_bry); + boolean find_pipe = !open_is_nl && Bry_.Eq(open, Brack_bgn_bry); + flags.Find_pipe = find_pipe; + flags.Find_eq = find_pipe && parts_len > 1 && ((Xomw_prepro_part)parts.Get_at(parts_len - 1)).Eqpos != -1; + flags.In_heading = open_is_nl; + } + // Get the output String that would result if the close is not found. + public byte[] Break_syntax(Bry_bfr tmp_bfr, int opening_count) { + byte[] rv = Bry_.Empty; + if (Bry_.Eq(open, Byte_ascii.Nl_bry)) { + rv = ((Xomw_prepro_part)parts.Get_at(0)).bry; + } + else { + if (opening_count == -1) { + opening_count = count; + } + rv = Bry_.Repeat_bry(open, opening_count); + + // concat parts with "|" + boolean first = true; + int len = parts.Len(); + for (int i = 0; i < len; i++) { + Xomw_prepro_part part = (Xomw_prepro_part)parts.Get_at(i); + if (first) { + first = false; + } + else { + tmp_bfr.Add_byte_pipe(); + } + tmp_bfr.Add(part.bry); + } + rv = tmp_bfr.To_bry_and_clear(); + } + return rv; + } +} +class Xomw_prepro_part { + public Xomw_prepro_part(byte[] bry) { + this.bry = bry; + } + public final byte[] bry; + public int Eqpos = -1; + public int comment_end = -1; + public int visual_end = -1; +} diff --git a/400_xowa/src/gplx/xowa/parsers/mws/prepros/Xomw_prepro_wkr.java b/400_xowa/src/gplx/xowa/parsers/mws/prepros/Xomw_prepro_wkr.java new file mode 100644 index 000000000..e81eb6c82 --- /dev/null +++ b/400_xowa/src/gplx/xowa/parsers/mws/prepros/Xomw_prepro_wkr.java @@ -0,0 +1,715 @@ +/* +XOWA: the XOWA Offline Wiki Application +Copyright (C) 2012 gnosygnu@gmail.com + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as +published by the Free Software Foundation, either version 3 of the +License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see . +*/ +package gplx.xowa.parsers.mws.prepros; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.mws.*; +import gplx.core.btries.*; +public class Xomw_prepro_wkr { + private static final Xomw_prepro_rule + rule_curly = new Xomw_prepro_rule(Bry_.new_a7("{"), Bry_.new_a7("}") , 2, 3, new int[] {Xomw_prepro_rule.Name__invalid, Xomw_prepro_rule.Name__invalid, Xomw_prepro_rule.Name__tmpl, Xomw_prepro_rule.Name__targ}) + , rule_brack = new Xomw_prepro_rule(Bry_.new_a7("["), Bry_.new_a7("]") , 2, 2, new int[] {Xomw_prepro_rule.Name__invalid, Xomw_prepro_rule.Name__invalid, Xomw_prepro_rule.Name__null}) + , rule_langv = new Xomw_prepro_rule(Bry_.new_a7("-{"), Bry_.new_a7("}-"), 1, 1, new int[] {Xomw_prepro_rule.Name__invalid, Xomw_prepro_rule.Name__null}) + ; + private static final byte[] + Bry__only_include_bgn = Bry_.new_a7("") + , Bry__only_include_end = Bry_.new_a7("") + , Bry__comment_bgn = Bry_.new_a7("") + , Bry__escaped_lt = Bry_.new_a7("<") + , Bry__includeonly = Bry_.new_a7("includeonly") + , Bry__noinclude = Bry_.new_a7("noinclude") + , Bry__onlyinclude = Bry_.new_a7("onlyinclude") + ; + private static final int Len__only_include_end = Bry__only_include_end.length; + private static final int + Found__line_bgn = 0 + , Found__line_end = 1 + , Found__pipe = 2 + , Found__equals = 3 + , Found__angle = 4 + , Found__close = 5 + , Found__open = 6 + ; + + private Bry_bfr accum = Bry_bfr_.New(), tmp_bfr = Bry_bfr_.New(); + private List_adp comments_list = List_adp_.New(); + private byte[] htmlspecialchars(byte[] bry) { + return bry; + } + private Xomw_prepro_rule Get_rule(byte[] bry) { + if (Bry_.Eq(bry, rule_curly.bgn)) return rule_curly; + else if (Bry_.Eq(bry, rule_brack.bgn)) return rule_brack; + else if (Bry_.Eq(bry, rule_langv.bgn)) return rule_langv; + else throw Err_.new_unhandled(bry); + } + public byte[] Preprocess_to_xml(byte[] src, boolean for_inclusion) { + Hash_adp_bry xmlish_elems = Hash_adp_bry.ci_a7(); // parser->getStripList(); + + Hash_adp_bry xmlish_allow_missing_end_tag = Hash_adp_bry.cs(); + xmlish_allow_missing_end_tag.Add_as_key_and_val(Bry__includeonly); + xmlish_allow_missing_end_tag.Add_as_key_and_val(Bry__noinclude); + xmlish_allow_missing_end_tag.Add_as_key_and_val(Bry__onlyinclude); + + boolean enable_only_include = false; + + Hash_adp_bry ignored_tags = Hash_adp_bry.cs(); + Hash_adp_bry ignored_elements = Hash_adp_bry.cs(); + if (for_inclusion) { + ignored_tags.Add_many_str("includeonly", "/includeonly"); + ignored_elements.Add_many_str("noinclude"); + xmlish_elems.Add_many_str("noinclude"); + if ( Bry_.Has(src, Bry__only_include_bgn) + && Bry_.Has(src, Bry__only_include_end)) { + enable_only_include = true; + } + } + else { + ignored_tags.Add_many_str("noinclude", "/noinclude", "onlyinclude", "/onlyinclude"); + ignored_elements.Add_many_str("includeonly"); + xmlish_elems.Add_many_str("includeonly"); + } + + // $xmlishRegex = implode( '|', array_merge( $xmlishElements, $ignoredTags ) ); + // Use "A" modifier (anchored) instead of "^", because ^ doesn't work with an offset + // $elementsRegex = "~($xmlishRegex)(?:\s|\/>|>)|(!--)~iA"; + + Xomw_prepro_stack stack = new Xomw_prepro_stack(); +// $searchBase = "[{<\n"; # } + + // Input pointer, starts out pointing to a pseudo-newline before the start + int i = 0; + + // Current accumulator + accum = stack.Get_accum(); + accum.Add_str_a7(""); + + // True to find equals signs in arguments + boolean find_equals = false; + + // True to take notice of pipe characters + boolean find_pipe = false; + int heading_index = 1; + + // True if $i is inside a possible heading + boolean in_heading = false; + + // True if there are no more greater-than (>) signs right of $i + boolean no_more_gt = false; + + // Map of tag name => true if there are no more closing tags of given type right of $i + Hash_adp_bry no_more_closing_tag = Hash_adp_bry.cs(); + + // True to ignore all input up to the next + boolean find_only_include = enable_only_include; + + // Do a line-start run without outputting an LF character + boolean fake_line_start = true; + + int src_len = src.length; + int found = -1; + byte[] cur_closing = Bry_.Empty; + + Btrie_slim_mgr elements_trie = Btrie_slim_mgr.ci_a7(); + Btrie_slim_mgr elements_end_trie = Btrie_slim_mgr.ci_a7(); + Btrie_rv elements_trv = new Btrie_rv(); + + Btrie_slim_mgr cur_char_trie = Btrie_slim_mgr.ci_a7(); + byte[] inner = null; + + while (true) { + if (find_only_include) { + // Ignore all input up to the next + int start_pos = Bry_find_.Find_fwd(src, Bry__only_include_bgn, i, src_len); + if (start_pos == Bry_find_.Not_found) { + // Ignored section runs to the end + accum.Add_str_a7("").Add(htmlspecialchars(Bry_.Mid(src, i))).Add_str_a7(""); + break; + } + int tag_end_pos = start_pos + Bry__only_include_bgn.length; // past-the-end + accum.Add_str_a7("").Add(htmlspecialchars(Bry_.Mid(src, i, tag_end_pos))).Add_str_a7(""); + i = tag_end_pos; + find_only_include = false; + } + + byte[] cur_char = Bry_.Empty; + if (fake_line_start) { + found = Found__line_bgn; + cur_char = Bry_.Empty; + } + + // Find next opening brace, closing brace or pipe +// $search = $searchBase; + if (stack.top == null) { + cur_closing = Bry_.Empty; + } + else { + cur_closing = stack.top.close; + // $search .= $currentClosing; + } + if (find_pipe) { + // $search .= '|'; + } + if (find_equals) { + // First equals will be for the template + // $search .= '='; + } + Xomw_prepro_rule rule = null; + + // Output literal section, advance input counter + int literal_len = 0; // strcspn(src, $search, i); + if (literal_len > 0) { + accum.Add(htmlspecialchars(Bry_.Mid(src, i, i + literal_len))); + i += literal_len; + } + + if (i >= src_len) { + if (Bry_.Eq(cur_closing, Byte_ascii.Nl_bry)) { + // Do a past-the-end run to finish off the heading + cur_char = Byte_ascii.Nl_bry; + found = Found__line_end; + } + else { + // All done + break; + } + } + else { + Xomw_prepro_curchar_itm cur_char_itm = (Xomw_prepro_curchar_itm)cur_char_trie.Match_at(elements_trv, src, i, src_len); + cur_char = cur_char_itm.sequence; + switch (cur_char_itm.type) { + case Byte_ascii.Pipe: found = Found__pipe; break; + case Byte_ascii.Eq: found = Found__equals; break; + case Byte_ascii.Angle_bgn: found = Found__angle; break; + case Byte_ascii.Nl: found = in_heading ? Found__line_end : Found__line_bgn; break; + case Byte_ascii.Curly_bgn: { + found = Found__open; + rule = rule_curly; + break; + } + case Byte_ascii.Brack_bgn: { + found = Found__open; + rule = rule_brack; + break; + } + default: + if (cur_char_itm.type == Byte_ascii.Dash) { + int nxt_pos = i + 1; + if (nxt_pos < src_len) { + if (src[i + 1] == Byte_ascii.Curly_bgn) { + found = Found__open; + rule = rule_langv; + continue; + } + } + } + + if (Bry_.Eq(cur_char, cur_closing)) { + found = Found__close; + } + else { + i++; + continue; + } + break; + } + } + + if (found == Found__angle) { + // Handle + if ( enable_only_include + && Bry_.Eq(src, i, i + Len__only_include_end, Bry__only_include_end)) { + find_only_include = true; + continue; + } + + // Determine element name; $elementsRegex = "~($xmlishRegex)(?:\s|\/>|>)|(!--)~iA"; EX: "(span|div)(?:\s|\/>|>)|(!--) + Xomw_prepro_elem element = (Xomw_prepro_elem)elements_trie.Match_at(elements_trv, src, i + 1, src_len); + if (element == null) {// Element name missing or not listed + accum.Add(Bry__escaped_lt); + i++; + continue; + } + + // Handle comments + if (element.type == Xomw_prepro_elem.Type__comment) { + // To avoid leaving blank lines, + // when a sequence of space-separated comments is both preceded and followed by a newline (ignoring spaces), + // then trim leading and trailing spaces and the trailing newline. + + // Find the end + int comment_end_pos = Bry_find_.Find_fwd(src, Bry__comment_end, i + 4, src_len); + if (comment_end_pos == Bry_find_.Not_found) { + // Unclosed comment in input, runs to end + accum.Add_str_a7("").Add(htmlspecialchars(Bry_.Mid(src, i))).Add_str_a7(""); + i = src_len; + } + else { + // Search backwards for leading whitespace + int ws_bgn = i > 0 ? i - Bry_find_.Find_bwd__while_space_or_tab(src, i, 0) : 0; + + // Search forwards for trailing whitespace + // $wsEnd will be the position of the last space (or the '>' if there's none) + int ws_end = comment_end_pos + 2 + Bry_find_.Find_fwd_while_space_or_tab(src, comment_end_pos + 3, src_len); + + // Keep looking forward as long as we're finding more comments. + comments_list.Clear(); + comments_list.Add(new int[] {ws_bgn, ws_end}); + while (Bry_.Eq(src, ws_end + 1, ws_end + 5, Bry__comment_bgn)) { + int cur_char_pos = Bry_find_.Find_fwd(src, Bry__comment_end, ws_end + 4); + if (cur_char_pos == Bry_find_.Not_found) { + break; + } + cur_char_pos = cur_char_pos + 2 + Bry_find_.Find_fwd_while_space_or_tab(src, cur_char_pos + 3, src_len); + comments_list.Add(new int[] {ws_end + 1, cur_char_pos}); + ws_end = cur_char_pos; + } + + // Eat the line if possible + // TODO: This could theoretically be done if $wsStart == 0, i.e. for comments at the overall start. + // That's not how Sanitizer::removeHTMLcomments() did it, but it's a possible beneficial b/c break. + int comment_bgn_pos = -1; + if ( ws_bgn > 0 + && Bry_.Eq(src, ws_bgn - 1, ws_bgn , Byte_ascii.Nl_bry) + && Bry_.Eq(src, ws_end + 1, ws_end + 2, Byte_ascii.Nl_bry) + ) { + // Remove leading whitespace from the end of the accumulator + // Sanity check first though + int ws_len = i - ws_bgn; + if ( ws_len > 0 + && Bry_find_.Find_fwd_while_space_or_tab(accum.To_bry(), -ws_len, src_len) == ws_len + ) { + accum.Clear().Add(Bry_.Mid(accum.To_bry(), 0, -ws_len)); + } + + // Dump all but the last comment to the accumulator + int comments_list_len = comments_list.Len(); + for (int j = 0; j < comments_list_len; j++) { + int[] com = (int[])comments_list.Get_at(j); + comment_bgn_pos = com[0]; + comment_end_pos = com[1] + 1; + if (j == comments_list_len - 1) { + break; + } + inner = Bry_.Mid(src, comment_bgn_pos, comment_end_pos); + accum.Add_str_a7("").Add(htmlspecialchars(inner)).Add_str_a7(""); + } + + // Do a line-start run next time to look for headings after the comment + fake_line_start = true; + } + else { + // No line to eat, just take the comment itself + comment_bgn_pos = i; + comment_end_pos += 2; + } + + if (stack.top != null) { + Xomw_prepro_part part = stack.top.Get_current_part(); + if (!(part.comment_end == ws_end - 1)) { + part.visual_end = ws_bgn; + } + // Else comments abutting, no change in visual end + part.comment_end = comment_end_pos; + } + i = comment_end_pos + 1; + inner = Bry_.Mid(src, comment_bgn_pos, comment_end_pos + 1); + accum.Add_str_a7("").Add(htmlspecialchars(inner)).Add_str_a7(""); + continue; + } + } + + byte[] name = element.name; + int atr_bgn = i + name.length + 1; + + // Find end of tag + int tag_end_pos = no_more_gt ? Bry_find_.Not_found : Bry_find_.Find_fwd(src, Byte_ascii.Angle_end, atr_bgn); + if (tag_end_pos == Bry_find_.Not_found) { + // Infinite backtrack; Disable tag search to prevent worst-case O(N^2) performance + no_more_gt = true; + accum.Add(Bry__escaped_lt); + i++; + continue; + } + + if (ignored_tags.Has(name)) { + accum.Add_str_a7("").Add(htmlspecialchars(Bry_.Mid(src, i, tag_end_pos - i + 1))).Add_str_a7(""); + i = tag_end_pos + 1; + continue; + } + + int tag_bgn_pos = i; + int atr_end = -1; + byte[] close = null; + if (src[tag_end_pos - 1] == Byte_ascii.Slash) { + atr_end = tag_end_pos - 1; + inner = null; + i = tag_end_pos + 1; + close = null; + } + else { + atr_end = tag_end_pos; + // Find closing tag + // FIXME: need to search forward + Xomw_prepro_elem elem_end = (Xomw_prepro_elem)elements_end_trie.Match_at(elements_trv, src, tag_end_pos + 1, src_len); // preg_match( "/<\/" . preg_quote( $name, '/' ) . "\s*>/i", + int elem_end_lhs = elements_trv.Pos(); + int elem_end_rhs = elements_trv.Pos(); + // check for "\s*>" + if (elem_end != null) { + elem_end_rhs = Bry_find_.Find_fwd_while(src, elem_end_rhs, src_len, Byte_ascii.Space); + if (elem_end_rhs == src_len) { + elem_end = null; + } + else { + if (src[elem_end_rhs] == Byte_ascii.Gt) + elem_end_rhs = elem_end_rhs + 1; + else + elem_end = null; + } + } + if ( !no_more_closing_tag.Has(name) + && elem_end != null) { + inner = Bry_.Mid(src, tag_end_pos + 1, elem_end_lhs); + i = elem_end_rhs; + tmp_bfr.Add_str_a7("").Add(htmlspecialchars(Bry_.Mid(src, elem_end_lhs, elem_end_rhs))).Add_str_a7(""); + close = tmp_bfr.To_bry_and_clear(); + } + else { + // No end tag + if (xmlish_allow_missing_end_tag.Has(name)) { + // Let it run out to the end of the src. + inner = Bry_.Mid(src, tag_end_pos + 1); + i = src_len; + close = Bry_.Empty; + } + else { + // Don't match the tag, treat opening tag as literal and resume parsing. + i = tag_end_pos + 1; + accum.Add(htmlspecialchars(Bry_.Mid(src, tag_bgn_pos, tag_end_pos + 1))); + // Cache results, otherwise we have O(N^2) performance for input like ... + no_more_closing_tag.Add_if_dupe_use_nth(name, name); + continue; + } + } + } + + // and just become tags + if (ignored_elements.Has(name)) { + accum.Add_str_a7("").Add(htmlspecialchars(Bry_.Mid(src, tag_bgn_pos, i))).Add_str_a7(""); + continue; + } + + accum.Add_str_a7(""); + byte[] atr_bry = atr_end <= atr_bgn ? Bry_.Empty : Bry_.Mid(src, atr_bgn, atr_end); + accum.Add_str_a7("").Add(name).Add_str_a7(""); + // Note that the attr element contains the whitespace between name and attribute, + // this is necessary for precise reconstruction during pre-save transform. + accum.Add_str_a7("").Add(htmlspecialchars(atr_bry)).Add_str_a7(""); + if (inner != null) { + accum.Add_str_a7("").Add(htmlspecialchars(inner)).Add_str_a7(""); + } + accum.Add(close).Add_str_a7(""); + } + else if (found == Found__line_bgn) { + // Is this the start of a heading?; Line break belongs before the heading element in any case + if (fake_line_start) { + fake_line_start = false; + } else { + accum.Add(cur_char); + i++; + } + + int eq_end = Bry_find_.Find_fwd_while(src, i, i + 6, Byte_ascii.Eq); // strspn( $src, '=', $i, 6 ); + int count = i - eq_end; + if (count == 1 && find_equals) { + // DWIM: This looks kind of like a name/value separator. + // Let's let the equals handler have it and break the potential heading. + // This is heuristic, but AFAICT the methods for completely correct disambiguation are very complex. + } + else if (count > 0) { + Xomw_prepro_piece piece = new Xomw_prepro_piece(Byte_ascii.Nl_bry, Byte_ascii.Nl_bry, count, i, false); + piece.Add_part(Bry_.Repeat(Byte_ascii.Eq, count)); + stack.Push(piece); + accum = stack.Get_accum(); + Xomw_prepro_flags flags = stack.Get_flags(); + find_pipe = flags.Find_pipe; + find_equals = flags.Find_eq; + in_heading = flags.In_heading; + i += count; + } + } + else if (found == Found__line_end) { + Xomw_prepro_piece piece = stack.top; + // A heading must be open, otherwise \n wouldn't have been in the search list + if (!Bry_.Eq(piece.open, Byte_ascii.Nl_bry)) throw Err_.new_wo_type("assertion:piece must start with \\n"); + Xomw_prepro_part part = piece.Get_current_part(); + + // Search back through the input to see if it has a proper close. + // Do this using the reversed String since the other solutions (end anchor, etc.) are inefficient. + int search_bgn = Bry_find_.Find_bwd__while_space_or_tab(src, i, 0); + if (part.comment_end != -1 && search_bgn -1 == part.comment_end) { + // Comment found at line end; Search for equals signs before the comment + search_bgn = part.visual_end; + search_bgn -= Bry_find_.Find_bwd__while_space_or_tab(src, search_bgn, 0); + } + int count = piece.count; + int eq_len = Bry_find_.Find_bwd_while(src, search_bgn, 0, Byte_ascii.Eq); + + byte[] element = Bry_.Empty; + if (eq_len > 0) { + if (search_bgn - eq_len == piece.start_pos) { + // This is just a single String of equals signs on its own line + // Replicate the doHeadings behavior /={count}(.+)={count}/ + // First find out how many equals signs there really are (don't stop at 6) + count = eq_len; + if (count < 3) { + count = 0; + } else { + count = (count - 1) / 2; + if (count > 6) count = 6; + } + } + else { + if (eq_len < count) + count = eq_len; + } + if (count > 0) { + // Normal match, output + element = tmp_bfr.Add_str_a7("").Add_bfr_and_preserve(accum).Add_str_a7("").To_bry_and_clear(); + heading_index++; + } else { + // Single equals sign on its own line, count=0 + element = accum.To_bry(); + } + } + else { + // No match, no , just pass down the inner src + element = accum.To_bry(); + } + + // Unwind the stack + stack.Pop(); + accum = stack.Get_accum(); + + Xomw_prepro_flags flags = stack.Get_flags(); + find_pipe = flags.Find_pipe; + find_equals = flags.Find_eq; + in_heading = flags.In_heading; + + // Append the result to the enclosing accumulator + accum.Add(element); + // Note that we do NOT increment the input pointer. This is because the closing linebreak could be the opening linebreak of another heading. + // Infinite loops are avoided because the next iteration MUST hit the heading open case above, which unconditionally increments the input pointer. + } + else if (found == Found__open) { + // count opening brace characters + int count = Bry_find_.Find_fwd(src, cur_char, i, src_len); + + // we need to add to stack only if opening brace count is enough for one of the rules + if (count >= rule.min) { + // Add it to the stack + Xomw_prepro_piece piece = new Xomw_prepro_piece(cur_char, rule.end, count, -1, i > 0 && src[i - 1] == Byte_ascii.Nl); + stack.Push(piece); + accum = stack.Get_accum(); + Xomw_prepro_flags flags = stack.Get_flags(); + find_pipe = flags.Find_pipe; + find_equals = flags.Find_eq; + in_heading = flags.In_heading; + } + else { + // Add literal brace(s) + accum.Add(htmlspecialchars(Bry_.Repeat_bry(cur_char, count))); + } + i += count; + } + else if (found == Found__close) { + Xomw_prepro_piece piece = stack.top; + // lets check if there are enough characters for closing brace + int count = Bry_find_.Find_fwd(src, cur_char, i, src_len); + int max_count = piece.count; + if (count > max_count) count = max_count; + + // check for maximum matching characters (if there are 5 closing characters, we will probably need only 3 - depending on the rules) + rule = Get_rule(piece.open); + int matching_count = -1; + if (count > rule.max) { + // The specified maximum exists in the callback array, unless the caller has made an error + matching_count = rule.max; + } + else { + // Count is less than the maximum + // Skip any gaps in the callback array to find the true largest match + // Need to use array_key_exists not isset because the callback can be null + matching_count = count; + while (matching_count > 0 && !rule.Names_exist(matching_count)) { + matching_count--; + } + } + + if (matching_count <= 0) { + // No matching element found in callback array + // Output a literal closing brace and continue + accum.Add(htmlspecialchars(Bry_.Repeat_bry(cur_char, count))); + i += count; + continue; + } + int name_type = rule.names[matching_count]; + byte[] element = null; + if (name_type == Xomw_prepro_rule.Name__null) { + // No element, just literal text + piece.Break_syntax(tmp_bfr, matching_count); + element = tmp_bfr.Add(Bry_.Repeat_bry(rule.end, matching_count)).To_bry_and_clear(); + } + else { + // Create XML element; Note: $parts is already XML, does not need to be encoded further + List_adp parts = piece.parts; + byte[] title = ((Xomw_prepro_part)parts.Get_at(0)).bry; + parts.Del_at(0); + + // The invocation is at the start of the line if lineStart is set in the stack, and all opening brackets are used up. + byte[] attr = null; + if (max_count == matching_count && !piece.line_start) { + attr = Bry_.new_a7(" lineStart=\"1\""); + } + else { + attr = Bry_.Empty; + } + + byte[] name_bry = Xomw_prepro_rule.Name(name_type); + tmp_bfr.Add_str_a7("<").Add(name_bry).Add(attr).Add_str_a7(">"); + tmp_bfr.Add_str_a7("").Add(title).Add_str_a7(""); + + int arg_idx = 1; + int parts_len = parts.Len(); + for (int j = 0; j < parts_len; j++) { + Xomw_prepro_part part = (Xomw_prepro_part)parts.Get_at(j); + if (part.Eqpos != -1) { + byte[] arg_key = Bry_.Mid(part.bry, 0, part.Eqpos); + byte[] arg_val = Bry_.Mid(part.bry, part.Eqpos + 1); + tmp_bfr.Add_str_a7("").Add(arg_key).Add_str_a7("=").Add(arg_val).Add_str_a7(""); + } + else { + tmp_bfr.Add_str_a7("{").Add(part.bry).Add_str_a7("}"); + arg_idx++; + } + } + tmp_bfr.Add_str_a7(""); + element = tmp_bfr.To_bry_and_clear(); + } + + // Advance input pointer + i += matching_count; + + // Unwind the stack + stack.Pop(); + accum = stack.Get_accum(); + + // Re-add the old stack element if it still has unmatched opening characters remaining + if (matching_count < piece.count) { + piece.parts.Clear(); // piece.parts = [ new PPDPart ]; + piece.count -= matching_count; + + // do we still qualify for any callback with remaining count? + int min = Get_rule(piece.open).min; + if (piece.count >= min) { + stack.Push(piece); + accum = stack.Get_accum(); + } + else { + accum.Add(Bry_.Repeat_bry(piece.open, piece.count)); + } + } + + Xomw_prepro_flags flags = stack.Get_flags(); + find_pipe = flags.Find_pipe; + find_equals = flags.Find_eq; + in_heading = flags.In_heading; + + // Add XML element to the enclosing accumulator + accum.Add(element); + } + else if (found == Found__pipe) { + find_equals = true; // shortcut for getFlags() + stack.Add_part(Bry_.Empty); + accum = stack.Get_accum(); + i++; + } + else if (found == Found__equals) { + find_equals = false; // shortcut for getFlags() + stack.Get_current_part().Eqpos = accum.Len(); + accum.Add_byte(Byte_ascii.Eq); + i++; + } + } + + // Output any remaining unclosed brackets + Bry_bfr root_accum = stack.Get_root_accum(); + int stack_len = stack.stack.Len(); + for (int j = 0; j < stack_len; j++) { + Xomw_prepro_piece piece = (Xomw_prepro_piece)stack.stack.Get_at(j); + root_accum.Add(piece.Break_syntax(tmp_bfr, -1)); + } + root_accum.Add_str_a7(""); + return root_accum.To_bry_and_clear(); + } +} +class Xomw_prepro_rule { + public Xomw_prepro_rule(byte[] bgn, byte[] end, int min, int max, int[] names) { + this.bgn = bgn; + this.end = end; + this.min = min; + this.max = max; + this.names = names; + } + public final byte[] bgn; + public final byte[] end; + public final int min; + public final int max; + public final int[] names; + public boolean Names_exist(int idx) { + return idx < names.length && names[idx] != Name__invalid; + } + private static final byte[] Name__tmpl_bry = Bry_.new_a7("template"), Name__targ_bry = Bry_.new_a7("tplarg"); + public static final int Name__invalid = -1, Name__null = 0, Name__tmpl = 1, Name__targ = 2; + public static byte[] Name(int type) { + switch (type) { + case Name__tmpl: return Name__tmpl_bry; + case Name__targ: return Name__targ_bry; + default: + case Name__invalid: return null; + case Name__null: return null; + } + } +} +class Xomw_prepro_elem { + private static final byte[] Bry__tag_end = Bry_.new_a7("