Mw_parse: Add basic implementation of Mw_preprocessor

2026-03-02 03:49:30 +00:00 · 2017-01-09 21:33:45 -05:00
parent 006c14db4e
commit d15630c433
7 changed files with 905 additions and 5 deletions
--- a/100_core/src/gplx/Bry_.java
+++ b/100_core/src/gplx/Bry_.java
@@ -164,6 +164,17 @@ public class Bry_ {
 			rv[i] = b;
 		return rv;
 	}
+	public static byte[] Repeat_bry(byte[] bry, int len) {
+		int bry_len = bry.length;
+		int rv_len = len * bry_len;
+		byte[] rv = new byte[rv_len];
+		for (int i = 0; i < len; i++) {
+			for (int j = 0; j < bry_len; j++) {
+				rv[(i * bry_len) + j] = bry[j];
+			}
+		}
+		return rv;
+	}
 	public static byte[] Add(byte[] src, byte b) {
 		int src_len = src.length;
 		byte[] rv = new byte[src_len + 1];
--- a/100_core/src/gplx/Bry__tst.java
+++ b/100_core/src/gplx/Bry__tst.java
@@ -16,7 +16,7 @@ You should have received a copy of the GNU Affero General Public License
 along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
 package gplx;
-import org.junit.*; import gplx.core.primitives.*; import gplx.core.brys.*;
+import org.junit.*; import gplx.core.primitives.*; import gplx.core.brys.*; import gplx.core.tests.*;
 public class Bry__tst {
 	private final    Bry__fxt fxt = new Bry__fxt();
 	@Test  public void new_ascii_() {
@@ -276,6 +276,9 @@ public class Bry__tst {
 		fxt.Test__new_u8_nl_apos(String_.Ary("a", "b"), "a\nb");
 		fxt.Test__new_u8_nl_apos(String_.Ary("a", "b'c", "d"), "a\nb\"c\nd");
 	}
+	@Test   public void Repeat_bry() {
+		fxt.Test__repeat_bry("abc"  , 3, "abcabcabc");
+	}
 }
 class Bry__fxt {
 	public void Test_trim_end(String raw, byte trim, String expd) {
@@ -292,4 +295,7 @@ class Bry__fxt {
 	public void Test__new_u8_nl_apos(String[] ary, String expd) {
 		Tfds.Eq_str_lines(expd, String_.new_u8(Bry_.New_u8_nl_apos(ary)));
 	}
+	public void Test__repeat_bry(String s, int count, String expd) {
+		Gftest.Eq__str(expd, Bry_.Repeat_bry(Bry_.new_u8(s), count));
+	}
 }
--- a/100_core/src/gplx/Bry_find_.java
+++ b/100_core/src/gplx/Bry_find_.java
@@ -158,6 +158,19 @@ public class Bry_find_ {
 		}
 		return Bry_find_.Not_found;
 	}
+	public static int Find_bwd__while_space_or_tab(byte[] src, int cur, int end) { // get pos of 1st char that is not \t or \s 
+		if (cur >= src.length) return Bry_find_.Not_found;
+		for (int i = cur; i >= end; i--) {
+			byte b = src[i];
+			switch (b) {
+				case Byte_ascii.Space: case Byte_ascii.Tab:
+					break;
+				default:
+					return i;
+			}
+		}
+		return Bry_find_.Not_found;
+	}
 	public static int Find_bwd_non_ws_or_end(byte[] src, int cur, int end) {
 		if (cur >= src.length) return Bry_find_.Not_found;
 		for (int i = cur; i >= end; i--) {
--- a/400_xowa/src/gplx/xowa/Xoa_app_.java
+++ b/400_xowa/src/gplx/xowa/Xoa_app_.java
@@ -32,8 +32,8 @@ public class Xoa_app_ {
 		}
 	}
 	public static final String		Name			= "xowa";
-	public static final int		Version_id		= 513;
-	public static final String		Version			= "4.0.1.1701";	// RELEASE:2017-01-03 20:30
+	public static final int		Version_id		= 512;
+	public static final String		Version			= "4.1.0.1701";	// RELEASE:2017-01-03 20:30
 	public static String	Build_date		= "2012-12-30 00:00:00";
 	public static String	Build_date_fmt	= "yyyy-MM-dd HH:mm:ss";
 	public static String	Op_sys_str;
--- a/400_xowa/src/gplx/xowa/parsers/mws/prepros/Xomw_prepro_stack.java
+++ b/400_xowa/src/gplx/xowa/parsers/mws/prepros/Xomw_prepro_stack.java
@@ -0,0 +1,155 @@
+/*
+XOWA: the XOWA Offline Wiki Application
+Copyright (C) 2012 gnosygnu@gmail.com
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as
+published by the Free Software Foundation, either version 3 of the
+License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+package gplx.xowa.parsers.mws.prepros; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.mws.*;
+class Xomw_prepro_stack {
+	public List_adp stack = List_adp_.New();
+	public Xomw_prepro_piece top;
+	private Bry_bfr accum = Bry_bfr_.New(), root_accum = Bry_bfr_.New();
+	private final    Xomw_prepro_flags flags = new Xomw_prepro_flags();
+
+	public int Count() {return stack.Len();}
+	public Bry_bfr Get_accum() {return accum;}
+	public Bry_bfr Get_root_accum() {return root_accum;}
+
+	public Xomw_prepro_part Get_current_part() {
+		if (top == null) {
+			return null;
+		} else {
+			return top.Get_current_part();
+		}
+	}
+
+	public void Push(Xomw_prepro_piece item) {
+		stack.Add(item);
+		this.top = (Xomw_prepro_piece)stack.Get_at(stack.Len() - 1);			
+		accum.Clear().Add(top.Get_accum());
+	}
+
+	public Xomw_prepro_piece Pop() {
+		int len = stack.Count();
+		if (len == 0) {
+			throw Err_.new_wo_type("Xomw_prepro_stack: no elements remaining");
+		}
+
+		Xomw_prepro_piece rv = (Xomw_prepro_piece)stack.Get_at(len - 1);
+		stack.Del_at(len - 1);
+
+		if (len > 0) {
+			this.top = (Xomw_prepro_piece)stack.Get_at(stack.Len() - 1);			
+			accum.Clear().Add(top.Get_accum());
+		} else {
+			this.top = null;
+			this.accum = root_accum;
+		}
+		return rv;
+	}
+
+	public void Add_part(byte[] bry) {
+		top.Add_part(bry);
+		accum.Clear().Add(top.Get_accum());
+	}
+
+	public Xomw_prepro_flags Get_flags() {
+		if (stack.Count() == 0) {
+			flags.Find_eq = false;
+			flags.Find_pipe = false;
+			flags.In_heading = false;
+			return flags;
+		}
+		else {
+			top.Set_flags(flags);
+			return flags;
+		}
+	}
+}
+class Xomw_prepro_flags {
+	public boolean Find_pipe;
+	public boolean Find_eq;
+	public boolean In_heading;
+}
+class Xomw_prepro_piece {
+	public final    byte[] open;					// Opening character (\n for heading)
+	public final    byte[] close;                   // Matching closing char;
+	public int count;                               // Number of opening characters found (number of "=" for heading)
+	public final    boolean line_start;                // True if the open char appeared at the start of the input line; Not set for headings.
+	public final    int start_pos;
+	public List_adp parts = List_adp_.New();
+	public Xomw_prepro_piece(byte[] open, byte[] close, int count, int start_pos, boolean line_start) {
+		this.open = open;
+		this.close = close;
+		this.count = count;
+		this.start_pos = start_pos;
+		this.line_start = line_start;
+	}
+	public Xomw_prepro_part Get_current_part() {
+		return (Xomw_prepro_part)parts.Get_at(parts.Len() - 1);
+	}
+	public byte[] Get_accum() {
+		return Get_current_part().bry;
+	}
+	public void Add_part(byte[] bry) {
+		parts.Add(new Xomw_prepro_part(bry));
+	}
+	public static final    byte[] Brack_bgn_bry = Bry_.new_a7("[");
+	public void Set_flags(Xomw_prepro_flags flags) {
+		int parts_len = parts.Len();
+		boolean open_is_nl = Bry_.Eq(open, Byte_ascii.Nl_bry);
+		boolean find_pipe = !open_is_nl && Bry_.Eq(open, Brack_bgn_bry);
+		flags.Find_pipe = find_pipe;
+		flags.Find_eq = find_pipe && parts_len > 1 && ((Xomw_prepro_part)parts.Get_at(parts_len - 1)).Eqpos != -1;
+		flags.In_heading = open_is_nl;
+	}
+	// Get the output String that would result if the close is not found.
+	public byte[] Break_syntax(Bry_bfr tmp_bfr, int opening_count) {
+		byte[] rv = Bry_.Empty;
+		if (Bry_.Eq(open, Byte_ascii.Nl_bry)) {
+			rv = ((Xomw_prepro_part)parts.Get_at(0)).bry;
+		}
+		else {
+			if (opening_count == -1) {
+				opening_count = count;
+			}
+			rv = Bry_.Repeat_bry(open, opening_count);
+
+			// concat parts with "|"
+			boolean first = true;
+			int len = parts.Len();
+			for (int i = 0; i < len; i++) {
+				Xomw_prepro_part part = (Xomw_prepro_part)parts.Get_at(i);
+				if (first) {
+					first = false;
+				}
+				else {
+					tmp_bfr.Add_byte_pipe();
+				}
+				tmp_bfr.Add(part.bry);
+			}
+			rv = tmp_bfr.To_bry_and_clear();
+		}
+		return rv;
+	}
+}
+class Xomw_prepro_part {
+	public Xomw_prepro_part(byte[] bry) {
+		this.bry = bry;
+	}
+	public final    byte[] bry;
+	public int Eqpos = -1;
+	public int comment_end = -1;
+	public int visual_end = -1;
+}
--- a/400_xowa/src/gplx/xowa/parsers/mws/prepros/Xomw_prepro_wkr.java
+++ b/400_xowa/src/gplx/xowa/parsers/mws/prepros/Xomw_prepro_wkr.java
@@ -0,0 +1,715 @@
+/*
+XOWA: the XOWA Offline Wiki Application
+Copyright (C) 2012 gnosygnu@gmail.com
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as
+published by the Free Software Foundation, either version 3 of the
+License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+package gplx.xowa.parsers.mws.prepros; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.mws.*;
+import gplx.core.btries.*;
+public class Xomw_prepro_wkr {
+	private static final    Xomw_prepro_rule 
+	  rule_curly = new Xomw_prepro_rule(Bry_.new_a7("{"), Bry_.new_a7("}")  , 2, 3, new int[] {Xomw_prepro_rule.Name__invalid, Xomw_prepro_rule.Name__invalid, Xomw_prepro_rule.Name__tmpl, Xomw_prepro_rule.Name__targ})
+	, rule_brack = new Xomw_prepro_rule(Bry_.new_a7("["), Bry_.new_a7("]")  , 2, 2, new int[] {Xomw_prepro_rule.Name__invalid, Xomw_prepro_rule.Name__invalid, Xomw_prepro_rule.Name__null})
+	, rule_langv = new Xomw_prepro_rule(Bry_.new_a7("-{"), Bry_.new_a7("}-"), 1, 1, new int[] {Xomw_prepro_rule.Name__invalid, Xomw_prepro_rule.Name__null})
+	;
+	private static final    byte[] 
+	  Bry__only_include_bgn = Bry_.new_a7("<onlyinclude>")
+	, Bry__only_include_end = Bry_.new_a7("</onlyinclude>")
+	, Bry__comment_bgn = Bry_.new_a7("<!--")
+	, Bry__comment_end = Bry_.new_a7("-->")
+	, Bry__escaped_lt = Bry_.new_a7("&lt;")
+	, Bry__includeonly = Bry_.new_a7("includeonly")
+	, Bry__noinclude = Bry_.new_a7("noinclude")
+	, Bry__onlyinclude = Bry_.new_a7("onlyinclude")
+	;
+	private static final    int Len__only_include_end = Bry__only_include_end.length;
+	private static final int 
+	  Found__line_bgn = 0
+	, Found__line_end = 1
+	, Found__pipe = 2
+	, Found__equals = 3
+	, Found__angle = 4
+	, Found__close = 5
+	, Found__open = 6
+	;
+
+	private Bry_bfr accum = Bry_bfr_.New(), tmp_bfr = Bry_bfr_.New();
+	private List_adp comments_list = List_adp_.New();
+	private byte[] htmlspecialchars(byte[] bry) {
+		return bry;
+	}
+	private Xomw_prepro_rule Get_rule(byte[] bry) {
+		if		(Bry_.Eq(bry, rule_curly.bgn))   return rule_curly;
+		else if	(Bry_.Eq(bry, rule_brack.bgn))   return rule_brack;
+		else if	(Bry_.Eq(bry, rule_langv.bgn))   return rule_langv;
+		else                                     throw Err_.new_unhandled(bry);
+	}
+	public byte[] Preprocess_to_xml(byte[] src, boolean for_inclusion) {
+		Hash_adp_bry xmlish_elems = Hash_adp_bry.ci_a7();	// parser->getStripList();
+
+		Hash_adp_bry xmlish_allow_missing_end_tag = Hash_adp_bry.cs();
+		xmlish_allow_missing_end_tag.Add_as_key_and_val(Bry__includeonly);
+		xmlish_allow_missing_end_tag.Add_as_key_and_val(Bry__noinclude);
+		xmlish_allow_missing_end_tag.Add_as_key_and_val(Bry__onlyinclude);
+
+		boolean enable_only_include = false;
+
+		Hash_adp_bry ignored_tags = Hash_adp_bry.cs();
+		Hash_adp_bry ignored_elements = Hash_adp_bry.cs();
+		if (for_inclusion) {
+			ignored_tags.Add_many_str("includeonly", "/includeonly");
+			ignored_elements.Add_many_str("noinclude");
+			xmlish_elems.Add_many_str("noinclude");
+			if (	Bry_.Has(src, Bry__only_include_bgn)
+				&&	Bry_.Has(src, Bry__only_include_end)) {
+				enable_only_include = true;
+			}
+		}
+		else {
+			ignored_tags.Add_many_str("noinclude", "/noinclude", "onlyinclude", "/onlyinclude");
+			ignored_elements.Add_many_str("includeonly");
+			xmlish_elems.Add_many_str("includeonly");
+		}
+
+		// $xmlishRegex = implode( '|', array_merge( $xmlishElements, $ignoredTags ) );
+		// Use "A" modifier (anchored) instead of "^", because ^ doesn't work with an offset
+		// $elementsRegex = "~($xmlishRegex)(?:\s|\/>|>)|(!--)~iA";
+
+		Xomw_prepro_stack stack = new Xomw_prepro_stack();
+//			$searchBase = "[{<\n"; # }
+
+		// Input pointer, starts out pointing to a pseudo-newline before the start
+		int i = 0;
+
+		// Current accumulator
+		accum = stack.Get_accum();
+		accum.Add_str_a7("<root>");
+
+		// True to find equals signs in arguments
+		boolean find_equals = false;
+
+		// True to take notice of pipe characters
+		boolean find_pipe = false;
+		int heading_index = 1;
+
+		// True if $i is inside a possible heading
+		boolean in_heading = false;
+
+		// True if there are no more greater-than (>) signs right of $i
+		boolean no_more_gt = false;
+
+		// Map of tag name => true if there are no more closing tags of given type right of $i
+		Hash_adp_bry no_more_closing_tag = Hash_adp_bry.cs();
+
+		// True to ignore all input up to the next <onlyinclude>
+		boolean find_only_include = enable_only_include;
+
+		// Do a line-start run without outputting an LF character
+		boolean fake_line_start = true;
+
+		int src_len = src.length;
+		int found = -1;
+		byte[] cur_closing = Bry_.Empty;
+
+		Btrie_slim_mgr elements_trie = Btrie_slim_mgr.ci_a7();
+		Btrie_slim_mgr elements_end_trie = Btrie_slim_mgr.ci_a7();
+		Btrie_rv elements_trv = new Btrie_rv();
+
+		Btrie_slim_mgr cur_char_trie = Btrie_slim_mgr.ci_a7();
+		byte[] inner = null;
+
+		while (true) {
+			if (find_only_include) {
+				// Ignore all input up to the next <onlyinclude>
+				int start_pos = Bry_find_.Find_fwd(src, Bry__only_include_bgn, i, src_len);
+				if (start_pos == Bry_find_.Not_found) {
+					// Ignored section runs to the end
+					accum.Add_str_a7("<ignore>").Add(htmlspecialchars(Bry_.Mid(src, i))).Add_str_a7("</ignore>");
+					break;
+				}
+				int tag_end_pos = start_pos + Bry__only_include_bgn.length; // past-the-end
+				accum.Add_str_a7("<ignore>").Add(htmlspecialchars(Bry_.Mid(src, i, tag_end_pos))).Add_str_a7("</ignore>");
+				i = tag_end_pos;
+				find_only_include = false;
+			}
+
+			byte[] cur_char = Bry_.Empty;
+			if (fake_line_start) {
+				found = Found__line_bgn;
+				cur_char = Bry_.Empty;
+			}
+
+			// Find next opening brace, closing brace or pipe
+//				$search = $searchBase;
+			if (stack.top == null) {
+				cur_closing = Bry_.Empty;
+			}
+			else {
+				cur_closing = stack.top.close;
+				// $search .= $currentClosing;
+			}
+			if (find_pipe) {
+				// $search .= '|';
+			}
+			if (find_equals) {
+				// First equals will be for the template
+				// $search .= '=';
+			}
+			Xomw_prepro_rule rule = null;
+
+			// Output literal section, advance input counter
+			int literal_len = 0; // strcspn(src, $search, i);
+			if (literal_len > 0) {
+				accum.Add(htmlspecialchars(Bry_.Mid(src, i, i + literal_len)));
+				i += literal_len;
+			}
+
+			if (i >= src_len) {
+				if (Bry_.Eq(cur_closing, Byte_ascii.Nl_bry)) {
+					// Do a past-the-end run to finish off the heading
+					cur_char = Byte_ascii.Nl_bry;
+					found = Found__line_end;
+				}
+				else {
+					// All done
+					break;
+				}
+			}
+			else {
+				Xomw_prepro_curchar_itm cur_char_itm = (Xomw_prepro_curchar_itm)cur_char_trie.Match_at(elements_trv, src, i, src_len);
+				cur_char = cur_char_itm.sequence;
+				switch (cur_char_itm.type) {
+					case Byte_ascii.Pipe:         found = Found__pipe; break;
+					case Byte_ascii.Eq:           found = Found__equals; break;
+					case Byte_ascii.Angle_bgn:    found = Found__angle; break;
+					case Byte_ascii.Nl:           found = in_heading ? Found__line_end : Found__line_bgn; break;
+					case Byte_ascii.Curly_bgn: {
+						found = Found__open;
+						rule = rule_curly;
+						break;
+					}
+					case Byte_ascii.Brack_bgn: {
+						found = Found__open;
+						rule = rule_brack;
+						break;
+					}
+					default:
+						if (cur_char_itm.type == Byte_ascii.Dash) {
+							int nxt_pos = i + 1;
+							if (nxt_pos < src_len) {
+								if (src[i + 1] == Byte_ascii.Curly_bgn) {
+									found = Found__open;
+									rule = rule_langv;
+									continue;
+								}
+							}
+						}
+
+						if (Bry_.Eq(cur_char, cur_closing)) {
+							found = Found__close;
+						}
+						else {
+							i++;
+							continue;
+						}
+						break;
+				}
+			}
+
+			if (found == Found__angle) {
+				// Handle </onlyinclude>
+				if (	enable_only_include
+					&&	Bry_.Eq(src, i, i + Len__only_include_end, Bry__only_include_end)) {
+					find_only_include = true;
+					continue;
+				}
+
+				// Determine element name; $elementsRegex = "~($xmlishRegex)(?:\s|\/>|>)|(!--)~iA"; EX: "(span|div)(?:\s|\/>|>)|(!--)
+				Xomw_prepro_elem element = (Xomw_prepro_elem)elements_trie.Match_at(elements_trv, src, i + 1, src_len);
+				if (element == null) {// Element name missing or not listed
+					accum.Add(Bry__escaped_lt);
+					i++;
+					continue;
+				}
+
+				// Handle comments
+				if (element.type == Xomw_prepro_elem.Type__comment) {
+					// To avoid leaving blank lines,
+					// when a sequence of space-separated comments is both preceded and followed by a newline (ignoring spaces), 
+					// then trim leading and trailing spaces and the trailing newline.
+
+					// Find the end
+					int comment_end_pos = Bry_find_.Find_fwd(src, Bry__comment_end, i + 4, src_len);
+					if (comment_end_pos == Bry_find_.Not_found) {
+						// Unclosed comment in input, runs to end
+						accum.Add_str_a7("<comment>").Add(htmlspecialchars(Bry_.Mid(src, i))).Add_str_a7("</comment>");
+						i = src_len;
+					}
+					else {
+						// Search backwards for leading whitespace
+						int ws_bgn = i > 0 ? i - Bry_find_.Find_bwd__while_space_or_tab(src, i, 0) : 0;
+
+						// Search forwards for trailing whitespace
+						// $wsEnd will be the position of the last space (or the '>' if there's none)
+						int ws_end = comment_end_pos + 2 + Bry_find_.Find_fwd_while_space_or_tab(src, comment_end_pos + 3, src_len);
+
+						// Keep looking forward as long as we're finding more comments.
+						comments_list.Clear();
+						comments_list.Add(new int[] {ws_bgn, ws_end});
+						while (Bry_.Eq(src, ws_end + 1, ws_end + 5, Bry__comment_bgn)) {
+							int cur_char_pos = Bry_find_.Find_fwd(src, Bry__comment_end, ws_end + 4);
+							if (cur_char_pos == Bry_find_.Not_found) {
+								break;
+							}
+							cur_char_pos = cur_char_pos + 2 + Bry_find_.Find_fwd_while_space_or_tab(src, cur_char_pos + 3, src_len);
+							comments_list.Add(new int[] {ws_end + 1, cur_char_pos});
+							ws_end = cur_char_pos;
+						}
+
+						// Eat the line if possible
+						// TODO: This could theoretically be done if $wsStart == 0, i.e. for comments at the overall start.
+						// That's not how Sanitizer::removeHTMLcomments() did it, but it's a possible beneficial b/c break.
+						int comment_bgn_pos = -1;
+						if (	ws_bgn > 0 
+							&&	Bry_.Eq(src, ws_bgn - 1, ws_bgn    , Byte_ascii.Nl_bry)
+							&&	Bry_.Eq(src, ws_end + 1, ws_end + 2, Byte_ascii.Nl_bry)
+						) {
+							// Remove leading whitespace from the end of the accumulator
+							// Sanity check first though
+							int ws_len = i - ws_bgn;
+							if (   ws_len > 0
+								&& Bry_find_.Find_fwd_while_space_or_tab(accum.To_bry(), -ws_len, src_len) == ws_len
+							) {
+								accum.Clear().Add(Bry_.Mid(accum.To_bry(), 0, -ws_len));
+							}
+
+							// Dump all but the last comment to the accumulator
+							int comments_list_len = comments_list.Len();
+							for (int j = 0; j < comments_list_len; j++) {
+								int[] com = (int[])comments_list.Get_at(j);
+								comment_bgn_pos = com[0];
+								comment_end_pos = com[1] + 1;
+								if (j == comments_list_len - 1) {
+									break;
+								}
+								inner = Bry_.Mid(src, comment_bgn_pos, comment_end_pos);
+								accum.Add_str_a7("<comment>").Add(htmlspecialchars(inner)).Add_str_a7("</comment>");
+							}
+
+							// Do a line-start run next time to look for headings after the comment
+							fake_line_start = true;
+						}
+						else {
+							// No line to eat, just take the comment itself
+							comment_bgn_pos = i;
+							comment_end_pos += 2;
+						}
+
+						if (stack.top != null) {
+							Xomw_prepro_part part = stack.top.Get_current_part();
+							if (!(part.comment_end == ws_end - 1)) {
+								part.visual_end = ws_bgn;
+							}
+							// Else comments abutting, no change in visual end
+							part.comment_end = comment_end_pos;
+						}
+						i = comment_end_pos + 1;
+						inner = Bry_.Mid(src, comment_bgn_pos, comment_end_pos + 1);
+						accum.Add_str_a7("<comment>").Add(htmlspecialchars(inner)).Add_str_a7("</comment>");
+						continue;
+					}
+				}
+
+				byte[] name = element.name;
+				int atr_bgn = i + name.length + 1;
+
+				// Find end of tag
+				int tag_end_pos = no_more_gt ? Bry_find_.Not_found : Bry_find_.Find_fwd(src, Byte_ascii.Angle_end, atr_bgn);
+				if (tag_end_pos == Bry_find_.Not_found) {
+					// Infinite backtrack; Disable tag search to prevent worst-case O(N^2) performance
+					no_more_gt = true;
+					accum.Add(Bry__escaped_lt);
+					i++;
+					continue;
+				}
+
+				if (ignored_tags.Has(name)) {
+					accum.Add_str_a7("<ignore>").Add(htmlspecialchars(Bry_.Mid(src, i, tag_end_pos - i + 1))).Add_str_a7("</ignore>");
+					i = tag_end_pos + 1;
+					continue;
+				}
+
+				int tag_bgn_pos = i;
+				int atr_end = -1;
+				byte[] close = null;
+				if (src[tag_end_pos - 1] == Byte_ascii.Slash) {
+					atr_end = tag_end_pos - 1;
+					inner = null;
+					i = tag_end_pos + 1;
+					close = null;
+				}
+				else {
+					atr_end = tag_end_pos;
+					// Find closing tag
+					// FIXME: need to search forward
+					Xomw_prepro_elem elem_end = (Xomw_prepro_elem)elements_end_trie.Match_at(elements_trv, src, tag_end_pos + 1, src_len); // preg_match( "/<\/" . preg_quote( $name, '/' ) . "\s*>/i",
+					int elem_end_lhs = elements_trv.Pos();
+					int elem_end_rhs = elements_trv.Pos();
+					// check for "\s*>"
+					if (elem_end != null) {
+						elem_end_rhs = Bry_find_.Find_fwd_while(src, elem_end_rhs, src_len, Byte_ascii.Space);
+						if (elem_end_rhs == src_len) {
+							elem_end = null;
+						}
+						else {
+							if (src[elem_end_rhs] == Byte_ascii.Gt) 
+								elem_end_rhs = elem_end_rhs + 1;
+							else
+								elem_end = null;
+						}
+					}
+					if (	!no_more_closing_tag.Has(name)
+						&&	elem_end != null) {
+						inner = Bry_.Mid(src, tag_end_pos + 1, elem_end_lhs);
+						i = elem_end_rhs;
+						tmp_bfr.Add_str_a7("<close>").Add(htmlspecialchars(Bry_.Mid(src, elem_end_lhs, elem_end_rhs))).Add_str_a7("</close>");
+						close = tmp_bfr.To_bry_and_clear();
+					} 
+					else {
+						// No end tag
+						if (xmlish_allow_missing_end_tag.Has(name)) {
+							// Let it run out to the end of the src.
+							inner = Bry_.Mid(src, tag_end_pos + 1);
+							i = src_len;
+							close = Bry_.Empty;
+						}
+						else {
+							// Don't match the tag, treat opening tag as literal and resume parsing.
+							i = tag_end_pos + 1;
+							accum.Add(htmlspecialchars(Bry_.Mid(src, tag_bgn_pos, tag_end_pos + 1)));
+							// Cache results, otherwise we have O(N^2) performance for input like <foo><foo><foo>...
+							no_more_closing_tag.Add_if_dupe_use_nth(name, name);
+							continue;
+						}
+					}
+				}
+
+				// <includeonly> and <noinclude> just become <ignore> tags
+				if (ignored_elements.Has(name)) {
+					accum.Add_str_a7("<ignore>").Add(htmlspecialchars(Bry_.Mid(src, tag_bgn_pos, i))).Add_str_a7("</ignore>");
+					continue;
+				}
+
+				accum.Add_str_a7("<ext>");
+				byte[] atr_bry = atr_end <= atr_bgn ? Bry_.Empty : Bry_.Mid(src, atr_bgn, atr_end);
+				accum.Add_str_a7("<name>").Add(name).Add_str_a7("</name>");
+				// Note that the attr element contains the whitespace between name and attribute,
+				// this is necessary for precise reconstruction during pre-save transform.
+				accum.Add_str_a7("<attr>").Add(htmlspecialchars(atr_bry)).Add_str_a7("</attr>");
+				if (inner != null) {
+					accum.Add_str_a7("<inner>").Add(htmlspecialchars(inner)).Add_str_a7("</inner>");
+				}
+				accum.Add(close).Add_str_a7("</ext>");
+			}
+			else if (found == Found__line_bgn) {
+				// Is this the start of a heading?; Line break belongs before the heading element in any case
+				if (fake_line_start) {
+					fake_line_start = false;
+				} else {
+					accum.Add(cur_char);
+					i++;
+				}
+
+				int eq_end = Bry_find_.Find_fwd_while(src, i, i + 6, Byte_ascii.Eq);	// strspn( $src, '=', $i, 6 );					
+				int count = i - eq_end;
+				if (count == 1 && find_equals) {
+					// DWIM: This looks kind of like a name/value separator.
+					// Let's let the equals handler have it and break the potential heading.
+					// This is heuristic, but AFAICT the methods for completely correct disambiguation are very complex.
+				}
+				else if (count > 0) {
+					Xomw_prepro_piece piece = new Xomw_prepro_piece(Byte_ascii.Nl_bry, Byte_ascii.Nl_bry, count, i, false);
+					piece.Add_part(Bry_.Repeat(Byte_ascii.Eq, count));
+					stack.Push(piece);
+					accum = stack.Get_accum();
+					Xomw_prepro_flags flags = stack.Get_flags();
+					find_pipe = flags.Find_pipe;
+					find_equals = flags.Find_eq;
+					in_heading = flags.In_heading;
+					i += count;
+				}
+			}
+			else if (found == Found__line_end) {
+				Xomw_prepro_piece piece = stack.top;
+				// A heading must be open, otherwise \n wouldn't have been in the search list
+				if (!Bry_.Eq(piece.open, Byte_ascii.Nl_bry)) throw Err_.new_wo_type("assertion:piece must start with \\n");
+				Xomw_prepro_part part = piece.Get_current_part();
+
+				// Search back through the input to see if it has a proper close.
+				// Do this using the reversed String since the other solutions (end anchor, etc.) are inefficient.
+				int search_bgn = Bry_find_.Find_bwd__while_space_or_tab(src, i, 0);
+				if (part.comment_end != -1 && search_bgn -1 == part.comment_end) {
+					// Comment found at line end; Search for equals signs before the comment
+					search_bgn = part.visual_end;
+					search_bgn -= Bry_find_.Find_bwd__while_space_or_tab(src, search_bgn, 0);
+				}
+				int count = piece.count;
+				int eq_len = Bry_find_.Find_bwd_while(src, search_bgn, 0, Byte_ascii.Eq);
+
+				byte[] element = Bry_.Empty;
+				if (eq_len > 0) {
+					if (search_bgn - eq_len == piece.start_pos) {
+						// This is just a single String of equals signs on its own line
+						// Replicate the doHeadings behavior /={count}(.+)={count}/
+						// First find out how many equals signs there really are (don't stop at 6)
+						count = eq_len;
+						if (count < 3) {
+							count = 0;
+						} else {
+							count = (count - 1) / 2;
+							if (count > 6) count = 6;
+						}
+					} 
+					else {
+						if (eq_len < count)
+							count = eq_len;
+					}
+					if (count > 0) {
+						// Normal match, output <h>
+						element = tmp_bfr.Add_str_a7("<h level=\"").Add_int_variable(count).Add_str_a7("\" i=\"").Add_int_variable(heading_index).Add_str_a7("\">").Add_bfr_and_preserve(accum).Add_str_a7("</h>").To_bry_and_clear();
+						heading_index++;
+					} else {
+						// Single equals sign on its own line, count=0
+						element = accum.To_bry();
+					}
+				}
+				else {
+					// No match, no <h>, just pass down the inner src
+					element = accum.To_bry();
+				}
+
+				// Unwind the stack
+				stack.Pop();
+				accum = stack.Get_accum();
+				
+				Xomw_prepro_flags flags = stack.Get_flags();
+				find_pipe = flags.Find_pipe;
+				find_equals = flags.Find_eq;
+				in_heading = flags.In_heading;
+
+				// Append the result to the enclosing accumulator
+				accum.Add(element);
+				// Note that we do NOT increment the input pointer. This is because the closing linebreak could be the opening linebreak of another heading.
+				// Infinite loops are avoided because the next iteration MUST hit the heading open case above, which unconditionally increments the input pointer.
+			}
+			else if (found == Found__open) {
+				// count opening brace characters
+				int count = Bry_find_.Find_fwd(src, cur_char, i, src_len);
+
+				// we need to add to stack only if opening brace count is enough for one of the rules
+				if (count >= rule.min) {
+					// Add it to the stack
+					Xomw_prepro_piece piece = new Xomw_prepro_piece(cur_char, rule.end, count, -1, i > 0 && src[i - 1] == Byte_ascii.Nl);
+					stack.Push(piece);
+					accum = stack.Get_accum();
+					Xomw_prepro_flags flags = stack.Get_flags();
+					find_pipe = flags.Find_pipe;
+					find_equals = flags.Find_eq;
+					in_heading = flags.In_heading;
+				}
+				else {
+					// Add literal brace(s)
+					accum.Add(htmlspecialchars(Bry_.Repeat_bry(cur_char, count)));
+				}
+				i += count;
+			}
+			else if (found == Found__close) {
+				Xomw_prepro_piece piece = stack.top;
+				// lets check if there are enough characters for closing brace
+				int count = Bry_find_.Find_fwd(src, cur_char, i, src_len);
+				int max_count = piece.count;
+				if (count > max_count) count = max_count;
+
+				// check for maximum matching characters (if there are 5 closing characters, we will probably need only 3 - depending on the rules)
+				rule = Get_rule(piece.open);
+				int matching_count = -1;
+				if (count > rule.max) {
+					// The specified maximum exists in the callback array, unless the caller has made an error
+					matching_count = rule.max;
+				}
+				else {
+					// Count is less than the maximum
+					// Skip any gaps in the callback array to find the true largest match
+					// Need to use array_key_exists not isset because the callback can be null
+					matching_count = count;
+					while (matching_count > 0 && !rule.Names_exist(matching_count)) {
+						matching_count--;
+					}
+				}
+
+				if (matching_count <= 0) {
+					// No matching element found in callback array
+					// Output a literal closing brace and continue
+					accum.Add(htmlspecialchars(Bry_.Repeat_bry(cur_char, count)));
+					i += count;
+					continue;
+				}
+				int name_type = rule.names[matching_count];
+				byte[] element = null;
+				if (name_type == Xomw_prepro_rule.Name__null) {
+					// No element, just literal text
+					piece.Break_syntax(tmp_bfr, matching_count);
+					element = tmp_bfr.Add(Bry_.Repeat_bry(rule.end, matching_count)).To_bry_and_clear();
+				}
+				else {
+					// Create XML element; Note: $parts is already XML, does not need to be encoded further
+					List_adp parts = piece.parts;
+					byte[] title = ((Xomw_prepro_part)parts.Get_at(0)).bry;
+					parts.Del_at(0);
+
+					// The invocation is at the start of the line if lineStart is set in the stack, and all opening brackets are used up.
+					byte[] attr = null;
+					if (max_count == matching_count && !piece.line_start) {
+						attr = Bry_.new_a7(" lineStart=\"1\"");
+					}
+					else {
+						attr = Bry_.Empty;
+					}
+
+					byte[] name_bry = Xomw_prepro_rule.Name(name_type);
+					tmp_bfr.Add_str_a7("<").Add(name_bry).Add(attr).Add_str_a7(">");
+					tmp_bfr.Add_str_a7("<title>").Add(title).Add_str_a7("</title>");
+
+					int arg_idx = 1;
+					int parts_len = parts.Len();
+					for (int j = 0; j < parts_len; j++) {
+						Xomw_prepro_part part = (Xomw_prepro_part)parts.Get_at(j);
+						if (part.Eqpos != -1) {
+							byte[] arg_key = Bry_.Mid(part.bry, 0, part.Eqpos);
+							byte[] arg_val = Bry_.Mid(part.bry, part.Eqpos + 1);
+							tmp_bfr.Add_str_a7("<part><name>").Add(arg_key).Add_str_a7("</name>=<value>").Add(arg_val).Add_str_a7("</value></part>");
+						}
+						else {
+							tmp_bfr.Add_str_a7("<part><name index=\"").Add_int_variable(arg_idx).Add_str_a7("\" /><value>{").Add(part.bry).Add_str_a7("}</value></part>");
+							arg_idx++;
+						}
+					}
+					tmp_bfr.Add_str_a7("</").Add(name_bry).Add_str_a7(">");
+					element = tmp_bfr.To_bry_and_clear();
+				}
+
+				// Advance input pointer
+				i += matching_count;
+
+				// Unwind the stack
+				stack.Pop();
+				accum = stack.Get_accum();
+
+				// Re-add the old stack element if it still has unmatched opening characters remaining
+				if (matching_count < piece.count) {
+					piece.parts.Clear(); // piece.parts = [ new PPDPart ];
+					piece.count -= matching_count;
+
+					// do we still qualify for any callback with remaining count?
+					int min = Get_rule(piece.open).min;
+					if (piece.count >= min) {
+						stack.Push(piece);
+						accum = stack.Get_accum();
+					}
+					else {
+						accum.Add(Bry_.Repeat_bry(piece.open, piece.count));
+					}
+				}
+
+				Xomw_prepro_flags flags = stack.Get_flags();
+				find_pipe = flags.Find_pipe;
+				find_equals = flags.Find_eq;
+				in_heading = flags.In_heading;
+
+				// Add XML element to the enclosing accumulator
+				accum.Add(element);
+			}
+			else if (found == Found__pipe) {
+				find_equals = true; // shortcut for getFlags()
+				stack.Add_part(Bry_.Empty);
+				accum = stack.Get_accum();
+				i++;
+			}
+			else if (found == Found__equals) {
+				find_equals = false; // shortcut for getFlags()
+				stack.Get_current_part().Eqpos = accum.Len();
+				accum.Add_byte(Byte_ascii.Eq);
+				i++;
+			}
+		}
+
+		// Output any remaining unclosed brackets
+		Bry_bfr root_accum = stack.Get_root_accum();
+		int stack_len = stack.stack.Len();
+		for (int j = 0; j < stack_len; j++) {
+			Xomw_prepro_piece piece = (Xomw_prepro_piece)stack.stack.Get_at(j);
+			root_accum.Add(piece.Break_syntax(tmp_bfr, -1));
+		}
+		root_accum.Add_str_a7("</root>");
+		return root_accum.To_bry_and_clear();
+	}
+}
+class Xomw_prepro_rule {
+	public Xomw_prepro_rule(byte[] bgn, byte[] end, int min, int max, int[] names) {
+		this.bgn = bgn;
+		this.end = end;
+		this.min = min;
+		this.max = max;
+		this.names = names;
+	}
+	public final    byte[] bgn;
+	public final    byte[] end;
+	public final    int min;
+	public final    int max;
+	public final    int[] names;
+	public boolean Names_exist(int idx) {
+		return idx < names.length && names[idx] != Name__invalid;
+	}
+	private static final    byte[] Name__tmpl_bry = Bry_.new_a7("template"), Name__targ_bry = Bry_.new_a7("tplarg");
+	public static final int Name__invalid = -1, Name__null = 0, Name__tmpl = 1, Name__targ = 2;
+	public static byte[] Name(int type) {
+		switch (type) {
+			case Name__tmpl:    return Name__tmpl_bry;
+			case Name__targ:    return Name__targ_bry;
+			default:
+			case Name__invalid: return null;
+			case Name__null:    return null;
+		}
+	}
+}
+class Xomw_prepro_elem {
+	private static final    byte[] Bry__tag_end = Bry_.new_a7("</");
+	public Xomw_prepro_elem(int type, byte[] name) {
+		this.type = type;
+		this.name = name;
+		this.tag_end_lhs = Bry_.Add(Bry__tag_end, name);
+	}
+	public final    int type;
+	public final    byte[] name;
+	public final    byte[] tag_end_lhs;
+	public static final int Type__comment = 0;
+}
+class Xomw_prepro_curchar_itm {
+	public Xomw_prepro_curchar_itm(int type, byte[] sequence) {
+		this.type = type;
+		this.sequence = sequence;
+	}
+	public int type;
+	public byte[] sequence;
+}