Mw_parse.Prepro: Support template pipe; Cover more tests

2025-06-13 12:54:14 +00:00 · 2017-01-11 16:48:21 -05:00 · 2017-01-11 16:48:21 -05:00 · 46274e9b6a
commit 46274e9b6a
parent 7fae7b832b
3 changed files with 71 additions and 26 deletions
--- a/400_xowa/src/gplx/xowa/parsers/mws/prepros/Xomw_prepro_stack.java
+++ b/400_xowa/src/gplx/xowa/parsers/mws/prepros/Xomw_prepro_stack.java
@ -120,7 +120,7 @@ class Xomw_prepro_piece {
 	public void Set_flags(Xomw_prepro_flags flags) {
 		int parts_len = parts.Len();
 		boolean open_is_nl = Bry_.Eq(open, Byte_ascii.Nl_bry);
-		boolean find_pipe = !open_is_nl && Bry_.Eq(open, Brack_bgn_bry);
+		boolean find_pipe = !open_is_nl && !Bry_.Eq(open, Brack_bgn_bry);
 		flags.Find_pipe = find_pipe;
 		flags.Find_eq = find_pipe && parts_len > 1 && ((Xomw_prepro_part)parts.Get_at(parts_len - 1)).Eqpos != -1;
 		flags.In_heading = open_is_nl;
--- a/400_xowa/src/gplx/xowa/parsers/mws/prepros/Xomw_prepro_wkr.java
+++ b/400_xowa/src/gplx/xowa/parsers/mws/prepros/Xomw_prepro_wkr.java
@ -153,27 +153,27 @@ public class Xomw_prepro_wkr {	// THREAD.UNSAFE: caching for repeated calls
 			}
 			else {
 				// Find next opening brace, closing brace or pipe		
-				// PORTED: $search = $searchBase;
+				// RELIC.REGEX: $search = $searchBase;
 				if (stack.top == null) {
 					cur_closing = Bry_.Empty;
 				}
 				else {
 					cur_closing = stack.top.close;
-					// RELIC: $search .= $currentClosing;
+					// RELIC.REGEX: $search .= $currentClosing;
 				}
 				if (find_pipe) {
-					// RELIC: $search .= '|';
+					// RELIC.REGEX: $search .= '|';
 				}
 				if (find_equals) {
 					// First equals will be for the template
-					// RELIC: $search .= '=';
+					// RELIC.REGEX: $search .= '=';
 				}

 				// Output literal section, advance input counter
 				// PORTED: "$literalLength = strcspn(src, $search, i)"; NOTE: no trie b/c of frequent changes to $search
 				int literal_len = 0; 
 				boolean loop_stop = false;
-				// read String until search_char is found
+				// loop chars until search_char is found
 				for (int j = i; j < src_len; j++) {
 					byte b = src[j];
 					switch (b) {                // handle '$searchBase = "[{<\n";'
@ -226,7 +226,7 @@ public class Xomw_prepro_wkr {	// THREAD.UNSAFE: caching for repeated calls
 					}
 				}
 				else {
-					// PORTED: corresponding block of MW code; note complexity to handle 2 char byte[]
+					// PORTED: "if ( $curChar == '|' ) {", etc..
 					Xomw_prepro_curchar_itm cur_char_itm = (Xomw_prepro_curchar_itm)cur_char_trie.Match_at(trv, src, i, src_len);
 					if (cur_char_itm != null) {
 						cur_char = cur_char_itm.bry;
@ -239,7 +239,7 @@ public class Xomw_prepro_wkr {	// THREAD.UNSAFE: caching for repeated calls
 							// PORTED: "elseif ( $curChar == $currentClosing )"
 							case Byte_ascii.Curly_end:    found = Found__close; break;
 							case Byte_ascii.Brack_end:    found = Found__close; break;
-							case Byte_ascii.Bang:         found = Found__close; break;
+							case Byte_ascii.At:           found = Found__close; break;	// NOTE: At is the type byte registered for the 2-char closing sequence "}-"

 							// PORTED: "elseif ( isset( $this->rules[$curChar] ) )"
 							case Byte_ascii.Curly_bgn:   {found = Found__open; rule = rule_curly; break;}
@ -263,7 +263,7 @@ public class Xomw_prepro_wkr {	// THREAD.UNSAFE: caching for repeated calls
 				}

 				// Determine element name
-				// PORTED: $elementsRegex = "~($xmlishRegex)(?:\s|\/>|>)|(!--)~iA"; EX: "(span|div)(?:\s|\/>|>)|(!--)
+				// PORTED: $elementsRegex = "~($xmlishRegex)(?:\s|\/>|>)|(!--)~iA"; EX: "(pre|ref)(?:\s|\/>|>)|(!--)
 				Xomw_prepro_elem element = (Xomw_prepro_elem)elements_trie.Match_at(trv, src, i + 1, src_len);
 				if (element == null) {
 					// Element name missing or not listed
@ -295,8 +295,8 @@ public class Xomw_prepro_wkr {	// THREAD.UNSAFE: caching for repeated calls
 						// PORTED: $wsEnd = $endPos + 2 + strspn( $text, " \t", $endPos + 3 );
 						int ws_end = end_pos + 2;	// set pos to ">"
 						int ws_end2 = Bry_find_.Find_fwd_while_space_or_tab(src, end_pos + 3, src_len);
-						if (ws_end2 != ws_end + 1)	// if ws after ">"
-							ws_end = ws_end2 - 1;	// set to "last space"
+						if (ws_end2 != ws_end + 1)	// if ws is after ">"...
+							ws_end = ws_end2 - 1;	// ...then set to "last space" as per comment above

 						// Keep looking forward as long as we're finding more
 						// comments.
@ -370,7 +370,7 @@ public class Xomw_prepro_wkr {	// THREAD.UNSAFE: caching for repeated calls
 				}

 				byte[] name = element.name;
-				// RELIC:$lowerName = strtolower( $name );
+				// RELIC.BTRIE_CI: $lowerName = strtolower( $name );
 				int atr_bgn = i + name.length + 1;

 				// Find end of tag
@ -496,7 +496,7 @@ public class Xomw_prepro_wkr {	// THREAD.UNSAFE: caching for repeated calls

 				int eq_end = Bry_find_.Find_fwd_while(src, i, i + 6, Byte_ascii.Eq);	// PORTED:strspn( $src, '=', $i, 6 );					
 				int count = eq_end - i;
-				if (count == 1 && find_equals) {
+				if (count == 1 && find_equals) {	// EX: "{{a|\n=b=\n"
 					// DWIM: This looks kind of like a name/value separator.
 					// Let's let the equals handler have it and break the
 					// potential heading. This is heuristic, but AFAICT the
@ -655,7 +655,7 @@ public class Xomw_prepro_wkr {	// THREAD.UNSAFE: caching for repeated calls
 					// The invocation is at the start of the line if lineStart is set in
 					// the stack, and all opening brackets are used up.
 					byte[] attr = null;
-					if (max_count == matching_count && !piece.line_start) {
+					if (max_count == matching_count && piece.line_start) {	// RELIC:!empty( $piece->lineStart )
 						attr = Bry_.new_a7(" lineStart=\"1\"");
 					}
 					else {
@ -786,7 +786,7 @@ public class Xomw_prepro_wkr {	// THREAD.UNSAFE: caching for repeated calls

 		// handle "}-" separately
 		byte[] langv_end = Bry_.new_a7("}-");
-		rv.Add_obj(langv_end, new Xomw_prepro_curchar_itm(langv_end, Byte_ascii.Bang));
+		rv.Add_obj(langv_end, new Xomw_prepro_curchar_itm(langv_end, Byte_ascii.At));
 		return rv;
 	}
 }
--- a/400_xowa/src/gplx/xowa/parsers/mws/prepros/Xomw_prepro_wkr__tst.java
+++ b/400_xowa/src/gplx/xowa/parsers/mws/prepros/Xomw_prepro_wkr__tst.java
@ -26,24 +26,27 @@ public class Xomw_prepro_wkr__tst {
 		fxt.Test__parse("a[[b]]c", "<root>a[[b]]c</root>");
 	}
 	@Test  public void Template() {
-		fxt.Test__parse("a{{b}}c", "<root>a<template lineStart=\"1\"><title>b</title></template>c</root>");
+		fxt.Test__parse("a{{b}}c", "<root>a<template><title>b</title></template>c</root>");
+	}
+	@Test  public void Template__args() {
+		fxt.Test__parse("a{{b|c|d}}e", "<root>a<template><title>b</title><part><name index=\"1\" /><value>c</value></part><part><name index=\"2\" /><value>d</value></part></template>e</root>");
 	}
 	@Test  public void Tplarg() {
-		fxt.Test__parse("a{{{b}}}c", "<root>a<tplarg lineStart=\"1\"><title>b</title></tplarg>c</root>");
+		fxt.Test__parse("a{{{b}}}c", "<root>a<tplarg><title>b</title></tplarg>c</root>");
 	}
 	@Test  public void Comment() {
 		fxt.Test__parse("a<!--b-->c", "<root>a<comment>&lt;!--b--&gt;</comment>c</root>");
 	}
-	@Test  public void Comment__dangling() {
+	@Test  public void Comment__dangling() {// COVERS: "Unclosed comment in input, runs to end"
 		fxt.Test__parse("a<!--b", "<root>a<comment>&lt;!--b</comment></root>");
 	}
-	@Test  public void Comment__ws() {		// NOTE: space is outside comment
-		fxt.Test__parse("a <!--b--> c", "<root>a <comment>&lt;!--b--&gt;</comment> c</root>");
+	@Test  public void Comment__ws() {		// COVERS: "Search backwards for leading whitespace"
+		fxt.Test__parse("a <!--b--> c", "<root>a <comment>&lt;!--b--&gt;</comment> c</root>");	// NOTE: space is outside comment
 	}
-	@Test  public void Comment__many__ws() {		// NOTE: space is outside comment
-		fxt.Test__parse("a <!--1--> <!--2--> z", "<root>a <comment>&lt;!--1--&gt;</comment> <comment>&lt;!--2--&gt;</comment> z</root>");
+	@Test  public void Comment__many__ws() {// COVERS: "Dump all but the last comment to the accumulator"
+		fxt.Test__parse("a <!--1--> <!--2--> z", "<root>a <comment>&lt;!--1--&gt;</comment> <comment>&lt;!--2--&gt;</comment> z</root>"); // NOTE: space is outside comment; 
 	}
-	@Test  public void Comment__nl__ws() {	// NOTE: space is inside comment if flanked by nl
+	@Test  public void Comment__nl__ws() {	// COVERS: "Eat the line if possible"
 		fxt.Test__parse(String_.Concat_lines_nl_skip_last
 		( "a"
 		, " <!--1--> "
@ -51,14 +54,40 @@ public class Xomw_prepro_wkr__tst {
 		, "z"
 		), String_.Concat_lines_nl_skip_last
 		( "<root>a"
-		, "<comment> &lt;!--1--&gt; "
+		, "<comment> &lt;!--1--&gt; "  // NOTE: space is inside <comment> if flanked by nl
 		, "</comment><comment> &lt;!--2--&gt; "
 		, "</comment>z</root>"
 		));
 	}
-	@Test  public void Ext__pre() {
+	@Test  public void Ext() {					// COVERS.ALSO: "Note that the attr element contains the whitespace between name and attribute," 
 		fxt.Test__parse("a<pre id=\"1\">b</pre>c", "<root>a<ext><name>pre</name><attr> id=&quot;1&quot;</attr><inner>b</inner><close>&lt;/pre&gt;</close></ext>c</root>");
 	}
+	@Test  public void Ext__inline() {			// COVERS: "if ( $text[$tagEndPos - 1] == '/' ) {"
+		fxt.Test__parse("a<pre/>b"   , "<root>a<ext><name>pre</name><attr></attr></ext>b</root>");
+		fxt.Test__parse("a<pre />b"  , "<root>a<ext><name>pre</name><attr> </attr></ext>b</root>");
+	}
+	@Test  public void Ext__end__pass__space() {// COVERS: "\s*" in `preg_match( "/<\/" . preg_quote( $name, '/' ) . "\s*>/i",`
+		fxt.Test__parse("a<pre>b</pre >c", "<root>a<ext><name>pre</name><attr></attr><inner>b</inner><close>&lt;/pre &gt;</close></ext>c</root>");
+	}
+	@Test  public void Ext__end__pass__name() { // COVERS: "preg_quote( $name, '/' )" in `preg_match( "/<\/" . preg_quote( $name, '/' ) . "\s*>/i",`; "</pro>" does not close "<pre>" and is kept as escaped inner text
+		fxt.Test__parse("a<pre>b</pro></pre>c", "<root>a<ext><name>pre</name><attr></attr><inner>b&lt;/pro&gt;</inner><close>&lt;/pre&gt;</close></ext>c</root>");
+	}
+	@Test  public void Ext__end__fail__angle() {// COVERS: ">" in `preg_match( "/<\/" . preg_quote( $name, '/' ) . "\s*>/i",`; "</pre c" lacks the closing ">", so the expected output is plain escaped text with no <ext> node
+		fxt.Test__parse("a<pre>b</pre c", "<root>a&lt;pre&gt;b&lt;/pre c</root>");
+	}
+	@Test  public void Ext__dangling() {		// COVERS: "Let it run out to the end of the text."
+		fxt.Test__parse("a<pre>bc", "<root>a&lt;pre&gt;bc</root>");
+	}
+	@Test  public void Ext__dangling__many() {	// COVERS: "Cache results, otherwise we have O(N^2) performance for input like <foo><foo><foo>..."
+		fxt.Test__parse("a<pre><pre><pre>bc", "<root>a&lt;pre&gt;&lt;pre&gt;&lt;pre&gt;bc</root>");
+	}
+	@Test  public void Ext__unclosed() {		// COVERS: "Infinite backtrack"
+		fxt.Test__parse("a<pre bcd", "<root>a&lt;pre bcd</root>");
+	}		
+	@Test  public void Ext__noinclude() {	    // COVERS: "<includeonly> and <noinclude> just become <ignore> tags"
+		fxt.Init__for_inclusion_(Bool_.N);
+		fxt.Test__parse("a<includeonly>b<noinclude>c</noinclude>d</includeonly>e", "<root>a<ignore>&lt;includeonly&gt;b&lt;noinclude&gt;c&lt;/noinclude&gt;d&lt;/includeonly&gt;</ignore>e</root>");
+	}
 	@Test  public void Heading() {
 		fxt.Test__parse(String_.Concat_lines_nl_skip_last
 		( "a"
@ -79,7 +108,7 @@ public class Xomw_prepro_wkr__tst {
 		, "<h level=\"2\" i=\"1\">== b1 ==</h></root>"
 		));
 	}
-	@Test  public void Heading__bos__implied_nl() {
+	@Test  public void Heading__bos__implied_nl() {  // COVERS: "Is this the start of a heading?"
 		fxt.Test__parse(String_.Concat_lines_nl_skip_last
 		( "== b1 =="
 		, "z"
@ -88,6 +117,18 @@ public class Xomw_prepro_wkr__tst {
 		, "z</root>"
 		));
 	}
+	@Test  public void Heading__eq_1() {	// COVERS: "DWIM: This looks kind of like a name/value separator."
+		fxt.Test__parse(String_.Concat_lines_nl_skip_last
+		( "a{{b|"
+		, "=c="
+		, "}}d"
+		), String_.Concat_lines_nl_skip_last
+		( "<root>a<template><title>b</title><part><name>"
+		, "</name>=<value>c="
+		, "</value></part></template>d</root>"
+		));
+	}
+	
 	@Test  public void Inclusion__n() {
 		fxt.Init__for_inclusion_(Bool_.N);
 		fxt.Test__parse("a<onlyinclude>b</onlyinclude>c", "<root>a<ignore>&lt;onlyinclude&gt;</ignore>b<ignore>&lt;/onlyinclude&gt;</ignore>c</root>");
@ -96,6 +137,10 @@ public class Xomw_prepro_wkr__tst {
 		fxt.Init__for_inclusion_(Bool_.Y);
 		fxt.Test__parse("a<onlyinclude>b</onlyinclude>c", "<root><ignore>a&lt;onlyinclude&gt;</ignore>b<ignore>&lt;/onlyinclude&gt;c</ignore></root>");
 	}
+	@Test  public void Ignored__noinclude() {	// COVERS: "Handle ignored tags"
+		fxt.Init__for_inclusion_(Bool_.N);
+		fxt.Test__parse("a<noinclude>b</noinclude>c", "<root>a<ignore>&lt;noinclude&gt;</ignore>b<ignore>&lt;/noinclude&gt;</ignore>c</root>");
+	}
 }
 class Xomw_prepro_wkr__fxt {
 	private final    Xomw_prepro_wkr wkr = new Xomw_prepro_wkr();