From 46274e9b6a2f2a6595a355778f2d3c59f1b175d6 Mon Sep 17 00:00:00 2001 From: gnosygnu Date: Wed, 11 Jan 2017 16:48:21 -0500 Subject: [PATCH] Mw_parse.Prepro: Support template pipe; Cover more tests --- .../mws/prepros/Xomw_prepro_stack.java | 2 +- .../parsers/mws/prepros/Xomw_prepro_wkr.java | 28 ++++---- .../mws/prepros/Xomw_prepro_wkr__tst.java | 67 ++++++++++++++++--- 3 files changed, 71 insertions(+), 26 deletions(-) diff --git a/400_xowa/src/gplx/xowa/parsers/mws/prepros/Xomw_prepro_stack.java b/400_xowa/src/gplx/xowa/parsers/mws/prepros/Xomw_prepro_stack.java index c848b6827..29472ac7b 100644 --- a/400_xowa/src/gplx/xowa/parsers/mws/prepros/Xomw_prepro_stack.java +++ b/400_xowa/src/gplx/xowa/parsers/mws/prepros/Xomw_prepro_stack.java @@ -120,7 +120,7 @@ class Xomw_prepro_piece { public void Set_flags(Xomw_prepro_flags flags) { int parts_len = parts.Len(); boolean open_is_nl = Bry_.Eq(open, Byte_ascii.Nl_bry); - boolean find_pipe = !open_is_nl && Bry_.Eq(open, Brack_bgn_bry); + boolean find_pipe = !open_is_nl && !Bry_.Eq(open, Brack_bgn_bry); flags.Find_pipe = find_pipe; flags.Find_eq = find_pipe && parts_len > 1 && ((Xomw_prepro_part)parts.Get_at(parts_len - 1)).Eqpos != -1; flags.In_heading = open_is_nl; diff --git a/400_xowa/src/gplx/xowa/parsers/mws/prepros/Xomw_prepro_wkr.java b/400_xowa/src/gplx/xowa/parsers/mws/prepros/Xomw_prepro_wkr.java index 964c84da2..9f8d64738 100644 --- a/400_xowa/src/gplx/xowa/parsers/mws/prepros/Xomw_prepro_wkr.java +++ b/400_xowa/src/gplx/xowa/parsers/mws/prepros/Xomw_prepro_wkr.java @@ -153,27 +153,27 @@ public class Xomw_prepro_wkr { // THREAD.UNSAFE: caching for repeated calls } else { // Find next opening brace, closing brace or pipe - // PORTED: $search = $searchBase; + // RELIC.REGEX: $search = $searchBase; if (stack.top == null) { cur_closing = Bry_.Empty; } else { cur_closing = stack.top.close; - // RELIC: $search .= $currentClosing; + // RELIC.REGEX: $search .= $currentClosing; } if (find_pipe) { - // RELIC: $search 
.= '|'; + // RELIC.REGEX: $search .= '|'; } if (find_equals) { // First equals will be for the template - // RELIC: $search .= '='; + // RELIC.REGEX: $search .= '='; } // Output literal section, advance input counter // PORTED: "$literalLength = strcspn(src, $search, i)"; NOTE: no trie b/c of frequent changes to $search int literal_len = 0; boolean loop_stop = false; - // read String until search_char is found + // loop chars until search_char is found for (int j = i; j < src_len; j++) { byte b = src[j]; switch (b) { // handle '$searchBase = "[{<\n";' @@ -226,7 +226,7 @@ public class Xomw_prepro_wkr { // THREAD.UNSAFE: caching for repeated calls } } else { - // PORTED: corresponding block of MW code; note complexity to handle 2 char byte[] + // PORTED: "if ( $curChar == '|' ) {", etc.. Xomw_prepro_curchar_itm cur_char_itm = (Xomw_prepro_curchar_itm)cur_char_trie.Match_at(trv, src, i, src_len); if (cur_char_itm != null) { cur_char = cur_char_itm.bry; @@ -239,7 +239,7 @@ public class Xomw_prepro_wkr { // THREAD.UNSAFE: caching for repeated calls // PORTED: "elseif ( $curChar == $currentClosing )" case Byte_ascii.Curly_end: found = Found__close; break; case Byte_ascii.Brack_end: found = Found__close; break; - case Byte_ascii.Bang: found = Found__close; break; + case Byte_ascii.At: found = Found__close; break; // NOTE: At is type for "}-" // PORTED: "elseif ( isset( $this->rules[$curChar] ) )" case Byte_ascii.Curly_bgn: {found = Found__open; rule = rule_curly; break;} @@ -263,7 +263,7 @@ public class Xomw_prepro_wkr { // THREAD.UNSAFE: caching for repeated calls } // Determine element name - // PORTED: $elementsRegex = "~($xmlishRegex)(?:\s|\/>|>)|(!--)~iA"; EX: "(span|div)(?:\s|\/>|>)|(!--) + // PORTED: $elementsRegex = "~($xmlishRegex)(?:\s|\/>|>)|(!--)~iA"; EX: "(pre|ref)(?:\s|\/>|>)|(!--) Xomw_prepro_elem element = (Xomw_prepro_elem)elements_trie.Match_at(trv, src, i + 1, src_len); if (element == null) { // Element name missing or not listed @@ -295,8 +295,8 @@ 
public class Xomw_prepro_wkr { // THREAD.UNSAFE: caching for repeated calls // PORTED: $wsEnd = $endPos + 2 + strspn( $text, " \t", $endPos + 3 ); int ws_end = end_pos + 2; // set pos to ">" int ws_end2 = Bry_find_.Find_fwd_while_space_or_tab(src, end_pos + 3, src_len); - if (ws_end2 != ws_end + 1) // if ws after ">" - ws_end = ws_end2 - 1; // set to "last space" + if (ws_end2 != ws_end + 1) // if ws is after ">"... + ws_end = ws_end2 - 1; // ...then set to "last space" as per comment above // Keep looking forward as long as we're finding more // comments. @@ -370,7 +370,7 @@ public class Xomw_prepro_wkr { // THREAD.UNSAFE: caching for repeated calls } byte[] name = element.name; - // RELIC:$lowerName = strtolower( $name ); + // RELIC.BTRIE_CI: $lowerName = strtolower( $name ); int atr_bgn = i + name.length + 1; // Find end of tag @@ -496,7 +496,7 @@ public class Xomw_prepro_wkr { // THREAD.UNSAFE: caching for repeated calls int eq_end = Bry_find_.Find_fwd_while(src, i, i + 6, Byte_ascii.Eq); // PORTED:strspn( $src, '=', $i, 6 ); int count = eq_end - i; - if (count == 1 && find_equals) { + if (count == 1 && find_equals) { // EX: "{{a|\n=b=\n" // DWIM: This looks kind of like a name/value separator. // Let's let the equals handler have it and break the // potential heading. This is heuristic, but AFAICT the @@ -655,7 +655,7 @@ public class Xomw_prepro_wkr { // THREAD.UNSAFE: caching for repeated calls // The invocation is at the start of the line if lineStart is set in // the stack, and all opening brackets are used up. 
byte[] attr = null; - if (max_count == matching_count && !piece.line_start) { + if (max_count == matching_count && piece.line_start) { // RELIC:!empty( $piece->lineStart ) attr = Bry_.new_a7(" lineStart=\"1\""); } else { @@ -786,7 +786,7 @@ public class Xomw_prepro_wkr { // THREAD.UNSAFE: caching for repeated calls // handle "}-" separately byte[] langv_end = Bry_.new_a7("}-"); - rv.Add_obj(langv_end, new Xomw_prepro_curchar_itm(langv_end, Byte_ascii.Bang)); + rv.Add_obj(langv_end, new Xomw_prepro_curchar_itm(langv_end, Byte_ascii.At)); return rv; } } diff --git a/400_xowa/src/gplx/xowa/parsers/mws/prepros/Xomw_prepro_wkr__tst.java b/400_xowa/src/gplx/xowa/parsers/mws/prepros/Xomw_prepro_wkr__tst.java index e22779cb1..346162b02 100644 --- a/400_xowa/src/gplx/xowa/parsers/mws/prepros/Xomw_prepro_wkr__tst.java +++ b/400_xowa/src/gplx/xowa/parsers/mws/prepros/Xomw_prepro_wkr__tst.java @@ -26,24 +26,27 @@ public class Xomw_prepro_wkr__tst { fxt.Test__parse("a[[b]]c", "a[[b]]c"); } @Test public void Template() { - fxt.Test__parse("a{{b}}c", "ac"); + fxt.Test__parse("a{{b}}c", "ac"); + } + @Test public void Template__args() { + fxt.Test__parse("a{{b|c|d}}e", "ae"); } @Test public void Tplarg() { - fxt.Test__parse("a{{{b}}}c", "abc"); + fxt.Test__parse("a{{{b}}}c", "abc"); } @Test public void Comment() { fxt.Test__parse("ac", "a<!--b-->c"); } - @Test public void Comment__dangling() { + @Test public void Comment__dangling() {// COVERS: "Unclosed comment in input, runs to end" fxt.Test__parse("a c", "a <!--b--> c"); + @Test public void Comment__ws() { // COVERS: "Search backwards for leading whitespace" + fxt.Test__parse("a c", "a <!--b--> c"); // NOTE: space is outside comment } - @Test public void Comment__many__ws() { // NOTE: space is outside comment - fxt.Test__parse("a z", "a <!--1--> <!--2--> z"); + @Test public void Comment__many__ws() {// COVERS: "Dump all but the last comment to the accumulator" + fxt.Test__parse("a z", "a <!--1--> <!--2--> z"); // NOTE: space is 
outside comment; } - @Test public void Comment__nl__ws() { // NOTE: space is inside comment if flanked by nl + @Test public void Comment__nl__ws() { // COVERS: "Eat the line if possible" fxt.Test__parse(String_.Concat_lines_nl_skip_last ( "a" , " " @@ -51,14 +54,40 @@ public class Xomw_prepro_wkr__tst { , "z" ), String_.Concat_lines_nl_skip_last ( "a" - , " <!--1--> " + , " <!--1--> " // NOTE: space is inside if flanked by nl; , " <!--2--> " , "z" )); } - @Test public void Ext__pre() { + @Test public void Ext() { // COVERS.ALSO: "Note that the attr element contains the whitespace between name and attribute," fxt.Test__parse("a
b
c", "apre id="1"b</pre>c"); } + @Test public void Ext__inline() { // COVERS: "if ( $text[$tagEndPos - 1] == '/' ) {" + fxt.Test__parse("a
b"   , "apreb");
+		fxt.Test__parse("a
b"  , "apre b");
+	}
+	@Test  public void Ext__end__pass__space() {// COVERS: "\s*" in `preg_match( "/<\/" . preg_quote( $name, '/' ) . "\s*>/i",`
+		fxt.Test__parse("a
b
c", "apreb</pre >c"); + } + @Test public void Ext__end__pass__name() { // COVERS: "\s*" in `preg_match( "/<\/" . preg_quote( $name, '/' ) . "\s*>/i",` + fxt.Test__parse("a
b
c", "apreb</pro></pre>c"); + } + @Test public void Ext__end__fail__angle() {// COVERS: "\s*" in `preg_match( "/<\/" . preg_quote( $name, '/' ) . "\s*>/i",` + fxt.Test__parse("a
b
a<pre>b</pre c"); + } + @Test public void Ext__dangling() { // COVERS: "Let it run out to the end of the text." + fxt.Test__parse("a
bc", "a<pre>bc");
+	}
+	@Test  public void Ext__dangling__many() {	// COVERS: "Cache results, otherwise we have O(N^2) performance for input like ..."
+		fxt.Test__parse("a
bc", "a<pre><pre><pre>bc");
+	}
+	@Test  public void Ext__unclosed() {		// COVERS: "Infinite backtrack"
+		fxt.Test__parse("a
a<pre bcd");
+	}		
+	@Test  public void Ext__noinclude() {	    // COVERS: "<includeonly> and <noinclude> just become <ignore> tags"
+		fxt.Init__for_inclusion_(Bool_.N);	// set the fixture's for-inclusion flag to N before parsing
+		fxt.Test__parse("abcde", "a<includeonly>b<noinclude>c</noinclude>d</includeonly>e");	// NOTE(review): "abcde" looks like it lost its markup during extraction; confirm both literals against the repository
+	}
 	@Test  public void Heading() {
 		fxt.Test__parse(String_.Concat_lines_nl_skip_last
 		( "a"
@@ -79,7 +108,7 @@ public class Xomw_prepro_wkr__tst {
 		, "== b1 =="
 		));
 	}
-	@Test  public void Heading__bos__implied_nl() {
+	@Test  public void Heading__bos__implied_nl() {  // COVERS: "Is this the start of a heading?"
 		fxt.Test__parse(String_.Concat_lines_nl_skip_last
 		( "== b1 =="
 		, "z"
@@ -88,6 +117,18 @@ public class Xomw_prepro_wkr__tst {
 		, "z"
 		));
 	}
+	@Test  public void Heading__eq_1() {	// COVERS: "DWIM: This looks kind of like a name/value separator."
+		fxt.Test__parse(String_.Concat_lines_nl_skip_last
+		( "a{{b|"	// template is still open and a pipe has been seen when the next line starts
+		, "=c="	// line starts with exactly one "="; exercises the "count == 1 && find_equals" branch in Xomw_prepro_wkr
+		, "}}d"
+		), String_.Concat_lines_nl_skip_last
+		( "ad"	// NOTE(review): this literal may have lost markup during extraction; confirm against the repository
+		));
+	}
+	
 	@Test  public void Inclusion__n() {
 		fxt.Init__for_inclusion_(Bool_.N);
 		fxt.Test__parse("abc", "a<onlyinclude>b</onlyinclude>c");
@@ -96,6 +137,10 @@ public class Xomw_prepro_wkr__tst {
 		fxt.Init__for_inclusion_(Bool_.Y);
 		fxt.Test__parse("abc", "a<onlyinclude>b</onlyinclude>c");
 	}
+	@Test  public void Ignored__noinclude() {	// COVERS: "Handle ignored tags"
+		fxt.Init__for_inclusion_(Bool_.N);	// set the fixture's for-inclusion flag to N before parsing
+		fxt.Test__parse("abc", "a<noinclude>b</noinclude>c");	// NOTE(review): "abc" looks like it lost its <noinclude> markup during extraction; confirm both literals against the repository
+	}
 }
 class Xomw_prepro_wkr__fxt {
 	private final    Xomw_prepro_wkr wkr = new Xomw_prepro_wkr();