Mw_parse.Prepro: Support template pipe; Cover more tests

pull/620/head
gnosygnu 8 years ago
parent 7fae7b832b
commit 46274e9b6a

@ -120,7 +120,7 @@ class Xomw_prepro_piece {
public void Set_flags(Xomw_prepro_flags flags) {
int parts_len = parts.Len();
boolean open_is_nl = Bry_.Eq(open, Byte_ascii.Nl_bry);
boolean find_pipe = !open_is_nl && Bry_.Eq(open, Brack_bgn_bry);
boolean find_pipe = !open_is_nl && !Bry_.Eq(open, Brack_bgn_bry);
flags.Find_pipe = find_pipe;
flags.Find_eq = find_pipe && parts_len > 1 && ((Xomw_prepro_part)parts.Get_at(parts_len - 1)).Eqpos != -1;
flags.In_heading = open_is_nl;

@ -153,27 +153,27 @@ public class Xomw_prepro_wkr { // THREAD.UNSAFE: caching for repeated calls
}
else {
// Find next opening brace, closing brace or pipe
// PORTED: $search = $searchBase;
// RELIC.REGEX: $search = $searchBase;
if (stack.top == null) {
cur_closing = Bry_.Empty;
}
else {
cur_closing = stack.top.close;
// RELIC: $search .= $currentClosing;
// RELIC.REGEX: $search .= $currentClosing;
}
if (find_pipe) {
// RELIC: $search .= '|';
// RELIC.REGEX: $search .= '|';
}
if (find_equals) {
// First equals will be for the template
// RELIC: $search .= '=';
// RELIC.REGEX: $search .= '=';
}
// Output literal section, advance input counter
// PORTED: "$literalLength = strcspn(src, $search, i)"; NOTE: no trie b/c of frequent changes to $search
int literal_len = 0;
boolean loop_stop = false;
// read String until search_char is found
// loop chars until search_char is found
for (int j = i; j < src_len; j++) {
byte b = src[j];
switch (b) { // handle '$searchBase = "[{<\n";'
@ -226,7 +226,7 @@ public class Xomw_prepro_wkr { // THREAD.UNSAFE: caching for repeated calls
}
}
else {
// PORTED: corresponding block of MW code; note complexity to handle 2 char byte[]
// PORTED: "if ( $curChar == '|' ) {", etc..
Xomw_prepro_curchar_itm cur_char_itm = (Xomw_prepro_curchar_itm)cur_char_trie.Match_at(trv, src, i, src_len);
if (cur_char_itm != null) {
cur_char = cur_char_itm.bry;
@ -239,7 +239,7 @@ public class Xomw_prepro_wkr { // THREAD.UNSAFE: caching for repeated calls
// PORTED: "elseif ( $curChar == $currentClosing )"
case Byte_ascii.Curly_end: found = Found__close; break;
case Byte_ascii.Brack_end: found = Found__close; break;
case Byte_ascii.Bang: found = Found__close; break;
case Byte_ascii.At: found = Found__close; break; // NOTE: At is type for "}-"
// PORTED: "elseif ( isset( $this->rules[$curChar] ) )"
case Byte_ascii.Curly_bgn: {found = Found__open; rule = rule_curly; break;}
@ -263,7 +263,7 @@ public class Xomw_prepro_wkr { // THREAD.UNSAFE: caching for repeated calls
}
// Determine element name
// PORTED: $elementsRegex = "~($xmlishRegex)(?:\s|\/>|>)|(!--)~iA"; EX: "(span|div)(?:\s|\/>|>)|(!--)
// PORTED: $elementsRegex = "~($xmlishRegex)(?:\s|\/>|>)|(!--)~iA"; EX: "(pre|ref)(?:\s|\/>|>)|(!--)"
Xomw_prepro_elem element = (Xomw_prepro_elem)elements_trie.Match_at(trv, src, i + 1, src_len);
if (element == null) {
// Element name missing or not listed
@ -295,8 +295,8 @@ public class Xomw_prepro_wkr { // THREAD.UNSAFE: caching for repeated calls
// PORTED: $wsEnd = $endPos + 2 + strspn( $text, " \t", $endPos + 3 );
int ws_end = end_pos + 2; // set pos to ">"
int ws_end2 = Bry_find_.Find_fwd_while_space_or_tab(src, end_pos + 3, src_len);
if (ws_end2 != ws_end + 1) // if ws after ">"
ws_end = ws_end2 - 1; // set to "last space"
if (ws_end2 != ws_end + 1) // if ws is after ">"...
ws_end = ws_end2 - 1; // ...then set to "last space" as per comment above
// Keep looking forward as long as we're finding more
// comments.
@ -370,7 +370,7 @@ public class Xomw_prepro_wkr { // THREAD.UNSAFE: caching for repeated calls
}
byte[] name = element.name;
// RELIC:$lowerName = strtolower( $name );
// RELIC.BTRIE_CI: $lowerName = strtolower( $name );
int atr_bgn = i + name.length + 1;
// Find end of tag
@ -496,7 +496,7 @@ public class Xomw_prepro_wkr { // THREAD.UNSAFE: caching for repeated calls
int eq_end = Bry_find_.Find_fwd_while(src, i, i + 6, Byte_ascii.Eq); // PORTED:strspn( $src, '=', $i, 6 );
int count = eq_end - i;
if (count == 1 && find_equals) {
if (count == 1 && find_equals) { // EX: "{{a|\n=b=\n"
// DWIM: This looks kind of like a name/value separator.
// Let's let the equals handler have it and break the
// potential heading. This is heuristic, but AFAICT the
@ -655,7 +655,7 @@ public class Xomw_prepro_wkr { // THREAD.UNSAFE: caching for repeated calls
// The invocation is at the start of the line if lineStart is set in
// the stack, and all opening brackets are used up.
byte[] attr = null;
if (max_count == matching_count && !piece.line_start) {
if (max_count == matching_count && piece.line_start) { // RELIC:!empty( $piece->lineStart )
attr = Bry_.new_a7(" lineStart=\"1\"");
}
else {
@ -786,7 +786,7 @@ public class Xomw_prepro_wkr { // THREAD.UNSAFE: caching for repeated calls
// handle "}-" separately
byte[] langv_end = Bry_.new_a7("}-");
rv.Add_obj(langv_end, new Xomw_prepro_curchar_itm(langv_end, Byte_ascii.Bang));
rv.Add_obj(langv_end, new Xomw_prepro_curchar_itm(langv_end, Byte_ascii.At));
return rv;
}
}

@ -26,24 +26,27 @@ public class Xomw_prepro_wkr__tst {
fxt.Test__parse("a[[b]]c", "<root>a[[b]]c</root>");
}
@Test public void Template() {
fxt.Test__parse("a{{b}}c", "<root>a<template lineStart=\"1\"><title>b</title></template>c</root>");
fxt.Test__parse("a{{b}}c", "<root>a<template><title>b</title></template>c</root>");
}
// Template invocation with positional arguments.
// The expected XML shows each pipe-separated argument as its own <part>, with an
// empty, 1-based indexed <name index="N" /> and the argument text in <value>.
@Test public void Template__args() {
fxt.Test__parse("a{{b|c|d}}e", "<root>a<template><title>b</title><part><name index=\"1\" /><value>c</value></part><part><name index=\"2\" /><value>d</value></part></template>e</root>");
}
@Test public void Tplarg() {
fxt.Test__parse("a{{{b}}}c", "<root>a<tplarg lineStart=\"1\"><title>b</title></tplarg>c</root>");
fxt.Test__parse("a{{{b}}}c", "<root>a<tplarg><title>b</title></tplarg>c</root>");
}
// Basic closed HTML comment with no surrounding whitespace.
// The expected XML wraps the whole "<!--b-->" in <comment> with its angle
// brackets escaped as &lt; / &gt;; the text before and after is left untouched.
@Test public void Comment() {
fxt.Test__parse("a<!--b-->c", "<root>a<comment>&lt;!--b--&gt;</comment>c</root>");
}
@Test public void Comment__dangling() {
@Test public void Comment__dangling() {// COVERS: "Unclosed comment in input, runs to end"
fxt.Test__parse("a<!--b", "<root>a<comment>&lt;!--b</comment></root>");
}
@Test public void Comment__ws() { // NOTE: space is outside comment
fxt.Test__parse("a <!--b--> c", "<root>a <comment>&lt;!--b--&gt;</comment> c</root>");
@Test public void Comment__ws() { // COVERS: "Search backwards for leading whitespace"
fxt.Test__parse("a <!--b--> c", "<root>a <comment>&lt;!--b--&gt;</comment> c</root>"); // NOTE: space is outside comment
}
@Test public void Comment__many__ws() { // NOTE: space is outside comment
fxt.Test__parse("a <!--1--> <!--2--> z", "<root>a <comment>&lt;!--1--&gt;</comment> <comment>&lt;!--2--&gt;</comment> z</root>");
@Test public void Comment__many__ws() {// COVERS: "Dump all but the last comment to the accumulator"
fxt.Test__parse("a <!--1--> <!--2--> z", "<root>a <comment>&lt;!--1--&gt;</comment> <comment>&lt;!--2--&gt;</comment> z</root>"); // NOTE: space is outside comment;
}
@Test public void Comment__nl__ws() { // NOTE: space is inside comment if flanked by nl
@Test public void Comment__nl__ws() { // COVERS: "Eat the line if possible"
fxt.Test__parse(String_.Concat_lines_nl_skip_last
( "a"
, " <!--1--> "
@ -51,14 +54,40 @@ public class Xomw_prepro_wkr__tst {
, "z"
), String_.Concat_lines_nl_skip_last
( "<root>a"
, "<comment> &lt;!--1--&gt; "
, "<comment> &lt;!--1--&gt; " // NOTE: space is inside </comment> if flanked by nl;
, "</comment><comment> &lt;!--2--&gt; "
, "</comment>z</root>"
));
}
@Test public void Ext__pre() {
@Test public void Ext() { // COVERS.ALSO: "Note that the attr element contains the whitespace between name and attribute,"
fxt.Test__parse("a<pre id=\"1\">b</pre>c", "<root>a<ext><name>pre</name><attr> id=&quot;1&quot;</attr><inner>b</inner><close>&lt;/pre&gt;</close></ext>c</root>");
}
// Self-closing extension tag ("<pre/>").
// The expected XML contains only <name> and <attr> inside <ext>; no <inner> or <close> is emitted.
@Test public void Ext__inline() { // COVERS: "if ( $text[$tagEndPos - 1] == '/' ) {"
fxt.Test__parse("a<pre/>b" , "<root>a<ext><name>pre</name><attr></attr></ext>b</root>");
// Whitespace between the tag name and "/>" is kept as the content of <attr>.
fxt.Test__parse("a<pre />b" , "<root>a<ext><name>pre</name><attr> </attr></ext>b</root>");
}
// A closing tag with whitespace before ">" ("</pre >") still closes the extension.
// The expected XML keeps that whitespace verbatim (escaped) inside <close>.
@Test public void Ext__end__pass__space() {// COVERS: "\s*" in `preg_match( "/<\/" . preg_quote( $name, '/' ) . "\s*>/i",`
fxt.Test__parse("a<pre>b</pre >c", "<root>a<ext><name>pre</name><attr></attr><inner>b</inner><close>&lt;/pre &gt;</close></ext>c</root>");
}
// A closing tag whose name differs from the opening tag (here "</pro>") does not end the
// <pre> extension: the expected XML keeps it as escaped text inside <inner>, and only
// the later "</pre>" is reported in <close>.
@Test public void Ext__end__pass__name() { // COVERS: "preg_quote( $name, '/' )" in `preg_match( "/<\/" . preg_quote( $name, '/' ) . "\s*>/i",`
fxt.Test__parse("a<pre>b</pro></pre>c", "<root>a<ext><name>pre</name><attr></attr><inner>b&lt;/pro&gt;</inner><close>&lt;/pre&gt;</close></ext>c</root>");
}
// A closing tag that never reaches ">" ("</pre c") is not accepted as the end tag.
// The expected XML contains no <ext> element at all: the opening "<pre>" and the
// rest of the input are emitted as escaped literal text.
@Test public void Ext__end__fail__angle() {// COVERS: trailing ">" in `preg_match( "/<\/" . preg_quote( $name, '/' ) . "\s*>/i",`
fxt.Test__parse("a<pre>b</pre c", "<root>a&lt;pre&gt;b&lt;/pre c</root>");
}
// Extension tag that is opened but has no closing tag anywhere in the input.
// The expected XML has no <ext> element; "<pre>" is emitted as escaped literal text.
@Test public void Ext__dangling() { // COVERS: "Let it run out to the end of the text."
fxt.Test__parse("a<pre>bc", "<root>a&lt;pre&gt;bc</root>");
}
// Several consecutive unclosed "<pre>" tags.
// The expected XML emits every one of them as escaped literal text, same as the single-tag case.
// NOTE(review): the COVERS note refers to a result cache for missing close tags; this test
// only checks the output, not that a cache is actually hit -- confirm against the wkr.
@Test public void Ext__dangling__many() { // COVERS: "Cache results, otherwise we have O(N^2) performance for input like <foo><foo><foo>..."
fxt.Test__parse("a<pre><pre><pre>bc", "<root>a&lt;pre&gt;&lt;pre&gt;&lt;pre&gt;bc</root>");
}
// Opening tag that never reaches its own ">" ("<pre bcd").
// The expected XML treats the "<" as literal text and escapes it; no <ext> element is produced.
@Test public void Ext__unclosed() { // COVERS: "Infinite backtrack"
fxt.Test__parse("a<pre bcd", "<root>a&lt;pre bcd</root>");
}
// <includeonly> wrapping a nested <noinclude>, parsed with the fixture's for-inclusion flag set to N.
// The expected XML puts the whole span, from "<includeonly>" through "</includeonly>" including
// the nested tags, into a single <ignore> element with angle brackets escaped.
@Test public void Ext__noinclude() { // COVERS: "<includeonly> and <noinclude> just become <ignore> tags"
fxt.Init__for_inclusion_(Bool_.N);
fxt.Test__parse("a<includeonly>b<noinclude>c</noinclude>d</includeonly>e", "<root>a<ignore>&lt;includeonly&gt;b&lt;noinclude&gt;c&lt;/noinclude&gt;d&lt;/includeonly&gt;</ignore>e</root>");
}
@Test public void Heading() {
fxt.Test__parse(String_.Concat_lines_nl_skip_last
( "a"
@ -79,7 +108,7 @@ public class Xomw_prepro_wkr__tst {
, "<h level=\"2\" i=\"1\">== b1 ==</h></root>"
));
}
@Test public void Heading__bos__implied_nl() {
@Test public void Heading__bos__implied_nl() { // COVERS: "Is this the start of a heading?"
fxt.Test__parse(String_.Concat_lines_nl_skip_last
( "== b1 =="
, "z"
@ -88,6 +117,18 @@ public class Xomw_prepro_wkr__tst {
, "z</root>"
));
}
// A line starting with a single "=" inside a template argument ("{{b|\n=c=\n}}").
// The expected XML contains no <h> element: the first "=" is emitted between </name> and
// <value> as the name/value separator, and the trailing "=" stays as plain text in <value>.
// Newlines from the input are preserved in both <name> and <value>.
@Test public void Heading__eq_1() { // COVERS: "DWIM: This looks kind of like a name/value separator."
fxt.Test__parse(String_.Concat_lines_nl_skip_last
( "a{{b|"
, "=c="
, "}}d"
), String_.Concat_lines_nl_skip_last
( "<root>a<template><title>b</title><part><name>"
, "</name>=<value>c="
, "</value></part></template>d</root>"
));
}
@Test public void Inclusion__n() {
fxt.Init__for_inclusion_(Bool_.N);
fxt.Test__parse("a<onlyinclude>b</onlyinclude>c", "<root>a<ignore>&lt;onlyinclude&gt;</ignore>b<ignore>&lt;/onlyinclude&gt;</ignore>c</root>");
@ -96,6 +137,10 @@ public class Xomw_prepro_wkr__tst {
fxt.Init__for_inclusion_(Bool_.Y);
fxt.Test__parse("a<onlyinclude>b</onlyinclude>c", "<root><ignore>a&lt;onlyinclude&gt;</ignore>b<ignore>&lt;/onlyinclude&gt;c</ignore></root>");
}
// <noinclude> parsed with the fixture's for-inclusion flag set to N.
// The expected XML wraps the opening and the closing tag each in their own <ignore>
// element, while the text between them ("b") is emitted as normal content.
@Test public void Ignored__noinclude() { // COVERS: "Handle ignored tags"
fxt.Init__for_inclusion_(Bool_.N);
fxt.Test__parse("a<noinclude>b</noinclude>c", "<root>a<ignore>&lt;noinclude&gt;</ignore>b<ignore>&lt;/noinclude&gt;</ignore>c</root>");
}
}
class Xomw_prepro_wkr__fxt {
private final Xomw_prepro_wkr wkr = new Xomw_prepro_wkr();

Loading…
Cancel
Save