mirror of
https://github.com/gnosygnu/xowa.git
synced 2025-06-06 09:24:20 +00:00
Mw_parse.Prepro: Support template pipe; Cover more tests
This commit is contained in:
parent
7fae7b832b
commit
46274e9b6a
@ -120,7 +120,7 @@ class Xomw_prepro_piece {
|
|||||||
public void Set_flags(Xomw_prepro_flags flags) {
|
public void Set_flags(Xomw_prepro_flags flags) {
|
||||||
int parts_len = parts.Len();
|
int parts_len = parts.Len();
|
||||||
boolean open_is_nl = Bry_.Eq(open, Byte_ascii.Nl_bry);
|
boolean open_is_nl = Bry_.Eq(open, Byte_ascii.Nl_bry);
|
||||||
boolean find_pipe = !open_is_nl && Bry_.Eq(open, Brack_bgn_bry);
|
boolean find_pipe = !open_is_nl && !Bry_.Eq(open, Brack_bgn_bry);
|
||||||
flags.Find_pipe = find_pipe;
|
flags.Find_pipe = find_pipe;
|
||||||
flags.Find_eq = find_pipe && parts_len > 1 && ((Xomw_prepro_part)parts.Get_at(parts_len - 1)).Eqpos != -1;
|
flags.Find_eq = find_pipe && parts_len > 1 && ((Xomw_prepro_part)parts.Get_at(parts_len - 1)).Eqpos != -1;
|
||||||
flags.In_heading = open_is_nl;
|
flags.In_heading = open_is_nl;
|
||||||
|
@ -153,27 +153,27 @@ public class Xomw_prepro_wkr { // THREAD.UNSAFE: caching for repeated calls
|
|||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
// Find next opening brace, closing brace or pipe
|
// Find next opening brace, closing brace or pipe
|
||||||
// PORTED: $search = $searchBase;
|
// RELIC.REGEX: $search = $searchBase;
|
||||||
if (stack.top == null) {
|
if (stack.top == null) {
|
||||||
cur_closing = Bry_.Empty;
|
cur_closing = Bry_.Empty;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
cur_closing = stack.top.close;
|
cur_closing = stack.top.close;
|
||||||
// RELIC: $search .= $currentClosing;
|
// RELIC.REGEX: $search .= $currentClosing;
|
||||||
}
|
}
|
||||||
if (find_pipe) {
|
if (find_pipe) {
|
||||||
// RELIC: $search .= '|';
|
// RELIC.REGEX: $search .= '|';
|
||||||
}
|
}
|
||||||
if (find_equals) {
|
if (find_equals) {
|
||||||
// First equals will be for the template
|
// First equals will be for the template
|
||||||
// RELIC: $search .= '=';
|
// RELIC.REGEX: $search .= '=';
|
||||||
}
|
}
|
||||||
|
|
||||||
// Output literal section, advance input counter
|
// Output literal section, advance input counter
|
||||||
// PORTED: "$literalLength = strcspn(src, $search, i)"; NOTE: no trie b/c of frequent changes to $search
|
// PORTED: "$literalLength = strcspn(src, $search, i)"; NOTE: no trie b/c of frequent changes to $search
|
||||||
int literal_len = 0;
|
int literal_len = 0;
|
||||||
boolean loop_stop = false;
|
boolean loop_stop = false;
|
||||||
// read String until search_char is found
|
// loop chars until search_char is found
|
||||||
for (int j = i; j < src_len; j++) {
|
for (int j = i; j < src_len; j++) {
|
||||||
byte b = src[j];
|
byte b = src[j];
|
||||||
switch (b) { // handle '$searchBase = "[{<\n";'
|
switch (b) { // handle '$searchBase = "[{<\n";'
|
||||||
@ -226,7 +226,7 @@ public class Xomw_prepro_wkr { // THREAD.UNSAFE: caching for repeated calls
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
// PORTED: corresponding block of MW code; note complexity to handle 2 char byte[]
|
// PORTED: "if ( $curChar == '|' ) {", etc..
|
||||||
Xomw_prepro_curchar_itm cur_char_itm = (Xomw_prepro_curchar_itm)cur_char_trie.Match_at(trv, src, i, src_len);
|
Xomw_prepro_curchar_itm cur_char_itm = (Xomw_prepro_curchar_itm)cur_char_trie.Match_at(trv, src, i, src_len);
|
||||||
if (cur_char_itm != null) {
|
if (cur_char_itm != null) {
|
||||||
cur_char = cur_char_itm.bry;
|
cur_char = cur_char_itm.bry;
|
||||||
@ -239,7 +239,7 @@ public class Xomw_prepro_wkr { // THREAD.UNSAFE: caching for repeated calls
|
|||||||
// PORTED: "elseif ( $curChar == $currentClosing )"
|
// PORTED: "elseif ( $curChar == $currentClosing )"
|
||||||
case Byte_ascii.Curly_end: found = Found__close; break;
|
case Byte_ascii.Curly_end: found = Found__close; break;
|
||||||
case Byte_ascii.Brack_end: found = Found__close; break;
|
case Byte_ascii.Brack_end: found = Found__close; break;
|
||||||
case Byte_ascii.Bang: found = Found__close; break;
|
case Byte_ascii.At: found = Found__close; break; // NOTE: At is type for "}-"
|
||||||
|
|
||||||
// PORTED: "elseif ( isset( $this->rules[$curChar] ) )"
|
// PORTED: "elseif ( isset( $this->rules[$curChar] ) )"
|
||||||
case Byte_ascii.Curly_bgn: {found = Found__open; rule = rule_curly; break;}
|
case Byte_ascii.Curly_bgn: {found = Found__open; rule = rule_curly; break;}
|
||||||
@ -263,7 +263,7 @@ public class Xomw_prepro_wkr { // THREAD.UNSAFE: caching for repeated calls
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Determine element name
|
// Determine element name
|
||||||
// PORTED: $elementsRegex = "~($xmlishRegex)(?:\s|\/>|>)|(!--)~iA"; EX: "(span|div)(?:\s|\/>|>)|(!--)
|
// PORTED: $elementsRegex = "~($xmlishRegex)(?:\s|\/>|>)|(!--)~iA"; EX: "(pre|ref)(?:\s|\/>|>)|(!--)
|
||||||
Xomw_prepro_elem element = (Xomw_prepro_elem)elements_trie.Match_at(trv, src, i + 1, src_len);
|
Xomw_prepro_elem element = (Xomw_prepro_elem)elements_trie.Match_at(trv, src, i + 1, src_len);
|
||||||
if (element == null) {
|
if (element == null) {
|
||||||
// Element name missing or not listed
|
// Element name missing or not listed
|
||||||
@ -295,8 +295,8 @@ public class Xomw_prepro_wkr { // THREAD.UNSAFE: caching for repeated calls
|
|||||||
// PORTED: $wsEnd = $endPos + 2 + strspn( $text, " \t", $endPos + 3 );
|
// PORTED: $wsEnd = $endPos + 2 + strspn( $text, " \t", $endPos + 3 );
|
||||||
int ws_end = end_pos + 2; // set pos to ">"
|
int ws_end = end_pos + 2; // set pos to ">"
|
||||||
int ws_end2 = Bry_find_.Find_fwd_while_space_or_tab(src, end_pos + 3, src_len);
|
int ws_end2 = Bry_find_.Find_fwd_while_space_or_tab(src, end_pos + 3, src_len);
|
||||||
if (ws_end2 != ws_end + 1) // if ws after ">"
|
if (ws_end2 != ws_end + 1) // if ws is after ">"...
|
||||||
ws_end = ws_end2 - 1; // set to "last space"
|
ws_end = ws_end2 - 1; // ...then set to "last space" as per comment above
|
||||||
|
|
||||||
// Keep looking forward as long as we're finding more
|
// Keep looking forward as long as we're finding more
|
||||||
// comments.
|
// comments.
|
||||||
@ -370,7 +370,7 @@ public class Xomw_prepro_wkr { // THREAD.UNSAFE: caching for repeated calls
|
|||||||
}
|
}
|
||||||
|
|
||||||
byte[] name = element.name;
|
byte[] name = element.name;
|
||||||
// RELIC:$lowerName = strtolower( $name );
|
// RELIC.BTRIE_CI: $lowerName = strtolower( $name );
|
||||||
int atr_bgn = i + name.length + 1;
|
int atr_bgn = i + name.length + 1;
|
||||||
|
|
||||||
// Find end of tag
|
// Find end of tag
|
||||||
@ -496,7 +496,7 @@ public class Xomw_prepro_wkr { // THREAD.UNSAFE: caching for repeated calls
|
|||||||
|
|
||||||
int eq_end = Bry_find_.Find_fwd_while(src, i, i + 6, Byte_ascii.Eq); // PORTED:strspn( $src, '=', $i, 6 );
|
int eq_end = Bry_find_.Find_fwd_while(src, i, i + 6, Byte_ascii.Eq); // PORTED:strspn( $src, '=', $i, 6 );
|
||||||
int count = eq_end - i;
|
int count = eq_end - i;
|
||||||
if (count == 1 && find_equals) {
|
if (count == 1 && find_equals) { // EX: "{{a|\n=b=\n"
|
||||||
// DWIM: This looks kind of like a name/value separator.
|
// DWIM: This looks kind of like a name/value separator.
|
||||||
// Let's let the equals handler have it and break the
|
// Let's let the equals handler have it and break the
|
||||||
// potential heading. This is heuristic, but AFAICT the
|
// potential heading. This is heuristic, but AFAICT the
|
||||||
@ -655,7 +655,7 @@ public class Xomw_prepro_wkr { // THREAD.UNSAFE: caching for repeated calls
|
|||||||
// The invocation is at the start of the line if lineStart is set in
|
// The invocation is at the start of the line if lineStart is set in
|
||||||
// the stack, and all opening brackets are used up.
|
// the stack, and all opening brackets are used up.
|
||||||
byte[] attr = null;
|
byte[] attr = null;
|
||||||
if (max_count == matching_count && !piece.line_start) {
|
if (max_count == matching_count && piece.line_start) { // RELIC:!empty( $piece->lineStart )
|
||||||
attr = Bry_.new_a7(" lineStart=\"1\"");
|
attr = Bry_.new_a7(" lineStart=\"1\"");
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
@ -786,7 +786,7 @@ public class Xomw_prepro_wkr { // THREAD.UNSAFE: caching for repeated calls
|
|||||||
|
|
||||||
// handle "}-" separately
|
// handle "}-" separately
|
||||||
byte[] langv_end = Bry_.new_a7("}-");
|
byte[] langv_end = Bry_.new_a7("}-");
|
||||||
rv.Add_obj(langv_end, new Xomw_prepro_curchar_itm(langv_end, Byte_ascii.Bang));
|
rv.Add_obj(langv_end, new Xomw_prepro_curchar_itm(langv_end, Byte_ascii.At));
|
||||||
return rv;
|
return rv;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -26,24 +26,27 @@ public class Xomw_prepro_wkr__tst {
|
|||||||
fxt.Test__parse("a[[b]]c", "<root>a[[b]]c</root>");
|
fxt.Test__parse("a[[b]]c", "<root>a[[b]]c</root>");
|
||||||
}
|
}
|
||||||
@Test public void Template() {
|
@Test public void Template() {
|
||||||
fxt.Test__parse("a{{b}}c", "<root>a<template lineStart=\"1\"><title>b</title></template>c</root>");
|
fxt.Test__parse("a{{b}}c", "<root>a<template><title>b</title></template>c</root>");
|
||||||
|
}
|
||||||
|
@Test public void Template__args() {
|
||||||
|
fxt.Test__parse("a{{b|c|d}}e", "<root>a<template><title>b</title><part><name index=\"1\" /><value>c</value></part><part><name index=\"2\" /><value>d</value></part></template>e</root>");
|
||||||
}
|
}
|
||||||
@Test public void Tplarg() {
|
@Test public void Tplarg() {
|
||||||
fxt.Test__parse("a{{{b}}}c", "<root>a<tplarg lineStart=\"1\"><title>b</title></tplarg>c</root>");
|
fxt.Test__parse("a{{{b}}}c", "<root>a<tplarg><title>b</title></tplarg>c</root>");
|
||||||
}
|
}
|
||||||
@Test public void Comment() {
|
@Test public void Comment() {
|
||||||
fxt.Test__parse("a<!--b-->c", "<root>a<comment><!--b--></comment>c</root>");
|
fxt.Test__parse("a<!--b-->c", "<root>a<comment><!--b--></comment>c</root>");
|
||||||
}
|
}
|
||||||
@Test public void Comment__dangling() {
|
@Test public void Comment__dangling() {// COVERS: "Unclosed comment in input, runs to end"
|
||||||
fxt.Test__parse("a<!--b", "<root>a<comment><!--b</comment></root>");
|
fxt.Test__parse("a<!--b", "<root>a<comment><!--b</comment></root>");
|
||||||
}
|
}
|
||||||
@Test public void Comment__ws() { // NOTE: space is outside comment
|
@Test public void Comment__ws() { // COVERS: "Search backwards for leading whitespace"
|
||||||
fxt.Test__parse("a <!--b--> c", "<root>a <comment><!--b--></comment> c</root>");
|
fxt.Test__parse("a <!--b--> c", "<root>a <comment><!--b--></comment> c</root>"); // NOTE: space is outside comment
|
||||||
}
|
}
|
||||||
@Test public void Comment__many__ws() { // NOTE: space is outside comment
|
@Test public void Comment__many__ws() {// COVERS: "Dump all but the last comment to the accumulator"
|
||||||
fxt.Test__parse("a <!--1--> <!--2--> z", "<root>a <comment><!--1--></comment> <comment><!--2--></comment> z</root>");
|
fxt.Test__parse("a <!--1--> <!--2--> z", "<root>a <comment><!--1--></comment> <comment><!--2--></comment> z</root>"); // NOTE: space is outside comment;
|
||||||
}
|
}
|
||||||
@Test public void Comment__nl__ws() { // NOTE: space is inside comment if flanked by nl
|
@Test public void Comment__nl__ws() { // COVERS: "Eat the line if possible"
|
||||||
fxt.Test__parse(String_.Concat_lines_nl_skip_last
|
fxt.Test__parse(String_.Concat_lines_nl_skip_last
|
||||||
( "a"
|
( "a"
|
||||||
, " <!--1--> "
|
, " <!--1--> "
|
||||||
@ -51,14 +54,40 @@ public class Xomw_prepro_wkr__tst {
|
|||||||
, "z"
|
, "z"
|
||||||
), String_.Concat_lines_nl_skip_last
|
), String_.Concat_lines_nl_skip_last
|
||||||
( "<root>a"
|
( "<root>a"
|
||||||
, "<comment> <!--1--> "
|
, "<comment> <!--1--> " // NOTE: space is inside </comment> if flanked by nl;
|
||||||
, "</comment><comment> <!--2--> "
|
, "</comment><comment> <!--2--> "
|
||||||
, "</comment>z</root>"
|
, "</comment>z</root>"
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
@Test public void Ext__pre() {
|
@Test public void Ext() { // COVERS.ALSO: "Note that the attr element contains the whitespace between name and attribute,"
|
||||||
fxt.Test__parse("a<pre id=\"1\">b</pre>c", "<root>a<ext><name>pre</name><attr> id="1"</attr><inner>b</inner><close></pre></close></ext>c</root>");
|
fxt.Test__parse("a<pre id=\"1\">b</pre>c", "<root>a<ext><name>pre</name><attr> id="1"</attr><inner>b</inner><close></pre></close></ext>c</root>");
|
||||||
}
|
}
|
||||||
|
@Test public void Ext__inline() { // COVERS: "if ( $text[$tagEndPos - 1] == '/' ) {"
|
||||||
|
fxt.Test__parse("a<pre/>b" , "<root>a<ext><name>pre</name><attr></attr></ext>b</root>");
|
||||||
|
fxt.Test__parse("a<pre />b" , "<root>a<ext><name>pre</name><attr> </attr></ext>b</root>");
|
||||||
|
}
|
||||||
|
@Test public void Ext__end__pass__space() {// COVERS: "\s*" in `preg_match( "/<\/" . preg_quote( $name, '/' ) . "\s*>/i",`
|
||||||
|
fxt.Test__parse("a<pre>b</pre >c", "<root>a<ext><name>pre</name><attr></attr><inner>b</inner><close></pre ></close></ext>c</root>");
|
||||||
|
}
|
||||||
|
@Test public void Ext__end__pass__name() { // COVERS: "\s*" in `preg_match( "/<\/" . preg_quote( $name, '/' ) . "\s*>/i",`
|
||||||
|
fxt.Test__parse("a<pre>b</pro></pre>c", "<root>a<ext><name>pre</name><attr></attr><inner>b</pro></inner><close></pre></close></ext>c</root>");
|
||||||
|
}
|
||||||
|
@Test public void Ext__end__fail__angle() {// COVERS: "\s*" in `preg_match( "/<\/" . preg_quote( $name, '/' ) . "\s*>/i",`
|
||||||
|
fxt.Test__parse("a<pre>b</pre c", "<root>a<pre>b</pre c</root>");
|
||||||
|
}
|
||||||
|
@Test public void Ext__dangling() { // COVERS: "Let it run out to the end of the text."
|
||||||
|
fxt.Test__parse("a<pre>bc", "<root>a<pre>bc</root>");
|
||||||
|
}
|
||||||
|
@Test public void Ext__dangling__many() { // COVERS: "Cache results, otherwise we have O(N^2) performance for input like <foo><foo><foo>..."
|
||||||
|
fxt.Test__parse("a<pre><pre><pre>bc", "<root>a<pre><pre><pre>bc</root>");
|
||||||
|
}
|
||||||
|
@Test public void Ext__unclosed() { // COVERS: "Infinite backtrack"
|
||||||
|
fxt.Test__parse("a<pre bcd", "<root>a<pre bcd</root>");
|
||||||
|
}
|
||||||
|
@Test public void Ext__noinclude() { // COVERS: "<includeonly> and <noinclude> just become <ignore> tags"
|
||||||
|
fxt.Init__for_inclusion_(Bool_.N);
|
||||||
|
fxt.Test__parse("a<includeonly>b<noinclude>c</noinclude>d</includeonly>e", "<root>a<ignore><includeonly>b<noinclude>c</noinclude>d</includeonly></ignore>e</root>");
|
||||||
|
}
|
||||||
@Test public void Heading() {
|
@Test public void Heading() {
|
||||||
fxt.Test__parse(String_.Concat_lines_nl_skip_last
|
fxt.Test__parse(String_.Concat_lines_nl_skip_last
|
||||||
( "a"
|
( "a"
|
||||||
@ -79,7 +108,7 @@ public class Xomw_prepro_wkr__tst {
|
|||||||
, "<h level=\"2\" i=\"1\">== b1 ==</h></root>"
|
, "<h level=\"2\" i=\"1\">== b1 ==</h></root>"
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
@Test public void Heading__bos__implied_nl() {
|
@Test public void Heading__bos__implied_nl() { // COVERS: "Is this the start of a heading?"
|
||||||
fxt.Test__parse(String_.Concat_lines_nl_skip_last
|
fxt.Test__parse(String_.Concat_lines_nl_skip_last
|
||||||
( "== b1 =="
|
( "== b1 =="
|
||||||
, "z"
|
, "z"
|
||||||
@ -88,6 +117,18 @@ public class Xomw_prepro_wkr__tst {
|
|||||||
, "z</root>"
|
, "z</root>"
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
|
@Test public void Heading__eq_1() { // COVERS: "DWIM: This looks kind of like a name/value separator."
|
||||||
|
fxt.Test__parse(String_.Concat_lines_nl_skip_last
|
||||||
|
( "a{{b|"
|
||||||
|
, "=c="
|
||||||
|
, "}}d"
|
||||||
|
), String_.Concat_lines_nl_skip_last
|
||||||
|
( "<root>a<template><title>b</title><part><name>"
|
||||||
|
, "</name>=<value>c="
|
||||||
|
, "</value></part></template>d</root>"
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
@Test public void Inclusion__n() {
|
@Test public void Inclusion__n() {
|
||||||
fxt.Init__for_inclusion_(Bool_.N);
|
fxt.Init__for_inclusion_(Bool_.N);
|
||||||
fxt.Test__parse("a<onlyinclude>b</onlyinclude>c", "<root>a<ignore><onlyinclude></ignore>b<ignore></onlyinclude></ignore>c</root>");
|
fxt.Test__parse("a<onlyinclude>b</onlyinclude>c", "<root>a<ignore><onlyinclude></ignore>b<ignore></onlyinclude></ignore>c</root>");
|
||||||
@ -96,6 +137,10 @@ public class Xomw_prepro_wkr__tst {
|
|||||||
fxt.Init__for_inclusion_(Bool_.Y);
|
fxt.Init__for_inclusion_(Bool_.Y);
|
||||||
fxt.Test__parse("a<onlyinclude>b</onlyinclude>c", "<root><ignore>a<onlyinclude></ignore>b<ignore></onlyinclude>c</ignore></root>");
|
fxt.Test__parse("a<onlyinclude>b</onlyinclude>c", "<root><ignore>a<onlyinclude></ignore>b<ignore></onlyinclude>c</ignore></root>");
|
||||||
}
|
}
|
||||||
|
@Test public void Ignored__noinclude() { // COVERS: "Handle ignored tags"
|
||||||
|
fxt.Init__for_inclusion_(Bool_.N);
|
||||||
|
fxt.Test__parse("a<noinclude>b</noinclude>c", "<root>a<ignore><noinclude></ignore>b<ignore></noinclude></ignore>c</root>");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
class Xomw_prepro_wkr__fxt {
|
class Xomw_prepro_wkr__fxt {
|
||||||
private final Xomw_prepro_wkr wkr = new Xomw_prepro_wkr();
|
private final Xomw_prepro_wkr wkr = new Xomw_prepro_wkr();
|
||||||
|
Loading…
Reference in New Issue
Block a user