1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2025-06-04 00:14:14 +00:00

Mw_parse: Add more implementation of Mw_preprocessor

This commit is contained in:
gnosygnu 2017-01-10 12:01:03 -05:00
parent d15630c433
commit 9610db0cab
4 changed files with 299 additions and 197 deletions

View File

@ -0,0 +1,66 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.mws.prepros; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.mws.*;
/**
 * Immutable description of one delimiter-pair rule used by the preprocessor
 * (for example "{" ... "}", "[" ... "]" or "-{" ... "}-").
 *
 * A rule carries its opening and closing byte sequences, a min / max pair, and
 * a table of node-name ids. NOTE(review): the table appears to be indexed by the
 * number of repeated opening delimiters that were matched -- confirm against the
 * caller in Xomw_prepro_wkr.
 */
class Xomw_prepro_rule {
  /** Marks a slot in {@link #names} for which no node may be produced. */
  public static final int Name__invalid = -1;
  /** Marks a slot that is allowed but has no node name ({@link #Name(int)} yields null). */
  public static final int Name__null = 0;
  /** Name id that maps to the "template" node name. */
  public static final int Name__tmpl = 1;
  /** Name id that maps to the "tplarg" node name. */
  public static final int Name__targ = 2;

  private static final byte[] Name__tmpl_bry = Bry_.new_a7("template");
  private static final byte[] Name__targ_bry = Bry_.new_a7("tplarg");

  /** Opening delimiter bytes. */
  public final byte[] bgn;
  /** Closing delimiter bytes. */
  public final byte[] end;
  /** Lower bound stored for this rule; presumably the minimum delimiter count -- confirm. */
  public final int min;
  /** Upper bound stored for this rule; presumably the maximum delimiter count -- confirm. */
  public final int max;
  /** Table of Name__* ids; the array is stored as given (no defensive copy). */
  public final int[] names;

  /**
   * Stores every argument as-is in the matching public field.
   *
   * @param bgn   opening delimiter bytes
   * @param end   closing delimiter bytes
   * @param min   lower bound for the rule
   * @param max   upper bound for the rule
   * @param names table of Name__* ids
   */
  public Xomw_prepro_rule(byte[] bgn, byte[] end, int min, int max, int[] names) {
    this.bgn = bgn;
    this.end = end;
    this.min = min;
    this.max = max;
    this.names = names;
  }

  /**
   * Reports whether slot {@code idx} of the name table is usable.
   *
   * @param idx slot to look up
   * @return false when {@code idx} is at or past the end of the table, or when the
   *         slot holds {@link #Name__invalid}; true otherwise
   */
  public boolean Names_exist(int idx) {
    if (idx >= names.length) {
      return false; // past the end of the table: nothing is defined for this slot
    }
    return names[idx] != Name__invalid;
  }

  /**
   * Translates a Name__* id into its node-name bytes.
   *
   * @param type one of the Name__* ids
   * @return "template" bytes for {@link #Name__tmpl}, "tplarg" bytes for
   *         {@link #Name__targ}, and null for every other value
   */
  public static byte[] Name(int type) {
    if (type == Name__tmpl) {
      return Name__tmpl_bry;
    }
    if (type == Name__targ) {
      return Name__targ_bry;
    }
    // Name__invalid, Name__null and any unknown id have no node name
    return null;
  }
}
/**
 * Immutable record for one xml-ish element known to the preprocessor: a type
 * id, the element name, and the precomputed left-hand side of its closing tag.
 */
class Xomw_prepro_elem {
  /** Type id used for comment elements. */
  public static final int Type__comment = 0;

  // Prefix shared by every closing tag; the element name is appended to it.
  private static final byte[] Bry__close_tag_prefix = Bry_.new_a7("</");

  /** Type id of the element (for example {@link #Type__comment}). */
  public final int type;
  /** Element name bytes, stored as given. */
  public final byte[] name;
  /** "</" combined with the name via Bry_.Add (presumably plain concatenation); no trailing ">". */
  public final byte[] tag_end_lhs;

  /**
   * @param type type id of the element
   * @param name element name bytes
   */
  public Xomw_prepro_elem(int type, byte[] name) {
    byte[] close_tag_lhs = Bry_.Add(Bry__close_tag_prefix, name);
    this.type = type;
    this.name = name;
    this.tag_end_lhs = close_tag_lhs;
  }
}
/**
 * Pairs a byte sequence with its first byte, so a lookup result can be
 * dispatched on a single-byte key while still exposing the full sequence.
 */
class Xomw_prepro_curchar_itm {
  /** The full byte sequence, stored as given (no copy is made). */
  public byte[] bry;
  /** First byte of {@link #bry}, captured at construction time. */
  public byte type;

  /**
   * @param bry non-empty byte sequence; a null or zero-length array fails on the
   *            first-byte read (NullPointerException / ArrayIndexOutOfBoundsException)
   */
  public Xomw_prepro_curchar_itm(byte[] bry) {
    byte first_byte = bry[0];
    this.bry = bry;
    this.type = first_byte;
  }
}

View File

@ -22,6 +22,12 @@ class Xomw_prepro_stack {
private Bry_bfr accum = Bry_bfr_.New(), root_accum = Bry_bfr_.New(); private Bry_bfr accum = Bry_bfr_.New(), root_accum = Bry_bfr_.New();
private final Xomw_prepro_flags flags = new Xomw_prepro_flags(); private final Xomw_prepro_flags flags = new Xomw_prepro_flags();
public void Clear() {
stack.Clear();
accum.Clear();
root_accum.Clear();
top = null;
}
public int Count() {return stack.Len();} public int Count() {return stack.Len();}
public Bry_bfr Get_accum() {return accum;} public Bry_bfr Get_accum() {return accum;}
public Bry_bfr Get_root_accum() {return root_accum;} public Bry_bfr Get_root_accum() {return root_accum;}

View File

@ -17,59 +17,43 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
*/ */
package gplx.xowa.parsers.mws.prepros; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.mws.*; package gplx.xowa.parsers.mws.prepros; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.mws.*;
import gplx.core.btries.*; import gplx.core.btries.*;
public class Xomw_prepro_wkr { public class Xomw_prepro_wkr { // TS.UNSAFE:caching for repeated calls
private static final Xomw_prepro_rule private final Bry_bfr tmp_bfr = Bry_bfr_.New();
rule_curly = new Xomw_prepro_rule(Bry_.new_a7("{"), Bry_.new_a7("}") , 2, 3, new int[] {Xomw_prepro_rule.Name__invalid, Xomw_prepro_rule.Name__invalid, Xomw_prepro_rule.Name__tmpl, Xomw_prepro_rule.Name__targ}) private final List_adp comments_list = List_adp_.New();
, rule_brack = new Xomw_prepro_rule(Bry_.new_a7("["), Bry_.new_a7("]") , 2, 2, new int[] {Xomw_prepro_rule.Name__invalid, Xomw_prepro_rule.Name__invalid, Xomw_prepro_rule.Name__null}) private final Hash_adp_bry xmlish_elems = Hash_adp_bry.ci_a7();
, rule_langv = new Xomw_prepro_rule(Bry_.new_a7("-{"), Bry_.new_a7("}-"), 1, 1, new int[] {Xomw_prepro_rule.Name__invalid, Xomw_prepro_rule.Name__null}) private final Hash_adp_bry xmlish_allow_missing_end_tag = Hash_adp_bry.cs();
; private final Hash_adp_bry no_more_closing_tag = Hash_adp_bry.cs();
private static final byte[] // private final Btrie_slim_mgr search_dflt_trie = Btrie_slim_mgr.cs().Add_many_int(0, "[", "{", "<", "\n"); // $searchBase = "[{<\n";
Bry__only_include_bgn = Bry_.new_a7("<onlyinclude>") private final Xomw_prepro_stack stack = new Xomw_prepro_stack();
, Bry__only_include_end = Bry_.new_a7("</onlyinclude>") private Bry_bfr accum = Bry_bfr_.New();
, Bry__comment_bgn = Bry_.new_a7("<!--")
, Bry__comment_end = Bry_.new_a7("-->")
, Bry__escaped_lt = Bry_.new_a7("&lt;")
, Bry__includeonly = Bry_.new_a7("includeonly")
, Bry__noinclude = Bry_.new_a7("noinclude")
, Bry__onlyinclude = Bry_.new_a7("onlyinclude")
;
private static final int Len__only_include_end = Bry__only_include_end.length;
private static final int
Found__line_bgn = 0
, Found__line_end = 1
, Found__pipe = 2
, Found__equals = 3
, Found__angle = 4
, Found__close = 5
, Found__open = 6
;
private Bry_bfr accum = Bry_bfr_.New(), tmp_bfr = Bry_bfr_.New(); private final Btrie_rv trv = new Btrie_rv();
private List_adp comments_list = List_adp_.New();
private byte[] htmlspecialchars(byte[] bry) { private static final Btrie_slim_mgr cur_char_trie = Cur_char_trie__new();
return bry; private static final Hash_adp_bry
} ignored_tags__noinclude = Hash_adp_bry.cs().Add_many_str("includeonly", "/includeonly")
private Xomw_prepro_rule Get_rule(byte[] bry) { , ignored_elements__noinclude = Hash_adp_bry.cs().Add_many_str("noinclude")
if (Bry_.Eq(bry, rule_curly.bgn)) return rule_curly; , ignored_tags__includeonly = Hash_adp_bry.cs().Add_many_str("noinclude", "/noinclude", "onlyinclude", "/onlyinclude")
else if (Bry_.Eq(bry, rule_brack.bgn)) return rule_brack; , ignored_elements__includeonly = Hash_adp_bry.cs().Add_many_str("includeonly");
else if (Bry_.Eq(bry, rule_langv.bgn)) return rule_langv; private static Btrie_slim_mgr Cur_char_trie__new() {
else throw Err_.new_unhandled(bry); Btrie_slim_mgr rv = Btrie_slim_mgr.ci_a7();
String[] ary = new String[] {"|", "=", "<", "\n", "{", "[", "-{"};
for (String str : ary) {
byte[] bry = Bry_.new_a7(str);
rv.Add_obj(bry, new Xomw_prepro_curchar_itm(bry));
}
return rv;
} }
public byte[] Preprocess_to_xml(byte[] src, boolean for_inclusion) { public byte[] Preprocess_to_xml(byte[] src, boolean for_inclusion) {
Hash_adp_bry xmlish_elems = Hash_adp_bry.ci_a7(); // parser->getStripList(); xmlish_elems.Clear(); // TODO.XO: parser->getStripList();
xmlish_allow_missing_end_tag.Add_many_str("includeonly", "noinclude", "onlyinclude");
Hash_adp_bry xmlish_allow_missing_end_tag = Hash_adp_bry.cs();
xmlish_allow_missing_end_tag.Add_as_key_and_val(Bry__includeonly);
xmlish_allow_missing_end_tag.Add_as_key_and_val(Bry__noinclude);
xmlish_allow_missing_end_tag.Add_as_key_and_val(Bry__onlyinclude);
boolean enable_only_include = false; boolean enable_only_include = false;
Hash_adp_bry ignored_tags = Hash_adp_bry.cs(); Hash_adp_bry ignored_tags, ignored_elements;
Hash_adp_bry ignored_elements = Hash_adp_bry.cs();
if (for_inclusion) { if (for_inclusion) {
ignored_tags.Add_many_str("includeonly", "/includeonly"); ignored_tags = ignored_tags__noinclude;
ignored_elements.Add_many_str("noinclude"); ignored_elements = ignored_elements__noinclude;
xmlish_elems.Add_many_str("noinclude"); xmlish_elems.Add_many_str("noinclude");
if ( Bry_.Has(src, Bry__only_include_bgn) if ( Bry_.Has(src, Bry__only_include_bgn)
&& Bry_.Has(src, Bry__only_include_end)) { && Bry_.Has(src, Bry__only_include_end)) {
@ -77,8 +61,8 @@ public class Xomw_prepro_wkr {
} }
} }
else { else {
ignored_tags.Add_many_str("noinclude", "/noinclude", "onlyinclude", "/onlyinclude"); ignored_tags = ignored_tags__includeonly;
ignored_elements.Add_many_str("includeonly"); ignored_elements = ignored_elements__includeonly;
xmlish_elems.Add_many_str("includeonly"); xmlish_elems.Add_many_str("includeonly");
} }
@ -86,8 +70,7 @@ public class Xomw_prepro_wkr {
// Use "A" modifier (anchored) instead of "^", because ^ doesn't work with an offset // Use "A" modifier (anchored) instead of "^", because ^ doesn't work with an offset
// $elementsRegex = "~($xmlishRegex)(?:\s|\/>|>)|(!--)~iA"; // $elementsRegex = "~($xmlishRegex)(?:\s|\/>|>)|(!--)~iA";
Xomw_prepro_stack stack = new Xomw_prepro_stack(); stack.Clear();
// $searchBase = "[{<\n"; # }
// Input pointer, starts out pointing to a pseudo-newline before the start // Input pointer, starts out pointing to a pseudo-newline before the start
int i = 0; int i = 0;
@ -110,7 +93,7 @@ public class Xomw_prepro_wkr {
boolean no_more_gt = false; boolean no_more_gt = false;
// Map of tag name => true if there are no more closing tags of given type right of $i // Map of tag name => true if there are no more closing tags of given type right of $i
Hash_adp_bry no_more_closing_tag = Hash_adp_bry.cs(); no_more_closing_tag.Clear();
// True to ignore all input up to the next <onlyinclude> // True to ignore all input up to the next <onlyinclude>
boolean find_only_include = enable_only_include; boolean find_only_include = enable_only_include;
@ -118,15 +101,14 @@ public class Xomw_prepro_wkr {
// Do a line-start run without outputting an LF character // Do a line-start run without outputting an LF character
boolean fake_line_start = true; boolean fake_line_start = true;
// XOWA-related init
int src_len = src.length; int src_len = src.length;
int found = -1; int found = -1;
byte[] cur_closing = Bry_.Empty;
Btrie_slim_mgr elements_trie = Btrie_slim_mgr.ci_a7(); Btrie_slim_mgr elements_trie = Btrie_slim_mgr.ci_a7();
Btrie_slim_mgr elements_end_trie = Btrie_slim_mgr.ci_a7(); Btrie_slim_mgr elements_end_trie = Btrie_slim_mgr.ci_a7();
Btrie_rv elements_trv = new Btrie_rv();
Btrie_slim_mgr cur_char_trie = Btrie_slim_mgr.ci_a7(); byte[] cur_closing = Bry_.Empty;
byte[] inner = null; byte[] inner = null;
while (true) { while (true) {
@ -145,85 +127,107 @@ public class Xomw_prepro_wkr {
} }
byte[] cur_char = Bry_.Empty; byte[] cur_char = Bry_.Empty;
Xomw_prepro_rule rule = null;
if (fake_line_start) { if (fake_line_start) {
found = Found__line_bgn; found = Found__line_bgn;
cur_char = Bry_.Empty; cur_char = Bry_.Empty;
} }
// Find next opening brace, closing brace or pipe
// $search = $searchBase;
if (stack.top == null) {
cur_closing = Bry_.Empty;
}
else { else {
cur_closing = stack.top.close; // Find next opening brace, closing brace or pipe
// $search .= $currentClosing;
} // $search = $searchBase;
if (find_pipe) { if (stack.top == null) {
// $search .= '|'; cur_closing = Bry_.Empty;
}
if (find_equals) {
// First equals will be for the template
// $search .= '=';
}
Xomw_prepro_rule rule = null;
// Output literal section, advance input counter
int literal_len = 0; // strcspn(src, $search, i);
if (literal_len > 0) {
accum.Add(htmlspecialchars(Bry_.Mid(src, i, i + literal_len)));
i += literal_len;
}
if (i >= src_len) {
if (Bry_.Eq(cur_closing, Byte_ascii.Nl_bry)) {
// Do a past-the-end run to finish off the heading
cur_char = Byte_ascii.Nl_bry;
found = Found__line_end;
} }
else { else {
// All done cur_closing = stack.top.close;
break; // $search .= $currentClosing;
} }
} if (find_pipe) {
else { // $search .= '|';
Xomw_prepro_curchar_itm cur_char_itm = (Xomw_prepro_curchar_itm)cur_char_trie.Match_at(elements_trv, src, i, src_len); }
cur_char = cur_char_itm.sequence; if (find_equals) {
switch (cur_char_itm.type) { // First equals will be for the template
case Byte_ascii.Pipe: found = Found__pipe; break; // $search .= '=';
case Byte_ascii.Eq: found = Found__equals; break; }
case Byte_ascii.Angle_bgn: found = Found__angle; break;
case Byte_ascii.Nl: found = in_heading ? Found__line_end : Found__line_bgn; break; // Output literal section, advance input counter
case Byte_ascii.Curly_bgn: { int literal_len = 0;
found = Found__open; // NOTE: hard-coded translation of "strcspn(src, $search, i)"; no trie b/c of frequent additions / deletions
rule = rule_curly; boolean loop_stop = false;
break; for (int j = i; j < src_len; j++) {
} byte b = src[j];
case Byte_ascii.Brack_bgn: { switch (b) { // handle '$searchBase = "[{<\n";'
found = Found__open; case Byte_ascii.Brack_bgn:
rule = rule_brack; case Byte_ascii.Curly_bgn:
break; case Byte_ascii.Angle_bgn:
} case Byte_ascii.Nl:
default: loop_stop = true;
if (cur_char_itm.type == Byte_ascii.Dash) { break;
int nxt_pos = i + 1; case Byte_ascii.Pipe: // handle "find_pipe"
if (nxt_pos < src_len) { if (find_pipe) loop_stop = true;
if (src[i + 1] == Byte_ascii.Curly_bgn) { break;
found = Found__open; case Byte_ascii.Eq: // handle "find_equals"
rule = rule_langv; if (find_equals) loop_stop = true;
continue; break;
default: // handle "cur_closing"; specified by piece.close and rule.close, so "\n", "}", "]" and "}-"
if (cur_closing != Bry_.Empty) {
byte cur_closing_0 = cur_closing[0];
if (b == cur_closing_0) {
if (cur_closing.length == 1) { // handle "\n", "}", "]"
loop_stop = true;
}
else {// handle "}-"
int nxt_idx = j + 1;
if (nxt_idx < src_len && src[nxt_idx] == Byte_ascii.Dash)
loop_stop = true;
}
} }
} }
} break;
}
if (Bry_.Eq(cur_char, cur_closing)) { if (loop_stop)
found = Found__close;
}
else {
i++;
continue;
}
break; break;
else
literal_len++;
}
if (literal_len > 0) {
accum.Add(htmlspecialchars(Bry_.Mid(src, i, i + literal_len)));
i += literal_len;
}
if (i >= src_len) {
if (Bry_.Eq(cur_closing, Byte_ascii.Nl_bry)) {
// Do a past-the-end run to finish off the heading
cur_char = Bry_.Empty;
found = Found__line_end;
}
else {
// All done
break;
}
}
else {
Xomw_prepro_curchar_itm cur_char_itm = (Xomw_prepro_curchar_itm)cur_char_trie.Match_at(trv, src, i, src_len);
cur_char = cur_char_itm.bry;
switch (cur_char_itm.type) {
case Byte_ascii.Pipe: found = Found__pipe; break;
case Byte_ascii.Eq: found = Found__equals; break;
case Byte_ascii.Angle_bgn: found = Found__angle; break;
case Byte_ascii.Nl: found = in_heading ? Found__line_end : Found__line_bgn; break;
case Byte_ascii.Curly_bgn: {found = Found__open; rule = rule_curly; break;}
case Byte_ascii.Brack_bgn: {found = Found__open; rule = rule_brack; break;}
case Byte_ascii.Dash: {found = Found__open; rule = rule_langv; break;}
default:
if (Bry_.Eq(cur_char, cur_closing)) {
found = Found__close;
}
else {
i++;
continue;
}
break;
}
} }
} }
@ -236,8 +240,9 @@ public class Xomw_prepro_wkr {
} }
// Determine element name; $elementsRegex = "~($xmlishRegex)(?:\s|\/>|>)|(!--)~iA"; EX: "(span|div)(?:\s|\/>|>)|(!--) // Determine element name; $elementsRegex = "~($xmlishRegex)(?:\s|\/>|>)|(!--)~iA"; EX: "(span|div)(?:\s|\/>|>)|(!--)
Xomw_prepro_elem element = (Xomw_prepro_elem)elements_trie.Match_at(elements_trv, src, i + 1, src_len); Xomw_prepro_elem element = (Xomw_prepro_elem)elements_trie.Match_at(trv, src, i + 1, src_len);
if (element == null) {// Element name missing or not listed if (element == null) {
// Element name missing or not listed
accum.Add(Bry__escaped_lt); accum.Add(Bry__escaped_lt);
i++; i++;
continue; continue;
@ -245,9 +250,8 @@ public class Xomw_prepro_wkr {
// Handle comments // Handle comments
if (element.type == Xomw_prepro_elem.Type__comment) { if (element.type == Xomw_prepro_elem.Type__comment) {
// To avoid leaving blank lines, // To avoid leaving blank lines, when a sequence of space-separated comments is both preceded and followed by a newline
// when a sequence of space-separated comments is both preceded and followed by a newline (ignoring spaces), // (ignoring spaces), then trim leading and trailing spaces and the trailing newline.
// then trim leading and trailing spaces and the trailing newline.
// Find the end // Find the end
int comment_end_pos = Bry_find_.Find_fwd(src, Bry__comment_end, i + 4, src_len); int comment_end_pos = Bry_find_.Find_fwd(src, Bry__comment_end, i + 4, src_len);
@ -288,10 +292,11 @@ public class Xomw_prepro_wkr {
// Remove leading whitespace from the end of the accumulator // Remove leading whitespace from the end of the accumulator
// Sanity check first though // Sanity check first though
int ws_len = i - ws_bgn; int ws_len = i - ws_bgn;
byte[] accum_bry = accum.To_bry();
if ( ws_len > 0 if ( ws_len > 0
&& Bry_find_.Find_fwd_while_space_or_tab(accum.To_bry(), -ws_len, src_len) == ws_len && Bry_find_.Find_fwd_while_space_or_tab(accum_bry, -ws_len, src_len) == ws_len
) { ) {
accum.Clear().Add(Bry_.Mid(accum.To_bry(), 0, -ws_len)); accum.Clear().Add(Bry_.Mid(accum_bry, 0, -ws_len));
} }
// Dump all but the last comment to the accumulator // Dump all but the last comment to the accumulator
@ -318,7 +323,7 @@ public class Xomw_prepro_wkr {
if (stack.top != null) { if (stack.top != null) {
Xomw_prepro_part part = stack.top.Get_current_part(); Xomw_prepro_part part = stack.top.Get_current_part();
if (!(part.comment_end == ws_end - 1)) { if (!(part.comment_end != -1 && part.comment_end == ws_bgn - 1)) {
part.visual_end = ws_bgn; part.visual_end = ws_bgn;
} }
// Else comments abutting, no change in visual end // Else comments abutting, no change in visual end
@ -344,8 +349,9 @@ public class Xomw_prepro_wkr {
continue; continue;
} }
// Handle ignored tags
if (ignored_tags.Has(name)) { if (ignored_tags.Has(name)) {
accum.Add_str_a7("<ignore>").Add(htmlspecialchars(Bry_.Mid(src, i, tag_end_pos - i + 1))).Add_str_a7("</ignore>"); accum.Add_str_a7("<ignore>").Add(htmlspecialchars(Bry_.Mid(src, i, tag_end_pos + 1))).Add_str_a7("</ignore>");
i = tag_end_pos + 1; i = tag_end_pos + 1;
continue; continue;
} }
@ -357,28 +363,35 @@ public class Xomw_prepro_wkr {
atr_end = tag_end_pos - 1; atr_end = tag_end_pos - 1;
inner = null; inner = null;
i = tag_end_pos + 1; i = tag_end_pos + 1;
close = null; close = Bry_.Empty;
} }
else { else {
atr_end = tag_end_pos; atr_end = tag_end_pos;
// Find closing tag // Find closing tag
// FIXME: need to search forward // NOTE: translation of `preg_match( "/<\/" . preg_quote( $name, '/' ) . "\s*>/i",`
Xomw_prepro_elem elem_end = (Xomw_prepro_elem)elements_end_trie.Match_at(elements_trv, src, tag_end_pos + 1, src_len); // preg_match( "/<\/" . preg_quote( $name, '/' ) . "\s*>/i", Xomw_prepro_elem elem_end = null;
int elem_end_lhs = elements_trv.Pos(); int elem_end_lhs = -1, elem_end_rhs = -1;
int elem_end_rhs = elements_trv.Pos(); for (int j = tag_end_pos + 1; j < src_len; j++) {
// check for "\s*>" elem_end = (Xomw_prepro_elem)elements_end_trie.Match_at(trv, src, j, src_len);
if (elem_end != null) { elem_end_lhs = elem_end_rhs = trv.Pos();
elem_end_rhs = Bry_find_.Find_fwd_while(src, elem_end_rhs, src_len, Byte_ascii.Space);
if (elem_end_rhs == src_len) { // found a possible elem_end tag; validate "\s*>"
elem_end = null; if (elem_end != null) {
} elem_end_rhs = Bry_find_.Find_fwd_while(src, elem_end_rhs, src_len, Byte_ascii.Space);
else { if (elem_end_rhs == src_len) {
if (src[elem_end_rhs] == Byte_ascii.Gt)
elem_end_rhs = elem_end_rhs + 1;
else
elem_end = null; elem_end = null;
}
else {
if (src[elem_end_rhs] == Byte_ascii.Gt)
elem_end_rhs = elem_end_rhs + 1;
else
elem_end = null;
}
} }
if (elem_end != null)
break;
} }
if ( !no_more_closing_tag.Has(name) if ( !no_more_closing_tag.Has(name)
&& elem_end != null) { && elem_end != null) {
inner = Bry_.Mid(src, tag_end_pos + 1, elem_end_lhs); inner = Bry_.Mid(src, tag_end_pos + 1, elem_end_lhs);
@ -605,8 +618,7 @@ public class Xomw_prepro_wkr {
arg_idx++; arg_idx++;
} }
} }
tmp_bfr.Add_str_a7("</").Add(name_bry).Add_str_a7(">"); element = tmp_bfr.Add_str_a7("</").Add(name_bry).Add_str_a7(">").To_bry_and_clear();
element = tmp_bfr.To_bry_and_clear();
} }
// Advance input pointer // Advance input pointer
@ -664,52 +676,35 @@ public class Xomw_prepro_wkr {
root_accum.Add_str_a7("</root>"); root_accum.Add_str_a7("</root>");
return root_accum.To_bry_and_clear(); return root_accum.To_bry_and_clear();
} }
} private byte[] htmlspecialchars(byte[] bry) {
class Xomw_prepro_rule { return bry;
public Xomw_prepro_rule(byte[] bgn, byte[] end, int min, int max, int[] names) { }
this.bgn = bgn; private Xomw_prepro_rule Get_rule(byte[] bry) {
this.end = end; if (Bry_.Eq(bry, rule_curly.bgn)) return rule_curly;
this.min = min; else if (Bry_.Eq(bry, rule_brack.bgn)) return rule_brack;
this.max = max; else if (Bry_.Eq(bry, rule_langv.bgn)) return rule_langv;
this.names = names; else throw Err_.new_unhandled(bry);
} }
public final byte[] bgn; private static final Xomw_prepro_rule
public final byte[] end; rule_curly = new Xomw_prepro_rule(Bry_.new_a7("{"), Bry_.new_a7("}") , 2, 3, new int[] {Xomw_prepro_rule.Name__invalid, Xomw_prepro_rule.Name__invalid, Xomw_prepro_rule.Name__tmpl, Xomw_prepro_rule.Name__targ})
public final int min; , rule_brack = new Xomw_prepro_rule(Bry_.new_a7("["), Bry_.new_a7("]") , 2, 2, new int[] {Xomw_prepro_rule.Name__invalid, Xomw_prepro_rule.Name__invalid, Xomw_prepro_rule.Name__null})
public final int max; , rule_langv = new Xomw_prepro_rule(Bry_.new_a7("-{"), Bry_.new_a7("}-"), 1, 1, new int[] {Xomw_prepro_rule.Name__invalid, Xomw_prepro_rule.Name__null})
public final int[] names; ;
public boolean Names_exist(int idx) { private static final byte[]
return idx < names.length && names[idx] != Name__invalid; Bry__only_include_bgn = Bry_.new_a7("<onlyinclude>")
} , Bry__only_include_end = Bry_.new_a7("</onlyinclude>")
private static final byte[] Name__tmpl_bry = Bry_.new_a7("template"), Name__targ_bry = Bry_.new_a7("tplarg"); , Bry__comment_bgn = Bry_.new_a7("<!--")
public static final int Name__invalid = -1, Name__null = 0, Name__tmpl = 1, Name__targ = 2; , Bry__comment_end = Bry_.new_a7("-->")
public static byte[] Name(int type) { , Bry__escaped_lt = Bry_.new_a7("&lt;")
switch (type) { ;
case Name__tmpl: return Name__tmpl_bry; private static final int Len__only_include_end = Bry__only_include_end.length;
case Name__targ: return Name__targ_bry; private static final int
default: Found__line_bgn = 0
case Name__invalid: return null; , Found__line_end = 1
case Name__null: return null; , Found__pipe = 2
} , Found__equals = 3
} , Found__angle = 4
} , Found__close = 5
class Xomw_prepro_elem { , Found__open = 6
private static final byte[] Bry__tag_end = Bry_.new_a7("</"); ;
public Xomw_prepro_elem(int type, byte[] name) {
this.type = type;
this.name = name;
this.tag_end_lhs = Bry_.Add(Bry__tag_end, name);
}
public final int type;
public final byte[] name;
public final byte[] tag_end_lhs;
public static final int Type__comment = 0;
}
class Xomw_prepro_curchar_itm {
public Xomw_prepro_curchar_itm(int type, byte[] sequence) {
this.type = type;
this.sequence = sequence;
}
public int type;
public byte[] sequence;
} }

View File

@ -0,0 +1,35 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.mws.prepros; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.mws.*;
import org.junit.*;
/**
 * JUnit suite for Xomw_prepro_wkr. The single test is currently a placeholder:
 * both the fixture field and the assertion are commented out, so Basic() runs
 * and passes without exercising the preprocessor.
 */
public class Xomw_prepro_wkr__tst {
  // Fixture is disabled together with the assertion below.
  // private final Xomw_prepro_wkr__fxt fxt = new Xomw_prepro_wkr__fxt();

  @Test
  public void Basic() {
    // Intended check, not yet enabled:
    // fxt.Test__parse("a", "a");
  }
}
/**
 * Test fixture around a single Xomw_prepro_wkr instance: converts a String
 * source to bytes, runs Preprocess_to_xml, and compares the result to the
 * expected text.
 */
class Xomw_prepro_wkr__fxt {
  private final Xomw_prepro_wkr wkr = new Xomw_prepro_wkr();
  // Passed straight through to Preprocess_to_xml; stays false unless switched on below.
  private boolean for_inclusion = false;

  /** Makes every later Test__parse call run with for_inclusion = true. */
  public void Init__for_inclusion_y_() {
    this.for_inclusion = true;
  }

  /**
   * Preprocesses {@code src_str} and asserts on the output.
   *
   * @param src_str wikitext to preprocess; also handed to Tfds.Eq_str_lines as its
   *                third argument (presumably the failure message -- confirm)
   * @param expd    expected output text
   */
  public void Test__parse(String src_str, String expd) {
    byte[] xml_bry = wkr.Preprocess_to_xml(Bry_.new_u8(src_str), for_inclusion);
    String actl_str = String_.new_u8(xml_bry);
    Tfds.Eq_str_lines(expd, actl_str, src_str);
  }
}