Mw_parse.Prepro: Support heading and onlyinclude

pull/620/head
gnosygnu 8 years ago
parent 683481abbd
commit 2ad1be255c

@ -159,7 +159,7 @@ public class Bry_find_ {
return Bry_find_.Not_found; return Bry_find_.Not_found;
} }
public static int Find_bwd__while_space_or_tab(byte[] src, int cur, int end) { // get pos of 1st char that is not \t or \s public static int Find_bwd__while_space_or_tab(byte[] src, int cur, int end) { // get pos of 1st char that is not \t or \s
if (cur < 0 || cur >= src.length) return Bry_find_.Not_found; if (cur < 0 || cur > src.length) return Bry_find_.Not_found;
for (int i = cur - 1; i >= end; i--) { for (int i = cur - 1; i >= end; i--) {
byte b = src[i]; byte b = src[i];
switch (b) { switch (b) {

@ -19,6 +19,7 @@ package gplx;
import gplx.core.lists.*; /*EnumerAble,ComparerAble*/ import gplx.core.lists.*; /*EnumerAble,ComparerAble*/
public interface Ordered_hash extends Hash_adp, List_adp__getable { public interface Ordered_hash extends Hash_adp, List_adp__getable {
void Add_at(int i, Object o); void Add_at(int i, Object o);
Ordered_hash Add_many_str(String... ary);
int Idx_of(Object item); int Idx_of(Object item);
void Sort(); void Sort();
void Sort_by(ComparerAble comparer); void Sort_by(ComparerAble comparer);

@ -54,6 +54,15 @@ public class Ordered_hash_base extends Hash_adp_base implements Ordered_hash, Gf
ordered.Add_at(i, val); ordered.Add_at(i, val);
AssertCounts(); AssertCounts();
} }
/** Adds every String in ary to this hash; each String is converted with Bry_.new_u8 and the resulting byte[] serves as both key and value. Returns this so calls can be chained. */
public Ordered_hash Add_many_str(String... ary) {
	for (String str : ary) {
		byte[] key = Bry_.new_u8(str);
		this.Add(key, key);	// same byte[] instance is registered as key and as value
	}
	return this;
}
void AssertCounts() { void AssertCounts() {
if (super.Count() != ordered.Count()) throw Err_.new_wo_type("counts do not match", "hash", super.Count(), "list", ordered.Count()); if (super.Count() != ordered.Count()) throw Err_.new_wo_type("counts do not match", "hash", super.Count(), "list", ordered.Count());
} }

@ -17,7 +17,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
*/ */
package gplx.xowa.parsers.mws.prepros; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.mws.*; package gplx.xowa.parsers.mws.prepros; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.mws.*;
import gplx.core.btries.*; import gplx.core.btries.*;
public class Xomw_prepro_wkr { // TS.UNSAFE:caching for repeated calls public class Xomw_prepro_wkr { // THREAD.UNSAFE:caching for repeated calls
private final Bry_bfr tmp_bfr = Bry_bfr_.New(); private final Bry_bfr tmp_bfr = Bry_bfr_.New();
private final List_adp comments_list = List_adp_.New(); private final List_adp comments_list = List_adp_.New();
private final Hash_adp_bry xmlish_elems = Hash_adp_bry.ci_a7(); private final Hash_adp_bry xmlish_elems = Hash_adp_bry.ci_a7();
@ -25,36 +25,15 @@ public class Xomw_prepro_wkr { // TS.UNSAFE:caching for repeated calls
private final Hash_adp_bry no_more_closing_tag = Hash_adp_bry.cs(); private final Hash_adp_bry no_more_closing_tag = Hash_adp_bry.cs();
private final Btrie_slim_mgr elements_trie = Btrie_slim_mgr.ci_a7(); private final Btrie_slim_mgr elements_trie = Btrie_slim_mgr.ci_a7();
private final Xomw_prepro_stack stack = new Xomw_prepro_stack(); private final Xomw_prepro_stack stack = new Xomw_prepro_stack();
private Bry_bfr accum = Bry_bfr_.New();
private final Btrie_rv trv = new Btrie_rv(); private final Btrie_rv trv = new Btrie_rv();
private Bry_bfr accum = Bry_bfr_.New();
private static final Btrie_slim_mgr cur_char_trie = Cur_char_trie__new();
private static final Hash_adp_bry
ignored_tags__noinclude = Hash_adp_bry.cs().Add_many_str("includeonly", "/includeonly")
, ignored_elements__noinclude = Hash_adp_bry.cs().Add_many_str("noinclude")
, ignored_tags__includeonly = Hash_adp_bry.cs().Add_many_str("noinclude", "/noinclude", "onlyinclude", "/onlyinclude")
, ignored_elements__includeonly = Hash_adp_bry.cs().Add_many_str("includeonly");
// Builds the trie of delimiter sequences; every entry maps its bytes to a Xomw_prepro_curchar_itm
private static Btrie_slim_mgr Cur_char_trie__new() {
	Btrie_slim_mgr trie = Btrie_slim_mgr.ci_a7();
	// delimiters whose itm is created with their own first byte as the 2nd constructor arg
	String[] delims = new String[] {"|", "=", "<", "\n", "{", "[", "-{", "}", "]"};
	int delims_len = delims.length;
	for (int i = 0; i < delims_len; i++) {
		byte[] delim = Bry_.new_a7(delims[i]);
		trie.Add_obj(delim, new Xomw_prepro_curchar_itm(delim, delim[0]));
	}
	// "}-" is registered on its own because its itm is created with Byte_ascii.Bang instead of its first byte
	byte[] langv_end = Bry_.new_a7("}-");
	trie.Add_obj(langv_end, new Xomw_prepro_curchar_itm(langv_end, Byte_ascii.Bang));
	return trie;
}
public byte[] Preprocess_to_xml(byte[] src, boolean for_inclusion) { public byte[] Preprocess_to_xml(byte[] src, boolean for_inclusion) {
xmlish_elems.Clear(); // TODO.XO: parser->getStripList(); pre|nowiki|gallery|indicator|ref|reference xmlish_elems.Clear(); // TODO.XO: parser->getStripList(); pre|nowiki|gallery|indicator|ref|reference
// RELIC: $xmlishAllowMissingEndTag = [ 'includeonly', 'noinclude', 'onlyinclude' ]; // RELIC: $xmlishAllowMissingEndTag = [ 'includeonly', 'noinclude', 'onlyinclude' ];
boolean enable_only_include = false; boolean enable_only_include = false;
Hash_adp_bry ignored_tags, ignored_elements; Ordered_hash ignored_tags; Hash_adp ignored_elements;
if (for_inclusion) { if (for_inclusion) {
ignored_tags = ignored_tags__noinclude; ignored_tags = ignored_tags__noinclude;
ignored_elements = ignored_elements__noinclude; ignored_elements = ignored_elements__noinclude;
@ -70,8 +49,17 @@ public class Xomw_prepro_wkr { // TS.UNSAFE:caching for repeated calls
xmlish_elems.Add_many_str("includeonly"); xmlish_elems.Add_many_str("includeonly");
} }
// PORTED:$xmlishRegex = implode( '|', array_merge( $xmlishElements, $ignoredTags ) );
elements_trie.Clear();
elements_trie.Add_obj("pre", new Xomw_prepro_elem(Xomw_prepro_elem.Type__other, Bry_.new_a7("pre")));
elements_trie.Add_obj("!--", new Xomw_prepro_elem(Xomw_prepro_elem.Type__comment, Bry_.new_a7("comment")));
int ignored_tags_len = ignored_tags.Count();
for (int j = 0; j < ignored_tags_len; j++) {
byte[] bry = (byte[])ignored_tags.Get_at(j);
elements_trie.Add_obj(bry, new Xomw_prepro_elem(Xomw_prepro_elem.Type__other, bry));
}
// RELIC: // RELIC:
// $xmlishRegex = implode( '|', array_merge( $xmlishElements, $ignoredTags ) );
// Use "A" modifier (anchored) instead of "^", because ^ doesn't work with an offset // Use "A" modifier (anchored) instead of "^", because ^ doesn't work with an offset
// $elementsRegex = "~($xmlishRegex)(?:\s|\/>|>)|(!--)~iA"; // $elementsRegex = "~($xmlishRegex)(?:\s|\/>|>)|(!--)~iA";
@ -120,11 +108,6 @@ public class Xomw_prepro_wkr { // TS.UNSAFE:caching for repeated calls
byte[] inner = null; byte[] inner = null;
Xomw_prepro_rule rule = null; Xomw_prepro_rule rule = null;
// XOWA: xml elements
elements_trie.Clear();
elements_trie.Add_obj("pre", new Xomw_prepro_elem(Xomw_prepro_elem.Type__other, Bry_.new_a7("pre")));
elements_trie.Add_obj("!--", new Xomw_prepro_elem(Xomw_prepro_elem.Type__comment, Bry_.new_a7("comment")));
while (true) { while (true) {
if (find_only_include) { if (find_only_include) {
// Ignore all input up to the next <onlyinclude> // Ignore all input up to the next <onlyinclude>
@ -488,7 +471,7 @@ public class Xomw_prepro_wkr { // TS.UNSAFE:caching for repeated calls
} }
int eq_end = Bry_find_.Find_fwd_while(src, i, i + 6, Byte_ascii.Eq); // PORTED:strspn( $src, '=', $i, 6 ); int eq_end = Bry_find_.Find_fwd_while(src, i, i + 6, Byte_ascii.Eq); // PORTED:strspn( $src, '=', $i, 6 );
int count = i - eq_end; int count = eq_end - i;
if (count == 1 && find_equals) { if (count == 1 && find_equals) {
// DWIM: This looks kind of like a name/value separator. // DWIM: This looks kind of like a name/value separator.
// Let's let the equals handler have it and break the // Let's let the equals handler have it and break the
@ -739,6 +722,15 @@ public class Xomw_prepro_wkr { // TS.UNSAFE:caching for repeated calls
else if (Bry_.Eq(bry, rule_langv.bgn)) return rule_langv; else if (Bry_.Eq(bry, rule_langv.bgn)) return rule_langv;
else throw Err_.new_unhandled(bry); else throw Err_.new_unhandled(bry);
} }
private static final int
Found__line_bgn = 0
, Found__line_end = 1
, Found__pipe = 2
, Found__equals = 3
, Found__angle = 4
, Found__close = 5
, Found__open = 6
;
private static final Xomw_prepro_rule private static final Xomw_prepro_rule
rule_curly = new Xomw_prepro_rule(Bry_.new_a7("{"), Bry_.new_a7("}") , 2, 3, new int[] {Xomw_prepro_rule.Name__invalid, Xomw_prepro_rule.Name__invalid, Xomw_prepro_rule.Name__tmpl, Xomw_prepro_rule.Name__targ}) rule_curly = new Xomw_prepro_rule(Bry_.new_a7("{"), Bry_.new_a7("}") , 2, 3, new int[] {Xomw_prepro_rule.Name__invalid, Xomw_prepro_rule.Name__invalid, Xomw_prepro_rule.Name__tmpl, Xomw_prepro_rule.Name__targ})
, rule_brack = new Xomw_prepro_rule(Bry_.new_a7("["), Bry_.new_a7("]") , 2, 2, new int[] {Xomw_prepro_rule.Name__invalid, Xomw_prepro_rule.Name__invalid, Xomw_prepro_rule.Name__null}) , rule_brack = new Xomw_prepro_rule(Bry_.new_a7("["), Bry_.new_a7("]") , 2, 2, new int[] {Xomw_prepro_rule.Name__invalid, Xomw_prepro_rule.Name__invalid, Xomw_prepro_rule.Name__null})
@ -753,13 +745,24 @@ public class Xomw_prepro_wkr { // TS.UNSAFE:caching for repeated calls
, Bry__end_lhs = Bry_.new_a7("</") , Bry__end_lhs = Bry_.new_a7("</")
; ;
private static final int Len__only_include_end = Bry__only_include_end.length; private static final int Len__only_include_end = Bry__only_include_end.length;
private static final int private static final Btrie_slim_mgr cur_char_trie = Cur_char_trie__new();
Found__line_bgn = 0 private static final Ordered_hash
, Found__line_end = 1 ignored_tags__noinclude = Ordered_hash_.New_bry().Add_many_str("includeonly", "/includeonly")
, Found__pipe = 2 , ignored_tags__includeonly = Ordered_hash_.New_bry().Add_many_str("noinclude", "/noinclude", "onlyinclude", "/onlyinclude");
, Found__equals = 3 private static final Hash_adp_bry
, Found__angle = 4 ignored_elements__noinclude = Hash_adp_bry.cs().Add_many_str("noinclude")
, Found__close = 5 , ignored_elements__includeonly = Hash_adp_bry.cs().Add_many_str("includeonly");
, Found__open = 6 private static Btrie_slim_mgr Cur_char_trie__new() {
; Btrie_slim_mgr rv = Btrie_slim_mgr.ci_a7();
String[] ary = new String[] {"|", "=", "<", "\n", "{", "[", "-{", "}", "]"};
for (String str : ary) {
byte[] bry = Bry_.new_a7(str);
rv.Add_obj(bry, new Xomw_prepro_curchar_itm(bry, bry[0]));
}
// handle "}-" separately
byte[] langv_end = Bry_.new_a7("}-");
rv.Add_obj(langv_end, new Xomw_prepro_curchar_itm(langv_end, Byte_ascii.Bang));
return rv;
}
} }

@ -59,18 +59,48 @@ public class Xomw_prepro_wkr__tst {
@Test public void Ext__pre() { @Test public void Ext__pre() {
fxt.Test__parse("a<pre id=\"1\">b</pre>c", "<root>a<ext><name>pre</name><attr> id=&quot;1&quot;</attr><inner>b</inner><close>&lt;/pre&gt;</close></ext>c</root>"); fxt.Test__parse("a<pre id=\"1\">b</pre>c", "<root>a<ext><name>pre</name><attr> id=&quot;1&quot;</attr><inner>b</inner><close>&lt;/pre&gt;</close></ext>c</root>");
} }
/* @Test public void Heading() {
TODO: fxt.Test__parse(String_.Concat_lines_nl_skip_last
* for_inclusion; <onlyinclude> in String ( "a"
* heading.general , "== b1 =="
* heading.EOS: "==a" (no closing ==) , "z"
* ignored tags ), String_.Concat_lines_nl_skip_last
*/ ( "<root>a"
, "<h level=\"2\" i=\"1\">== b1 ==</h>"
, "z</root>"
));
}
// heading is the last line of the source and is not followed by a new-line
@Test public void Heading__eos__no_nl() {
	String src = String_.Concat_lines_nl_skip_last
	( "a"
	, "== b1 =="
	);
	String expd = String_.Concat_lines_nl_skip_last
	( "<root>a"
	, "<h level=\"2\" i=\"1\">== b1 ==</h></root>"
	);
	fxt.Test__parse(src, expd);
}
// heading is the first line of the source, so no new-line precedes it
@Test public void Heading__bos__implied_nl() {
	String src = String_.Concat_lines_nl_skip_last
	( "== b1 =="
	, "z"
	);
	String expd = String_.Concat_lines_nl_skip_last
	( "<root><h level=\"2\" i=\"1\">== b1 ==</h>"
	, "z</root>"
	);
	fxt.Test__parse(src, expd);
}
// for_inclusion = n: only the <onlyinclude> tags themselves are expected inside <ignore>
@Test public void Inclusion__n() {
	fxt.Init__for_inclusion_(Bool_.N);
	String src = "a<onlyinclude>b</onlyinclude>c";
	String expd = "<root>a<ignore>&lt;onlyinclude&gt;</ignore>b<ignore>&lt;/onlyinclude&gt;</ignore>c</root>";
	fxt.Test__parse(src, expd);
}
// for_inclusion = y: text outside <onlyinclude> is expected inside <ignore> together with the tags
@Test public void Inclusion__y() {
	fxt.Init__for_inclusion_(Bool_.Y);
	String src = "a<onlyinclude>b</onlyinclude>c";
	String expd = "<root><ignore>a&lt;onlyinclude&gt;</ignore>b<ignore>&lt;/onlyinclude&gt;c</ignore></root>";
	fxt.Test__parse(src, expd);
}
} }
class Xomw_prepro_wkr__fxt { class Xomw_prepro_wkr__fxt {
private final Xomw_prepro_wkr wkr = new Xomw_prepro_wkr(); private final Xomw_prepro_wkr wkr = new Xomw_prepro_wkr();
private boolean for_inclusion = false; private boolean for_inclusion = false;
public void Init__for_inclusion_y_() {for_inclusion = true;} public void Init__for_inclusion_(boolean v) {for_inclusion = v;}
public void Test__parse(String src_str, String expd) { public void Test__parse(String src_str, String expd) {
byte[] src_bry = Bry_.new_u8(src_str); byte[] src_bry = Bry_.new_u8(src_str);
byte[] actl = wkr.Preprocess_to_xml(src_bry, for_inclusion); byte[] actl = wkr.Preprocess_to_xml(src_bry, for_inclusion);

Loading…
Cancel
Save