From 2ad1be255cc8fc0b3e5192b3d66eae04a60b3c45 Mon Sep 17 00:00:00 2001 From: gnosygnu Date: Wed, 11 Jan 2017 13:52:41 -0500 Subject: [PATCH] Mw_parse.Prepro: Support heading and onlyinclude --- 100_core/src/gplx/Bry_find_.java | 2 +- 100_core/src/gplx/Ordered_hash.java | 1 + 100_core/src/gplx/Ordered_hash_base.java | 9 ++ .../parsers/mws/prepros/Xomw_prepro_wkr.java | 83 ++++++++++--------- .../mws/prepros/Xomw_prepro_wkr__tst.java | 46 ++++++++-- 5 files changed, 92 insertions(+), 49 deletions(-) diff --git a/100_core/src/gplx/Bry_find_.java b/100_core/src/gplx/Bry_find_.java index 0621e5f89..18f2314d2 100644 --- a/100_core/src/gplx/Bry_find_.java +++ b/100_core/src/gplx/Bry_find_.java @@ -159,7 +159,7 @@ public class Bry_find_ { return Bry_find_.Not_found; } public static int Find_bwd__while_space_or_tab(byte[] src, int cur, int end) { // get pos of 1st char that is not \t or \s - if (cur < 0 || cur >= src.length) return Bry_find_.Not_found; + if (cur < 0 || cur > src.length) return Bry_find_.Not_found; for (int i = cur - 1; i >= end; i--) { byte b = src[i]; switch (b) { diff --git a/100_core/src/gplx/Ordered_hash.java b/100_core/src/gplx/Ordered_hash.java index c71b66cdb..c2d828278 100644 --- a/100_core/src/gplx/Ordered_hash.java +++ b/100_core/src/gplx/Ordered_hash.java @@ -19,6 +19,7 @@ package gplx; import gplx.core.lists.*; /*EnumerAble,ComparerAble*/ public interface Ordered_hash extends Hash_adp, List_adp__getable { void Add_at(int i, Object o); + Ordered_hash Add_many_str(String... ary); int Idx_of(Object item); void Sort(); void Sort_by(ComparerAble comparer); diff --git a/100_core/src/gplx/Ordered_hash_base.java b/100_core/src/gplx/Ordered_hash_base.java index 9a1aaf8fe..a787109cd 100644 --- a/100_core/src/gplx/Ordered_hash_base.java +++ b/100_core/src/gplx/Ordered_hash_base.java @@ -54,6 +54,15 @@ public class Ordered_hash_base extends Hash_adp_base implements Ordered_hash, Gf ordered.Add_at(i, val); AssertCounts(); } + public Ordered_hash Add_many_str(String... ary) { + int ary_len = ary.length; + for (int i = 0; i < ary_len; i++) { + String itm = ary[i]; + byte[] bry = Bry_.new_u8(itm); + this.Add(bry, bry); + } + return this; + } void AssertCounts() { if (super.Count() != ordered.Count()) throw Err_.new_wo_type("counts do not match", "hash", super.Count(), "list", ordered.Count()); } diff --git a/400_xowa/src/gplx/xowa/parsers/mws/prepros/Xomw_prepro_wkr.java b/400_xowa/src/gplx/xowa/parsers/mws/prepros/Xomw_prepro_wkr.java index dbe0b1c2a..15f88eb81 100644 --- a/400_xowa/src/gplx/xowa/parsers/mws/prepros/Xomw_prepro_wkr.java +++ b/400_xowa/src/gplx/xowa/parsers/mws/prepros/Xomw_prepro_wkr.java @@ -17,7 +17,7 @@ along with this program. If not, see . */ package gplx.xowa.parsers.mws.prepros; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.mws.*; import gplx.core.btries.*; -public class Xomw_prepro_wkr { // TS.UNSAFE:caching for repeated calls +public class Xomw_prepro_wkr { // THREAD.UNSAFE:caching for repeated calls private final Bry_bfr tmp_bfr = Bry_bfr_.New(); private final List_adp comments_list = List_adp_.New(); private final Hash_adp_bry xmlish_elems = Hash_adp_bry.ci_a7(); @@ -25,36 +25,15 @@ public class Xomw_prepro_wkr { // TS.UNSAFE:caching for repeated calls private final Hash_adp_bry no_more_closing_tag = Hash_adp_bry.cs(); private final Btrie_slim_mgr elements_trie = Btrie_slim_mgr.ci_a7(); private final Xomw_prepro_stack stack = new Xomw_prepro_stack(); - private Bry_bfr accum = Bry_bfr_.New(); - private final Btrie_rv trv = new Btrie_rv(); - - private static final Btrie_slim_mgr cur_char_trie = Cur_char_trie__new(); - private static final Hash_adp_bry - ignored_tags__noinclude = Hash_adp_bry.cs().Add_many_str("includeonly", "/includeonly") - , ignored_elements__noinclude = Hash_adp_bry.cs().Add_many_str("noinclude") - , ignored_tags__includeonly = Hash_adp_bry.cs().Add_many_str("noinclude", "/noinclude", "onlyinclude", "/onlyinclude") - , ignored_elements__includeonly = Hash_adp_bry.cs().Add_many_str("includeonly"); - private static Btrie_slim_mgr Cur_char_trie__new() { - Btrie_slim_mgr rv = Btrie_slim_mgr.ci_a7(); - String[] ary = new String[] {"|", "=", "<", "\n", "{", "[", "-{", "}", "]"}; - for (String str : ary) { - byte[] bry = Bry_.new_a7(str); - rv.Add_obj(bry, new Xomw_prepro_curchar_itm(bry, bry[0])); - } - - // handle "}-" separately - byte[] langv_end = Bry_.new_a7("}-"); - rv.Add_obj(langv_end, new Xomw_prepro_curchar_itm(langv_end, Byte_ascii.Bang)); - return rv; - } + private Bry_bfr accum = Bry_bfr_.New(); public byte[] Preprocess_to_xml(byte[] src, boolean for_inclusion) { xmlish_elems.Clear(); // TODO.XO: parser->getStripList(); pre|nowiki|gallery|indicator|ref|reference // RELIC: $xmlishAllowMissingEndTag = [ 'includeonly', 'noinclude', 'onlyinclude' ]; boolean enable_only_include = false; - Hash_adp_bry ignored_tags, ignored_elements; + Ordered_hash ignored_tags; Hash_adp ignored_elements; if (for_inclusion) { ignored_tags = ignored_tags__noinclude; ignored_elements = ignored_elements__noinclude; @@ -70,8 +49,17 @@ public class Xomw_prepro_wkr { // TS.UNSAFE:caching for repeated calls xmlish_elems.Add_many_str("includeonly"); } + // PORTED:$xmlishRegex = implode( '|', array_merge( $xmlishElements, $ignoredTags ) ); + elements_trie.Clear(); + elements_trie.Add_obj("pre", new Xomw_prepro_elem(Xomw_prepro_elem.Type__other, Bry_.new_a7("pre"))); + elements_trie.Add_obj("!--", new Xomw_prepro_elem(Xomw_prepro_elem.Type__comment, Bry_.new_a7("comment"))); + int ignored_tags_len = ignored_tags.Count(); + for (int j = 0; j < ignored_tags_len; j++) { + byte[] bry = (byte[])ignored_tags.Get_at(j); + elements_trie.Add_obj(bry, new Xomw_prepro_elem(Xomw_prepro_elem.Type__other, bry)); + } + // RELIC: - // $xmlishRegex = implode( '|', array_merge( $xmlishElements, $ignoredTags ) ); // Use "A" modifier (anchored) instead of "^", because ^ doesn't work with an offset // $elementsRegex = "~($xmlishRegex)(?:\s|\/>|>)|(!--)~iA"; @@ -120,11 +108,6 @@ public class Xomw_prepro_wkr { // TS.UNSAFE:caching for repeated calls byte[] inner = null; Xomw_prepro_rule rule = null; - // XOWA: xml elements - elements_trie.Clear(); - elements_trie.Add_obj("pre", new Xomw_prepro_elem(Xomw_prepro_elem.Type__other, Bry_.new_a7("pre"))); - elements_trie.Add_obj("!--", new Xomw_prepro_elem(Xomw_prepro_elem.Type__comment, Bry_.new_a7("comment"))); - while (true) { if (find_only_include) { // Ignore all input up to the next @@ -488,7 +471,7 @@ public class Xomw_prepro_wkr { // TS.UNSAFE:caching for repeated calls } int eq_end = Bry_find_.Find_fwd_while(src, i, i + 6, Byte_ascii.Eq); // PORTED:strspn( $src, '=', $i, 6 ); - int count = i - eq_end; + int count = eq_end - i; if (count == 1 && find_equals) { // DWIM: This looks kind of like a name/value separator. // Let's let the equals handler have it and break the @@ -739,6 +722,15 @@ public class Xomw_prepro_wkr { // TS.UNSAFE:caching for repeated calls else if (Bry_.Eq(bry, rule_langv.bgn)) return rule_langv; else throw Err_.new_unhandled(bry); } + private static final int + Found__line_bgn = 0 + , Found__line_end = 1 + , Found__pipe = 2 + , Found__equals = 3 + , Found__angle = 4 + , Found__close = 5 + , Found__open = 6 + ; private static final Xomw_prepro_rule rule_curly = new Xomw_prepro_rule(Bry_.new_a7("{"), Bry_.new_a7("}") , 2, 3, new int[] {Xomw_prepro_rule.Name__invalid, Xomw_prepro_rule.Name__invalid, Xomw_prepro_rule.Name__tmpl, Xomw_prepro_rule.Name__targ}) , rule_brack = new Xomw_prepro_rule(Bry_.new_a7("["), Bry_.new_a7("]") , 2, 2, new int[] {Xomw_prepro_rule.Name__invalid, Xomw_prepro_rule.Name__invalid, Xomw_prepro_rule.Name__null}) @@ -753,13 +745,24 @@ public class Xomw_prepro_wkr { // TS.UNSAFE:caching for repeated calls , Bry__end_lhs = Bry_.new_a7("bc", "apre id="1"b</pre>c"); } -/* -TODO: -* for_inclusion; in String -* heading.general -* heading.EOS: "==a" (no closing ==) -* ignored tags -*/ + @Test public void Heading() { + fxt.Test__parse(String_.Concat_lines_nl_skip_last + ( "a" + , "== b1 ==" + , "z" + ), String_.Concat_lines_nl_skip_last + ( "a" + , "== b1 ==" + , "z" + )); + } + @Test public void Heading__eos__no_nl() { + fxt.Test__parse(String_.Concat_lines_nl_skip_last + ( "a" + , "== b1 ==" + ), String_.Concat_lines_nl_skip_last + ( "a" + , "== b1 ==" + )); + } + @Test public void Heading__bos__implied_nl() { + fxt.Test__parse(String_.Concat_lines_nl_skip_last + ( "== b1 ==" + , "z" + ), String_.Concat_lines_nl_skip_last + ( "== b1 ==" + , "z" + )); + } + @Test public void Inclusion__n() { + fxt.Init__for_inclusion_(Bool_.N); + fxt.Test__parse("abc", "a<onlyinclude>b</onlyinclude>c"); + } + @Test public void Inclusion__y() { + fxt.Init__for_inclusion_(Bool_.Y); + fxt.Test__parse("abc", "a<onlyinclude>b</onlyinclude>c"); + } } class Xomw_prepro_wkr__fxt { private final Xomw_prepro_wkr wkr = new Xomw_prepro_wkr(); private boolean for_inclusion = false; - public void Init__for_inclusion_y_() {for_inclusion = true;} + public void Init__for_inclusion_(boolean v) {for_inclusion = v;} public void Test__parse(String src_str, String expd) { byte[] src_bry = Bry_.new_u8(src_str); byte[] actl = wkr.Preprocess_to_xml(src_bry, for_inclusion);