From b989ff63df66d6cacaf87dcdc0a6081bfb3ca60e Mon Sep 17 00:00:00 2001
From: gnosygnu
Date: Tue, 10 Jan 2017 17:36:24 -0500
Subject: [PATCH] Mw_parse: Support bracket, template, and tplarg

---
 100_core/src/gplx/Bry_find_.java              | 10 +++
 .../mws/prepros/Xomw_prepro_stack.java        | 25 +++----
 .../parsers/mws/prepros/Xomw_prepro_wkr.java  | 65 +++++++++++--------
 .../mws/prepros/Xomw_prepro_wkr__tst.java     | 12 +++-
 4 files changed, 70 insertions(+), 42 deletions(-)

diff --git a/100_core/src/gplx/Bry_find_.java b/100_core/src/gplx/Bry_find_.java
index 1be0b0a88..e645868de 100644
--- a/100_core/src/gplx/Bry_find_.java
+++ b/100_core/src/gplx/Bry_find_.java
@@ -235,6 +235,16 @@ public class Bry_find_ {
 			cur++;
 		}
 	}
+	public static int Find_fwd_while(byte[] src, int cur, int end, byte[] while_bry) {	// skips consecutive full copies of while_bry starting at cur; returns pos of first byte that does not begin another full copy (never past end)
+		int while_len = while_bry.length;
+		while (true) {
+			if (while_len == 0 || cur + while_len > end) return cur;	// empty pattern would never advance cur; a partial tail would index src past end
+			for (int i = 0; i < while_len; i++) {
+				if (while_bry[i] != src[i + cur]) return cur;	// mismatch inside this copy: stop at the start of the copy
+			}
+			cur += while_len;	// whole copy matched; try the next one
+		}
+	}
 	public static int Find_fwd_until(byte[] src, int cur, int end, byte until_byte) {
 		while (true) {
 			if ( cur == end
diff --git a/400_xowa/src/gplx/xowa/parsers/mws/prepros/Xomw_prepro_stack.java b/400_xowa/src/gplx/xowa/parsers/mws/prepros/Xomw_prepro_stack.java
index 2806abfbb..c848b6827 100644
--- a/400_xowa/src/gplx/xowa/parsers/mws/prepros/Xomw_prepro_stack.java
+++ b/400_xowa/src/gplx/xowa/parsers/mws/prepros/Xomw_prepro_stack.java
@@ -37,7 +37,8 @@ class Xomw_prepro_stack {
 	public Xomw_prepro_part Get_current_part() {
 		if (top == null) {
 			return null;
-		} else {
+		}
+		else {
 			return top.Get_current_part();
 		}
 	}
@@ -45,7 +46,7 @@ class Xomw_prepro_stack {
 	public void Push(Xomw_prepro_piece item) {
 		stack.Add(item);
 		this.top = (Xomw_prepro_piece)stack.Get_at(stack.Len() - 1);
-		accum.Clear().Add(top.Get_accum());
+		accum = top.Get_accum();	// point accum at the top piece's buffer instead of copying its bytes
 	}
 
 	public Xomw_prepro_piece Pop() {
@@ -56,10 +57,11 @@ class Xomw_prepro_stack {
 
 		Xomw_prepro_piece rv = (Xomw_prepro_piece)stack.Get_at(len - 1);
 		stack.Del_at(len - 1);
+		len--;	// keep len in step with the list after Del_at so the check below sees the new size
 
 		if (len > 0) {
 			this.top = (Xomw_prepro_piece)stack.Get_at(stack.Len() - 1);
-			accum.Clear().Add(top.Get_accum());
+			accum = top.Get_accum();
 		} else {
 			this.top = null;
 			this.accum = root_accum;
@@ -69,7 +71,7 @@ class Xomw_prepro_stack {
 
 	public void Add_part(byte[] bry) {
 		top.Add_part(bry);
-		accum.Clear().Add(top.Get_accum());
+		accum = top.Get_accum();	// Add_part appended a new part, so the current buffer changed
 	}
 
 	public Xomw_prepro_flags Get_flags() {
@@ -103,12 +105,13 @@ class Xomw_prepro_piece {
 		this.count = count;
 		this.start_pos = start_pos;
 		this.line_start = line_start;
+		parts.Add(new Xomw_prepro_part(Bry_.Empty));	// seed one empty part; Get_current_part reads parts.Get_at(parts.Len() - 1)
 	}
 	public Xomw_prepro_part Get_current_part() {
 		return (Xomw_prepro_part)parts.Get_at(parts.Len() - 1);
 	}
-	public byte[] Get_accum() {
-		return Get_current_part().bry;
+	public Bry_bfr Get_accum() {
+		return Get_current_part().bfr;
 	}
 	public void Add_part(byte[] bry) {
 		parts.Add(new Xomw_prepro_part(bry));
@@ -126,13 +129,13 @@ class Xomw_prepro_piece {
 	public byte[] Break_syntax(Bry_bfr tmp_bfr, int opening_count) {
 		byte[] rv = Bry_.Empty;
 		if (Bry_.Eq(open, Byte_ascii.Nl_bry)) {
-			rv = ((Xomw_prepro_part)parts.Get_at(0)).bry;
+			rv = ((Xomw_prepro_part)parts.Get_at(0)).bfr.To_bry();
 		}
 		else {
 			if (opening_count == -1) {
 				opening_count = count;
 			}
-			rv = Bry_.Repeat_bry(open, opening_count);
+			tmp_bfr.Add(Bry_.Repeat_bry(open, opening_count));	// write the opener into tmp_bfr so it precedes the parts appended below
 
 			// concat parts with "|"
 			boolean first = true;
@@ -145,7 +148,7 @@ class Xomw_prepro_piece {
 				else {
 					tmp_bfr.Add_byte_pipe();
 				}
-				tmp_bfr.Add(part.bry);
+				tmp_bfr.Add(part.bfr.To_bry());
 			}
 			rv = tmp_bfr.To_bry_and_clear();
 		}
@@ -154,9 +157,9 @@ class Xomw_prepro_piece {
 }
 class Xomw_prepro_part {
 	public Xomw_prepro_part(byte[] bry) {
-		this.bry = bry;
+		bfr.Add(bry);	// ctor arg becomes the initial content of the part's buffer
 	}
-	public final byte[] bry;
+	public final Bry_bfr bfr = Bry_bfr_.New();	// part text is held in a buffer rather than a fixed byte[]
 	public int Eqpos = -1;
 	public int comment_end = -1;
 	public int visual_end = -1;
diff --git a/400_xowa/src/gplx/xowa/parsers/mws/prepros/Xomw_prepro_wkr.java b/400_xowa/src/gplx/xowa/parsers/mws/prepros/Xomw_prepro_wkr.java
index 9c711ddd0..b0b4a3b13 100644
--- a/400_xowa/src/gplx/xowa/parsers/mws/prepros/Xomw_prepro_wkr.java
+++ b/400_xowa/src/gplx/xowa/parsers/mws/prepros/Xomw_prepro_wkr.java
@@ -21,7 +21,7 @@ public class Xomw_prepro_wkr { // TS.UNSAFE:caching for repeated calls
 	private final Bry_bfr tmp_bfr = Bry_bfr_.New();
 	private final List_adp comments_list = List_adp_.New();
 	private final Hash_adp_bry xmlish_elems = Hash_adp_bry.ci_a7();
-	private final Hash_adp_bry xmlish_allow_missing_end_tag = Hash_adp_bry.cs();
+	private final Hash_adp_bry xmlish_allow_missing_end_tag = Hash_adp_bry.cs().Add_many_str("includeonly", "noinclude", "onlyinclude");
 	private final Hash_adp_bry no_more_closing_tag = Hash_adp_bry.cs();
 	// private final Btrie_slim_mgr search_dflt_trie = Btrie_slim_mgr.cs().Add_many_int(0, "[", "{", "<", "\n"); // $searchBase = "[{<\n";
 	private final Xomw_prepro_stack stack = new Xomw_prepro_stack();
@@ -47,7 +47,7 @@ public class Xomw_prepro_wkr { // TS.UNSAFE:caching for repeated calls
 
 	public byte[] Preprocess_to_xml(byte[] src, boolean for_inclusion) {
 		xmlish_elems.Clear(); // TODO.XO: parser->getStripList();
-		xmlish_allow_missing_end_tag.Add_many_str("includeonly", "noinclude", "onlyinclude");
+		// PERF: xmlish_allow_missing_end_tag.Add_many_str("includeonly", "noinclude", "onlyinclude")
 		boolean enable_only_include = false;
 
 		Hash_adp_bry ignored_tags, ignored_elements;
@@ -108,6 +108,7 @@ public class Xomw_prepro_wkr { // TS.UNSAFE:caching for repeated calls
 		Btrie_slim_mgr elements_trie = Btrie_slim_mgr.ci_a7();
 		Btrie_slim_mgr elements_end_trie = Btrie_slim_mgr.ci_a7();
 
+		byte[] cur_char = Bry_.Empty;	// declaration moved up from further down in this method
 		byte[] cur_closing = Bry_.Empty;
 		byte[] inner = null;
 
@@ -126,7 +127,6 @@ public class Xomw_prepro_wkr { // TS.UNSAFE:caching for repeated calls
 				find_only_include = false;
 			}
 
-			byte[] cur_char = Bry_.Empty;
 			Xomw_prepro_rule rule = null;
 			if (fake_line_start) {
 				found = Found__line_bgn;
@@ -208,25 +208,33 @@ public class Xomw_prepro_wkr { // TS.UNSAFE:caching for repeated calls
 					}
 				}
 				else {
-					Xomw_prepro_curchar_itm cur_char_itm = (Xomw_prepro_curchar_itm)cur_char_trie.Match_at(trv, src, i, src_len);
-					cur_char = cur_char_itm.bry;
-					switch (cur_char_itm.type) {
-						case Byte_ascii.Pipe: found = Found__pipe; break;
-						case Byte_ascii.Eq: found = Found__equals; break;
-						case Byte_ascii.Angle_bgn: found = Found__angle; break;
-						case Byte_ascii.Nl: found = in_heading ? Found__line_end : Found__line_bgn; break;
-						case Byte_ascii.Curly_bgn: {found = Found__open; rule = rule_curly; break;}
-						case Byte_ascii.Brack_bgn: {found = Found__open; rule = rule_brack; break;}
-						case Byte_ascii.Dash: {found = Found__open; rule = rule_langv; break;}
-						default:
-							if (Bry_.Eq(cur_char, cur_closing)) {
-								found = Found__close;
+					boolean match = false;	// set when either the closing delimiter or a trie entry is recognized at i
+					if (cur_closing != Bry_.Empty) {	// identity compare against the Bry_.Empty value cur_closing is initialized to
+						if (Bry_.Match(src, i, i + cur_closing.length, cur_closing)) {
+							match = true;
+							found = Found__close;
+							cur_char = cur_closing;
+						}
+					}
+					else {	// NOTE(review): the trie lookup (pipe/equals/angle/newline/open) only runs when no closing delimiter is active -- confirm this is intended
+						Xomw_prepro_curchar_itm cur_char_itm = (Xomw_prepro_curchar_itm)cur_char_trie.Match_at(trv, src, i, src_len);
+						if (cur_char_itm != null) {	// null-guard the trie result before reading its fields
+							match = true;
+							cur_char = cur_char_itm.bry;
+							switch (cur_char_itm.type) {
+								case Byte_ascii.Pipe: found = Found__pipe; break;
+								case Byte_ascii.Eq: found = Found__equals; break;
+								case Byte_ascii.Angle_bgn: found = Found__angle; break;
+								case Byte_ascii.Nl: found = in_heading ? Found__line_end : Found__line_bgn; break;
+								case Byte_ascii.Curly_bgn: {found = Found__open; rule = rule_curly; break;}
+								case Byte_ascii.Brack_bgn: {found = Found__open; rule = rule_brack; break;}
+								case Byte_ascii.Dash: {found = Found__open; rule = rule_langv; break;}
 							}
-							else {
-								i++;
-								continue;
-							}
-							break;
+						}
+					}
+					if (!match) {	// nothing recognized at i: advance one byte and retry
+						i++;
+						continue;
 					}
 				}
 			}
@@ -528,7 +536,7 @@ public class Xomw_prepro_wkr { // TS.UNSAFE:caching for repeated calls
 			}
 			else if (found == Found__open) {
 				// count opening brace characters
-				int count = Bry_find_.Find_fwd(src, cur_char, i, src_len);
+				int count = Bry_find_.Find_fwd_while(src, i, src_len, cur_char) - i;	// Find_fwd_while returns a position; subtract i to get the run length in bytes
 
 				// we need to add to stack only if opening brace count is enough for one of the rules
 				if (count >= rule.min) {
@@ -550,7 +558,7 @@ public class Xomw_prepro_wkr { // TS.UNSAFE:caching for repeated calls
 			else if (found == Found__close) {
 				Xomw_prepro_piece piece = stack.top;
 				// lets check if there are enough characters for closing brace
-				int count = Bry_find_.Find_fwd(src, cur_char, i, src_len);
+				int count = Bry_find_.Find_fwd_while(src, i, src_len, cur_char) - i;	// run length of the closing delimiter starting at i
 				int max_count = piece.count;
 				if (count > max_count) count = max_count;
 
@@ -582,13 +590,13 @@ public class Xomw_prepro_wkr { // TS.UNSAFE:caching for repeated calls
 				byte[] element = null;
 				if (name_type == Xomw_prepro_rule.Name__null) {
 					// No element, just literal text
-					piece.Break_syntax(tmp_bfr, matching_count);
+					tmp_bfr.Add(piece.Break_syntax(tmp_bfr, matching_count));	// previously the returned bytes were discarded
 					element = tmp_bfr.Add(Bry_.Repeat_bry(rule.end, matching_count)).To_bry_and_clear();
 				}
 				else {
 					// Create XML element; Note: $parts is already XML, does not need to be encoded further
 					List_adp parts = piece.parts;
-					byte[] title = ((Xomw_prepro_part)parts.Get_at(0)).bry;
+					byte[] title = ((Xomw_prepro_part)parts.Get_at(0)).bfr.To_bry_and_clear();
 					parts.Del_at(0);
 
 					// The invocation is at the start of the line if lineStart is set in the stack, and all opening brackets are used up.
@@ -609,12 +617,13 @@ public class Xomw_prepro_wkr { // TS.UNSAFE:caching for repeated calls
 					for (int j = 0; j < parts_len; j++) {
 						Xomw_prepro_part part = (Xomw_prepro_part)parts.Get_at(j);
 						if (part.Eqpos != -1) {
-							byte[] arg_key = Bry_.Mid(part.bry, 0, part.Eqpos);
-							byte[] arg_val = Bry_.Mid(part.bry, part.Eqpos + 1);
+							byte[] part_bry = part.bfr.To_bry();	// materialize once; key and val are both sliced from it
+							byte[] arg_key = Bry_.Mid(part_bry, 0, part.Eqpos);
+							byte[] arg_val = Bry_.Mid(part_bry, part.Eqpos + 1);
 							tmp_bfr.Add_str_a7("").Add(arg_key).Add_str_a7("=").Add(arg_val).Add_str_a7("");
 						}
 						else {
-							tmp_bfr.Add_str_a7("{").Add(part.bry).Add_str_a7("}");
+							tmp_bfr.Add_str_a7("{").Add(part.bfr.To_bry()).Add_str_a7("}");
 							arg_idx++;
 						}
 					}
diff --git a/400_xowa/src/gplx/xowa/parsers/mws/prepros/Xomw_prepro_wkr__tst.java b/400_xowa/src/gplx/xowa/parsers/mws/prepros/Xomw_prepro_wkr__tst.java
index 10eb1e304..a68640b53 100644
--- a/400_xowa/src/gplx/xowa/parsers/mws/prepros/Xomw_prepro_wkr__tst.java
+++ b/400_xowa/src/gplx/xowa/parsers/mws/prepros/Xomw_prepro_wkr__tst.java
@@ -22,9 +22,15 @@ public class Xomw_prepro_wkr__tst {
 	@Test public void Text() {
 		fxt.Test__parse("abc", "abc");
 	}
-//	@Test public void Brack() {
-//		fxt.Test__parse("a[[b]]c", "abc");
-//	}
+	@Test public void Brack() {
+		fxt.Test__parse("a[[b]]c", "a[[b]]c");
+	}
+	@Test public void Template() {
+		fxt.Test__parse("a{{b}}c", "ac");
+	}
+	@Test public void Tplarg() {
+		fxt.Test__parse("a{{{b}}}c", "abc");
+	}
 }
 class Xomw_prepro_wkr__fxt {
 	private final Xomw_prepro_wkr wkr = new Xomw_prepro_wkr();