From 75972fcff09aa0b2aabf6d79991327cb15122675 Mon Sep 17 00:00:00 2001 From: gnosygnu Date: Wed, 11 Jan 2017 08:35:16 -0500 Subject: [PATCH] Mw_parse: Add support for gobbling single-line comments --- 100_core/src/gplx/Bry_find_.java | 6 ++--- .../parsers/mws/prepros/Xomw_prepro_wkr.java | 27 +++++++++++-------- .../mws/prepros/Xomw_prepro_wkr__tst.java | 10 +++---- 3 files changed, 22 insertions(+), 21 deletions(-) diff --git a/100_core/src/gplx/Bry_find_.java b/100_core/src/gplx/Bry_find_.java index e645868de..0621e5f89 100644 --- a/100_core/src/gplx/Bry_find_.java +++ b/100_core/src/gplx/Bry_find_.java @@ -159,14 +159,14 @@ public class Bry_find_ { return Bry_find_.Not_found; } public static int Find_bwd__while_space_or_tab(byte[] src, int cur, int end) { // get pos of 1st char that is not \t or \s - if (cur >= src.length) return Bry_find_.Not_found; - for (int i = cur; i >= end; i--) { + if (cur < 0 || cur >= src.length) return Bry_find_.Not_found; + for (int i = cur - 1; i >= end; i--) { byte b = src[i]; switch (b) { case Byte_ascii.Space: case Byte_ascii.Tab: break; default: - return i; + return i + 1; } } return Bry_find_.Not_found; diff --git a/400_xowa/src/gplx/xowa/parsers/mws/prepros/Xomw_prepro_wkr.java b/400_xowa/src/gplx/xowa/parsers/mws/prepros/Xomw_prepro_wkr.java index 8dda808c2..921c73e74 100644 --- a/400_xowa/src/gplx/xowa/parsers/mws/prepros/Xomw_prepro_wkr.java +++ b/400_xowa/src/gplx/xowa/parsers/mws/prepros/Xomw_prepro_wkr.java @@ -23,7 +23,6 @@ public class Xomw_prepro_wkr { // TS.UNSAFE:caching for repeated calls private final Hash_adp_bry xmlish_elems = Hash_adp_bry.ci_a7(); private final Hash_adp_bry xmlish_allow_missing_end_tag = Hash_adp_bry.cs().Add_many_str("includeonly", "noinclude", "onlyinclude"); private final Hash_adp_bry no_more_closing_tag = Hash_adp_bry.cs(); - // private final Btrie_slim_mgr search_dflt_trie = Btrie_slim_mgr.cs().Add_many_int(0, "[", "{", "<", "\n"); // $searchBase = "[{<\n"; private final Btrie_slim_mgr elements_trie = Btrie_slim_mgr.ci_a7(); private final Xomw_prepro_stack stack = new Xomw_prepro_stack(); private Bry_bfr accum = Bry_bfr_.New(); @@ -286,7 +285,11 @@ public class Xomw_prepro_wkr { // TS.UNSAFE:caching for repeated calls // Search forwards for trailing whitespace // $wsEnd will be the position of the last space (or the '>' if there's none) - int ws_end = Bry_find_.Find_fwd_while_space_or_tab(src, end_pos + 3, src_len); + // PORTED: $wsEnd = $endPos + 2 + strspn( $text, " \t", $endPos + 3 ); + int ws_end = end_pos + 2; // set pos to ">" + int ws_end2 = Bry_find_.Find_fwd_while_space_or_tab(src, end_pos + 3, src_len); + if (ws_end2 != ws_end + 1) // if ws after ">" + ws_end = ws_end2 - 1; // set to "last space" // Keep looking forward as long as we're finding more // comments. @@ -314,11 +317,12 @@ public class Xomw_prepro_wkr { // TS.UNSAFE:caching for repeated calls // Remove leading whitespace from the end of the accumulator // Sanity check first though int ws_len = i - ws_bgn; - byte[] accum_bry = accum.To_bry(); - if ( ws_len > 0 - && Bry_find_.Find_fwd_while_space_or_tab(accum_bry, -ws_len, src_len) == ws_len - ) { - accum.Clear().Add(Bry_.Mid(accum_bry, 0, -ws_len)); + if (ws_len > 0) { + // PORTED:"&& strspn( $accum, " \t", -$wsLength ) === $wsLength" + int accum_bry_len = accum.Len(); + int ws_end_lhs = Bry_find_.Find_bwd__while_space_or_tab(accum.Bfr(), accum_bry_len, 0); + if (accum_bry_len - ws_end_lhs == ws_len) + accum.Del_by(ws_len); } // Dump all but the last comment to the accumulator @@ -660,10 +664,11 @@ public class Xomw_prepro_wkr { // TS.UNSAFE:caching for repeated calls for (int j = 0; j < parts_len; j++) { Xomw_prepro_part part = (Xomw_prepro_part)parts.Get_at(j); if (part.Eqpos != -1) { - byte[] part_bry = part.bfr.To_bry(); - byte[] arg_key = Bry_.Mid(part_bry, 0, part.Eqpos); - byte[] arg_val = Bry_.Mid(part_bry, part.Eqpos + 1); - tmp_bfr.Add_str_a7("").Add(arg_key).Add_str_a7("=").Add(arg_val).Add_str_a7(""); + Bry_bfr part_bfr = part.bfr; + byte[] part_bfr_bry = part_bfr.Bfr(); + tmp_bfr.Add_str_a7("").Add_mid(part_bfr_bry, 0, part.Eqpos); + tmp_bfr.Add_str_a7("=").Add_mid(part_bfr_bry, part.Eqpos + 1, part_bfr.Len()); + tmp_bfr.Add_str_a7(""); } else { tmp_bfr.Add_str_a7("").Add(part.bfr.To_bry()).Add_str_a7(""); diff --git a/400_xowa/src/gplx/xowa/parsers/mws/prepros/Xomw_prepro_wkr__tst.java b/400_xowa/src/gplx/xowa/parsers/mws/prepros/Xomw_prepro_wkr__tst.java index fddd546f0..52ee53260 100644 --- a/400_xowa/src/gplx/xowa/parsers/mws/prepros/Xomw_prepro_wkr__tst.java +++ b/400_xowa/src/gplx/xowa/parsers/mws/prepros/Xomw_prepro_wkr__tst.java @@ -34,6 +34,9 @@ public class Xomw_prepro_wkr__tst { @Test public void Comment() { fxt.Test__parse("ac", "a<!--b-->c"); } + @Test public void Comment__nl__ws() { + fxt.Test__parse("xo\n \n \nz", "xo\n <!--1--> \n <!--2--> \nz"); + } @Test public void Ext__pre() { fxt.Test__parse("a
b
c", "apre id="1"b</pre>c"); } @@ -43,13 +46,6 @@ TODO: * heading.general * heading.EOS: "==a" (no closing ==) * ignored tags -* FIX: -if ( ws_len > 0 - && Bry_find_.Find_fwd_while_space_or_tab(accum_bry, -ws_len, src_len) == ws_len -) { - accum.Clear().Add(Bry_.Mid(accum_bry, 0, -ws_len)); -} - */ } class Xomw_prepro_wkr__fxt {