diff --git a/400_xowa/src/gplx/langs/phps/Php_str_.java b/400_xowa/src/gplx/langs/phps/Php_str_.java new file mode 100644 index 000000000..1517582c8 --- /dev/null +++ b/400_xowa/src/gplx/langs/phps/Php_str_.java @@ -0,0 +1,76 @@ +/* +XOWA: the XOWA Offline Wiki Application +Copyright (C) 2012 gnosygnu@gmail.com + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as +published by the Free Software Foundation, either version 3 of the +License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see . +*/ +package gplx.langs.phps; import gplx.*; import gplx.langs.*; +public class Php_str_ { + public static int Strspn_fwd__byte(byte[] src, byte find, int bgn, int max, int src_len) { + if (max == -1) max = src_len; + int rv = 0; + for (int i = bgn; i < src_len; i++) { + if (find == src[i] && rv < max) + rv++; + else + break; + } + return rv; + } + public static int Strspn_fwd__space_or_tab(byte[] src, int bgn, int max, int src_len) { + if (max == -1) max = src_len; + int rv = 0; + for (int i = bgn; i < src_len; i++) { + switch (src[i]) { + case Byte_ascii.Space: + case Byte_ascii.Tab: + if (rv < max) { + rv++; + continue; + } + break; + } + break; + } + return rv; + } + public static int Strspn_bwd__byte(byte[] src, byte find, int bgn, int max) { + if (max == -1) max = Int_.Max_value; + int rv = 0; + for (int i = bgn - 1; i > -1; i--) { + if (find == src[i] && rv < max) + rv++; + else + break; + } + return rv; + } + public static int Strspn_bwd__space_or_tab(byte[] src, int bgn, int max) { + if (max == -1) max = Int_.Max_value; + int rv = 0; + for (int i = bgn - 1; i > -1; i--) { + switch (src[i]) { + case Byte_ascii.Space: + case Byte_ascii.Tab: + if (rv < max) { + rv++; + continue; + } + break; + } + break; + } + return rv; + } +} diff --git a/400_xowa/src/gplx/langs/phps/Php_str___tst.java b/400_xowa/src/gplx/langs/phps/Php_str___tst.java new file mode 100644 index 000000000..b654e37f0 --- /dev/null +++ b/400_xowa/src/gplx/langs/phps/Php_str___tst.java @@ -0,0 +1,58 @@ +/* +XOWA: the XOWA Offline Wiki Application +Copyright (C) 2012 gnosygnu@gmail.com + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as +published by the Free Software Foundation, either version 3 of the +License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see . +*/ +package gplx.langs.phps; import gplx.*; import gplx.langs.*; +import org.junit.*; import gplx.core.tests.*; +public class Php_str___tst { + private final Php_str___fxt fxt = new Php_str___fxt(); + @Test public void Strspn_fwd__byte() { + fxt.Test__strspn_fwd__byte("aaaaab", Byte_ascii.Ltr_a, 0, -1, 5); // basic + fxt.Test__strspn_fwd__byte("aaaaab", Byte_ascii.Ltr_a, 1, -1, 4); // bgn + fxt.Test__strspn_fwd__byte("aaaaab", Byte_ascii.Ltr_a, 1, 2, 2); // max + } + @Test public void Strspn_fwd__space_or_tab() { + fxt.Test__strspn_fwd__space_or_tab(" a", 0, -1, 5); // basic + fxt.Test__strspn_fwd__space_or_tab(" a", 1, -1, 4); // bgn + fxt.Test__strspn_fwd__space_or_tab(" a", 1, 2, 2); // max + } + @Test public void Strspn_bwd__byte() { + fxt.Test__strspn_bwd__byte("aaaaab", Byte_ascii.Ltr_a, 5, -1, 5); // basic + fxt.Test__strspn_bwd__byte("aaaaab", Byte_ascii.Ltr_a, 4, -1, 4); // bgn + fxt.Test__strspn_bwd__byte("aaaaab", Byte_ascii.Ltr_a, 4, 2, 2); // max + } + @Test public void Strspn_bwd__space_or_tab() { + fxt.Test__strspn_bwd__space_or_tab(" a", 5, -1, 5); // basic + fxt.Test__strspn_bwd__space_or_tab(" a", 4, -1, 4); // bgn + fxt.Test__strspn_bwd__space_or_tab(" a", 4, 2, 2); // max + } +} +class Php_str___fxt { + public void Test__strspn_fwd__byte(String src_str, byte find, int bgn, int max, int expd) { + byte[] src_bry = Bry_.new_u8(src_str); + Gftest.Eq__int(expd, Php_str_.Strspn_fwd__byte(src_bry, find, bgn, max, src_bry.length)); + } + public void Test__strspn_fwd__space_or_tab(String src_str, int bgn, int max, int expd) { + byte[] src_bry = Bry_.new_u8(src_str); + Gftest.Eq__int(expd, Php_str_.Strspn_fwd__space_or_tab(src_bry, bgn, max, src_bry.length)); + } + public void Test__strspn_bwd__byte(String src_str, byte find, int bgn, int max, int expd) { + Gftest.Eq__int(expd, Php_str_.Strspn_bwd__byte(Bry_.new_u8(src_str), find, bgn, max)); + } + public void Test__strspn_bwd__space_or_tab(String src_str, int bgn, int max, int expd) { + Gftest.Eq__int(expd, Php_str_.Strspn_bwd__space_or_tab(Bry_.new_u8(src_str), bgn, max)); + } +} diff --git a/400_xowa/src/gplx/xowa/parsers/mws/prepros/Xomw_prepro_wkr.java b/400_xowa/src/gplx/xowa/parsers/mws/prepros/Xomw_prepro_wkr.java index 23c7150a3..f1d013524 100644 --- a/400_xowa/src/gplx/xowa/parsers/mws/prepros/Xomw_prepro_wkr.java +++ b/400_xowa/src/gplx/xowa/parsers/mws/prepros/Xomw_prepro_wkr.java @@ -17,6 +17,7 @@ along with this program. If not, see . */ package gplx.xowa.parsers.mws.prepros; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.mws.*; import gplx.core.btries.*; +import gplx.langs.phps.*; public class Xomw_prepro_wkr { // THREAD.UNSAFE: caching for repeated calls private final Bry_bfr tmp_bfr = Bry_bfr_.New(); private final List_adp comments_list = List_adp_.New(); @@ -288,15 +289,11 @@ public class Xomw_prepro_wkr { // THREAD.UNSAFE: caching for repeated calls } else { // Search backwards for leading whitespace - int ws_bgn = i > 0 ? Bry_find_.Find_bwd__while_space_or_tab(src, i, 0) : 0; + int ws_bgn = i > 0 ? i - Php_str_.Strspn_bwd__space_or_tab(src, i, -1) : 0; // Search forwards for trailing whitespace // $wsEnd will be the position of the last space (or the '>' if there's none) - // PORTED: $wsEnd = $endPos + 2 + strspn( $text, " \t", $endPos + 3 ); - int ws_end = end_pos + 2; // set pos to ">" - int ws_end2 = Bry_find_.Find_fwd_while_space_or_tab(src, end_pos + 3, src_len); - if (ws_end2 != ws_end + 1) // if ws is after ">"... - ws_end = ws_end2 - 1; // ...then set to "last space" as per comment above + int ws_end = end_pos + 2 + Php_str_.Strspn_fwd__space_or_tab(src, end_pos + 3, -1, src_len); // Keep looking forward as long as we're finding more // comments. @@ -307,7 +304,7 @@ public class Xomw_prepro_wkr { // THREAD.UNSAFE: caching for repeated calls if (cur_char_pos == Bry_find_.Not_found) { break; } - cur_char_pos = cur_char_pos + 2 + Bry_find_.Find_fwd_while_space_or_tab(src, cur_char_pos + 3, src_len); + cur_char_pos = cur_char_pos + 2 + Php_str_.Strspn_fwd__space_or_tab(src, cur_char_pos + 3, -1, src_len); comments_list.Add(new int[] {ws_end + 1, cur_char_pos}); ws_end = cur_char_pos; } @@ -324,12 +321,10 @@ public class Xomw_prepro_wkr { // THREAD.UNSAFE: caching for repeated calls // Remove leading whitespace from the end of the accumulator // Sanity check first though int ws_len = i - ws_bgn; - if (ws_len > 0) { - // PORTED:"&& strspn( $accum, " \t", -$wsLength ) === $wsLength" - int accum_bry_len = accum.Len(); - int ws_end_lhs = Bry_find_.Find_bwd__while_space_or_tab(accum.Bfr(), accum_bry_len, 0); - if (accum_bry_len - ws_end_lhs == ws_len) - accum.Del_by(ws_len); + int accum_len = accum.Len(); + if ( ws_len > 0 + && Php_str_.Strspn_fwd__space_or_tab(accum.Bfr(), accum_len - ws_len, -1, accum_len) == ws_len) { + accum.Del_by(ws_len); } // Dump all but the last comment to the accumulator @@ -494,10 +489,7 @@ public class Xomw_prepro_wkr { // THREAD.UNSAFE: caching for repeated calls i++; } - // PORTED: $count = strspn( $text, '=', $i, 6 ); - int eq_end_max = i + 6; if (eq_end_max > src_len) eq_end_max = src_len; - int eq_end = Bry_find_.Find_fwd_while(src, i, eq_end_max, Byte_ascii.Eq); - int count = eq_end - i; + int count = Php_str_.Strspn_fwd__byte(src, Byte_ascii.Eq, i, 6, src_len); if (count == 1 && find_equals) { // EX: "{{a|\n=b=\n" // DWIM: This looks kind of like a name/value separator. // Let's let the equals handler have it and break the @@ -526,15 +518,18 @@ public class Xomw_prepro_wkr { // THREAD.UNSAFE: caching for repeated calls // Search back through the input to see if it has a proper close. // Do this using the reversed String since the other solutions // (end anchor, etc.) are inefficient. - int search_bgn = Bry_find_.Find_bwd__while_space_or_tab(src, i, 0); + int ws_len = Php_str_.Strspn_bwd__space_or_tab(src, src_len - i, -1); + int search_bgn = i - ws_len; + if (part.comment_end != -1 && search_bgn -1 == part.comment_end) { // Comment found at line end // Search for equals signs before the comment search_bgn = part.visual_end; search_bgn = Bry_find_.Find_bwd__while_space_or_tab(src, search_bgn, 0); + search_bgn -= Php_str_.Strspn_bwd__space_or_tab(src, search_bgn, -1); } int count = piece.count; - int eq_len = search_bgn - Bry_find_.Find_bwd_while(src, search_bgn, 0, Byte_ascii.Eq) - 1; + int eq_len = Php_str_.Strspn_bwd__byte(src, Byte_ascii.Eq, search_bgn, -1); byte[] element = Bry_.Empty; if (eq_len > 0) { @@ -587,7 +582,7 @@ public class Xomw_prepro_wkr { // THREAD.UNSAFE: caching for repeated calls } else if (found == Found__open) { // count opening brace characters - int count = Bry_find_.Find_fwd_while(src, i, src_len, cur_char) - i; // PORTED: $count = strspn( $text, $curChar, $i ); + int count = Php_str_.Strspn_fwd__byte(src, cur_char[0], i, -1, src_len); // NOTE: don't know how MediaWiki will handle "-{" // we need to add to stack only if opening brace count is enough for one of the rules if (count >= rule.min) { @@ -612,7 +607,7 @@ public class Xomw_prepro_wkr { // THREAD.UNSAFE: caching for repeated calls Xomw_prepro_piece piece = stack.top; // lets check if there are enough characters for closing brace int max_count = piece.count; - int count = Bry_find_.Find_fwd_while(src, i, i + max_count, cur_char) - i; // $count = strspn( $text, $curChar, $i, $maxCount ); + int count = Php_str_.Strspn_fwd__byte(src, cur_char[0], i, max_count, src_len); // check for maximum matching characters (if there are 5 closing characters, we will probably need only 3 - depending on the rules) rule = Get_rule(piece.open);