1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2024-10-27 20:34:16 +00:00

Mw_parse.Prepro: Convert some calls to Strspan

This commit is contained in:
gnosygnu 2017-01-11 21:33:39 -05:00
parent 40b520ecc6
commit 1c0f9f4555
3 changed files with 150 additions and 21 deletions

View File

@ -0,0 +1,76 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.phps; import gplx.*; import gplx.langs.*;
public class Php_str_ {
public static int Strspn_fwd__byte(byte[] src, byte find, int bgn, int max, int src_len) {
if (max == -1) max = src_len;
int rv = 0;
for (int i = bgn; i < src_len; i++) {
if (find == src[i] && rv < max)
rv++;
else
break;
}
return rv;
}
public static int Strspn_fwd__space_or_tab(byte[] src, int bgn, int max, int src_len) {
if (max == -1) max = src_len;
int rv = 0;
for (int i = bgn; i < src_len; i++) {
switch (src[i]) {
case Byte_ascii.Space:
case Byte_ascii.Tab:
if (rv < max) {
rv++;
continue;
}
break;
}
break;
}
return rv;
}
public static int Strspn_bwd__byte(byte[] src, byte find, int bgn, int max) {
if (max == -1) max = Int_.Max_value;
int rv = 0;
for (int i = bgn - 1; i > -1; i--) {
if (find == src[i] && rv < max)
rv++;
else
break;
}
return rv;
}
public static int Strspn_bwd__space_or_tab(byte[] src, int bgn, int max) {
if (max == -1) max = Int_.Max_value;
int rv = 0;
for (int i = bgn - 1; i > -1; i--) {
switch (src[i]) {
case Byte_ascii.Space:
case Byte_ascii.Tab:
if (rv < max) {
rv++;
continue;
}
break;
}
break;
}
return rv;
}
}

View File

@ -0,0 +1,58 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.phps; import gplx.*; import gplx.langs.*;
import org.junit.*; import gplx.core.tests.*;
public class Php_str___tst {
private final Php_str___fxt fxt = new Php_str___fxt();
@Test public void Strspn_fwd__byte() {
fxt.Test__strspn_fwd__byte("aaaaab", Byte_ascii.Ltr_a, 0, -1, 5); // basic
fxt.Test__strspn_fwd__byte("aaaaab", Byte_ascii.Ltr_a, 1, -1, 4); // bgn
fxt.Test__strspn_fwd__byte("aaaaab", Byte_ascii.Ltr_a, 1, 2, 2); // max
}
@Test public void Strspn_fwd__space_or_tab() {
fxt.Test__strspn_fwd__space_or_tab(" a", 0, -1, 5); // basic
fxt.Test__strspn_fwd__space_or_tab(" a", 1, -1, 4); // bgn
fxt.Test__strspn_fwd__space_or_tab(" a", 1, 2, 2); // max
}
@Test public void Strspn_bwd__byte() {
fxt.Test__strspn_bwd__byte("aaaaab", Byte_ascii.Ltr_a, 5, -1, 5); // basic
fxt.Test__strspn_bwd__byte("aaaaab", Byte_ascii.Ltr_a, 4, -1, 4); // bgn
fxt.Test__strspn_bwd__byte("aaaaab", Byte_ascii.Ltr_a, 4, 2, 2); // max
}
@Test public void Strspn_bwd__space_or_tab() {
fxt.Test__strspn_bwd__space_or_tab(" a", 5, -1, 5); // basic
fxt.Test__strspn_bwd__space_or_tab(" a", 4, -1, 4); // bgn
fxt.Test__strspn_bwd__space_or_tab(" a", 4, 2, 2); // max
}
}
class Php_str___fxt {
public void Test__strspn_fwd__byte(String src_str, byte find, int bgn, int max, int expd) {
byte[] src_bry = Bry_.new_u8(src_str);
Gftest.Eq__int(expd, Php_str_.Strspn_fwd__byte(src_bry, find, bgn, max, src_bry.length));
}
public void Test__strspn_fwd__space_or_tab(String src_str, int bgn, int max, int expd) {
byte[] src_bry = Bry_.new_u8(src_str);
Gftest.Eq__int(expd, Php_str_.Strspn_fwd__space_or_tab(src_bry, bgn, max, src_bry.length));
}
public void Test__strspn_bwd__byte(String src_str, byte find, int bgn, int max, int expd) {
Gftest.Eq__int(expd, Php_str_.Strspn_bwd__byte(Bry_.new_u8(src_str), find, bgn, max));
}
public void Test__strspn_bwd__space_or_tab(String src_str, int bgn, int max, int expd) {
Gftest.Eq__int(expd, Php_str_.Strspn_bwd__space_or_tab(Bry_.new_u8(src_str), bgn, max));
}
}

View File

@ -17,6 +17,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.parsers.mws.prepros; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.mws.*;
import gplx.core.btries.*;
import gplx.langs.phps.*;
public class Xomw_prepro_wkr { // THREAD.UNSAFE: caching for repeated calls
private final Bry_bfr tmp_bfr = Bry_bfr_.New();
private final List_adp comments_list = List_adp_.New();
@ -288,15 +289,11 @@ public class Xomw_prepro_wkr { // THREAD.UNSAFE: caching for repeated calls
}
else {
// Search backwards for leading whitespace
int ws_bgn = i > 0 ? Bry_find_.Find_bwd__while_space_or_tab(src, i, 0) : 0;
int ws_bgn = i > 0 ? i - Php_str_.Strspn_bwd__space_or_tab(src, i, -1) : 0;
// Search forwards for trailing whitespace
// $wsEnd will be the position of the last space (or the '>' if there's none)
// PORTED: $wsEnd = $endPos + 2 + strspn( $text, " \t", $endPos + 3 );
int ws_end = end_pos + 2; // set pos to ">"
int ws_end2 = Bry_find_.Find_fwd_while_space_or_tab(src, end_pos + 3, src_len);
if (ws_end2 != ws_end + 1) // if ws is after ">"...
ws_end = ws_end2 - 1; // ...then set to "last space" as per comment above
int ws_end = end_pos + 2 + Php_str_.Strspn_fwd__space_or_tab(src, end_pos + 3, -1, src_len);
// Keep looking forward as long as we're finding more
// comments.
@ -307,7 +304,7 @@ public class Xomw_prepro_wkr { // THREAD.UNSAFE: caching for repeated calls
if (cur_char_pos == Bry_find_.Not_found) {
break;
}
cur_char_pos = cur_char_pos + 2 + Bry_find_.Find_fwd_while_space_or_tab(src, cur_char_pos + 3, src_len);
cur_char_pos = cur_char_pos + 2 + Php_str_.Strspn_fwd__space_or_tab(src, cur_char_pos + 3, -1, src_len);
comments_list.Add(new int[] {ws_end + 1, cur_char_pos});
ws_end = cur_char_pos;
}
@ -324,12 +321,10 @@ public class Xomw_prepro_wkr { // THREAD.UNSAFE: caching for repeated calls
// Remove leading whitespace from the end of the accumulator
// Sanity check first though
int ws_len = i - ws_bgn;
if (ws_len > 0) {
// PORTED:"&& strspn( $accum, " \t", -$wsLength ) === $wsLength"
int accum_bry_len = accum.Len();
int ws_end_lhs = Bry_find_.Find_bwd__while_space_or_tab(accum.Bfr(), accum_bry_len, 0);
if (accum_bry_len - ws_end_lhs == ws_len)
accum.Del_by(ws_len);
int accum_len = accum.Len();
if ( ws_len > 0
&& Php_str_.Strspn_fwd__space_or_tab(accum.Bfr(), accum_len - ws_len, -1, accum_len) == ws_len) {
accum.Del_by(ws_len);
}
// Dump all but the last comment to the accumulator
@ -494,10 +489,7 @@ public class Xomw_prepro_wkr { // THREAD.UNSAFE: caching for repeated calls
i++;
}
// PORTED: $count = strspn( $text, '=', $i, 6 );
int eq_end_max = i + 6; if (eq_end_max > src_len) eq_end_max = src_len;
int eq_end = Bry_find_.Find_fwd_while(src, i, eq_end_max, Byte_ascii.Eq);
int count = eq_end - i;
int count = Php_str_.Strspn_fwd__byte(src, Byte_ascii.Eq, i, 6, src_len);
if (count == 1 && find_equals) { // EX: "{{a|\n=b=\n"
// DWIM: This looks kind of like a name/value separator.
// Let's let the equals handler have it and break the
@ -526,15 +518,18 @@ public class Xomw_prepro_wkr { // THREAD.UNSAFE: caching for repeated calls
// Search back through the input to see if it has a proper close.
// Do this using the reversed String since the other solutions
// (end anchor, etc.) are inefficient.
int search_bgn = Bry_find_.Find_bwd__while_space_or_tab(src, i, 0);
int ws_len = Php_str_.Strspn_bwd__space_or_tab(src, src_len - i, -1);
int search_bgn = i - ws_len;
if (part.comment_end != -1 && search_bgn -1 == part.comment_end) {
// Comment found at line end
// Search for equals signs before the comment
search_bgn = part.visual_end;
search_bgn = Bry_find_.Find_bwd__while_space_or_tab(src, search_bgn, 0);
search_bgn -= Php_str_.Strspn_bwd__space_or_tab(src, search_bgn, -1);
}
int count = piece.count;
int eq_len = search_bgn - Bry_find_.Find_bwd_while(src, search_bgn, 0, Byte_ascii.Eq) - 1;
int eq_len = Php_str_.Strspn_bwd__byte(src, Byte_ascii.Eq, search_bgn, -1);
byte[] element = Bry_.Empty;
if (eq_len > 0) {
@ -587,7 +582,7 @@ public class Xomw_prepro_wkr { // THREAD.UNSAFE: caching for repeated calls
}
else if (found == Found__open) {
// count opening brace characters
int count = Bry_find_.Find_fwd_while(src, i, src_len, cur_char) - i; // PORTED: $count = strspn( $text, $curChar, $i );
int count = Php_str_.Strspn_fwd__byte(src, cur_char[0], i, -1, src_len); // NOTE: don't know how MediaWiki will handle "-{"
// we need to add to stack only if opening brace count is enough for one of the rules
if (count >= rule.min) {
@ -612,7 +607,7 @@ public class Xomw_prepro_wkr { // THREAD.UNSAFE: caching for repeated calls
Xomw_prepro_piece piece = stack.top;
// lets check if there are enough characters for closing brace
int max_count = piece.count;
int count = Bry_find_.Find_fwd_while(src, i, i + max_count, cur_char) - i; // $count = strspn( $text, $curChar, $i, $maxCount );
int count = Php_str_.Strspn_fwd__byte(src, cur_char[0], i, max_count, src_len);
// check for maximum matching characters (if there are 5 closing characters, we will probably need only 3 - depending on the rules)
rule = Get_rule(piece.open);