mirror of
https://github.com/gnosygnu/xowa.git
synced 2026-03-02 03:49:30 +00:00
v3.1.4.1
This commit is contained in:
@@ -20,7 +20,7 @@ import gplx.langs.regxs.*; import gplx.core.intls.*;
|
||||
import gplx.xowa.parsers.*;
|
||||
public class Scrib_lib_ustring implements Scrib_lib {
|
||||
private final String_surrogate_utl surrogate_utl = new String_surrogate_utl();
|
||||
public Scrib_lib_ustring(Scrib_core core) {this.core = core; gsub_mgr = new Scrib_lib_ustring_gsub_mgr(core, regx_converter);} private Scrib_core core; Scrib_lib_ustring_gsub_mgr gsub_mgr;
|
||||
public Scrib_lib_ustring(Scrib_core core) {this.core = core;} private Scrib_core core;
|
||||
public Scrib_lua_mod Mod() {return mod;} private Scrib_lua_mod mod;
|
||||
public int String_len_max() {return string_len_max;} public Scrib_lib_ustring String_len_max_(int v) {string_len_max = v; return this;} private int string_len_max = Xoa_page_.Page_len_max;
|
||||
public int Pattern_len_max() {return pattern_len_max;} public Scrib_lib_ustring Pattern_len_max_(int v) {pattern_len_max = v; return this;} private int pattern_len_max = 10000;
|
||||
@@ -112,7 +112,30 @@ public class Scrib_lib_ustring implements Scrib_lib {
|
||||
}
|
||||
return rslt.Init_many_list(tmp_list);
|
||||
}
|
||||
public boolean Gsub(Scrib_proc_args args, Scrib_proc_rslt rslt) {return gsub_mgr.Exec(args, rslt);}
|
||||
private Scrib_lib_ustring_gsub_mgr[] gsub_mgr_ary = Scrib_lib_ustring_gsub_mgr.Ary_empty;
|
||||
private int gsub_mgr_max = 0, gsub_mgr_len = -1;
|
||||
private final Object gsub_mgr_lock = new Object();
|
||||
public boolean Gsub(Scrib_proc_args args, Scrib_proc_rslt rslt) {
|
||||
boolean rv = false;
|
||||
synchronized (gsub_mgr_lock) { // handle recursive gsub calls; PAGE:en.d:כלב; DATE:2016-01-22
|
||||
int new_len = gsub_mgr_len + 1;
|
||||
if (new_len == gsub_mgr_max) {
|
||||
this.gsub_mgr_max = new_len == 0 ? 2 : new_len * 2;
|
||||
Scrib_lib_ustring_gsub_mgr[] new_gsub_mgr_ary = new Scrib_lib_ustring_gsub_mgr[gsub_mgr_max];
|
||||
Array_.Copy(gsub_mgr_ary, new_gsub_mgr_ary);
|
||||
gsub_mgr_ary = new_gsub_mgr_ary;
|
||||
}
|
||||
Scrib_lib_ustring_gsub_mgr cur = gsub_mgr_ary[new_len];
|
||||
if (cur == null) {
|
||||
cur = new Scrib_lib_ustring_gsub_mgr(core, regx_converter);
|
||||
gsub_mgr_ary[new_len] = cur;
|
||||
}
|
||||
this.gsub_mgr_len = new_len;
|
||||
rv = cur.Exec(args, rslt);
|
||||
--gsub_mgr_len;
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
public boolean Gmatch_init(Scrib_proc_args args, Scrib_proc_rslt rslt) {
|
||||
// String text = Scrib_kv_utl_.Val_to_str(values, 0);
|
||||
byte[] regx = args.Pull_bry(1);
|
||||
@@ -143,7 +166,7 @@ public class Scrib_lib_ustring implements Scrib_lib {
|
||||
if ( j < capts_len // bounds check b/c null can be passed
|
||||
&& Bool_.cast(capts[j].Val()) // check if true; indicates that group is "()" or "anypos" see regex converter; DATE:2014-04-23
|
||||
)
|
||||
tmp_list.Add(Int_.To_str(grp.Bgn() + Scrib_lib_ustring.Base1)); // return index only for (); NOTE: always return as String; callers expect String, and may do operations like len(result), which will fail if int; DATE:2013-12-20
|
||||
tmp_list.Add(grp.Bgn() + Scrib_lib_ustring.Base1); // return index only for "()"; NOTE: do not return as String; callers expect int and will fail typed comparisons; DATE:2016-01-21
|
||||
else
|
||||
tmp_list.Add(grp.Val()); // return match
|
||||
}
|
||||
@@ -325,5 +348,6 @@ class Scrib_lib_ustring_gsub_mgr {
|
||||
default: throw Err_.new_unhandled(repl_tid);
|
||||
}
|
||||
}
|
||||
static final byte Repl_tid_null = 0, Repl_tid_string = 1, Repl_tid_table = 2, Repl_tid_luacbk = 3;
|
||||
private static final byte Repl_tid_null = 0, Repl_tid_string = 1, Repl_tid_table = 2, Repl_tid_luacbk = 3;
|
||||
public static final Scrib_lib_ustring_gsub_mgr[] Ary_empty = new Scrib_lib_ustring_gsub_mgr[0];
|
||||
}
|
||||
|
||||
@@ -0,0 +1,50 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.xtns.scribunto.libs; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.scribunto.*;
|
||||
import org.junit.*; import gplx.xowa.xtns.scribunto.engines.mocks.*;
|
||||
public class Scrib_lib_ustring__find__tst {
|
||||
private final Mock_scrib_fxt fxt = new Mock_scrib_fxt(); private Scrib_lib lib;
|
||||
@Before public void init() {
|
||||
fxt.Clear();
|
||||
lib = fxt.Core().Lib_ustring().Init();
|
||||
}
|
||||
@Test public void Basic() {
|
||||
Exec_find("abcd" , "b" , 1, Bool_.N, "2;2"); // basic
|
||||
Exec_find("abac" , "a" , 2, Bool_.N, "3;3"); // bgn
|
||||
Exec_find("()()" , "(" , 2, Bool_.Y, "3;3"); // plain; note that ( would "break" regx
|
||||
Exec_find("a bcd e" , "(b(c)d)" , 2, Bool_.N, "3;5;bcd;c"); // groups
|
||||
Exec_find("a bcd e" , "()(b)" , 2, Bool_.N, "3;3;3;b"); // groups; empty capture
|
||||
Exec_find("abcd" , "x" , 1, Bool_.N, ""); // empty
|
||||
Exec_find("abcd" , "" , 2, Bool_.Y, "2;1"); // empty regx should return values; plain; EX:w:Fool's_mate; DATE:2014-03-04
|
||||
Exec_find("abcd" , "" , 2, Bool_.N, "2;1"); // empty regx should return values; regx; EX:w:Fool's_mate; DATE:2014-03-04
|
||||
Exec_find("abcd" , "^(c)" , 3, Bool_.N, "3;3;c"); // ^ should be converted to \G; regx; EX:cs.n:Category:1._září_2008; DATE:2014-05-07
|
||||
}
|
||||
@Test public void Arg_int() { // PURPOSE: allow int find; PAGE:ro.w:Innsbruck DATE:2015-09-12
|
||||
fxt.Test__proc__kvps__flat(lib, Scrib_lib_ustring.Invk_find, Scrib_kv_utl_.base1_many_(123, "2", 1, Bool_.N), "2;2");
|
||||
}
|
||||
@Test public void Return_int() {
|
||||
fxt.Test__proc__kvps__vals(lib, Scrib_lib_ustring.Invk_find, Scrib_kv_utl_.base1_many_("a", "()", 2, Bool_.N), 2, 1, 2);
|
||||
}
|
||||
@Test public void Surrogate() { // PURPOSE: handle surrogates in Find PAGE:zh.w:南北鐵路_(越南); DATE:2014-08-28
|
||||
Exec_find("aé𡼾\nbî𡼾\n" , "\n" , 1, Bool_.N, "4;4"); // 4 b/c \n starts at pos 4 (super 1)
|
||||
Exec_find("aé𡼾\nbî𡼾\n" , "\n" , 5, Bool_.N, "8;8"); // 8 b/c \n starts at pos 8 (super 1)
|
||||
}
|
||||
private void Exec_find(String text, String regx, int bgn, boolean plain, String expd) {
|
||||
fxt.Test__proc__kvps__flat(lib, Scrib_lib_ustring.Invk_find, Scrib_kv_utl_.base1_many_(text, regx, bgn, plain), expd);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,53 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.xtns.scribunto.libs; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.scribunto.*;
|
||||
import org.junit.*; import gplx.xowa.xtns.scribunto.engines.mocks.*;
|
||||
public class Scrib_lib_ustring__gmatch__tst {
|
||||
private final Mock_scrib_fxt fxt = new Mock_scrib_fxt(); private Scrib_lib lib;
|
||||
@Before public void init() {
|
||||
fxt.Clear();
|
||||
lib = fxt.Core().Lib_ustring().Init();
|
||||
}
|
||||
@Test public void Init__basic() {
|
||||
fxt.Test__proc__objs__nest(lib, Scrib_lib_ustring.Invk_gmatch_init, Object_.Ary("abcabc", "a(b)") , "1=a(b)\n2=\n 1=false");
|
||||
fxt.Test__proc__objs__nest(lib, Scrib_lib_ustring.Invk_gmatch_init, Object_.Ary("abcabc", "a()(b)") , "1=a()(b)\n2=\n 1=true\n 2=false");
|
||||
}
|
||||
@Test public void Callback__basic() {
|
||||
fxt.Test__proc__objs__nest(lib, Scrib_lib_ustring.Invk_gmatch_callback, Object_.Ary("abcabc", "a(b)", Scrib_kv_utl_.base1_many_(false), 0) , "1=2\n2=\n 1=b");
|
||||
fxt.Test__proc__objs__nest(lib, Scrib_lib_ustring.Invk_gmatch_callback, Object_.Ary("abcabc", "a(b)", Scrib_kv_utl_.base1_many_(false), 2) , "1=5\n2=\n 1=b");
|
||||
fxt.Test__proc__objs__nest(lib, Scrib_lib_ustring.Invk_gmatch_callback, Object_.Ary("abcabc", "a(b)", Scrib_kv_utl_.base1_many_(false), 8) , "1=8\n2=");
|
||||
}
|
||||
@Test public void Callback__nomatch() {// PURPOSE.fix: was originally returning "" instead of original String; EX:vi.d:trở_thành; DATE:2014-04-23
|
||||
fxt.Test__proc__objs__nest(lib, Scrib_lib_ustring.Invk_gmatch_callback, Object_.Ary("a", "a" , KeyVal_.Ary_empty, 0) , "1=1\n2=\n 1=a");
|
||||
}
|
||||
@Test public void Callback__anypos() {// PURPOSE.fix: was not handling $capt argument; EX:vi.d:trở_thành; DATE:2014-04-23
|
||||
fxt.Test__proc__objs__nest(lib, Scrib_lib_ustring.Invk_gmatch_callback, Object_.Ary("a bcd e", "()(b)" , Scrib_kv_utl_.base1_many_(true, false), 0), String_.Concat_lines_nl_skip_last
|
||||
( "1=3"
|
||||
, "2="
|
||||
, " 1=3"
|
||||
, " 2=b"
|
||||
));
|
||||
}
|
||||
@Test public void Callback__text_as_number() { // PURPOSE: Gmatch_callback must be able to take non String value; DATE:2013-12-20
|
||||
fxt.Test__proc__objs__nest(lib, Scrib_lib_ustring.Invk_gmatch_callback, Object_.Ary(1234, "1(2)", Scrib_kv_utl_.base1_many_(false), 0), String_.Concat_lines_nl_skip_last
|
||||
( "1=2"
|
||||
, "2="
|
||||
, " 1=2"
|
||||
));
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,101 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.xtns.scribunto.libs; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.scribunto.*;
|
||||
import org.junit.*; import gplx.langs.regxs.*; import gplx.xowa.xtns.scribunto.engines.mocks.*;
|
||||
public class Scrib_lib_ustring__gsub__tst {
|
||||
private final Mock_scrib_fxt fxt = new Mock_scrib_fxt(); private Scrib_lib lib;
|
||||
@Before public void init() {
|
||||
fxt.Clear();
|
||||
lib = fxt.Core().Lib_ustring().Init();
|
||||
}
|
||||
@Test public void Replace__basic() {
|
||||
Exec_gsub("abcd", "[a]" , -1, "A" , "Abcd;1");
|
||||
Exec_gsub("aaaa", "[a]" , 2, "A" , "AAaa;2");
|
||||
Exec_gsub("a" , "(a)" , 1, "%%%1" , "%a;1");
|
||||
Exec_gsub("à{b}c", "{b}" , 1, "b" , "àbc;1"); // utf8
|
||||
Exec_gsub("àbc", "^%s*(.-)%s*$" , 1, "%1" , "àbc;1"); // utf8; regx is for trim line
|
||||
Exec_gsub("a" , "[^]" , 1, "b" , "a;0"); // invalid regx should not fail; should return self; DATE:2013-10-20
|
||||
}
|
||||
@Test public void Replace__none() {// PURPOSE: gsub with no replace argument should not fail; EX:d:'orse; DATE:2013-10-14
|
||||
fxt.Test__proc__objs__flat(lib, Scrib_lib_ustring.Invk_gsub, Object_.Ary("text", "regx") , "text"); // NOTE: repl, limit deliberately omitted
|
||||
}
|
||||
@Test public void Replace__int() { // PURPOSE: do not fail if integer is passed in for @replace; PAGE:en.d:λύω DATE:2014-09-02
|
||||
Exec_gsub("abcd", 1 , -1, 1 , "abcd;0");
|
||||
}
|
||||
@Test public void Replace__table() {
|
||||
Exec_gsub("abcd", "[ac]" , -1, Scrib_kv_utl_.flat_many_("a", "A", "c", "C") , "AbCd;2");
|
||||
Exec_gsub("abc" , "[ab]" , -1, Scrib_kv_utl_.flat_many_("a", "A") , "Abc;2"); // PURPOSE: match not in regex should still print itself; in this case [c] is not in tbl regex; DATE:2014-03-31
|
||||
}
|
||||
@Test public void Replace__table__match() {// PURPOSE: replace using group, not found term; EX:"b" not "%b%" PAGE:en.w:Bannered_routes_of_U.S._Route_60; DATE:2014-08-15
|
||||
Exec_gsub("a%b%c", "%%(%w+)%%" , -1, Scrib_kv_utl_.flat_many_("b", "B") , "aBc;1");
|
||||
}
|
||||
@Test public void Replace__proc__recursive() { // PURPOSE:handle recursive gsub calls; PAGE:en.d:כלב; DATE:2016-01-22
|
||||
Bry_bfr bfr = Bry_bfr.new_();
|
||||
Mock_proc__recursive proc_lvl2 = new Mock_proc__recursive(fxt, lib, bfr, 2, null);
|
||||
Mock_proc__recursive proc_lvl1 = new Mock_proc__recursive(fxt, lib, bfr, 1, proc_lvl2);
|
||||
Mock_proc__recursive proc_root = new Mock_proc__recursive(fxt, lib, bfr, 0, proc_lvl1);
|
||||
fxt.Init__cbk(proc_root, proc_lvl1, proc_lvl2);
|
||||
Exec_gsub("ab", ".", -1, proc_root.To_scrib_lua_proc(), "ab;2"); // fails if "ab;4"
|
||||
Tfds.Eq_str("0;1;2;0;1;2;", bfr.To_str_and_clear()); // fails if "0;1;1;1"
|
||||
}
|
||||
@Test public void Regx__int() { // PURPOSE: do not fail if integer is passed in for @regx; PAGE:en.d:λύω DATE:2014-09-02
|
||||
Exec_gsub("abcd", 1 , -1, "A" , "abcd;0");
|
||||
}
|
||||
@Test public void Regx__dash() { // PURPOSE: "-" at end of regx should be literal; EX:[A-]; PAGE:en.d:frei DATE:2016-01-23
|
||||
Exec_gsub("abc", "[a-]", -1, "d", "dbc;1");
|
||||
}
|
||||
@Test public void Regx__word_class() { // PURPOSE: handle %w in extended regex; PAGE:en.w:A♯_(musical_note) DATE:2015-06-10
|
||||
Exec_gsub("(a b)", "[^%w%p%s]", -1, "x", "(a b);0"); // was returning "(x x)" b/c ^%w was incorrectly matching "a" and "b"
|
||||
}
|
||||
@Test public void Regx__balanced_group() { // PURPOSE: handle balanced group regex; EX:"%b()"; NOTE:test will fail if run in 1.6 environment; DATE:2013-12-20
|
||||
Exec_gsub("(a)", "%b()", 1, "c", "c;1");
|
||||
}
|
||||
@Test public void Regx__capture() {
|
||||
Exec_gsub("aa" , "(a)%1" , 1, "%0z", "aaz;1"); // capture; handle %0; PAGE:en.w:Wikipedia:Wikipedia_Signpost/Templates/Voter/testcases; DATE:2015-08-02
|
||||
Exec_gsub("aa" , "(a)%1" , 1, "%1z", "az;1"); // capture; handle %1+; PAGE:en.w:Wikipedia:Wikipedia_Signpost/Templates/Voter/testcases; DATE:2015-08-02
|
||||
Exec_gsub("a\"b'c\"d" , "([\"'])(.-)%1" , 1, "%1z", "a\"zd;1"); // capture; http://www.lua.org/pil/20.3.html; {{#invoke:test|gsub_string|a"b'c"d|(["'])(.-)%1|%1z}}
|
||||
}
|
||||
@Test public void Regx__frontier_pattern() { // PURPOSE: handle frontier pattern; EX:"%f[%a]"; DATE:2015-07-21
|
||||
Exec_gsub("a b c", "%f[%W]", 5, "()", "a() b() c();3");
|
||||
Exec_gsub("abC DEF gHI JKm NOP", "%f[%a]%u+%f[%A]", Int_.Max_value, "()", "abC () gHI JKm ();2"); // based on http://lua-users.org/wiki/FrontierPattern
|
||||
}
|
||||
@Test public void Regx__frontier_pattern_utl() {// PURPOSE: standalone test for \0 logic in frontier pattern; note that verified against PHP: echo(preg_match( "/[\w]/us", "\0" )); DATE:2015-07-21
|
||||
Tfds.Eq(Bool_.N, Regx_adp_.Match("\0", "a")); // \0 not matched by a
|
||||
Tfds.Eq(Bool_.N, Regx_adp_.Match("\0", "0")); // \0 not matched by numeric 0
|
||||
Tfds.Eq(Bool_.N, Regx_adp_.Match("\0", "[\\w]")); // \0 not matched by word_char
|
||||
Tfds.Eq(Bool_.Y, Regx_adp_.Match("\0", "[\\W]")); // \0 matched by !word_char
|
||||
Tfds.Eq(Bool_.Y, Regx_adp_.Match("\0", "[\\x]")); // \0 matched by any_char
|
||||
Tfds.Eq(Bool_.Y, Regx_adp_.Match("\0", "[\\X]")); // \0 matched by !any_char
|
||||
}
|
||||
private void Exec_gsub(String text, Object regx, int limit, Object repl, String expd) {
|
||||
fxt.Test__proc__kvps__flat(lib, Scrib_lib_ustring.Invk_gsub, Scrib_kv_utl_.base1_many_(text, regx, repl, limit), expd);
|
||||
}
|
||||
}
|
||||
class Mock_proc__recursive extends Mock_proc_fxt { private final Mock_scrib_fxt fxt; private final Scrib_lib lib; private final Mock_proc__recursive inner;
|
||||
private final Bry_bfr bfr;
|
||||
public Mock_proc__recursive(Mock_scrib_fxt fxt, Scrib_lib lib, Bry_bfr bfr, int id, Mock_proc__recursive inner) {super(id, "recur");
|
||||
this.fxt = fxt; this.lib = lib; this.inner = inner;
|
||||
this.bfr = bfr;
|
||||
}
|
||||
@Override public KeyVal[] Exec_by_scrib(KeyVal[] args) {
|
||||
bfr.Add_int_variable(this.Id()).Add_byte_semic();
|
||||
if (inner != null)
|
||||
fxt.Test__proc__kvps__flat(lib, Scrib_lib_ustring.Invk_gsub, Scrib_kv_utl_.base1_many_("a", ".", inner.To_scrib_lua_proc(), -1), "a;1");
|
||||
return args;
|
||||
}
|
||||
}
|
||||
@@ -1,151 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.xtns.scribunto.libs; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.scribunto.*;
|
||||
import org.junit.*; import gplx.langs.regxs.*;
|
||||
public class Scrib_lib_ustring__lib_tst {
|
||||
@Before public void init() {
|
||||
fxt.Clear_for_lib();
|
||||
lib = fxt.Core().Lib_ustring().Init();
|
||||
} private Scrib_invoke_func_fxt fxt = new Scrib_invoke_func_fxt(); private Scrib_lib lib;
|
||||
@Test public void Find() {
|
||||
Exec_find("abcd" , "b" , 1, Bool_.N, "2;2"); // basic
|
||||
Exec_find("abac" , "a" , 2, Bool_.N, "3;3"); // bgn
|
||||
Exec_find("()()" , "(" , 2, Bool_.Y, "3;3"); // plain; note that ( would "break" regx
|
||||
Exec_find("a bcd e" , "(b(c)d)" , 2, Bool_.N, "3;5;bcd;c"); // groups
|
||||
Exec_find("a bcd e" , "()(b)" , 2, Bool_.N, "3;3;3;b"); // groups; empty capture
|
||||
Exec_find("abcd" , "x" , 1, Bool_.N, ""); // empty
|
||||
Exec_find("abcd" , "" , 2, Bool_.Y, "2;1"); // empty regx should return values; plain; EX:w:Fool's_mate; DATE:2014-03-04
|
||||
Exec_find("abcd" , "" , 2, Bool_.N, "2;1"); // empty regx should return values; regx; EX:w:Fool's_mate; DATE:2014-03-04
|
||||
Exec_find("abcd" , "^(c)" , 3, Bool_.N, "3;3;c"); // ^ should be converted to \G; regx; EX:cs.n:Category:1._září_2008; DATE:2014-05-07
|
||||
}
|
||||
@Test public void Find_int() { // PURPOSE: allow int find; PAGE:ro.w:Innsbruck DATE:2015-09-12
|
||||
fxt.Test_scrib_proc_kv_vals(lib, Scrib_lib_ustring.Invk_find, Scrib_kv_utl_.base1_many_(123, "2", 1, Bool_.N), "2;2");
|
||||
}
|
||||
@Test public void Find_surrogate() { // PURPOSE: handle surrogates in Find PAGE:zh.w:南北鐵路_(越南); DATE:2014-08-28
|
||||
Exec_find("aé𡼾\nbî𡼾\n" , "\n" , 1, Bool_.N, "4;4"); // 4 b/c \n starts at pos 4 (super 1)
|
||||
Exec_find("aé𡼾\nbî𡼾\n" , "\n" , 5, Bool_.N, "8;8"); // 8 b/c \n starts at pos 8 (super 1)
|
||||
}
|
||||
@Test public void Match() {
|
||||
Exec_match("abcd" , "bc" , 1, "bc"); // basic
|
||||
Exec_match("abcd" , "x" , 1, String_.Null_mark); // empty
|
||||
Exec_match("abcd" , "a" , 2, String_.Null_mark); // bgn
|
||||
Exec_match("abcd" , "b(c)" , 1, "c"); // group
|
||||
Exec_match(" a b " , "^%s*(.-)%s*$" , 1, "a b;"); // trim; NOTE: changed from "a b" to "a b;"; DATE:2015-01-30
|
||||
Exec_match("abcd" , "a" , 0, "a"); // handle 0; note that php/lua is super-1, but some modules pass in 0; ru.w:Module:Infocards; DATE:2013-11-08
|
||||
Exec_match("abcd" , "." , -1, "d"); // -1
|
||||
Exec_match("aaa" , "a" , 1, "a"); // should return 1st match not many
|
||||
Exec_match("aaa" , "(a)" , 1, "a;a;a"); // should return all matches
|
||||
Exec_match("a b" , "%S" , 1, "a"); // %S was returning every match instead of 1st; PAGE:en.w:Bertrand_Russell; DATE:2014-04-02
|
||||
Exec_match(1 , "a" , 1, String_.Null_mark); // Module can pass raw ints; PAGE:en.w:Budget_of_the_European_Union; DATE:2015-01-22
|
||||
Exec_match("" , "a?" , 1, ""); // no results with ? should return "" not nil; PAGE:en.d:民; DATE:2015-01-30
|
||||
}
|
||||
@Test public void Match_args_out_of_order() {
|
||||
fxt.Test_scrib_proc_empty(lib, Scrib_lib_ustring.Invk_match, KeyVal_.Ary(KeyVal_.int_(2, "[a]")));
|
||||
}
|
||||
@Test public void Gsub() {
|
||||
Exec_gsub_regx("abcd", "[a]" , -1, "A" , "Abcd;1");
|
||||
Exec_gsub_regx("aaaa", "[a]" , 2, "A" , "AAaa;2");
|
||||
Exec_gsub_regx("a" , "(a)" , 1, "%%%1" , "%a;1");
|
||||
Exec_gsub_regx("à{b}c", "{b}" , 1, "b" , "àbc;1"); // utf8
|
||||
Exec_gsub_regx("àbc", "^%s*(.-)%s*$", 1, "%1" , "àbc;1"); // utf8; regx is for trim line
|
||||
Exec_gsub_regx("a" , "[^]" , 1, "b" , "a;0"); // invalid regx should not fail; should return self; DATE:2013-10-20
|
||||
}
|
||||
@Test public void Gsub_table() {
|
||||
Exec_gsub_regx("abcd", "[ac]" , -1, Scrib_kv_utl_.flat_many_("a", "A", "c", "C") , "AbCd;2");
|
||||
Exec_gsub_regx("abc" , "[ab]" , -1, Scrib_kv_utl_.flat_many_("a", "A") , "Abc;2"); // PURPOSE: match not in regex should still print itself; in this case [c] is not in tbl regex; DATE:2014-03-31
|
||||
}
|
||||
@Test public void Gsub_table_match() { // PURPOSE: replace using group, not found term; EX:"b" not "%b%" PAGE:en.w:Bannered_routes_of_U.S._Route_60; DATE:2014-08-15
|
||||
Exec_gsub_regx("a%b%c", "%%(%w+)%%" , -1, Scrib_kv_utl_.flat_many_("b", "B") , "aBc;1");
|
||||
}
|
||||
@Test public void Gsub_capture() {
|
||||
Exec_gsub_regx("aa" , "(a)%1" , 1, "%0z", "aaz;1"); // capture; handle %0; PAGE:en.w:Wikipedia:Wikipedia_Signpost/Templates/Voter/testcases; DATE:2015-08-02
|
||||
Exec_gsub_regx("aa" , "(a)%1" , 1, "%1z", "az;1"); // capture; handle %1+; PAGE:en.w:Wikipedia:Wikipedia_Signpost/Templates/Voter/testcases; DATE:2015-08-02
|
||||
Exec_gsub_regx("a\"b'c\"d" , "([\"'])(.-)%1" , 1, "%1z", "a\"zd;1"); // capture; http://www.lua.org/pil/20.3.html; {{#invoke:test|gsub_string|a"b'c"d|(["'])(.-)%1|%1z}}
|
||||
}
|
||||
@Test public void Gsub_no_replace() {// PURPOSE: gsub with no replace argument should not fail; EX:d:'orse; DATE:2013-10-14
|
||||
fxt.Test_scrib_proc_str_ary(lib, Scrib_lib_ustring.Invk_gsub, Object_.Ary("text", "regx") , "1=text"); // NOTE: repl, limit deliberately omitted
|
||||
}
|
||||
@Test public void Gsub_pattern_is_int() { // PURPOSE: do not fail if integer is passed in for @regx; PAGE:en.d:λύω DATE:2014-09-02
|
||||
Exec_gsub_regx("abcd", 1 , -1, "A" , "abcd;0");
|
||||
}
|
||||
@Test public void Gsub_replace_is_int() { // PURPOSE: do not fail if integer is passed in for @replace; PAGE:en.d:λύω DATE:2014-09-02
|
||||
Exec_gsub_regx("abcd", 1 , -1, 1 , "abcd;0");
|
||||
}
|
||||
@Test public void Gsub_word_class() { // PURPOSE: handle %w in extended regex; PAGE:en.w:A♯_(musical_note) DATE:2015-06-10
|
||||
Exec_gsub_regx("(a b)", "[^%w%p%s]", -1, "x", "(a b);0"); // was returning "(x x)" b/c ^%w was incorrectly matching "a" and "b"
|
||||
}
|
||||
@Test public void Gmatch_init() {
|
||||
fxt.Test_scrib_proc_str_ary(lib, Scrib_lib_ustring.Invk_gmatch_init, Object_.Ary("abcabc", "a(b)") , "1=a(b)\n2=\n 1=false");
|
||||
fxt.Test_scrib_proc_str_ary(lib, Scrib_lib_ustring.Invk_gmatch_init, Object_.Ary("abcabc", "a()(b)") , "1=a()(b)\n2=\n 1=true\n 2=false");
|
||||
}
|
||||
@Test public void Gmatch_callback() {
|
||||
fxt.Test_scrib_proc_str_ary(lib, Scrib_lib_ustring.Invk_gmatch_callback, Object_.Ary("abcabc", "a(b)", Scrib_kv_utl_.base1_many_(false), 0) , "1=2\n2=\n 1=b");
|
||||
fxt.Test_scrib_proc_str_ary(lib, Scrib_lib_ustring.Invk_gmatch_callback, Object_.Ary("abcabc", "a(b)", Scrib_kv_utl_.base1_many_(false), 2) , "1=5\n2=\n 1=b");
|
||||
fxt.Test_scrib_proc_str_ary(lib, Scrib_lib_ustring.Invk_gmatch_callback, Object_.Ary("abcabc", "a(b)", Scrib_kv_utl_.base1_many_(false), 8) , "1=8\n2=");
|
||||
}
|
||||
@Test public void Gmatch_callback_nomatch() {// PURPOSE.fix: was originally returning "" instead of original String; EX:vi.d:trở_thành; DATE:2014-04-23
|
||||
fxt.Test_scrib_proc_str_ary(lib, Scrib_lib_ustring.Invk_gmatch_callback, Object_.Ary("a", "a" , KeyVal_.Ary_empty, 0) , "1=1\n2=\n 1=a");
|
||||
}
|
||||
@Test public void Gmatch_callback_anypos() {// PURPOSE.fix: was not handling $capt argument; EX:vi.d:trở_thành; DATE:2014-04-23
|
||||
fxt.Test_scrib_proc_str_ary(lib, Scrib_lib_ustring.Invk_gmatch_callback, Object_.Ary("a bcd e", "()(b)" , Scrib_kv_utl_.base1_many_(true, false), 0), String_.Concat_lines_nl_skip_last
|
||||
( "1=3"
|
||||
, "2="
|
||||
, " 1=3"
|
||||
, " 2=b"
|
||||
));
|
||||
}
|
||||
@Test public void Gsub_balanced_group() { // PURPOSE: handle balanced group regex; EX:"%b()"; NOTE:test will fail if run in 1.6 environment; DATE:2013-12-20
|
||||
fxt.Init_cbk(Scrib_core.Key_mw_interface, fxt.Core().Lib_ustring(), Scrib_lib_ustring.Invk_gsub);
|
||||
Exec_gsub_regx("(a)", "%b()", 1, "c", "c;1");
|
||||
}
|
||||
@Test public void Gmatch_callback__text_as_number() { // PURPOSE: Gmatch_callback must be able to take non String value; DATE:2013-12-20
|
||||
fxt.Test_scrib_proc_str_ary(lib, Scrib_lib_ustring.Invk_gmatch_callback, Object_.Ary(1234, "1(2)", Scrib_kv_utl_.base1_many_(false), 0), String_.Concat_lines_nl_skip_last
|
||||
( "1=2"
|
||||
, "2="
|
||||
, " 1=2"
|
||||
));
|
||||
}
|
||||
@Test public void Gsub_frontier_pattern() { // PURPOSE: handle frontier pattern; EX:"%f[%a]"; DATE:2015-07-21
|
||||
fxt.Init_cbk(Scrib_core.Key_mw_interface, fxt.Core().Lib_ustring(), Scrib_lib_ustring.Invk_gsub);
|
||||
Exec_gsub_regx("a b c", "%f[%W]", 5, "()", "a() b() c();3");
|
||||
Exec_gsub_regx("abC DEF gHI JKm NOP", "%f[%a]%u+%f[%A]", Int_.Max_value, "()", "abC () gHI JKm ();2"); // based on http://lua-users.org/wiki/FrontierPattern
|
||||
}
|
||||
@Test public void Gsub_frontier_pattern_utl() {// PURPOSE: standalone test for \0 logic in frontier pattern; note that verified against PHP: echo(preg_match( "/[\w]/us", "\0" )); DATE:2015-07-21
|
||||
Tfds.Eq(Bool_.N, Regx_adp_.Match("\0", "a")); // \0 not matched by a
|
||||
Tfds.Eq(Bool_.N, Regx_adp_.Match("\0", "0")); // \0 not matched by numeric 0
|
||||
Tfds.Eq(Bool_.N, Regx_adp_.Match("\0", "[\\w]")); // \0 not matched by word_char
|
||||
Tfds.Eq(Bool_.Y, Regx_adp_.Match("\0", "[\\W]")); // \0 matched by !word_char
|
||||
Tfds.Eq(Bool_.Y, Regx_adp_.Match("\0", "[\\x]")); // \0 matched by any_char
|
||||
Tfds.Eq(Bool_.Y, Regx_adp_.Match("\0", "[\\X]")); // \0 matched by !any_char
|
||||
}
|
||||
// @Test public void Match_viwiktionary() {
|
||||
// fxt.Init_cbk(Scrib_core.Key_mw_interface, fxt.Core().Lib_ustring(), Scrib_lib_ustring.Invk_match);
|
||||
// Exec_match("tr" , "()(r)", 1, ";"); // should return all matches
|
||||
// Exec_match("tr" , "^([b]*).-([c]*)$", 1, ";"); // should return all matches
|
||||
// }
|
||||
private void Exec_find(String text, String regx, int bgn, boolean plain, String expd) {
|
||||
fxt.Test_scrib_proc_kv_vals(lib, Scrib_lib_ustring.Invk_find, Scrib_kv_utl_.base1_many_(text, regx, bgn, plain), expd);
|
||||
}
|
||||
private void Exec_match(Object text, String regx, int bgn, String expd) {
|
||||
fxt.Test_scrib_proc_kv_vals(lib, Scrib_lib_ustring.Invk_match, Scrib_kv_utl_.base1_many_(text, regx, bgn), expd);
|
||||
}
|
||||
private void Exec_gsub_regx(String text, Object regx, int limit, Object repl, String expd) {Exec_gsub(text, regx, limit, repl, expd);}
|
||||
private void Exec_gsub(String text, Object regx, int limit, Object repl, String expd) {
|
||||
fxt.Test_scrib_proc_kv_vals(lib, Scrib_lib_ustring.Invk_gsub, Scrib_kv_utl_.base1_many_(text, regx, repl, limit), expd);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,51 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.xtns.scribunto.libs; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.scribunto.*;
|
||||
import org.junit.*; import gplx.xowa.xtns.scribunto.engines.mocks.*;
|
||||
public class Scrib_lib_ustring__match__tst {
|
||||
private final Mock_scrib_fxt fxt = new Mock_scrib_fxt(); private Scrib_lib lib;
|
||||
@Before public void init() {
|
||||
fxt.Clear();
|
||||
lib = fxt.Core().Lib_ustring().Init();
|
||||
}
|
||||
@Test public void Basic() {
|
||||
Exec_match("abcd" , "bc" , 1, "bc"); // basic
|
||||
Exec_match("abcd" , "x" , 1, String_.Null_mark); // empty
|
||||
Exec_match("abcd" , "a" , 2, String_.Null_mark); // bgn
|
||||
Exec_match("abcd" , "b(c)" , 1, "c"); // group
|
||||
Exec_match(" a b " , "^%s*(.-)%s*$" , 1, "a b;"); // trim; NOTE: changed from "a b" to "a b;"; DATE:2015-01-30
|
||||
Exec_match("abcd" , "a" , 0, "a"); // handle 0; note that php/lua is super-1, but some modules pass in 0; ru.w:Module:Infocards; DATE:2013-11-08
|
||||
Exec_match("abcd" , "." , -1, "d"); // -1
|
||||
Exec_match("aaa" , "a" , 1, "a"); // should return 1st match not many
|
||||
Exec_match("aaa" , "(a)" , 1, "a;a;a"); // should return all matches
|
||||
Exec_match("a b" , "%S" , 1, "a"); // %S was returning every match instead of 1st; PAGE:en.w:Bertrand_Russell; DATE:2014-04-02
|
||||
Exec_match(1 , "a" , 1, String_.Null_mark); // Module can pass raw ints; PAGE:en.w:Budget_of_the_European_Union; DATE:2015-01-22
|
||||
Exec_match("" , "a?" , 1, ""); // no results with ? should return "" not nil; PAGE:en.d:民; DATE:2015-01-30
|
||||
}
|
||||
@Test public void Args_out_of_order() {
|
||||
fxt.Test__proc__kvps__empty(lib, Scrib_lib_ustring.Invk_match, KeyVal_.Ary(KeyVal_.int_(2, "[a]")));
|
||||
}
|
||||
// @Test public void Match_viwiktionary() {
|
||||
// fxt.Init_cbk(Scrib_core.Key_mw_interface, fxt.Core().Lib_ustring(), Scrib_lib_ustring.Invk_match);
|
||||
// Exec_match("tr" , "()(r)", 1, ";"); // should return all matches
|
||||
// Exec_match("tr" , "^([b]*).-([c]*)$", 1, ";"); // should return all matches
|
||||
// }
|
||||
private void Exec_match(Object text, String regx, int bgn, String expd) {
|
||||
fxt.Test__proc__kvps__flat(lib, Scrib_lib_ustring.Invk_match, Scrib_kv_utl_.base1_many_(text, regx, bgn), expd);
|
||||
}
|
||||
}
|
||||
@@ -17,7 +17,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.xtns.scribunto.libs; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.scribunto.*;
|
||||
import org.junit.*;
|
||||
public class Scrib_lib_ustring__invoke_tst {
|
||||
public class Scrib_lib_ustring__shell_cmd__tst {
|
||||
@Before public void init() {
|
||||
fxt.Clear_for_invoke();
|
||||
fxt.Init_page("{{#invoke:Mod_0|Func_0}}");
|
||||
@@ -190,15 +190,30 @@ public class Scrib_regx_converter {
|
||||
break;
|
||||
default:
|
||||
boolean normal = true;
|
||||
if (i + 2 < len) {
|
||||
byte dash_1 = src[i + 1];
|
||||
byte dash_2 = src[i + 2];
|
||||
if (dash_1 == Byte_ascii.Dash && dash_2 != Byte_ascii.Brack_end) {
|
||||
Regx_quote(bfr, tmp_b);
|
||||
bfr.Add_byte(Byte_ascii.Dash);
|
||||
Regx_quote(bfr, dash_2);
|
||||
i += 2;
|
||||
normal = false;
|
||||
int lhs_pos = i; // NOTE: following block handles MBCS; EX:[𠀀-] PAGE:en.d:どう DATE:2016-01-22
|
||||
int lhs_len = gplx.core.intls.Utf8_.Len_of_char_by_1st_byte(src[lhs_pos]);
|
||||
int dash_pos = i + lhs_len;
|
||||
if (dash_pos < len) {
|
||||
byte dash_char = src[dash_pos];
|
||||
if (dash_char == Byte_ascii.Dash) {
|
||||
int rhs_pos = dash_pos + 1;
|
||||
if (rhs_pos < len) {
|
||||
byte rhs_byte = src[rhs_pos];
|
||||
if (rhs_byte != Byte_ascii.Brack_end) {// ignore dash if followed by brack_end; EX: [a-]; PAGE:en.d:frei; DATE:2016-01-23
|
||||
int rhs_len = gplx.core.intls.Utf8_.Len_of_char_by_1st_byte(rhs_byte);
|
||||
if (lhs_len == 1)
|
||||
Regx_quote(bfr, src[i]);
|
||||
else
|
||||
bfr.Add_mid(src, i, i + lhs_len);
|
||||
bfr.Add_byte(Byte_ascii.Dash);
|
||||
if (rhs_len == 1)
|
||||
Regx_quote(bfr, src[rhs_pos]);
|
||||
else
|
||||
bfr.Add_mid(src, rhs_pos, rhs_pos + rhs_len);
|
||||
i = rhs_pos + rhs_len - 1; // -1 b/c for() will do ++i
|
||||
normal = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (normal)
|
||||
|
||||
@@ -46,6 +46,9 @@ public class Scrib_regx_converter_tst {
|
||||
fxt.Test_replace("a(2(1)2)c" , "%b()", "b", "abc");
|
||||
fxt.Test_replace("a(3(2(1)2)3)c" , "%b()", "b", "a(3b3)c");
|
||||
}
|
||||
@Test public void Mbcs() { // PURPOSE: handle regex for multi-byte chars; PAGE:en.d:どう; DATE:2016-01-22; .NET.REGX:fails
|
||||
fxt.Test_replace("𠀀" , "[𠀀-]" , "a", "a");
|
||||
}
|
||||
}
|
||||
class Scrib_regx_converter_fxt {
|
||||
private Scrib_regx_converter under;
|
||||
|
||||
Reference in New Issue
Block a user