mirror of
https://github.com/gnosygnu/xowa.git
synced 2024-10-27 20:34:16 +00:00
Scribunto: Handle anypos flag [#337]
This commit is contained in:
parent
f44a1874a8
commit
4626203f16
@ -54,6 +54,15 @@ public class Array_ {
|
||||
Copy_to(src, 0, trg, 0, copy_len);
|
||||
return trg;
|
||||
}
|
||||
public static Object Extract_by_pos(Object src, int src_bgn) {
|
||||
return Extract_by_pos(src, src_bgn, Array.getLength(src));
|
||||
}
|
||||
public static Object Extract_by_pos(Object src, int src_bgn, int src_end) {
|
||||
int trg_len = src_end - src_bgn;
|
||||
Object trg = Create(Component_type(src), trg_len);
|
||||
Copy_to(src, src_bgn, trg, 0, src_end - src_bgn);
|
||||
return trg;
|
||||
}
|
||||
public static List_adp To_list(Object ary) {
|
||||
int aryLen = Array_.Len(ary);
|
||||
List_adp rv = List_adp_.New();
|
||||
|
@ -70,6 +70,14 @@ public class Keyval_ {
|
||||
Ary__to_str__nest__ary(bfr, 0, true, ary);
|
||||
return bfr.To_str_and_clear();
|
||||
}
|
||||
public static Object[] Ary__to_objary__val(Keyval[] ary) {
|
||||
int ary_len = ary.length;
|
||||
Object[] rv = new Object[ary_len];
|
||||
for (int i = 0; i < ary_len; i++) {
|
||||
rv[i] = ary[i].Val();
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
private static void Ary__to_str__nest__ary(Bry_bfr bfr, int indent, boolean is_kv, Object[] ary) {
|
||||
int len = ary.length;
|
||||
for (int i = 0; i < len; ++i) {
|
||||
|
@ -17,6 +17,7 @@ package gplx.core.tests; import gplx.*; import gplx.core.*;
|
||||
import gplx.core.brys.*;
|
||||
public class Gftest {
|
||||
private static final Bry_bfr bfr = Bry_bfr_.New();
|
||||
public static void Eq__ary(Object[] expd, Object[] actl, String msg_fmt, Object... msg_args) {Eq__array(Type_ids_.Id__obj, expd, actl, msg_fmt, msg_args);}
|
||||
public static void Eq__ary(boolean[] expd, boolean[] actl, String msg_fmt, Object... msg_args) {Eq__array(Type_ids_.Id__bool, expd, actl, msg_fmt, msg_args);}
|
||||
public static void Eq__ary(int[] expd, int[] actl, String msg_fmt, Object... msg_args) {Eq__array(Type_ids_.Id__int, expd, actl, msg_fmt, msg_args);}
|
||||
public static void Eq__ary(long[] expd, long[] actl, String msg_fmt, Object... msg_args) {Eq__array(Type_ids_.Id__long, expd, actl, msg_fmt, msg_args);}
|
||||
@ -151,12 +152,14 @@ public class Gftest {
|
||||
}
|
||||
private static void Write__itm(Bry_bfr bfr, int type_id, Object ary, int len, int idx) {
|
||||
if (idx < len) {
|
||||
Object val = Array_.Get_at(ary, idx);
|
||||
switch (type_id) {
|
||||
case Type_ids_.Id__bool: bfr.Add_yn(Bool_.Cast(Array_.Get_at(ary, idx))); break;
|
||||
case Type_ids_.Id__bry: bfr.Add_safe((byte[])Array_.Get_at(ary, idx)); break;
|
||||
case Type_ids_.Id__long: bfr.Add_long_variable(Long_.cast(Array_.Get_at(ary, idx))); break;
|
||||
case Type_ids_.Id__int: bfr.Add_int_variable(Int_.Cast(Array_.Get_at(ary, idx))); break;
|
||||
case Type_ids_.Id__byte: bfr.Add_int_variable((int)(Byte_.Cast(Array_.Get_at(ary, idx)))); break;
|
||||
case Type_ids_.Id__bool: bfr.Add_yn(Bool_.Cast(val)); break;
|
||||
case Type_ids_.Id__bry: bfr.Add_safe((byte[])val); break;
|
||||
case Type_ids_.Id__long: bfr.Add_long_variable(Long_.cast(val)); break;
|
||||
case Type_ids_.Id__int: bfr.Add_int_variable(Int_.Cast(val)); break;
|
||||
case Type_ids_.Id__byte: bfr.Add_int_variable((int)(Byte_.Cast(val))); break;
|
||||
case Type_ids_.Id__obj: bfr.Add_str_u8(Object_.Xto_str_strict_or_null_mark(val)); break;
|
||||
default: throw Err_.new_unhandled_default(type_id);
|
||||
}
|
||||
}
|
||||
@ -182,6 +185,7 @@ public class Gftest {
|
||||
case Type_ids_.Id__long: eq = Long_.cast(expd_obj) == Long_.cast(actl_obj); break;
|
||||
case Type_ids_.Id__int: eq = Int_.Cast(expd_obj) == Int_.Cast(actl_obj); break;
|
||||
case Type_ids_.Id__byte: eq = Byte_.Cast(expd_obj) == Byte_.Cast(actl_obj); break;
|
||||
case Type_ids_.Id__obj: eq = Object_.Eq(expd_obj, actl_obj); break;
|
||||
}
|
||||
}
|
||||
if (!eq) {
|
||||
|
@ -14,7 +14,7 @@ GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.xtns.scribunto.libs; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.scribunto.*;
|
||||
import gplx.langs.regxs.*; import gplx.core.intls.*;
|
||||
import gplx.core.intls.*; import gplx.langs.regxs.*;
|
||||
import gplx.xowa.parsers.*;
|
||||
import gplx.xowa.xtns.scribunto.procs.*;
|
||||
public class Scrib_lib_ustring implements Scrib_lib {
|
||||
@ -118,30 +118,9 @@ public class Scrib_lib_ustring implements Scrib_lib {
|
||||
AddCapturesFromMatch(tmp_list, regx_rslts[0], text, regx_converter.Capt_ary(), true);
|
||||
return rslt.Init_many_list(tmp_list);
|
||||
}
|
||||
private Scrib_lib_ustring_gsub_mgr[] gsub_mgr_ary = Scrib_lib_ustring_gsub_mgr.Ary_empty;
|
||||
private int gsub_mgr_max = 0, gsub_mgr_len = -1;
|
||||
private final Object gsub_mgr_lock = new Object();
|
||||
public boolean Gsub(Scrib_proc_args args, Scrib_proc_rslt rslt) {
|
||||
boolean rv = false;
|
||||
synchronized (gsub_mgr_lock) { // handle recursive gsub calls; PAGE:en.d:כלב; DATE:2016-01-22
|
||||
Scrib_regx_converter regx_converter = new Scrib_regx_converter();
|
||||
int new_len = gsub_mgr_len + 1;
|
||||
if (new_len == gsub_mgr_max) {
|
||||
this.gsub_mgr_max = new_len == 0 ? 2 : new_len * 2;
|
||||
Scrib_lib_ustring_gsub_mgr[] new_gsub_mgr_ary = new Scrib_lib_ustring_gsub_mgr[gsub_mgr_max];
|
||||
Array_.Copy(gsub_mgr_ary, new_gsub_mgr_ary);
|
||||
gsub_mgr_ary = new_gsub_mgr_ary;
|
||||
}
|
||||
Scrib_lib_ustring_gsub_mgr cur = gsub_mgr_ary[new_len];
|
||||
if (cur == null) {
|
||||
cur = new Scrib_lib_ustring_gsub_mgr(core, regx_converter);
|
||||
gsub_mgr_ary[new_len] = cur;
|
||||
}
|
||||
this.gsub_mgr_len = new_len;
|
||||
rv = cur.Exec(args, rslt);
|
||||
--gsub_mgr_len;
|
||||
}
|
||||
return rv;
|
||||
Scrib_lib_ustring_gsub_mgr gsub_mgr = new Scrib_lib_ustring_gsub_mgr(core, new Scrib_regx_converter());
|
||||
return gsub_mgr.Exec(args, rslt);
|
||||
}
|
||||
public boolean Gmatch_init(Scrib_proc_args args, Scrib_proc_rslt rslt) {
|
||||
// String text = Scrib_kv_utl_.Val_to_str(values, 0);
|
||||
@ -195,189 +174,3 @@ public class Scrib_lib_ustring implements Scrib_lib {
|
||||
private static final int Base1 = 1
|
||||
, End_adj = 1; // lua / php uses "end" as <= not <; EX: "abc" and bgn=0, end= 1; for XOWA, this is "a"; for MW / PHP it is "ab"
|
||||
}
|
||||
class Scrib_lib_ustring_gsub_mgr {
|
||||
private Scrib_regx_converter regx_converter;
|
||||
public Scrib_lib_ustring_gsub_mgr(Scrib_core core, Scrib_regx_converter regx_converter) {this.core = core; this.regx_converter = regx_converter;} private Scrib_core core;
|
||||
private byte tmp_repl_tid = Repl_tid_null; private byte[] tmp_repl_bry = null;
|
||||
private Hash_adp repl_hash = null; private Scrib_lua_proc repl_func = null;
|
||||
private int repl_count = 0;
|
||||
public boolean Exec(Scrib_proc_args args, Scrib_proc_rslt rslt) {
|
||||
Object text_obj = args.Cast_obj_or_null(0);
|
||||
String text = String_.as_(text_obj);
|
||||
if (text == null) text = Object_.Xto_str_strict_or_empty(text_obj);
|
||||
String regx = args.Xstr_str_or_null(1); // NOTE: @pattern sometimes int; PAGE:en.d:λύω; DATE:2014-09-02
|
||||
if (args.Len() == 2) return rslt.Init_obj(text); // if no replace arg, return self; PAGE:en.d:'orse; DATE:2013-10-13
|
||||
Object repl_obj = args.Cast_obj_or_null(2);
|
||||
regx = regx_converter.patternToRegex(Bry_.new_u8(regx), Scrib_regx_converter.Anchor_pow);
|
||||
int limit = args.Cast_int_or(3, -1);
|
||||
repl_count = 0;
|
||||
Identify_repl(repl_obj);
|
||||
String repl = Exec_repl(tmp_repl_tid, tmp_repl_bry, text, regx, limit);
|
||||
return rslt.Init_many_objs(repl, repl_count);
|
||||
}
|
||||
private void Identify_repl(Object repl_obj) {
|
||||
Class<?> repl_type = repl_obj.getClass();
|
||||
if (Object_.Eq(repl_type, String_.Cls_ref_type)) {
|
||||
tmp_repl_tid = Repl_tid_string;
|
||||
tmp_repl_bry = Bry_.new_u8((String)repl_obj);
|
||||
}
|
||||
else if (Object_.Eq(repl_type, Int_.Cls_ref_type)) { // NOTE:@replace sometimes int; PAGE:en.d:λύω; DATE:2014-09-02
|
||||
tmp_repl_tid = Repl_tid_string;
|
||||
tmp_repl_bry = Bry_.new_u8(Int_.To_str(Int_.Cast(repl_obj)));
|
||||
}
|
||||
else if (Object_.Eq(repl_type, Keyval[].class)) {
|
||||
tmp_repl_tid = Repl_tid_table;
|
||||
Keyval[] repl_tbl = (Keyval[])repl_obj;
|
||||
if (repl_hash == null)
|
||||
repl_hash = Hash_adp_.New();
|
||||
else
|
||||
repl_hash.Clear();
|
||||
int repl_tbl_len = repl_tbl.length;
|
||||
for (int i = 0; i < repl_tbl_len; i++) {
|
||||
Keyval repl_itm = repl_tbl[i];
|
||||
String repl_itm_val = repl_itm.Val_to_str_or_empty();
|
||||
repl_hash.Add(repl_itm.Key(), Bry_.new_u8(repl_itm_val));
|
||||
}
|
||||
}
|
||||
else if (Object_.Eq(repl_type, Scrib_lua_proc.class)) {
|
||||
tmp_repl_tid = Repl_tid_luacbk;
|
||||
repl_func = (Scrib_lua_proc)repl_obj;
|
||||
}
|
||||
else if (Object_.Eq(repl_type, Double_.Cls_ref_type)) { // NOTE:@replace sometimes double; PAGE:de.v:Wikivoyage:Wikidata/Test_Modul:Wikidata2; DATE:2016-04-21
|
||||
tmp_repl_tid = Repl_tid_string;
|
||||
tmp_repl_bry = Bry_.new_u8(Double_.To_str(Double_.cast(repl_obj)));
|
||||
}
|
||||
else throw Err_.new_unhandled(Type_.Name(repl_type));
|
||||
}
|
||||
private String Exec_repl(byte repl_tid, byte[] repl_bry, String text, String regx, int limit) {
|
||||
Regx_adp regx_mgr = Scrib_lib_ustring.RegxAdp_new_(core.Ctx(), regx);
|
||||
Regx_match[] rslts = regx_mgr.Match_all(text, 0);
|
||||
if ( rslts.length == 0 // PHP: If matches are found, the new subject will be returned, otherwise subject will be returned unchanged.; http://php.net/manual/en/function.preg-replace-callback.php
|
||||
|| regx_mgr.Pattern_is_invalid() // NOTE: invalid patterns should return self; EX:[^]; DATE:2014-09-02
|
||||
) return text;
|
||||
rslts = regx_converter.Adjust_balanced(rslts);
|
||||
Bry_bfr tmp_bfr = Bry_bfr_.New();
|
||||
int len = rslts.length;
|
||||
int pos = 0;
|
||||
for (int i = 0; i < len; i++) {
|
||||
if (limit > -1 && repl_count == limit) break;
|
||||
Regx_match rslt = rslts[i];
|
||||
tmp_bfr.Add_str_u8(String_.Mid(text, pos, rslt.Find_bgn())); // NOTE: regx returns char pos (not bry); must add as String, not bry; DATE:2013-07-17
|
||||
if (!Exec_repl_itm(tmp_bfr, repl_tid, repl_bry, text, rslt)) { // will be false when gsub_proc returns nothing; PAGE:en.d:tracer PAGE:en.d:שלום DATE:2017-04-22;
|
||||
tmp_bfr.Add_str_u8(String_.Mid(text, rslt.Find_bgn(), rslt.Find_end()));
|
||||
}
|
||||
pos = rslt.Find_end();
|
||||
++repl_count;
|
||||
}
|
||||
int text_len = String_.Len(text);
|
||||
if (pos < text_len)
|
||||
tmp_bfr.Add_str_u8(String_.Mid(text, pos, text_len)); // NOTE: regx returns char pos (not bry); must add as String, not bry; DATE:2013-07-17
|
||||
return tmp_bfr.To_str_and_clear();
|
||||
}
|
||||
private boolean Exec_repl_itm(Bry_bfr tmp_bfr, byte repl_tid, byte[] repl_bry, String text, Regx_match match) {
|
||||
switch (repl_tid) {
|
||||
case Repl_tid_string:
|
||||
int len = repl_bry.length;
|
||||
for (int i = 0; i < len; i++) {
|
||||
byte b = repl_bry[i];
|
||||
switch (b) {
|
||||
case Byte_ascii.Percent: {
|
||||
++i;
|
||||
if (i == len) // % at end of stream; just add %;
|
||||
tmp_bfr.Add_byte(b);
|
||||
else {
|
||||
b = repl_bry[i];
|
||||
switch (b) {
|
||||
case Byte_ascii.Num_0: case Byte_ascii.Num_1: case Byte_ascii.Num_2: case Byte_ascii.Num_3: case Byte_ascii.Num_4:
|
||||
case Byte_ascii.Num_5: case Byte_ascii.Num_6: case Byte_ascii.Num_7: case Byte_ascii.Num_8: case Byte_ascii.Num_9:
|
||||
int idx = b - Byte_ascii.Num_0;
|
||||
if (idx == 0) // NOTE: 0 means take result; REF.MW:if ($x === '0'); return $m[0]; PAGE:Wikipedia:Wikipedia_Signpost/Templates/Voter/testcases; DATE:2015-08-02
|
||||
tmp_bfr.Add_str_u8(String_.Mid(text, match.Find_bgn(), match.Find_end()));
|
||||
else { // NOTE: > 0 means get from groups if it exists; REF.MW:elseif (isset($m["m$x"])) return $m["m$x"]; PAGE:Wikipedia:Wikipedia_Signpost/Templates/Voter/testcases; DATE:2015-08-02
|
||||
idx -= List_adp_.Base1;
|
||||
if (idx < match.Groups().length) { // retrieve numbered capture; TODO_OLD: support more than 9 captures
|
||||
Regx_group grp = match.Groups()[idx];
|
||||
tmp_bfr.Add_str_u8(String_.Mid(text, grp.Bgn(), grp.End())); // NOTE: grp.Bgn() / .End() is for String pos (bry pos will fail for utf8 strings)
|
||||
}
|
||||
else {
|
||||
tmp_bfr.Add_byte(Byte_ascii.Percent);
|
||||
tmp_bfr.Add_byte(b);
|
||||
}
|
||||
}
|
||||
break;
|
||||
case Byte_ascii.Percent:
|
||||
tmp_bfr.Add_byte(Byte_ascii.Percent);
|
||||
break;
|
||||
default: // not a number; add literal
|
||||
tmp_bfr.Add_byte(Byte_ascii.Percent);
|
||||
tmp_bfr.Add_byte(b);
|
||||
break;
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
tmp_bfr.Add_byte(b);
|
||||
break;
|
||||
}
|
||||
}
|
||||
break;
|
||||
case Repl_tid_table: {
|
||||
int match_bgn = -1, match_end = -1;
|
||||
Regx_group[] grps = match.Groups();
|
||||
if (grps.length == 0) {
|
||||
match_bgn = match.Find_bgn();
|
||||
match_end = match.Find_end();
|
||||
}
|
||||
else { // group exists, take first one (logic matches Scribunto); PAGE:en.w:Bannered_routes_of_U.S._Route_60; DATE:2014-08-15
|
||||
Regx_group grp = grps[0];
|
||||
match_bgn = grp.Bgn();
|
||||
match_end = grp.End();
|
||||
}
|
||||
String find_str = String_.Mid(text, match_bgn, match_end); // NOTE: rslt.Bgn() / .End() is for String pos (bry pos will fail for utf8 strings)
|
||||
Object actl_repl_obj = repl_hash.Get_by(find_str);
|
||||
if (actl_repl_obj == null) // match found, but no replacement specified; EX:"abc", "[ab]", "a:A"; "b" in regex but not in tbl; EX:d:DVD; DATE:2014-03-31
|
||||
tmp_bfr.Add_str_u8(find_str);
|
||||
else
|
||||
tmp_bfr.Add((byte[])actl_repl_obj);
|
||||
break;
|
||||
}
|
||||
case Repl_tid_luacbk: {
|
||||
// TOMBSTONE: was causing garbled text on PAGE:en.w:Template:Infobox_kommune DATE:2018-07-02
|
||||
/*
|
||||
String find_str = String_.Mid(text, match.Find_bgn(), match.Find_end());
|
||||
Keyval[] luacbk_args = Scrib_kv_utl_.base1_obj_(find_str);
|
||||
*/
|
||||
Keyval[] luacbk_args = null;
|
||||
Regx_group[] grps = match.Groups();
|
||||
int grps_len = grps.length;
|
||||
if (grps_len == 0) { // no match; use original String
|
||||
String find_str = String_.Mid(text, match.Find_bgn(), match.Find_end());
|
||||
luacbk_args = Scrib_kv_utl_.base1_obj_(find_str);
|
||||
}
|
||||
else { // match; build ary of matches; (see UStringLibrary.php)
|
||||
luacbk_args = new Keyval[grps_len];
|
||||
for (int i = 0; i < grps_len; i++) {
|
||||
Regx_group grp = grps[i];
|
||||
String find_str = String_.Mid(text, grp.Bgn(), grp.End());
|
||||
luacbk_args[i] = Keyval_.int_(i + Scrib_core.Base_1, find_str);
|
||||
}
|
||||
}
|
||||
/*
|
||||
*/
|
||||
Keyval[] rslts = core.Interpreter().CallFunction(repl_func.Id(), luacbk_args);
|
||||
if (rslts.length == 0) // will be 0 when gsub_proc returns nil; PAGE:en.d:tracer; DATE:2017-04-22
|
||||
return false;
|
||||
else { // ArrayIndex check
|
||||
Object rslt_obj = rslts[0].Val(); // 0th idx has result
|
||||
tmp_bfr.Add_str_u8(Object_.Xto_str_strict_or_empty(rslt_obj)); // NOTE: always convert to String; rslt_obj can be int; PAGE:en.d:seven DATE:2016-04-27
|
||||
}
|
||||
break;
|
||||
}
|
||||
default: throw Err_.new_unhandled(repl_tid);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
private static final byte Repl_tid_null = 0, Repl_tid_string = 1, Repl_tid_table = 2, Repl_tid_luacbk = 3;
|
||||
public static final Scrib_lib_ustring_gsub_mgr[] Ary_empty = new Scrib_lib_ustring_gsub_mgr[0];
|
||||
}
|
||||
|
@ -14,7 +14,8 @@ GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.xtns.scribunto.libs; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.scribunto.*;
|
||||
import org.junit.*; import gplx.langs.regxs.*; import gplx.xowa.xtns.scribunto.engines.mocks.*;
|
||||
import org.junit.*; import gplx.core.tests.*;
|
||||
import gplx.langs.regxs.*; import gplx.xowa.xtns.scribunto.engines.mocks.*;
|
||||
public class Scrib_lib_ustring__gsub__tst {
|
||||
private final Mock_scrib_fxt fxt = new Mock_scrib_fxt(); private Scrib_lib lib;
|
||||
@Before public void init() {
|
||||
@ -30,6 +31,9 @@ public class Scrib_lib_ustring__gsub__tst {
|
||||
// TOMBSTONE: tested with local MW and {{#invoke:Test|test16|a|[^]|b}} -> Lua error: Missing close-bracket for character set beginning at pattern character 1.; DATE:2018-07-02
|
||||
// Exec_gsub("a" , "[^]" , 1, "b" , "a;0"); // invalid regx should not fail; should return self; DATE:2013-10-20
|
||||
}
|
||||
@Test public void Find__int() {// PURPOSE: gsub with integer arg should not fail; DATE:2013-11-06
|
||||
fxt.Test__proc__kvps__flat(lib, Scrib_lib_ustring.Invk_gsub, Scrib_kv_utl_.base1_many_(1, "[1]", "2", 1), "2;1"); // NOTE: text is integer (lua / php are type-less)
|
||||
}
|
||||
@Test public void Replace__none() {// PURPOSE: gsub with no replace argument should not fail; EX:d:'orse; DATE:2013-10-14
|
||||
fxt.Test__proc__objs__flat(lib, Scrib_lib_ustring.Invk_gsub, Object_.Ary("text", "regx") , "text"); // NOTE: repl, limit deliberately omitted
|
||||
}
|
||||
@ -99,6 +103,20 @@ public class Scrib_lib_ustring__gsub__tst {
|
||||
Tfds.Eq(Bool_.Y, Regx_adp_.Match("\0", "[\\x]")); // \0 matched by any_char
|
||||
Tfds.Eq(Bool_.Y, Regx_adp_.Match("\0", "[\\X]")); // \0 matched by !any_char
|
||||
}
|
||||
@Test public void Luacbk__basic() {
|
||||
String text = "ad2f1e3z";
|
||||
String regx = "([1d])([2e])([3f])";
|
||||
Mock_proc__verify_args proc = new Mock_proc__verify_args(0, new Object[]{"B", "d", "2", "f"}, new Object[]{"Y", "1", "e", "3"});
|
||||
fxt.Init__cbk(proc);
|
||||
Exec_gsub(text, regx, -1, proc.To_scrib_lua_proc(), "aBYz;2");
|
||||
}
|
||||
@Test public void Luacbk__anypos() {
|
||||
String text = "ad2f1e3z";
|
||||
String regx = "()([1d])([2e])([3f])"; // "()" is anypos, which inserts find_pos to results
|
||||
Mock_proc__verify_args proc = new Mock_proc__verify_args(0, new Object[]{"B", 1, "d", "2", "f"}, new Object[]{"Y", 4, "1", "e", "3"});
|
||||
fxt.Init__cbk(proc);
|
||||
Exec_gsub(text, regx, -1, proc.To_scrib_lua_proc(), "aBYz;2");
|
||||
}
|
||||
private void Exec_gsub(String text, Object regx, int limit, Object repl, String expd) {
|
||||
fxt.Test__proc__kvps__flat(lib, Scrib_lib_ustring.Invk_gsub, Scrib_kv_utl_.base1_many_(text, regx, repl, limit), expd);
|
||||
}
|
||||
@ -133,3 +151,17 @@ class Mock_proc__empty extends Mock_proc_fxt { private final String find, rep
|
||||
return String_.Eq(text, find) ? Keyval_.Ary(Keyval_.new_("0", repl)) : Keyval_.Ary_empty;
|
||||
}
|
||||
}
|
||||
class Mock_proc__verify_args extends Mock_proc_fxt { private final Object[][] expd_ary;
|
||||
private int expd_idx = -1;
|
||||
public Mock_proc__verify_args(int id, Object[]... expd_ary) {super(id, "number");
|
||||
this.expd_ary = expd_ary;
|
||||
}
|
||||
@Override public Keyval[] Exec_by_scrib(Keyval[] args) {
|
||||
Object[] expd_args = expd_ary[++expd_idx];
|
||||
Object rv = expd_args[0];
|
||||
expd_args = (Object[])Array_.Extract_by_pos(expd_args, 1);
|
||||
Object[] actl_args = Keyval_.Ary__to_objary__val(args);
|
||||
Gftest.Eq__ary(expd_args, actl_args, "failed lua_cbk");
|
||||
return Keyval_.Ary(Keyval_.int_(0, rv));
|
||||
}
|
||||
}
|
||||
|
@ -0,0 +1,235 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||
|
||||
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||
or alternatively under the terms of the Apache License Version 2.0.
|
||||
|
||||
You may use XOWA according to either of these licenses as is most appropriate
|
||||
for your project on a case-by-case basis.
|
||||
|
||||
The terms of each license can be found in the source code repository:
|
||||
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.xtns.scribunto.libs; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.scribunto.*;
|
||||
import gplx.langs.regxs.*;
|
||||
import gplx.xowa.xtns.scribunto.procs.*;
|
||||
class Scrib_lib_ustring_gsub_mgr {
|
||||
private final Scrib_core core;
|
||||
private final Scrib_regx_converter regx_converter;
|
||||
private byte[] repl_bry; private Hash_adp repl_hash; private Scrib_lua_proc repl_func;
|
||||
private int repl_count = 0;
|
||||
public Scrib_lib_ustring_gsub_mgr(Scrib_core core, Scrib_regx_converter regx_converter) {
|
||||
this.core = core;
|
||||
this.regx_converter = regx_converter;
|
||||
}
|
||||
public boolean Exec(Scrib_proc_args args, Scrib_proc_rslt rslt) {
|
||||
// get @text; NOTE: sometimes int; DATE:2013-11-06
|
||||
String text = args.Xstr_str_or_null(0);
|
||||
if (args.Len() == 2) return rslt.Init_obj(text); // if no @replace, return @text; PAGE:en.d:'orse; DATE:2013-10-13
|
||||
|
||||
// get @pattern; NOTE: sometimes int; PAGE:en.d:λύω; DATE:2014-09-02
|
||||
String regx = args.Xstr_str_or_null(1);
|
||||
regx = regx_converter.patternToRegex(Bry_.new_u8(regx), Scrib_regx_converter.Anchor_pow);
|
||||
|
||||
// get @repl
|
||||
Object repl_obj = args.Cast_obj_or_null(2);
|
||||
byte repl_tid = Identify_repl(repl_obj);
|
||||
|
||||
// get @limit; reset repl_count
|
||||
int limit = args.Cast_int_or(3, -1);
|
||||
repl_count = 0;
|
||||
|
||||
// do repl
|
||||
String repl = Exec_repl(repl_tid, text, regx, limit);
|
||||
return rslt.Init_many_objs(repl, repl_count);
|
||||
}
|
||||
private byte Identify_repl(Object repl_obj) {
|
||||
byte repl_tid = Repl_tid_null;
|
||||
// @repl can be String, int, table, func
|
||||
Class<?> repl_type = repl_obj.getClass();
|
||||
if (Object_.Eq(repl_type, String_.Cls_ref_type)) {
|
||||
repl_tid = Repl_tid_string;
|
||||
repl_bry = Bry_.new_u8((String)repl_obj);
|
||||
}
|
||||
else if (Object_.Eq(repl_type, Int_.Cls_ref_type)) { // NOTE:@replace sometimes int; PAGE:en.d:λύω; DATE:2014-09-02
|
||||
repl_tid = Repl_tid_string;
|
||||
repl_bry = Bry_.new_u8(Int_.To_str(Int_.Cast(repl_obj)));
|
||||
}
|
||||
else if (Object_.Eq(repl_type, Keyval[].class)) {
|
||||
repl_tid = Repl_tid_table;
|
||||
repl_hash = Hash_adp_.New();
|
||||
Keyval[] kvs = (Keyval[])repl_obj;
|
||||
int kvs_len = kvs.length;
|
||||
for (int i = 0; i < kvs_len; i++) {
|
||||
Keyval kv = kvs[i];
|
||||
repl_hash.Add(kv.Key(), Bry_.new_u8(kv.Val_to_str_or_empty()));
|
||||
}
|
||||
}
|
||||
else if (Object_.Eq(repl_type, Scrib_lua_proc.class)) {
|
||||
repl_tid = Repl_tid_luacbk;
|
||||
repl_func = (Scrib_lua_proc)repl_obj;
|
||||
}
|
||||
else if (Object_.Eq(repl_type, Double_.Cls_ref_type)) { // NOTE:@replace sometimes double; PAGE:de.v:Wikivoyage:Wikidata/Test_Modul:Wikidata2; DATE:2016-04-21
|
||||
repl_tid = Repl_tid_string;
|
||||
repl_bry = Bry_.new_u8(Double_.To_str(Double_.cast(repl_obj)));
|
||||
}
|
||||
else
|
||||
throw Err_.new_unhandled(Type_.Name(repl_type));
|
||||
return repl_tid;
|
||||
}
|
||||
private String Exec_repl(byte repl_tid, String text, String regx, int limit) {
|
||||
// parse regx
|
||||
Regx_adp regx_mgr = Scrib_lib_ustring.RegxAdp_new_(core.Ctx(), regx);
|
||||
if (regx_mgr.Pattern_is_invalid()) return text; // NOTE: invalid patterns should return self; EX:[^]; DATE:2014-09-02)
|
||||
|
||||
// exec regx
|
||||
Regx_match[] rslts = regx_mgr.Match_all(text, 0);
|
||||
if (rslts.length == 0) return text; // PHP: If matches are found, the new subject will be returned, otherwise subject will be returned unchanged.; http://php.net/manual/en/function.preg-replace-callback.php
|
||||
rslts = regx_converter.Adjust_balanced(rslts);
|
||||
|
||||
Bry_bfr tmp_bfr = Bry_bfr_.New();
|
||||
int rslts_len = rslts.length;
|
||||
int text_pos = 0;
|
||||
for (int i = 0; i < rslts_len; i++) {
|
||||
if (repl_count == limit) break; // stop if repl_count reaches limit; note that limit = -1 by default, unless specified
|
||||
|
||||
// add text up to find.bgn
|
||||
Regx_match rslt = rslts[i];
|
||||
tmp_bfr.Add_str_u8(String_.Mid(text, text_pos, rslt.Find_bgn())); // NOTE: regx returns char text_pos (not bry); must add as String, not bry; DATE:2013-07-17
|
||||
|
||||
// replace result
|
||||
if (!Exec_repl_itm(tmp_bfr, repl_tid, text, rslt)) {
|
||||
// will be false when gsub_proc returns nothing; PAGE:en.d:tracer PAGE:en.d:שלום DATE:2017-04-22;
|
||||
tmp_bfr.Add_str_u8(String_.Mid(text, rslt.Find_bgn(), rslt.Find_end()));
|
||||
}
|
||||
|
||||
// update
|
||||
text_pos = rslt.Find_end();
|
||||
repl_count++;
|
||||
}
|
||||
|
||||
// add rest of String
|
||||
int text_len = String_.Len(text);
|
||||
if (text_pos < text_len)
|
||||
tmp_bfr.Add_str_u8(String_.Mid(text, text_pos, text_len)); // NOTE: regx returns char text_pos (not bry); must add as String, not bry; DATE:2013-07-17
|
||||
return tmp_bfr.To_str_and_clear();
|
||||
}
|
||||
private boolean Exec_repl_itm(Bry_bfr tmp_bfr, byte repl_tid, String text, Regx_match match) {
|
||||
switch (repl_tid) {
|
||||
case Repl_tid_string:
|
||||
int len = repl_bry.length;
|
||||
for (int i = 0; i < len; i++) {
|
||||
byte b = repl_bry[i];
|
||||
switch (b) {
|
||||
case Byte_ascii.Percent: {
|
||||
++i;
|
||||
if (i == len) // % at end of stream; just add %;
|
||||
tmp_bfr.Add_byte(b);
|
||||
else {
|
||||
b = repl_bry[i];
|
||||
switch (b) {
|
||||
case Byte_ascii.Num_0: case Byte_ascii.Num_1: case Byte_ascii.Num_2: case Byte_ascii.Num_3: case Byte_ascii.Num_4:
|
||||
case Byte_ascii.Num_5: case Byte_ascii.Num_6: case Byte_ascii.Num_7: case Byte_ascii.Num_8: case Byte_ascii.Num_9:
|
||||
int idx = b - Byte_ascii.Num_0;
|
||||
if (idx == 0) // NOTE: 0 means take result; REF.MW:if ($x === '0'); return $m[0]; PAGE:Wikipedia:Wikipedia_Signpost/Templates/Voter/testcases; DATE:2015-08-02
|
||||
tmp_bfr.Add_str_u8(String_.Mid(text, match.Find_bgn(), match.Find_end()));
|
||||
else { // NOTE: > 0 means get from groups if it exists; REF.MW:elseif (isset($m["m$x"])) return $m["m$x"]; PAGE:Wikipedia:Wikipedia_Signpost/Templates/Voter/testcases; DATE:2015-08-02
|
||||
idx -= List_adp_.Base1;
|
||||
if (idx < match.Groups().length) { // retrieve numbered capture; TODO_OLD: support more than 9 captures
|
||||
Regx_group grp = match.Groups()[idx];
|
||||
tmp_bfr.Add_str_u8(String_.Mid(text, grp.Bgn(), grp.End())); // NOTE: grp.Bgn() / .End() is for String pos (bry pos will fail for utf8 strings)
|
||||
}
|
||||
else {
|
||||
tmp_bfr.Add_byte(Byte_ascii.Percent);
|
||||
tmp_bfr.Add_byte(b);
|
||||
}
|
||||
}
|
||||
break;
|
||||
case Byte_ascii.Percent:
|
||||
tmp_bfr.Add_byte(Byte_ascii.Percent);
|
||||
break;
|
||||
default: // not a number; add literal
|
||||
tmp_bfr.Add_byte(Byte_ascii.Percent);
|
||||
tmp_bfr.Add_byte(b);
|
||||
break;
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
tmp_bfr.Add_byte(b);
|
||||
break;
|
||||
}
|
||||
}
|
||||
break;
|
||||
case Repl_tid_table: {
|
||||
int match_bgn = -1, match_end = -1;
|
||||
Regx_group[] grps = match.Groups();
|
||||
if (grps.length == 0) {
|
||||
match_bgn = match.Find_bgn();
|
||||
match_end = match.Find_end();
|
||||
}
|
||||
else { // group exists, take first one (logic matches Scribunto); PAGE:en.w:Bannered_routes_of_U.S._Route_60; DATE:2014-08-15
|
||||
Regx_group grp = grps[0];
|
||||
match_bgn = grp.Bgn();
|
||||
match_end = grp.End();
|
||||
}
|
||||
String find_str = String_.Mid(text, match_bgn, match_end); // NOTE: rslt.Bgn() / .End() is for String pos (bry pos will fail for utf8 strings)
|
||||
Object actl_repl_obj = repl_hash.Get_by(find_str);
|
||||
if (actl_repl_obj == null) // match found, but no replacement specified; EX:"abc", "[ab]", "a:A"; "b" in regex but not in tbl; EX:d:DVD; DATE:2014-03-31
|
||||
tmp_bfr.Add_str_u8(find_str);
|
||||
else
|
||||
tmp_bfr.Add((byte[])actl_repl_obj);
|
||||
break;
|
||||
}
|
||||
case Repl_tid_luacbk: {
|
||||
Keyval[] luacbk_args = null;
|
||||
Regx_group[] grps = match.Groups();
|
||||
int grps_len = grps.length;
|
||||
// no grps; pass 1 arg based on @match: EX: ("ace", "[b-d]"); args -> ("c")
|
||||
if (grps_len == 0) {
|
||||
String find_str = String_.Mid(text, match.Find_bgn(), match.Find_end());
|
||||
luacbk_args = Scrib_kv_utl_.base1_obj_(find_str);
|
||||
}
|
||||
// grps exist; pass n args based on grp[n].match; EX: ("acfg", "([b-d])([e-g])"); args -> ("c", "f")
|
||||
else {
|
||||
// memoize any_pos args for loop
|
||||
boolean any_pos = regx_converter.Any_pos();
|
||||
Keyval[] capt_ary = regx_converter.Capt_ary();
|
||||
int capt_ary_len = capt_ary.length;
|
||||
|
||||
// loop grps; for each grp, create corresponding arg in luacbk
|
||||
luacbk_args = new Keyval[grps_len];
|
||||
for (int i = 0; i < grps_len; i++) {
|
||||
Regx_group grp = grps[i];
|
||||
|
||||
// anypos will create @offset arg; everything else creates a @match arg based on grp
|
||||
Object val = any_pos && i < capt_ary_len && Bool_.Cast(capt_ary[i].Val())
|
||||
? (Object)grp.Bgn()
|
||||
: (Object)String_.Mid(text, grp.Bgn(), grp.End());
|
||||
luacbk_args[i] = Keyval_.int_(i + Scrib_core.Base_1, val);
|
||||
}
|
||||
}
|
||||
|
||||
// do callback
|
||||
Keyval[] rslts = core.Interpreter().CallFunction(repl_func.Id(), luacbk_args);
|
||||
|
||||
// eval result
|
||||
if (rslts.length == 0) // will be 0 when gsub_proc returns nil; PAGE:en.d:tracer; DATE:2017-04-22
|
||||
return false;
|
||||
else { // ArrayIndex check
|
||||
Object rslt_obj = rslts[0].Val(); // 0th idx has result
|
||||
tmp_bfr.Add_str_u8(Object_.Xto_str_strict_or_empty(rslt_obj)); // NOTE: always convert to String; rslt_obj can be int; PAGE:en.d:seven DATE:2016-04-27
|
||||
}
|
||||
break;
|
||||
}
|
||||
default: throw Err_.new_unhandled(repl_tid);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
private static final byte Repl_tid_null = 0, Repl_tid_string = 1, Repl_tid_table = 2, Repl_tid_luacbk = 3;
|
||||
public static final Scrib_lib_ustring_gsub_mgr[] Ary_empty = new Scrib_lib_ustring_gsub_mgr[0];
|
||||
}
|
@ -37,6 +37,8 @@ public class Scrib_regx_converter {
|
||||
int len = pat.length;
|
||||
int grps_len = 0;
|
||||
int bct = 0;
|
||||
|
||||
// REF.MW: https://github.com/wikimedia/mediawiki-extensions-Scribunto/blob/master/includes/engines/LuaCommon/UstringLibrary.php#L415
|
||||
for (int i = 0; i < len; i++) {
|
||||
int i_end = i + 1;
|
||||
q_flag = false; // must be reset; REF.MW:UstringLibrary.php|patternToRegex; DATE:2014-02-08
|
||||
@ -44,24 +46,28 @@ public class Scrib_regx_converter {
|
||||
switch (cur) {
|
||||
case Byte_ascii.Pow:
|
||||
q_flag = i != 0;
|
||||
bfr.Add((anchor == Anchor_null || q_flag) ? Bry_pow_escaped : anchor); // NOTE: must add anchor \G when using offsets; EX:cs.n:Category:1._zárí_2008; DATE:2014-05-07
|
||||
bfr.Add((anchor == Anchor_null || q_flag) ? Bry_pow_escaped : anchor); // NOTE: must add anchor \G when using offsets; EX:cs.n:Category:1._zárí_2008; DATE:2014-05-07
|
||||
break;
|
||||
case Byte_ascii.Dollar:
|
||||
q_flag = i < len - 1;
|
||||
bfr.Add(q_flag ? Bry_dollar_escaped : Bry_dollar_literal);
|
||||
break;
|
||||
case Byte_ascii.Paren_bgn: {
|
||||
// fail if "(EOS"
|
||||
if (i + 1 >= len)
|
||||
throw Err_.new_wo_type("Unmatched open-paren at pattern character " + Int_.To_str(i_end));
|
||||
int grp_idx = grp_mgr.Capt__len() + 1;
|
||||
boolean is_empty_capture = pat[i + 1] == Byte_ascii.Paren_end; // current is "()"
|
||||
|
||||
// check for "()"; enables anypos flag
|
||||
boolean is_empty_capture = pat[i + 1] == Byte_ascii.Paren_end;
|
||||
if (is_empty_capture)
|
||||
any_pos = true;
|
||||
bfr.Add_byte(Byte_ascii.Paren_bgn); // $re .= "(?<m$n>";
|
||||
grp_mgr.Capt__add__real(grp_idx, is_empty_capture);
|
||||
bfr.Add_byte(Byte_ascii.Paren_bgn); // $re .= "(?<m$n>";
|
||||
break;
|
||||
}
|
||||
case Byte_ascii.Paren_end:
|
||||
// fail if ")" without preceding "("
|
||||
if (grp_mgr.Open__len() <= 0)
|
||||
throw Err_.new_wo_type("Unmatched close-paren at pattern character " + Int_.To_str(i_end));
|
||||
grp_mgr.Open__pop();
|
||||
|
Loading…
Reference in New Issue
Block a user