mirror of
https://github.com/gnosygnu/xowa.git
synced 2024-10-27 20:34:16 +00:00
Scribunto: Handle anypos flag [#337]
This commit is contained in:
parent
f44a1874a8
commit
4626203f16
@ -54,6 +54,15 @@ public class Array_ {
|
|||||||
Copy_to(src, 0, trg, 0, copy_len);
|
Copy_to(src, 0, trg, 0, copy_len);
|
||||||
return trg;
|
return trg;
|
||||||
}
|
}
|
||||||
|
public static Object Extract_by_pos(Object src, int src_bgn) {
|
||||||
|
return Extract_by_pos(src, src_bgn, Array.getLength(src));
|
||||||
|
}
|
||||||
|
public static Object Extract_by_pos(Object src, int src_bgn, int src_end) {
|
||||||
|
int trg_len = src_end - src_bgn;
|
||||||
|
Object trg = Create(Component_type(src), trg_len);
|
||||||
|
Copy_to(src, src_bgn, trg, 0, src_end - src_bgn);
|
||||||
|
return trg;
|
||||||
|
}
|
||||||
public static List_adp To_list(Object ary) {
|
public static List_adp To_list(Object ary) {
|
||||||
int aryLen = Array_.Len(ary);
|
int aryLen = Array_.Len(ary);
|
||||||
List_adp rv = List_adp_.New();
|
List_adp rv = List_adp_.New();
|
||||||
|
@ -70,6 +70,14 @@ public class Keyval_ {
|
|||||||
Ary__to_str__nest__ary(bfr, 0, true, ary);
|
Ary__to_str__nest__ary(bfr, 0, true, ary);
|
||||||
return bfr.To_str_and_clear();
|
return bfr.To_str_and_clear();
|
||||||
}
|
}
|
||||||
|
public static Object[] Ary__to_objary__val(Keyval[] ary) {
|
||||||
|
int ary_len = ary.length;
|
||||||
|
Object[] rv = new Object[ary_len];
|
||||||
|
for (int i = 0; i < ary_len; i++) {
|
||||||
|
rv[i] = ary[i].Val();
|
||||||
|
}
|
||||||
|
return rv;
|
||||||
|
}
|
||||||
private static void Ary__to_str__nest__ary(Bry_bfr bfr, int indent, boolean is_kv, Object[] ary) {
|
private static void Ary__to_str__nest__ary(Bry_bfr bfr, int indent, boolean is_kv, Object[] ary) {
|
||||||
int len = ary.length;
|
int len = ary.length;
|
||||||
for (int i = 0; i < len; ++i) {
|
for (int i = 0; i < len; ++i) {
|
||||||
|
@ -17,6 +17,7 @@ package gplx.core.tests; import gplx.*; import gplx.core.*;
|
|||||||
import gplx.core.brys.*;
|
import gplx.core.brys.*;
|
||||||
public class Gftest {
|
public class Gftest {
|
||||||
private static final Bry_bfr bfr = Bry_bfr_.New();
|
private static final Bry_bfr bfr = Bry_bfr_.New();
|
||||||
|
// Object[] overload: forwards to Eq__array with the arrays tagged as Type_ids_.Id__obj.
public static void Eq__ary(Object[] expd, Object[] actl, String msg_fmt, Object... msg_args) {Eq__array(Type_ids_.Id__obj, expd, actl, msg_fmt, msg_args);}
|
||||||
public static void Eq__ary(boolean[] expd, boolean[] actl, String msg_fmt, Object... msg_args) {Eq__array(Type_ids_.Id__bool, expd, actl, msg_fmt, msg_args);}
|
public static void Eq__ary(boolean[] expd, boolean[] actl, String msg_fmt, Object... msg_args) {Eq__array(Type_ids_.Id__bool, expd, actl, msg_fmt, msg_args);}
|
||||||
public static void Eq__ary(int[] expd, int[] actl, String msg_fmt, Object... msg_args) {Eq__array(Type_ids_.Id__int, expd, actl, msg_fmt, msg_args);}
|
public static void Eq__ary(int[] expd, int[] actl, String msg_fmt, Object... msg_args) {Eq__array(Type_ids_.Id__int, expd, actl, msg_fmt, msg_args);}
|
||||||
public static void Eq__ary(long[] expd, long[] actl, String msg_fmt, Object... msg_args) {Eq__array(Type_ids_.Id__long, expd, actl, msg_fmt, msg_args);}
|
public static void Eq__ary(long[] expd, long[] actl, String msg_fmt, Object... msg_args) {Eq__array(Type_ids_.Id__long, expd, actl, msg_fmt, msg_args);}
|
||||||
@ -151,12 +152,14 @@ public class Gftest {
|
|||||||
}
|
}
|
||||||
private static void Write__itm(Bry_bfr bfr, int type_id, Object ary, int len, int idx) {
|
private static void Write__itm(Bry_bfr bfr, int type_id, Object ary, int len, int idx) {
|
||||||
if (idx < len) {
|
if (idx < len) {
|
||||||
|
Object val = Array_.Get_at(ary, idx);
|
||||||
switch (type_id) {
|
switch (type_id) {
|
||||||
case Type_ids_.Id__bool: bfr.Add_yn(Bool_.Cast(Array_.Get_at(ary, idx))); break;
|
case Type_ids_.Id__bool: bfr.Add_yn(Bool_.Cast(val)); break;
|
||||||
case Type_ids_.Id__bry: bfr.Add_safe((byte[])Array_.Get_at(ary, idx)); break;
|
case Type_ids_.Id__bry: bfr.Add_safe((byte[])val); break;
|
||||||
case Type_ids_.Id__long: bfr.Add_long_variable(Long_.cast(Array_.Get_at(ary, idx))); break;
|
case Type_ids_.Id__long: bfr.Add_long_variable(Long_.cast(val)); break;
|
||||||
case Type_ids_.Id__int: bfr.Add_int_variable(Int_.Cast(Array_.Get_at(ary, idx))); break;
|
case Type_ids_.Id__int: bfr.Add_int_variable(Int_.Cast(val)); break;
|
||||||
case Type_ids_.Id__byte: bfr.Add_int_variable((int)(Byte_.Cast(Array_.Get_at(ary, idx)))); break;
|
case Type_ids_.Id__byte: bfr.Add_int_variable((int)(Byte_.Cast(val))); break;
|
||||||
|
case Type_ids_.Id__obj: bfr.Add_str_u8(Object_.Xto_str_strict_or_null_mark(val)); break;
|
||||||
default: throw Err_.new_unhandled_default(type_id);
|
default: throw Err_.new_unhandled_default(type_id);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -182,6 +185,7 @@ public class Gftest {
|
|||||||
case Type_ids_.Id__long: eq = Long_.cast(expd_obj) == Long_.cast(actl_obj); break;
|
case Type_ids_.Id__long: eq = Long_.cast(expd_obj) == Long_.cast(actl_obj); break;
|
||||||
case Type_ids_.Id__int: eq = Int_.Cast(expd_obj) == Int_.Cast(actl_obj); break;
|
case Type_ids_.Id__int: eq = Int_.Cast(expd_obj) == Int_.Cast(actl_obj); break;
|
||||||
case Type_ids_.Id__byte: eq = Byte_.Cast(expd_obj) == Byte_.Cast(actl_obj); break;
|
case Type_ids_.Id__byte: eq = Byte_.Cast(expd_obj) == Byte_.Cast(actl_obj); break;
|
||||||
|
case Type_ids_.Id__obj: eq = Object_.Eq(expd_obj, actl_obj); break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (!eq) {
|
if (!eq) {
|
||||||
|
@ -14,7 +14,7 @@ GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
|||||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||||
*/
|
*/
|
||||||
package gplx.xowa.xtns.scribunto.libs; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.scribunto.*;
|
package gplx.xowa.xtns.scribunto.libs; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.scribunto.*;
|
||||||
import gplx.langs.regxs.*; import gplx.core.intls.*;
|
import gplx.core.intls.*; import gplx.langs.regxs.*;
|
||||||
import gplx.xowa.parsers.*;
|
import gplx.xowa.parsers.*;
|
||||||
import gplx.xowa.xtns.scribunto.procs.*;
|
import gplx.xowa.xtns.scribunto.procs.*;
|
||||||
public class Scrib_lib_ustring implements Scrib_lib {
|
public class Scrib_lib_ustring implements Scrib_lib {
|
||||||
@ -118,30 +118,9 @@ public class Scrib_lib_ustring implements Scrib_lib {
|
|||||||
AddCapturesFromMatch(tmp_list, regx_rslts[0], text, regx_converter.Capt_ary(), true);
|
AddCapturesFromMatch(tmp_list, regx_rslts[0], text, regx_converter.Capt_ary(), true);
|
||||||
return rslt.Init_many_list(tmp_list);
|
return rslt.Init_many_list(tmp_list);
|
||||||
}
|
}
|
||||||
private Scrib_lib_ustring_gsub_mgr[] gsub_mgr_ary = Scrib_lib_ustring_gsub_mgr.Ary_empty;
|
|
||||||
private int gsub_mgr_max = 0, gsub_mgr_len = -1;
|
|
||||||
private final Object gsub_mgr_lock = new Object();
|
|
||||||
public boolean Gsub(Scrib_proc_args args, Scrib_proc_rslt rslt) {
|
public boolean Gsub(Scrib_proc_args args, Scrib_proc_rslt rslt) {
|
||||||
boolean rv = false;
|
Scrib_lib_ustring_gsub_mgr gsub_mgr = new Scrib_lib_ustring_gsub_mgr(core, new Scrib_regx_converter());
|
||||||
synchronized (gsub_mgr_lock) { // handle recursive gsub calls; PAGE:en.d:כלב; DATE:2016-01-22
|
return gsub_mgr.Exec(args, rslt);
|
||||||
Scrib_regx_converter regx_converter = new Scrib_regx_converter();
|
|
||||||
int new_len = gsub_mgr_len + 1;
|
|
||||||
if (new_len == gsub_mgr_max) {
|
|
||||||
this.gsub_mgr_max = new_len == 0 ? 2 : new_len * 2;
|
|
||||||
Scrib_lib_ustring_gsub_mgr[] new_gsub_mgr_ary = new Scrib_lib_ustring_gsub_mgr[gsub_mgr_max];
|
|
||||||
Array_.Copy(gsub_mgr_ary, new_gsub_mgr_ary);
|
|
||||||
gsub_mgr_ary = new_gsub_mgr_ary;
|
|
||||||
}
|
|
||||||
Scrib_lib_ustring_gsub_mgr cur = gsub_mgr_ary[new_len];
|
|
||||||
if (cur == null) {
|
|
||||||
cur = new Scrib_lib_ustring_gsub_mgr(core, regx_converter);
|
|
||||||
gsub_mgr_ary[new_len] = cur;
|
|
||||||
}
|
|
||||||
this.gsub_mgr_len = new_len;
|
|
||||||
rv = cur.Exec(args, rslt);
|
|
||||||
--gsub_mgr_len;
|
|
||||||
}
|
|
||||||
return rv;
|
|
||||||
}
|
}
|
||||||
public boolean Gmatch_init(Scrib_proc_args args, Scrib_proc_rslt rslt) {
|
public boolean Gmatch_init(Scrib_proc_args args, Scrib_proc_rslt rslt) {
|
||||||
// String text = Scrib_kv_utl_.Val_to_str(values, 0);
|
// String text = Scrib_kv_utl_.Val_to_str(values, 0);
|
||||||
@ -195,189 +174,3 @@ public class Scrib_lib_ustring implements Scrib_lib {
|
|||||||
private static final int Base1 = 1
|
private static final int Base1 = 1
|
||||||
, End_adj = 1; // lua / php uses "end" as <= not <; EX: "abc" and bgn=0, end= 1; for XOWA, this is "a"; for MW / PHP it is "ab"
|
, End_adj = 1; // lua / php uses "end" as <= not <; EX: "abc" and bgn=0, end= 1; for XOWA, this is "a"; for MW / PHP it is "ab"
|
||||||
}
|
}
|
||||||
class Scrib_lib_ustring_gsub_mgr {
|
|
||||||
private Scrib_regx_converter regx_converter;
|
|
||||||
public Scrib_lib_ustring_gsub_mgr(Scrib_core core, Scrib_regx_converter regx_converter) {this.core = core; this.regx_converter = regx_converter;} private Scrib_core core;
|
|
||||||
private byte tmp_repl_tid = Repl_tid_null; private byte[] tmp_repl_bry = null;
|
|
||||||
private Hash_adp repl_hash = null; private Scrib_lua_proc repl_func = null;
|
|
||||||
private int repl_count = 0;
|
|
||||||
public boolean Exec(Scrib_proc_args args, Scrib_proc_rslt rslt) {
|
|
||||||
Object text_obj = args.Cast_obj_or_null(0);
|
|
||||||
String text = String_.as_(text_obj);
|
|
||||||
if (text == null) text = Object_.Xto_str_strict_or_empty(text_obj);
|
|
||||||
String regx = args.Xstr_str_or_null(1); // NOTE: @pattern sometimes int; PAGE:en.d:λύω; DATE:2014-09-02
|
|
||||||
if (args.Len() == 2) return rslt.Init_obj(text); // if no replace arg, return self; PAGE:en.d:'orse; DATE:2013-10-13
|
|
||||||
Object repl_obj = args.Cast_obj_or_null(2);
|
|
||||||
regx = regx_converter.patternToRegex(Bry_.new_u8(regx), Scrib_regx_converter.Anchor_pow);
|
|
||||||
int limit = args.Cast_int_or(3, -1);
|
|
||||||
repl_count = 0;
|
|
||||||
Identify_repl(repl_obj);
|
|
||||||
String repl = Exec_repl(tmp_repl_tid, tmp_repl_bry, text, regx, limit);
|
|
||||||
return rslt.Init_many_objs(repl, repl_count);
|
|
||||||
}
|
|
||||||
private void Identify_repl(Object repl_obj) {
|
|
||||||
Class<?> repl_type = repl_obj.getClass();
|
|
||||||
if (Object_.Eq(repl_type, String_.Cls_ref_type)) {
|
|
||||||
tmp_repl_tid = Repl_tid_string;
|
|
||||||
tmp_repl_bry = Bry_.new_u8((String)repl_obj);
|
|
||||||
}
|
|
||||||
else if (Object_.Eq(repl_type, Int_.Cls_ref_type)) { // NOTE:@replace sometimes int; PAGE:en.d:λύω; DATE:2014-09-02
|
|
||||||
tmp_repl_tid = Repl_tid_string;
|
|
||||||
tmp_repl_bry = Bry_.new_u8(Int_.To_str(Int_.Cast(repl_obj)));
|
|
||||||
}
|
|
||||||
else if (Object_.Eq(repl_type, Keyval[].class)) {
|
|
||||||
tmp_repl_tid = Repl_tid_table;
|
|
||||||
Keyval[] repl_tbl = (Keyval[])repl_obj;
|
|
||||||
if (repl_hash == null)
|
|
||||||
repl_hash = Hash_adp_.New();
|
|
||||||
else
|
|
||||||
repl_hash.Clear();
|
|
||||||
int repl_tbl_len = repl_tbl.length;
|
|
||||||
for (int i = 0; i < repl_tbl_len; i++) {
|
|
||||||
Keyval repl_itm = repl_tbl[i];
|
|
||||||
String repl_itm_val = repl_itm.Val_to_str_or_empty();
|
|
||||||
repl_hash.Add(repl_itm.Key(), Bry_.new_u8(repl_itm_val));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else if (Object_.Eq(repl_type, Scrib_lua_proc.class)) {
|
|
||||||
tmp_repl_tid = Repl_tid_luacbk;
|
|
||||||
repl_func = (Scrib_lua_proc)repl_obj;
|
|
||||||
}
|
|
||||||
else if (Object_.Eq(repl_type, Double_.Cls_ref_type)) { // NOTE:@replace sometimes double; PAGE:de.v:Wikivoyage:Wikidata/Test_Modul:Wikidata2; DATE:2016-04-21
|
|
||||||
tmp_repl_tid = Repl_tid_string;
|
|
||||||
tmp_repl_bry = Bry_.new_u8(Double_.To_str(Double_.cast(repl_obj)));
|
|
||||||
}
|
|
||||||
else throw Err_.new_unhandled(Type_.Name(repl_type));
|
|
||||||
}
|
|
||||||
private String Exec_repl(byte repl_tid, byte[] repl_bry, String text, String regx, int limit) {
|
|
||||||
Regx_adp regx_mgr = Scrib_lib_ustring.RegxAdp_new_(core.Ctx(), regx);
|
|
||||||
Regx_match[] rslts = regx_mgr.Match_all(text, 0);
|
|
||||||
if ( rslts.length == 0 // PHP: If matches are found, the new subject will be returned, otherwise subject will be returned unchanged.; http://php.net/manual/en/function.preg-replace-callback.php
|
|
||||||
|| regx_mgr.Pattern_is_invalid() // NOTE: invalid patterns should return self; EX:[^]; DATE:2014-09-02
|
|
||||||
) return text;
|
|
||||||
rslts = regx_converter.Adjust_balanced(rslts);
|
|
||||||
Bry_bfr tmp_bfr = Bry_bfr_.New();
|
|
||||||
int len = rslts.length;
|
|
||||||
int pos = 0;
|
|
||||||
for (int i = 0; i < len; i++) {
|
|
||||||
if (limit > -1 && repl_count == limit) break;
|
|
||||||
Regx_match rslt = rslts[i];
|
|
||||||
tmp_bfr.Add_str_u8(String_.Mid(text, pos, rslt.Find_bgn())); // NOTE: regx returns char pos (not bry); must add as String, not bry; DATE:2013-07-17
|
|
||||||
if (!Exec_repl_itm(tmp_bfr, repl_tid, repl_bry, text, rslt)) { // will be false when gsub_proc returns nothing; PAGE:en.d:tracer PAGE:en.d:שלום DATE:2017-04-22;
|
|
||||||
tmp_bfr.Add_str_u8(String_.Mid(text, rslt.Find_bgn(), rslt.Find_end()));
|
|
||||||
}
|
|
||||||
pos = rslt.Find_end();
|
|
||||||
++repl_count;
|
|
||||||
}
|
|
||||||
int text_len = String_.Len(text);
|
|
||||||
if (pos < text_len)
|
|
||||||
tmp_bfr.Add_str_u8(String_.Mid(text, pos, text_len)); // NOTE: regx returns char pos (not bry); must add as String, not bry; DATE:2013-07-17
|
|
||||||
return tmp_bfr.To_str_and_clear();
|
|
||||||
}
|
|
||||||
private boolean Exec_repl_itm(Bry_bfr tmp_bfr, byte repl_tid, byte[] repl_bry, String text, Regx_match match) {
|
|
||||||
switch (repl_tid) {
|
|
||||||
case Repl_tid_string:
|
|
||||||
int len = repl_bry.length;
|
|
||||||
for (int i = 0; i < len; i++) {
|
|
||||||
byte b = repl_bry[i];
|
|
||||||
switch (b) {
|
|
||||||
case Byte_ascii.Percent: {
|
|
||||||
++i;
|
|
||||||
if (i == len) // % at end of stream; just add %;
|
|
||||||
tmp_bfr.Add_byte(b);
|
|
||||||
else {
|
|
||||||
b = repl_bry[i];
|
|
||||||
switch (b) {
|
|
||||||
case Byte_ascii.Num_0: case Byte_ascii.Num_1: case Byte_ascii.Num_2: case Byte_ascii.Num_3: case Byte_ascii.Num_4:
|
|
||||||
case Byte_ascii.Num_5: case Byte_ascii.Num_6: case Byte_ascii.Num_7: case Byte_ascii.Num_8: case Byte_ascii.Num_9:
|
|
||||||
int idx = b - Byte_ascii.Num_0;
|
|
||||||
if (idx == 0) // NOTE: 0 means take result; REF.MW:if ($x === '0'); return $m[0]; PAGE:Wikipedia:Wikipedia_Signpost/Templates/Voter/testcases; DATE:2015-08-02
|
|
||||||
tmp_bfr.Add_str_u8(String_.Mid(text, match.Find_bgn(), match.Find_end()));
|
|
||||||
else { // NOTE: > 0 means get from groups if it exists; REF.MW:elseif (isset($m["m$x"])) return $m["m$x"]; PAGE:Wikipedia:Wikipedia_Signpost/Templates/Voter/testcases; DATE:2015-08-02
|
|
||||||
idx -= List_adp_.Base1;
|
|
||||||
if (idx < match.Groups().length) { // retrieve numbered capture; TODO_OLD: support more than 9 captures
|
|
||||||
Regx_group grp = match.Groups()[idx];
|
|
||||||
tmp_bfr.Add_str_u8(String_.Mid(text, grp.Bgn(), grp.End())); // NOTE: grp.Bgn() / .End() is for String pos (bry pos will fail for utf8 strings)
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
tmp_bfr.Add_byte(Byte_ascii.Percent);
|
|
||||||
tmp_bfr.Add_byte(b);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case Byte_ascii.Percent:
|
|
||||||
tmp_bfr.Add_byte(Byte_ascii.Percent);
|
|
||||||
break;
|
|
||||||
default: // not a number; add literal
|
|
||||||
tmp_bfr.Add_byte(Byte_ascii.Percent);
|
|
||||||
tmp_bfr.Add_byte(b);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
default:
|
|
||||||
tmp_bfr.Add_byte(b);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case Repl_tid_table: {
|
|
||||||
int match_bgn = -1, match_end = -1;
|
|
||||||
Regx_group[] grps = match.Groups();
|
|
||||||
if (grps.length == 0) {
|
|
||||||
match_bgn = match.Find_bgn();
|
|
||||||
match_end = match.Find_end();
|
|
||||||
}
|
|
||||||
else { // group exists, take first one (logic matches Scribunto); PAGE:en.w:Bannered_routes_of_U.S._Route_60; DATE:2014-08-15
|
|
||||||
Regx_group grp = grps[0];
|
|
||||||
match_bgn = grp.Bgn();
|
|
||||||
match_end = grp.End();
|
|
||||||
}
|
|
||||||
String find_str = String_.Mid(text, match_bgn, match_end); // NOTE: rslt.Bgn() / .End() is for String pos (bry pos will fail for utf8 strings)
|
|
||||||
Object actl_repl_obj = repl_hash.Get_by(find_str);
|
|
||||||
if (actl_repl_obj == null) // match found, but no replacement specified; EX:"abc", "[ab]", "a:A"; "b" in regex but not in tbl; EX:d:DVD; DATE:2014-03-31
|
|
||||||
tmp_bfr.Add_str_u8(find_str);
|
|
||||||
else
|
|
||||||
tmp_bfr.Add((byte[])actl_repl_obj);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case Repl_tid_luacbk: {
|
|
||||||
// TOMBSTONE: was causing garbled text on PAGE:en.w:Template:Infobox_kommune DATE:2018-07-02
|
|
||||||
/*
|
|
||||||
String find_str = String_.Mid(text, match.Find_bgn(), match.Find_end());
|
|
||||||
Keyval[] luacbk_args = Scrib_kv_utl_.base1_obj_(find_str);
|
|
||||||
*/
|
|
||||||
Keyval[] luacbk_args = null;
|
|
||||||
Regx_group[] grps = match.Groups();
|
|
||||||
int grps_len = grps.length;
|
|
||||||
if (grps_len == 0) { // no match; use original String
|
|
||||||
String find_str = String_.Mid(text, match.Find_bgn(), match.Find_end());
|
|
||||||
luacbk_args = Scrib_kv_utl_.base1_obj_(find_str);
|
|
||||||
}
|
|
||||||
else { // match; build ary of matches; (see UStringLibrary.php)
|
|
||||||
luacbk_args = new Keyval[grps_len];
|
|
||||||
for (int i = 0; i < grps_len; i++) {
|
|
||||||
Regx_group grp = grps[i];
|
|
||||||
String find_str = String_.Mid(text, grp.Bgn(), grp.End());
|
|
||||||
luacbk_args[i] = Keyval_.int_(i + Scrib_core.Base_1, find_str);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
/*
|
|
||||||
*/
|
|
||||||
Keyval[] rslts = core.Interpreter().CallFunction(repl_func.Id(), luacbk_args);
|
|
||||||
if (rslts.length == 0) // will be 0 when gsub_proc returns nil; PAGE:en.d:tracer; DATE:2017-04-22
|
|
||||||
return false;
|
|
||||||
else { // ArrayIndex check
|
|
||||||
Object rslt_obj = rslts[0].Val(); // 0th idx has result
|
|
||||||
tmp_bfr.Add_str_u8(Object_.Xto_str_strict_or_empty(rslt_obj)); // NOTE: always convert to String; rslt_obj can be int; PAGE:en.d:seven DATE:2016-04-27
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
default: throw Err_.new_unhandled(repl_tid);
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
private static final byte Repl_tid_null = 0, Repl_tid_string = 1, Repl_tid_table = 2, Repl_tid_luacbk = 3;
|
|
||||||
public static final Scrib_lib_ustring_gsub_mgr[] Ary_empty = new Scrib_lib_ustring_gsub_mgr[0];
|
|
||||||
}
|
|
||||||
|
@ -14,7 +14,8 @@ GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
|||||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||||
*/
|
*/
|
||||||
package gplx.xowa.xtns.scribunto.libs; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.scribunto.*;
|
package gplx.xowa.xtns.scribunto.libs; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.scribunto.*;
|
||||||
import org.junit.*; import gplx.langs.regxs.*; import gplx.xowa.xtns.scribunto.engines.mocks.*;
|
import org.junit.*; import gplx.core.tests.*;
|
||||||
|
import gplx.langs.regxs.*; import gplx.xowa.xtns.scribunto.engines.mocks.*;
|
||||||
public class Scrib_lib_ustring__gsub__tst {
|
public class Scrib_lib_ustring__gsub__tst {
|
||||||
private final Mock_scrib_fxt fxt = new Mock_scrib_fxt(); private Scrib_lib lib;
|
private final Mock_scrib_fxt fxt = new Mock_scrib_fxt(); private Scrib_lib lib;
|
||||||
@Before public void init() {
|
@Before public void init() {
|
||||||
@ -30,6 +31,9 @@ public class Scrib_lib_ustring__gsub__tst {
|
|||||||
// TOMBSTONE: tested with local MW and {{#invoke:Test|test16|a|[^]|b}} -> Lua error: Missing close-bracket for character set beginning at pattern character 1.; DATE:2018-07-02
|
// TOMBSTONE: tested with local MW and {{#invoke:Test|test16|a|[^]|b}} -> Lua error: Missing close-bracket for character set beginning at pattern character 1.; DATE:2018-07-02
|
||||||
// Exec_gsub("a" , "[^]" , 1, "b" , "a;0"); // invalid regx should not fail; should return self; DATE:2013-10-20
|
// Exec_gsub("a" , "[^]" , 1, "b" , "a;0"); // invalid regx should not fail; should return self; DATE:2013-10-20
|
||||||
}
|
}
|
||||||
|
@Test public void Find__int() {// PURPOSE: gsub with integer arg should not fail; DATE:2013-11-06
|
||||||
|
fxt.Test__proc__kvps__flat(lib, Scrib_lib_ustring.Invk_gsub, Scrib_kv_utl_.base1_many_(1, "[1]", "2", 1), "2;1"); // NOTE: text is integer (lua / php are type-less)
|
||||||
|
}
|
||||||
@Test public void Replace__none() {// PURPOSE: gsub with no replace argument should not fail; EX:d:'orse; DATE:2013-10-14
|
@Test public void Replace__none() {// PURPOSE: gsub with no replace argument should not fail; EX:d:'orse; DATE:2013-10-14
|
||||||
fxt.Test__proc__objs__flat(lib, Scrib_lib_ustring.Invk_gsub, Object_.Ary("text", "regx") , "text"); // NOTE: repl, limit deliberately omitted
|
fxt.Test__proc__objs__flat(lib, Scrib_lib_ustring.Invk_gsub, Object_.Ary("text", "regx") , "text"); // NOTE: repl, limit deliberately omitted
|
||||||
}
|
}
|
||||||
@ -99,6 +103,20 @@ public class Scrib_lib_ustring__gsub__tst {
|
|||||||
Tfds.Eq(Bool_.Y, Regx_adp_.Match("\0", "[\\x]")); // \0 matched by any_char
|
Tfds.Eq(Bool_.Y, Regx_adp_.Match("\0", "[\\x]")); // \0 matched by any_char
|
||||||
Tfds.Eq(Bool_.Y, Regx_adp_.Match("\0", "[\\X]")); // \0 matched by !any_char
|
Tfds.Eq(Bool_.Y, Regx_adp_.Match("\0", "[\\X]")); // \0 matched by !any_char
|
||||||
}
|
}
|
||||||
|
@Test public void Luacbk__basic() {
|
||||||
|
String text = "ad2f1e3z";
|
||||||
|
String regx = "([1d])([2e])([3f])";
|
||||||
|
Mock_proc__verify_args proc = new Mock_proc__verify_args(0, new Object[]{"B", "d", "2", "f"}, new Object[]{"Y", "1", "e", "3"});
|
||||||
|
fxt.Init__cbk(proc);
|
||||||
|
Exec_gsub(text, regx, -1, proc.To_scrib_lua_proc(), "aBYz;2");
|
||||||
|
}
|
||||||
|
@Test public void Luacbk__anypos() {
|
||||||
|
String text = "ad2f1e3z";
|
||||||
|
String regx = "()([1d])([2e])([3f])"; // "()" is anypos, which inserts find_pos to results
|
||||||
|
Mock_proc__verify_args proc = new Mock_proc__verify_args(0, new Object[]{"B", 1, "d", "2", "f"}, new Object[]{"Y", 4, "1", "e", "3"});
|
||||||
|
fxt.Init__cbk(proc);
|
||||||
|
Exec_gsub(text, regx, -1, proc.To_scrib_lua_proc(), "aBYz;2");
|
||||||
|
}
|
||||||
private void Exec_gsub(String text, Object regx, int limit, Object repl, String expd) {
|
private void Exec_gsub(String text, Object regx, int limit, Object repl, String expd) {
|
||||||
fxt.Test__proc__kvps__flat(lib, Scrib_lib_ustring.Invk_gsub, Scrib_kv_utl_.base1_many_(text, regx, repl, limit), expd);
|
fxt.Test__proc__kvps__flat(lib, Scrib_lib_ustring.Invk_gsub, Scrib_kv_utl_.base1_many_(text, regx, repl, limit), expd);
|
||||||
}
|
}
|
||||||
@ -133,3 +151,17 @@ class Mock_proc__empty extends Mock_proc_fxt { private final String find, rep
|
|||||||
return String_.Eq(text, find) ? Keyval_.Ary(Keyval_.new_("0", repl)) : Keyval_.Ary_empty;
|
return String_.Eq(text, find) ? Keyval_.Ary(Keyval_.new_("0", repl)) : Keyval_.Ary_empty;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
class Mock_proc__verify_args extends Mock_proc_fxt { private final Object[][] expd_ary;
|
||||||
|
private int expd_idx = -1;
|
||||||
|
public Mock_proc__verify_args(int id, Object[]... expd_ary) {super(id, "number");
|
||||||
|
this.expd_ary = expd_ary;
|
||||||
|
}
|
||||||
|
@Override public Keyval[] Exec_by_scrib(Keyval[] args) {
|
||||||
|
Object[] expd_args = expd_ary[++expd_idx];
|
||||||
|
Object rv = expd_args[0];
|
||||||
|
expd_args = (Object[])Array_.Extract_by_pos(expd_args, 1);
|
||||||
|
Object[] actl_args = Keyval_.Ary__to_objary__val(args);
|
||||||
|
Gftest.Eq__ary(expd_args, actl_args, "failed lua_cbk");
|
||||||
|
return Keyval_.Ary(Keyval_.int_(0, rv));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@ -0,0 +1,235 @@
|
|||||||
|
/*
|
||||||
|
XOWA: the XOWA Offline Wiki Application
|
||||||
|
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||||
|
|
||||||
|
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||||
|
or alternatively under the terms of the Apache License Version 2.0.
|
||||||
|
|
||||||
|
You may use XOWA according to either of these licenses as is most appropriate
|
||||||
|
for your project on a case-by-case basis.
|
||||||
|
|
||||||
|
The terms of each license can be found in the source code repository:
|
||||||
|
|
||||||
|
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||||
|
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||||
|
*/
|
||||||
|
package gplx.xowa.xtns.scribunto.libs; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.scribunto.*;
|
||||||
|
import gplx.langs.regxs.*;
|
||||||
|
import gplx.xowa.xtns.scribunto.procs.*;
|
||||||
|
class Scrib_lib_ustring_gsub_mgr {
|
||||||
|
private final Scrib_core core;
|
||||||
|
private final Scrib_regx_converter regx_converter;
|
||||||
|
private byte[] repl_bry; private Hash_adp repl_hash; private Scrib_lua_proc repl_func;
|
||||||
|
private int repl_count = 0;
|
||||||
|
// Stores the core and the pattern converter that this gsub manager uses.
public Scrib_lib_ustring_gsub_mgr(Scrib_core core, Scrib_regx_converter regx_converter) {this.core = core; this.regx_converter = regx_converter;}
|
||||||
|
public boolean Exec(Scrib_proc_args args, Scrib_proc_rslt rslt) {
|
||||||
|
// get @text; NOTE: sometimes int; DATE:2013-11-06
|
||||||
|
String text = args.Xstr_str_or_null(0);
|
||||||
|
if (args.Len() == 2) return rslt.Init_obj(text); // if no @replace, return @text; PAGE:en.d:'orse; DATE:2013-10-13
|
||||||
|
|
||||||
|
// get @pattern; NOTE: sometimes int; PAGE:en.d:λύω; DATE:2014-09-02
|
||||||
|
String regx = args.Xstr_str_or_null(1);
|
||||||
|
regx = regx_converter.patternToRegex(Bry_.new_u8(regx), Scrib_regx_converter.Anchor_pow);
|
||||||
|
|
||||||
|
// get @repl
|
||||||
|
Object repl_obj = args.Cast_obj_or_null(2);
|
||||||
|
byte repl_tid = Identify_repl(repl_obj);
|
||||||
|
|
||||||
|
// get @limit; reset repl_count
|
||||||
|
int limit = args.Cast_int_or(3, -1);
|
||||||
|
repl_count = 0;
|
||||||
|
|
||||||
|
// do repl
|
||||||
|
String repl = Exec_repl(repl_tid, text, regx, limit);
|
||||||
|
return rslt.Init_many_objs(repl, repl_count);
|
||||||
|
}
|
||||||
|
private byte Identify_repl(Object repl_obj) {
|
||||||
|
byte repl_tid = Repl_tid_null;
|
||||||
|
// @repl can be String, int, table, func
|
||||||
|
Class<?> repl_type = repl_obj.getClass();
|
||||||
|
if (Object_.Eq(repl_type, String_.Cls_ref_type)) {
|
||||||
|
repl_tid = Repl_tid_string;
|
||||||
|
repl_bry = Bry_.new_u8((String)repl_obj);
|
||||||
|
}
|
||||||
|
else if (Object_.Eq(repl_type, Int_.Cls_ref_type)) { // NOTE:@replace sometimes int; PAGE:en.d:λύω; DATE:2014-09-02
|
||||||
|
repl_tid = Repl_tid_string;
|
||||||
|
repl_bry = Bry_.new_u8(Int_.To_str(Int_.Cast(repl_obj)));
|
||||||
|
}
|
||||||
|
else if (Object_.Eq(repl_type, Keyval[].class)) {
|
||||||
|
repl_tid = Repl_tid_table;
|
||||||
|
repl_hash = Hash_adp_.New();
|
||||||
|
Keyval[] kvs = (Keyval[])repl_obj;
|
||||||
|
int kvs_len = kvs.length;
|
||||||
|
for (int i = 0; i < kvs_len; i++) {
|
||||||
|
Keyval kv = kvs[i];
|
||||||
|
repl_hash.Add(kv.Key(), Bry_.new_u8(kv.Val_to_str_or_empty()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (Object_.Eq(repl_type, Scrib_lua_proc.class)) {
|
||||||
|
repl_tid = Repl_tid_luacbk;
|
||||||
|
repl_func = (Scrib_lua_proc)repl_obj;
|
||||||
|
}
|
||||||
|
else if (Object_.Eq(repl_type, Double_.Cls_ref_type)) { // NOTE:@replace sometimes double; PAGE:de.v:Wikivoyage:Wikidata/Test_Modul:Wikidata2; DATE:2016-04-21
|
||||||
|
repl_tid = Repl_tid_string;
|
||||||
|
repl_bry = Bry_.new_u8(Double_.To_str(Double_.cast(repl_obj)));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
throw Err_.new_unhandled(Type_.Name(repl_type));
|
||||||
|
return repl_tid;
|
||||||
|
}
|
||||||
|
private String Exec_repl(byte repl_tid, String text, String regx, int limit) {
|
||||||
|
// parse regx
|
||||||
|
Regx_adp regx_mgr = Scrib_lib_ustring.RegxAdp_new_(core.Ctx(), regx);
|
||||||
|
if (regx_mgr.Pattern_is_invalid()) return text; // NOTE: invalid patterns should return self; EX:[^]; DATE:2014-09-02)
|
||||||
|
|
||||||
|
// exec regx
|
||||||
|
Regx_match[] rslts = regx_mgr.Match_all(text, 0);
|
||||||
|
if (rslts.length == 0) return text; // PHP: If matches are found, the new subject will be returned, otherwise subject will be returned unchanged.; http://php.net/manual/en/function.preg-replace-callback.php
|
||||||
|
rslts = regx_converter.Adjust_balanced(rslts);
|
||||||
|
|
||||||
|
Bry_bfr tmp_bfr = Bry_bfr_.New();
|
||||||
|
int rslts_len = rslts.length;
|
||||||
|
int text_pos = 0;
|
||||||
|
for (int i = 0; i < rslts_len; i++) {
|
||||||
|
if (repl_count == limit) break; // stop if repl_count reaches limit; note that limit = -1 by default, unless specified
|
||||||
|
|
||||||
|
// add text up to find.bgn
|
||||||
|
Regx_match rslt = rslts[i];
|
||||||
|
tmp_bfr.Add_str_u8(String_.Mid(text, text_pos, rslt.Find_bgn())); // NOTE: regx returns char text_pos (not bry); must add as String, not bry; DATE:2013-07-17
|
||||||
|
|
||||||
|
// replace result
|
||||||
|
if (!Exec_repl_itm(tmp_bfr, repl_tid, text, rslt)) {
|
||||||
|
// will be false when gsub_proc returns nothing; PAGE:en.d:tracer PAGE:en.d:שלום DATE:2017-04-22;
|
||||||
|
tmp_bfr.Add_str_u8(String_.Mid(text, rslt.Find_bgn(), rslt.Find_end()));
|
||||||
|
}
|
||||||
|
|
||||||
|
// update
|
||||||
|
text_pos = rslt.Find_end();
|
||||||
|
repl_count++;
|
||||||
|
}
|
||||||
|
|
||||||
|
// add rest of String
|
||||||
|
int text_len = String_.Len(text);
|
||||||
|
if (text_pos < text_len)
|
||||||
|
tmp_bfr.Add_str_u8(String_.Mid(text, text_pos, text_len)); // NOTE: regx returns char text_pos (not bry); must add as String, not bry; DATE:2013-07-17
|
||||||
|
return tmp_bfr.To_str_and_clear();
|
||||||
|
}
|
||||||
|
private boolean Exec_repl_itm(Bry_bfr tmp_bfr, byte repl_tid, String text, Regx_match match) {
|
||||||
|
switch (repl_tid) {
|
||||||
|
case Repl_tid_string:
|
||||||
|
int len = repl_bry.length;
|
||||||
|
for (int i = 0; i < len; i++) {
|
||||||
|
byte b = repl_bry[i];
|
||||||
|
switch (b) {
|
||||||
|
case Byte_ascii.Percent: {
|
||||||
|
++i;
|
||||||
|
if (i == len) // % at end of stream; just add %;
|
||||||
|
tmp_bfr.Add_byte(b);
|
||||||
|
else {
|
||||||
|
b = repl_bry[i];
|
||||||
|
switch (b) {
|
||||||
|
case Byte_ascii.Num_0: case Byte_ascii.Num_1: case Byte_ascii.Num_2: case Byte_ascii.Num_3: case Byte_ascii.Num_4:
|
||||||
|
case Byte_ascii.Num_5: case Byte_ascii.Num_6: case Byte_ascii.Num_7: case Byte_ascii.Num_8: case Byte_ascii.Num_9:
|
||||||
|
int idx = b - Byte_ascii.Num_0;
|
||||||
|
if (idx == 0) // NOTE: 0 means take result; REF.MW:if ($x === '0'); return $m[0]; PAGE:Wikipedia:Wikipedia_Signpost/Templates/Voter/testcases; DATE:2015-08-02
|
||||||
|
tmp_bfr.Add_str_u8(String_.Mid(text, match.Find_bgn(), match.Find_end()));
|
||||||
|
else { // NOTE: > 0 means get from groups if it exists; REF.MW:elseif (isset($m["m$x"])) return $m["m$x"]; PAGE:Wikipedia:Wikipedia_Signpost/Templates/Voter/testcases; DATE:2015-08-02
|
||||||
|
idx -= List_adp_.Base1;
|
||||||
|
if (idx < match.Groups().length) { // retrieve numbered capture; TODO_OLD: support more than 9 captures
|
||||||
|
Regx_group grp = match.Groups()[idx];
|
||||||
|
tmp_bfr.Add_str_u8(String_.Mid(text, grp.Bgn(), grp.End())); // NOTE: grp.Bgn() / .End() is for String pos (bry pos will fail for utf8 strings)
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
tmp_bfr.Add_byte(Byte_ascii.Percent);
|
||||||
|
tmp_bfr.Add_byte(b);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case Byte_ascii.Percent:
|
||||||
|
tmp_bfr.Add_byte(Byte_ascii.Percent);
|
||||||
|
break;
|
||||||
|
default: // not a number; add literal
|
||||||
|
tmp_bfr.Add_byte(Byte_ascii.Percent);
|
||||||
|
tmp_bfr.Add_byte(b);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
tmp_bfr.Add_byte(b);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case Repl_tid_table: {
|
||||||
|
int match_bgn = -1, match_end = -1;
|
||||||
|
Regx_group[] grps = match.Groups();
|
||||||
|
if (grps.length == 0) {
|
||||||
|
match_bgn = match.Find_bgn();
|
||||||
|
match_end = match.Find_end();
|
||||||
|
}
|
||||||
|
else { // group exists, take first one (logic matches Scribunto); PAGE:en.w:Bannered_routes_of_U.S._Route_60; DATE:2014-08-15
|
||||||
|
Regx_group grp = grps[0];
|
||||||
|
match_bgn = grp.Bgn();
|
||||||
|
match_end = grp.End();
|
||||||
|
}
|
||||||
|
String find_str = String_.Mid(text, match_bgn, match_end); // NOTE: rslt.Bgn() / .End() is for String pos (bry pos will fail for utf8 strings)
|
||||||
|
Object actl_repl_obj = repl_hash.Get_by(find_str);
|
||||||
|
if (actl_repl_obj == null) // match found, but no replacement specified; EX:"abc", "[ab]", "a:A"; "b" in regex but not in tbl; EX:d:DVD; DATE:2014-03-31
|
||||||
|
tmp_bfr.Add_str_u8(find_str);
|
||||||
|
else
|
||||||
|
tmp_bfr.Add((byte[])actl_repl_obj);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case Repl_tid_luacbk: {
|
||||||
|
Keyval[] luacbk_args = null;
|
||||||
|
Regx_group[] grps = match.Groups();
|
||||||
|
int grps_len = grps.length;
|
||||||
|
// no grps; pass 1 arg based on @match: EX: ("ace", "[b-d]"); args -> ("c")
|
||||||
|
if (grps_len == 0) {
|
||||||
|
String find_str = String_.Mid(text, match.Find_bgn(), match.Find_end());
|
||||||
|
luacbk_args = Scrib_kv_utl_.base1_obj_(find_str);
|
||||||
|
}
|
||||||
|
// grps exist; pass n args based on grp[n].match; EX: ("acfg", "([b-d])([e-g])"); args -> ("c", "f")
|
||||||
|
else {
|
||||||
|
// memoize any_pos args for loop
|
||||||
|
boolean any_pos = regx_converter.Any_pos();
|
||||||
|
Keyval[] capt_ary = regx_converter.Capt_ary();
|
||||||
|
int capt_ary_len = capt_ary.length;
|
||||||
|
|
||||||
|
// loop grps; for each grp, create corresponding arg in luacbk
|
||||||
|
luacbk_args = new Keyval[grps_len];
|
||||||
|
for (int i = 0; i < grps_len; i++) {
|
||||||
|
Regx_group grp = grps[i];
|
||||||
|
|
||||||
|
// anypos will create @offset arg; everything else creates a @match arg based on grp
|
||||||
|
Object val = any_pos && i < capt_ary_len && Bool_.Cast(capt_ary[i].Val())
|
||||||
|
? (Object)grp.Bgn()
|
||||||
|
: (Object)String_.Mid(text, grp.Bgn(), grp.End());
|
||||||
|
luacbk_args[i] = Keyval_.int_(i + Scrib_core.Base_1, val);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// do callback
|
||||||
|
Keyval[] rslts = core.Interpreter().CallFunction(repl_func.Id(), luacbk_args);
|
||||||
|
|
||||||
|
// eval result
|
||||||
|
if (rslts.length == 0) // will be 0 when gsub_proc returns nil; PAGE:en.d:tracer; DATE:2017-04-22
|
||||||
|
return false;
|
||||||
|
else { // ArrayIndex check
|
||||||
|
Object rslt_obj = rslts[0].Val(); // 0th idx has result
|
||||||
|
tmp_bfr.Add_str_u8(Object_.Xto_str_strict_or_empty(rslt_obj)); // NOTE: always convert to String; rslt_obj can be int; PAGE:en.d:seven DATE:2016-04-27
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
default: throw Err_.new_unhandled(repl_tid);
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
// replacement-type ids; Exec_repl_itm switches on string / table / luacbk and throws for any other value
// NOTE(review): Repl_tid_null is presumably the "not yet determined" state -- confirm against the code that assigns repl_tid
private static final byte Repl_tid_null = 0, Repl_tid_string = 1, Repl_tid_table = 2, Repl_tid_luacbk = 3;
|
||||||
|
// shared zero-length array of gsub managers
public static final Scrib_lib_ustring_gsub_mgr[] Ary_empty = new Scrib_lib_ustring_gsub_mgr[0];
|
||||||
|
}
|
@ -37,6 +37,8 @@ public class Scrib_regx_converter {
|
|||||||
int len = pat.length;
|
int len = pat.length;
|
||||||
int grps_len = 0;
|
int grps_len = 0;
|
||||||
int bct = 0;
|
int bct = 0;
|
||||||
|
|
||||||
|
// REF.MW: https://github.com/wikimedia/mediawiki-extensions-Scribunto/blob/master/includes/engines/LuaCommon/UstringLibrary.php#L415
|
||||||
for (int i = 0; i < len; i++) {
|
for (int i = 0; i < len; i++) {
|
||||||
int i_end = i + 1;
|
int i_end = i + 1;
|
||||||
q_flag = false; // must be reset; REF.MW:UstringLibrary.php|patternToRegex; DATE:2014-02-08
|
q_flag = false; // must be reset; REF.MW:UstringLibrary.php|patternToRegex; DATE:2014-02-08
|
||||||
@ -44,24 +46,28 @@ public class Scrib_regx_converter {
|
|||||||
switch (cur) {
|
switch (cur) {
|
||||||
case Byte_ascii.Pow:
|
case Byte_ascii.Pow:
|
||||||
q_flag = i != 0;
|
q_flag = i != 0;
|
||||||
bfr.Add((anchor == Anchor_null || q_flag) ? Bry_pow_escaped : anchor); // NOTE: must add anchor \G when using offsets; EX:cs.n:Category:1._zárí_2008; DATE:2014-05-07
|
bfr.Add((anchor == Anchor_null || q_flag) ? Bry_pow_escaped : anchor); // NOTE: must add anchor \G when using offsets; EX:cs.n:Category:1._zárí_2008; DATE:2014-05-07
|
||||||
break;
|
break;
|
||||||
case Byte_ascii.Dollar:
|
case Byte_ascii.Dollar:
|
||||||
q_flag = i < len - 1;
|
q_flag = i < len - 1;
|
||||||
bfr.Add(q_flag ? Bry_dollar_escaped : Bry_dollar_literal);
|
bfr.Add(q_flag ? Bry_dollar_escaped : Bry_dollar_literal);
|
||||||
break;
|
break;
|
||||||
case Byte_ascii.Paren_bgn: {
|
case Byte_ascii.Paren_bgn: {
|
||||||
|
// fail if "(EOS"
|
||||||
if (i + 1 >= len)
|
if (i + 1 >= len)
|
||||||
throw Err_.new_wo_type("Unmatched open-paren at pattern character " + Int_.To_str(i_end));
|
throw Err_.new_wo_type("Unmatched open-paren at pattern character " + Int_.To_str(i_end));
|
||||||
int grp_idx = grp_mgr.Capt__len() + 1;
|
int grp_idx = grp_mgr.Capt__len() + 1;
|
||||||
boolean is_empty_capture = pat[i + 1] == Byte_ascii.Paren_end; // current is "()"
|
|
||||||
|
// check for "()"; enables anypos flag
|
||||||
|
boolean is_empty_capture = pat[i + 1] == Byte_ascii.Paren_end;
|
||||||
if (is_empty_capture)
|
if (is_empty_capture)
|
||||||
any_pos = true;
|
any_pos = true;
|
||||||
bfr.Add_byte(Byte_ascii.Paren_bgn); // $re .= "(?<m$n>";
|
|
||||||
grp_mgr.Capt__add__real(grp_idx, is_empty_capture);
|
grp_mgr.Capt__add__real(grp_idx, is_empty_capture);
|
||||||
|
bfr.Add_byte(Byte_ascii.Paren_bgn); // $re .= "(?<m$n>";
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case Byte_ascii.Paren_end:
|
case Byte_ascii.Paren_end:
|
||||||
|
// fail if ")" without preceding "("
|
||||||
if (grp_mgr.Open__len() <= 0)
|
if (grp_mgr.Open__len() <= 0)
|
||||||
throw Err_.new_wo_type("Unmatched close-paren at pattern character " + Int_.To_str(i_end));
|
throw Err_.new_wo_type("Unmatched close-paren at pattern character " + Int_.To_str(i_end));
|
||||||
grp_mgr.Open__pop();
|
grp_mgr.Open__pop();
|
||||||
|
Loading…
Reference in New Issue
Block a user