mirror of
https://github.com/gnosygnu/xowa.git
synced 2026-03-02 03:49:30 +00:00
Scribunto: Do not fail in ustring.find if negative bgn is large [#366]
This commit is contained in:
@@ -18,7 +18,6 @@ import gplx.core.intls.*; import gplx.langs.regxs.*;
|
||||
import gplx.xowa.parsers.*;
|
||||
import gplx.xowa.xtns.scribunto.procs.*;
|
||||
public class Scrib_lib_ustring implements Scrib_lib {
|
||||
private final String_surrogate_utl surrogate_utl = new String_surrogate_utl();
|
||||
public Scrib_lib_ustring(Scrib_core core) {this.core = core;} private Scrib_core core;
|
||||
public Scrib_lua_mod Mod() {return mod;} private Scrib_lua_mod mod;
|
||||
public int String_len_max() {return string_len_max;} public Scrib_lib_ustring String_len_max_(int v) {string_len_max = v; return this;} private int string_len_max = Xoa_page_.Page_len_max;
|
||||
@@ -48,74 +47,92 @@ public class Scrib_lib_ustring implements Scrib_lib {
|
||||
public static final String Invk_find = "find", Invk_match = "match", Invk_gmatch_init = "gmatch_init", Invk_gmatch_callback = "gmatch_callback", Invk_gsub = "gsub";
|
||||
private static final String[] Proc_names = String_.Ary(Invk_find, Invk_match, Invk_gmatch_init, Invk_gmatch_callback, Invk_gsub);
|
||||
public boolean Find(Scrib_proc_args args, Scrib_proc_rslt rslt) {
|
||||
String text_str = args.Xstr_str_or_null(0);
|
||||
String regx = args.Pull_str(1);
|
||||
int bgn_char_idx = args.Cast_int_or(2, 1);
|
||||
boolean plain = args.Cast_bool_or_n(3);
|
||||
synchronized (surrogate_utl) {
|
||||
byte[] text_bry = Bry_.new_u8(text_str); int text_bry_len = text_bry.length;
|
||||
bgn_char_idx = Bgn_adjust(text_str, bgn_char_idx);
|
||||
// get args
|
||||
String text_str = args.Xstr_str_or_null(0);
|
||||
String find_str = args.Pull_str(1);
|
||||
int bgn_as_codes_base1 = args.Cast_int_or(2, 1);
|
||||
boolean plain = args.Cast_bool_or_n(3);
|
||||
|
||||
// regx of "" should return (bgn, bgn - 1) regardless of whether plain is true or false;
|
||||
// NOTE: do not include surrogate calc; PAGE:en.d:佻 DATE:2017-04-24
|
||||
if (String_.Len_eq_0(regx)) // regx of "" should return (bgn, bgn - 1) regardless of whether plain is true or false
|
||||
return rslt.Init_many_objs(bgn_char_idx + Scrib_lib_ustring.Base1, bgn_char_idx + Scrib_lib_ustring.Base1 - 1);
|
||||
// init text vars
|
||||
byte[] text_bry = Bry_.new_u8(text_str);
|
||||
int text_bry_len = text_bry.length;
|
||||
Utf16_mapper text_map = new Utf16_mapper(text_str, text_bry, text_bry_len); // NOTE: must count codes for supplementaries; PAGE:en.d:iglesia DATE:2017-04-23
|
||||
|
||||
// NOTE: adjust for 2-len chars (surrogates); PAGE:en.d:iglesia DATE:2017-04-23
|
||||
int bgn_adj = surrogate_utl.Count_surrogates__char_idx(text_bry, text_bry_len, 0, bgn_char_idx); // NOTE: convert from lua / php charidx to java regex codepoint; PAGE:zh.w:南北鐵路 (越南) DATE:2014-08-27
|
||||
int bgn_codepoint_idx = bgn_char_idx + bgn_adj;
|
||||
int bgn_byte_pos = surrogate_utl.Byte_pos();
|
||||
if (plain) {
|
||||
int pos = String_.FindFwd(text_str, regx, bgn_codepoint_idx);
|
||||
boolean found = pos != Bry_find_.Not_found;
|
||||
return found
|
||||
? rslt.Init_many_objs(pos + Scrib_lib_ustring.Base1, pos + Scrib_lib_ustring.Base1 + String_.Len(regx) - Scrib_lib_ustring.End_adj)
|
||||
: rslt.Init_ary_empty()
|
||||
;
|
||||
}
|
||||
Scrib_regx_converter regx_converter = new Scrib_regx_converter();
|
||||
regx = regx_converter.patternToRegex(Bry_.new_u8(regx), Scrib_regx_converter.Anchor_G);
|
||||
Regx_adp regx_adp = Scrib_lib_ustring.RegxAdp_new_(core.Ctx(), regx);
|
||||
Regx_match[] regx_rslts = regx_adp.Match_all(text_str, bgn_codepoint_idx); // NOTE: MW calculates an offset to handle mb strings. however, java's regex always takes offset in chars (not bytes like PHP preg_match); DATE:2014-03-04
|
||||
int len = regx_rslts.length;
|
||||
if (len == 0) return rslt.Init_ary_empty();
|
||||
List_adp tmp_list = List_adp_.New();
|
||||
Regx_match match = regx_rslts[0]; // NOTE: take only 1st result; DATE:2014-08-27
|
||||
int match_find_bgn_codepoint = match.Find_bgn(); // NOTE: java regex returns results in codepoint; PAGE:zh.w:南北鐵路 (越南) DATE:2014-08-27
|
||||
int match_find_bgn_adj = -surrogate_utl.Count_surrogates__codepoint_idx1(text_bry, text_bry_len, bgn_byte_pos, match_find_bgn_codepoint - bgn_codepoint_idx); // NOTE: convert from java regex codepoint to lua / php char_idx; PAGE:zh.w:南北鐵路 (越南) DATE:2014-08-27
|
||||
tmp_list.Add(match_find_bgn_codepoint + match_find_bgn_adj + -bgn_adj + Scrib_lib_ustring.Base1);
|
||||
tmp_list.Add(match.Find_end() + match_find_bgn_adj + -bgn_adj + Scrib_lib_ustring.Base1 - Scrib_lib_ustring.End_adj);
|
||||
//Tfds.Dbg (match_find_bgn_codepoint + match_find_bgn_adj + -bgn_adj + Scrib_lib_ustring.Base1
|
||||
// ,match.Find_end() + match_find_bgn_adj + -bgn_adj + Scrib_lib_ustring.Base1 - Scrib_lib_ustring.End_adj);
|
||||
AddCapturesFromMatch(tmp_list, match, text_str, regx_converter.Capt_ary(), false);
|
||||
return rslt.Init_many_list(tmp_list);
|
||||
// convert bgn from base_1 to base_0
|
||||
int bgn_as_codes = To_java_by_lua(bgn_as_codes_base1, text_map.Len_in_codes());
|
||||
|
||||
/*
|
||||
int offset = 0;
|
||||
if (bgn_as_codes > 0) { // NOTE: MW.BASE
|
||||
// $offset = strlen( mb_substr( $s, 0, $init - 1, 'UTF-8' ) );
|
||||
}
|
||||
}
|
||||
private int Bgn_adjust(String text, int bgn) { // adjust to handle bgn < 0 or bgn > len (which PHP allows)
|
||||
if (bgn > 0) bgn -= Scrib_lib_ustring.Base1;
|
||||
int text_len = String_.Len(text);
|
||||
if (bgn < 0) // negative number means search from rear of String
|
||||
bgn += text_len; // NOTE: PHP has extra + 1 for Base 1
|
||||
else if (bgn > text_len) // bgn > text_len; confine to text_len; NOTE: PHP has extra + 1 for Base 1
|
||||
bgn = text_len; // NOTE: PHP has extra + 1 for Base 1
|
||||
return bgn;
|
||||
else {
|
||||
bgn_as_codes_base1 = 0; // NOTE: MW.BASE1
|
||||
offset = 0; // -1?
|
||||
}
|
||||
*/
|
||||
|
||||
// find_str of "" should return (bgn, bgn - 1) regardless of whether plain is true or false;
|
||||
// NOTE: do not include surrogate calc; PAGE:en.d:佻 DATE:2017-04-24
|
||||
// NOTE: not in MW; is this needed? DATE:2019-02-24
|
||||
if (String_.Len_eq_0(find_str))
|
||||
return rslt.Init_many_objs(bgn_as_codes_base1, bgn_as_codes_base1 - 1);
|
||||
|
||||
// if plain, just do literal match of find and exit
|
||||
if (plain) {
|
||||
// find pos by literal match
|
||||
byte[] find_bry = Bry_.new_u8(find_str);
|
||||
int pos = Bry_find_.Find_fwd(text_bry, find_bry, text_map.Get_byte_for_code_or_fail(bgn_as_codes));
|
||||
|
||||
// nothing found; return empty
|
||||
if (pos == Bry_find_.Not_found)
|
||||
return rslt.Init_ary_empty();
|
||||
|
||||
// bgn: convert pos from bytes back to codes; also adjust for base1
|
||||
int bgn = text_map.Get_code_for_byte_or_fail(pos) + Base1;
|
||||
|
||||
// end: add find.Len_in_codes and adjust end for PHP/LUA
|
||||
Utf16_mapper find_map = new Utf16_mapper(find_str, find_bry, find_bry.length);
|
||||
int end = bgn + find_map.Len_in_codes() - End_adj;
|
||||
|
||||
return rslt.Init_many_objs(bgn, end);
|
||||
}
|
||||
|
||||
// run regex
|
||||
Scrib_regx_converter regx_converter = new Scrib_regx_converter();
|
||||
Regx_match[] regx_rslts = Run_regex_or_null(text_map, regx_converter, find_str, bgn_as_codes);
|
||||
if (regx_rslts.length == 0) return rslt.Init_ary_empty();
|
||||
|
||||
// add to tmp_list
|
||||
Regx_match match = regx_rslts[0]; // NOTE: take only 1st result; DATE:2014-08-27
|
||||
List_adp tmp_list = List_adp_.New();
|
||||
tmp_list.Add(text_map.Get_code_for_char_or_neg1(match.Find_bgn()) + Scrib_lib_ustring.Base1);
|
||||
tmp_list.Add(text_map.Get_code_for_char_or_neg1(match.Find_end()) + Scrib_lib_ustring.Base1 - Scrib_lib_ustring.End_adj);
|
||||
AddCapturesFromMatch(tmp_list, match, text_str, regx_converter.Capt_ary(), false);
|
||||
return rslt.Init_many_list(tmp_list);
|
||||
}
|
||||
public boolean Match(Scrib_proc_args args, Scrib_proc_rslt rslt) {
|
||||
String text = args.Xstr_str_or_null(0); // Module can pass raw ints; PAGE:en.w:Budget_of_the_European_Union; DATE:2015-01-22
|
||||
if (text == null) return rslt.Init_many_list(List_adp_.Noop); // if no text is passed, do not fail; return empty; EX:d:changed; DATE:2014-02-06
|
||||
// get args
|
||||
String text_str = args.Xstr_str_or_null(0); // Module can pass raw ints; PAGE:en.w:Budget_of_the_European_Union; DATE:2015-01-22
|
||||
String find_str = args.Cast_str_or_null(1);
|
||||
int bgn_as_codes_base1 = args.Cast_int_or(2, 1);
|
||||
|
||||
// validate / adjust
|
||||
if (text_str == null) // if no text_str is passed, do not fail; return empty; EX:d:changed; DATE:2014-02-06
|
||||
return rslt.Init_many_list(List_adp_.Noop);
|
||||
byte[] text_bry = Bry_.new_u8(text_str); int text_bry_len = text_bry.length;
|
||||
Utf16_mapper text_map = new Utf16_mapper(text_str, text_bry, text_bry_len); // NOTE: must count codes for supplementaries; PAGE:en.d:iglesia DATE:2017-04-23
|
||||
int bgn_as_codes = To_java_by_lua(bgn_as_codes_base1, text_map.Len_in_codes());
|
||||
|
||||
// run regex
|
||||
Scrib_regx_converter regx_converter = new Scrib_regx_converter();
|
||||
String regx = regx_converter.patternToRegex(args.Cast_bry_or_null(1), Scrib_regx_converter.Anchor_G);
|
||||
int bgn = args.Cast_int_or(2, 1);
|
||||
bgn = Bgn_adjust(text, bgn);
|
||||
Regx_adp regx_adp = Scrib_lib_ustring.RegxAdp_new_(core.Ctx(), regx);
|
||||
Regx_match[] regx_rslts = regx_adp.Match_all(text, bgn);
|
||||
int len = regx_rslts.length;
|
||||
if (len == 0) return rslt.Init_null(); // return null if no matches found; EX:w:Mount_Gambier_(volcano); DATE:2014-04-02; confirmed with en.d:民; DATE:2015-01-30
|
||||
Regx_match[] regx_rslts = Run_regex_or_null(text_map, regx_converter, find_str, bgn_as_codes);
|
||||
if (regx_rslts.length == 0) return rslt.Init_null(); // return null if no matches found; EX:w:Mount_Gambier_(volcano); DATE:2014-04-02; confirmed with en.d:民; DATE:2015-01-30
|
||||
|
||||
// TOMBSTONE: add 1st match only; do not add all; PAGE:en.d:действительное_причастие_настоящего_времени DATE:2017-04-23
|
||||
regx_rslts = regx_converter.Adjust_balanced(regx_rslts);
|
||||
List_adp tmp_list = List_adp_.New();
|
||||
AddCapturesFromMatch(tmp_list, regx_rslts[0], text, regx_converter.Capt_ary(), true);
|
||||
AddCapturesFromMatch(tmp_list, regx_rslts[0], text_str, regx_converter.Capt_ary(), true);
|
||||
return rslt.Init_many_list(tmp_list);
|
||||
}
|
||||
public boolean Gsub(Scrib_proc_args args, Scrib_proc_rslt rslt) {
|
||||
@@ -143,6 +160,35 @@ public class Scrib_lib_ustring implements Scrib_lib {
|
||||
AddCapturesFromMatch(tmp_list, match, text, capt, true); // NOTE: was incorrectly set as false; DATE:2014-04-23
|
||||
return rslt.Init_many_objs(match.Find_end(), Scrib_kv_utl_.base1_list_(tmp_list));
|
||||
}
|
||||
private int To_java_by_lua(int bgn_as_codes_base1, int len_in_codes) {
|
||||
// convert bgn from base_1 to base_0
|
||||
int bgn_as_codes = bgn_as_codes_base1;
|
||||
if (bgn_as_codes > 0)
|
||||
bgn_as_codes -= Scrib_lib_ustring.Base1;
|
||||
// TOMBSTONE: do not adjust negative numbers for base1; fails tests
|
||||
// else if (bgn_as_codes < 0) bgn_as_codes += Scrib_lib_ustring.Base1;
|
||||
|
||||
// adjust bgn for negative-numbers and large positive-numbers
|
||||
// NOTE: MW uses mb_strlen which returns len of mb chars as 1; REF.PHP: http://php.net/manual/en/function.mb-strlen.php
|
||||
// NOTE: MW does additional +1 for PHP.base_1. This is not needed for JAVA; noted below as IGNORE_BASE_1_ADJ
|
||||
if (bgn_as_codes < 0) // negative number means search from rear of String
|
||||
bgn_as_codes += len_in_codes; // NOTE:IGNORE_BASE_1_ADJ
|
||||
else if (bgn_as_codes > len_in_codes) // bgn_as_codes > text_len; confine to text_len; NOTE:IGNORE_BASE_1_ADJ
|
||||
bgn_as_codes = len_in_codes; // NOTE:IGNORE_BASE_1_ADJ
|
||||
|
||||
// will be negative if Abs(bgn_as_codes) > text.length; ISSUE#:366; DATE:2019-02-23
|
||||
if (bgn_as_codes < 0)
|
||||
bgn_as_codes = 0;
|
||||
return bgn_as_codes;
|
||||
}
|
||||
private Regx_match[] Run_regex_or_null(Utf16_mapper text_map, Scrib_regx_converter regx_converter, String find_str, int bgn_as_codes) {
|
||||
// convert regex from lua to java
|
||||
find_str = regx_converter.patternToRegex(Bry_.new_u8(find_str), Scrib_regx_converter.Anchor_G);
|
||||
|
||||
// run regex
|
||||
Regx_adp regx_adp = Scrib_lib_ustring.RegxAdp_new_(core.Ctx(), find_str);
|
||||
return regx_adp.Match_all(text_map.Src_str(), text_map.Get_char_for_code_or_fail(bgn_as_codes)); // NOTE: MW calculates an offset to handle mb strings. however, java's regex always takes offset in chars (not bytes like PHP preg_match); DATE:2014-03-04
|
||||
}
|
||||
private void AddCapturesFromMatch(List_adp tmp_list, Regx_match rslt, String text, Keyval[] capts, boolean op_is_match) {// NOTE: this matches behavior in UstringLibrary.php!addCapturesFromMatch
|
||||
int capts_len = capts == null ? 0 : capts.length;
|
||||
if (capts_len > 0) { // NOTE: changed from "grps_len > 0"; PAGE:en.w:Portal:Constructed_languages/Intro DATE:2018-07-02
|
||||
@@ -171,6 +217,7 @@ public class Scrib_lib_ustring implements Scrib_lib {
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
private static final int Base1 = 1
|
||||
private static final int
|
||||
Base1 = 1
|
||||
, End_adj = 1; // lua / php uses "end" as <= not <; EX: "abc" and bgn=0, end= 1; for XOWA, this is "a"; for MW / PHP it is "ab"
|
||||
}
|
||||
|
||||
@@ -14,39 +14,107 @@ GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.xtns.scribunto.libs; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.scribunto.*;
|
||||
import org.junit.*; import gplx.xowa.xtns.scribunto.engines.mocks.*;
|
||||
import org.junit.*;
|
||||
import gplx.core.consoles.*;
|
||||
import gplx.xowa.xtns.scribunto.engines.mocks.*;
|
||||
public class Scrib_lib_ustring__find__tst {
|
||||
private final Mock_scrib_fxt fxt = new Mock_scrib_fxt(); private Scrib_lib lib;
|
||||
@Before public void init() {
|
||||
private final Scrib_lib_ustring__find__fxt fxt = new Scrib_lib_ustring__find__fxt();
|
||||
// Literal searches (plain = Bool_.Y). Args are (text, find, bgn, plain, expd); expd is the flattened "bgn;end" result.
// The first three cases use text chars of different UTF-8 byte widths but expect the same positions.
@Test public void Plain() {
|
||||
fxt.Test__find("aabaab" , "b" , 2, Bool_.Y, "3;3"); // text chars before the match are 1 byte each in UTF-8
|
||||
fxt.Test__find("€€b€€b" , "b" , 2, Bool_.Y, "3;3"); // text chars before the match are 3 bytes each in UTF-8
|
||||
fxt.Test__find("𤭢𤭢b𤭢𤭢b" , "b" , 2, Bool_.Y, "3;3"); // text chars before the match are 4 bytes each in UTF-8 (supplementary)
|
||||
fxt.Test__find("()()" , "(" , 2, Bool_.Y, "3;3"); // exact match; note that "(" by itself is an invalid regx, so this only works as a plain search
|
||||
fxt.Test__find("abcd" , "" , 2, Bool_.Y, "2;1"); // empty find should return (bgn, bgn - 1); EX:w:Fool's_mate; DATE:2014-03-04
|
||||
fxt.Test__find("a€b" , "€" , 1, Bool_.Y, "2;2"); // find itself is 3 bytes in UTF-8 but must count as 1 position
|
||||
}
|
||||
// Negative bgn values: these count back from the end of the text.
@Test public void Bgn__negative() {
|
||||
fxt.Test__find("abab" , "b" , -1, Bool_.Y, "4;4"); // search from back of String; -1 starts at the last position
|
||||
fxt.Test__find("abab" , "b" , -9, Bool_.Y, "2;2"); // do not throw error if Abs(negative index) > text.length; search starts from the front instead; ISSUE#:366; DATE:2019-02-23
|
||||
fxt.Test__find("𤭢" , "𤭢" , -1, Bool_.Y, "1;1"); // text is 1 supplementary char; presumably guards against counting -1 as -1 char (half of the surrogate pair) instead of -1 codepoint
|
||||
}
|
||||
// Pattern searches (plain = Bool_.N) without captures; expd is the flattened "bgn;end" result, or "" when nothing matches.
@Test public void Regx__simple() {
|
||||
fxt.Test__find("abcd" , "b" , 1, Bool_.N, "2;2"); // basic
|
||||
fxt.Test__find("abad" , "a" , 2, Bool_.N, "3;3"); // bgn; the "a" at position 1 is skipped because search starts at 2
|
||||
fxt.Test__find("abcd" , "x" , 1, Bool_.N, ""); // no-match
|
||||
fxt.Test__find("abcd" , "" , 2, Bool_.N, "2;1"); // empty regx should return (bgn, bgn - 1); EX:w:Fool's_mate; DATE:2014-03-04
|
||||
}
|
||||
// Passes the text as an int (123) rather than a String; uses the int overload of Test__find.
@Test public void Regx__int() { // PURPOSE: allow int find; PAGE:ro.w:Innsbruck DATE:2015-09-12
|
||||
fxt.Test__find(123 , "2" , 1, Bool_.N, "2;2");
|
||||
}
|
||||
// Pattern searches with captures; expd is "bgn;end" followed by one value per capture.
@Test public void Regx__groups() {
|
||||
fxt.Test__find("a bcd e" , "(b(c)d)" , 2, Bool_.N, "3;5;bcd;c"); // groups; nested capture is listed after its parent
|
||||
fxt.Test__find("a bcd e" , "()(b)" , 2, Bool_.N, "3;3;3;b"); // groups; empty capture yields a position (3), not text
|
||||
}
|
||||
// "^" must anchor at bgn (position 3 here), not at the start of the text.
@Test public void Regx__caret() {
|
||||
fxt.Test__find("abcd" , "^(c)" , 3, Bool_.N, "3;3;c"); // ^ should be converted to \G; regx; EX:cs.n:Category:1._září_2008; DATE:2014-05-07
|
||||
}
|
||||
// Empty capture at bgn=2 on a 1-char text: expects bgn=2, end=1 and the capture position 2.
// NOTE(review): expd is compared in flattened String form; presumably this no longer distinguishes int results from String results -- confirm against Test__proc__kvps__flat
@Test public void Regx__return_is_int() {
|
||||
fxt.Test__find("a" , "()" , 2, Bool_.N, "2;1;2");
|
||||
}
|
||||
// Each line of the text is "a/b", an accented char, a supplementary char (𡼾) and "\n"; every char must count as 1 position.
@Test public void Surrogate__find__value() { // PURPOSE: handle surrogates in Find PAGE:zh.w:南北鐵路_(越南); DATE:2014-08-28
|
||||
fxt.Test__find("aé𡼾\nbî𡼾\n" , "\n" , 1, Bool_.N, "4;4"); // 4 b/c 1st \n is at base-1 pos 4 when the supplementary char counts as 1
|
||||
fxt.Test__find("aé𡼾\nbî𡼾\n" , "\n" , 5, Bool_.N, "8;8"); // 8 b/c search starts after the 1st \n and the 2nd \n is at base-1 pos 8 when each supplementary char counts as 1
|
||||
}
|
||||
// Empty find on text containing supplementary chars: result is (bgn, bgn - 1) with no surrogate adjustment.
@Test public void Surrogate__find__empty() { // PURPOSE: handle surrogates in Find PAGE:zh.w:南北鐵路_(越南); DATE:2014-08-28
|
||||
fxt.Test__find("aé𡼾\nbî𡼾\n" , "" , 1, Bool_.N, "1;0"); // empty find at bgn=1 returns (1, 0)
|
||||
fxt.Test__find("aé𡼾\nbî𡼾\n" , "" , 5, Bool_.N, "5;4"); // empty find at bgn=5 returns (5, 4); bgn is echoed as-is even though a supplementary char precedes it
|
||||
}
|
||||
}
|
||||
class Scrib_lib_ustring__find__fxt {
|
||||
private boolean dbg = false;
|
||||
private final Mock_scrib_fxt fxt = new Mock_scrib_fxt();
|
||||
private Scrib_lib lib;
|
||||
/** Clears the mock Scribunto fixture and stores the initialized ustring library that the Test__find methods invoke. */
public Scrib_lib_ustring__find__fxt() {
	fxt.Clear();
	this.lib = fxt.Core().Lib_ustring().Init();
}
|
||||
@Test public void Basic() {
|
||||
Exec_find("abcd" , "b" , 1, Bool_.N, "2;2"); // basic
|
||||
Exec_find("abac" , "a" , 2, Bool_.N, "3;3"); // bgn
|
||||
Exec_find("()()" , "(" , 2, Bool_.Y, "3;3"); // plain; note that ( would "break" regx
|
||||
Exec_find("a bcd e" , "(b(c)d)" , 2, Bool_.N, "3;5;bcd;c"); // groups
|
||||
Exec_find("a bcd e" , "()(b)" , 2, Bool_.N, "3;3;3;b"); // groups; empty capture
|
||||
Exec_find("abcd" , "x" , 1, Bool_.N, ""); // empty
|
||||
Exec_find("abcd" , "" , 2, Bool_.Y, "2;1"); // empty regx should return values; plain; EX:w:Fool's_mate; DATE:2014-03-04
|
||||
Exec_find("abcd" , "" , 2, Bool_.N, "2;1"); // empty regx should return values; regx; EX:w:Fool's_mate; DATE:2014-03-04
|
||||
Exec_find("abcd" , "^(c)" , 3, Bool_.N, "3;3;c"); // ^ should be converted to \G; regx; EX:cs.n:Category:1._září_2008; DATE:2014-05-07
|
||||
}
|
||||
@Test public void Arg_int() { // PURPOSE: allow int find; PAGE:ro.w:Innsbruck DATE:2015-09-12
|
||||
fxt.Test__proc__kvps__flat(lib, Scrib_lib_ustring.Invk_find, Scrib_kv_utl_.base1_many_(123, "2", 1, Bool_.N), "2;2");
|
||||
}
|
||||
@Test public void Return_int() {
|
||||
fxt.Test__proc__kvps__vals(lib, Scrib_lib_ustring.Invk_find, Scrib_kv_utl_.base1_many_("a", "()", 2, Bool_.N), 2, 1, 2);
|
||||
}
|
||||
@Test public void Surrogate__find__value() { // PURPOSE: handle surrogates in Find PAGE:zh.w:南北鐵路_(越南); DATE:2014-08-28
|
||||
Exec_find("aé𡼾\nbî𡼾\n" , "\n" , 1, Bool_.N, "4;4"); // 4 b/c \n starts at pos 4 (super 1)
|
||||
Exec_find("aé𡼾\nbî𡼾\n" , "\n" , 5, Bool_.N, "8;8"); // 8 b/c \n starts at pos 8 (super 1)
|
||||
}
|
||||
@Test public void Surrogate__find__empty() { // PURPOSE: handle surrogates in Find PAGE:zh.w:南北鐵路_(越南); DATE:2014-08-28
|
||||
Exec_find("aé𡼾\nbî𡼾\n" , "" , 1, Bool_.N, "1;0"); // 4 b/c \n starts at pos 4 (super 1)
|
||||
// Exec_find("aé𡼾\nbî𡼾\n" , "" , 5, Bool_.N, "8;8"); // 8 b/c \n starts at pos 8 (super 1)
|
||||
}
|
||||
private void Exec_find(String text, String regx, int bgn, boolean plain, String expd) {
|
||||
/** Turns debug output on: each Test__find call also writes its wikitext test row to the console. Returns this for chaining. */
public Scrib_lib_ustring__find__fxt Dbg_y_() {
	this.dbg = Bool_.Y;
	return this;
}
|
||||
/** Turns debug output off. Returns this for chaining. */
public Scrib_lib_ustring__find__fxt Dbg_n_() {
	this.dbg = Bool_.N;
	return this;
}
|
||||
public void Test__find(String text, String regx, int bgn, boolean plain, String expd) {
|
||||
if (dbg) Console_adp__sys.Instance.Write_str(Bld_test_string(text, regx, bgn, plain, expd));
|
||||
fxt.Test__proc__kvps__flat(lib, Scrib_lib_ustring.Invk_find, Scrib_kv_utl_.base1_many_(text, regx, bgn, plain), expd);
|
||||
}
|
||||
public void Test__find(int text, String regx, int bgn, boolean plain, String expd) {
|
||||
if (dbg) Console_adp__sys.Instance.Write_str(Bld_test_string(text, regx, bgn, plain, expd));
|
||||
fxt.Test__proc__kvps__flat(lib, Scrib_lib_ustring.Invk_find, Scrib_kv_utl_.base1_many_(text, regx, bgn, plain), expd);
|
||||
}
|
||||
private String Bld_test_string(Object text, String regx, int bgn, boolean plain, String expd) {
|
||||
/*
|
||||
{| class=wikitable
|
||||
! rslt !! expd !! actl !! code
|
||||
|}
|
||||
*/
|
||||
String invk = "{{" + String_.Format("#invoke:Sandbox/Gnosygnu|ustring_find|{0}|{1}|{2}|{3}", Object_.Xto_str_strict_or_empty(text), regx, bgn, plain ? Bool_.True_str : Bool_.False_str) + "}}";
|
||||
Bry_bfr bfr = Bry_bfr_.New();
|
||||
bfr.Add_str_a7("|-\n");
|
||||
bfr.Add_str_u8("| {{#ifeq:" + invk + "|" + expd + "|<span style='color:green'>pass</span>|<span style='color:red'>fail</span>}}\n");
|
||||
bfr.Add_str_u8("| " + expd + "\n");
|
||||
bfr.Add_str_u8("| " + invk + "\n");
|
||||
bfr.Add_str_u8("| <nowiki>" + invk + "</nowiki>\n");
|
||||
return bfr.To_str();
|
||||
}
|
||||
}
|
||||
/*
|
||||
TEST:
|
||||
* URL: https://en.wikipedia.org/wiki/Project:Sandbox
|
||||
* CODE:
|
||||
{{#invoke:Sandbox/Gnosygnu|ustring_find|abab|b|3|true}}
|
||||
|
||||
MODULE:
|
||||
* URL: https://en.wikipedia.org/wiki/Module:Sandbox/Gnosygnu
|
||||
* CODE:
|
||||
function p.ustring_find(frame)
|
||||
local args = frame.args;
|
||||
local rslt = {mw.ustring.find(args[1], args[2], tonumber(args[3]), args[4] == 'true')};
|
||||
|
||||
local rv = '';
|
||||
local rslt_len = #rslt;
|
||||
for i=1,rslt_len do
|
||||
if i ~= 1 then
|
||||
rv = rv .. ';'
|
||||
end
|
||||
rv = rv .. rslt[i]
|
||||
end
|
||||
return rv;
|
||||
end
|
||||
*/
|
||||
|
||||
Reference in New Issue
Block a user