Scribunto.Regex: Add only first match for mw.ustring.match, not all of them

pull/620/head
gnosygnu 7 years ago
parent 1d6b3779a0
commit 4a1546b9a2

@ -20,6 +20,7 @@ public class Regx_adp {
/** Creates an adapter for {@code regx}; storing and compiling are delegated to {@link #Pattern_(String)}. */
@gplx.Internal protected Regx_adp(String regx) {
	this.Pattern_(regx);
}

/** Regex source most recently supplied through the constructor or {@link #Pattern_(String)}. */
private String pattern;
/** Set to true by Under_sync() when the pattern could not be compiled. */
private boolean pattern_is_invalid = false;
/** Exception captured by Under_sync() when compilation failed; null until a failure is recorded. */
private Exception pattern_is_invalid_exception = null;

/** Returns the regex source currently held by this adapter. */
public String Pattern() {
	return this.pattern;
}

/**
 * Replaces the regex source and recompiles it via Under_sync().
 * @param val new regex source
 * @return this adapter, to allow call chaining
 */
public Regx_adp Pattern_(String val) {
	this.pattern = val;
	this.Under_sync();
	return this;
}

/** Returns whether a compile failure has been recorded for this adapter. */
public boolean Pattern_is_invalid() {
	return this.pattern_is_invalid;
}

/** Returns the exception recorded for a compile failure, or null when none was recorded. */
public Exception Pattern_is_invalid_exception() {
	return this.pattern_is_invalid_exception;
}
public Regx_match[] Match_all(String text, int bgn) {
int idx = bgn;
List_adp rv = List_adp_.New();
@ -39,10 +40,11 @@ public class Regx_adp {
}
/** Compiled form of the pattern; assigned by Under_sync(). */
private Pattern under;

/** Returns the compiled {@link Pattern} that backs this adapter. */
public Pattern Under() {
	return this.under;
}
void Under_sync() {
/**
 * Recompiles {@code pattern} into {@code under} and refreshes the invalid-pattern state.
 * On success the invalid flag and the stored exception are cleared; on failure they are set
 * and {@code under} falls back to the empty pattern.
 */
private void Under_sync() {
	try {
		under = Pattern.compile(pattern, Pattern.DOTALL | Pattern.UNICODE_CHARACTER_CLASS);	// JRE.7:UNICODE_CHARACTER_CLASS; added during %w fix for en.w:A#; DATE:2015-06-10
		// compile succeeded: drop any failure recorded for an earlier pattern, since Pattern_ may be called again on the same instance
		pattern_is_invalid = false;
		pattern_is_invalid_exception = null;
	}
	catch (Exception e) {	// NOTE: if invalid, then default to empty pattern (which should return nothing); EX:d:〆る generates [^]; DATE:2013-10-20
		pattern_is_invalid = true;
		pattern_is_invalid_exception = e;
		under = Pattern.compile("", Pattern.DOTALL | Pattern.UNICODE_CHARACTER_CLASS);
	}
}

@ -107,11 +107,10 @@ public class Scrib_lib_ustring implements Scrib_lib {
Regx_match[] regx_rslts = regx_adp.Match_all(text, bgn);
int len = regx_rslts.length;
if (len == 0) return rslt.Init_null(); // return null if no matches found; EX:w:Mount_Gambier_(volcano); DATE:2014-04-02; confirmed with en.d:民; DATE:2015-01-30
// TOMBSTONE: add 1st match only; do not add all; PAGE:en.d:действительное_причастие_настоящего_времени DATE:2017-04-23
List_adp tmp_list = List_adp_.New();
for (int i = 0; i < len; i++) {
Regx_match match = regx_rslts[i];
AddCapturesFromMatch(tmp_list, match, text, regx_converter.Capt_ary(), true);
}
AddCapturesFromMatch(tmp_list, regx_rslts[0], text, regx_converter.Capt_ary(), true);
return rslt.Init_many_list(tmp_list);
}
private Scrib_lib_ustring_gsub_mgr[] gsub_mgr_ary = Scrib_lib_ustring_gsub_mgr.Ary_empty;
@ -180,7 +179,9 @@ public class Scrib_lib_ustring implements Scrib_lib {
/**
 * Builds a {@code Regx_adp} for {@code regx} and logs one warning when the pattern failed to compile.
 * @param ctx supplies the app's user dialog (used for the warning) and the current page title
 * @param regx regex source passed to {@code Regx_adp_.new_}
 * @return the adapter; it is returned even when the pattern was flagged invalid
 */
public static Regx_adp RegxAdp_new_(Xop_ctx ctx, String regx) {
	Regx_adp rv = Regx_adp_.new_(regx);
	if (rv.Pattern_is_invalid()) {
		// try to identify [z-a] errors; PAGE:https://en.wiktionary.org/wiki/Module:scripts/data; DATE:2017-04-23
		Exception exc = rv.Pattern_is_invalid_exception();
		// single warning that includes the exception text; the third placeholder uses the same ~{n} form as the first two,
		// and the page title keeps the explicit String_.new_u8 conversion so it is passed as a String
		ctx.App().Usr_dlg().Warn_many("", "", "regx is invalid: regx=~{0} page=~{1} exc=~{2}", regx, String_.new_u8(ctx.Page().Ttl().Page_db()), Err_.Message_gplx_log(exc));
	}
	return rv;
}

@ -16,7 +16,7 @@ Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
package gplx.xowa.xtns.scribunto.libs; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.scribunto.*;
import org.junit.*; import gplx.xowa.xtns.scribunto.engines.mocks.*;
public class Scrib_lib_ustring__match__tst {
private final Mock_scrib_fxt fxt = new Mock_scrib_fxt(); private Scrib_lib lib;
private final Mock_scrib_fxt fxt = new Mock_scrib_fxt(); private Scrib_lib lib;
@Before public void init() {
fxt.Clear();
lib = fxt.Core().Lib_ustring().Init();
@ -26,11 +26,11 @@ public class Scrib_lib_ustring__match__tst {
Exec_match("abcd" , "x" , 1, String_.Null_mark); // empty
Exec_match("abcd" , "a" , 2, String_.Null_mark); // bgn
Exec_match("abcd" , "b(c)" , 1, "c"); // group
Exec_match(" a b " , "^%s*(.-)%s*$" , 1, "a b;"); // trim; NOTE: changed from "a b" to "a b;"; DATE:2015-01-30
Exec_match(" a b " , "^%s*(.-)%s*$" , 1, "a b"); // trim; NOTE: changed back from "a b;" to "a b"; DATE:2017-04-23; changed from "a b" to "a b;"; DATE:2015-01-30
Exec_match("abcd" , "a" , 0, "a"); // handle 0; note that php/lua is super-1, but some modules pass in 0; ru.w:Module:Infocards; DATE:2013-11-08
Exec_match("abcd" , "." , -1, "d"); // -1
Exec_match("aaa" , "a" , 1, "a"); // should return 1st match not many
Exec_match("aaa" , "(a)" , 1, "a;a;a"); // should return all matches
Exec_match("aaa" , "(a)" , 1, "a"); // should return 1st match only; PAGE:en.d:действительное_причастиеастоящегоремени DATE:2017-04-23
Exec_match("a b" , "%S" , 1, "a"); // %S was returning every match instead of 1st; PAGE:en.w:Bertrand_Russell; DATE:2014-04-02
Exec_match(1 , "a" , 1, String_.Null_mark); // Module can pass raw ints; PAGE:en.w:Budget_of_the_European_Union; DATE:2015-01-22
Exec_match("" , "a?" , 1, ""); // no results with ? should return "" not nil; PAGE:en.d:民; DATE:2015-01-30

Loading…
Cancel
Save