1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2024-09-28 14:30:51 +00:00

Luaj: Make anypos captures base-1, not base-0 [#726]

This commit is contained in:
gnosygnu 2020-05-17 11:25:23 -04:00
parent 654810d56c
commit 1ebc9d3488
4 changed files with 125 additions and 92 deletions

View File

@ -1,6 +1,6 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
Copyright (C) 2012-2020 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
@ -13,9 +13,27 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.xtns.scribunto.libs; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.scribunto.*;
import org.junit.*; import gplx.core.tests.*;
import gplx.langs.regxs.*; import gplx.xowa.xtns.scribunto.engines.mocks.*;
package gplx.xowa.xtns.scribunto.libs;
import gplx.Array_;
import gplx.Bool_;
import gplx.Bry_bfr;
import gplx.Bry_bfr_;
import gplx.Err_;
import gplx.Int_;
import gplx.Keyval;
import gplx.Keyval_;
import gplx.Object_;
import gplx.String_;
import gplx.Tfds;
import gplx.core.tests.Gftest;
import gplx.langs.regxs.Regx_adp_;
import gplx.xowa.xtns.scribunto.Scrib_kv_utl_;
import gplx.xowa.xtns.scribunto.Scrib_lib;
import gplx.xowa.xtns.scribunto.engines.mocks.Mock_proc_stub;
import gplx.xowa.xtns.scribunto.engines.mocks.Mock_scrib_fxt;
import org.junit.Before;
import org.junit.Test;
public class Scrib_lib_ustring__gsub__tst {
private final Mock_scrib_fxt fxt = new Mock_scrib_fxt(); private Scrib_lib lib;
@Before public void init() {
@ -130,7 +148,9 @@ public class Scrib_lib_ustring__gsub__tst {
// Checks that gsub with a Lua callback receives the anypos ("()") offset as a base-1 value,
// followed by the text of the remaining captures. ISSUE#:726; DATE:2020-05-17
@Test public void Luacbk__anypos() {
	// base-0 offsets in text: a=0 d=1 2=2 f=3 1=4 e=5 3=6 z=7
	String text = "ad2f1e3z";
	String regx = "()([1d])([2e])([3f])"; // "()" is anypos, which inserts find_pos to results
	// NOTE(review): each row appears to be {value returned by the callback, expected callback args...};
	// confirm against Mock_proc__verify_args
	Mock_proc__verify_args proc = new Mock_proc__verify_args(0
		, new Object[]{"B", 2, "d", "2", "f"}   // "d2f" starts at base-0 offset 1 -> base-1 offset 2; changed from 1 to 2; ISSUE#:726; DATE:2020-05-17;
		, new Object[]{"Y", 5, "1", "e", "3"}); // "1e3" starts at base-0 offset 4 -> base-1 offset 5; changed from 4 to 5; ISSUE#:726; DATE:2020-05-17;
	fxt.Init__cbk(proc);
	Exec_gsub(text, regx, -1, proc.To_scrib_lua_proc(), "aBYz;2");
}

View File

@ -178,7 +178,7 @@ public class Scrib_lib_ustring_gsub_mgr { // THREAD.UNSAFE:LOCAL_VALUES
// anypos will create @offset arg; everything else creates a @match arg based on grp
Object val = any_pos && i < capt_ary_len && Bool_.Cast(capt_ary[i].Val())
? (Object)grp.Bgn()
? (Object)(grp.Bgn() + List_adp_.Base1) // NOTE: must normalize to base-1 b/c lua callbacks expect base-1 arguments, not base-0; ISSUE#:726; DATE:2020-05-17;
: (Object)String_.Mid(src_str, grp.Bgn(), grp.End());
luacbk_args[i] = Keyval_.int_(i + Scrib_core.Base_1, val);
}

View File

@ -1,6 +1,6 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
Copyright (C) 2012-2020 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
@ -13,17 +13,23 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.xtns.scribunto.libs.patterns; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.scribunto.*; import gplx.xowa.xtns.scribunto.libs.*;
import gplx.objects.strings.unicodes.*;
import gplx.langs.regxs.*;
import gplx.objects.strings.unicodes.*;
import org.luaj.vm2.lib.StringLib;
import org.luaj.vm2.Buffer;
import org.luaj.vm2.LuaValue;
package gplx.xowa.xtns.scribunto.libs.patterns;
import gplx.Bry_bfr;
import gplx.Bry_bfr_;
import gplx.String_;
import gplx.langs.regxs.Regx_group;
import gplx.langs.regxs.Regx_match;
import gplx.objects.strings.unicodes.Ustring;
import gplx.objects.strings.unicodes.Ustring_;
import gplx.xowa.xtns.scribunto.libs.Scrib_lib_ustring_gsub_mgr;
import gplx.xowa.xtns.scribunto.libs.Scrib_regx_converter;
import org.luaj.vm2.lib.Match_state;
import org.luaj.vm2.lib.Str_find_mgr;
import org.luaj.vm2.lib.Str_find_mgr__xowa;
class Scrib_pattern_matcher__xowa extends Scrib_pattern_matcher { public Scrib_pattern_matcher__xowa(byte[] page_url) {}
class Scrib_pattern_matcher__xowa extends Scrib_pattern_matcher {
public Scrib_pattern_matcher__xowa(byte[] page_url) {}
@Override public Regx_match Match_one(Ustring src_ucs, String pat_str, int bgn_as_codes, boolean replace) {
regx_converter.patternToRegex(pat_str, Scrib_regx_converter.Anchor_pow, true);
Str_find_mgr__xowa mgr = new Str_find_mgr__xowa(src_ucs, Ustring_.New_codepoints(pat_str), bgn_as_codes, false, false);
@ -41,6 +47,7 @@ class Scrib_pattern_matcher__xowa extends Scrib_pattern_matcher { public Scrib
Regx_group[] groups = Make_groups(src_ucs, mgr.Captures_ary());
return new Regx_match(found, find_bgn, find_end, groups);
}
@Override public String Gsub(Scrib_lib_ustring_gsub_mgr gsub_mgr, Ustring src_ucs, String pat_str, int bgn_as_codes) {
// get src vars
String src_str = src_ucs.Src();
@ -104,6 +111,7 @@ class Scrib_pattern_matcher__xowa extends Scrib_pattern_matcher { public Scrib
tmp_bfr.Add_str_u8(src_ucs.Substring(src_pos, src_len));
return tmp_bfr.To_str_and_clear();
}
private Regx_group[] Make_groups(Ustring src_ucs, int[] captures) {
if (captures == null) {
return Regx_group.Ary_empty;
@ -114,6 +122,11 @@ class Scrib_pattern_matcher__xowa extends Scrib_pattern_matcher { public Scrib
for (int i = 0; i < captures_len; i += 2) {
int capture_bgn = captures[i];
int capture_end = captures[i + 1];
// ISSUE#:726; DATE:2020-05-17;
// NOTE: capture values are base-0 and are added by any pattern captures, including:
// * standard captures EX: `a(bc)d` for `abcd` will have 1, 3
// * empty captures EX: `()bc` for `abcd` will have 1, 2
// Note that empty captures will be normalized to base-1 in Scrib_lib_ustring_gsub_mgr inside the any_pos code
capture_bgn = src_ucs.Map_data_to_char(capture_bgn);
capture_end = src_ucs.Map_data_to_char(capture_end);
groups[i / 2] = new Regx_group(true, capture_bgn, capture_end, String_.Mid(src_ucs.Src(), capture_bgn, capture_end));

Binary file not shown.