1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2024-10-27 20:34:16 +00:00

Luaj: Make anypos captures base-1, not base-0 [#726]

This commit is contained in:
gnosygnu 2020-05-17 11:25:23 -04:00
parent 654810d56c
commit 1ebc9d3488
4 changed files with 125 additions and 92 deletions

View File

@ -1,6 +1,6 @@
/* /*
XOWA: the XOWA Offline Wiki Application XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com Copyright (C) 2012-2020 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3, XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0. or alternatively under the terms of the Apache License Version 2.0.
@ -13,9 +13,27 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/ */
package gplx.xowa.xtns.scribunto.libs; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.scribunto.*; package gplx.xowa.xtns.scribunto.libs;
import org.junit.*; import gplx.core.tests.*;
import gplx.langs.regxs.*; import gplx.xowa.xtns.scribunto.engines.mocks.*; import gplx.Array_;
import gplx.Bool_;
import gplx.Bry_bfr;
import gplx.Bry_bfr_;
import gplx.Err_;
import gplx.Int_;
import gplx.Keyval;
import gplx.Keyval_;
import gplx.Object_;
import gplx.String_;
import gplx.Tfds;
import gplx.core.tests.Gftest;
import gplx.langs.regxs.Regx_adp_;
import gplx.xowa.xtns.scribunto.Scrib_kv_utl_;
import gplx.xowa.xtns.scribunto.Scrib_lib;
import gplx.xowa.xtns.scribunto.engines.mocks.Mock_proc_stub;
import gplx.xowa.xtns.scribunto.engines.mocks.Mock_scrib_fxt;
import org.junit.Before;
import org.junit.Test;
public class Scrib_lib_ustring__gsub__tst { public class Scrib_lib_ustring__gsub__tst {
private final Mock_scrib_fxt fxt = new Mock_scrib_fxt(); private Scrib_lib lib; private final Mock_scrib_fxt fxt = new Mock_scrib_fxt(); private Scrib_lib lib;
@Before public void init() { @Before public void init() {
@ -130,7 +148,9 @@ public class Scrib_lib_ustring__gsub__tst {
@Test public void Luacbk__anypos() { @Test public void Luacbk__anypos() {
String text = "ad2f1e3z"; String text = "ad2f1e3z";
String regx = "()([1d])([2e])([3f])"; // "()" is anypos, which inserts find_pos to results String regx = "()([1d])([2e])([3f])"; // "()" is anypos, which inserts find_pos to results
Mock_proc__verify_args proc = new Mock_proc__verify_args(0, new Object[]{"B", 1, "d", "2", "f"}, new Object[]{"Y", 4, "1", "e", "3"}); Mock_proc__verify_args proc = new Mock_proc__verify_args(0
, new Object[]{"B", 2, "d", "2", "f"} // NOTE: changed from 1 to 2 b/c of base-1 issues;ISSUE#:726; DATE:2020-05-17;
, new Object[]{"Y", 5, "1", "e", "3"}); // NOTE: changed from 4 to 5 b/c of base-1 issues;ISSUE#:726; DATE:2020-05-17;
fxt.Init__cbk(proc); fxt.Init__cbk(proc);
Exec_gsub(text, regx, -1, proc.To_scrib_lua_proc(), "aBYz;2"); Exec_gsub(text, regx, -1, proc.To_scrib_lua_proc(), "aBYz;2");
} }

View File

@ -178,7 +178,7 @@ public class Scrib_lib_ustring_gsub_mgr { // THREAD.UNSAFE:LOCAL_VALUES
// anypos will create @offset arg; everything else creates a @match arg based on grp // anypos will create @offset arg; everything else creates a @match arg based on grp
Object val = any_pos && i < capt_ary_len && Bool_.Cast(capt_ary[i].Val()) Object val = any_pos && i < capt_ary_len && Bool_.Cast(capt_ary[i].Val())
? (Object)grp.Bgn() ? (Object)(grp.Bgn() + List_adp_.Base1) // NOTE: must normalize to base-1 b/c lua callbacks expect base-1 arguments, not base-0; ISSUE#:726; DATE:2020-05-17;
: (Object)String_.Mid(src_str, grp.Bgn(), grp.End()); : (Object)String_.Mid(src_str, grp.Bgn(), grp.End());
luacbk_args[i] = Keyval_.int_(i + Scrib_core.Base_1, val); luacbk_args[i] = Keyval_.int_(i + Scrib_core.Base_1, val);
} }

View File

@ -1,6 +1,6 @@
/* /*
XOWA: the XOWA Offline Wiki Application XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com Copyright (C) 2012-2020 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3, XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0. or alternatively under the terms of the Apache License Version 2.0.
@ -13,17 +13,23 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/ */
package gplx.xowa.xtns.scribunto.libs.patterns; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.scribunto.*; import gplx.xowa.xtns.scribunto.libs.*; package gplx.xowa.xtns.scribunto.libs.patterns;
import gplx.objects.strings.unicodes.*;
import gplx.langs.regxs.*; import gplx.Bry_bfr;
import gplx.objects.strings.unicodes.*; import gplx.Bry_bfr_;
import org.luaj.vm2.lib.StringLib; import gplx.String_;
import org.luaj.vm2.Buffer; import gplx.langs.regxs.Regx_group;
import org.luaj.vm2.LuaValue; import gplx.langs.regxs.Regx_match;
import gplx.objects.strings.unicodes.Ustring;
import gplx.objects.strings.unicodes.Ustring_;
import gplx.xowa.xtns.scribunto.libs.Scrib_lib_ustring_gsub_mgr;
import gplx.xowa.xtns.scribunto.libs.Scrib_regx_converter;
import org.luaj.vm2.lib.Match_state; import org.luaj.vm2.lib.Match_state;
import org.luaj.vm2.lib.Str_find_mgr;
import org.luaj.vm2.lib.Str_find_mgr__xowa; import org.luaj.vm2.lib.Str_find_mgr__xowa;
class Scrib_pattern_matcher__xowa extends Scrib_pattern_matcher { public Scrib_pattern_matcher__xowa(byte[] page_url) {}
class Scrib_pattern_matcher__xowa extends Scrib_pattern_matcher {
public Scrib_pattern_matcher__xowa(byte[] page_url) {}
@Override public Regx_match Match_one(Ustring src_ucs, String pat_str, int bgn_as_codes, boolean replace) { @Override public Regx_match Match_one(Ustring src_ucs, String pat_str, int bgn_as_codes, boolean replace) {
regx_converter.patternToRegex(pat_str, Scrib_regx_converter.Anchor_pow, true); regx_converter.patternToRegex(pat_str, Scrib_regx_converter.Anchor_pow, true);
Str_find_mgr__xowa mgr = new Str_find_mgr__xowa(src_ucs, Ustring_.New_codepoints(pat_str), bgn_as_codes, false, false); Str_find_mgr__xowa mgr = new Str_find_mgr__xowa(src_ucs, Ustring_.New_codepoints(pat_str), bgn_as_codes, false, false);
@ -41,6 +47,7 @@ class Scrib_pattern_matcher__xowa extends Scrib_pattern_matcher { public Scrib
Regx_group[] groups = Make_groups(src_ucs, mgr.Captures_ary()); Regx_group[] groups = Make_groups(src_ucs, mgr.Captures_ary());
return new Regx_match(found, find_bgn, find_end, groups); return new Regx_match(found, find_bgn, find_end, groups);
} }
@Override public String Gsub(Scrib_lib_ustring_gsub_mgr gsub_mgr, Ustring src_ucs, String pat_str, int bgn_as_codes) { @Override public String Gsub(Scrib_lib_ustring_gsub_mgr gsub_mgr, Ustring src_ucs, String pat_str, int bgn_as_codes) {
// get src vars // get src vars
String src_str = src_ucs.Src(); String src_str = src_ucs.Src();
@ -104,6 +111,7 @@ class Scrib_pattern_matcher__xowa extends Scrib_pattern_matcher { public Scrib
tmp_bfr.Add_str_u8(src_ucs.Substring(src_pos, src_len)); tmp_bfr.Add_str_u8(src_ucs.Substring(src_pos, src_len));
return tmp_bfr.To_str_and_clear(); return tmp_bfr.To_str_and_clear();
} }
private Regx_group[] Make_groups(Ustring src_ucs, int[] captures) { private Regx_group[] Make_groups(Ustring src_ucs, int[] captures) {
if (captures == null) { if (captures == null) {
return Regx_group.Ary_empty; return Regx_group.Ary_empty;
@ -114,10 +122,15 @@ class Scrib_pattern_matcher__xowa extends Scrib_pattern_matcher { public Scrib
for (int i = 0; i < captures_len; i += 2) { for (int i = 0; i < captures_len; i += 2) {
int capture_bgn = captures[i]; int capture_bgn = captures[i];
int capture_end = captures[i + 1]; int capture_end = captures[i + 1];
// ISSUE#:726; DATE:2020-05-17;
// NOTE: capture values are base-0 and are recorded for every capture in the pattern, including:
// * standard captures EX: `a(bc)d` for `abcd` will have 1, 3
// * empty captures EX: `()bc` for `abcd` will have 1, 2
// Note that empty captures will be normalized to base-1 in Scrib_lib_ustring_gsub_mgr inside the any_pos code
capture_bgn = src_ucs.Map_data_to_char(capture_bgn); capture_bgn = src_ucs.Map_data_to_char(capture_bgn);
capture_end = src_ucs.Map_data_to_char(capture_end); capture_end = src_ucs.Map_data_to_char(capture_end);
groups[i / 2] = new Regx_group(true, capture_bgn, capture_end, String_.Mid(src_ucs.Src(), capture_bgn, capture_end)); groups[i / 2] = new Regx_group(true, capture_bgn, capture_end, String_.Mid(src_ucs.Src(), capture_bgn, capture_end));
} }
return groups; return groups;
} }
} }

Binary file not shown.