mirror of
https://github.com/gnosygnu/xowa.git
synced 2024-10-27 20:34:16 +00:00
Scribunto: Add initial support for LuaJ StringLib as replacement for Regex [#413]
This commit is contained in:
parent
2fc03f6211
commit
31c7604f03
@ -24,6 +24,7 @@ public interface Unicode_string {
|
||||
int Val_codes(int i);
|
||||
int Pos_codes_to_bytes(int i);
|
||||
int Pos_codes_to_chars(int i);
|
||||
int Pos_bytes_to_chars(int i);
|
||||
int Pos_bytes_to_codes(int i);
|
||||
int Pos_chars_to_codes(int i);
|
||||
}
|
||||
@ -43,7 +44,8 @@ class Unicode_string_single implements Unicode_string { // 1 byte == 1 codepoint
|
||||
public int Len_bytes() {return codes_len;}
|
||||
public int Val_codes(int i) {return codes[i];}
|
||||
public int Pos_codes_to_bytes(int i) {if (i < 0 || i > codes_len) throw Err_.new_wo_type("invalid idx", "src", src_string, "idx", i); return i;}
|
||||
public int Pos_codes_to_chars(int i) {if (i < 0 || i > codes_len) throw Err_.new_wo_type("invalid idx", "src", src_string, "idx", i);return i;}
|
||||
public int Pos_bytes_to_codes(int i) {if (i < 0 || i > codes_len) throw Err_.new_wo_type("invalid idx", "src", src_string, "idx", i);return i;}
|
||||
public int Pos_chars_to_codes(int i) {if (i < 0 || i > codes_len) throw Err_.new_wo_type("invalid idx", "src", src_string, "idx", i);return i;}
|
||||
public int Pos_codes_to_chars(int i) {if (i < 0 || i > codes_len) throw Err_.new_wo_type("invalid idx", "src", src_string, "idx", i); return i;}
|
||||
public int Pos_bytes_to_chars(int i) {if (i < 0 || i > codes_len) throw Err_.new_wo_type("invalid idx", "src", src_string, "idx", i); return i;}
|
||||
public int Pos_bytes_to_codes(int i) {if (i < 0 || i > codes_len) throw Err_.new_wo_type("invalid idx", "src", src_string, "idx", i); return i;}
|
||||
public int Pos_chars_to_codes(int i) {if (i < 0 || i > codes_len) throw Err_.new_wo_type("invalid idx", "src", src_string, "idx", i); return i;}
|
||||
}
|
||||
|
@ -18,6 +18,7 @@ class Unicode_string_multi implements Unicode_string {
|
||||
private final int[] codes;
|
||||
private final int[] codes_to_bytes;
|
||||
private final int[] codes_to_chars;
|
||||
private final int[] bytes_to_chars;
|
||||
private final int[] bytes_to_codes;
|
||||
private final int[] chars_to_codes;
|
||||
|
||||
@ -34,6 +35,7 @@ class Unicode_string_multi implements Unicode_string {
|
||||
this.codes_to_bytes = new int[codes_len + Adj_end];
|
||||
this.codes_to_chars = new int[codes_len + Adj_end];
|
||||
this.bytes_to_codes = New_int_ary(bytes_len);
|
||||
this.bytes_to_chars = New_int_ary(bytes_len);
|
||||
this.chars_to_codes = New_int_ary(chars_len);
|
||||
|
||||
// init loop
|
||||
@ -46,6 +48,7 @@ class Unicode_string_multi implements Unicode_string {
|
||||
// update
|
||||
codes_to_bytes[codes_pos] = bytes_pos;
|
||||
codes_to_chars[codes_pos] = chars_pos;
|
||||
bytes_to_chars[bytes_pos] = chars_pos;
|
||||
bytes_to_codes[bytes_pos] = codes_pos;
|
||||
chars_to_codes[chars_pos] = codes_pos;
|
||||
|
||||
@ -67,6 +70,7 @@ class Unicode_string_multi implements Unicode_string {
|
||||
public int Val_codes(int i) {return codes[i];}
|
||||
public int Pos_codes_to_bytes(int i) {return codes_to_bytes[i];}
|
||||
public int Pos_codes_to_chars(int i) {return codes_to_chars[i];}
|
||||
public int Pos_bytes_to_chars(int i) {int rv = bytes_to_chars[i]; if (rv == Invalid) throw Err_.new_wo_type("invalid i", "src", src, "type", "bytes_to_chars", "i", i); return rv;}
|
||||
public int Pos_bytes_to_codes(int i) {int rv = bytes_to_codes[i]; if (rv == Invalid) throw Err_.new_wo_type("invalid i", "src", src, "type", "bytes_to_codes", "i", i); return rv;}
|
||||
public int Pos_chars_to_codes(int i) {int rv = chars_to_codes[i]; if (rv == Invalid) throw Err_.new_wo_type("invalid i", "src", src, "type", "chars_to_codes", "i", i); return rv;}
|
||||
|
||||
|
@ -17,6 +17,7 @@ package gplx.xowa.xtns.scribunto.libs; import gplx.*; import gplx.xowa.*; import
|
||||
import gplx.core.intls.*; import gplx.langs.regxs.*;
|
||||
import gplx.xowa.parsers.*;
|
||||
import gplx.xowa.xtns.scribunto.procs.*;
|
||||
import gplx.xowa.xtns.scribunto.libs.patterns.*;
|
||||
public class Scrib_lib_ustring implements Scrib_lib {
|
||||
public Scrib_lib_ustring(Scrib_core core) {this.core = core;} private Scrib_core core;
|
||||
public Scrib_lua_mod Mod() {return mod;} private Scrib_lua_mod mod;
|
||||
@ -98,7 +99,7 @@ public class Scrib_lib_ustring implements Scrib_lib {
|
||||
|
||||
// run regex
|
||||
Scrib_regx_converter regx_converter = new Scrib_regx_converter();
|
||||
Regx_match[] regx_rslts = Run_regex_or_null(text_ucs, regx_converter, find_str, bgn_as_codes);
|
||||
Regx_match[] regx_rslts = Scrib_pattern_matcher_.Instance().Match(core.Ctx().Page().Url(), text_ucs, regx_converter, find_str, bgn_as_codes);
|
||||
if (regx_rslts.length == 0) return rslt.Init_ary_empty();
|
||||
|
||||
// add to tmp_list
|
||||
@ -123,7 +124,7 @@ public class Scrib_lib_ustring implements Scrib_lib {
|
||||
|
||||
// run regex
|
||||
Scrib_regx_converter regx_converter = new Scrib_regx_converter();
|
||||
Regx_match[] regx_rslts = Run_regex_or_null(text_ucs, regx_converter, find_str, bgn_as_codes);
|
||||
Regx_match[] regx_rslts = Scrib_pattern_matcher_.Instance().Match(core.Ctx().Page().Url(), text_ucs, regx_converter, find_str, bgn_as_codes);
|
||||
if (regx_rslts.length == 0) return rslt.Init_null(); // return null if no matches found; EX:w:Mount_Gambier_(volcano); DATE:2014-04-02; confirmed with en.d:民; DATE:2015-01-30
|
||||
|
||||
// TOMBSTONE: add 1st match only; do not add all; PAGE:en.d:действительное_причастие_настоящего_времени DATE:2017-04-23
|
||||
@ -140,7 +141,7 @@ public class Scrib_lib_ustring implements Scrib_lib {
|
||||
// String text = Scrib_kv_utl_.Val_to_str(values, 0);
|
||||
String regx = args.Pull_str(1);
|
||||
Scrib_regx_converter regx_converter = new Scrib_regx_converter();
|
||||
String pcre = regx_converter.patternToRegex(regx, Scrib_regx_converter.Anchor_null);
|
||||
String pcre = regx_converter.patternToRegex(regx, Scrib_regx_converter.Anchor_null, true);
|
||||
return rslt.Init_many_objs(pcre, regx_converter.Capt_ary());
|
||||
}
|
||||
public boolean Gmatch_callback(Scrib_proc_args args, Scrib_proc_rslt rslt) {
|
||||
@ -148,7 +149,7 @@ public class Scrib_lib_ustring implements Scrib_lib {
|
||||
String regx = args.Pull_str(1);
|
||||
Keyval[] capt = args.Cast_kv_ary_or_null(2);
|
||||
int pos = args.Pull_int(3);
|
||||
Regx_adp regx_adp = Scrib_lib_ustring.RegxAdp_new_(core.Ctx(), regx);
|
||||
Regx_adp regx_adp = Scrib_lib_ustring.RegxAdp_new_(core.Ctx().Page().Url(), regx);
|
||||
Regx_match[] regx_rslts = regx_adp.Match_all(text, pos);
|
||||
int len = regx_rslts.length;
|
||||
if (len == 0) return rslt.Init_many_objs(pos, Keyval_.Ary_empty);
|
||||
@ -178,14 +179,6 @@ public class Scrib_lib_ustring implements Scrib_lib {
|
||||
bgn_as_codes = 0;
|
||||
return bgn_as_codes;
|
||||
}
|
||||
private Regx_match[] Run_regex_or_null(Unicode_string text_ucs, Scrib_regx_converter regx_converter, String find_str, int bgn_as_codes) {
|
||||
// convert regex from lua to java
|
||||
find_str = regx_converter.patternToRegex(find_str, Scrib_regx_converter.Anchor_G);
|
||||
|
||||
// run regex
|
||||
Regx_adp regx_adp = Scrib_lib_ustring.RegxAdp_new_(core.Ctx(), find_str);
|
||||
return regx_adp.Match_all(text_ucs.Src_string(), text_ucs.Pos_codes_to_chars(bgn_as_codes)); // NOTE: MW calculates an offset to handle mb strings. however, java's regex always takes offset in chars (not bytes like PHP preg_match); DATE:2014-03-04
|
||||
}
|
||||
private void AddCapturesFromMatch(List_adp tmp_list, Regx_match rslt, String text, Keyval[] capts, boolean op_is_match) {// NOTE: this matches behavior in UstringLibrary.php!addCapturesFromMatch
|
||||
int capts_len = capts == null ? 0 : capts.length;
|
||||
if (capts_len > 0) { // NOTE: changed from "grps_len > 0"; PAGE:en.w:Portal:Constructed_languages/Intro DATE:2018-07-02
|
||||
@ -205,12 +198,12 @@ public class Scrib_lib_ustring implements Scrib_lib {
|
||||
&& tmp_list.Count() == 0) // only add match once; EX: "aaaa", "a" will have four matches; get 1st; DATE:2014-04-02
|
||||
tmp_list.Add(String_.Mid(text, rslt.Find_bgn(), rslt.Find_end()));
|
||||
}
|
||||
public static Regx_adp RegxAdp_new_(Xop_ctx ctx, String regx) {
|
||||
public static Regx_adp RegxAdp_new_(Xoa_url url, String regx) {
|
||||
Regx_adp rv = Regx_adp_.new_(regx);
|
||||
if (rv.Pattern_is_invalid()) {
|
||||
// try to identify [z-a] errors; PAGE:https://en.wiktionary.org/wiki/Module:scripts/data; DATE:2017-04-23
|
||||
Exception exc = rv.Pattern_is_invalid_exception();
|
||||
ctx.App().Usr_dlg().Log_many("", "", "regx is invalid: regx=~{0} page=~{1} exc=~{2}", regx, ctx.Page().Ttl().Page_db(), Err_.Message_gplx_log(exc));
|
||||
Gfo_usr_dlg_.Instance.Log_many("", "", "regx is invalid: regx=~{0} page=~{1} exc=~{2}", regx, url.To_bry(), Err_.Message_gplx_log(exc));
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
|
@ -27,7 +27,7 @@ public class Scrib_lib_ustring__match__tst {
|
||||
Exec_match("abcd" , "a" , 2, String_.Null_mark); // bgn
|
||||
Exec_match("abcd" , "b(c)" , 1, "c"); // group
|
||||
Exec_match(" a b " , "^%s*(.-)%s*$" , 1, "a b"); // trim; NOTE: changed back from "a b;" to "a b"; DATE:2017-04-23; changed from "a b" to "a b;"; DATE:2015-01-30
|
||||
Exec_match("abcd" , "a" , 0, "a"); // handle 0; note that php/lua is super-1, but some modules pass in 0; ru.w:Module:Infocards; DATE:2013-11-08
|
||||
Exec_match("abcd" , "a" , 0, "a"); // handle 0; note that php/lua is BASE_1, but some modules pass in 0; ru.w:Module:Infocards; DATE:2013-11-08
|
||||
Exec_match("abcd" , "." , -1, "d"); // -1
|
||||
Exec_match("aaa" , "a" , 1, "a"); // should return 1st match not many
|
||||
Exec_match("aaa" , "(a)" , 1, "a"); // should return 1st match only; PAGE:en.d:действительное_причастие_настоящего_времени DATE:2017-04-23
|
||||
|
@ -32,7 +32,7 @@ class Scrib_lib_ustring_gsub_mgr {
|
||||
|
||||
// get @pattern; NOTE: sometimes int; PAGE:en.d:λύω; DATE:2014-09-02
|
||||
String regx = args.Xstr_str_or_null(1);
|
||||
regx = regx_converter.patternToRegex(regx, Scrib_regx_converter.Anchor_pow);
|
||||
regx = regx_converter.patternToRegex(regx, Scrib_regx_converter.Anchor_pow, true);
|
||||
|
||||
// get @repl
|
||||
Object repl_obj = args.Cast_obj_or_null(2);
|
||||
@ -82,7 +82,7 @@ class Scrib_lib_ustring_gsub_mgr {
|
||||
}
|
||||
private String Exec_repl(byte repl_tid, String text, String regx, int limit) {
|
||||
// parse regx
|
||||
Regx_adp regx_mgr = Scrib_lib_ustring.RegxAdp_new_(core.Ctx(), regx);
|
||||
Regx_adp regx_mgr = Scrib_lib_ustring.RegxAdp_new_(core.Ctx().Page().Url(), regx);
|
||||
if (regx_mgr.Pattern_is_invalid()) return text; // NOTE: invalid patterns should return self; EX:[^]; DATE:2014-09-02)
|
||||
|
||||
// exec regx
|
||||
|
@ -30,7 +30,7 @@ public class Scrib_regx_converter {
|
||||
public Keyval[] Capt_ary() {return grp_mgr.Capt__to_ary();}
|
||||
public boolean Any_pos() {return any_pos;} private boolean any_pos;
|
||||
public Regx_match[] Adjust_balanced(Regx_match[] rslts) {return grp_mgr.Adjust_balanced(rslts);}
|
||||
public String patternToRegex(String pat_str, byte[] anchor) {
|
||||
public String patternToRegex(String pat_str, byte[] anchor, boolean mode_is_regx) {
|
||||
Unicode_string pat_ucs = Unicode_string_.New(pat_str);
|
||||
// TODO.CACHE: if (!$this->patternRegexCache->has($cacheKey))
|
||||
grp_mgr.Clear();
|
||||
@ -49,10 +49,18 @@ public class Scrib_regx_converter {
|
||||
int cur = pat_ucs.Val_codes(i);
|
||||
switch (cur) {
|
||||
case Byte_ascii.Pow:
|
||||
if (!mode_is_regx) {
|
||||
bfr.Add_byte(Byte_ascii.Pow);
|
||||
continue;
|
||||
}
|
||||
q_flag = i != 0;
|
||||
bfr.Add((anchor == Anchor_null || q_flag) ? Bry_pow_escaped : anchor); // NOTE: must add anchor \G when using offsets; EX:cs.n:Category:1._zárí_2008; DATE:2014-05-07
|
||||
break;
|
||||
case Byte_ascii.Dollar:
|
||||
if (!mode_is_regx) {
|
||||
bfr.Add_byte(Byte_ascii.Dollar);
|
||||
continue;
|
||||
}
|
||||
q_flag = i < len - 1;
|
||||
bfr.Add(q_flag ? Bry_dollar_escaped : Bry_dollar_literal);
|
||||
break;
|
||||
@ -78,6 +86,10 @@ public class Scrib_regx_converter {
|
||||
bfr.Add_byte(Byte_ascii.Paren_end);
|
||||
break;
|
||||
case Byte_ascii.Percent:
|
||||
if (!mode_is_regx) {
|
||||
bfr.Add_byte(Byte_ascii.Percent);
|
||||
continue;
|
||||
}
|
||||
i++;
|
||||
if (i >= len)
|
||||
throw Err_.new_wo_type("malformed pattern (ends with '%')");
|
||||
@ -114,7 +126,8 @@ public class Scrib_regx_converter {
|
||||
++bct;
|
||||
int balanced_idx = grp_mgr.Full__len();
|
||||
fmtr_balanced.Bld_bfr(bfr_balanced, Int_.To_bry(bct), Utf16_.Encode_int_to_bry(char_0), Utf16_.Encode_int_to_bry(char_1), Int_.To_bry(balanced_idx + 1), Int_.To_bry(balanced_idx + 2));
|
||||
grp_mgr.Capt__add__fake(2);
|
||||
if (mode_is_regx)
|
||||
grp_mgr.Capt__add__fake(2);
|
||||
bfr.Add(bfr_balanced.To_bry_and_clear());
|
||||
}
|
||||
}
|
||||
@ -152,16 +165,32 @@ public class Scrib_regx_converter {
|
||||
}
|
||||
break;
|
||||
case Byte_ascii.Brack_bgn:
|
||||
if (!mode_is_regx) {
|
||||
bfr.Add_byte(Byte_ascii.Brack_bgn);
|
||||
continue;
|
||||
}
|
||||
i = bracketedCharSetToRegex(bfr, pat_ucs, i, len);
|
||||
q_flag = true;
|
||||
break;
|
||||
case Byte_ascii.Brack_end:
|
||||
if (!mode_is_regx) {
|
||||
bfr.Add_byte(Byte_ascii.Brack_end);
|
||||
continue;
|
||||
}
|
||||
throw Err_.new_wo_type("Unmatched close-bracket at pattern character " + Int_.To_str(i_end));
|
||||
case Byte_ascii.Dot:
|
||||
if (!mode_is_regx) {
|
||||
bfr.Add_byte(Byte_ascii.Dot);
|
||||
continue;
|
||||
}
|
||||
bfr.Add_byte(Byte_ascii.Dot);
|
||||
q_flag = true;
|
||||
break;
|
||||
default:
|
||||
if (!mode_is_regx) {
|
||||
bfr.Add_u8_int(cur);
|
||||
continue;
|
||||
}
|
||||
Regx_quote(bfr, cur);
|
||||
q_flag = true;
|
||||
break;
|
||||
|
@ -64,11 +64,11 @@ class Scrib_regx_converter_fxt {
|
||||
}
|
||||
}
|
||||
public void Test_parse(String raw, String expd) {
|
||||
under.patternToRegex(raw, Scrib_regx_converter.Anchor_G);
|
||||
under.patternToRegex(raw, Scrib_regx_converter.Anchor_G, true);
|
||||
Tfds.Eq(expd, under.Regx());
|
||||
}
|
||||
public void Test_replace(String text, String find, String replace, String expd) {
|
||||
String regex_str = under.patternToRegex(find, Scrib_regx_converter.Anchor_G);
|
||||
String regex_str = under.patternToRegex(find, Scrib_regx_converter.Anchor_G, true);
|
||||
String actl = Regx_adp_.Replace(text, regex_str, replace);
|
||||
Tfds.Eq(expd, actl);
|
||||
}
|
||||
|
@ -0,0 +1,21 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||
|
||||
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||
or alternatively under the terms of the Apache License Version 2.0.
|
||||
|
||||
You may use XOWA according to either of these licenses as is most appropriate
|
||||
for your project on a case-by-case basis.
|
||||
|
||||
The terms of each license can be found in the source code repository:
|
||||
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.xtns.scribunto.libs.patterns; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.scribunto.*; import gplx.xowa.xtns.scribunto.libs.*;
|
||||
import gplx.core.intls.*;
|
||||
import gplx.langs.regxs.*;
|
||||
// Abstraction over "run a Lua pattern against a Unicode string"; this package has one implementation backed by java regex (Scrib_pattern_matcher__regx) and one backed by LuaJ (Scrib_pattern_matcher__luaj).
public interface Scrib_pattern_matcher {
|
||||
// Matches find_str (a Lua pattern) against text_ucs, starting at bgn_as_codes.
// - url: page url; the regex implementation forwards it to Scrib_lib_ustring.RegxAdp_new_
// - text_ucs: text to search
// - regx_converter: Lua-pattern-to-regex converter; only used by the regex implementation
// - bgn_as_codes: start offset; implementations convert it with Pos_codes_to_chars / Pos_codes_to_bytes, so it is a codepoint offset
// Callers in Scrib_lib_ustring treat a zero-length array as "no match".
Regx_match[] Match(Xoa_url url, Unicode_string text_ucs, Scrib_regx_converter regx_converter, String find_str, int bgn_as_codes);
|
||||
}
|
@ -0,0 +1,36 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||
|
||||
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||
or alternatively under the terms of the Apache License Version 2.0.
|
||||
|
||||
You may use XOWA according to either of these licenses as is most appropriate
|
||||
for your project on a case-by-case basis.
|
||||
|
||||
The terms of each license can be found in the source code repository:
|
||||
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.xtns.scribunto.libs.patterns; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.scribunto.*; import gplx.xowa.xtns.scribunto.libs.*;
|
||||
import gplx.core.intls.*;
|
||||
import gplx.langs.regxs.*;
|
||||
// Static holder for the Scrib_pattern_matcher used by callers; the implementation is chosen once, in New().
public class Scrib_pattern_matcher_ {
|
||||
// created a single time during static initialization and returned by every Instance() call
private static final Scrib_pattern_matcher instance = New();
|
||||
// Selects the implementation: currently the java-regex matcher.
private static Scrib_pattern_matcher New() {
|
||||
return new Scrib_pattern_matcher__regx();
|
||||
// alternative LuaJ-backed matcher; left disabled
// return new Scrib_pattern_matcher__luaj();
|
||||
}
|
||||
// Returns the shared matcher built by New().
public static Scrib_pattern_matcher Instance() {return instance;}
|
||||
}
|
||||
// Matcher that converts the Lua pattern to a java regex and runs it through Regx_adp.
class Scrib_pattern_matcher__regx implements Scrib_pattern_matcher {
|
||||
// Returns whatever Regx_adp.Match_all produces for the converted pattern, searching from bgn_as_codes.
// url is only passed through to Scrib_lib_ustring.RegxAdp_new_.
public Regx_match[] Match(Xoa_url url, Unicode_string text_ucs, Scrib_regx_converter regx_converter, String find_str, int bgn_as_codes) {
|
||||
// convert regex from lua to java
// uses the Anchor_G anchor with mode_is_regx = true; find_str is overwritten with the converted pattern
find_str = regx_converter.patternToRegex(find_str, Scrib_regx_converter.Anchor_G, true);
|
||||
|
||||
// run regex
Regx_adp regx_adp = Scrib_lib_ustring.RegxAdp_new_(url, find_str);
|
||||
// the codepoint offset is converted to a char offset before being handed to Match_all
return regx_adp.Match_all(text_ucs.Src_string(), text_ucs.Pos_codes_to_chars(bgn_as_codes)); // NOTE: MW calculates an offset to handle mb strings. however, java's regex always takes offset in chars (not bytes like PHP preg_match); DATE:2014-03-04
|
||||
}
|
||||
}
|
@ -0,0 +1,49 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||
|
||||
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||
or alternatively under the terms of the Apache License Version 2.0.
|
||||
|
||||
You may use XOWA according to either of these licenses as is most appropriate
|
||||
for your project on a case-by-case basis.
|
||||
|
||||
The terms of each license can be found in the source code repository:
|
||||
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.xtns.scribunto.libs.patterns; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.scribunto.*; import gplx.xowa.xtns.scribunto.libs.*;
|
||||
import gplx.core.intls.*;
|
||||
import gplx.langs.regxs.*;
|
||||
import org.luaj.vm2.lib.StringLib;
|
||||
import org.luaj.vm2.lib.Str_find_mgr;
|
||||
import org.luaj.vm2.lib.Str_find_mgr__regx;
|
||||
// Matcher that delegates to LuaJ's Str_find_mgr__regx instead of converting the Lua pattern to a java regex.
// regx_converter and url are accepted to satisfy the interface but are not used here.
class Scrib_pattern_matcher__luaj implements Scrib_pattern_matcher {
|
||||
// Returns Regx_match.Ary_empty when nothing is found; otherwise a one-element array holding the match and its capture groups.
public Regx_match[] Match(Xoa_url url, Unicode_string text_ucs, Scrib_regx_converter regx_converter, String find_str, int bgn_as_codes) {
|
||||
// earlier variant of the start-offset calculation; left disabled
// int src_bgn = bgn_as_codes < 0 ? bgn_as_codes : text_ucs.Pos_codes_to_bytes(bgn_as_codes);
|
||||
// negative offsets collapse to Int_.Base1; others are shifted by Int_.Base1 (presumably 1, i.e. Lua's 1-based indexing -- TODO confirm)
int src_bgn = bgn_as_codes < 0 ? Int_.Base1 : bgn_as_codes + Int_.Base1;
|
||||
// NOTE(review): the clamp branch yields Len_codes() (a codepoint count) while the other branch yields a byte position; confirm which unit Str_find_mgr__regx expects
src_bgn = src_bgn >= text_ucs.Len_codes() ? text_ucs.Len_codes() : text_ucs.Pos_codes_to_bytes(src_bgn);
|
||||
Str_find_mgr__regx mgr = new Str_find_mgr__regx(text_ucs.Src_string(), find_str, src_bgn, false, true);
|
||||
mgr.Process();
|
||||
|
||||
// convert to Regx_match
// mgr.Bgn() / mgr.End() are run through Pos_bytes_to_chars, i.e. treated as byte positions; -1 is kept as the not-found marker
int find_bgn = mgr.Bgn() == -1 ? -1 : text_ucs.Pos_bytes_to_chars(mgr.Bgn());
|
||||
int find_end = mgr.End() == -1 ? -1 : text_ucs.Pos_bytes_to_chars(mgr.End());
|
||||
boolean found = find_bgn != -1;
|
||||
if (!found) {
|
||||
return Regx_match.Ary_empty;
|
||||
}
|
||||
int[] captures = mgr.Capture_ints();
|
||||
// stays null when mgr reports no captures
Regx_group[] groups = null;
|
||||
// found is always true here because of the early return above; only the null check matters
if (found && captures != null) {
|
||||
int captures_len = captures.length;
|
||||
// captures is consumed in (bgn, end) pairs: one group per two ints
groups = new Regx_group[captures_len / 2];
|
||||
for (int i = 0; i < captures_len; i += 2) {
|
||||
// NOTE(review): the group's bgn/end receive the raw capture values, while the substring is cut with Pos_bytes_to_chars-converted offsets; confirm which unit Regx_group expects
groups[i / 2] = new Regx_group(true, captures[i], captures[i + 1], String_.Mid(text_ucs.Src_string(), text_ucs.Pos_bytes_to_chars(captures[i]), text_ucs.Pos_bytes_to_chars(captures[i + 1])));
|
||||
}
|
||||
}
|
||||
Regx_match rv = new Regx_match(found, find_bgn, find_end, groups);
|
||||
return new Regx_match[] {rv};
|
||||
}
|
||||
}
|
Binary file not shown.
Loading…
Reference in New Issue
Block a user