mirror of
https://github.com/gnosygnu/xowa.git
synced 2024-10-27 20:34:16 +00:00
Scribunto.Regex: Reinstate codepoint calculations for ustring.find
This commit is contained in:
parent
0e3af23c87
commit
ea3cb238fd
@ -56,13 +56,16 @@ public class Scrib_lib_ustring implements Scrib_lib {
|
||||
synchronized (surrogate_utl) {
|
||||
byte[] text_bry = Bry_.new_u8(text_str); int text_bry_len = text_bry.length;
|
||||
bgn_char_idx = Bgn_adjust(text_str, bgn_char_idx);
|
||||
// TOMBSTONE: do not adjust for 2-len chars (surrogates); lua always iterates correctly by chars; DATE:2017-04-23
|
||||
// int bgn_adj = surrogate_utl.Count_surrogates__char_idx(text_bry, text_bry_len, 0, bgn_char_idx); // NOTE: convert from lua / php charidx to java regex codepoint; PAGE:zh.w:南北鐵路 (越南) DATE:2014-08-27
|
||||
int bgn_adj = 0;
|
||||
|
||||
// regx of "" should return (bgn, bgn - 1) regardless of whether plain is true or false;
|
||||
// NOTE: do not include surrogate calc; PAGE:en.d:佻 DATE:2017-04-24
|
||||
if (String_.Len_eq_0(regx)) // regx of "" should return (bgn, bgn - 1) regardless of whether plain is true or false
|
||||
return rslt.Init_many_objs(bgn_char_idx + Scrib_lib_ustring.Base1, bgn_char_idx + Scrib_lib_ustring.Base1 - 1);
|
||||
|
||||
// NOTE: adjust for 2-len chars (surrogates); PAGE:en.d:iglesia DATE:2017-04-23
|
||||
int bgn_adj = surrogate_utl.Count_surrogates__char_idx(text_bry, text_bry_len, 0, bgn_char_idx); // NOTE: convert from lua / php charidx to java regex codepoint; PAGE:zh.w:南北鐵路 (越南) DATE:2014-08-27
|
||||
int bgn_codepoint_idx = bgn_char_idx + bgn_adj;
|
||||
int bgn_byte_pos = surrogate_utl.Byte_pos();
|
||||
if (String_.Len_eq_0(regx)) // regx of "" should return (bgn, bgn - 1) regardless of whether plain is true or false
|
||||
return rslt.Init_many_objs(bgn_codepoint_idx + Scrib_lib_ustring.Base1, bgn_codepoint_idx + Scrib_lib_ustring.Base1 - 1);
|
||||
if (plain) {
|
||||
int pos = String_.FindFwd(text_str, regx, bgn_codepoint_idx);
|
||||
boolean found = pos != Bry_find_.Not_found;
|
||||
@ -255,7 +258,7 @@ class Scrib_lib_ustring_gsub_mgr {
|
||||
if (limit > -1 && repl_count == limit) break;
|
||||
Regx_match rslt = rslts[i];
|
||||
tmp_bfr.Add_str_u8(String_.Mid(text, pos, rslt.Find_bgn())); // NOTE: regx returns char pos (not bry); must add as String, not bry; DATE:2013-07-17
|
||||
if (!Exec_repl_itm(tmp_bfr, repl_tid, repl_bry, text, rslt)) { // will be false when gsub_proc returns nothing; PAGE:en.d:tracer DATE:2017-04-22
|
||||
if (!Exec_repl_itm(tmp_bfr, repl_tid, repl_bry, text, rslt)) { // will be false when gsub_proc returns nothing; PAGE:en.d:tracer PAGE:en.d:שלום DATE:2017-04-22;
|
||||
tmp_bfr.Add_str_u8(String_.Mid(text, rslt.Find_bgn(), rslt.Find_end()));
|
||||
}
|
||||
pos = rslt.Find_end();
|
||||
|
@ -38,11 +38,14 @@ public class Scrib_lib_ustring__find__tst {
|
||||
@Test public void Return_int() {
|
||||
fxt.Test__proc__kvps__vals(lib, Scrib_lib_ustring.Invk_find, Scrib_kv_utl_.base1_many_("a", "()", 2, Bool_.N), 2, 1, 2);
|
||||
}
|
||||
// DELETE: no longer needed after tombstoning surrogate logic; DATE:2017-04-23
|
||||
// @Test public void Surrogate() { // PURPOSE: handle surrogates in Find PAGE:zh.w:南北鐵路_(越南); DATE:2014-08-28
|
||||
// Exec_find("aé𡼾\nbî𡼾\n" , "\n" , 1, Bool_.N, "4;4"); // 4 b/c \n starts at pos 4 (super 1)
|
||||
// Exec_find("aé𡼾\nbî𡼾\n" , "\n" , 5, Bool_.N, "8;8"); // 8 b/c \n starts at pos 8 (super 1)
|
||||
// }
|
||||
@Test public void Surrogate__find__value() { // PURPOSE: handle surrogates in Find PAGE:zh.w:南北鐵路_(越南); DATE:2014-08-28
|
||||
Exec_find("aé𡼾\nbî𡼾\n" , "\n" , 1, Bool_.N, "4;4"); // 4 b/c \n starts at pos 4 (super 1)
|
||||
Exec_find("aé𡼾\nbî𡼾\n" , "\n" , 5, Bool_.N, "8;8"); // 8 b/c \n starts at pos 8 (super 1)
|
||||
}
|
||||
@Test public void Surrogate__find__empty() { // PURPOSE: handle surrogates in Find PAGE:zh.w:南北鐵路_(越南); DATE:2014-08-28
|
||||
Exec_find("aé𡼾\nbî𡼾\n" , "" , 1, Bool_.N, "1;0"); // 4 b/c \n starts at pos 4 (super 1)
|
||||
// Exec_find("aé𡼾\nbî𡼾\n" , "" , 5, Bool_.N, "8;8"); // 8 b/c \n starts at pos 8 (super 1)
|
||||
}
|
||||
private void Exec_find(String text, String regx, int bgn, boolean plain, String expd) {
|
||||
fxt.Test__proc__kvps__flat(lib, Scrib_lib_ustring.Invk_find, Scrib_kv_utl_.base1_many_(text, regx, bgn, plain), expd);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user