diff --git a/100_core/src/gplx/Bry__tst.java b/100_core/src/gplx/Bry__tst.java index 216224d80..d8241fb48 100644 --- a/100_core/src/gplx/Bry__tst.java +++ b/100_core/src/gplx/Bry__tst.java @@ -27,8 +27,8 @@ public class Bry__tst { fxt.Test_new_u8("a" , Bry_.New_by_ints(97)); // one fxt.Test_new_u8("abc" , Bry_.New_by_ints(97, 98, 99)); // many fxt.Test_new_u8("¢" , Bry_.New_by_ints(194, 162)); // bry_len=2; cent - fxt.Test_new_u8("€" , Bry_.New_by_ints(226, 130, 172)); // bry_len=3; euro - fxt.Test_new_u8("𤭢" , Bry_.New_by_ints(240, 164, 173, 162)); // bry_len=3; example from en.w:UTF-8 + fxt.Test_new_u8("€" , Bry_.New_by_ints(226, 130, 172)); // bry_len=3; euro + fxt.Test_new_u8("𤭢" , Bry_.New_by_ints(240, 164, 173, 162)); // bry_len=4; example from en.w:UTF-8 } @Test public void Add__bry_plus_byte() { fxt.Test_add("a" , Byte_ascii.Pipe , "a|"); // basic diff --git a/100_core/src/gplx/core/intls/Utf16__tst.java b/100_core/src/gplx/core/intls/Utf16__tst.java index 5b694e664..2ebb8799b 100644 --- a/100_core/src/gplx/core/intls/Utf16__tst.java +++ b/100_core/src/gplx/core/intls/Utf16__tst.java @@ -21,6 +21,7 @@ public class Utf16__tst { // fxt.Test_encode_decode(162, 194, 162); // cent // fxt.Test_encode_decode(8364, 226, 130, 172); // euro fxt.Test_encode_decode(150370, 240, 164, 173, 162); // example from [[UTF-8]]; should be encoded as two bytes + fxt.Test_encode_decode(143489, 240, 163, 130, 129); // EX: 駣𣂁脁 DATE:2017-04-22 } @Test public void Encode_as_bry_by_hex() { fxt.Test_Encode_hex_to_bry("00", 0); diff --git a/400_xowa/src/gplx/xowa/xtns/scribunto/libs/Scrib_lib_ustring.java b/400_xowa/src/gplx/xowa/xtns/scribunto/libs/Scrib_lib_ustring.java index 9ef839a8d..f2b2ed438 100644 --- a/400_xowa/src/gplx/xowa/xtns/scribunto/libs/Scrib_lib_ustring.java +++ b/400_xowa/src/gplx/xowa/xtns/scribunto/libs/Scrib_lib_ustring.java @@ -56,7 +56,9 @@ public class Scrib_lib_ustring implements Scrib_lib { synchronized (surrogate_utl) { byte[] text_bry = Bry_.new_u8(text_str); int text_bry_len = text_bry.length; bgn_char_idx = Bgn_adjust(text_str, bgn_char_idx); - int bgn_adj = surrogate_utl.Count_surrogates__char_idx(text_bry, text_bry_len, 0, bgn_char_idx); // NOTE: convert from lua / php charidx to java regex codepoint; PAGE:zh.w:南北鐵路 (越南) DATE:2014-08-27 + // TOMBSTONE: do not adjust for 2-len chars (surrogates); lua always iterates correctly by chars; DATE:2017-04-23 + // int bgn_adj = surrogate_utl.Count_surrogates__char_idx(text_bry, text_bry_len, 0, bgn_char_idx); // NOTE: convert from lua / php charidx to java regex codepoint; PAGE:zh.w:南北鐵路 (越南) DATE:2014-08-27 + int bgn_adj = 0; int bgn_codepoint_idx = bgn_char_idx + bgn_adj; int bgn_byte_pos = surrogate_utl.Byte_pos(); if (String_.Len_eq_0(regx)) // regx of "" should return (bgn, bgn - 1) regardless of whether plain is true or false diff --git a/400_xowa/src/gplx/xowa/xtns/scribunto/libs/Scrib_lib_ustring__find__tst.java b/400_xowa/src/gplx/xowa/xtns/scribunto/libs/Scrib_lib_ustring__find__tst.java index 34e0a9095..e26b9d658 100644 --- a/400_xowa/src/gplx/xowa/xtns/scribunto/libs/Scrib_lib_ustring__find__tst.java +++ b/400_xowa/src/gplx/xowa/xtns/scribunto/libs/Scrib_lib_ustring__find__tst.java @@ -16,7 +16,7 @@ Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt package gplx.xowa.xtns.scribunto.libs; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.scribunto.*; import org.junit.*; import gplx.xowa.xtns.scribunto.engines.mocks.*; public class Scrib_lib_ustring__find__tst { - private final Mock_scrib_fxt fxt = new Mock_scrib_fxt(); private Scrib_lib lib; + private final Mock_scrib_fxt fxt = new Mock_scrib_fxt(); private Scrib_lib lib; @Before public void init() { fxt.Clear(); lib = fxt.Core().Lib_ustring().Init(); @@ -38,10 +38,11 @@ public class Scrib_lib_ustring__find__tst { @Test public void Return_int() { fxt.Test__proc__kvps__vals(lib, Scrib_lib_ustring.Invk_find, Scrib_kv_utl_.base1_many_("a", "()", 2, Bool_.N), 2, 1, 2); } - @Test public void Surrogate() { // PURPOSE: handle surrogates in Find PAGE:zh.w:南北鐵路_(越南); DATE:2014-08-28 - Exec_find("aé𡼾\nbî𡼾\n" , "\n" , 1, Bool_.N, "4;4"); // 4 b/c \n starts at pos 4 (super 1) - Exec_find("aé𡼾\nbî𡼾\n" , "\n" , 5, Bool_.N, "8;8"); // 8 b/c \n starts at pos 8 (super 1) - } + // DELETE: no longer needed after tombstoning surrogate logic; DATE:2017-04-23 + // @Test public void Surrogate() { // PURPOSE: handle surrogates in Find PAGE:zh.w:南北鐵路_(越南); DATE:2014-08-28 + // Exec_find("aé𡼾\nbî𡼾\n" , "\n" , 1, Bool_.N, "4;4"); // 4 b/c \n starts at pos 4 (super 1) + // Exec_find("aé𡼾\nbî𡼾\n" , "\n" , 5, Bool_.N, "8;8"); // 8 b/c \n starts at pos 8 (super 1) + // } private void Exec_find(String text, String regx, int bgn, boolean plain, String expd) { fxt.Test__proc__kvps__flat(lib, Scrib_lib_ustring.Invk_find, Scrib_kv_utl_.base1_many_(text, regx, bgn, plain), expd); }