mirror of
https://github.com/gnosygnu/xowa.git
synced 2024-10-27 20:34:16 +00:00
Scribunto: Fix script error '=Module:zh-glyph:120 attempt to concatenate nil and string' on a few hundred en.d pages for Chinese chars
This commit is contained in:
parent
eaa83db644
commit
1d6b3779a0
@ -27,8 +27,8 @@ public class Bry__tst {
|
||||
fxt.Test_new_u8("a" , Bry_.New_by_ints(97)); // one
|
||||
fxt.Test_new_u8("abc" , Bry_.New_by_ints(97, 98, 99)); // many
|
||||
fxt.Test_new_u8("¢" , Bry_.New_by_ints(194, 162)); // bry_len=2; cent
|
||||
fxt.Test_new_u8("€" , Bry_.New_by_ints(226, 130, 172)); // bry_len=3; euro
|
||||
fxt.Test_new_u8("𤭢" , Bry_.New_by_ints(240, 164, 173, 162)); // bry_len=3; example from en.w:UTF-8
|
||||
fxt.Test_new_u8("€" , Bry_.New_by_ints(226, 130, 172)); // bry_len=3; euro
|
||||
fxt.Test_new_u8("𤭢" , Bry_.New_by_ints(240, 164, 173, 162)); // bry_len=4; example from en.w:UTF-8
|
||||
}
|
||||
@Test public void Add__bry_plus_byte() {
|
||||
fxt.Test_add("a" , Byte_ascii.Pipe , "a|"); // basic
|
||||
|
@ -21,6 +21,7 @@ public class Utf16__tst {
|
||||
// fxt.Test_encode_decode(162, 194, 162); // cent
|
||||
// fxt.Test_encode_decode(8364, 226, 130, 172); // euro
|
||||
fxt.Test_encode_decode(150370, 240, 164, 173, 162); // example from [[UTF-8]]; should be encoded as two bytes
|
||||
fxt.Test_encode_decode(143489, 240, 163, 130, 129); // EX: 駣𣂁脁 DATE:2017-04-22
|
||||
}
|
||||
@Test public void Encode_as_bry_by_hex() {
|
||||
fxt.Test_Encode_hex_to_bry("00", 0);
|
||||
|
@ -56,7 +56,9 @@ public class Scrib_lib_ustring implements Scrib_lib {
|
||||
synchronized (surrogate_utl) {
|
||||
byte[] text_bry = Bry_.new_u8(text_str); int text_bry_len = text_bry.length;
|
||||
bgn_char_idx = Bgn_adjust(text_str, bgn_char_idx);
|
||||
int bgn_adj = surrogate_utl.Count_surrogates__char_idx(text_bry, text_bry_len, 0, bgn_char_idx); // NOTE: convert from lua / php charidx to java regex codepoint; PAGE:zh.w:南北鐵路 (越南) DATE:2014-08-27
|
||||
// TOMBSTONE: do not adjust for 2-len chars (surrogates); lua always iterates correctly by chars; DATE:2017-04-23
|
||||
// int bgn_adj = surrogate_utl.Count_surrogates__char_idx(text_bry, text_bry_len, 0, bgn_char_idx); // NOTE: convert from lua / php charidx to java regex codepoint; PAGE:zh.w:南北鐵路 (越南) DATE:2014-08-27
|
||||
int bgn_adj = 0;
|
||||
int bgn_codepoint_idx = bgn_char_idx + bgn_adj;
|
||||
int bgn_byte_pos = surrogate_utl.Byte_pos();
|
||||
if (String_.Len_eq_0(regx)) // regx of "" should return (bgn, bgn - 1) regardless of whether plain is true or false
|
||||
|
@ -16,7 +16,7 @@ Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
package gplx.xowa.xtns.scribunto.libs; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.scribunto.*;
|
||||
import org.junit.*; import gplx.xowa.xtns.scribunto.engines.mocks.*;
|
||||
public class Scrib_lib_ustring__find__tst {
|
||||
private final Mock_scrib_fxt fxt = new Mock_scrib_fxt(); private Scrib_lib lib;
|
||||
private final Mock_scrib_fxt fxt = new Mock_scrib_fxt(); private Scrib_lib lib;
|
||||
@Before public void init() {
|
||||
fxt.Clear();
|
||||
lib = fxt.Core().Lib_ustring().Init();
|
||||
@ -38,10 +38,11 @@ public class Scrib_lib_ustring__find__tst {
|
||||
@Test public void Return_int() {
|
||||
fxt.Test__proc__kvps__vals(lib, Scrib_lib_ustring.Invk_find, Scrib_kv_utl_.base1_many_("a", "()", 2, Bool_.N), 2, 1, 2);
|
||||
}
|
||||
@Test public void Surrogate() { // PURPOSE: handle surrogates in Find PAGE:zh.w:南北鐵路_(越南); DATE:2014-08-28
|
||||
Exec_find("aé𡼾\nbî𡼾\n" , "\n" , 1, Bool_.N, "4;4"); // 4 b/c \n starts at pos 4 (super 1)
|
||||
Exec_find("aé𡼾\nbî𡼾\n" , "\n" , 5, Bool_.N, "8;8"); // 8 b/c \n starts at pos 8 (super 1)
|
||||
}
|
||||
// DELETE: no longer needed after tombstoning surrogate logic; DATE:2017-04-23
|
||||
// @Test public void Surrogate() { // PURPOSE: handle surrogates in Find PAGE:zh.w:南北鐵路_(越南); DATE:2014-08-28
|
||||
// Exec_find("aé𡼾\nbî𡼾\n" , "\n" , 1, Bool_.N, "4;4"); // 4 b/c \n starts at pos 4 (super 1)
|
||||
// Exec_find("aé𡼾\nbî𡼾\n" , "\n" , 5, Bool_.N, "8;8"); // 8 b/c \n starts at pos 8 (super 1)
|
||||
// }
|
||||
private void Exec_find(String text, String regx, int bgn, boolean plain, String expd) {
|
||||
fxt.Test__proc__kvps__flat(lib, Scrib_lib_ustring.Invk_find, Scrib_kv_utl_.base1_many_(text, regx, bgn, plain), expd);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user