mirror of
https://github.com/gnosygnu/xowa.git
synced 2026-03-02 03:49:30 +00:00
Scribunto: Fix script error '=Module:zh-glyph:120 attempt to concatenate nil and string' on a few hundred en.d pages for Chinese chars
This commit is contained in:
@@ -56,7 +56,9 @@ public class Scrib_lib_ustring implements Scrib_lib {
|
||||
synchronized (surrogate_utl) {
|
||||
byte[] text_bry = Bry_.new_u8(text_str); int text_bry_len = text_bry.length;
|
||||
bgn_char_idx = Bgn_adjust(text_str, bgn_char_idx);
|
||||
int bgn_adj = surrogate_utl.Count_surrogates__char_idx(text_bry, text_bry_len, 0, bgn_char_idx); // NOTE: convert from lua / php charidx to java regex codepoint; PAGE:zh.w:南北鐵路 (越南) DATE:2014-08-27
|
||||
// TOMBSTONE: do not adjust for 2-len chars (surrogates); lua always iterates correctly by chars; DATE:2017-04-23
|
||||
// int bgn_adj = surrogate_utl.Count_surrogates__char_idx(text_bry, text_bry_len, 0, bgn_char_idx); // NOTE: convert from lua / php charidx to java regex codepoint; PAGE:zh.w:南北鐵路 (越南) DATE:2014-08-27
|
||||
int bgn_adj = 0;
|
||||
int bgn_codepoint_idx = bgn_char_idx + bgn_adj;
|
||||
int bgn_byte_pos = surrogate_utl.Byte_pos();
|
||||
if (String_.Len_eq_0(regx)) // regx of "" should return (bgn, bgn - 1) regardless of whether plain is true or false
|
||||
|
||||
@@ -16,7 +16,7 @@ Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
package gplx.xowa.xtns.scribunto.libs; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.scribunto.*;
|
||||
import org.junit.*; import gplx.xowa.xtns.scribunto.engines.mocks.*;
|
||||
public class Scrib_lib_ustring__find__tst {
|
||||
private final Mock_scrib_fxt fxt = new Mock_scrib_fxt(); private Scrib_lib lib;
|
||||
private final Mock_scrib_fxt fxt = new Mock_scrib_fxt(); private Scrib_lib lib;
|
||||
@Before public void init() {
|
||||
fxt.Clear();
|
||||
lib = fxt.Core().Lib_ustring().Init();
|
||||
@@ -38,10 +38,11 @@ public class Scrib_lib_ustring__find__tst {
|
||||
// Invokes Invk_find with the base-1 args ("a", "()", 2, Bool_.N) and hands 2, 1, 2 to the fixture.
// NOTE(review): the trailing 2, 1, 2 are presumably the expected return values, compared as values (ints) rather than as a flattened String -- confirm against Mock_scrib_fxt.Test__proc__kvps__vals
@Test public void Return_int() {
|
||||
fxt.Test__proc__kvps__vals(lib, Scrib_lib_ustring.Invk_find, Scrib_kv_utl_.base1_many_("a", "()", 2, Bool_.N), 2, 1, 2);
|
||||
}
|
||||
// Searches for "\n" in a String that mixes a 1-byte char (a / b), a 2-byte char (é / î) and a supplementary char (𡼾);
// runs once with bgn 1 and once with bgn 5, expecting "4;4" and "8;8" respectively
@Test public void Surrogate() { // PURPOSE: handle surrogates in Find PAGE:zh.w:南北鐵路_(越南); DATE:2014-08-28
|
||||
Exec_find("aé𡼾\nbî𡼾\n" , "\n" , 1, Bool_.N, "4;4"); // 4 b/c \n starts at pos 4 (super 1)
|
||||
Exec_find("aé𡼾\nbî𡼾\n" , "\n" , 5, Bool_.N, "8;8"); // 8 b/c \n starts at pos 8 (super 1)
|
||||
}
|
||||
// DELETE: no longer needed after tombstoning surrogate logic; DATE:2017-04-23
|
||||
// @Test public void Surrogate() { // PURPOSE: handle surrogates in Find PAGE:zh.w:南北鐵路_(越南); DATE:2014-08-28
|
||||
// Exec_find("aé𡼾\nbî𡼾\n" , "\n" , 1, Bool_.N, "4;4"); // 4 b/c \n starts at pos 4 (super 1)
|
||||
// Exec_find("aé𡼾\nbî𡼾\n" , "\n" , 5, Bool_.N, "8;8"); // 8 b/c \n starts at pos 8 (super 1)
|
||||
// }
|
||||
// Test helper: packs (text, regx, bgn, plain) into base-1 args via Scrib_kv_utl_.base1_many_, invokes Invk_find on lib,
// and passes expd to fxt.Test__proc__kvps__flat for comparison.
// expd is a String; NOTE(review): presumably the proc's results flattened and joined with ";" (e.g. "4;4") -- confirm in Mock_scrib_fxt
private void Exec_find(String text, String regx, int bgn, boolean plain, String expd) {
|
||||
fxt.Test__proc__kvps__flat(lib, Scrib_lib_ustring.Invk_find, Scrib_kv_utl_.base1_many_(text, regx, bgn, plain), expd);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user