1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2026-03-02 03:49:30 +00:00

Scribunto: Fix script error '=Module:zh-glyph:120 attempt to concatenate nil and string' on a few hundred en.d pages for Chinese chars

This commit is contained in:
gnosygnu
2017-04-23 09:47:58 -04:00
parent eaa83db644
commit 1d6b3779a0
4 changed files with 12 additions and 8 deletions

View File

@@ -56,7 +56,9 @@ public class Scrib_lib_ustring implements Scrib_lib {
synchronized (surrogate_utl) {
byte[] text_bry = Bry_.new_u8(text_str); int text_bry_len = text_bry.length;
bgn_char_idx = Bgn_adjust(text_str, bgn_char_idx);
int bgn_adj = surrogate_utl.Count_surrogates__char_idx(text_bry, text_bry_len, 0, bgn_char_idx); // NOTE: convert from lua / php charidx to java regex codepoint; PAGE:zh.w:南北鐵路 (越南) DATE:2014-08-27
// TOMBSTONE: do not adjust for 2-len chars (surrogates); lua always iterates correctly by chars; DATE:2017-04-23
// int bgn_adj = surrogate_utl.Count_surrogates__char_idx(text_bry, text_bry_len, 0, bgn_char_idx); // NOTE: convert from lua / php charidx to java regex codepoint; PAGE:zh.w:南北鐵路 (越南) DATE:2014-08-27
int bgn_adj = 0;
int bgn_codepoint_idx = bgn_char_idx + bgn_adj;
int bgn_byte_pos = surrogate_utl.Byte_pos();
if (String_.Len_eq_0(regx)) // regx of "" should return (bgn, bgn - 1) regardless of whether plain is true or false

View File

@@ -16,7 +16,7 @@ Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
package gplx.xowa.xtns.scribunto.libs; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.scribunto.*;
import org.junit.*; import gplx.xowa.xtns.scribunto.engines.mocks.*;
public class Scrib_lib_ustring__find__tst {
private final Mock_scrib_fxt fxt = new Mock_scrib_fxt(); private Scrib_lib lib;
private final Mock_scrib_fxt fxt = new Mock_scrib_fxt(); private Scrib_lib lib;
@Before public void init() {
fxt.Clear();
lib = fxt.Core().Lib_ustring().Init();
@@ -38,10 +38,11 @@ public class Scrib_lib_ustring__find__tst {
// Runs Invk_find on the ustring lib with the args ("a", "()", 2, Bool_.N) packed by Scrib_kv_utl_.base1_many_.
// The trailing values 2, 1, 2 are handed to the fixture's Test__proc__kvps__vals.
// NOTE(review): presumably those are the expected return values, checked as ints rather than strings (per the test name) -- confirm against Mock_scrib_fxt.
@Test public void Return_int() {
fxt.Test__proc__kvps__vals(lib, Scrib_lib_ustring.Invk_find, Scrib_kv_utl_.base1_many_("a", "()", 2, Bool_.N), 2, 1, 2);
}
// Searches for "\n" in a text that contains a supplementary (surrogate-pair) character on each line, starting at char index 1 and then 5.
// NOTE(review): an identical, commented-out copy of this test marked "DELETE ... DATE:2017-04-23" follows directly below in this view; confirm which of the two reflects the current state of the file.
@Test public void Surrogate() { // PURPOSE: handle surrogates in Find PAGE:zh.w:南北鐵路_(越南); DATE:2014-08-28
Exec_find("aé𡼾\nbî𡼾\n" , "\n" , 1, Bool_.N, "4;4"); // 4 b/c \n starts at pos 4 (super 1)
Exec_find("aé𡼾\nbî𡼾\n" , "\n" , 5, Bool_.N, "8;8"); // 8 b/c \n starts at pos 8 (super 1)
}
// DELETE: no longer needed after tombstoning surrogate logic; DATE:2017-04-23
// @Test public void Surrogate() { // PURPOSE: handle surrogates in Find PAGE:zh.w:南北鐵路_(越南); DATE:2014-08-28
// Exec_find("aé𡼾\nbî𡼾\n" , "\n" , 1, Bool_.N, "4;4"); // 4 b/c \n starts at pos 4 (super 1)
// Exec_find("aé𡼾\nbî𡼾\n" , "\n" , 5, Bool_.N, "8;8"); // 8 b/c \n starts at pos 8 (super 1)
// }
// Test helper: invokes Invk_find with (text, regx, bgn, plain) packed by Scrib_kv_utl_.base1_many_ and passes expd to the fixture's Test__proc__kvps__flat.
// text  : string to search
// regx  : pattern (or plain substring) to look for
// bgn   : start index given to find; the Surrogate cases pass 1 and 5
// plain : forwarded as the 4th find argument -- presumably Lua's "plain" flag that disables pattern matching; verify
// expd  : expected result; the Surrogate cases write it as "bgn;end" (e.g. "4;4") -- presumably the flattened return values; verify against Mock_scrib_fxt
private void Exec_find(String text, String regx, int bgn, boolean plain, String expd) {
fxt.Test__proc__kvps__flat(lib, Scrib_lib_ustring.Invk_find, Scrib_kv_utl_.base1_many_(text, regx, bgn, plain), expd);
}