mirror of
https://github.com/gnosygnu/xowa.git
synced 2026-03-02 03:49:30 +00:00
Scribunto: Classify 3-byte UTF-8 sequences as 1 Java char, not 2 Java chars [#377]
This commit is contained in:
@@ -55,7 +55,7 @@ public class Utf16_mapper {
|
||||
|
||||
// get lengths
|
||||
int cur_len_in_bytes = Utf8_.Len_of_char_by_1st_byte(src_bry[pos_in_bytes]);
|
||||
int cur_len_in_chars = cur_len_in_bytes > 2 ? 2 : 1;
|
||||
int cur_len_in_chars = cur_len_in_bytes == 4 ? 2 : 1; // NOTE: 3 bytes represent up to U+FFFF (65,536) which will fit in 1 char; REF:en.w:UTF-8; ISSUE#:377; DATE:2019-03-04
|
||||
|
||||
// increment
|
||||
pos_in_bytes += cur_len_in_bytes;
|
||||
|
||||
@@ -21,8 +21,8 @@ public class Utf16_mapper_tst {
|
||||
fxt.Test__map("a¢€𤭢"
|
||||
, Int_ary_.New( 0, 1, -1, 2, -1, -1, 3, -1, -1, -1, 4)
|
||||
, Int_ary_.New( 0, 1, 3, 6, 10, -1, -1, -1, -1, -1, -1)
|
||||
, Int_ary_.New( 0, 1, 2, -1, 3, -1, 4, -1, -1, -1, -1)
|
||||
, Int_ary_.New( 0, 1, 2, 4, 6, -1, -1, -1, -1, -1, -1)
|
||||
, Int_ary_.New( 0, 1, 2, 3, -1, 4, -1, -1, -1, -1, -1)
|
||||
, Int_ary_.New( 0, 1, 2, 3, 5, -1, -1, -1, -1, -1, -1)
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user