mirror of
https://github.com/gnosygnu/xowa.git
synced 2026-03-02 03:49:30 +00:00
v1.9.1.1
This commit is contained in:
35
400_xowa/src/gplx/intl/String_surrogate_utl.java
Normal file
35
400_xowa/src/gplx/intl/String_surrogate_utl.java
Normal file
@@ -0,0 +1,35 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.intl; import gplx.*;
|
||||
public class String_surrogate_utl {
|
||||
public int Byte_pos() {return byte_pos;} int byte_pos;
|
||||
public int Count_surrogates__char_idx(byte[] src, int src_len, int byte_bgn, int char_idx) {return Count_surrogates(src, src_len, byte_bgn, Bool_.Y, char_idx);}
|
||||
public int Count_surrogates__codepoint_idx1(byte[] src, int src_len, int byte_bgn, int codepoint_idx) {return Count_surrogates(src, src_len, byte_bgn, Bool_.N, codepoint_idx);}
|
||||
private int Count_surrogates(byte[] src, int src_len, int byte_bgn, boolean stop_idx_is_char, int stop_idx) {
|
||||
int char_count = 0, codepoint_count = 0;
|
||||
byte_pos = byte_bgn;
|
||||
while (true) {
|
||||
if (stop_idx == (stop_idx_is_char ? char_count : codepoint_count)) return codepoint_count - char_count;
|
||||
if (byte_pos >= src_len) throw Err_.new_("codepoint_idx is not in string; stop_idx={0} stop_idx_is_char={1} byte_bgn={2} string={3}", stop_idx, stop_idx_is_char, byte_bgn, String_.new_utf8_(src));
|
||||
int char_len_in_bytes = gplx.intl.Utf8_.Len_of_char_by_1st_byte(src[byte_pos]);
|
||||
++char_count; // char_count always incremented by 1
|
||||
codepoint_count += (char_len_in_bytes == 4) ? 2 : 1; // codepoint_count incremented by 2 if surrogate pair; else 1
|
||||
byte_pos += char_len_in_bytes;
|
||||
}
|
||||
}
|
||||
}
|
||||
56
400_xowa/src/gplx/intl/String_surrogate_utl_tst.java
Normal file
56
400_xowa/src/gplx/intl/String_surrogate_utl_tst.java
Normal file
@@ -0,0 +1,56 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.intl; import gplx.*;
|
||||
import org.junit.*;
|
||||
public class String_surrogate_utl_tst {
|
||||
@Before public void init() {fxt.Clear();} private String_surrogate_utl_fxt fxt = new String_surrogate_utl_fxt();
|
||||
@Test public void Char_idx() {
|
||||
String test_str = "aé𡼾bî𡼾";
|
||||
fxt.Test_count_surrogates__char_idx (test_str, 0, 1, 0, 1); // a
|
||||
fxt.Test_count_surrogates__char_idx (test_str, 0, 2, 0, 3); // aé
|
||||
fxt.Test_count_surrogates__char_idx (test_str, 0, 3, 1, 7); // aé𡼾
|
||||
fxt.Test_count_surrogates__char_idx (test_str, 7, 1, 0, 8); // b
|
||||
fxt.Test_count_surrogates__char_idx (test_str, 7, 2, 0, 10); // bî
|
||||
fxt.Test_count_surrogates__char_idx (test_str, 7, 3, 1, 14); // bî𡼾
|
||||
fxt.Test_count_surrogates__char_idx (test_str, 0, 6, 2, 14); // aé𡼾bî𡼾
|
||||
}
|
||||
@Test public void Codepoint_idx() {
|
||||
String test_str = "aé𡼾bî𡼾";
|
||||
fxt.Test_count_surrogates__codepoint_idx (test_str, 0, 1, 0, 1); // a
|
||||
fxt.Test_count_surrogates__codepoint_idx (test_str, 0, 2, 0, 3); // aé
|
||||
fxt.Test_count_surrogates__codepoint_idx (test_str, 0, 4, 1, 7); // aé𡼾
|
||||
fxt.Test_count_surrogates__codepoint_idx (test_str, 7, 1, 0, 8); // b
|
||||
fxt.Test_count_surrogates__codepoint_idx (test_str, 7, 2, 0, 10); // bî
|
||||
fxt.Test_count_surrogates__codepoint_idx (test_str, 7, 4, 1, 14); // bî𡼾
|
||||
fxt.Test_count_surrogates__codepoint_idx (test_str, 0, 8, 2, 14); // aé𡼾bî𡼾
|
||||
}
|
||||
}
|
||||
class String_surrogate_utl_fxt {
|
||||
private String_surrogate_utl codepoint_utl = new String_surrogate_utl();
|
||||
public void Clear() {}
|
||||
public void Test_count_surrogates__char_idx(String src_str, int bgn_byte, int char_idx, int expd_count, int expd_pos) {
|
||||
byte[] src_bry = Bry_.new_utf8_(src_str); int src_len = src_bry.length;
|
||||
Tfds.Eq(expd_count , codepoint_utl.Count_surrogates__char_idx(src_bry, src_len, bgn_byte, char_idx));
|
||||
Tfds.Eq(expd_pos , codepoint_utl.Byte_pos());
|
||||
}
|
||||
public void Test_count_surrogates__codepoint_idx(String src_str, int bgn_byte, int char_idx, int expd_count, int expd_pos) {
|
||||
byte[] src_bry = Bry_.new_utf8_(src_str); int src_len = src_bry.length;
|
||||
Tfds.Eq(expd_count , codepoint_utl.Count_surrogates__codepoint_idx1(src_bry, src_len, bgn_byte, char_idx), "count");
|
||||
Tfds.Eq(expd_pos , codepoint_utl.Byte_pos(), "pos");
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user