mirror of
https://github.com/gnosygnu/xowa.git
synced 2026-03-02 03:49:30 +00:00
Embeddable: Create core dbs in proper subdirectory
This commit is contained in:
@@ -13,73 +13,3 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.langs.cases; import gplx.*; import gplx.xowa.*; import gplx.xowa.langs.*;
|
||||
import gplx.core.primitives.*;
|
||||
import gplx.core.intls.*;
|
||||
public interface Xol_case_itm extends Gfo_case_itm {
|
||||
byte Tid();
|
||||
byte[] Src_ary();
|
||||
byte[] Trg_ary();
|
||||
void Case_build_upper(Bry_bfr bfr);
|
||||
void Case_build_lower(Bry_bfr bfr);
|
||||
void Case_reuse_upper(byte[] ary, int bgn, int len);
|
||||
void Case_reuse_lower(byte[] ary, int bgn, int len);
|
||||
Xol_case_itm Clone();
|
||||
}
|
||||
class Xol_case_itm_byt implements Xol_case_itm {
|
||||
public Xol_case_itm_byt(byte tid, byte src_byte, byte trg_byte) {
|
||||
this.tid = tid; this.src_byte = src_byte; this.trg_byte = trg_byte; this.src_ary = new byte[] {src_byte}; this.trg_ary = new byte[] {trg_byte};
|
||||
switch (tid) {
|
||||
case Xol_case_itm_.Tid_both:
|
||||
case Xol_case_itm_.Tid_upper: upper_byte = trg_byte; lower_byte = src_byte; break;
|
||||
case Xol_case_itm_.Tid_lower: upper_byte = src_byte; lower_byte = trg_byte; break;
|
||||
}
|
||||
}
|
||||
public byte Tid() {return tid;} private byte tid;
|
||||
public boolean Is_single_byte() {return true;}
|
||||
public byte[] Src_ary() {return src_ary;} private byte[] src_ary;
|
||||
public byte[] Trg_ary() {return trg_ary;} private byte[] trg_ary;
|
||||
public byte Src_byte() {return src_byte;} private byte src_byte;
|
||||
public byte Trg_byte() {return trg_byte;} private byte trg_byte;
|
||||
public void Case_build_upper(Bry_bfr bfr) {bfr.Add_byte(upper_byte);} private byte upper_byte;
|
||||
public void Case_build_lower(Bry_bfr bfr) {bfr.Add_byte(lower_byte);} private byte lower_byte;
|
||||
public void Case_reuse_upper(byte[] ary, int bgn, int len) {ary[bgn] = upper_byte;}
|
||||
public void Case_reuse_lower(byte[] ary, int bgn, int len) {ary[bgn] = lower_byte;}
|
||||
public Xol_case_itm Clone() {return new Xol_case_itm_byt(tid, src_byte, trg_byte);}
|
||||
public int Utf8_id_lo() {return lower_byte;}
|
||||
public int Hashcode_lo() {return lower_byte;}
|
||||
public int Len_lo() {return 1;}
|
||||
public byte[] Asymmetric_bry() {return null;}
|
||||
}
|
||||
class Xol_case_itm_bry implements Xol_case_itm {
|
||||
public Xol_case_itm_bry(byte tid, byte[] src_ary, byte[] trg_ary) {
|
||||
this.tid = tid; this.src_ary = src_ary; this.trg_ary = trg_ary;
|
||||
switch (tid) {
|
||||
case Xol_case_itm_.Tid_both: upper_ary = trg_ary; lower_ary = src_ary; break;
|
||||
case Xol_case_itm_.Tid_upper: upper_ary = trg_ary; lower_ary = src_ary; asymmetric_bry = src_ary; break;
|
||||
case Xol_case_itm_.Tid_lower: upper_ary = src_ary; lower_ary = trg_ary; asymmetric_bry = trg_ary; break;
|
||||
}
|
||||
len_lo = lower_ary.length;
|
||||
utf8_id_lo = Utf16_.Decode_to_int(lower_ary, 0);
|
||||
hashcode_ci_lo = Bry_obj_ref.CalcHashCode(lower_ary, 0, len_lo);
|
||||
}
|
||||
public byte Tid() {return tid;} public Xol_case_itm_bry Tid_(byte v) {tid = v; return this;} private byte tid;
|
||||
public boolean Is_single_byte() {return false;}
|
||||
public byte[] Src_ary() {return src_ary;} private byte[] src_ary;
|
||||
public byte[] Trg_ary() {return trg_ary;} private byte[] trg_ary;
|
||||
public void Case_build_upper(Bry_bfr bfr) {bfr.Add(upper_ary);} private byte[] upper_ary;
|
||||
public void Case_build_lower(Bry_bfr bfr) {bfr.Add(lower_ary);} private byte[] lower_ary;
|
||||
public void Case_reuse_upper(byte[] ary, int bgn, int len) { // ASSUME: upper/lower have same width; i.e.: upper'ing a character doesn't go from a 2-width byte to a 3-width byte
|
||||
for (int i = 0; i < len; i++)
|
||||
ary[i + bgn] = upper_ary[i];
|
||||
}
|
||||
public void Case_reuse_lower(byte[] ary, int bgn, int len) { // ASSUME: upper/lower have same width; i.e.: upper'ing a character doesn't go from a 2-width byte to a 3-width byte
|
||||
for (int i = 0; i < len; i++)
|
||||
ary[i + bgn] = lower_ary[i];
|
||||
}
|
||||
public Xol_case_itm Clone() {return new Xol_case_itm_bry(tid, src_ary, trg_ary);}
|
||||
public int Len_lo() {return len_lo;} private int len_lo;
|
||||
public int Utf8_id_lo() {return utf8_id_lo;} private int utf8_id_lo;
|
||||
public byte[] Asymmetric_bry() {return asymmetric_bry;} private byte[] asymmetric_bry;
|
||||
public int Hashcode_lo() {return hashcode_ci_lo;} private int hashcode_ci_lo;
|
||||
}
|
||||
|
||||
@@ -13,135 +13,3 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.langs.cases; import gplx.*; import gplx.xowa.*; import gplx.xowa.langs.*;
|
||||
import gplx.xowa.langs.parsers.*;
|
||||
public class Xol_case_itm_ {
|
||||
public static final byte Tid_both = 0, Tid_upper = 1, Tid_lower = 2;
|
||||
public static Xol_case_itm new_(int tid, String src_str, String trg_str) {return new_((byte)tid, Bry_.new_u8(src_str), Bry_.new_u8(trg_str));}
|
||||
public static Xol_case_itm new_(byte tid, byte[] src, byte[] trg) {
|
||||
if (src.length == 1 && trg.length == 1)
|
||||
return new Xol_case_itm_byt(tid, src[0], trg[0]);
|
||||
else
|
||||
return new Xol_case_itm_bry(tid, src, trg);
|
||||
}
|
||||
public static Xol_case_itm[] parse_xo_(byte[] src) {
|
||||
List_adp list = List_adp_.New();
|
||||
int src_len = src.length, src_pos = 0, fld_bgn = 0, fld_idx = 0;
|
||||
byte cur_cmd = Byte_.Zero;
|
||||
byte[] cur_lhs = null;
|
||||
Xol_csv_parser csv_parser = Xol_csv_parser.Instance;
|
||||
while (true) {
|
||||
boolean last = src_pos == src_len;
|
||||
byte b = last ? Byte_ascii.Nl : src[src_pos];
|
||||
switch (b) {
|
||||
case Byte_ascii.Pipe:
|
||||
switch (fld_idx) {
|
||||
case 0:
|
||||
boolean fail = true;
|
||||
if (src_pos - fld_bgn == 1) {
|
||||
byte cmd_byte = src[src_pos - 1];
|
||||
cur_cmd = Byte_.Zero;
|
||||
switch (cmd_byte) {
|
||||
case Byte_ascii.Num_0: cur_cmd = Xol_case_itm_.Tid_both; fail = false; break;
|
||||
case Byte_ascii.Num_1: cur_cmd = Xol_case_itm_.Tid_upper; fail = false; break;
|
||||
case Byte_ascii.Num_2: cur_cmd = Xol_case_itm_.Tid_lower; fail = false; break;
|
||||
}
|
||||
}
|
||||
if (fail) throw Err_.new_wo_type("cmd is invalid", "cmd", String_.new_u8(src, fld_bgn, src_pos));
|
||||
break;
|
||||
case 1: cur_lhs = csv_parser.Load(src, fld_bgn, src_pos); break;
|
||||
}
|
||||
++fld_idx;
|
||||
fld_bgn = src_pos + 1;
|
||||
break;
|
||||
case Byte_ascii.Nl:
|
||||
if (!(fld_idx == 0 && fld_bgn == src_pos)) {
|
||||
byte[] cur_rhs = csv_parser.Load(src, fld_bgn, src_pos);
|
||||
Xol_case_itm itm = Xol_case_itm_.new_(cur_cmd, cur_lhs, cur_rhs);
|
||||
list.Add(itm);
|
||||
}
|
||||
cur_cmd = Byte_.Zero;
|
||||
cur_lhs = null;
|
||||
fld_idx = 0;
|
||||
fld_bgn = src_pos + 1;
|
||||
break;
|
||||
}
|
||||
if (last) break;
|
||||
++src_pos;
|
||||
}
|
||||
return (Xol_case_itm[])list.To_ary(Xol_case_itm.class);
|
||||
}
|
||||
public static Xol_case_itm[] parse_mw_(byte[] raw) {
|
||||
Ordered_hash hash = Ordered_hash_.New_bry();
|
||||
int pos = 0;
|
||||
pos = parse_mw_grp(hash, raw, Bool_.Y, pos);
|
||||
pos = parse_mw_grp(hash, raw, Bool_.N, pos);
|
||||
return (Xol_case_itm[])hash.To_ary(Xol_case_itm.class);
|
||||
}
|
||||
private static int parse_mw_grp(Ordered_hash hash, byte[] raw, boolean section_is_upper, int find_bgn) {
|
||||
byte[] find = section_is_upper ? parse_mw_upper : parse_mw_lower;
|
||||
int raw_len = raw.length;
|
||||
int pos = Bry_find_.Find_fwd(raw, find, find_bgn); if (pos == Bry_find_.Not_found) throw Err_.new_wo_type("could not find section name", "name", String_.new_u8(find));
|
||||
pos = Bry_find_.Find_fwd(raw, Byte_ascii.Curly_bgn, pos, raw_len); if (pos == Bry_find_.Not_found) throw Err_.new_wo_type("could not find '{' after section name", "name", String_.new_u8(find));
|
||||
int itm_bgn = 0;
|
||||
boolean quote_off = true, itm_is_first = true;
|
||||
byte[] cur_lhs = Bry_.Empty;
|
||||
boolean loop = true;
|
||||
while (loop) {
|
||||
if (pos >= raw_len) break;
|
||||
byte b = raw[pos];
|
||||
switch (b) {
|
||||
case Byte_ascii.Quote:
|
||||
if (quote_off) {
|
||||
itm_bgn = pos + 1;
|
||||
quote_off = false;
|
||||
}
|
||||
else {
|
||||
if (itm_is_first) {
|
||||
cur_lhs = Bry_.Mid(raw, itm_bgn, pos);
|
||||
itm_is_first = false;
|
||||
}
|
||||
else {
|
||||
byte[] cur_rhs = Bry_.Mid(raw, itm_bgn, pos);
|
||||
byte[] upper = null, lower = null; byte tid = Byte_.Zero, rev_tid = Byte_.Zero;
|
||||
if (section_is_upper) {
|
||||
upper = cur_rhs;
|
||||
lower = cur_lhs;
|
||||
tid = Xol_case_itm_.Tid_upper;
|
||||
rev_tid = Xol_case_itm_.Tid_lower;
|
||||
}
|
||||
else {
|
||||
upper = cur_lhs;
|
||||
lower = cur_rhs;
|
||||
tid = Xol_case_itm_.Tid_lower;
|
||||
rev_tid = Xol_case_itm_.Tid_upper;
|
||||
}
|
||||
Xol_case_itm_bry itm = (Xol_case_itm_bry)hash.Get_by(upper);
|
||||
if (itm == null) {
|
||||
itm = new Xol_case_itm_bry(tid, upper, lower);
|
||||
hash.Add(upper, itm);
|
||||
}
|
||||
else {
|
||||
if (itm.Tid() == rev_tid && Bry_.Eq(itm.Src_ary(), upper) && Bry_.Eq(itm.Trg_ary(), lower))
|
||||
itm.Tid_(Xol_case_itm_.Tid_both);
|
||||
else {
|
||||
itm = new Xol_case_itm_bry(tid, cur_lhs, cur_rhs);
|
||||
byte[] add_key = Bry_.Add(section_is_upper ? Bry_upper : Bry_lower, Bry_pipe, upper, Bry_pipe, lower);
|
||||
hash.Add(add_key, itm);
|
||||
}
|
||||
}
|
||||
itm_is_first = true;
|
||||
}
|
||||
quote_off = true;
|
||||
}
|
||||
break;
|
||||
case Byte_ascii.Curly_end:
|
||||
loop = false;
|
||||
break;
|
||||
}
|
||||
++pos;
|
||||
}
|
||||
return pos;
|
||||
} private static final byte[] parse_mw_upper= Bry_.new_a7("wikiUpperChars"), parse_mw_lower= Bry_.new_a7("wikiLowerChars"), Bry_upper = Bry_.new_a7("upper"), Bry_lower = Bry_.new_a7("lower"), Bry_pipe = Bry_.new_a7("|");
|
||||
static final String GRP_KEY = "xowa.langs.case_parser";
|
||||
}
|
||||
|
||||
@@ -13,127 +13,3 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.langs.cases; import gplx.*; import gplx.xowa.*; import gplx.xowa.langs.*;
|
||||
import gplx.core.btries.*; import gplx.core.intls.*;
|
||||
public class Xol_case_mgr implements Gfo_invk, Gfo_case_mgr {
|
||||
private final Btrie_fast_mgr upper_trie = Btrie_fast_mgr.cs(), lower_trie = Btrie_fast_mgr.cs(); private Xol_case_itm[] itms;
|
||||
public Xol_case_mgr(byte tid) {this.tid = tid;}
|
||||
public byte Tid() {return tid;} private byte tid;
|
||||
public Gfo_case_itm Get_or_null(byte bgn_byte, byte[] src, int bgn, int end) {
|
||||
Object rv = lower_trie.Match_bgn_w_byte(bgn_byte, src, bgn, end);
|
||||
return rv == null
|
||||
? (Gfo_case_itm)upper_trie.Match_bgn_w_byte(bgn_byte, src, bgn, end)
|
||||
: (Gfo_case_itm)rv;
|
||||
}
|
||||
public void Clear() {upper_trie.Clear(); lower_trie.Clear();}
|
||||
public boolean Match_any_exists(byte b, byte[] src, int bgn_pos, int end_pos) {
|
||||
return upper_trie.Match_bgn_w_byte(b, src, bgn_pos, end_pos) != null
|
||||
|| lower_trie.Match_bgn_w_byte(b, src, bgn_pos, end_pos) != null
|
||||
;
|
||||
}
|
||||
public Object Match_upper(byte b, byte[] src, int bgn_pos, int end_pos) {return upper_trie.Match_bgn_w_byte(b, src, bgn_pos, end_pos);}
|
||||
public void Add_bulk(byte[] raw) {Add_bulk(Xol_case_itm_.parse_xo_(raw));}
|
||||
public Xol_case_mgr Add_bulk(Xol_case_itm[] ary) {
|
||||
itms = ary;
|
||||
int itms_len = itms.length;
|
||||
for (int i = 0; i < itms_len; i++) {
|
||||
Xol_case_itm itm = itms[i];
|
||||
switch (itm.Tid()) {
|
||||
case Xol_case_itm_.Tid_both:
|
||||
upper_trie.Add(itm.Src_ary(), itm);
|
||||
lower_trie.Add(itm.Trg_ary(), itm);
|
||||
break;
|
||||
case Xol_case_itm_.Tid_upper:
|
||||
upper_trie.Add(itm.Src_ary(), itm);
|
||||
break;
|
||||
case Xol_case_itm_.Tid_lower:
|
||||
lower_trie.Add(itm.Src_ary(), itm);
|
||||
break;
|
||||
}
|
||||
}
|
||||
return this;
|
||||
}
|
||||
public byte[] Case_reuse_upper(byte[] src, int bgn, int end) {return Case_reuse(Bool_.Y, src, bgn, end);}
|
||||
public byte[] Case_reuse_lower(byte[] src, int bgn, int end) {return Case_reuse(Bool_.N, src, bgn, end);}
|
||||
public byte[] Case_reuse(boolean upper, byte[] src, int bgn, int end) {
|
||||
Btrie_fast_mgr trie = upper ? upper_trie : lower_trie;
|
||||
Btrie_rv trv = new Btrie_rv(); // TS.MEM: DATE:2016-07-12
|
||||
int pos = bgn;
|
||||
while (true) {
|
||||
if (pos >= end) break;
|
||||
byte b = src[pos];
|
||||
int b_len = gplx.core.intls.Utf8_.Len_of_char_by_1st_byte(b);
|
||||
|
||||
Object o = trie.Match_at_w_b0(trv, b, src, pos, end); // NOTE: used to be (b, src, bgn, end) which would never case correctly; DATE:2013-12-25; TS: DATE:2016-07-06
|
||||
if (o != null && pos < end) { // pos < end used for casing 1st letter only; upper_1st will pass end of 1
|
||||
Xol_case_itm itm = (Xol_case_itm)o;
|
||||
if (upper)
|
||||
itm.Case_reuse_upper(src, pos, b_len);
|
||||
else
|
||||
itm.Case_reuse_lower(src, pos, b_len);
|
||||
}
|
||||
else {} // noop
|
||||
pos += b_len;
|
||||
}
|
||||
return src;
|
||||
}
|
||||
public byte[] Case_reuse_1st_upper(byte[] src) { // NOTE: optimized version called by Frame_ttl; DATE:2014-06-21
|
||||
int src_len = src.length;
|
||||
if (src_len == 0) return src; // empty bry
|
||||
byte b = src[0];
|
||||
int b_len = gplx.core.intls.Utf8_.Len_of_char_by_1st_byte(b);
|
||||
|
||||
Btrie_rv trv = new Btrie_rv(); // TS.MEM: DATE:2016-07-12
|
||||
Object o = upper_trie.Match_at_w_b0(trv, b, src, 0, b_len);
|
||||
if (o == null) return src; // 1st letter is not a lower case char (either num, symbol, or upper)
|
||||
Xol_case_itm itm = (Xol_case_itm)o;
|
||||
Bry_bfr tmp_bfr = Bry_bfr_.New(); // TS.MEM: DATE:2016-07-12
|
||||
itm.Case_build_upper(tmp_bfr);
|
||||
tmp_bfr.Add_mid(src, trv.Pos(), src_len);
|
||||
return tmp_bfr.To_bry_and_clear();
|
||||
}
|
||||
public byte[] Case_build_upper(byte[] src) {return Case_build_upper(src, 0, src.length);}
|
||||
public byte[] Case_build_upper(byte[] src, int bgn, int end) {return Case_build(Bool_.Y, src, bgn, end);}
|
||||
public byte[] Case_build_lower(byte[] src) {return Case_build_lower(src, 0, src.length);}
|
||||
public byte[] Case_build_lower(byte[] src, int bgn, int end) {return Case_build(Bool_.N, src, bgn, end);}
|
||||
public byte[] Case_build(boolean upper, byte[] src, int bgn, int end) {
|
||||
Btrie_fast_mgr trie = upper ? upper_trie : lower_trie;
|
||||
Btrie_rv trv = new Btrie_rv(); // TS.MEM: DATE:2016-07-12
|
||||
Bry_bfr tmp_bfr = Bry_bfr_.New(); // TS.MEM: DATE:2016-07-12
|
||||
int pos = bgn;
|
||||
while (true) {
|
||||
if (pos >= end) break;
|
||||
byte b = src[pos];
|
||||
int b_len = gplx.core.intls.Utf8_.Len_of_char_by_1st_byte(b);
|
||||
|
||||
Object o = trie.Match_at_w_b0(trv, b, src, pos, end); // NOTE: used to be (b, src, bgn, end) which would never case correctly; DATE:2013-12-25;
|
||||
if (o != null && pos < end) { // pos < end used for casing 1st letter only; upper_1st will pass end of 1
|
||||
Xol_case_itm itm = (Xol_case_itm)o;
|
||||
if (upper)
|
||||
itm.Case_build_upper(tmp_bfr);
|
||||
else
|
||||
itm.Case_build_lower(tmp_bfr);
|
||||
}
|
||||
else {
|
||||
tmp_bfr.Add_mid(src, pos, pos + b_len);
|
||||
}
|
||||
pos += b_len;
|
||||
}
|
||||
return tmp_bfr.To_bry_and_clear();
|
||||
}
|
||||
public byte[] Case_build_1st_upper(Bry_bfr bfr, byte[] src, int bgn, int end) {return Case_build_1st(bfr, Bool_.Y, src, bgn, end);}
|
||||
public byte[] Case_build_1st_lower(Bry_bfr bfr, byte[] src, int bgn, int end) {return Case_build_1st(bfr, Bool_.N, src, bgn, end);}
|
||||
public byte[] Case_build_1st(Bry_bfr bfr, boolean upper, byte[] src, int bgn, int end) {
|
||||
if (bgn == end) return Bry_.Empty; // upper "" -> ""
|
||||
int b_len = gplx.core.intls.Utf8_.Len_of_char_by_1st_byte(src[bgn]);
|
||||
bfr.Add(Case_build(upper, src, bgn, bgn + b_len));
|
||||
bfr.Add_mid(src, bgn + b_len, end);
|
||||
return bfr.To_bry_and_clear();
|
||||
}
|
||||
public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
|
||||
if (ctx.Match(k, Invk_add_bulk)) Add_bulk(m.ReadBry("v"));
|
||||
else if (ctx.Match(k, Invk_clear)) throw Err_.new_unimplemented();
|
||||
else return Gfo_invk_.Rv_unhandled;
|
||||
return this;
|
||||
} private static final String Invk_clear = "clear", Invk_add_bulk = "add_bulk";
|
||||
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -13,144 +13,3 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.langs.cases; import gplx.*; import gplx.xowa.*; import gplx.xowa.langs.*;
|
||||
import org.junit.*; import gplx.core.strings.*;
|
||||
public class Xol_case_mgr_tst {
|
||||
@Before public void init() {fxt.Clear();} private Xol_case_mgr_fxt fxt = new Xol_case_mgr_fxt();
|
||||
@Test public void Mw_parse() {
|
||||
fxt.parse_mw__tst(fxt.itm_both_("A", "a"), fxt.itm_both_("B", "b"));
|
||||
}
|
||||
@Test public void Xo_parse() {
|
||||
fxt.parse_xo__tst(fxt.Init_ltrs_raw(), fxt.itm_both_("a", "A"), fxt.itm_upper_("b", "B"), fxt.itm_lower_("C", "c"));
|
||||
}
|
||||
@Test public void Upper_a() {fxt.Init_ltrs().Upper("aAaz", "AAAz");}
|
||||
@Test public void Upper_ab() {fxt.Init_ltrs().Upper("abac", "ABAc");}
|
||||
@Test public void Lower_a() {fxt.Init_ltrs().Lower("aAaZ", "aaaZ");}
|
||||
@Test public void Lower_ac() {fxt.Init_ltrs().Lower("ABAC", "aBac");}
|
||||
@Test public void Upper_1st() {
|
||||
fxt.Init_ltrs_universal();
|
||||
fxt.Test_reuse_1st_upper("a", "A");
|
||||
fxt.Test_reuse_1st_upper("abc", "Abc");
|
||||
fxt.Test_reuse_1st_upper("");
|
||||
fxt.Test_reuse_1st_upper("Abc");
|
||||
fxt.Test_reuse_1st_upper("é", "É");
|
||||
fxt.Test_reuse_1st_upper("É");
|
||||
fxt.Lower("Ι", "ι"); // PURPOSE:test reversal; PAGE:en.d:ἀρχιερεύς DATE:2014-09-02
|
||||
}
|
||||
@Test public void Turkish_redirect() { // PURPOSE: lowercase redirect should match uppercase for asymmetric brys; PAGE:tr.w:Zvishavane DATE:2015-09-07
|
||||
Hash_adp_bry hash = Hash_adp_bry.c__u8(Bool_.N, Xol_case_mgr_.U8());
|
||||
byte[] upper = Bry_.new_u8("YÖNLENDİRME");
|
||||
byte[] lower = Bry_.new_u8("yönlendirme");
|
||||
hash.Add(upper, upper); // add upper to hash
|
||||
Tfds.Eq_bry(upper, (byte[])hash.Get_by_bry(lower)); // get upper by using lower
|
||||
}
|
||||
// @Test public void Hack() {
|
||||
// Xol_case_itm[] ary = Xol_case_mgr_.Utf_8;
|
||||
// Bry_bfr bfr = Bry_bfr_.New();
|
||||
// for (int i = 0; i < ary.length; i++) {
|
||||
// Xol_case_itm itm = ary[i];
|
||||
// bfr.Add_str_a7("xo|");
|
||||
// bfr.Add_bry_comma(itm.Src_ary()).Add_byte_pipe();
|
||||
// bfr.Add_bry_comma(itm.Trg_ary()).Add_byte_nl();
|
||||
// }
|
||||
// Io_mgr.Instance.SaveFilStr("C:\\test1.txt", bfr.To_str_and_clear());
|
||||
// }
|
||||
}
|
||||
class Xol_case_mgr_fxt {
|
||||
private Xol_case_mgr case_mgr = Xol_case_mgr_.new_(); private String_bldr sb = String_bldr_.new_();
|
||||
public void Clear() {case_mgr.Clear();}
|
||||
public Xol_case_itm_bry itm_both_(String src, String trg) {return new Xol_case_itm_bry(Xol_case_itm_.Tid_both , Bry_.new_u8(src), Bry_.new_u8(trg));}
|
||||
public Xol_case_itm_bry itm_upper_(String src, String trg) {return new Xol_case_itm_bry(Xol_case_itm_.Tid_upper, Bry_.new_u8(src), Bry_.new_u8(trg));}
|
||||
public Xol_case_itm_bry itm_lower_(String src, String trg) {return new Xol_case_itm_bry(Xol_case_itm_.Tid_lower, Bry_.new_u8(src), Bry_.new_u8(trg));}
|
||||
public String Init_ltrs_raw() {
|
||||
return String_.Concat_lines_nl
|
||||
( "0|a|A"
|
||||
, "1|b|B"
|
||||
, "2|C|c"
|
||||
);
|
||||
}
|
||||
public Xol_case_mgr_fxt Init_ltrs() {
|
||||
case_mgr = Xol_case_mgr_.new_();
|
||||
case_mgr.Add_bulk(Bry_.new_u8(Init_ltrs_raw()));
|
||||
return this;
|
||||
}
|
||||
public Xol_case_mgr_fxt Init_ltrs_universal() {
|
||||
case_mgr = Xol_case_mgr_.U8();
|
||||
return this;
|
||||
}
|
||||
public Xol_case_mgr_fxt Upper(String raw_str, String expd) {return Case_build(Bool_.Y, raw_str, expd);}
|
||||
public Xol_case_mgr_fxt Lower(String raw_str, String expd) {return Case_build(Bool_.N, raw_str, expd);}
|
||||
public Xol_case_mgr_fxt Case_build(boolean upper, String raw_str, String expd) {
|
||||
byte[] raw = Bry_.new_u8(raw_str);
|
||||
byte[] actl = case_mgr.Case_build(upper, raw, 0, raw.length);
|
||||
Tfds.Eq(expd, String_.new_u8(actl));
|
||||
return this;
|
||||
}
|
||||
public void parse_xo__tst(String raw, Xol_case_itm_bry... expd) {
|
||||
Tfds.Eq_str_lines(Xto_str(expd), Xto_str(Xol_case_itm_.parse_xo_(Bry_.new_u8(raw))));
|
||||
}
|
||||
public void parse_mw__tst(Xol_case_itm_bry... expd) {
|
||||
String raw = raw_(expd);
|
||||
Xol_case_itm[] actl = Xol_case_itm_.parse_mw_(Bry_.new_u8(raw));
|
||||
Tfds.Eq_str_lines(Xto_str(expd), Xto_str(actl));
|
||||
}
|
||||
public String Xto_str(Xol_case_itm[] ary) {
|
||||
int ary_len = ary.length;
|
||||
for (int i = 0; i < ary_len; i++) {
|
||||
Xol_case_itm itm = ary[i];
|
||||
sb.Add(Byte_.To_str(itm.Tid())).Add_char_pipe().Add(String_.new_u8(itm.Src_ary())).Add_char_pipe().Add(String_.new_u8(itm.Trg_ary())).Add_char_nl();
|
||||
}
|
||||
return sb.To_str_and_clear();
|
||||
}
|
||||
public String raw_(Xol_case_itm_bry[] itms) {
|
||||
int itms_len = itms.length;
|
||||
uppers_list.Clear(); lowers_list.Clear();
|
||||
for (int i = 0; i < itms_len; i++) {
|
||||
Xol_case_itm_bry itm = itms[i];
|
||||
String src = String_.new_u8(itm.Src_ary());
|
||||
String trg = String_.new_u8(itm.Trg_ary());
|
||||
switch (itm.Tid()) {
|
||||
case Xol_case_itm_.Tid_both:
|
||||
uppers_list.Add(trg); uppers_list.Add(src);
|
||||
lowers_list.Add(src); lowers_list.Add(trg);
|
||||
break;
|
||||
}
|
||||
}
|
||||
return raw_str_(uppers_list.To_str_ary(), lowers_list.To_str_ary());
|
||||
} List_adp uppers_list = List_adp_.New(), lowers_list = List_adp_.New();
|
||||
String raw_str_(String[] uppers, String[] lowers) {
|
||||
sb.Add("a:2:{s:14:\"wikiUpperChars\";a:1046:{");
|
||||
raw_ary(sb, uppers);
|
||||
sb.Add("}");
|
||||
sb.Add("s:14:\"wikiLowerChars\";a:1038:{");
|
||||
raw_ary(sb, lowers);
|
||||
sb.Add("}}");
|
||||
return sb.To_str_and_clear();
|
||||
}
|
||||
private void raw_ary(String_bldr sb, String[] ary) {
|
||||
int ary_len = ary.length;
|
||||
for (int i = 0; i < ary_len; i++) {
|
||||
String itm = ary[i];
|
||||
int itm_len = String_.Len(itm);
|
||||
sb.Add_fmt("s:{0}:\"{1}\";", itm_len, itm);
|
||||
}
|
||||
}
|
||||
public void Test_reuse_1st_upper(String raw) {Test_reuse_1st_upper(raw, null, Bool_.Y);}
|
||||
public void Test_reuse_1st_upper(String raw, String expd) {Test_reuse_1st_upper(raw, expd, Bool_.N);}
|
||||
private void Test_reuse_1st_upper(String raw, String expd, boolean expd_is_same) {
|
||||
byte[] raw_bry = Bry_.new_u8(raw);
|
||||
byte[] actl_bry = case_mgr.Case_reuse_1st_upper(raw_bry);
|
||||
String actl_str = String_.new_u8(actl_bry);
|
||||
boolean actl_is_same = Object_.Eq(raw_bry, actl_bry); // pointers will be same if no change
|
||||
if (expd_is_same) {
|
||||
Tfds.Eq_true(actl_is_same, "expd should be same: " + actl_str);
|
||||
}
|
||||
else {
|
||||
Tfds.Eq_true(!actl_is_same, "expd should not be same: " + actl_str);
|
||||
Tfds.Eq(expd, actl_str, expd);
|
||||
}
|
||||
}
|
||||
}
|
||||
/*
|
||||
a:2:{s:14:"wikiUpperChars";a:1046:{s:1:"a";s:1:"A";s:1:"b";}s:14:"wikiLowerChars";a:1038:{s:1:"A";s:1:"a";s:1:"B";}}
|
||||
*/
|
||||
Reference in New Issue
Block a user