1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2026-03-02 03:49:30 +00:00
This commit is contained in:
gnosygnu
2015-09-20 23:43:51 -04:00
parent 5fe27b5b3b
commit fa70c05354
1056 changed files with 8375 additions and 7095 deletions

View File

@@ -36,6 +36,7 @@ public class Err_ {
public static Err new_parse_exc(Exception e, Class<?> c, String raw) {return new_parse(Type_adp_.FullNameOf_type(c), raw).Args_add("e", Err_.Message_lang(e));}
public static Err new_parse(String type, String raw) {return new Err(Bool_.Y, Trace_null, Type__gplx, "parse failed", "type", type, "raw", raw);}
public static Err new_null() {return new Err(Bool_.Y, Trace_null, Type__gplx, "null obj");}
public static Err new_null(String arg) {return new Err(Bool_.Y, Trace_null, Type__gplx, "null obj", "arg", arg);}
public static Err new_missing_idx(int idx, int len) {return new Err(Bool_.Y, Trace_null, Type__gplx, "index is out of bounds", "idx", idx, "len", len);}
public static Err new_missing_key(String key) {return new Err(Bool_.Y, Trace_null, Type__gplx, "key not found", "key", key);}
public static Err new_invalid_op(String msg) {return new Err(Bool_.Y, Trace_null, Type__gplx, msg);}

View File

@@ -29,14 +29,14 @@ public class Bry_rdr {
public void Pos_add_one() {++pos;}
public int Or_int() {return or_int;} public void Or_int_(int v) {or_int = v;} private int or_int = Int_.Min_value;
public byte[] Or_bry() {return or_bry;} public void Or_bry_(byte[] v) {or_bry = v;} private byte[] or_bry;
public int Find_fwd(byte find) {return Bry_finder.Find_fwd(src, find, pos);}
public int Find_fwd_ws() {return Bry_finder.Find_fwd_until_ws(src, pos, src_len);}
public int Find_fwd(byte find) {return Bry_find_.Find_fwd(src, find, pos);}
public int Find_fwd_ws() {return Bry_find_.Find_fwd_until_ws(src, pos, src_len);}
public int Find_fwd__pos_at_lhs(byte[] find_bry) {return Find_fwd__pos_at(find_bry, Bool_.N);}
public int Find_fwd__pos_at_rhs(byte[] find_bry) {return Find_fwd__pos_at(find_bry, Bool_.Y);}
/**
 * Searches forward from the current position for {@code find_bry}.
 * On a match, the reader position is moved to the start of the match, or to the first byte
 * after it when {@code pos_at_rhs} is true, and that position is returned.
 * When nothing is found, {@code Bry_find_.Not_found} is returned and the position is unchanged.
 */
public int Find_fwd__pos_at(byte[] find_bry, boolean pos_at_rhs) {
	int find_pos = Bry_find_.Find_fwd(src, find_bry, pos, src_len);
	// check for "not found" BEFORE applying the rhs offset; otherwise the sentinel is shifted by find_bry.length and is mistaken for a real position
	if (find_pos == Bry_find_.Not_found) return Bry_find_.Not_found;
	if (pos_at_rhs) find_pos += find_bry.length;
	pos = find_pos;
	return find_pos;
}
public int Read_int_to_semic() {return Read_int_to(Byte_ascii.Semic);}

View File

@@ -16,7 +16,7 @@ You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.core.btries; import gplx.*; import gplx.core.*;
import gplx.intl.*;
import gplx.core.intls.*;
class Btrie_u8_itm {
private Hash_adp_bry nxts;
private byte[] asymmetric_bry;
@@ -40,8 +40,8 @@ class Btrie_u8_itm {
else { // itm has asymmetric_bry; EX: "İ" was added to trie, must match "İ" and "i";
if (called_by_match) { // called by mgr.Match
return
( Bry_.Eq(rv.key, src, c_bgn, c_end) // key matches src; EX: "aİ"
|| Bry_.Eq(rv.asymmetric_bry, src, c_bgn, c_end) // asymmetric_bry matches src; EX: "ai"; note that "aI" won't match
( Bry_.Eq(src, c_bgn, c_end, rv.key) // key matches src; EX: "aİ"
|| Bry_.Eq(src, c_bgn, c_end, rv.asymmetric_bry) // asymmetric_bry matches src; EX: "ai"; note that "aI" won't match
)
? rv : null;
}

View File

@@ -16,7 +16,7 @@ You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.core.btries; import gplx.*; import gplx.core.*;
import gplx.intl.*;
import gplx.core.intls.*;
public class Btrie_u8_mgr implements Btrie_mgr {
private Btrie_u8_itm root; private Gfo_case_mgr case_mgr;
Btrie_u8_mgr(Gfo_case_mgr case_mgr) {

View File

@@ -0,0 +1,24 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.core.intls; import gplx.*; import gplx.core.*;
/**
 * One entry of a case-mapping table, exposing data about the lower-case form of a character.
 */
public interface Gfo_case_itm {
	/** NOTE(review): presumably a hash code computed over the lower-case form; confirm against implementations. */
	int Hashcode_lo();

	/** NOTE(review): presumably the byte length of the lower-case form; confirm against implementations. */
	int Len_lo();

	/** NOTE(review): presumably the alternate byte sequence for characters whose case mapping is not symmetric; confirm against implementations. */
	byte[] Asymmetric_bry();

	/** Lower-case byte or byte[] as a single utf8 int. */
	int Utf8_id_lo();
}

View File

@@ -0,0 +1,22 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.core.intls; import gplx.*; import gplx.core.*;
/**
 * Lookup service that resolves a span of bytes to its {@link Gfo_case_itm}.
 */
public interface Gfo_case_mgr {
	/** NOTE(review): presumably one of the {@code Gfo_case_mgr_.Tid_*} values identifying the kind of manager; confirm against implementations. */
	byte Tid();

	/**
	 * Looks up the case item for the bytes {@code src[bgn..end)}.
	 * NOTE(review): the name implies a null result when no item exists, and {@code bgn_byte} is presumably {@code src[bgn]}; confirm against implementations.
	 */
	Gfo_case_itm Get_or_null(byte bgn_byte, byte[] src, int bgn, int end);
}

View File

@@ -0,0 +1,21 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.core.intls; import gplx.*; import gplx.core.*;
/**
 * Type-id constants for case managers.
 * NOTE(review): the suffixes presumably stand for 7-bit ASCII, UTF-8 and custom tables; confirm against users of these ids.
 */
public class Gfo_case_mgr_ {
	public static final byte Tid_a7 = 0;
	public static final byte Tid_u8 = 1;
	public static final byte Tid_custom = 2;
}

View File

@@ -0,0 +1,137 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.core.intls; import gplx.*; import gplx.core.*;
import gplx.core.primitives.*;
/**
 * Static helpers for converting between Unicode code points, UTF-16 surrogate pairs and UTF-8 byte sequences.
 * NOTE: despite the class name, the Decode_* / Encode_* methods read and write UTF-8 bytes.
 */
public class Utf16_ {
	/** Combines a high surrogate (0xD800-0xDBFF) and a low surrogate (0xDC00-0xDFFF) into a single code point. REF: http://perldoc.perl.org/Encode/Unicode.html */
	public static int Surrogate_merge(int hi, int lo) {
		return 0x10000 + (hi - 0xD800) * 0x400 + (lo - 0xDC00);
	}
	/** Splits code point {@code v} into a surrogate pair, storing the high half in {@code hi} and the low half in {@code lo}. */
	public static void Surrogate_split(int v, Int_obj_ref hi, Int_obj_ref lo) {
		hi.Val_((v - 0x10000) / 0x400 + 0xD800);
		lo.Val_((v - 0x10000) % 0x400 + 0xDC00);
	}
	/**
	 * Decodes the UTF-8 sequence that starts at {@code ary[pos]} into a code point.
	 * The lead byte selects the sequence length (1-4 bytes); continuation bytes are masked to their low 6 bits but not otherwise validated.
	 * Throws an Err when the lead byte matches none of the four lead-byte patterns.
	 */
	public static int Decode_to_int(byte[] ary, int pos) {
		byte b0 = ary[pos];
		if ((b0 & 0x80) == 0) {				// 0xxxxxxx: ASCII
			return b0;
		}
		else if ((b0 & 0xE0) == 0xC0) {		// 110xxxxx: 2-byte sequence
			return	(  b0           & 0x1f) <<  6
				|	(  ary[pos + 1] & 0x3f)
				;
		}
		else if ((b0 & 0xF0) == 0xE0) {		// 1110xxxx: 3-byte sequence
			return	(  b0           & 0x0f) << 12
				|	(( ary[pos + 1] & 0x3f) <<  6)
				|	(  ary[pos + 2] & 0x3f)
				;
		}
		else if ((b0 & 0xF8) == 0xF0) {		// 11110xxx: 4-byte sequence
			return	(  b0           & 0x07) << 18
				|	(( ary[pos + 1] & 0x3f) << 12)
				|	(( ary[pos + 2] & 0x3f) <<  6)
				|	(  ary[pos + 3] & 0x3f)
				;
		}
		else throw Err_.new_wo_type("invalid utf8 byte", "byte", b0);
	}
	/** Convenience overload: converts {@code raw} to bytes and delegates to {@link #Encode_hex_to_bry(byte[])}. */
	public static byte[] Encode_hex_to_bry(String raw) {return Encode_hex_to_bry(Bry_.new_a7(raw));}
	/** Parses {@code raw} as a hexadecimal code point and returns its UTF-8 bytes; returns null when {@code raw} is null or the parse yields the Int_.Min_value fallback. */
	public static byte[] Encode_hex_to_bry(byte[] raw) {
		if (raw == null) return null;
		int int_val = gplx.texts.HexDecUtl.parse_or(raw, Int_.Min_value);
		return int_val == Int_.Min_value ? null : Encode_int_to_bry(int_val);
	}
	/** Returns a new, exactly sized array holding the UTF-8 encoding of code point {@code c}. */
	public static byte[] Encode_int_to_bry(int c) {
		int bry_len = Len_by_int(c);
		byte[] bry = new byte[bry_len];
		Encode_int(c, bry, 0);
		return bry;
	}
	/**
	 * Encodes the UTF-16 unit {@code c}, located at {@code c_ary[c_pos]}, as UTF-8 into {@code b_ary} starting at {@code b_pos}.
	 * Returns the number of chars consumed from {@code c_ary}: 2 when {@code c} is a high surrogate (its partner at {@code c_pos + 1} is consumed as well), else 1.
	 * Throws an Err when {@code c} is a high surrogate and there is no following char.
	 */
	public static int Encode_char(int c, char[] c_ary, int c_pos, byte[] b_ary, int b_pos) {
		if	(	(c > -1)
			&&	(c < 128)) {
			b_ary[  b_pos]	= (byte)c;
			return 1;
		}
		else if (c < 2048) {
			b_ary[  b_pos] 	= (byte)(0xC0 | (c >>   6));
			b_ary[++b_pos] 	= (byte)(0x80 | (c & 0x3F));
			return 1;
		}
		else if	(	(c > 55295)				// 0xD7FF; i.e. c >= 0xD800
				&&	(c < 56320)) {			// 0xDC00; i.e. c is a high surrogate (0xD800-0xDBFF)
			// FIX: the partner is read from c_pos + 1, so that index (not c_pos) must be in bounds
			if (c_pos + 1 >= c_ary.length) throw Err_.new_wo_type("incomplete surrogate pair at end of String", "char", c);
			char nxt_char = c_ary[c_pos + 1];
			int v = Surrogate_merge(c, nxt_char);
			b_ary[  b_pos] 	= (byte)(0xF0 | (v >> 18));
			b_ary[++b_pos] 	= (byte)(0x80 | (v >> 12) & 0x3F);
			b_ary[++b_pos] 	= (byte)(0x80 | (v >>  6) & 0x3F);
			b_ary[++b_pos] 	= (byte)(0x80 | (v        & 0x3F));
			return 2;
		}
		else {
			b_ary[  b_pos] 	= (byte)(0xE0 | (c >> 12));
			b_ary[++b_pos] 	= (byte)(0x80 | (c >>  6) & 0x3F);
			b_ary[++b_pos] 	= (byte)(0x80 | (c        & 0x3F));
			return 1;
		}
	}
	/**
	 * Writes the UTF-8 encoding of code point {@code c} into {@code src} starting at {@code pos} and returns the number of bytes written (1-4).
	 * Throws an Err when {@code c} is negative or is 2097152 (1 << 21) or greater.
	 */
	public static int Encode_int(int c, byte[] src, int pos) {
		// FIX: negative values used to fall into the 2-byte branch and emit garbage; reject them as the error message always claimed
		if (c < 0) throw Err_.new_wo_type("UTF-16 int must be between 0 and 2097152", "char", c);
		if (c < 128) {
			src[  pos]	= (byte)c;
			return 1;
		}
		else if (c < 2048) {
			src[  pos] 	= (byte)(0xC0 | (c >>   6));
			src[++pos] 	= (byte)(0x80 | (c & 0x3F));
			return 2;
		}
		else if (c < 65536) {
			src[  pos] 	= (byte)(0xE0 | (c >> 12));
			src[++pos] 	= (byte)(0x80 | (c >>  6) & 0x3F);
			src[++pos] 	= (byte)(0x80 | (c        & 0x3F));
			return 3;
		}
		else if (c < 2097152) {
			src[  pos] 	= (byte)(0xF0 | (c >> 18));
			src[++pos] 	= (byte)(0x80 | (c >> 12) & 0x3F);
			src[++pos] 	= (byte)(0x80 | (c >>  6) & 0x3F);
			src[++pos] 	= (byte)(0x80 | (c        & 0x3F));
			return 4;
		}
		else throw Err_.new_wo_type("UTF-16 int must be between 0 and 2097152", "char", c);
	}
	/** Returns the number of UTF-8 bytes needed for code point {@code c}; throws an Err for the same out-of-range values as {@link #Encode_int}. */
	private static int Len_by_int(int c) {
		// FIX: keep in step with Encode_int; negative values are out of range, not 2-byte chars
		if (c < 0) throw Err_.new_wo_type("UTF-16 int must be between 0 and 2097152", "char", c);
		if		(c <     128)	return 1;		// 1 <<  7
		else if (c <    2048)	return 2;		// 1 << 11
		else if (c <   65536)	return 3;		// 1 << 16
		else if (c < 2097152)	return 4;		// 1 << 21
		else throw Err_.new_wo_type("UTF-16 int must be between 0 and 2097152", "char", c);
	}
	/**
	 * Returns the number of UTF-8 bytes produced for the UTF-16 unit {@code c}.
	 * A high surrogate reports 4, the length of the whole pair's encoding; throws an Err when {@code c} is 65536 or greater.
	 */
	public static int Len_by_char(int c) {
		if		(	(c >       -1)
				&&	(c <      128))	return 1;		// 1 <<  7
		else if (c <     2048)	return 2;		// 1 << 11
		else if	(	(c > 55295)					// 0xD7FF; i.e. c >= 0xD800
				&&	(c < 56320))	return 4;		// 0xDC00; i.e. c is a high surrogate (0xD800-0xDBFF)
		else if (c <    65536)	return 3;		// 1 << 16
		else throw Err_.new_wo_type("UTF-16 int must be between 0 and 65536", "char", c);
	}
}

View File

@@ -0,0 +1,59 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.core.intls; import gplx.*; import gplx.core.*;
import org.junit.*; import gplx.core.primitives.*;
/** Unit tests for Utf16_: UTF-8 encode/decode round trips, hex-to-bytes encoding, and surrogate merge/split. */
public class Utf16__tst {
	private Utf16__fxt fxt = new Utf16__fxt();
	/** Round-trips one code point per multi-byte sequence length (2, 3 and 4 bytes). */
	@Test  public void Encode_decode() {
		fxt.Test_encode_decode(162, 194, 162);				// cent; encoded as two bytes
		fxt.Test_encode_decode(8364, 226, 130, 172);		// euro; encoded as three bytes
		fxt.Test_encode_decode(150370, 240, 164, 173, 162);	// example from [[UTF-8]]; encoded as four bytes
	}
	/** Hex strings are parsed as code points and returned as UTF-8 bytes. */
	@Test  public void Encode_as_bry_by_hex() {
		fxt.Test_Encode_hex_to_bry("00", 0);
		fxt.Test_Encode_hex_to_bry("41", 65);
		fxt.Test_Encode_hex_to_bry("0041", 65);
		fxt.Test_Encode_hex_to_bry("00C0", 195, 128);
	}
	/** Surrogate pairs merge to the expected code point and split back to the same pair. */
	@Test  public void Surrogate() {
		fxt.Test_surrogate(0x64321, 0xD950, 0xDF21);	// example from w:UTF-16
		fxt.Test_surrogate(66643, 55297, 56403);		// example from d:Boomerang
	}
}
/** Test fixture for Utf16__tst: each helper runs one Utf16_ operation and compares the result to the expected value. */
class Utf16__fxt {
	private Int_obj_ref hi_ref = Int_obj_ref.neg1_(), lo_ref = Int_obj_ref.neg1_();
	/** Encodes {@code expd_c_int}, checks the produced bytes against {@code expd_int}, then decodes the bytes and checks the code point survives the round trip. */
	public void Test_encode_decode(int expd_c_int, int... expd_int) {
		byte[] expd_bry = Bry_.new_ints(expd_int);
		byte[] scratch = new byte[10];	// roomy enough for any single encoded char
		int written = Utf16_.Encode_int(expd_c_int, scratch, 0);
		Tfds.Eq_ary(expd_bry, Bry_.Mid_by_len(scratch, 0, written));
		Tfds.Eq(expd_c_int, Utf16_.Decode_to_int(scratch, 0));
	}
	/** Checks that (hi, lo) merges to {@code v} and that {@code v} splits back into (hi, lo). */
	public void Test_surrogate(int v, int hi, int lo) {
		int merged = Utf16_.Surrogate_merge((char)hi, (char)lo);
		Tfds.Eq(v, merged);
		Utf16_.Surrogate_split(v, hi_ref, lo_ref);
		Tfds.Eq(hi, hi_ref.Val());
		Tfds.Eq(lo, lo_ref.Val());
	}
	/** Checks that the hex string {@code raw} encodes to the bytes given by {@code expd}. */
	public void Test_Encode_hex_to_bry(String raw, int... expd) {
		byte[] encoded = Utf16_.Encode_hex_to_bry(raw);
		Tfds.Eq_ary(Byte_.Ary_by_ints(expd), encoded);
	}
}

View File

@@ -0,0 +1,117 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.core.intls; import gplx.*; import gplx.core.*;
/** Static helpers for walking and modifying UTF-8 encoded byte arrays. */
public class Utf8_ {
	/** Counts the chars in {@code ary} by hopping from lead byte to lead byte; a null array counts as 0. */
	public static int Len_of_bry(byte[] ary) {
		if (ary == null) return 0;
		int char_count = 0;
		int ary_len = ary.length;
		for (int pos = 0; pos < ary_len; pos += Len_of_char_by_1st_byte(ary[pos]))
			char_count++;
		return char_count;
	}
	/**
	 * Returns the byte length of a char judging by its first byte. SEE:w:UTF-8
	 * Unsigned values 0-191 (ASCII and continuation bytes alike) report 1; 192-223 report 2; 224-239 report 3; 240-247 report 4.
	 * Throws an Err for 248-255.
	 */
	public static int Len_of_char_by_1st_byte(byte b) {
		int unsigned = b & 0xff;	// PATCH.JAVA:need to convert to unsigned byte
		if (unsigned < 192) return 1;
		if (unsigned < 224) return 2;
		if (unsigned < 240) return 3;
		if (unsigned < 248) return 4;
		throw Err_.new_wo_type("invalid initial utf8 byte", "byte", b);
	}
	/** Returns the bytes of the char whose first byte is at {@code bry[pos]}. */
	public static byte[] Get_char_at_pos_as_bry(byte[] bry, int pos) {
		int char_end = pos + Len_of_char_by_1st_byte(bry[pos]);
		return Bry_.Mid(bry, pos, char_end);
	}
	/**
	 * Returns a new array in which the last char is replaced by the next valid code point. EX: abc -> abd
	 * An empty array is returned as is. When a char has no successor, the search moves bwd to the preceding char;
	 * null is returned when no char can be incremented.
	 */
	public static byte[] Increment_char_at_last_pos(byte[] bry) {
		if (bry.length == 0) return bry;
		for (int pos = bry.length - 1; pos >= 0; ) {						// walk bwd, one char at a time
			int char_bgn = Get_pos0_of_char_bwd(bry, pos);					// first byte of the char ending at pos
			if (char_bgn == pos) {											// char is a single byte
				int nxt_byte = Increment_char(bry[char_bgn]);
				if (nxt_byte < 128) {										// still single-byte; overwrite in place on a copy
					byte[] copy = Bry_.Copy(bry);							// always return new bry; never reuse existing
					copy[char_bgn] = (byte)nxt_byte;
					return copy;
				}
			}
			int nxt_char = Increment_char(Utf16_.Decode_to_int(bry, char_bgn));
			if (nxt_char != Int_.Min_value) {								// successor exists; splice it on after the preceding bytes
				byte[] nxt_char_as_bry = Utf16_.Encode_int_to_bry(nxt_char);
				return Bry_.Add(Bry_.Mid(bry, 0, char_bgn), nxt_char_as_bry);
			}
			pos = char_bgn - 1;												// no successor; try the preceding char
		}
		return null;
	}
	/**
	 * Finds the first byte of the char that ends at {@code pos} by scanning bwd for a lead byte; see test.
	 * A lead byte only counts when its declared length places its last byte exactly at {@code pos}.
	 * EX: € = {226,130,172}; starting at 172, 130 reports len 1 and is passed over; 226 reports len 3 and sits 2 bytes back, so its index is returned.
	 * Returns {@code pos} itself when no such lead byte is found.
	 */
	public static int Get_pos0_of_char_bwd(byte[] bry, int pos) {
		int stop = Math.max(pos - 4, 0);									// UTF8 char has max of 4 bytes; never scan before 0
		for (int i = pos - 1; i >= stop; i--) {								// start at pos - 1; the byte at pos cannot itself settle the char_len
			int char_len = Len_of_char_by_1st_byte(bry[i]);
			boolean is_lead_byte = char_len > 1;
			if (is_lead_byte && pos - i == char_len - 1) return i;
		}
		return pos;															// no multi-byte char found; return pos
	}
	/**
	 * Returns the next code point after {@code cur} that Character.isDefined accepts, jumping over the surrogate range
	 * when the scan lands on its first value; returns Int_.Min_value when Codepoint_max is passed without a hit.
	 */
	@gplx.Internal protected static int Increment_char(int cur) {
		int candidate = cur;
		while (candidate < Codepoint_max) {
			++candidate;
			if (candidate == Codepoint_surrogate_bgn) candidate = Codepoint_surrogate_end + 1;	// skip over surrogate range
			if (Codepoint_valid(candidate)) return candidate;
		}
		return Int_.Min_value;
	}
	private static boolean Codepoint_valid(int v) {
		return Character.isDefined(v);
	}
	public static final int Codepoint_max = 0x10FFFF;			// see http://unicode.org/glossary/
	public static final int Codepoint_surrogate_bgn = 0xD800;
	public static final int Codepoint_surrogate_end = 0xDFFF;
}

View File

@@ -0,0 +1,69 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.core.intls; import gplx.*; import gplx.core.*;
import org.junit.*;
// Unit tests for Utf8_: locating the first byte of the last char, and incrementing the last char.
public class Utf8__tst {
private Utf8__fxt fxt = new Utf8__fxt();
// Each case starts at the last byte of the String and expects the index of the first byte of the last char.
@Test public void Get_pos0_of_char_bwd() {
fxt.Test_Get_pos0_of_char_bwd("abcd", 3); // len=1; (note that bry.len = 4)
fxt.Test_Get_pos0_of_char_bwd("a", 0); // len=1; short-String
fxt.Test_Get_pos0_of_char_bwd("abc¢", 3); // len=2; (note that bry.len = 5)
fxt.Test_Get_pos0_of_char_bwd("abc€", 3); // len=3; (note that bry.len = 6)
fxt.Test_Get_pos0_of_char_bwd("abc" + String_.new_u8(Byte_.Ary_by_ints(240, 164, 173, 162)), 3); // len=4; (note that bry.len = 7)
}
// Each case passes a String and the String expected after its last char is incremented.
@Test public void Increment_char_at_last_pos() {
fxt.Test_Increment_char_at_last_pos("a", "b");
fxt.Test_Increment_char_at_last_pos("abc", "abd");
fxt.Test_Increment_char_at_last_pos("É", "Ê"); // len=2
// NOTE(review): both literals below are empty although the case is labelled len=3; the 3-byte chars appear to have been lost. As written this only covers the empty-input path -- restore the literals and confirm.
fxt.Test_Increment_char_at_last_pos("", ""); // len=3
}
// @Test public void Increment_char_at_last_pos_exhaustive_check() { // check all values; commented for perf
// Bry_bfr bfr = Bry_bfr.new_();
// int bgn = 32;
// while (true) {
// byte[] bgn_bry = Utf16_.Encode_int_to_bry(bgn);
// int end = Utf8_.Increment_char(bgn);
// if (end == Utf8_.Codepoint_max) break;
//// if (bgn > 1024 * 1024) break;
// byte[] end_by_codepoint_next = Utf16_.Encode_int_to_bry(end);
// byte[] end_by_increment_char = Utf8_.Increment_char_at_last_pos(bgn_bry);
// if (!Bry_.Eq(end_by_codepoint_next, end_by_increment_char)) {
// Tfds.Write(bgn);
// }
//// bfr .Add_int_variable(bgn).Add_byte(Byte_ascii.Tab)
//// .Add(bgn_bry).Add_byte(Byte_ascii.Tab)
//// .Add(end_by_codepoint_next).Add_byte(Byte_ascii.Tab)
//// .Add(end_by_increment_char).Add_byte(Byte_ascii.Tab)
//// .Add_byte_nl()
//// ;
// bgn = end;
// bgn_bry = end_by_codepoint_next;
// }
// Tfds.WriteText(bfr.Xto_str_and_clear());
// }
}
/** Test fixture for Utf8__tst: converts Strings to utf8 bytes, runs one Utf8_ operation and compares the result. */
class Utf8__fxt {
	/** Checks the index Utf8_.Get_pos0_of_char_bwd reports when started at the last byte of {@code str}. */
	public void Test_Get_pos0_of_char_bwd(String str, int expd) {
		byte[] utf8 = Bry_.new_u8(str);
		int last_byte_pos = utf8.length - 1;	// always start from last char
		int actl = Utf8_.Get_pos0_of_char_bwd(utf8, last_byte_pos);
		Tfds.Eq(expd, actl);
	}
	/** Checks that incrementing the last char of {@code str} yields {@code expd}. */
	public void Test_Increment_char_at_last_pos(String str, String expd) {
		byte[] src = Bry_.new_u8(str);
		byte[] incremented = Utf8_.Increment_char_at_last_pos(src);
		Tfds.Eq(expd, String_.new_u8(incremented));
	}
}

View File

@@ -15,7 +15,7 @@ GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.core.regxs; import gplx.*; import gplx.core.*;
package gplx.langs.regxs; import gplx.*; import gplx.langs.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class Regx_adp {

View File

@@ -15,7 +15,7 @@ GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.core.regxs; import gplx.*; import gplx.core.*;
package gplx.langs.regxs; import gplx.*; import gplx.langs.*;
public class Regx_adp_ {
public static Regx_adp new_(String pattern) {return new Regx_adp(pattern);}
public static List_adp Find_all(String input, String find) {

View File

@@ -15,7 +15,7 @@ GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.core.regxs; import gplx.*; import gplx.core.*;
package gplx.langs.regxs; import gplx.*; import gplx.langs.*;
import org.junit.*;
public class Regx_adp__tst implements TfdsEqListItmStr {
@Test public void Match() {

View File

@@ -15,7 +15,7 @@ GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.core.regxs; import gplx.*; import gplx.core.*;
package gplx.langs.regxs; import gplx.*; import gplx.langs.*;
import gplx.core.strings.*;
public class Regx_bldr {
public static String Includes(String characters) {return String_.Concat_any(Regx_bldr.Tkn_CharSetBegin, characters, Regx_bldr.Tkn_CharSetEnd);}

View File

@@ -15,7 +15,7 @@ GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.core.regxs; import gplx.*; import gplx.core.*;
package gplx.langs.regxs; import gplx.*; import gplx.langs.*;
public class Regx_group {
public Regx_group(boolean rslt, int bgn, int end, String val) {this.rslt = rslt; this.bgn = bgn; this.end = end; this.val = val;}
public boolean Rslt() {return rslt;} private boolean rslt;

View File

@@ -15,7 +15,7 @@ GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.core.regxs; import gplx.*; import gplx.core.*;
package gplx.langs.regxs; import gplx.*; import gplx.langs.*;
public class Regx_match {
public Regx_match(boolean rslt, int find_bgn, int find_end, Regx_group[] groups) {this.rslt = rslt; this.find_bgn = find_bgn; this.find_end = find_end; this.groups = groups;}
public boolean Rslt() {return rslt;} private boolean rslt;