mirror of
https://github.com/gnosygnu/xowa.git
synced 2024-09-29 23:10:52 +00:00
Scribunto: Use Luaj for pattern-matching (instead of Java Regex) [#413]
This commit is contained in:
parent
4a1b2e25c0
commit
f860edf064
@ -16,6 +16,7 @@ Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
|||||||
package gplx;
|
package gplx;
|
||||||
import gplx.core.strings.*; import gplx.langs.gfs.*;
|
import gplx.core.strings.*; import gplx.langs.gfs.*;
|
||||||
public class Int_ {
|
public class Int_ {
|
||||||
|
// -------- BASELIB_COPY --------
|
||||||
public static final String Cls_val_name = "int";
|
public static final String Cls_val_name = "int";
|
||||||
public static final Class<?> Cls_ref_type = Integer.class;
|
public static final Class<?> Cls_ref_type = Integer.class;
|
||||||
|
|
||||||
@ -37,6 +38,72 @@ public class Int_ {
|
|||||||
throw Err_.new_type_mismatch_w_exc(exc, int.class, obj);
|
throw Err_.new_type_mismatch_w_exc(exc, int.class, obj);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Converts an int to its base-10 String form.
 * Uses Integer.toString rather than the deprecated Integer(int) constructor, which allocated a throwaway boxed object per call.
 *
 * @param v value to convert
 * @return decimal text of v; negative values carry a leading '-'
 */
public static String To_str(int v) {return Integer.toString(v);}
|
||||||
|
public static int Parse_or(String raw, int or) {
|
||||||
|
// process args
|
||||||
|
if (raw == null) return or;
|
||||||
|
int raw_len = String_.Len(raw);
|
||||||
|
if (raw_len == 0) return or;
|
||||||
|
|
||||||
|
// loop backwards from nth to 0th char
|
||||||
|
int rv = 0, power_of_10 = 1;
|
||||||
|
for (int idx = raw_len - 1; idx >= 0; idx--) {
|
||||||
|
char cur = String_.CharAt(raw, idx);
|
||||||
|
int digit = -1;
|
||||||
|
switch (cur) {
|
||||||
|
// numbers -> assign digit
|
||||||
|
case '0': digit = 0; break; case '1': digit = 1; break; case '2': digit = 2; break; case '3': digit = 3; break; case '4': digit = 4; break;
|
||||||
|
case '5': digit = 5; break; case '6': digit = 6; break; case '7': digit = 7; break; case '8': digit = 8; break; case '9': digit = 9; break;
|
||||||
|
|
||||||
|
// negative sign
|
||||||
|
case '-':
|
||||||
|
if (idx != 0) { // invalid if not 1st
|
||||||
|
return or;
|
||||||
|
}
|
||||||
|
else { // is first; multiply by -1
|
||||||
|
rv *= -1;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// anything else
|
||||||
|
default:
|
||||||
|
return or;
|
||||||
|
}
|
||||||
|
rv += (digit * power_of_10);
|
||||||
|
power_of_10 *= 10;
|
||||||
|
}
|
||||||
|
return rv;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static int[] Log10Ary = new int[] {1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000, Int_.Max_value};
|
||||||
|
public static int Log10AryLen = 11;
|
||||||
|
public static int Log10(int v) {
|
||||||
|
if (v == 0) return 0;
|
||||||
|
int sign = 1;
|
||||||
|
if (v < 0) {
|
||||||
|
if (v == Int_.Min_value) return -9; // NOTE: Int_.Min_value * -1 = Int_.Min_value
|
||||||
|
v *= -1;
|
||||||
|
sign = -1;
|
||||||
|
}
|
||||||
|
int rv = Log10AryLen - 2; // rv will only happen when v == Int_.Max_value
|
||||||
|
int bgn = 0;
|
||||||
|
if (v > 1000) { // optimization to reduce number of ops to < 5
|
||||||
|
bgn = 3;
|
||||||
|
if (v > 1000000) bgn = 6;
|
||||||
|
}
|
||||||
|
for (int i = bgn; i < Log10AryLen; i++) {
|
||||||
|
if (v < Log10Ary[i]) {rv = i - 1; break;}
|
||||||
|
}
|
||||||
|
return rv * sign;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static int DigitCount(int v) {
|
||||||
|
int log10 = Log10(v);
|
||||||
|
return v > -1 ? log10 + 1 : log10 * -1 + 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
// -------- TO_MIGRATE --------
|
||||||
public static int Cast_or(Object obj, int or) {
|
public static int Cast_or(Object obj, int or) {
|
||||||
try {
|
try {
|
||||||
return (Integer)obj;
|
return (Integer)obj;
|
||||||
@ -55,23 +122,7 @@ public class Int_ {
|
|||||||
}
|
}
|
||||||
|
|
||||||
public static int Parse(String raw) {try {return Integer.parseInt(raw);} catch(Exception e) {throw Err_.new_parse_exc(e, int.class, raw);}}
|
public static int Parse(String raw) {try {return Integer.parseInt(raw);} catch(Exception e) {throw Err_.new_parse_exc(e, int.class, raw);}}
|
||||||
public static int Parse_or(String raw, int or) {
|
|
||||||
if (raw == null) return or;
|
|
||||||
int rawLen = String_.Len(raw); if (rawLen == 0) return or;
|
|
||||||
int rv = 0, tmp = 0, factor = 1;
|
|
||||||
for (int i = rawLen; i > 0; i--) {
|
|
||||||
char c = String_.CharAt(raw, i - 1);
|
|
||||||
switch (c) {
|
|
||||||
case '0': tmp = 0; break; case '1': tmp = 1; break; case '2': tmp = 2; break; case '3': tmp = 3; break; case '4': tmp = 4; break;
|
|
||||||
case '5': tmp = 5; break; case '6': tmp = 6; break; case '7': tmp = 7; break; case '8': tmp = 8; break; case '9': tmp = 9; break;
|
|
||||||
case '-': rv *= -1; continue; // NOTE: note continue
|
|
||||||
default: return or;
|
|
||||||
}
|
|
||||||
rv += (tmp * factor);
|
|
||||||
factor *= 10;
|
|
||||||
}
|
|
||||||
return rv;
|
|
||||||
}
|
|
||||||
|
|
||||||
public static int By_double(double v) {return (int)v;}
|
public static int By_double(double v) {return (int)v;}
|
||||||
public static int By_hex_bry(byte[] src) {return By_hex_bry(src, 0, src.length);}
|
public static int By_hex_bry(byte[] src) {return By_hex_bry(src, 0, src.length);}
|
||||||
@ -99,7 +150,6 @@ public class Int_ {
|
|||||||
}
|
}
|
||||||
|
|
||||||
public static byte[] To_bry(int v) {return Bry_.new_a7(To_str(v));}
|
public static byte[] To_bry(int v) {return Bry_.new_a7(To_str(v));}
|
||||||
public static String To_str(int v) {return new Integer(v).toString();}
|
|
||||||
public static String To_str_fmt(int v, String fmt) {return new java.text.DecimalFormat(fmt).format(v);}
|
public static String To_str_fmt(int v, String fmt) {return new java.text.DecimalFormat(fmt).format(v);}
|
||||||
public static String To_str_pad_bgn_space(int val, int reqd_len) {return To_str_pad(val, reqd_len, Bool_.Y, Byte_ascii.Space);} // EX: 1, 3 returns " 1"
|
public static String To_str_pad_bgn_space(int val, int reqd_len) {return To_str_pad(val, reqd_len, Bool_.Y, Byte_ascii.Space);} // EX: 1, 3 returns " 1"
|
||||||
public static String To_str_pad_bgn_zero (int val, int reqd_len) {return To_str_pad(val, reqd_len, Bool_.Y, Byte_ascii.Num_0);} // EX: 1, 3 returns "001"
|
public static String To_str_pad_bgn_zero (int val, int reqd_len) {return To_str_pad(val, reqd_len, Bool_.Y, Byte_ascii.Num_0);} // EX: 1, 3 returns "001"
|
||||||
@ -190,31 +240,4 @@ public class Int_ {
|
|||||||
float product = ((float)v * multiplier); // WORKAROUND (DotNet): (int)((float)v * multiplier) returns 0 for 100 and .01f
|
float product = ((float)v * multiplier); // WORKAROUND (DotNet): (int)((float)v * multiplier) returns 0 for 100 and .01f
|
||||||
return (int)product;
|
return (int)product;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static int[] Log10Ary = new int[] {1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000, Int_.Max_value};
|
|
||||||
public static int Log10AryLen = 11;
|
|
||||||
public static int Log10(int v) {
|
|
||||||
if (v == 0) return 0;
|
|
||||||
int sign = 1;
|
|
||||||
if (v < 0) {
|
|
||||||
if (v == Int_.Min_value) return -9; // NOTE: Int_.Min_value * -1 = Int_.Min_value
|
|
||||||
v *= -1;
|
|
||||||
sign = -1;
|
|
||||||
}
|
|
||||||
int rv = Log10AryLen - 2; // rv will only happen when v == Int_.Max_value
|
|
||||||
int bgn = 0;
|
|
||||||
if (v > 1000) { // optimization to reduce number of ops to < 5
|
|
||||||
bgn = 3;
|
|
||||||
if (v > 1000000) bgn = 6;
|
|
||||||
}
|
|
||||||
for (int i = bgn; i < Log10AryLen; i++) {
|
|
||||||
if (v < Log10Ary[i]) {rv = i - 1; break;}
|
|
||||||
}
|
|
||||||
return rv * sign;
|
|
||||||
}
|
|
||||||
|
|
||||||
public static int DigitCount(int v) {
|
|
||||||
int log10 = Log10(v);
|
|
||||||
return v > -1 ? log10 + 1 : log10 * -1 + 2;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
@ -25,72 +25,10 @@ public class Int__tst {
|
|||||||
tst_XtoStr_PadLeft_Zeroes(-123 , 3, "-123"); // negative
|
tst_XtoStr_PadLeft_Zeroes(-123 , 3, "-123"); // negative
|
||||||
tst_XtoStr_PadLeft_Zeroes(-1234 , 3, "-1234"); // negative
|
tst_XtoStr_PadLeft_Zeroes(-1234 , 3, "-1234"); // negative
|
||||||
} void tst_XtoStr_PadLeft_Zeroes(int val, int zeros, String expd) {Tfds.Eq(expd, Int_.To_str_pad_bgn_zero(val, zeros));}
|
} void tst_XtoStr_PadLeft_Zeroes(int val, int zeros, String expd) {Tfds.Eq(expd, Int_.To_str_pad_bgn_zero(val, zeros));}
|
||||||
@Test public void parseOr_() {
|
|
||||||
tst_ParseOr("", -1); // empty
|
|
||||||
tst_ParseOr("123", 123); // single
|
|
||||||
tst_ParseOr("1a", -1); // fail
|
|
||||||
} void tst_ParseOr(String raw, int expd) {Tfds.Eq(expd, Int_.Parse_or(raw, -1));}
|
|
||||||
@Test public void Between() {
|
|
||||||
tst_Between(1, 0, 2, true); // simple true
|
|
||||||
tst_Between(3, 0, 2, false); // simple false
|
|
||||||
tst_Between(0, 0, 2, true); // bgn true
|
|
||||||
tst_Between(2, 0, 2, true); // end true
|
|
||||||
} void tst_Between(int val, int lhs, int rhs, boolean expd) {Tfds.Eq(expd, Int_.Between(val, lhs, rhs));}
|
|
||||||
@Test public void Xto_fmt() {
|
@Test public void Xto_fmt() {
|
||||||
tst_XtoStr_fmt(1, "1");
|
tst_XtoStr_fmt(1, "1");
|
||||||
tst_XtoStr_fmt(1000, "1,000");
|
tst_XtoStr_fmt(1000, "1,000");
|
||||||
} void tst_XtoStr_fmt(int v, String expd) {Tfds.Eq(expd, Int_.To_str_fmt(v, "#,###"));}
|
} void tst_XtoStr_fmt(int v, String expd) {Tfds.Eq(expd, Int_.To_str_fmt(v, "#,###"));}
|
||||||
@Test public void Log10_pos() {
|
|
||||||
tst_Log10(0, 0);
|
|
||||||
tst_Log10(1, 0);
|
|
||||||
tst_Log10(9, 0);
|
|
||||||
tst_Log10(10, 1);
|
|
||||||
tst_Log10(100, 2);
|
|
||||||
tst_Log10(1000000, 6);
|
|
||||||
tst_Log10(1000000000, 9);
|
|
||||||
tst_Log10(Int_.Max_value, 9);
|
|
||||||
}
|
|
||||||
@Test public void Log10_neg() {
|
|
||||||
tst_Log10(-1, 0);
|
|
||||||
tst_Log10(-10, -1);
|
|
||||||
tst_Log10(-100, -2);
|
|
||||||
tst_Log10(-1000000, -6);
|
|
||||||
tst_Log10(-1000000000, -9);
|
|
||||||
tst_Log10(Int_.Min_value, -9);
|
|
||||||
tst_Log10(Int_.Min_value + 1, -9);
|
|
||||||
}
|
|
||||||
void tst_Log10(int val, int expd) {Tfds.Eq(expd, Int_.Log10(val));}
|
|
||||||
@Test public void DigitCount() {
|
|
||||||
tst_DigitCount(0, 1);
|
|
||||||
tst_DigitCount(9, 1);
|
|
||||||
tst_DigitCount(100, 3);
|
|
||||||
tst_DigitCount(-1, 2);
|
|
||||||
tst_DigitCount(-100, 4);
|
|
||||||
} void tst_DigitCount(int val, int expd) {Tfds.Eq(expd, Int_.DigitCount(val), Int_.To_str(val));}
|
|
||||||
@Test public void Log10() {
|
|
||||||
tst_Log10( 0, 0);
|
|
||||||
tst_Log10( 1, 0);
|
|
||||||
tst_Log10( 2, 0);
|
|
||||||
tst_Log10( 10, 1);
|
|
||||||
tst_Log10( 12, 1);
|
|
||||||
tst_Log10( 100, 2);
|
|
||||||
tst_Log10( 123, 2);
|
|
||||||
tst_Log10( 1000, 3);
|
|
||||||
tst_Log10( 1234, 3);
|
|
||||||
tst_Log10( 10000, 4);
|
|
||||||
tst_Log10( 12345, 4);
|
|
||||||
tst_Log10( 100000, 5);
|
|
||||||
tst_Log10( 123456, 5);
|
|
||||||
tst_Log10( 1000000, 6);
|
|
||||||
tst_Log10( 1234567, 6);
|
|
||||||
tst_Log10( 10000000, 7);
|
|
||||||
tst_Log10( 12345678, 7);
|
|
||||||
tst_Log10( 100000000, 8);
|
|
||||||
tst_Log10( 123456789, 8);
|
|
||||||
tst_Log10( 1000000000, 9);
|
|
||||||
tst_Log10( 1234567890, 9);
|
|
||||||
tst_Log10(Int_.Max_value, 9);
|
|
||||||
}
|
|
||||||
@Test public void Xto_int_hex_tst() {
|
@Test public void Xto_int_hex_tst() {
|
||||||
Xto_int_hex("007C", 124);
|
Xto_int_hex("007C", 124);
|
||||||
} void Xto_int_hex(String raw, int expd) {Tfds.Eq(expd, Int_.By_hex_bry(Bry_.new_a7(raw)));}
|
} void Xto_int_hex(String raw, int expd) {Tfds.Eq(expd, Int_.By_hex_bry(Bry_.new_a7(raw)));}
|
||||||
|
@ -17,8 +17,17 @@ package gplx;
|
|||||||
import java.lang.*;
|
import java.lang.*;
|
||||||
import gplx.core.strings.*; import gplx.langs.gfs.*; import gplx.core.envs.*;
|
import gplx.core.strings.*; import gplx.langs.gfs.*; import gplx.core.envs.*;
|
||||||
public class String_ {
|
public class String_ {
|
||||||
|
// -------- BASELIB_COPY --------
|
||||||
|
public static final Class<?> Cls_ref_type = String.class;
|
||||||
|
public static final String Cls_val_name = "str" + "ing";
|
||||||
|
public static final int Find_none = -1, Pos_neg1 = -1;
|
||||||
|
public static final String Empty = "", Null_mark = "<<NULL>>", Tab = "\t", Lf = "\n", CrLf = "\r\n";
|
||||||
|
|
||||||
|
/**
 * Null-safe String equality.
 *
 * @param lhs left operand; may be null
 * @param rhs right operand; may be null
 * @return true when both are null, or when lhs is non-null and lhs.equals(rhs)
 */
public static boolean Eq(String lhs, String rhs) {
	if (lhs == null) return rhs == null;
	return lhs.equals(rhs);
}
|
||||||
public static int Len(String s) {return s.length();}
|
public static int Len(String s) {return s.length();}
|
||||||
public static char CharAt(String s, int i) {return s.charAt(i);}
|
public static char CharAt(String s, int i) {return s.charAt(i);}
|
||||||
|
|
||||||
|
/**
 * Builds a String from an entire byte array by delegating to new_u8(v, 0, v.length).
 *
 * @param v source bytes; may be null
 * @return null when v is null; otherwise the result of the ranged overload over the whole array
 */
public static String new_u8(byte[] v) {
	if (v == null) return null;
	return new_u8(v, 0, v.length);
}
|
||||||
public static String new_u8(byte[] v, int bgn, int end) {
|
public static String new_u8(byte[] v, int bgn, int end) {
|
||||||
try {
|
try {
|
||||||
return v == null
|
return v == null
|
||||||
@ -28,10 +37,62 @@ public class String_ {
|
|||||||
catch (Exception e) {Err_.Noop(e); throw Err_.new_("core", "unsupported encoding", "bgn", bgn, "end", end);}
|
catch (Exception e) {Err_.Noop(e); throw Err_.new_("core", "unsupported encoding", "bgn", bgn, "end", end);}
|
||||||
}
|
}
|
||||||
|
|
||||||
public static final Class<?> Cls_ref_type = String.class;
|
// use C# flavor ("a {0}") rather than Java format ("a %s"); also: (a) don't fail on format errors; (b) escape brackets by doubling
|
||||||
public static final String Cls_val_name = "str" + "ing";
|
private static final char FORMAT_ITM_LHS = '{', FORMAT_ITM_RHS = '}';
|
||||||
public static final int Find_none = -1, Pos_neg1 = -1;
|
public static String Format(String fmt, Object... args) {
|
||||||
public static final String Null = null, Empty = "", Null_mark = "<<NULL>>", Tab = "\t", Lf = "\n", CrLf = "\r\n";
|
// method vars
|
||||||
|
int args_len = Array_.Len_obj(args);
|
||||||
|
if (args_len == 0) return fmt; // nothing to format
|
||||||
|
int fmt_len = Len(fmt);
|
||||||
|
|
||||||
|
// loop vars
|
||||||
|
int pos = 0; String arg_idx_str = ""; boolean inside_brackets = false;
|
||||||
|
String_bldr bfr = String_bldr_.new_();
|
||||||
|
while (pos < fmt_len) { // loop over every char; NOTE: UT8-SAFE b/c only checking for "{"; "}"
|
||||||
|
char c = CharAt(fmt, pos);
|
||||||
|
if (inside_brackets) {
|
||||||
|
if (c == FORMAT_ITM_LHS) { // first FORMAT_ITM_LHS is fake; add FORMAT_ITM_LHS and whatever is in arg_idx_str
|
||||||
|
bfr.Add(FORMAT_ITM_LHS).Add(arg_idx_str);
|
||||||
|
arg_idx_str = "";
|
||||||
|
}
|
||||||
|
else if (c == FORMAT_ITM_RHS) { // itm completed
|
||||||
|
int args_idx = Int_.Parse_or(arg_idx_str, Int_.Min_value);
|
||||||
|
String itm = args_idx != Int_.Min_value && Int_.Between(args_idx, 0, args_len - 1) // check (a) args_idx is num; (b) args_idx is in bounds
|
||||||
|
? Object_.Xto_str_strict_or_empty(args[args_idx]) // valid; add itm
|
||||||
|
: String_.Concat_any(FORMAT_ITM_LHS, arg_idx_str, FORMAT_ITM_RHS); // not valid; just add String
|
||||||
|
bfr.Add(itm);
|
||||||
|
inside_brackets = false;
|
||||||
|
arg_idx_str = "";
|
||||||
|
}
|
||||||
|
else
|
||||||
|
arg_idx_str += c;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
if (c == FORMAT_ITM_LHS || c == FORMAT_ITM_RHS) {
|
||||||
|
boolean pos_is_end = pos == fmt_len - 1;
|
||||||
|
if (pos_is_end) // last char is "{" or "}" (and not inside_brackets); ignore and just ad
|
||||||
|
bfr.Add(c);
|
||||||
|
else {
|
||||||
|
char next = CharAt(fmt, pos + 1);
|
||||||
|
if (next == c) { // "{{" or "}}": escape by doubling
|
||||||
|
bfr.Add(c);
|
||||||
|
pos++;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
inside_brackets = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
bfr.Add(c);
|
||||||
|
}
|
||||||
|
pos++;
|
||||||
|
}
|
||||||
|
if (Len(arg_idx_str) > 0) // unclosed bracket; add FORMAT_ITM_LHS and whatever is in arg_idx_str; ex: "{0"
|
||||||
|
bfr.Add(FORMAT_ITM_LHS).Add(arg_idx_str);
|
||||||
|
return bfr.To_str();
|
||||||
|
}
|
||||||
|
|
||||||
|
// -------- TO_MIGRATE --------
|
||||||
public static String cast(Object v) {return (String)v;}
|
public static String cast(Object v) {return (String)v;}
|
||||||
public static String as_(Object obj) {return obj instanceof String ? (String)obj : null;}
|
public static String as_(Object obj) {return obj instanceof String ? (String)obj : null;}
|
||||||
public static String new_a7(byte[] v) {return v == null ? null : new_a7(v, 0, v.length);}
|
public static String new_a7(byte[] v) {return v == null ? null : new_a7(v, 0, v.length);}
|
||||||
@ -43,7 +104,6 @@ public class String_ {
|
|||||||
}
|
}
|
||||||
catch (Exception e) {throw Err_.new_exc(e, "core", "unsupported encoding");}
|
catch (Exception e) {throw Err_.new_exc(e, "core", "unsupported encoding");}
|
||||||
}
|
}
|
||||||
public static String new_u8(byte[] v) {return v == null ? null : new_u8(v, 0, v.length);}
|
|
||||||
public static String new_u8__by_len(byte[] v, int bgn, int len) {
|
public static String new_u8__by_len(byte[] v, int bgn, int len) {
|
||||||
int v_len = v.length;
|
int v_len = v.length;
|
||||||
if (bgn + len > v_len) len = v_len - bgn;
|
if (bgn + len > v_len) len = v_len - bgn;
|
||||||
@ -111,7 +171,6 @@ public class String_ {
|
|||||||
} while (true);
|
} while (true);
|
||||||
return count;
|
return count;
|
||||||
}
|
}
|
||||||
public static boolean Eq(String lhs, String rhs) {return lhs == null ? rhs == null : lhs.equals(rhs);}
|
|
||||||
public static boolean EqAny(String lhs, String... rhsAry) {
|
public static boolean EqAny(String lhs, String... rhsAry) {
|
||||||
for (int i = 0; i < rhsAry.length; i++)
|
for (int i = 0; i < rhsAry.length; i++)
|
||||||
if (Eq(lhs, rhsAry[i])) return true;
|
if (Eq(lhs, rhsAry[i])) return true;
|
||||||
@ -267,7 +326,6 @@ public class String_ {
|
|||||||
if (pos < 0 || pos >= String_.Len(s)) throw Err_.new_wo_type("String_.Insert failed; pos invalid", "pos", pos, "s", s, "toInsert", toInsert);
|
if (pos < 0 || pos >= String_.Len(s)) throw Err_.new_wo_type("String_.Insert failed; pos invalid", "pos", pos, "s", s, "toInsert", toInsert);
|
||||||
return s.substring(0, pos) + toInsert + s.substring(pos);
|
return s.substring(0, pos) + toInsert + s.substring(pos);
|
||||||
}
|
}
|
||||||
public static String Format(String fmt, Object... args) {return Format_do(fmt, args);}
|
|
||||||
public static String FormatOrEmptyStrIfNull(String fmt, Object arg) {return arg == null ? "" : Format(fmt, arg);}
|
public static String FormatOrEmptyStrIfNull(String fmt, Object arg) {return arg == null ? "" : Format(fmt, arg);}
|
||||||
public static String Concat(char... ary) {return new String(ary);}
|
public static String Concat(char... ary) {return new String(ary);}
|
||||||
public static String Concat(String s1, String s2, String s3) {return s1 + s2 + s3;}
|
public static String Concat(String s1, String s2, String s3) {return s1 + s2 + s3;}
|
||||||
@ -381,57 +439,6 @@ public class String_ {
|
|||||||
public static String[] SplitLines_any(String s) {return Split_do(s, Op_sys.Lnx.Nl_str(), true);}
|
public static String[] SplitLines_any(String s) {return Split_do(s, Op_sys.Lnx.Nl_str(), true);}
|
||||||
public static String[] Split_lang(String s, char c) {return s.split(Character.toString(c));}
|
public static String[] Split_lang(String s, char c) {return s.split(Character.toString(c));}
|
||||||
|
|
||||||
static String Format_do(String s, Object[] ary) {
|
|
||||||
int aryLength = Array_.Len_obj(ary); if (aryLength == 0) return s; // nothing to format
|
|
||||||
String_bldr sb = String_bldr_.new_();
|
|
||||||
char bracketBgn = '{', bracketEnd = '}';
|
|
||||||
String aryVal = null; char c, next;
|
|
||||||
int pos = 0; int textLength = Len(s); String numberStr = ""; boolean bracketsOn = false;
|
|
||||||
while (true) {
|
|
||||||
if (pos == textLength) break;
|
|
||||||
c = CharAt(s, pos);
|
|
||||||
if (bracketsOn) { // mode=bracketsOn
|
|
||||||
if (c == bracketBgn) { // first bracketBgn is fake; add bracketBgn and whatever is in numberStr
|
|
||||||
sb.Add(bracketBgn).Add(numberStr);
|
|
||||||
numberStr = "";
|
|
||||||
}
|
|
||||||
else if (c == bracketEnd) {
|
|
||||||
int aryIdx = Int_.Parse_or(numberStr, Int_.Min_value);
|
|
||||||
if (aryIdx != Int_.Min_value && Int_.Between(aryIdx, 0, aryLength - 1)) // check (a) aryIdx is num; (b) aryIdx is in bounds
|
|
||||||
aryVal = Object_.Xto_str_strict_or_empty(ary[aryIdx]);
|
|
||||||
else
|
|
||||||
aryVal = String_.Concat_any(bracketBgn, numberStr, bracketEnd); // not valid, just add String
|
|
||||||
sb.Add(aryVal);
|
|
||||||
bracketsOn = false;
|
|
||||||
numberStr = "";
|
|
||||||
}
|
|
||||||
else // char=anythingElse
|
|
||||||
numberStr += c;
|
|
||||||
}
|
|
||||||
else { // mode=bracketsOff
|
|
||||||
if (c == bracketBgn || c == bracketEnd) {
|
|
||||||
boolean isEnd = pos == textLength - 1;
|
|
||||||
if (isEnd)
|
|
||||||
sb.Add(c);
|
|
||||||
else {
|
|
||||||
next = CharAt(s, pos + 1);
|
|
||||||
if (next == c) { // "{{" or "}}": escape by doubling
|
|
||||||
sb.Add(c);
|
|
||||||
pos++;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
bracketsOn = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else // char=anythingElse
|
|
||||||
sb.Add(c);
|
|
||||||
}
|
|
||||||
pos++;
|
|
||||||
}
|
|
||||||
if (Len(numberStr) > 0) // unclosed bracket; add bracketBgn and whatever is in numberStr; ex: "{0"
|
|
||||||
sb.Add(bracketBgn).Add(numberStr);
|
|
||||||
return sb.To_str();
|
|
||||||
}
|
|
||||||
static String[] Split_do(String s, String spr, boolean skipChar13) {
|
static String[] Split_do(String s, String spr, boolean skipChar13) {
|
||||||
if (String_.Eq(s, "") // "".Split('a') return array with one member: ""
|
if (String_.Eq(s, "") // "".Split('a') return array with one member: ""
|
||||||
|| String_.Eq(spr, "")) // "a".Split('\0') returns array with one member: "a"
|
|| String_.Eq(spr, "")) // "a".Split('\0') returns array with one member: "a"
|
||||||
|
@ -16,11 +16,6 @@ Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
|||||||
package gplx;
|
package gplx;
|
||||||
import org.junit.*;
|
import org.junit.*;
|
||||||
public class String__tst {
|
public class String__tst {
|
||||||
@Test public void Len() {
|
|
||||||
tst_Len("", 0);
|
|
||||||
tst_Len("abc", 3);
|
|
||||||
} void tst_Len(String v, int expd) {Tfds.Eq(expd, String_.Len(v), "Len");}
|
|
||||||
|
|
||||||
@Test public void LimitToFirst() {
|
@Test public void LimitToFirst() {
|
||||||
tst_LimitToFirst("abc", 0, "");
|
tst_LimitToFirst("abc", 0, "");
|
||||||
tst_LimitToFirst("abc", 1, "a");
|
tst_LimitToFirst("abc", 1, "a");
|
||||||
@ -120,20 +115,6 @@ public class String__tst {
|
|||||||
@Test public void Repeat() {
|
@Test public void Repeat() {
|
||||||
Tfds.Eq("333", String_.Repeat("3", 3));
|
Tfds.Eq("333", String_.Repeat("3", 3));
|
||||||
}
|
}
|
||||||
@Test public void Format() {
|
|
||||||
tst_Format("", ""); // empty
|
|
||||||
tst_Format("no args", "no args"); // no args
|
|
||||||
tst_Format("0", "{0}", 0); // one
|
|
||||||
tst_Format("0 and 1", "{0} and {1}", 0, 1); // many
|
|
||||||
tst_Format("{", "{{", 0); // escape bracketBgn
|
|
||||||
tst_Format("}", "}}", 0); // escape bracketEnd
|
|
||||||
tst_Format("{a0c}", "{a{0}c}", 0); // nested;
|
|
||||||
tst_Format("{a{b}c}", "{a{b}c}", 0); // invalid invalid
|
|
||||||
tst_Format("{1}", "{1}", 1); // invalid array index
|
|
||||||
tst_Format("{a} {b}", "{a} {b}", 0); // invalid many
|
|
||||||
tst_Format("{a}0{b}1", "{a}{0}{b}{1}", 0, 1); // invalid and valid
|
|
||||||
tst_Format("{0", "{0", 0); // invalid dangling
|
|
||||||
} void tst_Format(String expd, String fmt, Object... ary) {Tfds.Eq(expd, String_.Format(fmt, ary));}
|
|
||||||
@Test public void Split() {
|
@Test public void Split() {
|
||||||
tst_Split("ab", " ", "ab"); // no match -> return array with original input
|
tst_Split("ab", " ", "ab"); // no match -> return array with original input
|
||||||
tst_Split("ab cd", " ", "ab", "cd"); // separator.length = 1
|
tst_Split("ab cd", " ", "ab", "cd"); // separator.length = 1
|
||||||
|
@ -16,17 +16,16 @@ Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
|||||||
package gplx.langs.regxs; import gplx.*; import gplx.langs.*;
|
package gplx.langs.regxs; import gplx.*; import gplx.langs.*;
|
||||||
public class Regx_adp_ {
|
public class Regx_adp_ {
|
||||||
public static Regx_adp new_(String pattern) {return new Regx_adp(pattern);}
|
public static Regx_adp new_(String pattern) {return new Regx_adp(pattern);}
|
||||||
public static List_adp Find_all(String input, String find) {
|
public static List_adp Find_all(String src, String pat) {
|
||||||
Regx_adp regx = Regx_adp_.new_(find);
|
int src_len = String_.Len(src);
|
||||||
int idx = 0;
|
Regx_adp regx = Regx_adp_.new_(pat);
|
||||||
|
int pos = 0;
|
||||||
List_adp rv = List_adp_.New();
|
List_adp rv = List_adp_.New();
|
||||||
while (true) {
|
while (pos < src_len) {
|
||||||
Regx_match match = regx.Match(input, idx);
|
Regx_match match = regx.Match(src, pos);
|
||||||
if (match.Rslt_none()) break;
|
if (match.Rslt_none()) break;
|
||||||
rv.Add(match);
|
rv.Add(match);
|
||||||
int findBgn = match.Find_bgn();
|
pos = match.Find_bgn() + match.Find_len();
|
||||||
idx = findBgn + match.Find_len();
|
|
||||||
if (idx > String_.Len(input)) break;
|
|
||||||
}
|
}
|
||||||
return rv;
|
return rv;
|
||||||
}
|
}
|
||||||
|
@ -16,9 +16,9 @@ Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
|||||||
package gplx.dbs.sqls.itms; import gplx.*; import gplx.dbs.*; import gplx.dbs.sqls.*;
|
package gplx.dbs.sqls.itms; import gplx.*; import gplx.dbs.*; import gplx.dbs.sqls.*;
|
||||||
public class Sql_order_fld {
|
public class Sql_order_fld {
|
||||||
public Sql_order_fld(String tbl, String name, byte sort) {this.Tbl = tbl; this.Name = name; this.Sort = sort;}
|
public Sql_order_fld(String tbl, String name, byte sort) {this.Tbl = tbl; this.Name = name; this.Sort = sort;}
|
||||||
public final String Tbl;
|
public final String Tbl;
|
||||||
public final String Name;
|
public final String Name;
|
||||||
public final byte Sort;
|
public final byte Sort;
|
||||||
public String To_sql() {
|
public String To_sql() {
|
||||||
String rv = this.Name;
|
String rv = this.Name;
|
||||||
if (Tbl != null) rv = Tbl + "." + rv;
|
if (Tbl != null) rv = Tbl + "." + rv;
|
||||||
@ -30,6 +30,6 @@ public class Sql_order_fld {
|
|||||||
return rv;
|
return rv;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static final String Tbl__null = String_.Null;
|
public static final String Tbl__null = null;
|
||||||
public static final byte Sort__asc = Bool_.Y_byte, Sort__dsc = Bool_.N_byte, Sort__nil = Bool_.__byte;
|
public static final byte Sort__asc = Bool_.Y_byte, Sort__dsc = Bool_.N_byte, Sort__nil = Bool_.__byte;
|
||||||
}
|
}
|
||||||
|
@ -22,15 +22,15 @@ public class Sql_tbl_itm {
|
|||||||
this.Alias = alias;
|
this.Alias = alias;
|
||||||
this.Join_flds = join_flds;
|
this.Join_flds = join_flds;
|
||||||
}
|
}
|
||||||
public final int Join_tid;
|
public final int Join_tid;
|
||||||
public final String Db;
|
public final String Db;
|
||||||
public final String Name;
|
public final String Name;
|
||||||
public final String Alias;
|
public final String Alias;
|
||||||
public boolean Db_enabled = true;
|
public boolean Db_enabled = true;
|
||||||
public final Sql_join_fld[] Join_flds;
|
public final Sql_join_fld[] Join_flds;
|
||||||
|
|
||||||
public static final String Alias__null = String_.Null;
|
public static final String Alias__null = null;
|
||||||
public static final String Db__null = String_.Null;
|
public static final String Db__null = null;
|
||||||
public static final int
|
public static final int
|
||||||
Tid__from = 0 // "FROM"
|
Tid__from = 0 // "FROM"
|
||||||
, Tid__inner = 1 // "INNER JOIN"
|
, Tid__inner = 1 // "INNER JOIN"
|
||||||
|
@ -1,16 +1,17 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
<classpath>
|
<classpath>
|
||||||
|
<classpathentry kind="lib" path="lib/luaj_xowa.jar"/>
|
||||||
<classpathentry exported="true" kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/>
|
<classpathentry exported="true" kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/>
|
||||||
<classpathentry combineaccessrules="false" exported="true" kind="src" path="/100_core"/>
|
<classpathentry combineaccessrules="false" exported="true" kind="src" path="/100_core"/>
|
||||||
<classpathentry combineaccessrules="false" exported="true" kind="src" path="/140_dbs"/>
|
<classpathentry combineaccessrules="false" exported="true" kind="src" path="/140_dbs"/>
|
||||||
<classpathentry combineaccessrules="false" exported="true" kind="src" path="/150_gfui"/>
|
<classpathentry combineaccessrules="false" exported="true" kind="src" path="/150_gfui"/>
|
||||||
<classpathentry kind="src" path="src"/>
|
<classpathentry kind="src" path="src"/>
|
||||||
<classpathentry exported="true" kind="lib" path="lib/luaj_xowa.jar"/>
|
|
||||||
<classpathentry exported="true" kind="lib" path="lib/jtidy_xowa.jar"/>
|
<classpathentry exported="true" kind="lib" path="lib/jtidy_xowa.jar"/>
|
||||||
<classpathentry exported="true" kind="con" path="org.eclipse.jdt.junit.JUNIT_CONTAINER/4"/>
|
<classpathentry exported="true" kind="con" path="org.eclipse.jdt.junit.JUNIT_CONTAINER/4"/>
|
||||||
<classpathentry combineaccessrules="false" kind="src" path="/gplx.gflucene"/>
|
<classpathentry combineaccessrules="false" kind="src" path="/gplx.gflucene"/>
|
||||||
<classpathentry exported="true" kind="lib" path="lib/icu4j-57_1.jar"/>
|
<classpathentry exported="true" kind="lib" path="lib/icu4j-57_1.jar"/>
|
||||||
<classpathentry kind="lib" path="lib/vnu.jar"/>
|
<classpathentry kind="lib" path="lib/vnu.jar"/>
|
||||||
<classpathentry kind="lib" path="lib/Saxon-HE-9.9.1-2.jar"/>
|
<classpathentry kind="lib" path="lib/Saxon-HE-9.9.1-2.jar"/>
|
||||||
|
<classpathentry combineaccessrules="false" kind="src" path="/baselib"/>
|
||||||
<classpathentry kind="output" path="bin"/>
|
<classpathentry kind="output" path="bin"/>
|
||||||
</classpath>
|
</classpath>
|
||||||
|
@ -1,51 +0,0 @@
|
|||||||
/*
|
|
||||||
XOWA: the XOWA Offline Wiki Application
|
|
||||||
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
|
||||||
|
|
||||||
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
|
||||||
or alternatively under the terms of the Apache License Version 2.0.
|
|
||||||
|
|
||||||
You may use XOWA according to either of these licenses as is most appropriate
|
|
||||||
for your project on a case-by-case basis.
|
|
||||||
|
|
||||||
The terms of each license can be found in the source code repository:
|
|
||||||
|
|
||||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
|
||||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
|
||||||
*/
|
|
||||||
package gplx.core.intls; import gplx.*; import gplx.core.*;
|
|
||||||
public interface Unicode_string {
|
|
||||||
boolean Tid_is_single();
|
|
||||||
String Src_string();
|
|
||||||
byte[] Src_bytes();
|
|
||||||
int Len_codes();
|
|
||||||
int Len_chars();
|
|
||||||
int Len_bytes();
|
|
||||||
int Val_codes(int i);
|
|
||||||
int Pos_codes_to_bytes(int i);
|
|
||||||
int Pos_codes_to_chars(int i);
|
|
||||||
int Pos_bytes_to_chars(int i);
|
|
||||||
int Pos_bytes_to_codes(int i);
|
|
||||||
int Pos_chars_to_codes(int i);
|
|
||||||
}
|
|
||||||
class Unicode_string_single implements Unicode_string { // 1 byte == 1 codepoint
|
|
||||||
private final int[] codes;
|
|
||||||
public Unicode_string_single(String src_string, byte[] src_bytes, int[] codes, int codes_len) {
|
|
||||||
this.src_string = src_string;
|
|
||||||
this.src_bytes = src_bytes;
|
|
||||||
this.codes = codes;
|
|
||||||
this.codes_len = codes_len;
|
|
||||||
}
|
|
||||||
public boolean Tid_is_single() {return true;}
|
|
||||||
public String Src_string() {return src_string;} private final String src_string;
|
|
||||||
public byte[] Src_bytes() {return src_bytes;} private final byte[] src_bytes;
|
|
||||||
public int Len_codes() {return codes_len;} private final int codes_len;
|
|
||||||
public int Len_chars() {return codes_len;}
|
|
||||||
public int Len_bytes() {return codes_len;}
|
|
||||||
public int Val_codes(int i) {return codes[i];}
|
|
||||||
public int Pos_codes_to_bytes(int i) {if (i < 0 || i > codes_len) throw Err_.new_wo_type("invalid idx", "src", src_string, "idx", i); return i;}
|
|
||||||
public int Pos_codes_to_chars(int i) {if (i < 0 || i > codes_len) throw Err_.new_wo_type("invalid idx", "src", src_string, "idx", i); return i;}
|
|
||||||
public int Pos_bytes_to_chars(int i) {if (i < 0 || i > codes_len) throw Err_.new_wo_type("invalid idx", "src", src_string, "idx", i); return i;}
|
|
||||||
public int Pos_bytes_to_codes(int i) {if (i < 0 || i > codes_len) throw Err_.new_wo_type("invalid idx", "src", src_string, "idx", i); return i;}
|
|
||||||
public int Pos_chars_to_codes(int i) {if (i < 0 || i > codes_len) throw Err_.new_wo_type("invalid idx", "src", src_string, "idx", i); return i;}
|
|
||||||
}
|
|
@ -1,48 +0,0 @@
|
|||||||
/*
|
|
||||||
XOWA: the XOWA Offline Wiki Application
|
|
||||||
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
|
||||||
|
|
||||||
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
|
||||||
or alternatively under the terms of the Apache License Version 2.0.
|
|
||||||
|
|
||||||
You may use XOWA according to either of these licenses as is most appropriate
|
|
||||||
for your project on a case-by-case basis.
|
|
||||||
|
|
||||||
The terms of each license can be found in the source code repository:
|
|
||||||
|
|
||||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
|
||||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
|
||||||
*/
|
|
||||||
package gplx.core.intls; import gplx.*; import gplx.core.*;
|
|
||||||
public class Unicode_string_ {
|
|
||||||
public static Unicode_string New(String orig) {
|
|
||||||
// null
|
|
||||||
if (orig == null)
|
|
||||||
return new Unicode_string_single(null, null, null, 0);
|
|
||||||
|
|
||||||
// init bytes
|
|
||||||
byte[] bytes = Bry_.new_u8(orig);
|
|
||||||
int bytes_len = bytes.length;
|
|
||||||
|
|
||||||
// init codes
|
|
||||||
int[] codes = new int[bytes_len];
|
|
||||||
int codes_len = 0;
|
|
||||||
|
|
||||||
// loop
|
|
||||||
int bytes_pos = 0;
|
|
||||||
int chars_pos = 0;
|
|
||||||
while (bytes_pos < bytes_len) {
|
|
||||||
// set codes
|
|
||||||
codes[codes_len] = Utf16_.Decode_to_int(bytes, bytes_pos);
|
|
||||||
|
|
||||||
// increment
|
|
||||||
int cur_byte_len = Utf8_.Len_of_char_by_1st_byte(bytes[bytes_pos]);
|
|
||||||
bytes_pos += cur_byte_len;
|
|
||||||
chars_pos += Utf8_.Len_of_char_by_bytes_len(cur_byte_len);
|
|
||||||
codes_len += 1;
|
|
||||||
}
|
|
||||||
return codes_len == bytes_len
|
|
||||||
? (Unicode_string)new Unicode_string_single(orig, bytes, codes, codes_len)
|
|
||||||
: (Unicode_string)new Unicode_string_multi (orig, bytes, bytes_len, codes, codes_len, chars_pos);
|
|
||||||
}
|
|
||||||
}
|
|
@ -1,85 +0,0 @@
|
|||||||
/*
|
|
||||||
XOWA: the XOWA Offline Wiki Application
|
|
||||||
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
|
||||||
|
|
||||||
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
|
||||||
or alternatively under the terms of the Apache License Version 2.0.
|
|
||||||
|
|
||||||
You may use XOWA according to either of these licenses as is most appropriate
|
|
||||||
for your project on a case-by-case basis.
|
|
||||||
|
|
||||||
The terms of each license can be found in the source code repository:
|
|
||||||
|
|
||||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
|
||||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
|
||||||
*/
|
|
||||||
package gplx.core.intls; import gplx.*; import gplx.core.*;
|
|
||||||
class Unicode_string_multi implements Unicode_string {
|
|
||||||
private final int[] codes;
|
|
||||||
private final int[] codes_to_bytes;
|
|
||||||
private final int[] codes_to_chars;
|
|
||||||
private final int[] bytes_to_chars;
|
|
||||||
private final int[] bytes_to_codes;
|
|
||||||
private final int[] chars_to_codes;
|
|
||||||
|
|
||||||
public Unicode_string_multi(String src, byte[] bytes, int bytes_len, int[] codes, int codes_len, int chars_len) {
|
|
||||||
// set member vars
|
|
||||||
this.src = src;
|
|
||||||
this.bytes = bytes;
|
|
||||||
this.bytes_len = bytes_len;
|
|
||||||
this.codes = codes;
|
|
||||||
this.codes_len = codes_len;
|
|
||||||
this.chars_len = chars_len;
|
|
||||||
|
|
||||||
// init maps
|
|
||||||
this.codes_to_bytes = new int[codes_len + Adj_end];
|
|
||||||
this.codes_to_chars = new int[codes_len + Adj_end];
|
|
||||||
this.bytes_to_codes = New_int_ary(bytes_len);
|
|
||||||
this.bytes_to_chars = New_int_ary(bytes_len);
|
|
||||||
this.chars_to_codes = New_int_ary(chars_len);
|
|
||||||
|
|
||||||
// init loop
|
|
||||||
int codes_pos = 0;
|
|
||||||
int bytes_pos = 0;
|
|
||||||
int chars_pos = 0;
|
|
||||||
|
|
||||||
// loop till EOS
|
|
||||||
while (true) {
|
|
||||||
// update
|
|
||||||
codes_to_bytes[codes_pos] = bytes_pos;
|
|
||||||
codes_to_chars[codes_pos] = chars_pos;
|
|
||||||
bytes_to_chars[bytes_pos] = chars_pos;
|
|
||||||
bytes_to_codes[bytes_pos] = codes_pos;
|
|
||||||
chars_to_codes[chars_pos] = codes_pos;
|
|
||||||
|
|
||||||
if (bytes_pos == bytes_len) break;
|
|
||||||
|
|
||||||
// increment
|
|
||||||
int cur_byte_len = Utf8_.Len_of_char_by_1st_byte(bytes[bytes_pos]);
|
|
||||||
bytes_pos += cur_byte_len;
|
|
||||||
chars_pos += Utf8_.Len_of_char_by_bytes_len(cur_byte_len);
|
|
||||||
codes_pos += 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
public boolean Tid_is_single() {return false;}
|
|
||||||
public String Src_string() {return src;} private final String src;
|
|
||||||
public byte[] Src_bytes() {return bytes;} private final byte[] bytes;
|
|
||||||
public int Len_codes() {return codes_len;} private final int codes_len;
|
|
||||||
public int Len_chars() {return chars_len;} private final int chars_len;
|
|
||||||
public int Len_bytes() {return bytes_len;} private final int bytes_len;
|
|
||||||
public int Val_codes(int i) {return codes[i];}
|
|
||||||
public int Pos_codes_to_bytes(int i) {return codes_to_bytes[i];}
|
|
||||||
public int Pos_codes_to_chars(int i) {return codes_to_chars[i];}
|
|
||||||
public int Pos_bytes_to_chars(int i) {int rv = bytes_to_chars[i]; if (rv == Invalid) throw Err_.new_wo_type("invalid i", "src", src, "type", "bytes_to_chars", "i", i); return rv;}
|
|
||||||
public int Pos_bytes_to_codes(int i) {int rv = bytes_to_codes[i]; if (rv == Invalid) throw Err_.new_wo_type("invalid i", "src", src, "type", "bytes_to_codes", "i", i); return rv;}
|
|
||||||
public int Pos_chars_to_codes(int i) {int rv = chars_to_codes[i]; if (rv == Invalid) throw Err_.new_wo_type("invalid i", "src", src, "type", "chars_to_codes", "i", i); return rv;}
|
|
||||||
|
|
||||||
private static final int Invalid = -1, Adj_end = 1; // +1 to store last pos as len of String; needed for regex which returns match.Find_end() which will be len of String; EX: abc -> [0, 1, 2, 3]
|
|
||||||
private static int[] New_int_ary(int len) {
|
|
||||||
int rv_len = len + Adj_end;
|
|
||||||
int[] rv = new int[rv_len];
|
|
||||||
for (int i = 0; i < rv_len; i++)
|
|
||||||
rv[i] = Invalid;
|
|
||||||
return rv;
|
|
||||||
}
|
|
||||||
}
|
|
@ -1,110 +0,0 @@
|
|||||||
/*
|
|
||||||
XOWA: the XOWA Offline Wiki Application
|
|
||||||
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
|
||||||
|
|
||||||
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
|
||||||
or alternatively under the terms of the Apache License Version 2.0.
|
|
||||||
|
|
||||||
You may use XOWA according to either of these licenses as is most appropriate
|
|
||||||
for your project on a case-by-case basis.
|
|
||||||
|
|
||||||
The terms of each license can be found in the source code repository:
|
|
||||||
|
|
||||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
|
||||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
|
||||||
*/
|
|
||||||
package gplx.core.intls; import gplx.*; import gplx.core.*;
|
|
||||||
import org.junit.*; import gplx.core.tests.*;
|
|
||||||
public class Unicode_string_tst {
|
|
||||||
private final Unicode_string_fxt fxt = new Unicode_string_fxt();
|
|
||||||
@Test public void Null() {
|
|
||||||
fxt.Init(null);
|
|
||||||
fxt.Test__Len(0, 0, 0);
|
|
||||||
}
|
|
||||||
@Test public void Blank() {
|
|
||||||
fxt.Init("");
|
|
||||||
fxt.Test__Len(0, 0, 0);
|
|
||||||
}
|
|
||||||
@Test public void Single() {
|
|
||||||
fxt.Init("Abc");
|
|
||||||
fxt.Test__Len(3, 3, 3);
|
|
||||||
fxt.Test__Val_codes(65, 98, 99);
|
|
||||||
fxt.Test__Pos_codes_to_bytes(0, 1, 2, 3);
|
|
||||||
fxt.Test__Pos_codes_to_chars(0, 1, 2, 3);
|
|
||||||
fxt.Test__Pos_chars_to_codes(0, 1, 2, 3);
|
|
||||||
fxt.Test__Pos_bytes_to_codes(0, 1, 2, 3);
|
|
||||||
}
|
|
||||||
@Test public void Multi() {
|
|
||||||
fxt.Init("a¢€𤭢");
|
|
||||||
fxt.Test__Len(4, 5, 10);
|
|
||||||
fxt.Test__Val_codes(97, 162, 8364, 150370);
|
|
||||||
fxt.Test__Pos_codes_to_bytes(0, 1, 3, 6, 10);
|
|
||||||
fxt.Test__Pos_codes_to_chars(0, 1, 2, 3, 5);
|
|
||||||
fxt.Test__Pos_chars_to_codes( 0, 1, 2, 3, -1, 4);
|
|
||||||
fxt.Test__Pos_bytes_to_codes( 0, 1, -1, 2, -1, -1, 3, -1, -1, -1, 4);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
class Unicode_string_fxt {
|
|
||||||
private Unicode_string under;
|
|
||||||
public void Init(String src) {
|
|
||||||
this.under = Unicode_string_.New(src);
|
|
||||||
}
|
|
||||||
public void Test__Len(int expd_codes, int expd_chars, int expd_bytes) {
|
|
||||||
Gftest.Eq__int(expd_codes, under.Len_codes(), "codes");
|
|
||||||
Gftest.Eq__int(expd_chars, under.Len_chars(), "chars");
|
|
||||||
Gftest.Eq__int(expd_bytes, under.Len_bytes(), "bytes");
|
|
||||||
}
|
|
||||||
public void Test__Val_codes(int... expd) {
|
|
||||||
int actl_len = under.Len_codes();
|
|
||||||
int[] actl = new int[actl_len];
|
|
||||||
for (int i = 0; i < actl_len; i++)
|
|
||||||
actl[i] = under.Val_codes(i);
|
|
||||||
Gftest.Eq__ary(expd, actl);
|
|
||||||
}
|
|
||||||
public void Test__Pos_codes_to_bytes(int... expd) {
|
|
||||||
int actl_len = under.Len_codes() + 1;
|
|
||||||
int[] actl = new int[actl_len];
|
|
||||||
for (int i = 0; i < actl_len; i++)
|
|
||||||
actl[i] = under.Pos_codes_to_bytes(i);
|
|
||||||
Gftest.Eq__ary(expd, actl);
|
|
||||||
}
|
|
||||||
public void Test__Pos_codes_to_chars(int... expd) {
|
|
||||||
int actl_len = under.Len_codes() + 1;
|
|
||||||
int[] actl = new int[actl_len];
|
|
||||||
for (int i = 0; i < actl_len; i++)
|
|
||||||
actl[i] = under.Pos_codes_to_chars(i);
|
|
||||||
Gftest.Eq__ary(expd, actl);
|
|
||||||
}
|
|
||||||
public void Test__Pos_bytes_to_codes(int... expd) {
|
|
||||||
int actl_len = under.Len_bytes() + 1;
|
|
||||||
int[] actl = new int[actl_len];
|
|
||||||
for (int i = 0; i < actl_len; i++) {
|
|
||||||
int val = 0;
|
|
||||||
try {
|
|
||||||
val = under.Pos_bytes_to_codes(i);
|
|
||||||
}
|
|
||||||
catch (Exception exc) {
|
|
||||||
val = -1;
|
|
||||||
Err_.Noop(exc);
|
|
||||||
}
|
|
||||||
actl[i] = val;
|
|
||||||
}
|
|
||||||
Gftest.Eq__ary(expd, actl);
|
|
||||||
}
|
|
||||||
public void Test__Pos_chars_to_codes(int... expd) {
|
|
||||||
int actl_len = under.Len_chars() + 1;
|
|
||||||
int[] actl = new int[actl_len];
|
|
||||||
for (int i = 0; i < actl_len; i++) {
|
|
||||||
int val = 0;
|
|
||||||
try {
|
|
||||||
val = under.Pos_chars_to_codes(i);
|
|
||||||
}
|
|
||||||
catch (Exception exc) {
|
|
||||||
val = -1;
|
|
||||||
Err_.Noop(exc);
|
|
||||||
}
|
|
||||||
actl[i] = val;
|
|
||||||
}
|
|
||||||
Gftest.Eq__ary(expd, actl);
|
|
||||||
}
|
|
||||||
}
|
|
@ -14,6 +14,7 @@ GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
|||||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||||
*/
|
*/
|
||||||
package gplx.xowa.xtns.scribunto.libs; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.scribunto.*;
|
package gplx.xowa.xtns.scribunto.libs; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.scribunto.*;
|
||||||
|
import gplx.objects.strings.unicodes.*;
|
||||||
import gplx.core.intls.*; import gplx.langs.regxs.*;
|
import gplx.core.intls.*; import gplx.langs.regxs.*;
|
||||||
import gplx.xowa.parsers.*;
|
import gplx.xowa.parsers.*;
|
||||||
import gplx.xowa.xtns.scribunto.procs.*;
|
import gplx.xowa.xtns.scribunto.procs.*;
|
||||||
@ -55,10 +56,10 @@ public class Scrib_lib_ustring implements Scrib_lib {
|
|||||||
boolean plain = args.Cast_bool_or_n(3);
|
boolean plain = args.Cast_bool_or_n(3);
|
||||||
|
|
||||||
// init text vars
|
// init text vars
|
||||||
Unicode_string text_ucs = Unicode_string_.New(text_str); // NOTE: must count codes for supplementaries; PAGE:en.d:iglesia DATE:2017-04-23
|
Ustring text_ucs = Ustring_.New_codepoints(text_str); // NOTE: must count codes for supplementaries; PAGE:en.d:iglesia DATE:2017-04-23
|
||||||
|
|
||||||
// convert bgn from base_1 to base_0
|
// convert bgn from base_1 to base_0
|
||||||
int bgn_as_codes = To_java_by_lua(bgn_as_codes_base1, text_ucs.Len_codes());
|
int bgn_as_codes = To_java_by_lua(bgn_as_codes_base1, text_ucs.Len_in_data());
|
||||||
|
|
||||||
/*
|
/*
|
||||||
int offset = 0;
|
int offset = 0;
|
||||||
@ -80,34 +81,35 @@ public class Scrib_lib_ustring implements Scrib_lib {
|
|||||||
// if plain, just do literal match of find and exit
|
// if plain, just do literal match of find and exit
|
||||||
if (plain) {
|
if (plain) {
|
||||||
// find pos by literal match
|
// find pos by literal match
|
||||||
Unicode_string find_ucs = Unicode_string_.New(find_str);
|
Ustring find_ucs = Ustring_.New_codepoints(find_str);
|
||||||
byte[] find_bry = find_ucs.Src_bytes();
|
int pos = String_.FindFwd(text_str, find_str, bgn_as_codes);
|
||||||
int pos = Bry_find_.Find_fwd(text_ucs.Src_bytes(), find_bry, text_ucs.Pos_codes_to_bytes(bgn_as_codes));
|
|
||||||
|
|
||||||
// nothing found; return empty
|
// if nothing found, return empty
|
||||||
if (pos == Bry_find_.Not_found)
|
if (pos == String_.Find_none)
|
||||||
return rslt.Init_ary_empty();
|
return rslt.Init_ary_empty();
|
||||||
|
// else, convert char_idx to code_idx
|
||||||
|
else
|
||||||
|
pos = text_ucs.Map_char_to_data(pos);
|
||||||
|
|
||||||
// bgn: convert pos from bytes back to codes; also adjust for base1
|
// bgn: convert pos from bytes back to codes; also adjust for base1
|
||||||
int bgn = text_ucs.Pos_bytes_to_codes(pos) + Base1;
|
int bgn = pos + Base1;
|
||||||
|
|
||||||
// end: add find.Len_in_codes and adjust end for PHP/LUA
|
// end: add find.Len_in_codes and adjust end for PHP/LUA
|
||||||
int end = bgn + find_ucs.Len_codes() - End_adj;
|
int end = bgn + find_ucs.Len_in_data() - End_adj;
|
||||||
|
|
||||||
return rslt.Init_many_objs(bgn, end);
|
return rslt.Init_many_objs(bgn, end);
|
||||||
}
|
}
|
||||||
|
|
||||||
// run regex
|
// run regex; NOTE: take only 1st result; DATE:2014-08-27
|
||||||
Scrib_regx_converter regx_converter = new Scrib_regx_converter();
|
Scrib_pattern_matcher matcher = Scrib_pattern_matcher.New(core.Page_url());
|
||||||
Regx_match[] regx_rslts = Scrib_pattern_matcher_.Instance().Match(core.Ctx().Page().Url(), text_ucs, regx_converter, find_str, bgn_as_codes);
|
Regx_match match = matcher.Match_one(text_ucs, find_str, bgn_as_codes, true);
|
||||||
if (regx_rslts.length == 0) return rslt.Init_ary_empty();
|
if (match.Rslt_none()) return rslt.Init_null(); // null verified on MW; EX: =mw.ustring.find("abc", "z"); DATE:2019-04-11
|
||||||
|
|
||||||
// add to tmp_list
|
// add to tmp_list
|
||||||
Regx_match match = regx_rslts[0]; // NOTE: take only 1st result; DATE:2014-08-27
|
|
||||||
List_adp tmp_list = List_adp_.New();
|
List_adp tmp_list = List_adp_.New();
|
||||||
tmp_list.Add(text_ucs.Pos_chars_to_codes(match.Find_bgn()) + Scrib_lib_ustring.Base1);
|
tmp_list.Add(text_ucs.Map_char_to_data(match.Find_bgn()) + Scrib_lib_ustring.Base1);
|
||||||
tmp_list.Add(text_ucs.Pos_chars_to_codes(match.Find_end()) + Scrib_lib_ustring.Base1 - Scrib_lib_ustring.End_adj);
|
tmp_list.Add(text_ucs.Map_char_to_data(match.Find_end()) + Scrib_lib_ustring.Base1 - Scrib_lib_ustring.End_adj);
|
||||||
AddCapturesFromMatch(tmp_list, match, text_str, regx_converter.Capt_ary(), false);
|
AddCapturesFromMatch(tmp_list, match, text_str, matcher.Capt_ary(), false);
|
||||||
return rslt.Init_many_list(tmp_list);
|
return rslt.Init_many_list(tmp_list);
|
||||||
}
|
}
|
||||||
public boolean Match(Scrib_proc_args args, Scrib_proc_rslt rslt) {
|
public boolean Match(Scrib_proc_args args, Scrib_proc_rslt rslt) {
|
||||||
@ -119,41 +121,42 @@ public class Scrib_lib_ustring implements Scrib_lib {
|
|||||||
// validate / adjust
|
// validate / adjust
|
||||||
if (text_str == null) // if no text_str is passed, do not fail; return empty; EX:d:changed; DATE:2014-02-06
|
if (text_str == null) // if no text_str is passed, do not fail; return empty; EX:d:changed; DATE:2014-02-06
|
||||||
return rslt.Init_many_list(List_adp_.Noop);
|
return rslt.Init_many_list(List_adp_.Noop);
|
||||||
Unicode_string text_ucs = Unicode_string_.New(text_str); // NOTE: must count codes for supplementaries; PAGE:en.d:iglesia DATE:2017-04-23
|
Ustring text_ucs = Ustring_.New_codepoints(text_str); // NOTE: must count codes for supplementaries; PAGE:en.d:iglesia DATE:2017-04-23
|
||||||
int bgn_as_codes = To_java_by_lua(bgn_as_codes_base1, text_ucs.Len_codes());
|
int bgn_as_codes = To_java_by_lua(bgn_as_codes_base1, text_ucs.Len_in_data());
|
||||||
|
|
||||||
// run regex
|
// run regex; NOTE add 1st match only; do not add all; PAGE:en.d:действительное_причастие_настоящего_времени DATE:2017-04-23
|
||||||
Scrib_regx_converter regx_converter = new Scrib_regx_converter();
|
Scrib_pattern_matcher matcher = Scrib_pattern_matcher.New(core.Page_url());
|
||||||
Regx_match[] regx_rslts = Scrib_pattern_matcher_.Instance().Match(core.Ctx().Page().Url(), text_ucs, regx_converter, find_str, bgn_as_codes);
|
Regx_match match = matcher.Match_one(text_ucs, find_str, bgn_as_codes, true);
|
||||||
if (regx_rslts.length == 0) return rslt.Init_null(); // return null if no matches found; EX:w:Mount_Gambier_(volcano); DATE:2014-04-02; confirmed with en.d:民; DATE:2015-01-30
|
if (match.Rslt_none()) return rslt.Init_null(); // return null if no matches found; EX:w:Mount_Gambier_(volcano); DATE:2014-04-02; confirmed with en.d:民; DATE:2015-01-30
|
||||||
|
|
||||||
// TOMBSTONE: add 1st match only; do not add all; PAGE:en.d:действительное_причастие_настоящего_времени DATE:2017-04-23
|
|
||||||
regx_rslts = regx_converter.Adjust_balanced(regx_rslts);
|
|
||||||
List_adp tmp_list = List_adp_.New();
|
List_adp tmp_list = List_adp_.New();
|
||||||
AddCapturesFromMatch(tmp_list, regx_rslts[0], text_str, regx_converter.Capt_ary(), true);
|
AddCapturesFromMatch(tmp_list, match, text_str, matcher.Capt_ary(), true);
|
||||||
return rslt.Init_many_list(tmp_list);
|
return rslt.Init_many_list(tmp_list);
|
||||||
}
|
}
|
||||||
public boolean Gsub(Scrib_proc_args args, Scrib_proc_rslt rslt) {
|
public boolean Gsub(Scrib_proc_args args, Scrib_proc_rslt rslt) {
|
||||||
Scrib_lib_ustring_gsub_mgr gsub_mgr = new Scrib_lib_ustring_gsub_mgr(core, new Scrib_regx_converter());
|
Scrib_lib_ustring_gsub_mgr gsub_mgr = new Scrib_lib_ustring_gsub_mgr(core);
|
||||||
return gsub_mgr.Exec(args, rslt);
|
return gsub_mgr.Exec(args, rslt);
|
||||||
}
|
}
|
||||||
public boolean Gmatch_init(Scrib_proc_args args, Scrib_proc_rslt rslt) {
|
public boolean Gmatch_init(Scrib_proc_args args, Scrib_proc_rslt rslt) {
|
||||||
// String text = Scrib_kv_utl_.Val_to_str(values, 0);
|
// String text = Scrib_kv_utl_.Val_to_str(values, 0);
|
||||||
String regx = args.Pull_str(1);
|
String regx = args.Pull_str(1);
|
||||||
Scrib_regx_converter regx_converter = new Scrib_regx_converter();
|
Scrib_regx_converter regx_converter = new Scrib_regx_converter();
|
||||||
String pcre = regx_converter.patternToRegex(regx, Scrib_regx_converter.Anchor_null, true);
|
if (Scrib_pattern_matcher.Mode_is_xowa())
|
||||||
return rslt.Init_many_objs(pcre, regx_converter.Capt_ary());
|
regx_converter.patternToRegex(regx, Scrib_regx_converter.Anchor_null, true);
|
||||||
|
else
|
||||||
|
regx = regx_converter.patternToRegex(regx, Scrib_regx_converter.Anchor_null, true);
|
||||||
|
return rslt.Init_many_objs(regx, regx_converter.Capt_ary());
|
||||||
}
|
}
|
||||||
public boolean Gmatch_callback(Scrib_proc_args args, Scrib_proc_rslt rslt) {
|
public boolean Gmatch_callback(Scrib_proc_args args, Scrib_proc_rslt rslt) {
|
||||||
String text = args.Xstr_str_or_null(0); // NOTE: UstringLibrary.php!ustringGmatchCallback calls preg_match directly; $s can be any type, and php casts automatically;
|
String text = args.Xstr_str_or_null(0); // NOTE: UstringLibrary.php!ustringGmatchCallback calls preg_match directly; $s can be any type, and php casts automatically;
|
||||||
String regx = args.Pull_str(1);
|
String regx = args.Pull_str(1);
|
||||||
Keyval[] capt = args.Cast_kv_ary_or_null(2);
|
Keyval[] capt = args.Cast_kv_ary_or_null(2);
|
||||||
int pos = args.Pull_int(3);
|
int pos = args.Pull_int(3);
|
||||||
Regx_adp regx_adp = Scrib_lib_ustring.RegxAdp_new_(core.Ctx().Page().Url(), regx);
|
|
||||||
Regx_match[] regx_rslts = regx_adp.Match_all(text, pos);
|
Ustring text_ucs = Ustring_.New_codepoints(text);
|
||||||
int len = regx_rslts.length;
|
// int pos_as_codes = To_java_by_lua(pos, text_ucs.Len_in_data());
|
||||||
if (len == 0) return rslt.Init_many_objs(pos, Keyval_.Ary_empty);
|
Regx_match match = Scrib_pattern_matcher.New(core.Page_url()).Match_one(text_ucs, regx, pos, false);
|
||||||
Regx_match match = regx_rslts[0]; // NOTE: take only 1st result
|
if (match.Rslt_none()) return rslt.Init_many_objs(pos, Keyval_.Ary_empty);
|
||||||
List_adp tmp_list = List_adp_.New();
|
List_adp tmp_list = List_adp_.New();
|
||||||
AddCapturesFromMatch(tmp_list, match, text, capt, true); // NOTE: was incorrectly set as false; DATE:2014-04-23
|
AddCapturesFromMatch(tmp_list, match, text, capt, true); // NOTE: was incorrectly set as false; DATE:2014-04-23
|
||||||
return rslt.Init_many_objs(match.Find_end(), Scrib_kv_utl_.base1_list_(tmp_list));
|
return rslt.Init_many_objs(match.Find_end(), Scrib_kv_utl_.base1_list_(tmp_list));
|
||||||
@ -198,12 +201,12 @@ public class Scrib_lib_ustring implements Scrib_lib {
|
|||||||
&& tmp_list.Count() == 0) // only add match once; EX: "aaaa", "a" will have four matches; get 1st; DATE:2014-04-02
|
&& tmp_list.Count() == 0) // only add match once; EX: "aaaa", "a" will have four matches; get 1st; DATE:2014-04-02
|
||||||
tmp_list.Add(String_.Mid(text, rslt.Find_bgn(), rslt.Find_end()));
|
tmp_list.Add(String_.Mid(text, rslt.Find_bgn(), rslt.Find_end()));
|
||||||
}
|
}
|
||||||
public static Regx_adp RegxAdp_new_(Xoa_url url, String regx) {
|
public static Regx_adp RegxAdp_new_(byte[] page_url, String regx) {
|
||||||
Regx_adp rv = Regx_adp_.new_(regx);
|
Regx_adp rv = Regx_adp_.new_(regx);
|
||||||
if (rv.Pattern_is_invalid()) {
|
if (rv.Pattern_is_invalid()) {
|
||||||
// try to identify [z-a] errors; PAGE:https://en.wiktionary.org/wiki/Module:scripts/data; DATE:2017-04-23
|
// try to identify [z-a] errors; PAGE:https://en.wiktionary.org/wiki/Module:scripts/data; DATE:2017-04-23
|
||||||
Exception exc = rv.Pattern_is_invalid_exception();
|
Exception exc = rv.Pattern_is_invalid_exception();
|
||||||
Gfo_usr_dlg_.Instance.Log_many("", "", "regx is invalid: regx=~{0} page=~{1} exc=~{2}", regx, url.To_bry(), Err_.Message_gplx_log(exc));
|
Gfo_usr_dlg_.Instance.Log_many("", "", "regx is invalid: regx=~{0} page=~{1} exc=~{2}", regx, page_url, Err_.Message_gplx_log(exc));
|
||||||
}
|
}
|
||||||
return rv;
|
return rv;
|
||||||
}
|
}
|
||||||
|
@ -40,7 +40,7 @@ public class Scrib_lib_ustring__find__tst {
|
|||||||
fxt.Test__find("𤭢𤭢b𤭢𤭢b" , "b" , 2, Bool_.N, "3;3"); // bytes=4
|
fxt.Test__find("𤭢𤭢b𤭢𤭢b" , "b" , 2, Bool_.N, "3;3"); // bytes=4
|
||||||
fxt.Test__find("abcd" , "b" , 1, Bool_.N, "2;2"); // basic
|
fxt.Test__find("abcd" , "b" , 1, Bool_.N, "2;2"); // basic
|
||||||
fxt.Test__find("abad" , "a" , 2, Bool_.N, "3;3"); // bgn
|
fxt.Test__find("abad" , "a" , 2, Bool_.N, "3;3"); // bgn
|
||||||
fxt.Test__find("abcd" , "x" , 1, Bool_.N, ""); // no-match
|
fxt.Test__find("abcd" , "x" , 1, Bool_.N, String_.Null_mark); // no-match
|
||||||
fxt.Test__find("abcd" , "" , 2, Bool_.N, "2;1"); // empty regx should return values; regx; EX:w:Fool's_mate; DATE:2014-03-04
|
fxt.Test__find("abcd" , "" , 2, Bool_.N, "2;1"); // empty regx should return values; regx; EX:w:Fool's_mate; DATE:2014-03-04
|
||||||
}
|
}
|
||||||
@Test public void Regx__int() { // PURPOSE: allow int find; PAGE:ro.w:Innsbruck DATE:2015-09-12
|
@Test public void Regx__int() { // PURPOSE: allow int find; PAGE:ro.w:Innsbruck DATE:2015-09-12
|
||||||
@ -64,6 +64,9 @@ public class Scrib_lib_ustring__find__tst {
|
|||||||
fxt.Test__find("aé𡼾\nbî𡼾\n" , "" , 1, Bool_.N, "1;0"); // 4 b/c \n starts at pos 4 (super 1)
|
fxt.Test__find("aé𡼾\nbî𡼾\n" , "" , 1, Bool_.N, "1;0"); // 4 b/c \n starts at pos 4 (super 1)
|
||||||
fxt.Test__find("aé𡼾\nbî𡼾\n" , "" , 5, Bool_.N, "5;4"); // 8 b/c \n starts at pos 8 (super 1)
|
fxt.Test__find("aé𡼾\nbî𡼾\n" , "" , 5, Bool_.N, "5;4"); // 8 b/c \n starts at pos 8 (super 1)
|
||||||
}
|
}
|
||||||
|
@Test public void Balanced__numbered_1() { // PURPOSE: handle mix of balanced and regular capture; PAGE:en.w:Bahamas
|
||||||
|
fxt.Test__find("[[5]]XccY", "%b[]X(%a)%1Y", 1, Bool_.N, "1;9;c");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
class Scrib_lib_ustring__find__fxt {
|
class Scrib_lib_ustring__find__fxt {
|
||||||
private boolean dbg = false;
|
private boolean dbg = false;
|
||||||
|
@ -16,7 +16,7 @@ Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
|||||||
package gplx.xowa.xtns.scribunto.libs; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.scribunto.*;
|
package gplx.xowa.xtns.scribunto.libs; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.scribunto.*;
|
||||||
import org.junit.*; import gplx.xowa.xtns.scribunto.engines.mocks.*;
|
import org.junit.*; import gplx.xowa.xtns.scribunto.engines.mocks.*;
|
||||||
public class Scrib_lib_ustring__gmatch__tst {
|
public class Scrib_lib_ustring__gmatch__tst {
|
||||||
private final Mock_scrib_fxt fxt = new Mock_scrib_fxt(); private Scrib_lib lib;
|
private final Mock_scrib_fxt fxt = new Mock_scrib_fxt(); private Scrib_lib lib;
|
||||||
@Before public void init() {
|
@Before public void init() {
|
||||||
fxt.Clear();
|
fxt.Clear();
|
||||||
lib = fxt.Core().Lib_ustring().Init();
|
lib = fxt.Core().Lib_ustring().Init();
|
||||||
@ -48,4 +48,7 @@ public class Scrib_lib_ustring__gmatch__tst {
|
|||||||
, " 1=2"
|
, " 1=2"
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
|
@Test public void Callback__pattern() {
|
||||||
|
fxt.Test__proc__objs__nest(lib, Scrib_lib_ustring.Invk_gmatch_callback, Object_.Ary("a", "%a+", Scrib_kv_utl_.base1_many_(false), 1) , "1=1\n2="); // fails if "a" is returned; note that 1 should be eos
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -43,6 +43,15 @@ public class Scrib_lib_ustring__gsub__tst {
|
|||||||
@Test public void Replace__double() { // PURPOSE: do not fail if double is passed in for @replace; PAGE:de.v:Wikivoyage:Wikidata/Test_Modul:Wikidata2 DATE:2016-04-21
|
@Test public void Replace__double() { // PURPOSE: do not fail if double is passed in for @replace; PAGE:de.v:Wikivoyage:Wikidata/Test_Modul:Wikidata2 DATE:2016-04-21
|
||||||
Exec_gsub("abcd", 1 , -1, 1.23d , "abcd;0");
|
Exec_gsub("abcd", 1 , -1, 1.23d , "abcd;0");
|
||||||
}
|
}
|
||||||
|
@Test public void Replace__anypos() { // PURPOSE:LUAJ_PATTERN_REPLACEMENT; DATE:2019-04-16
|
||||||
|
Exec_gsub("'''a'''b", "()'''(.-'*)'''", 1, "z", "zb;1");
|
||||||
|
}
|
||||||
|
@Test public void Replace__balanced_and_grouping() { // PURPOSE:LUAJ_PATTERN_REPLACEMENT; DATE:2019-04-16
|
||||||
|
Exec_gsub("[[b]]", "%[(%b[])%]" , -1, "z" , "z;1"); // NOTE: not "[z]"
|
||||||
|
}
|
||||||
|
@Test public void Replace__initial() { // PURPOSE:whitespace being replaced during gsub replacement; DATE:2019-04-21
|
||||||
|
Exec_gsub("a b c", "^%s*", -1, "x", "xa b c;1"); // fails if xabxc
|
||||||
|
}
|
||||||
@Test public void Replace__table() {
|
@Test public void Replace__table() {
|
||||||
Exec_gsub("abcd", "[ac]" , -1, Scrib_kv_utl_.flat_many_("a", "A", "c", "C") , "AbCd;2");
|
Exec_gsub("abcd", "[ac]" , -1, Scrib_kv_utl_.flat_many_("a", "A", "c", "C") , "AbCd;2");
|
||||||
Exec_gsub("abc" , "[ab]" , -1, Scrib_kv_utl_.flat_many_("a", "A") , "Abc;2"); // PURPOSE: match not in regex should still print itself; in this case [c] is not in tbl regex; DATE:2014-03-31
|
Exec_gsub("abc" , "[ab]" , -1, Scrib_kv_utl_.flat_many_("a", "A") , "Abc;2"); // PURPOSE: match not in regex should still print itself; in this case [c] is not in tbl regex; DATE:2014-03-31
|
||||||
@ -122,6 +131,17 @@ public class Scrib_lib_ustring__gsub__tst {
|
|||||||
fxt.Init__cbk(proc);
|
fxt.Init__cbk(proc);
|
||||||
Exec_gsub(text, regx, -1, proc.To_scrib_lua_proc(), "aBYz;2");
|
Exec_gsub(text, regx, -1, proc.To_scrib_lua_proc(), "aBYz;2");
|
||||||
}
|
}
|
||||||
|
@Test public void Luacbk__balanced() { // PURPOSE:LUAJ_PATTERN_REPLACEMENT; DATE:2019-04-16
|
||||||
|
String text = "}a{{b}}c{{d}}";
|
||||||
|
String regx = "%b{}"; // "()" is anypos, which inserts find_pos to results
|
||||||
|
Mock_proc__verify_args proc = new Mock_proc__verify_args(0, new Object[]{"x", "{{b}}"}, new Object[]{"y", "{{d}}"});
|
||||||
|
fxt.Init__cbk(proc);
|
||||||
|
Exec_gsub(text, regx, -1, proc.To_scrib_lua_proc(), "}axcy;2");
|
||||||
|
}
|
||||||
|
// Mock_proc__verify_args proc = new Mock_proc__verify_args(0, new Object[]{"x", "{{yes2}}"}, new Object[]{"x", "{{flagicon|USA}}"});
|
||||||
|
// fxt.Init__cbk(proc);
|
||||||
|
// Exec_gsub("}\n|-\n|28\n|{{yes2}}Win\n|28–0\n|style=\"text-align:left;\"|{{flagicon|USA}}", "%b{}", -1, proc.To_scrib_lua_proc(), "}axbx;2"); }
|
||||||
|
//
|
||||||
private void Exec_gsub(String text, Object regx, int limit, Object repl, String expd) {
|
private void Exec_gsub(String text, Object regx, int limit, Object repl, String expd) {
|
||||||
fxt.Test__proc__kvps__flat(lib, Scrib_lib_ustring.Invk_gsub, Scrib_kv_utl_.base1_many_(text, regx, repl, limit), expd);
|
fxt.Test__proc__kvps__flat(lib, Scrib_lib_ustring.Invk_gsub, Scrib_kv_utl_.base1_many_(text, regx, repl, limit), expd);
|
||||||
}
|
}
|
||||||
|
@ -15,35 +15,39 @@ Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
|||||||
*/
|
*/
|
||||||
package gplx.xowa.xtns.scribunto.libs; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.scribunto.*;
|
package gplx.xowa.xtns.scribunto.libs; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.scribunto.*;
|
||||||
import gplx.langs.regxs.*;
|
import gplx.langs.regxs.*;
|
||||||
|
import gplx.objects.strings.unicodes.*;
|
||||||
|
import gplx.xowa.xtns.scribunto.libs.patterns.*;
|
||||||
import gplx.xowa.xtns.scribunto.procs.*;
|
import gplx.xowa.xtns.scribunto.procs.*;
|
||||||
class Scrib_lib_ustring_gsub_mgr {
|
public class Scrib_lib_ustring_gsub_mgr { // THREAD.UNSAFE:LOCAL_VALUES
|
||||||
private final Scrib_core core;
|
private final Scrib_core core;
|
||||||
private final Scrib_regx_converter regx_converter;
|
private String src_str;
|
||||||
|
private String pat_str;
|
||||||
|
private int limit;
|
||||||
|
private byte repl_tid;
|
||||||
private byte[] repl_bry; private Hash_adp repl_hash; private Scrib_lua_proc repl_func;
|
private byte[] repl_bry; private Hash_adp repl_hash; private Scrib_lua_proc repl_func;
|
||||||
private int repl_count = 0;
|
public int repl_count = 0;
|
||||||
public Scrib_lib_ustring_gsub_mgr(Scrib_core core, Scrib_regx_converter regx_converter) {
|
public Scrib_lib_ustring_gsub_mgr(Scrib_core core) {
|
||||||
this.core = core;
|
this.core = core;
|
||||||
this.regx_converter = regx_converter;
|
|
||||||
}
|
}
|
||||||
|
public void Repl_count__add() {repl_count++;}
|
||||||
|
public boolean Repl_count__done() {return repl_count == limit;}
|
||||||
public boolean Exec(Scrib_proc_args args, Scrib_proc_rslt rslt) {
|
public boolean Exec(Scrib_proc_args args, Scrib_proc_rslt rslt) {
|
||||||
// get @text; NOTE: sometimes int; DATE:2013-11-06
|
// get @src_str; NOTE: sometimes int; DATE:2013-11-06
|
||||||
String text = args.Xstr_str_or_null(0);
|
this.src_str = args.Xstr_str_or_null(0);
|
||||||
if (args.Len() == 2) return rslt.Init_obj(text); // if no @replace, return @text; PAGE:en.d:'orse; DATE:2013-10-13
|
if (args.Len() == 2) return rslt.Init_obj(src_str); // if no @replace, return @src_str; PAGE:en.d:'orse; DATE:2013-10-13
|
||||||
|
|
||||||
// get @pattern; NOTE: sometimes int; PAGE:en.d:λύω; DATE:2014-09-02
|
// get @pattern; NOTE: sometimes int; PAGE:en.d:λύω; DATE:2014-09-02
|
||||||
String regx = args.Xstr_str_or_null(1);
|
this.pat_str = args.Xstr_str_or_null(1);
|
||||||
regx = regx_converter.patternToRegex(regx, Scrib_regx_converter.Anchor_pow, true);
|
|
||||||
|
|
||||||
// get @repl
|
// get @repl
|
||||||
Object repl_obj = args.Cast_obj_or_null(2);
|
Object repl_obj = args.Cast_obj_or_null(2);
|
||||||
byte repl_tid = Identify_repl(repl_obj);
|
this.repl_tid = Identify_repl(repl_obj);
|
||||||
|
|
||||||
// get @limit; reset repl_count
|
// get @limit; reset repl_count
|
||||||
int limit = args.Cast_int_or(3, -1);
|
this.limit = args.Cast_int_or(3, -1);
|
||||||
repl_count = 0;
|
|
||||||
|
|
||||||
// do repl
|
// do repl
|
||||||
String repl = Exec_repl(repl_tid, text, regx, limit);
|
String repl = Scrib_pattern_matcher.New(core.Page_url()).Gsub(this, Ustring_.New_codepoints(src_str), pat_str, 0);
|
||||||
return rslt.Init_many_objs(repl, repl_count);
|
return rslt.Init_many_objs(repl, repl_count);
|
||||||
}
|
}
|
||||||
private byte Identify_repl(Object repl_obj) {
|
private byte Identify_repl(Object repl_obj) {
|
||||||
@ -80,44 +84,7 @@ class Scrib_lib_ustring_gsub_mgr {
|
|||||||
throw Err_.new_unhandled(Type_.Name(repl_type));
|
throw Err_.new_unhandled(Type_.Name(repl_type));
|
||||||
return repl_tid;
|
return repl_tid;
|
||||||
}
|
}
|
||||||
private String Exec_repl(byte repl_tid, String text, String regx, int limit) {
|
public boolean Exec_repl_itm(Bry_bfr tmp_bfr, Scrib_regx_converter regx_converter, Regx_match match) {
|
||||||
// parse regx
|
|
||||||
Regx_adp regx_mgr = Scrib_lib_ustring.RegxAdp_new_(core.Ctx().Page().Url(), regx);
|
|
||||||
if (regx_mgr.Pattern_is_invalid()) return text; // NOTE: invalid patterns should return self; EX:[^]; DATE:2014-09-02)
|
|
||||||
|
|
||||||
// exec regx
|
|
||||||
Regx_match[] rslts = regx_mgr.Match_all(text, 0);
|
|
||||||
if (rslts.length == 0) return text; // PHP: If matches are found, the new subject will be returned, otherwise subject will be returned unchanged.; http://php.net/manual/en/function.preg-replace-callback.php
|
|
||||||
rslts = regx_converter.Adjust_balanced(rslts);
|
|
||||||
|
|
||||||
Bry_bfr tmp_bfr = Bry_bfr_.New();
|
|
||||||
int rslts_len = rslts.length;
|
|
||||||
int text_pos = 0;
|
|
||||||
for (int i = 0; i < rslts_len; i++) {
|
|
||||||
if (repl_count == limit) break; // stop if repl_count reaches limit; note that limit = -1 by default, unless specified
|
|
||||||
|
|
||||||
// add text up to find.bgn
|
|
||||||
Regx_match rslt = rslts[i];
|
|
||||||
tmp_bfr.Add_str_u8(String_.Mid(text, text_pos, rslt.Find_bgn())); // NOTE: regx returns char text_pos (not bry); must add as String, not bry; DATE:2013-07-17
|
|
||||||
|
|
||||||
// replace result
|
|
||||||
if (!Exec_repl_itm(tmp_bfr, repl_tid, text, rslt)) {
|
|
||||||
// will be false when gsub_proc returns nothing; PAGE:en.d:tracer PAGE:en.d:שלום DATE:2017-04-22;
|
|
||||||
tmp_bfr.Add_str_u8(String_.Mid(text, rslt.Find_bgn(), rslt.Find_end()));
|
|
||||||
}
|
|
||||||
|
|
||||||
// update
|
|
||||||
text_pos = rslt.Find_end();
|
|
||||||
repl_count++;
|
|
||||||
}
|
|
||||||
|
|
||||||
// add rest of String
|
|
||||||
int text_len = String_.Len(text);
|
|
||||||
if (text_pos < text_len)
|
|
||||||
tmp_bfr.Add_str_u8(String_.Mid(text, text_pos, text_len)); // NOTE: regx returns char text_pos (not bry); must add as String, not bry; DATE:2013-07-17
|
|
||||||
return tmp_bfr.To_str_and_clear();
|
|
||||||
}
|
|
||||||
private boolean Exec_repl_itm(Bry_bfr tmp_bfr, byte repl_tid, String text, Regx_match match) {
|
|
||||||
switch (repl_tid) {
|
switch (repl_tid) {
|
||||||
case Repl_tid_string:
|
case Repl_tid_string:
|
||||||
int len = repl_bry.length;
|
int len = repl_bry.length;
|
||||||
@ -137,15 +104,15 @@ class Scrib_lib_ustring_gsub_mgr {
|
|||||||
// REF.MW: https://github.com/wikimedia/mediawiki-extensions-Scribunto/blob/master/includes/engines/LuaCommon/UstringLibrary.php#L785-L796
|
// REF.MW: https://github.com/wikimedia/mediawiki-extensions-Scribunto/blob/master/includes/engines/LuaCommon/UstringLibrary.php#L785-L796
|
||||||
// NOTE: 0 means take result; REF.MW:if ($x === '0'); return $m[0]; PAGE:Wikipedia:Wikipedia_Signpost/Templates/Voter/testcases; DATE:2015-08-02
|
// NOTE: 0 means take result; REF.MW:if ($x === '0'); return $m[0]; PAGE:Wikipedia:Wikipedia_Signpost/Templates/Voter/testcases; DATE:2015-08-02
|
||||||
if (idx == 0)
|
if (idx == 0)
|
||||||
tmp_bfr.Add_str_u8(String_.Mid(text, match.Find_bgn(), match.Find_end()));
|
tmp_bfr.Add_str_u8(String_.Mid(src_str, match.Find_bgn(), match.Find_end()));
|
||||||
// NOTE: > 0 means get from groups if it exists; REF.MW:elseif (isset($m["m$x"])) return $m["m$x"]; PAGE:Wikipedia:Wikipedia_Signpost/Templates/Voter/testcases; DATE:2015-08-02
|
// NOTE: > 0 means get from groups if it exists; REF.MW:elseif (isset($m["m$x"])) return $m["m$x"]; PAGE:Wikipedia:Wikipedia_Signpost/Templates/Voter/testcases; DATE:2015-08-02
|
||||||
else if (idx - 1 < match.Groups().length) { // retrieve numbered capture; TODO_OLD: support more than 9 captures
|
else if (idx - 1 < match.Groups().length) { // retrieve numbered capture; TODO_OLD: support more than 9 captures
|
||||||
Regx_group grp = match.Groups()[idx - 1];
|
Regx_group grp = match.Groups()[idx - 1];
|
||||||
tmp_bfr.Add_str_u8(String_.Mid(text, grp.Bgn(), grp.End())); // NOTE: grp.Bgn() / .End() is for String pos (bry pos will fail for utf8 strings)
|
tmp_bfr.Add_str_u8(String_.Mid(src_str, grp.Bgn(), grp.End())); // NOTE: grp.Bgn() / .End() is for String pos (bry pos will fail for utf8 strings)
|
||||||
}
|
}
|
||||||
// NOTE: 1 per MW "Match undocumented Lua String.gsub behavior"; PAGE:en.d:Wiktionary:Scripts ISSUE#:393; DATE:2019-03-20
|
// NOTE: 1 per MW "Match undocumented Lua String.gsub behavior"; PAGE:en.d:Wiktionary:Scripts ISSUE#:393; DATE:2019-03-20
|
||||||
else if (idx == 1) {
|
else if (idx == 1) {
|
||||||
tmp_bfr.Add_str_u8(String_.Mid(text, match.Find_bgn(), match.Find_end()));
|
tmp_bfr.Add_str_u8(String_.Mid(src_str, match.Find_bgn(), match.Find_end()));
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
throw Err_.new_wo_type("invalid capture index %" + Char_.To_str(b) + " in replacement String");
|
throw Err_.new_wo_type("invalid capture index %" + Char_.To_str(b) + " in replacement String");
|
||||||
@ -180,7 +147,7 @@ class Scrib_lib_ustring_gsub_mgr {
|
|||||||
match_bgn = grp.Bgn();
|
match_bgn = grp.Bgn();
|
||||||
match_end = grp.End();
|
match_end = grp.End();
|
||||||
}
|
}
|
||||||
String find_str = String_.Mid(text, match_bgn, match_end); // NOTE: rslt.Bgn() / .End() is for String pos (bry pos will fail for utf8 strings)
|
String find_str = String_.Mid(src_str, match_bgn, match_end); // NOTE: rslt.Bgn() / .End() is for String pos (bry pos will fail for utf8 strings)
|
||||||
Object actl_repl_obj = repl_hash.Get_by(find_str);
|
Object actl_repl_obj = repl_hash.Get_by(find_str);
|
||||||
if (actl_repl_obj == null) // match found, but no replacement specified; EX:"abc", "[ab]", "a:A"; "b" in regex but not in tbl; EX:d:DVD; DATE:2014-03-31
|
if (actl_repl_obj == null) // match found, but no replacement specified; EX:"abc", "[ab]", "a:A"; "b" in regex but not in tbl; EX:d:DVD; DATE:2014-03-31
|
||||||
tmp_bfr.Add_str_u8(find_str);
|
tmp_bfr.Add_str_u8(find_str);
|
||||||
@ -194,7 +161,7 @@ class Scrib_lib_ustring_gsub_mgr {
|
|||||||
int grps_len = grps.length;
|
int grps_len = grps.length;
|
||||||
// no grps; pass 1 arg based on @match: EX: ("ace", "[b-d]"); args -> ("c")
|
// no grps; pass 1 arg based on @match: EX: ("ace", "[b-d]"); args -> ("c")
|
||||||
if (grps_len == 0) {
|
if (grps_len == 0) {
|
||||||
String find_str = String_.Mid(text, match.Find_bgn(), match.Find_end());
|
String find_str = String_.Mid(src_str, match.Find_bgn(), match.Find_end());
|
||||||
luacbk_args = Scrib_kv_utl_.base1_obj_(find_str);
|
luacbk_args = Scrib_kv_utl_.base1_obj_(find_str);
|
||||||
}
|
}
|
||||||
// grps exist; pass n args based on grp[n].match; EX: ("acfg", "([b-d])([e-g])"); args -> ("c", "f")
|
// grps exist; pass n args based on grp[n].match; EX: ("acfg", "([b-d])([e-g])"); args -> ("c", "f")
|
||||||
@ -202,7 +169,7 @@ class Scrib_lib_ustring_gsub_mgr {
|
|||||||
// memoize any_pos args for loop
|
// memoize any_pos args for loop
|
||||||
boolean any_pos = regx_converter.Any_pos();
|
boolean any_pos = regx_converter.Any_pos();
|
||||||
Keyval[] capt_ary = regx_converter.Capt_ary();
|
Keyval[] capt_ary = regx_converter.Capt_ary();
|
||||||
int capt_ary_len = capt_ary.length;
|
int capt_ary_len = capt_ary == null ? 0 : capt_ary.length; // capt_ary can be null b/c xowa_gsub will always create one group;
|
||||||
|
|
||||||
// loop grps; for each grp, create corresponding arg in luacbk
|
// loop grps; for each grp, create corresponding arg in luacbk
|
||||||
luacbk_args = new Keyval[grps_len];
|
luacbk_args = new Keyval[grps_len];
|
||||||
@ -212,7 +179,7 @@ class Scrib_lib_ustring_gsub_mgr {
|
|||||||
// anypos will create @offset arg; everything else creates a @match arg based on grp
|
// anypos will create @offset arg; everything else creates a @match arg based on grp
|
||||||
Object val = any_pos && i < capt_ary_len && Bool_.Cast(capt_ary[i].Val())
|
Object val = any_pos && i < capt_ary_len && Bool_.Cast(capt_ary[i].Val())
|
||||||
? (Object)grp.Bgn()
|
? (Object)grp.Bgn()
|
||||||
: (Object)String_.Mid(text, grp.Bgn(), grp.End());
|
: (Object)String_.Mid(src_str, grp.Bgn(), grp.End());
|
||||||
luacbk_args[i] = Keyval_.int_(i + Scrib_core.Base_1, val);
|
luacbk_args[i] = Keyval_.int_(i + Scrib_core.Base_1, val);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -14,31 +14,33 @@ GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
|||||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||||
*/
|
*/
|
||||||
package gplx.xowa.xtns.scribunto.libs; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.scribunto.*;
|
package gplx.xowa.xtns.scribunto.libs; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.scribunto.*;
|
||||||
import gplx.core.brys.fmtrs.*; import gplx.core.intls.*;
|
import gplx.objects.strings.unicodes.*;
|
||||||
|
import gplx.core.intls.*;
|
||||||
|
import gplx.core.brys.fmtrs.*;
|
||||||
import gplx.langs.regxs.*;
|
import gplx.langs.regxs.*;
|
||||||
public class Scrib_regx_converter {
|
public class Scrib_regx_converter {// THREAD.UNSAFE:MULTIPLE_RETURN_VALUES
|
||||||
private final Scrib_regx_grp_mgr grp_mgr = new Scrib_regx_grp_mgr();
|
private final Scrib_regx_grp_mgr grp_mgr = new Scrib_regx_grp_mgr();
|
||||||
private final Bry_bfr bfr = Bry_bfr_.New();
|
|
||||||
private Bry_bfr tmp_bfr;
|
|
||||||
private Bry_fmtr fmtr_balanced; private Bry_bfr bfr_balanced;
|
|
||||||
private final Lua_cls_to_regx_map percent_map, brack_map;
|
|
||||||
public Scrib_regx_converter() {
|
|
||||||
percent_map = Lua_cls_matcher.Instance.Percent();
|
|
||||||
brack_map = Lua_cls_matcher.Instance.Brack();
|
|
||||||
}
|
|
||||||
public String Regx() {return regx;} private String regx;
|
public String Regx() {return regx;} private String regx;
|
||||||
public Keyval[] Capt_ary() {return grp_mgr.Capt__to_ary();}
|
public Keyval[] Capt_ary() {return grp_mgr.Capt__to_ary();}
|
||||||
public boolean Any_pos() {return any_pos;} private boolean any_pos;
|
public boolean Any_pos() {return any_pos;} private boolean any_pos;
|
||||||
public Regx_match[] Adjust_balanced(Regx_match[] rslts) {return grp_mgr.Adjust_balanced(rslts);}
|
public Regx_match[] Adjust_balanced(Regx_match[] rslts) {return grp_mgr.Adjust_balanced_many(rslts);}
|
||||||
|
public Regx_match Adjust_balanced_one(Regx_match rslt) {return grp_mgr.Adjust_balanced_one(rslt);}
|
||||||
public String patternToRegex(String pat_str, byte[] anchor, boolean mode_is_regx) {
|
public String patternToRegex(String pat_str, byte[] anchor, boolean mode_is_regx) {
|
||||||
Unicode_string pat_ucs = Unicode_string_.New(pat_str);
|
Ustring pat_ucs = Ustring_.New_codepoints(pat_str);
|
||||||
// TODO.CACHE: if (!$this->patternRegexCache->has($cacheKey))
|
// TODO.CACHE: if (!$this->patternRegexCache->has($cacheKey))
|
||||||
grp_mgr.Clear();
|
grp_mgr.Clear();
|
||||||
any_pos = false;
|
any_pos = false;
|
||||||
boolean q_flag = false;
|
boolean q_flag = false;
|
||||||
|
Bry_bfr bfr = Bry_bfr_.New();
|
||||||
|
Bry_bfr tmp_bfr = null;
|
||||||
|
Bry_fmtr fmtr_balanced = null;
|
||||||
|
Bry_bfr bfr_balanced = null;
|
||||||
|
Lua_cls_to_regx_map percent_map = Lua_cls_matcher.Instance.Percent();
|
||||||
|
Lua_cls_to_regx_map brack_map = Lua_cls_matcher.Instance.Brack();
|
||||||
|
|
||||||
// bfr.Add_byte(Byte_ascii.Slash); // TOMBSTONE: do not add PHP "/" at start
|
// bfr.Add_byte(Byte_ascii.Slash); // TOMBSTONE: do not add PHP "/" at start
|
||||||
int len = pat_ucs.Len_codes();
|
int len = pat_ucs.Len_in_data();
|
||||||
int grps_len = 0;
|
int grps_len = 0;
|
||||||
int bct = 0;
|
int bct = 0;
|
||||||
|
|
||||||
@ -46,7 +48,7 @@ public class Scrib_regx_converter {
|
|||||||
for (int i = 0; i < len; i++) {
|
for (int i = 0; i < len; i++) {
|
||||||
int i_end = i + 1;
|
int i_end = i + 1;
|
||||||
q_flag = false; // must be reset; REF.MW:UstringLibrary.php|patternToRegex; DATE:2014-02-08
|
q_flag = false; // must be reset; REF.MW:UstringLibrary.php|patternToRegex; DATE:2014-02-08
|
||||||
int cur = pat_ucs.Val_codes(i);
|
int cur = pat_ucs.Get_data(i);
|
||||||
switch (cur) {
|
switch (cur) {
|
||||||
case Byte_ascii.Pow:
|
case Byte_ascii.Pow:
|
||||||
if (!mode_is_regx) {
|
if (!mode_is_regx) {
|
||||||
@ -71,7 +73,7 @@ public class Scrib_regx_converter {
|
|||||||
int grp_idx = grp_mgr.Capt__len() + 1;
|
int grp_idx = grp_mgr.Capt__len() + 1;
|
||||||
|
|
||||||
// check for "()"; enables anypos flag
|
// check for "()"; enables anypos flag
|
||||||
boolean is_empty_capture = pat_ucs.Val_codes(i + 1) == Byte_ascii.Paren_end;
|
boolean is_empty_capture = pat_ucs.Get_data(i + 1) == Byte_ascii.Paren_end;
|
||||||
if (is_empty_capture)
|
if (is_empty_capture)
|
||||||
any_pos = true;
|
any_pos = true;
|
||||||
grp_mgr.Capt__add__real(grp_idx, is_empty_capture);
|
grp_mgr.Capt__add__real(grp_idx, is_empty_capture);
|
||||||
@ -93,19 +95,19 @@ public class Scrib_regx_converter {
|
|||||||
i++;
|
i++;
|
||||||
if (i >= len)
|
if (i >= len)
|
||||||
throw Err_.new_wo_type("malformed pattern (ends with '%')");
|
throw Err_.new_wo_type("malformed pattern (ends with '%')");
|
||||||
byte[] percent_bry = percent_map.Get_or_null(pat_ucs.Val_codes(i));
|
byte[] percent_bry = percent_map.Get_or_null(pat_ucs.Get_data(i));
|
||||||
if (percent_bry != null) {
|
if (percent_bry != null) {
|
||||||
bfr.Add(percent_bry);
|
bfr.Add(percent_bry);
|
||||||
q_flag = true;
|
q_flag = true;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
int nxt = pat_ucs.Val_codes(i);
|
int nxt = pat_ucs.Get_data(i);
|
||||||
switch (nxt) {
|
switch (nxt) {
|
||||||
case Byte_ascii.Ltr_b: // EX: "%b()"
|
case Byte_ascii.Ltr_b: // EX: "%b()"
|
||||||
i += 2;
|
i += 2;
|
||||||
if (i >= len) throw Err_.new_wo_type("malformed pattern (missing arguments to '%b')");
|
if (i >= len) throw Err_.new_wo_type("malformed pattern (missing arguments to '%b')");
|
||||||
int char_0 = pat_ucs.Val_codes(i - 1);
|
int char_0 = pat_ucs.Get_data(i - 1);
|
||||||
int char_1 = pat_ucs.Val_codes(i);
|
int char_1 = pat_ucs.Get_data(i);
|
||||||
if (char_0 == char_1) { // same char: easier regex; REF.MW: $bfr .= "{$d1}[^$d1]*$d1";
|
if (char_0 == char_1) { // same char: easier regex; REF.MW: $bfr .= "{$d1}[^$d1]*$d1";
|
||||||
bfr.Add(Bry_bf0_seg_0);
|
bfr.Add(Bry_bf0_seg_0);
|
||||||
Regx_quote(bfr, char_0);
|
Regx_quote(bfr, char_0);
|
||||||
@ -133,11 +135,11 @@ public class Scrib_regx_converter {
|
|||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case Byte_ascii.Ltr_f: { // EX: lua frontier pattern; "%f[%a]"; DATE:2015-07-21
|
case Byte_ascii.Ltr_f: { // EX: lua frontier pattern; "%f[%a]"; DATE:2015-07-21
|
||||||
if (i + 1 >= len || pat_ucs.Val_codes(++i) != Byte_ascii.Brack_bgn)
|
if (i + 1 >= len || pat_ucs.Get_data(++i) != Byte_ascii.Brack_bgn)
|
||||||
throw Err_.new_("scribunto", "missing '[' after %f in pattern at pattern character " + Int_.To_str(i_end));
|
throw Err_.new_("scribunto", "missing '[' after %f in pattern at pattern character " + Int_.To_str(i_end));
|
||||||
// %f always followed by bracketed term; convert lua bracketed term to regex
|
// %f always followed by bracketed term; convert lua bracketed term to regex
|
||||||
if (tmp_bfr == null) tmp_bfr = Bry_bfr_.New();
|
if (tmp_bfr == null) tmp_bfr = Bry_bfr_.New();
|
||||||
i = bracketedCharSetToRegex(tmp_bfr, pat_ucs, i, len);
|
i = bracketedCharSetToRegex(tmp_bfr, brack_map, pat_ucs, i, len);
|
||||||
byte[] re2 = tmp_bfr.To_bry_and_clear();
|
byte[] re2 = tmp_bfr.To_bry_and_clear();
|
||||||
|
|
||||||
// scrib has following comment: 'Because %f considers the beginning and end of the String to be \0, determine if $re2 matches that and take it into account with "^" and "$".'
|
// scrib has following comment: 'Because %f considers the beginning and end of the String to be \0, determine if $re2 matches that and take it into account with "^" and "$".'
|
||||||
@ -169,7 +171,7 @@ public class Scrib_regx_converter {
|
|||||||
bfr.Add_byte(Byte_ascii.Brack_bgn);
|
bfr.Add_byte(Byte_ascii.Brack_bgn);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
i = bracketedCharSetToRegex(bfr, pat_ucs, i, len);
|
i = bracketedCharSetToRegex(bfr, brack_map, pat_ucs, i, len);
|
||||||
q_flag = true;
|
q_flag = true;
|
||||||
break;
|
break;
|
||||||
case Byte_ascii.Brack_end:
|
case Byte_ascii.Brack_end:
|
||||||
@ -196,7 +198,7 @@ public class Scrib_regx_converter {
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
if (q_flag && i + 1 < len) {
|
if (q_flag && i + 1 < len) {
|
||||||
int tmp_b = pat_ucs.Val_codes(i + 1);
|
int tmp_b = pat_ucs.Get_data(i + 1);
|
||||||
switch (tmp_b) {
|
switch (tmp_b) {
|
||||||
case Byte_ascii.Star:
|
case Byte_ascii.Star:
|
||||||
case Byte_ascii.Plus:
|
case Byte_ascii.Plus:
|
||||||
@ -217,35 +219,35 @@ public class Scrib_regx_converter {
|
|||||||
regx = bfr.To_str_and_clear();
|
regx = bfr.To_str_and_clear();
|
||||||
return regx;
|
return regx;
|
||||||
}
|
}
|
||||||
private int bracketedCharSetToRegex(Bry_bfr bfr, Unicode_string pat_ucs, int i, int len) {
|
private int bracketedCharSetToRegex(Bry_bfr bfr, Lua_cls_to_regx_map brack_map, Ustring pat_ucs, int i, int len) {
|
||||||
bfr.Add_byte(Byte_ascii.Brack_bgn);
|
bfr.Add_byte(Byte_ascii.Brack_bgn);
|
||||||
i++;
|
i++;
|
||||||
if (i < len && pat_ucs.Val_codes(i) == Byte_ascii.Pow) { // ^
|
if (i < len && pat_ucs.Get_data(i) == Byte_ascii.Pow) { // ^
|
||||||
bfr.Add_byte(Byte_ascii.Pow);
|
bfr.Add_byte(Byte_ascii.Pow);
|
||||||
i++;
|
i++;
|
||||||
}
|
}
|
||||||
for (int j = i; i < len && (j == i || pat_ucs.Val_codes(i) != Byte_ascii.Brack_end); i++) {
|
for (int j = i; i < len && (j == i || pat_ucs.Get_data(i) != Byte_ascii.Brack_end); i++) {
|
||||||
if (pat_ucs.Val_codes(i) == Byte_ascii.Percent) {
|
if (pat_ucs.Get_data(i) == Byte_ascii.Percent) {
|
||||||
i++;
|
i++;
|
||||||
if (i >= len) {
|
if (i >= len) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
byte[] brack_bry = brack_map.Get_or_null(pat_ucs.Val_codes(i));
|
byte[] brack_bry = brack_map.Get_or_null(pat_ucs.Get_data(i));
|
||||||
if (brack_bry != null)
|
if (brack_bry != null)
|
||||||
bfr.Add(brack_bry);
|
bfr.Add(brack_bry);
|
||||||
else
|
else
|
||||||
Regx_quote(bfr, pat_ucs.Val_codes(i));
|
Regx_quote(bfr, pat_ucs.Get_data(i));
|
||||||
}
|
}
|
||||||
else if (i + 2 < len && pat_ucs.Val_codes(i + 1) == Byte_ascii.Dash && pat_ucs.Val_codes(i + 2) != Byte_ascii.Brack_end && pat_ucs.Val_codes(i + 2) != Byte_ascii.Hash) {
|
else if (i + 2 < len && pat_ucs.Get_data(i + 1) == Byte_ascii.Dash && pat_ucs.Get_data(i + 2) != Byte_ascii.Brack_end && pat_ucs.Get_data(i + 2) != Byte_ascii.Hash) {
|
||||||
if (pat_ucs.Val_codes(i) <= pat_ucs.Val_codes(i + 2)) {
|
if (pat_ucs.Get_data(i) <= pat_ucs.Get_data(i + 2)) {
|
||||||
Regx_quote(bfr, pat_ucs.Val_codes(i));
|
Regx_quote(bfr, pat_ucs.Get_data(i));
|
||||||
bfr.Add_byte(Byte_ascii.Dash);
|
bfr.Add_byte(Byte_ascii.Dash);
|
||||||
Regx_quote(bfr, pat_ucs.Val_codes(i + 2));
|
Regx_quote(bfr, pat_ucs.Get_data(i + 2));
|
||||||
}
|
}
|
||||||
i += 2;
|
i += 2;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
Regx_quote(bfr, pat_ucs.Val_codes(i));
|
Regx_quote(bfr, pat_ucs.Get_data(i));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (i > len) throw Err_.new_wo_type("Missing close-bracket for character set beginning at pattern character $nxt_pos");
|
if (i > len) throw Err_.new_wo_type("Missing close-bracket for character set beginning at pattern character $nxt_pos");
|
||||||
|
@ -58,25 +58,29 @@ class Scrib_regx_grp_mgr {
|
|||||||
int actl_idx = Int_.Cast(idx_list.Get_by(regx_idx));
|
int actl_idx = Int_.Cast(idx_list.Get_by(regx_idx));
|
||||||
bfr.Add_int_variable(actl_idx);
|
bfr.Add_int_variable(actl_idx);
|
||||||
}
|
}
|
||||||
public Regx_match[] Adjust_balanced(Regx_match[] matches) {
|
public Regx_match[] Adjust_balanced_many(Regx_match[] matches) {
|
||||||
if (fake_count == 0) return matches;
|
if (fake_count == 0) return matches;
|
||||||
|
|
||||||
int matches_len = matches.length;
|
int matches_len = matches.length;
|
||||||
Regx_match[] rv = new Regx_match[matches_len];
|
Regx_match[] rv = new Regx_match[matches_len];
|
||||||
for (int i = 0; i < matches_len; i++) {
|
for (int i = 0; i < matches_len; i++) {
|
||||||
Regx_match match = matches[i];
|
rv[i] = Adjust_balanced_one(matches[i]);
|
||||||
Regx_group[] old_groups = match.Groups();
|
|
||||||
Regx_group[] new_groups = new Regx_group[full_list.Len() - fake_count];
|
|
||||||
int group_idx = 0;
|
|
||||||
for (int j = 0; j < old_groups.length; j++) {
|
|
||||||
Scrib_regx_grp_itm itm = (Scrib_regx_grp_itm)full_list.Get_at(j);
|
|
||||||
if (itm.Is_fake()) continue;
|
|
||||||
new_groups[group_idx++] = old_groups[j];
|
|
||||||
}
|
|
||||||
rv[i] = new Regx_match(match.Rslt(), match.Find_bgn(), match.Find_end(), new_groups);
|
|
||||||
}
|
}
|
||||||
return rv;
|
return rv;
|
||||||
}
|
}
|
||||||
|
public Regx_match Adjust_balanced_one(Regx_match match) {
|
||||||
|
if (full_list.Len() == 0) return match; // no capture groups, so don't bother adjusting for balanced; DATE:2019-04-16
|
||||||
|
|
||||||
|
Regx_group[] old_groups = match.Groups();
|
||||||
|
Regx_group[] new_groups = new Regx_group[full_list.Len() - fake_count];
|
||||||
|
int group_idx = 0;
|
||||||
|
for (int j = 0; j < old_groups.length; j++) {
|
||||||
|
Scrib_regx_grp_itm itm = (Scrib_regx_grp_itm)full_list.Get_at(j);
|
||||||
|
if (itm.Is_fake()) continue;
|
||||||
|
new_groups[group_idx++] = old_groups[j];
|
||||||
|
}
|
||||||
|
return new Regx_match(match.Rslt(), match.Find_bgn(), match.Find_end(), new_groups);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
class Scrib_regx_grp_itm {
|
class Scrib_regx_grp_itm {
|
||||||
public Scrib_regx_grp_itm(boolean is_fake, boolean is_empty_capture, int idx) {
|
public Scrib_regx_grp_itm(boolean is_fake, boolean is_empty_capture, int idx) {
|
||||||
|
@ -14,8 +14,19 @@ GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
|||||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||||
*/
|
*/
|
||||||
package gplx.xowa.xtns.scribunto.libs.patterns; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.scribunto.*; import gplx.xowa.xtns.scribunto.libs.*;
|
package gplx.xowa.xtns.scribunto.libs.patterns; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.scribunto.*; import gplx.xowa.xtns.scribunto.libs.*;
|
||||||
import gplx.core.intls.*;
|
import gplx.objects.strings.unicodes.*;
|
||||||
import gplx.langs.regxs.*;
|
import gplx.langs.regxs.*;
|
||||||
public interface Scrib_pattern_matcher {
|
public abstract class Scrib_pattern_matcher {
|
||||||
Regx_match[] Match(Xoa_url url, Unicode_string text_ucs, Scrib_regx_converter regx_converter, String find_str, int bgn_as_codes);
|
protected final Scrib_regx_converter regx_converter = new Scrib_regx_converter();
|
||||||
|
public Keyval[] Capt_ary() {return regx_converter.Capt_ary();}
|
||||||
|
public abstract Regx_match Match_one(Ustring src_ucs, String pat_str, int bgn_as_codes, boolean replace);
|
||||||
|
public abstract String Gsub(Scrib_lib_ustring_gsub_mgr gsub_mgr, Ustring src_ucs, String pat_str, int bgn_as_codes);
|
||||||
|
|
||||||
|
public static boolean Mode_is_xowa() {return false;}
|
||||||
|
public static Scrib_pattern_matcher New(byte[] page_url) {
|
||||||
|
return Mode_is_xowa()
|
||||||
|
? (Scrib_pattern_matcher)new Scrib_pattern_matcher__xowa(page_url)
|
||||||
|
: (Scrib_pattern_matcher)new Scrib_pattern_matcher__regx(page_url)
|
||||||
|
;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,36 +0,0 @@
|
|||||||
/*
|
|
||||||
XOWA: the XOWA Offline Wiki Application
|
|
||||||
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
|
||||||
|
|
||||||
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
|
||||||
or alternatively under the terms of the Apache License Version 2.0.
|
|
||||||
|
|
||||||
You may use XOWA according to either of these licenses as is most appropriate
|
|
||||||
for your project on a case-by-case basis.
|
|
||||||
|
|
||||||
The terms of each license can be found in the source code repository:
|
|
||||||
|
|
||||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
|
||||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
|
||||||
*/
|
|
||||||
package gplx.xowa.xtns.scribunto.libs.patterns; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.scribunto.*; import gplx.xowa.xtns.scribunto.libs.*;
|
|
||||||
import gplx.core.intls.*;
|
|
||||||
import gplx.langs.regxs.*;
|
|
||||||
public class Scrib_pattern_matcher_ {
|
|
||||||
private static final Scrib_pattern_matcher instance = New();
|
|
||||||
private static Scrib_pattern_matcher New() {
|
|
||||||
return new Scrib_pattern_matcher__regx();
|
|
||||||
// return new Scrib_pattern_matcher__luaj();
|
|
||||||
}
|
|
||||||
public static Scrib_pattern_matcher Instance() {return instance;}
|
|
||||||
}
|
|
||||||
class Scrib_pattern_matcher__regx implements Scrib_pattern_matcher {
|
|
||||||
public Regx_match[] Match(Xoa_url url, Unicode_string text_ucs, Scrib_regx_converter regx_converter, String find_str, int bgn_as_codes) {
|
|
||||||
// convert regex from lua to java
|
|
||||||
find_str = regx_converter.patternToRegex(find_str, Scrib_regx_converter.Anchor_G, true);
|
|
||||||
|
|
||||||
// run regex
|
|
||||||
Regx_adp regx_adp = Scrib_lib_ustring.RegxAdp_new_(url, find_str);
|
|
||||||
return regx_adp.Match_all(text_ucs.Src_string(), text_ucs.Pos_codes_to_chars(bgn_as_codes)); // NOTE: MW calculates an offset to handle mb strings. however, java's regex always takes offset in chars (not bytes like PHP preg_match); DATE:2014-03-04
|
|
||||||
}
|
|
||||||
}
|
|
@ -1,50 +0,0 @@
|
|||||||
/*
|
|
||||||
XOWA: the XOWA Offline Wiki Application
|
|
||||||
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
|
||||||
|
|
||||||
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
|
||||||
or alternatively under the terms of the Apache License Version 2.0.
|
|
||||||
|
|
||||||
You may use XOWA according to either of these licenses as is most appropriate
|
|
||||||
for your project on a case-by-case basis.
|
|
||||||
|
|
||||||
The terms of each license can be found in the source code repository:
|
|
||||||
|
|
||||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
|
||||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
|
||||||
*/
|
|
||||||
package gplx.xowa.xtns.scribunto.libs.patterns; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.scribunto.*; import gplx.xowa.xtns.scribunto.libs.*;
|
|
||||||
import gplx.core.intls.*;
|
|
||||||
import gplx.langs.regxs.*;
|
|
||||||
import org.luaj.vm2.lib.StringLib;
|
|
||||||
//import org.luaj.vm2.lib.Str_find_mgr;
|
|
||||||
//import org.luaj.vm2.lib.Str_find_mgr__regx;
|
|
||||||
class Scrib_pattern_matcher__luaj implements Scrib_pattern_matcher {
|
|
||||||
public Regx_match[] Match(Xoa_url url, Unicode_string text_ucs, Scrib_regx_converter regx_converter, String find_str, int bgn_as_codes) {
|
|
||||||
// int src_bgn = bgn_as_codes < 0 ? bgn_as_codes : text_ucs.Pos_codes_to_bytes(bgn_as_codes);
|
|
||||||
// int src_bgn = bgn_as_codes < 0 ? Int_.Base1 : bgn_as_codes + Int_.Base1;
|
|
||||||
// src_bgn = src_bgn >= text_ucs.Len_codes() ? text_ucs.Len_codes() : text_ucs.Pos_codes_to_bytes(src_bgn);
|
|
||||||
// Str_find_mgr__regx mgr = new Str_find_mgr__regx(text_ucs.Src_string(), find_str, src_bgn, false, true);
|
|
||||||
// mgr.Process();
|
|
||||||
//
|
|
||||||
// // convert to Regx_match
|
|
||||||
// int find_bgn = mgr.Bgn() == -1 ? -1 : text_ucs.Pos_bytes_to_chars(mgr.Bgn());
|
|
||||||
// int find_end = mgr.End() == -1 ? -1 : text_ucs.Pos_bytes_to_chars(mgr.End());
|
|
||||||
// boolean found = find_bgn != -1;
|
|
||||||
// if (!found) {
|
|
||||||
// return Regx_match.Ary_empty;
|
|
||||||
// }
|
|
||||||
// int[] captures = mgr.Capture_ints();
|
|
||||||
// Regx_group[] groups = null;
|
|
||||||
// if (found && captures != null) {
|
|
||||||
// int captures_len = captures.length;
|
|
||||||
// groups = new Regx_group[captures_len / 2];
|
|
||||||
// for (int i = 0; i < captures_len; i += 2) {
|
|
||||||
// groups[i / 2] = new Regx_group(true, captures[i], captures[i + 1], String_.Mid(text_ucs.Src_string(), text_ucs.Pos_bytes_to_chars(captures[i]), text_ucs.Pos_bytes_to_chars(captures[i + 1])));
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// Regx_match rv = new Regx_match(found, find_bgn, find_end, groups);
|
|
||||||
// return new Regx_match[] {rv};
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
}
|
|
@ -0,0 +1,74 @@
|
|||||||
|
/*
|
||||||
|
XOWA: the XOWA Offline Wiki Application
|
||||||
|
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||||
|
|
||||||
|
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||||
|
or alternatively under the terms of the Apache License Version 2.0.
|
||||||
|
|
||||||
|
You may use XOWA according to either of these licenses as is most appropriate
|
||||||
|
for your project on a case-by-case basis.
|
||||||
|
|
||||||
|
The terms of each license can be found in the source code repository:
|
||||||
|
|
||||||
|
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||||
|
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||||
|
*/
|
||||||
|
package gplx.xowa.xtns.scribunto.libs.patterns; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.scribunto.*; import gplx.xowa.xtns.scribunto.libs.*;
|
||||||
|
import gplx.objects.strings.unicodes.*;
|
||||||
|
import gplx.langs.regxs.*;
|
||||||
|
class Scrib_pattern_matcher__regx extends Scrib_pattern_matcher { private final byte[] page_url;
|
||||||
|
public Scrib_pattern_matcher__regx(byte[] page_url) {
|
||||||
|
this.page_url = page_url;
|
||||||
|
}
|
||||||
|
@Override public Regx_match Match_one(Ustring src_ucs, String pat_str, int bgn_as_codes, boolean replace) {
|
||||||
|
// convert lua pattern to java regex
|
||||||
|
if (replace) // note that replace will be false for Gmatch_callback (b/c Gmatch_init already converted)
|
||||||
|
pat_str = regx_converter.patternToRegex(pat_str, Scrib_regx_converter.Anchor_G, true);
|
||||||
|
|
||||||
|
// run regex
|
||||||
|
Regx_adp regx_adp = Scrib_lib_ustring.RegxAdp_new_(page_url, pat_str);
|
||||||
|
Regx_match match = regx_adp.Match(src_ucs.Src(), src_ucs.Map_data_to_char(bgn_as_codes)); // NOTE: MW calculates an offset to handle mb strings. however, java's regex always takes offset in chars (not bytes like PHP preg_match); DATE:2014-03-04
|
||||||
|
match = regx_converter.Adjust_balanced_one(match);
|
||||||
|
return match;
|
||||||
|
}
|
||||||
|
@Override public String Gsub(Scrib_lib_ustring_gsub_mgr gsub_mgr, Ustring src_ucs, String pat_str, int bgn_as_codes) {
|
||||||
|
// convert lua pattern to java regex
|
||||||
|
pat_str = regx_converter.patternToRegex(pat_str, Scrib_regx_converter.Anchor_pow, true);
|
||||||
|
String src_str = src_ucs.Src();
|
||||||
|
Regx_adp regx_adp = Scrib_lib_ustring.RegxAdp_new_(page_url, pat_str);
|
||||||
|
if (regx_adp.Pattern_is_invalid()) return src_str; // NOTE: invalid patterns should return self; EX:[^]; DATE:2014-09-02
|
||||||
|
|
||||||
|
// run regex
|
||||||
|
Regx_match[] rslts = regx_adp.Match_all(src_str, src_ucs.Map_data_to_char(bgn_as_codes)); // NOTE: MW calculates an offset to handle mb strings. however, java's regex always takes offset in chars (not bytes like PHP preg_match); DATE:2014-03-04
|
||||||
|
if (rslts.length == 0) return src_str; // PHP: If matches are found, the new subject will be returned, otherwise subject will be returned unchanged.; http://php.net/manual/en/function.preg-replace-callback.php
|
||||||
|
rslts = regx_converter.Adjust_balanced(rslts);
|
||||||
|
|
||||||
|
// replace results
|
||||||
|
Bry_bfr tmp_bfr = Bry_bfr_.New();
|
||||||
|
int rslts_len = rslts.length;
|
||||||
|
int text_pos = 0;
|
||||||
|
for (int i = 0; i < rslts_len; i++) {
|
||||||
|
if (gsub_mgr.Repl_count__done()) break; // stop if repl_count reaches limit; note that limit = -1 by default, unless specified
|
||||||
|
|
||||||
|
// add text up to find.bgn
|
||||||
|
Regx_match rslt = rslts[i];
|
||||||
|
tmp_bfr.Add_str_u8(String_.Mid(src_str, text_pos, rslt.Find_bgn())); // NOTE: regx returns char text_pos (not bry); must add as String, not bry; DATE:2013-07-17
|
||||||
|
|
||||||
|
// replace result
|
||||||
|
if (!gsub_mgr.Exec_repl_itm(tmp_bfr, regx_converter, rslt)) {
|
||||||
|
// will be false when gsub_proc returns nothing; PAGE:en.d:tracer PAGE:en.d:שלום DATE:2017-04-22;
|
||||||
|
tmp_bfr.Add_str_u8(String_.Mid(src_str, rslt.Find_bgn(), rslt.Find_end()));
|
||||||
|
}
|
||||||
|
|
||||||
|
// update
|
||||||
|
text_pos = rslt.Find_end();
|
||||||
|
gsub_mgr.Repl_count__add();
|
||||||
|
}
|
||||||
|
|
||||||
|
// add rest of String
|
||||||
|
int text_len = String_.Len(src_str);
|
||||||
|
if (text_pos < text_len)
|
||||||
|
tmp_bfr.Add_str_u8(String_.Mid(src_str, text_pos, text_len)); // NOTE: regx returns char text_pos (not bry); must add as String, not bry; DATE:2013-07-17
|
||||||
|
return tmp_bfr.To_str_and_clear();
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,123 @@
|
|||||||
|
/*
|
||||||
|
XOWA: the XOWA Offline Wiki Application
|
||||||
|
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||||
|
|
||||||
|
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||||
|
or alternatively under the terms of the Apache License Version 2.0.
|
||||||
|
|
||||||
|
You may use XOWA according to either of these licenses as is most appropriate
|
||||||
|
for your project on a case-by-case basis.
|
||||||
|
|
||||||
|
The terms of each license can be found in the source code repository:
|
||||||
|
|
||||||
|
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||||
|
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||||
|
*/
|
||||||
|
package gplx.xowa.xtns.scribunto.libs.patterns; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.scribunto.*; import gplx.xowa.xtns.scribunto.libs.*;
|
||||||
|
import gplx.objects.strings.unicodes.*;
|
||||||
|
import gplx.langs.regxs.*;
|
||||||
|
import gplx.objects.strings.unicodes.*;
|
||||||
|
import org.luaj.vm2.lib.StringLib;
|
||||||
|
import org.luaj.vm2.Buffer;
|
||||||
|
import org.luaj.vm2.LuaValue;
|
||||||
|
import org.luaj.vm2.lib.Match_state;
|
||||||
|
import org.luaj.vm2.lib.Str_find_mgr;
|
||||||
|
import org.luaj.vm2.lib.Str_find_mgr__xowa;
|
||||||
|
class Scrib_pattern_matcher__xowa extends Scrib_pattern_matcher { public Scrib_pattern_matcher__xowa(byte[] page_url) {}
|
||||||
|
@Override public Regx_match Match_one(Ustring src_ucs, String pat_str, int bgn_as_codes, boolean replace) {
|
||||||
|
regx_converter.patternToRegex(pat_str, Scrib_regx_converter.Anchor_pow, true);
|
||||||
|
Str_find_mgr__xowa mgr = new Str_find_mgr__xowa(src_ucs, Ustring_.New_codepoints(pat_str), bgn_as_codes, false, false);
|
||||||
|
mgr.Process(false);
|
||||||
|
|
||||||
|
// convert to Regx_match
|
||||||
|
int find_bgn = mgr.Bgn();
|
||||||
|
int find_end = mgr.End();
|
||||||
|
boolean found = find_bgn != -1;
|
||||||
|
if (found) {
|
||||||
|
find_bgn = src_ucs.Map_data_to_char(find_bgn);
|
||||||
|
find_end = src_ucs.Map_data_to_char(find_end);
|
||||||
|
}
|
||||||
|
|
||||||
|
Regx_group[] groups = Make_groups(src_ucs, mgr.Captures_ary());
|
||||||
|
return new Regx_match(found, find_bgn, find_end, groups);
|
||||||
|
}
|
||||||
|
@Override public String Gsub(Scrib_lib_ustring_gsub_mgr gsub_mgr, Ustring src_ucs, String pat_str, int bgn_as_codes) {
|
||||||
|
// get src vars
|
||||||
|
String src_str = src_ucs.Src();
|
||||||
|
int src_len = src_ucs.Len_in_data();
|
||||||
|
if (src_len == 0) {
|
||||||
|
return src_str;
|
||||||
|
}
|
||||||
|
int src_max = src_len + 1;
|
||||||
|
|
||||||
|
// get pat vars
|
||||||
|
regx_converter.patternToRegex(pat_str, Scrib_regx_converter.Anchor_G, true);
|
||||||
|
Ustring pat = Ustring_.New_codepoints(pat_str);
|
||||||
|
int pat_len = pat.Len_in_data();
|
||||||
|
final boolean pat_is_anchored = pat_len > 0 && pat.Get_data(0) == '^';
|
||||||
|
|
||||||
|
// get match vars
|
||||||
|
Bry_bfr tmp_bfr = Bry_bfr_.New();
|
||||||
|
Str_find_mgr__xowa match_mgr = new Str_find_mgr__xowa(src_ucs, pat, bgn_as_codes, false, false);
|
||||||
|
Match_state ms = new Match_state(match_mgr);
|
||||||
|
|
||||||
|
int src_pos = 0;
|
||||||
|
int src_idx = 0;
|
||||||
|
while (src_idx < src_max) {
|
||||||
|
ms.reset();
|
||||||
|
int res = ms.match(src_pos, pat_is_anchored ? 1 : 0);
|
||||||
|
|
||||||
|
// match found
|
||||||
|
if (res != -1) {
|
||||||
|
if (gsub_mgr.Repl_count__done()) break;
|
||||||
|
src_idx++;
|
||||||
|
|
||||||
|
ms.push_captures(true, src_pos, res);
|
||||||
|
|
||||||
|
Regx_group[] groups = Make_groups(src_ucs, match_mgr.Captures_ary());
|
||||||
|
Regx_match match = new Regx_match(true, src_pos, res, groups);
|
||||||
|
if (!gsub_mgr.Exec_repl_itm(tmp_bfr, regx_converter, match)) {
|
||||||
|
tmp_bfr.Add_str_u8(src_ucs.Substring(match.Find_bgn(), match.Find_end()));
|
||||||
|
}
|
||||||
|
|
||||||
|
gsub_mgr.Repl_count__add();
|
||||||
|
}
|
||||||
|
|
||||||
|
// match found; set src_pos to match_end
|
||||||
|
if (res != -1 && res > src_pos)
|
||||||
|
src_pos = res;
|
||||||
|
// no match; add current byte
|
||||||
|
else if (src_pos < src_len) {
|
||||||
|
// lbuf.append( (byte) src.Get_data( src_pos++ ) );
|
||||||
|
tmp_bfr.Add_u8_int(src_ucs.Get_data(src_pos++));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
break;
|
||||||
|
|
||||||
|
if (pat_is_anchored)
|
||||||
|
break;
|
||||||
|
|
||||||
|
if (src_pos > src_len) // XOWA:assert src_pos is in bounds, else will throw ArrayIndexOutOfBounds exception; DATE:2016-09-20
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
tmp_bfr.Add_str_u8(src_ucs.Substring(src_pos, src_len));
|
||||||
|
return tmp_bfr.To_str_and_clear();
|
||||||
|
}
|
||||||
|
private Regx_group[] Make_groups(Ustring src_ucs, int[] captures) {
|
||||||
|
if (captures == null) {
|
||||||
|
return Regx_group.Ary_empty;
|
||||||
|
}
|
||||||
|
|
||||||
|
int captures_len = captures.length;
|
||||||
|
Regx_group[] groups = new Regx_group[captures_len / 2];
|
||||||
|
for (int i = 0; i < captures_len; i += 2) {
|
||||||
|
int capture_bgn = captures[i];
|
||||||
|
int capture_end = captures[i + 1];
|
||||||
|
capture_bgn = src_ucs.Map_data_to_char(capture_bgn);
|
||||||
|
capture_end = src_ucs.Map_data_to_char(capture_end);
|
||||||
|
groups[i / 2] = new Regx_group(true, capture_bgn, capture_end, String_.Mid(src_ucs.Src(), capture_bgn, capture_end));
|
||||||
|
}
|
||||||
|
return groups;
|
||||||
|
}
|
||||||
|
}
|
34
baselib/src/gplx/objects/Object_.java
Normal file
34
baselib/src/gplx/objects/Object_.java
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
/*
|
||||||
|
XOWA: the XOWA Offline Wiki Application
|
||||||
|
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||||
|
|
||||||
|
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||||
|
or alternatively under the terms of the Apache License Version 2.0.
|
||||||
|
|
||||||
|
You may use XOWA according to either of these licenses as is most appropriate
|
||||||
|
for your project on a case-by-case basis.
|
||||||
|
|
||||||
|
The terms of each license can be found in the source code repository:
|
||||||
|
|
||||||
|
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||||
|
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||||
|
*/
|
||||||
|
package gplx.objects; import gplx.*;
|
||||||
|
import gplx.objects.brys.*;
|
||||||
|
import gplx.objects.strings.*;
|
||||||
|
import gplx.objects.types.*;
|
||||||
|
public class Object_ {
|
||||||
|
public static String To_str_or_null_mark(Object v) {return v == null ? "<<NULL>>": To_str(v);}
|
||||||
|
public static String To_str_or(Object v, String or) {return v == null ? or : To_str(v);}
|
||||||
|
public static String To_str(Object v) {
|
||||||
|
Class<?> c = v.getClass();
|
||||||
|
if (Type_.Eq(c, String_.Cls_ref_type)) return (String)v;
|
||||||
|
else if (Type_.Eq(c, Bry_.Cls_ref_type)) return String_.New_bry_utf8((byte[])v);
|
||||||
|
else return v.toString();
|
||||||
|
}
|
||||||
|
public static boolean Eq(Object lhs, Object rhs) {
|
||||||
|
if (lhs == null && rhs == null) return true;
|
||||||
|
else if (lhs == null || rhs == null) return false;
|
||||||
|
else return lhs.equals(rhs);
|
||||||
|
}
|
||||||
|
}
|
51
baselib/src/gplx/objects/arrays/Array_.java
Normal file
51
baselib/src/gplx/objects/arrays/Array_.java
Normal file
@ -0,0 +1,51 @@
|
|||||||
|
/*
|
||||||
|
XOWA: the XOWA Offline Wiki Application
|
||||||
|
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||||
|
|
||||||
|
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||||
|
or alternatively under the terms of the Apache License Version 2.0.
|
||||||
|
|
||||||
|
You may use XOWA according to either of these licenses as is most appropriate
|
||||||
|
for your project on a case-by-case basis.
|
||||||
|
|
||||||
|
The terms of each license can be found in the source code repository:
|
||||||
|
|
||||||
|
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||||
|
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||||
|
*/
|
||||||
|
package gplx.objects.arrays; import gplx.*; import gplx.objects.*;
|
||||||
|
import java.lang.reflect.Array;
|
||||||
|
import gplx.objects.errs.*;
|
||||||
|
public class Array_ {
|
||||||
|
public static int Len(Object ary) {return Array.getLength(ary);}
|
||||||
|
public static final int Len_obj(Object[] ary) {return ary == null ? 0 : ary.length;}
|
||||||
|
public static Object Get_at(Object ary, int i) {return Array.get(ary, i);}
|
||||||
|
public static void Set_at(Object ary, int i, Object o) {Array.set(ary, i, o);}
|
||||||
|
public static Object Create(Class<?> t, int count) {return Array.newInstance(t, count);}
|
||||||
|
public static Object Expand(Object src, Object trg, int src_len) {
|
||||||
|
try {System.arraycopy(src, 0, trg, 0, src_len);}
|
||||||
|
catch (Exception e) {throw Err_.New_fmt(e, "Array_.Expand failed; src_len={0}", src_len);}
|
||||||
|
return trg;
|
||||||
|
}
|
||||||
|
public static void Copy(Object src, Object trg) {System.arraycopy(src, 0, trg, 0, Len(src));}
|
||||||
|
public static void Copy_to(Object src, Object trg, int trgPos) {System.arraycopy(src, 0, trg, trgPos, Len(src));}
|
||||||
|
public static void Copy_to(Object src, int srcBgn, Object trg, int trgBgn, int srcLen) {System.arraycopy(src, srcBgn, trg, trgBgn, srcLen);}
|
||||||
|
private static Class<?> Component_type(Object ary) {
|
||||||
|
if (ary == null) throw Err_.New_msg("Array is null");
|
||||||
|
return ary.getClass().getComponentType();
|
||||||
|
}
|
||||||
|
public static Object Resize_add(Object src, Object add) {
|
||||||
|
int srcLen = Len(src);
|
||||||
|
int trgLen = srcLen + Len(add);
|
||||||
|
Object trg = Create(Component_type(src), trgLen);
|
||||||
|
Copy(src, trg);
|
||||||
|
for (int i = srcLen; i < trgLen; i++)
|
||||||
|
Set_at(trg, i, Get_at(add, i - srcLen));
|
||||||
|
return trg;
|
||||||
|
}
|
||||||
|
public static Object Clone(Object src) {
|
||||||
|
Object trg = Create(Component_type(src), Len(src));
|
||||||
|
Copy(src, trg);
|
||||||
|
return trg;
|
||||||
|
}
|
||||||
|
}
|
103
baselib/src/gplx/objects/brys/Bry_.java
Normal file
103
baselib/src/gplx/objects/brys/Bry_.java
Normal file
@ -0,0 +1,103 @@
|
|||||||
|
/*
|
||||||
|
XOWA: the XOWA Offline Wiki Application
|
||||||
|
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||||
|
|
||||||
|
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||||
|
or alternatively under the terms of the Apache License Version 2.0.
|
||||||
|
|
||||||
|
You may use XOWA according to either of these licenses as is most appropriate
|
||||||
|
for your project on a case-by-case basis.
|
||||||
|
|
||||||
|
The terms of each license can be found in the source code repository:
|
||||||
|
|
||||||
|
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||||
|
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||||
|
*/
|
||||||
|
package gplx.objects.brys; import gplx.*; import gplx.objects.*;
|
||||||
|
import gplx.objects.errs.*;
|
||||||
|
public class Bry_ {
|
||||||
|
public static final Class<?> Cls_ref_type = byte[].class;
|
||||||
|
public static final byte[] Empty = new byte[0];
|
||||||
|
|
||||||
|
public static boolean Eq(byte[] lhs, byte[] rhs) {return Eq(lhs, 0, lhs == null ? 0 : lhs.length, rhs);}
|
||||||
|
public static boolean Eq(byte[] lhs, int lhs_bgn, int lhs_end, byte[] rhs) {
|
||||||
|
if (lhs == null && rhs == null) return true;
|
||||||
|
else if (lhs == null || rhs == null) return false;
|
||||||
|
if (lhs_bgn < 0) return false;
|
||||||
|
int rhs_len = rhs.length;
|
||||||
|
if (rhs_len != lhs_end - lhs_bgn) return false;
|
||||||
|
int lhs_len = lhs.length;
|
||||||
|
for (int i = 0; i < rhs_len; i++) {
|
||||||
|
int lhs_pos = i + lhs_bgn;
|
||||||
|
if (lhs_pos == lhs_len) return false;
|
||||||
|
if (rhs[i] != lhs[lhs_pos]) return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static byte[][] Ary(byte[]... ary) {return ary;}
|
||||||
|
public static byte[][] Ary(String... ary) {
|
||||||
|
int ary_len = ary.length;
|
||||||
|
byte[][] rv = new byte[ary_len][];
|
||||||
|
for (int i = 0; i < ary_len; i++) {
|
||||||
|
String itm = ary[i];
|
||||||
|
rv[i] = itm == null ? null : Bry_.New_utf08(itm);
|
||||||
|
}
|
||||||
|
return rv;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static byte[] New_utf08(String src) {
|
||||||
|
try {
|
||||||
|
int src_len = src.length();
|
||||||
|
if (src_len == 0) return Bry_.Empty;
|
||||||
|
int bry_len = New_utf08__count(src, src_len);
|
||||||
|
byte[] bry = new byte[bry_len];
|
||||||
|
New_utf08__write(src, src_len, bry, 0);
|
||||||
|
return bry;
|
||||||
|
}
|
||||||
|
catch (Exception e) {throw Err_.New_fmt(e, "invalid UTF-8 sequence; src={0}", src);}
|
||||||
|
}
|
||||||
|
public static int New_utf08__count(String src, int src_len) {
|
||||||
|
int rv = 0;
|
||||||
|
for (int i = 0; i < src_len; ++i) {
|
||||||
|
char c = src.charAt(i);
|
||||||
|
int c_len = 0;
|
||||||
|
if ( c < 128) c_len = 1; // 1 << 7
|
||||||
|
else if ( c < 2048) c_len = 2; // 1 << 11
|
||||||
|
else if ( (c > 55295) // 0xD800
|
||||||
|
&& (c < 56320)) c_len = 4; // 0xDFFF
|
||||||
|
else c_len = 3; // 1 << 16
|
||||||
|
if (c_len == 4) ++i; // surrogate is 2 wide, not 1
|
||||||
|
rv += c_len;
|
||||||
|
}
|
||||||
|
return rv;
|
||||||
|
}
|
||||||
|
public static void New_utf08__write(String src, int src_len, byte[] bry, int bry_pos) {
|
||||||
|
for (int i = 0; i < src_len; ++i) {
|
||||||
|
char c = src.charAt(i);
|
||||||
|
if ( c < 128) {
|
||||||
|
bry[bry_pos++] = (byte)c;
|
||||||
|
}
|
||||||
|
else if ( c < 2048) {
|
||||||
|
bry[bry_pos++] = (byte)(0xC0 | (c >> 6));
|
||||||
|
bry[bry_pos++] = (byte)(0x80 | (c & 0x3F));
|
||||||
|
}
|
||||||
|
else if ( (c > 55295) // 0xD800
|
||||||
|
&& (c < 56320)) { // 0xDFFF
|
||||||
|
if (i >= src_len) throw Err_.New_msg("incomplete surrogate pair at end of String");
|
||||||
|
char nxt_char = src.charAt(i + 1);
|
||||||
|
int v = 0x10000 + (c - 0xD800) * 0x400 + (nxt_char - 0xDC00);
|
||||||
|
bry[bry_pos++] = (byte)(0xF0 | (v >> 18));
|
||||||
|
bry[bry_pos++] = (byte)(0x80 | (v >> 12) & 0x3F);
|
||||||
|
bry[bry_pos++] = (byte)(0x80 | (v >> 6) & 0x3F);
|
||||||
|
bry[bry_pos++] = (byte)(0x80 | (v & 0x3F));
|
||||||
|
++i;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
bry[bry_pos++] = (byte)(0xE0 | (c >> 12));
|
||||||
|
bry[bry_pos++] = (byte)(0x80 | (c >> 6) & 0x3F);
|
||||||
|
bry[bry_pos++] = (byte)(0x80 | (c & 0x3F));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
21
baselib/src/gplx/objects/errs/Err.java
Normal file
21
baselib/src/gplx/objects/errs/Err.java
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
/*
|
||||||
|
XOWA: the XOWA Offline Wiki Application
|
||||||
|
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||||
|
|
||||||
|
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||||
|
or alternatively under the terms of the Apache License Version 2.0.
|
||||||
|
|
||||||
|
You may use XOWA according to either of these licenses as is most appropriate
|
||||||
|
for your project on a case-by-case basis.
|
||||||
|
|
||||||
|
The terms of each license can be found in the source code repository:
|
||||||
|
|
||||||
|
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||||
|
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||||
|
*/
|
||||||
|
package gplx.objects.errs; import gplx.*; import gplx.objects.*;
|
||||||
|
/**
 * Unchecked exception carrying a single message String.
 * <p>
 * The message is stored in this class's own field and exposed by overriding {@link #getMessage()};
 * it is not passed to the {@code RuntimeException} constructor.
 */
public class Err extends RuntimeException {
  private final String msg;

  /** @param msg text returned by {@link #getMessage()}; may be null */
  public Err(String msg) {
    this.msg = msg;
  }

  /** Returns the message given to the constructor. */
  @Override
  public String getMessage() {
    return msg;
  }
}
|
46
baselib/src/gplx/objects/errs/Err_.java
Normal file
46
baselib/src/gplx/objects/errs/Err_.java
Normal file
@ -0,0 +1,46 @@
|
|||||||
|
/*
|
||||||
|
XOWA: the XOWA Offline Wiki Application
|
||||||
|
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||||
|
|
||||||
|
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||||
|
or alternatively under the terms of the Apache License Version 2.0.
|
||||||
|
|
||||||
|
You may use XOWA according to either of these licenses as is most appropriate
|
||||||
|
for your project on a case-by-case basis.
|
||||||
|
|
||||||
|
The terms of each license can be found in the source code repository:
|
||||||
|
|
||||||
|
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||||
|
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||||
|
*/
|
||||||
|
package gplx.objects.errs; import gplx.*; import gplx.objects.*;
|
||||||
|
import gplx.objects.strings.*;
|
||||||
|
public class Err_ {
|
||||||
|
public static void Noop(Exception e) {}
|
||||||
|
public static Err New_fmt(String fmt, Object... args) {return new Err(String_.Format(fmt, args));}
|
||||||
|
public static Err New_msg(String msg) {return new Err(msg);}
|
||||||
|
public static Err New_fmt(Exception e, String fmt, Object... args) {
|
||||||
|
return new Err(String_.Format(fmt, args) + " exc=" + Err_.Message_lang(e));
|
||||||
|
}
|
||||||
|
|
||||||
|
public static Err New_null(String name) {return new Err("Object was null; name=" + name);}
|
||||||
|
public static Err New_unhandled_default(Object o) {
|
||||||
|
return new Err("val is not in switch; val=" + Object_.To_str(o));
|
||||||
|
}
|
||||||
|
|
||||||
|
public static String Message_lang(Exception e) {
|
||||||
|
return Error.class.isAssignableFrom(e.getClass())
|
||||||
|
? e.toString() // java.lang.Error returns null for "getMessage()"; return "toString()" instead
|
||||||
|
: e.getMessage();
|
||||||
|
}
|
||||||
|
|
||||||
|
public static String Trace_lang(Throwable e) {
|
||||||
|
StackTraceElement[] ary = e.getStackTrace();
|
||||||
|
String rv = "";
|
||||||
|
for (int i = 0; i < ary.length; i++) {
|
||||||
|
if (i != 0) rv += "\n";
|
||||||
|
rv += ary[i].toString();
|
||||||
|
}
|
||||||
|
return rv;
|
||||||
|
}
|
||||||
|
}
|
36
baselib/src/gplx/objects/primitives/Bool_.java
Normal file
36
baselib/src/gplx/objects/primitives/Bool_.java
Normal file
@ -0,0 +1,36 @@
|
|||||||
|
/*
|
||||||
|
XOWA: the XOWA Offline Wiki Application
|
||||||
|
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||||
|
|
||||||
|
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||||
|
or alternatively under the terms of the Apache License Version 2.0.
|
||||||
|
|
||||||
|
You may use XOWA according to either of these licenses as is most appropriate
|
||||||
|
for your project on a case-by-case basis.
|
||||||
|
|
||||||
|
The terms of each license can be found in the source code repository:
|
||||||
|
|
||||||
|
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||||
|
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||||
|
*/
|
||||||
|
package gplx.objects.primitives; import gplx.*; import gplx.objects.*;
|
||||||
|
import gplx.objects.errs.*;
|
||||||
|
public class Bool_ {
|
||||||
|
public static final String Cls_val_name = "bool";
|
||||||
|
public static final Class<?> Cls_ref_type = Boolean.class;
|
||||||
|
|
||||||
|
public static final boolean N = false , Y = true;
|
||||||
|
public static final byte N_byte = 0 , Y_byte = 1 , __byte = 127;
|
||||||
|
public static final int N_int = 0 , Y_int = 1 , __int = -1;
|
||||||
|
public static final String True_str = "true", False_str = "false";
|
||||||
|
|
||||||
|
|
||||||
|
public static boolean Cast(Object o) {
|
||||||
|
try {
|
||||||
|
return (Boolean)o;
|
||||||
|
}
|
||||||
|
catch (Exception e) {
|
||||||
|
throw Err_.New_fmt(e, "failed to cast to boolean; obj={0}", Object_.To_str(o));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
29
baselib/src/gplx/objects/primitives/Byte_.java
Normal file
29
baselib/src/gplx/objects/primitives/Byte_.java
Normal file
@ -0,0 +1,29 @@
|
|||||||
|
/*
|
||||||
|
XOWA: the XOWA Offline Wiki Application
|
||||||
|
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||||
|
|
||||||
|
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||||
|
or alternatively under the terms of the Apache License Version 2.0.
|
||||||
|
|
||||||
|
You may use XOWA according to either of these licenses as is most appropriate
|
||||||
|
for your project on a case-by-case basis.
|
||||||
|
|
||||||
|
The terms of each license can be found in the source code repository:
|
||||||
|
|
||||||
|
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||||
|
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||||
|
*/
|
||||||
|
package gplx.objects.primitives; import gplx.*; import gplx.objects.*;
|
||||||
|
import gplx.objects.errs.*;
|
||||||
|
public class Byte_ {
|
||||||
|
public static final String Cls_val_name = "byte";
|
||||||
|
public static final Class<?> Cls_ref_type = Byte.class;
|
||||||
|
public static byte Cast(Object o) {
|
||||||
|
try {
|
||||||
|
return (Byte)o;
|
||||||
|
}
|
||||||
|
catch (Exception e) {
|
||||||
|
throw Err_.New_fmt(e, "failed to cast to byte; obj={0}", Object_.To_str(o));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
20
baselib/src/gplx/objects/primitives/Char_.java
Normal file
20
baselib/src/gplx/objects/primitives/Char_.java
Normal file
@ -0,0 +1,20 @@
|
|||||||
|
/*
|
||||||
|
XOWA: the XOWA Offline Wiki Application
|
||||||
|
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||||
|
|
||||||
|
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||||
|
or alternatively under the terms of the Apache License Version 2.0.
|
||||||
|
|
||||||
|
You may use XOWA according to either of these licenses as is most appropriate
|
||||||
|
for your project on a case-by-case basis.
|
||||||
|
|
||||||
|
The terms of each license can be found in the source code repository:
|
||||||
|
|
||||||
|
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||||
|
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||||
|
*/
|
||||||
|
package gplx.objects.primitives; import gplx.*; import gplx.objects.*;
|
||||||
|
/** Type constants for {@code char}. */
public class Char_ {
  /** Boxed class corresponding to {@code char}. */
  public static final Class<?> Cls_ref_type = Character.class;

  /** Name of the primitive type. */
  public static final String Cls_val_name = "char";
}
|
25
baselib/src/gplx/objects/primitives/Char_code_.java
Normal file
25
baselib/src/gplx/objects/primitives/Char_code_.java
Normal file
@ -0,0 +1,25 @@
|
|||||||
|
/*
|
||||||
|
XOWA: the XOWA Offline Wiki Application
|
||||||
|
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||||
|
|
||||||
|
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||||
|
or alternatively under the terms of the Apache License Version 2.0.
|
||||||
|
|
||||||
|
You may use XOWA according to either of these licenses as is most appropriate
|
||||||
|
for your project on a case-by-case basis.
|
||||||
|
|
||||||
|
The terms of each license can be found in the source code repository:
|
||||||
|
|
||||||
|
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||||
|
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||||
|
*/
|
||||||
|
package gplx.objects.primitives; import gplx.*; import gplx.objects.*;
|
||||||
|
/** Named constants for commonly used characters. */
public class Char_code_ {
  public static final char New_line = '\n';
  public static final char Space    = ' ';
  public static final char Colon    = ':';
  public static final char Num_0    = '0';
  public static final char Pipe     = '|';
}
|
20
baselib/src/gplx/objects/primitives/Double_.java
Normal file
20
baselib/src/gplx/objects/primitives/Double_.java
Normal file
@ -0,0 +1,20 @@
|
|||||||
|
/*
|
||||||
|
XOWA: the XOWA Offline Wiki Application
|
||||||
|
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||||
|
|
||||||
|
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||||
|
or alternatively under the terms of the Apache License Version 2.0.
|
||||||
|
|
||||||
|
You may use XOWA according to either of these licenses as is most appropriate
|
||||||
|
for your project on a case-by-case basis.
|
||||||
|
|
||||||
|
The terms of each license can be found in the source code repository:
|
||||||
|
|
||||||
|
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||||
|
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||||
|
*/
|
||||||
|
package gplx.objects.primitives; import gplx.*; import gplx.objects.*;
|
||||||
|
/** Type constants for {@code double}. */
public class Double_ {
  /** Boxed class corresponding to {@code double}. */
  public static final Class<?> Cls_ref_type = Double.class;

  /** Name of the primitive type. */
  public static final String Cls_val_name = "double";
}
|
20
baselib/src/gplx/objects/primitives/Float_.java
Normal file
20
baselib/src/gplx/objects/primitives/Float_.java
Normal file
@ -0,0 +1,20 @@
|
|||||||
|
/*
|
||||||
|
XOWA: the XOWA Offline Wiki Application
|
||||||
|
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||||
|
|
||||||
|
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||||
|
or alternatively under the terms of the Apache License Version 2.0.
|
||||||
|
|
||||||
|
You may use XOWA according to either of these licenses as is most appropriate
|
||||||
|
for your project on a case-by-case basis.
|
||||||
|
|
||||||
|
The terms of each license can be found in the source code repository:
|
||||||
|
|
||||||
|
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||||
|
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||||
|
*/
|
||||||
|
package gplx.objects.primitives; import gplx.*; import gplx.objects.*;
|
||||||
|
/** Type constants for {@code float}. */
public class Float_ {
  /** Boxed class corresponding to {@code float}. */
  public static final Class<?> Cls_ref_type = Float.class;

  /** Name of the primitive type. */
  public static final String Cls_val_name = "float";
}
|
112
baselib/src/gplx/objects/primitives/Int_.java
Normal file
112
baselib/src/gplx/objects/primitives/Int_.java
Normal file
@ -0,0 +1,112 @@
|
|||||||
|
/*
|
||||||
|
XOWA: the XOWA Offline Wiki Application
|
||||||
|
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||||
|
|
||||||
|
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||||
|
or alternatively under the terms of the Apache License Version 2.0.
|
||||||
|
|
||||||
|
You may use XOWA according to either of these licenses as is most appropriate
|
||||||
|
for your project on a case-by-case basis.
|
||||||
|
|
||||||
|
The terms of each license can be found in the source code repository:
|
||||||
|
|
||||||
|
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||||
|
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||||
|
*/
|
||||||
|
package gplx.objects.primitives; import gplx.*; import gplx.objects.*;
|
||||||
|
import gplx.objects.errs.*;
|
||||||
|
import gplx.objects.strings.*;
|
||||||
|
public class Int_ {
|
||||||
|
public static final String Cls_val_name = "int";
|
||||||
|
public static final Class<?> Cls_ref_type = Integer.class;
|
||||||
|
|
||||||
|
public static final int
|
||||||
|
Min_value = Integer.MIN_VALUE
|
||||||
|
, Max_value = Integer.MAX_VALUE
|
||||||
|
, Max_value__31 = 2147483647
|
||||||
|
, Neg1 = -1
|
||||||
|
, Null = Int_.Min_value
|
||||||
|
, Base1 = 1 // for super 1 lists / arrays; EX: PHP; [a, b, c]; [1] => a
|
||||||
|
, Offset_1 = 1 // common symbol for + 1 after current pos; EX: String_.Mid(lhs + Offset_1, rhs)
|
||||||
|
;
|
||||||
|
|
||||||
|
public static int Cast(Object o) {
|
||||||
|
try {
|
||||||
|
return (Integer)o;
|
||||||
|
}
|
||||||
|
catch(Exception e) {
|
||||||
|
throw Err_.New_fmt(e, "failed to cast to int; obj={0}", Object_.To_str(o));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static String To_str(int v) {return new Integer(v).toString();}
|
||||||
|
|
||||||
|
public static int Parse_or(String raw, int or) {
|
||||||
|
// process args
|
||||||
|
if (raw == null) return or;
|
||||||
|
int raw_len = String_.Len(raw);
|
||||||
|
if (raw_len == 0) return or;
|
||||||
|
|
||||||
|
// loop backwards from nth to 0th char
|
||||||
|
int rv = 0, power_of_10 = 1;
|
||||||
|
for (int idx = raw_len - 1; idx >= 0; idx--) {
|
||||||
|
char cur = String_.Char_at(raw, idx);
|
||||||
|
int digit = -1;
|
||||||
|
switch (cur) {
|
||||||
|
// numbers -> assign digit
|
||||||
|
case '0': digit = 0; break; case '1': digit = 1; break; case '2': digit = 2; break; case '3': digit = 3; break; case '4': digit = 4; break;
|
||||||
|
case '5': digit = 5; break; case '6': digit = 6; break; case '7': digit = 7; break; case '8': digit = 8; break; case '9': digit = 9; break;
|
||||||
|
|
||||||
|
// negative sign
|
||||||
|
case '-':
|
||||||
|
if (idx != 0) { // invalid if not 1st
|
||||||
|
return or;
|
||||||
|
}
|
||||||
|
else { // is first; multiply by -1
|
||||||
|
rv *= -1;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// anything else
|
||||||
|
default:
|
||||||
|
return or;
|
||||||
|
}
|
||||||
|
rv += (digit * power_of_10);
|
||||||
|
power_of_10 *= 10;
|
||||||
|
}
|
||||||
|
return rv;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static boolean Between(int v, int lhs, int rhs) {
|
||||||
|
int lhs_comp = v == lhs ? 0 : (v < lhs ? -1 : 1);
|
||||||
|
int rhs_comp = v == rhs ? 0 : (v < rhs ? -1 : 1);
|
||||||
|
return (lhs_comp * rhs_comp) != 1; // 1 when v is (a) greater than both or (b) less than both
|
||||||
|
}
|
||||||
|
|
||||||
|
private static int[] Log_10s = new int[] {1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000, Int_.Max_value};
|
||||||
|
public static int Log10(int v) {
|
||||||
|
if (v == 0) return 0;
|
||||||
|
int sign = 1;
|
||||||
|
if (v < 0) {
|
||||||
|
if (v == Int_.Min_value) return -9; // NOTE: Int_.Min_value * -1 = Int_.Min_value
|
||||||
|
v *= -1;
|
||||||
|
sign = -1;
|
||||||
|
}
|
||||||
|
int log_10s_len = Log_10s.length;
|
||||||
|
int rv = log_10s_len - 2; // rv will only happen when v == Int_.Max_value
|
||||||
|
int bgn = 0;
|
||||||
|
if (v > 1000) { // optimization to reduce number of ops to < 5
|
||||||
|
bgn = 3;
|
||||||
|
if (v > 1000000) bgn = 6;
|
||||||
|
}
|
||||||
|
for (int i = bgn; i < log_10s_len; i++) {
|
||||||
|
if (v < Log_10s[i]) {rv = i - 1; break;}
|
||||||
|
}
|
||||||
|
return rv * sign;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static int Count_digits(int v) {
|
||||||
|
int log10 = Log10(v);
|
||||||
|
return v > -1 ? log10 + 1 : log10 * -1 + 2;
|
||||||
|
}
|
||||||
|
}
|
90
baselib/src/gplx/objects/primitives/Int__tst.java
Normal file
90
baselib/src/gplx/objects/primitives/Int__tst.java
Normal file
@ -0,0 +1,90 @@
|
|||||||
|
/*
|
||||||
|
XOWA: the XOWA Offline Wiki Application
|
||||||
|
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||||
|
|
||||||
|
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||||
|
or alternatively under the terms of the Apache License Version 2.0.
|
||||||
|
|
||||||
|
You may use XOWA according to either of these licenses as is most appropriate
|
||||||
|
for your project on a case-by-case basis.
|
||||||
|
|
||||||
|
The terms of each license can be found in the source code repository:
|
||||||
|
|
||||||
|
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||||
|
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||||
|
*/
|
||||||
|
package gplx.objects.primitives; import gplx.*; import gplx.objects.*;
|
||||||
|
import org.junit.*; import gplx.tests.*;
|
||||||
|
public class Int__tst {
|
||||||
|
private final Int__fxt fxt = new Int__fxt();
|
||||||
|
@Test public void Parse_or() {
|
||||||
|
fxt.Test__Parse_or("123", 123); // basic
|
||||||
|
fxt.Test__Parse_or_min_value(null); // null
|
||||||
|
fxt.Test__Parse_or_min_value(""); // empty
|
||||||
|
fxt.Test__Parse_or_min_value("1a"); // invalid number
|
||||||
|
|
||||||
|
fxt.Test__Parse_or("-123", -123); // negative
|
||||||
|
fxt.Test__Parse_or_min_value("1-23"); // negative at invalid position
|
||||||
|
}
|
||||||
|
@Test public void Between() {
|
||||||
|
fxt.Test__Between(1, 0, 2, true); // simple true
|
||||||
|
fxt.Test__Between(3, 0, 2, false); // simple false
|
||||||
|
fxt.Test__Between(0, 0, 2, true); // bgn true
|
||||||
|
fxt.Test__Between(2, 0, 2, true); // end true
|
||||||
|
}
|
||||||
|
@Test public void Count_digits() {
|
||||||
|
fxt.Test__Count_digits( 0, 1);
|
||||||
|
fxt.Test__Count_digits( 9, 1);
|
||||||
|
fxt.Test__Count_digits( 100, 3);
|
||||||
|
fxt.Test__Count_digits( -1, 2);
|
||||||
|
fxt.Test__Count_digits(-100, 4);
|
||||||
|
}
|
||||||
|
@Test public void Log10() {
|
||||||
|
fxt.Test__Log10( 0, 0);
|
||||||
|
fxt.Test__Log10( 1, 0);
|
||||||
|
fxt.Test__Log10( 2, 0);
|
||||||
|
fxt.Test__Log10( 10, 1);
|
||||||
|
fxt.Test__Log10( 12, 1);
|
||||||
|
fxt.Test__Log10( 100, 2);
|
||||||
|
fxt.Test__Log10( 123, 2);
|
||||||
|
fxt.Test__Log10( 1000, 3);
|
||||||
|
fxt.Test__Log10( 1234, 3);
|
||||||
|
fxt.Test__Log10( 10000, 4);
|
||||||
|
fxt.Test__Log10( 12345, 4);
|
||||||
|
fxt.Test__Log10( 100000, 5);
|
||||||
|
fxt.Test__Log10( 123456, 5);
|
||||||
|
fxt.Test__Log10( 1000000, 6);
|
||||||
|
fxt.Test__Log10( 1234567, 6);
|
||||||
|
fxt.Test__Log10( 10000000, 7);
|
||||||
|
fxt.Test__Log10( 12345678, 7);
|
||||||
|
fxt.Test__Log10( 100000000, 8);
|
||||||
|
fxt.Test__Log10( 123456789, 8);
|
||||||
|
fxt.Test__Log10( 1000000000, 9);
|
||||||
|
fxt.Test__Log10( 1234567890, 9);
|
||||||
|
fxt.Test__Log10(Int_.Max_value, 9);
|
||||||
|
fxt.Test__Log10( -1, 0);
|
||||||
|
fxt.Test__Log10( -10, -1);
|
||||||
|
fxt.Test__Log10( -100, -2);
|
||||||
|
fxt.Test__Log10( -1000000, -6);
|
||||||
|
fxt.Test__Log10( -1000000000, -9);
|
||||||
|
fxt.Test__Log10(Int_.Min_value, -9);
|
||||||
|
fxt.Test__Log10(Int_.Min_value + 1, -9);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
class Int__fxt {
|
||||||
|
public void Test__Parse_or(String raw, int expd) {
|
||||||
|
Gftest_fxt.Eq__int(expd, Int_.Parse_or(raw, -1));
|
||||||
|
}
|
||||||
|
public void Test__Parse_or_min_value(String raw) {
|
||||||
|
Gftest_fxt.Eq__int(Int_.Min_value, Int_.Parse_or(raw, Int_.Min_value));
|
||||||
|
}
|
||||||
|
public void Test__Between(int val, int lhs, int rhs, boolean expd) {
|
||||||
|
Gftest_fxt.Eq__bool(expd, Int_.Between(val, lhs, rhs));
|
||||||
|
}
|
||||||
|
public void Test__Count_digits(int val, int expd) {
|
||||||
|
Gftest_fxt.Eq__int(expd, Int_.Count_digits(val), Int_.To_str(val));
|
||||||
|
}
|
||||||
|
public void Test__Log10(int val, int expd) {
|
||||||
|
Gftest_fxt.Eq__int(expd, Int_.Log10(val));
|
||||||
|
}
|
||||||
|
}
|
29
baselib/src/gplx/objects/primitives/Long_.java
Normal file
29
baselib/src/gplx/objects/primitives/Long_.java
Normal file
@ -0,0 +1,29 @@
|
|||||||
|
/*
|
||||||
|
XOWA: the XOWA Offline Wiki Application
|
||||||
|
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||||
|
|
||||||
|
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||||
|
or alternatively under the terms of the Apache License Version 2.0.
|
||||||
|
|
||||||
|
You may use XOWA according to either of these licenses as is most appropriate
|
||||||
|
for your project on a case-by-case basis.
|
||||||
|
|
||||||
|
The terms of each license can be found in the source code repository:
|
||||||
|
|
||||||
|
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||||
|
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||||
|
*/
|
||||||
|
package gplx.objects.primitives; import gplx.*; import gplx.objects.*;
|
||||||
|
import gplx.objects.errs.*;
|
||||||
|
public class Long_ {
|
||||||
|
public static final String Cls_val_name = "long";
|
||||||
|
public static final Class<?> Cls_ref_type = Long.class;
|
||||||
|
public static long Cast(Object o) {
|
||||||
|
try {
|
||||||
|
return (Long)o;
|
||||||
|
}
|
||||||
|
catch(Exception e) {
|
||||||
|
throw Err_.New_fmt(e, "failed to cast to long; obj={0}", Object_.To_str(o));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
20
baselib/src/gplx/objects/primitives/Short_.java
Normal file
20
baselib/src/gplx/objects/primitives/Short_.java
Normal file
@ -0,0 +1,20 @@
|
|||||||
|
/*
|
||||||
|
XOWA: the XOWA Offline Wiki Application
|
||||||
|
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||||
|
|
||||||
|
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||||
|
or alternatively under the terms of the Apache License Version 2.0.
|
||||||
|
|
||||||
|
You may use XOWA according to either of these licenses as is most appropriate
|
||||||
|
for your project on a case-by-case basis.
|
||||||
|
|
||||||
|
The terms of each license can be found in the source code repository:
|
||||||
|
|
||||||
|
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||||
|
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||||
|
*/
|
||||||
|
package gplx.objects.primitives; import gplx.*; import gplx.objects.*;
|
||||||
|
// Type descriptors for the short primitive: its keyword name and its boxed class.
public class Short_ {
  // boxed reference type that corresponds to the primitive
  public static final Class<?> Cls_ref_type = Short.class;

  // keyword name of the primitive value type
  public static final String Cls_val_name = "short";
}
|
95
baselib/src/gplx/objects/strings/String_.java
Normal file
95
baselib/src/gplx/objects/strings/String_.java
Normal file
@ -0,0 +1,95 @@
|
|||||||
|
/*
|
||||||
|
XOWA: the XOWA Offline Wiki Application
|
||||||
|
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||||
|
|
||||||
|
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||||
|
or alternatively under the terms of the Apache License Version 2.0.
|
||||||
|
|
||||||
|
You may use XOWA according to either of these licenses as is most appropriate
|
||||||
|
for your project on a case-by-case basis.
|
||||||
|
|
||||||
|
The terms of each license can be found in the source code repository:
|
||||||
|
|
||||||
|
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||||
|
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||||
|
*/
|
||||||
|
package gplx.objects.strings; import gplx.*; import gplx.objects.*;
|
||||||
|
import java.lang.*;
|
||||||
|
import gplx.objects.errs.*;
|
||||||
|
import gplx.objects.strings.bfrs.*;
|
||||||
|
import gplx.objects.arrays.*; import gplx.objects.primitives.*;
|
||||||
|
public class String_ {
|
||||||
|
public static final Class<?> Cls_ref_type = String.class;
|
||||||
|
public static final String Cls_val_name = "str" + "ing";
|
||||||
|
public static final int Find_none = -1, Pos_neg1 = -1;
|
||||||
|
public static final String Empty = "", Null_mark = "<<NULL>>", Tab = "\t", Lf = "\n", CrLf = "\r\n";
|
||||||
|
|
||||||
|
public static boolean Eq(String lhs, String rhs) {return lhs == null ? rhs == null : lhs.equals(rhs);}
|
||||||
|
public static int Len(String s) {return s.length();}
|
||||||
|
public static char Char_at(String s, int i) {return s.charAt(i);}
|
||||||
|
|
||||||
|
// use C# flavor ("a {0}") rather than Java format ("a %s"); also: (a) don't fail on format errors; (b) escape brackets by doubling
|
||||||
|
private static final char FORMAT_ITM_LHS = '{', FORMAT_ITM_RHS = '}';
|
||||||
|
public static String Format(String fmt, Object... args) {
|
||||||
|
// method vars
|
||||||
|
int args_len = Array_.Len_obj(args);
|
||||||
|
if (args_len == 0) return fmt; // nothing to format
|
||||||
|
int fmt_len = Len(fmt);
|
||||||
|
|
||||||
|
// loop vars
|
||||||
|
int pos = 0; String arg_idx_str = ""; boolean inside_brackets = false;
|
||||||
|
String_bfr bfr = new String_bfr();
|
||||||
|
while (pos < fmt_len) { // loop over every char; NOTE: UT8-SAFE b/c only checking for "{"; "}"
|
||||||
|
char c = Char_at(fmt, pos);
|
||||||
|
if (inside_brackets) {
|
||||||
|
if (c == FORMAT_ITM_LHS) { // first FORMAT_ITM_LHS is fake; add FORMAT_ITM_LHS and whatever is in arg_idx_str
|
||||||
|
bfr.Add_char(FORMAT_ITM_LHS).Add(arg_idx_str);
|
||||||
|
arg_idx_str = "";
|
||||||
|
}
|
||||||
|
else if (c == FORMAT_ITM_RHS) { // itm completed
|
||||||
|
int args_idx = Int_.Parse_or(arg_idx_str, Int_.Min_value);
|
||||||
|
String itm = args_idx != Int_.Min_value && Int_.Between(args_idx, 0, args_len - 1) // check (a) args_idx is num; (b) args_idx is in bounds
|
||||||
|
? Object_.To_str_or_null_mark(args[args_idx]) // valid; add itm
|
||||||
|
: FORMAT_ITM_LHS + arg_idx_str + FORMAT_ITM_RHS; // not valid; just add String
|
||||||
|
bfr.Add(itm);
|
||||||
|
inside_brackets = false;
|
||||||
|
arg_idx_str = "";
|
||||||
|
}
|
||||||
|
else
|
||||||
|
arg_idx_str += c;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
if (c == FORMAT_ITM_LHS || c == FORMAT_ITM_RHS) {
|
||||||
|
boolean pos_is_end = pos == fmt_len - 1;
|
||||||
|
if (pos_is_end) // last char is "{" or "}" (and not inside_brackets); ignore and just ad
|
||||||
|
bfr.Add_char(c);
|
||||||
|
else {
|
||||||
|
char next = Char_at(fmt, pos + 1);
|
||||||
|
if (next == c) { // "{{" or "}}": escape by doubling
|
||||||
|
bfr.Add_char(c);
|
||||||
|
pos++;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
inside_brackets = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
bfr.Add_char(c);
|
||||||
|
}
|
||||||
|
pos++;
|
||||||
|
}
|
||||||
|
if (Len(arg_idx_str) > 0) // unclosed bracket; add FORMAT_ITM_LHS and whatever is in arg_idx_str; ex: "{0"
|
||||||
|
bfr.Add_char(FORMAT_ITM_LHS).Add(arg_idx_str);
|
||||||
|
return bfr.To_str();
|
||||||
|
}
|
||||||
|
|
||||||
|
public static String New_bry_utf8(byte[] v) {return v == null ? null : New_bry_utf8(v, 0, v.length);}
|
||||||
|
public static String New_bry_utf8(byte[] v, int bgn, int end) {
|
||||||
|
try {
|
||||||
|
return v == null
|
||||||
|
? null
|
||||||
|
: new String(v, bgn, end - bgn, "UTF-8");
|
||||||
|
}
|
||||||
|
catch (Exception e) {throw Err_.New_fmt(e, "unsupported encoding; bgn={0} end={1}", bgn, end);}
|
||||||
|
}
|
||||||
|
}
|
47
baselib/src/gplx/objects/strings/String__tst.java
Normal file
47
baselib/src/gplx/objects/strings/String__tst.java
Normal file
@ -0,0 +1,47 @@
|
|||||||
|
/*
|
||||||
|
XOWA: the XOWA Offline Wiki Application
|
||||||
|
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||||
|
|
||||||
|
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||||
|
or alternatively under the terms of the Apache License Version 2.0.
|
||||||
|
|
||||||
|
You may use XOWA according to either of these licenses as is most appropriate
|
||||||
|
for your project on a case-by-case basis.
|
||||||
|
|
||||||
|
The terms of each license can be found in the source code repository:
|
||||||
|
|
||||||
|
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||||
|
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||||
|
*/
|
||||||
|
package gplx.objects.strings; import gplx.*; import gplx.objects.*;
|
||||||
|
import org.junit.*; import gplx.tests.*;
|
||||||
|
public class String__tst {
|
||||||
|
private final String__fxt fxt = new String__fxt();
|
||||||
|
@Test public void Len() {
|
||||||
|
fxt.Test__Len("" , 0);
|
||||||
|
fxt.Test__Len("abc", 3);
|
||||||
|
}
|
||||||
|
@Test public void Format() {
|
||||||
|
fxt.Test__Format("" , ""); // empty fmt
|
||||||
|
fxt.Test__Format("" , "", "a"); // empty fmt w/ args
|
||||||
|
fxt.Test__Format("a" , "a"); // no args
|
||||||
|
fxt.Test__Format("a" , "{0}", "a"); // args = 1
|
||||||
|
fxt.Test__Format("a + b" , "{0} + {1}", "a", "b"); // args = n
|
||||||
|
fxt.Test__Format("{" , "{{", 0); // escape "{"
|
||||||
|
fxt.Test__Format("}" , "}}", 0); // escape "}"
|
||||||
|
fxt.Test__Format("{a0c}" , "{a{0}c}", 0); // nested;
|
||||||
|
fxt.Test__Format("{a{b}c}" , "{a{b}c}", 0); // nested; invalid
|
||||||
|
fxt.Test__Format("{1}" , "{1}", "a"); // out of bounds
|
||||||
|
fxt.Test__Format("{a} {b}" , "{a} {b}", 0); // invalid arg
|
||||||
|
fxt.Test__Format("{a}0{b}1", "{a}{0}{b}{1}", 0, 1); // invalid and valid args
|
||||||
|
fxt.Test__Format("{0", "{0", 0); // dangling
|
||||||
|
}
|
||||||
|
}
|
||||||
|
class String__fxt {
|
||||||
|
public void Test__Format(String expd, String fmt, Object... ary) {
|
||||||
|
Gftest_fxt.Eq__str(expd, String_.Format(fmt, ary));
|
||||||
|
}
|
||||||
|
public void Test__Len(String v, int expd) {
|
||||||
|
Gftest_fxt.Eq__int(expd, String_.Len(v));
|
||||||
|
}
|
||||||
|
}
|
76
baselib/src/gplx/objects/strings/bfrs/String_bfr.java
Normal file
76
baselib/src/gplx/objects/strings/bfrs/String_bfr.java
Normal file
@ -0,0 +1,76 @@
|
|||||||
|
/*
|
||||||
|
XOWA: the XOWA Offline Wiki Application
|
||||||
|
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||||
|
|
||||||
|
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||||
|
or alternatively under the terms of the Apache License Version 2.0.
|
||||||
|
|
||||||
|
You may use XOWA according to either of these licenses as is most appropriate
|
||||||
|
for your project on a case-by-case basis.
|
||||||
|
|
||||||
|
The terms of each license can be found in the source code repository:
|
||||||
|
|
||||||
|
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||||
|
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||||
|
*/
|
||||||
|
package gplx.objects.strings.bfrs; import gplx.*; import gplx.objects.*; import gplx.objects.strings.*;
|
||||||
|
import java.lang.*;
|
||||||
|
import gplx.objects.primitives.*;
|
||||||
|
import gplx.objects.errs.*;
|
||||||
|
public class String_bfr {
|
||||||
|
private java.lang.StringBuilder sb = new java.lang.StringBuilder();
|
||||||
|
public boolean Has_none() {return this.Len() == 0;}
|
||||||
|
public boolean Has_some() {return this.Len() > 0;}
|
||||||
|
public String_bfr Add_fmt(String format, Object... args) {Add(String_.Format(format, args)); return this;}
|
||||||
|
public String_bfr Add_char_pipe() {return Add_char(Char_code_.Pipe);}
|
||||||
|
public String_bfr Add_char_nl() {return Add_char(Char_code_.New_line);}
|
||||||
|
public String_bfr Add_char_space() {return Add_char(Char_code_.Space);}
|
||||||
|
public String_bfr Add_char_colon() {return Add_char(Char_code_.Colon);}
|
||||||
|
public String_bfr Add_char_repeat(char c, int repeat) {
|
||||||
|
this.Ensure_capacity(this.Len() + repeat);
|
||||||
|
for (int i = 0; i < repeat; i++)
|
||||||
|
Add_char(c);
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
public String_bfr Add_int_pad_bgn(char pad_char, int str_len, int val) {
|
||||||
|
int digit_len = Int_.Count_digits(val);
|
||||||
|
int pad_len = str_len - digit_len;
|
||||||
|
if (pad_len > 0) // note that this skips pad_len == 0, as well as guarding against negative pad_len; EX: pad(" ", 3, 1234) -> "1234"
|
||||||
|
Add_char_repeat(pad_char, pad_len);
|
||||||
|
Add_int(val);
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
public String_bfr Add_bool(boolean val) {
|
||||||
|
this.Add(val ? Bool_.True_str : Bool_.False_str);
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
public String_bfr Add_bool_as_yn(boolean val) {
|
||||||
|
this.Add(val ? "y" : "n");
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
public String_bfr Clear() {Del(0, this.Len()); return this;}
|
||||||
|
public String To_str_and_clear() {
|
||||||
|
String rv = To_str();
|
||||||
|
Clear();
|
||||||
|
return rv;
|
||||||
|
}
|
||||||
|
@Override public String toString() {return To_str();}
|
||||||
|
public String To_str() {return sb.toString();}
|
||||||
|
public int Len() {return sb.length();}
|
||||||
|
public String_bfr Add_at(int idx, String s) {sb.insert(idx, s); return this;}
|
||||||
|
public String_bfr Add(String s) {sb.append(s); return this;}
|
||||||
|
public String_bfr Add_char(char c) {sb.append(c); return this;}
|
||||||
|
public String_bfr Add_byte(byte i) {sb.append(i); return this;}
|
||||||
|
public String_bfr Add_int(int i) {sb.append(i); return this;}
|
||||||
|
public String_bfr Add_long(long i) {sb.append(i); return this;}
|
||||||
|
public String_bfr Add_double(double i) {sb.append(i); return this;}
|
||||||
|
public String_bfr Add_mid(char[] ary, int bgn, int count) {sb.append(ary, bgn, count); return this;}
|
||||||
|
public String_bfr Add_obj(Object o) {sb.append(o); return this;}
|
||||||
|
public String_bfr Add_bry(byte[] v) {
|
||||||
|
if (v != null)
|
||||||
|
sb.append(String_.New_bry_utf8(v));
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
private void Ensure_capacity(int capacity) {sb.ensureCapacity(capacity);}
|
||||||
|
public String_bfr Del(int bgn, int len) {sb.delete(bgn, len); return this;}
|
||||||
|
}
|
@ -0,0 +1,25 @@
|
|||||||
|
/*
|
||||||
|
XOWA: the XOWA Offline Wiki Application
|
||||||
|
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||||
|
|
||||||
|
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||||
|
or alternatively under the terms of the Apache License Version 2.0.
|
||||||
|
|
||||||
|
You may use XOWA according to either of these licenses as is most appropriate
|
||||||
|
for your project on a case-by-case basis.
|
||||||
|
|
||||||
|
The terms of each license can be found in the source code repository:
|
||||||
|
|
||||||
|
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||||
|
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||||
|
*/
|
||||||
|
package gplx.objects.strings.char_sources; import gplx.*; import gplx.objects.*; import gplx.objects.strings.*;
|
||||||
|
// Read-only view over a String that is addressed by "data" positions.
// What one data item is depends on the implementation; EX: a char, or a full codepoint.
public interface Char_source {
  // -- content --
  String Src();                         // the underlying String
  int Len_in_data();                    // number of data items
  int Get_data(int pos);                // data item at the given data position

  // -- operations; positions are data positions --
  String Substring(int bgn, int end);   // text from bgn (inclusive) to end (exclusive)
  int Index_of(Char_source find, int bgn); // 1st position at or after bgn where find occurs, or -1 if there is none
  boolean Eq(int lhs_bgn, Char_source rhs, int rhs_bgn, int rhs_end); // compares a run of this source (from lhs_bgn) against a run of rhs (from rhs_bgn)
}
|
@ -0,0 +1,30 @@
|
|||||||
|
/*
|
||||||
|
XOWA: the XOWA Offline Wiki Application
|
||||||
|
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||||
|
|
||||||
|
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||||
|
or alternatively under the terms of the Apache License Version 2.0.
|
||||||
|
|
||||||
|
You may use XOWA according to either of these licenses as is most appropriate
|
||||||
|
for your project on a case-by-case basis.
|
||||||
|
|
||||||
|
The terms of each license can be found in the source code repository:
|
||||||
|
|
||||||
|
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||||
|
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||||
|
*/
|
||||||
|
package gplx.objects.strings.char_sources; import gplx.*; import gplx.objects.*; import gplx.objects.strings.*;
|
||||||
|
public class Char_source_ {
|
||||||
|
public static int Index_of_any(String src, char[] ary) {
|
||||||
|
int src_len = String_.Len(src);
|
||||||
|
int ary_len = ary.length;
|
||||||
|
for (int i = 0; i < src_len; i++) {
|
||||||
|
for (int j = 0; j < ary_len; j++) {
|
||||||
|
if (String_.Char_at(src, i) == ary[j] ) {
|
||||||
|
return i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
151
baselib/src/gplx/objects/strings/unicodes/Ustring.java
Normal file
151
baselib/src/gplx/objects/strings/unicodes/Ustring.java
Normal file
@ -0,0 +1,151 @@
|
|||||||
|
/*
|
||||||
|
XOWA: the XOWA Offline Wiki Application
|
||||||
|
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||||
|
|
||||||
|
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||||
|
or alternatively under the terms of the Apache License Version 2.0.
|
||||||
|
|
||||||
|
You may use XOWA according to either of these licenses as is most appropriate
|
||||||
|
for your project on a case-by-case basis.
|
||||||
|
|
||||||
|
The terms of each license can be found in the source code repository:
|
||||||
|
|
||||||
|
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||||
|
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||||
|
*/
|
||||||
|
package gplx.objects.strings.unicodes; import gplx.*; import gplx.objects.*; import gplx.objects.strings.*;
|
||||||
|
import gplx.objects.errs.*;
|
||||||
|
import gplx.objects.brys.*;
|
||||||
|
import gplx.objects.strings.char_sources.*;
|
||||||
|
|
||||||
|
public interface Ustring extends Char_source {
|
||||||
|
int Len_in_chars();
|
||||||
|
int Map_data_to_char(int pos);
|
||||||
|
int Map_char_to_data(int pos);
|
||||||
|
}
|
||||||
|
class Ustring_single implements Ustring { // 1 char == 1 codepoint
|
||||||
|
public Ustring_single(String src, int src_len) {
|
||||||
|
this.src = src;
|
||||||
|
this.src_len = src_len;
|
||||||
|
}
|
||||||
|
public String Src() {return src;} private final String src;
|
||||||
|
public int Len_in_chars() {return src_len;} private final int src_len;
|
||||||
|
public int Len_in_data() {return src_len;}
|
||||||
|
public String Substring(int bgn, int end) {return src.substring(bgn, end);}
|
||||||
|
public int Index_of(Char_source find, int bgn) {return src.indexOf(find.Src(), bgn);}
|
||||||
|
public boolean Eq(int lhs_bgn, Char_source rhs, int rhs_bgn, int rhs_end) {
|
||||||
|
if (src_len < lhs_bgn + rhs_end || rhs.Len_in_data() < rhs_bgn + rhs_end)
|
||||||
|
return false;
|
||||||
|
while ( --rhs_end>=0 )
|
||||||
|
if (this.Get_data(lhs_bgn++) != rhs.Get_data(rhs_bgn++))
|
||||||
|
return false;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
public int Get_data(int i) {return String_.Char_at(src, i);}
|
||||||
|
public int Map_data_to_char(int i) {if (i < 0 || i > src_len) throw Err_.New_fmt("invalid idx; idx={0} src={1}", i, src); return i;}
|
||||||
|
public int Map_char_to_data(int i) {if (i < 0 || i > src_len) throw Err_.New_fmt("invalid idx; idx={0} src={1}", i, src); return i;}
|
||||||
|
}
|
||||||
|
class Ustring_codepoints implements Ustring {
|
||||||
|
private final int[] codes;
|
||||||
|
public Ustring_codepoints(String src, int chars_len, int codes_len) {
|
||||||
|
// set members
|
||||||
|
this.src = src;
|
||||||
|
this.chars_len = chars_len;
|
||||||
|
this.codes_len = codes_len;
|
||||||
|
|
||||||
|
// make codes[]
|
||||||
|
this.codes = new int[codes_len];
|
||||||
|
int code_idx = 0;
|
||||||
|
for (int i = 0; i < chars_len; i++) {
|
||||||
|
char c = src.charAt(i);
|
||||||
|
if (c >= Ustring_.Surrogate_hi_bgn && c <= Ustring_.Surrogate_hi_end) { // character is 1st part of surrogate-pair
|
||||||
|
i++;
|
||||||
|
if (i == chars_len) throw Err_.New_fmt("invalid surrogate pair found; src={0}", src);
|
||||||
|
int c2 = src.charAt(i);
|
||||||
|
codes[code_idx++] = Ustring_.Surrogate_cp_bgn + (c - Ustring_.Surrogate_hi_bgn) * Ustring_.Surrogate_range + (c2 - Ustring_.Surrogate_lo_bgn);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
codes[code_idx++] = c;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
public String Src() {return src;} private final String src;
|
||||||
|
public String Substring(int bgn, int end) {
|
||||||
|
int len = 0;
|
||||||
|
for (int i = bgn; i < end; i++) {
|
||||||
|
int code = codes[i];
|
||||||
|
len += code >= Ustring_.Surrogate_cp_bgn && code <= Ustring_.Surrogate_cp_end ? 2 : 1;
|
||||||
|
}
|
||||||
|
char[] rv = new char[len];
|
||||||
|
int rv_idx = 0;
|
||||||
|
for (int i = bgn; i < end; i++) {
|
||||||
|
int code = codes[i];
|
||||||
|
if (code >= Ustring_.Surrogate_cp_bgn && code <= Ustring_.Surrogate_cp_end) {
|
||||||
|
rv[rv_idx++] = (char)((code - 0x10000) / 0x400 + 0xD800);
|
||||||
|
rv[rv_idx++] = (char)((code - 0x10000) % 0x400 + 0xDC00);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
rv[rv_idx++] = (char)code;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return new String(rv);
|
||||||
|
}
|
||||||
|
public int Index_of(Char_source find, int bgn) {
|
||||||
|
int find_len = find.Len_in_data();
|
||||||
|
int codes_len = codes.length;
|
||||||
|
for (int i = bgn; i < codes.length; i++) {
|
||||||
|
boolean found = true;
|
||||||
|
for (int j = 0; j < find_len; j++) {
|
||||||
|
int codes_idx = i + j;
|
||||||
|
if (codes_idx >= codes_len) {
|
||||||
|
found = false;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (codes[codes_idx] != find.Get_data(j)) {
|
||||||
|
found = false;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (found == true)
|
||||||
|
return i;
|
||||||
|
}
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
public boolean Eq(int lhs_bgn, Char_source rhs, int rhs_bgn, int rhs_end) {
|
||||||
|
if (this.Len_in_data() < lhs_bgn + rhs_end || rhs.Len_in_data() < rhs_bgn + rhs_end)
|
||||||
|
return false;
|
||||||
|
while ( --rhs_end>=0 )
|
||||||
|
if ((this.Get_data(lhs_bgn++) != rhs.Get_data(rhs_bgn++)))
|
||||||
|
return false;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
public int Len_in_chars() {return chars_len;} private final int chars_len;
|
||||||
|
public int Len_in_data() {return codes_len;} private final int codes_len;
|
||||||
|
public int Get_data(int i) {return codes[i];}
|
||||||
|
public int Map_data_to_char(int code_pos) {
|
||||||
|
if (code_pos == codes_len) return chars_len; // if char_pos is chars_len, return codes_len; allows "int end = u.Map_char_to_data(str_len)"
|
||||||
|
|
||||||
|
// sum all items before requested pos
|
||||||
|
int rv = 0;
|
||||||
|
for (int i = 0; i < code_pos; i++) {
|
||||||
|
rv += codes[i] < Ustring_.Surrogate_cp_bgn ? 1 : 2;
|
||||||
|
}
|
||||||
|
return rv;
|
||||||
|
}
|
||||||
|
public int Map_char_to_data(int char_pos) {
|
||||||
|
if (char_pos == chars_len) return codes_len; // if char_pos is chars_len, return codes_len; allows "int end = u.Map_char_to_data(str_len)"
|
||||||
|
|
||||||
|
// sum all items before requested pos
|
||||||
|
int rv = 0;
|
||||||
|
for (int i = 0; i < char_pos; i++) {
|
||||||
|
char c = src.charAt(i);
|
||||||
|
if (c >= Ustring_.Surrogate_hi_bgn && c <= Ustring_.Surrogate_hi_end){ // Surrogate_hi
|
||||||
|
if (i == char_pos - 1) // char_pos is Surrogate_lo; return -1 since Surrogate_lo doesn't map to a code_pos
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
rv++;
|
||||||
|
}
|
||||||
|
return rv;
|
||||||
|
}
|
||||||
|
}
|
51
baselib/src/gplx/objects/strings/unicodes/Ustring_.java
Normal file
51
baselib/src/gplx/objects/strings/unicodes/Ustring_.java
Normal file
@ -0,0 +1,51 @@
|
|||||||
|
/*
|
||||||
|
XOWA: the XOWA Offline Wiki Application
|
||||||
|
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||||
|
|
||||||
|
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||||
|
or alternatively under the terms of the Apache License Version 2.0.
|
||||||
|
|
||||||
|
You may use XOWA according to either of these licenses as is most appropriate
|
||||||
|
for your project on a case-by-case basis.
|
||||||
|
|
||||||
|
The terms of each license can be found in the source code repository:
|
||||||
|
|
||||||
|
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||||
|
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||||
|
*/
|
||||||
|
package gplx.objects.strings.unicodes; import gplx.*; import gplx.objects.*; import gplx.objects.strings.*;
|
||||||
|
import gplx.objects.errs.*;
|
||||||
|
public class Ustring_ {
|
||||||
|
public static Ustring New_codepoints(String src) {
|
||||||
|
if (src == null) throw Err_.New_null("src");
|
||||||
|
|
||||||
|
// calc lens
|
||||||
|
int chars_len = src.length();
|
||||||
|
int codes_len = Ustring_.Len(src, chars_len);
|
||||||
|
|
||||||
|
return chars_len == codes_len
|
||||||
|
? (Ustring)new Ustring_single(src, chars_len)
|
||||||
|
: (Ustring)new Ustring_codepoints(src, chars_len, codes_len);
|
||||||
|
}
|
||||||
|
public static int Len(String src, int src_len) {
|
||||||
|
int rv = 0;
|
||||||
|
for (int i = 0; i < src_len; i++) {
|
||||||
|
char c = src.charAt(i);
|
||||||
|
if (c >= Surrogate_hi_bgn && c <= Surrogate_hi_end) {
|
||||||
|
i++;
|
||||||
|
}
|
||||||
|
rv++;
|
||||||
|
}
|
||||||
|
return rv;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static final int // REF: https://en.wikipedia.org/wiki/Universal_Character_Set_characters
|
||||||
|
Surrogate_hi_bgn = 0xD800 // 55,296: Surrogate high start
|
||||||
|
, Surrogate_hi_end = 0xDBFF // 56,319: Surrogate high end
|
||||||
|
, Surrogate_lo_bgn = 0xDC00 // 56,320: Surrogate low start
|
||||||
|
, Surrogate_lo_end = 0xDFFF // 57,343: Surrogate low end
|
||||||
|
, Surrogate_cp_bgn = 0x010000 // 65,536: Surrogate codepoint start
|
||||||
|
, Surrogate_cp_end = 0x10FFFF // 1,114,111: Surrogate codepoint end
|
||||||
|
, Surrogate_range = 0x400 // 1,024: Surrogate range (end - start) for high / low
|
||||||
|
;
|
||||||
|
}
|
104
baselib/src/gplx/objects/strings/unicodes/Ustring_tst.java
Normal file
104
baselib/src/gplx/objects/strings/unicodes/Ustring_tst.java
Normal file
@ -0,0 +1,104 @@
|
|||||||
|
/*
|
||||||
|
XOWA: the XOWA Offline Wiki Application
|
||||||
|
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||||
|
|
||||||
|
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||||
|
or alternatively under the terms of the Apache License Version 2.0.
|
||||||
|
|
||||||
|
You may use XOWA according to either of these licenses as is most appropriate
|
||||||
|
for your project on a case-by-case basis.
|
||||||
|
|
||||||
|
The terms of each license can be found in the source code repository:
|
||||||
|
|
||||||
|
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||||
|
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||||
|
*/
|
||||||
|
package gplx.objects.strings.unicodes; import gplx.*; import gplx.objects.*; import gplx.objects.strings.*;
|
||||||
|
import org.junit.*; import gplx.tests.*;
|
||||||
|
import gplx.objects.errs.*;
|
||||||
|
public class Ustring_tst {
|
||||||
|
private final Ustring_fxt fxt = new Ustring_fxt();
|
||||||
|
@Test public void Empty() {
|
||||||
|
fxt.Init("");
|
||||||
|
fxt.Test__Len(0, 0);
|
||||||
|
}
|
||||||
|
@Test public void Blank() {
|
||||||
|
fxt.Init("");
|
||||||
|
fxt.Test__Len(0, 0);
|
||||||
|
}
|
||||||
|
@Test public void Single() {
|
||||||
|
fxt.Init("Abc");
|
||||||
|
fxt.Test__Len(3, 3);
|
||||||
|
fxt.Test__Get_code(65, 98, 99);
|
||||||
|
fxt.Test__Map_code_to_char(0, 1, 2, 3);
|
||||||
|
fxt.Test__Map_char_to_code(0, 1, 2, 3);
|
||||||
|
}
|
||||||
|
@Test public void Multi() {
|
||||||
|
fxt.Init("a¢€𤭢b");
|
||||||
|
fxt.Test__Len(5, 6);
|
||||||
|
fxt.Test__Get_code(97, 162, 8364, 150370, 98);
|
||||||
|
fxt.Test__Map_code_to_char(0, 1, 2, 3, 5, 6);
|
||||||
|
fxt.Test__Map_char_to_code(0, 1, 2, 3, -1, 4, 5);
|
||||||
|
}
|
||||||
|
@Test public void Index_of() {
|
||||||
|
fxt.Test__Index_of("abc", "b", 0, 1); // basic
|
||||||
|
fxt.Test__Index_of("ab", "bc", 0, -1); // out-of-bounds
|
||||||
|
fxt.Test__Index_of("a¢e", "¢", 0, 1); // check UTF-8 strings still match at byte-level
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test public void Substring() {
|
||||||
|
fxt.Test__Substring("abc", 1, 2, "b"); // basic
|
||||||
|
fxt.Test__Substring("¢bc", 1, 2, "b"); // check UTF-8 strings don't get lopped off
|
||||||
|
}
|
||||||
|
}
|
||||||
|
class Ustring_fxt {
|
||||||
|
private Ustring under;
|
||||||
|
public void Init(String src) {
|
||||||
|
this.under = Ustring_.New_codepoints(src);
|
||||||
|
}
|
||||||
|
public void Test__Len(int expd_codes, int expd_chars) {
|
||||||
|
Gftest_fxt.Eq__int(expd_codes, under.Len_in_data(), "codes");
|
||||||
|
Gftest_fxt.Eq__int(expd_chars, under.Len_in_chars(), "chars");
|
||||||
|
}
|
||||||
|
public void Test__Get_code(int... expd) {
|
||||||
|
int actl_len = under.Len_in_data();
|
||||||
|
int[] actl = new int[actl_len];
|
||||||
|
for (int i = 0; i < actl_len; i++)
|
||||||
|
actl[i] = under.Get_data(i);
|
||||||
|
Gftest_fxt.Eq__ary(expd, actl);
|
||||||
|
}
|
||||||
|
public void Test__Map_code_to_char(int... expd) {
|
||||||
|
int actl_len = under.Len_in_data() + 1;
|
||||||
|
int[] actl = new int[actl_len];
|
||||||
|
for (int i = 0; i < actl_len; i++)
|
||||||
|
actl[i] = under.Map_data_to_char(i);
|
||||||
|
Gftest_fxt.Eq__ary(expd, actl);
|
||||||
|
}
|
||||||
|
public void Test__Map_char_to_code(int... expd) {
|
||||||
|
int actl_len = under.Len_in_chars() + 1;
|
||||||
|
int[] actl = new int[actl_len];
|
||||||
|
for (int i = 0; i < actl_len; i++) {
|
||||||
|
int val = 0;
|
||||||
|
try {
|
||||||
|
val = under.Map_char_to_data(i);
|
||||||
|
}
|
||||||
|
catch (Exception exc) {
|
||||||
|
val = -1;
|
||||||
|
Err_.Noop(exc);
|
||||||
|
}
|
||||||
|
actl[i] = val;
|
||||||
|
}
|
||||||
|
Gftest_fxt.Eq__ary(expd, actl);
|
||||||
|
}
|
||||||
|
public void Test__Index_of(String src_str, String find_str, int bgn, int expd) {
|
||||||
|
Ustring src = Ustring_.New_codepoints(src_str);
|
||||||
|
Ustring find = Ustring_.New_codepoints(find_str);
|
||||||
|
int actl = src.Index_of(find, bgn);
|
||||||
|
Gftest_fxt.Eq__int(expd, actl);
|
||||||
|
}
|
||||||
|
public void Test__Substring(String src_str, int bgn, int end, String expd) {
|
||||||
|
Ustring src = Ustring_.New_codepoints(src_str);
|
||||||
|
String actl = src.Substring(bgn, end);
|
||||||
|
Gftest_fxt.Eq__str(expd, actl);
|
||||||
|
}
|
||||||
|
}
|
23
baselib/src/gplx/objects/types/Type_.java
Normal file
23
baselib/src/gplx/objects/types/Type_.java
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
/*
|
||||||
|
XOWA: the XOWA Offline Wiki Application
|
||||||
|
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||||
|
|
||||||
|
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||||
|
or alternatively under the terms of the Apache License Version 2.0.
|
||||||
|
|
||||||
|
You may use XOWA according to either of these licenses as is most appropriate
|
||||||
|
for your project on a case-by-case basis.
|
||||||
|
|
||||||
|
The terms of each license can be found in the source code repository:
|
||||||
|
|
||||||
|
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||||
|
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||||
|
*/
|
||||||
|
package gplx.objects.types; import gplx.*; import gplx.objects.*;
|
||||||
|
public class Type_ {
	// Null-safe equality for two Class refs: true if both are null, or if both are non-null and lhs.equals(rhs)
	public static boolean Eq(Class<?> lhs, Class<?> rhs) {// DUPE_FOR_TRACKING: same as Object_.Eq
		if (lhs == null)
			return rhs == null;          // null only equals null
		return rhs != null && lhs.equals(rhs);
	}
}
|
59
baselib/src/gplx/objects/types/Type_ids_.java
Normal file
59
baselib/src/gplx/objects/types/Type_ids_.java
Normal file
@ -0,0 +1,59 @@
|
|||||||
|
/*
|
||||||
|
XOWA: the XOWA Offline Wiki Application
|
||||||
|
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||||
|
|
||||||
|
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||||
|
or alternatively under the terms of the Apache License Version 2.0.
|
||||||
|
|
||||||
|
You may use XOWA according to either of these licenses as is most appropriate
|
||||||
|
for your project on a case-by-case basis.
|
||||||
|
|
||||||
|
The terms of each license can be found in the source code repository:
|
||||||
|
|
||||||
|
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||||
|
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||||
|
*/
|
||||||
|
package gplx.objects.types; import gplx.*; import gplx.objects.*;
|
||||||
|
import gplx.objects.primitives.*;
|
||||||
|
import gplx.objects.strings.*;
|
||||||
|
public class Type_ids_ {
|
||||||
|
public static final int // SERIALIZABLE.N
|
||||||
|
Id__obj = 0
|
||||||
|
, Id__null = 1
|
||||||
|
, Id__bool = 2
|
||||||
|
, Id__byte = 3
|
||||||
|
, Id__short = 4
|
||||||
|
, Id__int = 5
|
||||||
|
, Id__long = 6
|
||||||
|
, Id__float = 7
|
||||||
|
, Id__double = 8
|
||||||
|
, Id__char = 9
|
||||||
|
, Id__str = 10
|
||||||
|
, Id__bry = 11
|
||||||
|
, Id__date = 12
|
||||||
|
, Id__decimal = 13
|
||||||
|
, Id__array = 14
|
||||||
|
;
|
||||||
|
|
||||||
|
public static int To_id_by_obj(Object o) {
|
||||||
|
if (o == null) return Type_ids_.Id__null;
|
||||||
|
Class<?> type = o.getClass();
|
||||||
|
return Type_ids_.To_id_by_type(type);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static int To_id_by_type(Class<?> type) {
|
||||||
|
if (Type_.Eq(type, Int_.Cls_ref_type)) return Id__int;
|
||||||
|
else if (Type_.Eq(type, String_.Cls_ref_type)) return Id__str;
|
||||||
|
else if (Type_.Eq(type, byte[].class)) return Id__bry;
|
||||||
|
else if (Type_.Eq(type, Bool_.Cls_ref_type)) return Id__bool;
|
||||||
|
else if (Type_.Eq(type, Byte_.Cls_ref_type)) return Id__byte;
|
||||||
|
else if (Type_.Eq(type, Long_.Cls_ref_type)) return Id__long;
|
||||||
|
else if (Type_.Eq(type, Double_.Cls_ref_type)) return Id__double;
|
||||||
|
// else if (Type_.Eq(type, Decimal_.Cls_ref_type)) return Id__decimal;
|
||||||
|
// else if (Type_.Eq(type, Date_.Cls_ref_type)) return Id__date;
|
||||||
|
else if (Type_.Eq(type, Float_.Cls_ref_type)) return Id__float;
|
||||||
|
else if (Type_.Eq(type, Short_.Cls_ref_type)) return Id__short;
|
||||||
|
else if (Type_.Eq(type, Char_.Cls_ref_type)) return Id__char;
|
||||||
|
else return Id__obj;
|
||||||
|
}
|
||||||
|
}
|
220
baselib/src/gplx/tests/Gftest_fxt.java
Normal file
220
baselib/src/gplx/tests/Gftest_fxt.java
Normal file
@ -0,0 +1,220 @@
|
|||||||
|
/*
|
||||||
|
XOWA: the XOWA Offline Wiki Application
|
||||||
|
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||||
|
|
||||||
|
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||||
|
or alternatively under the terms of the Apache License Version 2.0.
|
||||||
|
|
||||||
|
You may use XOWA according to either of these licenses as is most appropriate
|
||||||
|
for your project on a case-by-case basis.
|
||||||
|
|
||||||
|
The terms of each license can be found in the source code repository:
|
||||||
|
|
||||||
|
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||||
|
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||||
|
*/
|
||||||
|
package gplx.tests; import gplx.*;
|
||||||
|
import gplx.objects.*;
|
||||||
|
import gplx.objects.errs.*;
|
||||||
|
import gplx.objects.primitives.*; import gplx.objects.brys.*;
|
||||||
|
import gplx.objects.strings.*; import gplx.objects.strings.bfrs.*;
|
||||||
|
import gplx.objects.arrays.*; import gplx.objects.types.*;
|
||||||
|
public class Gftest_fxt {
|
||||||
|
private static final String_bfr bfr = new String_bfr();
|
||||||
|
public static void Eq__ary(Object[] expd, Object[] actl, String msg_fmt, Object... msg_args) {Eq__array(Type_ids_.Id__obj, expd, actl, msg_fmt, msg_args);}
|
||||||
|
public static void Eq__ary(boolean[] expd, boolean[] actl, String msg_fmt, Object... msg_args) {Eq__array(Type_ids_.Id__bool, expd, actl, msg_fmt, msg_args);}
|
||||||
|
public static void Eq__ary(int[] expd, int[] actl) {Eq__array(Type_ids_.Id__int, expd, actl, "");}
|
||||||
|
public static void Eq__ary(int[] expd, int[] actl, String msg_fmt, Object... msg_args) {Eq__array(Type_ids_.Id__int, expd, actl, msg_fmt, msg_args);}
|
||||||
|
public static void Eq__ary(long[] expd, long[] actl, String msg_fmt, Object... msg_args) {Eq__array(Type_ids_.Id__long, expd, actl, msg_fmt, msg_args);}
|
||||||
|
public static void Eq__ary(byte[] expd, byte[] actl, String msg_fmt, Object... msg_args) {Eq__array(Type_ids_.Id__byte, expd, actl, msg_fmt, msg_args);}
|
||||||
|
// public static void Eq__ary__lines(String expd, String actl) {Eq__ary__lines(expd, actl, "no_msg");}
|
||||||
|
// public static void Eq__ary__lines(String expd, byte[] actl) {Eq__ary__lines(expd, String_.New_bry_utf8(actl), "no_msg");}
|
||||||
|
// public static void Eq__ary__lines(String expd, byte[] actl, String msg_fmt, params Object[] msg_args) {Eq__ary__lines(expd, String_.New_bry_utf8(actl), msg_fmt, msg_args);}
|
||||||
|
// public static void Eq__ary__lines(String expd, String actl, String msg_fmt, params Object[] msg_args) {Eq__array(Type_ids_.Id__str, Bry_split_.Split_lines(Bry_.New_utf08(expd)), Bry_split_.Split_lines(Bry_.New_utf08(actl)), msg_fmt, msg_args);}
|
||||||
|
public static void Eq__ary(String[] expd, String[] actl) {Eq__array(Type_ids_.Id__bry, Bry_.Ary(expd), Bry_.Ary(actl), "no_msg");}
|
||||||
|
public static void Eq__ary(String[] expd, String[] actl, String msg_fmt, Object... msg_args) {Eq__array(Type_ids_.Id__bry, Bry_.Ary(expd), Bry_.Ary(actl), msg_fmt, msg_args);}
|
||||||
|
public static void Eq__ary(String[] expd, byte[][] actl, String msg_fmt, Object... msg_args) {Eq__array(Type_ids_.Id__bry, Bry_.Ary(expd), actl, msg_fmt, msg_args);}
|
||||||
|
public static void Eq__ary(byte[][] expd, byte[][] actl, String msg_fmt, Object... msg_args) {Eq__array(Type_ids_.Id__bry, expd, actl, msg_fmt, msg_args);}
|
||||||
|
private static void Eq__array(int type_tid, Object expd_ary, Object actl_ary, String msg_fmt, Object... msg_args) {
|
||||||
|
boolean[] failures = Calc__failures(type_tid, expd_ary, actl_ary);
|
||||||
|
if (failures != null) {
|
||||||
|
Write_fail_head(bfr, msg_fmt, msg_args);
|
||||||
|
Write_fail_ary(bfr, failures, type_tid, expd_ary, actl_ary);
|
||||||
|
throw Err_.New_msg(bfr.To_str_and_clear());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
public static void Eq__null(boolean expd, Object actl) {Eq__null(expd, actl, null);}
|
||||||
|
public static void Eq__null(boolean expd, Object actl, String msg_fmt, Object... msg_args) {
|
||||||
|
if ( expd && actl == null
|
||||||
|
|| !expd && actl != null
|
||||||
|
) return;
|
||||||
|
Write_fail_head(bfr, msg_fmt, msg_args);
|
||||||
|
String expd_str = expd ? "null" : "not null";
|
||||||
|
String actl_str = actl == null ? "null" : "not null";
|
||||||
|
bfr.Add("expd: ").Add(expd_str).Add_char_nl();
|
||||||
|
bfr.Add("actl: ").Add(actl_str).Add_char_nl();
|
||||||
|
bfr.Add(Section_end);
|
||||||
|
throw Err_.New_msg(bfr.To_str_and_clear());
|
||||||
|
}
|
||||||
|
public static void Eq__obj_or_null(Object expd, Object actl) {
|
||||||
|
if (expd == null) expd = Null;
|
||||||
|
if (actl == null) actl = Null;
|
||||||
|
Eq__str(Object_.To_str_or(expd, Null), Object_.To_str_or(actl, null), Null);
|
||||||
|
}
|
||||||
|
public static void Eq__str(String expd, byte[] actl, String msg_fmt, Object... msg_args) {Eq__str(expd, String_.New_bry_utf8(actl), msg_fmt, msg_args);}
|
||||||
|
public static void Eq__str(String expd, byte[] actl) {Eq__str(expd, String_.New_bry_utf8(actl), null);}
|
||||||
|
public static void Eq__str(String expd, String actl) {Eq__str(expd, actl, null);}
|
||||||
|
public static void Eq__str(String expd, String actl, String msg_fmt, Object... msg_args) {
|
||||||
|
if (String_.Eq(expd, actl)) return;
|
||||||
|
Write_fail_head(bfr, msg_fmt, msg_args);
|
||||||
|
bfr.Add("expd: ").Add(expd).Add_char_nl();
|
||||||
|
bfr.Add("actl: ").Add(actl).Add_char_nl();
|
||||||
|
bfr.Add(Section_end);
|
||||||
|
throw Err_.New_msg(bfr.To_str_and_clear());
|
||||||
|
}
|
||||||
|
public static void Eq__bry(byte[] expd, byte[] actl) {Eq__bry(expd, actl, null);}
|
||||||
|
public static void Eq__bry(byte[] expd, byte[] actl, String msg_fmt, Object... msg_args) {
|
||||||
|
if (Bry_.Eq(expd, actl)) return;
|
||||||
|
Write_fail_head(bfr, msg_fmt, msg_args);
|
||||||
|
bfr.Add("expd: ").Add(String_.New_bry_utf8(expd)).Add_char_nl();
|
||||||
|
bfr.Add("actl: ").Add(String_.New_bry_utf8(actl)).Add_char_nl();
|
||||||
|
bfr.Add(Section_end);
|
||||||
|
throw Err_.New_msg(bfr.To_str_and_clear());
|
||||||
|
}
|
||||||
|
public static void Eq__long(long expd, long actl) {Eq__long(expd, actl, null);}
|
||||||
|
public static void Eq__long(long expd, long actl, String msg_fmt, Object... msg_args) {
|
||||||
|
if (expd == actl) return;
|
||||||
|
Write_fail_head(bfr, msg_fmt, msg_args);
|
||||||
|
bfr.Add("expd: ").Add_long(expd).Add_char_nl();
|
||||||
|
bfr.Add("actl: ").Add_long(actl).Add_char_nl();
|
||||||
|
bfr.Add(Section_end);
|
||||||
|
throw Err_.New_msg(bfr.To_str_and_clear());
|
||||||
|
}
|
||||||
|
public static void Eq__byte(byte expd, byte actl) {Eq__byte(expd, actl, null);}
|
||||||
|
public static void Eq__byte(byte expd, byte actl, String msg_fmt, Object... msg_args) {
|
||||||
|
if (expd == actl) return;
|
||||||
|
Write_fail_head(bfr, msg_fmt, msg_args);
|
||||||
|
bfr.Add("expd: ").Add_byte(expd).Add_char_nl();
|
||||||
|
bfr.Add("actl: ").Add_byte(actl).Add_char_nl();
|
||||||
|
bfr.Add(Section_end);
|
||||||
|
throw Err_.New_msg(bfr.To_str_and_clear());
|
||||||
|
}
|
||||||
|
public static void Eq__int(int expd, int actl) {Eq__int(expd, actl, null);}
|
||||||
|
public static void Eq__int(int expd, int actl, String msg_fmt, Object... msg_args) {
|
||||||
|
if (expd == actl) return;
|
||||||
|
Write_fail_head(bfr, msg_fmt, msg_args);
|
||||||
|
bfr.Add("expd: ").Add_int(expd).Add_char_nl();
|
||||||
|
bfr.Add("actl: ").Add_int(actl).Add_char_nl();
|
||||||
|
bfr.Add(Section_end);
|
||||||
|
throw Err_.New_msg(bfr.To_str_and_clear());
|
||||||
|
}
|
||||||
|
public static void Eq__bool_y(boolean actl) {Eq__bool(Bool_.Y, actl, null);}
|
||||||
|
public static void Eq__bool_y(boolean actl, String msg_fmt, Object... msg_args) {Eq__bool(Bool_.Y, actl, msg_fmt, msg_args);}
|
||||||
|
public static void Eq__bool(boolean expd, boolean actl) {Eq__bool(expd, actl, null);}
|
||||||
|
public static void Eq__bool(boolean expd, boolean actl, String msg_fmt, Object... msg_args) {
|
||||||
|
if (expd == actl) return;
|
||||||
|
Write_fail_head(bfr, msg_fmt, msg_args);
|
||||||
|
bfr.Add("expd: ").Add_bool(expd).Add_char_nl();
|
||||||
|
bfr.Add("actl: ").Add_bool(actl).Add_char_nl();
|
||||||
|
bfr.Add(Section_end);
|
||||||
|
throw Err_.New_msg(bfr.To_str_and_clear());
|
||||||
|
}
|
||||||
|
public static void Eq__double(double expd, double actl) {Eq__double(expd, actl, null);}
|
||||||
|
public static void Eq__double(double expd, double actl, String msg_fmt, Object... msg_args) {
|
||||||
|
if (expd == actl) return;
|
||||||
|
Write_fail_head(bfr, msg_fmt, msg_args);
|
||||||
|
bfr.Add("expd: ").Add_double(expd).Add_char_nl();
|
||||||
|
bfr.Add("actl: ").Add_double(actl).Add_char_nl();
|
||||||
|
bfr.Add(Section_end);
|
||||||
|
throw Err_.New_msg(bfr.To_str_and_clear());
|
||||||
|
}
|
||||||
|
private static void Write_fail_head(String_bfr bfr, String msg_fmt, Object[] msg_args) {
|
||||||
|
bfr.Add(Section_bgn);
|
||||||
|
if (msg_fmt != null) {
|
||||||
|
bfr.Add(String_.Format(msg_fmt, msg_args));
|
||||||
|
bfr.Add(Section_mid);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
private static void Write_fail_ary(String_bfr bfr, boolean[] failures, int type_id, Object expd_ary, Object actl_ary) {
|
||||||
|
int len = failures.length;
|
||||||
|
int expd_len = Array_.Len(expd_ary);
|
||||||
|
int actl_len = Array_.Len(actl_ary);
|
||||||
|
for (int i = 0; i < len; ++i) {
|
||||||
|
boolean failure = failures[i];
|
||||||
|
int pad_len = 5 - Int_.Count_digits(i);
|
||||||
|
bfr.Add_int_pad_bgn(Char_code_.Num_0, pad_len, i).Add_char_colon().Add_char_space();
|
||||||
|
Write__itm(bfr, type_id, expd_ary, expd_len, i);
|
||||||
|
if (failure) {
|
||||||
|
bfr.Add(Eq_n).Add_char_repeat(Char_code_.Space, pad_len - 1);
|
||||||
|
Write__itm(bfr, type_id, actl_ary, actl_len, i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
bfr.Add(Section_end);
|
||||||
|
}
|
||||||
|
private static void Write__itm(String_bfr bfr, int type_id, Object ary, int len, int idx) {
|
||||||
|
if (idx < len) {
|
||||||
|
Object val = Array_.Get_at(ary, idx);
|
||||||
|
switch (type_id) {
|
||||||
|
case Type_ids_.Id__bool: bfr.Add_bool_as_yn(Bool_.Cast(val)); break;
|
||||||
|
case Type_ids_.Id__bry: bfr.Add_bry((byte[])val); break;
|
||||||
|
case Type_ids_.Id__long: bfr.Add_long(Long_.Cast(val)); break;
|
||||||
|
case Type_ids_.Id__int: bfr.Add_int(Int_.Cast(val)); break;
|
||||||
|
case Type_ids_.Id__byte: bfr.Add_int((int)(Byte_.Cast(val))); break;
|
||||||
|
case Type_ids_.Id__obj: bfr.Add(Object_.To_str(val)); break;
|
||||||
|
default: throw Err_.New_unhandled_default(type_id);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
bfr.Add(Null);
|
||||||
|
bfr.Add_char_nl();
|
||||||
|
}
|
||||||
|
private static boolean[] Calc__failures(int tid, Object expd_ary, Object actl_ary) {
|
||||||
|
int expd_len = Array_.Len(expd_ary);
|
||||||
|
int actl_len = Array_.Len(actl_ary);
|
||||||
|
int max_len = expd_len > actl_len ? expd_len : actl_len; if (max_len == 0) return null;
|
||||||
|
boolean[] rv = null;
|
||||||
|
for (int i = 0; i < max_len; ++i) {
|
||||||
|
Object expd_obj = i < expd_len ? Array_.Get_at(expd_ary, i) : null;
|
||||||
|
Object actl_obj = i < actl_len ? Array_.Get_at(actl_ary, i) : null;
|
||||||
|
boolean eq = false;
|
||||||
|
if (expd_obj == null && actl_obj == null) eq = true;
|
||||||
|
else if (expd_obj == null || actl_obj == null) eq = false;
|
||||||
|
else {
|
||||||
|
switch (tid) {
|
||||||
|
case Type_ids_.Id__bool: eq = Bool_.Cast(expd_obj) == Bool_.Cast(actl_obj); break;
|
||||||
|
case Type_ids_.Id__bry: eq = Bry_.Eq((byte[])expd_obj, (byte[])actl_obj); break;
|
||||||
|
case Type_ids_.Id__long: eq = Long_.Cast(expd_obj) == Long_.Cast(actl_obj); break;
|
||||||
|
case Type_ids_.Id__int: eq = Int_.Cast(expd_obj) == Int_.Cast(actl_obj); break;
|
||||||
|
case Type_ids_.Id__byte: eq = Byte_.Cast(expd_obj) == Byte_.Cast(actl_obj); break;
|
||||||
|
case Type_ids_.Id__obj: eq = Object_.Eq(expd_obj, actl_obj); break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (!eq) {
|
||||||
|
if (rv == null) {
|
||||||
|
rv = new boolean[max_len];
|
||||||
|
}
|
||||||
|
rv[i] = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return rv;
|
||||||
|
}
|
||||||
|
private static final String Null = "<<NULL>>";
|
||||||
|
private static final String Eq_n = "!= "
|
||||||
|
, Section_bgn = "\n************************************************************************************************\n"
|
||||||
|
, Section_mid = "\n------------------------------------------------------------------------------------------------\n"
|
||||||
|
, Section_end = "________________________________________________________________________________________________"
|
||||||
|
;
|
||||||
|
|
||||||
|
// public static void Write(byte[] s, int b, int e) {Write(Bry_.Mid(s, b, e));}
|
||||||
|
public static void Write() {Write("tmp");}
|
||||||
|
public static void Write(Object... ary) {
|
||||||
|
String_bfr bfr = new String_bfr();
|
||||||
|
int ary_len = Array_.Len(ary);
|
||||||
|
for (int i = 0; i < ary_len; i++) {
|
||||||
|
bfr.Add("'");
|
||||||
|
bfr.Add(Object_.To_str_or_null_mark(ary[i]));
|
||||||
|
bfr.Add("' ");
|
||||||
|
}
|
||||||
|
System.out.println(bfr.To_str() + String_.Lf);
|
||||||
|
}
|
||||||
|
}
|
Binary file not shown.
Loading…
Reference in New Issue
Block a user