Scribunto: Use Luaj for pattern-matching (instead of Java Regex) [#413]

pull/620/head
gnosygnu 5 years ago
parent 4a1b2e25c0
commit f860edf064

@ -16,6 +16,7 @@ Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
package gplx;
import gplx.core.strings.*; import gplx.langs.gfs.*;
public class Int_ {
// -------- BASELIB_COPY --------
public static final String Cls_val_name = "int";
public static final Class<?> Cls_ref_type = Integer.class;
@ -37,6 +38,72 @@ public class Int_ {
throw Err_.new_type_mismatch_w_exc(exc, int.class, obj);
}
}
// Converts v to its base-10 String; EX: 123 -> "123"; -1 -> "-1"
public static String To_str(int v) {
	// NOTE: Integer.toString does not allocate a throwaway box; "new Integer(int)" is deprecated in current JDKs
	return Integer.toString(v);
}
// Parses raw as a base-10 int; returns "or" instead of throwing when raw is not a valid int
//   raw : text to parse; only ASCII digits with one optional leading '-' are accepted
//   or  : returned when raw is null, empty, a lone "-", has any other char, or does not fit in an int
// EX: "123" -> 123; "-12" -> -12; "1a" -> or; "1-2" -> or; "-" -> or; "99999999999" -> or
public static int Parse_or(String raw, int or) {
	// process args
	if (raw == null) return or;
	int raw_len = String_.Len(raw);
	if (raw_len == 0) return or;

	// detect optional leading sign; a sign must be followed by at least one digit
	boolean negative = String_.CharAt(raw, 0) == '-';
	int bgn = negative ? 1 : 0;
	if (bgn == raw_len) return or; // "-" by itself is not a number

	// largest magnitude that still fits; NOTE: negative range is one larger than positive range
	long max_magnitude = negative ? -(long)Int_.Min_value : (long)Int_.Max_value;

	// accumulate left-to-right in a long so that overflow is detected instead of silently wrapping
	long magnitude = 0;
	for (int idx = bgn; idx < raw_len; idx++) {
		char cur = String_.CharAt(raw, idx);
		if (cur < '0' || cur > '9') return or; // anything else (including a '-' that is not 1st) is invalid
		magnitude = (magnitude * 10) + (cur - '0');
		if (magnitude > max_magnitude) return or; // does not fit in an int
	}
	return (int)(negative ? -magnitude : magnitude);
}
public static int[] Log10Ary = new int[] {1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000, Int_.Max_value};
public static int Log10AryLen = 11;
// Returns the integer base-10 exponent of v, carrying the sign of v; EX: 0 -> 0; 9 -> 0; 10 -> 1; -100 -> -2
// Scans Log10Ary (ascending powers of 10, capped by Int_.Max_value) for the first entry larger than abs(v)
public static int Log10(int v) {
	if (v == 0) return 0;
	boolean is_neg = v < 0;
	if (is_neg) {
		if (v == Int_.Min_value) return -9; // NOTE: Int_.Min_value cannot be negated; it stays Int_.Min_value
		v = -v;
	}

	// choose 1st slot to check; skipping ahead keeps the number of comparisons small
	int idx = v > 1000000 ? 6 : (v > 1000 ? 3 : 0);

	// default is only kept when no slot is larger than v; this happens when v == Int_.Max_value
	int exponent = Log10AryLen - 2;
	while (idx < Log10AryLen) {
		if (v < Log10Ary[idx]) {
			exponent = idx - 1;
			break;
		}
		idx++;
	}
	return is_neg ? -exponent : exponent;
}
// Returns the number of chars needed to print v in base-10, counting "-" for negatives; EX: 0 -> 1; 100 -> 3; -1 -> 2
public static int DigitCount(int v) {
	int log10 = Log10(v);
	if (v > -1)
		return log10 + 1;   // digits only
	return 2 - log10;       // Log10 is <= 0 for negatives; flip its sign, then add 1 for the 1st digit and 1 for "-"
}
// -------- TO_MIGRATE --------
public static int Cast_or(Object obj, int or) {
try {
return (Integer)obj;
@ -55,23 +122,7 @@ public class Int_ {
}
// Parses raw with Integer.parseInt; any failure is rethrown as the Err_ parse error for int.class with raw attached
public static int Parse(String raw) {
	int rv;
	try {
		rv = Integer.parseInt(raw);
	}
	catch (Exception e) {
		throw Err_.new_parse_exc(e, int.class, raw);
	}
	return rv;
}
// Parses raw as a base-10 int by walking from the last char to the first; returns "or" if raw is not a valid int
//   raw : text to parse; only ASCII digits with one optional leading '-' are accepted
//   or  : returned when raw is null, empty, a lone "-", or has any other char
// NOTE: there is no overflow check; values outside the int range wrap
public static int Parse_or(String raw, int or) {
	if (raw == null) return or;
	int rawLen = String_.Len(raw); if (rawLen == 0) return or;
	int rv = 0, tmp = 0, factor = 1;
	for (int i = rawLen; i > 0; i--) {
		char c = String_.CharAt(raw, i - 1);
		switch (c) {
			case '0': tmp = 0; break; case '1': tmp = 1; break; case '2': tmp = 2; break; case '3': tmp = 3; break; case '4': tmp = 4; break;
			case '5': tmp = 5; break; case '6': tmp = 6; break; case '7': tmp = 7; break; case '8': tmp = 8; break; case '9': tmp = 9; break;
			case '-':
				if (i != 1 || rawLen == 1) return or; // sign is only valid as the 1st char and must be followed by digits; EX: "1-2", "5-", "-" are invalid
				rv *= -1; continue; // NOTE: note continue
			default: return or;
		}
		rv += (tmp * factor);
		factor *= 10;
	}
	return rv;
}
// Narrows v to an int with a plain Java cast; the fraction is dropped (not rounded); EX: 1.9 -> 1; -1.9 -> -1
public static int By_double(double v) {
	int rv = (int)v;
	return rv;
}
// Convenience overload: converts all of src by passing the full range (0 to src.length) to the 3-arg By_hex_bry
public static int By_hex_bry(byte[] src) {
	int end = src.length;
	return By_hex_bry(src, 0, end);
}
@ -99,7 +150,6 @@ public class Int_ {
}
// Converts v to bytes: formats v with To_str, then builds the byte[] with Bry_.new_a7
public static byte[] To_bry(int v) {
	String str = To_str(v);
	return Bry_.new_a7(str);
}
// Converts v to its base-10 String; EX: 123 -> "123"; -1 -> "-1"
public static String To_str(int v) {
	// NOTE: Integer.toString does not allocate a throwaway box; "new Integer(int)" is deprecated in current JDKs
	return Integer.toString(v);
}
// Formats v with a java.text.DecimalFormat pattern; EX: 1000, "#,###" -> "1,000"
// NOTE: a new DecimalFormat is built on every call
public static String To_str_fmt(int v, String fmt) {
	java.text.DecimalFormat formatter = new java.text.DecimalFormat(fmt);
	return formatter.format(v);
}
// Pads val to reqd_len by delegating to To_str_pad with Bool_.Y and Byte_ascii.Space (pad-at-beginning with spaces, per the method name)
public static String To_str_pad_bgn_space(int val, int reqd_len) {
	return To_str_pad
		( val
		, reqd_len
		, Bool_.Y
		, Byte_ascii.Space
		);
}
// Pads val to reqd_len by delegating to To_str_pad with Bool_.Y and Byte_ascii.Num_0; EX: 1, 3 returns "001"
public static String To_str_pad_bgn_zero(int val, int reqd_len) {
	return To_str_pad
		( val
		, reqd_len
		, Bool_.Y
		, Byte_ascii.Num_0
		);
}
@ -190,31 +240,4 @@ public class Int_ {
float product = ((float)v * multiplier); // WORKAROUND (DotNet): (int)((float)v * multiplier) returns 0 for 100 and .01f
return (int)product;
}
public static int[] Log10Ary = new int[] {1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000, Int_.Max_value};
public static int Log10AryLen = 11;
// Returns the integer base-10 exponent of v, carrying the sign of v; EX: 0 -> 0; 9 -> 0; 10 -> 1; -100 -> -2
// Scans Log10Ary (ascending powers of 10, capped by Int_.Max_value) for the first entry larger than abs(v)
public static int Log10(int v) {
	if (v == 0) return 0;
	boolean is_neg = v < 0;
	if (is_neg) {
		if (v == Int_.Min_value) return -9; // NOTE: Int_.Min_value cannot be negated; it stays Int_.Min_value
		v = -v;
	}

	// choose 1st slot to check; skipping ahead keeps the number of comparisons small
	int idx = v > 1000000 ? 6 : (v > 1000 ? 3 : 0);

	// default is only kept when no slot is larger than v; this happens when v == Int_.Max_value
	int exponent = Log10AryLen - 2;
	while (idx < Log10AryLen) {
		if (v < Log10Ary[idx]) {
			exponent = idx - 1;
			break;
		}
		idx++;
	}
	return is_neg ? -exponent : exponent;
}
// Returns the number of chars needed to print v in base-10, counting "-" for negatives; EX: 0 -> 1; 100 -> 3; -1 -> 2
public static int DigitCount(int v) {
	int log10 = Log10(v);
	if (v > -1)
		return log10 + 1;   // digits only
	return 2 - log10;       // Log10 is <= 0 for negatives; flip its sign, then add 1 for the 1st digit and 1 for "-"
}
}

@ -25,72 +25,10 @@ public class Int__tst {
tst_XtoStr_PadLeft_Zeroes(-123 , 3, "-123"); // negative
tst_XtoStr_PadLeft_Zeroes(-1234 , 3, "-1234"); // negative
} void tst_XtoStr_PadLeft_Zeroes(int val, int zeros, String expd) {Tfds.Eq(expd, Int_.To_str_pad_bgn_zero(val, zeros));}
// Int_.Parse_or: empty and non-numeric input fall back to the default (-1); plain digits are parsed
@Test public void parseOr_() {
tst_ParseOr("", -1); // empty
tst_ParseOr("123", 123); // single
tst_ParseOr("1a", -1); // fail
} void tst_ParseOr(String raw, int expd) {Tfds.Eq(expd, Int_.Parse_or(raw, -1));} // helper: always parses with -1 as the fallback
// Int_.Between: both bounds are expected to be inclusive (see "bgn true" / "end true")
@Test public void Between() {
tst_Between(1, 0, 2, true); // simple true
tst_Between(3, 0, 2, false); // simple false
tst_Between(0, 0, 2, true); // bgn true
tst_Between(2, 0, 2, true); // end true
} void tst_Between(int val, int lhs, int rhs, boolean expd) {Tfds.Eq(expd, Int_.Between(val, lhs, rhs));}
// Int_.To_str_fmt: the "#,###" pattern adds a thousands separator only when needed
@Test public void Xto_fmt() {
tst_XtoStr_fmt(1, "1");
tst_XtoStr_fmt(1000, "1,000");
} void tst_XtoStr_fmt(int v, String expd) {Tfds.Eq(expd, Int_.To_str_fmt(v, "#,###"));} // helper: always formats with "#,###"
// Int_.Log10 for non-negative input: 0 maps to 0; exact powers of 10 and Int_.Max_value are covered
@Test public void Log10_pos() {
tst_Log10(0, 0);
tst_Log10(1, 0);
tst_Log10(9, 0);
tst_Log10(10, 1);
tst_Log10(100, 2);
tst_Log10(1000000, 6);
tst_Log10(1000000000, 9);
tst_Log10(Int_.Max_value, 9);
}
// Int_.Log10 for negative input: result carries the sign of the input; Int_.Min_value is expected to return -9
@Test public void Log10_neg() {
tst_Log10(-1, 0);
tst_Log10(-10, -1);
tst_Log10(-100, -2);
tst_Log10(-1000000, -6);
tst_Log10(-1000000000, -9);
tst_Log10(Int_.Min_value, -9);
tst_Log10(Int_.Min_value + 1, -9);
}
// helper shared by the Log10 tests: asserts Int_.Log10(val) equals expd
void tst_Log10(int val, int expd) {Tfds.Eq(expd, Int_.Log10(val));}
// Int_.DigitCount: counts printed chars; negatives are expected to count the "-" sign (-1 -> 2)
@Test public void DigitCount() {
tst_DigitCount(0, 1);
tst_DigitCount(9, 1);
tst_DigitCount(100, 3);
tst_DigitCount(-1, 2);
tst_DigitCount(-100, 4);
} void tst_DigitCount(int val, int expd) {Tfds.Eq(expd, Int_.DigitCount(val), Int_.To_str(val));} // helper: passes the input as the 3rd arg of Tfds.Eq
// Int_.Log10 sweep: for every power of 10 up to 10^9, checks the exact power and a value just above it
@Test public void Log10() {
tst_Log10( 0, 0);
tst_Log10( 1, 0);
tst_Log10( 2, 0);
tst_Log10( 10, 1);
tst_Log10( 12, 1);
tst_Log10( 100, 2);
tst_Log10( 123, 2);
tst_Log10( 1000, 3);
tst_Log10( 1234, 3);
tst_Log10( 10000, 4);
tst_Log10( 12345, 4);
tst_Log10( 100000, 5);
tst_Log10( 123456, 5);
tst_Log10( 1000000, 6);
tst_Log10( 1234567, 6);
tst_Log10( 10000000, 7);
tst_Log10( 12345678, 7);
tst_Log10( 100000000, 8);
tst_Log10( 123456789, 8);
tst_Log10( 1000000000, 9);
tst_Log10( 1234567890, 9);
tst_Log10(Int_.Max_value, 9);
}
// Int_.By_hex_bry: hex text "007C" (leading zeros, upper-case digit) is expected to parse to 124
@Test public void Xto_int_hex_tst() {
Xto_int_hex("007C", 124);
} void Xto_int_hex(String raw, int expd) {Tfds.Eq(expd, Int_.By_hex_bry(Bry_.new_a7(raw)));} // helper: converts raw to bytes with Bry_.new_a7 before parsing

@ -17,8 +17,17 @@ package gplx;
import java.lang.*;
import gplx.core.strings.*; import gplx.langs.gfs.*; import gplx.core.envs.*;
public class String_ {
// -------- BASELIB_COPY --------
public static final Class<?> Cls_ref_type = String.class;
public static final String Cls_val_name = "str" + "ing";
public static final int Find_none = -1, Pos_neg1 = -1;
public static final String Empty = "", Null_mark = "<<NULL>>", Tab = "\t", Lf = "\n", CrLf = "\r\n";
// Null-safe String equality: two nulls are equal; a null never equals a non-null
public static boolean Eq(String lhs, String rhs) {
	if (lhs == null)
		return rhs == null;
	return lhs.equals(rhs);
}
// Returns s.length(); NOTE: s is not null-checked
public static int Len(String s) {
	int rv = s.length();
	return rv;
}
// Returns s.charAt(i); NOTE: neither s nor i is validated here
public static char CharAt(String s, int i) {
	char rv = s.charAt(i);
	return rv;
}
// Convenience overload: null stays null; else all of v (0 to v.length) is passed to the 3-arg new_u8
public static String new_u8(byte[] v) {
	if (v == null)
		return null;
	return new_u8(v, 0, v.length);
}
public static String new_u8(byte[] v, int bgn, int end) {
try {
return v == null
@ -28,10 +37,62 @@ public class String_ {
catch (Exception e) {Err_.Noop(e); throw Err_.new_("core", "unsupported encoding", "bgn", bgn, "end", end);}
}
public static final Class<?> Cls_ref_type = String.class;
public static final String Cls_val_name = "str" + "ing";
public static final int Find_none = -1, Pos_neg1 = -1;
public static final String Null = null, Empty = "", Null_mark = "<<NULL>>", Tab = "\t", Lf = "\n", CrLf = "\r\n";
// use C# flavor ("a {0}") rather than Java format ("a %s"); also: (a) don't fail on format errors; (b) escape brackets by doubling
private static final char FORMAT_ITM_LHS = '{', FORMAT_ITM_RHS = '}';
// Replaces each "{n}" in fmt with the String form of args[n] (as produced by Object_.Xto_str_strict_or_empty)
//   fmt  : format String; "{{" and "}}" are escapes for a literal "{" and "}"
//   args : values to substitute; if there are none, fmt is returned as-is (escapes are NOT collapsed)
// Malformed items are emitted literally instead of failing
//   EX: "{a}" -> "{a}"; "{1}" with 1 arg -> "{1}"; "{0" -> "{0"; "a}b" -> "a}b"; "{a{" -> "{a{"
public static String Format(String fmt, Object... args) {
	// method vars
	int args_len = Array_.Len_obj(args);
	if (args_len == 0) return fmt; // nothing to format
	int fmt_len = Len(fmt);

	// loop vars
	int pos = 0; String arg_idx_str = ""; boolean inside_brackets = false;
	String_bldr bfr = String_bldr_.new_();
	while (pos < fmt_len) { // loop over every char; NOTE: UTF8-SAFE b/c only checking for "{"; "}"
		char c = CharAt(fmt, pos);
		if (inside_brackets) {
			if (c == FORMAT_ITM_LHS) { // first FORMAT_ITM_LHS is fake; add FORMAT_ITM_LHS and whatever is in arg_idx_str
				bfr.Add(FORMAT_ITM_LHS).Add(arg_idx_str);
				arg_idx_str = "";
			}
			else if (c == FORMAT_ITM_RHS) { // itm completed
				int args_idx = Int_.Parse_or(arg_idx_str, Int_.Min_value);
				String itm = args_idx != Int_.Min_value && Int_.Between(args_idx, 0, args_len - 1) // check (a) args_idx is num; (b) args_idx is in bounds
					? Object_.Xto_str_strict_or_empty(args[args_idx]) // valid; add itm
					: String_.Concat_any(FORMAT_ITM_LHS, arg_idx_str, FORMAT_ITM_RHS); // not valid; just add String
				bfr.Add(itm);
				inside_brackets = false;
				arg_idx_str = "";
			}
			else
				arg_idx_str += c;
		}
		else {
			boolean c_is_lhs = c == FORMAT_ITM_LHS;
			if (c_is_lhs || c == FORMAT_ITM_RHS) {
				boolean pos_is_end = pos == fmt_len - 1;
				if (!pos_is_end && CharAt(fmt, pos + 1) == c) { // "{{" or "}}": escape by doubling
					bfr.Add(c);
					pos++;
				}
				else if (c_is_lhs && !pos_is_end) // "{" followed by something else; start of itm
					inside_brackets = true;
				else // last char is "{" or "}", or an unpaired "}"; not an itm, so just add; NOTE: only "{" can start an itm, else "a}b" would become "a{b"
					bfr.Add(c);
			}
			else
				bfr.Add(c);
		}
		pos++;
	}
	if (inside_brackets) // unclosed bracket; add FORMAT_ITM_LHS and whatever is in arg_idx_str (may be empty); ex: "{0", "{a{"
		bfr.Add(FORMAT_ITM_LHS).Add(arg_idx_str);
	return bfr.To_str();
}
// -------- TO_MIGRATE --------
// Hard cast to String; null stays null; a non-String fails with the ClassCastException raised by the cast itself
public static String cast(Object v) {
	String rv = (String)v;
	return rv;
}
// Soft cast to String; returns null (instead of failing) when obj is null or not a String
public static String as_(Object obj) {
	if (obj instanceof String)
		return (String)obj;
	return null;
}
// Convenience overload: null stays null; else all of v (0 to v.length) is passed to the 3-arg new_a7
public static String new_a7(byte[] v) {
	if (v == null)
		return null;
	return new_a7(v, 0, v.length);
}
@ -43,7 +104,6 @@ public class String_ {
}
catch (Exception e) {throw Err_.new_exc(e, "core", "unsupported encoding");}
}
// Convenience overload: null stays null; else all of v (0 to v.length) is passed to the 3-arg new_u8
public static String new_u8(byte[] v) {
	if (v == null)
		return null;
	return new_u8(v, 0, v.length);
}
public static String new_u8__by_len(byte[] v, int bgn, int len) {
int v_len = v.length;
if (bgn + len > v_len) len = v_len - bgn;
@ -111,7 +171,6 @@ public class String_ {
} while (true);
return count;
}
// Null-safe String equality: two nulls are equal; a null never equals a non-null
public static boolean Eq(String lhs, String rhs) {
	if (lhs == null)
		return rhs == null;
	return lhs.equals(rhs);
}
public static boolean EqAny(String lhs, String... rhsAry) {
for (int i = 0; i < rhsAry.length; i++)
if (Eq(lhs, rhsAry[i])) return true;
@ -267,7 +326,6 @@ public class String_ {
if (pos < 0 || pos >= String_.Len(s)) throw Err_.new_wo_type("String_.Insert failed; pos invalid", "pos", pos, "s", s, "toInsert", toInsert);
return s.substring(0, pos) + toInsert + s.substring(pos);
}
// Public entry point for "{0}"-style formatting; all work is done by Format_do
public static String Format(String fmt, Object... args) {
	String rv = Format_do(fmt, args);
	return rv;
}
// Formats fmt with the single arg; returns "" (and skips formatting) when arg is null
public static String FormatOrEmptyStrIfNull(String fmt, Object arg) {
	if (arg == null)
		return "";
	return Format(fmt, arg);
}
// Builds a String from the given chars, in order; EX: 'a', 'b' -> "ab"
public static String Concat(char... ary) {
	return String.valueOf(ary);
}
// Joins three Strings in order; a null arg is rendered as "null", same as Java's "+" operator
public static String Concat(String s1, String s2, String s3) {
	StringBuilder sb = new StringBuilder();
	sb.append(s1).append(s2).append(s3);
	return sb.toString();
}
@ -381,57 +439,6 @@ public class String_ {
// Splits s into lines by calling Split_do with the Op_sys.Lnx newline String and skipChar13 = true
public static String[] SplitLines_any(String s) {
	String nl = Op_sys.Lnx.Nl_str();
	return Split_do(s, nl, true);
}
// Splits s with Java's own String.split, using c as the separator
// NOTE(review): String.split treats its arg as a regex, so a c such as '.' or '|' is read as regex syntax, not as a literal
public static String[] Split_lang(String s, char c) {
	String separator = String.valueOf(c);
	return s.split(separator);
}
// Replaces each "{n}" in s with the String form of ary[n] (as produced by Object_.Xto_str_strict_or_empty)
//   s   : format String; "{{" and "}}" are escapes for a literal "{" and "}"
//   ary : values to substitute; if there are none, s is returned as-is (escapes are NOT collapsed)
// Malformed items are emitted literally instead of failing
//   EX: "{a}" -> "{a}"; "{1}" with 1 arg -> "{1}"; "{0" -> "{0"; "a}b" -> "a}b"; "{a{" -> "{a{"
static String Format_do(String s, Object[] ary) {
	int aryLength = Array_.Len_obj(ary); if (aryLength == 0) return s; // nothing to format
	String_bldr sb = String_bldr_.new_();
	char bracketBgn = '{', bracketEnd = '}';
	String aryVal = null; char c;
	int pos = 0; int textLength = Len(s); String numberStr = ""; boolean bracketsOn = false;
	while (pos < textLength) {
		c = CharAt(s, pos);
		if (bracketsOn) { // mode=bracketsOn
			if (c == bracketBgn) { // first bracketBgn is fake; add bracketBgn and whatever is in numberStr
				sb.Add(bracketBgn).Add(numberStr);
				numberStr = "";
			}
			else if (c == bracketEnd) {
				int aryIdx = Int_.Parse_or(numberStr, Int_.Min_value);
				if (aryIdx != Int_.Min_value && Int_.Between(aryIdx, 0, aryLength - 1)) // check (a) aryIdx is num; (b) aryIdx is in bounds
					aryVal = Object_.Xto_str_strict_or_empty(ary[aryIdx]);
				else
					aryVal = String_.Concat_any(bracketBgn, numberStr, bracketEnd); // not valid, just add String
				sb.Add(aryVal);
				bracketsOn = false;
				numberStr = "";
			}
			else // char=anythingElse
				numberStr += c;
		}
		else { // mode=bracketsOff
			if (c == bracketBgn || c == bracketEnd) {
				boolean isEnd = pos == textLength - 1;
				if (!isEnd && CharAt(s, pos + 1) == c) { // "{{" or "}}": escape by doubling
					sb.Add(c);
					pos++;
				}
				else if (c == bracketBgn && !isEnd) // "{" followed by something else; start of item
					bracketsOn = true;
				else // last char is "{" or "}", or an unpaired "}"; not an item, so just add; NOTE: only "{" can start an item, else "a}b" would become "a{b"
					sb.Add(c);
			}
			else // char=anythingElse
				sb.Add(c);
		}
		pos++;
	}
	if (bracketsOn) // unclosed bracket; add bracketBgn and whatever is in numberStr (may be empty); ex: "{0", "{a{"
		sb.Add(bracketBgn).Add(numberStr);
	return sb.To_str();
}
static String[] Split_do(String s, String spr, boolean skipChar13) {
if (String_.Eq(s, "") // "".Split('a') return array with one member: ""
|| String_.Eq(spr, "")) // "a".Split('\0') returns array with one member: "a"

@ -16,11 +16,6 @@ Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
package gplx;
import org.junit.*;
public class String__tst {
// String_.Len: empty String has length 0; "abc" has length 3
@Test public void Len() {
tst_Len("", 0);
tst_Len("abc", 3);
} void tst_Len(String v, int expd) {Tfds.Eq(expd, String_.Len(v), "Len");}
@Test public void LimitToFirst() {
tst_LimitToFirst("abc", 0, "");
tst_LimitToFirst("abc", 1, "a");
@ -120,20 +115,6 @@ public class String__tst {
// String_.Repeat: "3" repeated 3 times is expected to be "333"
@Test public void Repeat() {
Tfds.Eq("333", String_.Repeat("3", 3));
}
// String_.Format: "{n}" substitution, "{{" / "}}" escapes, and malformed items which are expected to be echoed literally
@Test public void Format() {
tst_Format("", ""); // empty
tst_Format("no args", "no args"); // no args
tst_Format("0", "{0}", 0); // one
tst_Format("0 and 1", "{0} and {1}", 0, 1); // many
tst_Format("{", "{{", 0); // escape bracketBgn
tst_Format("}", "}}", 0); // escape bracketEnd
tst_Format("{a0c}", "{a{0}c}", 0); // nested;
tst_Format("{a{b}c}", "{a{b}c}", 0); // invalid invalid
tst_Format("{1}", "{1}", 1); // invalid array index
tst_Format("{a} {b}", "{a} {b}", 0); // invalid many
tst_Format("{a}0{b}1", "{a}{0}{b}{1}", 0, 1); // invalid and valid
tst_Format("{0", "{0", 0); // invalid dangling
} void tst_Format(String expd, String fmt, Object... ary) {Tfds.Eq(expd, String_.Format(fmt, ary));} // helper: NOTE arg order is expd, then fmt, then args
@Test public void Split() {
tst_Split("ab", " ", "ab"); // no match -> return array with original input
tst_Split("ab cd", " ", "ab", "cd"); // separator.length = 1

@ -16,17 +16,16 @@ Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
package gplx.langs.regxs; import gplx.*; import gplx.langs.*;
public class Regx_adp_ {
// Factory: wraps pattern in a new Regx_adp
public static Regx_adp new_(String pattern) {
	Regx_adp rv = new Regx_adp(pattern);
	return rv;
}
public static List_adp Find_all(String input, String find) {
Regx_adp regx = Regx_adp_.new_(find);
int idx = 0;
public static List_adp Find_all(String src, String pat) {
int src_len = String_.Len(src);
Regx_adp regx = Regx_adp_.new_(pat);
int pos = 0;
List_adp rv = List_adp_.New();
while (true) {
Regx_match match = regx.Match(input, idx);
while (pos < src_len) {
Regx_match match = regx.Match(src, pos);
if (match.Rslt_none()) break;
rv.Add(match);
int findBgn = match.Find_bgn();
idx = findBgn + match.Find_len();
if (idx > String_.Len(input)) break;
pos = match.Find_bgn() + match.Find_len();
}
return rv;
}

@ -16,9 +16,9 @@ Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
package gplx.dbs.sqls.itms; import gplx.*; import gplx.dbs.*; import gplx.dbs.sqls.*;
public class Sql_order_fld {
// Stores the table, field name and sort flag as-is; no validation is done here
public Sql_order_fld(String tbl, String name, byte sort) {
	this.Tbl = tbl;
	this.Name = name;
	this.Sort = sort;
}
public final String Tbl;
public final String Name;
public final byte Sort;
public final String Tbl;
public final String Name;
public final byte Sort;
public String To_sql() {
String rv = this.Name;
if (Tbl != null) rv = Tbl + "." + rv;
@ -30,6 +30,6 @@ public class Sql_order_fld {
return rv;
}
public static final String Tbl__null = String_.Null;
public static final String Tbl__null = null;
public static final byte Sort__asc = Bool_.Y_byte, Sort__dsc = Bool_.N_byte, Sort__nil = Bool_.__byte;
}

@ -22,15 +22,15 @@ public class Sql_tbl_itm {
this.Alias = alias;
this.Join_flds = join_flds;
}
public final int Join_tid;
public final String Db;
public final String Name;
public final String Alias;
public final int Join_tid;
public final String Db;
public final String Name;
public final String Alias;
public boolean Db_enabled = true;
public final Sql_join_fld[] Join_flds;
public final Sql_join_fld[] Join_flds;
public static final String Alias__null = String_.Null;
public static final String Db__null = String_.Null;
public static final String Alias__null = null;
public static final String Db__null = null;
public static final int
Tid__from = 0 // "FROM"
, Tid__inner = 1 // "INNER JOIN"

@ -1,16 +1,17 @@
<?xml version="1.0" encoding="UTF-8"?>
<classpath>
<classpathentry kind="lib" path="lib/luaj_xowa.jar"/>
<classpathentry exported="true" kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/>
<classpathentry combineaccessrules="false" exported="true" kind="src" path="/100_core"/>
<classpathentry combineaccessrules="false" exported="true" kind="src" path="/140_dbs"/>
<classpathentry combineaccessrules="false" exported="true" kind="src" path="/150_gfui"/>
<classpathentry kind="src" path="src"/>
<classpathentry exported="true" kind="lib" path="lib/luaj_xowa.jar"/>
<classpathentry exported="true" kind="lib" path="lib/jtidy_xowa.jar"/>
<classpathentry exported="true" kind="con" path="org.eclipse.jdt.junit.JUNIT_CONTAINER/4"/>
<classpathentry combineaccessrules="false" kind="src" path="/gplx.gflucene"/>
<classpathentry exported="true" kind="lib" path="lib/icu4j-57_1.jar"/>
<classpathentry kind="lib" path="lib/vnu.jar"/>
<classpathentry kind="lib" path="lib/Saxon-HE-9.9.1-2.jar"/>
<classpathentry combineaccessrules="false" kind="src" path="/baselib"/>
<classpathentry kind="output" path="bin"/>
</classpath>

@ -1,51 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.core.intls; import gplx.*; import gplx.core.*;
// A String viewed in three units at once: codes (Unicode code points), chars (Java UTF-16 chars) and bytes (of the byte[] built by Bry_.new_u8)
// Pos_* methods convert a position in one unit to the same position in another unit
public interface Unicode_string {
boolean Tid_is_single(); // true for the impl where all three units coincide (Unicode_string_single); false for Unicode_string_multi
String Src_string(); // the original String
byte[] Src_bytes(); // the original String as bytes
int Len_codes(); // length in code points
int Len_chars(); // length in chars
int Len_bytes(); // length in bytes
int Val_codes(int i); // code point at code-position i
int Pos_codes_to_bytes(int i); // code-position -> byte-position
int Pos_codes_to_chars(int i); // code-position -> char-position
int Pos_bytes_to_chars(int i); // byte-position -> char-position
int Pos_bytes_to_codes(int i); // byte-position -> code-position
int Pos_chars_to_codes(int i); // char-position -> code-position
}
// Unicode_string for text where 1 byte == 1 codepoint; all lengths are equal and every position maps to itself
class Unicode_string_single implements Unicode_string {
	private final String src_string;
	private final byte[] src_bytes;
	private final int[] codes;
	private final int codes_len;
	public Unicode_string_single(String src_string, byte[] src_bytes, int[] codes, int codes_len) {
		this.src_string = src_string;
		this.src_bytes = src_bytes;
		this.codes = codes;
		this.codes_len = codes_len;
	}
	public boolean Tid_is_single() {return true;}
	public String Src_string() {return src_string;}
	public byte[] Src_bytes() {return src_bytes;}

	// codes, chars and bytes all share one length
	public int Len_codes() {return codes_len;}
	public int Len_chars() {return codes_len;}
	public int Len_bytes() {return codes_len;}

	public int Val_codes(int i) {return codes[i];}

	// positions are identical in every unit; only the range is checked
	public int Pos_codes_to_bytes(int i) {return Same_pos_or_fail(i);}
	public int Pos_codes_to_chars(int i) {return Same_pos_or_fail(i);}
	public int Pos_bytes_to_chars(int i) {return Same_pos_or_fail(i);}
	public int Pos_bytes_to_codes(int i) {return Same_pos_or_fail(i);}
	public int Pos_chars_to_codes(int i) {return Same_pos_or_fail(i);}

	// returns i unchanged if it is between 0 and codes_len (inclusive, so that the end position is valid); else fails
	private int Same_pos_or_fail(int i) {
		if (i >= 0 && i <= codes_len) return i;
		throw Err_.new_wo_type("invalid idx", "src", src_string, "idx", i);
	}
}

@ -1,48 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.core.intls; import gplx.*; import gplx.core.*;
// Factory for Unicode_string
public class Unicode_string_ {
	// Decodes orig once and picks the implementation
	//   null                       -> Unicode_string_single with null data and length 0
	//   every code point is 1 byte -> Unicode_string_single
	//   else                       -> Unicode_string_multi
	public static Unicode_string New(String orig) {
		if (orig == null)
			return new Unicode_string_single(null, null, null, 0);

		byte[] bytes = Bry_.new_u8(orig);
		int bytes_len = bytes.length;

		// NOTE: sized by bytes_len, which is the most that can be needed; only the 1st codes_len slots are filled
		int[] codes = new int[bytes_len];
		int codes_len = 0;
		int chars_len = 0;

		// walk the bytes one code point at a time
		for (int bytes_pos = 0; bytes_pos < bytes_len; codes_len++) {
			codes[codes_len] = Utf16_.Decode_to_int(bytes, bytes_pos);
			int cur_byte_len = Utf8_.Len_of_char_by_1st_byte(bytes[bytes_pos]);
			bytes_pos += cur_byte_len;
			chars_len += Utf8_.Len_of_char_by_bytes_len(cur_byte_len);
		}

		if (codes_len == bytes_len) // as many code points as bytes; no multi-byte sequences
			return new Unicode_string_single(orig, bytes, codes, codes_len);
		return new Unicode_string_multi(orig, bytes, bytes_len, codes, codes_len, chars_len);
	}
}

@ -1,85 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.core.intls; import gplx.*; import gplx.core.*;
// Unicode_string for text with at least one multi-byte code point; builds lookup tables between code, char and byte positions
class Unicode_string_multi implements Unicode_string {
	private static final int Invalid = -1, Adj_end = 1; // +1 to store last pos as len of String; needed for regex which returns match.Find_end() which will be len of String; EX: abc -> [0, 1, 2, 3]
	private final String src;
	private final byte[] bytes;
	private final int[] codes;
	private final int bytes_len;
	private final int codes_len;
	private final int chars_len;
	private final int[] codes_to_bytes;
	private final int[] codes_to_chars;
	private final int[] bytes_to_chars;
	private final int[] bytes_to_codes;
	private final int[] chars_to_codes;
	public Unicode_string_multi(String src, byte[] bytes, int bytes_len, int[] codes, int codes_len, int chars_len) {
		// set member vars
		this.src = src;
		this.bytes = bytes;
		this.bytes_len = bytes_len;
		this.codes = codes;
		this.codes_len = codes_len;
		this.chars_len = chars_len;

		// init maps; NOTE: maps keyed by codes are fully filled below; maps keyed by bytes / chars start as Invalid, b/c not every byte / char pos begins a code point
		this.codes_to_bytes = new int[codes_len + Adj_end];
		this.codes_to_chars = new int[codes_len + Adj_end];
		this.bytes_to_codes = New_int_ary(bytes_len);
		this.bytes_to_chars = New_int_ary(bytes_len);
		this.chars_to_codes = New_int_ary(chars_len);

		// walk one code point at a time; the end pos is also recorded before exiting
		int code_idx = 0, byte_idx = 0, char_idx = 0;
		for (;;) {
			codes_to_bytes[code_idx] = byte_idx;
			codes_to_chars[code_idx] = char_idx;
			bytes_to_chars[byte_idx] = char_idx;
			bytes_to_codes[byte_idx] = code_idx;
			chars_to_codes[char_idx] = code_idx;
			if (byte_idx == bytes_len) break;

			int cur_byte_len = Utf8_.Len_of_char_by_1st_byte(bytes[byte_idx]);
			byte_idx += cur_byte_len;
			char_idx += Utf8_.Len_of_char_by_bytes_len(cur_byte_len);
			code_idx++;
		}
	}
	public boolean Tid_is_single() {return false;}
	public String Src_string() {return src;}
	public byte[] Src_bytes() {return bytes;}
	public int Len_codes() {return codes_len;}
	public int Len_chars() {return chars_len;}
	public int Len_bytes() {return bytes_len;}
	public int Val_codes(int i) {return codes[i];}
	public int Pos_codes_to_bytes(int i) {return codes_to_bytes[i];}
	public int Pos_codes_to_chars(int i) {return codes_to_chars[i];}
	public int Pos_bytes_to_chars(int i) {return Map_or_fail(bytes_to_chars, "bytes_to_chars", i);}
	public int Pos_bytes_to_codes(int i) {return Map_or_fail(bytes_to_codes, "bytes_to_codes", i);}
	public int Pos_chars_to_codes(int i) {return Map_or_fail(chars_to_codes, "chars_to_codes", i);}

	// looks up i in map; fails if the slot is still Invalid (i does not fall on the start of a code point)
	private int Map_or_fail(int[] map, String type, int i) {
		int rv = map[i];
		if (rv == Invalid) throw Err_.new_wo_type("invalid i", "src", src, "type", type, "i", i);
		return rv;
	}
	// creates an int[] of len + Adj_end with every slot set to Invalid
	private static int[] New_int_ary(int len) {
		int[] rv = new int[len + Adj_end];
		int i = 0;
		while (i < rv.length) {
			rv[i] = Invalid;
			i++;
		}
		return rv;
	}
}

@ -1,110 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.core.intls; import gplx.*; import gplx.core.*;
import org.junit.*; import gplx.core.tests.*;
// Tests for Unicode_string_.New: lengths, code point values, and position maps for null, blank, 1-byte-only and multi-byte text
public class Unicode_string_tst {
private final Unicode_string_fxt fxt = new Unicode_string_fxt();
// null input: all lengths are expected to be 0
@Test public void Null() {
fxt.Init(null);
fxt.Test__Len(0, 0, 0);
}
// empty input: all lengths are expected to be 0
@Test public void Blank() {
fxt.Init("");
fxt.Test__Len(0, 0, 0);
}
// 1-byte-only text: codes, chars and bytes all have the same length and every position maps to itself (end position included)
@Test public void Single() {
fxt.Init("Abc");
fxt.Test__Len(3, 3, 3);
fxt.Test__Val_codes(65, 98, 99);
fxt.Test__Pos_codes_to_bytes(0, 1, 2, 3);
fxt.Test__Pos_codes_to_chars(0, 1, 2, 3);
fxt.Test__Pos_chars_to_codes(0, 1, 2, 3);
fxt.Test__Pos_bytes_to_codes(0, 1, 2, 3);
}
// 1-, 2-, 3- and 4-byte code points: 4 codes, 5 chars (last code point takes 2 chars), 10 bytes; -1 marks positions that are not the start of a code point
@Test public void Multi() {
fxt.Init("a¢€𤭢");
fxt.Test__Len(4, 5, 10);
fxt.Test__Val_codes(97, 162, 8364, 150370);
fxt.Test__Pos_codes_to_bytes(0, 1, 3, 6, 10);
fxt.Test__Pos_codes_to_chars(0, 1, 2, 3, 5);
fxt.Test__Pos_chars_to_codes( 0, 1, 2, 3, -1, 4);
fxt.Test__Pos_bytes_to_codes( 0, 1, -1, 2, -1, -1, 3, -1, -1, -1, 4);
}
}
// Test fixture: builds a Unicode_string from a String and compares its lengths / values / position maps against expected arrays
class Unicode_string_fxt {
	private Unicode_string under;
	public void Init(String src) {
		under = Unicode_string_.New(src);
	}
	// checks the length in each of the three units
	public void Test__Len(int expd_codes, int expd_chars, int expd_bytes) {
		Gftest.Eq__int(expd_codes, under.Len_codes(), "codes");
		Gftest.Eq__int(expd_chars, under.Len_chars(), "chars");
		Gftest.Eq__int(expd_bytes, under.Len_bytes(), "bytes");
	}
	// checks every code point value
	public void Test__Val_codes(int... expd) {
		int[] actl = new int[under.Len_codes()];
		for (int i = 0; i < actl.length; i++)
			actl[i] = under.Val_codes(i);
		Gftest.Eq__ary(expd, actl);
	}
	// checks code -> byte positions; NOTE: + 1 so that the end position is also checked
	public void Test__Pos_codes_to_bytes(int... expd) {
		int[] actl = new int[under.Len_codes() + 1];
		for (int i = 0; i < actl.length; i++)
			actl[i] = under.Pos_codes_to_bytes(i);
		Gftest.Eq__ary(expd, actl);
	}
	// checks code -> char positions; NOTE: + 1 so that the end position is also checked
	public void Test__Pos_codes_to_chars(int... expd) {
		int[] actl = new int[under.Len_codes() + 1];
		for (int i = 0; i < actl.length; i++)
			actl[i] = under.Pos_codes_to_chars(i);
		Gftest.Eq__ary(expd, actl);
	}
	// checks byte -> code positions; a position that throws is recorded as -1
	public void Test__Pos_bytes_to_codes(int... expd) {
		int[] actl = new int[under.Len_bytes() + 1];
		for (int i = 0; i < actl.length; i++)
			actl[i] = Pos_bytes_to_codes_or_neg1(i);
		Gftest.Eq__ary(expd, actl);
	}
	// checks char -> code positions; a position that throws is recorded as -1
	public void Test__Pos_chars_to_codes(int... expd) {
		int[] actl = new int[under.Len_chars() + 1];
		for (int i = 0; i < actl.length; i++)
			actl[i] = Pos_chars_to_codes_or_neg1(i);
		Gftest.Eq__ary(expd, actl);
	}
	private int Pos_bytes_to_codes_or_neg1(int i) {
		try {
			return under.Pos_bytes_to_codes(i);
		}
		catch (Exception exc) {
			Err_.Noop(exc);
			return -1;
		}
	}
	private int Pos_chars_to_codes_or_neg1(int i) {
		try {
			return under.Pos_chars_to_codes(i);
		}
		catch (Exception exc) {
			Err_.Noop(exc);
			return -1;
		}
	}
}

@ -14,6 +14,7 @@ GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.xtns.scribunto.libs; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.scribunto.*;
import gplx.objects.strings.unicodes.*;
import gplx.core.intls.*; import gplx.langs.regxs.*;
import gplx.xowa.parsers.*;
import gplx.xowa.xtns.scribunto.procs.*;
@ -55,10 +56,10 @@ public class Scrib_lib_ustring implements Scrib_lib {
boolean plain = args.Cast_bool_or_n(3);
// init text vars
Unicode_string text_ucs = Unicode_string_.New(text_str); // NOTE: must count codes for supplementaries; PAGE:en.d:iglesia DATE:2017-04-23
Ustring text_ucs = Ustring_.New_codepoints(text_str); // NOTE: must count codes for supplementaries; PAGE:en.d:iglesia DATE:2017-04-23
// convert bgn from base_1 to base_0
int bgn_as_codes = To_java_by_lua(bgn_as_codes_base1, text_ucs.Len_codes());
int bgn_as_codes = To_java_by_lua(bgn_as_codes_base1, text_ucs.Len_in_data());
/*
int offset = 0;
@ -80,34 +81,35 @@ public class Scrib_lib_ustring implements Scrib_lib {
// if plain, just do literal match of find and exit
if (plain) {
// find pos by literal match
Unicode_string find_ucs = Unicode_string_.New(find_str);
byte[] find_bry = find_ucs.Src_bytes();
int pos = Bry_find_.Find_fwd(text_ucs.Src_bytes(), find_bry, text_ucs.Pos_codes_to_bytes(bgn_as_codes));
Ustring find_ucs = Ustring_.New_codepoints(find_str);
int pos = String_.FindFwd(text_str, find_str, bgn_as_codes);
// nothing found; return empty
if (pos == Bry_find_.Not_found)
// if nothing found, return empty
if (pos == String_.Find_none)
return rslt.Init_ary_empty();
// else, convert char_idx to code_idx
else
pos = text_ucs.Map_char_to_data(pos);
// bgn: convert pos from bytes back to codes; also adjust for base1
int bgn = text_ucs.Pos_bytes_to_codes(pos) + Base1;
int bgn = pos + Base1;
// end: add find.Len_in_codes and adjust end for PHP/LUA
int end = bgn + find_ucs.Len_codes() - End_adj;
int end = bgn + find_ucs.Len_in_data() - End_adj;
return rslt.Init_many_objs(bgn, end);
}
// run regex
Scrib_regx_converter regx_converter = new Scrib_regx_converter();
Regx_match[] regx_rslts = Scrib_pattern_matcher_.Instance().Match(core.Ctx().Page().Url(), text_ucs, regx_converter, find_str, bgn_as_codes);
if (regx_rslts.length == 0) return rslt.Init_ary_empty();
// run regex; NOTE: take only 1st result; DATE:2014-08-27
Scrib_pattern_matcher matcher = Scrib_pattern_matcher.New(core.Page_url());
Regx_match match = matcher.Match_one(text_ucs, find_str, bgn_as_codes, true);
if (match.Rslt_none()) return rslt.Init_null(); // null verified on MW; EX: =mw.ustring.find("abc", "z"); DATE:2019-04-11
// add to tmp_list
Regx_match match = regx_rslts[0]; // NOTE: take only 1st result; DATE:2014-08-27
List_adp tmp_list = List_adp_.New();
tmp_list.Add(text_ucs.Pos_chars_to_codes(match.Find_bgn()) + Scrib_lib_ustring.Base1);
tmp_list.Add(text_ucs.Pos_chars_to_codes(match.Find_end()) + Scrib_lib_ustring.Base1 - Scrib_lib_ustring.End_adj);
AddCapturesFromMatch(tmp_list, match, text_str, regx_converter.Capt_ary(), false);
tmp_list.Add(text_ucs.Map_char_to_data(match.Find_bgn()) + Scrib_lib_ustring.Base1);
tmp_list.Add(text_ucs.Map_char_to_data(match.Find_end()) + Scrib_lib_ustring.Base1 - Scrib_lib_ustring.End_adj);
AddCapturesFromMatch(tmp_list, match, text_str, matcher.Capt_ary(), false);
return rslt.Init_many_list(tmp_list);
}
public boolean Match(Scrib_proc_args args, Scrib_proc_rslt rslt) {
@ -119,41 +121,42 @@ public class Scrib_lib_ustring implements Scrib_lib {
// validate / adjust
if (text_str == null) // if no text_str is passed, do not fail; return empty; EX:d:changed; DATE:2014-02-06
return rslt.Init_many_list(List_adp_.Noop);
Unicode_string text_ucs = Unicode_string_.New(text_str); // NOTE: must count codes for supplementaries; PAGE:en.d:iglesia DATE:2017-04-23
int bgn_as_codes = To_java_by_lua(bgn_as_codes_base1, text_ucs.Len_codes());
Ustring text_ucs = Ustring_.New_codepoints(text_str); // NOTE: must count codes for supplementaries; PAGE:en.d:iglesia DATE:2017-04-23
int bgn_as_codes = To_java_by_lua(bgn_as_codes_base1, text_ucs.Len_in_data());
// run regex
Scrib_regx_converter regx_converter = new Scrib_regx_converter();
Regx_match[] regx_rslts = Scrib_pattern_matcher_.Instance().Match(core.Ctx().Page().Url(), text_ucs, regx_converter, find_str, bgn_as_codes);
if (regx_rslts.length == 0) return rslt.Init_null(); // return null if no matches found; EX:w:Mount_Gambier_(volcano); DATE:2014-04-02; confirmed with en.d:民; DATE:2015-01-30
// run regex; NOTE: add 1st match only; do not add all; PAGE:en.d:действительное_причастие_настоящего_времени DATE:2017-04-23
Scrib_pattern_matcher matcher = Scrib_pattern_matcher.New(core.Page_url());
Regx_match match = matcher.Match_one(text_ucs, find_str, bgn_as_codes, true);
if (match.Rslt_none()) return rslt.Init_null(); // return null if no matches found; EX:w:Mount_Gambier_(volcano); DATE:2014-04-02; confirmed with en.d:民; DATE:2015-01-30
// TOMBSTONE: add 1st match only; do not add all; PAGE:en.d:действительное_причастие_настоящего_времени DATE:2017-04-23
regx_rslts = regx_converter.Adjust_balanced(regx_rslts);
List_adp tmp_list = List_adp_.New();
AddCapturesFromMatch(tmp_list, regx_rslts[0], text_str, regx_converter.Capt_ary(), true);
AddCapturesFromMatch(tmp_list, match, text_str, matcher.Capt_ary(), true);
return rslt.Init_many_list(tmp_list);
}
public boolean Gsub(Scrib_proc_args args, Scrib_proc_rslt rslt) {
Scrib_lib_ustring_gsub_mgr gsub_mgr = new Scrib_lib_ustring_gsub_mgr(core, new Scrib_regx_converter());
Scrib_lib_ustring_gsub_mgr gsub_mgr = new Scrib_lib_ustring_gsub_mgr(core);
return gsub_mgr.Exec(args, rslt);
}
public boolean Gmatch_init(Scrib_proc_args args, Scrib_proc_rslt rslt) {
// String text = Scrib_kv_utl_.Val_to_str(values, 0);
String regx = args.Pull_str(1);
Scrib_regx_converter regx_converter = new Scrib_regx_converter();
String pcre = regx_converter.patternToRegex(regx, Scrib_regx_converter.Anchor_null, true);
return rslt.Init_many_objs(pcre, regx_converter.Capt_ary());
if (Scrib_pattern_matcher.Mode_is_xowa())
regx_converter.patternToRegex(regx, Scrib_regx_converter.Anchor_null, true);
else
regx = regx_converter.patternToRegex(regx, Scrib_regx_converter.Anchor_null, true);
return rslt.Init_many_objs(regx, regx_converter.Capt_ary());
}
public boolean Gmatch_callback(Scrib_proc_args args, Scrib_proc_rslt rslt) {
String text = args.Xstr_str_or_null(0); // NOTE: UstringLibrary.php!ustringGmatchCallback calls preg_match directly; $s can be any type, and php casts automatically;
String regx = args.Pull_str(1);
Keyval[] capt = args.Cast_kv_ary_or_null(2);
int pos = args.Pull_int(3);
Regx_adp regx_adp = Scrib_lib_ustring.RegxAdp_new_(core.Ctx().Page().Url(), regx);
Regx_match[] regx_rslts = regx_adp.Match_all(text, pos);
int len = regx_rslts.length;
if (len == 0) return rslt.Init_many_objs(pos, Keyval_.Ary_empty);
Regx_match match = regx_rslts[0]; // NOTE: take only 1st result
Ustring text_ucs = Ustring_.New_codepoints(text);
// int pos_as_codes = To_java_by_lua(pos, text_ucs.Len_in_data());
Regx_match match = Scrib_pattern_matcher.New(core.Page_url()).Match_one(text_ucs, regx, pos, false);
if (match.Rslt_none()) return rslt.Init_many_objs(pos, Keyval_.Ary_empty);
List_adp tmp_list = List_adp_.New();
AddCapturesFromMatch(tmp_list, match, text, capt, true); // NOTE: was incorrectly set as false; DATE:2014-04-23
return rslt.Init_many_objs(match.Find_end(), Scrib_kv_utl_.base1_list_(tmp_list));
@ -198,12 +201,12 @@ public class Scrib_lib_ustring implements Scrib_lib {
&& tmp_list.Count() == 0) // only add match once; EX: "aaaa", "a" will have four matches; get 1st; DATE:2014-04-02
tmp_list.Add(String_.Mid(text, rslt.Find_bgn(), rslt.Find_end()));
}
public static Regx_adp RegxAdp_new_(Xoa_url url, String regx) {
public static Regx_adp RegxAdp_new_(byte[] page_url, String regx) {
Regx_adp rv = Regx_adp_.new_(regx);
if (rv.Pattern_is_invalid()) {
// try to identify [z-a] errors; PAGE:https://en.wiktionary.org/wiki/Module:scripts/data; DATE:2017-04-23
Exception exc = rv.Pattern_is_invalid_exception();
Gfo_usr_dlg_.Instance.Log_many("", "", "regx is invalid: regx=~{0} page=~{1} exc=~{2}", regx, url.To_bry(), Err_.Message_gplx_log(exc));
Gfo_usr_dlg_.Instance.Log_many("", "", "regx is invalid: regx=~{0} page=~{1} exc=~{2}", regx, page_url, Err_.Message_gplx_log(exc));
}
return rv;
}

@ -40,7 +40,7 @@ public class Scrib_lib_ustring__find__tst {
fxt.Test__find("𤭢𤭢b𤭢𤭢b" , "b" , 2, Bool_.N, "3;3"); // bytes=4
fxt.Test__find("abcd" , "b" , 1, Bool_.N, "2;2"); // basic
fxt.Test__find("abad" , "a" , 2, Bool_.N, "3;3"); // bgn
fxt.Test__find("abcd" , "x" , 1, Bool_.N, ""); // no-match
fxt.Test__find("abcd" , "x" , 1, Bool_.N, String_.Null_mark); // no-match
fxt.Test__find("abcd" , "" , 2, Bool_.N, "2;1"); // empty regx should return values; regx; EX:w:Fool's_mate; DATE:2014-03-04
}
@Test public void Regx__int() { // PURPOSE: allow int find; PAGE:ro.w:Innsbruck DATE:2015-09-12
@ -64,6 +64,9 @@ public class Scrib_lib_ustring__find__tst {
fxt.Test__find("aé𡼾\nbî𡼾\n" , "" , 1, Bool_.N, "1;0"); // 4 b/c \n starts at pos 4 (super 1)
fxt.Test__find("aé𡼾\nbî𡼾\n" , "" , 5, Bool_.N, "5;4"); // 8 b/c \n starts at pos 8 (super 1)
}
// Pattern walk-through for "%b[]X(%a)%1Y" against "[[5]]XccY":
//   "%b[]" matches the balanced span "[[5]]"; "X" is literal; "(%a)" captures the letter "c";
//   "%1" is a back-reference to that capture (the 2nd "c"); "Y" is literal.
// Expected "1;9;c": base-1 begin of the match, base-1 inclusive end, then the one real capture.
// The balanced item must not show up as an extra capture, nor shift the numbering used by "%1".
@Test public void Balanced__numbered_1() { // PURPOSE: handle mix of balanced and regular capture; PAGE:en.w:Bahamas
fxt.Test__find("[[5]]XccY", "%b[]X(%a)%1Y", 1, Bool_.N, "1;9;c");
}
}
class Scrib_lib_ustring__find__fxt {
private boolean dbg = false;

@ -16,7 +16,7 @@ Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
package gplx.xowa.xtns.scribunto.libs; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.scribunto.*;
import org.junit.*; import gplx.xowa.xtns.scribunto.engines.mocks.*;
public class Scrib_lib_ustring__gmatch__tst {
private final Mock_scrib_fxt fxt = new Mock_scrib_fxt(); private Scrib_lib lib;
private final Mock_scrib_fxt fxt = new Mock_scrib_fxt(); private Scrib_lib lib;
@Before public void init() {
fxt.Clear();
lib = fxt.Core().Lib_ustring().Init();
@ -48,4 +48,7 @@ public class Scrib_lib_ustring__gmatch__tst {
, " 1=2"
));
}
@Test public void Callback__pattern() {
fxt.Test__proc__objs__nest(lib, Scrib_lib_ustring.Invk_gmatch_callback, Object_.Ary("a", "%a+", Scrib_kv_utl_.base1_many_(false), 1) , "1=1\n2="); // fails if "a" is returned; note that 1 should be eos
}
}

@ -43,6 +43,15 @@ public class Scrib_lib_ustring__gsub__tst {
@Test public void Replace__double() { // PURPOSE: do not fail if double is passed in for @replace; PAGE:de.v:Wikivoyage:Wikidata/Test_Modul:Wikidata2 DATE:2016-04-21
Exec_gsub("abcd", 1 , -1, 1.23d , "abcd;0");
}
@Test public void Replace__anypos() { // PURPOSE:LUAJ_PATTERN_REPLACEMENT; DATE:2019-04-16
Exec_gsub("'''a'''b", "()'''(.-'*)'''", 1, "z", "zb;1");
}
@Test public void Replace__balanced_and_grouping() { // PURPOSE:LUAJ_PATTERN_REPLACEMENT; DATE:2019-04-16
Exec_gsub("[[b]]", "%[(%b[])%]" , -1, "z" , "z;1"); // NOTE: not "[z]"
}
@Test public void Replace__initial() { // PURPOSE:whitespace being replaced during gsub replacement; DATE:2019-04-21
Exec_gsub("a b c", "^%s*", -1, "x", "xa b c;1"); // fails if xabxc
}
@Test public void Replace__table() {
Exec_gsub("abcd", "[ac]" , -1, Scrib_kv_utl_.flat_many_("a", "A", "c", "C") , "AbCd;2");
Exec_gsub("abc" , "[ab]" , -1, Scrib_kv_utl_.flat_many_("a", "A") , "Abc;2"); // PURPOSE: match not in regex should still print itself; in this case [c] is not in tbl regex; DATE:2014-03-31
@ -122,6 +131,17 @@ public class Scrib_lib_ustring__gsub__tst {
fxt.Init__cbk(proc);
Exec_gsub(text, regx, -1, proc.To_scrib_lua_proc(), "aBYz;2");
}
// Verifies gsub with a Lua callback when the pattern is a balanced match ("%b{}") and has no explicit captures:
// each balanced "{{...}}" span is expected to reach the callback as its single argument.
@Test public void Luacbk__balanced() { // PURPOSE:LUAJ_PATTERN_REPLACEMENT; DATE:2019-04-16
String text = "}a{{b}}c{{d}}";
String regx = "%b{}"; // balanced-braces item; the leading unmatched "}" in text must be left untouched
// NOTE(review): each Object[] looks like {value returned by the callback, expected argument}; confirm against Mock_proc__verify_args
Mock_proc__verify_args proc = new Mock_proc__verify_args(0, new Object[]{"x", "{{b}}"}, new Object[]{"y", "{{d}}"});
fxt.Init__cbk(proc);
// expect 2 replacements: "{{b}}" -> "x" and "{{d}}" -> "y"
Exec_gsub(text, regx, -1, proc.To_scrib_lua_proc(), "}axcy;2");
}
// Mock_proc__verify_args proc = new Mock_proc__verify_args(0, new Object[]{"x", "{{yes2}}"}, new Object[]{"x", "{{flagicon|USA}}"});
// fxt.Init__cbk(proc);
// Exec_gsub("}\n|-\n|28\n|{{yes2}}Win\n|280\n|style=\"text-align:left;\"|{{flagicon|USA}}", "%b{}", -1, proc.To_scrib_lua_proc(), "}axbx;2"); }
//
// Test helper: invokes the ustring gsub proc and compares its flattened result to @expd.
// @text  : source string passed as arg 1
// @regx  : Lua pattern passed as arg 2 (typed Object b/c some tests pass a non-String, such as an int)
// @limit : max number of replacements; tests pass -1 when no limit is wanted
// @repl  : replacement passed as arg 3 (tests pass a String, double, table or Lua proc)
// @expd  : expected result; the tests in this class use the form "new_text;repl_count"
// NOTE: the helper's parameter order is (limit, repl), but the args are built as (text, regx, repl, limit)
private void Exec_gsub(String text, Object regx, int limit, Object repl, String expd) {
fxt.Test__proc__kvps__flat(lib, Scrib_lib_ustring.Invk_gsub, Scrib_kv_utl_.base1_many_(text, regx, repl, limit), expd);
}

@ -15,35 +15,39 @@ Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.xtns.scribunto.libs; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.scribunto.*;
import gplx.langs.regxs.*;
import gplx.objects.strings.unicodes.*;
import gplx.xowa.xtns.scribunto.libs.patterns.*;
import gplx.xowa.xtns.scribunto.procs.*;
class Scrib_lib_ustring_gsub_mgr {
public class Scrib_lib_ustring_gsub_mgr { // THREAD.UNSAFE:LOCAL_VALUES
private final Scrib_core core;
private final Scrib_regx_converter regx_converter;
private String src_str;
private String pat_str;
private int limit;
private byte repl_tid;
private byte[] repl_bry; private Hash_adp repl_hash; private Scrib_lua_proc repl_func;
private int repl_count = 0;
public Scrib_lib_ustring_gsub_mgr(Scrib_core core, Scrib_regx_converter regx_converter) {
public int repl_count = 0;
public Scrib_lib_ustring_gsub_mgr(Scrib_core core) {
this.core = core;
this.regx_converter = regx_converter;
}
// Increments repl_count by one; repl_count is the 2nd value returned by Exec
public void Repl_count__add() {repl_count++;}
// Returns true when repl_count equals @limit. Exec defaults @limit to -1 when the arg is absent,
// so as long as repl_count only counts up from 0 the default never reports "done" (i.e.: no limit)
public boolean Repl_count__done() {return repl_count == limit;}
public boolean Exec(Scrib_proc_args args, Scrib_proc_rslt rslt) {
// get @text; NOTE: sometimes int; DATE:2013-11-06
String text = args.Xstr_str_or_null(0);
if (args.Len() == 2) return rslt.Init_obj(text); // if no @replace, return @text; PAGE:en.d:'orse; DATE:2013-10-13
// get @src_str; NOTE: sometimes int; DATE:2013-11-06
this.src_str = args.Xstr_str_or_null(0);
if (args.Len() == 2) return rslt.Init_obj(src_str); // if no @replace, return @src_str; PAGE:en.d:'orse; DATE:2013-10-13
// get @pattern; NOTE: sometimes int; PAGE:en.d:λύω; DATE:2014-09-02
String regx = args.Xstr_str_or_null(1);
regx = regx_converter.patternToRegex(regx, Scrib_regx_converter.Anchor_pow, true);
this.pat_str = args.Xstr_str_or_null(1);
// get @repl
Object repl_obj = args.Cast_obj_or_null(2);
byte repl_tid = Identify_repl(repl_obj);
this.repl_tid = Identify_repl(repl_obj);
// get @limit; reset repl_count
int limit = args.Cast_int_or(3, -1);
repl_count = 0;
this.limit = args.Cast_int_or(3, -1);
// do repl
String repl = Exec_repl(repl_tid, text, regx, limit);
String repl = Scrib_pattern_matcher.New(core.Page_url()).Gsub(this, Ustring_.New_codepoints(src_str), pat_str, 0);
return rslt.Init_many_objs(repl, repl_count);
}
private byte Identify_repl(Object repl_obj) {
@ -80,44 +84,7 @@ class Scrib_lib_ustring_gsub_mgr {
throw Err_.new_unhandled(Type_.Name(repl_type));
return repl_tid;
}
private String Exec_repl(byte repl_tid, String text, String regx, int limit) {
// parse regx
Regx_adp regx_mgr = Scrib_lib_ustring.RegxAdp_new_(core.Ctx().Page().Url(), regx);
if (regx_mgr.Pattern_is_invalid()) return text; // NOTE: invalid patterns should return self; EX:[^]; DATE:2014-09-02)
// exec regx
Regx_match[] rslts = regx_mgr.Match_all(text, 0);
if (rslts.length == 0) return text; // PHP: If matches are found, the new subject will be returned, otherwise subject will be returned unchanged.; http://php.net/manual/en/function.preg-replace-callback.php
rslts = regx_converter.Adjust_balanced(rslts);
Bry_bfr tmp_bfr = Bry_bfr_.New();
int rslts_len = rslts.length;
int text_pos = 0;
for (int i = 0; i < rslts_len; i++) {
if (repl_count == limit) break; // stop if repl_count reaches limit; note that limit = -1 by default, unless specified
// add text up to find.bgn
Regx_match rslt = rslts[i];
tmp_bfr.Add_str_u8(String_.Mid(text, text_pos, rslt.Find_bgn())); // NOTE: regx returns char text_pos (not bry); must add as String, not bry; DATE:2013-07-17
// replace result
if (!Exec_repl_itm(tmp_bfr, repl_tid, text, rslt)) {
// will be false when gsub_proc returns nothing; PAGE:en.d:tracer PAGE:en.d:שלום DATE:2017-04-22;
tmp_bfr.Add_str_u8(String_.Mid(text, rslt.Find_bgn(), rslt.Find_end()));
}
// update
text_pos = rslt.Find_end();
repl_count++;
}
// add rest of String
int text_len = String_.Len(text);
if (text_pos < text_len)
tmp_bfr.Add_str_u8(String_.Mid(text, text_pos, text_len)); // NOTE: regx returns char text_pos (not bry); must add as String, not bry; DATE:2013-07-17
return tmp_bfr.To_str_and_clear();
}
private boolean Exec_repl_itm(Bry_bfr tmp_bfr, byte repl_tid, String text, Regx_match match) {
public boolean Exec_repl_itm(Bry_bfr tmp_bfr, Scrib_regx_converter regx_converter, Regx_match match) {
switch (repl_tid) {
case Repl_tid_string:
int len = repl_bry.length;
@ -137,15 +104,15 @@ class Scrib_lib_ustring_gsub_mgr {
// REF.MW: https://github.com/wikimedia/mediawiki-extensions-Scribunto/blob/master/includes/engines/LuaCommon/UstringLibrary.php#L785-L796
// NOTE: 0 means take result; REF.MW:if ($x === '0'); return $m[0]; PAGE:Wikipedia:Wikipedia_Signpost/Templates/Voter/testcases; DATE:2015-08-02
if (idx == 0)
tmp_bfr.Add_str_u8(String_.Mid(text, match.Find_bgn(), match.Find_end()));
tmp_bfr.Add_str_u8(String_.Mid(src_str, match.Find_bgn(), match.Find_end()));
// NOTE: > 0 means get from groups if it exists; REF.MW:elseif (isset($m["m$x"])) return $m["m$x"]; PAGE:Wikipedia:Wikipedia_Signpost/Templates/Voter/testcases; DATE:2015-08-02
else if (idx - 1 < match.Groups().length) { // retrieve numbered capture; TODO_OLD: support more than 9 captures
Regx_group grp = match.Groups()[idx - 1];
tmp_bfr.Add_str_u8(String_.Mid(text, grp.Bgn(), grp.End())); // NOTE: grp.Bgn() / .End() is for String pos (bry pos will fail for utf8 strings)
tmp_bfr.Add_str_u8(String_.Mid(src_str, grp.Bgn(), grp.End())); // NOTE: grp.Bgn() / .End() is for String pos (bry pos will fail for utf8 strings)
}
// NOTE: 1 per MW "Match undocumented Lua String.gsub behavior"; PAGE:en.d:Wiktionary:Scripts ISSUE#:393; DATE:2019-03-20
else if (idx == 1) {
tmp_bfr.Add_str_u8(String_.Mid(text, match.Find_bgn(), match.Find_end()));
tmp_bfr.Add_str_u8(String_.Mid(src_str, match.Find_bgn(), match.Find_end()));
}
else {
throw Err_.new_wo_type("invalid capture index %" + Char_.To_str(b) + " in replacement String");
@ -180,7 +147,7 @@ class Scrib_lib_ustring_gsub_mgr {
match_bgn = grp.Bgn();
match_end = grp.End();
}
String find_str = String_.Mid(text, match_bgn, match_end); // NOTE: rslt.Bgn() / .End() is for String pos (bry pos will fail for utf8 strings)
String find_str = String_.Mid(src_str, match_bgn, match_end); // NOTE: rslt.Bgn() / .End() is for String pos (bry pos will fail for utf8 strings)
Object actl_repl_obj = repl_hash.Get_by(find_str);
if (actl_repl_obj == null) // match found, but no replacement specified; EX:"abc", "[ab]", "a:A"; "b" in regex but not in tbl; EX:d:DVD; DATE:2014-03-31
tmp_bfr.Add_str_u8(find_str);
@ -194,7 +161,7 @@ class Scrib_lib_ustring_gsub_mgr {
int grps_len = grps.length;
// no grps; pass 1 arg based on @match: EX: ("ace", "[b-d]"); args -> ("c")
if (grps_len == 0) {
String find_str = String_.Mid(text, match.Find_bgn(), match.Find_end());
String find_str = String_.Mid(src_str, match.Find_bgn(), match.Find_end());
luacbk_args = Scrib_kv_utl_.base1_obj_(find_str);
}
// grps exist; pass n args based on grp[n].match; EX: ("acfg", "([b-d])([e-g])"); args -> ("c", "f")
@ -202,7 +169,7 @@ class Scrib_lib_ustring_gsub_mgr {
// memoize any_pos args for loop
boolean any_pos = regx_converter.Any_pos();
Keyval[] capt_ary = regx_converter.Capt_ary();
int capt_ary_len = capt_ary.length;
int capt_ary_len = capt_ary == null ? 0 : capt_ary.length; // capt_ary can be null b/c xowa_gsub will always create one group;
// loop grps; for each grp, create corresponding arg in luacbk
luacbk_args = new Keyval[grps_len];
@ -212,7 +179,7 @@ class Scrib_lib_ustring_gsub_mgr {
// anypos will create @offset arg; everything else creates a @match arg based on grp
Object val = any_pos && i < capt_ary_len && Bool_.Cast(capt_ary[i].Val())
? (Object)grp.Bgn()
: (Object)String_.Mid(text, grp.Bgn(), grp.End());
: (Object)String_.Mid(src_str, grp.Bgn(), grp.End());
luacbk_args[i] = Keyval_.int_(i + Scrib_core.Base_1, val);
}
}

@ -14,31 +14,33 @@ GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.xtns.scribunto.libs; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.scribunto.*;
import gplx.core.brys.fmtrs.*; import gplx.core.intls.*;
import gplx.objects.strings.unicodes.*;
import gplx.core.intls.*;
import gplx.core.brys.fmtrs.*;
import gplx.langs.regxs.*;
public class Scrib_regx_converter {
public class Scrib_regx_converter {// THREAD.UNSAFE:MULTIPLE_RETURN_VALUES
private final Scrib_regx_grp_mgr grp_mgr = new Scrib_regx_grp_mgr();
private final Bry_bfr bfr = Bry_bfr_.New();
private Bry_bfr tmp_bfr;
private Bry_fmtr fmtr_balanced; private Bry_bfr bfr_balanced;
private final Lua_cls_to_regx_map percent_map, brack_map;
public Scrib_regx_converter() {
percent_map = Lua_cls_matcher.Instance.Percent();
brack_map = Lua_cls_matcher.Instance.Brack();
}
public String Regx() {return regx;} private String regx;
public Keyval[] Capt_ary() {return grp_mgr.Capt__to_ary();}
public boolean Any_pos() {return any_pos;} private boolean any_pos;
public Regx_match[] Adjust_balanced(Regx_match[] rslts) {return grp_mgr.Adjust_balanced(rslts);}
public Regx_match[] Adjust_balanced(Regx_match[] rslts) {return grp_mgr.Adjust_balanced_many(rslts);}
public Regx_match Adjust_balanced_one(Regx_match rslt) {return grp_mgr.Adjust_balanced_one(rslt);}
public String patternToRegex(String pat_str, byte[] anchor, boolean mode_is_regx) {
Unicode_string pat_ucs = Unicode_string_.New(pat_str);
Ustring pat_ucs = Ustring_.New_codepoints(pat_str);
// TODO.CACHE: if (!$this->patternRegexCache->has($cacheKey))
grp_mgr.Clear();
any_pos = false;
boolean q_flag = false;
Bry_bfr bfr = Bry_bfr_.New();
Bry_bfr tmp_bfr = null;
Bry_fmtr fmtr_balanced = null;
Bry_bfr bfr_balanced = null;
Lua_cls_to_regx_map percent_map = Lua_cls_matcher.Instance.Percent();
Lua_cls_to_regx_map brack_map = Lua_cls_matcher.Instance.Brack();
// bfr.Add_byte(Byte_ascii.Slash); // TOMBSTONE: do not add PHP "/" at start
int len = pat_ucs.Len_codes();
int len = pat_ucs.Len_in_data();
int grps_len = 0;
int bct = 0;
@ -46,7 +48,7 @@ public class Scrib_regx_converter {
for (int i = 0; i < len; i++) {
int i_end = i + 1;
q_flag = false; // must be reset; REF.MW:UstringLibrary.php|patternToRegex; DATE:2014-02-08
int cur = pat_ucs.Val_codes(i);
int cur = pat_ucs.Get_data(i);
switch (cur) {
case Byte_ascii.Pow:
if (!mode_is_regx) {
@ -71,7 +73,7 @@ public class Scrib_regx_converter {
int grp_idx = grp_mgr.Capt__len() + 1;
// check for "()"; enables anypos flag
boolean is_empty_capture = pat_ucs.Val_codes(i + 1) == Byte_ascii.Paren_end;
boolean is_empty_capture = pat_ucs.Get_data(i + 1) == Byte_ascii.Paren_end;
if (is_empty_capture)
any_pos = true;
grp_mgr.Capt__add__real(grp_idx, is_empty_capture);
@ -93,19 +95,19 @@ public class Scrib_regx_converter {
i++;
if (i >= len)
throw Err_.new_wo_type("malformed pattern (ends with '%')");
byte[] percent_bry = percent_map.Get_or_null(pat_ucs.Val_codes(i));
byte[] percent_bry = percent_map.Get_or_null(pat_ucs.Get_data(i));
if (percent_bry != null) {
bfr.Add(percent_bry);
q_flag = true;
}
else {
int nxt = pat_ucs.Val_codes(i);
int nxt = pat_ucs.Get_data(i);
switch (nxt) {
case Byte_ascii.Ltr_b: // EX: "%b()"
i += 2;
if (i >= len) throw Err_.new_wo_type("malformed pattern (missing arguments to '%b')");
int char_0 = pat_ucs.Val_codes(i - 1);
int char_1 = pat_ucs.Val_codes(i);
int char_0 = pat_ucs.Get_data(i - 1);
int char_1 = pat_ucs.Get_data(i);
if (char_0 == char_1) { // same char: easier regex; REF.MW: $bfr .= "{$d1}[^$d1]*$d1";
bfr.Add(Bry_bf0_seg_0);
Regx_quote(bfr, char_0);
@ -133,11 +135,11 @@ public class Scrib_regx_converter {
}
break;
case Byte_ascii.Ltr_f: { // EX: lua frontier pattern; "%f[%a]"; DATE:2015-07-21
if (i + 1 >= len || pat_ucs.Val_codes(++i) != Byte_ascii.Brack_bgn)
if (i + 1 >= len || pat_ucs.Get_data(++i) != Byte_ascii.Brack_bgn)
throw Err_.new_("scribunto", "missing '[' after %f in pattern at pattern character " + Int_.To_str(i_end));
// %f always followed by bracketed term; convert lua bracketed term to regex
if (tmp_bfr == null) tmp_bfr = Bry_bfr_.New();
i = bracketedCharSetToRegex(tmp_bfr, pat_ucs, i, len);
i = bracketedCharSetToRegex(tmp_bfr, brack_map, pat_ucs, i, len);
byte[] re2 = tmp_bfr.To_bry_and_clear();
// scrib has following comment: 'Because %f considers the beginning and end of the String to be \0, determine if $re2 matches that and take it into account with "^" and "$".'
@ -169,7 +171,7 @@ public class Scrib_regx_converter {
bfr.Add_byte(Byte_ascii.Brack_bgn);
continue;
}
i = bracketedCharSetToRegex(bfr, pat_ucs, i, len);
i = bracketedCharSetToRegex(bfr, brack_map, pat_ucs, i, len);
q_flag = true;
break;
case Byte_ascii.Brack_end:
@ -196,7 +198,7 @@ public class Scrib_regx_converter {
break;
}
if (q_flag && i + 1 < len) {
int tmp_b = pat_ucs.Val_codes(i + 1);
int tmp_b = pat_ucs.Get_data(i + 1);
switch (tmp_b) {
case Byte_ascii.Star:
case Byte_ascii.Plus:
@ -217,35 +219,35 @@ public class Scrib_regx_converter {
regx = bfr.To_str_and_clear();
return regx;
}
private int bracketedCharSetToRegex(Bry_bfr bfr, Unicode_string pat_ucs, int i, int len) {
private int bracketedCharSetToRegex(Bry_bfr bfr, Lua_cls_to_regx_map brack_map, Ustring pat_ucs, int i, int len) {
bfr.Add_byte(Byte_ascii.Brack_bgn);
i++;
if (i < len && pat_ucs.Val_codes(i) == Byte_ascii.Pow) { // ^
if (i < len && pat_ucs.Get_data(i) == Byte_ascii.Pow) { // ^
bfr.Add_byte(Byte_ascii.Pow);
i++;
}
for (int j = i; i < len && (j == i || pat_ucs.Val_codes(i) != Byte_ascii.Brack_end); i++) {
if (pat_ucs.Val_codes(i) == Byte_ascii.Percent) {
for (int j = i; i < len && (j == i || pat_ucs.Get_data(i) != Byte_ascii.Brack_end); i++) {
if (pat_ucs.Get_data(i) == Byte_ascii.Percent) {
i++;
if (i >= len) {
break;
}
byte[] brack_bry = brack_map.Get_or_null(pat_ucs.Val_codes(i));
byte[] brack_bry = brack_map.Get_or_null(pat_ucs.Get_data(i));
if (brack_bry != null)
bfr.Add(brack_bry);
else
Regx_quote(bfr, pat_ucs.Val_codes(i));
Regx_quote(bfr, pat_ucs.Get_data(i));
}
else if (i + 2 < len && pat_ucs.Val_codes(i + 1) == Byte_ascii.Dash && pat_ucs.Val_codes(i + 2) != Byte_ascii.Brack_end && pat_ucs.Val_codes(i + 2) != Byte_ascii.Hash) {
if (pat_ucs.Val_codes(i) <= pat_ucs.Val_codes(i + 2)) {
Regx_quote(bfr, pat_ucs.Val_codes(i));
else if (i + 2 < len && pat_ucs.Get_data(i + 1) == Byte_ascii.Dash && pat_ucs.Get_data(i + 2) != Byte_ascii.Brack_end && pat_ucs.Get_data(i + 2) != Byte_ascii.Hash) {
if (pat_ucs.Get_data(i) <= pat_ucs.Get_data(i + 2)) {
Regx_quote(bfr, pat_ucs.Get_data(i));
bfr.Add_byte(Byte_ascii.Dash);
Regx_quote(bfr, pat_ucs.Val_codes(i + 2));
Regx_quote(bfr, pat_ucs.Get_data(i + 2));
}
i += 2;
}
else {
Regx_quote(bfr, pat_ucs.Val_codes(i));
Regx_quote(bfr, pat_ucs.Get_data(i));
}
}
if (i > len) throw Err_.new_wo_type("Missing close-bracket for character set beginning at pattern character $nxt_pos");

@ -58,25 +58,29 @@ class Scrib_regx_grp_mgr {
int actl_idx = Int_.Cast(idx_list.Get_by(regx_idx));
bfr.Add_int_variable(actl_idx);
}
public Regx_match[] Adjust_balanced(Regx_match[] matches) {
public Regx_match[] Adjust_balanced_many(Regx_match[] matches) {
if (fake_count == 0) return matches;
int matches_len = matches.length;
Regx_match[] rv = new Regx_match[matches_len];
for (int i = 0; i < matches_len; i++) {
Regx_match match = matches[i];
Regx_group[] old_groups = match.Groups();
Regx_group[] new_groups = new Regx_group[full_list.Len() - fake_count];
int group_idx = 0;
for (int j = 0; j < old_groups.length; j++) {
Scrib_regx_grp_itm itm = (Scrib_regx_grp_itm)full_list.Get_at(j);
if (itm.Is_fake()) continue;
new_groups[group_idx++] = old_groups[j];
}
rv[i] = new Regx_match(match.Rslt(), match.Find_bgn(), match.Find_end(), new_groups);
rv[i] = Adjust_balanced_one(matches[i]);
}
return rv;
}
// Returns a copy of @match whose groups exclude the "fake" groups registered in full_list.
// Group j of @match is paired with entry j of full_list; entries flagged Is_fake are skipped
// and the remaining groups are packed, in order, into a new array.
// @match is returned as-is when no groups were registered at all.
public Regx_match Adjust_balanced_one(Regx_match match) {
	int registered_len = full_list.Len();
	if (registered_len == 0) return match; // no capture groups, so don't bother adjusting for balanced; DATE:2019-04-16

	Regx_group[] src_grps = match.Groups();
	Regx_group[] kept_grps = new Regx_group[registered_len - fake_count];
	int src_len = src_grps.length;
	int kept_len = 0;
	for (int src_idx = 0; src_idx < src_len; src_idx++) {
		Scrib_regx_grp_itm grp_itm = (Scrib_regx_grp_itm)full_list.Get_at(src_idx);
		if (!grp_itm.Is_fake()) {
			// real group: keep it at the next free slot
			kept_grps[kept_len] = src_grps[src_idx];
			kept_len++;
		}
	}
	return new Regx_match(match.Rslt(), match.Find_bgn(), match.Find_end(), kept_grps);
}
}
class Scrib_regx_grp_itm {
public Scrib_regx_grp_itm(boolean is_fake, boolean is_empty_capture, int idx) {

@ -14,8 +14,19 @@ GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.xtns.scribunto.libs.patterns; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.scribunto.*; import gplx.xowa.xtns.scribunto.libs.*;
import gplx.core.intls.*;
import gplx.objects.strings.unicodes.*;
import gplx.langs.regxs.*;
public interface Scrib_pattern_matcher {
Regx_match[] Match(Xoa_url url, Unicode_string text_ucs, Scrib_regx_converter regx_converter, String find_str, int bgn_as_codes);
// Base class for the engines that run a Lua pattern against a source string.
// Each instance owns its own Scrib_regx_converter; obtain instances through New.
public abstract class Scrib_pattern_matcher {
	// converter shared with subclasses; also the source of Capt_ary
	protected final Scrib_regx_converter regx_converter = new Scrib_regx_converter();

	// Returns the capture array held by this matcher's converter
	public Keyval[] Capt_ary() {
		return regx_converter.Capt_ary();
	}

	// Finds one match of @pat_str in @src_ucs, starting at @bgn_as_codes.
	// @replace: in the regx implementation, true means @pat_str is still a Lua pattern and must be converted first;
	//           Gmatch_callback passes false b/c Gmatch_init already converted it
	public abstract Regx_match Match_one(Ustring src_ucs, String pat_str, int bgn_as_codes, boolean replace);

	// Runs a gsub of @pat_str over @src_ucs on behalf of @gsub_mgr and returns the resulting String
	public abstract String Gsub(Scrib_lib_ustring_gsub_mgr gsub_mgr, Ustring src_ucs, String pat_str, int bgn_as_codes);

	// Engine switch; hard-coded to false, so New currently always builds the regx matcher
	public static boolean Mode_is_xowa() {
		return false;
	}

	// Factory: picks the implementation according to Mode_is_xowa
	public static Scrib_pattern_matcher New(byte[] page_url) {
		if (Mode_is_xowa())
			return new Scrib_pattern_matcher__xowa(page_url);
		return new Scrib_pattern_matcher__regx(page_url);
	}
}

@ -1,36 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.xtns.scribunto.libs.patterns; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.scribunto.*; import gplx.xowa.xtns.scribunto.libs.*;
import gplx.core.intls.*;
import gplx.langs.regxs.*;
public class Scrib_pattern_matcher_ {
private static final Scrib_pattern_matcher instance = New();
private static Scrib_pattern_matcher New() {
return new Scrib_pattern_matcher__regx();
// return new Scrib_pattern_matcher__luaj();
}
public static Scrib_pattern_matcher Instance() {return instance;}
}
class Scrib_pattern_matcher__regx implements Scrib_pattern_matcher {
public Regx_match[] Match(Xoa_url url, Unicode_string text_ucs, Scrib_regx_converter regx_converter, String find_str, int bgn_as_codes) {
// convert regex from lua to java
find_str = regx_converter.patternToRegex(find_str, Scrib_regx_converter.Anchor_G, true);
// run regex
Regx_adp regx_adp = Scrib_lib_ustring.RegxAdp_new_(url, find_str);
return regx_adp.Match_all(text_ucs.Src_string(), text_ucs.Pos_codes_to_chars(bgn_as_codes)); // NOTE: MW calculates an offset to handle mb strings. however, java's regex always takes offset in chars (not bytes like PHP preg_match); DATE:2014-03-04
}
}

@ -1,50 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.xtns.scribunto.libs.patterns; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.scribunto.*; import gplx.xowa.xtns.scribunto.libs.*;
import gplx.core.intls.*;
import gplx.langs.regxs.*;
import org.luaj.vm2.lib.StringLib;
//import org.luaj.vm2.lib.Str_find_mgr;
//import org.luaj.vm2.lib.Str_find_mgr__regx;
// Stub matcher: the only live statement in Match is "return null".
// The commented-out lines below are an earlier implementation built on Str_find_mgr__regx; they are kept for reference only.
class Scrib_pattern_matcher__luaj implements Scrib_pattern_matcher {
// Always returns null; none of the arguments are read.
// NOTE(review): the disabled code returned Regx_match.Ary_empty for "not found"; callers of this stub must be prepared for null instead -- confirm
public Regx_match[] Match(Xoa_url url, Unicode_string text_ucs, Scrib_regx_converter regx_converter, String find_str, int bgn_as_codes) {
// int src_bgn = bgn_as_codes < 0 ? bgn_as_codes : text_ucs.Pos_codes_to_bytes(bgn_as_codes);
// int src_bgn = bgn_as_codes < 0 ? Int_.Base1 : bgn_as_codes + Int_.Base1;
// src_bgn = src_bgn >= text_ucs.Len_codes() ? text_ucs.Len_codes() : text_ucs.Pos_codes_to_bytes(src_bgn);
// Str_find_mgr__regx mgr = new Str_find_mgr__regx(text_ucs.Src_string(), find_str, src_bgn, false, true);
// mgr.Process();
//
// // convert to Regx_match
// int find_bgn = mgr.Bgn() == -1 ? -1 : text_ucs.Pos_bytes_to_chars(mgr.Bgn());
// int find_end = mgr.End() == -1 ? -1 : text_ucs.Pos_bytes_to_chars(mgr.End());
// boolean found = find_bgn != -1;
// if (!found) {
// return Regx_match.Ary_empty;
// }
// int[] captures = mgr.Capture_ints();
// Regx_group[] groups = null;
// if (found && captures != null) {
// int captures_len = captures.length;
// groups = new Regx_group[captures_len / 2];
// for (int i = 0; i < captures_len; i += 2) {
// groups[i / 2] = new Regx_group(true, captures[i], captures[i + 1], String_.Mid(text_ucs.Src_string(), text_ucs.Pos_bytes_to_chars(captures[i]), text_ucs.Pos_bytes_to_chars(captures[i + 1])));
// }
// }
// Regx_match rv = new Regx_match(found, find_bgn, find_end, groups);
// return new Regx_match[] {rv};
return null;
}
}

@ -0,0 +1,74 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.xtns.scribunto.libs.patterns; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.scribunto.*; import gplx.xowa.xtns.scribunto.libs.*;
import gplx.objects.strings.unicodes.*;
import gplx.langs.regxs.*;
// Pattern matcher that converts a Lua pattern to a Java regex (via regx_converter) and runs it through Regx_adp.
class Scrib_pattern_matcher__regx extends Scrib_pattern_matcher {
	private final byte[] page_url;
	public Scrib_pattern_matcher__regx(byte[] page_url) {this.page_url = page_url;}

	// Runs a single match of pat_str against src_ucs, starting at the char position that corresponds to bgn_as_codes.
	// When replace is false, pat_str is used as-is; note that replace will be false for Gmatch_callback (b/c Gmatch_init already converted)
	@Override public Regx_match Match_one(Ustring src_ucs, String pat_str, int bgn_as_codes, boolean replace) {
		// convert lua pattern to java regex only when asked to
		String regx_str = replace
			? regx_converter.patternToRegex(pat_str, Scrib_regx_converter.Anchor_G, true)
			: pat_str;

		// run regex
		Regx_adp adp = Scrib_lib_ustring.RegxAdp_new_(page_url, regx_str);
		String src_str = src_ucs.Src();
		int bgn_as_chars = src_ucs.Map_data_to_char(bgn_as_codes); // NOTE: MW calculates an offset to handle mb strings. however, java's regex always takes offset in chars (not bytes like PHP preg_match); DATE:2014-03-04
		Regx_match found = adp.Match(src_str, bgn_as_chars);
		return regx_converter.Adjust_balanced_one(found);
	}

	// Replaces matches of pat_str in src_ucs, delegating each replacement to gsub_mgr.
	// Returns the source String unchanged when the pattern is invalid or when nothing matches.
	@Override public String Gsub(Scrib_lib_ustring_gsub_mgr gsub_mgr, Ustring src_ucs, String pat_str, int bgn_as_codes) {
		// convert lua pattern to java regex
		String regx_str = regx_converter.patternToRegex(pat_str, Scrib_regx_converter.Anchor_pow, true);
		String src_str = src_ucs.Src();
		Regx_adp adp = Scrib_lib_ustring.RegxAdp_new_(page_url, regx_str);
		if (adp.Pattern_is_invalid()) {
			return src_str; // NOTE: invalid patterns should return self; EX:[^]; DATE:2014-09-02
		}

		// run regex; offset is in chars, not bytes; DATE:2014-03-04
		Regx_match[] matches = adp.Match_all(src_str, src_ucs.Map_data_to_char(bgn_as_codes));
		if (matches.length == 0) {
			return src_str; // PHP: If matches are found, the new subject will be returned, otherwise subject will be returned unchanged.; http://php.net/manual/en/function.preg-replace-callback.php
		}
		matches = regx_converter.Adjust_balanced(matches);

		// stitch output together: unmatched text + replacement, for each match
		Bry_bfr out_bfr = Bry_bfr_.New();
		int copied_until = 0;
		for (Regx_match cur : matches) {
			// stop if repl_count reaches limit; note that limit = -1 by default, unless specified
			if (gsub_mgr.Repl_count__done()) break;

			// text between previous match and this one; NOTE: regx returns char pos (not bry); must add as String, not bry; DATE:2013-07-17
			out_bfr.Add_str_u8(String_.Mid(src_str, copied_until, cur.Find_bgn()));

			// replacement; Exec_repl_itm is false when gsub_proc returns nothing, in which case the matched text is kept; PAGE:en.d:tracer PAGE:en.d:שלום DATE:2017-04-22;
			boolean replaced = gsub_mgr.Exec_repl_itm(out_bfr, regx_converter, cur);
			if (!replaced) {
				out_bfr.Add_str_u8(String_.Mid(src_str, cur.Find_bgn(), cur.Find_end()));
			}

			// advance
			copied_until = cur.Find_end();
			gsub_mgr.Repl_count__add();
		}

		// tail after the last processed match; added as String, not bry; DATE:2013-07-17
		int src_len = String_.Len(src_str);
		if (copied_until < src_len) {
			out_bfr.Add_str_u8(String_.Mid(src_str, copied_until, src_len));
		}
		return out_bfr.To_str_and_clear();
	}
}

@ -0,0 +1,123 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.xtns.scribunto.libs.patterns; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.scribunto.*; import gplx.xowa.xtns.scribunto.libs.*;
import gplx.objects.strings.unicodes.*;
import gplx.langs.regxs.*;
import gplx.objects.strings.unicodes.*;
import org.luaj.vm2.lib.StringLib;
import org.luaj.vm2.Buffer;
import org.luaj.vm2.LuaValue;
import org.luaj.vm2.lib.Match_state;
import org.luaj.vm2.lib.Str_find_mgr;
import org.luaj.vm2.lib.Str_find_mgr__xowa;
// Pattern matcher that runs the pattern through Str_find_mgr__xowa / Match_state (org.luaj.vm2.lib) over a Ustring.
// The constructor accepts page_url but neither stores nor uses it.
class Scrib_pattern_matcher__xowa extends Scrib_pattern_matcher { public Scrib_pattern_matcher__xowa(byte[] page_url) {}
// Runs one find of pat_str against src_ucs, passing bgn_as_codes to the find manager.
// Returns a Regx_match whose "found" flag is true when the manager reports a bgn other than -1.
// When found, bgn / end are converted with Map_data_to_char; otherwise they are passed through as reported.
// The "replace" argument is not read by this implementation.
@Override public Regx_match Match_one(Ustring src_ucs, String pat_str, int bgn_as_codes, boolean replace) {
// NOTE(review): the converted regex is discarded; presumably this call is made for its side effects on regx_converter -- confirm
regx_converter.patternToRegex(pat_str, Scrib_regx_converter.Anchor_pow, true);
Str_find_mgr__xowa mgr = new Str_find_mgr__xowa(src_ucs, Ustring_.New_codepoints(pat_str), bgn_as_codes, false, false);
mgr.Process(false);
// convert to Regx_match
int find_bgn = mgr.Bgn();
int find_end = mgr.End();
boolean found = find_bgn != -1;
if (found) {
find_bgn = src_ucs.Map_data_to_char(find_bgn);
find_end = src_ucs.Map_data_to_char(find_end);
}
// groups are built even when nothing was found; Make_groups returns Regx_group.Ary_empty if the captures array is null
Regx_group[] groups = Make_groups(src_ucs, mgr.Captures_ary());
return new Regx_match(found, find_bgn, find_end, groups);
}
// Walks src_ucs with Match_state, handing each match to gsub_mgr for replacement and copying unmatched data to a buffer.
// Returns the source String as-is when it has no data; otherwise returns the buffer contents.
@Override public String Gsub(Scrib_lib_ustring_gsub_mgr gsub_mgr, Ustring src_ucs, String pat_str, int bgn_as_codes) {
// get src vars
String src_str = src_ucs.Src();
int src_len = src_ucs.Len_in_data();
if (src_len == 0) {
return src_str;
}
// upper bound for src_idx, which is only incremented when a match is found (see loop below)
int src_max = src_len + 1;
// get pat vars
// NOTE(review): return value is discarded here as well; presumably called for side effects on regx_converter -- confirm
regx_converter.patternToRegex(pat_str, Scrib_regx_converter.Anchor_G, true);
Ustring pat = Ustring_.New_codepoints(pat_str);
int pat_len = pat.Len_in_data();
final boolean pat_is_anchored = pat_len > 0 && pat.Get_data(0) == '^';
// get match vars
Bry_bfr tmp_bfr = Bry_bfr_.New();
Str_find_mgr__xowa match_mgr = new Str_find_mgr__xowa(src_ucs, pat, bgn_as_codes, false, false);
Match_state ms = new Match_state(match_mgr);
// NOTE(review): scanning starts at 0; bgn_as_codes is only passed to match_mgr -- confirm this is intended
int src_pos = 0;
int src_idx = 0;
while (src_idx < src_max) {
ms.reset();
// second arg is 1 for an anchored pattern, else 0; presumably the pattern offset that skips the leading '^' -- confirm
int res = ms.match(src_pos, pat_is_anchored ? 1 : 0);
// match found
if (res != -1) {
// replace limit reached: leave loop; the remaining source is appended after the loop
if (gsub_mgr.Repl_count__done()) break;
src_idx++;
ms.push_captures(true, src_pos, res);
Regx_group[] groups = Make_groups(src_ucs, match_mgr.Captures_ary());
// NOTE(review): src_pos / res are passed without Map_data_to_char, unlike Match_one -- confirm
Regx_match match = new Regx_match(true, src_pos, res, groups);
// Exec_repl_itm returned false: keep the matched text unchanged
if (!gsub_mgr.Exec_repl_itm(tmp_bfr, regx_converter, match)) {
tmp_bfr.Add_str_u8(src_ucs.Substring(match.Find_bgn(), match.Find_end()));
}
gsub_mgr.Repl_count__add();
}
// match found and it consumed input; set src_pos to match_end
if (res != -1 && res > src_pos)
src_pos = res;
// no match, or match did not advance (res <= src_pos): copy the current data item and step forward by one
else if (src_pos < src_len) {
// lbuf.append( (byte) src.Get_data( src_pos++ ) );
tmp_bfr.Add_u8_int(src_ucs.Get_data(src_pos++));
}
else
break;
// anchored pattern: only one pass through the loop
if (pat_is_anchored)
break;
if (src_pos > src_len) // XOWA:assert src_pos is in bounds, else will throw ArrayIndexOutOfBounds exception; DATE:2016-09-20
break;
}
// append whatever was not consumed by the loop
tmp_bfr.Add_str_u8(src_ucs.Substring(src_pos, src_len));
return tmp_bfr.To_str_and_clear();
}
// Converts a flat captures array into Regx_group items.
// captures is read as consecutive (bgn, end) pairs; each position is converted with Map_data_to_char before the text is cut from src_ucs.Src().
// Returns Regx_group.Ary_empty when captures is null.
private Regx_group[] Make_groups(Ustring src_ucs, int[] captures) {
if (captures == null) {
return Regx_group.Ary_empty;
}
int captures_len = captures.length;
Regx_group[] groups = new Regx_group[captures_len / 2];
for (int i = 0; i < captures_len; i += 2) {
int capture_bgn = captures[i];
int capture_end = captures[i + 1];
capture_bgn = src_ucs.Map_data_to_char(capture_bgn);
capture_end = src_ucs.Map_data_to_char(capture_end);
groups[i / 2] = new Regx_group(true, capture_bgn, capture_end, String_.Mid(src_ucs.Src(), capture_bgn, capture_end));
}
return groups;
}
}

@ -0,0 +1,34 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.objects; import gplx.*;
import gplx.objects.brys.*;
import gplx.objects.strings.*;
import gplx.objects.types.*;
// Static helpers for stringifying and comparing arbitrary objects.
public class Object_ {
	// Same as To_str, but a null value yields the marker "<<NULL>>"
	public static String To_str_or_null_mark(Object v) {
		return To_str_or(v, "<<NULL>>");
	}
	// Same as To_str, but a null value yields "or"
	public static String To_str_or(Object v, String or) {
		if (v == null) return or;
		return To_str(v);
	}
	// Converts v to a String: Strings are returned as-is; byte[] goes through String_.New_bry_utf8; everything else uses toString()
	// NOTE: v must not be null (v.getClass() is called); use To_str_or / To_str_or_null_mark for nullable values
	public static String To_str(Object v) {
		Class<?> type = v.getClass();
		if (Type_.Eq(type, String_.Cls_ref_type)) {
			return (String)v;
		}
		if (Type_.Eq(type, Bry_.Cls_ref_type)) {
			return String_.New_bry_utf8((byte[])v);
		}
		return v.toString();
	}
	// Null-safe equality: two nulls are equal; one null is never equal to a non-null; otherwise defers to lhs.equals(rhs)
	public static boolean Eq(Object lhs, Object rhs) {
		if (lhs == null || rhs == null) {
			return lhs == rhs; // only true when both are null
		}
		return lhs.equals(rhs);
	}
}

@ -0,0 +1,51 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.objects.arrays; import gplx.*; import gplx.objects.*;
import java.lang.reflect.Array;
import gplx.objects.errs.*;
// Static helpers for working with arrays passed as plain Objects (via java.lang.reflect.Array and System.arraycopy).
public class Array_ {
	// Length of any array via reflection
	public static int Len(Object ary) {
		return Array.getLength(ary);
	}
	// Length of an Object[]; null counts as 0
	public static final int Len_obj(Object[] ary) {
		if (ary == null) return 0;
		return ary.length;
	}
	public static Object Get_at(Object ary, int i) {
		return Array.get(ary, i);
	}
	public static void Set_at(Object ary, int i, Object o) {
		Array.set(ary, i, o);
	}
	// Creates a new array of "count" items with component type "t"
	public static Object Create(Class<?> t, int count) {
		return Array.newInstance(t, count);
	}
	// Copies the first src_len items of src into trg and returns trg; any failure is rethrown as an Err
	public static Object Expand(Object src, Object trg, int src_len) {
		try {
			System.arraycopy(src, 0, trg, 0, src_len);
			return trg;
		}
		catch (Exception e) {
			throw Err_.New_fmt(e, "Array_.Expand failed; src_len={0}", src_len);
		}
	}
	// Copies all of src to the start of trg
	public static void Copy(Object src, Object trg) {
		int src_len = Len(src);
		System.arraycopy(src, 0, trg, 0, src_len);
	}
	// Copies all of src into trg, starting at trgPos
	public static void Copy_to(Object src, Object trg, int trgPos) {
		int src_len = Len(src);
		System.arraycopy(src, 0, trg, trgPos, src_len);
	}
	// Copies srcLen items from src[srcBgn] to trg[trgBgn]
	public static void Copy_to(Object src, int srcBgn, Object trg, int trgBgn, int srcLen) {
		System.arraycopy(src, srcBgn, trg, trgBgn, srcLen);
	}
	// Component type of the array; throws an Err when ary is null
	private static Class<?> Component_type(Object ary) {
		if (ary != null) return ary.getClass().getComponentType();
		throw Err_.New_msg("Array is null");
	}
	// Returns a new array (same component type as src) holding the items of src followed by the items of add
	public static Object Resize_add(Object src, Object add) {
		int src_len = Len(src);
		int add_len = Len(add);
		Object rv = Create(Component_type(src), src_len + add_len);
		Copy(src, rv);
		// items of "add" are copied one at a time through the reflective accessors
		for (int add_idx = 0; add_idx < add_len; add_idx++) {
			Object itm = Get_at(add, add_idx);
			Set_at(rv, src_len + add_idx, itm);
		}
		return rv;
	}
	// Returns a shallow copy of src with the same component type and length
	public static Object Clone(Object src) {
		Class<?> type = Component_type(src);
		Object rv = Create(type, Len(src));
		Copy(src, rv);
		return rv;
	}
}

@ -0,0 +1,103 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.objects.brys; import gplx.*; import gplx.objects.*;
import gplx.objects.errs.*;
// Static helpers for byte arrays ("bry"), including a hand-rolled String -> UTF-8 encoder.
public class Bry_ {
	public static final Class<?> Cls_ref_type = byte[].class;
	public static final byte[] Empty = new byte[0];
	// Null-safe equality of two whole byte arrays
	public static boolean Eq(byte[] lhs, byte[] rhs) {return Eq(lhs, 0, lhs == null ? 0 : lhs.length, rhs);}
	// Compares lhs[lhs_bgn..lhs_end) against all of rhs.
	// Two nulls are equal; one null is not. A negative lhs_bgn or a length mismatch yields false.
	public static boolean Eq(byte[] lhs, int lhs_bgn, int lhs_end, byte[] rhs) {
		if      (lhs == null && rhs == null) return true;
		else if (lhs == null || rhs == null) return false;
		if (lhs_bgn < 0) return false;
		int rhs_len = rhs.length;
		if (rhs_len != lhs_end - lhs_bgn) return false;
		int lhs_len = lhs.length;
		for (int i = 0; i < rhs_len; i++) {
			int lhs_pos = i + lhs_bgn;
			if (lhs_pos == lhs_len) return false; // requested range runs past the end of lhs
			if (rhs[i] != lhs[lhs_pos]) return false;
		}
		return true;
	}
	public static byte[][] Ary(byte[]... ary) {return ary;}
	// Converts each String to UTF-8 bytes; null items stay null
	public static byte[][] Ary(String... ary) {
		int ary_len = ary.length;
		byte[][] rv = new byte[ary_len][];
		for (int i = 0; i < ary_len; i++) {
			String itm = ary[i];
			rv[i] = itm == null ? null : Bry_.New_utf08(itm);
		}
		return rv;
	}
	// Encodes src as UTF-8. Returns Bry_.Empty for an empty String; any failure is rethrown as an Err.
	public static byte[] New_utf08(String src) {
		try {
			int src_len = src.length();
			if (src_len == 0) return Bry_.Empty;
			int bry_len = New_utf08__count(src, src_len);
			byte[] bry = new byte[bry_len];
			New_utf08__write(src, src_len, bry, 0);
			return bry;
		}
		catch (Exception e) {throw Err_.New_fmt(e, "invalid UTF-8 sequence; src={0}", src);}
	}
	// Counts the number of UTF-8 bytes needed for the first src_len chars of src
	public static int New_utf08__count(String src, int src_len) {
		int rv = 0;
		for (int i = 0; i < src_len; ++i) {
			char c = src.charAt(i);
			int c_len = 0;
			if      (c <   128)       c_len = 1;    // 1 <<  7
			else if (c <  2048)       c_len = 2;    // 1 << 11
			else if (c >= 0xD800                    // high (leading) surrogate: 0xD800 - 0xDBFF
				  && c <  0xDC00)     c_len = 4;
			else                      c_len = 3;    // 1 << 16
			if (c_len == 4) ++i;                    // surrogate is 2 wide, not 1
			rv += c_len;
		}
		return rv;
	}
	// Writes the UTF-8 encoding of the first src_len chars of src into bry, starting at bry_pos.
	// Throws an Err when a high surrogate is the last char (no low surrogate follows).
	public static void New_utf08__write(String src, int src_len, byte[] bry, int bry_pos) {
		for (int i = 0; i < src_len; ++i) {
			char c = src.charAt(i);
			if      (c <   128) {
				bry[bry_pos++] = (byte)c;
			}
			else if (c <  2048) {
				bry[bry_pos++] = (byte)(0xC0 | (c >>   6));
				bry[bry_pos++] = (byte)(0x80 | (c & 0x3F));
			}
			else if (c >= 0xD800            // high (leading) surrogate: 0xD800 - 0xDBFF
				  && c <  0xDC00) {
				// the low surrogate lives at i + 1, so that is the index which must be in bounds
				// (the previous check of "i >= src_len" could never be true inside this loop)
				if (i + 1 >= src_len) throw Err_.New_msg("incomplete surrogate pair at end of String");
				char nxt_char = src.charAt(i + 1);
				int v = 0x10000 + (c - 0xD800) * 0x400 + (nxt_char - 0xDC00);
				bry[bry_pos++] = (byte)(0xF0 | (v >> 18));
				bry[bry_pos++] = (byte)(0x80 | (v >> 12) & 0x3F);
				bry[bry_pos++] = (byte)(0x80 | (v >>  6) & 0x3F);
				bry[bry_pos++] = (byte)(0x80 | (v        & 0x3F));
				++i;
			}
			else {
				bry[bry_pos++] = (byte)(0xE0 | (c >> 12));
				bry[bry_pos++] = (byte)(0x80 | (c >>  6) & 0x3F);
				bry[bry_pos++] = (byte)(0x80 | (c        & 0x3F));
			}
		}
	}
}

@ -0,0 +1,21 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.objects.errs; import gplx.*; import gplx.objects.*;
// Unchecked exception carrying a plain message; the message is held by RuntimeException itself.
public class Err extends RuntimeException {
	public Err(String msg) {
		super(msg);
	}
	// Returns the message that was passed to the constructor
	@Override public String getMessage() {
		return super.getMessage();
	}
}

@ -0,0 +1,46 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.objects.errs; import gplx.*; import gplx.objects.*;
import gplx.objects.strings.*;
// Static factory / helper methods for Err.
public class Err_ {
	// Deliberately does nothing; lets a caller mark an exception as intentionally ignored
	public static void Noop(Exception e) {}

	// Creates an Err whose message is String_.Format(fmt, args)
	public static Err New_fmt(String fmt, Object... args) {return new Err(String_.Format(fmt, args));}
	public static Err New_msg(String msg) {return new Err(msg);}
	// Creates an Err whose message is the formatted text followed by " exc=" and the message of e
	public static Err New_fmt(Exception e, String fmt, Object... args) {
		return new Err(String_.Format(fmt, args) + " exc=" + Err_.Message_lang(e));
	}
	public static Err New_null(String name) {return new Err("Object was null; name=" + name);}
	public static Err New_unhandled_default(Object o) {
		return new Err("val is not in switch; val=" + Object_.To_str(o));
	}

	// Returns the message of e; for a java.lang.Error, returns toString() instead of getMessage().
	// NOTE: parameter is Throwable (was Exception) b/c an Exception can never be an Error, which made the Error branch unreachable;
	// all existing callers passing an Exception still compile
	public static String Message_lang(Throwable e) {
		return e instanceof Error
			? e.toString()    // java.lang.Error returns null for "getMessage()"; return "toString()" instead
			: e.getMessage();
	}
	// Returns the stack trace of e as one String, with one frame per line separated by "\n" (no trailing new line)
	public static String Trace_lang(Throwable e) {
		StackTraceElement[] ary = e.getStackTrace();
		StringBuilder sb = new StringBuilder(); // avoid repeated String concatenation inside the loop
		for (int i = 0; i < ary.length; i++) {
			if (i != 0) sb.append("\n");
			sb.append(ary[i].toString());
		}
		return sb.toString();
	}
}

@ -0,0 +1,36 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.objects.primitives; import gplx.*; import gplx.objects.*;
import gplx.objects.errs.*;
// Constants and helpers for the boolean primitive.
public class Bool_ {
	public static final String Cls_val_name = "bool";
	public static final Class<?> Cls_ref_type = Boolean.class;
	public static final boolean N = false , Y = true;
	public static final byte N_byte = 0 , Y_byte = 1 , __byte = 127;
	public static final int N_int = 0 , Y_int = 1 , __int = -1;
	public static final String True_str = "true", False_str = "false";
	// Casts o to boolean; throws an Err when o is null or is not a Boolean
	public static boolean Cast(Object o) {
		try {
			return (Boolean)o;
		}
		catch (Exception e) {
			// NOTE: use the null-safe stringifier; Object_.To_str(null) would throw a NullPointerException here and hide the Err
			throw Err_.New_fmt(e, "failed to cast to boolean; obj={0}", Object_.To_str_or_null_mark(o));
		}
	}
}

@ -0,0 +1,29 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.objects.primitives; import gplx.*; import gplx.objects.*;
import gplx.objects.errs.*;
// Constants and helpers for the byte primitive.
public class Byte_ {
	public static final String Cls_val_name = "byte";
	public static final Class<?> Cls_ref_type = Byte.class;
	// Casts o to byte; throws an Err when o is null or is not a Byte
	public static byte Cast(Object o) {
		try {
			return (Byte)o;
		}
		catch (Exception e) {
			// NOTE: use the null-safe stringifier; Object_.To_str(null) would throw a NullPointerException here and hide the Err
			throw Err_.New_fmt(e, "failed to cast to byte; obj={0}", Object_.To_str_or_null_mark(o));
		}
	}
}

@ -0,0 +1,20 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.objects.primitives; import gplx.*; import gplx.objects.*;
// Type constants for the char primitive.
public class Char_ {
	// wrapper class for char
	public static final Class<?> Cls_ref_type = Character.class;
	// name used for the char value type
	public static final String Cls_val_name = "char";
}

@ -0,0 +1,25 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.objects.primitives; import gplx.*; import gplx.objects.*;
// Named constants for commonly used chars.
public class Char_code_ {
	public static final char New_line = '\n';
	public static final char Space    = ' ';
	public static final char Colon    = ':';
	public static final char Num_0    = '0';
	public static final char Pipe     = '|';
}

@ -0,0 +1,20 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.objects.primitives; import gplx.*; import gplx.objects.*;
// Type constants for the double primitive.
public class Double_ {
	// wrapper class for double
	public static final Class<?> Cls_ref_type = Double.class;
	// name used for the double value type
	public static final String Cls_val_name = "double";
}

@ -0,0 +1,20 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.objects.primitives; import gplx.*; import gplx.objects.*;
// Type constants for the float primitive.
public class Float_ {
	// wrapper class for float
	public static final Class<?> Cls_ref_type = Float.class;
	// name used for the float value type
	public static final String Cls_val_name = "float";
}

@ -0,0 +1,112 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.objects.primitives; import gplx.*; import gplx.objects.*;
import gplx.objects.errs.*;
import gplx.objects.strings.*;
// Constants and helpers for the int primitive.
public class Int_ {
	public static final String Cls_val_name = "int";
	public static final Class<?> Cls_ref_type = Integer.class;
	public static final int
		  Min_value = Integer.MIN_VALUE
		, Max_value = Integer.MAX_VALUE
		, Max_value__31 = 2147483647
		, Neg1 = -1
		, Null = Int_.Min_value
		, Base1 = 1    // for base-1 lists / arrays; EX: PHP; [a, b, c]; [1] => a
		, Offset_1 = 1 // common symbol for + 1 after current pos; EX: String_.Mid(lhs + Offset_1, rhs)
		;
	// Casts o to int; throws an Err when o is null or is not an Integer
	public static int Cast(Object o) {
		try {
			return (Integer)o;
		}
		catch(Exception e) {
			// NOTE: use the null-safe stringifier; Object_.To_str(null) would throw a NullPointerException here and hide the Err
			throw Err_.New_fmt(e, "failed to cast to int; obj={0}", Object_.To_str_or_null_mark(o));
		}
	}
	// Decimal String for v; uses Integer.toString b/c the Integer(int) constructor is deprecated
	public static String To_str(int v) {return Integer.toString(v);}
	// Parses raw as a base-10 int made of an optional leading '-' followed by one or more digits '0'-'9'.
	// Returns "or" when raw is null, is empty, is only "-", has any other char, has '-' anywhere but first, or does not fit in an int.
	public static int Parse_or(String raw, int or) {
		// process args
		if (raw == null) return or;
		int raw_len = String_.Len(raw);
		if (raw_len == 0) return or;

		// optional negative sign is only valid as 1st char; a '-' anywhere else fails the digit check below
		boolean negative = String_.Char_at(raw, 0) == '-';
		int bgn = negative ? 1 : 0;
		if (bgn == raw_len) return or; // "-" by itself is not a number (previously returned 0)

		// accumulate in a long so that overflow is detected instead of silently wrapping around
		long limit = negative ? -(long)Int_.Min_value : (long)Int_.Max_value;
		long rv = 0;
		for (int idx = bgn; idx < raw_len; idx++) {
			char cur = String_.Char_at(raw, idx);
			if (cur < '0' || cur > '9') return or; // anything else
			rv = (rv * 10) + (cur - '0');
			if (rv > limit) return or;             // out of int range
		}
		return (int)(negative ? -rv : rv);
	}
	// True when v lies between lhs and rhs, both ends included
	public static boolean Between(int v, int lhs, int rhs) {
		int lhs_comp = v == lhs ? 0 : (v < lhs ? -1 : 1);
		int rhs_comp = v == rhs ? 0 : (v < rhs ? -1 : 1);
		return (lhs_comp * rhs_comp) != 1; // 1 when v is (a) greater than both or (b) less than both
	}
	private static final int[] Log_10s = new int[] {1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000, Int_.Max_value};
	// Returns the integer part of log10 of the absolute value of v, carrying the sign of v; EX: 123 -> 2; -100 -> -2
	// Returns 0 for 0 and -9 for Int_.Min_value
	public static int Log10(int v) {
		if (v == 0) return 0;
		int sign = 1;
		if (v < 0) {
			if (v == Int_.Min_value) return -9; // NOTE: Int_.Min_value * -1 = Int_.Min_value
			v *= -1;
			sign = -1;
		}
		int log_10s_len = Log_10s.length;
		int rv = log_10s_len - 2; // rv will only happen when v == Int_.Max_value
		int bgn = 0;
		if (v > 1000) { // optimization to reduce number of ops to < 5
			bgn = 3;
			if (v > 1000000) bgn = 6;
		}
		for (int i = bgn; i < log_10s_len; i++) {
			if (v < Log_10s[i]) {rv = i - 1; break;}
		}
		return rv * sign;
	}
	// Number of chars needed to print v in base 10, including the '-' for negative values; EX: 100 -> 3; -100 -> 4
	public static int Count_digits(int v) {
		int log10 = Log10(v);
		return v > -1 ? log10 + 1 : log10 * -1 + 2;
	}
}

@ -0,0 +1,90 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.objects.primitives; import gplx.*; import gplx.objects.*;
import org.junit.*; import gplx.tests.*;
// Unit tests for Int_; all checks go through Int__fxt.
public class Int__tst {
	private final Int__fxt fxt = new Int__fxt();
	@Test public void Parse_or() {
		fxt.Test__Parse_or("123", 123);            // basic
		fxt.Test__Parse_or_min_value(null);        // null
		fxt.Test__Parse_or_min_value("");          // empty
		fxt.Test__Parse_or_min_value("1a");        // invalid number
		fxt.Test__Parse_or("-123", -123);          // negative
		fxt.Test__Parse_or_min_value("1-23");      // negative at invalid position
	}
	@Test public void Between() {
		fxt.Test__Between(1, 0, 2, true);          // simple true
		fxt.Test__Between(3, 0, 2, false);         // simple false
		fxt.Test__Between(0, 0, 2, true);          // bgn true
		fxt.Test__Between(2, 0, 2, true);          // end true
	}
	@Test public void Count_digits() {
		// each row: {val, expected number of chars}
		int[][] rows = new int[][]
		{ {   0, 1}
		, {   9, 1}
		, { 100, 3}
		, {  -1, 2}
		, {-100, 4}
		};
		for (int[] row : rows)
			fxt.Test__Count_digits(row[0], row[1]);
	}
	@Test public void Log10() {
		// each row: {val, expected log10}
		int[][] rows = new int[][]
		{ {                 0,  0}
		, {                 1,  0}
		, {                 2,  0}
		, {                10,  1}
		, {                12,  1}
		, {               100,  2}
		, {               123,  2}
		, {              1000,  3}
		, {              1234,  3}
		, {             10000,  4}
		, {             12345,  4}
		, {            100000,  5}
		, {            123456,  5}
		, {           1000000,  6}
		, {           1234567,  6}
		, {          10000000,  7}
		, {          12345678,  7}
		, {         100000000,  8}
		, {         123456789,  8}
		, {        1000000000,  9}
		, {        1234567890,  9}
		, {    Int_.Max_value,  9}
		, {                -1,  0}
		, {               -10, -1}
		, {              -100, -2}
		, {          -1000000, -6}
		, {       -1000000000, -9}
		, {    Int_.Min_value, -9}
		, {Int_.Min_value + 1, -9}
		};
		for (int[] row : rows)
			fxt.Test__Log10(row[0], row[1]);
	}
}
// Test fixture for Int__tst: each method runs one Int_ call and checks the result with Gftest_fxt.
class Int__fxt {
	// Parses raw with a fallback of -1 and expects expd
	public void Test__Parse_or(String raw, int expd) {
		int actl = Int_.Parse_or(raw, -1);
		Gftest_fxt.Eq__int(expd, actl);
	}
	// Parses raw with a fallback of Int_.Min_value and expects that fallback to come back
	public void Test__Parse_or_min_value(String raw) {
		int actl = Int_.Parse_or(raw, Int_.Min_value);
		Gftest_fxt.Eq__int(Int_.Min_value, actl);
	}
	public void Test__Between(int val, int lhs, int rhs, boolean expd) {
		boolean actl = Int_.Between(val, lhs, rhs);
		Gftest_fxt.Eq__bool(expd, actl);
	}
	// The String form of val is passed as the 3rd argument of Eq__int
	public void Test__Count_digits(int val, int expd) {
		int actl = Int_.Count_digits(val);
		String val_str = Int_.To_str(val);
		Gftest_fxt.Eq__int(expd, actl, val_str);
	}
	public void Test__Log10(int val, int expd) {
		int actl = Int_.Log10(val);
		Gftest_fxt.Eq__int(expd, actl);
	}
}

@ -0,0 +1,29 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.objects.primitives; import gplx.*; import gplx.objects.*;
import gplx.objects.errs.*;
// Constants and helpers for the long primitive.
public class Long_ {
	public static final String Cls_val_name = "long";
	public static final Class<?> Cls_ref_type = Long.class;
	// Casts o to long; throws an Err when o is null or is not a Long
	public static long Cast(Object o) {
		try {
			return (Long)o;
		}
		catch(Exception e) {
			// NOTE: use the null-safe stringifier; Object_.To_str(null) would throw a NullPointerException here and hide the Err
			throw Err_.New_fmt(e, "failed to cast to long; obj={0}", Object_.To_str_or_null_mark(o));
		}
	}
}

@ -0,0 +1,20 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.objects.primitives; import gplx.*; import gplx.objects.*;
// Type constants for the short primitive.
public class Short_ {
	// wrapper class for short
	public static final Class<?> Cls_ref_type = Short.class;
	// name used for the short value type
	public static final String Cls_val_name = "short";
}

@ -0,0 +1,95 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.objects.strings; import gplx.*; import gplx.objects.*;
import java.lang.*;
import gplx.objects.errs.*;
import gplx.objects.strings.bfrs.*;
import gplx.objects.arrays.*; import gplx.objects.primitives.*;
public class String_ {
  public static final Class<?> Cls_ref_type = String.class;
  public static final String Cls_val_name = "str" + "ing";
  public static final int Find_none = -1, Pos_neg1 = -1;
  public static final String Empty = "", Null_mark = "<<NULL>>", Tab = "\t", Lf = "\n", CrLf = "\r\n";

  /** Null-safe equality: two nulls are equal; a null never equals a non-null. */
  public static boolean Eq(String lhs, String rhs) {return lhs == null ? rhs == null : lhs.equals(rhs);}
  /** Length in Java chars; s must not be null. */
  public static int Len(String s) {return s.length();}
  /** Java char at index i; s must not be null. */
  public static char Char_at(String s, int i) {return s.charAt(i);}

  // use C# flavor ("a {0}") rather than Java format ("a %s"); also: (a) don't fail on format errors; (b) escape brackets by doubling
  private static final char FORMAT_ITM_LHS = '{', FORMAT_ITM_RHS = '}';

  /**
   * Formats fmt by replacing "{n}" items with the n-th element of args.
   *
   * Rules:
   * - no args: fmt is returned untouched (no unescaping is done)
   * - "{{" and "}}" are emitted as a single "{" / "}"
   * - an item whose index is not a number, or is out of bounds, is emitted verbatim; EX: "{a}", "{9}"
   * - a "{" found inside an open item makes the earlier "{" literal; EX: "{a{0}c}" -> "{a" + args[0] + "c}"
   * - an unterminated item is emitted verbatim, even when its index text is empty; EX: "{0" -> "{0"; "{a{" -> "{a{"
   * - a lone "}" outside of an item is emitted verbatim; EX: "a}b" -> "a}b"
   *
   * @param fmt  format string; must not be null when args are supplied
   * @param args values referenced by the items; null values are rendered by Object_.To_str_or_null_mark
   * @return formatted string
   */
  public static String Format(String fmt, Object... args) {
    // method vars
    int args_len = Array_.Len_obj(args);
    if (args_len == 0) return fmt; // nothing to format
    int fmt_len = Len(fmt);

    // loop vars
    int pos = 0; String arg_idx_str = ""; boolean inside_brackets = false;
    String_bfr bfr = new String_bfr();
    while (pos < fmt_len) { // loop over every char; NOTE: UTF8-SAFE b/c only checking for "{"; "}"
      char c = Char_at(fmt, pos);
      if (inside_brackets) {
        if (c == FORMAT_ITM_LHS) { // earlier FORMAT_ITM_LHS was not an item; add it and whatever is in arg_idx_str, and restart the item here
          bfr.Add_char(FORMAT_ITM_LHS).Add(arg_idx_str);
          arg_idx_str = "";
        }
        else if (c == FORMAT_ITM_RHS) { // itm completed
          int args_idx = Int_.Parse_or(arg_idx_str, Int_.Min_value);
          String itm = args_idx != Int_.Min_value && Int_.Between(args_idx, 0, args_len - 1) // check (a) args_idx is num; (b) args_idx is in bounds
            ? Object_.To_str_or_null_mark(args[args_idx]) // valid; add itm
            : FORMAT_ITM_LHS + arg_idx_str + FORMAT_ITM_RHS; // not valid; just add String
          bfr.Add(itm);
          inside_brackets = false;
          arg_idx_str = "";
        }
        else
          arg_idx_str += c;
      }
      else if (c == FORMAT_ITM_LHS || c == FORMAT_ITM_RHS) {
        boolean pos_is_end = pos == fmt_len - 1;
        if (!pos_is_end && Char_at(fmt, pos + 1) == c) { // "{{" or "}}": escape by doubling
          bfr.Add_char(c);
          pos++;
        }
        else if (c == FORMAT_ITM_LHS && !pos_is_end) // only "{" can open an item
          inside_brackets = true;
        else // last char is "{" or "}", or a lone "}": just add it
          bfr.Add_char(c);
      }
      else
        bfr.Add_char(c);
      pos++;
    }
    if (inside_brackets) // unclosed bracket; add FORMAT_ITM_LHS and whatever is in arg_idx_str; ex: "{0"
      bfr.Add_char(FORMAT_ITM_LHS).Add(arg_idx_str);
    return bfr.To_str();
  }

  /** Decodes all of v as UTF-8; returns null when v is null. */
  public static String New_bry_utf8(byte[] v) {return v == null ? null : New_bry_utf8(v, 0, v.length);}

  /**
   * Decodes v[bgn..end) as UTF-8.
   *
   * @param v   bytes to decode; null yields null
   * @param bgn index of first byte (inclusive)
   * @param end index after the last byte (exclusive)
   * @return decoded string, or null when v is null
   * Any exception raised while decoding (including an invalid range) is wrapped by Err_.New_fmt.
   */
  public static String New_bry_utf8(byte[] v, int bgn, int end) {
    try {
      return v == null
        ? null
        : new String(v, bgn, end - bgn, "UTF-8");
    }
    catch (Exception e) {throw Err_.New_fmt(e, "unsupported encoding; bgn={0} end={1}", bgn, end);}
  }
}

@ -0,0 +1,47 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.objects.strings; import gplx.*; import gplx.objects.*;
import org.junit.*; import gplx.tests.*;
// Unit tests for String_; each case is delegated to String__fxt, which compares via Gftest_fxt.
public class String__tst {
private final String__fxt fxt = new String__fxt();
// String_.Len: empty and non-empty input
@Test public void Len() {
fxt.Test__Len("" , 0);
fxt.Test__Len("abc", 3);
}
// String_.Format: argument order for each case is (expected, fmt, args...)
@Test public void Format() {
fxt.Test__Format("" , ""); // empty fmt
fxt.Test__Format("" , "", "a"); // empty fmt w/ args
fxt.Test__Format("a" , "a"); // no args
fxt.Test__Format("a" , "{0}", "a"); // args = 1
fxt.Test__Format("a + b" , "{0} + {1}", "a", "b"); // args = n
fxt.Test__Format("{" , "{{", 0); // escape "{"
fxt.Test__Format("}" , "}}", 0); // escape "}"
fxt.Test__Format("{a0c}" , "{a{0}c}", 0); // nested; inner "{0}" is a valid item and the outer brackets stay literal
fxt.Test__Format("{a{b}c}" , "{a{b}c}", 0); // nested; inner "{b}" is not a number, so everything stays literal
fxt.Test__Format("{1}" , "{1}", "a"); // out of bounds; item is left as-is
fxt.Test__Format("{a} {b}" , "{a} {b}", 0); // invalid arg; items are left as-is
fxt.Test__Format("{a}0{b}1", "{a}{0}{b}{1}", 0, 1); // invalid and valid args
fxt.Test__Format("{0", "{0", 0); // dangling; unclosed item is left as-is
}
}
class String__fxt {
  /** Asserts that String_.Len(v) equals expd. */
  public void Test__Len(String v, int expd) {
    int actl = String_.Len(v);
    Gftest_fxt.Eq__int(expd, actl);
  }

  /** Asserts that String_.Format(fmt, ary) produces expd. */
  public void Test__Format(String expd, String fmt, Object... ary) {
    String actl = String_.Format(fmt, ary);
    Gftest_fxt.Eq__str(expd, actl);
  }
}

@ -0,0 +1,76 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.objects.strings.bfrs; import gplx.*; import gplx.objects.*; import gplx.objects.strings.*;
import java.lang.*;
import gplx.objects.primitives.*;
import gplx.objects.errs.*;
/**
 * Chainable wrapper around java.lang.StringBuilder.
 * Every Add_* / Del / Clear method returns this instance so calls can be chained.
 */
public class String_bfr {
  private java.lang.StringBuilder sb = new java.lang.StringBuilder();

  /** True when nothing has been added (or everything was cleared). */
  public boolean Has_none() {return this.Len() == 0;}
  /** True when at least one char is buffered. */
  public boolean Has_some() {return this.Len() > 0;}

  /** Appends String_.Format(format, args). */
  public String_bfr Add_fmt(String format, Object... args) {Add(String_.Format(format, args)); return this;}
  public String_bfr Add_char_pipe()  {return Add_char(Char_code_.Pipe);}
  public String_bfr Add_char_nl()    {return Add_char(Char_code_.New_line);}
  public String_bfr Add_char_space() {return Add_char(Char_code_.Space);}
  public String_bfr Add_char_colon() {return Add_char(Char_code_.Colon);}

  /** Appends c repeat times; a repeat of 0 or less appends nothing. */
  public String_bfr Add_char_repeat(char c, int repeat) {
    this.Ensure_capacity(this.Len() + repeat);
    for (int i = 0; i < repeat; i++)
      Add_char(c);
    return this;
  }

  /**
   * Appends val, left-padded with pad_char up to str_len chars.
   * The digit count comes from Int_.Count_digits; when val already has str_len or more digits, no padding is added.
   */
  public String_bfr Add_int_pad_bgn(char pad_char, int str_len, int val) {
    int digit_len = Int_.Count_digits(val);
    int pad_len = str_len - digit_len;
    if (pad_len > 0) // note that this skips pad_len == 0, as well as guarding against negative pad_len; EX: pad(" ", 3, 1234) -> "1234"
      Add_char_repeat(pad_char, pad_len);
    Add_int(val);
    return this;
  }

  /** Appends Bool_.True_str or Bool_.False_str. */
  public String_bfr Add_bool(boolean val) {
    this.Add(val ? Bool_.True_str : Bool_.False_str);
    return this;
  }

  /** Appends "y" for true and "n" for false. */
  public String_bfr Add_bool_as_yn(boolean val) {
    this.Add(val ? "y" : "n");
    return this;
  }

  /** Removes all buffered chars. */
  public String_bfr Clear() {Del(0, this.Len()); return this;}

  /** Returns the buffered text and then empties the buffer. */
  public String To_str_and_clear() {
    String rv = To_str();
    Clear();
    return rv;
  }
  @Override public String toString() {return To_str();}
  public String To_str() {return sb.toString();}
  public int Len() {return sb.length();}

  /** Inserts s at position idx. */
  public String_bfr Add_at(int idx, String s)              {sb.insert(idx, s); return this;}
  public String_bfr Add(String s)                          {sb.append(s); return this;}
  public String_bfr Add_char(char c)                       {sb.append(c); return this;}
  /** Appends the decimal text of the byte (StringBuilder.append(int)). */
  public String_bfr Add_byte(byte i)                       {sb.append(i); return this;}
  public String_bfr Add_int(int i)                         {sb.append(i); return this;}
  public String_bfr Add_long(long i)                       {sb.append(i); return this;}
  public String_bfr Add_double(double i)                   {sb.append(i); return this;}
  /** Appends count chars of ary, starting at bgn. */
  public String_bfr Add_mid(char[] ary, int bgn, int count) {sb.append(ary, bgn, count); return this;}
  public String_bfr Add_obj(Object o)                      {sb.append(o); return this;}

  /** Appends v decoded as UTF-8; a null array appends nothing. */
  public String_bfr Add_bry(byte[] v) {
    if (v != null)
      sb.append(String_.New_bry_utf8(v));
    return this;
  }
  private void Ensure_capacity(int capacity) {sb.ensureCapacity(capacity);}

  /**
   * Deletes len chars starting at bgn.
   * NOTE: StringBuilder.delete takes (start, end), not (start, length), so len must be converted to an end index;
   * passing len directly only worked when bgn was 0 (as in Clear).
   */
  public String_bfr Del(int bgn, int len) {sb.delete(bgn, bgn + len); return this;}
}

@ -0,0 +1,25 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.objects.strings.char_sources; import gplx.*; import gplx.objects.*; import gplx.objects.strings.*;
/**
 * Read-only view over a sequence of data items backed by a String.
 * What one "data item" is depends on the implementation (the Ustring classes use either a Java char or a codepoint).
 */
public interface Char_source {
  /** The backing String. */
  String Src();

  /** Number of data items in this source. */
  int Len_in_data();

  /** Data item at pos. */
  int Get_data(int pos);

  /** String for the data items from bgn (inclusive) to end (exclusive). */
  String Substring(int bgn, int end);

  /** Position of the first occurrence of find at or after bgn, or -1 when there is none. */
  int Index_of(Char_source find, int bgn);

  /**
   * Compares a run of items in this source (from lhs_bgn) with a run in rhs (from rhs_bgn).
   * NOTE(review): the implementations in Ustring use rhs_end as the number of items to compare, not as an end position -- confirm against callers.
   */
  boolean Eq(int lhs_bgn, Char_source rhs, int rhs_bgn, int rhs_end);
}

@ -0,0 +1,30 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.objects.strings.char_sources; import gplx.*; import gplx.objects.*; import gplx.objects.strings.*;
public class Char_source_ {
  /**
   * Finds the first position in src that holds any of the chars in ary.
   *
   * @param src string to scan
   * @param ary chars to look for
   * @return index of the first matching char, or -1 when none of the chars occurs
   */
  public static int Index_of_any(String src, char[] ary) {
    int src_len = String_.Len(src);
    int ary_len = ary.length;
    int src_pos = 0;
    while (src_pos < src_len) {
      char cur = String_.Char_at(src, src_pos);
      int ary_pos = 0;
      while (ary_pos < ary_len) {
        if (cur == ary[ary_pos])
          return src_pos;
        ary_pos++;
      }
      src_pos++;
    }
    return -1;
  }
}

@ -0,0 +1,151 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.objects.strings.unicodes; import gplx.*; import gplx.objects.*; import gplx.objects.strings.*;
import gplx.objects.errs.*;
import gplx.objects.brys.*;
import gplx.objects.strings.char_sources.*;
/**
 * Char_source that also tracks its length in Java chars and can convert between
 * char positions (indexes into Src()) and data positions (indexes used by Get_data).
 */
public interface Ustring extends Char_source {
  /** Converts a char position into the matching data position. */
  int Map_char_to_data(int pos);

  /** Converts a data position into the matching char position. */
  int Map_data_to_char(int pos);

  /** Length of the source measured in Java chars. */
  int Len_in_chars();
}
// Ustring for sources without surrogate pairs: every char is one data item, so char positions and data positions are identical
class Ustring_single implements Ustring {
  private final String src;
  private final int src_len;

  public Ustring_single(String src, int src_len) {
    this.src = src;
    this.src_len = src_len;
  }

  public String Src() {return src;}
  public int Len_in_chars() {return src_len;}
  public int Len_in_data() {return src_len;}
  public int Get_data(int i) {return String_.Char_at(src, i);}
  public String Substring(int bgn, int end) {return src.substring(bgn, end);}
  public int Index_of(Char_source find, int bgn) {return src.indexOf(find.Src(), bgn);}

  // compares rhs_end items, starting at lhs_bgn in this source and at rhs_bgn in rhs; NOTE: rhs_end is used as a count
  public boolean Eq(int lhs_bgn, Char_source rhs, int rhs_bgn, int rhs_end) {
    // either side too short to hold the requested run
    if (src_len < lhs_bgn + rhs_end)
      return false;
    if (rhs.Len_in_data() < rhs_bgn + rhs_end)
      return false;

    for (int i = 0; i < rhs_end; i++) {
      if (this.Get_data(lhs_bgn + i) != rhs.Get_data(rhs_bgn + i))
        return false;
    }
    return true;
  }

  public int Map_data_to_char(int i) {return Check_idx(i);}
  public int Map_char_to_data(int i) {return Check_idx(i);}

  // positions map 1:1; only validate that idx is between 0 and src_len (inclusive)
  private int Check_idx(int i) {
    boolean valid = i >= 0 && i <= src_len;
    if (!valid)
      throw Err_.New_fmt("invalid idx; idx={0} src={1}", i, src);
    return i;
  }
}
// Ustring for sources that contain surrogate pairs: data items are codepoints, stored in an int[] built once in the constructor
class Ustring_codepoints implements Ustring {
  private final String src;
  private final int chars_len;
  private final int codes_len;
  private final int[] codes;

  public Ustring_codepoints(String src, int chars_len, int codes_len) {
    // set members
    this.src = src;
    this.chars_len = chars_len;
    this.codes_len = codes_len;

    // make codes[]: one entry per codepoint; a surrogate pair is merged into one entry
    this.codes = new int[codes_len];
    int char_pos = 0;
    int code_pos = 0;
    while (char_pos < chars_len) {
      char hi = src.charAt(char_pos++);
      boolean is_surrogate_hi = hi >= Ustring_.Surrogate_hi_bgn && hi <= Ustring_.Surrogate_hi_end;
      if (!is_surrogate_hi) { // ordinary char; codepoint is same as char
        codes[code_pos++] = hi;
        continue;
      }

      // character is 1st part of surrogate-pair; 2nd part must exist
      if (char_pos == chars_len) throw Err_.New_fmt("invalid surrogate pair found; src={0}", src);
      int lo = src.charAt(char_pos++);
      codes[code_pos++] = Ustring_.Surrogate_cp_bgn + (hi - Ustring_.Surrogate_hi_bgn) * Ustring_.Surrogate_range + (lo - Ustring_.Surrogate_lo_bgn);
    }
  }

  public String Src() {return src;}
  public int Len_in_chars() {return chars_len;}
  public int Len_in_data() {return codes_len;}
  public int Get_data(int i) {return codes[i];}

  // bgn and end are codepoint positions; codepoints in the surrogate range are written back as two chars
  public String Substring(int bgn, int end) {
    StringBuilder rv = new StringBuilder();
    for (int i = bgn; i < end; i++) {
      int code = codes[i];
      boolean needs_pair = code >= Ustring_.Surrogate_cp_bgn && code <= Ustring_.Surrogate_cp_end;
      if (needs_pair) {
        int offset = code - 0x10000;
        rv.append((char)(offset / 0x400 + 0xD800));
        rv.append((char)(offset % 0x400 + 0xDC00));
      }
      else {
        rv.append((char)code);
      }
    }
    return rv.toString();
  }

  // returns the first codepoint position at or after bgn where all items of find match; -1 if none
  public int Index_of(Char_source find, int bgn) {
    int find_len = find.Len_in_data();
    int max = codes.length;
    for (int src_pos = bgn; src_pos < max; src_pos++) {
      if (Matches_at(find, find_len, src_pos, max))
        return src_pos;
    }
    return -1;
  }
  // true if every item of find equals the codepoint at the same offset from src_pos; false if find runs past the end
  private boolean Matches_at(Char_source find, int find_len, int src_pos, int max) {
    for (int find_pos = 0; find_pos < find_len; find_pos++) {
      int codes_idx = src_pos + find_pos;
      if (codes_idx >= max)
        return false;
      if (codes[codes_idx] != find.Get_data(find_pos))
        return false;
    }
    return true;
  }

  // compares rhs_end items, starting at lhs_bgn in this source and at rhs_bgn in rhs; NOTE: rhs_end is used as a count
  public boolean Eq(int lhs_bgn, Char_source rhs, int rhs_bgn, int rhs_end) {
    // either side too short to hold the requested run
    if (this.Len_in_data() < lhs_bgn + rhs_end)
      return false;
    if (rhs.Len_in_data() < rhs_bgn + rhs_end)
      return false;

    for (int i = 0; i < rhs_end; i++) {
      if (this.Get_data(lhs_bgn + i) != rhs.Get_data(rhs_bgn + i))
        return false;
    }
    return true;
  }

  public int Map_data_to_char(int code_pos) {
    if (code_pos == codes_len) return chars_len; // if code_pos is codes_len, return chars_len; allows "int end = u.Map_data_to_char(codes_len)"

    // count the chars taken by every codepoint before the requested pos: 2 for a surrogate-pair codepoint; 1 for anything else
    int char_pos = 0;
    int i = 0;
    while (i < code_pos) {
      char_pos++;
      if (codes[i] >= Ustring_.Surrogate_cp_bgn)
        char_pos++;
      i++;
    }
    return char_pos;
  }

  public int Map_char_to_data(int char_pos) {
    if (char_pos == chars_len) return codes_len; // if char_pos is chars_len, return codes_len; allows "int end = u.Map_char_to_data(str_len)"

    // count codepoints before the requested pos; a Surrogate_hi is not counted b/c its Surrogate_lo is
    int code_pos = 0;
    for (int i = 0; i < char_pos; i++) {
      char c = src.charAt(i);
      boolean is_surrogate_hi = c >= Ustring_.Surrogate_hi_bgn && c <= Ustring_.Surrogate_hi_end;
      if (!is_surrogate_hi) {
        code_pos++;
        continue;
      }
      if (i + 1 == char_pos) // char_pos is Surrogate_lo; return -1 since Surrogate_lo doesn't map to a code_pos
        return -1;
    }
    return code_pos;
  }
}

@ -0,0 +1,51 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.objects.strings.unicodes; import gplx.*; import gplx.objects.*; import gplx.objects.strings.*;
import gplx.objects.errs.*;
public class Ustring_ {
  // REF: https://en.wikipedia.org/wiki/Universal_Character_Set_characters
  public static final int Surrogate_hi_bgn = 0xD800;   // 55,296: Surrogate high start
  public static final int Surrogate_hi_end = 0xDBFF;   // 56,319: Surrogate high end
  public static final int Surrogate_lo_bgn = 0xDC00;   // 56,320: Surrogate low start
  public static final int Surrogate_lo_end = 0xDFFF;   // 57,343: Surrogate low end
  public static final int Surrogate_cp_bgn = 0x010000; // 65,536: Surrogate codepoint start
  public static final int Surrogate_cp_end = 0x10FFFF; // 1,114,111: Surrogate codepoint end
  public static final int Surrogate_range  = 0x400;    // 1,024: Surrogate range (end - start) for high / low

  /**
   * Builds a Ustring for src.
   * Picks the 1-char-per-item implementation when src has no surrogate pairs, and the codepoint-array implementation otherwise.
   *
   * @param src source string; a null raises the error built by Err_.New_null
   */
  public static Ustring New_codepoints(String src) {
    if (src == null) throw Err_.New_null("src");

    // calc lens
    int chars_len = src.length();
    int codes_len = Ustring_.Len(src, chars_len);

    // same len means no surrogate pairs
    if (chars_len == codes_len)
      return new Ustring_single(src, chars_len);
    return new Ustring_codepoints(src, chars_len, codes_len);
  }

  /**
   * Counts the codepoints in the first src_len chars of src.
   * A high surrogate and the char after it are counted as one codepoint.
   */
  public static int Len(String src, int src_len) {
    int count = 0;
    int pos = 0;
    while (pos < src_len) {
      char c = src.charAt(pos);
      boolean is_surrogate_hi = c >= Surrogate_hi_bgn && c <= Surrogate_hi_end;
      pos += is_surrogate_hi ? 2 : 1; // skip the low surrogate along with the high one
      count++;
    }
    return count;
  }
}

@ -0,0 +1,104 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.objects.strings.unicodes; import gplx.*; import gplx.objects.*; import gplx.objects.strings.*;
import org.junit.*; import gplx.tests.*;
import gplx.objects.errs.*;
// Unit tests for Ustring_ / Ustring; each case is delegated to Ustring_fxt, which compares via Gftest_fxt.
public class Ustring_tst {
private final Ustring_fxt fxt = new Ustring_fxt();
// zero-length source: 0 codes and 0 chars
@Test public void Empty() {
fxt.Init("");
fxt.Test__Len(0, 0);
}
// NOTE(review): identical to Empty (same input, same expectations); possibly meant to use a different input -- confirm
@Test public void Blank() {
fxt.Init("");
fxt.Test__Len(0, 0);
}
// source without surrogate pairs: code positions and char positions are the same
@Test public void Single() {
fxt.Init("Abc");
fxt.Test__Len(3, 3);
fxt.Test__Get_code(65, 98, 99);
fxt.Test__Map_code_to_char(0, 1, 2, 3);
fxt.Test__Map_char_to_code(0, 1, 2, 3);
}
// source with one surrogate pair (4th codepoint): 5 codes but 6 chars
@Test public void Multi() {
fxt.Init("a¢€𤭢b");
fxt.Test__Len(5, 6);
fxt.Test__Get_code(97, 162, 8364, 150370, 98);
fxt.Test__Map_code_to_char(0, 1, 2, 3, 5, 6);
fxt.Test__Map_char_to_code(0, 1, 2, 3, -1, 4, 5); // char 4 is the low surrogate, which has no code position
}
// argument order: (src, find, bgn, expected)
@Test public void Index_of() {
fxt.Test__Index_of("abc", "b", 0, 1); // basic
fxt.Test__Index_of("ab", "bc", 0, -1); // out-of-bounds
fxt.Test__Index_of("a¢e", "¢", 0, 1); // check UTF-8 strings still match at byte-level
}
// argument order: (src, bgn, end, expected)
@Test public void Substring() {
fxt.Test__Substring("abc", 1, 2, "b"); // basic
fxt.Test__Substring("¢bc", 1, 2, "b"); // check UTF-8 strings don't get lopped off
}
}
// Test fixture for Ustring: builds instances through Ustring_.New_codepoints and compares results with Gftest_fxt
class Ustring_fxt {
  private Ustring under;

  /** Sets the Ustring used by the Test__Len / Test__Get_code / Test__Map_* methods. */
  public void Init(String src) {
    this.under = Ustring_.New_codepoints(src);
  }

  /** Checks the length in data items ("codes") and in Java chars ("chars"). */
  public void Test__Len(int expd_codes, int expd_chars) {
    int actl_codes = under.Len_in_data();
    Gftest_fxt.Eq__int(expd_codes, actl_codes, "codes");
    int actl_chars = under.Len_in_chars();
    Gftest_fxt.Eq__int(expd_chars, actl_chars, "chars");
  }

  /** Checks every data item, in order. */
  public void Test__Get_code(int... expd) {
    int[] actl = new int[under.Len_in_data()];
    for (int code_pos = 0; code_pos < actl.length; code_pos++) {
      actl[code_pos] = under.Get_data(code_pos);
    }
    Gftest_fxt.Eq__ary(expd, actl);
  }

  /** Checks the char position of every data position, including the position just past the end. */
  public void Test__Map_code_to_char(int... expd) {
    int[] actl = new int[under.Len_in_data() + 1];
    for (int code_pos = 0; code_pos < actl.length; code_pos++) {
      actl[code_pos] = under.Map_data_to_char(code_pos);
    }
    Gftest_fxt.Eq__ary(expd, actl);
  }

  /** Checks the data position of every char position, including the position just past the end; a failed mapping is recorded as -1. */
  public void Test__Map_char_to_code(int... expd) {
    int[] actl = new int[under.Len_in_chars() + 1];
    for (int char_pos = 0; char_pos < actl.length; char_pos++) {
      int mapped = 0;
      try {
        mapped = under.Map_char_to_data(char_pos);
      }
      catch (Exception exc) { // treat an exception the same as an unmappable position
        mapped = -1;
        Err_.Noop(exc);
      }
      actl[char_pos] = mapped;
    }
    Gftest_fxt.Eq__ary(expd, actl);
  }

  /** Checks Index_of for a freshly-built src and find. */
  public void Test__Index_of(String src_str, String find_str, int bgn, int expd) {
    Ustring src = Ustring_.New_codepoints(src_str);
    Ustring find = Ustring_.New_codepoints(find_str);
    Gftest_fxt.Eq__int(expd, src.Index_of(find, bgn));
  }

  /** Checks Substring for a freshly-built src. */
  public void Test__Substring(String src_str, int bgn, int end, String expd) {
    Ustring src = Ustring_.New_codepoints(src_str);
    Gftest_fxt.Eq__str(expd, src.Substring(bgn, end));
  }
}

@ -0,0 +1,23 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.objects.types; import gplx.*; import gplx.objects.*;
public class Type_ {
  /**
   * Null-safe equality for Class objects.
   * Two nulls are equal; a null never equals a non-null; otherwise defers to Class.equals.
   */
  public static boolean Eq(Class<?> lhs, Class<?> rhs) {// DUPE_FOR_TRACKING: same as Object_.Eq
    return lhs == null
      ? rhs == null
      : lhs.equals(rhs);
  }
}

@ -0,0 +1,59 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.objects.types; import gplx.*; import gplx.objects.*;
import gplx.objects.primitives.*;
import gplx.objects.strings.*;
public class Type_ids_ {
  // SERIALIZABLE.N
  public static final int Id__obj     =  0;
  public static final int Id__null    =  1;
  public static final int Id__bool    =  2;
  public static final int Id__byte    =  3;
  public static final int Id__short   =  4;
  public static final int Id__int     =  5;
  public static final int Id__long    =  6;
  public static final int Id__float   =  7;
  public static final int Id__double  =  8;
  public static final int Id__char    =  9;
  public static final int Id__str     = 10;
  public static final int Id__bry     = 11;
  public static final int Id__date    = 12;
  public static final int Id__decimal = 13;
  public static final int Id__array   = 14;

  /** Returns the type id for the runtime class of o; a null object maps to Id__null. */
  public static int To_id_by_obj(Object o) {
    return o == null
      ? Type_ids_.Id__null
      : Type_ids_.To_id_by_type(o.getClass());
  }

  /**
   * Returns the type id for a class.
   * Boxed primitives, String and byte[] have their own ids; anything else maps to Id__obj.
   */
  public static int To_id_by_type(Class<?> type) {
    if (Type_.Eq(type, Int_.Cls_ref_type))    return Id__int;
    if (Type_.Eq(type, String_.Cls_ref_type)) return Id__str;
    if (Type_.Eq(type, byte[].class))         return Id__bry;
    if (Type_.Eq(type, Bool_.Cls_ref_type))   return Id__bool;
    if (Type_.Eq(type, Byte_.Cls_ref_type))   return Id__byte;
    if (Type_.Eq(type, Long_.Cls_ref_type))   return Id__long;
    if (Type_.Eq(type, Double_.Cls_ref_type)) return Id__double;
    // not yet mapped: Decimal_.Cls_ref_type -> Id__decimal
    // not yet mapped: Date_.Cls_ref_type -> Id__date
    if (Type_.Eq(type, Float_.Cls_ref_type))  return Id__float;
    if (Type_.Eq(type, Short_.Cls_ref_type))  return Id__short;
    if (Type_.Eq(type, Char_.Cls_ref_type))   return Id__char;
    return Id__obj;
  }
}

@ -0,0 +1,220 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.tests; import gplx.*;
import gplx.objects.*;
import gplx.objects.errs.*;
import gplx.objects.primitives.*; import gplx.objects.brys.*;
import gplx.objects.strings.*; import gplx.objects.strings.bfrs.*;
import gplx.objects.arrays.*; import gplx.objects.types.*;
/**
 * Assertion helpers for unit tests.
 * Each Eq__* method returns silently when expd equals actl; otherwise it builds a message that shows
 * the optional caller message (msg_fmt / msg_args, formatted with String_.Format) plus both values,
 * and throws the error built by Err_.New_msg.
 */
public class Gftest_fxt {
  // buffer for failure messages; shared by all Eq__* methods and emptied by To_str_and_clear when the error is built
  private static final String_bfr bfr = new String_bfr();

  // -------- arrays: compared item by item; the failure message lists every index and marks the ones that differ --------
  public static void Eq__ary(Object[] expd, Object[] actl, String msg_fmt, Object... msg_args)    {Eq__array(Type_ids_.Id__obj, expd, actl, msg_fmt, msg_args);}
  public static void Eq__ary(boolean[] expd, boolean[] actl, String msg_fmt, Object... msg_args)  {Eq__array(Type_ids_.Id__bool, expd, actl, msg_fmt, msg_args);}
  public static void Eq__ary(int[] expd, int[] actl)                                              {Eq__array(Type_ids_.Id__int, expd, actl, "");}
  public static void Eq__ary(int[] expd, int[] actl, String msg_fmt, Object... msg_args)          {Eq__array(Type_ids_.Id__int, expd, actl, msg_fmt, msg_args);}
  public static void Eq__ary(long[] expd, long[] actl, String msg_fmt, Object... msg_args)        {Eq__array(Type_ids_.Id__long, expd, actl, msg_fmt, msg_args);}
  public static void Eq__ary(byte[] expd, byte[] actl, String msg_fmt, Object... msg_args)        {Eq__array(Type_ids_.Id__byte, expd, actl, msg_fmt, msg_args);}
//	public static void Eq__ary__lines(String expd, String actl) {Eq__ary__lines(expd, actl, "no_msg");}
//	public static void Eq__ary__lines(String expd, byte[] actl) {Eq__ary__lines(expd, String_.New_bry_utf8(actl), "no_msg");}
//	public static void Eq__ary__lines(String expd, byte[] actl, String msg_fmt, params Object[] msg_args) {Eq__ary__lines(expd, String_.New_bry_utf8(actl), msg_fmt, msg_args);}
//	public static void Eq__ary__lines(String expd, String actl, String msg_fmt, params Object[] msg_args) {Eq__array(Type_ids_.Id__str, Bry_split_.Split_lines(Bry_.New_utf08(expd)), Bry_split_.Split_lines(Bry_.New_utf08(actl)), msg_fmt, msg_args);}
  // String arrays are converted with Bry_.Ary and compared as byte arrays
  public static void Eq__ary(String[] expd, String[] actl)                                        {Eq__array(Type_ids_.Id__bry, Bry_.Ary(expd), Bry_.Ary(actl), "no_msg");}
  public static void Eq__ary(String[] expd, String[] actl, String msg_fmt, Object... msg_args)    {Eq__array(Type_ids_.Id__bry, Bry_.Ary(expd), Bry_.Ary(actl), msg_fmt, msg_args);}
  public static void Eq__ary(String[] expd, byte[][] actl, String msg_fmt, Object... msg_args)    {Eq__array(Type_ids_.Id__bry, Bry_.Ary(expd), actl, msg_fmt, msg_args);}
  public static void Eq__ary(byte[][] expd, byte[][] actl, String msg_fmt, Object... msg_args)    {Eq__array(Type_ids_.Id__bry, expd, actl, msg_fmt, msg_args);}
  private static void Eq__array(int type_tid, Object expd_ary, Object actl_ary, String msg_fmt, Object... msg_args) {
    boolean[] failures = Calc__failures(type_tid, expd_ary, actl_ary);
    if (failures != null) { // null means every item matched
      Write_fail_head(bfr, msg_fmt, msg_args);
      Write_fail_ary(bfr, failures, type_tid, expd_ary, actl_ary);
      throw Err_.New_msg(bfr.To_str_and_clear());
    }
  }

  // -------- null checks: expd == true means "actl must be null"; expd == false means "actl must not be null" --------
  public static void Eq__null(boolean expd, Object actl) {Eq__null(expd, actl, null);}
  public static void Eq__null(boolean expd, Object actl, String msg_fmt, Object... msg_args) {
    if (	expd && actl == null
      ||	!expd && actl != null
      ) return;
    Write_fail_head(bfr, msg_fmt, msg_args);
    String expd_str = expd ? "null" : "not null";
    String actl_str = actl == null ? "null" : "not null";
    bfr.Add("expd: ").Add(expd_str).Add_char_nl();
    bfr.Add("actl: ").Add(actl_str).Add_char_nl();
    bfr.Add(Section_end);
    throw Err_.New_msg(bfr.To_str_and_clear());
  }

  /**
   * Compares two objects by their String form; a null on either side is compared as "<<NULL>>".
   * NOTE: both values go through the same To_str_or default, and no msg_fmt is passed, so the failure message has no header.
   */
  public static void Eq__obj_or_null(Object expd, Object actl) {
    if (expd == null) expd = Null;
    if (actl == null) actl = Null;
    Eq__str(Object_.To_str_or(expd, Null), Object_.To_str_or(actl, Null));
  }

  // -------- scalars --------
  public static void Eq__str(String expd, byte[] actl, String msg_fmt, Object... msg_args) {Eq__str(expd, String_.New_bry_utf8(actl), msg_fmt, msg_args);}
  public static void Eq__str(String expd, byte[] actl) {Eq__str(expd, String_.New_bry_utf8(actl), null);}
  public static void Eq__str(String expd, String actl) {Eq__str(expd, actl, null);}
  public static void Eq__str(String expd, String actl, String msg_fmt, Object... msg_args) {
    if (String_.Eq(expd, actl)) return;
    Write_fail_head(bfr, msg_fmt, msg_args);
    bfr.Add("expd: ").Add(expd).Add_char_nl();
    bfr.Add("actl: ").Add(actl).Add_char_nl();
    bfr.Add(Section_end);
    throw Err_.New_msg(bfr.To_str_and_clear());
  }
  public static void Eq__bry(byte[] expd, byte[] actl) {Eq__bry(expd, actl, null);}
  public static void Eq__bry(byte[] expd, byte[] actl, String msg_fmt, Object... msg_args) {
    if (Bry_.Eq(expd, actl)) return;
    Write_fail_head(bfr, msg_fmt, msg_args);
    bfr.Add("expd: ").Add(String_.New_bry_utf8(expd)).Add_char_nl();
    bfr.Add("actl: ").Add(String_.New_bry_utf8(actl)).Add_char_nl();
    bfr.Add(Section_end);
    throw Err_.New_msg(bfr.To_str_and_clear());
  }
  public static void Eq__long(long expd, long actl) {Eq__long(expd, actl, null);}
  public static void Eq__long(long expd, long actl, String msg_fmt, Object... msg_args) {
    if (expd == actl) return;
    Write_fail_head(bfr, msg_fmt, msg_args);
    bfr.Add("expd: ").Add_long(expd).Add_char_nl();
    bfr.Add("actl: ").Add_long(actl).Add_char_nl();
    bfr.Add(Section_end);
    throw Err_.New_msg(bfr.To_str_and_clear());
  }
  public static void Eq__byte(byte expd, byte actl) {Eq__byte(expd, actl, null);}
  public static void Eq__byte(byte expd, byte actl, String msg_fmt, Object... msg_args) {
    if (expd == actl) return;
    Write_fail_head(bfr, msg_fmt, msg_args);
    bfr.Add("expd: ").Add_byte(expd).Add_char_nl();
    bfr.Add("actl: ").Add_byte(actl).Add_char_nl();
    bfr.Add(Section_end);
    throw Err_.New_msg(bfr.To_str_and_clear());
  }
  public static void Eq__int(int expd, int actl) {Eq__int(expd, actl, null);}
  public static void Eq__int(int expd, int actl, String msg_fmt, Object... msg_args) {
    if (expd == actl) return;
    Write_fail_head(bfr, msg_fmt, msg_args);
    bfr.Add("expd: ").Add_int(expd).Add_char_nl();
    bfr.Add("actl: ").Add_int(actl).Add_char_nl();
    bfr.Add(Section_end);
    throw Err_.New_msg(bfr.To_str_and_clear());
  }
  public static void Eq__bool_y(boolean actl) {Eq__bool(Bool_.Y, actl, null);}
  public static void Eq__bool_y(boolean actl, String msg_fmt, Object... msg_args) {Eq__bool(Bool_.Y, actl, msg_fmt, msg_args);}
  public static void Eq__bool(boolean expd, boolean actl) {Eq__bool(expd, actl, null);}
  public static void Eq__bool(boolean expd, boolean actl, String msg_fmt, Object... msg_args) {
    if (expd == actl) return;
    Write_fail_head(bfr, msg_fmt, msg_args);
    bfr.Add("expd: ").Add_bool(expd).Add_char_nl();
    bfr.Add("actl: ").Add_bool(actl).Add_char_nl();
    bfr.Add(Section_end);
    throw Err_.New_msg(bfr.To_str_and_clear());
  }
  public static void Eq__double(double expd, double actl) {Eq__double(expd, actl, null);}
  // NOTE: compares with "=="; there is no tolerance, and NaN never equals NaN
  public static void Eq__double(double expd, double actl, String msg_fmt, Object... msg_args) {
    if (expd == actl) return;
    Write_fail_head(bfr, msg_fmt, msg_args);
    bfr.Add("expd: ").Add_double(expd).Add_char_nl();
    bfr.Add("actl: ").Add_double(actl).Add_char_nl();
    bfr.Add(Section_end);
    throw Err_.New_msg(bfr.To_str_and_clear());
  }

  // -------- failure-message writers --------
  // writes the opening separator and, when msg_fmt is not null, the formatted caller message followed by the middle separator
  private static void Write_fail_head(String_bfr bfr, String msg_fmt, Object[] msg_args) {
    bfr.Add(Section_bgn);
    if (msg_fmt != null) {
      bfr.Add(String_.Format(msg_fmt, msg_args));
      bfr.Add(Section_mid);
    }
  }
  // writes one "idx: expd" line per index; a failed index gets a second "!= actl" line
  private static void Write_fail_ary(String_bfr bfr, boolean[] failures, int type_id, Object expd_ary, Object actl_ary) {
    int len = failures.length;
    int expd_len = Array_.Len(expd_ary);
    int actl_len = Array_.Len(actl_ary);
    for (int i = 0; i < len; ++i) {
      boolean failure = failures[i];
      int pad_len = 5 - Int_.Count_digits(i);
      bfr.Add_int_pad_bgn(Char_code_.Num_0, pad_len, i).Add_char_colon().Add_char_space();
      Write__itm(bfr, type_id, expd_ary, expd_len, i);
      if (failure) {
        bfr.Add(Eq_n).Add_char_repeat(Char_code_.Space, pad_len - 1); // presumably pads so that actl lines up under expd -- depends on Int_.Count_digits
        Write__itm(bfr, type_id, actl_ary, actl_len, i);
      }
    }
    bfr.Add(Section_end);
  }
  // writes the item at idx followed by a new-line; writes "<<NULL>>" when idx is past the end of ary
  private static void Write__itm(String_bfr bfr, int type_id, Object ary, int len, int idx) {
    if (idx < len) {
      Object val = Array_.Get_at(ary, idx);
      switch (type_id) {
        case Type_ids_.Id__bool:   bfr.Add_bool_as_yn(Bool_.Cast(val)); break;
        case Type_ids_.Id__bry:    bfr.Add_bry((byte[])val); break;
        case Type_ids_.Id__long:   bfr.Add_long(Long_.Cast(val)); break;
        case Type_ids_.Id__int:    bfr.Add_int(Int_.Cast(val)); break;
        case Type_ids_.Id__byte:   bfr.Add_int((int)(Byte_.Cast(val))); break;
        case Type_ids_.Id__obj:    bfr.Add(Object_.To_str(val)); break;
        default:                   throw Err_.New_unhandled_default(type_id);
      }
    }
    else
      bfr.Add(Null);
    bfr.Add_char_nl();
  }
  // returns null when both arrays are empty or all items match; else a flag per index (true = items differ, or one side is missing / null)
  private static boolean[] Calc__failures(int tid, Object expd_ary, Object actl_ary) {
    int expd_len = Array_.Len(expd_ary);
    int actl_len = Array_.Len(actl_ary);
    int max_len = expd_len > actl_len ? expd_len : actl_len; if (max_len == 0) return null;
    boolean[] rv = null;
    for (int i = 0; i < max_len; ++i) {
      Object expd_obj = i < expd_len ? Array_.Get_at(expd_ary, i) : null;
      Object actl_obj = i < actl_len ? Array_.Get_at(actl_ary, i) : null;
      boolean eq = false;
      if		(expd_obj == null && actl_obj == null) eq = true;
      else if (expd_obj == null || actl_obj == null) eq = false;
      else {
        switch (tid) {
          case Type_ids_.Id__bool:   eq = Bool_.Cast(expd_obj) == Bool_.Cast(actl_obj); break;
          case Type_ids_.Id__bry:    eq = Bry_.Eq((byte[])expd_obj, (byte[])actl_obj); break;
          case Type_ids_.Id__long:   eq = Long_.Cast(expd_obj) == Long_.Cast(actl_obj); break;
          case Type_ids_.Id__int:    eq = Int_.Cast(expd_obj) == Int_.Cast(actl_obj); break;
          case Type_ids_.Id__byte:   eq = Byte_.Cast(expd_obj) == Byte_.Cast(actl_obj); break;
          case Type_ids_.Id__obj:    eq = Object_.Eq(expd_obj, actl_obj); break;
          default:                   throw Err_.New_unhandled_default(tid); // same handling as Write__itm; fail before anything is written to bfr
        }
      }
      if (!eq) {
        if (rv == null) { // allocate lazily so that the all-equal case returns null
          rv = new boolean[max_len];
        }
        rv[i] = true;
      }
    }
    return rv;
  }
  private static final String Null = "<<NULL>>";
  private static final String Eq_n = "!= "
  , Section_bgn = "\n************************************************************************************************\n"
  , Section_mid = "\n------------------------------------------------------------------------------------------------\n"
  , Section_end =   "________________________________________________________________________________________________"
  ;

  // -------- debugging aids: print values to stdout --------
//	public static void Write(byte[] s, int b, int e) {Write(Bry_.Mid(s, b, e));}
  public static void Write() {Write("tmp");}
  // prints each value wrapped in single quotes and separated by a space; null values are rendered by Object_.To_str_or_null_mark
  public static void Write(Object... ary) {
    String_bfr bfr = new String_bfr();
    int ary_len = Array_.Len(ary);
    for (int i = 0; i < ary_len; i++) {
      bfr.Add("'");
      bfr.Add(Object_.To_str_or_null_mark(ary[i]));
      bfr.Add("' ");
    }
    System.out.println(bfr.To_str() + String_.Lf);
  }
}
Loading…
Cancel
Save