mirror of https://github.com/gnosygnu/xowa
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
481 lines
16 KiB
481 lines
16 KiB
/*
|
|
XOWA: the XOWA Offline Wiki Application
|
|
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
|
|
|
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
|
or alternatively under the terms of the Apache License Version 2.0.
|
|
|
|
You may use XOWA according to either of these licenses as is most appropriate
|
|
for your project on a case-by-case basis.
|
|
|
|
The terms of each license can be found in the source code repository:
|
|
|
|
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
|
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
|
*/
|
|
package gplx.xowa.mediawiki; import gplx.*; import gplx.xowa.*;
|
|
import gplx.core.btries.*;
|
|
import gplx.core.intls.*;
|
|
import gplx.objects.strings.unicodes.*;
|
|
import gplx.core.primitives.*;
|
|
public class XophpString_ implements XophpCallbackOwner {
|
|
public static final String Null = null;
|
|
public static boolean is_true(String s) {return s != null;} // handles code like "if ($var)" where var is an Object;
|
|
|
|
// REF.PHP: https://www.php.net/manual/en/function.strpos.php
|
|
public static int strpos(String haystack, String needle) {return strpos(haystack, needle, 0);}
|
|
public static int strpos(String haystack, String needle, int offset) {
|
|
if (offset < 0) {
|
|
offset = String_.Len(haystack) + offset;
|
|
}
|
|
return String_.FindFwd(haystack, needle, offset);
|
|
}
|
|
public static int strpos(byte[] src, byte find) {return strpos(src, find, 0, src.length);}
|
|
public static int strpos(byte[] src, byte find, int bgn, int end) {
|
|
return Bry_find_.Find_fwd(src, find, bgn, end);
|
|
}
|
|
public static int strpos_NULL = -1;
|
|
|
|
// REF.PHP: https://www.php.net/manual/en/function.substr.php
|
|
public static String substr(String src, int bgn, int len) {return String_.new_u8(substr(Bry_.new_u8(src), bgn, len));}
|
|
public static String substr(String src, int bgn) {return String_.new_u8(substr(Bry_.new_u8(src), bgn, String_.Len(src)));}
|
|
public static byte[] substr(byte[] src, int bgn) {return substr(src, bgn, src.length);}
|
|
public static byte[] substr(byte[] src, int bgn, int len) {
|
|
int src_len = src.length;
|
|
if (bgn < 0) bgn = src_len + bgn; // handle negative
|
|
if (bgn < 0) bgn = 0; // handle out of bounds; EX: ("a", -1, -1)
|
|
int end = len < 0 ? src_len + len : bgn + len;
|
|
if (end > src.length) end = src.length;; // handle out of bounds;
|
|
return Bry_.Mid(src, bgn, end);
|
|
}
|
|
public static byte substr_byte(byte[] src, int bgn) {return substr_byte(src, bgn, src.length);}
|
|
public static byte substr_byte(byte[] src, int bgn, int len) {
|
|
int src_len = src.length;
|
|
if (src_len == 0) return Byte_ascii.Null;
|
|
if (bgn < 0) bgn = src_len + bgn; // handle negative
|
|
if (bgn < 0) bgn = 0; // handle out of bounds; EX: ("a", -1, -1)
|
|
int end = len < 0 ? src_len + len : bgn + len;
|
|
if (end > src.length) end = src.length;; // handle out of bounds;
|
|
return src[bgn];
|
|
}
|
|
// REF.PHP: https://www.php.net/manual/en/function.strspn.php
|
|
public static Hash_adp strspn_hash(String mask) {
|
|
Hash_adp rv = Hash_adp_.New();
|
|
int mask_len = String_.Len(mask);
|
|
int i = 0;
|
|
while (i < mask_len) {
|
|
char hi_char = String_.CharAt(mask, i);
|
|
String key = "";
|
|
if (Utf16_.Len_by_char(hi_char) == 2) {
|
|
i++;
|
|
char lo_char = String_.CharAt(mask, i);
|
|
int surrogate_char = Utf16_.Surrogate_merge(Char_.To_int(hi_char), Char_.To_int(lo_char));
|
|
key = String_.new_u8(Utf16_.Encode_int_to_bry(surrogate_char));
|
|
}
|
|
else {
|
|
key = Char_.To_str(hi_char);
|
|
}
|
|
rv.Add_if_dupe_use_1st(key, key);
|
|
i++;
|
|
}
|
|
return rv;
|
|
}
|
|
public static int strspn(String subject, Hash_adp mask, int start) {return strspn(subject, mask, start, Int_.Null);}
|
|
public static int strspn(String subject, Hash_adp mask, int start, int length) {
|
|
int subject_len = String_.Len(subject);
|
|
|
|
// get subject_end
|
|
int subject_end = 0;
|
|
if (length == Int_.Null) {
|
|
subject_end = subject_len;
|
|
}
|
|
else if (length < 0) {
|
|
subject_end = subject_len + length; // If length is given and is negative, then subject will be examined from the starting position up to length characters from the end of subject.
|
|
if (subject_end < start)
|
|
subject_end = start;
|
|
}
|
|
else {
|
|
subject_end = start + length; // If length is given and is non-negative, then subject will be examined for length characters after the starting position.
|
|
if (subject_end > subject_len)
|
|
subject_end = subject_len;
|
|
}
|
|
|
|
// loop subject until encountering character not in mask
|
|
int rv = 0;
|
|
int i = start;
|
|
while (i < subject_end) {
|
|
char subject_char = String_.CharAt(subject, i);
|
|
String mask_key = "";
|
|
if (Utf16_.Len_by_char(subject_char) == 2) {
|
|
i++;
|
|
char lo_char = String_.CharAt(subject, i);
|
|
int surrogate_char = Utf16_.Surrogate_merge(Char_.To_int(subject_char), Char_.To_int(lo_char));
|
|
mask_key = String_.new_u8(Utf16_.Encode_int_to_bry(surrogate_char));
|
|
}
|
|
else {
|
|
mask_key = Char_.To_str(subject_char);
|
|
}
|
|
|
|
if (mask.Has(mask_key)) {
|
|
rv++;
|
|
}
|
|
else {
|
|
break;
|
|
}
|
|
i++;
|
|
}
|
|
return rv;
|
|
}
|
|
public static int strspn_fwd__ary(byte[] src, boolean[] find, int bgn, int max, int src_len) {
|
|
if (max == -1) max = src_len;
|
|
int rv = 0;
|
|
for (int i = bgn; i < src_len; i++) {
|
|
if (find[src[i] & 0xFF] && rv < max) // PATCH.JAVA:need to convert to unsigned byte
|
|
rv++;
|
|
else
|
|
break;
|
|
}
|
|
return rv;
|
|
}
|
|
public static int strspn_fwd__byte(byte[] src, byte find, int bgn, int max, int src_len) {
|
|
if (max == -1) max = src_len;
|
|
int rv = 0;
|
|
for (int i = bgn; i < src_len; i++) {
|
|
if (find == src[i] && rv < max)
|
|
rv++;
|
|
else
|
|
break;
|
|
}
|
|
return rv;
|
|
}
|
|
public static int strspn_fwd__space_or_tab(byte[] src, int bgn, int max, int src_len) {
|
|
if (max == -1) max = src_len;
|
|
int rv = 0;
|
|
for (int i = bgn; i < src_len; i++) {
|
|
switch (src[i]) {
|
|
case Byte_ascii.Space:
|
|
case Byte_ascii.Tab:
|
|
if (rv < max) {
|
|
rv++;
|
|
continue;
|
|
}
|
|
break;
|
|
}
|
|
break;
|
|
}
|
|
return rv;
|
|
}
|
|
public static int strspn_bwd__byte(byte[] src, byte find, int bgn, int max) {
|
|
if (max == -1) max = Int_.Max_value;
|
|
int rv = 0;
|
|
for (int i = bgn - 1; i > -1; i--) {
|
|
if (find == src[i] && rv < max)
|
|
rv++;
|
|
else
|
|
break;
|
|
}
|
|
return rv;
|
|
}
|
|
public static int strspn_bwd__ary(byte[] src, boolean[] find, int bgn, int max) {
|
|
if (max == -1) max = Int_.Max_value;
|
|
int rv = 0;
|
|
for (int i = bgn - 1; i > -1; i--) {
|
|
if (find[src[i & 0xFF]] && rv < max) // PATCH.JAVA:need to convert to unsigned byte
|
|
rv++;
|
|
else
|
|
break;
|
|
}
|
|
return rv;
|
|
}
|
|
public static int strspn_bwd__space_or_tab(byte[] src, int bgn, int max) {
|
|
if (max == -1) max = Int_.Max_value;
|
|
int rv = 0;
|
|
for (int i = bgn - 1; i > -1; i--) {
|
|
switch (src[i]) {
|
|
case Byte_ascii.Space:
|
|
case Byte_ascii.Tab:
|
|
if (rv < max) {
|
|
rv++;
|
|
continue;
|
|
}
|
|
break;
|
|
}
|
|
break;
|
|
}
|
|
return rv;
|
|
}
|
|
public static byte[] strtr(byte[] src, Btrie_slim_mgr trie, Bry_bfr tmp, Btrie_rv trv) {
|
|
boolean dirty = false;
|
|
int src_bgn = 0;
|
|
int src_end = src.length;
|
|
int i = src_bgn;
|
|
|
|
while (true) {
|
|
if (i == src_end) break;
|
|
byte b = src[i];
|
|
Object o = trie.Match_at_w_b0(trv, b, src, i, src_end);
|
|
if (o == null) {
|
|
if (dirty) {
|
|
tmp.Add_byte(b);
|
|
}
|
|
i++;
|
|
}
|
|
else {
|
|
if (!dirty) {
|
|
dirty = true;
|
|
tmp.Add_mid(src, 0, i);
|
|
}
|
|
tmp.Add((byte[])o);
|
|
i = trv.Pos();
|
|
}
|
|
}
|
|
return dirty ? tmp.To_bry_and_clear() : src;
|
|
}
|
|
public static byte[] strtr(byte[] src, byte find, byte repl) {
|
|
return Bry_.Replace(src, 0, src.length, find, repl);
|
|
}
|
|
public static byte[] str_replace(byte find, byte repl, byte[] src) {
|
|
return Bry_.Replace(src, 0, src.length, find, repl);
|
|
}
|
|
public static byte[] str_replace(byte[] find, byte[] repl, byte[] src) {
|
|
return Bry_.Replace(src, find, repl);
|
|
}
|
|
public static byte[] strstr(byte[] src, byte[] find) {
|
|
int pos = Bry_find_.Find_fwd(src, find);
|
|
return pos == Bry_find_.Not_found ? null : Bry_.Mid(src, pos, src.length);
|
|
}
|
|
public static int strlen(String src) {return String_.Len(src);}
|
|
public static int strlen(byte[] src) {return src.length;}
|
|
|
|
// REF.PHP: https://www.php.net/manual/en/function.rtrim.php
|
|
private static final Hash_adp trim_ws_hash = Hash_adp_.New().Add_many_as_key_and_val
|
|
( Int_obj_ref.New(Byte_ascii.Space)
|
|
, Int_obj_ref.New(Byte_ascii.Tab)
|
|
, Int_obj_ref.New(Byte_ascii.Nl)
|
|
, Int_obj_ref.New(Byte_ascii.Cr)
|
|
, Int_obj_ref.New(Byte_ascii.Null)
|
|
, Int_obj_ref.New(Byte_ascii.Vertical_tab)
|
|
);
|
|
public static String rtrim(String src) {return rtrim(src, null);}
|
|
public static String rtrim(String src_str, String pad_str) {
|
|
Hash_adp pad_hash = null;
|
|
if (pad_str == null) pad_hash = trim_ws_hash;
|
|
|
|
// init brys / lens
|
|
byte[] src_bry = Bry_.new_u8(src_str);
|
|
int src_len = src_bry.length;
|
|
byte[] pad_bry = Bry_.new_u8(pad_str);
|
|
int pad_len = pad_bry.length;
|
|
|
|
// ----------------------
|
|
// 0, 1 chars (optimized)
|
|
// ----------------------
|
|
int last = 0;
|
|
switch (pad_len) {
|
|
// pad is ""
|
|
case 0:
|
|
return src_str;
|
|
// pad is 1 char
|
|
case 1:
|
|
last = src_len;
|
|
byte pad_byte = pad_bry[0];
|
|
for (int i = src_len - 1; i > -1; i--) {
|
|
byte cur = src_bry[i];
|
|
last = i + 1;
|
|
if (cur != pad_byte) {
|
|
break;
|
|
}
|
|
}
|
|
return (last == src_len) ? src_str : String_.new_u8(Bry_.Mid(src_bry, 0, last));
|
|
}
|
|
|
|
// --------
|
|
// 2+ chars
|
|
// --------
|
|
// create pad_hash if not ws_hash
|
|
// NOTE: PHP does not support multibyte strings; see TEST
|
|
if (pad_hash == null) {
|
|
pad_hash = Hash_adp_.New();
|
|
byte prv_byte = Byte_.Zero;
|
|
for (int i = 0; i < pad_len; i++) {
|
|
byte pad_byte = pad_bry[i];
|
|
if (pad_byte == Byte_ascii.Dot && i < pad_len - 1) {
|
|
byte nxt_byte = pad_bry[i + 1];
|
|
if (nxt_byte == Byte_ascii.Dot) {
|
|
if (i == 0) {
|
|
throw new XophpError(".. found but at start of String; src=" + pad_str);
|
|
}
|
|
else if (i == pad_len - 2) {
|
|
throw new XophpError(".. found but at end of String; src=" + pad_str);
|
|
}
|
|
else {
|
|
nxt_byte = pad_bry[i + 2];
|
|
if (nxt_byte > prv_byte) {
|
|
for (byte j = prv_byte; j < nxt_byte; j++) {
|
|
Byte_obj_ref rng_obj = Byte_obj_ref.new_(j);
|
|
if (!pad_hash.Has(rng_obj))
|
|
pad_hash.Add_as_key_and_val(rng_obj);
|
|
}
|
|
i += 2;
|
|
continue;
|
|
}
|
|
else {
|
|
throw new XophpError(".. found but next byte must be greater than previous byte; src=" + pad_str);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
prv_byte = pad_byte;
|
|
Byte_obj_ref pad_obj = Byte_obj_ref.new_(pad_byte);
|
|
if (!pad_hash.Has(pad_obj))
|
|
pad_hash.Add_as_key_and_val(pad_obj);
|
|
}
|
|
}
|
|
|
|
// loop src until non-matching pad int
|
|
Byte_obj_ref temp = Byte_obj_ref.zero_();
|
|
last = src_len;
|
|
for (int i = src_len - 1; i > -1; i--) {
|
|
temp.Val_(src_bry[i]);
|
|
last = i + 1;
|
|
if (!pad_hash.Has(temp)) {
|
|
break;
|
|
}
|
|
}
|
|
return (last == src_len) ? src_str : String_.new_u8(Bry_.Mid(src_bry, 0, last));
|
|
}
|
|
public static String str_repeat(String val, int count) {
|
|
int val_len = String_.Len(val);
|
|
int chry_len = val_len * count;
|
|
char[] chry = new char[chry_len];
|
|
for (int i = 0; i < count; i++) {
|
|
for (int j = 0; j < val_len; j++) {
|
|
chry[(i * val_len) + j] = String_.CharAt(val, j);
|
|
}
|
|
}
|
|
return String_.new_charAry_(chry, 0, chry_len);
|
|
}
|
|
public static boolean is_string(Object o) {
|
|
return String_.as_(o) != null;
|
|
}
|
|
|
|
// REF.PHP: https://www.php.net/manual/en/function.strtoupper.php
|
|
public static String strtoupper(String s) {
|
|
return String_.Upper(s);
|
|
}
|
|
public static String strtolower(String s) {
|
|
return String_.Lower(s);
|
|
}
|
|
// REF.PHP: https://www.php.net/manual/en/function.ord.php
|
|
public static int ord(String s) {
|
|
return String_.Len_eq_0(s) ? 0 : Char_.To_int(String_.CharAt(s, 0));
|
|
}
|
|
public static String[] explode(String delimiter, String str) {
|
|
return String_.Split(str, delimiter);
|
|
}
|
|
// NOTE: support simple syntax only
|
|
// REF.PHP: https://www.php.net/manual/en/language.types.String.php#language.types.String.parsing
|
|
public static String Fmt(String fmt_str, Object... args) {
|
|
byte[] fmt = Bry_.new_u8(fmt_str);
|
|
int len = fmt.length;
|
|
Bry_bfr bfr = Bry_bfr_.New();
|
|
int pos = 0;
|
|
int arg_idx = 0;
|
|
while (pos < len) {
|
|
// find next $
|
|
int dollar_pos = Bry_find_.Find_fwd(fmt, Byte_ascii.Dollar, pos);
|
|
|
|
// no more $
|
|
if (dollar_pos == Bry_find_.Not_found) {
|
|
// add rest of fmt
|
|
bfr.Add_mid(fmt, pos, len);
|
|
break;
|
|
}
|
|
|
|
int key_bgn = dollar_pos + 1;
|
|
// if $ at end, then just add it literally; also bound-check
|
|
if (key_bgn == len) {
|
|
bfr.Add_mid(fmt, pos, len);
|
|
break;
|
|
}
|
|
|
|
int key_end = len;
|
|
byte key_bgn_byte = fmt[key_bgn];
|
|
// if { after $, then search forward for }
|
|
if (key_bgn_byte == Byte_ascii.Curly_bgn) {
|
|
key_end = Bry_find_.Find_fwd(fmt, Byte_ascii.Curly_end, key_bgn + 1, len);
|
|
|
|
// no } found; fail; EX: $b = 'z'; echo("a${b");
|
|
if (key_end == Bry_find_.Not_found) {
|
|
throw Err_.new_wo_type("invalid fmt; fmt=" + fmt);
|
|
}
|
|
|
|
// skip past "}"
|
|
key_end++;
|
|
}
|
|
// no "{"
|
|
else {
|
|
// search forward according to regex; ^[a-zA-Z_\x80-\xff][a-zA-Z0-9_\x80-\xff]*$; REF.PHP: https://www.php.net/manual/en/language.variables.basics.php
|
|
for (int i = key_bgn; i < key_end; i++) {
|
|
byte key_cur = fmt[i];
|
|
if (!Is_identifier_char(key_cur, i == key_bgn)) {
|
|
key_end = i;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
// invalid key; EX: $0
|
|
if (key_bgn == key_end) {
|
|
bfr.Add_mid(fmt, pos, key_bgn);
|
|
pos = key_bgn;
|
|
continue;
|
|
}
|
|
|
|
// valid key; add everything before key_bgn
|
|
bfr.Add_mid(fmt, pos, dollar_pos);
|
|
|
|
// add arg_idx
|
|
bfr.Add_str_u8(Object_.Xto_str_strict_or_empty(args[arg_idx++]));
|
|
|
|
// update pos
|
|
pos = key_end;
|
|
}
|
|
return bfr.To_str_and_clear();
|
|
}
|
|
private static boolean Is_identifier_char(byte b, boolean is_first) {
|
|
switch (b) {
|
|
// alpha and _ is always valid
|
|
case Byte_ascii.Ltr_A: case Byte_ascii.Ltr_B: case Byte_ascii.Ltr_C: case Byte_ascii.Ltr_D: case Byte_ascii.Ltr_E:
|
|
case Byte_ascii.Ltr_F: case Byte_ascii.Ltr_G: case Byte_ascii.Ltr_H: case Byte_ascii.Ltr_I: case Byte_ascii.Ltr_J:
|
|
case Byte_ascii.Ltr_K: case Byte_ascii.Ltr_L: case Byte_ascii.Ltr_M: case Byte_ascii.Ltr_N: case Byte_ascii.Ltr_O:
|
|
case Byte_ascii.Ltr_P: case Byte_ascii.Ltr_Q: case Byte_ascii.Ltr_R: case Byte_ascii.Ltr_S: case Byte_ascii.Ltr_T:
|
|
case Byte_ascii.Ltr_U: case Byte_ascii.Ltr_V: case Byte_ascii.Ltr_W: case Byte_ascii.Ltr_X: case Byte_ascii.Ltr_Y: case Byte_ascii.Ltr_Z:
|
|
case Byte_ascii.Ltr_a: case Byte_ascii.Ltr_b: case Byte_ascii.Ltr_c: case Byte_ascii.Ltr_d: case Byte_ascii.Ltr_e:
|
|
case Byte_ascii.Ltr_f: case Byte_ascii.Ltr_g: case Byte_ascii.Ltr_h: case Byte_ascii.Ltr_i: case Byte_ascii.Ltr_j:
|
|
case Byte_ascii.Ltr_k: case Byte_ascii.Ltr_l: case Byte_ascii.Ltr_m: case Byte_ascii.Ltr_n: case Byte_ascii.Ltr_o:
|
|
case Byte_ascii.Ltr_p: case Byte_ascii.Ltr_q: case Byte_ascii.Ltr_r: case Byte_ascii.Ltr_s: case Byte_ascii.Ltr_t:
|
|
case Byte_ascii.Ltr_u: case Byte_ascii.Ltr_v: case Byte_ascii.Ltr_w: case Byte_ascii.Ltr_x: case Byte_ascii.Ltr_y: case Byte_ascii.Ltr_z:
|
|
case Byte_ascii.Underline:
|
|
return true;
|
|
// number is only valid if !is_first
|
|
case Byte_ascii.Num_0: case Byte_ascii.Num_1: case Byte_ascii.Num_2: case Byte_ascii.Num_3: case Byte_ascii.Num_4:
|
|
case Byte_ascii.Num_5: case Byte_ascii.Num_6: case Byte_ascii.Num_7: case Byte_ascii.Num_8: case Byte_ascii.Num_9:
|
|
return !is_first;
|
|
default:
|
|
// \x80-\xff is always true;
|
|
return b >= 128 && b <= 255;
|
|
}
|
|
}
|
|
public Object Callback(String method, Object... args) {
|
|
if (String_.Eq(method, "strtoupper")) {
|
|
String val = (String)args[0];
|
|
return strtoupper(val);
|
|
}
|
|
else {
|
|
throw Err_.new_unhandled_default(method);
|
|
}
|
|
}
|
|
public static final XophpCallbackOwner Callback_owner = new XophpString_();
|
|
}
|