1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2026-03-02 03:49:30 +00:00
This commit is contained in:
gnosygnu
2015-06-21 23:25:42 -04:00
parent fe0ce6340d
commit bf44bcf3c6
191 changed files with 1347 additions and 430 deletions

View File

@@ -0,0 +1,155 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.core.brys; import gplx.*; import gplx.core.*;
public class Bry_rdr {
public byte[] Src() {return src;} protected byte[] src;
public int Src_len() {return src_len;} protected int src_len;
public void Init(byte[] src) {this.Init(src, 0);}
public void Init(byte[] src, int pos) {
this.src = src; this.src_len = src.length; this.pos = pos;
}
public int Pos() {return pos;} public Bry_rdr Pos_(int v) {this.pos = v; return this;} protected int pos;
public void Pos_add(int v) {pos += v;}
public boolean Pos_is_eos() {return pos == src_len;}
public void Pos_add_one() {++pos;}
public int Or_int() {return or_int;} public void Or_int_(int v) {or_int = v;} private int or_int = Int_.MinValue;
public byte[] Or_bry() {return or_bry;} public void Or_bry_(byte[] v) {or_bry = v;} private byte[] or_bry;
public int Find_fwd(byte find) {return Bry_finder.Find_fwd(src, find, pos);}
public int Find_fwd_ws() {return Bry_finder.Find_fwd_until_ws(src, pos, src_len);}
public int Find_fwd__pos_at_lhs(byte[] find_bry) {return Find_fwd__pos_at(find_bry, Bool_.N);}
public int Find_fwd__pos_at_rhs(byte[] find_bry) {return Find_fwd__pos_at(find_bry, Bool_.Y);}
public int Find_fwd__pos_at(byte[] find_bry, boolean pos_at_rhs) {
int find_pos = Bry_finder.Find_fwd(src, find_bry, pos, src_len);
if (pos_at_rhs) find_pos += find_bry.length;
if (find_pos != Bry_finder.Not_found) pos = find_pos;
return find_pos;
}
public int Read_int_to_semic() {return Read_int_to(Byte_ascii.Semic);}
public int Read_int_to_comma() {return Read_int_to(Byte_ascii.Comma);}
public int Read_int_to_pipe() {return Read_int_to(Byte_ascii.Pipe);}
public int Read_int_to_nl() {return Read_int_to(Byte_ascii.NewLine);}
public int Read_int_to_quote() {return Read_int_to(Byte_ascii.Quote);}
public int Read_int_to_non_num(){return Read_int_to(Byte_ascii.Nil);}
public int Read_int_to(byte to_char) {
int bgn = pos;
int rv = 0;
int negative = 1;
while (pos < src_len) {
byte b = src[pos++];
switch (b) {
case Byte_ascii.Num_0: case Byte_ascii.Num_1: case Byte_ascii.Num_2: case Byte_ascii.Num_3: case Byte_ascii.Num_4:
case Byte_ascii.Num_5: case Byte_ascii.Num_6: case Byte_ascii.Num_7: case Byte_ascii.Num_8: case Byte_ascii.Num_9:
rv = (rv * 10) + (b - Byte_ascii.Num_0);
break;
case Byte_ascii.Dash:
if (negative == -1) // 2nd negative
return or_int; // return or_int
else // 1st negative
negative = -1; // flag negative
break;
default: {
boolean match = b == to_char;
if (to_char == Byte_ascii.Nil) {// hack for Read_int_to_non_num
--pos;
match = true;
}
return match ? rv * negative : or_int;
}
}
}
return bgn == pos ? or_int : rv * negative;
}
public byte[] Read_bry_to_nl() {return Read_bry_to(Byte_ascii.NewLine);}
public byte[] Read_bry_to_semic() {return Read_bry_to(Byte_ascii.Semic);}
public byte[] Read_bry_to_pipe() {return Read_bry_to(Byte_ascii.Pipe);}
public byte[] Read_bry_to_quote() {return Read_bry_to(Byte_ascii.Quote);}
public byte[] Read_bry_to_apos() {return Read_bry_to(Byte_ascii.Apos);}
public byte[] Read_bry_to(byte to_char) {
int bgn = pos;
while (pos < src_len) {
byte b = src[pos];
if (b == to_char)
return Bry_.Mid(src, bgn, pos++);
else
++pos;
}
return bgn == pos ? or_bry : Bry_.Mid(src, bgn, src_len);
}
public boolean Read_yn_to_pipe() {return Read_byte_to_pipe() == Byte_ascii.Ltr_y;}
public byte Read_byte_to_pipe() {
byte rv = src[pos];
pos += 2; // 1 for byte; 1 for pipe;
return rv;
}
public double Read_double_to_pipe() {return Read_double_to(Byte_ascii.Pipe);}
public double Read_double_to(byte to_char) {
byte[] double_bry = Read_bry_to(to_char);
return Double_.parse_(String_.new_a7(double_bry)); // double will never have utf8
}
@gplx.Virtual public Bry_rdr Skip_ws() {
while (pos < src_len) {
switch (src[pos]) {
case Byte_ascii.Tab: case Byte_ascii.NewLine: case Byte_ascii.CarriageReturn: case Byte_ascii.Space:
++pos;
break;
default:
return this;
}
}
return this;
}
public Bry_rdr Skip_alpha_num_under() {
while (pos < src_len) {
switch (src[pos]) {
case Byte_ascii.Num_0: case Byte_ascii.Num_1: case Byte_ascii.Num_2: case Byte_ascii.Num_3: case Byte_ascii.Num_4:
case Byte_ascii.Num_5: case Byte_ascii.Num_6: case Byte_ascii.Num_7: case Byte_ascii.Num_8: case Byte_ascii.Num_9:
case Byte_ascii.Ltr_A: case Byte_ascii.Ltr_B: case Byte_ascii.Ltr_C: case Byte_ascii.Ltr_D: case Byte_ascii.Ltr_E:
case Byte_ascii.Ltr_F: case Byte_ascii.Ltr_G: case Byte_ascii.Ltr_H: case Byte_ascii.Ltr_I: case Byte_ascii.Ltr_J:
case Byte_ascii.Ltr_K: case Byte_ascii.Ltr_L: case Byte_ascii.Ltr_M: case Byte_ascii.Ltr_N: case Byte_ascii.Ltr_O:
case Byte_ascii.Ltr_P: case Byte_ascii.Ltr_Q: case Byte_ascii.Ltr_R: case Byte_ascii.Ltr_S: case Byte_ascii.Ltr_T:
case Byte_ascii.Ltr_U: case Byte_ascii.Ltr_V: case Byte_ascii.Ltr_W: case Byte_ascii.Ltr_X: case Byte_ascii.Ltr_Y: case Byte_ascii.Ltr_Z:
case Byte_ascii.Ltr_a: case Byte_ascii.Ltr_b: case Byte_ascii.Ltr_c: case Byte_ascii.Ltr_d: case Byte_ascii.Ltr_e:
case Byte_ascii.Ltr_f: case Byte_ascii.Ltr_g: case Byte_ascii.Ltr_h: case Byte_ascii.Ltr_i: case Byte_ascii.Ltr_j:
case Byte_ascii.Ltr_k: case Byte_ascii.Ltr_l: case Byte_ascii.Ltr_m: case Byte_ascii.Ltr_n: case Byte_ascii.Ltr_o:
case Byte_ascii.Ltr_p: case Byte_ascii.Ltr_q: case Byte_ascii.Ltr_r: case Byte_ascii.Ltr_s: case Byte_ascii.Ltr_t:
case Byte_ascii.Ltr_u: case Byte_ascii.Ltr_v: case Byte_ascii.Ltr_w: case Byte_ascii.Ltr_x: case Byte_ascii.Ltr_y: case Byte_ascii.Ltr_z:
case Byte_ascii.Underline:
++pos;
break;
default:
return this;
}
}
return this;
}
public void Chk_bry_or_fail(byte[] bry) {
int bry_len = bry.length;
boolean match = Bry_.Match(src, pos, pos + bry_len, bry);
if (match) pos += bry_len;
else throw Err_.new_("bry.rdr:chk failed; bry={0} pos={1}", bry, pos);
}
public void Chk_byte_or_fail(byte b) {
boolean match = pos < src_len ? src[pos] == b : false;
if (match) ++pos;
else throw Err_.new_("bry.rdr:chk failed; byte={0} pos={1}", b, pos);
}
public byte[] Mid_by_len_safe(int len) {
int end = pos + len; if (end > src_len) end = src_len;
return Bry_.Mid(src, pos, end);
}
}

View File

@@ -0,0 +1,88 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.core.btries; import gplx.*; import gplx.core.*;
import gplx.core.primitives.*;
public class Btrie_bwd_mgr {
public int Match_pos() {return match_pos;} private int match_pos;
public Object Match_exact(byte[] src, int bgn_pos, int end_pos) {
Object rv = Match(src[bgn_pos], src, bgn_pos, end_pos);
return rv == null ? null : match_pos - bgn_pos == end_pos - bgn_pos ? rv : null;
}
public Object Match_bgn(byte[] src, int bgn_pos, int end_pos) {return Match(src[bgn_pos], src, bgn_pos, end_pos);}
public Object Match(byte b, byte[] src, int bgn_pos, int end_pos) {
// NOTE: bgn, end follows same semantics as fwd where bgn >= & end < except reversed: bgn <= & end >; EX: "abcde" should pass 5, -1
Object rv = null; int cur_pos = match_pos = bgn_pos;
Btrie_slim_itm cur = root;
while (true) {
Btrie_slim_itm nxt = cur.Ary_find(b); if (nxt == null) return rv; // nxt does not hav b; return rv;
--cur_pos;
if (nxt.Ary_is_empty()) {match_pos = cur_pos; return nxt.Val();} // nxt is leaf; return nxt.Val() (which should be non-null)
Object nxt_val = nxt.Val();
if (nxt_val != null) {match_pos = cur_pos; rv = nxt_val;} // nxt is node; cache rv (in case of false match)
if (cur_pos == end_pos) return rv; // increment cur_pos and exit if src_len
b = src[cur_pos];
cur = nxt;
}
}
public Btrie_bwd_mgr Add_str_byte(String key, byte val) {return Add(Bry_.new_u8(key), Byte_obj_val.new_(val));}
public Btrie_bwd_mgr Add_byteVal_strAry(byte val, String... ary) {
int ary_len = ary.length;
Byte_obj_val byteVal = Byte_obj_val.new_(val);
for (int i = 0; i < ary_len; i++) {
String itm = ary[i];
Add(Bry_.new_u8(itm), byteVal);
}
return this;
}
public Btrie_bwd_mgr Add(String key, Object val) {return Add(Bry_.new_u8(key), val);}
public Btrie_bwd_mgr Add(byte[] key, Object val) {
if (val == null) throw Err_.new_("null objects cannot be registered").Add("key", String_.new_u8(key));
int key_len = key.length;
Btrie_slim_itm cur = root;
for (int i = key_len - 1; i > -1; i--) {
byte b = key[i];
if (root.Case_any() && (b > 64 && b < 91)) b += 32;
Btrie_slim_itm nxt = cur.Ary_find(b);
if (nxt == null)
nxt = cur.Ary_add(b, null);
if (i == 0)
nxt.Val_set(val);
cur = nxt;
}
count++; // FUTURE: do not increment if replacing value
return this;
}
public int Count() {return count;} private int count;
public void Del(byte[] key) {
int key_len = key.length;
Btrie_slim_itm cur = root;
for (int i = 0; i < key_len; i++) {
byte b = key[i];
cur = cur.Ary_find(b);
if (cur == null) break;
cur.Ary_del(b);
}
count--; // FUTURE: do not decrement if not found
}
public void Clear() {root.Clear(); count = 0;}
public static Btrie_bwd_mgr cs_() {return new Btrie_bwd_mgr(false);}
public static Btrie_bwd_mgr ci_() {return new Btrie_bwd_mgr(true);}
public Btrie_bwd_mgr(boolean caseAny) {
root = new Btrie_slim_itm(Byte_.Zero, null, caseAny);
} private Btrie_slim_itm root;
}

View File

@@ -0,0 +1,87 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.core.btries; import gplx.*; import gplx.core.*;
import org.junit.*;
public class Btrie_bwd_mgr_tst {
@Before public void init() {} private Btrie_bwd_mgr trie;
private void ini_setup1() {
trie = new Btrie_bwd_mgr(false);
run_Add("c" , 1);
run_Add("abc" , 123);
}
@Test public void Get_by() {
ini_setup1();
tst_MatchAtCur("c" , 1);
tst_MatchAtCur("abc" , 123);
tst_MatchAtCur("bc" , 1);
tst_MatchAtCur("yzabc" , 123);
tst_MatchAtCur("ab" , null);
}
@Test public void Fetch_intl() {
trie = new Btrie_bwd_mgr(false);
run_Add("a<EFBFBD>", 1);
tst_MatchAtCur("a<EFBFBD>" , 1);
tst_MatchAtCur("<EFBFBD>" , null);
}
@Test public void Eos() {
ini_setup1();
tst_Match("ab", Byte_ascii.Ltr_c, 2, 123);
}
@Test public void Match_exact() {
ini_setup1();
tst_MatchAtCurExact("c", 1);
tst_MatchAtCurExact("bc", null);
tst_MatchAtCurExact("abc", 123);
}
private void ini_setup2() {
trie = new Btrie_bwd_mgr(false);
run_Add("a" , 1);
run_Add("b" , 2);
}
@Test public void Match_2() {
ini_setup2();
tst_MatchAtCur("a", 1);
tst_MatchAtCur("b", 2);
}
private void ini_setup_caseAny() {
trie = Btrie_bwd_mgr.ci_();
run_Add("a" , 1);
run_Add("b" , 2);
}
@Test public void CaseAny() {
ini_setup_caseAny();
tst_MatchAtCur("a", 1);
tst_MatchAtCur("A", 1);
}
private void run_Add(String k, int val) {trie.Add(Bry_.new_u8(k), val);}
private void tst_Match(String srcStr, byte b, int bgn_pos, int expd) {
byte[] src = Bry_.new_u8(srcStr);
Object actl = trie.Match(b, src, bgn_pos, -1);
Tfds.Eq(expd, actl);
}
private void tst_MatchAtCur(String srcStr, Object expd) {
byte[] src = Bry_.new_u8(srcStr);
Object actl = trie.Match(src[src.length - 1], src, src.length - 1, -1);
Tfds.Eq(expd, actl);
}
private void tst_MatchAtCurExact(String srcStr, Object expd) {
byte[] src = Bry_.new_u8(srcStr);
Object actl = trie.Match_exact(src, src.length - 1, -1);
Tfds.Eq(expd, actl);
}
}

View File

@@ -0,0 +1,156 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.core.btries; import gplx.*; import gplx.core.*;
import gplx.core.primitives.*;
public class Btrie_fast_mgr {
private ByteTrieItm_fast root;
public boolean CaseAny() {return root.CaseAny();} public Btrie_fast_mgr CaseAny_(boolean v) {root.CaseAny_(v); return this;}
public int Match_pos() {return match_pos;} private int match_pos;
public Object Match_exact(byte[] src, int bgn_pos, int end_pos) {
Object rv = Match_bgn_w_byte(src[bgn_pos], src, bgn_pos, end_pos);
return rv == null ? null : match_pos - bgn_pos == end_pos - bgn_pos ? rv : null;
}
public Object Match_bgn(byte[] src, int bgn_pos, int end_pos) {return Match_bgn_w_byte(src[bgn_pos], src, bgn_pos, end_pos);}
public Object Match_bgn_w_byte(byte b, byte[] src, int bgn_pos, int src_len) {
match_pos = bgn_pos;
ByteTrieItm_fast nxt = root.Ary_find(b); if (nxt == null) return null; // nxt does not have b; return rv;
Object rv = null; int cur_pos = bgn_pos + 1;
ByteTrieItm_fast cur = root;
while (true) {
if (nxt.Ary_is_empty()) {match_pos = cur_pos; return nxt.Val();} // nxt is leaf; return nxt.Val() (which should be non-null)
Object nxt_val = nxt.Val();
if (nxt_val != null) {match_pos = cur_pos; rv = nxt_val;} // nxt is node; cache rv (in case of false match)
if (cur_pos == src_len) return rv; // eos; exit
b = src[cur_pos];
cur = nxt;
nxt = cur.Ary_find(b); if (nxt == null) return rv;
++cur_pos;
}
}
public Btrie_fast_mgr Add_bry_bval(byte key, byte val) {return Add(new byte[] {key}, Byte_obj_val.new_(val));}
public Btrie_fast_mgr Add_bry_bval(byte[] key, byte val) {return Add(key, Byte_obj_val.new_(val));}
public Btrie_fast_mgr Add_str_byte(String key, byte val) {return Add(Bry_.new_u8(key), Byte_obj_val.new_(val));}
public Btrie_fast_mgr Add(byte key, Object val) {return Add(new byte[] {key}, val);}
public Btrie_fast_mgr Add(String key, Object val) {return Add(Bry_.new_u8(key), val);}
public Btrie_fast_mgr Add(byte[] key, Object val) {
if (val == null) throw Err_.new_("null objects cannot be registered").Add("key", String_.new_u8(key));
int key_len = key.length; int key_end = key_len - 1;
ByteTrieItm_fast cur = root;
for (int i = 0; i < key_len; i++) {
byte b = key[i];
ByteTrieItm_fast nxt = cur.Ary_find(b);
if (nxt == null)
nxt = cur.Ary_add(b, null);
if (i == key_end)
nxt.Val_set(val);
cur = nxt;
}
return this;
}
public Btrie_fast_mgr Add_stub(byte tid, String s) {
byte[] bry = Bry_.new_u8(s);
Btrie_itm_stub stub = new Btrie_itm_stub(tid, bry);
return Add(bry, stub);
}
public void Del(byte[] key) {
int key_len = key.length;
ByteTrieItm_fast cur = root;
for (int i = 0; i < key_len; i++) {
byte b = key[i];
Object itm_obj = cur.Ary_find(b);
if (itm_obj == null) break; // b not found; no match; exit;
ByteTrieItm_fast itm = (ByteTrieItm_fast)itm_obj;
if (i == key_len - 1) { // last char
if (itm.Val() == null) break; // itm does not have val; EX: trie with "abc", and "ab" deleted
if (itm.Ary_is_empty())
cur.Ary_del(b);
else
itm.Val_set(null);
}
else { // mid char; set itm as cur and continue
cur = itm;
}
}
}
public void Clear() {root.Clear();}
public byte[] Replace(Bry_bfr tmp_bfr, byte[] src, int bgn, int end) {
int pos = bgn;
boolean dirty = false;
while (pos < end) {
byte b = src[pos];
Object o = this.Match_bgn_w_byte(b, src, pos, end);
if (o == null) {
if (dirty)
tmp_bfr.Add_byte(b);
pos++;
}
else {
if (!dirty) {
tmp_bfr.Add_mid(src, bgn, pos);
dirty = true;
}
tmp_bfr.Add((byte[])o);
pos = match_pos;
}
}
return dirty ? tmp_bfr.Xto_bry_and_clear() : src;
}
public static Btrie_fast_mgr cs_() {return new Btrie_fast_mgr(false);}
public static Btrie_fast_mgr ci_ascii_() {return new Btrie_fast_mgr(true);}
public static Btrie_fast_mgr new_(boolean case_any) {return new Btrie_fast_mgr(case_any);}
Btrie_fast_mgr(boolean caseAny) {
root = new ByteTrieItm_fast(Byte_.Zero, null, caseAny);
}
}
class ByteTrieItm_fast {
private ByteTrieItm_fast[] ary = new ByteTrieItm_fast[256];
public byte Key_byte() {return key_byte;} private byte key_byte;
public Object Val() {return val;} public void Val_set(Object val) {this.val = val;} Object val;
public boolean Ary_is_empty() {return ary_is_empty;} private boolean ary_is_empty;
public boolean CaseAny() {return caseAny;} public ByteTrieItm_fast CaseAny_(boolean v) {caseAny = v; return this;} private boolean caseAny;
public void Clear() {
val = null;
for (int i = 0; i < 256; i++) {
if (ary[i] != null) {
ary[i].Clear();
ary[i] = null;
}
}
ary_len = 0;
ary_is_empty = true;
}
public ByteTrieItm_fast Ary_find(byte b) {
int key_byte = (caseAny && (b > 64 && b < 91) ? b + 32 : b) & 0xff;// PATCH.JAVA:need to convert to unsigned byte
return ary[key_byte];
}
public ByteTrieItm_fast Ary_add(byte b, Object val) {
int key_byte = (caseAny && (b > 64 && b < 91) ? b + 32 : b) & 0xff;// PATCH.JAVA:need to convert to unsigned byte
ByteTrieItm_fast rv = new ByteTrieItm_fast(b, val, caseAny);
ary[key_byte] = rv;
++ary_len;
ary_is_empty = false;
return rv;
}
public void Ary_del(byte b) {
int key_byte = (caseAny && (b > 64 && b < 91) ? b + 32 : b) & 0xff;// PATCH.JAVA:need to convert to unsigned byte
ary[key_byte] = null;
--ary_len;
ary_is_empty = ary_len == 0;
} int ary_len = 0;
public ByteTrieItm_fast(byte key_byte, Object val, boolean caseAny) {this.key_byte = key_byte; this.val = val; this.caseAny = caseAny;}
}

View File

@@ -0,0 +1,85 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.core.btries; import gplx.*; import gplx.core.*;
import org.junit.*;
public class Btrie_fast_mgr_tst {
private Btrie_fast_mgr_fxt fxt = new Btrie_fast_mgr_fxt();
@Before public void init() {fxt.Clear();}
@Test public void Get_by() {
fxt.Test_matchAtCur("a" , 1);
fxt.Test_matchAtCur("abc" , 123);
fxt.Test_matchAtCur("ab" , 1);
fxt.Test_matchAtCur("abcde" , 123);
fxt.Test_matchAtCur(" a" , null);
}
@Test public void Bos() {
fxt.Test_match("bc", Byte_ascii.Ltr_a, -1, 123);
}
@Test public void Match_exact() {
fxt.Test_matchAtCurExact("a", 1);
fxt.Test_matchAtCurExact("ab", null);
fxt.Test_matchAtCurExact("abc", 123);
}
@Test public void Del_noop__no_match() {
fxt.Exec_del("d");
fxt.Test_matchAtCurExact("a" , 1);
fxt.Test_matchAtCurExact("abc" , 123);
}
@Test public void Del_noop__partial_match() {
fxt.Exec_del("ab");
fxt.Test_matchAtCurExact("a" , 1);
fxt.Test_matchAtCurExact("abc" , 123);
}
@Test public void Del_match__long() {
fxt.Exec_del("abc");
fxt.Test_matchAtCurExact("a" , 1);
fxt.Test_matchAtCurExact("abc" , null);
}
@Test public void Del_match__short() {
fxt.Exec_del("a");
fxt.Test_matchAtCurExact("a" , null);
fxt.Test_matchAtCurExact("abc" , 123);
}
}
class Btrie_fast_mgr_fxt {
private Btrie_fast_mgr trie;
public void Clear() {
trie = Btrie_fast_mgr.cs_();
Init_add( 1 , Byte_ascii.Ltr_a);
Init_add(123 , Byte_ascii.Ltr_a, Byte_ascii.Ltr_b, Byte_ascii.Ltr_c);
}
public void Init_add(int val, byte... ary) {trie.Add(ary, val);}
public void Test_match(String src_str, byte b, int bgn_pos, int expd) {
byte[] src = Bry_.new_a7(src_str);
Object actl = trie.Match_bgn_w_byte(b, src, bgn_pos, src.length);
Tfds.Eq(expd, actl);
}
public void Test_matchAtCur(String src_str, Object expd) {
byte[] src = Bry_.new_a7(src_str);
Object actl = trie.Match_bgn(src, 0, src.length);
Tfds.Eq(expd, actl);
}
public void Test_matchAtCurExact(String src_str, Object expd) {
byte[] src = Bry_.new_a7(src_str);
Object actl = trie.Match_exact(src, 0, src.length);
Tfds.Eq(expd, actl);
}
public void Exec_del(String src_str) {
trie.Del(Bry_.new_u8(src_str));
}
}

View File

@@ -0,0 +1,23 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.core.btries; import gplx.*; import gplx.core.*;
public class Btrie_itm_stub {
public Btrie_itm_stub(byte tid, byte[] val) {this.tid = tid; this.val = val;}
public byte Tid() {return tid;} private byte tid;
public byte[] Val() {return val;} private byte[] val;
}

View File

@@ -0,0 +1,24 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.core.btries; import gplx.*; import gplx.core.*;
public interface Btrie_mgr {
int Match_pos();
Object Match_bgn(byte[] src, int bgn_pos, int end_pos);
Btrie_mgr Add_obj(String key, Object val);
Btrie_mgr Add_obj(byte[] key, Object val);
}

View File

@@ -0,0 +1,130 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.core.btries; import gplx.*; import gplx.core.*;
public class Btrie_slim_itm {
private Btrie_slim_itm[] ary = Btrie_slim_itm.Ary_empty;
public Btrie_slim_itm(byte key_byte, Object val, boolean case_any) {this.key_byte = key_byte; this.val = val; this.case_any = case_any;}
public byte Key_byte() {return key_byte;} private byte key_byte;
public Object Val() {return val;} public void Val_set(Object val) {this.val = val;} private Object val;
public boolean Case_any() {return case_any;} private boolean case_any;
public boolean Ary_is_empty() {return ary == Btrie_slim_itm.Ary_empty;}
public void Clear() {
val = null;
for (int i = 0; i < ary_len; i++)
ary[i].Clear();
ary = Btrie_slim_itm.Ary_empty;
ary_len = ary_max = 0;
}
public Btrie_slim_itm Ary_find(byte b) {
int find_val = (case_any && (b > 64 && b < 91) ? b + 32 : b) & 0xff;// PATCH.JAVA:need to convert to unsigned byte
int key_val = 0;
switch (ary_len) {
case 0: return null;
case 1:
Btrie_slim_itm rv = ary[0];
key_val = rv.Key_byte() & 0xff;// PATCH.JAVA:need to convert to unsigned byte;
key_val = (case_any && (key_val > 64 && key_val < 91) ? key_val + 32 : key_val);
return key_val == find_val ? rv : null;
default:
int adj = 1;
int prv_pos = 0;
int prv_len = ary_len;
int cur_len = 0;
int cur_idx = 0;
Btrie_slim_itm itm = null;
while (true) {
cur_len = prv_len / 2;
if (prv_len % 2 == 1) ++cur_len;
cur_idx = prv_pos + (cur_len * adj);
if (cur_idx < 0) cur_idx = 0;
else if (cur_idx >= ary_len) cur_idx = ary_len - 1;
itm = ary[cur_idx];
key_val = itm.Key_byte() & 0xff; // PATCH.JAVA:need to convert to unsigned byte;
key_val = (case_any && (key_val > 64 && key_val < 91) ? key_val + 32 : key_val);
if (find_val < key_val) adj = -1;
else if (find_val > key_val) adj = 1;
else /*(find_val == cur_val)*/ return itm;
if (cur_len == 1) {
cur_idx += adj;
if (cur_idx < 0 || cur_idx >= ary_len) return null;
itm = ary[cur_idx];
return (itm.Key_byte() & 0xff) == find_val ? itm : null; // PATCH.JAVA:need to convert to unsigned byte;
}
prv_len = cur_len;
prv_pos = cur_idx;
}
}
}
public Btrie_slim_itm Ary_add(byte b, Object val) {
int new_len = ary_len + 1;
if (new_len > ary_max) {
ary_max += 4;
ary = (Btrie_slim_itm[])Array_.Resize(ary, ary_max);
}
Btrie_slim_itm rv = new Btrie_slim_itm(b, val, case_any);
ary[ary_len] = rv;
ary_len = new_len;
ByteHashItm_sorter._.Sort(ary, ary_len);
return rv;
}
public void Ary_del(byte b) {
boolean found = false;
for (int i = 0; i < ary_len; i++) {
if (found) {
if (i < ary_len - 1)
ary[i] = ary[i + 1];
}
else {
if (b == ary[i].Key_byte()) found = true;
}
}
if (found) --ary_len;
}
public static final Btrie_slim_itm[] Ary_empty = new Btrie_slim_itm[0]; int ary_len = 0, ary_max = 0;
}
class ByteHashItm_sorter {// quicksort
Btrie_slim_itm[] ary; int ary_len;
public void Sort(Btrie_slim_itm[] ary, int ary_len) {
if (ary == null || ary_len < 2) return;
this.ary = ary;
this.ary_len = ary_len;
Sort_recurse(0, ary_len - 1);
}
private void Sort_recurse(int lo, int hi) {
int i = lo, j = hi;
int mid = ary[lo + (hi-lo)/2].Key_byte()& 0xFF; // get mid itm
while (i <= j) { // divide into two lists
while ((ary[i].Key_byte() & 0xFF) < mid) // if lhs.cur < mid, then get next from lhs
i++;
while ((ary[j].Key_byte() & 0xFF) > mid) // if rhs.cur > mid, then get next from rhs
j--;
// lhs.cur > mid && rhs.cur < mid; switch lhs.cur and rhs.cur; increase i and j
if (i <= j) {
Btrie_slim_itm tmp = ary[i];
ary[i] = ary[j];
ary[j] = tmp;
i++;
j--;
}
}
if (lo < j) Sort_recurse(lo, j);
if (i < hi) Sort_recurse(i, hi);
}
public static final ByteHashItm_sorter _ = new ByteHashItm_sorter(); ByteHashItm_sorter() {}
}

View File

@@ -0,0 +1,49 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.core.btries; import gplx.*; import gplx.core.*;
import org.junit.*;
public class Btrie_slim_itm_tst {
private Btrie_slim_itm itm = new Btrie_slim_itm(Byte_.Zero, null, false);
@Before public void init() {itm.Clear();}
@Test public void Find_nil() {
tst_Find(Byte_ascii.Ltr_a, null);
}
@Test public void Add_one() {
run_Add(Byte_ascii.Ltr_a);
tst_Find(Byte_ascii.Ltr_a, "a");
}
@Test public void Add_many() {
run_Add(Byte_ascii.Bang, Byte_ascii.Num_0, Byte_ascii.Ltr_a, Byte_ascii.Ltr_B);
tst_Find(Byte_ascii.Ltr_a, "a");
}
@Test public void Del() {
run_Add(Byte_ascii.Bang, Byte_ascii.Num_0, Byte_ascii.Ltr_a, Byte_ascii.Ltr_B);
tst_Find(Byte_ascii.Ltr_a, "a");
run_Del(Byte_ascii.Ltr_a);
tst_Find(Byte_ascii.Ltr_a, null);
tst_Find(Byte_ascii.Num_0, "0");
tst_Find(Byte_ascii.Ltr_B, "B");
}
private void tst_Find(byte b, String expd) {
Btrie_slim_itm actl_itm = itm.Ary_find(b);
Object actl = actl_itm == null ? null : actl_itm.Val();
Tfds.Eq(expd, actl);
}
private void run_Add(byte... ary) {for (byte b : ary) itm.Ary_add(b, Char_.XtoStr((char)b));}
private void run_Del(byte... ary) {for (byte b : ary) itm.Ary_del(b);}
}

View File

@@ -0,0 +1,128 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.core.btries; import gplx.*; import gplx.core.*;
import gplx.core.primitives.*;
public class Btrie_slim_mgr implements Btrie_mgr {
Btrie_slim_mgr(boolean case_match) {root = new Btrie_slim_itm(Byte_.Zero, null, !case_match);} private Btrie_slim_itm root;
public int Count() {return count;} private int count;
public int Match_pos() {return match_pos;} private int match_pos;
public Object Match_exact(byte[] src, int bgn_pos, int end_pos) {
Object rv = Match_bgn_w_byte(src[bgn_pos], src, bgn_pos, end_pos);
return rv == null ? null : match_pos - bgn_pos == end_pos - bgn_pos ? rv : null;
}
public Object Match_bgn(byte[] src, int bgn_pos, int end_pos) {return Match_bgn_w_byte(src[bgn_pos], src, bgn_pos, end_pos);}
public Object Match_bgn_w_byte(byte b, byte[] src, int bgn_pos, int src_len) {
Object rv = null; int cur_pos = match_pos = bgn_pos;
Btrie_slim_itm cur = root;
while (true) {
Btrie_slim_itm nxt = cur.Ary_find(b); if (nxt == null) return rv; // nxt does not hav b; return rv;
++cur_pos;
if (nxt.Ary_is_empty()) {match_pos = cur_pos; return nxt.Val();} // nxt is leaf; return nxt.Val() (which should be non-null)
Object nxt_val = nxt.Val();
if (nxt_val != null) {match_pos = cur_pos; rv = nxt_val;} // nxt is node; cache rv (in case of false match)
if (cur_pos == src_len) return rv; // increment cur_pos and exit if src_len
b = src[cur_pos];
cur = nxt;
}
}
public Btrie_slim_mgr Add_bry_tid(byte[] bry, byte tid) {return (Btrie_slim_mgr)Add_obj(bry, Byte_obj_val.new_(tid));}
public Btrie_slim_mgr Add_str_byte(String key, byte val) {return (Btrie_slim_mgr)Add_obj(Bry_.new_u8(key), Byte_obj_val.new_(val));}
public Btrie_slim_mgr Add_str_int(String key, int val) {return (Btrie_slim_mgr)Add_obj(Bry_.new_u8(key), Int_obj_val.new_(val));}
public Btrie_slim_mgr Add_bry(String key, String val) {return (Btrie_slim_mgr)Add_obj(Bry_.new_u8(key), Bry_.new_u8(val));}
public Btrie_slim_mgr Add_bry(String key, byte[] val) {return (Btrie_slim_mgr)Add_obj(Bry_.new_u8(key), val);}
public Btrie_slim_mgr Add_bry(byte[] v) {return (Btrie_slim_mgr)Add_obj(v, v);}
public Btrie_slim_mgr Add_bry_bval(byte b, byte val) {return (Btrie_slim_mgr)Add_obj(new byte[] {b}, Byte_obj_val.new_(val));}
public Btrie_slim_mgr Add_bry_bval(byte[] bry, byte val) {return (Btrie_slim_mgr)Add_obj(bry, Byte_obj_val.new_(val));}
public Btrie_slim_mgr Add_str_byte__many(byte val, String... ary) {
int ary_len = ary.length;
Byte_obj_val bval = Byte_obj_val.new_(val);
for (int i = 0; i < ary_len; i++)
Add_obj(Bry_.new_u8(ary[i]), bval);
return this;
}
public Btrie_slim_mgr Add_stub(String key, byte val) {byte[] bry = Bry_.new_u8(key); return (Btrie_slim_mgr)Add_obj(bry, new Btrie_itm_stub(val, bry));}
public Btrie_slim_mgr Add_stubs(byte[][] ary) {return Add_stubs(ary, ary.length);}
public Btrie_slim_mgr Add_stubs(byte[][] ary, int ary_len) {
for (byte i = 0; i < ary_len; i++) {
byte[] bry = ary[i];
Add_obj(bry, new Btrie_itm_stub(i, bry));
}
return this;
}
public Btrie_mgr Add_obj(String key, Object val) {return Add_obj(Bry_.new_u8(key), val);}
public Btrie_mgr Add_obj(byte[] key, Object val) {
if (val == null) throw Err_.new_("null objects cannot be registered").Add("key", String_.new_u8(key));
int key_len = key.length; int key_end = key_len - 1;
Btrie_slim_itm cur = root;
for (int i = 0; i < key_len; i++) {
byte b = key[i];
if (root.Case_any() && (b > 64 && b < 91)) b += 32;
Btrie_slim_itm nxt = cur.Ary_find(b);
if (nxt == null)
nxt = cur.Ary_add(b, null);
if (i == key_end)
nxt.Val_set(val);
cur = nxt;
}
count++; // FUTURE: do not increment if replacing value
return this;
}
public void Del(byte[] key) {
int key_len = key.length;
Btrie_slim_itm cur = root;
for (int i = 0; i < key_len; i++) {
byte b = key[i];
Btrie_slim_itm nxt = cur.Ary_find(b);
if (nxt == null) break;
Object nxt_val = nxt.Val();
if (nxt_val == null) // cur is end of chain; remove entry; EX: Abc and at c
cur.Ary_del(b);
else // cur is mid of chain; null out entry
nxt.Val_set(null);
cur = nxt;
}
count--; // FUTURE: do not decrement if not found
}
public byte[] Replace(Bry_bfr tmp_bfr, byte[] src, int bgn, int end) {
int pos = bgn;
boolean dirty = false;
while (pos < end) {
byte b = src[pos];
Object o = this.Match_bgn_w_byte(b, src, pos, end);
if (o == null) {
if (dirty)
tmp_bfr.Add_byte(b);
pos++;
}
else {
if (!dirty) {
tmp_bfr.Add_mid(src, bgn, pos);
dirty = true;
}
tmp_bfr.Add((byte[])o);
pos = match_pos;
}
}
return dirty ? tmp_bfr.Xto_bry_and_clear() : src;
}
public void Clear() {root.Clear(); count = 0;}
public static Btrie_slim_mgr cs_() {return new Btrie_slim_mgr(true);}
public static Btrie_slim_mgr ci_ascii_() {return new Btrie_slim_mgr(false);}
public static Btrie_slim_mgr ci_utf_8_() {return new Btrie_slim_mgr(false);}
public static Btrie_slim_mgr new_(boolean v) {return new Btrie_slim_mgr(v);}
}

View File

@@ -0,0 +1,92 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.core.btries; import gplx.*; import gplx.core.*;
import org.junit.*;
public class Btrie_slim_mgr_tst {
@Before public void init() {
} private Btrie_slim_mgr trie;
private void ini_setup1() {
trie = Btrie_slim_mgr.cs_();
run_Add("a" , 1);
run_Add("abc" , 123);
}
@Test public void Get_by() {
ini_setup1();
tst_MatchAtCur("a" , 1);
tst_MatchAtCur("abc" , 123);
tst_MatchAtCur("ab" , 1);
tst_MatchAtCur("abcde" , 123);
tst_MatchAtCur(" a" , null);
}
@Test public void Bos() {
ini_setup1();
tst_Match("bc", Byte_ascii.Ltr_a, -1, 123);
}
@Test public void Match_exact() {
ini_setup1();
tst_MatchAtCurExact("a", 1);
tst_MatchAtCurExact("ab", null);
tst_MatchAtCurExact("abc", 123);
}
private void ini_setup2() {
trie = Btrie_slim_mgr.cs_();
run_Add("a" , 1);
run_Add("b" , 2);
}
@Test public void Match_2() {
ini_setup2();
tst_MatchAtCur("a", 1);
tst_MatchAtCur("b", 2);
}
private void ini_setup_caseAny() {
trie = Btrie_slim_mgr.ci_ascii_(); // NOTE:ci.ascii:test
run_Add("a" , 1);
run_Add("b" , 2);
}
@Test public void CaseAny() {
ini_setup_caseAny();
tst_MatchAtCur("a", 1);
tst_MatchAtCur("A", 1);
}
@Test public void Del() {
ini_setup1();
trie.Del(Bry_.new_a7("a")); // delete "a"; "abc" still remains;
tst_MatchAtCur("a" , null);
tst_MatchAtCur("abc" , 123);
trie.Del(Bry_.new_a7("abc"));
tst_MatchAtCur("abc" , null);
}
private void run_Add(String k, int val) {trie.Add_obj(Bry_.new_a7(k), val);}
private void tst_Match(String srcStr, byte b, int bgn_pos, int expd) {
byte[] src = Bry_.new_a7(srcStr);
Object actl = trie.Match_bgn_w_byte(b, src, bgn_pos, src.length);
Tfds.Eq(expd, actl);
}
private void tst_MatchAtCur(String srcStr, Object expd) {
byte[] src = Bry_.new_a7(srcStr);
Object actl = trie.Match_bgn_w_byte(src[0], src, 0, src.length);
Tfds.Eq(expd, actl);
}
private void tst_MatchAtCurExact(String srcStr, Object expd) {
byte[] src = Bry_.new_a7(srcStr);
Object actl = trie.Match_exact(src, 0, src.length);
Tfds.Eq(expd, actl);
}
}

View File

@@ -0,0 +1,68 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.core.btries; import gplx.*; import gplx.core.*;
import gplx.intl.*;
class Btrie_utf8_itm {
private Hash_adp_bry nxts;
private byte[] asymmetric_bry;
public Btrie_utf8_itm(byte[] key, Object val) {this.key = key; this.val = val;}
public byte[] Key() {return key;} private byte[] key;
public Object Val() {return val;} public void Val_set(Object val) {this.val = val;} private Object val;
public boolean Nxts_is_empty() {return nxts == null;}
public void Clear() {
val = null;
nxts.Clear();
nxts = null;
}
public Btrie_utf8_itm Nxts_find(byte[] src, int c_bgn, int c_end, boolean called_by_match) {
if (nxts == null) return null;
Object rv_obj = nxts.Get_by_mid(src, c_bgn, c_end);
if (rv_obj == null) return null;
Btrie_utf8_itm rv = (Btrie_utf8_itm)rv_obj;
byte[] asymmetric_bry = rv.asymmetric_bry;
if (asymmetric_bry == null) // itm doesn't have asymmetric_bry; note that this is the case for most items
return rv;
else { // itm has asymmetric_bry; EX: "İ" was added to trie, must match "İ" and "i";
if (called_by_match) { // called by mgr.Match
return
( Bry_.Eq(rv.key, src, c_bgn, c_end) // key matches src; EX: "aİ"
|| Bry_.Eq(rv.asymmetric_bry, src, c_bgn, c_end) // asymmetric_bry matches src; EX: "ai"; note that "aI" won't match
)
? rv : null;
}
else { // called by mgr.Add; this means that an asymmetric_itm was already added; happens when "İ" added first and then "I" added next
rv.asymmetric_bry = null; // always null out asymmetric_bry; note that this noops non-asymmetric itms, while making an asymmetric_itm case-insenstivie (matches İ,i,I); see tests
return rv;
}
}
}
public Btrie_utf8_itm Nxts_add(Gfo_case_mgr case_mgr, byte[] key, Object val) {
Btrie_utf8_itm rv = new Btrie_utf8_itm(key, val);
if (nxts == null) nxts = Hash_adp_bry.ci_utf8_(case_mgr);
nxts.Add_bry_obj(key, rv);
Gfo_case_itm case_itm = case_mgr.Get_or_null(key[0], key, 0, key.length); // get case_item
if (case_itm != null) { // note that case_itm may be null; EX: "__TOC__" and "_"
byte[] asymmetric_bry = case_itm.Asymmetric_bry();
if (asymmetric_bry != null) { // case_itm has asymmetry_bry; only itms in Xol_case_itm_ that are created with Tid_upper and Tid_lower will be non-null
rv.asymmetric_bry = asymmetric_bry; // set itm to asymmetric_bry; EX: for İ, asymmetric_bry = i
nxts.Add_bry_obj(asymmetric_bry, rv); // add the asymmetric_bry to the hash; in above example, this allows "i" to match "İ"
}
}
return rv;
}
}

View File

@@ -0,0 +1,68 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.core.btries; import gplx.*; import gplx.core.*;
import gplx.intl.*;
public class Btrie_utf8_mgr implements Btrie_mgr {
private Btrie_utf8_itm root; private Gfo_case_mgr case_mgr;
Btrie_utf8_mgr(Gfo_case_mgr case_mgr) {
this.case_mgr = case_mgr;
this.root = new Btrie_utf8_itm(Bry_.Empty, null);
}
public int Count() {return count;} private int count;
public int Match_pos() {return match_pos;} private int match_pos;
public Object Match_bgn(byte[] src, int bgn_pos, int end_pos) {return Match_bgn_w_byte(src[bgn_pos], src, bgn_pos, end_pos);}
public Object Match_bgn_w_byte(byte b, byte[] src, int bgn_pos, int end_pos) {
Object rv = null; int cur_pos = match_pos = bgn_pos;
Btrie_utf8_itm cur = root;
while (true) {
int c_len = Utf8_.Len_of_char_by_1st_byte(b);
int c_end = cur_pos + c_len;
Btrie_utf8_itm nxt = cur.Nxts_find(src, cur_pos, c_end, true); if (nxt == null) return rv; // nxts does not have key; return rv;
cur_pos = c_end;
if (nxt.Nxts_is_empty()) {match_pos = cur_pos; return nxt.Val();} // nxt is leaf; return nxt.Val() (which should be non-null)
Object nxt_val = nxt.Val();
if (nxt_val != null) {match_pos = cur_pos; rv = nxt_val;} // nxt is node; cache rv (in case of false match)
if (cur_pos == end_pos) return rv; // increment cur_pos and exit if end
b = src[cur_pos];
cur = nxt;
}
}
public void Clear() {root.Clear(); count = 0;}
public Btrie_mgr Add_obj(String key, Object val) {return Add_obj(Bry_.new_u8(key), val);}
public Btrie_mgr Add_obj(byte[] key, Object val) {
if (val == null) throw Err_.new_("null objects cannot be registered").Add("key", String_.new_u8(key));
int key_len = key.length;
Btrie_utf8_itm cur = root;
int c_bgn = 0;
while (c_bgn < key_len) {
byte c = key[c_bgn];
int c_len = Utf8_.Len_of_char_by_1st_byte(c);
int c_end = c_bgn + c_len;
Btrie_utf8_itm nxt = cur.Nxts_find(key, c_bgn, c_end, false);
if (nxt == null)
nxt = cur.Nxts_add(case_mgr, Bry_.Mid(key, c_bgn, c_end), null);
c_bgn = c_end;
if (c_bgn == key_len)
nxt.Val_set(val);
cur = nxt;
}
++count;
return this;
}
public static Btrie_utf8_mgr new_(Gfo_case_mgr case_mgr) {return new Btrie_utf8_mgr(case_mgr);}
}

View File

@@ -42,7 +42,7 @@ class ErrProcData {
for (int i = 0; i < len; i++) {
ErrProcData md = ErrProcData.parse_(lines[i]);
if (md.SourceLine() == 0) break; // ASSUME: java code; not interested
if (String_.HasAtBgn(md.signatureRaw, "gplx.Err_") || String_.HasAtBgn(md.signatureRaw, "gplx.Err.")) continue; // java includes entire stackTrace from point of creation; only care about point of throw
if (String_.Has_at_bgn(md.signatureRaw, "gplx.Err_") || String_.Has_at_bgn(md.signatureRaw, "gplx.Err.")) continue; // java includes entire stackTrace from point of creation; only care about point of throw
list.Add(md);
}
return (ErrProcData[])list.To_ary(ErrProcData.class);

View File

@@ -414,8 +414,8 @@ public class Bry_ {
if (src[i] == lkp) return true;
return false;
}
public static boolean HasAtEnd(byte[] src, byte[] lkp) {int src_len = src.length; return HasAtEnd(src, lkp, src_len - lkp.length, src_len);}
public static boolean HasAtEnd(byte[] src, byte[] lkp, int src_bgn, int src_end) {
public static boolean Has_at_end(byte[] src, byte[] lkp) {int src_len = src.length; return Has_at_end(src, lkp, src_len - lkp.length, src_len);}
public static boolean Has_at_end(byte[] src, byte[] lkp, int src_bgn, int src_end) {
int lkp_len = lkp.length;
if (src_bgn < 0) return false;
int pos = src_end - lkp_len; if (pos < src_bgn) return false; // lkp is longer than src
@@ -424,11 +424,11 @@ public class Bry_ {
}
return true;
}
public static boolean HasAtBgn(byte[] src, byte lkp, int src_bgn) {
public static boolean Has_at_bgn(byte[] src, byte lkp, int src_bgn) {
return src_bgn < src.length ? src[src_bgn] == lkp : false;
}
public static boolean HasAtBgn(byte[] src, byte[] lkp) {return HasAtBgn(src, lkp, 0, src.length);}
public static boolean HasAtBgn(byte[] src, byte[] lkp, int src_bgn, int src_end) {
public static boolean Has_at_bgn(byte[] src, byte[] lkp) {return Has_at_bgn(src, lkp, 0, src.length);}
public static boolean Has_at_bgn(byte[] src, byte[] lkp, int src_bgn, int src_end) {
int lkp_len = lkp.length;
if (lkp_len + src_bgn > src_end) return false; // lkp is longer than src
for (int i = 0; i < lkp_len; i++) {

View File

@@ -47,7 +47,7 @@ public class Bry__tst {
}
Tfds.Eq_ary(expd, Bry_.XtoStrBytesByInt(val, Int_.DigitCount(val)));
}
@Test public void HasAtEnd() {
@Test public void Has_at_end() {
tst_HasAtEnd("a|bcd|e", "d" , 2, 5, true); // y_basic
tst_HasAtEnd("a|bcd|e", "bcd" , 2, 5, true); // y_many
tst_HasAtEnd("a|bcd|e", "|bcd" , 2, 5, false); // n_long
@@ -56,14 +56,14 @@ public class Bry__tst {
tst_HasAtEnd("abc", "bd", false); // n
tst_HasAtEnd("a", "ab", false); // exceeds_len
}
void tst_HasAtEnd(String src, String find, int bgn, int end, boolean expd) {Tfds.Eq(expd, Bry_.HasAtEnd(Bry_.new_u8(src), Bry_.new_u8(find), bgn, end));}
void tst_HasAtEnd(String src, String find, boolean expd) {Tfds.Eq(expd, Bry_.HasAtEnd(Bry_.new_u8(src), Bry_.new_u8(find)));}
@Test public void HasAtBgn() {
void tst_HasAtEnd(String src, String find, int bgn, int end, boolean expd) {Tfds.Eq(expd, Bry_.Has_at_end(Bry_.new_u8(src), Bry_.new_u8(find), bgn, end));}
void tst_HasAtEnd(String src, String find, boolean expd) {Tfds.Eq(expd, Bry_.Has_at_end(Bry_.new_u8(src), Bry_.new_u8(find)));}
@Test public void Has_at_bgn() {
tst_HasAtBgn("y_basic" , "a|bcd|e", "b" , 2, 5, true);
tst_HasAtBgn("y_many" , "a|bcd|e", "bcd" , 2, 5, true);
tst_HasAtBgn("n_long" , "a|bcd|e", "bcde" , 2, 5, false);
tst_HasAtBgn("n_pos" , "a|bcd|e", "|bc" , 2, 5, false);
} void tst_HasAtBgn(String tst, String src, String find, int bgn, int end, boolean expd) {Tfds.Eq(expd, Bry_.HasAtBgn(Bry_.new_u8(src), Bry_.new_u8(find), bgn, end), tst);}
} void tst_HasAtBgn(String tst, String src, String find, int bgn, int end, boolean expd) {Tfds.Eq(expd, Bry_.Has_at_bgn(Bry_.new_u8(src), Bry_.new_u8(find), bgn, end), tst);}
@Test public void Match() {
tst_Match("abc", 0, "abc", true);
tst_Match("abc", 2, "c", true);

View File

@@ -79,8 +79,8 @@ public class String_ implements GfoInvkAble {
public static char CharAt(String s, int i) {return s.charAt(i);}
public static int CodePointAt(String s, int i) {return s.codePointAt(i);}
public static boolean Has(String s, String find) {return s.indexOf(find) != String_.Find_none;}
public static boolean HasAtBgn(String s, String v) {return s.startsWith(v);}
public static boolean HasAtEnd(String s, String v) {return s.endsWith(v);}
public static boolean Has_at_bgn(String s, String v) {return s.startsWith(v);}
public static boolean Has_at_end(String s, String v) {return s.endsWith(v);}
public static int FindFwd(String s, String find) {return s.indexOf(find);}
public static int FindFwd(String s, String find, int pos) {return s.indexOf(find, pos);}
public static int FindBwd(String s, String find) {return s.lastIndexOf(find);}
@@ -204,7 +204,7 @@ public class String_ implements GfoInvkAble {
}
public static String DelBgnIf(String s, String find) {
if (s == null) throw Err_arg.null_("s"); if (find == null) throw Err_arg.null_("find");
return HasAtBgn(s, find) ? String_.Mid(s, Len(find)) : s;
return Has_at_bgn(s, find) ? String_.Mid(s, Len(find)) : s;
}
public static String DelEnd(String s, int count) {
if (count < 0) throw Err_arg.cannotBe_("< 0", "count", count);
@@ -214,7 +214,7 @@ public class String_ implements GfoInvkAble {
}
public static String DelEndIf(String s, String find) {
if (s == null) throw Err_arg.null_("s"); if (find == null) throw Err_arg.null_("find");
return HasAtEnd(s, find) ? Mid_lang(s, 0, Len(s) - Len(find)) : s;
return Has_at_end(s, find) ? Mid_lang(s, 0, Len(s) - Len(find)) : s;
}
public static String LowerFirst(String s) {
int len = Len(s); if (len == 0) return String_.Empty;

View File

@@ -35,7 +35,7 @@ public class EnmMgr {
String term = String_.Trim(ary[i]); // ex: key.ctrl + key.a
if (prefix != null) term = String_.Replace(term, prefix, "");
int cur = -1;
if (String_.HasAtBgn(term, "#"))
if (String_.Has_at_bgn(term, "#"))
cur = Int_.parse_(String_.Mid(term, 1));
else
cur = Int_.cast_(rawRegy.Get_by(term));

View File

@@ -57,7 +57,7 @@ public class Io_url implements CompareAble, EqAble, ParseAble, GfoInvkAble { //_
}
public String GenRelUrl_orEmpty(Io_url dir) {
String dirRaw = dir.Raw();
return String_.HasAtBgn(raw, dirRaw)
return String_.Has_at_bgn(raw, dirRaw)
? String_.DelBgn(raw, String_.Len(dirRaw))
: String_.Empty;
}

View File

@@ -55,7 +55,7 @@ public class Io_url_ {
private static String parse_http_file(String v, boolean wnt) {
byte[] v_bry = Bry_.new_u8(v);
int v_len = v_bry.length;
if (Bry_.HasAtBgn(v_bry, Io_url.Http_file_bry, 0, v_len)) {
if (Bry_.Has_at_bgn(v_bry, Io_url.Http_file_bry, 0, v_len)) {
byte[] rv = new byte[v_len - Io_url.Http_file_len];
for (int i = 0; i < rv.length; i++) {
byte b = v_bry[i + Io_url.Http_file_len];
@@ -80,13 +80,13 @@ public class Io_url_ {
return Io_url_.new_any_(val);
}
static String EndsWith_or_add(String raw, String endsWith) {
if (String_.HasAtEnd(raw, endsWith)) return raw;
if (String_.Has_at_end(raw, endsWith)) return raw;
return raw += endsWith;
}
public static Io_url Rel_dir(String s) {return IsAbs(s) ? Io_url_.new_dir_(s) : Env_.AppUrl().OwnerDir().GenSubDir(s);}
public static Io_url Rel_fil(String s) {return IsAbs(s) ? Io_url_.new_fil_(s) : Env_.AppUrl().OwnerDir().GenSubFil(s);}
static boolean IsAbs(String s) {
return String_.HasAtBgn(s, Op_sys.Lnx.Fsys_dir_spr_str())
return String_.Has_at_bgn(s, Op_sys.Lnx.Fsys_dir_spr_str())
|| (String_.Len(s) > 2
&& ( (String_.CharAt(s, 1) == ':' && String_.CharAt(s, 2) == '\\')
|| (String_.CharAt(s, 1) == '\\' && String_.CharAt(s, 2) == '\\')

View File

@@ -55,7 +55,7 @@ public class Base64Converter {
public static byte[] Decode(String s){
if (toInt == null) Init();
int sLen = String_.Len(s);
int delta = String_.HasAtEnd(s, "==") ? 2 : String_.HasAtEnd(s, "=") ? 1 : 0;
int delta = String_.Has_at_end(s, "==") ? 2 : String_.Has_at_end(s, "=") ? 1 : 0;
byte[] buffer = new byte[sLen *3/4 - delta];
int mask = 0xFF;
int index = 0;

View File

@@ -573,7 +573,7 @@ class Io_stream_rdr_http implements Io_stream_rdr {
return this;
}
read_done = false;
this.exists = src_conn.getResponseCode() == 200; // ASSUME: response code of 200 means that file exists; note that content_length seems to always be -1; DATE:2015-05-20
this.exists = Int_.In(src_conn.getResponseCode(), 200, 301); // ASSUME: response code of 200 (OK) or 301 (Redirect) means that file exists; note that content_length seems to always be -1; DATE:2015-05-20
src_stream = new java.io.BufferedInputStream(src_conn.getInputStream());
xfer_fmt.Bgn(content_length);
}

View File

@@ -41,7 +41,7 @@ public class IoItmDir extends IoItm_base {
String dirSpr = this.Url().Info().DirSpr(); int dirSprLen = String_.Len(dirSpr);
String currDirStr = this.Url().Raw();
String findDirStr = findDirUrl.Raw();
if (!String_.HasAtBgn(findDirStr, currDirStr)) return null; // findUrl must start with currUrl;
if (!String_.Has_at_bgn(findDirStr, currDirStr)) return null; // findUrl must start with currUrl;
String findName = String_.DelEnd(currDirStr, dirSprLen); // seed findName for String_.MidByLen below;
IoItmDir curDir = this;
while (true) {

View File

@@ -42,7 +42,7 @@ public class IoRecycleBin {
String nameAndExt = url.NameAndExt_noDirSpr() + "|";
for (int i = linesLen; i > 0; i--) {
String line = lines[i - 1];
if (String_.HasAtBgn(line, nameAndExt)) {
if (String_.Has_at_bgn(line, nameAndExt)) {
String[] terms = String_.Split(line, "|");
Io_url origUrl = url.OwnerRoot().GenSubFil(terms[1]);
list.Add(origUrl);

View File

@@ -60,7 +60,7 @@ abstract class IoUrlInfo_base implements IoUrlInfo {
public abstract boolean CaseSensitive();
public abstract boolean Match(String raw);
public abstract String EngineKey();
public boolean IsDir(String raw) {return String_.HasAtEnd(raw, DirSpr());}
public boolean IsDir(String raw) {return String_.Has_at_end(raw, DirSpr());}
public abstract String XtoRootName(String raw, int rawLen);
@gplx.Virtual public String Xto_api(String raw) {
return IsDir(raw)
@@ -142,7 +142,7 @@ class IoUrlInfo_lnx extends IoUrlInfo_base {
@Override public String DirSpr() {return DirSprStr;} static final String DirSprStr = Op_sys.Lnx.Fsys_dir_spr_str();
@Override public byte DirSpr_byte() {return Byte_ascii.Slash;}
@Override public boolean CaseSensitive() {return Op_sys.Lnx.Fsys_case_match();}
@Override public boolean Match(String raw) {return String_.HasAtBgn(raw, DirSprStr);} // anything that starts with /
@Override public boolean Match(String raw) {return String_.Has_at_bgn(raw, DirSprStr);} // anything that starts with /
@Override public String XtoRootName(String raw, int rawLen) {
return rawLen == 1 && String_.Eq(raw, DirSprStr)
? "root"
@@ -180,7 +180,7 @@ class IoUrlInfo_mem extends IoUrlInfo_base {
@Override public String XtoRootName(String raw, int rawLen) {
return String_.Eq(raw, key) ? String_.DelEnd(key, 1) : null;
}
@Override public boolean Match(String raw) {return String_.HasAtBgn(raw, key);}
@Override public boolean Match(String raw) {return String_.Has_at_bgn(raw, key);}
public static IoUrlInfo_mem new_(String key, String engineKey) {
IoUrlInfo_mem rv = new IoUrlInfo_mem();
rv.key = key; rv.engineKey = engineKey;
@@ -196,7 +196,7 @@ class IoUrlInfo_alias extends IoUrlInfo_base {
@Override public String XtoRootName(String raw, int rawLen) {
return String_.Eq(raw, srcRootDir) ? srcRootName : null;
}
@Override public boolean Match(String raw) {return String_.HasAtBgn(raw, srcDir);}
@Override public boolean Match(String raw) {return String_.Has_at_bgn(raw, srcDir);}
@Override public String Xto_api(String raw) {
String rv = String_.Replace(raw, srcDir, trgDir); // replace src with trg
if (!String_.Eq(srcDirSpr, trgDirSpr)) rv = String_.Replace(rv, srcDirSpr, trgDirSpr); // replace dirSprs

View File

@@ -66,7 +66,7 @@ public class Op_sys {
else throw Err_mgr._.fmt_(GRP_KEY, "unknown_bitness", "unknown bitness; expecting 32 or 64; System.getProperty(\"bit.level\") yielded ~{0}", bitness_str);
os_name = System.getProperty("os.name").toLowerCase();
if (String_.HasAtBgn(os_name, "win")) {
if (String_.Has_at_bgn(os_name, "win")) {
String os_version = System.getProperty("os.version").toLowerCase();// "Windows 7".equals(osName) && "6.1".equals(osVersion);
byte sub_tid = Sub_tid_unknown;
if (String_.Eq(os_name, "windows xp") && String_.Eq(os_version, "5.1")) sub_tid = Sub_tid_win_xp;
@@ -75,7 +75,7 @@ public class Op_sys {
return new_wnt_(bitness_byte, sub_tid);
}
else if (String_.Eq(os_name, "linux")) return new_unx_flavor_(Tid_lnx, os_name, bitness_byte);
else if (String_.HasAtBgn(os_name, "mac")) return new_unx_flavor_(Tid_osx, os_name, bitness_byte); // EX:Mac OS X
else if (String_.Has_at_bgn(os_name, "mac")) return new_unx_flavor_(Tid_osx, os_name, bitness_byte); // EX:Mac OS X
else throw Err_mgr._.fmt_(GRP_KEY, "unknown_os_name", "unknown os_name; expecting windows, linux, mac; System.getProperty(\"os.name\") yielded ~{0}", os_name);
} catch (Exception exc) {Drd.os_name = os_name; return Drd;}
}