1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2026-03-02 03:49:30 +00:00
This commit is contained in:
gnosygnu
2014-07-06 22:58:35 -04:00
parent 57f65b4d0c
commit ecbe2918d8
187 changed files with 4184 additions and 2286 deletions

View File

@@ -65,7 +65,7 @@ public class Bry_bfr {
bfr_len += len;
return this;
}
public Bry_bfr Add_bfr(Bry_bfr src) {
public Bry_bfr Add_bfr_and_preserve(Bry_bfr src) {
int len = src.bfr_len;
if (bfr_len + len > bfr_max) Resize((bfr_max + len) * 2);
Bry_.Copy_by_pos(src.bfr, 0, len, bfr, bfr_len);
@@ -74,10 +74,15 @@ public class Bry_bfr {
return this;
}
public Bry_bfr Add_bfr_and_clear(Bry_bfr src) {
Add_bfr(src);
Add_bfr_and_preserve(src);
src.ClearAndReset();
return this;
}
public Bry_bfr Add_bfr_or_mid(boolean escaped, Bry_bfr tmp_bfr, byte[] src, int src_bgn, int src_end) {
return escaped
? this.Add_bfr_and_clear(tmp_bfr)
: this.Add_mid(src, src_bgn, src_end);
}
public Bry_bfr Add_bfr_trim_and_clear(Bry_bfr src, boolean trim_bgn, boolean trim_end) {return Add_bfr_trim_and_clear(src, trim_bgn, trim_end, Bry_.Trim_ary_ws);}
public Bry_bfr Add_bfr_trim_and_clear(Bry_bfr src, boolean trim_bgn, boolean trim_end, byte[] trim_ary) {
int src_len = src.bfr_len;
@@ -326,7 +331,7 @@ public class Bry_bfr {
else if (o_type == Byte.class) Add_byte(Byte_.cast_(o));
else if (o_type == Long.class) Add_long_variable(Long_.cast_(o));
else if (o_type == String.class) Add_str((String)o);
else if (o_type == Bry_bfr.class) Add_bfr((Bry_bfr)o);
else if (o_type == Bry_bfr.class) Add_bfr_and_preserve((Bry_bfr)o);
else if (o_type == DateAdp.class) Add_dte((DateAdp)o);
else if (o_type == Io_url.class) Add(((Io_url)o).RawBry());
else if (o_type == boolean.class) Add_yn(Bool_.cast_(o));
@@ -377,6 +382,20 @@ public class Bry_bfr {
if (new_len > -1) bfr_len = new_len;
return this;
}
public Bry_bfr Trim_end(byte trim_byte) {
if (bfr_len == 0) return this;
int count = 0;
for (int i = bfr_len - 1; i > -1; --i) {
byte b = bfr[i];
if (b == trim_byte)
++count;
else
break;
}
if (count > 0)
this.Del_by(count);
return this;
}
public Bry_bfr Concat_skip_empty(byte[] dlm, byte[]... ary) {
int ary_len = ary.length;
for (int i = 0; i < ary_len; i++) {

View File

@@ -0,0 +1,37 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx;
public class Bry_bfr_ {
public static void Assert_at_end(Bry_bfr bfr, byte assert_byte) {
int len = bfr.Len(); if (len == 0) return;
int assert_count = 0;
byte[] bfr_bry = bfr.Bfr();
for (int i = len - 1; i > -1; --i) {
byte b = bfr_bry[i];
if (b == assert_byte)
++assert_count;
else
break;
}
switch (assert_count) {
case 0: bfr.Add_byte(assert_byte); break;
case 1: break;
default: bfr.Del_by(assert_count - 1); break;
}
}
}

View File

@@ -117,7 +117,7 @@ public class Bry_finder {
}
return rv;
}
public static int Find_bwd_non_ws(byte[] src, int cur, int end) { // get pos of 1st char that is not ws;
public static int Find_bwd_non_ws_or_not_found(byte[] src, int cur, int end) { // get pos of 1st char that is not ws;
if (cur >= src.length) return Bry_finder.Not_found;
for (int i = cur; i >= end; i--) {
byte b = src[i];
@@ -130,6 +130,19 @@ public class Bry_finder {
}
return Bry_finder.Not_found;
}
public static int Find_bwd_non_ws_or_end(byte[] src, int cur, int end) {
if (cur >= src.length) return Bry_finder.Not_found;
for (int i = cur; i >= end; i--) {
byte b = src[i];
switch (b) {
case Byte_ascii.Space: case Byte_ascii.Tab: case Byte_ascii.NewLine: case Byte_ascii.CarriageReturn:
break;
default:
return i;
}
}
return end;
}
public static int Find_bwd_while(byte[] src, int cur, int end, byte while_byte) {
--cur;
while (true) {

View File

@@ -23,7 +23,7 @@ public class Bry_fmtr_arg_ {
public static Bry_fmtr_arg_byt byt_(byte v) {return new Bry_fmtr_arg_byt(v);}
public static Bry_fmtr_arg_int int_(int v) {return new Bry_fmtr_arg_int(v);}
public static Bry_fmtr_arg_bfr bfr_(Bry_bfr v) {return new Bry_fmtr_arg_bfr(v);}
public static Bry_fmtr_arg_bfr_retain bfr_retain_(Bry_bfr v) {return new Bry_fmtr_arg_bfr_retain(v);}
public static Bry_fmtr_arg_bfr_preserve bfr_retain_(Bry_bfr v) {return new Bry_fmtr_arg_bfr_preserve(v);}
public static Bry_fmtr_arg fmtr_(Bry_fmtr v, Bry_fmtr_arg... arg_ary) {return new Bry_fmtr_arg_fmtr(v, arg_ary);}
public static Bry_fmtr_arg_fmtr_objs fmtr_null_() {return new Bry_fmtr_arg_fmtr_objs(null, null);}
public static final Bry_fmtr_arg Null = new Bry_fmtr_arg_null();

View File

@@ -70,7 +70,16 @@ public class Byte_ascii {
;
}
public static final byte[]
Dot_bry = new byte[] {Byte_ascii.Dot}
, NewLine_bry = new byte[] {Byte_ascii.NewLine}
Dot_bry = new byte[] {Byte_ascii.Dot}
, NewLine_bry = new byte[] {Byte_ascii.NewLine}
, Colon_bry = new byte[] {Byte_ascii.Colon}
, Lt_bry = new byte[] {Byte_ascii.Lt}
, Gt_bry = new byte[] {Byte_ascii.Gt}
, Brack_bgn_bry = new byte[] {Byte_ascii.Brack_bgn}
, Apos_bry = new byte[] {Byte_ascii.Apos}
, Pipe_bry = new byte[] {Byte_ascii.Pipe}
, Underline_bry = new byte[] {Byte_ascii.Underline}
, Asterisk_bry = new byte[] {Byte_ascii.Asterisk}
, Space_bry = new byte[] {Byte_ascii.Space}
;
}

View File

@@ -16,8 +16,8 @@ You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.brys; import gplx.*;
public class Bry_fmtr_arg_bfr_retain implements Bry_fmtr_arg {
public Bry_fmtr_arg_bfr_retain Data_(Bry_bfr v) {bfr = v; return this;}
public void XferAry(Bry_bfr trg, int idx) {trg.Add_bfr(bfr);}
public Bry_fmtr_arg_bfr_retain(Bry_bfr bfr) {this.bfr = bfr;} Bry_bfr bfr;
public class Bry_fmtr_arg_bfr_preserve implements Bry_fmtr_arg {
public Bry_fmtr_arg_bfr_preserve Data_(Bry_bfr v) {bfr = v; return this;}
public void XferAry(Bry_bfr trg, int idx) {trg.Add_bfr_and_preserve(bfr);}
public Bry_fmtr_arg_bfr_preserve(Bry_bfr bfr) {this.bfr = bfr;} Bry_bfr bfr;
}

View File

@@ -16,18 +16,23 @@ You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx;
import gplx.intl.*;
public class Hash_adp_bry extends gplx.lists.HashAdp_base implements HashAdp {
Hash_adp_bry(boolean case_match) {
this.case_match = case_match;
key_ref = new Hash_adp_bry_ref(case_match, null, -1, -1);
} private final boolean case_match; private Hash_adp_bry_ref key_ref;
public Object Get_by_bry(byte[] src) {return super.Fetch_base(key_ref.Src_all_(src));}
public Object Get_by_mid(byte[] src, int bgn, int end) {return super.Fetch_base(key_ref.Src_all_set_(src, bgn, end));}
public Hash_adp_bry Add_bry_bry(byte[] key) {this.Add_base(key, key); return this;}
public Hash_adp_bry Add_str_byte(String key, byte val) {this.Add_base(Bry_.new_utf8_(key), Byte_obj_val.new_(val)); return this;}
public Hash_adp_bry Add_str_obj(String key, Object val) {this.Add_base(Bry_.new_utf8_(key), val); return this;}
public Hash_adp_bry Add_bry_byte(byte[] key, byte val) {this.Add_base(key, Byte_obj_val.new_(val)); return this;}
public Hash_adp_bry Add_bry_obj(byte[] key, Object val) {this.Add_base(key, val); return this;}
private final Hash_adp_bry_itm_base proto, key_ref;
Hash_adp_bry(Hash_adp_bry_itm_base proto) {
this.proto = proto;
key_ref = proto.New();
}
@Override protected Object Fetch_base(Object key) {return super.Fetch_base(key_ref.Init((byte[])key));}
@Override protected void Del_base(Object key) {super.Del_base(key_ref.Init((byte[])key));}
@Override protected boolean Has_base(Object key) {return super.Has_base(key_ref.Init((byte[])key));}
public Object Get_by_bry(byte[] src) {return super.Fetch_base(key_ref.Init(src));}
public Object Get_by_mid(byte[] src, int bgn, int end) {return super.Fetch_base(key_ref.Init(src, bgn, end));}
public Hash_adp_bry Add_bry_bry(byte[] key) {this.Add_base(key, key); return this;}
public Hash_adp_bry Add_str_byte(String key, byte val) {this.Add_base(Bry_.new_utf8_(key), Byte_obj_val.new_(val)); return this;}
public Hash_adp_bry Add_str_obj(String key, Object val) {this.Add_base(Bry_.new_utf8_(key), val); return this;}
public Hash_adp_bry Add_bry_byte(byte[] key, byte val) {this.Add_base(key, Byte_obj_val.new_(val)); return this;}
public Hash_adp_bry Add_bry_obj(byte[] key, Object val) {this.Add_base(key, val); return this;}
public Hash_adp_bry Add_many_str(String... ary) {
int ary_len = ary.length;
for (int i = 0; i < ary_len; i++) {
@@ -39,38 +44,57 @@ public class Hash_adp_bry extends gplx.lists.HashAdp_base implements HashAdp {
}
@Override protected void Add_base(Object key, Object val) {
byte[] key_bry = (byte[])key;
super.Add_base(new Hash_adp_bry_ref(case_match, key_bry, 0, key_bry.length), val);
Hash_adp_bry_itm_base key_itm = proto.New();
key_itm.Init(key_bry, 0, key_bry.length);
super.Add_base(key_itm, val);
}
@Override protected void Del_base(Object key) {super.Del_base(key_ref.Src_all_((byte[])key));}
@Override protected boolean Has_base(Object key) {return super.Has_base(key_ref.Src_all_((byte[])key));}
@Override protected Object Fetch_base(Object key) {return super.Fetch_base(key_ref.Src_all_((byte[])key));}
public static Hash_adp_bry cs_() {return new Hash_adp_bry(true);}
public static Hash_adp_bry ci_() {return new Hash_adp_bry(false);}
public static Hash_adp_bry ci_ascii_() {return new Hash_adp_bry(false);}
public static Hash_adp_bry cs_() {return new Hash_adp_bry(Hash_adp_bry_itm_cs._);}
public static Hash_adp_bry ci_ascii_() {return new Hash_adp_bry(Hash_adp_bry_itm_ci_ascii._);}
public static Hash_adp_bry ci_utf8_(Gfo_case_mgr case_mgr) {return new Hash_adp_bry(Hash_adp_bry_itm_ci_utf8.get_or_new(case_mgr));}
public static Hash_adp_bry c__utf8_(boolean case_match, Gfo_case_mgr case_mgr) {return case_match ? cs_() : ci_utf8_(case_mgr);}
public static Hash_adp_bry ci_() {return new Hash_adp_bry(Hash_adp_bry_itm_ci_ascii._);}
}
class Hash_adp_bry_ref {
public Hash_adp_bry_ref(boolean case_match, byte[] src, int src_bgn, int src_end) {this.case_match = case_match; this.src = src; this.src_bgn = src_bgn; this.src_end = src_end;}
final boolean case_match;
public byte[] Src() {return src;} private byte[] src;
public Hash_adp_bry_ref Src_all_(byte[] v) {
this.src = v;
this.src_bgn = 0;
this.src_end = v.length;
return this;
}
public Hash_adp_bry_ref Src_all_set_(byte[] v, int src_bgn, int src_end) {
this.src = v;
this.src_bgn = src_bgn;
this.src_end = src_end;
return this;
}
public int Src_bgn() {return src_bgn;} int src_bgn;
public int Src_end() {return src_end;} int src_end;
abstract class Hash_adp_bry_itm_base {
public abstract Hash_adp_bry_itm_base New();
public Hash_adp_bry_itm_base Init(byte[] src) {return this.Init(src, 0, src.length);}
public abstract Hash_adp_bry_itm_base Init(byte[] src, int src_bgn, int src_end);
}
class Hash_adp_bry_itm_cs extends Hash_adp_bry_itm_base {
private byte[] src; int src_bgn, src_end;
@Override public Hash_adp_bry_itm_base New() {return new Hash_adp_bry_itm_cs();}
@Override public Hash_adp_bry_itm_base Init(byte[] src, int src_bgn, int src_end) {this.src = src; this.src_bgn = src_bgn; this.src_end = src_end; return this;}
@Override public int hashCode() {
int rv = 0;
for (int i = src_bgn; i < src_end; i++) {
int b_int = src[i] & 0xFF; // JAVA: patch
if (!case_match && b_int > 64 && b_int < 91) // 64=before A; 91=after Z; NOTE: lowering upper-case on PERF assumption that there will be more lower-case letters than upper-case
rv = (31 * rv) + b_int;
}
return rv;
}
@Override public boolean equals(Object obj) {
if (obj == null) return false;
Hash_adp_bry_itm_cs comp = (Hash_adp_bry_itm_cs)obj;
byte[] comp_src = comp.src; int comp_bgn = comp.src_bgn, comp_end = comp.src_end;
int comp_len = comp_end - comp_bgn, src_len = src_end - src_bgn;
if (comp_len != src_len) return false;
for (int i = 0; i < comp_len; i++) {
int src_pos = src_bgn + i;
if (src_pos >= src_end) return false; // ran out of src; exit; EX: src=ab; find=abc
if (src[src_pos] != comp_src[i + comp_bgn]) return false;
}
return true;
}
public static final Hash_adp_bry_itm_cs _ = new Hash_adp_bry_itm_cs(); Hash_adp_bry_itm_cs() {}
}
class Hash_adp_bry_itm_ci_ascii extends Hash_adp_bry_itm_base {
private byte[] src; int src_bgn, src_end;
@Override public Hash_adp_bry_itm_base New() {return new Hash_adp_bry_itm_ci_ascii();}
@Override public Hash_adp_bry_itm_base Init(byte[] src, int src_bgn, int src_end) {this.src = src; this.src_bgn = src_bgn; this.src_end = src_end; return this;}
@Override public int hashCode() {
int rv = 0;
for (int i = src_bgn; i < src_end; i++) {
int b_int = src[i] & 0xFF; // JAVA: patch
if (b_int > 64 && b_int < 91) // 64=before A; 91=after Z; NOTE: lowering upper-case on PERF assumption that there will be more lower-case letters than upper-case
b_int += 32;
rv = (31 * rv) + b_int;
}
@@ -78,24 +102,77 @@ class Hash_adp_bry_ref {
}
@Override public boolean equals(Object obj) {
if (obj == null) return false;
Hash_adp_bry_ref comp = (Hash_adp_bry_ref)obj;
byte[] comp_src = comp.Src(); int comp_bgn = comp.Src_bgn(), comp_end = comp.Src_end();
Hash_adp_bry_itm_ci_ascii comp = (Hash_adp_bry_itm_ci_ascii)obj;
byte[] comp_src = comp.src; int comp_bgn = comp.src_bgn, comp_end = comp.src_end;
int comp_len = comp_end - comp_bgn, src_len = src_end - src_bgn;
if (comp_len != src_len) return false;
for (int i = 0; i < comp_len; i++) {
int src_pos = src_bgn + i;
if (src_pos >= src_end) return false; // ran out of src; exit; EX: src=ab; find=abc
if (case_match) {
if (src[src_pos] != comp_src[i + comp_bgn]) return false;
byte src_byte = src[src_pos];
if (src_byte > 64 && src_byte < 91) src_byte += 32;
byte comp_byte = comp_src[i + comp_bgn];
if (comp_byte > 64 && comp_byte < 91) comp_byte += 32;
if (src_byte != comp_byte) return false;
}
return true;
}
public static final Hash_adp_bry_itm_ci_ascii _ = new Hash_adp_bry_itm_ci_ascii(); Hash_adp_bry_itm_ci_ascii() {}
}
class Hash_adp_bry_itm_ci_utf8 extends Hash_adp_bry_itm_base {
private final Gfo_case_mgr case_mgr;
Hash_adp_bry_itm_ci_utf8(Gfo_case_mgr case_mgr) {this.case_mgr = case_mgr;}
private byte[] src; int src_bgn, src_end;
@Override public Hash_adp_bry_itm_base New() {return new Hash_adp_bry_itm_ci_utf8(case_mgr);}
@Override public Hash_adp_bry_itm_base Init(byte[] src, int src_bgn, int src_end) {this.src = src; this.src_bgn = src_bgn; this.src_end = src_end; return this;}
@Override public int hashCode() {
int rv = 0;
for (int i = src_bgn; i < src_end; i++) {
byte b = src[i];
int b_int = b & 0xFF; // JAVA: patch
Gfo_case_itm itm = case_mgr.Get_or_null(b, src, i, src_end);
if (itm == null) { // unknown itm; byte is a number, symbol, or unknown; just use the existing byte
}
else { // known itm; use its hash_code
b_int = itm.Hashcode_lo();
i += itm.Len_lo() - 1;
}
rv = (31 * rv) + b_int;
}
return rv;
}
@Override public boolean equals(Object obj) {
if (obj == null) return false;
Hash_adp_bry_itm_ci_utf8 trg_itm = (Hash_adp_bry_itm_ci_utf8)obj;
byte[] trg = trg_itm.src; int trg_bgn = trg_itm.src_bgn, trg_end = trg_itm.src_end;
int trg_len = trg_end - trg_bgn, src_len = src_end - src_bgn;
if (trg_len != src_len) return false;
for (int i = 0; i < trg_len; i++) { // ASSUME: upper/lower have same width; i.e.: upper'ing a character doesn't go from a 2-width byte to a 3-width byte
int src_pos = src_bgn + i;
if (src_pos >= src_end) return false; // ran out of src; exit; EX: src=ab; find=abc
byte src_byte = src[src_pos];
byte trg_byte = trg[i + trg_bgn];
Gfo_case_itm src_case_itm = case_mgr.Get_or_null(src_byte, src, i, src_len);
Gfo_case_itm trg_case_itm = case_mgr.Get_or_null(trg_byte, trg, i, trg_len);
if (src_case_itm != null && trg_case_itm == null) return false;
else if (src_case_itm == null && trg_case_itm != null) return false;
else if (src_case_itm == null && trg_case_itm == null) {
if (src_byte != trg_byte) return false;
}
else {
byte src_byte = src[src_pos];
if (src_byte > 64 && src_byte < 91) src_byte += 32;
byte comp_byte = comp_src[i + comp_bgn];
if (comp_byte > 64 && comp_byte < 91) comp_byte += 32;
if (src_byte != comp_byte) return false;
if (!src_case_itm.Eq_lo(trg_case_itm)) return false;
i += src_case_itm.Len_lo() - 1;
}
}
return true;
}
public static Hash_adp_bry_itm_ci_utf8 get_or_new(Gfo_case_mgr case_mgr) {
switch (case_mgr.Tid()) {
case Gfo_case_mgr_.Tid_ascii: if (Itm_ascii == null) Itm_ascii = new Hash_adp_bry_itm_ci_utf8(case_mgr); return Itm_ascii;
case Gfo_case_mgr_.Tid_utf8: if (Itm_utf8 == null) Itm_utf8 = new Hash_adp_bry_itm_ci_utf8(case_mgr); return Itm_utf8;
case Gfo_case_mgr_.Tid_custom: return new Hash_adp_bry_itm_ci_utf8(case_mgr);
default: throw Err_.unhandled(case_mgr.Tid());
}
}
private static Hash_adp_bry_itm_ci_utf8 Itm_ascii, Itm_utf8;
}

View File

@@ -0,0 +1,23 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.intl; import gplx.*;
public interface Gfo_case_itm {
boolean Eq_lo(Gfo_case_itm itm);
int Hashcode_lo();
int Len_lo();
}

View File

@@ -0,0 +1,22 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.intl; import gplx.*;
public interface Gfo_case_mgr {
byte Tid();
Gfo_case_itm Get_or_null(byte bgn_byte, byte[] src, int bgn, int end);
}

View File

@@ -0,0 +1,21 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.intl; import gplx.*;
public class Gfo_case_mgr_ {
public static final byte Tid_ascii = 0, Tid_utf8 = 1, Tid_custom = 2;
}