You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
gnosygnu_xowa/100_core/src/gplx/Bry_find_.java

362 lines
14 KiB

/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx;
public class Bry_find_ {
public static final int Not_found = -1;
public static int Find_fwd(byte[] src, byte lkp) {return Find_fwd(src, lkp, 0, src.length);}
public static int Find_fwd(byte[] src, byte lkp, int cur) {return Find_fwd(src, lkp, cur, src.length);}
public static int Find_fwd(byte[] src, byte lkp, int cur, int end) {
for (int i = cur; i < end; i++)
if (src[i] == lkp) return i;
return Bry_find_.Not_found;
}
public static int Find_fwd_or(byte[] src, byte lkp, int cur, int end, int or) {
int rv = Find_fwd(src, lkp, cur, end);
return rv == Bry_find_.Not_found ? or : rv;
}
public static int Find_bwd(byte[] src, byte lkp) {return Find_bwd(src, lkp, src.length, 0);}
public static int Find_bwd(byte[] src, byte lkp, int cur) {return Find_bwd(src, lkp, cur , 0);}
public static int Find_bwd(byte[] src, byte lkp, int cur, int end) {
--cur; // always subtract 1 from cur; allows passing in src_len or cur_pos without forcing caller to subtract - 1; DATE:2014-02-11
--end;
for (int i = cur; i > end; i--)
if (src[i] == lkp) return i;
return Bry_find_.Not_found;
}
public static int Move_fwd(byte[] src, byte lkp, int cur, int end) {
int rv = Find_fwd(src, lkp, cur, src.length);
return rv == Bry_find_.Not_found ? rv : rv + 1;
}
public static int Move_fwd(byte[] src, byte[] lkp, int cur) {return Move_fwd(src, lkp, cur, src.length);}
public static int Move_fwd(byte[] src, byte[] lkp, int cur, int end) {
int rv = Find_fwd(src, lkp, cur, src.length);
return rv == Bry_find_.Not_found ? rv : rv + lkp.length;
}
public static int Find_fwd(byte[] src, byte[] lkp) {return Find(src, lkp, 0 , src.length, true);}
public static int Find_fwd(byte[] src, byte[] lkp, int cur) {return Find(src, lkp, cur , src.length, true);}
public static int Find_fwd(byte[] src, byte[] lkp, int cur, int end) {return Find(src, lkp, cur , end, true);}
public static int Find(byte[] src, byte[] lkp, int src_bgn, int src_end, boolean fwd) {
if (src_bgn < 0 || src.length == 0) return Bry_find_.Not_found;
int dif, lkp_len = lkp.length, lkp_bgn, lkp_end, src_end_chk;
if (fwd) {
if (src_bgn > src_end) return Bry_find_.Not_found;
dif = 1; lkp_bgn = 0; lkp_end = lkp_len; src_end_chk = src_end - CompareAble_.OffsetCompare;
}
else {
if (src_bgn < src_end) return Bry_find_.Not_found;
dif = -1; lkp_bgn = lkp_len - 1; lkp_end = -1; src_end_chk = src.length - CompareAble_.OffsetCompare; // src_end_chk needed when going bwd, b/c lkp_len may be > 1
}
while (src_bgn != src_end) { // while src is not done;
int lkp_cur = lkp_bgn;
while (lkp_cur != lkp_end) { // while lkp is not done
int pos = src_bgn + lkp_cur;
if ( pos > src_end_chk // outside bounds; occurs when lkp_len > 1
|| src[pos] != lkp[lkp_cur]) // srcByte doesn't match lkpByte
break;
else
lkp_cur += dif;
}
if (lkp_cur == lkp_end) return src_bgn; // lkp matches src; exit
src_bgn += dif;
}
return Bry_find_.Not_found;
}
public static int Find_bwd(byte[] src, byte[] lkp, int cur) {return Find_bwd(src, lkp, cur , 0);}
public static int Find_bwd(byte[] src, byte[] lkp, int cur, int end) {
if (cur < 1) return Bry_find_.Not_found;
--cur; // always subtract 1 from cur; allows passing in src_len or cur_pos without forcing caller to subtract - 1; DATE:2014-02-11
--end;
int src_len = src.length;
int lkp_len = lkp.length;
for (int i = cur; i > end; i--) {
if (i + lkp_len > src_len) continue; // lkp too small for pos; EX: src=abcde; lkp=bcd; pos=4
boolean match = true;
for (int j = 0; j < lkp_len; j++) {
if (lkp[j] != src[i + j]) {
match = false;
break;
}
}
if (match) return i;
}
return Bry_find_.Not_found;
}
public static int Find_bwd_last_ws(byte[] src, int cur) {
if (cur < 1) return Bry_find_.Not_found;
--cur;
int rv = Bry_find_.Not_found;
for (int i = cur; i > -1; i--) {
byte b = src[i];
switch (b) {
case Byte_ascii.Space: case Byte_ascii.Tab: case Byte_ascii.Nl: case Byte_ascii.Cr:
rv = i;
break;
default:
i = -1;
break;
}
}
return rv;
}
public static int Find_bwd_ws(byte[] src, int cur, int end) {
for (int i = cur; i > -1; --i) {
byte b = src[i];
switch (b) {
case Byte_ascii.Space: case Byte_ascii.Tab: case Byte_ascii.Nl: case Byte_ascii.Cr:
return i;
}
}
return Bry_find_.Not_found;
}
public static int Find_fwd_last_ws(byte[] src, int cur) {
int end = src.length;
if (cur >= end) return Bry_find_.Not_found;
int rv = Bry_find_.Not_found;
for (int i = cur; i < end; i++) {
byte b = src[i];
switch (b) {
case Byte_ascii.Space: case Byte_ascii.Tab: case Byte_ascii.Nl: case Byte_ascii.Cr:
rv = i;
break;
default:
i = -1;
break;
}
}
return rv;
}
public static int Find_bwd_non_ws_or_not_found(byte[] src, int cur, int end) { // get pos of 1st char that is not ws;
if (cur >= src.length) return Bry_find_.Not_found;
for (int i = cur; i >= end; i--) {
byte b = src[i];
switch (b) {
case Byte_ascii.Space: case Byte_ascii.Tab: case Byte_ascii.Nl: case Byte_ascii.Cr:
break;
default:
return i;
}
}
return Bry_find_.Not_found;
}
public static int Find_bwd_non_ws_or_end(byte[] src, int cur, int end) {
if (cur >= src.length) return Bry_find_.Not_found;
for (int i = cur; i >= end; i--) {
byte b = src[i];
switch (b) {
case Byte_ascii.Space: case Byte_ascii.Tab: case Byte_ascii.Nl: case Byte_ascii.Cr:
break;
default:
return i;
}
}
return end;
}
public static int Find_bwd__skip_ws(byte[] src, int end, int bgn) {
int src_len = src.length;
if (end == src_len) return end;
if (end > src_len || end < 0) return Bry_find_.Not_found;
int pos = end - 1; // start from end - 1; handles situations where len is passed in
for (int i = pos; i >= bgn; --i) {
switch (src[i]) {
case Byte_ascii.Space: case Byte_ascii.Tab: case Byte_ascii.Nl: case Byte_ascii.Cr:
break;
default:
return i + 1;
}
}
return bgn;
}
public static int Find_bwd__skip(byte[] src, int end, int bgn, byte skip) {
int src_len = src.length; // if (end == src_len) return end;
if (end > src_len || end < 0) return Bry_find_.Not_found;
int pos = end - 1; // start from end - 1; handles situations where len is passed in
for (int i = pos; i >= bgn; --i) {
if (src[i] != skip)
return i + 1;
}
return bgn;
}
public static int Find_bwd__skip(byte[] src, int end, int bgn, byte[] skip) {
int src_len = src.length;
if (end > src_len || end < 0) return Bry_find_.Not_found;
int skip_len = skip.length;
int pos = end - skip_len; // start from end - 1; handles situations where len is passed in
for (int i = pos; i >= bgn; --i) {
if (!Bry_.Eq(src, i, i + skip_len, skip))
return i + skip_len;
}
return bgn;
}
public static int Find_bwd_while(byte[] src, int cur, int end, byte while_byte) {
--cur;
while (true) {
if ( cur < end
|| src[cur] != while_byte) return cur;
--cur;
}
}
public static int Find_fwd_while(byte[] src, int cur, int end, byte while_byte) {
while (true) {
if ( cur == end
|| src[cur] != while_byte) return cur;
cur++;
}
}
public static int Find_fwd_until(byte[] src, int cur, int end, byte until_byte) {
while (true) {
if ( cur == end
|| src[cur] == until_byte) return cur;
cur++;
}
}
public static int Find_fwd_until_space_or_tab(byte[] src, int cur, int end) {
while (true) {
if (cur == end) return Bry_find_.Not_found;
switch (src[cur]) {
case Byte_ascii.Space: case Byte_ascii.Tab:
return cur;
default:
++cur;
break;
}
}
}
public static int Find_fwd_until_ws(byte[] src, int cur, int end) {
while (true) {
if (cur == end) return Bry_find_.Not_found;
switch (src[cur]) {
case Byte_ascii.Space: case Byte_ascii.Tab: case Byte_ascii.Nl: case Byte_ascii.Cr:
return cur;
default:
++cur;
break;
}
}
}
public static int Find_fwd_while_space_or_tab(byte[] src, int cur, int end) {
while (true) {
if (cur == end) return cur;
switch (src[cur]) {
case Byte_ascii.Space: case Byte_ascii.Tab: ++cur; break;
default: return cur;
}
}
}
public static int Trim_fwd_space_tab(byte[] src, int cur, int end) {
while (true) {
if (cur == end) return cur;
switch (src[cur]) {
case Byte_ascii.Space: case Byte_ascii.Tab: ++cur; break;
default: return cur;
}
}
}
public static int Trim_bwd_space_tab(byte[] src, int cur, int bgn) {
while (true) {
int prv_cur = cur - 1; // check byte before cur; EX: "a b " will have len of 4, and pass cur=4;
if (prv_cur < bgn) return cur; // checking byte before prv; exit;
switch (src[prv_cur]) {
case Byte_ascii.Space: case Byte_ascii.Tab: --cur; break;
default: return cur;
}
}
}
public static int Find_fwd_while_ws(byte[] src, int cur, int end) {
while (true) {
if (cur == end) return cur;
try {
switch (src[cur]) {
case Byte_ascii.Nl: case Byte_ascii.Cr:
case Byte_ascii.Space: case Byte_ascii.Tab: ++cur; break;
default: return cur;
}
} catch (Exception e) {throw Err_.new_exc(e, "core", "idx is invalid", "cur", cur, "src", src);}
}
}
public static int Find_fwd_while_letter(byte[] src, int cur, int end) {
while (cur < end) {
switch (src[cur]) {
case Byte_ascii.Ltr_A: case Byte_ascii.Ltr_B: case Byte_ascii.Ltr_C: case Byte_ascii.Ltr_D: case Byte_ascii.Ltr_E:
case Byte_ascii.Ltr_F: case Byte_ascii.Ltr_G: case Byte_ascii.Ltr_H: case Byte_ascii.Ltr_I: case Byte_ascii.Ltr_J:
case Byte_ascii.Ltr_K: case Byte_ascii.Ltr_L: case Byte_ascii.Ltr_M: case Byte_ascii.Ltr_N: case Byte_ascii.Ltr_O:
case Byte_ascii.Ltr_P: case Byte_ascii.Ltr_Q: case Byte_ascii.Ltr_R: case Byte_ascii.Ltr_S: case Byte_ascii.Ltr_T:
case Byte_ascii.Ltr_U: case Byte_ascii.Ltr_V: case Byte_ascii.Ltr_W: case Byte_ascii.Ltr_X: case Byte_ascii.Ltr_Y: case Byte_ascii.Ltr_Z:
case Byte_ascii.Ltr_a: case Byte_ascii.Ltr_b: case Byte_ascii.Ltr_c: case Byte_ascii.Ltr_d: case Byte_ascii.Ltr_e:
case Byte_ascii.Ltr_f: case Byte_ascii.Ltr_g: case Byte_ascii.Ltr_h: case Byte_ascii.Ltr_i: case Byte_ascii.Ltr_j:
case Byte_ascii.Ltr_k: case Byte_ascii.Ltr_l: case Byte_ascii.Ltr_m: case Byte_ascii.Ltr_n: case Byte_ascii.Ltr_o:
case Byte_ascii.Ltr_p: case Byte_ascii.Ltr_q: case Byte_ascii.Ltr_r: case Byte_ascii.Ltr_s: case Byte_ascii.Ltr_t:
case Byte_ascii.Ltr_u: case Byte_ascii.Ltr_v: case Byte_ascii.Ltr_w: case Byte_ascii.Ltr_x: case Byte_ascii.Ltr_y: case Byte_ascii.Ltr_z:
break;
default:
return cur;
}
++cur;
}
return cur;
}
public static int Find_fwd_while_num(byte[] src) {return Find_fwd_while_num(src, 0, src.length);}
public static int Find_fwd_while_num(byte[] src, int cur, int end) {
while (cur < end) {
if (!Byte_ascii.Is_num(src[cur]))
return cur;
++cur;
}
return cur;
}
public static int Find_fwd_while_not_ws(byte[] src, int cur, int end) {
while (true) {
if (cur == end) return cur;
switch (src[cur]) {
case Byte_ascii.Space:
case Byte_ascii.Nl:
case Byte_ascii.Tab:
case Byte_ascii.Cr:
++cur;
break;
default:
return cur;
}
}
}
public static int Find_bwd_while_alphanum(byte[] src, int cur) {return Find_bwd_while_alphanum(src, cur, -1);}
public static int Find_bwd_while_alphanum(byte[] src, int cur, int end) {
--cur;
while (cur > end) {
switch (src[cur]) {
case Byte_ascii.Num_0: case Byte_ascii.Num_1: case Byte_ascii.Num_2: case Byte_ascii.Num_3: case Byte_ascii.Num_4:
case Byte_ascii.Num_5: case Byte_ascii.Num_6: case Byte_ascii.Num_7: case Byte_ascii.Num_8: case Byte_ascii.Num_9:
case Byte_ascii.Ltr_A: case Byte_ascii.Ltr_B: case Byte_ascii.Ltr_C: case Byte_ascii.Ltr_D: case Byte_ascii.Ltr_E:
case Byte_ascii.Ltr_F: case Byte_ascii.Ltr_G: case Byte_ascii.Ltr_H: case Byte_ascii.Ltr_I: case Byte_ascii.Ltr_J:
case Byte_ascii.Ltr_K: case Byte_ascii.Ltr_L: case Byte_ascii.Ltr_M: case Byte_ascii.Ltr_N: case Byte_ascii.Ltr_O:
case Byte_ascii.Ltr_P: case Byte_ascii.Ltr_Q: case Byte_ascii.Ltr_R: case Byte_ascii.Ltr_S: case Byte_ascii.Ltr_T:
case Byte_ascii.Ltr_U: case Byte_ascii.Ltr_V: case Byte_ascii.Ltr_W: case Byte_ascii.Ltr_X: case Byte_ascii.Ltr_Y: case Byte_ascii.Ltr_Z:
case Byte_ascii.Ltr_a: case Byte_ascii.Ltr_b: case Byte_ascii.Ltr_c: case Byte_ascii.Ltr_d: case Byte_ascii.Ltr_e:
case Byte_ascii.Ltr_f: case Byte_ascii.Ltr_g: case Byte_ascii.Ltr_h: case Byte_ascii.Ltr_i: case Byte_ascii.Ltr_j:
case Byte_ascii.Ltr_k: case Byte_ascii.Ltr_l: case Byte_ascii.Ltr_m: case Byte_ascii.Ltr_n: case Byte_ascii.Ltr_o:
case Byte_ascii.Ltr_p: case Byte_ascii.Ltr_q: case Byte_ascii.Ltr_r: case Byte_ascii.Ltr_s: case Byte_ascii.Ltr_t:
case Byte_ascii.Ltr_u: case Byte_ascii.Ltr_v: case Byte_ascii.Ltr_w: case Byte_ascii.Ltr_x: case Byte_ascii.Ltr_y: case Byte_ascii.Ltr_z:
--cur;
break;
default:
return cur;
}
}
return 0; // always return a valid index
}
}