mirror of
https://github.com/gnosygnu/xowa.git
synced 2026-03-02 03:49:30 +00:00
Search: Add more punctuation support
This commit is contained in:
@@ -29,7 +29,7 @@ public class Xomw_regex_boundary { // THREAD.SAFE: trv is only for consistent in
|
||||
}
|
||||
public boolean Is_boundary_prv(byte[] src, int pos) {
|
||||
if (pos == 0) return true; // BOS is true
|
||||
int bgn = gplx.core.intls.Utf8_.Get_pos0_of_char_bwd(src, pos - 1);
|
||||
int bgn = gplx.core.intls.Utf8_.Get_prv_char_pos0(src, pos);
|
||||
byte b = src[bgn];
|
||||
Object o = trie.Match_at_w_b0(trv, b, src, bgn, pos);
|
||||
return o != null;
|
||||
|
||||
@@ -285,7 +285,7 @@ class Xomw_regex_html_entity {
|
||||
int numbers = 0;
|
||||
int letters = 0;
|
||||
while (cur >= src_end) {
|
||||
int b_bgn = gplx.core.intls.Utf8_.Get_pos0_of_char_bwd(src, cur);
|
||||
int b_bgn = gplx.core.intls.Utf8_.Get_prv_char_pos0_old(src, cur);
|
||||
switch (src[b_bgn]) {
|
||||
case Byte_ascii.Ltr_A: case Byte_ascii.Ltr_B: case Byte_ascii.Ltr_C: case Byte_ascii.Ltr_D: case Byte_ascii.Ltr_E:
|
||||
case Byte_ascii.Ltr_F: case Byte_ascii.Ltr_G: case Byte_ascii.Ltr_H: case Byte_ascii.Ltr_I: case Byte_ascii.Ltr_J:
|
||||
|
||||
Reference in New Issue
Block a user