1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2025-06-13 12:54:14 +00:00
gnosygnu_xowa/400_xowa/src/gplx/xowa/mediawiki/XophpPreg.java

113 lines
3.1 KiB
Java

/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki; import gplx.*; import gplx.xowa.*;
import gplx.core.btries.*; import gplx.core.brys.*;
import gplx.core.primitives.*;
public class XophpPreg {
public static byte[][] split(Int_list list, byte[] src, int src_bgn, int src_end, byte[] dlm, boolean extend) {
// find delimiters
int dlm_len = dlm.length;
byte dlm_nth = dlm[dlm_len - 1];
int i = src_bgn;
list.Add(src_bgn);
while (true) {
if (i == src_end) break;
int dlm_end = i + dlm_len;
if (dlm_end <= src_end && Bry_.Eq(src, i, dlm_end, dlm)) {
if (extend) {
dlm_end = Bry_find_.Find_fwd_while(src, i, src_end, dlm_nth);
}
list.Add(i);
list.Add(dlm_end);
i = dlm_end;
}
else
i++;
}
list.Add(src_end);
// create brys
int rv_len = list.Len() - 1;
if (rv_len == 1) {
list.Clear();
return null;
}
if (list.Get_at(list.Len() - 2) == src_end) { // if 2nd to last elem == src_end, then last item is Bry_.Empty; ignore it; EX: "a''" -> "a", "''" x> "a", "''", ""
rv_len--;
}
byte[][] rv = new byte[rv_len][];
for (i = 0; i < rv_len; i += 2) {
rv[i ] = Bry_.Mid(src, list.Get_at(i + 0), list.Get_at(i + 1));
if (i + 1 == rv_len) break;
rv[i + 1] = Bry_.Mid(src, list.Get_at(i + 1), list.Get_at(i + 2));
}
list.Clear();
return rv;
}
public static Object match(Btrie_slim_mgr trie, Btrie_rv trv, byte[] src, int src_bgn, int src_end) {
trv.Match_bgn = -1;
int cur = src_bgn;
while (cur < src_end) {
byte b = src[cur];
Object o = trie.Match_at_w_b0(trv, b, src, cur, src_end);
if (o == null)
cur += gplx.core.intls.Utf8_.Len_of_char_by_1st_byte(b);
else {
trv.Match_bgn = cur;
return o;
}
}
return null;
}
public static void replace(Bry_tmp bry, Bry_bfr tmp, Btrie_slim_mgr find_trie, Btrie_rv trv, byte[] repl_bry) {
byte[] src = bry.src;
int src_bgn = bry.src_bgn;
int src_end = bry.src_end;
int cur = src_bgn;
int prv = cur;
boolean dirty = false;
while (true) {
// eos
if (cur == src_end) {
if (dirty) {
tmp.Add_mid(src, prv, src_end);
}
break;
}
byte b = src[cur];
Object o = find_trie.Match_at_w_b0(trv, b, src, cur, src_end);
if (o == null) {
cur += gplx.core.intls.Utf8_.Len_of_char_by_1st_byte(b);
}
else {
dirty = true;
tmp.Add_mid(src, prv, cur);
tmp.Add(repl_bry);
cur = trv.Pos();
prv = cur;
}
}
if (dirty) {
bry.Set_by_bfr(tmp);
}
}
}