mirror of
https://github.com/gnosygnu/xowa.git
synced 2026-03-02 03:49:30 +00:00
Mw_parse: Mass checkin of various mediawiki parse files
This commit is contained in:
@@ -16,6 +16,7 @@ You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.langs.phps.utls; import gplx.*; import gplx.langs.*; import gplx.langs.phps.*;
|
||||
import gplx.core.btries.*;
|
||||
import gplx.core.primitives.*;
|
||||
public class Php_preg_ {
|
||||
public static byte[][] Split(Int_list list, byte[] src, int src_bgn, int src_end, byte[] dlm, boolean extend) {
|
||||
@@ -27,7 +28,7 @@ public class Php_preg_ {
|
||||
while (true) {
|
||||
if (i == src_end) break;
|
||||
int dlm_end = i + dlm_len;
|
||||
if (dlm_end < src_end && Bry_.Eq(src, i, dlm_end, dlm)) {
|
||||
if (dlm_end <= src_end && Bry_.Eq(src, i, dlm_end, dlm)) {
|
||||
if (extend) {
|
||||
dlm_end = Bry_find_.Find_fwd_while(src, i, src_end, dlm_nth);
|
||||
}
|
||||
@@ -42,13 +43,33 @@ public class Php_preg_ {
|
||||
|
||||
// create brys
|
||||
int rv_len = list.Len() - 1;
|
||||
if (rv_len == 1) return null;
|
||||
if (rv_len == 1) {
|
||||
list.Clear();
|
||||
return null;
|
||||
}
|
||||
if (list.Get_at(list.Len() - 2) == src_end) { // if 2nd to last elem == src_end, then last item is Bry_.Empty; ignore it; EX: "a''" -> "a", "''" x> "a", "''", ""
|
||||
rv_len--;
|
||||
}
|
||||
byte[][] rv = new byte[rv_len][];
|
||||
for (i = 0; i < rv_len; i += 2) {
|
||||
rv[i ] = Bry_.Mid(src, list.Get_at(i + 0), list.Get_at(i + 1));
|
||||
if (i + 1 == rv_len) break;
|
||||
rv[i + 1] = Bry_.Mid(src, list.Get_at(i + 1), list.Get_at(i + 2));
|
||||
}
|
||||
list.Clear();
|
||||
return rv;
|
||||
}
|
||||
public static Object Match(Btrie_slim_mgr trie, Btrie_rv trv, byte[] src, int src_bgn, int src_end) {
|
||||
int cur = src_bgn;
|
||||
while (cur < src_end) {
|
||||
byte b = src[cur];
|
||||
Object o = trie.Match_at_w_b0(trv, b, src, cur, src_end);
|
||||
if (o == null)
|
||||
cur += gplx.core.intls.Utf8_.Len_of_char_by_1st_byte(b);
|
||||
else {
|
||||
return o;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -21,11 +21,12 @@ public class Php_preg___tst {
|
||||
private final Php_preg___fxt fxt = new Php_preg___fxt();
|
||||
/** Two-apostrophe delimiter between single letters: text and delimiters are expected interleaved. */
@Test public void Basic() {
    fxt.Test__split("a''b''c", "''", Bool_.Y, "a", "''", "b", "''", "c");
}
|
||||
/** Three apostrophes in the source with a two-apostrophe delimiter: each delimiter item is expected to cover all three. */
@Test public void Extend() {
    fxt.Test__split("a'''b'''c", "''", Bool_.Y, "a", "'''", "b", "'''", "c");
}
|
||||
/** Delimiter at the very end of the source: no trailing empty item is expected. */
@Test public void Eos() {
    fxt.Test__split("a''", "''", Bool_.Y, "a", "''");
}
|
||||
}
|
||||
class Php_preg___fxt {
|
||||
private final gplx.core.primitives.Int_list rv = new gplx.core.primitives.Int_list();
|
||||
/** Convenience overload: runs the split test over the whole of {@code src} (0 .. String_.Len(src)). */
public void Test__split(String src, String dlm, boolean extend, String... expd) {
    int src_len = String_.Len(src);
    Test__split(src, 0, src_len, dlm, extend, expd);
}
|
||||
public void Test__split(String src, int src_bgn, int src_end, String dlm, boolean extend, String... expd) {
|
||||
gplx.core.primitives.Int_list rv = new gplx.core.primitives.Int_list();
|
||||
byte[][] actl = Php_preg_.Split(rv, Bry_.new_u8(src), src_bgn, src_end, Bry_.new_u8(dlm), extend);
|
||||
Gftest.Eq__ary(expd, String_.Ary(actl), "find_failed");
|
||||
}
|
||||
|
||||
@@ -16,7 +16,11 @@ You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.langs.phps.utls; import gplx.*; import gplx.langs.*; import gplx.langs.phps.*;
|
||||
import gplx.core.btries.*;
|
||||
public class Php_str_ {
|
||||
public static int Strpos(byte[] src, byte find, int bgn, int end) {
|
||||
return Bry_find_.Find_fwd(src, find, bgn, end);
|
||||
}
|
||||
/** Two-argument form: forwards to {@code Substr(src, bgn, len)} with {@code src.length} as the length. */
public static byte[] Substr(byte[] src, int bgn) {
    int len = src.length;
    return Substr(src, bgn, len);
}
|
||||
public static byte[] Substr(byte[] src, int bgn, int len) {
|
||||
int src_len = src.length;
|
||||
@@ -29,11 +33,23 @@ public class Php_str_ {
|
||||
/** Two-argument form: forwards to {@code Substr_byte(src, bgn, len)} with {@code src.length} as the length. */
public static byte Substr_byte(byte[] src, int bgn) {
    int len = src.length;
    return Substr_byte(src, bgn, len);
}
|
||||
public static byte Substr_byte(byte[] src, int bgn, int len) {
|
||||
int src_len = src.length;
|
||||
if (src_len == 0) return Byte_ascii.Null;
|
||||
if (bgn < 0) bgn = src_len + bgn; // handle negative
|
||||
if (bgn < 0) bgn = 0; // handle out of bounds; EX: ("a", -1, -1)
|
||||
int end = len < 0 ? src_len + len : bgn + len;
|
||||
if (end > src.length) end = src.length;; // handle out of bounds;
|
||||
return src[bgn];
|
||||
}
|
||||
public static int Strspn_fwd__ary(byte[] src, boolean[] find, int bgn, int max, int src_len) {
|
||||
if (max == -1) max = src_len;
|
||||
int rv = 0;
|
||||
for (int i = bgn; i < src_len; i++) {
|
||||
if (find[src[i]] && rv < max)
|
||||
rv++;
|
||||
else
|
||||
break;
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
public static int Strspn_fwd__byte(byte[] src, byte find, int bgn, int max, int src_len) {
|
||||
if (max == -1) max = src_len;
|
||||
@@ -91,4 +107,31 @@ public class Php_str_ {
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
public static byte[] Strtr(byte[] src, Btrie_slim_mgr trie, Bry_bfr tmp, Btrie_rv trv) {
|
||||
boolean dirty = false;
|
||||
int src_bgn = 0;
|
||||
int src_end = src.length;
|
||||
int i = src_bgn;
|
||||
|
||||
while (true) {
|
||||
if (i == src_end) break;
|
||||
byte b = src[i];
|
||||
Object o = trie.Match_at_w_b0(trv, b, src, i, src_end);
|
||||
if (o == null) {
|
||||
if (dirty) {
|
||||
tmp.Add_byte(b);
|
||||
}
|
||||
i++;
|
||||
}
|
||||
else {
|
||||
if (!dirty) {
|
||||
dirty = true;
|
||||
tmp.Add_mid(src, 0, i);
|
||||
}
|
||||
tmp.Add((byte[])o);
|
||||
i = trv.Pos();
|
||||
}
|
||||
}
|
||||
return dirty ? tmp.To_bry_and_clear() : src;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -16,7 +16,7 @@ You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.langs.phps.utls; import gplx.*; import gplx.langs.*; import gplx.langs.phps.*;
|
||||
import org.junit.*; import gplx.core.tests.*;
|
||||
import org.junit.*; import gplx.core.tests.*; import gplx.core.btries.*;
|
||||
public class Php_str___tst {
|
||||
private final Php_str___fxt fxt = new Php_str___fxt();
|
||||
@Test public void Strspn_fwd__byte() {
|
||||
@@ -43,6 +43,14 @@ public class Php_str___tst {
|
||||
fxt.Test__substr("abcde" , -1, "e");
|
||||
fxt.Test__substr("abcde" , -3, -1, "cd");
|
||||
}
|
||||
@Test public void Strtr() {
|
||||
fxt.Init__strtr_by_trie("01", "89", "02", "79");
|
||||
fxt.Test__strtr_by_trie("abc" , "abc"); // found=none
|
||||
fxt.Test__strtr_by_trie("ab_01_cd" , "ab_89_cd"); // found=one
|
||||
fxt.Test__strtr_by_trie("ab_01_cd_02_ef", "ab_89_cd_79_ef"); // found=many
|
||||
fxt.Test__strtr_by_trie("01_ab" , "89_ab"); // BOS
|
||||
fxt.Test__strtr_by_trie("ab_01" , "ab_89"); // EOS
|
||||
}
|
||||
}
|
||||
class Php_str___fxt {
|
||||
public void Test__strspn_fwd__byte(String src_str, byte find, int bgn, int max, int expd) {
|
||||
@@ -63,4 +71,17 @@ class Php_str___fxt {
|
||||
public void Test__substr(String src_str, int bgn, int len, String expd) {
|
||||
Gftest.Eq__str(expd, Php_str_.Substr(Bry_.new_u8(src_str), bgn, len));
|
||||
}
|
||||
private Btrie_slim_mgr strtr_trie;
|
||||
public void Init__strtr_by_trie(String... kvs) {
|
||||
if (strtr_trie == null) strtr_trie = Btrie_slim_mgr.cs();
|
||||
int len = kvs.length;
|
||||
for (int i = 0; i < len; i += 2) {
|
||||
strtr_trie.Add_str_str(kvs[i], kvs[i + 1]);
|
||||
}
|
||||
}
|
||||
public void Test__strtr_by_trie(String src, String expd) {
|
||||
Bry_bfr tmp = Bry_bfr_.New();
|
||||
Btrie_rv trv = new Btrie_rv();
|
||||
Gftest.Eq__str(expd, Php_str_.Strtr(Bry_.new_u8(src), strtr_trie, tmp, trv));
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user