1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2026-03-02 03:49:30 +00:00

Mw_parse: Mass checkin of various mediawiki parse files

This commit is contained in:
gnosygnu
2017-01-25 01:27:18 -05:00
parent 6a5c114998
commit cef2d7e2f6
81 changed files with 6723 additions and 485 deletions

View File

@@ -16,6 +16,7 @@ You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.phps.utls; import gplx.*; import gplx.langs.*; import gplx.langs.phps.*;
import gplx.core.btries.*;
import gplx.core.primitives.*;
public class Php_preg_ {
public static byte[][] Split(Int_list list, byte[] src, int src_bgn, int src_end, byte[] dlm, boolean extend) {
@@ -27,7 +28,7 @@ public class Php_preg_ {
while (true) {
if (i == src_end) break;
int dlm_end = i + dlm_len;
if (dlm_end < src_end && Bry_.Eq(src, i, dlm_end, dlm)) {
if (dlm_end <= src_end && Bry_.Eq(src, i, dlm_end, dlm)) {
if (extend) {
dlm_end = Bry_find_.Find_fwd_while(src, i, src_end, dlm_nth);
}
@@ -42,13 +43,33 @@ public class Php_preg_ {
// create brys
int rv_len = list.Len() - 1;
if (rv_len == 1) return null;
if (rv_len == 1) {
list.Clear();
return null;
}
if (list.Get_at(list.Len() - 2) == src_end) { // if 2nd to last elem == src_end, then last item is Bry_.Empty; ignore it; EX: "a''" -> "a", "''" x> "a", "''", ""
rv_len--;
}
byte[][] rv = new byte[rv_len][];
for (i = 0; i < rv_len; i += 2) {
rv[i ] = Bry_.Mid(src, list.Get_at(i + 0), list.Get_at(i + 1));
if (i + 1 == rv_len) break;
rv[i + 1] = Bry_.Mid(src, list.Get_at(i + 1), list.Get_at(i + 2));
}
list.Clear();
return rv;
}
public static Object Match(Btrie_slim_mgr trie, Btrie_rv trv, byte[] src, int src_bgn, int src_end) {
int cur = src_bgn;
while (cur < src_end) {
byte b = src[cur];
Object o = trie.Match_at_w_b0(trv, b, src, cur, src_end);
if (o == null)
cur += gplx.core.intls.Utf8_.Len_of_char_by_1st_byte(b);
else {
return o;
}
}
return null;
}
}

View File

@@ -21,11 +21,12 @@ public class Php_preg___tst {
private final Php_preg___fxt fxt = new Php_preg___fxt();
@Test public void Basic() {fxt.Test__split("a''b''c" , "''", Bool_.Y, "a", "''", "b", "''", "c");}
@Test public void Extend() {fxt.Test__split("a'''b'''c" , "''", Bool_.Y, "a", "'''", "b", "'''", "c");}
@Test public void Eos() {fxt.Test__split("a''" , "''", Bool_.Y, "a", "''");}
}
class Php_preg___fxt {
private final gplx.core.primitives.Int_list rv = new gplx.core.primitives.Int_list();
public void Test__split(String src, String dlm, boolean extend, String... expd) {Test__split(src, 0, String_.Len(src), dlm, extend, expd);}
public void Test__split(String src, int src_bgn, int src_end, String dlm, boolean extend, String... expd) {
gplx.core.primitives.Int_list rv = new gplx.core.primitives.Int_list();
byte[][] actl = Php_preg_.Split(rv, Bry_.new_u8(src), src_bgn, src_end, Bry_.new_u8(dlm), extend);
Gftest.Eq__ary(expd, String_.Ary(actl), "find_failed");
}

View File

@@ -16,7 +16,11 @@ You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.phps.utls; import gplx.*; import gplx.langs.*; import gplx.langs.phps.*;
import gplx.core.btries.*;
public class Php_str_ {
public static int Strpos(byte[] src, byte find, int bgn, int end) {
return Bry_find_.Find_fwd(src, find, bgn, end);
}
public static byte[] Substr(byte[] src, int bgn) {return Substr(src, bgn, src.length);}
public static byte[] Substr(byte[] src, int bgn, int len) {
int src_len = src.length;
@@ -29,11 +33,23 @@ public class Php_str_ {
public static byte Substr_byte(byte[] src, int bgn) {return Substr_byte(src, bgn, src.length);}
public static byte Substr_byte(byte[] src, int bgn, int len) {
int src_len = src.length;
if (src_len == 0) return Byte_ascii.Null;
if (bgn < 0) bgn = src_len + bgn; // handle negative
if (bgn < 0) bgn = 0; // handle out of bounds; EX: ("a", -1, -1)
int end = len < 0 ? src_len + len : bgn + len;
if (end > src.length) end = src.length;; // handle out of bounds;
return src[bgn];
}
public static int Strspn_fwd__ary(byte[] src, boolean[] find, int bgn, int max, int src_len) {
if (max == -1) max = src_len;
int rv = 0;
for (int i = bgn; i < src_len; i++) {
if (find[src[i]] && rv < max)
rv++;
else
break;
}
return rv;
}
public static int Strspn_fwd__byte(byte[] src, byte find, int bgn, int max, int src_len) {
if (max == -1) max = src_len;
@@ -91,4 +107,31 @@ public class Php_str_ {
}
return rv;
}
public static byte[] Strtr(byte[] src, Btrie_slim_mgr trie, Bry_bfr tmp, Btrie_rv trv) {
boolean dirty = false;
int src_bgn = 0;
int src_end = src.length;
int i = src_bgn;
while (true) {
if (i == src_end) break;
byte b = src[i];
Object o = trie.Match_at_w_b0(trv, b, src, i, src_end);
if (o == null) {
if (dirty) {
tmp.Add_byte(b);
}
i++;
}
else {
if (!dirty) {
dirty = true;
tmp.Add_mid(src, 0, i);
}
tmp.Add((byte[])o);
i = trv.Pos();
}
}
return dirty ? tmp.To_bry_and_clear() : src;
}
}

View File

@@ -16,7 +16,7 @@ You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.phps.utls; import gplx.*; import gplx.langs.*; import gplx.langs.phps.*;
import org.junit.*; import gplx.core.tests.*;
import org.junit.*; import gplx.core.tests.*; import gplx.core.btries.*;
public class Php_str___tst {
private final Php_str___fxt fxt = new Php_str___fxt();
@Test public void Strspn_fwd__byte() {
@@ -43,6 +43,14 @@ public class Php_str___tst {
fxt.Test__substr("abcde" , -1, "e");
fxt.Test__substr("abcde" , -3, -1, "cd");
}
@Test public void Strtr() {
fxt.Init__strtr_by_trie("01", "89", "02", "79");
fxt.Test__strtr_by_trie("abc" , "abc"); // found=none
fxt.Test__strtr_by_trie("ab_01_cd" , "ab_89_cd"); // found=one
fxt.Test__strtr_by_trie("ab_01_cd_02_ef", "ab_89_cd_79_ef"); // found=many
fxt.Test__strtr_by_trie("01_ab" , "89_ab"); // BOS
fxt.Test__strtr_by_trie("ab_01" , "ab_89"); // EOS
}
}
class Php_str___fxt {
public void Test__strspn_fwd__byte(String src_str, byte find, int bgn, int max, int expd) {
@@ -63,4 +71,17 @@ class Php_str___fxt {
public void Test__substr(String src_str, int bgn, int len, String expd) {
Gftest.Eq__str(expd, Php_str_.Substr(Bry_.new_u8(src_str), bgn, len));
}
private Btrie_slim_mgr strtr_trie;
public void Init__strtr_by_trie(String... kvs) {
if (strtr_trie == null) strtr_trie = Btrie_slim_mgr.cs();
int len = kvs.length;
for (int i = 0; i < len; i += 2) {
strtr_trie.Add_str_str(kvs[i], kvs[i + 1]);
}
}
public void Test__strtr_by_trie(String src, String expd) {
Bry_bfr tmp = Bry_bfr_.New();
Btrie_rv trv = new Btrie_rv();
Gftest.Eq__str(expd, Php_str_.Strtr(Bry_.new_u8(src), strtr_trie, tmp, trv));
}
}