1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2026-03-02 03:49:30 +00:00

Mw_parse: Mass checkin of various mediawiki parse files

This commit is contained in:
gnosygnu
2017-01-25 01:27:18 -05:00
parent 6a5c114998
commit cef2d7e2f6
81 changed files with 6723 additions and 485 deletions

View File

@@ -114,4 +114,9 @@ public class Array_ {
Set_at(trg, i, Get_at(add, i - srcLen));
return trg;
}
/** Creates a new array via Create(Component_type(src), Len(src)), passes src and the new array to Copy, and returns the new array. */
public static Object Clone(Object src) {
    Object copy = Create(
        Component_type(src),
        Len(src));
    Copy(src, copy);
    return copy;
}
}

View File

@@ -18,6 +18,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
package gplx;
import java.lang.*;
import gplx.core.brys.*; import gplx.core.primitives.*; import gplx.core.ios.*;
import gplx.langs.htmls.entitys.*;
public class Bry_ {
public static final String Cls_val_name = "byte[]";
public static final byte[] Empty = new byte[0];
@@ -62,6 +63,7 @@ public class Bry_ {
public static byte[] new_u8(String str) {
try {
int str_len = str.length();
if (str_len == 0) return Bry_.Empty;
int bry_len = new_u8__by_len(str, str_len);
byte[] rv = new byte[bry_len];
new_u8__write(str, str_len, rv, 0);
@@ -365,7 +367,7 @@ public class Bry_ {
|| (end < bgn)
)
return or;
return Mid(src, bgn, src.length);
return bgn == src_len ? Bry_.Empty : Mid(src, bgn, src_len);
}
public static byte[] Mid(byte[] src, int bgn, int end) {
try {
@@ -1001,6 +1003,33 @@ public class Bry_ {
}
return rv;
}
/**
 * Converts every ASCII letter in src to upper case or lower case.
 * Only bytes inside a-z (when upper) or A-Z (when lower) are shifted by 32; every other byte is copied unchanged.
 * @param tmp   scratch buffer; only written to once the first convertible byte is found
 * @param upper true to upper-case; false to lower-case
 * @param src   bytes to convert; may be null
 * @return null if src is null; src itself if no byte needed converting; otherwise tmp.To_bry_and_clear()
 */
public static byte[] Xcase__build__all(Bry_bfr tmp, boolean upper, byte[] src) {
    if (src == null) return null;

    // exclusive bounds of the letter range that must be shifted, and the shift to apply
    final int range_lo = upper ? 96 : 64;
    final int range_hi = upper ? 123 : 91;
    final int shift = upper ? -32 : 32;
    final int len = src.length;

    // find the first byte that needs converting; if none, hand back src untouched
    int first = 0;
    while (first < len && !(src[first] > range_lo && src[first] < range_hi))
        first++;
    if (first == len) return src;

    // copy the unchanged prefix, then convert / copy the remainder byte by byte
    tmp.Add_mid(src, 0, first);
    for (int pos = first; pos < len; pos++) {
        byte cur = src[pos];
        if (cur > range_lo && cur < range_hi)
            cur = (byte)(cur + shift);
        tmp.Add_byte(cur);
    }
    return tmp.To_bry_and_clear();
}
// Delegates to Xcase__1st with upper = Bool_.Y.
public static byte[] Ucase__1st(byte[] src) {return Xcase__1st(Bool_.Y, src);}
// Delegates to Xcase__1st with upper = Bool_.N.
public static byte[] Lcase__1st(byte[] src) {return Xcase__1st(Bool_.N, src);}
private static byte[] Xcase__1st(boolean upper, byte[] src) {
@@ -1076,4 +1105,71 @@ public class Bry_ {
/** Takes Bry_.Mid(src, bgn, end) and passes it to Bry_.Replace with Byte_ascii.Nl and Byte_ascii.Tab. */
public static byte[] Replace_nl_w_tab(byte[] src, int bgn, int end) {
    byte[] segment = Bry_.Mid(src, bgn, end);
    return Bry_.Replace(segment, Byte_ascii.Nl, Byte_ascii.Tab);
}
/**
 * HTML-escapes all of src by delegating to Escape_html(null, src, 0, src.length).
 * @param src bytes to escape; may be null
 * @return null if src is null; otherwise the result of the delegate call
 */
public static byte[] Escape_html(byte[] src) {
    if (src == null) return null; // null-safe, like Xcase__build__all; avoids NullPointerException on src.length
    return Escape_html(null, src, 0, src.length);
}
/**
 * Escapes the bytes for &amp;, double quote, apostrophe, &lt; and &gt; in src[src_bgn..src_end) with the corresponding Gfh_entity_ byte arrays.
 * Works in two modes, selected by bfr:
 * - bfr == null: the result is returned. If nothing needed escaping, src itself is returned when the range covers
 *   the whole array, else Bry_.Mid(src, src_bgn, src_end); otherwise a buffer is created and its contents returned.
 * - bfr != null: the (escaped or unescaped) range is appended to bfr and null is returned.
 * @param bfr     buffer to append to, or null to get the result as a return value
 * @param src     bytes to read
 * @param src_bgn index of first byte to read
 * @param src_end index after last byte to read
 */
public static byte[] Escape_html(Bry_bfr bfr, byte[] src, int src_bgn, int src_end) {	// uses PHP rules for htmlspecialchars; REF.PHP:http://php.net/manual/en/function.htmlspecialchars.php
    final boolean returns_bry = bfr == null;
    boolean found = false;
    int pending_bgn = src_bgn;	// start of the unescaped run that has not been flushed yet

    for (int pos = src_bgn; pos != src_end; pos++) {
        // look up entity for current byte
        byte[] entity;
        switch (src[pos]) {
            case Byte_ascii.Amp:   entity = Gfh_entity_.Amp_bry; break;
            case Byte_ascii.Quote: entity = Gfh_entity_.Quote_bry; break;
            case Byte_ascii.Apos:  entity = Gfh_entity_.Apos_num_bry; break;
            case Byte_ascii.Lt:    entity = Gfh_entity_.Lt_bry; break;
            case Byte_ascii.Gt:    entity = Gfh_entity_.Gt_bry; break;
            default:               entity = null; break;
        }
        if (entity == null) continue;

        // flush the pending unescaped run, then the entity
        found = true;
        if (bfr == null) bfr = Bry_bfr_.New();
        if (pending_bgn < pos)
            bfr.Add_mid(src, pending_bgn, pos);
        bfr.Add(entity);
        pending_bgn = pos + 1;
    }

    // at least one byte was escaped: flush the tail
    if (found) {
        bfr.Add_mid(src, pending_bgn, src_end);
        return returns_bry ? bfr.To_bry_and_clear() : null;
    }

    // nothing escaped and caller supplied a buffer: append the range as is
    if (!returns_bry) {
        bfr.Add_mid(src, src_bgn, src_end);
        return null;
    }

    // nothing escaped and caller wants bytes: avoid copying when the range is the whole array
    boolean whole_src = src_bgn == 0 && src_end == src.length;
    return whole_src ? src : Bry_.Mid(src, src_bgn, src_end);
}
}

View File

@@ -279,8 +279,13 @@ public class Bry__tst {
// Repeating "abc" 3 times is expected to give "abcabcabc".
@Test public void Repeat_bry() {
fxt.Test__repeat_bry("abc" , 3, "abcabcabc");
}
// Covers both case directions of Bry_.Xcase__build__all, plus the bytes bordering the letter ranges.
@Test public void Xcase__build__all() {
    // lower-casing: already-lower input is unchanged; mixed input is converted
    fxt.Test__xcase__build__all(Bool_.N, "abc", "abc");
    fxt.Test__xcase__build__all(Bool_.N, "aBc", "abc");
    // upper-casing: already-upper input is unchanged; mixed input is converted
    fxt.Test__xcase__build__all(Bool_.Y, "ABC", "ABC");
    fxt.Test__xcase__build__all(Bool_.Y, "aBc", "ABC");
    // bytes adjacent to the letter ranges must not be shifted: '@'=64, '['=91, '`'=96, '{'=123
    fxt.Test__xcase__build__all(Bool_.N, "@A[Z`{", "@a[z`{");
    fxt.Test__xcase__build__all(Bool_.Y, "@a[z`{", "@A[Z`{");
}
}
class Bry__fxt {
private final Bry_bfr tmp = Bry_bfr_.New();
public void Test_trim_end(String raw, byte trim, String expd) {
byte[] raw_bry = Bry_.new_a7(raw);
Tfds.Eq(expd, String_.new_u8(Bry_.Trim_end(raw_bry, trim, raw_bry.length)));
@@ -298,4 +303,7 @@ class Bry__fxt {
/** Asserts that Bry_.Repeat_bry, given the UTF-8 bytes of s and count, equals expd. */
public void Test__repeat_bry(String s, int count, String expd) {
    byte[] src = Bry_.new_u8(s);
    Gftest.Eq__str(expd, Bry_.Repeat_bry(src, count));
}
/** Asserts that Bry_.Xcase__build__all, given the fixture's tmp buffer, upper and the UTF-8 bytes of src, equals expd. */
public void Test__xcase__build__all(boolean upper, String src, String expd) {
    byte[] src_bry = Bry_.new_u8(src);
    byte[] actl = Bry_.Xcase__build__all(tmp, upper, src_bry);
    Gftest.Eq__str(expd, actl);
}
}

View File

@@ -297,35 +297,21 @@ public class Bry_bfr {
Add_mid(val, bgn, end);
return this;
}
public Bry_bfr Add_bry_escape_html(byte[] val) {return Add_bry_escape_html(val, 0, val.length);}
public Bry_bfr Add_bry_escape_html(byte[] val, int bgn, int end) { // uses PHP rules for htmlspecialchars; REF.PHP:http://php.net/manual/en/function.htmlspecialchars.php
boolean clean = true;
for (int i = bgn; i < end; ++i) {
byte[] escaped = null;
byte b = val[i];
switch (b) {
case Byte_ascii.Amp: escaped = Gfh_entity_.Amp_bry; break;
case Byte_ascii.Quote: escaped = Gfh_entity_.Quote_bry; break;
case Byte_ascii.Apos: escaped = Gfh_entity_.Apos_num_bry; break;
case Byte_ascii.Lt: escaped = Gfh_entity_.Lt_bry; break;
case Byte_ascii.Gt: escaped = Gfh_entity_.Gt_bry; break;
}
if (escaped == null && clean) {
continue;
}
else {
if (clean) {
clean = false;
this.Add_mid(val, bgn, i);
}
if (escaped == null)
this.Add_byte(b);
else
this.Add(escaped);
}
public Bry_bfr Add_bry_many(byte[]... ary) {
int len = ary.length;
for (int i = 0; i < len; i++) {
byte[] bry = ary[i];
if (bry != null && bry.length > 0)
this.Add(bry);
}
if (clean)
Add_mid(val, bgn, end);
return this;
}
/** Escapes all of val via Add_bry_escape_html(val, 0, val.length); a null val leaves the buffer untouched. Returns this for chaining. */
public Bry_bfr Add_bry_escape_html(byte[] val) {
    return val == null
        ? this
        : Add_bry_escape_html(val, 0, val.length);
}
// Passes this buffer and val[bgn..end) to Bry_.Escape_html; that call's return value is ignored. Returns this for chaining.
public Bry_bfr Add_bry_escape_html(byte[] val, int bgn, int end) {
Bry_.Escape_html(this, val, bgn, end);
return this;
}
/** Calls Add_str_u8(s), then returns the result of Add_byte_nl(). */
public Bry_bfr Add_str_u8_w_nl(String s) {
    Add_str_u8(s);
    return Add_byte_nl();
}
@@ -542,6 +528,30 @@ public class Bry_bfr {
this.Del_by(count);
return this;
}
/**
 * Removes trailing whitespace bytes (as flagged in Trim_end_ws_ary) from the end of the buffer.
 * Scans backwards from the last byte, counts consecutive flagged bytes, and deletes that many via Del_by.
 * @return this, for chaining
 */
public Bry_bfr Trim_end_ws() {
    if (bfr_len == 0) return this;
    int count = 0;
    for (int i = bfr_len - 1; i > -1; --i) {
        // mask to 0..255: Java bytes are signed, so a byte >= 0x80 (EX: tail of a UTF-8 multi-byte char)
        // would otherwise be a negative index and throw ArrayIndexOutOfBoundsException
        if (!Trim_end_ws_ary[bfr[i] & 0xFF]) break;
        ++count;
    }
    if (count > 0)
        this.Del_by(count);
    return this;
}
private static final boolean[] Trim_end_ws_ary = Trim_end_ws_new();
/** Builds a 256-slot lookup table in which only the slots for byte values 32, 9, 10, 13 and 11 are true. */
private static boolean[] Trim_end_ws_new() {
    boolean[] rv = new boolean[256];
    int[] ws_codes = {32, 9, 10, 13, 11};	// ASCII space, tab, line feed, carriage return, vertical tab
    for (int code : ws_codes)
        rv[code] = true;
    return rv;
}
public Bry_bfr Concat_skip_empty(byte[] dlm, byte[]... ary) {
int ary_len = ary.length;
for (int i = 0; i < ary_len; i++) {

View File

@@ -245,6 +245,13 @@ public class Bry_find_ {
cur += while_len;
}
}
/**
 * Skips forward over bytes that are flagged in while_ary.
 * @param src       bytes to scan
 * @param cur       index to start scanning at
 * @param end       index after the last byte to scan
 * @param while_ary lookup table indexed by unsigned byte value (0..255); true means "keep skipping"
 * @return index of the first byte in [cur, end) that is not flagged; end if every byte is flagged or cur >= end
 */
public static int Find_fwd_while_in(byte[] src, int cur, int end, boolean[] while_ary) {
    while (cur < end) {
        // mask to 0..255: Java bytes are signed, so a byte >= 0x80 would otherwise be a negative index
        if (!while_ary[src[cur] & 0xFF]) return cur;
        cur++;
    }
    return end;
}
public static int Find_fwd_until(byte[] src, int cur, int end, byte until_byte) {
while (true) {
if ( cur == end

View File

@@ -16,7 +16,7 @@ You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx;
import org.junit.*;
import org.junit.*; import gplx.core.tests.*;
public class Bry_find__tst {
private Bry_find__fxt fxt = new Bry_find__fxt();
@Test public void Find_fwd() {
@@ -59,6 +59,10 @@ public class Bry_find__tst {
fxt.Test_Trim_bwd_space_tab("" , 0);
fxt.Test_Trim_bwd_space_tab(" \t" , 0);
}
@Test public void Find_fwd_while_in() {
    // space, tab and newline are all in the set, so the scan is expected to stop at "a" (index 3)
    boolean[] ws_ary = fxt.Init__find_fwd_while_in(Byte_ascii.Space, Byte_ascii.Tab, Byte_ascii.Nl);
    fxt.Test__find_fwd_while_in(" \t\na", ws_ary, 3);
}
}
class Bry_find__fxt {
/** Asserts that Bry_find_.Find_fwd, given the UTF-8 bytes of src and lkp and the start index bgn, equals expd. */
public void Test_Find_fwd(String src, String lkp, int bgn, int expd) {
    byte[] src_bry = Bry_.new_u8(src);
    byte[] lkp_bry = Bry_.new_u8(lkp);
    Tfds.Eq(expd, Bry_find_.Find_fwd(src_bry, lkp_bry, bgn));
}
@@ -74,4 +78,15 @@ class Bry_find__fxt {
int actl = Bry_find_.Trim_fwd_space_tab(raw_bry, 0, raw_bry.length);
Tfds.Eq(expd, actl, raw_str);
}
/**
 * Builds a 256-slot lookup table with a true entry for each byte in ary.
 * @param ary bytes to flag; values >= 0x80 are supported
 * @return table indexed by unsigned byte value (0..255)
 */
public boolean[] Init__find_fwd_while_in(byte... ary) {
    boolean[] rv = new boolean[256];
    for (byte b : ary)
        rv[b & 0xFF] = true;	// mask to 0..255: Java bytes are signed, so a byte >= 0x80 would otherwise be a negative index
    return rv;
}
/** Asserts that Bry_find_.Find_fwd_while_in, scanning all UTF-8 bytes of src with lookup table ary, returns expd. */
public void Test__find_fwd_while_in(String src, boolean[] ary, int expd) {
    byte[] bytes = Bry_.new_u8(src);
    int actl = Bry_find_.Find_fwd_while_in(bytes, 0, bytes.length, ary);
    Gftest.Eq__int(expd, actl);
}
}

View File

@@ -48,7 +48,7 @@ public class Bry_split_ {
boolean reset = true;
if (itm_bgn == -1) {
if (pos_is_last) {} // skip dlm at bgn / end; EX: "a,"
else {wkr.Split(src, itm_bgn, itm_end);} // else, process "empty" dlm; EX: ",a"
else {wkr.Split(src, pos, pos );} // else, process "empty" dlm; EX: ",a"
}
else {
int rv = wkr.Split(src, itm_bgn, itm_end);

View File

@@ -43,6 +43,9 @@ public class Bry_split__tst {
fxt.Test_split("a|b|c|d" , 2, 6, "|", "b", "c");
fxt.Test_split("a|b|c|d" , 2, 4, "|", "b");
}
// Two consecutive newline delimiters are expected to yield an empty item between "a" and "b".
@Test public void Empty() {
fxt.Test_split("a\n\nb" , Byte_ascii.Nl, Bool_.N, "a", "", "b");
}
@Test public void Split_w_max() {
fxt.Test__split_w_max("a|b|c|d" , Byte_ascii.Pipe, 2, "a", "b"); // max is less
fxt.Test__split_w_max("a" , Byte_ascii.Pipe, 2, "a", null); // max is more

View File

@@ -109,6 +109,14 @@ public class Btrie_slim_mgr implements Btrie_mgr {
Add_obj(Bry_.new_u8(ary[i]), bval);
return this;
}
/**
 * Registers each string in ary: the string is converted with Bry_.new_u8 and the resulting bytes are passed to Add_obj as both arguments.
 * @return this, for chaining
 */
public Btrie_slim_mgr Add_many_str(String... ary) {
    for (String str : ary) {
        byte[] key = Bry_.new_u8(str);
        Add_obj(key, key);	// same byte[] serves as key and as stored value
    }
    return this;
}
// String overload: passes ary through Bry_.Ary and delegates to the byte[]... overload. NOTE(review): Bry_.Ary presumably converts each String to byte[] -- confirm.
public Btrie_slim_mgr Add_many_int(int val, String... ary) {return Add_many_int(val, Bry_.Ary(ary));}
public Btrie_slim_mgr Add_many_int(int val, byte[]... ary) {
int len = ary.length;

View File

@@ -87,11 +87,31 @@ public class Hex_utl_ {
// Writes the hex digits of val into bry, right-aligned in the slots [bgn, end).
// Slots to the left of the most significant digit are not written, so they keep whatever bry already held.
public static void Write(byte[] bry, int bgn, int end, int val) {
for (int i = end - 1; i > bgn - 1; i--) { // walk from the rightmost slot back toward bgn
int b = val % 16; // lowest remaining hex digit
bry[i] = To_byte(b);
bry[i] = To_byte_ucase(b); // NOTE(review): this and the previous line look like the old / new side of a rename (To_byte -> To_byte_ucase); confirm only one should remain
val /= 16;
if (val == 0) break; // no digits remain
}
}
/**
 * Appends val to bfr as hexadecimal digits, with no padding and no prefix; 0 is written as a single "0".
 * val is treated as an unsigned 32-bit value, so a negative int is written in its 8-digit two's-complement form
 * (EX: -1 -> "ffffffff"), the same convention as Integer.toHexString. Non-negative values are written as before.
 * @param bfr   buffer to append to
 * @param lcase true to use To_byte_lcase for each digit; false to use To_byte_ucase
 * @param val   value to write
 */
public static void Write_bfr(Bry_bfr bfr, boolean lcase, int val) {
    // count hex digits; do-while guarantees at least one digit
    int val_len = 0;
    int tmp = val;
    do {
        tmp >>>= 4;	// unsigned shift so negative values terminate after 8 digits
        val_len++;
    } while (tmp != 0);

    // reserve val_len bytes at end of bfr, then fill them from right to left
    int hex_bgn = bfr.Len();
    bfr.Add_byte_repeat(Byte_ascii.Null, val_len);
    byte[] bry = bfr.Bfr();	// fetched after reserving; NOTE(review): presumably the backing array can be replaced when bfr grows -- confirm
    for (int i = hex_bgn + val_len - 1; i >= hex_bgn; i--) {
        int digit = val & 0xF;	// always 0..15, even when val is negative (val % 16 would be negative)
        bry[i] = lcase ? To_byte_lcase(digit) : To_byte_ucase(digit);
        val >>>= 4;
    }
}
public static boolean Is_hex_many(byte... ary) {
for (byte itm : ary) {
switch (itm) {
@@ -123,7 +143,7 @@ public class Hex_utl_ {
default: throw Err_.new_parse("hexstring", Int_.To_str(val));
}
}
private static byte To_byte(int v) {
private static byte To_byte_ucase(int v) {
switch (v) {
case 0: return Byte_ascii.Num_0; case 1: return Byte_ascii.Num_1; case 2: return Byte_ascii.Num_2; case 3: return Byte_ascii.Num_3; case 4: return Byte_ascii.Num_4;
case 5: return Byte_ascii.Num_5; case 6: return Byte_ascii.Num_6; case 7: return Byte_ascii.Num_7; case 8: return Byte_ascii.Num_8; case 9: return Byte_ascii.Num_9;

View File

@@ -16,7 +16,7 @@ You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.core.encoders; import gplx.*; import gplx.core.*;
import org.junit.*;
import org.junit.*; import gplx.core.tests.*;
public class Hex_utl__tst {
private final Hex_utl__fxt fxt = new Hex_utl__fxt();
@Test public void To_int() {
@@ -46,6 +46,15 @@ public class Hex_utl__tst {
fxt.Test__write("[00000000]", 1, 9, 15, "[0000000F]");
fxt.Test__write("[00000000]", 1, 9, 255, "[000000FF]");
}
// Covers Hex_utl_.Write_bfr for both lower-case and upper-case output.
@Test public void Write_bfr() {
    // lower-case
    fxt.Test__write_bfr(Bool_.Y, 0, "0");
    fxt.Test__write_bfr(Bool_.Y, 15, "f");
    fxt.Test__write_bfr(Bool_.Y, 16, "10");
    fxt.Test__write_bfr(Bool_.Y, 32, "20");
    fxt.Test__write_bfr(Bool_.Y, 255, "ff");
    fxt.Test__write_bfr(Bool_.Y, 256, "100");
    fxt.Test__write_bfr(Bool_.Y, Int_.Max_value, "7fffffff");
    // upper-case
    fxt.Test__write_bfr(Bool_.N, 0, "0");
    fxt.Test__write_bfr(Bool_.N, 15, "F");
    fxt.Test__write_bfr(Bool_.N, 255, "FF");
    fxt.Test__write_bfr(Bool_.N, Int_.Max_value, "7FFFFFFF");
}
}
class Hex_utl__fxt {
public void Test__write(String s, int bgn, int end, int val, String expd) {
@@ -63,6 +72,11 @@ class Hex_utl__fxt {
String actl = Hex_utl_.To_str(val, pad);
Tfds.Eq(expd, actl);
}
private final Bry_bfr bfr = Bry_bfr_.New();
/** Runs Hex_utl_.Write_bfr against the fixture's buffer, then asserts that the buffer's contents (cleared afterwards) equal expd. */
public void Test__write_bfr(boolean lcase, int val, String expd) {
    Hex_utl_.Write_bfr(bfr, lcase, val);
    String actl = bfr.To_str_and_clear();
    Gftest.Eq__str(expd, actl);
}
// public void Test__encode_bry(int val, int pad, String expd) {
// String actl = Hex_utl_.To_str(val, pad);
// Tfds.Eq(expd, actl);