mirror of
https://github.com/gnosygnu/xowa.git
synced 2026-03-02 03:49:30 +00:00
Mw_parse: Mass checkin of various mediawiki parse files
This commit is contained in:
@@ -114,4 +114,9 @@ public class Array_ {
|
||||
Set_at(trg, i, Get_at(add, i - srcLen));
|
||||
return trg;
|
||||
}
|
||||
public static Object Clone(Object src) {
|
||||
Object trg = Create(Component_type(src), Len(src));
|
||||
Copy(src, trg);
|
||||
return trg;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -18,6 +18,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
package gplx;
|
||||
import java.lang.*;
|
||||
import gplx.core.brys.*; import gplx.core.primitives.*; import gplx.core.ios.*;
|
||||
import gplx.langs.htmls.entitys.*;
|
||||
public class Bry_ {
|
||||
public static final String Cls_val_name = "byte[]";
|
||||
public static final byte[] Empty = new byte[0];
|
||||
@@ -62,6 +63,7 @@ public class Bry_ {
|
||||
public static byte[] new_u8(String str) {
|
||||
try {
|
||||
int str_len = str.length();
|
||||
if (str_len == 0) return Bry_.Empty;
|
||||
int bry_len = new_u8__by_len(str, str_len);
|
||||
byte[] rv = new byte[bry_len];
|
||||
new_u8__write(str, str_len, rv, 0);
|
||||
@@ -365,7 +367,7 @@ public class Bry_ {
|
||||
|| (end < bgn)
|
||||
)
|
||||
return or;
|
||||
return Mid(src, bgn, src.length);
|
||||
return bgn == src_len ? Bry_.Empty : Mid(src, bgn, src_len);
|
||||
}
|
||||
public static byte[] Mid(byte[] src, int bgn, int end) {
|
||||
try {
|
||||
@@ -1001,6 +1003,33 @@ public class Bry_ {
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
public static byte[] Xcase__build__all(Bry_bfr tmp, boolean upper, byte[] src) {
|
||||
if (src == null) return null;
|
||||
int src_bgn = 0;
|
||||
int src_end = src.length;
|
||||
int lbound = 96, ubound = 123;
|
||||
if (!upper) {
|
||||
lbound = 64; ubound = 91;
|
||||
}
|
||||
|
||||
boolean dirty = false;
|
||||
for (int i = src_bgn; i < src_end; i++) {
|
||||
byte b = src[i];
|
||||
if (b > lbound && b < ubound) {
|
||||
if (!dirty) {
|
||||
dirty = true;
|
||||
tmp.Add_mid(src, src_bgn, i);
|
||||
}
|
||||
if (upper)
|
||||
b -= 32;
|
||||
else
|
||||
b += 32;
|
||||
}
|
||||
if (dirty)
|
||||
tmp.Add_byte(b);
|
||||
}
|
||||
return dirty ? tmp.To_bry_and_clear() : src;
|
||||
}
|
||||
public static byte[] Ucase__1st(byte[] src) {return Xcase__1st(Bool_.Y, src);}
|
||||
public static byte[] Lcase__1st(byte[] src) {return Xcase__1st(Bool_.N, src);}
|
||||
private static byte[] Xcase__1st(boolean upper, byte[] src) {
|
||||
@@ -1076,4 +1105,71 @@ public class Bry_ {
|
||||
public static byte[] Replace_nl_w_tab(byte[] src, int bgn, int end) {
|
||||
return Bry_.Replace(Bry_.Mid(src, bgn, end), Byte_ascii.Nl, Byte_ascii.Tab);
|
||||
}
|
||||
public static byte[] Escape_html(byte[] src) {
|
||||
return Escape_html(null, src, 0, src.length);
|
||||
}
|
||||
public static byte[] Escape_html(Bry_bfr bfr, byte[] src, int src_bgn, int src_end) { // uses PHP rules for htmlspecialchars; REF.PHP:http://php.net/manual/en/function.htmlspecialchars.php
|
||||
boolean dirty = false;
|
||||
int cur = src_bgn;
|
||||
int prv = cur;
|
||||
boolean called_by_bry = bfr == null;
|
||||
|
||||
// loop over chars
|
||||
while (true) {
|
||||
// if EOS, exit
|
||||
if (cur == src_end) {
|
||||
if (dirty) {
|
||||
bfr.Add_mid(src, prv, src_end);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
// check current byte if escaped
|
||||
byte b = src[cur];
|
||||
byte[] escaped = null;
|
||||
switch (b) {
|
||||
case Byte_ascii.Amp: escaped = Gfh_entity_.Amp_bry; break;
|
||||
case Byte_ascii.Quote: escaped = Gfh_entity_.Quote_bry; break;
|
||||
case Byte_ascii.Apos: escaped = Gfh_entity_.Apos_num_bry; break;
|
||||
case Byte_ascii.Lt: escaped = Gfh_entity_.Lt_bry; break;
|
||||
case Byte_ascii.Gt: escaped = Gfh_entity_.Gt_bry; break;
|
||||
}
|
||||
|
||||
// not escaped; increment and continue
|
||||
if (escaped == null) {
|
||||
cur++;
|
||||
continue;
|
||||
}
|
||||
// escaped
|
||||
else {
|
||||
dirty = true;
|
||||
if (bfr == null) bfr = Bry_bfr_.New();
|
||||
|
||||
if (prv < cur)
|
||||
bfr.Add_mid(src, prv, cur);
|
||||
bfr.Add(escaped);
|
||||
cur++;
|
||||
prv = cur;
|
||||
}
|
||||
}
|
||||
|
||||
if (dirty) {
|
||||
if (called_by_bry)
|
||||
return bfr.To_bry_and_clear();
|
||||
else
|
||||
return null;
|
||||
}
|
||||
else {
|
||||
if (called_by_bry) {
|
||||
if (src_bgn == 0 && src_end == src.length)
|
||||
return src;
|
||||
else
|
||||
return Bry_.Mid(src, src_bgn, src_end);
|
||||
}
|
||||
else {
|
||||
bfr.Add_mid(src, src_bgn, src_end);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -279,8 +279,13 @@ public class Bry__tst {
|
||||
@Test public void Repeat_bry() {
|
||||
fxt.Test__repeat_bry("abc" , 3, "abcabcabc");
|
||||
}
|
||||
@Test public void Xcase__build__all() {
|
||||
fxt.Test__xcase__build__all(Bool_.N, "abc", "abc");
|
||||
fxt.Test__xcase__build__all(Bool_.N, "aBc", "abc");
|
||||
}
|
||||
}
|
||||
class Bry__fxt {
|
||||
private final Bry_bfr tmp = Bry_bfr_.New();
|
||||
public void Test_trim_end(String raw, byte trim, String expd) {
|
||||
byte[] raw_bry = Bry_.new_a7(raw);
|
||||
Tfds.Eq(expd, String_.new_u8(Bry_.Trim_end(raw_bry, trim, raw_bry.length)));
|
||||
@@ -298,4 +303,7 @@ class Bry__fxt {
|
||||
public void Test__repeat_bry(String s, int count, String expd) {
|
||||
Gftest.Eq__str(expd, Bry_.Repeat_bry(Bry_.new_u8(s), count));
|
||||
}
|
||||
public void Test__xcase__build__all(boolean upper, String src, String expd) {
|
||||
Gftest.Eq__str(expd, Bry_.Xcase__build__all(tmp, upper, Bry_.new_u8(src)));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -297,35 +297,21 @@ public class Bry_bfr {
|
||||
Add_mid(val, bgn, end);
|
||||
return this;
|
||||
}
|
||||
public Bry_bfr Add_bry_escape_html(byte[] val) {return Add_bry_escape_html(val, 0, val.length);}
|
||||
public Bry_bfr Add_bry_escape_html(byte[] val, int bgn, int end) { // uses PHP rules for htmlspecialchars; REF.PHP:http://php.net/manual/en/function.htmlspecialchars.php
|
||||
boolean clean = true;
|
||||
for (int i = bgn; i < end; ++i) {
|
||||
byte[] escaped = null;
|
||||
byte b = val[i];
|
||||
switch (b) {
|
||||
case Byte_ascii.Amp: escaped = Gfh_entity_.Amp_bry; break;
|
||||
case Byte_ascii.Quote: escaped = Gfh_entity_.Quote_bry; break;
|
||||
case Byte_ascii.Apos: escaped = Gfh_entity_.Apos_num_bry; break;
|
||||
case Byte_ascii.Lt: escaped = Gfh_entity_.Lt_bry; break;
|
||||
case Byte_ascii.Gt: escaped = Gfh_entity_.Gt_bry; break;
|
||||
}
|
||||
if (escaped == null && clean) {
|
||||
continue;
|
||||
}
|
||||
else {
|
||||
if (clean) {
|
||||
clean = false;
|
||||
this.Add_mid(val, bgn, i);
|
||||
}
|
||||
if (escaped == null)
|
||||
this.Add_byte(b);
|
||||
else
|
||||
this.Add(escaped);
|
||||
}
|
||||
public Bry_bfr Add_bry_many(byte[]... ary) {
|
||||
int len = ary.length;
|
||||
for (int i = 0; i < len; i++) {
|
||||
byte[] bry = ary[i];
|
||||
if (bry != null && bry.length > 0)
|
||||
this.Add(bry);
|
||||
}
|
||||
if (clean)
|
||||
Add_mid(val, bgn, end);
|
||||
return this;
|
||||
}
|
||||
public Bry_bfr Add_bry_escape_html(byte[] val) {
|
||||
if (val == null) return this;
|
||||
return Add_bry_escape_html(val, 0, val.length);
|
||||
}
|
||||
public Bry_bfr Add_bry_escape_html(byte[] val, int bgn, int end) {
|
||||
Bry_.Escape_html(this, val, bgn, end);
|
||||
return this;
|
||||
}
|
||||
public Bry_bfr Add_str_u8_w_nl(String s) {Add_str_u8(s); return Add_byte_nl();}
|
||||
@@ -542,6 +528,30 @@ public class Bry_bfr {
|
||||
this.Del_by(count);
|
||||
return this;
|
||||
}
|
||||
public Bry_bfr Trim_end_ws() {
|
||||
if (bfr_len == 0) return this;
|
||||
int count = 0;
|
||||
for (int i = bfr_len - 1; i > -1; --i) {
|
||||
byte b = bfr[i];
|
||||
if (Trim_end_ws_ary[b])
|
||||
++count;
|
||||
else
|
||||
break;
|
||||
}
|
||||
if (count > 0)
|
||||
this.Del_by(count);
|
||||
return this;
|
||||
}
|
||||
private static final boolean[] Trim_end_ws_ary = Trim_end_ws_new();
|
||||
private static boolean[] Trim_end_ws_new() {
|
||||
boolean[] rv = new boolean[256];
|
||||
rv[32] = true;
|
||||
rv[ 9] = true;
|
||||
rv[10] = true;
|
||||
rv[13] = true;
|
||||
rv[11] = true;
|
||||
return rv;
|
||||
}
|
||||
public Bry_bfr Concat_skip_empty(byte[] dlm, byte[]... ary) {
|
||||
int ary_len = ary.length;
|
||||
for (int i = 0; i < ary_len; i++) {
|
||||
|
||||
@@ -245,6 +245,13 @@ public class Bry_find_ {
|
||||
cur += while_len;
|
||||
}
|
||||
}
|
||||
public static int Find_fwd_while_in(byte[] src, int cur, int end, boolean[] while_ary) {
|
||||
while (cur < end) {
|
||||
if (cur == end || !while_ary[src[cur]]) return cur;
|
||||
cur++;
|
||||
}
|
||||
return end;
|
||||
}
|
||||
public static int Find_fwd_until(byte[] src, int cur, int end, byte until_byte) {
|
||||
while (true) {
|
||||
if ( cur == end
|
||||
|
||||
@@ -16,7 +16,7 @@ You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx;
|
||||
import org.junit.*;
|
||||
import org.junit.*; import gplx.core.tests.*;
|
||||
public class Bry_find__tst {
|
||||
private Bry_find__fxt fxt = new Bry_find__fxt();
|
||||
@Test public void Find_fwd() {
|
||||
@@ -59,6 +59,10 @@ public class Bry_find__tst {
|
||||
fxt.Test_Trim_bwd_space_tab("" , 0);
|
||||
fxt.Test_Trim_bwd_space_tab(" \t" , 0);
|
||||
}
|
||||
@Test public void Find_fwd_while_in() {
|
||||
boolean[] while_ary = fxt.Init__find_fwd_while_in(Byte_ascii.Space, Byte_ascii.Tab, Byte_ascii.Nl);
|
||||
fxt.Test__find_fwd_while_in(" \t\na", while_ary, 3);
|
||||
}
|
||||
}
|
||||
class Bry_find__fxt {
|
||||
public void Test_Find_fwd(String src, String lkp, int bgn, int expd) {Tfds.Eq(expd, Bry_find_.Find_fwd(Bry_.new_u8(src), Bry_.new_u8(lkp), bgn));}
|
||||
@@ -74,4 +78,15 @@ class Bry_find__fxt {
|
||||
int actl = Bry_find_.Trim_fwd_space_tab(raw_bry, 0, raw_bry.length);
|
||||
Tfds.Eq(expd, actl, raw_str);
|
||||
}
|
||||
public boolean[] Init__find_fwd_while_in(byte... ary) {
|
||||
boolean[] rv = new boolean[256];
|
||||
int len = ary.length;
|
||||
for (int i = 0; i < len; i++)
|
||||
rv[ary[i]] = true;
|
||||
return rv;
|
||||
}
|
||||
public void Test__find_fwd_while_in(String src, boolean[] ary, int expd) {
|
||||
byte[] src_bry = Bry_.new_u8(src);
|
||||
Gftest.Eq__int(expd, Bry_find_.Find_fwd_while_in(src_bry, 0, src_bry.length, ary));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -48,7 +48,7 @@ public class Bry_split_ {
|
||||
boolean reset = true;
|
||||
if (itm_bgn == -1) {
|
||||
if (pos_is_last) {} // skip dlm at bgn / end; EX: "a,"
|
||||
else {wkr.Split(src, itm_bgn, itm_end);} // else, process "empty" dlm; EX: ",a"
|
||||
else {wkr.Split(src, pos, pos );} // else, process "empty" dlm; EX: ",a"
|
||||
}
|
||||
else {
|
||||
int rv = wkr.Split(src, itm_bgn, itm_end);
|
||||
|
||||
@@ -43,6 +43,9 @@ public class Bry_split__tst {
|
||||
fxt.Test_split("a|b|c|d" , 2, 6, "|", "b", "c");
|
||||
fxt.Test_split("a|b|c|d" , 2, 4, "|", "b");
|
||||
}
|
||||
@Test public void Empty() {
|
||||
fxt.Test_split("a\n\nb" , Byte_ascii.Nl, Bool_.N, "a", "", "b");
|
||||
}
|
||||
@Test public void Split_w_max() {
|
||||
fxt.Test__split_w_max("a|b|c|d" , Byte_ascii.Pipe, 2, "a", "b"); // max is less
|
||||
fxt.Test__split_w_max("a" , Byte_ascii.Pipe, 2, "a", null); // max is more
|
||||
|
||||
@@ -109,6 +109,14 @@ public class Btrie_slim_mgr implements Btrie_mgr {
|
||||
Add_obj(Bry_.new_u8(ary[i]), bval);
|
||||
return this;
|
||||
}
|
||||
public Btrie_slim_mgr Add_many_str(String... ary) {
|
||||
int len = ary.length;
|
||||
for (int i = 0; i < len; i++) {
|
||||
byte[] itm = Bry_.new_u8(ary[i]);
|
||||
Add_obj(itm, itm);
|
||||
}
|
||||
return this;
|
||||
}
|
||||
public Btrie_slim_mgr Add_many_int(int val, String... ary) {return Add_many_int(val, Bry_.Ary(ary));}
|
||||
public Btrie_slim_mgr Add_many_int(int val, byte[]... ary) {
|
||||
int len = ary.length;
|
||||
|
||||
@@ -87,11 +87,31 @@ public class Hex_utl_ {
|
||||
public static void Write(byte[] bry, int bgn, int end, int val) {
|
||||
for (int i = end - 1; i > bgn - 1; i--) {
|
||||
int b = val % 16;
|
||||
bry[i] = To_byte(b);
|
||||
bry[i] = To_byte_ucase(b);
|
||||
val /= 16;
|
||||
if (val == 0) break;
|
||||
}
|
||||
}
|
||||
public static void Write_bfr(Bry_bfr bfr, boolean lcase, int val) {
|
||||
// count bytes
|
||||
int val_len = 0;
|
||||
int tmp = val;
|
||||
while (true) {
|
||||
tmp /= 16;
|
||||
val_len++;
|
||||
if (tmp == 0) break;
|
||||
}
|
||||
|
||||
// fill bytes from right to left
|
||||
int hex_bgn = bfr.Len();
|
||||
bfr.Add_byte_repeat(Byte_ascii.Null, val_len);
|
||||
byte[] bry = bfr.Bfr();
|
||||
for (int i = 0; i < val_len; i++) {
|
||||
int b = val % 16;
|
||||
bry[hex_bgn + val_len - i - 1] = lcase ? To_byte_lcase(b) : To_byte_ucase(b);
|
||||
val /= 16;
|
||||
}
|
||||
}
|
||||
public static boolean Is_hex_many(byte... ary) {
|
||||
for (byte itm : ary) {
|
||||
switch (itm) {
|
||||
@@ -123,7 +143,7 @@ public class Hex_utl_ {
|
||||
default: throw Err_.new_parse("hexstring", Int_.To_str(val));
|
||||
}
|
||||
}
|
||||
private static byte To_byte(int v) {
|
||||
private static byte To_byte_ucase(int v) {
|
||||
switch (v) {
|
||||
case 0: return Byte_ascii.Num_0; case 1: return Byte_ascii.Num_1; case 2: return Byte_ascii.Num_2; case 3: return Byte_ascii.Num_3; case 4: return Byte_ascii.Num_4;
|
||||
case 5: return Byte_ascii.Num_5; case 6: return Byte_ascii.Num_6; case 7: return Byte_ascii.Num_7; case 8: return Byte_ascii.Num_8; case 9: return Byte_ascii.Num_9;
|
||||
|
||||
@@ -16,7 +16,7 @@ You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.core.encoders; import gplx.*; import gplx.core.*;
|
||||
import org.junit.*;
|
||||
import org.junit.*; import gplx.core.tests.*;
|
||||
public class Hex_utl__tst {
|
||||
private final Hex_utl__fxt fxt = new Hex_utl__fxt();
|
||||
@Test public void To_int() {
|
||||
@@ -46,6 +46,15 @@ public class Hex_utl__tst {
|
||||
fxt.Test__write("[00000000]", 1, 9, 15, "[0000000F]");
|
||||
fxt.Test__write("[00000000]", 1, 9, 255, "[000000FF]");
|
||||
}
|
||||
@Test public void Write_bfr() {
|
||||
fxt.Test__write_bfr(Bool_.Y, 0, "0");
|
||||
fxt.Test__write_bfr(Bool_.Y, 15, "f");
|
||||
fxt.Test__write_bfr(Bool_.Y, 16, "10");
|
||||
fxt.Test__write_bfr(Bool_.Y, 32, "20");
|
||||
fxt.Test__write_bfr(Bool_.Y, 255, "ff");
|
||||
fxt.Test__write_bfr(Bool_.Y, 256, "100");
|
||||
fxt.Test__write_bfr(Bool_.Y, Int_.Max_value, "7fffffff");
|
||||
}
|
||||
}
|
||||
class Hex_utl__fxt {
|
||||
public void Test__write(String s, int bgn, int end, int val, String expd) {
|
||||
@@ -63,6 +72,11 @@ class Hex_utl__fxt {
|
||||
String actl = Hex_utl_.To_str(val, pad);
|
||||
Tfds.Eq(expd, actl);
|
||||
}
|
||||
private final Bry_bfr bfr = Bry_bfr_.New();
|
||||
public void Test__write_bfr(boolean lcase, int val, String expd) {
|
||||
Hex_utl_.Write_bfr(bfr, lcase, val);
|
||||
Gftest.Eq__str(expd, bfr.To_str_and_clear());
|
||||
}
|
||||
// public void Test__encode_bry(int val, int pad, String expd) {
|
||||
// String actl = Hex_utl_.To_str(val, pad);
|
||||
// Tfds.Eq(expd, actl);
|
||||
|
||||
Reference in New Issue
Block a user