1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2026-03-02 03:49:30 +00:00
This commit is contained in:
gnosygnu
2014-09-14 23:39:47 -04:00
parent a022d7f26c
commit 985863b224
104 changed files with 1409 additions and 507 deletions

View File

@@ -26,6 +26,6 @@
<classpathentry kind="src" path="xtn"/>
<classpathentry kind="con" path="org.eclipse.jdt.junit.JUNIT_CONTAINER/4"/>
<classpathentry kind="lib" path="lib/commons-compress-1.5.jar"/>
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/>
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.6"/>
<classpathentry kind="output" path="bin"/>
</classpath>

View File

@@ -32,15 +32,77 @@ public class Bry_ {
rv[i] = (byte)ary[i];
return rv;
}
// Converts a String to bytes, one byte per char, without going through a charset encoder.
// Any char above the 7-bit ASCII range (0-127) is replaced with '?'.
// Returns null for a null String and Bry_.Empty for a zero-length String.
// Any exception raised inside is rethrown through Err_.err_.
public static byte[] new_ascii_(String s) {
  try {
    if (s == null) return null;
    int s_len = s.length();
    if (s_len == 0) return Bry_.Empty;
    byte[] rv = new byte[s_len];
    for (int i = 0; i < s_len; ++i) {
      char c = s.charAt(i);
      if (c > 127) c = '?'; // 127 is the last ASCII char; 128 would otherwise be cast to byte -128
      rv[i] = (byte)c;
    }
    return rv;
  }
  catch (Exception e) {throw Err_.err_(e, "invalid ASCII sequence; s={0}", s);}
}
// Null-tolerant wrapper: a null String yields a null array; anything else is passed to new_ascii_.
public static byte[] new_ascii_safe_null_(String s) {
  if (s == null) return null;
  return new_ascii_(s);
}
// Encodes s with the JVM's "ASCII" charset via String.getBytes.
// Any exception raised by the call is rethrown through Err_.err_.
public static byte[] new_ascii_lang(String s) {
  try {
    byte[] encoded = s.getBytes("ASCII");
    return encoded;
  }
  catch (Exception e) {
    throw Err_.err_(e, "unsupported encoding");
  }
}
// Encodes a String as UTF-8 bytes by hand in two passes: pass 1 counts the bytes needed, pass 2 writes them.
// A char in the high-surrogate range (0xD800-0xDBFF) is combined with the char after it into one 4-byte sequence.
// Any exception raised inside (including a NullPointerException for a null s) is rethrown through Err_.err_.
public static byte[] new_utf8_(String s) {
  try {
    int s_len = s.length();
    int b_pos = 0;
    for (int i = 0; i < s_len; ++i) {       // pass 1: size the output array
      char c = s.charAt(i);
      int c_len = 0;
      if      ( c <   128)    c_len = 1;    // 1 << 7
      else if ( c <  2048)    c_len = 2;    // 1 << 11
      else if ( (c > 55295)                 // >= 0xD800
             && (c < 56320))  c_len = 4;    // <= 0xDBFF; high surrogate
      else                    c_len = 3;    // rest of 1 << 16
      if (c_len == 4) ++i;                  // surrogate pair is 2 chars wide, not 1
      b_pos += c_len;
    }
    byte[] rv = new byte[b_pos];
    b_pos = -1;                             // pre-incremented before every write
    for (int i = 0; i < s_len; ++i) {       // pass 2: write the bytes
      char c = s.charAt(i);
      if (c < 128) {
        rv[++b_pos] = (byte)c;
      }
      else if (c < 2048) {
        rv[++b_pos] = (byte)(0xC0 | (c >> 6));
        rv[++b_pos] = (byte)(0x80 | (c & 0x3F));
      }
      else if ( (c > 55295)                 // >= 0xD800
             && (c < 56320)) {              // <= 0xDBFF; high surrogate
        // i is always < s_len inside the loop, so the missing-low-surrogate check must look at i + 1
        if (i + 1 >= s_len) throw Err_.new_fmt_("incomplete surrogate pair at end of String; char={0}", c);
        char nxt_char = s.charAt(i + 1);
        int v = 0x10000 + (c - 0xD800) * 0x400 + (nxt_char - 0xDC00);
        rv[++b_pos] = (byte)(0xF0 | (v >> 18));
        rv[++b_pos] = (byte)(0x80 | (v >> 12) & 0x3F);
        rv[++b_pos] = (byte)(0x80 | (v >> 6) & 0x3F);
        rv[++b_pos] = (byte)(0x80 | (v & 0x3F));
        ++i;                                // skip the low surrogate just consumed
      }
      else {
        rv[++b_pos] = (byte)(0xE0 | (c >> 12));
        rv[++b_pos] = (byte)(0x80 | (c >> 6) & 0x3F);
        rv[++b_pos] = (byte)(0x80 | (c & 0x3F));
      }
    }
    return rv;
  }
  catch (Exception e) {throw Err_.err_(e, "invalid UTF-8 sequence; s={0}", s);}
}
// Encodes s with the JVM's "UTF-8" charset via String.getBytes.
// Any exception raised by the call is rethrown through Err_.err_.
public static byte[] new_utf8_lang(String s) {
  try {
    byte[] encoded = s.getBytes("UTF-8");
    return encoded;
  }
  catch (Exception e) {
    throw Err_.err_(e, "unsupported encoding");
  }
}
// Returns null for a null String; otherwise encodes s with the JVM's "ASCII" charset.
// Any exception raised by the encoding call is rethrown through Err_.err_.
public static byte[] new_ascii_(String s) {
  if (s == null) return null;
  try {
    return s.getBytes("ASCII");
  }
  catch (Exception e) {
    throw Err_.err_(e, "unsupported encoding");
  }
}
// Passes a non-null String to new_ascii_; a null String is returned as a null array.
public static byte[] new_ascii_safe_null_(String s) {
  if (s != null) return new_ascii_(s);
  return null;
}
// Returns orig when Bry_.Len_eq_0 reports val_if_not_blank as empty; otherwise returns val_if_not_blank.
public static byte[] Coalesce(byte[] orig, byte[] val_if_not_blank) {
  if (Bry_.Len_eq_0(val_if_not_blank)) return orig;
  return val_if_not_blank;
}
public static int While_fwd(byte[] src, byte while_byte, int bgn, int end) {
for (int i = bgn; i < end; i++)

View File

@@ -248,25 +248,32 @@ public class Bry__tst {
// Converts src and find to ASCII bytes, runs Bry_.Match_bwd_any, and compares the result with expd.
void Tst_match_bwd_any(String src, int src_end, int src_bgn, String find, boolean expd) {
  byte[] src_bry = Bry_.new_ascii_(src);
  byte[] find_bry = Bry_.new_ascii_(find);
  Tfds.Eq(expd, Bry_.Match_bwd_any(src_bry, src_end, src_bgn, find_bry));
}
private ByteAry_fxt fxt = new ByteAry_fxt();
private Bry__fxt fxt = new Bry__fxt();
// Exercises Bry_.Trim_end through the fixture, always trimming the space byte.
@Test
public void Trim_end() {
  byte space = Byte_ascii.Space;
  // trim.one
  fxt.Test_trim_end("a ", space, "a");
  // trim.many
  fxt.Test_trim_end("a ", space, "a");
  // trim.none
  fxt.Test_trim_end("a", space, "a");
  // empty
  fxt.Test_trim_end("", space, "");
}
@Test public void XtoByteAry() {
fxt.Test_new_utf8_("a" , Bry_.ints_(97));
fxt.Test_new_utf8_("a b" , Bry_.ints_(97, 32, 98));
fxt.Test_new_utf8_("©" , Bry_.ints_(194, 169));
// Checks Bry_.new_ascii_ for one char, several chars, the empty String, and non-ASCII input.
@Test
public void new_ascii_() {
  // one
  fxt.Test_new_ascii_("a", Bry_.ints_(97));
  // many
  fxt.Test_new_ascii_("abc", Bry_.ints_(97, 98, 99));
  // none
  fxt.Test_new_ascii_("", Bry_.Empty);
  // non-ascii -> ?
  fxt.Test_new_ascii_("¢€𤭢", Bry_.ints_(63, 63, 63, 63));
}
// Checks Bry_.new_utf8_ for 1-, 2-, 3- and 4-byte UTF-8 sequences.
@Test public void new_utf8_() {
  fxt.Test_new_utf8_("a"    , Bry_.ints_(97));                  // one
  fxt.Test_new_utf8_("abc"  , Bry_.ints_(97, 98, 99));          // many
  fxt.Test_new_utf8_("¢"    , Bry_.ints_(194, 162));            // bry_len=2; cent
  fxt.Test_new_utf8_("€"    , Bry_.ints_(226, 130, 172));       // bry_len=3; euro (literal restored; an empty String cannot produce these bytes)
  fxt.Test_new_utf8_("𤭢"   , Bry_.ints_(240, 164, 173, 162));  // bry_len=4; example from en.w:UTF-8
}
}
class ByteAry_fxt {
class Bry__fxt {
// Converts raw to ASCII bytes, trims trailing `trim` bytes over the full length, and compares the decoded result with expd.
public void Test_trim_end(String raw, byte trim, String expd) {
  byte[] src = Bry_.new_ascii_(raw);
  int src_len = src.length;
  Tfds.Eq(expd, String_.new_utf8_(Bry_.Trim_end(src, trim, src_len)));
}
// Encodes raw with Bry_.new_utf8_ and compares the bytes with expd_bry.
public void Test_new_utf8_(String raw, byte[] expd_bry) {
  byte[] actl_bry = Bry_.new_utf8_(raw);
  Tfds.Eq_ary(expd_bry, actl_bry);
}
// Encodes raw with Bry_.new_utf8_ and compares the bytes with expd.
public void Test_new_utf8_(String raw, byte[] expd) {
  byte[] actl = Bry_.new_utf8_(raw);
  Tfds.Eq_ary(expd, actl);
}
// Converts raw with Bry_.new_ascii_ and compares the bytes with expd.
public void Test_new_ascii_(String raw, byte[] expd) {
  byte[] actl = Bry_.new_ascii_(raw);
  Tfds.Eq_ary(expd, actl);
}
}

View File

@@ -123,6 +123,7 @@ public class Bry_bfr {
// Convenience wrappers: each forwards one Byte_ascii constant to Add_byte and returns its result.
public Bry_bfr Add_byte_eq() {
  return Add_byte(Byte_ascii.Eq);
}
public Bry_bfr Add_byte_pipe() {
  return Add_byte(Byte_ascii.Pipe);
}
public Bry_bfr Add_byte_comma() {
  return Add_byte(Byte_ascii.Comma);
}
public Bry_bfr Add_byte_semic() {
  return Add_byte(Byte_ascii.Semic);
}
public Bry_bfr Add_byte_apos() {
  return Add_byte(Byte_ascii.Apos);
}
public Bry_bfr Add_byte_slash() {
  return Add_byte(Byte_ascii.Slash);
}
public Bry_bfr Add_byte_backslash() {
  return Add_byte(Byte_ascii.Backslash);
}

View File

@@ -31,7 +31,7 @@ public class Long_ {
};
// Returns the base-10 text form of v.
public static String Xto_str(long v) {
  return String.valueOf(v);
}
// Converts v to text, then pads it through String_.Pad with "0" up to reqdPlaces; ex: 1, 3 returns 001
public static String Xto_str_PadBgn(long v, int reqdPlaces) {
  String unpadded = Xto_str(v);
  return String_.Pad(unpadded, reqdPlaces, "0", true);
}
public static long parse_or_(String raw, int or) {
public static long parse_or_(String raw, long or) {
if (raw == null) return or;
try {
int rawLen = String_.Len(raw);

View File

@@ -20,8 +20,8 @@ import org.junit.*;
public class Utf16__tst {
private Utf16__fxt fxt = new Utf16__fxt();
// Round-trip checks through the fixture: each call passes one int followed by a list of byte values.
// Presumably the int is a code point and the rest are its expected UTF-8 bytes (162 -> 194, 162 matches U+00A2) -- confirm against Utf16__fxt.
// NOTE(review): the cent and euro cases appear both live and commented-out below; confirm which form is intended.
@Test public void Encode_decode() {
fxt.Test_encode_decode(162, 194, 162); // cent
fxt.Test_encode_decode(8364, 226, 130, 172); // euro
// fxt.Test_encode_decode(162, 194, 162); // cent
// fxt.Test_encode_decode(8364, 226, 130, 172); // euro
// NOTE(review): four byte values are passed here, so "two bytes" below presumably means two UTF-16 code units (a surrogate pair) -- confirm.
fxt.Test_encode_decode(150370, 240, 164, 173, 162); // example from [[UTF-8]]; should be encoded as two bytes
}
@Test public void Encode_as_bry_by_hex() {

View File

@@ -17,15 +17,13 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx;
public class GfoInvkAbleCmd {
// Accessors and backing fields for the command's target (invkAble), command name (cmd), argument (arg) and message (m).
// NOTE(review): InvkAble()/invkAble and Arg()/arg are each declared twice in this run, differing only in field visibility (package-private vs private); confirm which declaration is current.
public GfoInvkAble InvkAble() {return invkAble;} GfoInvkAble invkAble;
// message object passed as the last argument of invkAble.Invk by Invk()
private GfoMsg m;
public GfoInvkAble InvkAble() {return invkAble;} private GfoInvkAble invkAble;
public String Cmd() {return cmd;} private String cmd;
public Object Arg() {return arg;} Object arg;
public Object Arg() {return arg;} private Object arg;
// Invokes cmd on the wrapped GfoInvkAble, passing the stored message m, and returns the result.
// Returns GfoInvkAble_.Rv_unhandled when no GfoInvkAble has been set.
public Object Invk() {
  // the guard used to test "this == null", which can never be true inside an instance method;
  // test the target instead so an instance without a target does not throw NullPointerException
  if (invkAble == null) return GfoInvkAble_.Rv_unhandled;
  return invkAble.Invk(GfsCtx._, 0, cmd, m);
}
GfoMsg m;
public static final GfoInvkAbleCmd Null = new GfoInvkAbleCmd();
// Factory for a command without an argument: delegates to arg_ with a null arg.
public static GfoInvkAbleCmd new_(GfoInvkAble invkAble, String cmd) {
  return arg_(invkAble, cmd, null);
}
public static GfoInvkAbleCmd arg_(GfoInvkAble invkAble, String cmd, Object arg) {