mirror of
https://github.com/gnosygnu/xowa.git
synced 2024-10-27 20:34:16 +00:00
Language: Escape left-to-right / right-to-left marks in names.json ('\xE2\x80\x8E' to '\u200E') [#501]
This commit is contained in:
parent
d4a28d3ffe
commit
5d886501e8
@ -17,7 +17,7 @@ package gplx;
|
||||
public class Byte_ascii {
|
||||
public static final byte
|
||||
Null = 0 , Backfeed = 8, Tab = 9
|
||||
, Nl = 10, Formfeed = 12, Cr = 13
|
||||
, Nl = 10, Vertical_tab = 11, Formfeed = 12, Cr = 13
|
||||
, Escape = 27
|
||||
, Space = 32, Bang = 33, Quote = 34
|
||||
, Hash = 35, Dollar = 36, Percent = 37, Amp = 38, Apos = 39
|
||||
|
@ -128,18 +128,22 @@ public class Hex_utl_ {
|
||||
}
|
||||
public static boolean Is_hex_many(byte... ary) {
|
||||
for (byte itm : ary) {
|
||||
if (!Is_hex(itm))
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
public static boolean Is_hex(byte itm) {
|
||||
switch (itm) {
|
||||
case Byte_ascii.Num_0: case Byte_ascii.Num_1: case Byte_ascii.Num_2: case Byte_ascii.Num_3: case Byte_ascii.Num_4:
|
||||
case Byte_ascii.Num_5: case Byte_ascii.Num_6: case Byte_ascii.Num_7: case Byte_ascii.Num_8: case Byte_ascii.Num_9:
|
||||
case Byte_ascii.Ltr_A: case Byte_ascii.Ltr_B: case Byte_ascii.Ltr_C: case Byte_ascii.Ltr_D: case Byte_ascii.Ltr_E: case Byte_ascii.Ltr_F:
|
||||
case Byte_ascii.Ltr_a: case Byte_ascii.Ltr_b: case Byte_ascii.Ltr_c: case Byte_ascii.Ltr_d: case Byte_ascii.Ltr_e: case Byte_ascii.Ltr_f:
|
||||
break;
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
private static int To_int(char c) {
|
||||
switch (c) {
|
||||
case '0': return 0; case '1': return 1; case '2': return 2; case '3': return 3; case '4': return 4;
|
||||
|
34
100_core/src/gplx/core/encoders/Oct_utl_.java
Normal file
34
100_core/src/gplx/core/encoders/Oct_utl_.java
Normal file
@ -0,0 +1,34 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||
|
||||
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||
or alternatively under the terms of the Apache License Version 2.0.
|
||||
|
||||
You may use XOWA according to either of these licenses as is most appropriate
|
||||
for your project on a case-by-case basis.
|
||||
|
||||
The terms of each license can be found in the source code repository:
|
||||
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.core.encoders; import gplx.*; import gplx.core.*;
|
||||
public class Oct_utl_ {
|
||||
public static int Parse_or(byte[] src, int or) {return Parse_or(src, 0, src.length, or);}
|
||||
public static int Parse_or(byte[] src, int bgn, int end, int or) {
|
||||
int rv = 0; int factor = 1;
|
||||
byte b = Byte_.Max_value_127;
|
||||
for (int i = end - 1; i >= bgn; i--) {
|
||||
switch (src[i]) {
|
||||
case Byte_ascii.Num_0: b = 0; break; case Byte_ascii.Num_1: b = 1; break; case Byte_ascii.Num_2: b = 2; break; case Byte_ascii.Num_3: b = 3; break; case Byte_ascii.Num_4: b = 4; break;
|
||||
case Byte_ascii.Num_5: b = 5; break; case Byte_ascii.Num_6: b = 6; break; case Byte_ascii.Num_7: b = 7; break;
|
||||
default: b = Byte_.Max_value_127; break;
|
||||
}
|
||||
if (b == Byte_.Max_value_127) return or;
|
||||
rv += b * factor;
|
||||
factor *= 8;
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
}
|
@ -14,9 +14,12 @@ GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.jsons; import gplx.*; import gplx.langs.*;
|
||||
import gplx.objects.strings.unicodes.*;
|
||||
import gplx.core.encoders.*;
|
||||
public class Json_doc_wtr {
|
||||
private int indent = -2;
|
||||
private Bry_bfr bfr = Bry_bfr_.Reset(255);
|
||||
public void Opt_unicode_y_() {opt_unicode = true;} private boolean opt_unicode;
|
||||
public Json_doc_wtr Indent() {return Indent(indent);}
|
||||
private Json_doc_wtr Indent(int v) {if (v > 0) bfr.Add_byte_repeat(Byte_ascii.Space, v); return this;}
|
||||
public Json_doc_wtr Indent_add() {indent += 2; return this;}
|
||||
@ -31,11 +34,59 @@ public class Json_doc_wtr {
|
||||
bfr.Add(Object_.Bry__null);
|
||||
else {
|
||||
bfr.Add_byte(Byte_ascii.Quote);
|
||||
if (opt_unicode) {
|
||||
Ustring ustr = Ustring_.New_codepoints(String_.new_u8(v));
|
||||
int ustr_len = ustr.Len_in_data();
|
||||
for (int i = 0; i < ustr_len; i++) {
|
||||
int cp = ustr.Get_data(i);
|
||||
Write_str_codepoint(bfr, cp);
|
||||
}
|
||||
}
|
||||
else {
|
||||
bfr.Add_bry_escape(Byte_ascii.Quote, Escaped__quote, v, 0, v.length);
|
||||
}
|
||||
bfr.Add_byte(Byte_ascii.Quote);
|
||||
}
|
||||
return this;
|
||||
}
|
||||
private void Write_str_codepoint(Bry_bfr bfr, int val) {
|
||||
switch (val) { // REF: https://www.json.org/
|
||||
case Byte_ascii.Quote:
|
||||
bfr.Add_byte_backslash().Add_byte(Byte_ascii.Quote);
|
||||
break;
|
||||
case Byte_ascii.Backslash:
|
||||
bfr.Add_byte_backslash().Add_byte(Byte_ascii.Backslash);
|
||||
break;
|
||||
case Byte_ascii.Backfeed:
|
||||
bfr.Add_byte_backslash().Add_byte(Byte_ascii.Ltr_b);
|
||||
break;
|
||||
case Byte_ascii.Formfeed:
|
||||
bfr.Add_byte_backslash().Add_byte(Byte_ascii.Ltr_f);
|
||||
break;
|
||||
case Byte_ascii.Nl:
|
||||
bfr.Add_byte_backslash().Add_byte(Byte_ascii.Ltr_n);
|
||||
break;
|
||||
case Byte_ascii.Cr:
|
||||
bfr.Add_byte_backslash().Add_byte(Byte_ascii.Ltr_r);
|
||||
break;
|
||||
case Byte_ascii.Tab:
|
||||
bfr.Add_byte_backslash().Add_byte(Byte_ascii.Ltr_t);
|
||||
break;
|
||||
default:
|
||||
if ( val < Byte_ascii.Space // control characters
|
||||
|| val == 160 // nbsp
|
||||
|| val == 8206 // left to right
|
||||
|| val == 8207 // right to left
|
||||
) {
|
||||
// convert to \u1234
|
||||
bfr.Add_byte_backslash().Add_byte(Byte_ascii.Ltr_u).Add_str_a7(Hex_utl_.To_str(val, 4));
|
||||
}
|
||||
else {
|
||||
bfr.Add_u8_int(val);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
public Json_doc_wtr Int(int v) {bfr.Add_int_variable(v); return this;}
|
||||
public Json_doc_wtr Double(double v) {bfr.Add_double(v); return this;}
|
||||
public Json_doc_wtr Comma() {Indent(); bfr.Add_byte(Byte_ascii.Comma).Add_byte_nl(); return this;}
|
||||
|
@ -25,6 +25,19 @@ public class Json_doc_wtr_tst {
|
||||
, " 'k1':'v\\\"1'"
|
||||
, "}"));
|
||||
}
|
||||
@Test public void Quotes() {
|
||||
fxt.Test__string__quotes("a\"z" , "a\\\"z");
|
||||
fxt.Test__string__quotes("a\u0008z" , "a\\bz");
|
||||
fxt.Test__string__quotes("a\fz" , "a\\fz");
|
||||
fxt.Test__string__quotes("a\nz" , "a\\nz");
|
||||
fxt.Test__string__quotes("a\rz" , "a\\rz");
|
||||
fxt.Test__string__quotes("a\tz" , "a\\tz");
|
||||
fxt.Test__string__quotes("aēz" , "aēz");
|
||||
fxt.Test__string__quotes("az" , "a\\u000Fz");
|
||||
fxt.Test__string__quotes("a z" , "a\\u00A0z");
|
||||
fxt.Test__string__quotes("az" , "a\\u200Ez");
|
||||
fxt.Test__string__quotes("az" , "a\\u200Fz");
|
||||
}
|
||||
}
|
||||
class Json_doc_wtr_fxt {
|
||||
public Json_doc_wtr Exec__Kv_simple(String key, String val) {
|
||||
@ -40,4 +53,12 @@ class Json_doc_wtr_fxt {
|
||||
public String Exec__Concat_apos(String... ary) {
|
||||
return Json_doc.Make_str_by_apos(ary);
|
||||
}
|
||||
public void Test__string__quotes(String raw, String expd) {
|
||||
Json_doc_wtr doc_wtr = new Json_doc_wtr();
|
||||
doc_wtr.Opt_unicode_y_();
|
||||
doc_wtr.Str(Bry_.new_u8(raw));
|
||||
String actl = doc_wtr.Bld_as_str();
|
||||
actl = String_.Mid(actl, 1, String_.Len(actl) - 1);
|
||||
Gftest.Eq__str(expd, actl);
|
||||
}
|
||||
}
|
||||
|
@ -25,7 +25,11 @@ public class Php_evaluator implements Php_tkn_wkr {
|
||||
private byte mode = Mode_key_bgn, next_tid = 0, next_mode = 0;
|
||||
private Php_line_assign cur_line; private Php_itm_ary cur_ary; private Php_key cur_kv_key;
|
||||
private final List_adp frame_stack = List_adp_.New();
|
||||
public Php_evaluator(Gfo_msg_log msg_log) {this.msg_log = msg_log;} private Gfo_msg_log msg_log;
|
||||
private final Php_quote_parser quote_parser = new Php_quote_parser();
|
||||
private final Gfo_msg_log msg_log;
|
||||
public Php_evaluator(Gfo_msg_log msg_log) {
|
||||
this.msg_log = msg_log;
|
||||
}
|
||||
public void Init(Php_ctx ctx) {src = ctx.Src(); frame_stack.Clear();} private byte[] src;
|
||||
public List_adp List() {return lines;} private final List_adp lines = List_adp_.New();
|
||||
public Gfo_msg_log Msg_log() {return msg_log;}
|
||||
@ -101,7 +105,7 @@ public class Php_evaluator implements Php_tkn_wkr {
|
||||
switch (tkn_tid) {
|
||||
case Php_tkn_.Tid_quote:
|
||||
Php_tkn_quote tkn_quote = (Php_tkn_quote)tkn;
|
||||
Php_itm_quote key_sub = new Php_itm_quote(tkn_quote.Quote_text(src));
|
||||
Php_itm_quote key_sub = new Php_itm_quote(tkn_quote.Quote_text(quote_parser, src));
|
||||
cur_line.Key_subs_(new Php_key[] {key_sub});
|
||||
mode = Mode_key_end;
|
||||
break;
|
||||
@ -121,7 +125,7 @@ public class Php_evaluator implements Php_tkn_wkr {
|
||||
case Php_tkn_.Tid_quote:
|
||||
Expect(Php_tkn_.Tid_semic, Mode_key_bgn);
|
||||
Php_tkn_quote tkn_quote = (Php_tkn_quote)tkn;
|
||||
line_val = new Php_itm_quote(tkn_quote.Quote_text(src));
|
||||
line_val = new Php_itm_quote(tkn_quote.Quote_text(quote_parser, src));
|
||||
break;
|
||||
case Php_tkn_.Tid_ary:
|
||||
case Php_tkn_.Tid_brack_bgn:
|
||||
@ -161,7 +165,7 @@ public class Php_evaluator implements Php_tkn_wkr {
|
||||
case Php_tkn_.Tid_true: Ary_add_itm(Php_itm_bool_true.Instance); break;
|
||||
case Php_tkn_.Tid_quote:
|
||||
Php_tkn_quote tkn_quote = (Php_tkn_quote)tkn;
|
||||
Ary_add_itm(new Php_itm_quote(tkn_quote.Quote_text(src)));
|
||||
Ary_add_itm(new Php_itm_quote(tkn_quote.Quote_text(quote_parser, src)));
|
||||
break;
|
||||
case Php_tkn_.Tid_num:
|
||||
Php_tkn_num tkn_num = (Php_tkn_num)tkn;
|
||||
|
@ -67,6 +67,15 @@ class Php_parser_fxt {
|
||||
tst_mgr.Tst_ary("", expd, actl);
|
||||
log_mgr_chkr.tst(tst_mgr, line_wkr.Msg_log());
|
||||
}
|
||||
public void Test__string__quotes(String raw, String expd) {
|
||||
line_wkr.Clear();
|
||||
byte[] raw_bry = Bry_.new_u8("$var =\"" + raw +"\";");
|
||||
parser.Parse_tkns(raw_bry, line_wkr);
|
||||
Php_line[] actl_lines = (Php_line[])line_wkr.List().To_ary(Php_line.class);
|
||||
Php_line_assign actl_line = (Php_line_assign)actl_lines[0];
|
||||
Php_itm_quote actl = (Php_itm_quote)actl_line.Val();
|
||||
Tfds.Eq_str(expd, String_.new_u8(actl.Val_obj_bry()));
|
||||
}
|
||||
}
|
||||
abstract class Php_tkn_chkr_base implements Tst_chkr {
|
||||
public abstract byte Tkn_tid();
|
||||
|
@ -48,10 +48,10 @@ public class Php_parser_tst {
|
||||
@Test public void Ary_flat() {fxt.tst_lines("$a = array('b', 'c', 'd');" , fxt.line_assign("a", fxt.itm_ary().Subs_(fxt.itm_quote("b"), fxt.itm_quote("c"), fxt.itm_quote("d"))));}
|
||||
@Test public void Brack_flat() {fxt.tst_lines("$a = ['b', 'c', 'd'];" , fxt.line_assign("a", fxt.itm_ary().Subs_(fxt.itm_quote("b"), fxt.itm_quote("c"), fxt.itm_quote("d"))));}
|
||||
@Test public void Ary_flat_escape() { // PURPOSE.fix: \\' was being interpreted incorrectly; \\ should escape \, but somehow \' was being escaped
|
||||
fxt.tst_lines("$a = array('b\\\\', 'c');", fxt.line_assign("a", fxt.itm_ary().Subs_(fxt.itm_quote("b\\\\"), fxt.itm_quote("c"))));
|
||||
fxt.tst_lines("$a = array('b\\\\', 'c');", fxt.line_assign("a", fxt.itm_ary().Subs_(fxt.itm_quote("b\\"), fxt.itm_quote("c"))));
|
||||
}
|
||||
@Test public void Ary_flat_escape2() { // PURPOSE.fix: \\' was being interpreted incorrectly; \\ should escape \, but somehow \' was being escaped
|
||||
fxt.tst_lines("$a = array('b\\\\\\'c', 'd');", fxt.line_assign("a", fxt.itm_ary().Subs_(fxt.itm_quote("b\\\\\\'c"), fxt.itm_quote("d"))));
|
||||
fxt.tst_lines("$a = array('b\\\\\\'c', 'd');", fxt.line_assign("a", fxt.itm_ary().Subs_(fxt.itm_quote("b\\'c"), fxt.itm_quote("d"))));
|
||||
}
|
||||
@Test public void Ary_kv() {fxt.tst_lines("$a = array(k0 => 'v0', k1 => 'v1', k2 => 'v2');", fxt.line_assign("a", fxt.itm_ary().Subs_(fxt.itm_kv_quote("k0", "v0"), fxt.itm_kv_quote("k1", "v1"), fxt.itm_kv_quote("k2", "v2"))));}
|
||||
@Test public void Brack_kv() {fxt.tst_lines("$a = [k0 => 'v0', k1 => 'v1', k2 => 'v2'];" , fxt.line_assign("a", fxt.itm_ary().Subs_(fxt.itm_kv_quote("k0", "v0"), fxt.itm_kv_quote("k1", "v1"), fxt.itm_kv_quote("k2", "v2"))));}
|
||||
@ -79,4 +79,26 @@ public class Php_parser_tst {
|
||||
, fxt.itm_kv_itm("i20", fxt.itm_ary().Subs_(fxt.itm_quote("21"), fxt.itm_quote("22")))
|
||||
)));
|
||||
}
|
||||
@Test public void Quoted() {
|
||||
fxt.Test__string__quotes("a\\\"z" , "a\"z");
|
||||
fxt.Test__string__quotes("a\\\\z" , "a\\z");
|
||||
fxt.Test__string__quotes("a\\u0008z" , "a\bz");
|
||||
fxt.Test__string__quotes("a\\fz" , "a\fz");
|
||||
fxt.Test__string__quotes("a\\nz" , "a\nz");
|
||||
fxt.Test__string__quotes("a\\rz" , "a\rz");
|
||||
fxt.Test__string__quotes("a\\tz" , "a\tz");
|
||||
fxt.Test__string__quotes("a\\vz" , "a\u000bz");
|
||||
fxt.Test__string__quotes("a\\ez" , "a\u001bz");
|
||||
fxt.Test__string__quotes("a\\$z" , "a$z");
|
||||
fxt.Test__string__quotes("a\\7z" , "a\u0007z");
|
||||
fxt.Test__string__quotes("a\\41z" , "a!z");
|
||||
fxt.Test__string__quotes("a\\111z" , "aIz");
|
||||
fxt.Test__string__quotes("a\\x9z" , "a\tz");
|
||||
fxt.Test__string__quotes("a\\x21z" , "a!z");
|
||||
fxt.Test__string__quotes("a\\xE2\\x80\\x8Ez" , "a\u200Ez");
|
||||
fxt.Test__string__quotes("a\\u9z" , "a\tz");
|
||||
fxt.Test__string__quotes("a\\u21z" , "a!z");
|
||||
fxt.Test__string__quotes("a\\u113z" , "aēz");
|
||||
fxt.Test__string__quotes("a\\u{0008}z" , "a\bz");
|
||||
}
|
||||
}
|
||||
|
132
400_xowa/src/gplx/langs/phps/Php_quote_parser.java
Normal file
132
400_xowa/src/gplx/langs/phps/Php_quote_parser.java
Normal file
@ -0,0 +1,132 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||
|
||||
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||
or alternatively under the terms of the Apache License Version 2.0.
|
||||
|
||||
You may use XOWA according to either of these licenses as is most appropriate
|
||||
for your project on a case-by-case basis.
|
||||
|
||||
The terms of each license can be found in the source code repository:
|
||||
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.phps; import gplx.*; import gplx.langs.*;
|
||||
import gplx.core.encoders.*;
|
||||
class Php_quote_parser { // REF: https://www.php.net/manual/en/language.types.String.php
|
||||
private final Bry_bfr bfr = Bry_bfr_.New();
|
||||
public byte[] Parse(byte[] src, int src_pos, int src_end) {
|
||||
try {
|
||||
while (src_pos < src_end) {
|
||||
int val = 0;
|
||||
byte b = src[src_pos++];
|
||||
if (b == Byte_ascii.Backslash) {
|
||||
b = src[src_pos++];
|
||||
switch(b) {
|
||||
case Byte_ascii.Ltr_n:
|
||||
val = Byte_ascii.Nl;
|
||||
break;
|
||||
case Byte_ascii.Ltr_r:
|
||||
val = Byte_ascii.Cr;
|
||||
break;
|
||||
case Byte_ascii.Ltr_t:
|
||||
val = Byte_ascii.Tab;
|
||||
break;
|
||||
case Byte_ascii.Ltr_v:
|
||||
val = Byte_ascii.Vertical_tab;
|
||||
break;
|
||||
case Byte_ascii.Ltr_e:
|
||||
val = Byte_ascii.Escape;
|
||||
break;
|
||||
case Byte_ascii.Ltr_f:
|
||||
val = Byte_ascii.Formfeed;
|
||||
break;
|
||||
case Byte_ascii.Dollar:
|
||||
case Byte_ascii.Backslash:
|
||||
case Byte_ascii.Quote:
|
||||
val = b;
|
||||
break;
|
||||
// octal
|
||||
case Byte_ascii.Num_0: case Byte_ascii.Num_1: case Byte_ascii.Num_2: case Byte_ascii.Num_3: case Byte_ascii.Num_4:
|
||||
case Byte_ascii.Num_5: case Byte_ascii.Num_6: case Byte_ascii.Num_7: case Byte_ascii.Num_8: case Byte_ascii.Num_9: {
|
||||
int num_bgn = src_pos - 1; // - 1 b/c pos++ above
|
||||
int num_end = src_pos;
|
||||
for (int i = 0; i < 3; i++) {// per REF, octal is {1,3}
|
||||
byte n = src[src_pos];
|
||||
num_end = src_pos;
|
||||
if (Byte_ascii.Is_num(n)) {
|
||||
++src_pos;
|
||||
}
|
||||
else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
val = Oct_utl_.Parse_or(src, num_bgn, num_end, -1);
|
||||
break;
|
||||
}
|
||||
// hexdec
|
||||
case Byte_ascii.Ltr_x: {
|
||||
// REF: changed from \xFF to \u1234; https://github.com/wikimedia/mediawiki/commit/0313128b1038de8f2ee52a181eafdee8c5e430f7#diff-1b04277d170b32db7f92ce812744ef6b
|
||||
int num_bgn = src_pos;
|
||||
int num_end = src_pos++;
|
||||
for (int i = 0; i < 2; i++) { // per REF, hex is {1,2}
|
||||
byte n = src[src_pos];
|
||||
num_end = src_pos;
|
||||
if (Hex_utl_.Is_hex(n)) {
|
||||
++src_pos;
|
||||
}
|
||||
else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
val = Hex_utl_.Parse_or(src, num_bgn, num_end, -1);
|
||||
break;
|
||||
}
|
||||
// unicode
|
||||
case Byte_ascii.Ltr_u: {
|
||||
if (src[src_pos] == Byte_ascii.Curly_bgn) { // ignore braces in u{1234}
|
||||
src_pos++;
|
||||
}
|
||||
|
||||
int num_bgn = src_pos;
|
||||
int num_end = src_pos;
|
||||
for (int i = 0; i < 8; i++) { // assume max of 8 hexdecimals
|
||||
byte n = src[src_pos];
|
||||
num_end = src_pos;
|
||||
if (Byte_ascii.Is_num(n)) {
|
||||
++src_pos;
|
||||
}
|
||||
else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (src[src_pos] == Byte_ascii.Curly_end) { // ignore braces in u{1234}
|
||||
++src_pos;
|
||||
}
|
||||
|
||||
val = Hex_utl_.Parse_or(src, num_bgn, num_end, -1);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
val = b;
|
||||
break;
|
||||
}
|
||||
}
|
||||
else {
|
||||
val = b;
|
||||
}
|
||||
if (val < 255)
|
||||
bfr.Add_byte((byte)val);
|
||||
else
|
||||
bfr.Add_u8_int(val);
|
||||
}
|
||||
return bfr.To_bry_and_clear();
|
||||
} catch (Exception e) {
|
||||
throw Err_.new_exc(e, "Ustring_parser", "unable to parse ustring", "src", Bry_.Mid(src, src_pos, src_end));
|
||||
}
|
||||
}
|
||||
}
|
@ -14,6 +14,7 @@ GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.phps; import gplx.*; import gplx.langs.*;
|
||||
import gplx.core.encoders.*;
|
||||
public abstract class Php_tkn_base implements Php_tkn {
|
||||
public abstract byte Tkn_tid();
|
||||
public int Src_bgn() {return src_bgn;} private int src_bgn;
|
||||
@ -48,7 +49,9 @@ class Php_tkn_quote extends Php_tkn_base {
|
||||
public Php_tkn_quote(int src_bgn, int src_end, byte quote_tid) {this.Src_rng_(src_bgn, src_end); this.quote_tid = quote_tid;}
|
||||
@Override public byte Tkn_tid() {return Php_tkn_.Tid_quote;}
|
||||
public byte Quote_tid() {return quote_tid;} private byte quote_tid;
|
||||
public byte[] Quote_text(byte[] src) {return Bry_.Mid(src, this.Src_bgn() + 1, this.Src_end() - 1);} // NOTE: assume quote are of form 'abc'; +1, -1 to skip flanking chars
|
||||
public byte[] Quote_text(Php_quote_parser quote_parser, byte[] src) {
|
||||
return quote_parser.Parse(src, this.Src_bgn() + 1, this.Src_end() - 1);
|
||||
}
|
||||
public static final byte Tid_null = 0, Tid_mult = 1, Tid_slash = 2, Tid_hash = 3;
|
||||
}
|
||||
class Php_tkn_declaration extends Php_tkn_base {
|
||||
|
@ -73,6 +73,7 @@ class Language_names_converter {
|
||||
}
|
||||
public String To_json(Language_name[] ary) {
|
||||
Json_doc_wtr doc_wtr = new Json_doc_wtr();
|
||||
doc_wtr.Opt_unicode_y_();
|
||||
doc_wtr.Ary_bgn();
|
||||
int len = ary.length;
|
||||
byte[] key_code = Bry_.new_a7("code");
|
||||
|
@ -241,13 +241,13 @@
|
||||
,
|
||||
{
|
||||
"code":"be-tarask"
|
||||
, "name":"беларуская (тарашкевіца)\xE2\x80\x8E"
|
||||
, "name":"беларуская (тарашкевіца)\u200E"
|
||||
, "note":"Belarusian in Taraskievica orthography"
|
||||
}
|
||||
,
|
||||
{
|
||||
"code":"be-x-old"
|
||||
, "name":"беларуская (тарашкевіца)\xE2\x80\x8E"
|
||||
, "name":"беларуская (тарашкевіца)\u200E"
|
||||
, "note":"(be-tarask compat)"
|
||||
}
|
||||
,
|
||||
@ -439,13 +439,13 @@
|
||||
,
|
||||
{
|
||||
"code":"crh-latn"
|
||||
, "name":"qırımtatarca (Latin)\xE2\x80\x8E"
|
||||
, "name":"qırımtatarca (Latin)\u200E"
|
||||
, "note":"Crimean Tatar (Latin)"
|
||||
}
|
||||
,
|
||||
{
|
||||
"code":"crh-cyrl"
|
||||
, "name":"къырымтатарджа (Кирилл)\xE2\x80\x8E"
|
||||
, "name":"къырымтатарджа (Кирилл)\u200E"
|
||||
, "note":"Crimean Tatar (Cyrillic)"
|
||||
}
|
||||
,
|
||||
@ -505,7 +505,7 @@
|
||||
,
|
||||
{
|
||||
"code":"de-formal"
|
||||
, "name":"Deutsch (Sie-Form)\xE2\x80\x8E"
|
||||
, "name":"Deutsch (Sie-Form)\u200E"
|
||||
, "note":"German - formal address (\"Sie\")"
|
||||
}
|
||||
,
|
||||
@ -715,13 +715,13 @@
|
||||
,
|
||||
{
|
||||
"code":"gan-hans"
|
||||
, "name":"赣语(简体)\xE2\x80\x8E"
|
||||
, "name":"赣语(简体)\u200E"
|
||||
, "note":"Gan (Simplified Han)"
|
||||
}
|
||||
,
|
||||
{
|
||||
"code":"gan-hant"
|
||||
, "name":"贛語(繁體)\xE2\x80\x8E"
|
||||
, "name":"贛語(繁體)\u200E"
|
||||
, "note":"Gan (Traditional Han)"
|
||||
}
|
||||
,
|
||||
@ -1081,37 +1081,37 @@
|
||||
,
|
||||
{
|
||||
"code":"kk-arab"
|
||||
, "name":"قازاقشا (تٴوتە)\xE2\x80\x8F"
|
||||
, "name":"قازاقشا (تٴوتە)\u200F"
|
||||
, "note":"Kazakh Arabic"
|
||||
}
|
||||
,
|
||||
{
|
||||
"code":"kk-cyrl"
|
||||
, "name":"қазақша (кирил)\xE2\x80\x8E"
|
||||
, "name":"қазақша (кирил)\u200E"
|
||||
, "note":"Kazakh Cyrillic"
|
||||
}
|
||||
,
|
||||
{
|
||||
"code":"kk-latn"
|
||||
, "name":"qazaqşa (latın)\xE2\x80\x8E"
|
||||
, "name":"qazaqşa (latın)\u200E"
|
||||
, "note":"Kazakh Latin"
|
||||
}
|
||||
,
|
||||
{
|
||||
"code":"kk-cn"
|
||||
, "name":"قازاقشا (جۇنگو)\xE2\x80\x8F"
|
||||
, "name":"قازاقشا (جۇنگو)\u200F"
|
||||
, "note":"Kazakh (China)"
|
||||
}
|
||||
,
|
||||
{
|
||||
"code":"kk-kz"
|
||||
, "name":"қазақша (Қазақстан)\xE2\x80\x8E"
|
||||
, "name":"қазақша (Қазақстан)\u200E"
|
||||
, "note":"Kazakh (Kazakhstan)"
|
||||
}
|
||||
,
|
||||
{
|
||||
"code":"kk-tr"
|
||||
, "name":"qazaqşa (Türkïya)\xE2\x80\x8E"
|
||||
, "name":"qazaqşa (Türkïya)\u200E"
|
||||
, "note":"Kazakh (Turkey)"
|
||||
}
|
||||
,
|
||||
@ -1213,13 +1213,13 @@
|
||||
,
|
||||
{
|
||||
"code":"ku-latn"
|
||||
, "name":"Kurdî (latînî)\xE2\x80\x8E"
|
||||
, "name":"Kurdî (latînî)\u200E"
|
||||
, "note":"Northern Kurdish (Latin script)"
|
||||
}
|
||||
,
|
||||
{
|
||||
"code":"ku-arab"
|
||||
, "name":"كوردي (عەرەبی)\xE2\x80\x8F"
|
||||
, "name":"كوردي (عەرەبی)\u200F"
|
||||
, "note":"Northern Kurdish (Arabic script) (falls back to ckb)"
|
||||
}
|
||||
,
|
||||
@ -1303,7 +1303,7 @@
|
||||
,
|
||||
{
|
||||
"code":"lki"
|
||||
, "name":"لەکی"
|
||||
, "name":"لەکی\u200E"
|
||||
, "note":"Laki"
|
||||
}
|
||||
,
|
||||
@ -1579,7 +1579,7 @@
|
||||
,
|
||||
{
|
||||
"code":"nl-informal"
|
||||
, "name":"Nederlands (informeel)\xE2\x80\x8E"
|
||||
, "name":"Nederlands (informeel)\u200E"
|
||||
, "note":"Dutch (informal address (\"je\"))"
|
||||
}
|
||||
,
|
||||
@ -2047,13 +2047,13 @@
|
||||
,
|
||||
{
|
||||
"code":"sr-ec"
|
||||
, "name":"српски (ћирилица)\xE2\x80\x8E"
|
||||
, "name":"српски (ћирилица)\u200E"
|
||||
, "note":"Serbian Cyrillic ekavian"
|
||||
}
|
||||
,
|
||||
{
|
||||
"code":"sr-el"
|
||||
, "name":"srpski (latinica)\xE2\x80\x8E"
|
||||
, "name":"srpski (latinica)\u200E"
|
||||
, "note":"Serbian Latin ekavian"
|
||||
}
|
||||
,
|
||||
@ -2461,25 +2461,25 @@
|
||||
,
|
||||
{
|
||||
"code":"zh-cn"
|
||||
, "name":"中文(中国大陆)\xE2\x80\x8E"
|
||||
, "name":"中文(中国大陆)\u200E"
|
||||
, "note":"Chinese (PRC)"
|
||||
}
|
||||
,
|
||||
{
|
||||
"code":"zh-hans"
|
||||
, "name":"中文(简体)\xE2\x80\x8E"
|
||||
, "name":"中文(简体)\u200E"
|
||||
, "note":"Mandarin Chinese (Simplified Chinese script) (cmn-hans)"
|
||||
}
|
||||
,
|
||||
{
|
||||
"code":"zh-hant"
|
||||
, "name":"中文(繁體)\xE2\x80\x8E"
|
||||
, "name":"中文(繁體)\u200E"
|
||||
, "note":"Mandarin Chinese (Traditional Chinese script) (cmn-hant)"
|
||||
}
|
||||
,
|
||||
{
|
||||
"code":"zh-hk"
|
||||
, "name":"中文(香港)\xE2\x80\x8E"
|
||||
, "name":"中文(香港)\u200E"
|
||||
, "note":"Chinese (Hong Kong)"
|
||||
}
|
||||
,
|
||||
@ -2491,25 +2491,25 @@
|
||||
,
|
||||
{
|
||||
"code":"zh-mo"
|
||||
, "name":"中文(澳門)\xE2\x80\x8E"
|
||||
, "name":"中文(澳門)\u200E"
|
||||
, "note":"Chinese (Macau)"
|
||||
}
|
||||
,
|
||||
{
|
||||
"code":"zh-my"
|
||||
, "name":"中文(马来西亚)\xE2\x80\x8E"
|
||||
, "name":"中文(马来西亚)\u200E"
|
||||
, "note":"Chinese (Malaysia)"
|
||||
}
|
||||
,
|
||||
{
|
||||
"code":"zh-sg"
|
||||
, "name":"中文(新加坡)\xE2\x80\x8E"
|
||||
, "name":"中文(新加坡)\u200E"
|
||||
, "note":"Chinese (Singapore)"
|
||||
}
|
||||
,
|
||||
{
|
||||
"code":"zh-tw"
|
||||
, "name":"中文(台灣)\xE2\x80\x8E"
|
||||
, "name":"中文(台灣)\u200E"
|
||||
, "note":"Chinese (Taiwan)"
|
||||
}
|
||||
,
|
||||
|
Loading…
Reference in New Issue
Block a user