mirror of
https://github.com/gnosygnu/xowa.git
synced 2026-03-02 03:49:30 +00:00
v1.8.2.1
This commit is contained in:
@@ -18,6 +18,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
package gplx.php; import gplx.*;
|
||||
public class Php_text_itm_parser {
|
||||
// Result codes reported by the parser for one parsed string.
// Rslt_dirty is the value assigned to the result whenever a backslash escape is encountered while scanning.
// NOTE(review): Rslt_orig presumably means "raw can be used unchanged" and Rslt_fmt presumably means
// "a $N format argument was found" -- neither assignment is visible here; confirm against the rest of Parse.
public static final byte Rslt_orig = 0, Rslt_dirty = 1, Rslt_fmt = 2;
|
||||
/**
 * Quoting mode of the text being parsed. When true, the backslash handling only
 * recognizes an escaped apostrophe and an escaped backslash; every other
 * backslash sequence is left as plain text. When false, the wider escape set is used.
 */
private boolean quote_is_single;

/** Returns the current quoting mode. */
public boolean Quote_is_single() {
	return quote_is_single;
}

/** Sets the quoting mode and returns this parser so calls can be chained. */
public Php_text_itm_parser Quote_is_single_(boolean v) {
	this.quote_is_single = v;
	return this;
}
|
||||
public byte[] Parse_as_bry(ListAdp tmp_list, byte[] raw, Byte_obj_ref rslt_ref, Bry_bfr tmp_bfr) {
|
||||
Parse(tmp_list, raw, rslt_ref);
|
||||
byte[] rv = raw;
|
||||
@@ -49,34 +50,52 @@ public class Php_text_itm_parser {
|
||||
switch (b) {
|
||||
case Byte_ascii.Backslash:
|
||||
if (txt_bgn != -1) {tmp_list.Add(new Php_text_itm_text(txt_bgn, i)); txt_bgn = -1; rslt_val = Rslt_dirty;}
|
||||
if (i == raw_last) throw Err_mgr._.fmt_auto_(GRP_KEY, "backslash_is_last_char", String_.new_utf8_(raw));
|
||||
boolean pos_is_last = i == raw_last;
|
||||
int next_pos = i + 1;
|
||||
byte next_char = raw[next_pos];
|
||||
switch (next_char) {
|
||||
case Byte_ascii.Ltr_N:
|
||||
case Byte_ascii.Ltr_n: next_char = Byte_ascii.NewLine; break;
|
||||
case Byte_ascii.Ltr_T:
|
||||
case Byte_ascii.Ltr_t: next_char = Byte_ascii.Tab; break;
|
||||
case Byte_ascii.Ltr_R:
|
||||
case Byte_ascii.Ltr_r: next_char = Byte_ascii.CarriageReturn; break;
|
||||
case Byte_ascii.Ltr_U:
|
||||
case Byte_ascii.Ltr_u: { // EX: "\u007C"
|
||||
rslt_val = Rslt_dirty;
|
||||
Parse_utf16(tmp_list, raw, next_pos + 1, raw_len); // +1 to skip u
|
||||
i = next_pos + 4; // +4 to skip utf16 seq; EX: \u007C; +4 for 007C
|
||||
continue;
|
||||
}
|
||||
case Byte_ascii.Ltr_X:
|
||||
case Byte_ascii.Ltr_x: { // EX: "\xc2"
|
||||
rslt_val = Rslt_dirty;
|
||||
byte[] literal = Bry_.Add(CONST_utf_prefix, Bry_.Mid(raw, next_pos + 1, next_pos + 3));
|
||||
tmp_list.Add(new Php_text_itm_utf16(i, i + 4, literal));
|
||||
i = next_pos + 2; // +2 to skip rest; EX: \xc2; +2 for c2
|
||||
continue;
|
||||
byte next_char = pos_is_last ? Byte_ascii.Nil : raw[next_pos];
|
||||
if (quote_is_single) { // NOTE: q1 is simpler than q2; REF.MW:http://php.net/manual/en/language.types.String.php; DATE:2014-08-06
|
||||
switch (next_char) {
|
||||
case Byte_ascii.Apos: next_char = Byte_ascii.Apos; break;
|
||||
case Byte_ascii.Backslash: next_char = Byte_ascii.Backslash; break;
|
||||
default: next_char = Byte_ascii.Nil; break;
|
||||
}
|
||||
}
|
||||
tmp_list.Add(new Php_text_itm_escaped(i, next_pos, next_char)); rslt_val = Rslt_dirty;
|
||||
i = next_pos;
|
||||
else {
|
||||
if (pos_is_last) throw Err_mgr._.fmt_auto_(GRP_KEY, "backslash_is_last_char", String_.new_utf8_(raw));
|
||||
switch (next_char) {
|
||||
case Byte_ascii.Backslash: next_char = Byte_ascii.Backslash; break;
|
||||
case Byte_ascii.Quote: next_char = Byte_ascii.Quote; break;
|
||||
case Byte_ascii.Ltr_N:
|
||||
case Byte_ascii.Ltr_n: next_char = Byte_ascii.NewLine; break;
|
||||
case Byte_ascii.Ltr_T:
|
||||
case Byte_ascii.Ltr_t: next_char = Byte_ascii.Tab; break;
|
||||
case Byte_ascii.Ltr_R:
|
||||
case Byte_ascii.Ltr_r: next_char = Byte_ascii.CarriageReturn; break;
|
||||
case Byte_ascii.Ltr_U:
|
||||
case Byte_ascii.Ltr_u: { // EX: "\u007C"
|
||||
rslt_val = Rslt_dirty;
|
||||
Parse_utf16(tmp_list, raw, next_pos + 1, raw_len); // +1 to skip u
|
||||
i = next_pos + 4; // +4 to skip utf16 seq; EX: \u007C; +4 for 007C
|
||||
continue;
|
||||
}
|
||||
case Byte_ascii.Ltr_X:
|
||||
case Byte_ascii.Ltr_x: { // EX: "\xc2"
|
||||
rslt_val = Rslt_dirty;
|
||||
byte[] literal = Bry_.Add(CONST_utf_prefix, Bry_.Mid(raw, next_pos + 1, next_pos + 3));
|
||||
tmp_list.Add(new Php_text_itm_utf16(i, i + 4, literal));
|
||||
i = next_pos + 2; // +2 to skip rest; EX: \xc2; +2 for c2
|
||||
continue;
|
||||
}
|
||||
default: next_char = Byte_ascii.Nil; break;
|
||||
}
|
||||
}
|
||||
if (next_char == Byte_ascii.Nil) {
|
||||
if (txt_bgn == -1) txt_bgn = i;
|
||||
}
|
||||
else {
|
||||
tmp_list.Add(new Php_text_itm_escaped(i, next_pos, next_char)); rslt_val = Rslt_dirty;
|
||||
i = next_pos;
|
||||
}
|
||||
break;
|
||||
case Byte_ascii.Dollar:
|
||||
if (txt_bgn != -1) {tmp_list.Add(new Php_text_itm_text(txt_bgn, i)); txt_bgn = -1;}
|
||||
|
||||
@@ -18,13 +18,27 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
package gplx.php; import gplx.*;
|
||||
import org.junit.*;
|
||||
public class Php_text_itm_tst {
|
||||
@Test public void Basic() {Tst_("abcde", "abcde");}
|
||||
@Test public void Escaped() {Tst_("a\\$b\\\"c\\td\\ne", "a$b\"c\td\ne");}
|
||||
@Test public void Fmt() {Tst_("a$1b$2c", "a~{0}b~{1}c");}
|
||||
@Test public void Utf16() {Tst_("a\\u007Cd", "a|d");}
|
||||
@Test public void Utf8_nbsp() {Tst_("a\\xc2\\xa0d", "a\\u00c2\\u00a0d");}
|
||||
private void Tst_(String raw_str, String expd) {
|
||||
Php_text_itm_parser parser = new Php_text_itm_parser();
|
||||
/** Shared fixture; reset before every test by {@link #init()}. */
private Php_text_itm_fxt fxt = new Php_text_itm_fxt();

/** Resets the fixture before each test. */
@Before
public void init() {
	fxt.Clear();
}
|
||||
/** Single-quote mode: text without any backslash is returned as-is. */
@Test
public void Q1_basic() {
	Php_text_itm_fxt q1 = fxt.Init_q1();
	q1.Test_parse("abcde", "abcde");
}
|
||||
/** Single-quote mode: backslash + apostrophe collapses to a bare apostrophe. */
@Test
public void Q1_apos() {
	Php_text_itm_fxt q1 = fxt.Init_q1();
	q1.Test_parse("a\\'b", "a'b");
}
|
||||
/** Single-quote mode: a doubled backslash collapses to one backslash. */
@Test
public void Q1_backslash() {
	Php_text_itm_fxt q1 = fxt.Init_q1();
	q1.Test_parse("a\\\\b", "a\\b");
}
|
||||
/**
 * Single-quote mode: a lone backslash at end-of-string is kept as text.
 * PURPOSE: allow single trailing backslash; DATE:2014-08-06
 */
@Test
public void Q1_backslash_eos() {
	Php_text_itm_fxt q1 = fxt.Init_q1();
	q1.Test_parse("a\\", "a\\");
}
|
||||
/** Single-quote mode: backslash + dollar and backslash + n are not escapes; input comes back unchanged. */
@Test
public void Q1_noop() {
	Php_text_itm_fxt q1 = fxt.Init_q1();
	q1.Test_parse("a\\$\\nb", "a\\$\\nb");
}
|
||||
/** Double-quote mode: text without any backslash is returned as-is. */
@Test
public void Q2_basic() {
	Php_text_itm_fxt q2 = fxt.Init_q2();
	q2.Test_parse("abcde", "abcde");
}
|
||||
/** Double-quote mode: backslash + double-quote collapses to a bare double-quote. */
@Test
public void Q2_quote() {
	Php_text_itm_fxt q2 = fxt.Init_q2();
	q2.Test_parse("a\\\"b", "a\"b");
}
|
||||
/** Double-quote mode: a doubled backslash collapses to one backslash. */
@Test
public void Q2_backslash() {
	Php_text_itm_fxt q2 = fxt.Init_q2();
	q2.Test_parse("a\\\\b", "a\\b");
}
|
||||
/** Double-quote mode: backslash + percent and backslash + c are not recognized escapes; input comes back unchanged. */
@Test
public void Q2_noop() {
	Php_text_itm_fxt q2 = fxt.Init_q2();
	q2.Test_parse("a\\%\\cb", "a\\%\\cb");
}
|
||||
/** Double-quote mode: backslash + t and backslash + n become a real tab and a real newline. */
@Test
public void Q2_ws() {
	Php_text_itm_fxt q2 = fxt.Init_q2();
	q2.Test_parse("a\\tb\\nc", "a\tb\nc");
}
|
||||
/** Double-quote mode: dollar-number arguments ($1, $2) are rewritten as zero-based ~{0}, ~{1} placeholders. */
@Test
public void Q2_fmt() {
	Php_text_itm_fxt q2 = fxt.Init_q2();
	q2.Test_parse("a$1b$2c", "a~{0}b~{1}c");
}
|
||||
/** Double-quote mode: a backslash-u sequence with hex digits 007C is decoded to the pipe character. */
@Test
public void Q2_utf_pipe() {
	Php_text_itm_fxt q2 = fxt.Init_q2();
	q2.Test_parse("a\\u007Cd", "a|d");
}
|
||||
/**
 * Double-quote mode: each backslash-x hex pair (c2, a0) is emitted as literal text of the form
 * backslash-u-00-plus-the-pair; the expected output still contains backslashes, not decoded bytes.
 */
@Test
public void Q2_hex_nbsp() {
	Php_text_itm_fxt q2 = fxt.Init_q2();
	q2.Test_parse("a\\xc2\\xa0d", "a\\u00c2\\u00a0d");
}
|
||||
}
|
||||
class Php_text_itm_fxt {
|
||||
private Php_text_itm_parser parser;
|
||||
/** Discards the current parser and starts over with a freshly constructed one. */
public void Clear() {
	this.parser = new Php_text_itm_parser();
}
|
||||
/** Puts the parser in single-quote mode (Quote_is_single_ with Bool_.Y); returns this fixture for chaining. */
public Php_text_itm_fxt Init_q1() {
	parser.Quote_is_single_(Bool_.Y);
	return this;
}
|
||||
/** Puts the parser in double-quote mode (Quote_is_single_ with Bool_.N); returns this fixture for chaining. */
public Php_text_itm_fxt Init_q2() {
	parser.Quote_is_single_(Bool_.N);
	return this;
}
|
||||
public void Test_parse(String raw_str, String expd) {
|
||||
ListAdp list = ListAdp_.new_();
|
||||
byte[] raw = Bry_.new_utf8_(raw_str);
|
||||
parser.Parse(list, raw);
|
||||
|
||||
Reference in New Issue
Block a user