1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2026-03-02 03:49:30 +00:00
This commit is contained in:
gnosygnu
2014-08-10 23:10:23 -04:00
parent fb8c06c560
commit 67b04263a7
131 changed files with 2285 additions and 1355 deletions

View File

@@ -18,6 +18,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
package gplx.php; import gplx.*;
public class Php_text_itm_parser {
public static final byte Rslt_orig = 0, Rslt_dirty = 1, Rslt_fmt = 2;
// Quote-mode flag: getter, chainable setter (returns this parser), and the backing field.
// When true, the backslash branch of the parse loop only unescapes an escaped apostrophe or an escaped backslash;
// when false, it applies the larger escape set (quote, n, t, r, u, x).
// NOTE(review): presumably true = PHP single-quoted string and false = double-quoted string -- confirm with callers.
public boolean Quote_is_single() {return quote_is_single;} public Php_text_itm_parser Quote_is_single_(boolean v) {quote_is_single = v; return this;} private boolean quote_is_single;
public byte[] Parse_as_bry(ListAdp tmp_list, byte[] raw, Byte_obj_ref rslt_ref, Bry_bfr tmp_bfr) {
Parse(tmp_list, raw, rslt_ref);
byte[] rv = raw;
@@ -49,34 +50,52 @@ public class Php_text_itm_parser {
switch (b) {
case Byte_ascii.Backslash:
if (txt_bgn != -1) {tmp_list.Add(new Php_text_itm_text(txt_bgn, i)); txt_bgn = -1; rslt_val = Rslt_dirty;}
if (i == raw_last) throw Err_mgr._.fmt_auto_(GRP_KEY, "backslash_is_last_char", String_.new_utf8_(raw));
boolean pos_is_last = i == raw_last;
int next_pos = i + 1;
byte next_char = raw[next_pos];
switch (next_char) {
case Byte_ascii.Ltr_N:
case Byte_ascii.Ltr_n: next_char = Byte_ascii.NewLine; break;
case Byte_ascii.Ltr_T:
case Byte_ascii.Ltr_t: next_char = Byte_ascii.Tab; break;
case Byte_ascii.Ltr_R:
case Byte_ascii.Ltr_r: next_char = Byte_ascii.CarriageReturn; break;
case Byte_ascii.Ltr_U:
case Byte_ascii.Ltr_u: { // EX: "\u007C"
rslt_val = Rslt_dirty;
Parse_utf16(tmp_list, raw, next_pos + 1, raw_len); // +1 to skip u
i = next_pos + 4; // +4 to skip utf16 seq; EX: \u007C; +4 for 007C
continue;
}
case Byte_ascii.Ltr_X:
case Byte_ascii.Ltr_x: { // EX: "\xc2"
rslt_val = Rslt_dirty;
byte[] literal = Bry_.Add(CONST_utf_prefix, Bry_.Mid(raw, next_pos + 1, next_pos + 3));
tmp_list.Add(new Php_text_itm_utf16(i, i + 4, literal));
i = next_pos + 2; // +2 to skip rest; EX: \xc2; +2 for c2
continue;
byte next_char = pos_is_last ? Byte_ascii.Nil : raw[next_pos];
if (quote_is_single) { // NOTE: q1 is simpler than q2; REF.MW:http://php.net/manual/en/language.types.String.php; DATE:2014-08-06
switch (next_char) {
case Byte_ascii.Apos: next_char = Byte_ascii.Apos; break;
case Byte_ascii.Backslash: next_char = Byte_ascii.Backslash; break;
default: next_char = Byte_ascii.Nil; break;
}
}
tmp_list.Add(new Php_text_itm_escaped(i, next_pos, next_char)); rslt_val = Rslt_dirty;
i = next_pos;
else {
if (pos_is_last) throw Err_mgr._.fmt_auto_(GRP_KEY, "backslash_is_last_char", String_.new_utf8_(raw));
switch (next_char) {
case Byte_ascii.Backslash: next_char = Byte_ascii.Backslash; break;
case Byte_ascii.Quote: next_char = Byte_ascii.Quote; break;
case Byte_ascii.Ltr_N:
case Byte_ascii.Ltr_n: next_char = Byte_ascii.NewLine; break;
case Byte_ascii.Ltr_T:
case Byte_ascii.Ltr_t: next_char = Byte_ascii.Tab; break;
case Byte_ascii.Ltr_R:
case Byte_ascii.Ltr_r: next_char = Byte_ascii.CarriageReturn; break;
case Byte_ascii.Ltr_U:
case Byte_ascii.Ltr_u: { // EX: "\u007C"
rslt_val = Rslt_dirty;
Parse_utf16(tmp_list, raw, next_pos + 1, raw_len); // +1 to skip u
i = next_pos + 4; // +4 to skip utf16 seq; EX: \u007C; +4 for 007C
continue;
}
case Byte_ascii.Ltr_X:
case Byte_ascii.Ltr_x: { // EX: "\xc2"
rslt_val = Rslt_dirty;
byte[] literal = Bry_.Add(CONST_utf_prefix, Bry_.Mid(raw, next_pos + 1, next_pos + 3));
tmp_list.Add(new Php_text_itm_utf16(i, i + 4, literal));
i = next_pos + 2; // +2 to skip rest; EX: \xc2; +2 for c2
continue;
}
default: next_char = Byte_ascii.Nil; break;
}
}
if (next_char == Byte_ascii.Nil) {
if (txt_bgn == -1) txt_bgn = i;
}
else {
tmp_list.Add(new Php_text_itm_escaped(i, next_pos, next_char)); rslt_val = Rslt_dirty;
i = next_pos;
}
break;
case Byte_ascii.Dollar:
if (txt_bgn != -1) {tmp_list.Add(new Php_text_itm_text(txt_bgn, i)); txt_bgn = -1;}

View File

@@ -18,13 +18,27 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
package gplx.php; import gplx.*;
import org.junit.*;
public class Php_text_itm_tst {
@Test public void Basic() {Tst_("abcde", "abcde");}
@Test public void Escaped() {Tst_("a\\$b\\\"c\\td\\ne", "a$b\"c\td\ne");}
@Test public void Fmt() {Tst_("a$1b$2c", "a~{0}b~{1}c");}
@Test public void Utf16() {Tst_("a\\u007Cd", "a|d");}
@Test public void Utf8_nbsp() {Tst_("a\\xc2\\xa0d", "a\\u00c2\\u00a0d");}
private void Tst_(String raw_str, String expd) {
Php_text_itm_parser parser = new Php_text_itm_parser();
// JUnit setup: clears the shared fixture before every test, which makes the fixture build a new parser.
@Before public void init() {fxt.Clear();} private Php_text_itm_fxt fxt = new Php_text_itm_fxt();
// Q1 tests: the fixture is switched to quote_is_single = true before parsing.
// Plain text without any backslash is expected to come back unchanged.
@Test public void Q1_basic() {fxt.Init_q1().Test_parse("abcde" , "abcde");}
// A backslash followed by an apostrophe is expected to collapse to a single apostrophe.
@Test public void Q1_apos() {fxt.Init_q1().Test_parse("a\\'b" , "a'b");}
// Two consecutive backslashes are expected to collapse to one backslash.
@Test public void Q1_backslash() {fxt.Init_q1().Test_parse("a\\\\b" , "a\\b");}
// A lone backslash as the very last character is expected to be kept as-is rather than rejected.
@Test public void Q1_backslash_eos() {fxt.Init_q1().Test_parse("a\\" , "a\\");} // PURPOSE: allow single trailing backslash; DATE:2014-08-06
// Backslash followed by '$' or 'n' is not an escape in this mode; the input is expected to pass through untouched.
@Test public void Q1_noop() {fxt.Init_q1().Test_parse("a\\$\\nb" , "a\\$\\nb");}
// Q2 tests: the fixture is switched to quote_is_single = false before parsing.
// Plain text without any backslash is expected to come back unchanged.
@Test public void Q2_basic() {fxt.Init_q2().Test_parse("abcde" , "abcde");}
// A backslash followed by a double quote is expected to collapse to a single double quote.
@Test public void Q2_quote() {fxt.Init_q2().Test_parse("a\\\"b" , "a\"b");}
// Two consecutive backslashes are expected to collapse to one backslash.
@Test public void Q2_backslash() {fxt.Init_q2().Test_parse("a\\\\b" , "a\\b");}
// Backslash followed by an unrecognized character ('%' or 'c') is expected to pass through untouched.
@Test public void Q2_noop() {fxt.Init_q2().Test_parse("a\\%\\cb" , "a\\%\\cb");}
// Backslash-t and backslash-n are expected to become a real tab and a real newline.
@Test public void Q2_ws() {fxt.Init_q2().Test_parse("a\\tb\\nc" , "a\tb\nc");}
// Dollar-number placeholders are expected to become zero-based ~{n} format arguments ($1 -> ~{0}, $2 -> ~{1}).
@Test public void Q2_fmt() {fxt.Init_q2().Test_parse("a$1b$2c" , "a~{0}b~{1}c");}
// A four-hex-digit unicode escape (007C) is expected to be decoded to the pipe character.
@Test public void Q2_utf_pipe() {fxt.Init_q2().Test_parse("a\\u007Cd" , "a|d");}
// Each two-hex-digit x-escape (c2, a0) is expected to be rewritten as the literal text of a four-digit unicode escape (00c2, 00a0), not decoded to a byte.
@Test public void Q2_hex_nbsp() {fxt.Init_q2().Test_parse("a\\xc2\\xa0d" , "a\\u00c2\\u00a0d");}
}
class Php_text_itm_fxt {
private Php_text_itm_parser parser;
// Resets the fixture by replacing the parser with a newly constructed one.
public void Clear() {parser = new Php_text_itm_parser();}
// Puts the parser into single-quote mode (Quote_is_single = Bool_.Y) and returns this fixture for chaining.
public Php_text_itm_fxt Init_q1() {parser.Quote_is_single_(Bool_.Y); return this;}
// Turns single-quote mode off (Quote_is_single = Bool_.N) and returns this fixture for chaining.
public Php_text_itm_fxt Init_q2() {parser.Quote_is_single_(Bool_.N); return this;}
public void Test_parse(String raw_str, String expd) {
ListAdp list = ListAdp_.new_();
byte[] raw = Bry_.new_utf8_(raw_str);
parser.Parse(list, raw);