/* XOWA: the XOWA Offline Wiki Application Copyright (C) 2012 gnosygnu@gmail.com This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. You should have received a copy of the GNU Affero General Public License along with this program. If not, see . */ package gplx.php; import gplx.*; import gplx.texts.*; public class Php_srl_parser { @gplx.Internal protected Php_srl_factory Factory() {return factory;} Php_srl_factory factory = new Php_srl_factory(); byte[] raw; int raw_len, pos; public KeyVal[] Parse_as_kvs(byte[] raw) { Php_srl_itm_ary root = Parse(raw); return Xto_kv_ary(root); } KeyVal[] Xto_kv_ary(Php_srl_itm_ary ary) { int len = ary.Subs_len(); KeyVal[] rv = new KeyVal[len]; for (int i = 0; i < len; i++) rv[i] = Xto_kv(ary.Subs_get_at(i)); return rv; } KeyVal Xto_kv(Php_srl_itm_kv itm) { Php_srl_itm itm_key = itm.Key(); Object key = itm_key == null ? null : itm_key.Val(); Php_srl_itm itm_val = itm.Val(); Object val = null; switch (itm_val.Tid()) { case Php_srl_itm_.Tid_array: Php_srl_itm_ary ary = (Php_srl_itm_ary)itm_val; val = Xto_kv_ary(ary); break; case Php_srl_itm_.Tid_function: val = new gplx.xowa.xtns.scribunto.Scrib_lua_proc(Object_.Xto_str_strict_or_null_mark(key), Int_.cast_(itm_val.Val())); // NOTE: in most cases, key is a STRING (name of ScribFunction); however, for gsub it is an INT (arg_idx) b/c it is passed as a parameter break; default: val = itm_val.Val(); break; } return KeyVal_.obj_(key, val); } @gplx.Internal protected Php_srl_itm_ary Parse(byte[] raw) { this.raw = raw; this.raw_len = raw.length; pos = 0; Php_srl_itm_ary rv = new Php_srl_itm_ary(0, raw_len); Php_srl_itm_kv cur_kv = factory.Kv(); rv.Subs_add(cur_kv); boolean mode_is_key = false; while (true) { if (pos >= raw_len) break; if (mode_is_key) { cur_kv.Key_(Parse_itm(pos)); mode_is_key = false; } else { cur_kv.Val_(Parse_itm(pos)); mode_is_key = true; } } return rv; } Php_srl_itm_ary Parse_array(int bgn, int subs_len) { // enters after '{'; EX: 'a:1{' -> Parse_array Php_srl_itm_ary rv = factory.Ary(bgn, bgn); for (int i = 0; i < subs_len; i++) { Php_srl_itm_kv kv = factory.Kv(); Php_srl_itm key_itm = Parse_itm(pos); kv.Key_(key_itm); Php_srl_itm val_itm = Parse_itm(pos); kv.Val_(val_itm); rv.Subs_add(kv); } return rv; } Php_srl_itm Parse_itm(int bgn) { pos = bgn; Php_srl_itm rv = null; byte b = raw[pos]; switch (b) { case Byte_ascii.Ltr_N: // EX: 'N;' rv = factory.Nil(); pos = Chk(raw, pos + 1, Byte_ascii.Semic); break; case Byte_ascii.Ltr_b: // EX: 'b:0;' or 'b:1;' pos = Chk(raw, pos + 1, Byte_ascii.Colon); b = raw[pos]; switch (b) { case Byte_ascii.Num_1: rv = factory.Bool_y(); break; case Byte_ascii.Num_0: rv = factory.Bool_n(); break; default: throw err_(raw, pos, raw_len, "unknown boolean type {0}", Char_.XtoStr(b)); } pos = Chk(raw, pos + 1, Byte_ascii.Semic); break; case Byte_ascii.Ltr_i: // EX: 'i:123;' rv = Parse_int(pos); pos = Chk(raw, pos, Byte_ascii.Semic); break; case Byte_ascii.Ltr_d: // EX: 'd:1.23;' pos = Chk(raw, pos + 1, Byte_ascii.Colon); int double_end = Bry_finder.Find_fwd(raw, Byte_ascii.Semic, pos, raw_len); String double_str = String_.new_a7(raw, pos, double_end); double double_val = 0; if (String_.Eq(double_str, "INF")) double_val = Double_.Inf_pos; else if (String_.Eq(double_str, "NAN")) double_val = Double_.NaN; else double_val = Double_.parse_(double_str); rv = factory.Double(pos, double_end, double_val); pos = Chk(raw, double_end, Byte_ascii.Semic); break; case Byte_ascii.Ltr_s: // EX: 's:3:"abc";' int len_val = Parse_int(pos).Val_as_int(); pos = Chk(raw, pos, Byte_ascii.Colon); pos = Chk(raw, pos, Byte_ascii.Quote); int str_end = pos + len_val; String str_val = String_.new_u8(raw, pos, str_end); rv = factory.Str(pos, str_end, str_val); pos = Chk(raw, str_end, Byte_ascii.Quote); pos = Chk(raw, pos, Byte_ascii.Semic); break; case Byte_ascii.Ltr_a: // EX: 'a:0:{}' int subs_len = Parse_int(pos).Val_as_int(); pos = Chk(raw, pos, Byte_ascii.Colon); pos = Chk(raw, pos, Byte_ascii.Curly_bgn); rv = Parse_array(pos, subs_len); pos = Chk(raw, pos, Byte_ascii.Curly_end); break; case Byte_ascii.Ltr_O: // EX: 'O:42:"Scribunto_LuaStandaloneInterpreterFunction":1:{s:2:"id";i:123;}' int func_bgn = pos; pos += 62; // 64= len of constant String after ":42:"Scribunto...." int func_id = Parse_int_val(pos); rv = factory.Func(func_bgn, pos, func_id); pos += 2; break; default: throw err_(raw, pos, "unexpected type: {0}", Char_.XtoStr(b)); } return rv; } static final byte[] CONST_funct_bgn = Bry_.new_a7("O:42:\"Scribunto_LuaStandaloneInterpreterFunction\":1:{s:2:\"id\";i:"), CONST_funct_end = Bry_.new_a7(";}"); int Parse_int_val(int bgn) { pos = bgn; pos = Chk(raw, pos + 1, Byte_ascii.Colon); int int_end = Skip_while_num(raw, raw_len, pos, true); int int_val = Bry_.To_int_or(raw, pos, int_end, Int_.MinValue); pos = int_end; return int_val; } Php_srl_itm_int Parse_int(int bgn) { pos = bgn; pos = Chk(raw, pos + 1, Byte_ascii.Colon); int int_end = Skip_while_num(raw, raw_len, pos, true); int int_val = Bry_.To_int_or(raw, pos, int_end, Int_.MinValue); Php_srl_itm_int rv = factory.Int(pos, int_end, int_val); pos = int_end; return rv; } int Chk(byte[] raw, int i, byte expd) { byte actl = raw[i]; if (actl == expd) return i + 1; else throw err_(raw, i, "expected '{0}' but got '{1}'", Char_.XtoStr(expd), Char_.XtoStr(actl)); } int Skip_while_num(byte[] raw, int raw_len, int bgn, boolean num_is_int) { int num_len = 1; for (int i = bgn; i < raw_len; i++) { byte b = raw[i]; switch (b) { case Byte_ascii.Num_0: case Byte_ascii.Num_1: case Byte_ascii.Num_2: case Byte_ascii.Num_3: case Byte_ascii.Num_4: case Byte_ascii.Num_5: case Byte_ascii.Num_6: case Byte_ascii.Num_7: case Byte_ascii.Num_8: case Byte_ascii.Num_9: break; case Byte_ascii.Dot: case Byte_ascii.Dash: break; default: if (num_is_int && num_len < 11) { return i; } else return i; } } throw err_(raw, raw_len, raw_len, "skip_ws found eos"); } Err err_(byte[] raw, int bgn, String fmt, Object... args) {return err_(raw, bgn, raw.length, fmt, args);} Err err_(byte[] raw, int bgn, int raw_len, String fmt, Object... args) { String msg = String_.Format(fmt, args) + " " + Int_.Xto_str(bgn) + " " + String_.new_u8_by_len(raw, bgn, 20); return Err_.new_wo_type(msg); } } class Php_srl_factory { public Php_srl_itm Nil() {return Php_srl_itm_nil.Nil;} public Php_srl_itm Bool_n() {return Php_srl_itm_bool.Bool_n;} public Php_srl_itm Bool_y() {return Php_srl_itm_bool.Bool_y;} public Php_srl_itm_int Int(int bgn, int end, int v) {return new Php_srl_itm_int(bgn, end, v);} public Php_srl_itm Double(int bgn, int end, double v) {return new Php_srl_itm_double(bgn, end, v);} public Php_srl_itm Str(int bgn, int end) {return new Php_srl_itm_str(bgn, end, null);} public Php_srl_itm Str(int bgn, int end, String v) {return new Php_srl_itm_str(bgn, end, v);} public Php_srl_itm_func Func(int bgn, int end, int v) {return new Php_srl_itm_func(bgn, end, v);} public Php_srl_itm_ary Ary(int bgn, int end) {return new Php_srl_itm_ary(bgn, end);} public Php_srl_itm_kv Kv() {return new Php_srl_itm_kv();} }