1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2026-03-02 03:49:30 +00:00

Embeddable: Create core dbs in proper subdirectory

This commit is contained in:
gnosygnu
2017-10-23 20:50:50 -04:00
parent 1336d44f34
commit 66877212bf
4537 changed files with 311750 additions and 0 deletions

View File

@@ -13,3 +13,7 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.langs.phps; import gplx.*; import gplx.langs.*;
/** Context object that holds the source bytes currently being parsed. */
public class Php_ctx {
	private byte[] src;

	/** Returns the source bytes last stored through Src_; null if none were stored yet. */
	public byte[] Src() {
		return src;
	}

	/** Stores the source bytes and returns this instance so calls can be chained. */
	public Php_ctx Src_(byte[] v) {
		src = v;
		return this;
	}
}

View File

@@ -13,3 +13,261 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.langs.phps; import gplx.*; import gplx.langs.*;
import gplx.core.log_msgs.*;
/*
NOTE: naive implementation of PHP parser; intended only for parsing Messages**.php files in MediaWiki. Specifically, it assumes the following:
- all lines are assignment lines: EX: $a = b;
- only the assignment operator is allowed (=); EX: $a = 5 + 7; fails b/c of + operator;
- no functions are supported: EX: strlen('a') fails
*/
/**
 * Token worker that folds a token stream into a list of Php_line_assign items.
 * It is a small state machine: "mode" names what kind of token is acceptable next, and every call to
 * Process advances the machine by exactly one token. Declaration, comment and whitespace tokens are ignored in every mode.
 */
public class Php_evaluator implements Php_tkn_wkr {
	byte mode = Mode_key_bgn, next_tid = 0, next_mode = 0;	// next_tid / next_mode are only meaningful while mode == Mode_expect
	Php_line_assign cur_line; Php_itm_ary cur_ary; Php_key cur_kv_key;
	List_adp frame_stack = List_adp_.New();	// enclosing arrays of cur_ary; pushed when a nested array starts, popped when it closes
	public Php_evaluator(Gfo_msg_log msg_log) {this.msg_log = msg_log;} Gfo_msg_log msg_log;
	/** Binds the evaluator to the source bytes of ctx and drops any array frames left over from an earlier parse. */
	public void Init(Php_ctx ctx) {src = ctx.Src(); frame_stack.Clear();} private byte[] src;
	/** Returns the list of Php_line_assign items built so far. */
	public List_adp List() {return lines;} List_adp lines = List_adp_.New();
	public Gfo_msg_log Msg_log() {return msg_log;}
	/** Resets lines, messages and all parse state so the instance can be reused. */
	public void Clear() {
		lines.Clear(); msg_log.Clear();
		frame_stack.Clear();	// keep in step with Init; otherwise stale frames would survive a Clear
		cur_line = null;
		cur_ary = null;
		cur_kv_key = null;
		mode = Mode_key_bgn;
		next_tid = next_mode = 0;
	}
	/**
	 * Consumes one token and advances the state machine.
	 * Unexpected tokens in Mode_expect, Mode_key_bgn, Mode_key_end and Mode_brack_itm are logged as Expecting_itm_failed
	 * and put the evaluator in Mode_suspend, which skips tokens until the next semicolon.
	 */
	public void Process(Php_tkn tkn) {
		byte tkn_tid = tkn.Tkn_tid();
		switch (tkn_tid) {
			case Php_tkn_.Tid_declaration: case Php_tkn_.Tid_comment: case Php_tkn_.Tid_ws: // always discard, regardless of mode
				return;
		}
		switch (mode) {
			case Mode_expect:		// handles sequences like "array(" which hook in to "array" but need to skip "("
				if (tkn_tid == next_tid)
					mode = next_mode;
				else {
					Msg_many(src, tkn.Src_bgn(), tkn.Src_end(), Expecting_itm_failed, Php_tkn_.Xto_str(next_tid), Php_tkn_.Xto_str(tkn_tid));
					Fail();
				}
				break;
			case Mode_suspend:		// error recovery: ignore everything up to and including the next ";"
				if (tkn_tid == Php_tkn_.Tid_semic) mode = Mode_key_bgn;
				break;
			case Mode_key_bgn:		// start of a line; only a variable is acceptable
				if (tkn_tid == Php_tkn_.Tid_var) {
					cur_ary = null;
					cur_line = new Php_line_assign();
					lines.Add(cur_line);
					Php_tkn_var var_tkn = (Php_tkn_var)tkn;
					cur_line.Key_(new Php_itm_var(var_tkn.Var_name(src)));
					mode = Mode_key_end;
				}
				else {
					Msg_many(src, tkn.Src_bgn(), tkn.Src_end(), Expecting_itm_failed, Php_tkn_.Xto_str(Php_tkn_.Tid_var), Php_tkn_.Xto_str(tkn_tid));
					Fail();
				}
				break;
			case Mode_key_end:		// after the variable (or after a quoted subscript): "=", "[" or "]"
				switch (tkn_tid) {
					case Php_tkn_.Tid_eq:			mode = Mode_val; break;
					case Php_tkn_.Tid_brack_bgn:	mode = Mode_brack_itm; break;
					case Php_tkn_.Tid_brack_end:	Expect(Php_tkn_.Tid_eq, Mode_val); break;
					default: {
						// report "=" as the expected token; this state never accepts a variable
						Msg_many(src, tkn.Src_bgn(), tkn.Src_end(), Expecting_itm_failed, Php_tkn_.Xto_str(Php_tkn_.Tid_eq), Php_tkn_.Xto_str(tkn_tid));
						Fail();
						break;
					}
				}
				break;
			case Mode_brack_itm:	// inside "[...]" on the left-hand side; only a quoted key is supported
				switch (tkn_tid) {
					case Php_tkn_.Tid_quote:
						Php_tkn_quote tkn_quote = (Php_tkn_quote)tkn;
						Php_itm_quote key_sub = new Php_itm_quote(tkn_quote.Quote_text(src));
						cur_line.Key_subs_(new Php_key[] {key_sub});
						mode = Mode_key_end;
						break;
					default: {
						// report a quote as the expected token; this state never accepts a variable
						Msg_many(src, tkn.Src_bgn(), tkn.Src_end(), Expecting_itm_failed, Php_tkn_.Xto_str(Php_tkn_.Tid_quote), Php_tkn_.Xto_str(tkn_tid));
						Fail();
						break;
					}
				}
				break;
			case Mode_val:			// right-hand side of "="
				Php_itm line_val = null;
				switch (tkn_tid) {
					case Php_tkn_.Tid_null:			Expect(Php_tkn_.Tid_semic, Mode_key_bgn); line_val = Php_itm_null.Instance; break;
					case Php_tkn_.Tid_false:		Expect(Php_tkn_.Tid_semic, Mode_key_bgn); line_val = Php_itm_bool_false.Instance; break;
					case Php_tkn_.Tid_true:			Expect(Php_tkn_.Tid_semic, Mode_key_bgn); line_val = Php_itm_bool_true.Instance; break;
					case Php_tkn_.Tid_quote:
						Expect(Php_tkn_.Tid_semic, Mode_key_bgn);
						Php_tkn_quote tkn_quote = (Php_tkn_quote)tkn;
						line_val = new Php_itm_quote(tkn_quote.Quote_text(src));
						break;
					case Php_tkn_.Tid_ary:
					case Php_tkn_.Tid_brack_bgn:
						Php_itm_ary ary = new Php_itm_ary();
						if (cur_ary == null)
							line_val = ary;
						else {
							cur_ary.Subs_add(ary);
							frame_stack.Add(new Php_scanner_frame(cur_ary));
							cur_kv_key = null;
						}
						this.cur_ary = ary;
						if (tkn_tid == Php_tkn_.Tid_ary)
							Expect(Php_tkn_.Tid_paren_bgn, Mode_ary_subs);	// "array" must be followed by "("
						else
							mode = Mode_ary_subs;							// "[" opens the array directly
						break;
					// NOTE(review): the tokens below are ignored and leave the mode unchanged; in particular a top-level number
					// (EX: $a = 5;) stores no value and ";" does not return to Mode_key_bgn -- confirm this is intended
					case Php_tkn_.Tid_txt:
					case Php_tkn_.Tid_var:
						break;
					case Php_tkn_.Tid_eq:
					case Php_tkn_.Tid_eq_kv:
					case Php_tkn_.Tid_semic:
					case Php_tkn_.Tid_comma:
					case Php_tkn_.Tid_paren_bgn:
					case Php_tkn_.Tid_paren_end:
					case Php_tkn_.Tid_brack_end:
					case Php_tkn_.Tid_num:
						break;
				}
				cur_line.Val_(line_val);	// runs for every token seen in this mode; ignored tokens store null
				break;
			case Mode_ary_subs:		// expecting an array element (or the end of the array)
				switch (tkn_tid) {
					case Php_tkn_.Tid_null:			Ary_add_itm(Php_itm_null.Instance); break;
					case Php_tkn_.Tid_false:		Ary_add_itm(Php_itm_bool_false.Instance); break;
					case Php_tkn_.Tid_true:			Ary_add_itm(Php_itm_bool_true.Instance); break;
					case Php_tkn_.Tid_quote:
						Php_tkn_quote tkn_quote = (Php_tkn_quote)tkn;
						Ary_add_itm(new Php_itm_quote(tkn_quote.Quote_text(src)));
						break;
					case Php_tkn_.Tid_num:
						Php_tkn_num tkn_num = (Php_tkn_num)tkn;
						Ary_add_itm(new Php_itm_int(tkn_num.Num_val_int(src)));
						break;
					case Php_tkn_.Tid_var:
						Php_tkn_var tkn_var = (Php_tkn_var)tkn;
						Ary_add_itm(new Php_itm_var(Bry_.Mid(src, tkn_var.Src_bgn(), tkn_var.Src_end())));
						break;
					case Php_tkn_.Tid_txt:	// bare text is stored as a var item holding the raw source bytes
						Php_tkn_txt tkn_txt = (Php_tkn_txt)tkn;
						Ary_add_itm(new Php_itm_var(Bry_.Mid(src, tkn_txt.Src_bgn(), tkn_txt.Src_end())));
						break;
					case Php_tkn_.Tid_ary:
					case Php_tkn_.Tid_brack_bgn:
						Php_itm_ary ary = new Php_itm_ary();
						if (cur_ary != null) {	// nested array: remember the parent and attach the child to it
							frame_stack.Add(new Php_scanner_frame(cur_ary));
							if (cur_kv_key == null)
								cur_ary.Subs_add(ary);
							else {
								Php_itm_kv ary_itm = new Php_itm_kv().Key_(cur_kv_key).Val_(ary);
								cur_ary.Subs_add(ary_itm);
								cur_kv_key = null;
							}
						}
						this.cur_ary = ary;
						if (tkn_tid == Php_tkn_.Tid_ary)
							Expect(Php_tkn_.Tid_paren_bgn, Mode_ary_subs);
						else
							mode = Mode_ary_subs;
						break;
					case Php_tkn_.Tid_paren_end:
					case Php_tkn_.Tid_brack_end:
						mode = Mode_ary_term;
						if (frame_stack.Count() == 0)
							cur_ary = null;
						else {	// closing a nested array: resume filling the parent
							Php_scanner_frame frame = (Php_scanner_frame)List_adp_.Pop(frame_stack);
							cur_ary = frame.Ary();
							frame.Rls();
						}
						break;
					case Php_tkn_.Tid_semic:	// NOTE: will occur in following construct array(array());
						mode = Mode_key_bgn;
						break;
					case Php_tkn_.Tid_eq:
					case Php_tkn_.Tid_eq_kv:
					case Php_tkn_.Tid_comma:
					case Php_tkn_.Tid_paren_bgn:
						break;
				}
				break;
			case Mode_ary_dlm:		// an element was just added; expecting ",", "=>" or the end of the array
				switch (tkn_tid) {
					case Php_tkn_.Tid_comma:
						mode = Mode_ary_subs;
						break;
					case Php_tkn_.Tid_paren_end:
					case Php_tkn_.Tid_brack_end:
						mode = Mode_ary_term;
						if (frame_stack.Count() == 0)
							cur_ary = null;
						else {
							Php_scanner_frame frame = (Php_scanner_frame)List_adp_.Pop(frame_stack);
							cur_ary = frame.Ary();
							frame.Rls();
						}
						break;
					case Php_tkn_.Tid_eq_kv:	// the element just added was really a key; take it back and wait for its value
						Php_itm_sub tmp_key = cur_ary.Subs_pop();
						cur_kv_key = (Php_key)tmp_key;
						mode = Mode_ary_subs;
						break;
				}
				break;
			case Mode_ary_term:		// an array was just closed
				switch (tkn_tid) {
					case Php_tkn_.Tid_comma:
					case Php_tkn_.Tid_paren_end:	// NOTE: paren_end occurs in multiple nests; EX: array(array())
					case Php_tkn_.Tid_brack_end:
						mode = Mode_ary_subs;
						break;
					case Php_tkn_.Tid_semic:
						mode = Mode_key_bgn;
						break;
				}
				break;
		}
	}
	/** Enters error recovery: tokens are skipped until the next semicolon. */
	private void Fail() {mode = Mode_suspend;}
	/** Appends val to the current array, wrapping it in a key/value item when a "=>" key is pending. */
	private void Ary_add_itm(Php_itm val) {
		mode = Mode_ary_dlm;
		if (cur_kv_key == null)
			cur_ary.Subs_add((Php_itm_sub)val);
		else {
			Php_itm_kv ary_itm = new Php_itm_kv().Key_(cur_kv_key).Val_(val);
			cur_ary.Subs_add(ary_itm);
			cur_kv_key = null;
		}
	}
	/** Requires the next significant token to be next_tid; when it is, the machine switches to next_mode. */
	private void Expect(byte next_tid, byte next_mode) {
		mode = Mode_expect;
		this.next_tid = next_tid;
		this.next_mode = next_mode;
	}
	/** Adds a message for the source range bgn..end to the message log. */
	public void Msg_many(byte[] src, int bgn, int end, Gfo_msg_itm itm, Object... args) {
		msg_log.Add_itm_many(itm, src, bgn, end, args);
	}
	public static final Gfo_msg_itm Expecting_itm_failed = Gfo_msg_itm_.new_warn_(Php_parser.Log_nde, "expecting_itm_failed", "expecting_itm ~{0} but got ~{1} instead");
	private static final byte Mode_key_bgn = 1, Mode_key_end = 2, Mode_expect = 3, Mode_suspend = 4, Mode_val = 5, Mode_ary_subs = 6, Mode_ary_dlm = 7, Mode_ary_term = 8, Mode_brack_itm = 9;
}
/** Stack entry that remembers an enclosing array while a nested array is being filled. */
class Php_scanner_frame {
	Php_itm_ary ary;

	public Php_scanner_frame(Php_itm_ary ary) {
		this.ary = ary;
	}

	/** Returns the remembered array; null after Rls has been called. */
	public Php_itm_ary Ary() {
		return ary;
	}

	/** Drops the reference to the remembered array. */
	public void Rls() {
		this.ary = null;
	}
}
/** Marker type for the parser's interrupt table; a slot holding Char marks a byte that ends a run of plain text. */
class Php_parser_interrupt {
	/** Shared marker instance that lexers store in the interrupt table. */
	public static final Php_parser_interrupt Char = new Php_parser_interrupt();
}

View File

@@ -13,3 +13,30 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.langs.phps; import gplx.*; import gplx.langs.*;
/** A value produced by the PHP evaluator (null, bool, int, quote, array, key/value or var). */
public interface Php_itm {
	/** Type id of the item; the implementations in this package return one of the Php_itm_.Tid_* constants. */
	byte Itm_tid();

	/** The value as bytes; implementations may return null (the null, array and key/value items do). */
	byte[] Val_obj_bry();
}
/** Singleton item for the PHP null value. */
class Php_itm_null implements Php_itm, Php_itm_sub {
	public static final Php_itm_null Instance = new Php_itm_null();

	Php_itm_null() {}

	public byte Itm_tid() {
		return Php_itm_.Tid_null;
	}

	/** Always null: a null item has no byte representation. */
	public byte[] Val_obj_bry() {
		return null;
	}
}
/** Singleton item for the PHP boolean true. */
class Php_itm_bool_true implements Php_itm, Php_itm_sub {
	private static final byte[] Bry_true = Bry_.new_a7("true");
	public static final Php_itm_bool_true Instance = new Php_itm_bool_true();

	Php_itm_bool_true() {}

	public byte Itm_tid() {
		return Php_itm_.Tid_bool_true;
	}

	/** Returns the shared bytes of the text "true". */
	public byte[] Val_obj_bry() {
		return Bry_true;
	}
}
/** Singleton item for the PHP boolean false. */
class Php_itm_bool_false implements Php_itm, Php_itm_sub {
	public byte Itm_tid() {return Php_itm_.Tid_bool_false;}
	/** Returns the shared bytes of the text "false". */
	public byte[] Val_obj_bry() {return Bry_false;}
	public static final Php_itm_bool_false Instance = new Php_itm_bool_false(); Php_itm_bool_false() {}
	private static final byte[] Bry_false = Bry_.new_a7("false");	// was named Bry_true (copy-paste from Php_itm_bool_true); value is unchanged
}
/** Item that wraps raw bytes supplied by the caller; usable as an array element and as a key. */
class Php_itm_var implements Php_itm, Php_itm_sub, Php_key {
	private byte[] val_obj_bry;

	public Php_itm_var(byte[] v) {
		val_obj_bry = v;
	}

	public byte Itm_tid() {
		return Php_itm_.Tid_var;
	}

	/** Returns the bytes passed to the constructor. */
	public byte[] Val_obj_bry() {
		return val_obj_bry;
	}
}

View File

@@ -13,3 +13,30 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.langs.phps; import gplx.*; import gplx.langs.*;
/** Type ids for Php_itm implementations plus helpers that read an item as an int or as bytes. */
public class Php_itm_ {
	public static final byte Tid_null = 0, Tid_bool_false = 1, Tid_bool_true = 2, Tid_int = 3, Tid_quote = 4, Tid_ary = 5, Tid_kv = 6, Tid_var = 7;
	/**
	 * Reads itm as an int.
	 * An int item returns its value; a quote item is parsed with Bry_.To_int_or, falling back to "or"; any other item returns "or".
	 * The fallback is handed straight to Bry_.To_int_or, so a quoted "-1" is no longer mistaken for a parse failure
	 * (the previous code used -1 as its own failure sentinel).
	 */
	public static int Parse_int_or(Php_itm itm, int or) {
		switch (itm.Itm_tid()) {
			case Php_itm_.Tid_int:
				return ((Php_itm_int)itm).Val_obj_int();
			case Php_itm_.Tid_quote:
				byte[] bry = ((Php_itm_quote)itm).Val_obj_bry();
				return Bry_.To_int_or(bry, or);
			default:
				return or;
		}
	}
	/**
	 * Returns the byte value of itm.
	 * Key/value and array items have no byte value and raise the error built by Err_.new_unhandled.
	 */
	public static byte[] Parse_bry(Php_itm itm) {
		switch (itm.Itm_tid()) {
			case Php_itm_.Tid_kv:
			case Php_itm_.Tid_ary:
				throw Err_.new_unhandled(itm.Itm_tid());
			default:
				return itm.Val_obj_bry();
		}
	}
}

View File

@@ -13,3 +13,23 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.langs.phps; import gplx.*; import gplx.langs.*;
/** Array item: a growable list of sub-items backed by a plain array. */
public class Php_itm_ary implements Php_itm, Php_itm_sub {
	Php_itm_sub[] ary = Php_itm_sub_.Ary_empty;	// backing store; slots at index >= subs_len are not part of the list
	int subs_max;								// capacity that triggers the next expansion
	private int subs_len;						// number of sub-items currently held

	public Php_itm_ary() {}

	public byte Itm_tid() {
		return Php_itm_.Tid_ary;
	}

	/** Always null: an array has no byte representation. */
	public byte[] Val_obj_bry() {
		return null;
	}

	public int Subs_len() {
		return subs_len;
	}

	/** Returns the sub-item stored at index i of the backing array; i is not checked against Subs_len. */
	public Php_itm_sub Subs_get(int i) {
		return ary[i];
	}

	/** Removes and returns the last sub-item; the length is decremented before the backing array is read. */
	public Php_itm_sub Subs_pop() {
		subs_len = subs_len - 1;
		return ary[subs_len];
	}

	/** Appends v, doubling the capacity (to twice the new length) when the backing array is full. */
	public void Subs_add(Php_itm_sub v) {
		int idx = subs_len;
		int needed = idx + 1;
		if (needed > subs_max) {
			subs_max = needed * 2;
			Php_itm_sub[] grown = new Php_itm_sub[subs_max];
			Array_.Copy_to(ary, 0, grown, 0, idx);
			ary = grown;
		}
		ary[idx] = v;
		subs_len = needed;
	}
}

View File

@@ -13,3 +13,10 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.langs.phps; import gplx.*; import gplx.langs.*;
/** Integer item; usable as an array element and as a key. */
public class Php_itm_int implements Php_itm, Php_itm_sub, Php_key {
	private int val_obj_int;

	public Php_itm_int(int v) {
		val_obj_int = v;
	}

	public byte Itm_tid() {
		return Php_itm_.Tid_int;
	}

	/** Returns the int converted to bytes by Bry_.new_by_int; the conversion runs on every call. */
	public byte[] Val_obj_bry() {
		return Bry_.new_by_int(val_obj_int);
	}

	public int Val_obj_int() {
		return val_obj_int;
	}
}

View File

@@ -13,3 +13,10 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.langs.phps; import gplx.*; import gplx.langs.*;
/** Key/value array element (the "key => val" form); setters return this for chaining. */
public class Php_itm_kv implements Php_itm, Php_itm_sub {
	Php_key key;
	Php_itm val;

	public byte Itm_tid() {
		return Php_itm_.Tid_kv;
	}

	/** Always null: a key/value pair has no byte representation of its own. */
	public byte[] Val_obj_bry() {
		return null;
	}

	public Php_key Key() {
		return key;
	}

	public Php_itm_kv Key_(Php_key v) {
		key = v;
		return this;
	}

	public Php_itm Val() {
		return val;
	}

	public Php_itm_kv Val_(Php_itm v) {
		val = v;
		return this;
	}
}

View File

@@ -13,3 +13,9 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.langs.phps; import gplx.*; import gplx.langs.*;
/** Quoted-string item; usable as an array element and as a key. */
public class Php_itm_quote implements Php_itm, Php_itm_sub, Php_key {
	private byte[] val_obj_bry;

	// NOTE: use Php_text_itm_parser to parse \" and related
	public Php_itm_quote(byte[] v) {
		val_obj_bry = v;
	}

	public byte Itm_tid() {
		return Php_itm_.Tid_quote;
	}

	/** Returns the bytes passed to the constructor, unchanged. */
	public byte[] Val_obj_bry() {
		return val_obj_bry;
	}
}

View File

@@ -13,3 +13,9 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.langs.phps; import gplx.*; import gplx.langs.*;
/** Marker interface for items that may be stored as array elements. */
public interface Php_itm_sub extends Php_itm {}
/** Constants for Php_itm_sub. */
class Php_itm_sub_ {
	/** Shared zero-length array. */
	public static final Php_itm_sub[] Ary_empty = new Php_itm_sub[0];
}

View File

@@ -13,3 +13,9 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.langs.phps; import gplx.*; import gplx.langs.*;
/** Marker interface for items that may act as a key (line key, key subscript, or key of a key/value pair). */
public interface Php_key extends Php_itm {}
/** Constants for Php_key. */
class Php_key_ {
	/** Shared zero-length array. */
	public static final Php_key[] Ary_empty = new Php_key[0];
}

View File

@@ -13,3 +13,5 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.langs.phps; import gplx.*; import gplx.langs.*;
/** Marker interface for a parsed line. */
public interface Php_line {
}

View File

@@ -13,3 +13,9 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.langs.phps; import gplx.*; import gplx.langs.*;
/** One assignment line: a key, optional key subscripts, and the assigned value. Setters return this for chaining. */
public class Php_line_assign implements Php_line {
	Php_key key;
	Php_key[] key_subs = Php_key_.Ary_empty;	// empty unless subscripts were set
	Php_itm val;

	public Php_key Key() {
		return key;
	}

	public Php_line_assign Key_(Php_key v) {
		key = v;
		return this;
	}

	public Php_key[] Key_subs() {
		return key_subs;
	}

	public Php_line_assign Key_subs_(Php_key[] v) {
		key_subs = v;
		return this;
	}

	public Php_itm Val() {
		return val;
	}

	public Php_line_assign Val_(Php_itm v) {
		val = v;
		return this;
	}
}

View File

@@ -13,3 +13,269 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.langs.phps; import gplx.*; import gplx.langs.*;
import gplx.core.btries.*; import gplx.core.log_msgs.*;
/** A lexer that recognizes one family of tokens after its hook bytes have matched in the parser's trie. */
interface Php_lxr {
	/** Id of the lexer family; see the Php_lxr_.Tid_* constants. */
	int Lxr_tid();

	/** Registers the lexer's hook bytes in trie and, where needed, marks its first byte in parser_interrupts. */
	void Lxr_ini(Btrie_slim_mgr trie, Php_parser_interrupt[] parser_interrupts);

	/** Supplies the per-parse state: source bytes, their length, the token worker and the token factory. */
	void Lxr_bgn(byte[] src, int src_len, Php_tkn_wkr tkn_wkr, Php_tkn_factory tkn_factory);

	/**
	 * Builds a token for the match that starts at bgn, where cur is the position just after the matched hook.
	 * Returns the position at which scanning should resume; implementations may return Php_parser.NotFound to reject the match.
	 */
	int Lxr_make(Php_ctx ctx, int bgn, int cur);
}
/** Ids of the lexer families. */
class Php_lxr_ {
	public static final byte
		  Tid_declaration = 1
		, Tid_ws = 2
		, Tid_comment = 3
		, Tid_var = 4
		, Tid_sym = 5
		, Tid_keyword = 6
		, Tid_num = 7
		, Tid_quote = 8
		;
}
/** Base class for lexers: stores the per-parse state handed over by Lxr_bgn and leaves the rest to subclasses. */
abstract class Php_lxr_base implements Php_lxr {
	protected byte[] src;
	protected int src_len;
	protected Php_tkn_wkr tkn_wkr;
	protected Php_tkn_factory tkn_factory;

	public abstract int Lxr_tid();

	public abstract void Lxr_ini(Btrie_slim_mgr trie, Php_parser_interrupt[] parser_interrupts);

	/** Remembers the source, its length, the token worker and the token factory for later Lxr_make calls. */
	public void Lxr_bgn(byte[] src, int src_len, Php_tkn_wkr tkn_wkr, Php_tkn_factory tkn_factory) {
		this.src = src;
		this.src_len = src_len;
		this.tkn_wkr = tkn_wkr;
		this.tkn_factory = tkn_factory;
	}

	public abstract int Lxr_make(Php_ctx ctx, int bgn, int cur);
}
/** Lexer for the opening "<?php" declaration. */
class Php_lxr_declaration extends Php_lxr_base {
	private static final byte[] Bry_declaration = Bry_.new_a7("<?php");

	@Override public int Lxr_tid() {
		return Php_lxr_.Tid_declaration;
	}

	@Override public void Lxr_ini(Btrie_slim_mgr trie, Php_parser_interrupt[] parser_interrupts) {
		trie.Add_obj(Bry_declaration, this);
		parser_interrupts[Byte_ascii.Lt] = Php_parser_interrupt.Char;
	}

	/**
	 * Accepts the declaration only when it is followed by at least one newline / carriage-return byte or by the end of the source;
	 * the trailing run of those bytes becomes part of the token. Otherwise returns Php_parser.NotFound.
	 */
	@Override public int Lxr_make(Php_ctx ctx, int bgn, int cur) {
		boolean nl_seen = false;
		while (cur != src_len) {
			byte b = src[cur];
			if (b == Byte_ascii.Nl || b == Byte_ascii.Cr) {
				nl_seen = true;
				++cur;
				continue;
			}
			if (!nl_seen) return Php_parser.NotFound;	// "<?php" directly followed by some other byte
			break;										// first byte after the line break(s) ends the token
		}
		tkn_wkr.Process(tkn_factory.Declaration(bgn, cur));
		return cur;
	}
}
/** Lexer for whitespace; one instance is hooked to each whitespace byte (space, newline, tab, carriage return). */
class Php_lxr_ws extends Php_lxr_base {
	public static final byte[] Bry_ws_space = Bry_.new_a7(" "), Bry_ws_nl = Bry_.new_a7("\n"), Bry_ws_tab = Bry_.new_a7("\t"), Bry_ws_cr = Bry_.new_a7("\r");
	private byte ws_tid;
	private byte[] ws_bry;

	/** Picks the hook bytes that belong to ws_tid; an unrecognized ws_tid leaves the hook unset. */
	public Php_lxr_ws(byte ws_tid) {
		this.ws_tid = ws_tid;
		if		(ws_tid == Php_tkn_ws.Tid_space)	ws_bry = Bry_ws_space;
		else if	(ws_tid == Php_tkn_ws.Tid_nl)		ws_bry = Bry_ws_nl;
		else if	(ws_tid == Php_tkn_ws.Tid_tab)		ws_bry = Bry_ws_tab;
		else if	(ws_tid == Php_tkn_ws.Tid_cr)		ws_bry = Bry_ws_cr;
	}

	public byte Ws_tid() {
		return ws_tid;
	}

	public byte[] Ws_bry() {
		return ws_bry;
	}

	@Override public int Lxr_tid() {
		return Php_lxr_.Tid_ws;
	}

	@Override public void Lxr_ini(Btrie_slim_mgr trie, Php_parser_interrupt[] parser_interrupts) {
		trie.Add_obj(ws_bry, this);
		parser_interrupts[ws_bry[0]] = Php_parser_interrupt.Char;
	}

	/** Swallows the whole run of whitespace bytes (of any of the four kinds) and emits one token tagged with this lexer's ws_tid. */
	@Override public int Lxr_make(Php_ctx ctx, int bgn, int cur) {
		while (cur != src_len) {
			byte b = src[cur];
			boolean is_ws = b == Byte_ascii.Space || b == Byte_ascii.Nl || b == Byte_ascii.Tab || b == Byte_ascii.Cr;
			if (!is_ws) break;
			++cur;
		}
		tkn_wkr.Process(tkn_factory.Ws(bgn, cur, ws_tid));
		return cur;
	}
}
/** Lexer for comments: block comments, "//" line comments and "#" line comments. */
class Php_lxr_comment extends Php_lxr_base {
	/** Selects the opening and closing byte sequences for comment_tid; an unrecognized id leaves both unset. */
	public Php_lxr_comment(byte comment_tid) {
		this.comment_tid = comment_tid;
		switch (comment_tid) {
			case Php_tkn_comment.Tid_mult:	comment_bgn = Bry_bgn_mult;		comment_end = Bry_end_mult; break;
			case Php_tkn_comment.Tid_slash:	comment_bgn = Bry_bgn_slash;	comment_end = Bry_end_nl; break;
			case Php_tkn_comment.Tid_hash:	comment_bgn = Bry_bgn_hash;		comment_end = Bry_end_nl; break;
		}
	}
	@Override public int Lxr_tid() {return Php_lxr_.Tid_comment;}
	@Override public void Lxr_ini(Btrie_slim_mgr trie, Php_parser_interrupt[] parser_interrupts) {
		trie.Add_obj(comment_bgn, this);
		parser_interrupts[Byte_ascii.Slash] = Php_parser_interrupt.Char;
		parser_interrupts[Byte_ascii.Hash] = Php_parser_interrupt.Char;
	}
	public byte Comment_tid() {return comment_tid;} private byte comment_tid;
	public byte[] Comment_bgn() {return comment_bgn;} private byte[] comment_bgn;
	public byte[] Comment_end() {return comment_end;} private byte[] comment_end;
	/**
	 * Emits a comment token that runs from bgn through the closing sequence.
	 * When no closing sequence exists, Dangling_comment is logged and the rest of the source becomes the comment.
	 */
	@Override public int Lxr_make(Php_ctx ctx, int bgn, int cur) {
		// search from cur (just past the opening sequence), not bgn; otherwise the terminator can overlap the opener
		// and a slash-star-slash sequence would be taken for a complete block comment
		int end = Bry_find_.Find_fwd(src, comment_end, cur);
		if (end == Bry_find_.Not_found) {
			tkn_wkr.Msg_many(src, bgn, cur, Php_lxr_comment.Dangling_comment, comment_tid, comment_end);
			cur = src_len;	// NOTE: terminating sequence not found; assume rest of src is comment
		}
		else
			cur = end + comment_end.length;
		tkn_wkr.Process(tkn_factory.Comment(bgn, cur, comment_tid));
		return cur;
	}
	public static final Gfo_msg_itm Dangling_comment = Gfo_msg_itm_.new_warn_(Php_parser.Log_nde, "dangling_comment", "dangling_comment");
	public static final byte[] Bry_bgn_mult = Bry_.new_a7("/*"), Bry_bgn_slash = Bry_.new_a7("//"), Bry_bgn_hash = Bry_.new_a7("#")
		, Bry_end_mult = Bry_.new_a7("*/"), Bry_end_nl = Bry_.new_a7("\n");
}
class Php_lxr_var extends Php_lxr_base {
@Override public int Lxr_tid() {return Php_lxr_.Tid_var;}
@Override public void Lxr_ini(Btrie_slim_mgr trie, Php_parser_interrupt[] parser_interrupts) {
trie.Add_obj(Bry_var, this);
parser_interrupts[Byte_ascii.Dollar] = Php_parser_interrupt.Char;
}
@Override public int Lxr_make(Php_ctx ctx, int bgn, int cur) {
boolean loop = true;
while (loop) {
if (cur == src_len) break;
byte b = src[cur];
switch (b) {
case Byte_ascii.Num_0: case Byte_ascii.Num_1: case Byte_ascii.Num_2: case Byte_ascii.Num_3: case Byte_ascii.Num_4:
case Byte_ascii.Num_5: case Byte_ascii.Num_6: case Byte_ascii.Num_7: case Byte_ascii.Num_8: case Byte_ascii.Num_9:
case Byte_ascii.Ltr_A: case Byte_ascii.Ltr_B: case Byte_ascii.Ltr_C: case Byte_ascii.Ltr_D: case Byte_ascii.Ltr_E:
case Byte_ascii.Ltr_F: case Byte_ascii.Ltr_G: case Byte_ascii.Ltr_H: case Byte_ascii.Ltr_I: case Byte_ascii.Ltr_J:
case Byte_ascii.Ltr_K: case Byte_ascii.Ltr_L: case Byte_ascii.Ltr_M: case Byte_ascii.Ltr_N: case Byte_ascii.Ltr_O:
case Byte_ascii.Ltr_P: case Byte_ascii.Ltr_Q: case Byte_ascii.Ltr_R: case Byte_ascii.Ltr_S: case Byte_ascii.Ltr_T:
case Byte_ascii.Ltr_U: case Byte_ascii.Ltr_V: case Byte_ascii.Ltr_W: case Byte_ascii.Ltr_X: case Byte_ascii.Ltr_Y: case Byte_ascii.Ltr_Z:
case Byte_ascii.Ltr_a: case Byte_ascii.Ltr_b: case Byte_ascii.Ltr_c: case Byte_ascii.Ltr_d: case Byte_ascii.Ltr_e:
case Byte_ascii.Ltr_f: case Byte_ascii.Ltr_g: case Byte_ascii.Ltr_h: case Byte_ascii.Ltr_i: case Byte_ascii.Ltr_j:
case Byte_ascii.Ltr_k: case Byte_ascii.Ltr_l: case Byte_ascii.Ltr_m: case Byte_ascii.Ltr_n: case Byte_ascii.Ltr_o:
case Byte_ascii.Ltr_p: case Byte_ascii.Ltr_q: case Byte_ascii.Ltr_r: case Byte_ascii.Ltr_s: case Byte_ascii.Ltr_t:
case Byte_ascii.Ltr_u: case Byte_ascii.Ltr_v: case Byte_ascii.Ltr_w: case Byte_ascii.Ltr_x: case Byte_ascii.Ltr_y: case Byte_ascii.Ltr_z:
case Byte_ascii.Underline:
++cur;
break;
default:
loop = false;
break;
}
}
tkn_wkr.Process(tkn_factory.Var(bgn, cur));
return cur;
}
private static final byte[] Bry_var = Bry_.new_a7("$");
}
/** Lexer for a fixed symbol (EX: ";" or "=>"); the matched hook is the whole token. */
class Php_lxr_sym extends Php_lxr_base {
	private byte[] hook;
	byte tkn_tid;

	public Php_lxr_sym(String hook_str, byte tkn_tid) {
		this.hook = Bry_.new_a7(hook_str);
		this.tkn_tid = tkn_tid;
	}

	@Override public int Lxr_tid() {
		return Php_lxr_.Tid_sym;
	}

	@Override public void Lxr_ini(Btrie_slim_mgr trie, Php_parser_interrupt[] parser_interrupts) {
		trie.Add_obj(hook, this);
		parser_interrupts[hook[0]] = Php_parser_interrupt.Char;
	}

	/** Emits a generic token of this lexer's tkn_tid spanning exactly the matched hook (bgn..cur). */
	@Override public int Lxr_make(Php_ctx ctx, int bgn, int cur) {
		Php_tkn sym_tkn = tkn_factory.Generic(bgn, cur, tkn_tid);
		tkn_wkr.Process(sym_tkn);
		return cur;
	}
}
/** Lexer for quoted strings delimited by apostrophes or by double quotes. */
class Php_lxr_quote extends Php_lxr_base {
	public static final Gfo_msg_itm Dangling_quote = Gfo_msg_itm_.new_warn_(Php_parser.Log_nde, "dangling_quote", "dangling_quote");
	public static final byte[] Quote_bry_single = Bry_.new_a7("'"), Quote_bry_double = Bry_.new_a7("\"");
	private byte quote_tid;
	private byte[] quote_bry;

	/** quote_tid is the quote byte itself (apostrophe or double quote); any other value leaves the hook unset. */
	public Php_lxr_quote(byte quote_tid) {
		this.quote_tid = quote_tid;
		if		(quote_tid == Byte_ascii.Apos)	quote_bry = Quote_bry_single;
		else if	(quote_tid == Byte_ascii.Quote)	quote_bry = Quote_bry_double;
	}

	@Override public int Lxr_tid() {
		return Php_lxr_.Tid_quote;
	}

	@Override public void Lxr_ini(Btrie_slim_mgr trie, Php_parser_interrupt[] parser_interrupts) {
		trie.Add_obj(quote_bry, this);
		parser_interrupts[quote_tid] = Php_parser_interrupt.Char;
	}

	public byte Quote_tid() {
		return quote_tid;
	}

	public byte[] Quote_bry() {
		return quote_bry;
	}

	/**
	 * Scans forward for the closing quote, skipping quotes escaped by an odd number of backslashes, and emits a quote token.
	 * When no closing quote exists, Dangling_quote is logged and the token runs to the end of the source.
	 */
	@Override public int Lxr_make(Php_ctx ctx, int bgn, int cur) {
		while (true) {
			int quote_pos = Bry_find_.Find_fwd(src, quote_bry, cur);
			if (quote_pos == Bry_find_.Not_found) {
				tkn_wkr.Msg_many(src, bgn, cur, Php_lxr_quote.Dangling_quote, quote_tid, quote_bry);
				cur = src_len;	// no terminator; treat the rest of src as part of the quote
				break;
			}
			boolean escaped = false;
			if (src[quote_pos - 1] == Byte_ascii.Backslash) {
				// count the unbroken run of backslashes that ends right before the quote
				int run = 1;
				int i = quote_pos - 2;
				while (i > -1 && src[i] == Byte_ascii.Backslash) {
					++run;
					--i;
				}
				escaped = run % 2 == 1;	// odd run: the last backslash escapes the quote (EX: \' and \\\'); even run: it does not (EX: \\')
			}
			if (escaped) {
				cur = quote_pos + 1;	// resume the search after the escaped quote
				continue;
			}
			cur = quote_pos + quote_bry.length;
			break;
		}
		tkn_wkr.Process(tkn_factory.Quote(bgn, cur, quote_tid));
		return cur;
	}
}
/** Lexer for a keyword (EX: null, array); the hook only counts when it is not the prefix of a longer word. */
class Php_lxr_keyword extends Php_lxr_base {
	private byte[] hook;
	byte tkn_tid;

	public Php_lxr_keyword(String hook_str, byte tkn_tid) {
		this.hook = Bry_.new_a7(hook_str);
		this.tkn_tid = tkn_tid;
	}

	@Override public int Lxr_tid() {
		return Php_lxr_.Tid_keyword;
	}

	/** Registers the hook in the trie; unlike the other lexers, no interrupt byte is registered here. */
	@Override public void Lxr_ini(Btrie_slim_mgr trie, Php_parser_interrupt[] parser_interrupts) {
		trie.Add_obj(hook, this);
	}

	/** Emits a generic token for the keyword, or returns Php_parser.NotFound when the next byte continues a word. */
	@Override public int Lxr_make(Php_ctx ctx, int bgn, int cur) {
		boolean at_end = !(cur < src_len);
		if (!at_end && !Is_word_end(src[cur]))
			return Php_parser.NotFound;	// num, ltr or extended utf8 character sequence follows; EX: 'nulla'; 'null0'
		tkn_wkr.Process(tkn_factory.Generic(bgn, cur, tkn_tid));
		return cur;
	}

	/** True for the bytes that may directly follow a keyword; EX: 'null '; 'null='; etc.. */
	private static boolean Is_word_end(byte b) {
		switch (b) {
			case Byte_ascii.Space: case Byte_ascii.Tab: case Byte_ascii.Nl: case Byte_ascii.Cr:
			case Byte_ascii.Hash: case Byte_ascii.Slash:
			case Byte_ascii.Quote: case Byte_ascii.Apos:
			case Byte_ascii.Bang: case Byte_ascii.Dollar: case Byte_ascii.Percent: case Byte_ascii.Amp:
			case Byte_ascii.Paren_bgn: case Byte_ascii.Paren_end: case Byte_ascii.Star: case Byte_ascii.Plus:
			case Byte_ascii.Comma: case Byte_ascii.Dash: case Byte_ascii.Dot: case Byte_ascii.Semic:
			case Byte_ascii.Lt: case Byte_ascii.Eq: case Byte_ascii.Gt: case Byte_ascii.Question: case Byte_ascii.At:
			case Byte_ascii.Brack_bgn: case Byte_ascii.Backslash: case Byte_ascii.Brack_end: case Byte_ascii.Pow: case Byte_ascii.Tick:
			case Byte_ascii.Curly_bgn: case Byte_ascii.Pipe: case Byte_ascii.Curly_end: case Byte_ascii.Tilde:
				return true;
			default:
				return false;
		}
	}
}
/** Lexer for unsigned integer literals: a run of ASCII digits. */
class Php_lxr_num extends Php_lxr_base {
	/** Returns the number-lexer id (previously returned Php_lxr_.Tid_keyword, a copy-paste slip). */
	@Override public int Lxr_tid() {return Php_lxr_.Tid_num;}
	/** Hooks every digit 0-9 to this lexer; no interrupt byte is registered. */
	@Override public void Lxr_ini(Btrie_slim_mgr trie, Php_parser_interrupt[] parser_interrupts) {
		for (int i = 0; i < 10; i++)
			trie.Add_obj(new byte[] {(byte)(i + Byte_ascii.Num_0)}, this);
	}
	/** Extends the token over all following digits and emits a num token for bgn..cur. */
	@Override public int Lxr_make(Php_ctx ctx, int bgn, int cur) {
		boolean loop = true;
		while (loop) {
			if (cur == src_len) break;
			byte b = src[cur];
			switch (b) {
				case Byte_ascii.Num_0: case Byte_ascii.Num_1: case Byte_ascii.Num_2: case Byte_ascii.Num_3: case Byte_ascii.Num_4:
				case Byte_ascii.Num_5: case Byte_ascii.Num_6: case Byte_ascii.Num_7: case Byte_ascii.Num_8: case Byte_ascii.Num_9:
					++cur;
					break;
				default:	// first non-digit ends the number
					loop = false;
					break;
			}
		}
		tkn_wkr.Process(tkn_factory.Num(bgn, cur));
		return cur;
	}
}

View File

@@ -13,3 +13,108 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.langs.phps; import gplx.*; import gplx.langs.*;
import gplx.core.btries.*; import gplx.core.log_msgs.*;
public class Php_parser {
Php_lxr[] lxrs; int lxrs_len;
int txt_bgn; Php_tkn_txt txt_tkn;
private final Btrie_slim_mgr trie = Btrie_slim_mgr.ci_a7(); // NOTE:ci:PHP tkns are ASCII
private final Btrie_rv trv = new Btrie_rv();
byte[] src; int src_len; Php_tkn_wkr tkn_wkr; Php_tkn_factory tkn_factory = new Php_tkn_factory(); Php_ctx ctx = new Php_ctx();
Php_parser_interrupt[] parser_interrupts = new Php_parser_interrupt[256];
public Php_parser() {
List_adp list = List_adp_.New();
Init_lxr(list, new Php_lxr_declaration());
Init_lxr(list, new Php_lxr_ws(Php_tkn_ws.Tid_space));
Init_lxr(list, new Php_lxr_ws(Php_tkn_ws.Tid_nl));
Init_lxr(list, new Php_lxr_ws(Php_tkn_ws.Tid_tab));
Init_lxr(list, new Php_lxr_ws(Php_tkn_ws.Tid_cr));
Init_lxr(list, new Php_lxr_comment(Php_tkn_comment.Tid_mult));
Init_lxr(list, new Php_lxr_comment(Php_tkn_comment.Tid_slash));
Init_lxr(list, new Php_lxr_comment(Php_tkn_comment.Tid_hash));
Init_lxr(list, new Php_lxr_var());
Init_lxr(list, new Php_lxr_sym(";", Php_tkn_.Tid_semic));
Init_lxr(list, new Php_lxr_sym("=", Php_tkn_.Tid_eq));
Init_lxr(list, new Php_lxr_sym("=>", Php_tkn_.Tid_eq_kv));
Init_lxr(list, new Php_lxr_sym(",", Php_tkn_.Tid_comma));
Init_lxr(list, new Php_lxr_sym("(", Php_tkn_.Tid_paren_bgn));
Init_lxr(list, new Php_lxr_sym(")", Php_tkn_.Tid_paren_end));
Init_lxr(list, new Php_lxr_sym("[", Php_tkn_.Tid_brack_bgn));
Init_lxr(list, new Php_lxr_sym("]", Php_tkn_.Tid_brack_end));
Init_lxr(list, new Php_lxr_keyword("null", Php_tkn_.Tid_null));
Init_lxr(list, new Php_lxr_keyword("false", Php_tkn_.Tid_false));
Init_lxr(list, new Php_lxr_keyword("true", Php_tkn_.Tid_true));
Init_lxr(list, new Php_lxr_keyword("array", Php_tkn_.Tid_ary));
Init_lxr(list, new Php_lxr_num());
Init_lxr(list, new Php_lxr_quote(Byte_ascii.Apos));
Init_lxr(list, new Php_lxr_quote(Byte_ascii.Quote));
lxrs = (Php_lxr[])list.To_ary(Php_lxr.class);
lxrs_len = list.Count();
}
private void Init_lxr(List_adp list, Php_lxr lxr) {
lxr.Lxr_ini(trie, parser_interrupts);
list.Add(lxr);
}
public void Parse_tkns(String src, Php_tkn_wkr tkn_wkr) {Parse_tkns(Bry_.new_u8(src), tkn_wkr);}
public void Parse_tkns(byte[] src, Php_tkn_wkr tkn_wkr) {
this.src = src; this.src_len = src.length; this.tkn_wkr = tkn_wkr;
ctx.Src_(src);
tkn_wkr.Init(ctx);
if (src_len == 0) return;
for (int i = 0; i < lxrs_len; i++)
lxrs[i].Lxr_bgn(src, src_len, tkn_wkr, tkn_factory);
int pos = 0;
byte b = src[pos];
txt_tkn = null; txt_bgn = 0;
boolean loop_raw = true, loop_txt = true;
while (loop_raw) {
Object o = trie.Match_at_w_b0(trv, b, src, pos, src_len);
if (o == null) { // char does not hook into a lxr
loop_txt = true;
while (loop_txt) { // keep looping until end of String or parser_interrupt
++pos;
if (pos == src_len) {loop_raw = false; break;}
b = src[pos];
if (parser_interrupts[b & 0xFF] == Php_parser_interrupt.Char) {
Make_txt(txt_bgn, pos);
break;
}
}
if (!loop_raw) break;
continue; // continue b/c b is set to interrupt char, and should be matched against trie
}
else { // char hooks into lxr
if (txt_bgn != pos) // txt_bgn is set; make text tkn
Make_txt(txt_bgn, pos);
Php_lxr lxr = (Php_lxr)o;
int match_pos = trv.Pos();
int make_pos = lxr.Lxr_make(ctx, pos, match_pos);
if (make_pos == Php_parser.NotFound) {
Make_txt(txt_bgn, pos);
++pos;
}
else {
txt_tkn = null;
txt_bgn = pos = make_pos;
}
}
if (pos == src_len) break;
b = src[pos];
}
if (txt_bgn != pos)
Make_txt(txt_bgn, pos);
}
// Extends the pending text token to end, or creates one for bgn..end and passes it to tkn_wkr when none is pending.
// Always returns end.
int Make_txt(int bgn, int end) {
	if (txt_tkn != null) {
		// a text token is already pending; only its end position moves
		txt_tkn.Src_end_(end);
		return end;
	}
	txt_tkn = tkn_factory.Txt(bgn, end);
	tkn_wkr.Process(txt_tkn);
	return end;
}
// Sentinel compared against the result of Php_lxr.Lxr_make in Parse_tkns; on this value the current byte is treated as text.
public static final int NotFound = -1;
// Message group named "php_parser", created under Gfo_msg_grp_.Root_gplx.
public static final Gfo_msg_grp Log_nde = Gfo_msg_grp_.new_(Gfo_msg_grp_.Root_gplx, "php_parser");
}

View File

@@ -13,3 +13,278 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.langs.phps; import gplx.*; import gplx.langs.*;
import gplx.core.tests.*; import gplx.core.log_msgs.*;
// Test fixture for Php_parser: owns the parser, a token-collecting worker, a line-evaluating worker,
// and factory methods that build the expected-value checkers used by the tests.
class Php_parser_fxt {
	Php_tkn_factory tkn_factory = new Php_tkn_factory();
	Php_parser parser = new Php_parser();
	Php_tkn_wkr_tkn tkn_wkr = new Php_tkn_wkr_tkn();
	Php_evaluator line_wkr = new Php_evaluator(new Gfo_msg_log("test"));
	Tst_mgr tst_mgr = new Tst_mgr();
	Gfo_msg_log_chkr log_mgr_chkr = new Gfo_msg_log_chkr();

	// Resets the expected-message list and both workers.
	public void Clear() {
		log_mgr_chkr.Clear();
		tkn_wkr.Clear();
		line_wkr.Clear();
	}

	// ---- token checkers ----
	public Php_tkn_chkr_base tkn_declaration() {
		return Php_tkn_declaration_chkr.Instance;
	}
	public Php_tkn_chkr_base tkn_txt(int bgn, int end) {
		return new Php_tkn_txt_chkr(bgn, end);
	}
	public Php_tkn_chkr_base tkn_ws(int bgn, int end) {
		return new Php_tkn_ws_chkr(bgn, end);
	}
	public Php_tkn_chkr_base tkn_generic(int bgn, int end, byte tid) {
		return new Php_tkn_generic_chkr(bgn, end, tid);
	}
	public Php_tkn_comment_chkr tkn_comment_mult(int bgn, int end) {
		Php_tkn_comment_chkr chkr = new Php_tkn_comment_chkr(bgn, end);
		return chkr.Comment_tid_(Php_tkn_comment.Tid_mult);
	}
	public Php_tkn_comment_chkr tkn_comment_slash(int bgn, int end) {
		Php_tkn_comment_chkr chkr = new Php_tkn_comment_chkr(bgn, end);
		return chkr.Comment_tid_(Php_tkn_comment.Tid_slash);
	}
	public Php_tkn_comment_chkr tkn_comment_hash(int bgn, int end) {
		Php_tkn_comment_chkr chkr = new Php_tkn_comment_chkr(bgn, end);
		return chkr.Comment_tid_(Php_tkn_comment.Tid_hash);
	}
	public Php_tkn_quote_chkr tkn_quote_apos(int bgn, int end) {
		Php_tkn_quote_chkr chkr = new Php_tkn_quote_chkr(bgn, end);
		return chkr.Quote_tid_(Byte_ascii.Apos);
	}
	public Php_tkn_quote_chkr tkn_quote_quote(int bgn, int end) {
		Php_tkn_quote_chkr chkr = new Php_tkn_quote_chkr(bgn, end);
		return chkr.Quote_tid_(Byte_ascii.Quote);
	}
	// Queues an expected log message; returns this so the call can be chained before tst_tkns / tst_lines.
	public Php_parser_fxt Msg(Gfo_msg_itm itm, int bgn, int end) {
		log_mgr_chkr.Add_itm(itm, bgn, end);
		return this;
	}
	public Php_tkn_var_chkr tkn_var(int bgn, int end, String v) {
		Php_tkn_var_chkr chkr = new Php_tkn_var_chkr(bgn, end);
		return chkr.Var_name_(v);
	}
	public Php_tkn_num_chkr tkn_num(int bgn, int end, int v) {
		Php_tkn_num_chkr chkr = new Php_tkn_num_chkr(bgn, end);
		return chkr.Num_val_int_(v);
	}

	// ---- line checkers ----
	public Php_line_assign_chkr line_assign(String key, Php_itm_chkr_base val) {
		Php_line_assign_chkr chkr = new Php_line_assign_chkr();
		return chkr.Key_(key).Val_(val);
	}
	public Php_line_assign_chkr line_assign_subs(String key, String[] subs, Php_itm_chkr_base val) {
		Php_line_assign_chkr chkr = new Php_line_assign_chkr();
		return chkr.Key_(key).Subs_(subs).Val_(val);
	}

	// ---- item checkers ----
	public Php_itm_chkr_base itm_bool_true() {
		return new Php_itm_generic_chkr(Php_itm_.Tid_bool_true);
	}
	public Php_itm_chkr_base itm_bool_false() {
		return new Php_itm_generic_chkr(Php_itm_.Tid_bool_false);
	}
	public Php_itm_chkr_base itm_null() {
		return new Php_itm_generic_chkr(Php_itm_.Tid_null);
	}
	public Php_itm_chkr_base itm_quote(String v) {
		return new Php_itm_quote_chkr().Val_obj_str_(v);
	}
	public Php_itm_chkr_base itm_int(int v) {
		return new Php_itm_int_chkr().Val_obj_int_(v);
	}
	public Php_itm_chkr_base itm_txt(String v) {
		return new Php_itm_txt_chkr().Val_obj_str_(v);
	}
	public Php_itm_ary_chkr itm_ary() {
		return new Php_itm_ary_chkr();
	}
	public Php_itm_kv_chkr itm_kv_quote(String k, String v) {
		Php_itm_kv_chkr chkr = new Php_itm_kv_chkr();
		return chkr.Key_(k).Val_(itm_quote(v));
	}
	public Php_itm_kv_chkr itm_kv_txt(String k, String v) {
		Php_itm_kv_chkr chkr = new Php_itm_kv_chkr();
		return chkr.Key_(k).Val_(itm_txt(v));
	}
	public Php_itm_kv_chkr itm_kv_int(String k, int v) {
		Php_itm_kv_chkr chkr = new Php_itm_kv_chkr();
		return chkr.Key_(k).Val_(itm_int(v));
	}
	public Php_itm_kv_chkr itm_kv_itm(String k, Php_itm_chkr_base v) {
		Php_itm_kv_chkr chkr = new Php_itm_kv_chkr();
		return chkr.Key_(k).Val_(v);
	}

	// Parses raw with the token-collecting worker, then checks the collected tokens and the worker's message log.
	public void tst_tkns(String raw, Php_tkn_chkr_base... expd) {
		byte[] src = Bry_.new_u8(raw);
		parser.Parse_tkns(src, tkn_wkr);
		Php_tkn[] parsed = (Php_tkn[])tkn_wkr.List().To_ary(Php_tkn.class);
		// checkers such as Php_tkn_var_chkr read the source bytes back through the "raw_bry" var
		tst_mgr.Vars().Clear().Add("raw_bry", src);
		tst_mgr.Tst_ary("", expd, parsed);
		log_mgr_chkr.tst(tst_mgr, tkn_wkr.Msg_log());
	}
	// Parses raw with the evaluator, then checks the produced lines and the evaluator's message log.
	public void tst_lines(String raw, Php_line_assign_chkr... expd) {
		byte[] src = Bry_.new_u8(raw);
		parser.Parse_tkns(src, line_wkr);
		Php_line[] parsed = (Php_line[])line_wkr.List().To_ary(Php_line.class);
		tst_mgr.Vars().Clear().Add("raw_bry", src);
		tst_mgr.Tst_ary("", expd, parsed);
		log_mgr_chkr.tst(tst_mgr, line_wkr.Msg_log());
	}
}
// Base checker for tokens: compares the token type id and, when set, the source range; subclasses add checks via Chk_tkn.
abstract class Php_tkn_chkr_base implements Tst_chkr {
	// -1 means "not set"; the comparison against -1 is passed as the first argument of Tst_val
	// NOTE(review): that first argument is presumably a skip flag; confirm against Tst_mgr
	private int src_bgn = -1;
	private int src_end = -1;

	public abstract byte Tkn_tid();
	public abstract Class<?> TypeOf();

	public int Src_bgn() {return src_bgn;}
	public int Src_end() {return src_end;}
	public void Src_rng_(int src_bgn, int src_end) {
		this.src_bgn = src_bgn;
		this.src_end = src_end;
	}

	// Sums the results of the common checks and the subclass-specific Chk_tkn.
	public int Chk(Tst_mgr mgr, String path, Object actl_obj) {
		Php_tkn tkn = (Php_tkn)actl_obj;
		int sum = mgr.Tst_val(false, path, "tkn_tid", this.Tkn_tid(), tkn.Tkn_tid());
		sum += mgr.Tst_val(src_bgn == -1, path, "src_bgn", src_bgn, tkn.Src_bgn());
		sum += mgr.Tst_val(src_end == -1, path, "src_end", src_end, tkn.Src_end());
		sum += Chk_tkn(mgr, path, tkn);
		return sum;
	}
	// Hook for subclass checks; the default contributes 0.
	@gplx.Virtual public int Chk_tkn(Tst_mgr mgr, String path, Php_tkn actl_obj) {
		return 0;
	}
}
// Checker for the declaration token (Php_tkn_.Tid_declaration). Adds no fields of its own; a shared Instance is exposed.
class Php_tkn_declaration_chkr extends Php_tkn_chkr_base {
	public static final Php_tkn_declaration_chkr Instance = new Php_tkn_declaration_chkr();

	@Override public byte Tkn_tid() {
		return Php_tkn_.Tid_declaration;
	}
	@Override public Class<?> TypeOf() {
		return Php_tkn_declaration.class;
	}
}
// Checker for a text token (Php_tkn_.Tid_txt) with the given source range.
class Php_tkn_txt_chkr extends Php_tkn_chkr_base {
	public Php_tkn_txt_chkr(int src_bgn, int src_end) {
		this.Src_rng_(src_bgn, src_end);
	}
	@Override public byte Tkn_tid() {
		return Php_tkn_.Tid_txt;
	}
	@Override public Class<?> TypeOf() {
		return Php_tkn_txt.class;
	}
}
// Checker for a whitespace token (Php_tkn_.Tid_ws) with the given source range.
class Php_tkn_ws_chkr extends Php_tkn_chkr_base {
	public Php_tkn_ws_chkr(int src_bgn, int src_end) {
		this.Src_rng_(src_bgn, src_end);
	}
	@Override public byte Tkn_tid() {
		return Php_tkn_.Tid_ws;
	}
	@Override public Class<?> TypeOf() {
		return Php_tkn_ws.class;
	}
}
// Checker for a comment token; additionally compares the comment type id when one was set through Comment_tid_.
class Php_tkn_comment_chkr extends Php_tkn_chkr_base {
	// Tid_null marks "no expectation set"
	private byte comment_tid = Php_tkn_comment.Tid_null;

	public Php_tkn_comment_chkr(int src_bgn, int src_end) {
		this.Src_rng_(src_bgn, src_end);
	}
	@Override public byte Tkn_tid() {
		return Php_tkn_.Tid_comment;
	}
	@Override public Class<?> TypeOf() {
		return Php_tkn_comment.class;
	}
	public Php_tkn_comment_chkr Comment_tid_(byte v) {
		this.comment_tid = v;
		return this;
	}
	@Override public int Chk_tkn(Tst_mgr mgr, String path, Php_tkn actl_obj) {
		Php_tkn_comment comment = (Php_tkn_comment)actl_obj;
		boolean tid_unset = comment_tid == Php_tkn_comment.Tid_null;
		return mgr.Tst_val(tid_unset, path, "comment_tid", comment_tid, comment.Comment_tid());
	}
}
// Checker for a quote token; additionally compares the quote byte when one was set through Quote_tid_.
class Php_tkn_quote_chkr extends Php_tkn_chkr_base {
	// Byte_ascii.Null marks "no expectation set"
	private byte quote_tid = Byte_ascii.Null;

	public Php_tkn_quote_chkr(int src_bgn, int src_end) {
		this.Src_rng_(src_bgn, src_end);
	}
	@Override public byte Tkn_tid() {
		return Php_tkn_.Tid_quote;
	}
	@Override public Class<?> TypeOf() {
		return Php_tkn_quote.class;
	}
	public Php_tkn_quote_chkr Quote_tid_(byte v) {
		this.quote_tid = v;
		return this;
	}
	@Override public int Chk_tkn(Tst_mgr mgr, String path, Php_tkn actl_obj) {
		Php_tkn_quote quote = (Php_tkn_quote)actl_obj;
		boolean tid_unset = quote_tid == Byte_ascii.Null;
		return mgr.Tst_val(tid_unset, path, "quote_tid", quote_tid, quote.Quote_tid());
	}
}
// Checker for a variable token; additionally compares the variable name when one was set through Var_name_.
class Php_tkn_var_chkr extends Php_tkn_chkr_base {
	// null marks "no expectation set"
	private String var_name;

	public Php_tkn_var_chkr(int src_bgn, int src_end) {
		this.Src_rng_(src_bgn, src_end);
	}
	@Override public byte Tkn_tid() {
		return Php_tkn_.Tid_var;
	}
	@Override public Class<?> TypeOf() {
		return Php_tkn_var.class;
	}
	public Php_tkn_var_chkr Var_name_(String v) {
		this.var_name = v;
		return this;
	}
	@Override public int Chk_tkn(Tst_mgr mgr, String path, Php_tkn actl_obj) {
		Php_tkn_var var_tkn = (Php_tkn_var)actl_obj;
		// the token resolves its name against the source bytes, which the fixture stores under "raw_bry"
		byte[] src = (byte[])mgr.Vars_get_by_key("raw_bry");
		return mgr.Tst_val(var_name == null, path, "var_name", var_name, String_.new_u8(var_tkn.Var_name(src)));
	}
}
// Checker for a number token; additionally compares the int value when one was set through Num_val_int_.
class Php_tkn_num_chkr extends Php_tkn_chkr_base {
	// Int_.Min_value marks "no expectation set"
	private int num_val_int = Int_.Min_value;

	public Php_tkn_num_chkr(int src_bgn, int src_end) {
		this.Src_rng_(src_bgn, src_end);
	}
	@Override public byte Tkn_tid() {
		return Php_tkn_.Tid_num;
	}
	@Override public Class<?> TypeOf() {
		return Php_tkn_num.class;
	}
	public Php_tkn_num_chkr Num_val_int_(int v) {
		this.num_val_int = v;
		return this;
	}
	@Override public int Chk_tkn(Tst_mgr mgr, String path, Php_tkn actl_obj) {
		Php_tkn_num num_tkn = (Php_tkn_num)actl_obj;
		// the token resolves its value against the source bytes, which the fixture stores under "raw_bry"
		byte[] src = (byte[])mgr.Vars_get_by_key("raw_bry");
		return mgr.Tst_val(num_val_int == Int_.Min_value, path, "num_val_int", num_val_int, num_tkn.Num_val_int(src));
	}
}
// Checker for tokens identified only by a type id supplied at construction (used by the tests for symbols and keywords).
class Php_tkn_generic_chkr extends Php_tkn_chkr_base {
	private byte tkn_tid;

	public Php_tkn_generic_chkr(int src_bgn, int src_end, byte tkn_tid) {
		this.Src_rng_(src_bgn, src_end);
		this.tkn_tid = tkn_tid;
	}
	@Override public byte Tkn_tid() {
		return tkn_tid;
	}
	@Override public Class<?> TypeOf() {
		return Php_tkn.class;
	}
}
// Checker for an assignment line: compares the key, the optional key subscripts, and the assigned value.
class Php_line_assign_chkr implements Tst_chkr {
	private String key;
	Php_itm_chkr_base[] subs;
	Php_itm_chkr_base val;

	public Class<?> TypeOf() {
		return Php_line_assign.class;
	}
	public Php_line_assign_chkr Key_(String v) {
		key = v;
		return this;
	}
	// Builds one quote checker per expected subscript string.
	public Php_line_assign_chkr Subs_(String[] v) {
		Php_itm_chkr_base[] chkrs = new Php_itm_quote_chkr[v.length];
		for (int idx = 0; idx < chkrs.length; idx++)
			chkrs[idx] = new Php_itm_quote_chkr().Val_obj_str_(v[idx]);
		subs = chkrs;
		return this;
	}
	public Php_line_assign_chkr Val_(Php_itm_chkr_base v) {
		val = v;
		return this;
	}
	public int Chk(Tst_mgr mgr, String path, Object actl_obj) {
		Php_line_assign line = (Php_line_assign)actl_obj;
		int rv = mgr.Tst_val(key == null, path, "key", key, String_.new_u8(line.Key().Val_obj_bry()));
		// subscripts are only checked when Subs_ was called
		if (subs != null)
			rv += mgr.Tst_sub_ary(subs, line.Key_subs(), "subs", rv);
		rv += mgr.Tst_sub_obj(val, line.Val(), "val", rv);
		return rv;
	}
}
// Base checker for evaluated items: compares the item type id; subclasses add checks via Chk_itm.
abstract class Php_itm_chkr_base implements Tst_chkr {
	public static final Php_itm_chkr_base[] Ary_empty = new Php_itm_chkr_base[0];

	public abstract byte Itm_tid();
	public abstract Class<?> TypeOf();

	// Sums the results of the type-id check and the subclass-specific Chk_itm.
	public int Chk(Tst_mgr mgr, String path, Object actl_obj) {
		Php_itm actl = (Php_itm)actl_obj;
		int rv = 0;
		// label is "itm_tid": the value compared is Itm_tid, not a token id (was mislabeled "tkn_tid")
		rv += mgr.Tst_val(false, path, "itm_tid", this.Itm_tid(), actl.Itm_tid());
		rv += Chk_itm(mgr, path, actl);
		return rv;
	}
	// Hook for subclass checks; the default contributes 0.
	@gplx.Virtual public int Chk_itm(Tst_mgr mgr, String path, Php_itm actl_obj) {return 0;}
}
// Checker for items identified only by a type id supplied at construction (the fixture uses it for true, false and null).
class Php_itm_generic_chkr extends Php_itm_chkr_base {
	private byte itm_tid;

	public Php_itm_generic_chkr(byte itm_tid) {
		this.itm_tid = itm_tid;
	}
	@Override public byte Itm_tid() {
		return itm_tid;
	}
	@Override public Class<?> TypeOf() {
		return Php_itm.class;
	}
}
// Checker for an int item (Php_itm_.Tid_int): compares the expected int against Php_itm_int.Val_obj_int.
class Php_itm_int_chkr extends Php_itm_chkr_base {
	private int val_obj_int;

	@Override public byte Itm_tid() {return Php_itm_.Tid_int;}
	@Override public Class<?> TypeOf() {return Php_itm.class;}
	public Php_itm_int_chkr Val_obj_int_(int v) {this.val_obj_int = v; return this;}
	@Override public int Chk_itm(Tst_mgr mgr, String path, Php_itm actl_obj) {
		Php_itm_int actl = (Php_itm_int)actl_obj;
		int rv = 0;
		// label is "val_obj_int": the value compared is the int value (was mislabeled "val_obj_str")
		rv += mgr.Tst_val(false, path, "val_obj_int", val_obj_int, actl.Val_obj_int());
		return rv;
	}
}
// Checker for an unquoted item. Despite the "txt" name it expects Php_itm_.Tid_var and casts the actual item to Php_itm_var.
class Php_itm_txt_chkr extends Php_itm_chkr_base {
	private String val_obj_str;

	@Override public byte Itm_tid() {
		return Php_itm_.Tid_var;
	}
	@Override public Class<?> TypeOf() {
		return Php_itm.class;
	}
	public Php_itm_txt_chkr Val_obj_str_(String v) {
		this.val_obj_str = v;
		return this;
	}
	@Override public int Chk_itm(Tst_mgr mgr, String path, Php_itm actl_obj) {
		Php_itm_var var_itm = (Php_itm_var)actl_obj;
		return mgr.Tst_val(false, path, "val_obj_str", val_obj_str, String_.new_u8(var_itm.Val_obj_bry()));
	}
}
// Checker for a quoted-string item (Php_itm_.Tid_quote): compares the expected string against the item's bytes.
class Php_itm_quote_chkr extends Php_itm_chkr_base {
	private String val_obj_str;

	@Override public byte Itm_tid() {
		return Php_itm_.Tid_quote;
	}
	@Override public Class<?> TypeOf() {
		return Php_itm.class;
	}
	public Php_itm_quote_chkr Val_obj_str_(String v) {
		this.val_obj_str = v;
		return this;
	}
	@Override public int Chk_itm(Tst_mgr mgr, String path, Php_itm actl_obj) {
		Php_itm_quote quote_itm = (Php_itm_quote)actl_obj;
		return mgr.Tst_val(false, path, "val_obj_str", val_obj_str, String_.new_u8(quote_itm.Val_obj_bry()));
	}
}
// Checker for an array item (Php_itm_.Tid_ary): compares the expected element checkers against the array's subs.
class Php_itm_ary_chkr extends Php_itm_chkr_base {
	// defaults to an empty expectation, so itm_ary() alone expects an empty array
	Php_itm_chkr_base[] itms = Php_itm_chkr_base.Ary_empty;

	@Override public byte Itm_tid() {
		return Php_itm_.Tid_ary;
	}
	@Override public Class<?> TypeOf() {
		return Php_itm.class;
	}
	public Php_itm_ary_chkr Subs_(Php_itm_chkr_base... v) {
		this.itms = v;
		return this;
	}
	@Override public int Chk_itm(Tst_mgr mgr, String path, Php_itm actl_obj) {
		Php_itm_ary ary = (Php_itm_ary)actl_obj;
		// copy the actual subs into a plain array so they can be compared element by element
		int len = ary.Subs_len();
		Php_itm[] actl_subs = new Php_itm[len];
		for (int idx = 0; idx < len; idx++)
			actl_subs[idx] = (Php_itm)ary.Subs_get(idx);
		return mgr.Tst_sub_ary(itms, actl_subs, "subs", 0);
	}
}
// Checker for a key/value item (Php_itm_.Tid_kv): compares the key as a string and delegates the value to a nested checker.
class Php_itm_kv_chkr extends Php_itm_chkr_base {
	private String key;
	Php_itm_chkr_base val;

	@Override public byte Itm_tid() {
		return Php_itm_.Tid_kv;
	}
	@Override public Class<?> TypeOf() {
		return Php_itm.class;
	}
	public Php_itm_kv_chkr Key_(String v) {
		key = v;
		return this;
	}
	public Php_itm_kv_chkr Val_(Php_itm_chkr_base v) {
		val = v;
		return this;
	}
	@Override public int Chk_itm(Tst_mgr mgr, String path, Php_itm actl_obj) {
		Php_itm_kv kv = (Php_itm_kv)actl_obj;
		int rv = mgr.Tst_val(false, path, "key", key, String_.new_u8(kv.Key().Val_obj_bry()));
		rv += mgr.Tst_sub_obj(val, kv.Val(), path, rv);
		return rv;
	}
}
// Collects expected log messages and compares them against the entries of a Gfo_msg_log.
class Gfo_msg_log_chkr implements Tst_chkr {
	List_adp itms = List_adp_.New();

	public Class<?> TypeOf() {
		return Gfo_msg_log.class;
	}
	public void Clear() {
		itms.Clear();
	}
	// Queues an expectation for one message with the given item and excerpt range.
	public void Add_itm(Gfo_msg_itm itm, int bgn, int end) {
		Gfo_msg_data_chkr expd = new Gfo_msg_data_chkr();
		expd.Itm_(itm).Excerpt_bgn_(bgn).Excerpt_end_(end);
		itms.Add(expd);
	}
	// The comparison is done in tst; Chk always returns 0.
	public int Chk(Tst_mgr mgr, String path, Object actl_obj) {
		return 0;
	}
	public void tst(Tst_mgr mgr, Object actl_obj) {
		Gfo_msg_log log = (Gfo_msg_log)actl_obj;
		// copy the log entries into a plain array for the element-wise comparison
		int len = log.Ary_len();
		Gfo_msg_data[] actl_ary = new Gfo_msg_data[len];
		for (int idx = 0; idx < len; idx++)
			actl_ary[idx] = log.Ary_get(idx);
		Gfo_msg_data_chkr[] expd_ary = (Gfo_msg_data_chkr[])itms.To_ary(Gfo_msg_data_chkr.class);
		mgr.Tst_ary("itms", expd_ary, actl_ary);
	}
}
// Checker for one logged message: compares the message item's path and, when set, the excerpt range.
class Gfo_msg_data_chkr implements Tst_chkr {
	// null / -1 mark "no expectation set"; each is passed to Tst_val as the first argument for its field
	Gfo_msg_itm itm;
	private int excerpt_bgn = -1;
	private int excerpt_end = -1;

	public Class<?> TypeOf() {return Gfo_msg_data.class;}
	public Gfo_msg_data_chkr Itm_(Gfo_msg_itm v) {itm = v; return this;}
	public Gfo_msg_data_chkr Excerpt_bgn_(int v) {excerpt_bgn = v; return this;}
	public Gfo_msg_data_chkr Excerpt_end_(int v) {excerpt_end = v; return this;}
	public int Chk(Tst_mgr mgr, String path, Object actl_obj) {
		Gfo_msg_data actl = (Gfo_msg_data)actl_obj;
		int rv = 0;
		// Java evaluates arguments before the call, so itm.Path_str() must not be reached when itm is unset;
		// previously an unset itm threw a NullPointerException instead of being passed through as "unset"
		boolean itm_unset = itm == null;
		Object expd_path = itm_unset ? null : itm.Path_str();
		rv += mgr.Tst_val(itm_unset, path, "itm", expd_path, actl.Item().Path_str());
		rv += mgr.Tst_val(excerpt_bgn == -1, path, "excerpt_bgn", excerpt_bgn, actl.Src_bgn());
		rv += mgr.Tst_val(excerpt_end == -1, path, "excerpt_end", excerpt_end, actl.Src_end());
		return rv;
	}
}

View File

@@ -13,3 +13,70 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.langs.phps; import gplx.*; import gplx.langs.*;
import org.junit.*;
// Unit tests for Php_parser, driven through Php_parser_fxt.
// tst_tkns cases check the raw token stream; the numeric arguments to the fxt.tkn_* helpers are the expected source begin / end offsets.
// tst_lines cases run the same source through Php_evaluator and check the resulting assignment lines.
public class Php_parser_tst {
// the fixture is cleared before every test
@Before public void init() {fxt.Clear();} private final Php_parser_fxt fxt = new Php_parser_fxt();
// ---- token-level tests ----
@Test public void Text() {fxt.tst_tkns("text", fxt.tkn_txt(0, 4)); }
@Test public void Declaration_pass() {fxt.tst_tkns("<?php", fxt.tkn_declaration());}
@Test public void Declaration_fail() {fxt.tst_tkns("<?phpx", fxt.tkn_txt(0, 6));}
@Test public void Ws_basic() {fxt.tst_tkns(" ", fxt.tkn_ws(0, 1));}
@Test public void Ws_mix() {fxt.tst_tkns(" a\n", fxt.tkn_ws(0, 1), fxt.tkn_txt(1, 2), fxt.tkn_ws(2, 3));}
@Test public void Comment_mult() {fxt.tst_tkns("/*a*/", fxt.tkn_comment_mult(0, 5));}
@Test public void Comment_slash() {fxt.tst_tkns("//a\n", fxt.tkn_comment_slash(0, 4));}
@Test public void Comment_hash() {fxt.tst_tkns("#a\n", fxt.tkn_comment_hash(0, 3));}
// unterminated /* comment: a comment token is still expected, plus a Dangling_comment message
@Test public void Comment_mult_fail() {fxt.Msg(Php_lxr_comment.Dangling_comment, 0, 2).tst_tkns("/*a", fxt.tkn_comment_mult(0, 3));}
@Test public void Var() {fxt.tst_tkns("$abc", fxt.tkn_var(0, 4, "abc"));}
@Test public void Sym() {fxt.tst_tkns(";==>,()", fxt.tkn_generic(0, 1, Php_tkn_.Tid_semic), fxt.tkn_generic(1, 2, Php_tkn_.Tid_eq), fxt.tkn_generic(2, 4, Php_tkn_.Tid_eq_kv), fxt.tkn_generic(4, 5, Php_tkn_.Tid_comma), fxt.tkn_generic(5, 6, Php_tkn_.Tid_paren_bgn), fxt.tkn_generic(6, 7, Php_tkn_.Tid_paren_end));}
// "nulla" must not be split into keyword "null" + "a"; it is expected as one text token
@Test public void Keyword() {fxt.tst_tkns("null=nulla", fxt.tkn_generic(0, 4, Php_tkn_.Tid_null), fxt.tkn_generic(4, 5, Php_tkn_.Tid_eq), fxt.tkn_txt(5, 10));}
@Test public void Num() {fxt.tst_tkns("0=123", fxt.tkn_num(0, 1, 0), fxt.tkn_generic(1, 2, Php_tkn_.Tid_eq), fxt.tkn_num(2, 5, 123));}
@Test public void Quote_apos() {fxt.tst_tkns("'a\"b'", fxt.tkn_quote_apos(0, 5));}
@Test public void Quote_quote() {fxt.tst_tkns("\"a'b\"", fxt.tkn_quote_quote(0, 5));}
@Test public void Quote_escape() {fxt.tst_tkns("'a\\'b'", fxt.tkn_quote_apos(0, 6));}
@Test public void Brack() {fxt.tst_tkns("['a']", fxt.tkn_generic(0, 1, Php_tkn_.Tid_brack_bgn), fxt.tkn_quote_apos(1, 4), fxt.tkn_generic(4, 5, Php_tkn_.Tid_brack_end));}
// ---- line-level tests ----
@Test public void Line_ws() {fxt.tst_lines("\r\n$a = false;", fxt.line_assign("a", fxt.itm_bool_false()));}
@Test public void Line_brack() {fxt.tst_lines("$a['b'] = 'c';", fxt.line_assign_subs("a", String_.Ary("b"), fxt.itm_quote("c")));}
@Test public void Line_assign_false() {fxt.tst_lines("$a = false;", fxt.line_assign("a", fxt.itm_bool_false()));}
@Test public void Line_assign_quote_charcode() {fxt.tst_lines("$a = 'bc';", fxt.line_assign("a", fxt.itm_quote("bc")));}
@Test public void Line_assign_mult() {fxt.tst_lines("$a = 'b';\n$c='d';", fxt.line_assign("a", fxt.itm_quote("b")), fxt.line_assign("c", fxt.itm_quote("d")));}
@Test public void Empty_usr_array() {
fxt.tst_lines("$a = array();\n$b = array();"
, fxt.line_assign("a", fxt.itm_ary())
, fxt.line_assign("b", fxt.itm_ary())
);
}
// each Ary_* test below has a Brack_* twin that uses the [...] array syntax and expects the same result
@Test public void Ary_flat() {fxt.tst_lines("$a = array('b', 'c', 'd');" , fxt.line_assign("a", fxt.itm_ary().Subs_(fxt.itm_quote("b"), fxt.itm_quote("c"), fxt.itm_quote("d"))));}
@Test public void Brack_flat() {fxt.tst_lines("$a = ['b', 'c', 'd'];" , fxt.line_assign("a", fxt.itm_ary().Subs_(fxt.itm_quote("b"), fxt.itm_quote("c"), fxt.itm_quote("d"))));}
@Test public void Ary_flat_escape() { // PURPOSE.fix: \\' was being interpreted incorrectly; \\ should escape \, but somehow \' was being escaped
fxt.tst_lines("$a = array('b\\\\', 'c');", fxt.line_assign("a", fxt.itm_ary().Subs_(fxt.itm_quote("b\\\\"), fxt.itm_quote("c"))));
}
@Test public void Ary_flat_escape2() { // PURPOSE.fix: \\' was being interpreted incorrectly; \\ should escape \, but somehow \' was being escaped
fxt.tst_lines("$a = array('b\\\\\\'c', 'd');", fxt.line_assign("a", fxt.itm_ary().Subs_(fxt.itm_quote("b\\\\\\'c"), fxt.itm_quote("d"))));
}
@Test public void Ary_kv() {fxt.tst_lines("$a = array(k0 => 'v0', k1 => 'v1', k2 => 'v2');", fxt.line_assign("a", fxt.itm_ary().Subs_(fxt.itm_kv_quote("k0", "v0"), fxt.itm_kv_quote("k1", "v1"), fxt.itm_kv_quote("k2", "v2"))));}
@Test public void Brack_kv() {fxt.tst_lines("$a = [k0 => 'v0', k1 => 'v1', k2 => 'v2'];" , fxt.line_assign("a", fxt.itm_ary().Subs_(fxt.itm_kv_quote("k0", "v0"), fxt.itm_kv_quote("k1", "v1"), fxt.itm_kv_quote("k2", "v2"))));}
@Test public void Ary_kv_num() {fxt.tst_lines("$a = array(k0 => 0, k1 => 1);", fxt.line_assign("a", fxt.itm_ary().Subs_(fxt.itm_kv_int("k0", 0), fxt.itm_kv_int("k1", 1))));}
@Test public void Ary_kv_txt() {fxt.tst_lines("$a = array('k0' => a, 'k1' => b);", fxt.line_assign("a", fxt.itm_ary().Subs_(fxt.itm_kv_txt("k0", "a"), fxt.itm_kv_txt("k1", "b"))));}
@Test public void Ary_nest() {fxt.tst_lines("$a = array('b', array('c', 'd'), 'e');" , fxt.line_assign("a", fxt.itm_ary().Subs_(fxt.itm_quote("b"), fxt.itm_ary().Subs_(fxt.itm_quote("c"), fxt.itm_quote("d")), fxt.itm_quote("e"))));}
@Test public void Brack_nest() {fxt.tst_lines("$a = ['b', ['c', 'd'], 'e'];" , fxt.line_assign("a", fxt.itm_ary().Subs_(fxt.itm_quote("b"), fxt.itm_ary().Subs_(fxt.itm_quote("c"), fxt.itm_quote("d")), fxt.itm_quote("e"))));}
@Test public void Ary_nest_kv() {
fxt.tst_lines("$a = array('i00' => array('01', '02'), 'i10' => array('11', '12'), 'i20' => array('21', '22'));"
, fxt.line_assign
( "a"
, fxt.itm_ary().Subs_
( fxt.itm_kv_itm("i00", fxt.itm_ary().Subs_(fxt.itm_quote("01"), fxt.itm_quote("02")))
, fxt.itm_kv_itm("i10", fxt.itm_ary().Subs_(fxt.itm_quote("11"), fxt.itm_quote("12")))
, fxt.itm_kv_itm("i20", fxt.itm_ary().Subs_(fxt.itm_quote("21"), fxt.itm_quote("22")))
)));
}
@Test public void Brack_nest_kv() {
fxt.tst_lines("$a = ['i00' => ['01', '02'], 'i10' => ['11', '12'], 'i20' => ['21', '22']];"
, fxt.line_assign
( "a"
, fxt.itm_ary().Subs_
( fxt.itm_kv_itm("i00", fxt.itm_ary().Subs_(fxt.itm_quote("01"), fxt.itm_quote("02")))
, fxt.itm_kv_itm("i10", fxt.itm_ary().Subs_(fxt.itm_quote("11"), fxt.itm_quote("12")))
, fxt.itm_kv_itm("i20", fxt.itm_ary().Subs_(fxt.itm_quote("21"), fxt.itm_quote("22")))
)));
}
}

View File

@@ -13,3 +13,125 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.langs.phps; import gplx.*; import gplx.langs.*;
// One item parsed from PHP-serialized data.
interface Php_srl_itm {
// Type id; implementations in this file return one of the Php_srl_itm_.Tid_* constants.
byte Tid();
// Begin / end positions supplied by the creator of the item; shared constants such as nil and bool use -1.
int Src_bgn();
int Src_end();
// Value as an Object; the array item passes null here and exposes its entries separately.
Object Val();
// Appends a text rendering of the item to bfr; depth controls indentation.
void Xto_bfr(Bry_bfr bfr, int depth);
void Clear();
}
// Constants for Php_srl_itm: type ids, their display names, and shared empty / null values.
class Php_srl_itm_ {
	public static final Php_srl_itm[] Ary_empty = new Php_srl_itm[0];

	public static final byte Tid_unknown = 0;
	public static final byte Tid_nil = 1;
	public static final byte Tid_bool = 2;
	public static final byte Tid_int = 3;
	public static final byte Tid_double = 4;
	public static final byte Tid_string = 5;
	public static final byte Tid_array = 6;
	public static final byte Tid_function = 7;

	// indexed by the Tid_* values above, so the order must stay in sync with them
	public static final byte[][] Names = Bry_.Ary("unknown", "nil", "boolean", "int", "double", "string", "array", "function");

	public static final Object Val_nil = null;
	public static final Object Val_table = null;
}
// Shared state and default rendering for Php_srl_itm implementations.
abstract class Php_srl_itm_base implements Php_srl_itm {
	private int src_bgn;
	private int src_end;
	Object val;

	public abstract byte Tid();

	// Called by subclass constructors to set the source range and the boxed value.
	public void Ctor(int src_bgn, int src_end, Object val) {
		this.src_bgn = src_bgn;
		this.src_end = src_end;
		this.val = val;
	}
	public int Src_bgn() {return src_bgn;}
	public int Src_end() {return src_end;}
	public Object Val() {return val;}

	// Writes "<indent><type name>:<value>;" followed by a newline.
	@gplx.Virtual public void Xto_bfr(Bry_bfr bfr, int depth) {
		Php_srl_wtr.Indent(bfr, depth);
		byte[] type_name = Php_srl_itm_.Names[this.Tid()];
		bfr.Add(type_name).Add_byte(Byte_ascii.Colon);
		String val_str = Object_.Xto_str_strict_or_null_mark(this.Val());
		bfr.Add_str_u8(val_str).Add_byte(Byte_ascii.Semic).Add_byte_nl();
	}
	// No state to release by default.
	public void Clear() {
	}
}
// The nil item (serialized as 'N;'); carries no value and no source range. A shared instance is exposed as Nil.
class Php_srl_itm_nil extends Php_srl_itm_base {
	public static Php_srl_itm_nil Nil = new Php_srl_itm_nil();

	public Php_srl_itm_nil() {
		this.Ctor(-1, -1, null);
	}
	@Override public byte Tid() {
		return Php_srl_itm_.Tid_nil;
	}
	public byte[] Bry_extract(byte[] raw) {
		return null;
	}
}
// A boolean item (serialized as 'b:0;' or 'b:1;'). Two shared instances are exposed: Bool_n and Bool_y.
class Php_srl_itm_bool extends Php_srl_itm_base {
	public static Php_srl_itm_bool Bool_n = new Php_srl_itm_bool(false, new byte[] {Byte_ascii.Num_0});
	public static Php_srl_itm_bool Bool_y = new Php_srl_itm_bool(true, new byte[] {Byte_ascii.Num_1});

	private boolean val;
	private byte[] bry;

	public Php_srl_itm_bool(boolean val, byte[] bry) {
		this.val = val;
		this.bry = bry;
		// also store the value boxed in the base class so Val() returns it
		this.Ctor(-1, -1, val);
	}
	@Override public byte Tid() {
		return Php_srl_itm_.Tid_bool;
	}
	// Returns the bytes given at construction; raw is not read.
	public byte[] Bry_extract(byte[] raw) {
		return bry;
	}
	public boolean Val_as_bool() {
		return val;
	}
}
// An int item (serialized as 'i:123;'); keeps the value both unboxed (Val_as_int) and boxed in the base class (Val).
class Php_srl_itm_int extends Php_srl_itm_base {
	private int val;

	public Php_srl_itm_int(int src_bgn, int src_end, int val) {
		this.val = val;
		this.Ctor(src_bgn, src_end, val);
	}
	@Override public byte Tid() {
		return Php_srl_itm_.Tid_int;
	}
	public int Val_as_int() {
		return val;
	}
}
// A double item (serialized as 'd:1.23;'); keeps the value both unboxed (Val_as_double) and boxed in the base class (Val).
class Php_srl_itm_double extends Php_srl_itm_base {
	double val;

	public Php_srl_itm_double(int src_bgn, int src_end, double val) {
		this.val = val;
		this.Ctor(src_bgn, src_end, val);
	}
	@Override public byte Tid() {
		return Php_srl_itm_.Tid_double;
	}
	public double Val_as_double() {
		return val;
	}
}
// A string item (serialized as 's:3:"abc";'); keeps the value both typed (Val_as_str) and in the base class (Val).
class Php_srl_itm_str extends Php_srl_itm_base {
	private String val;

	public Php_srl_itm_str(int src_bgn, int src_end, String val) {
		this.val = val;
		this.Ctor(src_bgn, src_end, val);
	}
	@Override public byte Tid() {
		return Php_srl_itm_.Tid_string;
	}
	public String Val_as_str() {
		return val;
	}
}
// A function item (Php_srl_itm_.Tid_function) identified by an int; keeps the value unboxed (Val_as_int) and boxed (Val).
class Php_srl_itm_func extends Php_srl_itm_base {
	private int val;

	public Php_srl_itm_func(int src_bgn, int src_end, int val) {
		this.val = val;
		this.Ctor(src_bgn, src_end, val);
	}
	@Override public byte Tid() {
		return Php_srl_itm_.Tid_function;
	}
	public int Val_as_int() {
		return val;
	}
}
// An array item: a growable list of key/value entries. Val() is null; entries are reached through the Subs_* methods.
class Php_srl_itm_ary extends Php_srl_itm_base {
	private static final byte[] CONST_ary_bgn = Bry_.new_a7("]{\n");

	// subs may be longer than subs_len; subs_max tracks its allocated length
	Php_srl_itm_kv[] subs = Php_srl_itm_kv.Ary_empty;
	private int subs_len = 0, subs_max = 0;

	public Php_srl_itm_ary(int src_bgn, int src_end) {
		this.Ctor(src_bgn, src_end, null);
	}
	@Override public byte Tid() {
		return Php_srl_itm_.Tid_array;
	}
	// Returns the backing array, which may have unused slots past Subs_len().
	public Php_srl_itm_kv[] Subs_ary() {
		return subs;
	}
	public int Subs_len() {
		return subs_len;
	}
	public Php_srl_itm_kv Subs_get_at(int i) {
		return subs[i];
	}
	// Clears every entry, then drops the backing array.
	public void Subs_clear() {
		for (int idx = 0; idx < subs_len; idx++)
			subs[idx].Clear();
		subs = Php_srl_itm_kv.Ary_empty;
		subs_len = subs_max = 0;
	}
	public Php_srl_itm_ary Subs_add_many(Php_srl_itm_kv... ary) {
		for (Php_srl_itm_kv kv : ary)
			Subs_add(kv);
		return this;
	}
	// Appends itm, doubling the backing array when it is full.
	public Php_srl_itm_ary Subs_add(Php_srl_itm_kv itm) {
		int idx = subs_len;
		int required = idx + 1;
		if (required > subs_max) {
			subs_max = required * 2;
			Php_srl_itm_kv[] grown = new Php_srl_itm_kv[subs_max];
			Array_.Copy_to(subs, 0, grown, 0, idx);
			subs = grown;
		}
		subs[idx] = itm;
		subs_len = required;
		return this;
	}
	// Writes "a[<len>]{", each entry one level deeper, then "}".
	@Override public void Xto_bfr(Bry_bfr bfr, int depth) {
		Php_srl_wtr.Indent(bfr, depth);
		bfr.Add_byte(Byte_ascii.Ltr_a).Add_byte(Byte_ascii.Brack_bgn).Add_int_variable(subs_len).Add(CONST_ary_bgn);
		int sub_depth = depth + 1;
		for (int idx = 0; idx < subs_len; idx++)
			subs[idx].Xto_bfr(bfr, sub_depth);
		Php_srl_wtr.Indent(bfr, depth);
		bfr.Add_byte(Byte_ascii.Curly_end).Add_byte_nl();
	}
}
// One key/value entry of a Php_srl_itm_ary.
// Either side may be null: Php_srl_parser.Parse adds the root entry before parsing and fills Val first,
// and Php_srl_parser.Xto_kv explicitly tolerates a null key.
class Php_srl_itm_kv {
	public static final Php_srl_itm_kv[] Ary_empty = new Php_srl_itm_kv[0];

	private int idx_int = -1;
	Php_srl_itm key;
	Php_srl_itm val;

	public int Idx_int() {return idx_int;}
	public Php_srl_itm_kv Idx_int_(int v) {idx_int = v; return this;}
	public Php_srl_itm Key() {return key;}
	public Php_srl_itm_kv Key_(Php_srl_itm v) {key = v; return this;}
	public Php_srl_itm Val() {return val;}
	public Php_srl_itm_kv Val_(Php_srl_itm v) {val = v; return this;}

	// Clears whichever of key / val is present (previously threw NullPointerException on an entry with no key).
	public void Clear() {
		if (key != null) key.Clear();
		if (val != null) val.Clear();
	}
	// Writes key then val at the same depth, skipping a side that is not set.
	public void Xto_bfr(Bry_bfr bfr, int depth) {
		if (key != null) key.Xto_bfr(bfr, depth);
		if (val != null) val.Xto_bfr(bfr, depth);
	}
}
// Writing helpers shared by the Php_srl_itm renderers.
class Php_srl_wtr {
	// Appends two spaces per level of depth; writes nothing for depth <= 0.
	public static void Indent(Bry_bfr bfr, int depth) {
		if (depth <= 0) return;
		int space_count = depth * 2;
		bfr.Add_byte_repeat(Byte_ascii.Space, space_count); // indent
	}
}

View File

@@ -13,3 +13,194 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.langs.phps; import gplx.*; import gplx.langs.*;
import gplx.core.texts.*;
public class Php_srl_parser {
@gplx.Internal protected Php_srl_factory Factory() {return factory;} Php_srl_factory factory = new Php_srl_factory();
byte[] raw; int raw_len, pos;
public Keyval[] Parse_as_kvs(byte[] raw) {
Php_srl_itm_ary root = Parse(raw);
return Xto_kv_ary(root);
}
Keyval[] Xto_kv_ary(Php_srl_itm_ary ary) {
int len = ary.Subs_len();
Keyval[] rv = new Keyval[len];
for (int i = 0; i < len; i++)
rv[i] = Xto_kv(ary.Subs_get_at(i));
return rv;
}
Keyval Xto_kv(Php_srl_itm_kv itm) {
Php_srl_itm itm_key = itm.Key();
Object key = itm_key == null ? null : itm_key.Val();
Php_srl_itm itm_val = itm.Val();
Object val = null;
switch (itm_val.Tid()) {
case Php_srl_itm_.Tid_array:
Php_srl_itm_ary ary = (Php_srl_itm_ary)itm_val;
val = Xto_kv_ary(ary);
break;
case Php_srl_itm_.Tid_function:
val = new gplx.xowa.xtns.scribunto.Scrib_lua_proc(Object_.Xto_str_strict_or_null_mark(key), Int_.Cast(itm_val.Val())); // NOTE: in most cases, key is a STRING (name of ScribFunction); however, for gsub it is an INT (arg_idx) b/c it is passed as a parameter
break;
default:
val = itm_val.Val();
break;
}
return Keyval_.obj_(key, val);
}
@gplx.Internal protected Php_srl_itm_ary Parse(byte[] raw) {
this.raw = raw; this.raw_len = raw.length; pos = 0;
Php_srl_itm_ary rv = new Php_srl_itm_ary(0, raw_len);
Php_srl_itm_kv cur_kv = factory.Kv();
rv.Subs_add(cur_kv);
boolean mode_is_key = false;
while (true) {
if (pos >= raw_len) break;
if (mode_is_key) {
cur_kv.Key_(Parse_itm(pos));
mode_is_key = false;
}
else {
cur_kv.Val_(Parse_itm(pos));
mode_is_key = true;
}
}
return rv;
}
Php_srl_itm_ary Parse_array(int bgn, int subs_len) { // enters after '{'; EX: 'a:1{' -> Parse_array
Php_srl_itm_ary rv = factory.Ary(bgn, bgn);
for (int i = 0; i < subs_len; i++) {
Php_srl_itm_kv kv = factory.Kv();
Php_srl_itm key_itm = Parse_itm(pos);
kv.Key_(key_itm);
Php_srl_itm val_itm = Parse_itm(pos);
kv.Val_(val_itm);
rv.Subs_add(kv);
}
return rv;
}
Php_srl_itm Parse_itm(int bgn) {
pos = bgn;
Php_srl_itm rv = null;
byte b = raw[pos];
switch (b) {
case Byte_ascii.Ltr_N: // EX: 'N;'
rv = factory.Nil();
pos = Chk(raw, pos + 1, Byte_ascii.Semic);
break;
case Byte_ascii.Ltr_b: // EX: 'b:0;' or 'b:1;'
pos = Chk(raw, pos + 1, Byte_ascii.Colon);
b = raw[pos];
switch (b) {
case Byte_ascii.Num_1: rv = factory.Bool_y(); break;
case Byte_ascii.Num_0: rv = factory.Bool_n(); break;
default: throw err_(raw, pos, raw_len, "unknown boolean type {0}", Char_.To_str(b));
}
pos = Chk(raw, pos + 1, Byte_ascii.Semic);
break;
case Byte_ascii.Ltr_i: // EX: 'i:123;'
rv = Parse_int(pos);
pos = Chk(raw, pos, Byte_ascii.Semic);
break;
case Byte_ascii.Ltr_d: // EX: 'd:1.23;'
pos = Chk(raw, pos + 1, Byte_ascii.Colon);
int double_end = Bry_find_.Find_fwd(raw, Byte_ascii.Semic, pos, raw_len);
String double_str = String_.new_a7(raw, pos, double_end);
double double_val = 0;
if (String_.Eq(double_str, "INF")) double_val = Double_.Inf_pos;
else if (String_.Eq(double_str, "NAN")) double_val = Double_.NaN;
else double_val = Double_.parse(double_str);
rv = factory.Double(pos, double_end, double_val);
pos = Chk(raw, double_end, Byte_ascii.Semic);
break;
case Byte_ascii.Ltr_s: // EX: 's:3:"abc";'
int len_val = Parse_int(pos).Val_as_int();
pos = Chk(raw, pos, Byte_ascii.Colon);
pos = Chk(raw, pos, Byte_ascii.Quote);
int str_end = pos + len_val;
String str_val = String_.new_u8(raw, pos, str_end);
rv = factory.Str(pos, str_end, str_val);
pos = Chk(raw, str_end, Byte_ascii.Quote);
pos = Chk(raw, pos, Byte_ascii.Semic);
break;
case Byte_ascii.Ltr_a: // EX: 'a:0:{}'
int subs_len = Parse_int(pos).Val_as_int();
pos = Chk(raw, pos, Byte_ascii.Colon);
pos = Chk(raw, pos, Byte_ascii.Curly_bgn);
rv = Parse_array(pos, subs_len);
pos = Chk(raw, pos, Byte_ascii.Curly_end);
break;
case Byte_ascii.Ltr_O: // EX: 'O:42:"Scribunto_LuaStandaloneInterpreterFunction":1:{s:2:"id";i:123;}'
int func_bgn = pos;
pos += 62; // 64= len of constant String after ":42:"Scribunto...."
int func_id = Parse_int_val(pos);
rv = factory.Func(func_bgn, pos, func_id);
pos += 2;
break;
default: throw err_(raw, pos, "unexpected type: {0}", Char_.To_str(b));
}
return rv;
}
int Parse_int_val(int bgn) {
pos = bgn;
pos = Chk(raw, pos + 1, Byte_ascii.Colon);
int int_end = Skip_while_num(raw, raw_len, pos, true);
int int_val = Bry_.To_int_or(raw, pos, int_end, Int_.Min_value);
pos = int_end;
return int_val;
}
// Same scan as Parse_int_val, but wraps the result in a Php_srl_itm_int built by the factory.
// The byte at bgn + 1 must be ':' (enforced by Chk); the item's range is [first digit, first non-numeric byte).
// Side effect: the parser cursor (pos) is left on the first byte after the number.
Php_srl_itm_int Parse_int(int bgn) {
    pos = bgn;                                        // cursor is moved before the colon check, so a failed check leaves pos == bgn
    int num_bgn = Chk(raw, bgn + 1, Byte_ascii.Colon);
    pos = num_bgn;
    int num_end = Skip_while_num(raw, raw_len, num_bgn, true);
    int parsed = Bry_.To_int_or(raw, num_bgn, num_end, Int_.Min_value);
    Php_srl_itm_int itm = factory.Int(num_bgn, num_end, parsed);
    pos = num_end;
    return itm;
}
// Asserts that raw[i] is the expected byte.
// Returns i + 1 (the position just past the checked byte); otherwise throws an Err naming both bytes.
// NOTE: i is not range-checked here; an out-of-range i surfaces as the array access failure.
int Chk(byte[] raw, int i, byte expd) {
    byte found = raw[i];
    if (found != expd)
        throw err_(raw, i, "expected '{0}' but got '{1}'", Char_.To_str(expd), Char_.To_str(found));
    return i + 1;
}
// Scans forward from bgn while bytes are numeric and returns the index of the first non-numeric byte.
// "Numeric" here means ASCII digits plus '.' and '-' (both are accepted regardless of num_is_int).
// Throws an Err if raw_len is reached without meeting a non-numeric byte.
// num_is_int is kept for signature compatibility only: the original branched on it, but both branches
// returned the same index, so it never influenced the result.
int Skip_while_num(byte[] raw, int raw_len, int bgn, boolean num_is_int) {
    for (int i = bgn; i < raw_len; i++) {
        switch (raw[i]) {
            case Byte_ascii.Num_0: case Byte_ascii.Num_1: case Byte_ascii.Num_2: case Byte_ascii.Num_3: case Byte_ascii.Num_4:
            case Byte_ascii.Num_5: case Byte_ascii.Num_6: case Byte_ascii.Num_7: case Byte_ascii.Num_8: case Byte_ascii.Num_9:
            case Byte_ascii.Dot:
            case Byte_ascii.Dash:
                break;      // still inside the number; keep scanning
            default:
                return i;   // first byte that is not part of the number
        }
    }
    throw err_(raw, raw_len, raw_len, "skip_ws found eos");    // message text kept as-is (it says skip_ws, not skip_while_num)
}
// Convenience overload: builds the error using raw.length as raw_len.
Err err_(byte[] raw, int bgn, String fmt, Object... args) {
    return err_(raw, bgn, raw.length, fmt, args);
}
// Builds (does not throw) an Err whose message is: "<formatted fmt> <bgn> <excerpt of raw at bgn>".
// The excerpt is requested from String_.new_u8__by_len with a length of 20.
// raw_len is accepted but not used by this method.
Err err_(byte[] raw, int bgn, int raw_len, String fmt, Object... args) {
    String detail = String_.Format(fmt, args);
    String offset = Int_.To_str(bgn);
    String excerpt = String_.new_u8__by_len(raw, bgn, 20);
    return Err_.new_wo_type(detail + " " + offset + " " + excerpt);
}
}
// Creates the item objects produced while parsing PHP-serialized data.
// nil and bool items are shared constants; every other call allocates a new item.
class Php_srl_factory {
    // --- shared constants ---
    public Php_srl_itm Nil() {
        return Php_srl_itm_nil.Nil;
    }
    public Php_srl_itm Bool_n() {
        return Php_srl_itm_bool.Bool_n;
    }
    public Php_srl_itm Bool_y() {
        return Php_srl_itm_bool.Bool_y;
    }

    // --- scalar items; bgn / end are passed through to the item together with the value ---
    public Php_srl_itm_int Int(int bgn, int end, int v) {
        return new Php_srl_itm_int(bgn, end, v);
    }
    public Php_srl_itm Double(int bgn, int end, double v) {
        return new Php_srl_itm_double(bgn, end, v);
    }
    // string item created with a null value
    public Php_srl_itm Str(int bgn, int end) {
        return new Php_srl_itm_str(bgn, end, null);
    }
    public Php_srl_itm Str(int bgn, int end, String v) {
        return new Php_srl_itm_str(bgn, end, v);
    }
    public Php_srl_itm_func Func(int bgn, int end, int v) {
        return new Php_srl_itm_func(bgn, end, v);
    }

    // --- containers ---
    public Php_srl_itm_ary Ary(int bgn, int end) {
        return new Php_srl_itm_ary(bgn, end);
    }
    public Php_srl_itm_kv Kv() {
        return new Php_srl_itm_kv();
    }
}

View File

@@ -13,3 +13,98 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.langs.phps; import gplx.*; import gplx.langs.*;
import org.junit.*;
// JUnit tests for Php_srl_parser: each test feeds one PHP-serialized string and compares the
// printed form of the parsed item with the printed form of the expected item(s).
public class Php_srl_parser_tst {
    Php_srl_parser_fxt fxt = new Php_srl_parser_fxt();

    @Before public void init() {
        fxt.Clear();
    }

    // ---- nil / bool ----
    @Test public void Nil() {
        fxt.Test_parse("N;", fxt.itm_nil_());
    }
    @Test public void Bool_y() {
        fxt.Test_parse("b:1;", fxt.itm_bool_y_());
    }
    @Test public void Bool_n() {
        fxt.Test_parse("b:0;", fxt.itm_bool_n_());
    }

    // ---- numbers ----
    @Test public void Num_int() {
        fxt.Test_parse("i:123;", fxt.itm_int_(123));
    }
    @Test public void Num_int_neg() {
        fxt.Test_parse("i:-123;", fxt.itm_int_(-123));
    }
    @Test public void Num_double() {
        fxt.Test_parse("d:1.23;", fxt.itm_double_(1.23d));
    }
    @Test public void Num_double_inf_pos() {
        fxt.Test_parse("d:INF;", fxt.itm_double_(Double_.Inf_pos));
    }
    @Test public void Num_double_exp() {
        fxt.Test_parse("d:1.2e+2;", fxt.itm_double_(120));
    }
    @Test public void Num_double_nan() {
        fxt.Test_parse("d:NAN;", fxt.itm_double_(Double_.NaN));
    }

    // ---- strings ----
    @Test public void Str_len_3() {
        fxt.Test_parse("s:3:\"abc\";", fxt.itm_str_("abc"));
    }
    @Test public void Str_len_4() {
        fxt.Test_parse("s:4:\"abcd\";", fxt.itm_str_("abcd"));
    }
    @Test public void Str_len_0() {
        fxt.Test_parse("s:0:\"\";", fxt.itm_str_(""));
    }

    // ---- arrays ----
    @Test public void Ary_empty() {
        fxt.Test_parse("a:0:{}", fxt.itm_ary_());
    }
    @Test public void Ary_flat_one() {
        fxt.Test_parse("a:1:{i:1;i:9;}", fxt.itm_ary_().Subs_add(fxt.itm_kvi_(1, fxt.itm_int_(9))));
    }
    @Test public void Ary_flat_many() {
        String raw = String_.Concat
        ( "a:3:{"
        , "i:1;i:9;"
        , "i:2;i:8;"
        , "i:3;i:7;"
        , "}");
        fxt.Test_parse(raw, fxt.itm_ary_().Subs_add_many
        ( fxt.itm_kvi_(1, fxt.itm_int_(9))
        , fxt.itm_kvi_(2, fxt.itm_int_(8))
        , fxt.itm_kvi_(3, fxt.itm_int_(7))
        ));
    }
    @Test public void Ary_nest_one() {
        String raw = String_.Concat
        ( "a:1:{"
        ,   "i:1;"
        ,   "a:2:{"
        ,     "i:1;i:9;"
        ,     "i:2;i:8;"
        ,   "}"
        , "}"
        );
        fxt.Test_parse(raw, fxt.itm_ary_().Subs_add_many
        ( fxt.itm_kvi_(1, fxt.itm_ary_().Subs_add_many
        (   fxt.itm_kvi_(1, fxt.itm_int_(9))
        ,   fxt.itm_kvi_(2, fxt.itm_int_(8))
        ))));
    }
    @Test public void Ary_key_str() {
        String raw = String_.Concat
        ( "a:1:{"
        , "s:3:\"abc\";"
        , "i:987;"
        , "}");
        fxt.Test_parse(raw, fxt.itm_ary_().Subs_add_many
        ( fxt.itm_kvs_("abc", fxt.itm_int_(987))
        ));
    }

    // ---- function object ----
    @Test public void Func() {
        fxt.Test_parse("O:42:\"Scribunto_LuaStandaloneInterpreterFunction\":1:{s:2:\"id\";i:123;}", fxt.itm_func_(123));
    }

    // ---- smoke: body is disabled, so this test currently asserts nothing ----
    @Test public void Smoke() {
//      fxt.Test_parse("a:2:{s:6:\"values\";a:1:{i:1;a:9:{s:21:\"makeProt"+"ectedEnvFuncs\";O:42:\"Scribunto_LuaStandaloneInterpreterFunction\":1:{s:2:\"id\";i:2;}s:3:\"log\";O:42:\"Scribunto_LuaStandaloneInterpreterFunction\":1:{s:2:\"id\";i:3;}s:14:\"clearLogBuffer\";O:42:\"Scribunto_LuaStandaloneInterpreterFunction\":1:{s:2:\"id\";i:4;}s:5:\"setup\";O:42:\"Scribunto_LuaStandaloneInterpreterFunction\":1:{s:2:\"id\";i:5;}s:5:\"clone\";O:42:\"Scribunto_LuaStandaloneInterpreterFunction\":1:{s:2:\"id\";i:6;}s:15:\"getCurrentFrame\";O:42:\"Scribunto_LuaStandaloneInterpreterFunction\":1:{s:2:\"id\";i:7;}s:13:\"executeModule\";O:42:\"Scribunto_LuaStandaloneInterpreterFunction\":1:{s:2:\"id\";i:8;}s:15:\"executeFunction\";O:42:\"Scribunto_LuaStandaloneInterpreterFunction\":1:{s:2:\"id\";i:9;}s:12:\"getLogBuffer\";O:42:\"Scribunto_LuaStandaloneInterpreterFunction\":1:{s:2:\"id\";i:10;}}}s:2:\"op\";s:6:\"return\";}");
    }
}
// Test fixture for Php_srl_parser_tst: owns a parser, exposes short builders for expected items,
// and compares parsed vs expected items through their Xto_bfr output.
class Php_srl_parser_fxt {
    Php_srl_parser parser;
    Php_srl_factory factory;
    Bry_bfr tmp_bfr = Bry_bfr_.Reset(255);

    // Starts each test with a new parser and that parser's factory.
    public void Clear() {
        parser = new Php_srl_parser();
        factory = parser.Factory();
    }

    // ---- expected-item builders; positions are irrelevant for expected items, so -1 is passed ----
    public Php_srl_itm itm_nil_() {
        return factory.Nil();
    }
    public Php_srl_itm itm_bool_n_() {
        return factory.Bool_n();
    }
    public Php_srl_itm itm_bool_y_() {
        return factory.Bool_y();
    }
    public Php_srl_itm itm_int_(int v) {
        return factory.Int(-1, -1, v);
    }
    public Php_srl_itm itm_double_(double v) {
        return factory.Double(-1, -1, v);
    }
    public Php_srl_itm itm_str_(String v) {
        return factory.Str(-1, -1, v);
    }
    public Php_srl_itm itm_func_(int v) {
        return factory.Func(-1, -1, v);
    }
    public Php_srl_itm_ary itm_ary_() {
        return factory.Ary(-1, -1);
    }
    // key/value pair with an int key
    public Php_srl_itm_kv itm_kvi_(int k, Php_srl_itm v) {
        return factory.Kv().Key_(itm_int_(k)).Val_(v);
    }
    // key/value pair with a String key
    public Php_srl_itm_kv itm_kvs_(String k, Php_srl_itm v) {
        return factory.Kv().Key_(itm_str_(k)).Val_(v);
    }

    // Parses raw_str, prints the value of the root's first sub-item, and compares it
    // with the concatenated printout of expd_ary.
    public void Test_parse(String raw_str, Php_srl_itm... expd_ary) {
        Php_srl_itm_ary root = parser.Parse(Bry_.new_u8(raw_str));
        Php_srl_itm first = root.Subs_get_at(0).Val();
        first.Xto_bfr(tmp_bfr, 0);
        String actl = tmp_bfr.To_str_and_clear();
        String expd = Xto_str(expd_ary, 0, expd_ary.length);
        Tfds.Eq_str_lines(expd, actl, actl);
    }

    // Prints ary[bgn..end) into the shared buffer and returns (and clears) the accumulated text.
    String Xto_str(Php_srl_itm[] ary, int bgn, int end) {
        int i = bgn;
        while (i < end) {
            ary[i].Xto_bfr(tmp_bfr, 0);
            i++;
        }
        return tmp_bfr.To_str_and_clear();
    }
}

View File

@@ -13,3 +13,48 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.langs.phps; import gplx.*; import gplx.langs.*;
// One parsed piece of a PHP string literal (plain text run, escape, unicode escape, or argument).
// Php_text_itm_parser produces a list of these; calling Bld on each in order rebuilds the output bytes.
interface Php_text_itm {
// type id of the item; the Php_text_itm_.Tid_* constants are the values used in this file
byte Tid();
// begin offset of the item, as passed to the implementation's constructor
int Src_bgn();
// end offset of the item, as passed to the implementation's constructor
int Src_end();
// appends this item's output to bfr; src is the raw byte array (only plain-text items read from it)
void Bld(Bry_bfr bfr, byte[] src);
}
// Type ids returned by Php_text_itm.Tid().
class Php_text_itm_ {
    public static final byte Tid_text    = 0;   // plain text run
    public static final byte Tid_escaped = 1;   // backslash escape resolved to one byte
    public static final byte Tid_arg     = 2;   // $N argument
    public static final byte Tid_utf16   = 3;   // unicode / hex escape carrying literal bytes
}
// Plain text run: output is the bytes of src in [src_bgn, src_end), copied unchanged.
class Php_text_itm_text implements Php_text_itm {
    private int src_bgn;
    private int src_end;

    public Php_text_itm_text(int src_bgn, int src_end) {
        this.src_bgn = src_bgn;
        this.src_end = src_end;
    }
    public byte Tid() {
        return Php_text_itm_.Tid_text;
    }
    public int Src_bgn() {
        return src_bgn;
    }
    public int Src_end() {
        return src_end;
    }
    public void Bld(Bry_bfr bfr, byte[] src) {
        bfr.Add_mid(src, src_bgn, src_end);
    }
}
// Backslash escape that resolves to a single byte; output is that byte, src is not consulted.
class Php_text_itm_escaped implements Php_text_itm {
    private int src_bgn;
    private int src_end;
    private byte literal;

    public Php_text_itm_escaped(int src_bgn, int src_end, byte literal) {
        this.src_bgn = src_bgn;
        this.src_end = src_end;
        this.literal = literal;
    }
    public byte Tid() {
        return Php_text_itm_.Tid_escaped;
    }
    public int Src_bgn() {
        return src_bgn;
    }
    public int Src_end() {
        return src_end;
    }
    // the resolved byte that Bld appends
    public byte Literal() {
        return literal;
    }
    public void Bld(Bry_bfr bfr, byte[] src) {
        bfr.Add_byte(literal);
    }
}
// Escape that carries a pre-built byte sequence; output is that sequence, src is not consulted.
class Php_text_itm_utf16 implements Php_text_itm {
    private int src_bgn;
    private int src_end;
    private byte[] literal;

    public Php_text_itm_utf16(int src_bgn, int src_end, byte[] literal) {
        this.src_bgn = src_bgn;
        this.src_end = src_end;
        this.literal = literal;
    }
    public byte Tid() {
        return Php_text_itm_.Tid_utf16;
    }
    public int Src_bgn() {
        return src_bgn;
    }
    public int Src_end() {
        return src_end;
    }
    // the bytes that Bld appends
    public byte[] Literal() {
        return literal;
    }
    public void Bld(Bry_bfr bfr, byte[] src) {
        bfr.Add(literal);
    }
}
// Positional argument found in a PHP string (EX: $1). Output is the "~{n}" placeholder form,
// where n is idx shifted down by List_adp_.Base1 (EX: $1 is written as ~{0}).
class Php_text_itm_arg implements Php_text_itm {
    public Php_text_itm_arg(int src_bgn, int src_end, int idx) {this.src_bgn = src_bgn; this.src_end = src_end; this.idx = idx;}
    // FIX: previously returned Tid_escaped (copied from Php_text_itm_escaped), which made argument items
    // indistinguishable from escape items by Tid and left Tid_arg unused
    public byte Tid() {return Php_text_itm_.Tid_arg;}
    public int Src_bgn() {return src_bgn;} private int src_bgn;
    public int Src_end() {return src_end;} private int src_end;
    // argument number exactly as written in the PHP string
    public int Idx() {return idx;} private int idx;
    // Appends "~{" + (idx - Base1) + "}" to bfr; src is not consulted.
    public void Bld(Bry_bfr bfr, byte[] src) {
        bfr.Add_byte(Byte_ascii.Tilde).Add_byte(Byte_ascii.Curly_bgn)
            .Add_int_variable(idx - List_adp_.Base1)    // php args are base 1; placeholders are base 0
            .Add_byte(Byte_ascii.Curly_end);
    }
}

View File

@@ -13,3 +13,131 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.langs.phps; import gplx.*; import gplx.langs.*;
import gplx.core.primitives.*;
// Splits the body of a PHP string literal into Php_text_itm pieces: plain text runs, backslash escapes,
// unicode / hex escapes and $N arguments. Escape handling depends on Quote_is_single:
// single-quoted mode only resolves backslash-apos and backslash-backslash; double-quoted mode
// also resolves quote, n, t, r, u and x escapes.
public class Php_text_itm_parser {
    // Result codes written to the Byte_obj_ref passed to Parse:
    //   Rslt_orig  : initial value; Parse_as_bry returns raw itself
    //   Rslt_dirty : set when text was flushed or an escape was resolved; Parse_as_bry rebuilds from the items
    //   Rslt_fmt   : set when a $N argument was added; NOTE: a later text flush / escape overwrites it with Rslt_dirty
    public static final byte Rslt_orig = 0, Rslt_dirty = 1, Rslt_fmt = 2;
    private static final byte[] CONST_utf_prefix = Bry_.new_a7("\\u00");
    private boolean quote_is_single;

    public boolean Quote_is_single() {return quote_is_single;}
    public Php_text_itm_parser Quote_is_single_(boolean v) {quote_is_single = v; return this;}

    // Parses raw and returns the resulting bytes: raw itself when the result code is Rslt_orig,
    // else the concatenation of every item's Bld output. tmp_list and tmp_bfr are scratch objects
    // supplied by the caller; rslt_ref receives the result code.
    public byte[] Parse_as_bry(List_adp tmp_list, byte[] raw, Byte_obj_ref rslt_ref, Bry_bfr tmp_bfr) {
        Parse(tmp_list, raw, rslt_ref);
        byte[] rv = raw;
        switch (rslt_ref.Val()) {
            case Rslt_orig: break;
            case Rslt_dirty:
            case Rslt_fmt:
                tmp_bfr.Clear();
                int tmp_list_len = tmp_list.Count();
                for (int i = 0; i < tmp_list_len; i++) {
                    Php_text_itm itm = (Php_text_itm)tmp_list.Get_at(i);
                    itm.Bld(tmp_bfr, raw);
                }
                rv = tmp_bfr.To_bry_and_clear();
                break;
        }
        return rv;
    }

    // Same as the 3-arg Parse, for callers that do not need the result code.
    public void Parse(List_adp tmp_list, byte[] raw) {
        Parse(tmp_list, raw, Byte_obj_ref.zero_());
    }

    // Clears tmp_list, fills it with the items parsed from raw, and stores the result code in rslt.
    // Throws Err in double-quoted mode when a backslash is the last byte, or when a unicode escape
    // has fewer than 4 bytes after the "u".
    public void Parse(List_adp tmp_list, byte[] raw, Byte_obj_ref rslt) {
        tmp_list.Clear();
        int raw_len = raw.length; int raw_last = raw_len - 1;
        int txt_bgn = -1;               // start of the pending plain-text run; -1 when there is none
        byte rslt_val = Rslt_orig;
        for (int i = 0; i < raw_len; i++) {
            byte b = raw[i];
            switch (b) {
                case Byte_ascii.Backslash:
                    // flush pending text before handling the escape
                    if (txt_bgn != -1) {tmp_list.Add(new Php_text_itm_text(txt_bgn, i)); txt_bgn = -1; rslt_val = Rslt_dirty;}
                    boolean pos_is_last = i == raw_last;
                    int next_pos = i + 1;
                    byte next_char = pos_is_last ? Byte_ascii.Null : raw[next_pos];
                    if (quote_is_single) {  // NOTE: q1 is simpler than q2; REF.MW:http://php.net/manual/en/language.types.String.php; DATE:2014-08-06
                        switch (next_char) {
                            case Byte_ascii.Apos:       next_char = Byte_ascii.Apos; break;
                            case Byte_ascii.Backslash:  next_char = Byte_ascii.Backslash; break;
                            default:                    next_char = Byte_ascii.Null; break;     // not an escape (includes trailing backslash); keep backslash as text
                        }
                    }
                    else {
                        if (pos_is_last) throw Err_.new_wo_type("backslash_is_last_char", "raw", String_.new_u8(raw));
                        switch (next_char) {
                            case Byte_ascii.Backslash:  next_char = Byte_ascii.Backslash; break;
                            case Byte_ascii.Quote:      next_char = Byte_ascii.Quote; break;
                            case Byte_ascii.Ltr_N:
                            case Byte_ascii.Ltr_n:      next_char = Byte_ascii.Nl; break;
                            case Byte_ascii.Ltr_T:
                            case Byte_ascii.Ltr_t:      next_char = Byte_ascii.Tab; break;
                            case Byte_ascii.Ltr_R:
                            case Byte_ascii.Ltr_r:      next_char = Byte_ascii.Cr; break;
                            case Byte_ascii.Ltr_U:
                            case Byte_ascii.Ltr_u: {    // unicode escape: backslash + u + 4 hex digits; EX: 007C is the pipe char
                                rslt_val = Rslt_dirty;
                                Parse_utf16(tmp_list, raw, next_pos + 1, raw_len); // +1 to skip u
                                i = next_pos + 4;       // land on last hex digit; for loop's i++ moves past it
                                continue;
                            }
                            case Byte_ascii.Ltr_X:
                            case Byte_ascii.Ltr_x: {    // EX: "\xc2"
                                rslt_val = Rslt_dirty;
                                // the 2 hex chars are not decoded; they are appended to CONST_utf_prefix and emitted as text
                                byte[] literal = Bry_.Add(CONST_utf_prefix, Bry_.Mid(raw, next_pos + 1, next_pos + 3));
                                tmp_list.Add(new Php_text_itm_utf16(i, i + 4, literal));
                                i = next_pos + 2;       // land on last hex char; for loop's i++ moves past it
                                continue;
                            }
                            default:                    next_char = Byte_ascii.Null; break;     // not an escape; keep backslash as text
                        }
                    }
                    if (next_char == Byte_ascii.Null) {
                        if (txt_bgn == -1) txt_bgn = i;     // backslash starts a new text run
                    }
                    else {
                        tmp_list.Add(new Php_text_itm_escaped(i, next_pos, next_char)); rslt_val = Rslt_dirty;
                        i = next_pos;                       // skip the escaped char
                    }
                    break;
                case Byte_ascii.Dollar:
                    if (txt_bgn != -1) {tmp_list.Add(new Php_text_itm_text(txt_bgn, i)); txt_bgn = -1;}
                    // NOTE: a trailing "$" is tolerated (historically: dollar_is_last_char error); it falls through to the "not an arg" path below
                    int int_end = Find_fwd_non_int(raw, i + 1, raw_len);    // +1 to search after $
                    int int_val = Bry_.To_int_or(raw, i + 1, int_end, -1);  // +1 to search after $
                    if (int_val == -1) {                    // "$" not followed by an int; emit "$" as text (historically: invalid_arg error)
                        tmp_list.Add(new Php_text_itm_text(i, i + 1));
                        continue;
                    }
                    tmp_list.Add(new Php_text_itm_arg(i, int_end, int_val));
                    rslt_val = Rslt_fmt;
                    i = int_end - 1;                        // -1 b/c i++ in for loop
                    break;
                default:
                    if (txt_bgn == -1) txt_bgn = i;
                    break;
            }
        }
        // flush trailing text
        if (txt_bgn != -1) {tmp_list.Add(new Php_text_itm_text(txt_bgn, raw_len)); txt_bgn = -1; rslt_val = Rslt_dirty;}
        rslt.Val_(rslt_val);
    }

    // Decodes the 4 hex digits at src[bgn..bgn+4) and adds the encoded bytes to rv as a utf16 item.
    // bgn must point at the first hex digit (just after the "u").
    private void Parse_utf16(List_adp rv, byte[] src, int bgn, int src_len) {
        int end = bgn + 4;
        // FIX: was "end >= src_len", which wrongly rejected an escape whose 4 digits end exactly at the end of src
        if (end > src_len) throw Err_.new_wo_type("utf16_parse", "src", String_.new_u8(src));
        int v = Int_.By_hex_bry(src, bgn, end);
        byte[] literal = gplx.core.intls.Utf16_.Encode_int_to_bry(v);
        rv.Add(new Php_text_itm_utf16(bgn, end, literal));
    }

    // Returns the index of the first byte in src[bgn..end) that is not an ASCII digit, or end if all are digits.
    public static int Find_fwd_non_int(byte[] src, int bgn, int end) {
        for (int i = bgn; i < end; i++) {
            byte b = src[i];
            switch (b) {
                case Byte_ascii.Num_0: case Byte_ascii.Num_1: case Byte_ascii.Num_2: case Byte_ascii.Num_3: case Byte_ascii.Num_4:
                case Byte_ascii.Num_5: case Byte_ascii.Num_6: case Byte_ascii.Num_7: case Byte_ascii.Num_8: case Byte_ascii.Num_9:
                    break;
                default:
                    return i;
            }
        }
        return end;
    }
}

View File

@@ -13,3 +13,39 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.langs.phps; import gplx.*; import gplx.langs.*;
import org.junit.*;
// JUnit tests for Php_text_itm_parser. Q1_* run in single-quote mode, Q2_* in double-quote mode.
// Each test parses the first string and expects the rebuilt output to equal the second.
public class Php_text_itm_tst {
    private Php_text_itm_fxt fxt = new Php_text_itm_fxt();

    @Before public void init() {
        fxt.Clear();
    }

    // ---- single-quoted ----
    @Test public void Q1_basic() {
        fxt.Init_q1().Test_parse("abcde", "abcde");
    }
    @Test public void Q1_apos() {
        fxt.Init_q1().Test_parse("a\\'b", "a'b");
    }
    @Test public void Q1_backslash() {
        fxt.Init_q1().Test_parse("a\\\\b", "a\\b");
    }
    @Test public void Q1_backslash_eos() {  // PURPOSE: allow single trailing backslash; DATE:2014-08-06
        fxt.Init_q1().Test_parse("a\\", "a\\");
    }
    @Test public void Q1_noop() {
        fxt.Init_q1().Test_parse("a\\$\\nb", "a\\$\\nb");
    }

    // ---- double-quoted ----
    @Test public void Q2_basic() {
        fxt.Init_q2().Test_parse("abcde", "abcde");
    }
    @Test public void Q2_quote() {
        fxt.Init_q2().Test_parse("a\\\"b", "a\"b");
    }
    @Test public void Q2_backslash() {
        fxt.Init_q2().Test_parse("a\\\\b", "a\\b");
    }
    @Test public void Q2_noop() {
        fxt.Init_q2().Test_parse("a\\%\\cb", "a\\%\\cb");
    }
    @Test public void Q2_ws() {
        fxt.Init_q2().Test_parse("a\\tb\\nc", "a\tb\nc");
    }
    @Test public void Q2_fmt() {
        fxt.Init_q2().Test_parse("a$1b$2c", "a~{0}b~{1}c");
    }
    @Test public void Q2_utf_pipe() {
        fxt.Init_q2().Test_parse("a\\u007Cd", "a|d");
    }
    @Test public void Q2_hex_nbsp() {
        fxt.Init_q2().Test_parse("a\\xc2\\xa0d", "a\\u00c2\\u00a0d");
    }
}
// Test fixture for Php_text_itm_tst: owns the parser, selects the quote mode, and checks that
// rebuilding the parsed items yields the expected text.
class Php_text_itm_fxt {
    private Php_text_itm_parser parser;

    // New parser per test.
    public void Clear() {
        parser = new Php_text_itm_parser();
    }
    // Switch to single-quote rules.
    public Php_text_itm_fxt Init_q1() {
        parser.Quote_is_single_(Bool_.Y);
        return this;
    }
    // Switch to double-quote rules.
    public Php_text_itm_fxt Init_q2() {
        parser.Quote_is_single_(Bool_.N);
        return this;
    }
    // Parses raw_str, rebuilds the output from the items, and compares it with expd.
    public void Test_parse(String raw_str, String expd) {
        List_adp itms = List_adp_.New();
        byte[] raw = Bry_.new_u8(raw_str);
        parser.Parse(itms, raw);
        Bry_bfr bfr = Bry_bfr_.Reset(255);
        int itms_len = itms.Count();
        int idx = 0;
        while (idx < itms_len) {
            ((Php_text_itm)itms.Get_at(idx)).Bld(bfr, raw);
            idx++;
        }
        Tfds.Eq(expd, bfr.To_str_and_clear());
    }
}

View File

@@ -13,3 +13,60 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.langs.phps; import gplx.*; import gplx.langs.*;
// A token handed to a Php_tkn_wkr: a type id plus a begin / end offset pair.
public interface Php_tkn {
// type id of the token; the Php_tkn_.Tid_* constants are the values used in this file
byte Tkn_tid();
// begin offset of the token (the Php_tkn_var / Php_tkn_num / Php_tkn_quote helpers use it to slice src)
int Src_bgn();
// end offset of the token; used as the exclusive upper bound by those same helpers
int Src_end();
}
// Token type ids returned by Php_tkn.Tkn_tid().
class Php_tkn_ {
    public static final byte
      Tid_txt           =  1
    , Tid_declaration   =  2
    , Tid_ws            =  3
    , Tid_comment       =  4
    , Tid_var           =  5
    , Tid_eq            =  6
    , Tid_eq_kv         =  7
    , Tid_semic         =  8
    , Tid_comma         =  9
    , Tid_paren_bgn     = 10
    , Tid_paren_end     = 11
    , Tid_null          = 12
    , Tid_false         = 13
    , Tid_true          = 14
    , Tid_ary           = 15
    , Tid_num           = 16
    , Tid_quote         = 17
    , Tid_brack_bgn     = 18
    , Tid_brack_end     = 19
    ;
    // String form of a type id, as produced by Byte_.To_str (presumably the decimal number, not a symbolic name).
    public static String Xto_str(byte tid) {
        return Byte_.To_str(tid);
    }
}
// Base class for tokens: stores the source range and leaves the type id to subclasses.
abstract class Php_tkn_base implements Php_tkn {
    private int src_bgn;
    private int src_end;

    public abstract byte Tkn_tid();

    public int Src_bgn() {
        return src_bgn;
    }
    public int Src_end() {
        return src_end;
    }
    // Moves only the end of the range.
    public void Src_end_(int v) {
        this.src_end = v;
    }
    // Sets both ends of the range.
    public void Src_rng_(int src_bgn, int src_end) {
        this.src_bgn = src_bgn;
        this.src_end = src_end;
    }
}
// Token whose type id is supplied at construction (used for types that have no dedicated class).
class Php_tkn_generic extends Php_tkn_base {
    private byte tid;

    public Php_tkn_generic(int src_bgn, int src_end, byte tid) {
        this.Src_rng_(src_bgn, src_end);
        this.tid = tid;
    }
    @Override public byte Tkn_tid() {
        return tid;
    }
}
// Text token; carries only its source range.
class Php_tkn_txt extends Php_tkn_base {
    public Php_tkn_txt(int src_bgn, int src_end) {
        this.Src_rng_(src_bgn, src_end);
    }
    @Override public byte Tkn_tid() {
        return Php_tkn_.Tid_txt;
    }
}
// Whitespace token; ws_tid records which kind of whitespace (one of the Tid_* constants below).
class Php_tkn_ws extends Php_tkn_base {
    public static final byte Tid_space = 0, Tid_nl = 1, Tid_tab = 2, Tid_cr = 3;
    private byte ws_tid;

    public Php_tkn_ws(int src_bgn, int src_end, byte ws_tid) {
        this.Src_rng_(src_bgn, src_end);
        this.ws_tid = ws_tid;
    }
    @Override public byte Tkn_tid() {
        return Php_tkn_.Tid_ws;
    }
    public byte Ws_tid() {
        return ws_tid;
    }
}
// Comment token; comment_tid records the comment style (one of the Tid_* constants below).
class Php_tkn_comment extends Php_tkn_base {
    public static final byte Tid_null = 0, Tid_mult = 1, Tid_slash = 2, Tid_hash = 3;
    private byte comment_tid;

    public Php_tkn_comment(int src_bgn, int src_end, byte comment_tid) {
        this.Src_rng_(src_bgn, src_end);
        this.comment_tid = comment_tid;
    }
    @Override public byte Tkn_tid() {
        return Php_tkn_.Tid_comment;
    }
    public byte Comment_tid() {
        return comment_tid;
    }
}
// Variable token (EX: $abc).
class Php_tkn_var extends Php_tkn_base {
    public Php_tkn_var(int src_bgn, int src_end) {
        this.Src_rng_(src_bgn, src_end);
    }
    @Override public byte Tkn_tid() {
        return Php_tkn_.Tid_var;
    }
    // Returns the variable name without the leading "$".
    // NOTE: assume vars are of form $abc; +1 to skip first $
    public byte[] Var_name(byte[] src) {
        int name_bgn = this.Src_bgn() + 1;
        return Bry_.Mid(src, name_bgn, this.Src_end());
    }
}
// Number token.
class Php_tkn_num extends Php_tkn_base {
    public Php_tkn_num(int src_bgn, int src_end) {
        this.Src_rng_(src_bgn, src_end);
    }
    @Override public byte Tkn_tid() {
        return Php_tkn_.Tid_num;
    }
    // Converts the token's source span to an int; Int_.Min_value is passed to Bry_.To_int_or as the fallback.
    public int Num_val_int(byte[] src) {
        int bgn = this.Src_bgn();
        int end = this.Src_end();
        return Bry_.To_int_or(src, bgn, end, Int_.Min_value);
    }
}
// Quoted-string token; quote_tid is whatever byte the creator passed to identify the quote style.
class Php_tkn_quote extends Php_tkn_base {
    // NOTE(review): these names match Php_tkn_comment's constants exactly and look copy-pasted; confirm whether any caller uses them for quotes
    public static final byte Tid_null = 0, Tid_mult = 1, Tid_slash = 2, Tid_hash = 3;
    private byte quote_tid;

    public Php_tkn_quote(int src_bgn, int src_end, byte quote_tid) {
        this.Src_rng_(src_bgn, src_end);
        this.quote_tid = quote_tid;
    }
    @Override public byte Tkn_tid() {
        return Php_tkn_.Tid_quote;
    }
    public byte Quote_tid() {
        return quote_tid;
    }
    // Returns the text between the quote chars.
    // NOTE: assume quote are of form 'abc'; +1, -1 to skip flanking chars
    public byte[] Quote_text(byte[] src) {
        int text_bgn = this.Src_bgn() + 1;
        int text_end = this.Src_end() - 1;
        return Bry_.Mid(src, text_bgn, text_end);
    }
}
// Declaration token. A single shared Instance is used; its source range is never set,
// so Src_bgn / Src_end stay at their default of 0.
class Php_tkn_declaration extends Php_tkn_base {
    public static final Php_tkn_declaration Instance = new Php_tkn_declaration();

    @Override public byte Tkn_tid() {
        return Php_tkn_.Tid_declaration;
    }
}

View File

@@ -13,3 +13,14 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.langs.phps; import gplx.*; import gplx.langs.*;
// Creates token objects. Every method allocates a new token except Declaration, which returns the shared instance.
class Php_tkn_factory {
    public Php_tkn_generic Generic(int bgn, int end, byte tid) {
        return new Php_tkn_generic(bgn, end, tid);
    }
    public Php_tkn_txt Txt(int bgn, int end) {
        return new Php_tkn_txt(bgn, end);
    }
    // bgn / end are ignored: the declaration token is a shared singleton without a range
    public Php_tkn Declaration(int bgn, int end) {
        return Php_tkn_declaration.Instance;
    }
    public Php_tkn_ws Ws(int bgn, int end, byte ws_tid) {
        return new Php_tkn_ws(bgn, end, ws_tid);
    }
    public Php_tkn_var Var(int bgn, int end) {
        return new Php_tkn_var(bgn, end);
    }
    public Php_tkn_num Num(int bgn, int end) {
        return new Php_tkn_num(bgn, end);
    }
    public Php_tkn_comment Comment(int bgn, int end, byte comment_tid) {
        return new Php_tkn_comment(bgn, end, comment_tid);
    }
    public Php_tkn_quote Quote(int bgn, int end, byte quote_tid) {
        return new Php_tkn_quote(bgn, end, quote_tid);
    }
}

View File

@@ -13,3 +13,22 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.langs.phps; import gplx.*; import gplx.langs.*;
import gplx.core.log_msgs.*;
// Receiver of tokens. Implementations in this package: Php_tkn_wkr_tkn (collects tokens as-is)
// and Php_evaluator (builds assignment lines from them).
public interface Php_tkn_wkr {
// called with the parse context before tokens arrive (Php_evaluator reads ctx.Src() here) -- presumably once per parse; confirm against caller
void Init(Php_ctx ctx);
// called for each token
void Process(Php_tkn tkn);
// reports a message about src[bgn..end); itm identifies the message and args are its arguments
void Msg_many(byte[] src, int bgn, int end, Gfo_msg_itm itm, Object... args);
}
// Php_tkn_wkr that simply records what it receives: tokens go into a list, messages into a log.
class Php_tkn_wkr_tkn implements Php_tkn_wkr {
    List_adp lines = List_adp_.New();
    Gfo_msg_log msg_log = new Gfo_msg_log("gplx.langs.phps");

    // Nothing to initialize; the context is not needed.
    public void Init(Php_ctx ctx) {
    }
    // Tokens received so far, in arrival order.
    public List_adp List() {
        return lines;
    }
    public Gfo_msg_log Msg_log() {
        return msg_log;
    }
    // Empties both the token list and the message log.
    public void Clear() {
        lines.Clear();
        msg_log.Clear();
    }
    public void Process(Php_tkn tkn) {
        lines.Add(tkn);
    }
    // Forwards the message to the log (note the argument order: itm first, then src / bgn / end).
    public void Msg_many(byte[] src, int bgn, int end, Gfo_msg_itm itm, Object... args) {
        msg_log.Add_itm_many(itm, src, bgn, end, args);
    }
}