1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2026-03-02 03:49:30 +00:00
This commit is contained in:
gnosygnu
2015-09-20 23:43:51 -04:00
parent 5fe27b5b3b
commit fa70c05354
1056 changed files with 8375 additions and 7095 deletions

View File

@@ -0,0 +1,22 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.dsvs; import gplx.*; import gplx.langs.*;
/** Parses one field of a delimiter-separated byte array on behalf of a Dsv_tbl_parser. */
public interface Dsv_fld_parser {
// Supplies the field-delimiter and row-delimiter bytes that the parser should recognize.
void Init(byte fld_dlm, byte row_dlm);
// Scans src from pos (bounded by src_len) for the field that started at fld_bgn and has index fld_idx.
// The implementations in this package report the field to mgr and return the position just after the delimiter that ended the field.
int Parse(Dsv_tbl_parser tbl_parser, Dsv_wkr_base mgr, byte[] src, int pos, int src_len, int fld_idx, int fld_bgn);
}

View File

@@ -0,0 +1,114 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.dsvs; import gplx.*; import gplx.langs.*;
/** Shared field-parser instances and the common "fld unhandled" error for the dsv parsers. */
public class Dsv_fld_parser_ {
  public static final Dsv_fld_parser Bry_parser = Dsv_fld_parser_bry._;
  public static final Dsv_fld_parser Int_parser = Dsv_fld_parser_int._;
  public static final Dsv_fld_parser Line_parser__comment_is_pipe = new Dsv_fld_parser_line(Byte_ascii.Pipe);

  /**
   * Raises the "fld unhandled" error, describing the parser, the worker, the field index and the raw field text.
   * NOTE: this method always throws; the declared return type only allows call sites to write
   * {@code throw Dsv_fld_parser_.err_fld_unhandled(...)} so the compiler sees the branch as terminating.
   */
  public static Err err_fld_unhandled(Dsv_fld_parser parser, Dsv_wkr_base wkr, int fld_idx, byte[] src, int bgn, int end) {
    String parser_name = Type_adp_.NameOf_obj(parser);
    String wkr_name = Type_adp_.NameOf_obj(wkr);
    String fld_val = String_.new_u8(src, bgn, end);
    throw Err_.new_wo_type("fld unhandled", "parser", parser_name, "wkr", wkr_name, "fld_idx", fld_idx, "val", fld_val).Trace_ignore_add_1_();
  }
}
/**
 * Field parser that treats an entire row as one field.
 * When the comment delimiter is met, scanning jumps straight to the next row delimiter (or to end of source).
 */
class Dsv_fld_parser_line implements Dsv_fld_parser {
  private final byte comment_dlm;
  private byte row_dlm = Byte_ascii.Nl;

  public Dsv_fld_parser_line(byte comment_dlm) {
    this.comment_dlm = comment_dlm;
  }

  /** Only the row delimiter is used; fld_dlm is ignored by this parser. */
  public void Init(byte fld_dlm, byte row_dlm) {
    this.row_dlm = row_dlm;
  }

  /** Writes src[fld_bgn..row end) to wkr, commits the row and returns the position after the row delimiter. */
  public int Parse(Dsv_tbl_parser parser, Dsv_wkr_base wkr, byte[] src, int pos, int src_len, int fld_idx, int fld_bgn) {
    int cur = pos;
    for (;;) {
      // end of source is treated as an implicit row delimiter
      byte b = (cur == src_len) ? row_dlm : src[cur];
      if (b == comment_dlm) {
        // jump to the row delimiter that ends the comment; if there is none, jump to end of source
        int row_end = Bry_find_.Find_fwd_until(src, cur, src_len, row_dlm);
        cur = (row_end == Bry_find_.Not_found) ? src_len : row_end;
        continue;
      }
      if (b != row_dlm) {
        cur++;
        continue;
      }
      if (!wkr.Write_bry(parser, fld_idx, src, fld_bgn, cur))
        throw Dsv_fld_parser_.err_fld_unhandled(this, wkr, fld_idx, src, fld_bgn, cur);
      wkr.Commit_itm(parser, cur);
      int nxt = cur + 1; // row_dlm is always 1 byte
      parser.Update_by_row(nxt);
      return nxt;
    }
  }
}
/** Field parser that hands the raw bytes of each field to the worker through Write_bry. */
class Dsv_fld_parser_bry implements Dsv_fld_parser {
  public static final Dsv_fld_parser_bry _ = new Dsv_fld_parser_bry(); Dsv_fld_parser_bry() {}
  private byte fld_dlm = Byte_ascii.Pipe, row_dlm = Byte_ascii.Nl;

  public void Init(byte fld_dlm, byte row_dlm) {
    this.fld_dlm = fld_dlm;
    this.row_dlm = row_dlm;
  }

  /**
   * Scans forward to the next field or row delimiter (end of source counts as a row delimiter),
   * writes the field to wkr, and returns the position after the delimiter.
   * A row delimiter additionally commits the current item.
   */
  public int Parse(Dsv_tbl_parser parser, Dsv_wkr_base wkr, byte[] src, int pos, int src_len, int fld_idx, int fld_bgn) {
    int end = pos;
    byte dlm;
    while (true) {
      dlm = (end == src_len) ? row_dlm : src[end];
      if (dlm == fld_dlm || dlm == row_dlm) break;
      ++end;
    }
    if (!wkr.Write_bry(parser, fld_idx, src, fld_bgn, end))
      throw Dsv_fld_parser_.err_fld_unhandled(this, wkr, fld_idx, src, fld_bgn, end);
    int nxt = end + 1; // both delimiters are always 1 byte
    if (dlm == fld_dlm) { // fld_dlm is checked first, as it takes precedence over row_dlm
      parser.Update_by_fld(nxt);
    }
    else {
      wkr.Commit_itm(parser, end);
      parser.Update_by_row(nxt);
    }
    return nxt;
  }
}
/** Field parser that converts each field to an int (-1 when Bry_.To_int_or falls back) and hands it to the worker through Write_int. */
class Dsv_fld_parser_int implements Dsv_fld_parser {
  public static final Dsv_fld_parser_int _ = new Dsv_fld_parser_int(); Dsv_fld_parser_int() {}
  private byte fld_dlm = Byte_ascii.Pipe, row_dlm = Byte_ascii.Nl;

  public void Init(byte fld_dlm, byte row_dlm) {
    this.fld_dlm = fld_dlm;
    this.row_dlm = row_dlm;
  }

  /**
   * Scans forward to the next field or row delimiter (end of source counts as a row delimiter),
   * writes the int value of the field to wkr, and returns the position after the delimiter.
   * A row delimiter additionally commits the current item.
   */
  public int Parse(Dsv_tbl_parser parser, Dsv_wkr_base wkr, byte[] src, int pos, int src_len, int fld_idx, int fld_bgn) {
    int end = pos;
    byte dlm;
    while (true) {
      dlm = (end == src_len) ? row_dlm : src[end];
      if (dlm == fld_dlm || dlm == row_dlm) break;
      ++end;
    }
    int val = Bry_.To_int_or(src, fld_bgn, end, -1);
    if (!wkr.Write_int(parser, fld_idx, end, val))
      throw Dsv_fld_parser_.err_fld_unhandled(this, wkr, fld_idx, src, fld_bgn, end);
    int nxt = end + 1; // both delimiters are always 1 byte
    if (dlm == fld_dlm) { // fld_dlm is checked first, as it takes precedence over row_dlm
      parser.Update_by_fld(nxt);
    }
    else {
      wkr.Commit_itm(parser, end);
      parser.Update_by_row(nxt);
    }
    return nxt;
  }
}

View File

@@ -0,0 +1,79 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.dsvs; import gplx.*; import gplx.langs.*;
/**
 * Drives a set of {@link Dsv_fld_parser}s over a delimiter-separated byte array.
 * The parser tracks the current field / row positions and indexes; each field parser reports back
 * through {@link #Update_by_fld(int)} or {@link #Update_by_row(int)} after it consumes a field.
 */
public class Dsv_tbl_parser implements GfoInvkAble, RlsAble {
  private Dsv_wkr_base mgr;
  private Dsv_fld_parser[] fld_parsers = new Dsv_fld_parser[2];
  public byte[] Src() {return src;} private byte[] src;
  public int Fld_bgn() {return fld_bgn;} private int fld_bgn = 0;
  public int Fld_idx() {return fld_idx;} private int fld_idx = 0;
  public int Row_bgn() {return row_bgn;} private int row_bgn = 0;
  public int Row_idx() {return row_idx;} private int row_idx = 0;
  public boolean Skip_blank_lines() {return skip_blank_lines;} public Dsv_tbl_parser Skip_blank_lines_(boolean v) {skip_blank_lines = v; return this;} private boolean skip_blank_lines = true;
  public byte Fld_dlm() {return fld_dlm;} public Dsv_tbl_parser Fld_dlm_(byte v) {fld_dlm = v; return this;} private byte fld_dlm = Byte_ascii.Pipe;
  public byte Row_dlm() {return row_dlm;} public Dsv_tbl_parser Row_dlm_(byte v) {row_dlm = v; return this;} private byte row_dlm = Byte_ascii.Nl;

  /**
   * Binds the worker and the per-field parsers (one parser per field index).
   * Each parser is initialized with the delimiters that are current at the time of this call,
   * so Fld_dlm_ / Row_dlm_ must be set beforehand to take effect.
   */
  public void Init(Dsv_wkr_base mgr, Dsv_fld_parser... fld_parsers) {
    this.mgr = mgr;
    this.fld_parsers = fld_parsers;
    int fld_parsers_len = fld_parsers.length;
    for (int i = 0; i < fld_parsers_len; i++)
      fld_parsers[i].Init(fld_dlm, row_dlm);
  }

  /** Resets the field / row positions and indexes to 0. */
  public void Clear() {
    fld_bgn = fld_idx = row_bgn = row_idx = 0;
  }

  /** Builds (does not throw) an error whose "line" argument is the text from the start of the current row up to pos. */
  public Err Err_row_bgn(String fmt, int pos) {
    return Err_.new_wo_type(fmt, "line", String_.new_u8(src, row_bgn, pos)).Trace_ignore_add_1_();
  }

  /** Called by a field parser after a field delimiter: the next field starts at pos. */
  public void Update_by_fld(int pos) {
    fld_bgn = pos;
    ++fld_idx;
  }

  /** Called by a field parser after a row delimiter: the next row (and its first field) starts at pos. */
  public void Update_by_row(int pos) {
    row_bgn = fld_bgn = pos;
    ++row_idx;
    fld_idx = 0;
  }

  /**
   * Parses src field by field, dispatching each field to the parser registered for its index.
   * When Skip_blank_lines is set, consecutive row delimiters at the start of a row are skipped.
   * Throws an Err if a row holds more fields than there are field parsers.
   */
  public void Parse(byte[] src) {
    this.src = src;
    int src_len = src.length;
    int pos = 0;
    while (pos < src_len) {
      if (fld_idx == 0 && skip_blank_lines) { // row committed; skip blank lines
        while (pos < src_len && src[pos] == row_dlm) {
          ++pos;
          row_bgn = fld_bgn = pos;
        }
        // only blank lines remained: stop here; otherwise the field parser would treat end-of-source
        // as a row delimiter, write an empty field and commit a phantom row
        if (pos == src_len) break;
      }
      // fail with the offending row text instead of a bare ArrayIndexOutOfBoundsException
      if (fld_idx >= fld_parsers.length)
        throw Err_row_bgn("dsv row has more fields than field parsers", pos);
      Dsv_fld_parser fld_parser = fld_parsers[fld_idx];
      pos = fld_parser.Parse(this, mgr, src, pos, src_len, fld_idx, fld_bgn);
    }
  }

  /** Drops the references to the source, the field parsers and the worker. */
  public void Rls() {
    src = null; fld_parsers = null; mgr = null;
  }

  /** Script entry point: "load_by_str" parses the byte array read from argument "v". */
  public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
    if (ctx.Match(k, Invk_load_by_str)) Parse(m.ReadBry("v"));
    else return GfoInvkAble_.Rv_unhandled;
    return this;
  } private static final String Invk_load_by_str = "load_by_str";
}

View File

@@ -0,0 +1,64 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.dsvs; import gplx.*; import gplx.langs.*;
import org.junit.*;
/** Checks that a table with one text column and two int columns loads into Mok_int_itm rows. */
public class Dsv_tbl_parser_int_tst {
  private final Dsv_mok_fxt fxt = new Dsv_mok_fxt();

  @Test public void Basic() {
    String src = String_.Concat_lines_nl_skip_last("a|1|3", "b|2|4");
    Mok_mgr_base mgr = fxt.mgr_int_();
    fxt.Test_load(src, mgr, fxt.itm_int_("a", 1, 3), fxt.itm_int_("b", 2, 4));
  }
}
/** Test item holding one text field and two int fields; rendered as "fld_0|fld_1|fld_2". */
class Mok_int_itm implements To_str_able {
  private final String fld_0;
  private final int fld_1, fld_2;

  public Mok_int_itm(String fld_0, int fld_1, int fld_2) {
    this.fld_0 = fld_0;
    this.fld_1 = fld_1;
    this.fld_2 = fld_2;
  }

  public String To_str() {
    String fld_1_str = Int_.Xto_str(fld_1);
    String fld_2_str = Int_.Xto_str(fld_2);
    return String_.Concat_with_str("|", fld_0, fld_1_str, fld_2_str);
  }
}
/** Test worker: field 0 is read as text, fields 1 and 2 as ints; each committed row becomes a Mok_int_itm. */
class Mok_int_mgr extends Mok_mgr_base {
  private List_adp itms = List_adp_.new_();
  private String fld_0;
  private int fld_1, fld_2;

  public void Clear() {
    itms.Clear();
  }

  @Override public To_str_able[] Itms() {
    Object ary = itms.To_ary(To_str_able.class);
    return (To_str_able[])ary;
  }

  @Override public Dsv_fld_parser[] Fld_parsers() {
    return new Dsv_fld_parser[] {Dsv_fld_parser_bry._, Dsv_fld_parser_int._, Dsv_fld_parser_int._};
  }

  /** Accepts only field 0; any other index is reported as unhandled. */
  @Override public boolean Write_bry(Dsv_tbl_parser parser, int fld_idx, byte[] src, int bgn, int end) {
    if (fld_idx != 0) return false;
    fld_0 = String_.new_u8(src, bgn, end);
    return true;
  }

  /** Accepts only fields 1 and 2; any other index is reported as unhandled. */
  @Override public boolean Write_int(Dsv_tbl_parser parser, int fld_idx, int pos, int val_int) {
    if (fld_idx == 1)
      fld_1 = val_int;
    else if (fld_idx == 2)
      fld_2 = val_int;
    else
      return false;
    return true;
  }

  @Override public void Commit_itm(Dsv_tbl_parser parser, int pos) {
    itms.Add(new Mok_int_itm(fld_0, fld_1, fld_2));
  }
}

View File

@@ -0,0 +1,102 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.dsvs; import gplx.*; import gplx.langs.*;
import org.junit.*;
/** Checks text-only table loading: the basic case, blank lines, and rows with fewer fields than parsers. */
public class Dsv_tbl_parser_str_tst {
  private final Dsv_mok_fxt fxt = new Dsv_mok_fxt();

  @Test public void Basic() {
    String src = String_.Concat_lines_nl_skip_last("a|A", "b|B");
    Mok_mgr_base mgr = fxt.mgr_str_(2);
    fxt.Test_load(src, mgr, fxt.itm_str_("a", "A"), fxt.itm_str_("b", "B"));
  }

  /** Blank lines before, between and after the rows are skipped. */
  @Test public void Blank_lines() {
    String src = String_.Concat_lines_nl_skip_last("", "a|A", "", "b|B", "");
    Mok_mgr_base mgr = fxt.mgr_str_(2);
    fxt.Test_load(src, mgr, fxt.itm_str_("a", "A"), fxt.itm_str_("b", "B"));
  }

  /** A row that ends before its second field still commits with the fields read so far. */
  @Test public void Incomplete_row() {
    String src = String_.Concat_lines_nl_skip_last("a", "b", "");
    Mok_mgr_base mgr = fxt.mgr_str_(2);
    fxt.Test_load(src, mgr, fxt.itm_str_("a"), fxt.itm_str_("b"));
  }
}
/** Base class for the test workers: a Dsv_wkr_base that also exposes the items it has collected. */
abstract class Mok_mgr_base extends Dsv_wkr_base {
// Returns the items collected by the worker; Dsv_mok_fxt.Test_load compares them against the expected items.
public abstract To_str_able[] Itms();
}
/** Test fixture: factory methods for the mock workers / items, plus a load-and-compare helper. */
class Dsv_mok_fxt {
  private Dsv_tbl_parser tbl_parser = new Dsv_tbl_parser();

  public Dsv_mok_fxt Clear() {
    tbl_parser.Clear();
    return this;
  }

  public Mok_mgr_base mgr_int_() {
    return new Mok_int_mgr();
  }

  public Mok_mgr_base mgr_str_(int len) {
    return new Mok_str_mgr(len);
  }

  public Mok_str_itm itm_str_(String... flds) {
    return new Mok_str_itm(flds);
  }

  public Mok_int_itm itm_int_(String fld_0, int fld_1, int fld_2) {
    return new Mok_int_itm(fld_0, fld_1, fld_2);
  }

  /** Loads src through mgr and asserts that the collected items match expd. */
  public void Test_load(String src, Mok_mgr_base mgr, To_str_able... expd) {
    byte[] src_bry = Bry_.new_u8(src);
    mgr.Load_by_bry(src_bry);
    To_str_able[] actl = mgr.Itms();
    Tfds.Eq_ary_str(expd, actl);
  }
}
/** Test item holding any number of text fields; rendered as the fields joined by "|". */
class Mok_str_itm implements To_str_able {
  private final String[] flds;

  public Mok_str_itm(String[] flds) {
    this.flds = flds;
  }

  public String To_str() {
    String joined = String_.Concat_with_str("|", flds);
    return joined;
  }
}
/** Test worker that reads every field as text and turns each committed row into a Mok_str_itm. */
class Mok_str_mgr extends Mok_mgr_base {
  private int flds_len;
  private List_adp itms = List_adp_.new_();
  private List_adp flds = List_adp_.new_();

  public Mok_str_mgr(int flds_len) {
    this.flds_len = flds_len;
  }

  public void Clear() {
    itms.Clear();
  }

  @Override public To_str_able[] Itms() {
    Object ary = itms.To_ary(To_str_able.class);
    return (To_str_able[])ary;
  }

  /** Buffers the field text until the row is committed; every field index is accepted. */
  @Override public boolean Write_bry(Dsv_tbl_parser parser, int fld_idx, byte[] src, int bgn, int end) {
    String fld = String_.new_u8(src, bgn, end);
    flds.Add(fld);
    return true;
  }

  /** Returns flds_len parsers, all of them the shared bry parser. */
  @Override public Dsv_fld_parser[] Fld_parsers() {
    Dsv_fld_parser[] parsers = new Dsv_fld_parser[flds_len];
    int idx = 0;
    while (idx < flds_len) {
      parsers[idx] = Dsv_fld_parser_.Bry_parser;
      idx++;
    }
    return parsers;
  }

  /** Moves the buffered fields into a new item and empties the buffer for the next row. */
  @Override public void Commit_itm(Dsv_tbl_parser parser, int pos) {
    String[] row_flds = (String[])flds.To_ary_and_clear(String.class);
    itms.Add(new Mok_str_itm(row_flds));
  }
}

View File

@@ -0,0 +1,42 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.dsvs; import gplx.*; import gplx.langs.*;
/**
 * Base class for workers that receive parsed dsv fields.
 * Subclasses declare their field parsers, override Write_bry / Write_int for the field indexes they handle,
 * and build an item in Commit_itm when a row ends.
 */
public abstract class Dsv_wkr_base implements GfoInvkAble {
  public static final String Invk_load_by_str = "load_by_str";
  private byte[] src;

  public abstract Dsv_fld_parser[] Fld_parsers();
  public abstract void Commit_itm(Dsv_tbl_parser parser, int pos);

  public byte[] Src() {
    return src;
  }

  /** Default: field is not handled. Override and return true for the field indexes the worker accepts. */
  @gplx.Virtual public boolean Write_bry(Dsv_tbl_parser parser, int fld_idx, byte[] src, int bgn, int end) {return false;}
  /** Default: field is not handled. Override and return true for the field indexes the worker accepts. */
  @gplx.Virtual public boolean Write_int(Dsv_tbl_parser parser, int fld_idx, int pos, int val_int) {return false;}

  /** Parses src with a fresh table parser, bracketed by the Load_by_bry_bgn / Load_by_bry_end hooks. */
  public void Load_by_bry(byte[] src) {
    this.src = src;
    // NOTE: this proc should only be called once, so don't bother caching tbl_parser
    Dsv_tbl_parser tbl_parser = new Dsv_tbl_parser();
    Dsv_fld_parser[] fld_parsers = this.Fld_parsers();
    tbl_parser.Init(this, fld_parsers);
    Load_by_bry_bgn();
    tbl_parser.Parse(src);
    tbl_parser.Rls();
    Load_by_bry_end();
  }

  /** Hook called after the table parser is initialized and before parsing starts; does nothing by default. */
  @gplx.Virtual public void Load_by_bry_bgn() {}
  /** Hook called after parsing finishes and the table parser is released; does nothing by default. */
  @gplx.Virtual public void Load_by_bry_end() {}

  /** Script entry point: "load_by_str" loads the byte array read from argument "v". */
  public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
    if (!ctx.Match(k, Invk_load_by_str))
      return GfoInvkAble_.Rv_unhandled;
    Load_by_bry(m.ReadBry("v"));
    return this;
  }
}

View File

@@ -0,0 +1,214 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.gfss; import gplx.*; import gplx.langs.*;
/** A lexer for one kind of token in gfs script. */
interface Gfs_lxr {
// Returns the Gfs_lxr_.Tid_* constant that identifies this lexer's token kind.
int Lxr_tid();
// Handles the token spanning bgn..end. The implementations in this file return either a position in the source (end, or the position after a closing delimiter) or one of the negative Gfs_lxr_.Rv_* codes.
int Process(Gfs_parser_ctx ctx, int bgn, int end);
}
/** Lexer for whitespace: consumes consecutive whitespace tokens until a different token, an unknown byte, or end of source. */
class Gfs_lxr_whitespace implements Gfs_lxr {
  public static final Gfs_lxr_whitespace _ = new Gfs_lxr_whitespace(); Gfs_lxr_whitespace() {}

  public int Lxr_tid() {
    return Gfs_lxr_.Tid_whitespace;
  }

  /**
   * Scans forward from end.
   * Returns Rv_null after reporting an unmatched byte, Rv_lxr after reporting a non-whitespace lexer,
   * or Rv_eos when the source ends while still in whitespace.
   */
  public int Process(Gfs_parser_ctx ctx, int bgn, int end) {
    byte[] src = ctx.Src();
    int src_len = ctx.Src_len();
    for (int i = end; i < src_len; i++) {
      byte b = src[i];
      Object match = ctx.Trie().Match_bgn_w_byte(b, src, i, src_len);
      if (match == null) {
        ctx.Process_null(i);
        return Gfs_lxr_.Rv_null;
      }
      Gfs_lxr lxr = (Gfs_lxr)match;
      if (lxr.Lxr_tid() != Gfs_lxr_.Tid_whitespace) {
        ctx.Process_lxr(i, lxr);
        return Gfs_lxr_.Rv_lxr;
      }
      // still whitespace; keep scanning
    }
    return Gfs_lxr_.Rv_eos;
  }
}
/** Lexer for a non-nesting comment delimited by bgn_bry and end_bry. */
class Gfs_lxr_comment_flat implements Gfs_lxr {
  byte[] bgn_bry, end_bry; int bgn_bry_len, end_bry_len;

  public Gfs_lxr_comment_flat(byte[] bgn_bry, byte[] end_bry) {
    this.bgn_bry = bgn_bry;
    this.bgn_bry_len = bgn_bry.length;
    this.end_bry = end_bry;
    this.end_bry_len = end_bry.length;
  }

  public int Lxr_tid() {
    return Gfs_lxr_.Tid_comment;
  }

  /** Returns the position after the closing end_bry, or the source length when the comment is never closed. */
  public int Process(Gfs_parser_ctx ctx, int lxr_bgn, int lxr_end) {
    byte[] src = ctx.Src();
    int src_len = ctx.Src_len();
    int close_pos = Bry_find_.Find_fwd(src, end_bry, lxr_end, src_len);
    if (close_pos == Bry_.NotFound) {
      // allow eos to terminate flat comment (an unclosed comment is deliberately not an error);
      // needed for "tidy-always-adds-nl-in-textarea" fix; NOTE: DATE:2014-06-21
      return src_len;
    }
    return close_pos + end_bry_len; // position after end_bry
  }
}
/** Lexer for identifiers: consumes consecutive identifier tokens until a different token, an unknown byte, or end of source. */
class Gfs_lxr_identifier implements Gfs_lxr {
  public static final Gfs_lxr_identifier _ = new Gfs_lxr_identifier(); Gfs_lxr_identifier() {}

  public int Lxr_tid() {
    return Gfs_lxr_.Tid_identifier;
  }

  /**
   * Scans forward from end.
   * Returns Rv_null after reporting an unmatched byte, Rv_lxr after holding the word and reporting the next lexer,
   * or Rv_eos (after notifying the ctx) when the source ends inside the identifier.
   */
  public int Process(Gfs_parser_ctx ctx, int bgn, int end) {
    byte[] src = ctx.Src();
    int src_len = ctx.Src_len();
    for (int i = end; i < src_len; i++) {
      byte b = src[i];
      Object match = ctx.Trie().Match_bgn_w_byte(b, src, i, src_len);
      if (match == null) { // invalid char; stop;
        ctx.Process_null(i);
        return Gfs_lxr_.Rv_null;
      }
      Gfs_lxr lxr = (Gfs_lxr)match;
      if (lxr.Lxr_tid() == Gfs_lxr_.Tid_identifier)
        continue; // still an identifier
      // new lxr (EX: "." in "abc."); hold word of "abc"; mark "." as new lxr
      ctx.Hold_word(bgn, i);
      ctx.Process_lxr(i, lxr);
      return Gfs_lxr_.Rv_lxr;
    }
    ctx.Process_eos(); // eos
    return Gfs_lxr_.Rv_eos;
  }
}
/** Lexer for ";": ends the current statement; what is done depends on the previous lexer. */
class Gfs_lxr_semic implements Gfs_lxr {
  public static final Gfs_lxr_semic _ = new Gfs_lxr_semic(); Gfs_lxr_semic() {}

  public int Lxr_tid() {
    return Gfs_lxr_.Tid_semic;
  }

  public int Process(Gfs_parser_ctx ctx, int bgn, int end) {
    int prv = ctx.Prv_lxr();
    if (prv == Gfs_lxr_.Tid_identifier) { // a;
      ctx.Make_nde(bgn, end);
      ctx.Cur_nde_from_stack();
    }
    else if (prv == Gfs_lxr_.Tid_quote || prv == Gfs_lxr_.Tid_paren_end) { // a();
      ctx.Cur_nde_from_stack();
    }
    else if (prv == Gfs_lxr_.Tid_semic) {
      // a;; ignore;
    }
    else {
      ctx.Err_mgr().Fail_invalid_lxr(ctx, bgn, this.Lxr_tid(), Byte_ascii.Semic);
    }
    return end;
  }
}
/** Lexer for ".": valid only after an identifier or a closing paren. */
class Gfs_lxr_dot implements Gfs_lxr {
  public static final Gfs_lxr_dot _ = new Gfs_lxr_dot(); Gfs_lxr_dot() {}

  public int Lxr_tid() {
    return Gfs_lxr_.Tid_dot;
  }

  public int Process(Gfs_parser_ctx ctx, int bgn, int end) {
    int prv = ctx.Prv_lxr();
    if (prv == Gfs_lxr_.Tid_identifier) { // a.
      ctx.Make_nde(bgn, end);
    }
    else if (prv == Gfs_lxr_.Tid_paren_end) {
      // a().
    }
    else {
      ctx.Err_mgr().Fail_invalid_lxr(ctx, bgn, this.Lxr_tid(), Byte_ascii.Dot);
    }
    return end;
  }
}
/** Lexer for "(": valid only directly after an identifier. */
class Gfs_lxr_paren_bgn implements Gfs_lxr {
  public static final Gfs_lxr_paren_bgn _ = new Gfs_lxr_paren_bgn(); Gfs_lxr_paren_bgn() {}

  public int Lxr_tid() {
    return Gfs_lxr_.Tid_paren_bgn;
  }

  public int Process(Gfs_parser_ctx ctx, int bgn, int end) {
    boolean prv_is_identifier = ctx.Prv_lxr() == Gfs_lxr_.Tid_identifier;
    if (prv_is_identifier) // a(;
      ctx.Make_nde(bgn, end);
    else
      ctx.Err_mgr().Fail_invalid_lxr(ctx, bgn, this.Lxr_tid(), Byte_ascii.Paren_bgn);
    return end;
  }
}
/** Lexer for ")": closes an argument list; a pending identifier becomes an attribute. */
class Gfs_lxr_paren_end implements Gfs_lxr {
  public static final Gfs_lxr_paren_end _ = new Gfs_lxr_paren_end(); Gfs_lxr_paren_end() {}

  public int Lxr_tid() {
    return Gfs_lxr_.Tid_paren_end;
  }

  public int Process(Gfs_parser_ctx ctx, int bgn, int end) {
    int prv = ctx.Prv_lxr();
    if (prv == Gfs_lxr_.Tid_paren_bgn || prv == Gfs_lxr_.Tid_quote) {
      // "))", "abc)", "'abc')"; nothing to do
    }
    else if (prv == Gfs_lxr_.Tid_identifier) { // 123)
      ctx.Make_atr_by_idf();
    }
    else {
      ctx.Err_mgr().Fail_invalid_lxr(ctx, bgn, this.Lxr_tid(), Byte_ascii.Paren_end);
    }
    return end;
  }
}
/** Lexer for a quoted attribute value; a doubled end_bry inside the quote stands for one literal end_bry. */
class Gfs_lxr_quote implements Gfs_lxr {
// Only the length of bgn_bry is kept; end_bry is kept in full because it is searched for.
public Gfs_lxr_quote(byte[] bgn_bry, byte[] end_bry) {
this.bgn_bry_len = bgn_bry.length;
this.end_bry = end_bry; this.end_bry_len = end_bry.length;
} private byte[] end_bry; private int bgn_bry_len, end_bry_len;
public int Lxr_tid() {return Gfs_lxr_.Tid_quote;}
// Finds the closing end_bry after lxr_end, makes an attribute from the quoted text, and returns the position after the closing end_bry.
// Throws "quote is not closed" when no closing end_bry is found.
public int Process(Gfs_parser_ctx ctx, int lxr_bgn, int lxr_end) {
byte[] src = ctx.Src(); int src_len = ctx.Src_len();
int end_pos = Bry_find_.Find_fwd(src, end_bry, lxr_end, src_len);
if (end_pos == Bry_.NotFound) throw Err_.new_wo_type("quote is not closed", "end", String_.new_u8(end_bry));
Bry_bfr bfr = ctx.Tmp_bfr().Clear();
// prv_pos: start of the text not yet copied to bfr; nxt_pos: position just after the end_bry found at end_pos
int prv_pos = lxr_end;
int nxt_pos = end_pos + end_bry_len;
if (Bry_.Match(src, nxt_pos, nxt_pos + end_bry_len, end_bry)) { // end_bry is doubled; EX: end_bry = ' and raw = a''
// each pass copies one segment plus one literal end_bry, then looks for the next end_bry; the loop ends at the first end_bry that is not doubled
while (true) {
bfr.Add_mid(src, prv_pos, end_pos); // add everything up to end_bry
bfr.Add(end_bry); // add end_bry
prv_pos = nxt_pos + end_bry_len; // set prv_pos to after doubled end_bry
end_pos = Bry_find_.Find_fwd(src, end_bry, prv_pos, src_len);
if (end_pos == Bry_.NotFound) throw Err_.new_wo_type("quote is not closed", "end", String_.new_u8(end_bry));
nxt_pos = end_pos + end_bry_len;
if (!Bry_.Match(src, nxt_pos, nxt_pos + end_bry_len, end_bry)) {
bfr.Add_mid(src, prv_pos, end_pos);
break;
}
}
// the value differs from the raw source (doubled end_bry collapsed), so the built bytes are passed explicitly
ctx.Make_atr_by_bry(lxr_bgn + bgn_bry_len, end_pos, bfr.Xto_bry_and_clear());
}
else
// no doubled end_bry: the attribute is given as a range of src
ctx.Make_atr(lxr_bgn + bgn_bry_len, end_pos);
return end_pos + end_bry_len; // position after quote
}
}
/** Lexer for "{": pushes the current node onto the stack, creating it first when it follows an identifier. */
class Gfs_lxr_curly_bgn implements Gfs_lxr {
  public static final Gfs_lxr_curly_bgn _ = new Gfs_lxr_curly_bgn(); Gfs_lxr_curly_bgn() {}

  public int Lxr_tid() {
    return Gfs_lxr_.Tid_curly_bgn;
  }

  public int Process(Gfs_parser_ctx ctx, int bgn, int end) {
    int prv = ctx.Prv_lxr();
    if (prv == Gfs_lxr_.Tid_identifier) { // a{;
      ctx.Make_nde(bgn, end);
      ctx.Stack_add();
    }
    else if (prv == Gfs_lxr_.Tid_paren_end) { // a(){; NOTE: node exists but needs to be pushed onto stack
      ctx.Stack_add();
    }
    else {
      ctx.Err_mgr().Fail_invalid_lxr(ctx, bgn, this.Lxr_tid(), Byte_ascii.Curly_bgn);
    }
    return end;
  }
}
/** Lexer for the closing curly token: asks the ctx to pop its stack. */
class Gfs_lxr_curly_end implements Gfs_lxr {
public int Lxr_tid() {return Gfs_lxr_.Tid_curly_end;}
// Unlike the other punctuation lexers in this file, the previous lexer is not checked here.
public int Process(Gfs_parser_ctx ctx, int bgn, int end) {
ctx.Stack_pop(bgn);
return end;
}
public static final Gfs_lxr_curly_end _ = new Gfs_lxr_curly_end(); Gfs_lxr_curly_end() {}
}
/** Lexer for the assignment operator: makes a node and flags it as an assignment. */
class Gfs_lxr_equal implements Gfs_lxr {
  public static final Gfs_lxr_equal _ = new Gfs_lxr_equal(); Gfs_lxr_equal() {}

  public int Lxr_tid() {
    return Gfs_lxr_.Tid_eq;
  }

  public int Process(Gfs_parser_ctx ctx, int bgn, int end) {
    Gfs_nde nde = ctx.Make_nde(bgn, end);
    nde.Op_tid_(Gfs_nde.Op_tid_assign);
    return end;
  }
}
/** Lexer for ",": a pending identifier becomes an attribute; any other previous lexer is left alone. */
class Gfs_lxr_comma implements Gfs_lxr {
  public static final Gfs_lxr_comma _ = new Gfs_lxr_comma(); Gfs_lxr_comma() {}

  public int Lxr_tid() {
    return Gfs_lxr_.Tid_comma;
  }

  public int Process(Gfs_parser_ctx ctx, int bgn, int end) {
    if (ctx.Prv_lxr() == Gfs_lxr_.Tid_identifier) // 123,
      ctx.Make_atr_by_idf();
    return end;
  }
}

View File

@@ -0,0 +1,39 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.gfss; import gplx.*; import gplx.langs.*;
/** Constants for the gfs lexers: negative Rv_* result codes and positive Tid_* token-kind ids. */
class Gfs_lxr_ {
  public static final int Rv_init = -1;
  public static final int Rv_null = -2;
  public static final int Rv_eos = -3;
  public static final int Rv_lxr = -4;

  public static final int Tid_identifier = 1;
  public static final int Tid_dot = 2;
  public static final int Tid_semic = 3;
  public static final int Tid_paren_bgn = 4;
  public static final int Tid_paren_end = 5;
  public static final int Tid_curly_bgn = 6;
  public static final int Tid_curly_end = 7;
  public static final int Tid_quote = 8;
  public static final int Tid_comma = 9;
  public static final int Tid_whitespace = 10;
  public static final int Tid_comment = 11;
  public static final int Tid_eq = 12;

  /** Returns the display name of a Tid_* constant; throws for any other value. */
  public static String Tid__name(int tid) {
    String name;
    switch (tid) {
      case Tid_identifier: name = "identifier"; break;
      case Tid_dot:        name = "dot"; break;
      case Tid_semic:      name = "semic"; break;
      case Tid_paren_bgn:  name = "paren_bgn"; break;
      case Tid_paren_end:  name = "paren_end"; break;
      case Tid_curly_bgn:  name = "curly_bgn"; break;
      case Tid_curly_end:  name = "curly_end"; break;
      case Tid_quote:      name = "quote"; break;
      case Tid_comma:      name = "comma"; break;
      case Tid_whitespace: name = "whitespace"; break;
      case Tid_comment:    name = "comment"; break;
      case Tid_eq:         name = "eq"; break;
      default:             throw Err_.new_unhandled(tid);
    }
    return name;
  }
}

View File

@@ -0,0 +1,49 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.gfss; import gplx.*; import gplx.langs.*;
/** Builds a GfoMsg tree from gfs script by parsing it into Gfs_nde nodes and converting them. */
public class Gfs_msg_bldr implements GfoMsgParser {
  public static final Gfs_msg_bldr _ = new Gfs_msg_bldr(); Gfs_msg_bldr() {}
  public static final String Tkn_mutator = "_";
  Gfs_parser parser = new Gfs_parser();

  public GfoMsg ParseToMsg(String s) {
    return Bld(s);
  }

  public GfoMsg Bld(String src) {
    byte[] src_bry = Bry_.new_u8(src);
    return Bld(src_bry);
  }

  public GfoMsg Bld(byte[] src) {
    Gfs_nde root = parser.Parse(src);
    return Bld_msg(src, root);
  }

  /**
   * Converts nde (and, recursively, its subs) to a message.
   * An assignment node gets Tkn_mutator appended to its name, and each of its subs is added as an
   * unnamed argument holding the sub's key rather than as a sub message.
   */
  GfoMsg Bld_msg(byte[] src, Gfs_nde nde) {
    boolean op_is_assign = nde.Op_tid() == Gfs_nde.Op_tid_assign;
    String name = String_.new_u8(nde.Name_bry(src));
    if (op_is_assign)
      name = name + Tkn_mutator;
    GfoMsg rv = GfoMsg_.new_parse_(name);

    int atrs_len = nde.Atrs_len();
    for (int atr_idx = 0; atr_idx < atrs_len; atr_idx++) {
      Gfs_nde atr = nde.Atrs_get_at(atr_idx);
      String atr_val = String_.new_u8(atr.Name_bry(src));
      rv.Add("", atr_val);
    }

    int subs_len = nde.Subs_len();
    for (int sub_idx = 0; sub_idx < subs_len; sub_idx++) {
      GfoMsg sub_msg = Bld_msg(src, nde.Subs_get_at(sub_idx));
      if (op_is_assign) // NOTE: for now, assigns cannot be nested; EX: "a.b = c;" is okay but "a.b = c.d;" is not
        rv.Add("", sub_msg.Key());
      else
        rv.Subs_add(sub_msg);
    }
    return rv;
  }
}

View File

@@ -0,0 +1,76 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.gfss; import gplx.*; import gplx.langs.*;
import org.junit.*; import gplx.core.strings.*;
/** Checks the messages that Gfs_msg_bldr builds for simple calls, dotted chains, arguments and assignments. */
public class Gfs_msg_bldr_tst {
  Gfs_msg_bldr_fxt fxt = new Gfs_msg_bldr_fxt();

  @Before public void init() {
    fxt.Clear();
  }

  @Test public void Basic() {
    GfoMsg expd = fxt.msg_("a");
    fxt.Test_build("a;", expd);
  }

  /** Each dotted segment becomes a sub of the previous one. */
  @Test public void Dot() {
    GfoMsg expd = fxt.msg_("a").Subs_(fxt.msg_("b").Subs_(fxt.msg_("c")));
    fxt.Test_build("a.b.c;", expd);
  }

  @Test public void Args() {
    GfoMsg expd = fxt.msg_("a", fxt.kv_("", "b"), fxt.kv_("", "c"));
    fxt.Test_build("a('b', 'c');", expd);
  }

  @Test public void Args_num() {
    GfoMsg expd = fxt.msg_("a", fxt.kv_("", "1"));
    fxt.Test_build("a(1);", expd);
  }

  /** Assignment appends the mutator token "_" to the name. */
  @Test public void Assign() {
    GfoMsg expd = fxt.msg_("a_", fxt.kv_("", "b"));
    fxt.Test_build("a = 'b';", expd);
  }

  @Test public void Assign_num() {
    GfoMsg expd = fxt.msg_("a_", fxt.kv_("", "1"));
    fxt.Test_build("a = 1;", expd);
  }
}
/** Test fixture: builds expected messages and compares them, as text, with what Gfs_msg_bldr produces. */
class Gfs_msg_bldr_fxt {
  String_bldr sb = String_bldr_.new_();
  Gfs_msg_bldr msg_bldr = Gfs_msg_bldr._;

  public void Clear() {}

  public KeyVal kv_(String key, String val) {
    return KeyVal_.new_(key, val);
  }

  /** Creates a message named key and adds each key/value pair as an argument. */
  public GfoMsg msg_(String key, KeyVal... args) {
    GfoMsg msg = GfoMsg_.new_parse_(key);
    for (KeyVal arg : args)
      msg.Add(arg.Key(), arg.Val());
    return msg;
  }

  /** Builds raw and asserts that the subs of the resulting root render the same as expd. */
  public void Test_build(String raw, GfoMsg... expd) {
    GfoMsg root = msg_bldr.Bld(raw);
    String expd_str = Xto_str(expd);
    String actl_str = Xto_str(To_ary(root));
    Tfds.Eq_str_lines(expd_str, actl_str);
  }

  /** Copies the subs of msg into an array. */
  GfoMsg[] To_ary(GfoMsg msg) {
    int subs_len = msg.Subs_count();
    GfoMsg[] subs = new GfoMsg[subs_len];
    for (int idx = 0; idx < subs_len; idx++)
      subs[idx] = msg.Subs_getAt(idx);
    return subs;
  }

  /** Renders each message with To_str, separated by crlf. */
  String Xto_str(GfoMsg[] ary) {
    boolean first = true;
    for (GfoMsg msg : ary) {
      if (!first) sb.Add_char_crlf();
      first = false;
      sb.Add(msg.To_str());
    }
    return sb.Xto_str_and_clear();
  }
}

View File

@@ -0,0 +1,85 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.gfss; import gplx.*; import gplx.langs.*;
/**
 * A node of parsed gfs script. The name is either an explicit byte array or a range into the source;
 * children are kept in two growable arrays: subs (nested nodes) and atrs (arguments).
 */
public class Gfs_nde {
  public static final Gfs_nde[] Ary_empty = new Gfs_nde[0];
  public static final byte Op_tid_null = 0, Op_tid_assign = 1;

  private byte[] name;
  private int name_bgn = -1;
  private int name_end = -1;
  private byte op_tid;
  private int subs_len;
  private int args_len;
  Gfs_nde[] subs = Gfs_nde.Ary_empty; int subs_max; int[] subs_pos_ary = Int_.Ary_empty;
  Gfs_nde[] args = Gfs_nde.Ary_empty; int args_max; int[] args_pos_ary = Int_.Ary_empty;

  /** Returns the explicit name when one was set; otherwise the bytes of src in the name range. */
  public byte[] Name_bry(byte[] src) {
    if (name != null) return name;
    return Bry_.Mid(src, name_bgn, name_end);
  }
  public byte[] Name() {return name;}
  public Gfs_nde Name_(byte[] v) {name = v; return this;}
  public int Name_bgn() {return name_bgn;}
  public int Name_end() {return name_end;}
  public Gfs_nde Name_rng_(int name_bgn, int name_end) {
    this.name_bgn = name_bgn;
    this.name_end = name_end;
    return this;
  }
  public byte Op_tid() {return op_tid;}
  public Gfs_nde Op_tid_(byte v) {op_tid = v; return this;}

  /** Nulls out the used slots and resets the count; the backing array keeps its capacity. */
  public void Subs_clear() {
    int idx = 0;
    while (idx < subs_len) {
      subs[idx] = null;
      idx++;
    }
    subs_len = 0;
  }
  public int Subs_len() {return subs_len;}
  public Gfs_nde Subs_add_many(Gfs_nde... ary) {
    for (Gfs_nde itm : ary)
      Subs_add(itm);
    return this;
  }
  /** Appends nde; when the array is full it is reallocated to twice the needed length. */
  public Gfs_nde Subs_add(Gfs_nde nde) {
    int needed = subs_len + 1;
    boolean must_grow = needed > subs_max;
    if (must_grow) {
      subs_max = needed * 2;
      Gfs_nde[] grown = new Gfs_nde[subs_max];
      Array_.Copy_to(subs, 0, grown, 0, subs_len);
      subs = grown;
    }
    subs[subs_len] = nde;
    subs_len = needed;
    return this;
  }
  public Gfs_nde Subs_get_at(int i) {return subs[i];}
  /** Returns a copy holding exactly the used slots. */
  public Gfs_nde[] Subs_to_ary() {
    Gfs_nde[] copy = new Gfs_nde[subs_len];
    System.arraycopy(subs, 0, copy, 0, subs_len);
    return copy;
  }

  public int Atrs_len() {return args_len;}
  public Gfs_nde Atrs_get_at(int i) {return args[i];}
  public Gfs_nde Atrs_add_many(Gfs_nde... ary) {
    for (Gfs_nde itm : ary)
      Atrs_add(itm);
    return this;
  }
  /** Appends nde; when the array is full it is reallocated to twice the needed length. */
  public Gfs_nde Atrs_add(Gfs_nde nde) {
    int needed = args_len + 1;
    boolean must_grow = needed > args_max;
    if (must_grow) {
      args_max = needed * 2;
      Gfs_nde[] grown = new Gfs_nde[args_max];
      Array_.Copy_to(args, 0, grown, 0, args_len);
      args = grown;
    }
    args[args_len] = nde;
    args_len = needed;
    return this;
  }
  /** Returns a copy holding exactly the used slots. */
  public Gfs_nde[] Atrs_to_ary() {
    Gfs_nde[] copy = new Gfs_nde[args_len];
    System.arraycopy(args, 0, copy, 0, args_len);
    return copy;
  }
}

View File

@@ -0,0 +1,104 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.gfss; import gplx.*; import gplx.langs.*;
import gplx.core.btries.*;
// Parses gfs source bytes into a tree of Gfs_nde under a single reused root node.
public class Gfs_parser {
	Btrie_fast_mgr trie = Gfs_parser_.trie_();
	Gfs_parser_ctx ctx = new Gfs_parser_ctx();
	// Clears the root's subs, then scans src: each position is matched against the trie and the matching lexer (plus any lexers it chains to) is run.
	// Fails via the ctx's error manager on an unmatched byte, or when the last significant lexer was neither ';' nor '}'.
	public Gfs_nde Parse(byte[] src) {
		ctx.Root().Subs_clear();
		int src_len = src.length;
		if (src_len == 0) return ctx.Root();
		ctx.Init(trie, src, src_len);
		int pos = 0;
		while (pos < src_len) {
			byte cur_byte = src[pos];
			Object match = trie.Match_bgn_w_byte(cur_byte, src, pos, src_len);
			if (match == null) {
				ctx.Err_mgr().Fail_unknown_char(ctx, pos, cur_byte);
				continue;
			}
			pos = Run_lxrs((Gfs_lxr)match, pos, src_len);
		}
		int last_tid = ctx.Prv_lxr();
		boolean ended_ok = last_tid == Gfs_lxr_.Tid_curly_end || last_tid == Gfs_lxr_.Tid_semic;
		if (!ended_ok)
			ctx.Err_mgr().Fail_eos(ctx);
		return ctx.Root();
	}
	// Runs lxr and every lexer it hands off to; returns the position at which trie matching should resume.
	private int Run_lxrs(Gfs_lxr lxr, int pos, int src_len) {
		while (lxr != null) {
			int rslt = lxr.Process(ctx, pos, trie.Match_pos());
			int tid = lxr.Lxr_tid();
			boolean is_ws_or_comment = tid == Gfs_lxr_.Tid_whitespace || tid == Gfs_lxr_.Tid_comment;
			if (!is_ws_or_comment)					// whitespace / comments never become the "previous lexer"
				ctx.Prv_lxr_(lxr.Lxr_tid());
			if (rslt == Gfs_lxr_.Rv_lxr) {			// lexer handed off to another lexer stored in ctx
				pos = ctx.Nxt_pos();
				lxr = ctx.Nxt_lxr();
			}
			else if (rslt == Gfs_lxr_.Rv_eos) {		// lexer consumed the rest of the source
				pos = src_len;
				lxr = null;
			}
			else {									// any other result is the next position
				pos = rslt;
				lxr = null;
			}
		}
		return pos;
	}
}
// Static helpers for Gfs_parser; builds the trie that maps leading bytes / byte sequences to lexers.
class Gfs_parser_ {
	// Returns a new trie with every lexer registered: identifiers, whitespace, quotes, comments and the single-byte symbols.
	public static Btrie_fast_mgr trie_() {
		Btrie_fast_mgr trie = Btrie_fast_mgr.ci_a7();	// NOTE:ci.ascii:gfs;letters/symbols only;

		// identifiers: a-z, A-Z, 0-9 and '_'
		Gfs_lxr_identifier idf_lxr = Gfs_lxr_identifier._;
		trie_add_rng(trie, idf_lxr, Byte_ascii.Ltr_a, Byte_ascii.Ltr_z);
		trie_add_rng(trie, idf_lxr, Byte_ascii.Ltr_A, Byte_ascii.Ltr_Z);
		trie_add_rng(trie, idf_lxr, Byte_ascii.Num_0, Byte_ascii.Num_9);
		trie.Add(Byte_ascii.Underline, idf_lxr);

		// whitespace
		trie_add_many(trie, Gfs_lxr_whitespace._, Byte_ascii.Space, Byte_ascii.Nl, Byte_ascii.Cr, Byte_ascii.Tab);

		// quotes: same-byte delimiters, then the multi-byte block forms
		trie_add_quote(trie, new byte[] {Byte_ascii.Apos});
		trie_add_quote(trie, new byte[] {Byte_ascii.Quote});
		trie_add_quote(trie, Bry_.new_a7("<:[\"\n"), Bry_.new_a7("\n\"]:>"));
		trie_add_quote(trie, Bry_.new_a7("<:['\n"), Bry_.new_a7("\n']:>"));

		// comments: "//" to newline, and "/*" to "*/"
		trie_add_comment(trie, new byte[] {Byte_ascii.Slash, Byte_ascii.Slash}, new byte[] {Byte_ascii.Nl});
		trie_add_comment(trie, new byte[] {Byte_ascii.Slash, Byte_ascii.Star}, new byte[] {Byte_ascii.Star, Byte_ascii.Slash});

		// single-byte symbols
		trie.Add(Byte_ascii.Semic, Gfs_lxr_semic._);
		trie.Add(Byte_ascii.Paren_bgn, Gfs_lxr_paren_bgn._);
		trie.Add(Byte_ascii.Paren_end, Gfs_lxr_paren_end._);
		trie.Add(Byte_ascii.Curly_bgn, Gfs_lxr_curly_bgn._);
		trie.Add(Byte_ascii.Curly_end, Gfs_lxr_curly_end._);
		trie.Add(Byte_ascii.Dot, Gfs_lxr_dot._);
		trie.Add(Byte_ascii.Comma, Gfs_lxr_comma._);
		trie.Add(Byte_ascii.Eq, Gfs_lxr_equal._);
		return trie;
	}
	// Registers lxr for every byte from bgn through end, inclusive.
	private static void trie_add_rng(Btrie_fast_mgr trie, Gfs_lxr lxr, byte bgn, byte end) {
		byte cur = bgn;
		while (cur <= end) {
			trie.Add(cur, lxr);
			cur++;
		}
	}
	// Registers lxr for each listed byte.
	private static void trie_add_many(Btrie_fast_mgr trie, Gfs_lxr lxr, byte... ary) {
		for (byte itm : ary)
			trie.Add(itm, lxr);
	}
	// Registers a quote lexer whose closing delimiter equals its opening delimiter.
	private static void trie_add_quote(Btrie_fast_mgr trie, byte[] bgn) {
		trie_add_quote(trie, bgn, bgn);
	}
	// Registers a quote lexer keyed on its opening delimiter.
	private static void trie_add_quote(Btrie_fast_mgr trie, byte[] bgn, byte[] end) {
		Gfs_lxr_quote quote_lxr = new Gfs_lxr_quote(bgn, end);
		trie.Add(bgn, quote_lxr);
	}
	// Registers a comment lexer keyed on its opening delimiter.
	private static void trie_add_comment(Btrie_fast_mgr trie, byte[] bgn, byte[] end) {
		Gfs_lxr_comment_flat comment_lxr = new Gfs_lxr_comment_flat(bgn, end);
		trie.Add(bgn, comment_lxr);
	}
}

View File

@@ -0,0 +1,126 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.gfss; import gplx.*; import gplx.langs.*;
import gplx.core.btries.*;
// Mutable state shared between Gfs_parser and its lexers during one parse: the source, the root / current node, the node stack, the held identifier range and the lexer hand-off slots.
class Gfs_parser_ctx {
	public Btrie_fast_mgr Trie() {return trie;} Btrie_fast_mgr trie;
	public Gfs_nde Root() {return root;} Gfs_nde root = new Gfs_nde();
	public byte[] Src() {return src;} private byte[] src;
	public int Src_len() {return src_len;} private int src_len;
	public int Prv_lxr() {return prv_lxr;} public Gfs_parser_ctx Prv_lxr_(int v) {prv_lxr = v; return this;} private int prv_lxr;
	public Gfs_nde Cur_nde() {return cur_nde;} Gfs_nde cur_nde;
	public int Nxt_pos() {return nxt_pos;} private int nxt_pos;
	public Gfs_lxr Nxt_lxr() {return nxt_lxr;} Gfs_lxr nxt_lxr;
	public Bry_bfr Tmp_bfr() {return tmp_bfr;} private Bry_bfr tmp_bfr = Bry_bfr.new_();
	public void Process_eos() {}
	// Records a hand-off: the parser resumes at nxt_pos with nxt_lxr.
	public void Process_lxr(int nxt_pos, Gfs_lxr nxt_lxr) {this.nxt_pos = nxt_pos; this.nxt_lxr = nxt_lxr;}
	// Records a position with no lexer to hand off to.
	public void Process_null(int cur_pos) {this.nxt_pos = cur_pos; this.nxt_lxr = null;}
	// Prepares the ctx for a new parse of src.
	// The node stack and the held identifier are reset so that a reused ctx does not carry state from an earlier
	// (possibly failed) parse: previously each call pushed another root without clearing the stack, which let
	// Stack_pop succeed where a fresh ctx would report "node stack empty", and a leftover held identifier could
	// refer to offsets in the previous source.
	// NOTE(review): prv_lxr is left untouched; its correct initial value depends on Gfs_lxr_, which is not visible here -- confirm whether it should also be reset.
	public void Init(Btrie_fast_mgr trie, byte[] src, int src_len) {
		this.trie = trie; this.src = src; this.src_len = src_len;
		nodes = List_adp_.new_();
		Held_word_clear();
		cur_nde = root;
		Stack_add();
	}
	// Remembers the source range of an identifier until a following token decides whether it names a node or an attribute.
	public void Hold_word(int bgn, int end) {
		cur_idf_bgn = bgn;
		cur_idf_end = end;
	}	int cur_idf_bgn = -1, cur_idf_end = -1;
	private void Held_word_clear() {cur_idf_bgn = -1; cur_idf_end = -1;}
	// Creates a node named by the held identifier, adds it under the current node and makes it current. tkn_bgn / tkn_end are unused.
	public Gfs_nde Make_nde(int tkn_bgn, int tkn_end) {	// "abc."; "abc("; "abc;"; "abc{"
		Gfs_nde nde = new Gfs_nde().Name_rng_(cur_idf_bgn, cur_idf_end);
		this.Held_word_clear();
		cur_nde.Subs_add(nde);
		cur_nde = nde;
		return nde;
	}
	// Adds the held identifier to the current node as an attribute.
	public void Make_atr_by_idf()								{Make_atr(cur_idf_bgn, cur_idf_end); Held_word_clear();}
	// Adds an attribute with the given range and an explicit name.
	public void Make_atr_by_bry(int bgn, int end, byte[] bry)	{Make_atr(bgn, end).Name_(bry);}
	// Adds an attribute named by the source range bgn..end to the current node.
	public Gfs_nde Make_atr(int bgn, int end) {
		Gfs_nde nde = new Gfs_nde().Name_rng_(bgn, end);
		cur_nde.Atrs_add(nde);
		return nde;
	}
	// Makes the top of the stack the current node.
	public void Cur_nde_from_stack() {cur_nde = (Gfs_nde)nodes.Get_at_last();}
	// Pushes the current node.
	public void Stack_add() {nodes.Add(cur_nde);} List_adp nodes = List_adp_.new_();
	// Pops the top node and makes the new top current; reports an error at pos when fewer than two nodes are on the stack.
	public void Stack_pop(int pos) {
		if (nodes.Count() < 2) err_mgr.Fail_nde_stack_empty(this, pos);	// NOTE: need at least 2 items; 1 to pop and 1 to set as current
		List_adp_.DelAt_last(nodes);
		Cur_nde_from_stack();
	}
	public Gfs_err_mgr Err_mgr() {return err_mgr;} Gfs_err_mgr err_mgr = new Gfs_err_mgr();
}
// Builds and throws parse errors. Every message has the form "<type>: key='val' ..." and starts with three standard args: excerpt_bgn, excerpt_end and pos.
class Gfs_err_mgr {
	public static final String Fail_msg_invalid_lxr = "invalid character", Fail_msg_unknown_char = "unknown char", Fail_msg_eos = "end of stream", Fail_msg_nde_stack_empty = "node stack empty";
	private static final int excerpt_len = 50;	// max number of source bytes shown on each side of the failing position
	static final byte[] Esc_nl = Bry_.new_a7("\\n"), Esc_cr = Bry_.new_a7("\\r"), Esc_tab = Bry_.new_a7("\\t");
	Bry_bfr tmp_fail_bfr = Bry_bfr.reset_(255);
	KeyValList tmp_fail_args = new KeyValList();

	// Source ended while the last significant lexer was not a terminator; reported at the end of the source.
	public void Fail_eos(Gfs_parser_ctx ctx) {Fail(ctx, Fail_msg_eos, ctx.Src_len());}
	// No lexer is registered for byte c at pos.
	public void Fail_unknown_char(Gfs_parser_ctx ctx, int pos, byte c) {Fail(ctx, Fail_msg_unknown_char, pos, KeyVal_.new_("char", Char_.To_str((char)c)));}
	// A pop was requested with fewer than two nodes on the stack.
	public void Fail_nde_stack_empty(Gfs_parser_ctx ctx, int pos) {Fail(ctx, Fail_msg_nde_stack_empty, pos);}
	// The lexer lxr_tid is not allowed at pos; the message also names the previous lexer recorded in ctx.
	public void Fail_invalid_lxr(Gfs_parser_ctx ctx, int pos, int lxr_tid, byte c) {
		Fail(ctx, Fail_msg_invalid_lxr, pos, KeyVal_.new_("char", Char_.To_str((char)c)), KeyVal_.new_("cur_lxr", Gfs_lxr_.Tid__name(lxr_tid)), KeyVal_.new_("prv_lxr", Gfs_lxr_.Tid__name(ctx.Prv_lxr())));
	}
	// Collects the standard args plus the caller's args and always throws the resulting error.
	private void Fail(Gfs_parser_ctx ctx, String msg, int pos, KeyVal... args) {
		// Start from an empty list on every failure: the list used to be reused without being cleared, so the
		// second and later errors raised by the same instance repeated the args of all earlier errors.
		tmp_fail_args = new KeyValList();
		byte[] src = ctx.Src(); int src_len = ctx.Src_len();
		Fail_args_standard(src, src_len, pos);
		for (KeyVal arg : args)
			tmp_fail_args.Add(arg.Key(), arg.Val_to_str_or_empty());
		throw Err_.new_wo_type(Fail_msg(msg, tmp_fail_args));
	}
	// Adds the excerpt before pos, the excerpt from pos onwards, and pos itself.
	private void Fail_args_standard(byte[] src, int src_len, int pos) {
		tmp_fail_args.Add("excerpt_bgn", Fail_excerpt_bgn(src, src_len, pos));
		tmp_fail_args.Add("excerpt_end", Fail_excerpt_end(src, src_len, pos));
		tmp_fail_args.Add("pos"		, pos);
	}
	// Formats type followed by ':' and then " key='val'" for each arg.
	String Fail_msg(String type, KeyValList fail_args) {
		tmp_fail_bfr.Add_str(type).Add_byte(Byte_ascii.Colon);
		int len = fail_args.Count();
		for (int i = 0; i < len; i++) {
			tmp_fail_bfr.Add_byte(Byte_ascii.Space);
			KeyVal kv = fail_args.GetAt(i);
			tmp_fail_bfr.Add_str(kv.Key());
			tmp_fail_bfr.Add_byte(Byte_ascii.Eq).Add_byte(Byte_ascii.Apos);
			tmp_fail_bfr.Add_str(kv.Val_to_str_or_empty()).Add_byte(Byte_ascii.Apos);
		}
		return tmp_fail_bfr.Xto_str_and_clear();
	}
	// Returns up to excerpt_len bytes of src that precede pos, with tab / nl / cr escaped.
	String Fail_excerpt_bgn(byte[] src, int src_len, int pos) {
		int bgn = pos - excerpt_len; if (bgn < 0) bgn = 0;
		Fail_excerpt_rng(tmp_fail_bfr, src, bgn, pos);
		return tmp_fail_bfr.Xto_str_and_clear();
	}
	// Returns up to excerpt_len bytes of src starting at pos (clipped to src_len), with tab / nl / cr escaped.
	String Fail_excerpt_end(byte[] src, int src_len, int pos) {
		int end = pos + excerpt_len; if (end > src_len) end = src_len;
		Fail_excerpt_rng(tmp_fail_bfr, src, pos, end);
		return tmp_fail_bfr.Xto_str_and_clear();
	}
	// Appends src[bgn..end) to bfr, writing tab / nl / cr as the two-character escapes \t, \n, \r.
	private static void Fail_excerpt_rng(Bry_bfr bfr, byte[] src, int bgn, int end) {
		for (int i = bgn; i < end; i++) {
			byte b = src[i];
			switch (b) {
				case Byte_ascii.Tab:	bfr.Add(Esc_tab); break;
				case Byte_ascii.Nl:		bfr.Add(Esc_nl); break;
				case Byte_ascii.Cr:		bfr.Add(Esc_cr); break;
				default:				bfr.Add_byte(b); break;
			}
		}
	}
}

View File

@@ -0,0 +1,196 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.gfss; import gplx.*; import gplx.langs.*;
import org.junit.*;
// JUnit tests for Gfs_parser. Each case parses a script and compares the resulting node tree with an expected tree built through the fixture.
// In the expected trees, nde_ builds a named node and val_ builds an attribute value; Subs_add nests child nodes and Atrs_add attaches attributes.
public class Gfs_parser_tst {
@Before public void init() {fxt.Clear();} Gfs_parser_fxt fxt = new Gfs_parser_fxt();
// ';' ends a statement; each statement becomes a peer node under the root. Identifiers may contain '_' and digits.
@Test public void Semicolon() {
fxt .Test_parse("a;", fxt.nde_("a"));
fxt .Test_parse("a;b;c;", fxt.nde_("a"), fxt.nde_("b"), fxt.nde_("c"));
fxt .Test_parse("a_0;", fxt.nde_("a_0"));
}
// '.' makes the following identifier a sub of the preceding one.
@Test public void Dot() {
fxt .Test_parse("a.b;", fxt.nde_("a").Subs_add(fxt.nde_("b")));
fxt .Test_parse("a.b;c.d;", fxt.nde_("a").Subs_add(fxt.nde_("b")), fxt.nde_("c").Subs_add(fxt.nde_("d")));
}
// Empty parens add no attributes.
@Test public void Parens() {
fxt .Test_parse("a();b();", fxt.nde_("a"), fxt.nde_("b"));
fxt .Test_parse("a().b();c().d();", fxt.nde_("a").Subs_add(fxt.nde_("b")), fxt.nde_("c").Subs_add(fxt.nde_("d")));
}
// Unquoted numeric arguments become attributes.
@Test public void Num() {
fxt .Test_parse("a(1,2);", fxt.nde_("a").Atrs_add_many(fxt.val_("1"), fxt.val_("2")));
}
// A quoted argument becomes an attribute holding the text between the quotes.
@Test public void Quote() {
fxt .Test_parse("a('b');", fxt.nde_("a").Atrs_add(fxt.val_("b")));
}
// A doubled quote inside a quoted value yields one literal quote.
@Test public void Quote_escaped() {
fxt .Test_parse("a('b''c''d');", fxt.nde_("a").Atrs_add(fxt.val_("b'c'd")));
}
// Two consecutive doubled quotes yield two literal quotes.
@Test public void Quote_escaped_2() {
fxt .Test_parse("a('a''''b');", fxt.nde_("a").Atrs_add(fxt.val_("a''b")));
}
// The other quote character needs no escaping inside a quoted value.
@Test public void Quote_mixed() {
fxt .Test_parse("a('b\"c');", fxt.nde_("a").Atrs_add(fxt.val_("b\"c")));
}
// ',' separates arguments.
@Test public void Comma() {
fxt .Test_parse("a('b','c','d');", fxt.nde_("a").Atrs_add_many(fxt.val_("b"), fxt.val_("c"), fxt.val_("d")));
}
// Whitespace between tokens is ignored.
@Test public void Ws() {
fxt .Test_parse(" a ( 'b' , 'c' ) ; ", fxt.nde_("a").Atrs_add_many(fxt.val_("b"), fxt.val_("c")));
}
// "//" comments run to the newline and produce no nodes, including at the end of the script.
@Test public void Comment_slash_slash() {
fxt .Test_parse("//z\na;//y\n", fxt.nde_("a"));
}
// "/* */" comments produce no nodes, including at the end of the script.
@Test public void Comment_slash_star() {
fxt .Test_parse("/*z*/a;/*y*/", fxt.nde_("a"));
}
// Statements inside curly braces become subs of the node before the brace.
@Test public void Curly() {
fxt .Test_parse("a{b;}", fxt.nde_("a").Subs_add(fxt.nde_("b")));
}
// Curly braces nest.
@Test public void Curly_nest() {
fxt .Test_parse("a{b{c{d;}}}"
, fxt.nde_("a").Subs_add
( fxt.nde_("b").Subs_add
( fxt.nde_("c").Subs_add
( fxt.nde_("d")
))));
}
// After a nested block closes, following statements are added as peers at the enclosing level.
@Test public void Curly_nest_peers() {
fxt .Test_parse(String_.Concat_lines_nl
( "a{"
, " a0{"
, " a00{"
, " a000;"
, " }"
, " a01;"
, " }"
, " a1;"
, "}"
)
, fxt.nde_("a").Subs_add_many
( fxt.nde_("a0").Subs_add_many
( fxt.nde_("a00").Subs_add
( fxt.nde_("a000")
)
, fxt.nde_("a01")
)
, fxt.nde_("a1")
));
}
// Inside a block, ';' after a dotted chain returns to the block's node, so the next statement is a peer of the chain's first node.
@Test public void Curly_dot() {
fxt .Test_parse("a{a0.a00;a1.a10;}"
, fxt.nde_("a").Subs_add_many
( fxt.nde_("a0").Subs_add_many(fxt.nde_("a00"))
, fxt.nde_("a1").Subs_add_many(fxt.nde_("a10"))
));
}
// "name='val'" attaches the value as an attribute of the named node; the last case omits the ';' before the closing brace.
@Test public void Eq() {
fxt .Test_parse("a='b';", fxt.nde_("a").Atrs_add(fxt.val_("b")));
fxt .Test_parse("a.b.c='d';"
, fxt.nde_("a").Subs_add
( fxt.nde_("b").Subs_add_many
( fxt.nde_("c").Atrs_add(fxt.val_("d"))
)));
fxt .Test_parse("a.b{c='d'; e='f'}"
, fxt.nde_("a").Subs_add
( fxt.nde_("b").Subs_add_many
( fxt.nde_("c").Atrs_add(fxt.val_("d"))
, fxt.nde_("e").Atrs_add(fxt.val_("f"))
)));
}
// Parens followed by a block, with whitespace around '=' and between lines.
@Test public void Curly_nest_peers2() {
fxt .Test_parse(String_.Concat_lines_nl
( "a() {"
, " k1 = 'v1';"
, "}"
)
, fxt.nde_("a").Subs_add_many
( fxt.nde_("k1").Atrs_add(fxt.val_("v1"))
)
);
}
// Malformed scripts must fail with the expected message type.
@Test public void Fail() {
fxt .Test_parse_fail("a(.);", Gfs_err_mgr.Fail_msg_invalid_lxr); // (.)
fxt .Test_parse_fail("a..b;", Gfs_err_mgr.Fail_msg_invalid_lxr); // ..
fxt .Test_parse_fail("a.;", Gfs_err_mgr.Fail_msg_invalid_lxr); // .;
fxt .Test_parse_fail("a", Gfs_err_mgr.Fail_msg_eos); // eos
fxt .Test_parse_fail("a;~;", Gfs_err_mgr.Fail_msg_unknown_char); // ~
}
}
// Test fixture for Gfs_parser: builds expected nodes, runs the parser and compares trees through a line-per-node text rendering.
class Gfs_parser_fxt {
	private static final byte[] Atr_name = Bry_.new_a7("name=");
	private Bry_bfr tmp_bfr = Bry_bfr.new_(), path_bfr = Bry_bfr.new_(); Gfs_parser parser = new Gfs_parser();

	public void Clear() {}
	// Builds a node with an explicit name.
	public Gfs_nde nde_(String v) {
		byte[] name = Bry_.new_a7(v);
		return new Gfs_nde().Name_(name);
	}
	// Builds an attribute value; constructed the same way as nde_.
	public Gfs_nde val_(String v) {
		byte[] name = Bry_.new_a7(v);
		return new Gfs_nde().Name_(name);
	}
	// Parses src_str and checks that the root's subs render the same as expd.
	public void Test_parse(String src_str, Gfs_nde... expd) {
		byte[] src_bry = Bry_.new_u8(src_str);
		Gfs_nde root = parser.Parse(src_bry);
		String expd_str = To_str(null, expd);	// expected nodes carry explicit names, so no source is needed
		Gfs_nde[] actl_ary = root.Subs_to_ary();
		String actl_str = To_str(src_bry, actl_ary);
		Tfds.Eq_str_lines(expd_str, actl_str);
	}
	// Parses src_str and checks that it throws and that the text before the first ":" of the error message contains expd_err.
	public void Test_parse_fail(String src_str, String expd_err) {
		byte[] src_bry = Bry_.new_u8(src_str);
		boolean threw = false;
		String actl_err = null;
		try {
			parser.Parse(src_bry);
		}
		catch (Exception e) {
			threw = true;
			String full_err = Err_.Message_gplx_full(e);
			actl_err = String_.GetStrBefore(full_err, ":");
		}
		if (!threw) {
			Tfds.Fail("expected to fail with " + expd_err);
			return;
		}
		if (!String_.Has(actl_err, expd_err))
			Tfds.Fail("expecting '" + expd_err + "' got '" + actl_err + "'");
	}
	// Renders each node (and its descendants) as "path:name=value" lines; the path of a top-level node is its index.
	public String To_str(byte[] src, Gfs_nde[] expd) {
		int idx = 0;
		for (Gfs_nde nde : expd) {
			path_bfr.Clear().Add_int_variable(idx);
			To_str(tmp_bfr, path_bfr, src, nde);
			idx++;
		}
		return tmp_bfr.Xto_str_and_clear();
	}
	// Renders nde, then its attributes (path suffix ".a", ".b", ...) and its subs (path suffix ".0", ".1", ...); path is restored before returning.
	public void To_str(Bry_bfr bfr, Bry_bfr path, byte[] src, Gfs_nde nde) {
		To_str_atr(bfr, path, src, Atr_name, nde.Name(), nde.Name_bgn(), nde.Name_end());
		int atrs_len = nde.Atrs_len();
		for (int i = 0; i < atrs_len; i++) {
			Gfs_nde atr = nde.Atrs_get_at(i);
			int path_len_old = path.Len();
			path.Add_byte(Byte_ascii.Dot).Add_byte((byte)(Byte_ascii.Ltr_a + i));
			To_str_child(bfr, path, src, atr, path_len_old);
		}
		int subs_len = nde.Subs_len();
		for (int i = 0; i < subs_len; i++) {
			Gfs_nde sub = nde.Subs_get_at(i);
			int path_len_old = path.Len();
			path.Add_byte(Byte_ascii.Dot).Add_int_variable(i);
			To_str_child(bfr, path, src, sub, path_len_old);
		}
	}
	// Renders child under the already-extended path, then removes the segment that was appended after path_len_old.
	private void To_str_child(Bry_bfr bfr, Bry_bfr path, byte[] src, Gfs_nde child, int path_len_old) {
		int path_len_new = path.Len();
		To_str(bfr, path, src, child);
		path.Del_by(path_len_new - path_len_old);
	}
	// Writes one "path:name<val>" line; val comes from the explicit bytes if present, else from src[val_bgn..val_end). Writes nothing when neither is set.
	private void To_str_atr(Bry_bfr bfr, Bry_bfr path_bfr, byte[] src, byte[] name, byte[] val, int val_bgn, int val_end) {
		boolean has_val = val != null || val_bgn != -1 || val_end != -1;
		if (!has_val) return;
		bfr.Add_bfr_and_preserve(path_bfr).Add_byte(Byte_ascii.Colon);
		bfr.Add(name);
		if (val != null)
			bfr.Add(val);
		else
			bfr.Add_mid(src, val_bgn, val_end);
		bfr.Add_byte_nl();
	}
}

View File

@@ -0,0 +1,46 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.gfss; import gplx.*; import gplx.langs.*;
// Writes gfs script fragments ("key{", "}", "key='val';") into an internal buffer.
public class Gfs_wtr {
	private byte quote_char = Byte_ascii.Apos;
	private Bry_bfr bfr = Bry_bfr.reset_(255);

	public byte Quote_char() {return quote_char;}
	public Gfs_wtr Quote_char_(byte v) {
		this.quote_char = v;
		return this;
	}
	public Bry_bfr Bfr() {return bfr;}

	// Writes "key{".
	public void Add_grp_bgn(byte[] key) {
		bfr.Add(key);
		bfr.Add_byte(Byte_ascii.Curly_bgn);
	}
	// Writes "}"; key is not written.
	public void Add_grp_end(byte[] key) {
		bfr.Add_byte(Byte_ascii.Curly_end);
	}
	// Writes key, '=', the quoted and escaped val, then ';'.
	public void Add_set_eq(byte[] key, byte[] val) {
		bfr.Add(key);
		bfr.Add_byte_eq();
		bfr.Add_byte(quote_char);
		Write_val(val);
		bfr.Add_byte(quote_char).Add_byte(Byte_ascii.Semic);
	}
	// Copies bry to the buffer, writing every occurrence of the quote char twice.
	private void Write_val(byte[] bry) {
		for (byte b : bry) {
			bfr.Add_byte(b);
			if (b == quote_char)
				bfr.Add_byte(b);	// second copy escapes the quote
		}
	}
}

View File

@@ -0,0 +1,29 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.htmls; import gplx.*; import gplx.langs.*;
// Names of common html attributes, as a String or as ASCII bytes.
public class Html_atr_ {
	public static final String Src_str = "src";

	public static final byte[] Id_bry = Bry_.new_a7("id");
	public static final byte[] Cls_bry = Bry_.new_a7("class");
	public static final byte[] Style_bry = Bry_.new_a7("style");
	public static final byte[] Href_bry = Bry_.new_a7("href");
}

View File

@@ -0,0 +1,35 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.htmls; import gplx.*; import gplx.langs.*;
// Html character references (named and numeric), as a String or as ASCII bytes.
public class Html_entity_ {
	public static final String Nl_str = "&#10;";

	public static final byte[] Lt_bry = Bry_.new_a7("&lt;");
	public static final byte[] Gt_bry = Bry_.new_a7("&gt;");
	public static final byte[] Amp_bry = Bry_.new_a7("&amp;");
	public static final byte[] Quote_bry = Bry_.new_a7("&quot;");
	public static final byte[] Apos_num_bry = Bry_.new_a7("&#39;");
	public static final byte[] Apos_key_bry = Bry_.new_a7("&apos;");
	public static final byte[] Eq_bry = Bry_.new_a7("&#61;");
	public static final byte[] Nl_bry = Bry_.new_a7(Nl_str);
	public static final byte[] Space_bry = Bry_.new_a7("&#32;");
	public static final byte[] Pipe_bry = Bry_.new_a7("&#124;");
	public static final byte[] Colon_bry = Bry_.new_a7("&#58;");
	public static final byte[] Underline_bry = Bry_.new_a7("&#95;");
	public static final byte[] Asterisk_bry = Bry_.new_a7("&#42;");
	public static final byte[] Brack_bgn_bry = Bry_.new_a7("&#91;");
	public static final byte[] Brack_end_bry = Bry_.new_a7("&#93;");
	public static final byte[] Nbsp_num_bry = Bry_.new_a7("&#160;");
}

View File

@@ -0,0 +1,94 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.htmls; import gplx.*; import gplx.langs.*;
// One html element found by Html_parser: offsets of its opening / closing tags and name within src, plus its attributes.
// Attributes are stored flat, five ints per attribute: quote byte, key bgn, key end, val bgn, val end.
public class Html_nde {
	private byte[] src;
	private int[] atrs = Int_.Ary_empty;
	private int atrs_len;
	private boolean tag_tid_is_inline;
	private int tag_lhs_bgn;
	private int tag_lhs_end;
	private int tag_rhs_bgn;
	private int tag_rhs_end;
	private int name_bgn;
	private int name_end;

	// Stores the offsets and copies the first atrs_idx ints of cur_atrs, so the caller may reuse its array.
	public Html_nde(byte[] src, boolean tag_tid_is_inline, int tag_lhs_bgn, int tag_lhs_end, int tag_rhs_bgn, int tag_rhs_end, int name_bgn, int name_end, int[] cur_atrs, int atrs_idx) {
		this.src = src;
		this.tag_tid_is_inline = tag_tid_is_inline;
		this.tag_lhs_bgn = tag_lhs_bgn;
		this.tag_lhs_end = tag_lhs_end;
		this.tag_rhs_bgn = tag_rhs_bgn;
		this.tag_rhs_end = tag_rhs_end;
		this.name_bgn = name_bgn;
		this.name_end = name_end;
		if (atrs_idx <= 0) return;
		atrs = new int[atrs_idx];
		int i = 0;
		while (i < atrs_idx) {
			atrs[i] = cur_atrs[i];
			i++;
		}
		atrs_len = atrs_idx / 5;
	}
	public byte[] Src() {return src;}
	public int[] Atrs() {return atrs;}
	public int Atrs_len() {return atrs_len;}
	public boolean Tag_tid_is_inline() {return tag_tid_is_inline;}
	public int Tag_lhs_bgn() {return tag_lhs_bgn;}
	public Html_nde Tag_lhs_bgn_(int v) {this.tag_lhs_bgn = v; return this;}
	public int Tag_lhs_end() {return tag_lhs_end;}
	public Html_nde Tag_lhs_end_(int v) {this.tag_lhs_end = v; return this;}
	public int Tag_rhs_bgn() {return tag_rhs_bgn;}
	public Html_nde Tag_rhs_bgn_(int v) {this.tag_rhs_bgn = v; return this;}
	public int Tag_rhs_end() {return tag_rhs_end;}
	public Html_nde Tag_rhs_end_(int v) {this.tag_rhs_end = v; return this;}
	public int Name_bgn() {return name_bgn;}
	public Html_nde Name_bgn_(int v) {this.name_bgn = v; return this;}
	public int Name_end() {return name_end;}
	public Html_nde Name_end_(int v) {this.name_end = v; return this;}
	// Resets only the opening- and closing-tag start offsets to -1.
	public void Clear() {
		tag_rhs_bgn = -1;
		tag_lhs_bgn = -1;
	}
	// String wrapper around Atrs_val_by_key_bry.
	public String Atrs_val_by_key_str(String find_key_str) {
		byte[] find_key_bry = Bry_.new_u8(find_key_str);
		byte[] val = Atrs_val_by_key_bry(find_key_bry);
		return String_.new_u8(val);
	}
	// Returns the unescaped value of the first attribute whose key matches find_key_bry, or null when none matches.
	public byte[] Atrs_val_by_key_bry(byte[] find_key_bry) {
		for (int i = 0, slot = 0; i < atrs_len; i++, slot += 5) {
			int key_bgn = atrs[slot + 1];
			int key_end = atrs[slot + 2];
			if (!Bry_.Match(src, key_bgn, key_end, find_key_bry)) continue;
			return Atrs_vals_by_pos(src, atrs[slot + 0], atrs[slot + 3], atrs[slot + 4]);
		}
		return null;
	}
	// Returns src[bgn..end) with escapes resolved: a backslash keeps only the byte after it, and a doubled quote_byte becomes one.
	// A buffer is only filled once the first escape is seen; without escapes the plain slice is returned.
	byte[] Atrs_vals_by_pos(byte[] src, int quote_byte, int bgn, int end) {
		Bry_bfr tmp_bfr = Bry_bfr.new_();
		boolean dirty = false;
		for (int i = bgn; i < end; i++) {
			byte b = src[i];
			boolean is_escape;
			if (b == Byte_ascii.Backslash)
				is_escape = true;
			else if (b == quote_byte)
				is_escape = src[i + 1] == b;	// only a doubled quote counts; a lone quote is dropped
			else {
				if (dirty) tmp_bfr.Add_byte(b);
				continue;
			}
			if (!is_escape) continue;
			if (!dirty) {						// first escape: copy everything seen so far
				dirty = true;
				tmp_bfr.Add_mid(src, bgn, i);
			}
			++i;								// skip the escaping byte and keep the escaped one
			tmp_bfr.Add_byte(src[i]);
		}
		if (dirty) return tmp_bfr.Xto_bry_and_clear();
		return Bry_.Mid(src, bgn, end);
	}
	// Returns the bytes of the given src between the end of the opening tag and the start of the closing tag.
	public byte[] Data(byte[] src) {
		return Bry_.Mid(src, tag_lhs_end, tag_rhs_bgn);
	}
}
// class Xoh_atr {
// public byte[] Key_bry() {return key_bry;} private byte[] key_bry;
// public byte[] Val_bry() {return val_bry;} private byte[] val_bry;
// public int Key_bgn() {return key_bgn;} private int key_bgn;
// public int Key_end() {return key_end;} private int key_end;
// public int Val_bgn() {return val_bgn;} private int val_bgn;
// public int Val_end() {return val_end;} private int val_end;
// public byte Val_quote_tid() {return val_quote_tid;} private byte val_quote_tid;
// }

View File

@@ -0,0 +1,165 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.htmls; import gplx.*; import gplx.langs.*;
import gplx.core.brys.*;
// Minimal html scanner that flattens markup into a list of Html_nde (opening tag, optional closing tag, attributes).
// It tracks one open element at a time; it is not a general-purpose html parser.
public class Html_parser {
	private static final byte Scan_invalid = 0, Scan_valid = 1, Scan_stop = 2;
	private static final int Atr_slots = 5;		// ints stored per attribute: quote byte, key bgn, key end, val bgn, val end
	public static final byte[] Wildcard = null;
	public static final String Wildcard_str = null;

	byte[] src; int pos, end; byte[] bry_xnde_name, bry_atr_key;
	int cur_atrs_idx = 0; int[] cur_atrs = new int[250];	// initial room for 50 atrs; grown on demand
	int cur_lhs_bgn, cur_lhs_end, cur_name_bgn, cur_name_end, cur_rhs_bgn; boolean xnde_init = true, tag_tid_is_inline = false;

	public Html_parser() {
		Bry_bldr bry_bldr = new Bry_bldr();
		bry_xnde_name	= bry_bldr.New_256().Set_rng_xml_identifier(Scan_valid).Set_rng_ws(Scan_stop).Val();
		bry_atr_key		= bry_bldr.New_256().Set_rng_xml_identifier(Scan_valid).Set_rng_ws(Scan_stop).Set_many(Scan_stop, Byte_ascii.Eq).Val();
	}
	public Html_nde[] Parse_as_ary(byte[] src) {return Parse_as_ary(src, 0, src.length, Wildcard, Wildcard);}
	public Html_nde[] Parse_as_ary(byte[] src, int bgn, int end) {return Parse_as_ary(src, bgn, end, Wildcard, Wildcard);}
	// Scans src[bgn..end) and returns one node per inline tag ("<a/>") and per matched open / close pair ("<a></a>").
	// find_key / find_val are accepted but not used for filtering.
	public Html_nde[] Parse_as_ary(byte[] src, int bgn, int end, byte[] find_key, byte[] find_val) {	// flattens html into a list of hndes; only used for Options
		this.src = src; pos = bgn; this.end = end;
		xnde_init = true;	// always start by looking for an opening tag; an earlier call may have ended with one still open
		List_adp rv = List_adp_.new_();
		while (pos < end) {
			byte b = src[pos++];
			if (b != Byte_ascii.Lt) continue;
			if (xnde_init) {				// looking for an opening tag
				if (Parse_xnde_lhs()) {
					if (tag_tid_is_inline)
						rv.Add(New_nde());
					else
						xnde_init = false;	// wait for the closing tag
				}
			}
			else {							// looking for the closing tag of the open element
				if (Parse_xnde_rhs())
					rv.Add(New_nde());
				xnde_init = true;
			}
		}
		return (Html_nde[])rv.To_ary(Html_nde.class);
	}
	// Snapshots the current scan state into a node; pos is used as the end of the closing tag.
	private Html_nde New_nde() {
		return new Html_nde(src, tag_tid_is_inline, cur_lhs_bgn, cur_lhs_end, cur_rhs_bgn, pos, cur_name_bgn, cur_name_end, cur_atrs, cur_atrs_idx);
	}
	// Tries to read "/name>" for the currently open element; pos is just after '<'. Returns false when the text is something else.
	private boolean Parse_xnde_rhs() {
		cur_rhs_bgn = pos - 1;	// -1 b/c "<" is already read
		if (pos >= end) return false;	// '<' was the last byte in range
		byte b = src[pos];
		if (b != Byte_ascii.Slash) return false;
		++pos;
		int name_len = cur_name_end - cur_name_bgn;
		if (pos + name_len >= end) return false;
		if (!Bry_.Match(src, pos, pos + name_len, src, cur_name_bgn, cur_name_end)) return false;
		pos += name_len;
		if (src[pos] != Byte_ascii.Gt) return false;
		++pos;
		return true;
	}
	// Tries to read an opening tag "name key='val' ... >" or "... />"; pos is just after '<'.
	// On success the name / tag offsets and cur_atrs are filled in. Returns false on malformed or truncated input.
	private boolean Parse_xnde_lhs() {
		cur_atrs_idx = 0;
		cur_lhs_bgn = pos - 1;
		cur_name_bgn = pos;
		tag_tid_is_inline = false;
		byte rslt = Skip_while_valid(this.bry_atr_key);
		if (rslt == Scan_invalid) return false;
		cur_name_end = pos;
		while (true) {
			if (pos >= end) return false;
			Skip_ws();
			if (pos >= end) return false;	// only whitespace was left
			byte b = src[pos];
			if (b == Byte_ascii.Slash) {
				++pos;
				if (pos == end) return false;
				byte next = src[pos];
				if (next == Byte_ascii.Gt) {
					tag_tid_is_inline = true;
					++pos;
					break;
				}
				else return false;	// NOTE: don't consume byte b/c false
			}
			else if (b == Byte_ascii.Gt) {
				++pos;
				break;
			}
			int key_bgn = pos;
			rslt = Skip_while_valid(this.bry_atr_key);
			if (rslt == Scan_invalid) return false;
			int key_end = pos;
			Skip_ws();
			if (pos >= end) return false;	// key with nothing after it
			if (src[pos++] != Byte_ascii.Eq) return false;
			Skip_ws();
			if (pos >= end) return false;	// '=' with nothing after it
			byte quote_byte = src[pos];
			if (quote_byte != Byte_ascii.Quote && quote_byte != Byte_ascii.Apos) return false;
			int quote_type = quote_byte;
			int val_bgn = ++pos;			// ++pos: start val after quote
			if (!Skip_to_quote_end(quote_byte)) return false;
			Atrs_ensure_room();
			cur_atrs[cur_atrs_idx + 0] = quote_type;
			cur_atrs[cur_atrs_idx + 1] = key_bgn;
			cur_atrs[cur_atrs_idx + 2] = key_end;
			cur_atrs[cur_atrs_idx + 3] = val_bgn;
			cur_atrs[cur_atrs_idx + 4] = pos - 1;	// NOTE: Skip_to_quote_end positions after quote
			cur_atrs_idx += Atr_slots;
		}
		cur_lhs_end = pos;
		return true;
	}
	// Grows cur_atrs when it cannot hold one more attribute, so tags with many attributes do not overflow it.
	private void Atrs_ensure_room() {
		int needed = cur_atrs_idx + Atr_slots;
		if (needed <= cur_atrs.length) return;
		int[] grown = new int[Math.max(cur_atrs.length * 2, needed)];
		System.arraycopy(cur_atrs, 0, grown, 0, cur_atrs_idx);
		cur_atrs = grown;
	}
	// Advances pos past spaces, tabs, newlines and carriage returns; stops at end.
	private void Skip_ws() {
		while (pos < end) {
			switch (src[pos]) {
				case Byte_ascii.Space: case Byte_ascii.Tab: case Byte_ascii.Nl: case Byte_ascii.Cr:
					++pos;
					break;
				default:
					return;
			}
		}
	}
	// Advances pos to just after the closing quote v. A doubled quote and a backslash-escaped byte are skipped.
	// Returns false when no closing quote followed by another byte is found before end.
	boolean Skip_to_quote_end(byte v) {
		while (pos < end) {
			byte b = src[pos++];
			if (b == v) {
				if (pos == end) return false;
				byte next = src[pos];
				if (next != v)	return true;
				else			++pos;
			}
			else if (b == Byte_ascii.Backslash) {
				++pos;
			}
		}
		return false;
	}
	// Advances pos while comp classifies the current byte as Scan_valid; returns the class of the first other byte, or Scan_invalid at end.
	byte Skip_while_valid(byte[] comp) {
		while (pos < end) {
			byte rv = comp[src[pos] & 0xFF];	// mask: bytes are signed, so a non-ASCII byte would otherwise be a negative index
			if (rv == Scan_valid)
				++pos;
			else
				return rv;
		}
		return Scan_invalid;
	}
}

View File

@@ -0,0 +1,53 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.htmls; import gplx.*; import gplx.langs.*;
import org.junit.*;
// JUnit tests for Html_parser: each case parses an HTML snippet and checks the "id" attribute values of the nodes returned.
public class Html_parser_tst {
@Before public void init() {fxt.Clear();} private Xoh_parser_fxt fxt = new Xoh_parser_fxt();
// single open/close tag pair
@Test public void One() {fxt.Test_parse_find_all("<a id='id0'></a>", "id0");}
// several consecutive tags are returned in source order
@Test public void Many() {fxt.Test_parse_find_all("<a id='id0'></a><a id='id1'></a><a id='id2'></a>", "id0", "id1", "id2");}
// self-closing tag ("/>")
@Test public void Inline() {fxt.Test_parse_find_all("<a id='id0'/>", "id0");}
// tags interleaved with plain text; the text "id=id2" outside a tag must not produce a node
@Test public void Mix() {fxt.Test_parse_find_all("012<a id='id0'></a>id=id2<a id='id1'/>345<a id='id2'></a>abc", "id0", "id1", "id2");}
// doubled quote inside a quoted value is expected to come back as one literal quote
@Test public void Quote_double() {fxt.Test_parse_find_all("<a id='id''0'/>", "id'0");}
// backslash-escaped quote inside a quoted value is expected to come back as one literal quote
@Test public void Quote_escape() {fxt.Test_parse_find_all("<a id='id\\'0'/>", "id'0");}
}
// Test fixture for Html_parser: parses a raw String and compares the "id" attribute of each returned node against the expected values.
class Xoh_parser_fxt {
  private Html_parser parser;
  // Creates the parser on first use; later calls reuse the same instance.
  public void Clear() {
    if (parser != null) return;
    parser = new Html_parser();
  }
  // Parses raw_str using the wildcard key and value, then checks the ids found.
  public Xoh_parser_fxt Test_parse_find_all(String raw_str, String... expd) {
    return Test_parse_find(raw_str, Html_parser.Wildcard_str, Html_parser.Wildcard_str, expd);
  }
  // Parses raw_str with the given key/value filter and asserts that the ids of the resulting nodes equal expd.
  public Xoh_parser_fxt Test_parse_find(String raw_str, String find_key, String find_val, String... expd) {
    byte[] raw = Bry_.new_a7(raw_str);
    byte[] key_bry = Bry_.new_a7(find_key);
    byte[] val_bry = Bry_.new_a7(find_val);
    Html_nde[] found = parser.Parse_as_ary(raw, 0, raw.length, key_bry, val_bry);
    Tfds.Eq_ary_str(expd, Xto_ids(raw, found));
    return this;
  }
  // Collects the value of the "id" attribute of every node, in order.
  private String[] Xto_ids(byte[] src, Html_nde[] ary) {
    String[] ids = new String[ary.length];
    for (int idx = 0; idx < ids.length; idx++)
      ids[idx] = ary[idx].Atrs_val_by_key_str("id");
    return ids;
  }
}

View File

@@ -0,0 +1,40 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.htmls; import gplx.*; import gplx.langs.*;
// Filters parsed HTML nodes by attribute key.
public class Html_selecter {
  // Returns, in original order, every node in ary that has at least one attribute whose key (a slice of src) is present in hash.
  public static Html_nde[] Select(byte[] src, Html_nde[] ary, Hash_adp_bry hash) {
    List_adp matched = List_adp_.new_();
    for (Html_nde hnde : ary) {
      if (Has_key_in_hash(src, hnde.Atrs(), hash))
        matched.Add(hnde);
    }
    Html_nde[] rv = (Html_nde[])matched.To_ary(Html_nde.class);
    matched.Clear();
    return rv;
  }
  // atrs is a flat int array with 5 slots per attribute; slots +1 and +2 hold the begin / end offsets of the key in src.
  private static boolean Has_key_in_hash(byte[] src, int[] atrs, Hash_adp_bry hash) {
    int atrs_len = atrs.length;
    for (int base = 0; base < atrs_len; base += 5) {
      int key_bgn = atrs[base + 1];
      int key_end = atrs[base + 2];
      if (hash.Get_by_mid(src, key_bgn, key_end) != null) return true;
    }
    return false;
  }
}

View File

@@ -0,0 +1,59 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.htmls; import gplx.*; import gplx.langs.*;
// Constants for common HTML tag names, literal tag fragments and comment delimiters.
public class Html_tag_ {
  // tag names (no angle brackets)
  public static final byte[] Ul_name_bry = Bry_.new_a7("ul");
  public static final byte[] A_name_bry = Bry_.new_a7("a");
  public static final byte[] Code_name_bry = Bry_.new_a7("code");
  public static final byte[] Tr_name_bry = Bry_.new_a7("tr");
  public static final byte[] Td_name_bry = Bry_.new_a7("td");
  public static final byte[] Table_name_bry = Bry_.new_a7("table");

  // literal tags: "_inl" is self-closing, "_lhs" is the opening tag, "_rhs" is the closing tag
  public static final byte[] Br_inl = Bry_.new_a7("<br/>");
  public static final byte[] Hr_inl = Bry_.new_a7("<hr/>");
  public static final byte[] Body_lhs = Bry_.new_a7("<body>");
  public static final byte[] Body_rhs = Bry_.new_a7("</body>");
  public static final byte[] B_lhs = Bry_.new_a7("<b>");
  public static final byte[] B_rhs = Bry_.new_a7("</b>");
  public static final byte[] I_lhs = Bry_.new_a7("<i>");
  public static final byte[] I_rhs = Bry_.new_a7("</i>");
  public static final byte[] P_lhs = Bry_.new_a7("<p>");
  public static final byte[] P_rhs = Bry_.new_a7("</p>");
  public static final byte[] Div_lhs = Bry_.new_a7("<div>");
  public static final byte[] Div_rhs = Bry_.new_a7("</div>");
  public static final byte[] Html_rhs = Bry_.new_a7("</html>");
  public static final byte[] Head_lhs_bgn = Bry_.new_a7("<head");
  public static final byte[] Head_rhs = Bry_.new_a7("</head>");
  public static final byte[] Style_lhs_w_type = Bry_.new_a7("<style type=\"text/css\">");
  public static final byte[] Style_rhs = Bry_.new_a7("</style>");
  public static final byte[] Script_lhs = Bry_.new_a7("<script>");
  public static final byte[] Script_lhs_w_type = Bry_.new_a7("<script type='text/javascript'>");
  public static final byte[] Script_rhs = Bry_.new_a7("</script>");
  public static final byte[] Span_rhs = Bry_.new_a7("</span>");

  // String forms
  public static final String Comm_bgn_str = "<!--";
  public static final String Comm_end_str = "-->";
  public static final String Anchor_str = "#";

  // comment delimiters as bytes, plus their lengths
  public static final byte[] Comm_bgn = Bry_.new_a7(Comm_bgn_str);
  public static final byte[] Comm_end = Bry_.new_a7(Comm_end_str);
  public static final int Comm_bgn_len = Comm_bgn.length;
  public static final int Comm_end_len = Comm_end.length;
}

View File

@@ -0,0 +1,180 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.htmls; import gplx.*; import gplx.langs.*;
import gplx.core.primitives.*; import gplx.core.btries.*; import gplx.langs.htmls.encoders.*;
// Static helpers for escaping / unescaping the five basic HTML entities (< > & " '), encoding ids, and stripping comments.
public class Html_utl {
  private static final Url_encoder encoder_id = Url_encoder.new_html_id_();
  // shared scratch buffer used by the overloads that take no Bry_bfr
  // NOTE(review): static and used without synchronization in this class -- confirm callers are single-threaded
  private static final Bry_bfr tmp_bfr = Bry_bfr.reset_(255);
  public static String Encode_id_as_str(byte[] key) {return String_.new_u8(Encode_id_as_bry(key));}
  // Escapes only quote and apos in key, then runs the result through the html-id encoder.
  public static byte[] Encode_id_as_bry(byte[] key) {
    byte[] escaped = Escape_html_as_bry(tmp_bfr, key, Bool_.N, Bool_.N, Bool_.N, Bool_.Y, Bool_.Y);
    return encoder_id.Encode(escaped);
  }
  public static String Escape_for_atr_val_as_str(Bry_bfr bfr, byte quote_byte, String s) {return String_.new_u8(Escape_for_atr_val_as_bry(bfr, quote_byte, s));}
  public static byte[] Escape_for_atr_val_as_bry(Bry_bfr bfr, byte quote_byte, String s) {
    if (s == null) return null;
    return Escape_for_atr_val_as_bry(bfr, quote_byte, Bry_.new_u8(s));
  }
  // Returns bry itself when it contains no quote_byte; otherwise returns an escaped copy built in bfr (bfr is cleared).
  public static byte[] Escape_for_atr_val_as_bry(Bry_bfr bfr, byte quote_byte, byte[] bry) {
    if (bry == null) return null;
    boolean dirty = Escape_for_atr_val_as_bry(bfr, quote_byte, bry, 0, bry.length);
    return dirty ? bfr.Xto_bry_and_clear() : bry;
  }
  // Replaces each occurrence of quote_byte in src[bgn..end) with its entity.
  // Nothing is written to bfr until the first quote_byte is found; returns true if bfr was written to.
  // Throws for a quote_byte other than apos or quote once such a byte is actually found in src.
  public static boolean Escape_for_atr_val_as_bry(Bry_bfr bfr, byte quote_byte, byte[] src, int bgn, int end) {
    boolean dirty = false;
    for (int i = bgn; i < end; i++) {
      byte b = src[i];
      if (b == quote_byte) {
        if (!dirty) {
          bfr.Add_mid(src, bgn, i); // first hit: copy the untouched prefix
          dirty = true;
        }
        switch (quote_byte) {
          case Byte_ascii.Apos: bfr.Add(Html_entity_.Apos_num_bry); break;
          case Byte_ascii.Quote: bfr.Add(Html_entity_.Quote_bry); break;
          default: throw Err_.new_unhandled(quote_byte);
        }
      }
      else {
        if (dirty)
          bfr.Add_byte(b);
      }
    }
    return dirty;
  }
  public static String Escape_html_as_str(String v) {return String_.new_u8(Escape_html_as_bry(Bry_.new_u8(v)));}
  // The four overloads below return null for a null bry; previously bry.length was dereferenced before the null check in Escape_html could run.
  public static byte[] Escape_html_as_bry(Bry_bfr tmp, byte[] bry) {
    return bry == null ? null : Escape_html(false, tmp, bry, 0, bry.length, true, true, true, true, true);
  }
  public static byte[] Escape_html_as_bry(byte[] bry) {
    return bry == null ? null : Escape_html(false, tmp_bfr, bry, 0, bry.length, true, true, true, true, true);
  }
  public static byte[] Escape_html_as_bry(byte[] bry, boolean lt, boolean gt, boolean amp, boolean quote, boolean apos) {
    return bry == null ? null : Escape_html(false, tmp_bfr, bry, 0, bry.length, lt, gt, amp, quote, apos);
  }
  public static byte[] Escape_html_as_bry(Bry_bfr bfr, byte[] bry, boolean lt, boolean gt, boolean amp, boolean quote, boolean apos) {
    return bry == null ? null : Escape_html(false, bfr, bry, 0, bry.length, lt, gt, amp, quote, apos);
  }
  // Appends the escaped form of bry[bgn..end) to bfr; bfr is not cleared.
  public static void Escape_html_to_bfr(Bry_bfr bfr, byte[] bry, int bgn, int end, boolean escape_lt, boolean escape_gt, boolean escape_amp, boolean escape_quote, boolean escape_apos) {
    Escape_html(true, bfr, bry, bgn, end, escape_lt, escape_gt, escape_amp, escape_quote, escape_apos);
  }
  // Core escaper. Each escape_* flag enables replacement of that character by its entity.
  // write_to_bfr=true : always appends to bfr and returns null.
  // write_to_bfr=false: returns bry itself when nothing needed escaping, else the escaped copy (bfr is cleared).
  private static byte[] Escape_html(boolean write_to_bfr, Bry_bfr bfr, byte[] bry, int bgn, int end, boolean escape_lt, boolean escape_gt, boolean escape_amp, boolean escape_quote, boolean escape_apos) {
    if (bry == null) return null;
    boolean dirty = write_to_bfr; // if write_to_bfr, start dirty; else the lazy Add_mid below would write the prefix a second time
    byte[] escaped = null;
    for (int i = bgn; i < end; i++) {
      byte b = bry[i];
      switch (b) {
        case Byte_ascii.Lt: if (escape_lt) escaped = Html_entity_.Lt_bry; break;
        case Byte_ascii.Gt: if (escape_gt) escaped = Html_entity_.Gt_bry; break;
        case Byte_ascii.Amp: if (escape_amp) escaped = Html_entity_.Amp_bry; break;
        case Byte_ascii.Quote: if (escape_quote) escaped = Html_entity_.Quote_bry; break;
        case Byte_ascii.Apos: if (escape_apos) escaped = Html_entity_.Apos_num_bry; break;
        default:
          if (dirty)
            bfr.Add_byte(b);
          continue;
      }
      // only lt, gt, amp, quote, apos reach here; everything else was handled by default: continue above
      if (escaped == null) { // special char whose flag is off; EX: Escape(y, y, n, y);
        if (dirty)
          bfr.Add_byte(b);
      }
      else {
        if (!dirty) {
          bfr.Add_mid(bry, bgn, i); // first escape: copy the untouched prefix
          dirty = true;
        }
        bfr.Add(escaped);
        escaped = null;
      }
    }
    if (write_to_bfr)
      return null;
    else
      return dirty ? bfr.Xto_bry_and_clear() : bry;
  }
  // maps each entity's bytes to the character it stands for
  private static final Btrie_slim_mgr unescape_trie = Btrie_slim_mgr.ci_a7()
    .Add_bry_byte(Html_entity_.Lt_bry , Byte_ascii.Lt)
    .Add_bry_byte(Html_entity_.Gt_bry , Byte_ascii.Gt)
    .Add_bry_byte(Html_entity_.Amp_bry , Byte_ascii.Amp)
    .Add_bry_byte(Html_entity_.Quote_bry , Byte_ascii.Quote)
    .Add_bry_byte(Html_entity_.Apos_num_bry , Byte_ascii.Apos)
    ;
  public static String Unescape_as_str(String src) {
    Bry_bfr bfr = Bry_bfr.reset_(255);
    byte[] bry = Bry_.new_u8(src);
    Unescape(Bool_.Y, bfr, bry, 0, bry.length, Bool_.Y, Bool_.Y, Bool_.Y, Bool_.Y, Bool_.Y);
    return bfr.Xto_str_and_clear();
  }
  // Inverse of Escape_html: replaces entities in bry[bgn..end) by their characters.
  // Each escape_* flag enables unescaping of that entity; entities whose flag is off are copied through unchanged.
  // write_to_bfr=true : always appends to bfr and returns null.
  // write_to_bfr=false: returns bry itself when nothing was unescaped, else the new bytes (bfr is cleared).
  public static byte[] Unescape(boolean write_to_bfr, Bry_bfr bfr, byte[] bry, int bgn, int end, boolean escape_lt, boolean escape_gt, boolean escape_amp, boolean escape_quote, boolean escape_apos) {
    if (bry == null) return null;
    boolean dirty = write_to_bfr; // if write_to_bfr, start dirty; else the lazy Add_mid below would write the prefix a second time
    int pos = bgn;
    while (pos < end) {
      byte b = bry[pos];
      Object o = unescape_trie.Match_bgn_w_byte(b, bry, pos, end);
      if (o == null) { // not an entity; copy byte
        if (dirty)
          bfr.Add_byte(b);
        ++pos;
        continue;
      }
      int match_end = unescape_trie.Match_pos(); // position after the matched entity
      byte unescaped_byte = ((Byte_obj_val)o).Val();
      boolean unescape = false;
      switch (unescaped_byte) {
        case Byte_ascii.Lt: if (escape_lt) unescape = true; break;
        case Byte_ascii.Gt: if (escape_gt) unescape = true; break;
        case Byte_ascii.Amp: if (escape_amp) unescape = true; break;
        case Byte_ascii.Quote: if (escape_quote) unescape = true; break;
        case Byte_ascii.Apos: if (escape_apos) unescape = true; break;
      }
      if (unescape) {
        if (!dirty) {
          bfr.Add_mid(bry, bgn, pos); // first unescape: copy the untouched prefix
          dirty = true;
        }
        bfr.Add_byte(unescaped_byte);
      }
      else if (dirty) {
        // FIX: copy the whole entity; previously only its first byte was written while pos skipped past all of it
        bfr.Add_mid(bry, pos, match_end);
      }
      pos = match_end;
    }
    if (write_to_bfr)
      return null;
    else
      return dirty ? bfr.Xto_bry_and_clear() : bry;
  }
  public static byte[] Del_comments(Bry_bfr bfr, byte[] src) {return Del_comments(bfr, src, 0, src.length);}
  // Returns src[pos..end) with every complete "<!-- ... -->" removed. An unterminated comment is kept as-is.
  // bfr is used as the work buffer and is cleared on return.
  public static byte[] Del_comments(Bry_bfr bfr, byte[] src, int pos, int end) {
    while (pos < end) {
      int comm_bgn = Bry_find_.Find_fwd(src, Html_tag_.Comm_bgn, pos); // look for <!--
      // FIX: Find_fwd is not given end, so a match that does not lie entirely before end counts as "not found"
      if (comm_bgn == Bry_find_.Not_found || comm_bgn + Html_tag_.Comm_bgn_len > end) { // not found; consume rest
        bfr.Add_mid(src, pos, end);
        break;
      }
      int comm_end = Bry_find_.Find_fwd(src, Html_tag_.Comm_end, comm_bgn + Html_tag_.Comm_bgn_len); // look for -->
      if (comm_end == Bry_find_.Not_found || comm_end + Html_tag_.Comm_end_len > end) { // not found; consume rest
        bfr.Add_mid(src, pos, end);
        break;
      }
      bfr.Add_mid(src, pos, comm_bgn); // add everything between pos and comm_bgn
      pos = comm_end + Html_tag_.Comm_end_len; // reposition pos after comm_end
    }
    return bfr.Xto_bry_and_clear();
  }
}

View File

@@ -0,0 +1,62 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.htmls; import gplx.*; import gplx.langs.*;
import org.junit.*;
// JUnit tests for Html_utl: comment removal, html escaping, attribute-value escaping and unescaping.
public class Html_utl_tst {
@Before public void init() {fxt.Clear();} private Html_utl_fxt fxt = new Html_utl_fxt();
// Del_comments: a complete comment is removed
@Test public void Basic() {fxt.Test_del_comments("a<!-- b -->c" , "ac");}
// Del_comments: no comment at all leaves the text unchanged
@Test public void Bgn_missing() {fxt.Test_del_comments("a b c" , "a b c");}
// Del_comments: an unterminated comment is kept
@Test public void End_missing() {fxt.Test_del_comments("a<!-- b c" , "a<!-- b c");}
// Del_comments: every complete comment is removed
@Test public void Multiple() {fxt.Test_del_comments("a<!--b-->c<!--d-->e" , "ace");}
// Escape_html_as_bry; flag order is lt, gt, amp, quote, apos
@Test public void Escape() {
fxt.Test_escape_html(Bool_.Y, Bool_.Y, Bool_.Y, Bool_.Y, Bool_.Y, "a<b" , "a&lt;b"); // basic
fxt.Test_escape_html(Bool_.Y, Bool_.Y, Bool_.N, Bool_.Y, Bool_.Y, "a<&b" , "a&lt;&b"); // fix: & not escaped when <> present
fxt.Test_escape_html(Bool_.Y, Bool_.Y, Bool_.Y, Bool_.Y, Bool_.Y, "a<>'&\"b" , "a&lt;&gt;&#39;&amp;&quot;b");
}
// Escape_for_atr_val_as_bry; only the byte used as the attribute quote is escaped
@Test public void Escape_for_atr_val() {
fxt.Test_escape_for_atr("abc" , Bool_.N, "abc"); // basic
fxt.Test_escape_for_atr("a'\"b" , Bool_.Y, "a&#39;\"b"); // quote is '
fxt.Test_escape_for_atr("a'\"b" , Bool_.N, "a'&quot;b"); // quote is "
}
// Unescape with every flag on
@Test public void Unescape() {
fxt.Test_unescape_html(Bool_.Y, Bool_.Y, Bool_.Y, Bool_.Y, Bool_.Y, "a&lt;&gt;&#39;&amp;&quot;b" , "a<>'&\"b"); // basic
}
}
// Test fixture for Html_utl: converts between String and byte[] and asserts on the result of each helper.
class Html_utl_fxt {
  private Bry_bfr tmp_bfr = Bry_bfr.reset_(255);
  // Empties the shared work buffer before each test.
  public void Clear() {
    tmp_bfr.Clear();
  }
  // Asserts that removing comments from src yields expd.
  public void Test_del_comments(String src, String expd) {
    byte[] src_bry = Bry_.new_u8(src);
    byte[] stripped = Html_utl.Del_comments(tmp_bfr, src_bry);
    String actl_str = String_.new_a7(stripped);
    Tfds.Eq(expd, actl_str);
  }
  // Asserts that escaping src with the given flags yields expd.
  public void Test_escape_html(boolean lt, boolean gt, boolean amp, boolean quote, boolean apos, String src, String expd) {
    byte[] src_bry = Bry_.new_a7(src);
    byte[] escaped = Html_utl.Escape_html_as_bry(src_bry, lt, gt, amp, quote, apos);
    String actl_str = String_.new_a7(escaped);
    Tfds.Eq(expd, actl_str);
  }
  // Asserts that escaping src for an attribute quoted with ' (quote_is_apos) or " yields expd.
  public void Test_escape_for_atr(String src, boolean quote_is_apos, String expd) {
    byte quote_byte = quote_is_apos ? Byte_ascii.Apos : Byte_ascii.Quote;
    byte[] escaped = Html_utl.Escape_for_atr_val_as_bry(tmp_bfr, quote_byte, src);
    String actl_str = String_.new_u8(escaped);
    Tfds.Eq(expd, actl_str);
  }
  // Asserts that unescaping src with the given flags yields expd.
  public void Test_unescape_html(boolean lt, boolean gt, boolean amp, boolean quote, boolean apos, String src, String expd) {
    byte[] src_bry = Bry_.new_u8(src);
    byte[] unescaped = Html_utl.Unescape(false, tmp_bfr, src_bry, 0, src_bry.length, lt, gt, amp, quote, apos);
    String actl_str = String_.new_a7(unescaped);
    Tfds.Eq(expd, actl_str);
  }
}

View File

@@ -0,0 +1,107 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.htmls; import gplx.*; import gplx.langs.*;
// Incremental HTML writer: appends tags, attributes and text to an internal buffer and
// keeps a stack of open tag names so that Nde_end can emit the matching close tag.
public class Html_wtr {
  private Bry_bfr bfr = Bry_bfr.reset_(255);
  private List_adp nde_stack = List_adp_.new_();
  private byte atr_quote = Byte_ascii.Quote;
  // Quote byte placed around attribute values written through Atr.
  public byte Atr_quote() {
    return atr_quote;
  }
  public Html_wtr Atr_quote_(byte v) {
    atr_quote = v;
    return this;
  }
  // Writes <tag k0="v0" k1="v1"...>text</tag>; atrs is a flat list of key, val pairs.
  // text is html-escaped only when text_escape is true.
  public Html_wtr Nde_full_atrs(byte[] tag, byte[] text, boolean text_escape, byte[]... atrs) {
    Nde_bgn(tag);
    for (int key_idx = 0, atrs_len = atrs.length; key_idx < atrs_len; key_idx += 2)
      Atr(atrs[key_idx], atrs[key_idx + 1]);
    Nde_end_hdr();
    if (!text_escape)
      bfr.Add(text);
    else
      Txt(text);
    Nde_end();
    return this;
  }
  // Writes <tag>text</tag> with text html-escaped.
  public Html_wtr Nde_full(byte[] tag, byte[] text) {
    Nde_bgn_hdr(tag);
    Txt(text);
    Nde_end();
    return this;
  }
  // Raw (unescaped) writes.
  public Html_wtr Txt_mid(byte[] src, int bgn, int end) {
    bfr.Add_mid(src, bgn, end);
    return this;
  }
  public Html_wtr Txt_byte(byte v) {
    bfr.Add_byte(v);
    return this;
  }
  public Html_wtr Txt_raw(byte[] v) {
    bfr.Add(v);
    return this;
  }
  // Writes v html-escaped; null is ignored.
  public Html_wtr Txt(byte[] v) {
    if (v == null) return this;
    bfr.Add(Html_utl.Escape_html_as_bry(v));
    return this;
  }
  // Writes a complete opening tag: <name>
  public Html_wtr Nde_bgn_hdr(byte[] name) {
    this.Nde_bgn(name);
    this.Nde_end_hdr();
    return this;
  }
  // Writes "<name" (no closing bracket, so attributes can follow) and pushes name on the open-tag stack.
  public Html_wtr Nde_bgn(byte[] name) {
    bfr.Add_byte(Byte_ascii.Lt);
    bfr.Add(name);
    nde_stack.Add(name);
    return this;
  }
  // Writes one attribute using the current quote byte; see Write_atr_bry.
  public Html_wtr Atr(byte[] key, byte[] val) {
    Write_atr_bry(bfr, Bool_.Y, atr_quote, key, val);
    return this;
  }
  // Closes the current tag as self-closing ("/>") and pops it from the stack.
  public Html_wtr Nde_end_inline() {
    bfr.Add_byte(Byte_ascii.Slash).Add_byte(Byte_ascii.Gt);
    List_adp_.Pop_last(nde_stack);
    return this;
  }
  // Writes the ">" that ends an opening tag.
  public Html_wtr Nde_end_hdr() {
    bfr.Add_byte(Byte_ascii.Gt);
    return this;
  }
  // Pops the most recently opened tag and writes its closing tag: </name>
  public Html_wtr Nde_end() {
    byte[] open_name = (byte[])List_adp_.Pop_last(nde_stack);
    bfr.Add_byte(Byte_ascii.Lt).Add_byte(Byte_ascii.Slash);
    bfr.Add(open_name);
    bfr.Add_byte(Byte_ascii.Gt);
    return this;
  }
  public byte[] Xto_bry_and_clear() {
    return bfr.Xto_bry_and_clear();
  }
  public byte[] Xto_bry() {
    return bfr.Xto_bry();
  }
  public String Xto_str() {
    return bfr.Xto_str();
  }
  public static void Write_atr_bry(Bry_bfr bfr, byte[] key, byte[] val) {
    Write_atr_bry(bfr, Bool_.Y, Byte_ascii.Quote, key, val);
  }
  // Appends [space]key=<quote>val<quote> to bfr; quote and apos inside val are escaped.
  // Nothing is written when val is empty.
  public static void Write_atr_bry(Bry_bfr bfr, boolean write_space, byte atr_quote, byte[] key, byte[] val) {
    if (Bry_.Len_eq_0(val)) return; // don't write empty
    if (write_space) {
      bfr.Add_byte_space();
    }
    bfr.Add(key);
    bfr.Add_byte(Byte_ascii.Eq);
    bfr.Add_byte(atr_quote);
    Html_utl.Escape_html_to_bfr(bfr, val, 0, val.length, false, false, false, true, true);
    bfr.Add_byte(atr_quote);
  }
  public static void Write_atr_int(Bry_bfr bfr, byte[] key, int val) {
    Write_atr_int(bfr, Bool_.Y, Byte_ascii.Quote, key, val);
  }
  // Appends [space]key=<quote>val<quote> to bfr with val written as a decimal int.
  public static void Write_atr_int(Bry_bfr bfr, boolean write_space, byte atr_quote, byte[] key, int val) {
    if (write_space) {
      bfr.Add_byte_space();
    }
    bfr.Add(key);
    bfr.Add_byte(Byte_ascii.Eq);
    bfr.Add_byte(atr_quote);
    bfr.Add_int_variable(val);
    bfr.Add_byte(atr_quote);
  }
}

View File

@@ -0,0 +1,307 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.htmls.encoders; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
import gplx.core.btries.*;
import gplx.xowa.parsers.amps.*;
// Table-driven byte encoder / decoder. Each of the 256 byte values maps to an Url_encoder_itm which writes
// the byte as-is, as a replacement byte, as marker + 2 hex digits, or (for &) as an encoded html entity.
// The static new_*_ factories build the tables for the various targets (html ids, http urls, file systems, ...).
public class Url_encoder implements Url_encoder_interface {
  private Url_encoder_itm[] encode_ary = new Url_encoder_itm[256], decode_ary = new Url_encoder_itm[256];
  private Bry_bfr tmp_bfr = Bry_bfr.reset_(255); // shared work buffer; only touch while holding thread_lock
  private Url_encoder anchor_encoder = null; // when set, everything after the first # is handed to this encoder
  private Object thread_lock = new Object();
  // Resets both tables: every byte encodes to hex using primary_encode_marker, and decodes to itself except the marker.
  public void Itms_ini(byte primary_encode_marker) {
    Url_encoder_itm_hex hex = new Url_encoder_itm_hex(primary_encode_marker);
    for (int i = 0; i < 256; i++) {
      encode_ary[i] = hex; // default encode to hex
      decode_ary[i] = Url_encoder_itm_same._; // default decode to same; needed for files; EX: A!%21.png -> A!!.png;
    }
    decode_ary[primary_encode_marker & 0xff] = hex;// PATCH.JAVA:need to convert to unsigned byte
  }
  // Marks each byte in ary as hex for both encode and decode.
  public void Itms_raw_diff_many(byte primary_encode_marker, int... ary) {
    Url_encoder_itm_hex hex = new Url_encoder_itm_hex(primary_encode_marker);
    int ary_len = ary.length;
    for (int i = 0; i < ary_len; i++) {
      encode_ary[ary[i]] = hex;
      decode_ary[ary[i]] = hex;
    }
    decode_ary[primary_encode_marker & 0xff] = hex;// PATCH.JAVA:need to convert to unsigned byte
  }
  public void Itms_decode_marker(byte decode_marker) {
    Url_encoder_itm_hex hex = new Url_encoder_itm_hex(decode_marker);
    decode_ary[decode_marker & 0xff] = hex;// PATCH.JAVA:need to convert to unsigned byte
  }
  public void Itms_decode_diff(byte orig, byte repl) {
    decode_ary[orig & 0xff] = new Url_encoder_itm_diff(orig, repl);// PATCH.JAVA:need to convert to unsigned byte
  }
  // Marks every byte in [bgn, end] (inclusive) as written unchanged for both encode and decode.
  public void Itms_raw_same_rng(int bgn, int end) {
    for (int i = bgn; i <= end; i++) {
      encode_ary[i] = Url_encoder_itm_same._;
      decode_ary[i] = Url_encoder_itm_same._;
    }
  }
  public Url_encoder Itms_raw_same_many(int... ary) {
    int ary_len = ary.length;
    for (int i = 0; i < ary_len; i++) {
      encode_ary[ary[i]] = Url_encoder_itm_same._;
      decode_ary[ary[i]] = Url_encoder_itm_same._;
    }
    return this;
  }
  public void Itms_raw_html_ent(byte src, Btrie_slim_mgr trie) {
    Url_encoder_itm_html_ent itm = new Url_encoder_itm_html_ent(trie);
    encode_ary[src & 0xff] = itm;// PATCH.JAVA:need to convert to unsigned byte
  }
  // src encodes to trg; trg decodes back to src.
  public Url_encoder Itms_raw_diff(byte src, byte trg) {
    Url_encoder_itm_diff itm = new Url_encoder_itm_diff(src, trg);
    encode_ary[src & 0xff] = itm;// PATCH.JAVA:need to convert to unsigned byte
    decode_ary[trg & 0xff] = itm;// PATCH.JAVA:need to convert to unsigned byte
    return this;
  }
  public byte[] Encode_http(Io_url url) {
    synchronized (thread_lock) {
      tmp_bfr.Add(Io_url.Http_file_bry);
      Encode(tmp_bfr, url.RawBry());
      return tmp_bfr.Xto_bry_and_clear();
    }
  }
  public String Encode_str(String str) {
    synchronized (thread_lock) {
      byte[] bry = Bry_.new_u8(str); Encode(tmp_bfr, bry, 0, bry.length); return tmp_bfr.Xto_str_and_clear();
    }
  }
  public byte[] Encode_bry(String str) {
    synchronized (thread_lock) {
      byte[] bry = Bry_.new_u8(str); Encode(tmp_bfr, bry, 0, bry.length); return tmp_bfr.Xto_bry_and_clear();
    }
  }
  public byte[] Encode(byte[] bry) {
    synchronized (thread_lock) { // FIX: hold the lock until tmp_bfr has been drained, like Encode_str / Encode_bry; monitors are reentrant so the nested Encode is fine
      Encode(tmp_bfr, bry, 0, bry.length); return tmp_bfr.Xto_bry_and_clear();
    }
  }
  public Bry_bfr Encode(Bry_bfr bfr, byte[] bry) {Encode(bfr, bry, 0, bry.length); return bfr;}
  // Encodes bry[bgn..end) into bfr; bfr is not cleared.
  public void Encode(Bry_bfr bfr, byte[] bry, int bgn, int end) {
    synchronized (thread_lock) {
      for (int i = bgn; i < end; i++) {
        byte b = bry[i];
        if (anchor_encoder != null && b == Byte_ascii.Hash) { // rest of input is an anchor; delegate and stop
          bfr.Add_byte(Byte_ascii.Hash);
          anchor_encoder.Encode(bfr, bry, i + 1, end);
          break;
        }
        Url_encoder_itm itm = encode_ary[b & 0xff];// PATCH.JAVA:need to convert to unsigned byte
        i += itm.Encode(bfr, bry, end, i, b); // itm returns the number of extra source bytes it consumed
      }
    }
  }
  public String Decode_str(String str) {
    synchronized (thread_lock) {
      byte[] bry = Bry_.new_u8(str); Decode(bry, 0, bry.length, tmp_bfr, true); return tmp_bfr.Xto_str_and_clear();
    }
  }
  public byte[] Decode(byte[] bry) {
    synchronized (thread_lock) { // FIX: tmp_bfr is shared; hold the lock until it has been drained, like Decode_lax
      return Decode(tmp_bfr, bry, 0, bry.length);
    }
  }
  public byte[] Decode(byte[] bry, int bgn, int end) {
    synchronized (thread_lock) { // FIX: tmp_bfr is shared; hold the lock until it has been drained, like Decode_lax
      return Decode(tmp_bfr, bry, bgn, end);
    }
  }
  public byte[] Decode(Bry_bfr bfr, byte[] bry, int bgn, int end) {Decode(bry, bgn, end, bfr , false); return bfr.Xto_bry_and_clear();}
  public byte[] Decode_lax(byte[] bry) {
    synchronized (thread_lock) {
      Decode(bry, 0, bry.length, tmp_bfr, false); return tmp_bfr.Xto_bry_and_clear();
    }
  }
  // Decodes bry[bgn..end) into bfr; bfr is not cleared. fail_when_invalid is passed on to each item's Decode.
  public void Decode(byte[] bry, int bgn, int end, Bry_bfr bfr, boolean fail_when_invalid) {
    synchronized (thread_lock) {
      for (int i = bgn; i < end; i++) {
        byte b = bry[i];
        if (anchor_encoder != null && b == Byte_ascii.Hash) { // rest of input is an anchor; delegate and stop
          bfr.Add_byte(Byte_ascii.Hash);
          anchor_encoder.Decode(bry, i + 1, end, bfr, false);
          break;
        }
        Url_encoder_itm itm = decode_ary[b & 0xff];// PATCH.JAVA:need to convert to unsigned byte
        i += itm.Decode(bfr, bry, end, i, b, fail_when_invalid); // itm returns the number of extra source bytes it consumed
      }
    }
  }
  // Common set written unchanged: 0-9 A-Z a-z - . _
  // NOTE: despite its name, encode_colon=true adds ':' to the unchanged set (the colon is then NOT hex-encoded).
  private static void mediawiki_base(Url_encoder rv, boolean encode_colon) {
    rv.Itms_raw_same_rng(Byte_ascii.Num_0, Byte_ascii.Num_9);
    rv.Itms_raw_same_rng(Byte_ascii.Ltr_A, Byte_ascii.Ltr_Z);
    rv.Itms_raw_same_rng(Byte_ascii.Ltr_a, Byte_ascii.Ltr_z);
    rv.Itms_raw_same_many(Byte_ascii.Dash, Byte_ascii.Dot, Byte_ascii.Underline);
    if (encode_colon)
      rv.Itms_raw_same_many(Byte_ascii.Colon);
  }
  public static Url_encoder new_html_id_() {
    Url_encoder rv = new Url_encoder();
    rv.Itms_ini(Byte_ascii.Dot);
    mediawiki_base(rv, true);
    rv.Itms_decode_marker(Byte_ascii.Dot);
    rv.Itms_raw_diff(Byte_ascii.Space, Byte_ascii.Underline);
    rv.Itms_raw_html_ent(Byte_ascii.Amp, Xop_amp_trie._);
    return rv;
  }
  public static Url_encoder new_http_url_() {
    Url_encoder rv = new Url_encoder();
    rv.Itms_ini(Byte_ascii.Percent);
    mediawiki_base(rv, false);
    rv.Itms_raw_diff(Byte_ascii.Space, Byte_ascii.Plus);
    return rv;
  }
  public static Url_encoder new_http_url_ttl_() {
    Url_encoder rv = new Url_encoder();
    rv.Itms_ini(Byte_ascii.Percent);
    mediawiki_base(rv, true);
    return rv;
  }
  public static Url_encoder new_http_url_space_is_space() {
    Url_encoder rv = new Url_encoder();
    rv.Itms_ini(Byte_ascii.Percent);
    mediawiki_base(rv, true);
    return rv;
  }
  public static Url_encoder new_fsys_lnx_() {
    Url_encoder rv = new Url_encoder();
    rv.Itms_ini(Byte_ascii.Percent);
    mediawiki_base(rv, true);
    rv.Itms_raw_same_many(Byte_ascii.Slash);
    rv.Itms_raw_diff(Byte_ascii.Backslash, Byte_ascii.Slash);
    return rv;
  }
  public static Url_encoder new_fsys_wnt_() {
    Url_encoder rv = new Url_encoder();
    rv.Itms_ini(Byte_ascii.Percent);
    rv.Itms_raw_same_rng(Byte_ascii.Num_0, Byte_ascii.Num_9);
    rv.Itms_raw_same_rng(Byte_ascii.Ltr_A, Byte_ascii.Ltr_Z);
    rv.Itms_raw_same_rng(Byte_ascii.Ltr_a, Byte_ascii.Ltr_z);
    rv.Itms_raw_same_many
    ( Byte_ascii.Bang, Byte_ascii.At, Byte_ascii.Hash, Byte_ascii.Dollar, Byte_ascii.Percent, Byte_ascii.Pow, Byte_ascii.Amp
    , Byte_ascii.Plus, Byte_ascii.Eq, Byte_ascii.Underline, Byte_ascii.Dash
    , Byte_ascii.Dot, Byte_ascii.Comma
    , Byte_ascii.Tick, Byte_ascii.Tilde, Byte_ascii.Brack_bgn, Byte_ascii.Brack_end, Byte_ascii.Curly_bgn, Byte_ascii.Curly_end);
    return rv;
  }
  public static Url_encoder new_file_() {
    Url_encoder rv = new Url_encoder();
    rv.Itms_ini(Byte_ascii.Percent);
    mediawiki_base(rv, true);
    return rv;
  }
  public static Url_encoder new_gfs_() {
    Url_encoder rv = new Url_encoder();
    rv.Itms_ini(Byte_ascii.Percent);
    rv.Itms_raw_same_many(Byte_ascii.Paren_bgn, Byte_ascii.Paren_end, Byte_ascii.Apos, Byte_ascii.Semic);
    mediawiki_base(rv, true);
    return rv;
  }
  public static Url_encoder new_html_href_mw_() {
    Url_encoder rv = new Url_encoder();
    rv.Itms_ini(Byte_ascii.Percent);
    mediawiki_base(rv, true);
    rv.Itms_raw_diff(Byte_ascii.Space, Byte_ascii.Underline);
    rv.Itms_raw_same_many(Byte_ascii.Semic, Byte_ascii.At, Byte_ascii.Dollar, Byte_ascii.Bang, Byte_ascii.Star
    , Byte_ascii.Paren_bgn, Byte_ascii.Paren_end, Byte_ascii.Comma, Byte_ascii.Slash, Byte_ascii.Colon
    , Byte_ascii.Hash// NOTE: not part of wfUrlEncode; not sure where this is specified; needed for A#b
    );
    rv.anchor_encoder = new_html_id_();
    return rv;
  }
  public static Url_encoder new_html_href_quotes_() {
    Url_encoder rv = new Url_encoder();
    rv.Itms_ini(Byte_ascii.Percent);
    rv.Itms_raw_same_rng(0, 255); // default everything to same;
    rv.Itms_raw_diff_many(Byte_ascii.Percent
    , Byte_ascii.Apos, Byte_ascii.Quote, Byte_ascii.Lt, Byte_ascii.Gt); // encode ', ", <, >
    rv.Itms_raw_diff(Byte_ascii.Space, Byte_ascii.Underline); // convert " " to "_"
    return rv;
  }
}
// Per-byte strategy used by Url_encoder's encode / decode tables.
// Both methods receive the current byte b, its index idx in src, and end (the exclusive limit of the range being processed),
// and write their output to bfr.
// The int returned is added by Url_encoder to its loop index, i.e. it is the number of extra source bytes consumed beyond b
// (0 when only b was consumed).
interface Url_encoder_itm {
int Encode(Bry_bfr bfr, byte[] src, int end, int idx, byte b);
// fail_when_invalid: implementations may throw on malformed input when true, instead of copying b through
int Decode(Bry_bfr bfr, byte[] src, int end, int idx, byte b, boolean fail_when_invalid);
}
// Identity item: the byte is written unchanged in both directions and no extra source bytes are consumed.
class Url_encoder_itm_same implements Url_encoder_itm {
  // shared instance
  public static final Url_encoder_itm _ = new Url_encoder_itm_same();
  public int Encode(Bry_bfr bfr, byte[] src, int end, int idx, byte b) {
    bfr.Add_byte(b);
    return 0;
  }
  public int Decode(Bry_bfr bfr, byte[] src, int end, int idx, byte b, boolean fail_when_invalid) {
    bfr.Add_byte(b);
    return 0;
  }
}
// Substitution item: Encode always writes repl and Decode always writes orig, regardless of the byte passed in.
class Url_encoder_itm_diff implements Url_encoder_itm {
  private byte orig, repl;
  public Url_encoder_itm_diff(byte orig, byte repl) {
    this.orig = orig;
    this.repl = repl;
  }
  public int Encode(Bry_bfr bfr, byte[] src, int end, int idx, byte b) {
    bfr.Add_byte(repl);
    return 0;
  }
  public int Decode(Bry_bfr bfr, byte[] src, int end, int idx, byte b, boolean fail_when_invalid) {
    bfr.Add_byte(orig);
    return 0;
  }
}
// Hex item: encodes a byte as marker + two upper-case hex digits (EX: %2F) and decodes that form back to one byte.
class Url_encoder_itm_hex implements Url_encoder_itm {
  // digit table indexed by nibble value
  public static final byte[] HexBytes = new byte[]
  { Byte_ascii.Num_0, Byte_ascii.Num_1, Byte_ascii.Num_2, Byte_ascii.Num_3, Byte_ascii.Num_4, Byte_ascii.Num_5, Byte_ascii.Num_6, Byte_ascii.Num_7
  , Byte_ascii.Num_8, Byte_ascii.Num_9, Byte_ascii.Ltr_A, Byte_ascii.Ltr_B, Byte_ascii.Ltr_C, Byte_ascii.Ltr_D, Byte_ascii.Ltr_E, Byte_ascii.Ltr_F
  };
  private byte encode_marker;
  public Url_encoder_itm_hex(byte encode_marker) {
    this.encode_marker = encode_marker;
  }
  public int Encode(Bry_bfr bfr, byte[] src, int end, int idx, byte b) {
    Encode_byte(b, bfr, encode_marker);
    return 0;
  }
  // Appends encode_marker followed by the high and low hex digit of b.
  public static void Encode_byte(byte b, Bry_bfr bfr, byte encode_marker) {
    int unsigned = b & 0xFF;// PATCH.JAVA:need to convert to unsigned byte
    byte hi_digit = HexBytes[unsigned >> 4];
    byte lo_digit = HexBytes[unsigned & 15];
    bfr.Add_byte(encode_marker);
    bfr.Add_byte(hi_digit);
    bfr.Add_byte(lo_digit);
  }
  // b is the marker at src[idx]. On success writes the decoded byte and returns 2 (the two digits consumed).
  // Otherwise writes b unchanged and returns 0, except that fail_when_invalid=true throws when
  // fewer than 3 bytes remain or when the second digit is not hex. An invalid first digit never throws.
  public int Decode(Bry_bfr bfr, byte[] src, int end, int idx, byte b, boolean fail_when_invalid) {
    boolean too_short = idx + 2 >= end;
    if (too_short && fail_when_invalid)
      throw Err_.new_wo_type("decode needs 3 bytes", "idx", idx, "len", end, "snip", String_.new_u8(Bry_.Mid_by_len_safe(src, idx, 3)));
    if (too_short) {
      bfr.Add_byte(b);
      return 0;
    }
    int hi = Int_.Xto_int_hex(src[idx + 1]);
    if (hi == -1) { // invalid hex byte; EX: %GC; DATE:2014-04-10
      bfr.Add_byte(b);
      return 0;
    }
    int lo = Int_.Xto_int_hex(src[idx + 2]);
    if (lo != -1) {
      bfr.Add_byte((byte)(hi * 16 + lo));
      return 2;
    }
    if (fail_when_invalid)
      throw Err_.new_wo_type("decode is invalid", "idx", idx, "snip", String_.new_u8(Bry_.Mid_by_len_safe(src, idx, 3)));
    bfr.Add_byte(b);
    return 0;
  }
}
// Item for '&': when the bytes after the & match an entry in amp_trie, the entry's utf8 bytes are written
// hex-encoded with '.' as marker; otherwise the & alone is written hex-encoded with '.'.
class Url_encoder_itm_html_ent implements Url_encoder_itm {
  Btrie_slim_mgr amp_trie;
  public Url_encoder_itm_html_ent(Btrie_slim_mgr amp_trie) {
    this.amp_trie = amp_trie;
  }
  public int Encode(Bry_bfr bfr, byte[] src, int end, int idx, byte b) {
    int nxt = idx + 1; // b is &; matching starts at the character after it
    // no lookup when & is the last char
    Object found = nxt == end ? null : amp_trie.Match_bgn_w_byte(src[nxt], src, nxt, end);
    if (found == null) { // & is last char, or unknown entity (EX:&unknown;); write & only
      Url_encoder_itm_hex.Encode_byte(Byte_ascii.Amp, bfr, Byte_ascii.Dot);
      return 0;
    }
    Xop_amp_trie_itm ent = (Xop_amp_trie_itm)found;
    byte[] ent_u8 = ent.U8_bry(); // NOTE: must utf8 encode val; EX: &nbsp; is 160 but must become 192,160
    for (byte u8_byte : ent_u8)
      Url_encoder_itm_hex.Encode_byte(u8_byte, bfr, Byte_ascii.Dot);
    return ent.Xml_name_bry().length - 1; // -1 to ignore & in XmlEntityName
  }
  // Decoding is a pass-through: b is written unchanged.
  public int Decode(Bry_bfr bfr, byte[] src, int end, int idx, byte b, boolean fail_when_invalid) {
    bfr.Add_byte(b);
    return 0;
  }
}

View File

@@ -0,0 +1,30 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.htmls.encoders; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
/** Holds one instance of each Url_encoder flavor, created once when the manager is constructed. */
public class Url_encoder_mgr {
	// encoders are created in declaration order via the Url_encoder factory methods
	private final Url_encoder file = Url_encoder.new_file_();
	private final Url_encoder http_url = Url_encoder.new_http_url_();
	private final Url_encoder http_url_ttl = Url_encoder.new_http_url_ttl_();
	private final Url_encoder html_id = Url_encoder.new_html_id_();
	private final Url_encoder href = Url_encoder.new_html_href_mw_();
	private final Url_encoder href_quotes = Url_encoder.new_html_href_quotes_();
	private final Url_encoder gfs = Url_encoder.new_gfs_();
	private final Url_encoder fsys = Url_encoder.new_fsys_lnx_();
	private final Url_encoder fsys_safe = Url_encoder.new_fsys_wnt_();
	private final Url_encoder xourl = Url_encoder.new_html_href_mw_().Itms_raw_same_many(Byte_ascii.Underline);

	public Url_encoder File() {
		return file;
	}
	public Url_encoder Http_url() {
		return http_url;
	}
	public Url_encoder Http_url_ttl() {
		return http_url_ttl;
	}
	/** Encoder built by Url_encoder.new_html_id_(). */
	public Url_encoder Id() {
		return html_id;
	}
	/** Encoder built by Url_encoder.new_html_href_mw_(). */
	public Url_encoder Href() {
		return href;
	}
	public Url_encoder Href_quotes() {
		return href_quotes;
	}
	public Url_encoder Gfs() {
		return gfs;
	}
	/** Encoder built by Url_encoder.new_fsys_lnx_(). */
	public Url_encoder Fsys() {
		return fsys;
	}
	/** Encoder built by Url_encoder.new_fsys_wnt_(). */
	public Url_encoder Fsys_safe() {
		return fsys_safe;
	}
	/** Same as the href encoder, with Byte_ascii.Underline additionally registered through Itms_raw_same_many. */
	public Url_encoder Xourl() {
		return xourl;
	}
}

View File

@@ -0,0 +1,72 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.htmls.encoders; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
import org.junit.*;
// Unit tests for Url_encoder, driven through Url_encoder_fxt: html-id encoder ('.' marker), http-url encoder ('%' marker), href encoder and fsys-safe encoder.
public class Url_encoder_tst {
@Before public void init() {fxt = new Url_encoder_fxt();} Url_encoder_fxt fxt;
// html-id encoder: digits and ascii letters pass through unchanged
@Test public void Id_nums() {fxt.Encoder_id().Test_encode_decode("0123456789", "0123456789");}
@Test public void Id_ltrs_lower() {fxt.Encoder_id().Test_encode_decode("abcdefghijklmnopqrstuvwxyz", "abcdefghijklmnopqrstuvwxyz");}
@Test public void Id_ltrs_upper() {fxt.Encoder_id().Test_encode_decode("ABCDEFGHIJKLMNOPQRSTUVWXYZ", "ABCDEFGHIJKLMNOPQRSTUVWXYZ");}
@Test public void Id_syms() {fxt.Encoder_id().Test_encode("!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", ".21.22.23.24.25.26.27.28.29.2A.2B.2C-..2F:.3B.3C.3D.3E.3F.40.5B.5C.5D.5E_.60.7B.7C.7D.7E");} // NOTE: not reversible since "." is encode_marker but not encoded
// non-ascii text is encoded per utf8 byte
@Test public void Id_foreign() {fxt.Encoder_id().Test_encode_decode("aéb", "a.C3.A9b");}
@Test public void Id_space() {fxt.Encoder_id().Test_encode_decode("a b", "a_b");}
// decoding with fail_when_invalid = false must leave the text unchanged rather than throw
@Test public void Id_err() {
byte[] raw = Bry_.new_a7("0%.jpg");
Bry_bfr tmp_bfr = Bry_bfr.new_();
fxt.Encoder_id().Encoder().Decode(raw, 0, raw.length, tmp_bfr, false);
Tfds.Eq("0%.jpg", tmp_bfr.Xto_str_and_clear());
}
@Test public void Id_nbsp() {fxt.Encoder_id().Test_encode("a&nbsp;b", "a.C2.A0b");} // NOTE: not just .A0 (160) but utf8-encoded .C2.A0
// http-url encoder: '%' marker; space becomes '+'
@Test public void Url_syms() {fxt.Encoder_url().Test_encode_decode("!?^~", "%21%3F%5E%7E");}
@Test public void Url_foreign() {fxt.Encoder_url().Test_encode_decode("aéb", "a%C3%A9b");}
@Test public void Url_space() {fxt.Encoder_url().Test_encode_decode("a b", "a+b");}
// NOTE: despite the name, this exercises the href encoder (space becomes '_')
@Test public void File_space() {
fxt.Encoder_href().Test_encode("a b", "a_b");
// fxt.Encoder_url().tst_decode("a_b", "a_b");
}
@Test public void Href_special_and_anchor() { // PURPOSE: MediaWiki encodes with % for ttls, but . for anchors; REF:Title.php!(before-anchor)getLocalUrl;wfUrlencode (after-anchor)escapeFragmentForURL
fxt.Encoder_href().Test_encode("^#^", "%5E#.5E");
fxt.Encoder_href().Test_encode("A#", "A#");
fxt.Encoder_href().tst_decode("%5E#.5E", "^#^");
}
// fsys-safe encoder: ':' and '/' are %-encoded
@Test public void Fsys_wnt() {
fxt.Encoder_fsys_safe().Test_encode("Help:Options/HTML", "Help%3AOptions%2FHTML");
}
@Test public void Invalid_url_decode() { // PURPOSE: check that invalid url decodings are rendered literally; DATE:2014-04-10
fxt.Encoder_href().Test_encode("%GC", "%25GC");
}
}
/** Test fixture: selects a Url_encoder flavor, then checks encode / decode results against expected strings. */
class Url_encoder_fxt {
	Url_encoder encoder;
	public Url_encoder Encoder() {return encoder;}
	public Url_encoder_fxt Encoder_id() {return Encoder_(Url_encoder.new_html_id_());}
	public Url_encoder_fxt Encoder_href() {return Encoder_(Url_encoder.new_html_href_mw_());}
	public Url_encoder_fxt Encoder_url() {return Encoder_(Url_encoder.new_http_url_());}
	public Url_encoder_fxt Encoder_fsys_safe() {return Encoder_(Url_encoder.new_fsys_wnt_());}
	// stores the encoder under test and returns the fixture for chaining
	private Url_encoder_fxt Encoder_(Url_encoder v) {
		this.encoder = v;
		return this;
	}
	/** Checks that raw encodes to encoded, and that encoded decodes back to raw. */
	public void Test_encode_decode(String raw, String encoded) {
		Test_encode(raw, encoded);
		tst_decode(encoded, raw);
	}
	/** Checks that encoding raw yields expd. */
	public void Test_encode(String raw, String expd) {
		byte[] raw_bry = Bry_.new_u8(raw);
		String actl = String_.new_u8(encoder.Encode(raw_bry));
		Tfds.Eq(expd, actl);
	}
	/** Checks that decoding raw yields expd. */
	public void tst_decode(String raw, String expd) {
		byte[] raw_bry = Bry_.new_u8(raw);
		String actl = String_.new_u8(encoder.Decode(raw_bry));
		Tfds.Eq(expd, actl);
	}
}

View File

@@ -0,0 +1,24 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.htmls.parsers; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
/** Immutable value holder for a source byte array plus a bgn / end position pair within it. */
class Gfo_html_node {
	private final byte[] src;
	private final int bgn;
	private final int end;
	public Gfo_html_node(byte[] src, int bgn, int end) {
		this.src = src;
		this.bgn = bgn;
		this.end = end;
	}
	public byte[] Src() {
		return src;
	}
	public int Bgn() {
		return bgn;
	}
	public int End() {
		return end;
	}
}

View File

@@ -0,0 +1,69 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.htmls.parsers; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
import gplx.core.btries.*; import gplx.core.primitives.*;
import gplx.xowa.*;
import gplx.xowa.parsers.xndes.*;
/**
 * Minimal HTML scanner: walks src looking for '<', asks the handler whether the tag name is known,
 * and if so parses the tag's attributes and passes them to the matching token.
 */
class Gfo_html_parser {
	private final Gfo_msg_log msg_log = Gfo_msg_log.Test();
	private final Xop_xatr_parser xatr_parser = new Xop_xatr_parser();
	/**
	 * Scans src[bgn..end) for tags.
	 * NOTE: the scan is limited to [bgn, end). Previously it ran over [0, src.length) while Parse_node searched only up to end,
	 * so a '<' located at or after end reset the position back to end and the loop never terminated.
	 */
	public void Parse(Gfo_html_wkr handler, byte[] src, int bgn, int end) {
		int pos = bgn;
		while (pos < end) {
			if (src[pos] == Byte_ascii.Angle_bgn)
				pos = Parse_node(handler, src, end, pos, pos + 1); // always returns a position > pos, so the loop advances
			else
				++pos;
		}
	}
	/**
	 * Handles one tag whose '<' is at tkn_bgn (tkn_end is the position right after it).
	 * Returns the position at which scanning should resume.
	 */
	private int Parse_node(Gfo_html_wkr handler, byte[] src, int end, int tkn_bgn, int tkn_end) {
		int name_bgn = tkn_end;
		int name_end = Bry_find_.Find_fwd_until_ws(src, name_bgn, end);
		if (name_end == Bry_find_.Not_found) return end;	// EOS; EX: "<abcEOS"
		if (name_bgn == name_end) return tkn_end;			// ws; EX: "< "
		Gfo_html_tkn tkn = handler.Get_or_null(src, name_bgn, name_end);
		if (tkn == null) return name_end;					// unknown name: EX: "<unknown >"
		int node_end = Bry_find_.Find_fwd(src, Byte_ascii.Angle_end, name_end, end);
		if (node_end == Bry_find_.Not_found) return end;	// EOS; EX: "<name lots_of_text_but_no_gt EOS"
		Xop_xatr_itm[] xatr_ary = xatr_parser.Parse(msg_log, src, name_end, node_end);
		tkn.Process(src, Xop_xatr_hash.new_ary(src, xatr_ary));
		return node_end;	// position of '>'; caller steps past it on the next iteration
	}
}

View File

@@ -0,0 +1,22 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.htmls.parsers; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
// Callback passed to Gfo_html_parser.Parse.
interface Gfo_html_wkr {
// Looks up the token for the tag name at src[bgn..end); Gfo_html_parser treats a null result as an unknown tag and skips it.
Gfo_html_tkn Get_or_null(byte[] src, int bgn, int end);
// NOTE(review): not called by Gfo_html_parser as visible here; presumably receives a parsed node -- confirm against implementers.
void Process(Gfo_html_node node);
}

View File

@@ -0,0 +1,34 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.htmls.parsers; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
import gplx.xowa.*;
import gplx.xowa.parsers.xndes.*;
// Handler for one recognized HTML tag type.
interface Gfo_html_tkn {
// Numeric type id of the tag; ids are declared in Gfo_html_tkn_ (EX: Tid_link).
int Tid();
// Tag name as bytes; names are declared in Gfo_html_tkn_ (EX: Key_link is "link").
byte[] Key();
// Called by Gfo_html_parser for a matching tag, with the full source and the hash built from the tag's parsed attributes.
void Process(byte[] src, Xop_xatr_hash hash);
}
// Constants for the known Gfo_html_tkn types: a numeric id (Tid_*) and the tag name bytes (Key_*).
class Gfo_html_tkn_ {
// id returned by Gfo_html_tkn__link.Tid()
public static final int Tid_link = 1;
// name returned by Gfo_html_tkn__link.Key()
public static final byte[] Key_link = Bry_.new_a7("link");
}
/** Token for the "link" tag; Process is an empty hook marked virtual. */
class Gfo_html_tkn__link implements Gfo_html_tkn {
	public int Tid() {
		return Gfo_html_tkn_.Tid_link;
	}
	public byte[] Key() {
		return Gfo_html_tkn_.Key_link;
	}
	@gplx.Virtual public void Process(byte[] src, Xop_xatr_hash hash) {
		// default implementation does nothing
	}
}

View File

@@ -0,0 +1,79 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.jsons; import gplx.*; import gplx.langs.*;
/** JSON array: a growable list of Json_itm children that can print itself as indented JSON. */
public class Json_ary extends Json_itm_base implements Json_grp {
	private Json_itm[] subs = Json_itm_.Ary_empty;
	private int subs_len = 0, subs_max = 0;
	public Json_ary(int src_bgn, int src_end) {this.Ctor(src_bgn, src_end);}
	@Override public byte Tid() {return Json_itm_.Tid__ary;}
	public void Src_end_(int v) {this.src_end = v;}
	@Override public Object Data() {return null;}
	@Override public byte[] Data_bry() {return null;}
	/** Number of children added so far. */
	public int Len() {return subs_len;}
	/** Returns child i without any type check. */
	public Json_itm Get_at(int i) {return subs[i];}
	/** Returns child i as a Json_nde; throws if the child is of any other type. */
	public Json_nde Get_at_as_nde(int i) {
		Json_itm rv = subs[i];
		if (rv.Tid() != Json_itm_.Tid__nde)
			throw Err_.new_("json", "itm is not nde", "type", rv.Tid(), "i", i);
		return (Json_nde)rv;
	}
	/** Appends every item of ary in order; returns this for chaining. */
	public Json_ary Add_many(Json_itm... ary) {
		for (Json_itm itm : ary)
			Add(itm);
		return this;
	}
	/** Appends one item, growing the backing array to twice the needed size when it is full. */
	public void Add(Json_itm itm) {
		int idx = subs_len;
		if (idx >= subs_max) { // backing array is full >>> expand
			subs_max = (idx + 1) * 2;
			Json_itm[] grown = new Json_itm[subs_max];
			Array_.Copy_to(subs, 0, grown, 0, idx);
			subs = grown;
		}
		subs[idx] = itm;
		subs_len = idx + 1;
	}
	/** Prints "[]" for an empty array; else prints one child per line, with ", " before every child but the first. */
	@Override public void Print_as_json(Bry_bfr bfr, int depth) {
		if (subs_len == 0) { // empty grp; keep on one line instead of spreading over three
			bfr.Add_byte(Byte_ascii.Brack_bgn).Add_byte(Byte_ascii.Brack_end);
			return;
		}
		bfr.Add_byte_nl();
		Json_grp_.Print_indent(bfr, depth);
		bfr.Add_byte(Byte_ascii.Brack_bgn).Add_byte(Byte_ascii.Space);
		subs[0].Print_as_json(bfr, depth + 1); // first child follows "[ " directly
		for (int i = 1; i < subs_len; i++) {
			Json_grp_.Print_nl(bfr);
			Json_grp_.Print_indent(bfr, depth);
			bfr.Add_byte(Byte_ascii.Comma).Add_byte(Byte_ascii.Space);
			subs[i].Print_as_json(bfr, depth + 1);
		}
		Json_grp_.Print_nl(bfr);
		Json_grp_.Print_indent(bfr, depth);
		bfr.Add_byte(Byte_ascii.Brack_end).Add_byte_nl();
	}
	/** Returns the Data_bry() of every child, or Bry_.Ary_empty when there are no children. */
	public byte[][] Xto_bry_ary() {
		if (subs_len == 0) return Bry_.Ary_empty;
		byte[][] rv = new byte[subs_len][];
		int i = 0;
		while (i < subs_len) {
			rv[i] = subs[i].Data_bry();
			++i;
		}
		return rv;
	}
	/** Returns v as a Json_ary, or null when v is null or not an array. */
	public static Json_ary cast_or_null(Json_itm v) {
		if (v == null) return null;
		if (v.Tid() != Json_itm_.Tid__ary) return null;
		return (Json_ary)v;
	}
	/** Returns v as a Json_ary; throws when v is null or not an array. */
	public static Json_ary cast(Json_itm v) {
		Json_ary rv = cast_or_null(v);
		if (rv == null) throw Err_.new_("json", "itm is not array");
		return rv;
	}
}

View File

@@ -0,0 +1,74 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.jsons; import gplx.*; import gplx.langs.*;
import gplx.core.primitives.*;
/**
 * Parsed JSON document: keeps the source bytes and the root group (object or array), plus a few
 * shared utility objects, and offers key-path lookups starting at the root object.
 */
public class Json_doc {
	private final byte[][] tmp_qry_bry = new byte[1][]; // shared 1-element path; overwritten by every single-key overload
	/** Sets the source and root; exactly one of Root_nde / Root_ary is non-null afterwards, depending on the root's type. */
	public void Ctor(byte[] src, Json_grp new_root) {
		this.src = src;
		this.root_grp = new_root;
		switch (root_grp.Tid()) {
			case Json_itm_.Tid__nde:	this.root_ary = null; this.root_nde = (Json_nde)root_grp; break;
			case Json_itm_.Tid__ary:	this.root_nde = null; this.root_ary = (Json_ary)root_grp; break;
			default:					throw Err_.new_unhandled(root_grp.Tid());
		}
	}
	public byte[] Src() {return src;} private byte[] src;
	public Json_grp Root_grp() {return root_grp;} private Json_grp root_grp;
	public Json_nde Root_nde() {return root_nde;} private Json_nde root_nde;
	public Json_ary Root_ary() {return root_ary;} private Json_ary root_ary;
	public Bry_bfr Bfr() {return bfr;} private final Bry_bfr bfr = Bry_bfr.new_();
	public Number_parser Utl_num_parser() {return utl_num_parser;} private final Number_parser utl_num_parser = new Number_parser();
	public byte[] Tmp_u8_bry() {return tmp_u8_bry;} private final byte[] tmp_u8_bry = new byte[6];	// tmp bry[] for decoding sequences like \u0008
	/** Single-key form of Get_val_as_bry_or(byte[][], byte[]). */
	public byte[] Get_val_as_bry_or(byte[] qry_bry, byte[] or) {tmp_qry_bry[0] = qry_bry; return Get_val_as_bry_or(tmp_qry_bry, or);}
	/** Returns the bytes of the string value found at the key path; returns or when nothing is found or the value is not a string. */
	public byte[] Get_val_as_bry_or(byte[][] qry_bry, byte[] or) {
		Json_itm nde = Find_nde(root_nde, qry_bry, qry_bry.length - 1, 0);
		return nde == null || nde.Tid() != Json_itm_.Tid__str ? or : nde.Data_bry();
	}
	/** Single-key form of Get_val_as_str_or(byte[][], String). */
	public String Get_val_as_str_or(byte[] qry_bry, String or) {tmp_qry_bry[0] = qry_bry; return Get_val_as_str_or(tmp_qry_bry, or);}
	/** Returns the string value found at the key path; returns or when nothing is found or the value is not a string. */
	public String Get_val_as_str_or(byte[][] qry_bry, String or) {
		Json_itm nde = Find_nde(root_nde, qry_bry, qry_bry.length - 1, 0);
		return nde == null || nde.Tid() != Json_itm_.Tid__str ? or : (String)nde.Data();
	}
	/** Returns the value under the given root-level key cast to Json_grp, or null when the key is absent. NOTE: the cast is unchecked. */
	public Json_grp Get_grp(byte[] qry_bry) {
		tmp_qry_bry[0] = qry_bry;
		Json_itm rv = Find_nde(root_nde, tmp_qry_bry, 0, 0); if (rv == null) return null;
		return (Json_grp)rv;
	}
	/** Returns the value at the key path cast to Json_grp, or null when the path is not found. NOTE: the cast is unchecked. */
	public Json_grp Get_grp(byte[][] qry_bry) {
		Json_itm rv = Find_nde(root_nde, qry_bry, qry_bry.length - 1, 0); if (rv == null) return null;
		return (Json_grp)rv;
	}
	/** Returns the value under the given root-level key, or null when absent. */
	public Json_itm Find_nde(byte[] key) {
		tmp_qry_bry[0] = key;
		return Find_nde(root_nde, tmp_qry_bry, 0, 0);
	}
	/**
	 * Walks the key path one level per recursion: finds the kv in owner whose key equals paths[paths_idx];
	 * returns its value at the last path segment, else descends into it when it is a Json_nde.
	 * Returns null when any segment is missing or a non-final segment's value is not a Json_nde.
	 */
	private Json_itm Find_nde(Json_nde owner, byte[][] paths, int paths_last, int paths_idx) {
		if (owner == null) return null;					// root is an array (root_nde is null); nothing to search by key
		if (paths_idx >= paths.length) return null;		// empty path; nothing to match
		byte[] path = paths[paths_idx];
		int subs_len = owner.Len();
		for (int i = 0; i < subs_len; i++) {
			Json_kv itm = Json_kv.cast(owner.Get_at(i)); if (itm == null) continue;	// ignore simple props, arrays, ndes
			if (!itm.Key_eq(path)) continue;
			if (paths_idx == paths_last) return itm.Val();
			Json_nde sub_nde = Json_nde.cast(itm.Val()); if (sub_nde == null) return null;	// match, but val is not a nde; exit
			return Find_nde(sub_nde, paths, paths_last, paths_idx + 1);
		}
		return null;
	}
	/** Joins the lines with newlines (no trailing newline) and replaces every apostrophe with a double quote. */
	public static String Make_str_by_apos(String... ary) {return String_.Replace(String_.Concat_lines_nl_skip_last(ary), "'", "\"");}
}

View File

@@ -0,0 +1,42 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.jsons; import gplx.*; import gplx.langs.*;
/** Convenience builder: creates JSON nodes, key-value pairs and arrays through Json_factory and attaches them to an owner group. */
public class Json_doc_bldr {
	Json_doc doc = new Json_doc();
	Json_factory factory = new Json_factory();
	/** Creates a detached node (source position -1). */
	public Json_nde Nde(Json_doc jdoc) {
		return factory.Nde(jdoc, -1);
	}
	/** Creates a node (source position -1) and appends it to owner. */
	public Json_nde Nde(Json_doc jdoc, Json_grp owner) {
		Json_nde nde = factory.Nde(jdoc, -1);
		owner.Add(nde);
		return nde;
	}
	public Json_itm Str(byte[] v) {
		return Str(String_.new_u8(v));
	}
	public Json_itm Str(String v) {
		return Json_itm_tmp.new_str_(v);
	}
	public Json_itm Int(int v) {
		return Json_itm_tmp.new_int_(v);
	}
	/** Appends key:int to owner and returns the new kv. */
	public Json_kv Kv_int(Json_grp owner, String key, int val) {
		Json_kv kv = factory.Kv(Json_itm_tmp.new_str_(key), Json_itm_tmp.new_int_(val));
		owner.Add(kv);
		return kv;
	}
	/** Appends key:string to owner and returns the new kv. */
	public Json_kv Kv_str(Json_grp owner, String key, String val) {
		Json_kv kv = factory.Kv(Json_itm_tmp.new_str_(key), Json_itm_tmp.new_str_(val));
		owner.Add(kv);
		return kv;
	}
	/** Appends key:[subs...] to owner and returns the new array (not the kv). */
	public Json_ary Kv_ary(Json_grp owner, String key, Json_itm... subs) {
		Json_itm key_itm = Json_itm_tmp.new_str_(key);
		Json_ary val_ary = factory.Ary(-1, -1);
		owner.Add(factory.Kv(key_itm, val_ary)); // kv is attached to owner before the array is filled
		for (Json_itm sub : subs)
			val_ary.Add(sub);
		return val_ary;
	}
}

View File

@@ -0,0 +1,89 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.jsons; import gplx.*; import gplx.langs.*;
/**
 * Serializes key / value pairs into a byte buffer as JSON-like text.
 * Whitespace (indent, newlines, padding) is written only when Ws_enabled is true.
 */
public class Json_doc_srl {
	private int indent = -1;
	private Bry_bfr bfr = Bry_bfr.reset_(255);
	private boolean ws_enabled = false;
	public boolean Ws_enabled() {return ws_enabled;}
	public void Ws_enabled_(boolean v) {ws_enabled = v;}
	/** Returns the accumulated bytes and clears the buffer. */
	public byte[] Bld() {return bfr.Xto_bry_and_clear();}
	/** Returns the accumulated text and clears the buffer. */
	public String Bld_as_str() {return bfr.Xto_str_and_clear();}
	/** Writes "{", the single key / value, then "}". */
	public Json_doc_srl Write_root(byte[] key, Object val) {
		Write_grp_bgn(Byte_ascii.Curly_bgn);
		Write_obj(false, key, val);
		Write_grp_end(Byte_ascii.Curly_end);
		return this;
	}
	/** Writes key plus val: as an array of strings when val is an array, else as one string. */
	public void Write_obj(boolean comma, byte[] key, Object val) {
		if (Type_adp_.Is_array(Type_adp_.ClassOf_obj(val))) {
			Write_kv_ary(comma, key, (Object[])val);
			return;
		}
		Write_kv_str(comma, key, Object_.Xto_str_strict_or_empty(val));
	}
	private void Write_kv_ary(boolean comma, byte[] key, Object[] val) {
		Write_key(comma, key);					// '"key":'
		Write_new_line();						// '\n'
		Write_grp_bgn(Byte_ascii.Brack_bgn);	// '[\n'
		Indent_add();							// items sit one level deeper than the brackets
		boolean itm_comma = false;				// no comma before the first item
		for (Object itm : val) {
			Write_itm_hdr(itm_comma);			// ', '
			Write_str(Bry_.new_u8(Object_.Xto_str_strict_or_null(itm)));
			Write_new_line();
			itm_comma = true;
		}
		Indent_del();
		Write_grp_end(Byte_ascii.Brack_end);
	}
	private void Write_kv_str(boolean comma, byte[] key, String val) {
		Write_key(comma, key);			// "key":
		Write_str(Bry_.new_u8(val));	// "val"
		Write_new_line();				// \n
	}
	// NOTE(review): comma is accepted but never used here, so no separator is written before a key -- confirm whether intended
	private void Write_key(boolean comma, byte[] key) {
		Write_indent();
		Write_str(key);
		bfr.Add_byte(Byte_ascii.Colon);
	}
	private void Write_indent() {
		if (!ws_enabled) return;
		if (indent > 0) bfr.Add_byte_repeat(Byte_ascii.Space, indent);
	}
	// writes v between double quotes (no escaping); a null v is written as Object_.Bry__null without quotes
	private void Write_str(byte[] v) {
		if (v == null) {
			bfr.Add(Object_.Bry__null);
			return;
		}
		bfr.Add_byte(Byte_ascii.Quote).Add(v).Add_byte(Byte_ascii.Quote);
	}
	// writes "," (or a space placeholder when ws is enabled), followed by one more space when ws is enabled
	private void Write_comma(boolean comma) {
		if (comma)
			bfr.Add_byte(Byte_ascii.Comma);
		else if (ws_enabled)
			bfr.Add_byte(Byte_ascii.Space);
		if (ws_enabled)
			bfr.Add_byte(Byte_ascii.Space);
	}
	// opens a group: indent level is raised first, then the opening byte is written on its own line
	private void Write_grp_bgn(byte bgn_byte) {
		Indent_add();
		Write_indent();
		bfr.Add_byte(bgn_byte);
		Write_new_line();
	}
	// closes a group: the closing byte is written at the group's indent, then the indent level is lowered
	private void Write_grp_end(byte end_byte) {
		Write_indent();
		bfr.Add_byte(end_byte);
		Write_new_line();
		Indent_del();
	}
	private void Write_itm_hdr(boolean comma) {
		Write_indent();
		Write_comma(comma);
	}
	private void Indent_add() {indent += 2;}
	private void Indent_del() {indent -= 2;}
	private void Write_new_line() {
		if (ws_enabled) bfr.Add_byte_nl();
	}
}

View File

@@ -0,0 +1,46 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.jsons; import gplx.*; import gplx.langs.*;
import org.junit.*;
// Unit test for Json_doc key-path lookups; paths are written with '/' separators and split by the fixture.
public class Json_doc_tst {
private final Json_qry_mgr_fxt fxt = new Json_qry_mgr_fxt();
// looks up two nested string values by 3-segment path, and checks that an unknown key yields the null default
@Test public void Select() {
Json_doc doc = fxt.Make_json
( "{'0':"
, " {'0_0':"
, " {'0_0_0':'000'"
, " },"
, " '0_1':"
, " {'0_1_0':'010'"
, " }"
, " }"
, "}"
);
fxt.Test_get_val_as_str(doc, "0/0_0/0_0_0", "000");
fxt.Test_get_val_as_str(doc, "0/0_1/0_1_0", "010");
fxt.Test_get_val_as_str(doc, "x", null);
}
}
/** Test fixture: builds a Json_doc from apostrophe-quoted lines and checks string lookups by '/'-separated key path. */
class Json_qry_mgr_fxt {
	private final Json_parser json_parser = new Json_parser();
	public Json_doc Make_json(String... ary) {
		return json_parser.Parse_by_apos_ary(ary);
	}
	/** Splits qry on '/' and checks that the string value at that path equals expd (null default when not found). */
	public void Test_get_val_as_str(Json_doc doc, String qry, String expd) {
		byte[] qry_raw = Bry_.new_u8(qry);
		byte[][] path = Bry_split_.Split(qry_raw, Byte_ascii.Slash);
		String actl = doc.Get_val_as_str_or(path, null);
		Tfds.Eq(expd, actl);
	}
}

View File

@@ -0,0 +1,98 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.jsons; import gplx.*; import gplx.langs.*;
/**
 * Fluent writer for indented JSON text: every method appends to an internal buffer and returns this.
 * Strings are written between double quotes without escaping.
 */
public class Json_doc_wtr {
	private int indent = -2;
	private Bry_bfr bfr = Bry_bfr.reset_(255);
	/** Writes the current indent as spaces (nothing when the indent is not positive). */
	public Json_doc_wtr Indent() {return Indent(indent);}
	private Json_doc_wtr Indent(int v) {
		if (v > 0)
			bfr.Add_byte_repeat(Byte_ascii.Space, v);
		return this;
	}
	public Json_doc_wtr Indent_add() {
		indent += 2;
		return this;
	}
	public Json_doc_wtr Indent_del() {
		indent -= 2;
		return this;
	}
	public Json_doc_wtr Nde_bgn() {return Grp_bgn(Byte_ascii.Curly_bgn);}
	public Json_doc_wtr Nde_end() {return Grp_end(Byte_ascii.Curly_end);}
	public Json_doc_wtr Ary_bgn() {return Grp_bgn(Byte_ascii.Brack_bgn);}
	public Json_doc_wtr Ary_end() {return Grp_end(Byte_ascii.Brack_end);}
	// raises the indent, then writes the opening byte on its own line
	private Json_doc_wtr Grp_bgn(byte bgn_byte) {
		Indent_add();
		Indent();
		bfr.Add_byte(bgn_byte).Add_byte_nl();
		return this;
	}
	// writes the closing byte on its own line, then lowers the indent
	private Json_doc_wtr Grp_end(byte end_byte) {
		Indent();
		bfr.Add_byte(end_byte).Add_byte_nl();
		Indent_del();
		return this;
	}
	public Json_doc_wtr New_line() {
		bfr.Add_byte_nl();
		return this;
	}
	/** Writes v in double quotes; a null v is written as Object_.Bry__null without quotes. */
	public Json_doc_wtr Str(byte[] v) {
		if (v != null)
			bfr.Add_byte(Byte_ascii.Quote).Add(v).Add_byte(Byte_ascii.Quote);
		else
			bfr.Add(Object_.Bry__null);
		return this;
	}
	public Json_doc_wtr Int(int v) {
		bfr.Add_int_variable(v);
		return this;
	}
	public Json_doc_wtr Double(double v) {
		bfr.Add_double(v);
		return this;
	}
	/** Writes the indent followed by a lone comma and a newline. */
	public Json_doc_wtr Comma() {
		Indent();
		bfr.Add_byte(Byte_ascii.Comma).Add_byte_nl();
		return this;
	}
	/** Writes key:[] on one line. */
	public Json_doc_wtr Kv_ary_empty(boolean comma, byte[] key) {
		Key_internal(comma, key);
		bfr.Add_byte(Byte_ascii.Brack_bgn).Add_byte(Byte_ascii.Brack_end);
		bfr.Add_byte_nl();
		return this;
	}
	/** Writes key:"val" on one line. */
	public Json_doc_wtr Kv(boolean comma, byte[] key, byte[] val) {
		Key_internal(comma, key);
		Str(val);
		bfr.Add_byte_nl();
		return this;
	}
	/** Writes key:double on one line. */
	public Json_doc_wtr Kv_double(boolean comma, byte[] key, double v) {
		Key_internal(comma, key);
		Double(v);
		bfr.Add_byte_nl();
		return this;
	}
	/** Writes key:int on one line. */
	public Json_doc_wtr Kv(boolean comma, byte[] key, int v) {
		Key_internal(comma, key);
		Int(v);
		bfr.Add_byte_nl();
		return this;
	}
	/** Writes key: followed by a newline; the value is expected to be written by later calls. */
	public Json_doc_wtr Key(boolean comma, byte[] key) {
		Key_internal(comma, key);
		bfr.Add_byte_nl();
		return this;
	}
	/** Writes a bare int value (array item) on one line. */
	public Json_doc_wtr Val(boolean comma, int v) {
		Val_internal(comma);
		Int(v);
		New_line();
		return this;
	}
	/** Writes a bare string value (array item) on one line. */
	public Json_doc_wtr Val(boolean comma, byte[] v) {
		Val_internal(comma);
		Str(v);
		New_line();
		return this;
	}
	Json_doc_wtr Val_internal(boolean comma) {
		Itm_prefix(comma);
		return this;
	}
	Json_doc_wtr Key_internal(boolean comma, byte[] key) {
		Itm_prefix(comma);
		Str(key);
		bfr.Add_byte(Byte_ascii.Colon);
		return this;
	}
	// writes the indent, then "," or a space placeholder of the same width, then one space
	private void Itm_prefix(boolean comma) {
		Indent();
		byte sep = comma ? Byte_ascii.Comma : Byte_ascii.Space;
		bfr.Add_byte(sep);
		bfr.Add_byte(Byte_ascii.Space);
	}
	/** Returns the accumulated bytes and clears the buffer. */
	public byte[] Bld() {return bfr.Xto_bry_and_clear();}
	/** Returns the accumulated text and clears the buffer. */
	public String Bld_as_str() {return bfr.Xto_str_and_clear();}
}

View File

@@ -0,0 +1,29 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.jsons; import gplx.*; import gplx.langs.*;
/** Factory for JSON items: returns shared instances for null / true / false and new instances for everything else. */
public class Json_factory {
	// shared instances
	public Json_itm Null() {
		return Json_itm_null.Null;
	}
	public Json_itm Bool_n() {
		return Json_itm_bool.Bool_n;
	}
	public Json_itm Bool_y() {
		return Json_itm_bool.Bool_y;
	}

	// items created from a doc plus a bgn / end position pair
	public Json_itm_int Int(Json_doc doc, int bgn, int end) {
		return new Json_itm_int(doc, bgn, end);
	}
	public Json_itm Decimal(Json_doc doc, int bgn, int end) {
		return new Json_itm_decimal(doc, bgn, end);
	}
	public Json_itm Str(Json_doc doc, int bgn, int end, boolean exact) {
		return new Json_itm_str(doc, bgn, end, exact);
	}

	// composite items
	public Json_kv Kv(Json_itm key, Json_itm val) {
		return new Json_kv(key, val);
	}
	public Json_ary Ary(int bgn, int end) {
		return new Json_ary(bgn, end);
	}
	public Json_nde Nde(Json_doc doc, int bgn) {
		return new Json_nde(doc, bgn);
	}
}

View File

@@ -0,0 +1,34 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.jsons; import gplx.*; import gplx.langs.*;
// A JSON item that holds other items. Json_nde implements it in this package; the parser also assigns a Json_ary to it.
public interface Json_grp extends Json_itm {
// sets the end offset of the group in the source bytes
void Src_end_(int v);
// number of sub-items
int Len();
// sub-item at position i
Json_itm Get_at(int i);
// appends a sub-item
void Add(Json_itm itm);
}
// Static helpers shared by the group items when they print themselves as JSON.
class Json_grp_ {
	public static final Json_grp[] Ary_empty = new Json_grp[0];
	// Ends the current line unless the buffer already ends with a newline.
	// \n\n can be caused by nested groups (EX: "[[]]"); only print 1
	public static void Print_nl(Bry_bfr bfr) {
		int len = bfr.Len();
		if (len == 0) return;						// nothing written yet: no line to end, and no last byte to inspect (was index -1)
		if (bfr.Bfr()[len - 1] != Byte_ascii.Nl)
			bfr.Add_byte_nl();
	}
	// Writes two spaces per depth level; writes nothing when depth is 0 or negative.
	public static void Print_indent(Bry_bfr bfr, int depth) {
		if (depth > 0) bfr.Add_byte_repeat(Byte_ascii.Space, depth * 2); // indent
	}
}

View File

@@ -0,0 +1,59 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.jsons; import gplx.*; import gplx.langs.*;
// Common contract for every item of a JSON tree (literals, numbers, strings, key/value pairs, groups).
public interface Json_itm {
// type id; implementations in this package return one of the Json_itm_.Tid__* constants
byte Tid();
// begin offset of the item; the implementations here use -1 when the item has no source position
int Src_bgn();
// end offset of the item; the implementations here use -1 when the item has no source position
int Src_end();
// value as an Object; several implementations (null literal, key/value pair, object group) return null
Object Data();
// value as bytes; key/value pairs and object groups return null
byte[] Data_bry();
// appends the item's JSON text to bfr; depth is the nesting level used for indentation by groups
void Print_as_json(Bry_bfr bfr, int depth);
// true when the item's value equals comp; the base class returns false unless overridden
boolean Data_eq(byte[] comp);
}
// The JSON literal null. A single shared instance (Null) is used; it has no source position (-1, -1).
class Json_itm_null extends Json_itm_base {
Json_itm_null() {this.Ctor(-1, -1);}
@Override public byte Tid() {return Json_itm_.Tid__null;}
// the Object form of the value is a real Java null
@Override public Object Data() {return null;}
@Override public void Print_as_json(Bry_bfr bfr, int depth) {bfr.Add(Object_.Bry__null);}
// the byte form is Object_.Bry__null, not a Java null
@Override public byte[] Data_bry() {return Object_.Bry__null;}
public static final Json_itm_null Null = new Json_itm_null();
}
// The JSON literals true / false. Two shared instances (Bool_y, Bool_n) exist; neither has a source position.
class Json_itm_bool extends Json_itm_base {
	private boolean data;
	public Json_itm_bool(boolean data) {
		this.data = data;
		this.Ctor(-1, -1);
	}
	@Override public byte Tid() {return Json_itm_.Tid__bool;}
	// boxed boolean value
	@Override public Object Data() {return data;}
	@Override public byte[] Data_bry() {return Literal();}
	@Override public void Print_as_json(Bry_bfr bfr, int depth) {bfr.Add(Literal());}
	// bytes of the literal matching the stored value
	private byte[] Literal() {
		if (data) return Json_itm_.Bry__true;
		return Json_itm_.Bry__false;
	}
	public static final Json_itm_bool Bool_n = new Json_itm_bool(false);
	public static final Json_itm_bool Bool_y = new Json_itm_bool(true);
}
// Decimal number that refers to a slice of the document source; both the byte form and the parsed form are
// built on first request and then cached.
class Json_itm_decimal extends Json_itm_base {
	private final Json_doc doc;
	private Decimal_adp data;
	private byte[] data_bry;
	public Json_itm_decimal(Json_doc doc, int src_bgn, int src_end) {
		this.Ctor(src_bgn, src_end);
		this.doc = doc;
	}
	@Override public byte Tid() {return Json_itm_.Tid__decimal;}
	// parsed Decimal_adp; parsed from the source bytes on first call
	@Override public Object Data() {
		if (data != null) return data;
		data = Decimal_adp_.parse(String_.new_a7(this.Data_bry()));
		return data;
	}
	// copy of the source bytes of the number; copied on first call
	@Override public byte[] Data_bry() {
		if (data_bry != null) return data_bry;
		data_bry = Bry_.Mid(doc.Src(), this.Src_bgn(), this.Src_end());
		return data_bry;
	}
	// prints the number exactly as it appears in the source
	@Override public void Print_as_json(Bry_bfr bfr, int depth) {
		bfr.Add_mid(doc.Src(), this.Src_bgn(), this.Src_end());
	}
}

View File

@@ -0,0 +1,28 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.jsons; import gplx.*; import gplx.langs.*;
// Constants and helpers for Json_itm.
public class Json_itm_ {
public static final Json_itm[] Ary_empty = new Json_itm[0];
// type ids returned by Json_itm.Tid()
public static final byte Tid__unknown = 0, Tid__null = 1, Tid__bool = 2, Tid__int = 3, Tid__decimal = 4, Tid__str = 5, Tid__kv = 6, Tid__ary = 7, Tid__nde = 8;
// byte forms of the three JSON literals
public static final byte[] Bry__true = Bool_.True_bry, Bry__false = Bool_.False_bry, Bry__null = Object_.Bry__null;
// Prints itm as JSON into bfr and returns the result; bfr is cleared by the call.
// A null itm yields Bry_.Empty, and bfr is left untouched in that case.
public static byte[] To_bry(Bry_bfr bfr, Json_itm itm) {
if (itm == null) return Bry_.Empty;
itm.Print_as_json(bfr, 0);
return bfr.Xto_bry_and_clear();
}
}

View File

@@ -0,0 +1,29 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.jsons; import gplx.*; import gplx.langs.*;
// Base class for items: stores the begin / end offsets and supplies defaults for Data_eq and String printing.
public abstract class Json_itm_base implements Json_itm {
public abstract byte Tid();
// stores the offsets; subclasses call this from their constructors instead of a super(...) call
public void Ctor(int src_bgn, int src_end) {this.src_bgn = src_bgn; this.src_end = src_end;}
public int Src_bgn() {return src_bgn;} private int src_bgn;
// src_end is protected so that subclasses can assign it after construction (Json_nde.Src_end_ does)
public int Src_end() {return src_end;} protected int src_end;
public abstract Object Data();
public abstract byte[] Data_bry();
// convenience overload: prints into a new buffer at depth 0 and returns the text
public String Print_as_json() {Bry_bfr bfr = Bry_bfr.new_(); Print_as_json(bfr, 0); return bfr.Xto_str_and_clear();}
public abstract void Print_as_json(Bry_bfr bfr, int depth);
// default: never equal; Json_itm_str overrides this
@gplx.Virtual public boolean Data_eq(byte[] comp) {return false;}
}

View File

@@ -0,0 +1,35 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.jsons; import gplx.*; import gplx.langs.*;
// Integer number that refers to a slice of the document source; the int value and the byte copy are
// computed on first request and then cached.
public class Json_itm_int extends Json_itm_base {
	private final Json_doc doc;
	private byte[] data_bry;
	private int data;
	private boolean data_is_null = true;	// true until data has been parsed
	public Json_itm_int(Json_doc doc, int src_bgn, int src_end) {
		this.Ctor(src_bgn, src_end);
		this.doc = doc;
	}
	@Override public byte Tid() {return Json_itm_.Tid__int;}
	// int value; parsed with the document's number parser on first call
	public int Data_as_int() {
		if (!data_is_null) return data;
		data = doc.Utl_num_parser().Parse(doc.Src(), Src_bgn(), Src_end()).Rv_as_int();
		data_is_null = false;
		return data;
	}
	// boxed int value
	@Override public Object Data() {return Data_as_int();}
	// copy of the source bytes of the number; copied on first call
	@Override public byte[] Data_bry() {
		if (data_bry != null) return data_bry;
		data_bry = Bry_.Mid(doc.Src(), this.Src_bgn(), this.Src_end());
		return data_bry;
	}
	// prints the number exactly as it appears in the source
	@Override public void Print_as_json(Bry_bfr bfr, int depth) {
		bfr.Add_mid(doc.Src(), this.Src_bgn(), this.Src_end());
	}
	// returns v as Json_itm_int, or null when v is null or is not an int item
	public static Json_itm_int cast(Json_itm v) {
		if (v == null) return null;
		if (v.Tid() != Json_itm_.Tid__int) return null;
		return (Json_itm_int)v;
	}
}

View File

@@ -0,0 +1,78 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.jsons; import gplx.*; import gplx.langs.*;
// String item that refers to a slice of the document source.
// exact == true means the slice can be used as-is; otherwise the slice holds backslash escapes that are
// decoded by Data_make_bry. The decoded bytes and the String form are cached after first use.
class Json_itm_str extends Json_itm_base {
private final boolean exact; private final Json_doc doc;
private String data_str; private byte[] data_bry = null;
// the stored range is narrowed by one byte on each side (src_bgn + 1, src_end - 1); the parser passes offsets that include the quotes
public Json_itm_str(Json_doc doc, int src_bgn, int src_end, boolean exact) {this.Ctor(src_bgn + 1, src_end - 1); this.doc = doc; this.exact = exact;}
@Override public byte Tid() {return Json_itm_.Tid__str;}
// prints the stored source range between two quote bytes, passing it through Html_utl.Escape_html_to_bfr
@Override public void Print_as_json(Bry_bfr bfr, int depth) {
bfr.Add_byte(Byte_ascii.Quote);
gplx.langs.htmls.Html_utl.Escape_html_to_bfr(bfr, doc.Src(), this.Src_bgn(), this.Src_end(), true, true, true, true, false); // false to apos for backwards compatibility
bfr.Add_byte(Byte_ascii.Quote);
}
// value as a String; built from the byte form on first call
@Override public Object Data() {
if (data_str == null) {
if (data_bry == null)
data_bry = Data_make_bry();
data_str = String_.new_u8(data_bry);
}
return data_str;
}
// value as bytes; built on first call
@Override public byte[] Data_bry() {if (data_bry == null) data_bry = Data_make_bry(); return data_bry;}
// compares the value with comp; an exact string is compared directly against the source range without copying
@Override public boolean Data_eq(byte[] comp) {
if (exact) return Bry_.Eq(doc.Src(), this.Src_bgn(), this.Src_end(), comp);
if (data_bry == null) data_bry = Data_make_bry();
return Bry_.Match(data_bry, comp);
}
// Builds the byte form: a plain copy for exact strings, otherwise a copy with backslash escapes decoded.
// Uses the document's shared buffer (doc.Bfr()) and scratch array (doc.Tmp_u8_bry()).
private byte[] Data_make_bry() {
byte[] src = doc.Src(); int bgn = this.Src_bgn(), end = this.Src_end();
if (exact) return Bry_.Mid(src, bgn, end);
Bry_bfr bfr = doc.Bfr();
byte[] utf8_bry = doc.Tmp_u8_bry();
for (int i = bgn; i < end; i++) {
byte b = src[i];
switch (b) {
case Byte_ascii.Backslash:
b = src[++i];	// byte after the backslash selects the escape; no bounds check here
switch (b) { // NOTE: must properly unescape chars; EX:wd.q:2; DATE:2014-04-23
case Byte_ascii.Ltr_t: bfr.Add_byte(Byte_ascii.Tab); break;
case Byte_ascii.Ltr_n: bfr.Add_byte(Byte_ascii.Nl); break;
case Byte_ascii.Ltr_r: bfr.Add_byte(Byte_ascii.Cr); break;
case Byte_ascii.Ltr_b: bfr.Add_byte(Byte_ascii.Backfeed); break;
case Byte_ascii.Ltr_f: bfr.Add_byte(Byte_ascii.Formfeed); break;
case Byte_ascii.Ltr_u:
// the four bytes after 'u' are parsed as hex (-1 is the fallback passed to parse_or), encoded into the scratch array, then appended
// NOTE(review): each \\u escape is handled on its own; whether surrogate pairs and the -1 fallback are handled depends on Utf16_.Encode_int -- confirm
int utf8_val = gplx.texts.HexDecUtl.parse_or(src, i + 1, i + 5, -1);
int len = gplx.core.intls.Utf16_.Encode_int(utf8_val, utf8_bry, 0);
bfr.Add_mid(utf8_bry, 0, len);
i += 4;
break; // \uFFFF 4 hex-dec
case Byte_ascii.Backslash:
case Byte_ascii.Slash:
default:
bfr.Add_byte(b); break; // \? " \ / b f n r t
}
break;
default:
bfr.Add_byte(b);
break;
}
}
return bfr.Xto_bry_and_clear();
}
}

View File

@@ -0,0 +1,31 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.jsons; import gplx.*; import gplx.langs.*;
// Stand-alone item that carries its JSON text as a String instead of pointing into a document; marked TEST by the author.
public class Json_itm_tmp implements Json_itm { // TEST:
public Json_itm_tmp(byte tid, String data) {this.tid = tid; this.data = data;}
public byte Tid() {return tid;} private byte tid;
public byte[] Data_bry() {return Bry_.new_u8(Object_.Xto_str_strict_or_empty(data));}
// no source position
public int Src_bgn() {return -1;}
public int Src_end() {return -1;}
public Object Data() {return data;} private String data;
// the stored text is written unchanged
public void Print_as_json(Bry_bfr bfr, int depth) {bfr.Add_str(data);}
// always false
public boolean Data_eq(byte[] comp) {return false;}
public void Clear() {}
// string item: v is wrapped in double quotes; v itself is not escaped
public static Json_itm new_str_(String v) {return new Json_itm_tmp(Json_itm_.Tid__str, "\"" + v + "\"");}
// int item: v in decimal text form
public static Json_itm new_int_(int v) {return new Json_itm_tmp(Json_itm_.Tid__int, Int_.Xto_str(v));}
}

View File

@@ -0,0 +1,39 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.jsons; import gplx.*; import gplx.langs.*;
// One "key":value member of a JSON object. The pair holds no data of its own (Data / Data_bry return null);
// callers read the key and the value through the accessors.
public class Json_kv extends Json_itm_base {
	public Json_kv(Json_itm key, Json_itm val) {this.key = key; this.val = val;}
	@Override public byte Tid() {return Json_itm_.Tid__kv;}
	public Json_itm Key() {return key;} private final Json_itm key;
	public Json_itm Val() {return val;} private final Json_itm val;
	public byte[] Key_as_bry()		{return key.Data_bry();}
	// casts the key's Data() to String
	public String Key_as_str()		{return (String)key.Data();}
	public byte[] Val_as_bry()		{return val.Data_bry();}
	// value as an object group, or null when it is not one
	public Json_nde Val_as_nde()	{return Json_nde.cast(val);}
	// value as an array, or null when it is not one
	public Json_ary Val_as_ary()	{return Json_ary.cast(val);}
	// true when the key equals comp. Data_eq is declared on Json_itm, so no downcast is needed;
	// the earlier cast to Json_itm_str threw ClassCastException for any other key implementation
	public boolean Key_eq(byte[] comp) {return key.Data_eq(comp);}
	@Override public Object Data() {return null;}
	@Override public byte[] Data_bry() {return null;}
	// prints key, colon, value; both parts are printed at the same depth
	@Override public void Print_as_json(Bry_bfr bfr, int depth) {
		key.Print_as_json(bfr, depth);
		bfr.Add_byte(Byte_ascii.Colon);
		val.Print_as_json(bfr, depth);
	}
	public static final Json_kv[] Ary_empty = new Json_kv[0];
	// returns v as Json_kv, or null when v is null or is not a key/value pair
	public static Json_kv cast(Json_itm v) {return v == null || v.Tid() != Json_itm_.Tid__kv ? null : (Json_kv)v;}
}

View File

@@ -0,0 +1,61 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.jsons; import gplx.*; import gplx.langs.*;
// Converts a parsed JSON tree into nested KeyVal arrays.
// Objects become KeyVal[] keyed by member name; arrays become KeyVal[] keyed by "1", "2", ... (Int_.Base1 offset).
public class Json_kv_ary_srl {
	// Converts one object member; anything other than a key/value pair is rejected.
	public static KeyVal Kv_by_itm(Json_itm itm) {
		if (itm.Tid() != Json_itm_.Tid__kv)
			throw Err_.new_unhandled(itm.Tid());
		Json_kv member = (Json_kv)itm;
		return KeyVal_.new_(member.Key_as_str(), Val_by_itm(member.Val()));
	}
	// Converts a value: bool -> lower-case text, int / null / str / decimal -> Data(), groups -> KeyVal[].
	private static Object Val_by_itm(Json_itm itm) {
		byte tid = itm.Tid();
		switch (tid) {
			case Json_itm_.Tid__ary:		return Val_by_itm_ary((Json_ary)itm);
			case Json_itm_.Tid__nde:		return Val_by_itm_nde((Json_nde)itm);
			case Json_itm_.Tid__bool:		return Bool_.To_str_lower(Bool_.cast(itm.Data()));
			case Json_itm_.Tid__null:
			case Json_itm_.Tid__int:
			case Json_itm_.Tid__decimal:
			case Json_itm_.Tid__str:		return itm.Data();
			default:						throw Err_.new_unhandled(tid);	// includes kv: kv should never be val; EX: "a":"b":c; not possible
		}
	}
	// Converts an array; element i gets the key (i + Int_.Base1) in text form.
	private static KeyVal[] Val_by_itm_ary(Json_ary itm) {
		int len = itm.Len();
		KeyVal[] rv = new KeyVal[len];
		for (int idx = 0; idx < len; idx++) {
			Json_itm elem = itm.Get_at(idx);
			rv[idx] = KeyVal_.new_(Int_.Xto_str(idx + Int_.Base1), Val_by_itm(elem));
		}
		return rv;
	}
	// Converts an object; every sub-item must be a key/value pair.
	public static KeyVal[] Val_by_itm_nde(Json_nde itm) {
		int len = itm.Len();
		KeyVal[] rv = new KeyVal[len];
		for (int idx = 0; idx < len; idx++)
			rv[idx] = Kv_by_itm(itm.Get_at(idx));
		return rv;
	}
}

View File

@@ -0,0 +1,50 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.jsons; import gplx.*; import gplx.langs.*;
import org.junit.*;
// Tests for Json_kv_ary_srl: each case parses a small document (apostrophes stand in for double quotes)
// and compares the resulting KeyVal[] with the expected one.
public class Json_kv_ary_srl_tst {
@Before public void init() {fxt.Clear();} private Json_kv_ary_srl_fxt fxt = new Json_kv_ary_srl_fxt();
@Test public void Null() {fxt.Test_parse("{'k0':null}" , fxt.ary_(fxt.kv_str_("k0", null)));}
@Test public void Bool_n() {fxt.Test_parse("{'k0':false}" , fxt.ary_(fxt.kv_bool_("k0", false)));}
@Test public void Num() {fxt.Test_parse("{'k0':123}" , fxt.ary_(fxt.kv_int_("k0", 123)));}
@Test public void Str() {fxt.Test_parse("{'k0':'v0'}" , fxt.ary_(fxt.kv_str_("k0", "v0")));}
@Test public void Num_dec() {fxt.Test_parse("{'k0':1.23}" , fxt.ary_(fxt.kv_dec_("k0", Decimal_adp_.parse("1.23"))));}
// array elements are keyed "1", "2", "3"
@Test public void Ary_int() {fxt.Test_parse("{'k0':[1,2,3]}" , fxt.ary_(fxt.kv_obj_("k0", fxt.ary_(fxt.kv_int_("1", 1), fxt.kv_int_("2", 2), fxt.kv_int_("3", 3)))));}
@Test public void Ary_empty() {fxt.Test_parse("{'k0':[]}" , fxt.ary_(fxt.kv_obj_("k0", fxt.ary_())));}
@Test public void Subs_int() {fxt.Test_parse("{'k0':{'k00':1,'k01':2}}" , fxt.ary_(fxt.kv_obj_("k0", fxt.ary_(fxt.kv_int_("k00", 1), fxt.kv_int_("k01", 2)))));}
@Test public void Subs_empty() {fxt.Test_parse("{'k0':{}}" , fxt.ary_(fxt.kv_obj_("k0", fxt.ary_())));}
}
// Fixture for Json_kv_ary_srl_tst: owns the parser and builds the expected KeyVal values.
class Json_kv_ary_srl_fxt {
	private Json_parser parser;
	// creates the parser on first use; later calls keep the existing one
	public void Clear() {
		if (parser != null) return;
		parser = new Json_parser();
	}
	// parses raw_str (apostrophes replaced first), converts the root object and compares the text forms
	public void Test_parse(String raw_str, KeyVal[] expd) {
		byte[] raw_bry = Json_parser_tst.Replace_apos(Bry_.new_u8(raw_str));
		Json_doc doc = parser.Parse(raw_bry);
		KeyVal[] actl = Json_kv_ary_srl.Val_by_itm_nde(doc.Root_nde());
		String expd_str = KeyVal_.Ary_to_str(expd);
		String actl_str = KeyVal_.Ary_to_str(actl);
		Tfds.Eq_str_lines(expd_str, actl_str);
	}
	public KeyVal[] ary_(KeyVal... ary) {
		return ary;
	}
	public KeyVal kv_obj_(String key, Object val) {
		return KeyVal_.new_(key, val);
	}
	public KeyVal kv_str_(String key, String val) {
		return KeyVal_.new_(key, val);
	}
	public KeyVal kv_int_(String key, int val) {
		return KeyVal_.new_(key, val);
	}
	// bools are expected in lower-case text form
	public KeyVal kv_bool_(String key, boolean val) {
		return KeyVal_.new_(key, Bool_.To_str_lower(val));
	}
	// decimals are expected in their To_str() form
	public KeyVal kv_dec_(String key, Decimal_adp val) {
		return KeyVal_.new_(key, val.To_str());
	}
}

View File

@@ -0,0 +1,100 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.jsons; import gplx.*; import gplx.langs.*;
// A JSON object ("node"): an ordered, growable list of sub-items with lookups by key.
// The node has no data of its own, so Data / Data_bry return null.
public class Json_nde extends Json_itm_base implements Json_grp {
	private final Json_doc jdoc;
	private Json_itm[] subs = Json_itm_.Ary_empty;
	private int subs_len = 0, subs_max = 0;		// used slots / allocated slots
	public Json_nde(Json_doc jdoc, int src_bgn) {
		this.jdoc = jdoc;
		this.Ctor(src_bgn, -1);					// end offset is unknown at creation; see Src_end_
	}
	@Override public byte Tid() {return Json_itm_.Tid__nde;}
	public Json_doc Doc() {return jdoc;}
	public void Src_end_(int v) {this.src_end = v;}
	@Override public Object Data() {return null;}
	@Override public byte[] Data_bry() {return null;}
	public int Len() {return subs_len;}
	// sub-item i as a key/value pair; fails when it is not one
	public Json_kv Get_at_as_kv(int i) {
		Json_kv kv = Json_kv.cast(Get_at(i));
		if (kv == null) throw Err_.new_("json", "sub is not kv", "i", i, "src", Bry_.Mid(jdoc.Src(), this.Src_bgn(), src_end));
		return kv;
	}
	// sub-item i; i is not checked against Len()
	public Json_itm Get_at(int i) {return subs[i];}
	// pair stored under key, or null
	public Json_kv Get_kv(byte[] key) {return Json_kv.cast(Get_itm(key));}
	public Json_nde Get(String key) {return Get(Bry_.new_u8(key));}
	// object stored under key; fails when the key is missing or its value is not an object
	public Json_nde Get(byte[] key) {
		Json_kv kv = Json_kv.cast(this.Get_itm(key));
		if (kv == null) throw Err_.new_("json", "kv not found", "key", key);
		Json_nde nde = Json_nde.cast(kv.Val());
		if (nde == null) throw Err_.new_("json", "nde not found", "key", key);
		return nde;
	}
	// first key/value pair whose key bytes equal key, or null; linear scan
	public Json_itm Get_itm(byte[] key) {
		for (int idx = 0; idx < subs_len; idx++) {
			Json_itm sub = subs[idx];
			if (sub.Tid() != Json_itm_.Tid__kv) continue;
			if (Bry_.Eq(key, ((Json_kv)sub).Key().Data_bry()))
				return sub;
		}
		return null;
	}
	// true when Get_bry finds non-null bytes for key
	public boolean Has(byte[] key) {return Get_bry(key, null) != null;}
	// value bytes for key; fails when Get_bry(key, null) yields null
	public byte[] Get_bry(byte[] key) {
		byte[] bry = Get_bry(key, null);
		if (bry == null) throw Err_.new_("json", "key missing", "key", key);
		return bry;
	}
	public byte[] Get_bry_or_null(String key)	{return Get_bry(Bry_.new_u8(key), null);}
	public byte[] Get_bry_or_null(byte[] key)	{return Get_bry(key, null);}
	// value bytes for key, or "or" when the key is absent, is not a pair, or has no value item
	public byte[] Get_bry(byte[] key, byte[] or) {
		Json_itm found = Get_itm(key);
		if (found == null || found.Tid() != Json_itm_.Tid__kv) return or;
		Json_itm val = ((Json_kv)found).Val();
		if (val == null) return or;
		return val.Data_bry();
	}
	// appends every item in order; returns this node for chaining
	public Json_nde Add_many(Json_itm... ary) {
		for (int idx = 0, len = ary.length; idx < len; idx++)
			Add(ary[idx]);
		return this;
	}
	// appends one item, growing the backing array to twice the needed size when it is full
	public void Add(Json_itm itm) {
		int required = subs_len + 1;
		if (subs_max < required) {
			subs_max = required * 2;
			Json_itm[] grown = new Json_itm[subs_max];
			Array_.Copy_to(subs, 0, grown, 0, subs_len);
			subs = grown;
		}
		subs[subs_len] = itm;
		subs_len = required;
	}
	// Prints "{ " + members + "}" with one member per line; later members are prefixed with ", ".
	// A newline is written first unless the buffer is still empty.
	@Override public void Print_as_json(Bry_bfr bfr, int depth) {
		if (bfr.Len() != 0)
			bfr.Add_byte_nl();
		Json_grp_.Print_indent(bfr, depth);
		bfr.Add_byte(Byte_ascii.Curly_bgn);
		bfr.Add_byte(Byte_ascii.Space);
		for (int idx = 0; idx < subs_len; idx++) {
			if (idx > 0) {
				Print_line_bgn(bfr, depth);
				bfr.Add_byte(Byte_ascii.Comma);
				bfr.Add_byte(Byte_ascii.Space);
			}
			subs[idx].Print_as_json(bfr, depth + 1);
		}
		Print_line_bgn(bfr, depth);
		bfr.Add_byte(Byte_ascii.Curly_end);
		bfr.Add_byte_nl();
	}
	// ends the current line if needed, then indents the next one
	private static void Print_line_bgn(Bry_bfr bfr, int depth) {
		Json_grp_.Print_nl(bfr);
		Json_grp_.Print_indent(bfr, depth);
	}
	// returns v as Json_nde, or null when v is null or is not an object
	public static Json_nde cast(Json_itm v) {
		if (v == null) return null;
		return v.Tid() == Json_itm_.Tid__nde ? (Json_nde)v : null;
	}
}

View File

@@ -0,0 +1,181 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.jsons; import gplx.*; import gplx.langs.*;
import gplx.core.primitives.*;
// Recursive-descent parser that turns JSON bytes into a Json_doc.
// The cursor (src, src_len, pos) lives in instance fields; Parse runs inside synchronized (factory) so that
// calls on the same parser instance do not interleave.
// Items store offsets into the source bytes. Truncated input is reported through the parser's own "eos"
// errors rather than through an array-index failure.
public class Json_parser {
	private byte[] src; private int src_len, pos; private final Number_parser num_parser = new Number_parser();
	public Json_factory Factory() {return factory;} private final Json_factory factory = new Json_factory();
	// joins the lines with newlines, then parses as Parse_by_apos does
	public Json_doc Parse_by_apos_ary(String... ary) {return Parse_by_apos(String_.Concat_lines_nl(ary));}
	// replaces every apostrophe with a double quote before parsing; lets callers write JSON without escaping quotes
	public Json_doc Parse_by_apos(String s) {return Parse(Bry_.Replace(Bry_.new_u8(s), Byte_ascii.Apos, Byte_ascii.Quote));}
	public Json_doc Parse(String src) {return Parse(Bry_.new_u8(src));}
	// Parses src and returns the document, or null when src is null, empty, only whitespace,
	// or does not start with '{' or '['. Malformed content after that point raises an Err.
	public Json_doc Parse(byte[] src) {
		synchronized (factory) {
			this.src = src;				if (src == null) return null;
			this.src_len = src.length;	if (src_len == 0) return null;
			this.pos = 0;
			Skip_ws();
			if (pos == src_len) return null;	// only whitespace; reading src[pos] would be out of bounds
			boolean root_is_nde = true;
			switch (src[pos]) {
				case Byte_ascii.Curly_bgn:	root_is_nde = Bool_.Y; break;
				case Byte_ascii.Brack_bgn:	root_is_nde = Bool_.N; break;
				default:					return null;
			}
			Json_doc doc = new Json_doc();
			Json_grp root = null;
			if (root_is_nde)
				root = Make_nde(doc);
			else
				root = Make_ary(doc);
			doc.Ctor(src, root);
			return doc;
		}
	}
	// Parses an object; pos is on '{' at entry and just past the matching '}' on return.
	private Json_nde Make_nde(Json_doc doc) {
		++pos; // curly_bgn
		Json_nde nde = factory.Nde(doc, pos);
		while (pos < src_len) {
			Skip_ws();
			if (pos == src_len) break;							// eos after whitespace
			if (src[pos] == Byte_ascii.Curly_end)	{++pos; return nde;}
			else									nde.Add(Make_kv(doc));
			Skip_ws();
			if (pos == src_len) break;							// eos after a member
			switch (src[pos++]) {
				case Byte_ascii.Comma:			break;
				case Byte_ascii.Curly_end:		return nde;
				default: throw Err_.new_unhandled(src[pos - 1]);
			}
		}
		throw Err_.new_wo_type("eos inside nde");
	}
	// Parses one member: key string, ':', value.
	private Json_itm Make_kv(Json_doc doc) {
		Json_itm key = Make_string(doc);
		Skip_ws();
		Chk(Byte_ascii.Colon);
		Skip_ws();
		Json_itm val = Make_val(doc);
		return factory.Kv(key, val);
	}
	// Parses a value; the first byte selects the kind.
	private Json_itm Make_val(Json_doc doc) {
		while (pos < src_len) {	// body always returns or throws; the loop only guards against eos
			byte b = src[pos];
			switch (b) {
				case Byte_ascii.Ltr_n:		return Make_literal(Bry_null_ull	, 3, factory.Null());
				case Byte_ascii.Ltr_f:		return Make_literal(Bry_bool_alse	, 4, factory.Bool_n());
				case Byte_ascii.Ltr_t:		return Make_literal(Bry_bool_rue	, 3, factory.Bool_y());
				case Byte_ascii.Quote:		return Make_string(doc);
				case Byte_ascii.Num_0: case Byte_ascii.Num_1: case Byte_ascii.Num_2: case Byte_ascii.Num_3: case Byte_ascii.Num_4:
				case Byte_ascii.Num_5: case Byte_ascii.Num_6: case Byte_ascii.Num_7: case Byte_ascii.Num_8: case Byte_ascii.Num_9:
				case Byte_ascii.Dash:		return Make_num(doc);
				case Byte_ascii.Brack_bgn:	return Make_ary(doc);
				case Byte_ascii.Curly_bgn:	return Make_nde(doc);
			}
			throw Err_.new_unhandled(Char_.To_str(b));
		}
		throw Err_.new_wo_type("eos reached in val");
	}
	// Matches the rest of null / true / false after its first byte and returns the shared item.
	private Json_itm Make_literal(byte[] remainder, int remainder_len, Json_itm singleton) {
		++pos; // 1st char
		int literal_end = pos + remainder_len;
		if (	literal_end <= src_len							// literal must fit in the remaining input
			&&	Bry_.Eq(src, pos, literal_end, remainder)) {
			pos = literal_end;
			return singleton;
		}
		throw Err_.new_("json.parser", "invalid literal", "excerpt", Bry_.Mid_by_len_safe(src, pos - 1, 16));
	}
	// Scans a string; pos is on the opening quote at entry and just past the closing quote on return.
	// exact stays true only when no backslash was seen, which lets the item use the source bytes as-is.
	private Json_itm Make_string(Json_doc doc) {
		int bgn = pos++; // ++: quote_bgn
		boolean exact = true;
		while (pos < src_len) {
			switch (src[pos]) {
				case Byte_ascii.Backslash:
					++pos; // backslash
					if (pos == src_len) break;					// eos right after the backslash; loop ends and the error below is raised
					switch (src[pos]) {
						case Byte_ascii.Ltr_u:	pos += 4; break;	// \uFFFF 4 hex-dec; lands on the last hex digit, which the next pass skips
						default:				++pos; break;		// \?		" \ / b f n r t
					}
					exact = false;
					break;
				case Byte_ascii.Quote:
					return factory.Str(doc, bgn, ++pos, exact);	// ++: quote_end
				default:
					++pos;
					break;
			}
		}
		throw Err_.new_wo_type("eos reached inside quote");
	}
	// Scans a number and creates a decimal or an int item, as reported by the number parser's Has_frac().
	private Json_itm Make_num(Json_doc doc) {
		int num_bgn = pos;
		boolean loop = true;
		while (loop) {
			if (pos == src_len) throw Err_.new_wo_type("eos reached inside num");
			switch (src[pos]) {
				case Byte_ascii.Num_0: case Byte_ascii.Num_1: case Byte_ascii.Num_2: case Byte_ascii.Num_3: case Byte_ascii.Num_4:
				case Byte_ascii.Num_5: case Byte_ascii.Num_6: case Byte_ascii.Num_7: case Byte_ascii.Num_8: case Byte_ascii.Num_9:
					++pos;
					break;
				case Byte_ascii.Dot:
				case Byte_ascii.Dash: case Byte_ascii.Plus:
				case Byte_ascii.Ltr_E: case Byte_ascii.Ltr_e:	// e e+ e- E E+ E-
					++pos;
					break;
				default:
					loop = false;
					break;
			}
		}
		num_parser.Parse(src, num_bgn, pos);
		return num_parser.Has_frac()
			? factory.Decimal(doc, num_bgn, pos)
			: factory.Int(doc, num_bgn, pos);
	}
	// Parses an array; pos is on '[' at entry and just past the matching ']' on return.
	// A byte other than ',' or ']' after an element is not rejected: the loop treats it as the start of the next element.
	private Json_ary Make_ary(Json_doc doc) {
		Json_ary rv = factory.Ary(pos++, pos);	// brack_bgn
		while (pos < src_len) {
			Skip_ws();
			if (pos == src_len) break;							// eos after whitespace
			if (src[pos] == Byte_ascii.Brack_end)	{++pos; return rv;}
			else									rv.Add(Make_val(doc));
			Skip_ws();
			if (pos == src_len) break;							// eos after an element
			switch (src[pos]) {
				case Byte_ascii.Comma:			++pos; break;
				case Byte_ascii.Brack_end:		++pos; return rv;
			}
		}
		throw Err_.new_wo_type("eos inside ary");
	}
	// Advances pos past spaces, newlines, tabs and carriage returns; stops at eos.
	private void Skip_ws() {
		while (pos < src_len) {
			switch (src[pos]) {
				case Byte_ascii.Space: case Byte_ascii.Nl: case Byte_ascii.Tab: case Byte_ascii.Cr: ++pos; break;
				default: return;
			}
		}
	}
	// Consumes the expected byte or raises an error.
	private void Chk(byte expd) {
		if (pos >= src_len)
			throw Err_.new_wo_type("eos reached; expected '" + Char_.To_str(expd) + "'");
		if (src[pos] == expd)
			++pos;
		else
			throw err_(src, pos, "expected '{0}' but got '{1}'", Char_.To_str(expd), Char_.To_str(src[pos]));
	}
	private Err err_(byte[] src, int bgn, String fmt, Object... args) {return err_(src, bgn, src.length, fmt, args);}
	// builds an error whose message ends with the offset and an excerpt of the source; src_len is currently unused
	private Err err_(byte[] src, int bgn, int src_len, String fmt, Object... args) {
		String msg = String_.Format(fmt, args) + " " + Int_.Xto_str(bgn) + " " + String_.new_u8__by_len(src, bgn, 20);
		return Err_.new_wo_type(msg);
	}
	// literals without their first byte, which Make_val has already matched
	private static final byte[] Bry_bool_rue = Bry_.new_a7("rue"), Bry_bool_alse = Bry_.new_a7("alse"), Bry_null_ull = Bry_.new_a7("ull");
}

View File

@@ -0,0 +1,74 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.jsons; import gplx.*; import gplx.langs.*;
import gplx.core.primitives.*;
// Base class for parsers that read a fixed set of named attributes from a JSON object.
// Ctor registers the attribute names; the Kv__* helpers then read typed values from a Json_kv[] by index.
// A null slot in that array is treated as "attribute not present" by the null-tolerant helpers.
public abstract class Json_parser__itm__base {
	protected String context;											// used as the path prefix in error / warning messages
	protected final Hash_adp_bry hash = Hash_adp_bry.cs();				// attribute name (bytes) -> Int_obj_val holding its index in keys
	protected final Bry_bfr tmp_bfr = Bry_bfr.new_(255);				// scratch buffer for message excerpts
	protected String[] keys;
	protected Json_kv[] atrs;											// one slot per key
	protected Json_itm cur_itm;											// printed as the excerpt in messages; not assigned in this class
	protected int keys_len;
	// Registers the attribute names and allocates one slot per name.
	public void Ctor(String... keys) {
		this.keys = keys;
		this.keys_len = keys.length;
		for (int i = 0; i < keys_len; ++i)
			hash.Add(Bry_.new_u8(keys[i]), Int_obj_val.new_(i));
		this.atrs = new Json_kv[keys_len];
	}
	// value as int; the slot must be filled
	public int Kv__int(Json_kv[] ary, int i)			{return Bry_.To_int(ary[i].Val_as_bry());}
	// value as long (0 is the fallback passed to To_long_or); the slot must be filled
	public long Kv__long(Json_kv[] ary, int i)			{return Bry_.To_long_or(ary[i].Val_as_bry(), 0);}
	// value as long; 0 when the slot is empty
	public long Kv__long_or_0(Json_kv[] ary, int i)		{
		Json_kv kv = ary[i]; if (kv == null) return 0;
		return Bry_.To_long_or(kv.Val_as_bry(), 0);
	}
	// value bytes; raises an error naming the attribute when there are none
	public byte[] Kv__bry(Json_kv[] ary, int i) {
		byte[] rv = Kv__bry_or_null(ary, i); if (rv == null) throw Err_.new_("json.parser", "missing val", "key", context + "." + keys[i], "excerpt", Json_itm_.To_bry(tmp_bfr, cur_itm));
		return rv;
	}
	// value as an array of byte arrays; the slot must be filled
	public byte[][] Kv__bry_ary(Json_kv[] ary, int i) {
		return ary[i].Val_as_ary().Xto_bry_ary();
	}
	// value bytes, or Bry_.Empty when there are none
	public byte[] Kv__bry_or_empty(Json_kv[] ary, int i) {
		byte[] rv = Kv__bry_or_null(ary, i);
		return rv == null ? Bry_.Empty : rv;
	}
	// value bytes, or null when the slot is empty or the pair has no value item
	public byte[] Kv__bry_or_null(Json_kv[] ary, int i) {
		Json_kv kv = ary[i]; if (kv == null) return null;
		Json_itm val = kv.Val();
		return val == null ? null : val.Data_bry();		// was a second test of kv, which can no longer be null here
	}
	// Returns true when the attribute is present with an empty-string value, false when the slot is empty.
	// Any other value is logged as a warning and yields false.
	public boolean Kv__mw_bool(Json_kv[] ary, int i) {
		Json_kv kv = ary[i]; if (kv == null) return false;
		Json_itm val = kv.Val();
		if (	val != null
			&&	val.Tid() == Json_itm_.Tid__str
			&&	Bry_.Len_eq_0(val.Data_bry())) {
			return true;
		}
		else {
			// print the value item itself; Json_kv.Data_bry() is always null, so it carried no information
			Warn("unknown val: val=" + String_.new_u8(Json_itm_.To_bry(tmp_bfr, val)) + " excerpt=" + String_.new_u8(Json_itm_.To_bry(tmp_bfr, cur_itm)), kv);
			return false;
		}
	}
	// true when the slot for the attribute is filled (the earlier null test on Kv__bry_or_empty could never fail)
	public boolean Kv__has(Json_kv[] ary, int i) {return ary[i] != null;}
	protected abstract void Parse_hook_nde(Json_nde sub, Json_kv[] atrs);
	// logs a warning that names the attribute path and prints cur_itm as the excerpt
	protected void Warn(String msg, Json_kv kv) {
		Gfo_usr_dlg_.I.Warn_many("", "", msg + ": path=~{0}.~{1} excerpt=~{2}", context, kv.Key_as_bry(), Json_itm_.To_bry(tmp_bfr, cur_itm));
	}
}

View File

@@ -0,0 +1,70 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.jsons; import gplx.*; import gplx.langs.*;
import gplx.core.primitives.*;
/**
 * Parser base for JSON groups that hold a list of nodes, plus helpers that copy
 * a JSON array or node into an {@code Ordered_hash}.
 * <p>
 * Every public entry point stores the supplied {@code context}, so warnings raised
 * during that call report the caller's label rather than one left by an earlier call.
 */
public class Json_parser__list_nde__base extends Json_parser__itm__base {
	/**
	 * Parses every sub-node of {@code grp}. When {@code grp} is itself a node, each
	 * sub is a key-value whose value is the node to parse; otherwise each sub is the node.
	 */
	public void Parse_grp(String context, Json_grp grp) {
		this.context = context;
		int len = grp.Len();
		for (int i = 0; i < len; ++i) {
			Json_nde sub = null;
			if (grp.Tid() == Json_itm_.Tid__nde) {
				Json_kv kv = Json_nde.cast(grp).Get_at_as_kv(i);
				sub = kv.Val_as_nde();
			}
			else {
				sub = Json_nde.cast(grp.Get_at(i));
			}
			Parse_nde(context, sub);
		}
	}
	/**
	 * Clears the attribute slots, fills them from {@code nde}'s key-values using the
	 * registered key indexes, warns on unknown keys, then calls {@code Parse_hook_nde}.
	 */
	public void Parse_nde(String context, Json_nde nde) {
		this.context = context;	// keep warnings accurate when called directly, not via Parse_grp
		this.cur_itm = nde;
		for (int j = 0; j < keys_len; ++j)
			atrs[j] = null;
		int atr_len = nde.Len();
		for (int j = 0; j < atr_len; ++j) {
			Json_kv atr = nde.Get_at_as_kv(j);
			Object idx_obj = hash.Get_by_bry(atr.Key_as_bry());
			if (idx_obj == null) {Warn("unknown key", atr); continue;}
			int idx_int = ((Int_obj_val)idx_obj).Val();
			atrs[idx_int] = atr;
		}
		Parse_hook_nde(nde, atrs);
	}
	/** Adds each element's bytes of {@code ary} to {@code list}, using the bytes as both key and value. */
	public void Parse_to_list_as_bry(String context, Json_ary ary, Ordered_hash list) {
		this.context = context;
		this.cur_itm = ary;
		int len = ary.Len();
		for (int i = 0; i < len; ++i) {
			byte[] val = ary.Get_at(i).Data_bry();
			list.Add(val, val);
		}
	}
	/** Adds each key-value of {@code nde} to {@code list} as a {@code KeyVal} of strings, keyed by the key bytes. */
	public void Parse_to_list_as_kv(String context, Json_nde nde, Ordered_hash list) {
		this.context = context;
		this.cur_itm = nde;
		int len = nde.Len();
		for (int i = 0; i < len; ++i) {
			Json_kv sub = nde.Get_at_as_kv(i);
			byte[] key = sub.Key_as_bry();
			byte[] val = Parse_to_list_as_kv__get_val(sub, key);
			list.Add(key, KeyVal_.new_(String_.new_u8(key), String_.new_u8(val)));
		}
	}
	/** Extension point: returns the value bytes stored for {@code sub}; the default is the kv's own value bytes. */
	@gplx.Virtual protected byte[] Parse_to_list_as_kv__get_val(Json_kv sub, byte[] key) {return sub.Val_as_bry();}
	/** No-op by default; subclasses override to consume the filled attribute slots. */
	@Override protected void Parse_hook_nde(Json_nde sub, Json_kv[] atrs) {}
}

View File

@@ -0,0 +1,100 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.jsons; import gplx.*; import gplx.langs.*;
import org.junit.*;
/**
 * Parser tests. Raw JSON is written with apostrophes for readability; the fixture
 * converts them to double quotes (see {@link #Replace_apos}) before parsing.
 */
public class Json_parser_tst {
	private final Json_parser_fxt fxt = new Json_parser_fxt();

	@Before
	public void init() {
		fxt.Clear();
	}

	// ---- single values: only the value of the first key is compared ----
	@Test
	public void Null() {
		fxt.Test_parse_val0("{'k0':null}", null);
	}
	@Test
	public void Bool_n() {
		fxt.Test_parse_val0("{'k0':false}", false);
	}
	@Test
	public void Bool_y() {
		fxt.Test_parse_val0("{'k0':true}", true);
	}
	@Test
	public void Num() {
		fxt.Test_parse_val0("{'k0':123}", 123);
	}
	@Test
	public void Num_neg() {
		fxt.Test_parse_val0("{'k0':-123}", -123);
	}
	@Test
	public void Str() {
		fxt.Test_parse_val0("{'k0':'v0'}", "v0");
	}
	@Test
	public void Str_esc_quote() {
		fxt.Test_parse_val0("{'k0':'a\\\"b'}", "a\"b");
	}
	@Test
	public void Str_esc_hex4() {
		fxt.Test_parse_val0("{'k0':'a\\u0021b'}", "a!b");
	}

	// ---- whole documents: the parsed tree is printed and compared with the expected tree ----
	@Test
	public void Num_dec() {
		Json_nde root = fxt.itm_nde_();
		fxt.Test_parse("{'k0':1.23}", root.Add_many(fxt.itm_kv_dec_("k0", "1.23")));
	}
	@Test
	public void Num_exp() {
		Json_nde root = fxt.itm_nde_();
		fxt.Test_parse("{'k0':1e+2}", root.Add_many(fxt.itm_kv_dec_("k0", "1e+2")));
	}
	@Test
	public void Num_mix() {
		Json_nde root = fxt.itm_nde_();
		fxt.Test_parse("{'k0':-1.23e-1}", root.Add_many(fxt.itm_kv_dec_("k0", "-1.23e-1")));
	}
	@Test
	public void Str_many() {
		Json_nde root = fxt.itm_nde_();
		fxt.Test_parse
		( "{'k0':'v0','k1':'v1','k2':'v2'}"
		, root.Add_many
			( fxt.itm_kv_("k0", "v0")
			, fxt.itm_kv_("k1", "v1")
			, fxt.itm_kv_("k2", "v2")
			)
		);
	}
	@Test
	public void Ary_empty() {
		Json_nde root = fxt.itm_nde_();
		fxt.Test_parse("{'k0':[]}", root.Add_many(fxt.itm_kv_ary_int_("k0")));
	}
	@Test
	public void Ary_int() {
		Json_nde root = fxt.itm_nde_();
		fxt.Test_parse("{'k0':[1,2,3]}", root.Add_many(fxt.itm_kv_ary_int_("k0", 1, 2, 3)));
	}
	@Test
	public void Ary_str() {
		Json_nde root = fxt.itm_nde_();
		fxt.Test_parse("{'k0':['a','b','c']}", root.Add_many(fxt.itm_kv_ary_str_("k0", "a", "b", "c")));
	}
	@Test
	public void Ary_ws() {
		Json_nde root = fxt.itm_nde_();
		fxt.Test_parse("{'k0': [ 1 , 2 , 3 ] }", root.Add_many(fxt.itm_kv_ary_int_("k0", 1, 2, 3)));
	}
	@Test
	public void Subs_int() {
		Json_nde root = fxt.itm_nde_();
		Json_nde sub = fxt.itm_nde_().Add_many(fxt.itm_kv_("k00", 1));
		fxt.Test_parse("{'k0':{'k00':1}}", root.Add_many(fxt.itm_kv_("k0", sub)));
	}
	@Test
	public void Subs_empty() {
		Json_nde root = fxt.itm_nde_();
		fxt.Test_parse("{'k0':{}}", root.Add_many(fxt.itm_kv_("k0", fxt.itm_nde_())));
	}
	@Test
	public void Subs_ws() {
		Json_nde root = fxt.itm_nde_();
		Json_nde sub = fxt.itm_nde_().Add_many(fxt.itm_kv_("k00", 1));
		fxt.Test_parse("{'k0': { 'k00' : 1 } }", root.Add_many(fxt.itm_kv_("k0", sub)));
	}
	@Test
	public void Ws() {
		Json_nde root = fxt.itm_nde_();
		fxt.Test_parse(" { 'k0' : 'v0' } ", root.Add_many(fxt.itm_kv_("k0", "v0")));
	}
	@Test
	public void Root_is_ary() {
		fxt.Test_parse
		( "[ 1 , 2 , 3 ]"
		, fxt.itm_ary_().Add_many(fxt.itm_int_(1), fxt.itm_int_(2), fxt.itm_int_(3))
		);
	}

	/** String form of {@link #Replace_apos}: converts to bytes, swaps the quotes, converts back. */
	public static String Replace_apos_as_str(String v) {
		byte[] src = Bry_.new_u8(v);
		byte[] swapped = Replace_apos(src);
		return String_.new_u8(swapped);
	}
	/** Replaces every apostrophe byte with a double-quote byte. */
	public static byte[] Replace_apos(byte[] v) {
		return Bry_.Replace(v, Byte_ascii.Apos, Byte_ascii.Quote);
	}
}
/** Test fixture: builds expected JSON items through the parser's factory and compares printed trees. */
class Json_parser_fxt {
	Json_parser parser;
	Json_factory factory;
	Bry_bfr tmp_bfr = Bry_bfr.reset_(255);

	/** Lazily creates the parser and caches its factory; later calls do nothing. */
	public void Clear() {
		if (parser != null) return;
		parser = new Json_parser();
		factory = parser.Factory();
	}

	// ---- item builders ----
	public Json_itm itm_int_(int v) {
		return Json_itm_tmp.new_int_(v);
	}
	Json_itm itm_str_(String v) {
		return Json_itm_tmp.new_str_(v);
	}
	public Json_ary itm_ary_() {
		return factory.Ary(-1, -1);
	}
	public Json_nde itm_nde_() {
		return factory.Nde(null, -1);
	}

	// ---- key-value builders ----
	public Json_kv itm_kv_null_(String k) {
		return factory.Kv(itm_str_(k), factory.Null());
	}
	public Json_kv itm_kv_(String k, String v) {
		return factory.Kv(itm_str_(k), itm_str_(v));
	}
	public Json_kv itm_kv_(String k, int v) {
		return factory.Kv(itm_str_(k), itm_int_(v));
	}
	public Json_kv itm_kv_(String k, boolean v) {
		return factory.Kv(itm_str_(k), v ? factory.Bool_y() : factory.Bool_n());
	}
	public Json_kv itm_kv_dec_(String k, String v) {
		return factory.Kv(itm_str_(k), new Json_itm_tmp(Json_itm_.Tid__decimal, v));
	}
	public Json_kv itm_kv_(String k, Json_nde v) {
		return factory.Kv(itm_str_(k), v);
	}
	/** Builds {@code k:[v...]} with int elements. */
	public Json_kv itm_kv_ary_int_(String k, int... v) {
		Json_ary rv = factory.Ary(-1, -1);
		for (int itm : v)
			rv.Add(itm_int_(itm));
		return factory.Kv(itm_str_(k), rv);
	}
	/** Builds {@code k:[v...]} with string elements. */
	public Json_kv itm_kv_ary_str_(String k, String... v) {
		Json_ary rv = factory.Ary(-1, -1);
		for (String itm : v)
			rv.Add(itm_str_(itm));
		return factory.Kv(itm_str_(k), rv);
	}

	/** Parses {@code raw_str} (apostrophes become quotes) and compares the printed root with the printed expected items. */
	public void Test_parse(String raw_str, Json_itm... expd_ary) {
		byte[] src = Json_parser_tst.Replace_apos(Bry_.new_u8(raw_str));
		Json_doc doc = parser.Parse(src);
		doc.Root_grp().Print_as_json(tmp_bfr, 0);
		String actl = tmp_bfr.Xto_str_and_clear();
		String expd = Xto_str(src, doc, expd_ary, 0, expd_ary.length);
		Tfds.Eq_str_lines(expd, actl, actl);
	}
	/** Parses {@code raw_str} and compares the data of the root's first key-value with {@code expd}. */
	public void Test_parse_val0(String raw_str, Object expd) {
		byte[] src = Json_parser_tst.Replace_apos(Bry_.new_u8(raw_str));
		Json_doc doc = parser.Parse(src);
		Json_kv first = Json_kv.cast(doc.Root_nde().Get_at(0));	// assume root has kv as first sub; EX: {"a":"b"}
		Object actl = first.Val().Data();							// NOTE: Data_bry is escaped val; EX: a\"b has DataBry of a"b
		Tfds.Eq(expd, actl);
	}
	/** Prints items {@code bgn..end-1} of {@code ary} into the shared buffer and returns the text; {@code raw} and {@code doc} are unused. */
	String Xto_str(byte[] raw, Json_doc doc, Json_itm[] ary, int bgn, int end) {
		int pos = bgn;
		while (pos < end) {
			ary[pos].Print_as_json(tmp_bfr, 0);
			pos++;
		}
		return tmp_bfr.Xto_str_and_clear();
	}
}

View File

@@ -0,0 +1,230 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.jsons; import gplx.*; import gplx.langs.*;
import gplx.core.primitives.*;
/**
 * Incrementally writes JSON text into an internal buffer using a "leading comma" layout:
 * the first item of a group follows the opening symbol on the same line, and every later
 * item starts a new line with the indent followed by ",".
 * Callers open and close groups explicitly (Doc_*, Nde_*, Ary_*) and add items in between,
 * then collect the text with To_bry_and_clear / To_str_and_clear.
 */
public class Json_wtr {
private final Bry_bfr bfr = Bry_bfr.new_(255);
// item counts of the enclosing groups; pushed by Write_grp_bgn, popped by Write_grp_end
private final Int_ary idx_stack = new Int_ary(4);
// number of items already written in the current group; 0 means the next item is the first
private int idx = 0;
// current nesting depth; Write_indent emits indent * 2 spaces
// NOTE(review): the field default is 0 but Clear() sets -1; a writer used without Clear() starts one level deeper -- confirm intended
private int indent;
public Bry_bfr Bfr() {return bfr;}
public void Indent_(int v) {this.indent = v;}
// byte written before and after every string; defaults to the double quote
public byte Opt_quote_byte() {return opt_quote_byte;} public Json_wtr Opt_quote_byte_(byte v) {opt_quote_byte = v; return this;} private byte opt_quote_byte = Byte_ascii.Quote;
// when false, no spaces, indents or newlines are written
public boolean Opt_ws() {return opt_ws;} public Json_wtr Opt_ws_(boolean v) {opt_ws = v; return this;} private boolean opt_ws = true;
public byte[] To_bry_and_clear() {return bfr.Xto_bry_and_clear();}
public String To_str_and_clear() {return bfr.Xto_str_and_clear();}
// resets nesting state only; the buffer itself is not touched here
public Json_wtr Clear() {
indent = -1;
idx_stack.Clear();
idx = 0;
return this;
}
// document-level group open / close
public Json_wtr Doc_nde_bgn() {return Write_grp_bgn(Sym_nde_bgn);}
public Json_wtr Doc_nde_end() {Write_grp_end(Bool_.Y, Sym_nde_end); return Write_nl();}
public Json_wtr Doc_ary_bgn() {return Write_grp_bgn(Sym_ary_bgn);}
public Json_wtr Doc_ary_end() {Write_grp_end(Bool_.N, Sym_ary_end); return Write_nl();}
public Json_wtr Nde_bgn(String key) {return Nde_bgn(Bry_.new_u8(key));}
// writes the key on the current line, then opens the node on the next line one level deeper
public Json_wtr Nde_bgn(byte[] key) {
Write_indent_itm();
Write_key(key);
Write_nl();
return Write_grp_bgn(Sym_nde_bgn);
}
public Json_wtr Nde_end() {
Write_grp_end(Bool_.Y, Sym_nde_end);
return Write_nl();
}
// writes the key on the current line, then opens the array on the next line one level deeper
public Json_wtr Ary_bgn(String nde) {
Write_indent_itm();
Write_key(Bry_.new_u8(nde));
return Ary_bgn_keyless();
}
private Json_wtr Ary_bgn_keyless() {
Write_nl();
return Write_grp_bgn(Sym_ary_bgn);
}
public Json_wtr Ary_itm_str(String itm) {return Ary_itm_by_type_tid(Type_adp_.Tid__str, itm);}
public Json_wtr Ary_itm_bry(byte[] itm) {return Ary_itm_by_type_tid(Type_adp_.Tid__bry, itm);}
public Json_wtr Ary_itm_obj(Object itm) {return Ary_itm_by_type_tid(Type_adp_.To_tid_obj(itm), itm);}
// writes one array element; idx is incremented here because no key is written for array items
public Json_wtr Ary_itm_by_type_tid(int itm_type_tid, Object itm) {
Write_indent_itm();
Write_val_obj(itm_type_tid, itm);
Write_nl();
++idx;
return this;
}
public Json_wtr Ary_end() {
Write_grp_end(Bool_.N, Sym_ary_end);
return Write_nl();
}
public Json_wtr Kv_bool_as_mw(String key, boolean val) {
if (val) Kv_bry(key, Bry_.Empty); // if true, write 'key:""'; if false, write nothing
return this;
}
public Json_wtr Kv_bool(String key, boolean val) {return Kv_bool(Bry_.new_u8(key), val);}
public Json_wtr Kv_bool(byte[] key, boolean val) {return Kv_raw(key, val ? Bool_.True_bry : Bool_.False_bry);}
public Json_wtr Kv_int(String key, int val) {return Kv_raw(Bry_.new_u8(key), Int_.Xto_bry(val));}
public Json_wtr Kv_long(String key, long val) {return Kv_raw(Bry_.new_u8(key), Bry_.new_a7(Long_.Xto_str(val)));}
public Json_wtr Kv_float(String key, float val) {return Kv_raw(Bry_.new_u8(key), Bry_.new_a7(Float_.Xto_str(val)));}
public Json_wtr Kv_double(String key, double val) {return Kv_raw(Bry_.new_u8(key), Bry_.new_a7(Double_.Xto_str(val)));}
// writes key and value where the value bytes are appended as-is (no quoting or escaping)
private Json_wtr Kv_raw(byte[] key, byte[] val) {
Write_indent_itm();
Write_key(key);
bfr.Add(val);
Write_nl();
return this;
}
public Json_wtr Kv_str(String key, String val) {return Kv_bry(Bry_.new_u8(key), Bry_.new_u8(val));}
public Json_wtr Kv_str(byte[] key, String val) {return Kv_bry(key, Bry_.new_u8(val));}
public Json_wtr Kv_bry(String key, byte[] val) {return Kv_bry(Bry_.new_u8(key), val);}
// writes key and value where the value is quoted and escaped by Write_str
public Json_wtr Kv_bry(byte[] key, byte[] val) {
Write_indent_itm();
Write_key(key);
Write_str(val);
Write_nl();
return this;
}
// writes key and value where the value is dispatched on val_tid by Write_val_obj
public Json_wtr Kv_obj(byte[] key, Object val, int val_tid) {
Write_indent_itm();
Write_key(key);
Write_val_obj(val_tid, val);
Write_nl();
return this;
}
private Json_wtr Write_grp_bgn(byte[] grp_sym) {return Write_grp_bgn(grp_sym, Bool_.Y);}
// opens a group: saves the parent's item count, restarts counting, goes one level deeper, writes the symbol
private Json_wtr Write_grp_bgn(byte[] grp_sym, boolean write_indent) {
idx_stack.Add(idx);
idx = 0;
++indent;
if (write_indent) Write_indent();
bfr.Add(grp_sym);
return this;
}
// closes a group: an empty group gets a newline first so the closing symbol sits on its own line;
// the symbol is written at the group's own indent, then depth and the parent's item count are restored
private Json_wtr Write_grp_end(boolean grp_is_nde, byte[] grp_sym) {
// NOTE(review): this condition reduces to (idx == 0); grp_is_nde does not affect the result
if ((grp_is_nde && idx == 0) || (!grp_is_nde && idx == 0))
Write_nl();
Write_indent();
--indent;
bfr.Add(grp_sym);
this.idx = idx_stack.Pop_or(0);
return this;
}
// writes the quoted key and the colon, and counts the item
private Json_wtr Write_key(byte[] bry) {
Write_str(bry); // "key"
bfr.Add_byte_colon(); // ":"
++idx;
return this;
}
// writes a value according to its Type_adp_ type id; unknown ids and unsupported object types throw
private void Write_val_obj(int type_tid, Object obj) {
switch (type_tid) {
case Type_adp_.Tid__null: bfr.Add(Object_.Bry__null); break;
case Type_adp_.Tid__bool: bfr.Add_bool(Bool_.cast(obj)); break;
// NOTE(review): Add_byte presumably appends the raw byte rather than its decimal text -- confirm
case Type_adp_.Tid__byte: bfr.Add_byte(Byte_.cast(obj)); break;
case Type_adp_.Tid__int: bfr.Add_int_variable(Int_.cast(obj)); break;
case Type_adp_.Tid__long: bfr.Add_long_variable(Long_.cast(obj)); break;
case Type_adp_.Tid__float: bfr.Add_float(Float_.cast(obj)); break;
case Type_adp_.Tid__double: bfr.Add_double(Double_.cast(obj)); break;
case Type_adp_.Tid__str: Write_str(Bry_.new_u8((String)obj)); break;
case Type_adp_.Tid__bry: Write_str((byte[])obj); break;
// char, date and decimal are written as quoted strings
case Type_adp_.Tid__char:
case Type_adp_.Tid__date:
case Type_adp_.Tid__decimal: Write_str(Bry_.new_u8(Object_.Xto_str_strict_or_empty(obj))); break;
case Type_adp_.Tid__obj:
Class<?> type = obj.getClass();
// a KeyVal[] is written as a nested node; any other array as a nested array
if (Type_adp_.Eq(type, KeyVal[].class)) {
if (idx == 0) { // if nde, and first item, then put on new line
// removes the last buffered byte (the space Write_indent_itm added for a first item when opt_ws is on)
// NOTE(review): with opt_ws off no space was added, so this deletes whatever byte came last -- confirm
bfr.Del_by_1();
if (opt_ws) {
bfr.Add_byte_nl();
// indent one level deeper just for this line; the real ++indent happens in Write_grp_bgn below
++indent;
Write_indent();
--indent;
}
}
KeyVal[] kvy = (KeyVal[])obj;
Write_grp_bgn(Sym_nde_bgn, Bool_.N);
int kvy_len = kvy.length;
for (int i = 0; i < kvy_len; ++i) {
KeyVal kv = kvy[i];
Object kv_val = kv.Val();
Kv_obj(Bry_.new_u8(kv.Key()), kv_val, Type_adp_.To_tid_obj(kv_val));
}
Write_grp_end(Bool_.Y, Sym_nde_end);
}
else if (Type_adp_.Is_array(type))
Write_val_ary(obj);
else
throw Err_.new_unhandled(type);
break;
default: throw Err_.new_unhandled(type_tid);
}
}
// writes a nested array value: opens a keyless array, writes each element, closes without a trailing newline
private void Write_val_ary(Object ary_obj) {
Ary_bgn_keyless();
Object ary = Array_.cast(ary_obj);
int len = Array_.Len(ary);
for (int i = 0; i < len; ++i) {
Object itm = Array_.Get_at(ary, i);
Ary_itm_obj(itm);
}
Write_grp_end(Bool_.N, Sym_ary_end);
}
// writes bry between quote bytes with escaping; a null bry is written as the unquoted null literal
// only backslash, double quote, newline and carriage return are special-cased; all other bytes pass through unchanged
private void Write_str(byte[] bry) {
if (bry == null) {bfr.Add(Object_.Bry__null); return;}
int len = bry.length;
bfr.Add_byte(opt_quote_byte);
for (int i = 0; i < len; ++i) {
byte b = bry[i];
switch (b) {
case Byte_ascii.Backslash: bfr.Add_byte(Byte_ascii.Backslash).Add_byte(b); break; // "\" -> "\\"; needed else js will usurp \ as escape; EX: "\&" -> "&"; DATE:2014-06-24
case Byte_ascii.Quote: bfr.Add_byte(Byte_ascii.Backslash).Add_byte(b); break;
// apostrophes are written unescaped, even when opt_quote_byte is the apostrophe
case Byte_ascii.Apos: bfr.Add_byte(b); break;
case Byte_ascii.Nl: bfr.Add_byte_repeat(Byte_ascii.Backslash, 2).Add_byte(Byte_ascii.Ltr_n); break; // "\n" -> "\\n"
case Byte_ascii.Cr: break;// skip
default: bfr.Add_byte(b); break;
}
}
bfr.Add_byte(opt_quote_byte);
}
// writes what precedes an item: a single space for the first item of a group (it follows the opening symbol),
// otherwise the indent, the item separator and a space
private void Write_indent_itm() {
if (idx == 0) {
if (opt_ws)
bfr.Add_byte_space();
}
else {
Write_indent();
bfr.Add(Sym_itm_spr);
if (opt_ws) bfr.Add_byte_space();
}
}
// writes indent * 2 spaces when whitespace is enabled and the depth is positive
private void Write_indent() {
if (opt_ws && indent > 0)
bfr.Add_byte_repeat(Byte_ascii.Space, indent * 2);
}
private Json_wtr Write_nl() {
if (opt_ws) bfr.Add_byte_nl();
return this;
}
private static final byte[]
Sym_nde_bgn = Bry_.new_a7("{")
, Sym_nde_end = Bry_.new_a7("}")
, Sym_ary_bgn = Bry_.new_a7("[")
, Sym_ary_end = Bry_.new_a7("]")
, Sym_itm_spr = Bry_.new_a7(",")
;
}

View File

@@ -0,0 +1,114 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.jsons; import gplx.*; import gplx.langs.*;
import org.junit.*;
/**
 * Tests for Json_wtr's layout. The fixture writes strings with apostrophes as the quote byte.
 * <p>
 * Expected lines carry the writer's indentation: Json_wtr emits two spaces per nesting level,
 * so items one level deep start with 2 spaces and items two levels deep start with 4.
 */
public class Json_wtr_tst {
	@Before public void init() {fxt.Clear();} private final Json_wtr_fxt fxt = new Json_wtr_fxt();
	/** An empty document node: the closing brace goes on its own line. */
	@Test  public void Root() {
		fxt.Wtr().Doc_nde_bgn().Doc_nde_end();
		fxt.Test
		( "{"
		, "}"
		);
	}
	/** Top-level key-values: first item follows the brace, later items lead with a comma. */
	@Test  public void Kv() {
		fxt.Wtr()
			.Doc_nde_bgn()
			.Kv_str("k0", "v0")
			.Kv_str("k1", "v1")
			.Doc_nde_end();
		fxt.Test
		( "{ 'k0':'v0'"
		, ", 'k1':'v1'"
		, "}"
		);
	}
	/** Nested nodes: each level is indented by two more spaces. */
	@Test  public void Nde() {
		fxt.Wtr()
			.Doc_nde_bgn()
			.Nde_bgn("s0")
			.Nde_bgn("s00")
			.Nde_end()
			.Nde_end()
			.Nde_bgn("s1")
			.Nde_bgn("s10")
			.Nde_end()
			.Nde_end()
			.Doc_nde_end();
		fxt.Test
		( "{ 's0':"
		, "  { 's00':"
		, "    {"
		, "    }"
		, "  }"
		, ", 's1':"
		, "  { 's10':"
		, "    {"
		, "    }"
		, "  }"
		, "}"
		);
	}
	/** A keyed array of strings one level deep. */
	@Test  public void Ary() {
		fxt.Wtr()
			.Doc_nde_bgn()
			.Ary_bgn("a0")
			.Ary_itm_str("v0")
			.Ary_itm_str("v1")
			.Ary_end()
			.Doc_nde_end();
		fxt.Test
		( "{ 'a0':"
		, "  [ 'v0'"
		, "  , 'v1'"
		, "  ]"
		, "}"
		);
	}
	/** A KeyVal[] array item is written as a node on its own line, one level deeper than the array. */
	@Test  public void Nde__nested() {
		fxt.Wtr()
			.Doc_nde_bgn()
			.Ary_bgn("a0")
			.Ary_itm_obj(KeyVal_.Ary
			( KeyVal_.new_("k1", "v1")
			, KeyVal_.new_("k2", "v2")
			))
			.Ary_end()
			.Doc_nde_end();
		fxt.Test
		( "{ 'a0':"
		, "  ["
		, "    { 'k1':'v1'"
		, "    , 'k2':'v2'"
		, "    }"
		, "  ]"
		, "}"
		);
	}
}
/** Test fixture wrapping a Json_wtr that quotes strings with apostrophes. */
class Json_wtr_fxt {
	private final Json_wtr wtr = new Json_wtr().Opt_quote_byte_(Byte_ascii.Apos);

	public void Clear() {
		wtr.Clear();
	}
	public Json_wtr Wtr() {
		return wtr;
	}
	/** Compares the writer's output, split into lines, with {@code expd}; the writer's buffer is cleared. */
	public void Test(String... expd) {
		// json_wtr always ends with "}\n"; rather than add "\n" to each test, just add it here
		String[] expd_lines = String_.Ary_add(expd, String_.Ary(""));
		String actl_text = String_.new_u8(wtr.To_bry_and_clear());
		String[] actl_lines = String_.SplitLines_nl(actl_text);
		Tfds.Eq_ary_str(expd_lines, actl_lines);
	}
}

View File

@@ -0,0 +1,21 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.phps; import gplx.*; import gplx.langs.*;
/** Holds the source bytes shared by the PHP parsing components. */
public class Php_ctx {
	private byte[] src;

	/** Returns the source bytes last set via {@link #Src_}, or null if none were set. */
	public byte[] Src() {
		return src;
	}
	/** Stores the source bytes (by reference) and returns this instance for chaining. */
	public Php_ctx Src_(byte[] v) {
		this.src = v;
		return this;
	}
}

View File

@@ -0,0 +1,262 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.phps; import gplx.*; import gplx.langs.*;
/*
NOTE: naive implementation of PHP evaluator. intended only for parsing Messages**.php files in MediaWiki. Specifically, it assumes the following:
- all lines are assignment lines: EX: $a = b;
- only the assignment operator is allowed (=); EX: $a = 5 + 7; fails b/c of + operator;
- no functions are supported: EX: strlen('a') fails
*/
/**
 * Token worker that turns a stream of PHP tokens into a list of assignment lines (Php_line_assign).
 * It is a state machine driven by Process: "mode" says what kind of token is acceptable next,
 * and nested array(...) values are tracked with cur_ary plus a stack of the enclosing arrays.
 */
public class Php_evaluator implements Php_tkn_wkr {
// mode: current state; next_tid / next_mode: the single token Mode_expect waits for and the state to enter once it arrives
byte mode = Mode_key_bgn, next_tid = 0, next_mode = 0;
// cur_line: assignment being built; cur_ary: innermost array being filled; cur_kv_key: pending key after "=>" awaiting its value
Php_line_assign cur_line; Php_itm_ary cur_ary; Php_key cur_kv_key;
// enclosing arrays of cur_ary; pushed when a nested array starts, popped at its closing paren
List_adp frame_stack = List_adp_.new_();
public Php_evaluator(Gfo_msg_log msg_log) {this.msg_log = msg_log;} Gfo_msg_log msg_log;
public void Init(Php_ctx ctx) {src = ctx.Src(); frame_stack.Clear();} private byte[] src;
public List_adp List() {return lines;} List_adp lines = List_adp_.new_();
public Gfo_msg_log Msg_log() {return msg_log;}
// resets collected lines, messages and parse state
// NOTE(review): frame_stack is cleared in Init but not here -- confirm callers always pair Clear with Init
public void Clear() {
lines.Clear(); msg_log.Clear();
cur_line = null;
cur_ary = null;
cur_kv_key = null;
mode = Mode_key_bgn;
next_tid = next_mode = 0;
}
// consumes one token and advances the state machine
public void Process(Php_tkn tkn) {
byte tkn_tid = tkn.Tkn_tid();
switch (tkn_tid) {
case Php_tkn_.Tid_declaration: case Php_tkn_.Tid_comment: case Php_tkn_.Tid_ws: // always discard, regardless of mode
return;
}
switch (mode) {
case Mode_expect: // handles sequences like "array("
if (tkn_tid == next_tid)
mode = next_mode;
else {
Msg_many(src, tkn.Src_bgn(), tkn.Src_end(), Expecting_itm_failed, Php_tkn_.Xto_str(next_tid), Php_tkn_.Xto_str(tkn_tid));
Fail();
}
break;
// after a failure: ignore everything up to the next semicolon, then start a fresh line
case Mode_suspend:
if (tkn_tid == Php_tkn_.Tid_semic) mode = Mode_key_bgn;
break;
// start of a line: must be a variable; creates the line and records its key
case Mode_key_bgn:
if (tkn_tid == Php_tkn_.Tid_var) {
cur_ary = null;
cur_line = new Php_line_assign();
lines.Add(cur_line);
Php_tkn_var var_tkn = (Php_tkn_var)tkn;
cur_line.Key_(new Php_itm_var(var_tkn.Var_name(src)));
mode = Mode_key_end;
}
else {
Msg_many(src, tkn.Src_bgn(), tkn.Src_end(), Expecting_itm_failed, Php_tkn_.Xto_str(Php_tkn_.Tid_var), Php_tkn_.Xto_str(tkn_tid));
Fail();
}
break;
// after the variable: "=" starts the value, "[" starts a bracketed sub-key, "]" must be followed by "="
case Mode_key_end:
switch (tkn_tid) {
case Php_tkn_.Tid_eq: mode = Mode_val; break;
case Php_tkn_.Tid_brack_bgn: mode = Mode_brack_itm; break;
case Php_tkn_.Tid_brack_end: Expect(Php_tkn_.Tid_eq, Mode_val); break;
default: {
// NOTE(review): the message reports Tid_var as the expected token although this state accepts "=", "[" or "]" -- confirm
Msg_many(src, tkn.Src_bgn(), tkn.Src_end(), Expecting_itm_failed, Php_tkn_.Xto_str(Php_tkn_.Tid_var), Php_tkn_.Xto_str(tkn_tid));
Fail();
break;
}
}
break;
// inside "[...]" after the variable: only a quoted string is accepted; it becomes the line's single sub-key
case Mode_brack_itm:
switch (tkn_tid) {
case Php_tkn_.Tid_quote:
Php_tkn_quote tkn_quote = (Php_tkn_quote)tkn;
Php_itm_quote key_sub = new Php_itm_quote(tkn_quote.Quote_text(src));
cur_line.Key_subs_(new Php_key[] {key_sub});
mode = Mode_key_end;
break;
default: {
// NOTE(review): the message reports Tid_var as the expected token although this state accepts only a quote -- confirm
Msg_many(src, tkn.Src_bgn(), tkn.Src_end(), Expecting_itm_failed, Php_tkn_.Xto_str(Php_tkn_.Tid_var), Php_tkn_.Xto_str(tkn_tid));
Fail();
break;
}
}
break;
// right-hand side of "=": scalars expect a closing semicolon; "array" expects "(" and then fills sub-items
case Mode_val:
Php_itm line_val = null;
switch (tkn_tid) {
case Php_tkn_.Tid_null: Expect(Php_tkn_.Tid_semic, Mode_key_bgn); line_val = Php_itm_null._; break;
case Php_tkn_.Tid_false: Expect(Php_tkn_.Tid_semic, Mode_key_bgn); line_val = Php_itm_bool_false._; break;
case Php_tkn_.Tid_true: Expect(Php_tkn_.Tid_semic, Mode_key_bgn); line_val = Php_itm_bool_true._; break;
case Php_tkn_.Tid_quote:
Expect(Php_tkn_.Tid_semic, Mode_key_bgn);
Php_tkn_quote tkn_quote = (Php_tkn_quote)tkn;
line_val = new Php_itm_quote(tkn_quote.Quote_text(src));
break;
case Php_tkn_.Tid_ary:
Expect(Php_tkn_.Tid_paren_bgn, Mode_ary_subs);
Php_itm_ary ary = new Php_itm_ary();
if (cur_ary == null)
line_val = ary;
else {
cur_ary.Subs_add(ary);
frame_stack.Add(new Php_scanner_frame(cur_ary));
cur_kv_key = null;
}
this.cur_ary = ary;
break;
// the remaining token kinds are ignored: the mode stays Mode_val and line_val stays null
case Php_tkn_.Tid_txt:
case Php_tkn_.Tid_var:
break;
case Php_tkn_.Tid_eq:
case Php_tkn_.Tid_eq_kv:
case Php_tkn_.Tid_semic:
case Php_tkn_.Tid_comma:
case Php_tkn_.Tid_paren_bgn:
case Php_tkn_.Tid_paren_end:
case Php_tkn_.Tid_num:
break;
}
// runs for every token seen in Mode_val, so an ignored token sets the line's value to null
cur_line.Val_(line_val);
break;
// inside array(...): each value token is added to cur_ary (paired with cur_kv_key when one is pending)
case Mode_ary_subs:
switch (tkn_tid) {
case Php_tkn_.Tid_null: Ary_add_itm(Php_itm_null._); break;
case Php_tkn_.Tid_false: Ary_add_itm(Php_itm_bool_false._); break;
case Php_tkn_.Tid_true: Ary_add_itm(Php_itm_bool_true._); break;
case Php_tkn_.Tid_quote:
Php_tkn_quote tkn_quote = (Php_tkn_quote)tkn;
Ary_add_itm(new Php_itm_quote(tkn_quote.Quote_text(src)));
break;
case Php_tkn_.Tid_num:
Php_tkn_num tkn_num = (Php_tkn_num)tkn;
Ary_add_itm(new Php_itm_int(tkn_num.Num_val_int(src)));
break;
// variables and bare text are both stored as Php_itm_var holding the token's raw source bytes
case Php_tkn_.Tid_var:
Php_tkn_var tkn_var = (Php_tkn_var)tkn;
Ary_add_itm(new Php_itm_var(Bry_.Mid(src, tkn_var.Src_bgn(), tkn_var.Src_end())));
break;
case Php_tkn_.Tid_txt:
Php_tkn_txt tkn_txt = (Php_tkn_txt)tkn;
Ary_add_itm(new Php_itm_var(Bry_.Mid(src, tkn_txt.Src_bgn(), tkn_txt.Src_end())));
break;
// nested array: remember the enclosing array, attach the new one (as a value of the pending key if any), descend into it
case Php_tkn_.Tid_ary:
Expect(Php_tkn_.Tid_paren_bgn, Mode_ary_subs);
Php_itm_ary ary = new Php_itm_ary();
if (cur_ary == null)
// NOTE(review): line_val is assigned here but never read in this state -- looks like a dead store; confirm
line_val = ary;
else {
frame_stack.Add(new Php_scanner_frame(cur_ary));
if (cur_kv_key == null)
cur_ary.Subs_add(ary);
else {
Php_itm_kv ary_itm = new Php_itm_kv().Key_(cur_kv_key).Val_(ary);
cur_ary.Subs_add(ary_itm);
cur_kv_key = null;
}
}
this.cur_ary = ary;
break;
// closing paren: return to the enclosing array, or to no array at the top level
case Php_tkn_.Tid_paren_end:
mode = Mode_ary_term;
if (frame_stack.Count() == 0)
cur_ary = null;
else {
Php_scanner_frame frame = (Php_scanner_frame)List_adp_.Pop(frame_stack);
cur_ary = frame.Ary();
frame.Rls();
}
break;
case Php_tkn_.Tid_semic: // NOTE: will occur in following construct array(array());
mode = Mode_key_bgn;
break;
case Php_tkn_.Tid_eq:
case Php_tkn_.Tid_eq_kv:
case Php_tkn_.Tid_comma:
case Php_tkn_.Tid_paren_bgn:
break;
}
break;
// after an array item: "," continues, ")" closes, "=>" turns the item just added into the key of the next value
case Mode_ary_dlm:
switch (tkn_tid) {
case Php_tkn_.Tid_comma:
mode = Mode_ary_subs;
break;
case Php_tkn_.Tid_paren_end:
mode = Mode_ary_term;
if (frame_stack.Count() == 0)
cur_ary = null;
else {
Php_scanner_frame frame = (Php_scanner_frame)List_adp_.Pop(frame_stack);
cur_ary = frame.Ary();
frame.Rls();
}
break;
case Php_tkn_.Tid_eq_kv:
// take back the item that was just added; it must be usable as a key (cast to Php_key)
Php_itm_sub tmp_key = cur_ary.Subs_pop();
cur_kv_key = (Php_key)tmp_key;
mode = Mode_ary_subs;
break;
}
break;
// after an array's closing paren: "," or another ")" continues with the enclosing array, ";" ends the line
case Mode_ary_term:
switch (tkn_tid) {
case Php_tkn_.Tid_comma:
case Php_tkn_.Tid_paren_end: // NOTE: paren_end occurs in multiple nests; EX: array(array())
mode = Mode_ary_subs;
break;
case Php_tkn_.Tid_semic:
mode = Mode_key_bgn;
break;
}
break;
}
}
// enters the suspended state (tokens are skipped until the next semicolon)
private void Fail() {mode = Mode_suspend;}
// adds val to cur_ary, wrapped in a key-value when a key is pending; then waits for a delimiter
private void Ary_add_itm(Php_itm val) {
mode = Mode_ary_dlm;
if (cur_kv_key == null)
cur_ary.Subs_add((Php_itm_sub)val);
else {
Php_itm_kv ary_itm = new Php_itm_kv().Key_(cur_kv_key).Val_(val);
cur_ary.Subs_add(ary_itm);
cur_kv_key = null;
}
}
// requires the next non-discarded token to be next_tid, after which the state becomes next_mode
private void Expect(byte next_tid, byte next_mode) {
mode = Mode_expect;
this.next_tid = next_tid;
this.next_mode = next_mode;
}
public void Msg_many(byte[] src, int bgn, int end, Gfo_msg_itm itm, Object... args) {
msg_log.Add_itm_many(itm, src, bgn, end, args);
}
public static final Gfo_msg_itm Expecting_itm_failed = Gfo_msg_itm_.new_warn_(Php_parser.Log_nde, "expecting_itm_failed", "expecting_itm ~{0} but got ~{1} instead");
private static final byte Mode_key_bgn = 1, Mode_key_end = 2, Mode_expect = 3, Mode_suspend = 4, Mode_val = 5, Mode_ary_subs = 6, Mode_ary_dlm = 7, Mode_ary_term = 8, Mode_brack_itm = 9;
}
/** Stack entry remembering the enclosing array while a nested array is being filled. */
class Php_scanner_frame {
	Php_itm_ary ary;

	public Php_scanner_frame(Php_itm_ary ary) {
		this.ary = ary;
	}
	/** Returns the remembered array, or null after {@link #Rls}. */
	public Php_itm_ary Ary() {
		return ary;
	}
	/** Drops the reference to the remembered array. */
	public void Rls() {
		ary = null;
	}
}
/** Marker type exposing one shared instance, {@link #Char}. */
class Php_parser_interrupt {
	/** Shared marker instance. */
	public static final Php_parser_interrupt Char
		= new Php_parser_interrupt();
}

View File

@@ -0,0 +1,44 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.phps; import gplx.*; import gplx.langs.*;
/** A value produced by the PHP evaluator (null, bool, var, ...). */
public interface Php_itm {
	/** Returns the type id of this item; implementations in this file return a {@code Php_itm_.Tid_*} constant. */
	byte Itm_tid();

	/** Returns this item's value as bytes; may be null (the null item returns null). */
	byte[] Val_obj_bry();
}
/** Singleton item for PHP null; it has no byte value. */
class Php_itm_null implements Php_itm, Php_itm_sub {
	/** The only instance. */
	public static final Php_itm_null _ = new Php_itm_null();

	Php_itm_null() {}

	public byte Itm_tid() {
		return Php_itm_.Tid_null;
	}
	public byte[] Val_obj_bry() {
		return null;
	}
}
/** Singleton item for PHP true; its byte value is the ASCII text "true". */
class Php_itm_bool_true implements Php_itm, Php_itm_sub {
	private static final byte[] Bry_true = Bry_.new_a7("true");

	/** The only instance. */
	public static final Php_itm_bool_true _ = new Php_itm_bool_true();

	Php_itm_bool_true() {}

	public byte Itm_tid() {
		return Php_itm_.Tid_bool_true;
	}
	public byte[] Val_obj_bry() {
		return Bry_true;
	}
}
/** Singleton item for PHP false; its byte value is the ASCII text "false". */
class Php_itm_bool_false implements Php_itm, Php_itm_sub {
	public byte Itm_tid() {return Php_itm_.Tid_bool_false;}
	public byte[] Val_obj_bry() {return Bry_false;}
	/** The only instance. */
	public static final Php_itm_bool_false _ = new Php_itm_bool_false(); Php_itm_bool_false() {}
	// named for its content ("false"); it was previously called Bry_true, copied from the sibling class
	private static final byte[] Bry_false = Bry_.new_a7("false");
}
/** Item holding a variable (or bare text) as bytes; it can also serve as a key. */
class Php_itm_var implements Php_itm, Php_itm_sub, Php_key {
	private byte[] val_obj_bry;

	public Php_itm_var(byte[] v) {
		this.val_obj_bry = v;
	}
	public byte Itm_tid() {
		return Php_itm_.Tid_var;
	}
	/** Returns the bytes passed to the constructor. */
	public byte[] Val_obj_bry() {
		return val_obj_bry;
	}
}

View File

@@ -0,0 +1,44 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.phps; import gplx.*; import gplx.langs.*;
/** Type-id constants and static conversion helpers for {@link Php_itm} values. */
public class Php_itm_ {
  public static final byte Tid_null = 0, Tid_bool_false = 1, Tid_bool_true = 2, Tid_int = 3, Tid_quote = 4, Tid_ary = 5, Tid_kv = 6, Tid_var = 7;

  /**
   * Converts an item to an int.
   * @param itm item to convert; int items return their stored value, quote items are parsed with Bry_.To_int_or
   * @param or  value returned when itm is neither an int nor a quote, or when the quote cannot be parsed
   * @return the int value, or {@code or}
   */
  public static int Parse_int_or(Php_itm itm, int or) {
    switch (itm.Itm_tid()) {
      case Php_itm_.Tid_int:
        return ((Php_itm_int)itm).Val_obj_int();
      case Php_itm_.Tid_quote:
        // hand the caller's fallback straight to the parser; the previous -1 sentinel
        // made a quoted "-1" indistinguishable from a parse failure and returned `or` for it
        byte[] bry = ((Php_itm_quote)itm).Val_obj_bry();
        return Bry_.To_int_or(bry, or);
      default:
        return or;
    }
  }

  /**
   * Returns the byte value of a scalar item.
   * @param itm item to read; kv and ary items are rejected
   * @return result of {@code itm.Val_obj_bry()}
   * @throws the error created by Err_.new_unhandled when itm is a kv or an ary
   */
  public static byte[] Parse_bry(Php_itm itm) {
    switch (itm.Itm_tid()) {
      case Php_itm_.Tid_kv:
      case Php_itm_.Tid_ary:
        throw Err_.new_unhandled(itm.Itm_tid());
      default:
        return itm.Val_obj_bry();
    }
  }
}

View File

@@ -0,0 +1,37 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.phps; import gplx.*; import gplx.langs.*;
/** Array item: a growable, ordered list of {@link Php_itm_sub} entries. */
public class Php_itm_ary implements Php_itm, Php_itm_sub {
  Php_itm_sub[] ary = Php_itm_sub_.Ary_empty;	// backing store; only the first subs_len slots are live
  int subs_max;	// current capacity of ary
  private int subs_len;	// number of live entries

  public Php_itm_ary() {}

  public byte Itm_tid() {
    return Php_itm_.Tid_ary;
  }

  /** Arrays have no scalar byte value. */
  public byte[] Val_obj_bry() {
    return null;
  }

  public int Subs_len() {
    return subs_len;
  }

  /** Returns the entry at index i of the backing store; i is not checked against Subs_len(). */
  public Php_itm_sub Subs_get(int i) {
    return ary[i];
  }

  /** Removes and returns the last entry; the length is decremented before the slot is read. */
  public Php_itm_sub Subs_pop() {
    subs_len -= 1;
    return ary[subs_len];
  }

  /** Appends v, doubling the backing store (based on the new length) when it is full. */
  public void Subs_add(Php_itm_sub v) {
    int len_after_add = subs_len + 1;
    if (subs_len >= subs_max) {	// no free slot >>> allocate bigger store and copy live entries
      subs_max = len_after_add * 2;
      Php_itm_sub[] bigger = new Php_itm_sub[subs_max];
      Array_.Copy_to(ary, 0, bigger, 0, subs_len);
      ary = bigger;
    }
    ary[subs_len] = v;
    subs_len = len_after_add;
  }
}

View File

@@ -0,0 +1,24 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.phps; import gplx.*; import gplx.langs.*;
/** Integer item; can be used both as an array sub-item and as a key. */
public class Php_itm_int implements Php_itm, Php_itm_sub, Php_key {
  private int val_obj_int;

  public Php_itm_int(int v) {
    val_obj_int = v;
  }

  public byte Itm_tid() {
    return Php_itm_.Tid_int;
  }

  /** Builds a new byte array from the int on every call (via Bry_.new_by_int). */
  public byte[] Val_obj_bry() {
    return Bry_.new_by_int(val_obj_int);
  }

  public int Val_obj_int() {
    return val_obj_int;
  }
}

View File

@@ -0,0 +1,24 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.phps; import gplx.*; import gplx.langs.*;
/** Key/value pair item; setters return {@code this} so calls can be chained. */
public class Php_itm_kv implements Php_itm, Php_itm_sub {
  Php_key key;
  Php_itm val;

  public byte Itm_tid() {
    return Php_itm_.Tid_kv;
  }

  /** A pair has no scalar byte value. */
  public byte[] Val_obj_bry() {
    return null;
  }

  public Php_key Key() {
    return key;
  }

  public Php_itm_kv Key_(Php_key v) {
    key = v;
    return this;
  }

  public Php_itm Val() {
    return val;
  }

  public Php_itm_kv Val_(Php_itm v) {
    val = v;
    return this;
  }
}

View File

@@ -0,0 +1,23 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.phps; import gplx.*; import gplx.langs.*;
/** Quoted-string item; can be used both as an array sub-item and as a key. */
public class Php_itm_quote implements Php_itm, Php_itm_sub, Php_key {
  private byte[] val_obj_bry;

  /** @param v bytes stored as-is; NOTE: use Php_text_itm_parser to parse \" and related */
  public Php_itm_quote(byte[] v) {
    val_obj_bry = v;
  }

  public byte Itm_tid() {
    return Php_itm_.Tid_quote;
  }

  public byte[] Val_obj_bry() {
    return val_obj_bry;
  }
}

View File

@@ -0,0 +1,23 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.phps; import gplx.*; import gplx.langs.*;
// Marker interface for items that can be stored as entries of a Php_itm_ary (see Php_itm_ary.Subs_add); adds no members to Php_itm.
public interface Php_itm_sub extends Php_itm {
}
/** Static members for {@link Php_itm_sub}. */
class Php_itm_sub_ {
  /** Shared zero-length array; used as the initial backing store of Php_itm_ary. */
  public static final Php_itm_sub[] Ary_empty = new Php_itm_sub[] {};
}

View File

@@ -0,0 +1,23 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.phps; import gplx.*; import gplx.langs.*;
// Marker interface for items that can act as a key (see Php_itm_kv.Key and Php_line_assign.Key / Key_subs); adds no members to Php_itm.
public interface Php_key extends Php_itm {
}
/** Static members for {@link Php_key}. */
class Php_key_ {
  /** Shared zero-length array; used as the default value of Php_line_assign.Key_subs(). */
  public static final Php_key[] Ary_empty = new Php_key[] {};
}

View File

@@ -0,0 +1,19 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.phps; import gplx.*; import gplx.langs.*;
// Marker interface for a parsed line; Php_line_assign is the implementation visible in this package.
public interface Php_line {}

View File

@@ -0,0 +1,23 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.phps; import gplx.*; import gplx.langs.*;
/** Assignment line: a key, optional sub-keys (EX: the 'b' in $a['b'] = 'c';) and a value. Setters return {@code this} for chaining. */
public class Php_line_assign implements Php_line {
  Php_key key;
  Php_key[] key_subs = Php_key_.Ary_empty;	// defaults to the shared empty array, not null
  Php_itm val;

  public Php_key Key() {
    return key;
  }

  public Php_line_assign Key_(Php_key v) {
    key = v;
    return this;
  }

  public Php_key[] Key_subs() {
    return key_subs;
  }

  public Php_line_assign Key_subs_(Php_key[] v) {
    key_subs = v;
    return this;
  }

  public Php_itm Val() {
    return val;
  }

  public Php_line_assign Val_(Php_itm v) {
    val = v;
    return this;
  }
}

View File

@@ -0,0 +1,283 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.phps; import gplx.*; import gplx.langs.*;
import gplx.core.btries.*;
// Contract for a lexer that Php_parser registers in its trie and calls when the lexer's hook matches the source.
interface Php_lxr {
// Kind of lexer; the implementations in this file return Php_lxr_.Tid_* constants.
int Lxr_tid();
// Called once at parser construction: the lexer adds its hook(s) to trie and may flag bytes in parser_interrupts.
void Lxr_ini(Btrie_slim_mgr trie, Php_parser_interrupt[] parser_interrupts);
// Called at the start of each parse to hand the lexer the source and the token sinks.
void Lxr_bgn(byte[] src, int src_len, Php_tkn_wkr tkn_wkr, Php_tkn_factory tkn_factory);
// Called on a trie match; bgn is the match start and cur the position after the hook.
// Returns the position after the produced token, or Php_parser.NotFound when the match is rejected.
int Lxr_make(Php_ctx ctx, int bgn, int cur);
}
/** Kind ids returned by {@code Php_lxr.Lxr_tid()}. */
class Php_lxr_ {
  public static final byte Tid_declaration = 1;
  public static final byte Tid_ws          = 2;
  public static final byte Tid_comment     = 3;
  public static final byte Tid_var         = 4;
  public static final byte Tid_sym         = 5;
  public static final byte Tid_keyword     = 6;
  public static final byte Tid_num         = 7;
  public static final byte Tid_quote       = 8;
}
/** Base class for lexers: stores the per-parse state handed over by Lxr_bgn so subclasses can read it in Lxr_make. */
abstract class Php_lxr_base implements Php_lxr {
  protected byte[] src;
  protected int src_len;
  protected Php_tkn_wkr tkn_wkr;
  protected Php_tkn_factory tkn_factory;

  public abstract int Lxr_tid();
  public abstract void Lxr_ini(Btrie_slim_mgr trie, Php_parser_interrupt[] parser_interrupts);
  public abstract int Lxr_make(Php_ctx ctx, int bgn, int cur);

  /** Captures the source and token sinks for the current parse. */
  public void Lxr_bgn(byte[] src, int src_len, Php_tkn_wkr tkn_wkr, Php_tkn_factory tkn_factory) {
    this.src = src;
    this.src_len = src_len;
    this.tkn_wkr = tkn_wkr;
    this.tkn_factory = tkn_factory;
  }
}
/** Lexer for the "<?php" opening declaration. */
class Php_lxr_declaration extends Php_lxr_base {
  private static final byte[] Bry_declaration = Bry_.new_a7("<?php");

  @Override public int Lxr_tid() {return Php_lxr_.Tid_declaration;}

  @Override public void Lxr_ini(Btrie_slim_mgr trie, Php_parser_interrupt[] parser_interrupts) {
    trie.Add_obj(Bry_declaration, this);
    parser_interrupts[Byte_ascii.Lt] = Php_parser_interrupt.Char;
  }

  /**
   * Accepts the declaration only when it is followed by end-of-source or by at least one
   * newline / carriage-return; the trailing run of those bytes becomes part of the token.
   * Any other following byte (EX: "<?phpx") rejects the match with Php_parser.NotFound.
   */
  @Override public int Lxr_make(Php_ctx ctx, int bgn, int cur) {
    int pos = cur;
    while (pos != src_len) {
      byte b = src[pos];
      if (b == Byte_ascii.Nl || b == Byte_ascii.Cr) {
        ++pos;
        continue;
      }
      if (pos == cur) return Php_parser.NotFound;	// nothing consumed yet >>> hook is not followed by a line break
      break;	// line break(s) consumed; stop at first other byte
    }
    tkn_wkr.Process(tkn_factory.Declaration(bgn, pos));
    return pos;
  }
}
/** Lexer for whitespace; one instance is created per hook byte (space, newline, tab, carriage return). */
class Php_lxr_ws extends Php_lxr_base {
  public static final byte[] Bry_ws_space = Bry_.new_a7(" "), Bry_ws_nl = Bry_.new_a7("\n"), Bry_ws_tab = Bry_.new_a7("\t"), Bry_ws_cr = Bry_.new_a7("\r");
  private byte ws_tid;
  private byte[] ws_bry;

  /** @param ws_tid one of the Php_tkn_ws.Tid_* ids; for any other value the hook bytes stay null */
  public Php_lxr_ws(byte ws_tid) {
    this.ws_tid = ws_tid;
    if      (ws_tid == Php_tkn_ws.Tid_space) ws_bry = Bry_ws_space;
    else if (ws_tid == Php_tkn_ws.Tid_nl)    ws_bry = Bry_ws_nl;
    else if (ws_tid == Php_tkn_ws.Tid_tab)   ws_bry = Bry_ws_tab;
    else if (ws_tid == Php_tkn_ws.Tid_cr)    ws_bry = Bry_ws_cr;
  }

  public byte Ws_tid() {return ws_tid;}
  public byte[] Ws_bry() {return ws_bry;}
  @Override public int Lxr_tid() {return Php_lxr_.Tid_ws;}

  @Override public void Lxr_ini(Btrie_slim_mgr trie, Php_parser_interrupt[] parser_interrupts) {
    trie.Add_obj(ws_bry, this);
    parser_interrupts[ws_bry[0]] = Php_parser_interrupt.Char;
  }

  /** Consumes the whole run of whitespace (any mix of the four kinds) and emits one ws token tagged with this lexer's ws_tid. */
  @Override public int Lxr_make(Php_ctx ctx, int bgn, int cur) {
    int pos = cur;
    while (pos != src_len && Is_ws(src[pos]))
      ++pos;
    tkn_wkr.Process(tkn_factory.Ws(bgn, pos, ws_tid));
    return pos;
  }

  private static boolean Is_ws(byte b) {
    return b == Byte_ascii.Space || b == Byte_ascii.Nl || b == Byte_ascii.Tab || b == Byte_ascii.Cr;
  }
}
// Lexer for comments; one instance per comment kind (multi-line, double-slash, hash).
class Php_lxr_comment extends Php_lxr_base {
  // comment_tid: one of the Php_tkn_comment.Tid_* ids; selects the opening and terminating byte sequences
  public Php_lxr_comment(byte comment_tid) {
    this.comment_tid = comment_tid;
    switch (comment_tid) {
      case Php_tkn_comment.Tid_mult:  comment_bgn = Bry_bgn_mult;  comment_end = Bry_end_mult; break;
      case Php_tkn_comment.Tid_slash: comment_bgn = Bry_bgn_slash; comment_end = Bry_end_nl;   break;
      case Php_tkn_comment.Tid_hash:  comment_bgn = Bry_bgn_hash;  comment_end = Bry_end_nl;   break;
    }
  }
  @Override public int Lxr_tid() {return Php_lxr_.Tid_comment;}
  @Override public void Lxr_ini(Btrie_slim_mgr trie, Php_parser_interrupt[] parser_interrupts) {
    trie.Add_obj(comment_bgn, this);
    parser_interrupts[Byte_ascii.Slash] = Php_parser_interrupt.Char;
    parser_interrupts[Byte_ascii.Hash] = Php_parser_interrupt.Char;
  }
  public byte Comment_tid() {return comment_tid;} private byte comment_tid;
  public byte[] Comment_bgn() {return comment_bgn;} private byte[] comment_bgn;
  public byte[] Comment_end() {return comment_end;} private byte[] comment_end;
  // Emits one comment token running from bgn through the terminating sequence.
  // bgn is the start of the opener and cur the position just after it.
  // When no terminator exists, a Dangling_comment warning is logged and the rest of src becomes the comment.
  @Override public int Lxr_make(Php_ctx ctx, int bgn, int cur) {
    // search from cur, not bgn: otherwise the opener's own bytes can complete the terminator;
    // EX: in "/*/ a */" a search from bgn pairs the opener's "*" with the next "/" and ends the comment after 3 bytes
    int end = Bry_find_.Find_fwd(src, comment_end, cur);
    if (end == Bry_.NotFound) {
      tkn_wkr.Msg_many(src, bgn, cur, Php_lxr_comment.Dangling_comment, comment_tid, comment_end);
      cur = src_len; // NOTE: terminating sequence not found; assume rest of src is comment
    }
    else
      cur = end + comment_end.length;
    tkn_wkr.Process(tkn_factory.Comment(bgn, cur, comment_tid));
    return cur;
  }
  public static final Gfo_msg_itm Dangling_comment = Gfo_msg_itm_.new_warn_(Php_parser.Log_nde, "dangling_comment", "dangling_comment");
  public static final byte[] Bry_bgn_mult = Bry_.new_a7("/*"), Bry_bgn_slash = Bry_.new_a7("//"), Bry_bgn_hash = Bry_.new_a7("#")
    , Bry_end_mult = Bry_.new_a7("*/"), Bry_end_nl = Bry_.new_a7("\n");
}
/** Lexer for variables: hooks on "$" and consumes the following run of ASCII letters, digits and underscores. */
class Php_lxr_var extends Php_lxr_base {
  private static final byte[] Bry_var = Bry_.new_a7("$");

  @Override public int Lxr_tid() {return Php_lxr_.Tid_var;}

  @Override public void Lxr_ini(Btrie_slim_mgr trie, Php_parser_interrupt[] parser_interrupts) {
    trie.Add_obj(Bry_var, this);
    parser_interrupts[Byte_ascii.Dollar] = Php_parser_interrupt.Char;
  }

  /** Emits a var token spanning the "$" at bgn plus every name byte that follows cur. */
  @Override public int Lxr_make(Php_ctx ctx, int bgn, int cur) {
    int pos = cur;
    while (pos != src_len && Is_name_byte(src[pos]))
      ++pos;
    tkn_wkr.Process(tkn_factory.Var(bgn, pos));
    return pos;
  }

  /** True for 0-9, A-Z, a-z and underscore. */
  private static boolean Is_name_byte(byte b) {
    if (b >= Byte_ascii.Num_0 && b <= Byte_ascii.Num_9) return true;
    if (b >= Byte_ascii.Ltr_A && b <= Byte_ascii.Ltr_Z) return true;
    if (b >= Byte_ascii.Ltr_a && b <= Byte_ascii.Ltr_z) return true;
    return b == Byte_ascii.Underline;
  }
}
/** Lexer for a fixed symbol (EX: ";", "=", "=>"); the token covers exactly the hook bytes. */
class Php_lxr_sym extends Php_lxr_base {
  private byte[] hook;
  byte tkn_tid;

  /**
   * @param hook_str symbol text registered in the trie
   * @param tkn_tid  token id passed to the token factory for each match
   */
  public Php_lxr_sym(String hook_str, byte tkn_tid) {
    this.hook = Bry_.new_a7(hook_str);
    this.tkn_tid = tkn_tid;
  }

  @Override public int Lxr_tid() {return Php_lxr_.Tid_sym;}

  @Override public void Lxr_ini(Btrie_slim_mgr trie, Php_parser_interrupt[] parser_interrupts) {
    trie.Add_obj(hook, this);
    parser_interrupts[hook[0]] = Php_parser_interrupt.Char;	// first byte of the symbol ends a pending text run
  }

  /** Emits a generic token for [bgn, cur) and leaves the position unchanged. */
  @Override public int Lxr_make(Php_ctx ctx, int bgn, int cur) {
    tkn_wkr.Process(tkn_factory.Generic(bgn, cur, tkn_tid));
    return cur;
  }
}
/** Lexer for quoted strings; one instance per quote byte (apostrophe or double quote). */
class Php_lxr_quote extends Php_lxr_base {
  public static final Gfo_msg_itm Dangling_quote = Gfo_msg_itm_.new_warn_(Php_parser.Log_nde, "dangling_quote", "dangling_quote");
  public static final byte[] Quote_bry_single = Bry_.new_a7("'"), Quote_bry_double = Bry_.new_a7("\"");
  private byte quote_tid;
  private byte[] quote_bry;

  /** @param quote_tid Byte_ascii.Apos or Byte_ascii.Quote; for any other value the hook bytes stay null */
  public Php_lxr_quote(byte quote_tid) {
    this.quote_tid = quote_tid;
    if      (quote_tid == Byte_ascii.Apos)  quote_bry = Quote_bry_single;
    else if (quote_tid == Byte_ascii.Quote) quote_bry = Quote_bry_double;
  }

  @Override public int Lxr_tid() {return Php_lxr_.Tid_quote;}

  @Override public void Lxr_ini(Btrie_slim_mgr trie, Php_parser_interrupt[] parser_interrupts) {
    trie.Add_obj(quote_bry, this);
    parser_interrupts[quote_tid] = Php_parser_interrupt.Char;
  }

  public byte Quote_tid() {return quote_tid;}
  public byte[] Quote_bry() {return quote_bry;}

  /**
   * Emits one quote token running from the opening quote at bgn through the first unescaped closing quote.
   * When no closing quote exists, a Dangling_quote warning is logged and the rest of src becomes the token.
   */
  @Override public int Lxr_make(Php_ctx ctx, int bgn, int cur) {
    int scan_pos = cur;
    int tkn_end;
    while (true) {
      int quote_pos = Bry_find_.Find_fwd(src, quote_bry, scan_pos);
      if (quote_pos == Bry_.NotFound) {
        tkn_wkr.Msg_many(src, bgn, scan_pos, Php_lxr_quote.Dangling_quote, quote_tid, quote_bry);
        tkn_end = src_len;	// NOTE: closing quote not found; assume rest of src is the quoted text
        break;
      }
      if (Is_escaped(quote_pos)) {	// EX: \' >>> not the closing quote; resume search after it
        scan_pos = quote_pos + 1;
        continue;
      }
      tkn_end = quote_pos + quote_bry.length;
      break;
    }
    tkn_wkr.Process(tkn_factory.Quote(bgn, tkn_end, quote_tid));
    return tkn_end;
  }

  /** True when the quote at quote_pos is preceded by an odd number of backslashes; EX: \' and \\\' are escaped, \\' is not. */
  private boolean Is_escaped(int quote_pos) {
    if (src[quote_pos - 1] != Byte_ascii.Backslash) return false;
    int backslash_count = 1;
    for (int i = quote_pos - 2; i > -1 && src[i] == Byte_ascii.Backslash; i--)
      ++backslash_count;
    return (backslash_count & 1) == 1;
  }
}
/** Lexer for a keyword (EX: null, false, true, array); the match only counts when the keyword is not the prefix of a longer word. */
class Php_lxr_keyword extends Php_lxr_base {
  private byte[] hook;
  byte tkn_tid;

  /**
   * @param hook_str keyword text registered in the trie
   * @param tkn_tid  token id passed to the token factory for each match
   */
  public Php_lxr_keyword(String hook_str, byte tkn_tid) {
    this.hook = Bry_.new_a7(hook_str);
    this.tkn_tid = tkn_tid;
  }

  @Override public int Lxr_tid() {return Php_lxr_.Tid_keyword;}

  // NOTE: no parser interrupt is registered; a keyword is only matched where the parser already tries the trie
  @Override public void Lxr_ini(Btrie_slim_mgr trie, Php_parser_interrupt[] parser_interrupts) {
    trie.Add_obj(hook, this);
  }

  /** Emits a generic token for [bgn, cur), or returns Php_parser.NotFound when the next byte continues a word; EX: 'nulla'; 'null0' */
  @Override public int Lxr_make(Php_ctx ctx, int bgn, int cur) {
    if (cur < src_len && !Ends_word(src[cur]))
      return Php_parser.NotFound;
    tkn_wkr.Process(tkn_factory.Generic(bgn, cur, tkn_tid));
    return cur;
  }

  /** True for bytes that may directly follow a keyword; EX: 'null '; 'null='; etc.. */
  private static boolean Ends_word(byte b) {
    switch (b) {
      case Byte_ascii.Space: case Byte_ascii.Tab: case Byte_ascii.Nl: case Byte_ascii.Cr:
      case Byte_ascii.Hash: case Byte_ascii.Slash:
      case Byte_ascii.Quote: case Byte_ascii.Apos:
      case Byte_ascii.Bang: case Byte_ascii.Dollar: case Byte_ascii.Percent: case Byte_ascii.Amp:
      case Byte_ascii.Paren_bgn: case Byte_ascii.Paren_end: case Byte_ascii.Star: case Byte_ascii.Plus:
      case Byte_ascii.Comma: case Byte_ascii.Dash: case Byte_ascii.Dot: case Byte_ascii.Semic:
      case Byte_ascii.Lt: case Byte_ascii.Eq: case Byte_ascii.Gt: case Byte_ascii.Question: case Byte_ascii.At:
      case Byte_ascii.Brack_bgn: case Byte_ascii.Backslash: case Byte_ascii.Brack_end: case Byte_ascii.Pow: case Byte_ascii.Tick:
      case Byte_ascii.Curly_bgn: case Byte_ascii.Pipe: case Byte_ascii.Curly_end: case Byte_ascii.Tilde:
        return true;
      default:	// num, ltr or extended utf8 character sequence
        return false;
    }
  }
}
/** Lexer for unsigned decimal numbers: hooks on each digit 0-9 and consumes the following run of digits. */
class Php_lxr_num extends Php_lxr_base {
  // NOTE: previously returned Php_lxr_.Tid_keyword (copy-paste from Php_lxr_keyword)
  @Override public int Lxr_tid() {return Php_lxr_.Tid_num;}

  // NOTE: no parser interrupt is registered for digits, so a digit inside a text run does not split it; EX: k0
  @Override public void Lxr_ini(Btrie_slim_mgr trie, Php_parser_interrupt[] parser_interrupts) {
    for (int i = 0; i < 10; i++)
      trie.Add_obj(new byte[] {(byte)(i + Byte_ascii.Num_0)}, this);
  }

  /** Emits a num token spanning the digit at bgn plus every digit that follows cur. */
  @Override public int Lxr_make(Php_ctx ctx, int bgn, int cur) {
    boolean loop = true;
    while (loop) {
      if (cur == src_len) break;
      byte b = src[cur];
      switch (b) {
        case Byte_ascii.Num_0: case Byte_ascii.Num_1: case Byte_ascii.Num_2: case Byte_ascii.Num_3: case Byte_ascii.Num_4:
        case Byte_ascii.Num_5: case Byte_ascii.Num_6: case Byte_ascii.Num_7: case Byte_ascii.Num_8: case Byte_ascii.Num_9:
          ++cur;
          break;
        default:	// first non-digit ends the number
          loop = false;
          break;
      }
    }
    tkn_wkr.Process(tkn_factory.Num(bgn, cur));
    return cur;
  }
}

View File

@@ -0,0 +1,121 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.phps; import gplx.*; import gplx.langs.*;
import gplx.core.btries.*;
// Tokenizer driver: walks src byte by byte, hands trie matches to the registered lexers, and groups everything else into text tokens.
// All produced tokens are passed to the Php_tkn_wkr given to Parse_tkns.
public class Php_parser {
Php_lxr[] lxrs; int lxrs_len;
// txt_bgn: start of the current unlexed run; txt_tkn: text token already emitted for that run (null if none yet)
int txt_bgn; Php_tkn_txt txt_tkn;
private Btrie_slim_mgr trie = Btrie_slim_mgr.ci_a7(); // NOTE:ci:PHP tkns are ASCII
byte[] src; int src_len; Php_tkn_wkr tkn_wkr; Php_tkn_factory tkn_factory = new Php_tkn_factory(); Php_ctx ctx = new Php_ctx();
// indexed by unsigned byte value; an entry set to Php_parser_interrupt.Char ends a text run at that byte
Php_parser_interrupt[] parser_interrupts = new Php_parser_interrupt[256];
// Registers every lexer; each one adds its hook(s) to the trie and flags its interrupt bytes (see Init_lxr).
public Php_parser() {
List_adp list = List_adp_.new_();
Init_lxr(list, new Php_lxr_declaration());
Init_lxr(list, new Php_lxr_ws(Php_tkn_ws.Tid_space));
Init_lxr(list, new Php_lxr_ws(Php_tkn_ws.Tid_nl));
Init_lxr(list, new Php_lxr_ws(Php_tkn_ws.Tid_tab));
Init_lxr(list, new Php_lxr_ws(Php_tkn_ws.Tid_cr));
Init_lxr(list, new Php_lxr_comment(Php_tkn_comment.Tid_mult));
Init_lxr(list, new Php_lxr_comment(Php_tkn_comment.Tid_slash));
Init_lxr(list, new Php_lxr_comment(Php_tkn_comment.Tid_hash));
Init_lxr(list, new Php_lxr_var());
Init_lxr(list, new Php_lxr_sym(";", Php_tkn_.Tid_semic));
Init_lxr(list, new Php_lxr_sym("=", Php_tkn_.Tid_eq));
Init_lxr(list, new Php_lxr_sym("=>", Php_tkn_.Tid_eq_kv));
Init_lxr(list, new Php_lxr_sym(",", Php_tkn_.Tid_comma));
Init_lxr(list, new Php_lxr_sym("(", Php_tkn_.Tid_paren_bgn));
Init_lxr(list, new Php_lxr_sym(")", Php_tkn_.Tid_paren_end));
Init_lxr(list, new Php_lxr_sym("[", Php_tkn_.Tid_brack_bgn));
Init_lxr(list, new Php_lxr_sym("]", Php_tkn_.Tid_brack_end));
Init_lxr(list, new Php_lxr_keyword("null", Php_tkn_.Tid_null));
Init_lxr(list, new Php_lxr_keyword("false", Php_tkn_.Tid_false));
Init_lxr(list, new Php_lxr_keyword("true", Php_tkn_.Tid_true));
Init_lxr(list, new Php_lxr_keyword("array", Php_tkn_.Tid_ary));
Init_lxr(list, new Php_lxr_num());
Init_lxr(list, new Php_lxr_quote(Byte_ascii.Apos));
Init_lxr(list, new Php_lxr_quote(Byte_ascii.Quote));
lxrs = (Php_lxr[])list.To_ary(Php_lxr.class);
lxrs_len = list.Count();
}
// Lets the lexer hook itself into the trie / interrupt table, then remembers it for Lxr_bgn calls.
private void Init_lxr(List_adp list, Php_lxr lxr) {
lxr.Lxr_ini(trie, parser_interrupts);
list.Add(lxr);
}
// Convenience overload: converts src to bytes with Bry_.new_u8 and delegates to the byte[] overload.
public void Parse_tkns(String src, Php_tkn_wkr tkn_wkr) {Parse_tkns(Bry_.new_u8(src), tkn_wkr);}
// Tokenizes src and passes each token to tkn_wkr.Process.
// tkn_wkr.Init is always called, even for empty src; nothing else happens when src is empty.
public void Parse_tkns(byte[] src, Php_tkn_wkr tkn_wkr) {
this.src = src; this.src_len = src.length; this.tkn_wkr = tkn_wkr;
ctx.Src_(src);
tkn_wkr.Init(ctx);
if (src_len == 0) return;
for (int i = 0; i < lxrs_len; i++)
lxrs[i].Lxr_bgn(src, src_len, tkn_wkr, tkn_factory);
int pos = 0;
byte b = src[pos];
txt_tkn = null; txt_bgn = 0;
boolean loop_raw = true, loop_txt = true;
while (loop_raw) {
Object o = trie.Match_bgn_w_byte(b, src, pos, src_len);
if (o == null) { // char does not hook into a lxr
loop_txt = true;
while (loop_txt) { // keep looping until end of String or parser_interrupt
++pos;
if (pos == src_len) {loop_raw = false; break;}
b = src[pos];
// mask to 0-255 b/c Java bytes are signed and would otherwise produce a negative index
if (parser_interrupts[b & 0xFF] == Php_parser_interrupt.Char) {
Make_txt(txt_bgn, pos);
break;
}
}
if (!loop_raw) break;
continue; // continue b/c b is set to interrupt char, and should be matched against trie
}
else { // char hooks into lxr
if (txt_bgn != pos) // txt_bgn is set; make text tkn
Make_txt(txt_bgn, pos);
Php_lxr lxr = (Php_lxr)o;
int match_pos = trie.Match_pos();
int make_pos = lxr.Lxr_make(ctx, pos, match_pos);
if (make_pos == Php_parser.NotFound) {
// lexer rejected the match; treat the hook byte as text and move on by one byte
Make_txt(txt_bgn, pos);
++pos;
}
else {
// lexer emitted a token; any pending text run is finished and a new one starts after the token
txt_tkn = null;
txt_bgn = pos = make_pos;
}
}
if (pos == src_len) break;
b = src[pos];
}
// flush trailing text run, if any
if (txt_bgn != pos)
Make_txt(txt_bgn, pos);
}
// Emits a new text token for [bgn, end) when none is pending; otherwise only extends the pending token's end. Returns end.
int Make_txt(int bgn, int end) {
if (txt_tkn == null) {
txt_tkn = tkn_factory.Txt(bgn, end);
tkn_wkr.Process(txt_tkn);
}
else
txt_tkn.Src_end_(end);
return end;
}
// Returned by Php_lxr.Lxr_make when a lexer rejects a trie match.
public static final int NotFound = -1;
public static final Gfo_msg_grp Log_nde = Gfo_msg_grp_.new_(Gfo_msg_grp_.Root_gplx, "php_parser");
}

View File

@@ -0,0 +1,399 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.phps; import gplx.*; import gplx.langs.*;
import org.junit.*; import gplx.core.tests.*;
public class Php_parser_tst {
Php_parser_fxt fxt = new Php_parser_fxt();
@Before public void init() {fxt.Clear();}
@Test public void Text() {
fxt.tst_tkns("text", fxt.tkn_txt(0, 4));
}
@Test public void Declaration_pass() {
fxt.tst_tkns("<?php", fxt.tkn_declaration());
}
@Test public void Declaration_fail() {
fxt.tst_tkns("<?phpx", fxt.tkn_txt(0, 6));
}
@Test public void Ws_basic() {
fxt.tst_tkns(" ", fxt.tkn_ws(0, 1));
}
@Test public void Ws_mix() {
fxt.tst_tkns(" a\n", fxt.tkn_ws(0, 1), fxt.tkn_txt(1, 2), fxt.tkn_ws(2, 3));
}
@Test public void Comment_mult() {
fxt.tst_tkns("/*a*/", fxt.tkn_comment_mult(0, 5));
}
@Test public void Comment_slash() {
fxt.tst_tkns("//a\n", fxt.tkn_comment_slash(0, 4));
}
@Test public void Comment_hash() {
fxt.tst_tkns("#a\n", fxt.tkn_comment_hash(0, 3));
}
@Test public void Comment_mult_fail() {
fxt.Msg(Php_lxr_comment.Dangling_comment, 0, 2).tst_tkns("/*a", fxt.tkn_comment_mult(0, 3));
}
@Test public void Var() {
fxt.tst_tkns("$abc", fxt.tkn_var(0, 4, "abc"));
}
@Test public void Sym() {
fxt.tst_tkns(";==>,()", fxt.tkn_generic(0, 1, Php_tkn_.Tid_semic), fxt.tkn_generic(1, 2, Php_tkn_.Tid_eq), fxt.tkn_generic(2, 4, Php_tkn_.Tid_eq_kv), fxt.tkn_generic(4, 5, Php_tkn_.Tid_comma), fxt.tkn_generic(5, 6, Php_tkn_.Tid_paren_bgn), fxt.tkn_generic(6, 7, Php_tkn_.Tid_paren_end));
}
@Test public void Keyword() {
fxt.tst_tkns("null=nulla", fxt.tkn_generic(0, 4, Php_tkn_.Tid_null), fxt.tkn_generic(4, 5, Php_tkn_.Tid_eq), fxt.tkn_txt(5, 10));
}
@Test public void Num() {
fxt.tst_tkns("0=123", fxt.tkn_num(0, 1, 0), fxt.tkn_generic(1, 2, Php_tkn_.Tid_eq), fxt.tkn_num(2, 5, 123));
}
@Test public void Quote_apos() {
fxt.tst_tkns("'a\"b'", fxt.tkn_quote_apos(0, 5));
}
@Test public void Quote_quote() {
fxt.tst_tkns("\"a'b\"", fxt.tkn_quote_quote(0, 5));
}
@Test public void Quote_escape() {
fxt.tst_tkns("'a\\'b'", fxt.tkn_quote_apos(0, 6));
}
@Test public void Brack() {
fxt.tst_tkns("['a']", fxt.tkn_generic(0, 1, Php_tkn_.Tid_brack_bgn), fxt.tkn_quote_apos(1, 4), fxt.tkn_generic(4, 5, Php_tkn_.Tid_brack_end));
}
@Test public void Line_assign_false() {
fxt.tst_lines("$a = false;", fxt.line_assign("a", fxt.itm_bool_false()));
}
@Test public void Line_assign_quote_charcode() {
fxt.tst_lines("$a = 'bc';", fxt.line_assign("a", fxt.itm_quote("bc")));
}
@Test public void Line_assign_mult() {
fxt.tst_lines("$a = 'b';\n$c='d';", fxt.line_assign("a", fxt.itm_quote("b")), fxt.line_assign("c", fxt.itm_quote("d")));
}
@Test public void Line_ary_flat() {
fxt.tst_lines("$a = array('b', 'c', 'd');", fxt.line_assign("a", fxt.itm_ary().Subs_(fxt.itm_quote("b"), fxt.itm_quote("c"), fxt.itm_quote("d"))));
}
@Test public void Line_ary_flat_escape() { // PURPOSE.fix: \\' was being interpreted incorrectly; \\ should escape \, but somehow \' was being escaped
fxt.tst_lines("$a = array('b\\\\', 'c');", fxt.line_assign("a", fxt.itm_ary().Subs_(fxt.itm_quote("b\\\\"), fxt.itm_quote("c"))));
}
@Test public void Line_ary_flat_escape2() { // PURPOSE.fix: \\' was being interpreted incorrectly; \\ should escape \, but somehow \' was being escaped
fxt.tst_lines("$a = array('b\\\\\\'c', 'd');", fxt.line_assign("a", fxt.itm_ary().Subs_(fxt.itm_quote("b\\\\\\'c"), fxt.itm_quote("d"))));
}
@Test public void Line_ary_kv() {
fxt.tst_lines("$a = array(k0 => 'v0', k1 => 'v1', k2 => 'v2');", fxt.line_assign("a", fxt.itm_ary().Subs_(fxt.itm_kv_quote("k0", "v0"), fxt.itm_kv_quote("k1", "v1"), fxt.itm_kv_quote("k2", "v2"))));
}
@Test public void Line_ary_kv_num() {
fxt.tst_lines("$a = array(k0 => 0, k1 => 1);", fxt.line_assign("a", fxt.itm_ary().Subs_(fxt.itm_kv_int("k0", 0), fxt.itm_kv_int("k1", 1))));
}
@Test public void Line_ary_nest() {
fxt.tst_lines("$a = array('b', array('c', 'd'), 'e');", fxt.line_assign("a", fxt.itm_ary().Subs_(fxt.itm_quote("b"), fxt.itm_ary().Subs_(fxt.itm_quote("c"), fxt.itm_quote("d")), fxt.itm_quote("e"))));
}
@Test public void Line_ary_nest_kv() {
fxt.tst_lines("$a = array('i00' => array('01', '02'), 'i10' => array('11', '12'), 'i20' => array('21', '22'));"
, fxt.line_assign
( "a"
, fxt.itm_ary().Subs_
( fxt.itm_kv_itm("i00", fxt.itm_ary().Subs_(fxt.itm_quote("01"), fxt.itm_quote("02")))
, fxt.itm_kv_itm("i10", fxt.itm_ary().Subs_(fxt.itm_quote("11"), fxt.itm_quote("12")))
, fxt.itm_kv_itm("i20", fxt.itm_ary().Subs_(fxt.itm_quote("21"), fxt.itm_quote("22")))
)));
}
@Test public void Line_ws() {
fxt.tst_lines("\r\n$a = false;", fxt.line_assign("a", fxt.itm_bool_false()));
}
@Test public void Empty_usr_array() {
fxt.tst_lines("$a = array();\n$b = array();"
, fxt.line_assign("a", fxt.itm_ary())
, fxt.line_assign("b", fxt.itm_ary())
);
}
@Test public void Line_ary_kv_txt() {
fxt.tst_lines("$a = array('k0' => a, 'k1' => b);", fxt.line_assign("a", fxt.itm_ary().Subs_(fxt.itm_kv_txt("k0", "a"), fxt.itm_kv_txt("k1", "b"))));
}
@Test public void Line_brack() {
	// assignment through a bracketed subscript: $a['b']
	String[] expd_subs = String_.Ary("b");
	fxt.tst_lines("$a['b'] = 'c';", fxt.line_assign_subs("a", expd_subs, fxt.itm_quote("c")));
}
}
/** Test fixture for Php_parser: supplies factory methods for expected-value checkers and runs the parser against them. */
class Php_parser_fxt {
	Php_tkn_factory tkn_factory = new Php_tkn_factory();
	Php_parser parser = new Php_parser();
	Php_tkn_wkr_tkn tkn_wkr = new Php_tkn_wkr_tkn();
	Php_evaluator line_wkr = new Php_evaluator(new Gfo_msg_log("test"));
	Tst_mgr tst_mgr = new Tst_mgr();
	Gfo_msg_log_chkr log_mgr_chkr = new Gfo_msg_log_chkr();

	/** Resets the expected-message list and both parser workers. */
	public void Clear() {
		log_mgr_chkr.Clear();
		tkn_wkr.Clear();
		line_wkr.Clear();
	}

	// token checkers
	public Php_tkn_chkr_base tkn_declaration() {
		return Php_tkn_declaration_chkr._;
	}
	public Php_tkn_chkr_base tkn_txt(int bgn, int end) {
		return new Php_tkn_txt_chkr(bgn, end);
	}
	public Php_tkn_chkr_base tkn_ws(int bgn, int end) {
		return new Php_tkn_ws_chkr(bgn, end);
	}
	public Php_tkn_chkr_base tkn_generic(int bgn, int end, byte tid) {
		return new Php_tkn_generic_chkr(bgn, end, tid);
	}
	public Php_tkn_comment_chkr tkn_comment_mult(int bgn, int end) {
		Php_tkn_comment_chkr rv = new Php_tkn_comment_chkr(bgn, end);
		return rv.Comment_tid_(Php_tkn_comment.Tid_mult);
	}
	public Php_tkn_comment_chkr tkn_comment_slash(int bgn, int end) {
		Php_tkn_comment_chkr rv = new Php_tkn_comment_chkr(bgn, end);
		return rv.Comment_tid_(Php_tkn_comment.Tid_slash);
	}
	public Php_tkn_comment_chkr tkn_comment_hash(int bgn, int end) {
		Php_tkn_comment_chkr rv = new Php_tkn_comment_chkr(bgn, end);
		return rv.Comment_tid_(Php_tkn_comment.Tid_hash);
	}
	public Php_tkn_quote_chkr tkn_quote_apos(int bgn, int end) {
		Php_tkn_quote_chkr rv = new Php_tkn_quote_chkr(bgn, end);
		return rv.Quote_tid_(Byte_ascii.Apos);
	}
	public Php_tkn_quote_chkr tkn_quote_quote(int bgn, int end) {
		Php_tkn_quote_chkr rv = new Php_tkn_quote_chkr(bgn, end);
		return rv.Quote_tid_(Byte_ascii.Quote);
	}
	/** Registers an expected log message; returns this fixture so calls can be chained. */
	public Php_parser_fxt Msg(Gfo_msg_itm itm, int bgn, int end) {
		log_mgr_chkr.Add_itm(itm, bgn, end);
		return this;
	}
	public Php_tkn_var_chkr tkn_var(int bgn, int end, String v) {
		Php_tkn_var_chkr rv = new Php_tkn_var_chkr(bgn, end);
		return rv.Var_name_(v);
	}
	public Php_tkn_num_chkr tkn_num(int bgn, int end, int v) {
		Php_tkn_num_chkr rv = new Php_tkn_num_chkr(bgn, end);
		return rv.Num_val_int_(v);
	}

	// line checkers
	public Php_line_assign_chkr line_assign(String key, Php_itm_chkr_base val) {
		Php_line_assign_chkr rv = new Php_line_assign_chkr();
		return rv.Key_(key).Val_(val);
	}
	public Php_line_assign_chkr line_assign_subs(String key, String[] subs, Php_itm_chkr_base val) {
		Php_line_assign_chkr rv = new Php_line_assign_chkr();
		return rv.Key_(key).Subs_(subs).Val_(val);
	}

	// item checkers
	public Php_itm_chkr_base itm_bool_true() {
		return new Php_itm_generic_chkr(Php_itm_.Tid_bool_true);
	}
	public Php_itm_chkr_base itm_bool_false() {
		return new Php_itm_generic_chkr(Php_itm_.Tid_bool_false);
	}
	public Php_itm_chkr_base itm_null() {
		return new Php_itm_generic_chkr(Php_itm_.Tid_null);
	}
	public Php_itm_chkr_base itm_quote(String v) {
		return new Php_itm_quote_chkr().Val_obj_str_(v);
	}
	public Php_itm_chkr_base itm_int(int v) {
		return new Php_itm_int_chkr().Val_obj_int_(v);
	}
	public Php_itm_chkr_base itm_txt(String v) {
		return new Php_itm_txt_chkr().Val_obj_str_(v);
	}
	public Php_itm_ary_chkr itm_ary() {
		return new Php_itm_ary_chkr();
	}
	public Php_itm_kv_chkr itm_kv_quote(String k, String v) {
		return new Php_itm_kv_chkr().Key_(k).Val_(itm_quote(v));
	}
	public Php_itm_kv_chkr itm_kv_txt(String k, String v) {
		return new Php_itm_kv_chkr().Key_(k).Val_(itm_txt(v));
	}
	public Php_itm_kv_chkr itm_kv_int(String k, int v) {
		return new Php_itm_kv_chkr().Key_(k).Val_(itm_int(v));
	}
	public Php_itm_kv_chkr itm_kv_itm(String k, Php_itm_chkr_base v) {
		return new Php_itm_kv_chkr().Key_(k).Val_(v);
	}

	/** Parses raw with the token worker, then checks the produced tokens and the logged messages. */
	public void tst_tkns(String raw, Php_tkn_chkr_base... expd) {
		byte[] src = Bry_.new_u8(raw);
		parser.Parse_tkns(src, tkn_wkr);
		Php_tkn[] actl = (Php_tkn[])tkn_wkr.List().To_ary(Php_tkn.class);
		tst_mgr.Vars().Clear().Add("raw_bry", src);	// checkers read the source back under the "raw_bry" key
		tst_mgr.Tst_ary("", expd, actl);
		log_mgr_chkr.tst(tst_mgr, tkn_wkr.Msg_log());
	}
	/** Parses raw with the line evaluator, then checks the produced lines and the logged messages. */
	public void tst_lines(String raw, Php_line_assign_chkr... expd) {
		byte[] src = Bry_.new_u8(raw);
		parser.Parse_tkns(src, line_wkr);
		Php_line[] actl = (Php_line[])line_wkr.List().To_ary(Php_line.class);
		tst_mgr.Vars().Clear().Add("raw_bry", src);
		tst_mgr.Tst_ary("", expd, actl);
		log_mgr_chkr.tst(tst_mgr, line_wkr.Msg_log());
	}
}
/** Base checker for Php_tkn: compares the token type id and, when set, the source range; subclasses add checks via Chk_tkn. */
abstract class Php_tkn_chkr_base implements Tst_chkr {
	private int src_bgn = -1;	// -1 is passed to Tst_val as "true" in the first arg; presumably means "do not check" -- confirm against Tst_mgr
	private int src_end = -1;

	public abstract byte Tkn_tid();
	public abstract Class<?> TypeOf();

	public int Src_bgn() {return src_bgn;}
	public int Src_end() {return src_end;}
	public void Src_rng_(int src_bgn, int src_end) {
		this.src_bgn = src_bgn;
		this.src_end = src_end;
	}

	/** Sums the results of the type-id, range and subclass-specific checks. */
	public int Chk(Tst_mgr mgr, String path, Object actl_obj) {
		Php_tkn actl = (Php_tkn)actl_obj;
		boolean bgn_unset = src_bgn == -1;
		boolean end_unset = src_end == -1;
		int rv = mgr.Tst_val(false, path, "tkn_tid", this.Tkn_tid(), actl.Tkn_tid());
		rv += mgr.Tst_val(bgn_unset, path, "src_bgn", src_bgn, actl.Src_bgn());
		rv += mgr.Tst_val(end_unset, path, "src_end", src_end, actl.Src_end());
		rv += Chk_tkn(mgr, path, actl);
		return rv;
	}

	/** Hook for subclass-specific checks; the default contributes 0. */
	@gplx.Virtual public int Chk_tkn(Tst_mgr mgr, String path, Php_tkn actl_obj) {
		return 0;
	}
}
/** Checker for declaration tokens; carries no state, so a single shared instance is exposed. */
class Php_tkn_declaration_chkr extends Php_tkn_chkr_base {
	public static final Php_tkn_declaration_chkr _ = new Php_tkn_declaration_chkr();

	@Override public byte Tkn_tid() {
		return Php_tkn_.Tid_declaration;
	}
	@Override public Class<?> TypeOf() {
		return Php_tkn_declaration.class;
	}
}
/** Checker for text tokens; only the type id and source range are compared. */
class Php_tkn_txt_chkr extends Php_tkn_chkr_base {
	public Php_tkn_txt_chkr(int src_bgn, int src_end) {
		this.Src_rng_(src_bgn, src_end);
	}
	@Override public byte Tkn_tid() {
		return Php_tkn_.Tid_txt;
	}
	@Override public Class<?> TypeOf() {
		return Php_tkn_txt.class;
	}
}
/** Checker for whitespace tokens; only the type id and source range are compared. */
class Php_tkn_ws_chkr extends Php_tkn_chkr_base {
	public Php_tkn_ws_chkr(int src_bgn, int src_end) {
		this.Src_rng_(src_bgn, src_end);
	}
	@Override public byte Tkn_tid() {
		return Php_tkn_.Tid_ws;
	}
	@Override public Class<?> TypeOf() {
		return Php_tkn_ws.class;
	}
}
/** Checker for comment tokens; additionally compares the comment type id. */
class Php_tkn_comment_chkr extends Php_tkn_chkr_base {
	private byte comment_tid = Php_tkn_comment.Tid_null;

	public Php_tkn_comment_chkr(int src_bgn, int src_end) {
		this.Src_rng_(src_bgn, src_end);
	}
	@Override public Class<?> TypeOf() {
		return Php_tkn_comment.class;
	}
	@Override public byte Tkn_tid() {
		return Php_tkn_.Tid_comment;
	}
	public Php_tkn_comment_chkr Comment_tid_(byte v) {
		this.comment_tid = v;
		return this;
	}
	@Override public int Chk_tkn(Tst_mgr mgr, String path, Php_tkn actl_obj) {
		Php_tkn_comment actl = (Php_tkn_comment)actl_obj;
		boolean tid_unset = comment_tid == Php_tkn_comment.Tid_null;	// still at its default
		return mgr.Tst_val(tid_unset, path, "comment_tid", comment_tid, actl.Comment_tid());
	}
}
/** Checker for quote tokens; additionally compares the quote character. */
class Php_tkn_quote_chkr extends Php_tkn_chkr_base {
	private byte quote_tid = Byte_ascii.Null;

	public Php_tkn_quote_chkr(int src_bgn, int src_end) {
		this.Src_rng_(src_bgn, src_end);
	}
	@Override public Class<?> TypeOf() {
		return Php_tkn_quote.class;
	}
	@Override public byte Tkn_tid() {
		return Php_tkn_.Tid_quote;
	}
	public Php_tkn_quote_chkr Quote_tid_(byte v) {
		this.quote_tid = v;
		return this;
	}
	@Override public int Chk_tkn(Tst_mgr mgr, String path, Php_tkn actl_obj) {
		Php_tkn_quote actl = (Php_tkn_quote)actl_obj;
		boolean tid_unset = quote_tid == Byte_ascii.Null;	// still at its default
		return mgr.Tst_val(tid_unset, path, "quote_tid", quote_tid, actl.Quote_tid());
	}
}
/** Checker for variable tokens; additionally compares the variable name extracted from the source bytes. */
class Php_tkn_var_chkr extends Php_tkn_chkr_base {
	private String var_name;

	public Php_tkn_var_chkr(int src_bgn, int src_end) {
		this.Src_rng_(src_bgn, src_end);
	}
	@Override public Class<?> TypeOf() {
		return Php_tkn_var.class;
	}
	@Override public byte Tkn_tid() {
		return Php_tkn_.Tid_var;
	}
	public Php_tkn_var_chkr Var_name_(String v) {
		this.var_name = v;
		return this;
	}
	@Override public int Chk_tkn(Tst_mgr mgr, String path, Php_tkn actl_obj) {
		Php_tkn_var actl = (Php_tkn_var)actl_obj;
		byte[] src = (byte[])mgr.Vars_get_by_key("raw_bry");	// source bytes stored by the fixture before checking
		boolean name_unset = var_name == null;
		return mgr.Tst_val(name_unset, path, "var_name", var_name, String_.new_u8(actl.Var_name(src)));
	}
}
/** Checker for number tokens; additionally compares the int value parsed from the source bytes. */
class Php_tkn_num_chkr extends Php_tkn_chkr_base {
	private int num_val_int = Int_.Min_value;

	public Php_tkn_num_chkr(int src_bgn, int src_end) {
		this.Src_rng_(src_bgn, src_end);
	}
	@Override public Class<?> TypeOf() {
		return Php_tkn_num.class;
	}
	@Override public byte Tkn_tid() {
		return Php_tkn_.Tid_num;
	}
	public Php_tkn_num_chkr Num_val_int_(int v) {
		this.num_val_int = v;
		return this;
	}
	@Override public int Chk_tkn(Tst_mgr mgr, String path, Php_tkn actl_obj) {
		Php_tkn_num actl = (Php_tkn_num)actl_obj;
		byte[] src = (byte[])mgr.Vars_get_by_key("raw_bry");	// source bytes stored by the fixture before checking
		boolean val_unset = num_val_int == Int_.Min_value;	// still at its default
		return mgr.Tst_val(val_unset, path, "num_val_int", num_val_int, actl.Num_val_int(src));
	}
}
/** Checker for any token whose expected type id is supplied by the caller rather than fixed by the class. */
class Php_tkn_generic_chkr extends Php_tkn_chkr_base {
	private byte tkn_tid;

	public Php_tkn_generic_chkr(int src_bgn, int src_end, byte tkn_tid) {
		this.Src_rng_(src_bgn, src_end);
		this.tkn_tid = tkn_tid;
	}
	@Override public Class<?> TypeOf() {
		return Php_tkn.class;
	}
	@Override public byte Tkn_tid() {
		return tkn_tid;
	}
}
/** Checker for an assignment line: compares the key, the optional bracketed subscripts, and the assigned value. */
class Php_line_assign_chkr implements Tst_chkr {
	private String key;
	Php_itm_chkr_base[] subs;	// stays null unless Subs_ is called; null means the subscripts are not checked
	Php_itm_chkr_base val;

	public Class<?> TypeOf() {
		return Php_line_assign.class;
	}
	public Php_line_assign_chkr Key_(String v) {
		key = v;
		return this;
	}
	/** Wraps each expected subscript string in a quote checker. */
	public Php_line_assign_chkr Subs_(String[] v) {
		Php_itm_quote_chkr[] quotes = new Php_itm_quote_chkr[v.length];
		for (int idx = 0; idx < quotes.length; idx++) {
			quotes[idx] = new Php_itm_quote_chkr().Val_obj_str_(v[idx]);
		}
		subs = quotes;
		return this;
	}
	public Php_line_assign_chkr Val_(Php_itm_chkr_base v) {
		val = v;
		return this;
	}
	public int Chk(Tst_mgr mgr, String path, Object actl_obj) {
		Php_line_assign actl = (Php_line_assign)actl_obj;
		String actl_key = String_.new_u8(actl.Key().Val_obj_bry());
		int rv = 0;
		rv += mgr.Tst_val(key == null, path, "key", key, actl_key);
		if (subs != null) {
			rv += mgr.Tst_sub_ary(subs, actl.Key_subs(), "subs", rv);
		}
		rv += mgr.Tst_sub_obj(val, actl.Val(), "val", rv);
		return rv;
	}
}
/** Base checker for Php_itm: compares the item type id; subclasses add checks via Chk_itm. */
abstract class Php_itm_chkr_base implements Tst_chkr {
	public static final Php_itm_chkr_base[] Ary_empty = new Php_itm_chkr_base[0];

	public abstract byte Itm_tid();
	public abstract Class<?> TypeOf();

	/** Sums the results of the type-id check and the subclass-specific checks. */
	public int Chk(Tst_mgr mgr, String path, Object actl_obj) {
		Php_itm actl = (Php_itm)actl_obj;
		int tid_rslt = mgr.Tst_val(false, path, "tkn_tid", this.Itm_tid(), actl.Itm_tid());
		int itm_rslt = Chk_itm(mgr, path, actl);
		return tid_rslt + itm_rslt;
	}

	/** Hook for subclass-specific checks; the default contributes 0. */
	@gplx.Virtual public int Chk_itm(Tst_mgr mgr, String path, Php_itm actl_obj) {
		return 0;
	}
}
/** Checker for items that are fully described by their type id (used by the fixture for true / false / null). */
class Php_itm_generic_chkr extends Php_itm_chkr_base {
	private byte itm_tid;

	public Php_itm_generic_chkr(byte itm_tid) {
		this.itm_tid = itm_tid;
	}
	@Override public Class<?> TypeOf() {
		return Php_itm.class;
	}
	@Override public byte Itm_tid() {
		return itm_tid;
	}
}
/** Checker for int items; compares the expected int against the item's int value. */
class Php_itm_int_chkr extends Php_itm_chkr_base {
	@Override public byte Itm_tid() {return Php_itm_.Tid_int;}
	@Override public Class<?> TypeOf() {return Php_itm.class;}
	public Php_itm_int_chkr Val_obj_int_(int v) {this.val_obj_int = v; return this;} private int val_obj_int;
	@Override public int Chk_itm(Tst_mgr mgr, String path, Php_itm actl_obj) {
		Php_itm_int actl = (Php_itm_int)actl_obj;
		int rv = 0;
		// label names the field actually being compared (was the copy-pasted "val_obj_str", which mislabeled the int comparison)
		rv += mgr.Tst_val(false, path, "val_obj_int", val_obj_int, actl.Val_obj_int());
		return rv;
	}
}
/** Checker for bare-text values; the actual item is expected to be a Php_itm_var with type id Tid_var. */
class Php_itm_txt_chkr extends Php_itm_chkr_base {
	private String val_obj_str;

	@Override public Class<?> TypeOf() {
		return Php_itm.class;
	}
	@Override public byte Itm_tid() {
		return Php_itm_.Tid_var;
	}
	public Php_itm_txt_chkr Val_obj_str_(String v) {
		this.val_obj_str = v;
		return this;
	}
	@Override public int Chk_itm(Tst_mgr mgr, String path, Php_itm actl_obj) {
		Php_itm_var actl = (Php_itm_var)actl_obj;
		String actl_str = String_.new_u8(actl.Val_obj_bry());
		return mgr.Tst_val(false, path, "val_obj_str", val_obj_str, actl_str);
	}
}
/** Checker for quoted-string items; compares the expected string against the item's bytes decoded as UTF-8. */
class Php_itm_quote_chkr extends Php_itm_chkr_base {
	private String val_obj_str;

	@Override public Class<?> TypeOf() {
		return Php_itm.class;
	}
	@Override public byte Itm_tid() {
		return Php_itm_.Tid_quote;
	}
	public Php_itm_quote_chkr Val_obj_str_(String v) {
		this.val_obj_str = v;
		return this;
	}
	@Override public int Chk_itm(Tst_mgr mgr, String path, Php_itm actl_obj) {
		Php_itm_quote actl = (Php_itm_quote)actl_obj;
		String actl_str = String_.new_u8(actl.Val_obj_bry());
		return mgr.Tst_val(false, path, "val_obj_str", val_obj_str, actl_str);
	}
}
/** Checker for array items; compares the expected sub-item checkers against the array's actual sub-items. */
class Php_itm_ary_chkr extends Php_itm_chkr_base {
	Php_itm_chkr_base[] itms = Php_itm_chkr_base.Ary_empty;	// defaults to "no sub-items expected"

	@Override public Class<?> TypeOf() {
		return Php_itm.class;
	}
	@Override public byte Itm_tid() {
		return Php_itm_.Tid_ary;
	}
	public Php_itm_ary_chkr Subs_(Php_itm_chkr_base... v) {
		this.itms = v;
		return this;
	}
	@Override public int Chk_itm(Tst_mgr mgr, String path, Php_itm actl_obj) {
		Php_itm_ary actl = (Php_itm_ary)actl_obj;
		// copy the sub-items out into a plain array so they can be compared positionally
		int len = actl.Subs_len();
		Php_itm[] actl_subs = new Php_itm[len];
		for (int idx = 0; idx < len; idx++)
			actl_subs[idx] = (Php_itm)actl.Subs_get(idx);
		int rv = 0;
		rv += mgr.Tst_sub_ary(itms, actl_subs, "subs", rv);
		return rv;
	}
}
/** Checker for key/value items; compares the key as a string and delegates the value to a nested checker. */
class Php_itm_kv_chkr extends Php_itm_chkr_base {
	private String key;
	Php_itm_chkr_base val;

	@Override public Class<?> TypeOf() {
		return Php_itm.class;
	}
	@Override public byte Itm_tid() {
		return Php_itm_.Tid_kv;
	}
	public Php_itm_kv_chkr Key_(String v) {
		key = v;
		return this;
	}
	public Php_itm_kv_chkr Val_(Php_itm_chkr_base v) {
		val = v;
		return this;
	}
	@Override public int Chk_itm(Tst_mgr mgr, String path, Php_itm actl_obj) {
		Php_itm_kv actl = (Php_itm_kv)actl_obj;
		String actl_key = String_.new_u8(actl.Key().Val_obj_bry());
		int rv = 0;
		rv += mgr.Tst_val(false, path, "key", key, actl_key);
		rv += mgr.Tst_sub_obj(val, actl.Val(), path, rv);
		return rv;
	}
}
/** Collects expected log messages and compares them against the entries of a Gfo_msg_log. */
class Gfo_msg_log_chkr implements Tst_chkr {
	List_adp itms = List_adp_.new_();	// expected messages, as Gfo_msg_data_chkr

	public Class<?> TypeOf() {
		return Gfo_msg_log.class;
	}
	public void Clear() {
		itms.Clear();
	}
	/** Adds an expected message with its expected excerpt range. */
	public void Add_itm(Gfo_msg_itm itm, int bgn, int end) {
		Gfo_msg_data_chkr expd = new Gfo_msg_data_chkr();
		expd.Itm_(itm).Excerpt_bgn_(bgn).Excerpt_end_(end);
		itms.Add(expd);
	}
	/** Always returns 0; the comparison is done by tst instead. */
	public int Chk(Tst_mgr mgr, String path, Object actl_obj) {
		return 0;
	}
	/** Copies the log's entries into an array and checks them against the expected messages. */
	public void tst(Tst_mgr mgr, Object actl_obj) {
		Gfo_msg_log log = (Gfo_msg_log)actl_obj;
		int len = log.Ary_len();
		Gfo_msg_data[] actl_ary = new Gfo_msg_data[len];
		for (int idx = 0; idx < len; idx++) {
			actl_ary[idx] = log.Ary_get(idx);
		}
		Gfo_msg_data_chkr[] expd_ary = (Gfo_msg_data_chkr[])itms.To_ary(Gfo_msg_data_chkr.class);
		mgr.Tst_ary("itms", expd_ary, actl_ary);
	}
}
/** Checker for a single logged message: compares the message item's path and the excerpt range. */
class Gfo_msg_data_chkr implements Tst_chkr {
	public Class<?> TypeOf() {return Gfo_msg_data.class;}
	public Gfo_msg_data_chkr Itm_(Gfo_msg_itm v) {itm = v; return this;} Gfo_msg_itm itm;
	public Gfo_msg_data_chkr Excerpt_bgn_(int v) {excerpt_bgn = v; return this;} private int excerpt_bgn = -1;
	public Gfo_msg_data_chkr Excerpt_end_(int v) {excerpt_end = v; return this;} private int excerpt_end = -1;
	public int Chk(Tst_mgr mgr, String path, Object actl_obj) {
		Gfo_msg_data actl = (Gfo_msg_data)actl_obj;
		int rv = 0;
		// guard the dereference: args are evaluated before Tst_val runs, so an unset itm used to throw a NullPointerException
		// even though "itm == null" is passed as the first flag (same pattern as the -1 defaults below)
		rv += mgr.Tst_val(itm == null, path, "itm", itm == null ? null : itm.Path_str(), actl.Item().Path_str());
		rv += mgr.Tst_val(excerpt_bgn == -1, path, "excerpt_bgn", excerpt_bgn, actl.Src_bgn());
		rv += mgr.Tst_val(excerpt_end == -1, path, "excerpt_end", excerpt_end, actl.Src_end());
		return rv;
	}
}

View File

@@ -0,0 +1,138 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.phps; import gplx.*; import gplx.langs.*;
/** A single value produced by the PHP-serialization parser (nil, bool, int, double, string, array or function). */
interface Php_srl_itm {
// type id; one of the Php_srl_itm_.Tid_* constants
byte Tid();
// begin / end positions of the value in the source bytes; items not tied to a source position use -1
int Src_bgn();
int Src_end();
// the value as an Object; null for nil and array items
Object Val();
// writes a text form of the item to bfr; depth controls the indent
void Xto_bfr(Bry_bfr bfr, int depth);
void Clear();
}
/** Constants for Php_srl_itm: type ids, their display names, and shared empty / null values. */
class Php_srl_itm_ {
	public static final Php_srl_itm[] Ary_empty = new Php_srl_itm[0];

	// type ids; each value is also the index of its name in Names
	public static final byte Tid_unknown = 0;
	public static final byte Tid_nil = 1;
	public static final byte Tid_bool = 2;
	public static final byte Tid_int = 3;
	public static final byte Tid_double = 4;
	public static final byte Tid_string = 5;
	public static final byte Tid_array = 6;
	public static final byte Tid_function = 7;

	public static final byte[][] Names = Bry_.Ary
	( "unknown", "nil", "boolean", "int"
	, "double", "string", "array", "function"
	);

	public static final Object Val_nil = null;
	public static final Object Val_table = null;
}
/** Shared state (source range and boxed value) and default text output for Php_srl_itm implementations. */
abstract class Php_srl_itm_base implements Php_srl_itm {
	private int src_bgn;
	private int src_end;
	Object val;

	public abstract byte Tid();

	/** Initializer called by subclass constructors to set the source range and the boxed value. */
	public void Ctor(int src_bgn, int src_end, Object val) {
		this.src_bgn = src_bgn;
		this.src_end = src_end;
		this.val = val;
	}
	public int Src_bgn() {return src_bgn;}
	public int Src_end() {return src_end;}
	public Object Val() {return val;}

	/** Writes the indent, then the type name, a colon, the value's string form, a semicolon and a newline. */
	@gplx.Virtual public void Xto_bfr(Bry_bfr bfr, int depth) {
		Php_srl_wtr.Indent(bfr, depth);
		byte[] tid_name = Php_srl_itm_.Names[this.Tid()];
		bfr.Add(tid_name).Add_byte(Byte_ascii.Colon);
		bfr.Add_str(Object_.Xto_str_strict_or_null_mark(this.Val()));
		bfr.Add_byte(Byte_ascii.Semic).Add_byte_nl();
	}
	public void Clear() {
		// nothing to release at this level
	}
}
/** Nil item; has no source range (-1, -1) and a null value. A shared instance is exposed as Nil. */
class Php_srl_itm_nil extends Php_srl_itm_base {
	public static Php_srl_itm_nil Nil = new Php_srl_itm_nil();

	public Php_srl_itm_nil() {
		this.Ctor(-1, -1, null);
	}
	@Override public byte Tid() {
		return Php_srl_itm_.Tid_nil;
	}
	public byte[] Bry_extract(byte[] raw) {
		return null;
	}
}
/** Boolean item; Bool_n carries the byte '0' and Bool_y the byte '1'. */
class Php_srl_itm_bool extends Php_srl_itm_base {
	public static Php_srl_itm_bool
	  Bool_n = new Php_srl_itm_bool(false, new byte[] {Byte_ascii.Num_0})
	, Bool_y = new Php_srl_itm_bool(true, new byte[] {Byte_ascii.Num_1})
	;
	private byte[] bry;
	private boolean val;

	public Php_srl_itm_bool(boolean val, byte[] bry) {
		this.val = val;
		this.bry = bry;
		this.Ctor(-1, -1, val);	// also stored boxed in the base class
	}
	@Override public byte Tid() {
		return Php_srl_itm_.Tid_bool;
	}
	public byte[] Bry_extract(byte[] raw) {
		return bry;
	}
	public boolean Val_as_bool() {
		return val;
	}
}
/** Int item; keeps the primitive value alongside the boxed copy held by the base class. */
class Php_srl_itm_int extends Php_srl_itm_base {
	private int val;

	public Php_srl_itm_int(int src_bgn, int src_end, int val) {
		this.val = val;
		this.Ctor(src_bgn, src_end, val);
	}
	@Override public byte Tid() {
		return Php_srl_itm_.Tid_int;
	}
	public int Val_as_int() {
		return val;
	}
}
/** Double item; keeps the primitive value alongside the boxed copy held by the base class. */
class Php_srl_itm_double extends Php_srl_itm_base {
	double val;

	public Php_srl_itm_double(int src_bgn, int src_end, double val) {
		this.val = val;
		this.Ctor(src_bgn, src_end, val);
	}
	@Override public byte Tid() {
		return Php_srl_itm_.Tid_double;
	}
	public double Val_as_double() {
		return val;
	}
}
/** String item; keeps a typed reference to the string alongside the Object copy held by the base class. */
class Php_srl_itm_str extends Php_srl_itm_base {
	private String val;

	public Php_srl_itm_str(int src_bgn, int src_end, String val) {
		this.val = val;
		this.Ctor(src_bgn, src_end, val);
	}
	@Override public byte Tid() {
		return Php_srl_itm_.Tid_string;
	}
	public String Val_as_str() {
		return val;
	}
}
/** Function item; the int value is the function id read from the serialized object. */
class Php_srl_itm_func extends Php_srl_itm_base {
	private int val;

	public Php_srl_itm_func(int src_bgn, int src_end, int val) {
		this.val = val;
		this.Ctor(src_bgn, src_end, val);
	}
	@Override public byte Tid() {
		return Php_srl_itm_.Tid_function;
	}
	public int Val_as_int() {
		return val;
	}
}
/** Array item: a growable list of key/value pairs backed by a plain array. */
class Php_srl_itm_ary extends Php_srl_itm_base {
	static final byte[] CONST_ary_bgn = Bry_.new_a7("]{\n");
	Php_srl_itm_kv[] subs = Php_srl_itm_kv.Ary_empty;	// backing store; only the first subs_len slots are in use
	private int subs_len = 0, subs_max = 0;

	public Php_srl_itm_ary(int src_bgn, int src_end) {
		this.Ctor(src_bgn, src_end, null);
	}
	@Override public byte Tid() {
		return Php_srl_itm_.Tid_array;
	}
	/** Returns the backing array itself; it may be longer than Subs_len(). */
	public Php_srl_itm_kv[] Subs_ary() {
		return subs;
	}
	public int Subs_len() {
		return subs_len;
	}
	public Php_srl_itm_kv Subs_get_at(int i) {
		return subs[i];
	}
	/** Clears every stored pair, then resets to the shared empty array. */
	public void Subs_clear() {
		int idx = 0;
		while (idx < subs_len) {
			subs[idx].Clear();
			idx++;
		}
		subs = Php_srl_itm_kv.Ary_empty;
		subs_len = subs_max = 0;
	}
	public Php_srl_itm_ary Subs_add_many(Php_srl_itm_kv... ary) {
		for (Php_srl_itm_kv itm : ary) {
			Subs_add(itm);
		}
		return this;
	}
	/** Appends one pair, growing the backing array to twice the needed length when it is full. */
	public Php_srl_itm_ary Subs_add(Php_srl_itm_kv itm) {
		int needed = subs_len + 1;
		if (needed > subs_max) {
			subs_max = needed * 2;
			Php_srl_itm_kv[] grown = new Php_srl_itm_kv[subs_max];
			Array_.Copy_to(subs, 0, grown, 0, subs_len);
			subs = grown;
		}
		subs[subs_len] = itm;
		subs_len = needed;
		return this;
	}
	/** Writes "a[" + count + "]{" and a newline, each pair one level deeper, then an indented "}" and a newline. */
	@Override public void Xto_bfr(Bry_bfr bfr, int depth) {
		Php_srl_wtr.Indent(bfr, depth);
		bfr.Add_byte(Byte_ascii.Ltr_a).Add_byte(Byte_ascii.Brack_bgn);
		bfr.Add_int_variable(subs_len).Add(CONST_ary_bgn);
		int sub_depth = depth + 1;
		for (int idx = 0; idx < subs_len; idx++) {
			subs[idx].Xto_bfr(bfr, sub_depth);
		}
		Php_srl_wtr.Indent(bfr, depth);
		bfr.Add_byte(Byte_ascii.Curly_end).Add_byte_nl();
	}
}
/** A key/value pair inside a Php_srl_itm_ary. Either side may be unset (null) until Key_ / Val_ is called. */
class Php_srl_itm_kv {
	public int Idx_int() {return idx_int;} public Php_srl_itm_kv Idx_int_(int v) {idx_int = v; return this;} private int idx_int = -1;
	public Php_srl_itm Key() {return key;} public Php_srl_itm_kv Key_(Php_srl_itm v) {key = v; return this;} Php_srl_itm key;
	public Php_srl_itm Val() {return val;} public Php_srl_itm_kv Val_(Php_srl_itm v) {val = v; return this;} Php_srl_itm val;
	/** Clears the key and the value; a side that was never set is skipped instead of throwing NullPointerException. */
	public void Clear() {
		if (key != null) key.Clear();
		if (val != null) val.Clear();
	}
	/** Writes the key, then the value, at the same depth; a side that was never set is skipped. */
	public void Xto_bfr(Bry_bfr bfr, int depth) {
		if (key != null) key.Xto_bfr(bfr, depth);
		if (val != null) val.Xto_bfr(bfr, depth);
	}
	public static final Php_srl_itm_kv[] Ary_empty = new Php_srl_itm_kv[0];
}
/** Output helper shared by the Xto_bfr implementations. */
class Php_srl_wtr {
	/** Appends two spaces per depth level; writes nothing when depth is zero or negative. */
	public static void Indent(Bry_bfr bfr, int depth) {
		if (depth <= 0) return;
		int space_count = depth * 2;
		bfr.Add_byte_repeat(Byte_ascii.Space, space_count);
	}
}

View File

@@ -0,0 +1,208 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.phps; import gplx.*; import gplx.langs.*;
import gplx.texts.*;
/**
 * Parser for text in PHP's serialize() format.
 * Handles N (nil), b (bool), i (int), d (double), s (string), a (array) and the one O (object) form
 * shown in CONST_funct_bgn. The instance keeps the source bytes and a read position in fields, and
 * every Parse_* method advances that shared position.
 */
public class Php_srl_parser {
	@gplx.Internal protected Php_srl_factory Factory() {return factory;} Php_srl_factory factory = new Php_srl_factory();
	byte[] raw; int raw_len, pos;	// source bytes, their length, and the current read position
	/** Parses raw and converts the root's sub-items to KeyVal objects. */
	public KeyVal[] Parse_as_kvs(byte[] raw) {
		Php_srl_itm_ary root = Parse(raw);
		return Xto_kv_ary(root);
	}
	KeyVal[] Xto_kv_ary(Php_srl_itm_ary ary) {
		int len = ary.Subs_len();
		KeyVal[] rv = new KeyVal[len];
		for (int i = 0; i < len; i++)
			rv[i] = Xto_kv(ary.Subs_get_at(i));
		return rv;
	}
	/** Converts one pair: arrays become nested KeyVal[], functions become Scrib_lua_proc, anything else passes its value through. */
	KeyVal Xto_kv(Php_srl_itm_kv itm) {
		Php_srl_itm itm_key = itm.Key();
		Object key = itm_key == null ? null : itm_key.Val();	// the root pair created by Parse may have no key
		Php_srl_itm itm_val = itm.Val();
		Object val = null;
		switch (itm_val.Tid()) {
			case Php_srl_itm_.Tid_array:
				Php_srl_itm_ary ary = (Php_srl_itm_ary)itm_val;
				val = Xto_kv_ary(ary);
				break;
			case Php_srl_itm_.Tid_function:
				val = new gplx.xowa.xtns.scribunto.Scrib_lua_proc(Object_.Xto_str_strict_or_null_mark(key), Int_.cast(itm_val.Val()));	// NOTE: in most cases, key is a STRING (name of ScribFunction); however, for gsub it is an INT (arg_idx) b/c it is passed as a parameter
				break;
			default:
				val = itm_val.Val();
				break;
		}
		return KeyVal_.obj_(key, val);
	}
	/**
	 * Parses raw into a root array holding a single pair.
	 * The first parsed item is stored as that pair's value; any further top-level items
	 * alternately overwrite the same pair's key and value.
	 */
	@gplx.Internal protected Php_srl_itm_ary Parse(byte[] raw) {
		this.raw = raw; this.raw_len = raw.length; pos = 0;
		Php_srl_itm_ary rv = new Php_srl_itm_ary(0, raw_len);
		Php_srl_itm_kv cur_kv = factory.Kv();
		rv.Subs_add(cur_kv);
		boolean mode_is_key = false;
		while (true) {
			if (pos >= raw_len) break;
			if (mode_is_key) {
				cur_kv.Key_(Parse_itm(pos));
				mode_is_key = false;
			}
			else {
				cur_kv.Val_(Parse_itm(pos));
				mode_is_key = true;
			}
		}
		return rv;
	}
	Php_srl_itm_ary Parse_array(int bgn, int subs_len) {	// enters after '{'; EX: 'a:1{' -> Parse_array
		Php_srl_itm_ary rv = factory.Ary(bgn, bgn);
		for (int i = 0; i < subs_len; i++) {
			Php_srl_itm_kv kv = factory.Kv();
			Php_srl_itm key_itm = Parse_itm(pos);
			kv.Key_(key_itm);
			Php_srl_itm val_itm = Parse_itm(pos);
			kv.Val_(val_itm);
			rv.Subs_add(kv);
		}
		return rv;
	}
	/** Parses one item starting at bgn, dispatching on its type letter; leaves pos just past the item. Throws Err on an unknown type letter or a missing delimiter. */
	Php_srl_itm Parse_itm(int bgn) {
		pos = bgn;
		Php_srl_itm rv = null;
		byte b = raw[pos];
		switch (b) {
			case Byte_ascii.Ltr_N:		// EX: 'N;'
				rv = factory.Nil();
				pos = Chk(raw, pos + 1, Byte_ascii.Semic);
				break;
			case Byte_ascii.Ltr_b:		// EX: 'b:0;' or 'b:1;'
				pos = Chk(raw, pos + 1, Byte_ascii.Colon);
				b = raw[pos];
				switch (b) {
					case Byte_ascii.Num_1:		rv = factory.Bool_y(); break;
					case Byte_ascii.Num_0:		rv = factory.Bool_n(); break;
					default:					throw err_(raw, pos, raw_len, "unknown boolean type {0}", Char_.To_str(b));
				}
				pos = Chk(raw, pos + 1, Byte_ascii.Semic);
				break;
			case Byte_ascii.Ltr_i:		// EX: 'i:123;'
				rv = Parse_int(pos);
				pos = Chk(raw, pos, Byte_ascii.Semic);
				break;
			case Byte_ascii.Ltr_d:		// EX: 'd:1.23;'
				pos = Chk(raw, pos + 1, Byte_ascii.Colon);
				int double_end = Bry_find_.Find_fwd(raw, Byte_ascii.Semic, pos, raw_len);
				String double_str = String_.new_a7(raw, pos, double_end);
				double double_val = 0;
				// PHP writes non-finite floats as INF, -INF and NAN; handle them before the general parse
				if		(String_.Eq(double_str, "INF"))		double_val = Double_.Inf_pos;
				else if (String_.Eq(double_str, "-INF"))	double_val = -Double_.Inf_pos;	// negative infinity was previously passed to Double_.parse
				else if (String_.Eq(double_str, "NAN"))		double_val = Double_.NaN;
				else										double_val = Double_.parse(double_str);
				rv = factory.Double(pos, double_end, double_val);
				pos = Chk(raw, double_end, Byte_ascii.Semic);
				break;
			case Byte_ascii.Ltr_s:		// EX: 's:3:"abc";'
				int len_val = Parse_int(pos).Val_as_int();
				pos = Chk(raw, pos, Byte_ascii.Colon);
				pos = Chk(raw, pos, Byte_ascii.Quote);
				int str_end = pos + len_val;	// the declared length is counted in bytes of raw
				String str_val = String_.new_u8(raw, pos, str_end);
				rv = factory.Str(pos, str_end, str_val);
				pos = Chk(raw, str_end, Byte_ascii.Quote);
				pos = Chk(raw, pos, Byte_ascii.Semic);
				break;
			case Byte_ascii.Ltr_a:		// EX: 'a:0:{}'
				int subs_len = Parse_int(pos).Val_as_int();
				pos = Chk(raw, pos, Byte_ascii.Colon);
				pos = Chk(raw, pos, Byte_ascii.Curly_bgn);
				rv = Parse_array(pos, subs_len);
				pos = Chk(raw, pos, Byte_ascii.Curly_end);
				break;
			case Byte_ascii.Ltr_O:		// EX: 'O:42:"Scribunto_LuaStandaloneInterpreterFunction":1:{s:2:"id";i:123;}'
				int func_bgn = pos;
				pos += 62;				// skip the fixed prefix up to (not including) the trailing 'i:'; the whole prefix (CONST_funct_bgn) is 64 bytes and Parse_int_val consumes the last 2. NOTE: the prefix bytes are not validated
				int func_id = Parse_int_val(pos);
				rv = factory.Func(func_bgn, pos, func_id);
				pos += 2;				// skip ';}'
				break;
			default: throw err_(raw, pos, "unexpected type: {0}", Char_.To_str(b));
		}
		return rv;
	}	static final byte[] CONST_funct_bgn = Bry_.new_a7("O:42:\"Scribunto_LuaStandaloneInterpreterFunction\":1:{s:2:\"id\";i:"), CONST_funct_end = Bry_.new_a7(";}");
	/** Reads the int in 'X:123' where bgn points at the type letter X; leaves pos on the first byte after the digits. */
	int Parse_int_val(int bgn) {
		pos = bgn;
		pos = Chk(raw, pos + 1, Byte_ascii.Colon);
		int int_end = Skip_while_num(raw, raw_len, pos, true);
		int int_val = Bry_.To_int_or(raw, pos, int_end, Int_.Min_value);
		pos = int_end;
		return int_val;
	}
	/** Same as Parse_int_val, but wraps the value in an item whose range covers only the digits. */
	Php_srl_itm_int Parse_int(int bgn) {
		int int_val = Parse_int_val(bgn);
		return factory.Int(bgn + 2, pos, int_val);	// digits start after the type letter and the colon; pos is now the digits' end
	}
	/** Returns i + 1 when raw[i] is the expected byte; otherwise throws Err naming both bytes. */
	int Chk(byte[] raw, int i, byte expd) {
		byte actl = raw[i];
		if (actl == expd)
			return i + 1;
		else
			throw err_(raw, i, "expected '{0}' but got '{1}'", Char_.To_str(expd), Char_.To_str(actl));
	}
	/**
	 * Returns the index of the first byte at or after bgn that is not a digit, '.' or '-'.
	 * Throws Err if the end of raw is reached first. num_is_int is accepted for
	 * signature compatibility only; it never affected the result.
	 */
	int Skip_while_num(byte[] raw, int raw_len, int bgn, boolean num_is_int) {
		for (int i = bgn; i < raw_len; i++) {
			byte b = raw[i];
			switch (b) {
				case Byte_ascii.Num_0: case Byte_ascii.Num_1: case Byte_ascii.Num_2: case Byte_ascii.Num_3: case Byte_ascii.Num_4:
				case Byte_ascii.Num_5: case Byte_ascii.Num_6: case Byte_ascii.Num_7: case Byte_ascii.Num_8: case Byte_ascii.Num_9:
					break;
				case Byte_ascii.Dot:
				case Byte_ascii.Dash:
					break;
				default:
					return i;
			}
		}
		throw err_(raw, raw_len, raw_len, "skip_ws found eos");
	}
	Err err_(byte[] raw, int bgn, String fmt, Object... args) {return err_(raw, bgn, raw.length, fmt, args);}
	/** Builds an Err whose message is the formatted text, the position, and up to 20 bytes of source from that position. */
	Err err_(byte[] raw, int bgn, int raw_len, String fmt, Object... args) {
		String msg = String_.Format(fmt, args) + " " + Int_.Xto_str(bgn) + " " + String_.new_u8__by_len(raw, bgn, 20);
		return Err_.new_wo_type(msg);
	}
}
/** Creates Php_srl_itm instances; nil and the two booleans are shared instances, everything else is newly allocated. */
class Php_srl_factory {
	// shared instances
	public Php_srl_itm Nil() {
		return Php_srl_itm_nil.Nil;
	}
	public Php_srl_itm Bool_n() {
		return Php_srl_itm_bool.Bool_n;
	}
	public Php_srl_itm Bool_y() {
		return Php_srl_itm_bool.Bool_y;
	}

	// new instances
	public Php_srl_itm_int Int(int bgn, int end, int v) {
		return new Php_srl_itm_int(bgn, end, v);
	}
	public Php_srl_itm Double(int bgn, int end, double v) {
		return new Php_srl_itm_double(bgn, end, v);
	}
	/** String item with a source range but a null value. */
	public Php_srl_itm Str(int bgn, int end) {
		return Str(bgn, end, null);
	}
	public Php_srl_itm Str(int bgn, int end, String v) {
		return new Php_srl_itm_str(bgn, end, v);
	}
	public Php_srl_itm_func Func(int bgn, int end, int v) {
		return new Php_srl_itm_func(bgn, end, v);
	}
	public Php_srl_itm_ary Ary(int bgn, int end) {
		return new Php_srl_itm_ary(bgn, end);
	}
	public Php_srl_itm_kv Kv() {
		return new Php_srl_itm_kv();
	}
}

View File

@@ -0,0 +1,112 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.phps; import gplx.*; import gplx.langs.*;
import org.junit.*;
/** Tests for Php_srl_parser: one case per serialized type, plus flat, nested and string-keyed arrays. */
public class Php_srl_parser_tst {
	Php_srl_parser_fxt fxt = new Php_srl_parser_fxt();
	@Before public void init() {
		fxt.Clear();
	}

	// scalars
	@Test public void Nil() {
		fxt.Test_parse("N;", fxt.itm_nil_());
	}
	@Test public void Bool_y() {
		fxt.Test_parse("b:1;", fxt.itm_bool_y_());
	}
	@Test public void Bool_n() {
		fxt.Test_parse("b:0;", fxt.itm_bool_n_());
	}
	@Test public void Num_int() {
		fxt.Test_parse("i:123;", fxt.itm_int_(123));
	}
	@Test public void Num_int_neg() {
		fxt.Test_parse("i:-123;", fxt.itm_int_(-123));
	}
	@Test public void Num_double() {
		fxt.Test_parse("d:1.23;", fxt.itm_double_(1.23d));
	}
	@Test public void Num_double_inf_pos() {
		fxt.Test_parse("d:INF;", fxt.itm_double_(Double_.Inf_pos));
	}
	@Test public void Num_double_exp() {
		fxt.Test_parse("d:1.2e+2;", fxt.itm_double_(120));
	}
	@Test public void Num_double_nan() {
		fxt.Test_parse("d:NAN;", fxt.itm_double_(Double_.NaN));
	}

	// strings
	@Test public void Str_len_3() {
		fxt.Test_parse("s:3:\"abc\";", fxt.itm_str_("abc"));
	}
	@Test public void Str_len_4() {
		fxt.Test_parse("s:4:\"abcd\";", fxt.itm_str_("abcd"));
	}
	@Test public void Str_len_0() {
		fxt.Test_parse("s:0:\"\";", fxt.itm_str_(""));
	}

	// arrays
	@Test public void Ary_empty() {
		fxt.Test_parse("a:0:{}", fxt.itm_ary_());
	}
	@Test public void Ary_flat_one() {
		Php_srl_itm_ary expd = fxt.itm_ary_().Subs_add(fxt.itm_kvi_(1, fxt.itm_int_(9)));
		fxt.Test_parse("a:1:{i:1;i:9;}", expd);
	}
	@Test public void Ary_flat_many() {
		String raw = String_.Concat
		( "a:3:{"
		, "i:1;i:9;"
		, "i:2;i:8;"
		, "i:3;i:7;"
		, "}");
		Php_srl_itm_ary expd = fxt.itm_ary_().Subs_add_many
		( fxt.itm_kvi_(1, fxt.itm_int_(9))
		, fxt.itm_kvi_(2, fxt.itm_int_(8))
		, fxt.itm_kvi_(3, fxt.itm_int_(7))
		);
		fxt.Test_parse(raw, expd);
	}
	@Test public void Ary_nest_one() {
		String raw = String_.Concat
		( "a:1:{"
		, "i:1;"
		, "a:2:{"
		, "i:1;i:9;"
		, "i:2;i:8;"
		, "}"
		, "}"
		);
		Php_srl_itm_ary expd_inner = fxt.itm_ary_().Subs_add_many
		( fxt.itm_kvi_(1, fxt.itm_int_(9))
		, fxt.itm_kvi_(2, fxt.itm_int_(8))
		);
		Php_srl_itm_ary expd_outer = fxt.itm_ary_().Subs_add_many(fxt.itm_kvi_(1, expd_inner));
		fxt.Test_parse(raw, expd_outer);
	}
	@Test public void Ary_key_str() {
		String raw = String_.Concat
		( "a:1:{"
		, "s:3:\"abc\";"
		, "i:987;"
		, "}");
		Php_srl_itm_ary expd = fxt.itm_ary_().Subs_add_many
		( fxt.itm_kvs_("abc", fxt.itm_int_(987))
		);
		fxt.Test_parse(raw, expd);
	}

	// functions
	@Test public void Func() {
		Php_srl_itm expd = fxt.itm_func_(123);
		fxt.Test_parse("O:42:\"Scribunto_LuaStandaloneInterpreterFunction\":1:{s:2:\"id\";i:123;}", expd);
	}
	@Test public void Smoke() {
		// currently disabled; the call below is kept for reference
//		fxt.Test_parse("a:2:{s:6:\"values\";a:1:{i:1;a:9:{s:21:\"makeProt"+"ectedEnvFuncs\";O:42:\"Scribunto_LuaStandaloneInterpreterFunction\":1:{s:2:\"id\";i:2;}s:3:\"log\";O:42:\"Scribunto_LuaStandaloneInterpreterFunction\":1:{s:2:\"id\";i:3;}s:14:\"clearLogBuffer\";O:42:\"Scribunto_LuaStandaloneInterpreterFunction\":1:{s:2:\"id\";i:4;}s:5:\"setup\";O:42:\"Scribunto_LuaStandaloneInterpreterFunction\":1:{s:2:\"id\";i:5;}s:5:\"clone\";O:42:\"Scribunto_LuaStandaloneInterpreterFunction\":1:{s:2:\"id\";i:6;}s:15:\"getCurrentFrame\";O:42:\"Scribunto_LuaStandaloneInterpreterFunction\":1:{s:2:\"id\";i:7;}s:13:\"executeModule\";O:42:\"Scribunto_LuaStandaloneInterpreterFunction\":1:{s:2:\"id\";i:8;}s:15:\"executeFunction\";O:42:\"Scribunto_LuaStandaloneInterpreterFunction\":1:{s:2:\"id\";i:9;}s:12:\"getLogBuffer\";O:42:\"Scribunto_LuaStandaloneInterpreterFunction\":1:{s:2:\"id\";i:10;}}}s:2:\"op\";s:6:\"return\";}");
	}
}
/**
 * Test fixture for Php_srl_parser: builds expected items through the parser's factory and
 * compares the buffer output of the parsed item against the buffer output of the expected items.
 */
class Php_srl_parser_fxt {
	Php_srl_parser parser;
	Php_srl_factory factory;
	Bry_bfr tmp_bfr = Bry_bfr.reset_(255);

	/** Creates a new parser and caches its factory. */
	public void Clear() {
		parser = new Php_srl_parser();
		factory = parser.Factory();
	}

	// builders for expected items; positions are passed as -1, -1
	public Php_srl_itm itm_nil_() {
		return factory.Nil();
	}
	public Php_srl_itm itm_bool_n_() {
		return factory.Bool_n();
	}
	public Php_srl_itm itm_bool_y_() {
		return factory.Bool_y();
	}
	public Php_srl_itm itm_int_(int v) {
		return factory.Int(-1, -1, v);
	}
	public Php_srl_itm itm_double_(double v) {
		return factory.Double(-1, -1, v);
	}
	public Php_srl_itm itm_str_(String v) {
		return factory.Str(-1, -1, v);
	}
	public Php_srl_itm itm_func_(int v) {
		return factory.Func(-1, -1, v);
	}
	public Php_srl_itm_ary itm_ary_() {
		return factory.Ary(-1, -1);
	}
	/** Key/value pair with an int key. */
	public Php_srl_itm_kv itm_kvi_(int k, Php_srl_itm v) {
		return factory.Kv().Key_(itm_int_(k)).Val_(v);
	}
	/** Key/value pair with a String key. */
	public Php_srl_itm_kv itm_kvs_(String k, Php_srl_itm v) {
		return factory.Kv().Key_(itm_str_(k)).Val_(v);
	}

	/**
	 * Parses raw_str, writes the value of the root's first sub to a buffer and compares the
	 * resulting text with the text produced by writing expd_ary to the same buffer.
	 */
	public void Test_parse(String raw_str, Php_srl_itm... expd_ary) {
		Php_srl_itm_ary root = parser.Parse(Bry_.new_u8(raw_str));
		Php_srl_itm root_sub = root.Subs_get_at(0).Val();
		root_sub.Xto_bfr(tmp_bfr, 0);
		String actl = tmp_bfr.Xto_str_and_clear();
		String expd = Xto_str(expd_ary, 0, expd_ary.length);
		Tfds.Eq_str_lines(expd, actl, actl);
	}

	/** Writes ary[bgn] .. ary[end - 1] to tmp_bfr and returns (and clears) the buffer text. */
	String Xto_str(Php_srl_itm[] ary, int bgn, int end) {
		int idx = bgn;
		while (idx < end) {
			ary[idx].Xto_bfr(tmp_bfr, 0);
			idx++;
		}
		return tmp_bfr.Xto_str_and_clear();
	}
}

View File

@@ -0,0 +1,62 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.phps; import gplx.*; import gplx.langs.*;
/** One segment of a parsed PHP string literal; each segment can write its output form to a buffer. */
interface Php_text_itm {
/** Type id of the segment; implementations return one of the Php_text_itm_.Tid_* constants. */
byte Tid();
/** Start offset of the segment, as supplied to the implementation's constructor. */
int Src_bgn();
/** End offset of the segment, as supplied to the implementation's constructor. */
int Src_end();
/** Appends this segment's output to bfr; src is the raw byte array the offsets refer to. */
void Bld(Bry_bfr bfr, byte[] src);
}
/** Type ids for Php_text_itm segments. */
class Php_text_itm_ {
	public static final byte Tid_text = 0;
	public static final byte Tid_escaped = 1;
	public static final byte Tid_arg = 2;
	public static final byte Tid_utf16 = 3;
}
/** Plain-text segment: copies the source range between src_bgn and src_end to the buffer. */
class Php_text_itm_text implements Php_text_itm {
	private final int src_bgn;
	private final int src_end;

	public Php_text_itm_text(int src_bgn, int src_end) {
		this.src_bgn = src_bgn;
		this.src_end = src_end;
	}
	public byte Tid() {
		return Php_text_itm_.Tid_text;
	}
	public int Src_bgn() {
		return src_bgn;
	}
	public int Src_end() {
		return src_end;
	}
	/** Appends the stored range of src to bfr via Add_mid. */
	public void Bld(Bry_bfr bfr, byte[] src) {
		bfr.Add_mid(src, src_bgn, src_end);
	}
}
/** Escape segment: stands for an escape sequence that resolves to a single literal byte. */
class Php_text_itm_escaped implements Php_text_itm {
	private final int src_bgn;
	private final int src_end;
	private final byte literal;

	public Php_text_itm_escaped(int src_bgn, int src_end, byte literal) {
		this.src_bgn = src_bgn;
		this.src_end = src_end;
		this.literal = literal;
	}
	public byte Tid() {
		return Php_text_itm_.Tid_escaped;
	}
	public int Src_bgn() {
		return src_bgn;
	}
	public int Src_end() {
		return src_end;
	}
	/** The byte written in place of the escape sequence. */
	public byte Literal() {
		return literal;
	}
	/** Appends the literal byte; src is not read. */
	public void Bld(Bry_bfr bfr, byte[] src) {
		bfr.Add_byte(literal);
	}
}
/** Segment carrying pre-built literal bytes that replace a multi-char escape sequence. */
class Php_text_itm_utf16 implements Php_text_itm {
	private final int src_bgn;
	private final int src_end;
	private final byte[] literal;

	public Php_text_itm_utf16(int src_bgn, int src_end, byte[] literal) {
		this.src_bgn = src_bgn;
		this.src_end = src_end;
		this.literal = literal;
	}
	public byte Tid() {
		return Php_text_itm_.Tid_utf16;
	}
	public int Src_bgn() {
		return src_bgn;
	}
	public int Src_end() {
		return src_end;
	}
	/** The bytes written in place of the escape sequence. */
	public byte[] Literal() {
		return literal;
	}
	/** Appends the literal bytes; src is not read. */
	public void Bld(Bry_bfr bfr, byte[] src) {
		bfr.Add(literal);
	}
}
/**
 * Format-argument segment (EX: "$1"). When built, it is written as "~{n}" where n is the
 * argument index minus List_adp_.Base1 (EX: "$1" is written as "~{0}").
 */
class Php_text_itm_arg implements Php_text_itm {
	private int src_bgn;
	private int src_end;
	private int idx;

	/**
	 * @param src_bgn offset of the "$"
	 * @param src_end offset after the last digit
	 * @param idx     argument number as written in the source
	 */
	public Php_text_itm_arg(int src_bgn, int src_end, int idx) {
		this.src_bgn = src_bgn;
		this.src_end = src_end;
		this.idx = idx;
	}
	/** Returns Tid_arg; previously returned Tid_escaped, which made args indistinguishable from escapes by type id. */
	public byte Tid() {return Php_text_itm_.Tid_arg;}
	public int Src_bgn() {return src_bgn;}
	public int Src_end() {return src_end;}
	/** Argument number as written in the source (not yet shifted by Base1). */
	public int Idx() {return idx;}
	/** Appends "~{" + (idx - Base1) + "}" to bfr; src is not read. */
	public void Bld(Bry_bfr bfr, byte[] src) {
		bfr.Add_byte(Byte_ascii.Tilde).Add_byte(Byte_ascii.Curly_bgn)
			.Add_int_variable(idx - List_adp_.Base1) // php is super 1
			.Add_byte(Byte_ascii.Curly_end);
	}
}

View File

@@ -0,0 +1,145 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.phps; import gplx.*; import gplx.langs.*;
import gplx.core.primitives.*;
/**
 * Splits the body of a PHP string literal into Php_text_itm segments: plain text, single-byte
 * escapes, hex / utf16 escapes and "$n" format arguments.
 * In single-quote mode only an escaped apostrophe and an escaped backslash are recognized;
 * in double-quote mode quote, backslash, n, t, r, u (4 hex digits) and x (2 hex digits) escapes
 * are recognized. Any other backslash sequence is kept as plain text.
 */
public class Php_text_itm_parser {
	public static final byte Rslt_orig = 0, Rslt_dirty = 1, Rslt_fmt = 2;
	/** Text prefix (backslash + "u00") placed before the two hex digits of a backslash-x escape. */
	private static final byte[] CONST_utf_prefix = Bry_.new_a7("\\u00");
	private boolean quote_is_single;
	public boolean Quote_is_single() {return quote_is_single;}
	public Php_text_itm_parser Quote_is_single_(boolean v) {quote_is_single = v; return this;}

	/**
	 * Parses raw and returns the rebuilt bytes.
	 * @param tmp_list scratch list; cleared and filled with the parsed segments
	 * @param raw      bytes to parse
	 * @param rslt_ref receives one of the Rslt_* values
	 * @param tmp_bfr  scratch buffer used to rebuild the output
	 * @return raw itself when the result is Rslt_orig; otherwise the concatenation of every segment's Bld output
	 */
	public byte[] Parse_as_bry(List_adp tmp_list, byte[] raw, Byte_obj_ref rslt_ref, Bry_bfr tmp_bfr) {
		Parse(tmp_list, raw, rslt_ref);
		byte[] rv = raw;
		switch (rslt_ref.Val()) {
			case Rslt_orig: break;
			case Rslt_dirty:
			case Rslt_fmt:
				tmp_bfr.Clear();
				int tmp_list_len = tmp_list.Count();
				for (int i = 0; i < tmp_list_len; i++) {
					Php_text_itm itm = (Php_text_itm)tmp_list.Get_at(i);
					itm.Bld(tmp_bfr, raw);
				}
				rv = tmp_bfr.Xto_bry_and_clear();
				break;
		}
		return rv;
	}
	/** Parses raw into tmp_list and discards the result code. */
	public void Parse(List_adp tmp_list, byte[] raw) {
		Parse(tmp_list, raw, Byte_obj_ref.zero_());
	}
	/**
	 * Parses raw into tmp_list (cleared first) and stores a Rslt_* value in rslt.
	 * Throws (via Err_) in double-quote mode when a backslash is the last byte, or when a
	 * backslash-u escape is not followed by 4 bytes.
	 * NOTE(review): the final flush of pending text sets the result to Rslt_dirty even when
	 * Rslt_fmt was set earlier; kept as-is, confirm whether callers rely on Rslt_fmt.
	 */
	public void Parse(List_adp tmp_list, byte[] raw, Byte_obj_ref rslt) {
		tmp_list.Clear();
		int raw_len = raw.length; int raw_last = raw_len - 1;
		int txt_bgn = -1;	// start of the pending plain-text run; -1 when no run is open
		byte rslt_val = Rslt_orig;
		for (int i = 0; i < raw_len; i++) {
			byte b = raw[i];
			switch (b) {
				case Byte_ascii.Backslash:
					// flush pending text before handling the escape
					if (txt_bgn != -1) {tmp_list.Add(new Php_text_itm_text(txt_bgn, i)); txt_bgn = -1; rslt_val = Rslt_dirty;}
					boolean pos_is_last = i == raw_last;
					int next_pos = i + 1;
					byte next_char = pos_is_last ? Byte_ascii.Null : raw[next_pos];
					if (quote_is_single) {	// NOTE: q1 is simpler than q2; REF.MW:http://php.net/manual/en/language.types.String.php; DATE:2014-08-06
						switch (next_char) {
							case Byte_ascii.Apos:		next_char = Byte_ascii.Apos; break;
							case Byte_ascii.Backslash:	next_char = Byte_ascii.Backslash; break;
							default:					next_char = Byte_ascii.Null; break;	// not an escape; a trailing backslash also lands here
						}
					}
					else {
						if (pos_is_last) throw Err_.new_wo_type("backslash_is_last_char", "raw", String_.new_u8(raw));
						switch (next_char) {
							case Byte_ascii.Backslash:	next_char = Byte_ascii.Backslash; break;
							case Byte_ascii.Quote:		next_char = Byte_ascii.Quote; break;
							case Byte_ascii.Ltr_N:
							case Byte_ascii.Ltr_n:		next_char = Byte_ascii.Nl; break;
							case Byte_ascii.Ltr_T:
							case Byte_ascii.Ltr_t:		next_char = Byte_ascii.Tab; break;
							case Byte_ascii.Ltr_R:
							case Byte_ascii.Ltr_r:		next_char = Byte_ascii.Cr; break;
							case Byte_ascii.Ltr_U:
							case Byte_ascii.Ltr_u:	{	// EX: backslash + "u007C"
								rslt_val = Rslt_dirty;
								Parse_utf16(tmp_list, raw, next_pos + 1, raw_len); // +1 to skip u
								i = next_pos + 4;		// +4 to skip utf16 seq; EX: the "007C" after the u
								continue;
							}
							case Byte_ascii.Ltr_X:
							case Byte_ascii.Ltr_x:	{	// EX: "\xc2"
								rslt_val = Rslt_dirty;
								// output is the text form: prefix + the two hex digits
								byte[] literal = Bry_.Add(CONST_utf_prefix, Bry_.Mid(raw, next_pos + 1, next_pos + 3));
								tmp_list.Add(new Php_text_itm_utf16(i, i + 4, literal));
								i = next_pos + 2;		// +2 to skip rest; EX: \xc2; +2 for c2
								continue;
							}
							default:					next_char = Byte_ascii.Null; break;
						}
					}
					if (next_char == Byte_ascii.Null) {
						// unrecognized sequence: the backslash starts a new plain-text run
						if (txt_bgn == -1) txt_bgn = i;
					}
					else {
						tmp_list.Add(new Php_text_itm_escaped(i, next_pos, next_char)); rslt_val = Rslt_dirty;
						i = next_pos;	// skip the escaped char
					}
					break;
				case Byte_ascii.Dollar:
					if (txt_bgn != -1) {tmp_list.Add(new Php_text_itm_text(txt_bgn, i)); txt_bgn = -1;}
					// a trailing "$" or a "$" not followed by digits is kept as literal text; no error is raised
					int int_end = Find_fwd_non_int(raw, i + 1, raw_len);	// +1 to search after $
					int int_val = Bry_.To_int_or(raw, i + 1, int_end, -1);	// +1 to search after $
					if (int_val == -1) {
						tmp_list.Add(new Php_text_itm_text(i, i + 1));
						continue;
					}
					tmp_list.Add(new Php_text_itm_arg(i, int_end, int_val));
					rslt_val = Rslt_fmt;
					i = int_end - 1;	// -1 b/c i++ in for loop
					break;
				default:
					if (txt_bgn == -1) txt_bgn = i;
					break;
			}
		}
		if (txt_bgn != -1) {tmp_list.Add(new Php_text_itm_text(txt_bgn, raw_len)); txt_bgn = -1; rslt_val = Rslt_dirty;}
		rslt.Val_(rslt_val);
	}
	/**
	 * Reads the 4 hex digits at src[bgn] .. src[bgn + 3], encodes the value via Utf16_.Encode_int_to_bry
	 * and adds a utf16 segment to rv.
	 * @param bgn offset of the first hex digit (just after the "u")
	 */
	private void Parse_utf16(List_adp rv, byte[] src, int bgn, int src_len) {
		int end = bgn + 4;
		// end is exclusive, so end == src_len is valid: the escape is the last thing in src.
		// The previous ">=" check rejected that case.
		if (end > src_len) throw Err_.new_wo_type("utf16_parse", "src", String_.new_u8(src));
		int v = Int_.Xto_int_hex(src, bgn, end);
		byte[] literal = gplx.core.intls.Utf16_.Encode_int_to_bry(v);
		rv.Add(new Php_text_itm_utf16(bgn, end, literal));
	}
	/** Returns the offset of the first byte in src[bgn] .. src[end - 1] that is not an ASCII digit, or end if all are digits. */
	public static int Find_fwd_non_int(byte[] src, int bgn, int end) {
		for (int i = bgn; i < end; i++) {
			byte b = src[i];
			switch (b) {
				case Byte_ascii.Num_0: case Byte_ascii.Num_1: case Byte_ascii.Num_2: case Byte_ascii.Num_3: case Byte_ascii.Num_4:
				case Byte_ascii.Num_5: case Byte_ascii.Num_6: case Byte_ascii.Num_7: case Byte_ascii.Num_8: case Byte_ascii.Num_9:
					break;
				default:
					return i;
			}
		}
		return end;
	}
}

View File

@@ -0,0 +1,53 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.phps; import gplx.*; import gplx.langs.*;
import org.junit.*;
/** Tests for Php_text_itm_parser in single-quote (Q1) and double-quote (Q2) mode. */
public class Php_text_itm_tst {
	private Php_text_itm_fxt fxt = new Php_text_itm_fxt();

	@Before public void init() {
		fxt.Clear();
	}

	// single-quote mode
	@Test public void Q1_basic() {
		fxt.Init_q1().Test_parse("abcde", "abcde");
	}
	@Test public void Q1_apos() {
		fxt.Init_q1().Test_parse("a\\'b", "a'b");
	}
	@Test public void Q1_backslash() {
		fxt.Init_q1().Test_parse("a\\\\b", "a\\b");
	}
	// PURPOSE: allow single trailing backslash; DATE:2014-08-06
	@Test public void Q1_backslash_eos() {
		fxt.Init_q1().Test_parse("a\\", "a\\");
	}
	// sequences that are not escapes in single-quote mode pass through unchanged
	@Test public void Q1_noop() {
		fxt.Init_q1().Test_parse("a\\$\\nb", "a\\$\\nb");
	}

	// double-quote mode
	@Test public void Q2_basic() {
		fxt.Init_q2().Test_parse("abcde", "abcde");
	}
	@Test public void Q2_quote() {
		fxt.Init_q2().Test_parse("a\\\"b", "a\"b");
	}
	@Test public void Q2_backslash() {
		fxt.Init_q2().Test_parse("a\\\\b", "a\\b");
	}
	// unrecognized escapes pass through unchanged
	@Test public void Q2_noop() {
		fxt.Init_q2().Test_parse("a\\%\\cb", "a\\%\\cb");
	}
	@Test public void Q2_ws() {
		fxt.Init_q2().Test_parse("a\\tb\\nc", "a\tb\nc");
	}
	// "$n" args are rewritten as "~{n-1}"
	@Test public void Q2_fmt() {
		fxt.Init_q2().Test_parse("a$1b$2c", "a~{0}b~{1}c");
	}
	@Test public void Q2_utf_pipe() {
		fxt.Init_q2().Test_parse("a\\u007Cd", "a|d");
	}
	// hex escapes are rewritten to the 6-char text form, not decoded
	@Test public void Q2_hex_nbsp() {
		fxt.Init_q2().Test_parse("a\\xc2\\xa0d", "a\\u00c2\\u00a0d");
	}
}
/** Test fixture: parses a raw string with Php_text_itm_parser and compares the rebuilt text. */
class Php_text_itm_fxt {
	private Php_text_itm_parser parser;

	/** Replaces the parser with a new instance. */
	public void Clear() {
		parser = new Php_text_itm_parser();
	}
	/** Switches the parser to single-quote mode. */
	public Php_text_itm_fxt Init_q1() {
		parser.Quote_is_single_(Bool_.Y);
		return this;
	}
	/** Switches the parser to double-quote mode. */
	public Php_text_itm_fxt Init_q2() {
		parser.Quote_is_single_(Bool_.N);
		return this;
	}
	/** Parses raw_str, builds every parsed segment into one buffer and compares the text with expd. */
	public void Test_parse(String raw_str, String expd) {
		byte[] raw = Bry_.new_u8(raw_str);
		List_adp itms = List_adp_.new_();
		parser.Parse(itms, raw);
		Bry_bfr actl_bfr = Bry_bfr.reset_(255);
		for (int idx = 0, len = itms.Count(); idx < len; idx++) {
			Php_text_itm itm = (Php_text_itm)itms.Get_at(idx);
			itm.Bld(actl_bfr, raw);
		}
		Tfds.Eq(expd, actl_bfr.Xto_str_and_clear());
	}
}

View File

@@ -0,0 +1,74 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.phps; import gplx.*; import gplx.langs.*;
/** A token with a type id and a source range. */
public interface Php_tkn {
/** Token type id; implementations in this file return Php_tkn_.Tid_* values. */
byte Tkn_tid();
/** Start offset of the token in the source. */
int Src_bgn();
/** End offset of the token in the source. */
int Src_end();
}
/** Token type ids for Php_tkn, plus a helper to render an id as text. */
class Php_tkn_ {
	public static final byte Tid_txt = 1;
	public static final byte Tid_declaration = 2;
	public static final byte Tid_ws = 3;
	public static final byte Tid_comment = 4;
	public static final byte Tid_var = 5;
	public static final byte Tid_eq = 6;
	public static final byte Tid_eq_kv = 7;
	public static final byte Tid_semic = 8;
	public static final byte Tid_comma = 9;
	public static final byte Tid_paren_bgn = 10;
	public static final byte Tid_paren_end = 11;
	public static final byte Tid_null = 12;
	public static final byte Tid_false = 13;
	public static final byte Tid_true = 14;
	public static final byte Tid_ary = 15;
	public static final byte Tid_num = 16;
	public static final byte Tid_quote = 17;
	public static final byte Tid_brack_bgn = 18;
	public static final byte Tid_brack_end = 19;

	/** Returns Byte_.To_str(tid), i.e. the id rendered as text (not a symbolic name). */
	public static String Xto_str(byte tid) {
		return Byte_.To_str(tid);
	}
}
/** Base token holding the source range; subclasses supply the type id. */
abstract class Php_tkn_base implements Php_tkn {
	private int src_bgn;
	private int src_end;

	public abstract byte Tkn_tid();
	public int Src_bgn() {
		return src_bgn;
	}
	public int Src_end() {
		return src_end;
	}
	/** Overwrites only the end offset. */
	public void Src_end_(int v) {
		this.src_end = v;
	}
	/** Sets both offsets at once. */
	public void Src_rng_(int src_bgn, int src_end) {
		this.src_bgn = src_bgn;
		this.src_end = src_end;
	}
}
/** Token whose type id is supplied by the creator rather than fixed by the class. */
class Php_tkn_generic extends Php_tkn_base {
	private byte tid;

	public Php_tkn_generic(int src_bgn, int src_end, byte tid) {
		this.Src_rng_(src_bgn, src_end);
		this.tid = tid;
	}
	@Override public byte Tkn_tid() {
		return tid;
	}
}
/** Text token (Tid_txt). */
class Php_tkn_txt extends Php_tkn_base {
	public Php_tkn_txt(int src_bgn, int src_end) {
		this.Src_rng_(src_bgn, src_end);
	}
	@Override public byte Tkn_tid() {
		return Php_tkn_.Tid_txt;
	}
}
/** Whitespace token (Tid_ws) carrying a whitespace sub-type. */
class Php_tkn_ws extends Php_tkn_base {
	public static final byte Tid_space = 0, Tid_nl = 1, Tid_tab = 2, Tid_cr = 3;
	private byte ws_tid;

	public Php_tkn_ws(int src_bgn, int src_end, byte ws_tid) {
		this.Src_rng_(src_bgn, src_end);
		this.ws_tid = ws_tid;
	}
	@Override public byte Tkn_tid() {
		return Php_tkn_.Tid_ws;
	}
	/** Whitespace sub-type as passed to the constructor. */
	public byte Ws_tid() {
		return ws_tid;
	}
}
/** Comment token (Tid_comment) carrying a comment sub-type. */
class Php_tkn_comment extends Php_tkn_base {
	public static final byte Tid_null = 0, Tid_mult = 1, Tid_slash = 2, Tid_hash = 3;
	private byte comment_tid;

	public Php_tkn_comment(int src_bgn, int src_end, byte comment_tid) {
		this.Src_rng_(src_bgn, src_end);
		this.comment_tid = comment_tid;
	}
	@Override public byte Tkn_tid() {
		return Php_tkn_.Tid_comment;
	}
	/** Comment sub-type as passed to the constructor. */
	public byte Comment_tid() {
		return comment_tid;
	}
}
/** Variable token (Tid_var). */
class Php_tkn_var extends Php_tkn_base {
	public Php_tkn_var(int src_bgn, int src_end) {
		this.Src_rng_(src_bgn, src_end);
	}
	@Override public byte Tkn_tid() {
		return Php_tkn_.Tid_var;
	}
	/** Returns the variable name without the leading "$". */
	public byte[] Var_name(byte[] src) {
		// NOTE: assume vars are of form $abc; +1 to skip first $
		int name_bgn = this.Src_bgn() + 1;
		return Bry_.Mid(src, name_bgn, this.Src_end());
	}
}
/** Number token (Tid_num). */
class Php_tkn_num extends Php_tkn_base {
	public Php_tkn_num(int src_bgn, int src_end) {
		this.Src_rng_(src_bgn, src_end);
	}
	@Override public byte Tkn_tid() {
		return Php_tkn_.Tid_num;
	}
	/** Converts the token's source range to an int, with Int_.Min_value as the fallback passed to Bry_.To_int_or. */
	public int Num_val_int(byte[] src) {
		int bgn = this.Src_bgn();
		int end = this.Src_end();
		return Bry_.To_int_or(src, bgn, end, Int_.Min_value);
	}
}
/** Quoted-string token (Tid_quote) carrying a quote sub-type. */
class Php_tkn_quote extends Php_tkn_base {
	// NOTE(review): these names duplicate the constants on Php_tkn_comment; looks like a copy-paste, confirm intended
	public static final byte Tid_null = 0, Tid_mult = 1, Tid_slash = 2, Tid_hash = 3;
	private byte quote_tid;

	public Php_tkn_quote(int src_bgn, int src_end, byte quote_tid) {
		this.Src_rng_(src_bgn, src_end);
		this.quote_tid = quote_tid;
	}
	@Override public byte Tkn_tid() {
		return Php_tkn_.Tid_quote;
	}
	/** Quote sub-type as passed to the constructor. */
	public byte Quote_tid() {
		return quote_tid;
	}
	/** Returns the text between the flanking quote chars. */
	public byte[] Quote_text(byte[] src) {
		// NOTE: assume quote are of form 'abc'; +1, -1 to skip flanking chars
		int text_bgn = this.Src_bgn() + 1;
		int text_end = this.Src_end() - 1;
		return Bry_.Mid(src, text_bgn, text_end);
	}
}
/** Declaration token (Tid_declaration); a single shared instance is exposed as "_" and its source range is never set here. */
class Php_tkn_declaration extends Php_tkn_base {
	public static final Php_tkn_declaration _ = new Php_tkn_declaration();

	@Override public byte Tkn_tid() {
		return Php_tkn_.Tid_declaration;
	}
}

View File

@@ -0,0 +1,28 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.phps; import gplx.*; import gplx.langs.*;
/** Creates token instances for a given source range. */
class Php_tkn_factory {
	public Php_tkn_generic Generic(int bgn, int end, byte tid) {
		return new Php_tkn_generic(bgn, end, tid);
	}
	public Php_tkn_txt Txt(int bgn, int end) {
		return new Php_tkn_txt(bgn, end);
	}
	/** Returns the shared declaration token; bgn and end are ignored. */
	public Php_tkn Declaration(int bgn, int end) {
		return Php_tkn_declaration._;
	}
	public Php_tkn_ws Ws(int bgn, int end, byte ws_tid) {
		return new Php_tkn_ws(bgn, end, ws_tid);
	}
	public Php_tkn_var Var(int bgn, int end) {
		return new Php_tkn_var(bgn, end);
	}
	public Php_tkn_num Num(int bgn, int end) {
		return new Php_tkn_num(bgn, end);
	}
	public Php_tkn_comment Comment(int bgn, int end, byte comment_tid) {
		return new Php_tkn_comment(bgn, end, comment_tid);
	}
	public Php_tkn_quote Quote(int bgn, int end, byte quote_tid) {
		return new Php_tkn_quote(bgn, end, quote_tid);
	}
}

View File

@@ -0,0 +1,35 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.phps; import gplx.*; import gplx.langs.*;
/** Receiver of tokens and messages; presumably driven by the PHP lexer — confirm against callers. */
public interface Php_tkn_wkr {
/** Called with the parsing context before tokens are processed (assumed ordering; TODO confirm). */
void Init(Php_ctx ctx);
/** Handles one token. */
void Process(Php_tkn tkn);
/** Reports a message item for the source range bgn..end, with optional args. */
void Msg_many(byte[] src, int bgn, int end, Gfo_msg_itm itm, Object... args);
}
/** Worker that simply collects every token in a list and every message in a log. */
class Php_tkn_wkr_tkn implements Php_tkn_wkr {
	List_adp lines = List_adp_.new_();
	Gfo_msg_log msg_log = new Gfo_msg_log("gplx.langs.phps");

	/** No-op. */
	public void Init(Php_ctx ctx) {}
	/** The collected tokens, in the order they were processed. */
	public List_adp List() {
		return lines;
	}
	public Gfo_msg_log Msg_log() {
		return msg_log;
	}
	/** Empties both the token list and the message log. */
	public void Clear() {
		lines.Clear();
		msg_log.Clear();
	}
	public void Process(Php_tkn tkn) {
		lines.Add(tkn);
	}
	public void Msg_many(byte[] src, int bgn, int end, Gfo_msg_itm itm, Object... args) {
		msg_log.Add_itm_many(itm, src, bgn, end, args);
	}
}

View File

@@ -0,0 +1,50 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.regxs; import gplx.*; import gplx.langs.*;
/** A compiled wildcard pattern ("*" plus literal text) that can be matched against byte arrays. */
public class Gfo_pattern {
	private final Gfo_pattern_itm[] itms;
	private final int itms_len;
	// NOTE(review): one ctx per pattern is mutated by every Match call, so concurrent Match calls on the same instance would interfere; confirm single-threaded use
	private final Gfo_pattern_ctx ctx = new Gfo_pattern_ctx();
	private byte[] raw;

	/** Compiles raw into pattern items. */
	public Gfo_pattern(byte[] raw) {
		this.raw = raw;
		itms = Gfo_pattern_itm_.Compile(raw);
		itms_len = itms.length;
	}
	/** The uncompiled pattern bytes. */
	public byte[] Raw() {
		return raw;
	}
	/** Returns true when every item matches in sequence and the whole of val is consumed. */
	public boolean Match(byte[] val) {
		final int val_len = val.length;
		ctx.Init(itms_len);
		int val_pos = 0;
		int idx = 0;
		while (idx < itms_len) {
			Gfo_pattern_itm itm = itms[idx];
			ctx.Itm_idx_(idx);
			val_pos = itm.Match(ctx, val, val_len, val_pos);
			if (!ctx.Rslt_pass()) {
				return false;
			}
			++idx;
		}
		return ctx.Rslt_pass() && val_pos == val_len;
	}
	/** Splits raw on semicolons (via Bry_split_.Split) and compiles one pattern per piece. */
	public static Gfo_pattern[] Parse_to_ary(byte[] raw) {
		byte[][] pieces = Bry_split_.Split(raw, Byte_ascii.Semic, true);
		Gfo_pattern[] rv = new Gfo_pattern[pieces.length];
		for (int idx = 0; idx < rv.length; ++idx) {
			rv[idx] = new Gfo_pattern(pieces[idx]);
		}
		return rv;
	}
}

View File

@@ -0,0 +1,31 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.regxs; import gplx.*; import gplx.langs.*;
/** Mutable state shared by pattern items during one Match run. */
public class Gfo_pattern_ctx {
	private boolean rslt;
	private boolean prv_was_wild;
	private int itm_len;
	private int itm_idx;

	/** True until an item calls Rslt_fail_(). */
	public boolean Rslt_pass() {
		return rslt;
	}
	public void Rslt_fail_() {
		rslt = false;
	}
	/** Whether the previously matched item was a wildcard. */
	public boolean Prv_was_wild() {
		return prv_was_wild;
	}
	public void Prv_was_wild_(boolean v) {
		prv_was_wild = v;
	}
	/** Index of the item currently being matched. */
	public int Itm_idx() {
		return itm_idx;
	}
	public void Itm_idx_(int v) {
		itm_idx = v;
	}
	public boolean Itm_idx_is_last() {
		return itm_idx == itm_len - 1;
	}
	/** Resets the result to pass and the wildcard flag to false, and stores the item count; the item index is not reset. */
	public void Init(int itm_len) {
		this.rslt = true;
		this.itm_len = itm_len;
		this.prv_was_wild = false;
	}
}

View File

@@ -0,0 +1,64 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.regxs; import gplx.*; import gplx.langs.*;
import gplx.core.strings.*;
/** One compiled element of a wildcard pattern (literal text or wildcard). */
public interface Gfo_pattern_itm {
/** Item type id; implementations return Gfo_pattern_itm_.Tid_* values. */
byte Tid();
/** Initializes the item from the pattern range src[bgn] .. src[end - 1]. */
void Compile(byte[] src, int bgn, int end);
/** Tries to match at pos within src; returns the new position and records failure on ctx. */
int Match(Gfo_pattern_ctx ctx, byte[] src, int src_len, int pos);
/** Appends a debug representation of the item to sb. */
void Xto_str(String_bldr sb);
}
/** Literal-text element of a wildcard pattern. */
class Gfo_pattern_itm_text implements Gfo_pattern_itm {
	private byte[] text;
	private int text_len;

	public Gfo_pattern_itm_text() {}
	public byte Tid() {return Gfo_pattern_itm_.Tid_text;}
	/** The literal bytes captured by Compile. */
	public byte[] Text() {return text;}
	/** Appends "tid|text" to sb. */
	public void Xto_str(String_bldr sb) {sb.Add(this.Tid()).Add("|" + String_.new_u8(text));}
	/** Captures src[bgn] .. src[end - 1] as the literal. */
	public void Compile(byte[] src, int bgn, int end) {
		this.text = Bry_.Mid(src, bgn, end);
		this.text_len = end - bgn;
	}
	/**
	 * Matches the literal against src starting at pos.
	 * <ul>
	 * <li>Not preceded by a wildcard: the literal must occur exactly at pos.</li>
	 * <li>Preceded by a wildcard and not the last item: the first occurrence at or after pos is taken.</li>
	 * <li>Preceded by a wildcard and the last item: the literal must be the suffix of src. Taking the first
	 *     occurrence here (the previous behavior) wrongly rejected inputs where the literal occurs more
	 *     than once; EX: pattern "*abc" against "abcabc".</li>
	 * </ul>
	 * On failure ctx.Rslt_fail_() is called and pos is returned unchanged.
	 * @return position just after the matched literal, or pos when there is no match
	 */
	public int Match(Gfo_pattern_ctx ctx, byte[] src, int src_len, int pos) {
		boolean pass = false;
		if (ctx.Prv_was_wild()) {
			int text_bgn;
			if (ctx.Itm_idx_is_last()) {
				// nothing follows this literal, so it must end exactly at src_len
				text_bgn = src_len - text_len;
				pass = text_bgn >= pos && Bry_.Match(src, text_bgn, src_len, text);
			}
			else {
				text_bgn = Bry_find_.Find_fwd(src, text, pos);
				pass = text_bgn != Bry_find_.Not_found;
			}
			if (pass)
				pos = text_bgn + text_len;
		}
		else {
			int text_end = pos + text_len;
			if (text_end > src_len) text_end = src_len;	// clamp so the comparison never reads past src
			pass = Bry_.Match(src, pos, text_end, text);
			if (pass)
				pos = text_end;
		}
		if (!pass) ctx.Rslt_fail_();
		ctx.Prv_was_wild_(false);
		return pos;
	}
}
/** Wildcard ("*") element of a pattern; a single shared instance is exposed as "_". */
class Gfo_pattern_itm_wild implements Gfo_pattern_itm {
	public static final Gfo_pattern_itm_wild _ = new Gfo_pattern_itm_wild();

	Gfo_pattern_itm_wild() {}

	public byte Tid() {
		return Gfo_pattern_itm_.Tid_wild;
	}
	/** No-op: a wildcard carries no text. */
	public void Compile(byte[] src, int bgn, int end) {}
	/** Flags the wildcard on ctx; consumes the rest of src when this is the last item, else consumes nothing. */
	public int Match(Gfo_pattern_ctx ctx, byte[] src, int src_len, int pos) {
		ctx.Prv_was_wild_(true);
		if (ctx.Itm_idx_is_last()) {
			return src_len;
		}
		return pos;
	}
	/** Appends "tid|*" to sb. */
	public void Xto_str(String_bldr sb) {
		sb.Add(this.Tid()).Add("|*");
	}
}

View File

@@ -0,0 +1,51 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.regxs; import gplx.*; import gplx.langs.*;
/** Type ids for pattern items and the compiler that turns raw pattern bytes into items. */
public class Gfo_pattern_itm_ {
	public static final byte Tid_text = 0, Tid_wild = 1;

	/**
	 * Compiles raw into an item array: each "*" becomes the shared wildcard item and each run of
	 * other bytes becomes one text item. A Null byte inside raw also ends the current text run.
	 */
	public static Gfo_pattern_itm[] Compile(byte[] raw) {
		List_adp itms = List_adp_.new_();
		int raw_len = raw.length;
		int text_bgn = -1;				// start of the open text run; -1 when none
		Gfo_pattern_itm open_text = null;	// text item already added to itms but not yet compiled
		// iterate one position past the end so the final text run gets closed
		for (int pos = 0; pos <= raw_len; ++pos) {
			byte b = pos == raw_len ? Byte_ascii.Null : raw[pos];
			boolean is_star = b == Byte_ascii.Star;
			if (is_star || b == Byte_ascii.Null) {
				if (open_text != null) {
					open_text.Compile(raw, text_bgn, pos);
					open_text = null;
					text_bgn = -1;
				}
				if (is_star) {
					itms.Add(Gfo_pattern_itm_wild._);
				}
			}
			else if (text_bgn == -1) {
				text_bgn = pos;
				open_text = new Gfo_pattern_itm_text();
				itms.Add(open_text);
			}
		}
		return (Gfo_pattern_itm[])itms.To_ary_and_clear(Gfo_pattern_itm.class);
	}
}

View File

@@ -0,0 +1,93 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.regxs; import gplx.*; import gplx.langs.*;
import org.junit.*; import gplx.core.strings.*;
/** Tests for Gfo_pattern compilation and matching. */
public class Gfo_pattern_tst {
	private Gfo_pattern_itm_fxt fxt = new Gfo_pattern_itm_fxt();

	@Before public void init() {
		fxt.Clear();
	}
	/** Checks the item sequence produced for text-only, wildcard-only, mixed and empty patterns. */
	@Test public void Compile() {
		fxt.Test_Compile("a", fxt.itm_text_("a"));
		fxt.Test_Compile("*", fxt.itm_wild_());
		fxt.Test_Compile("a*", fxt.itm_text_("a"), fxt.itm_wild_());
		fxt.Test_Compile("*a", fxt.itm_wild_(), fxt.itm_text_("a"));
		fxt.Test_Compile("*ab*", fxt.itm_wild_(), fxt.itm_text_("ab"), fxt.itm_wild_());
		fxt.Test_Compile("");
	}
	/** A pattern without wildcards matches only the identical value. */
	@Test public void Match() {
		Gfo_pattern exact = fxt.pattern_("abc");
		fxt.Test_Match_y(exact, "abc");
		fxt.Test_Match_n(exact, "ab", "a", "bc", "Abc", "");
	}
	/** A lone wildcard matches everything, including the empty value. */
	@Test public void Match_all() {
		Gfo_pattern any = fxt.pattern_("*");
		fxt.Test_Match_y(any, "a", "abc", "");
	}
	/** Trailing wildcard: value must start with the literal. */
	@Test public void Match_bgn() {
		Gfo_pattern prefix = fxt.pattern_("abc*");
		fxt.Test_Match_y(prefix, "abc", "abcdef");
		fxt.Test_Match_n(prefix, "abd", "aabc", "");
	}
	/** Leading wildcard: value must end with the literal. */
	@Test public void Match_end() {
		Gfo_pattern suffix = fxt.pattern_("*abc");
		fxt.Test_Match_y(suffix, "abc", "xyzabc");
		fxt.Test_Match_n(suffix, "abcd", "");
	}
	/** Wildcards between literals. */
	@Test public void Match_mid() {
		Gfo_pattern mixed = fxt.pattern_("a*c*e");
		fxt.Test_Match_y(mixed, "ace", "abcde");
		fxt.Test_Match_n(mixed, "abc", "");
	}
	// PURPOSE.fix: cb was true b/c ctx was not reset correctly
	@Test public void Bug_ctx() {
		Gfo_pattern prefix = fxt.pattern_("b*");
		fxt.Test_Match_y(prefix, "bc");
		fxt.Test_Match_n(prefix, "cb");
	}
}
/** Test fixture with builders for patterns / items and assertions for compile and match results. */
class Gfo_pattern_itm_fxt {
	/** No state to reset. */
	public void Clear() {}

	public Gfo_pattern pattern_(String raw) {
		return new Gfo_pattern(Bry_.new_u8(raw));
	}
	/** Asserts that every value in itms matches the pattern. */
	public void Test_Match_y(Gfo_pattern pattern, String... itms) {
		Test_Match(pattern, itms, Bool_.Y);
	}
	/** Asserts that no value in itms matches the pattern. */
	public void Test_Match_n(Gfo_pattern pattern, String... itms) {
		Test_Match(pattern, itms, Bool_.N);
	}
	private void Test_Match(Gfo_pattern pattern, String[] itms, boolean expd) {
		for (String itm : itms) {
			Tfds.Eq(expd, pattern.Match(Bry_.new_u8(itm)), "pattern={0} itm={1} expd={2}", String_.new_u8(pattern.Raw()), itm, expd);
		}
	}
	public Gfo_pattern_itm_wild itm_wild_() {
		return Gfo_pattern_itm_wild._;
	}
	/** Builds a text item compiled over the whole of raw. */
	public Gfo_pattern_itm_text itm_text_(String raw) {
		byte[] bry = Bry_.new_u8(raw);
		Gfo_pattern_itm_text rv = new Gfo_pattern_itm_text();
		rv.Compile(bry, 0, bry.length);
		return rv;
	}
	/** Compiles raw and compares the items' debug text with that of expd. */
	public void Test_Compile(String raw, Gfo_pattern_itm... expd) {
		Gfo_pattern_itm[] actl = Gfo_pattern_itm_.Compile(Bry_.new_u8(raw));
		Tfds.Eq(Ary_xto_str(expd), Ary_xto_str(actl));
	}
	/** Joins the debug text of each item, one per line. */
	private static String Ary_xto_str(Gfo_pattern_itm[] ary) {
		String_bldr sb = String_bldr_.new_();
		int idx = 0;
		while (idx < ary.length) {
			if (idx != 0) {
				sb.Add_char_nl();
			}
			ary[idx].Xto_str(sb);
			idx++;
		}
		return sb.Xto_str_and_clear();
	}
}

View File

@@ -0,0 +1,151 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.xmls; import gplx.*; import gplx.langs.*;
/**
 * Chainable writer that accumulates an XML fragment in an internal byte buffer.
 * Nodes opened through the Nde_lhs* methods are kept on a stack so that Nde_rhs can emit the
 * matching closing tag; Bld_str returns the accumulated text.
 */
public class Gfo_xml_wtr {
  private final Bry_bfr bfr = Bry_bfr.reset_(255);
  private byte quote_byte = Byte_ascii.Apos;            // attribute delimiter; apostrophe unless changed by Quote_
  private byte[] quote_escape = Bry_quote_1_escape;     // entity substituted for quote_byte inside attribute values
  private final List_adp nde_stack = List_adp_.new_();  // currently open nodes; popped by Nde_rhs
  private Gfo_xml_nde nde_cur = null;                   // node most recently opened by Nde_lhs_bgn; null until then
  private int indent = 0;                               // spaces written before the next opening tag

  /** Selects the attribute delimiter: apostrophe when apos is true, double quote otherwise. */
  public void Quote_(boolean apos) {
    if (apos) {
      this.quote_byte = Byte_ascii.Apos;
      this.quote_escape = Bry_quote_1_escape;
    }
    else {
      this.quote_byte = Byte_ascii.Quote;
      this.quote_escape = Bry_quote_2_escape;
    }
  }

  /** Starts an opening tag for a group node; Nde_lhs_end will follow the tag with a newline. */
  public Gfo_xml_wtr Nde_lhs_bgn_grp(String v) {return Nde_lhs_bgn(Bool_.Y, v);}
  /** Starts an opening tag for an item node; Nde_lhs_end will not add a newline after the tag. */
  public Gfo_xml_wtr Nde_lhs_bgn_itm(String v) {return Nde_lhs_bgn(Bool_.N, v);}
  // Writes the current indent and "<name", pushes the node on the stack and deepens the indent by 2.
  // The tag is left open so that attributes can be appended before Nde_lhs_end.
  private Gfo_xml_wtr Nde_lhs_bgn(boolean grp, String v) {
    nde_cur = new Gfo_xml_nde(grp, v);
    nde_stack.Add(nde_cur);
    bfr.Add_byte_repeat(Byte_ascii.Space, indent);
    bfr.Add_byte(Byte_ascii.Angle_bgn).Add_str_u8(v);
    indent += 2;
    return this;
  }
  /**
   * Closes the opening tag started by Nde_lhs_bgn_grp / Nde_lhs_bgn_itm with ">".
   * Reads the node set by the most recent Nde_lhs_bgn call, so it must only be called after one.
   */
  public Gfo_xml_wtr Nde_lhs_end() {
    bfr.Add_byte(Byte_ascii.Angle_end);
    if (nde_cur.Grp()) bfr.Add_byte_nl();
    return this;
  }
  /** Writes a complete opening tag for a group node (no attributes). */
  public Gfo_xml_wtr Nde_lhs(String v) {return Nde_lhs(Bool_.Y, v);}
  private Gfo_xml_wtr Nde_lhs(boolean grp, String v) {
    this.Nde_lhs_bgn(grp, v);
    this.Nde_lhs_end();
    return this;
  }
  /**
   * Pops the innermost open node and writes its closing tag followed by a newline.
   * Only group nodes get leading indentation; an item's closing tag continues the current line.
   */
  public Gfo_xml_wtr Nde_rhs() {
    Gfo_xml_nde nde = (Gfo_xml_nde)List_adp_.Pop(nde_stack);
    indent -= 2;
    if (nde.Grp()) bfr.Add_byte_repeat(Byte_ascii.Space, indent);
    bfr.Add(Bry_nde_rhs_bgn).Add_str_u8(nde.Name()).Add_byte(Byte_ascii.Angle_end); // EX: </node>
    bfr.Add_byte_nl();
    return this;
  }
  /** Writes an item node whose content is the escaped text. EX: &lt;name&gt;text&lt;/name&gt; */
  public Gfo_xml_wtr Nde_txt_str(String name, String text) {
    this.Nde_lhs(Bool_.N, name);
    this.Txt_str_u8(text);
    this.Nde_rhs();
    return this;
  }
  /** Same as Nde_txt_str, but the content is supplied as bytes. */
  public Gfo_xml_wtr Nde_txt_bry(String name, byte[] text) {
    this.Nde_lhs(Bool_.N, name);
    this.Txt_bry(text);
    this.Nde_rhs();
    return this;
  }
  /** Same as Nde_txt_str, but the content is the bytes produced by Int_.Xto_bry for the int. */
  public Gfo_xml_wtr Nde_txt_int(String name, int text) {
    this.Nde_lhs(Bool_.N, name);
    this.Txt_bry(Int_.Xto_bry(text));
    this.Nde_rhs();
    return this;
  }

  /**
   * Starts an attribute: writes a space, the key, "=" and the opening quote.
   * The value is appended with the Atr_val_* methods and terminated with Atr_end.
   */
  public Gfo_xml_wtr Atr_bgn(String key) {
    bfr.Add_byte_space().Add_str_u8(key).Add_byte(Byte_ascii.Eq).Add_byte(quote_byte);
    return this;
  }
  // NOTE: the Atr_val_* methods append the value as-is; no escaping is applied in this class.
  public Gfo_xml_wtr Atr_val_str_a7(String v) {bfr.Add_str_a7(v); return this;}
  public Gfo_xml_wtr Atr_val_str_u8(String v) {bfr.Add_str_u8(v); return this;}
  public Gfo_xml_wtr Atr_val_bry(byte[] v)    {bfr.Add(v); return this;}
  public Gfo_xml_wtr Atr_val_int(int v)       {bfr.Add_int_variable(v); return this;}
  /** Writes the closing quote of an attribute started with Atr_bgn. */
  public Gfo_xml_wtr Atr_end() {
    bfr.Add_byte(quote_byte);
    return this;
  }
  public Gfo_xml_wtr Atr_kv_int(String key, int val)        {return Atr_kv_bry(key, Int_.Xto_bry(val));}
  public Gfo_xml_wtr Atr_kv_str_a7(String key, String val)  {return Atr_kv_bry(key, Bry_.new_a7(val));}
  public Gfo_xml_wtr Atr_kv_str_u8(String key, String val)  {return Atr_kv_bry(key, Bry_.new_u8(val));}
  /** Writes a complete attribute: space, key, "=" and the quoted value. */
  public Gfo_xml_wtr Atr_kv_bry(String key, byte[] val) {
    bfr.Add_byte_space().Add_str_u8(key);
    bfr.Add_byte(Byte_ascii.Eq);
    Atr_val_quote(val);
    return this;
  }
  // Writes the value between quote bytes, handing the active quote byte and its entity to Add_bry_escape.
  // NOTE(review): only the quote byte is passed for escaping; "<", ">" and "&" are not handled here.
  private Gfo_xml_wtr Atr_val_quote(byte[] val_bry) {
    bfr.Add_byte(quote_byte);
    bfr.Add_bry_escape(quote_byte, quote_escape, val_bry, 0, val_bry.length);
    bfr.Add_byte(quote_byte);
    return this;
  }

  /**
   * Appends node text, replacing "<", ">" and "&" with "&lt;", "&gt;" and "&amp;".
   * Text without any of these bytes is appended in a single call.
   */
  public Gfo_xml_wtr Txt_bry(byte[] txt) {
    int len = txt.length;
    boolean dirty = false; // becomes true at the first byte that needs escaping
    for (int i = 0; i < len; ++i) {
      byte[] escape = null;
      byte b = txt[i];
      switch (b) {
        case Byte_ascii.Lt:   escape = Bry_escape_lt; break;
        case Byte_ascii.Gt:   escape = Bry_escape_gt; break;
        case Byte_ascii.Amp:  escape = Bry_escape_amp; break;
        default:              break;
      }
      if (escape != null && !dirty) {
        bfr.Add_mid(txt, 0, i); // first escape found: flush the clean prefix, then switch to byte-by-byte copying
        dirty = true;
      }
      if (dirty) {
        if (escape == null) bfr.Add_byte(b);
        else                bfr.Add(escape);
      }
    }
    if (!dirty) bfr.Add(txt); // nothing needed escaping; the dirty path has already written everything to bfr
    return this;
  }
  /** Converts the text via Bry_.new_u8 and appends it with Txt_bry. */
  public Gfo_xml_wtr Txt_str_u8(String txt) {return Txt_bry(Bry_.new_u8(txt));}
  /** Returns the accumulated text via the buffer's Xto_str_and_clear. */
  public String Bld_str() {return bfr.Xto_str_and_clear();}
  private static final byte[]
    Bry_nde_rhs_bgn     = Bry_.new_a7("</")
//  , Bry_nde_inline      = Bry_.new_a7("/>")
  , Bry_quote_1_escape  = Bry_.new_a7("&apos;")
  , Bry_quote_2_escape  = Bry_.new_a7("&quot;")
  , Bry_escape_lt       = Bry_.new_a7("&lt;")
  , Bry_escape_gt       = Bry_.new_a7("&gt;")
  , Bry_escape_amp      = Bry_.new_a7("&amp;")
  ;
}
class Gfo_xml_nde {
public Gfo_xml_nde(boolean grp, String name) {this.grp = grp; this.name = name;}
public boolean Grp() {return grp;} private final boolean grp;
public String Name() {return name;} private final String name;
}

View File

@@ -0,0 +1,81 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.xmls; import gplx.*; import gplx.langs.*;
import org.junit.*;
// Tests for Gfo_xml_wtr: each test drives the writer through the fixture and compares the built text line by line.
public class Gfo_xml_wtr_tst {
  private final Gfo_xml_wtr_fxt fxt = new Gfo_xml_wtr_fxt();
  @Before public void init() {}
  // A single group node: opening and closing tags land on separate lines.
  @Test public void Root() {
    fxt.Wtr().Nde_lhs("a").Nde_rhs();
    fxt.Test_bld("<a>", "</a>");
  }
  // Nested group nodes: the writer indents by 2 spaces per nesting level, so depth 1 has 2 spaces and depth 2 has 4.
  @Test public void Nest() {
    fxt.Wtr()
      .Nde_lhs("a")
      .  Nde_lhs("a_a")
      .    Nde_lhs("a_a_a")
      .    Nde_rhs()
      .  Nde_rhs()
      .Nde_rhs()
      ;
    fxt.Test_bld
    ( "<a>"
    , "  <a_a>"
    , "    <a_a_a>"
    , "    </a_a_a>"
    , "  </a_a>"
    , "</a>"
    );
  }
  // An item node with one attribute; the default attribute delimiter is the apostrophe.
  @Test public void Atrs() {
    fxt.Wtr()
      .Nde_lhs_bgn_itm("a")
      .Atr_kv_str_a7("b", "b1")
      .Nde_lhs_end()
      .Nde_rhs()
      ;
    fxt.Test_bld("<a b='b1'></a>");
  }
  // Attribute values: only the active quote character is replaced with its entity; the other characters pass through.
  @Test public void Atrs_escape() {
    fxt.Wtr()
      .Nde_lhs_bgn_itm("a")
      .Atr_kv_str_a7("b", "'\"<>&")
      .Nde_lhs_end()
      .Nde_rhs()
      ;
    fxt.Test_bld("<a b='&apos;\"<>&'></a>");
  }
  // Text node: name and content on one line.
  @Test public void Nde_txt() {
    fxt.Wtr()
      .Nde_txt_str("a", "a123")
      ;
    fxt.Test_bld("<a>a123</a>");
  }
  // Text content: "<", ">" and "&" are replaced with entities; quote characters are left alone.
  @Test public void Nde_txt_escape() {
    fxt.Wtr()
      .Nde_txt_str("a", "'\"<>&x")
      ;
    fxt.Test_bld("<a>'\"&lt;&gt;&amp;x</a>");
  }
}
// Test fixture: owns one writer and checks its built output against expected lines.
class Gfo_xml_wtr_fxt {
  private final Gfo_xml_wtr wtr = new Gfo_xml_wtr();
  public Gfo_xml_wtr Wtr() {
    return wtr;
  }
  // Joins the expected lines via String_.Concat_lines_nl_skip_last and compares them with the writer's built text.
  public void Test_bld(String... lines) {
    String expd = String_.Concat_lines_nl_skip_last(lines);
    String actl = wtr.Bld_str();
    Tfds.Eq_str_lines(expd, actl);
  }
}