mirror of
https://github.com/gnosygnu/xowa.git
synced 2026-03-02 03:49:30 +00:00
Embeddable: Create core dbs in proper subdirectory
This commit is contained in:
@@ -13,3 +13,8 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.dsvs; import gplx.*; import gplx.langs.*;
|
||||
public interface Dsv_fld_parser {
|
||||
void Init(byte fld_dlm, byte row_dlm);
|
||||
int Parse(Dsv_tbl_parser tbl_parser, Dsv_wkr_base mgr, byte[] src, int pos, int src_len, int fld_idx, int fld_bgn);
|
||||
}
|
||||
|
||||
@@ -13,3 +13,100 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.dsvs; import gplx.*; import gplx.langs.*;
|
||||
public class Dsv_fld_parser_ {
|
||||
public static final Dsv_fld_parser Bry_parser = Dsv_fld_parser_bry.Instance;
|
||||
public static final Dsv_fld_parser Int_parser = Dsv_fld_parser_int.Instance;
|
||||
public static final Dsv_fld_parser Line_parser__comment_is_pipe = new Dsv_fld_parser_line(Byte_ascii.Pipe);
|
||||
public static Err err_fld_unhandled(Dsv_fld_parser parser, Dsv_wkr_base wkr, int fld_idx, byte[] src, int bgn, int end) {
|
||||
throw Err_.new_wo_type("fld unhandled", "parser", Type_.Name_by_obj(parser), "wkr", Type_.Name_by_obj(wkr), "fld_idx", fld_idx, "val", String_.new_u8(src, bgn, end)).Trace_ignore_add_1_();
|
||||
}
|
||||
}
|
||||
class Dsv_fld_parser_line implements Dsv_fld_parser {
|
||||
private byte row_dlm = Byte_ascii.Nl; private final byte comment_dlm;
|
||||
public Dsv_fld_parser_line(byte comment_dlm) {this.comment_dlm = comment_dlm;}
|
||||
public void Init(byte fld_dlm, byte row_dlm) {
|
||||
this.row_dlm = row_dlm;
|
||||
}
|
||||
public int Parse(Dsv_tbl_parser parser, Dsv_wkr_base wkr, byte[] src, int pos, int src_len, int fld_idx, int fld_bgn) {
|
||||
while (true) {
|
||||
boolean pos_is_last = pos == src_len;
|
||||
byte b = pos_is_last ? row_dlm : src[pos];
|
||||
if (b == comment_dlm) {
|
||||
pos = Bry_find_.Find_fwd_until(src, pos, src_len, row_dlm);
|
||||
if (pos == Bry_find_.Not_found)
|
||||
pos = src_len;
|
||||
}
|
||||
else if (b == row_dlm) {
|
||||
boolean pass = wkr.Write_bry(parser, fld_idx, src, fld_bgn, pos);
|
||||
if (!pass) throw Dsv_fld_parser_.err_fld_unhandled(this, wkr, fld_idx, src, fld_bgn, pos);
|
||||
wkr.Commit_itm(parser, pos);
|
||||
int rv = pos + 1; // row_dlm is always 1 byte
|
||||
parser.Update_by_row(rv);
|
||||
return rv;
|
||||
}
|
||||
else
|
||||
++pos;
|
||||
}
|
||||
}
|
||||
}
|
||||
class Dsv_fld_parser_bry implements Dsv_fld_parser {
|
||||
private byte fld_dlm = Byte_ascii.Pipe, row_dlm = Byte_ascii.Nl;
|
||||
public void Init(byte fld_dlm, byte row_dlm) {
|
||||
this.fld_dlm = fld_dlm; this.row_dlm = row_dlm;
|
||||
}
|
||||
public int Parse(Dsv_tbl_parser parser, Dsv_wkr_base wkr, byte[] src, int pos, int src_len, int fld_idx, int fld_bgn) {
|
||||
while (true) {
|
||||
boolean pos_is_last = pos == src_len;
|
||||
byte b = pos_is_last ? row_dlm : src[pos];
|
||||
if (b == fld_dlm) {
|
||||
boolean pass = wkr.Write_bry(parser, fld_idx, src, fld_bgn, pos);
|
||||
if (!pass) throw Dsv_fld_parser_.err_fld_unhandled(this, wkr, fld_idx, src, fld_bgn, pos);
|
||||
int rv = pos + 1; // fld_dlm is always 1 byte
|
||||
parser.Update_by_fld(rv);
|
||||
return rv;
|
||||
}
|
||||
else if (b == row_dlm) {
|
||||
boolean pass = wkr.Write_bry(parser, fld_idx, src, fld_bgn, pos);
|
||||
if (!pass) throw Dsv_fld_parser_.err_fld_unhandled(this, wkr, fld_idx, src, fld_bgn, pos);
|
||||
wkr.Commit_itm(parser, pos);
|
||||
int rv = pos + 1; // row_dlm is always 1 byte
|
||||
parser.Update_by_row(rv);
|
||||
return rv;
|
||||
}
|
||||
else
|
||||
++pos;
|
||||
}
|
||||
}
|
||||
public static final Dsv_fld_parser_bry Instance = new Dsv_fld_parser_bry(); Dsv_fld_parser_bry() {}
|
||||
}
|
||||
class Dsv_fld_parser_int implements Dsv_fld_parser {
|
||||
private byte fld_dlm = Byte_ascii.Pipe, row_dlm = Byte_ascii.Nl;
|
||||
public void Init(byte fld_dlm, byte row_dlm) {
|
||||
this.fld_dlm = fld_dlm; this.row_dlm = row_dlm;
|
||||
}
|
||||
public int Parse(Dsv_tbl_parser parser, Dsv_wkr_base wkr, byte[] src, int pos, int src_len, int fld_idx, int fld_bgn) {
|
||||
while (true) {
|
||||
boolean pos_is_last = pos == src_len;
|
||||
byte b = pos_is_last ? row_dlm : src[pos];
|
||||
if (b == fld_dlm) {
|
||||
boolean pass = wkr.Write_int(parser, fld_idx, pos, Bry_.To_int_or(src, fld_bgn, pos, -1));
|
||||
if (!pass) throw Dsv_fld_parser_.err_fld_unhandled(this, wkr, fld_idx, src, fld_bgn, pos);
|
||||
int rv = pos + 1; // fld_dlm is always 1 byte
|
||||
parser.Update_by_fld(rv);
|
||||
return rv;
|
||||
}
|
||||
else if (b == row_dlm) {
|
||||
boolean pass = wkr.Write_int(parser, fld_idx, pos, Bry_.To_int_or(src, fld_bgn, pos, -1));
|
||||
if (!pass) throw Dsv_fld_parser_.err_fld_unhandled(this, wkr, fld_idx, src, fld_bgn, pos);
|
||||
wkr.Commit_itm(parser, pos);
|
||||
int rv = pos + 1; // row_dlm is always 1 byte
|
||||
parser.Update_by_row(rv);
|
||||
return rv;
|
||||
}
|
||||
else
|
||||
++pos;
|
||||
}
|
||||
}
|
||||
public static final Dsv_fld_parser_int Instance = new Dsv_fld_parser_int(); Dsv_fld_parser_int() {}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,69 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.dsvs; import gplx.*; import gplx.langs.*;
|
||||
public class Dsv_tbl_parser implements Gfo_invk, Rls_able {
|
||||
private Dsv_wkr_base mgr;
|
||||
private Dsv_fld_parser[] fld_parsers = new Dsv_fld_parser[2]; private int fld_parsers_len = 2;
|
||||
public byte[] Src() {return src;} private byte[] src;
|
||||
public int Fld_bgn() {return fld_bgn;} private int fld_bgn = 0;
|
||||
public int Fld_idx() {return fld_idx;} private int fld_idx = 0;
|
||||
public int Row_bgn() {return row_bgn;} private int row_bgn = 0;
|
||||
public int Row_idx() {return row_idx;} private int row_idx = 0;
|
||||
public boolean Skip_blank_lines() {return skip_blank_lines;} public Dsv_tbl_parser Skip_blank_lines_(boolean v) {skip_blank_lines = v; return this;} private boolean skip_blank_lines = true;
|
||||
public byte Fld_dlm() {return fld_dlm;} public Dsv_tbl_parser Fld_dlm_(byte v) {fld_dlm = v; return this;} private byte fld_dlm = Byte_ascii.Pipe;
|
||||
public byte Row_dlm() {return row_dlm;} public Dsv_tbl_parser Row_dlm_(byte v) {row_dlm = v; return this;} private byte row_dlm = Byte_ascii.Nl;
|
||||
public void Init(Dsv_wkr_base mgr, Dsv_fld_parser... fld_parsers) {
|
||||
this.mgr = mgr;
|
||||
this.fld_parsers = fld_parsers;
|
||||
this.fld_parsers_len = fld_parsers.length;
|
||||
for (int i = 0; i < fld_parsers_len; i++)
|
||||
fld_parsers[i].Init(fld_dlm, row_dlm);
|
||||
}
|
||||
public void Clear() {
|
||||
fld_bgn = fld_idx = row_bgn = row_idx = 0;
|
||||
}
|
||||
public Err Err_row_bgn(String fmt, int pos) {
|
||||
return Err_.new_wo_type(fmt, "line", String_.new_u8(src, row_bgn, pos)).Trace_ignore_add_1_();
|
||||
}
|
||||
public void Update_by_fld(int pos) {
|
||||
fld_bgn = pos;
|
||||
++fld_idx;
|
||||
}
|
||||
public void Update_by_row(int pos) {
|
||||
row_bgn = fld_bgn = pos;
|
||||
++row_idx;
|
||||
fld_idx = 0;
|
||||
}
|
||||
public void Parse(byte[] src) {
|
||||
int src_len = src.length; if (src_len == 0) return; // NOTE: do not process if empty; note that loop below will process once for empty row
|
||||
this.src = src;
|
||||
int pos = 0;
|
||||
while (true) {
|
||||
if (fld_idx == 0 && skip_blank_lines) { // row committed; skip blank lines
|
||||
while (pos < src_len) {
|
||||
if (src[pos] == row_dlm) {
|
||||
++pos;
|
||||
row_bgn = fld_bgn = pos;
|
||||
}
|
||||
else
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (fld_idx == fld_parsers_len) break;
|
||||
Dsv_fld_parser fld_parser = fld_parsers[fld_idx];
|
||||
pos = fld_parser.Parse(this, mgr, src, pos, src_len, fld_idx, fld_bgn);
|
||||
if ( pos > src_len // pos is now fully past src_len; exit
|
||||
|| pos == src_len && fld_idx == 0 // last pos but fld_idx > 0; do one more iteration which will "commit row; EX: 2 fields and src of "a|"; EOS should close out row
|
||||
) break;
|
||||
}
|
||||
}
|
||||
public void Rls() {
|
||||
src = null; fld_parsers = null; mgr = null; fld_parsers_len = 0;
|
||||
}
|
||||
public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
|
||||
if (ctx.Match(k, Invk_load_by_str)) Parse(m.ReadBry("v"));
|
||||
else return Gfo_invk_.Rv_unhandled;
|
||||
return this;
|
||||
} private static final String Invk_load_by_str = "load_by_str";
|
||||
}
|
||||
|
||||
@@ -13,3 +13,50 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.dsvs; import gplx.*; import gplx.langs.*;
|
||||
import org.junit.*;
|
||||
public class Dsv_tbl_parser_int_tst {
|
||||
private Dsv_mok_fxt fxt = new Dsv_mok_fxt();
|
||||
@Test public void Basic() {
|
||||
fxt .Test_load(String_.Concat_lines_nl_skip_last
|
||||
( "a|1|3"
|
||||
, "b|2|4"
|
||||
)
|
||||
, fxt.mgr_int_()
|
||||
, fxt.itm_int_("a", 1, 3)
|
||||
, fxt.itm_int_("b", 2, 4)
|
||||
);
|
||||
}
|
||||
}
|
||||
class Mok_int_itm implements To_str_able {
|
||||
private String fld_0;
|
||||
private int fld_1, fld_2;
|
||||
public Mok_int_itm(String fld_0, int fld_1, int fld_2) {this.fld_0 = fld_0; this.fld_1 = fld_1; this.fld_2 = fld_2;}
|
||||
public String To_str() {return String_.Concat_with_str("|", fld_0, Int_.To_str(fld_1), Int_.To_str(fld_2));}
|
||||
}
|
||||
class Mok_int_mgr extends Mok_mgr_base {
|
||||
public void Clear() {itms.Clear();}
|
||||
@Override public To_str_able[] Itms() {return (To_str_able[])itms.To_ary(To_str_able.class);} private List_adp itms = List_adp_.New();
|
||||
private String fld_0;
|
||||
private int fld_1, fld_2;
|
||||
@Override public Dsv_fld_parser[] Fld_parsers() {
|
||||
return new Dsv_fld_parser[] {Dsv_fld_parser_bry.Instance, Dsv_fld_parser_int.Instance, Dsv_fld_parser_int.Instance};
|
||||
}
|
||||
@Override public boolean Write_bry(Dsv_tbl_parser parser, int fld_idx, byte[] src, int bgn, int end) {
|
||||
switch (fld_idx) {
|
||||
case 0: fld_0 = String_.new_u8(src, bgn, end); return true;
|
||||
default: return false;
|
||||
}
|
||||
}
|
||||
@Override public boolean Write_int(Dsv_tbl_parser parser, int fld_idx, int pos, int val_int) {
|
||||
switch (fld_idx) {
|
||||
case 1: fld_1 = val_int; return true;
|
||||
case 2: fld_2 = val_int; return true;
|
||||
default: return false;
|
||||
}
|
||||
}
|
||||
@Override public void Commit_itm(Dsv_tbl_parser parser, int pos) {
|
||||
Mok_int_itm itm = new Mok_int_itm(fld_0, fld_1, fld_2);
|
||||
itms.Add(itm);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,95 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.dsvs; import gplx.*; import gplx.langs.*;
|
||||
import org.junit.*;
|
||||
public class Dsv_tbl_parser_str_tst {
|
||||
private Dsv_mok_fxt fxt = new Dsv_mok_fxt();
|
||||
@Test public void Basic() {
|
||||
fxt .Test_load(String_.Concat_lines_nl_skip_last
|
||||
( "a|A"
|
||||
, "b|B"
|
||||
)
|
||||
, fxt.mgr_str_(2)
|
||||
, fxt.itm_str_("a", "A")
|
||||
, fxt.itm_str_("b", "B")
|
||||
);
|
||||
}
|
||||
@Test public void Blank_lines() {
|
||||
fxt .Test_load(String_.Concat_lines_nl_skip_last
|
||||
( ""
|
||||
, "a|A"
|
||||
, ""
|
||||
, "b|B"
|
||||
, ""
|
||||
)
|
||||
, fxt.mgr_str_(2)
|
||||
, fxt.itm_str_("a", "A")
|
||||
, fxt.itm_str_("b", "B")
|
||||
);
|
||||
}
|
||||
@Test public void Incomplete_row() {
|
||||
fxt .Test_load(String_.Concat_lines_nl_skip_last
|
||||
( "a"
|
||||
, "b"
|
||||
, ""
|
||||
)
|
||||
, fxt.mgr_str_(2)
|
||||
, fxt.itm_str_("a")
|
||||
, fxt.itm_str_("b")
|
||||
);
|
||||
}
|
||||
@Test public void Incomplete_row_2() { // PURPOSE: handle multiple incomplete cells
|
||||
fxt .Test_load(String_.Concat_lines_nl_skip_last
|
||||
( "a|")
|
||||
, fxt.mgr_str_(3)
|
||||
, fxt.itm_str_("a", "")
|
||||
);
|
||||
}
|
||||
}
|
||||
abstract class Mok_mgr_base extends Dsv_wkr_base {
|
||||
public abstract To_str_able[] Itms();
|
||||
}
|
||||
class Dsv_mok_fxt {
|
||||
private Dsv_tbl_parser tbl_parser = new Dsv_tbl_parser();
|
||||
public Dsv_mok_fxt Clear() {
|
||||
tbl_parser.Clear();
|
||||
return this;
|
||||
}
|
||||
public Mok_mgr_base mgr_int_() {return new Mok_int_mgr();}
|
||||
public Mok_mgr_base mgr_str_(int len) {return new Mok_str_mgr(len);}
|
||||
public Mok_str_itm itm_str_(String... flds) {return new Mok_str_itm(flds);}
|
||||
public Mok_int_itm itm_int_(String fld_0, int fld_1, int fld_2) {return new Mok_int_itm(fld_0, fld_1, fld_2);}
|
||||
public void Test_load(String src, Mok_mgr_base mgr, To_str_able... expd) {
|
||||
mgr.Load_by_bry(Bry_.new_u8(src));
|
||||
Tfds.Eq_ary_str(expd, mgr.Itms());
|
||||
}
|
||||
}
|
||||
class Mok_str_itm implements To_str_able {
|
||||
private String[] flds;
|
||||
public Mok_str_itm(String[] flds) {this.flds = flds;}
|
||||
public String To_str() {return String_.Concat_with_str("|", flds);}
|
||||
}
|
||||
class Mok_str_mgr extends Mok_mgr_base {
|
||||
private int flds_len;
|
||||
public Mok_str_mgr(int flds_len) {
|
||||
this.flds_len = flds_len;
|
||||
}
|
||||
public void Clear() {itms.Clear();}
|
||||
@Override public To_str_able[] Itms() {return (To_str_able[])itms.To_ary(To_str_able.class);} private List_adp itms = List_adp_.New();
|
||||
private List_adp flds = List_adp_.New();
|
||||
@Override public boolean Write_bry(Dsv_tbl_parser parser, int fld_idx, byte[] src, int bgn, int end) {
|
||||
flds.Add(String_.new_u8(src, bgn, end));
|
||||
return true;
|
||||
}
|
||||
@Override public Dsv_fld_parser[] Fld_parsers() {
|
||||
Dsv_fld_parser[] rv = new Dsv_fld_parser[flds_len];
|
||||
for (int i = 0; i < flds_len; i++)
|
||||
rv[i] = Dsv_fld_parser_.Bry_parser;
|
||||
return rv;
|
||||
}
|
||||
@Override public void Commit_itm(Dsv_tbl_parser parser, int pos) {
|
||||
Mok_str_itm itm = new Mok_str_itm((String[])flds.To_ary_and_clear(String.class));
|
||||
itms.Add(itm);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,28 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.dsvs; import gplx.*; import gplx.langs.*;
|
||||
public abstract class Dsv_wkr_base implements Gfo_invk {
|
||||
public abstract Dsv_fld_parser[] Fld_parsers();
|
||||
public byte[] Src() {return src;} private byte[] src;
|
||||
public abstract void Commit_itm(Dsv_tbl_parser parser, int pos);
|
||||
@gplx.Virtual public boolean Write_bry(Dsv_tbl_parser parser, int fld_idx, byte[] src, int bgn, int end) {return false;}
|
||||
@gplx.Virtual public boolean Write_int(Dsv_tbl_parser parser, int fld_idx, int pos, int val_int) {return false;}
|
||||
public void Load_by_bry(byte[] src) {
|
||||
this.src = src;
|
||||
Dsv_tbl_parser tbl_parser = new Dsv_tbl_parser(); // NOTE: this proc should only be called once, so don't bother caching tbl_parser
|
||||
tbl_parser.Init(this, this.Fld_parsers());
|
||||
Load_by_bry_bgn();
|
||||
tbl_parser.Parse(src);
|
||||
tbl_parser.Rls();
|
||||
Load_by_bry_end();
|
||||
}
|
||||
@gplx.Virtual public void Load_by_bry_bgn() {}
|
||||
@gplx.Virtual public void Load_by_bry_end() {}
|
||||
public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
|
||||
if (ctx.Match(k, Invk_load_by_str)) Load_by_bry(m.ReadBry("v"));
|
||||
else return Gfo_invk_.Rv_unhandled;
|
||||
return this;
|
||||
}
|
||||
public static final String Invk_load_by_str = "load_by_str";
|
||||
}
|
||||
|
||||
@@ -13,3 +13,200 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.gfs; import gplx.*; import gplx.langs.*;
|
||||
interface Gfs_lxr {
|
||||
int Lxr_tid();
|
||||
int Process(Gfs_parser_ctx ctx, int bgn, int end);
|
||||
}
|
||||
class Gfs_lxr_whitespace implements Gfs_lxr {
|
||||
public int Lxr_tid() {return Gfs_lxr_.Tid_whitespace;}
|
||||
public int Process(Gfs_parser_ctx ctx, int bgn, int end) {
|
||||
byte[] src = ctx.Src(); int src_len = ctx.Src_len();
|
||||
int rv = Gfs_lxr_.Rv_eos, cur_pos;
|
||||
for (cur_pos = end; cur_pos < src_len; cur_pos++) {
|
||||
byte b = src[cur_pos];
|
||||
Object o = ctx.Trie().Match_at_w_b0(ctx.Trie_rv(), b, src, cur_pos, src_len);
|
||||
if (o == null) {
|
||||
rv = Gfs_lxr_.Rv_null;
|
||||
ctx.Process_null(cur_pos);
|
||||
break;
|
||||
}
|
||||
else {
|
||||
Gfs_lxr lxr = (Gfs_lxr)o;
|
||||
if (lxr.Lxr_tid() == Gfs_lxr_.Tid_whitespace) {}
|
||||
else {
|
||||
rv = Gfs_lxr_.Rv_lxr;
|
||||
ctx.Process_lxr(cur_pos, lxr);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
public static final Gfs_lxr_whitespace Instance = new Gfs_lxr_whitespace(); Gfs_lxr_whitespace() {}
|
||||
}
|
||||
class Gfs_lxr_comment_flat implements Gfs_lxr {
|
||||
public Gfs_lxr_comment_flat(byte[] bgn_bry, byte[] end_bry) {
|
||||
this.bgn_bry = bgn_bry; this.bgn_bry_len = bgn_bry.length;
|
||||
this.end_bry = end_bry; this.end_bry_len = end_bry.length;
|
||||
} byte[] bgn_bry, end_bry; int bgn_bry_len, end_bry_len;
|
||||
public int Lxr_tid() {return Gfs_lxr_.Tid_comment;}
|
||||
public int Process(Gfs_parser_ctx ctx, int lxr_bgn, int lxr_end) {
|
||||
byte[] src = ctx.Src(); int src_len = ctx.Src_len();
|
||||
int end_pos = Bry_find_.Find_fwd(src, end_bry, lxr_end, src_len);
|
||||
// if (end_pos == Bry_find_.Not_found) throw Err_.new_fmt_("comment is not closed: {0}", String_.new_u8(end_bry));
|
||||
return (end_pos == Bry_find_.Not_found)
|
||||
? src_len // allow eos to terminate flat comment; needed for "tidy-always-adds-nl-in-textarea" fix; NOTE: DATE:2014-06-21
|
||||
: end_pos + end_bry_len; // position after end_bry
|
||||
}
|
||||
}
|
||||
class Gfs_lxr_identifier implements Gfs_lxr {
|
||||
public int Lxr_tid() {return Gfs_lxr_.Tid_identifier;}
|
||||
public int Process(Gfs_parser_ctx ctx, int bgn, int end) {
|
||||
byte[] src = ctx.Src(); int src_len = ctx.Src_len();
|
||||
int pos, rv = Gfs_lxr_.Rv_eos;
|
||||
for (pos = end; pos < src_len; pos++) {
|
||||
byte b = src[pos];
|
||||
Object o = ctx.Trie().Match_at_w_b0(ctx.Trie_rv(), b, src, pos, src_len);
|
||||
if (o == null) { // invalid char; stop;
|
||||
rv = Gfs_lxr_.Rv_null;
|
||||
ctx.Process_null(pos);
|
||||
break;
|
||||
}
|
||||
else {
|
||||
Gfs_lxr lxr = (Gfs_lxr)o;
|
||||
if (lxr.Lxr_tid() == Gfs_lxr_.Tid_identifier) {} // still an identifier; continue
|
||||
else { // new lxr (EX: "." in "abc."); (a) hold word of "abc"; mark "." as new lxr;
|
||||
ctx.Hold_word(bgn, pos);
|
||||
rv = Gfs_lxr_.Rv_lxr;
|
||||
ctx.Process_lxr(pos, lxr);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (rv == Gfs_lxr_.Rv_eos) ctx.Process_eos(); // eos
|
||||
return rv;
|
||||
}
|
||||
public static final Gfs_lxr_identifier Instance = new Gfs_lxr_identifier(); Gfs_lxr_identifier() {}
|
||||
}
|
||||
class Gfs_lxr_semic implements Gfs_lxr {
|
||||
public int Lxr_tid() {return Gfs_lxr_.Tid_semic;}
|
||||
public int Process(Gfs_parser_ctx ctx, int bgn, int end) {
|
||||
switch (ctx.Prv_lxr()) {
|
||||
case Gfs_lxr_.Tid_identifier: ctx.Make_nde(bgn, end); ctx.Cur_nde_from_stack(); break; // a;
|
||||
case Gfs_lxr_.Tid_quote:
|
||||
case Gfs_lxr_.Tid_paren_end: ctx.Cur_nde_from_stack(); break; // a();
|
||||
case Gfs_lxr_.Tid_semic: break; // a;; ignore;
|
||||
default: ctx.Err_mgr().Fail_invalid_lxr(ctx, bgn, this.Lxr_tid(), Byte_ascii.Semic); break;
|
||||
}
|
||||
return end;
|
||||
}
|
||||
public static final Gfs_lxr_semic Instance = new Gfs_lxr_semic(); Gfs_lxr_semic() {}
|
||||
}
|
||||
class Gfs_lxr_dot implements Gfs_lxr {
|
||||
public int Lxr_tid() {return Gfs_lxr_.Tid_dot;}
|
||||
public int Process(Gfs_parser_ctx ctx, int bgn, int end) {
|
||||
switch (ctx.Prv_lxr()) {
|
||||
case Gfs_lxr_.Tid_identifier: ctx.Make_nde(bgn, end); break; // a.
|
||||
case Gfs_lxr_.Tid_paren_end: break; // a().
|
||||
default: ctx.Err_mgr().Fail_invalid_lxr(ctx, bgn, this.Lxr_tid(), Byte_ascii.Dot); break;
|
||||
}
|
||||
return end;
|
||||
}
|
||||
public static final Gfs_lxr_dot Instance = new Gfs_lxr_dot(); Gfs_lxr_dot() {}
|
||||
}
|
||||
class Gfs_lxr_paren_bgn implements Gfs_lxr {
|
||||
public int Lxr_tid() {return Gfs_lxr_.Tid_paren_bgn;}
|
||||
public int Process(Gfs_parser_ctx ctx, int bgn, int end) {
|
||||
switch (ctx.Prv_lxr()) {
|
||||
case Gfs_lxr_.Tid_identifier: ctx.Make_nde(bgn, end); break; // a(;
|
||||
default: ctx.Err_mgr().Fail_invalid_lxr(ctx, bgn, this.Lxr_tid(), Byte_ascii.Paren_bgn); break;
|
||||
}
|
||||
return end;
|
||||
}
|
||||
public static final Gfs_lxr_paren_bgn Instance = new Gfs_lxr_paren_bgn(); Gfs_lxr_paren_bgn() {}
|
||||
}
|
||||
class Gfs_lxr_paren_end implements Gfs_lxr {
|
||||
public int Lxr_tid() {return Gfs_lxr_.Tid_paren_end;}
|
||||
public int Process(Gfs_parser_ctx ctx, int bgn, int end) {
|
||||
switch (ctx.Prv_lxr()) {
|
||||
case Gfs_lxr_.Tid_paren_bgn:
|
||||
case Gfs_lxr_.Tid_quote: break; // "))", "abc)", "'abc')"
|
||||
case Gfs_lxr_.Tid_identifier: ctx.Make_atr_by_idf(); break; // 123)
|
||||
default: ctx.Err_mgr().Fail_invalid_lxr(ctx, bgn, this.Lxr_tid(), Byte_ascii.Paren_end); break;
|
||||
}
|
||||
return end;
|
||||
}
|
||||
public static final Gfs_lxr_paren_end Instance = new Gfs_lxr_paren_end(); Gfs_lxr_paren_end() {}
|
||||
}
|
||||
class Gfs_lxr_quote implements Gfs_lxr {
|
||||
public Gfs_lxr_quote(byte[] bgn_bry, byte[] end_bry) {
|
||||
this.bgn_bry_len = bgn_bry.length;
|
||||
this.end_bry = end_bry; this.end_bry_len = end_bry.length;
|
||||
} private byte[] end_bry; private int bgn_bry_len, end_bry_len;
|
||||
public int Lxr_tid() {return Gfs_lxr_.Tid_quote;}
|
||||
public int Process(Gfs_parser_ctx ctx, int lxr_bgn, int lxr_end) {
|
||||
byte[] src = ctx.Src(); int src_len = ctx.Src_len();
|
||||
int end_pos = Bry_find_.Find_fwd(src, end_bry, lxr_end, src_len);
|
||||
if (end_pos == Bry_find_.Not_found) throw Err_.new_wo_type("quote is not closed", "end", String_.new_u8(end_bry));
|
||||
Bry_bfr bfr = ctx.Tmp_bfr().Clear();
|
||||
int prv_pos = lxr_end;
|
||||
int nxt_pos = end_pos + end_bry_len;
|
||||
if (Bry_.Match(src, nxt_pos, nxt_pos + end_bry_len, end_bry)) { // end_bry is doubled; EX: end_bry = ' and raw = a''
|
||||
while (true) {
|
||||
bfr.Add_mid(src, prv_pos, end_pos); // add everything up to end_bry
|
||||
bfr.Add(end_bry); // add end_bry
|
||||
prv_pos = nxt_pos + end_bry_len; // set prv_pos to after doubled end_bry
|
||||
end_pos = Bry_find_.Find_fwd(src, end_bry, prv_pos, src_len);
|
||||
if (end_pos == Bry_find_.Not_found) throw Err_.new_wo_type("quote is not closed", "end", String_.new_u8(end_bry));
|
||||
nxt_pos = end_pos + end_bry_len;
|
||||
if (!Bry_.Match(src, nxt_pos, nxt_pos + end_bry_len, end_bry)) {
|
||||
bfr.Add_mid(src, prv_pos, end_pos);
|
||||
break;
|
||||
}
|
||||
}
|
||||
ctx.Make_atr_by_bry(lxr_bgn + bgn_bry_len, end_pos, bfr.To_bry_and_clear());
|
||||
}
|
||||
else
|
||||
ctx.Make_atr(lxr_bgn + bgn_bry_len, end_pos);
|
||||
return end_pos + end_bry_len; // position after quote
|
||||
}
|
||||
}
|
||||
class Gfs_lxr_curly_bgn implements Gfs_lxr {
|
||||
public int Lxr_tid() {return Gfs_lxr_.Tid_curly_bgn;}
|
||||
public int Process(Gfs_parser_ctx ctx, int bgn, int end) {
|
||||
switch (ctx.Prv_lxr()) {
|
||||
case Gfs_lxr_.Tid_identifier: ctx.Make_nde(bgn, end); ctx.Stack_add(); break; // a{;
|
||||
case Gfs_lxr_.Tid_paren_end: ctx.Stack_add(); break; // a(){; NOTE: node exists but needs to be pushed onto stack
|
||||
default: ctx.Err_mgr().Fail_invalid_lxr(ctx, bgn, this.Lxr_tid(), Byte_ascii.Curly_bgn); break;
|
||||
}
|
||||
return end;
|
||||
}
|
||||
public static final Gfs_lxr_curly_bgn Instance = new Gfs_lxr_curly_bgn(); Gfs_lxr_curly_bgn() {}
|
||||
}
|
||||
class Gfs_lxr_curly_end implements Gfs_lxr {
|
||||
public int Lxr_tid() {return Gfs_lxr_.Tid_curly_end;}
|
||||
public int Process(Gfs_parser_ctx ctx, int bgn, int end) {
|
||||
ctx.Stack_pop(bgn);
|
||||
return end;
|
||||
}
|
||||
public static final Gfs_lxr_curly_end Instance = new Gfs_lxr_curly_end(); Gfs_lxr_curly_end() {}
|
||||
}
|
||||
class Gfs_lxr_equal implements Gfs_lxr {
|
||||
public int Lxr_tid() {return Gfs_lxr_.Tid_eq;}
|
||||
public int Process(Gfs_parser_ctx ctx, int bgn, int end) {
|
||||
ctx.Make_nde(bgn, end).Op_tid_(Gfs_nde.Op_tid_assign);
|
||||
return end;
|
||||
}
|
||||
public static final Gfs_lxr_equal Instance = new Gfs_lxr_equal(); Gfs_lxr_equal() {}
|
||||
}
|
||||
class Gfs_lxr_comma implements Gfs_lxr {
|
||||
public int Lxr_tid() {return Gfs_lxr_.Tid_comma;}
|
||||
public int Process(Gfs_parser_ctx ctx, int bgn, int end) {
|
||||
switch (ctx.Prv_lxr()) {
|
||||
case Gfs_lxr_.Tid_identifier: ctx.Make_atr_by_idf(); break; // 123,
|
||||
}
|
||||
return end;
|
||||
}
|
||||
public static final Gfs_lxr_comma Instance = new Gfs_lxr_comma(); Gfs_lxr_comma() {}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,25 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.gfs; import gplx.*; import gplx.langs.*;
|
||||
class Gfs_lxr_ {
|
||||
public static final int Rv_init = -1, Rv_null = -2, Rv_eos = -3, Rv_lxr = -4;
|
||||
public static final int Tid_identifier = 1, Tid_dot = 2, Tid_semic = 3, Tid_paren_bgn = 4, Tid_paren_end = 5, Tid_curly_bgn = 6, Tid_curly_end = 7, Tid_quote = 8, Tid_comma = 9, Tid_whitespace = 10, Tid_comment = 11, Tid_eq = 12;
|
||||
public static String Tid__name(int tid) {
|
||||
switch (tid) {
|
||||
case Tid_identifier: return "identifier";
|
||||
case Tid_dot: return "dot";
|
||||
case Tid_semic: return "semic";
|
||||
case Tid_paren_bgn: return "paren_bgn";
|
||||
case Tid_paren_end: return "paren_end";
|
||||
case Tid_curly_bgn: return "curly_bgn";
|
||||
case Tid_curly_end: return "curly_end";
|
||||
case Tid_quote: return "quote";
|
||||
case Tid_comma: return "comma";
|
||||
case Tid_whitespace: return "whitespace";
|
||||
case Tid_comment: return "comment";
|
||||
case Tid_eq: return "eq";
|
||||
default: throw Err_.new_unhandled(tid);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,39 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.gfs; import gplx.*; import gplx.langs.*;
|
||||
import gplx.core.gfo_regys.*;
|
||||
public class Gfs_msg_bldr implements GfoMsgParser {
|
||||
private final Object thread_lock = new Object();
|
||||
private final Gfs_parser parser = new Gfs_parser();
|
||||
public GfoMsg ParseToMsg(String s) {return Bld(s);}
|
||||
public GfoMsg Bld(String src) {return Bld(Bry_.new_u8(src));}
|
||||
public GfoMsg Bld(byte[] src) {
|
||||
synchronized (thread_lock) { // LOCK:Gfs_parser called when converting messages in Xow_msg_mgr; DATE:2016-10-18
|
||||
Gfs_nde nde = parser.Parse(src);
|
||||
return Bld_msg(src, nde);
|
||||
}
|
||||
}
|
||||
private GfoMsg Bld_msg(byte[] src, Gfs_nde nde) {
|
||||
boolean op_is_assign = (nde.Op_tid() == Gfs_nde.Op_tid_assign);
|
||||
String name = String_.new_u8(nde.Name_bry(src));
|
||||
if (op_is_assign) name += Tkn_mutator;
|
||||
GfoMsg rv = GfoMsg_.new_parse_(name);
|
||||
int len = nde.Atrs_len();
|
||||
for (int i = 0; i < len; i++) {
|
||||
Gfs_nde atr = nde.Atrs_get_at(i);
|
||||
rv.Add("", String_.new_u8(atr.Name_bry(src)));
|
||||
}
|
||||
len = nde.Subs_len();
|
||||
for (int i = 0; i < len; i++) {
|
||||
Gfs_nde sub = nde.Subs_get_at(i);
|
||||
if (op_is_assign) // NOTE: for now (a) assignss cannot be nested; EX: "a.b = c;" is okay but "a.b = c.d;" is not
|
||||
rv.Add("", Bld_msg(src, sub).Key());
|
||||
else
|
||||
rv.Subs_add(Bld_msg(src, sub));
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
public static final Gfs_msg_bldr Instance = new Gfs_msg_bldr(); Gfs_msg_bldr() {} // TS.static
|
||||
public static final String Tkn_mutator = "_";
|
||||
}
|
||||
|
||||
@@ -13,3 +13,62 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.gfs; import gplx.*; import gplx.langs.*;
|
||||
import org.junit.*; import gplx.core.strings.*;
|
||||
public class Gfs_msg_bldr_tst {
|
||||
@Before public void init() {fxt.Clear();} Gfs_msg_bldr_fxt fxt = new Gfs_msg_bldr_fxt();
|
||||
@Test public void Basic() {
|
||||
fxt.Test_build("a;", fxt.msg_("a"));
|
||||
}
|
||||
@Test public void Dot() {
|
||||
fxt.Test_build("a.b.c;"
|
||||
, fxt.msg_("a").Subs_
|
||||
( fxt.msg_("b").Subs_
|
||||
( fxt.msg_("c")
|
||||
)));
|
||||
}
|
||||
@Test public void Args() {
|
||||
fxt.Test_build("a('b', 'c');", fxt.msg_("a", fxt.kv_("", "b"), fxt.kv_("", "c")));
|
||||
}
|
||||
@Test public void Args_num() {
|
||||
fxt.Test_build("a(1);", fxt.msg_("a", fxt.kv_("", "1")));
|
||||
}
|
||||
@Test public void Assign() {
|
||||
fxt.Test_build("a = 'b';", fxt.msg_("a_", fxt.kv_("", "b")));
|
||||
}
|
||||
@Test public void Assign_num() {
|
||||
fxt.Test_build("a = 1;", fxt.msg_("a_", fxt.kv_("", "1")));
|
||||
}
|
||||
}
|
||||
class Gfs_msg_bldr_fxt {
|
||||
public void Clear() {} String_bldr sb = String_bldr_.new_(); Gfs_msg_bldr msg_bldr = Gfs_msg_bldr.Instance;
|
||||
public Keyval kv_(String key, String val) {return Keyval_.new_(key, val);}
|
||||
public GfoMsg msg_(String key, Keyval... args) {
|
||||
GfoMsg rv = GfoMsg_.new_parse_(key);
|
||||
int len = args.length;
|
||||
for (int i = 0; i < len; i++) {
|
||||
Keyval kv = args[i];
|
||||
rv.Add(kv.Key(), kv.Val());
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
public void Test_build(String raw, GfoMsg... expd) {
|
||||
GfoMsg root = msg_bldr.Bld(raw);
|
||||
Tfds.Eq_str_lines(Xto_str(expd), Xto_str(To_ary(root)));
|
||||
}
|
||||
GfoMsg[] To_ary(GfoMsg msg) {
|
||||
int len = msg.Subs_count();
|
||||
GfoMsg[] rv = new GfoMsg[len];
|
||||
for (int i = 0; i < len; i++)
|
||||
rv[i] = msg.Subs_getAt(i);
|
||||
return rv;
|
||||
}
|
||||
String Xto_str(GfoMsg[] ary) {
|
||||
int len = ary.length;
|
||||
for (int i = 0; i < len; i++) {
|
||||
if (i != 0) sb.Add_char_crlf();
|
||||
sb.Add(ary[i].To_str());
|
||||
}
|
||||
return sb.To_str_and_clear();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,71 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.gfs; import gplx.*; import gplx.langs.*;
|
||||
public class Gfs_nde {
|
||||
public byte[] Name_bry(byte[] src) {return name == null ? Bry_.Mid(src, name_bgn, name_end) : name;}
|
||||
public byte[] Name() {return name;} public Gfs_nde Name_(byte[] v) {name = v; return this;} private byte[] name;
|
||||
public int Name_bgn() {return name_bgn;} private int name_bgn = -1;
|
||||
public int Name_end() {return name_end;} private int name_end = -1;
|
||||
public Gfs_nde Name_rng_(int name_bgn, int name_end) {this.name_bgn = name_bgn; this.name_end = name_end; return this;}
|
||||
public byte Op_tid() {return op_tid;} public Gfs_nde Op_tid_(byte v) {op_tid = v; return this;} private byte op_tid;
|
||||
public void Subs_clear() {
|
||||
for (int i = 0; i < subs_len; i++)
|
||||
subs[i] = null;
|
||||
subs_len = 0;
|
||||
}
|
||||
public int Subs_len() {return subs_len;} private int subs_len;
|
||||
public Gfs_nde Subs_add_many(Gfs_nde... ary) {
|
||||
int len = ary.length;
|
||||
for (int i = 0; i < len; i++)
|
||||
Subs_add(ary[i]);
|
||||
return this;
|
||||
}
|
||||
public Gfs_nde Subs_add(Gfs_nde nde) {
|
||||
int new_len = subs_len + 1;
|
||||
if (new_len > subs_max) { // ary too small >>> expand
|
||||
subs_max = new_len * 2;
|
||||
Gfs_nde[] new_subs = new Gfs_nde[subs_max];
|
||||
Array_.Copy_to(subs, 0, new_subs, 0, subs_len);
|
||||
subs = new_subs;
|
||||
}
|
||||
subs[subs_len] = nde;
|
||||
subs_len = new_len;
|
||||
return this;
|
||||
} Gfs_nde[] subs = Gfs_nde.Ary_empty; int subs_max; int[] subs_pos_ary = Int_ary_.Empty;
|
||||
public Gfs_nde Subs_get_at(int i) {return subs[i];}
|
||||
public Gfs_nde[] Subs_to_ary() {
|
||||
Gfs_nde[] rv = new Gfs_nde[subs_len];
|
||||
for (int i = 0; i < subs_len; i++)
|
||||
rv[i] = subs[i];
|
||||
return rv;
|
||||
}
|
||||
public int Atrs_len() {return args_len;} private int args_len;
|
||||
public Gfs_nde Atrs_get_at(int i) {return args[i];}
|
||||
public Gfs_nde Atrs_add_many(Gfs_nde... ary) {
|
||||
int len = ary.length;
|
||||
for (int i = 0; i < len; i++)
|
||||
Atrs_add(ary[i]);
|
||||
return this;
|
||||
}
|
||||
public Gfs_nde Atrs_add(Gfs_nde nde) {
|
||||
int new_len = args_len + 1;
|
||||
if (new_len > args_max) { // ary too small >>> expand
|
||||
args_max = new_len * 2;
|
||||
Gfs_nde[] new_args = new Gfs_nde[args_max];
|
||||
Array_.Copy_to(args, 0, new_args, 0, args_len);
|
||||
args = new_args;
|
||||
}
|
||||
args[args_len] = nde;
|
||||
args_len = new_len;
|
||||
return this;
|
||||
} Gfs_nde[] args = Gfs_nde.Ary_empty; int args_max; int[] args_pos_ary = Int_ary_.Empty;
|
||||
public Gfs_nde[] Atrs_to_ary() {
|
||||
Gfs_nde[] rv = new Gfs_nde[args_len];
|
||||
for (int i = 0; i < args_len; i++)
|
||||
rv[i] = args[i];
|
||||
return rv;
|
||||
}
|
||||
public static final Gfs_nde[] Ary_empty = new Gfs_nde[0];
|
||||
public static final byte Op_tid_null = 0, Op_tid_assign = 1;
|
||||
}
|
||||
|
||||
@@ -13,3 +13,91 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.gfs; import gplx.*; import gplx.langs.*;
|
||||
import gplx.core.btries.*;
|
||||
public class Gfs_parser {
|
||||
private final Btrie_fast_mgr trie = Gfs_parser_.trie_();
|
||||
private final Gfs_parser_ctx ctx = new Gfs_parser_ctx();
|
||||
public Gfs_nde Parse(byte[] src) {
|
||||
ctx.Root().Subs_clear();
|
||||
int src_len = src.length; if (src_len == 0) return ctx.Root();
|
||||
ctx.Init(trie, src, src_len);
|
||||
int pos = 0;
|
||||
while (pos < src_len) {
|
||||
byte b = src[pos];
|
||||
Object o = trie.Match_at_w_b0(ctx.Trie_rv(), b, src, pos, src_len);
|
||||
if (o == null)
|
||||
ctx.Err_mgr().Fail_unknown_char(ctx, pos, b);
|
||||
else {
|
||||
Gfs_lxr lxr = (Gfs_lxr)o;
|
||||
while (lxr != null) {
|
||||
int rslt = lxr.Process(ctx, pos, ctx.Trie_rv().Pos());
|
||||
switch (lxr.Lxr_tid()) {
|
||||
case Gfs_lxr_.Tid_whitespace: break;
|
||||
case Gfs_lxr_.Tid_comment: break;
|
||||
default: ctx.Prv_lxr_(lxr.Lxr_tid()); break;
|
||||
}
|
||||
switch (rslt) {
|
||||
case Gfs_lxr_.Rv_lxr:
|
||||
pos = ctx.Nxt_pos();
|
||||
lxr = ctx.Nxt_lxr();
|
||||
break;
|
||||
case Gfs_lxr_.Rv_eos:
|
||||
pos = src_len;
|
||||
lxr = null;
|
||||
break;
|
||||
default:
|
||||
pos = rslt;
|
||||
lxr = null;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
switch (ctx.Prv_lxr()) {
|
||||
case Gfs_lxr_.Tid_curly_end:
|
||||
case Gfs_lxr_.Tid_semic: break;
|
||||
default: ctx.Err_mgr().Fail_eos(ctx); break;
|
||||
}
|
||||
return ctx.Root();
|
||||
}
|
||||
}
|
||||
class Gfs_parser_ {
|
||||
public static Btrie_fast_mgr trie_() {
|
||||
Btrie_fast_mgr rv = Btrie_fast_mgr.ci_a7(); // NOTE:ci.ascii:gfs;letters/symbols only;
|
||||
Gfs_lxr_identifier word_lxr = Gfs_lxr_identifier.Instance;
|
||||
trie_add_rng(rv, word_lxr, Byte_ascii.Ltr_a, Byte_ascii.Ltr_z);
|
||||
trie_add_rng(rv, word_lxr, Byte_ascii.Ltr_A, Byte_ascii.Ltr_Z);
|
||||
trie_add_rng(rv, word_lxr, Byte_ascii.Num_0, Byte_ascii.Num_9);
|
||||
rv.Add(Byte_ascii.Underline, word_lxr);
|
||||
trie_add_many(rv, Gfs_lxr_whitespace.Instance, Byte_ascii.Space, Byte_ascii.Nl, Byte_ascii.Cr, Byte_ascii.Tab);
|
||||
trie_add_quote(rv, new byte[] {Byte_ascii.Apos});
|
||||
trie_add_quote(rv, new byte[] {Byte_ascii.Quote});
|
||||
trie_add_quote(rv, Bry_.new_a7("<:[\"\n"), Bry_.new_a7("\n\"]:>"));
|
||||
trie_add_quote(rv, Bry_.new_a7("<:['\n"), Bry_.new_a7("\n']:>"));
|
||||
trie_add_quote(rv, Bry_.new_a7("<:{'"), Bry_.new_a7("'}:>"));
|
||||
trie_add_comment(rv, new byte[] {Byte_ascii.Slash, Byte_ascii.Slash}, new byte[] {Byte_ascii.Nl});
|
||||
trie_add_comment(rv, new byte[] {Byte_ascii.Slash, Byte_ascii.Star}, new byte[] {Byte_ascii.Star, Byte_ascii.Slash});
|
||||
rv.Add(Byte_ascii.Semic, Gfs_lxr_semic.Instance);
|
||||
rv.Add(Byte_ascii.Paren_bgn, Gfs_lxr_paren_bgn.Instance);
|
||||
rv.Add(Byte_ascii.Paren_end, Gfs_lxr_paren_end.Instance);
|
||||
rv.Add(Byte_ascii.Curly_bgn, Gfs_lxr_curly_bgn.Instance);
|
||||
rv.Add(Byte_ascii.Curly_end, Gfs_lxr_curly_end.Instance);
|
||||
rv.Add(Byte_ascii.Dot, Gfs_lxr_dot.Instance);
|
||||
rv.Add(Byte_ascii.Comma, Gfs_lxr_comma.Instance);
|
||||
rv.Add(Byte_ascii.Eq, Gfs_lxr_equal.Instance);
|
||||
return rv;
|
||||
}
|
||||
private static void trie_add_rng(Btrie_fast_mgr trie, Gfs_lxr lxr, byte bgn, byte end) {
|
||||
for (byte b = bgn; b <= end; b++)
|
||||
trie.Add(b, lxr);
|
||||
}
|
||||
private static void trie_add_many(Btrie_fast_mgr trie, Gfs_lxr lxr, byte... ary) {
|
||||
int len = ary.length;
|
||||
for (int i = 0; i < len; i++)
|
||||
trie.Add(ary[i], lxr);
|
||||
}
|
||||
private static void trie_add_quote(Btrie_fast_mgr trie, byte[] bgn) {trie_add_quote(trie, bgn, bgn);}
|
||||
private static void trie_add_quote(Btrie_fast_mgr trie, byte[] bgn, byte[] end) {trie.Add(bgn, new Gfs_lxr_quote(bgn, end));}
|
||||
private static void trie_add_comment(Btrie_fast_mgr trie, byte[] bgn, byte[] end) {trie.Add(bgn, new Gfs_lxr_comment_flat(bgn, end));}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,113 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.gfs; import gplx.*; import gplx.langs.*;
|
||||
import gplx.core.btries.*;
|
||||
class Gfs_parser_ctx {
|
||||
public Btrie_fast_mgr Trie() {return trie;} Btrie_fast_mgr trie;
|
||||
public Btrie_rv Trie_rv() {return trie_rv;} private final Btrie_rv trie_rv = new Btrie_rv();
|
||||
public Gfs_nde Root() {return root;} Gfs_nde root = new Gfs_nde();
|
||||
public byte[] Src() {return src;} private byte[] src;
|
||||
public int Src_len() {return src_len;} private int src_len;
|
||||
public int Prv_lxr() {return prv_lxr;} public Gfs_parser_ctx Prv_lxr_(int v) {prv_lxr = v; return this;} private int prv_lxr;
|
||||
public Gfs_nde Cur_nde() {return cur_nde;} Gfs_nde cur_nde;
|
||||
public int Nxt_pos() {return nxt_pos;} private int nxt_pos;
|
||||
public Gfs_lxr Nxt_lxr() {return nxt_lxr;} Gfs_lxr nxt_lxr;
|
||||
public Bry_bfr Tmp_bfr() {return tmp_bfr;} private Bry_bfr tmp_bfr = Bry_bfr_.New();
|
||||
public void Process_eos() {}
|
||||
public void Process_lxr(int nxt_pos, Gfs_lxr nxt_lxr) {this.nxt_pos = nxt_pos; this.nxt_lxr = nxt_lxr;}
|
||||
public void Process_null(int cur_pos) {this.nxt_pos = cur_pos; this.nxt_lxr = null;}
|
||||
public void Init(Btrie_fast_mgr trie, byte[] src, int src_len) {
|
||||
this.trie = trie; this.src = src; this.src_len = src_len;
|
||||
cur_nde = root;
|
||||
Stack_add();
|
||||
}
|
||||
public void Hold_word(int bgn, int end) {
|
||||
cur_idf_bgn = bgn;
|
||||
cur_idf_end = end;
|
||||
} int cur_idf_bgn = -1, cur_idf_end = -1;
|
||||
private void Held_word_clear() {cur_idf_bgn = -1; cur_idf_end = -1;}
|
||||
public Gfs_nde Make_nde(int tkn_bgn, int tkn_end) { // "abc."; "abc("; "abc;"; "abc{"
|
||||
Gfs_nde nde = new Gfs_nde().Name_rng_(cur_idf_bgn, cur_idf_end);
|
||||
this.Held_word_clear();
|
||||
cur_nde.Subs_add(nde);
|
||||
cur_nde = nde;
|
||||
return nde;
|
||||
}
|
||||
public void Make_atr_by_idf() {Make_atr(cur_idf_bgn, cur_idf_end); Held_word_clear();}
|
||||
public void Make_atr_by_bry(int bgn, int end, byte[] bry) {Make_atr(bgn, end).Name_(bry);}
|
||||
public Gfs_nde Make_atr(int bgn, int end) {
|
||||
Gfs_nde nde = new Gfs_nde().Name_rng_(bgn, end);
|
||||
cur_nde.Atrs_add(nde);
|
||||
return nde;
|
||||
}
|
||||
public void Cur_nde_from_stack() {cur_nde = (Gfs_nde)nodes.Get_at_last();}
|
||||
public void Stack_add() {nodes.Add(cur_nde);} List_adp nodes = List_adp_.New();
|
||||
public void Stack_pop(int pos) {
|
||||
if (nodes.Count() < 2) err_mgr.Fail_nde_stack_empty(this, pos); // NOTE: need at least 2 items; 1 to pop and 1 to set as current
|
||||
List_adp_.Del_at_last(nodes);
|
||||
Cur_nde_from_stack();
|
||||
}
|
||||
public Gfs_err_mgr Err_mgr() {return err_mgr;} Gfs_err_mgr err_mgr = new Gfs_err_mgr();
|
||||
}
|
||||
class Gfs_err_mgr {
|
||||
public void Fail_eos(Gfs_parser_ctx ctx) {Fail(ctx, Fail_msg_eos, ctx.Src_len());}
|
||||
public void Fail_unknown_char(Gfs_parser_ctx ctx, int pos, byte c) {Fail(ctx, Fail_msg_unknown_char, pos, Keyval_.new_("char", Char_.To_str((char)c)));}
|
||||
public void Fail_nde_stack_empty(Gfs_parser_ctx ctx, int pos) {Fail(ctx, Fail_msg_nde_stack_empty, pos);}
|
||||
public void Fail_invalid_lxr(Gfs_parser_ctx ctx, int pos, int lxr_tid, byte c) {
|
||||
Fail(ctx, Fail_msg_invalid_lxr, pos, Keyval_.new_("char", Char_.To_str((char)c)), Keyval_.new_("cur_lxr", Gfs_lxr_.Tid__name(lxr_tid)), Keyval_.new_("prv_lxr", Gfs_lxr_.Tid__name(ctx.Prv_lxr())));
|
||||
}
|
||||
private void Fail(Gfs_parser_ctx ctx, String msg, int pos, Keyval... args) {
|
||||
byte[] src = ctx.Src(); int src_len = ctx.Src_len();
|
||||
Fail_args_standard(src, src_len, pos);
|
||||
int len = args.length;
|
||||
for (int i = 0; i < len; i++) {
|
||||
Keyval arg = args[i];
|
||||
tmp_fail_args.Add(arg.Key(), arg.Val_to_str_or_empty());
|
||||
}
|
||||
throw Err_.new_wo_type(Fail_msg(msg, tmp_fail_args));
|
||||
}
|
||||
private void Fail_args_standard(byte[] src, int src_len, int pos) {
|
||||
tmp_fail_args.Add("excerpt_bgn", Fail_excerpt_bgn(src, src_len, pos));
|
||||
tmp_fail_args.Add("excerpt_end", Fail_excerpt_end(src, src_len, pos));
|
||||
tmp_fail_args.Add("pos" , pos);
|
||||
}
|
||||
public static final String Fail_msg_invalid_lxr = "invalid character", Fail_msg_unknown_char = "unknown char", Fail_msg_eos = "end of stream", Fail_msg_nde_stack_empty = "node stack empty";
|
||||
String Fail_msg(String type, Keyval_list fail_args) {
|
||||
tmp_fail_bfr.Add_str_u8(type).Add_byte(Byte_ascii.Colon);
|
||||
int len = fail_args.Count();
|
||||
for (int i = 0; i < len; i++) {
|
||||
tmp_fail_bfr.Add_byte(Byte_ascii.Space);
|
||||
Keyval kv = fail_args.Get_at(i);
|
||||
tmp_fail_bfr.Add_str_u8(kv.Key());
|
||||
tmp_fail_bfr.Add_byte(Byte_ascii.Eq).Add_byte(Byte_ascii.Apos);
|
||||
tmp_fail_bfr.Add_str_u8(kv.Val_to_str_or_empty()).Add_byte(Byte_ascii.Apos);
|
||||
}
|
||||
return tmp_fail_bfr.To_str_and_clear();
|
||||
}
|
||||
Bry_bfr tmp_fail_bfr = Bry_bfr_.Reset(255);
|
||||
Keyval_list tmp_fail_args = new Keyval_list();
|
||||
private static int excerpt_len = 50;
|
||||
String Fail_excerpt_bgn(byte[] src, int src_len, int pos) {
|
||||
int bgn = pos - excerpt_len; if (bgn < 0) bgn = 0;
|
||||
Fail_excerpt_rng(tmp_fail_bfr, src, bgn, pos);
|
||||
return tmp_fail_bfr.To_str_and_clear();
|
||||
}
|
||||
String Fail_excerpt_end(byte[] src, int src_len, int pos) {
|
||||
int end = pos + excerpt_len; if (end > src_len) end = src_len;
|
||||
Fail_excerpt_rng(tmp_fail_bfr, src, pos, end);
|
||||
return tmp_fail_bfr.To_str_and_clear();
|
||||
}
|
||||
private static void Fail_excerpt_rng(Bry_bfr bfr, byte[] src, int bgn, int end) {
|
||||
for (int i = bgn; i < end; i++) {
|
||||
byte b = src[i];
|
||||
switch (b) {
|
||||
case Byte_ascii.Tab: bfr.Add(Esc_tab); break;
|
||||
case Byte_ascii.Nl: bfr.Add(Esc_nl); break;
|
||||
case Byte_ascii.Cr: bfr.Add(Esc_cr); break;
|
||||
default: bfr.Add_byte(b); break;
|
||||
}
|
||||
}
|
||||
}
|
||||
private static final byte[] Esc_nl = Bry_.new_a7("\\n"), Esc_cr = Bry_.new_a7("\\r"), Esc_tab = Bry_.new_a7("\\t");
|
||||
}
|
||||
|
||||
@@ -13,3 +13,182 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.gfs; import gplx.*; import gplx.langs.*;
|
||||
import org.junit.*;
|
||||
public class Gfs_parser_tst {
|
||||
@Before public void init() {fxt.Clear();} Gfs_parser_fxt fxt = new Gfs_parser_fxt();
|
||||
@Test public void Semicolon() {
|
||||
fxt .Test_parse("a;", fxt.nde_("a"));
|
||||
fxt .Test_parse("a;b;c;", fxt.nde_("a"), fxt.nde_("b"), fxt.nde_("c"));
|
||||
fxt .Test_parse("a_0;", fxt.nde_("a_0"));
|
||||
}
|
||||
@Test public void Dot() {
|
||||
fxt .Test_parse("a.b;", fxt.nde_("a").Subs_add(fxt.nde_("b")));
|
||||
fxt .Test_parse("a.b;c.d;", fxt.nde_("a").Subs_add(fxt.nde_("b")), fxt.nde_("c").Subs_add(fxt.nde_("d")));
|
||||
}
|
||||
@Test public void Parens() {
|
||||
fxt .Test_parse("a();b();", fxt.nde_("a"), fxt.nde_("b"));
|
||||
fxt .Test_parse("a().b();c().d();", fxt.nde_("a").Subs_add(fxt.nde_("b")), fxt.nde_("c").Subs_add(fxt.nde_("d")));
|
||||
}
|
||||
@Test public void Num() {
|
||||
fxt .Test_parse("a(1,2);", fxt.nde_("a").Atrs_add_many(fxt.val_("1"), fxt.val_("2")));
|
||||
}
|
||||
@Test public void Quote() {
|
||||
fxt .Test_parse("a('b');", fxt.nde_("a").Atrs_add(fxt.val_("b")));
|
||||
}
|
||||
@Test public void Quote_escaped() {
|
||||
fxt .Test_parse("a('b''c''d');", fxt.nde_("a").Atrs_add(fxt.val_("b'c'd")));
|
||||
}
|
||||
@Test public void Quote_escaped_2() {
|
||||
fxt .Test_parse("a('a''''b');", fxt.nde_("a").Atrs_add(fxt.val_("a''b")));
|
||||
}
|
||||
@Test public void Quote_mixed() {
|
||||
fxt .Test_parse("a('b\"c');", fxt.nde_("a").Atrs_add(fxt.val_("b\"c")));
|
||||
}
|
||||
@Test public void Comma() {
|
||||
fxt .Test_parse("a('b','c','d');", fxt.nde_("a").Atrs_add_many(fxt.val_("b"), fxt.val_("c"), fxt.val_("d")));
|
||||
}
|
||||
@Test public void Ws() {
|
||||
fxt .Test_parse(" a ( 'b' , 'c' ) ; ", fxt.nde_("a").Atrs_add_many(fxt.val_("b"), fxt.val_("c")));
|
||||
}
|
||||
@Test public void Comment_slash_slash() {
|
||||
fxt .Test_parse("//z\na;//y\n", fxt.nde_("a"));
|
||||
}
|
||||
@Test public void Comment_slash_star() {
|
||||
fxt .Test_parse("/*z*/a;/*y*/", fxt.nde_("a"));
|
||||
}
|
||||
@Test public void Curly() {
|
||||
fxt .Test_parse("a{b;}", fxt.nde_("a").Subs_add(fxt.nde_("b")));
|
||||
}
|
||||
@Test public void Curly_nest() {
|
||||
fxt .Test_parse("a{b{c{d;}}}"
|
||||
, fxt.nde_("a").Subs_add
|
||||
( fxt.nde_("b").Subs_add
|
||||
( fxt.nde_("c").Subs_add
|
||||
( fxt.nde_("d")
|
||||
))));
|
||||
}
|
||||
@Test public void Curly_nest_peers() {
|
||||
fxt .Test_parse(String_.Concat_lines_nl
|
||||
( "a{"
|
||||
, " a0{"
|
||||
, " a00{"
|
||||
, " a000;"
|
||||
, " }"
|
||||
, " a01;"
|
||||
, " }"
|
||||
, " a1;"
|
||||
, "}"
|
||||
)
|
||||
, fxt.nde_("a").Subs_add_many
|
||||
( fxt.nde_("a0").Subs_add_many
|
||||
( fxt.nde_("a00").Subs_add
|
||||
( fxt.nde_("a000")
|
||||
)
|
||||
, fxt.nde_("a01")
|
||||
)
|
||||
, fxt.nde_("a1")
|
||||
));
|
||||
}
|
||||
@Test public void Curly_dot() {
|
||||
fxt .Test_parse("a{a0.a00;a1.a10;}"
|
||||
, fxt.nde_("a").Subs_add_many
|
||||
( fxt.nde_("a0").Subs_add_many(fxt.nde_("a00"))
|
||||
, fxt.nde_("a1").Subs_add_many(fxt.nde_("a10"))
|
||||
));
|
||||
}
|
||||
@Test public void Eq() {
|
||||
fxt .Test_parse("a='b';", fxt.nde_("a").Atrs_add(fxt.val_("b")));
|
||||
fxt .Test_parse("a.b.c='d';"
|
||||
, fxt.nde_("a").Subs_add
|
||||
( fxt.nde_("b").Subs_add_many
|
||||
( fxt.nde_("c").Atrs_add(fxt.val_("d"))
|
||||
)));
|
||||
fxt .Test_parse("a.b{c='d'; e='f'}"
|
||||
, fxt.nde_("a").Subs_add
|
||||
( fxt.nde_("b").Subs_add_many
|
||||
( fxt.nde_("c").Atrs_add(fxt.val_("d"))
|
||||
, fxt.nde_("e").Atrs_add(fxt.val_("f"))
|
||||
)));
|
||||
}
|
||||
@Test public void Curly_nest_peers2() {
|
||||
fxt .Test_parse(String_.Concat_lines_nl
|
||||
( "a() {"
|
||||
, " k1 = 'v1';"
|
||||
, "}"
|
||||
)
|
||||
, fxt.nde_("a").Subs_add_many
|
||||
( fxt.nde_("k1").Atrs_add(fxt.val_("v1"))
|
||||
)
|
||||
);
|
||||
}
|
||||
@Test public void Fail() {
|
||||
fxt .Test_parse_fail("a(.);", Gfs_err_mgr.Fail_msg_invalid_lxr); // (.)
|
||||
fxt .Test_parse_fail("a..b;", Gfs_err_mgr.Fail_msg_invalid_lxr); // ..
|
||||
fxt .Test_parse_fail("a.;", Gfs_err_mgr.Fail_msg_invalid_lxr); // .;
|
||||
fxt .Test_parse_fail("a", Gfs_err_mgr.Fail_msg_eos); // eos
|
||||
fxt .Test_parse_fail("a;~;", Gfs_err_mgr.Fail_msg_unknown_char); // ~
|
||||
}
|
||||
}
|
||||
class Gfs_parser_fxt {
|
||||
public void Clear() {}
|
||||
public Gfs_nde nde_(String v) {return new Gfs_nde().Name_(Bry_.new_a7(v));}
|
||||
public Gfs_nde val_(String v) {return new Gfs_nde().Name_(Bry_.new_a7(v));}
|
||||
public void Test_parse(String src_str, Gfs_nde... expd) {
|
||||
byte[] src_bry = Bry_.new_u8(src_str);
|
||||
Gfs_nde root = parser.Parse(src_bry);
|
||||
Tfds.Eq_str_lines(To_str(null, expd), To_str(src_bry, root.Subs_to_ary()));
|
||||
} private Bry_bfr tmp_bfr = Bry_bfr_.New(), path_bfr = Bry_bfr_.New(); Gfs_parser parser = new Gfs_parser();
|
||||
public void Test_parse_fail(String src_str, String expd_err) {
|
||||
byte[] src_bry = Bry_.new_u8(src_str);
|
||||
try {parser.Parse(src_bry);}
|
||||
catch (Exception e) {
|
||||
String actl_err = Err_.Message_gplx_full(e);
|
||||
actl_err = String_.GetStrBefore(actl_err, ":");
|
||||
boolean match = String_.Has(actl_err, expd_err);
|
||||
if (!match) Tfds.Fail("expecting '" + expd_err + "' got '" + actl_err + "'");
|
||||
return;
|
||||
}
|
||||
Tfds.Fail("expected to fail with " + expd_err);
|
||||
}
|
||||
public String To_str(byte[] src, Gfs_nde[] expd) {
|
||||
int subs_len = expd.length;
|
||||
for (int i = 0; i < subs_len; i++) {
|
||||
path_bfr.Clear().Add_int_variable(i);
|
||||
To_str(tmp_bfr, path_bfr, src, expd[i]);
|
||||
}
|
||||
return tmp_bfr.To_str_and_clear();
|
||||
}
|
||||
public void To_str(Bry_bfr bfr, Bry_bfr path, byte[] src, Gfs_nde nde) {
|
||||
To_str_atr(bfr, path, src, Atr_name, nde.Name(), nde.Name_bgn(), nde.Name_end());
|
||||
int atrs_len = nde.Atrs_len();
|
||||
for (int i = 0; i < atrs_len; i++) {
|
||||
Gfs_nde atr = nde.Atrs_get_at(i);
|
||||
int path_len_old = path.Len();
|
||||
path.Add_byte(Byte_ascii.Dot).Add_byte((byte)(Byte_ascii.Ltr_a + i));
|
||||
int path_len_new = path.Len();
|
||||
To_str(bfr, path, src, atr);
|
||||
path.Del_by(path_len_new - path_len_old);
|
||||
}
|
||||
int subs_len = nde.Subs_len();
|
||||
for (int i = 0; i < subs_len; i++) {
|
||||
Gfs_nde sub = nde.Subs_get_at(i);
|
||||
int path_len_old = path.Len();
|
||||
path.Add_byte(Byte_ascii.Dot).Add_int_variable(i);
|
||||
int path_len_new = path.Len();
|
||||
To_str(bfr, path, src, sub);
|
||||
path.Del_by(path_len_new - path_len_old);
|
||||
}
|
||||
}
|
||||
private void To_str_atr(Bry_bfr bfr, Bry_bfr path_bfr, byte[] src, byte[] name, byte[] val, int val_bgn, int val_end) {
|
||||
if (val == null && val_bgn == -1 && val_end == -1) return;
|
||||
bfr.Add_bfr_and_preserve(path_bfr).Add_byte(Byte_ascii.Colon);
|
||||
bfr.Add(name);
|
||||
if (val == null)
|
||||
bfr.Add_mid(src, val_bgn, val_end);
|
||||
else
|
||||
bfr.Add(val);
|
||||
bfr.Add_byte_nl();
|
||||
}
|
||||
private static final byte[] Atr_name = Bry_.new_a7("name=");
|
||||
}
|
||||
|
||||
@@ -13,3 +13,32 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.gfs; import gplx.*; import gplx.langs.*;
|
||||
public class Gfs_wtr {
|
||||
public byte Quote_char() {return quote_char;} public Gfs_wtr Quote_char_(byte v) {quote_char = v; return this;} private byte quote_char = Byte_ascii.Apos;
|
||||
public Bry_bfr Bfr() {return bfr;} private Bry_bfr bfr = Bry_bfr_.Reset(255);
|
||||
public void Add_grp_bgn(byte[] key) {
|
||||
bfr.Add(key); // key
|
||||
bfr.Add_byte(Byte_ascii.Curly_bgn); // {
|
||||
}
|
||||
public void Add_grp_end(byte[] key) {
|
||||
bfr.Add_byte(Byte_ascii.Curly_end); // }
|
||||
}
|
||||
public void Add_set_eq(byte[] key, byte[] val) {
|
||||
bfr.Add(key); // key
|
||||
bfr.Add_byte_eq(); // =
|
||||
bfr.Add_byte(quote_char); // '
|
||||
Write_val(val);
|
||||
bfr.Add_byte(quote_char); // '
|
||||
bfr.Add_byte(Byte_ascii.Semic); // ;
|
||||
}
|
||||
private void Write_val(byte[] bry) {
|
||||
int bry_len = bry.length;
|
||||
for (int i = 0; i < bry_len; i++) {
|
||||
byte b = bry[i];
|
||||
if (b == quote_char) // byte is quote
|
||||
bfr.Add_byte(b); // double up
|
||||
bfr.Add_byte(b);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,62 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.htmls; import gplx.*; import gplx.langs.*;
|
||||
public class Gfh_atr_ {
|
||||
public static final byte[]
|
||||
// "coreattrs"
|
||||
Bry__id = Bry_.new_a7("id")
|
||||
, Bry__class = Bry_.new_a7("class")
|
||||
, Bry__style = Bry_.new_a7("style")
|
||||
, Bry__title = Bry_.new_a7("title")
|
||||
// "i18n"
|
||||
, Bry__lang = Bry_.new_a7("lang")
|
||||
, Bry__dir = Bry_.new_a7("dir")
|
||||
// <a>
|
||||
, Bry__href = Bry_.new_a7("href")
|
||||
, Bry__rel = Bry_.new_a7("rel")
|
||||
, Bry__target = Bry_.new_a7("target")
|
||||
// <img>
|
||||
, Bry__alt = Bry_.new_a7("alt")
|
||||
, Bry__src = Bry_.new_a7("src")
|
||||
, Bry__width = Bry_.new_a7("width")
|
||||
, Bry__height = Bry_.new_a7("height")
|
||||
// <table>
|
||||
//, Bry__width = Bry_.new_a7("width")
|
||||
, Bry__cellpadding = Bry_.new_a7("cellpadding")
|
||||
, Bry__cellspacing = Bry_.new_a7("cellspacing")
|
||||
, Bry__summary = Bry_.new_a7("summary") // HTML.ua
|
||||
// <table>.borders_and_rules
|
||||
, Bry__border = Bry_.new_a7("border")
|
||||
, Bry__frames = Bry_.new_a7("frames")
|
||||
, Bry__rules = Bry_.new_a7("rules")
|
||||
// <th>,<td>
|
||||
, Bry__scope = Bry_.new_a7("scope")
|
||||
, Bry__rowspan = Bry_.new_a7("rowspan")
|
||||
, Bry__colspan = Bry_.new_a7("colspan")
|
||||
, Bry__align = Bry_.new_a7("align") // HTML.v4
|
||||
, Bry__bgcolor = Bry_.new_a7("bgcolor") // HTML.v4
|
||||
, Bry__abbr = Bry_.new_a7("abbr") // HTML.ua
|
||||
, Bry__srcset = Bry_.new_a7("srcset")
|
||||
;
|
||||
public static byte[] Make(Bry_bfr bfr, byte[] key, byte[] val) {
|
||||
return bfr.Add_byte_space().Add(key).Add_byte_eq().Add_byte_quote().Add(val).Add_byte_quote().To_bry_and_clear();
|
||||
}
|
||||
public static byte[] Add_to_bry(Bry_bfr bfr, byte[] key, byte[] val) {
|
||||
bfr.Add_byte_space().Add(key).Add_byte_eq().Add_byte_quote().Add(val).Add_byte_quote();
|
||||
return bfr.To_bry_and_clear();
|
||||
}
|
||||
public static void Add(Bry_bfr bfr, byte[] key, byte[] val) {
|
||||
bfr.Add_byte_space().Add(key).Add_byte_eq().Add_byte_quote().Add(val).Add_byte_quote();
|
||||
}
|
||||
public static void Add(Bry_bfr bfr, byte[] key, int val) {
|
||||
bfr.Add_byte_space().Add(key).Add_byte_eq().Add_byte_quote();
|
||||
bfr.Add_int_variable(val);
|
||||
bfr.Add_byte_quote();
|
||||
}
|
||||
public static void Add_double(Bry_bfr bfr, byte[] key, double val) {
|
||||
bfr.Add_byte_space().Add(key).Add_byte_eq().Add_byte_quote();
|
||||
bfr.Add_double(val);
|
||||
bfr.Add_byte_quote();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,28 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.htmls; import gplx.*; import gplx.langs.*;
|
||||
public class Gfh_bldr_ {
|
||||
public static final byte[]
|
||||
Bry__a_lhs_bgn = Bry_.new_a7("<a")
|
||||
, Bry__a_rhs = Bry_.new_a7("</a>")
|
||||
, Bry__a_lhs_w_href = Bry_.new_a7("<a href=\"")
|
||||
, Bry__img_lhs_w_alt = Bry_.new_a7("<img alt=\"")
|
||||
, Bry__img_lhs = Bry_.new_a7("<img")
|
||||
, Bry__div_lhs = Bry_.new_a7("<div")
|
||||
, Bry__div_rhs = Bry_.new_a7("</div>")
|
||||
, Bry__id__1st = Bry_.new_a7(" id=\"")
|
||||
, Bry__id__nth = Bry_.new_a7("\" id=\"")
|
||||
, Bry__cls__1st = Bry_.new_a7(" class=\"")
|
||||
, Bry__cls__nth = Bry_.new_a7("\" class=\"")
|
||||
, Bry__title__nth = Bry_.new_a7("\" title=\"")
|
||||
, Bry__alt__nth = Bry_.new_a7("\" alt=\"")
|
||||
, Bry__src__nth = Bry_.new_a7("\" src=\"")
|
||||
, Bry__width__nth = Bry_.new_a7("\" width=\"")
|
||||
, Bry__height__nth = Bry_.new_a7("\" height=\"")
|
||||
, Bry__lhs_end_head = Bry_.new_a7(">")
|
||||
, Bry__lhs_end_head_w_quote = Bry_.new_a7("\">")
|
||||
, Bry__lhs_end_inline = Bry_.new_a7("/>")
|
||||
, Bry__lhs_end_inline_w_quote = Bry_.new_a7("\"/>")
|
||||
;
|
||||
}
|
||||
|
||||
@@ -13,3 +13,80 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.htmls; import gplx.*; import gplx.langs.*;
|
||||
public class Gfh_nde {
|
||||
public Gfh_nde(byte[] src, boolean tag_tid_is_inline, int tag_lhs_bgn, int tag_lhs_end, int tag_rhs_bgn, int tag_rhs_end, int name_bgn, int name_end, int[] cur_atrs, int atrs_idx) {
|
||||
this.src = src;
|
||||
this.tag_tid_is_inline = tag_tid_is_inline;
|
||||
this.tag_lhs_bgn = tag_lhs_bgn; this.tag_lhs_end = tag_lhs_end; this.tag_rhs_bgn = tag_rhs_bgn; this.tag_rhs_end = tag_rhs_end; this.name_bgn = name_bgn; this.name_end = name_end;
|
||||
if (atrs_idx > 0) {
|
||||
atrs = new int[atrs_idx];
|
||||
for (int i = 0; i < atrs_idx; i++)
|
||||
atrs[i] = cur_atrs[i];
|
||||
atrs_len = atrs_idx / 5;
|
||||
}
|
||||
}
|
||||
public byte[] Src() {return src;} private byte[] src;
|
||||
public int[] Atrs() {return atrs;} private int[] atrs = Int_ary_.Empty;
|
||||
public int Atrs_len() {return atrs_len;} private int atrs_len;
|
||||
public boolean Tag_tid_is_inline() {return tag_tid_is_inline;} private boolean tag_tid_is_inline;
|
||||
public int Tag_lhs_bgn() {return tag_lhs_bgn;} public Gfh_nde Tag_lhs_bgn_(int v) {tag_lhs_bgn = v; return this;} private int tag_lhs_bgn;
|
||||
public int Tag_lhs_end() {return tag_lhs_end;} public Gfh_nde Tag_lhs_end_(int v) {tag_lhs_end = v; return this;} private int tag_lhs_end;
|
||||
public int Tag_rhs_bgn() {return tag_rhs_bgn;} public Gfh_nde Tag_rhs_bgn_(int v) {tag_rhs_bgn = v; return this;} private int tag_rhs_bgn;
|
||||
public int Tag_rhs_end() {return tag_rhs_end;} public Gfh_nde Tag_rhs_end_(int v) {tag_rhs_end = v; return this;} private int tag_rhs_end;
|
||||
public int Name_bgn() {return name_bgn;} public Gfh_nde Name_bgn_(int v) {name_bgn = v; return this;} private int name_bgn;
|
||||
public int Name_end() {return name_end;} public Gfh_nde Name_end_(int v) {name_end = v; return this;} private int name_end;
|
||||
public void Clear() {tag_lhs_bgn = tag_rhs_bgn = -1;}
|
||||
public String Atrs_val_by_key_str(String find_key_str) {return String_.new_u8(Atrs_val_by_key_bry(Bry_.new_u8(find_key_str)));}
|
||||
public byte[] Atrs_val_by_key_bry(byte[] find_key_bry) {
|
||||
for (int i = 0; i < atrs_len; i ++) {
|
||||
int atrs_idx = i * 5;
|
||||
int atr_key_bgn = atrs[atrs_idx + 1];
|
||||
int atr_key_end = atrs[atrs_idx + 2];
|
||||
if (Bry_.Match(src, atr_key_bgn, atr_key_end, find_key_bry))
|
||||
return Atrs_vals_by_pos(src, atrs[atrs_idx + 0], atrs[atrs_idx + 3], atrs[atrs_idx + 4]);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
byte[] Atrs_vals_by_pos(byte[] src, int quote_byte, int bgn, int end) {
|
||||
Bry_bfr tmp_bfr = Bry_bfr_.New();
|
||||
boolean dirty = false;
|
||||
for (int i = bgn; i < end; i++) {
|
||||
byte b = src[i];
|
||||
switch (b) {
|
||||
case Byte_ascii.Backslash:
|
||||
if (!dirty) {dirty = true; tmp_bfr.Add_mid(src, bgn, i);}
|
||||
++i;
|
||||
tmp_bfr.Add_byte(src[i]);
|
||||
break;
|
||||
default:
|
||||
if (b == quote_byte) {
|
||||
byte next_byte = src[i + 1];
|
||||
if (next_byte == b) {
|
||||
if (!dirty) {dirty = true; tmp_bfr.Add_mid(src, bgn, i);}
|
||||
++i;
|
||||
tmp_bfr.Add_byte(src[i]);
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (dirty)
|
||||
tmp_bfr.Add_byte(b);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
return dirty ? tmp_bfr.To_bry_and_clear() : Bry_.Mid(src, bgn, end);
|
||||
}
|
||||
public byte[] Data(byte[] src) {
|
||||
return Bry_.Mid(src, tag_lhs_end, tag_rhs_bgn);
|
||||
}
|
||||
}
|
||||
// class Xoh_atr {
|
||||
// public byte[] Key_bry() {return key_bry;} private byte[] key_bry;
|
||||
// public byte[] Val_bry() {return val_bry;} private byte[] val_bry;
|
||||
// public int Key_bgn() {return key_bgn;} private int key_bgn;
|
||||
// public int Key_end() {return key_end;} private int key_end;
|
||||
// public int Val_bgn() {return val_bgn;} private int val_bgn;
|
||||
// public int Val_end() {return val_end;} private int val_end;
|
||||
// public byte Val_quote_tid() {return val_quote_tid;} private byte val_quote_tid;
|
||||
// }
|
||||
|
||||
@@ -13,3 +13,151 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.htmls; import gplx.*; import gplx.langs.*;
|
||||
import gplx.core.brys.*;
|
||||
public class Gfh_parser {
|
||||
/**
 * Builds the two byte tables used while scanning: one for element names and one for attribute keys.
 * Both mark the xml-identifier range as Scan_valid and whitespace as Scan_stop; the attribute-key
 * table additionally marks '=' as Scan_stop.
 */
public Gfh_parser() {
  Bry_bldr bldr = new Bry_bldr();
  bry_xnde_name = bldr.New_256()
    .Set_rng_xml_identifier(Scan_valid)
    .Set_rng_ws(Scan_stop)
    .Val();
  bry_atr_key = bldr.New_256()
    .Set_rng_xml_identifier(Scan_valid)
    .Set_rng_ws(Scan_stop)
    .Set_many(Scan_stop, Byte_ascii.Eq)
    .Val();
}
|
||||
byte[] src; int pos, end; byte[] bry_xnde_name, bry_atr_key;
|
||||
int cur_atrs_idx = 0; int[] cur_atrs = new int[250];// define max of 50 atrs;
|
||||
/** Parses all of src, passing Wildcard for both the find key and the find value. */
public Gfh_nde[] Parse_as_ary(byte[] src) {
  int src_len = src.length;
  return Parse_as_ary(src, 0, src_len, Wildcard, Wildcard);
}
|
||||
/** Parses src[bgn, end), passing Wildcard for both the find key and the find value. */
public Gfh_nde[] Parse_as_ary(byte[] src, int bgn, int end) {
  return Parse_as_ary(src, bgn, end, Wildcard, Wildcard);
}
|
||||
public Gfh_nde[] Parse_as_ary(byte[] src, int bgn, int end, byte[] find_key, byte[] find_val) { // flattens html into a list of hndes; only used for Options
|
||||
this.src = src; pos = bgn; this.end = end;
|
||||
List_adp rv = List_adp_.New();
|
||||
while (pos < end) {
|
||||
byte b = src[pos++];
|
||||
switch (b) {
|
||||
case Byte_ascii.Lt:
|
||||
if (xnde_init) {
|
||||
if (Parse_xnde_lhs()) {
|
||||
if (tag_tid_is_inline)
|
||||
rv.Add(new Gfh_nde(src, tag_tid_is_inline, cur_lhs_bgn, cur_lhs_end, cur_rhs_bgn, pos, cur_name_bgn, cur_name_end, cur_atrs, cur_atrs_idx));
|
||||
else
|
||||
xnde_init = false;
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (Parse_xnde_rhs()) {
|
||||
rv.Add(new Gfh_nde(src, tag_tid_is_inline, cur_lhs_bgn, cur_lhs_end, cur_rhs_bgn, pos, cur_name_bgn, cur_name_end, cur_atrs, cur_atrs_idx));
|
||||
}
|
||||
xnde_init = true;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
return (Gfh_nde[])rv.To_ary(Gfh_nde.class);
|
||||
}
|
||||
int cur_lhs_bgn, cur_lhs_end, cur_name_bgn, cur_name_end, cur_rhs_bgn; boolean xnde_init = true, tag_tid_is_inline = false;
|
||||
private boolean Parse_xnde_rhs() {
|
||||
cur_rhs_bgn = pos - 1; // -1 b/c "<" is already read
|
||||
byte b = src[pos];
|
||||
if (b != Byte_ascii.Slash) return false;
|
||||
++pos;
|
||||
int name_len = cur_name_end - cur_name_bgn;
|
||||
if (pos + name_len >= end) return false;
|
||||
if (!Bry_.Match(src, pos, pos + name_len, src, cur_name_bgn, cur_name_end)) return false;
|
||||
pos += name_len;
|
||||
if (src[pos] != Byte_ascii.Gt) return false;
|
||||
++pos;
|
||||
return true;
|
||||
}
|
||||
private boolean Parse_xnde_lhs() {
|
||||
cur_atrs_idx = 0;
|
||||
cur_lhs_bgn = pos - 1;
|
||||
cur_name_bgn = pos;
|
||||
tag_tid_is_inline = false;
|
||||
byte rslt = Skip_while_valid(this.bry_atr_key);
|
||||
if (rslt == Scan_invalid) return false;
|
||||
cur_name_end = pos;
|
||||
int key_bgn, key_end, val_bgn, quote_type;
|
||||
while (true) {
|
||||
if (pos >= end) return false;
|
||||
key_bgn = key_end = val_bgn = quote_type = -1;
|
||||
Skip_ws();
|
||||
byte b = src[pos];
|
||||
if (b == Byte_ascii.Slash) {
|
||||
++pos;
|
||||
if (pos == end) return false;
|
||||
byte next = src[pos];
|
||||
if (next == Byte_ascii.Gt) {
|
||||
tag_tid_is_inline = true;
|
||||
++pos;
|
||||
break;
|
||||
}
|
||||
else return false; // NOTE: don't consume byte b/c false
|
||||
}
|
||||
else if (b == Byte_ascii.Gt) {
|
||||
++pos;
|
||||
break;
|
||||
}
|
||||
key_bgn = pos;
|
||||
rslt = Skip_while_valid(this.bry_atr_key);
|
||||
if (rslt == Scan_invalid) return false;
|
||||
key_end = pos;
|
||||
Skip_ws();
|
||||
if (src[pos++] != Byte_ascii.Eq) return false;
|
||||
Skip_ws();
|
||||
byte quote_byte = src[pos];
|
||||
switch (quote_byte) {
|
||||
case Byte_ascii.Quote: quote_type = quote_byte; break;
|
||||
case Byte_ascii.Apos: quote_type = quote_byte; break;
|
||||
default: return false;
|
||||
}
|
||||
val_bgn = ++pos; // ++pos: start val after quote
|
||||
if (!Skip_to_quote_end(quote_byte)) return false;
|
||||
cur_atrs[cur_atrs_idx + 0] = quote_type;
|
||||
cur_atrs[cur_atrs_idx + 1] = key_bgn;
|
||||
cur_atrs[cur_atrs_idx + 2] = key_end;
|
||||
cur_atrs[cur_atrs_idx + 3] = val_bgn;
|
||||
cur_atrs[cur_atrs_idx + 4] = pos - 1; // NOTE: Skip_to_quote_end positions after quote
|
||||
cur_atrs_idx += 5;
|
||||
}
|
||||
cur_lhs_end = pos;
|
||||
return true;
|
||||
}
|
||||
private void Skip_ws() {
|
||||
while (pos < end) {
|
||||
switch (src[pos]) {
|
||||
case Byte_ascii.Space: case Byte_ascii.Tab: case Byte_ascii.Nl: case Byte_ascii.Cr:
|
||||
++pos;
|
||||
break;
|
||||
default:
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
boolean Skip_to_quote_end(byte v) {
|
||||
while (pos < end) {
|
||||
byte b = src[pos++];
|
||||
if (b == v) {
|
||||
if (pos == end) return false;
|
||||
byte next = src[pos];
|
||||
if (next != v) return true;
|
||||
else ++pos;
|
||||
}
|
||||
else if (b == Byte_ascii.Backslash) {
|
||||
++pos;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
byte Skip_while_valid(byte[] comp) {
|
||||
while (pos < end) {
|
||||
byte rv = comp[src[pos]];
|
||||
if (rv == Scan_valid)
|
||||
++pos;
|
||||
else
|
||||
return rv;
|
||||
}
|
||||
return Scan_invalid;
|
||||
}
|
||||
private static final byte Scan_invalid = 0, Scan_valid = 1, Scan_stop = 2;
|
||||
public static final byte[] Wildcard = null;
|
||||
public static final String Wildcard_str = null;
|
||||
}
|
||||
|
||||
@@ -13,3 +13,39 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.htmls; import gplx.*; import gplx.langs.*;
|
||||
import org.junit.*;
|
||||
public class Gfh_parser_tst {
|
||||
@Before public void init() {fxt.Clear();} private Gfh_parser_fxt fxt = new Gfh_parser_fxt();
|
||||
@Test public void One() {fxt.Test_parse_find_all("<a id='id0'></a>", "id0");}
|
||||
@Test public void Many() {fxt.Test_parse_find_all("<a id='id0'></a><a id='id1'></a><a id='id2'></a>", "id0", "id1", "id2");}
|
||||
@Test public void Inline() {fxt.Test_parse_find_all("<a id='id0'/>", "id0");}
|
||||
@Test public void Mix() {fxt.Test_parse_find_all("012<a id='id0'></a>id=id2<a id='id1'/>345<a id='id2'></a>abc", "id0", "id1", "id2");}
|
||||
@Test public void Quote_double() {fxt.Test_parse_find_all("<a id='id''0'/>", "id'0");}
|
||||
@Test public void Quote_escape() {fxt.Test_parse_find_all("<a id='id\\'0'/>", "id'0");}
|
||||
}
|
||||
class Gfh_parser_fxt {
|
||||
public void Clear() {
|
||||
if (parser == null) {
|
||||
parser = new Gfh_parser();
|
||||
}
|
||||
} private Gfh_parser parser;
|
||||
public Gfh_parser_fxt Test_parse_find_all(String raw_str, String... expd) {return Test_parse_find(raw_str, Gfh_parser.Wildcard_str, Gfh_parser.Wildcard_str, expd);}
|
||||
public Gfh_parser_fxt Test_parse_find(String raw_str, String find_key, String find_val, String... expd) {
|
||||
byte[] raw = Bry_.new_a7(raw_str);
|
||||
Gfh_nde[] actl_ndes = parser.Parse_as_ary(raw, 0, raw.length, Bry_.new_a7(find_key), Bry_.new_a7(find_val));
|
||||
String[] actl = Xto_ids(raw, actl_ndes);
|
||||
Tfds.Eq_ary_str(expd, actl);
|
||||
return this;
|
||||
}
|
||||
private String[] Xto_ids(byte[] src, Gfh_nde[] ary) {
|
||||
int len = ary.length;
|
||||
String[] rv = new String[len];
|
||||
for (int i = 0; i < len; i++) {
|
||||
Gfh_nde itm = ary[i];
|
||||
String atr_val = itm.Atrs_val_by_key_str("id");
|
||||
rv[i] = atr_val;
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,26 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.htmls; import gplx.*; import gplx.langs.*;
|
||||
public class Gfh_selecter {
|
||||
public static Gfh_nde[] Select(byte[] src, Gfh_nde[] ary, Hash_adp_bry hash) {
|
||||
List_adp list = List_adp_.New();
|
||||
int xndes_len = ary.length;
|
||||
for (int i = 0; i < xndes_len; i++) {
|
||||
Gfh_nde hnde = ary[i];
|
||||
int[] atrs = hnde.Atrs();
|
||||
int atrs_len = atrs.length;
|
||||
for (int j = 0; j < atrs_len; j += 5) {
|
||||
int atr_key_bgn = atrs[j + 1];
|
||||
int atr_key_end = atrs[j + 2];
|
||||
if (hash.Get_by_mid(src, atr_key_bgn, atr_key_end) != null) {
|
||||
list.Add(hnde);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
Gfh_nde[] rv = (Gfh_nde[])list.To_ary(Gfh_nde.class);
|
||||
list.Clear();
|
||||
return rv;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,273 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.htmls; import gplx.*; import gplx.langs.*;
|
||||
public class Gfh_tag_ { // NOTE: not serialized; used by tag_rdr
|
||||
public static final int
|
||||
Id__comment = -3
|
||||
, Id__eos = -2
|
||||
, Id__any = -1
|
||||
, Id__unknown = 0
|
||||
, Id__h1 = 1
|
||||
, Id__h2 = 2
|
||||
, Id__h3 = 3
|
||||
, Id__h4 = 4
|
||||
, Id__h5 = 5
|
||||
, Id__h6 = 6
|
||||
, Id__a = 7
|
||||
, Id__span = 8
|
||||
, Id__div = 9
|
||||
, Id__img = 10
|
||||
, Id__ul = 11
|
||||
, Id__ol = 12
|
||||
, Id__li = 13
|
||||
, Id__dl = 14
|
||||
, Id__dd = 15
|
||||
, Id__dt = 16
|
||||
, Id__p = 17
|
||||
, Id__br = 18
|
||||
, Id__hr = 19
|
||||
, Id__table = 20
|
||||
, Id__tr = 21
|
||||
, Id__td = 22
|
||||
, Id__th = 23
|
||||
, Id__thead = 24
|
||||
, Id__tbody = 25
|
||||
, Id__caption = 26
|
||||
, Id__pre = 27
|
||||
, Id__small = 28
|
||||
, Id__i = 29
|
||||
, Id__b = 30
|
||||
, Id__sup = 31
|
||||
, Id__sub = 32
|
||||
, Id__bdi = 33
|
||||
, Id__font = 34
|
||||
, Id__strong = 35
|
||||
, Id__s = 36
|
||||
, Id__abbr = 37
|
||||
, Id__cite = 38
|
||||
, Id__var = 39
|
||||
, Id__u = 40
|
||||
, Id__big = 41
|
||||
, Id__del = 42
|
||||
, Id__strike = 43
|
||||
, Id__tt = 44
|
||||
, Id__code = 45
|
||||
, Id__wbr = 46
|
||||
, Id__center = 47 // not HTML5, but used by en.v:Vandalism_in_progress
|
||||
, Id__dfn = 48
|
||||
, Id__kbd = 49
|
||||
, Id__samp = 50
|
||||
, Id__ins = 51
|
||||
, Id__em = 52
|
||||
, Id__blockquote = 53
|
||||
, Id__map = 54
|
||||
, Id__bdo = 55
|
||||
, Id__time = 56
|
||||
, Id__ruby = 57
|
||||
, Id__rb = 58
|
||||
, Id__rp = 59
|
||||
, Id__rt = 60
|
||||
;
|
||||
// private static final int Id__ary_max = 60;
|
||||
public static final byte[]
|
||||
Bry__a = Bry_.new_a7("a")
|
||||
, Bry__ul = Bry_.new_a7("ul")
|
||||
, Bry__td = Bry_.new_a7("td")
|
||||
, Bry__th = Bry_.new_a7("th")
|
||||
, Bry__div = Bry_.new_a7("div")
|
||||
, Bry__link = Bry_.new_a7("link")
|
||||
, Bry__style = Bry_.new_a7("style")
|
||||
, Bry__script = Bry_.new_a7("script")
|
||||
, Bry__xowa_any = Bry_.new_a7("xowa_any")
|
||||
, Bry__xowa_comment = Bry_.new_a7("xowa_comment")
|
||||
, Bry__img = Bry_.new_a7("img")
|
||||
;
|
||||
|
||||
// private static final Gfh_tag_meta[] Ary = new Gfh_tag_meta[Id__ary_max];
|
||||
// private static final Hash_adp_bry tags_by_bry = Hash_adp_bry.ci_a7();
|
||||
// public static Gfh_tag_meta New_tag(int id, String key_str) {
|
||||
// Gfh_tag_meta rv = new Gfh_tag_meta(id, key_str);
|
||||
// Ary[id] = rv;
|
||||
// tags_by_bry.Add_bry_int(rv.Key_bry(), id);
|
||||
// return rv;
|
||||
// }
|
||||
public static final Hash_adp_bry Hash = Hash_adp_bry.ci_a7()
|
||||
.Add_bry_int(Bry__a , Id__a)
|
||||
.Add_str_int("h1" , Id__h1)
|
||||
.Add_str_int("h2" , Id__h2)
|
||||
.Add_str_int("h3" , Id__h3)
|
||||
.Add_str_int("h4" , Id__h4)
|
||||
.Add_str_int("h5" , Id__h5)
|
||||
.Add_str_int("h6" , Id__h6)
|
||||
.Add_str_int("span" , Id__span)
|
||||
.Add_str_int("div" , Id__div)
|
||||
.Add_str_int("img" , Id__img)
|
||||
.Add_str_int("br" , Id__br)
|
||||
.Add_str_int("hr" , Id__hr)
|
||||
.Add_str_int("ul" , Id__ul)
|
||||
.Add_str_int("ol" , Id__ol)
|
||||
.Add_str_int("li" , Id__li)
|
||||
.Add_str_int("dl" , Id__dl)
|
||||
.Add_str_int("dd" , Id__dd)
|
||||
.Add_str_int("dt" , Id__dt)
|
||||
.Add_str_int("table" , Id__table)
|
||||
.Add_str_int("tr" , Id__tr)
|
||||
.Add_str_int("td" , Id__td)
|
||||
.Add_str_int("th" , Id__th)
|
||||
.Add_str_int("thead" , Id__thead)
|
||||
.Add_str_int("tbody" , Id__tbody)
|
||||
.Add_str_int("caption" , Id__caption)
|
||||
.Add_str_int("p" , Id__p)
|
||||
.Add_str_int("pre" , Id__pre)
|
||||
.Add_str_int("small" , Id__small)
|
||||
.Add_str_int("i" , Id__i)
|
||||
.Add_str_int("b" , Id__b)
|
||||
.Add_str_int("sup" , Id__sup)
|
||||
.Add_str_int("sub" , Id__sub)
|
||||
.Add_str_int("bdi" , Id__bdi)
|
||||
.Add_str_int("font" , Id__font)
|
||||
.Add_str_int("strong" , Id__strong)
|
||||
.Add_str_int("s" , Id__s)
|
||||
.Add_str_int("abbr" , Id__abbr)
|
||||
.Add_str_int("cite" , Id__cite)
|
||||
.Add_str_int("var" , Id__var)
|
||||
.Add_str_int("u" , Id__u)
|
||||
.Add_str_int("big" , Id__big)
|
||||
.Add_str_int("del" , Id__del)
|
||||
.Add_str_int("strike" , Id__strike)
|
||||
.Add_str_int("tt" , Id__tt)
|
||||
.Add_str_int("code" , Id__code)
|
||||
.Add_str_int("wbr" , Id__wbr)
|
||||
.Add_str_int("center" , Id__center)
|
||||
.Add_str_int("dfn" , Id__dfn)
|
||||
.Add_str_int("kbd" , Id__kbd)
|
||||
.Add_str_int("samp" , Id__samp)
|
||||
.Add_str_int("ins" , Id__ins)
|
||||
.Add_str_int("em" , Id__em)
|
||||
.Add_str_int("blockquote" , Id__blockquote)
|
||||
.Add_str_int("map" , Id__map)
|
||||
.Add_str_int("bdo" , Id__bdo)
|
||||
.Add_str_int("time" , Id__time)
|
||||
.Add_str_int("ruby" , Id__ruby)
|
||||
.Add_str_int("rb" , Id__rb)
|
||||
.Add_str_int("rp" , Id__rp)
|
||||
.Add_str_int("rt" , Id__rt)
|
||||
;
|
||||
public static String To_str(int tid) {
|
||||
switch (tid) {
|
||||
case Id__eos: return "EOS";
|
||||
case Id__any: return "any";
|
||||
case Id__unknown: return "unknown";
|
||||
case Id__comment: return "comment";
|
||||
case Id__h1: return "h1";
|
||||
case Id__h2: return "h2";
|
||||
case Id__h3: return "h2";
|
||||
case Id__h4: return "h2";
|
||||
case Id__h5: return "h2";
|
||||
case Id__h6: return "h2";
|
||||
case Id__a: return "a";
|
||||
case Id__span: return "span";
|
||||
case Id__div: return "div";
|
||||
case Id__img: return "img";
|
||||
case Id__p: return "p";
|
||||
case Id__br: return "br";
|
||||
case Id__hr: return "hr";
|
||||
case Id__ul: return "ul";
|
||||
case Id__ol: return "ol";
|
||||
case Id__li: return "li";
|
||||
case Id__dl: return "dl";
|
||||
case Id__dd: return "dd";
|
||||
case Id__dt: return "dt";
|
||||
case Id__table: return "table";
|
||||
case Id__tr: return "tr";
|
||||
case Id__td: return "td";
|
||||
case Id__th: return "th";
|
||||
case Id__thead: return "thead";
|
||||
case Id__tbody: return "tbody";
|
||||
case Id__caption: return "caption";
|
||||
case Id__pre: return "pre";
|
||||
case Id__small: return "small";
|
||||
case Id__i: return "i";
|
||||
case Id__b: return "b";
|
||||
case Id__sup: return "sup";
|
||||
case Id__sub: return "sub";
|
||||
case Id__bdi: return "bdi";
|
||||
case Id__font: return "font";
|
||||
case Id__strong: return "strong";
|
||||
case Id__s: return "s";
|
||||
case Id__abbr: return "abbr";
|
||||
case Id__cite: return "cite";
|
||||
case Id__var: return "var";
|
||||
case Id__u: return "u";
|
||||
case Id__big: return "big";
|
||||
case Id__del: return "del";
|
||||
case Id__strike: return "strike";
|
||||
case Id__tt: return "tt";
|
||||
case Id__code: return "code";
|
||||
case Id__wbr: return "wbr";
|
||||
case Id__center: return "center";
|
||||
case Id__dfn: return "dfn";
|
||||
case Id__kbd: return "kbd";
|
||||
case Id__samp: return "samp";
|
||||
case Id__ins: return "ins";
|
||||
case Id__em: return "em";
|
||||
case Id__blockquote: return "blockquote";
|
||||
case Id__map: return "map";
|
||||
case Id__bdo: return "bdo";
|
||||
case Id__time: return "time";
|
||||
case Id__ruby: return "ruby";
|
||||
case Id__rb: return "rb";
|
||||
case Id__rp: return "rp";
|
||||
case Id__rt: return "rt";
|
||||
default: throw Err_.new_unhandled(tid);
|
||||
}
|
||||
}
|
||||
public static final byte[]
|
||||
Br_inl = Bry_.new_a7("<br/>")
|
||||
, Br_lhs = Bry_.new_a7("<br>")
|
||||
, Hr_inl = Bry_.new_a7("<hr/>")
|
||||
, Body_lhs = Bry_.new_a7("<body>") , Body_rhs = Bry_.new_a7("</body>")
|
||||
, B_lhs = Bry_.new_a7("<b>") , B_rhs = Bry_.new_a7("</b>")
|
||||
, I_lhs = Bry_.new_a7("<i>") , I_rhs = Bry_.new_a7("</i>")
|
||||
, P_lhs = Bry_.new_a7("<p>") , P_rhs = Bry_.new_a7("</p>")
|
||||
, Pre_lhs = Bry_.new_a7("<pre>") , Pre_rhs = Bry_.new_a7("</pre>")
|
||||
, Div_lhs = Bry_.new_a7("<div>") , Div_rhs = Bry_.new_a7("</div>") , Div_lhs_bgn = Bry_.new_a7("<div")
|
||||
, Html_rhs = Bry_.new_a7("</html>")
|
||||
, Head_lhs_bgn = Bry_.new_a7("<head") , Head_rhs = Bry_.new_a7("</head>")
|
||||
, Style_lhs_w_type = Bry_.new_a7("<style type=\"text/css\">")
|
||||
, Style_rhs = Bry_.new_a7("</style>")
|
||||
, Script_lhs = Bry_.new_a7("<script>") , Script_rhs = Bry_.new_a7("</script>")
|
||||
, Script_lhs_w_type = Bry_.new_a7("<script type='text/javascript'>")
|
||||
, Span_lhs = Bry_.new_a7("<span") , Span_rhs = Bry_.new_a7("</span>")
|
||||
, Strong_lhs = Bry_.new_a7("<strong>") , Strong_rhs = Bry_.new_a7("</strong>")
|
||||
, Ul_lhs = Bry_.new_a7("<ul>") , Ul_rhs = Bry_.new_a7("</ul>")
|
||||
, Ol_lhs = Bry_.new_a7("<ol>") , Ol_rhs = Bry_.new_a7("</ol>")
|
||||
, Dt_lhs = Bry_.new_a7("<dt>") , Dt_rhs = Bry_.new_a7("</dt>")
|
||||
, Dd_lhs = Bry_.new_a7("<dd>") , Dd_rhs = Bry_.new_a7("</dd>")
|
||||
, Dl_lhs = Bry_.new_a7("<dl>") , Dl_rhs = Bry_.new_a7("</dl>")
|
||||
, Li_lhs = Bry_.new_a7("<li>") , Li_rhs = Bry_.new_a7("</li>") , Li_lhs_bgn = Bry_.new_a7("<li")
|
||||
, Table_lhs = Bry_.new_a7("<table>") , Table_rhs = Bry_.new_a7("</table>") , Table_lhs_bgn = Bry_.new_a7("<table")
|
||||
, Tr_lhs = Bry_.new_a7("<tr>") , Tr_rhs = Bry_.new_a7("</tr>") , Tr_lhs_bgn = Bry_.new_a7("<tr")
|
||||
, Td_lhs = Bry_.new_a7("<td>") , Td_rhs = Bry_.new_a7("</td>") , Td_lhs_bgn = Bry_.new_a7("<td")
|
||||
, Th_lhs = Bry_.new_a7("<th>") , Th_rhs = Bry_.new_a7("</th>") , Th_lhs_bgn = Bry_.new_a7("<th")
|
||||
, Caption_lhs = Bry_.new_a7("<caption>") , Caption_rhs = Bry_.new_a7("</caption>") , Caption_lhs_bgn = Bry_.new_a7("<caption")
|
||||
;
|
||||
public static final String
|
||||
Comm_bgn_str = "<!--"
|
||||
, Comm_end_str = "-->"
|
||||
, Anchor_str = "#"
|
||||
;
|
||||
public static final byte[]
|
||||
Comm_bgn = Bry_.new_a7(Comm_bgn_str), Comm_end = Bry_.new_a7(Comm_end_str)
|
||||
;
|
||||
public static final int
|
||||
Comm_bgn_len = Comm_bgn.length
|
||||
, Comm_end_len = Comm_end.length
|
||||
;
|
||||
public static final byte[] Rhs_bgn = Bry_.new_a7("</");
|
||||
public static void Bld_lhs_bgn(Bry_bfr bfr, byte[] tag) {bfr.Add_byte(Byte_ascii.Lt).Add(tag);} // EX:"<tag_name"; writes "<" followed by the tag bytes; the tag is left open
|
||||
public static void Bld_lhs_end_nde(Bry_bfr bfr) {bfr.Add_byte(Byte_ascii.Gt);} // EX:">"; closes an open tag that will be followed by content
|
||||
public static void Bld_lhs_end_inl(Bry_bfr bfr) {bfr.Add_byte(Byte_ascii.Slash).Add_byte(Byte_ascii.Gt);} // EX:"/>"; closes an open tag as an inline tag
|
||||
public static void Bld_rhs(Bry_bfr bfr, byte[] name) {bfr.Add(Rhs_bgn).Add(name).Add_byte(Byte_ascii.Angle_end);} // EX:"</tag_name>"; NOTE(review): uses Byte_ascii.Angle_end where the sibling builders use Byte_ascii.Gt -- presumably the same ">" byte; confirm
|
||||
}
|
||||
|
||||
@@ -13,3 +13,14 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.htmls; import gplx.*; import gplx.langs.*;
|
||||
public class Gfh_tag_meta {
|
||||
public Gfh_tag_meta(int id, String key_str) {
|
||||
this.id = id;
|
||||
this.key_str = key_str;
|
||||
this.key_bry = Bry_.new_u8(key_str);
|
||||
}
|
||||
public int Id() {return id;} private final int id;
|
||||
public String Key_str() {return key_str;} private final String key_str;
|
||||
public byte[] Key_bry() {return key_bry;} private final byte[] key_bry;
|
||||
}
|
||||
@@ -13,3 +13,196 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.htmls; import gplx.*; import gplx.langs.*;
|
||||
import gplx.core.primitives.*; import gplx.core.btries.*; import gplx.langs.htmls.encoders.*;
|
||||
import gplx.langs.htmls.entitys.*;
|
||||
public class Gfh_utl {// TS:Gfo_url_encoder is TS
|
||||
private static final Gfo_url_encoder encoder_id = Gfo_url_encoder_.Id;
|
||||
public static String Encode_id_as_str(byte[] key) {return String_.new_u8(Encode_id_as_bry(key));}
|
||||
public static byte[] Encode_id_as_bry(byte[] key) {
|
||||
Bry_bfr tmp_bfr = Bry_bfr_.Get();
|
||||
try {
|
||||
byte[] escaped = Escape_html_as_bry(tmp_bfr, key, Bool_.N, Bool_.N, Bool_.N, Bool_.Y, Bool_.Y);
|
||||
return encoder_id.Encode(escaped);
|
||||
} finally {tmp_bfr.Mkr_rls();}
|
||||
}
|
||||
public static String Escape_for_atr_val_as_str(Bry_bfr bfr, byte quote_byte, String s) {return String_.new_u8(Escape_for_atr_val_as_bry(bfr, quote_byte, s));}
|
||||
public static byte[] Escape_for_atr_val_as_bry(Bry_bfr bfr, byte quote_byte, String s) {
|
||||
if (s == null) return null;
|
||||
return Escape_for_atr_val_as_bry(bfr, quote_byte, Bry_.new_u8(s));
|
||||
}
|
||||
public static byte[] Escape_for_atr_val_as_bry(Bry_bfr bfr, byte quote_byte, byte[] bry) {
|
||||
if (bry == null) return null;
|
||||
boolean dirty = Escape_for_atr_val_as_bry(bfr, quote_byte, bry, 0, bry.length);
|
||||
return dirty ? bfr.To_bry_and_clear() : bry;
|
||||
}
|
||||
public static boolean Escape_for_atr_val_as_bry(Bry_bfr bfr, byte quote_byte, byte[] src, int bgn, int end) {
|
||||
boolean dirty = false;
|
||||
for (int i = bgn; i < end; i++) {
|
||||
byte b = src[i];
|
||||
if (b == quote_byte) {
|
||||
if (!dirty) {
|
||||
bfr.Add_mid(src, bgn, i);
|
||||
dirty = true;
|
||||
}
|
||||
switch (quote_byte) {
|
||||
case Byte_ascii.Apos: bfr.Add(Gfh_entity_.Apos_num_bry); break;
|
||||
case Byte_ascii.Quote: bfr.Add(Gfh_entity_.Quote_bry); break;
|
||||
default: throw Err_.new_unhandled(quote_byte);
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (dirty)
|
||||
bfr.Add_byte(b);
|
||||
}
|
||||
}
|
||||
return dirty;
|
||||
}
|
||||
public static String Escape_html_as_str(String v) {return String_.new_u8(Escape_html_as_bry(Bry_.new_u8(v)));}
|
||||
public static byte[] Escape_html_as_bry(Bry_bfr tmp, byte[] bry) {return Escape_html(false, tmp, bry, 0, bry.length, true, true, true, true, true);}
|
||||
public static byte[] Escape_html_as_bry(byte[] bry) {
|
||||
Bry_bfr tmp_bfr = Bry_bfr_.Get();
|
||||
try {return Escape_html(false, tmp_bfr, bry, 0, bry.length, true, true, true, true, true);}
|
||||
finally {tmp_bfr.Mkr_rls();}
|
||||
}
|
||||
public static byte[] Escape_html_as_bry(byte[] bry, boolean lt, boolean gt, boolean amp, boolean quote, boolean apos) {
|
||||
Bry_bfr tmp_bfr = Bry_bfr_.Get();
|
||||
try {return Escape_html(false, tmp_bfr, bry, 0, bry.length, lt, gt, amp, quote, apos);}
|
||||
finally {tmp_bfr.Mkr_rls();}
|
||||
}
|
||||
public static byte[] Escape_html_as_bry(Bry_bfr bfr, byte[] bry, boolean lt, boolean gt, boolean amp, boolean quote, boolean apos)
|
||||
{return Escape_html(false, bfr, bry, 0, bry.length, lt, gt, amp, quote, apos);}
|
||||
public static void Escape_html_to_bfr(Bry_bfr bfr, byte[] bry, int bgn, int end, boolean escape_lt, boolean escape_gt, boolean escape_amp, boolean escape_quote, boolean escape_apos) {
|
||||
Escape_html(true, bfr, bry, bgn, end, escape_lt, escape_gt, escape_amp, escape_quote, escape_apos);
|
||||
}
|
||||
private static byte[] Escape_html(boolean write_to_bfr, Bry_bfr bfr, byte[] bry, int bgn, int end, boolean escape_lt, boolean escape_gt, boolean escape_amp, boolean escape_quote, boolean escape_apos) {
|
||||
if (bry == null) return null;
|
||||
boolean dirty = write_to_bfr ? true : false; // if write_to_bfr, then mark true, else bfr.Add_mid(bry, 0, i); will write whole bry again
|
||||
byte[] escaped = null;
|
||||
for (int i = bgn; i < end; i++) {
|
||||
byte b = bry[i];
|
||||
switch (b) {
|
||||
case Byte_ascii.Lt: if (escape_lt) escaped = Gfh_entity_.Lt_bry; break;
|
||||
case Byte_ascii.Gt: if (escape_gt) escaped = Gfh_entity_.Gt_bry; break;
|
||||
case Byte_ascii.Amp: if (escape_amp) escaped = Gfh_entity_.Amp_bry; break;
|
||||
case Byte_ascii.Quote: if (escape_quote) escaped = Gfh_entity_.Quote_bry; break;
|
||||
case Byte_ascii.Apos: if (escape_apos) escaped = Gfh_entity_.Apos_num_bry; break;
|
||||
default:
|
||||
if (dirty || write_to_bfr)
|
||||
bfr.Add_byte(b);
|
||||
continue;
|
||||
}
|
||||
// handle lt, gt, amp, quote; everything else handled by default: continue above
|
||||
if (escaped == null) { // handle do-not-escape calls; EX: Escape(y, y, n, y);
|
||||
if (dirty || write_to_bfr)
|
||||
bfr.Add_byte(b);
|
||||
}
|
||||
else {
|
||||
if (!dirty) {
|
||||
bfr.Add_mid(bry, bgn, i);
|
||||
dirty = true;
|
||||
}
|
||||
bfr.Add(escaped);
|
||||
escaped = null;
|
||||
}
|
||||
}
|
||||
if (write_to_bfr)
|
||||
return null;
|
||||
else
|
||||
return dirty ? bfr.To_bry_and_clear() : bry;
|
||||
}
|
||||
private static final Btrie_slim_mgr unescape_trie = Btrie_slim_mgr.ci_a7()
|
||||
.Add_bry_byte(Gfh_entity_.Lt_bry , Byte_ascii.Lt)
|
||||
.Add_bry_byte(Gfh_entity_.Gt_bry , Byte_ascii.Gt)
|
||||
.Add_bry_byte(Gfh_entity_.Amp_bry , Byte_ascii.Amp)
|
||||
.Add_bry_byte(Gfh_entity_.Quote_bry , Byte_ascii.Quote)
|
||||
.Add_bry_byte(Gfh_entity_.Apos_num_bry , Byte_ascii.Apos)
|
||||
;
|
||||
public static String Unescape_as_str(String src) {
|
||||
Bry_bfr bfr = Bry_bfr_.Reset(255);
|
||||
byte[] bry = Bry_.new_u8(src);
|
||||
Unescape(Bool_.Y, bfr, bry, 0, bry.length, Bool_.Y, Bool_.Y, Bool_.Y, Bool_.Y, Bool_.Y);
|
||||
return bfr.To_str_and_clear();
|
||||
}
|
||||
public static byte[] Unescape(boolean write_to_bfr, Bry_bfr bfr, byte[] bry, int bgn, int end, boolean escape_lt, boolean escape_gt, boolean escape_amp, boolean escape_quote, boolean escape_apos) {
|
||||
if (bry == null) return null;
|
||||
boolean dirty = write_to_bfr ? true : false; // if write_to_bfr, then mark true, else bfr.Add_mid(bry, 0, i); will write whole bry again
|
||||
int pos = bgn;
|
||||
Btrie_rv trv = new Btrie_rv();
|
||||
while (pos < end) {
|
||||
byte b = bry[pos];
|
||||
Object o = unescape_trie.Match_at_w_b0(trv, b, bry, pos, end);
|
||||
if (o == null) {
|
||||
if (dirty || write_to_bfr)
|
||||
bfr.Add_byte(b);
|
||||
++pos;
|
||||
}
|
||||
else {
|
||||
Byte_obj_val unescaped_bval = (Byte_obj_val)o;
|
||||
byte unescaped_byte = unescaped_bval.Val();
|
||||
boolean unescape = false;
|
||||
switch (unescaped_byte) {
|
||||
case Byte_ascii.Lt: if (escape_lt) unescape = true; break;
|
||||
case Byte_ascii.Gt: if (escape_gt) unescape = true; break;
|
||||
case Byte_ascii.Amp: if (escape_amp) unescape = true; break;
|
||||
case Byte_ascii.Quote: if (escape_quote) unescape = true; break;
|
||||
case Byte_ascii.Apos: if (escape_apos) unescape = true; break;
|
||||
}
|
||||
if (unescape) {
|
||||
if (!dirty) {
|
||||
bfr.Add_mid(bry, bgn, pos);
|
||||
dirty = true;
|
||||
}
|
||||
bfr.Add_byte(unescaped_byte);
|
||||
}
|
||||
else {
|
||||
if (dirty || write_to_bfr)
|
||||
bfr.Add_byte(b);
|
||||
}
|
||||
pos = trv.Pos();
|
||||
}
|
||||
}
|
||||
if (write_to_bfr)
|
||||
return null;
|
||||
else
|
||||
return dirty ? bfr.To_bry_and_clear() : bry;
|
||||
}
|
||||
public static byte[] Del_comments(Bry_bfr bfr, byte[] src) {return Del_comments(bfr, src, 0, src.length);}
|
||||
public static byte[] Del_comments(Bry_bfr bfr, byte[] src, int pos, int end) {
|
||||
while (true) {
|
||||
if (pos >= end) break;
|
||||
int comm_bgn = Bry_find_.Find_fwd(src, Gfh_tag_.Comm_bgn, pos); // look for <!--
|
||||
if (comm_bgn == Bry_find_.Not_found) { // <!-- not found;
|
||||
bfr.Add_mid(src, pos, end); // add everything between pos and <!--
|
||||
break; // stop checking
|
||||
}
|
||||
int comm_bgn_rhs = comm_bgn + Gfh_tag_.Comm_bgn_len;
|
||||
int comm_end = Bry_find_.Find_fwd(src, Gfh_tag_.Comm_end, comm_bgn_rhs); // look for -->
|
||||
if (comm_end == Bry_find_.Not_found) { // --> not found
|
||||
bfr.Add_mid(src, pos, comm_bgn); // add everything between pos and comm_bgn; EX: "a<!--b->" must add "a"
|
||||
break; // stop checking
|
||||
}
|
||||
bfr.Add_mid(src, pos, comm_bgn); // add everything between pos and comm_bgn
|
||||
pos = comm_end + Gfh_tag_.Comm_end_len; // reposition pos after comm_end
|
||||
}
|
||||
return bfr.To_bry_and_clear();
|
||||
}
|
||||
public static String Replace_apos(String s) {return String_.Replace(s, "'", "\"");}
|
||||
public static String Replace_apos_concat_lines(String... lines) {
|
||||
Bry_bfr bfr = Bry_bfr_.New();
|
||||
int len = lines.length;
|
||||
for (int i = 0; i < len; ++i) {
|
||||
String line_str = lines[i];
|
||||
byte[] line_bry = Bry_.new_u8(line_str);
|
||||
Bry_.Replace_all_direct(line_bry, Byte_ascii.Apos, Byte_ascii.Quote, 0, line_bry.length);
|
||||
if (i != 0) bfr.Add_byte_nl();
|
||||
bfr.Add(line_bry);
|
||||
}
|
||||
return bfr.To_str_and_clear();
|
||||
}
|
||||
public static void Log(Exception e, String head, byte[] page_url, byte[] src, int pos) {
|
||||
Err err = Err_.Cast_or_make(e); if (err.Logged()) return;
|
||||
String msg = String_.Format("{0}; page={1} err={2} mid={3} trace={4}", head, page_url, Err_.Message_lang(e), Bry_.Escape_ws(Bry_.Mid_by_len_safe(src, pos, 255)), err.To_str__log());
|
||||
Gfo_usr_dlg_.Instance.Warn_many("", "", msg);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,44 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.htmls; import gplx.*; import gplx.langs.*;
|
||||
import org.junit.*;
|
||||
public class Gfh_utl__basic__tst {
|
||||
@Before public void init() {fxt.Clear();} private Gfh_class_fxt fxt = new Gfh_class_fxt();
|
||||
@Test public void Escape() {
|
||||
fxt.Test_escape_html(Bool_.Y, Bool_.Y, Bool_.Y, Bool_.Y, Bool_.Y, "a<b" , "a<b"); // basic
|
||||
fxt.Test_escape_html(Bool_.Y, Bool_.Y, Bool_.N, Bool_.Y, Bool_.Y, "a<&b" , "a<&b"); // fix: & not escaped when <> present
|
||||
fxt.Test_escape_html(Bool_.Y, Bool_.Y, Bool_.Y, Bool_.Y, Bool_.Y, "a<>'&\"b" , "a<>'&"b");
|
||||
}
|
||||
@Test public void Escape_for_atr_val() {
|
||||
fxt.Test_escape_for_atr("abc" , Bool_.N, "abc"); // basic
|
||||
fxt.Test_escape_for_atr("a'\"b" , Bool_.Y, "a'\"b"); // quote is '
|
||||
fxt.Test_escape_for_atr("a'\"b" , Bool_.N, "a'"b"); // quote is "
|
||||
}
|
||||
@Test public void Unescape() {
|
||||
fxt.Test_unescape_html(Bool_.Y, Bool_.Y, Bool_.Y, Bool_.Y, Bool_.Y, "a<>'&"b" , "a<>'&\"b"); // basic
|
||||
}
|
||||
}
|
||||
class Gfh_class_fxt {
|
||||
private Bry_bfr tmp_bfr = Bry_bfr_.Reset(255);
|
||||
public void Clear() {
|
||||
tmp_bfr.Clear();
|
||||
}
|
||||
public void Test_del_comments(String src, String expd) {
|
||||
byte[] actl = Gfh_utl.Del_comments(tmp_bfr, Bry_.new_u8(src));
|
||||
Tfds.Eq(expd, String_.new_a7(actl));
|
||||
}
|
||||
public void Test_escape_html(boolean lt, boolean gt, boolean amp, boolean quote, boolean apos, String src, String expd) {
|
||||
byte[] actl = Gfh_utl.Escape_html_as_bry(Bry_.new_a7(src), lt, gt, amp, quote, apos);
|
||||
Tfds.Eq(expd, String_.new_a7(actl));
|
||||
}
|
||||
public void Test_escape_for_atr(String src, boolean quote_is_apos, String expd) {
|
||||
byte[] actl = Gfh_utl.Escape_for_atr_val_as_bry(tmp_bfr, quote_is_apos ? Byte_ascii.Apos : Byte_ascii.Quote, src);
|
||||
Tfds.Eq(expd, String_.new_u8(actl));
|
||||
}
|
||||
public void Test_unescape_html(boolean lt, boolean gt, boolean amp, boolean quote, boolean apos, String src, String expd) {
|
||||
byte[] bry = Bry_.new_u8(src);
|
||||
byte[] actl = Gfh_utl.Unescape(false, tmp_bfr, bry, 0, bry.length, lt, gt, amp, quote, apos);
|
||||
Tfds.Eq(expd, String_.new_a7(actl));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,12 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.htmls; import gplx.*; import gplx.langs.*;
|
||||
import org.junit.*;
|
||||
public class Gfh_utl__comments__tst {
|
||||
@Before public void init() {fxt.Clear();} private final Gfh_class_fxt fxt = new Gfh_class_fxt();
|
||||
@Test public void Basic() {fxt.Test_del_comments("a<!-- b -->c" , "ac");}
|
||||
@Test public void Bgn_missing() {fxt.Test_del_comments("a b c" , "a b c");}
|
||||
@Test public void End_missing() {fxt.Test_del_comments("a<!-- b c" , "a");}
|
||||
@Test public void Multiple() {fxt.Test_del_comments("a<!--b-->c<!--d-->e" , "ace");}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,93 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.htmls; import gplx.*; import gplx.langs.*;
|
||||
public class Gfh_wtr {
|
||||
private Bry_bfr bfr = Bry_bfr_.Reset(255);
|
||||
private List_adp nde_stack = List_adp_.New();
|
||||
public byte Atr_quote() {return atr_quote;} public Gfh_wtr Atr_quote_(byte v) {atr_quote = v; return this;} private byte atr_quote = Byte_ascii.Quote;
|
||||
public Gfh_wtr Nde_full_atrs(byte[] tag, byte[] text, boolean text_escape, byte[]... atrs) {
|
||||
Nde_bgn(tag);
|
||||
int atrs_len = atrs.length;
|
||||
for (int i = 0; i < atrs_len; i += 2) {
|
||||
byte[] key = atrs[i];
|
||||
byte[] val = atrs[i + 1];
|
||||
Atr(key, val);
|
||||
}
|
||||
Nde_end_hdr();
|
||||
if (text_escape)
|
||||
Txt(text);
|
||||
else
|
||||
bfr.Add(text);
|
||||
Nde_end();
|
||||
return this;
|
||||
}
|
||||
public Gfh_wtr Nde_full(byte[] tag, byte[] text) {
|
||||
Nde_bgn_hdr(tag);
|
||||
Txt(text);
|
||||
Nde_end();
|
||||
return this;
|
||||
}
|
||||
public Gfh_wtr Txt_mid(byte[] src, int bgn, int end) {bfr.Add_mid(src, bgn, end); return this;}
|
||||
public Gfh_wtr Txt_byte(byte v) {bfr.Add_byte(v); return this;}
|
||||
public Gfh_wtr Txt_raw(byte[] v) {bfr.Add(v); return this;}
|
||||
public Gfh_wtr Txt(byte[] v) {
|
||||
if (v != null) {
|
||||
bfr.Add(Gfh_utl.Escape_html_as_bry(v));
|
||||
}
|
||||
return this;
|
||||
}
|
||||
public Gfh_wtr Nde_bgn_hdr(byte[] name) {
|
||||
this.Nde_bgn(name);
|
||||
this.Nde_end_hdr();
|
||||
return this;
|
||||
}
|
||||
public Gfh_wtr Nde_bgn(byte[] name) {
|
||||
bfr.Add_byte(Byte_ascii.Lt);
|
||||
bfr.Add(name);
|
||||
nde_stack.Add(name);
|
||||
return this;
|
||||
}
|
||||
public Gfh_wtr Atr(byte[] key, byte[] val) {
|
||||
Write_atr_bry(bfr, Bool_.Y, atr_quote, key, val);
|
||||
return this;
|
||||
}
|
||||
public Gfh_wtr Nde_end_inline() {
|
||||
bfr.Add_byte(Byte_ascii.Slash).Add_byte(Byte_ascii.Gt);
|
||||
List_adp_.Pop_last(nde_stack);
|
||||
return this;
|
||||
}
|
||||
public Gfh_wtr Nde_end_hdr() {
|
||||
bfr.Add_byte(Byte_ascii.Gt);
|
||||
return this;
|
||||
}
|
||||
public Gfh_wtr Nde_end() {
|
||||
byte[] name = (byte[])List_adp_.Pop_last(nde_stack);
|
||||
bfr.Add_byte(Byte_ascii.Lt).Add_byte(Byte_ascii.Slash);
|
||||
bfr.Add(name);
|
||||
bfr.Add_byte(Byte_ascii.Gt);
|
||||
return this;
|
||||
}
|
||||
public byte[] To_bry_and_clear() {return bfr.To_bry_and_clear();}
|
||||
public byte[] Xto_bry() {return bfr.To_bry();}
|
||||
public String Xto_str() {return bfr.To_str();}
|
||||
public static void Write_atr_bry(Bry_bfr bfr, byte[] key, byte[] val) {Write_atr_bry(bfr, Bool_.Y, Byte_ascii.Quote, key, val);}
|
||||
public static void Write_atr_bry(Bry_bfr bfr, boolean write_space, byte atr_quote, byte[] key, byte[] val) {
|
||||
if (Bry_.Len_eq_0(val)) return; // don't write empty
|
||||
if (write_space) bfr.Add_byte_space();
|
||||
bfr.Add(key);
|
||||
bfr.Add_byte(Byte_ascii.Eq);
|
||||
bfr.Add_byte(atr_quote);
|
||||
Gfh_utl.Escape_html_to_bfr(bfr, val, 0, val.length, false, false, false, true, true);
|
||||
bfr.Add_byte(atr_quote);
|
||||
}
|
||||
public static void Write_atr_int(Bry_bfr bfr, byte[] key, int val) {Write_atr_int(bfr, Bool_.Y, Byte_ascii.Quote, key, val);}
|
||||
public static void Write_atr_int(Bry_bfr bfr, boolean write_space, byte atr_quote, byte[] key, int val) {
|
||||
if (write_space) bfr.Add_byte_space();
|
||||
bfr.Add(key);
|
||||
bfr.Add_byte(Byte_ascii.Eq);
|
||||
bfr.Add_byte(atr_quote);
|
||||
bfr.Add_int_variable(val);
|
||||
bfr.Add_byte(atr_quote);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,47 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.htmls.clses; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
|
||||
public class Gfh_class_ {
|
||||
public static boolean Has(byte[] src, int src_bgn, int src_end, byte[] cls) {
|
||||
int cls_bgn = src_bgn;
|
||||
int pos = src_bgn;
|
||||
while (true) {
|
||||
boolean pos_is_last = pos == src_end;
|
||||
byte b = pos_is_last ? Byte_ascii.Space : src[pos];
|
||||
if (b == Byte_ascii.Space) {
|
||||
if (cls_bgn != -1) {
|
||||
if (Bry_.Match(src, cls_bgn, pos, cls))return true;
|
||||
cls_bgn = -1;
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (cls_bgn == -1) cls_bgn = pos;
|
||||
}
|
||||
if (pos_is_last) break;
|
||||
++pos;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
public static byte Find_1st(byte[] src, int src_bgn, int src_end, Hash_adp_bry hash) {
|
||||
int cls_bgn = src_bgn;
|
||||
int pos = src_bgn;
|
||||
while (true) {
|
||||
boolean pos_is_last = pos == src_end;
|
||||
byte b = pos_is_last ? Byte_ascii.Space : src[pos];
|
||||
if (b == Byte_ascii.Space) {
|
||||
if (cls_bgn != -1) {
|
||||
byte rv = hash.Get_as_byte_or(src, cls_bgn, pos, Byte_.Max_value_127);
|
||||
if (rv != Byte_.Max_value_127) return rv;
|
||||
cls_bgn = -1;
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (cls_bgn == -1) cls_bgn = pos;
|
||||
}
|
||||
if (pos_is_last) break;
|
||||
++pos;
|
||||
}
|
||||
return Byte_.Max_value_127;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,44 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.htmls.clses; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
|
||||
import org.junit.*;
|
||||
public class Gfh_class__tst {
|
||||
private final Gfh_class__fxt fxt = new Gfh_class__fxt();
|
||||
@Test public void Has() {
|
||||
fxt.Test__has__y("a b c", "a", "b", "c");
|
||||
fxt.Test__has__n("a b c", "d");
|
||||
fxt.Test__has__n("ab", "a");
|
||||
}
|
||||
@Test public void Cls__has__hash() {
|
||||
Hash_adp_bry hash = fxt.Make_hash("x", "y", "z");
|
||||
fxt.Test__find_1st(hash, 0, "x");
|
||||
fxt.Test__find_1st(hash, 2, "z");
|
||||
fxt.Test__find_1st(hash, 0, "a x b");
|
||||
fxt.Test__find_1st(hash, 0, "a b x");
|
||||
fxt.Test__find_1st(hash, Byte_.Max_value_127, "a");
|
||||
fxt.Test__find_1st(hash, Byte_.Max_value_127, "xyz");
|
||||
}
|
||||
}
|
||||
class Gfh_class__fxt {
|
||||
public void Test__has__y(String src, String... ary) {Test__has(Bool_.Y, src, ary);}
|
||||
public void Test__has__n(String src, String... ary) {Test__has(Bool_.N, src, ary);}
|
||||
public void Test__has(boolean expd, String src, String... ary) {
|
||||
byte[] src_bry = Bry_.new_u8(src);
|
||||
for (String itm : ary) {
|
||||
byte[] itm_bry = Bry_.new_u8(itm);
|
||||
Tfds.Eq_bool(expd, Gfh_class_.Has(src_bry, 0, src_bry.length, itm_bry), itm);
|
||||
}
|
||||
}
|
||||
public Hash_adp_bry Make_hash(String... ary) {
|
||||
Hash_adp_bry rv = Hash_adp_bry.ci_a7();
|
||||
int len = ary.length;
|
||||
for (int i = 0; i < len; ++i)
|
||||
rv.Add_bry_byte(Bry_.new_u8(ary[i]), (byte)i);
|
||||
return rv;
|
||||
}
|
||||
public void Test__find_1st(Hash_adp_bry hash, int expd, String src) {
|
||||
byte[] src_bry = Bry_.new_u8(src);
|
||||
Tfds.Eq_byte((byte)expd, Gfh_class_.Find_1st(src_bry, 0, src_bry.length, hash), src);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,35 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.htmls.clses; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
|
||||
import gplx.langs.htmls.docs.*;
|
||||
public class Gfh_class_parser_ {
|
||||
public static void Parse(Gfh_tag tag, Gfh_class_parser_wkr wkr) {
|
||||
Gfh_atr atr = tag.Atrs__get_by_or_empty(Gfh_atr_.Bry__class);
|
||||
if (atr.Val_dat_exists())
|
||||
Parse(tag.Src(), atr.Val_bgn(), atr.Val_end(), wkr);
|
||||
}
|
||||
public static void Parse(byte[] src, int src_bgn, int src_end, Gfh_class_parser_wkr wkr) {
|
||||
int atr_idx = 0, tmp_bgn = -1, tmp_end = -1;
|
||||
int pos = src_bgn;
|
||||
while (true) {
|
||||
boolean pos_is_last = pos == src_end;
|
||||
byte b = pos_is_last ? Byte_ascii.Space : src[pos];
|
||||
switch (b) {
|
||||
case Byte_ascii.Tab: case Byte_ascii.Nl: case Byte_ascii.Cr: case Byte_ascii.Space:
|
||||
if (tmp_bgn != -1) { // ignore empty atrs
|
||||
if (!wkr.On_cls(src, atr_idx, src_bgn, src_end, tmp_bgn, tmp_end))
|
||||
pos_is_last = true;
|
||||
}
|
||||
++atr_idx; tmp_bgn = -1; tmp_end = -1;
|
||||
break;
|
||||
default:
|
||||
if (tmp_bgn == -1) tmp_bgn = pos;
|
||||
tmp_end = pos + 1;
|
||||
break;
|
||||
}
|
||||
if (pos_is_last) break;
|
||||
++pos;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,30 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.htmls.clses; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
|
||||
import org.junit.*;
|
||||
public class Gfh_class_parser__tst {
|
||||
private final Gfh_class_parser__fxt fxt = new Gfh_class_parser__fxt();
|
||||
@Test public void Basic() {fxt.Test__parse("v1" , "v1");}
|
||||
@Test public void Many() {fxt.Test__parse("v1 v2" , "v1", "v2");}
|
||||
}
|
||||
class Gfh_class_parser__fxt {
|
||||
private final Gfh_class_wkr__list wkr = new Gfh_class_wkr__list();
|
||||
public void Test__parse(String src_str, String... expd) {
|
||||
byte[] src_bry = Bry_.new_u8(src_str);
|
||||
String[] actl = wkr.Parse(src_bry, 0, src_bry.length);
|
||||
Tfds.Eq_ary_str(expd, actl);
|
||||
}
|
||||
}
|
||||
class Gfh_class_wkr__list implements Gfh_class_parser_wkr {
|
||||
private final List_adp list = List_adp_.New();
|
||||
public boolean On_cls(byte[] src, int atr_idx, int atr_bgn, int atr_end, int val_bgn, int val_end) {
|
||||
String s = String_.new_u8(src, val_bgn, val_end);
|
||||
list.Add(s); //
|
||||
return true;
|
||||
}
|
||||
public String[] Parse(byte[] src, int src_bgn, int src_end) {
|
||||
Gfh_class_parser_.Parse(src, src_bgn, src_end, this);
|
||||
return (String[])list.To_ary_and_clear(String.class);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,7 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.htmls.clses; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
|
||||
/** Callback used by Gfh_class_parser_ to report each class name found in a "class" attribute value. */
public interface Gfh_class_parser_wkr {
  /**
   * Called once per class name.
   * @param src     bytes being parsed
   * @param atr_idx counter kept by the parser; it is incremented at every delimiter byte
   * @param atr_bgn start of the range being parsed
   * @param atr_end end of the range being parsed
   * @param val_bgn start of the class name in {@code src}
   * @param val_end end (exclusive) of the class name in {@code src}
   * @return true to keep parsing; false to stop after this name
   */
  boolean On_cls(
      byte[] src,
      int atr_idx,
      int atr_bgn,
      int atr_end,
      int val_bgn,
      int val_end);
}
|
||||
|
||||
@@ -13,3 +13,33 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.htmls.docs; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
|
||||
public class Gfh_atr implements gplx.core.brys.Bfr_arg {
|
||||
public Gfh_atr(int idx, int atr_bgn, int atr_end, byte[] key, byte[] val, byte[] src, int val_bgn, int val_end) {
|
||||
this.idx = idx; this.atr_bgn = atr_bgn; this.atr_end = atr_end; this.key = key; this.val = val;
|
||||
this.src = src; this.val_bgn = val_bgn; this.val_end = val_end;
|
||||
}
|
||||
public byte[] Src() {return src;} private final byte[] src;
|
||||
public int Idx() {return idx;} private final int idx;
|
||||
public int Atr_bgn() {return atr_bgn;} private final int atr_bgn;
|
||||
public int Atr_end() {return atr_end;} private final int atr_end;
|
||||
public byte[] Key() {return key;} private final byte[] key;
|
||||
public int Val_bgn() {return val_bgn;} private final int val_bgn;
|
||||
public int Val_end() {return val_end;} private final int val_end;
|
||||
public boolean Val_dat_exists() {return val_end != -1;}
|
||||
public boolean Val_dat_missing() {return val_end == -1;}
|
||||
public byte[] Val() {
|
||||
if (val == null)
|
||||
val = Bry_.Mid(src, val_bgn, val_end);
|
||||
return val;
|
||||
} private byte[] val;
|
||||
public void Html__add(Bry_bfr bfr) {
|
||||
if (val_end > val_bgn)
|
||||
bfr.Add_mid(src, val_bgn, val_end);
|
||||
}
|
||||
public void Bfr_arg__add(Bry_bfr bfr) {
|
||||
if (Val_dat_exists())
|
||||
bfr.Add_mid(src, val_bgn, val_end);
|
||||
}
|
||||
public static final Gfh_atr Noop = new Gfh_atr(-1, -1, -1, Bry_.Empty, Bry_.Empty, Bry_.Empty, -1, -1);
|
||||
}
|
||||
|
||||
@@ -13,3 +13,40 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.htmls.docs; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
|
||||
import gplx.core.btries.*;
|
||||
public class Gfh_doc_parser {
|
||||
private final Btrie_rv trv = new Btrie_rv();
|
||||
private final Btrie_slim_mgr trie = Btrie_slim_mgr.cs();
|
||||
private final Gfh_txt_wkr txt_wkr;
|
||||
public Gfh_doc_parser(Gfh_txt_wkr txt_wkr, Gfh_doc_wkr... wkr_ary) {
|
||||
this.txt_wkr = txt_wkr;
|
||||
for (Gfh_doc_wkr wkr : wkr_ary)
|
||||
trie.Add_obj(wkr.Hook(), wkr);
|
||||
}
|
||||
public void Parse(byte[] page_url, byte[] src, int src_bgn, int src_end) {
|
||||
int txt_bgn = -1;
|
||||
int pos = src_bgn;
|
||||
while (pos < src_end) {
|
||||
Object o = trie.Match_at(trv, src, pos, src_end);
|
||||
if (o == null) { // not a known hook; add to txt
|
||||
if (txt_bgn == -1) txt_bgn = pos;
|
||||
++pos;
|
||||
}
|
||||
else { // known hook
|
||||
if (txt_bgn != -1) { // txt pending; handle it
|
||||
txt_wkr.Parse(txt_bgn, pos);
|
||||
txt_bgn = -1;
|
||||
}
|
||||
Gfh_doc_wkr wkr = (Gfh_doc_wkr)o;
|
||||
try {pos = wkr.Parse(src, src_bgn, src_end, pos);}
|
||||
catch (Exception e) {
|
||||
Gfh_utl.Log(e, "html parse failed", page_url, src, pos);
|
||||
txt_bgn = pos; // set txt_bgn to hook_bgn which is "pos"; i.e.: txt resumes from start of failed hook
|
||||
pos = trv.Pos(); // set pos to hook_end
|
||||
}
|
||||
}
|
||||
}
|
||||
if (txt_bgn != -1) txt_wkr.Parse(txt_bgn, src_end); // handle add pending txt at EOS
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,8 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.htmls.docs; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
|
||||
/** Worker invoked by Gfh_doc_parser when its hook bytes are matched in the source. */
public interface Gfh_doc_wkr {
  /** Bytes that trigger this worker; the parser registers the worker in its trie under this key. */
  byte[] Hook();

  /**
   * Handles the hook that was matched at {@code pos}.
   * @param src     bytes being parsed
   * @param src_bgn start of the range being parsed
   * @param src_end end of the range being parsed
   * @param pos     position at which the hook was matched
   * @return position from which the parser continues scanning
   */
  int Parse(
      byte[] src,
      int src_bgn,
      int src_end,
      int pos);
}
|
||||
|
||||
@@ -13,3 +13,150 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.htmls.docs; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
|
||||
import gplx.xowa.parsers.htmls.*; import gplx.langs.htmls.styles.*; import gplx.langs.htmls.clses.*;
|
||||
public class Gfh_tag implements Mwh_atr_wkr {
|
||||
private Gfh_tag_rdr tag_rdr;
|
||||
private Ordered_hash atrs_hash; private boolean atrs_null; private int atrs_bgn, atrs_end;
|
||||
private final Gfh_style_wkr__val_as_int style_wkr = new Gfh_style_wkr__val_as_int();
|
||||
public Gfh_tag Init(Gfh_tag_rdr tag_rdr, byte[] src, boolean tag_is_tail, boolean tag_is_inline, int src_bgn, int src_end, int atrs_bgn, int atrs_end, int name_id, byte[] name_bry) {
|
||||
this.tag_rdr = tag_rdr; this.src = src; this.atrs_null = true;
|
||||
this.tag_is_tail = tag_is_tail; this.tag_is_inline = tag_is_inline;
|
||||
this.atrs_bgn = atrs_bgn; this.atrs_end = atrs_end;
|
||||
this.name_id = name_id; this.name_bry = name_bry; this.src_bgn = src_bgn; this.src_end = src_end;
|
||||
return this;
|
||||
}
|
||||
public Gfh_tag Copy() {
|
||||
Gfh_tag rv = new Gfh_tag().Init(tag_rdr, src, tag_is_tail, tag_is_inline, src_bgn, src_end, atrs_bgn, atrs_end, name_id, name_bry);
|
||||
rv.atrs_null = false;
|
||||
rv.atrs_hash = Copy(atrs_hash);
|
||||
return rv;
|
||||
}
|
||||
public int Name_id() {return name_id;} private int name_id;
|
||||
public boolean Tid_is_comment() {return name_id == Gfh_tag_.Id__comment;}
|
||||
public byte[] Name_bry() {return name_bry;} private byte[] name_bry;
|
||||
public Gfh_tag Chk_name_or_fail(int chk) {
|
||||
if (!Chk_name(chk)) tag_rdr.Err_wkr().Fail("name_id chk failed", "expecting", Gfh_tag_.To_str(chk));
|
||||
return this;
|
||||
}
|
||||
public boolean Chk_name(int chk) {
|
||||
return ( chk == name_id
|
||||
|| (name_id != Gfh_tag_.Id__eos && Int_.In(chk, Gfh_tag_.Id__any, Gfh_tag_.Id__comment)));
|
||||
}
|
||||
public boolean Chk(int chk_name, byte[] chk_cls) {return name_id == chk_name && Atrs__cls_has(chk_cls);}
|
||||
public byte[] Src() {return src;} private byte[] src;
|
||||
public int Src_bgn() {return src_bgn;} private int src_bgn;
|
||||
public int Src_end() {return src_end;} private int src_end;
|
||||
public boolean Src_exists() {return src_end > src_bgn;} // NOTE: only true if EOS where src_end == src_bgn == src_len
|
||||
public boolean Tag_is_tail() {return tag_is_tail;} private boolean tag_is_tail;
|
||||
public boolean Tag_is_inline() {return tag_is_inline;} private boolean tag_is_inline;
|
||||
public Ordered_hash Atrs__hash() {if (atrs_null) Atrs__make(); return atrs_hash;}
|
||||
public int Atrs__len() {if (atrs_null) Atrs__make(); return atrs_hash.Count();}
|
||||
public boolean Atrs__match_pair(byte[] key, byte[] val) {
|
||||
if (atrs_null) Atrs__make();
|
||||
Gfh_atr rv = (Gfh_atr)atrs_hash.Get_by(key);
|
||||
return rv == null ? false : Bry_.Eq(val, rv.Val());
|
||||
}
|
||||
public boolean Atrs__cls_has(byte[] val) {
|
||||
if (atrs_null) Atrs__make();
|
||||
Gfh_atr rv = (Gfh_atr)atrs_hash.Get_by(Gfh_atr_.Bry__class); if (rv == null) return false;
|
||||
byte[] rv_val = rv.Val();
|
||||
return Gfh_class_.Has(rv_val, 0, rv_val.length, val);
|
||||
}
|
||||
public boolean Atrs__cls_eq(byte[] val) {
|
||||
if (atrs_null) Atrs__make();
|
||||
Gfh_atr rv = (Gfh_atr)atrs_hash.Get_by(Gfh_atr_.Bry__class); if (rv == null) return false;
|
||||
return Bry_.Eq(val, rv.Val());
|
||||
}
|
||||
public byte Atrs__cls_find_or_fail(Hash_adp_bry hash) {
|
||||
byte rv = Atrs__cls_find_or(hash, Byte_.Max_value_127); if (rv == Byte_.Max_value_127) tag_rdr.Err_wkr().Fail("cls missing");
|
||||
return rv;
|
||||
}
|
||||
public byte Atrs__cls_find_or(Hash_adp_bry hash, byte or) {
|
||||
if (atrs_null) Atrs__make();
|
||||
Gfh_atr cls_atr = (Gfh_atr)atrs_hash.Get_by(Gfh_atr_.Bry__class); if (cls_atr == null) return or;
|
||||
byte rv = Gfh_class_.Find_1st(src, cls_atr.Val_bgn(), cls_atr.Val_end(), hash); if (rv == Byte_.Max_value_127) return or;
|
||||
return rv;
|
||||
}
|
||||
public int Atrs__style_get_as_int(byte[] key) {
|
||||
if (atrs_null) Atrs__make();
|
||||
Gfh_atr rv = (Gfh_atr)atrs_hash.Get_by(Gfh_atr_.Bry__style); if (rv == null) return -1;
|
||||
byte[] rv_val = rv.Val();
|
||||
return style_wkr.Parse(rv_val, 0, rv_val.length, key);
|
||||
}
|
||||
public boolean Atrs__has(byte[] key) {
|
||||
if (atrs_null) Atrs__make();
|
||||
return atrs_hash.Get_by(key) != null;
|
||||
}
|
||||
public byte[] Atrs__get_as_bry(byte[] key) {
|
||||
if (atrs_null) Atrs__make();
|
||||
Gfh_atr rv = (Gfh_atr)atrs_hash.Get_by(key);
|
||||
return rv == null ? Bry_.Empty : rv.Val();
|
||||
}
|
||||
public int Atrs__get_as_int(byte[] key) {
|
||||
int rv = Atrs__get_as_int_or(key, Int_.Min_value); if (rv == Int_.Min_value) tag_rdr.Err_wkr().Fail("atr missing", "key", key);
|
||||
return rv;
|
||||
}
|
||||
public int Atrs__get_as_int_or(byte[] key, int or) {
|
||||
if (atrs_null) Atrs__make();
|
||||
Gfh_atr rv = (Gfh_atr)atrs_hash.Get_by(key); if (rv == null) return or;
|
||||
return Bry_.To_int_or(src, rv.Val_bgn(), rv.Val_end(), or);
|
||||
}
|
||||
public double Atrs__get_as_double_or(byte[] key, double or) {
|
||||
if (atrs_null) Atrs__make();
|
||||
Gfh_atr rv = (Gfh_atr)atrs_hash.Get_by(key); if (rv == null) return or;
|
||||
return Bry_.To_double_or(src, rv.Val_bgn(), rv.Val_end(), or);
|
||||
}
|
||||
public Gfh_atr Atrs__get_at(int i) {return (Gfh_atr)atrs_hash.Get_at(i);}
|
||||
public Gfh_atr Atrs__get_by_or_fail(byte[] key) {return Atrs__get_by_or_fail(key, Bool_.Y);}
|
||||
public Gfh_atr Atrs__get_by_or_empty(byte[] key) {return Atrs__get_by_or_fail(key, Bool_.N);}
|
||||
public Gfh_atr Atrs__get_by_or_fail(byte[] key, boolean fail_if_null) {
|
||||
if (atrs_null) Atrs__make();
|
||||
Gfh_atr rv = (Gfh_atr)atrs_hash.Get_by(key);
|
||||
if (rv == null) {
|
||||
if (fail_if_null) tag_rdr.Err_wkr().Fail("atr missing", "key", key);
|
||||
else return Gfh_atr.Noop;
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
public String Atrs__print() {
|
||||
if (atrs_null) Atrs__make();
|
||||
Bry_bfr bfr = Bry_bfr_.New();
|
||||
int len = atrs_hash.Count();
|
||||
for (int i = 0; i < len; ++i) {
|
||||
Gfh_atr atr = (Gfh_atr)atrs_hash.Get_at(i);
|
||||
bfr.Add(atr.Key()).Add_byte_eq().Add(atr.Val()).Add_byte_nl();
|
||||
}
|
||||
return bfr.To_str();
|
||||
}
|
||||
private void Atrs__make() {
|
||||
atrs_null = false;
|
||||
if (atrs_hash == null) atrs_hash = Ordered_hash_.New_bry();
|
||||
else atrs_hash.Clear();
|
||||
tag_rdr.Atrs__make(this, atrs_bgn, atrs_end);
|
||||
}
|
||||
public void On_atr_each (Mwh_atr_parser mgr, byte[] src, int nde_tid, boolean valid, boolean repeated, boolean key_exists, byte[] key_bry, byte[] val_bry_manual, int[] itm_ary, int itm_idx) {
|
||||
if (!valid) return;
|
||||
byte[] val_bry = val_bry_manual;
|
||||
int val_bgn = -1, val_end = -1;
|
||||
int atr_bgn = itm_ary[itm_idx + Mwh_atr_mgr.Idx_atr_bgn];
|
||||
int atr_end = itm_ary[itm_idx + Mwh_atr_mgr.Idx_atr_end];
|
||||
if (key_exists) {
|
||||
val_bgn = itm_ary[itm_idx + Mwh_atr_mgr.Idx_val_bgn];
|
||||
val_end = itm_ary[itm_idx + Mwh_atr_mgr.Idx_val_end];
|
||||
}
|
||||
else
|
||||
val_bry = key_bry;
|
||||
Gfh_atr atr = new Gfh_atr(atrs_hash.Count(), atr_bgn, atr_end, key_bry, val_bry, src, val_bgn, val_end);
|
||||
atrs_hash.Add(key_bry, atr);
|
||||
}
|
||||
private static Ordered_hash Copy(Ordered_hash src) {
|
||||
Ordered_hash rv = Ordered_hash_.New();
|
||||
int len = src.Count();
|
||||
for (int i = 0; i < len; ++i) {
|
||||
Gfh_atr atr = (Gfh_atr)src.Get_at(i);
|
||||
rv.Add(atr.Key(), atr);
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,346 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.htmls.docs; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
|
||||
import gplx.core.primitives.*; import gplx.core.brys.*; import gplx.core.btries.*;
|
||||
import gplx.xowa.parsers.htmls.*;
|
||||
public class Gfh_tag_rdr {
|
||||
// tag-name registry filled by Reg
private final Hash_adp_bry name_hash;
private final Mwh_atr_parser atr_parser = new Mwh_atr_parser();
// reusable tag instances: one handed out by "move" calls, one by "peek" calls, plus EOS and comment tags
private final Gfh_tag tag__tmp__move = new Gfh_tag();
private final Gfh_tag tag__tmp__peek = new Gfh_tag();
private final Gfh_tag tag__eos = new Gfh_tag();
private final Gfh_tag tag__comment = new Gfh_tag();
// nesting-depth counter; zeroed at the start of every Tag__find
private final Int_obj_ref tmp_depth = Int_obj_ref.New_zero();
private final Bry_err_wkr err_wkr = new Bry_err_wkr();
private byte[] src;
private int src_end;

Gfh_tag_rdr(Hash_adp_bry name_hash) {
  this.name_hash = name_hash;
}
public byte[] Src() {
  return src;
}
public int Src_end() {
  return src_end;
}
public Bry_err_wkr Err_wkr() {
  return err_wkr;
}
/** Registers a tag name under an id in the name hash. */
public Gfh_tag_rdr Reg(String tag_name, int tag_id) {
  name_hash.Add_str_int(tag_name, tag_id);
  return this;
}
|
||||
public void Init(byte[] ctx_name, byte[] src, int src_bgn, int src_end) {
|
||||
this.src = src; this.pos = src_bgn; this.src_end = src_end;
|
||||
tag__eos.Init(this, src, Bool_.N, Bool_.N, src_end, src_end, src_end, src_end, Gfh_tag_.Id__eos, Bry_.Empty);
|
||||
err_wkr.Init_by_page(String_.new_u8(ctx_name), src);
|
||||
}
|
||||
public void Src_rng_(int src_bgn, int src_end) {
|
||||
this.pos = src_bgn; this.src_end = src_end;
|
||||
}
|
||||
public int Pos() {return pos;} private int pos;
|
||||
public void Pos_(int v) {this.pos = v;}
|
||||
public void Atrs__make(Mwh_atr_wkr atr_wkr, int head_bgn, int head_end) {atr_parser.Parse(atr_wkr, -1, -1, src, head_bgn, head_end);}
|
||||
public Gfh_tag Tag__move_fwd_head() {return Tag__find(Bool_.Y, Bool_.N, Bool_.N, pos, src_end, Gfh_tag_.Id__any);}
|
||||
public Gfh_tag Tag__move_fwd_head(int match_name_id) {return Tag__find(Bool_.Y, Bool_.N, Bool_.N, pos, src_end, match_name_id);}
|
||||
public Gfh_tag Tag__move_fwd_tail(int match_name_id) {return Tag__find(Bool_.Y, Bool_.N, Bool_.Y, pos, src_end, match_name_id);}
|
||||
public Gfh_tag Tag__peek_fwd_head() {return Tag__find(Bool_.N, Bool_.N, Bool_.N, pos, src_end, Gfh_tag_.Id__any);}
|
||||
public Gfh_tag Tag__peek_fwd_head(int match_name_id) {return Tag__find(Bool_.N, Bool_.N, Bool_.N, pos, src_end, match_name_id);}
|
||||
public Gfh_tag Tag__peek_fwd_tail(int match_name_id) {return Tag__find(Bool_.N, Bool_.N, Bool_.Y, pos, src_end, match_name_id);}
|
||||
public Gfh_tag Tag__peek_bwd_tail(int match_name_id) {return Tag__find(Bool_.N, Bool_.Y, Bool_.Y, pos, src_end, match_name_id);}
|
||||
public Gfh_tag Tag__peek_bwd_head() {return Tag__find(Bool_.N, Bool_.Y, Bool_.Y, pos, src_end, Gfh_tag_.Id__any);}
|
||||
public Gfh_tag Tag__find_fwd_head(int bgn, int end, int match_name_id) {return Tag__find(Bool_.N, Bool_.N, Bool_.N, bgn, end, match_name_id);}
|
||||
private Gfh_tag Tag__find(boolean move, boolean bwd, boolean tail, int rng_bgn, int rng_end, int match_name_id) {
|
||||
int tmp = rng_bgn;
|
||||
int stop_pos = rng_end; int adj = 1;
|
||||
if (bwd) {
|
||||
stop_pos = -1;
|
||||
adj = -1;
|
||||
--tmp; // subtract 1 from tmp; needed when pos is at src_len, else array error below
|
||||
}
|
||||
tmp_depth.Val_zero_();
|
||||
Gfh_tag rv = null;
|
||||
while (tmp != stop_pos) {
|
||||
if (src[tmp] == Byte_ascii.Angle_bgn) {
|
||||
rv = Tag__extract(move, tail, match_name_id, tmp);
|
||||
if (rv.Name_id() == Gfh_tag_.Id__comment) { // ignore comments DATE:2016-06-25
|
||||
tmp = rv.Src_end();
|
||||
rv = null; // null rv, else rv will still be comment and may get returned to caller
|
||||
continue;
|
||||
}
|
||||
if (Tag__match(move, bwd, tail, match_name_id, tmp_depth, rv))
|
||||
break;
|
||||
else {
|
||||
tmp = bwd ? rv.Src_bgn() - 1 : rv.Src_end();
|
||||
rv = null;
|
||||
}
|
||||
}
|
||||
else
|
||||
tmp += adj;
|
||||
}
|
||||
if (rv == null) {
|
||||
if (move && tail && !bwd)
|
||||
err_wkr.Fail("move tag fwd failed", "tag_name", Gfh_tag_.To_str(match_name_id));
|
||||
else
|
||||
return Tag__eos(rng_bgn);
|
||||
}
|
||||
if (move) pos = rv.Src_end();
|
||||
return rv;
|
||||
}
|
||||
private boolean Tag__match(boolean move, boolean bwd, boolean tail, int match_name_id, Int_obj_ref depth_obj, Gfh_tag tag) {
|
||||
int tag_name_id = tag.Name_id();
|
||||
if ( tag_name_id != match_name_id // tag doesn't match requested
|
||||
&& match_name_id != Gfh_tag_.Id__any // requested is not wildcard
|
||||
) return false;
|
||||
if (tag_name_id == Gfh_tag_.Id__comment) return true; // ignore comments
|
||||
int depth = depth_obj.Val();
|
||||
boolean tag_is_tail = tag.Tag_is_tail();
|
||||
if (tail == tag_is_tail) {
|
||||
if (depth == 0)
|
||||
return true;
|
||||
else {
|
||||
if (match_name_id == tag_name_id)
|
||||
depth_obj.Val_add(-1);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (!bwd && tail && !tag_is_tail && !tag.Tag_is_inline()) {
|
||||
if (match_name_id == tag_name_id)
|
||||
depth_obj.Val_add(1);
|
||||
return false;
|
||||
}
|
||||
else
|
||||
return false;
|
||||
}
|
||||
}
|
||||
public Gfh_tag Tag__move_fwd_tail(byte[] find_tag_bry) {return Tag__find(Bool_.Y, Bool_.N, Bool_.Y, pos, src_end, find_tag_bry);}
|
||||
public Gfh_tag Tag__peek_fwd_tail(byte[] find_tag_bry) {return Tag__find(Bool_.N, Bool_.N, Bool_.Y, pos, src_end, find_tag_bry);}
|
||||
private Gfh_tag Tag__find(boolean move, boolean bwd, boolean tail, int rng_bgn, int rng_end, byte[] find_tag_bry) {
|
||||
int tmp = rng_bgn;
|
||||
int stop_pos = rng_end; int adj = 1;
|
||||
if (bwd) {
|
||||
stop_pos = -1;
|
||||
adj = -1;
|
||||
--tmp; // subtract 1 from tmp; needed when pos is at src_len, else array error below
|
||||
}
|
||||
tmp_depth.Val_zero_();
|
||||
Gfh_tag rv = null;
|
||||
while (tmp != stop_pos) {
|
||||
if (src[tmp] == Byte_ascii.Angle_bgn) {
|
||||
rv = Tag__extract(move, tail, find_tag_bry, tmp);
|
||||
if (Bry_.Eq(rv.Name_bry(), Gfh_tag_.Bry__xowa_comment)) { // ignore comments DATE:2016-06-25
|
||||
tmp = rv.Src_end();
|
||||
rv = null; // null rv, else rv will still be comment and may get returned to caller
|
||||
continue;
|
||||
}
|
||||
if (Tag__match(move, bwd, tail, find_tag_bry, tmp_depth, rv))
|
||||
break;
|
||||
else {
|
||||
tmp = bwd ? rv.Src_bgn() - 1 : rv.Src_end();
|
||||
rv = null;
|
||||
}
|
||||
}
|
||||
else
|
||||
tmp += adj;
|
||||
}
|
||||
if (rv == null) {
|
||||
if (move && tail && !bwd)
|
||||
err_wkr.Fail("move tag fwd failed", "tag_name", find_tag_bry);
|
||||
else
|
||||
return Tag__eos(rng_bgn);
|
||||
}
|
||||
if (move) pos = rv.Src_end();
|
||||
return rv;
|
||||
}
|
||||
private boolean Tag__match(boolean move, boolean bwd, boolean tail, byte[] find_tag_bry, Int_obj_ref depth_obj, Gfh_tag tag) {
|
||||
byte[] cur_tag_bry = tag.Name_bry();
|
||||
if ( !Bry_.Eq(cur_tag_bry, find_tag_bry) // tag doesn't match requested
|
||||
&& find_tag_bry != Gfh_tag_.Bry__xowa_any // requested is not wildcard
|
||||
) return false;
|
||||
if (cur_tag_bry == Gfh_tag_.Bry__xowa_comment) return true; // ignore comments
|
||||
int depth = depth_obj.Val();
|
||||
boolean tag_is_tail = tag.Tag_is_tail();
|
||||
if (tail == tag_is_tail) {
|
||||
if (depth == 0)
|
||||
return true;
|
||||
else {
|
||||
if (Bry_.Eq(cur_tag_bry, find_tag_bry))
|
||||
depth_obj.Val_add(-1);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (!bwd && tail && !tag_is_tail && !tag.Tag_is_inline()) {
|
||||
if (Bry_.Eq(cur_tag_bry, find_tag_bry))
|
||||
depth_obj.Val_add(1);
|
||||
return false;
|
||||
}
|
||||
else
|
||||
return false;
|
||||
}
|
||||
}
|
||||
public Gfh_tag Tag__extract(boolean move, boolean tail, byte[] find_tag_bry, int tag_bgn) {
|
||||
int name_bgn = tag_bgn + 1; if (name_bgn == src_end) return Tag__eos(tag_bgn); // EX: "<EOS"
|
||||
byte name_0 = src[name_bgn];
|
||||
boolean cur_is_tail = false;
|
||||
switch (name_0) {
|
||||
case Byte_ascii.Bang:
|
||||
if (Bry_.Match(src, name_bgn + 1, name_bgn + 3, Bry__comment__mid)) // skip comment; EX: "<!"
|
||||
return Tag__comment(tag_bgn);
|
||||
break;
|
||||
case Byte_ascii.Slash:
|
||||
++name_bgn; if (name_bgn == src_end) return Tag__eos(tag_bgn); // EX: "</EOS"
|
||||
name_0 = src[name_bgn];
|
||||
cur_is_tail = true;
|
||||
break;
|
||||
}
|
||||
int name_end = -1, atrs_end = -1, tag_end = -1, name_pos = name_bgn;
|
||||
byte name_byte = name_0; boolean inline = false;
|
||||
boolean loop = true;
|
||||
while (true) {
|
||||
switch (name_byte) {
|
||||
case Byte_ascii.Angle_end: // EX: "<a>"
|
||||
name_end = atrs_end = name_pos;
|
||||
tag_end = name_end + 1;
|
||||
loop = false;
|
||||
break;
|
||||
case Byte_ascii.Slash: // EX: "<a/>"
|
||||
name_end = name_pos;
|
||||
tag_end = name_pos + 1; if (tag_end == src_end) return Tag__eos(tag_bgn);// EX: "<a/EOS"
|
||||
if (src[tag_end] == Byte_ascii.Angle_end) {
|
||||
atrs_end = name_end;
|
||||
inline = true;
|
||||
loop = false;
|
||||
++tag_end; // move tag_end after >
|
||||
}
|
||||
else {
|
||||
name_end = tag_end = -1;
|
||||
}
|
||||
break;
|
||||
case Byte_ascii.Tab: case Byte_ascii.Nl: case Byte_ascii.Cr: case Byte_ascii.Space:
|
||||
name_end = name_pos;
|
||||
loop = false;
|
||||
break;
|
||||
}
|
||||
if (!loop) break;
|
||||
++name_pos; if (name_pos == src_end) return Tag__eos(tag_bgn); // EX: "<abEOS"
|
||||
name_byte = src[name_pos];
|
||||
}
|
||||
if (tag_end == -1) {
|
||||
tag_end = Bry_find_.Find_fwd(src, Byte_ascii.Angle_end, name_end, src_end);
|
||||
if (tag_end == Bry_find_.Not_found) return Tag__eos(tag_bgn);
|
||||
int prv_pos = tag_end - 1;
|
||||
if (src[prv_pos] == Byte_ascii.Slash) {
|
||||
atrs_end = prv_pos;
|
||||
inline = true;
|
||||
}
|
||||
else
|
||||
atrs_end = tag_end;
|
||||
++tag_end; // position after ">"
|
||||
}
|
||||
Gfh_tag tmp = move ? tag__tmp__move : tag__tmp__peek;
|
||||
return tmp.Init(this, src, cur_is_tail, inline, tag_bgn, tag_end, name_end, atrs_end, Gfh_tag_.Id__unknown, Bry_.Mid(src, name_bgn, name_end));
|
||||
}
|
||||
public Gfh_tag Tag__extract(boolean move, boolean tail, int match_name_id, int tag_bgn) {
|
||||
int name_bgn = tag_bgn + 1; if (name_bgn == src_end) return Tag__eos(tag_bgn); // EX: "<EOS"
|
||||
byte name_0 = src[name_bgn];
|
||||
boolean cur_is_tail = false;
|
||||
switch (name_0) {
|
||||
case Byte_ascii.Bang:
|
||||
if (Bry_.Match(src, name_bgn + 1, name_bgn + 3, Bry__comment__mid)) // skip comment; EX: "<!"
|
||||
return Tag__comment(tag_bgn);
|
||||
break;
|
||||
case Byte_ascii.Slash:
|
||||
++name_bgn; if (name_bgn == src_end) return Tag__eos(tag_bgn); // EX: "</EOS"
|
||||
name_0 = src[name_bgn];
|
||||
cur_is_tail = true;
|
||||
break;
|
||||
}
|
||||
int name_end = -1, atrs_end = -1, tag_end = -1, name_pos = name_bgn;
|
||||
byte name_byte = name_0; boolean inline = false;
|
||||
boolean loop = true;
|
||||
while (true) {
|
||||
switch (name_byte) {
|
||||
case Byte_ascii.Angle_end: // EX: "<a>"
|
||||
name_end = atrs_end = name_pos;
|
||||
tag_end = name_end + 1;
|
||||
loop = false;
|
||||
break;
|
||||
case Byte_ascii.Slash: // EX: "<a/>"
|
||||
name_end = name_pos;
|
||||
tag_end = name_pos + 1; if (tag_end == src_end) return Tag__eos(tag_bgn);// EX: "<a/EOS"
|
||||
if (src[tag_end] == Byte_ascii.Angle_end) {
|
||||
atrs_end = name_end;
|
||||
inline = true;
|
||||
loop = false;
|
||||
++tag_end; // move tag_end after >
|
||||
}
|
||||
else {
|
||||
name_end = tag_end = -1;
|
||||
}
|
||||
break;
|
||||
case Byte_ascii.Tab: case Byte_ascii.Nl: case Byte_ascii.Cr: case Byte_ascii.Space:
|
||||
name_end = name_pos;
|
||||
loop = false;
|
||||
break;
|
||||
}
|
||||
if (!loop) break;
|
||||
++name_pos; if (name_pos == src_end) return Tag__eos(tag_bgn); // EX: "<abEOS"
|
||||
name_byte = src[name_pos];
|
||||
}
|
||||
if (tag_end == -1) {
|
||||
tag_end = Bry_find_.Find_fwd(src, Byte_ascii.Angle_end, name_end, src_end);
|
||||
if (tag_end == Bry_find_.Not_found) return Tag__eos(tag_bgn);
|
||||
int prv_pos = tag_end - 1;
|
||||
if (src[prv_pos] == Byte_ascii.Slash) {
|
||||
atrs_end = prv_pos;
|
||||
inline = true;
|
||||
}
|
||||
else
|
||||
atrs_end = tag_end;
|
||||
++tag_end; // position after ">"
|
||||
}
|
||||
Gfh_tag tmp = move ? tag__tmp__move : tag__tmp__peek;
|
||||
return tmp.Init(this, src, cur_is_tail, inline, tag_bgn, tag_end, name_end, atrs_end
|
||||
, name_hash.Get_as_int_or(src, name_bgn, name_end, -1) // TODO_OLD: change from -1 to Unknown
|
||||
, Bry_.Mid(src, name_bgn, name_end));
|
||||
}
|
||||
public boolean Read_and_move(byte match) {
|
||||
byte b = src[pos];
|
||||
if (b == match) {
|
||||
++pos;
|
||||
return true;
|
||||
}
|
||||
else
|
||||
return false;
|
||||
}
|
||||
public int Read_int_to(byte to_char) {
|
||||
int rv = Read_int_to(to_char, Int_.Max_value); if (rv == Int_.Max_value) err_wkr.Fail("invalid int", "pos", pos);
|
||||
return rv;
|
||||
}
|
||||
public int Read_int_to(byte to_char, int or_int) {
|
||||
int bgn = pos;
|
||||
int rv = 0;
|
||||
int negative = 1;
|
||||
while (pos < src_end) {
|
||||
byte b = src[pos++];
|
||||
switch (b) {
|
||||
case Byte_ascii.Num_0: case Byte_ascii.Num_1: case Byte_ascii.Num_2: case Byte_ascii.Num_3: case Byte_ascii.Num_4:
|
||||
case Byte_ascii.Num_5: case Byte_ascii.Num_6: case Byte_ascii.Num_7: case Byte_ascii.Num_8: case Byte_ascii.Num_9:
|
||||
rv = (rv * 10) + (b - Byte_ascii.Num_0);
|
||||
break;
|
||||
case Byte_ascii.Dash:
|
||||
if (negative == -1) // 2nd negative
|
||||
return or_int; // return or_int
|
||||
else // 1st negative
|
||||
negative = -1; // flag negative
|
||||
break;
|
||||
default: {
|
||||
boolean match = b == to_char;
|
||||
if (to_char == Byte_ascii.Null) {// hack for Read_int_to_non_num
|
||||
--pos;
|
||||
match = true;
|
||||
}
|
||||
return match ? rv * negative : or_int;
|
||||
}
|
||||
}
|
||||
}
|
||||
return bgn == pos ? or_int : rv * negative;
|
||||
}
|
||||
private Gfh_tag Tag__comment(int tag_bgn) {
|
||||
int tag_end = Bry_find_.Move_fwd(src, gplx.langs.htmls.Gfh_tag_.Comm_end, tag_bgn, src_end); if (tag_end == Bry_find_.Not_found) tag_end = src_end;
|
||||
return tag__comment.Init(this, src, Bool_.N, Bool_.N, tag_bgn, tag_end, tag_end, tag_end, Gfh_tag_.Id__comment, Bry_.Empty);
|
||||
}
|
||||
private Gfh_tag Tag__eos(int tag_bgn) {
|
||||
int tag_end = tag_bgn + 255; if (tag_end > src_end) tag_end = src_end;
|
||||
return tag__comment.Init(this, src, Bool_.N, Bool_.N, tag_bgn, tag_end, tag_end, tag_end, Gfh_tag_.Id__eos, Bry_.Empty);
|
||||
}
|
||||
private static final byte[] Bry__comment__mid = Bry_.new_a7("--");
|
||||
// Factory: creates a reader constructed with the Gfh_tag_.Hash table.
// NOTE(review): presumably this table becomes name_hash, which resolves tag names to ids; confirm against the constructor
public static Gfh_tag_rdr New__html() {return new Gfh_tag_rdr(Gfh_tag_.Hash);}
|
||||
// Factory: creates a reader constructed with a new Hash_adp_bry.cs() table instead of the Gfh_tag_.Hash table.
// NOTE(review): presumably callers register their own tag names in it; confirm against the constructor
public static Gfh_tag_rdr New__custom() {return new Gfh_tag_rdr(Hash_adp_bry.cs());}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,66 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.htmls.docs; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
|
||||
import org.junit.*;
|
||||
public class Gfh_tag_rdr_tst {
|
||||
private final Gfh_tag_rdr_fxt fxt = new Gfh_tag_rdr_fxt();
|
||||
@Test public void Basic() {
|
||||
fxt.Init("1<div id='1'>2</div>3<div id='2'>4</div>5<div id='3'>6</div>7");
|
||||
fxt.Test__move_fwd_head("<div id='1'>"); fxt.Test__pos("2");
|
||||
fxt.Test__peek_fwd_head("<div id='2'>"); fxt.Test__pos("2");
|
||||
fxt.Test__move_fwd_head("<div id='2'>"); fxt.Test__pos("4");
|
||||
fxt.Test__peek_bwd_tail("</div>3") ; fxt.Test__pos("4");
|
||||
}
|
||||
@Test public void Comment() {
|
||||
fxt.Init("1<!--2-->3<!--4-->5<div id='1'>6</div>");
|
||||
fxt.Test__move_fwd_head(Gfh_tag_.Id__any , "<div id='1'>") ; fxt.Test__pos("6");
|
||||
}
|
||||
@Test public void Meta() {
|
||||
fxt.Init("<!DOCTYPE html>1<div id='1'>2</div>3");
|
||||
fxt.Test__move_fwd_head(Gfh_tag_.Id__div , "<div id='1'>") ; fxt.Test__pos("2");
|
||||
}
|
||||
@Test public void Recursive__same_tags() {
|
||||
fxt.Init("1<a>2<a>3</a>4</a>5");
|
||||
fxt.Test__move_fwd_head(Gfh_tag_.Id__a , "<a>") ; fxt.Test__pos("2");
|
||||
fxt.Test__move_fwd_tail(Gfh_tag_.Id__a , "</a>") ; fxt.Test__pos("5");
|
||||
}
|
||||
@Test public void Recursive__diff_tags() {
|
||||
fxt.Init("1<div>2<a>3<img/>4</a>5</div>6");
|
||||
fxt.Test__move_fwd_head(Gfh_tag_.Id__div , "<div>") ; fxt.Test__pos("2");
|
||||
fxt.Test__move_fwd_tail(Gfh_tag_.Id__div , "</div>") ; fxt.Test__pos("6");
|
||||
}
|
||||
@Test public void Inline() {
|
||||
fxt.Init("1<br/>2");
|
||||
fxt.Test__move_fwd_head(Gfh_tag_.Id__br , "<br/>") ; fxt.Test__pos("2");
|
||||
}
|
||||
}
|
||||
class Gfh_tag_rdr_fxt {
|
||||
private final Gfh_tag_rdr rdr = Gfh_tag_rdr.New__html();
|
||||
public void Init(String src_str) {
|
||||
byte[] src_bry = Bry_.new_u8(src_str);
|
||||
rdr.Init(Bry_.Empty, src_bry, 0, src_bry.length);
|
||||
}
|
||||
public void Test__move_fwd_head(String expd) {Test__move_fwd_head(Gfh_tag_.Id__any, expd);}
|
||||
public void Test__move_fwd_head(int match_name_id, String expd) {
|
||||
Gfh_tag actl_tag = rdr.Tag__move_fwd_head(match_name_id).Chk_name_or_fail(match_name_id);
|
||||
Tfds.Eq_str(expd, String_.new_u8(rdr.Src(), actl_tag.Src_bgn(), actl_tag.Src_end()));
|
||||
}
|
||||
public void Test__move_fwd_tail(int match_name_id, String expd) {
|
||||
Gfh_tag actl_tag = rdr.Tag__move_fwd_tail(match_name_id);
|
||||
Tfds.Eq_str(expd, String_.new_u8(rdr.Src(), actl_tag.Src_bgn(), actl_tag.Src_end()));
|
||||
}
|
||||
public void Test__peek_fwd_head(String expd) {
|
||||
Gfh_tag actl_tag = rdr.Tag__peek_fwd_head();
|
||||
Tfds.Eq_str(expd, String_.new_u8(rdr.Src(), actl_tag.Src_bgn(), actl_tag.Src_end()));
|
||||
}
|
||||
public void Test__peek_bwd_tail(String expd_str) {
|
||||
byte[] expd_bry = Bry_.new_u8(expd_str);
|
||||
Gfh_tag actl_tag = rdr.Tag__peek_bwd_tail(-1);
|
||||
Tfds.Eq_bry(expd_bry, Bry_.Mid(rdr.Src(), actl_tag.Src_bgn(), actl_tag.Src_bgn() + expd_bry.length));
|
||||
}
|
||||
public void Test__pos(String expd_str) {
|
||||
byte[] expd_bry = Bry_.new_u8(expd_str);
|
||||
Tfds.Eq_bry(expd_bry, Bry_.Mid(rdr.Src(), rdr.Pos(), rdr.Pos() + expd_bry.length));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,7 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.htmls.docs; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
|
||||
public interface Gfh_txt_wkr {
|
||||
// Callback that receives one text range as a pair of ints.
// NOTE(review): presumably rng_bgn / rng_end are byte offsets into the html source, with rng_end exclusive; confirm against implementers
void Parse(int rng_bgn, int rng_end);
|
||||
}
|
||||
|
||||
@@ -13,3 +13,51 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.htmls.encoders; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
|
||||
import gplx.core.btries.*;
|
||||
import gplx.langs.htmls.*;
|
||||
public class Gfo_url_encoder implements Url_encoder_interface { // TS; Gfo_url_encoder_itm[] are read-only; anchor_encoder is effectively read-only
|
||||
private final Gfo_url_encoder_itm[] encode_ary, decode_ary; private final Gfo_url_encoder anchor_encoder;
|
||||
public Gfo_url_encoder(Gfo_url_encoder_itm[] encode_ary, Gfo_url_encoder_itm[] decode_ary, Gfo_url_encoder anchor_encoder) {
|
||||
this.encode_ary = encode_ary; this.decode_ary = decode_ary; this.anchor_encoder = anchor_encoder;
|
||||
}
|
||||
public String Encode_str(String str) {return String_.new_u8(Encode(Bry_.new_u8(str)));}
|
||||
public byte[] Encode_bry(String str) {return Encode(Bry_.new_u8(str));}
|
||||
public byte[] Encode(byte[] bry) {Bry_bfr bfr = Bry_bfr_.Get(); Encode(bfr, bry, 0, bry.length); return bfr.To_bry_and_rls();}
|
||||
public Bry_bfr Encode(Bry_bfr bfr, byte[] bry) { Encode(bfr, bry, 0, bry.length); return bfr;}
|
||||
public void Encode(Bry_bfr bfr, byte[] bry, int bgn, int end) {
|
||||
for (int i = bgn; i < end; ++i) {
|
||||
byte b = bry[i];
|
||||
if (anchor_encoder != null && b == Byte_ascii.Hash) {
|
||||
bfr.Add_byte(Byte_ascii.Hash);
|
||||
anchor_encoder.Encode(bfr, bry, i + 1, end);
|
||||
break;
|
||||
}
|
||||
Gfo_url_encoder_itm itm = encode_ary[b & 0xff];// PATCH.JAVA:need to convert to unsigned byte
|
||||
i += itm.Encode(bfr, bry, end, i, b);
|
||||
}
|
||||
}
|
||||
public byte[] Encode_to_file_protocol(Io_url url) {
|
||||
Bry_bfr bfr = Bry_bfr_.Get();
|
||||
bfr.Add(Io_url.Http_file_bry);
|
||||
Encode(bfr, url.RawBry());
|
||||
return bfr.To_bry_and_rls();
|
||||
}
|
||||
public String Decode_str(String str) {return String_.new_u8(Decode(Bry_.new_u8(str)));}
|
||||
public byte[] Decode(byte[] bry) {return Decode(Bool_.N, bry, 0, bry.length);}
|
||||
public byte[] Decode(byte[] bry, int bgn, int end) {return Decode(Bool_.N, bry, bgn, end);}
|
||||
private byte[] Decode(boolean fail, byte[] bry, int bgn, int end) {Bry_bfr bfr = Bry_bfr_.Get(); Decode(bfr, fail, bry, bgn, end); return bfr.To_bry_and_rls();}
|
||||
public Bry_bfr Decode(Bry_bfr bfr, boolean fail, byte[] bry, int bgn, int end) {
|
||||
for (int i = bgn; i < end; ++i) {
|
||||
byte b = bry[i];
|
||||
if (anchor_encoder != null && b == Byte_ascii.Hash) {
|
||||
bfr.Add_byte(Byte_ascii.Hash);
|
||||
anchor_encoder.Decode(bfr, Bool_.N, bry, i + 1, end);
|
||||
break;
|
||||
}
|
||||
Gfo_url_encoder_itm itm = decode_ary[b & 0xff];// PATCH.JAVA:need to convert to unsigned byte
|
||||
i += itm.Decode(bfr, bry, end, i, b, fail);
|
||||
}
|
||||
return bfr;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,121 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.htmls.encoders; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
|
||||
import gplx.core.btries.*;
|
||||
import gplx.langs.htmls.entitys.*;
|
||||
public class Gfo_url_encoder_ {
|
||||
public static Gfo_url_encoder New__id() {return Gfo_url_encoder_.New__html_id().Make();}
|
||||
public static Gfo_url_encoder_mkr New__html_id() { // EX: "<a id='a<>b'>" -> "<a id='a.C3.A9b'>"
|
||||
return new Gfo_url_encoder_mkr().Init(Byte_ascii.Dot).Init_common(Bool_.Y)
|
||||
.Init__decode_mark(Byte_ascii.Dot)
|
||||
.Init__diff__one(Byte_ascii.Space, Byte_ascii.Underline)
|
||||
.Init__html_ent(Byte_ascii.Amp, Gfh_entity_trie.Instance);
|
||||
}
|
||||
public static Gfo_url_encoder_mkr New__html_href_mw(boolean use_anchor_encoder) { // EX: "<a href='^#^'>" -> "<a href='%5E#.5E'>"; REF.MW: ";:@$!*(),/"
|
||||
return new Gfo_url_encoder_mkr().Init(Byte_ascii.Percent).Init_common(Bool_.Y)
|
||||
.Init__diff__one(Byte_ascii.Space, Byte_ascii.Underline)
|
||||
.Init__same__many
|
||||
( Byte_ascii.Semic, Byte_ascii.Colon, Byte_ascii.At, Byte_ascii.Dollar, Byte_ascii.Bang, Byte_ascii.Star
|
||||
, Byte_ascii.Paren_bgn, Byte_ascii.Paren_end, Byte_ascii.Comma, Byte_ascii.Slash
|
||||
, Byte_ascii.Hash// NOTE: not part of wfUrlEncode; not sure where this is specified; needed for A#b
|
||||
)
|
||||
.Init__anchor_encoder(use_anchor_encoder ? New__html_id().Make() : null);
|
||||
}
|
||||
private static Gfo_url_encoder_mkr New__html_href_qarg() { // same as regular href encoder, but also do not encode qarg characters "?" and "="
|
||||
return New__html_href_mw(Bool_.Y).Init__same__many(Byte_ascii.Question, Byte_ascii.Eq);
|
||||
}
|
||||
public static Gfo_url_encoder_mkr New__html_href_quotes() {// same as href encoder, but do not encode ?, =, #, +; also, don't encode "%" vals
|
||||
return new Gfo_url_encoder_mkr().Init(Byte_ascii.Percent).Init_common(Bool_.Y)
|
||||
.Init__diff__one(Byte_ascii.Space, Byte_ascii.Underline)
|
||||
.Init__same__many
|
||||
( Byte_ascii.Semic, Byte_ascii.Colon, Byte_ascii.At, Byte_ascii.Dollar, Byte_ascii.Bang, Byte_ascii.Star
|
||||
, Byte_ascii.Paren_bgn, Byte_ascii.Paren_end, Byte_ascii.Comma, Byte_ascii.Slash
|
||||
, Byte_ascii.Question, Byte_ascii.Eq, Byte_ascii.Hash, Byte_ascii.Plus// NOTE: not part of wfUrlEncode; not sure where this is specified; needed for A#b
|
||||
);
|
||||
}
|
||||
private static Gfo_url_encoder_mkr New__html_href_quotes_v2() {// same as href encoder, but do not encode ?, =, #, +; also, don't encode "%" vals
|
||||
return new Gfo_url_encoder_mkr().Init(Byte_ascii.Percent).Init_common(Bool_.Y)
|
||||
.Init__diff__one(Byte_ascii.Space, Byte_ascii.Underline)
|
||||
.Init__same__many
|
||||
( Byte_ascii.Semic, Byte_ascii.Colon, Byte_ascii.At, Byte_ascii.Dollar, Byte_ascii.Bang, Byte_ascii.Star
|
||||
, Byte_ascii.Paren_bgn, Byte_ascii.Paren_end, Byte_ascii.Comma, Byte_ascii.Slash
|
||||
, Byte_ascii.Question, Byte_ascii.Eq, Byte_ascii.Hash, Byte_ascii.Plus// NOTE: not part of wfUrlEncode; not sure where this is specified; needed for A#b
|
||||
, Byte_ascii.Percent // DATE:2016-07-12
|
||||
);
|
||||
}
|
||||
public static Gfo_url_encoder_mkr New__http_url() {
|
||||
return new Gfo_url_encoder_mkr().Init(Byte_ascii.Percent).Init_common(Bool_.N)
|
||||
.Init__diff__one(Byte_ascii.Space, Byte_ascii.Plus);
|
||||
}
|
||||
private static Gfo_url_encoder_mkr New__http_url_ttl() {
|
||||
return new Gfo_url_encoder_mkr().Init(Byte_ascii.Percent).Init_common(Bool_.Y);
|
||||
}
|
||||
public static Gfo_url_encoder_mkr New__fsys_lnx() {
|
||||
return new Gfo_url_encoder_mkr().Init(Byte_ascii.Percent).Init_common(Bool_.Y)
|
||||
.Init__same__many(Byte_ascii.Slash)
|
||||
.Init__diff__one(Byte_ascii.Backslash, Byte_ascii.Slash);
|
||||
}
|
||||
public static Gfo_url_encoder_mkr New__fsys_wnt() {
|
||||
return new Gfo_url_encoder_mkr().Init(Byte_ascii.Percent)
|
||||
.Init__same__rng(Byte_ascii.Num_0, Byte_ascii.Num_9)
|
||||
.Init__same__rng(Byte_ascii.Ltr_A, Byte_ascii.Ltr_Z)
|
||||
.Init__same__rng(Byte_ascii.Ltr_a, Byte_ascii.Ltr_z)
|
||||
.Init__same__many
|
||||
( Byte_ascii.Bang, Byte_ascii.At, Byte_ascii.Hash, Byte_ascii.Dollar, Byte_ascii.Percent, Byte_ascii.Pow, Byte_ascii.Amp
|
||||
, Byte_ascii.Plus, Byte_ascii.Eq, Byte_ascii.Underline, Byte_ascii.Dash
|
||||
, Byte_ascii.Dot, Byte_ascii.Comma
|
||||
, Byte_ascii.Tick, Byte_ascii.Tilde, Byte_ascii.Brack_bgn, Byte_ascii.Brack_end, Byte_ascii.Curly_bgn, Byte_ascii.Curly_end);
|
||||
}
|
||||
public static Gfo_url_encoder_mkr New__gfs() {
|
||||
return new Gfo_url_encoder_mkr().Init(Byte_ascii.Percent).Init_common(Bool_.Y)
|
||||
.Init__same__many(Byte_ascii.Paren_bgn, Byte_ascii.Paren_end, Byte_ascii.Apos, Byte_ascii.Semic);
|
||||
}
|
||||
public static Gfo_url_encoder_mkr New__mw_ttl() {
|
||||
return new Gfo_url_encoder_mkr().Init(Byte_ascii.Percent)
|
||||
.Init__same__rng(0, 255)
|
||||
.Init__diff__many(Byte_ascii.Percent, Byte_ascii.Amp, Byte_ascii.Apos, Byte_ascii.Eq, Byte_ascii.Plus)
|
||||
.Init__diff__one(Byte_ascii.Space, Byte_ascii.Underline)
|
||||
;
|
||||
}
|
||||
public static Gfo_url_encoder_mkr New__php_urlencode() {
|
||||
// REF: http://php.net/manual/en/function.urlencode.php;
|
||||
// "Returns a String in which all non-alphanumeric characters except -_. have been replaced with a percent (%) sign followed by two hex digits and spaces encoded as plus (+) signs"
|
||||
return new Gfo_url_encoder_mkr().Init(Byte_ascii.Percent).Init_common(Bool_.Y)
|
||||
.Init__diff__one(Byte_ascii.Space, Byte_ascii.Plus);
|
||||
}
|
||||
public static Gfo_url_encoder_mkr New__wfUrlencode() {
|
||||
// REF: GlobalFunctions.php|wfUrlencode
|
||||
// same as php_urlencode, but do not encode ";:@$!*(),/~"
|
||||
return new Gfo_url_encoder_mkr().Init(Byte_ascii.Percent).Init_common(Bool_.Y)
|
||||
.Init__diff__one(Byte_ascii.Space, Byte_ascii.Plus)
|
||||
.Init__same__many
|
||||
( Byte_ascii.Semic, Byte_ascii.At, Byte_ascii.Dollar, Byte_ascii.Bang, Byte_ascii.Star
|
||||
, Byte_ascii.Paren_bgn, Byte_ascii.Paren_end, Byte_ascii.Comma, Byte_ascii.Slash
|
||||
, Byte_ascii.Tilde
|
||||
, Byte_ascii.Colon // NOTE: MW doesn't unescape colon if IIS. However, all of WMF servers run on non-IIS boxes, so include this;
|
||||
);
|
||||
}
|
||||
public static Gfo_url_encoder_mkr New__php_rawurlencode() {
|
||||
// REF: http://php.net/manual/en/function.rawurlencode.php
|
||||
// "Returns a String in which all non-alphanumeric characters except -_.~ have been replaced with a percent (%) sign followed by two hex digits. "
|
||||
return new Gfo_url_encoder_mkr().Init(Byte_ascii.Percent).Init_common(Bool_.Y)
|
||||
.Init__same__many(Byte_ascii.Tilde)
|
||||
.Init__diff__one(Byte_ascii.Space, Byte_ascii.Plus);
|
||||
}
|
||||
public static final Gfo_url_encoder
|
||||
Id = Gfo_url_encoder_.New__html_id().Make()
|
||||
, Href = Gfo_url_encoder_.New__html_href_mw(Bool_.Y).Make()
|
||||
, Href_wo_anchor = Gfo_url_encoder_.New__html_href_mw(Bool_.N).Make()
|
||||
, Href_quotes = Gfo_url_encoder_.New__html_href_quotes().Make()
|
||||
, Href_quotes_v2 = Gfo_url_encoder_.New__html_href_quotes_v2().Make()
|
||||
, Href_qarg = Gfo_url_encoder_.New__html_href_qarg().Make()
|
||||
, Xourl = Gfo_url_encoder_.New__html_href_mw(Bool_.Y).Init__same__many(Byte_ascii.Underline).Make()
|
||||
, Http_url = Gfo_url_encoder_.New__http_url().Make()
|
||||
, Http_url_ttl = Gfo_url_encoder_.New__http_url_ttl().Make()
|
||||
, Mw_ttl = Gfo_url_encoder_.New__mw_ttl().Make()
|
||||
, Php_urlencode = Gfo_url_encoder_.New__php_urlencode().Make()
|
||||
, Php_rawurlencode = Gfo_url_encoder_.New__php_rawurlencode().Make()
|
||||
, Mw_wfUrlencode = Gfo_url_encoder_.New__wfUrlencode().Make()
|
||||
;
|
||||
}
|
||||
|
||||
@@ -13,3 +13,91 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.htmls.encoders; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
|
||||
import gplx.core.btries.*;
|
||||
import gplx.langs.htmls.entitys.*;
|
||||
public interface Gfo_url_encoder_itm {
|
||||
// Writes the encoded form of byte `b` (located at src[idx]) to bfr; `end` is the exclusive end of the range being encoded.
// Returns the number of additional source bytes consumed beyond src[idx]; the caller adds it to its loop index (0 = only `b` was consumed).
int Encode(Bry_bfr bfr, byte[] src, int end, int idx, byte b);
|
||||
// Writes the decoded form of byte `b` (located at src[idx]) to bfr; `end` is the exclusive end of the range being decoded.
// Returns the number of additional source bytes consumed beyond src[idx]; the caller adds it to its loop index (0 = only `b` was consumed).
// fail_when_invalid: implementations may throw on a malformed sequence when true, and emit `b` literally when false.
int Decode(Bry_bfr bfr, byte[] src, int end, int idx, byte b, boolean fail_when_invalid);
|
||||
}
|
||||
class Gfo_url_encoder_itm_same implements Gfo_url_encoder_itm {
|
||||
public int Encode(Bry_bfr bfr, byte[] src, int end, int idx, byte b) {bfr.Add_byte(b); return 0;}
|
||||
public int Decode(Bry_bfr bfr, byte[] src, int end, int idx, byte b, boolean fail_when_invalid) {bfr.Add_byte(b); return 0;}
|
||||
public static final Gfo_url_encoder_itm Instance = new Gfo_url_encoder_itm_same(); // TS.static
|
||||
}
|
||||
class Gfo_url_encoder_itm_diff implements Gfo_url_encoder_itm {
|
||||
private final byte orig, repl;
|
||||
public Gfo_url_encoder_itm_diff(byte orig, byte repl) {this.orig = orig; this.repl = repl;}
|
||||
public int Encode(Bry_bfr bfr, byte[] src, int end, int idx, byte b) {bfr.Add_byte(repl); return 0;}
|
||||
public int Decode(Bry_bfr bfr, byte[] src, int end, int idx, byte b, boolean fail_when_invalid) {bfr.Add_byte(orig); return 0;}
|
||||
}
|
||||
class Gfo_url_encoder_itm_hex implements Gfo_url_encoder_itm {
|
||||
private final byte encode_marker;
|
||||
public Gfo_url_encoder_itm_hex(byte encode_marker) {this.encode_marker = encode_marker;}
|
||||
public int Encode(Bry_bfr bfr, byte[] src, int end, int idx, byte b) {Encode_byte(b, bfr, encode_marker); return 0;}
|
||||
public static void Encode_byte(byte b, Bry_bfr bfr, byte encode_marker) {
|
||||
int b_int = b & 0xFF;// PATCH.JAVA:need to convert to unsigned byte
|
||||
bfr.Add_byte(encode_marker);
|
||||
bfr.Add_byte(HexBytes[b_int >> 4]);
|
||||
bfr.Add_byte(HexBytes[b_int & 15]);
|
||||
}
|
||||
public int Decode(Bry_bfr bfr, byte[] src, int end, int idx, byte b, boolean fail_when_invalid) {
|
||||
if (idx + 2 >= end) {
|
||||
if (fail_when_invalid) throw Err_.new_wo_type("decode needs 3 bytes", "idx", idx, "len", end, "snip", String_.new_u8(Bry_.Mid_by_len_safe(src, idx, 3)));
|
||||
else {
|
||||
bfr.Add_byte(b);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
int hex_val = Int_.By_hex_byte(src[idx + 1]);
|
||||
if (hex_val == -1) { // invalid hex byte; EX: %GC; DATE:2014-04-10
|
||||
bfr.Add_byte(b);
|
||||
return 0;
|
||||
}
|
||||
int v_0 = hex_val * 16;
|
||||
if (v_0 != -1) {
|
||||
int v_1 = Int_.By_hex_byte(src[idx + 2]);
|
||||
if (v_1 != -1) {
|
||||
bfr.Add_byte((byte)(v_0 + v_1));
|
||||
return 2;
|
||||
}
|
||||
}
|
||||
if (fail_when_invalid)
|
||||
throw Err_.new_wo_type("decode is invalid", "idx", idx, "snip", String_.new_u8(Bry_.Mid_by_len_safe(src, idx, 3)));
|
||||
else {
|
||||
bfr.Add_byte(b);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
public static final byte[] HexBytes = new byte[]
|
||||
{ Byte_ascii.Num_0, Byte_ascii.Num_1, Byte_ascii.Num_2, Byte_ascii.Num_3, Byte_ascii.Num_4, Byte_ascii.Num_5, Byte_ascii.Num_6, Byte_ascii.Num_7
|
||||
, Byte_ascii.Num_8, Byte_ascii.Num_9, Byte_ascii.Ltr_A, Byte_ascii.Ltr_B, Byte_ascii.Ltr_C, Byte_ascii.Ltr_D, Byte_ascii.Ltr_E, Byte_ascii.Ltr_F
|
||||
};
|
||||
}
|
||||
class Gfo_url_encoder_itm_html_ent implements Gfo_url_encoder_itm {
|
||||
private final Btrie_slim_mgr amp_trie;
|
||||
public Gfo_url_encoder_itm_html_ent(Btrie_slim_mgr amp_trie) {this.amp_trie = amp_trie;}
|
||||
public int Encode(Bry_bfr bfr, byte[] src, int end, int idx, byte b) {
|
||||
++idx; // b is &; get next character afterwards
|
||||
if (idx == end) { // & is last char; return
|
||||
Gfo_url_encoder_itm_hex.Encode_byte(Byte_ascii.Amp, bfr, Byte_ascii.Dot);
|
||||
return 0;
|
||||
}
|
||||
b = src[idx];
|
||||
Object o = amp_trie.Match_bgn_w_byte(b, src, idx, end);
|
||||
if (o == null) { // unknown entity (EX:&unknown;); return &;
|
||||
Gfo_url_encoder_itm_hex.Encode_byte(Byte_ascii.Amp, bfr, Byte_ascii.Dot);
|
||||
return 0;
|
||||
}
|
||||
else {
|
||||
Gfh_entity_itm itm = (Gfh_entity_itm)o;
|
||||
byte[] bry_u8 = itm.U8_bry(); // NOTE: must utf8 encode val; EX: is 160 but must become 192,160
|
||||
for (int i = 0; i < bry_u8.length; i++)
|
||||
Gfo_url_encoder_itm_hex.Encode_byte(bry_u8[i], bfr, Byte_ascii.Dot);
|
||||
return itm.Xml_name_bry().length - 1; // -1 to ignore & in XmlEntityName
|
||||
}
|
||||
}
|
||||
public int Decode(Bry_bfr bfr, byte[] src, int end, int idx, byte b, boolean fail_when_invalid) {
|
||||
bfr.Add_byte(b); return 0;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,69 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.htmls.encoders; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
|
||||
import gplx.core.btries.*;
|
||||
public class Gfo_url_encoder_mkr {
|
||||
private Gfo_url_encoder_itm[] encode_ary, decode_ary; private Gfo_url_encoder anchor_encoder;
|
||||
public Gfo_url_encoder_mkr Init(byte bicode_mark) {
|
||||
encode_ary = new Gfo_url_encoder_itm[256]; decode_ary = new Gfo_url_encoder_itm[256];
|
||||
Gfo_url_encoder_itm_hex hex = new Gfo_url_encoder_itm_hex(bicode_mark);
|
||||
for (int i = 0; i < 256; ++i) {
|
||||
encode_ary[i] = hex; // default encode to hex
|
||||
decode_ary[i] = Gfo_url_encoder_itm_same.Instance; // default decode to same; needed for files; EX: A!%21.png -> A!!.png;
|
||||
}
|
||||
decode_ary[bicode_mark] = hex;
|
||||
return this;
|
||||
}
|
||||
public Gfo_url_encoder_mkr Init__same__rng(int bgn, int end) {
|
||||
for (int i = bgn; i <= end; ++i)
|
||||
encode_ary[i] = decode_ary[i] = Gfo_url_encoder_itm_same.Instance;
|
||||
return this;
|
||||
}
|
||||
public Gfo_url_encoder_mkr Init__same__many(int... ary) {
|
||||
int len = ary.length;
|
||||
for (int i = 0; i < len; i++) {
|
||||
int idx = ary[i];
|
||||
encode_ary[idx] = decode_ary[idx] = Gfo_url_encoder_itm_same.Instance;
|
||||
}
|
||||
return this;
|
||||
}
|
||||
public Gfo_url_encoder_mkr Init_common(boolean encode_colon) {
|
||||
Init__same__rng(Byte_ascii.Num_0, Byte_ascii.Num_9);
|
||||
Init__same__rng(Byte_ascii.Ltr_A, Byte_ascii.Ltr_Z);
|
||||
Init__same__rng(Byte_ascii.Ltr_a, Byte_ascii.Ltr_z);
|
||||
Init__same__many(Byte_ascii.Dash, Byte_ascii.Dot, Byte_ascii.Underline);
|
||||
if (encode_colon) Init__same__many(Byte_ascii.Colon);
|
||||
return this;
|
||||
}
|
||||
public Gfo_url_encoder_mkr Init__decode_mark(byte decode_mark) {
|
||||
decode_ary[decode_mark & 0xff] = new Gfo_url_encoder_itm_hex(decode_mark);// PATCH.JAVA:need to convert to unsigned byte
|
||||
return this;
|
||||
}
|
||||
public Gfo_url_encoder_mkr Init__diff__one(byte src, byte trg) {
|
||||
Gfo_url_encoder_itm_diff itm = new Gfo_url_encoder_itm_diff(src, trg);
|
||||
encode_ary[src] = decode_ary[trg] = itm;
|
||||
return this;
|
||||
}
|
||||
public Gfo_url_encoder_mkr Init__diff__many(byte bicode_mark, int... ary) {
|
||||
Gfo_url_encoder_itm_hex hex = new Gfo_url_encoder_itm_hex(bicode_mark);
|
||||
int len = ary.length;
|
||||
for (int i = 0; i < len; i++) {
|
||||
int idx = ary[i];
|
||||
encode_ary[idx] = decode_ary[idx] = hex;
|
||||
}
|
||||
decode_ary[bicode_mark] = hex;
|
||||
return this;
|
||||
}
|
||||
public Gfo_url_encoder_mkr Init__html_ent(byte src, Btrie_slim_mgr trie) {
|
||||
Gfo_url_encoder_itm_html_ent itm = new Gfo_url_encoder_itm_html_ent(trie);
|
||||
encode_ary[src] = itm;
|
||||
return this;
|
||||
}
|
||||
public Gfo_url_encoder_mkr Init__anchor_encoder(Gfo_url_encoder v) {this.anchor_encoder = v; return this;}
|
||||
public Gfo_url_encoder Make() {
|
||||
Gfo_url_encoder rv = new Gfo_url_encoder(encode_ary, decode_ary, anchor_encoder);
|
||||
encode_ary = decode_ary = null; anchor_encoder = null;
|
||||
return rv;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,57 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.htmls.encoders; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
|
||||
import org.junit.*;
|
||||
public class Gfo_url_encoder_tst {
|
||||
private final Gfo_url_encoder_fxt fxt = new Gfo_url_encoder_fxt();
|
||||
@Test public void Id__nums() {fxt.Encoder_id().Test__bicode("0123456789" , "0123456789");}
|
||||
@Test public void Id__ltrs_lower() {fxt.Encoder_id().Test__bicode("abcdefghijklmnopqrstuvwxyz" , "abcdefghijklmnopqrstuvwxyz");}
|
||||
@Test public void Id__ltrs_upper() {fxt.Encoder_id().Test__bicode("ABCDEFGHIJKLMNOPQRSTUVWXYZ" , "ABCDEFGHIJKLMNOPQRSTUVWXYZ");}
|
||||
@Test public void Id__syms() {fxt.Encoder_id().Test__encode("!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", ".21.22.23.24.25.26.27.28.29.2A.2B.2C-..2F:.3B.3C.3D.3E.3F.40.5B.5C.5D.5E_.60.7B.7C.7D.7E");} // NOTE: not reversible since "." is encode_marker but not encoded
|
||||
@Test public void Id__foreign() {fxt.Encoder_id().Test__bicode("aéb", "a.C3.A9b");}
|
||||
@Test public void Id__nbsp() {fxt.Encoder_id().Test__encode("a b", "a.C2.A0b");} // NOTE: not just .A0 (160) but utf8-encoded .C2.A0
|
||||
@Test public void Id__space() {fxt.Encoder_id().Test__bicode("a b", "a_b");}
|
||||
@Test public void Id__err() {
|
||||
byte[] raw = Bry_.new_a7("0%.jpg");
|
||||
Bry_bfr tmp_bfr = Bry_bfr_.New();
|
||||
fxt.Encoder_id().Encoder().Decode(tmp_bfr, Bool_.N, raw, 0, raw.length);
|
||||
Tfds.Eq("0%.jpg", tmp_bfr.To_str_and_clear());
|
||||
}
|
||||
@Test public void Ttl__syms__diff() {fxt.Encoder_ttl().Test__encode(" &'=+", "_%26%27%3D%2B");}
|
||||
@Test public void Ttl__syms__same() {fxt.Encoder_ttl().Test__encode("!\"#$%()*,-./:;<>?@[\\]^_`{|}~", "!\"#$%()*,-./:;<>?@[\\]^_`{|}~");}
|
||||
@Test public void Url__syms() {fxt.Encoder_url().Test__bicode("!?^~", "%21%3F%5E%7E");}
|
||||
@Test public void Url__foreign() {fxt.Encoder_url().Test__bicode("aéb", "a%C3%A9b");}
|
||||
@Test public void Url__space() {fxt.Encoder_url().Test__bicode("a b", "a+b");}
|
||||
@Test public void Href__space() {
|
||||
fxt.Encoder_href().Test__encode("a b", "a_b");
|
||||
}
|
||||
@Test public void Href__special_and_anchor() { // PURPOSE: MediaWiki encodes with % for ttls, but . for anchors; REF:Title.php!(before-anchor)getLocalUrl;wfUrlencode (after-anchor)escapeFragmentForURL
|
||||
fxt.Encoder_href().Test__bicode("^#^", "%5E#.5E");
|
||||
fxt.Encoder_href().Test__encode("A#", "A#");
|
||||
}
|
||||
@Test public void Href__invalid() { // PURPOSE: check that invalid url decodings are rendered literally; DATE:2014-04-10
|
||||
fxt.Encoder_href().Test__encode("%GC", "%25GC");
|
||||
}
|
||||
@Test public void Fsys__wnt() {
|
||||
fxt.Encoder_fsys_safe().Test__encode("Options/HTML", "Options%2FHTML");
|
||||
}
|
||||
}
|
||||
class Gfo_url_encoder_fxt {
|
||||
public Gfo_url_encoder Encoder() {return encoder;} private Gfo_url_encoder encoder;
|
||||
public Gfo_url_encoder_fxt Encoder_id() {encoder = Gfo_url_encoder_.Id; return this;}
|
||||
public Gfo_url_encoder_fxt Encoder_href() {encoder = Gfo_url_encoder_.Href; return this;}
|
||||
public Gfo_url_encoder_fxt Encoder_url() {encoder = Gfo_url_encoder_.Http_url; return this;}
|
||||
public Gfo_url_encoder_fxt Encoder_ttl() {encoder = Gfo_url_encoder_.Mw_ttl; return this;}
|
||||
public Gfo_url_encoder_fxt Encoder_fsys_safe() {encoder = Gfo_url_encoder_.New__fsys_wnt().Make(); return this;}
|
||||
public void Test__bicode(String raw, String encoded) {
|
||||
Test__encode(raw, encoded);
|
||||
Test__decode(encoded, raw);
|
||||
}
|
||||
public void Test__encode(String raw, String expd) {
|
||||
Tfds.Eq(expd, String_.new_u8(encoder.Encode(Bry_.new_u8(raw))));
|
||||
}
|
||||
public void Test__decode(String raw, String expd) {
|
||||
Tfds.Eq(expd, String_.new_u8(encoder.Decode(Bry_.new_u8(raw))));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,11 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.htmls.styles; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
|
||||
public class Gfh_style_itm implements To_str_able {
|
||||
public Gfh_style_itm(int idx, byte[] key, byte[] val) {this.idx = idx; this.key = key; this.val = val;}
|
||||
public int Idx() {return idx;} private final int idx;
|
||||
public byte[] Key() {return key;} private final byte[] key;
|
||||
public byte[] Val() {return val;} private final byte[] val;
|
||||
public String To_str() {return String_.new_u8(Bry_.Add(key, Byte_ascii.Colon_bry, val, Byte_ascii.Semic_bry));}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,10 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.htmls.styles; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
|
||||
public class Gfh_style_key_ {
|
||||
public static final byte[]
|
||||
Bry__width = Bry_.new_a7("width")
|
||||
, Bry__margin = Bry_.new_a7("margin")
|
||||
;
|
||||
}
|
||||
|
||||
@@ -13,3 +13,55 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.htmls.styles; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
|
||||
import gplx.langs.htmls.docs.*;
|
||||
public class Gfh_style_parser_ {
|
||||
public static void Parse(Gfh_tag tag, Gfh_style_wkr wkr) {
|
||||
Gfh_atr atr = tag.Atrs__get_by_or_empty(Gfh_atr_.Bry__style);
|
||||
if (atr.Val_dat_exists())
|
||||
Parse(tag.Src(), atr.Val_bgn(), atr.Val_end(), wkr);
|
||||
}
|
||||
public static void Parse(byte[] src, int src_bgn, int src_end, Gfh_style_wkr wkr) {
|
||||
int atr_idx = 0, itm_bgn = -1, itm_end = -1, key_bgn = -1, key_end = -1, tmp_bgn = -1, tmp_end = -1;
|
||||
int pos = src_bgn;
|
||||
while (true) {
|
||||
boolean pos_is_last = pos == src_end;
|
||||
byte b = pos_is_last ? Byte_ascii.Semic : src[pos];
|
||||
switch (b) {
|
||||
case Byte_ascii.Semic:
|
||||
if (tmp_bgn != -1) { // tmp_bgn == -1 if all ws
|
||||
if (key_bgn == -1) { // handle malformed key-only pairs; EX: "style='center'"
|
||||
key_bgn = tmp_bgn;
|
||||
key_end = tmp_end;
|
||||
tmp_bgn = tmp_end = -1;
|
||||
}
|
||||
if (!wkr.On_atr(src, atr_idx, src_bgn, src_end, itm_bgn, itm_end, key_bgn, key_end, tmp_bgn, tmp_end))
|
||||
pos_is_last = true;
|
||||
}
|
||||
++atr_idx; itm_bgn = itm_end = key_bgn = key_end = tmp_bgn = tmp_end = -1;
|
||||
break;
|
||||
case Byte_ascii.Colon:
|
||||
if (key_bgn == -1) {
|
||||
key_bgn = tmp_bgn;
|
||||
key_end = tmp_end;
|
||||
tmp_bgn = -1; tmp_end = -1;
|
||||
}
|
||||
break;
|
||||
case Byte_ascii.Tab: case Byte_ascii.Nl: case Byte_ascii.Cr: case Byte_ascii.Space:
|
||||
if (itm_bgn == -1) itm_bgn = pos;
|
||||
break;
|
||||
default:
|
||||
if (itm_bgn == -1) itm_bgn = pos;
|
||||
if (tmp_bgn == -1) tmp_bgn = pos;
|
||||
tmp_end = pos + 1;
|
||||
break;
|
||||
}
|
||||
if (pos_is_last) {
|
||||
if (key_bgn != -1) // handle "k"
|
||||
wkr.On_atr(src, atr_idx, src_bgn, src_end, itm_bgn, itm_end, key_bgn, key_end, tmp_bgn, tmp_end);
|
||||
break;
|
||||
}
|
||||
++pos;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,37 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.htmls.styles; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
|
||||
import org.junit.*;
|
||||
public class Gfh_style_parser__tst {
|
||||
private final Gfh_style_parser__fxt fxt = new Gfh_style_parser__fxt();
|
||||
@Test public void Basic() {
|
||||
fxt.Test__parse("k_0:v_0" , fxt.Make("k_0", "v_0"));
|
||||
fxt.Test__parse("k_0:v_0;" , fxt.Make("k_0", "v_0"));
|
||||
fxt.Test__parse("k_0:v_0;k_1:v_1" , fxt.Make("k_0", "v_0"), fxt.Make("k_1", "v_1"));
|
||||
}
|
||||
@Test public void Ws() {
|
||||
fxt.Test__parse(" k_0 : v_0 ;" , fxt.Make("k_0", "v_0"));
|
||||
fxt.Test__parse(" k_0 : v_0 ; k_1 : v_1 " , fxt.Make("k_0", "v_0"), fxt.Make("k_1", "v_1"));
|
||||
fxt.Test__parse(" k_0 : v 0 ;" , fxt.Make("k_0", "v 0"));
|
||||
}
|
||||
@Test public void Empty() {
|
||||
fxt.Test__parse("k_0:v_0;;" , fxt.Make("k_0", "v_0"));
|
||||
fxt.Test__parse("k_0:v_0; ; " , fxt.Make("k_0", "v_0"));
|
||||
}
|
||||
@Test public void Invalid__no_semic() {
|
||||
fxt.Test__parse("k_0" , fxt.Make("k_0", ""));
|
||||
}
|
||||
@Test public void Invalid__dupe_colon() {
|
||||
fxt.Test__parse("a:b:c:d;" , fxt.Make("a", "b:c:d"));
|
||||
}
|
||||
}
|
||||
class Gfh_style_parser__fxt {
|
||||
private final Gfh_style_wkr__ary wkr = new Gfh_style_wkr__ary();
|
||||
public Gfh_style_itm Make(String k, String v) {return new Gfh_style_itm(-1, Bry_.new_u8(k), Bry_.new_u8(v));}
|
||||
public void Test__parse(String src_str, Gfh_style_itm... expd) {
|
||||
byte[] src_bry = Bry_.new_u8(src_str);
|
||||
Gfh_style_itm[] actl = wkr.Parse(src_bry, 0, src_bry.length);
|
||||
Tfds.Eq_ary_str(expd, actl);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,7 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.htmls.styles; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
|
||||
public interface Gfh_style_wkr {
  // Callback invoked by Gfh_style_parser_.Parse for each non-blank "key:val" item.
  //   src                       : bytes being parsed
  //   atr_idx                   : item counter kept by the parser (advances at each ";", including blank items)
  //   atr_val_bgn, atr_val_end  : the range that the parser was asked to scan
  //   itm_bgn                   : position of the first byte of the item (may be whitespace)
  //   itm_End                   : Gfh_style_parser_ currently always passes -1
  //   key_bgn, key_end          : range of the key, trimmed of outer whitespace
  //   val_bgn, val_end          : range of the val, trimmed of outer whitespace; (-1, -1) for a key-only item
  // Return false to make the parser stop; true to continue with the next item.
  boolean On_atr
  ( byte[] src, int atr_idx
  , int atr_val_bgn, int atr_val_end
  , int itm_bgn, int itm_End
  , int key_bgn, int key_end
  , int val_bgn, int val_end
  );
}
|
||||
|
||||
@@ -13,3 +13,17 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.htmls.styles; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
|
||||
public class Gfh_style_wkr__ary implements Gfh_style_wkr {
|
||||
private final List_adp list = List_adp_.New();
|
||||
public boolean On_atr(byte[] src, int atr_idx, int atr_val_bgn, int atr_val_end, int itm_bgn, int itm_End, int key_bgn, int key_end, int val_bgn, int val_end) {
|
||||
byte[] key = Bry_.Mid(src, key_bgn, key_end);
|
||||
byte[] val = Bry_.Mid(src, val_bgn, val_end);
|
||||
list.Add(new Gfh_style_itm(list.Count(), key, val));
|
||||
return true;
|
||||
}
|
||||
public Gfh_style_itm[] Parse(byte[] src, int src_bgn, int src_end) {
|
||||
Gfh_style_parser_.Parse(src, src_bgn, src_end, this);
|
||||
return (Gfh_style_itm[])list.To_ary_and_clear(Gfh_style_itm.class);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,21 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.htmls.styles; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
|
||||
public class Gfh_style_wkr__val_as_int implements Gfh_style_wkr {
|
||||
private byte[] find_key;
|
||||
private int val_bgn, val_end;
|
||||
public boolean On_atr(byte[] src, int atr_idx, int atr_val_bgn, int atr_val_end, int itm_bgn, int itm_End, int key_bgn, int key_end, int val_bgn, int val_end) {
|
||||
boolean rv = Bry_.Match(src, key_bgn, key_end, find_key);
|
||||
if (rv) {
|
||||
this.val_bgn = val_bgn;
|
||||
this.val_end = val_end;
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
public int Parse(byte[] src, int src_bgn, int src_end, byte[] find_key) {
|
||||
this.find_key = find_key;
|
||||
Gfh_style_parser_.Parse(src, src_bgn, src_end, this);
|
||||
return Bry_.To_int_or__lax(src, val_bgn, val_end, -1);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,66 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.jsons; import gplx.*; import gplx.langs.*;
|
||||
public class Json_ary extends Json_itm_base implements Json_grp {
|
||||
public Json_ary(int src_bgn, int src_end) {this.Ctor(src_bgn, src_end);}
|
||||
@Override public byte Tid() {return Json_itm_.Tid__ary;}
|
||||
public void Src_end_(int v) {this.src_end = v;}
|
||||
@Override public Object Data() {return null;}
|
||||
@Override public byte[] Data_bry() {return null;}
|
||||
public int Len() {return subs_len;} private int subs_len = 0, subs_max = 0;
|
||||
public Json_nde Get_at_as_nde(int i) {
|
||||
Json_itm rv = subs[i]; if (rv.Tid() != Json_itm_.Tid__nde) throw Err_.new_("json", "itm is not nde", "type", rv.Tid(), "i", i);
|
||||
return (Json_nde)rv;
|
||||
}
|
||||
public Json_itm Get_at(int i) {return subs[i];}
|
||||
public Json_nde Get_as_nde(int i) {return Json_nde.cast(subs[i]);}
|
||||
public Json_ary Add_many(Json_itm... ary) {
|
||||
int len = ary.length;
|
||||
for (int i = 0; i < len; i++)
|
||||
Add(ary[i]);
|
||||
return this;
|
||||
}
|
||||
public void Add(Json_itm itm) {
|
||||
int new_len = subs_len + 1;
|
||||
if (new_len > subs_max) { // ary too small >>> expand
|
||||
subs_max = new_len * 2;
|
||||
Json_itm[] new_subs = new Json_itm[subs_max];
|
||||
Array_.Copy_to(subs, 0, new_subs, 0, subs_len);
|
||||
subs = new_subs;
|
||||
}
|
||||
subs[subs_len] = itm;
|
||||
subs_len = new_len;
|
||||
}
|
||||
@Override public void Print_as_json(Bry_bfr bfr, int depth) {
|
||||
if (subs_len == 0) { // empty grp; print on one line (rather than printing across 3)
|
||||
bfr.Add_byte(Byte_ascii.Brack_bgn).Add_byte(Byte_ascii.Brack_end);
|
||||
return;
|
||||
}
|
||||
bfr.Add_byte_nl();
|
||||
Json_grp_.Print_indent(bfr, depth);
|
||||
bfr.Add_byte(Byte_ascii.Brack_bgn).Add_byte(Byte_ascii.Space);
|
||||
for (int i = 0; i < subs_len; i++) {
|
||||
if (i != 0) {
|
||||
Json_grp_.Print_nl(bfr); Json_grp_.Print_indent(bfr, depth);
|
||||
bfr.Add_byte(Byte_ascii.Comma).Add_byte(Byte_ascii.Space);
|
||||
}
|
||||
subs[i].Print_as_json(bfr, depth + 1);
|
||||
}
|
||||
Json_grp_.Print_nl(bfr); Json_grp_.Print_indent(bfr, depth);
|
||||
bfr.Add_byte(Byte_ascii.Brack_end).Add_byte_nl();
|
||||
}
|
||||
public byte[][] Xto_bry_ary() {
|
||||
if (subs_len == 0) return Bry_.Ary_empty;
|
||||
byte[][] rv = new byte[subs_len][];
|
||||
for (int i = 0; i < subs_len; ++i)
|
||||
rv[i] = subs[i].Data_bry();
|
||||
return rv;
|
||||
}
|
||||
private Json_itm[] subs = Json_itm_.Ary_empty;
|
||||
public static Json_ary cast_or_null(Json_itm v) {return v == null || v.Tid() != Json_itm_.Tid__ary ? null : (Json_ary)v;}
|
||||
public static Json_ary cast(Json_itm v) {
|
||||
if (v == null || v.Tid() != Json_itm_.Tid__ary) throw Err_.new_("json", "itm is not array");
|
||||
return (Json_ary)v;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,70 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.jsons; import gplx.*; import gplx.langs.*;
|
||||
import gplx.core.primitives.*;
|
||||
public class Json_doc {
|
||||
private final byte[][] tmp_qry_bry = new byte[1][];
|
||||
public void Ctor(byte[] src, Json_grp new_root) {
|
||||
this.src = src;
|
||||
this.root_grp = new_root;
|
||||
switch (root_grp.Tid()) {
|
||||
case Json_itm_.Tid__nde: this.root_ary = null; this.root_nde = (Json_nde)root_grp; break;
|
||||
case Json_itm_.Tid__ary: this.root_nde = null; this.root_ary = (Json_ary)root_grp; break;
|
||||
default: throw Err_.new_unhandled(root_grp.Tid());
|
||||
}
|
||||
}
|
||||
public byte[] Src() {return src;} private byte[] src;
|
||||
public Json_grp Root_grp() {return root_grp;} private Json_grp root_grp;
|
||||
public Json_nde Root_nde() {return root_nde;} private Json_nde root_nde;
|
||||
public Json_ary Root_ary() {return root_ary;} private Json_ary root_ary;
|
||||
public Bry_bfr Bfr() {return bfr;} private final Bry_bfr bfr = Bry_bfr_.New();
|
||||
public Gfo_number_parser Utl_num_parser() {return utl_num_parser;} private final Gfo_number_parser utl_num_parser = new Gfo_number_parser();
|
||||
public byte[] Tmp_u8_bry() {return tmp_u8_bry;} private final byte[] tmp_u8_bry = new byte[6]; // tmp bry[] for decoding sequences like \u0008
|
||||
public byte[] Get_val_as_bry_or(byte[] qry_bry, byte[] or) {tmp_qry_bry[0] = qry_bry; return Get_val_as_bry_or(tmp_qry_bry, or);}
|
||||
public byte[] Get_val_as_bry_or(byte[][] qry_bry, byte[] or) {
|
||||
Json_itm nde = Find_nde(root_nde, qry_bry, qry_bry.length - 1, 0);
|
||||
return nde == null || nde.Tid() != Json_itm_.Tid__str ? or : nde.Data_bry();
|
||||
}
|
||||
public String Get_val_as_str_or(byte[] qry_bry, String or) {tmp_qry_bry[0] = qry_bry; return Get_val_as_str_or(tmp_qry_bry, or);}
|
||||
public String Get_val_as_str_or(byte[][] qry_bry, String or) {
|
||||
Json_itm nde = Find_nde(root_nde, qry_bry, qry_bry.length - 1, 0);
|
||||
return nde == null || nde.Tid() != Json_itm_.Tid__str ? or : (String)nde.Data();
|
||||
}
|
||||
public Json_grp Get_grp(byte[] qry_bry) {
|
||||
tmp_qry_bry[0] = qry_bry;
|
||||
Json_itm rv = Find_nde(root_nde, tmp_qry_bry, 0, 0); if (rv == null) return null;
|
||||
return (Json_grp)rv;
|
||||
}
|
||||
public Json_grp Get_grp_many(String... qry_ary) {return Get_grp_many(Bry_.Ary(qry_ary));}
|
||||
public Json_grp Get_grp_many(byte[]... qry_bry) {
|
||||
Json_itm rv = Find_nde(root_nde, qry_bry, qry_bry.length - 1, 0); if (rv == null) return null;
|
||||
return (Json_grp)rv;
|
||||
}
|
||||
public Json_itm Find_nde(byte[] key) {
|
||||
tmp_qry_bry[0] = key;
|
||||
return Find_nde(root_nde, tmp_qry_bry, 0, 0);
|
||||
}
|
||||
private Json_itm Find_nde(Json_nde owner, byte[][] paths, int paths_last, int paths_idx) {
|
||||
byte[] path = paths[paths_idx];
|
||||
int subs_len = owner.Len();
|
||||
for (int i = 0; i < subs_len; i++) {
|
||||
Json_kv itm = Json_kv.cast(owner.Get_at(i)); if (itm == null) continue; // ignore simple props, arrays, ndes
|
||||
if (!itm.Key_eq(path)) continue;
|
||||
if (paths_idx == paths_last) return itm.Val();
|
||||
Json_nde sub_nde = Json_nde.cast(itm.Val()); if (sub_nde == null) return null; // match, but has not a nde; exit
|
||||
return Find_nde(sub_nde, paths, paths_last, paths_idx + 1);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
public static String Make_str_by_apos(String... ary) {return String_.Replace(String_.Concat_lines_nl_skip_last(ary), "'", "\"");}
|
||||
public static String[] Make_str_ary_by_apos(String... ary) {
|
||||
int len = ary.length;
|
||||
for (int i = 0; i < len; ++i) {
|
||||
String itm = ary[i];
|
||||
if (String_.Has(itm, "'"))
|
||||
ary[i] = String_.Replace(itm, "'", "\"");
|
||||
}
|
||||
return ary;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,28 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.jsons; import gplx.*; import gplx.langs.*;
|
||||
public class Json_doc_bldr {
|
||||
public Json_nde Nde(Json_doc jdoc) {return factory.Nde(jdoc, -1);}
|
||||
public Json_nde Nde(Json_doc jdoc, Json_grp owner) {
|
||||
Json_nde rv = factory.Nde(jdoc, -1);
|
||||
owner.Add(rv);
|
||||
return rv;
|
||||
}
|
||||
public Json_itm Str(byte[] v) {return Str(String_.new_u8(v));}
|
||||
public Json_itm Str(String v) {return Json_itm_tmp.new_str_(v);}
|
||||
public Json_itm Int(int v) {return Json_itm_tmp.new_int_(v);}
|
||||
public Json_kv Kv_int(Json_grp owner, String key, int val) {Json_kv rv = factory.Kv(Json_itm_tmp.new_str_(key), Json_itm_tmp.new_int_(val)); owner.Add(rv); return rv;}
|
||||
public Json_kv Kv_str(Json_grp owner, String key, String val) {Json_kv rv = factory.Kv(Json_itm_tmp.new_str_(key), Json_itm_tmp.new_str_(val)); owner.Add(rv); return rv;}
|
||||
public Json_ary Kv_ary(Json_grp owner, String key, Json_itm... subs) {
|
||||
Json_itm key_itm = Json_itm_tmp.new_str_(key);
|
||||
Json_ary val_ary = factory.Ary(-1, -1);
|
||||
Json_kv kv = factory.Kv(key_itm, val_ary);
|
||||
owner.Add(kv);
|
||||
int len = subs.length;
|
||||
for (int i = 0; i < len; i++)
|
||||
val_ary.Add(subs[i]);
|
||||
return val_ary;
|
||||
}
|
||||
Json_doc doc = new Json_doc(); Json_factory factory = new Json_factory();
|
||||
}
|
||||
|
||||
@@ -13,3 +13,75 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.jsons; import gplx.*; import gplx.langs.*;
|
||||
public class Json_doc_srl {
|
||||
private int indent = -1;
|
||||
private Bry_bfr bfr = Bry_bfr_.Reset(255);
|
||||
public boolean Ws_enabled() {return ws_enabled;} public void Ws_enabled_(boolean v) {ws_enabled = v;} private boolean ws_enabled = false;
|
||||
public byte[] Bld() {return bfr.To_bry_and_clear();}
|
||||
public String Bld_as_str() {return bfr.To_str_and_clear();}
|
||||
public Json_doc_srl Write_root(byte[] key, Object val) {
|
||||
Write_nde_bgn();
|
||||
Write_obj(false, key, val);
|
||||
Write_nde_end();
|
||||
return this;
|
||||
}
|
||||
public void Write_obj(boolean comma, byte[] key, Object val) {
|
||||
Class<?> t = Type_.Type_by_obj(val);
|
||||
if (Type_.Is_array(t))
|
||||
Write_kv_ary(comma, key, (Object[])val);
|
||||
else
|
||||
Write_kv_str(comma, key, Object_.Xto_str_strict_or_empty(val));
|
||||
}
|
||||
private void Write_kv_ary(boolean comma, byte[] key, Object[] val) {
|
||||
Write_key(comma, key); Write_new_line(); // '"key":\n'
|
||||
Write_ary_bgn(); // '[\n'
|
||||
Indent_add(); // -->
|
||||
int len = val.length;
|
||||
for (int i = 0; i < len; i++) {
|
||||
Write_itm_hdr(i != 0); // ', '
|
||||
Write_str(Bry_.new_u8(Object_.Xto_str_strict_or_null(val[i])));
|
||||
Write_new_line();
|
||||
}
|
||||
Indent_del();
|
||||
Write_ary_end();
|
||||
}
|
||||
private void Write_kv_str(boolean comma, byte[] key, String val) {
|
||||
Write_key(comma, key); // "key":
|
||||
Write_str(Bry_.new_u8(val)); // "val"
|
||||
Write_new_line(); // \n
|
||||
}
|
||||
private void Write_key(boolean comma, byte[] key) { // "key":
|
||||
Write_indent();
|
||||
Write_str(key);
|
||||
bfr.Add_byte(Byte_ascii.Colon);
|
||||
}
|
||||
private void Write_indent() {if (ws_enabled && indent > 0) bfr.Add_byte_repeat(Byte_ascii.Space, indent);}
|
||||
private void Write_str(byte[] v) {
|
||||
if (v == null)
|
||||
bfr.Add(Object_.Bry__null);
|
||||
else
|
||||
bfr.Add_byte(Byte_ascii.Quote).Add(v).Add_byte(Byte_ascii.Quote);
|
||||
}
|
||||
private void Write_comma(boolean comma) {
|
||||
if (comma)
|
||||
bfr.Add_byte(Byte_ascii.Comma);
|
||||
else {
|
||||
if (ws_enabled)
|
||||
bfr.Add_byte(Byte_ascii.Space);
|
||||
}
|
||||
if (ws_enabled)
|
||||
bfr.Add_byte(Byte_ascii.Space);
|
||||
}
|
||||
private void Write_ary_bgn() {Indent_add(); Write_indent(); bfr.Add_byte(Byte_ascii.Brack_bgn); Write_new_line();}
|
||||
private void Write_ary_end() { Write_indent(); bfr.Add_byte(Byte_ascii.Brack_end); Write_new_line(); Indent_del();}
|
||||
private void Write_nde_bgn() {Indent_add(); Write_indent(); bfr.Add_byte(Byte_ascii.Curly_bgn); Write_new_line();}
|
||||
private void Write_nde_end() { Write_indent(); bfr.Add_byte(Byte_ascii.Curly_end); Write_new_line(); Indent_del();}
|
||||
private void Write_itm_hdr(boolean comma) {
|
||||
Write_indent();
|
||||
Write_comma(comma);
|
||||
}
|
||||
private void Indent_add() {indent += 2;}
|
||||
private void Indent_del() {indent -= 2;}
|
||||
private void Write_new_line() {if (ws_enabled) bfr.Add_byte_nl();}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,32 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.jsons; import gplx.*; import gplx.langs.*;
|
||||
import org.junit.*;
|
||||
public class Json_doc_tst {
|
||||
private final Json_qry_mgr_fxt fxt = new Json_qry_mgr_fxt();
|
||||
@Test public void Select() {
|
||||
Json_doc doc = fxt.Make_json
|
||||
( "{'0':"
|
||||
, " {'0_0':"
|
||||
, " {'0_0_0':'000'"
|
||||
, " },"
|
||||
, " '0_1':"
|
||||
, " {'0_1_0':'010'"
|
||||
, " }"
|
||||
, " }"
|
||||
, "}"
|
||||
);
|
||||
fxt.Test_get_val_as_str(doc, "0/0_0/0_0_0", "000");
|
||||
fxt.Test_get_val_as_str(doc, "0/0_1/0_1_0", "010");
|
||||
fxt.Test_get_val_as_str(doc, "x", null);
|
||||
}
|
||||
}
|
||||
class Json_qry_mgr_fxt {
|
||||
private final Json_parser json_parser = new Json_parser();
|
||||
public Json_doc Make_json(String... ary) {return json_parser.Parse_by_apos_ary(ary);}
|
||||
public void Test_get_val_as_str(Json_doc doc, String qry, String expd){
|
||||
byte[][] qry_bry = Bry_split_.Split(Bry_.new_u8(qry), Byte_ascii.Slash);
|
||||
Tfds.Eq(expd, doc.Get_val_as_str_or(qry_bry, null));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,84 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.jsons; import gplx.*; import gplx.langs.*;
|
||||
public class Json_doc_wtr {
|
||||
private int indent = -2;
|
||||
private Bry_bfr bfr = Bry_bfr_.Reset(255);
|
||||
public Json_doc_wtr Indent() {return Indent(indent);}
|
||||
private Json_doc_wtr Indent(int v) {if (v > 0) bfr.Add_byte_repeat(Byte_ascii.Space, v); return this;}
|
||||
public Json_doc_wtr Indent_add() {indent += 2; return this;}
|
||||
public Json_doc_wtr Indent_del() {indent -= 2; return this;}
|
||||
public Json_doc_wtr Nde_bgn() {Indent_add(); Indent(); bfr.Add_byte(Byte_ascii.Curly_bgn).Add_byte_nl(); return this;}
|
||||
public Json_doc_wtr Nde_end() { Indent(); bfr.Add_byte(Byte_ascii.Curly_end).Add_byte_nl(); Indent_del(); return this;}
|
||||
public Json_doc_wtr Ary_bgn() {Indent_add(); Indent(); bfr.Add_byte(Byte_ascii.Brack_bgn).Add_byte_nl(); return this;}
|
||||
public Json_doc_wtr Ary_end() { Indent(); bfr.Add_byte(Byte_ascii.Brack_end).Add_byte_nl(); Indent_del(); return this;}
|
||||
public Json_doc_wtr New_line() {bfr.Add_byte_nl(); return this;}
|
||||
public Json_doc_wtr Str(byte[] v) {
|
||||
if (v == null)
|
||||
bfr.Add(Object_.Bry__null);
|
||||
else
|
||||
bfr.Add_byte(Byte_ascii.Quote).Add(v).Add_byte(Byte_ascii.Quote);
|
||||
return this;
|
||||
}
|
||||
public Json_doc_wtr Int(int v) {bfr.Add_int_variable(v); return this;}
|
||||
public Json_doc_wtr Double(double v) {bfr.Add_double(v); return this;}
|
||||
public Json_doc_wtr Comma() {Indent(); bfr.Add_byte(Byte_ascii.Comma).Add_byte_nl(); return this;}
|
||||
public Json_doc_wtr Kv_ary_empty(boolean comma, byte[] key) {
|
||||
Key_internal(comma, key);
|
||||
bfr.Add_byte(Byte_ascii.Brack_bgn).Add_byte(Byte_ascii.Brack_end);
|
||||
bfr.Add_byte_nl();
|
||||
return this;
|
||||
}
|
||||
public Json_doc_wtr Kv(boolean comma, byte[] key, byte[] val) {
|
||||
Key_internal(comma, key);
|
||||
Str(val);
|
||||
bfr.Add_byte_nl();
|
||||
return this;
|
||||
}
|
||||
public Json_doc_wtr Kv_double(boolean comma, byte[] key, double v) {
|
||||
Key_internal(comma, key);
|
||||
Double(v);
|
||||
bfr.Add_byte_nl();
|
||||
return this;
|
||||
}
|
||||
public Json_doc_wtr Kv(boolean comma, byte[] key, int v) {
|
||||
Key_internal(comma, key);
|
||||
Int(v);
|
||||
bfr.Add_byte_nl();
|
||||
return this;
|
||||
}
|
||||
public Json_doc_wtr Key(boolean comma, byte[] key) {
|
||||
Key_internal(comma, key);
|
||||
bfr.Add_byte_nl();
|
||||
return this;
|
||||
}
|
||||
public Json_doc_wtr Val(boolean comma, int v) {
|
||||
Val_internal(comma);
|
||||
Int(v);
|
||||
New_line();
|
||||
return this;
|
||||
}
|
||||
public Json_doc_wtr Val(boolean comma, byte[] v) {
|
||||
Val_internal(comma);
|
||||
Str(v);
|
||||
New_line();
|
||||
return this;
|
||||
}
|
||||
Json_doc_wtr Val_internal(boolean comma) {
|
||||
Indent();
|
||||
bfr.Add_byte(comma ? Byte_ascii.Comma : Byte_ascii.Space);
|
||||
bfr.Add_byte(Byte_ascii.Space);
|
||||
return this;
|
||||
}
|
||||
Json_doc_wtr Key_internal(boolean comma, byte[] key) {
|
||||
Indent();
|
||||
bfr.Add_byte(comma ? Byte_ascii.Comma : Byte_ascii.Space);
|
||||
bfr.Add_byte(Byte_ascii.Space);
|
||||
Str(key);
|
||||
bfr.Add_byte(Byte_ascii.Colon);
|
||||
return this;
|
||||
}
|
||||
public byte[] Bld() {return bfr.To_bry_and_clear();}
|
||||
public String Bld_as_str() {return bfr.To_str_and_clear();}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,16 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.jsons; import gplx.*; import gplx.langs.*;
|
||||
public class Json_factory {
|
||||
public Json_itm Null() {return Json_itm_null.Null;}
|
||||
public Json_itm Bool_n() {return Json_itm_bool.Bool_n;}
|
||||
public Json_itm Bool_y() {return Json_itm_bool.Bool_y;}
|
||||
public Json_itm_int Int(Json_doc doc, int bgn, int end) {return new Json_itm_int(doc, bgn, end);}
|
||||
public Json_itm_long Long(Json_doc doc, int bgn, int end) {return new Json_itm_long(doc, bgn, end);}
|
||||
public Json_itm Decimal(Json_doc doc, int bgn, int end) {return new Json_itm_decimal(doc, bgn, end);}
|
||||
public Json_itm Str(Json_doc doc, int bgn, int end, boolean exact) {return new Json_itm_str(doc, bgn, end, exact);}
|
||||
public Json_kv Kv(Json_itm key, Json_itm val) {return new Json_kv(key, val);}
|
||||
public Json_ary Ary(int bgn, int end) {return new Json_ary(bgn, end);}
|
||||
public Json_nde Nde(Json_doc doc, int bgn) {return new Json_nde(doc, bgn);}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,21 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.jsons; import gplx.*; import gplx.langs.*;
|
||||
public interface Json_grp extends Json_itm {
|
||||
void Src_end_(int v);
|
||||
int Len();
|
||||
Json_itm Get_at(int i);
|
||||
Json_nde Get_as_nde(int i);
|
||||
void Add(Json_itm itm);
|
||||
}
|
||||
class Json_grp_ {
|
||||
public static final Json_grp[] Ary_empty = new Json_grp[0];
|
||||
public static void Print_nl(Bry_bfr bfr) { // \n\n can be caused by nested groups (EX: "[[]]"); only print 1
|
||||
if (bfr.Bfr()[bfr.Len() - 1] != Byte_ascii.Nl)
|
||||
bfr.Add_byte_nl();
|
||||
}
|
||||
public static void Print_indent(Bry_bfr bfr, int depth) {
|
||||
if (depth > 0) bfr.Add_byte_repeat(Byte_ascii.Space, depth * 2); // indent
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,21 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.jsons; import gplx.*; import gplx.langs.*;
|
||||
public interface Json_itm {
|
||||
byte Tid();
|
||||
int Src_bgn();
|
||||
int Src_end();
|
||||
Object Data();
|
||||
byte[] Data_bry();
|
||||
void Print_as_json(Bry_bfr bfr, int depth);
|
||||
boolean Data_eq(byte[] comp);
|
||||
}
|
||||
class Json_itm_null extends Json_itm_base {
|
||||
Json_itm_null() {this.Ctor(-1, -1);}
|
||||
@Override public byte Tid() {return Json_itm_.Tid__null;}
|
||||
@Override public Object Data() {return null;}
|
||||
@Override public void Print_as_json(Bry_bfr bfr, int depth) {bfr.Add(Object_.Bry__null);}
|
||||
@Override public byte[] Data_bry() {return Object_.Bry__null;}
|
||||
public static final Json_itm_null Null = new Json_itm_null();
|
||||
}
|
||||
|
||||
@@ -13,3 +13,14 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.jsons; import gplx.*; import gplx.langs.*;
|
||||
public class Json_itm_ {
|
||||
public static final Json_itm[] Ary_empty = new Json_itm[0];
|
||||
public static final byte Tid__unknown = 0, Tid__null = 1, Tid__bool = 2, Tid__int = 3, Tid__long = 4, Tid__decimal = 5, Tid__str = 6, Tid__kv = 7, Tid__ary = 8, Tid__nde = 9;
|
||||
public static final byte[] Bry__true = Bool_.True_bry, Bry__false = Bool_.False_bry, Bry__null = Object_.Bry__null;
|
||||
public static byte[] To_bry(Bry_bfr bfr, Json_itm itm) {
|
||||
if (itm == null) return Bry_.Empty;
|
||||
itm.Print_as_json(bfr, 0);
|
||||
return bfr.To_bry_and_clear();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,15 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.jsons; import gplx.*; import gplx.langs.*;
|
||||
public abstract class Json_itm_base implements Json_itm {
|
||||
public abstract byte Tid();
|
||||
public void Ctor(int src_bgn, int src_end) {this.src_bgn = src_bgn; this.src_end = src_end;}
|
||||
public int Src_bgn() {return src_bgn;} private int src_bgn;
|
||||
public int Src_end() {return src_end;} protected int src_end;
|
||||
public abstract Object Data();
|
||||
public abstract byte[] Data_bry();
|
||||
public String Print_as_json() {Bry_bfr bfr = Bry_bfr_.New(); Print_as_json(bfr, 0); return bfr.To_str_and_clear();}
|
||||
public abstract void Print_as_json(Bry_bfr bfr, int depth);
|
||||
@gplx.Virtual public boolean Data_eq(byte[] comp) {return false;}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,14 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.jsons; import gplx.*; import gplx.langs.*;
|
||||
public class Json_itm_bool extends Json_itm_base {
|
||||
private boolean data;
|
||||
public Json_itm_bool(boolean data) {this.data = data; this.Ctor(-1, -1);}
|
||||
@Override public byte Tid() {return Json_itm_.Tid__bool;}
|
||||
public boolean Data_as_bool() {return data;}
|
||||
@Override public Object Data() {return data;}
|
||||
@Override public byte[] Data_bry() {return data ? Json_itm_.Bry__true : Json_itm_.Bry__false;}
|
||||
@Override public void Print_as_json(Bry_bfr bfr, int depth) {bfr.Add(data ? Json_itm_.Bry__true: Json_itm_.Bry__false);}
|
||||
public static final Json_itm_bool Bool_n = new Json_itm_bool(false), Bool_y = new Json_itm_bool(true);
|
||||
}
|
||||
|
||||
@@ -13,3 +13,20 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.jsons; import gplx.*; import gplx.langs.*;
|
||||
public class Json_itm_decimal extends Json_itm_base {
|
||||
private final Json_doc doc; private Decimal_adp data; private byte[] data_bry;
|
||||
public Json_itm_decimal(Json_doc doc, int src_bgn, int src_end) {this.Ctor(src_bgn, src_end); this.doc = doc;}
|
||||
@Override public byte Tid() {return Json_itm_.Tid__decimal;}
|
||||
@Override public Object Data() {return this.Data_as_decimal();}
|
||||
@Override public byte[] Data_bry() {
|
||||
if (data_bry == null) data_bry = Bry_.Mid(doc.Src(), this.Src_bgn(), this.Src_end());
|
||||
return data_bry;
|
||||
}
|
||||
public Decimal_adp Data_as_decimal() {
|
||||
if (data == null)
|
||||
data = Decimal_adp_.parse(String_.new_a7(this.Data_bry()));
|
||||
return data;
|
||||
}
|
||||
@Override public void Print_as_json(Bry_bfr bfr, int depth) {bfr.Add_mid(doc.Src(), this.Src_bgn(), this.Src_end());}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,21 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.jsons; import gplx.*; import gplx.langs.*;
|
||||
public class Json_itm_int extends Json_itm_base {
|
||||
private final Json_doc doc;
|
||||
private byte[] data_bry; private int data; private boolean data_is_null = true;
|
||||
public Json_itm_int(Json_doc doc, int src_bgn, int src_end) {this.Ctor(src_bgn, src_end); this.doc = doc;}
|
||||
@Override public byte Tid() {return Json_itm_.Tid__int;}
|
||||
public int Data_as_int() {
|
||||
if (data_is_null) {
|
||||
data = doc.Utl_num_parser().Parse(doc.Src(), Src_bgn(), Src_end()).Rv_as_int();
|
||||
data_is_null = false;
|
||||
}
|
||||
return data;
|
||||
}
|
||||
@Override public Object Data() {return Data_as_int();}
|
||||
@Override public byte[] Data_bry() {if (data_bry == null) data_bry = Bry_.Mid(doc.Src(), this.Src_bgn(), this.Src_end()); return data_bry;}
|
||||
@Override public void Print_as_json(Bry_bfr bfr, int depth) {bfr.Add_mid(doc.Src(), this.Src_bgn(), this.Src_end());}
|
||||
public static Json_itm_int cast(Json_itm v) {return v == null || v.Tid() != Json_itm_.Tid__int ? null : (Json_itm_int)v;}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,21 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.jsons; import gplx.*; import gplx.langs.*;
|
||||
public class Json_itm_long extends Json_itm_base {
|
||||
private final Json_doc doc;
|
||||
private byte[] data_bry; private long data; private boolean data_is_null = true;
|
||||
public Json_itm_long(Json_doc doc, int src_bgn, int src_end) {this.Ctor(src_bgn, src_end); this.doc = doc;}
|
||||
@Override public byte Tid() {return Json_itm_.Tid__long;}
|
||||
public long Data_as_long() {
|
||||
if (data_is_null) {
|
||||
data = doc.Utl_num_parser().Parse(doc.Src(), Src_bgn(), Src_end()).Rv_as_long();
|
||||
data_is_null = false;
|
||||
}
|
||||
return data;
|
||||
}
|
||||
@Override public Object Data() {return Data_as_long();}
|
||||
@Override public byte[] Data_bry() {if (data_bry == null) data_bry = Bry_.Mid(doc.Src(), this.Src_bgn(), this.Src_end()); return data_bry;}
|
||||
@Override public void Print_as_json(Bry_bfr bfr, int depth) {bfr.Add_mid(doc.Src(), this.Src_bgn(), this.Src_end());}
|
||||
public static Json_itm_long cast(Json_itm v) {return v == null || v.Tid() != Json_itm_.Tid__long ? null : (Json_itm_long)v;}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,65 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.jsons; import gplx.*; import gplx.langs.*;
|
||||
public class Json_itm_str extends Json_itm_base {
|
||||
private final boolean exact; private final Json_doc doc;
|
||||
private String data_str; private byte[] data_bry = null;
|
||||
public Json_itm_str(Json_doc doc, int src_bgn, int src_end, boolean exact) {this.Ctor(src_bgn + 1, src_end - 1); this.doc = doc; this.exact = exact;}
|
||||
@Override public byte Tid() {return Json_itm_.Tid__str;}
|
||||
@Override public void Print_as_json(Bry_bfr bfr, int depth) {
|
||||
bfr.Add_byte(Byte_ascii.Quote);
|
||||
gplx.langs.htmls.Gfh_utl.Escape_html_to_bfr(bfr, doc.Src(), this.Src_bgn(), this.Src_end(), true, true, true, true, false); // false to apos for backwards compatibility
|
||||
bfr.Add_byte(Byte_ascii.Quote);
|
||||
}
|
||||
@Override public Object Data() {return this.Data_as_str();}
|
||||
public String Data_as_str() {
|
||||
if (data_str == null) {
|
||||
if (data_bry == null)
|
||||
data_bry = Data_make_bry();
|
||||
data_str = String_.new_u8(data_bry);
|
||||
}
|
||||
return data_str;
|
||||
}
|
||||
@Override public byte[] Data_bry() {if (data_bry == null) data_bry = Data_make_bry(); return data_bry;}
|
||||
@Override public boolean Data_eq(byte[] comp) {
|
||||
if (exact) return Bry_.Eq(doc.Src(), this.Src_bgn(), this.Src_end(), comp);
|
||||
if (data_bry == null) data_bry = Data_make_bry();
|
||||
return Bry_.Match(data_bry, comp);
|
||||
}
|
||||
private byte[] Data_make_bry() {
|
||||
byte[] src = doc.Src(); int bgn = this.Src_bgn(), end = this.Src_end();
|
||||
if (exact) return Bry_.Mid(src, bgn, end);
|
||||
Bry_bfr bfr = doc.Bfr();
|
||||
byte[] utf8_bry = doc.Tmp_u8_bry();
|
||||
for (int i = bgn; i < end; i++) {
|
||||
byte b = src[i];
|
||||
switch (b) {
|
||||
case Byte_ascii.Backslash:
|
||||
b = src[++i];
|
||||
switch (b) { // NOTE: must properly unescape chars; EX:wd.q:2; DATE:2014-04-23
|
||||
case Byte_ascii.Ltr_t: bfr.Add_byte(Byte_ascii.Tab); break;
|
||||
case Byte_ascii.Ltr_n: bfr.Add_byte(Byte_ascii.Nl); break;
|
||||
case Byte_ascii.Ltr_r: bfr.Add_byte(Byte_ascii.Cr); break;
|
||||
case Byte_ascii.Ltr_b: bfr.Add_byte(Byte_ascii.Backfeed); break;
|
||||
case Byte_ascii.Ltr_f: bfr.Add_byte(Byte_ascii.Formfeed); break;
|
||||
case Byte_ascii.Ltr_u:
|
||||
int utf8_val = gplx.core.encoders.Hex_utl_.Parse_or(src, i + 1, i + 5, -1);
|
||||
int len = gplx.core.intls.Utf16_.Encode_int(utf8_val, utf8_bry, 0);
|
||||
bfr.Add_mid(utf8_bry, 0, len);
|
||||
i += 4;
|
||||
break; // \uFFFF 4 hex-dec
|
||||
case Byte_ascii.Backslash:
|
||||
case Byte_ascii.Slash:
|
||||
default:
|
||||
bfr.Add_byte(b); break; // \? " \ / b f n r t
|
||||
}
|
||||
break;
|
||||
default:
|
||||
bfr.Add_byte(b);
|
||||
break;
|
||||
}
|
||||
}
|
||||
return bfr.To_bry_and_clear();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,17 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.jsons; import gplx.*; import gplx.langs.*;
|
||||
public class Json_itm_tmp implements Json_itm { // TEST:
|
||||
public Json_itm_tmp(byte tid, String data) {this.tid = tid; this.data = data;}
|
||||
public byte Tid() {return tid;} private byte tid;
|
||||
public byte[] Data_bry() {return Bry_.new_u8(Object_.Xto_str_strict_or_empty(data));}
|
||||
public int Src_bgn() {return -1;}
|
||||
public int Src_end() {return -1;}
|
||||
public Object Data() {return data;} private String data;
|
||||
public void Print_as_json(Bry_bfr bfr, int depth) {bfr.Add_str_u8(data);}
|
||||
public boolean Data_eq(byte[] comp) {return false;}
|
||||
public void Clear() {}
|
||||
public static Json_itm new_str_(String v) {return new Json_itm_tmp(Json_itm_.Tid__str, "\"" + v + "\"");}
|
||||
public static Json_itm new_int_(int v) {return new Json_itm_tmp(Json_itm_.Tid__int, Int_.To_str(v));}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,25 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.jsons; import gplx.*; import gplx.langs.*;
|
||||
public class Json_kv extends Json_itm_base {
|
||||
public Json_kv(Json_itm key, Json_itm val) {this.key = key; this.val = val;}
|
||||
@Override public byte Tid() {return Json_itm_.Tid__kv;}
|
||||
public Json_itm Key() {return key;} private final Json_itm key;
|
||||
public Json_itm Val() {return val;} private final Json_itm val;
|
||||
public byte[] Key_as_bry() {return key.Data_bry();}
|
||||
public String Key_as_str() {return (String)key.Data();}
|
||||
public byte[] Val_as_bry() {return val.Data_bry();}
|
||||
public Json_nde Val_as_nde() {return Json_nde.cast(val);}
|
||||
public Json_ary Val_as_ary() {return Json_ary.cast(val);}
|
||||
public boolean Key_eq(byte[] comp) {return ((Json_itm_str)key).Data_eq(comp);}
|
||||
@Override public Object Data() {return null;}
|
||||
@Override public byte[] Data_bry() {return null;}
|
||||
@Override public void Print_as_json(Bry_bfr bfr, int depth) {
|
||||
key.Print_as_json(bfr, depth);
|
||||
bfr.Add_byte(Byte_ascii.Colon);
|
||||
val.Print_as_json(bfr, depth);
|
||||
}
|
||||
public static final Json_kv[] Ary_empty = new Json_kv[0];
|
||||
public static Json_kv cast(Json_itm v) {return v == null || v.Tid() != Json_itm_.Tid__kv ? null : (Json_kv)v;}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,47 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.jsons; import gplx.*; import gplx.langs.*;
|
||||
public class Json_kv_ary_srl {
|
||||
public static Keyval Kv_by_itm(Json_itm itm) {
|
||||
switch (itm.Tid()) {
|
||||
case Json_itm_.Tid__kv:
|
||||
Json_kv kv = (Json_kv)itm;
|
||||
return Keyval_.new_(kv.Key_as_str(), Val_by_itm(kv.Val()));
|
||||
default:
|
||||
throw Err_.new_unhandled(itm.Tid());
|
||||
}
|
||||
}
|
||||
private static Object Val_by_itm(Json_itm itm) {
|
||||
switch (itm.Tid()) {
|
||||
case Json_itm_.Tid__bool: return Bool_.To_str_lower(Bool_.Cast(itm.Data()));
|
||||
case Json_itm_.Tid__int:
|
||||
case Json_itm_.Tid__null:
|
||||
case Json_itm_.Tid__str:
|
||||
case Json_itm_.Tid__decimal: return itm.Data();
|
||||
case Json_itm_.Tid__ary: return Val_by_itm_ary((Json_ary)itm);
|
||||
case Json_itm_.Tid__nde: return Val_by_itm_nde((Json_nde)itm);
|
||||
case Json_itm_.Tid__kv: // kv should never be val; EX: "a":"b":c; not possible
|
||||
default: throw Err_.new_unhandled(itm.Tid());
|
||||
}
|
||||
}
|
||||
private static Keyval[] Val_by_itm_ary(Json_ary itm) {
|
||||
int subs_len = itm.Len();
|
||||
Keyval[] rv = new Keyval[subs_len];
|
||||
for (int i = 0; i < subs_len; i++) {
|
||||
Json_itm sub = itm.Get_at(i);
|
||||
Keyval kv = Keyval_.new_(Int_.To_str(i + Int_.Base1), Val_by_itm(sub));
|
||||
rv[i] = kv;
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
public static Keyval[] Val_by_itm_nde(Json_nde itm) {
|
||||
int subs_len = itm.Len();
|
||||
Keyval[] rv = new Keyval[subs_len];
|
||||
for (int i = 0; i < subs_len; i++) {
|
||||
Json_itm sub = itm.Get_at(i);
|
||||
rv[i] = Kv_by_itm(sub);
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,36 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.jsons; import gplx.*; import gplx.langs.*;
|
||||
import org.junit.*;
|
||||
public class Json_kv_ary_srl_tst {
|
||||
@Before public void init() {fxt.Clear();} private Json_kv_ary_srl_fxt fxt = new Json_kv_ary_srl_fxt();
|
||||
@Test public void Null() {fxt.Test_parse("{'k0':null}" , fxt.ary_(fxt.kv_str_("k0", null)));}
|
||||
@Test public void Bool_n() {fxt.Test_parse("{'k0':false}" , fxt.ary_(fxt.kv_bool_("k0", false)));}
|
||||
@Test public void Num() {fxt.Test_parse("{'k0':123}" , fxt.ary_(fxt.kv_int_("k0", 123)));}
|
||||
@Test public void Str() {fxt.Test_parse("{'k0':'v0'}" , fxt.ary_(fxt.kv_str_("k0", "v0")));}
|
||||
@Test public void Num_dec() {fxt.Test_parse("{'k0':1.23}" , fxt.ary_(fxt.kv_dec_("k0", Decimal_adp_.parse("1.23"))));}
|
||||
@Test public void Ary_int() {fxt.Test_parse("{'k0':[1,2,3]}" , fxt.ary_(fxt.kv_obj_("k0", fxt.ary_(fxt.kv_int_("1", 1), fxt.kv_int_("2", 2), fxt.kv_int_("3", 3)))));}
|
||||
@Test public void Ary_empty() {fxt.Test_parse("{'k0':[]}" , fxt.ary_(fxt.kv_obj_("k0", fxt.ary_())));}
|
||||
@Test public void Subs_int() {fxt.Test_parse("{'k0':{'k00':1,'k01':2}}" , fxt.ary_(fxt.kv_obj_("k0", fxt.ary_(fxt.kv_int_("k00", 1), fxt.kv_int_("k01", 2)))));}
|
||||
@Test public void Subs_empty() {fxt.Test_parse("{'k0':{}}" , fxt.ary_(fxt.kv_obj_("k0", fxt.ary_())));}
|
||||
}
|
||||
class Json_kv_ary_srl_fxt {
|
||||
public void Clear() {
|
||||
if (parser == null) {
|
||||
parser = new Json_parser();
|
||||
}
|
||||
} private Json_parser parser;
|
||||
public void Test_parse(String raw_str, Keyval[] expd) {
|
||||
byte[] raw_bry = Json_parser_tst.Replace_apos(Bry_.new_u8(raw_str));
|
||||
Json_doc doc = parser.Parse(raw_bry);
|
||||
Keyval[] actl = Json_kv_ary_srl.Val_by_itm_nde(doc.Root_nde());
|
||||
Tfds.Eq_str_lines(Keyval_.Ary_to_str(expd), Keyval_.Ary_to_str(actl));
|
||||
}
|
||||
public Keyval[] ary_(Keyval... ary) {return ary;}
|
||||
public Keyval kv_obj_(String key, Object val) {return Keyval_.new_(key, val);}
|
||||
public Keyval kv_str_(String key, String val) {return Keyval_.new_(key, val);}
|
||||
public Keyval kv_int_(String key, int val) {return Keyval_.new_(key, val);}
|
||||
public Keyval kv_bool_(String key, boolean val) {return Keyval_.new_(key, Bool_.To_str_lower(val));}
|
||||
public Keyval kv_dec_(String key, Decimal_adp val) {return Keyval_.new_(key, val.To_str());}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,157 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.jsons; import gplx.*; import gplx.langs.*;
|
||||
public class Json_nde extends Json_itm_base implements Json_grp {
|
||||
private Json_itm[] subs = Json_itm_.Ary_empty; private int subs_len = 0, subs_max = 0;
|
||||
private Hash_adp_bry subs_hash;
|
||||
public Json_nde(Json_doc jdoc, int src_bgn) {this.jdoc = jdoc; this.Ctor(src_bgn, -1);}
|
||||
@Override public byte Tid() {return Json_itm_.Tid__nde;}
|
||||
public Json_doc Doc() {return jdoc;} private final Json_doc jdoc;
|
||||
public void Src_end_(int v) {this.src_end = v;}
|
||||
@Override public Object Data() {return null;}
|
||||
@Override public byte[] Data_bry() {return null;}
|
||||
public int Len() {return subs_len;}
|
||||
public Json_itm Get_at(int i) {return subs[i];}
|
||||
public Json_itm Get_as_itm_or_null(byte[] key) {if (subs_hash == null) subs_hash = subs_hash_init(); return (Json_itm)subs_hash.Get_by_bry(key);}
|
||||
public Json_ary Get_as_ary(int idx) {return Json_ary.cast(Get_at(idx));}
|
||||
public Json_nde Get_as_nde(String key) {return Json_nde.cast(Get_as_itm_or_null(Bry_.new_u8(key)));}
|
||||
public Json_nde Get_as_nde(int idx) {return Json_nde.cast(Get_at(idx));}
|
||||
public Json_ary Get_as_ary(String key) {return Get_as_ary(Bry_.new_u8(key));}
|
||||
public Json_ary Get_as_ary(byte[] key) {
|
||||
Json_itm rv = Get_as_itm_or_null(key); if (rv == null) throw Err_.new_("json", "key missing", "key", key);
|
||||
return Json_ary.cast(rv);
|
||||
}
|
||||
public byte[] Get_as_bry(String key) {
|
||||
byte[] rv = Get_as_bry_or(Bry_.new_u8(key), null); if (rv == null) throw Err_.new_("json", "key missing", "key", key);
|
||||
return rv;
|
||||
}
|
||||
public byte[] Get_as_bry_or(byte[] key, byte[] or) {
|
||||
Json_itm rv = Get_as_itm_or_null(key);
|
||||
return rv == null ? or : rv.Data_bry();
|
||||
}
|
||||
public String Get_as_str(String key) {
|
||||
String rv = Get_as_str_or(key, null); if (rv == null) throw Err_.new_("json", "key missing", "key", key);
|
||||
return rv;
|
||||
}
|
||||
public String Get_as_str_or(String key, String or) {return Get_as_str_or(Bry_.new_u8(key), or);}
|
||||
public String Get_as_str_or(byte[] key, String or) {
|
||||
byte[] rv = Get_as_bry_or(key, null);
|
||||
return rv == null ? or : String_.new_u8(rv);
|
||||
}
|
||||
public int Get_as_int(String key) {
|
||||
int rv = Get_as_int_or(key, Int_.Min_value); if (rv == Int_.Min_value) throw Err_.new_("json", "key missing", "key", key);
|
||||
return rv;
|
||||
}
|
||||
public int Get_as_int_or(String key, int or) {return Get_as_int_or(Bry_.new_u8(key), or);}
|
||||
public int Get_as_int_or(byte[] key, int or) {
|
||||
byte[] rv = Get_as_bry_or(key, null);
|
||||
return rv == null ? or : Bry_.To_int(rv);
|
||||
}
|
||||
public long Get_as_long(String key) {
|
||||
long rv = Get_as_long_or(key, Long_.Min_value); if (rv == Long_.Min_value) throw Err_.new_("json", "key missing", "key", key);
|
||||
return rv;
|
||||
}
|
||||
public long Get_as_long_or(String key, long or) {return Get_as_long_or(Bry_.new_u8(key), or);}
|
||||
public long Get_as_long_or(byte[] key, long or) {
|
||||
byte[] rv = Get_as_bry_or(key, null);
|
||||
return rv == null ? or : Bry_.To_long_or(rv, or);
|
||||
}
|
||||
public boolean Get_as_bool_or(String key, boolean or) {return Get_as_bool_or(Bry_.new_u8(key), or);}
|
||||
public boolean Get_as_bool_or(byte[] key, boolean or) {
|
||||
byte[] rv = Get_as_bry_or(key, null);
|
||||
return rv == null ? or : Bry_.Eq(rv, Bool_.True_bry);
|
||||
}
|
||||
public DateAdp Get_as_date_by_utc(String key) {
|
||||
byte[] rv = Get_as_bry_or(Bry_.new_u8(key), null); if (rv == null) throw Err_.new_("json", "key missing", "key", key);
|
||||
return DateAdp_.parse_gplx(String_.new_u8(rv));
|
||||
}
|
||||
|
||||
// to convert
|
||||
public boolean Has(byte[] key) {return Get_bry(key, null) != null;}
|
||||
public Json_kv Get_at_as_kv(int i) {
|
||||
Json_itm rv_itm = Get_at(i);
|
||||
Json_kv rv = Json_kv.cast(rv_itm); if (rv == null) throw Err_.new_("json", "sub is not kv", "i", i, "src", Bry_.Mid(jdoc.Src(), this.Src_bgn(), src_end));
|
||||
return rv;
|
||||
}
|
||||
|
||||
public Json_kv Get_kv(byte[] key) {return Json_kv.cast(Get_itm(key));}
|
||||
public Json_nde Get(String key) {return Get(Bry_.new_u8(key));}
|
||||
public Json_nde Get(byte[] key) {
|
||||
Json_kv kv = Json_kv.cast(this.Get_itm(key)); if (kv == null) throw Err_.new_("json", "kv not found", "key", key);
|
||||
Json_nde rv = Json_nde.cast(kv.Val()); if (rv == null) throw Err_.new_("json", "nde not found", "key", key);
|
||||
return rv;
|
||||
}
|
||||
public Json_itm Get_itm(byte[] key) {
|
||||
for (int i = 0; i < subs_len; i++) {
|
||||
Json_itm itm = subs[i];
|
||||
if (itm.Tid() == Json_itm_.Tid__kv) {
|
||||
Json_kv itm_as_kv = (Json_kv)itm;
|
||||
if (Bry_.Eq(key, itm_as_kv.Key().Data_bry()))
|
||||
return itm;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
public Json_ary Get_ary(String key) {return Get_ary(Bry_.new_u8(key));}
|
||||
public Json_ary Get_ary(byte[] key) {return Json_ary.cast(Get_kv(key).Val_as_ary());}
|
||||
public String Get_str(String key) {return String_.new_u8(Get_bry(Bry_.new_u8(key)));}
|
||||
public byte[] Get_bry(byte[] key) {
|
||||
byte[] rv = Get_bry(key, null); if (rv == null) throw Err_.new_("json", "key missing", "key", key);
|
||||
return rv;
|
||||
}
|
||||
public byte[] Get_bry_or_null(String key) {return Get_bry(Bry_.new_u8(key), null);}
|
||||
public byte[] Get_bry_or_null(byte[] key) {return Get_bry(key, null);}
|
||||
public byte[] Get_bry(byte[] key, byte[] or) {
|
||||
Json_itm kv_obj = Get_itm(key);
|
||||
if (kv_obj == null) return or; // key not found;
|
||||
if (kv_obj.Tid() != Json_itm_.Tid__kv) return or; // key is not a key_val
|
||||
Json_kv kv = (Json_kv)kv_obj;
|
||||
Json_itm val = kv.Val();
|
||||
return (val == null) ? or : val.Data_bry();
|
||||
}
|
||||
public Json_nde Add_many(Json_itm... ary) {
|
||||
int len = ary.length;
|
||||
for (int i = 0; i < len; i++)
|
||||
Add(ary[i]);
|
||||
return this;
|
||||
}
|
||||
public void Add(Json_itm itm) {
|
||||
int new_len = subs_len + 1;
|
||||
if (new_len > subs_max) { // ary too small >>> expand
|
||||
subs_max = new_len * 2;
|
||||
Json_itm[] new_subs = new Json_itm[subs_max];
|
||||
Array_.Copy_to(subs, 0, new_subs, 0, subs_len);
|
||||
subs = new_subs;
|
||||
}
|
||||
subs[subs_len] = (Json_itm)itm;
|
||||
subs_len = new_len;
|
||||
subs_hash = null;
|
||||
}
|
||||
@Override public void Print_as_json(Bry_bfr bfr, int depth) {
|
||||
if (bfr.Len() != 0) bfr.Add_byte_nl();
|
||||
Json_grp_.Print_indent(bfr, depth);
|
||||
bfr.Add_byte(Byte_ascii.Curly_bgn).Add_byte(Byte_ascii.Space);
|
||||
for (int i = 0; i < subs_len; i++) {
|
||||
if (i != 0) {
|
||||
Json_grp_.Print_nl(bfr); Json_grp_.Print_indent(bfr, depth);
|
||||
bfr.Add_byte(Byte_ascii.Comma).Add_byte(Byte_ascii.Space);
|
||||
}
|
||||
subs[i].Print_as_json(bfr, depth + 1);
|
||||
}
|
||||
Json_grp_.Print_nl(bfr); Json_grp_.Print_indent(bfr, depth);
|
||||
bfr.Add_byte(Byte_ascii.Curly_end).Add_byte_nl();
|
||||
}
|
||||
private Hash_adp_bry subs_hash_init() {
|
||||
Hash_adp_bry rv = Hash_adp_bry.cs();
|
||||
for (int i = 0; i < subs_len; ++i) {
|
||||
Json_itm itm = subs[i];
|
||||
if (itm.Tid() == Json_itm_.Tid__kv) {
|
||||
Json_kv itm_as_kv = (Json_kv)itm;
|
||||
rv.Add(itm_as_kv.Key().Data_bry(), itm_as_kv.Val());
|
||||
}
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
public static Json_nde cast(Json_itm v) {return v == null || v.Tid() != Json_itm_.Tid__nde ? null : (Json_nde)v;}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,172 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.jsons; import gplx.*; import gplx.langs.*;
|
||||
import gplx.core.primitives.*;
|
||||
public class Json_parser {
|
||||
private byte[] src; private int src_len, pos; private final Gfo_number_parser num_parser = new Gfo_number_parser();
|
||||
public Json_factory Factory() {return factory;} private final Json_factory factory = new Json_factory();
|
||||
public Json_doc Parse_by_apos_ary(String... ary) {return Parse_by_apos(String_.Concat_lines_nl(ary));}
|
||||
public Json_doc Parse_by_apos(String s) {return Parse(Bry_.Replace(Bry_.new_u8(s), Byte_ascii.Apos, Byte_ascii.Quote));}
|
||||
public Json_doc Parse(String src) {return Parse(Bry_.new_u8(src));}
|
||||
public Json_doc Parse(byte[] src) {
|
||||
synchronized (factory) {
|
||||
this.src = src; if (src == null) return null;
|
||||
this.src_len = src.length; if (src_len == 0) return null;
|
||||
this.pos = 0;
|
||||
Skip_ws();
|
||||
boolean root_is_nde = true;
|
||||
switch (src[pos]) {
|
||||
case Byte_ascii.Curly_bgn: root_is_nde = Bool_.Y; break;
|
||||
case Byte_ascii.Brack_bgn: root_is_nde = Bool_.N; break;
|
||||
default: return null;
|
||||
}
|
||||
Skip_ws();
|
||||
Json_doc doc = new Json_doc();
|
||||
Json_grp root = null;
|
||||
if (root_is_nde)
|
||||
root = Make_nde(doc);
|
||||
else
|
||||
root = Make_ary(doc);
|
||||
doc.Ctor(src, root);
|
||||
return doc;
|
||||
}
|
||||
}
|
||||
private Json_nde Make_nde(Json_doc doc) {
|
||||
++pos; // brack_bgn
|
||||
Json_nde nde = new Json_nde(doc, pos);
|
||||
while (pos < src_len) {
|
||||
Skip_ws();
|
||||
if (src[pos] == Byte_ascii.Curly_end) {++pos; return nde;}
|
||||
else nde.Add(Make_kv(doc));
|
||||
Skip_ws();
|
||||
switch (src[pos++]) {
|
||||
case Byte_ascii.Comma: break;
|
||||
case Byte_ascii.Curly_end: return nde;
|
||||
default: throw Err_.new_unhandled(src[pos - 1]);
|
||||
}
|
||||
}
|
||||
throw Err_.new_wo_type("eos inside nde");
|
||||
}
|
||||
private Json_itm Make_kv(Json_doc doc) {
|
||||
Json_itm key = Make_string(doc);
|
||||
Skip_ws();
|
||||
Chk(Byte_ascii.Colon);
|
||||
Skip_ws();
|
||||
Json_itm val = Make_val(doc);
|
||||
return new Json_kv(key, val);
|
||||
}
|
||||
private Json_itm Make_val(Json_doc doc) {
|
||||
while (pos < src_len) {
|
||||
byte b = src[pos];
|
||||
switch (b) {
|
||||
case Byte_ascii.Ltr_n: return Make_literal(Bry_null_ull , 3, factory.Null());
|
||||
case Byte_ascii.Ltr_f: return Make_literal(Bry_bool_alse , 4, factory.Bool_n());
|
||||
case Byte_ascii.Ltr_t: return Make_literal(Bry_bool_rue , 3, factory.Bool_y());
|
||||
case Byte_ascii.Quote: return Make_string(doc);
|
||||
case Byte_ascii.Num_0: case Byte_ascii.Num_1: case Byte_ascii.Num_2: case Byte_ascii.Num_3: case Byte_ascii.Num_4:
|
||||
case Byte_ascii.Num_5: case Byte_ascii.Num_6: case Byte_ascii.Num_7: case Byte_ascii.Num_8: case Byte_ascii.Num_9:
|
||||
case Byte_ascii.Dash: return Make_num(doc);
|
||||
case Byte_ascii.Brack_bgn: return Make_ary(doc);
|
||||
case Byte_ascii.Curly_bgn: return Make_nde(doc);
|
||||
}
|
||||
throw Err_.new_unhandled(Char_.To_str(b));
|
||||
}
|
||||
throw Err_.new_wo_type("eos reached in val");
|
||||
}
|
||||
private Json_itm Make_literal(byte[] remainder, int remainder_len, Json_itm singleton) {
|
||||
++pos; // 1st char
|
||||
int literal_end = pos + remainder_len;
|
||||
if (Bry_.Eq(src, pos, literal_end, remainder)) {
|
||||
pos = literal_end;
|
||||
return singleton;
|
||||
}
|
||||
throw Err_.new_("json.parser", "invalid literal", "excerpt", Bry_.Mid_by_len_safe(src, pos - 1, 16));
|
||||
}
|
||||
private Json_itm Make_string(Json_doc doc) {
|
||||
int bgn = pos++; // ++: quote_bgn
|
||||
boolean exact = true;
|
||||
while (pos < src_len) {
|
||||
switch (src[pos]) {
|
||||
case Byte_ascii.Backslash:
|
||||
++pos; // backslash
|
||||
switch (src[pos]) {
|
||||
case Byte_ascii.Ltr_u: pos += 4; break; // \uFFFF 4 hex-dec
|
||||
default: ++pos; break; // \? " \ / b f n r t
|
||||
}
|
||||
exact = false;
|
||||
break;
|
||||
case Byte_ascii.Quote:
|
||||
return factory.Str(doc, bgn, ++pos, exact); // ++: quote_end
|
||||
default:
|
||||
++pos;
|
||||
break;
|
||||
}
|
||||
}
|
||||
throw Err_.new_wo_type("eos reached inside quote");
|
||||
}
|
||||
private Json_itm Make_num(Json_doc doc) {
|
||||
int num_bgn = pos;
|
||||
boolean loop = true;
|
||||
while (loop) {
|
||||
if (pos == src_len) throw Err_.new_wo_type("eos reached inside num");
|
||||
switch (src[pos]) {
|
||||
case Byte_ascii.Num_0: case Byte_ascii.Num_1: case Byte_ascii.Num_2: case Byte_ascii.Num_3: case Byte_ascii.Num_4:
|
||||
case Byte_ascii.Num_5: case Byte_ascii.Num_6: case Byte_ascii.Num_7: case Byte_ascii.Num_8: case Byte_ascii.Num_9:
|
||||
++pos;
|
||||
break;
|
||||
case Byte_ascii.Dot:
|
||||
case Byte_ascii.Dash: case Byte_ascii.Plus:
|
||||
case Byte_ascii.Ltr_E: case Byte_ascii.Ltr_e: // e e+ e- E E+ E-
|
||||
++pos;
|
||||
break;
|
||||
default:
|
||||
loop = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
num_parser.Parse(src, num_bgn, pos);
|
||||
if (num_parser.Has_frac())
|
||||
return factory.Decimal(doc, num_bgn, pos);
|
||||
else {
|
||||
if (num_parser.Is_int())
|
||||
return factory.Int(doc, num_bgn, pos);
|
||||
else
|
||||
return factory.Long(doc, num_bgn, pos);
|
||||
}
|
||||
}
|
||||
private Json_ary Make_ary(Json_doc doc) {
|
||||
Json_ary rv = factory.Ary(pos++, pos); // brack_bgn
|
||||
while (pos < src_len) {
|
||||
Skip_ws();
|
||||
if (src[pos] == Byte_ascii.Brack_end) {++pos; return rv;}
|
||||
else rv.Add(Make_val(doc));
|
||||
Skip_ws();
|
||||
switch (src[pos]) {
|
||||
case Byte_ascii.Comma: ++pos; break;
|
||||
case Byte_ascii.Brack_end: ++pos; return rv;
|
||||
}
|
||||
}
|
||||
throw Err_.new_wo_type("eos inside ary");
|
||||
}
|
||||
private void Skip_ws() {
|
||||
while (pos < src_len) {
|
||||
switch (src[pos]) {
|
||||
case Byte_ascii.Space: case Byte_ascii.Nl: case Byte_ascii.Tab: case Byte_ascii.Cr: ++pos; break;
|
||||
default: return;
|
||||
}
|
||||
}
|
||||
}
|
||||
private void Chk(byte expd) {
|
||||
if (src[pos] == expd)
|
||||
++pos;
|
||||
else
|
||||
throw err_(src, pos, "expected '{0}' but got '{1}'", Char_.To_str(expd), Char_.To_str(src[pos]));
|
||||
}
|
||||
private Err err_(byte[] src, int bgn, String fmt, Object... args) {return err_(src, bgn, src.length, fmt, args);}
|
||||
private Err err_(byte[] src, int bgn, int src_len, String fmt, Object... args) {
|
||||
String msg = String_.Format(fmt, args) + " " + Int_.To_str(bgn) + " " + String_.new_u8__by_len(src, bgn, 20);
|
||||
return Err_.new_wo_type(msg);
|
||||
}
|
||||
private static final byte[] Bry_bool_rue = Bry_.new_a7("rue"), Bry_bool_alse = Bry_.new_a7("alse"), Bry_null_ull = Bry_.new_a7("ull");
|
||||
}
|
||||
|
||||
@@ -13,3 +13,60 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.jsons; import gplx.*; import gplx.langs.*;
|
||||
import gplx.core.primitives.*;
|
||||
public abstract class Json_parser__itm__base {
|
||||
protected String context;
|
||||
protected final Hash_adp_bry hash = Hash_adp_bry.cs();
|
||||
protected final Bry_bfr tmp_bfr = Bry_bfr_.New_w_size(255);
|
||||
protected String[] keys;
|
||||
protected Json_kv[] atrs;
|
||||
protected Json_itm cur_itm;
|
||||
protected int keys_len;
|
||||
public void Ctor(String... keys) {
|
||||
this.keys = keys;
|
||||
this.keys_len = keys.length;
|
||||
for (int i = 0; i < keys_len; ++i)
|
||||
hash.Add(Bry_.new_u8(keys[i]), new Int_obj_val(i));
|
||||
this.atrs = new Json_kv[keys_len];
|
||||
}
|
||||
public int Kv__int(Json_kv[] ary, int i) {return Bry_.To_int(ary[i].Val_as_bry());}
|
||||
public long Kv__long(Json_kv[] ary, int i) {return Bry_.To_long_or(ary[i].Val_as_bry(), 0);}
|
||||
public long Kv__long_or_0(Json_kv[] ary, int i) {
|
||||
Json_kv kv = ary[i]; if (kv == null) return 0;
|
||||
return Bry_.To_long_or(kv.Val_as_bry(), 0);
|
||||
}
|
||||
public byte[] Kv__bry(Json_kv[] ary, int i) {
|
||||
byte[] rv = Kv__bry_or_null(ary, i); if (rv == null) throw Err_.new_("json.parser", "missing val", "key", context + "." + keys[i], "excerpt", Json_itm_.To_bry(tmp_bfr, cur_itm));
|
||||
return rv;
|
||||
}
|
||||
public byte[][] Kv__bry_ary(Json_kv[] ary, int i) {
|
||||
return ary[i].Val_as_ary().Xto_bry_ary();
|
||||
}
|
||||
public byte[] Kv__bry_or_empty(Json_kv[] ary, int i) {
|
||||
byte[] rv = Kv__bry_or_null(ary, i);
|
||||
return rv == null ? Bry_.Empty : rv;
|
||||
}
|
||||
public byte[] Kv__bry_or_null(Json_kv[] ary, int i) {
|
||||
Json_kv kv = ary[i]; if (kv == null) return null;
|
||||
Json_itm val = kv.Val();
|
||||
return kv == null ? null : val.Data_bry();
|
||||
}
|
||||
public boolean Kv__mw_bool(Json_kv[] ary, int i) {
|
||||
Json_kv kv = ary[i]; if (kv == null) return false;
|
||||
Json_itm val = kv.Val();
|
||||
if ( val.Tid() == Json_itm_.Tid__str
|
||||
&& Bry_.Len_eq_0(val.Data_bry())) {
|
||||
return true;
|
||||
}
|
||||
else {
|
||||
Warn("unknown val: val=" + String_.new_u8(kv.Data_bry()) + " excerpt=" + String_.new_u8(Json_itm_.To_bry(tmp_bfr, cur_itm)), kv);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
public boolean Kv__has(Json_kv[] ary, int i) {return Kv__bry_or_empty(ary, i) != null;}
|
||||
protected abstract void Parse_hook_nde(Json_nde sub, Json_kv[] atrs);
|
||||
protected void Warn(String msg, Json_kv kv) {
|
||||
Gfo_usr_dlg_.Instance.Warn_many("", "", msg + ": path=~{0}.~{1} excerpt=~{2}", context, kv.Key_as_bry(), Json_itm_.To_bry(tmp_bfr, cur_itm));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,56 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.jsons; import gplx.*; import gplx.langs.*;
|
||||
import gplx.core.primitives.*;
|
||||
public class Json_parser__list_nde__base extends Json_parser__itm__base {
|
||||
public void Parse_grp(String context, Json_grp grp) {
|
||||
this.context = context;
|
||||
int len = grp.Len();
|
||||
for (int i = 0; i < len; ++i) {
|
||||
Json_nde sub = null;
|
||||
if (grp.Tid() == Json_itm_.Tid__nde) {
|
||||
Json_kv kv = Json_nde.cast(grp).Get_at_as_kv(i);
|
||||
sub = kv.Val_as_nde();
|
||||
}
|
||||
else {
|
||||
sub = Json_nde.cast(grp.Get_at(i));
|
||||
}
|
||||
Parse_nde(context, sub);
|
||||
}
|
||||
}
|
||||
public void Parse_nde(String context, Json_nde nde) {
|
||||
this.cur_itm = nde;
|
||||
for (int j = 0; j < keys_len; ++j)
|
||||
atrs[j] = null;
|
||||
int atr_len = nde.Len();
|
||||
for (int j = 0; j < atr_len; ++j) {
|
||||
Json_kv atr = nde.Get_at_as_kv(j);
|
||||
Object idx_obj = hash.Get_by_bry(atr.Key_as_bry());
|
||||
if (idx_obj == null) {Warn("unknown json parser key", atr); continue;}
|
||||
int idx_int = ((Int_obj_val)idx_obj).Val();
|
||||
atrs[idx_int] = atr;
|
||||
}
|
||||
Parse_hook_nde(nde, atrs);
|
||||
}
|
||||
public void Parse_to_list_as_bry(String context, Json_ary ary, Ordered_hash list) {
|
||||
this.cur_itm = ary;
|
||||
int len = ary.Len();
|
||||
for (int i = 0; i < len; ++i) {
|
||||
byte[] val = ary.Get_at(i).Data_bry();
|
||||
list.Add(val, val);
|
||||
}
|
||||
}
|
||||
public void Parse_to_list_as_kv(String context, Json_nde nde, Ordered_hash list) {
|
||||
this.cur_itm = nde;
|
||||
int len = nde.Len();
|
||||
for (int i = 0; i < len; ++i) {
|
||||
Json_kv sub = nde.Get_at_as_kv(i);
|
||||
byte[] key = sub.Key_as_bry();
|
||||
byte[] val = Parse_to_list_as_kv__get_val(sub, key);
|
||||
list.Add(key, Keyval_.new_(String_.new_u8(key), String_.new_u8(val)));
|
||||
}
|
||||
}
|
||||
@gplx.Virtual protected byte[] Parse_to_list_as_kv__get_val(Json_kv sub, byte[] key) {return sub.Val_as_bry();}
|
||||
@Override protected void Parse_hook_nde(Json_nde sub, Json_kv[] atrs) {}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,86 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.jsons; import gplx.*; import gplx.langs.*;
|
||||
import org.junit.*;
|
||||
public class Json_parser_tst {
|
||||
private final Json_parser_fxt fxt = new Json_parser_fxt();
|
||||
@Before public void init() {fxt.Clear();}
|
||||
@Test public void Null() {fxt.Test_parse_val0("{'k0':null}" , null);}
|
||||
@Test public void Bool_n() {fxt.Test_parse_val0("{'k0':false}" , false);}
|
||||
@Test public void Bool_y() {fxt.Test_parse_val0("{'k0':true}" , true);}
|
||||
@Test public void Num() {fxt.Test_parse_val0("{'k0':123}" , 123);}
|
||||
@Test public void Num_neg() {fxt.Test_parse_val0("{'k0':-123}" , -123);}
|
||||
@Test public void Str() {fxt.Test_parse_val0("{'k0':'v0'}" , "v0");}
|
||||
@Test public void Str_esc_quote() {fxt.Test_parse_val0("{'k0':'a\\\"b'}" , "a\"b");}
|
||||
@Test public void Str_esc_hex4() {fxt.Test_parse_val0("{'k0':'a\\u0021b'}" , "a!b");}
|
||||
@Test public void Num_dec() {fxt.Test_parse("{'k0':1.23}" , fxt.itm_nde_().Add_many(fxt.itm_kv_dec_("k0", "1.23")));}
|
||||
@Test public void Num_exp() {fxt.Test_parse("{'k0':1e+2}" , fxt.itm_nde_().Add_many(fxt.itm_kv_dec_("k0", "1e+2")));}
|
||||
@Test public void Num_mix() {fxt.Test_parse("{'k0':-1.23e-1}" , fxt.itm_nde_().Add_many(fxt.itm_kv_dec_("k0", "-1.23e-1")));}
|
||||
@Test public void Str_many() {fxt.Test_parse("{'k0':'v0','k1':'v1','k2':'v2'}", fxt.itm_nde_().Add_many(fxt.itm_kv_("k0", "v0"), fxt.itm_kv_("k1", "v1"), fxt.itm_kv_("k2", "v2")));}
|
||||
@Test public void Ary_empty() {fxt.Test_parse("{'k0':[]}", fxt.itm_nde_().Add_many(fxt.itm_kv_ary_int_("k0")));}
|
||||
@Test public void Ary_int() {fxt.Test_parse("{'k0':[1,2,3]}", fxt.itm_nde_().Add_many(fxt.itm_kv_ary_int_("k0", 1, 2, 3)));}
|
||||
@Test public void Ary_str() {fxt.Test_parse("{'k0':['a','b','c']}", fxt.itm_nde_().Add_many(fxt.itm_kv_ary_str_("k0", "a", "b", "c")));}
|
||||
@Test public void Ary_ws() {fxt.Test_parse("{'k0': [ 1 , 2 , 3 ] }", fxt.itm_nde_().Add_many(fxt.itm_kv_ary_int_("k0", 1, 2, 3)));}
|
||||
@Test public void Subs_int() {fxt.Test_parse("{'k0':{'k00':1}}", fxt.itm_nde_().Add_many(fxt.itm_kv_("k0", fxt.itm_nde_().Add_many(fxt.itm_kv_("k00", 1)))));}
|
||||
@Test public void Subs_empty() {fxt.Test_parse("{'k0':{}}", fxt.itm_nde_().Add_many(fxt.itm_kv_("k0", fxt.itm_nde_())));}
|
||||
@Test public void Subs_ws() {fxt.Test_parse("{'k0': { 'k00' : 1 } }", fxt.itm_nde_().Add_many(fxt.itm_kv_("k0", fxt.itm_nde_().Add_many(fxt.itm_kv_("k00", 1)))));}
|
||||
@Test public void Ws() {fxt.Test_parse(" { 'k0' : 'v0' } ", fxt.itm_nde_().Add_many(fxt.itm_kv_("k0", "v0")));}
|
||||
@Test public void Root_is_ary() {fxt.Test_parse("[ 1 , 2 , 3 ]", fxt.itm_ary_().Add_many(fxt.itm_int_(1), fxt.itm_int_(2), fxt.itm_int_(3)));}
|
||||
public static String Replace_apos_as_str(String v) {return String_.new_u8(Replace_apos(Bry_.new_u8(v)));}
|
||||
public static byte[] Replace_apos(byte[] v) {return Bry_.Replace(v, Byte_ascii.Apos, Byte_ascii.Quote);}
|
||||
}
|
||||
class Json_parser_fxt {
|
||||
public void Clear() {
|
||||
if (parser == null) {
|
||||
parser = new Json_parser();
|
||||
factory = parser.Factory();
|
||||
}
|
||||
} Json_parser parser; Json_factory factory; Bry_bfr tmp_bfr = Bry_bfr_.Reset(255);
|
||||
public Json_itm itm_int_(int v) {return Json_itm_tmp.new_int_(v);}
|
||||
Json_itm itm_str_(String v) {return Json_itm_tmp.new_str_(v);}
|
||||
public Json_ary itm_ary_() {return factory.Ary(-1, -1);}
|
||||
public Json_nde itm_nde_() {return factory.Nde(null, -1);}
|
||||
public Json_kv itm_kv_null_(String k) {return factory.Kv(itm_str_(k), factory.Null());}
|
||||
public Json_kv itm_kv_(String k, String v) {return factory.Kv(itm_str_(k), itm_str_(v));}
|
||||
public Json_kv itm_kv_(String k, int v) {return factory.Kv(itm_str_(k), itm_int_(v));}
|
||||
public Json_kv itm_kv_(String k, boolean v) {return factory.Kv(itm_str_(k), v ? factory.Bool_y() : factory.Bool_n());}
|
||||
public Json_kv itm_kv_dec_(String k, String v) {return factory.Kv(itm_str_(k), new Json_itm_tmp(Json_itm_.Tid__decimal, v));}
|
||||
public Json_kv itm_kv_(String k, Json_nde v) {return factory.Kv(itm_str_(k), v);}
|
||||
public Json_kv itm_kv_ary_int_(String k, int... v) {
|
||||
Json_ary ary = factory.Ary(-1, -1);
|
||||
int len = v.length;
|
||||
for (int i = 0; i < len; i++)
|
||||
ary.Add(itm_int_(v[i]));
|
||||
return factory.Kv(itm_str_(k), ary);
|
||||
}
|
||||
public Json_kv itm_kv_ary_str_(String k, String... v) {
|
||||
Json_ary ary = factory.Ary(-1, -1);
|
||||
int len = v.length;
|
||||
for (int i = 0; i < len; i++)
|
||||
ary.Add(itm_str_(v[i]));
|
||||
return factory.Kv(itm_str_(k), ary);
|
||||
}
|
||||
public void Test_parse(String raw_str, Json_itm... expd_ary) {
|
||||
byte[] raw = Json_parser_tst.Replace_apos(Bry_.new_u8(raw_str));
|
||||
Json_doc doc = parser.Parse(raw);
|
||||
doc.Root_grp().Print_as_json(tmp_bfr, 0);
|
||||
String actl = tmp_bfr.To_str_and_clear();
|
||||
String expd = Xto_str(raw, doc, expd_ary, 0, expd_ary.length);
|
||||
Tfds.Eq_str_lines(expd, actl, actl);
|
||||
}
|
||||
public void Test_parse_val0(String raw_str, Object expd) {
|
||||
byte[] raw = Json_parser_tst.Replace_apos(Bry_.new_u8(raw_str));
|
||||
Json_doc doc = parser.Parse(raw);
|
||||
Json_kv kv = Json_kv.cast(doc.Root_nde().Get_at(0)); // assume root has kv as first sub; EX: {"a":"b"}
|
||||
Object actl = kv.Val().Data(); // NOTE: Data_bry is escaped val; EX: a\"b has DataBry of a"b
|
||||
Tfds.Eq(expd, actl);
|
||||
}
|
||||
String Xto_str(byte[] raw, Json_doc doc, Json_itm[] ary, int bgn, int end) {
|
||||
for (int i = bgn; i < end; i++) {
|
||||
Json_itm itm = ary[i];
|
||||
itm.Print_as_json(tmp_bfr, 0);
|
||||
}
|
||||
return tmp_bfr.To_str_and_clear();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,40 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.jsons; import gplx.*; import gplx.langs.*;
|
||||
public class Json_printer {
|
||||
private final Json_parser parser = new Json_parser();
|
||||
private final Json_wtr wtr = new Json_wtr();
|
||||
public Json_printer Opt_quote_byte_(byte v) {wtr.Opt_quote_byte_(v); return this;}
|
||||
public Json_wtr Wtr() {return wtr;}
|
||||
public byte[] To_bry() {return wtr.To_bry_and_clear();}
|
||||
public String To_str() {return wtr.To_str_and_clear();}
|
||||
public Json_printer Print_by_bry(byte[] src) {
|
||||
Json_doc jdoc = parser.Parse(src);
|
||||
return (jdoc.Root_grp().Tid() == Json_itm_.Tid__ary)
|
||||
? Print_by_ary(jdoc.Root_ary())
|
||||
: Print_by_nde(jdoc.Root_nde())
|
||||
;
|
||||
}
|
||||
public Json_printer Print_by_ary(Json_ary ary) {
|
||||
wtr.Doc_ary_bgn();
|
||||
int len = ary.Len();
|
||||
for (int i = 0; i < len; ++i) {
|
||||
Json_itm itm = ary.Get_at(i);
|
||||
wtr.Ary_itm_obj(wtr.Get_x(itm));
|
||||
}
|
||||
wtr.Doc_ary_end();
|
||||
return this;
|
||||
}
|
||||
public Json_printer Print_by_nde(Json_nde nde) {
|
||||
wtr.Doc_nde_bgn();
|
||||
int len = nde.Len();
|
||||
for (int i = 0; i < len; ++i) {
|
||||
Json_kv kv = nde.Get_at_as_kv(i);
|
||||
Object kv_val = wtr.Get_x(kv.Val());
|
||||
wtr.Kv_obj(kv.Key_as_bry(), kv_val, Type_ids_.To_id_by_obj(kv_val));
|
||||
}
|
||||
wtr.Doc_nde_end();
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,74 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.jsons; import gplx.*; import gplx.langs.*;
|
||||
import org.junit.*;
|
||||
public class Json_printer_tst {
|
||||
private final Json_printer_fxt fxt = new Json_printer_fxt();
|
||||
@Test public void Root_nde() {
|
||||
fxt.Test_print(Json_doc.Make_str_by_apos("{'k1':'v1','k2':'v2'}"), String_.Concat_lines_nl
|
||||
( "{ 'k1':'v1'"
|
||||
, ", 'k2':'v2'"
|
||||
, "}"
|
||||
));
|
||||
}
|
||||
@Test public void Root_ary() {
|
||||
fxt.Test_print(Json_doc.Make_str_by_apos("[1,2,3]"), String_.Concat_lines_nl
|
||||
( "[ 1"
|
||||
, ", 2"
|
||||
, ", 3"
|
||||
, "]"
|
||||
));
|
||||
}
|
||||
@Test public void Ary_w_ary() {
|
||||
fxt.Test_print(Json_doc.Make_str_by_apos("[[1,2],[3,4]]"), String_.Concat_lines_nl
|
||||
( "[ "
|
||||
, " [ 1"
|
||||
, " , 2"
|
||||
, " ]"
|
||||
, ", "
|
||||
, " [ 3"
|
||||
, " , 4"
|
||||
, " ]"
|
||||
, "]"
|
||||
));
|
||||
}
|
||||
@Test public void Ary_w_nde() {
|
||||
fxt.Test_print(Json_doc.Make_str_by_apos("[{'k1':'v1','k2':'v2'},{'k3':'v3','k4':'v4'}]"), String_.Concat_lines_nl
|
||||
( "[ "
|
||||
, " { 'k1':'v1'"
|
||||
, " , 'k2':'v2'"
|
||||
, " }"
|
||||
, ", "
|
||||
, " { 'k3':'v3'"
|
||||
, " , 'k4':'v4'"
|
||||
, " }"
|
||||
, "]"
|
||||
));
|
||||
}
|
||||
@Test public void Nde_w_ary() {
|
||||
fxt.Test_print(Json_doc.Make_str_by_apos("{'k1':[1,2],'k2':[3,4]}"), String_.Concat_lines_nl
|
||||
( "{ 'k1':"
|
||||
, " [ 1"
|
||||
, " , 2"
|
||||
, " ]"
|
||||
, ", 'k2':"
|
||||
, " [ 3"
|
||||
, " , 4"
|
||||
, " ]"
|
||||
, "}"
|
||||
));
|
||||
}
|
||||
// @Test public void Smoke() {
|
||||
// Json_printer printer = new Json_printer();
|
||||
// String url = "C:\\temp.json";
|
||||
// String s = printer.Pretty_print_as_str(Bry_.new_u8(Io_mgr.Instance.LoadFilStr(url)));
|
||||
// Io_mgr.Instance.SaveFilStr(url, s);
|
||||
// }
|
||||
}
|
||||
class Json_printer_fxt {
|
||||
private final Json_printer printer = new Json_printer().Opt_quote_byte_(Byte_ascii.Apos);
|
||||
public void Test_print(String raw, String expd) {
|
||||
Tfds.Eq_str_lines(expd, printer.Print_by_bry(Bry_.new_u8(raw)).To_str());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,300 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.jsons; import gplx.*; import gplx.langs.*;
|
||||
import gplx.core.primitives.*;
|
||||
public class Json_wtr {
|
||||
private final Bry_bfr bfr = Bry_bfr_.New_w_size(255);
|
||||
private final Int_ary idx_stack = new Int_ary(4);
|
||||
private int idx = 0;
|
||||
public Bry_bfr Bfr() {return bfr;}
|
||||
public void Indent_(int v) {this.indent = v;} private int indent;
|
||||
public byte Opt_quote_byte() {return opt_quote_byte;} public Json_wtr Opt_quote_byte_(byte v) {opt_quote_byte = v; return this;} private byte opt_quote_byte = Byte_ascii.Quote;
|
||||
public boolean Opt_ws() {return opt_ws;} public Json_wtr Opt_ws_(boolean v) {opt_ws = v; return this;} private boolean opt_ws = true;
|
||||
public boolean Opt_backslash_2x() {return opt_backslash_2x;} public Json_wtr Opt_backslash_2x_(boolean v) {opt_backslash_2x = v; return this;} private boolean opt_backslash_2x = false;
|
||||
public byte[] To_bry_and_clear() {return bfr.To_bry_and_clear();}
|
||||
public String To_str_and_clear() {return bfr.To_str_and_clear();}
|
||||
public Json_wtr () {this.Clear();}
|
||||
public Json_wtr Clear() {
|
||||
indent = -1;
|
||||
idx_stack.Clear();
|
||||
idx = 0;
|
||||
return this;
|
||||
}
|
||||
public Json_wtr Doc_nde_bgn() {return Write_grp_bgn(Sym_nde_bgn);}
|
||||
public Json_wtr Doc_nde_end() {Write_grp_end(Bool_.Y, Sym_nde_end); return Write_nl();}
|
||||
public Json_wtr Doc_ary_bgn() {return Write_grp_bgn(Sym_ary_bgn);}
|
||||
public Json_wtr Doc_ary_end() {Write_grp_end(Bool_.N, Sym_ary_end); return Write_nl();}
|
||||
public Json_wtr Nde_bgn_ary() {return Nde_bgn(Bry_.Empty);}
|
||||
public Json_wtr Nde_bgn(String key) {return Nde_bgn(Bry_.new_u8(key));}
|
||||
public Json_wtr Nde_bgn(byte[] key) {
|
||||
Write_indent_itm();
|
||||
if (key == Bry_.Empty) {
|
||||
if (opt_ws) bfr.Del_by_1(); // remove trailing space from Write_indent_itm
|
||||
++idx;
|
||||
}
|
||||
else
|
||||
Write_key(key);
|
||||
Write_nl();
|
||||
return Write_grp_bgn(Sym_nde_bgn);
|
||||
}
|
||||
public Json_wtr Nde_end() {
|
||||
Write_grp_end(Bool_.Y, Sym_nde_end);
|
||||
return Write_nl();
|
||||
}
|
||||
public Json_wtr Ary_bgn_ary() {return Ary_bgn(String_.Empty);}
|
||||
public Json_wtr Ary_bgn(String nde) {
|
||||
Write_indent_itm();
|
||||
if (nde == String_.Empty) {
|
||||
if (opt_ws) bfr.Del_by_1(); // remove trailing space from Write_indent_itm
|
||||
++idx;
|
||||
}
|
||||
else
|
||||
Write_key(Bry_.new_u8(nde));
|
||||
return Ary_bgn_keyless();
|
||||
}
|
||||
private Json_wtr Ary_bgn_keyless() {
|
||||
Write_nl();
|
||||
return Write_grp_bgn(Sym_ary_bgn);
|
||||
}
|
||||
public Json_wtr Ary_itm_str(String itm) {return Ary_itm_by_type_tid(Type_ids_.Id__str, itm);}
|
||||
public Json_wtr Ary_itm_bry(byte[] itm) {return Ary_itm_by_type_tid(Type_ids_.Id__bry, itm);}
|
||||
public Json_wtr Ary_itm_obj(Object itm) {return Ary_itm_by_type_tid(Type_ids_.To_id_by_obj(itm), itm);}
|
||||
public Json_wtr Ary_itm_by_type_tid(int itm_type_tid, Object itm) {
|
||||
Write_indent_itm();
|
||||
Write_val_obj(Bool_.Y, itm_type_tid, itm);
|
||||
Write_nl();
|
||||
++idx;
|
||||
return this;
|
||||
}
|
||||
public Json_wtr Ary_end() {
|
||||
Write_grp_end(Bool_.N, Sym_ary_end);
|
||||
return Write_nl();
|
||||
}
|
||||
public Json_wtr Kv_bool_as_mw(String key, boolean val) {
|
||||
if (val) Kv_bry(key, Bry_.Empty); // if true, write 'key:""'; if false, write nothing
|
||||
return this;
|
||||
}
|
||||
public Json_wtr Kv_bool(String key, boolean val) {return Kv_bool(Bry_.new_u8(key), val);}
|
||||
public Json_wtr Kv_bool(byte[] key, boolean val) {return Kv_raw(key, val ? Bool_.True_bry : Bool_.False_bry);}
|
||||
public Json_wtr Kv_int(String key, int val) {return Kv_raw(Bry_.new_u8(key), Int_.To_bry(val));}
|
||||
public Json_wtr Kv_long(String key, long val) {return Kv_raw(Bry_.new_u8(key), Bry_.new_a7(Long_.To_str(val)));}
|
||||
public Json_wtr Kv_float(String key, float val) {return Kv_raw(Bry_.new_u8(key), Bry_.new_a7(Float_.To_str(val)));}
|
||||
public Json_wtr Kv_double(String key, double val) {return Kv_raw(Bry_.new_u8(key), Bry_.new_a7(Double_.To_str(val)));}
|
||||
private Json_wtr Kv_raw(byte[] key, byte[] val) {
|
||||
Write_indent_itm();
|
||||
Write_key(key);
|
||||
bfr.Add(val);
|
||||
Write_nl();
|
||||
return this;
|
||||
}
|
||||
public Json_wtr Kv_str(String key, String val) {return Kv_bry(Bry_.new_u8(key), val == null ? null : Bry_.new_u8(val));}
|
||||
public Json_wtr Kv_str(byte[] key, String val) {return Kv_bry(key, Bry_.new_u8(val));}
|
||||
public Json_wtr Kv_bry(String key, byte[] val) {return Kv_bry(Bry_.new_u8(key), val);}
|
||||
public Json_wtr Kv_bry(byte[] key, byte[] val) {
|
||||
Write_indent_itm();
|
||||
Write_key(key);
|
||||
Write_str(val);
|
||||
Write_nl();
|
||||
return this;
|
||||
}
|
||||
public Object Get_x(Json_itm itm) {
|
||||
switch (itm.Tid()) {
|
||||
case Json_itm_.Tid__ary:
|
||||
case Json_itm_.Tid__nde:
|
||||
return itm;
|
||||
default:
|
||||
case Json_itm_.Tid__kv: throw Err_.new_unsupported();
|
||||
case Json_itm_.Tid__bool:
|
||||
case Json_itm_.Tid__int:
|
||||
case Json_itm_.Tid__decimal:
|
||||
case Json_itm_.Tid__str:
|
||||
return itm.Data();
|
||||
}
|
||||
}
|
||||
public void Kv_itm_x(byte[] key, Json_itm itm) {
|
||||
Object val = Get_x(itm);
|
||||
int val_tid = Type_ids_.To_id_by_obj(val);
|
||||
Kv_obj(key, val, val_tid);
|
||||
}
|
||||
public Json_wtr Kv_obj(byte[] key, Object val, int val_tid) {
|
||||
Write_indent_itm();
|
||||
Write_key(key);
|
||||
Write_val_obj(Bool_.N, val_tid, val);
|
||||
Write_nl();
|
||||
return this;
|
||||
}
|
||||
private Json_wtr Write_grp_bgn(byte[] grp_sym) {return Write_grp_bgn(grp_sym, Bool_.Y);}
|
||||
private Json_wtr Write_grp_bgn(byte[] grp_sym, boolean write_indent) {
|
||||
idx_stack.Add(idx);
|
||||
idx = 0;
|
||||
++indent;
|
||||
if (write_indent) Write_indent();
|
||||
bfr.Add(grp_sym);
|
||||
return this;
|
||||
}
|
||||
private Json_wtr Write_grp_end(boolean grp_is_nde, byte[] grp_sym) {
|
||||
if ((grp_is_nde && idx == 0) || (!grp_is_nde && idx == 0))
|
||||
Write_nl();
|
||||
Write_indent();
|
||||
--indent;
|
||||
bfr.Add(grp_sym);
|
||||
this.idx = idx_stack.Pop_or(0);
|
||||
return this;
|
||||
}
|
||||
private Json_wtr Write_key(byte[] bry) {
|
||||
Write_str(bry); // "key"
|
||||
bfr.Add_byte_colon(); // ":"
|
||||
++idx;
|
||||
return this;
|
||||
}
|
||||
private void Write_val_obj(boolean called_by_ary, int type_tid, Object obj) {
|
||||
switch (type_tid) {
|
||||
case Type_ids_.Id__null: bfr.Add(Object_.Bry__null); break;
|
||||
case Type_ids_.Id__bool: bfr.Add_bool(Bool_.Cast(obj)); break;
|
||||
case Type_ids_.Id__byte: bfr.Add_byte(Byte_.Cast(obj)); break;
|
||||
case Type_ids_.Id__int: bfr.Add_int_variable(Int_.Cast(obj)); break;
|
||||
case Type_ids_.Id__long: bfr.Add_long_variable(Long_.cast(obj)); break;
|
||||
case Type_ids_.Id__float: bfr.Add_float(Float_.cast(obj)); break;
|
||||
case Type_ids_.Id__double: bfr.Add_double(Double_.cast(obj)); break;
|
||||
case Type_ids_.Id__str: Write_str(Bry_.new_u8((String)obj)); break;
|
||||
case Type_ids_.Id__bry: Write_str((byte[])obj); break;
|
||||
case Type_ids_.Id__char:
|
||||
case Type_ids_.Id__date:
|
||||
case Type_ids_.Id__decimal: Write_str(Bry_.new_u8(Object_.Xto_str_strict_or_empty(obj))); break;
|
||||
case Type_ids_.Id__obj:
|
||||
int grp_type = Grp_type__get(obj);
|
||||
if (grp_type < Grp_type__json_ary)
|
||||
Write_val_obj__nde(called_by_ary, grp_type, obj);
|
||||
else
|
||||
Write_val_itm__ary(called_by_ary, grp_type, obj);
|
||||
break;
|
||||
default: throw Err_.new_unhandled(type_tid);
|
||||
}
|
||||
}
|
||||
private void Handle_nde_as_ary_itm_0() {
|
||||
if (idx == 0) { // if nde, and first item, then put on new line
|
||||
bfr.Del_by_1();
|
||||
if (opt_ws) {
|
||||
bfr.Add_byte_nl();
|
||||
++indent;
|
||||
Write_indent();
|
||||
--indent;
|
||||
}
|
||||
}
|
||||
}
|
||||
private void Write_val_obj__nde(boolean called_by_ary, int grp_type, Object obj) {
|
||||
if (grp_type == Grp_type__json_nde) {
|
||||
if (idx == 0) { // if nde, and first item, then put on new line
|
||||
if (!called_by_ary) {
|
||||
bfr.Del_by_1();
|
||||
if (opt_ws) {
|
||||
bfr.Add_byte_nl();
|
||||
++indent;
|
||||
Write_indent();
|
||||
--indent;
|
||||
}
|
||||
}
|
||||
}
|
||||
bfr.Add_byte_nl();
|
||||
Write_grp_bgn(Sym_nde_bgn, Bool_.Y);
|
||||
Json_nde sub_nde = (Json_nde)obj;
|
||||
int sub_nde_len = sub_nde.Len();
|
||||
for (int i = 0; i < sub_nde_len; ++i) {
|
||||
Json_kv sub_kv = sub_nde.Get_at_as_kv(i);
|
||||
Kv_itm_x(sub_kv.Key_as_bry(), sub_kv.Val());
|
||||
}
|
||||
}
|
||||
else {
|
||||
Handle_nde_as_ary_itm_0();
|
||||
Write_grp_bgn(Sym_nde_bgn, Bool_.N);
|
||||
Keyval[] kvy = (Keyval[])obj;
|
||||
int kvy_len = kvy.length;
|
||||
for (int i = 0; i < kvy_len; ++i) {
|
||||
Keyval kv = kvy[i];
|
||||
Object kv_val = kv.Val();
|
||||
Kv_obj(Bry_.new_u8(kv.Key()), kv_val, Type_ids_.To_id_by_obj(kv_val));
|
||||
}
|
||||
}
|
||||
Write_grp_end(Bool_.Y, Sym_nde_end);
|
||||
}
|
||||
private void Write_val_itm__ary(boolean called_by_ary, int grp_type, Object obj) {
|
||||
Ary_bgn_keyless();
|
||||
if (grp_type == Grp_type__json_ary) {
|
||||
Json_ary sub_ary = (Json_ary)(obj);
|
||||
int len = sub_ary.Len();
|
||||
for (int i = 0; i < len; ++i) {
|
||||
Json_itm sub_itm = sub_ary.Get_at(i);
|
||||
Ary_itm_obj(Get_x(sub_itm));
|
||||
}
|
||||
}
|
||||
else {
|
||||
Object ary = Array_.cast(obj);
|
||||
int len = Array_.Len(ary);
|
||||
for (int i = 0; i < len; ++i) {
|
||||
Object itm = Array_.Get_at(ary, i);
|
||||
Ary_itm_obj(itm);
|
||||
}
|
||||
}
|
||||
Write_grp_end(Bool_.N, Sym_ary_end);
|
||||
}
|
||||
private void Write_str(byte[] bry) {
|
||||
if (bry == null) {bfr.Add(Object_.Bry__null); return;}
|
||||
int len = bry.length;
|
||||
int backslash_count = opt_backslash_2x ? 3 : 1; // NOTE: 3 handles backslashes usurped by javascript; EX: '{"val":"\\\\"}' --javascript--> '{"val":"\\"}' --json--> '{"val":"\"}'
|
||||
bfr.Add_byte(opt_quote_byte);
|
||||
for (int i = 0; i < len; ++i) {
|
||||
byte b = bry[i];
|
||||
switch (b) {
|
||||
case Byte_ascii.Backslash: bfr.Add_byte_repeat(Byte_ascii.Backslash, backslash_count).Add_byte(b); break; // "\" -> "\\"; needed else js will usurp \ as escape; EX: "\&" -> "&"; DATE:2014-06-24
|
||||
case Byte_ascii.Quote: bfr.Add_byte_repeat(Byte_ascii.Backslash, backslash_count).Add_byte(b); break;
|
||||
case Byte_ascii.Apos: // // "'" -> "'''"; needed else xocfg fails; DATE:2016-12-07
|
||||
if (opt_backslash_2x)
|
||||
bfr.Add_byte_repeat(Byte_ascii.Backslash, 1).Add_byte(b);
|
||||
else
|
||||
bfr.Add_byte(b);
|
||||
break;
|
||||
case Byte_ascii.Nl: bfr.Add_byte_repeat(Byte_ascii.Backslash, 2).Add_byte(Byte_ascii.Ltr_n); break; // "\n" -> "\\n"
|
||||
case Byte_ascii.Cr: bfr.Add_byte_repeat(Byte_ascii.Backslash, 2).Add_byte(Byte_ascii.Ltr_r); break; // "\r" -> "\\r"; DATE:2017-03-02
|
||||
case Byte_ascii.Tab: bfr.Add_byte_repeat(Byte_ascii.Backslash, 2).Add_byte(Byte_ascii.Ltr_t); break; // "\t" -> "\\t"; DATE:2017-03-02
|
||||
default: bfr.Add_byte(b); break;
|
||||
}
|
||||
}
|
||||
bfr.Add_byte(opt_quote_byte);
|
||||
}
|
||||
private void Write_indent_itm() {
|
||||
if (idx == 0) {
|
||||
if (opt_ws)
|
||||
bfr.Add_byte_space();
|
||||
}
|
||||
else {
|
||||
Write_indent();
|
||||
bfr.Add(Sym_itm_spr);
|
||||
if (opt_ws) bfr.Add_byte_space();
|
||||
}
|
||||
}
|
||||
private void Write_indent() {
|
||||
if (opt_ws && indent > 0)
|
||||
bfr.Add_byte_repeat(Byte_ascii.Space, indent * 2);
|
||||
}
|
||||
private Json_wtr Write_nl() {
|
||||
if (opt_ws) bfr.Add_byte_nl();
|
||||
return this;
|
||||
}
|
||||
private static final byte[]
|
||||
Sym_nde_bgn = Bry_.new_a7("{")
|
||||
, Sym_nde_end = Bry_.new_a7("}")
|
||||
, Sym_ary_bgn = Bry_.new_a7("[")
|
||||
, Sym_ary_end = Bry_.new_a7("]")
|
||||
, Sym_itm_spr = Bry_.new_a7(",")
|
||||
;
|
||||
private static final int Grp_type__json_nde = 1, Grp_type__kv_ary = 2, Grp_type__json_ary = 3, Grp_type__obj_ary = 4;
|
||||
private static int Grp_type__get(Object obj) {
|
||||
Class<?> type = obj.getClass();
|
||||
if (Type_.Eq(type, Keyval[].class)) return Grp_type__kv_ary;
|
||||
else if (Type_.Is_array(type)) return Grp_type__obj_ary;
|
||||
else if (Type_.Eq(type, Json_nde.class)) return Grp_type__json_nde;
|
||||
else if (Type_.Eq(type, Json_ary.class)) return Grp_type__json_ary;
|
||||
else throw Err_.new_unhandled(type);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,120 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.jsons; import gplx.*; import gplx.langs.*;
|
||||
import org.junit.*;
|
||||
public class Json_wtr_tst {
|
||||
@Before public void init() {fxt.Clear();} private final Json_wtr_fxt fxt = new Json_wtr_fxt();
|
||||
@Test public void Root() {
|
||||
fxt.Wtr().Doc_nde_bgn().Doc_nde_end();
|
||||
fxt.Test
|
||||
( "{"
|
||||
, "}"
|
||||
);
|
||||
}
|
||||
@Test public void Kv() {
|
||||
fxt.Wtr()
|
||||
.Doc_nde_bgn()
|
||||
.Kv_str("k0", "v0")
|
||||
.Kv_str("k1", "v1")
|
||||
.Doc_nde_end();
|
||||
fxt.Test
|
||||
( "{ 'k0':'v0'"
|
||||
, ", 'k1':'v1'"
|
||||
, "}"
|
||||
);
|
||||
}
|
||||
@Test public void Escaped() {
|
||||
fxt.Wtr()
|
||||
.Doc_nde_bgn()
|
||||
.Kv_str("backslash", "\\")
|
||||
.Kv_str("quote", "\"")
|
||||
.Kv_str("apos", "'")
|
||||
.Kv_str("nl", "\n")
|
||||
.Kv_str("cr", "\r")
|
||||
.Kv_str("tab", "\t")
|
||||
.Doc_nde_end();
|
||||
fxt.Test
|
||||
( "{ 'backslash':'\\\\'"
|
||||
, ", 'quote':'\\\"'"
|
||||
, ", 'apos':'\''"
|
||||
, ", 'nl':'\\\\n'"
|
||||
, ", 'cr':'\\\\r'"
|
||||
, ", 'tab':'\\\\t'"
|
||||
, "}"
|
||||
);
|
||||
}
|
||||
@Test public void Nde() {
|
||||
fxt.Wtr()
|
||||
.Doc_nde_bgn()
|
||||
.Nde_bgn("s0")
|
||||
.Nde_bgn("s00")
|
||||
.Nde_end()
|
||||
.Nde_end()
|
||||
.Nde_bgn("s1")
|
||||
.Nde_bgn("s10")
|
||||
.Nde_end()
|
||||
.Nde_end()
|
||||
.Doc_nde_end();
|
||||
fxt.Test
|
||||
( "{ 's0':"
|
||||
, " { 's00':"
|
||||
, " {"
|
||||
, " }"
|
||||
, " }"
|
||||
, ", 's1':"
|
||||
, " { 's10':"
|
||||
, " {"
|
||||
, " }"
|
||||
, " }"
|
||||
, "}"
|
||||
);
|
||||
}
|
||||
@Test public void Ary() {
|
||||
fxt.Wtr()
|
||||
.Doc_nde_bgn()
|
||||
.Ary_bgn("a0")
|
||||
.Ary_itm_str("v0")
|
||||
.Ary_itm_str("v1")
|
||||
.Ary_end()
|
||||
.Doc_nde_end();
|
||||
fxt.Test
|
||||
( "{ 'a0':"
|
||||
, " [ 'v0'"
|
||||
, " , 'v1'"
|
||||
, " ]"
|
||||
, "}"
|
||||
);
|
||||
}
|
||||
@Test public void Nde__nested() {
|
||||
fxt.Wtr()
|
||||
.Doc_nde_bgn()
|
||||
.Ary_bgn("a0")
|
||||
.Ary_itm_obj(Keyval_.Ary
|
||||
( Keyval_.new_("k1", "v1")
|
||||
, Keyval_.new_("k2", "v2")
|
||||
))
|
||||
.Ary_end()
|
||||
.Doc_nde_end();
|
||||
fxt.Test
|
||||
( "{ 'a0':"
|
||||
, " ["
|
||||
, " { 'k1':'v1'"
|
||||
, " , 'k2':'v2'"
|
||||
, " }"
|
||||
, " ]"
|
||||
, "}"
|
||||
);
|
||||
}
|
||||
}
|
||||
class Json_wtr_fxt {
|
||||
private final Json_wtr wtr = new Json_wtr().Opt_quote_byte_(Byte_ascii.Apos);
|
||||
public void Clear() {wtr.Clear();}
|
||||
public Json_wtr Wtr() {return wtr;}
|
||||
public void Test(String... expd) {
|
||||
Tfds.Eq_ary_str
|
||||
( String_.Ary_add(expd, String_.Ary("")) // json_wtr always ends with "}\n"; rather than add "\n" to each test, just add it here
|
||||
, String_.SplitLines_nl(String_.new_u8(wtr.To_bry_and_clear()))
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,27 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.mustaches; import gplx.*; import gplx.langs.*;
|
||||
public class Mustache_bfr {
|
||||
private final Bry_bfr bfr;
|
||||
public Mustache_bfr(Bry_bfr bfr) {this.bfr = bfr;}
|
||||
public Bry_bfr Bfr() {return bfr;}
|
||||
public Mustache_bfr Escape_(boolean v) {escape = v; return this;} private boolean escape;
|
||||
public void Add_int (int v) {bfr.Add_int_variable(v);}
|
||||
public void Add_long (long v) {bfr.Add_long_variable(v);}
|
||||
public void Add_double (double v) {bfr.Add_double(v);}
|
||||
public void Add_str_u8 (String v) {bfr.Add_str_u8(v);}
|
||||
public void Add_str_u8_safe (String v) {if (v != null) bfr.Add_str_u8(v);}
|
||||
public void Add_mid (byte[] src, int bgn, int end) {bfr.Add_mid(src, bgn, end);}
|
||||
public void Add_bry (byte[] v) {
|
||||
if (v == null) return; // allow items to have null props
|
||||
if (escape)
|
||||
gplx.langs.htmls.Gfh_utl.Escape_html_to_bfr(bfr, v, 0, v.length, Bool_.Y, Bool_.Y, Bool_.Y, Bool_.Y, Bool_.Y);
|
||||
else
|
||||
bfr.Add(v);
|
||||
}
|
||||
public byte[] To_bry_and_clear() {return bfr.To_bry_and_clear();}
|
||||
public String To_str_and_clear() {return bfr.To_str_and_clear();}
|
||||
public static Mustache_bfr New() {return new Mustache_bfr(Bry_bfr_.New());}
|
||||
public static Mustache_bfr New_bfr(Bry_bfr v) {return new Mustache_bfr(v);}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,9 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.mustaches; import gplx.*; import gplx.langs.*;
|
||||
import gplx.langs.jsons.*;
|
||||
// Data-side contract for rendering: an object that can write its own properties and expose sub-items for sections.
public interface Mustache_doc_itm {
|
||||
// Writes the value for key into bfr and returns true; returns false when this item has no such key
// (Mustache_render_ctx.Render_variable then retries with the items saved on its stack).
boolean Mustache__write(String key, Mustache_bfr bfr);
|
||||
// Returns the items to iterate for the section named key. Mustache_render_ctx.Section_bgn treats null as an
// empty array, and treats Mustache_doc_itm_.Ary__bool__y / Ary__bool__n as boolean true / false.
Mustache_doc_itm[] Mustache__subs(String key);
|
||||
}
|
||||
|
||||
@@ -13,3 +13,17 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.mustaches; import gplx.*; import gplx.langs.*;
|
||||
public class Mustache_doc_itm_ {
|
||||
public static final byte[] Null_val = null;
|
||||
public static final Mustache_doc_itm Null_itm = null;
|
||||
public static final Mustache_doc_itm Itm__bool__y = new Mustache_doc_itm__bool(Bool_.Y), Itm__bool__n = new Mustache_doc_itm__bool(Bool_.N);
|
||||
public static final Mustache_doc_itm[] Ary__empty = new Mustache_doc_itm[0], Ary__bool__y = new Mustache_doc_itm[] {Itm__bool__y}, Ary__bool__n = new Mustache_doc_itm[] {Itm__bool__n};
|
||||
public static Mustache_doc_itm[] Ary__bool(boolean v) {return v ? Ary__bool__y : Ary__bool__n;}
|
||||
}
|
||||
class Mustache_doc_itm__bool implements Mustache_doc_itm {
|
||||
public Mustache_doc_itm__bool(boolean val) {this.val = val;}
|
||||
public boolean Val() {return val;} private final boolean val;
|
||||
public boolean Mustache__write(String key, Mustache_bfr bfr) {return false;}
|
||||
public Mustache_doc_itm[] Mustache__subs(String key) {return Mustache_doc_itm_.Ary__empty;}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,150 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.mustaches; import gplx.*; import gplx.langs.*;
|
||||
import org.junit.*; import gplx.core.primitives.*;
|
||||
public class Mustache_itm_render_tst {
|
||||
private final Mustache_itm_render_fxt fxt = new Mustache_itm_render_fxt();
|
||||
@Test public void Text() {
|
||||
fxt.Test__parse("a b c", "a b c");
|
||||
}
|
||||
@Test public void Variable() {
|
||||
fxt.Init__root(fxt.Make_mock(0).Add_prop("prop1", "1").Add_prop("prop2", "2"));
|
||||
fxt.Test__parse("{{prop1}}", "1");
|
||||
fxt.Test__parse("a{{prop1}}b{{prop2}}c", "a1b2c");
|
||||
}
|
||||
@Test public void Escape() {
|
||||
fxt.Init__root(fxt.Make_mock(0).Add_prop("prop1", "<"));
|
||||
fxt.Test__parse("{{{prop1}}}", "<");
|
||||
fxt.Test__parse("{{prop1}}", "<");
|
||||
}
|
||||
@Test public void Section_bool() {
|
||||
fxt.Init__root(fxt.Make_mock(0).Add_bool_y("bool_y").Add_bool_n("bool_n"));
|
||||
fxt.Test__parse("a{{#bool_y}}b{{/bool_y}}c", "abc");
|
||||
fxt.Test__parse("a{{#bool_n}}b{{/bool_n}}c", "ac");
|
||||
fxt.Test__parse("a{{#bool_y}}b{{/bool_y}}c{{#bool_n}}d{{/bool_n}}e", "abce");
|
||||
}
|
||||
@Test public void Section_not() {
|
||||
fxt.Init__root(fxt.Make_mock(0).Add_bool_y("bool_y").Add_bool_n("bool_n"));
|
||||
fxt.Test__parse("a{{^bool_y}}b{{/bool_y}}c", "ac");
|
||||
fxt.Test__parse("a{{^bool_n}}b{{/bool_n}}c", "abc");
|
||||
fxt.Test__parse("a{{^bool_y}}b{{/bool_y}}c{{^bool_n}}d{{/bool_n}}e", "acde");
|
||||
}
|
||||
@Test public void Section_ws() {
|
||||
fxt.Init__root(fxt.Make_mock(0).Add_bool_y("bool_y"));
|
||||
fxt.Test__parse("a\n {{#bool_y}} \nb\n {{/bool_y}} \nc", "a\nb\nc");
|
||||
}
|
||||
@Test public void Section_subs_flat() {
|
||||
fxt.Init__root(fxt.Make_mock(0).Add_subs("subs1"
|
||||
, fxt.Make_mock(1).Add_prop("prop1", "1").Add_subs("subs2")
|
||||
, fxt.Make_mock(2).Add_prop("prop1", "2").Add_subs("subs2")
|
||||
));
|
||||
fxt.Test__parse("a{{#subs1}}({{prop1}}){{/subs1}}d", "a(1)(2)d");
|
||||
}
|
||||
@Test public void Section_subs_nest_1() {
|
||||
fxt.Init__root
|
||||
( fxt.Make_mock(0).Add_subs("subs1"
|
||||
, fxt.Make_mock(1).Add_prop("prop1", "a").Add_subs("subs2"
|
||||
, fxt.Make_mock(11).Add_prop("prop2", "1")
|
||||
, fxt.Make_mock(12).Add_prop("prop2", "2"))
|
||||
));
|
||||
fxt.Test__parse
|
||||
( "{{#subs1}}{{prop1}}{{#subs2}}{{prop2}}{{/subs2}}{{/subs1}}"
|
||||
, "a12"
|
||||
);
|
||||
}
|
||||
@Test public void Section_subs_nest_2() {
|
||||
fxt.Init__root
|
||||
( fxt.Make_mock(0).Add_subs("subs1"
|
||||
, fxt.Make_mock(1).Add_prop("prop1", "a").Add_subs("subs2"
|
||||
, fxt.Make_mock(11).Add_prop("prop2", "1")
|
||||
, fxt.Make_mock(12).Add_prop("prop2", "2")
|
||||
)
|
||||
, fxt.Make_mock(2).Add_prop("prop1", "b")
|
||||
)
|
||||
);
|
||||
fxt.Test__parse
|
||||
( "{{#subs1}}{{prop1}}{{#subs2}}{{prop2}}{{/subs2}}{{/subs1}}"
|
||||
, "a12b"
|
||||
);
|
||||
}
|
||||
@Test public void Section_subs_nest_3() {
|
||||
fxt.Init__root
|
||||
( fxt.Make_mock(0).Add_subs("subs1"
|
||||
, fxt.Make_mock(1).Add_prop("prop1", "a").Add_subs("subs2"
|
||||
, fxt.Make_mock(11).Add_prop("prop2", "1")
|
||||
, fxt.Make_mock(12).Add_prop("prop2", "2")
|
||||
)
|
||||
, fxt.Make_mock(2).Add_prop("prop1", "b").Add_subs("subs2"
|
||||
, fxt.Make_mock(21).Add_prop("prop2", "3")
|
||||
, fxt.Make_mock(22).Add_prop("prop2", "4")
|
||||
)
|
||||
)
|
||||
);
|
||||
fxt.Test__parse
|
||||
( "{{#subs1}}{{prop1}}{{#subs2}}{{prop2}}{{/subs2}}{{prop1}}{{/subs1}}"
|
||||
, "a12ab34b"
|
||||
);
|
||||
}
|
||||
@Test public void Section_bool_subs() { // handle prop written after boolean; should not pick up inner prop
|
||||
fxt.Init__root
|
||||
( fxt.Make_mock(0).Add_bool_y("bool1").Add_prop("prop2", "2").Add_subs("subs1"
|
||||
, fxt.Make_mock(1).Add_prop("prop1", "11")
|
||||
, fxt.Make_mock(2).Add_prop("prop1", "12")
|
||||
));
|
||||
fxt.Test__parse
|
||||
( "a{{#bool1}}b{{#subs1}}c{{prop1}}d{{/subs1}}e{{/bool1}}f{{prop2}}g"
|
||||
, "abc11dc12def2g"
|
||||
);
|
||||
}
|
||||
@Test public void Section_owner() {
|
||||
fxt.Init__root
|
||||
( fxt.Make_mock(0).Add_subs("subs1"
|
||||
, fxt.Make_mock(1).Add_prop("prop1", "a").Add_subs("subs2"
|
||||
, fxt.Make_mock(11).Add_prop("prop2", "1")
|
||||
)
|
||||
));
|
||||
fxt.Test__parse
|
||||
( "{{#subs1}}{{#subs2}}{{prop1}}{{prop2}}{{/subs2}}{{/subs1}}" // prop1 is cited in subs2, but value belongs to subs1
|
||||
, "a1"
|
||||
);
|
||||
}
|
||||
}
|
||||
class Mustache_itm_render_fxt {
|
||||
private final Mustache_tkn_parser parser = new Mustache_tkn_parser();
|
||||
private final Mustache_render_ctx ctx = new Mustache_render_ctx();
|
||||
private final Mustache_bfr bfr = Mustache_bfr.New();
|
||||
private Mustache_doc_itm__mock root;
|
||||
public Mustache_doc_itm__mock Make_mock(int id) {return new Mustache_doc_itm__mock(id);}
|
||||
public void Init__root(Mustache_doc_itm__mock v) {this.root = v;}
|
||||
public void Test__parse(String src_str, String expd) {
|
||||
byte[] src_bry = Bry_.new_a7(src_str);
|
||||
Mustache_tkn_itm actl_itm = parser.Parse(src_bry, 0, src_bry.length);
|
||||
ctx.Init(root);
|
||||
actl_itm.Render(bfr, ctx);
|
||||
Tfds.Eq_str_lines(expd, bfr.To_str_and_clear());
|
||||
}
|
||||
}
|
||||
class Mustache_doc_itm__mock implements Mustache_doc_itm {
|
||||
private final Hash_adp hash_prop = Hash_adp_.New(), hash_bool = Hash_adp_.New(), hash_subs = Hash_adp_.New();
|
||||
public Mustache_doc_itm__mock(int id) {this.id = id;}
|
||||
public int id;
|
||||
public Mustache_doc_itm__mock Add_prop(String key, String val) {hash_prop.Add(key, Bry_.new_u8(val)); return this;}
|
||||
public Mustache_doc_itm__mock Add_bool_y(String key) {hash_bool.Add(key, Bool_obj_ref.y_()); return this;}
|
||||
public Mustache_doc_itm__mock Add_bool_n(String key) {hash_bool.Add(key, Bool_obj_ref.n_()); return this;}
|
||||
public Mustache_doc_itm__mock Add_subs(String key, Mustache_doc_itm__mock... ary) {hash_subs.Add(key, ary); return this;}
|
||||
public boolean Mustache__write(String key, Mustache_bfr bfr) {
|
||||
byte[] rv = (byte[])hash_prop.Get_by(key);
|
||||
if (rv == null) return false;
|
||||
bfr.Add_bry(rv);
|
||||
return true;
|
||||
}
|
||||
public Mustache_doc_itm[] Mustache__subs(String key) {
|
||||
Object rv = hash_bool.Get_by(key);
|
||||
if (rv != null) {
|
||||
boolean bool_val = ((Bool_obj_ref)rv).Val();
|
||||
return bool_val ? Mustache_doc_itm_.Ary__bool__y : Mustache_doc_itm_.Ary__bool__n;
|
||||
}
|
||||
return (Mustache_doc_itm__mock[])hash_subs.Get_by(key);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,86 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.mustaches; import gplx.*; import gplx.langs.*;
|
||||
public class Mustache_render_ctx {
|
||||
private final List_adp stack = List_adp_.New();
|
||||
private Mustache_doc_itm cur;
|
||||
private Mustache_doc_itm[] subs; private int subs_idx, subs_len; private byte cur_is_bool;
|
||||
public Mustache_render_ctx Init(Mustache_doc_itm cur) {
|
||||
this.cur = cur;
|
||||
this.subs = null;
|
||||
this.subs_idx = subs_len = 0; this.cur_is_bool = Bool_.__byte;
|
||||
return this;
|
||||
}
|
||||
public boolean Render_variable(Mustache_bfr bfr, String key) {
|
||||
boolean rv = false;
|
||||
int stack_pos = stack.Len();
|
||||
Mustache_doc_itm itm = cur;
|
||||
while (itm != Mustache_doc_itm_.Null_itm) {
|
||||
boolean resolved = itm.Mustache__write(key, bfr);
|
||||
if (resolved) {
|
||||
rv = true;
|
||||
break;
|
||||
}
|
||||
else {
|
||||
--stack_pos;
|
||||
if (stack_pos == -1) // nothing else in stack
|
||||
break;
|
||||
else
|
||||
itm = ((Mustache_stack_itm)stack.Get_at(stack_pos)).cur;
|
||||
}
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
public void Section_bgn(String key) {
|
||||
Mustache_stack_itm stack_itm = new Mustache_stack_itm(cur, subs, subs_idx, subs_len, cur_is_bool); // note that cur is "owner" since subs_idx == 0
|
||||
stack.Add(stack_itm);
|
||||
subs = cur.Mustache__subs(key); if (subs == null) subs = Mustache_doc_itm_.Ary__empty; // subs == null if property does not exist; EX: "folder{{#files}}file{{/files}}" and folder = new Folder(File[0]);
|
||||
subs_len = subs.length;
|
||||
subs_idx = -1;
|
||||
}
|
||||
public boolean Section_do(boolean inverted) {
|
||||
if (++subs_idx >= subs_len) return false;
|
||||
Mustache_doc_itm sub = subs[subs_idx];
|
||||
if (subs_idx == 0) { // special logic to handle 1st item; note that there always be at least one item
|
||||
if (sub == Mustache_doc_itm_.Itm__bool__n) {
|
||||
boolean rv = Bool_.N;
|
||||
if (inverted) rv = !rv;
|
||||
cur_is_bool = Bool_.To_byte(rv);
|
||||
return rv;
|
||||
}
|
||||
else if (sub == Mustache_doc_itm_.Itm__bool__y) {
|
||||
boolean rv = Bool_.Y;
|
||||
if (inverted) rv = !rv;
|
||||
cur_is_bool = Bool_.To_byte(rv);
|
||||
return rv;
|
||||
}
|
||||
else
|
||||
cur_is_bool = Bool_.__byte;
|
||||
}
|
||||
cur = sub;
|
||||
return true;
|
||||
}
|
||||
public void Section_end() {
|
||||
Mustache_stack_itm itm = (Mustache_stack_itm)List_adp_.Pop(stack);
|
||||
subs = itm.subs;
|
||||
subs_len = itm.subs_len;
|
||||
subs_idx = itm.subs_idx;
|
||||
cur = itm.cur;
|
||||
cur_is_bool = itm.cur_is_bool;
|
||||
}
|
||||
}
|
||||
class Mustache_stack_itm {
|
||||
public Mustache_stack_itm(Mustache_doc_itm cur, Mustache_doc_itm[] subs, int subs_idx, int subs_len, byte cur_is_bool) {
|
||||
this.cur = cur;
|
||||
this.cur_is_bool = cur_is_bool;
|
||||
this.subs = subs;
|
||||
this.subs_idx = subs_idx;
|
||||
this.subs_len = subs_len;
|
||||
}
|
||||
public final Mustache_doc_itm cur;
|
||||
public final byte cur_is_bool;
|
||||
public final Mustache_doc_itm[] subs;
|
||||
public final int subs_idx;
|
||||
public final int subs_len;
|
||||
}
|
||||
|
||||
@@ -13,3 +13,30 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.mustaches; import gplx.*; import gplx.langs.*;
|
||||
class Mustache_tkn_def {
|
||||
public byte[] Variable_lhs = Dflt_variable_lhs;
|
||||
public byte[] Variable_rhs = Dflt_variable_rhs;
|
||||
public int Variable_lhs_len;
|
||||
public int Variable_rhs_len;
|
||||
public static final byte[]
|
||||
Dflt_variable_lhs = Bry_.new_a7("{{")
|
||||
, Dflt_variable_rhs = Bry_.new_a7("}}")
|
||||
;
|
||||
public static final byte
|
||||
Variable = Byte_ascii.Curly_end // {{=<% %>=}}
|
||||
, Escape_bgn = Byte_ascii.Curly_bgn // {{{escape}}}
|
||||
, Escape_end = Byte_ascii.Curly_end // {{{escape}}}
|
||||
, Section = Byte_ascii.Hash // {{#section}}
|
||||
, Grp_end = Byte_ascii.Slash // {{/section}}
|
||||
, Inverted = Byte_ascii.Pow // {{^inverted}}
|
||||
, Comment = Byte_ascii.Bang // {{!comment}}
|
||||
, Partial = Byte_ascii.Angle_bgn // {{>partial}}
|
||||
, Delimiter_bgn = Byte_ascii.Eq // {{=<% %>=}}
|
||||
, Delimiter_end = Byte_ascii.Curly_end // {{=<% %>=}}
|
||||
;
|
||||
public Mustache_tkn_def() {
|
||||
Variable_lhs_len = Variable_lhs.length;
|
||||
Variable_rhs_len = Variable_rhs.length;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,98 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.mustaches; import gplx.*; import gplx.langs.*;
|
||||
// A node of the parsed template tree.
public interface Mustache_tkn_itm {
|
||||
// Token type; one of the Mustache_tkn_itm_.Tid__* constants.
int Tid();
|
||||
// Name inside the tag (the String form of the key bytes passed to Mustache_tkn_base); empty for root, text and comment tokens.
String Key();
|
||||
// Child tokens.
Mustache_tkn_itm[] Subs_ary();
|
||||
// Sets the child tokens; Mustache_tkn_base throws "unsupported" unless a subclass overrides it.
void Subs_ary_(Mustache_tkn_itm[] v);
|
||||
// Renders this token to bfr using ctx; Mustache_tkn_base throws "unsupported" unless a subclass overrides it.
void Render(Mustache_bfr bfr, Mustache_render_ctx ctx);
|
||||
}
|
||||
class Mustache_tkn_itm_ {// for types, see http://mustache.github.io/mustache.5.html
|
||||
public static final int Tid__root = 0, Tid__text = 1, Tid__variable = 2, Tid__escape = 3, Tid__section = 4, Tid__inverted = 5, Tid__comment = 6, Tid__partial = 7, Tid__delimiter = 8;
|
||||
public static final Mustache_tkn_itm[] Ary_empty = new Mustache_tkn_itm[0];
|
||||
}
|
||||
abstract class Mustache_tkn_base implements Mustache_tkn_itm {
|
||||
public Mustache_tkn_base(int tid, byte[] key_bry) {this.tid = tid; this.key = String_.new_u8(key_bry);}
|
||||
public int Tid() {return tid;} private final int tid;
|
||||
public String Key() {return key;} private final String key;
|
||||
@gplx.Virtual public Mustache_tkn_itm[] Subs_ary() {return Mustache_tkn_itm_.Ary_empty;}
|
||||
@gplx.Virtual public void Subs_ary_(Mustache_tkn_itm[] v) {throw Err_.new_unsupported();} // fail if trying to set and not overridden
|
||||
@gplx.Virtual public void Render(Mustache_bfr bfr, Mustache_render_ctx ctx) {throw Err_.new_unsupported();} // should be abstract
|
||||
}
|
||||
class Mustache_tkn_root extends Mustache_tkn_base { // EX: {{variable}} -> <a>
|
||||
private Mustache_tkn_itm[] subs_ary;
|
||||
public Mustache_tkn_root() {super(Mustache_tkn_itm_.Tid__root, Bry_.Empty);}
|
||||
@Override public Mustache_tkn_itm[] Subs_ary() {return subs_ary;}
|
||||
@Override public void Subs_ary_(Mustache_tkn_itm[] v) {subs_ary = v;}
|
||||
@Override public void Render(Mustache_bfr bfr, Mustache_render_ctx ctx) {
|
||||
int subs_len = subs_ary.length;
|
||||
for (int i = 0; i < subs_len; ++i) {
|
||||
Mustache_tkn_itm sub = subs_ary[i];
|
||||
sub.Render(bfr, ctx);
|
||||
}
|
||||
}
|
||||
}
|
||||
class Mustache_tkn_text extends Mustache_tkn_base { // EX: text -> text
|
||||
private final byte[] src; private final int src_bgn, src_end;
|
||||
public Mustache_tkn_text(byte[] src, int src_bgn, int src_end) {super(Mustache_tkn_itm_.Tid__text, Bry_.Empty);
|
||||
this.src = src;
|
||||
this.src_bgn = src_bgn;
|
||||
this.src_end = src_end;
|
||||
}
|
||||
@Override public void Render(Mustache_bfr bfr, Mustache_render_ctx ctx) {
|
||||
bfr.Add_mid(src, src_bgn, src_end);
|
||||
}
|
||||
}
|
||||
class Mustache_tkn_comment extends Mustache_tkn_base { // EX: {{!section}}comment{{/section}} ->
|
||||
public Mustache_tkn_comment() {super(Mustache_tkn_itm_.Tid__comment, Bry_.Empty);}
|
||||
@Override public void Render(Mustache_bfr bfr, Mustache_render_ctx ctx) {}
|
||||
}
|
||||
class Mustache_tkn_variable extends Mustache_tkn_base { // EX: {{variable}} -> <a>
|
||||
public Mustache_tkn_variable(byte[] key) {super(Mustache_tkn_itm_.Tid__variable, key);}
|
||||
@Override public void Render(Mustache_bfr bfr, Mustache_render_ctx ctx) {
|
||||
String key = this.Key();
|
||||
ctx.Render_variable(bfr.Escape_(Bool_.Y), key);
|
||||
}
|
||||
}
|
||||
class Mustache_tkn_escape extends Mustache_tkn_base { // EX: {{{variable}}} -> <a>
|
||||
public Mustache_tkn_escape(byte[] key) {super(Mustache_tkn_itm_.Tid__escape, key);}
|
||||
@Override public void Render(Mustache_bfr bfr, Mustache_render_ctx ctx) {
|
||||
String key = this.Key();
|
||||
ctx.Render_variable(bfr.Escape_(Bool_.N), key);
|
||||
}
|
||||
}
|
||||
class Mustache_tkn_section extends Mustache_tkn_base { // EX: {{#section}}val{{/section}} -> val (if boolean) or valvalval (if list)
|
||||
private Mustache_tkn_itm[] subs_ary;
|
||||
public Mustache_tkn_section(byte[] key) {super(Mustache_tkn_itm_.Tid__section, key);}
|
||||
@Override public Mustache_tkn_itm[] Subs_ary() {return subs_ary;}
|
||||
@Override public void Subs_ary_(Mustache_tkn_itm[] v) {subs_ary = v;}
|
||||
@Override public void Render(Mustache_bfr bfr, Mustache_render_ctx ctx) {Render_static(Bool_.N, this, bfr, ctx);}
|
||||
public static void Render_static(boolean inverted, Mustache_tkn_base tkn, Mustache_bfr bfr, Mustache_render_ctx ctx) {
|
||||
String key = tkn.Key();
|
||||
Mustache_tkn_itm[] subs_ary = tkn.Subs_ary();
|
||||
ctx.Section_bgn(key);
|
||||
while (ctx.Section_do(inverted)) {
|
||||
int subs_len = subs_ary.length;
|
||||
for (int i = 0; i < subs_len; ++i) {
|
||||
Mustache_tkn_itm sub = subs_ary[i];
|
||||
sub.Render(bfr, ctx);
|
||||
}
|
||||
}
|
||||
ctx.Section_end();
|
||||
}
|
||||
}
|
||||
class Mustache_tkn_inverted extends Mustache_tkn_base { // EX: {{^section}}missing{{/section}} -> missing
|
||||
private Mustache_tkn_itm[] subs_ary;
|
||||
public Mustache_tkn_inverted(byte[] key) {super(Mustache_tkn_itm_.Tid__inverted, key);}
|
||||
@Override public Mustache_tkn_itm[] Subs_ary() {return subs_ary;}
|
||||
@Override public void Subs_ary_(Mustache_tkn_itm[] v) {subs_ary = v;}
|
||||
@Override public void Render(Mustache_bfr bfr, Mustache_render_ctx ctx) {Mustache_tkn_section.Render_static(Bool_.Y, this, bfr, ctx);}
|
||||
}
|
||||
class Mustache_tkn_partial extends Mustache_tkn_base { // EX: {{>a}} -> abc (deferred eval)
|
||||
public Mustache_tkn_partial(byte[] key) {super(Mustache_tkn_itm_.Tid__partial, key);}
|
||||
}
|
||||
class Mustache_tkn_delimiter extends Mustache_tkn_base {// EX: {{=<% %>=}} -> <% variable %>
|
||||
public Mustache_tkn_delimiter(byte[] key) {super(Mustache_tkn_itm_.Tid__delimiter, key);}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,140 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.mustaches; import gplx.*; import gplx.langs.*;
|
||||
public class Mustache_tkn_parser {
|
||||
private byte[] src; private int src_end;
|
||||
private final Mustache_tkn_def tkn_def = new Mustache_tkn_def();
|
||||
public Mustache_tkn_itm Parse(byte[] src) {return Parse(src, 0, src.length);}
|
||||
public Mustache_tkn_itm Parse(byte[] src, int src_bgn, int src_end) {
|
||||
this.src = src; this.src_end = src_end;
|
||||
Mustache_tkn_root root = new Mustache_tkn_root();
|
||||
Parse_grp(root, src_bgn);
|
||||
return root;
|
||||
}
|
||||
private int Parse_grp(Mustache_tkn_itm owner, int src_bgn) {
|
||||
List_adp subs_list = List_adp_.New();
|
||||
int txt_bgn = src_bgn;
|
||||
boolean end_grp = false;
|
||||
while (true) {// loop for "{{"
|
||||
int lhs_bgn = Bry_find_.Find_fwd(src, tkn_def.Variable_lhs, txt_bgn, src_end); // next "{{"
|
||||
if (lhs_bgn == Bry_find_.Not_found) { // no more "{{"
|
||||
subs_list.Add(new Mustache_tkn_text(src, txt_bgn, src_end)); // add everything between prv "}}" and cur "{{"
|
||||
break;
|
||||
}
|
||||
int lhs_end = lhs_bgn + tkn_def.Variable_lhs_len;
|
||||
|
||||
Mustache_tkn_data tkn_data = new Mustache_tkn_data(src[lhs_end]); // preview tkn
|
||||
lhs_end += tkn_data.lhs_end_adj;
|
||||
|
||||
int rhs_bgn = Bry_find_.Find_fwd(src, tkn_def.Variable_rhs, lhs_end, src_end); // next "}}"
|
||||
if (rhs_bgn == Bry_find_.Not_found) throw Fail(lhs_bgn, "unclosed tag"); // fail if no "}}"
|
||||
int rhs_end = rhs_bgn + tkn_def.Variable_rhs_len;
|
||||
if (tkn_data.rhs_bgn_chk != Byte_ascii.Null) {
|
||||
if (src[rhs_bgn] != tkn_data.rhs_bgn_chk) throw Fail(lhs_end, "invalid check byte");
|
||||
++rhs_end; // skip the chk_byte; note that bottom of function will skip "}}" by adding +2
|
||||
}
|
||||
|
||||
|
||||
int txt_end = lhs_bgn; // get text tkn
|
||||
if (tkn_data.ws_ignore) {
|
||||
int new_txt_bgn = Trim_bwd_to_nl(src, txt_bgn, txt_end);
|
||||
if (new_txt_bgn != -1) {
|
||||
int new_txt_end = Trim_fwd_to_nl(src, rhs_end, src_end);
|
||||
if (new_txt_end != -1) {
|
||||
txt_end = new_txt_bgn;
|
||||
rhs_end = new_txt_end == src_end ? src_end : new_txt_end + 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (txt_end > txt_bgn) // ignore 0-byte text tkns; occurs when consecutive tkns; EX: {{v1}}{{v2}} will try to create text tkn between "}}{{"
|
||||
subs_list.Add(new Mustache_tkn_text(src, txt_bgn, txt_end)); // add everything between prv "}}" and cur "{{"
|
||||
|
||||
txt_bgn = Parse_itm(tkn_data, subs_list, lhs_end, rhs_bgn, rhs_end); // do parse
|
||||
if (txt_bgn < 0) { // NOTE: txt_bgn < 0 means end grp
|
||||
txt_bgn *= -1;
|
||||
end_grp = true;
|
||||
}
|
||||
if (end_grp) break;
|
||||
}
|
||||
if (subs_list.Count() > 0) // don't create subs if no members
|
||||
owner.Subs_ary_((Mustache_tkn_itm[])subs_list.To_ary_and_clear(Mustache_tkn_itm.class));
|
||||
return txt_bgn;
|
||||
}
|
||||
private int Parse_itm(Mustache_tkn_data tkn_data, List_adp subs_list, int lhs_end, int rhs_bgn, int rhs_end) {
|
||||
byte[] val_bry = Bry_.Mid(src, lhs_end, rhs_bgn);
|
||||
Mustache_tkn_base tkn = null;
|
||||
switch (tkn_data.tid) {
|
||||
default: throw Err_.new_unhandled(tkn_data.tid);
|
||||
case Mustache_tkn_def.Variable: tkn = new Mustache_tkn_variable(val_bry); break;
|
||||
case Mustache_tkn_def.Comment: tkn = new Mustache_tkn_comment(); break;
|
||||
case Mustache_tkn_def.Partial: tkn = new Mustache_tkn_partial(val_bry); break;
|
||||
case Mustache_tkn_def.Delimiter_bgn: tkn = new Mustache_tkn_delimiter(val_bry); break; // TODO_OLD: implement delimiter; EX: {{=<% %>=}}
|
||||
case Mustache_tkn_def.Escape_bgn: tkn = new Mustache_tkn_escape(val_bry); break;
|
||||
case Mustache_tkn_def.Section: tkn = new Mustache_tkn_section(val_bry); break;
|
||||
case Mustache_tkn_def.Inverted: tkn = new Mustache_tkn_inverted(val_bry); break;
|
||||
case Mustache_tkn_def.Grp_end: {
|
||||
return -(rhs_end); // pop the stack
|
||||
}
|
||||
}
|
||||
subs_list.Add(tkn);
|
||||
if (tkn_data.parse_grp) {
|
||||
return Parse_grp(tkn, rhs_end);
|
||||
}
|
||||
else
|
||||
return rhs_end;
|
||||
}
|
||||
private Err Fail(int pos, String fmt, Object... args) {
|
||||
return Err_.new_("mustache", fmt, "excerpt", Bry_.Mid_by_len_safe(src, pos, 32));
|
||||
}
|
||||
private static int Trim_bwd_to_nl(byte[] src, int txt_bgn, int txt_end) {
|
||||
int stop = txt_bgn - 1;
|
||||
int pos = txt_end - 1;
|
||||
while (pos > stop) {
|
||||
byte b = src[pos];
|
||||
switch (b) {
|
||||
case Byte_ascii.Tab:
|
||||
case Byte_ascii.Space: --pos; break;
|
||||
case Byte_ascii.Nl: return pos + 1; // 1 char after \n
|
||||
default: return -1;
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
private static int Trim_fwd_to_nl(byte[] src, int txt_bgn, int txt_end) {
|
||||
int pos = txt_bgn;
|
||||
while (pos < txt_end) {
|
||||
byte b = src[pos];
|
||||
switch (b) {
|
||||
case Byte_ascii.Tab:
|
||||
case Byte_ascii.Space: ++pos; break;
|
||||
case Byte_ascii.Nl: return pos;
|
||||
default: return -1;
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
class Mustache_tkn_data {
|
||||
public int tid;
|
||||
public int lhs_end_adj;
|
||||
public byte rhs_bgn_chk;
|
||||
public boolean parse_grp;
|
||||
public boolean ws_ignore;
|
||||
public Mustache_tkn_data(byte b) {
|
||||
tid = b;
|
||||
parse_grp = ws_ignore = false;
|
||||
lhs_end_adj = 1;
|
||||
rhs_bgn_chk = Byte_ascii.Null;
|
||||
switch (b) {
|
||||
default: lhs_end_adj = 0; tid = Mustache_tkn_def.Variable; break;
|
||||
case Mustache_tkn_def.Comment:
|
||||
case Mustache_tkn_def.Partial:
|
||||
case Mustache_tkn_def.Grp_end: ws_ignore = true; break;
|
||||
case Mustache_tkn_def.Delimiter_bgn: rhs_bgn_chk = Mustache_tkn_def.Delimiter_end; break; // check for "=}}"; TODO_OLD: implement delimiter; EX: {{=<% %>=}}
|
||||
case Mustache_tkn_def.Escape_bgn: rhs_bgn_chk = Mustache_tkn_def.Escape_end; break; // check for ""
|
||||
case Mustache_tkn_def.Section:
|
||||
case Mustache_tkn_def.Inverted: ws_ignore = true; parse_grp = true; break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,25 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.mustaches; import gplx.*; import gplx.langs.*;
|
||||
import org.junit.*;
|
||||
public class Mustache_tkn_parser_tst {
|
||||
private final Mustache_tkn_parser_fxt fxt = new Mustache_tkn_parser_fxt();
|
||||
@Test public void Basic() {
|
||||
fxt.Test_parse("a{{b}}c", "ac");
|
||||
}
|
||||
@Test public void Comment() {
|
||||
fxt.Test_parse("a{{!b}}c", "ac");
|
||||
}
|
||||
}
|
||||
class Mustache_tkn_parser_fxt {
|
||||
private final Mustache_tkn_parser parser = new Mustache_tkn_parser();
|
||||
private final Mustache_render_ctx ctx = new Mustache_render_ctx();
|
||||
private final Mustache_bfr bfr = Mustache_bfr.New();
|
||||
public void Test_parse(String src_str, String expd) {
|
||||
byte[] src_bry = Bry_.new_a7(src_str);
|
||||
Mustache_tkn_itm actl_itm = parser.Parse(src_bry, 0, src_bry.length);
|
||||
actl_itm.Render(bfr, ctx);
|
||||
Tfds.Eq_str_lines(expd, bfr.To_str_and_clear());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,15 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.mustaches; import gplx.*; import gplx.langs.*;
|
||||
public class Mustache_wtr_ {
|
||||
public static byte[] Write_to_bry(byte[] src, Mustache_doc_itm itm) {return Write_to_bry(Bry_bfr_.New(), src, itm);}
|
||||
public static byte[] Write_to_bry(Bry_bfr bfr, byte[] src, Mustache_doc_itm itm) {
|
||||
Mustache_tkn_parser parser = new Mustache_tkn_parser();
|
||||
Mustache_tkn_itm root = parser.Parse(src, 0, src.length);
|
||||
Mustache_render_ctx ctx = new Mustache_render_ctx().Init(itm);
|
||||
Mustache_bfr mbfr = new Mustache_bfr(bfr);
|
||||
root.Render(mbfr, ctx);
|
||||
return mbfr.To_bry_and_clear();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,7 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.phps; import gplx.*; import gplx.langs.*;
|
||||
// Holder for the source bytes handed to the php parsing workers.
public class Php_ctx {
	private byte[] src;
	public byte[] Src() {
		return src;
	}
	// Sets the source bytes; returns this for chaining.
	public Php_ctx Src_(byte[] v) {
		this.src = v;
		return this;
	}
}
|
||||
|
||||
@@ -13,3 +13,261 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.phps; import gplx.*; import gplx.langs.*;
|
||||
import gplx.core.log_msgs.*;
|
||||
/*
|
||||
NOTE: naive implementation of PHP parser; intended only for parsing Messages**.php files in MediaWiki. Specifically, it assumes the following:
|
||||
- all lines are assignment lines: EX: $a = b;
|
||||
- only the assignment operator is allowed (=); EX: $a = 5 + 7; fails b/c of + operator;
|
||||
- no functions are supported: EX: strlen('a') fails
|
||||
*/
|
||||
public class Php_evaluator implements Php_tkn_wkr {
|
||||
byte mode = Mode_key_bgn, next_tid = 0, next_mode = 0;
|
||||
Php_line_assign cur_line; Php_itm_ary cur_ary; Php_key cur_kv_key;
|
||||
List_adp frame_stack = List_adp_.New();
|
||||
public Php_evaluator(Gfo_msg_log msg_log) {this.msg_log = msg_log;} Gfo_msg_log msg_log;
|
||||
public void Init(Php_ctx ctx) {src = ctx.Src(); frame_stack.Clear();} private byte[] src;
|
||||
public List_adp List() {return lines;} List_adp lines = List_adp_.New();
|
||||
public Gfo_msg_log Msg_log() {return msg_log;}
|
||||
public void Clear() {
|
||||
lines.Clear(); msg_log.Clear();
|
||||
cur_line = null;
|
||||
cur_ary = null;
|
||||
cur_kv_key = null;
|
||||
mode = Mode_key_bgn;
|
||||
next_tid = next_mode = 0;
|
||||
}
|
||||
public void Process(Php_tkn tkn) {
|
||||
byte tkn_tid = tkn.Tkn_tid();
|
||||
switch (tkn_tid) {
|
||||
case Php_tkn_.Tid_declaration: case Php_tkn_.Tid_comment: case Php_tkn_.Tid_ws: // always discard, regardless of mode
|
||||
return;
|
||||
}
|
||||
switch (mode) {
|
||||
case Mode_expect: // handles sequences like "array(" which hook in to "array" but need to skip "("
|
||||
if (tkn_tid == next_tid)
|
||||
mode = next_mode;
|
||||
else {
|
||||
Msg_many(src, tkn.Src_bgn(), tkn.Src_end(), Expecting_itm_failed, Php_tkn_.Xto_str(next_tid), Php_tkn_.Xto_str(tkn_tid));
|
||||
Fail();
|
||||
}
|
||||
break;
|
||||
case Mode_suspend:
|
||||
if (tkn_tid == Php_tkn_.Tid_semic) mode = Mode_key_bgn;
|
||||
break;
|
||||
case Mode_key_bgn:
|
||||
if (tkn_tid == Php_tkn_.Tid_var) {
|
||||
cur_ary = null;
|
||||
cur_line = new Php_line_assign();
|
||||
lines.Add(cur_line);
|
||||
|
||||
Php_tkn_var var_tkn = (Php_tkn_var)tkn;
|
||||
cur_line.Key_(new Php_itm_var(var_tkn.Var_name(src)));
|
||||
|
||||
mode = Mode_key_end;
|
||||
}
|
||||
else {
|
||||
Msg_many(src, tkn.Src_bgn(), tkn.Src_end(), Expecting_itm_failed, Php_tkn_.Xto_str(Php_tkn_.Tid_var), Php_tkn_.Xto_str(tkn_tid));
|
||||
Fail();
|
||||
}
|
||||
break;
|
||||
case Mode_key_end:
|
||||
switch (tkn_tid) {
|
||||
case Php_tkn_.Tid_eq: mode = Mode_val; break;
|
||||
case Php_tkn_.Tid_brack_bgn: mode = Mode_brack_itm; break;
|
||||
case Php_tkn_.Tid_brack_end: Expect(Php_tkn_.Tid_eq, Mode_val); break;
|
||||
default: {
|
||||
Msg_many(src, tkn.Src_bgn(), tkn.Src_end(), Expecting_itm_failed, Php_tkn_.Xto_str(Php_tkn_.Tid_var), Php_tkn_.Xto_str(tkn_tid));
|
||||
Fail();
|
||||
break;
|
||||
}
|
||||
}
|
||||
break;
|
||||
case Mode_brack_itm:
|
||||
switch (tkn_tid) {
|
||||
case Php_tkn_.Tid_quote:
|
||||
Php_tkn_quote tkn_quote = (Php_tkn_quote)tkn;
|
||||
Php_itm_quote key_sub = new Php_itm_quote(tkn_quote.Quote_text(src));
|
||||
cur_line.Key_subs_(new Php_key[] {key_sub});
|
||||
mode = Mode_key_end;
|
||||
break;
|
||||
default: {
|
||||
Msg_many(src, tkn.Src_bgn(), tkn.Src_end(), Expecting_itm_failed, Php_tkn_.Xto_str(Php_tkn_.Tid_var), Php_tkn_.Xto_str(tkn_tid));
|
||||
Fail();
|
||||
break;
|
||||
}
|
||||
}
|
||||
break;
|
||||
case Mode_val:
|
||||
Php_itm line_val = null;
|
||||
switch (tkn_tid) {
|
||||
case Php_tkn_.Tid_null: Expect(Php_tkn_.Tid_semic, Mode_key_bgn); line_val = Php_itm_null.Instance; break;
|
||||
case Php_tkn_.Tid_false: Expect(Php_tkn_.Tid_semic, Mode_key_bgn); line_val = Php_itm_bool_false.Instance; break;
|
||||
case Php_tkn_.Tid_true: Expect(Php_tkn_.Tid_semic, Mode_key_bgn); line_val = Php_itm_bool_true.Instance; break;
|
||||
case Php_tkn_.Tid_quote:
|
||||
Expect(Php_tkn_.Tid_semic, Mode_key_bgn);
|
||||
Php_tkn_quote tkn_quote = (Php_tkn_quote)tkn;
|
||||
line_val = new Php_itm_quote(tkn_quote.Quote_text(src));
|
||||
break;
|
||||
case Php_tkn_.Tid_ary:
|
||||
case Php_tkn_.Tid_brack_bgn:
|
||||
Php_itm_ary ary = new Php_itm_ary();
|
||||
if (cur_ary == null)
|
||||
line_val = ary;
|
||||
else {
|
||||
cur_ary.Subs_add(ary);
|
||||
frame_stack.Add(new Php_scanner_frame(cur_ary));
|
||||
cur_kv_key = null;
|
||||
}
|
||||
this.cur_ary = ary;
|
||||
if (tkn_tid == Php_tkn_.Tid_ary)
|
||||
Expect(Php_tkn_.Tid_paren_bgn, Mode_ary_subs);
|
||||
else
|
||||
mode = Mode_ary_subs;
|
||||
break;
|
||||
case Php_tkn_.Tid_txt:
|
||||
case Php_tkn_.Tid_var:
|
||||
break;
|
||||
case Php_tkn_.Tid_eq:
|
||||
case Php_tkn_.Tid_eq_kv:
|
||||
case Php_tkn_.Tid_semic:
|
||||
case Php_tkn_.Tid_comma:
|
||||
case Php_tkn_.Tid_paren_bgn:
|
||||
case Php_tkn_.Tid_paren_end:
|
||||
case Php_tkn_.Tid_brack_end:
|
||||
case Php_tkn_.Tid_num:
|
||||
break;
|
||||
}
|
||||
cur_line.Val_(line_val);
|
||||
break;
|
||||
case Mode_ary_subs:
|
||||
switch (tkn_tid) {
|
||||
case Php_tkn_.Tid_null: Ary_add_itm(Php_itm_null.Instance); break;
|
||||
case Php_tkn_.Tid_false: Ary_add_itm(Php_itm_bool_false.Instance); break;
|
||||
case Php_tkn_.Tid_true: Ary_add_itm(Php_itm_bool_true.Instance); break;
|
||||
case Php_tkn_.Tid_quote:
|
||||
Php_tkn_quote tkn_quote = (Php_tkn_quote)tkn;
|
||||
Ary_add_itm(new Php_itm_quote(tkn_quote.Quote_text(src)));
|
||||
break;
|
||||
case Php_tkn_.Tid_num:
|
||||
Php_tkn_num tkn_num = (Php_tkn_num)tkn;
|
||||
Ary_add_itm(new Php_itm_int(tkn_num.Num_val_int(src)));
|
||||
break;
|
||||
case Php_tkn_.Tid_var:
|
||||
Php_tkn_var tkn_var = (Php_tkn_var)tkn;
|
||||
Ary_add_itm(new Php_itm_var(Bry_.Mid(src, tkn_var.Src_bgn(), tkn_var.Src_end())));
|
||||
break;
|
||||
case Php_tkn_.Tid_txt:
|
||||
Php_tkn_txt tkn_txt = (Php_tkn_txt)tkn;
|
||||
Ary_add_itm(new Php_itm_var(Bry_.Mid(src, tkn_txt.Src_bgn(), tkn_txt.Src_end())));
|
||||
break;
|
||||
case Php_tkn_.Tid_ary:
|
||||
case Php_tkn_.Tid_brack_bgn:
|
||||
Php_itm_ary ary = new Php_itm_ary();
|
||||
if (cur_ary == null)
|
||||
line_val = ary;
|
||||
else {
|
||||
frame_stack.Add(new Php_scanner_frame(cur_ary));
|
||||
if (cur_kv_key == null)
|
||||
cur_ary.Subs_add(ary);
|
||||
else {
|
||||
Php_itm_kv ary_itm = new Php_itm_kv().Key_(cur_kv_key).Val_(ary);
|
||||
cur_ary.Subs_add(ary_itm);
|
||||
cur_kv_key = null;
|
||||
}
|
||||
}
|
||||
this.cur_ary = ary;
|
||||
if (tkn_tid == Php_tkn_.Tid_ary)
|
||||
Expect(Php_tkn_.Tid_paren_bgn, Mode_ary_subs);
|
||||
else
|
||||
mode = Mode_ary_subs;
|
||||
break;
|
||||
case Php_tkn_.Tid_paren_end:
|
||||
case Php_tkn_.Tid_brack_end:
|
||||
mode = Mode_ary_term;
|
||||
if (frame_stack.Count() == 0)
|
||||
cur_ary = null;
|
||||
else {
|
||||
Php_scanner_frame frame = (Php_scanner_frame)List_adp_.Pop(frame_stack);
|
||||
cur_ary = frame.Ary();
|
||||
frame.Rls();
|
||||
}
|
||||
break;
|
||||
case Php_tkn_.Tid_semic: // NOTE: will occur in following construct array(array());
|
||||
mode = Mode_key_bgn;
|
||||
break;
|
||||
case Php_tkn_.Tid_eq:
|
||||
case Php_tkn_.Tid_eq_kv:
|
||||
case Php_tkn_.Tid_comma:
|
||||
case Php_tkn_.Tid_paren_bgn:
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case Mode_ary_dlm:
|
||||
switch (tkn_tid) {
|
||||
case Php_tkn_.Tid_comma:
|
||||
mode = Mode_ary_subs;
|
||||
break;
|
||||
case Php_tkn_.Tid_paren_end:
|
||||
case Php_tkn_.Tid_brack_end:
|
||||
mode = Mode_ary_term;
|
||||
if (frame_stack.Count() == 0)
|
||||
cur_ary = null;
|
||||
else {
|
||||
Php_scanner_frame frame = (Php_scanner_frame)List_adp_.Pop(frame_stack);
|
||||
cur_ary = frame.Ary();
|
||||
frame.Rls();
|
||||
}
|
||||
break;
|
||||
case Php_tkn_.Tid_eq_kv:
|
||||
Php_itm_sub tmp_key = cur_ary.Subs_pop();
|
||||
cur_kv_key = (Php_key)tmp_key;
|
||||
mode = Mode_ary_subs;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case Mode_ary_term:
|
||||
switch (tkn_tid) {
|
||||
case Php_tkn_.Tid_comma:
|
||||
case Php_tkn_.Tid_paren_end: // NOTE: paren_end occurs in multiple nests; EX: array(array())
|
||||
case Php_tkn_.Tid_brack_end:
|
||||
mode = Mode_ary_subs;
|
||||
break;
|
||||
case Php_tkn_.Tid_semic:
|
||||
mode = Mode_key_bgn;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
private void Fail() {mode = Mode_suspend;}
|
||||
private void Ary_add_itm(Php_itm val) {
|
||||
mode = Mode_ary_dlm;
|
||||
if (cur_kv_key == null)
|
||||
cur_ary.Subs_add((Php_itm_sub)val);
|
||||
else {
|
||||
Php_itm_kv ary_itm = new Php_itm_kv().Key_(cur_kv_key).Val_(val);
|
||||
cur_ary.Subs_add(ary_itm);
|
||||
cur_kv_key = null;
|
||||
}
|
||||
}
|
||||
private void Expect(byte next_tid, byte next_mode) {
|
||||
mode = Mode_expect;
|
||||
this.next_tid = next_tid;
|
||||
this.next_mode = next_mode;
|
||||
}
|
||||
public void Msg_many(byte[] src, int bgn, int end, Gfo_msg_itm itm, Object... args) {
|
||||
msg_log.Add_itm_many(itm, src, bgn, end, args);
|
||||
}
|
||||
public static final Gfo_msg_itm Expecting_itm_failed = Gfo_msg_itm_.new_warn_(Php_parser.Log_nde, "expecting_itm_failed", "expecting_itm ~{0} but got ~{1} instead");
|
||||
private static final byte Mode_key_bgn = 1, Mode_key_end = 2, Mode_expect = 3, Mode_suspend = 4, Mode_val = 5, Mode_ary_subs = 6, Mode_ary_dlm = 7, Mode_ary_term = 8, Mode_brack_itm = 9;
|
||||
}
|
||||
class Php_scanner_frame {
|
||||
public Php_scanner_frame(Php_itm_ary ary) {this.ary = ary;}
|
||||
public Php_itm_ary Ary() {return ary;} Php_itm_ary ary;
|
||||
public void Rls() {ary = null;}
|
||||
}
|
||||
/** Marker type that exposes one shared instance, {@link #Char}. */
class Php_parser_interrupt {
	/** Shared instance. NOTE(review): no use is visible in this file; presumably a sentinel for the parser - confirm against callers. */
	public static final Php_parser_interrupt Char
		= new Php_parser_interrupt();
}
|
||||
|
||||
|
||||
@@ -13,3 +13,30 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.phps; import gplx.*; import gplx.langs.*;
|
||||
/** A value node produced while evaluating PHP source (null, bool, int, quote, array, key/value pair, var). */
public interface Php_itm {
	/** Type id of this node; implementations return one of the Php_itm_.Tid_* constants. */
	byte Itm_tid();

	/** Value of this node as bytes; some implementations (array, key/value pair, null) return null. */
	byte[] Val_obj_bry();
}
|
||||
class Php_itm_null implements Php_itm, Php_itm_sub {
|
||||
public byte Itm_tid() {return Php_itm_.Tid_null;}
|
||||
public byte[] Val_obj_bry() {return null;}
|
||||
public static final Php_itm_null Instance = new Php_itm_null(); Php_itm_null() {}
|
||||
}
|
||||
class Php_itm_bool_true implements Php_itm, Php_itm_sub {
|
||||
public byte Itm_tid() {return Php_itm_.Tid_bool_true;}
|
||||
public byte[] Val_obj_bry() {return Bry_true;}
|
||||
public static final Php_itm_bool_true Instance = new Php_itm_bool_true(); Php_itm_bool_true() {}
|
||||
private static final byte[] Bry_true = Bry_.new_a7("true");
|
||||
}
|
||||
class Php_itm_bool_false implements Php_itm, Php_itm_sub {
|
||||
public byte Itm_tid() {return Php_itm_.Tid_bool_false;}
|
||||
public byte[] Val_obj_bry() {return Bry_true;}
|
||||
public static final Php_itm_bool_false Instance = new Php_itm_bool_false(); Php_itm_bool_false() {}
|
||||
private static final byte[] Bry_true = Bry_.new_a7("false");
|
||||
}
|
||||
class Php_itm_var implements Php_itm, Php_itm_sub, Php_key {
|
||||
public Php_itm_var(byte[] v) {this.val_obj_bry = v;}
|
||||
public byte Itm_tid() {return Php_itm_.Tid_var;}
|
||||
public byte[] Val_obj_bry() {return val_obj_bry;} private byte[] val_obj_bry;
|
||||
}
|
||||
|
||||
@@ -13,3 +13,30 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.phps; import gplx.*; import gplx.langs.*;
|
||||
public class Php_itm_ {
|
||||
public static final byte Tid_null = 0, Tid_bool_false = 1, Tid_bool_true = 2, Tid_int = 3, Tid_quote = 4, Tid_ary = 5, Tid_kv = 6, Tid_var = 7;
|
||||
public static int Parse_int_or(Php_itm itm, int or) {
|
||||
int rv = -1;
|
||||
switch (itm.Itm_tid()) {
|
||||
case Php_itm_.Tid_int:
|
||||
rv = ((Php_itm_int)itm).Val_obj_int();
|
||||
return rv;
|
||||
case Php_itm_.Tid_quote:
|
||||
byte[] bry = ((Php_itm_quote)itm).Val_obj_bry();
|
||||
rv = Bry_.To_int_or(bry, -1);
|
||||
return (rv == -1) ? or : rv;
|
||||
default:
|
||||
return or;
|
||||
}
|
||||
}
|
||||
public static byte[] Parse_bry(Php_itm itm) {
|
||||
switch (itm.Itm_tid()) {
|
||||
case Php_itm_.Tid_kv:
|
||||
case Php_itm_.Tid_ary:
|
||||
throw Err_.new_unhandled(itm.Itm_tid());
|
||||
default:
|
||||
return itm.Val_obj_bry();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,3 +13,23 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.phps; import gplx.*; import gplx.langs.*;
|
||||
public class Php_itm_ary implements Php_itm, Php_itm_sub {
|
||||
public Php_itm_ary() {}
|
||||
public byte Itm_tid() {return Php_itm_.Tid_ary;}
|
||||
public byte[] Val_obj_bry() {return null;}
|
||||
public int Subs_len() {return subs_len;} private int subs_len;
|
||||
public Php_itm_sub Subs_get(int i) {return ary[i];}
|
||||
public Php_itm_sub Subs_pop() {return ary[--subs_len];}
|
||||
public void Subs_add(Php_itm_sub v) {
|
||||
int new_len = subs_len + 1;
|
||||
if (new_len > subs_max) { // ary too small >>> expand
|
||||
subs_max = new_len * 2;
|
||||
Php_itm_sub[] new_ary = new Php_itm_sub[subs_max];
|
||||
Array_.Copy_to(ary, 0, new_ary, 0, subs_len);
|
||||
ary = new_ary;
|
||||
}
|
||||
ary[subs_len] = v;
|
||||
subs_len = new_len;
|
||||
} Php_itm_sub[] ary = Php_itm_sub_.Ary_empty; int subs_max;
|
||||
}
|
||||
|
||||
@@ -13,3 +13,10 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.phps; import gplx.*; import gplx.langs.*;
|
||||
public class Php_itm_int implements Php_itm, Php_itm_sub, Php_key {
|
||||
public Php_itm_int(int v) {this.val_obj_int = v;}
|
||||
public byte Itm_tid() {return Php_itm_.Tid_int;}
|
||||
public byte[] Val_obj_bry() {return Bry_.new_by_int(val_obj_int);}
|
||||
public int Val_obj_int() {return val_obj_int;} private int val_obj_int;
|
||||
}
|
||||
|
||||
@@ -13,3 +13,10 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.phps; import gplx.*; import gplx.langs.*;
|
||||
public class Php_itm_kv implements Php_itm, Php_itm_sub {
|
||||
public byte Itm_tid() {return Php_itm_.Tid_kv;}
|
||||
public byte[] Val_obj_bry() {return null;}
|
||||
public Php_key Key() {return key;} public Php_itm_kv Key_(Php_key v) {this.key = v; return this;} Php_key key;
|
||||
public Php_itm Val() {return val;} public Php_itm_kv Val_(Php_itm v) {this.val = v; return this;} Php_itm val;
|
||||
}
|
||||
|
||||
@@ -13,3 +13,9 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.phps; import gplx.*; import gplx.langs.*;
|
||||
public class Php_itm_quote implements Php_itm, Php_itm_sub, Php_key {
|
||||
public Php_itm_quote(byte[] v) {this.val_obj_bry = v;} // NOTE: use Php_text_itm_parser to parse \" and related
|
||||
public byte Itm_tid() {return Php_itm_.Tid_quote;}
|
||||
public byte[] Val_obj_bry() {return val_obj_bry;} private byte[] val_obj_bry;
|
||||
}
|
||||
|
||||
@@ -13,3 +13,9 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.phps; import gplx.*; import gplx.langs.*;
|
||||
public interface Php_itm_sub extends Php_itm {
|
||||
}
|
||||
class Php_itm_sub_ {
|
||||
public static final Php_itm_sub[] Ary_empty = new Php_itm_sub[0];
|
||||
}
|
||||
|
||||
@@ -13,3 +13,9 @@ The terms of each license can be found in the source code repository:
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.langs.phps; import gplx.*; import gplx.langs.*;
|
||||
public interface Php_key extends Php_itm {
|
||||
}
|
||||
class Php_key_ {
|
||||
public static final Php_key[] Ary_empty = new Php_key[0];
|
||||
}
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user