1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2026-03-02 03:49:30 +00:00

'v3.7.3.1'

This commit is contained in:
gnosygnu
2016-07-17 21:10:59 -04:00
parent b333db45f8
commit 7a851a41a5
290 changed files with 3048 additions and 2124 deletions

View File

@@ -27,7 +27,7 @@ class Gfs_lxr_whitespace implements Gfs_lxr {
int rv = Gfs_lxr_.Rv_eos, cur_pos;
for (cur_pos = end; cur_pos < src_len; cur_pos++) {
byte b = src[cur_pos];
Object o = ctx.Trie().Match_bgn_w_byte(b, src, cur_pos, src_len);
Object o = ctx.Trie().Match_at_w_b0(ctx.Trie_rv(), b, src, cur_pos, src_len);
if (o == null) {
rv = Gfs_lxr_.Rv_null;
ctx.Process_null(cur_pos);
@@ -45,7 +45,7 @@ class Gfs_lxr_whitespace implements Gfs_lxr {
}
return rv;
}
public static final Gfs_lxr_whitespace Instance = new Gfs_lxr_whitespace(); Gfs_lxr_whitespace() {}
public static final Gfs_lxr_whitespace Instance = new Gfs_lxr_whitespace(); Gfs_lxr_whitespace() {}
}
class Gfs_lxr_comment_flat implements Gfs_lxr {
public Gfs_lxr_comment_flat(byte[] bgn_bry, byte[] end_bry) {
@@ -69,7 +69,7 @@ class Gfs_lxr_identifier implements Gfs_lxr {
int pos, rv = Gfs_lxr_.Rv_eos;
for (pos = end; pos < src_len; pos++) {
byte b = src[pos];
Object o = ctx.Trie().Match_bgn_w_byte(b, src, pos, src_len);
Object o = ctx.Trie().Match_at_w_b0(ctx.Trie_rv(), b, src, pos, src_len);
if (o == null) { // invalid char; stop;
rv = Gfs_lxr_.Rv_null;
ctx.Process_null(pos);
@@ -89,7 +89,7 @@ class Gfs_lxr_identifier implements Gfs_lxr {
if (rv == Gfs_lxr_.Rv_eos) ctx.Process_eos(); // eos
return rv;
}
public static final Gfs_lxr_identifier Instance = new Gfs_lxr_identifier(); Gfs_lxr_identifier() {}
public static final Gfs_lxr_identifier Instance = new Gfs_lxr_identifier(); Gfs_lxr_identifier() {}
}
class Gfs_lxr_semic implements Gfs_lxr {
public int Lxr_tid() {return Gfs_lxr_.Tid_semic;}
@@ -103,7 +103,7 @@ class Gfs_lxr_semic implements Gfs_lxr {
}
return end;
}
public static final Gfs_lxr_semic Instance = new Gfs_lxr_semic(); Gfs_lxr_semic() {}
public static final Gfs_lxr_semic Instance = new Gfs_lxr_semic(); Gfs_lxr_semic() {}
}
class Gfs_lxr_dot implements Gfs_lxr {
public int Lxr_tid() {return Gfs_lxr_.Tid_dot;}
@@ -115,7 +115,7 @@ class Gfs_lxr_dot implements Gfs_lxr {
}
return end;
}
public static final Gfs_lxr_dot Instance = new Gfs_lxr_dot(); Gfs_lxr_dot() {}
public static final Gfs_lxr_dot Instance = new Gfs_lxr_dot(); Gfs_lxr_dot() {}
}
class Gfs_lxr_paren_bgn implements Gfs_lxr {
public int Lxr_tid() {return Gfs_lxr_.Tid_paren_bgn;}
@@ -126,7 +126,7 @@ class Gfs_lxr_paren_bgn implements Gfs_lxr {
}
return end;
}
public static final Gfs_lxr_paren_bgn Instance = new Gfs_lxr_paren_bgn(); Gfs_lxr_paren_bgn() {}
public static final Gfs_lxr_paren_bgn Instance = new Gfs_lxr_paren_bgn(); Gfs_lxr_paren_bgn() {}
}
class Gfs_lxr_paren_end implements Gfs_lxr {
public int Lxr_tid() {return Gfs_lxr_.Tid_paren_end;}
@@ -139,7 +139,7 @@ class Gfs_lxr_paren_end implements Gfs_lxr {
}
return end;
}
public static final Gfs_lxr_paren_end Instance = new Gfs_lxr_paren_end(); Gfs_lxr_paren_end() {}
public static final Gfs_lxr_paren_end Instance = new Gfs_lxr_paren_end(); Gfs_lxr_paren_end() {}
}
class Gfs_lxr_quote implements Gfs_lxr {
public Gfs_lxr_quote(byte[] bgn_bry, byte[] end_bry) {
@@ -184,7 +184,7 @@ class Gfs_lxr_curly_bgn implements Gfs_lxr {
}
return end;
}
public static final Gfs_lxr_curly_bgn Instance = new Gfs_lxr_curly_bgn(); Gfs_lxr_curly_bgn() {}
public static final Gfs_lxr_curly_bgn Instance = new Gfs_lxr_curly_bgn(); Gfs_lxr_curly_bgn() {}
}
class Gfs_lxr_curly_end implements Gfs_lxr {
public int Lxr_tid() {return Gfs_lxr_.Tid_curly_end;}
@@ -192,7 +192,7 @@ class Gfs_lxr_curly_end implements Gfs_lxr {
ctx.Stack_pop(bgn);
return end;
}
public static final Gfs_lxr_curly_end Instance = new Gfs_lxr_curly_end(); Gfs_lxr_curly_end() {}
public static final Gfs_lxr_curly_end Instance = new Gfs_lxr_curly_end(); Gfs_lxr_curly_end() {}
}
class Gfs_lxr_equal implements Gfs_lxr {
public int Lxr_tid() {return Gfs_lxr_.Tid_eq;}
@@ -200,7 +200,7 @@ class Gfs_lxr_equal implements Gfs_lxr {
ctx.Make_nde(bgn, end).Op_tid_(Gfs_nde.Op_tid_assign);
return end;
}
public static final Gfs_lxr_equal Instance = new Gfs_lxr_equal(); Gfs_lxr_equal() {}
public static final Gfs_lxr_equal Instance = new Gfs_lxr_equal(); Gfs_lxr_equal() {}
}
class Gfs_lxr_comma implements Gfs_lxr {
public int Lxr_tid() {return Gfs_lxr_.Tid_comma;}
@@ -210,5 +210,5 @@ class Gfs_lxr_comma implements Gfs_lxr {
}
return end;
}
public static final Gfs_lxr_comma Instance = new Gfs_lxr_comma(); Gfs_lxr_comma() {}
public static final Gfs_lxr_comma Instance = new Gfs_lxr_comma(); Gfs_lxr_comma() {}
}

View File

@@ -18,8 +18,8 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
package gplx.langs.gfs; import gplx.*; import gplx.langs.*;
import gplx.core.btries.*;
public class Gfs_parser {
Btrie_fast_mgr trie = Gfs_parser_.trie_();
Gfs_parser_ctx ctx = new Gfs_parser_ctx();
private final Btrie_fast_mgr trie = Gfs_parser_.trie_();
private final Gfs_parser_ctx ctx = new Gfs_parser_ctx();
public Gfs_nde Parse(byte[] src) {
ctx.Root().Subs_clear();
int src_len = src.length; if (src_len == 0) return ctx.Root();
@@ -27,13 +27,13 @@ public class Gfs_parser {
int pos = 0;
while (pos < src_len) {
byte b = src[pos];
Object o = trie.Match_bgn_w_byte(b, src, pos, src_len);
Object o = trie.Match_at_w_b0(ctx.Trie_rv(), b, src, pos, src_len);
if (o == null)
ctx.Err_mgr().Fail_unknown_char(ctx, pos, b);
else {
Gfs_lxr lxr = (Gfs_lxr)o;
while (lxr != null) {
int rslt = lxr.Process(ctx, pos, trie.Match_pos());
int rslt = lxr.Process(ctx, pos, ctx.Trie_rv().Pos());
switch (lxr.Lxr_tid()) {
case Gfs_lxr_.Tid_whitespace: break;
case Gfs_lxr_.Tid_comment: break;

View File

@@ -19,6 +19,7 @@ package gplx.langs.gfs; import gplx.*; import gplx.langs.*;
import gplx.core.btries.*;
class Gfs_parser_ctx {
public Btrie_fast_mgr Trie() {return trie;} Btrie_fast_mgr trie;
public Btrie_rv Trie_rv() {return trie_rv;} private final Btrie_rv trie_rv = new Btrie_rv();
public Gfs_nde Root() {return root;} Gfs_nde root = new Gfs_nde();
public byte[] Src() {return src;} private byte[] src;
public int Src_len() {return src_len;} private int src_len;

View File

@@ -50,6 +50,7 @@ public class Gfh_atr_ {
, Bry__align = Bry_.new_a7("align") // HTML.v4
, Bry__bgcolor = Bry_.new_a7("bgcolor") // HTML.v4
, Bry__abbr = Bry_.new_a7("abbr") // HTML.ua
, Bry__srcset = Bry_.new_a7("srcset")
;
public static byte[] Make(Bry_bfr bfr, byte[] key, byte[] val) {
return bfr.Add_byte_space().Add(key).Add_byte_eq().Add_byte_quote().Add(val).Add_byte_quote().To_bry_and_clear();
@@ -66,4 +67,9 @@ public class Gfh_atr_ {
bfr.Add_int_variable(val);
bfr.Add_byte_quote();
}
/**
 * Appends one attribute to {@code bfr}: a space byte, {@code key}, an eq byte, a quote byte,
 * {@code val} written through {@code Bry_bfr.Add_double}, and a closing quote byte.
 *
 * @param bfr buffer that receives the attribute
 * @param key attribute name bytes, written as-is
 * @param val attribute value, written by {@code bfr.Add_double}
 */
public static void Add_double(Bry_bfr bfr, byte[] key, double val) {
	// name portion, up to and including the opening quote
	bfr.Add_byte_space();
	bfr.Add(key);
	bfr.Add_byte_eq();
	bfr.Add_byte_quote();
	// value portion, then the closing quote
	bfr.Add_double(val);
	bfr.Add_byte_quote();
}
}

View File

@@ -65,6 +65,14 @@ public class Gfh_tag_ { // NOTE: not serialized; used by tag_rdr
, Id__del = 41
, Id__strike = 42
, Id__tt = 43
, Id__code = 44
, Id__wbr = 45
, Id__center = 46 // en.v:Vandalism_in_progress
, Id__dfn = 47
, Id__kbd = 48
, Id__samp = 49
, Id__ins = 50
, Id__em = 51
;
public static final byte[]
Bry__a = Bry_.new_a7("a")
@@ -122,6 +130,14 @@ public class Gfh_tag_ { // NOTE: not serialized; used by tag_rdr
.Add_str_int("del" , Id__del)
.Add_str_int("strike" , Id__strike)
.Add_str_int("tt" , Id__tt)
.Add_str_int("code" , Id__code)
.Add_str_int("wbr" , Id__wbr)
.Add_str_int("center" , Id__center)
.Add_str_int("dfn" , Id__dfn)
.Add_str_int("kbd" , Id__kbd)
.Add_str_int("samp" , Id__samp)
.Add_str_int("ins" , Id__ins)
.Add_str_int("em" , Id__em)
;
public static String To_str(int tid) {
switch (tid) {
@@ -172,6 +188,14 @@ public class Gfh_tag_ { // NOTE: not serialized; used by tag_rdr
case Id__del: return "del";
case Id__strike: return "strike";
case Id__tt: return "tt";
case Id__code: return "code";
case Id__wbr: return "wbr";
case Id__center: return "center";
case Id__dfn: return "dfn";
case Id__kbd: return "kbd";
case Id__samp: return "samp";
case Id__ins: return "ins";
case Id__em: return "em";
default: throw Err_.new_unhandled(tid);
}
}
@@ -184,8 +208,7 @@ public class Gfh_tag_ { // NOTE: not serialized; used by tag_rdr
, I_lhs = Bry_.new_a7("<i>") , I_rhs = Bry_.new_a7("</i>")
, P_lhs = Bry_.new_a7("<p>") , P_rhs = Bry_.new_a7("</p>")
, Pre_lhs = Bry_.new_a7("<pre>") , Pre_rhs = Bry_.new_a7("</pre>")
, Div_lhs = Bry_.new_a7("<div>") , Div_rhs = Bry_.new_a7("</div>")
, Div_lhs_bgn = Bry_.new_a7("<div")
, Div_lhs = Bry_.new_a7("<div>") , Div_rhs = Bry_.new_a7("</div>") , Div_lhs_bgn = Bry_.new_a7("<div")
, Html_rhs = Bry_.new_a7("</html>")
, Head_lhs_bgn = Bry_.new_a7("<head") , Head_rhs = Bry_.new_a7("</head>")
, Style_lhs_w_type = Bry_.new_a7("<style type=\"text/css\">")
@@ -195,8 +218,16 @@ public class Gfh_tag_ { // NOTE: not serialized; used by tag_rdr
, Span_lhs = Bry_.new_a7("<span") , Span_rhs = Bry_.new_a7("</span>")
, Strong_lhs = Bry_.new_a7("<strong>") , Strong_rhs = Bry_.new_a7("</strong>")
, Ul_lhs = Bry_.new_a7("<ul>") , Ul_rhs = Bry_.new_a7("</ul>")
, Li_lhs = Bry_.new_a7("<li>") , Li_rhs = Bry_.new_a7("</li>")
, Li_lhs_bgn = Bry_.new_a7("<li")
, Ol_lhs = Bry_.new_a7("<ol>") , Ol_rhs = Bry_.new_a7("</ol>")
, Dt_lhs = Bry_.new_a7("<dt>") , Dt_rhs = Bry_.new_a7("</dt>")
, Dd_lhs = Bry_.new_a7("<dd>") , Dd_rhs = Bry_.new_a7("</dd>")
, Dl_lhs = Bry_.new_a7("<dl>") , Dl_rhs = Bry_.new_a7("</dl>")
, Li_lhs = Bry_.new_a7("<li>") , Li_rhs = Bry_.new_a7("</li>") , Li_lhs_bgn = Bry_.new_a7("<li")
, Table_lhs = Bry_.new_a7("<table>") , Table_rhs = Bry_.new_a7("</table>") , Table_lhs_bgn = Bry_.new_a7("<table")
, Tr_lhs = Bry_.new_a7("<tr>") , Tr_rhs = Bry_.new_a7("</tr>") , Tr_lhs_bgn = Bry_.new_a7("<tr")
, Td_lhs = Bry_.new_a7("<td>") , Td_rhs = Bry_.new_a7("</td>") , Td_lhs_bgn = Bry_.new_a7("<td")
, Th_lhs = Bry_.new_a7("<th>") , Th_rhs = Bry_.new_a7("</th>") , Th_lhs_bgn = Bry_.new_a7("<th")
, Caption_lhs = Bry_.new_a7("<caption>") , Caption_rhs = Bry_.new_a7("</caption>") , Caption_lhs_bgn = Bry_.new_a7("<caption")
;
public static final String
Comm_bgn_str = "<!--"
@@ -210,6 +241,8 @@ public class Gfh_tag_ { // NOTE: not serialized; used by tag_rdr
Comm_bgn_len = Comm_bgn.length
, Comm_end_len = Comm_end.length
;
public static void Lhs_end_nde(Bry_bfr bfr) {bfr.Add_byte(Byte_ascii.Gt);}
public static void Lhs_end_inl(Bry_bfr bfr) {bfr.Add_byte(Byte_ascii.Slash).Add_byte(Byte_ascii.Gt);}
private static final byte[] Rhs_bgn = Bry_.new_a7("</");
public static void Bld_lhs_end_nde(Bry_bfr bfr) {bfr.Add_byte(Byte_ascii.Gt);}
public static void Bld_lhs_end_inl(Bry_bfr bfr) {bfr.Add_byte(Byte_ascii.Slash).Add_byte(Byte_ascii.Gt);}
public static void Bld_rhs(Bry_bfr bfr, byte[] name) {bfr.Add(Rhs_bgn).Add(name).Add_byte(Byte_ascii.Angle_end);} // EX:"</tag_name>"
}

View File

@@ -119,9 +119,10 @@ public class Gfh_utl {
if (bry == null) return null;
boolean dirty = write_to_bfr ? true : false; // if write_to_bfr, then mark true, else bfr.Add_mid(bry, 0, i); will write whole bry again
int pos = bgn;
Btrie_rv trv = new Btrie_rv();
while (pos < end) {
byte b = bry[pos];
Object o = unescape_trie.Match_bgn_w_byte(b, bry, pos, end);
Object o = unescape_trie.Match_at_w_b0(trv, b, bry, pos, end);
if (o == null) {
if (dirty || write_to_bfr)
bfr.Add_byte(b);
@@ -149,7 +150,7 @@ public class Gfh_utl {
if (dirty || write_to_bfr)
bfr.Add_byte(b);
}
pos = unescape_trie.Match_pos();
pos = trv.Pos();
}
}
if (write_to_bfr)

View File

@@ -18,8 +18,9 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
package gplx.langs.htmls.docs; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
import gplx.core.btries.*;
public class Gfh_doc_parser {
private final Btrie_slim_mgr trie = Btrie_slim_mgr.cs();
private final Gfh_txt_wkr txt_wkr;
private final Btrie_rv trv = new Btrie_rv();
private final Btrie_slim_mgr trie = Btrie_slim_mgr.cs();
private final Gfh_txt_wkr txt_wkr;
public Gfh_doc_parser(Gfh_txt_wkr txt_wkr, Gfh_doc_wkr... wkr_ary) {
this.txt_wkr = txt_wkr;
for (Gfh_doc_wkr wkr : wkr_ary)
@@ -29,7 +30,7 @@ public class Gfh_doc_parser {
int txt_bgn = -1;
int pos = src_bgn;
while (pos < src_end) {
Object o = trie.Match_bgn(src, pos, src_end);
Object o = trie.Match_at(trv, src, pos, src_end);
if (o == null) { // not a known hook; add to txt
if (txt_bgn == -1) txt_bgn = pos;
++pos;
@@ -44,7 +45,7 @@ public class Gfh_doc_parser {
catch (Exception e) {
Gfh_utl.Log(e, "html parse failed", page_url, src, pos);
txt_bgn = pos; // set txt_bgn to hook_bgn which is "pos"; i.e.: txt resumes from start of failed hook
pos = trie.Match_pos(); // set pos to hook_end
pos = trv.Pos(); // set pos to hook_end
}
}
}

View File

@@ -104,6 +104,11 @@ public class Gfh_tag implements Mwh_atr_wkr {
Gfh_atr rv = (Gfh_atr)atrs_hash.Get_by(key); if (rv == null) return or;
return Bry_.To_int_or(src, rv.Val_bgn(), rv.Val_end(), or);
}
/**
 * Looks up the attribute stored under {@code key} and converts its value span to a double.
 *
 * @param key attribute key used for the {@code atrs_hash} lookup
 * @param or  value returned when the lookup yields null; also handed to
 *            {@code Bry_.To_double_or} as its own fallback argument
 * @return the result of {@code Bry_.To_double_or} over the attribute's value span, or {@code or}
 */
public double Atrs__get_as_double_or(byte[] key, double or) {
	if (atrs_null) {
		Atrs__make();	// atrs_null is set: call Atrs__make() before touching atrs_hash
	}
	Gfh_atr atr = (Gfh_atr)atrs_hash.Get_by(key);
	if (atr == null) {
		return or;	// no attribute under this key
	}
	int val_bgn = atr.Val_bgn();
	int val_end = atr.Val_end();
	return Bry_.To_double_or(src, val_bgn, val_end, or);
}
public Gfh_atr Atrs__get_at(int i) {return (Gfh_atr)atrs_hash.Get_at(i);}
public Gfh_atr Atrs__get_by_or_fail(byte[] key) {return Atrs__get_by_or_fail(key, Bool_.Y);}
public Gfh_atr Atrs__get_by_or_empty(byte[] key) {return Atrs__get_by_or_fail(key, Bool_.N);}

View File

@@ -19,7 +19,7 @@ package gplx.langs.htmls.encoders; import gplx.*; import gplx.langs.*; import gp
import gplx.core.btries.*; import gplx.xowa.parsers.amps.*;
import gplx.langs.htmls.*;
public class Gfo_url_encoder implements Url_encoder_interface {
private Gfo_url_encoder_itm[] encode_ary, decode_ary; private Gfo_url_encoder anchor_encoder = null;
private final Gfo_url_encoder_itm[] encode_ary, decode_ary; private final Gfo_url_encoder anchor_encoder;
public Gfo_url_encoder(Gfo_url_encoder_itm[] encode_ary, Gfo_url_encoder_itm[] decode_ary, Gfo_url_encoder anchor_encoder) {
this.encode_ary = encode_ary; this.decode_ary = decode_ary; this.anchor_encoder = anchor_encoder;
}
@@ -50,18 +50,16 @@ public class Gfo_url_encoder implements Url_encoder_interface {
public byte[] Decode(byte[] bry, int bgn, int end) {return Decode(Bool_.N, bry, bgn, end);}
private byte[] Decode(boolean fail, byte[] bry, int bgn, int end) {Bry_bfr bfr = Bry_bfr_.Get(); Decode(bfr, fail, bry, bgn, end); return bfr.To_bry_and_rls();}
public Bry_bfr Decode(Bry_bfr bfr, boolean fail, byte[] bry, int bgn, int end) {
// synchronized (this) { // LOCK:DELETE; DATE:2016-07-06
for (int i = bgn; i < end; ++i) {
byte b = bry[i];
if (anchor_encoder != null && b == Byte_ascii.Hash) {
bfr.Add_byte(Byte_ascii.Hash);
anchor_encoder.Decode(bfr, Bool_.N, bry, i + 1, end);
break;
}
Gfo_url_encoder_itm itm = decode_ary[b & 0xff];// PATCH.JAVA:need to convert to unsigned byte
i += itm.Decode(bfr, bry, end, i, b, fail);
for (int i = bgn; i < end; ++i) {
byte b = bry[i];
if (anchor_encoder != null && b == Byte_ascii.Hash) {
bfr.Add_byte(Byte_ascii.Hash);
anchor_encoder.Decode(bfr, Bool_.N, bry, i + 1, end);
break;
}
return bfr;
// }
Gfo_url_encoder_itm itm = decode_ary[b & 0xff];// PATCH.JAVA:need to convert to unsigned byte
i += itm.Decode(bfr, bry, end, i, b, fail);
}
return bfr;
}
}

View File

@@ -25,6 +25,7 @@ public class Gfo_url_encoder_ {
, Href = Gfo_url_encoder_.New__html_href_mw(Bool_.Y).Make()
, Href_wo_anchor = Gfo_url_encoder_.New__html_href_mw(Bool_.N).Make()
, Href_quotes = Gfo_url_encoder_.New__html_href_quotes().Make()
, Href_quotes_v2 = Gfo_url_encoder_.New__html_href_quotes_v2().Make()
, Href_qarg = Gfo_url_encoder_.New__html_href_qarg().Make()
, Xourl = Gfo_url_encoder_.New__html_href_mw(Bool_.Y).Init__same__many(Byte_ascii.Underline).Make()
, Http_url = Gfo_url_encoder_.New__http_url().Make()
@@ -39,7 +40,7 @@ public class Gfo_url_encoder_ {
.Init__diff__one(Byte_ascii.Space, Byte_ascii.Underline)
.Init__html_ent(Byte_ascii.Amp, Xop_amp_trie.Instance);
}
private static Gfo_url_encoder_mkr New__html_href_mw(boolean use_anchor_encoder) { // EX: "<a href='^#^'>" -> "<a href='%5E#.5E'>"; REF.MW: ";:@$!*(),/"
public static Gfo_url_encoder_mkr New__html_href_mw(boolean use_anchor_encoder) { // EX: "<a href='^#^'>" -> "<a href='%5E#.5E'>"; REF.MW: ";:@$!*(),/"
return new Gfo_url_encoder_mkr().Init(Byte_ascii.Percent).Init_common(Bool_.Y)
.Init__diff__one(Byte_ascii.Space, Byte_ascii.Underline)
.Init__same__many
@@ -61,6 +62,16 @@ public class Gfo_url_encoder_ {
, Byte_ascii.Question, Byte_ascii.Eq, Byte_ascii.Hash, Byte_ascii.Plus// NOTE: not part of wfUrlEncode; not sure where this is specified; needed for A#b
);
}
// Maker for the "href quotes v2" encoder.
// same as href encoder, but do not encode ?, =, #, +; also, don't encode "%" vals
private static Gfo_url_encoder_mkr New__html_href_quotes_v2() {
	Gfo_url_encoder_mkr rv = new Gfo_url_encoder_mkr()
		.Init(Byte_ascii.Percent)
		.Init_common(Bool_.Y)
		.Init__diff__one(Byte_ascii.Space, Byte_ascii.Underline)	// space is swapped for underline
		.Init__same__many
		( Byte_ascii.Semic, Byte_ascii.Colon, Byte_ascii.At, Byte_ascii.Dollar, Byte_ascii.Bang, Byte_ascii.Star
		, Byte_ascii.Paren_bgn, Byte_ascii.Paren_end, Byte_ascii.Comma, Byte_ascii.Slash
		, Byte_ascii.Question, Byte_ascii.Eq, Byte_ascii.Hash, Byte_ascii.Plus	// NOTE: not part of wfUrlEncode; not sure where this is specified; needed for A#b
		, Byte_ascii.Percent	// DATE:2016-07-12
		);
	return rv;
}
public static Gfo_url_encoder_mkr New__http_url() {
return new Gfo_url_encoder_mkr().Init(Byte_ascii.Percent).Init_common(Bool_.N)
.Init__diff__one(Byte_ascii.Space, Byte_ascii.Plus);

View File

@@ -24,15 +24,17 @@ public interface Gfo_url_encoder_itm {
class Gfo_url_encoder_itm_same implements Gfo_url_encoder_itm {
public int Encode(Bry_bfr bfr, byte[] src, int end, int idx, byte b) {bfr.Add_byte(b); return 0;}
public int Decode(Bry_bfr bfr, byte[] src, int end, int idx, byte b, boolean fail_when_invalid) {bfr.Add_byte(b); return 0;}
public static final Gfo_url_encoder_itm Instance = new Gfo_url_encoder_itm_same();
public static final Gfo_url_encoder_itm Instance = new Gfo_url_encoder_itm_same();
}
class Gfo_url_encoder_itm_diff implements Gfo_url_encoder_itm {
public Gfo_url_encoder_itm_diff(byte orig, byte repl) {this.orig = orig; this.repl = repl;} private byte orig, repl;
private final byte orig, repl;
public Gfo_url_encoder_itm_diff(byte orig, byte repl) {this.orig = orig; this.repl = repl;}
public int Encode(Bry_bfr bfr, byte[] src, int end, int idx, byte b) {bfr.Add_byte(repl); return 0;}
public int Decode(Bry_bfr bfr, byte[] src, int end, int idx, byte b, boolean fail_when_invalid) {bfr.Add_byte(orig); return 0;}
}
class Gfo_url_encoder_itm_hex implements Gfo_url_encoder_itm {
public Gfo_url_encoder_itm_hex(byte encode_marker) {this.encode_marker = encode_marker;} private byte encode_marker;
private final byte encode_marker;
public Gfo_url_encoder_itm_hex(byte encode_marker) {this.encode_marker = encode_marker;}
public int Encode(Bry_bfr bfr, byte[] src, int end, int idx, byte b) {Encode_byte(b, bfr, encode_marker); return 0;}
public static void Encode_byte(byte b, Bry_bfr bfr, byte encode_marker) {
int b_int = b & 0xFF;// PATCH.JAVA:need to convert to unsigned byte
@@ -68,13 +70,14 @@ class Gfo_url_encoder_itm_hex implements Gfo_url_encoder_itm {
return 0;
}
}
public static final byte[] HexBytes = new byte[]
public static final byte[] HexBytes = new byte[]
{ Byte_ascii.Num_0, Byte_ascii.Num_1, Byte_ascii.Num_2, Byte_ascii.Num_3, Byte_ascii.Num_4, Byte_ascii.Num_5, Byte_ascii.Num_6, Byte_ascii.Num_7
, Byte_ascii.Num_8, Byte_ascii.Num_9, Byte_ascii.Ltr_A, Byte_ascii.Ltr_B, Byte_ascii.Ltr_C, Byte_ascii.Ltr_D, Byte_ascii.Ltr_E, Byte_ascii.Ltr_F
};
}
class Gfo_url_encoder_itm_html_ent implements Gfo_url_encoder_itm {
public Gfo_url_encoder_itm_html_ent(Btrie_slim_mgr amp_trie) {this.amp_trie = amp_trie;} Btrie_slim_mgr amp_trie;
private final Btrie_slim_mgr amp_trie;
public Gfo_url_encoder_itm_html_ent(Btrie_slim_mgr amp_trie) {this.amp_trie = amp_trie;}
public int Encode(Bry_bfr bfr, byte[] src, int end, int idx, byte b) {
++idx; // b is &; get next character afterwards
if (idx == end) { // & is last char; return

View File

@@ -20,7 +20,8 @@ import gplx.core.btries.*; import gplx.core.log_msgs.*;
public class Php_parser {
Php_lxr[] lxrs; int lxrs_len;
int txt_bgn; Php_tkn_txt txt_tkn;
private Btrie_slim_mgr trie = Btrie_slim_mgr.ci_a7(); // NOTE:ci:PHP tkns are ASCII
private final Btrie_slim_mgr trie = Btrie_slim_mgr.ci_a7(); // NOTE:ci:PHP tkns are ASCII
private final Btrie_rv trv = new Btrie_rv();
byte[] src; int src_len; Php_tkn_wkr tkn_wkr; Php_tkn_factory tkn_factory = new Php_tkn_factory(); Php_ctx ctx = new Php_ctx();
Php_parser_interrupt[] parser_interrupts = new Php_parser_interrupt[256];
public Php_parser() {
@@ -71,7 +72,7 @@ public class Php_parser {
txt_tkn = null; txt_bgn = 0;
boolean loop_raw = true, loop_txt = true;
while (loop_raw) {
Object o = trie.Match_bgn_w_byte(b, src, pos, src_len);
Object o = trie.Match_at_w_b0(trv, b, src, pos, src_len);
if (o == null) { // char does not hook into a lxr
loop_txt = true;
while (loop_txt) { // keep looping until end of String or parser_interrupt
@@ -90,7 +91,7 @@ public class Php_parser {
if (txt_bgn != pos) // txt_bgn is set; make text tkn
Make_txt(txt_bgn, pos);
Php_lxr lxr = (Php_lxr)o;
int match_pos = trie.Match_pos();
int match_pos = trv.Pos();
int make_pos = lxr.Lxr_make(ctx, pos, match_pos);
if (make_pos == Php_parser.NotFound) {
Make_txt(txt_bgn, pos);