mirror of
https://github.com/gnosygnu/xowa.git
synced 2026-03-02 03:49:30 +00:00
'v3.7.3.1'
This commit is contained in:
@@ -27,7 +27,7 @@ class Gfs_lxr_whitespace implements Gfs_lxr {
|
||||
int rv = Gfs_lxr_.Rv_eos, cur_pos;
|
||||
for (cur_pos = end; cur_pos < src_len; cur_pos++) {
|
||||
byte b = src[cur_pos];
|
||||
Object o = ctx.Trie().Match_bgn_w_byte(b, src, cur_pos, src_len);
|
||||
Object o = ctx.Trie().Match_at_w_b0(ctx.Trie_rv(), b, src, cur_pos, src_len);
|
||||
if (o == null) {
|
||||
rv = Gfs_lxr_.Rv_null;
|
||||
ctx.Process_null(cur_pos);
|
||||
@@ -45,7 +45,7 @@ class Gfs_lxr_whitespace implements Gfs_lxr {
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
public static final Gfs_lxr_whitespace Instance = new Gfs_lxr_whitespace(); Gfs_lxr_whitespace() {}
|
||||
public static final Gfs_lxr_whitespace Instance = new Gfs_lxr_whitespace(); Gfs_lxr_whitespace() {}
|
||||
}
|
||||
class Gfs_lxr_comment_flat implements Gfs_lxr {
|
||||
public Gfs_lxr_comment_flat(byte[] bgn_bry, byte[] end_bry) {
|
||||
@@ -69,7 +69,7 @@ class Gfs_lxr_identifier implements Gfs_lxr {
|
||||
int pos, rv = Gfs_lxr_.Rv_eos;
|
||||
for (pos = end; pos < src_len; pos++) {
|
||||
byte b = src[pos];
|
||||
Object o = ctx.Trie().Match_bgn_w_byte(b, src, pos, src_len);
|
||||
Object o = ctx.Trie().Match_at_w_b0(ctx.Trie_rv(), b, src, pos, src_len);
|
||||
if (o == null) { // invalid char; stop;
|
||||
rv = Gfs_lxr_.Rv_null;
|
||||
ctx.Process_null(pos);
|
||||
@@ -89,7 +89,7 @@ class Gfs_lxr_identifier implements Gfs_lxr {
|
||||
if (rv == Gfs_lxr_.Rv_eos) ctx.Process_eos(); // eos
|
||||
return rv;
|
||||
}
|
||||
public static final Gfs_lxr_identifier Instance = new Gfs_lxr_identifier(); Gfs_lxr_identifier() {}
|
||||
public static final Gfs_lxr_identifier Instance = new Gfs_lxr_identifier(); Gfs_lxr_identifier() {}
|
||||
}
|
||||
class Gfs_lxr_semic implements Gfs_lxr {
|
||||
public int Lxr_tid() {return Gfs_lxr_.Tid_semic;}
|
||||
@@ -103,7 +103,7 @@ class Gfs_lxr_semic implements Gfs_lxr {
|
||||
}
|
||||
return end;
|
||||
}
|
||||
public static final Gfs_lxr_semic Instance = new Gfs_lxr_semic(); Gfs_lxr_semic() {}
|
||||
public static final Gfs_lxr_semic Instance = new Gfs_lxr_semic(); Gfs_lxr_semic() {}
|
||||
}
|
||||
class Gfs_lxr_dot implements Gfs_lxr {
|
||||
public int Lxr_tid() {return Gfs_lxr_.Tid_dot;}
|
||||
@@ -115,7 +115,7 @@ class Gfs_lxr_dot implements Gfs_lxr {
|
||||
}
|
||||
return end;
|
||||
}
|
||||
public static final Gfs_lxr_dot Instance = new Gfs_lxr_dot(); Gfs_lxr_dot() {}
|
||||
public static final Gfs_lxr_dot Instance = new Gfs_lxr_dot(); Gfs_lxr_dot() {}
|
||||
}
|
||||
class Gfs_lxr_paren_bgn implements Gfs_lxr {
|
||||
public int Lxr_tid() {return Gfs_lxr_.Tid_paren_bgn;}
|
||||
@@ -126,7 +126,7 @@ class Gfs_lxr_paren_bgn implements Gfs_lxr {
|
||||
}
|
||||
return end;
|
||||
}
|
||||
public static final Gfs_lxr_paren_bgn Instance = new Gfs_lxr_paren_bgn(); Gfs_lxr_paren_bgn() {}
|
||||
public static final Gfs_lxr_paren_bgn Instance = new Gfs_lxr_paren_bgn(); Gfs_lxr_paren_bgn() {}
|
||||
}
|
||||
class Gfs_lxr_paren_end implements Gfs_lxr {
|
||||
public int Lxr_tid() {return Gfs_lxr_.Tid_paren_end;}
|
||||
@@ -139,7 +139,7 @@ class Gfs_lxr_paren_end implements Gfs_lxr {
|
||||
}
|
||||
return end;
|
||||
}
|
||||
public static final Gfs_lxr_paren_end Instance = new Gfs_lxr_paren_end(); Gfs_lxr_paren_end() {}
|
||||
public static final Gfs_lxr_paren_end Instance = new Gfs_lxr_paren_end(); Gfs_lxr_paren_end() {}
|
||||
}
|
||||
class Gfs_lxr_quote implements Gfs_lxr {
|
||||
public Gfs_lxr_quote(byte[] bgn_bry, byte[] end_bry) {
|
||||
@@ -184,7 +184,7 @@ class Gfs_lxr_curly_bgn implements Gfs_lxr {
|
||||
}
|
||||
return end;
|
||||
}
|
||||
public static final Gfs_lxr_curly_bgn Instance = new Gfs_lxr_curly_bgn(); Gfs_lxr_curly_bgn() {}
|
||||
public static final Gfs_lxr_curly_bgn Instance = new Gfs_lxr_curly_bgn(); Gfs_lxr_curly_bgn() {}
|
||||
}
|
||||
class Gfs_lxr_curly_end implements Gfs_lxr {
|
||||
public int Lxr_tid() {return Gfs_lxr_.Tid_curly_end;}
|
||||
@@ -192,7 +192,7 @@ class Gfs_lxr_curly_end implements Gfs_lxr {
|
||||
ctx.Stack_pop(bgn);
|
||||
return end;
|
||||
}
|
||||
public static final Gfs_lxr_curly_end Instance = new Gfs_lxr_curly_end(); Gfs_lxr_curly_end() {}
|
||||
public static final Gfs_lxr_curly_end Instance = new Gfs_lxr_curly_end(); Gfs_lxr_curly_end() {}
|
||||
}
|
||||
class Gfs_lxr_equal implements Gfs_lxr {
|
||||
public int Lxr_tid() {return Gfs_lxr_.Tid_eq;}
|
||||
@@ -200,7 +200,7 @@ class Gfs_lxr_equal implements Gfs_lxr {
|
||||
ctx.Make_nde(bgn, end).Op_tid_(Gfs_nde.Op_tid_assign);
|
||||
return end;
|
||||
}
|
||||
public static final Gfs_lxr_equal Instance = new Gfs_lxr_equal(); Gfs_lxr_equal() {}
|
||||
public static final Gfs_lxr_equal Instance = new Gfs_lxr_equal(); Gfs_lxr_equal() {}
|
||||
}
|
||||
class Gfs_lxr_comma implements Gfs_lxr {
|
||||
public int Lxr_tid() {return Gfs_lxr_.Tid_comma;}
|
||||
@@ -210,5 +210,5 @@ class Gfs_lxr_comma implements Gfs_lxr {
|
||||
}
|
||||
return end;
|
||||
}
|
||||
public static final Gfs_lxr_comma Instance = new Gfs_lxr_comma(); Gfs_lxr_comma() {}
|
||||
public static final Gfs_lxr_comma Instance = new Gfs_lxr_comma(); Gfs_lxr_comma() {}
|
||||
}
|
||||
|
||||
@@ -18,8 +18,8 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
package gplx.langs.gfs; import gplx.*; import gplx.langs.*;
|
||||
import gplx.core.btries.*;
|
||||
public class Gfs_parser {
|
||||
Btrie_fast_mgr trie = Gfs_parser_.trie_();
|
||||
Gfs_parser_ctx ctx = new Gfs_parser_ctx();
|
||||
private final Btrie_fast_mgr trie = Gfs_parser_.trie_();
|
||||
private final Gfs_parser_ctx ctx = new Gfs_parser_ctx();
|
||||
public Gfs_nde Parse(byte[] src) {
|
||||
ctx.Root().Subs_clear();
|
||||
int src_len = src.length; if (src_len == 0) return ctx.Root();
|
||||
@@ -27,13 +27,13 @@ public class Gfs_parser {
|
||||
int pos = 0;
|
||||
while (pos < src_len) {
|
||||
byte b = src[pos];
|
||||
Object o = trie.Match_bgn_w_byte(b, src, pos, src_len);
|
||||
Object o = trie.Match_at_w_b0(ctx.Trie_rv(), b, src, pos, src_len);
|
||||
if (o == null)
|
||||
ctx.Err_mgr().Fail_unknown_char(ctx, pos, b);
|
||||
else {
|
||||
Gfs_lxr lxr = (Gfs_lxr)o;
|
||||
while (lxr != null) {
|
||||
int rslt = lxr.Process(ctx, pos, trie.Match_pos());
|
||||
int rslt = lxr.Process(ctx, pos, ctx.Trie_rv().Pos());
|
||||
switch (lxr.Lxr_tid()) {
|
||||
case Gfs_lxr_.Tid_whitespace: break;
|
||||
case Gfs_lxr_.Tid_comment: break;
|
||||
|
||||
@@ -19,6 +19,7 @@ package gplx.langs.gfs; import gplx.*; import gplx.langs.*;
|
||||
import gplx.core.btries.*;
|
||||
class Gfs_parser_ctx {
|
||||
public Btrie_fast_mgr Trie() {return trie;} Btrie_fast_mgr trie;
|
||||
public Btrie_rv Trie_rv() {return trie_rv;} private final Btrie_rv trie_rv = new Btrie_rv();
|
||||
public Gfs_nde Root() {return root;} Gfs_nde root = new Gfs_nde();
|
||||
public byte[] Src() {return src;} private byte[] src;
|
||||
public int Src_len() {return src_len;} private int src_len;
|
||||
|
||||
@@ -50,6 +50,7 @@ public class Gfh_atr_ {
|
||||
, Bry__align = Bry_.new_a7("align") // HTML.v4
|
||||
, Bry__bgcolor = Bry_.new_a7("bgcolor") // HTML.v4
|
||||
, Bry__abbr = Bry_.new_a7("abbr") // HTML.ua
|
||||
, Bry__srcset = Bry_.new_a7("srcset")
|
||||
;
|
||||
public static byte[] Make(Bry_bfr bfr, byte[] key, byte[] val) {
|
||||
return bfr.Add_byte_space().Add(key).Add_byte_eq().Add_byte_quote().Add(val).Add_byte_quote().To_bry_and_clear();
|
||||
@@ -66,4 +67,9 @@ public class Gfh_atr_ {
|
||||
bfr.Add_int_variable(val);
|
||||
bfr.Add_byte_quote();
|
||||
}
|
||||
public static void Add_double(Bry_bfr bfr, byte[] key, double val) {
|
||||
bfr.Add_byte_space().Add(key).Add_byte_eq().Add_byte_quote();
|
||||
bfr.Add_double(val);
|
||||
bfr.Add_byte_quote();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -65,6 +65,14 @@ public class Gfh_tag_ { // NOTE: not serialized; used by tag_rdr
|
||||
, Id__del = 41
|
||||
, Id__strike = 42
|
||||
, Id__tt = 43
|
||||
, Id__code = 44
|
||||
, Id__wbr = 45
|
||||
, Id__center = 46 // en.v:Vandalism_in_progress
|
||||
, Id__dfn = 47
|
||||
, Id__kbd = 48
|
||||
, Id__samp = 49
|
||||
, Id__ins = 50
|
||||
, Id__em = 51
|
||||
;
|
||||
public static final byte[]
|
||||
Bry__a = Bry_.new_a7("a")
|
||||
@@ -122,6 +130,14 @@ public class Gfh_tag_ { // NOTE: not serialized; used by tag_rdr
|
||||
.Add_str_int("del" , Id__del)
|
||||
.Add_str_int("strike" , Id__strike)
|
||||
.Add_str_int("tt" , Id__tt)
|
||||
.Add_str_int("code" , Id__code)
|
||||
.Add_str_int("wbr" , Id__wbr)
|
||||
.Add_str_int("center" , Id__center)
|
||||
.Add_str_int("dfn" , Id__dfn)
|
||||
.Add_str_int("kbd" , Id__kbd)
|
||||
.Add_str_int("samp" , Id__samp)
|
||||
.Add_str_int("ins" , Id__ins)
|
||||
.Add_str_int("em" , Id__em)
|
||||
;
|
||||
public static String To_str(int tid) {
|
||||
switch (tid) {
|
||||
@@ -172,6 +188,14 @@ public class Gfh_tag_ { // NOTE: not serialized; used by tag_rdr
|
||||
case Id__del: return "del";
|
||||
case Id__strike: return "strike";
|
||||
case Id__tt: return "tt";
|
||||
case Id__code: return "code";
|
||||
case Id__wbr: return "wbr";
|
||||
case Id__center: return "center";
|
||||
case Id__dfn: return "dfn";
|
||||
case Id__kbd: return "kbd";
|
||||
case Id__samp: return "samp";
|
||||
case Id__ins: return "ins";
|
||||
case Id__em: return "em";
|
||||
default: throw Err_.new_unhandled(tid);
|
||||
}
|
||||
}
|
||||
@@ -184,8 +208,7 @@ public class Gfh_tag_ { // NOTE: not serialized; used by tag_rdr
|
||||
, I_lhs = Bry_.new_a7("<i>") , I_rhs = Bry_.new_a7("</i>")
|
||||
, P_lhs = Bry_.new_a7("<p>") , P_rhs = Bry_.new_a7("</p>")
|
||||
, Pre_lhs = Bry_.new_a7("<pre>") , Pre_rhs = Bry_.new_a7("</pre>")
|
||||
, Div_lhs = Bry_.new_a7("<div>") , Div_rhs = Bry_.new_a7("</div>")
|
||||
, Div_lhs_bgn = Bry_.new_a7("<div")
|
||||
, Div_lhs = Bry_.new_a7("<div>") , Div_rhs = Bry_.new_a7("</div>") , Div_lhs_bgn = Bry_.new_a7("<div")
|
||||
, Html_rhs = Bry_.new_a7("</html>")
|
||||
, Head_lhs_bgn = Bry_.new_a7("<head") , Head_rhs = Bry_.new_a7("</head>")
|
||||
, Style_lhs_w_type = Bry_.new_a7("<style type=\"text/css\">")
|
||||
@@ -195,8 +218,16 @@ public class Gfh_tag_ { // NOTE: not serialized; used by tag_rdr
|
||||
, Span_lhs = Bry_.new_a7("<span") , Span_rhs = Bry_.new_a7("</span>")
|
||||
, Strong_lhs = Bry_.new_a7("<strong>") , Strong_rhs = Bry_.new_a7("</strong>")
|
||||
, Ul_lhs = Bry_.new_a7("<ul>") , Ul_rhs = Bry_.new_a7("</ul>")
|
||||
, Li_lhs = Bry_.new_a7("<li>") , Li_rhs = Bry_.new_a7("</li>")
|
||||
, Li_lhs_bgn = Bry_.new_a7("<li")
|
||||
, Ol_lhs = Bry_.new_a7("<ol>") , Ol_rhs = Bry_.new_a7("</ol>")
|
||||
, Dt_lhs = Bry_.new_a7("<dt>") , Dt_rhs = Bry_.new_a7("</dt>")
|
||||
, Dd_lhs = Bry_.new_a7("<dd>") , Dd_rhs = Bry_.new_a7("</dd>")
|
||||
, Dl_lhs = Bry_.new_a7("<dl>") , Dl_rhs = Bry_.new_a7("</dl>")
|
||||
, Li_lhs = Bry_.new_a7("<li>") , Li_rhs = Bry_.new_a7("</li>") , Li_lhs_bgn = Bry_.new_a7("<li")
|
||||
, Table_lhs = Bry_.new_a7("<table>") , Table_rhs = Bry_.new_a7("</table>") , Table_lhs_bgn = Bry_.new_a7("<table")
|
||||
, Tr_lhs = Bry_.new_a7("<tr>") , Tr_rhs = Bry_.new_a7("</tr>") , Tr_lhs_bgn = Bry_.new_a7("<tr")
|
||||
, Td_lhs = Bry_.new_a7("<td>") , Td_rhs = Bry_.new_a7("</td>") , Td_lhs_bgn = Bry_.new_a7("<td")
|
||||
, Th_lhs = Bry_.new_a7("<th>") , Th_rhs = Bry_.new_a7("</th>") , Th_lhs_bgn = Bry_.new_a7("<th")
|
||||
, Caption_lhs = Bry_.new_a7("<caption>") , Caption_rhs = Bry_.new_a7("</caption>") , Caption_lhs_bgn = Bry_.new_a7("<caption")
|
||||
;
|
||||
public static final String
|
||||
Comm_bgn_str = "<!--"
|
||||
@@ -210,6 +241,8 @@ public class Gfh_tag_ { // NOTE: not serialized; used by tag_rdr
|
||||
Comm_bgn_len = Comm_bgn.length
|
||||
, Comm_end_len = Comm_end.length
|
||||
;
|
||||
public static void Lhs_end_nde(Bry_bfr bfr) {bfr.Add_byte(Byte_ascii.Gt);}
|
||||
public static void Lhs_end_inl(Bry_bfr bfr) {bfr.Add_byte(Byte_ascii.Slash).Add_byte(Byte_ascii.Gt);}
|
||||
private static final byte[] Rhs_bgn = Bry_.new_a7("</");
|
||||
public static void Bld_lhs_end_nde(Bry_bfr bfr) {bfr.Add_byte(Byte_ascii.Gt);}
|
||||
public static void Bld_lhs_end_inl(Bry_bfr bfr) {bfr.Add_byte(Byte_ascii.Slash).Add_byte(Byte_ascii.Gt);}
|
||||
public static void Bld_rhs(Bry_bfr bfr, byte[] name) {bfr.Add(Rhs_bgn).Add(name).Add_byte(Byte_ascii.Angle_end);} // EX:"</tag_name>"
|
||||
}
|
||||
|
||||
@@ -119,9 +119,10 @@ public class Gfh_utl {
|
||||
if (bry == null) return null;
|
||||
boolean dirty = write_to_bfr ? true : false; // if write_to_bfr, then mark true, else bfr.Add_mid(bry, 0, i); will write whole bry again
|
||||
int pos = bgn;
|
||||
Btrie_rv trv = new Btrie_rv();
|
||||
while (pos < end) {
|
||||
byte b = bry[pos];
|
||||
Object o = unescape_trie.Match_bgn_w_byte(b, bry, pos, end);
|
||||
Object o = unescape_trie.Match_at_w_b0(trv, b, bry, pos, end);
|
||||
if (o == null) {
|
||||
if (dirty || write_to_bfr)
|
||||
bfr.Add_byte(b);
|
||||
@@ -149,7 +150,7 @@ public class Gfh_utl {
|
||||
if (dirty || write_to_bfr)
|
||||
bfr.Add_byte(b);
|
||||
}
|
||||
pos = unescape_trie.Match_pos();
|
||||
pos = trv.Pos();
|
||||
}
|
||||
}
|
||||
if (write_to_bfr)
|
||||
|
||||
@@ -18,8 +18,9 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
package gplx.langs.htmls.docs; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
|
||||
import gplx.core.btries.*;
|
||||
public class Gfh_doc_parser {
|
||||
private final Btrie_slim_mgr trie = Btrie_slim_mgr.cs();
|
||||
private final Gfh_txt_wkr txt_wkr;
|
||||
private final Btrie_rv trv = new Btrie_rv();
|
||||
private final Btrie_slim_mgr trie = Btrie_slim_mgr.cs();
|
||||
private final Gfh_txt_wkr txt_wkr;
|
||||
public Gfh_doc_parser(Gfh_txt_wkr txt_wkr, Gfh_doc_wkr... wkr_ary) {
|
||||
this.txt_wkr = txt_wkr;
|
||||
for (Gfh_doc_wkr wkr : wkr_ary)
|
||||
@@ -29,7 +30,7 @@ public class Gfh_doc_parser {
|
||||
int txt_bgn = -1;
|
||||
int pos = src_bgn;
|
||||
while (pos < src_end) {
|
||||
Object o = trie.Match_bgn(src, pos, src_end);
|
||||
Object o = trie.Match_at(trv, src, pos, src_end);
|
||||
if (o == null) { // not a known hook; add to txt
|
||||
if (txt_bgn == -1) txt_bgn = pos;
|
||||
++pos;
|
||||
@@ -44,7 +45,7 @@ public class Gfh_doc_parser {
|
||||
catch (Exception e) {
|
||||
Gfh_utl.Log(e, "html parse failed", page_url, src, pos);
|
||||
txt_bgn = pos; // set txt_bgn to hook_bgn which is "pos"; i.e.: txt resumes from start of failed hook
|
||||
pos = trie.Match_pos(); // set pos to hook_end
|
||||
pos = trv.Pos(); // set pos to hook_end
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -104,6 +104,11 @@ public class Gfh_tag implements Mwh_atr_wkr {
|
||||
Gfh_atr rv = (Gfh_atr)atrs_hash.Get_by(key); if (rv == null) return or;
|
||||
return Bry_.To_int_or(src, rv.Val_bgn(), rv.Val_end(), or);
|
||||
}
|
||||
public double Atrs__get_as_double_or(byte[] key, double or) {
|
||||
if (atrs_null) Atrs__make();
|
||||
Gfh_atr rv = (Gfh_atr)atrs_hash.Get_by(key); if (rv == null) return or;
|
||||
return Bry_.To_double_or(src, rv.Val_bgn(), rv.Val_end(), or);
|
||||
}
|
||||
public Gfh_atr Atrs__get_at(int i) {return (Gfh_atr)atrs_hash.Get_at(i);}
|
||||
public Gfh_atr Atrs__get_by_or_fail(byte[] key) {return Atrs__get_by_or_fail(key, Bool_.Y);}
|
||||
public Gfh_atr Atrs__get_by_or_empty(byte[] key) {return Atrs__get_by_or_fail(key, Bool_.N);}
|
||||
|
||||
@@ -19,7 +19,7 @@ package gplx.langs.htmls.encoders; import gplx.*; import gplx.langs.*; import gp
|
||||
import gplx.core.btries.*; import gplx.xowa.parsers.amps.*;
|
||||
import gplx.langs.htmls.*;
|
||||
public class Gfo_url_encoder implements Url_encoder_interface {
|
||||
private Gfo_url_encoder_itm[] encode_ary, decode_ary; private Gfo_url_encoder anchor_encoder = null;
|
||||
private final Gfo_url_encoder_itm[] encode_ary, decode_ary; private final Gfo_url_encoder anchor_encoder;
|
||||
public Gfo_url_encoder(Gfo_url_encoder_itm[] encode_ary, Gfo_url_encoder_itm[] decode_ary, Gfo_url_encoder anchor_encoder) {
|
||||
this.encode_ary = encode_ary; this.decode_ary = decode_ary; this.anchor_encoder = anchor_encoder;
|
||||
}
|
||||
@@ -50,18 +50,16 @@ public class Gfo_url_encoder implements Url_encoder_interface {
|
||||
public byte[] Decode(byte[] bry, int bgn, int end) {return Decode(Bool_.N, bry, bgn, end);}
|
||||
private byte[] Decode(boolean fail, byte[] bry, int bgn, int end) {Bry_bfr bfr = Bry_bfr_.Get(); Decode(bfr, fail, bry, bgn, end); return bfr.To_bry_and_rls();}
|
||||
public Bry_bfr Decode(Bry_bfr bfr, boolean fail, byte[] bry, int bgn, int end) {
|
||||
// synchronized (this) { // LOCK:DELETE; DATE:2016-07-06
|
||||
for (int i = bgn; i < end; ++i) {
|
||||
byte b = bry[i];
|
||||
if (anchor_encoder != null && b == Byte_ascii.Hash) {
|
||||
bfr.Add_byte(Byte_ascii.Hash);
|
||||
anchor_encoder.Decode(bfr, Bool_.N, bry, i + 1, end);
|
||||
break;
|
||||
}
|
||||
Gfo_url_encoder_itm itm = decode_ary[b & 0xff];// PATCH.JAVA:need to convert to unsigned byte
|
||||
i += itm.Decode(bfr, bry, end, i, b, fail);
|
||||
for (int i = bgn; i < end; ++i) {
|
||||
byte b = bry[i];
|
||||
if (anchor_encoder != null && b == Byte_ascii.Hash) {
|
||||
bfr.Add_byte(Byte_ascii.Hash);
|
||||
anchor_encoder.Decode(bfr, Bool_.N, bry, i + 1, end);
|
||||
break;
|
||||
}
|
||||
return bfr;
|
||||
// }
|
||||
Gfo_url_encoder_itm itm = decode_ary[b & 0xff];// PATCH.JAVA:need to convert to unsigned byte
|
||||
i += itm.Decode(bfr, bry, end, i, b, fail);
|
||||
}
|
||||
return bfr;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -25,6 +25,7 @@ public class Gfo_url_encoder_ {
|
||||
, Href = Gfo_url_encoder_.New__html_href_mw(Bool_.Y).Make()
|
||||
, Href_wo_anchor = Gfo_url_encoder_.New__html_href_mw(Bool_.N).Make()
|
||||
, Href_quotes = Gfo_url_encoder_.New__html_href_quotes().Make()
|
||||
, Href_quotes_v2 = Gfo_url_encoder_.New__html_href_quotes_v2().Make()
|
||||
, Href_qarg = Gfo_url_encoder_.New__html_href_qarg().Make()
|
||||
, Xourl = Gfo_url_encoder_.New__html_href_mw(Bool_.Y).Init__same__many(Byte_ascii.Underline).Make()
|
||||
, Http_url = Gfo_url_encoder_.New__http_url().Make()
|
||||
@@ -39,7 +40,7 @@ public class Gfo_url_encoder_ {
|
||||
.Init__diff__one(Byte_ascii.Space, Byte_ascii.Underline)
|
||||
.Init__html_ent(Byte_ascii.Amp, Xop_amp_trie.Instance);
|
||||
}
|
||||
private static Gfo_url_encoder_mkr New__html_href_mw(boolean use_anchor_encoder) { // EX: "<a href='^#^'>" -> "<a href='%5E#.5E'>"; REF.MW: ";:@$!*(),/"
|
||||
public static Gfo_url_encoder_mkr New__html_href_mw(boolean use_anchor_encoder) { // EX: "<a href='^#^'>" -> "<a href='%5E#.5E'>"; REF.MW: ";:@$!*(),/"
|
||||
return new Gfo_url_encoder_mkr().Init(Byte_ascii.Percent).Init_common(Bool_.Y)
|
||||
.Init__diff__one(Byte_ascii.Space, Byte_ascii.Underline)
|
||||
.Init__same__many
|
||||
@@ -61,6 +62,16 @@ public class Gfo_url_encoder_ {
|
||||
, Byte_ascii.Question, Byte_ascii.Eq, Byte_ascii.Hash, Byte_ascii.Plus// NOTE: not part of wfUrlEncode; not sure where this is specified; needed for A#b
|
||||
);
|
||||
}
|
||||
private static Gfo_url_encoder_mkr New__html_href_quotes_v2() {// same as href encoder, but do not encode ?, =, #, +; also, don't encode "%" vals
|
||||
return new Gfo_url_encoder_mkr().Init(Byte_ascii.Percent).Init_common(Bool_.Y)
|
||||
.Init__diff__one(Byte_ascii.Space, Byte_ascii.Underline)
|
||||
.Init__same__many
|
||||
( Byte_ascii.Semic, Byte_ascii.Colon, Byte_ascii.At, Byte_ascii.Dollar, Byte_ascii.Bang, Byte_ascii.Star
|
||||
, Byte_ascii.Paren_bgn, Byte_ascii.Paren_end, Byte_ascii.Comma, Byte_ascii.Slash
|
||||
, Byte_ascii.Question, Byte_ascii.Eq, Byte_ascii.Hash, Byte_ascii.Plus// NOTE: not part of wfUrlEncode; not sure where this is specified; needed for A#b
|
||||
, Byte_ascii.Percent // DATE:2016-07-12
|
||||
);
|
||||
}
|
||||
public static Gfo_url_encoder_mkr New__http_url() {
|
||||
return new Gfo_url_encoder_mkr().Init(Byte_ascii.Percent).Init_common(Bool_.N)
|
||||
.Init__diff__one(Byte_ascii.Space, Byte_ascii.Plus);
|
||||
|
||||
@@ -24,15 +24,17 @@ public interface Gfo_url_encoder_itm {
|
||||
class Gfo_url_encoder_itm_same implements Gfo_url_encoder_itm {
|
||||
public int Encode(Bry_bfr bfr, byte[] src, int end, int idx, byte b) {bfr.Add_byte(b); return 0;}
|
||||
public int Decode(Bry_bfr bfr, byte[] src, int end, int idx, byte b, boolean fail_when_invalid) {bfr.Add_byte(b); return 0;}
|
||||
public static final Gfo_url_encoder_itm Instance = new Gfo_url_encoder_itm_same();
|
||||
public static final Gfo_url_encoder_itm Instance = new Gfo_url_encoder_itm_same();
|
||||
}
|
||||
class Gfo_url_encoder_itm_diff implements Gfo_url_encoder_itm {
|
||||
public Gfo_url_encoder_itm_diff(byte orig, byte repl) {this.orig = orig; this.repl = repl;} private byte orig, repl;
|
||||
private final byte orig, repl;
|
||||
public Gfo_url_encoder_itm_diff(byte orig, byte repl) {this.orig = orig; this.repl = repl;}
|
||||
public int Encode(Bry_bfr bfr, byte[] src, int end, int idx, byte b) {bfr.Add_byte(repl); return 0;}
|
||||
public int Decode(Bry_bfr bfr, byte[] src, int end, int idx, byte b, boolean fail_when_invalid) {bfr.Add_byte(orig); return 0;}
|
||||
}
|
||||
class Gfo_url_encoder_itm_hex implements Gfo_url_encoder_itm {
|
||||
public Gfo_url_encoder_itm_hex(byte encode_marker) {this.encode_marker = encode_marker;} private byte encode_marker;
|
||||
private final byte encode_marker;
|
||||
public Gfo_url_encoder_itm_hex(byte encode_marker) {this.encode_marker = encode_marker;}
|
||||
public int Encode(Bry_bfr bfr, byte[] src, int end, int idx, byte b) {Encode_byte(b, bfr, encode_marker); return 0;}
|
||||
public static void Encode_byte(byte b, Bry_bfr bfr, byte encode_marker) {
|
||||
int b_int = b & 0xFF;// PATCH.JAVA:need to convert to unsigned byte
|
||||
@@ -68,13 +70,14 @@ class Gfo_url_encoder_itm_hex implements Gfo_url_encoder_itm {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
public static final byte[] HexBytes = new byte[]
|
||||
public static final byte[] HexBytes = new byte[]
|
||||
{ Byte_ascii.Num_0, Byte_ascii.Num_1, Byte_ascii.Num_2, Byte_ascii.Num_3, Byte_ascii.Num_4, Byte_ascii.Num_5, Byte_ascii.Num_6, Byte_ascii.Num_7
|
||||
, Byte_ascii.Num_8, Byte_ascii.Num_9, Byte_ascii.Ltr_A, Byte_ascii.Ltr_B, Byte_ascii.Ltr_C, Byte_ascii.Ltr_D, Byte_ascii.Ltr_E, Byte_ascii.Ltr_F
|
||||
};
|
||||
}
|
||||
class Gfo_url_encoder_itm_html_ent implements Gfo_url_encoder_itm {
|
||||
public Gfo_url_encoder_itm_html_ent(Btrie_slim_mgr amp_trie) {this.amp_trie = amp_trie;} Btrie_slim_mgr amp_trie;
|
||||
private final Btrie_slim_mgr amp_trie;
|
||||
public Gfo_url_encoder_itm_html_ent(Btrie_slim_mgr amp_trie) {this.amp_trie = amp_trie;}
|
||||
public int Encode(Bry_bfr bfr, byte[] src, int end, int idx, byte b) {
|
||||
++idx; // b is &; get next character afterwards
|
||||
if (idx == end) { // & is last char; return
|
||||
|
||||
@@ -20,7 +20,8 @@ import gplx.core.btries.*; import gplx.core.log_msgs.*;
|
||||
public class Php_parser {
|
||||
Php_lxr[] lxrs; int lxrs_len;
|
||||
int txt_bgn; Php_tkn_txt txt_tkn;
|
||||
private Btrie_slim_mgr trie = Btrie_slim_mgr.ci_a7(); // NOTE:ci:PHP tkns are ASCII
|
||||
private final Btrie_slim_mgr trie = Btrie_slim_mgr.ci_a7(); // NOTE:ci:PHP tkns are ASCII
|
||||
private final Btrie_rv trv = new Btrie_rv();
|
||||
byte[] src; int src_len; Php_tkn_wkr tkn_wkr; Php_tkn_factory tkn_factory = new Php_tkn_factory(); Php_ctx ctx = new Php_ctx();
|
||||
Php_parser_interrupt[] parser_interrupts = new Php_parser_interrupt[256];
|
||||
public Php_parser() {
|
||||
@@ -71,7 +72,7 @@ public class Php_parser {
|
||||
txt_tkn = null; txt_bgn = 0;
|
||||
boolean loop_raw = true, loop_txt = true;
|
||||
while (loop_raw) {
|
||||
Object o = trie.Match_bgn_w_byte(b, src, pos, src_len);
|
||||
Object o = trie.Match_at_w_b0(trv, b, src, pos, src_len);
|
||||
if (o == null) { // char does not hook into a lxr
|
||||
loop_txt = true;
|
||||
while (loop_txt) { // keep looping until end of String or parser_interrupt
|
||||
@@ -90,7 +91,7 @@ public class Php_parser {
|
||||
if (txt_bgn != pos) // txt_bgn is set; make text tkn
|
||||
Make_txt(txt_bgn, pos);
|
||||
Php_lxr lxr = (Php_lxr)o;
|
||||
int match_pos = trie.Match_pos();
|
||||
int match_pos = trv.Pos();
|
||||
int make_pos = lxr.Lxr_make(ctx, pos, match_pos);
|
||||
if (make_pos == Php_parser.NotFound) {
|
||||
Make_txt(txt_bgn, pos);
|
||||
|
||||
Reference in New Issue
Block a user