mirror of
https://github.com/gnosygnu/xowa.git
synced 2026-03-02 03:49:30 +00:00
v2.12.1.1
This commit is contained in:
@@ -17,6 +17,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.langs.htmls.encoders; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
|
||||
import gplx.core.btries.*; import gplx.xowa.parsers.amps.*;
|
||||
import gplx.langs.htmls.*;
|
||||
public class Gfo_url_encoder implements Url_encoder_interface {
|
||||
private Gfo_url_encoder_itm[] encode_ary, decode_ary; private Gfo_url_encoder anchor_encoder = null;
|
||||
public Gfo_url_encoder(Gfo_url_encoder_itm[] encode_ary, Gfo_url_encoder_itm[] decode_ary, Gfo_url_encoder anchor_encoder) {
|
||||
|
||||
@@ -21,14 +21,15 @@ import gplx.xowa.parsers.amps.*;
|
||||
public class Gfo_url_encoder_ {
|
||||
public static final Gfo_url_encoder
|
||||
Id = Gfo_url_encoder_.New__html_id().Make()
|
||||
, Href = Gfo_url_encoder_.New__html_href_mw().Make()
|
||||
, Href = Gfo_url_encoder_.New__html_href_mw(Bool_.Y).Make()
|
||||
, Href_wo_anchor = Gfo_url_encoder_.New__html_href_mw(Bool_.N).Make()
|
||||
, Href_quotes = Gfo_url_encoder_.New__html_href_quotes().Make()
|
||||
, Href_qarg = Gfo_url_encoder_.New__html_href_qarg().Make()
|
||||
, Xourl = Gfo_url_encoder_.New__html_href_mw().Init__same__many(Byte_ascii.Underline).Make()
|
||||
, Xourl = Gfo_url_encoder_.New__html_href_mw(Bool_.Y).Init__same__many(Byte_ascii.Underline).Make()
|
||||
, Http_url = Gfo_url_encoder_.New__http_url().Make()
|
||||
, Http_url_ttl = Gfo_url_encoder_.New__http_url_ttl().Make()
|
||||
, Fsys = Gfo_url_encoder_.New__fsys_lnx().Make()
|
||||
, Fsys_safe = Gfo_url_encoder_.New__fsys_wnt().Make()
|
||||
, Fsys_lnx = Gfo_url_encoder_.New__fsys_lnx().Make()
|
||||
, Fsys_wnt = Gfo_url_encoder_.New__fsys_wnt().Make()
|
||||
, Gfs = Gfo_url_encoder_.New__gfs().Make()
|
||||
;
|
||||
private static Gfo_url_encoder_mkr New__html_id() { // EX: "<a id='a<>b'>" -> "<a id='a.C3.A9b'>"
|
||||
@@ -37,25 +38,28 @@ public class Gfo_url_encoder_ {
|
||||
.Init__diff__one(Byte_ascii.Space, Byte_ascii.Underline)
|
||||
.Init__html_ent(Byte_ascii.Amp, Xop_amp_trie.Instance);
|
||||
}
|
||||
private static Gfo_url_encoder_mkr New__html_href_mw() { // EX: "<a href='^#^'>" -> "<a href='%5E#.5E'>"
|
||||
private static Gfo_url_encoder_mkr New__html_href_mw(boolean use_anchor_encoder) { // EX: "<a href='^#^'>" -> "<a href='%5E#.5E'>"; REF.MW: ";:@$!*(),/"
|
||||
return new Gfo_url_encoder_mkr().Init(Byte_ascii.Percent).Init_common(Bool_.Y)
|
||||
.Init__diff__one(Byte_ascii.Space, Byte_ascii.Underline)
|
||||
.Init__same__many
|
||||
( Byte_ascii.Semic, Byte_ascii.At, Byte_ascii.Dollar, Byte_ascii.Bang, Byte_ascii.Star
|
||||
, Byte_ascii.Paren_bgn, Byte_ascii.Paren_end, Byte_ascii.Comma, Byte_ascii.Slash, Byte_ascii.Colon
|
||||
( Byte_ascii.Semic, Byte_ascii.Colon, Byte_ascii.At, Byte_ascii.Dollar, Byte_ascii.Bang, Byte_ascii.Star
|
||||
, Byte_ascii.Paren_bgn, Byte_ascii.Paren_end, Byte_ascii.Comma, Byte_ascii.Slash
|
||||
, Byte_ascii.Hash// NOTE: not part of wfUrlEncode; not sure where this is specified; needed for A#b
|
||||
)
|
||||
.Init__anchor_encoder(New__html_id().Make());
|
||||
.Init__anchor_encoder(use_anchor_encoder ? New__html_id().Make() : null);
|
||||
}
|
||||
private static Gfo_url_encoder_mkr New__html_href_qarg() { // same as regular href encoder, but also do not encode qarg characters "?" and "="
|
||||
return New__html_href_mw().Init__same__many(Byte_ascii.Question, Byte_ascii.Eq);
|
||||
return New__html_href_mw(Bool_.Y).Init__same__many(Byte_ascii.Question, Byte_ascii.Eq);
|
||||
}
|
||||
private static Gfo_url_encoder_mkr New__html_href_quotes() {
|
||||
return new Gfo_url_encoder_mkr().Init(Byte_ascii.Percent)
|
||||
.Init__diff__one(Byte_ascii.Space, Byte_ascii.Underline) // convert " " to "_"
|
||||
.Init__same__rng(0, 255) // default everything to same;
|
||||
.Init__diff__many(Byte_ascii.Percent, Byte_ascii.Apos
|
||||
, Byte_ascii.Quote, Byte_ascii.Lt, Byte_ascii.Gt); // encode ', ", <, >
|
||||
private static Gfo_url_encoder_mkr New__html_href_quotes() {// same as href encoder, but do not encode ?, =, #, +; also, don't encode "%" vals
|
||||
return new Gfo_url_encoder_mkr().Init(Byte_ascii.Percent).Init_common(Bool_.Y)
|
||||
.Init__diff__one(Byte_ascii.Space, Byte_ascii.Underline)
|
||||
.Init__same__many
|
||||
( Byte_ascii.Semic, Byte_ascii.Colon, Byte_ascii.At, Byte_ascii.Dollar, Byte_ascii.Bang, Byte_ascii.Star
|
||||
, Byte_ascii.Paren_bgn, Byte_ascii.Paren_end, Byte_ascii.Comma, Byte_ascii.Slash
|
||||
, Byte_ascii.Question, Byte_ascii.Eq, Byte_ascii.Hash, Byte_ascii.Plus// NOTE: not part of wfUrlEncode; not sure where this is specified; needed for A#b
|
||||
)
|
||||
;
|
||||
}
|
||||
public static Gfo_url_encoder_mkr New__http_url() {
|
||||
return new Gfo_url_encoder_mkr().Init(Byte_ascii.Percent).Init_common(Bool_.N)
|
||||
|
||||
@@ -54,7 +54,7 @@ class Gfo_url_encoder_fxt {
|
||||
public Gfo_url_encoder_fxt Encoder_id() {encoder = Gfo_url_encoder_.Id; return this;}
|
||||
public Gfo_url_encoder_fxt Encoder_href() {encoder = Gfo_url_encoder_.Href; return this;}
|
||||
public Gfo_url_encoder_fxt Encoder_url() {encoder = Gfo_url_encoder_.Http_url; return this;}
|
||||
public Gfo_url_encoder_fxt Encoder_fsys_safe() {encoder = Gfo_url_encoder_.Fsys_safe; return this;}
|
||||
public Gfo_url_encoder_fxt Encoder_fsys_safe() {encoder = Gfo_url_encoder_.Fsys_wnt; return this;}
|
||||
public void Test__bicode(String raw, String encoded) {
|
||||
Test__encode(raw, encoded);
|
||||
Test__decode(encoded, raw);
|
||||
|
||||
@@ -26,7 +26,7 @@ public class Html_atr extends gplx.core.brys.Bfr_arg_base {
|
||||
public byte[] Key() {return key;} private final byte[] key;
|
||||
public int Val_bgn() {return val_bgn;} private final int val_bgn;
|
||||
public int Val_end() {return val_end;} private final int val_end;
|
||||
public boolean Val_dat_exists() {return val_end > val_bgn;}
|
||||
public boolean Val_dat_exists() {return val_end != -1;}
|
||||
public boolean Val_dat_missing() {return val_end == -1;}
|
||||
public byte[] Val() {
|
||||
if (val == null)
|
||||
|
||||
@@ -39,7 +39,7 @@ public class Html_tag implements Mwh_atr_wkr {
|
||||
|| (name_id != Html_tag_.Id__eos && Int_.In(chk, Html_tag_.Id__any, Html_tag_.Id__comment))) {
|
||||
}
|
||||
else
|
||||
tag_rdr.Rdr().Fail("name_id chk failed", "expecting", Html_tag_.To_str(chk));
|
||||
tag_rdr.Err_wkr().Fail("name_id chk failed", "expecting", Html_tag_.To_str(chk));
|
||||
return this;
|
||||
}
|
||||
public byte[] Src() {return src;} private byte[] src;
|
||||
@@ -48,6 +48,7 @@ public class Html_tag implements Mwh_atr_wkr {
|
||||
public boolean Src_exists() {return src_end > src_bgn;} // NOTE: only true if EOS where src_end == src_bgn == src_len
|
||||
public boolean Tag_is_tail() {return tag_is_tail;} private boolean tag_is_tail;
|
||||
public boolean Tag_is_inline() {return tag_is_inline;} private boolean tag_is_inline;
|
||||
public int Atrs__len() {if (atrs_null) Atrs__make(); return atrs_hash.Count();}
|
||||
public boolean Atrs__match_pair(byte[] key, byte[] val) {
|
||||
if (atrs_null) Atrs__make();
|
||||
Html_atr rv = (Html_atr)atrs_hash.Get_by(key);
|
||||
@@ -61,8 +62,8 @@ public class Html_tag implements Mwh_atr_wkr {
|
||||
}
|
||||
public byte Atrs__cls_find_or_fail(Hash_adp_bry hash) {
|
||||
if (atrs_null) Atrs__make();
|
||||
Html_atr cls_atr = (Html_atr)atrs_hash.Get_by(Html_atr_.Bry__class); if (cls_atr == null) tag_rdr.Rdr().Fail("cls missing", String_.Empty, String_.Empty);
|
||||
byte rv = Html_atr_class_.Find_1st(src, cls_atr.Val_bgn(), cls_atr.Val_end(), hash); if (rv == Byte_.Max_value_127) tag_rdr.Rdr().Fail("cls val missing", String_.Empty, String_.Empty);
|
||||
Html_atr cls_atr = (Html_atr)atrs_hash.Get_by(Html_atr_.Bry__class); if (cls_atr == null) tag_rdr.Err_wkr().Fail("cls missing");
|
||||
byte rv = Html_atr_class_.Find_1st(src, cls_atr.Val_bgn(), cls_atr.Val_end(), hash); if (rv == Byte_.Max_value_127) tag_rdr.Err_wkr().Fail("cls val missing");
|
||||
return rv;
|
||||
}
|
||||
private static final Html_atr_style_wkr__get_val_as_int style_wkr = new Html_atr_style_wkr__get_val_as_int();
|
||||
@@ -78,7 +79,7 @@ public class Html_tag implements Mwh_atr_wkr {
|
||||
return rv == null ? Bry_.Empty : rv.Val();
|
||||
}
|
||||
public int Atrs__get_as_int(byte[] key) {
|
||||
int rv = Atrs__get_as_int_or(key, Int_.Min_value); if (rv == Int_.Min_value) tag_rdr.Rdr().Fail("atr missing", "key", key);
|
||||
int rv = Atrs__get_as_int_or(key, Int_.Min_value); if (rv == Int_.Min_value) tag_rdr.Err_wkr().Fail("atr missing", "key", key);
|
||||
return rv;
|
||||
}
|
||||
public int Atrs__get_as_int_or(byte[] key, int or) {
|
||||
@@ -86,13 +87,14 @@ public class Html_tag implements Mwh_atr_wkr {
|
||||
Html_atr rv = (Html_atr)atrs_hash.Get_by(key); if (rv == null) return or;
|
||||
return Bry_.To_int_or(src, rv.Val_bgn(), rv.Val_end(), or);
|
||||
}
|
||||
public Html_atr Atrs__get_by_or_fail(byte[] key) {return Atrs__get_by_or_fail(key, Bool_.Y);}
|
||||
public Html_atr Atrs__get_at(int i) {return (Html_atr)atrs_hash.Get_at(i);}
|
||||
public Html_atr Atrs__get_by_or_fail(byte[] key) {return Atrs__get_by_or_fail(key, Bool_.Y);}
|
||||
public Html_atr Atrs__get_by_or_empty(byte[] key) {return Atrs__get_by_or_fail(key, Bool_.N);}
|
||||
public Html_atr Atrs__get_by_or_fail(byte[] key, boolean fail_if_null) {
|
||||
if (atrs_null) Atrs__make();
|
||||
Html_atr rv = (Html_atr)atrs_hash.Get_by(key);
|
||||
if (rv == null) {
|
||||
if (fail_if_null) tag_rdr.Rdr().Fail("atr missing", "key", key);
|
||||
if (fail_if_null) tag_rdr.Err_wkr().Fail("atr missing", "key", key);
|
||||
else return Html_atr.Noop;
|
||||
}
|
||||
return rv;
|
||||
|
||||
@@ -25,19 +25,17 @@ public class Html_tag_rdr {
|
||||
private final Int_obj_ref tmp_depth = Int_obj_ref.zero_();
|
||||
public byte[] Src() {return src;} private byte[] src;
|
||||
public int Src_end() {return src_end;} private int src_end;
|
||||
public Bry_rdr Rdr() {return rdr;} private final Bry_rdr rdr = new Bry_rdr();
|
||||
public Bry_err_wkr Err_wkr() {return err_wkr;} private final Bry_err_wkr err_wkr = new Bry_err_wkr();
|
||||
public void Init(byte[] ctx, byte[] src, int src_bgn, int src_end) {
|
||||
this.src = src; this.pos = src_bgn; this.src_end = src_end;
|
||||
tag__eos.Init(this, src, Bool_.N, Bool_.N, src_end, src_end, src_end, src_end, Html_tag_.Id__eos);
|
||||
rdr.Init_by_page(ctx, src, src_end);
|
||||
err_wkr.Init_by_page(String_.new_u8(ctx), src);
|
||||
}
|
||||
public int Pos() {return pos;} private int pos;
|
||||
public void Pos_(int v) {this.pos = v;}
|
||||
public void Atrs__make(Mwh_atr_wkr atr_wkr, int head_bgn, int head_end) {atr_parser.Parse(atr_wkr, -1, -1, src, head_bgn, head_end);}
|
||||
public void Fail(String msg, Html_tag tag) {rdr.Fail(msg, String_.Empty, String_.Empty, tag.Src_bgn(), tag.Src_end());}
|
||||
public Html_tag Tag__move_fwd_head() {return Tag__find(Bool_.Y, Bool_.N, Bool_.N, pos, src_end, Html_tag_.Id__any);}
|
||||
public Html_tag Tag__move_fwd_head(int match_name_id) {return Tag__find(Bool_.Y, Bool_.N, Bool_.N, pos, src_end, match_name_id);}
|
||||
// public Html_tag Tag__move_fwd_tail() {return Tag__find(Bool_.Y, Bool_.N, Bool_.Y, pos, src_end, Html_tag_.Id__any);}
|
||||
public Html_tag Tag__move_fwd_tail(int match_name_id) {return Tag__find(Bool_.Y, Bool_.N, Bool_.Y, pos, src_end, match_name_id);}
|
||||
public Html_tag Tag__peek_fwd_head() {return Tag__find(Bool_.N, Bool_.N, Bool_.N, pos, src_end, Html_tag_.Id__any);}
|
||||
public Html_tag Tag__peek_fwd_head(int match_name_id) {return Tag__find(Bool_.N, Bool_.N, Bool_.N, pos, src_end, match_name_id);}
|
||||
@@ -70,7 +68,7 @@ public class Html_tag_rdr {
|
||||
}
|
||||
if (rv == null) {
|
||||
if (move && tail && !bwd)
|
||||
rdr.Fail("move failed", "tag_name", Html_tag_.To_str(match_name_id));
|
||||
err_wkr.Fail("move failed", "tag_name", Html_tag_.To_str(match_name_id));
|
||||
else
|
||||
return Tag__eos(rng_bgn);
|
||||
}
|
||||
@@ -181,7 +179,7 @@ public class Html_tag_rdr {
|
||||
return false;
|
||||
}
|
||||
public int Read_int_to(byte to_char) {
|
||||
int rv = Read_int_to(to_char, Int_.Max_value); if (rv == Int_.Max_value) rdr.Fail("invalid int", "pos", pos);
|
||||
int rv = Read_int_to(to_char, Int_.Max_value); if (rv == Int_.Max_value) err_wkr.Fail("invalid int", "pos", pos);
|
||||
return rv;
|
||||
}
|
||||
public int Read_int_to(byte to_char, int or_int) {
|
||||
|
||||
Reference in New Issue
Block a user