mirror of
https://github.com/gnosygnu/xowa.git
synced 2026-03-02 03:49:30 +00:00
'v3.7.2.1'
This commit is contained in:
@@ -17,7 +17,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.langs.htmls; import gplx.*; import gplx.langs.*;
|
||||
public class Gfh_atr_ {
|
||||
public static final byte[]
|
||||
public static final byte[]
|
||||
// "coreattrs"
|
||||
Bry__id = Bry_.new_a7("id")
|
||||
, Bry__class = Bry_.new_a7("class")
|
||||
@@ -54,7 +54,16 @@ public class Gfh_atr_ {
|
||||
public static byte[] Make(Bry_bfr bfr, byte[] key, byte[] val) {
|
||||
return bfr.Add_byte_space().Add(key).Add_byte_eq().Add_byte_quote().Add(val).Add_byte_quote().To_bry_and_clear();
|
||||
}
|
||||
public static byte[] Add_to_bry(Bry_bfr bfr, byte[] key, byte[] val) {
|
||||
bfr.Add_byte_space().Add(key).Add_byte_eq().Add_byte_quote().Add(val).Add_byte_quote();
|
||||
return bfr.To_bry_and_clear();
|
||||
}
|
||||
public static void Add(Bry_bfr bfr, byte[] key, byte[] val) {
|
||||
bfr.Add_byte_space().Add(key).Add_byte_eq().Add_byte_quote().Add(val).Add_byte_quote();
|
||||
}
|
||||
public static void Add(Bry_bfr bfr, byte[] key, int val) {
|
||||
bfr.Add_byte_space().Add(key).Add_byte_eq().Add_byte_quote();
|
||||
bfr.Add_int_variable(val);
|
||||
bfr.Add_byte_quote();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -54,6 +54,17 @@ public class Gfh_tag_ { // NOTE: not serialized; used by tag_rdr
|
||||
, Id__sup = 30
|
||||
, Id__sub = 31
|
||||
, Id__bdi = 32
|
||||
, Id__font = 33
|
||||
, Id__strong = 34
|
||||
, Id__s = 35
|
||||
, Id__abbr = 36
|
||||
, Id__cite = 37
|
||||
, Id__var = 38
|
||||
, Id__u = 39
|
||||
, Id__big = 40
|
||||
, Id__del = 41
|
||||
, Id__strike = 42
|
||||
, Id__tt = 43
|
||||
;
|
||||
public static final byte[]
|
||||
Bry__a = Bry_.new_a7("a")
|
||||
@@ -64,6 +75,8 @@ public class Gfh_tag_ { // NOTE: not serialized; used by tag_rdr
|
||||
, Bry__link = Bry_.new_a7("link")
|
||||
, Bry__style = Bry_.new_a7("style")
|
||||
, Bry__script = Bry_.new_a7("script")
|
||||
, Bry__xowa_any = Bry_.new_a7("xowa_any")
|
||||
, Bry__xowa_comment = Bry_.new_a7("xowa_comment")
|
||||
;
|
||||
public static final Hash_adp_bry Hash = Hash_adp_bry.ci_a7()
|
||||
.Add_bry_int(Bry__a , Id__a)
|
||||
@@ -98,6 +111,17 @@ public class Gfh_tag_ { // NOTE: not serialized; used by tag_rdr
|
||||
.Add_str_int("sup" , Id__sup)
|
||||
.Add_str_int("sub" , Id__sub)
|
||||
.Add_str_int("bdi" , Id__bdi)
|
||||
.Add_str_int("font" , Id__font)
|
||||
.Add_str_int("strong" , Id__strong)
|
||||
.Add_str_int("s" , Id__s)
|
||||
.Add_str_int("abbr" , Id__abbr)
|
||||
.Add_str_int("cite" , Id__cite)
|
||||
.Add_str_int("var" , Id__var)
|
||||
.Add_str_int("u" , Id__u)
|
||||
.Add_str_int("big" , Id__big)
|
||||
.Add_str_int("del" , Id__del)
|
||||
.Add_str_int("strike" , Id__strike)
|
||||
.Add_str_int("tt" , Id__tt)
|
||||
;
|
||||
public static String To_str(int tid) {
|
||||
switch (tid) {
|
||||
@@ -137,6 +161,17 @@ public class Gfh_tag_ { // NOTE: not serialized; used by tag_rdr
|
||||
case Id__sup: return "sup";
|
||||
case Id__sub: return "sub";
|
||||
case Id__bdi: return "bdi";
|
||||
case Id__font: return "font";
|
||||
case Id__strong: return "strong";
|
||||
case Id__s: return "s";
|
||||
case Id__abbr: return "abbr";
|
||||
case Id__cite: return "cite";
|
||||
case Id__var: return "var";
|
||||
case Id__u: return "u";
|
||||
case Id__big: return "big";
|
||||
case Id__del: return "del";
|
||||
case Id__strike: return "strike";
|
||||
case Id__tt: return "tt";
|
||||
default: throw Err_.new_unhandled(tid);
|
||||
}
|
||||
}
|
||||
@@ -150,6 +185,7 @@ public class Gfh_tag_ { // NOTE: not serialized; used by tag_rdr
|
||||
, P_lhs = Bry_.new_a7("<p>") , P_rhs = Bry_.new_a7("</p>")
|
||||
, Pre_lhs = Bry_.new_a7("<pre>") , Pre_rhs = Bry_.new_a7("</pre>")
|
||||
, Div_lhs = Bry_.new_a7("<div>") , Div_rhs = Bry_.new_a7("</div>")
|
||||
, Div_lhs_bgn = Bry_.new_a7("<div")
|
||||
, Html_rhs = Bry_.new_a7("</html>")
|
||||
, Head_lhs_bgn = Bry_.new_a7("<head") , Head_rhs = Bry_.new_a7("</head>")
|
||||
, Style_lhs_w_type = Bry_.new_a7("<style type=\"text/css\">")
|
||||
@@ -174,4 +210,6 @@ public class Gfh_tag_ { // NOTE: not serialized; used by tag_rdr
|
||||
Comm_bgn_len = Comm_bgn.length
|
||||
, Comm_end_len = Comm_end.length
|
||||
;
|
||||
public static void Lhs_end_nde(Bry_bfr bfr) {bfr.Add_byte(Byte_ascii.Gt);}
|
||||
public static void Lhs_end_inl(Bry_bfr bfr) {bfr.Add_byte(Byte_ascii.Slash).Add_byte(Byte_ascii.Gt);}
|
||||
}
|
||||
|
||||
@@ -34,6 +34,7 @@ public class Gfh_tag implements Mwh_atr_wkr {
|
||||
return rv;
|
||||
}
|
||||
public int Name_id() {return name_id;} private int name_id;
|
||||
public boolean Tid_is_comment() {return name_id == Gfh_tag_.Id__comment;}
|
||||
public byte[] Name_bry() {return name_bry;} private byte[] name_bry;
|
||||
public Gfh_tag Chk_name_or_fail(int chk) {
|
||||
if (!Chk_name(chk)) tag_rdr.Err_wkr().Fail("name_id chk failed", "expecting", Gfh_tag_.To_str(chk));
|
||||
|
||||
@@ -112,6 +112,133 @@ public class Gfh_tag_rdr {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
public Gfh_tag Tag__move_fwd_tail(byte[] find_tag_bry) {return Tag__find(Bool_.Y, Bool_.N, Bool_.Y, pos, src_end, find_tag_bry);}
|
||||
private Gfh_tag Tag__find(boolean move, boolean bwd, boolean tail, int rng_bgn, int rng_end, byte[] find_tag_bry) {
|
||||
int tmp = rng_bgn;
|
||||
int stop_pos = rng_end; int adj = 1;
|
||||
if (bwd) {
|
||||
stop_pos = -1;
|
||||
adj = -1;
|
||||
--tmp; // subtract 1 from tmp; needed when pos is at src_len, else array error below
|
||||
}
|
||||
tmp_depth.Val_zero_();
|
||||
Gfh_tag rv = null;
|
||||
while (tmp != stop_pos) {
|
||||
if (src[tmp] == Byte_ascii.Angle_bgn) {
|
||||
rv = Tag__extract(move, tail, find_tag_bry, tmp);
|
||||
if (Bry_.Eq(rv.Name_bry(), Gfh_tag_.Bry__xowa_comment)) { // ignore comments DATE:2016-06-25
|
||||
tmp = rv.Src_end();
|
||||
rv = null; // null rv, else rv will still be comment and may get returned to caller
|
||||
continue;
|
||||
}
|
||||
if (Tag__match(move, bwd, tail, find_tag_bry, tmp_depth, rv))
|
||||
break;
|
||||
else {
|
||||
tmp = bwd ? rv.Src_bgn() - 1 : rv.Src_end();
|
||||
rv = null;
|
||||
}
|
||||
}
|
||||
else
|
||||
tmp += adj;
|
||||
}
|
||||
if (rv == null) {
|
||||
if (move && tail && !bwd)
|
||||
err_wkr.Fail("move tag fwd failed", "tag_name", find_tag_bry);
|
||||
else
|
||||
return Tag__eos(rng_bgn);
|
||||
}
|
||||
if (move) pos = rv.Src_end();
|
||||
return rv;
|
||||
}
|
||||
private boolean Tag__match(boolean move, boolean bwd, boolean tail, byte[] find_tag_bry, Int_obj_ref depth_obj, Gfh_tag tag) {
|
||||
byte[] cur_tag_bry = tag.Name_bry();
|
||||
if ( !Bry_.Eq(cur_tag_bry, find_tag_bry) // tag doesn't match requested
|
||||
&& find_tag_bry != Gfh_tag_.Bry__xowa_any // requested is not wildcard
|
||||
) return false;
|
||||
if (cur_tag_bry == Gfh_tag_.Bry__xowa_comment) return true; // ignore comments
|
||||
int depth = depth_obj.Val();
|
||||
boolean tag_is_tail = tag.Tag_is_tail();
|
||||
if (tail == tag_is_tail) {
|
||||
if (depth == 0)
|
||||
return true;
|
||||
else {
|
||||
if (Bry_.Eq(cur_tag_bry, find_tag_bry))
|
||||
depth_obj.Val_add(-1);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (!bwd && tail && !tag_is_tail && !tag.Tag_is_inline()) {
|
||||
if (Bry_.Eq(cur_tag_bry, find_tag_bry))
|
||||
depth_obj.Val_add(1);
|
||||
return false;
|
||||
}
|
||||
else
|
||||
return false;
|
||||
}
|
||||
}
|
||||
public Gfh_tag Tag__extract(boolean move, boolean tail, byte[] find_tag_bry, int tag_bgn) {
|
||||
int name_bgn = tag_bgn + 1; if (name_bgn == src_end) return Tag__eos(tag_bgn); // EX: "<EOS"
|
||||
byte name_0 = src[name_bgn];
|
||||
boolean cur_is_tail = false;
|
||||
switch (name_0) {
|
||||
case Byte_ascii.Bang:
|
||||
if (Bry_.Match(src, name_bgn + 1, name_bgn + 3, Bry__comment__mid)) // skip comment; EX: "<!"
|
||||
return Tag__comment(tag_bgn);
|
||||
break;
|
||||
case Byte_ascii.Slash:
|
||||
++name_bgn; if (name_bgn == src_end) return Tag__eos(tag_bgn); // EX: "</EOS"
|
||||
name_0 = src[name_bgn];
|
||||
cur_is_tail = true;
|
||||
break;
|
||||
}
|
||||
int name_end = -1, atrs_end = -1, tag_end = -1, name_pos = name_bgn;
|
||||
byte name_byte = name_0; boolean inline = false;
|
||||
boolean loop = true;
|
||||
while (true) {
|
||||
switch (name_byte) {
|
||||
case Byte_ascii.Angle_end: // EX: "<a>"
|
||||
name_end = atrs_end = name_pos;
|
||||
tag_end = name_end + 1;
|
||||
loop = false;
|
||||
break;
|
||||
case Byte_ascii.Slash: // EX: "<a/>"
|
||||
name_end = name_pos;
|
||||
tag_end = name_pos + 1; if (tag_end == src_end) return Tag__eos(tag_bgn);// EX: "<a/EOS"
|
||||
if (src[tag_end] == Byte_ascii.Angle_end) {
|
||||
atrs_end = name_end;
|
||||
inline = true;
|
||||
loop = false;
|
||||
++tag_end; // move tag_end after >
|
||||
}
|
||||
else {
|
||||
name_end = tag_end = -1;
|
||||
}
|
||||
break;
|
||||
case Byte_ascii.Tab: case Byte_ascii.Nl: case Byte_ascii.Cr: case Byte_ascii.Space:
|
||||
name_end = name_pos;
|
||||
loop = false;
|
||||
break;
|
||||
}
|
||||
if (!loop) break;
|
||||
++name_pos; if (name_pos == src_end) return Tag__eos(tag_bgn); // EX: "<abEOS"
|
||||
name_byte = src[name_pos];
|
||||
}
|
||||
if (tag_end == -1) {
|
||||
tag_end = Bry_find_.Find_fwd(src, Byte_ascii.Angle_end, name_end, src_end);
|
||||
if (tag_end == Bry_find_.Not_found) return Tag__eos(tag_bgn);
|
||||
int prv_pos = tag_end - 1;
|
||||
if (src[prv_pos] == Byte_ascii.Slash) {
|
||||
atrs_end = prv_pos;
|
||||
inline = true;
|
||||
}
|
||||
else
|
||||
atrs_end = tag_end;
|
||||
++tag_end; // position after ">"
|
||||
}
|
||||
Gfh_tag tmp = move ? tag__tmp__move : tag__tmp__peek;
|
||||
return tmp.Init(this, src, cur_is_tail, inline, tag_bgn, tag_end, name_end, atrs_end, Gfh_tag_.Id__unknown, Bry_.Mid(src, name_bgn, name_end));
|
||||
}
|
||||
public Gfh_tag Tag__extract(boolean move, boolean tail, int match_name_id, int tag_bgn) {
|
||||
int name_bgn = tag_bgn + 1; if (name_bgn == src_end) return Tag__eos(tag_bgn); // EX: "<EOS"
|
||||
byte name_0 = src[name_bgn];
|
||||
@@ -172,7 +299,9 @@ public class Gfh_tag_rdr {
|
||||
++tag_end; // position after ">"
|
||||
}
|
||||
Gfh_tag tmp = move ? tag__tmp__move : tag__tmp__peek;
|
||||
return tmp.Init(this, src, cur_is_tail, inline, tag_bgn, tag_end, name_end, atrs_end, name_hash.Get_as_int_or(src, name_bgn, name_end, -1), Bry_.Mid(src, name_bgn, name_end));
|
||||
return tmp.Init(this, src, cur_is_tail, inline, tag_bgn, tag_end, name_end, atrs_end
|
||||
, name_hash.Get_as_int_or(src, name_bgn, name_end, -1) // TODO: change from -1 to Unknown
|
||||
, Bry_.Mid(src, name_bgn, name_end));
|
||||
}
|
||||
public boolean Read_and_move(byte match) {
|
||||
byte b = src[pos];
|
||||
|
||||
@@ -50,16 +50,18 @@ public class Gfo_url_encoder implements Url_encoder_interface {
|
||||
public byte[] Decode(byte[] bry, int bgn, int end) {return Decode(Bool_.N, bry, bgn, end);}
|
||||
private byte[] Decode(boolean fail, byte[] bry, int bgn, int end) {Bry_bfr bfr = Bry_bfr_.Get(); Decode(bfr, fail, bry, bgn, end); return bfr.To_bry_and_rls();}
|
||||
public Bry_bfr Decode(Bry_bfr bfr, boolean fail, byte[] bry, int bgn, int end) {
|
||||
for (int i = bgn; i < end; ++i) {
|
||||
byte b = bry[i];
|
||||
if (anchor_encoder != null && b == Byte_ascii.Hash) {
|
||||
bfr.Add_byte(Byte_ascii.Hash);
|
||||
anchor_encoder.Decode(bfr, Bool_.N, bry, i + 1, end);
|
||||
break;
|
||||
// synchronized (this) { // LOCK:DELETE; DATE:2016-07-06
|
||||
for (int i = bgn; i < end; ++i) {
|
||||
byte b = bry[i];
|
||||
if (anchor_encoder != null && b == Byte_ascii.Hash) {
|
||||
bfr.Add_byte(Byte_ascii.Hash);
|
||||
anchor_encoder.Decode(bfr, Bool_.N, bry, i + 1, end);
|
||||
break;
|
||||
}
|
||||
Gfo_url_encoder_itm itm = decode_ary[b & 0xff];// PATCH.JAVA:need to convert to unsigned byte
|
||||
i += itm.Decode(bfr, bry, end, i, b, fail);
|
||||
}
|
||||
Gfo_url_encoder_itm itm = decode_ary[b & 0xff];// PATCH.JAVA:need to convert to unsigned byte
|
||||
i += itm.Decode(bfr, bry, end, i, b, fail);
|
||||
}
|
||||
return bfr;
|
||||
return bfr;
|
||||
// }
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user