1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2026-03-02 03:49:30 +00:00

'v3.7.2.1'

This commit is contained in:
gnosygnu
2016-07-10 23:35:32 -04:00
parent f5f48bb9b1
commit b333db45f8
366 changed files with 4468 additions and 3460 deletions

View File

@@ -17,7 +17,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.htmls; import gplx.*; import gplx.langs.*;
public class Gfh_atr_ {
public static final byte[]
public static final byte[]
// "coreattrs"
Bry__id = Bry_.new_a7("id")
, Bry__class = Bry_.new_a7("class")
@@ -54,7 +54,16 @@ public class Gfh_atr_ {
/**
 * Writes one attribute into {@code bfr} and returns the buffer's contents.
 * The pieces are appended in this order: space, {@code key}, equals sign, quote, {@code val}, quote.
 * {@code val} is appended as given; no escaping is done in this method.
 * The result is taken with {@code To_bry_and_clear}, so anything already in {@code bfr} is part of the returned bytes.
 */
public static byte[] Make(Bry_bfr bfr, byte[] key, byte[] val) {
    bfr.Add_byte_space();
    bfr.Add(key);
    bfr.Add_byte_eq();
    bfr.Add_byte_quote();
    bfr.Add(val);
    bfr.Add_byte_quote();
    return bfr.To_bry_and_clear();
}
/**
 * Appends space, {@code key}, equals sign, quote, {@code val}, quote to {@code bfr},
 * then returns the buffer's bytes via {@code To_bry_and_clear}.
 * {@code val} is appended as given; no escaping is done in this method.
 */
public static byte[] Add_to_bry(Bry_bfr bfr, byte[] key, byte[] val) {
    return bfr
        .Add_byte_space()
        .Add(key)
        .Add_byte_eq()
        .Add_byte_quote()
        .Add(val)
        .Add_byte_quote()
        .To_bry_and_clear();
}
/**
 * Appends one attribute with a byte-array value to {@code bfr} and leaves the buffer open for further writes.
 * Order of appended pieces: space, {@code key}, equals sign, quote, {@code val}, quote.
 * {@code val} is appended as given; no escaping is done in this method.
 */
public static void Add(Bry_bfr bfr, byte[] key, byte[] val) {
    bfr.Add_byte_space();
    bfr.Add(key);
    bfr.Add_byte_eq();
    bfr.Add_byte_quote();
    bfr.Add(val);
    bfr.Add_byte_quote();
}
/**
 * Appends one attribute with an integer value to {@code bfr} and leaves the buffer open for further writes.
 * Order of appended pieces: space, {@code key}, equals sign, quote, the integer written by
 * {@code Add_int_variable}, quote.
 */
public static void Add(Bry_bfr bfr, byte[] key, int val) {
    // attribute name and opening quote
    bfr.Add_byte_space();
    bfr.Add(key);
    bfr.Add_byte_eq();
    bfr.Add_byte_quote();
    // value and closing quote
    bfr.Add_int_variable(val);
    bfr.Add_byte_quote();
}
}

View File

@@ -54,6 +54,17 @@ public class Gfh_tag_ { // NOTE: not serialized; used by tag_rdr
, Id__sup = 30
, Id__sub = 31
, Id__bdi = 32
, Id__font = 33
, Id__strong = 34
, Id__s = 35
, Id__abbr = 36
, Id__cite = 37
, Id__var = 38
, Id__u = 39
, Id__big = 40
, Id__del = 41
, Id__strike = 42
, Id__tt = 43
;
public static final byte[]
Bry__a = Bry_.new_a7("a")
@@ -64,6 +75,8 @@ public class Gfh_tag_ { // NOTE: not serialized; used by tag_rdr
, Bry__link = Bry_.new_a7("link")
, Bry__style = Bry_.new_a7("style")
, Bry__script = Bry_.new_a7("script")
, Bry__xowa_any = Bry_.new_a7("xowa_any")
, Bry__xowa_comment = Bry_.new_a7("xowa_comment")
;
public static final Hash_adp_bry Hash = Hash_adp_bry.ci_a7()
.Add_bry_int(Bry__a , Id__a)
@@ -98,6 +111,17 @@ public class Gfh_tag_ { // NOTE: not serialized; used by tag_rdr
.Add_str_int("sup" , Id__sup)
.Add_str_int("sub" , Id__sub)
.Add_str_int("bdi" , Id__bdi)
.Add_str_int("font" , Id__font)
.Add_str_int("strong" , Id__strong)
.Add_str_int("s" , Id__s)
.Add_str_int("abbr" , Id__abbr)
.Add_str_int("cite" , Id__cite)
.Add_str_int("var" , Id__var)
.Add_str_int("u" , Id__u)
.Add_str_int("big" , Id__big)
.Add_str_int("del" , Id__del)
.Add_str_int("strike" , Id__strike)
.Add_str_int("tt" , Id__tt)
;
public static String To_str(int tid) {
switch (tid) {
@@ -137,6 +161,17 @@ public class Gfh_tag_ { // NOTE: not serialized; used by tag_rdr
case Id__sup: return "sup";
case Id__sub: return "sub";
case Id__bdi: return "bdi";
case Id__font: return "font";
case Id__strong: return "strong";
case Id__s: return "s";
case Id__abbr: return "abbr";
case Id__cite: return "cite";
case Id__var: return "var";
case Id__u: return "u";
case Id__big: return "big";
case Id__del: return "del";
case Id__strike: return "strike";
case Id__tt: return "tt";
default: throw Err_.new_unhandled(tid);
}
}
@@ -150,6 +185,7 @@ public class Gfh_tag_ { // NOTE: not serialized; used by tag_rdr
, P_lhs = Bry_.new_a7("<p>") , P_rhs = Bry_.new_a7("</p>")
, Pre_lhs = Bry_.new_a7("<pre>") , Pre_rhs = Bry_.new_a7("</pre>")
, Div_lhs = Bry_.new_a7("<div>") , Div_rhs = Bry_.new_a7("</div>")
, Div_lhs_bgn = Bry_.new_a7("<div")
, Html_rhs = Bry_.new_a7("</html>")
, Head_lhs_bgn = Bry_.new_a7("<head") , Head_rhs = Bry_.new_a7("</head>")
, Style_lhs_w_type = Bry_.new_a7("<style type=\"text/css\">")
@@ -174,4 +210,6 @@ public class Gfh_tag_ { // NOTE: not serialized; used by tag_rdr
Comm_bgn_len = Comm_bgn.length
, Comm_end_len = Comm_end.length
;
/** Closes an opening tag by appending the {@code Byte_ascii.Gt} byte to {@code bfr}. */
public static void Lhs_end_nde(Bry_bfr bfr) {
    bfr.Add_byte(Byte_ascii.Gt);
}
/** Closes an inline (self-closing) tag by appending {@code Byte_ascii.Slash} followed by {@code Byte_ascii.Gt} to {@code bfr}. */
public static void Lhs_end_inl(Bry_bfr bfr) {
    bfr.Add_byte(Byte_ascii.Slash);
    bfr.Add_byte(Byte_ascii.Gt);
}
}

View File

@@ -34,6 +34,7 @@ public class Gfh_tag implements Mwh_atr_wkr {
return rv;
}
private int name_id;
/** Returns the integer id stored for this tag's name. */
public int Name_id() {
    return this.name_id;
}
/** Returns true when this tag's name id equals {@code Gfh_tag_.Id__comment}. */
public boolean Tid_is_comment() {
    return Gfh_tag_.Id__comment == this.name_id;
}
private byte[] name_bry;
/** Returns the stored name bytes of this tag (the array itself, not a copy). */
public byte[] Name_bry() {
    return this.name_bry;
}
public Gfh_tag Chk_name_or_fail(int chk) {
if (!Chk_name(chk)) tag_rdr.Err_wkr().Fail("name_id chk failed", "expecting", Gfh_tag_.To_str(chk));

View File

@@ -112,6 +112,133 @@ public class Gfh_tag_rdr {
return false;
}
}
/**
 * Searches forward from the reader's current {@code pos} to {@code src_end} for the tail tag
 * named {@code find_tag_bry}, with the "move" flag set so the reader position is advanced by Tag__find.
 */
public Gfh_tag Tag__move_fwd_tail(byte[] find_tag_bry) {
    boolean move = Bool_.Y;
    boolean bwd = Bool_.N;
    boolean tail = Bool_.Y;
    return Tag__find(move, bwd, tail, pos, src_end, find_tag_bry);
}
/**
 * Scans {@code src} for the first tag accepted by {@code Tag__match}.
 *
 * @param move         when true, the reader's {@code pos} is set to the found tag's {@code Src_end()}
 * @param bwd          when true, scan from {@code rng_bgn - 1} down to index 0; otherwise scan from
 *                     {@code rng_bgn} up to (not including) {@code rng_end}
 * @param tail         passed through to {@code Tag__extract} / {@code Tag__match}
 * @param rng_bgn      start of the scan range
 * @param rng_end      end of the scan range (ignored when {@code bwd} is true)
 * @param find_tag_bry tag name to look for
 * @return the matching tag; or the value of {@code Tag__eos(rng_bgn)} when nothing matched
 *         (a forward, moving, tail search reports the failure through {@code err_wkr.Fail} first)
 */
private Gfh_tag Tag__find(boolean move, boolean bwd, boolean tail, int rng_bgn, int rng_end, byte[] find_tag_bry) {
    int tmp = rng_bgn;
    int stop_pos = rng_end; int adj = 1;
    if (bwd) {
        stop_pos = -1;
        adj = -1;
        --tmp; // subtract 1 from tmp; needed when pos is at src_len, else array error below
    }
    tmp_depth.Val_zero_();
    Gfh_tag rv = null;
    // NOTE(review): the loop ends only when tmp lands exactly on stop_pos; a forward jump to a
    // Src_end() beyond rng_end would step over it. Left unchanged; confirm callers always pass a
    // rng_end that tags cannot cross.
    while (tmp != stop_pos) {
        if (src[tmp] == Byte_ascii.Angle_bgn) {
            rv = Tag__extract(move, tail, find_tag_bry, tmp);
            if (Bry_.Eq(rv.Name_bry(), Gfh_tag_.Bry__xowa_comment)) { // ignore comments DATE:2016-06-25
                // FIX: in backward mode, jumping to Src_end() moved forward again, so the scan
                // walked back onto the same "<" forever. Step to the byte before this "<" instead.
                tmp = bwd ? tmp - 1 : rv.Src_end();
                rv = null; // null rv, else rv will still be comment and may get returned to caller
                continue;
            }
            if (Tag__match(move, bwd, tail, find_tag_bry, tmp_depth, rv))
                break;
            // not the requested tag: skip over it in the direction of travel
            tmp = bwd ? rv.Src_bgn() - 1 : rv.Src_end();
            rv = null;
        }
        else
            tmp += adj;
    }
    if (rv == null) {
        if (move && tail && !bwd)
            err_wkr.Fail("move tag fwd failed", "tag_name", find_tag_bry);
        // Reached directly for peek/backward searches. Also reached if Fail returns normally,
        // which previously fell through to rv.Src_end() on a null rv.
        return Tag__eos(rng_bgn);
    }
    if (move) pos = rv.Src_end();
    return rv;
}
/**
 * Decides whether {@code tag} is the tag the caller is searching for, tracking nesting depth
 * in {@code depth_obj} for forward tail searches.
 *
 * @param move         not read by this method
 * @param bwd          true when the caller is scanning backward
 * @param tail         true when the caller wants a tail (closing) tag
 * @param find_tag_bry requested tag name; the {@code Gfh_tag_.Bry__xowa_any} instance acts as a wildcard
 * @param depth_obj    running count of same-named head tags still waiting for their tail
 * @param tag          candidate tag
 * @return true when {@code tag} satisfies the search
 */
private boolean Tag__match(boolean move, boolean bwd, boolean tail, byte[] find_tag_bry, Int_obj_ref depth_obj, Gfh_tag tag) {
    byte[] cur_tag_bry = tag.Name_bry();
    boolean same_name = Bry_.Eq(cur_tag_bry, find_tag_bry);

    // reject unless the name matches or the wildcard instance was requested (identity comparison)
    if (!same_name && find_tag_bry != Gfh_tag_.Bry__xowa_any) return false;

    // a tag whose name is the comment-name instance (identity comparison) is reported as a match
    if (cur_tag_bry == Gfh_tag_.Bry__xowa_comment) return true;

    int depth = depth_obj.Val();
    boolean tag_is_tail = tag.Tag_is_tail();

    if (tail == tag_is_tail) {
        // tag kind is the one requested: accept at depth 0, else close one nested level
        if (depth == 0) return true;
        if (same_name) depth_obj.Val_add(-1);
        return false;
    }

    // tag kind differs from the one requested: never a match, but a non-inline head tag seen
    // during a forward tail search opens one more nested level
    if (!bwd && tail && !tag_is_tail && !tag.Tag_is_inline()) {
        if (same_name) depth_obj.Val_add(1);
    }
    return false;
}
/**
 * Parses the tag that starts at {@code tag_bgn} (the index of its "<") and returns a Gfh_tag
 * initialized with its bounds, tail / inline flags and name bytes.
 * The name id passed to Init is always {@code Gfh_tag_.Id__unknown}; the name is not looked up in a hash here.
 *
 * @param move          selects which reusable instance is filled: {@code tag__tmp__move} when true,
 *                      {@code tag__tmp__peek} when false
 * @param tail          not read by this method
 * @param find_tag_bry  not read by this method
 * @param tag_bgn       index of the "<" that opens the tag
 * @return the value of {@code Tag__eos(tag_bgn)} when the source ends before the tag is complete;
 *         the value of {@code Tag__comment(tag_bgn)} when "<!" is followed by {@code Bry__comment__mid};
 *         otherwise the result of Init on the reusable instance (NOTE(review): presumably the same
 *         shared instance, so a later call with the same {@code move} flag would overwrite it; confirm)
 */
public Gfh_tag Tag__extract(boolean move, boolean tail, byte[] find_tag_bry, int tag_bgn) {
int name_bgn = tag_bgn + 1; if (name_bgn == src_end) return Tag__eos(tag_bgn); // EX: "<EOS"
byte name_0 = src[name_bgn];
boolean cur_is_tail = false;
// inspect the first byte after "<": "!" may start a comment, "/" marks a tail tag
switch (name_0) {
case Byte_ascii.Bang:
if (Bry_.Match(src, name_bgn + 1, name_bgn + 3, Bry__comment__mid)) // skip comment; EX: "<!"
return Tag__comment(tag_bgn);
break;
case Byte_ascii.Slash:
++name_bgn; if (name_bgn == src_end) return Tag__eos(tag_bgn); // EX: "</EOS"
name_0 = src[name_bgn];
cur_is_tail = true;
break;
}
// -1 means "not determined yet" for name_end, atrs_end and tag_end
int name_end = -1, atrs_end = -1, tag_end = -1, name_pos = name_bgn;
byte name_byte = name_0; boolean inline = false;
// loop flag is needed because "break" inside the switch only leaves the switch, not the while
boolean loop = true;
// scan forward one byte at a time until the tag name ends
while (true) {
switch (name_byte) {
case Byte_ascii.Angle_end: // EX: "<a>"
name_end = atrs_end = name_pos;
tag_end = name_end + 1;
loop = false;
break;
case Byte_ascii.Slash: // EX: "<a/>"
name_end = name_pos;
tag_end = name_pos + 1; if (tag_end == src_end) return Tag__eos(tag_bgn);// EX: "<a/EOS"
if (src[tag_end] == Byte_ascii.Angle_end) {
atrs_end = name_end;
inline = true;
loop = false;
++tag_end; // move tag_end after >
}
else {
// "/" was not followed by ">": undo the tentative bounds and keep scanning
name_end = tag_end = -1;
}
break;
case Byte_ascii.Tab: case Byte_ascii.Nl: case Byte_ascii.Cr: case Byte_ascii.Space:
// whitespace ends the name; tag_end stays -1 so the closing ">" is searched for below
name_end = name_pos;
loop = false;
break;
}
if (!loop) break;
++name_pos; if (name_pos == src_end) return Tag__eos(tag_bgn); // EX: "<abEOS"
name_byte = src[name_pos];
}
// name ended at whitespace: find the closing ">" and check whether it is preceded by "/"
if (tag_end == -1) {
tag_end = Bry_find_.Find_fwd(src, Byte_ascii.Angle_end, name_end, src_end);
if (tag_end == Bry_find_.Not_found) return Tag__eos(tag_bgn);
int prv_pos = tag_end - 1;
if (src[prv_pos] == Byte_ascii.Slash) {
atrs_end = prv_pos;
inline = true;
}
else
atrs_end = tag_end;
++tag_end; // position after ">"
}
Gfh_tag tmp = move ? tag__tmp__move : tag__tmp__peek;
return tmp.Init(this, src, cur_is_tail, inline, tag_bgn, tag_end, name_end, atrs_end, Gfh_tag_.Id__unknown, Bry_.Mid(src, name_bgn, name_end));
}
public Gfh_tag Tag__extract(boolean move, boolean tail, int match_name_id, int tag_bgn) {
int name_bgn = tag_bgn + 1; if (name_bgn == src_end) return Tag__eos(tag_bgn); // EX: "<EOS"
byte name_0 = src[name_bgn];
@@ -172,7 +299,9 @@ public class Gfh_tag_rdr {
++tag_end; // position after ">"
}
Gfh_tag tmp = move ? tag__tmp__move : tag__tmp__peek;
return tmp.Init(this, src, cur_is_tail, inline, tag_bgn, tag_end, name_end, atrs_end, name_hash.Get_as_int_or(src, name_bgn, name_end, -1), Bry_.Mid(src, name_bgn, name_end));
return tmp.Init(this, src, cur_is_tail, inline, tag_bgn, tag_end, name_end, atrs_end
, name_hash.Get_as_int_or(src, name_bgn, name_end, -1) // TODO: change from -1 to Unknown
, Bry_.Mid(src, name_bgn, name_end));
}
public boolean Read_and_move(byte match) {
byte b = src[pos];

View File

@@ -50,16 +50,18 @@ public class Gfo_url_encoder implements Url_encoder_interface {
/**
 * Decodes the bytes of {@code bry} in the range {@code bgn} (inclusive) to {@code end} (exclusive)
 * and returns the result as a new array. The "fail" flag is passed as {@code Bool_.N}.
 */
public byte[] Decode(byte[] bry, int bgn, int end) {
    boolean fail = Bool_.N;
    return Decode(fail, bry, bgn, end);
}
/**
 * Decodes {@code bry[bgn..end)} into a buffer obtained from {@code Bry_bfr_.Get()} and returns
 * the buffer's bytes via {@code To_bry_and_rls}.
 * NOTE(review): if the inner Decode throws, To_bry_and_rls is never reached; confirm whether the buffer must be released in that case.
 */
private byte[] Decode(boolean fail, byte[] bry, int bgn, int end) {
    Bry_bfr tmp_bfr = Bry_bfr_.Get();
    Decode(tmp_bfr, fail, bry, bgn, end);
    byte[] decoded = tmp_bfr.To_bry_and_rls();
    return decoded;
}
public Bry_bfr Decode(Bry_bfr bfr, boolean fail, byte[] bry, int bgn, int end) {
for (int i = bgn; i < end; ++i) {
byte b = bry[i];
if (anchor_encoder != null && b == Byte_ascii.Hash) {
bfr.Add_byte(Byte_ascii.Hash);
anchor_encoder.Decode(bfr, Bool_.N, bry, i + 1, end);
break;
// synchronized (this) { // LOCK:DELETE; DATE:2016-07-06
for (int i = bgn; i < end; ++i) {
byte b = bry[i];
if (anchor_encoder != null && b == Byte_ascii.Hash) {
bfr.Add_byte(Byte_ascii.Hash);
anchor_encoder.Decode(bfr, Bool_.N, bry, i + 1, end);
break;
}
Gfo_url_encoder_itm itm = decode_ary[b & 0xff];// PATCH.JAVA:need to convert to unsigned byte
i += itm.Decode(bfr, bry, end, i, b, fail);
}
Gfo_url_encoder_itm itm = decode_ary[b & 0xff];// PATCH.JAVA:need to convert to unsigned byte
i += itm.Decode(bfr, bry, end, i, b, fail);
}
return bfr;
return bfr;
// }
}
}