mirror of
https://github.com/gnosygnu/xowa.git
synced 2026-03-02 03:49:30 +00:00
v2.11.3.1
This commit is contained in:
@@ -23,8 +23,11 @@ public class Html_bldr_ {
|
||||
, Bry__a_lhs_w_href = Bry_.new_a7("<a href=\"")
|
||||
, Bry__img_lhs_w_alt = Bry_.new_a7("<img alt=\"")
|
||||
, Bry__img_lhs = Bry_.new_a7("<img")
|
||||
, Bry__div_lhs = Bry_.new_a7("<div")
|
||||
, Bry__div_rhs = Bry_.new_a7("</div>")
|
||||
, Bry__id__1st = Bry_.new_a7(" id=\"")
|
||||
, Bry__id__nth = Bry_.new_a7("\" id=\"")
|
||||
, Bry__cls__1st = Bry_.new_a7(" class=\"")
|
||||
, Bry__cls__nth = Bry_.new_a7("\" class=\"")
|
||||
, Bry__title__nth = Bry_.new_a7("\" title=\"")
|
||||
, Bry__alt__nth = Bry_.new_a7("\" alt=\"")
|
||||
|
||||
@@ -31,6 +31,9 @@ public class Html_tag_ {
|
||||
, Id__span = 8
|
||||
, Id__div = 9
|
||||
, Id__img = 10
|
||||
, Id__ul = 11
|
||||
, Id__li = 12
|
||||
, Id__p = 13
|
||||
;
|
||||
public static final byte[]
|
||||
Bry__a = Bry_.new_a7("a")
|
||||
@@ -47,6 +50,24 @@ public class Html_tag_ {
|
||||
.Add_str_int("div" , Id__div)
|
||||
.Add_str_int("img" , Id__img)
|
||||
;
|
||||
public static String To_str(int tid) {
|
||||
switch (tid) {
|
||||
case Id__eos: return "EOS";
|
||||
case Id__any: return "any";
|
||||
case Id__unknown: return "unknown";
|
||||
case Id__comment: return "comment";
|
||||
case Id__h2: return "h2";
|
||||
case Id__h3: return "h2";
|
||||
case Id__h4: return "h2";
|
||||
case Id__h5: return "h2";
|
||||
case Id__h6: return "h2";
|
||||
case Id__a: return "a";
|
||||
case Id__span: return "span";
|
||||
case Id__div: return "div";
|
||||
case Id__img: return "img";
|
||||
default: throw Err_.new_unhandled(tid);
|
||||
}
|
||||
}
|
||||
public static final byte[]
|
||||
Br_inl = Bry_.new_a7("<br/>")
|
||||
, Hr_inl = Bry_.new_a7("<hr/>")
|
||||
|
||||
@@ -177,4 +177,5 @@ public class Html_utl {
|
||||
}
|
||||
return bfr.To_bry_and_clear();
|
||||
}
|
||||
/** Passes s through String_.Replace, substituting a double quote (") for the apostrophe ('). */
public static String Replace_apos(String s) {
  String find = "'";
  String repl = "\"";
  return String_.Replace(s, find, repl);
}
|
||||
}
|
||||
|
||||
@@ -18,7 +18,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
package gplx.langs.htmls; import gplx.*; import gplx.langs.*;
|
||||
import org.junit.*;
|
||||
public class Html_utl_tst {
|
||||
@Before public void init() {fxt.Clear();} private Html_atr_cls_fxt fxt = new Html_atr_cls_fxt();
|
||||
@Before public void init() {fxt.Clear();} private Html_atr_class_fxt fxt = new Html_atr_class_fxt();
|
||||
@Test public void Basic() {fxt.Test_del_comments("a<!-- b -->c" , "ac");}
|
||||
@Test public void Bgn_missing() {fxt.Test_del_comments("a b c" , "a b c");}
|
||||
@Test public void End_missing() {fxt.Test_del_comments("a<!-- b c" , "a<!-- b c");}
|
||||
@@ -37,7 +37,7 @@ public class Html_utl_tst {
|
||||
fxt.Test_unescape_html(Bool_.Y, Bool_.Y, Bool_.Y, Bool_.Y, Bool_.Y, "a<>'&"b" , "a<>'&\"b"); // basic
|
||||
}
|
||||
}
|
||||
class Html_atr_cls_fxt {
|
||||
class Html_atr_class_fxt {
|
||||
private Bry_bfr tmp_bfr = Bry_bfr.reset_(255);
|
||||
public void Clear() {
|
||||
tmp_bfr.Clear();
|
||||
|
||||
@@ -16,12 +16,12 @@ You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.langs.htmls.parsers; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
|
||||
public class Html_atr {
|
||||
private final byte[] src;
|
||||
public class Html_atr extends gplx.core.brys.Bfr_arg_base {
|
||||
public Html_atr(int idx, byte[] key, byte[] val, byte[] src, int val_bgn, int val_end) {
|
||||
this.idx = idx; this.key = key; this.val = val;
|
||||
this.src = src; this.val_bgn = val_bgn; this.val_end = val_end;
|
||||
}
|
||||
public byte[] Src() {return src;} private final byte[] src;
|
||||
public int Idx() {return idx;} private final int idx;
|
||||
public byte[] Key() {return key;} private final byte[] key;
|
||||
public int Val_bgn() {return val_bgn;} private final int val_bgn;
|
||||
@@ -36,5 +36,10 @@ public class Html_atr {
|
||||
if (val_end > val_bgn)
|
||||
bfr.Add_mid(src, val_bgn, val_end);
|
||||
}
|
||||
/** Bfr_arg hook: reports whether there is anything to write; delegates to Val_exists(). */
@Override public boolean Bfr_arg__exists() {
  return Val_exists();
}
|
||||
@Override public void Bfr_arg__add(Bry_bfr bfr) {
|
||||
if (Val_exists())
|
||||
bfr.Add_mid(src, val_bgn, val_end);
|
||||
}
|
||||
public static final Html_atr Noop = new Html_atr(-1, Bry_.Empty, Bry_.Empty, Bry_.Empty, -1, -1);
|
||||
}
|
||||
|
||||
@@ -19,45 +19,36 @@ package gplx.langs.htmls.parsers; import gplx.*; import gplx.langs.*; import gpl
|
||||
import gplx.core.btries.*;
|
||||
public class Html_doc_parser {
|
||||
private final Btrie_slim_mgr trie = Btrie_slim_mgr.cs();
|
||||
private final List_adp list = List_adp_.new_();
|
||||
private Html_txt_wkr txt_wkr;
|
||||
public Html_doc_parser Reg_txt(Html_txt_wkr txt_wkr) {
|
||||
private final Html_txt_wkr txt_wkr;
|
||||
public Html_doc_parser(Html_txt_wkr txt_wkr, Html_doc_wkr... wkr_ary) {
|
||||
this.txt_wkr = txt_wkr;
|
||||
return this;
|
||||
}
|
||||
public Html_doc_parser Reg_wkrs(Html_doc_wkr... wkr_ary) {
|
||||
for (Html_doc_wkr wkr : wkr_ary) {
|
||||
for (Html_doc_wkr wkr : wkr_ary)
|
||||
trie.Add_obj(wkr.Hook(), wkr);
|
||||
list.Add(wkr);
|
||||
}
|
||||
return this;
|
||||
}
|
||||
public void Parse(byte[] src, int src_bgn, int src_end) {
|
||||
txt_wkr.Init(src, src_bgn, src_end);
|
||||
int len = list.Count();
|
||||
for (int i = 0; i < len; ++i) {
|
||||
Html_doc_wkr wkr = (Html_doc_wkr)list.Get_at(i);
|
||||
wkr.Init(src, src_bgn, src_end);
|
||||
}
|
||||
int pos = src_bgn;
|
||||
public void Parse(byte[] page_url, byte[] src, int src_bgn, int src_end) {
|
||||
int txt_bgn = -1;
|
||||
int pos = src_bgn;
|
||||
while (pos < src_end) {
|
||||
Object o = trie.Match_bgn(src, pos, src_end);
|
||||
if (o == null) {
|
||||
if (o == null) { // not a known hook; add to txt
|
||||
if (txt_bgn == -1) txt_bgn = pos;
|
||||
++pos;
|
||||
}
|
||||
else {
|
||||
if (txt_bgn != -1) {
|
||||
else { // known hook
|
||||
if (txt_bgn != -1) { // txt pending; handle it
|
||||
txt_wkr.Parse(txt_bgn, pos);
|
||||
txt_bgn = -1;
|
||||
}
|
||||
Html_doc_wkr wkr = (Html_doc_wkr)o;
|
||||
int hook_end = trie.Match_pos();
|
||||
try {pos = wkr.Parse(pos);}
|
||||
catch (Exception e) {Err_.Noop(e); txt_bgn = pos; pos = hook_end;}
|
||||
try {pos = wkr.Parse(src, src_bgn, src_end, pos);}
|
||||
catch (Exception e) {
|
||||
Err err = Err_.cast_or_make(e);
|
||||
if (!err.Logged()) Gfo_usr_dlg_.Instance.Warn_many("", "", Err_.Message_gplx_log(e), "page_url", page_url, "mid", Bry_.Mid_by_len_safe(src, pos, 255));
|
||||
txt_bgn = pos; // set txt_bgn to hook_bgn which is "pos"; i.e.: txt resumes from start of failed hook
|
||||
pos = trie.Match_pos(); // set pos to hook_end
|
||||
}
|
||||
}
|
||||
}
|
||||
if (txt_bgn != -1) txt_wkr.Parse(txt_bgn, src_end);
|
||||
if (txt_bgn != -1) txt_wkr.Parse(txt_bgn, src_end); // handle add pending txt at EOS
|
||||
}
|
||||
}
|
||||
|
||||
@@ -18,6 +18,5 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
package gplx.langs.htmls.parsers; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
|
||||
public interface Html_doc_wkr {
|
||||
byte[] Hook();
|
||||
void Init(byte[] src, int src_bgn, int src_end);
|
||||
int Parse(int pos);
|
||||
int Parse(byte[] src, int src_bgn, int src_end, int pos);
|
||||
}
|
||||
@@ -16,12 +16,12 @@ You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.langs.htmls.parsers; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
|
||||
import gplx.xowa.parsers.htmls.*; import gplx.langs.htmls.parsers.styles.*;
|
||||
import gplx.xowa.parsers.htmls.*; import gplx.langs.htmls.parsers.styles.*; import gplx.langs.htmls.parsers.clses.*;
|
||||
public class Html_tag implements Mwh_atr_wkr {
|
||||
private Html_tag_rdr tag_rdr;
|
||||
private Ordered_hash atrs_hash; private boolean atrs_null; private int atrs_bgn, atrs_end;
|
||||
public Html_tag Init(Html_tag_rdr tag_rdr, boolean tag_is_tail, boolean tag_is_inline, int src_bgn, int src_end, int atrs_bgn, int atrs_end, int name_id) {
|
||||
this.tag_rdr = tag_rdr; this.atrs_null = true;
|
||||
this.tag_rdr = tag_rdr; this.src = tag_rdr.Src(); this.atrs_null = true;
|
||||
this.tag_is_tail = tag_is_tail; this.tag_is_inline = tag_is_inline;
|
||||
this.atrs_bgn = atrs_bgn; this.atrs_end = atrs_end;
|
||||
this.name_id = name_id; this.src_bgn = src_bgn; this.src_end = src_end;
|
||||
@@ -34,8 +34,18 @@ public class Html_tag implements Mwh_atr_wkr {
|
||||
return rv;
|
||||
}
|
||||
public int Name_id() {return name_id;} private int name_id;
|
||||
public Html_tag Chk_id(int chk) {
|
||||
if ( chk == name_id
|
||||
|| (name_id != Html_tag_.Id__eos && Int_.In(chk, Html_tag_.Id__any, Html_tag_.Id__comment))) {
|
||||
}
|
||||
else
|
||||
tag_rdr.Rdr().Fail("name_id chk failed", "expecting", Html_tag_.To_str(chk));
|
||||
return this;
|
||||
}
|
||||
public byte[] Src() {return src;} private byte[] src;
|
||||
public int Src_bgn() {return src_bgn;} private int src_bgn;
|
||||
public int Src_end() {return src_end;} private int src_end;
|
||||
public boolean Src_exists() {return src_end > src_bgn;} // NOTE: only true if EOS where src_end == src_bgn == src_len
|
||||
public boolean Tag_is_tail() {return tag_is_tail;} private boolean tag_is_tail;
|
||||
public boolean Tag_is_inline() {return tag_is_inline;} private boolean tag_is_inline;
|
||||
public boolean Atrs__match_pair(byte[] key, byte[] val) {
|
||||
@@ -47,12 +57,12 @@ public class Html_tag implements Mwh_atr_wkr {
|
||||
if (atrs_null) Atrs__make();
|
||||
Html_atr rv = (Html_atr)atrs_hash.Get_by(Html_atr_.Bry__class); if (rv == null) return false;
|
||||
byte[] rv_val = rv.Val();
|
||||
return Html_atr_cls_.Has(rv_val, 0, rv_val.length, val);
|
||||
return Html_atr_class_.Has(rv_val, 0, rv_val.length, val);
|
||||
}
|
||||
public byte Atrs__cls_find_1st(Hash_adp_bry hash) {
|
||||
public byte Atrs__cls_find_or_fail(Hash_adp_bry hash) {
|
||||
if (atrs_null) Atrs__make();
|
||||
Html_atr cls_atr = (Html_atr)atrs_hash.Get_by(Html_atr_.Bry__class); if (cls_atr == null) tag_rdr.Rdr().Fail("cls missing", String_.Empty, String_.Empty);
|
||||
byte rv = Html_atr_cls_.Find_1st(tag_rdr.Src(), cls_atr.Val_bgn(), cls_atr.Val_end(), hash); if (rv == Byte_.Max_value_127) tag_rdr.Rdr().Fail("cls val missing", String_.Empty, String_.Empty);
|
||||
byte rv = Html_atr_class_.Find_1st(src, cls_atr.Val_bgn(), cls_atr.Val_end(), hash); if (rv == Byte_.Max_value_127) tag_rdr.Rdr().Fail("cls val missing", String_.Empty, String_.Empty);
|
||||
return rv;
|
||||
}
|
||||
private static final Html_atr_style_wkr__get_val_as_int style_wkr = new Html_atr_style_wkr__get_val_as_int();
|
||||
@@ -74,11 +84,11 @@ public class Html_tag implements Mwh_atr_wkr {
|
||||
public int Atrs__get_as_int_or(byte[] key, int or) {
|
||||
if (atrs_null) Atrs__make();
|
||||
Html_atr rv = (Html_atr)atrs_hash.Get_by(key); if (rv == null) return or;
|
||||
return Bry_.To_int_or(tag_rdr.Src(), rv.Val_bgn(), rv.Val_end(), or);
|
||||
return Bry_.To_int_or(src, rv.Val_bgn(), rv.Val_end(), or);
|
||||
}
|
||||
public Html_atr Atrs__get_by(byte[] key) {return Atrs__get_by(key, Bool_.Y);}
|
||||
public Html_atr Atrs__get_by_or_empty(byte[] key) {return Atrs__get_by(key, Bool_.N);}
|
||||
public Html_atr Atrs__get_by(byte[] key, boolean fail_if_null) {
|
||||
public Html_atr Atrs__get_by_or_fail(byte[] key) {return Atrs__get_by_or_fail(key, Bool_.Y);}
|
||||
public Html_atr Atrs__get_by_or_empty(byte[] key) {return Atrs__get_by_or_fail(key, Bool_.N);}
|
||||
public Html_atr Atrs__get_by_or_fail(byte[] key, boolean fail_if_null) {
|
||||
if (atrs_null) Atrs__make();
|
||||
Html_atr rv = (Html_atr)atrs_hash.Get_by(key);
|
||||
if (rv == null) {
|
||||
|
||||
@@ -29,28 +29,25 @@ public class Html_tag_rdr {
|
||||
public void Init(byte[] src, int src_bgn, int src_end) {
|
||||
this.src = src; this.pos = src_bgn; this.src_end = src_end;
|
||||
tag__eos.Init(this, Bool_.N, Bool_.N, src_end, src_end, src_end, src_end, Html_tag_.Id__eos);
|
||||
rdr.Ctor_by_page(Bry_.Empty, src, src_end);
|
||||
rdr.Init_by_page(Bry_.Empty, src, src_end);
|
||||
}
|
||||
public int Pos() {return pos;} private int pos;
|
||||
public void Pos_(int v) {this.pos = v;}
|
||||
public void Atrs__make(Mwh_atr_wkr atr_wkr, int head_bgn, int head_end) {atr_parser.Parse(atr_wkr, -1, -1, src, head_bgn, head_end);}
|
||||
public void Fail(String msg, Html_tag tag) {rdr.Fail(msg, String_.Empty, String_.Empty, tag.Src_bgn(), tag.Src_end());}
|
||||
public Html_tag Tag__move_fwd_head() {return Tag__find(Bool_.Y, Bool_.N, Bool_.N, Html_tag_.Id__any);}
|
||||
public Html_tag Tag__move_fwd_head(int match_name_id) {return Tag__find(Bool_.Y, Bool_.N, Bool_.N, match_name_id);}
|
||||
public Html_tag Tag__move_fwd_tail(int match_name_id) {return Tag__find(Bool_.Y, Bool_.N, Bool_.Y, match_name_id);}
|
||||
public Html_tag Tag__peek_fwd_head() {return Tag__find(Bool_.N, Bool_.N, Bool_.N, Html_tag_.Id__any);}
|
||||
public Html_tag Tag__peek_fwd_head(int match_name_id) {return Tag__find(Bool_.N, Bool_.N, Bool_.N, match_name_id);}
|
||||
public Html_tag Tag__peek_fwd_tail(int match_name_id) {return Tag__find(Bool_.N, Bool_.N, Bool_.Y, match_name_id);}
|
||||
public Html_tag Tag__peek_bwd_tail(int match_name_id) {return Tag__find(Bool_.N, Bool_.Y, Bool_.Y, match_name_id);}
|
||||
public Html_tag Tag__peek_bwd_head() {return Tag__find(Bool_.N, Bool_.Y, Bool_.Y, Html_tag_.Id__any);}
|
||||
public Html_tag Tag__move_fwd_head(byte[] cls) {
|
||||
Html_tag rv = Tag__find(Bool_.Y, Bool_.N, Bool_.N, Html_tag_.Id__any);
|
||||
if (!rv.Atrs__cls_has(cls)) rdr.Fail("missing cls", "cls", cls);
|
||||
return rv;
|
||||
}
|
||||
private Html_tag Tag__find(boolean move, boolean bwd, boolean tail, int match_name_id) {
|
||||
int tmp = pos;
|
||||
int stop_pos = src_end; int adj = 1;
|
||||
public Html_tag Tag__move_fwd_head() {return Tag__find(Bool_.Y, Bool_.N, Bool_.N, pos, src_end, Html_tag_.Id__any);}
|
||||
public Html_tag Tag__move_fwd_head(int match_name_id) {return Tag__find(Bool_.Y, Bool_.N, Bool_.N, pos, src_end, match_name_id);}
|
||||
// public Html_tag Tag__move_fwd_tail() {return Tag__find(Bool_.Y, Bool_.N, Bool_.Y, pos, src_end, Html_tag_.Id__any);}
|
||||
public Html_tag Tag__move_fwd_tail(int match_name_id) {return Tag__find(Bool_.Y, Bool_.N, Bool_.Y, pos, src_end, match_name_id);}
|
||||
public Html_tag Tag__peek_fwd_head() {return Tag__find(Bool_.N, Bool_.N, Bool_.N, pos, src_end, Html_tag_.Id__any);}
|
||||
public Html_tag Tag__peek_fwd_head(int match_name_id) {return Tag__find(Bool_.N, Bool_.N, Bool_.N, pos, src_end, match_name_id);}
|
||||
public Html_tag Tag__peek_fwd_tail(int match_name_id) {return Tag__find(Bool_.N, Bool_.N, Bool_.Y, pos, src_end, match_name_id);}
|
||||
public Html_tag Tag__peek_bwd_tail(int match_name_id) {return Tag__find(Bool_.N, Bool_.Y, Bool_.Y, pos, src_end, match_name_id);}
|
||||
public Html_tag Tag__peek_bwd_head() {return Tag__find(Bool_.N, Bool_.Y, Bool_.Y, pos, src_end, Html_tag_.Id__any);}
|
||||
public Html_tag Tag__find_fwd_head(int bgn, int end, int match_name_id) {return Tag__find(Bool_.N, Bool_.N, Bool_.N, bgn, end, match_name_id);}
|
||||
private Html_tag Tag__find(boolean move, boolean bwd, boolean tail, int rng_bgn, int rng_end, int match_name_id) {
|
||||
int tmp = rng_bgn;
|
||||
int stop_pos = rng_end; int adj = 1;
|
||||
if (bwd) {
|
||||
stop_pos = -1;
|
||||
adj = -1;
|
||||
@@ -72,10 +69,10 @@ public class Html_tag_rdr {
|
||||
tmp += adj;
|
||||
}
|
||||
if (rv == null) {
|
||||
if (move)
|
||||
rdr.Fail("missing tag", "name_id", match_name_id);
|
||||
if (move && tail && !bwd)
|
||||
rdr.Fail("move failed", "tag_name", Html_tag_.To_str(match_name_id));
|
||||
else
|
||||
return tag__eos;
|
||||
return Tag__eos(rng_bgn);
|
||||
}
|
||||
if (move) pos = rv.Src_end();
|
||||
return rv;
|
||||
@@ -98,13 +95,15 @@ public class Html_tag_rdr {
|
||||
if (depth == 0)
|
||||
return true;
|
||||
else {
|
||||
depth_obj.Val_add(-1);
|
||||
if (match_name_id == tag_name_id)
|
||||
depth_obj.Val_add(-1);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (!bwd && tail && !tag_is_tail) {
|
||||
depth_obj.Val_add(1);
|
||||
if (!bwd && tail && !tag_is_tail && !tag.Tag_is_inline()) {
|
||||
if (match_name_id == tag_name_id)
|
||||
depth_obj.Val_add(1);
|
||||
return false;
|
||||
}
|
||||
else
|
||||
@@ -112,7 +111,7 @@ public class Html_tag_rdr {
|
||||
}
|
||||
}
|
||||
public Html_tag Tag__extract(boolean move, boolean tail, int match_name_id, int tag_bgn) {
|
||||
int name_bgn = tag_bgn + 1; if (name_bgn == src_end) return tag__eos; // EX: "<EOS"
|
||||
int name_bgn = tag_bgn + 1; if (name_bgn == src_end) return Tag__eos(tag_bgn); // EX: "<EOS"
|
||||
byte name_0 = src[name_bgn];
|
||||
boolean cur_is_tail = false;
|
||||
switch (name_0) {
|
||||
@@ -121,7 +120,7 @@ public class Html_tag_rdr {
|
||||
return Tag__comment(tag_bgn);
|
||||
break;
|
||||
case Byte_ascii.Slash:
|
||||
++name_bgn; if (name_bgn == src_end) return tag__eos; // EX: "</EOS"
|
||||
++name_bgn; if (name_bgn == src_end) return Tag__eos(tag_bgn); // EX: "</EOS"
|
||||
name_0 = src[name_bgn];
|
||||
cur_is_tail = true;
|
||||
break;
|
||||
@@ -138,7 +137,7 @@ public class Html_tag_rdr {
|
||||
break;
|
||||
case Byte_ascii.Slash: // EX: "<a/>"
|
||||
name_end = name_pos;
|
||||
tag_end = name_pos + 1; if (tag_end == src_end) return tag__eos; // EX: "<a/EOS"
|
||||
tag_end = name_pos + 1; if (tag_end == src_end) return Tag__eos(tag_bgn);// EX: "<a/EOS"
|
||||
if (src[tag_end] == Byte_ascii.Angle_end) {
|
||||
atrs_end = name_end;
|
||||
inline = true;
|
||||
@@ -154,12 +153,12 @@ public class Html_tag_rdr {
|
||||
break;
|
||||
}
|
||||
if (!loop) break;
|
||||
++name_pos; if (name_pos == src_end) return tag__eos; // EX: "<abEOS"
|
||||
++name_pos; if (name_pos == src_end) return Tag__eos(tag_bgn); // EX: "<abEOS"
|
||||
name_byte = src[name_pos];
|
||||
}
|
||||
if (tag_end == -1) {
|
||||
tag_end = Bry_find_.Find_fwd(src, Byte_ascii.Angle_end, name_end, src_end);
|
||||
if (tag_end == Bry_find_.Not_found) return tag__eos;
|
||||
if (tag_end == Bry_find_.Not_found) return Tag__eos(tag_bgn);
|
||||
int prv_pos = tag_end - 1;
|
||||
if (src[prv_pos] == Byte_ascii.Slash) {
|
||||
atrs_end = prv_pos;
|
||||
@@ -218,5 +217,9 @@ public class Html_tag_rdr {
|
||||
int tag_end = Bry_find_.Move_fwd(src, gplx.langs.htmls.Html_tag_.Comm_end, tag_bgn, src_end); if (tag_end == Bry_find_.Not_found) tag_end = src_end;
|
||||
return tag__comment.Init(this, Bool_.N, Bool_.N, tag_bgn, tag_end, tag_end, tag_end, Html_tag_.Id__comment);
|
||||
}
|
||||
private Html_tag Tag__eos(int tag_bgn) {
|
||||
int tag_end = tag_bgn + 255; if (tag_end > src_end) tag_end = src_end;
|
||||
return tag__comment.Init(this, Bool_.N, Bool_.N, tag_bgn, tag_end, tag_end, tag_end, Html_tag_.Id__eos);
|
||||
}
|
||||
private static final byte[] Bry__comment__mid = Bry_.new_a7("--");
|
||||
}
|
||||
|
||||
@@ -35,22 +35,26 @@ public class Html_tag_rdr_tst {
|
||||
fxt.Init("<!DOCTYPE html>1<div id='1'>2</div>3");
|
||||
fxt.Test__move_fwd_head(Html_tag_.Id__div , "<div id='1'>") ; fxt.Test__pos("2");
|
||||
}
|
||||
@Test public void Recursive() {
|
||||
@Test public void Recursive__same_tags() {
|
||||
fxt.Init("1<a>2<a>3</a>4</a>5");
|
||||
fxt.Test__move_fwd_head(Html_tag_.Id__a , "<a>") ; fxt.Test__pos("2");
|
||||
fxt.Test__move_fwd_tail(Html_tag_.Id__a , "</a>") ; fxt.Test__pos("5");
|
||||
}
|
||||
@Test public void Recursive__diff_tags() {
|
||||
fxt.Init("1<div>2<a>3<img/>4</a>5</div>6");
|
||||
fxt.Test__move_fwd_head(Html_tag_.Id__div , "<div>") ; fxt.Test__pos("2");
|
||||
fxt.Test__move_fwd_tail(Html_tag_.Id__div , "</div>") ; fxt.Test__pos("6");
|
||||
}
|
||||
}
|
||||
class Html_tag_rdr_fxt {
|
||||
private final Html_tag_rdr rdr = new Html_tag_rdr();
|
||||
// private final Html_doc_log log = new Html_doc_log();
|
||||
public void Init(String src_str) {
|
||||
byte[] src_bry = Bry_.new_u8(src_str);
|
||||
rdr.Init(src_bry, 0, src_bry.length);
|
||||
}
|
||||
public void Test__move_fwd_head(String expd) {Test__move_fwd_head(Html_tag_.Id__any, expd);}
|
||||
public void Test__move_fwd_head(int match_name_id, String expd) {
|
||||
Html_tag actl_tag = rdr.Tag__move_fwd_head(match_name_id);
|
||||
Html_tag actl_tag = rdr.Tag__move_fwd_head(match_name_id).Chk_id(match_name_id);
|
||||
Tfds.Eq_str(expd, String_.new_u8(rdr.Src(), actl_tag.Src_bgn(), actl_tag.Src_end()));
|
||||
}
|
||||
public void Test__move_fwd_tail(int match_name_id, String expd) {
|
||||
|
||||
@@ -18,5 +18,4 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
package gplx.langs.htmls.parsers; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
|
||||
public interface Html_txt_wkr {
|
||||
void Parse(int rng_bgn, int rng_end);
|
||||
void Init(byte[] src, int src_bgn, int src_end);
|
||||
}
|
||||
@@ -15,8 +15,8 @@ GNU Affero General Public License for more details.
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.langs.htmls.parsers; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
|
||||
public class Html_atr_cls_ {
|
||||
package gplx.langs.htmls.parsers.clses; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*; import gplx.langs.htmls.parsers.*;
|
||||
public class Html_atr_class_ {
|
||||
public static boolean Has(byte[] src, int src_bgn, int src_end, byte[] cls) {
|
||||
int cls_bgn = src_bgn;
|
||||
int pos = src_bgn;
|
||||
@@ -15,10 +15,10 @@ GNU Affero General Public License for more details.
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.langs.htmls.parsers; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
|
||||
package gplx.langs.htmls.parsers.clses; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*; import gplx.langs.htmls.parsers.*;
|
||||
import org.junit.*;
|
||||
public class Html_atr_cls__tst {
|
||||
private final Html_atr_cls__fxt fxt = new Html_atr_cls__fxt();
|
||||
public class Html_atr_class__tst {
|
||||
private final Html_atr_class__fxt fxt = new Html_atr_class__fxt();
|
||||
@Test public void Has() {
|
||||
fxt.Test__has__y("a b c", "a", "b", "c");
|
||||
fxt.Test__has__n("a b c", "d");
|
||||
@@ -34,14 +34,14 @@ public class Html_atr_cls__tst {
|
||||
fxt.Test__find_1st(hash, Byte_.Max_value_127, "xyz");
|
||||
}
|
||||
}
|
||||
class Html_atr_cls__fxt {
|
||||
class Html_atr_class__fxt {
|
||||
public void Test__has__y(String src, String... ary) {Test__has(Bool_.Y, src, ary);}
|
||||
public void Test__has__n(String src, String... ary) {Test__has(Bool_.N, src, ary);}
|
||||
public void Test__has(boolean expd, String src, String... ary) {
|
||||
byte[] src_bry = Bry_.new_u8(src);
|
||||
for (String itm : ary) {
|
||||
byte[] itm_bry = Bry_.new_u8(itm);
|
||||
Tfds.Eq_bool(expd, Html_atr_cls_.Has(src_bry, 0, src_bry.length, itm_bry), itm);
|
||||
Tfds.Eq_bool(expd, Html_atr_class_.Has(src_bry, 0, src_bry.length, itm_bry), itm);
|
||||
}
|
||||
}
|
||||
public Hash_adp_bry Make_hash(String... ary) {
|
||||
@@ -53,6 +53,6 @@ class Html_atr_cls__fxt {
|
||||
}
|
||||
public void Test__find_1st(Hash_adp_bry hash, int expd, String src) {
|
||||
byte[] src_bry = Bry_.new_u8(src);
|
||||
Tfds.Eq_byte((byte)expd, Html_atr_cls_.Find_1st(src_bry, 0, src_bry.length, hash), src);
|
||||
Tfds.Eq_byte((byte)expd, Html_atr_class_.Find_1st(src_bry, 0, src_bry.length, hash), src);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,48 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.langs.htmls.parsers.clses; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*; import gplx.langs.htmls.parsers.*;
|
||||
public class Html_atr_class_parser_ {
|
||||
public static void Parse(Html_tag tag, Html_atr_class_wkr wkr) {
|
||||
Html_atr atr = tag.Atrs__get_by_or_empty(Html_atr_.Bry__class);
|
||||
if (atr.Val_exists())
|
||||
Parse(tag.Src(), atr.Val_bgn(), atr.Val_end(), wkr);
|
||||
}
|
||||
public static void Parse(byte[] src, int src_bgn, int src_end, Html_atr_class_wkr wkr) {
|
||||
int atr_idx = 0, atr_bgn = -1, atr_end = -1, tmp_bgn = -1, tmp_end = -1;
|
||||
int pos = src_bgn;
|
||||
while (true) {
|
||||
boolean pos_is_last = pos == src_end;
|
||||
byte b = pos_is_last ? Byte_ascii.Space : src[pos];
|
||||
switch (b) {
|
||||
case Byte_ascii.Tab: case Byte_ascii.Nl: case Byte_ascii.Cr: case Byte_ascii.Space:
|
||||
if (tmp_bgn != -1) { // ignore empty atrs
|
||||
if (!wkr.On_cls(src, atr_idx, atr_bgn, atr_end, tmp_bgn, tmp_end))
|
||||
pos_is_last = true;
|
||||
}
|
||||
++atr_idx; atr_bgn = -1; atr_end = -1; tmp_bgn = -1; tmp_end = -1;
|
||||
break;
|
||||
default:
|
||||
if (tmp_bgn == -1) tmp_bgn = pos;
|
||||
tmp_end = pos + 1;
|
||||
break;
|
||||
}
|
||||
if (pos_is_last) break;
|
||||
++pos;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,44 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.langs.htmls.parsers.clses; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*; import gplx.langs.htmls.parsers.*;
|
||||
import org.junit.*;
|
||||
public class Html_atr_class_parser__tst {
|
||||
private final Html_atr_class_parser__fxt fxt = new Html_atr_class_parser__fxt();
|
||||
@Test public void Basic() {fxt.Test__parse("v1" , "v1");}
|
||||
@Test public void Many() {fxt.Test__parse("v1 v2" , "v1", "v2");}
|
||||
}
|
||||
class Html_atr_class_parser__fxt {
|
||||
private final Html_atr_class_wkr__list wkr = new Html_atr_class_wkr__list();
|
||||
public void Test__parse(String src_str, String... expd) {
|
||||
byte[] src_bry = Bry_.new_u8(src_str);
|
||||
String[] actl = wkr.Parse(src_bry, 0, src_bry.length);
|
||||
Tfds.Eq_ary_str(expd, actl);
|
||||
}
|
||||
}
|
||||
class Html_atr_class_wkr__list implements Html_atr_class_wkr {
|
||||
private final List_adp list = List_adp_.new_();
|
||||
public boolean On_cls(byte[] src, int atr_idx, int atr_bgn, int atr_end, int val_bgn, int val_end) {
|
||||
String s = String_.new_u8(src, val_bgn, val_end);
|
||||
list.Add(s); //
|
||||
return true;
|
||||
}
|
||||
public String[] Parse(byte[] src, int src_bgn, int src_end) {
|
||||
Html_atr_class_parser_.Parse(src, src_bgn, src_end, this);
|
||||
return (String[])list.To_ary_and_clear(String.class);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,21 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.langs.htmls.parsers.clses; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*; import gplx.langs.htmls.parsers.*;
|
||||
/** Callback invoked by Html_atr_class_parser_ for each class name found in a "class" attribute value. */
public interface Html_atr_class_wkr {
  /**
   * Called once per class name.
   *
   * @param src     bytes being parsed
   * @param atr_idx index supplied by the parser for this class name
   * @param atr_bgn supplied by the parser (Html_atr_class_parser_ passes -1)
   * @param atr_end supplied by the parser (Html_atr_class_parser_ passes -1)
   * @param val_bgn index in src of the first byte of the class name
   * @param val_end index in src one past the last byte of the class name
   * @return false to make Html_atr_class_parser_ stop parsing; true to continue
   */
  boolean On_cls(byte[] src, int atr_idx, int atr_bgn, int atr_end, int val_bgn, int val_end);
}
|
||||
@@ -16,7 +16,12 @@ You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.langs.htmls.parsers.styles; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*; import gplx.langs.htmls.parsers.*;
|
||||
public class Html_atr_style_parser {
|
||||
public class Html_atr_style_parser_ {
|
||||
public static void Parse(Html_tag tag, Html_atr_style_wkr wkr) {
|
||||
Html_atr atr = tag.Atrs__get_by_or_empty(Html_atr_.Bry__style);
|
||||
if (atr.Val_exists())
|
||||
Parse(tag.Src(), atr.Val_bgn(), atr.Val_end(), wkr);
|
||||
}
|
||||
public static void Parse(byte[] src, int src_bgn, int src_end, Html_atr_style_wkr wkr) {
|
||||
int atr_idx = 0, atr_bgn = -1, atr_end = -1, key_bgn = -1, key_end = -1, tmp_bgn = -1, tmp_end = -1;
|
||||
int pos = src_bgn;
|
||||
@@ -17,8 +17,8 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.langs.htmls.parsers.styles; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*; import gplx.langs.htmls.parsers.*;
|
||||
import org.junit.*;
|
||||
public class Html_atr_style_parser_tst {
|
||||
private final Html_atr_style_parser_fxt fxt = new Html_atr_style_parser_fxt();
|
||||
public class Html_atr_style_parser__tst {
|
||||
private final Html_atr_style_parser__fxt fxt = new Html_atr_style_parser__fxt();
|
||||
@Test public void Basic() {
|
||||
fxt.Test__parse("k_0:v_0" , fxt.Make("k_0", "v_0"));
|
||||
fxt.Test__parse("k_0:v_0;" , fxt.Make("k_0", "v_0"));
|
||||
@@ -30,7 +30,7 @@ public class Html_atr_style_parser_tst {
|
||||
fxt.Test__parse(" k_0 : v 0 ;" , fxt.Make("k_0", "v 0"));
|
||||
}
|
||||
}
|
||||
class Html_atr_style_parser_fxt {
|
||||
class Html_atr_style_parser__fxt {
|
||||
private final Html_atr_style_wkr__kv_list wkr = new Html_atr_style_wkr__kv_list();
|
||||
public KeyVal Make(String k, String v) {return KeyVal_.new_(k, v);}
|
||||
public void Test__parse(String src_str, KeyVal... expd) {
|
||||
@@ -39,3 +39,15 @@ class Html_atr_style_parser_fxt {
|
||||
Tfds.Eq_ary_str(expd, actl);
|
||||
}
|
||||
}
|
||||
class Html_atr_style_wkr__kv_list implements Html_atr_style_wkr {
|
||||
private final List_adp list = List_adp_.new_();
|
||||
public boolean On_atr(byte[] src, int atr_idx, int atr_bgn, int atr_end, int key_bgn, int key_end, int val_bgn, int val_end) {
|
||||
KeyVal kv = KeyVal_.new_(String_.new_u8(src, key_bgn, key_end), String_.new_u8(src, val_bgn, val_end));
|
||||
list.Add(kv);
|
||||
return true;
|
||||
}
|
||||
public KeyVal[] Parse(byte[] src, int src_bgn, int src_end) {
|
||||
Html_atr_style_parser_.Parse(src, src_bgn, src_end, this);
|
||||
return (KeyVal[])list.To_ary_and_clear(KeyVal.class);
|
||||
}
|
||||
}
|
||||
@@ -19,15 +19,3 @@ package gplx.langs.htmls.parsers.styles; import gplx.*; import gplx.langs.*; imp
|
||||
public interface Html_atr_style_wkr {
|
||||
boolean On_atr(byte[] src, int atr_idx, int atr_bgn, int atr_end, int key_bgn, int key_end, int val_bgn, int val_end);
|
||||
}
|
||||
class Html_atr_style_wkr__kv_list implements Html_atr_style_wkr {
|
||||
private final List_adp list = List_adp_.new_();
|
||||
public boolean On_atr(byte[] src, int atr_idx, int atr_bgn, int atr_end, int key_bgn, int key_end, int val_bgn, int val_end) {
|
||||
KeyVal kv = KeyVal_.new_(String_.new_u8(src, key_bgn, key_end), String_.new_u8(src, val_bgn, val_end));
|
||||
list.Add(kv);
|
||||
return true;
|
||||
}
|
||||
public KeyVal[] Parse(byte[] src, int src_bgn, int src_end) {
|
||||
Html_atr_style_parser.Parse(src, src_bgn, src_end, this);
|
||||
return (KeyVal[])list.To_ary_and_clear(KeyVal.class);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -29,7 +29,7 @@ public class Html_atr_style_wkr__get_val_as_int implements Html_atr_style_wkr {
|
||||
}
|
||||
public int Parse(byte[] src, int src_bgn, int src_end, byte[] find_key) {
|
||||
this.find_key = find_key;
|
||||
Html_atr_style_parser.Parse(src, src_bgn, src_end, this);
|
||||
Html_atr_style_parser_.Parse(src, src_bgn, src_end, this);
|
||||
return Bry_.To_int_or__lax(src, val_bgn, val_end, -1);
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user