1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2026-03-02 03:49:30 +00:00

v2.11.2.1

This commit is contained in:
gnosygnu
2015-11-08 23:48:07 -05:00
parent b990ec409f
commit d9f45cec19
298 changed files with 3908 additions and 2141 deletions

View File

@@ -20,9 +20,15 @@ public class Html_atr_ {
public static final byte[]
Bry__id = Bry_.new_a7("id")
, Bry__class = Bry_.new_a7("class")
, Bry__rel = Bry_.new_a7("rel")
, Bry__href = Bry_.new_a7("href")
, Bry__title = Bry_.new_a7("title")
, Bry__style = Bry_.new_a7("style")
// <a>
, Bry__href = Bry_.new_a7("href")
, Bry__rel = Bry_.new_a7("rel")
// <img>
, Bry__alt = Bry_.new_a7("alt")
, Bry__src = Bry_.new_a7("src")
, Bry__width = Bry_.new_a7("width")
, Bry__height = Bry_.new_a7("height")
;
}

View File

@@ -0,0 +1,38 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.htmls; import gplx.*; import gplx.langs.*;
/**
 * Pre-built byte fragments for writing HTML tags straight into a byte buffer.
 *
 * Naming convention visible in the literals below:
 *   "__1st" fragments start with a space and open the first attribute of a tag.
 *   "__nth" fragments start with a double-quote, i.e. they close the previous
 *           attribute's value before opening the next attribute.
 *   "_w_quote" fragments likewise start by closing the previous attribute's value.
 */
public class Html_bldr_ {
  // <a> tag
  public static final byte[] Bry__a_lhs_bgn = Bry_.new_a7("<a");
  public static final byte[] Bry__a_rhs = Bry_.new_a7("</a>");
  public static final byte[] Bry__a_lhs_w_href = Bry_.new_a7("<a href=\"");

  // <img> tag
  public static final byte[] Bry__img_lhs_w_alt = Bry_.new_a7("<img alt=\"");
  public static final byte[] Bry__img_lhs = Bry_.new_a7("<img");

  // attribute openers
  public static final byte[] Bry__id__1st = Bry_.new_a7(" id=\"");
  public static final byte[] Bry__id__nth = Bry_.new_a7("\" id=\"");
  public static final byte[] Bry__cls__nth = Bry_.new_a7("\" class=\"");
  public static final byte[] Bry__title__nth = Bry_.new_a7("\" title=\"");
  public static final byte[] Bry__alt__nth = Bry_.new_a7("\" alt=\"");
  public static final byte[] Bry__src__nth = Bry_.new_a7("\" src=\"");
  public static final byte[] Bry__width__nth = Bry_.new_a7("\" width=\"");
  public static final byte[] Bry__height__nth = Bry_.new_a7("\" height=\"");

  // tag terminators
  public static final byte[] Bry__lhs_end_head_w_quote = Bry_.new_a7("\">");
  public static final byte[] Bry__lhs_end_inline = Bry_.new_a7("/>");
  public static final byte[] Bry__lhs_end_inline_w_quote = Bry_.new_a7("\"/>");
}

View File

@@ -17,20 +17,40 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.htmls; import gplx.*; import gplx.langs.*;
public class Html_tag_ {
public static final int
Id__eos = -2
, Id__any = -1
, Id__unknown = 0
, Id__comment = 1
, Id__h2 = 2
, Id__h3 = 3
, Id__h4 = 4
, Id__h5 = 5
, Id__h6 = 6
, Id__a = 7
, Id__span = 8
, Id__div = 9
, Id__img = 10
;
public static final byte[]
Ul_name_bry = Bry_.new_a7("ul")
, A_name_bry = Bry_.new_a7("a")
, Code_name_bry = Bry_.new_a7("code")
, Tr_name_bry = Bry_.new_a7("tr")
, Td_name_bry = Bry_.new_a7("td")
, Table_name_bry = Bry_.new_a7("table")
Bry__a = Bry_.new_a7("a")
, Bry__ul = Bry_.new_a7("ul")
;
public static final Hash_adp_bry Hash = Hash_adp_bry.ci_a7()
.Add_bry_int(Bry__a , Id__a)
.Add_str_int("h2" , Id__h2)
.Add_str_int("h3" , Id__h3)
.Add_str_int("h4" , Id__h4)
.Add_str_int("h5" , Id__h5)
.Add_str_int("h6" , Id__h6)
.Add_str_int("span" , Id__span)
.Add_str_int("div" , Id__div)
.Add_str_int("img" , Id__img)
;
public static final byte[]
Br_inl = Bry_.new_a7("<br/>")
, Hr_inl = Bry_.new_a7("<hr/>")
, Body_lhs = Bry_.new_a7("<body>") , Body_rhs = Bry_.new_a7("</body>")
, A_lhs_bgn = Bry_.new_a7("<a")
, A_rhs = Bry_.new_a7("</a>")
, B_lhs = Bry_.new_a7("<b>") , B_rhs = Bry_.new_a7("</b>")
, I_lhs = Bry_.new_a7("<i>") , I_rhs = Bry_.new_a7("</i>")
, P_lhs = Bry_.new_a7("<p>") , P_rhs = Bry_.new_a7("</p>")
@@ -60,28 +80,4 @@ public class Html_tag_ {
Comm_bgn_len = Comm_bgn.length
, Comm_end_len = Comm_end.length
;
public static final int
Id__eos = -2
, Id__any = -1
, Id__unknown = 0
, Id__a = 1
, Id__h2 = 2
, Id__h3 = 3
, Id__h4 = 4
, Id__h5 = 5
, Id__h6 = 6
, Id__span = 7
, Id__div = 8
, Id__comment = 9
;
public static final Hash_adp_bry Hash = Hash_adp_bry.ci_a7()
.Add_str_int("a" , Id__a)
.Add_str_int("h2" , Id__h2)
.Add_str_int("h3" , Id__h3)
.Add_str_int("h4" , Id__h4)
.Add_str_int("h5" , Id__h5)
.Add_str_int("h6" , Id__h6)
.Add_str_int("span" , Id__span)
.Add_str_int("div" , Id__div)
;
}

View File

@@ -17,8 +17,8 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.htmls; import gplx.*; import gplx.langs.*;
import org.junit.*;
public class Html_utl_tst {
@Before public void init() {fxt.Clear();} private Html_utl_fxt fxt = new Html_utl_fxt();
public class Html_utl_tst {
@Before public void init() {fxt.Clear();} private Html_atr_cls_fxt fxt = new Html_atr_cls_fxt();
@Test public void Basic() {fxt.Test_del_comments("a<!-- b -->c" , "ac");}
@Test public void Bgn_missing() {fxt.Test_del_comments("a b c" , "a b c");}
@Test public void End_missing() {fxt.Test_del_comments("a<!-- b c" , "a<!-- b c");}
@@ -37,7 +37,7 @@ public class Html_utl_tst {
fxt.Test_unescape_html(Bool_.Y, Bool_.Y, Bool_.Y, Bool_.Y, Bool_.Y, "a&lt;&gt;&#39;&amp;&quot;b" , "a<>'&\"b"); // basic
}
}
class Html_utl_fxt {
class Html_atr_cls_fxt {
private Bry_bfr tmp_bfr = Bry_bfr.reset_(255);
public void Clear() {
tmp_bfr.Clear();

View File

@@ -26,10 +26,15 @@ public class Html_atr {
public byte[] Key() {return key;} private final byte[] key;
public int Val_bgn() {return val_bgn;} private final int val_bgn;
public int Val_end() {return val_end;} private final int val_end;
public boolean Val_exists() {return val_end > val_bgn;}
public byte[] Val() {
if (val == null)
val = Bry_.Mid(src, val_bgn, val_end);
return val;
} private byte[] val;
public static final Html_atr Noop = new Html_atr(-1, Bry_.Empty, Bry_.Empty, Bry_.Empty, 0, 0);
}
public void Html__add(Bry_bfr bfr) {
if (val_end > val_bgn)
bfr.Add_mid(src, val_bgn, val_end);
}
public static final Html_atr Noop = new Html_atr(-1, Bry_.Empty, Bry_.Empty, Bry_.Empty, -1, -1);
}

View File

@@ -0,0 +1,61 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.htmls.parsers; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
public class Html_atr_cls_ {
public static boolean Has(byte[] src, int src_bgn, int src_end, byte[] cls) {
int cls_bgn = src_bgn;
int pos = src_bgn;
while (true) {
boolean pos_is_last = pos == src_end;
byte b = pos_is_last ? Byte_ascii.Space : src[pos];
if (b == Byte_ascii.Space) {
if (cls_bgn != -1) {
if (Bry_.Match(src, cls_bgn, pos, cls))return true;
cls_bgn = -1;
}
}
else {
if (cls_bgn == -1) cls_bgn = pos;
}
if (pos_is_last) break;
++pos;
}
return false;
}
public static byte Find_1st(byte[] src, int src_bgn, int src_end, Hash_adp_bry hash) {
int cls_bgn = src_bgn;
int pos = src_bgn;
while (true) {
boolean pos_is_last = pos == src_end;
byte b = pos_is_last ? Byte_ascii.Space : src[pos];
if (b == Byte_ascii.Space) {
if (cls_bgn != -1) {
byte rv = hash.Get_as_byte_or(src, cls_bgn, pos, Byte_.Max_value_127);
if (rv != Byte_.Max_value_127) return rv;
cls_bgn = -1;
}
}
else {
if (cls_bgn == -1) cls_bgn = pos;
}
if (pos_is_last) break;
++pos;
}
return Byte_.Max_value_127;
}
}

View File

@@ -0,0 +1,58 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.htmls.parsers; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
import org.junit.*;
/** Unit tests for Html_atr_cls_: whole-token matching (Has) and hash lookup of the first known class (Find_1st). */
public class Html_atr_cls__tst {
  private final Html_atr_cls__fxt fxt = new Html_atr_cls__fxt();

  @Test public void Has() {
    String three_classes = "a b c";
    fxt.Test__has__y(three_classes, "a", "b", "c"); // every listed class is found
    fxt.Test__has__n(three_classes, "d");           // absent class
    fxt.Test__has__n("ab", "a");                    // a prefix of a class name is not a match
  }

  @Test public void Cls__has__hash() {
    Hash_adp_bry hash = fxt.Make_hash("x", "y", "z"); // ids are the positions: x=0, y=1, z=2
    int not_found = Byte_.Max_value_127;
    fxt.Test__find_1st(hash, 0, "x");
    fxt.Test__find_1st(hash, 2, "z");
    fxt.Test__find_1st(hash, 0, "a x b");             // known class in the middle
    fxt.Test__find_1st(hash, 0, "a b x");             // known class at the end
    fxt.Test__find_1st(hash, not_found, "a");         // no known class
    fxt.Test__find_1st(hash, not_found, "xyz");       // concatenation is a different token
  }
}
/** Test fixture that converts String arguments to byte arrays and asserts on Html_atr_cls_ results. */
class Html_atr_cls__fxt {
  /** Asserts that every item in {@code ary} is found as a class name in {@code src}. */
  public void Test__has__y(String src, String... ary) {
    Test__has(Bool_.Y, src, ary);
  }

  /** Asserts that no item in {@code ary} is found as a class name in {@code src}. */
  public void Test__has__n(String src, String... ary) {
    Test__has(Bool_.N, src, ary);
  }

  /** Asserts that Html_atr_cls_.Has returns {@code expd} for each item in {@code ary}; the item is passed as the assertion message. */
  public void Test__has(boolean expd, String src, String... ary) {
    byte[] src_bry = Bry_.new_u8(src);
    int src_len = src_bry.length;
    int ary_len = ary.length;
    for (int i = 0; i < ary_len; ++i) {
      String itm = ary[i];
      boolean actl = Html_atr_cls_.Has(src_bry, 0, src_len, Bry_.new_u8(itm));
      Tfds.Eq_bool(expd, actl, itm);
    }
  }

  /** Builds a hash in which each string maps to its position in {@code ary}, cast to a byte. */
  public Hash_adp_bry Make_hash(String... ary) {
    Hash_adp_bry rv = Hash_adp_bry.ci_a7();
    int idx = 0;
    for (String itm : ary) {
      rv.Add_bry_byte(Bry_.new_u8(itm), (byte)idx);
      ++idx;
    }
    return rv;
  }

  /** Asserts that Html_atr_cls_.Find_1st over all of {@code src} returns {@code expd} (narrowed to a byte). */
  public void Test__find_1st(Hash_adp_bry hash, int expd, String src) {
    byte[] src_bry = Bry_.new_u8(src);
    byte actl = Html_atr_cls_.Find_1st(src_bry, 0, src_bry.length, hash);
    Tfds.Eq_byte((byte)expd, actl, src);
  }
}

View File

@@ -0,0 +1,38 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
//namespace gplx.langs.htmls.parsers {
// public class Html_doc_log {
// private byte[] src; private byte[] page_url; private String wkr_name; private int src_bgn; private int src_end;
// public Html_doc_log Init_by_page(byte[] src, byte[] page_url) {
// this.src = src; this.page_url = page_url;
// return this;
// }
// public Html_doc_log Init_by_wkr(String wkr_name, int src_bgn, int src_end) {
// this.wkr_name = wkr_name; this.src_bgn = src_bgn; this.src_end = src_end;
// return this;
// }
// public Err Fail_w_args(String fail_msg, params Object[] custom_args) {return Fail_w_excerpt(fail_msg, src_bgn, src_end + 255, custom_args);}
// public Err Fail_w_excerpt(String fail_msg, int excerpt_bgn, int excerpt_end, params Object[] custom_args) {
// Object[] dflt_args = Object_.Ary("page", page_url, "wkr", wkr_name, "excerpt", Bry_.Mid_safe(src, excerpt_bgn, excerpt_end));
// Object[] fail_args = Object_.Ary_add(custom_args, dflt_args);
// String msg = Err_msg.To_str(fail_msg, fail_args);
// Gfo_usr_dlg_.Instance.Warn_many("", "", msg);
// return Err_.new_("Xoh_hdoc_err", msg);
// }
// }
//}

View File

@@ -25,11 +25,12 @@ public class Html_doc_parser {
this.txt_wkr = txt_wkr;
return this;
}
public void Reg(Html_doc_wkr... wkr_ary) {
public Html_doc_parser Reg_wkrs(Html_doc_wkr... wkr_ary) {
for (Html_doc_wkr wkr : wkr_ary) {
trie.Add_obj(wkr.Hook(), wkr);
list.Add(wkr);
}
return this;
}
public void Parse(byte[] src, int src_bgn, int src_end) {
txt_wkr.Init(src, src_bgn, src_end);
@@ -52,7 +53,9 @@ public class Html_doc_parser {
txt_bgn = -1;
}
Html_doc_wkr wkr = (Html_doc_wkr)o;
pos = wkr.Parse(pos);
int hook_end = trie.Match_pos();
try {pos = wkr.Parse(pos);}
catch (Exception e) {Err_.Noop(e); txt_bgn = pos; pos = hook_end;}
}
}
if (txt_bgn != -1) txt_wkr.Parse(txt_bgn, src_end);

View File

@@ -16,19 +16,19 @@ You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.htmls.parsers; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
import gplx.xowa.parsers.htmls.*;
import gplx.xowa.parsers.htmls.*; import gplx.langs.htmls.parsers.styles.*;
public class Html_tag implements Mwh_atr_wkr {
private Html_tag_rdr rdr;
private Html_tag_rdr tag_rdr;
private Ordered_hash atrs_hash; private boolean atrs_null; private int atrs_bgn, atrs_end;
public Html_tag Init(Html_tag_rdr rdr, boolean tag_is_tail, boolean tag_is_inline, int src_bgn, int src_end, int atrs_bgn, int atrs_end, int name_id) {
this.rdr = rdr; this.atrs_null = true;
public Html_tag Init(Html_tag_rdr tag_rdr, boolean tag_is_tail, boolean tag_is_inline, int src_bgn, int src_end, int atrs_bgn, int atrs_end, int name_id) {
this.tag_rdr = tag_rdr; this.atrs_null = true;
this.tag_is_tail = tag_is_tail; this.tag_is_inline = tag_is_inline;
this.atrs_bgn = atrs_bgn; this.atrs_end = atrs_end;
this.name_id = name_id; this.src_bgn = src_bgn; this.src_end = src_end;
return this;
}
public Html_tag Copy() {
Html_tag rv = new Html_tag().Init(rdr, tag_is_tail, tag_is_inline, src_bgn, src_end, atrs_bgn, atrs_end, name_id);
Html_tag rv = new Html_tag().Init(tag_rdr, tag_is_tail, tag_is_inline, src_bgn, src_end, atrs_bgn, atrs_end, name_id);
rv.atrs_null = false;
rv.atrs_hash = Copy(atrs_hash);
return rv;
@@ -43,15 +43,49 @@ public class Html_tag implements Mwh_atr_wkr {
Html_atr rv = (Html_atr)atrs_hash.Get_by(key);
return rv == null ? false : Bry_.Eq(val, rv.Val());
}
public boolean Atrs__cls_has(byte[] val) {
if (atrs_null) Atrs__make();
Html_atr rv = (Html_atr)atrs_hash.Get_by(Html_atr_.Bry__class); if (rv == null) return false;
byte[] rv_val = rv.Val();
return Html_atr_cls_.Has(rv_val, 0, rv_val.length, val);
}
public byte Atrs__cls_find_1st(Hash_adp_bry hash) {
if (atrs_null) Atrs__make();
Html_atr cls_atr = (Html_atr)atrs_hash.Get_by(Html_atr_.Bry__class); if (cls_atr == null) tag_rdr.Rdr().Fail("cls missing", String_.Empty, String_.Empty);
byte rv = Html_atr_cls_.Find_1st(tag_rdr.Src(), cls_atr.Val_bgn(), cls_atr.Val_end(), hash); if (rv == Byte_.Max_value_127) tag_rdr.Rdr().Fail("cls val missing", String_.Empty, String_.Empty);
return rv;
}
private static final Html_atr_style_wkr__get_val_as_int style_wkr = new Html_atr_style_wkr__get_val_as_int();
public int Atrs__style_get_as_int(byte[] key) {
if (atrs_null) Atrs__make();
Html_atr rv = (Html_atr)atrs_hash.Get_by(Html_atr_.Bry__style); if (rv == null) return -1;
byte[] rv_val = rv.Val();
return style_wkr.Parse(rv_val, 0, rv_val.length, key);
}
public byte[] Atrs__get_as_bry(byte[] key) {
if (atrs_null) Atrs__make();
Html_atr rv = (Html_atr)atrs_hash.Get_by(key);
return rv == null ? Bry_.Empty : rv.Val();
}
public Html_atr Atrs__get_by(byte[] key) {
public int Atrs__get_as_int(byte[] key) {
int rv = Atrs__get_as_int_or(key, Int_.Min_value); if (rv == Int_.Min_value) tag_rdr.Rdr().Fail("atr missing", "key", key);
return rv;
}
public int Atrs__get_as_int_or(byte[] key, int or) {
if (atrs_null) Atrs__make();
Html_atr rv = (Html_atr)atrs_hash.Get_by(key); if (rv == null) return or;
return Bry_.To_int_or(tag_rdr.Src(), rv.Val_bgn(), rv.Val_end(), or);
}
public Html_atr Atrs__get_by(byte[] key) {return Atrs__get_by(key, Bool_.Y);}
public Html_atr Atrs__get_by_or_empty(byte[] key) {return Atrs__get_by(key, Bool_.N);}
public Html_atr Atrs__get_by(byte[] key, boolean fail_if_null) {
if (atrs_null) Atrs__make();
Html_atr rv = (Html_atr)atrs_hash.Get_by(key);
return rv == null ? Html_atr.Noop : rv;
if (rv == null) {
if (fail_if_null) tag_rdr.Rdr().Fail("atr missing", "key", key);
else return Html_atr.Noop;
}
return rv;
}
public String Atrs__print() {
if (atrs_null) Atrs__make();
@@ -67,21 +101,19 @@ public class Html_tag implements Mwh_atr_wkr {
atrs_null = false;
if (atrs_hash == null) atrs_hash = Ordered_hash_.New_bry();
else atrs_hash.Clear();
rdr.Atrs__make(this, atrs_bgn, atrs_end);
tag_rdr.Atrs__make(this, atrs_bgn, atrs_end);
}
public void On_atr_each (Mwh_atr_parser mgr, byte[] src, int nde_tid, boolean valid, boolean repeated, boolean key_exists, byte[] key_bry, byte[] val_bry_manual, int[] itm_ary, int itm_idx) {
if (!valid) return;
byte[] val_bry = val_bry_manual;
int val_bgn = -1, val_end = -1;
if (key_exists) {
if (val_bry == null) {
val_bgn = itm_ary[itm_idx + Mwh_atr_mgr.Idx_val_bgn];
val_end = itm_ary[itm_idx + Mwh_atr_mgr.Idx_val_end];
}
val_bgn = itm_ary[itm_idx + Mwh_atr_mgr.Idx_val_bgn];
val_end = itm_ary[itm_idx + Mwh_atr_mgr.Idx_val_end];
}
else
val_bry_manual = key_bry;
Html_atr atr = new Html_atr(atrs_hash.Count(), key_bry, val_bry_manual, src, val_bgn, val_end);
val_bry = key_bry;
Html_atr atr = new Html_atr(atrs_hash.Count(), key_bry, val_bry, src, val_bgn, val_end);
atrs_hash.Add(key_bry, atr);
}
private static Ordered_hash Copy(Ordered_hash src) {

View File

@@ -16,22 +16,25 @@ You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.htmls.parsers; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
import gplx.core.primitives.*; import gplx.core.btries.*;
import gplx.core.primitives.*; import gplx.core.brys.*; import gplx.core.btries.*;
import gplx.xowa.parsers.htmls.*;
public class Html_tag_rdr {
private final Hash_adp_bry name_hash = Html_tag_.Hash;
private final Mwh_atr_parser atr_parser = new Mwh_atr_parser();
private final Html_tag tag__tmp = new Html_tag(), tag__eos = new Html_tag(), tag__comment = new Html_tag();
private final Html_tag tag__tmp__move = new Html_tag(), tag__tmp__peek = new Html_tag(), tag__eos = new Html_tag(), tag__comment = new Html_tag();
private final Int_obj_ref tmp_depth = Int_obj_ref.zero_();
public byte[] Src() {return src;} private byte[] src;
public int Src_end() {return src_end;} private int src_end;
public Bry_rdr Rdr() {return rdr;} private final Bry_rdr rdr = new Bry_rdr();
public void Init(byte[] src, int src_bgn, int src_end) {
this.src = src; this.pos = src_bgn; this.src_end = src_end;
tag__eos.Init(this, Bool_.N, Bool_.N, src_end, src_end, src_end, src_end, Html_tag_.Id__eos);
rdr.Ctor_by_page(Bry_.Empty, src, src_end);
}
public int Pos() {return pos;} private int pos;
public void Pos_(int v) {this.pos = v;}
public void Atrs__make(Mwh_atr_wkr atr_wkr, int head_bgn, int head_end) {atr_parser.Parse(atr_wkr, -1, -1, src, head_bgn, head_end);}
public void Fail(String msg, Html_tag tag) {rdr.Fail(msg, String_.Empty, String_.Empty, tag.Src_bgn(), tag.Src_end());}
public Html_tag Tag__move_fwd_head() {return Tag__find(Bool_.Y, Bool_.N, Bool_.N, Html_tag_.Id__any);}
public Html_tag Tag__move_fwd_head(int match_name_id) {return Tag__find(Bool_.Y, Bool_.N, Bool_.N, match_name_id);}
public Html_tag Tag__move_fwd_tail(int match_name_id) {return Tag__find(Bool_.Y, Bool_.N, Bool_.Y, match_name_id);}
@@ -40,6 +43,11 @@ public class Html_tag_rdr {
public Html_tag Tag__peek_fwd_tail(int match_name_id) {return Tag__find(Bool_.N, Bool_.N, Bool_.Y, match_name_id);}
public Html_tag Tag__peek_bwd_tail(int match_name_id) {return Tag__find(Bool_.N, Bool_.Y, Bool_.Y, match_name_id);}
public Html_tag Tag__peek_bwd_head() {return Tag__find(Bool_.N, Bool_.Y, Bool_.Y, Html_tag_.Id__any);}
public Html_tag Tag__move_fwd_head(byte[] cls) {
Html_tag rv = Tag__find(Bool_.Y, Bool_.N, Bool_.N, Html_tag_.Id__any);
if (!rv.Atrs__cls_has(cls)) rdr.Fail("missing cls", "cls", cls);
return rv;
}
private Html_tag Tag__find(boolean move, boolean bwd, boolean tail, int match_name_id) {
int tmp = pos;
int stop_pos = src_end; int adj = 1;
@@ -52,8 +60,8 @@ public class Html_tag_rdr {
Html_tag rv = null;
while (tmp != stop_pos) {
if (src[tmp] == Byte_ascii.Angle_bgn) {
rv = Tag__extract(tail, match_name_id, tmp);
if (Tag__match(bwd, tail, match_name_id, tmp_depth, rv))
rv = Tag__extract(move, tail, match_name_id, tmp);
if (Tag__match(move, bwd, tail, match_name_id, tmp_depth, rv))
break;
else {
tmp = bwd ? rv.Src_bgn() - 1 : rv.Src_end();
@@ -63,11 +71,16 @@ public class Html_tag_rdr {
else
tmp += adj;
}
if (rv == null) rv = tag__eos;
if (rv == null) {
if (move)
rdr.Fail("missing tag", "name_id", match_name_id);
else
return tag__eos;
}
if (move) pos = rv.Src_end();
return rv;
}
private boolean Tag__match(boolean bwd, boolean tail, int match_name_id, Int_obj_ref depth_obj, Html_tag tag) {
private boolean Tag__match(boolean move, boolean bwd, boolean tail, int match_name_id, Int_obj_ref depth_obj, Html_tag tag) {
int tag_name_id = tag.Name_id();
if ( tag_name_id != match_name_id // tag doesn't match requested
&& match_name_id != Html_tag_.Id__any // requested is not wildcard
@@ -98,19 +111,21 @@ public class Html_tag_rdr {
return false;
}
}
private Html_tag Tag__extract(boolean tail, int match_name_id, int tag_bgn) {
public Html_tag Tag__extract(boolean move, boolean tail, int match_name_id, int tag_bgn) {
int name_bgn = tag_bgn + 1; if (name_bgn == src_end) return tag__eos; // EX: "<EOS"
byte name_0 = src[name_bgn];
boolean cur_is_tail = false;
switch (name_0) {
case Byte_ascii.Bang: return Tag__comment(tag_bgn); // skip comment; EX: "<!"
case Byte_ascii.Bang:
if (Bry_.Match(src, name_bgn + 1, name_bgn + 3, Bry__comment__mid)) // skip comment; EX: "<!"
return Tag__comment(tag_bgn);
break;
case Byte_ascii.Slash:
++name_bgn; if (name_bgn == src_end) return tag__eos; // EX: "</EOS"
++name_bgn; if (name_bgn == src_end) return tag__eos; // EX: "</EOS"
name_0 = src[name_bgn];
cur_is_tail = true;
break;
}
if (name_0 == Byte_ascii.Bang) return Tag__comment(tag_bgn); // skip comment; EX: "<!"
int name_end = -1, atrs_end = -1, tag_end = -1, name_pos = name_bgn;
byte name_byte = name_0; boolean inline = false;
boolean loop = true;
@@ -145,10 +160,17 @@ public class Html_tag_rdr {
if (tag_end == -1) {
tag_end = Bry_find_.Find_fwd(src, Byte_ascii.Angle_end, name_end, src_end);
if (tag_end == Bry_find_.Not_found) return tag__eos;
atrs_end = tag_end;
int prv_pos = tag_end - 1;
if (src[prv_pos] == Byte_ascii.Slash) {
atrs_end = prv_pos;
inline = true;
}
else
atrs_end = tag_end;
++tag_end; // position after ">"
}
return tag__tmp.Init(this, cur_is_tail, inline, tag_bgn, tag_end, name_end, atrs_end, name_hash.Get_as_int_or(src, name_bgn, name_end, -1));
Html_tag tmp = move ? tag__tmp__move : tag__tmp__peek;
return tmp.Init(this, cur_is_tail, inline, tag_bgn, tag_end, name_end, atrs_end, name_hash.Get_as_int_or(src, name_bgn, name_end, -1));
}
public boolean Read_and_move(byte match) {
byte b = src[pos];
@@ -159,6 +181,10 @@ public class Html_tag_rdr {
else
return false;
}
public int Read_int_to(byte to_char) {
int rv = Read_int_to(to_char, Int_.Max_value); if (rv == Int_.Max_value) rdr.Fail("invalid int", "pos", pos);
return rv;
}
public int Read_int_to(byte to_char, int or_int) {
int bgn = pos;
int rv = 0;
@@ -192,4 +218,5 @@ public class Html_tag_rdr {
int tag_end = Bry_find_.Move_fwd(src, gplx.langs.htmls.Html_tag_.Comm_end, tag_bgn, src_end); if (tag_end == Bry_find_.Not_found) tag_end = src_end;
return tag__comment.Init(this, Bool_.N, Bool_.N, tag_bgn, tag_end, tag_end, tag_end, Html_tag_.Id__comment);
}
private static final byte[] Bry__comment__mid = Bry_.new_a7("--");
}

View File

@@ -31,6 +31,10 @@ public class Html_tag_rdr_tst {
fxt.Test__move_fwd_head(Html_tag_.Id__comment , "<!--2-->") ; fxt.Test__pos("3");
fxt.Test__move_fwd_head(Html_tag_.Id__any , "<div id='1'>") ; fxt.Test__pos("6");
}
@Test public void Meta() {
fxt.Init("<!DOCTYPE html>1<div id='1'>2</div>3");
fxt.Test__move_fwd_head(Html_tag_.Id__div , "<div id='1'>") ; fxt.Test__pos("2");
}
@Test public void Recursive() {
fxt.Init("1<a>2<a>3</a>4</a>5");
fxt.Test__move_fwd_head(Html_tag_.Id__a , "<a>") ; fxt.Test__pos("2");
@@ -39,6 +43,7 @@ public class Html_tag_rdr_tst {
}
class Html_tag_rdr_fxt {
private final Html_tag_rdr rdr = new Html_tag_rdr();
// private final Html_doc_log log = new Html_doc_log();
public void Init(String src_str) {
byte[] src_bry = Bry_.new_u8(src_str);
rdr.Init(src_bry, 0, src_bry.length);

View File

@@ -0,0 +1,23 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.htmls.parsers.styles; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*; import gplx.langs.htmls.parsers.*;
/** Byte-array constants for keys that appear inside an HTML {@code style} attribute. */
public class Html_atr_style_ {
  /** Key of the "width" style declaration. */
  public static final byte[] Bry__width = Bry_.new_a7("width");
}

View File

@@ -0,0 +1,50 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.htmls.parsers.styles; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*; import gplx.langs.htmls.parsers.*;
/** Splits the text of an HTML {@code style} attribute into {@code key:val} declarations and reports each one to a worker. */
public class Html_atr_style_parser {
  /**
   * Scans {@code src[src_bgn..src_end)} for declarations separated by ';' and calls
   * {@code wkr.On_atr} once for every declaration that has a non-empty key.
   *
   * Leading and trailing tab / newline / carriage-return / space around keys and values are
   * excluded from the reported ranges; whitespace inside a value is kept. Only the first ':'
   * of a declaration separates the key from the value, so values such as
   * {@code url(http://x)} are reported intact. The end of input acts as a final ';'.
   *
   * @param src     bytes containing the style text
   * @param src_bgn index of the first byte to scan (inclusive)
   * @param src_end index after the last byte to scan (exclusive)
   * @param wkr     callback for each declaration; when it returns false, scanning stops
   */
  public static void Parse(byte[] src, int src_bgn, int src_end, Html_atr_style_wkr wkr) {
    // NOTE: atr_bgn / atr_end are never assigned a position below, so the worker always receives -1 for both
    int atr_idx = 0, atr_bgn = -1, atr_end = -1, key_bgn = -1, key_end = -1, tmp_bgn = -1, tmp_end = -1;
    int pos = src_bgn;
    while (true) {
      // ">=" rather than "==" so that an inverted range ends the scan instead of running off the array
      boolean pos_is_last = pos >= src_end;
      byte b = pos_is_last ? Byte_ascii.Semic : src[pos]; // treat end of input as a closing ';'
      switch (b) {
        case Byte_ascii.Semic:
          if (key_bgn != -1) { // ignore empty atrs
            // tmp_bgn / tmp_end now delimit the value
            if (!wkr.On_atr(src, atr_idx, atr_bgn, atr_end, key_bgn, key_end, tmp_bgn, tmp_end))
              pos_is_last = true; // worker asked to stop
          }
          ++atr_idx; atr_bgn = -1; atr_end = -1; key_bgn = -1; key_end = -1; tmp_bgn = -1; tmp_end = -1;
          break;
        case Byte_ascii.Colon:
          if (key_bgn == -1) { // first ':' of the declaration: text collected so far is the key
            key_bgn = tmp_bgn;
            key_end = tmp_end;
            tmp_bgn = -1; tmp_end = -1;
          }
          else { // key already known: ':' belongs to the value; EX: "background:url(http://x)"
            if (tmp_bgn == -1) tmp_bgn = pos;
            tmp_end = pos + 1;
          }
          break;
        case Byte_ascii.Tab: case Byte_ascii.Nl: case Byte_ascii.Cr: case Byte_ascii.Space:
          break; // whitespace never starts or extends a range, which trims both ends
        default:
          if (tmp_bgn == -1) tmp_bgn = pos;
          tmp_end = pos + 1;
          break;
      }
      if (pos_is_last) break;
      ++pos;
    }
  }
}

View File

@@ -0,0 +1,41 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.htmls.parsers.styles; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*; import gplx.langs.htmls.parsers.*;
import org.junit.*;
/** Unit tests for Html_atr_style_parser: declaration splitting and whitespace trimming. */
public class Html_atr_style_parser_tst {
  private final Html_atr_style_parser_fxt fxt = new Html_atr_style_parser_fxt();

  @Test public void Basic() {
    // single declaration, without and with a trailing ';'
    fxt.Test__parse("k_0:v_0", new KeyVal[] {fxt.Make("k_0", "v_0")});
    fxt.Test__parse("k_0:v_0;", new KeyVal[] {fxt.Make("k_0", "v_0")});
    // two declarations
    fxt.Test__parse("k_0:v_0;k_1:v_1", new KeyVal[] {fxt.Make("k_0", "v_0"), fxt.Make("k_1", "v_1")});
  }

  @Test public void Ws() {
    // whitespace around keys and values is dropped
    fxt.Test__parse(" k_0 : v_0 ;", new KeyVal[] {fxt.Make("k_0", "v_0")});
    fxt.Test__parse(" k_0 : v_0 ; k_1 : v_1 ", new KeyVal[] {fxt.Make("k_0", "v_0"), fxt.Make("k_1", "v_1")});
    // whitespace inside a value is kept
    fxt.Test__parse(" k_0 : v 0 ;", new KeyVal[] {fxt.Make("k_0", "v 0")});
  }
}
/** Test fixture that parses a style string with the key/value-collecting worker and compares the result to the expected pairs. */
class Html_atr_style_parser_fxt {
  private final Html_atr_style_wkr__kv_list wkr = new Html_atr_style_wkr__kv_list();

  /** Creates an expected key/value pair. */
  public KeyVal Make(String k, String v) {
    KeyVal rv = KeyVal_.new_(k, v);
    return rv;
  }

  /** Parses all of {@code src_str} and asserts that the collected pairs equal {@code expd}. */
  public void Test__parse(String src_str, KeyVal... expd) {
    byte[] src_bry = Bry_.new_u8(src_str);
    int src_len = src_bry.length;
    KeyVal[] actl = wkr.Parse(src_bry, 0, src_len);
    Tfds.Eq_ary_str(expd, actl);
  }
}

View File

@@ -0,0 +1,33 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.htmls.parsers.styles; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*; import gplx.langs.htmls.parsers.*;
/** Callback invoked by Html_atr_style_parser for each key:val declaration found in a style attribute. */
public interface Html_atr_style_wkr {
/**
 * Receives one declaration.
 *
 * @param src      bytes being parsed; all positions below index into it
 * @param atr_idx  0-based position of the declaration; the parser also counts empty declarations
 * @param atr_bgn  NOTE(review): Html_atr_style_parser.Parse never assigns this, so it is passed as -1
 * @param atr_end  NOTE(review): Html_atr_style_parser.Parse never assigns this, so it is passed as -1
 * @param key_bgn  index of the first byte of the key (inclusive)
 * @param key_end  index after the last byte of the key (exclusive)
 * @param val_bgn  index of the first byte of the value (inclusive); -1 if the value has no non-whitespace bytes
 * @param val_end  index after the last byte of the value (exclusive); -1 if the value has no non-whitespace bytes
 * @return true to keep parsing; false makes Html_atr_style_parser.Parse stop after this declaration
 */
boolean On_atr(byte[] src, int atr_idx, int atr_bgn, int atr_end, int key_bgn, int key_end, int val_bgn, int val_end);
}
/** Worker that collects every declaration of a style attribute as a String key/value pair. */
class Html_atr_style_wkr__kv_list implements Html_atr_style_wkr {
  private final List_adp list = List_adp_.new_();

  /** Stores the declaration as a KeyVal and always asks the parser to continue. */
  public boolean On_atr(byte[] src, int atr_idx, int atr_bgn, int atr_end, int key_bgn, int key_end, int val_bgn, int val_end) {
    String key = String_.new_u8(src, key_bgn, key_end);
    String val = String_.new_u8(src, val_bgn, val_end);
    list.Add(KeyVal_.new_(key, val));
    return true;
  }

  /** Parses {@code src[src_bgn..src_end)} and returns the collected pairs; the internal list is emptied for the next call. */
  public KeyVal[] Parse(byte[] src, int src_bgn, int src_end) {
    Html_atr_style_parser.Parse(src, src_bgn, src_end, this);
    Object ary = list.To_ary_and_clear(KeyVal.class);
    return (KeyVal[])ary;
  }
}

View File

@@ -0,0 +1,35 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.htmls.parsers.styles; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*; import gplx.langs.htmls.parsers.*;
/**
 * Worker that finds one key inside a style attribute and returns its value as an int.
 *
 * The instance keeps the state of the current parse in fields, so a single instance
 * must not run two Parse calls at the same time.
 */
public class Html_atr_style_wkr__get_val_as_int implements Html_atr_style_wkr {
  private byte[] find_key;
  private int val_bgn = -1, val_end = -1; // range of the matched value; -1 while nothing has matched

  /**
   * Remembers the value range when the declaration's key equals the key being searched for.
   *
   * @return always true. The parser stops when a worker returns false, so returning the
   *         match result (as this method used to) aborted the scan at the first
   *         non-matching declaration and later declarations were never examined.
   *         Continuing also means that when a key is repeated, the last declaration is used.
   */
  public boolean On_atr(byte[] src, int atr_idx, int atr_bgn, int atr_end, int key_bgn, int key_end, int val_bgn, int val_end) {
    if (Bry_.Match(src, key_bgn, key_end, find_key)) {
      this.val_bgn = val_bgn;
      this.val_end = val_end;
    }
    return true;
  }

  /**
   * Parses {@code src[src_bgn..src_end)} as style text and returns the int value of {@code find_key}.
   *
   * @param src      bytes containing the style text
   * @param src_bgn  index of the first byte to scan (inclusive)
   * @param src_end  index after the last byte to scan (exclusive)
   * @param find_key key to look for
   * @return the value converted by {@code Bry_.To_int_or__lax}, or -1 when the key is absent or its value is empty
   */
  public int Parse(byte[] src, int src_bgn, int src_end, byte[] find_key) {
    this.find_key = find_key;
    // reset state left by a previous call; otherwise a miss would reuse stale offsets against a different src
    this.val_bgn = -1;
    this.val_end = -1;
    Html_atr_style_parser.Parse(src, src_bgn, src_end, this);
    if (val_bgn == -1) return -1; // key not found, or found with no value
    return Bry_.To_int_or__lax(src, val_bgn, val_end, -1);
  }
}