1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2026-03-02 03:49:30 +00:00
This commit is contained in:
gnosygnu
2016-01-03 21:27:38 -05:00
parent 9509363f46
commit 096045614c
647 changed files with 11693 additions and 7648 deletions

View File

@@ -0,0 +1,57 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.htmls; import gplx.*; import gplx.langs.*;
public class Gfh_atr_ {
public static final byte[]
// "coreattrs"
Bry__id = Bry_.new_a7("id")
, Bry__class = Bry_.new_a7("class")
, Bry__style = Bry_.new_a7("style")
, Bry__title = Bry_.new_a7("title")
// "i18n"
, Bry__lang = Bry_.new_a7("lang")
, Bry__dir = Bry_.new_a7("dir")
// <a>
, Bry__href = Bry_.new_a7("href")
, Bry__rel = Bry_.new_a7("rel")
// <img>
, Bry__alt = Bry_.new_a7("alt")
, Bry__src = Bry_.new_a7("src")
, Bry__width = Bry_.new_a7("width")
, Bry__height = Bry_.new_a7("height")
// <table>
//, Bry__width = Bry_.new_a7("width")
, Bry__cellpadding = Bry_.new_a7("cellpadding")
, Bry__cellspacing = Bry_.new_a7("cellspacing")
, Bry__summary = Bry_.new_a7("summary") // HTML.ua
// <table>.borders_and_rules
, Bry__border = Bry_.new_a7("border")
, Bry__frames = Bry_.new_a7("frames")
, Bry__rules = Bry_.new_a7("rules")
// <th>,<td>
, Bry__scope = Bry_.new_a7("scope")
, Bry__rowspan = Bry_.new_a7("rowspan")
, Bry__colspan = Bry_.new_a7("colspan")
, Bry__align = Bry_.new_a7("align") // HTML.v4
, Bry__bgcolor = Bry_.new_a7("bgcolor") // HTML.v4
, Bry__abbr = Bry_.new_a7("abbr") // HTML.ua
;
public static byte[] Make(Bry_bfr bfr, byte[] key, byte[] val) {
return bfr.Add_byte_space().Add(key).Add_byte_eq().Add_byte_quote().Add(val).Add_byte_quote().To_bry_and_clear();
}
}

View File

@@ -16,7 +16,7 @@ You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.htmls; import gplx.*; import gplx.langs.*;
public class Html_bldr_ {
public class Gfh_bldr_ {
public static final byte[]
Bry__a_lhs_bgn = Bry_.new_a7("<a")
, Bry__a_rhs = Bry_.new_a7("</a>")
@@ -34,6 +34,7 @@ public class Html_bldr_ {
, Bry__src__nth = Bry_.new_a7("\" src=\"")
, Bry__width__nth = Bry_.new_a7("\" width=\"")
, Bry__height__nth = Bry_.new_a7("\" height=\"")
, Bry__lhs_end_head = Bry_.new_a7(">")
, Bry__lhs_end_head_w_quote = Bry_.new_a7("\">")
, Bry__lhs_end_inline = Bry_.new_a7("/>")
, Bry__lhs_end_inline_w_quote = Bry_.new_a7("\"/>")

View File

@@ -16,7 +16,7 @@ You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.htmls; import gplx.*; import gplx.langs.*;
public class Html_entity_ {
public class Gfh_entity_ {
public static final String
Nl_str = "&#10;"
;

View File

@@ -16,8 +16,8 @@ You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.htmls; import gplx.*; import gplx.langs.*;
public class Html_nde {
public Html_nde(byte[] src, boolean tag_tid_is_inline, int tag_lhs_bgn, int tag_lhs_end, int tag_rhs_bgn, int tag_rhs_end, int name_bgn, int name_end, int[] cur_atrs, int atrs_idx) {
public class Gfh_nde {
public Gfh_nde(byte[] src, boolean tag_tid_is_inline, int tag_lhs_bgn, int tag_lhs_end, int tag_rhs_bgn, int tag_rhs_end, int name_bgn, int name_end, int[] cur_atrs, int atrs_idx) {
this.src = src;
this.tag_tid_is_inline = tag_tid_is_inline;
this.tag_lhs_bgn = tag_lhs_bgn; this.tag_lhs_end = tag_lhs_end; this.tag_rhs_bgn = tag_rhs_bgn; this.tag_rhs_end = tag_rhs_end; this.name_bgn = name_bgn; this.name_end = name_end;
@@ -32,12 +32,12 @@ public class Html_nde {
public int[] Atrs() {return atrs;} private int[] atrs = Int_.Ary_empty;
public int Atrs_len() {return atrs_len;} private int atrs_len;
public boolean Tag_tid_is_inline() {return tag_tid_is_inline;} private boolean tag_tid_is_inline;
public int Tag_lhs_bgn() {return tag_lhs_bgn;} public Html_nde Tag_lhs_bgn_(int v) {tag_lhs_bgn = v; return this;} private int tag_lhs_bgn;
public int Tag_lhs_end() {return tag_lhs_end;} public Html_nde Tag_lhs_end_(int v) {tag_lhs_end = v; return this;} private int tag_lhs_end;
public int Tag_rhs_bgn() {return tag_rhs_bgn;} public Html_nde Tag_rhs_bgn_(int v) {tag_rhs_bgn = v; return this;} private int tag_rhs_bgn;
public int Tag_rhs_end() {return tag_rhs_end;} public Html_nde Tag_rhs_end_(int v) {tag_rhs_end = v; return this;} private int tag_rhs_end;
public int Name_bgn() {return name_bgn;} public Html_nde Name_bgn_(int v) {name_bgn = v; return this;} private int name_bgn;
public int Name_end() {return name_end;} public Html_nde Name_end_(int v) {name_end = v; return this;} private int name_end;
public int Tag_lhs_bgn() {return tag_lhs_bgn;} public Gfh_nde Tag_lhs_bgn_(int v) {tag_lhs_bgn = v; return this;} private int tag_lhs_bgn;
public int Tag_lhs_end() {return tag_lhs_end;} public Gfh_nde Tag_lhs_end_(int v) {tag_lhs_end = v; return this;} private int tag_lhs_end;
public int Tag_rhs_bgn() {return tag_rhs_bgn;} public Gfh_nde Tag_rhs_bgn_(int v) {tag_rhs_bgn = v; return this;} private int tag_rhs_bgn;
public int Tag_rhs_end() {return tag_rhs_end;} public Gfh_nde Tag_rhs_end_(int v) {tag_rhs_end = v; return this;} private int tag_rhs_end;
public int Name_bgn() {return name_bgn;} public Gfh_nde Name_bgn_(int v) {name_bgn = v; return this;} private int name_bgn;
public int Name_end() {return name_end;} public Gfh_nde Name_end_(int v) {name_end = v; return this;} private int name_end;
public void Clear() {tag_lhs_bgn = tag_rhs_bgn = -1;}
public String Atrs_val_by_key_str(String find_key_str) {return String_.new_u8(Atrs_val_by_key_bry(Bry_.new_u8(find_key_str)));}
public byte[] Atrs_val_by_key_bry(byte[] find_key_bry) {

View File

@@ -17,17 +17,17 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.htmls; import gplx.*; import gplx.langs.*;
import gplx.core.brys.*;
public class Html_parser {
public Html_parser() {
public class Gfh_parser {
public Gfh_parser() {
Bry_bldr bry_bldr = new Bry_bldr();
bry_xnde_name = bry_bldr.New_256().Set_rng_xml_identifier(Scan_valid).Set_rng_ws(Scan_stop).Val();
bry_atr_key = bry_bldr.New_256().Set_rng_xml_identifier(Scan_valid).Set_rng_ws(Scan_stop).Set_many(Scan_stop, Byte_ascii.Eq).Val();
}
byte[] src; int pos, end; byte[] bry_xnde_name, bry_atr_key;
int cur_atrs_idx = 0; int[] cur_atrs = new int[250];// define max of 50 atrs;
public Html_nde[] Parse_as_ary(byte[] src) {return Parse_as_ary(src, 0, src.length, Wildcard, Wildcard);}
public Html_nde[] Parse_as_ary(byte[] src, int bgn, int end) {return Parse_as_ary(src, bgn, end, Wildcard, Wildcard);}
public Html_nde[] Parse_as_ary(byte[] src, int bgn, int end, byte[] find_key, byte[] find_val) { // flattens html into a list of hndes; only used for Options
public Gfh_nde[] Parse_as_ary(byte[] src) {return Parse_as_ary(src, 0, src.length, Wildcard, Wildcard);}
public Gfh_nde[] Parse_as_ary(byte[] src, int bgn, int end) {return Parse_as_ary(src, bgn, end, Wildcard, Wildcard);}
public Gfh_nde[] Parse_as_ary(byte[] src, int bgn, int end, byte[] find_key, byte[] find_val) { // flattens html into a list of hndes; only used for Options
this.src = src; pos = bgn; this.end = end;
List_adp rv = List_adp_.new_();
while (pos < end) {
@@ -37,14 +37,14 @@ public class Html_parser {
if (xnde_init) {
if (Parse_xnde_lhs()) {
if (tag_tid_is_inline)
rv.Add(new Html_nde(src, tag_tid_is_inline, cur_lhs_bgn, cur_lhs_end, cur_rhs_bgn, pos, cur_name_bgn, cur_name_end, cur_atrs, cur_atrs_idx));
rv.Add(new Gfh_nde(src, tag_tid_is_inline, cur_lhs_bgn, cur_lhs_end, cur_rhs_bgn, pos, cur_name_bgn, cur_name_end, cur_atrs, cur_atrs_idx));
else
xnde_init = false;
}
}
else {
if (Parse_xnde_rhs()) {
rv.Add(new Html_nde(src, tag_tid_is_inline, cur_lhs_bgn, cur_lhs_end, cur_rhs_bgn, pos, cur_name_bgn, cur_name_end, cur_atrs, cur_atrs_idx));
rv.Add(new Gfh_nde(src, tag_tid_is_inline, cur_lhs_bgn, cur_lhs_end, cur_rhs_bgn, pos, cur_name_bgn, cur_name_end, cur_atrs, cur_atrs_idx));
}
xnde_init = true;
}
@@ -53,7 +53,7 @@ public class Html_parser {
break;
}
}
return (Html_nde[])rv.To_ary(Html_nde.class);
return (Gfh_nde[])rv.To_ary(Gfh_nde.class);
}
int cur_lhs_bgn, cur_lhs_end, cur_name_bgn, cur_name_end, cur_rhs_bgn; boolean xnde_init = true, tag_tid_is_inline = false;
private boolean Parse_xnde_rhs() {

View File

@@ -17,8 +17,8 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.htmls; import gplx.*; import gplx.langs.*;
import org.junit.*;
public class Html_parser_tst {
@Before public void init() {fxt.Clear();} private Html_parser_fxt fxt = new Html_parser_fxt();
public class Gfh_parser_tst {
@Before public void init() {fxt.Clear();} private Gfh_parser_fxt fxt = new Gfh_parser_fxt();
@Test public void One() {fxt.Test_parse_find_all("<a id='id0'></a>", "id0");}
@Test public void Many() {fxt.Test_parse_find_all("<a id='id0'></a><a id='id1'></a><a id='id2'></a>", "id0", "id1", "id2");}
@Test public void Inline() {fxt.Test_parse_find_all("<a id='id0'/>", "id0");}
@@ -26,25 +26,25 @@ public class Html_parser_tst {
@Test public void Quote_double() {fxt.Test_parse_find_all("<a id='id''0'/>", "id'0");}
@Test public void Quote_escape() {fxt.Test_parse_find_all("<a id='id\\'0'/>", "id'0");}
}
class Html_parser_fxt {
class Gfh_parser_fxt {
public void Clear() {
if (parser == null) {
parser = new Html_parser();
parser = new Gfh_parser();
}
} private Html_parser parser;
public Html_parser_fxt Test_parse_find_all(String raw_str, String... expd) {return Test_parse_find(raw_str, Html_parser.Wildcard_str, Html_parser.Wildcard_str, expd);}
public Html_parser_fxt Test_parse_find(String raw_str, String find_key, String find_val, String... expd) {
} private Gfh_parser parser;
public Gfh_parser_fxt Test_parse_find_all(String raw_str, String... expd) {return Test_parse_find(raw_str, Gfh_parser.Wildcard_str, Gfh_parser.Wildcard_str, expd);}
public Gfh_parser_fxt Test_parse_find(String raw_str, String find_key, String find_val, String... expd) {
byte[] raw = Bry_.new_a7(raw_str);
Html_nde[] actl_ndes = parser.Parse_as_ary(raw, 0, raw.length, Bry_.new_a7(find_key), Bry_.new_a7(find_val));
Gfh_nde[] actl_ndes = parser.Parse_as_ary(raw, 0, raw.length, Bry_.new_a7(find_key), Bry_.new_a7(find_val));
String[] actl = Xto_ids(raw, actl_ndes);
Tfds.Eq_ary_str(expd, actl);
return this;
}
private String[] Xto_ids(byte[] src, Html_nde[] ary) {
private String[] Xto_ids(byte[] src, Gfh_nde[] ary) {
int len = ary.length;
String[] rv = new String[len];
for (int i = 0; i < len; i++) {
Html_nde itm = ary[i];
Gfh_nde itm = ary[i];
String atr_val = itm.Atrs_val_by_key_str("id");
rv[i] = atr_val;
}

View File

@@ -16,12 +16,12 @@ You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.htmls; import gplx.*; import gplx.langs.*;
public class Html_selecter {
public static Html_nde[] Select(byte[] src, Html_nde[] ary, Hash_adp_bry hash) {
public class Gfh_selecter {
public static Gfh_nde[] Select(byte[] src, Gfh_nde[] ary, Hash_adp_bry hash) {
List_adp list = List_adp_.new_();
int xndes_len = ary.length;
for (int i = 0; i < xndes_len; i++) {
Html_nde hnde = ary[i];
Gfh_nde hnde = ary[i];
int[] atrs = hnde.Atrs();
int atrs_len = atrs.length;
for (int j = 0; j < atrs_len; j += 5) {
@@ -33,7 +33,7 @@ public class Html_selecter {
}
}
}
Html_nde[] rv = (Html_nde[])list.To_ary(Html_nde.class);
Gfh_nde[] rv = (Gfh_nde[])list.To_ary(Gfh_nde.class);
list.Clear();
return rv;
}

View File

@@ -16,7 +16,7 @@ You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.htmls; import gplx.*; import gplx.langs.*;
public class Html_tag_ {
public class Gfh_tag_ {
public static final int
Id__eos = -2
, Id__any = -1
@@ -34,11 +34,15 @@ public class Html_tag_ {
, Id__ul = 11
, Id__li = 12
, Id__p = 13
, Id__hr = 14
, Id__br = 14
, Id__hr = 15
, Id__td = 16
;
public static final byte[]
Bry__a = Bry_.new_a7("a")
, Bry__ul = Bry_.new_a7("ul")
, Bry__td = Bry_.new_a7("td")
, Bry__th = Bry_.new_a7("th")
;
public static final Hash_adp_bry Hash = Hash_adp_bry.ci_a7()
.Add_bry_int(Bry__a , Id__a)
@@ -50,8 +54,12 @@ public class Html_tag_ {
.Add_str_int("span" , Id__span)
.Add_str_int("div" , Id__div)
.Add_str_int("img" , Id__img)
.Add_str_int("br" , Id__br)
.Add_str_int("hr" , Id__hr)
.Add_str_int("ul" , Id__ul)
.Add_str_int("li" , Id__li)
.Add_str_int("td" , Id__td)
.Add_str_int("p" , Id__p)
;
public static String To_str(int tid) {
switch (tid) {
@@ -68,12 +76,18 @@ public class Html_tag_ {
case Id__span: return "span";
case Id__div: return "div";
case Id__img: return "img";
case Id__p: return "p";
case Id__br: return "br";
case Id__hr: return "hr";
case Id__td: return "td";
case Id__ul: return "ul";
case Id__li: return "li";
default: throw Err_.new_unhandled(tid);
}
}
public static final byte[]
Br_inl = Bry_.new_a7("<br/>")
, Br_lhs = Bry_.new_a7("<br>")
, Hr_inl = Bry_.new_a7("<hr/>")
, Body_lhs = Bry_.new_a7("<body>") , Body_rhs = Bry_.new_a7("</body>")
, B_lhs = Bry_.new_a7("<b>") , B_rhs = Bry_.new_a7("</b>")

View File

@@ -17,7 +17,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.htmls; import gplx.*; import gplx.langs.*;
import gplx.core.primitives.*; import gplx.core.btries.*; import gplx.langs.htmls.encoders.*;
public class Html_utl {
public class Gfh_utl {
private static final Gfo_url_encoder encoder_id = Gfo_url_encoder_.Id; private static final Bry_bfr tmp_bfr = Bry_bfr.reset_(255);
public static String Encode_id_as_str(byte[] key) {return String_.new_u8(Encode_id_as_bry(key));}
public static byte[] Encode_id_as_bry(byte[] key) {
@@ -44,8 +44,8 @@ public class Html_utl {
dirty = true;
}
switch (quote_byte) {
case Byte_ascii.Apos: bfr.Add(Html_entity_.Apos_num_bry); break;
case Byte_ascii.Quote: bfr.Add(Html_entity_.Quote_bry); break;
case Byte_ascii.Apos: bfr.Add(Gfh_entity_.Apos_num_bry); break;
case Byte_ascii.Quote: bfr.Add(Gfh_entity_.Quote_bry); break;
default: throw Err_.new_unhandled(quote_byte);
}
}
@@ -73,11 +73,11 @@ public class Html_utl {
for (int i = bgn; i < end; i++) {
byte b = bry[i];
switch (b) {
case Byte_ascii.Lt: if (escape_lt) escaped = Html_entity_.Lt_bry; break;
case Byte_ascii.Gt: if (escape_gt) escaped = Html_entity_.Gt_bry; break;
case Byte_ascii.Amp: if (escape_amp) escaped = Html_entity_.Amp_bry; break;
case Byte_ascii.Quote: if (escape_quote) escaped = Html_entity_.Quote_bry; break;
case Byte_ascii.Apos: if (escape_apos) escaped = Html_entity_.Apos_num_bry; break;
case Byte_ascii.Lt: if (escape_lt) escaped = Gfh_entity_.Lt_bry; break;
case Byte_ascii.Gt: if (escape_gt) escaped = Gfh_entity_.Gt_bry; break;
case Byte_ascii.Amp: if (escape_amp) escaped = Gfh_entity_.Amp_bry; break;
case Byte_ascii.Quote: if (escape_quote) escaped = Gfh_entity_.Quote_bry; break;
case Byte_ascii.Apos: if (escape_apos) escaped = Gfh_entity_.Apos_num_bry; break;
default:
if (dirty || write_to_bfr)
bfr.Add_byte(b);
@@ -104,11 +104,11 @@ public class Html_utl {
}
private static final Btrie_slim_mgr unescape_trie = Btrie_slim_mgr.ci_a7()
.Add_bry_byte(Html_entity_.Lt_bry , Byte_ascii.Lt)
.Add_bry_byte(Html_entity_.Gt_bry , Byte_ascii.Gt)
.Add_bry_byte(Html_entity_.Amp_bry , Byte_ascii.Amp)
.Add_bry_byte(Html_entity_.Quote_bry , Byte_ascii.Quote)
.Add_bry_byte(Html_entity_.Apos_num_bry , Byte_ascii.Apos)
.Add_bry_byte(Gfh_entity_.Lt_bry , Byte_ascii.Lt)
.Add_bry_byte(Gfh_entity_.Gt_bry , Byte_ascii.Gt)
.Add_bry_byte(Gfh_entity_.Amp_bry , Byte_ascii.Amp)
.Add_bry_byte(Gfh_entity_.Quote_bry , Byte_ascii.Quote)
.Add_bry_byte(Gfh_entity_.Apos_num_bry , Byte_ascii.Apos)
;
public static String Unescape_as_str(String src) {
Bry_bfr bfr = Bry_bfr.reset_(255);
@@ -162,18 +162,18 @@ public class Html_utl {
public static byte[] Del_comments(Bry_bfr bfr, byte[] src, int pos, int end) {
while (true) {
if (pos >= end) break;
int comm_bgn = Bry_find_.Find_fwd(src, Html_tag_.Comm_bgn, pos); // look for <!--
int comm_bgn = Bry_find_.Find_fwd(src, Gfh_tag_.Comm_bgn, pos); // look for <!--
if (comm_bgn == Bry_find_.Not_found) { // not found; consume rest
bfr.Add_mid(src, pos, end);
break;
}
int comm_end = Bry_find_.Find_fwd(src, Html_tag_.Comm_end, comm_bgn + Html_tag_.Comm_bgn_len); // look for -->
int comm_end = Bry_find_.Find_fwd(src, Gfh_tag_.Comm_end, comm_bgn + Gfh_tag_.Comm_bgn_len); // look for -->
if (comm_end == Bry_find_.Not_found) { // not found; consume rest
bfr.Add_mid(src, pos, end);
break;
}
bfr.Add_mid(src, pos, comm_bgn); // add everything between pos and comm_bgn
pos = comm_end + Html_tag_.Comm_end_len; // reposition pos after comm_end
pos = comm_end + Gfh_tag_.Comm_end_len; // reposition pos after comm_end
}
return bfr.To_bry_and_clear();
}

View File

@@ -17,8 +17,8 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.htmls; import gplx.*; import gplx.langs.*;
import org.junit.*;
public class Html_utl_tst {
@Before public void init() {fxt.Clear();} private Html_atr_class_fxt fxt = new Html_atr_class_fxt();
public class Gfh_utl_tst {
@Before public void init() {fxt.Clear();} private Gfh_class_fxt fxt = new Gfh_class_fxt();
@Test public void Basic() {fxt.Test_del_comments("a<!-- b -->c" , "ac");}
@Test public void Bgn_missing() {fxt.Test_del_comments("a b c" , "a b c");}
@Test public void End_missing() {fxt.Test_del_comments("a<!-- b c" , "a<!-- b c");}
@@ -37,26 +37,26 @@ public class Html_utl_tst {
fxt.Test_unescape_html(Bool_.Y, Bool_.Y, Bool_.Y, Bool_.Y, Bool_.Y, "a&lt;&gt;&#39;&amp;&quot;b" , "a<>'&\"b"); // basic
}
}
class Html_atr_class_fxt {
class Gfh_class_fxt {
private Bry_bfr tmp_bfr = Bry_bfr.reset_(255);
public void Clear() {
tmp_bfr.Clear();
}
public void Test_del_comments(String src, String expd) {
byte[] actl = Html_utl.Del_comments(tmp_bfr, Bry_.new_u8(src));
byte[] actl = Gfh_utl.Del_comments(tmp_bfr, Bry_.new_u8(src));
Tfds.Eq(expd, String_.new_a7(actl));
}
public void Test_escape_html(boolean lt, boolean gt, boolean amp, boolean quote, boolean apos, String src, String expd) {
byte[] actl = Html_utl.Escape_html_as_bry(Bry_.new_a7(src), lt, gt, amp, quote, apos);
byte[] actl = Gfh_utl.Escape_html_as_bry(Bry_.new_a7(src), lt, gt, amp, quote, apos);
Tfds.Eq(expd, String_.new_a7(actl));
}
public void Test_escape_for_atr(String src, boolean quote_is_apos, String expd) {
byte[] actl = Html_utl.Escape_for_atr_val_as_bry(tmp_bfr, quote_is_apos ? Byte_ascii.Apos : Byte_ascii.Quote, src);
byte[] actl = Gfh_utl.Escape_for_atr_val_as_bry(tmp_bfr, quote_is_apos ? Byte_ascii.Apos : Byte_ascii.Quote, src);
Tfds.Eq(expd, String_.new_u8(actl));
}
public void Test_unescape_html(boolean lt, boolean gt, boolean amp, boolean quote, boolean apos, String src, String expd) {
byte[] bry = Bry_.new_u8(src);
byte[] actl = Html_utl.Unescape(false, tmp_bfr, bry, 0, bry.length, lt, gt, amp, quote, apos);
byte[] actl = Gfh_utl.Unescape(false, tmp_bfr, bry, 0, bry.length, lt, gt, amp, quote, apos);
Tfds.Eq(expd, String_.new_a7(actl));
}
}

View File

@@ -16,11 +16,11 @@ You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.htmls; import gplx.*; import gplx.langs.*;
public class Html_wtr {
public class Gfh_wtr {
private Bry_bfr bfr = Bry_bfr.reset_(255);
private List_adp nde_stack = List_adp_.new_();
public byte Atr_quote() {return atr_quote;} public Html_wtr Atr_quote_(byte v) {atr_quote = v; return this;} private byte atr_quote = Byte_ascii.Quote;
public Html_wtr Nde_full_atrs(byte[] tag, byte[] text, boolean text_escape, byte[]... atrs) {
public byte Atr_quote() {return atr_quote;} public Gfh_wtr Atr_quote_(byte v) {atr_quote = v; return this;} private byte atr_quote = Byte_ascii.Quote;
public Gfh_wtr Nde_full_atrs(byte[] tag, byte[] text, boolean text_escape, byte[]... atrs) {
Nde_bgn(tag);
int atrs_len = atrs.length;
for (int i = 0; i < atrs_len; i += 2) {
@@ -36,46 +36,46 @@ public class Html_wtr {
Nde_end();
return this;
}
public Html_wtr Nde_full(byte[] tag, byte[] text) {
public Gfh_wtr Nde_full(byte[] tag, byte[] text) {
Nde_bgn_hdr(tag);
Txt(text);
Nde_end();
return this;
}
public Html_wtr Txt_mid(byte[] src, int bgn, int end) {bfr.Add_mid(src, bgn, end); return this;}
public Html_wtr Txt_byte(byte v) {bfr.Add_byte(v); return this;}
public Html_wtr Txt_raw(byte[] v) {bfr.Add(v); return this;}
public Html_wtr Txt(byte[] v) {
public Gfh_wtr Txt_mid(byte[] src, int bgn, int end) {bfr.Add_mid(src, bgn, end); return this;}
public Gfh_wtr Txt_byte(byte v) {bfr.Add_byte(v); return this;}
public Gfh_wtr Txt_raw(byte[] v) {bfr.Add(v); return this;}
public Gfh_wtr Txt(byte[] v) {
if (v != null) {
bfr.Add(Html_utl.Escape_html_as_bry(v));
bfr.Add(Gfh_utl.Escape_html_as_bry(v));
}
return this;
}
public Html_wtr Nde_bgn_hdr(byte[] name) {
public Gfh_wtr Nde_bgn_hdr(byte[] name) {
this.Nde_bgn(name);
this.Nde_end_hdr();
return this;
}
public Html_wtr Nde_bgn(byte[] name) {
public Gfh_wtr Nde_bgn(byte[] name) {
bfr.Add_byte(Byte_ascii.Lt);
bfr.Add(name);
nde_stack.Add(name);
return this;
}
public Html_wtr Atr(byte[] key, byte[] val) {
public Gfh_wtr Atr(byte[] key, byte[] val) {
Write_atr_bry(bfr, Bool_.Y, atr_quote, key, val);
return this;
}
public Html_wtr Nde_end_inline() {
public Gfh_wtr Nde_end_inline() {
bfr.Add_byte(Byte_ascii.Slash).Add_byte(Byte_ascii.Gt);
List_adp_.Pop_last(nde_stack);
return this;
}
public Html_wtr Nde_end_hdr() {
public Gfh_wtr Nde_end_hdr() {
bfr.Add_byte(Byte_ascii.Gt);
return this;
}
public Html_wtr Nde_end() {
public Gfh_wtr Nde_end() {
byte[] name = (byte[])List_adp_.Pop_last(nde_stack);
bfr.Add_byte(Byte_ascii.Lt).Add_byte(Byte_ascii.Slash);
bfr.Add(name);
@@ -92,7 +92,7 @@ public class Html_wtr {
bfr.Add(key);
bfr.Add_byte(Byte_ascii.Eq);
bfr.Add_byte(atr_quote);
Html_utl.Escape_html_to_bfr(bfr, val, 0, val.length, false, false, false, true, true);
Gfh_utl.Escape_html_to_bfr(bfr, val, 0, val.length, false, false, false, true, true);
bfr.Add_byte(atr_quote);
}
public static void Write_atr_int(Bry_bfr bfr, byte[] key, int val) {Write_atr_int(bfr, Bool_.Y, Byte_ascii.Quote, key, val);}

View File

@@ -15,8 +15,8 @@ GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.htmls.parsers.clses; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*; import gplx.langs.htmls.parsers.*;
public class Html_atr_class_ {
package gplx.langs.htmls.clses; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
public class Gfh_class_ {
public static boolean Has(byte[] src, int src_bgn, int src_end, byte[] cls) {
int cls_bgn = src_bgn;
int pos = src_bgn;

View File

@@ -15,10 +15,10 @@ GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.htmls.parsers.clses; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*; import gplx.langs.htmls.parsers.*;
package gplx.langs.htmls.clses; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
import org.junit.*;
public class Html_atr_class__tst {
private final Html_atr_class__fxt fxt = new Html_atr_class__fxt();
public class Gfh_class__tst {
private final Gfh_class__fxt fxt = new Gfh_class__fxt();
@Test public void Has() {
fxt.Test__has__y("a b c", "a", "b", "c");
fxt.Test__has__n("a b c", "d");
@@ -34,14 +34,14 @@ public class Html_atr_class__tst {
fxt.Test__find_1st(hash, Byte_.Max_value_127, "xyz");
}
}
class Html_atr_class__fxt {
class Gfh_class__fxt {
public void Test__has__y(String src, String... ary) {Test__has(Bool_.Y, src, ary);}
public void Test__has__n(String src, String... ary) {Test__has(Bool_.N, src, ary);}
public void Test__has(boolean expd, String src, String... ary) {
byte[] src_bry = Bry_.new_u8(src);
for (String itm : ary) {
byte[] itm_bry = Bry_.new_u8(itm);
Tfds.Eq_bool(expd, Html_atr_class_.Has(src_bry, 0, src_bry.length, itm_bry), itm);
Tfds.Eq_bool(expd, Gfh_class_.Has(src_bry, 0, src_bry.length, itm_bry), itm);
}
}
public Hash_adp_bry Make_hash(String... ary) {
@@ -53,6 +53,6 @@ class Html_atr_class__fxt {
}
public void Test__find_1st(Hash_adp_bry hash, int expd, String src) {
byte[] src_bry = Bry_.new_u8(src);
Tfds.Eq_byte((byte)expd, Html_atr_class_.Find_1st(src_bry, 0, src_bry.length, hash), src);
Tfds.Eq_byte((byte)expd, Gfh_class_.Find_1st(src_bry, 0, src_bry.length, hash), src);
}
}

View File

@@ -15,15 +15,16 @@ GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.htmls.parsers.clses; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*; import gplx.langs.htmls.parsers.*;
public class Html_atr_class_parser_ {
public static void Parse(Html_tag tag, Html_atr_class_wkr wkr) {
Html_atr atr = tag.Atrs__get_by_or_empty(Html_atr_.Bry__class);
package gplx.langs.htmls.clses; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
import gplx.langs.htmls.docs.*;
public class Gfh_class_parser_ {
public static void Parse(Gfh_tag tag, Gfh_class_parser_wkr wkr) {
Gfh_atr atr = tag.Atrs__get_by_or_empty(Gfh_atr_.Bry__class);
if (atr.Val_dat_exists())
Parse(tag.Src(), atr.Val_bgn(), atr.Val_end(), wkr);
}
public static void Parse(byte[] src, int src_bgn, int src_end, Html_atr_class_wkr wkr) {
int atr_idx = 0, atr_bgn = -1, atr_end = -1, tmp_bgn = -1, tmp_end = -1;
public static void Parse(byte[] src, int src_bgn, int src_end, Gfh_class_parser_wkr wkr) {
int atr_idx = 0, tmp_bgn = -1, tmp_end = -1;
int pos = src_bgn;
while (true) {
boolean pos_is_last = pos == src_end;
@@ -31,10 +32,10 @@ public class Html_atr_class_parser_ {
switch (b) {
case Byte_ascii.Tab: case Byte_ascii.Nl: case Byte_ascii.Cr: case Byte_ascii.Space:
if (tmp_bgn != -1) { // ignore empty atrs
if (!wkr.On_cls(src, atr_idx, atr_bgn, atr_end, tmp_bgn, tmp_end))
if (!wkr.On_cls(src, atr_idx, src_bgn, src_end, tmp_bgn, tmp_end))
pos_is_last = true;
}
++atr_idx; atr_bgn = -1; atr_end = -1; tmp_bgn = -1; tmp_end = -1;
++atr_idx; tmp_bgn = -1; tmp_end = -1;
break;
default:
if (tmp_bgn == -1) tmp_bgn = pos;

View File

@@ -15,22 +15,22 @@ GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.htmls.parsers.clses; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*; import gplx.langs.htmls.parsers.*;
package gplx.langs.htmls.clses; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
import org.junit.*;
public class Html_atr_class_parser__tst {
private final Html_atr_class_parser__fxt fxt = new Html_atr_class_parser__fxt();
public class Gfh_class_parser__tst {
private final Gfh_class_parser__fxt fxt = new Gfh_class_parser__fxt();
@Test public void Basic() {fxt.Test__parse("v1" , "v1");}
@Test public void Many() {fxt.Test__parse("v1 v2" , "v1", "v2");}
}
class Html_atr_class_parser__fxt {
private final Html_atr_class_wkr__list wkr = new Html_atr_class_wkr__list();
class Gfh_class_parser__fxt {
private final Gfh_class_wkr__list wkr = new Gfh_class_wkr__list();
public void Test__parse(String src_str, String... expd) {
byte[] src_bry = Bry_.new_u8(src_str);
String[] actl = wkr.Parse(src_bry, 0, src_bry.length);
Tfds.Eq_ary_str(expd, actl);
}
}
class Html_atr_class_wkr__list implements Html_atr_class_wkr {
class Gfh_class_wkr__list implements Gfh_class_parser_wkr {
private final List_adp list = List_adp_.new_();
public boolean On_cls(byte[] src, int atr_idx, int atr_bgn, int atr_end, int val_bgn, int val_end) {
String s = String_.new_u8(src, val_bgn, val_end);
@@ -38,7 +38,7 @@ class Html_atr_class_wkr__list implements Html_atr_class_wkr {
return true;
}
public String[] Parse(byte[] src, int src_bgn, int src_end) {
Html_atr_class_parser_.Parse(src, src_bgn, src_end, this);
Gfh_class_parser_.Parse(src, src_bgn, src_end, this);
return (String[])list.To_ary_and_clear(String.class);
}
}

View File

@@ -15,7 +15,7 @@ GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.htmls.parsers.clses; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*; import gplx.langs.htmls.parsers.*;
public interface Html_atr_class_wkr {
package gplx.langs.htmls.clses; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
public interface Gfh_class_parser_wkr {
boolean On_cls(byte[] src, int atr_idx, int atr_bgn, int atr_end, int val_bgn, int val_end);
}

View File

@@ -15,14 +15,16 @@ GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.htmls.parsers; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
public class Html_atr extends gplx.core.brys.Bfr_arg_base {
public Html_atr(int idx, byte[] key, byte[] val, byte[] src, int val_bgn, int val_end) {
this.idx = idx; this.key = key; this.val = val;
package gplx.langs.htmls.docs; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
public class Gfh_atr implements gplx.core.brys.Bfr_arg {
public Gfh_atr(int idx, int atr_bgn, int atr_end, byte[] key, byte[] val, byte[] src, int val_bgn, int val_end) {
this.idx = idx; this.atr_bgn = atr_bgn; this.atr_end = atr_end; this.key = key; this.val = val;
this.src = src; this.val_bgn = val_bgn; this.val_end = val_end;
}
public byte[] Src() {return src;} private final byte[] src;
public int Idx() {return idx;} private final int idx;
public int Atr_bgn() {return atr_bgn;} private final int atr_bgn;
public int Atr_end() {return atr_end;} private final int atr_end;
public byte[] Key() {return key;} private final byte[] key;
public int Val_bgn() {return val_bgn;} private final int val_bgn;
public int Val_end() {return val_end;} private final int val_end;
@@ -37,10 +39,9 @@ public class Html_atr extends gplx.core.brys.Bfr_arg_base {
if (val_end > val_bgn)
bfr.Add_mid(src, val_bgn, val_end);
}
@Override public boolean Bfr_arg__exists() {return this.Val_dat_exists();}
@Override public void Bfr_arg__add(Bry_bfr bfr) {
public void Bfr_arg__add(Bry_bfr bfr) {
if (Val_dat_exists())
bfr.Add_mid(src, val_bgn, val_end);
}
public static final Html_atr Noop = new Html_atr(-1, Bry_.Empty, Bry_.Empty, Bry_.Empty, -1, -1);
public static final Gfh_atr Noop = new Gfh_atr(-1, -1, -1, Bry_.Empty, Bry_.Empty, Bry_.Empty, -1, -1);
}

View File

@@ -15,14 +15,14 @@ GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.htmls.parsers; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
package gplx.langs.htmls.docs; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
import gplx.core.btries.*;
public class Html_doc_parser {
public class Gfh_doc_parser {
private final Btrie_slim_mgr trie = Btrie_slim_mgr.cs();
private final Html_txt_wkr txt_wkr;
public Html_doc_parser(Html_txt_wkr txt_wkr, Html_doc_wkr... wkr_ary) {
private final Gfh_txt_wkr txt_wkr;
public Gfh_doc_parser(Gfh_txt_wkr txt_wkr, Gfh_doc_wkr... wkr_ary) {
this.txt_wkr = txt_wkr;
for (Html_doc_wkr wkr : wkr_ary)
for (Gfh_doc_wkr wkr : wkr_ary)
trie.Add_obj(wkr.Hook(), wkr);
}
public void Parse(byte[] page_url, byte[] src, int src_bgn, int src_end) {
@@ -39,10 +39,10 @@ public class Html_doc_parser {
txt_wkr.Parse(txt_bgn, pos);
txt_bgn = -1;
}
Html_doc_wkr wkr = (Html_doc_wkr)o;
Gfh_doc_wkr wkr = (Gfh_doc_wkr)o;
try {pos = wkr.Parse(src, src_bgn, src_end, pos);}
catch (Exception e) {
Html_utl.Log(e, "html parse failed", page_url, src, pos);
Gfh_utl.Log(e, "html parse failed", page_url, src, pos);
txt_bgn = pos; // set txt_bgn to hook_bgn which is "pos"; i.e.: txt resumes from start of failed hook
pos = trie.Match_pos(); // set pos to hook_end
}

View File

@@ -15,8 +15,8 @@ GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.htmls.parsers; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
public interface Html_doc_wkr {
package gplx.langs.htmls.docs; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
public interface Gfh_doc_wkr {
byte[] Hook();
int Parse(byte[] src, int src_bgn, int src_end, int pos);
}

View File

@@ -15,67 +15,79 @@ GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.htmls.parsers; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
import gplx.xowa.parsers.htmls.*; import gplx.langs.htmls.parsers.styles.*; import gplx.langs.htmls.parsers.clses.*;
public class Html_tag implements Mwh_atr_wkr {
private Html_tag_rdr tag_rdr;
package gplx.langs.htmls.docs; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
import gplx.xowa.parsers.htmls.*; import gplx.langs.htmls.styles.*; import gplx.langs.htmls.clses.*;
public class Gfh_tag implements Mwh_atr_wkr {
private Gfh_tag_rdr tag_rdr;
private Ordered_hash atrs_hash; private boolean atrs_null; private int atrs_bgn, atrs_end;
public Html_tag Init(Html_tag_rdr tag_rdr, byte[] src, boolean tag_is_tail, boolean tag_is_inline, int src_bgn, int src_end, int atrs_bgn, int atrs_end, int name_id) {
public Gfh_tag Init(Gfh_tag_rdr tag_rdr, byte[] src, boolean tag_is_tail, boolean tag_is_inline, int src_bgn, int src_end, int atrs_bgn, int atrs_end, int name_id, byte[] name_bry) {
this.tag_rdr = tag_rdr; this.src = src; this.atrs_null = true;
this.tag_is_tail = tag_is_tail; this.tag_is_inline = tag_is_inline;
this.atrs_bgn = atrs_bgn; this.atrs_end = atrs_end;
this.name_id = name_id; this.src_bgn = src_bgn; this.src_end = src_end;
this.name_id = name_id; this.name_bry = name_bry; this.src_bgn = src_bgn; this.src_end = src_end;
return this;
}
public Html_tag Copy() {
Html_tag rv = new Html_tag().Init(tag_rdr, src, tag_is_tail, tag_is_inline, src_bgn, src_end, atrs_bgn, atrs_end, name_id);
public Gfh_tag Copy() {
Gfh_tag rv = new Gfh_tag().Init(tag_rdr, src, tag_is_tail, tag_is_inline, src_bgn, src_end, atrs_bgn, atrs_end, name_id, name_bry);
rv.atrs_null = false;
rv.atrs_hash = Copy(atrs_hash);
return rv;
}
public int Name_id() {return name_id;} private int name_id;
public Html_tag Chk_id(int chk) {
if ( chk == name_id
|| (name_id != Html_tag_.Id__eos && Int_.In(chk, Html_tag_.Id__any, Html_tag_.Id__comment))) {
}
else
tag_rdr.Err_wkr().Fail("name_id chk failed", "expecting", Html_tag_.To_str(chk));
public byte[] Name_bry() {return name_bry;} private byte[] name_bry;
public Gfh_tag Chk_name_or_fail(int chk) {
if (!Chk_name(chk)) tag_rdr.Err_wkr().Fail("name_id chk failed", "expecting", Gfh_tag_.To_str(chk));
return this;
}
public boolean Chk_name(int chk) {
return ( chk == name_id
|| (name_id != Gfh_tag_.Id__eos && Int_.In(chk, Gfh_tag_.Id__any, Gfh_tag_.Id__comment)));
}
public boolean Chk(int chk_name, byte[] chk_cls) {return name_id == chk_name && Atrs__cls_has(chk_cls);}
public byte[] Src() {return src;} private byte[] src;
public int Src_bgn() {return src_bgn;} private int src_bgn;
public int Src_end() {return src_end;} private int src_end;
public boolean Src_exists() {return src_end > src_bgn;} // NOTE: only true if EOS where src_end == src_bgn == src_len
public boolean Tag_is_tail() {return tag_is_tail;} private boolean tag_is_tail;
public boolean Tag_is_inline() {return tag_is_inline;} private boolean tag_is_inline;
public Ordered_hash Atrs__hash() {if (atrs_null) Atrs__make(); return atrs_hash;}
public int Atrs__len() {if (atrs_null) Atrs__make(); return atrs_hash.Count();}
public boolean Atrs__match_pair(byte[] key, byte[] val) {
if (atrs_null) Atrs__make();
Html_atr rv = (Html_atr)atrs_hash.Get_by(key);
Gfh_atr rv = (Gfh_atr)atrs_hash.Get_by(key);
return rv == null ? false : Bry_.Eq(val, rv.Val());
}
public boolean Atrs__cls_has(byte[] val) {
if (atrs_null) Atrs__make();
Html_atr rv = (Html_atr)atrs_hash.Get_by(Html_atr_.Bry__class); if (rv == null) return false;
Gfh_atr rv = (Gfh_atr)atrs_hash.Get_by(Gfh_atr_.Bry__class); if (rv == null) return false;
byte[] rv_val = rv.Val();
return Html_atr_class_.Has(rv_val, 0, rv_val.length, val);
return Gfh_class_.Has(rv_val, 0, rv_val.length, val);
}
public boolean Atrs__cls_eq(byte[] val) {
if (atrs_null) Atrs__make();
Gfh_atr rv = (Gfh_atr)atrs_hash.Get_by(Gfh_atr_.Bry__class); if (rv == null) return false;
return Bry_.Eq(val, rv.Val());
}
public byte Atrs__cls_find_or_fail(Hash_adp_bry hash) {
if (atrs_null) Atrs__make();
Html_atr cls_atr = (Html_atr)atrs_hash.Get_by(Html_atr_.Bry__class); if (cls_atr == null) tag_rdr.Err_wkr().Fail("cls missing");
byte rv = Html_atr_class_.Find_1st(src, cls_atr.Val_bgn(), cls_atr.Val_end(), hash); if (rv == Byte_.Max_value_127) tag_rdr.Err_wkr().Fail("cls val missing");
byte rv = Atrs__cls_find_or(hash, Byte_.Max_value_127); if (rv == Byte_.Max_value_127) tag_rdr.Err_wkr().Fail("cls missing");
return rv;
}
private static final Html_atr_style_wkr__get_val_as_int style_wkr = new Html_atr_style_wkr__get_val_as_int();
public byte Atrs__cls_find_or(Hash_adp_bry hash, byte or) {
if (atrs_null) Atrs__make();
Gfh_atr cls_atr = (Gfh_atr)atrs_hash.Get_by(Gfh_atr_.Bry__class); if (cls_atr == null) return or;
byte rv = Gfh_class_.Find_1st(src, cls_atr.Val_bgn(), cls_atr.Val_end(), hash); if (rv == Byte_.Max_value_127) return or;
return rv;
}
private static final Gfh_style_wkr__val_as_int style_wkr = new Gfh_style_wkr__val_as_int();
public int Atrs__style_get_as_int(byte[] key) {
if (atrs_null) Atrs__make();
Html_atr rv = (Html_atr)atrs_hash.Get_by(Html_atr_.Bry__style); if (rv == null) return -1;
Gfh_atr rv = (Gfh_atr)atrs_hash.Get_by(Gfh_atr_.Bry__style); if (rv == null) return -1;
byte[] rv_val = rv.Val();
return style_wkr.Parse(rv_val, 0, rv_val.length, key);
}
public byte[] Atrs__get_as_bry(byte[] key) {
if (atrs_null) Atrs__make();
Html_atr rv = (Html_atr)atrs_hash.Get_by(key);
Gfh_atr rv = (Gfh_atr)atrs_hash.Get_by(key);
return rv == null ? Bry_.Empty : rv.Val();
}
public int Atrs__get_as_int(byte[] key) {
@@ -84,18 +96,18 @@ public class Html_tag implements Mwh_atr_wkr {
}
public int Atrs__get_as_int_or(byte[] key, int or) {
if (atrs_null) Atrs__make();
Html_atr rv = (Html_atr)atrs_hash.Get_by(key); if (rv == null) return or;
Gfh_atr rv = (Gfh_atr)atrs_hash.Get_by(key); if (rv == null) return or;
return Bry_.To_int_or(src, rv.Val_bgn(), rv.Val_end(), or);
}
public Html_atr Atrs__get_at(int i) {return (Html_atr)atrs_hash.Get_at(i);}
public Html_atr Atrs__get_by_or_fail(byte[] key) {return Atrs__get_by_or_fail(key, Bool_.Y);}
public Html_atr Atrs__get_by_or_empty(byte[] key) {return Atrs__get_by_or_fail(key, Bool_.N);}
public Html_atr Atrs__get_by_or_fail(byte[] key, boolean fail_if_null) {
public Gfh_atr Atrs__get_at(int i) {return (Gfh_atr)atrs_hash.Get_at(i);}
public Gfh_atr Atrs__get_by_or_fail(byte[] key) {return Atrs__get_by_or_fail(key, Bool_.Y);}
public Gfh_atr Atrs__get_by_or_empty(byte[] key) {return Atrs__get_by_or_fail(key, Bool_.N);}
public Gfh_atr Atrs__get_by_or_fail(byte[] key, boolean fail_if_null) {
if (atrs_null) Atrs__make();
Html_atr rv = (Html_atr)atrs_hash.Get_by(key);
Gfh_atr rv = (Gfh_atr)atrs_hash.Get_by(key);
if (rv == null) {
if (fail_if_null) tag_rdr.Err_wkr().Fail("atr missing", "key", key);
else return Html_atr.Noop;
else return Gfh_atr.Noop;
}
return rv;
}
@@ -104,7 +116,7 @@ public class Html_tag implements Mwh_atr_wkr {
Bry_bfr bfr = Bry_bfr.new_();
int len = atrs_hash.Count();
for (int i = 0; i < len; ++i) {
Html_atr atr = (Html_atr)atrs_hash.Get_at(i);
Gfh_atr atr = (Gfh_atr)atrs_hash.Get_at(i);
bfr.Add(atr.Key()).Add_byte_eq().Add(atr.Val()).Add_byte_nl();
}
return bfr.To_str();
@@ -119,20 +131,22 @@ public class Html_tag implements Mwh_atr_wkr {
if (!valid) return;
byte[] val_bry = val_bry_manual;
int val_bgn = -1, val_end = -1;
int atr_bgn = itm_ary[itm_idx + Mwh_atr_mgr.Idx_atr_bgn];
int atr_end = itm_ary[itm_idx + Mwh_atr_mgr.Idx_atr_end];
if (key_exists) {
val_bgn = itm_ary[itm_idx + Mwh_atr_mgr.Idx_val_bgn];
val_end = itm_ary[itm_idx + Mwh_atr_mgr.Idx_val_end];
}
else
val_bry = key_bry;
Html_atr atr = new Html_atr(atrs_hash.Count(), key_bry, val_bry, src, val_bgn, val_end);
Gfh_atr atr = new Gfh_atr(atrs_hash.Count(), atr_bgn, atr_end, key_bry, val_bry, src, val_bgn, val_end);
atrs_hash.Add(key_bry, atr);
}
private static Ordered_hash Copy(Ordered_hash src) {
Ordered_hash rv = Ordered_hash_.New();
int len = src.Count();
for (int i = 0; i < len; ++i) {
Html_atr atr = (Html_atr)src.Get_at(i);
Gfh_atr atr = (Gfh_atr)src.Get_at(i);
rv.Add(atr.Key(), atr);
}
return rv;

View File

@@ -15,35 +15,35 @@ GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.htmls.parsers; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
package gplx.langs.htmls.docs; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
import gplx.core.primitives.*; import gplx.core.brys.*; import gplx.core.btries.*;
import gplx.xowa.parsers.htmls.*;
public class Html_tag_rdr {
private final Hash_adp_bry name_hash = Html_tag_.Hash;
public class Gfh_tag_rdr {
private final Hash_adp_bry name_hash = Gfh_tag_.Hash;
private final Mwh_atr_parser atr_parser = new Mwh_atr_parser();
private final Html_tag tag__tmp__move = new Html_tag(), tag__tmp__peek = new Html_tag(), tag__eos = new Html_tag(), tag__comment = new Html_tag();
private final Gfh_tag tag__tmp__move = new Gfh_tag(), tag__tmp__peek = new Gfh_tag(), tag__eos = new Gfh_tag(), tag__comment = new Gfh_tag();
private final Int_obj_ref tmp_depth = Int_obj_ref.zero_();
public byte[] Src() {return src;} private byte[] src;
public int Src_end() {return src_end;} private int src_end;
public Bry_err_wkr Err_wkr() {return err_wkr;} private final Bry_err_wkr err_wkr = new Bry_err_wkr();
public void Init(byte[] ctx, byte[] src, int src_bgn, int src_end) {
this.src = src; this.pos = src_bgn; this.src_end = src_end;
tag__eos.Init(this, src, Bool_.N, Bool_.N, src_end, src_end, src_end, src_end, Html_tag_.Id__eos);
tag__eos.Init(this, src, Bool_.N, Bool_.N, src_end, src_end, src_end, src_end, Gfh_tag_.Id__eos, Bry_.Empty);
err_wkr.Init_by_page(String_.new_u8(ctx), src);
}
public int Pos() {return pos;} private int pos;
public void Pos_(int v) {this.pos = v;}
public void Atrs__make(Mwh_atr_wkr atr_wkr, int head_bgn, int head_end) {atr_parser.Parse(atr_wkr, -1, -1, src, head_bgn, head_end);}
public Html_tag Tag__move_fwd_head() {return Tag__find(Bool_.Y, Bool_.N, Bool_.N, pos, src_end, Html_tag_.Id__any);}
public Html_tag Tag__move_fwd_head(int match_name_id) {return Tag__find(Bool_.Y, Bool_.N, Bool_.N, pos, src_end, match_name_id);}
public Html_tag Tag__move_fwd_tail(int match_name_id) {return Tag__find(Bool_.Y, Bool_.N, Bool_.Y, pos, src_end, match_name_id);}
public Html_tag Tag__peek_fwd_head() {return Tag__find(Bool_.N, Bool_.N, Bool_.N, pos, src_end, Html_tag_.Id__any);}
public Html_tag Tag__peek_fwd_head(int match_name_id) {return Tag__find(Bool_.N, Bool_.N, Bool_.N, pos, src_end, match_name_id);}
public Html_tag Tag__peek_fwd_tail(int match_name_id) {return Tag__find(Bool_.N, Bool_.N, Bool_.Y, pos, src_end, match_name_id);}
public Html_tag Tag__peek_bwd_tail(int match_name_id) {return Tag__find(Bool_.N, Bool_.Y, Bool_.Y, pos, src_end, match_name_id);}
public Html_tag Tag__peek_bwd_head() {return Tag__find(Bool_.N, Bool_.Y, Bool_.Y, pos, src_end, Html_tag_.Id__any);}
public Html_tag Tag__find_fwd_head(int bgn, int end, int match_name_id) {return Tag__find(Bool_.N, Bool_.N, Bool_.N, bgn, end, match_name_id);}
private Html_tag Tag__find(boolean move, boolean bwd, boolean tail, int rng_bgn, int rng_end, int match_name_id) {
public Gfh_tag Tag__move_fwd_head() {return Tag__find(Bool_.Y, Bool_.N, Bool_.N, pos, src_end, Gfh_tag_.Id__any);}
public Gfh_tag Tag__move_fwd_head(int match_name_id) {return Tag__find(Bool_.Y, Bool_.N, Bool_.N, pos, src_end, match_name_id);}
public Gfh_tag Tag__move_fwd_tail(int match_name_id) {return Tag__find(Bool_.Y, Bool_.N, Bool_.Y, pos, src_end, match_name_id);}
public Gfh_tag Tag__peek_fwd_head() {return Tag__find(Bool_.N, Bool_.N, Bool_.N, pos, src_end, Gfh_tag_.Id__any);}
public Gfh_tag Tag__peek_fwd_head(int match_name_id) {return Tag__find(Bool_.N, Bool_.N, Bool_.N, pos, src_end, match_name_id);}
public Gfh_tag Tag__peek_fwd_tail(int match_name_id) {return Tag__find(Bool_.N, Bool_.N, Bool_.Y, pos, src_end, match_name_id);}
public Gfh_tag Tag__peek_bwd_tail(int match_name_id) {return Tag__find(Bool_.N, Bool_.Y, Bool_.Y, pos, src_end, match_name_id);}
public Gfh_tag Tag__peek_bwd_head() {return Tag__find(Bool_.N, Bool_.Y, Bool_.Y, pos, src_end, Gfh_tag_.Id__any);}
public Gfh_tag Tag__find_fwd_head(int bgn, int end, int match_name_id) {return Tag__find(Bool_.N, Bool_.N, Bool_.N, bgn, end, match_name_id);}
private Gfh_tag Tag__find(boolean move, boolean bwd, boolean tail, int rng_bgn, int rng_end, int match_name_id) {
int tmp = rng_bgn;
int stop_pos = rng_end; int adj = 1;
if (bwd) {
@@ -52,7 +52,7 @@ public class Html_tag_rdr {
--tmp; // subtract 1 from tmp; needed when pos is at src_len, else array error below
}
tmp_depth.Val_zero_();
Html_tag rv = null;
Gfh_tag rv = null;
while (tmp != stop_pos) {
if (src[tmp] == Byte_ascii.Angle_bgn) {
rv = Tag__extract(move, tail, match_name_id, tmp);
@@ -68,20 +68,20 @@ public class Html_tag_rdr {
}
if (rv == null) {
if (move && tail && !bwd)
err_wkr.Fail("move failed", "tag_name", Html_tag_.To_str(match_name_id));
err_wkr.Fail("move failed", "tag_name", Gfh_tag_.To_str(match_name_id));
else
return Tag__eos(rng_bgn);
}
if (move) pos = rv.Src_end();
return rv;
}
private boolean Tag__match(boolean move, boolean bwd, boolean tail, int match_name_id, Int_obj_ref depth_obj, Html_tag tag) {
private boolean Tag__match(boolean move, boolean bwd, boolean tail, int match_name_id, Int_obj_ref depth_obj, Gfh_tag tag) {
int tag_name_id = tag.Name_id();
if ( tag_name_id != match_name_id // tag doesn't match requested
&& match_name_id != Html_tag_.Id__any // requested is not wildcard
&& match_name_id != Gfh_tag_.Id__any // requested is not wildcard
) return false;
if (tag_name_id == Html_tag_.Id__comment) {
if (match_name_id == Html_tag_.Id__comment)
if (tag_name_id == Gfh_tag_.Id__comment) {
if (match_name_id == Gfh_tag_.Id__comment)
return true;
else
return false;
@@ -108,7 +108,7 @@ public class Html_tag_rdr {
return false;
}
}
public Html_tag Tag__extract(boolean move, boolean tail, int match_name_id, int tag_bgn) {
public Gfh_tag Tag__extract(boolean move, boolean tail, int match_name_id, int tag_bgn) {
int name_bgn = tag_bgn + 1; if (name_bgn == src_end) return Tag__eos(tag_bgn); // EX: "<EOS"
byte name_0 = src[name_bgn];
boolean cur_is_tail = false;
@@ -166,8 +166,8 @@ public class Html_tag_rdr {
atrs_end = tag_end;
++tag_end; // position after ">"
}
Html_tag tmp = move ? tag__tmp__move : tag__tmp__peek;
return tmp.Init(this, src, cur_is_tail, inline, tag_bgn, tag_end, name_end, atrs_end, name_hash.Get_as_int_or(src, name_bgn, name_end, -1));
Gfh_tag tmp = move ? tag__tmp__move : tag__tmp__peek;
return tmp.Init(this, src, cur_is_tail, inline, tag_bgn, tag_end, name_end, atrs_end, name_hash.Get_as_int_or(src, name_bgn, name_end, -1), Bry_.Mid(src, name_bgn, name_end));
}
public boolean Read_and_move(byte match) {
byte b = src[pos];
@@ -211,13 +211,13 @@ public class Html_tag_rdr {
}
return bgn == pos ? or_int : rv * negative;
}
private Html_tag Tag__comment(int tag_bgn) {
int tag_end = Bry_find_.Move_fwd(src, gplx.langs.htmls.Html_tag_.Comm_end, tag_bgn, src_end); if (tag_end == Bry_find_.Not_found) tag_end = src_end;
return tag__comment.Init(this, src, Bool_.N, Bool_.N, tag_bgn, tag_end, tag_end, tag_end, Html_tag_.Id__comment);
private Gfh_tag Tag__comment(int tag_bgn) {
int tag_end = Bry_find_.Move_fwd(src, gplx.langs.htmls.Gfh_tag_.Comm_end, tag_bgn, src_end); if (tag_end == Bry_find_.Not_found) tag_end = src_end;
return tag__comment.Init(this, src, Bool_.N, Bool_.N, tag_bgn, tag_end, tag_end, tag_end, Gfh_tag_.Id__comment, Bry_.Empty);
}
private Html_tag Tag__eos(int tag_bgn) {
private Gfh_tag Tag__eos(int tag_bgn) {
int tag_end = tag_bgn + 255; if (tag_end > src_end) tag_end = src_end;
return tag__comment.Init(this, src, Bool_.N, Bool_.N, tag_bgn, tag_end, tag_end, tag_end, Html_tag_.Id__eos);
return tag__comment.Init(this, src, Bool_.N, Bool_.N, tag_bgn, tag_end, tag_end, tag_end, Gfh_tag_.Id__eos, Bry_.Empty);
}
private static final byte[] Bry__comment__mid = Bry_.new_a7("--");
}

View File

@@ -15,10 +15,10 @@ GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.htmls.parsers; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
package gplx.langs.htmls.docs; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
import org.junit.*;
public class Html_tag_rdr_tst {
private final Html_tag_rdr_fxt fxt = new Html_tag_rdr_fxt();
public class Gfh_tag_rdr_tst {
private final Gfh_tag_rdr_fxt fxt = new Gfh_tag_rdr_fxt();
@Test public void Basic() {
fxt.Init("1<div id='1'>2</div>3<div id='2'>4</div>5<div id='3'>6</div>7");
fxt.Test__move_fwd_head("<div id='1'>"); fxt.Test__pos("2");
@@ -28,46 +28,46 @@ public class Html_tag_rdr_tst {
}
@Test public void Comment() {
fxt.Init("1<!--2-->3<!--4-->5<div id='1'>6</div>");
fxt.Test__move_fwd_head(Html_tag_.Id__comment , "<!--2-->") ; fxt.Test__pos("3");
fxt.Test__move_fwd_head(Html_tag_.Id__any , "<div id='1'>") ; fxt.Test__pos("6");
fxt.Test__move_fwd_head(Gfh_tag_.Id__comment , "<!--2-->") ; fxt.Test__pos("3");
fxt.Test__move_fwd_head(Gfh_tag_.Id__any , "<div id='1'>") ; fxt.Test__pos("6");
}
@Test public void Meta() {
fxt.Init("<!DOCTYPE html>1<div id='1'>2</div>3");
fxt.Test__move_fwd_head(Html_tag_.Id__div , "<div id='1'>") ; fxt.Test__pos("2");
fxt.Test__move_fwd_head(Gfh_tag_.Id__div , "<div id='1'>") ; fxt.Test__pos("2");
}
@Test public void Recursive__same_tags() {
fxt.Init("1<a>2<a>3</a>4</a>5");
fxt.Test__move_fwd_head(Html_tag_.Id__a , "<a>") ; fxt.Test__pos("2");
fxt.Test__move_fwd_tail(Html_tag_.Id__a , "</a>") ; fxt.Test__pos("5");
fxt.Test__move_fwd_head(Gfh_tag_.Id__a , "<a>") ; fxt.Test__pos("2");
fxt.Test__move_fwd_tail(Gfh_tag_.Id__a , "</a>") ; fxt.Test__pos("5");
}
@Test public void Recursive__diff_tags() {
fxt.Init("1<div>2<a>3<img/>4</a>5</div>6");
fxt.Test__move_fwd_head(Html_tag_.Id__div , "<div>") ; fxt.Test__pos("2");
fxt.Test__move_fwd_tail(Html_tag_.Id__div , "</div>") ; fxt.Test__pos("6");
fxt.Test__move_fwd_head(Gfh_tag_.Id__div , "<div>") ; fxt.Test__pos("2");
fxt.Test__move_fwd_tail(Gfh_tag_.Id__div , "</div>") ; fxt.Test__pos("6");
}
}
class Html_tag_rdr_fxt {
private final Html_tag_rdr rdr = new Html_tag_rdr();
class Gfh_tag_rdr_fxt {
private final Gfh_tag_rdr rdr = new Gfh_tag_rdr();
public void Init(String src_str) {
byte[] src_bry = Bry_.new_u8(src_str);
rdr.Init(Bry_.Empty, src_bry, 0, src_bry.length);
}
public void Test__move_fwd_head(String expd) {Test__move_fwd_head(Html_tag_.Id__any, expd);}
public void Test__move_fwd_head(String expd) {Test__move_fwd_head(Gfh_tag_.Id__any, expd);}
public void Test__move_fwd_head(int match_name_id, String expd) {
Html_tag actl_tag = rdr.Tag__move_fwd_head(match_name_id).Chk_id(match_name_id);
Gfh_tag actl_tag = rdr.Tag__move_fwd_head(match_name_id).Chk_name_or_fail(match_name_id);
Tfds.Eq_str(expd, String_.new_u8(rdr.Src(), actl_tag.Src_bgn(), actl_tag.Src_end()));
}
public void Test__move_fwd_tail(int match_name_id, String expd) {
Html_tag actl_tag = rdr.Tag__move_fwd_tail(match_name_id);
Gfh_tag actl_tag = rdr.Tag__move_fwd_tail(match_name_id);
Tfds.Eq_str(expd, String_.new_u8(rdr.Src(), actl_tag.Src_bgn(), actl_tag.Src_end()));
}
public void Test__peek_fwd_head(String expd) {
Html_tag actl_tag = rdr.Tag__peek_fwd_head();
Gfh_tag actl_tag = rdr.Tag__peek_fwd_head();
Tfds.Eq_str(expd, String_.new_u8(rdr.Src(), actl_tag.Src_bgn(), actl_tag.Src_end()));
}
public void Test__peek_bwd_tail(String expd_str) {
byte[] expd_bry = Bry_.new_u8(expd_str);
Html_tag actl_tag = rdr.Tag__peek_bwd_tail(-1);
Gfh_tag actl_tag = rdr.Tag__peek_bwd_tail(-1);
Tfds.Eq_bry(expd_bry, Bry_.Mid(rdr.Src(), actl_tag.Src_bgn(), actl_tag.Src_bgn() + expd_bry.length));
}
public void Test__pos(String expd_str) {

View File

@@ -15,7 +15,7 @@ GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.htmls.parsers; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
public interface Html_txt_wkr {
package gplx.langs.htmls.docs; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
public interface Gfh_txt_wkr {
void Parse(int rng_bgn, int rng_end);
}

View File

@@ -1,38 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
//namespace gplx.langs.htmls.parsers {
// public class Html_doc_log {
// private byte[] src; private byte[] page_url; private String wkr_name; private int src_bgn; private int src_end;
// public Html_doc_log Init_by_page(byte[] src, byte[] page_url) {
// this.src = src; this.page_url = page_url;
// return this;
// }
// public Html_doc_log Init_by_wkr(String wkr_name, int src_bgn, int src_end) {
// this.wkr_name = wkr_name; this.src_bgn = src_bgn; this.src_end = src_end;
// return this;
// }
// public Err Fail_w_args(String fail_msg, params Object[] custom_args) {return Fail_w_excerpt(fail_msg, src_bgn, src_end + 255, custom_args);}
// public Err Fail_w_excerpt(String fail_msg, int excerpt_bgn, int excerpt_end, params Object[] custom_args) {
// Object[] dflt_args = Object_.Ary("page", page_url, "wkr", wkr_name, "excerpt", Bry_.Mid_safe(src, excerpt_bgn, excerpt_end));
// Object[] fail_args = Object_.Ary_add(custom_args, dflt_args);
// String msg = Err_msg.To_str(fail_msg, fail_args);
// Gfo_usr_dlg_.Instance.Warn_many("", "", msg);
// return Err_.new_("Xoh_hdoc_err", msg);
// }
// }
//}

View File

@@ -15,20 +15,11 @@ GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.htmls; import gplx.*; import gplx.langs.*;
public class Html_atr_ {
public static final byte[]
Bry__id = Bry_.new_a7("id")
, Bry__class = Bry_.new_a7("class")
, Bry__title = Bry_.new_a7("title")
, Bry__style = Bry_.new_a7("style")
// <a>
, Bry__href = Bry_.new_a7("href")
, Bry__rel = Bry_.new_a7("rel")
// <img>
, Bry__alt = Bry_.new_a7("alt")
, Bry__src = Bry_.new_a7("src")
, Bry__width = Bry_.new_a7("width")
, Bry__height = Bry_.new_a7("height")
;
package gplx.langs.htmls.styles; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
public class Gfh_style_itm implements To_str_able {
public Gfh_style_itm(int idx, byte[] key, byte[] val) {this.idx = idx; this.key = key; this.val = val;}
public int Idx() {return idx;} private final int idx;
public byte[] Key() {return key;} private final byte[] key;
public byte[] Val() {return val;} private final byte[] val;
public String To_str() {return String_.new_u8(Bry_.Add(key, Byte_ascii.Colon_bry, val, Byte_ascii.Semic_bry));}
}

View File

@@ -15,9 +15,10 @@ GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.htmls.parsers.styles; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*; import gplx.langs.htmls.parsers.*;
public class Html_atr_style_ {
package gplx.langs.htmls.styles; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
public class Gfh_style_key_ {
public static final byte[]
Bry__width = Bry_.new_a7("width")
Bry__width = Bry_.new_a7("width")
, Bry__margin = Bry_.new_a7("margin")
;
}

View File

@@ -15,15 +15,16 @@ GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.htmls.parsers.styles; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*; import gplx.langs.htmls.parsers.*;
public class Html_atr_style_parser_ {
public static void Parse(Html_tag tag, Html_atr_style_wkr wkr) {
Html_atr atr = tag.Atrs__get_by_or_empty(Html_atr_.Bry__style);
package gplx.langs.htmls.styles; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
import gplx.langs.htmls.docs.*;
public class Gfh_style_parser_ {
public static void Parse(Gfh_tag tag, Gfh_style_wkr wkr) {
Gfh_atr atr = tag.Atrs__get_by_or_empty(Gfh_atr_.Bry__style);
if (atr.Val_dat_exists())
Parse(tag.Src(), atr.Val_bgn(), atr.Val_end(), wkr);
}
public static void Parse(byte[] src, int src_bgn, int src_end, Html_atr_style_wkr wkr) {
int atr_idx = 0, atr_bgn = -1, atr_end = -1, key_bgn = -1, key_end = -1, tmp_bgn = -1, tmp_end = -1;
public static void Parse(byte[] src, int src_bgn, int src_end, Gfh_style_wkr wkr) {
int atr_idx = 0, key_bgn = -1, key_end = -1, tmp_bgn = -1, tmp_end = -1;
int pos = src_bgn;
while (true) {
boolean pos_is_last = pos == src_end;
@@ -31,10 +32,10 @@ public class Html_atr_style_parser_ {
switch (b) {
case Byte_ascii.Semic:
if (key_bgn != -1) { // ignore empty atrs
if (!wkr.On_atr(src, atr_idx, atr_bgn, atr_end, key_bgn, key_end, tmp_bgn, tmp_end))
if (!wkr.On_atr(src, atr_idx, src_bgn, src_end, key_bgn, key_end, tmp_bgn, tmp_end))
pos_is_last = true;
}
++atr_idx; atr_bgn = -1; atr_end = -1; key_bgn = -1; key_end = -1; tmp_bgn = -1; tmp_end = -1;
++atr_idx; key_bgn = -1; key_end = -1; tmp_bgn = -1; tmp_end = -1;
break;
case Byte_ascii.Colon:
key_bgn = tmp_bgn;

View File

@@ -15,10 +15,10 @@ GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.htmls.parsers.styles; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*; import gplx.langs.htmls.parsers.*;
package gplx.langs.htmls.styles; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
import org.junit.*;
public class Html_atr_style_parser__tst {
private final Html_atr_style_parser__fxt fxt = new Html_atr_style_parser__fxt();
public class Gfh_style_parser__tst {
private final Gfh_style_parser__fxt fxt = new Gfh_style_parser__fxt();
@Test public void Basic() {
fxt.Test__parse("k_0:v_0" , fxt.Make("k_0", "v_0"));
fxt.Test__parse("k_0:v_0;" , fxt.Make("k_0", "v_0"));
@@ -29,25 +29,17 @@ public class Html_atr_style_parser__tst {
fxt.Test__parse(" k_0 : v_0 ; k_1 : v_1 " , fxt.Make("k_0", "v_0"), fxt.Make("k_1", "v_1"));
fxt.Test__parse(" k_0 : v 0 ;" , fxt.Make("k_0", "v 0"));
}
@Test public void Empty() {
fxt.Test__parse("k_0:v_0;;" , fxt.Make("k_0", "v_0"));
fxt.Test__parse("k_0:v_0; ; " , fxt.Make("k_0", "v_0"));
}
}
class Html_atr_style_parser__fxt {
private final Html_atr_style_wkr__kv_list wkr = new Html_atr_style_wkr__kv_list();
public KeyVal Make(String k, String v) {return KeyVal_.new_(k, v);}
public void Test__parse(String src_str, KeyVal... expd) {
class Gfh_style_parser__fxt {
private final Gfh_style_wkr__ary wkr = Gfh_style_wkr__ary.Instance;
public Gfh_style_itm Make(String k, String v) {return new Gfh_style_itm(-1, Bry_.new_u8(k), Bry_.new_u8(v));}
public void Test__parse(String src_str, Gfh_style_itm... expd) {
byte[] src_bry = Bry_.new_u8(src_str);
KeyVal[] actl = wkr.Parse(src_bry, 0, src_bry.length);
Gfh_style_itm[] actl = wkr.Parse(src_bry, 0, src_bry.length);
Tfds.Eq_ary_str(expd, actl);
}
}
class Html_atr_style_wkr__kv_list implements Html_atr_style_wkr {
private final List_adp list = List_adp_.new_();
public boolean On_atr(byte[] src, int atr_idx, int atr_bgn, int atr_end, int key_bgn, int key_end, int val_bgn, int val_end) {
KeyVal kv = KeyVal_.new_(String_.new_u8(src, key_bgn, key_end), String_.new_u8(src, val_bgn, val_end));
list.Add(kv);
return true;
}
public KeyVal[] Parse(byte[] src, int src_bgn, int src_end) {
Html_atr_style_parser_.Parse(src, src_bgn, src_end, this);
return (KeyVal[])list.To_ary_and_clear(KeyVal.class);
}
}

View File

@@ -15,7 +15,7 @@ GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.htmls.parsers.styles; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*; import gplx.langs.htmls.parsers.*;
public interface Html_atr_style_wkr {
package gplx.langs.htmls.styles; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
public interface Gfh_style_wkr {
boolean On_atr(byte[] src, int atr_idx, int atr_bgn, int atr_end, int key_bgn, int key_end, int val_bgn, int val_end);
}

View File

@@ -0,0 +1,32 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.htmls.styles; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
public class Gfh_style_wkr__ary implements Gfh_style_wkr {
private final List_adp list = List_adp_.new_();
public boolean On_atr(byte[] src, int atr_idx, int atr_bgn, int atr_end, int key_bgn, int key_end, int val_bgn, int val_end) {
byte[] key = Bry_.Mid(src, key_bgn, key_end);
byte[] val = Bry_.Mid(src, val_bgn, val_end);
list.Add(new Gfh_style_itm(list.Count(), key, val));
return true;
}
public Gfh_style_itm[] Parse(byte[] src, int src_bgn, int src_end) {
Gfh_style_parser_.Parse(src, src_bgn, src_end, this);
return (Gfh_style_itm[])list.To_ary_and_clear(Gfh_style_itm.class);
}
public static final Gfh_style_wkr__ary Instance = new Gfh_style_wkr__ary(); Gfh_style_wkr__ary() {}
}

View File

@@ -15,8 +15,8 @@ GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.htmls.parsers.styles; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*; import gplx.langs.htmls.parsers.*;
public class Html_atr_style_wkr__get_val_as_int implements Html_atr_style_wkr {
package gplx.langs.htmls.styles; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
public class Gfh_style_wkr__val_as_int implements Gfh_style_wkr {
private byte[] find_key;
private int val_bgn, val_end;
public boolean On_atr(byte[] src, int atr_idx, int atr_bgn, int atr_end, int key_bgn, int key_end, int val_bgn, int val_end) {
@@ -29,7 +29,7 @@ public class Html_atr_style_wkr__get_val_as_int implements Html_atr_style_wkr {
}
public int Parse(byte[] src, int src_bgn, int src_end, byte[] find_key) {
this.find_key = find_key;
Html_atr_style_parser_.Parse(src, src_bgn, src_end, this);
Gfh_style_parser_.Parse(src, src_bgn, src_end, this);
return Bry_.To_int_or__lax(src, val_bgn, val_end, -1);
}
}

View File

@@ -23,7 +23,7 @@ class Json_itm_str extends Json_itm_base {
@Override public byte Tid() {return Json_itm_.Tid__str;}
@Override public void Print_as_json(Bry_bfr bfr, int depth) {
bfr.Add_byte(Byte_ascii.Quote);
gplx.langs.htmls.Html_utl.Escape_html_to_bfr(bfr, doc.Src(), this.Src_bgn(), this.Src_end(), true, true, true, true, false); // false to apos for backwards compatibility
gplx.langs.htmls.Gfh_utl.Escape_html_to_bfr(bfr, doc.Src(), this.Src_bgn(), this.Src_end(), true, true, true, true, false); // false to apos for backwards compatibility
bfr.Add_byte(Byte_ascii.Quote);
}
@Override public Object Data() {