1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2026-03-02 03:49:30 +00:00

v2.11.1.1

This commit is contained in:
gnosygnu
2015-11-01 20:50:05 -05:00
parent 4f43f51b18
commit b990ec409f
858 changed files with 6758 additions and 4187 deletions

View File

@@ -56,8 +56,8 @@ class Gfs_lxr_comment_flat implements Gfs_lxr {
public int Process(Gfs_parser_ctx ctx, int lxr_bgn, int lxr_end) {
byte[] src = ctx.Src(); int src_len = ctx.Src_len();
int end_pos = Bry_find_.Find_fwd(src, end_bry, lxr_end, src_len);
// if (end_pos == Bry_.NotFound) throw Err_.new_fmt_("comment is not closed: {0}", String_.new_u8(end_bry));
return (end_pos == Bry_.NotFound)
// if (end_pos == Bry_find_.Not_found) throw Err_.new_fmt_("comment is not closed: {0}", String_.new_u8(end_bry));
return (end_pos == Bry_find_.Not_found)
? src_len // allow eos to terminate flat comment; needed for "tidy-always-adds-nl-in-textarea" fix; NOTE: DATE:2014-06-21
: end_pos + end_bry_len; // position after end_bry
}
@@ -150,7 +150,7 @@ class Gfs_lxr_quote implements Gfs_lxr {
public int Process(Gfs_parser_ctx ctx, int lxr_bgn, int lxr_end) {
byte[] src = ctx.Src(); int src_len = ctx.Src_len();
int end_pos = Bry_find_.Find_fwd(src, end_bry, lxr_end, src_len);
if (end_pos == Bry_.NotFound) throw Err_.new_wo_type("quote is not closed", "end", String_.new_u8(end_bry));
if (end_pos == Bry_find_.Not_found) throw Err_.new_wo_type("quote is not closed", "end", String_.new_u8(end_bry));
Bry_bfr bfr = ctx.Tmp_bfr().Clear();
int prv_pos = lxr_end;
int nxt_pos = end_pos + end_bry_len;
@@ -160,7 +160,7 @@ class Gfs_lxr_quote implements Gfs_lxr {
bfr.Add(end_bry); // add end_bry
prv_pos = nxt_pos + end_bry_len; // set prv_pos to after doubled end_bry
end_pos = Bry_find_.Find_fwd(src, end_bry, prv_pos, src_len);
if (end_pos == Bry_.NotFound) throw Err_.new_wo_type("quote is not closed", "end", String_.new_u8(end_bry));
if (end_pos == Bry_find_.Not_found) throw Err_.new_wo_type("quote is not closed", "end", String_.new_u8(end_bry));
nxt_pos = end_pos + end_bry_len;
if (!Bry_.Match(src, nxt_pos, nxt_pos + end_bry_len, end_bry)) {
bfr.Add_mid(src, prv_pos, end_pos);

View File

@@ -88,14 +88,14 @@ class Gfs_err_mgr {
}
public static final String Fail_msg_invalid_lxr = "invalid character", Fail_msg_unknown_char = "unknown char", Fail_msg_eos = "end of stream", Fail_msg_nde_stack_empty = "node stack empty";
String Fail_msg(String type, KeyValList fail_args) {
tmp_fail_bfr.Add_str(type).Add_byte(Byte_ascii.Colon);
tmp_fail_bfr.Add_str_u8(type).Add_byte(Byte_ascii.Colon);
int len = fail_args.Count();
for (int i = 0; i < len; i++) {
tmp_fail_bfr.Add_byte(Byte_ascii.Space);
KeyVal kv = fail_args.GetAt(i);
tmp_fail_bfr.Add_str(kv.Key());
tmp_fail_bfr.Add_str_u8(kv.Key());
tmp_fail_bfr.Add_byte(Byte_ascii.Eq).Add_byte(Byte_ascii.Apos);
tmp_fail_bfr.Add_str(kv.Val_to_str_or_empty()).Add_byte(Byte_ascii.Apos);
tmp_fail_bfr.Add_str_u8(kv.Val_to_str_or_empty()).Add_byte(Byte_ascii.Apos);
}
return tmp_fail_bfr.To_str_and_clear();
}

View File

@@ -17,13 +17,12 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.htmls; import gplx.*; import gplx.langs.*;
public class Html_atr_ {
public static final String
Src_str = "src"
;
public static final byte[]
Id_bry = Bry_.new_a7("id")
, Cls_bry = Bry_.new_a7("class")
, Style_bry = Bry_.new_a7("style")
, Href_bry = Bry_.new_a7("href")
Bry__id = Bry_.new_a7("id")
, Bry__class = Bry_.new_a7("class")
, Bry__rel = Bry_.new_a7("rel")
, Bry__href = Bry_.new_a7("href")
, Bry__title = Bry_.new_a7("title")
, Bry__style = Bry_.new_a7("style")
;
}

View File

@@ -18,7 +18,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
package gplx.langs.htmls; import gplx.*; import gplx.langs.*;
import org.junit.*;
public class Html_parser_tst {
@Before public void init() {fxt.Clear();} private Xoh_parser_fxt fxt = new Xoh_parser_fxt();
@Before public void init() {fxt.Clear();} private Html_parser_fxt fxt = new Html_parser_fxt();
@Test public void One() {fxt.Test_parse_find_all("<a id='id0'></a>", "id0");}
@Test public void Many() {fxt.Test_parse_find_all("<a id='id0'></a><a id='id1'></a><a id='id2'></a>", "id0", "id1", "id2");}
@Test public void Inline() {fxt.Test_parse_find_all("<a id='id0'/>", "id0");}
@@ -26,14 +26,14 @@ public class Html_parser_tst {
@Test public void Quote_double() {fxt.Test_parse_find_all("<a id='id''0'/>", "id'0");}
@Test public void Quote_escape() {fxt.Test_parse_find_all("<a id='id\\'0'/>", "id'0");}
}
class Xoh_parser_fxt {
class Html_parser_fxt {
public void Clear() {
if (parser == null) {
parser = new Html_parser();
}
} private Html_parser parser;
public Xoh_parser_fxt Test_parse_find_all(String raw_str, String... expd) {return Test_parse_find(raw_str, Html_parser.Wildcard_str, Html_parser.Wildcard_str, expd);}
public Xoh_parser_fxt Test_parse_find(String raw_str, String find_key, String find_val, String... expd) {
public Html_parser_fxt Test_parse_find_all(String raw_str, String... expd) {return Test_parse_find(raw_str, Html_parser.Wildcard_str, Html_parser.Wildcard_str, expd);}
public Html_parser_fxt Test_parse_find(String raw_str, String find_key, String find_val, String... expd) {
byte[] raw = Bry_.new_a7(raw_str);
Html_nde[] actl_ndes = parser.Parse_as_ary(raw, 0, raw.length, Bry_.new_a7(find_key), Bry_.new_a7(find_val));
String[] actl = Xto_ids(raw, actl_ndes);

View File

@@ -29,9 +29,12 @@ public class Html_tag_ {
Br_inl = Bry_.new_a7("<br/>")
, Hr_inl = Bry_.new_a7("<hr/>")
, Body_lhs = Bry_.new_a7("<body>") , Body_rhs = Bry_.new_a7("</body>")
, A_lhs_bgn = Bry_.new_a7("<a")
, A_rhs = Bry_.new_a7("</a>")
, B_lhs = Bry_.new_a7("<b>") , B_rhs = Bry_.new_a7("</b>")
, I_lhs = Bry_.new_a7("<i>") , I_rhs = Bry_.new_a7("</i>")
, P_lhs = Bry_.new_a7("<p>") , P_rhs = Bry_.new_a7("</p>")
, Pre_lhs = Bry_.new_a7("<pre>") , Pre_rhs = Bry_.new_a7("</pre>")
, Div_lhs = Bry_.new_a7("<div>") , Div_rhs = Bry_.new_a7("</div>")
, Html_rhs = Bry_.new_a7("</html>")
, Head_lhs_bgn = Bry_.new_a7("<head")
@@ -41,6 +44,7 @@ public class Html_tag_ {
, Script_lhs = Bry_.new_a7("<script>")
, Script_lhs_w_type = Bry_.new_a7("<script type='text/javascript'>")
, Script_rhs = Bry_.new_a7("</script>")
, Span_lhs = Bry_.new_a7("<span")
, Span_rhs = Bry_.new_a7("</span>")
;
@@ -56,4 +60,28 @@ public class Html_tag_ {
Comm_bgn_len = Comm_bgn.length
, Comm_end_len = Comm_end.length
;
public static final int
Id__eos = -2
, Id__any = -1
, Id__unknown = 0
, Id__a = 1
, Id__h2 = 2
, Id__h3 = 3
, Id__h4 = 4
, Id__h5 = 5
, Id__h6 = 6
, Id__span = 7
, Id__div = 8
, Id__comment = 9
;
public static final Hash_adp_bry Hash = Hash_adp_bry.ci_a7()
.Add_str_int("a" , Id__a)
.Add_str_int("h2" , Id__h2)
.Add_str_int("h3" , Id__h3)
.Add_str_int("h4" , Id__h4)
.Add_str_int("h5" , Id__h5)
.Add_str_int("h6" , Id__h6)
.Add_str_int("span" , Id__span)
.Add_str_int("div" , Id__div)
;
}

View File

@@ -0,0 +1,35 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.htmls.parsers; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
// One parsed attribute of an HTML tag: its ordinal, its key, and a value that is either supplied
// up front or sliced from the source bytes on first request.
public class Html_atr {
  private final int idx;
  private final byte[] key;
  private final byte[] src;
  private final int val_bgn;
  private final int val_end;
  private byte[] val; // null until supplied by the constructor or materialized by Val()

  public Html_atr(int idx, byte[] key, byte[] val, byte[] src, int val_bgn, int val_end) {
    this.idx = idx;
    this.key = key;
    this.val = val;
    this.src = src;
    this.val_bgn = val_bgn;
    this.val_end = val_end;
  }

  public int Idx() {return idx;}
  public byte[] Key() {return key;}
  public int Val_bgn() {return val_bgn;}
  public int Val_end() {return val_end;}

  // Returns the value, extracting src[val_bgn..val_end) the first time when no value was given.
  public byte[] Val() {
    if (val != null) return val;
    val = Bry_.Mid(src, val_bgn, val_end);
    return val;
  }

  // Placeholder instance with index -1 and empty key / value / source.
  public static final Html_atr Noop = new Html_atr(-1, Bry_.Empty, Bry_.Empty, Bry_.Empty, 0, 0);
}

View File

@@ -0,0 +1,60 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.htmls.parsers; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
import gplx.core.btries.*;
// Scans a byte range, handing spans that start with a registered hook to the matching
// Html_doc_wkr and everything in between to the registered Html_txt_wkr.
public class Html_doc_parser {
  private final Btrie_slim_mgr trie = Btrie_slim_mgr.cs();
  private final List_adp list = List_adp_.new_();
  private Html_txt_wkr txt_wkr;

  // Sets the worker that receives unhooked text; returns this for chaining.
  public Html_doc_parser Reg_txt(Html_txt_wkr txt_wkr) {
    this.txt_wkr = txt_wkr;
    return this;
  }

  // Registers each worker under its hook bytes and remembers it so Parse can initialize it.
  public void Reg(Html_doc_wkr... wkr_ary) {
    int ary_len = wkr_ary.length;
    for (int i = 0; i < ary_len; ++i) {
      Html_doc_wkr itm = wkr_ary[i];
      trie.Add_obj(itm.Hook(), itm);
      list.Add(itm);
    }
  }

  // Initializes all workers with the source, then walks src[src_bgn..src_end).
  public void Parse(byte[] src, int src_bgn, int src_end) {
    txt_wkr.Init(src, src_bgn, src_end);
    int wkr_count = list.Count();
    for (int i = 0; i < wkr_count; ++i)
      ((Html_doc_wkr)list.Get_at(i)).Init(src, src_bgn, src_end);

    final int none = -1;
    int cur = src_bgn;
    int pending_txt_bgn = none; // start of the current run of unhooked bytes, or none
    while (cur < src_end) {
      Html_doc_wkr hooked = (Html_doc_wkr)trie.Match_bgn(src, cur, src_end);
      if (hooked != null) {
        if (pending_txt_bgn != none) { // flush text collected before this hook
          txt_wkr.Parse(pending_txt_bgn, cur);
          pending_txt_bgn = none;
        }
        cur = hooked.Parse(cur); // worker decides where scanning resumes
        continue;
      }
      if (pending_txt_bgn == none) pending_txt_bgn = cur;
      ++cur;
    }
    if (pending_txt_bgn != none) txt_wkr.Parse(pending_txt_bgn, src_end); // trailing text
  }
}

View File

@@ -0,0 +1,23 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.htmls.parsers; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
// Worker that Html_doc_parser dispatches to when this worker's hook bytes match at the current scan position.
public interface Html_doc_wkr {
// Byte sequence under which Html_doc_parser.Reg adds this worker to its trie.
byte[] Hook();
// Called by Html_doc_parser.Parse for every registered worker, before scanning starts, with the source and its bounds.
void Init(byte[] src, int src_bgn, int src_end);
// Called with the position at which the hook matched; the returned int becomes the parser's next scan position.
int Parse(int pos);
}

View File

@@ -0,0 +1,96 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.htmls.parsers; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
import gplx.xowa.parsers.htmls.*;
// A single HTML tag located by Html_tag_rdr: its name id, its source range, head/tail/inline flags,
// and a lazily built hash of its attributes (keyed by attribute-name bytes).
// Instances are re-initialized through Init; use Copy to obtain an independent snapshot.
public class Html_tag implements Mwh_atr_wkr {
  private Html_tag_rdr rdr;
  private Ordered_hash atrs_hash; private boolean atrs_null; private int atrs_bgn, atrs_end;

  // (Re)initializes this tag. Attributes are marked as not-yet-parsed; they are parsed on first access.
  public Html_tag Init(Html_tag_rdr rdr, boolean tag_is_tail, boolean tag_is_inline, int src_bgn, int src_end, int atrs_bgn, int atrs_end, int name_id) {
    this.rdr = rdr; this.atrs_null = true;
    this.tag_is_tail = tag_is_tail; this.tag_is_inline = tag_is_inline;
    this.atrs_bgn = atrs_bgn; this.atrs_end = atrs_end;
    this.name_id = name_id; this.src_bgn = src_bgn; this.src_end = src_end;
    return this;
  }

  // Returns a new tag with the same fields and its own, fully populated attribute hash.
  public Html_tag Copy() {
    // Parse attributes first if that has not happened since the last Init. Otherwise atrs_hash is
    // either null (NullPointerException in the helper) or still holds the attributes of whatever
    // tag this instance described before it was re-initialized.
    if (atrs_null) Atrs__make();
    Html_tag rv = new Html_tag().Init(rdr, tag_is_tail, tag_is_inline, src_bgn, src_end, atrs_bgn, atrs_end, name_id);
    rv.atrs_null = false;
    rv.atrs_hash = Copy(atrs_hash);
    return rv;
  }

  public int Name_id() {return name_id;} private int name_id;
  public int Src_bgn() {return src_bgn;} private int src_bgn;
  public int Src_end() {return src_end;} private int src_end;
  public boolean Tag_is_tail() {return tag_is_tail;} private boolean tag_is_tail;
  public boolean Tag_is_inline() {return tag_is_inline;} private boolean tag_is_inline;

  // True when an attribute named key exists and its value equals val.
  public boolean Atrs__match_pair(byte[] key, byte[] val) {
    if (atrs_null) Atrs__make();
    Html_atr rv = (Html_atr)atrs_hash.Get_by(key);
    return rv != null && Bry_.Eq(val, rv.Val());
  }

  // Returns the value of the attribute named key, or Bry_.Empty when there is no such attribute.
  public byte[] Atrs__get_as_bry(byte[] key) {
    if (atrs_null) Atrs__make();
    Html_atr rv = (Html_atr)atrs_hash.Get_by(key);
    return rv == null ? Bry_.Empty : rv.Val();
  }

  // Returns the attribute named key, or Html_atr.Noop when there is no such attribute.
  public Html_atr Atrs__get_by(byte[] key) {
    if (atrs_null) Atrs__make();
    Html_atr rv = (Html_atr)atrs_hash.Get_by(key);
    return rv == null ? Html_atr.Noop : rv;
  }

  // Debug helper: one "key=val" line per attribute, in insertion order.
  public String Atrs__print() {
    if (atrs_null) Atrs__make();
    Bry_bfr bfr = Bry_bfr.new_();
    int len = atrs_hash.Count();
    for (int i = 0; i < len; ++i) {
      Html_atr atr = (Html_atr)atrs_hash.Get_at(i);
      bfr.Add(atr.Key()).Add_byte_eq().Add(atr.Val()).Add_byte_nl();
    }
    return bfr.To_str();
  }

  // Clears (or creates) the attribute hash and asks the reader to parse atrs_bgn..atrs_end,
  // which calls back into On_atr_each for every attribute.
  private void Atrs__make() {
    atrs_null = false;
    if (atrs_hash == null) atrs_hash = Ordered_hash_.New_bry();
    else atrs_hash.Clear();
    rdr.Atrs__make(this, atrs_bgn, atrs_end);
  }

  // Attribute-parser callback: records one valid attribute in atrs_hash.
  public void On_atr_each (Mwh_atr_parser mgr, byte[] src, int nde_tid, boolean valid, boolean repeated, boolean key_exists, byte[] key_bry, byte[] val_bry_manual, int[] itm_ary, int itm_idx) {
    if (!valid) return;
    byte[] val_bry = val_bry_manual;
    int val_bgn = -1, val_end = -1;
    if (!key_exists)
      val_bry = key_bry; // no separate key: the key bytes double as the value
    else if (val_bry == null) { // no manual value: remember the range so Html_atr.Val() can slice it lazily
      val_bgn = itm_ary[itm_idx + Mwh_atr_mgr.Idx_val_bgn];
      val_end = itm_ary[itm_idx + Mwh_atr_mgr.Idx_val_end];
    }
    Html_atr atr = new Html_atr(atrs_hash.Count(), key_bry, val_bry, src, val_bgn, val_end);
    atrs_hash.Add(key_bry, atr);
  }

  // Shallow-copies an attribute hash; returns null for a null source.
  private static Ordered_hash Copy(Ordered_hash src) {
    if (src == null) return null;
    // Same hash kind as Atrs__make, so the copy's byte[]-keyed lookups behave like the original's.
    Ordered_hash rv = Ordered_hash_.New_bry();
    int len = src.Count();
    for (int i = 0; i < len; ++i) {
      Html_atr atr = (Html_atr)src.Get_at(i);
      rv.Add(atr.Key(), atr);
    }
    return rv;
  }
}

View File

@@ -0,0 +1,195 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.htmls.parsers; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
import gplx.core.primitives.*; import gplx.core.btries.*;
import gplx.xowa.parsers.htmls.*;
// Cursor-style reader over HTML bytes: finds the next / previous tag relative to the current
// position and offers a few primitive readers (single byte, integer).
// NOTE: tags returned by the Tag__* methods are shared, reused instances (tag__tmp, tag__eos,
// tag__comment); call Html_tag.Copy() to keep one beyond the next Tag__* call.
public class Html_tag_rdr {
  private final Hash_adp_bry name_hash = Html_tag_.Hash;
  private final Mwh_atr_parser atr_parser = new Mwh_atr_parser();
  private final Html_tag tag__tmp = new Html_tag(), tag__eos = new Html_tag(), tag__comment = new Html_tag();
  private final Int_obj_ref tmp_depth = Int_obj_ref.zero_();
  public byte[] Src() {return src;} private byte[] src;
  public int Src_end() {return src_end;} private int src_end;

  // Points the reader at src, places the cursor at src_bgn and prepares the end-of-stream tag.
  public void Init(byte[] src, int src_bgn, int src_end) {
    this.src = src; this.pos = src_bgn; this.src_end = src_end;
    tag__eos.Init(this, Bool_.N, Bool_.N, src_end, src_end, src_end, src_end, Html_tag_.Id__eos);
  }
  public int Pos() {return pos;} private int pos;
  public void Pos_(int v) {this.pos = v;}

  // Parses the attributes in src[head_bgn..head_end), reporting each one to atr_wkr.
  public void Atrs__make(Mwh_atr_wkr atr_wkr, int head_bgn, int head_end) {atr_parser.Parse(atr_wkr, -1, -1, src, head_bgn, head_end);}

  // "move" variants advance the cursor past the returned tag; "peek" variants leave it untouched.
  // All of them return the end-of-stream tag (Id__eos) when nothing matches.
  public Html_tag Tag__move_fwd_head() {return Tag__find(Bool_.Y, Bool_.N, Bool_.N, Html_tag_.Id__any);}
  public Html_tag Tag__move_fwd_head(int match_name_id) {return Tag__find(Bool_.Y, Bool_.N, Bool_.N, match_name_id);}
  public Html_tag Tag__move_fwd_tail(int match_name_id) {return Tag__find(Bool_.Y, Bool_.N, Bool_.Y, match_name_id);}
  public Html_tag Tag__peek_fwd_head() {return Tag__find(Bool_.N, Bool_.N, Bool_.N, Html_tag_.Id__any);}
  public Html_tag Tag__peek_fwd_head(int match_name_id) {return Tag__find(Bool_.N, Bool_.N, Bool_.N, match_name_id);}
  public Html_tag Tag__peek_fwd_tail(int match_name_id) {return Tag__find(Bool_.N, Bool_.N, Bool_.Y, match_name_id);}
  public Html_tag Tag__peek_bwd_tail(int match_name_id) {return Tag__find(Bool_.N, Bool_.Y, Bool_.Y, match_name_id);}
  // Looks backward for a head tag: tail must be N here, otherwise this is just Tag__peek_bwd_tail(Id__any).
  public Html_tag Tag__peek_bwd_head() {return Tag__find(Bool_.N, Bool_.Y, Bool_.N, Html_tag_.Id__any);}

  // Core search. Walks from pos (forward, or backward starting at pos - 1) until a "<" yields a
  // tag accepted by Tag__match; returns tag__eos when the walk runs off either end.
  private Html_tag Tag__find(boolean move, boolean bwd, boolean tail, int match_name_id) {
    int tmp = pos;
    int stop_pos = src_end; int adj = 1;
    if (bwd) {
      stop_pos = -1;
      adj = -1;
      --tmp; // subtract 1 from tmp; needed when pos is at src_len, else array error below
    }
    tmp_depth.Val_zero_();
    Html_tag rv = null;
    while (tmp != stop_pos) {
      if (src[tmp] == Byte_ascii.Angle_bgn) {
        rv = Tag__extract(tail, match_name_id, tmp);
        if (Tag__match(bwd, tail, match_name_id, tmp_depth, rv))
          break;
        else {
          // Rejected tag: skip it. Going backward, step to the byte before this "<". For real tags
          // that equals rv.Src_bgn() - 1, but for tag__eos (unterminated "<") Src_bgn is src_end,
          // which would send the cursor back to the end and loop forever.
          tmp = bwd ? tmp - 1 : rv.Src_end();
          rv = null;
        }
      }
      else
        tmp += adj;
    }
    if (rv == null) rv = tag__eos;
    if (move) pos = rv.Src_end();
    return rv;
  }

  // Decides whether tag satisfies the search. depth_obj counts head tags passed while searching
  // forward for a tail, so that each one consumes one later tail before a tail is accepted.
  private boolean Tag__match(boolean bwd, boolean tail, int match_name_id, Int_obj_ref depth_obj, Html_tag tag) {
    int tag_name_id = tag.Name_id();
    if ( tag_name_id != match_name_id // tag doesn't match requested
      && match_name_id != Html_tag_.Id__any // requested is not wildcard
      ) return false;
    if (tag_name_id == Html_tag_.Id__comment) // comments only match when asked for explicitly
      return match_name_id == Html_tag_.Id__comment;
    int depth = depth_obj.Val();
    boolean tag_is_tail = tag.Tag_is_tail();
    if (tail == tag_is_tail) {
      if (depth == 0)
        return true;
      else {
        depth_obj.Val_add(-1); // closes a nested head seen earlier
        return false;
      }
    }
    else {
      if (!bwd && tail && !tag_is_tail) // forward search for a tail ran into another head: nest
        depth_obj.Val_add(1);
      return false;
    }
  }

  // Builds a tag description for the "<" at tag_bgn. Returns tag__eos when the source ends before
  // the tag is complete, and a comment tag for "<!" / "</!".
  private Html_tag Tag__extract(boolean tail, int match_name_id, int tag_bgn) {
    int name_bgn = tag_bgn + 1; if (name_bgn == src_end) return tag__eos; // EX: "<EOS"
    byte name_0 = src[name_bgn];
    boolean cur_is_tail = false;
    switch (name_0) {
      case Byte_ascii.Bang: return Tag__comment(tag_bgn); // skip comment; EX: "<!"
      case Byte_ascii.Slash:
        ++name_bgn; if (name_bgn == src_end) return tag__eos; // EX: "</EOS"
        name_0 = src[name_bgn];
        cur_is_tail = true;
        break;
    }
    if (name_0 == Byte_ascii.Bang) return Tag__comment(tag_bgn); // skip comment; EX: "<!"
    int name_end = -1, atrs_end = -1, tag_end = -1, name_pos = name_bgn;
    byte name_byte = name_0; boolean inline = false;
    boolean loop = true;
    while (true) { // scan the tag name until ">", "/>" or whitespace
      switch (name_byte) {
        case Byte_ascii.Angle_end: // EX: "<a>"
          name_end = atrs_end = name_pos;
          tag_end = name_end + 1;
          loop = false;
          break;
        case Byte_ascii.Slash: // EX: "<a/>"
          name_end = name_pos;
          tag_end = name_pos + 1; if (tag_end == src_end) return tag__eos; // EX: "<a/EOS"
          if (src[tag_end] == Byte_ascii.Angle_end) {
            atrs_end = name_end;
            inline = true;
            loop = false;
          }
          else { // "/" not followed by ">": keep scanning
            name_end = tag_end = -1;
          }
          break;
        case Byte_ascii.Tab: case Byte_ascii.Nl: case Byte_ascii.Cr: case Byte_ascii.Space:
          name_end = name_pos;
          loop = false;
          break;
      }
      if (!loop) break;
      ++name_pos; if (name_pos == src_end) return tag__eos; // EX: "<abEOS"
      name_byte = src[name_pos];
    }
    if (tag_end == -1) { // name ended on whitespace: attributes run up to the next ">"
      tag_end = Bry_find_.Find_fwd(src, Byte_ascii.Angle_end, name_end, src_end);
      if (tag_end == Bry_find_.Not_found) return tag__eos;
      atrs_end = tag_end;
      ++tag_end; // position after ">"
    }
    return tag__tmp.Init(this, cur_is_tail, inline, tag_bgn, tag_end, name_end, atrs_end, name_hash.Get_as_int_or(src, name_bgn, name_end, -1));
  }

  // Consumes the byte at the cursor when it equals match; returns whether it did.
  // Returns false (instead of reading past the window) when the cursor is already at src_end.
  public boolean Read_and_move(byte match) {
    if (pos >= src_end) return false;
    byte b = src[pos];
    if (b == match) {
      ++pos;
      return true;
    }
    else
      return false;
  }

  // Reads an optionally negative decimal integer starting at the cursor and consumes the byte that
  // ends it. Returns or_int when that byte is not to_char, on a second "-", or when nothing was read.
  public int Read_int_to(byte to_char, int or_int) {
    int bgn = pos;
    int rv = 0;
    int negative = 1;
    while (pos < src_end) {
      byte b = src[pos++];
      switch (b) {
        case Byte_ascii.Num_0: case Byte_ascii.Num_1: case Byte_ascii.Num_2: case Byte_ascii.Num_3: case Byte_ascii.Num_4:
        case Byte_ascii.Num_5: case Byte_ascii.Num_6: case Byte_ascii.Num_7: case Byte_ascii.Num_8: case Byte_ascii.Num_9:
          rv = (rv * 10) + (b - Byte_ascii.Num_0);
          break;
        case Byte_ascii.Dash:
          if (negative == -1) // 2nd negative
            return or_int; // return or_int
          else // 1st negative
            negative = -1; // flag negative
          break;
        default: {
          boolean match = b == to_char;
          if (to_char == Byte_ascii.Null) {// hack for Read_int_to_non_num
            --pos; // leave the terminating byte unconsumed
            match = true;
          }
          return match ? rv * negative : or_int;
        }
      }
    }
    return bgn == pos ? or_int : rv * negative;
  }

  // Builds the comment tag for "<!" at tag_bgn; an unterminated comment extends to src_end.
  private Html_tag Tag__comment(int tag_bgn) {
    int tag_end = Bry_find_.Move_fwd(src, gplx.langs.htmls.Html_tag_.Comm_end, tag_bgn, src_end); if (tag_end == Bry_find_.Not_found) tag_end = src_end;
    return tag__comment.Init(this, Bool_.N, Bool_.N, tag_bgn, tag_end, tag_end, tag_end, Html_tag_.Id__comment);
  }
}

View File

@@ -0,0 +1,68 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.htmls.parsers; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
import org.junit.*;
// Exercises Html_tag_rdr's move / peek searches; Test__pos checks the bytes at the reader's cursor.
public class Html_tag_rdr_tst {
  private final Html_tag_rdr_fxt fxt = new Html_tag_rdr_fxt();

  // Moving consumes the tag; peeking (forward or backward) leaves the cursor where it was.
  @Test public void Basic() {
    fxt.Init("1<div id='1'>2</div>3<div id='2'>4</div>5<div id='3'>6</div>7");
    fxt.Test__move_fwd_head("<div id='1'>");
    fxt.Test__pos("2");
    fxt.Test__peek_fwd_head("<div id='2'>");
    fxt.Test__pos("2");
    fxt.Test__move_fwd_head("<div id='2'>");
    fxt.Test__pos("4");
    fxt.Test__peek_bwd_tail("</div>3");
    fxt.Test__pos("4");
  }

  // Comments are found only when requested by id; a wildcard search steps over them.
  @Test public void Comment() {
    fxt.Init("1<!--2-->3<!--4-->5<div id='1'>6</div>");
    fxt.Test__move_fwd_head(Html_tag_.Id__comment, "<!--2-->");
    fxt.Test__pos("3");
    fxt.Test__move_fwd_head(Html_tag_.Id__any, "<div id='1'>");
    fxt.Test__pos("6");
  }

  // A tail search skips the tail belonging to a nested head of the same name.
  @Test public void Recursive() {
    fxt.Init("1<a>2<a>3</a>4</a>5");
    fxt.Test__move_fwd_head(Html_tag_.Id__a, "<a>");
    fxt.Test__pos("2");
    fxt.Test__move_fwd_tail(Html_tag_.Id__a, "</a>");
    fxt.Test__pos("5");
  }
}
// Test fixture wrapping one Html_tag_rdr; expectations are given as snippets of the source text.
class Html_tag_rdr_fxt {
  private final Html_tag_rdr rdr = new Html_tag_rdr();

  // Loads src_str (UTF-8) into the reader with the cursor at 0.
  public void Init(String src_str) {
    byte[] src_bry = Bry_.new_u8(src_str);
    rdr.Init(src_bry, 0, src_bry.length);
  }

  public void Test__move_fwd_head(String expd) {Test__move_fwd_head(Html_tag_.Id__any, expd);}
  public void Test__move_fwd_head(int match_name_id, String expd) {
    Assert_tag_src(expd, rdr.Tag__move_fwd_head(match_name_id));
  }
  public void Test__move_fwd_tail(int match_name_id, String expd) {
    Assert_tag_src(expd, rdr.Tag__move_fwd_tail(match_name_id));
  }
  public void Test__peek_fwd_head(String expd) {
    Assert_tag_src(expd, rdr.Tag__peek_fwd_head());
  }

  // Compares expd_str against the source starting at the found tag's beginning (expd_str may run past the tag).
  public void Test__peek_bwd_tail(String expd_str) {
    byte[] expd_bry = Bry_.new_u8(expd_str);
    Html_tag found = rdr.Tag__peek_bwd_tail(Html_tag_.Id__any);
    int bgn = found.Src_bgn();
    Tfds.Eq_bry(expd_bry, Bry_.Mid(rdr.Src(), bgn, bgn + expd_bry.length));
  }

  // Compares expd_str against the source starting at the reader's cursor.
  public void Test__pos(String expd_str) {
    byte[] expd_bry = Bry_.new_u8(expd_str);
    int cur = rdr.Pos();
    Tfds.Eq_bry(expd_bry, Bry_.Mid(rdr.Src(), cur, cur + expd_bry.length));
  }

  // Asserts that the tag's full source text equals expd.
  private void Assert_tag_src(String expd, Html_tag tag) {
    Tfds.Eq_str(expd, String_.new_u8(rdr.Src(), tag.Src_bgn(), tag.Src_end()));
  }
}

View File

@@ -0,0 +1,22 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.htmls.parsers; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
// Receiver for the stretches of source in which Html_doc_parser found no registered hook.
public interface Html_txt_wkr {
// Called by Html_doc_parser.Parse with the range of one unhooked stretch; rng_end is either the position of the next hook match or the end of the source.
void Parse(int rng_bgn, int rng_end);
// Called by Html_doc_parser.Parse once, before scanning starts, with the source and its bounds.
void Init(byte[] src, int src_bgn, int src_end);
}

View File

@@ -57,7 +57,7 @@ class Json_itm_str extends Json_itm_base {
case Byte_ascii.Ltr_b: bfr.Add_byte(Byte_ascii.Backfeed); break;
case Byte_ascii.Ltr_f: bfr.Add_byte(Byte_ascii.Formfeed); break;
case Byte_ascii.Ltr_u:
int utf8_val = gplx.texts.HexDecUtl.parse_or(src, i + 1, i + 5, -1);
int utf8_val = gplx.core.texts.HexDecUtl.parse_or(src, i + 1, i + 5, -1);
int len = gplx.core.intls.Utf16_.Encode_int(utf8_val, utf8_bry, 0);
bfr.Add_mid(utf8_bry, 0, len);
i += 4;

View File

@@ -23,7 +23,7 @@ public class Json_itm_tmp implements Json_itm { // TEST:
public int Src_bgn() {return -1;}
public int Src_end() {return -1;}
public Object Data() {return data;} private String data;
public void Print_as_json(Bry_bfr bfr, int depth) {bfr.Add_str(data);}
public void Print_as_json(Bry_bfr bfr, int depth) {bfr.Add_str_u8(data);}
public boolean Data_eq(byte[] comp) {return false;}
public void Clear() {}
public static Json_itm new_str_(String v) {return new Json_itm_tmp(Json_itm_.Tid__str, "\"" + v + "\"");}

View File

@@ -16,6 +16,7 @@ You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.phps; import gplx.*; import gplx.langs.*;
import gplx.core.log_msgs.*;
/*
NOTE: naive implementation of PHP evaluator. intended only for parsing Messages**.php files in MediaWiki. Specifically, it assumes the following:
- all lines are assignment lines: EX: $a = b;

View File

@@ -16,7 +16,7 @@ You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.phps; import gplx.*; import gplx.langs.*;
import gplx.core.btries.*;
import gplx.core.btries.*; import gplx.core.log_msgs.*;
interface Php_lxr {
int Lxr_tid();
void Lxr_ini(Btrie_slim_mgr trie, Php_parser_interrupt[] parser_interrupts);
@@ -117,7 +117,7 @@ class Php_lxr_comment extends Php_lxr_base {
public byte[] Comment_end() {return comment_end;} private byte[] comment_end;
@Override public int Lxr_make(Php_ctx ctx, int bgn, int cur) {
int end = Bry_find_.Find_fwd(src, comment_end, bgn);
if (end == Bry_.NotFound) {
if (end == Bry_find_.Not_found) {
tkn_wkr.Msg_many(src, bgn, cur, Php_lxr_comment.Dangling_comment, comment_tid, comment_end);
cur = src_len; // NOTE: terminating sequence not found; assume rest of src is comment
}
@@ -198,7 +198,7 @@ class Php_lxr_quote extends Php_lxr_base {
int end = -1;
while (true) {
end = Bry_find_.Find_fwd(src, quote_bry, cur);
if (end == Bry_.NotFound) {
if (end == Bry_find_.Not_found) {
tkn_wkr.Msg_many(src, bgn, cur, Php_lxr_quote.Dangling_quote, quote_tid, quote_bry);
cur = src_len; // NOTE: terminating sequence not found; assume rest of src is comment
break;

View File

@@ -16,7 +16,7 @@ You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.phps; import gplx.*; import gplx.langs.*;
import gplx.core.btries.*;
import gplx.core.btries.*; import gplx.core.log_msgs.*;
public class Php_parser {
Php_lxr[] lxrs; int lxrs_len;
int txt_bgn; Php_tkn_txt txt_tkn;

View File

@@ -16,7 +16,7 @@ You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.phps; import gplx.*; import gplx.langs.*;
import org.junit.*; import gplx.core.tests.*;
import org.junit.*; import gplx.core.tests.*; import gplx.core.log_msgs.*;
public class Php_parser_tst {
Php_parser_fxt fxt = new Php_parser_fxt();
@Before public void init() {fxt.Clear();}

View File

@@ -39,7 +39,7 @@ abstract class Php_srl_itm_base implements Php_srl_itm {
@gplx.Virtual public void Xto_bfr(Bry_bfr bfr, int depth) {
Php_srl_wtr.Indent(bfr, depth);
bfr.Add(Php_srl_itm_.Names[this.Tid()]).Add_byte(Byte_ascii.Colon);
bfr.Add_str(Object_.Xto_str_strict_or_null_mark(this.Val())).Add_byte(Byte_ascii.Semic).Add_byte_nl();
bfr.Add_str_u8(Object_.Xto_str_strict_or_null_mark(this.Val())).Add_byte(Byte_ascii.Semic).Add_byte_nl();
}
public void Clear() {}
}

View File

@@ -16,7 +16,7 @@ You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.phps; import gplx.*; import gplx.langs.*;
import gplx.texts.*;
import gplx.core.texts.*;
public class Php_srl_parser {
@gplx.Internal protected Php_srl_factory Factory() {return factory;} Php_srl_factory factory = new Php_srl_factory();
byte[] raw; int raw_len, pos;

View File

@@ -16,6 +16,7 @@ You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.phps; import gplx.*; import gplx.langs.*;
import gplx.core.log_msgs.*;
public interface Php_tkn_wkr {
void Init(Php_ctx ctx);
void Process(Php_tkn tkn);