mirror of
https://github.com/gnosygnu/xowa.git
synced 2026-03-02 03:49:30 +00:00
v2.9.3.1
This commit is contained in:
29
400_xowa/src/gplx/langs/htmls/Html_atr_.java
Normal file
29
400_xowa/src/gplx/langs/htmls/Html_atr_.java
Normal file
@@ -0,0 +1,29 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.langs.htmls; import gplx.*; import gplx.langs.*;
|
||||
public class Html_atr_ {
|
||||
public static final String
|
||||
Src_str = "src"
|
||||
;
|
||||
public static final byte[]
|
||||
Id_bry = Bry_.new_a7("id")
|
||||
, Cls_bry = Bry_.new_a7("class")
|
||||
, Style_bry = Bry_.new_a7("style")
|
||||
, Href_bry = Bry_.new_a7("href")
|
||||
;
|
||||
}
|
||||
35
400_xowa/src/gplx/langs/htmls/Html_entity_.java
Normal file
35
400_xowa/src/gplx/langs/htmls/Html_entity_.java
Normal file
@@ -0,0 +1,35 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.langs.htmls; import gplx.*; import gplx.langs.*;
|
||||
public class Html_entity_ {
|
||||
public static final String
|
||||
Nl_str = " "
|
||||
;
|
||||
public static final byte[]
|
||||
Lt_bry = Bry_.new_a7("<"), Gt_bry = Bry_.new_a7(">")
|
||||
, Amp_bry = Bry_.new_a7("&"), Quote_bry = Bry_.new_a7(""")
|
||||
, Apos_num_bry = Bry_.new_a7("'")
|
||||
, Apos_key_bry = Bry_.new_a7("'")
|
||||
, Eq_bry = Bry_.new_a7("=")
|
||||
, Nl_bry = Bry_.new_a7(Nl_str), Space_bry = Bry_.new_a7(" ")
|
||||
, Pipe_bry = Bry_.new_a7("|")
|
||||
, Colon_bry = Bry_.new_a7(":"), Underline_bry = Bry_.new_a7("_"), Asterisk_bry = Bry_.new_a7("*")
|
||||
, Brack_bgn_bry = Bry_.new_a7("["), Brack_end_bry = Bry_.new_a7("]")
|
||||
, Nbsp_num_bry = Bry_.new_a7(" ")
|
||||
;
|
||||
}
|
||||
94
400_xowa/src/gplx/langs/htmls/Html_nde.java
Normal file
94
400_xowa/src/gplx/langs/htmls/Html_nde.java
Normal file
@@ -0,0 +1,94 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.langs.htmls; import gplx.*; import gplx.langs.*;
|
||||
public class Html_nde {
|
||||
public Html_nde(byte[] src, boolean tag_tid_is_inline, int tag_lhs_bgn, int tag_lhs_end, int tag_rhs_bgn, int tag_rhs_end, int name_bgn, int name_end, int[] cur_atrs, int atrs_idx) {
|
||||
this.src = src;
|
||||
this.tag_tid_is_inline = tag_tid_is_inline;
|
||||
this.tag_lhs_bgn = tag_lhs_bgn; this.tag_lhs_end = tag_lhs_end; this.tag_rhs_bgn = tag_rhs_bgn; this.tag_rhs_end = tag_rhs_end; this.name_bgn = name_bgn; this.name_end = name_end;
|
||||
if (atrs_idx > 0) {
|
||||
atrs = new int[atrs_idx];
|
||||
for (int i = 0; i < atrs_idx; i++)
|
||||
atrs[i] = cur_atrs[i];
|
||||
atrs_len = atrs_idx / 5;
|
||||
}
|
||||
}
|
||||
public byte[] Src() {return src;} private byte[] src;
|
||||
public int[] Atrs() {return atrs;} private int[] atrs = Int_.Ary_empty;
|
||||
public int Atrs_len() {return atrs_len;} private int atrs_len;
|
||||
public boolean Tag_tid_is_inline() {return tag_tid_is_inline;} private boolean tag_tid_is_inline;
|
||||
public int Tag_lhs_bgn() {return tag_lhs_bgn;} public Html_nde Tag_lhs_bgn_(int v) {tag_lhs_bgn = v; return this;} private int tag_lhs_bgn;
|
||||
public int Tag_lhs_end() {return tag_lhs_end;} public Html_nde Tag_lhs_end_(int v) {tag_lhs_end = v; return this;} private int tag_lhs_end;
|
||||
public int Tag_rhs_bgn() {return tag_rhs_bgn;} public Html_nde Tag_rhs_bgn_(int v) {tag_rhs_bgn = v; return this;} private int tag_rhs_bgn;
|
||||
public int Tag_rhs_end() {return tag_rhs_end;} public Html_nde Tag_rhs_end_(int v) {tag_rhs_end = v; return this;} private int tag_rhs_end;
|
||||
public int Name_bgn() {return name_bgn;} public Html_nde Name_bgn_(int v) {name_bgn = v; return this;} private int name_bgn;
|
||||
public int Name_end() {return name_end;} public Html_nde Name_end_(int v) {name_end = v; return this;} private int name_end;
|
||||
public void Clear() {tag_lhs_bgn = tag_rhs_bgn = -1;}
|
||||
public String Atrs_val_by_key_str(String find_key_str) {return String_.new_u8(Atrs_val_by_key_bry(Bry_.new_u8(find_key_str)));}
|
||||
public byte[] Atrs_val_by_key_bry(byte[] find_key_bry) {
|
||||
for (int i = 0; i < atrs_len; i ++) {
|
||||
int atrs_idx = i * 5;
|
||||
int atr_key_bgn = atrs[atrs_idx + 1];
|
||||
int atr_key_end = atrs[atrs_idx + 2];
|
||||
if (Bry_.Match(src, atr_key_bgn, atr_key_end, find_key_bry))
|
||||
return Atrs_vals_by_pos(src, atrs[atrs_idx + 0], atrs[atrs_idx + 3], atrs[atrs_idx + 4]);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
byte[] Atrs_vals_by_pos(byte[] src, int quote_byte, int bgn, int end) {
|
||||
Bry_bfr tmp_bfr = Bry_bfr.new_();
|
||||
boolean dirty = false;
|
||||
for (int i = bgn; i < end; i++) {
|
||||
byte b = src[i];
|
||||
switch (b) {
|
||||
case Byte_ascii.Backslash:
|
||||
if (!dirty) {dirty = true; tmp_bfr.Add_mid(src, bgn, i);}
|
||||
++i;
|
||||
tmp_bfr.Add_byte(src[i]);
|
||||
break;
|
||||
default:
|
||||
if (b == quote_byte) {
|
||||
byte next_byte = src[i + 1];
|
||||
if (next_byte == b) {
|
||||
if (!dirty) {dirty = true; tmp_bfr.Add_mid(src, bgn, i);}
|
||||
++i;
|
||||
tmp_bfr.Add_byte(src[i]);
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (dirty)
|
||||
tmp_bfr.Add_byte(b);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
return dirty ? tmp_bfr.Xto_bry_and_clear() : Bry_.Mid(src, bgn, end);
|
||||
}
|
||||
public byte[] Data(byte[] src) {
|
||||
return Bry_.Mid(src, tag_lhs_end, tag_rhs_bgn);
|
||||
}
|
||||
}
|
||||
// class Xoh_atr {
|
||||
// public byte[] Key_bry() {return key_bry;} private byte[] key_bry;
|
||||
// public byte[] Val_bry() {return val_bry;} private byte[] val_bry;
|
||||
// public int Key_bgn() {return key_bgn;} private int key_bgn;
|
||||
// public int Key_end() {return key_end;} private int key_end;
|
||||
// public int Val_bgn() {return val_bgn;} private int val_bgn;
|
||||
// public int Val_end() {return val_end;} private int val_end;
|
||||
// public byte Val_quote_tid() {return val_quote_tid;} private byte val_quote_tid;
|
||||
// }
|
||||
165
400_xowa/src/gplx/langs/htmls/Html_parser.java
Normal file
165
400_xowa/src/gplx/langs/htmls/Html_parser.java
Normal file
@@ -0,0 +1,165 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.langs.htmls; import gplx.*; import gplx.langs.*;
|
||||
import gplx.core.brys.*;
|
||||
public class Html_parser {
|
||||
public Html_parser() {
|
||||
Bry_bldr bry_bldr = new Bry_bldr();
|
||||
bry_xnde_name = bry_bldr.New_256().Set_rng_xml_identifier(Scan_valid).Set_rng_ws(Scan_stop).Val();
|
||||
bry_atr_key = bry_bldr.New_256().Set_rng_xml_identifier(Scan_valid).Set_rng_ws(Scan_stop).Set_many(Scan_stop, Byte_ascii.Eq).Val();
|
||||
}
|
||||
byte[] src; int pos, end; byte[] bry_xnde_name, bry_atr_key;
|
||||
int cur_atrs_idx = 0; int[] cur_atrs = new int[250];// define max of 50 atrs;
|
||||
public Html_nde[] Parse_as_ary(byte[] src) {return Parse_as_ary(src, 0, src.length, Wildcard, Wildcard);}
|
||||
public Html_nde[] Parse_as_ary(byte[] src, int bgn, int end) {return Parse_as_ary(src, bgn, end, Wildcard, Wildcard);}
|
||||
public Html_nde[] Parse_as_ary(byte[] src, int bgn, int end, byte[] find_key, byte[] find_val) { // flattens html into a list of hndes; only used for Options
|
||||
this.src = src; pos = bgn; this.end = end;
|
||||
List_adp rv = List_adp_.new_();
|
||||
while (pos < end) {
|
||||
byte b = src[pos++];
|
||||
switch (b) {
|
||||
case Byte_ascii.Lt:
|
||||
if (xnde_init) {
|
||||
if (Parse_xnde_lhs()) {
|
||||
if (tag_tid_is_inline)
|
||||
rv.Add(new Html_nde(src, tag_tid_is_inline, cur_lhs_bgn, cur_lhs_end, cur_rhs_bgn, pos, cur_name_bgn, cur_name_end, cur_atrs, cur_atrs_idx));
|
||||
else
|
||||
xnde_init = false;
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (Parse_xnde_rhs()) {
|
||||
rv.Add(new Html_nde(src, tag_tid_is_inline, cur_lhs_bgn, cur_lhs_end, cur_rhs_bgn, pos, cur_name_bgn, cur_name_end, cur_atrs, cur_atrs_idx));
|
||||
}
|
||||
xnde_init = true;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
return (Html_nde[])rv.To_ary(Html_nde.class);
|
||||
}
|
||||
int cur_lhs_bgn, cur_lhs_end, cur_name_bgn, cur_name_end, cur_rhs_bgn; boolean xnde_init = true, tag_tid_is_inline = false;
|
||||
private boolean Parse_xnde_rhs() {
|
||||
cur_rhs_bgn = pos - 1; // -1 b/c "<" is already read
|
||||
byte b = src[pos];
|
||||
if (b != Byte_ascii.Slash) return false;
|
||||
++pos;
|
||||
int name_len = cur_name_end - cur_name_bgn;
|
||||
if (pos + name_len >= end) return false;
|
||||
if (!Bry_.Match(src, pos, pos + name_len, src, cur_name_bgn, cur_name_end)) return false;
|
||||
pos += name_len;
|
||||
if (src[pos] != Byte_ascii.Gt) return false;
|
||||
++pos;
|
||||
return true;
|
||||
}
|
||||
private boolean Parse_xnde_lhs() {
|
||||
cur_atrs_idx = 0;
|
||||
cur_lhs_bgn = pos - 1;
|
||||
cur_name_bgn = pos;
|
||||
tag_tid_is_inline = false;
|
||||
byte rslt = Skip_while_valid(this.bry_atr_key);
|
||||
if (rslt == Scan_invalid) return false;
|
||||
cur_name_end = pos;
|
||||
int key_bgn, key_end, val_bgn, quote_type;
|
||||
while (true) {
|
||||
if (pos >= end) return false;
|
||||
key_bgn = key_end = val_bgn = quote_type = -1;
|
||||
Skip_ws();
|
||||
byte b = src[pos];
|
||||
if (b == Byte_ascii.Slash) {
|
||||
++pos;
|
||||
if (pos == end) return false;
|
||||
byte next = src[pos];
|
||||
if (next == Byte_ascii.Gt) {
|
||||
tag_tid_is_inline = true;
|
||||
++pos;
|
||||
break;
|
||||
}
|
||||
else return false; // NOTE: don't consume byte b/c false
|
||||
}
|
||||
else if (b == Byte_ascii.Gt) {
|
||||
++pos;
|
||||
break;
|
||||
}
|
||||
key_bgn = pos;
|
||||
rslt = Skip_while_valid(this.bry_atr_key);
|
||||
if (rslt == Scan_invalid) return false;
|
||||
key_end = pos;
|
||||
Skip_ws();
|
||||
if (src[pos++] != Byte_ascii.Eq) return false;
|
||||
Skip_ws();
|
||||
byte quote_byte = src[pos];
|
||||
switch (quote_byte) {
|
||||
case Byte_ascii.Quote: quote_type = quote_byte; break;
|
||||
case Byte_ascii.Apos: quote_type = quote_byte; break;
|
||||
default: return false;
|
||||
}
|
||||
val_bgn = ++pos; // ++pos: start val after quote
|
||||
if (!Skip_to_quote_end(quote_byte)) return false;
|
||||
cur_atrs[cur_atrs_idx + 0] = quote_type;
|
||||
cur_atrs[cur_atrs_idx + 1] = key_bgn;
|
||||
cur_atrs[cur_atrs_idx + 2] = key_end;
|
||||
cur_atrs[cur_atrs_idx + 3] = val_bgn;
|
||||
cur_atrs[cur_atrs_idx + 4] = pos - 1; // NOTE: Skip_to_quote_end positions after quote
|
||||
cur_atrs_idx += 5;
|
||||
}
|
||||
cur_lhs_end = pos;
|
||||
return true;
|
||||
}
|
||||
private void Skip_ws() {
|
||||
while (pos < end) {
|
||||
switch (src[pos]) {
|
||||
case Byte_ascii.Space: case Byte_ascii.Tab: case Byte_ascii.Nl: case Byte_ascii.Cr:
|
||||
++pos;
|
||||
break;
|
||||
default:
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
boolean Skip_to_quote_end(byte v) {
|
||||
while (pos < end) {
|
||||
byte b = src[pos++];
|
||||
if (b == v) {
|
||||
if (pos == end) return false;
|
||||
byte next = src[pos];
|
||||
if (next != v) return true;
|
||||
else ++pos;
|
||||
}
|
||||
else if (b == Byte_ascii.Backslash) {
|
||||
++pos;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
byte Skip_while_valid(byte[] comp) {
|
||||
while (pos < end) {
|
||||
byte rv = comp[src[pos]];
|
||||
if (rv == Scan_valid)
|
||||
++pos;
|
||||
else
|
||||
return rv;
|
||||
}
|
||||
return Scan_invalid;
|
||||
}
|
||||
private static final byte Scan_invalid = 0, Scan_valid = 1, Scan_stop = 2;
|
||||
public static final byte[] Wildcard = null;
|
||||
public static final String Wildcard_str = null;
|
||||
}
|
||||
53
400_xowa/src/gplx/langs/htmls/Html_parser_tst.java
Normal file
53
400_xowa/src/gplx/langs/htmls/Html_parser_tst.java
Normal file
@@ -0,0 +1,53 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.langs.htmls; import gplx.*; import gplx.langs.*;
|
||||
import org.junit.*;
|
||||
public class Html_parser_tst {
|
||||
@Before public void init() {fxt.Clear();} private Xoh_parser_fxt fxt = new Xoh_parser_fxt();
|
||||
@Test public void One() {fxt.Test_parse_find_all("<a id='id0'></a>", "id0");}
|
||||
@Test public void Many() {fxt.Test_parse_find_all("<a id='id0'></a><a id='id1'></a><a id='id2'></a>", "id0", "id1", "id2");}
|
||||
@Test public void Inline() {fxt.Test_parse_find_all("<a id='id0'/>", "id0");}
|
||||
@Test public void Mix() {fxt.Test_parse_find_all("012<a id='id0'></a>id=id2<a id='id1'/>345<a id='id2'></a>abc", "id0", "id1", "id2");}
|
||||
@Test public void Quote_double() {fxt.Test_parse_find_all("<a id='id''0'/>", "id'0");}
|
||||
@Test public void Quote_escape() {fxt.Test_parse_find_all("<a id='id\\'0'/>", "id'0");}
|
||||
}
|
||||
class Xoh_parser_fxt {
|
||||
public void Clear() {
|
||||
if (parser == null) {
|
||||
parser = new Html_parser();
|
||||
}
|
||||
} private Html_parser parser;
|
||||
public Xoh_parser_fxt Test_parse_find_all(String raw_str, String... expd) {return Test_parse_find(raw_str, Html_parser.Wildcard_str, Html_parser.Wildcard_str, expd);}
|
||||
public Xoh_parser_fxt Test_parse_find(String raw_str, String find_key, String find_val, String... expd) {
|
||||
byte[] raw = Bry_.new_a7(raw_str);
|
||||
Html_nde[] actl_ndes = parser.Parse_as_ary(raw, 0, raw.length, Bry_.new_a7(find_key), Bry_.new_a7(find_val));
|
||||
String[] actl = Xto_ids(raw, actl_ndes);
|
||||
Tfds.Eq_ary_str(expd, actl);
|
||||
return this;
|
||||
}
|
||||
private String[] Xto_ids(byte[] src, Html_nde[] ary) {
|
||||
int len = ary.length;
|
||||
String[] rv = new String[len];
|
||||
for (int i = 0; i < len; i++) {
|
||||
Html_nde itm = ary[i];
|
||||
String atr_val = itm.Atrs_val_by_key_str("id");
|
||||
rv[i] = atr_val;
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
}
|
||||
40
400_xowa/src/gplx/langs/htmls/Html_selecter.java
Normal file
40
400_xowa/src/gplx/langs/htmls/Html_selecter.java
Normal file
@@ -0,0 +1,40 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.langs.htmls; import gplx.*; import gplx.langs.*;
|
||||
public class Html_selecter {
|
||||
public static Html_nde[] Select(byte[] src, Html_nde[] ary, Hash_adp_bry hash) {
|
||||
List_adp list = List_adp_.new_();
|
||||
int xndes_len = ary.length;
|
||||
for (int i = 0; i < xndes_len; i++) {
|
||||
Html_nde hnde = ary[i];
|
||||
int[] atrs = hnde.Atrs();
|
||||
int atrs_len = atrs.length;
|
||||
for (int j = 0; j < atrs_len; j += 5) {
|
||||
int atr_key_bgn = atrs[j + 1];
|
||||
int atr_key_end = atrs[j + 2];
|
||||
if (hash.Get_by_mid(src, atr_key_bgn, atr_key_end) != null) {
|
||||
list.Add(hnde);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
Html_nde[] rv = (Html_nde[])list.To_ary(Html_nde.class);
|
||||
list.Clear();
|
||||
return rv;
|
||||
}
|
||||
}
|
||||
59
400_xowa/src/gplx/langs/htmls/Html_tag_.java
Normal file
59
400_xowa/src/gplx/langs/htmls/Html_tag_.java
Normal file
@@ -0,0 +1,59 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.langs.htmls; import gplx.*; import gplx.langs.*;
|
||||
public class Html_tag_ {
|
||||
public static final byte[]
|
||||
Ul_name_bry = Bry_.new_a7("ul")
|
||||
, A_name_bry = Bry_.new_a7("a")
|
||||
, Code_name_bry = Bry_.new_a7("code")
|
||||
, Tr_name_bry = Bry_.new_a7("tr")
|
||||
, Td_name_bry = Bry_.new_a7("td")
|
||||
, Table_name_bry = Bry_.new_a7("table")
|
||||
;
|
||||
public static final byte[]
|
||||
Br_inl = Bry_.new_a7("<br/>")
|
||||
, Hr_inl = Bry_.new_a7("<hr/>")
|
||||
, Body_lhs = Bry_.new_a7("<body>") , Body_rhs = Bry_.new_a7("</body>")
|
||||
, B_lhs = Bry_.new_a7("<b>") , B_rhs = Bry_.new_a7("</b>")
|
||||
, I_lhs = Bry_.new_a7("<i>") , I_rhs = Bry_.new_a7("</i>")
|
||||
, P_lhs = Bry_.new_a7("<p>") , P_rhs = Bry_.new_a7("</p>")
|
||||
, Div_lhs = Bry_.new_a7("<div>") , Div_rhs = Bry_.new_a7("</div>")
|
||||
, Html_rhs = Bry_.new_a7("</html>")
|
||||
, Head_lhs_bgn = Bry_.new_a7("<head")
|
||||
, Head_rhs = Bry_.new_a7("</head>")
|
||||
, Style_lhs_w_type = Bry_.new_a7("<style type=\"text/css\">")
|
||||
, Style_rhs = Bry_.new_a7("</style>")
|
||||
, Script_lhs = Bry_.new_a7("<script>")
|
||||
, Script_lhs_w_type = Bry_.new_a7("<script type='text/javascript'>")
|
||||
, Script_rhs = Bry_.new_a7("</script>")
|
||||
, Span_rhs = Bry_.new_a7("</span>")
|
||||
;
|
||||
|
||||
public static final String
|
||||
Comm_bgn_str = "<!--"
|
||||
, Comm_end_str = "-->"
|
||||
, Anchor_str = "#"
|
||||
;
|
||||
public static final byte[]
|
||||
Comm_bgn = Bry_.new_a7(Comm_bgn_str), Comm_end = Bry_.new_a7(Comm_end_str)
|
||||
;
|
||||
public static final int
|
||||
Comm_bgn_len = Comm_bgn.length
|
||||
, Comm_end_len = Comm_end.length
|
||||
;
|
||||
}
|
||||
180
400_xowa/src/gplx/langs/htmls/Html_utl.java
Normal file
180
400_xowa/src/gplx/langs/htmls/Html_utl.java
Normal file
@@ -0,0 +1,180 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.langs.htmls; import gplx.*; import gplx.langs.*;
|
||||
import gplx.core.primitives.*; import gplx.core.btries.*; import gplx.langs.htmls.encoders.*;
|
||||
public class Html_utl {
|
||||
private static final Url_encoder encoder_id = Url_encoder.new_html_id_(); private static final Bry_bfr tmp_bfr = Bry_bfr.reset_(255);
|
||||
public static String Encode_id_as_str(byte[] key) {return String_.new_u8(Encode_id_as_bry(key));}
|
||||
public static byte[] Encode_id_as_bry(byte[] key) {
|
||||
byte[] escaped = Escape_html_as_bry(tmp_bfr, key, Bool_.N, Bool_.N, Bool_.N, Bool_.Y, Bool_.Y);
|
||||
return encoder_id.Encode(escaped);
|
||||
}
|
||||
public static String Escape_for_atr_val_as_str(Bry_bfr bfr, byte quote_byte, String s) {return String_.new_u8(Escape_for_atr_val_as_bry(bfr, quote_byte, s));}
|
||||
public static byte[] Escape_for_atr_val_as_bry(Bry_bfr bfr, byte quote_byte, String s) {
|
||||
if (s == null) return null;
|
||||
return Escape_for_atr_val_as_bry(bfr, quote_byte, Bry_.new_u8(s));
|
||||
}
|
||||
public static byte[] Escape_for_atr_val_as_bry(Bry_bfr bfr, byte quote_byte, byte[] bry) {
|
||||
if (bry == null) return null;
|
||||
boolean dirty = Escape_for_atr_val_as_bry(bfr, quote_byte, bry, 0, bry.length);
|
||||
return dirty ? bfr.Xto_bry_and_clear() : bry;
|
||||
}
|
||||
public static boolean Escape_for_atr_val_as_bry(Bry_bfr bfr, byte quote_byte, byte[] src, int bgn, int end) {
|
||||
boolean dirty = false;
|
||||
for (int i = bgn; i < end; i++) {
|
||||
byte b = src[i];
|
||||
if (b == quote_byte) {
|
||||
if (!dirty) {
|
||||
bfr.Add_mid(src, bgn, i);
|
||||
dirty = true;
|
||||
}
|
||||
switch (quote_byte) {
|
||||
case Byte_ascii.Apos: bfr.Add(Html_entity_.Apos_num_bry); break;
|
||||
case Byte_ascii.Quote: bfr.Add(Html_entity_.Quote_bry); break;
|
||||
default: throw Err_.new_unhandled(quote_byte);
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (dirty)
|
||||
bfr.Add_byte(b);
|
||||
}
|
||||
}
|
||||
return dirty;
|
||||
}
|
||||
public static String Escape_html_as_str(String v) {return String_.new_u8(Escape_html_as_bry(Bry_.new_u8(v)));}
|
||||
public static byte[] Escape_html_as_bry(Bry_bfr tmp, byte[] bry) {return Escape_html(false, tmp, bry, 0, bry.length, true, true, true, true, true);}
|
||||
public static byte[] Escape_html_as_bry(byte[] bry) {return Escape_html(false, tmp_bfr, bry, 0, bry.length, true, true, true, true, true);}
|
||||
public static byte[] Escape_html_as_bry(byte[] bry, boolean lt, boolean gt, boolean amp, boolean quote, boolean apos)
|
||||
{return Escape_html(false, tmp_bfr, bry, 0, bry.length, lt, gt, amp, quote, apos);}
|
||||
public static byte[] Escape_html_as_bry(Bry_bfr bfr, byte[] bry, boolean lt, boolean gt, boolean amp, boolean quote, boolean apos)
|
||||
{return Escape_html(false, bfr, bry, 0, bry.length, lt, gt, amp, quote, apos);}
|
||||
public static void Escape_html_to_bfr(Bry_bfr bfr, byte[] bry, int bgn, int end, boolean escape_lt, boolean escape_gt, boolean escape_amp, boolean escape_quote, boolean escape_apos) {
|
||||
Escape_html(true, bfr, bry, bgn, end, escape_lt, escape_gt, escape_amp, escape_quote, escape_apos);
|
||||
}
|
||||
private static byte[] Escape_html(boolean write_to_bfr, Bry_bfr bfr, byte[] bry, int bgn, int end, boolean escape_lt, boolean escape_gt, boolean escape_amp, boolean escape_quote, boolean escape_apos) {
|
||||
if (bry == null) return null;
|
||||
boolean dirty = write_to_bfr ? true : false; // if write_to_bfr, then mark true, else bfr.Add_mid(bry, 0, i); will write whole bry again
|
||||
byte[] escaped = null;
|
||||
for (int i = bgn; i < end; i++) {
|
||||
byte b = bry[i];
|
||||
switch (b) {
|
||||
case Byte_ascii.Lt: if (escape_lt) escaped = Html_entity_.Lt_bry; break;
|
||||
case Byte_ascii.Gt: if (escape_gt) escaped = Html_entity_.Gt_bry; break;
|
||||
case Byte_ascii.Amp: if (escape_amp) escaped = Html_entity_.Amp_bry; break;
|
||||
case Byte_ascii.Quote: if (escape_quote) escaped = Html_entity_.Quote_bry; break;
|
||||
case Byte_ascii.Apos: if (escape_apos) escaped = Html_entity_.Apos_num_bry; break;
|
||||
default:
|
||||
if (dirty || write_to_bfr)
|
||||
bfr.Add_byte(b);
|
||||
continue;
|
||||
}
|
||||
// handle lt, gt, amp, quote; everything else handled by default: continue above
|
||||
if (escaped == null) { // handle do-not-escape calls; EX: Escape(y, y, n, y);
|
||||
if (dirty || write_to_bfr)
|
||||
bfr.Add_byte(b);
|
||||
}
|
||||
else {
|
||||
if (!dirty) {
|
||||
bfr.Add_mid(bry, bgn, i);
|
||||
dirty = true;
|
||||
}
|
||||
bfr.Add(escaped);
|
||||
escaped = null;
|
||||
}
|
||||
}
|
||||
if (write_to_bfr)
|
||||
return null;
|
||||
else
|
||||
return dirty ? bfr.Xto_bry_and_clear() : bry;
|
||||
}
|
||||
|
||||
private static final Btrie_slim_mgr unescape_trie = Btrie_slim_mgr.ci_a7()
|
||||
.Add_bry_byte(Html_entity_.Lt_bry , Byte_ascii.Lt)
|
||||
.Add_bry_byte(Html_entity_.Gt_bry , Byte_ascii.Gt)
|
||||
.Add_bry_byte(Html_entity_.Amp_bry , Byte_ascii.Amp)
|
||||
.Add_bry_byte(Html_entity_.Quote_bry , Byte_ascii.Quote)
|
||||
.Add_bry_byte(Html_entity_.Apos_num_bry , Byte_ascii.Apos)
|
||||
;
|
||||
public static String Unescape_as_str(String src) {
|
||||
Bry_bfr bfr = Bry_bfr.reset_(255);
|
||||
byte[] bry = Bry_.new_u8(src);
|
||||
Unescape(Bool_.Y, bfr, bry, 0, bry.length, Bool_.Y, Bool_.Y, Bool_.Y, Bool_.Y, Bool_.Y);
|
||||
return bfr.Xto_str_and_clear();
|
||||
}
|
||||
public static byte[] Unescape(boolean write_to_bfr, Bry_bfr bfr, byte[] bry, int bgn, int end, boolean escape_lt, boolean escape_gt, boolean escape_amp, boolean escape_quote, boolean escape_apos) {
|
||||
if (bry == null) return null;
|
||||
boolean dirty = write_to_bfr ? true : false; // if write_to_bfr, then mark true, else bfr.Add_mid(bry, 0, i); will write whole bry again
|
||||
int pos = bgn;
|
||||
while (pos < end) {
|
||||
byte b = bry[pos];
|
||||
Object o = unescape_trie.Match_bgn_w_byte(b, bry, pos, end);
|
||||
if (o == null) {
|
||||
if (dirty || write_to_bfr)
|
||||
bfr.Add_byte(b);
|
||||
++pos;
|
||||
}
|
||||
else {
|
||||
Byte_obj_val unescaped_bval = (Byte_obj_val)o;
|
||||
byte unescaped_byte = unescaped_bval.Val();
|
||||
boolean unescape = false;
|
||||
switch (unescaped_byte) {
|
||||
case Byte_ascii.Lt: if (escape_lt) unescape = true; break;
|
||||
case Byte_ascii.Gt: if (escape_gt) unescape = true; break;
|
||||
case Byte_ascii.Amp: if (escape_amp) unescape = true; break;
|
||||
case Byte_ascii.Quote: if (escape_quote) unescape = true; break;
|
||||
case Byte_ascii.Apos: if (escape_apos) unescape = true; break;
|
||||
}
|
||||
if (unescape) {
|
||||
if (!dirty) {
|
||||
bfr.Add_mid(bry, bgn, pos);
|
||||
dirty = true;
|
||||
}
|
||||
bfr.Add_byte(unescaped_byte);
|
||||
}
|
||||
else {
|
||||
if (dirty || write_to_bfr)
|
||||
bfr.Add_byte(b);
|
||||
}
|
||||
pos = unescape_trie.Match_pos();
|
||||
}
|
||||
}
|
||||
if (write_to_bfr)
|
||||
return null;
|
||||
else
|
||||
return dirty ? bfr.Xto_bry_and_clear() : bry;
|
||||
}
|
||||
public static byte[] Del_comments(Bry_bfr bfr, byte[] src) {return Del_comments(bfr, src, 0, src.length);}
|
||||
public static byte[] Del_comments(Bry_bfr bfr, byte[] src, int pos, int end) {
|
||||
while (true) {
|
||||
if (pos >= end) break;
|
||||
int comm_bgn = Bry_find_.Find_fwd(src, Html_tag_.Comm_bgn, pos); // look for <!--
|
||||
if (comm_bgn == Bry_find_.Not_found) { // not found; consume rest
|
||||
bfr.Add_mid(src, pos, end);
|
||||
break;
|
||||
}
|
||||
int comm_end = Bry_find_.Find_fwd(src, Html_tag_.Comm_end, comm_bgn + Html_tag_.Comm_bgn_len); // look for -->
|
||||
if (comm_end == Bry_find_.Not_found) { // not found; consume rest
|
||||
bfr.Add_mid(src, pos, end);
|
||||
break;
|
||||
}
|
||||
bfr.Add_mid(src, pos, comm_bgn); // add everything between pos and comm_bgn
|
||||
pos = comm_end + Html_tag_.Comm_end_len; // reposition pos after comm_end
|
||||
}
|
||||
return bfr.Xto_bry_and_clear();
|
||||
}
|
||||
}
|
||||
62
400_xowa/src/gplx/langs/htmls/Html_utl_tst.java
Normal file
62
400_xowa/src/gplx/langs/htmls/Html_utl_tst.java
Normal file
@@ -0,0 +1,62 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.langs.htmls; import gplx.*; import gplx.langs.*;
|
||||
import org.junit.*;
|
||||
public class Html_utl_tst {
|
||||
@Before public void init() {fxt.Clear();} private Html_utl_fxt fxt = new Html_utl_fxt();
|
||||
@Test public void Basic() {fxt.Test_del_comments("a<!-- b -->c" , "ac");}
|
||||
@Test public void Bgn_missing() {fxt.Test_del_comments("a b c" , "a b c");}
|
||||
@Test public void End_missing() {fxt.Test_del_comments("a<!-- b c" , "a<!-- b c");}
|
||||
@Test public void Multiple() {fxt.Test_del_comments("a<!--b-->c<!--d-->e" , "ace");}
|
||||
@Test public void Escape() {
|
||||
fxt.Test_escape_html(Bool_.Y, Bool_.Y, Bool_.Y, Bool_.Y, Bool_.Y, "a<b" , "a<b"); // basic
|
||||
fxt.Test_escape_html(Bool_.Y, Bool_.Y, Bool_.N, Bool_.Y, Bool_.Y, "a<&b" , "a<&b"); // fix: & not escaped when <> present
|
||||
fxt.Test_escape_html(Bool_.Y, Bool_.Y, Bool_.Y, Bool_.Y, Bool_.Y, "a<>'&\"b" , "a<>'&"b");
|
||||
}
|
||||
@Test public void Escape_for_atr_val() {
|
||||
fxt.Test_escape_for_atr("abc" , Bool_.N, "abc"); // basic
|
||||
fxt.Test_escape_for_atr("a'\"b" , Bool_.Y, "a'\"b"); // quote is '
|
||||
fxt.Test_escape_for_atr("a'\"b" , Bool_.N, "a'"b"); // quote is "
|
||||
}
|
||||
@Test public void Unescape() {
|
||||
fxt.Test_unescape_html(Bool_.Y, Bool_.Y, Bool_.Y, Bool_.Y, Bool_.Y, "a<>'&"b" , "a<>'&\"b"); // basic
|
||||
}
|
||||
}
|
||||
class Html_utl_fxt {
|
||||
private Bry_bfr tmp_bfr = Bry_bfr.reset_(255);
|
||||
public void Clear() {
|
||||
tmp_bfr.Clear();
|
||||
}
|
||||
public void Test_del_comments(String src, String expd) {
|
||||
byte[] actl = Html_utl.Del_comments(tmp_bfr, Bry_.new_u8(src));
|
||||
Tfds.Eq(expd, String_.new_a7(actl));
|
||||
}
|
||||
public void Test_escape_html(boolean lt, boolean gt, boolean amp, boolean quote, boolean apos, String src, String expd) {
|
||||
byte[] actl = Html_utl.Escape_html_as_bry(Bry_.new_a7(src), lt, gt, amp, quote, apos);
|
||||
Tfds.Eq(expd, String_.new_a7(actl));
|
||||
}
|
||||
public void Test_escape_for_atr(String src, boolean quote_is_apos, String expd) {
|
||||
byte[] actl = Html_utl.Escape_for_atr_val_as_bry(tmp_bfr, quote_is_apos ? Byte_ascii.Apos : Byte_ascii.Quote, src);
|
||||
Tfds.Eq(expd, String_.new_u8(actl));
|
||||
}
|
||||
public void Test_unescape_html(boolean lt, boolean gt, boolean amp, boolean quote, boolean apos, String src, String expd) {
|
||||
byte[] bry = Bry_.new_u8(src);
|
||||
byte[] actl = Html_utl.Unescape(false, tmp_bfr, bry, 0, bry.length, lt, gt, amp, quote, apos);
|
||||
Tfds.Eq(expd, String_.new_a7(actl));
|
||||
}
|
||||
}
|
||||
107
400_xowa/src/gplx/langs/htmls/Html_wtr.java
Normal file
107
400_xowa/src/gplx/langs/htmls/Html_wtr.java
Normal file
@@ -0,0 +1,107 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.langs.htmls; import gplx.*; import gplx.langs.*;
|
||||
public class Html_wtr {
|
||||
private Bry_bfr bfr = Bry_bfr.reset_(255);
|
||||
private List_adp nde_stack = List_adp_.new_();
|
||||
public byte Atr_quote() {return atr_quote;} public Html_wtr Atr_quote_(byte v) {atr_quote = v; return this;} private byte atr_quote = Byte_ascii.Quote;
|
||||
public Html_wtr Nde_full_atrs(byte[] tag, byte[] text, boolean text_escape, byte[]... atrs) {
|
||||
Nde_bgn(tag);
|
||||
int atrs_len = atrs.length;
|
||||
for (int i = 0; i < atrs_len; i += 2) {
|
||||
byte[] key = atrs[i];
|
||||
byte[] val = atrs[i + 1];
|
||||
Atr(key, val);
|
||||
}
|
||||
Nde_end_hdr();
|
||||
if (text_escape)
|
||||
Txt(text);
|
||||
else
|
||||
bfr.Add(text);
|
||||
Nde_end();
|
||||
return this;
|
||||
}
|
||||
public Html_wtr Nde_full(byte[] tag, byte[] text) {
|
||||
Nde_bgn_hdr(tag);
|
||||
Txt(text);
|
||||
Nde_end();
|
||||
return this;
|
||||
}
|
||||
public Html_wtr Txt_mid(byte[] src, int bgn, int end) {bfr.Add_mid(src, bgn, end); return this;}
|
||||
public Html_wtr Txt_byte(byte v) {bfr.Add_byte(v); return this;}
|
||||
public Html_wtr Txt_raw(byte[] v) {bfr.Add(v); return this;}
|
||||
public Html_wtr Txt(byte[] v) {
|
||||
if (v != null) {
|
||||
bfr.Add(Html_utl.Escape_html_as_bry(v));
|
||||
}
|
||||
return this;
|
||||
}
|
||||
public Html_wtr Nde_bgn_hdr(byte[] name) {
|
||||
this.Nde_bgn(name);
|
||||
this.Nde_end_hdr();
|
||||
return this;
|
||||
}
|
||||
public Html_wtr Nde_bgn(byte[] name) {
|
||||
bfr.Add_byte(Byte_ascii.Lt);
|
||||
bfr.Add(name);
|
||||
nde_stack.Add(name);
|
||||
return this;
|
||||
}
|
||||
public Html_wtr Atr(byte[] key, byte[] val) {
|
||||
Write_atr_bry(bfr, Bool_.Y, atr_quote, key, val);
|
||||
return this;
|
||||
}
|
||||
public Html_wtr Nde_end_inline() {
|
||||
bfr.Add_byte(Byte_ascii.Slash).Add_byte(Byte_ascii.Gt);
|
||||
List_adp_.Pop_last(nde_stack);
|
||||
return this;
|
||||
}
|
||||
public Html_wtr Nde_end_hdr() {
|
||||
bfr.Add_byte(Byte_ascii.Gt);
|
||||
return this;
|
||||
}
|
||||
public Html_wtr Nde_end() {
|
||||
byte[] name = (byte[])List_adp_.Pop_last(nde_stack);
|
||||
bfr.Add_byte(Byte_ascii.Lt).Add_byte(Byte_ascii.Slash);
|
||||
bfr.Add(name);
|
||||
bfr.Add_byte(Byte_ascii.Gt);
|
||||
return this;
|
||||
}
|
||||
public byte[] Xto_bry_and_clear() {return bfr.Xto_bry_and_clear();}
|
||||
public byte[] Xto_bry() {return bfr.Xto_bry();}
|
||||
public String Xto_str() {return bfr.Xto_str();}
|
||||
public static void Write_atr_bry(Bry_bfr bfr, byte[] key, byte[] val) {Write_atr_bry(bfr, Bool_.Y, Byte_ascii.Quote, key, val);}
|
||||
public static void Write_atr_bry(Bry_bfr bfr, boolean write_space, byte atr_quote, byte[] key, byte[] val) {
|
||||
if (Bry_.Len_eq_0(val)) return; // don't write empty
|
||||
if (write_space) bfr.Add_byte_space();
|
||||
bfr.Add(key);
|
||||
bfr.Add_byte(Byte_ascii.Eq);
|
||||
bfr.Add_byte(atr_quote);
|
||||
Html_utl.Escape_html_to_bfr(bfr, val, 0, val.length, false, false, false, true, true);
|
||||
bfr.Add_byte(atr_quote);
|
||||
}
|
||||
public static void Write_atr_int(Bry_bfr bfr, byte[] key, int val) {Write_atr_int(bfr, Bool_.Y, Byte_ascii.Quote, key, val);}
|
||||
public static void Write_atr_int(Bry_bfr bfr, boolean write_space, byte atr_quote, byte[] key, int val) {
|
||||
if (write_space) bfr.Add_byte_space();
|
||||
bfr.Add(key);
|
||||
bfr.Add_byte(Byte_ascii.Eq);
|
||||
bfr.Add_byte(atr_quote);
|
||||
bfr.Add_int_variable(val);
|
||||
bfr.Add_byte(atr_quote);
|
||||
}
|
||||
}
|
||||
307
400_xowa/src/gplx/langs/htmls/encoders/Url_encoder.java
Normal file
307
400_xowa/src/gplx/langs/htmls/encoders/Url_encoder.java
Normal file
@@ -0,0 +1,307 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.langs.htmls.encoders; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
|
||||
import gplx.core.btries.*;
|
||||
import gplx.xowa.parsers.amps.*;
|
||||
// Table-driven byte encoder / decoder for urls, html ids, hrefs and file-system names.
// Each of the 256 possible byte values maps to an Url_encoder_itm in encode_ary / decode_ary; the static new_*
// factories fill the tables for one particular scheme.
// Threading: every method that touches the shared tmp_bfr does so while holding thread_lock.
public class Url_encoder implements Url_encoder_interface {
	private Url_encoder_itm[] encode_ary = new Url_encoder_itm[256], decode_ary = new Url_encoder_itm[256];
	private Bry_bfr tmp_bfr = Bry_bfr.reset_(255);
	private Url_encoder anchor_encoder = null;			// when set, everything after the first '#' is handled by this encoder
	private Object thread_lock = new Object();

	// Resets both tables: every byte encodes to <marker>XX hex and decodes to itself; only the marker byte decodes as hex.
	public void Itms_ini(byte primary_encode_marker) {
		Url_encoder_itm_hex hex = new Url_encoder_itm_hex(primary_encode_marker);
		for (int i = 0; i < 256; i++) {
			encode_ary[i] = hex;						// default encode to hex
			decode_ary[i] = Url_encoder_itm_same._;		// default decode to same; needed for files; EX: A!%21.png -> A!!.png;
		}
		decode_ary[primary_encode_marker & 0xff] = hex;	// PATCH.JAVA:need to convert to unsigned byte
	}
	// Registers each value in ary as hex-encoded (both directions) and makes the marker byte decode as hex.
	public void Itms_raw_diff_many(byte primary_encode_marker, int... ary) {
		Url_encoder_itm_hex hex = new Url_encoder_itm_hex(primary_encode_marker);
		int ary_len = ary.length;
		for (int i = 0; i < ary_len; i++) {
			encode_ary[ary[i]] = hex;
			decode_ary[ary[i]] = hex;
		}
		decode_ary[primary_encode_marker & 0xff] = hex;	// PATCH.JAVA:need to convert to unsigned byte
	}
	// Makes decode_marker start a hex sequence when decoding.
	public void Itms_decode_marker(byte decode_marker) {
		Url_encoder_itm_hex hex = new Url_encoder_itm_hex(decode_marker);
		decode_ary[decode_marker & 0xff] = hex;			// PATCH.JAVA:need to convert to unsigned byte
	}
	// Registers a decode-only item for the byte orig, built from (orig, repl).
	public void Itms_decode_diff(byte orig, byte repl) {
		decode_ary[orig & 0xff] = new Url_encoder_itm_diff(orig, repl);	// PATCH.JAVA:need to convert to unsigned byte
	}
	// Registers every value in bgn..end (both inclusive) as passed through unchanged in both directions.
	public void Itms_raw_same_rng(int bgn, int end) {
		for (int i = bgn; i <= end; i++) {
			encode_ary[i] = Url_encoder_itm_same._;
			decode_ary[i] = Url_encoder_itm_same._;
		}
	}
	// Registers each value in ary as passed through unchanged in both directions.
	public Url_encoder Itms_raw_same_many(int... ary) {
		int ary_len = ary.length;
		for (int i = 0; i < ary_len; i++) {
			encode_ary[ary[i]] = Url_encoder_itm_same._;
			decode_ary[ary[i]] = Url_encoder_itm_same._;
		}
		return this;
	}
	// Registers src (encode only) as the start of an html entity to be looked up in trie.
	public void Itms_raw_html_ent(byte src, Btrie_slim_mgr trie) {
		Url_encoder_itm_html_ent itm = new Url_encoder_itm_html_ent(trie);
		encode_ary[src & 0xff] = itm;					// PATCH.JAVA:need to convert to unsigned byte
	}
	// Registers a one-to-one substitution: src encodes to trg, and trg decodes back to src.
	public Url_encoder Itms_raw_diff(byte src, byte trg) {
		Url_encoder_itm_diff itm = new Url_encoder_itm_diff(src, trg);
		encode_ary[src & 0xff] = itm;					// PATCH.JAVA:need to convert to unsigned byte
		decode_ary[trg & 0xff] = itm;					// PATCH.JAVA:need to convert to unsigned byte
		return this;
	}
	// Returns Io_url.Http_file_bry followed by the encoded raw bytes of url.
	public byte[] Encode_http(Io_url url) {
		synchronized (thread_lock) {
			tmp_bfr.Add(Io_url.Http_file_bry);
			Encode(tmp_bfr, url.RawBry());
			return tmp_bfr.Xto_bry_and_clear();
		}
	}
	public String Encode_str(String str) {
		synchronized (thread_lock) {
			byte[] bry = Bry_.new_u8(str); Encode(tmp_bfr, bry, 0, bry.length); return tmp_bfr.Xto_str_and_clear();
		}
	}
	public byte[] Encode_bry(String str) {
		synchronized (thread_lock) {
			byte[] bry = Bry_.new_u8(str); Encode(tmp_bfr, bry, 0, bry.length); return tmp_bfr.Xto_bry_and_clear();
		}
	}
	// Encodes all of bry through the shared buffer; the lock is held until the buffer has been drained so that
	// a concurrent caller cannot interleave its output (the nested lock in Encode(bfr,..) is reentrant).
	public byte[] Encode(byte[] bry) {
		synchronized (thread_lock) {
			Encode(tmp_bfr, bry, 0, bry.length); return tmp_bfr.Xto_bry_and_clear();
		}
	}
	public Bry_bfr Encode(Bry_bfr bfr, byte[] bry) {Encode(bfr, bry, 0, bry.length); return bfr;}
	// Encodes bry[bgn..end) into bfr, one table lookup per byte.
	public void Encode(Bry_bfr bfr, byte[] bry, int bgn, int end) {
		synchronized (thread_lock) {
			for (int i = bgn; i < end; i++) {
				byte b = bry[i];
				if (anchor_encoder != null && b == Byte_ascii.Hash) {	// '#' is copied; the rest is the anchor encoder's job
					bfr.Add_byte(Byte_ascii.Hash);
					anchor_encoder.Encode(bfr, bry, i + 1, end);
					break;
				}
				Url_encoder_itm itm = encode_ary[b & 0xff];// PATCH.JAVA:need to convert to unsigned byte
				i += itm.Encode(bfr, bry, end, i, b);		// item returns the number of extra source bytes it consumed
			}
		}
	}
	// Decodes str; invalid hex sequences raise an error (fail_when_invalid is true).
	public String Decode_str(String str) {
		synchronized (thread_lock) {
			byte[] bry = Bry_.new_u8(str); Decode(bry, 0, bry.length, tmp_bfr, true); return tmp_bfr.Xto_str_and_clear();
		}
	}
	// The two overloads below use the shared buffer, so they hold the lock until it has been drained.
	public byte[] Decode(byte[] bry) {
		synchronized (thread_lock) {
			return Decode(tmp_bfr, bry, 0, bry.length);
		}
	}
	public byte[] Decode(byte[] bry, int bgn, int end) {
		synchronized (thread_lock) {
			return Decode(tmp_bfr, bry, bgn, end);
		}
	}
	// Decodes into the caller's bfr (lax mode), then returns bfr's content and clears it.
	public byte[] Decode(Bry_bfr bfr, byte[] bry, int bgn, int end) {Decode(bry, bgn, end, bfr , false); return bfr.Xto_bry_and_clear();}
	public byte[] Decode_lax(byte[] bry) {
		synchronized (thread_lock) {
			Decode(bry, 0, bry.length, tmp_bfr, false); return tmp_bfr.Xto_bry_and_clear();
		}
	}
	// Decodes bry[bgn..end) into bfr, one table lookup per byte. fail_when_invalid is handed to each item;
	// the part after '#' is always decoded in lax mode by the anchor encoder.
	public void Decode(byte[] bry, int bgn, int end, Bry_bfr bfr, boolean fail_when_invalid) {
		synchronized (thread_lock) {
			for (int i = bgn; i < end; i++) {
				byte b = bry[i];
				if (anchor_encoder != null && b == Byte_ascii.Hash) {
					bfr.Add_byte(Byte_ascii.Hash);
					anchor_encoder.Decode(bry, i + 1, end, bfr, false);
					break;
				}
				Url_encoder_itm itm = decode_ary[b & 0xff];// PATCH.JAVA:need to convert to unsigned byte
				i += itm.Decode(bfr, bry, end, i, b, fail_when_invalid);
			}
		}
	}
	// Shared base set: digits, ascii letters, '-', '.', '_' pass through unchanged.
	// NOTE: when encode_colon is true, ':' is ALSO registered as pass-through (i.e. it is left as-is).
	private static void mediawiki_base(Url_encoder rv, boolean encode_colon) {
		rv.Itms_raw_same_rng(Byte_ascii.Num_0, Byte_ascii.Num_9);
		rv.Itms_raw_same_rng(Byte_ascii.Ltr_A, Byte_ascii.Ltr_Z);
		rv.Itms_raw_same_rng(Byte_ascii.Ltr_a, Byte_ascii.Ltr_z);
		rv.Itms_raw_same_many(Byte_ascii.Dash, Byte_ascii.Dot, Byte_ascii.Underline);
		if (encode_colon)
			rv.Itms_raw_same_many(Byte_ascii.Colon);
	}
	// Html id encoder: '.' is the hex marker, ' ' <-> '_', and '&' is looked up as an html entity.
	public static Url_encoder new_html_id_() {
		Url_encoder rv = new Url_encoder();
		rv.Itms_ini(Byte_ascii.Dot);
		mediawiki_base(rv, true);
		rv.Itms_decode_marker(Byte_ascii.Dot);
		rv.Itms_raw_diff(Byte_ascii.Space, Byte_ascii.Underline);
		rv.Itms_raw_html_ent(Byte_ascii.Amp, Xop_amp_trie._);
		return rv;
	}
	// Http url encoder: '%' is the hex marker, ' ' <-> '+', ':' is hex-encoded.
	public static Url_encoder new_http_url_() {
		Url_encoder rv = new Url_encoder();
		rv.Itms_ini(Byte_ascii.Percent);
		mediawiki_base(rv, false);
		rv.Itms_raw_diff(Byte_ascii.Space, Byte_ascii.Plus);
		return rv;
	}
	// '%' hex marker plus the base set with ':' passed through.
	public static Url_encoder new_http_url_ttl_() {
		Url_encoder rv = new Url_encoder();
		rv.Itms_ini(Byte_ascii.Percent);
		mediawiki_base(rv, true);
		return rv;
	}
	// Currently configured exactly like new_http_url_ttl_.
	public static Url_encoder new_http_url_space_is_space() {
		Url_encoder rv = new Url_encoder();
		rv.Itms_ini(Byte_ascii.Percent);
		mediawiki_base(rv, true);
		return rv;
	}
	// File-system encoder: '/' passes through and '\' <-> '/'.
	public static Url_encoder new_fsys_lnx_() {
		Url_encoder rv = new Url_encoder();
		rv.Itms_ini(Byte_ascii.Percent);
		mediawiki_base(rv, true);
		rv.Itms_raw_same_many(Byte_ascii.Slash);
		rv.Itms_raw_diff(Byte_ascii.Backslash, Byte_ascii.Slash);
		return rv;
	}
	// File-system "safe" encoder: alphanumerics and the listed symbols pass through; everything else (EX: ':' and '/') is hex-encoded.
	public static Url_encoder new_fsys_wnt_() {
		Url_encoder rv = new Url_encoder();
		rv.Itms_ini(Byte_ascii.Percent);
		rv.Itms_raw_same_rng(Byte_ascii.Num_0, Byte_ascii.Num_9);
		rv.Itms_raw_same_rng(Byte_ascii.Ltr_A, Byte_ascii.Ltr_Z);
		rv.Itms_raw_same_rng(Byte_ascii.Ltr_a, Byte_ascii.Ltr_z);
		rv.Itms_raw_same_many
		( Byte_ascii.Bang, Byte_ascii.At, Byte_ascii.Hash, Byte_ascii.Dollar, Byte_ascii.Percent, Byte_ascii.Pow, Byte_ascii.Amp
		, Byte_ascii.Plus, Byte_ascii.Eq, Byte_ascii.Underline, Byte_ascii.Dash
		, Byte_ascii.Dot, Byte_ascii.Comma
		, Byte_ascii.Tick, Byte_ascii.Tilde, Byte_ascii.Brack_bgn, Byte_ascii.Brack_end, Byte_ascii.Curly_bgn, Byte_ascii.Curly_end);
		return rv;
	}
	// Currently configured exactly like new_http_url_ttl_.
	public static Url_encoder new_file_() {
		Url_encoder rv = new Url_encoder();
		rv.Itms_ini(Byte_ascii.Percent);
		mediawiki_base(rv, true);
		return rv;
	}
	// Base set plus '(', ')', ''' and ';' passed through.
	public static Url_encoder new_gfs_() {
		Url_encoder rv = new Url_encoder();
		rv.Itms_ini(Byte_ascii.Percent);
		rv.Itms_raw_same_many(Byte_ascii.Paren_bgn, Byte_ascii.Paren_end, Byte_ascii.Apos, Byte_ascii.Semic);
		mediawiki_base(rv, true);
		return rv;
	}
	// Href encoder: '%' hex for the title part, ' ' <-> '_', and the html-id encoder for everything after '#'.
	public static Url_encoder new_html_href_mw_() {
		Url_encoder rv = new Url_encoder();
		rv.Itms_ini(Byte_ascii.Percent);
		mediawiki_base(rv, true);
		rv.Itms_raw_diff(Byte_ascii.Space, Byte_ascii.Underline);
		rv.Itms_raw_same_many(Byte_ascii.Semic, Byte_ascii.At, Byte_ascii.Dollar, Byte_ascii.Bang, Byte_ascii.Star
		, Byte_ascii.Paren_bgn, Byte_ascii.Paren_end, Byte_ascii.Comma, Byte_ascii.Slash, Byte_ascii.Colon
		, Byte_ascii.Hash// NOTE: not part of wfUrlEncode; not sure where this is specified; needed for A#b
		);
		rv.anchor_encoder = new_html_id_();
		return rv;
	}
	// Href encoder that only touches quote-like characters.
	public static Url_encoder new_html_href_quotes_() {
		Url_encoder rv = new Url_encoder();
		rv.Itms_ini(Byte_ascii.Percent);
		rv.Itms_raw_same_rng(0, 255);								// default everything to same;
		rv.Itms_raw_diff_many(Byte_ascii.Percent
		, Byte_ascii.Apos, Byte_ascii.Quote, Byte_ascii.Lt, Byte_ascii.Gt);	// encode ', ", <, >
		rv.Itms_raw_diff(Byte_ascii.Space, Byte_ascii.Underline);	// convert " " to "_"
		return rv;
	}
}
|
||||
interface Url_encoder_itm {
|
||||
int Encode(Bry_bfr bfr, byte[] src, int end, int idx, byte b);
|
||||
int Decode(Bry_bfr bfr, byte[] src, int end, int idx, byte b, boolean fail_when_invalid);
|
||||
}
|
||||
class Url_encoder_itm_same implements Url_encoder_itm {
|
||||
public int Encode(Bry_bfr bfr, byte[] src, int end, int idx, byte b) {bfr.Add_byte(b); return 0;}
|
||||
public int Decode(Bry_bfr bfr, byte[] src, int end, int idx, byte b, boolean fail_when_invalid) {bfr.Add_byte(b); return 0;}
|
||||
public static final Url_encoder_itm _ = new Url_encoder_itm_same();
|
||||
}
|
||||
class Url_encoder_itm_diff implements Url_encoder_itm {
|
||||
public Url_encoder_itm_diff(byte orig, byte repl) {this.orig = orig; this.repl = repl;} private byte orig, repl;
|
||||
public int Encode(Bry_bfr bfr, byte[] src, int end, int idx, byte b) {bfr.Add_byte(repl); return 0;}
|
||||
public int Decode(Bry_bfr bfr, byte[] src, int end, int idx, byte b, boolean fail_when_invalid) {bfr.Add_byte(orig); return 0;}
|
||||
}
|
||||
class Url_encoder_itm_hex implements Url_encoder_itm {
|
||||
public Url_encoder_itm_hex(byte encode_marker) {this.encode_marker = encode_marker;} private byte encode_marker;
|
||||
public int Encode(Bry_bfr bfr, byte[] src, int end, int idx, byte b) {Encode_byte(b, bfr, encode_marker); return 0;}
|
||||
public static void Encode_byte(byte b, Bry_bfr bfr, byte encode_marker) {
|
||||
int b_int = b & 0xFF;// PATCH.JAVA:need to convert to unsigned byte
|
||||
bfr.Add_byte(encode_marker);
|
||||
bfr.Add_byte(HexBytes[b_int >> 4]);
|
||||
bfr.Add_byte(HexBytes[b_int & 15]);
|
||||
}
|
||||
public int Decode(Bry_bfr bfr, byte[] src, int end, int idx, byte b, boolean fail_when_invalid) {
|
||||
if (idx + 2 >= end) {
|
||||
if (fail_when_invalid) throw Err_.new_wo_type("decode needs 3 bytes", "idx", idx, "len", end, "snip", String_.new_u8(Bry_.Mid_by_len_safe(src, idx, 3)));
|
||||
else {
|
||||
bfr.Add_byte(b);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
int hex_val = Int_.Xto_int_hex(src[idx + 1]);
|
||||
if (hex_val == -1) { // invalid hex byte; EX: %GC; DATE:2014-04-10
|
||||
bfr.Add_byte(b);
|
||||
return 0;
|
||||
}
|
||||
int v_0 = hex_val * 16;
|
||||
if (v_0 != -1) {
|
||||
int v_1 = Int_.Xto_int_hex(src[idx + 2]);
|
||||
if (v_1 != -1) {
|
||||
bfr.Add_byte((byte)(v_0 + v_1));
|
||||
return 2;
|
||||
}
|
||||
}
|
||||
if (fail_when_invalid)
|
||||
throw Err_.new_wo_type("decode is invalid", "idx", idx, "snip", String_.new_u8(Bry_.Mid_by_len_safe(src, idx, 3)));
|
||||
else {
|
||||
bfr.Add_byte(b);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
public static final byte[] HexBytes = new byte[]
|
||||
{ Byte_ascii.Num_0, Byte_ascii.Num_1, Byte_ascii.Num_2, Byte_ascii.Num_3, Byte_ascii.Num_4, Byte_ascii.Num_5, Byte_ascii.Num_6, Byte_ascii.Num_7
|
||||
, Byte_ascii.Num_8, Byte_ascii.Num_9, Byte_ascii.Ltr_A, Byte_ascii.Ltr_B, Byte_ascii.Ltr_C, Byte_ascii.Ltr_D, Byte_ascii.Ltr_E, Byte_ascii.Ltr_F
|
||||
};
|
||||
}
|
||||
class Url_encoder_itm_html_ent implements Url_encoder_itm {
|
||||
public Url_encoder_itm_html_ent(Btrie_slim_mgr amp_trie) {this.amp_trie = amp_trie;} Btrie_slim_mgr amp_trie;
|
||||
public int Encode(Bry_bfr bfr, byte[] src, int end, int idx, byte b) {
|
||||
++idx; // b is &; get next character afterwards
|
||||
if (idx == end) { // & is last char; return
|
||||
Url_encoder_itm_hex.Encode_byte(Byte_ascii.Amp, bfr, Byte_ascii.Dot);
|
||||
return 0;
|
||||
}
|
||||
b = src[idx];
|
||||
Object o = amp_trie.Match_bgn_w_byte(b, src, idx, end);
|
||||
if (o == null) { // unknown entity (EX:&unknown;); return &;
|
||||
Url_encoder_itm_hex.Encode_byte(Byte_ascii.Amp, bfr, Byte_ascii.Dot);
|
||||
return 0;
|
||||
}
|
||||
else {
|
||||
Xop_amp_trie_itm itm = (Xop_amp_trie_itm)o;
|
||||
byte[] bry_u8 = itm.U8_bry(); // NOTE: must utf8 encode val; EX: is 160 but must become 192,160
|
||||
for (int i = 0; i < bry_u8.length; i++)
|
||||
Url_encoder_itm_hex.Encode_byte(bry_u8[i], bfr, Byte_ascii.Dot);
|
||||
return itm.Xml_name_bry().length - 1; // -1 to ignore & in XmlEntityName
|
||||
}
|
||||
}
|
||||
public int Decode(Bry_bfr bfr, byte[] src, int end, int idx, byte b, boolean fail_when_invalid) {
|
||||
bfr.Add_byte(b); return 0;
|
||||
}
|
||||
}
|
||||
30
400_xowa/src/gplx/langs/htmls/encoders/Url_encoder_mgr.java
Normal file
30
400_xowa/src/gplx/langs/htmls/encoders/Url_encoder_mgr.java
Normal file
@@ -0,0 +1,30 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.langs.htmls.encoders; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
|
||||
public class Url_encoder_mgr {
|
||||
public Url_encoder File() {return file;} private final Url_encoder file = Url_encoder.new_file_();
|
||||
public Url_encoder Http_url() {return http_url;} private final Url_encoder http_url = Url_encoder.new_http_url_();
|
||||
public Url_encoder Http_url_ttl() {return http_url_ttl;} private final Url_encoder http_url_ttl = Url_encoder.new_http_url_ttl_();
|
||||
public Url_encoder Id() {return html_id;} private final Url_encoder html_id = Url_encoder.new_html_id_();
|
||||
public Url_encoder Href() {return href;} private final Url_encoder href = Url_encoder.new_html_href_mw_();
|
||||
public Url_encoder Href_quotes() {return href_quotes;} private final Url_encoder href_quotes = Url_encoder.new_html_href_quotes_();
|
||||
public Url_encoder Gfs() {return gfs;} private final Url_encoder gfs = Url_encoder.new_gfs_();
|
||||
public Url_encoder Fsys() {return fsys;} private final Url_encoder fsys = Url_encoder.new_fsys_lnx_();
|
||||
public Url_encoder Fsys_safe() {return fsys_safe;} private final Url_encoder fsys_safe = Url_encoder.new_fsys_wnt_();
|
||||
public Url_encoder Xourl() {return xourl;} private final Url_encoder xourl = Url_encoder.new_html_href_mw_().Itms_raw_same_many(Byte_ascii.Underline);
|
||||
}
|
||||
72
400_xowa/src/gplx/langs/htmls/encoders/Url_encoder_tst.java
Normal file
72
400_xowa/src/gplx/langs/htmls/encoders/Url_encoder_tst.java
Normal file
@@ -0,0 +1,72 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.langs.htmls.encoders; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
|
||||
import org.junit.*;
|
||||
public class Url_encoder_tst {
|
||||
@Before public void init() {fxt = new Url_encoder_fxt();} Url_encoder_fxt fxt;
|
||||
@Test public void Id_nums() {fxt.Encoder_id().Test_encode_decode("0123456789", "0123456789");}
|
||||
@Test public void Id_ltrs_lower() {fxt.Encoder_id().Test_encode_decode("abcdefghijklmnopqrstuvwxyz", "abcdefghijklmnopqrstuvwxyz");}
|
||||
@Test public void Id_ltrs_upper() {fxt.Encoder_id().Test_encode_decode("ABCDEFGHIJKLMNOPQRSTUVWXYZ", "ABCDEFGHIJKLMNOPQRSTUVWXYZ");}
|
||||
@Test public void Id_syms() {fxt.Encoder_id().Test_encode("!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", ".21.22.23.24.25.26.27.28.29.2A.2B.2C-..2F:.3B.3C.3D.3E.3F.40.5B.5C.5D.5E_.60.7B.7C.7D.7E");} // NOTE: not reversible since "." is encode_marker but not encoded
|
||||
@Test public void Id_foreign() {fxt.Encoder_id().Test_encode_decode("aéb", "a.C3.A9b");}
|
||||
@Test public void Id_space() {fxt.Encoder_id().Test_encode_decode("a b", "a_b");}
|
||||
@Test public void Id_err() {
|
||||
byte[] raw = Bry_.new_a7("0%.jpg");
|
||||
Bry_bfr tmp_bfr = Bry_bfr.new_();
|
||||
fxt.Encoder_id().Encoder().Decode(raw, 0, raw.length, tmp_bfr, false);
|
||||
Tfds.Eq("0%.jpg", tmp_bfr.Xto_str_and_clear());
|
||||
}
|
||||
@Test public void Id_nbsp() {fxt.Encoder_id().Test_encode("a b", "a.C2.A0b");} // NOTE: not just .A0 (160) but utf8-encoded .C2.A0
|
||||
@Test public void Url_syms() {fxt.Encoder_url().Test_encode_decode("!?^~", "%21%3F%5E%7E");}
|
||||
@Test public void Url_foreign() {fxt.Encoder_url().Test_encode_decode("aéb", "a%C3%A9b");}
|
||||
@Test public void Url_space() {fxt.Encoder_url().Test_encode_decode("a b", "a+b");}
|
||||
@Test public void File_space() {
|
||||
fxt.Encoder_href().Test_encode("a b", "a_b");
|
||||
// fxt.Encoder_url().tst_decode("a_b", "a_b");
|
||||
}
|
||||
@Test public void Href_special_and_anchor() { // PURPOSE: MediaWiki encodes with % for ttls, but . for anchors; REF:Title.php!(before-anchor)getLocalUrl;wfUrlencode (after-anchor)escapeFragmentForURL
|
||||
fxt.Encoder_href().Test_encode("^#^", "%5E#.5E");
|
||||
fxt.Encoder_href().Test_encode("A#", "A#");
|
||||
fxt.Encoder_href().tst_decode("%5E#.5E", "^#^");
|
||||
}
|
||||
@Test public void Fsys_wnt() {
|
||||
fxt.Encoder_fsys_safe().Test_encode("Help:Options/HTML", "Help%3AOptions%2FHTML");
|
||||
}
|
||||
@Test public void Invalid_url_decode() { // PURPOSE: check that invalid url decodings are rendered literally; DATE:2014-04-10
|
||||
fxt.Encoder_href().Test_encode("%GC", "%25GC");
|
||||
}
|
||||
}
|
||||
class Url_encoder_fxt {
|
||||
public Url_encoder Encoder() {return encoder;} Url_encoder encoder;
|
||||
public Url_encoder_fxt Encoder_id() {encoder = Url_encoder.new_html_id_(); return this;}
|
||||
public Url_encoder_fxt Encoder_href() {encoder = Url_encoder.new_html_href_mw_(); return this;}
|
||||
public Url_encoder_fxt Encoder_url() {encoder = Url_encoder.new_http_url_(); return this;}
|
||||
public Url_encoder_fxt Encoder_fsys_safe() {encoder = Url_encoder.new_fsys_wnt_(); return this;}
|
||||
public void Test_encode_decode(String raw, String encoded) {
|
||||
Test_encode(raw, encoded);
|
||||
tst_decode(encoded, raw);
|
||||
}
|
||||
public void Test_encode(String raw, String expd) {
|
||||
byte[] bry = encoder.Encode(Bry_.new_u8(raw));
|
||||
Tfds.Eq(expd, String_.new_u8(bry));
|
||||
}
|
||||
public void tst_decode(String raw, String expd) {
|
||||
byte[] bry = encoder.Decode(Bry_.new_u8(raw));
|
||||
Tfds.Eq(expd, String_.new_u8(bry));
|
||||
}
|
||||
}
|
||||
24
400_xowa/src/gplx/langs/htmls/parsers/Gfo_html_node.java
Normal file
24
400_xowa/src/gplx/langs/htmls/parsers/Gfo_html_node.java
Normal file
@@ -0,0 +1,24 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.langs.htmls.parsers; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
|
||||
// Immutable value holder: a source byte array plus a begin / end position pair.
// The array is stored by reference (not copied) and the positions are not validated.
class Gfo_html_node {
	private final byte[] src;
	private final int bgn;
	private final int end;
	public Gfo_html_node(byte[] src, int bgn, int end) {
		this.src = src;
		this.bgn = bgn;
		this.end = end;
	}
	public byte[] Src()	{return this.src;}
	public int Bgn()	{return this.bgn;}
	public int End()	{return this.end;}
}
|
||||
69
400_xowa/src/gplx/langs/htmls/parsers/Gfo_html_parser.java
Normal file
69
400_xowa/src/gplx/langs/htmls/parsers/Gfo_html_parser.java
Normal file
@@ -0,0 +1,69 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.langs.htmls.parsers; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
|
||||
import gplx.core.btries.*; import gplx.core.primitives.*;
|
||||
import gplx.xowa.*;
|
||||
import gplx.xowa.parsers.xndes.*;
|
||||
class Gfo_html_parser {
|
||||
private final Gfo_msg_log msg_log = Gfo_msg_log.Test();
|
||||
private final Xop_xatr_parser xatr_parser = new Xop_xatr_parser();
|
||||
public void Parse(Gfo_html_wkr handler, byte[] src, int bgn, int end) {
|
||||
// int src_len = src.length;
|
||||
// int prv_pos = 0;
|
||||
// int css_find_bgn_len = Css_find_bgn.length;
|
||||
// byte[] protocol_prefix_bry = Bry_.new_u8(protocol_prefix);
|
||||
// while (true) {
|
||||
// int url_bgn = Bry_find_.Find_fwd(src, Css_find_bgn, prv_pos); if (url_bgn == Bry_.NotFound) break; // nothing left; stop
|
||||
// url_bgn += css_find_bgn_len;
|
||||
// int url_end = Bry_find_.Find_fwd(src, Byte_ascii.Quote, url_bgn, src_len); if (url_end == Bry_.NotFound) {usr_dlg.Warn_many("", "main_page.css_parse", "could not find css; pos='~{0}' text='~{1}'", url_bgn, String_.new_u8__by_len(src, url_bgn, url_bgn + 32)); break;}
|
||||
// byte[] css_url_bry = Bry_.Mid(src, url_bgn, url_end);
|
||||
// css_url_bry = Bry_.Replace(css_url_bry, Css_amp_find, Css_amp_repl); // & -> &
|
||||
// css_url_bry = url_encoder.Decode(css_url_bry); // %2C -> %7C -> |
|
||||
// css_url_bry = Bry_.Add(protocol_prefix_bry, css_url_bry);
|
||||
// rv.Add(String_.new_u8(css_url_bry));
|
||||
// prv_pos = url_end;
|
||||
// }
|
||||
// return rv.XtoStrAry();
|
||||
int src_len = src.length; int pos = 0;
|
||||
while (pos < src_len) {
|
||||
byte b = src[pos];
|
||||
switch (b) {
|
||||
case Byte_ascii.Angle_bgn:
|
||||
pos = Parse_node(handler, src, end, pos, pos + 1);
|
||||
break;
|
||||
default:
|
||||
++pos;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
private int Parse_node(Gfo_html_wkr handler, byte[] src, int end, int tkn_bgn, int tkn_end) {
|
||||
int name_bgn = tkn_end;
|
||||
int name_end = Bry_find_.Find_fwd_until_ws(src, name_bgn, end);
|
||||
if (name_end == Bry_find_.Not_found) return end; // EOS; EX: "<abcEOS"
|
||||
if (name_bgn == name_end) return tkn_end; // ws; EX: "< "
|
||||
Object o = handler.Get_or_null(src, name_bgn, name_end);
|
||||
if (o == null) return name_end; // unknown name: EX: "<unknown >"
|
||||
int node_end = Bry_find_.Find_fwd(src, Byte_ascii.Angle_end, name_end, end);
|
||||
if (node_end == Bry_find_.Not_found) return end; // EOS; EX: "<name lots_of_text_but_no_gt EOS"
|
||||
Xop_xatr_itm[] xatr_ary = xatr_parser.Parse(msg_log, src, name_end, node_end);
|
||||
Gfo_html_tkn tkn = (Gfo_html_tkn)o;
|
||||
tkn.Process(src, Xop_xatr_hash.new_ary(src, xatr_ary));
|
||||
return node_end;
|
||||
}
|
||||
}
|
||||
22
400_xowa/src/gplx/langs/htmls/parsers/Gfo_html_wkr.java
Normal file
22
400_xowa/src/gplx/langs/htmls/parsers/Gfo_html_wkr.java
Normal file
@@ -0,0 +1,22 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.langs.htmls.parsers; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
|
||||
interface Gfo_html_wkr {
|
||||
Gfo_html_tkn Get_or_null(byte[] src, int bgn, int end);
|
||||
void Process(Gfo_html_node node);
|
||||
}
|
||||
34
400_xowa/src/gplx/langs/htmls/parsers/Xob_html_tkn.java
Normal file
34
400_xowa/src/gplx/langs/htmls/parsers/Xob_html_tkn.java
Normal file
@@ -0,0 +1,34 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.langs.htmls.parsers; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
|
||||
import gplx.xowa.*;
|
||||
import gplx.xowa.parsers.xndes.*;
|
||||
interface Gfo_html_tkn {
|
||||
int Tid();
|
||||
byte[] Key();
|
||||
void Process(byte[] src, Xop_xatr_hash hash);
|
||||
}
|
||||
class Gfo_html_tkn_ {
|
||||
public static final int Tid_link = 1;
|
||||
public static final byte[] Key_link = Bry_.new_a7("link");
|
||||
}
|
||||
class Gfo_html_tkn__link implements Gfo_html_tkn {
|
||||
public int Tid() {return Gfo_html_tkn_.Tid_link;}
|
||||
public byte[] Key() {return Gfo_html_tkn_.Key_link;}
|
||||
@gplx.Virtual public void Process(byte[] src, Xop_xatr_hash hash) {}
|
||||
}
|
||||
Reference in New Issue
Block a user