mirror of
https://github.com/gnosygnu/xowa.git
synced 2026-03-02 03:49:30 +00:00
v2.11.4.1
This commit is contained in:
@@ -34,6 +34,7 @@ public class Html_tag_ {
|
||||
, Id__ul = 11
|
||||
, Id__li = 12
|
||||
, Id__p = 13
|
||||
, Id__hr = 14
|
||||
;
|
||||
public static final byte[]
|
||||
Bry__a = Bry_.new_a7("a")
|
||||
@@ -49,6 +50,8 @@ public class Html_tag_ {
|
||||
.Add_str_int("span" , Id__span)
|
||||
.Add_str_int("div" , Id__div)
|
||||
.Add_str_int("img" , Id__img)
|
||||
.Add_str_int("hr" , Id__hr)
|
||||
.Add_str_int("ul" , Id__ul)
|
||||
;
|
||||
public static String To_str(int tid) {
|
||||
switch (tid) {
|
||||
@@ -65,6 +68,7 @@ public class Html_tag_ {
|
||||
case Id__span: return "span";
|
||||
case Id__div: return "div";
|
||||
case Id__img: return "img";
|
||||
case Id__hr: return "hr";
|
||||
default: throw Err_.new_unhandled(tid);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -18,7 +18,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
package gplx.langs.htmls; import gplx.*; import gplx.langs.*;
|
||||
import gplx.core.primitives.*; import gplx.core.btries.*; import gplx.langs.htmls.encoders.*;
|
||||
public class Html_utl {
|
||||
private static final Url_encoder encoder_id = Url_encoder.new_html_id_(); private static final Bry_bfr tmp_bfr = Bry_bfr.reset_(255);
|
||||
private static final Gfo_url_encoder encoder_id = Gfo_url_encoder_.Id; private static final Bry_bfr tmp_bfr = Bry_bfr.reset_(255);
|
||||
/** Returns the String form (via String_.new_u8) of the bytes produced by Encode_id_as_bry for key. */
public static String Encode_id_as_str(byte[] key) {
	byte[] encoded = Encode_id_as_bry(key);
	return String_.new_u8(encoded);
}
|
||||
public static byte[] Encode_id_as_bry(byte[] key) {
|
||||
byte[] escaped = Escape_html_as_bry(tmp_bfr, key, Bool_.N, Bool_.N, Bool_.N, Bool_.Y, Bool_.Y);
|
||||
@@ -178,4 +178,9 @@ public class Html_utl {
|
||||
return bfr.To_bry_and_clear();
|
||||
}
|
||||
/** Returns s with apostrophes (') swapped for double quotes (") by String_.Replace. */
public static String Replace_apos(String s) {
	return String_.Replace
		( s
		, "'"		// find: apostrophe
		, "\""		// repl: double quote
		);
}
|
||||
public static void Log(Exception e, String head, byte[] page_url, byte[] src, int pos) {
|
||||
Err err = Err_.cast_or_make(e); if (err.Logged()) return;
|
||||
String msg = String_.Format("{0}; page={1} err={2} mid={3} trace={4}", head, page_url, Err_.To_str(e), Bry_.Escape_ws(Bry_.Mid_by_len_safe(src, pos, 255)), err.To_str__log());
|
||||
Gfo_usr_dlg_.Instance.Warn_many("", "", msg);
|
||||
}
|
||||
}
|
||||
|
||||
64
400_xowa/src/gplx/langs/htmls/encoders/Gfo_url_encoder.java
Normal file
64
400_xowa/src/gplx/langs/htmls/encoders/Gfo_url_encoder.java
Normal file
@@ -0,0 +1,64 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.langs.htmls.encoders; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
|
||||
import gplx.core.btries.*; import gplx.xowa.parsers.amps.*;
|
||||
public class Gfo_url_encoder implements Url_encoder_interface {
|
||||
private Gfo_url_encoder_itm[] encode_ary, decode_ary; private Gfo_url_encoder anchor_encoder = null;
|
||||
public Gfo_url_encoder(Gfo_url_encoder_itm[] encode_ary, Gfo_url_encoder_itm[] decode_ary, Gfo_url_encoder anchor_encoder) {
|
||||
this.encode_ary = encode_ary; this.decode_ary = decode_ary; this.anchor_encoder = anchor_encoder;
|
||||
}
|
||||
public String Encode_str(String str) {return String_.new_u8(Encode(Bry_.new_u8(str)));}
|
||||
public byte[] Encode_bry(String str) {return Encode(Bry_.new_u8(str));}
|
||||
public byte[] Encode(byte[] bry) {Bry_bfr bfr = Bry_bfr_.Get(); Encode(bfr, bry, 0, bry.length); return bfr.To_bry_and_rls();}
|
||||
public Bry_bfr Encode(Bry_bfr bfr, byte[] bry) { Encode(bfr, bry, 0, bry.length); return bfr;}
|
||||
public void Encode(Bry_bfr bfr, byte[] bry, int bgn, int end) {
|
||||
for (int i = bgn; i < end; ++i) {
|
||||
byte b = bry[i];
|
||||
if (anchor_encoder != null && b == Byte_ascii.Hash) {
|
||||
bfr.Add_byte(Byte_ascii.Hash);
|
||||
anchor_encoder.Encode(bfr, bry, i + 1, end);
|
||||
break;
|
||||
}
|
||||
Gfo_url_encoder_itm itm = encode_ary[b & 0xff];// PATCH.JAVA:need to convert to unsigned byte
|
||||
i += itm.Encode(bfr, bry, end, i, b);
|
||||
}
|
||||
}
|
||||
public byte[] Encode_to_file_protocol(Io_url url) {
|
||||
Bry_bfr bfr = Bry_bfr_.Get();
|
||||
bfr.Add(Io_url.Http_file_bry);
|
||||
Encode(bfr, url.RawBry());
|
||||
return bfr.To_bry_and_rls();
|
||||
}
|
||||
public String Decode_str(String str) {return String_.new_u8(Decode(Bry_.new_u8(str)));}
|
||||
public byte[] Decode(byte[] bry) {return Decode(Bool_.N, bry, 0, bry.length);}
|
||||
public byte[] Decode(byte[] bry, int bgn, int end) {return Decode(Bool_.N, bry, bgn, end);}
|
||||
private byte[] Decode(boolean fail, byte[] bry, int bgn, int end) {Bry_bfr bfr = Bry_bfr_.Get(); Decode(bfr, fail, bry, bgn, end); return bfr.To_bry_and_rls();}
|
||||
public Bry_bfr Decode(Bry_bfr bfr, boolean fail, byte[] bry, int bgn, int end) {
|
||||
for (int i = bgn; i < end; ++i) {
|
||||
byte b = bry[i];
|
||||
if (anchor_encoder != null && b == Byte_ascii.Hash) {
|
||||
bfr.Add_byte(Byte_ascii.Hash);
|
||||
anchor_encoder.Decode(bfr, Bool_.N, bry, i + 1, end);
|
||||
break;
|
||||
}
|
||||
Gfo_url_encoder_itm itm = decode_ary[b & 0xff];// PATCH.JAVA:need to convert to unsigned byte
|
||||
i += itm.Decode(bfr, bry, end, i, b, fail);
|
||||
}
|
||||
return bfr;
|
||||
}
|
||||
}
|
||||
87
400_xowa/src/gplx/langs/htmls/encoders/Gfo_url_encoder_.java
Normal file
87
400_xowa/src/gplx/langs/htmls/encoders/Gfo_url_encoder_.java
Normal file
@@ -0,0 +1,87 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.langs.htmls.encoders; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
|
||||
import gplx.core.btries.*;
|
||||
import gplx.xowa.parsers.amps.*;
|
||||
public class Gfo_url_encoder_ {
|
||||
public static final Gfo_url_encoder
|
||||
Id = Gfo_url_encoder_.New__html_id().Make()
|
||||
, Href = Gfo_url_encoder_.New__html_href_mw().Make()
|
||||
, Href_quotes = Gfo_url_encoder_.New__html_href_quotes().Make()
|
||||
, Href_qarg = Gfo_url_encoder_.New__html_href_qarg().Make()
|
||||
, Xourl = Gfo_url_encoder_.New__html_href_mw().Init__same__many(Byte_ascii.Underline).Make()
|
||||
, Http_url = Gfo_url_encoder_.New__http_url().Make()
|
||||
, Http_url_ttl = Gfo_url_encoder_.New__http_url_ttl().Make()
|
||||
, Fsys = Gfo_url_encoder_.New__fsys_lnx().Make()
|
||||
, Fsys_safe = Gfo_url_encoder_.New__fsys_wnt().Make()
|
||||
, Gfs = Gfo_url_encoder_.New__gfs().Make()
|
||||
;
|
||||
private static Gfo_url_encoder_mkr New__html_id() { // EX: "<a id='a<>b'>" -> "<a id='a.C3.A9b'>"
|
||||
return new Gfo_url_encoder_mkr().Init(Byte_ascii.Dot).Init_common(Bool_.Y)
|
||||
.Init__decode_mark(Byte_ascii.Dot)
|
||||
.Init__diff__one(Byte_ascii.Space, Byte_ascii.Underline)
|
||||
.Init__html_ent(Byte_ascii.Amp, Xop_amp_trie.Instance);
|
||||
}
|
||||
private static Gfo_url_encoder_mkr New__html_href_mw() { // EX: "<a href='^#^'>" -> "<a href='%5E#.5E'>"
|
||||
return new Gfo_url_encoder_mkr().Init(Byte_ascii.Percent).Init_common(Bool_.Y)
|
||||
.Init__diff__one(Byte_ascii.Space, Byte_ascii.Underline)
|
||||
.Init__same__many
|
||||
( Byte_ascii.Semic, Byte_ascii.At, Byte_ascii.Dollar, Byte_ascii.Bang, Byte_ascii.Star
|
||||
, Byte_ascii.Paren_bgn, Byte_ascii.Paren_end, Byte_ascii.Comma, Byte_ascii.Slash, Byte_ascii.Colon
|
||||
, Byte_ascii.Hash// NOTE: not part of wfUrlEncode; not sure where this is specified; needed for A#b
|
||||
)
|
||||
.Init__anchor_encoder(New__html_id().Make());
|
||||
}
|
||||
private static Gfo_url_encoder_mkr New__html_href_qarg() { // same as regular href encoder, but also do not encode qarg characters "?" and "="
|
||||
return New__html_href_mw().Init__same__many(Byte_ascii.Question, Byte_ascii.Eq);
|
||||
}
|
||||
private static Gfo_url_encoder_mkr New__html_href_quotes() {
|
||||
return new Gfo_url_encoder_mkr().Init(Byte_ascii.Percent)
|
||||
.Init__diff__one(Byte_ascii.Space, Byte_ascii.Underline) // convert " " to "_"
|
||||
.Init__same__rng(0, 255) // default everything to same;
|
||||
.Init__diff__many(Byte_ascii.Percent, Byte_ascii.Apos
|
||||
, Byte_ascii.Quote, Byte_ascii.Lt, Byte_ascii.Gt); // encode ', ", <, >
|
||||
}
|
||||
public static Gfo_url_encoder_mkr New__http_url() {
|
||||
return new Gfo_url_encoder_mkr().Init(Byte_ascii.Percent).Init_common(Bool_.N)
|
||||
.Init__diff__one(Byte_ascii.Space, Byte_ascii.Plus);
|
||||
}
|
||||
private static Gfo_url_encoder_mkr New__http_url_ttl() {
|
||||
return new Gfo_url_encoder_mkr().Init(Byte_ascii.Percent).Init_common(Bool_.Y);
|
||||
}
|
||||
private static Gfo_url_encoder_mkr New__fsys_lnx() {
|
||||
return new Gfo_url_encoder_mkr().Init(Byte_ascii.Percent).Init_common(Bool_.Y)
|
||||
.Init__same__many(Byte_ascii.Slash)
|
||||
.Init__diff__one(Byte_ascii.Backslash, Byte_ascii.Slash);
|
||||
}
|
||||
private static Gfo_url_encoder_mkr New__fsys_wnt() {
|
||||
return new Gfo_url_encoder_mkr().Init(Byte_ascii.Percent)
|
||||
.Init__same__rng(Byte_ascii.Num_0, Byte_ascii.Num_9)
|
||||
.Init__same__rng(Byte_ascii.Ltr_A, Byte_ascii.Ltr_Z)
|
||||
.Init__same__rng(Byte_ascii.Ltr_a, Byte_ascii.Ltr_z)
|
||||
.Init__same__many
|
||||
( Byte_ascii.Bang, Byte_ascii.At, Byte_ascii.Hash, Byte_ascii.Dollar, Byte_ascii.Percent, Byte_ascii.Pow, Byte_ascii.Amp
|
||||
, Byte_ascii.Plus, Byte_ascii.Eq, Byte_ascii.Underline, Byte_ascii.Dash
|
||||
, Byte_ascii.Dot, Byte_ascii.Comma
|
||||
, Byte_ascii.Tick, Byte_ascii.Tilde, Byte_ascii.Brack_bgn, Byte_ascii.Brack_end, Byte_ascii.Curly_bgn, Byte_ascii.Curly_end);
|
||||
}
|
||||
private static Gfo_url_encoder_mkr New__gfs() {
|
||||
return new Gfo_url_encoder_mkr().Init(Byte_ascii.Percent).Init_common(Bool_.Y)
|
||||
.Init__same__many(Byte_ascii.Paren_bgn, Byte_ascii.Paren_end, Byte_ascii.Apos, Byte_ascii.Semic);
|
||||
}
|
||||
}
|
||||
101
400_xowa/src/gplx/langs/htmls/encoders/Gfo_url_encoder_itm.java
Normal file
101
400_xowa/src/gplx/langs/htmls/encoders/Gfo_url_encoder_itm.java
Normal file
@@ -0,0 +1,101 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.langs.htmls.encoders; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
|
||||
import gplx.core.btries.*; import gplx.xowa.parsers.amps.*;
|
||||
public interface Gfo_url_encoder_itm {
|
||||
int Encode(Bry_bfr bfr, byte[] src, int end, int idx, byte b);
|
||||
int Decode(Bry_bfr bfr, byte[] src, int end, int idx, byte b, boolean fail_when_invalid);
|
||||
}
|
||||
class Gfo_url_encoder_itm_same implements Gfo_url_encoder_itm {
|
||||
public int Encode(Bry_bfr bfr, byte[] src, int end, int idx, byte b) {bfr.Add_byte(b); return 0;}
|
||||
public int Decode(Bry_bfr bfr, byte[] src, int end, int idx, byte b, boolean fail_when_invalid) {bfr.Add_byte(b); return 0;}
|
||||
public static final Gfo_url_encoder_itm Instance = new Gfo_url_encoder_itm_same();
|
||||
}
|
||||
class Gfo_url_encoder_itm_diff implements Gfo_url_encoder_itm {
|
||||
public Gfo_url_encoder_itm_diff(byte orig, byte repl) {this.orig = orig; this.repl = repl;} private byte orig, repl;
|
||||
public int Encode(Bry_bfr bfr, byte[] src, int end, int idx, byte b) {bfr.Add_byte(repl); return 0;}
|
||||
public int Decode(Bry_bfr bfr, byte[] src, int end, int idx, byte b, boolean fail_when_invalid) {bfr.Add_byte(orig); return 0;}
|
||||
}
|
||||
class Gfo_url_encoder_itm_hex implements Gfo_url_encoder_itm {
|
||||
public Gfo_url_encoder_itm_hex(byte encode_marker) {this.encode_marker = encode_marker;} private byte encode_marker;
|
||||
public int Encode(Bry_bfr bfr, byte[] src, int end, int idx, byte b) {Encode_byte(b, bfr, encode_marker); return 0;}
|
||||
public static void Encode_byte(byte b, Bry_bfr bfr, byte encode_marker) {
|
||||
int b_int = b & 0xFF;// PATCH.JAVA:need to convert to unsigned byte
|
||||
bfr.Add_byte(encode_marker);
|
||||
bfr.Add_byte(HexBytes[b_int >> 4]);
|
||||
bfr.Add_byte(HexBytes[b_int & 15]);
|
||||
}
|
||||
public int Decode(Bry_bfr bfr, byte[] src, int end, int idx, byte b, boolean fail_when_invalid) {
|
||||
if (idx + 2 >= end) {
|
||||
if (fail_when_invalid) throw Err_.new_wo_type("decode needs 3 bytes", "idx", idx, "len", end, "snip", String_.new_u8(Bry_.Mid_by_len_safe(src, idx, 3)));
|
||||
else {
|
||||
bfr.Add_byte(b);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
int hex_val = Int_.To_int_hex(src[idx + 1]);
|
||||
if (hex_val == -1) { // invalid hex byte; EX: %GC; DATE:2014-04-10
|
||||
bfr.Add_byte(b);
|
||||
return 0;
|
||||
}
|
||||
int v_0 = hex_val * 16;
|
||||
if (v_0 != -1) {
|
||||
int v_1 = Int_.To_int_hex(src[idx + 2]);
|
||||
if (v_1 != -1) {
|
||||
bfr.Add_byte((byte)(v_0 + v_1));
|
||||
return 2;
|
||||
}
|
||||
}
|
||||
if (fail_when_invalid)
|
||||
throw Err_.new_wo_type("decode is invalid", "idx", idx, "snip", String_.new_u8(Bry_.Mid_by_len_safe(src, idx, 3)));
|
||||
else {
|
||||
bfr.Add_byte(b);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
public static final byte[] HexBytes = new byte[]
|
||||
{ Byte_ascii.Num_0, Byte_ascii.Num_1, Byte_ascii.Num_2, Byte_ascii.Num_3, Byte_ascii.Num_4, Byte_ascii.Num_5, Byte_ascii.Num_6, Byte_ascii.Num_7
|
||||
, Byte_ascii.Num_8, Byte_ascii.Num_9, Byte_ascii.Ltr_A, Byte_ascii.Ltr_B, Byte_ascii.Ltr_C, Byte_ascii.Ltr_D, Byte_ascii.Ltr_E, Byte_ascii.Ltr_F
|
||||
};
|
||||
}
|
||||
class Gfo_url_encoder_itm_html_ent implements Gfo_url_encoder_itm {
|
||||
public Gfo_url_encoder_itm_html_ent(Btrie_slim_mgr amp_trie) {this.amp_trie = amp_trie;} Btrie_slim_mgr amp_trie;
|
||||
public int Encode(Bry_bfr bfr, byte[] src, int end, int idx, byte b) {
|
||||
++idx; // b is &; get next character afterwards
|
||||
if (idx == end) { // & is last char; return
|
||||
Gfo_url_encoder_itm_hex.Encode_byte(Byte_ascii.Amp, bfr, Byte_ascii.Dot);
|
||||
return 0;
|
||||
}
|
||||
b = src[idx];
|
||||
Object o = amp_trie.Match_bgn_w_byte(b, src, idx, end);
|
||||
if (o == null) { // unknown entity (EX:&unknown;); return &;
|
||||
Gfo_url_encoder_itm_hex.Encode_byte(Byte_ascii.Amp, bfr, Byte_ascii.Dot);
|
||||
return 0;
|
||||
}
|
||||
else {
|
||||
Xop_amp_trie_itm itm = (Xop_amp_trie_itm)o;
|
||||
byte[] bry_u8 = itm.U8_bry(); // NOTE: must utf8 encode val; EX: is 160 but must become 192,160
|
||||
for (int i = 0; i < bry_u8.length; i++)
|
||||
Gfo_url_encoder_itm_hex.Encode_byte(bry_u8[i], bfr, Byte_ascii.Dot);
|
||||
return itm.Xml_name_bry().length - 1; // -1 to ignore & in XmlEntityName
|
||||
}
|
||||
}
|
||||
public int Decode(Bry_bfr bfr, byte[] src, int end, int idx, byte b, boolean fail_when_invalid) {
|
||||
bfr.Add_byte(b); return 0;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,83 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.langs.htmls.encoders; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
|
||||
import gplx.core.btries.*;
|
||||
public class Gfo_url_encoder_mkr {
|
||||
private Gfo_url_encoder_itm[] encode_ary, decode_ary; private Gfo_url_encoder anchor_encoder;
|
||||
public Gfo_url_encoder_mkr Init(byte bicode_mark) {
|
||||
encode_ary = new Gfo_url_encoder_itm[256]; decode_ary = new Gfo_url_encoder_itm[256];
|
||||
Gfo_url_encoder_itm_hex hex = new Gfo_url_encoder_itm_hex(bicode_mark);
|
||||
for (int i = 0; i < 256; ++i) {
|
||||
encode_ary[i] = hex; // default encode to hex
|
||||
decode_ary[i] = Gfo_url_encoder_itm_same.Instance; // default decode to same; needed for files; EX: A!%21.png -> A!!.png;
|
||||
}
|
||||
decode_ary[bicode_mark] = hex;
|
||||
return this;
|
||||
}
|
||||
public Gfo_url_encoder_mkr Init__same__rng(int bgn, int end) {
|
||||
for (int i = bgn; i <= end; ++i)
|
||||
encode_ary[i] = decode_ary[i] = Gfo_url_encoder_itm_same.Instance;
|
||||
return this;
|
||||
}
|
||||
public Gfo_url_encoder_mkr Init__same__many(int... ary) {
|
||||
int len = ary.length;
|
||||
for (int i = 0; i < len; i++) {
|
||||
int idx = ary[i];
|
||||
encode_ary[idx] = decode_ary[idx] = Gfo_url_encoder_itm_same.Instance;
|
||||
}
|
||||
return this;
|
||||
}
|
||||
public Gfo_url_encoder_mkr Init_common(boolean encode_colon) {
|
||||
Init__same__rng(Byte_ascii.Num_0, Byte_ascii.Num_9);
|
||||
Init__same__rng(Byte_ascii.Ltr_A, Byte_ascii.Ltr_Z);
|
||||
Init__same__rng(Byte_ascii.Ltr_a, Byte_ascii.Ltr_z);
|
||||
Init__same__many(Byte_ascii.Dash, Byte_ascii.Dot, Byte_ascii.Underline);
|
||||
if (encode_colon) Init__same__many(Byte_ascii.Colon);
|
||||
return this;
|
||||
}
|
||||
public Gfo_url_encoder_mkr Init__decode_mark(byte decode_mark) {
|
||||
decode_ary[decode_mark & 0xff] = new Gfo_url_encoder_itm_hex(decode_mark);// PATCH.JAVA:need to convert to unsigned byte
|
||||
return this;
|
||||
}
|
||||
public Gfo_url_encoder_mkr Init__diff__one(byte src, byte trg) {
|
||||
Gfo_url_encoder_itm_diff itm = new Gfo_url_encoder_itm_diff(src, trg);
|
||||
encode_ary[src] = decode_ary[trg] = itm;
|
||||
return this;
|
||||
}
|
||||
public Gfo_url_encoder_mkr Init__diff__many(byte bicode_mark, int... ary) {
|
||||
Gfo_url_encoder_itm_hex hex = new Gfo_url_encoder_itm_hex(bicode_mark);
|
||||
int len = ary.length;
|
||||
for (int i = 0; i < len; i++) {
|
||||
int idx = ary[i];
|
||||
encode_ary[idx] = decode_ary[idx] = hex;
|
||||
}
|
||||
decode_ary[bicode_mark] = hex;
|
||||
return this;
|
||||
}
|
||||
public Gfo_url_encoder_mkr Init__html_ent(byte src, Btrie_slim_mgr trie) {
|
||||
Gfo_url_encoder_itm_html_ent itm = new Gfo_url_encoder_itm_html_ent(trie);
|
||||
encode_ary[src] = itm;
|
||||
return this;
|
||||
}
|
||||
public Gfo_url_encoder_mkr Init__anchor_encoder(Gfo_url_encoder v) {this.anchor_encoder = v; return this;}
|
||||
public Gfo_url_encoder Make() {
|
||||
Gfo_url_encoder rv = new Gfo_url_encoder(encode_ary, decode_ary, anchor_encoder);
|
||||
encode_ary = decode_ary = null; anchor_encoder = null;
|
||||
return rv;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,68 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.langs.htmls.encoders; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
|
||||
import org.junit.*;
|
||||
public class Gfo_url_encoder_tst {
|
||||
private final Gfo_url_encoder_fxt fxt = new Gfo_url_encoder_fxt();
|
||||
@Test public void Id__nums() {fxt.Encoder_id().Test__bicode("0123456789" , "0123456789");}
|
||||
@Test public void Id__ltrs_lower() {fxt.Encoder_id().Test__bicode("abcdefghijklmnopqrstuvwxyz" , "abcdefghijklmnopqrstuvwxyz");}
|
||||
@Test public void Id__ltrs_upper() {fxt.Encoder_id().Test__bicode("ABCDEFGHIJKLMNOPQRSTUVWXYZ" , "ABCDEFGHIJKLMNOPQRSTUVWXYZ");}
|
||||
@Test public void Id__syms() {fxt.Encoder_id().Test__encode("!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", ".21.22.23.24.25.26.27.28.29.2A.2B.2C-..2F:.3B.3C.3D.3E.3F.40.5B.5C.5D.5E_.60.7B.7C.7D.7E");} // NOTE: not reversible since "." is encode_marker but not encoded
|
||||
@Test public void Id__foreign() {fxt.Encoder_id().Test__bicode("aéb", "a.C3.A9b");}
|
||||
@Test public void Id__nbsp() {fxt.Encoder_id().Test__encode("a b", "a.C2.A0b");} // NOTE: not just .A0 (160) but utf8-encoded .C2.A0
|
||||
@Test public void Id__space() {fxt.Encoder_id().Test__bicode("a b", "a_b");}
|
||||
@Test public void Id__err() {
|
||||
byte[] raw = Bry_.new_a7("0%.jpg");
|
||||
Bry_bfr tmp_bfr = Bry_bfr.new_();
|
||||
fxt.Encoder_id().Encoder().Decode(tmp_bfr, Bool_.N, raw, 0, raw.length);
|
||||
Tfds.Eq("0%.jpg", tmp_bfr.To_str_and_clear());
|
||||
}
|
||||
@Test public void Url__syms() {fxt.Encoder_url().Test__bicode("!?^~", "%21%3F%5E%7E");}
|
||||
@Test public void Url__foreign() {fxt.Encoder_url().Test__bicode("aéb", "a%C3%A9b");}
|
||||
@Test public void Url__space() {fxt.Encoder_url().Test__bicode("a b", "a+b");}
|
||||
@Test public void Href__space() {
|
||||
fxt.Encoder_href().Test__encode("a b", "a_b");
|
||||
}
|
||||
@Test public void Href__special_and_anchor() { // PURPOSE: MediaWiki encodes with % for ttls, but . for anchors; REF:Title.php!(before-anchor)getLocalUrl;wfUrlencode (after-anchor)escapeFragmentForURL
|
||||
fxt.Encoder_href().Test__bicode("^#^", "%5E#.5E");
|
||||
fxt.Encoder_href().Test__encode("A#", "A#");
|
||||
}
|
||||
@Test public void Href__invalid() { // PURPOSE: check that invalid url decodings are rendered literally; DATE:2014-04-10
|
||||
fxt.Encoder_href().Test__encode("%GC", "%25GC");
|
||||
}
|
||||
@Test public void Fsys__wnt() {
|
||||
fxt.Encoder_fsys_safe().Test__encode("Help:Options/HTML", "Help%3AOptions%2FHTML");
|
||||
}
|
||||
}
|
||||
class Gfo_url_encoder_fxt {
|
||||
public Gfo_url_encoder Encoder() {return encoder;} private Gfo_url_encoder encoder;
|
||||
public Gfo_url_encoder_fxt Encoder_id() {encoder = Gfo_url_encoder_.Id; return this;}
|
||||
public Gfo_url_encoder_fxt Encoder_href() {encoder = Gfo_url_encoder_.Href; return this;}
|
||||
public Gfo_url_encoder_fxt Encoder_url() {encoder = Gfo_url_encoder_.Http_url; return this;}
|
||||
public Gfo_url_encoder_fxt Encoder_fsys_safe() {encoder = Gfo_url_encoder_.Fsys_safe; return this;}
|
||||
public void Test__bicode(String raw, String encoded) {
|
||||
Test__encode(raw, encoded);
|
||||
Test__decode(encoded, raw);
|
||||
}
|
||||
public void Test__encode(String raw, String expd) {
|
||||
Tfds.Eq(expd, String_.new_u8(encoder.Encode(Bry_.new_u8(raw))));
|
||||
}
|
||||
public void Test__decode(String raw, String expd) {
|
||||
Tfds.Eq(expd, String_.new_u8(encoder.Decode(Bry_.new_u8(raw))));
|
||||
}
|
||||
}
|
||||
@@ -1,307 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.langs.htmls.encoders; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
|
||||
import gplx.core.btries.*;
|
||||
import gplx.xowa.parsers.amps.*;
|
||||
public class Url_encoder implements Url_encoder_interface {
|
||||
private Url_encoder_itm[] encode_ary = new Url_encoder_itm[256], decode_ary = new Url_encoder_itm[256];
|
||||
private Bry_bfr tmp_bfr = Bry_bfr.reset_(255);
|
||||
private Url_encoder anchor_encoder = null;
|
||||
private Object thread_lock = new Object();
|
||||
public void Itms_ini(byte primary_encode_marker) {
|
||||
Url_encoder_itm_hex hex = new Url_encoder_itm_hex(primary_encode_marker);
|
||||
for (int i = 0; i < 256; i++) {
|
||||
encode_ary[i] = hex; // default encode to hex
|
||||
decode_ary[i] = Url_encoder_itm_same.Instance; // default decode to same; needed for files; EX: A!%21.png -> A!!.png;
|
||||
}
|
||||
decode_ary[primary_encode_marker] = hex;
|
||||
}
|
||||
public void Itms_raw_diff_many(byte primary_encode_marker, int... ary) {
|
||||
Url_encoder_itm_hex hex = new Url_encoder_itm_hex(primary_encode_marker);
|
||||
int ary_len = ary.length;
|
||||
for (int i = 0; i < ary_len; i++) {
|
||||
encode_ary[ary[i]] = hex;
|
||||
decode_ary[ary[i]] = hex;
|
||||
}
|
||||
decode_ary[primary_encode_marker] = hex;
|
||||
}
|
||||
public void Itms_decode_marker(byte decode_marker) {
|
||||
Url_encoder_itm_hex hex = new Url_encoder_itm_hex(decode_marker);
|
||||
decode_ary[decode_marker & 0xff] = hex;// PATCH.JAVA:need to convert to unsigned byte
|
||||
}
|
||||
public void Itms_decode_diff(byte orig, byte repl) {
|
||||
decode_ary[orig & 0xff] = new Url_encoder_itm_diff(orig, repl);// PATCH.JAVA:need to convert to unsigned byte
|
||||
}
|
||||
public void Itms_raw_same_rng(int bgn, int end) {
|
||||
for (int i = bgn; i <= end; i++) {
|
||||
encode_ary[i] = Url_encoder_itm_same.Instance;
|
||||
decode_ary[i] = Url_encoder_itm_same.Instance;
|
||||
}
|
||||
}
|
||||
public Url_encoder Itms_raw_same_many(int... ary) {
|
||||
int ary_len = ary.length;
|
||||
for (int i = 0; i < ary_len; i++) {
|
||||
encode_ary[ary[i]] = Url_encoder_itm_same.Instance;
|
||||
decode_ary[ary[i]] = Url_encoder_itm_same.Instance;
|
||||
}
|
||||
return this;
|
||||
}
|
||||
public void Itms_raw_html_ent(byte src, Btrie_slim_mgr trie) {
|
||||
Url_encoder_itm_html_ent itm = new Url_encoder_itm_html_ent(trie);
|
||||
encode_ary[src] = itm;
|
||||
}
|
||||
public Url_encoder Itms_raw_diff(byte src, byte trg) {
|
||||
Url_encoder_itm_diff itm = new Url_encoder_itm_diff(src, trg);
|
||||
encode_ary[src] = itm;
|
||||
decode_ary[trg] = itm;
|
||||
return this;
|
||||
}
|
||||
public byte[] Encode_http(Io_url url) {
|
||||
synchronized (thread_lock) {
|
||||
tmp_bfr.Add(Io_url.Http_file_bry);
|
||||
Encode(tmp_bfr, url.RawBry());
|
||||
return tmp_bfr.To_bry_and_clear();
|
||||
}
|
||||
}
|
||||
public String Encode_str(String str) {
|
||||
synchronized (thread_lock) {
|
||||
byte[] bry = Bry_.new_u8(str); Encode(tmp_bfr, bry, 0, bry.length); return tmp_bfr.To_str_and_clear();
|
||||
}
|
||||
}
|
||||
public byte[] Encode_bry(String str) {
|
||||
synchronized (thread_lock) {
|
||||
byte[] bry = Bry_.new_u8(str); Encode(tmp_bfr, bry, 0, bry.length); return tmp_bfr.To_bry_and_clear();
|
||||
}
|
||||
}
|
||||
public byte[] Encode(byte[] bry) {Encode(tmp_bfr, bry, 0, bry.length); return tmp_bfr.To_bry_and_clear();}
|
||||
public Bry_bfr Encode(Bry_bfr bfr, byte[] bry) {Encode(bfr, bry, 0, bry.length); return bfr;}
|
||||
public void Encode(Bry_bfr bfr, byte[] bry, int bgn, int end) {
|
||||
synchronized (thread_lock) {
|
||||
for (int i = bgn; i < end; i++) {
|
||||
byte b = bry[i];
|
||||
if (anchor_encoder != null && b == Byte_ascii.Hash) {
|
||||
bfr.Add_byte(Byte_ascii.Hash);
|
||||
anchor_encoder.Encode(bfr, bry, i + 1, end);
|
||||
break;
|
||||
}
|
||||
Url_encoder_itm itm = encode_ary[b & 0xff];// PATCH.JAVA:need to convert to unsigned byte
|
||||
i += itm.Encode(bfr, bry, end, i, b);
|
||||
}
|
||||
}
|
||||
}
|
||||
public String Decode_str(String str) {
|
||||
synchronized (thread_lock) {
|
||||
byte[] bry = Bry_.new_u8(str); Decode(bry, 0, bry.length, tmp_bfr, true); return tmp_bfr.To_str_and_clear();
|
||||
}
|
||||
}
|
||||
public byte[] Decode(byte[] bry) {return Decode(tmp_bfr, bry, 0, bry.length);}
|
||||
public byte[] Decode(byte[] bry, int bgn, int end) {return Decode(tmp_bfr, bry, bgn, end);}
|
||||
public byte[] Decode(Bry_bfr bfr, byte[] bry, int bgn, int end) {Decode(bry, bgn, end, bfr , false); return bfr.To_bry_and_clear();}
|
||||
public byte[] Decode_lax(byte[] bry) {
|
||||
synchronized (thread_lock) {
|
||||
Decode(bry, 0, bry.length, tmp_bfr, false); return tmp_bfr.To_bry_and_clear();
|
||||
}
|
||||
}
|
||||
public void Decode(byte[] bry, int bgn, int end, Bry_bfr bfr, boolean fail_when_invalid) {
|
||||
synchronized (thread_lock) {
|
||||
for (int i = bgn; i < end; i++) {
|
||||
byte b = bry[i];
|
||||
if (anchor_encoder != null && b == Byte_ascii.Hash) {
|
||||
bfr.Add_byte(Byte_ascii.Hash);
|
||||
anchor_encoder.Decode(bry, i + 1, end, bfr, false);
|
||||
break;
|
||||
}
|
||||
Url_encoder_itm itm = decode_ary[b & 0xff];// PATCH.JAVA:need to convert to unsigned byte
|
||||
i += itm.Decode(bfr, bry, end, i, b, fail_when_invalid);
|
||||
}
|
||||
}
|
||||
}
|
||||
private static void mediawiki_base(Url_encoder rv, boolean encode_colon) {
|
||||
rv.Itms_raw_same_rng(Byte_ascii.Num_0, Byte_ascii.Num_9);
|
||||
rv.Itms_raw_same_rng(Byte_ascii.Ltr_A, Byte_ascii.Ltr_Z);
|
||||
rv.Itms_raw_same_rng(Byte_ascii.Ltr_a, Byte_ascii.Ltr_z);
|
||||
rv.Itms_raw_same_many(Byte_ascii.Dash, Byte_ascii.Dot, Byte_ascii.Underline);
|
||||
if (encode_colon)
|
||||
rv.Itms_raw_same_many(Byte_ascii.Colon);
|
||||
}
|
||||
public static Url_encoder new_html_id_() {
|
||||
Url_encoder rv = new Url_encoder();
|
||||
rv.Itms_ini(Byte_ascii.Dot);
|
||||
mediawiki_base(rv, true);
|
||||
rv.Itms_decode_marker(Byte_ascii.Dot);
|
||||
rv.Itms_raw_diff(Byte_ascii.Space, Byte_ascii.Underline);
|
||||
rv.Itms_raw_html_ent(Byte_ascii.Amp, Xop_amp_trie.Instance);
|
||||
return rv;
|
||||
}
|
||||
public static Url_encoder new_http_url_() {
|
||||
Url_encoder rv = new Url_encoder();
|
||||
rv.Itms_ini(Byte_ascii.Percent);
|
||||
mediawiki_base(rv, false);
|
||||
rv.Itms_raw_diff(Byte_ascii.Space, Byte_ascii.Plus);
|
||||
return rv;
|
||||
}
|
||||
public static Url_encoder new_http_url_ttl_() {
|
||||
Url_encoder rv = new Url_encoder();
|
||||
rv.Itms_ini(Byte_ascii.Percent);
|
||||
mediawiki_base(rv, true);
|
||||
return rv;
|
||||
}
|
||||
public static Url_encoder new_http_url_space_is_space() {
|
||||
Url_encoder rv = new Url_encoder();
|
||||
rv.Itms_ini(Byte_ascii.Percent);
|
||||
mediawiki_base(rv, true);
|
||||
return rv;
|
||||
}
|
||||
public static Url_encoder new_fsys_lnx_() {
|
||||
Url_encoder rv = new Url_encoder();
|
||||
rv.Itms_ini(Byte_ascii.Percent);
|
||||
mediawiki_base(rv, true);
|
||||
rv.Itms_raw_same_many(Byte_ascii.Slash);
|
||||
rv.Itms_raw_diff(Byte_ascii.Backslash, Byte_ascii.Slash);
|
||||
return rv;
|
||||
}
|
||||
public static Url_encoder new_fsys_wnt_() {
|
||||
Url_encoder rv = new Url_encoder();
|
||||
rv.Itms_ini(Byte_ascii.Percent);
|
||||
rv.Itms_raw_same_rng(Byte_ascii.Num_0, Byte_ascii.Num_9);
|
||||
rv.Itms_raw_same_rng(Byte_ascii.Ltr_A, Byte_ascii.Ltr_Z);
|
||||
rv.Itms_raw_same_rng(Byte_ascii.Ltr_a, Byte_ascii.Ltr_z);
|
||||
rv.Itms_raw_same_many
|
||||
( Byte_ascii.Bang, Byte_ascii.At, Byte_ascii.Hash, Byte_ascii.Dollar, Byte_ascii.Percent, Byte_ascii.Pow, Byte_ascii.Amp
|
||||
, Byte_ascii.Plus, Byte_ascii.Eq, Byte_ascii.Underline, Byte_ascii.Dash
|
||||
, Byte_ascii.Dot, Byte_ascii.Comma
|
||||
, Byte_ascii.Tick, Byte_ascii.Tilde, Byte_ascii.Brack_bgn, Byte_ascii.Brack_end, Byte_ascii.Curly_bgn, Byte_ascii.Curly_end);
|
||||
return rv;
|
||||
}
|
||||
public static Url_encoder new_file_() {
|
||||
Url_encoder rv = new Url_encoder();
|
||||
rv.Itms_ini(Byte_ascii.Percent);
|
||||
mediawiki_base(rv, true);
|
||||
return rv;
|
||||
}
|
||||
public static Url_encoder new_gfs_() {
|
||||
Url_encoder rv = new Url_encoder();
|
||||
rv.Itms_ini(Byte_ascii.Percent);
|
||||
rv.Itms_raw_same_many(Byte_ascii.Paren_bgn, Byte_ascii.Paren_end, Byte_ascii.Apos, Byte_ascii.Semic);
|
||||
mediawiki_base(rv, true);
|
||||
return rv;
|
||||
}
|
||||
public static Url_encoder new_html_href_mw_() {
|
||||
Url_encoder rv = new Url_encoder();
|
||||
rv.Itms_ini(Byte_ascii.Percent);
|
||||
mediawiki_base(rv, true);
|
||||
rv.Itms_raw_diff(Byte_ascii.Space, Byte_ascii.Underline);
|
||||
rv.Itms_raw_same_many(Byte_ascii.Semic, Byte_ascii.At, Byte_ascii.Dollar, Byte_ascii.Bang, Byte_ascii.Star
|
||||
, Byte_ascii.Paren_bgn, Byte_ascii.Paren_end, Byte_ascii.Comma, Byte_ascii.Slash, Byte_ascii.Colon
|
||||
, Byte_ascii.Hash// NOTE: not part of wfUrlEncode; not sure where this is specified; needed for A#b
|
||||
);
|
||||
rv.anchor_encoder = new_html_id_();
|
||||
return rv;
|
||||
}
|
||||
public static Url_encoder new_html_href_quotes_() {
|
||||
Url_encoder rv = new Url_encoder();
|
||||
rv.Itms_ini(Byte_ascii.Percent);
|
||||
rv.Itms_raw_same_rng(0, 255); // default everything to same;
|
||||
rv.Itms_raw_diff_many(Byte_ascii.Percent
|
||||
, Byte_ascii.Apos, Byte_ascii.Quote, Byte_ascii.Lt, Byte_ascii.Gt); // encode ', ", <, >
|
||||
rv.Itms_raw_diff(Byte_ascii.Space, Byte_ascii.Underline); // convert " " to "_"
|
||||
return rv;
|
||||
}
|
||||
}
|
||||
interface Url_encoder_itm {
|
||||
int Encode(Bry_bfr bfr, byte[] src, int end, int idx, byte b);
|
||||
int Decode(Bry_bfr bfr, byte[] src, int end, int idx, byte b, boolean fail_when_invalid);
|
||||
}
|
||||
class Url_encoder_itm_same implements Url_encoder_itm {
|
||||
public int Encode(Bry_bfr bfr, byte[] src, int end, int idx, byte b) {bfr.Add_byte(b); return 0;}
|
||||
public int Decode(Bry_bfr bfr, byte[] src, int end, int idx, byte b, boolean fail_when_invalid) {bfr.Add_byte(b); return 0;}
|
||||
public static final Url_encoder_itm Instance = new Url_encoder_itm_same();
|
||||
}
|
||||
class Url_encoder_itm_diff implements Url_encoder_itm {
|
||||
public Url_encoder_itm_diff(byte orig, byte repl) {this.orig = orig; this.repl = repl;} private byte orig, repl;
|
||||
public int Encode(Bry_bfr bfr, byte[] src, int end, int idx, byte b) {bfr.Add_byte(repl); return 0;}
|
||||
public int Decode(Bry_bfr bfr, byte[] src, int end, int idx, byte b, boolean fail_when_invalid) {bfr.Add_byte(orig); return 0;}
|
||||
}
|
||||
class Url_encoder_itm_hex implements Url_encoder_itm {
|
||||
public Url_encoder_itm_hex(byte encode_marker) {this.encode_marker = encode_marker;} private byte encode_marker;
|
||||
public int Encode(Bry_bfr bfr, byte[] src, int end, int idx, byte b) {Encode_byte(b, bfr, encode_marker); return 0;}
|
||||
public static void Encode_byte(byte b, Bry_bfr bfr, byte encode_marker) {
|
||||
int b_int = b & 0xFF;// PATCH.JAVA:need to convert to unsigned byte
|
||||
bfr.Add_byte(encode_marker);
|
||||
bfr.Add_byte(HexBytes[b_int >> 4]);
|
||||
bfr.Add_byte(HexBytes[b_int & 15]);
|
||||
}
|
||||
public int Decode(Bry_bfr bfr, byte[] src, int end, int idx, byte b, boolean fail_when_invalid) {
|
||||
if (idx + 2 >= end) {
|
||||
if (fail_when_invalid) throw Err_.new_wo_type("decode needs 3 bytes", "idx", idx, "len", end, "snip", String_.new_u8(Bry_.Mid_by_len_safe(src, idx, 3)));
|
||||
else {
|
||||
bfr.Add_byte(b);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
int hex_val = Int_.To_int_hex(src[idx + 1]);
|
||||
if (hex_val == -1) { // invalid hex byte; EX: %GC; DATE:2014-04-10
|
||||
bfr.Add_byte(b);
|
||||
return 0;
|
||||
}
|
||||
int v_0 = hex_val * 16;
|
||||
if (v_0 != -1) {
|
||||
int v_1 = Int_.To_int_hex(src[idx + 2]);
|
||||
if (v_1 != -1) {
|
||||
bfr.Add_byte((byte)(v_0 + v_1));
|
||||
return 2;
|
||||
}
|
||||
}
|
||||
if (fail_when_invalid)
|
||||
throw Err_.new_wo_type("decode is invalid", "idx", idx, "snip", String_.new_u8(Bry_.Mid_by_len_safe(src, idx, 3)));
|
||||
else {
|
||||
bfr.Add_byte(b);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
public static final byte[] HexBytes = new byte[]
|
||||
{ Byte_ascii.Num_0, Byte_ascii.Num_1, Byte_ascii.Num_2, Byte_ascii.Num_3, Byte_ascii.Num_4, Byte_ascii.Num_5, Byte_ascii.Num_6, Byte_ascii.Num_7
|
||||
, Byte_ascii.Num_8, Byte_ascii.Num_9, Byte_ascii.Ltr_A, Byte_ascii.Ltr_B, Byte_ascii.Ltr_C, Byte_ascii.Ltr_D, Byte_ascii.Ltr_E, Byte_ascii.Ltr_F
|
||||
};
|
||||
}
|
||||
class Url_encoder_itm_html_ent implements Url_encoder_itm {
|
||||
public Url_encoder_itm_html_ent(Btrie_slim_mgr amp_trie) {this.amp_trie = amp_trie;} Btrie_slim_mgr amp_trie;
|
||||
public int Encode(Bry_bfr bfr, byte[] src, int end, int idx, byte b) {
|
||||
++idx; // b is &; get next character afterwards
|
||||
if (idx == end) { // & is last char; return
|
||||
Url_encoder_itm_hex.Encode_byte(Byte_ascii.Amp, bfr, Byte_ascii.Dot);
|
||||
return 0;
|
||||
}
|
||||
b = src[idx];
|
||||
Object o = amp_trie.Match_bgn_w_byte(b, src, idx, end);
|
||||
if (o == null) { // unknown entity (EX:&unknown;); return &;
|
||||
Url_encoder_itm_hex.Encode_byte(Byte_ascii.Amp, bfr, Byte_ascii.Dot);
|
||||
return 0;
|
||||
}
|
||||
else {
|
||||
Xop_amp_trie_itm itm = (Xop_amp_trie_itm)o;
|
||||
byte[] bry_u8 = itm.U8_bry(); // NOTE: must utf8 encode val; EX: is 160 but must become 192,160
|
||||
for (int i = 0; i < bry_u8.length; i++)
|
||||
Url_encoder_itm_hex.Encode_byte(bry_u8[i], bfr, Byte_ascii.Dot);
|
||||
return itm.Xml_name_bry().length - 1; // -1 to ignore & in XmlEntityName
|
||||
}
|
||||
}
|
||||
public int Decode(Bry_bfr bfr, byte[] src, int end, int idx, byte b, boolean fail_when_invalid) {
|
||||
bfr.Add_byte(b); return 0;
|
||||
}
|
||||
}
|
||||
@@ -1,30 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.langs.htmls.encoders; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
|
||||
public class Url_encoder_mgr {
|
||||
public Url_encoder File() {return file;} private final Url_encoder file = Url_encoder.new_file_();
|
||||
public Url_encoder Http_url() {return http_url;} private final Url_encoder http_url = Url_encoder.new_http_url_();
|
||||
public Url_encoder Http_url_ttl() {return http_url_ttl;} private final Url_encoder http_url_ttl = Url_encoder.new_http_url_ttl_();
|
||||
public Url_encoder Id() {return html_id;} private final Url_encoder html_id = Url_encoder.new_html_id_();
|
||||
public Url_encoder Href() {return href;} private final Url_encoder href = Url_encoder.new_html_href_mw_();
|
||||
public Url_encoder Href_quotes() {return href_quotes;} private final Url_encoder href_quotes = Url_encoder.new_html_href_quotes_();
|
||||
public Url_encoder Gfs() {return gfs;} private final Url_encoder gfs = Url_encoder.new_gfs_();
|
||||
public Url_encoder Fsys() {return fsys;} private final Url_encoder fsys = Url_encoder.new_fsys_lnx_();
|
||||
public Url_encoder Fsys_safe() {return fsys_safe;} private final Url_encoder fsys_safe = Url_encoder.new_fsys_wnt_();
|
||||
public Url_encoder Xourl() {return xourl;} private final Url_encoder xourl = Url_encoder.new_html_href_mw_().Itms_raw_same_many(Byte_ascii.Underline);
|
||||
}
|
||||
@@ -1,72 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.langs.htmls.encoders; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
|
||||
import org.junit.*;
|
||||
public class Url_encoder_tst {
|
||||
@Before public void init() {fxt = new Url_encoder_fxt();} Url_encoder_fxt fxt;
|
||||
@Test public void Id_nums() {fxt.Encoder_id().Test_encode_decode("0123456789", "0123456789");}
|
||||
@Test public void Id_ltrs_lower() {fxt.Encoder_id().Test_encode_decode("abcdefghijklmnopqrstuvwxyz", "abcdefghijklmnopqrstuvwxyz");}
|
||||
@Test public void Id_ltrs_upper() {fxt.Encoder_id().Test_encode_decode("ABCDEFGHIJKLMNOPQRSTUVWXYZ", "ABCDEFGHIJKLMNOPQRSTUVWXYZ");}
|
||||
@Test public void Id_syms() {fxt.Encoder_id().Test_encode("!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", ".21.22.23.24.25.26.27.28.29.2A.2B.2C-..2F:.3B.3C.3D.3E.3F.40.5B.5C.5D.5E_.60.7B.7C.7D.7E");} // NOTE: not reversible since "." is encode_marker but not encoded
|
||||
@Test public void Id_foreign() {fxt.Encoder_id().Test_encode_decode("aéb", "a.C3.A9b");}
|
||||
@Test public void Id_space() {fxt.Encoder_id().Test_encode_decode("a b", "a_b");}
|
||||
@Test public void Id_err() {
|
||||
byte[] raw = Bry_.new_a7("0%.jpg");
|
||||
Bry_bfr tmp_bfr = Bry_bfr.new_();
|
||||
fxt.Encoder_id().Encoder().Decode(raw, 0, raw.length, tmp_bfr, false);
|
||||
Tfds.Eq("0%.jpg", tmp_bfr.To_str_and_clear());
|
||||
}
|
||||
@Test public void Id_nbsp() {fxt.Encoder_id().Test_encode("a b", "a.C2.A0b");} // NOTE: not just .A0 (160) but utf8-encoded .C2.A0
|
||||
@Test public void Url_syms() {fxt.Encoder_url().Test_encode_decode("!?^~", "%21%3F%5E%7E");}
|
||||
@Test public void Url_foreign() {fxt.Encoder_url().Test_encode_decode("aéb", "a%C3%A9b");}
|
||||
@Test public void Url_space() {fxt.Encoder_url().Test_encode_decode("a b", "a+b");}
|
||||
@Test public void File_space() {
|
||||
fxt.Encoder_href().Test_encode("a b", "a_b");
|
||||
// fxt.Encoder_url().tst_decode("a_b", "a_b");
|
||||
}
|
||||
@Test public void Href_special_and_anchor() { // PURPOSE: MediaWiki encodes with % for ttls, but . for anchors; REF:Title.php!(before-anchor)getLocalUrl;wfUrlencode (after-anchor)escapeFragmentForURL
|
||||
fxt.Encoder_href().Test_encode("^#^", "%5E#.5E");
|
||||
fxt.Encoder_href().Test_encode("A#", "A#");
|
||||
fxt.Encoder_href().tst_decode("%5E#.5E", "^#^");
|
||||
}
|
||||
@Test public void Fsys_wnt() {
|
||||
fxt.Encoder_fsys_safe().Test_encode("Help:Options/HTML", "Help%3AOptions%2FHTML");
|
||||
}
|
||||
@Test public void Invalid_url_decode() { // PURPOSE: check that invalid url decodings are rendered literally; DATE:2014-04-10
|
||||
fxt.Encoder_href().Test_encode("%GC", "%25GC");
|
||||
}
|
||||
}
|
||||
class Url_encoder_fxt {
|
||||
public Url_encoder Encoder() {return encoder;} Url_encoder encoder;
|
||||
public Url_encoder_fxt Encoder_id() {encoder = Url_encoder.new_html_id_(); return this;}
|
||||
public Url_encoder_fxt Encoder_href() {encoder = Url_encoder.new_html_href_mw_(); return this;}
|
||||
public Url_encoder_fxt Encoder_url() {encoder = Url_encoder.new_http_url_(); return this;}
|
||||
public Url_encoder_fxt Encoder_fsys_safe() {encoder = Url_encoder.new_fsys_wnt_(); return this;}
|
||||
public void Test_encode_decode(String raw, String encoded) {
|
||||
Test_encode(raw, encoded);
|
||||
tst_decode(encoded, raw);
|
||||
}
|
||||
public void Test_encode(String raw, String expd) {
|
||||
byte[] bry = encoder.Encode(Bry_.new_u8(raw));
|
||||
Tfds.Eq(expd, String_.new_u8(bry));
|
||||
}
|
||||
public void tst_decode(String raw, String expd) {
|
||||
byte[] bry = encoder.Decode(Bry_.new_u8(raw));
|
||||
Tfds.Eq(expd, String_.new_u8(bry));
|
||||
}
|
||||
}
|
||||
@@ -26,7 +26,8 @@ public class Html_atr extends gplx.core.brys.Bfr_arg_base {
|
||||
public byte[] Key() {return key;} private final byte[] key;
|
||||
public int Val_bgn() {return val_bgn;} private final int val_bgn;
|
||||
public int Val_end() {return val_end;} private final int val_end;
|
||||
public boolean Val_exists() {return val_end > val_bgn;}
|
||||
public boolean Val_dat_exists() {return val_end > val_bgn;}
|
||||
public boolean Val_dat_missing() {return val_end == -1;}
|
||||
public byte[] Val() {
|
||||
if (val == null)
|
||||
val = Bry_.Mid(src, val_bgn, val_end);
|
||||
@@ -36,9 +37,9 @@ public class Html_atr extends gplx.core.brys.Bfr_arg_base {
|
||||
if (val_end > val_bgn)
|
||||
bfr.Add_mid(src, val_bgn, val_end);
|
||||
}
|
||||
@Override public boolean Bfr_arg__exists() {return this.Val_exists();}
|
||||
@Override public boolean Bfr_arg__exists() {return this.Val_dat_exists();}
|
||||
@Override public void Bfr_arg__add(Bry_bfr bfr) {
|
||||
if (Val_exists())
|
||||
if (Val_dat_exists())
|
||||
bfr.Add_mid(src, val_bgn, val_end);
|
||||
}
|
||||
public static final Html_atr Noop = new Html_atr(-1, Bry_.Empty, Bry_.Empty, Bry_.Empty, -1, -1);
|
||||
|
||||
@@ -42,8 +42,7 @@ public class Html_doc_parser {
|
||||
Html_doc_wkr wkr = (Html_doc_wkr)o;
|
||||
try {pos = wkr.Parse(src, src_bgn, src_end, pos);}
|
||||
catch (Exception e) {
|
||||
Err err = Err_.cast_or_make(e);
|
||||
if (!err.Logged()) Gfo_usr_dlg_.Instance.Warn_many("", "", Err_.Message_gplx_log(e), "page_url", page_url, "mid", Bry_.Mid_by_len_safe(src, pos, 255));
|
||||
Html_utl.Log(e, "html parse failed", page_url, src, pos);
|
||||
txt_bgn = pos; // set txt_bgn to hook_bgn which is "pos"; i.e.: txt resumes from start of failed hook
|
||||
pos = trie.Match_pos(); // set pos to hook_end
|
||||
}
|
||||
|
||||
@@ -20,15 +20,15 @@ import gplx.xowa.parsers.htmls.*; import gplx.langs.htmls.parsers.styles.*; impo
|
||||
public class Html_tag implements Mwh_atr_wkr {
|
||||
private Html_tag_rdr tag_rdr;
|
||||
private Ordered_hash atrs_hash; private boolean atrs_null; private int atrs_bgn, atrs_end;
|
||||
public Html_tag Init(Html_tag_rdr tag_rdr, boolean tag_is_tail, boolean tag_is_inline, int src_bgn, int src_end, int atrs_bgn, int atrs_end, int name_id) {
|
||||
this.tag_rdr = tag_rdr; this.src = tag_rdr.Src(); this.atrs_null = true;
|
||||
public Html_tag Init(Html_tag_rdr tag_rdr, byte[] src, boolean tag_is_tail, boolean tag_is_inline, int src_bgn, int src_end, int atrs_bgn, int atrs_end, int name_id) {
|
||||
this.tag_rdr = tag_rdr; this.src = src; this.atrs_null = true;
|
||||
this.tag_is_tail = tag_is_tail; this.tag_is_inline = tag_is_inline;
|
||||
this.atrs_bgn = atrs_bgn; this.atrs_end = atrs_end;
|
||||
this.name_id = name_id; this.src_bgn = src_bgn; this.src_end = src_end;
|
||||
return this;
|
||||
}
|
||||
public Html_tag Copy() {
|
||||
Html_tag rv = new Html_tag().Init(tag_rdr, tag_is_tail, tag_is_inline, src_bgn, src_end, atrs_bgn, atrs_end, name_id);
|
||||
Html_tag rv = new Html_tag().Init(tag_rdr, src, tag_is_tail, tag_is_inline, src_bgn, src_end, atrs_bgn, atrs_end, name_id);
|
||||
rv.atrs_null = false;
|
||||
rv.atrs_hash = Copy(atrs_hash);
|
||||
return rv;
|
||||
|
||||
@@ -26,10 +26,10 @@ public class Html_tag_rdr {
|
||||
public byte[] Src() {return src;} private byte[] src;
|
||||
public int Src_end() {return src_end;} private int src_end;
|
||||
public Bry_rdr Rdr() {return rdr;} private final Bry_rdr rdr = new Bry_rdr();
|
||||
public void Init(byte[] src, int src_bgn, int src_end) {
|
||||
public void Init(byte[] ctx, byte[] src, int src_bgn, int src_end) {
|
||||
this.src = src; this.pos = src_bgn; this.src_end = src_end;
|
||||
tag__eos.Init(this, Bool_.N, Bool_.N, src_end, src_end, src_end, src_end, Html_tag_.Id__eos);
|
||||
rdr.Init_by_page(Bry_.Empty, src, src_end);
|
||||
tag__eos.Init(this, src, Bool_.N, Bool_.N, src_end, src_end, src_end, src_end, Html_tag_.Id__eos);
|
||||
rdr.Init_by_page(ctx, src, src_end);
|
||||
}
|
||||
public int Pos() {return pos;} private int pos;
|
||||
public void Pos_(int v) {this.pos = v;}
|
||||
@@ -169,7 +169,7 @@ public class Html_tag_rdr {
|
||||
++tag_end; // position after ">"
|
||||
}
|
||||
Html_tag tmp = move ? tag__tmp__move : tag__tmp__peek;
|
||||
return tmp.Init(this, cur_is_tail, inline, tag_bgn, tag_end, name_end, atrs_end, name_hash.Get_as_int_or(src, name_bgn, name_end, -1));
|
||||
return tmp.Init(this, src, cur_is_tail, inline, tag_bgn, tag_end, name_end, atrs_end, name_hash.Get_as_int_or(src, name_bgn, name_end, -1));
|
||||
}
|
||||
public boolean Read_and_move(byte match) {
|
||||
byte b = src[pos];
|
||||
@@ -215,11 +215,11 @@ public class Html_tag_rdr {
|
||||
}
|
||||
private Html_tag Tag__comment(int tag_bgn) {
|
||||
int tag_end = Bry_find_.Move_fwd(src, gplx.langs.htmls.Html_tag_.Comm_end, tag_bgn, src_end); if (tag_end == Bry_find_.Not_found) tag_end = src_end;
|
||||
return tag__comment.Init(this, Bool_.N, Bool_.N, tag_bgn, tag_end, tag_end, tag_end, Html_tag_.Id__comment);
|
||||
return tag__comment.Init(this, src, Bool_.N, Bool_.N, tag_bgn, tag_end, tag_end, tag_end, Html_tag_.Id__comment);
|
||||
}
|
||||
private Html_tag Tag__eos(int tag_bgn) {
|
||||
int tag_end = tag_bgn + 255; if (tag_end > src_end) tag_end = src_end;
|
||||
return tag__comment.Init(this, Bool_.N, Bool_.N, tag_bgn, tag_end, tag_end, tag_end, Html_tag_.Id__eos);
|
||||
return tag__comment.Init(this, src, Bool_.N, Bool_.N, tag_bgn, tag_end, tag_end, tag_end, Html_tag_.Id__eos);
|
||||
}
|
||||
private static final byte[] Bry__comment__mid = Bry_.new_a7("--");
|
||||
}
|
||||
|
||||
@@ -50,7 +50,7 @@ class Html_tag_rdr_fxt {
|
||||
private final Html_tag_rdr rdr = new Html_tag_rdr();
|
||||
public void Init(String src_str) {
|
||||
byte[] src_bry = Bry_.new_u8(src_str);
|
||||
rdr.Init(src_bry, 0, src_bry.length);
|
||||
rdr.Init(Bry_.Empty, src_bry, 0, src_bry.length);
|
||||
}
|
||||
public void Test__move_fwd_head(String expd) {Test__move_fwd_head(Html_tag_.Id__any, expd);}
|
||||
public void Test__move_fwd_head(int match_name_id, String expd) {
|
||||
|
||||
@@ -19,7 +19,7 @@ package gplx.langs.htmls.parsers.clses; import gplx.*; import gplx.langs.*; impo
|
||||
public class Html_atr_class_parser_ {
|
||||
public static void Parse(Html_tag tag, Html_atr_class_wkr wkr) {
|
||||
Html_atr atr = tag.Atrs__get_by_or_empty(Html_atr_.Bry__class);
|
||||
if (atr.Val_exists())
|
||||
if (atr.Val_dat_exists())
|
||||
Parse(tag.Src(), atr.Val_bgn(), atr.Val_end(), wkr);
|
||||
}
|
||||
public static void Parse(byte[] src, int src_bgn, int src_end, Html_atr_class_wkr wkr) {
|
||||
|
||||
@@ -19,7 +19,7 @@ package gplx.langs.htmls.parsers.styles; import gplx.*; import gplx.langs.*; imp
|
||||
public class Html_atr_style_parser_ {
|
||||
public static void Parse(Html_tag tag, Html_atr_style_wkr wkr) {
|
||||
Html_atr atr = tag.Atrs__get_by_or_empty(Html_atr_.Bry__style);
|
||||
if (atr.Val_exists())
|
||||
if (atr.Val_dat_exists())
|
||||
Parse(tag.Src(), atr.Val_bgn(), atr.Val_end(), wkr);
|
||||
}
|
||||
public static void Parse(byte[] src, int src_bgn, int src_end, Html_atr_style_wkr wkr) {
|
||||
|
||||
Reference in New Issue
Block a user