mirror of
https://github.com/gnosygnu/xowa.git
synced 2026-03-02 03:49:30 +00:00
v2.10.3.1
This commit is contained in:
@@ -77,7 +77,7 @@ public class Html_nde {
|
||||
break;
|
||||
}
|
||||
}
|
||||
return dirty ? tmp_bfr.Xto_bry_and_clear() : Bry_.Mid(src, bgn, end);
|
||||
return dirty ? tmp_bfr.To_bry_and_clear() : Bry_.Mid(src, bgn, end);
|
||||
}
|
||||
public byte[] Data(byte[] src) {
|
||||
return Bry_.Mid(src, tag_lhs_end, tag_rhs_bgn);
|
||||
|
||||
@@ -32,7 +32,7 @@ public class Html_utl {
|
||||
public static byte[] Escape_for_atr_val_as_bry(Bry_bfr bfr, byte quote_byte, byte[] bry) {
|
||||
if (bry == null) return null;
|
||||
boolean dirty = Escape_for_atr_val_as_bry(bfr, quote_byte, bry, 0, bry.length);
|
||||
return dirty ? bfr.Xto_bry_and_clear() : bry;
|
||||
return dirty ? bfr.To_bry_and_clear() : bry;
|
||||
}
|
||||
public static boolean Escape_for_atr_val_as_bry(Bry_bfr bfr, byte quote_byte, byte[] src, int bgn, int end) {
|
||||
boolean dirty = false;
|
||||
@@ -100,7 +100,7 @@ public class Html_utl {
|
||||
if (write_to_bfr)
|
||||
return null;
|
||||
else
|
||||
return dirty ? bfr.Xto_bry_and_clear() : bry;
|
||||
return dirty ? bfr.To_bry_and_clear() : bry;
|
||||
}
|
||||
|
||||
private static final Btrie_slim_mgr unescape_trie = Btrie_slim_mgr.ci_a7()
|
||||
@@ -114,7 +114,7 @@ public class Html_utl {
|
||||
Bry_bfr bfr = Bry_bfr.reset_(255);
|
||||
byte[] bry = Bry_.new_u8(src);
|
||||
Unescape(Bool_.Y, bfr, bry, 0, bry.length, Bool_.Y, Bool_.Y, Bool_.Y, Bool_.Y, Bool_.Y);
|
||||
return bfr.Xto_str_and_clear();
|
||||
return bfr.To_str_and_clear();
|
||||
}
|
||||
public static byte[] Unescape(boolean write_to_bfr, Bry_bfr bfr, byte[] bry, int bgn, int end, boolean escape_lt, boolean escape_gt, boolean escape_amp, boolean escape_quote, boolean escape_apos) {
|
||||
if (bry == null) return null;
|
||||
@@ -156,7 +156,7 @@ public class Html_utl {
|
||||
if (write_to_bfr)
|
||||
return null;
|
||||
else
|
||||
return dirty ? bfr.Xto_bry_and_clear() : bry;
|
||||
return dirty ? bfr.To_bry_and_clear() : bry;
|
||||
}
|
||||
public static byte[] Del_comments(Bry_bfr bfr, byte[] src) {return Del_comments(bfr, src, 0, src.length);}
|
||||
public static byte[] Del_comments(Bry_bfr bfr, byte[] src, int pos, int end) {
|
||||
@@ -175,6 +175,6 @@ public class Html_utl {
|
||||
bfr.Add_mid(src, pos, comm_bgn); // add everything between pos and comm_bgn
|
||||
pos = comm_end + Html_tag_.Comm_end_len; // reposition pos after comm_end
|
||||
}
|
||||
return bfr.Xto_bry_and_clear();
|
||||
return bfr.To_bry_and_clear();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -82,9 +82,9 @@ public class Html_wtr {
|
||||
bfr.Add_byte(Byte_ascii.Gt);
|
||||
return this;
|
||||
}
|
||||
public byte[] Xto_bry_and_clear() {return bfr.Xto_bry_and_clear();}
|
||||
public byte[] Xto_bry() {return bfr.Xto_bry();}
|
||||
public String Xto_str() {return bfr.Xto_str();}
|
||||
public byte[] To_bry_and_clear() {return bfr.To_bry_and_clear();}
|
||||
public byte[] Xto_bry() {return bfr.To_bry();}
|
||||
public String Xto_str() {return bfr.To_str();}
|
||||
public static void Write_atr_bry(Bry_bfr bfr, byte[] key, byte[] val) {Write_atr_bry(bfr, Bool_.Y, Byte_ascii.Quote, key, val);}
|
||||
public static void Write_atr_bry(Bry_bfr bfr, boolean write_space, byte atr_quote, byte[] key, byte[] val) {
|
||||
if (Bry_.Len_eq_0(val)) return; // don't write empty
|
||||
|
||||
@@ -27,7 +27,7 @@ public class Url_encoder implements Url_encoder_interface {
|
||||
Url_encoder_itm_hex hex = new Url_encoder_itm_hex(primary_encode_marker);
|
||||
for (int i = 0; i < 256; i++) {
|
||||
encode_ary[i] = hex; // default encode to hex
|
||||
decode_ary[i] = Url_encoder_itm_same._; // default decode to same; needed for files; EX: A!%21.png -> A!!.png;
|
||||
decode_ary[i] = Url_encoder_itm_same.Instance; // default decode to same; needed for files; EX: A!%21.png -> A!!.png;
|
||||
}
|
||||
decode_ary[primary_encode_marker] = hex;
|
||||
}
|
||||
@@ -49,15 +49,15 @@ public class Url_encoder implements Url_encoder_interface {
|
||||
}
|
||||
public void Itms_raw_same_rng(int bgn, int end) {
|
||||
for (int i = bgn; i <= end; i++) {
|
||||
encode_ary[i] = Url_encoder_itm_same._;
|
||||
decode_ary[i] = Url_encoder_itm_same._;
|
||||
encode_ary[i] = Url_encoder_itm_same.Instance;
|
||||
decode_ary[i] = Url_encoder_itm_same.Instance;
|
||||
}
|
||||
}
|
||||
public Url_encoder Itms_raw_same_many(int... ary) {
|
||||
int ary_len = ary.length;
|
||||
for (int i = 0; i < ary_len; i++) {
|
||||
encode_ary[ary[i]] = Url_encoder_itm_same._;
|
||||
decode_ary[ary[i]] = Url_encoder_itm_same._;
|
||||
encode_ary[ary[i]] = Url_encoder_itm_same.Instance;
|
||||
decode_ary[ary[i]] = Url_encoder_itm_same.Instance;
|
||||
}
|
||||
return this;
|
||||
}
|
||||
@@ -75,20 +75,20 @@ public class Url_encoder implements Url_encoder_interface {
|
||||
synchronized (thread_lock) {
|
||||
tmp_bfr.Add(Io_url.Http_file_bry);
|
||||
Encode(tmp_bfr, url.RawBry());
|
||||
return tmp_bfr.Xto_bry_and_clear();
|
||||
return tmp_bfr.To_bry_and_clear();
|
||||
}
|
||||
}
|
||||
public String Encode_str(String str) {
|
||||
synchronized (thread_lock) {
|
||||
byte[] bry = Bry_.new_u8(str); Encode(tmp_bfr, bry, 0, bry.length); return tmp_bfr.Xto_str_and_clear();
|
||||
byte[] bry = Bry_.new_u8(str); Encode(tmp_bfr, bry, 0, bry.length); return tmp_bfr.To_str_and_clear();
|
||||
}
|
||||
}
|
||||
public byte[] Encode_bry(String str) {
|
||||
synchronized (thread_lock) {
|
||||
byte[] bry = Bry_.new_u8(str); Encode(tmp_bfr, bry, 0, bry.length); return tmp_bfr.Xto_bry_and_clear();
|
||||
byte[] bry = Bry_.new_u8(str); Encode(tmp_bfr, bry, 0, bry.length); return tmp_bfr.To_bry_and_clear();
|
||||
}
|
||||
}
|
||||
public byte[] Encode(byte[] bry) {Encode(tmp_bfr, bry, 0, bry.length); return tmp_bfr.Xto_bry_and_clear();}
|
||||
public byte[] Encode(byte[] bry) {Encode(tmp_bfr, bry, 0, bry.length); return tmp_bfr.To_bry_and_clear();}
|
||||
public Bry_bfr Encode(Bry_bfr bfr, byte[] bry) {Encode(bfr, bry, 0, bry.length); return bfr;}
|
||||
public void Encode(Bry_bfr bfr, byte[] bry, int bgn, int end) {
|
||||
synchronized (thread_lock) {
|
||||
@@ -106,15 +106,15 @@ public class Url_encoder implements Url_encoder_interface {
|
||||
}
|
||||
public String Decode_str(String str) {
|
||||
synchronized (thread_lock) {
|
||||
byte[] bry = Bry_.new_u8(str); Decode(bry, 0, bry.length, tmp_bfr, true); return tmp_bfr.Xto_str_and_clear();
|
||||
byte[] bry = Bry_.new_u8(str); Decode(bry, 0, bry.length, tmp_bfr, true); return tmp_bfr.To_str_and_clear();
|
||||
}
|
||||
}
|
||||
public byte[] Decode(byte[] bry) {return Decode(tmp_bfr, bry, 0, bry.length);}
|
||||
public byte[] Decode(byte[] bry, int bgn, int end) {return Decode(tmp_bfr, bry, bgn, end);}
|
||||
public byte[] Decode(Bry_bfr bfr, byte[] bry, int bgn, int end) {Decode(bry, bgn, end, bfr , false); return bfr.Xto_bry_and_clear();}
|
||||
public byte[] Decode(Bry_bfr bfr, byte[] bry, int bgn, int end) {Decode(bry, bgn, end, bfr , false); return bfr.To_bry_and_clear();}
|
||||
public byte[] Decode_lax(byte[] bry) {
|
||||
synchronized (thread_lock) {
|
||||
Decode(bry, 0, bry.length, tmp_bfr, false); return tmp_bfr.Xto_bry_and_clear();
|
||||
Decode(bry, 0, bry.length, tmp_bfr, false); return tmp_bfr.To_bry_and_clear();
|
||||
}
|
||||
}
|
||||
public void Decode(byte[] bry, int bgn, int end, Bry_bfr bfr, boolean fail_when_invalid) {
|
||||
@@ -145,7 +145,7 @@ public class Url_encoder implements Url_encoder_interface {
|
||||
mediawiki_base(rv, true);
|
||||
rv.Itms_decode_marker(Byte_ascii.Dot);
|
||||
rv.Itms_raw_diff(Byte_ascii.Space, Byte_ascii.Underline);
|
||||
rv.Itms_raw_html_ent(Byte_ascii.Amp, Xop_amp_trie._);
|
||||
rv.Itms_raw_html_ent(Byte_ascii.Amp, Xop_amp_trie.Instance);
|
||||
return rv;
|
||||
}
|
||||
public static Url_encoder new_http_url_() {
|
||||
@@ -230,7 +230,7 @@ interface Url_encoder_itm {
|
||||
class Url_encoder_itm_same implements Url_encoder_itm {
|
||||
public int Encode(Bry_bfr bfr, byte[] src, int end, int idx, byte b) {bfr.Add_byte(b); return 0;}
|
||||
public int Decode(Bry_bfr bfr, byte[] src, int end, int idx, byte b, boolean fail_when_invalid) {bfr.Add_byte(b); return 0;}
|
||||
public static final Url_encoder_itm _ = new Url_encoder_itm_same();
|
||||
public static final Url_encoder_itm Instance = new Url_encoder_itm_same();
|
||||
}
|
||||
class Url_encoder_itm_diff implements Url_encoder_itm {
|
||||
public Url_encoder_itm_diff(byte orig, byte repl) {this.orig = orig; this.repl = repl;} private byte orig, repl;
|
||||
@@ -254,14 +254,14 @@ class Url_encoder_itm_hex implements Url_encoder_itm {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
int hex_val = Int_.Xto_int_hex(src[idx + 1]);
|
||||
int hex_val = Int_.To_int_hex(src[idx + 1]);
|
||||
if (hex_val == -1) { // invalid hex byte; EX: %GC; DATE:2014-04-10
|
||||
bfr.Add_byte(b);
|
||||
return 0;
|
||||
}
|
||||
int v_0 = hex_val * 16;
|
||||
if (v_0 != -1) {
|
||||
int v_1 = Int_.Xto_int_hex(src[idx + 2]);
|
||||
int v_1 = Int_.To_int_hex(src[idx + 2]);
|
||||
if (v_1 != -1) {
|
||||
bfr.Add_byte((byte)(v_0 + v_1));
|
||||
return 2;
|
||||
|
||||
@@ -29,7 +29,7 @@ public class Url_encoder_tst {
|
||||
byte[] raw = Bry_.new_a7("0%.jpg");
|
||||
Bry_bfr tmp_bfr = Bry_bfr.new_();
|
||||
fxt.Encoder_id().Encoder().Decode(raw, 0, raw.length, tmp_bfr, false);
|
||||
Tfds.Eq("0%.jpg", tmp_bfr.Xto_str_and_clear());
|
||||
Tfds.Eq("0%.jpg", tmp_bfr.To_str_and_clear());
|
||||
}
|
||||
@Test public void Id_nbsp() {fxt.Encoder_id().Test_encode("a b", "a.C2.A0b");} // NOTE: not just .A0 (160) but utf8-encoded .C2.A0
|
||||
@Test public void Url_syms() {fxt.Encoder_url().Test_encode_decode("!?^~", "%21%3F%5E%7E");}
|
||||
|
||||
@@ -1,24 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.langs.htmls.parsers; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
|
||||
class Gfo_html_node {
|
||||
public Gfo_html_node(byte[] src, int bgn, int end) {this.src = src; this.bgn = bgn; this.end = end;}
|
||||
public byte[] Src() {return src;} private final byte[] src;
|
||||
public int Bgn() {return bgn;} private final int bgn;
|
||||
public int End() {return end;} private final int end;
|
||||
}
|
||||
@@ -1,69 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.langs.htmls.parsers; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
|
||||
import gplx.core.btries.*; import gplx.core.primitives.*;
|
||||
import gplx.xowa.*;
|
||||
import gplx.xowa.parsers.xndes.*;
|
||||
class Gfo_html_parser {
|
||||
private final Gfo_msg_log msg_log = Gfo_msg_log.Test();
|
||||
private final Xop_xatr_parser xatr_parser = new Xop_xatr_parser();
|
||||
public void Parse(Gfo_html_wkr handler, byte[] src, int bgn, int end) {
|
||||
// int src_len = src.length;
|
||||
// int prv_pos = 0;
|
||||
// int css_find_bgn_len = Css_find_bgn.length;
|
||||
// byte[] protocol_prefix_bry = Bry_.new_u8(protocol_prefix);
|
||||
// while (true) {
|
||||
// int url_bgn = Bry_find_.Find_fwd(src, Css_find_bgn, prv_pos); if (url_bgn == Bry_.NotFound) break; // nothing left; stop
|
||||
// url_bgn += css_find_bgn_len;
|
||||
// int url_end = Bry_find_.Find_fwd(src, Byte_ascii.Quote, url_bgn, src_len); if (url_end == Bry_.NotFound) {usr_dlg.Warn_many("", "main_page.css_parse", "could not find css; pos='~{0}' text='~{1}'", url_bgn, String_.new_u8__by_len(src, url_bgn, url_bgn + 32)); break;}
|
||||
// byte[] css_url_bry = Bry_.Mid(src, url_bgn, url_end);
|
||||
// css_url_bry = Bry_.Replace(css_url_bry, Css_amp_find, Css_amp_repl); // & -> &
|
||||
// css_url_bry = url_encoder.Decode(css_url_bry); // %2C -> %7C -> |
|
||||
// css_url_bry = Bry_.Add(protocol_prefix_bry, css_url_bry);
|
||||
// rv.Add(String_.new_u8(css_url_bry));
|
||||
// prv_pos = url_end;
|
||||
// }
|
||||
// return rv.XtoStrAry();
|
||||
int src_len = src.length; int pos = 0;
|
||||
while (pos < src_len) {
|
||||
byte b = src[pos];
|
||||
switch (b) {
|
||||
case Byte_ascii.Angle_bgn:
|
||||
pos = Parse_node(handler, src, end, pos, pos + 1);
|
||||
break;
|
||||
default:
|
||||
++pos;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
private int Parse_node(Gfo_html_wkr handler, byte[] src, int end, int tkn_bgn, int tkn_end) {
|
||||
int name_bgn = tkn_end;
|
||||
int name_end = Bry_find_.Find_fwd_until_ws(src, name_bgn, end);
|
||||
if (name_end == Bry_find_.Not_found) return end; // EOS; EX: "<abcEOS"
|
||||
if (name_bgn == name_end) return tkn_end; // ws; EX: "< "
|
||||
Object o = handler.Get_or_null(src, name_bgn, name_end);
|
||||
if (o == null) return name_end; // unknown name: EX: "<unknown >"
|
||||
int node_end = Bry_find_.Find_fwd(src, Byte_ascii.Angle_end, name_end, end);
|
||||
if (node_end == Bry_find_.Not_found) return end; // EOS; EX: "<name lots_of_text_but_no_gt EOS"
|
||||
Xop_xatr_itm[] xatr_ary = xatr_parser.Parse(msg_log, src, name_end, node_end);
|
||||
Gfo_html_tkn tkn = (Gfo_html_tkn)o;
|
||||
tkn.Process(src, Xop_xatr_hash.new_ary(src, xatr_ary));
|
||||
return node_end;
|
||||
}
|
||||
}
|
||||
@@ -1,22 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.langs.htmls.parsers; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
|
||||
interface Gfo_html_wkr {
|
||||
Gfo_html_tkn Get_or_null(byte[] src, int bgn, int end);
|
||||
void Process(Gfo_html_node node);
|
||||
}
|
||||
@@ -1,34 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.langs.htmls.parsers; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
|
||||
import gplx.xowa.*;
|
||||
import gplx.xowa.parsers.xndes.*;
|
||||
interface Gfo_html_tkn {
|
||||
int Tid();
|
||||
byte[] Key();
|
||||
void Process(byte[] src, Xop_xatr_hash hash);
|
||||
}
|
||||
class Gfo_html_tkn_ {
|
||||
public static final int Tid_link = 1;
|
||||
public static final byte[] Key_link = Bry_.new_a7("link");
|
||||
}
|
||||
class Gfo_html_tkn__link implements Gfo_html_tkn {
|
||||
public int Tid() {return Gfo_html_tkn_.Tid_link;}
|
||||
public byte[] Key() {return Gfo_html_tkn_.Key_link;}
|
||||
@gplx.Virtual public void Process(byte[] src, Xop_xatr_hash hash) {}
|
||||
}
|
||||
Reference in New Issue
Block a user