1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2026-03-02 03:49:30 +00:00

v2.10.3.1

This commit is contained in:
gnosygnu
2015-10-18 22:17:57 -04:00
parent 8e18af05b6
commit 4f43f51b18
1935 changed files with 12500 additions and 12889 deletions

View File

@@ -77,7 +77,7 @@ public class Html_nde {
break;
}
}
return dirty ? tmp_bfr.Xto_bry_and_clear() : Bry_.Mid(src, bgn, end);
return dirty ? tmp_bfr.To_bry_and_clear() : Bry_.Mid(src, bgn, end);
}
public byte[] Data(byte[] src) {
return Bry_.Mid(src, tag_lhs_end, tag_rhs_bgn);

View File

@@ -32,7 +32,7 @@ public class Html_utl {
public static byte[] Escape_for_atr_val_as_bry(Bry_bfr bfr, byte quote_byte, byte[] bry) {
if (bry == null) return null;
boolean dirty = Escape_for_atr_val_as_bry(bfr, quote_byte, bry, 0, bry.length);
return dirty ? bfr.Xto_bry_and_clear() : bry;
return dirty ? bfr.To_bry_and_clear() : bry;
}
public static boolean Escape_for_atr_val_as_bry(Bry_bfr bfr, byte quote_byte, byte[] src, int bgn, int end) {
boolean dirty = false;
@@ -100,7 +100,7 @@ public class Html_utl {
if (write_to_bfr)
return null;
else
return dirty ? bfr.Xto_bry_and_clear() : bry;
return dirty ? bfr.To_bry_and_clear() : bry;
}
private static final Btrie_slim_mgr unescape_trie = Btrie_slim_mgr.ci_a7()
@@ -114,7 +114,7 @@ public class Html_utl {
Bry_bfr bfr = Bry_bfr.reset_(255);
byte[] bry = Bry_.new_u8(src);
Unescape(Bool_.Y, bfr, bry, 0, bry.length, Bool_.Y, Bool_.Y, Bool_.Y, Bool_.Y, Bool_.Y);
return bfr.Xto_str_and_clear();
return bfr.To_str_and_clear();
}
public static byte[] Unescape(boolean write_to_bfr, Bry_bfr bfr, byte[] bry, int bgn, int end, boolean escape_lt, boolean escape_gt, boolean escape_amp, boolean escape_quote, boolean escape_apos) {
if (bry == null) return null;
@@ -156,7 +156,7 @@ public class Html_utl {
if (write_to_bfr)
return null;
else
return dirty ? bfr.Xto_bry_and_clear() : bry;
return dirty ? bfr.To_bry_and_clear() : bry;
}
public static byte[] Del_comments(Bry_bfr bfr, byte[] src) {return Del_comments(bfr, src, 0, src.length);}
public static byte[] Del_comments(Bry_bfr bfr, byte[] src, int pos, int end) {
@@ -175,6 +175,6 @@ public class Html_utl {
bfr.Add_mid(src, pos, comm_bgn); // add everything between pos and comm_bgn
pos = comm_end + Html_tag_.Comm_end_len; // reposition pos after comm_end
}
return bfr.Xto_bry_and_clear();
return bfr.To_bry_and_clear();
}
}

View File

@@ -82,9 +82,9 @@ public class Html_wtr {
bfr.Add_byte(Byte_ascii.Gt);
return this;
}
public byte[] Xto_bry_and_clear() {return bfr.Xto_bry_and_clear();}
public byte[] Xto_bry() {return bfr.Xto_bry();}
public String Xto_str() {return bfr.Xto_str();}
public byte[] To_bry_and_clear() {return bfr.To_bry_and_clear();}
public byte[] Xto_bry() {return bfr.To_bry();}
public String Xto_str() {return bfr.To_str();}
public static void Write_atr_bry(Bry_bfr bfr, byte[] key, byte[] val) {Write_atr_bry(bfr, Bool_.Y, Byte_ascii.Quote, key, val);}
public static void Write_atr_bry(Bry_bfr bfr, boolean write_space, byte atr_quote, byte[] key, byte[] val) {
if (Bry_.Len_eq_0(val)) return; // don't write empty

View File

@@ -27,7 +27,7 @@ public class Url_encoder implements Url_encoder_interface {
Url_encoder_itm_hex hex = new Url_encoder_itm_hex(primary_encode_marker);
for (int i = 0; i < 256; i++) {
encode_ary[i] = hex; // default encode to hex
decode_ary[i] = Url_encoder_itm_same._; // default decode to same; needed for files; EX: A!%21.png -> A!!.png;
decode_ary[i] = Url_encoder_itm_same.Instance; // default decode to same; needed for files; EX: A!%21.png -> A!!.png;
}
decode_ary[primary_encode_marker] = hex;
}
@@ -49,15 +49,15 @@ public class Url_encoder implements Url_encoder_interface {
}
public void Itms_raw_same_rng(int bgn, int end) {
for (int i = bgn; i <= end; i++) {
encode_ary[i] = Url_encoder_itm_same._;
decode_ary[i] = Url_encoder_itm_same._;
encode_ary[i] = Url_encoder_itm_same.Instance;
decode_ary[i] = Url_encoder_itm_same.Instance;
}
}
public Url_encoder Itms_raw_same_many(int... ary) {
int ary_len = ary.length;
for (int i = 0; i < ary_len; i++) {
encode_ary[ary[i]] = Url_encoder_itm_same._;
decode_ary[ary[i]] = Url_encoder_itm_same._;
encode_ary[ary[i]] = Url_encoder_itm_same.Instance;
decode_ary[ary[i]] = Url_encoder_itm_same.Instance;
}
return this;
}
@@ -75,20 +75,20 @@ public class Url_encoder implements Url_encoder_interface {
synchronized (thread_lock) {
tmp_bfr.Add(Io_url.Http_file_bry);
Encode(tmp_bfr, url.RawBry());
return tmp_bfr.Xto_bry_and_clear();
return tmp_bfr.To_bry_and_clear();
}
}
public String Encode_str(String str) {
synchronized (thread_lock) {
byte[] bry = Bry_.new_u8(str); Encode(tmp_bfr, bry, 0, bry.length); return tmp_bfr.Xto_str_and_clear();
byte[] bry = Bry_.new_u8(str); Encode(tmp_bfr, bry, 0, bry.length); return tmp_bfr.To_str_and_clear();
}
}
public byte[] Encode_bry(String str) {
synchronized (thread_lock) {
byte[] bry = Bry_.new_u8(str); Encode(tmp_bfr, bry, 0, bry.length); return tmp_bfr.Xto_bry_and_clear();
byte[] bry = Bry_.new_u8(str); Encode(tmp_bfr, bry, 0, bry.length); return tmp_bfr.To_bry_and_clear();
}
}
public byte[] Encode(byte[] bry) {Encode(tmp_bfr, bry, 0, bry.length); return tmp_bfr.Xto_bry_and_clear();}
public byte[] Encode(byte[] bry) {Encode(tmp_bfr, bry, 0, bry.length); return tmp_bfr.To_bry_and_clear();}
public Bry_bfr Encode(Bry_bfr bfr, byte[] bry) {Encode(bfr, bry, 0, bry.length); return bfr;}
public void Encode(Bry_bfr bfr, byte[] bry, int bgn, int end) {
synchronized (thread_lock) {
@@ -106,15 +106,15 @@ public class Url_encoder implements Url_encoder_interface {
}
public String Decode_str(String str) {
synchronized (thread_lock) {
byte[] bry = Bry_.new_u8(str); Decode(bry, 0, bry.length, tmp_bfr, true); return tmp_bfr.Xto_str_and_clear();
byte[] bry = Bry_.new_u8(str); Decode(bry, 0, bry.length, tmp_bfr, true); return tmp_bfr.To_str_and_clear();
}
}
public byte[] Decode(byte[] bry) {return Decode(tmp_bfr, bry, 0, bry.length);}
public byte[] Decode(byte[] bry, int bgn, int end) {return Decode(tmp_bfr, bry, bgn, end);}
public byte[] Decode(Bry_bfr bfr, byte[] bry, int bgn, int end) {Decode(bry, bgn, end, bfr , false); return bfr.Xto_bry_and_clear();}
public byte[] Decode(Bry_bfr bfr, byte[] bry, int bgn, int end) {Decode(bry, bgn, end, bfr , false); return bfr.To_bry_and_clear();}
public byte[] Decode_lax(byte[] bry) {
synchronized (thread_lock) {
Decode(bry, 0, bry.length, tmp_bfr, false); return tmp_bfr.Xto_bry_and_clear();
Decode(bry, 0, bry.length, tmp_bfr, false); return tmp_bfr.To_bry_and_clear();
}
}
public void Decode(byte[] bry, int bgn, int end, Bry_bfr bfr, boolean fail_when_invalid) {
@@ -145,7 +145,7 @@ public class Url_encoder implements Url_encoder_interface {
mediawiki_base(rv, true);
rv.Itms_decode_marker(Byte_ascii.Dot);
rv.Itms_raw_diff(Byte_ascii.Space, Byte_ascii.Underline);
rv.Itms_raw_html_ent(Byte_ascii.Amp, Xop_amp_trie._);
rv.Itms_raw_html_ent(Byte_ascii.Amp, Xop_amp_trie.Instance);
return rv;
}
public static Url_encoder new_http_url_() {
@@ -230,7 +230,7 @@ interface Url_encoder_itm {
class Url_encoder_itm_same implements Url_encoder_itm {
public int Encode(Bry_bfr bfr, byte[] src, int end, int idx, byte b) {bfr.Add_byte(b); return 0;}
public int Decode(Bry_bfr bfr, byte[] src, int end, int idx, byte b, boolean fail_when_invalid) {bfr.Add_byte(b); return 0;}
public static final Url_encoder_itm _ = new Url_encoder_itm_same();
public static final Url_encoder_itm Instance = new Url_encoder_itm_same();
}
class Url_encoder_itm_diff implements Url_encoder_itm {
public Url_encoder_itm_diff(byte orig, byte repl) {this.orig = orig; this.repl = repl;} private byte orig, repl;
@@ -254,14 +254,14 @@ class Url_encoder_itm_hex implements Url_encoder_itm {
return 0;
}
}
int hex_val = Int_.Xto_int_hex(src[idx + 1]);
int hex_val = Int_.To_int_hex(src[idx + 1]);
if (hex_val == -1) { // invalid hex byte; EX: %GC; DATE:2014-04-10
bfr.Add_byte(b);
return 0;
}
int v_0 = hex_val * 16;
if (v_0 != -1) {
int v_1 = Int_.Xto_int_hex(src[idx + 2]);
int v_1 = Int_.To_int_hex(src[idx + 2]);
if (v_1 != -1) {
bfr.Add_byte((byte)(v_0 + v_1));
return 2;

View File

@@ -29,7 +29,7 @@ public class Url_encoder_tst {
byte[] raw = Bry_.new_a7("0%.jpg");
Bry_bfr tmp_bfr = Bry_bfr.new_();
fxt.Encoder_id().Encoder().Decode(raw, 0, raw.length, tmp_bfr, false);
Tfds.Eq("0%.jpg", tmp_bfr.Xto_str_and_clear());
Tfds.Eq("0%.jpg", tmp_bfr.To_str_and_clear());
}
@Test public void Id_nbsp() {fxt.Encoder_id().Test_encode("a&nbsp;b", "a.C2.A0b");} // NOTE: not just .A0 (160) but utf8-encoded .C2.A0
@Test public void Url_syms() {fxt.Encoder_url().Test_encode_decode("!?^~", "%21%3F%5E%7E");}

View File

@@ -1,24 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.htmls.parsers; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
class Gfo_html_node {
public Gfo_html_node(byte[] src, int bgn, int end) {this.src = src; this.bgn = bgn; this.end = end;}
public byte[] Src() {return src;} private final byte[] src;
public int Bgn() {return bgn;} private final int bgn;
public int End() {return end;} private final int end;
}

View File

@@ -1,69 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.htmls.parsers; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
import gplx.core.btries.*; import gplx.core.primitives.*;
import gplx.xowa.*;
import gplx.xowa.parsers.xndes.*;
class Gfo_html_parser {
private final Gfo_msg_log msg_log = Gfo_msg_log.Test();
private final Xop_xatr_parser xatr_parser = new Xop_xatr_parser();
public void Parse(Gfo_html_wkr handler, byte[] src, int bgn, int end) {
// int src_len = src.length;
// int prv_pos = 0;
// int css_find_bgn_len = Css_find_bgn.length;
// byte[] protocol_prefix_bry = Bry_.new_u8(protocol_prefix);
// while (true) {
// int url_bgn = Bry_find_.Find_fwd(src, Css_find_bgn, prv_pos); if (url_bgn == Bry_.NotFound) break; // nothing left; stop
// url_bgn += css_find_bgn_len;
// int url_end = Bry_find_.Find_fwd(src, Byte_ascii.Quote, url_bgn, src_len); if (url_end == Bry_.NotFound) {usr_dlg.Warn_many("", "main_page.css_parse", "could not find css; pos='~{0}' text='~{1}'", url_bgn, String_.new_u8__by_len(src, url_bgn, url_bgn + 32)); break;}
// byte[] css_url_bry = Bry_.Mid(src, url_bgn, url_end);
// css_url_bry = Bry_.Replace(css_url_bry, Css_amp_find, Css_amp_repl); // &amp; -> &
// css_url_bry = url_encoder.Decode(css_url_bry); // %2C -> %7C -> |
// css_url_bry = Bry_.Add(protocol_prefix_bry, css_url_bry);
// rv.Add(String_.new_u8(css_url_bry));
// prv_pos = url_end;
// }
// return rv.XtoStrAry();
int src_len = src.length; int pos = 0;
while (pos < src_len) {
byte b = src[pos];
switch (b) {
case Byte_ascii.Angle_bgn:
pos = Parse_node(handler, src, end, pos, pos + 1);
break;
default:
++pos;
break;
}
}
}
private int Parse_node(Gfo_html_wkr handler, byte[] src, int end, int tkn_bgn, int tkn_end) {
int name_bgn = tkn_end;
int name_end = Bry_find_.Find_fwd_until_ws(src, name_bgn, end);
if (name_end == Bry_find_.Not_found) return end; // EOS; EX: "<abcEOS"
if (name_bgn == name_end) return tkn_end; // ws; EX: "< "
Object o = handler.Get_or_null(src, name_bgn, name_end);
if (o == null) return name_end; // unknown name: EX: "<unknown >"
int node_end = Bry_find_.Find_fwd(src, Byte_ascii.Angle_end, name_end, end);
if (node_end == Bry_find_.Not_found) return end; // EOS; EX: "<name lots_of_text_but_no_gt EOS"
Xop_xatr_itm[] xatr_ary = xatr_parser.Parse(msg_log, src, name_end, node_end);
Gfo_html_tkn tkn = (Gfo_html_tkn)o;
tkn.Process(src, Xop_xatr_hash.new_ary(src, xatr_ary));
return node_end;
}
}

View File

@@ -1,22 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.htmls.parsers; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
interface Gfo_html_wkr {
Gfo_html_tkn Get_or_null(byte[] src, int bgn, int end);
void Process(Gfo_html_node node);
}

View File

@@ -1,34 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.htmls.parsers; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
import gplx.xowa.*;
import gplx.xowa.parsers.xndes.*;
interface Gfo_html_tkn {
int Tid();
byte[] Key();
void Process(byte[] src, Xop_xatr_hash hash);
}
class Gfo_html_tkn_ {
public static final int Tid_link = 1;
public static final byte[] Key_link = Bry_.new_a7("link");
}
class Gfo_html_tkn__link implements Gfo_html_tkn {
public int Tid() {return Gfo_html_tkn_.Tid_link;}
public byte[] Key() {return Gfo_html_tkn_.Key_link;}
@gplx.Virtual public void Process(byte[] src, Xop_xatr_hash hash) {}
}