1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2026-03-02 03:49:30 +00:00
This commit is contained in:
gnosygnu
2015-08-03 00:10:03 -04:00
parent 9d63f03b3d
commit 34c34f227c
514 changed files with 4972 additions and 3910 deletions

View File

@@ -18,8 +18,8 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
package gplx.core.btries; import gplx.*; import gplx.core.*;
import org.junit.*;
import gplx.xowa.langs.cases.*;
public class Btrie_utf8_mgr_tst {
@Before public void init() {fxt.Clear();} private Btrie_utf8_mgr_fxt fxt = new Btrie_utf8_mgr_fxt();
public class Btrie_u8_mgr_tst {
@Before public void init() {fxt.Clear();} private Btrie_u8_mgr_fxt fxt = new Btrie_u8_mgr_fxt();
@Test public void Ascii() {
fxt.Init_add(Bry_.new_a7("a") , "1");
fxt.Init_add(Bry_.new_a7("abc") , "123");
@@ -47,7 +47,7 @@ public class Btrie_utf8_mgr_tst {
fxt.Init_add(Bry_.new_u8("İ") , "1");
fxt.Test_match("İ" , "1"); // exact=y; İ = Bry_.ints_(196,176)
fxt.Test_match("i" , "1"); // lower=y; i = Bry_.ints_(105)
fxt.Test_match("I" , null); // upper=n; I = Bry_.ints_( 73); see Btrie_utf8_itm and rv.asymmetric_bry
fxt.Test_match("I" , null); // upper=n; I = Bry_.ints_( 73); see Btrie_u8_itm and rv.asymmetric_bry
fxt.Clear();
fxt.Init_add(Bry_.new_u8("i") , "1");
@@ -62,7 +62,7 @@ public class Btrie_utf8_mgr_tst {
fxt.Test_match("İi" , "1"); // mixed
fxt.Test_match("" , "1"); // mixed
}
@Test public void Utf8_asymmetric_upper() { // PURPOSE: "İ" and "I" should co-exist; see Btrie_utf8_itm and called_by_match
@Test public void Utf8_asymmetric_upper() { // PURPOSE: "İ" and "I" should co-exist; see Btrie_u8_itm and called_by_match
fxt.Init_add(Bry_.new_u8("İ") , "1");
fxt.Init_add(Bry_.new_u8("I") , "1");
fxt.Test_match("İ" , "1"); // exact
@@ -77,10 +77,10 @@ public class Btrie_utf8_mgr_tst {
fxt.Test_match("a_b" , null); // diff : len=1
}
}
class Btrie_utf8_mgr_fxt {
private Btrie_utf8_mgr trie;
class Btrie_u8_mgr_fxt {
private Btrie_u8_mgr trie;
public void Clear() {
trie = Btrie_utf8_mgr.new_(Xol_case_mgr_.Utf8());
trie = Btrie_u8_mgr.new_(Xol_case_mgr_.U8());
}
public void Init_add(byte[] key, Object val) {trie.Add_obj(key, val);}
public void Test_match_pos(String src_str, int bgn_pos, String expd) {

View File

@@ -30,10 +30,10 @@ public class Gfo_fld_rdr extends Gfo_fld_base {
public byte[] Read_bry_simple() {Move_next_simple(); return Bry_.Mid(data, fld_bgn, fld_end);} // was Mid_by_len???; 20120915
public int Read_int_base85_lenN(int len) {fld_bgn = pos; fld_end = pos + len - 1 ; pos = pos + len + 1 ; return Base85_utl.XtoIntByAry(data, fld_bgn, fld_end);}
public int Read_int_base85_len5() {fld_bgn = pos; fld_end = pos + 4 ; pos = pos + 6 ; return Base85_utl.XtoIntByAry(data, fld_bgn, fld_end);}
public int Read_int() {Move_next_simple(); return Bry_.Xto_int_or(data, fld_bgn, fld_end, -1);}
public byte Read_int_as_byte() {Move_next_simple(); return (byte)Bry_.Xto_int_or(data, fld_bgn, fld_end, -1);}
public int Read_int() {Move_next_simple(); return Bry_.To_int_or(data, fld_bgn, fld_end, -1);}
public byte Read_int_as_byte() {Move_next_simple(); return (byte)Bry_.To_int_or(data, fld_bgn, fld_end, -1);}
public byte Read_byte() {Move_next_simple(); return data[fld_bgn];}
public double Read_double() {Move_next_simple(); return Bry_.XtoDoubleByPos(data, fld_bgn, fld_end);}
public double Read_double() {Move_next_simple(); return Bry_.To_double(data, fld_bgn, fld_end);}
public DateAdp Read_dte() {// NOTE: fmt = yyyyMMdd HHmmss.fff
int y = 0, M = 0, d = 0, H = 0, m = 0, s = 0, f = 0;
if (pos < data_len && data[pos] == row_dlm) {++pos; ++row_idx; fld_idx = 0;} fld_bgn = pos;

View File

@@ -20,7 +20,7 @@ public class Json_doc {
public void Ctor(byte[] src, Json_nde root) {this.src = src; this.root = root;}
public Bry_bfr Bfr() {return bfr;} Bry_bfr bfr = Bry_bfr.new_();
public Number_parser Utl_num_parser() {return utl_num_parser;} Number_parser utl_num_parser = new Number_parser();
public byte[] Str_utf8_bry() {return str_utf8_bry;} private byte[] str_utf8_bry = new byte[6];
public byte[] Str_u8_bry() {return str_u8_bry;} private byte[] str_u8_bry = new byte[6];
public byte[] Src() {return src;} private byte[] src;
public Json_nde Root() {return root;} Json_nde root;
public byte[] Get_val_as_bry_or(byte[] qry_bry, byte[] or) {tmp_qry_bry[0] = qry_bry; return Get_val_as_bry_or(tmp_qry_bry, or);}

View File

@@ -47,9 +47,9 @@ class Json_itm_decimal extends Json_itm_base {
@Override public byte Tid() {return Json_itm_.Tid_decimal;}
@Override public Object Data() {
if (data == null)
data = DecimalAdp_.parse_(String_.new_a7(this.Data_bry()));
data = Decimal_adp_.parse_(String_.new_a7(this.Data_bry()));
return data;
} DecimalAdp data;
} Decimal_adp data;
@Override public byte[] Data_bry() {
if (data_bry == null) data_bry = Bry_.Mid(doc.Src(), this.Src_bgn(), this.Src_end());
return data_bry;
@@ -82,7 +82,7 @@ class Json_itm_str extends Json_itm_base {
byte[] src = doc.Src(); int bgn = this.Src_bgn(), end = this.Src_end();
if (exact) return Bry_.Mid(src, bgn, end);
Bry_bfr bfr = doc.Bfr();
byte[] utf8_bry = doc.Str_utf8_bry();
byte[] utf8_bry = doc.Str_u8_bry();
for (int i = bgn; i < end; i++) {
byte b = src[i];
switch (b) {

View File

@@ -23,7 +23,7 @@ public class Json_kv_ary_srl_tst {
@Test public void Bool_n() {fxt.Test_parse("{'k0':false}" , fxt.ary_(fxt.kv_bool_("k0", false)));}
@Test public void Num() {fxt.Test_parse("{'k0':123}" , fxt.ary_(fxt.kv_int_("k0", 123)));}
@Test public void Str() {fxt.Test_parse("{'k0':'v0'}" , fxt.ary_(fxt.kv_str_("k0", "v0")));}
@Test public void Num_dec() {fxt.Test_parse("{'k0':1.23}" , fxt.ary_(fxt.kv_dec_("k0", DecimalAdp_.parse_("1.23"))));}
@Test public void Num_dec() {fxt.Test_parse("{'k0':1.23}" , fxt.ary_(fxt.kv_dec_("k0", Decimal_adp_.parse_("1.23"))));}
@Test public void Ary_int() {fxt.Test_parse("{'k0':[1,2,3]}" , fxt.ary_(fxt.kv_obj_("k0", fxt.ary_(fxt.kv_int_("1", 1), fxt.kv_int_("2", 2), fxt.kv_int_("3", 3)))));}
@Test public void Ary_empty() {fxt.Test_parse("{'k0':[]}" , fxt.ary_(fxt.kv_obj_("k0", fxt.ary_())));}
@Test public void Subs_int() {fxt.Test_parse("{'k0':{'k00':1,'k01':2}}" , fxt.ary_(fxt.kv_obj_("k0", fxt.ary_(fxt.kv_int_("k00", 1), fxt.kv_int_("k01", 2)))));}
@@ -46,5 +46,5 @@ class Json_kv_ary_srl_fxt {
public KeyVal kv_str_(String key, String val) {return KeyVal_.new_(key, val);}
public KeyVal kv_int_(String key, int val) {return KeyVal_.new_(key, val);}
public KeyVal kv_bool_(String key, boolean val) {return KeyVal_.new_(key, Bool_.Xto_str_lower(val));}
public KeyVal kv_dec_(String key, DecimalAdp val) {return KeyVal_.new_(key, val.Xto_str());}
public KeyVal kv_dec_(String key, Decimal_adp val) {return KeyVal_.new_(key, val.To_str());}
}

View File

@@ -0,0 +1,136 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.core.net; import gplx.*; import gplx.core.*;
// Describes one url protocol: a numeric type id, the full protocol text (EX: "http://") and the key forms derived from that text.
// Items created through new_ are also registered in Regy under their key without the colon.
public class Gfo_protocol_itm {
	// Stores tid / text and derives the key fields by splitting text at its first colon.
	// If text has no colon, the key fields keep their defaults (null / 0 / false).
	public Gfo_protocol_itm(byte tid, String text) {
		this.tid = tid;
		this.text_bry = Bry_.new_u8(text);
		this.text_str = text;
		int text_len = text_bry.length;
		for (int i = 0; i < text_len; i++) {
			if (text_bry[i] == Byte_ascii.Colon) {
				key_wo_colon_bry = Bry_.Mid(text_bry, 0, i);
				key_w_colon_bry_len = i;	// NOTE(review): this is the index of the colon, not (i + 1); left unchanged b/c callers are not visible here -- confirm intent
				key_wo_colon_str = String_.new_u8(key_wo_colon_bry);
				key_w_colon_bry = Bry_.Mid(text_bry, 0, i + 1);
				text_ends_w_colon = i == text_len - 1;	// true for "mailto:"; false for "http://"
				break;	// only the 1st colon matters
			}
		}
	}
	public byte			Tid()					{return tid;} private byte tid;
	public byte[]		Key_wo_colon_bry()		{return key_wo_colon_bry;} private byte[] key_wo_colon_bry;		// http
	public String		Key_wo_colon_str()		{return key_wo_colon_str;} private String key_wo_colon_str;
	public byte[]		Key_w_colon_bry()		{return key_w_colon_bry;} private byte[] key_w_colon_bry;		// http:
	public int			Key_w_colon_bry_len()	{return key_w_colon_bry_len;} private int key_w_colon_bry_len;
	public byte[]		Text_bry()				{return text_bry;} private byte[] text_bry;						// http://
	public String		Text_str()				{return text_str;} private String text_str;
	public boolean		Text_ends_w_colon()		{return text_ends_w_colon;} private boolean text_ends_w_colon;
	public static final byte	// REF.MW:DefaultSettings|$wgUrlProtocols; NOTE: "news:" not included because it breaks alias "wikinews:"
	  Tid_http			=  0
	, Tid_https			=  1
	, Tid_ftp			=  2
	, Tid_ftps			=  3
	, Tid_ssh			=  4
	, Tid_sftp			=  5
	, Tid_irc			=  6
	, Tid_ircs			=  7
	, Tid_xmpp			=  8
	, Tid_sip			=  9
	, Tid_sips			= 10
	, Tid_gopher		= 11
	, Tid_telnet		= 12
	, Tid_nntp			= 13
	, Tid_worldwind		= 14
	, Tid_mailto		= 15
	, Tid_tel			= 16
	, Tid_sms			= 17
	, Tid_svn			= 18
	, Tid_git			= 19
	, Tid_mms			= 20
	, Tid_bitcoin		= 21
	, Tid_magnet		= 22
	, Tid_urn			= 23
	, Tid_geo			= 24
	, Tid_null			= 25
	, Tid_unknown		= 26
	, Tid_xowa			= 27
	, Tid_file			= 28
	, Tid_relative_1	= 29	// [//a.org]
	, Tid_relative_2	= 30	// [[//a.org]]
	;
	// NOTE: Regy must be declared before the Itm_* constants b/c new_ adds to it during static initialization
	public static final Ordered_hash Regy = Ordered_hash_.new_bry_();
	// NOTE: items are declared in Tid order (0 - 24) so that Ary()[tid] returns the item for that tid
	public static final Gfo_protocol_itm
	  Itm_http			= new_(Tid_http			, "http://")
	, Itm_https			= new_(Tid_https		, "https://")
	, Itm_ftp			= new_(Tid_ftp			, "ftp://")
	, Itm_ftps			= new_(Tid_ftps			, "ftps://")
	, Itm_ssh			= new_(Tid_ssh			, "ssh://")
	, Itm_sftp			= new_(Tid_sftp			, "sftp://")
	, Itm_irc			= new_(Tid_irc			, "irc://")
	, Itm_ircs			= new_(Tid_ircs			, "ircs://")
	, Itm_xmpp			= new_(Tid_xmpp			, "xmpp:")
	, Itm_sip			= new_(Tid_sip			, "sip:")
	, Itm_sips			= new_(Tid_sips			, "sips:")
	, Itm_gopher		= new_(Tid_gopher		, "gopher://")
	, Itm_telnet		= new_(Tid_telnet		, "telnet://")
	, Itm_nntp			= new_(Tid_nntp			, "nntp://")
	, Itm_worldwind		= new_(Tid_worldwind	, "worldwind://")
	, Itm_mailto		= new_(Tid_mailto		, "mailto:")
	, Itm_tel			= new_(Tid_tel			, "tel:")
	, Itm_sms			= new_(Tid_sms			, "sms:")
	, Itm_svn			= new_(Tid_svn			, "svn://")
	, Itm_git			= new_(Tid_git			, "git://")
	, Itm_mms			= new_(Tid_mms			, "mms://")
	, Itm_bitcoin		= new_(Tid_bitcoin		, "bitcoin:")	// was misspelled "bicoin:", which never matched real bitcoin: links
	, Itm_magnet		= new_(Tid_magnet		, "magnet:")
	, Itm_urn			= new_(Tid_urn			, "urn:")
	, Itm_geo			= new_(Tid_geo			, "geo:")
	;
	public static final String Str_file = "file:", Str_xcmd = "xowa-cmd:";
	public static final byte[] Bry_file = Bry_.new_a7(Str_file), Bry_xcmd = Bry_.new_a7(Str_xcmd);
	public static final int Len_xcmd = Bry_xcmd.length;
	public static final byte[] Bry_relative = Bry_.new_a7("//");
	// Returns the registered item at index tid, or "or" when tid is past the end of the registered items (EX: Tid_null, Tid_unknown)
	public static Gfo_protocol_itm Get_or(byte tid, Gfo_protocol_itm or) {
		Gfo_protocol_itm[] ary = Ary();
		return tid >= ary.length ? or : ary[tid];
	}
	// Returns all registered items in registration order; the array is built on 1st call and cached
	public static Gfo_protocol_itm[] Ary() {
		if (protocol_itm_ary == null) {
			int len = Regy.Count();
			protocol_itm_ary = new Gfo_protocol_itm[len];
			for (int i = 0; i < len; i++)
				protocol_itm_ary[i] = (Gfo_protocol_itm)Regy.Get_at(i);
		}
		return protocol_itm_ary;
	}	private static Gfo_protocol_itm[] protocol_itm_ary;
	// Returns the full text (EX: "http://") of all registered items in registration order; the array is built on 1st call and cached
	public static String[] Protocol_str_ary() {
		if (protocol_str_ary == null) {
			int len = Regy.Count();
			protocol_str_ary = new String[len];
			for (int i = 0; i < len; i++)
				protocol_str_ary[i] = ((Gfo_protocol_itm)Regy.Get_at(i)).Text_str();
		}
		return protocol_str_ary;
	}	private static String[] protocol_str_ary;
	// Creates an item and registers it in Regy under its key without the colon (EX: "http")
	private static Gfo_protocol_itm new_(byte tid, String text) {
		Gfo_protocol_itm rv = new Gfo_protocol_itm(tid, text);
		Regy.Add(rv.Key_wo_colon_bry(), rv);
		return rv;
	}
}

View File

@@ -0,0 +1,51 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.core.net; import gplx.*; import gplx.core.*;
// One query-string argument: a key and a val, both held as byte arrays. The val may be null (To_str checks for it).
public class Gfo_qarg_itm {
	private byte[] key_bry;
	private byte[] val_bry;
	public Gfo_qarg_itm(byte[] key_bry, byte[] val_bry) {
		this.key_bry = key_bry;
		this.val_bry = val_bry;
	}
	public byte[] Key_bry() {return key_bry;}
	public byte[] Val_bry() {return val_bry;}
	// Replaces the val and returns this item so calls can be chained
	public Gfo_qarg_itm Val_bry_(byte[] v) {
		this.val_bry = v;
		return this;
	}
	public static final Gfo_qarg_itm[] Ary_empty = new Gfo_qarg_itm[0];
	// Creates an item with the given key and an empty (not null) val
	public static Gfo_qarg_itm new_key_(String key) {
		byte[] key_bry = Bry_.new_u8(key);
		return new Gfo_qarg_itm(key_bry, Bry_.Empty);
	}
	// Builds items from alternating key / val strings; EX: Ary("k0", "v0", "k1", "v1"). A trailing key without a val is ignored.
	public static Gfo_qarg_itm[] Ary(String... kvs) {
		int pair_count = kvs.length / 2;
		Gfo_qarg_itm[] rv = new Gfo_qarg_itm[pair_count];
		for (int pair_idx = 0; pair_idx < pair_count; ++pair_idx) {
			int key_idx = pair_idx * 2;
			byte[] pair_key = Bry_.new_u8(kvs[key_idx]);
			byte[] pair_val = Bry_.new_u8(kvs[key_idx + 1]);
			rv[pair_idx] = new Gfo_qarg_itm(pair_key, pair_val);
		}
		return rv;
	}
	// Writes one "key=val" line per item, each followed by a newline; a null val writes nothing after the "="
	public static String To_str(Gfo_qarg_itm[] ary) {
		int ary_len = ary.length;
		Bry_bfr bfr = Bry_bfr.new_();
		int idx = 0;
		while (idx < ary_len) {
			Gfo_qarg_itm cur = ary[idx++];
			bfr.Add(cur.Key_bry()).Add_byte_eq();
			byte[] cur_val = cur.Val_bry();
			if (cur_val != null) {
				bfr.Add(cur_val);
			}
			bfr.Add_byte_nl();
		}
		return bfr.Xto_str_and_clear();
	}
}

View File

@@ -0,0 +1,106 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.core.net; import gplx.*; import gplx.core.*;
// Holds query args both in insertion order (list) and by key (hash) and converts them to / from arrays and url text.
public class Gfo_qarg_mgr {
	private final List_adp list = List_adp_.new_();		// all args in the order they were added
	private final Hash_adp hash = Hash_adp_bry.cs();	// args by key for lookup
	public int Len() {return list.Count();}
	// Returns true if an arg exists for key and its val equals val
	public boolean Match(byte[] key, byte[] val) {
		Gfo_qarg_itm arg = (Gfo_qarg_itm)hash.Get_by(key);
		return arg == null ? false : Bry_.Eq(val, arg.Val_bry());
	}
	public Gfo_qarg_itm Get_at(int i) {return (Gfo_qarg_itm)list.Get_at(i);}
	public Gfo_qarg_itm Get_arg(byte[] key) {return (Gfo_qarg_itm)hash.Get_by(key);}
	// Returns the val for key parsed as an int; returns "or" if the key is missing or its val is null
	public int Get_val_int_or(byte[] key, int or) {
		byte[] val_bry = Get_val_bry_or(key, null); if (val_bry == null) return or;
		return Bry_.To_int_or(val_bry, or);
	}
	// Returns the val for key, or "or" if the key is missing; NOTE: a found arg with a null val returns null, not "or"
	public byte[] Get_val_bry_or(byte[] key, byte[] or) {
		Gfo_qarg_itm arg = (Gfo_qarg_itm)hash.Get_by(key);
		return arg == null ? or : arg.Val_bry();
	}
	// Returns the val for key as a String, or "or" if the key is missing
	public String Get_val_str_or(byte[] key, String or) {
		Gfo_qarg_itm arg = (Gfo_qarg_itm)hash.Get_by(key);
		return arg == null ? or : String_.new_u8(arg.Val_bry());	// NOTE(review): Val_bry can be null; confirm String_.new_u8 tolerates null
	}
	public void Set_val_by_int(byte[] key, int val) {Set_val_by_bry(key, Bry_.new_a7(Int_.Xto_str(val)));}
	// Sets the val for key, adding a new arg at the end of the list if the key does not exist yet
	public void Set_val_by_bry(byte[] key, byte[] val) {
		Gfo_qarg_itm arg = (Gfo_qarg_itm)hash.Get_by(key);
		if (arg == null) {
			arg = new Gfo_qarg_itm(key, Bry_.Empty);
			list.Add(arg);
			hash.Add(key, arg);
		}
		arg.Val_bry_(val);
	}
	// Replaces all current args with ary; every item is kept in the list, but dupe keys are resolved by the hash (see Add_if_dupe_use_nth)
	public Gfo_qarg_mgr Load(Gfo_qarg_itm[] ary) {
		hash.Clear();
		list.Clear();
		int len = ary.length;
		for (int i = 0; i < len; ++i) {
			Gfo_qarg_itm itm = ary[i];
			list.Add(itm);
			hash.Add_if_dupe_use_nth(itm.Key_bry(), itm);
		}
		return this;
	}
	public Gfo_qarg_itm[] To_ary() {return (Gfo_qarg_itm[])list.To_ary(Gfo_qarg_itm.class);}
	// Writes "&key=val" for each of the requested keys that exists, in the order requested; missing keys are skipped.
	// A null val writes "&key=" only, which matches what Write_or_encode does for To_bry / Concat_bfr.
	public byte[] Concat(Bry_bfr bfr, byte[]... ary) {
		int ary_len = ary.length;
		for (int i = 0; i < ary_len; i++) {
			byte[] key = ary[i];
			Gfo_qarg_itm itm = Get_arg(key); if (itm == null) continue;
			bfr.Add_byte(Byte_ascii.Amp).Add(itm.Key_bry()).Add_byte(Byte_ascii.Eq);
			byte[] val_bry = itm.Val_bry();
			if (val_bry != null)	// NOTE: need null check b/c itm.Val_bry can be null; previously the null was passed straight to bfr.Add
				bfr.Add(val_bry);
		}
		return bfr.Xto_bry_and_clear();
	}
	// Returns all args as "?k0=v0&k1=v1" without encoding, or an empty array if there are no args
	public byte[] To_bry() {
		int len = list.Count(); if (len == 0) return Bry_.Empty;
		Bry_bfr bfr = Bry_bfr.new_();
		To_bry(bfr, gplx.xowa.Xoa_app_.Utl__encoder_mgr().Href(), false);
		return bfr.Xto_bry_and_clear();
	}
	// Appends all args to bfr as "?k0=v0&k1=v1"; keys and vals are passed through href_encoder only when encode is true
	public void To_bry(Bry_bfr bfr, Url_encoder href_encoder, boolean encode) {
		int len = list.Count(); if (len == 0) return;
		for (int i = 0; i < len; ++i) {
			Gfo_qarg_itm itm = (Gfo_qarg_itm)list.Get_at(i);
			bfr.Add_byte(i == 0 ? Byte_ascii.Question : Byte_ascii.Amp);	// "?" before 1st arg; "&" before all others
			Write_or_encode(bfr, href_encoder, encode, itm.Key_bry());
			bfr.Add_byte(Byte_ascii.Eq);
			Write_or_encode(bfr, href_encoder, encode, itm.Val_bry());
		}
	}
	// Appends ary to bfr as "?k0=v0&k1=v1", always encoding keys and vals with href_encoder
	public static void Concat_bfr(Bry_bfr bfr, Url_encoder href_encoder, Gfo_qarg_itm[] ary) {Concat_bfr(bfr, href_encoder, ary, true);}
	private static void Concat_bfr(Bry_bfr bfr, Url_encoder href_encoder, Gfo_qarg_itm[] ary, boolean encode) {
		int ary_len = ary.length;
		for (int i = 0; i < ary_len; i++) {
			Gfo_qarg_itm itm = ary[i];
			bfr.Add_byte(i == 0 ? Byte_ascii.Question : Byte_ascii.Amp);	// "?" before 1st arg; "&" before all others
			Write_or_encode(bfr, href_encoder, encode, itm.Key_bry());
			bfr.Add_byte(Byte_ascii.Eq);
			Write_or_encode(bfr, href_encoder, encode, itm.Val_bry());
		}
	}
	// Appends bry to bfr, either encoded by href_encoder or as is; does nothing for null
	private static void Write_or_encode(Bry_bfr bfr, Url_encoder href_encoder, boolean encode, byte[] bry) {
		if (bry == null) return;	// NOTE: need null check b/c itm.Val_bry can be null
		if (encode)
			href_encoder.Encode(bfr, bry);
		else
			bfr.Add(bry);
	}
}

View File

@@ -0,0 +1,38 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.core.net; import gplx.*; import gplx.core.*;
// Result of parsing a url: the raw bytes, the protocol (id and bytes), the path segments, the query args and the anchor.
public class Gfo_url {
	private byte[] raw;
	private byte protocol_tid;
	private byte[] protocol_bry;
	private byte[] anch;
	private Gfo_qarg_itm[] qargs;
	private byte[][] segs;
	private int segs__len;

	public byte[] Raw() {return raw;}
	public byte Protocol_tid() {return protocol_tid;}
	public byte[] Protocol_bry() {return protocol_bry;}
	public byte[] Anch() {return anch;}
	public Gfo_qarg_itm[] Qargs() {return qargs;}
	public byte[][] Segs() {return segs;}

	// Returns the segment at index i, or null if i is at or past the number of segments
	public byte[] Segs__get_at(int i) {
		if (i < segs__len) {
			return segs[i];
		}
		return null;
	}
	// Returns the 1st segment, or null if there are no segments
	public byte[] Segs__get_at_1st() {
		if (segs__len > 0) {
			return segs[0];
		}
		return null;
	}
	// Returns the last segment, but only if there are at least 2 segments; otherwise null
	public byte[] Segs__get_at_nth() {
		if (segs__len > 1) {
			return segs[segs__len - 1];
		}
		return null;
	}
	// Sets all fields in one call and returns this instance so the call can be chained
	public Gfo_url Ctor(byte[] raw, byte protocol_tid, byte[] protocol_bry, byte[][] segs, Gfo_qarg_itm[] qargs, byte[] anch) {
		this.raw = raw;
		this.protocol_tid = protocol_tid;
		this.protocol_bry = protocol_bry;
		this.segs = segs;
		this.segs__len = segs.length;	// cached so the Segs__get_* methods do not read segs.length each time
		this.qargs = qargs;
		this.anch = anch;
		return this;
	}
	// Shared empty url: empty raw / protocol bytes, unknown protocol, no segments, null qargs and null anchor
	public static final Gfo_url Empty = new Gfo_url().Ctor(Bry_.Empty, Gfo_protocol_itm.Tid_unknown, Bry_.Empty, Bry_.Ary_empty, null, null);
}

View File

@@ -0,0 +1,261 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.core.net; import gplx.*; import gplx.core.*;
import gplx.core.primitives.*; import gplx.core.btries.*;
// Parses url bytes into a Gfo_url: protocol, path segments, query args and anchor.
// Parse works as a small state machine: "area" names the part of the url currently being read, and End_area closes that part whenever a delimiter is found.
// NOTE: all parse state is kept in instance fields and the buffers are reused across calls.
public class Gfo_url_parser {
	private final Btrie_slim_mgr protocols = Btrie_slim_mgr.ci_a7();	// ASCII:url_protocol; EX:"http:", "ftp:", etc
	private final Bry_ary segs_ary = new Bry_ary(4), qargs = new Bry_ary(4);	// qargs holds alternating key, val entries; a val entry can be null
	private final Url_encoder encoder = Url_encoder.new_html_href_mw_().Itms_raw_same_many(Byte_ascii.Underline);
	private final Bry_bfr tmp_bfr = Bry_bfr.reset_(500);
	public byte[] Relative_url_protocol_bry() {return Gfo_protocol_itm.Itm_https.Key_w_colon_bry();}	// NOTE: https b/c any WMF wiki will now default to WMF; DATE:2015-07-26
	// Registers every item from Gfo_protocol_itm.Ary() plus the relative ("//"), file and xowa protocols
	public Gfo_url_parser() {
		Init_protocols(Gfo_protocol_itm.Ary());
		Init_protocol_itm(Gfo_protocol_itm.Bry_relative, Gfo_protocol_itm.Tid_relative_1);
		Init_protocol_itm(Gfo_protocol_itm.Bry_file, Gfo_protocol_itm.Tid_file);
		Init_protocol_itm(gplx.xowa.parsers.lnkes.Xop_lnke_wkr.Bry_xowa_protocol, Gfo_protocol_itm.Tid_xowa);
	}
	// Registers each item by its key with colon; EX: "http:"
	private void Init_protocols(Gfo_protocol_itm... itms) {
		int len = itms.length;
		for (int i = 0; i < len; i++) {
			Gfo_protocol_itm itm = itms[i];
			Init_protocol_itm(itm.Key_w_colon_bry(), itm.Tid());
		}
	}
	// Registers one protocol key so that Parse can match it at the start of a url
	public void Init_protocol_itm(byte[] key, byte protocol_tid) {
		protocols.Add_bry_byte(key, protocol_tid);
	}
	// Finds only the site part of a url and stores its bounds in site_data, without building a Gfo_url.
	// The site starts after a leading "//" or after the 1st ":" and up to 2 following slashes, and ends before the next "/" (or at src_end).
	public void Parse_site_fast(Gfo_url_site_data site_data, byte[] src, int src_bgn, int src_end) {
		int pos = src_bgn; boolean rel = false;
		if (pos + 1 < src_end && src[pos] == Byte_ascii.Slash && src[pos + 1] == Byte_ascii.Slash) {	// starts with "//"
			pos += 2;
			rel = true;
		}
		if (!rel) {	// search for ":"; NOTE: only search if not rel; i.e.: "//"
			int colon_pos = Bry_finder.Find_fwd(src, Byte_ascii.Colon, pos, src_end);	// colon may not be found; EX: "a.org/b"
			if (colon_pos != Bry_.NotFound)												// colon found; EX: "http://" or "https://"
				pos = colon_pos + Int_.Const_dlm_len;
			if (pos < src_end && src[pos] == Byte_ascii.Slash) {						// skip slash after colon
				pos += 1;
				if (pos < src_end && src[pos] == Byte_ascii.Slash)						// skip 2nd slash after colon
					pos += 1;
			}
		}
		int slash_pos = Bry_finder.Find_fwd(src, Byte_ascii.Slash, pos, src_end);
		if (slash_pos == Bry_.NotFound)													// no terminating slash; EX: http://a.org
			slash_pos = src_end;
		slash_pos = Bry_.Trim_end_pos(src, slash_pos);
		site_data.Atrs_set(rel, pos, slash_pos);
	}
	private static final int Area__path = 1, Area__qarg_key_1st = 2, Area__qarg_key_nth = 3, Area__qarg_val = 4, Area__anch = 5;
	private byte[] src; int src_bgn, src_end;
	private int area;			// one of the Area__ consts; the part of the url currently being read
	private boolean encoded;	// true if a "%" was seen in the current part; makes Make_bry decode instead of copy
	private byte protocol_tid; private byte[] protocol_bry, anch;
	private int path_bgn, qarg_key_bgn, qarg_val_bgn, anch_bgn, anch_nth_bgn;	// start positions of the current part; anch_nth_bgn caches the pos of the last "#" in src
	// Parses all of src into a new Gfo_url
	public Gfo_url Parse(byte[] src) {return Parse(new Gfo_url(), src, 0, src.length);}
	// Parses src between src_bgn and src_end, fills rv through rv.Ctor and returns rv
	public Gfo_url Parse(Gfo_url rv, byte[] src, int src_bgn, int src_end) {
		// reset all state left over from the previous parse
		this.src = src; this.src_bgn = src_bgn; this.src_end = src_end;
		encoded = false;
		protocol_tid = Gfo_protocol_itm.Tid_null;
		protocol_bry = anch = null;
		path_bgn = qarg_key_bgn = qarg_val_bgn = anch_bgn = anch_nth_bgn = -1;
		segs_ary.Clear(); qargs.Clear();
		// match protocol at start of src, and skip any slashes after it; EX: "https://"
		int pos = src_bgn;
		Object protocol_obj = protocols.Match_bgn(src, src_bgn, src_end);
		pos = protocols.Match_pos();
		pos = Bry_finder.Find_fwd_while(src, pos, src_end, Byte_ascii.Slash);
		if (protocol_obj == null) {
			this.protocol_tid = Gfo_protocol_itm.Tid_unknown;
		}
		else {
			this.protocol_tid = ((Byte_obj_val)protocol_obj).Val();
			this.protocol_bry = Make_bry(src_bgn, pos);	// includes the skipped slashes; EX: "https://"
		}
		// everything after the protocol starts as path; delimiters below switch the area
		area = Area__path;
		path_bgn = pos;
		while (true) {
			if (pos == src_end) break;
			byte b = src[pos];
			switch (b) {
				case Byte_ascii.Slash:		pos = Parse_slash(pos, b); break;
				case Byte_ascii.Question:	pos = Parse_qarg_key_1st(pos, b); break;
				case Byte_ascii.Amp:		pos = Parse_qarg_key_nth(pos, b); break;
				case Byte_ascii.Eq:			pos = Parse_qarg_val(pos, b); break;
				case Byte_ascii.Hash:		pos = Parse_anch(pos, b); break;
				case Byte_ascii.Percent:	encoded = true; ++pos; break;	// mark current part as needing decoding
				default:
					++pos;
					break;
			}
		}
		End_area(pos, Byte_ascii.Null);	// close whatever part was open at end of src; Null signals "end of src" to End_area
		rv.Ctor(src, protocol_tid, protocol_bry, segs_ary.To_ary(0), Make_qargs(), anch);
		return rv;
	}
	// "/" ends a path segment; in any other area it is ordinary text
	private int Parse_slash(int pos, byte b) {
		switch (area) {
			case Area__path:			return End_area(pos, b);
			default:					return pos + 1;
		}
	}
	// "#" starts the anchor
	private int Parse_anch(int pos, byte b) {
		switch (area) {
			case Area__path:
				End_area(pos, b);
				area = Area__anch;
				anch_bgn = pos + 1;
				break;
			case Area__anch:	// handle double; A#B#C -> "A#B", "C"
				Append_to_last_path(Byte_ascii.Hash, Make_bry(anch_bgn, pos));
				anch_bgn = pos + 1;
				break;
			case Area__qarg_val:
			case Area__qarg_key_1st:
			case Area__qarg_key_nth:
				if (anch_nth_bgn == -1)	// find last "#" once per parse
					anch_nth_bgn = Bry_finder.Find_bwd(src, Byte_ascii.Hash, src_end);
				if (pos == anch_nth_bgn) {	// inside qargs, only the last "#" starts the anchor; earlier ones stay in the key / val
					End_area(pos, b);
					area = Area__anch;
					anch_bgn = pos + 1;
				}
				break;
			default:
				break;
		}
		return pos + 1;
	}
	// "?" starts the 1st qarg key
	private int Parse_qarg_key_1st(int pos, byte b) {
		switch (area) {
			case Area__path:			// only valid way to start qarg; EX: A?B=C
				End_area(pos, b);
				area = Area__qarg_key_1st;
				qarg_key_bgn = pos + 1;
				break;
			case Area__qarg_key_1st:	// handle dupe; EX: A?B?C
			case Area__qarg_key_nth:	// handle dupe; EX: A?B=C&D?
			case Area__qarg_val:		// handle dupe; EX: A?B=?
				End_area(pos, b);
				Append_to_last_path__qargs();	// fold the qargs read so far back into the last path segment and start qargs over
				area = Area__qarg_key_1st;
				qarg_key_bgn = pos + 1;
				break;
		}
		return pos + 1;
	}
	// "&" starts the next qarg key
	private int Parse_qarg_key_nth(int pos, byte b) {
		switch (area) {
			case Area__path:			// ignore if qarg not started; EX: A&B
				break;
			case Area__qarg_key_1st:	// handle invalid; A?B&C
			case Area__qarg_key_nth:	// handle invalid; A?B=C&D&E=F
				End_area(pos, b);		// adds the key
				qargs.Add(null);		// key had no "="; add null val to keep key / val entries paired
				area = Area__qarg_key_nth;
				qarg_key_bgn = pos + 1;
				break;
			case Area__qarg_val:
				End_area(pos, b);
				area = Area__qarg_key_nth;
				qarg_key_bgn = pos + 1;
				break;
		}
		return pos + 1;
	}
	// "=" ends a qarg key and starts its val; in any other area it is ordinary text
	private int Parse_qarg_val(int pos, byte b) {
		switch (area) {
			case Area__qarg_key_1st:
			case Area__qarg_key_nth:
				End_area(pos, b); break;
			default: break;
		}
		return pos + 1;
	}
	// Closes the current area at pos, storing the text read since that area began.
	// b is the delimiter that caused the close, or Byte_ascii.Null when the end of src was reached. Returns pos + 1.
	private int End_area(int pos, byte b) {
		switch (area) {
			case Area__path:
				segs_ary.Add(Make_bry(path_bgn, pos));
				path_bgn = pos + 1;
				break;
			case Area__qarg_key_1st:
			case Area__qarg_key_nth:
				if (b == Byte_ascii.Null && qargs.Len() == 0)	// handle A?b but not A?b=c&d
					Append_to_last_path(Byte_ascii.Question, Make_bry(qarg_key_bgn, src_end));
				else {
					qargs.Add(Make_bry(qarg_key_bgn, pos));
					qarg_val_bgn = pos + 1;
					area = Area__qarg_val;
				}
				break;
			case Area__qarg_val:
				qargs.Add(Make_bry(qarg_val_bgn, pos));
				qarg_key_bgn = pos + 1;
				qarg_val_bgn = -1;
				area = Area__qarg_key_nth;
				break;
			case Area__anch:
				if (b == Byte_ascii.Null && anch_bgn == src_end)	// handle A# but not "A#B"
					Append_to_last_path(Byte_ascii.Hash, Make_bry(anch_bgn, src_end));
				else
					anch = Make_bry(anch_bgn, pos);
				break;
			default:
				break;
		}
		encoded = false;	// "%" flag applies per part; reset for the next one
		return pos + 1;
	}
	// Returns src between bgn and end; decoded if a "%" was seen in the current part, else copied as is
	private byte[] Make_bry(int bgn, int end) {
		return encoded ? encoder.Decode(tmp_bfr, src, bgn, end) : Bry_.Mid(src, bgn, end);
	}
	// Converts the alternating key, val entries in qargs into items. A trailing key without a val entry gets a null val; EX: ?A=B&C
	private Gfo_qarg_itm[] Make_qargs() {
		int qargs_len = qargs.Len(); if (qargs_len == 0) return Gfo_qarg_itm.Ary_empty;
		int itms_len = (qargs_len + 1) / 2;	// round up to handle odd qargs
		Gfo_qarg_itm[] rv = new Gfo_qarg_itm[itms_len];
		for (int i = 0; i < itms_len; ++i) {
			int key_idx = i * 2;
			int val_idx = key_idx + 1;
			byte[] key = qargs.Get_at(key_idx);
			// NOTE: compare against the actual len of qargs; the earlier code bumped the len for odd counts first, which made this check always true and read 1 entry past the end
			byte[] val = val_idx < qargs_len ? qargs.Get_at(val_idx) : null;
			rv[i] = new Gfo_qarg_itm(key, val);
		}
		return rv;
	}
	// Replaces the last path segment with "last_path + b + append"; does nothing if there is no last segment
	private void Append_to_last_path(byte b, byte[] append) {
		byte[] last_path = segs_ary.Get_at_last(); if (last_path == null) return;
		last_path = Bry_.Add_w_dlm(b, last_path, append);
		segs_ary.Set_at_last(last_path);
	}
	// Appends all qargs read so far to the last path segment as "?k0=v0&k1=v1" and clears qargs; EX: A?B=C?Y=Z -> last path is "A?B=C"
	private void Append_to_last_path__qargs() {
		byte[] last_path = segs_ary.Get_at_last(); if (last_path == null) return;
		tmp_bfr.Add(last_path);
		int len = qargs.Len();
		if (len % 2 == 1) qargs.Add(null);	// handle odd qargs
		for (int i = 0; i < len; i += 2) {
			tmp_bfr.Add_byte(i == 0 ? Byte_ascii.Question : Byte_ascii.Amp);
			tmp_bfr.Add(qargs.Get_at(i));
			byte[] qarg_val = qargs.Get_at(i + 1);
			if (qarg_val != null)	// handle "null" added above
				tmp_bfr.Add_byte_eq().Add(qarg_val);
		}
		qargs.Clear();
		segs_ary.Set_at_last(tmp_bfr.Xto_bry_and_clear());
	}
	public static final byte[] Bry_double_slash = new byte[] {Byte_ascii.Slash, Byte_ascii.Slash};
}

View File

@@ -0,0 +1,39 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.core.net; import gplx.*; import gplx.core.*;
// Test fixture for Gfo_url_parser: Run_parse stores the parsed url, and each Chk_ method asserts one part of it and returns the fixture for chaining.
class Gfo_url_parser_fxt {
	private final Gfo_url_parser parser = new Gfo_url_parser();
	private final Gfo_url_site_data site_data = new Gfo_url_site_data();
	private Gfo_url actl;	// result of the last Run_parse

	public Gfo_url_parser_fxt Chk_protocol_tid(byte v) {
		Tfds.Eq_byte(v, actl.Protocol_tid(), "protocol_tid");
		return this;
	}
	public Gfo_url_parser_fxt Chk_protocol_bry(String v) {
		Tfds.Eq_str(v, actl.Protocol_bry(), "protocol_bry");
		return this;
	}
	// Checks the 1st segment
	public Gfo_url_parser_fxt Chk_site(String v) {
		Tfds.Eq_str(v, actl.Segs__get_at_1st(), "site");
		return this;
	}
	// Checks the value of Segs__get_at_nth
	public Gfo_url_parser_fxt Chk_page(String v) {
		Tfds.Eq_str(v, actl.Segs__get_at_nth(), "page");
		return this;
	}
	public Gfo_url_parser_fxt Chk_anch(String v) {
		Tfds.Eq_str(v, actl.Anch(), "anch");
		return this;
	}
	// Checks the number of segments 1st, and then their text
	public Gfo_url_parser_fxt Chk_segs(String... ary) {
		byte[][] actl_segs = actl.Segs();
		Tfds.Eq_int(ary.length, actl_segs.length, "segs_len");
		String expd_lines = String_.Concat_lines_nl(ary);
		String actl_lines = String_.Concat_lines_nl(String_.Ary(actl.Segs()));
		Tfds.Eq_str_lines(expd_lines, actl_lines, "segs");
		return this;
	}
	// Checks the qargs; ary is alternating key, val strings
	public Gfo_url_parser_fxt Chk_qargs(String... ary) {
		String expd_qargs = String_.To_str__as_kv_ary(ary);
		String actl_qargs = Gfo_qarg_itm.To_str(actl.Qargs());
		Tfds.Eq_str_lines(expd_qargs, actl_qargs, "qargs");
		return this;
	}
	// Parses v and keeps the result for the Chk_ methods
	public Gfo_url_parser_fxt Run_parse(String v) {
		byte[] v_bry = Bry_.new_u8(v);
		this.actl = parser.Parse(v_bry);
		return this;
	}
	// Runs Parse_site_fast on raw and checks that the bytes between the reported site bounds equal expd
	public void Test_Parse_site_fast(String raw, String expd) {
		byte[] raw_bry = Bry_.new_u8(raw);
		parser.Parse_site_fast(site_data, raw_bry, 0, raw_bry.length);
		int site_bgn = site_data.Site_bgn();
		int site_end = site_data.Site_end();
		String actl_site = String_.new_u8(raw_bry, site_bgn, site_end);
		Tfds.Eq(expd, actl_site);
	}
}

View File

@@ -0,0 +1,124 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.core.net; import gplx.*; import gplx.core.*;
import org.junit.*;
public class Gfo_url_parser_tst {
	// Fixture shared by every test: Run_parse parses a url, and the chained Chk_* calls assert on the result.
	private final Gfo_url_parser_fxt tstr = new Gfo_url_parser_fxt();

	// ---- protocol ----
	@Test public void Protocol__relative() {
		tstr.Run_parse("//en.wikipedia.org")
			.Chk_protocol_tid(Gfo_protocol_itm.Tid_relative_1)
			.Chk_protocol_bry("//")
			.Chk_site("en.wikipedia.org");
	}
	@Test public void Protocol__none() {
		tstr.Run_parse("en.wikipedia.org/wiki/A")
			.Chk_protocol_tid(Gfo_protocol_itm.Tid_unknown)
			.Chk_segs("en.wikipedia.org", "wiki", "A");
	}

	// ---- site ----
	@Test public void Site__parts__3() {
		tstr.Run_parse("https://en.wikipedia.org")
			.Chk_protocol_tid(Gfo_protocol_itm.Tid_https)
			.Chk_protocol_bry("https://")
			.Chk_segs("en.wikipedia.org");
	}
	@Test public void Site__parts__2() {
		tstr.Run_parse("https://wikipedia.org")
			.Chk_protocol_tid(Gfo_protocol_itm.Tid_https)
			.Chk_segs("wikipedia.org");
	}
	@Test public void Site__parts__1() {
		tstr.Run_parse("https://wikipedia")
			.Chk_protocol_tid(Gfo_protocol_itm.Tid_https)
			.Chk_segs("wikipedia");
	}
	@Test public void Site__slash__none() {
		tstr.Run_parse("https:site")
			.Chk_protocol_tid(Gfo_protocol_itm.Tid_https)
			.Chk_site("site");
	}

	// ---- paths: the site is expected as the first segment ----
	@Test public void Paths__1() {
		tstr.Run_parse("https://site/A")
			.Chk_segs("site", "A");
	}
	@Test public void Paths__2() {
		tstr.Run_parse("https://site/wiki/A")
			.Chk_segs("site", "wiki", "A");
	}
	@Test public void Paths__n() {
		tstr.Run_parse("https://site/wiki/A/B/C/D")
			.Chk_segs("site", "wiki", "A", "B", "C", "D");
	}

	// ---- qargs: expected values are passed as a flat key, val, key, val list ----
	@Test public void Qargs__1() {
		tstr.Run_parse("https://site/A?B=C")
			.Chk_page("A")
			.Chk_qargs("B", "C");
	}
	@Test public void Qargs__2() {
		tstr.Run_parse("https://site/A?B=C&D=E")
			.Chk_page("A")
			.Chk_qargs("B", "C", "D", "E");
	}
	@Test public void Qargs__3() {
		tstr.Run_parse("https://site/A?B=C&D=E&F=G")
			.Chk_page("A")
			.Chk_qargs("B", "C", "D", "E", "F", "G");
	}
	// With a repeated "?", everything before the last "?" is expected to remain in the page.
	@Test public void Qargs__ques__dupe__ques() {
		tstr.Run_parse("https://site/A?B?Y=Z")
			.Chk_page("A?B")
			.Chk_qargs("Y", "Z");
	}
	@Test public void Qargs__ques__dupe__amp() {
		tstr.Run_parse("https://site/A?B=C&D?Y=Z")
			.Chk_page("A?B=C&D")
			.Chk_qargs("Y", "Z");
	}
	@Test public void Qargs__ques__dupe__eq() {
		tstr.Run_parse("https://site/A?B=C?Y=Z")
			.Chk_page("A?B=C")
			.Chk_qargs("Y", "Z");
	}
	// A key without "=" is expected to yield a null val.
	@Test public void Qargs__amp__dupe__ques() {
		tstr.Run_parse("https://site/A?B&Y=Z")
			.Chk_page("A")
			.Chk_qargs("B", null, "Y", "Z");
	}
	@Test public void Qargs__amp__dupe__amp() {
		tstr.Run_parse("https://site/A?B=C&D&Y=Z")
			.Chk_page("A")
			.Chk_qargs("B", "C", "D", null, "Y", "Z");
	}
	// A trailing "?" with nothing after it is expected to stay in the page and produce no qargs.
	@Test public void Qargs__missing_val__0() {
		tstr.Run_parse("https://site/A?")
			.Chk_page("A?")
			.Chk_qargs();
	}
	@Test public void Qargs__missing_val__2() {
		tstr.Run_parse("https://site/A?B=C&D&F=G")
			.Chk_page("A")
			.Chk_qargs("B", "C", "D", null, "F", "G");
	}
	@Test public void Qargs__missing_val__n() {
		tstr.Run_parse("https://site/A?B=C&D=E&F")
			.Chk_page("A")
			.Chk_qargs("B", "C", "D", "E", "F", null);
	}
	@Test public void Qargs__site_less__missing__0() {
		tstr.Run_parse("A?B")
			.Chk_segs("A?B")
			.Chk_qargs();
	}
	@Test public void Qargs__site_less() {
		tstr.Run_parse("A?B=C&D=E")
			.Chk_site("A")
			.Chk_qargs("B", "C", "D", "E");
	}

	// ---- anchors: only the text after the last "#" is expected as the anchor ----
	@Test public void Anch__basic() {
		tstr.Run_parse("https://site/A#B")
			.Chk_page("A")
			.Chk_anch("B");
	}
	@Test public void Anch__repeat__2() {
		tstr.Run_parse("https://site/A#B#C")
			.Chk_page("A#B")
			.Chk_anch("C");
	}
	@Test public void Anch__repeat__3() {
		tstr.Run_parse("https://site/A#B#C#D")
			.Chk_page("A#B#C")
			.Chk_anch("D");
	}
	// A trailing "#" is expected to stay in the page and give a null anchor.
	@Test public void Anch__missing() {
		tstr.Run_parse("https://site/A#")
			.Chk_page("A#")
			.Chk_anch(null);
	}
	@Test public void Anch__missing__eos() {
		tstr.Run_parse("https://site/A#B#")
			.Chk_page("A#B#")
			.Chk_anch(null);
	}
	@Test public void Anch__qargs__basic() {
		tstr.Run_parse("https://site/A?B=C&D=E#F")
			.Chk_page("A")
			.Chk_qargs("B", "C", "D", "E")
			.Chk_anch("F");
	}
	@Test public void Anch__qargs__repeat() {
		tstr.Run_parse("https://site/A?B=C#&D=E#F")
			.Chk_page("A")
			.Chk_qargs("B", "C#", "D", "E")
			.Chk_anch("F");
	}
	@Test public void Anch__site_less() {
		tstr.Run_parse("A#B")
			.Chk_site("A")
			.Chk_anch("B");
	}

	// ---- misc ----
	// "%27" in the page is expected to be decoded to "'".
	@Test public void Encode__page() {
		tstr.Run_parse("http://site/A%27s")
			.Chk_site("site")
			.Chk_page("A's");
	}
	@Test public void Protocol_less__qargs() {
		tstr.Run_parse("Special:Search/Earth?fulltext=yes")
			.Chk_segs("Special:Search", "Earth")
			.Chk_page("Earth")
			.Chk_qargs("fulltext", "yes");
	}
	// Every input below is expected to report "a.org" as its site.
	@Test public void Parse_site_fast() {
		String[] raws = new String[]
		{ "http://a.org/B"
		, "http://a.org"
		, "//a.org/B"
		, "//a.org/B:C"
		};
		for (String raw : raws) {
			tstr.Test_Parse_site_fast(raw, "a.org");
		}
	}
}

View File

@@ -0,0 +1,24 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.core.net; import gplx.*; import gplx.core.*;
// Mutable holder for three values that are written together by Atrs_set and read back through the accessors.
public class Gfo_url_site_data {
	private boolean rel;	// NOTE(review): presumably marks a protocol-relative url ("//site") -- confirm against the parser
	private int site_bgn;	// begin position of the site
	private int site_end;	// end position of the site

	public boolean Rel() {
		return rel;
	}
	public int Site_bgn() {
		return site_bgn;
	}
	public int Site_end() {
		return site_end;
	}
	// Overwrites all three values in one call.
	public void Atrs_set(boolean rel, int bgn, int end) {
		this.rel = rel;
		this.site_bgn = bgn;
		this.site_end = end;
	}
}

View File

@@ -78,7 +78,7 @@ public class Http_request_parser {
case Tid_x_requested_with: this.x_requested_with = Bry_.Mid(line, val_bgn, line_len); break;
case Tid_cookie: this.cookie = Bry_.Mid(line, val_bgn, line_len); break;
case Tid_referer: this.referer = Bry_.Mid(line, val_bgn, line_len); break;
case Tid_content_length: this.content_length = Bry_.Xto_int_or(line, val_bgn, line_len, -1); break;
case Tid_content_length: this.content_length = Bry_.To_int_or(line, val_bgn, line_len, -1); break;
case Tid_content_type: Parse_content_type(val_bgn, line, line_len); break;
case Tid_connection: this.connection = Bry_.Mid(line, val_bgn, line_len); break;
case Tid_pragma: this.pragma = Bry_.Mid(line, val_bgn, line_len); break;
@@ -146,7 +146,7 @@ public class Http_request_parser {
// Delegates to the item returned by Make_request_itm(), passing tmp_bfr and Bool_.N to its To_str.
private String To_str() {
	return Make_request_itm().To_str(tmp_bfr, Bool_.N);
}
// Integer ids for request-line verbs and header names; they are registered in `trie` via Add_str_int and used as `case` labels when a line is parsed.
private static final int Tid_get = 1, Tid_post = 2, Tid_host = 3, Tid_user_agent = 4, Tid_accept = 5, Tid_accept_language = 6, Tid_accept_encoding = 7, Tid_dnt = 8
, Tid_x_requested_with = 9, Tid_cookie = 10, Tid_referer = 11, Tid_content_length = 12, Tid_content_type = 13, Tid_connection = 14, Tid_pragma = 15, Tid_cache_control = 16, Tid_origin = 17;
private static final Btrie_slim_mgr trie = Btrie_slim_mgr.ci_ascii_()
private static final Btrie_slim_mgr trie = Btrie_slim_mgr.ci_a7()
.Add_str_int("GET" , Tid_get)
.Add_str_int("POST" , Tid_post)
.Add_str_int("Host:" , Tid_host)