mirror of
https://github.com/gnosygnu/xowa.git
synced 2026-03-02 03:49:30 +00:00
uca category support and other changes
This commit is contained in:
22
400_xowa/src/gplx/core/intls/ucas/Uca_collator.java
Normal file
22
400_xowa/src/gplx/core/intls/ucas/Uca_collator.java
Normal file
@@ -0,0 +1,22 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.core.intls.ucas; import gplx.*; import gplx.core.*; import gplx.core.intls.*;
|
||||
/**
 * Produces binary sort keys for strings according to a collation.
 *
 * Instances are expected to be configured once through {@link #Init} before
 * {@link #Get_sortkey} is called.
 */
public interface Uca_collator {
	/**
	 * Configures the collator.
	 *
	 * @param locale           identifier of the locale whose ordering rules should be used
	 * @param numeric_ordering when true, the implementation is asked to enable numeric collation
	 */
	void Init(String locale, boolean numeric_ordering);

	/**
	 * Builds the sort key for a string.
	 *
	 * @param s text to build a key for
	 * @return  bytes of the sort key for {@code s}
	 */
	byte[] Get_sortkey(String s);
}
|
||||
25
400_xowa/src/gplx/core/intls/ucas/Uca_collator_.java
Normal file
25
400_xowa/src/gplx/core/intls/ucas/Uca_collator_.java
Normal file
@@ -0,0 +1,25 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.core.intls.ucas; import gplx.*; import gplx.core.*; import gplx.core.intls.*;
|
||||
public class Uca_collator_ {
|
||||
public static Uca_collator New(String locale, boolean numeric_ordering) {
|
||||
Uca_collator rv = new Uca_collator__icu__4_8();
|
||||
rv.Init(locale, numeric_ordering);
|
||||
return rv;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,49 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.core.intls.ucas; import gplx.*; import gplx.core.*; import gplx.core.intls.*;
|
||||
import java.util.Locale;
|
||||
import com.ibm.icu.text.CollationKey;
|
||||
import com.ibm.icu.text.Collator;
|
||||
import com.ibm.icu.text.RuleBasedCollator;
|
||||
class Uca_collator__icu__4_8 implements Uca_collator {
|
||||
private Collator collator;
|
||||
public void Init(String locale, boolean numeric_ordering) {
|
||||
try {
|
||||
this.collator = Collator.getInstance(Locale.forLanguageTag(locale));
|
||||
if (numeric_ordering) {
|
||||
RuleBasedCollator rbc = (RuleBasedCollator)collator;
|
||||
rbc.setNumericCollation(true);
|
||||
}
|
||||
} catch (Exception e) {throw Err_.new_wo_type("collator init failed", "err", Err_.Message_lang(e));}
|
||||
}
|
||||
public byte[] Get_sortkey(String s) {
|
||||
CollationKey key = collator.getCollationKey(s);
|
||||
byte[] src = key.toByteArray();
|
||||
int src_len = src.length;
|
||||
byte[] rv = src;
|
||||
|
||||
// remove last byte if it is 0 (which it often is)
|
||||
if (src_len > 0 && src[src_len - 1] == 0) {
|
||||
int rv_len = src_len - 1;
|
||||
rv = new byte[rv_len];
|
||||
for (int i = 0; i < rv_len; ++i)
|
||||
rv[i] = src[i];
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
}
|
||||
51
400_xowa/src/gplx/core/intls/ucas/Uca_ltr_extractor.java
Normal file
51
400_xowa/src/gplx/core/intls/ucas/Uca_ltr_extractor.java
Normal file
@@ -0,0 +1,51 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.core.intls.ucas; import gplx.*; import gplx.core.*; import gplx.core.intls.*;
|
||||
public class Uca_ltr_extractor {
|
||||
private final boolean numeric;
|
||||
private final byte[] numeric_heading;
|
||||
private final Hash_adp_bry numeric_hash;
|
||||
public Uca_ltr_extractor(boolean numeric) {
|
||||
this.numeric = numeric;
|
||||
if (numeric) {
|
||||
numeric_heading = Bry_.new_a7("0-9");
|
||||
|
||||
// create hash of "0", "1", "2", ...
|
||||
numeric_hash = Hash_adp_bry.cs();
|
||||
for (int i = 0; i < 10; ++i) {
|
||||
byte[] digit_bry = Bry_.new_by_int(Byte_ascii.Num_0 + i);
|
||||
numeric_hash.Add(digit_bry, digit_bry);
|
||||
}
|
||||
}
|
||||
else {
|
||||
numeric_heading = null;
|
||||
numeric_hash = null;
|
||||
}
|
||||
}
|
||||
public byte[] Get_1st_ltr(byte[] bry) {
|
||||
// NOTE: this is simplified and only does numeric logic; MW code loads up all ICU chars via first-letters-root.ser, adds custom chars, sorts them, and then does a binary search to find it; REF:IcuCollation.php!getFirstLetter
|
||||
int bry_len = bry.length;
|
||||
if (bry_len == 0) return Bry_.Empty;
|
||||
byte[] rv = gplx.core.intls.Utf8_.Get_char_at_pos_as_bry(bry, 0);
|
||||
if (numeric) {
|
||||
if (numeric_hash.Has(rv))
|
||||
rv = numeric_heading;
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
}
|
||||
@@ -22,7 +22,7 @@ public class Io_buffer_rdr_tst {
|
||||
Io_mgr.Instance.InitEngine_mem();
|
||||
fil = Io_url_.mem_fil_("mem/byteStreamRdr.txt");
|
||||
ini_Write("0123456789");
|
||||
rdr = Io_buffer_rdr.new_(Io_stream_rdr_.file_(fil), 4);
|
||||
rdr = Io_buffer_rdr.new_(Io_stream_rdr_.New__raw(fil), 4);
|
||||
} Io_buffer_rdr rdr; Io_url fil;
|
||||
@After public void teardown() {rdr.Rls();}
|
||||
@Test public void Bfr_load_all() {
|
||||
|
||||
@@ -23,7 +23,7 @@ public class Io_stream_rdr_process implements Io_stream_rdr {
|
||||
private InputStream stream_read;
|
||||
private String[] process_args;
|
||||
Io_stream_rdr_process(Io_url process_exe, Io_url stream_url, String[] process_args) {this.process_exe = process_exe; this.url = stream_url; this.process_args = process_args;}
|
||||
public byte Tid() {return Io_stream_.Tid_bzip2;} // for now, classify as bzip2; not sure if separate tid is necessary
|
||||
public byte Tid() {return Io_stream_tid_.Tid__bzip2;} // for now, classify as bzip2; not sure if separate tid is necessary
|
||||
public boolean Exists() {return this.Len() > 0;}
|
||||
public Io_url Url() {return url;} public Io_stream_rdr Url_(Io_url v) {url = v; return this;} private Io_url url;
|
||||
public long Len() {return len;} public Io_stream_rdr Len_(long v) {len = v; return this;} private long len;
|
||||
|
||||
@@ -18,16 +18,16 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
package gplx.core.ios; import gplx.*; import gplx.core.*;
|
||||
import gplx.core.ios.streams.*;
|
||||
public class Io_stream_zip_mgr {
|
||||
private Io_stream_wtr wtr_gzip, wtr_zip, wtr_bzip2;
|
||||
private Io_stream_wtr wtr__gzip, wtr__zip, wtr__bzip2, wtr__xz;
|
||||
public byte[] Zip(byte type, byte[] val) {
|
||||
if (type == Io_stream_.Tid_raw) return val;
|
||||
if (type == Io_stream_tid_.Tid__raw) return val;
|
||||
Io_stream_wtr wtr = Wtr(type);
|
||||
wtr.Write(val, 0, val.length);
|
||||
wtr.Flush();
|
||||
return wtr.To_ary_and_clear();
|
||||
}
|
||||
public byte[] Unzip(byte type, byte[] val) {
|
||||
if (type == Io_stream_.Tid_raw) return val;
|
||||
if (type == Io_stream_tid_.Tid__raw) return val;
|
||||
Io_stream_rdr rdr = Rdr(type);
|
||||
rdr.Open_mem(val);
|
||||
return Io_stream_rdr_.Load_all_as_bry(Bry_bfr_.New(), rdr);
|
||||
@@ -35,20 +35,22 @@ public class Io_stream_zip_mgr {
|
||||
private Io_stream_wtr Wtr(byte type) {
|
||||
Bry_bfr bfr = Bry_bfr_.New();
|
||||
switch (type) {
|
||||
case Io_stream_.Tid_gzip : if (wtr_gzip == null) wtr_gzip = Io_stream_wtr_.new_by_mem(bfr, Io_stream_.Tid_gzip) ; return wtr_gzip.Open();
|
||||
case Io_stream_.Tid_zip : if (wtr_zip == null) wtr_zip = Io_stream_wtr_.new_by_mem(bfr, Io_stream_.Tid_zip) ; return wtr_zip.Open();
|
||||
case Io_stream_.Tid_bzip2 : if (wtr_bzip2 == null) wtr_bzip2 = Io_stream_wtr_.new_by_mem(bfr, Io_stream_.Tid_bzip2) ; return wtr_bzip2.Open();
|
||||
case Io_stream_.Tid_raw :
|
||||
default : throw Err_.new_unhandled(type);
|
||||
case Io_stream_tid_.Tid__gzip: if (wtr__gzip == null) wtr__gzip = Io_stream_wtr_.New_by_mem(bfr, Io_stream_tid_.Tid__gzip); return wtr__gzip.Open();
|
||||
case Io_stream_tid_.Tid__zip: if (wtr__zip == null) wtr__zip = Io_stream_wtr_.New_by_mem(bfr, Io_stream_tid_.Tid__zip); return wtr__zip.Open();
|
||||
case Io_stream_tid_.Tid__bzip2: if (wtr__bzip2 == null) wtr__bzip2 = Io_stream_wtr_.New_by_mem(bfr, Io_stream_tid_.Tid__bzip2); return wtr__bzip2.Open();
|
||||
case Io_stream_tid_.Tid__xz: if (wtr__xz == null) wtr__xz = Io_stream_wtr_.New_by_mem(bfr, Io_stream_tid_.Tid__xz); return wtr__xz.Open();
|
||||
case Io_stream_tid_.Tid__raw:
|
||||
default: throw Err_.new_unhandled(type);
|
||||
}
|
||||
}
|
||||
private Io_stream_rdr Rdr(byte type) { // TS.MEM: DATE:2016-07-12
|
||||
switch (type) {
|
||||
case Io_stream_.Tid_gzip : return Io_stream_rdr_.new_by_tid_(Io_stream_.Tid_gzip);
|
||||
case Io_stream_.Tid_zip : return Io_stream_rdr_.new_by_tid_(Io_stream_.Tid_zip);
|
||||
case Io_stream_.Tid_bzip2 : return Io_stream_rdr_.new_by_tid_(Io_stream_.Tid_bzip2);
|
||||
case Io_stream_.Tid_raw :
|
||||
default : throw Err_.new_unhandled(type);
|
||||
case Io_stream_tid_.Tid__gzip: return Io_stream_rdr_.New_by_tid(Io_stream_tid_.Tid__gzip);
|
||||
case Io_stream_tid_.Tid__zip: return Io_stream_rdr_.New_by_tid(Io_stream_tid_.Tid__zip);
|
||||
case Io_stream_tid_.Tid__bzip2: return Io_stream_rdr_.New_by_tid(Io_stream_tid_.Tid__bzip2);
|
||||
case Io_stream_tid_.Tid__xz: return Io_stream_rdr_.New_by_tid(Io_stream_tid_.Tid__xz);
|
||||
case Io_stream_tid_.Tid__raw:
|
||||
default: throw Err_.new_unhandled(type);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -18,22 +18,23 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
package gplx.core.net; import gplx.*; import gplx.core.*;
|
||||
import gplx.core.net.qargs.*;
|
||||
public class Gfo_url {
|
||||
public byte[] Raw() {return raw;} private byte[] raw;
|
||||
public byte Protocol_tid() {return protocol_tid;} private byte protocol_tid;
|
||||
public byte[] Protocol_bry() {return protocol_bry;} private byte[] protocol_bry;
|
||||
public byte[] Anch() {return anch;} private byte[] anch;
|
||||
public Gfo_qarg_itm[] Qargs() {return qargs;} private Gfo_qarg_itm[] qargs;
|
||||
public byte[][] Segs() {return segs;} private byte[][] segs; private int segs__len;
|
||||
public byte[] Segs__get_at(int i) {return i < segs__len ? segs[i] : null;}
|
||||
public byte[] Segs__get_at_1st() {return segs__len > 0 ? segs[0] : null;}
|
||||
public byte[] Segs__get_at_nth() {return segs__len > 1 ? segs[segs__len - 1] : null;}
|
||||
public Gfo_url Ctor(byte[] raw, byte protocol_tid, byte[] protocol_bry, byte[][] segs, Gfo_qarg_itm[] qargs, byte[] anch) {
|
||||
private final int segs__len;
|
||||
public Gfo_url(byte[] raw, byte protocol_tid, byte[] protocol_bry, byte[][] segs, Gfo_qarg_itm[] qargs, byte[] anch) {
|
||||
this.raw = raw;
|
||||
this.protocol_tid = protocol_tid; this.protocol_bry = protocol_bry;
|
||||
this.segs = segs; this.segs__len = segs.length;
|
||||
this.qargs = qargs;
|
||||
this.anch = anch;
|
||||
return this;
|
||||
}
|
||||
public static final Gfo_url Empty = new Gfo_url().Ctor(Bry_.Empty, Gfo_protocol_itm.Tid_unknown, Bry_.Empty, Bry_.Ary_empty, null, null);
|
||||
public byte[] Raw() {return raw;} private final byte[] raw;
|
||||
public byte Protocol_tid() {return protocol_tid;} private final byte protocol_tid;
|
||||
public byte[] Protocol_bry() {return protocol_bry;} private final byte[] protocol_bry;
|
||||
public byte[] Anch() {return anch;} private final byte[] anch;
|
||||
public Gfo_qarg_itm[] Qargs() {return qargs;} private final Gfo_qarg_itm[] qargs;
|
||||
public byte[][] Segs() {return segs;} private final byte[][] segs;
|
||||
public byte[] Segs__get_at(int i) {return i < segs__len ? segs[i] : null;}
|
||||
public byte[] Segs__get_at_1st() {return segs__len > 0 ? segs[0] : null;}
|
||||
public byte[] Segs__get_at_nth() {return segs__len > 1 ? segs[segs__len - 1] : null;}
|
||||
|
||||
public static final Gfo_url Empty = new Gfo_url(Bry_.Empty, Gfo_protocol_itm.Tid_unknown, Bry_.Empty, Bry_.Ary_empty, null, null);
|
||||
}
|
||||
|
||||
@@ -16,20 +16,21 @@ You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.core.net; import gplx.*; import gplx.core.*;
|
||||
import gplx.core.primitives.*; import gplx.core.btries.*;
|
||||
import gplx.core.btries.*;
|
||||
import gplx.core.net.qargs.*;
|
||||
import gplx.langs.htmls.encoders.*;
|
||||
public class Gfo_url_parser {
|
||||
private final Btrie_slim_mgr protocols = Btrie_slim_mgr.ci_a7(); // ASCII:url_protocol; EX:"http:", "ftp:", etc
|
||||
private final Bry_ary segs_ary = new Bry_ary(4), qargs = new Bry_ary(4);
|
||||
private final Bry_bfr tmp_bfr = Bry_bfr_.Reset(500);
|
||||
private final Btrie_rv trv = new Btrie_rv();
|
||||
public byte[] Relative_url_protocol_bry() {return Gfo_protocol_itm.Itm_https.Key_w_colon_bry();} // NOTE: https b/c any WMF wiki will now default to WMF; DATE:2015-07-26
|
||||
private final List_adp segs_list = List_adp_.New(), qargs_list = List_adp_.New();
|
||||
private final Bry_bfr tmp_bfr = Bry_bfr_.Reset(500);
|
||||
public Gfo_url_parser() {
|
||||
Init_protocols(Gfo_protocol_itm.Ary());
|
||||
Init_protocol_itm(Gfo_protocol_itm.Bry_relative, Gfo_protocol_itm.Tid_relative_1);
|
||||
Init_protocol_itm(Gfo_protocol_itm.Bry_file, Gfo_protocol_itm.Tid_file);
|
||||
Init_protocol_itm(gplx.xowa.parsers.lnkes.Xop_lnke_wkr.Bry_xowa_protocol, Gfo_protocol_itm.Tid_xowa);
|
||||
}
|
||||
public byte[] Relative_url_protocol_bry() {return Gfo_protocol_itm.Itm_https.Key_w_colon_bry();} // NOTE: https b/c any WMF wiki will now default to WMF; DATE:2015-07-26
|
||||
private void Init_protocols(Gfo_protocol_itm... itms) {
|
||||
int len = itms.length;
|
||||
for (int i = 0; i < len; i++) {
|
||||
@@ -37,9 +38,7 @@ public class Gfo_url_parser {
|
||||
Init_protocol_itm(itm.Key_w_colon_bry(), itm.Tid());
|
||||
}
|
||||
}
|
||||
public void Init_protocol_itm(byte[] key, byte protocol_tid) {
|
||||
protocols.Add_bry_byte(key, protocol_tid);
|
||||
}
|
||||
public void Init_protocol_itm(byte[] key, byte protocol_tid) {protocols.Add_bry_byte(key, protocol_tid);}
|
||||
public void Parse_site_fast(Gfo_url_site_data site_data, byte[] src, int src_bgn, int src_end) {
|
||||
int pos = src_bgn; boolean rel = false;
|
||||
if (pos + 1 < src_end && src[pos] == Byte_ascii.Slash && src[pos + 1] == Byte_ascii.Slash) { // starts with "//"
|
||||
@@ -62,199 +61,126 @@ public class Gfo_url_parser {
|
||||
slash_pos = Bry_.Trim_end_pos(src, slash_pos);
|
||||
site_data.Atrs_set(rel, pos, slash_pos);
|
||||
}
|
||||
private static final int Area__path = 1, Area__qarg_key_1st = 2, Area__qarg_key_nth = 3, Area__qarg_val = 4, Area__anch = 5;
|
||||
private byte[] src; int src_bgn, src_end;
|
||||
private int area;
|
||||
private boolean encoded;
|
||||
private byte protocol_tid; private byte[] protocol_bry, anch;
|
||||
private int path_bgn, qarg_key_bgn, qarg_val_bgn, anch_bgn, anch_nth_bgn;
|
||||
public Gfo_url Parse(byte[] src) {return Parse(new Gfo_url(), src, 0, src.length);}
|
||||
public Gfo_url Parse(Gfo_url rv, byte[] src, int src_bgn, int src_end) {
|
||||
this.src = src; this.src_bgn = src_bgn; this.src_end = src_end;
|
||||
encoded = false;
|
||||
protocol_tid = Gfo_protocol_itm.Tid_null;
|
||||
protocol_bry = anch = null;
|
||||
path_bgn = qarg_key_bgn = qarg_val_bgn = anch_bgn = anch_nth_bgn = -1;
|
||||
segs_ary.Clear(); qargs.Clear();
|
||||
int pos = src_bgn;
|
||||
Object protocol_obj = protocols.Match_at(trv, src, src_bgn, src_end);
|
||||
pos = trv.Pos();
|
||||
pos = Bry_find_.Find_fwd_while(src, pos, src_end, Byte_ascii.Slash);
|
||||
if (protocol_obj == null) {
|
||||
this.protocol_tid = Gfo_protocol_itm.Tid_unknown;
|
||||
}
|
||||
else {
|
||||
this.protocol_tid = ((Byte_obj_val)protocol_obj).Val();
|
||||
this.protocol_bry = Make_bry(src_bgn, pos);
|
||||
}
|
||||
area = Area__path;
|
||||
path_bgn = pos;
|
||||
while (true) {
|
||||
if (pos == src_end) break;
|
||||
byte b = src[pos];
|
||||
public Gfo_url Parse(byte[] src) {return Parse(src, 0, src.length);}
|
||||
public Gfo_url Parse(byte[] src, int src_bgn, int src_end) {
|
||||
// protocol
|
||||
byte protocol_tid = protocols.Match_byte_or(trv, src, src_bgn, src_end, Gfo_protocol_itm.Tid_unknown);
|
||||
int pos = Bry_find_.Find_fwd_while(src, trv.Pos(), src_end, Byte_ascii.Slash); // set pos after last slash; EX: "https://A" -> position before "A"
|
||||
byte[] protocol_bry = protocol_tid == Gfo_protocol_itm.Tid_unknown
|
||||
? null
|
||||
: Make_bry(false, src, src_bgn, pos);
|
||||
|
||||
// loop chars and handle "/", "#", "?", and "%"
|
||||
boolean encoded = false;
|
||||
int src_zth = src_end - 1;
|
||||
int anch_bgn = -1, qarg_bgn = -1, seg_bgn = pos;
|
||||
for (int i = pos; i < src_end; ++i) {
|
||||
byte b = src[i];
|
||||
switch (b) {
|
||||
case Byte_ascii.Slash: pos = Parse_slash(pos, b); break;
|
||||
case Byte_ascii.Question: pos = Parse_qarg_key_1st(pos, b); break;
|
||||
case Byte_ascii.Amp: pos = Parse_qarg_key_nth(pos, b); break;
|
||||
case Byte_ascii.Eq: pos = Parse_qarg_val(pos, b); break;
|
||||
case Byte_ascii.Hash: if (anch_bgn == -1) pos = Parse_anch(pos, b); else ++pos; break; // anchor begins at 1st #, not last #; EX:A#B#C has anchor of "B#C" not "C" PAGE:en.w:Grand_Central_Terminal; DATE:2015-12-31
|
||||
case Byte_ascii.Percent: encoded = true; ++pos; break;
|
||||
default:
|
||||
++pos;
|
||||
case Byte_ascii.Slash:
|
||||
if (qarg_bgn == -1) { // ignore slash in qargs
|
||||
segs_list.Add(Make_bry(encoded, src, seg_bgn, i));
|
||||
encoded = false;
|
||||
seg_bgn = i + 1; // +1 to skip "/"
|
||||
}
|
||||
break;
|
||||
case Byte_ascii.Hash: // set qarg to first #; also, ignore rest of String; EX: A#B#C -> B#C
|
||||
if (i == src_zth) continue; // ignore # at EOS; EX: "A#"
|
||||
anch_bgn = i;
|
||||
i = src_end;
|
||||
break;
|
||||
case Byte_ascii.Question: // set qarg to last "?"; EX: A?B?C -> C
|
||||
if (i == src_zth) continue; // ignore ? at EOS; EX: "A?"
|
||||
qarg_bgn = i;
|
||||
break;
|
||||
case Byte_ascii.Percent:
|
||||
encoded = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
End_area(pos, Byte_ascii.Null);
|
||||
rv.Ctor(src, protocol_tid, protocol_bry, segs_ary.To_ary(0), Make_qargs(), anch);
|
||||
return rv;
|
||||
}
|
||||
private int Parse_slash(int pos, byte b) {
|
||||
switch (area) {
|
||||
case Area__path: return End_area(pos, b);
|
||||
default: return pos + 1;
|
||||
|
||||
int seg_end = src_end; // set seg_end to src_end; EX: "https://site/A" -> "A"; seg_end may be overriden if "#" or "?" exists
|
||||
|
||||
// set anch
|
||||
byte[] anch = null;
|
||||
if (anch_bgn != -1) {
|
||||
seg_end = anch_bgn; // set seg_end to anch_bgn; EX: "https://site/A#B" -> "A" x> "A#B"
|
||||
anch = Make_bry(encoded, src, anch_bgn + 1, src_end); // +1 to skip "#"
|
||||
}
|
||||
}
|
||||
private int Parse_anch(int pos, byte b) {
|
||||
switch (area) {
|
||||
case Area__path:
|
||||
End_area(pos, b);
|
||||
area = Area__anch;
|
||||
anch_bgn = pos + 1;
|
||||
break;
|
||||
case Area__anch: // handle double; A#B#C -> "A#B", "C"
|
||||
Append_to_last_path(Byte_ascii.Hash, Make_bry(anch_bgn, pos));
|
||||
anch_bgn = pos + 1;
|
||||
break;
|
||||
case Area__qarg_val:
|
||||
case Area__qarg_key_1st:
|
||||
case Area__qarg_key_nth:
|
||||
if (anch_nth_bgn == -1)
|
||||
anch_nth_bgn = Bry_find_.Find_bwd(src, Byte_ascii.Hash, src_end);
|
||||
if (pos == anch_nth_bgn) {
|
||||
End_area(pos, b);
|
||||
area = Area__anch;
|
||||
anch_bgn = pos + 1;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
|
||||
// set qargs
|
||||
Gfo_qarg_itm[] qarg_ary = Gfo_qarg_itm.Ary_empty;
|
||||
if (qarg_bgn != -1) {
|
||||
int qarg_end = anch_bgn == -1
|
||||
? src_end // # missing; set to src_end; EX: "A?B=C" -> EOS
|
||||
: anch_bgn; // # exists; set to anch_bgn; EX: "A?B=C#D" -> #
|
||||
qarg_ary = Make_qarg_ary(src, qarg_bgn, qarg_end);
|
||||
seg_end = qarg_ary.length == 0
|
||||
? src_end // set seg_end to src_end if pseudo qarg; EX: "https://site/A?B" -> "A?B" x> "A"
|
||||
: qarg_bgn; // set seg_end to qarg_bgn; EX: "https://site/A?B=C" -> "A" x> "A#B"; NOTE: overrides anch; "A?B=C#D" -> "A"
|
||||
}
|
||||
return pos + 1;
|
||||
|
||||
// extract seg_end; note that there will always be a seg_end; if src ends with slash, then it will be ""; EX: "A/" -> "A", ""
|
||||
segs_list.Add(Make_bry(encoded, src, seg_bgn, seg_end));
|
||||
|
||||
// build url and return it
|
||||
return new Gfo_url(src, protocol_tid, protocol_bry, (byte[][])segs_list.To_ary_and_clear(byte[].class), qarg_ary, anch);
|
||||
}
|
||||
private int Parse_qarg_key_1st(int pos, byte b) {
|
||||
switch (area) {
|
||||
case Area__path: // only valid way to start qarg; EX: A?B=C
|
||||
End_area(pos, b);
|
||||
area = Area__qarg_key_1st;
|
||||
qarg_key_bgn = pos + 1;
|
||||
break;
|
||||
case Area__qarg_key_1st: // handle dupe; EX: A?B?C
|
||||
case Area__qarg_key_nth: // handle dupe; EX: A?B=C&D?
|
||||
case Area__qarg_val: // handle dupe; EX: A?B=?
|
||||
End_area(pos, b);
|
||||
Append_to_last_path__qargs();
|
||||
area = Area__qarg_key_1st;
|
||||
qarg_key_bgn = pos + 1;
|
||||
break;
|
||||
private Gfo_qarg_itm[] Make_qarg_ary(byte[] src, int qarg_bgn, int qarg_end) {
|
||||
// init
|
||||
int key_bgn = qarg_bgn + 1; // +1 to skip "?"
|
||||
byte[] key_bry = null;
|
||||
int val_bgn = -1;
|
||||
boolean encoded = false;
|
||||
|
||||
// loop qarg for "&", "=", "%"
|
||||
int qarg_pos = qarg_bgn;
|
||||
while (true) {
|
||||
boolean b_is_last = qarg_pos == qarg_end;
|
||||
byte b = b_is_last ? Byte_ascii.Null : src[qarg_pos];
|
||||
boolean make_qarg = false;
|
||||
switch (b) {
|
||||
case Byte_ascii.Amp: // "&" always makes qarg
|
||||
make_qarg = true;
|
||||
break;
|
||||
case Byte_ascii.Null: // "EOS" makes qarg as long as "=" seen or at least one qarg; specifically, "A?B" shouldn't make qarg
|
||||
if ( val_bgn != -1 // "=" seen; EX: "?A=B"
|
||||
|| qargs_list.Count() > 0) // at least one qarg exists; EX: "?A=B&C"
|
||||
make_qarg = true;
|
||||
break;
|
||||
case Byte_ascii.Eq:
|
||||
key_bry = Make_bry(encoded, src, key_bgn, qarg_pos);
|
||||
encoded = false;
|
||||
val_bgn = qarg_pos + 1;
|
||||
break;
|
||||
case Byte_ascii.Percent:
|
||||
encoded = true;
|
||||
break;
|
||||
}
|
||||
|
||||
// make qarg
|
||||
if (make_qarg) {
|
||||
byte[] val_bry = null;
|
||||
if (key_bry == null) // key missing; EX: "&A" -> "A,null"
|
||||
key_bry = Make_bry(encoded, src, key_bgn, qarg_pos);
|
||||
else // key exists; EX: "&A=B" -> "A,B"
|
||||
val_bry = Make_bry(encoded, src, val_bgn, qarg_pos);
|
||||
encoded = false;
|
||||
qargs_list.Add(new Gfo_qarg_itm(key_bry, val_bry));
|
||||
|
||||
// reset vars
|
||||
key_bry = null;
|
||||
key_bgn = qarg_pos + 1;
|
||||
val_bgn = -1;
|
||||
}
|
||||
if (b_is_last) break;
|
||||
++qarg_pos;
|
||||
}
|
||||
return pos + 1;
|
||||
}
|
||||
private int Parse_qarg_key_nth(int pos, byte b) {
|
||||
switch (area) {
|
||||
case Area__path: // ignore if qarg not started; EX: A&B
|
||||
break;
|
||||
case Area__qarg_key_1st: // handle invalid; A?B&C
|
||||
case Area__qarg_key_nth: // handle invalid; A?B=C&D&E=F
|
||||
End_area(pos, b);
|
||||
qargs.Add(null);
|
||||
area = Area__qarg_key_nth;
|
||||
qarg_key_bgn = pos + 1;
|
||||
break;
|
||||
case Area__qarg_val:
|
||||
End_area(pos, b);
|
||||
area = Area__qarg_key_nth;
|
||||
qarg_key_bgn = pos + 1;
|
||||
break;
|
||||
}
|
||||
return pos + 1;
|
||||
}
|
||||
private int Parse_qarg_val(int pos, byte b) {
|
||||
switch (area) {
|
||||
case Area__qarg_key_1st:
|
||||
case Area__qarg_key_nth:
|
||||
End_area(pos, b); break;
|
||||
default: break;
|
||||
}
|
||||
return pos + 1;
|
||||
}
|
||||
private int End_area(int pos, byte b) {
|
||||
switch (area) {
|
||||
case Area__path:
|
||||
segs_ary.Add(Make_bry(path_bgn, pos));
|
||||
path_bgn = pos + 1;
|
||||
break;
|
||||
case Area__qarg_key_1st:
|
||||
case Area__qarg_key_nth:
|
||||
if (b == Byte_ascii.Null && qargs.Len() == 0) // handle A?b but not A?b=c&d
|
||||
Append_to_last_path(Byte_ascii.Question, Make_bry(qarg_key_bgn, src_end));
|
||||
else {
|
||||
qargs.Add(Make_bry(qarg_key_bgn, pos));
|
||||
qarg_val_bgn = pos + 1;
|
||||
area = Area__qarg_val;
|
||||
}
|
||||
break;
|
||||
case Area__qarg_val:
|
||||
qargs.Add(Make_bry(qarg_val_bgn, pos));
|
||||
qarg_key_bgn = pos + 1;
|
||||
qarg_val_bgn = -1;
|
||||
area = Area__qarg_key_nth;
|
||||
break;
|
||||
case Area__anch:
|
||||
if (b == Byte_ascii.Null && anch_bgn == src_end) // handle A# but not "A#B"
|
||||
Append_to_last_path(Byte_ascii.Hash, Make_bry(anch_bgn, src_end));
|
||||
else
|
||||
anch = Make_bry(anch_bgn, pos);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
encoded = false;
|
||||
return pos + 1;
|
||||
}
|
||||
private byte[] Make_bry(int bgn, int end) {
|
||||
return encoded ? gplx.langs.htmls.encoders.Gfo_url_encoder_.Xourl.Decode(tmp_bfr, Bool_.N, src, bgn, end).To_bry_and_clear() : Bry_.Mid(src, bgn, end);
|
||||
}
|
||||
private Gfo_qarg_itm[] Make_qargs() {
|
||||
int qargs_len = qargs.Len(); if (qargs_len == 0) return Gfo_qarg_itm.Ary_empty;
|
||||
if (qargs_len % 2 == 1) ++qargs_len; // handle odd qargs; EX: ?A=B&C&D=E
|
||||
Gfo_qarg_itm[] rv = new Gfo_qarg_itm[qargs_len / 2];
|
||||
for (int i = 0; i < qargs_len; i += 2) {
|
||||
byte[] key = qargs.Get_at(i);
|
||||
int val_idx = i + 1;
|
||||
byte[] val = val_idx < qargs_len ? qargs.Get_at(val_idx) : null;
|
||||
rv[i / 2] = new Gfo_qarg_itm(key, val);
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
private void Append_to_last_path(byte b, byte[] append) {
|
||||
byte[] last_path = segs_ary.Get_at_last(); if (last_path == null) return;
|
||||
last_path = Bry_.Add_w_dlm(b, last_path, append);
|
||||
segs_ary.Set_at_last(last_path);
|
||||
}
|
||||
private void Append_to_last_path__qargs() {
|
||||
byte[] last_path = segs_ary.Get_at_last(); if (last_path == null) return;
|
||||
tmp_bfr.Add(last_path);
|
||||
int len = qargs.Len();
|
||||
if (len % 2 == 1) qargs.Add(null); // handle odd qargs
|
||||
for (int i = 0; i < len; i += 2) {
|
||||
tmp_bfr.Add_byte(i == 0 ? Byte_ascii.Question : Byte_ascii.Amp);
|
||||
tmp_bfr.Add(qargs.Get_at(i));
|
||||
byte[] qarg_val = qargs.Get_at(i + 1);
|
||||
if (qarg_val != null) // handle "null" added above
|
||||
tmp_bfr.Add_byte_eq().Add(qarg_val);
|
||||
}
|
||||
qargs.Clear();
|
||||
segs_ary.Set_at_last(tmp_bfr.To_bry_and_clear());
|
||||
return (Gfo_qarg_itm[])qargs_list.To_ary_and_clear(Gfo_qarg_itm.class);
|
||||
}
|
||||
private byte[] Make_bry(boolean encoded, byte[] src, int bgn, int end) {
|
||||
return encoded ? Gfo_url_encoder_.Xourl.Decode(tmp_bfr, Bool_.N, src, bgn, end).To_bry_and_clear() : Bry_.Mid(src, bgn, end);
|
||||
}
|
||||
|
||||
public static final byte[] Bry_double_slash = new byte[] {Byte_ascii.Slash, Byte_ascii.Slash};
|
||||
}
|
||||
|
||||
@@ -20,15 +20,19 @@ import gplx.core.net.qargs.*;
|
||||
class Gfo_url_parser_fxt {
|
||||
private final Gfo_url_parser parser = new Gfo_url_parser();
|
||||
private Gfo_url actl;
|
||||
public Gfo_url_parser_fxt Chk_protocol_tid(byte v) {Tfds.Eq_byte(v, actl.Protocol_tid(), "protocol_tid"); return this;}
|
||||
public Gfo_url_parser_fxt Chk_protocol_bry(String v) {Tfds.Eq_str(v, actl.Protocol_bry(), "protocol_bry"); return this;}
|
||||
public Gfo_url_parser_fxt Chk_site(String v) {Tfds.Eq_str(v, actl.Segs__get_at_1st(), "site"); return this;}
|
||||
public Gfo_url_parser_fxt Chk_page(String v) {Tfds.Eq_str(v, actl.Segs__get_at_nth(), "page"); return this;}
|
||||
public Gfo_url_parser_fxt Chk_anch(String v) {Tfds.Eq_str(v, actl.Anch(), "anch"); return this;}
|
||||
public Gfo_url_parser_fxt Chk_segs(String... ary) {Tfds.Eq_int(ary.length, actl.Segs().length, "segs_len"); Tfds.Eq_str_lines(String_.Concat_lines_nl(ary), String_.Concat_lines_nl(String_.Ary(actl.Segs())), "segs"); return this;}
|
||||
public Gfo_url_parser_fxt Chk_qargs(String... ary) {Tfds.Eq_str_lines(String_.To_str__as_kv_ary(ary), Gfo_qarg_itm.To_str(actl.Qargs()), "qargs"); return this;}
|
||||
public Gfo_url_parser_fxt Run_parse(String v) {
|
||||
this.actl = parser.Parse(Bry_.new_u8(v));
|
||||
public Gfo_url_parser_fxt Test__protocol_tid(byte v) {Tfds.Eq_byte(v, actl.Protocol_tid(), "protocol_tid"); return this;}
|
||||
public Gfo_url_parser_fxt Test__protocol_bry(String v) {Tfds.Eq_str(v, actl.Protocol_bry(), "protocol_bry"); return this;}
|
||||
public Gfo_url_parser_fxt Test__site(String v) {Tfds.Eq_str(v, actl.Segs__get_at_1st(), "site"); return this;}
|
||||
public Gfo_url_parser_fxt Test__page(String v) {Tfds.Eq_str(v, actl.Segs__get_at_nth(), "page"); return this;}
|
||||
public Gfo_url_parser_fxt Test__anch(String v) {Tfds.Eq_str(v, actl.Anch(), "anch"); return this;}
|
||||
public Gfo_url_parser_fxt Test__segs(String... ary) {
|
||||
Tfds.Eq_str_lines(String_.Concat_lines_nl(ary), String_.Concat_lines_nl(String_.Ary(actl.Segs())), "segs");
|
||||
Tfds.Eq_int(ary.length, actl.Segs().length, "segs_len");
|
||||
return this;
|
||||
}
|
||||
public Gfo_url_parser_fxt Test__qargs(String... ary) {Tfds.Eq_str_lines(String_.To_str__as_kv_ary(ary), Qargs__To_str(actl.Qargs()), "qargs"); return this;}
|
||||
public Gfo_url_parser_fxt Exec__parse(String v) {
|
||||
this.actl = parser.Parse(Bry_.new_u8(v), 0, String_.Len(v));
|
||||
return this;
|
||||
}
|
||||
public void Test_Parse_site_fast(String raw, String expd) {
|
||||
@@ -37,4 +41,16 @@ class Gfo_url_parser_fxt {
|
||||
String actl = String_.new_u8(raw_bry, site_data.Site_bgn(), site_data.Site_end());
|
||||
Tfds.Eq(expd, actl);
|
||||
} private final Gfo_url_site_data site_data = new Gfo_url_site_data();
|
||||
private static String Qargs__To_str(Gfo_qarg_itm[] ary) {
|
||||
int len = ary.length;
|
||||
Bry_bfr bfr = Bry_bfr_.New();
|
||||
for (int i = 0; i < len; ++i) {
|
||||
Gfo_qarg_itm itm = ary[i];
|
||||
bfr.Add(itm.Key_bry()).Add_byte_eq();
|
||||
if (itm.Val_bry() != null)
|
||||
bfr.Add(itm.Val_bry());
|
||||
bfr.Add_byte_nl();
|
||||
}
|
||||
return bfr.To_str_and_clear();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -18,102 +18,102 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
package gplx.core.net; import gplx.*; import gplx.core.*;
|
||||
import org.junit.*;
|
||||
public class Gfo_url_parser_tst {
|
||||
private final Gfo_url_parser_fxt tstr = new Gfo_url_parser_fxt();
|
||||
private final Gfo_url_parser_fxt tstr = new Gfo_url_parser_fxt();
|
||||
@Test public void Protocol__relative() {
|
||||
tstr.Run_parse("//en.wikipedia.org").Chk_protocol_tid(Gfo_protocol_itm.Tid_relative_1).Chk_protocol_bry("//").Chk_site("en.wikipedia.org");
|
||||
tstr.Exec__parse("//en.wikipedia.org").Test__protocol_tid(Gfo_protocol_itm.Tid_relative_1).Test__protocol_bry("//").Test__site("en.wikipedia.org");
|
||||
}
|
||||
@Test public void Protocol__none() {
|
||||
tstr.Run_parse("en.wikipedia.org/wiki/A").Chk_protocol_tid(Gfo_protocol_itm.Tid_unknown).Chk_segs("en.wikipedia.org", "wiki", "A");
|
||||
tstr.Exec__parse("en.wikipedia.org/wiki/A").Test__protocol_tid(Gfo_protocol_itm.Tid_unknown).Test__segs("en.wikipedia.org", "wiki", "A");
|
||||
}
|
||||
@Test public void Site__parts__3() {
|
||||
tstr.Run_parse("https://en.wikipedia.org").Chk_protocol_tid(Gfo_protocol_itm.Tid_https).Chk_protocol_bry("https://").Chk_segs("en.wikipedia.org");
|
||||
tstr.Exec__parse("https://en.wikipedia.org").Test__protocol_tid(Gfo_protocol_itm.Tid_https).Test__protocol_bry("https://").Test__segs("en.wikipedia.org");
|
||||
}
|
||||
@Test public void Site__parts__2() {
|
||||
tstr.Run_parse("https://wikipedia.org").Chk_protocol_tid(Gfo_protocol_itm.Tid_https).Chk_segs("wikipedia.org");
|
||||
tstr.Exec__parse("https://wikipedia.org").Test__protocol_tid(Gfo_protocol_itm.Tid_https).Test__segs("wikipedia.org");
|
||||
}
|
||||
@Test public void Site__parts__1() {
|
||||
tstr.Run_parse("https://wikipedia").Chk_protocol_tid(Gfo_protocol_itm.Tid_https).Chk_segs("wikipedia");
|
||||
tstr.Exec__parse("https://wikipedia").Test__protocol_tid(Gfo_protocol_itm.Tid_https).Test__segs("wikipedia");
|
||||
}
|
||||
@Test public void Site__slash__none() {
|
||||
tstr.Run_parse("https:site").Chk_protocol_tid(Gfo_protocol_itm.Tid_https).Chk_site("site");
|
||||
tstr.Exec__parse("https:site").Test__protocol_tid(Gfo_protocol_itm.Tid_https).Test__site("site");
|
||||
}
|
||||
@Test public void Site__slash__eos() {
|
||||
tstr.Exec__parse("https://en.wikipedia.org/").Test__protocol_tid(Gfo_protocol_itm.Tid_https).Test__site("en.wikipedia.org");
|
||||
}
|
||||
@Test public void Paths__1() {
|
||||
tstr.Run_parse("https://site/A").Chk_segs("site", "A");
|
||||
tstr.Exec__parse("https://site/A").Test__segs("site", "A");
|
||||
}
|
||||
@Test public void Paths__2() {
|
||||
tstr.Run_parse("https://site/wiki/A").Chk_segs("site", "wiki", "A");
|
||||
tstr.Exec__parse("https://site/wiki/A").Test__segs("site", "wiki", "A");
|
||||
}
|
||||
@Test public void Paths__n() {
|
||||
tstr.Run_parse("https://site/wiki/A/B/C/D").Chk_segs("site", "wiki", "A", "B", "C", "D");
|
||||
tstr.Exec__parse("https://site/wiki/A/B/C/D").Test__segs("site", "wiki", "A", "B", "C", "D");
|
||||
}
|
||||
@Test public void Qargs__1() {
|
||||
tstr.Run_parse("https://site/A?B=C").Chk_page("A").Chk_qargs("B", "C");
|
||||
tstr.Exec__parse("https://site/A?B=C").Test__page("A").Test__qargs("B", "C");
|
||||
}
|
||||
@Test public void Qargs__2() {
|
||||
tstr.Run_parse("https://site/A?B=C&D=E").Chk_page("A").Chk_qargs("B", "C", "D", "E");
|
||||
tstr.Exec__parse("https://site/A?B=C&D=E").Test__page("A").Test__qargs("B", "C", "D", "E");
|
||||
}
|
||||
@Test public void Qargs__3() {
|
||||
tstr.Run_parse("https://site/A?B=C&D=E&F=G").Chk_page("A").Chk_qargs("B", "C", "D", "E", "F", "G");
|
||||
tstr.Exec__parse("https://site/A?B=C&D=E&F=G").Test__page("A").Test__qargs("B", "C", "D", "E", "F", "G");
|
||||
}
|
||||
@Test public void Qargs__ques__dupe__ques() {
|
||||
tstr.Run_parse("https://site/A?B?Y=Z").Chk_page("A?B").Chk_qargs("Y", "Z");
|
||||
tstr.Exec__parse("https://site/A?B?Y=Z").Test__page("A?B").Test__qargs("Y", "Z");
|
||||
}
|
||||
@Test public void Qargs__ques__dupe__amp() {
|
||||
tstr.Run_parse("https://site/A?B=C&D?Y=Z").Chk_page("A?B=C&D").Chk_qargs("Y", "Z");
|
||||
tstr.Exec__parse("https://site/A?B=C&D?Y=Z").Test__page("A?B=C&D").Test__qargs("Y", "Z");
|
||||
}
|
||||
@Test public void Qargs__ques__dupe__eq() {
|
||||
tstr.Run_parse("https://site/A?B=C?Y=Z").Chk_page("A?B=C").Chk_qargs("Y", "Z");
|
||||
tstr.Exec__parse("https://site/A?B=C?Y=Z").Test__page("A?B=C").Test__qargs("Y", "Z");
|
||||
}
|
||||
@Test public void Qargs__amp__dupe__ques() {
|
||||
tstr.Run_parse("https://site/A?B&Y=Z").Chk_page("A").Chk_qargs("B", null, "Y", "Z");
|
||||
tstr.Exec__parse("https://site/A?B&Y=Z").Test__page("A").Test__qargs("B", null, "Y", "Z");
|
||||
}
|
||||
@Test public void Qargs__amp__dupe__amp() {
|
||||
tstr.Run_parse("https://site/A?B=C&D&Y=Z").Chk_page("A").Chk_qargs("B", "C", "D", null, "Y", "Z");
|
||||
tstr.Exec__parse("https://site/A?B=C&D&Y=Z").Test__page("A").Test__qargs("B", "C", "D", null, "Y", "Z");
|
||||
}
|
||||
@Test public void Qargs__missing_val__0() {
|
||||
tstr.Run_parse("https://site/A?").Chk_page("A?").Chk_qargs();
|
||||
tstr.Exec__parse("https://site/A?").Test__page("A?").Test__qargs();
|
||||
}
|
||||
@Test public void Qargs__missing_val__2() {
|
||||
tstr.Run_parse("https://site/A?B=C&D&F=G").Chk_page("A").Chk_qargs("B", "C", "D", null, "F", "G");
|
||||
tstr.Exec__parse("https://site/A?B=C&D&F=G").Test__page("A").Test__qargs("B", "C", "D", null, "F", "G");
|
||||
}
|
||||
@Test public void Qargs__missing_val__n() {
|
||||
tstr.Run_parse("https://site/A?B=C&D=E&F").Chk_page("A").Chk_qargs("B", "C", "D", "E", "F", null);
|
||||
tstr.Exec__parse("https://site/A?B=C&D=E&F").Test__page("A").Test__qargs("B", "C", "D", "E", "F", null);
|
||||
}
|
||||
@Test public void Qargs__site_less__missing__0() {
|
||||
tstr.Run_parse("A?B").Chk_segs("A?B").Chk_qargs();
|
||||
tstr.Exec__parse("A?B").Test__segs("A?B").Test__qargs();
|
||||
}
|
||||
@Test public void Qargs__site_less() {
|
||||
tstr.Run_parse("A?B=C&D=E").Chk_site("A").Chk_qargs("B", "C", "D", "E");
|
||||
tstr.Exec__parse("A?B=C&D=E").Test__site("A").Test__qargs("B", "C", "D", "E");
|
||||
}
|
||||
@Test public void Anch__basic() {
|
||||
tstr.Run_parse("https://site/A#B").Chk_page("A").Chk_anch("B");
|
||||
tstr.Exec__parse("https://site/A#B").Test__page("A").Test__anch("B");
|
||||
}
|
||||
@Test public void Anch__repeat__2() {
|
||||
tstr.Run_parse("https://site/A#B#C").Chk_page("A").Chk_anch("B#C");
|
||||
tstr.Exec__parse("https://site/A#B#C").Test__page("A").Test__anch("B#C");
|
||||
}
|
||||
@Test public void Anch__repeat__3() {
|
||||
tstr.Run_parse("https://site/A#B#C#D").Chk_page("A").Chk_anch("B#C#D");
|
||||
tstr.Exec__parse("https://site/A#B#C#D").Test__page("A").Test__anch("B#C#D");
|
||||
}
|
||||
@Test public void Anch__missing() {
|
||||
tstr.Run_parse("https://site/A#").Chk_page("A#").Chk_anch(null);
|
||||
tstr.Exec__parse("https://site/A#").Test__page("A#").Test__anch(null);
|
||||
}
|
||||
@Test public void Anch__missing__eos() {
|
||||
tstr.Run_parse("https://site/A#B#").Chk_page("A").Chk_anch("B#");
|
||||
tstr.Exec__parse("https://site/A#B#").Test__page("A").Test__anch("B#");
|
||||
}
|
||||
@Test public void Anch__qargs__basic() {
|
||||
tstr.Run_parse("https://site/A?B=C&D=E#F").Chk_page("A").Chk_qargs("B", "C", "D", "E").Chk_anch("F");
|
||||
}
|
||||
@Test public void Anch__qargs__repeat() {
|
||||
tstr.Run_parse("https://site/A?B=C#&D=E#F").Chk_page("A").Chk_qargs("B", "C#", "D", "E").Chk_anch("F");
|
||||
tstr.Exec__parse("https://site/A?B=C&D=E#F").Test__page("A").Test__qargs("B", "C", "D", "E").Test__anch("F");
|
||||
}
|
||||
@Test public void Anch__site_less() {
|
||||
tstr.Run_parse("A#B").Chk_site("A").Chk_anch("B");
|
||||
tstr.Exec__parse("A#B").Test__site("A").Test__anch("B");
|
||||
}
|
||||
@Test public void Encode__page() {
|
||||
tstr.Run_parse("http://site/A%27s").Chk_site("site").Chk_page("A's");
|
||||
tstr.Exec__parse("http://site/A%27s").Test__site("site").Test__page("A's");
|
||||
}
|
||||
@Test public void Protocol_less__qargs() {
|
||||
tstr.Run_parse("Special:Search/Earth?fulltext=yes").Chk_segs("Special:Search", "Earth").Chk_page("Earth").Chk_qargs("fulltext", "yes");
|
||||
tstr.Exec__parse("Special:Search/Earth?fulltext=yes").Test__segs("Special:Search", "Earth").Test__page("Earth").Test__qargs("fulltext", "yes");
|
||||
}
|
||||
@Test public void Parse_site_fast() {
|
||||
tstr.Test_Parse_site_fast("http://a.org/B" , "a.org");
|
||||
@@ -121,4 +121,8 @@ public class Gfo_url_parser_tst {
|
||||
tstr.Test_Parse_site_fast("//a.org/B" , "a.org");
|
||||
tstr.Test_Parse_site_fast("//a.org/B:C" , "a.org");
|
||||
}
|
||||
// DELETED: logic isn't right; anch is first # not last; EX: https://en.wikipedia.org/w/index.php?title=Category:2001_albums&pagefrom=Beautiful+#View#mw-pages; DATE:2016-10-10
|
||||
// @Test public void Anch__qargs__repeat() {
|
||||
// tstr.Exec__parse("https://site/A?B=C#&D=E#F").Test__page("A").Test__qargs("B", "C#", "D", "E").Test__anch("F");
|
||||
// }
|
||||
}
|
||||
|
||||
@@ -17,35 +17,13 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.core.net.qargs; import gplx.*; import gplx.core.*; import gplx.core.net.*;
|
||||
public class Gfo_qarg_itm {
|
||||
public Gfo_qarg_itm(byte[] key_bry, byte[] val_bry) {this.key_bry = key_bry; this.val_bry = val_bry;}
|
||||
public byte[] Key_bry() {return key_bry;} private byte[] key_bry;
|
||||
public byte[] Val_bry() {return val_bry;} private byte[] val_bry;
|
||||
public Gfo_qarg_itm Val_bry_(byte[] v) {val_bry = v; return this;}
|
||||
public Gfo_qarg_itm(byte[] key_bry, byte[] val_bry) {
|
||||
this.key_bry = key_bry;
|
||||
this.val_bry = val_bry;
|
||||
}
|
||||
public byte[] Key_bry() {return key_bry;} private final byte[] key_bry;
|
||||
public byte[] Val_bry() {return val_bry;} private byte[] val_bry;
|
||||
public void Val_bry_(byte[] v) {val_bry = v;}
|
||||
|
||||
public static final Gfo_qarg_itm[] Ary_empty = new Gfo_qarg_itm[0];
|
||||
// Creates an item whose key is `key` converted by Bry_.new_u8 and whose value is Bry_.Empty.
public static Gfo_qarg_itm new_key_(String key) {
	byte[] key_bry = Bry_.new_u8(key);
	return new Gfo_qarg_itm(key_bry, Bry_.Empty);
}
|
||||
public static Gfo_qarg_itm[] Ary(String... kvs) {
|
||||
int len = kvs.length;
|
||||
Gfo_qarg_itm[] rv = new Gfo_qarg_itm[len / 2];
|
||||
String key = null;
|
||||
for (int i = 0; i < len; ++i) {
|
||||
String s = kvs[i];
|
||||
if (i % 2 == 0)
|
||||
key = s;
|
||||
else
|
||||
rv[i / 2] = new Gfo_qarg_itm(Bry_.new_u8(key), Bry_.new_u8(s));
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
public static String To_str(Gfo_qarg_itm[] ary) {
|
||||
int len = ary.length;
|
||||
Bry_bfr bfr = Bry_bfr_.New();
|
||||
for (int i = 0; i < len; ++i) {
|
||||
Gfo_qarg_itm itm = ary[i];
|
||||
bfr.Add(itm.Key_bry()).Add_byte_eq();
|
||||
if (itm.Val_bry() != null)
|
||||
bfr.Add(itm.Val_bry());
|
||||
bfr.Add_byte_nl();
|
||||
}
|
||||
return bfr.To_str_and_clear();
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user