1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2026-03-02 03:49:30 +00:00
This commit is contained in:
gnosygnu
2014-06-30 00:04:32 -04:00
parent 85594d3cdd
commit bae88e739c
2482 changed files with 198730 additions and 0 deletions

View File

@@ -0,0 +1,104 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
/**
 * Mutable holder for the pieces of one parsed XOWA url: raw bytes, protocol, lang / wiki, page, anchor,
 * query args, and the flags that are set from recognized query args.
 * Setters end in "_" and return this so that calls can be chained.
 * NOTE: instances are reused between parses; Init must clear every piece of per-parse state.
 */
public class Xoa_url {
	// query args; never null by default (starts as the shared empty array)
	public Gfo_url_arg[] Args() {return args;}
	public Xoa_url Args_(Gfo_url_arg[] v) {args = v; return this;}
	Gfo_url_arg[] args = Gfo_url_arg.Ary_empty;

	// raw bytes that the url was built from
	public byte[] Raw() {return raw;}
	public Xoa_url Raw_(byte[] v) {raw = v; return this;}
	private byte[] raw = Bry_.Empty;

	// protocol
	public boolean Protocol_is_relative() {return protocol_is_relative;}
	public Xoa_url Protocol_is_relative_(boolean v) {protocol_is_relative = v; return this;}
	private boolean protocol_is_relative;
	public byte Protocol_tid() {return protocol_tid;}
	public Xoa_url Protocol_tid_(byte v) {protocol_tid = v; return this;}
	private byte protocol_tid;
	public byte[] Protocol_bry() {return protocol_bry;}
	public Xoa_url Protocol_bry_(byte[] v) {protocol_bry = v; return this;}
	private byte[] protocol_bry;

	// lang / wiki
	public byte[] Lang_bry() {return lang_bry;}
	public Xoa_url Lang_bry_(byte[] v) {lang_bry = v; return this;}
	private byte[] lang_bry;
	public byte[] Wiki_bry() {return wiki_bry;}
	public Xoa_url Wiki_bry_(byte[] v) {wiki_bry = v; return this;}
	private byte[] wiki_bry;
	public Xow_wiki Wiki() {return wiki;}
	public Xoa_url Wiki_(Xow_wiki v) {wiki = v; return this;}
	private Xow_wiki wiki;

	// page
	public byte[] Page_bry() {return page_bry;}
	public Xoa_url Page_bry_(byte[] v) {page_bry = v; return this;}
	private byte[] page_bry;

	/** Returns the position in raw directly after Xoh_href_parser.Href_wiki_bry ("/wiki/"), or Bry_.NotFound if raw does not contain it. */
	int Page_bgn(int raw_len) {
		int wiki_pos = Bry_finder.Find_fwd(raw, Xoh_href_parser.Href_wiki_bry, 0, raw_len);	// look for /wiki/
		if (wiki_pos == Bry_.NotFound) return Bry_.NotFound;
		return wiki_pos + Xoh_href_parser.Href_wiki_bry.length;
	}
	/** Returns everything in raw after "/wiki/"; if raw has no "/wiki/", returns page_bry (or Bry_.Empty when page_bry is null). */
	public byte[] Page_for_lnki() {
		int raw_len = raw.length;
		int page_bgn = Page_bgn(raw_len);
		if (page_bgn == Bry_.NotFound)						// no /wiki/ found; return page
			return page_bry == null ? Bry_.Empty : page_bry;	// guard against null ref
		return Bry_.Mid(raw, page_bgn, raw_len);				// else take everything after "/wiki/"
	}

	/** String form of X_to_full. */
	public String X_to_full_str() {return String_.new_utf8_(this.X_to_full());}
	/** Returns wiki_bry + Xoa_consts.Url_wiki_intermediary + page_bry; returns page_bry alone (which may be null) when wiki_bry is null. */
	public byte[] X_to_full() {
		return wiki_bry == null
			? page_bry
			: Bry_.Add(wiki_bry, Xoa_consts.Url_wiki_intermediary, page_bry);
	}
	/** Same as X_to_full_str, but any Exception is caught and its brief message is returned instead. */
	public String X_to_full_str_safe() {
		try {return X_to_full_str();}
		catch (Exception e) {return gplx.Err_.Message_gplx_brief(e);}
	}

	// anchor
	public byte[] Anchor_bry() {return anchor_bry;}
	public Xoa_url Anchor_bry_(byte[] v) {anchor_bry = v; return this;}
	private byte[] anchor_bry = null;
	/** Returns anchor as String, or null when there is no anchor. */
	public String Anchor_str() {return anchor_bry == null ? null : String_.new_utf8_(anchor_bry);}

	// values / flags derived from query args
	public byte[] Use_lang() {return use_lang;}
	public Xoa_url Use_lang_(byte[] v) {use_lang = v; return this;}
	private byte[] use_lang;
	public boolean Redirect_force() {return redirect_force;}
	public Xoa_url Redirect_force_(boolean v) {redirect_force = v; return this;}
	private boolean redirect_force;
	public boolean Search_fulltext() {return search_fulltext;}
	public Xoa_url Search_fulltext_(boolean v) {search_fulltext = v; return this;}
	private boolean search_fulltext;
	public boolean Action_is_edit() {return action_is_edit;}
	public Xoa_url Action_is_edit_(boolean v) {action_is_edit = v; return this;}
	private boolean action_is_edit;

	// parse error code
	public byte Err() {return err;}
	public Xoa_url Err_(byte v) {err = v; return this;}
	private byte err;

	// path segments
	public byte[][] Segs_ary() {return segs_ary;}
	public Xoa_url Segs_ary_(byte[][] v) {segs_ary = v; return this;}
	private byte[][] segs_ary;

	/** Returns true if comp has the same wiki_bry, page_bry and redirect_force; anchor and args are not compared. */
	public boolean Eq_page(Xoa_url comp) {
		return	Bry_.Eq(wiki_bry, comp.wiki_bry)
			&&	Bry_.Eq(page_bry, comp.page_bry)
			&&	redirect_force == comp.Redirect_force();
	}
	/**
	 * Clears per-parse state and stores the new raw bytes. Called at the start of every parse because urls are reused.
	 * protocol_tid, protocol_bry and wiki are deliberately left as is; NOTE(review): the visible parser sets them on every parse path it owns — confirm for other callers.
	 */
	public void Init(byte[] raw) {
		this.raw = raw;
		segs_ary = null;
		lang_bry = wiki_bry = page_bry = anchor_bry = use_lang = null;
		err = 0;
		protocol_is_relative = false;
		redirect_force = false;
		action_is_edit = false;
		search_fulltext = false;			// FIX: was never cleared; reused url kept "fulltext" from earlier parse
		args = Gfo_url_arg.Ary_empty;		// FIX: was never cleared; parse paths that do not call Args_ kept args from earlier parse
	}

	/** Returns true if any arg has both the given key and the given val. */
	public boolean Args_exists(byte[] key, byte[] val) {
		int args_len = args.length;
		for (int i = 0; i < args_len; i++) {
			Gfo_url_arg arg = args[i];
			if (	Bry_.Eq(arg.Key_bry(), key)
				&&	Bry_.Eq(arg.Val_bry(), val))
				return true;
		}
		return false;
	}
	/**
	 * Copies this url's arg values into trg_args: every item in trg_args is first set to a null val;
	 * then each url arg whose key exists in trg_args overwrites that item's val. Url args with unknown keys are ignored.
	 */
	public void Args_fill(OrderedHash trg_args) {
		int trg_len = trg_args.Count();
		for (int i = 0; i < trg_len; i++) {		// reset all target vals
			Gfo_url_arg trg_arg = (Gfo_url_arg)trg_args.FetchAt(i);
			trg_arg.Val_bry_(null);
		}
		int src_len = args.length;
		for (int i = 0; i < src_len; i++) {		// copy vals for matching keys
			Gfo_url_arg src_arg = args[i];
			Gfo_url_arg trg_arg = (Gfo_url_arg)trg_args.Fetch(src_arg.Key_bry());
			if (trg_arg != null) trg_arg.Val_bry_(src_arg.Val_bry());
		}
	}
	/** Serializes all args as "?k1=v1&k2=v2"; returns Bry_.Empty when there are no args. */
	public byte[] Args_all_as_bry() {
		int args_len = args.length;
		if (args_len == 0) return Bry_.Empty;
		Bry_bfr bfr = Bry_bfr.new_();
		for (int i = 0; i < args_len; i++) {
			Gfo_url_arg arg = args[i];
			bfr.Add_byte(i == 0 ? Byte_ascii.Question : Byte_ascii.Amp);	// "?" before 1st arg; "&" before the rest
			bfr.Add(arg.Key_bry());
			bfr.Add_byte(Byte_ascii.Eq);
			bfr.Add(arg.Val_bry());
		}
		return bfr.XtoAryAndClear();
	}

	/** Creates a url with only wiki_bry and page_bry set. */
	public static Xoa_url new_(byte[] wiki, byte[] page) {
		Xoa_url rv = new Xoa_url();
		rv.Wiki_bry_(wiki);
		rv.Page_bry_(page);
		return rv;
	}
}

View File

@@ -0,0 +1,74 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
/**
 * Keyed, ordered view over the query args of a Xoa_url.
 * Load fills the hash from a url; Set_val_* edit or add args; Save writes the args back to a url.
 */
public class Xoa_url_arg_hash {
	OrderedHash hash = OrderedHash_.new_bry_();

	/** Returns the arg stored under key, or null if there is none. */
	public Gfo_url_arg Get_arg(byte[] key) {
		return (Gfo_url_arg)hash.Fetch(key);
	}
	/** Returns the val of key parsed as int; returns "or" if key is missing, its val is null, or Bry_.X_to_int_or falls back. */
	public int Get_val_int_or(byte[] key, int or) {
		byte[] bry = Get_val_bry_or(key, null);
		return bry == null ? or : Bry_.X_to_int_or(bry, or);
	}
	/** Returns the val of key, or "or" if key is missing. NOTE: a present key with a null val returns null, not "or". */
	public byte[] Get_val_bry_or(byte[] key, byte[] or) {
		Gfo_url_arg itm = Get_arg(key);
		if (itm == null) return or;
		return itm.Val_bry();
	}
	/** Returns the val of key as a String, or "or" if key is missing. */
	public String Get_val_str_or(byte[] key, String or) {
		Gfo_url_arg itm = Get_arg(key);
		if (itm == null) return or;
		return String_.new_utf8_(itm.Val_bry());
	}
	/** Sets the val of key to the decimal text of val; adds the arg if missing. */
	public void Set_val_by_int(byte[] key, int val) {
		String val_str = Int_.XtoStr(val);
		Set_val_by_bry(key, Bry_.new_ascii_(val_str));
	}
	/** Sets the val of key; adds a new arg at the end of the hash if key is missing. */
	public void Set_val_by_bry(byte[] key, byte[] val) {
		Gfo_url_arg itm = Get_arg(key);
		if (itm == null) {	// key not registered yet; create with empty val and register
			itm = new Gfo_url_arg(key, Bry_.Empty);
			hash.Add(key, itm);
		}
		itm.Val_bry_(val);
	}
	/**
	 * Appends "&key=val" to bfr for each requested key that exists (missing keys are skipped),
	 * then returns bfr's contents and clears it. NOTE: every pair, including the 1st, is prefixed with "&".
	 */
	public byte[] Concat(Bry_bfr bfr, byte[]... ary) {
		for (byte[] key : ary) {
			Gfo_url_arg itm = Get_arg(key);
			if (itm != null) {
				bfr.Add_byte(Byte_ascii.Amp);
				bfr.Add(itm.Key_bry());
				bfr.Add_byte(Byte_ascii.Eq);
				bfr.Add(itm.Val_bry());
			}
		}
		return bfr.XtoAryAndClear();
	}
	/** Clears the hash and registers every arg of url under its key; returns this for chaining. */
	public Xoa_url_arg_hash Load(Xoa_url url) {
		hash.Clear();
		for (Gfo_url_arg itm : url.Args())
			hash.Add(itm.Key_bry(), itm);
		return this;
	}
	/** Replaces the args of url with the current contents of the hash. */
	public void Save(Xoa_url url) {
		url.Args_((Gfo_url_arg[])hash.XtoAry(Gfo_url_arg.class));
	}
	/** Appends ary to bfr as "?k1=v1&k2=v2"; appends nothing when ary is empty. */
	public static void Concat_bfr(Bry_bfr bfr, Gfo_url_arg[] ary) {
		byte dlm = Byte_ascii.Question;		// "?" before 1st pair
		for (Gfo_url_arg itm : ary) {
			bfr.Add_byte(dlm);
			bfr.Add(itm.Key_bry()).Add_byte(Byte_ascii.Eq).Add(itm.Val_bry());
			dlm = Byte_ascii.Amp;			// "&" before all later pairs
		}
	}
}

View File

@@ -0,0 +1,288 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
import gplx.xowa.wikis.*; import gplx.xowa.net.*;
/**
 * Parses raw url text into a Xoa_url and builds the canonical url-bar text from a Xoa_url.
 * Instance methods (Build_str, Parse) share tmp_bfr and gfo_url, so they mutate parser state on every call.
 * Static methods (Parse_url, Parse_from_url_bar) resolve the parsed pieces against the app's wikis.
 */
public class Xoa_url_parser {
	private Url_encoder encoder = Url_encoder.new_html_href_mw_().Itms_raw_same_many(Byte_ascii.Underline);
	private Bry_bfr tmp_bfr = Bry_bfr.reset_(255);
	private Gfo_url_parser url_parser = new Gfo_url_parser();
	private Gfo_url gfo_url = new Gfo_url();
	public Gfo_url_parser Url_parser() {return url_parser;}

	/** Builds "wiki/wiki/page?args#anchor"; the page is passed through encoder.Decode; args and anchor are added as is. */
	public String Build_str(Xoa_url url) {	// transform to "canonical" form that fits url box for both XOWA and Mozilla Firefox
		tmp_bfr.Add(url.Wiki_bry());									// add wiki;		EX: "en.wikipedia.org"
		tmp_bfr.Add(Xoh_href_parser.Href_wiki_bry);						// add "/wiki/"		EX: "/wiki/"
		tmp_bfr.Add(encoder.Decode(url.Page_bry()));					// add page;		EX: "A"
		int args_len = url.Args().length;
		if (args_len > 0) {
			for (int i = 0; i < args_len; i++) {
				byte dlm = i == 0 ? Byte_ascii.Question : Byte_ascii.Amp;
				tmp_bfr.Add_byte(dlm);
				Gfo_url_arg arg = url.Args()[i];
				tmp_bfr.Add(arg.Key_bry()).Add_byte(Byte_ascii.Eq).Add(arg.Val_bry());
			}
		}
		if (url.Anchor_bry() != null)
			tmp_bfr.Add_byte(Byte_ascii.Hash).Add(url.Anchor_bry());	// add anchor;		EX: "#B"
		return tmp_bfr.XtoStrAndClear();
	}

	/** Parses the bytes of src between bgn and end; see Parse(Xoa_url, byte[]). */
	public boolean Parse(Xoa_url url, byte[] src, int bgn, int end) {return Parse(url, Bry_.Mid(src, bgn, end));}
	/**
	 * Resets url and fills it from src.
	 * Returns true if the underlying Gfo_url reported no error; returns false for errors and for "File:A.png"-style input
	 * (in which case only protocol, raw and wiki_bry are set on url).
	 */
	public boolean Parse(Xoa_url url, byte[] src) {
		url.Init(src);											// NOTE: need to call init to clear state; Xoa_url is often reused
		src = encoder.Decode(src);								// decode any url-encoded parameters
		int src_len = src.length;
		url_parser.Parse(gfo_url, src, 0, src_len);				// parse protocol
		byte protocol_tid = gfo_url.Protocol_tid();
		url.Protocol_tid_(gfo_url.Protocol_tid());				// NOTE: set protocol early b/c file may exit below; DATE:2014-04-25
		url.Protocol_bry_(gfo_url.Protocol_bry());
		if (protocol_tid == Xoo_protocol_itm.Tid_file && src_len > 5 && src[5] != Byte_ascii.Slash) { // file ns; EX: "File:A.png"; NOTE: for file:A.png, assume "file" refers to wiki_ns (File:), not protocol; hackish as it relies on looking for / after "file:" to distinguish between MW "File:A.png" and file system "file:///C/A.png"
			url.Raw_(src);
			url.Wiki_bry_(gfo_url.Raw());
			return false;
		}
		url.Protocol_is_relative_(gfo_url.Protocol_is_relative());
		url.Err_(gfo_url.Err());
		url.Raw_(src);
		byte[][] gfo_segs = gfo_url.Segs();
		boolean site_is_upload = gfo_url.Site() != null && Bry_.Eq(gfo_url.Site(), Bry_upload_wikimedia_org);
		if (site_is_upload && gfo_segs != null && gfo_segs.length > 1)	// FIX: need at least 2 segs ("/wikipedia/commons/"); shorter upload urls used to fail with an out-of-bounds index and now get generic handling
			Parse__upload(url, gfo_segs);						// handle urls like "http://upload.wikimedia.org/wikipedia/commons/a/ab/C.svg"
		else {
			url.Segs_ary_(gfo_url.Segs());
			url.Lang_bry_(gfo_url.Site_sub());
			url.Wiki_bry_(gfo_url.Site());
			url.Page_bry_(gfo_url.Page());
			url.Anchor_bry_(gfo_url.Anchor());
		}
		Gfo_url_arg[] args = gfo_url.Args();					// parse args
		int args_len = args.length;
		boolean args_is_invalid = false;
		for (int i = 0; i < args_len; i++) {
			Gfo_url_arg arg = args[i];
			byte[] key = arg.Key_bry();
			if (Bry_.Len_eq_0(key)) {							// empty key; treat entire arg String as invalid; NOTE: flags set by earlier args in this loop are kept
				args_is_invalid = true;
				break;
			}
			Object o = qry_args_hash.Get_by_bry(key);
			if (o != null) {									// key is one of the recognized args
				Byte_obj_val id = (Byte_obj_val)o;
				switch (id.Val()) {
					case Id_arg_redirect:		url.Redirect_force_(true); break;
					case Id_arg_uselang:		url.Use_lang_(arg.Val_bry()); break;
					case Id_arg_action:			if (Bry_.Eq(arg.Val_bry(), Bry_arg_action_edit)) url.Action_is_edit_(true); break;
					case Id_arg_title:			url.Page_bry_(arg.Val_bry()); url.Segs_ary_(Segs_ary_remove_w(url.Segs_ary())); break;	// handle /w/index.php?title=Earth
					case Id_arg_fulltext:		url.Search_fulltext_(true); break;
				}
			}
		}
		if (args_is_invalid) {									// args are not real args; glue the raw arg text back onto page / anchor; EX: "A?B"; "A#b?c"
			byte[] raw_bry = gfo_url.Raw();
			byte[] args_bry = Bry_.Mid(raw_bry, gfo_url.Args_bgn(), raw_bry.length);
			byte[] anchor_bry = url.Anchor_bry();
			if (anchor_bry == null)								// no anchor; set page to rest of url
				url.Page_bry_(Bry_.Add(url.Page_bry(), args_bry));
			else
				url.Anchor_bry_(Bry_.Add(url.Anchor_bry(), args_bry));
		}
		else
			url.Args_(args);
		return url.Err() == Gfo_url.Err_none;
	}
	/**
	 * Fills url for an upload.wikimedia.org path by mapping it to a "File:" page on the owning wiki.
	 * segs must have at least 2 items; EX: "/wikipedia/commons/a/ab/C.svg" -> wiki="commons.wikimedia.org" page="File:C.svg"
	 */
	private void Parse__upload(Xoa_url url, byte[][] segs) {
		byte[] domain_bry = segs[0];							// type seems to be the 1st seg	; EX: "/wikipedia/"
		byte[] sub_bry = segs[1];								// lang/type seems to be 2nd seg; EX: "en", "fr"; "commons"
		byte[] lang_bry = sub_bry;
		if (upload_segs_hash.Has(sub_bry)) {					// wikimedia links will have fmt of "/wikipedia/commons"; must change to wikimedia
			domain_bry = Xow_wiki_domain_.Seg_wikimedia_bry;
			lang_bry = Xol_lang_itm_.Key__unknown;
		}
		tmp_bfr.Clear().Add(sub_bry).Add_byte(Byte_ascii.Dot)	// add lang/type + .;	EX: "en."; "fr."; "commons."
			.Add(domain_bry).Add(Bry_dot_org);					// add type + .org;		EX: "wikipedia.org"; "wikimedia.org";
		url.Segs_ary_(Xoa_url_parser.Bry_wiki_name_bry);		// NOTE: add "wiki" as seg else will have "/site/commons.wikimedia.org/File:A" which will be invalid (needs to be "/site/commons.wikimedia.org/wiki/File:A")
		url.Lang_bry_(lang_bry);
		url.Wiki_bry_(tmp_bfr.XtoAryAndClear());
		byte[] page_bry = segs.length > 5 && Bry_.Eq(segs[2], Xof_url_bldr.Bry_thumb)
			? segs[5]											// thumb url; file name is 6th seg; EX: "/wikipedia/commons/thumb/7/70/A.png/220px-A.png"
			: gfo_url.Page();
		url.Page_bry_(tmp_bfr.Add(Bry_file).Add(page_bry).XtoAryAndClear());
		url.Anchor_bry_(Bry_.Empty);
	}
	/** Returns an empty array if ary is exactly {"w"}; otherwise returns ary unchanged. */
	private static byte[][] Segs_ary_remove_w(byte[][] ary) {
		int len = ary.length;
		if (len != 1) return ary;								// not exactly 1 seg; leave as is
		byte[] only = ary[0];
		return only.length == 1 && only[0] == Byte_ascii.Ltr_w	// only seg is "w"; drop it
			? Bry_.Ary_empty
			: ary
			;
	}

	/** Parses raw (as utf8) into a new Xoa_url, resolved against cur_wiki; equivalent to the full overload with from_url_bar = false. */
	public static Xoa_url Parse_url(Xoa_app app, Xow_wiki cur_wiki, String raw) {
		Xoa_url rv = new Xoa_url();
		byte[] raw_bry = Bry_.new_utf8_(raw);
		return Parse_url(rv, app, cur_wiki, raw_bry, 0, raw_bry.length, false);
	}
	/**
	 * Parses raw into rv and resolves which wiki and page it refers to; on return rv has Wiki, Wiki_bry and Page_bry set.
	 * cur_wiki is used when raw does not name a wiki. from_url_bar controls whether a Main_Page request loads the wiki to look up its actual main page.
	 */
	public static Xoa_url Parse_url(Xoa_url rv, Xoa_app app, Xow_wiki cur_wiki, byte[] raw, int bgn, int end, boolean from_url_bar) {
		Xow_wiki wiki = null; Bry_bfr_mkr bfr_mkr = app.Utl_bry_bfr_mkr();
		byte[] cur_wiki_key = cur_wiki.Domain_bry();
		byte[] page_bry = Bry_.Empty;
		boolean page_is_main_page = false;
		if (app.Url_parser().Parse(rv, raw, bgn, end)) {		// parse passed; url has protocol; take Page; EX: "http://en.wikipedia.org/wiki/Earth"
			wiki = Parse_url__wiki(app, rv.Wiki_bry());
			if (rv.Segs_ary().length == 0 && rv.Page_bry() != null && Bry_.Eq(rv.Page_bry(), Xoa_url_parser.Bry_wiki_name))	// wiki, but directly after site; EX:en.wikipedia.org/wiki
				page_is_main_page = true;
			else
				page_bry = Parse_url__combine(bfr_mkr, null, rv.Segs_ary(), rv.Page_bry());	// NOTE: pass null in for wiki b/c wiki has value, but should not be used for Page
		}
		else {													// parse failed; url doesn't have protocol
			byte[] wiki_bry = rv.Wiki_bry();
			if (Bry_.Len_gt_0(wiki_bry)) {						// NOTE: wiki_bry null when passing in Category:A from home_wiki
				Xow_xwiki_itm xwiki_itm = app.User().Wiki().Xwiki_mgr().Get_by_key(wiki_bry);	// see if url.Wiki_bry is actually wiki;
				if (	xwiki_itm != null												// null-check
					&&	!xwiki_itm.Type_is_lang(cur_wiki.Lang().Lang_id()))				// in xwiki, but not lang; EX: "fr.wikipedia.org" vs "fr"; ca.s:So/Natura_del_so; DATE:2014-04-26
					wiki = app.Wiki_mgr().Get_by_key_or_make(xwiki_itm.Domain());
			}
			if (rv.Page_bry() == null) {						// 1 seg; EX: "Earth"; "fr.wikipedia.org"
				if (wiki != null) {								// wiki_bry is known wiki; EX: "fr.wikipedia.org"
					wiki = app.Wiki_mgr().Get_by_key_or_make(wiki_bry);	// call get again, but this time "make" it; NOTE(review): the lookup above already uses Get_by_key_or_make, but with xwiki_itm.Domain() instead of wiki_bry; confirm that key and domain always match here
					page_is_main_page = true;
				}
				else {											// otherwise, assume page name
					wiki = Parse_url__wiki(app, cur_wiki_key);
					page_bry = wiki_bry;
				}
			}
			else {												// 2+ segs
				if (wiki != null) {								// valid wiki; handle en.wikisource.org/Hamlet and en.wikisource.org/Hamlet/Act I
					if (rv.Segs_ary().length == 0 && Bry_.Eq(rv.Page_bry(), Xoa_url_parser.Bry_wiki_name))
						page_is_main_page = true;
					else
						page_bry = Parse_url__combine(bfr_mkr, Xoa_page_.Main_page_bry_empty, rv.Segs_ary(), rv.Page_bry());
				}
				else {											// invalid wiki; assume cur_wiki; EX: Hamlet/Act I
					page_bry = rv.Page_bry();
					byte[][] segs_ary = rv.Segs_ary();
					if (segs_ary.length > 0)
						page_bry = segs_ary[0];
					int colon_pos = Bry_finder.Find_fwd(page_bry, Byte_ascii.Colon);	// check for alias; EX: w:Earth
					boolean xwiki_set = false;
					if (colon_pos != Bry_.NotFound) {			// alias found
						Xow_xwiki_itm xwiki = cur_wiki.Xwiki_mgr().Get_by_mid(page_bry, 0, colon_pos);
						if (xwiki != null) {
							wiki = app.Wiki_mgr().Get_by_key_or_make(xwiki.Domain());
							page_bry = Bry_.Mid(page_bry, colon_pos + 1, page_bry.length);
							if (rv.Segs_ary().length == 0)		// handle xwiki without segs; EX: commons:Commons:Media_of_the_day; DATE:2014-02-19
								rv.Segs_ary_(new byte[][] {Bry_wiki_name, page_bry});	// create segs of "/wiki/Page"
							else {
								rv.Segs_ary()[0] = page_bry;
								page_bry = Parse_url__combine(bfr_mkr, rv.Wiki_bry(), rv.Segs_ary(), rv.Page_bry());
							}
							xwiki_set = true;
						}
					}
					if (!xwiki_set) {
						wiki = Parse_url__wiki(app, cur_wiki_key);
						page_bry = Parse_url__combine(bfr_mkr, rv.Wiki_bry(), rv.Segs_ary(), rv.Page_bry());
					}
				}
			}
		}
		if (page_is_main_page) {								// Main_Page requested; EX: "zh.wikipedia.org"; "zh.wikipedia.org/wiki/"; DATE:2014-02-16
			if (from_url_bar) {
				wiki.Init_assert();								// NOTE: must call Init_assert to load Main_Page; only call if from url_bar, else all sister wikis will be loaded when parsing Sister_wikis panel
				page_bry = wiki.Props().Main_page();
			}
			else
				page_bry = Xoa_page_.Main_page_bry_empty;
		}
		if (rv.Anchor_bry() != null) {
			byte[] anchor_bry = app.Url_converter_id().Encode(rv.Anchor_bry());	// reencode for anchors (which use . encoding, not % encoding); EX.WP: Enlightenment_Spain#Enlightened_despotism_.281759%E2%80%931788.29
			rv.Anchor_bry_(anchor_bry);
		}
		Xoa_ttl ttl = Xoa_ttl.parse_(wiki, page_bry);
		if (ttl != null) {										// can still be empty; EX: "en.wikipedia.org"
			Xow_xwiki_itm lang_xwiki = ttl.Wik_itm();
			if (lang_xwiki != null && lang_xwiki.Type_is_lang(wiki.Lang().Lang_id())) {	// format of http://en.wikipedia.org/wiki/fr:A
				wiki = app.Wiki_mgr().Get_by_key_or_make(lang_xwiki.Domain());
				page_bry = ttl.Page_txt();
			}
		}
		rv.Wiki_(wiki);
		rv.Wiki_bry_(wiki.Domain_bry());
		rv.Page_bry_(page_bry);
		return rv;
	}
	/** Returns the wiki registered under key in the user wiki's xwiki_mgr; falls back to the user wiki itself when key is not registered. */
	private static Xow_wiki Parse_url__wiki(Xoa_app app, byte[] key) {
		Xow_wiki rv = null;
		Xow_xwiki_itm xwiki = app.User().Wiki().Xwiki_mgr().Get_by_key(key);
		if (xwiki == null)
			rv = app.User().Wiki();
		else
			rv = app.Wiki_mgr().Get_by_key_or_make(xwiki.Domain());
		return rv;
	}
	/** Joins wiki, segs and page with "/", skipping null parts and skipping a leading "wiki" seg; EX: {"wiki", "A"}, "b" -> "A/b" */
	private static byte[] Parse_url__combine(Bry_bfr_mkr bry_bfr_mkr, byte[] wiki, byte[][] segs, byte[] page) {
		Bry_bfr bfr = bry_bfr_mkr.Get_b512();
		if (wiki != null) bfr.Add(wiki);
		if (segs != null) {
			int segs_len = segs.length;
			for (int i = 0; i < segs_len; i++) {
				byte[] seg = segs[i];
				if (i == 0 && Bry_.Eq(seg, Xoa_url_parser.Bry_wiki_name)) continue;	// skip 1st seg if "wiki"
				if (bfr.Len() > 0) bfr.Add_byte(Byte_ascii.Slash);
				bfr.Add(seg);
			}
		}
		if (page != null) {
			if (bfr.Len() > 0) bfr.Add_byte(Byte_ascii.Slash);
			bfr.Add(page);
		}
		return bfr.Mkr_rls().XtoAryAndClear();
	}

	/**
	 * Parses text typed into the url bar: strips a mobile ".m." domain part, expands url macros, then parses with from_url_bar = true.
	 * If the result names an invalid domain, the text is treated as a page title in the passed wiki instead.
	 */
	public static Xoa_url Parse_from_url_bar(Xoa_app app, Xow_wiki wiki, String s) {
		byte[] bry = Bry_.new_utf8_(s);
		bry = Parse_from_url_bar__strip_mobile(bry);
		byte[] fmt = app.Gui_mgr().Url_macro_mgr().Fmt_or_null(bry);
		if (fmt != null) bry = fmt;
		Xoa_url rv = new Xoa_url();
		Xoa_url_parser.Parse_url(rv, app, wiki, bry, 0, bry.length, true);
		if (app.Wiki_mgr().Wiki_regy().Url_is_invalid_domain(rv)) {	// handle lang_code entered; EX: "war" should redirect to "war" article in current wiki, not war.wikipedia.org; DATE:2014-02-07
			rv.Page_bry_(rv.Wiki_bry());
			rv.Wiki_(wiki);
			rv.Wiki_bry_(wiki.Domain_bry());
		}
		return rv;
	}
	/** Removes ".m" when the 1st dot in v is directly followed by "m." or "M."; otherwise returns v unchanged; EX: "en.m.wikipedia.org" -> "en.wikipedia.org" */
	private static byte[] Parse_from_url_bar__strip_mobile(byte[] v) {// DATE:2014-05-03
		int pos = Bry_finder.Find_fwd(v, Byte_ascii.Dot);
		if (	pos == Bry_finder.Not_found		// no dot; EX: "A"
			||	pos + 2 >= v.length				// not enough space for .m.; EX: "A.b"
			)
			return v;
		switch (v[pos + 1]) {					// check for m
			case Byte_ascii.Ltr_M:
			case Byte_ascii.Ltr_m:
				break;
			default:
				return v;
		}
		if (v[pos + 2] != Byte_ascii.Dot) return v;
		return Bry_.Add(Bry_.Mid(v, 0, pos), Bry_.Mid(v, pos + 2));	// skip ".m"
	}

//		private static final byte Tid_xowa = (byte)Gfo_url_parser.Protocol_file_tid + 1;
	// ids and keys for the query args that Parse recognizes
	private static final byte Id_arg_redirect = 0, Id_arg_uselang = 1, Id_arg_title = 2, Id_arg_action = 3, Id_arg_fulltext = 4;
	private static final byte[] Bry_arg_redirect = Bry_.new_ascii_("redirect"), Bry_arg_uselang = Bry_.new_ascii_("uselang"), Bry_arg_title = Bry_.new_ascii_("title"), Bry_arg_fulltext = Bry_.new_ascii_("fulltext");
	private static final byte[] Bry_upload_wikimedia_org = Bry_.new_ascii_("upload.wikimedia.org"), Bry_dot_org = Bry_.new_ascii_(".org")
		, Bry_file = Bry_.new_ascii_("File:");	// NOTE: File does not need i18n; is a canonical namespace
	public static final byte[] Bry_wiki_name = Bry_.new_ascii_("wiki");
	private static final byte[][] Bry_wiki_name_bry = new byte[][] {Bry_wiki_name};
	public static final byte[] Bry_arg_action_eq_edit = Bry_.new_ascii_("action=edit")
		, Bry_arg_action = Bry_.new_ascii_("action")
		, Bry_arg_action_edit = Bry_.new_ascii_("edit")
		;
	private static final Hash_adp_bry qry_args_hash = Hash_adp_bry.ci_().Add_bry_byte(Bry_arg_redirect, Id_arg_redirect).Add_bry_byte(Bry_arg_uselang, Id_arg_uselang).Add_bry_byte(Bry_arg_title, Id_arg_title).Add_bry_byte(Bry_arg_action, Id_arg_action).Add_bry_byte(Bry_arg_fulltext, Id_arg_fulltext);
	private static final Hash_adp_bry upload_segs_hash = Hash_adp_bry.ci_().Add_bry_bry(Xow_wiki_domain_.Key_commons_bry);//.Add_bry_bry(Xow_wiki_domain_.Key_species_bry).Add_bry_bry(Xow_wiki_domain_.Key_meta_bry);
}

View File

@@ -0,0 +1,169 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
import org.junit.*;
/** Tests for Xoa_url_parser.Parse_url; unless stated otherwise, urls are parsed with en.wikipedia.org as the current wiki. */
public class Xoa_url_parser_basic_tst {
	private Xoa_url_parser_chkr fxt = new Xoa_url_parser_chkr();
	@Before public void init() {fxt.Reset();}

	/** Parses raw with the default wiki and expects wiki "en.wikipedia.org" plus the given page. */
	private void Test_en_page(String expd_page, String raw) {
		fxt.Expd_wiki("en.wikipedia.org").Expd_page(expd_page).Test_parse_w_wiki(raw);
	}
	/** Parses raw with the default wiki and expects the given wiki with an empty page. */
	private void Test_domain_only(String expd_wiki, String raw) {
		fxt.Expd_wiki(expd_wiki).Expd_page("").Test_parse_w_wiki(raw);
	}
	/** Registers domain under itself in the user wiki's xwiki_mgr. */
	private void Init_user_xwiki(String domain) {
		fxt.App().User().Wiki().Xwiki_mgr().Add_full(domain, domain);
	}

	@Test public void Basic() {
		Test_en_page("A", "en.wikipedia.org/wiki/A");
	}
	@Test public void Abrv() {	// deprecate; no longer needed with shortcuts
		Test_en_page("A", "en.wikipedia.org/A");
	}
	@Test public void Http_basic() {
		Test_en_page("A", "http://en.wikipedia.org/wiki/A");
	}
	@Test public void Relative() {
		Test_en_page("A", "//en.wikipedia.org/wiki/A");
	}
	@Test public void Name() {
		Test_en_page("A", "A");
	}
	@Test public void Sub_1() {
		Test_en_page("A/b", "A/b");
	}
	@Test public void Sub_2() {
		Test_en_page("A/b/c", "A/b/c");
	}
	@Test public void Sub_3() {
		Test_en_page("A/b", "en.wikipedia.org/wiki/A/b");
	}
	@Test public void Ns_category() {
		Test_en_page("Category:A", "Category:A");
	}
	@Test public void Ns_file() {
		Test_en_page("File:A", "File:A");
	}
	@Test public void Anchor() {
		fxt.Expd_wiki("en.wikipedia.org").Expd_page("A").Expd_anchor("b").Test_parse_w_wiki("A#b");
	}
	@Test public void Upload() {
		Init_user_xwiki("commons.wikimedia.org");
		fxt.Reset();	// plain file
		fxt.Expd_wiki("commons.wikimedia.org").Expd_page("File:C.svg").Test_parse_w_wiki("http://upload.wikimedia.org/wikipedia/commons/a/ab/C.svg");
		fxt.Reset();	// thumb
		fxt.Expd_wiki("commons.wikimedia.org").Expd_page("File:A.png").Test_parse_w_wiki("http://upload.wikimedia.org/wikipedia/commons/thumb/7/70/A.png/220px-A.png");
	}
	@Test public void Parse_lang() {
		Xow_xwiki_mgr xwiki_mgr = fxt.Wiki().Xwiki_mgr();
		byte[] key = Bry_.new_ascii_("fr");
		byte[] domain = Bry_.new_ascii_("fr.wikipedia.org");
		byte[] fmt = Bry_.new_ascii_("http://fr.wikipedia.org/~{0}");
		xwiki_mgr.Add_full(key, domain, fmt);
		fxt.Expd_wiki("fr.wikipedia.org").Expd_page("A").Test_parse_w_wiki("http://en.wikipedia.org/wiki/fr:A");
	}
	@Test public void Alias_wiki() {
		Xow_xwiki_mgr xwiki_mgr = fxt.Wiki().Xwiki_mgr();
		xwiki_mgr.Add_full(Bry_.new_ascii_("s"), Bry_.new_ascii_("en.wikisource.org"));
		fxt.Expd_wiki("en.wikisource.org").Expd_page("A/b/c").Test_parse_w_wiki("s:A/b/c");
	}
	@Test public void Xwiki_no_segs() {	// PURPOSE: handle xwiki without full url; EX: "commons:Commons:Media_of_the_day"; DATE:2014-02-19
		Xow_xwiki_mgr xwiki_mgr = fxt.Wiki().Xwiki_mgr();
		xwiki_mgr.Add_full(Bry_.new_ascii_("s"), Bry_.new_ascii_("en.wikisource.org"));
		fxt.Expd_wiki("en.wikisource.org").Expd_page("Project:A").Test_parse_w_wiki("s:Project:A");
	}
	@Test public void Domain_only() {
		Init_user_xwiki("fr.wikipedia.org");
		Test_domain_only("fr.wikipedia.org", "fr.wikipedia.org");
	}
	@Test public void Domain_and_wiki() {
		Init_user_xwiki("fr.wikipedia.org");
		Test_domain_only("fr.wikipedia.org", "fr.wikipedia.org/wiki");
	}
	@Test public void Domain_and_wiki_w_http() {
		Init_user_xwiki("fr.wikipedia.org");
		Test_domain_only("fr.wikipedia.org", "http://fr.wikipedia.org/wiki");
	}
	@Test public void Redirect() {
		Test_en_page("A", "A?redirect=no");
	}
	@Test public void Namespace_in_different_wiki() {	// PURPOSE.fix: namespaced titles would default to default_wiki instead of current_wiki
		Xow_wiki cur_wiki = fxt.Wiki_wikisource();
		fxt.Expd_wiki("en.wikisource.org").Expd_page("Category:A").Test_parse_w_wiki(cur_wiki, "Category:A");
	}
	@Test public void Action_is_edit() {
		fxt.Expd_wiki("en.wikipedia.org").Expd_page("A").Expd_action_is_edit_y().Test_parse_w_wiki("A?action=edit");
	}
	@Test public void Assert_state_cleared() {	// PURPOSE.fix: action_is_edit (et. al.) was not being cleared on parse even though Xoa_url reused; DATE:20121231
		Xoa_url url = new Xoa_url();
		byte[] raw_edit = Bry_.new_ascii_("A?action=edit");
		Xoa_url_parser.Parse_url(url, fxt.App(), fxt.Wiki(), raw_edit, 0, raw_edit.length, false);
		Tfds.Eq(true, url.Action_is_edit());
		byte[] raw_view = Bry_.new_ascii_("B");	// reparse with same url; flag must be reset
		Xoa_url_parser.Parse_url(url, fxt.App(), fxt.Wiki(), raw_view, 0, raw_view.length, false);
		Tfds.Eq(false, url.Action_is_edit());
	}
	@Test public void Query_arg() {	// PURPOSE.fix: query args were not printing out
		String expd = "en.wikipedia.org/wiki/Special:Search/Earth?fulltext=yes";
		Xoa_url url = new Xoa_url();
		byte[] raw = Bry_.new_ascii_("en.wikipedia.org/wiki/Special:Search/Earth?fulltext=yes");
		Xoa_url_parser.Parse_url(url, fxt.App(), fxt.Wiki(), raw, 0, raw.length, false);
		Xoa_url_parser parser = new Xoa_url_parser();
		Tfds.Eq(expd, parser.Build_str(url));
	}
	@Test public void Anchor_with_slash() {	// PURPOSE: A/b#c/d was not parsing correctly
		fxt.Expd_page("A/b").Expd_anchor("c.2Fd").Test_parse_w_wiki("A/b#c/d");
	}
	@Test public void Slash() {
		fxt.Reset();
		Test_en_page("/A", "en.wikipedia.org/wiki//A");
		fxt.Reset();
		Test_en_page("A//b", "en.wikipedia.org/wiki/A//b");
		fxt.Reset();
		Test_en_page("//A", "en.wikipedia.org/wiki///A");
	}
	@Test public void Question_is_page() {
		fxt.Expd_wiki("en.wikipedia.org").Expd_page("A?B").Expd_anchor(null).Test_parse_w_wiki("A?B");
	}
	@Test public void Question_is_anchor() {
		fxt.Expd_wiki("en.wikipedia.org").Expd_page("A").Expd_anchor("b.3Fc").Test_parse_w_wiki("A#b?c");
	}
}
/**
 * Test fixture for Xoa_url_parser: lazily creates an app with en.wikipedia.org and en.wikisource.org,
 * collects expected values through the Expd_* methods, and checks a parsed Xoa_url against them in Chk.
 */
class Xoa_url_parser_chkr implements Tst_chkr {
	private Xoa_app app;
	private Xow_wiki wiki;
	private Xow_wiki wiki_wikisource;
	private String expd_wiki_str;
	private String expd_page;
	private String expd_anchor;
	private byte expd_action_is_edit = Bool_.__byte;	// FIX: was named expd_anchor_is_edit, but it holds the expectation for Action_is_edit; Bool_.__byte means "not set"

	/** Creates the app and wikis on first call, then clears all expectations; returns this for chaining. */
	public Xoa_url_parser_chkr Reset() {
		if (app == null) {
			app = Xoa_app_fxt.app_();
			wiki = Xoa_app_fxt.wiki_(app, "en.wikipedia.org");
			wiki_wikisource = Xoa_app_fxt.wiki_(app, "en.wikisource.org");
			app.User().Wiki().Xwiki_mgr().Add_full("en.wikipedia.org", "en.wikipedia.org");
			app.User().Wiki().Xwiki_mgr().Add_full("en.wikisource.org", "en.wikisource.org");
		}
		expd_wiki_str = expd_page = expd_anchor = null;
		expd_action_is_edit = Bool_.__byte;
		return this;
	}
	public Xoa_app App() {return app;}
	public Xow_wiki Wiki() {return wiki;}
	public Xow_wiki Wiki_wikisource() {return wiki_wikisource;}
	public Class<?> TypeOf() {return Xoa_url.class;}

	public Xoa_url_parser_chkr Expd_wiki(String v) {this.expd_wiki_str = v; return this;}
	public Xoa_url_parser_chkr Expd_page(String v) {this.expd_page = v; return this;}
	public Xoa_url_parser_chkr Expd_anchor(String v) {this.expd_anchor = v; return this;}
	public Xoa_url_parser_chkr Expd_action_is_edit_y() {this.expd_action_is_edit = Bool_.Y_byte; return this;}
	public Xoa_url_parser_chkr Expd_action_is_edit_n() {this.expd_action_is_edit = Bool_.N_byte; return this;}

	/**
	 * Compares actl_obj (a Xoa_url) against the expectations and returns the sum of the Tst_val results.
	 * The 1st arg of each Tst_val call is true when no expectation was set for that property; presumably Tst_val skips the check then (TODO confirm against Tst_mgr).
	 */
	public int Chk(Tst_mgr mgr, String path, Object actl_obj) {
		Xoa_url actl = (Xoa_url)actl_obj;
		int rv = 0;
		rv += mgr.Tst_val(expd_wiki_str == null, path, "wiki", expd_wiki_str, String_.new_utf8_(actl.Wiki_bry()));
		rv += mgr.Tst_val(expd_page == null, path, "page", expd_page, String_.new_utf8_(actl.Page_bry()));
		rv += mgr.Tst_val(expd_anchor == null, path, "anchor", expd_anchor, String_.new_utf8_(actl.Anchor_bry()));
		rv += mgr.Tst_val(expd_action_is_edit == Bool_.__byte, path, "action_is_edit", expd_action_is_edit == Bool_.Y_byte, actl.Action_is_edit());	// FIX: label used to read "anchor_is_edit", which named the wrong property in failure reports
		return rv;
	}
	/** Parses raw as url-bar input with the default wiki and asserts that the full url (wiki + page) equals expd. */
	public Xoa_url_parser_chkr Test_parse_from_url_bar(String raw, String expd) {
		Xoa_url actl_url = Xoa_url_parser.Parse_from_url_bar(app, wiki, raw);
		Tfds.Eq(expd, String_.new_utf8_(actl_url.X_to_full()));	// FIX: decode as utf8 (was ascii) to match how input is encoded and how Chk decodes
		return this;
	}
	/** Parses raw with the default wiki (en.wikipedia.org) as current wiki and checks the result against the expectations. */
	public void Test_parse_w_wiki(String raw) {Test_parse_w_wiki(wiki, raw);}
	/** Parses raw with w as current wiki and checks the result against the expectations. */
	public void Test_parse_w_wiki(Xow_wiki w, String raw) {
		Xoa_url url = Xoa_url_parser.Parse_url(app, w, raw);
		Tst_mgr tst_mgr = new Tst_mgr();
		tst_mgr.Tst_obj(this, url);
	}
}

View File

@@ -0,0 +1,25 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
import org.junit.*;
/** Tests for parsing MediaWiki-style "index.php?title=" links. */
public class Xoa_url_parser_mw_links_tst {
  private Xoa_url_parser_chkr fxt = new Xoa_url_parser_chkr();
  @Before public void init() {
    fxt.Reset();
  }
  // PURPOSE: fix /w/ showing up as seg; DATE:2014-05-30
  @Test public void Title_remove_w() {
    fxt
      .Expd_page("A")
      .Expd_wiki("en.wikipedia.org")
      .Test_parse_w_wiki("http://en.wikipedia.org/w/index.php?title=A");
  }
}

View File

@@ -0,0 +1,57 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
import org.junit.*;
/** Tests for Xoa_url_parser.Parse_from_url_bar: each case gives the typed text and the expected full url. */
public class Xoa_url_parser_url_bar_tst {
  private Xoa_url_parser_chkr fxt = new Xoa_url_parser_chkr();
  @Before public void init() {
    fxt.Reset();
  }
  /** Registers key -> domain in the user wiki's xwiki manager. */
  private void Init_xwiki(String key, String domain) {
    fxt.App().User().Wiki().Xwiki_mgr().Add_full(key, domain);
  }
  @Test public void Basic() {
    fxt.Test_parse_from_url_bar("Page_1", "en.wikipedia.org/wiki/Page_1"); // basic
  }
  @Test public void Lang() {
    Init_xwiki("uk", "uk.wikipedia.org");
    // lang-like page (uk=Ukraine) should not try to open wiki; DATE:2014-02-07
    fxt.Test_parse_from_url_bar("uk", "en.wikipedia.org/wiki/uk");
  }
  @Test public void Lang_like() {
    // NOTE: fmt needed for Type_is_lang
    byte[] key = Bry_.new_ascii_("uk");
    byte[] domain = Bry_.new_ascii_("uk.wikipedia.org");
    byte[] fmt = Bry_.new_ascii_("http://~{1}.wikipedia.org");
    fxt.App().User().Wiki().Xwiki_mgr().Add_full(key, domain, fmt);
    // uk/A should not be interpreted as wiki="uk" page="A"; DATE:2014-04-26
    fxt.Test_parse_from_url_bar("uk/A", "en.wikipedia.org/wiki/uk/A");
  }
  @Test public void Macro() {
    Init_xwiki("fr.wikisource.org", "fr.wikisource.org");
    fxt.Test_parse_from_url_bar("fr.s:Auteur:Shakespeare", "fr.wikisource.org/wiki/Auteur:Shakespeare"); // url_macros
  }
  @Test public void Home() {
    fxt
      .Test_parse_from_url_bar("home", "en.wikipedia.org/wiki/home")           // home should go to current wiki's home; DATE:2014-02-09
      .Test_parse_from_url_bar("home/wiki/Main_Page", "home/wiki/Main_Page");  // home Main_Page should go to home; DATE:2014-02-09
  }
  @Test public void Custom() {
    String zh_domain = "zh.wikipedia.org";
    Init_xwiki(zh_domain, zh_domain);
    gplx.xowa.wikis.Xoa_wiki_regy.Make_wiki_dir(fxt.App(), zh_domain);
    fxt.App().Wiki_mgr().Get_by_key_or_make(Bry_.new_ascii_(zh_domain)).Props().Main_page_(Bry_.new_ascii_("Zh_Main_Page"));
    fxt
      .Test_parse_from_url_bar("zh.w:", "zh.wikipedia.org/wiki/Zh_Main_Page")
      .Test_parse_from_url_bar("zh.w:Main_Page", "zh.wikipedia.org/wiki/Main_Page");
  }
  // PURPOSE: handle mobile links; DATE:2014-05-03
  @Test public void Mobile() {
    fxt
      .Test_parse_from_url_bar("en.m.wikipedia.org/wiki/A", "en.wikipedia.org/wiki/A")                       // basic
      .Test_parse_from_url_bar("en.M.wikipedia.org/wiki/A", "en.wikipedia.org/wiki/A")                       // upper
      .Test_parse_from_url_bar("A", "en.wikipedia.org/wiki/A")                                               // bounds-check: 0
      .Test_parse_from_url_bar("A.", "en.wikipedia.org/wiki/A.")                                             // bounds-check: 1
      .Test_parse_from_url_bar("A.b", "en.wikipedia.org/wiki/A.b")                                           // bounds-check: 2
      .Test_parse_from_url_bar("A.b.m.", "en.wikipedia.org/wiki/A.b.m.")                                     // false-match
      .Test_parse_from_url_bar("en.x.wikipedia.org/wiki/A", "en.wikipedia.org/wiki/en.x.wikipedia.org/A");   // fail
  }
}

View File

@@ -0,0 +1,32 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
import org.junit.*;
/** Tests for Xoa_url.Eq_page. */
public class Xoa_url_tst {
  Xoa_url_fxt fxt = new Xoa_url_fxt();
  @Before public void init() {
    fxt.Clear();
  }
  @Test public void Eq_page() {
    // same wiki, page and redirect_force -> equal
    Xoa_url lhs = fxt.url_("en.wikipedia.org", "Earth", false);
    Xoa_url rhs = fxt.url_("en.wikipedia.org", "Earth", false);
    fxt.Eq_page_tst(lhs, rhs, true);
    // redirect_force differs -> not equal
    lhs = fxt.url_("en.wikipedia.org", "Earth", false);
    rhs = fxt.url_("en.wikipedia.org", "Earth", true);
    fxt.Eq_page_tst(lhs, rhs, false);
  }
}
/** Fixture for Xoa_url_tst: builds urls and asserts on Eq_page. */
class Xoa_url_fxt {
  /** Nothing to reset; kept so the test's @Before hook has something to call. */
  public void Clear() {}
  /** Builds a Xoa_url with the given wiki, page (both converted via Bry_.new_utf8_) and redirect_force flag. */
  public Xoa_url url_(String wiki_str, String page_str, boolean redirect_force) {
    byte[] wiki_bry = Bry_.new_utf8_(wiki_str);
    byte[] page_bry = Bry_.new_utf8_(page_str);
    Xoa_url rv = new Xoa_url().Wiki_bry_(wiki_bry).Page_bry_(page_bry).Redirect_force_(redirect_force);
    return rv;
  }
  /** Asserts that lhs.Eq_page(rhs) returns expd. */
  public void Eq_page_tst(Xoa_url lhs, Xoa_url rhs, boolean expd) {
    boolean actl = lhs.Eq_page(rhs);
    Tfds.Eq(expd, actl);
  }
}

View File

@@ -0,0 +1,70 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
/** Parsed href: the raw bytes plus the wiki / page / anchor parts and a Tid_* classification. */
public class Xoh_href {
  public static final byte Tid_null = 0;
  public static final byte Tid_http = 1;
  public static final byte Tid_file = 2;
  public static final byte Tid_wiki = 3;
  public static final byte Tid_site = 4;
  public static final byte Tid_xcmd = 5;
  public static final byte Tid_anchor = 6;
  public static final byte Tid_xowa = 7;

  private byte[] raw;
  private byte[] wiki;
  private byte[] page;
  private byte[] anchor;
  private byte tid;
  private byte protocol_tid;

  public byte[] Raw() {return raw;}
  public Xoh_href Raw_(byte[] v) {raw = v; return this;}
  public byte[] Wiki() {return wiki;}
  public Xoh_href Wiki_(byte[] v) {wiki = v; return this;}
  public byte[] Page() {return page;}
  public Xoh_href Page_(byte[] v) {page = v; return this;}
  public byte[] Anchor() {return anchor;}
  public Xoh_href Anchor_(byte[] v) {anchor = v; return this;}
  public byte Tid() {return tid;}
  public Xoh_href Tid_(byte v) {tid = v; return this;}
  public byte Protocol_tid() {return protocol_tid;}

  /** Resets this href for reuse: stores raw and protocol_tid, nulls wiki / page / anchor and sets tid to Tid_null. */
  public void Init(byte[] raw, byte protocol_tid) {
    this.raw = raw;
    this.protocol_tid = protocol_tid;
    this.wiki = null;
    this.page = null;
    this.anchor = null;
    this.tid = Tid_null;
  }
  /**
   * Writes a display form of this href to bfr; currently used for status bar (not embedded in any html).
   * @param bfr  buffer that receives the text
   * @param full true for the long form ("wiki/wiki/page#anchor", which can be copied and pasted into firefox url bar);
   *             false for the brief form
   */
  public void Print_to_bfr(Bry_bfr bfr, boolean full) {
    // full protocol; embed all; EX: "http://en.wikipedia.org/wiki/A"; "file:///C/dir/file.txt"
    if (tid == Tid_http || tid == Tid_file) {
      bfr.Add(raw);
      return;
    }
    if (tid == Tid_xowa) {
      bfr.Add(page);
      return;
    }
    if (full) {
      // only wiki / site / anchor hrefs have a full form; every other tid prints nothing
      if (tid == Tid_wiki || tid == Tid_site || tid == Tid_anchor) {
        bfr.Add(wiki);                              // add wiki_key; EX: "en.wikipedia.org"
        bfr.Add(Xoh_href_parser.Href_wiki_bry);     // add wiki_str; EX: "/wiki/"
        bfr.Add(page);                              // add page; EX: "A"
        Add_anchor(bfr);
      }
      return;
    }
    if (tid == Tid_site) {
      bfr.Add(wiki).Add_byte(Byte_ascii.Slash);     // add wiki_key; EX: "en.wikipedia.org/"
      bfr.Add(page);                                // add page; EX: "A"
    }
    else if (tid == Tid_wiki) {
      bfr.Add(page);                                // add page; EX: "A"
    }
    // brief form: the anchor is appended for every remaining tid, including Tid_anchor which adds nothing above
    Add_anchor(bfr);
  }
  /** Appends "#" + anchor when an anchor is set; EX: "#B". */
  private void Add_anchor(Bry_bfr bfr) {
    if (anchor != null)
      bfr.Add_byte(Byte_ascii.Hash).Add(anchor);
  }
}

View File

@@ -0,0 +1,235 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
import gplx.xowa.net.*; import gplx.xowa.parsers.lnkes.*;
/**
 * Converts between raw hrefs and Xoh_href items:
 * Parse classifies an href (protocol url, "/wiki/", "/site/", "/xcmd/", "#anchor" or file) and fills a Xoh_href;
 * Build_to_bfr / Build_to_bry write the href for a title.
 */
public class Xoh_href_parser {
  private Url_encoder encoder;
  private Gfo_url_parser url_parser;
  private Gfo_url tmp_url = new Gfo_url();                        // scratch url; overwritten by every Parse call
  private ByteTrieMgr_slim segs = ByteTrieMgr_slim.ci_ascii_();   // NOTE:ci.ascii:XO_const.en; /wiki/, /site/ etc.
  private Bry_bfr bfr_encoder = Bry_bfr.reset_(255);              // holds the encoded page while Build_to_bfr writes the prefix
  private Bry_bfr tmp_bfr = Bry_bfr.reset_(255);                  // used by Build_to_bry only
  public Xoh_href_parser(Url_encoder encoder, Gfo_url_parser url_parser) {
    this.encoder = encoder;
    this.url_parser = url_parser;
    url_parser.Init_protocol(Protocol_xowa_tid, Xop_lnke_wkr.Str_xowa_protocol);
    segs.Add_stubs(Seg__ary);
  }
  /** String convenience overload; converts raw with Bry_.new_utf8_ and delegates to the byte[] overload. */
  public void Parse(Xoh_href rv, String raw, Xow_wiki wiki, byte[] cur_page) {
    Parse(rv, Bry_.new_utf8_(raw), wiki, cur_page);
  }
  /**
   * Parses raw into rv.
   * @param rv       href to fill; it is re-initialized first
   * @param raw      href bytes; EX: "file:///wiki/A", "/site/en.wiktionary.org/wiki/A", "#a", "http://a.org"
   * @param wiki     current wiki
   * @param cur_page current page; used as the page of anchor-only hrefs
   */
  public void Parse(Xoh_href rv, byte[] raw, Xow_wiki wiki, byte[] cur_page) {
    int raw_len = raw.length;
    url_parser.Parse(tmp_url, raw, 0, raw_len);     // parse as regular tmp_url to get protocol
    byte protocol_tid = tmp_url.Protocol_tid();
    rv.Init(raw, protocol_tid);
    int bgn = 0;            // where segment matching starts
    int slash_end = 0;      // position after the slashes that follow "file:"; 0 for protocol-less hrefs
    if (protocol_tid == Xoo_protocol_itm.Tid_xowa) {
      int page_bgn = Bry_.While_fwd(raw, Byte_ascii.Slash, tmp_url.Protocol_bry().length, raw_len);
      rv.Tid_(Xoh_href.Tid_xowa);
      rv.Wiki_(wiki.Domain_bry());                  // wiki is always the current wiki
      // page is everything after the protocol; it is decoded (EX: %20 -> " "); anchor (#) and query params (?) are not parsed here
      rv.Page_(wiki.App().Url_converter_gfs().Decode(Bry_.Mid(raw, page_bgn, raw_len)));
      return;
    }
    else if (protocol_tid == Xoo_protocol_itm.Tid_file) {
      // tmp_url is "file:"; remove it; NOTE: swt/mozilla automatically prepends "file://" to any protocol-less links; see NOTE_1 below
      int proto_len = tmp_url.Protocol_bry().length;
      slash_end = Bry_.While_fwd(raw, Byte_ascii.Slash, proto_len, raw_len);
      // if at least 1 slash, include slash; this ensures that all strings which have "file://" stripped will start with a "/"; EX: file:///wiki -> "/wiki"; file://C -> "/C"
      bgn = slash_end > proto_len ? slash_end - 1 : slash_end;
    }
    else if (protocol_tid != Xoo_protocol_itm.Tid_null) {
      // known protocol ("http:", "ftp:", etc); use it and exit; do not do any substitutions; EX: http://en.wikipedia.org
      rv.Tid_(Xoh_href.Tid_http);
      return;
    }
    // else: unknown protocol ("unknown:A") or protocol-less ("A"); could be wiki-title or file-name; handled below

    if (slash_end < raw_len && raw[slash_end] == Byte_ascii.Hash) {   // 1st character is anchor; extract and return
      rv.Tid_(Xoh_href.Tid_anchor);
      rv.Wiki_(wiki.Domain_bry());                                    // wiki is always current
      rv.Page_(cur_page);                                             // page is always current
      rv.Anchor_(Bry_.Mid(raw, slash_end + 1, raw_len));              // +1 to skip #; i.e. Anchor should be "A" not "#A"
      return;
    }
    Object seg_obj = segs.MatchAtCur(raw, bgn, raw_len);              // match /wiki/ or /site/ or /xcmd/
    if (seg_obj == null) {                                            // nothing matched; assume file; EX: file:///C/dir/fil.txt -> /C/dir/fil.txt
      rv.Tid_(Xoh_href.Tid_file);
      return;
    }
    ByteTrie_stub seg = (ByteTrie_stub)seg_obj;
    bgn += seg.Val().length;                                          // skip the matched segment
    switch (seg.Tid()) {
      case Seg_wiki_tid: Parse_wiki(rv, encoder, wiki, raw, bgn, raw_len); break;
      case Seg_site_tid: Parse_site(rv, encoder, wiki, raw, bgn, raw_len); break;
      case Seg_xcmd_tid: Parse_xcmd(rv, encoder, wiki, raw, bgn, raw_len); break;
    }
  }
  /** Builds the href for ttl (force_site off) and returns it as a new byte array. */
  public byte[] Build_to_bry(Xow_wiki wiki, Xoa_ttl ttl) {
    Build_to_bfr(tmp_bfr, wiki, ttl, Bool_.N);
    return tmp_bfr.XtoAryAndClear();
  }
  /** Parses raw as a title of wiki and writes its href to bfr (force_site off). */
  public void Build_to_bfr(Bry_bfr bfr, Xow_wiki wiki, byte[] raw) {
    Xoa_ttl ttl = Xoa_ttl.parse_(wiki, raw);
    Build_to_bfr(bfr, wiki, ttl, Bool_.N);
  }
  /** Writes the href for ttl to bfr (force_site off). */
  public void Build_to_bfr(Bry_bfr bfr, Xow_wiki wiki, Xoa_ttl ttl) {
    Build_to_bfr(bfr, wiki, ttl, Bool_.N);
  }
  /**
   * Writes the href for ttl to bfr: a prefix ("/wiki/", "/site/domain/wiki/", "http://domain/wiki/" or nothing) followed by the encoded page.
   * @param force_site when true, titles without an xwiki are written as "/site/{wiki domain}/wiki/" instead of "/wiki/"
   */
  public void Build_to_bfr(Bry_bfr bfr, Xow_wiki wiki, Xoa_ttl ttl, boolean force_site) {
    byte[] page = ttl.Full_txt_raw();
    Xow_xwiki_itm xwiki = ttl.Wik_itm();
    if (xwiki == null) {                            // not an xwiki; EX: [[wikt:Word]]
      if (ttl.Leaf_bgn() == Bry_.NotFound)          // regular page; encode
        Build_to_bfr_page(ttl, page, 0);
      else                                          // NOTE: this is strange logic to handle urls of the form [[../a]]; need to revisit why (a) only Raw has resolved title and (b) why it's not encoded
        encoder.Encode(bfr_encoder, ttl.Raw());
      if (ttl.Anch_bgn() != 1) {                    // not an anchor-only; EX: "#A"; anchor-only gets no prefix
        if (force_site)                             // popup parser always writes as "/site/"
          Add_site_prefix(bfr, wiki.Domain_bry());
        else
          bfr.Add(Href_wiki_bry);                   // add "/wiki/"; EX: /wiki/Page
      }
    }
    else {                                          // xwiki; skip wiki and encode page only
      Build_to_bfr_page(ttl, page, ttl.Wik_txt().length + 1);
      // encoder.Encode(bfr_encoder, page, wik_txt.length + 1, page.length);
      byte[] xwiki_domain = xwiki.Domain();
      if (wiki.App().Xwiki_missing(xwiki_domain)) { // xwiki is not offline; use http:
        bfr.Add(Href_http_bry);                     // add "http://"; EX: http://
        bfr.Add(xwiki_domain);                      // add xwiki; EX: en_dict
        bfr.Add(Href_wiki_bry);                     // add "/wiki/"; EX: /wiki/
      }
      else                                          // xwiki is available; use /site/
        Add_site_prefix(bfr, xwiki_domain);
    }
    bfr.Add_bfr_and_clear(bfr_encoder);             // page goes after the prefix
  }
  /** Writes "/site/" + domain + "/wiki/" to bfr. */
  private static void Add_site_prefix(Bry_bfr bfr, byte[] domain) {
    bfr.Add(Href_site_bry);                         // add "/site/"; EX: /site/
    bfr.Add(domain);                                // add domain; EX: en_dict
    bfr.Add(Href_wiki_bry);                         // add "/wiki/"; EX: /wiki/
  }
  /** Encodes ttl_full from page_bgn into bfr_encoder, dropping whitespace between the page and its anchor; EX: [[A #b]] -> "A#b". */
  private void Build_to_bfr_page(Xoa_ttl ttl, byte[] ttl_full, int page_bgn) {
    int ttl_len = ttl_full.length;
    int hash_pos = Bry_finder.Find_fwd(ttl_full, Byte_ascii.Hash);    // NOTE: cannot use Anch_bgn b/c Anch_bgn has bug with whitespace
    if (hash_pos == Bry_.NotFound) {                                  // no anchor; just add page
      encoder.Encode(bfr_encoder, ttl_full, page_bgn, ttl_len);
      return;
    }
    // anchor exists; check if anchor is preceded by ws
    int ws_pos = Bry_finder.Find_bwd_last_ws(ttl_full, hash_pos);     // find 1st ws before #; handles multiple ws
    int page_end = ws_pos == Bry_.NotFound ? hash_pos : ws_pos;       // if ws not found, use # pos; else use 1st ws pos
    encoder.Encode(bfr_encoder, ttl_full, page_bgn, page_end);        // add page
    encoder.Encode(bfr_encoder, ttl_full, hash_pos, ttl_len);         // add anchor
  }
  public static final String Href_http_str = "http://";
  public static final String Href_file_str = "file:///";
  public static final String Href_wiki_str = "/wiki/";
  public static final String Href_site_str = "/site/";
  public static final String Href_xcmd_str = "/xcmd/";
  public static final byte[] Href_http_bry = Bry_.new_utf8_(Href_http_str);
  public static final byte[] Href_file_bry = Bry_.new_ascii_(Href_file_str);
  public static final byte[] Href_site_bry = Bry_.new_ascii_(Href_site_str);
  public static final byte[] Href_wiki_bry = Bry_.new_ascii_(Href_wiki_str);
  private static final int Href_wiki_len = Href_wiki_bry.length;
  static final byte Seg_null_tid = 0;
  static final byte Seg_wiki_tid = 1;
  static final byte Seg_site_tid = 2;
  static final byte Seg_xcmd_tid = 3;
  private static final byte[] Seg_null_bry = Bry_.new_ascii_("/null/");
  private static final byte[] Seg_wiki_bry = Bry_.new_ascii_(Href_wiki_str);
  private static final byte[] Seg_site_bry = Bry_.new_ascii_(Href_site_str);
  private static final byte[] Seg_xcmd_bry = Bry_.new_ascii_(Href_xcmd_str);
  private static final byte[][] Seg__ary = new byte[][] {Seg_null_bry, Seg_wiki_bry, Seg_site_bry, Seg_xcmd_bry};
  /** Handles "/wiki/..."; bgn points just past "/wiki/". An invalid title is logged and leaves rv as initialized. */
  private static void Parse_wiki(Xoh_href rv, Url_encoder encoder, Xow_wiki wiki, byte[] raw, int bgn, int len) {
    Xoa_ttl ttl = Xoa_ttl.parse_(wiki, Bry_.Mid(raw, bgn, len));
    if (ttl == null) {
      wiki.App().Gui_wtr().Warn_many("xowa.href.parser", "invalid_wiki", "wiki href does not have valid title: ~{0}", String_.new_utf8_(raw, bgn, len));
      return;
    }
    Xow_xwiki_itm xwiki = ttl.Wik_itm();
    if (xwiki == null) {                            // standard href; EX: "/wiki/A"
      rv.Tid_(Xoh_href.Tid_wiki);
      rv.Wiki_(wiki.Domain_bry());                  // wiki is always the current wiki
    }
    else {                                          // embedded xwiki prefix; EX: "/wiki/fr:A"
      rv.Tid_(Xoh_href.Tid_site);
      rv.Wiki_(xwiki.Domain());                     // wiki is the xwiki prefix; EX: "en.wikipedia.org/wiki/fr:A" -> "fr.wikipedia.org/wiki/A"
    }
    // per the original note, Full_txt does not include the xwiki prefix ("fr:" is treated like a ns), so "fr:A" yields "A"
    byte[] page_bry = encoder.Decode(ttl.Full_txt());
    if (Bry_.Len_eq_0(page_bry))                    // handle xwiki hrefs like "fr:"; EX: "/wiki/wikipedia:" on en.wikisource.org/Main Page
      page_bry = Xoa_page_.Main_page_bry_empty;
    // if (ttl.Qarg_bgn() != Bry_.NotFound)
    //   rv.Qarg_(ttl.Qarg_txt());
    rv.Page_(page_bry);                             // add page; decoded (EX: %20 -> " "); query params (?) are not parsed here
    if (ttl.Anch_bgn() != Bry_.NotFound)
      rv.Anchor_(ttl.Anch_txt());
  }
  /** Handles "/site/..."; EX: /site/fr.wikipedia.org/wiki/A; bgn points just past "/site/". Throws when no "/" follows the domain. */
  private static void Parse_site(Xoh_href rv, Url_encoder encoder, Xow_wiki wiki, byte[] raw, int bgn, int len) {
    int slash = Bry_finder.Find_fwd(raw, Byte_ascii.Slash, bgn, len);
    if (slash == Bry_.NotFound)
      throw Err_mgr._.fmt_("xowa.href.parser", "invalid_site", "site href is missing slash: ~{0}", String_.new_utf8_(raw, bgn, len));
    rv.Tid_(Xoh_href.Tid_site);
    byte[] wiki_bry = Bry_.Mid(raw, bgn, slash);    // wiki is text between "/site/" and next "/"
    // NOTE: site may refer to alias in user_wiki; ex: /site/wikisource.org which points to en.wikisource.org; this occurs during lnke substitution; EX: [//wikisource.org Wikisource]
    Xow_xwiki_itm xwiki = wiki.App().User().Wiki().Xwiki_mgr().Get_by_key(wiki_bry);
    if (xwiki != null) {
      wiki_bry = xwiki.Domain();
      // NOTE: xwiki links should use case_match of xwiki (en.wiktionary.org) not cur_wiki (en.wikipedia.org); EX:w:alphabet
      wiki = wiki.App().Wiki_mgr().Get_by_key_or_make(wiki_bry);
    }
    rv.Wiki_(wiki_bry);
    int page_pos = slash + Href_wiki_len;           // page is text after next "/" + "/wiki/"
    byte[] page_bry = Bry_.Empty;
    if (page_pos < len)
      page_bry = Bry_.Mid(raw, page_pos, len);
    if (Bry_.Len_eq_0(page_bry))                    // handle "/site/fr.wikipedia.org/wiki/"; note that these are generated by [[fr:]]
      page_bry = wiki.Props().Main_page();          // default to Main Page
    // int qarg_pos = Bry_finder.Find_bwd(page_bry, Byte_ascii.Question);
    // byte[] qarg_bry = null;
    // if (qarg_pos != Bry_.NotFound) {
    //   qarg_bry = Bry_.Mid(page_bry, qarg_pos + 1, page_bry.length);
    //   rv.Qarg_(qarg_bry);
    //   page_bry = Bry_.Mid(page_bry, 0, qarg_pos);
    // }
    Parse_ttl_and_resolve_xwiki(wiki.App().Usr_dlg(), rv, wiki, encoder, page_bry, raw, bgn, len);
  }
  /**
   * Parses page_bry as a title of wiki and stores page / anchor on rv.
   * An invalid title is logged and yields an empty page. If the title itself carries an xwiki (EX: "wikt:A"),
   * rv's wiki is switched to that xwiki and the remainder is re-parsed there; a failure of that second parse throws.
   */
  private static void Parse_ttl_and_resolve_xwiki(Gfo_usr_dlg usr_dlg, Xoh_href rv, Xow_wiki wiki, Url_encoder encoder, byte[] page_bry, byte[] raw, int bgn, int len) {
    Xoa_ttl ttl = Xoa_ttl.parse_(wiki, page_bry);
    if (ttl == null) {
      usr_dlg.Warn_many("xowa.href.parser", "invalid_wiki", "wiki href does not have valid title: ~{0}", String_.new_utf8_(raw, bgn, len));
      rv.Page_(Bry_.Empty);
      return;
    }
    Xow_xwiki_itm xwiki = ttl.Wik_itm();
    if (xwiki != null) {    // page_bry has xwiki; EX: "wikt:A"; note that since this is called by "/site/", there may be two xwikis; EX: "w:wikt:"; Note that more than 2 is not being handled
      wiki = wiki.App().Wiki_mgr().Get_by_key_or_make(xwiki.Domain());
      rv.Wiki_(wiki.Domain_bry());
      byte[] xwiki_page = ttl.Page_txt();
      page_bry = Bry_.Len_eq_0(xwiki_page)
        ? wiki.Props().Main_page()                  // page_bry is just alias; EX: "wikt:"
        : xwiki_page;
      ttl = Xoa_ttl.parse_(wiki, page_bry);
      if (ttl == null)
        throw Err_mgr._.fmt_("xowa.href.parser", "invalid_wiki", "wiki href does not have valid title: ~{0}", String_.new_utf8_(raw, bgn, len));
    }
    rv.Page_(encoder.Decode(ttl.Full_txt()));       // add page; decoded (EX: %20 -> " "); query params (?) are not parsed here
    if (ttl.Anch_bgn() != Bry_.NotFound)            // add anchor if it exists
      rv.Anchor_(ttl.Anch_txt());
  }
  /** Handles "/xcmd/..."; encoder is passed, but the page is not decoded for now: most invk commands have an _ which would get changed to a " ". */
  private static void Parse_xcmd(Xoh_href rv, Url_encoder encoder, Xow_wiki wiki, byte[] raw, int bgn, int len) {
    rv.Tid_(Xoh_href.Tid_xcmd);
    rv.Wiki_(wiki.Domain_bry());                    // wiki is always the current wiki
    rv.Page_(Bry_.Mid(raw, bgn, len));              // page is everything after "/xcmd/", stored as-is; individual cmds will do further parsing
  }
  private static final byte Protocol_xowa_tid = Xoo_protocol_itm.Tid_xowa;
}
/*
NOTE_1:
. swt/mozilla treats text differently in href="{text}" when content_editable=n; occurs in LocationListener.changing
http://a.org -> http://a.org does nothing
A -> file:///A adds "file:///"
/wiki/A -> file:///wiki/A adds "file://"
Category:A -> Category:A noops; Category is assumed to be protocol?
//en.wiktionary.org/wiki/a -> file:///wiki/a strips out site name and prepends "file://"; no idea why
. so, to handle the above, the code does the following
http://a.org -> http://a.org does nothing; nothing needed
A -> /wiki/A always prepend /wiki/
Category:A -> /wiki/Category:A always prepend /wiki/
//en.wiktionary.org/wiki/A -> /site/en.wiktionary.org/wiki/A always transform relative url to /site/
. the href will still come here as file:///wiki/A or file:///site/en.wiktionary.org/wiki/A.
. however, the file:// can be lopped off and discarded and the rest of the href will fall into one of the following cases
.. /wiki/
.. /site/
.. /xcmd/
.. #
.. anything else -> assume to be really a file:// url; the leading slash is kept; EX: file:///C/dir/fil.txt -> /C/dir/fil.txt
. the other advantage of this approach is that this proc can be reused outside of swt calls; i.e.: it can parse both "file:///wiki/A" and "/wiki/A"
*/

View File

@@ -0,0 +1,240 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
import org.junit.*;
import gplx.xowa.net.*;
/**
 * Tests for Xoh_href_parser.
 * Parse cases set the raw href and the expected parts on the fixture, then call Test_parse (which prints the full form).
 * Brief cases switch link-hover to brief and compare the printed text only. Build cases check the href written for a title.
 */
public class Xoh_href_parser_tst {
  private Xoh_href_parser_fxt fxt = new Xoh_href_parser_fxt();
  @Before public void init() {
    fxt.Clear();
  }
  /** Parses raw with link-hover set to brief and checks the printed text. */
  private void Tst_brief(String raw, String expd) {
    fxt.Init_hover_full_n_();
    fxt.Test_parse(raw, expd);
  }
  @Test public void Parse_full_wiki() {
    fxt.Prep_raw_("file:///wiki/A");
    fxt.Expd_tid_(Xoh_href.Tid_wiki);
    fxt.Expd_full_("en.wikipedia.org/wiki/A");
    fxt.Expd_wiki_("en.wikipedia.org");
    fxt.Expd_page_("A");
    fxt.Test_parse();
  }
  @Test public void Parse_full_http() {
    fxt.Prep_raw_("http://a.org/b");
    fxt.Expd_tid_(Xoh_href.Tid_http);
    fxt.Expd_full_("http://a.org/b");
    fxt.Test_parse();
  }
  @Test public void Parse_full_file() {
    fxt.Prep_raw_("file:///C/xowa/file/a.png");
    fxt.Expd_tid_(Xoh_href.Tid_file);
    fxt.Expd_full_("file:///C/xowa/file/a.png");
    fxt.Test_parse();
  }
  @Test public void Parse_full_anchor_only() {
    fxt.Prep_raw_("#a");
    fxt.Expd_tid_(Xoh_href.Tid_anchor);
    fxt.Expd_full_("en.wikipedia.org/wiki/Page 1#a");
    fxt.Expd_anch_("a");
    fxt.Test_parse();
  }
  @Test public void Parse_full_anchor_w_page() {
    fxt.Prep_raw_("file:///wiki/A#b");
    fxt.Expd_tid_(Xoh_href.Tid_wiki);
    fxt.Expd_full_("en.wikipedia.org/wiki/A#b");
    fxt.Expd_anch_("b");
    fxt.Test_parse();
  }
  @Test public void Parse_full_xwiki() {
    fxt.Prep_raw_("file:///site/en.wikt.org/wiki/Page");
    fxt.Expd_tid_(Xoh_href.Tid_site);
    fxt.Expd_full_("en.wikt.org/wiki/Page");
    fxt.Expd_page_("Page");
    fxt.Test_parse();
  }
  @Test public void Parse_full_xwiki_domain_only() {
    fxt.Prep_raw_("/wiki/wikt:");
    fxt.Init_xwiki_alias("wikt", "en.wiktionary.org");
    fxt.Expd_full_("en.wiktionary.org/wiki/");
    fxt.Expd_page_("");
    fxt.Test_parse();
  }
  @Test public void Parse_full_wiki_page() {
    fxt.Prep_raw_("/wiki/A");
    fxt.Expd_tid_(Xoh_href.Tid_wiki);
    fxt.Expd_full_("en.wikipedia.org/wiki/A");
    fxt.Expd_page_("A");
    fxt.Test_parse();
  }
  // PURPOSE: w/ slash; "wiki/"
  @Test public void Parse_empty_is_main_page() {
    fxt.Prep_raw_("/site/en.wikipedia.org/wiki/");
    fxt.Expd_tid_(Xoh_href.Tid_site);
    fxt.Expd_full_("en.wikipedia.org/wiki/Main Page");
    fxt.Expd_page_("Main Page");
    fxt.Test_parse();
  }
  // PURPOSE: wo slash; "wiki"
  @Test public void Parse_empty_is_main_page_2() {
    fxt.Prep_raw_("/site/en.wikipedia.org/wiki");
    fxt.Expd_tid_(Xoh_href.Tid_site);
    fxt.Expd_full_("en.wikipedia.org/wiki/Main Page");
    fxt.Expd_page_("Main Page");
    fxt.Test_parse();
  }
  @Test public void Parse_site_page() {
    fxt.Prep_raw_("/site/en.wikt.org/wiki/A");
    fxt.Expd_tid_(Xoh_href.Tid_site);
    fxt.Expd_full_("en.wikt.org/wiki/A");
    fxt.Expd_page_("A");
    fxt.Test_parse();
  }
  @Test public void Parse_site_ns_case() {
    fxt.Prep_raw_("/site/en.wikt.org/wiki/file:A");
    fxt.Expd_tid_(Xoh_href.Tid_site);
    fxt.Expd_full_("en.wikt.org/wiki/File:A");
    fxt.Expd_page_("File:A");
    fxt.Test_parse();
  }
  // PURPOSE: invalid title shouldn't fail; EX: A{{B}} is invalid (b/c of braces);
  @Test public void Parse_site_page__invalid_ttl_shouldnt_fail() {
    fxt.Prep_raw_("/site/en.wikt.org/wiki/A{{B}}");
    fxt.Expd_tid_(Xoh_href.Tid_site);
    fxt.Expd_full_("en.wikt.org/wiki/");
    fxt.Expd_page_("");
    fxt.Test_parse();
  }
  @Test public void Parse_xcmd_edit() {
    fxt.Prep_raw_("/xcmd/page_edit");
    fxt.Expd_tid_(Xoh_href.Tid_xcmd);
    fxt.Expd_full_("");
    fxt.Expd_page_("page_edit");
    fxt.Test_parse();
  }
  @Test public void Parse_xowa() {
    fxt.Prep_raw_("xowa-cmd:a%22b*c");
    fxt.Expd_tid_(Xoh_href.Tid_xowa);
    fxt.Expd_full_("a\"b*c");
    fxt.Expd_page_("a\"b*c");
    fxt.Test_parse();
  }
  @Test public void Parse_edit_wiki_quote() {
    fxt.Prep_raw_("/wiki/A%22b%22c");
    fxt.Expd_tid_(Xoh_href.Tid_wiki);
    fxt.Expd_full_("en.wikipedia.org/wiki/A\"b\"c");
    fxt.Expd_page_("A\"b\"c");
    fxt.Test_parse();
  }
  @Test public void Parse_brief_wiki() {
    Tst_brief("file:///wiki/A", "A");
  }
  @Test public void Parse_brief_http() {
    Tst_brief("http://a.org/b", "http://a.org/b");
  }
  @Test public void Parse_brief_file() {
    Tst_brief("file:///C/xowa/file/a.png", "file:///C/xowa/file/a.png");
  }
  @Test public void Parse_brief_anchor() {
    Tst_brief("#a", "#a");
  }
  @Test public void Parse_brief_anchor_file() {
    Tst_brief("file:///#a", "#a");
  }
  @Test public void Parse_brief_xwiki() {
    Tst_brief("file:///site/en.wikt.org/wiki/Page", "en.wikt.org/Page");
  }
  @Test public void Parse_brief_xwiki_2() {
    fxt.Init_hover_full_n_();
    fxt.Expd_page_("a");
    fxt.Test_parse("/wiki/wikt:a", "en.wiktionary.org/a");
  }
  // {{{extlink}}} not a valid title; return empty
  @Test public void Parse_brief_error() {
    Tst_brief("file:///wiki/{{{extlink}}}", "");
  }
  // @Test public void Parse_site_qarg() {fxt.Prep_raw_("/site/en.wikt.org/wiki/A?action=edit").Expd_tid_(Xoh_href.Tid_site).Expd_full_("en.wikt.org/wiki/A").Expd_page_("A").Expd_qarg_("action=edit").Test_parse();}
  // @Test public void Parse_wiki_qarg() {fxt.Prep_raw_("/wiki/A?action=edit").Expd_tid_(Xoh_href.Tid_wiki).Expd_full_("en.wikipedia.org/wiki/A").Expd_page_("A").Expd_qarg_("action=edit").Test_parse();}
  //@Test public void Parse_site_anchor() {fxt.Prep_raw_("/site/en.wikt.org/wiki/A#b_c" ).Expd_tid_(Xoh_href.Tid_site).Expd_full_("en.wikt.org/wiki/A#b_c").Expd_page_("A").Expd_anch_("b_c").Test_parse();}
  @Test public void Build_xwiki_enc() {
    fxt.Test_build("wikt:abc?d", "/site/en.wiktionary.org/wiki/abc%3Fd");
  }
  @Test public void Build_page_quote() {
    fxt.Test_build("a\"b\"c", "/wiki/A%22b%22c");
  }
  @Test public void Build_page() {
    fxt.Test_build("abc", "/wiki/Abc");
  }
  @Test public void Build_page_ns() {
    fxt.Test_build("Image:A.png", "/wiki/Image:A.png");
  }
  @Test public void Build_anchor() {
    fxt.Test_build("#abc", "#abc");
  }
  @Test public void Build_page_anchor() {
    fxt.Test_build("Abc#def", "/wiki/Abc#def");
  }
  // NOTE: "abc" not capitalized, b/c other wiki's case sensitivity is not known; this emulates WP's behavior
  @Test public void Build_xwiki() {
    fxt.Test_build("wikt:abc", "/site/en.wiktionary.org/wiki/abc");
  }
  @Test public void Build_xwiki_2() {
    fxt.Test_build("wikt:Special:Search/a", "/site/en.wiktionary.org/wiki/Special:Search/a");
  }
  @Test public void Build_category() {
    fxt.Test_build("Category:abc", "/wiki/Category:Abc");
  }
  // PURPOSE: outlier for wikisource.org which is alias to en.wikisource.org; alias added in user_wiki; EX: [//wikisource.org a]; in browser, automatically goes to http://wikisource.org; in xowa, should go to /site/en.wikisource.org
  @Test public void Parse_site_user_wiki() {
    fxt.Prep_raw_("/site/en_wiki_alias/wiki/");
    fxt.Init_xwiki_alias("en_wiki_alias", "en.wikipedia.org");
    fxt.Expd_tid_(Xoh_href.Tid_site);
    fxt.Expd_full_("en.wikipedia.org/wiki/Main Page");
    fxt.Expd_page_("Main Page");
    fxt.Test_parse();
  }
  // PURPOSE: xwiki links should use case_match of xwiki (en.wiktionary.org) not cur_wiki (en.wikipedia.org); EX:w:Alphabet
  @Test public void Parse_xwiki_cases_correctly() {
    fxt.Prep_raw_("/site/en.wiktionary.org/wiki/alphabet");
    fxt.Init_xwiki_alias("en.wiktionary.org", "en.wiktionary.org");
    Xow_wiki en_wiktionary_org = fxt.App().Wiki_mgr().Get_by_key_or_make(Bry_.new_ascii_("en.wiktionary.org"));
    en_wiktionary_org.Ns_mgr().Ns_main().Case_match_(Xow_ns_case_.Id_all);
    fxt.Expd_tid_(Xoh_href.Tid_site);
    fxt.Expd_full_("en.wiktionary.org/wiki/alphabet");
    fxt.Expd_page_("alphabet");
    fxt.Test_parse();
  }
  // PURPOSE: [[w:wikt:]] not handled; DATE:2013-07-25
  @Test public void Parse_xwiki_compound() {
    fxt.Prep_raw_("/site/en.wikipedia.org/wiki/wikt:");
    fxt.Init_xwiki_alias("wikt:", "en.wiktionary.org");
    fxt.Expd_tid_(Xoh_href.Tid_site);
    fxt.Expd_full_("en.wiktionary.org/wiki/Main Page");
    fxt.Expd_page_("Main Page");
    fxt.Test_parse();
  }
  // PURPOSE: check that urls with form of "ftp://" return back Tid_ftp; DATE:2014-04-25
  @Test public void Parse_protocol() {
    fxt.Test_parse_protocol("ftp://a.org", Xoo_protocol_itm.Tid_ftp);
  }
  // @Test public void Parse_question_ttl() {fxt.Prep_raw_("/wiki/%3F").Expd_tid_(Xoh_href.Tid_wiki).Expd_full_("en.wikipedia.org/wiki/?").Expd_page_("?").Test_parse();}
  // @Test public void Parse_question_w_arg() {fxt.Prep_raw_("/wiki/A%3F?action=edit").Expd_tid_(Xoh_href.Tid_wiki).Expd_full_("en.wikipedia.org/wiki/A??action=edit").Expd_page_("A??action=edit").Test_parse();}
}
/** Fixture for Xoh_href_parser_tst: owns the app / wiki / parser and records the expectations of the current test. */
class Xoh_href_parser_fxt {
  private Xow_wiki wiki;
  private Xoh_href_parser href_parser;
  private Bry_bfr tmp_bfr = Bry_bfr.reset_(255);
  private Xoh_href href = new Xoh_href();           // reused by every parse
  private static final byte[] Page_1_ttl = Bry_.new_ascii_("Page 1");   // current page handed to every Parse call
  private Xoa_app app;
  private String prep_raw;
  private byte expd_tid;
  private String expd_full;
  private String expd_wiki;
  private String expd_page;
  private String expd_anch;

  /** Clears all expectations; on the first call also creates the app, wiki, xwiki entries and parser. */
  public void Clear() {
    expd_tid = Xoh_href.Tid_null;
    prep_raw = null;
    expd_full = null;
    expd_wiki = null;
    expd_page = null;
    expd_anch = null;
    if (app == null) {      // app, wiki and parser are created once and kept across tests
      app = Xoa_app_fxt.app_();
      wiki = Xoa_app_fxt.wiki_tst_(app);
      wiki.Xwiki_mgr().Add_bulk(Bry_.new_ascii_("wikt|en.wiktionary.org"));
      app.User().Wiki().Xwiki_mgr().Add_bulk(Bry_.new_ascii_("en.wiktionary.org|en.wiktionary.org"));
      href_parser = new Xoh_href_parser(app.Url_converter_href(), app.Url_parser().Url_parser());
    }
  }
  public Xoa_app App() {return app;}
  /** Registers alias -> domain in the user wiki's xwiki manager. */
  public Xoh_href_parser_fxt Init_xwiki_alias(String alias, String domain) {
    app.User().Wiki().Xwiki_mgr().Add_full(alias, domain);
    return this;
  }
  public Xoh_href_parser_fxt Init_hover_full_y_() {return Init_hover_full_(Bool_.Y);}
  public Xoh_href_parser_fxt Init_hover_full_n_() {return Init_hover_full_(Bool_.N);}
  /** Sets the browser config's Link_hover_full flag, which the two-argument Test_parse reads. */
  public Xoh_href_parser_fxt Init_hover_full_(boolean v) {
    wiki.Gui_mgr().Cfg_browser().Link_hover_full_(v);
    return this;
  }
  public Xoh_href_parser_fxt Prep_raw_(String v) {prep_raw = v; return this;}
  public Xoh_href_parser_fxt Expd_tid_(byte v) {expd_tid = v; return this;}
  public Xoh_href_parser_fxt Expd_full_(String v) {expd_full = v; return this;}
  public Xoh_href_parser_fxt Expd_wiki_(String v) {expd_wiki = v; return this;}
  public Xoh_href_parser_fxt Expd_page_(String v) {expd_page = v; return this;}
  public Xoh_href_parser_fxt Expd_anch_(String v) {expd_anch = v; return this;}
  /** Parses prep_raw and checks every expectation that was set; the full text is always printed with full = true. */
  public void Test_parse() {
    href_parser.Parse(href, prep_raw, wiki, Page_1_ttl);
    if (expd_tid != Xoh_href.Tid_null)
      Tfds.Eq(expd_tid, href.Tid());
    if (expd_wiki != null)
      Tfds.Eq(expd_wiki, String_.new_utf8_(href.Wiki()));
    if (expd_page != null)
      Tfds.Eq(expd_page, String_.new_utf8_(href.Page()));
    if (expd_anch != null)
      Tfds.Eq(expd_anch, String_.new_utf8_(href.Anchor()));
    if (expd_full != null) {
      href.Print_to_bfr(tmp_bfr, true);
      Tfds.Eq(expd_full, tmp_bfr.XtoStrAndClear());
    }
  }
  /** Parses raw and compares the text printed with the configured Link_hover_full setting against expd. */
  public void Test_parse(String raw, String expd) {
    href_parser.Parse(href, raw, wiki, Page_1_ttl);
    boolean hover_full = wiki.Gui_mgr().Cfg_browser().Link_hover_full();
    href.Print_to_bfr(tmp_bfr, hover_full);
    Tfds.Eq(expd, tmp_bfr.XtoStrAndClear());
  }
  /** Parses raw as a title of the test wiki and compares the href built for it against expd. */
  public void Test_build(String raw, String expd) {
    Xoa_ttl ttl = Xoa_ttl.parse_(wiki, Bry_.new_utf8_(raw));
    href_parser.Build_to_bfr(tmp_bfr, wiki, ttl);
    String actl = tmp_bfr.XtoStrAndClear();
    Tfds.Eq(expd, actl);
  }
  /** Parses raw and checks only the protocol tid recorded on the href. */
  public void Test_parse_protocol(String raw, byte expd_tid) {
    href_parser.Parse(href, raw, wiki, Page_1_ttl);
    Tfds.Eq(expd_tid, href.Protocol_tid());
  }
}