1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2026-03-02 03:49:30 +00:00
This commit is contained in:
gnosygnu
2015-07-05 22:30:38 -04:00
parent d858b74d64
commit 3ce2a441a6
107 changed files with 1810 additions and 334 deletions

View File

@@ -16,6 +16,7 @@ You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
import gplx.xowa.html.hrefs.*;
public class Xoa_url {
public byte[] Raw() {return raw;} public Xoa_url Raw_(byte[] v) {raw = v; return this;} private byte[] raw = Bry_.Empty;
public byte[] Wiki_bry() {return wiki_bry;} public Xoa_url Wiki_bry_(byte[] v) {wiki_bry = v; return this;} private byte[] wiki_bry;

View File

@@ -18,6 +18,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
package gplx.xowa; import gplx.*;
import gplx.core.primitives.*;
import gplx.xowa.langs.*; import gplx.xowa.wikis.*; import gplx.xowa.wikis.xwikis.*; import gplx.xowa.net.*; import gplx.xowa.files.*;
import gplx.xowa.html.hrefs.*;
public class Xoa_url_parser {
private final Url_encoder encoder = Url_encoder.new_html_href_mw_().Itms_raw_same_many(Byte_ascii.Underline); private final Bry_bfr tmp_bfr = Bry_bfr.reset_(255);
public Gfo_url_parser Url_parser() {return url_parser;} private Gfo_url_parser url_parser = new Gfo_url_parser(); private Gfo_url gfo_url = new Gfo_url();
@@ -205,8 +206,7 @@ public class Xoa_url_parser {
if (page_is_main_page) { // Main_Page requested; EX: "zh.wikipedia.org"; "zh.wikipedia.org/wiki/"; DATE:2014-02-16
if (from_url_bar) {
wiki.Init_by_wiki(); // NOTE: must call Init_assert to load Main_Page; only call if from url_bar, else all sister wikis will be loaded when parsing Sister_wikis panel
// page_bry = ((Xowe_wiki)wiki).Props().Main_page();
page_bry = wiki.Data__core_mgr().Mw_props().Main_page();
page_bry = wiki.Props().Main_page();
}
else
page_bry = Xoa_page_.Main_page_bry_empty;

View File

@@ -47,7 +47,7 @@ public class Xoa_url_parser_url_bar_tst {
Xowe_wiki zh_wiki = fxt.App().Wiki_mgr().Get_by_key_or_make(Bry_.new_a7("zh.wikipedia.org"));
Init_db(zh_wiki);
gplx.xowa.wikis.Xoa_wiki_regy.Make_wiki_dir(fxt.App(), "zh.wikipedia.org");
zh_wiki.Data__core_mgr().Mw_props().Main_page_(Bry_.new_a7("Zh_Main_Page"));
zh_wiki.Props().Main_page_(Bry_.new_a7("Zh_Main_Page"));
fxt.Test_parse_from_url_bar("zh.w:" , "zh.wikipedia.org/wiki/Zh_Main_Page");
fxt.Test_parse_from_url_bar("zh.w:Main_Page" , "zh.wikipedia.org/wiki/Main_Page");
}

View File

@@ -1,75 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
public class Xoh_href {
	// Parsed href components; byte[] fields stay null (and tid stays Tid_null) until Init + a parse pass fills them in.
	private byte[] raw, wiki, page, anchor;
	private byte tid;
	private byte protocol_tid;
	public byte[] Raw() {return raw;}
	public Xoh_href Raw_(byte[] v) {raw = v; return this;}
	public byte[] Wiki() {return wiki;}
	public Xoh_href Wiki_(byte[] v) {wiki = v; return this;}
	public byte[] Page() {return page;}
	public Xoh_href Page_(byte[] v) {page = v; return this;}
	public byte[] Anchor() {return anchor;}
	public Xoh_href Anchor_(byte[] v) {anchor = v; return this;}
	public byte Tid() {return tid;}
	public Xoh_href Tid_(byte v) {tid = v; return this;}
	public byte Protocol_tid() {return protocol_tid;}
	// Returns "page#anchor" when both exist, else whichever part is non-empty
	public byte[] Page_and_anchor() {
		if (Bry_.Len_eq_0(anchor)) return page;
		if (Bry_.Len_eq_0(page)) return anchor;
		return Bry_.Add_w_dlm(Byte_ascii.Hash, page, anchor);
	}
	// Resets state for a new parse; raw and protocol_tid come from the url parser
	public void Init(byte[] raw, byte protocol_tid) {
		this.raw = raw;
		this.protocol_tid = protocol_tid;
		this.wiki = this.page = this.anchor = null;
		this.tid = Tid_null;
	}
	// Prints href to bfr; currently used for status bar (not embedded in any html)
	// full=y writes a form that can be pasted into a browser url bar; full=n writes an abbreviated form
	public void Print_to_bfr(Bry_bfr bfr, boolean full) {
		if (tid == Tid_http || tid == Tid_file) { // full protocol; embed all; EX: "http://en.wikipedia.org/wiki/A"; "file:///C/dir/file.txt"
			bfr.Add(raw);
			return;
		}
		if (tid == Tid_xowa) { // xowa-cmd: print page only
			bfr.Add(page);
			return;
		}
		if (full) {
			if (tid == Tid_wiki || tid == Tid_site || tid == Tid_anchor) {
				bfr.Add(wiki);                          // add wiki_key; EX: "en.wikipedia.org"
				bfr.Add(Xoh_href_parser.Href_wiki_bry); // add wiki_str; EX: "/wiki/"
				bfr.Add(page);                          // add page; EX: "A"
				if (anchor != null)
					bfr.Add_byte(Byte_ascii.Hash).Add(anchor); // add anchor; EX: "#B"
			}
			// other tids: nothing to print in full mode
		}
		else {
			if (tid == Tid_site)
				bfr.Add(wiki).Add_byte(Byte_ascii.Slash).Add(page); // EX: "en.wikipedia.org/A"
			else if (tid == Tid_wiki)
				bfr.Add(page); // EX: "A"
			// Tid_anchor and others: no wiki/page; anchor (if any) added below
			if (anchor != null)
				bfr.Add_byte(Byte_ascii.Hash).Add(anchor); // add anchor; EX: "#B"
		}
	}
	public static final byte Tid_null = 0, Tid_http = 1, Tid_file = 2, Tid_wiki = 3, Tid_site = 4, Tid_xcmd = 5, Tid_anchor = 6, Tid_xowa = 7;
}

View File

@@ -1,243 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
import gplx.core.btries.*; import gplx.xowa.wikis.xwikis.*; import gplx.xowa.net.*; import gplx.xowa.parsers.lnkes.*;
import gplx.xowa.wikis.*;
// Converts hrefs from the HTML pane into Xoh_href instances (EX: "/wiki/A", "/site/en.wikt.org/wiki/A",
// "file:///C/a.png", "xowa-cmd:...") and builds hrefs from parsed titles for embedding in generated HTML.
// NOTE(review): only Build_to_bry synchronizes on its buffer; Parse/Build_to_bfr share mutable buffers — presumably single-threaded callers; confirm.
public class Xoh_href_parser {
private Gfo_url_parser url_parser; private Gfo_url tmp_url = new Gfo_url();
private Btrie_slim_mgr segs = Btrie_slim_mgr.ci_ascii_(); // NOTE:ci.ascii:XO_const.en; /wiki/, /site/ etc.
private Bry_bfr bfr_encoder = Bry_bfr.reset_(255), tmp_bfr = Bry_bfr.reset_(255);
public Xoh_href_parser(Url_encoder encoder, Gfo_url_parser url_parser) {
this.encoder = encoder;
this.url_parser = url_parser;
url_parser.Init_protocol(Protocol_xowa_tid, Xop_lnke_wkr.Str_xowa_protocol); // register xowa protocol so url_parser recognizes it below
segs.Add_stubs(Seg__ary); // register "/wiki/", "/site/", "/xcmd/" segment prefixes
}
public Url_encoder Encoder() {return encoder;} private Url_encoder encoder;
public void Parse(Xoh_href rv, String raw, Xowe_wiki wiki, byte[] cur_page) {Parse(rv, Bry_.new_u8(raw), wiki, cur_page);}
// Parses raw href into rv; wiki is the current wiki; cur_page is the current page (used for anchor-only hrefs like "#a")
public void Parse(Xoh_href rv, byte[] raw, Xowe_wiki wiki, byte[] cur_page) {
int bgn = 0, raw_len = raw.length; int file_slash_end = 0;
url_parser.Parse(tmp_url, raw, 0, raw_len); // parse as regular tmp_url to get protocol
rv.Init(raw, tmp_url.Protocol_tid());
switch (tmp_url.Protocol_tid()) {
default: // tmp_url is known protocol ("http:", "ftp:", etc); use it and exit; do not do any substitutions EX: http://en.wikipedia.org
rv.Tid_(Xoh_href.Tid_http);
return;
case Xoo_protocol_itm.Tid_null: // unknown protocol ("unknown:A") or protocol-less ("A"); could be wiki-title or file-name; fall through to below
break;
case Xoo_protocol_itm.Tid_file: // tmp_url is "file:"; remove it; NOTE: swt/mozilla automatically prepends "file://" to any protocol-less links; see NOTE_1 below
int file_proto_len = tmp_url.Protocol_bry().length;
bgn = file_slash_end = Bry_.While_fwd(raw, Byte_ascii.Slash, file_proto_len, raw_len);
if (file_slash_end - file_proto_len > 0) --bgn; // if at least 1 slash, include slash; this ensures that all strings which have "file://" stripped will start with a "/"; EX: file:///wiki -> "/wiki"; file://C -> "/C"
break;
case Xoo_protocol_itm.Tid_xowa:
bgn = file_slash_end = Bry_.While_fwd(raw, Byte_ascii.Slash, tmp_url.Protocol_bry().length, raw_len);
rv.Tid_(Xoh_href.Tid_xowa);
rv.Wiki_(wiki.Domain_bry()); // wiki is always the current wiki
byte[] page = Xoa_app_.Utl__encoder_mgr().Gfs().Decode(Bry_.Mid(raw, bgn, raw_len));
rv.Page_(page); // page is everything after "/xcmd/"; individual cmds will do further parsing; note that it should be decoded; EX: %20 -> " "; also note that anchor (#) or query params (?) are not parsed; the entire String will be reparsed later
return;
}
if (file_slash_end < raw_len && raw[file_slash_end] == Byte_ascii.Hash) { // 1st character is anchor; extract and return
rv.Tid_(Xoh_href.Tid_anchor);
rv.Wiki_(wiki.Domain_bry()); // wiki is always current
rv.Page_(cur_page); // page is always current
rv.Anchor_(Bry_.Mid(raw, file_slash_end + 1, raw_len)); // +1 to skip #; i.e. Anchor should be "A" not "#A"
return;
}
Object seg_obj = segs.Match_bgn(raw, bgn, raw_len); // match /wiki/ or /site/ or /xcmd/
if (seg_obj == null) // nothing matched; assume file; EX: file:///C/dir/fil.txt -> /C/dir/fil.txt
rv.Tid_(Xoh_href.Tid_file);
else { // something matched;
Btrie_itm_stub seg = (Btrie_itm_stub)seg_obj;
bgn += seg.Val().length; // skip past matched segment; EX: "/wiki/"
switch (seg.Tid()) {
case Seg_wiki_tid: Parse_wiki(rv, encoder, wiki, raw, bgn, raw_len); break;
case Seg_site_tid: Parse_site(rv, encoder, wiki, raw, bgn, raw_len); break;
case Seg_xcmd_tid: Parse_xcmd(rv, encoder, wiki, raw, bgn, raw_len); break;
}
}
}
// Builds href for ttl as a new bry; synchronized b/c tmp_bfr is shared across callers
public byte[] Build_to_bry(Xow_wiki wiki, Xoa_ttl ttl) {
synchronized (tmp_bfr) {
Build_to_bfr(tmp_bfr, wiki.App(), wiki.Domain_bry(), ttl, Bool_.N);
return tmp_bfr.Xto_bry_and_clear();
}
}
public void Build_to_bfr(Bry_bfr bfr, Xoa_app app, byte[] domain_bry, Xoa_ttl ttl) {Build_to_bfr(bfr, app, domain_bry, ttl, Bool_.N);}
// Writes href for ttl to bfr; force_site=y always writes the "/site/" form (used by popup parser)
public void Build_to_bfr(Bry_bfr bfr, Xoa_app app, byte[] domain_bry, Xoa_ttl ttl, boolean force_site) {
byte[] page = ttl.Full_txt_raw();
Xow_xwiki_itm xwiki = ttl.Wik_itm();
if (xwiki == null) // not an xwiki
Build_to_bfr_page(ttl, page, 0); // write page only; NOTE: changed to remove leaf logic DATE:2014-09-07
else { // xwiki; EX: [[wikt:Word]]; skip wiki and encode page only;
byte[] wik_txt = ttl.Wik_txt();
Build_to_bfr_page(ttl, page, wik_txt.length + 1); // +1 to skip ":" after xwiki prefix
}
if (xwiki == null) { // not an xwiki
if (ttl.Anch_bgn() != 1) { // not an anchor-only; EX: "#A"
if (force_site) { // popup parser always writes as "/site/"
bfr.Add(Href_site_bry); // add "/site/"; EX: /site/
bfr.Add(domain_bry); // add xwiki; EX: en_dict
bfr.Add(Href_wiki_bry); // add "/wiki/"; EX: /wiki/
}
else
bfr.Add(Href_wiki_bry); // add "/wiki/"; EX: /wiki/Page
}
else {} // anchor: noop
}
else { // xwiki
if (app.Xwiki_mgr__missing(xwiki.Domain_bry())) { // xwiki is not offline; use http:
Bry_fmtr url_fmtr = xwiki.Url_fmtr();
if (url_fmtr == null) {
bfr.Add(Href_https_bry); // add "https://"; EX: https://
bfr.Add(xwiki.Domain_bry()); // add xwiki; EX: en_dict
bfr.Add(Href_wiki_bry); // add "/wiki/"; EX: /wiki/
}
else { // url_fmtr exists; DATE:2015-04-22
url_fmtr.Bld_bfr(bfr, bfr_encoder.Xto_bry_and_clear()); // use it and pass bfr_encoder for page_name;
return;
}
}
else { // xwiki is available; use /site/
bfr.Add(Href_site_bry); // add "/site/"; EX: /site/
bfr.Add(xwiki.Domain_bry()); // add xwiki; EX: en_dict
bfr.Add(Href_wiki_bry); // add "/wiki/"; EX: /wiki/
}
}
bfr.Add_bfr_and_clear(bfr_encoder); // append encoded page (+ anchor) built by Build_to_bfr_page
}
// Encodes page (and anchor, if any) from ttl_full into bfr_encoder, starting at page_bgn
private void Build_to_bfr_page(Xoa_ttl ttl, byte[] ttl_full, int page_bgn) {
int anch_bgn = Bry_finder.Find_fwd(ttl_full, Byte_ascii.Hash); // NOTE: cannot use Anch_bgn b/c Anch_bgn has bug with whitespace
if (anch_bgn == Bry_.NotFound) // no anchor; just add page
encoder.Encode(bfr_encoder, ttl_full, page_bgn, ttl_full.length);
else { // anchor exists; check if anchor is preceded by ws; EX: [[A #b]] -> "/wiki/A#b"
int page_end = Bry_finder.Find_bwd_last_ws(ttl_full, anch_bgn); // first 1st ws before #; handles multiple ws
page_end = page_end == Bry_.NotFound ? anch_bgn : page_end; // if ws not found, use # pos; else use 1st ws pos
encoder.Encode(bfr_encoder, ttl_full, page_bgn, page_end); // add page
encoder.Encode(bfr_encoder, ttl_full, anch_bgn, ttl_full.length); // add anchor
}
}
public static final String Href_file_str = "file:///", Href_wiki_str = "/wiki/", Href_site_str = "/site/", Href_xcmd_str = "/xcmd/";
public static final byte[]
  Href_https_bry = Bry_.new_u8("https://") // NOTE: must be "https:" or wmf api won't work; DATE:2015-06-17
, Href_file_bry = Bry_.new_a7(Href_file_str), Href_site_bry = Bry_.new_a7(Href_site_str), Href_wiki_bry = Bry_.new_a7(Href_wiki_str);
private static final int Href_wiki_len = Href_wiki_bry.length;
static final byte Seg_null_tid = 0, Seg_wiki_tid = 1, Seg_site_tid = 2, Seg_xcmd_tid = 3;
private static final byte[] Seg_null_bry = Bry_.new_a7("/null/"), Seg_wiki_bry = Bry_.new_a7(Href_wiki_str), Seg_site_bry = Bry_.new_a7(Href_site_str), Seg_xcmd_bry = Bry_.new_a7(Href_xcmd_str);
private static final byte[][] Seg__ary = new byte[][] {Seg_null_bry, Seg_wiki_bry, Seg_site_bry, Seg_xcmd_bry};
// Handles "/wiki/A" hrefs; resolves any embedded xwiki prefix; EX: "/wiki/fr:A"
private static void Parse_wiki(Xoh_href rv, Url_encoder encoder, Xowe_wiki wiki, byte[] raw, int bgn, int len) {
byte[] ttl_raw = Bry_.Mid(raw, bgn, len);
Xoa_ttl ttl = wiki.Ttl_parse(ttl_raw);
if (ttl == null) { // invalid title; warn and leave rv partially initialized (Tid stays null)
Xoa_app_.Usr_dlg().Warn_many("xowa.href.parser", "invalid_wiki", "wiki href does not have valid title: ~{0}", String_.new_u8(raw, bgn, len));
return;
}
if (ttl.Wik_itm() == null) { // standard href; EX: "/wiki/A"
rv.Tid_(Xoh_href.Tid_wiki);
rv.Wiki_(wiki.Domain_bry()); // wiki is always the current wiki
}
else { // embedded xwiki prefix; EX: "/wiki/fr:A"
rv.Tid_(Xoh_href.Tid_site);
rv.Wiki_(ttl.Wik_itm().Domain_bry()); // wiki is the xwiki prefix; EX: "en.wikipedia.org//wiki/fr:A" -> "fr.wikipedia.org/wiki/A"
}
byte[] page_bry = encoder.Decode(ttl.Full_txt()); // note that Full is everything except for ns, so it handles "fr:A" ("fr:" being treated as ns, so only "A" will be Full_txt)
if (Bry_.Len_eq_0(page_bry)) // handle xwiki hrefs like "fr:"; EX: "/wiki/wikipedia:" on en.wikisource.org/Main Page
page_bry = Xoa_page_.Main_page_bry_empty;
// if (ttl.Qarg_bgn() != Bry_.NotFound)
// rv.Qarg_(ttl.Qarg_txt());
rv.Page_(page_bry); // add page; note that it should be decoded; EX: %20 -> " "; also note that anchor (#) or query params (?) are not parsed; the entire String will be reparsed later
if (ttl.Anch_bgn() != Bry_.NotFound) rv.Anchor_(ttl.Anch_txt());
}
private static void Parse_site(Xoh_href rv, Url_encoder encoder, Xowe_wiki wiki, byte[] raw, int bgn, int len) { // /site/; EX: /site/fr.wikipedia.org/wiki/A
int slash = Bry_finder.Find_fwd(raw, Byte_ascii.Slash, bgn, len); if (slash == Bry_.NotFound) throw Err_mgr._.fmt_("xowa.href.parser", "invalid_site", "site href is missing slash: ~{0}", String_.new_u8(raw, bgn, len));
rv.Tid_(Xoh_href.Tid_site);
byte[] wiki_bry = Bry_.Mid(raw, bgn, slash); // wiki is text between "/site/" and next "/"
Xow_xwiki_itm xwiki = wiki.Appe().Usere().Wiki().Xwiki_mgr().Get_by_key(wiki_bry); // NOTE: site may refer to alias in user_wiki; ex: /site/wikisource.org which points to en.wikisource.org; this occurs during lnke substitution; EX: [//wikisource.org Wikisource]
if (xwiki != null) { // alias found; switch to canonical domain and wiki
wiki_bry = xwiki.Domain_bry();
wiki = wiki.Appe().Wiki_mgr().Get_by_key_or_make(wiki_bry); // NOTE: xwiki links should use case_match of xwiki (en.wiktionary.org) not cur_wiki (en.wikipedia.org); EX:w:alphabet
}
rv.Wiki_(wiki_bry);
int page_pos = slash + Href_wiki_len;
byte[] page_bry = page_pos < len
? Bry_.Mid(raw, page_pos, len) // page is text after next "/" + "/wiki/";
: Bry_.Empty;
if (Bry_.Len_eq_0(page_bry)) // handle "/site/fr.wikipedia.org/wiki/"; note that these are generated by [[fr:]]
page_bry = wiki.Props().Main_page(); // default to Main Page
// int qarg_pos = Bry_finder.Find_bwd(page_bry, Byte_ascii.Question);
// byte[] qarg_bry = null;
// if (qarg_pos != Bry_.NotFound) {
// qarg_bry = Bry_.Mid(page_bry, qarg_pos + 1, page_bry.length);
// rv.Qarg_(qarg_bry);
// page_bry = Bry_.Mid(page_bry, 0, qarg_pos);
// }
Parse_ttl_and_resolve_xwiki(rv, wiki, encoder, page_bry, raw, bgn, len);
}
// Parses page_bry as a title in wiki; resolves one level of xwiki prefix; EX: "wikt:A"
private static void Parse_ttl_and_resolve_xwiki(Xoh_href rv, Xowe_wiki wiki, Url_encoder encoder, byte[] page_bry, byte[] raw, int bgn, int len) {
Xoa_ttl ttl = wiki.Ttl_parse(page_bry);
if (ttl == null) { // invalid title; warn and set empty page so caller doesn't fail
Xoa_app_.Usr_dlg().Warn_many("xowa.href.parser", "invalid_wiki", "wiki href does not have valid title: ~{0}", String_.new_u8(raw, bgn, len));
rv.Page_(Bry_.Empty);
return;
}
if (ttl.Wik_itm() != null) { // page_bry has xwiki; EX: "wikt:A"; note that since this is called by "/site/", there may be two xwikis; EX: "w:wikt:"; Note that more than 2 is not being handled
wiki = wiki.Appe().Wiki_mgr().Get_by_key_or_make(ttl.Wik_itm().Domain_bry());
rv.Wiki_(wiki.Domain_bry());
if (Bry_.Len_eq_0(ttl.Page_txt())) // page_bry is just alias; EX: "wikt:"
page_bry = wiki.Props().Main_page();
else
page_bry = ttl.Page_txt();
ttl = Xoa_ttl.parse_(wiki, page_bry); if (ttl == null) throw Err_mgr._.fmt_("xowa.href.parser", "invalid_wiki", "wiki href does not have valid title: ~{0}", String_.new_u8(raw, bgn, len));
}
rv.Page_(encoder.Decode(ttl.Full_txt())); // add page; note that it should be decoded; EX: %20 -> " "; also note that anchor (#) or query params (?) are not parsed; the entire String will be reparsed later
if (ttl.Anch_bgn() != Bry_.NotFound) // add anchor if it exists
rv.Anchor_(ttl.Anch_txt());
}
private static void Parse_xcmd(Xoh_href rv, Url_encoder encoder, Xowe_wiki wiki, byte[] raw, int bgn, int len) { // /xcmd/; note encoder is passed, but don't decode for now; most invk commands have an _ which will get changed to a " ";
rv.Tid_(Xoh_href.Tid_xcmd);
rv.Wiki_(wiki.Domain_bry()); // wiki is always the current wiki
rv.Page_(Bry_.Mid(raw, bgn, len)); // page is everything after "/xcmd/"; individual cmds will do further parsing; note that it should be decoded; EX: %20 -> " "; also note that anchor (#) or query params (?) are not parsed; the entire String will be reparsed later
}
private static final byte Protocol_xowa_tid = Xoo_protocol_itm.Tid_xowa;
}
/*
NOTE_1:
. swt/mozilla treats text differently in href="{text}" when content_editable=n; occurs in LocationListener.changing
http://a.org -> http://a.org does nothing
A -> file:///A adds "file:///"
/wiki/A -> file:///wiki/A adds "file://"
Category:A -> Category:A noops; Category is assumed to be protocol?
//en.wiktionary.org/wiki/a -> file:///wiki/a strips out site name and prepends "file://"; no idea why
. so, to handle the above, the code does the following
http://a.org -> http://a.org does nothing; nothing needed
A -> /wiki/A always prepend /wiki/
Category:A -> /wiki/Category:A always prepend /wiki/
//en.wiktionary.org/wiki/A -> /site/en.wiktionary.org/wiki/A always transform relative url to /site/
. the href will still come here as file:///wiki/A or file:///site/en.wiktionary.org/wiki/A.
. however, the file:// can be lopped off and discarded and the rest of the href will fall into one of the following cases
.. /wiki/
.. /site/
.. /xcmd/
.. #
.. anything else -> assume to be really a file:// url; EX: file://C/dir/fil.txt -> C/dir/fil.txt
. the other advantage of this approach is that this proc can be reused outside of swt calls; i.e.: it can parse both "file:///wiki/A" and "/wiki/A"
*/

View File

@@ -1,248 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
import org.junit.*;
import gplx.xowa.net.*;
// Tests for Xoh_href_parser: Parse_* cases cover href -> Xoh_href parsing (full and brief print forms);
// Build_* cases cover title -> href building; see Xoh_href_parser_fxt for the fixture.
public class Xoh_href_parser_tst {
@Before public void init() {fxt.Clear();} private Xoh_href_parser_fxt fxt = new Xoh_href_parser_fxt();
@Test public void Parse_full_wiki() {
fxt .Prep_raw_("file:///wiki/A")
.Expd_tid_(Xoh_href.Tid_wiki)
.Expd_full_("en.wikipedia.org/wiki/A")
.Expd_wiki_("en.wikipedia.org")
.Expd_page_("A")
.Test_parse();
}
@Test public void Parse_full_http() {
fxt .Prep_raw_("http://a.org/b")
.Expd_tid_(Xoh_href.Tid_http)
.Expd_full_("http://a.org/b")
.Test_parse();
}
@Test public void Parse_full_file() {
fxt .Prep_raw_("file:///C/xowa/file/a.png")
.Expd_tid_(Xoh_href.Tid_file)
.Expd_full_("file:///C/xowa/file/a.png")
.Test_parse();
}
@Test public void Parse_full_anchor_only() {
fxt .Prep_raw_("#a")
.Expd_tid_(Xoh_href.Tid_anchor)
.Expd_full_("en.wikipedia.org/wiki/Page 1#a")
.Expd_anch_("a")
.Test_parse();
}
@Test public void Parse_full_anchor_w_page() {
fxt .Prep_raw_("file:///wiki/A#b")
.Expd_tid_(Xoh_href.Tid_wiki)
.Expd_full_("en.wikipedia.org/wiki/A#b")
.Expd_anch_("b")
.Test_parse();
}
@Test public void Parse_full_xwiki() {
fxt .Prep_raw_("file:///site/en.wikt.org/wiki/Page")
.Expd_tid_(Xoh_href.Tid_site)
.Expd_full_("en.wikt.org/wiki/Page")
.Expd_page_("Page")
.Test_parse();
}
@Test public void Parse_full_xwiki_domain_only() {
fxt .Prep_raw_("/wiki/wikt:")
.Init_xwiki_alias("wikt", "en.wiktionary.org")
.Expd_full_("en.wiktionary.org/wiki/")
.Expd_page_("")
.Test_parse();
}
@Test public void Parse_full_wiki_page() {
fxt .Prep_raw_("/wiki/A")
.Expd_tid_(Xoh_href.Tid_wiki)
.Expd_full_("en.wikipedia.org/wiki/A")
.Expd_page_("A")
.Test_parse();
}
@Test public void Parse_empty_is_main_page() { // PURPOSE: w/ slash; "wiki/"
fxt .Prep_raw_("/site/en.wikipedia.org/wiki/")
.Expd_tid_(Xoh_href.Tid_site)
.Expd_full_("en.wikipedia.org/wiki/Main Page")
.Expd_page_("Main Page")
.Test_parse();
}
@Test public void Parse_empty_is_main_page_2() { // PURPOSE: wo slash; "wiki"
fxt .Prep_raw_("/site/en.wikipedia.org/wiki")
.Expd_tid_(Xoh_href.Tid_site)
.Expd_full_("en.wikipedia.org/wiki/Main Page")
.Expd_page_("Main Page")
.Test_parse();
}
@Test public void Parse_site_page() {
fxt .Prep_raw_("/site/en.wikt.org/wiki/A")
.Expd_tid_(Xoh_href.Tid_site)
.Expd_full_("en.wikt.org/wiki/A")
.Expd_page_("A")
.Test_parse();
}
@Test public void Parse_site_ns_case() { // PURPOSE: ns should be capitalized per target wiki's casing; "file:" -> "File:"
fxt .Prep_raw_("/site/en.wikt.org/wiki/file:A")
.Expd_tid_(Xoh_href.Tid_site)
.Expd_full_("en.wikt.org/wiki/File:A")
.Expd_page_("File:A")
.Test_parse();
}
@Test public void Parse_site_page__invalid_ttl_shouldnt_fail() { // PURPOSE: invalid title shouldn't fail; EX: A{{B}} is invalid (b/c of braces);
fxt .Prep_raw_("/site/en.wikt.org/wiki/A{{B}}")
.Expd_tid_(Xoh_href.Tid_site)
.Expd_full_("en.wikt.org/wiki/")
.Expd_page_("")
.Test_parse();
}
@Test public void Parse_xcmd_edit() {
fxt .Prep_raw_("/xcmd/page_edit")
.Expd_tid_(Xoh_href.Tid_xcmd)
.Expd_full_("")
.Expd_page_("page_edit")
.Test_parse();
}
@Test public void Parse_xowa() { // PURPOSE: xowa-cmd page should be url-decoded; EX: %22 -> "
fxt .Prep_raw_("xowa-cmd:a%22b*c")
.Expd_tid_(Xoh_href.Tid_xowa)
.Expd_full_("a\"b*c")
.Expd_page_("a\"b*c")
.Test_parse();
}
@Test public void Parse_edit_wiki_quote() {
fxt .Prep_raw_("/wiki/A%22b%22c")
.Expd_tid_(Xoh_href.Tid_wiki)
.Expd_full_("en.wikipedia.org/wiki/A\"b\"c")
.Expd_page_("A\"b\"c")
.Test_parse();
}
// brief (non-full) print form; used by link hover
@Test public void Parse_brief_wiki() {fxt.Init_hover_full_n_().Test_parse("file:///wiki/A" , "A");}
@Test public void Parse_brief_http() {fxt.Init_hover_full_n_().Test_parse("http://a.org/b" , "http://a.org/b");}
@Test public void Parse_brief_file() {fxt.Init_hover_full_n_().Test_parse("file:///C/xowa/file/a.png" , "file:///C/xowa/file/a.png");}
@Test public void Parse_brief_anchor() {fxt.Init_hover_full_n_().Test_parse("#a" , "#a");}
@Test public void Parse_brief_anchor_file() {fxt.Init_hover_full_n_().Test_parse("file:///#a" , "#a");}
@Test public void Parse_brief_xwiki() {fxt.Init_hover_full_n_().Test_parse("file:///site/en.wikt.org/wiki/Page" , "en.wikt.org/Page");}
@Test public void Parse_brief_xwiki_2() {fxt.Init_hover_full_n_().Expd_page_("a").Test_parse("/wiki/wikt:a" , "en.wiktionary.org/a");}
@Test public void Parse_brief_error() {fxt.Init_hover_full_n_().Test_parse("file:///wiki/{{{extlink}}}" , "");} // {{{extlink}}} not a valid title; return empty
// @Test public void Parse_site_qarg() {fxt.Prep_raw_("/site/en.wikt.org/wiki/A?action=edit").Expd_tid_(Xoh_href.Tid_site).Expd_full_("en.wikt.org/wiki/A").Expd_page_("A").Expd_qarg_("action=edit").Test_parse();}
// @Test public void Parse_wiki_qarg() {fxt.Prep_raw_("/wiki/A?action=edit").Expd_tid_(Xoh_href.Tid_wiki).Expd_full_("en.wikipedia.org/wiki/A").Expd_page_("A").Expd_qarg_("action=edit").Test_parse();}
//@Test public void Parse_site_anchor() {fxt.Prep_raw_("/site/en.wikt.org/wiki/A#b_c" ).Expd_tid_(Xoh_href.Tid_site).Expd_full_("en.wikt.org/wiki/A#b_c").Expd_page_("A").Expd_anch_("b_c").Test_parse();}
// Build_* : title -> href
@Test public void Build_xwiki_enc() {fxt.Test_build("wikt:abc?d" , "/site/en.wiktionary.org/wiki/abc%3Fd");}
@Test public void Build_page_quote() {fxt.Test_build("a\"b\"c" , "/wiki/A%22b%22c");}
@Test public void Build_page() {fxt.Test_build("abc" , "/wiki/Abc");}
@Test public void Build_page_ns() {fxt.Test_build("Image:A.png" , "/wiki/Image:A.png");}
@Test public void Build_anchor() {fxt.Test_build("#abc" , "#abc");}
@Test public void Build_page_anchor() {fxt.Test_build("Abc#def" , "/wiki/Abc#def");}
@Test public void Build_xwiki() {fxt.Test_build("wikt:abc" , "/site/en.wiktionary.org/wiki/abc");} // NOTE: "abc" not capitalized, b/c other wiki's case sensitivity is not known; this emulates WP's behavior
@Test public void Build_xwiki_2() {fxt.Test_build("wikt:Special:Search/a" , "/site/en.wiktionary.org/wiki/Special:Search/a");}
@Test public void Build_category() {fxt.Test_build("Category:abc" , "/wiki/Category:Abc");}
@Test public void Parse_site_user_wiki() {// PURPOSE: outlier for wikisource.org which is alias to en.wikisource.org; alias added in user_wiki; EX: [//wikisource.org a]; in browser, automatically goes to http://wikisource.org; in xowa, should go to /site/en.wikisource.org
fxt .Prep_raw_("/site/en_wiki_alias/wiki/")
.Init_xwiki_alias("en_wiki_alias", "en.wikipedia.org")
.Expd_tid_(Xoh_href.Tid_site)
.Expd_full_("en.wikipedia.org/wiki/Main Page")
.Expd_page_("Main Page")
.Test_parse();
}
@Test public void Parse_xwiki_cases_correctly() { // PURPOSE: xwiki links should use case_match of xwiki (en.wiktionary.org) not cur_wiki (en.wikipedia.org); EX:w:Alphabet
fxt .Prep_raw_("/site/en.wiktionary.org/wiki/alphabet")
.Init_xwiki_alias("en.wiktionary.org", "en.wiktionary.org");
Xowe_wiki en_wiktionary_org = fxt.App().Wiki_mgr().Get_by_key_or_make(Bry_.new_a7("en.wiktionary.org"));
en_wiktionary_org.Ns_mgr().Ns_main().Case_match_(Xow_ns_case_.Id_all);
fxt .Expd_tid_(Xoh_href.Tid_site)
.Expd_full_("en.wiktionary.org/wiki/alphabet")
.Expd_page_("alphabet")
.Test_parse();
}
@Test public void Parse_xwiki_compound() { // PURPOSE: [[[w:wikt:]] not handled; DATE:2013-07-25
fxt .Prep_raw_("/site/en.wikipedia.org/wiki/wikt:")
.Init_xwiki_alias("wikt:", "en.wiktionary.org")
.Expd_tid_(Xoh_href.Tid_site)
.Expd_full_("en.wiktionary.org/wiki/Main Page")
.Expd_page_("Main Page")
.Test_parse();
}
@Test public void Parse_protocol() { // PURPOSE: check that urls with form of "ftp://" return back Tid_ftp; DATE:2014-04-25
fxt .Test_parse_protocol("ftp://a.org", Xoo_protocol_itm.Tid_ftp);
}
@Test public void Build_xwiki_wikimedia_mail() { // PURPOSE: DATE:2015-04-22
fxt .Init_xwiki_by_many("mail|https://lists.wikimedia.org/mailman/listinfo/$1|Wikitech Mailing List");
fxt.Test_build("mail:A" , "https://lists.wikimedia.org/mailman/listinfo/A");
}
// @Test public void Parse_question_ttl() {fxt.Prep_raw_("/wiki/%3F").Expd_tid_(Xoh_href.Tid_wiki).Expd_full_("en.wikipedia.org/wiki/?").Expd_page_("?").Test_parse();}
// @Test public void Parse_question_w_arg() {fxt.Prep_raw_("/wiki/A%3F?action=edit").Expd_tid_(Xoh_href.Tid_wiki).Expd_full_("en.wikipedia.org/wiki/A??action=edit").Expd_page_("A??action=edit").Test_parse();}
}
// Test fixture: lazily builds a test app/wiki, runs Xoh_href_parser, and checks parsed/built hrefs
// against the Expd_* expectations set by each test.
class Xoh_href_parser_fxt {
private Xowe_wiki wiki; private Xoh_href_parser href_parser; private Bry_bfr tmp_bfr = Bry_bfr.reset_(255); private Xoh_href href = new Xoh_href();
private static final byte[] Page_1_ttl = Bry_.new_a7("Page 1"); // cur_page passed to Parse; used by anchor-only hrefs
// Resets per-test expectations; creates app/wiki/parser on 1st call only (app is reused across tests)
public void Clear() {
expd_tid = Xoh_href.Tid_null;
prep_raw = expd_full = expd_wiki = expd_page = expd_anch = null;
if (app != null) return; // already initialized; only expd_* state is reset per test
app = Xoa_app_fxt.app_();
wiki = Xoa_app_fxt.wiki_tst_(app);
wiki.Xwiki_mgr().Add_bulk(Bry_.new_a7("wikt|en.wiktionary.org"));
app.Usere().Wiki().Xwiki_mgr().Add_bulk(Bry_.new_a7("en.wiktionary.org|en.wiktionary.org"));
href_parser = new Xoh_href_parser(Xoa_app_.Utl__encoder_mgr().Href(), app.Utl__url_parser().Url_parser());
}
public Xoae_app App() {return app;} private Xoae_app app;
// Registers an xwiki alias in the user wiki; EX: "en_wiki_alias" -> "en.wikipedia.org"
public Xoh_href_parser_fxt Init_xwiki_alias(String alias, String domain) {
app.Usere().Wiki().Xwiki_mgr().Add_full(alias, domain);
return this;
}
public Xoh_href_parser_fxt Init_xwiki_by_many(String raw) {
wiki.Xwiki_mgr().Add_many(Bry_.new_u8(raw)); // need to add to wiki's xwiki_mgr for ttl_parse
return this;
}
public Xoh_href_parser_fxt Init_hover_full_y_() {return Init_hover_full_(Bool_.Y);}
public Xoh_href_parser_fxt Init_hover_full_n_() {return Init_hover_full_(Bool_.N);}
public Xoh_href_parser_fxt Init_hover_full_(boolean v) {wiki.Gui_mgr().Cfg_browser().Link_hover_full_(v); return this;}
// Expd_* setters: null/Tid_null means "don't check that field" in Test_parse
public Xoh_href_parser_fxt Prep_raw_(String v) {this.prep_raw = v; return this;} private String prep_raw;
public Xoh_href_parser_fxt Expd_tid_(byte v) {this.expd_tid = v; return this;} private byte expd_tid;
public Xoh_href_parser_fxt Expd_full_(String v) {this.expd_full = v; return this;} private String expd_full;
public Xoh_href_parser_fxt Expd_wiki_(String v) {this.expd_wiki = v; return this;} private String expd_wiki;
public Xoh_href_parser_fxt Expd_page_(String v) {this.expd_page = v; return this;} private String expd_page;
public Xoh_href_parser_fxt Expd_anch_(String v) {this.expd_anch = v; return this;} private String expd_anch;
// Parses prep_raw and asserts every expectation that was set
public void Test_parse() {
href_parser.Parse(href, prep_raw, wiki, Page_1_ttl);
if (expd_tid != Xoh_href.Tid_null) Tfds.Eq(expd_tid, href.Tid());
if (expd_wiki != null) Tfds.Eq(expd_wiki, String_.new_u8(href.Wiki()));
if (expd_page != null) Tfds.Eq(expd_page, String_.new_u8(href.Page()));
if (expd_anch != null) Tfds.Eq(expd_anch, String_.new_u8(href.Anchor()));
if (expd_full != null) {
href.Print_to_bfr(tmp_bfr, true);
Tfds.Eq(expd_full, tmp_bfr.Xto_str_and_clear());
}
}
// Parses raw and compares the printed form (full/brief per Link_hover_full cfg) to expd
public void Test_parse(String raw, String expd) {
href_parser.Parse(href, raw, wiki, Page_1_ttl);
href.Print_to_bfr(tmp_bfr, wiki.Gui_mgr().Cfg_browser().Link_hover_full());
Tfds.Eq(expd, tmp_bfr.Xto_str_and_clear());
}
// Builds the href for title raw and compares it to expd
public void Test_build(String raw, String expd) {
Xoa_ttl ttl = Xoa_ttl.parse_(wiki, Bry_.new_u8(raw));
href_parser.Build_to_bfr(tmp_bfr, app, wiki.Domain_bry(), ttl);
Tfds.Eq(expd, tmp_bfr.Xto_str_and_clear());
}
// Parses raw and asserts only the detected protocol tid
public void Test_parse_protocol(String raw, byte expd_tid) {
href_parser.Parse(href, raw, wiki, Page_1_ttl);
Tfds.Eq(expd_tid, href.Protocol_tid());
}
}