1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2026-03-02 03:49:30 +00:00

uca category support and other changes

This commit is contained in:
gnosygnu
2016-10-12 08:57:22 -04:00
parent e3b393650d
commit 3fc2e0741f
187 changed files with 3486 additions and 2984 deletions

View File

@@ -0,0 +1,96 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.htmls.hrefs; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*;
import org.junit.*; import gplx.xowa.apps.urls.*; import gplx.xowa.wikis.nss.*;
/**
 * Exercises Xoh_href_parser through Xoh_href_parser_fxt for hrefs of the form "/site/{domain}/wiki/{page}"
 * as well as non-wiki hrefs (http, ftp, file, bare anchor, "/xcmd/" and "xowa-cmd:").
 * Each test parses one raw href and then checks parts of the resulting url via the fixture's Test__ methods.
 */
public class Xoh_href_parser__basic__tst {
  private final Xoh_href_parser_fxt fxt = new Xoh_href_parser_fxt();

  @Test public void Site__basic() {
    fxt.Exec__parse_as_url("/site/en.wikipedia.org/wiki/A");
    fxt.Test__tid(Xoa_url_.Tid_page);
    fxt.Test__to_str("en.wikipedia.org/wiki/A");
    fxt.Test__page("A");
  }

  // lower-case "file:" in the href is expected back as "File:A"
  @Test public void Site__ns_case() {
    fxt.Exec__parse_as_url("/site/en.wikipedia.org/wiki/file:A");
    fxt.Test__page("File:A");
  }

  // href with nothing after "/wiki/" is expected to yield "Main_Page" and be flagged as the main page
  @Test public void Site__main_page() {
    fxt.Exec__parse_as_url("/site/en.wikipedia.org/wiki/");
    fxt.Test__page("Main_Page");
    fxt.Test__page_is_main_y();
  }

  @Test public void Site__anch() {
    fxt.Exec__parse_as_url("/site/en.wikipedia.org/wiki/A#b_c");
    fxt.Test__page("A");
    fxt.Test__anch("b_c");
  }

  @Test public void Site__qarg() {
    fxt.Exec__parse_as_url("/site/en.wikipedia.org/wiki/A?action=edit");
    fxt.Test__page("A");
    fxt.Test__qargs("?action=edit");
  }

  // PURPOSE: invalid title shouldn't fail; EX: A{{B}} is invalid (b/c of braces); page is expected to be empty
  @Test public void Site__invalid_ttl_shouldnt_fail() {
    fxt.Exec__parse_as_url("/site/en.wikipedia.org/wiki/A{{B}}");
    fxt.Test__page("");
  }

  // PURPOSE: xwiki links should use case_match of xwiki (en.wiktionary.org) not cur_wiki (en.wikipedia.org); EX:w:Alphabet
  @Test public void Site__xwiki_cases_correctly() {
    Xowe_wiki wiktionary = fxt.Prep_create_wiki("en.wiktionary.org");
    wiktionary.Ns_mgr().Ns_main().Case_match_(Xow_ns_case_.Tid__all);
    fxt.Prep_add_xwiki_to_user("en.wiktionary.org", "en.wiktionary.org");
    // lower-case "alphabet" must come back unchanged
    fxt.Exec__parse_as_url("/site/en.wiktionary.org/wiki/alphabet")
      .Test__to_str("en.wiktionary.org/wiki/alphabet")
      .Test__page("alphabet");
  }

  // PURPOSE: [[w:wikt:]] not handled; DATE:2013-07-25
  @Test public void Site__xwiki_compound() {
    fxt.Prep_add_xwiki_to_wiki("wikt", "en.wiktionary.org");
    fxt.Exec__parse_as_url("/site/en.wikipedia.org/wiki/wikt:");
    fxt.Test__tid(Xoa_url_.Tid_page);
    fxt.Test__page("Main_Page");
    fxt.Test__to_str("en.wiktionary.org/wiki/Main_Page");
  }

  // disabled test, kept as found:
  // @Test public void Vnt() {
  // Xowe_wiki wiki = fxt.Wiki();
  // fxt.Prep_add_xwiki_to_user("zh.wikipedia.org");
  // wiki.Lang().Vnt_mgr().Enabled_(true);
  // wiki.Lang().Vnt_mgr().Vnt_grp().Add(new gplx.xowa.langs.vnts.Vnt_mnu_itm(Bry_.new_a7("zh-hans"), Bry_.new_a7("zh-hant")));
  // fxt.Exec__parse_as_url("/site/zh.wikipedia.org/zh-hant/A").Test__page("A").Chk_vnt("zh-hant");
  // }

  @Test public void Http__basic() {
    fxt.Exec__parse_as_url("http://a.org/b");
    fxt.Test__tid(Xoa_url_.Tid_inet);
  }

  // PURPOSE: check that urls with form of "ftp://" are typed as Tid_inet (same tid asserted for http above); DATE:2014-04-25
  @Test public void Prot__ftp() {
    fxt.Exec__parse_as_url("ftp://a.org");
    fxt.Test__tid(Xoa_url_.Tid_inet);
  }

  @Test public void File__basic() {
    fxt.Exec__parse_as_url("file:///C/xowa/file/a.png");
    fxt.Test__tid(Xoa_url_.Tid_file);
  }

  // bare anchor: the expected string places "#a" on "Page 1", the page name the fixture passes to the parser
  @Test public void Anchor__basic() {
    fxt.Exec__parse_as_url("#a");
    fxt.Test__tid(Xoa_url_.Tid_anch);
    fxt.Test__to_str("en.wikipedia.org/wiki/Page 1#a");
    fxt.Test__anch("a");
  }

  @Test public void Xcmd__basic() {
    fxt.Exec__parse_as_url("/xcmd/page_edit");
    fxt.Test__tid(Xoa_url_.Tid_xcmd);
    fxt.Test__page("page_edit");
  }

  // "%22" is expected to be decoded to a double-quote in the page
  @Test public void Xowa__basic() {
    fxt.Exec__parse_as_url("xowa-cmd:a%22b*c");
    fxt.Test__tid(Xoa_url_.Tid_xcmd);
    fxt.Test__page("a\"b*c");
  }

  // COMMENTED: this seems wrong; [//wikisource.org] should go to https://wikisource.org not https://en.wikisource.org; both sites are different; DATE:2015-08-02
  // @Test public void Site__user_wiki() {// PURPOSE: outlier for wikisource.org which is alias to en.wikisource.org; alias added in user_wiki; EX: [//wikisource.org a]; in browser, automatically goes to http://wikisource.org; in xowa, should go to /site/en.wikisource.org
  // fxt.Prep_xwiki(fxt.App().User().Wikii(), "en_wiki_alias", "en.wikipedia.org", null);
  // fxt.Exec__parse_as_url("/site/en_wiki_alias/wiki/")
  // .Test__tid(Xoa_url_.Tid_page)
  // .Test__page("Main_Page")
  // .Test__to_str("en.wikipedia.org/wiki/Main_Page")
  // ;
  // }
}
/** Test fixture: adds an Xoh_href_parser entry point on top of Xow_url_parser_fxt. */
class Xoh_href_parser_fxt extends Xow_url_parser_fxt {
  // handed to Parse_as_url as its last argument; presumably the title of the current page
  // (Anchor__basic expects "#a" to come back as ".../wiki/Page 1#a") -- confirm against Xoh_href_parser
  private static final byte[] Bry__page_1 = Bry_.new_a7("Page 1");
  private final Xoh_href_parser href_parser = new Xoh_href_parser();

  /**
   * Runs the href parser on {@code raw} and stores the outcome in {@code actl_url}.
   * {@code actl_url} and {@code cur_wiki} are not declared here; presumably inherited from Xow_url_parser_fxt.
   *
   * @param raw href text to parse; converted to bytes with Bry_.new_u8
   * @return this fixture, so that checks can be chained
   */
  public Xoh_href_parser_fxt Exec__parse_as_url(String raw) {
    byte[] raw_bry = Bry_.new_u8(raw);
    href_parser.Parse_as_url(actl_url, raw_bry, cur_wiki, Bry__page_1);
    return this;
  }
}

View File

@@ -0,0 +1,38 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.htmls.hrefs; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*;
import org.junit.*; import gplx.xowa.apps.urls.*; import gplx.xowa.wikis.nss.*;
/**
 * Exercises Xoh_href_parser on "/wiki/..." hrefs that carry query arguments,
 * including their position relative to an anchor and percent-encoded values.
 */
public class Xoh_href_parser__qargs__tst {
  private final Xoh_href_parser_fxt fxt = new Xoh_href_parser_fxt();

  // two query args are expected to survive unchanged in the url string
  @Test public void Basic() {
    fxt.Exec__parse_as_url("/wiki/A?k1=v1&k2=v2")
      .Test__page("A")
      .Test__to_str("en.wikipedia.org/wiki/A?k1=v1&k2=v2");
  }

  // PURPOSE.fix: anchor was being placed before qargs; DATE:2016-10-08
  @Test public void Anch() {
    fxt.Exec__parse_as_url("/wiki/Category:A?pagefrom=A#mw-pages")
      .Test__page("Category:A")
      .Test__to_str("en.wikipedia.org/wiki/Category:A?pagefrom=A#mw-pages"); // was Category:A#mw-page?pagefrom=A
  }

  // FUTURE: qargs should be unencoded by default; decoded on request
  // PURPOSE.fix: do not use decoded String; DATE:2016-10-08
  @Test public void Encoded() {
    fxt.Exec__parse_as_url("/wiki/Category:A?pagefrom=A%26B#mw-pages")
      .Test__page("Category:A")
      .Test__qargs("?pagefrom=A&B"); // "%26" is expected back as "&"
  }
}

View File

@@ -0,0 +1,50 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.htmls.hrefs; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*;
import org.junit.*; import gplx.xowa.apps.urls.*; import gplx.xowa.wikis.nss.*;
/**
 * Exercises Xoh_href_parser on hrefs of the form "/wiki/{page}", i.e. hrefs without an explicit "/site/{domain}" prefix.
 * The expected strings show these resolving against "en.wikipedia.org".
 */
public class Xoh_href_parser__wiki__tst {
  private final Xoh_href_parser_fxt fxt = new Xoh_href_parser_fxt();

  @Test public void Basic() {
    fxt.Exec__parse_as_url("/wiki/A");
    fxt.Test__tid(Xoa_url_.Tid_page);
    fxt.Test__to_str("en.wikipedia.org/wiki/A");
    fxt.Test__wiki("en.wikipedia.org");
    fxt.Test__page("A");
  }

  // encoded question mark ("%3F") is expected to become part of the page, not start the qargs
  @Test public void Page__w_question() {
    fxt.Exec__parse_as_url("/wiki/%3F");
    fxt.Test__page("?");
  }

  @Test public void Qarg() {
    fxt.Exec__parse_as_url("/wiki/A?action=edit");
    fxt.Test__page("A");
    fxt.Test__qargs("?action=edit");
    fxt.Test__to_str("en.wikipedia.org/wiki/A?action=edit");
  }

  // encoded "?" belongs to the page; the literal "?" starts the qargs
  @Test public void Qarg__w_question() {
    fxt.Exec__parse_as_url("/wiki/A%3F?action=edit");
    fxt.Test__page("A?");
    fxt.Test__qargs("?action=edit");
  }

  @Test public void Anchor() {
    fxt.Exec__parse_as_url("/wiki/A#b");
    fxt.Test__to_str("en.wikipedia.org/wiki/A#b");
    fxt.Test__anch("b");
  }

  // href consisting only of an xwiki prefix is expected to land on the Main_Page of the registered xwiki domain
  @Test public void Xwiki__only() {
    fxt.Prep_add_xwiki_to_wiki("c", "commons.wikimedia.org");
    fxt.Exec__parse_as_url("/wiki/c:");
    fxt.Test__page_is_main_y();
    fxt.Test__page("Main_Page");
    fxt.Test__to_str("commons.wikimedia.org/wiki/Main_Page");
  }

  // "%22" is expected to be decoded to a double-quote in the page
  @Test public void Encoded() {
    fxt.Exec__parse_as_url("/wiki/A%22b%22c");
    fxt.Test__page("A\"b\"c");
  }

  // PURPOSE: handle triple slashes; PAGE:esolangs.org/wiki/Language_list; DATE:2015-11-14
  @Test public void Triple_slash() {
    fxt.Exec__parse_as_url("/wiki////");
    fxt.Test__to_str("en.wikipedia.org/wiki////");
    fxt.Test__wiki("en.wikipedia.org");
    fxt.Test__page("///");
  }

  // PURPOSE: variant of triple slashes; DATE:2015-11-14
  @Test public void Http() {
    fxt.Exec__parse_as_url("/wiki/http://a");
    fxt.Test__to_str("en.wikipedia.org/wiki/Http://a");
    fxt.Test__wiki("en.wikipedia.org");
    fxt.Test__page("Http://a");
  }
}

View File

@@ -1,124 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.htmls.hrefs; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*;
import org.junit.*; import gplx.xowa.apps.urls.*; import gplx.xowa.wikis.nss.*;
/**
 * Exercises Xoh_href_parser through Xoh_href_parser_fxt for "/wiki/..." hrefs, "/site/{domain}/wiki/..." hrefs
 * and non-wiki hrefs (http, ftp, file, bare anchor, "/xcmd/" and "xowa-cmd:").
 * Each test parses one raw href and then checks parts of the resulting url via the fixture's Chk_ methods.
 */
public class Xoh_href_parser_tst {
  private final Xoh_href_parser_fxt fxt = new Xoh_href_parser_fxt();

  @Test public void Wiki__basic() {
    fxt.Run_parse_by_href("/wiki/A");
    fxt.Chk_tid(Xoa_url_.Tid_page);
    fxt.Chk_to_str("en.wikipedia.org/wiki/A");
    fxt.Chk_wiki("en.wikipedia.org");
    fxt.Chk_page("A");
  }

  // encoded question mark ("%3F") is expected to become part of the page, not start the qargs
  @Test public void Wiki__page__w_question() {
    fxt.Run_parse_by_href("/wiki/%3F");
    fxt.Chk_page("?");
  }

  @Test public void Wiki__qarg() {
    fxt.Run_parse_by_href("/wiki/A?action=edit");
    fxt.Chk_page("A");
    fxt.Chk_qargs("?action=edit");
    fxt.Chk_to_str("en.wikipedia.org/wiki/A?action=edit");
  }

  // encoded "?" belongs to the page; the literal "?" starts the qargs
  @Test public void Wiki__qarg__w_question() {
    fxt.Run_parse_by_href("/wiki/A%3F?action=edit");
    fxt.Chk_page("A?");
    fxt.Chk_qargs("?action=edit");
  }

  @Test public void Wiki__anchor() {
    fxt.Run_parse_by_href("/wiki/A#b");
    fxt.Chk_to_str("en.wikipedia.org/wiki/A#b");
    fxt.Chk_anch("b");
  }

  // href consisting only of an xwiki prefix is expected to land on the Main_Page of the registered xwiki domain
  @Test public void Wiki__xwiki__only() {
    fxt.Prep_add_xwiki_to_wiki("c", "commons.wikimedia.org");
    fxt.Run_parse_by_href("/wiki/c:");
    fxt.Chk_page_is_main_y();
    fxt.Chk_page("Main_Page");
    fxt.Chk_to_str("commons.wikimedia.org/wiki/Main_Page");
  }

  // "%22" is expected to be decoded to a double-quote in the page
  @Test public void Wiki__encoded() {
    fxt.Run_parse_by_href("/wiki/A%22b%22c");
    fxt.Chk_page("A\"b\"c");
  }

  // PURPOSE: handle triple slashes; PAGE:esolangs.org/wiki/Language_list; DATE:2015-11-14
  @Test public void Wiki__triple_slash() {
    fxt.Run_parse_by_href("/wiki////");
    fxt.Chk_to_str("en.wikipedia.org/wiki////");
    fxt.Chk_wiki("en.wikipedia.org");
    fxt.Chk_page("///");
  }

  // PURPOSE: variant of triple slashes; DATE:2015-11-14
  @Test public void Wiki__http() {
    fxt.Run_parse_by_href("/wiki/http://a");
    fxt.Chk_to_str("en.wikipedia.org/wiki/Http://a");
    fxt.Chk_wiki("en.wikipedia.org");
    fxt.Chk_page("Http://a");
  }

  @Test public void Site__basic() {
    fxt.Run_parse_by_href("/site/en.wikipedia.org/wiki/A");
    fxt.Chk_tid(Xoa_url_.Tid_page);
    fxt.Chk_to_str("en.wikipedia.org/wiki/A");
    fxt.Chk_page("A");
  }

  // lower-case "file:" in the href is expected back as "File:A"
  @Test public void Site__ns_case() {
    fxt.Run_parse_by_href("/site/en.wikipedia.org/wiki/file:A");
    fxt.Chk_page("File:A");
  }

  // href with nothing after "/wiki/" is expected to yield "Main_Page" and be flagged as the main page
  @Test public void Site__main_page() {
    fxt.Run_parse_by_href("/site/en.wikipedia.org/wiki/");
    fxt.Chk_page("Main_Page");
    fxt.Chk_page_is_main_y();
  }

  @Test public void Site__anch() {
    fxt.Run_parse_by_href("/site/en.wikipedia.org/wiki/A#b_c");
    fxt.Chk_page("A");
    fxt.Chk_anch("b_c");
  }

  @Test public void Site__qarg() {
    fxt.Run_parse_by_href("/site/en.wikipedia.org/wiki/A?action=edit");
    fxt.Chk_page("A");
    fxt.Chk_qargs("?action=edit");
  }

  // PURPOSE: invalid title shouldn't fail; EX: A{{B}} is invalid (b/c of braces); page is expected to be empty
  @Test public void Site__invalid_ttl_shouldnt_fail() {
    fxt.Run_parse_by_href("/site/en.wikipedia.org/wiki/A{{B}}");
    fxt.Chk_page("");
  }

  // PURPOSE: xwiki links should use case_match of xwiki (en.wiktionary.org) not cur_wiki (en.wikipedia.org); EX:w:Alphabet
  @Test public void Site__xwiki_cases_correctly() {
    Xowe_wiki wiktionary = fxt.Prep_create_wiki("en.wiktionary.org");
    wiktionary.Ns_mgr().Ns_main().Case_match_(Xow_ns_case_.Tid__all);
    fxt.Prep_add_xwiki_to_user("en.wiktionary.org", "en.wiktionary.org");
    // lower-case "alphabet" must come back unchanged
    fxt.Run_parse_by_href("/site/en.wiktionary.org/wiki/alphabet")
      .Chk_to_str("en.wiktionary.org/wiki/alphabet")
      .Chk_page("alphabet");
  }

  // PURPOSE: [[w:wikt:]] not handled; DATE:2013-07-25
  @Test public void Site__xwiki_compound() {
    fxt.Prep_add_xwiki_to_wiki("wikt", "en.wiktionary.org");
    fxt.Run_parse_by_href("/site/en.wikipedia.org/wiki/wikt:");
    fxt.Chk_tid(Xoa_url_.Tid_page);
    fxt.Chk_page("Main_Page");
    fxt.Chk_to_str("en.wiktionary.org/wiki/Main_Page");
  }

  // disabled test, kept as found:
  // @Test public void Vnt() {
  // Xowe_wiki wiki = fxt.Wiki();
  // fxt.Prep_add_xwiki_to_user("zh.wikipedia.org");
  // wiki.Lang().Vnt_mgr().Enabled_(true);
  // wiki.Lang().Vnt_mgr().Vnt_grp().Add(new gplx.xowa.langs.vnts.Vnt_mnu_itm(Bry_.new_a7("zh-hans"), Bry_.new_a7("zh-hant")));
  // fxt.Run_parse_by_href("/site/zh.wikipedia.org/zh-hant/A").Chk_page("A").Chk_vnt("zh-hant");
  // }

  @Test public void Http__basic() {
    fxt.Run_parse_by_href("http://a.org/b");
    fxt.Chk_tid(Xoa_url_.Tid_inet);
  }

  // PURPOSE: check that urls with form of "ftp://" are typed as Tid_inet (same tid asserted for http above); DATE:2014-04-25
  @Test public void Prot__ftp() {
    fxt.Run_parse_by_href("ftp://a.org");
    fxt.Chk_tid(Xoa_url_.Tid_inet);
  }

  @Test public void File__basic() {
    fxt.Run_parse_by_href("file:///C/xowa/file/a.png");
    fxt.Chk_tid(Xoa_url_.Tid_file);
  }

  // bare anchor: the expected string places "#a" on "Page 1", the page name the fixture passes to the parser
  @Test public void Anchor__basic() {
    fxt.Run_parse_by_href("#a");
    fxt.Chk_tid(Xoa_url_.Tid_anch);
    fxt.Chk_to_str("en.wikipedia.org/wiki/Page 1#a");
    fxt.Chk_anch("a");
  }

  @Test public void Xcmd__basic() {
    fxt.Run_parse_by_href("/xcmd/page_edit");
    fxt.Chk_tid(Xoa_url_.Tid_xcmd);
    fxt.Chk_page("page_edit");
  }

  // "%22" is expected to be decoded to a double-quote in the page
  @Test public void Xowa__basic() {
    fxt.Run_parse_by_href("xowa-cmd:a%22b*c");
    fxt.Chk_tid(Xoa_url_.Tid_xcmd);
    fxt.Chk_page("a\"b*c");
  }

  // COMMENTED: this seems wrong; [//wikisource.org] should go to https://wikisource.org not https://en.wikisource.org; both sites are different; DATE:2015-08-02
  // @Test public void Site__user_wiki() {// PURPOSE: outlier for wikisource.org which is alias to en.wikisource.org; alias added in user_wiki; EX: [//wikisource.org a]; in browser, automatically goes to http://wikisource.org; in xowa, should go to /site/en.wikisource.org
  // fxt.Prep_xwiki(fxt.App().User().Wikii(), "en_wiki_alias", "en.wikipedia.org", null);
  // fxt.Run_parse_by_href("/site/en_wiki_alias/wiki/")
  // .Chk_tid(Xoa_url_.Tid_page)
  // .Chk_page("Main_Page")
  // .Chk_to_str("en.wikipedia.org/wiki/Main_Page")
  // ;
  // }
}
/** Test fixture: adds an Xoh_href_parser entry point on top of Xow_url_parser_fxt. */
class Xoh_href_parser_fxt extends Xow_url_parser_fxt {
  // handed to Parse_as_url as its last argument; presumably the title of the current page
  // (Anchor__basic expects "#a" to come back as ".../wiki/Page 1#a") -- confirm against Xoh_href_parser
  private static final byte[] Bry__page_1 = Bry_.new_a7("Page 1");
  private final Xoh_href_parser href_parser = new Xoh_href_parser();

  /**
   * Runs the href parser on {@code raw} and stores the outcome in {@code actl_url}.
   * {@code actl_url} and {@code cur_wiki} are not declared here; presumably inherited from Xow_url_parser_fxt.
   *
   * @param raw href text to parse; converted to bytes with Bry_.new_u8
   * @return this fixture, so that checks can be chained
   */
  public Xoh_href_parser_fxt Run_parse_by_href(String raw) {
    byte[] raw_bry = Bry_.new_u8(raw);
    href_parser.Parse_as_url(actl_url, raw_bry, cur_wiki, Bry__page_1);
    return this;
  }
}