1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2026-03-02 03:49:30 +00:00

v2.10.3.1

This commit is contained in:
gnosygnu
2015-10-18 22:17:57 -04:00
parent 8e18af05b6
commit 4f43f51b18
1935 changed files with 12500 additions and 12889 deletions

View File

@@ -0,0 +1,92 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.wikis.xwikis.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.wikis.*; import gplx.xowa.wikis.xwikis.*;
import gplx.core.net.*;
import gplx.langs.dsvs.*;
import gplx.xowa.langs.*;
import gplx.xowa.wikis.domains.*; import gplx.xowa.wikis.xwikis.bldrs.*;
public class Xow_xwiki_itm_parser extends Dsv_wkr_base {
private Xow_domain_itm owner_domain_itm;
private int cur_tid = -1; private byte[] cur_fld1, cur_fld2, cur_fld3;
private final Gfo_url_parser url_parser = new Gfo_url_parser(); private final Gfo_url url = new Gfo_url();
public Ordered_hash Xwiki_list() {return xwiki_list;} private final Ordered_hash xwiki_list = Ordered_hash_.New();
@Override public Dsv_fld_parser[] Fld_parsers() {return new Dsv_fld_parser[] {Dsv_fld_parser_.Bry_parser, Dsv_fld_parser_.Bry_parser, Dsv_fld_parser_.Bry_parser, Dsv_fld_parser_.Bry_parser};}
@Override public boolean Write_bry(Dsv_tbl_parser parser, int fld_idx, byte[] src, int bgn, int end) {
switch (fld_idx) {
case 0: cur_tid = Bry_.To_int_or(src, bgn, end, -1); return true;
case 1: cur_fld1 = Bry_.Mid(src, bgn, end); return true;
case 2: cur_fld2 = Bry_.Mid(src, bgn, end); return true;
case 3: cur_fld3 = Bry_.Mid(src, bgn, end); return true;
default: return false;
}
}
public Xow_xwiki_itm_parser Init_by_wiki(Xow_domain_itm owner_domain_itm) {this.owner_domain_itm = owner_domain_itm; return this;}
@Override public void Load_by_bry_bgn() {xwiki_list.Clear();}
@Override public void Commit_itm(Dsv_tbl_parser parser, int pos) {
byte[][] key_ary = Bry_split_.Split(cur_fld1, Byte_ascii.Semic); // allow multiple key defs; EX: "w;wikipedia"
boolean xwiki_is_mw = true;
byte[] domain_name = cur_fld3; // NOTE: by happenstance, domain_name is always cur_fld3
byte[] url_fmt = null, domain_bry = null;
switch (cur_tid) {
case Tid__manual: // EX: "0|domz|http://www.dmoz.org/~{0}|DMOZ"
xwiki_is_mw = false;
url_fmt = cur_fld2;
domain_bry = Xow_xwiki_mgr.Get_domain_from_url(url_parser, url, url_fmt);
break;
case Tid__mw_domain: // EX: "1|w|en.wikipedia.org"
domain_bry = cur_fld2;
break;
case Tid__wm_peer: // EX: "2|wikt|wikipedia"
domain_bry = Bry_.Add(owner_domain_itm.Lang_actl_key(), Byte_ascii.Dot_bry, cur_fld2, gplx.xowa.apps.urls.Xoa_url_parser.Bry_dot_org);
break;
case Tid__wm_lang: // EX: "3|en;english|en|English"
domain_bry = Bry_.Add(cur_fld2, Byte_ascii.Dot_bry, owner_domain_itm.Domain_type().Key_bry(), gplx.xowa.apps.urls.Xoa_url_parser.Bry_dot_org);
break;
default: throw Err_.new_unhandled(cur_tid);
}
byte[] abrv_wm = null;
int lang_id = Xol_lang_stub_.Id__unknown, domain_tid = Xow_domain_tid_.Int__other;
if (xwiki_is_mw) {
url_fmt = Xow_xwiki_mgr.Bld_url_fmt(domain_bry);
Xow_domain_itm domain_itm = Xow_domain_itm_.parse(domain_bry);
if (Bry_.Len_eq_0(domain_name)) { // no name; build default
Xol_lang_stub stub_itm = Xol_lang_stub_.Get_by_key_or_null(domain_itm.Lang_actl_itm().Key());
byte[] lang_name = stub_itm == null ? Bry_.Empty : stub_itm.Canonical_name();
domain_name = Bry_.Add_w_dlm(Byte_ascii.Space, lang_name, domain_itm.Domain_type().Display_bry());
}
abrv_wm = domain_itm.Abrv_wm();
lang_id = domain_itm.Lang_actl_uid();
domain_tid = domain_itm.Domain_type_id();
}
Create_xwikis(key_ary, url_fmt, lang_id, domain_tid, domain_bry, domain_name, abrv_wm);
cur_tid = -1;
cur_fld1 = cur_fld2 = cur_fld3 = null;
}
private void Create_xwikis(byte[][] key_ary, byte[] url_fmt, int lang_id, int domain_tid, byte[] domain_bry, byte[] domain_name, byte[] abrv_wm) {
for (byte[] key : key_ary) {
Xow_xwiki_itm itm = Xow_xwiki_itm_bldr.Instance.Bld_xo(owner_domain_itm, key, url_fmt, domain_name);
xwiki_list.Add(key, itm);
}
}
public static final int
Tid__manual = 0
, Tid__mw_domain = 1
, Tid__wm_peer = 2
, Tid__wm_lang = 3
;
}

View File

@@ -0,0 +1,97 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.wikis.xwikis.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.wikis.*; import gplx.xowa.wikis.xwikis.*;
import org.junit.*; import gplx.xowa.wikis.domains.*;
// Tests for Xow_xwiki_itm_parser: one case per row type, plus multiple keys and the default name.
// Expected lines are "key|url_fmt|domain_name"; the fixture's owner wiki is en.wikipedia.org.
public class Xow_xwiki_itm_parser_tst {
  private final Xow_xwiki_itm_parser_fxt fxt = new Xow_xwiki_itm_parser_fxt();

  // tid 0: url_fmt is taken verbatim from the row
  @Test public void Manual() {
    String raw = String_.Concat_lines_nl_skip_last("0|a|https://a.org/~{0}|A");
    fxt.Exec_parse(raw);
    String expd = String_.Concat_lines_nl_skip_last("a|https://a.org/~{0}|A");
    fxt.Test_parse(expd);
  }

  // tid 1: row gives the full domain
  @Test public void Mw_domain() {
    String raw = String_.Concat_lines_nl_skip_last("1|w|en.wikipedia.org|Wikipedia");
    fxt.Exec_parse(raw);
    String expd = String_.Concat_lines_nl_skip_last("w|https://en.wikipedia.org/wiki/~{0}|Wikipedia");
    fxt.Test_parse(expd);
  }

  // tid 2: row gives the wiki type; lang comes from the owner wiki
  @Test public void Wm_peer() {
    String raw = String_.Concat_lines_nl_skip_last("2|wikt|wiktionary|Wiktionary");
    fxt.Exec_parse(raw);
    String expd = String_.Concat_lines_nl_skip_last("wikt|https://en.wiktionary.org/wiki/~{0}|Wiktionary");
    fxt.Test_parse(expd);
  }

  // tid 3: row gives the lang; wiki type comes from the owner wiki
  @Test public void Wm_lang() {
    String raw = String_.Concat_lines_nl_skip_last("3|fr|fr|French");
    fxt.Exec_parse(raw);
    String expd = String_.Concat_lines_nl_skip_last("fr|https://fr.wikipedia.org/wiki/~{0}|French");
    fxt.Test_parse(expd);
  }

  // keys separated by ";" each produce their own item
  @Test public void Multiple() {
    String raw = String_.Concat_lines_nl_skip_last("2|wikt;wiktionary|wiktionary|Wiktionary");
    fxt.Exec_parse(raw);
    String expd = String_.Concat_lines_nl_skip_last
    ( "wikt|https://en.wiktionary.org/wiki/~{0}|Wiktionary"
    , "wiktionary|https://en.wiktionary.org/wiki/~{0}|Wiktionary"
    );
    fxt.Test_parse(expd);
  }

  // empty name field: a default name is built
  @Test public void Default_name() {
    String raw = String_.Concat_lines_nl_skip_last("2|wikt|wiktionary|");
    fxt.Exec_parse(raw);
    String expd = String_.Concat_lines_nl_skip_last("wikt|https://en.wiktionary.org/wiki/~{0}|English Wiktionary");
    fxt.Test_parse(expd);
  }
}
// Fixture for Xow_xwiki_itm_parser_tst: owns a parser initialized with en.wikipedia.org and
// renders the parsed items as "key|url_fmt|domain_name" lines for comparison.
class Xow_xwiki_itm_parser_fxt {
  private final Xow_xwiki_itm_parser parser = new Xow_xwiki_itm_parser();
  private final Bry_bfr tmp_bfr = Bry_bfr.new_();

  public Xow_xwiki_itm_parser_fxt() {
    Xow_domain_itm owner_domain = Xow_domain_itm_.parse(Bry_.new_a7("en.wikipedia.org"));
    parser.Init_by_wiki(owner_domain);
  }

  // Feeds the raw rows to the parser
  public void Exec_parse(String raw) {
    parser.Load_by_bry(Bry_.new_u8(raw));
  }

  // Compares the expected lines with the parser's current items
  public void Test_parse(String expd) {
    String actl = To_str();
    Tfds.Eq_str_lines(expd, actl);
  }

  // Writes one line per item, then empties the parser's list
  private String To_str() {
    Ordered_hash list = parser.Xwiki_list();
    int len = list.Count();
    int idx = 0;
    while (idx < len) {
      Xow_xwiki_itm itm = (Xow_xwiki_itm)list.Get_at(idx);
      tmp_bfr.Add(itm.Key_bry());
      tmp_bfr.Add_byte_pipe();
      tmp_bfr.Add(itm.Url_fmt());
      tmp_bfr.Add_byte_pipe();
      tmp_bfr.Add(itm.Domain_name());
      tmp_bfr.Add_byte_nl();
      ++idx;
    }
    list.Clear();
    return tmp_bfr.To_str_and_clear();
  }
}

View File

@@ -0,0 +1,147 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.wikis.xwikis.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.wikis.*; import gplx.xowa.wikis.xwikis.*;
import org.junit.*; import gplx.core.strings.*;
import gplx.xowa.wikis.domains.*; import gplx.xowa.langs.*;
// Tests for bulk-adding xwikis to a wiki's Xow_xwiki_mgr, from the sitelink list (langs) and from csv rows (peers)
public class Xow_xwiki_mgr_tst {
  private Xow_xwiki_mgr_fxt fxt = new Xow_xwiki_mgr_fxt();

  @Before public void init() {
    fxt.Clear();
  }

  @Test public void Add_bulk_langs_wiki() {
    fxt.Init_langs();
    Xow_xwiki_itm en = fxt.xwiki_("en", "en.wikipedia.org", "https://en.wikipedia.org/wiki/~{0}");
    Xow_xwiki_itm de = fxt.xwiki_("de", "de.wikipedia.org", "https://de.wikipedia.org/wiki/~{0}");
    Xow_xwiki_itm fr = fxt.xwiki_("fr", "fr.wikipedia.org", "https://fr.wikipedia.org/wiki/~{0}");
    Xow_xwiki_itm ja = fxt.xwiki_("ja", "ja.wikipedia.org", "https://ja.wikipedia.org/wiki/~{0}");
    fxt.Test_add_bulk_langs(en, de, fr, ja);
  }

  @Test public void Add_bulk_langs_grp_commons() {
    fxt.Init_langs();
    fxt.Wiki().Xwiki_mgr().Add_by_sitelink_mgr(Xow_domain_tid_.Int__wikipedia);
    Xow_xwiki_itm de = fxt.xwiki_("de", "de.wikipedia.org", "https://de.wikipedia.org/wiki/~{0}");
    Xow_xwiki_itm fr = fxt.xwiki_("fr", "fr.wikipedia.org", "https://fr.wikipedia.org/wiki/~{0}");
    fxt.Tst_itms(de, fr);
  }

  @Test public void Add_bulk_peers() {
    fxt.Init_peers();
    Xow_xwiki_itm wikt       = fxt.xwiki_("wikt", "en.wiktionary.org", "https://en.wiktionary.org/wiki/~{0}");
    Xow_xwiki_itm wiktionary = fxt.xwiki_("wiktionary", "en.wiktionary.org", "https://en.wiktionary.org/wiki/~{0}");
    Xow_xwiki_itm s          = fxt.xwiki_("s", "en.wikisource.org", "https://en.wikisource.org/wiki/~{0}");
    fxt.Test_add_bulk_peers(wikt, wiktionary, s);
  }

  // PURPOSE: skip "wikipedia" as alias since "Wikipedia" is a namespace; needed for titles like "Wikipedia:Main page",
  // which would otherwise be treated as page "Main Page" on xwiki "Wikipedia"
  @Test public void Add_bulk_peers_skip_self() {
    fxt.Init_peers();
    Xow_xwiki_itm wikipedia = fxt.xwiki_null_("wikipedia");
    Xow_xwiki_itm w         = fxt.xwiki_("w", "en.wikipedia.org", "https://en.wikipedia.org/wiki/~{0}");
    fxt.Test_add_bulk_peers(wikipedia, w);
  }

  // PURPOSE: wikt should generate wiki_tid of wiktionary, not wikipedia; PAGE:en.s:Main_Page DATE:2014-09-14
  @Test public void Add_bulk_peers_tid() {
    fxt.Init_wikt();
    Xow_xwiki_itm wikt = fxt.xwiki_("wikt", "en.wiktionary.org", "https://en.wiktionary.org/wiki/~{0}");
    fxt.Test_add_bulk_peers(wikt);
  }

  // DISABLED: kept for reference; calls Exec_parse / Test_parse, which Xow_xwiki_mgr_fxt does not define
  // @Test public void Duplicate() { // PURPOSE.FIX: multiple aliases for same domain should only be added once to Get_at's list; DATE:2014-11-07
  //   fxt.Exec_parse(String_.Concat_lines_nl_skip_last
  //   ( "0|a1|a.org"
  //   , "0|a2|a.org"
  //   ));
  //   fxt.Test_parse(String_.Concat_lines_nl_skip_last
  //   ( "a1|https://a.org//~{0}"
  //   ));
  // }
}
class Xow_xwiki_mgr_fxt {
Xow_xwiki_mgr xwiki_mgr; Xoa_lang_mgr lang_mgr; String_bldr sb = String_bldr_.new_(); Xoae_app app; Xowe_wiki wiki;
public void Clear() {
if (xwiki_mgr == null) {
app = Xoa_app_fxt.app_();
wiki = Xoa_app_fxt.wiki_tst_(app);
xwiki_mgr = wiki.Xwiki_mgr();
lang_mgr = app.Lang_mgr();
}
xwiki_mgr.Clear();
lang_mgr.Clear();
}
public Xowe_wiki Wiki() {return wiki;}
public Xow_xwiki_itm xwiki_null_(String key) {return Xow_xwiki_itm.new_(Bry_.new_u8(key), Bry_.Empty, Xol_lang_stub_.Id__unknown, Xow_domain_tid_.Int__other, Bry_.Empty, Bry_.Empty);}
public Xow_xwiki_itm xwiki_(String key, String domain_str, String url_fmt) {
Xow_domain_itm domain = Xow_domain_itm_.parse(Bry_.new_u8(domain_str));
return Xow_xwiki_itm.new_(Bry_.new_u8(key), Bry_.new_u8(url_fmt), domain.Lang_actl_itm().Id(), domain.Domain_type_id(), domain.Domain_bry(), domain.Abrv_wm());
}
public Xow_xwiki_mgr_fxt Init_langs() {
app.Xwiki_mgr__sitelink_mgr().Parse(Bry_.new_u8(String_.Concat_lines_nl
( "0|english"
, "1|en|English"
, "0|europe_west"
, "1|fr|French"
, "1|de|German"
, "0|asia_east"
, "1|ja|Japanese"
)));
return this;
}
public Xow_xwiki_mgr_fxt Init_peers() {
wiki.Xwiki_mgr().Add_by_csv(Bry_.new_u8(String_.Concat_lines_nl
( "1|d|www.wikidata.org"
, "2|wikt;wiktionary|wiktionary"
, "2|s|wikisource"
, "2|w;wikipedia|wikipedia"
)));
return this;
}
public Xow_xwiki_mgr_fxt Init_wikt() {
wiki.Xwiki_mgr().Add_by_csv(Bry_.new_u8(String_.Concat_lines_nl
( "2|wikt;wiktionary|wiktionary"
)));
return this;
}
public Xow_xwiki_mgr_fxt Test_add_bulk_langs(Xow_xwiki_itm... itms) {
xwiki_mgr.Add_by_sitelink_mgr();
Tfds.Eq_str_lines(Xto_str(itms), Xto_str(To_ary(itms)));
return this;
}
public Xow_xwiki_mgr_fxt Test_add_bulk_peers(Xow_xwiki_itm... itms) {
Tfds.Eq_str_lines(Xto_str(itms), Xto_str(To_ary(itms)));
return this;
}
public Xow_xwiki_mgr_fxt Tst_itms(Xow_xwiki_itm... itms) {
Tfds.Eq_str_lines(Xto_str(itms), Xto_str(To_ary(itms)));
return this;
}
public Xow_xwiki_mgr_fxt Test_len(int expd) {Tfds.Eq(expd, xwiki_mgr.Len()); return this;}
Xow_xwiki_itm[] To_ary(Xow_xwiki_itm[] itms) {
int len = itms.length;
List_adp rv = List_adp_.new_();
for (int i = 0; i < len; i++) {
byte[] alias = itms[i].Key_bry();
Xow_xwiki_itm itm = xwiki_mgr.Get_by_key(alias);
if (itm == null) itm = xwiki_null_(String_.new_u8(alias)); // "null", ignore
rv.Add(itm);
}
return (Xow_xwiki_itm[])rv.To_ary(Xow_xwiki_itm.class);
}
String Xto_str(Xow_xwiki_itm[] itms) {
int len = itms.length;
for (int i = 0; i < len; i++) {
Xow_xwiki_itm itm = itms[i];
if (Bry_.Len_eq_0(itm.Domain_bry())) // "null", ignore
sb.Add(itm.Key_bry()).Add_char_nl();
else {
sb.Add(itm.Key_bry()).Add_char_pipe().Add(itm.Domain_bry()).Add_char_pipe().Add(itm.Url_fmt()).Add_char_pipe().Add(itm.Domain_tid()).Add_char_nl();
}
}
return sb.To_str_and_clear();
}
}