mirror of
https://github.com/gnosygnu/xowa.git
synced 2026-03-02 03:49:30 +00:00
v2.10.3.1
This commit is contained in:
@@ -0,0 +1,92 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.wikis.xwikis.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.wikis.*; import gplx.xowa.wikis.xwikis.*;
|
||||
import gplx.core.net.*;
|
||||
import gplx.langs.dsvs.*;
|
||||
import gplx.xowa.langs.*;
|
||||
import gplx.xowa.wikis.domains.*; import gplx.xowa.wikis.xwikis.bldrs.*;
|
||||
public class Xow_xwiki_itm_parser extends Dsv_wkr_base {
|
||||
private Xow_domain_itm owner_domain_itm;
|
||||
private int cur_tid = -1; private byte[] cur_fld1, cur_fld2, cur_fld3;
|
||||
private final Gfo_url_parser url_parser = new Gfo_url_parser(); private final Gfo_url url = new Gfo_url();
|
||||
public Ordered_hash Xwiki_list() {return xwiki_list;} private final Ordered_hash xwiki_list = Ordered_hash_.New();
|
||||
@Override public Dsv_fld_parser[] Fld_parsers() {return new Dsv_fld_parser[] {Dsv_fld_parser_.Bry_parser, Dsv_fld_parser_.Bry_parser, Dsv_fld_parser_.Bry_parser, Dsv_fld_parser_.Bry_parser};}
|
||||
@Override public boolean Write_bry(Dsv_tbl_parser parser, int fld_idx, byte[] src, int bgn, int end) {
|
||||
switch (fld_idx) {
|
||||
case 0: cur_tid = Bry_.To_int_or(src, bgn, end, -1); return true;
|
||||
case 1: cur_fld1 = Bry_.Mid(src, bgn, end); return true;
|
||||
case 2: cur_fld2 = Bry_.Mid(src, bgn, end); return true;
|
||||
case 3: cur_fld3 = Bry_.Mid(src, bgn, end); return true;
|
||||
default: return false;
|
||||
}
|
||||
}
|
||||
public Xow_xwiki_itm_parser Init_by_wiki(Xow_domain_itm owner_domain_itm) {this.owner_domain_itm = owner_domain_itm; return this;}
|
||||
@Override public void Load_by_bry_bgn() {xwiki_list.Clear();}
|
||||
@Override public void Commit_itm(Dsv_tbl_parser parser, int pos) {
|
||||
byte[][] key_ary = Bry_split_.Split(cur_fld1, Byte_ascii.Semic); // allow multiple key defs; EX: "w;wikipedia"
|
||||
boolean xwiki_is_mw = true;
|
||||
byte[] domain_name = cur_fld3; // NOTE: by happenstance, domain_name is always cur_fld3
|
||||
byte[] url_fmt = null, domain_bry = null;
|
||||
switch (cur_tid) {
|
||||
case Tid__manual: // EX: "0|domz|http://www.dmoz.org/~{0}|DMOZ"
|
||||
xwiki_is_mw = false;
|
||||
url_fmt = cur_fld2;
|
||||
domain_bry = Xow_xwiki_mgr.Get_domain_from_url(url_parser, url, url_fmt);
|
||||
break;
|
||||
case Tid__mw_domain: // EX: "1|w|en.wikipedia.org"
|
||||
domain_bry = cur_fld2;
|
||||
break;
|
||||
case Tid__wm_peer: // EX: "2|wikt|wikipedia"
|
||||
domain_bry = Bry_.Add(owner_domain_itm.Lang_actl_key(), Byte_ascii.Dot_bry, cur_fld2, gplx.xowa.apps.urls.Xoa_url_parser.Bry_dot_org);
|
||||
break;
|
||||
case Tid__wm_lang: // EX: "3|en;english|en|English"
|
||||
domain_bry = Bry_.Add(cur_fld2, Byte_ascii.Dot_bry, owner_domain_itm.Domain_type().Key_bry(), gplx.xowa.apps.urls.Xoa_url_parser.Bry_dot_org);
|
||||
break;
|
||||
default: throw Err_.new_unhandled(cur_tid);
|
||||
}
|
||||
byte[] abrv_wm = null;
|
||||
int lang_id = Xol_lang_stub_.Id__unknown, domain_tid = Xow_domain_tid_.Int__other;
|
||||
if (xwiki_is_mw) {
|
||||
url_fmt = Xow_xwiki_mgr.Bld_url_fmt(domain_bry);
|
||||
Xow_domain_itm domain_itm = Xow_domain_itm_.parse(domain_bry);
|
||||
if (Bry_.Len_eq_0(domain_name)) { // no name; build default
|
||||
Xol_lang_stub stub_itm = Xol_lang_stub_.Get_by_key_or_null(domain_itm.Lang_actl_itm().Key());
|
||||
byte[] lang_name = stub_itm == null ? Bry_.Empty : stub_itm.Canonical_name();
|
||||
domain_name = Bry_.Add_w_dlm(Byte_ascii.Space, lang_name, domain_itm.Domain_type().Display_bry());
|
||||
}
|
||||
abrv_wm = domain_itm.Abrv_wm();
|
||||
lang_id = domain_itm.Lang_actl_uid();
|
||||
domain_tid = domain_itm.Domain_type_id();
|
||||
}
|
||||
Create_xwikis(key_ary, url_fmt, lang_id, domain_tid, domain_bry, domain_name, abrv_wm);
|
||||
cur_tid = -1;
|
||||
cur_fld1 = cur_fld2 = cur_fld3 = null;
|
||||
}
|
||||
private void Create_xwikis(byte[][] key_ary, byte[] url_fmt, int lang_id, int domain_tid, byte[] domain_bry, byte[] domain_name, byte[] abrv_wm) {
|
||||
for (byte[] key : key_ary) {
|
||||
Xow_xwiki_itm itm = Xow_xwiki_itm_bldr.Instance.Bld_xo(owner_domain_itm, key, url_fmt, domain_name);
|
||||
xwiki_list.Add(key, itm);
|
||||
}
|
||||
}
|
||||
public static final int
|
||||
Tid__manual = 0
|
||||
, Tid__mw_domain = 1
|
||||
, Tid__wm_peer = 2
|
||||
, Tid__wm_lang = 3
|
||||
;
|
||||
}
|
||||
@@ -0,0 +1,97 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.wikis.xwikis.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.wikis.*; import gplx.xowa.wikis.xwikis.*;
|
||||
import org.junit.*; import gplx.xowa.wikis.domains.*;
|
||||
public class Xow_xwiki_itm_parser_tst {
|
||||
private final Xow_xwiki_itm_parser_fxt fxt = new Xow_xwiki_itm_parser_fxt();
|
||||
@Test public void Manual() {
|
||||
fxt.Exec_parse(String_.Concat_lines_nl_skip_last
|
||||
( "0|a|https://a.org/~{0}|A"
|
||||
));
|
||||
fxt.Test_parse(String_.Concat_lines_nl_skip_last
|
||||
( "a|https://a.org/~{0}|A"
|
||||
));
|
||||
}
|
||||
@Test public void Mw_domain() {
|
||||
fxt.Exec_parse(String_.Concat_lines_nl_skip_last
|
||||
( "1|w|en.wikipedia.org|Wikipedia"
|
||||
));
|
||||
fxt.Test_parse(String_.Concat_lines_nl_skip_last
|
||||
( "w|https://en.wikipedia.org/wiki/~{0}|Wikipedia"
|
||||
));
|
||||
}
|
||||
@Test public void Wm_peer() {
|
||||
fxt.Exec_parse(String_.Concat_lines_nl_skip_last
|
||||
( "2|wikt|wiktionary|Wiktionary"
|
||||
));
|
||||
fxt.Test_parse(String_.Concat_lines_nl_skip_last
|
||||
( "wikt|https://en.wiktionary.org/wiki/~{0}|Wiktionary"
|
||||
));
|
||||
}
|
||||
@Test public void Wm_lang() {
|
||||
fxt.Exec_parse(String_.Concat_lines_nl_skip_last
|
||||
( "3|fr|fr|French"
|
||||
));
|
||||
fxt.Test_parse(String_.Concat_lines_nl_skip_last
|
||||
( "fr|https://fr.wikipedia.org/wiki/~{0}|French"
|
||||
));
|
||||
}
|
||||
@Test public void Multiple() {
|
||||
fxt.Exec_parse(String_.Concat_lines_nl_skip_last
|
||||
( "2|wikt;wiktionary|wiktionary|Wiktionary"
|
||||
));
|
||||
fxt.Test_parse(String_.Concat_lines_nl_skip_last
|
||||
( "wikt|https://en.wiktionary.org/wiki/~{0}|Wiktionary"
|
||||
, "wiktionary|https://en.wiktionary.org/wiki/~{0}|Wiktionary"
|
||||
));
|
||||
}
|
||||
@Test public void Default_name() {
|
||||
fxt.Exec_parse(String_.Concat_lines_nl_skip_last
|
||||
( "2|wikt|wiktionary|"
|
||||
));
|
||||
fxt.Test_parse(String_.Concat_lines_nl_skip_last
|
||||
( "wikt|https://en.wiktionary.org/wiki/~{0}|English Wiktionary"
|
||||
));
|
||||
}
|
||||
}
|
||||
class Xow_xwiki_itm_parser_fxt {
|
||||
private final Xow_xwiki_itm_parser parser = new Xow_xwiki_itm_parser();
|
||||
private final Bry_bfr tmp_bfr = Bry_bfr.new_();
|
||||
public Xow_xwiki_itm_parser_fxt() {
|
||||
parser.Init_by_wiki(Xow_domain_itm_.parse(Bry_.new_a7("en.wikipedia.org")));
|
||||
}
|
||||
public void Exec_parse(String raw) {
|
||||
byte[] src = Bry_.new_u8(raw);
|
||||
parser.Load_by_bry(src);
|
||||
}
|
||||
public void Test_parse(String expd) {
|
||||
Tfds.Eq_str_lines(expd, To_str());
|
||||
}
|
||||
private String To_str() {
|
||||
Ordered_hash list = parser.Xwiki_list();
|
||||
int len = list.Count();
|
||||
for (int i = 0; i < len; ++i) {
|
||||
Xow_xwiki_itm itm = (Xow_xwiki_itm)list.Get_at(i);
|
||||
tmp_bfr.Add(itm.Key_bry()).Add_byte_pipe();
|
||||
tmp_bfr.Add(itm.Url_fmt()).Add_byte_pipe();
|
||||
tmp_bfr.Add(itm.Domain_name()).Add_byte_nl();
|
||||
}
|
||||
list.Clear();
|
||||
return tmp_bfr.To_str_and_clear();
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,147 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.wikis.xwikis.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.wikis.*; import gplx.xowa.wikis.xwikis.*;
|
||||
import org.junit.*; import gplx.core.strings.*;
|
||||
import gplx.xowa.wikis.domains.*; import gplx.xowa.langs.*;
|
||||
public class Xow_xwiki_mgr_tst {
|
||||
@Before public void init() {fxt.Clear();} private Xow_xwiki_mgr_fxt fxt = new Xow_xwiki_mgr_fxt();
|
||||
@Test public void Add_bulk_langs_wiki() {
|
||||
fxt.Init_langs();
|
||||
fxt.Test_add_bulk_langs
|
||||
( fxt.xwiki_("en", "en.wikipedia.org", "https://en.wikipedia.org/wiki/~{0}")
|
||||
, fxt.xwiki_("de", "de.wikipedia.org", "https://de.wikipedia.org/wiki/~{0}")
|
||||
, fxt.xwiki_("fr", "fr.wikipedia.org", "https://fr.wikipedia.org/wiki/~{0}")
|
||||
, fxt.xwiki_("ja", "ja.wikipedia.org", "https://ja.wikipedia.org/wiki/~{0}")
|
||||
);
|
||||
}
|
||||
@Test public void Add_bulk_langs_grp_commons() {
|
||||
fxt.Init_langs();
|
||||
fxt.Wiki().Xwiki_mgr().Add_by_sitelink_mgr(Xow_domain_tid_.Int__wikipedia);
|
||||
fxt.Tst_itms(fxt.xwiki_("de", "de.wikipedia.org", "https://de.wikipedia.org/wiki/~{0}"), fxt.xwiki_("fr", "fr.wikipedia.org", "https://fr.wikipedia.org/wiki/~{0}"));
|
||||
}
|
||||
@Test public void Add_bulk_peers() {
|
||||
fxt.Init_peers();
|
||||
fxt.Test_add_bulk_peers
|
||||
( fxt.xwiki_("wikt", "en.wiktionary.org"
|
||||
, "https://en.wiktionary.org/wiki/~{0}")
|
||||
, fxt.xwiki_("wiktionary", "en.wiktionary.org"
|
||||
, "https://en.wiktionary.org/wiki/~{0}")
|
||||
, fxt.xwiki_("s", "en.wikisource.org", "https://en.wikisource.org/wiki/~{0}"));
|
||||
}
|
||||
@Test public void Add_bulk_peers_skip_self() { // PURPOSE: skip "wikipedia" as alias since "Wikipedia" is namespace; needed for titles of "Wikipedia:Main page" (which would otherwise try to go to page "Main Page" in the main names of xwiki "Wikipedia"
|
||||
fxt.Init_peers();
|
||||
fxt.Test_add_bulk_peers
|
||||
( fxt.xwiki_null_("wikipedia")
|
||||
, fxt.xwiki_("w", "en.wikipedia.org", "https://en.wikipedia.org/wiki/~{0}"));
|
||||
}
|
||||
@Test public void Add_bulk_peers_tid() { // PURPOSE:wikt should generate wiki_tid of wiktionary, not wikipedia; PAGE:en.s:Main_Page DATE:2014-09-14
|
||||
fxt.Init_wikt ().Test_add_bulk_peers(fxt.xwiki_("wikt", "en.wiktionary.org", "https://en.wiktionary.org/wiki/~{0}"));
|
||||
}
|
||||
// @Test public void Duplicate() { // PURPOSE.FIX: multiple aliases for same domain should only be added once to Get_at's list; DATE:2014-11-07
|
||||
// fxt.Exec_parse(String_.Concat_lines_nl_skip_last
|
||||
// ( "0|a1|a.org"
|
||||
// , "0|a2|a.org"
|
||||
// ));
|
||||
// fxt.Test_parse(String_.Concat_lines_nl_skip_last
|
||||
// ( "a1|https://a.org//~{0}"
|
||||
// ));
|
||||
// }
|
||||
}
|
||||
class Xow_xwiki_mgr_fxt {
|
||||
Xow_xwiki_mgr xwiki_mgr; Xoa_lang_mgr lang_mgr; String_bldr sb = String_bldr_.new_(); Xoae_app app; Xowe_wiki wiki;
|
||||
public void Clear() {
|
||||
if (xwiki_mgr == null) {
|
||||
app = Xoa_app_fxt.app_();
|
||||
wiki = Xoa_app_fxt.wiki_tst_(app);
|
||||
xwiki_mgr = wiki.Xwiki_mgr();
|
||||
lang_mgr = app.Lang_mgr();
|
||||
}
|
||||
xwiki_mgr.Clear();
|
||||
lang_mgr.Clear();
|
||||
}
|
||||
public Xowe_wiki Wiki() {return wiki;}
|
||||
public Xow_xwiki_itm xwiki_null_(String key) {return Xow_xwiki_itm.new_(Bry_.new_u8(key), Bry_.Empty, Xol_lang_stub_.Id__unknown, Xow_domain_tid_.Int__other, Bry_.Empty, Bry_.Empty);}
|
||||
public Xow_xwiki_itm xwiki_(String key, String domain_str, String url_fmt) {
|
||||
Xow_domain_itm domain = Xow_domain_itm_.parse(Bry_.new_u8(domain_str));
|
||||
return Xow_xwiki_itm.new_(Bry_.new_u8(key), Bry_.new_u8(url_fmt), domain.Lang_actl_itm().Id(), domain.Domain_type_id(), domain.Domain_bry(), domain.Abrv_wm());
|
||||
}
|
||||
public Xow_xwiki_mgr_fxt Init_langs() {
|
||||
app.Xwiki_mgr__sitelink_mgr().Parse(Bry_.new_u8(String_.Concat_lines_nl
|
||||
( "0|english"
|
||||
, "1|en|English"
|
||||
, "0|europe_west"
|
||||
, "1|fr|French"
|
||||
, "1|de|German"
|
||||
, "0|asia_east"
|
||||
, "1|ja|Japanese"
|
||||
)));
|
||||
return this;
|
||||
}
|
||||
public Xow_xwiki_mgr_fxt Init_peers() {
|
||||
wiki.Xwiki_mgr().Add_by_csv(Bry_.new_u8(String_.Concat_lines_nl
|
||||
( "1|d|www.wikidata.org"
|
||||
, "2|wikt;wiktionary|wiktionary"
|
||||
, "2|s|wikisource"
|
||||
, "2|w;wikipedia|wikipedia"
|
||||
)));
|
||||
return this;
|
||||
}
|
||||
public Xow_xwiki_mgr_fxt Init_wikt() {
|
||||
wiki.Xwiki_mgr().Add_by_csv(Bry_.new_u8(String_.Concat_lines_nl
|
||||
( "2|wikt;wiktionary|wiktionary"
|
||||
)));
|
||||
return this;
|
||||
}
|
||||
public Xow_xwiki_mgr_fxt Test_add_bulk_langs(Xow_xwiki_itm... itms) {
|
||||
xwiki_mgr.Add_by_sitelink_mgr();
|
||||
Tfds.Eq_str_lines(Xto_str(itms), Xto_str(To_ary(itms)));
|
||||
return this;
|
||||
}
|
||||
public Xow_xwiki_mgr_fxt Test_add_bulk_peers(Xow_xwiki_itm... itms) {
|
||||
Tfds.Eq_str_lines(Xto_str(itms), Xto_str(To_ary(itms)));
|
||||
return this;
|
||||
}
|
||||
public Xow_xwiki_mgr_fxt Tst_itms(Xow_xwiki_itm... itms) {
|
||||
Tfds.Eq_str_lines(Xto_str(itms), Xto_str(To_ary(itms)));
|
||||
return this;
|
||||
}
|
||||
public Xow_xwiki_mgr_fxt Test_len(int expd) {Tfds.Eq(expd, xwiki_mgr.Len()); return this;}
|
||||
Xow_xwiki_itm[] To_ary(Xow_xwiki_itm[] itms) {
|
||||
int len = itms.length;
|
||||
List_adp rv = List_adp_.new_();
|
||||
for (int i = 0; i < len; i++) {
|
||||
byte[] alias = itms[i].Key_bry();
|
||||
Xow_xwiki_itm itm = xwiki_mgr.Get_by_key(alias);
|
||||
if (itm == null) itm = xwiki_null_(String_.new_u8(alias)); // "null", ignore
|
||||
rv.Add(itm);
|
||||
}
|
||||
return (Xow_xwiki_itm[])rv.To_ary(Xow_xwiki_itm.class);
|
||||
}
|
||||
String Xto_str(Xow_xwiki_itm[] itms) {
|
||||
int len = itms.length;
|
||||
for (int i = 0; i < len; i++) {
|
||||
Xow_xwiki_itm itm = itms[i];
|
||||
if (Bry_.Len_eq_0(itm.Domain_bry())) // "null", ignore
|
||||
sb.Add(itm.Key_bry()).Add_char_nl();
|
||||
else {
|
||||
sb.Add(itm.Key_bry()).Add_char_pipe().Add(itm.Domain_bry()).Add_char_pipe().Add(itm.Url_fmt()).Add_char_pipe().Add(itm.Domain_tid()).Add_char_nl();
|
||||
}
|
||||
}
|
||||
return sb.To_str_and_clear();
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user