1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2026-03-02 03:49:30 +00:00
This commit is contained in:
gnosygnu
2015-08-24 00:32:13 -04:00
parent df10db140c
commit ed911e3de5
220 changed files with 2618 additions and 1569 deletions

View File

@@ -18,6 +18,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
package gplx.xowa.wikis.xwikis; import gplx.*; import gplx.xowa.*; import gplx.xowa.wikis.*;
import gplx.core.net.*;
import gplx.xowa.langs.*;
import gplx.xowa.wikis.domains.*;
public class Xow_xwiki_itm implements gplx.CompareAble {
public Xow_xwiki_itm(byte[] key_bry, byte[] url_fmt, int lang_id, int domain_tid, byte[] domain_bry, byte[] domain_name) {
this.key_bry = key_bry; this.key_str = String_.new_u8(key_bry);
@@ -35,10 +36,10 @@ public class Xow_xwiki_itm implements gplx.CompareAble {
public byte[] Domain_name() {return domain_name;} private final byte[] domain_name; // EX: Wikimedia Commons
public boolean Offline() {return offline;} public Xow_xwiki_itm Offline_(boolean v) {offline = v; return this;} private boolean offline;
public int compareTo(Object obj) {Xow_xwiki_itm comp = (Xow_xwiki_itm)obj; return Bry_.Compare(key_bry, comp.key_bry);}
// Reports whether this xwiki entry counts as an inter-language link relative to the current wiki.
// The result is true only when every condition below holds: the language id is known, the domain
// type is not commons, the entry differs from the current language, and url_fmt is non-empty.
// NOTE(review): this span is a rendered diff hunk without +/- markers; the lines taking
// "int cur_lang_id" / "Tid_commons" and the lines taking "byte[] cur_lang_key" / "Int__commons"
// appear to be the before and after forms of the same statements -- confirm against the commit.
public boolean Type_is_xwiki_lang(int cur_lang_id) {
public boolean Type_is_xwiki_lang(byte[] cur_lang_key) {
return lang_id != Xol_lang_itm_.Id__unknown // valid lang code
&& domain_tid != Xow_domain_type_.Tid_commons // commons should never be considered an xwiki_lang; EX:[[commons:A]] PAGE:species:Scarabaeidae; DATE:2014-09-10
&& lang_id != cur_lang_id // lang is different than current; EX: [[en:A]] in en.wikipedia.org shouldn't link back to self
&& domain_tid != Xow_domain_type_.Int__commons // commons should never be considered an xwiki_lang; EX:[[commons:A]] PAGE:species:Scarabaeidae; DATE:2014-09-10
&& !Bry_.Eq(key_bry, cur_lang_key) // lang is different than current; EX: [[en:A]] in en.wikipedia.org shouldn't link back to self
&& Bry_.Len_gt_0(url_fmt) // url_fmt exists
;
}
@@ -50,9 +51,9 @@ public class Xow_xwiki_itm implements gplx.CompareAble {
byte[] gfs_url = gplx.xowa.apps.Xoa_gfs_php_mgr.Xto_gfs(bfr, mw_url); // EX: "//commons.wikimedia.org/wiki/Category:$1" -> "//commons.wikimedia.org/wiki/Category:~{0}"
url_parser.Parse(url, gfs_url, 0, gfs_url.length);
byte[] domain_bry = url.Segs__get_at_1st(); // extract "commons.wikimedia.org"
Xow_domain domain = Xow_domain_.parse(domain_bry);
Xol_lang_itm lang_itm = Xol_lang_itm_.Get_by_key(domain.Lang_key());
Xow_domain_itm domain = Xow_domain_itm_.parse(domain_bry);
Xol_lang_itm lang_itm = Xol_lang_itm_.Get_by_key(domain.Lang_actl_key());
int lang_id = lang_itm == null ? Xol_lang_itm_.Id__unknown : lang_itm.Id();
return new Xow_xwiki_itm(key, gfs_url, lang_id, domain.Domain_tid(), domain_bry, domain_name);
return new Xow_xwiki_itm(key, gfs_url, lang_id, domain.Domain_type_id(), domain_bry, domain_name);
}
}

View File

@@ -16,12 +16,14 @@ You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.wikis.xwikis; import gplx.*; import gplx.xowa.*; import gplx.xowa.wikis.*;
import org.junit.*; import gplx.core.net.*; import gplx.xowa.wikis.*; import gplx.xowa.langs.*;
import org.junit.*; import gplx.core.net.*;
import gplx.xowa.wikis.domains.*;
import gplx.xowa.langs.*;
// JUnit tests for building an Xow_xwiki_itm from a MediaWiki-style url ("$1" placeholder).
// Each case hands the fixture a source url followed by what look like the expected domain, the
// expected "~{0}"-style url, the expected domain type and the expected language id
// (presumably in that order; Test_new_by_mw is defined outside this view -- verify).
// NOTE(review): this span is a rendered diff hunk without +/- markers; the "Tid_*" test lines
// and the "Int__*" test lines appear to be the before and after forms of the same three tests.
public class Xow_xwiki_itm_tst {
// the fixture is cleared before every test
@Before public void init() {fxt.Clear();} private Xow_xwiki_itm_fxt fxt = new Xow_xwiki_itm_fxt();
@Test public void Commons() {fxt.Test_new_by_mw("commons.wikimedia.org/wiki/$1" , "commons.wikimedia.org" , "commons.wikimedia.org/wiki/~{0}" , Xow_domain_type_.Tid_commons , Xol_lang_itm_.Id__unknown);}
@Test public void Wiktionary() {fxt.Test_new_by_mw("fr.wiktionary.org/wiki/$1" , "fr.wiktionary.org" , "fr.wiktionary.org/wiki/~{0}" , Xow_domain_type_.Tid_wiktionary , Xol_lang_itm_.Id_fr);}
@Test public void Lang() {fxt.Test_new_by_mw("fr.wikipedia.org/wiki/$1" , "fr.wikipedia.org" , "fr.wikipedia.org/wiki/~{0}" , Xow_domain_type_.Tid_wikipedia , Xol_lang_itm_.Id_fr);}
@Test public void Commons() {fxt.Test_new_by_mw("commons.wikimedia.org/wiki/$1" , "commons.wikimedia.org" , "commons.wikimedia.org/wiki/~{0}" , Xow_domain_type_.Int__commons , Xol_lang_itm_.Id__unknown);}
@Test public void Wiktionary() {fxt.Test_new_by_mw("fr.wiktionary.org/wiki/$1" , "fr.wiktionary.org" , "fr.wiktionary.org/wiki/~{0}" , Xow_domain_type_.Int__wiktionary , Xol_lang_itm_.Id_fr);}
@Test public void Lang() {fxt.Test_new_by_mw("fr.wikipedia.org/wiki/$1" , "fr.wikipedia.org" , "fr.wikipedia.org/wiki/~{0}" , Xow_domain_type_.Int__wikipedia , Xol_lang_itm_.Id_fr);}
}
class Xow_xwiki_itm_fxt {
private Bry_bfr tmp_bfr;

View File

@@ -19,6 +19,7 @@ package gplx.xowa.wikis.xwikis; import gplx.*; import gplx.xowa.*; import gplx.x
import gplx.core.net.*;
import gplx.xowa.langs.*;
import gplx.xowa.html.hrefs.*;
import gplx.xowa.wikis.domains.*;
public class Xow_xwiki_mgr implements GfoInvkAble {
private Xowe_wiki wiki; private Xow_xwiki_mgr_srl srl;
private final Ordered_hash list = Ordered_hash_.new_bry_();
@@ -39,10 +40,10 @@ public class Xow_xwiki_mgr implements GfoInvkAble {
public Xow_xwiki_itm Add_full(byte[] alias, byte[] domain_bry, byte[] url_fmt) {
int domain_tid = Byte_.Zero;
int lang_id = -1;
Xow_domain wiki_type = Xow_domain_.parse(domain_bry);
domain_tid = wiki_type.Domain_tid();
if (Bry_.Len_gt_0(wiki_type.Lang_key())) { // domain_bry has lang (EX: "en.")
Xol_lang_itm lang_itm = Xol_lang_itm_.Get_by_key(wiki_type.Lang_key());
Xow_domain_itm wiki_type = Xow_domain_itm_.parse(domain_bry);
domain_tid = wiki_type.Domain_type_id();
if (Bry_.Len_gt_0(wiki_type.Lang_actl_key())) { // domain_bry has lang (EX: "en.")
Xol_lang_itm lang_itm = Xol_lang_itm_.Get_by_key(wiki_type.Lang_actl_key());
if (lang_itm == null) return null; // unknown lang: do not add to wiki collection; EX: en1.wikipedia.org
lang_id = lang_itm.Id();
}
@@ -53,16 +54,16 @@ public class Xow_xwiki_mgr implements GfoInvkAble {
// Sorts the manager's xwiki list by delegating to list.Sort().
// Presumably the ordering comes from Xow_xwiki_itm.compareTo, which compares key_bry -- verify
// against the Ordered_hash implementation.
public void Sort_by_key() {
list.Sort();
}
// Collects the domains of registered xwiki entries that satisfy a criterion relative to "cur".
// Only entries whose Offline() flag is set are considered; each one's Domain_bry() is parsed into
// a domain item, and the item is kept when crt.Matches(cur, domain_itm) is true.
// Returns the kept domain items as an array, in list iteration order.
// NOTE(review): this span is a rendered diff hunk without +/- markers; the "Xow_domain" lines
// and the "Xow_domain_itm" lines appear to be the before and after forms of the same statements.
public Xow_domain[] Get_by_crt(Xow_domain cur, gplx.xowa.wikis.domains.crts.Xow_domain_crt_itm crt) {
public Xow_domain_itm[] Get_by_crt(Xow_domain_itm cur, gplx.xowa.wikis.domains.crts.Xow_domain_crt_itm crt) {
List_adp rv = List_adp_.new_();
int len = this.Len();
for (int i = 0; i < len; ++i) {
Xow_xwiki_itm wiki = this.Get_at(i);
// skip entries that are not flagged offline
if (!wiki.Offline()) continue;
Xow_domain domain_itm = Xow_domain_.parse(wiki.Domain_bry());
Xow_domain_itm domain_itm = Xow_domain_itm_.parse(wiki.Domain_bry());
if (crt.Matches(cur, domain_itm)) rv.Add(domain_itm);
}
return (Xow_domain[])rv.To_ary_and_clear(Xow_domain.class);
return (Xow_domain_itm[])rv.To_ary_and_clear(Xow_domain_itm.class);
}
public void Add_bulk(byte[] raw) {
byte[][] rows = Bry_.Split(raw, Byte_ascii.Nl);
@@ -86,16 +87,16 @@ public class Xow_xwiki_mgr implements GfoInvkAble {
default: throw Err_.new_unhandled(j);
}
}
Xow_domain domain = Xow_domain_.parse(domain_bry);
Xow_domain_itm domain = Xow_domain_itm_.parse(domain_bry);
int lang_id = Xol_lang_itm_.Id__unknown;
if (Bry_.Len_gt_0(domain.Lang_key())) {
Xol_lang_itm lang_itm = Xol_lang_itm_.Get_by_key(domain.Lang_key());
if (Bry_.Len_gt_0(domain.Lang_actl_key())) {
Xol_lang_itm lang_itm = Xol_lang_itm_.Get_by_key(domain.Lang_actl_key());
if (lang_itm != null // lang exists
&& Bry_.Eq(alias, lang_itm.Key())) // alias == lang.key; only assign langs to aliases that have lang key; EX: w|en.wikipedia.org; "w" alias should not be registered for "en"; DATE:2013-07-25
lang_id = lang_itm.Id();
}
byte[] url_fmt = Bry_.Add(Xoh_href_.Bry__https, domain_bry, Xoh_href_.Bry__wiki, Arg_0);
return Xow_xwiki_itm.new_(alias, url_fmt, lang_id, domain.Domain_tid(), domain_bry);
return Xow_xwiki_itm.new_(alias, url_fmt, lang_id, domain.Domain_type_id(), domain_bry);
} static final byte[] Arg_0 = Bry_.new_a7("~{0}");
String Exec_itms_print(byte[] raw) {
Bry_fmtr fmtr = Bry_fmtr.new_bry_(raw, "wiki_key");//, "wiki_type_url", "wiki_lang", "wiki_name", "wiki_logo_url");
@@ -105,7 +106,7 @@ public class Xow_xwiki_mgr implements GfoInvkAble {
for (int i = 0; i < wikis_len; i++) {
Xow_xwiki_itm itm = (Xow_xwiki_itm)list.Get_at(i);
byte[] key = itm.Key_bry();
if (Bry_.Eq(key, Xow_domain_type_.Key_bry_home)) continue; // skip home
if (Bry_.Eq(key, Xow_domain_type_.Bry__home)) continue; // skip home
byte[] domain = itm.Domain_bry();
if (seen.Has(domain)) continue;
seen.Add_as_key_and_val(domain);
@@ -139,13 +140,13 @@ public class Xow_xwiki_mgr implements GfoInvkAble {
String domain_str = null;
int domain_tid = Xow_domain_type_.Get_type_as_tid(wiki_name_bry);
switch (domain_tid) {
case Xow_domain_type_.Tid_commons:
case Xow_domain_type_.Tid_species:
case Xow_domain_type_.Tid_meta:
case Xow_domain_type_.Tid_incubator: domain_str = String_.Format("{0}.wikimedia.org", wiki_name); break; // EX: commons.wikimedia.org
case Xow_domain_type_.Tid_wikidata: domain_str = String_.Format("www.wikidata.org", wiki_name); break; // EX: www.wikidata.org
case Xow_domain_type_.Tid_mediawiki: domain_str = String_.Format("www.mediawiki.org", wiki_name); break;
case Xow_domain_type_.Tid_wmfblog: domain_str = String_.Format("wikimediafoundation.org", wiki_name); break;
case Xow_domain_type_.Int__commons:
case Xow_domain_type_.Int__species:
case Xow_domain_type_.Int__meta:
case Xow_domain_type_.Int__incubator: domain_str = String_.Format("{0}.wikimedia.org", wiki_name); break; // EX: commons.wikimedia.org
case Xow_domain_type_.Int__wikidata: domain_str = String_.Format("www.wikidata.org", wiki_name); break; // EX: www.wikidata.org
case Xow_domain_type_.Int__mediawiki: domain_str = String_.Format("www.mediawiki.org", wiki_name); break;
case Xow_domain_type_.Int__wmfblog: domain_str = String_.Format("wikimediafoundation.org", wiki_name); break;
default: domain_str = String_.Format("{0}.{1}.org", lang_key_str, wiki_name); break; // EX: en.wiktionary.org
}
byte[] domain_bry = Bry_.new_u8(domain_str);

View File

@@ -16,20 +16,21 @@ You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.wikis.xwikis; import gplx.*; import gplx.xowa.*; import gplx.xowa.wikis.*;
import org.junit.*; import gplx.core.strings.*; import gplx.xowa.wikis.*; import gplx.xowa.langs.*;
import org.junit.*; import gplx.core.strings.*;
import gplx.xowa.wikis.domains.*; import gplx.xowa.langs.*;
public class Xow_xwiki_mgr_tst {
@Before public void init() {fxt.Clear();} private Xow_xwiki_mgr_fxt fxt = new Xow_xwiki_mgr_fxt();
@Test public void Add_bulk_wiki_en() {fxt.Test_add_bulk("w|en.wikipedia.org" , Xol_lang_itm_.Id__unknown , Xow_domain_type_.Tid_wikipedia , "w" , "https://en.wikipedia.org/wiki/~{0}", "en.wikipedia.org");}
@Test public void Add_bulk_wiki_fr() {fxt.Test_add_bulk("fr|fr.wikipedia.org" , Xol_lang_itm_.Id_fr , Xow_domain_type_.Tid_wikipedia , "fr" , "https://fr.wikipedia.org/wiki/~{0}", "fr.wikipedia.org");}
@Test public void Add_bulk_wikt_en() {fxt.Test_add_bulk("wikt|en.wiktionary.org" , Xol_lang_itm_.Id__unknown , Xow_domain_type_.Tid_wiktionary , "wikt" , "https://en.wiktionary.org/wiki/~{0}", "en.wiktionary.org");}
@Test public void Add_bulk_commons() {fxt.Test_add_bulk("commons|commons.wikimedia.org" , Xol_lang_itm_.Id__unknown , Xow_domain_type_.Tid_commons , "commons" , "https://commons.wikimedia.org/wiki/~{0}", "commons.wikimedia.org");}
@Test public void Add_bulk_commons_cap() {fxt.Test_add_bulk("Commons|commons.wikimedia.org" , Xol_lang_itm_.Id__unknown , Xow_domain_type_.Tid_commons , "Commons" , "https://commons.wikimedia.org/wiki/~{0}", "commons.wikimedia.org");}
@Test public void Add_bulk_wiki_en() {fxt.Test_add_bulk("w|en.wikipedia.org" , Xol_lang_itm_.Id__unknown , Xow_domain_type_.Int__wikipedia , "w" , "https://en.wikipedia.org/wiki/~{0}", "en.wikipedia.org");}
@Test public void Add_bulk_wiki_fr() {fxt.Test_add_bulk("fr|fr.wikipedia.org" , Xol_lang_itm_.Id_fr , Xow_domain_type_.Int__wikipedia , "fr" , "https://fr.wikipedia.org/wiki/~{0}", "fr.wikipedia.org");}
@Test public void Add_bulk_wikt_en() {fxt.Test_add_bulk("wikt|en.wiktionary.org" , Xol_lang_itm_.Id__unknown , Xow_domain_type_.Int__wiktionary , "wikt" , "https://en.wiktionary.org/wiki/~{0}", "en.wiktionary.org");}
@Test public void Add_bulk_commons() {fxt.Test_add_bulk("commons|commons.wikimedia.org" , Xol_lang_itm_.Id__unknown , Xow_domain_type_.Int__commons , "commons" , "https://commons.wikimedia.org/wiki/~{0}", "commons.wikimedia.org");}
@Test public void Add_bulk_commons_cap() {fxt.Test_add_bulk("Commons|commons.wikimedia.org" , Xol_lang_itm_.Id__unknown , Xow_domain_type_.Int__commons , "Commons" , "https://commons.wikimedia.org/wiki/~{0}", "commons.wikimedia.org");}
@Test public void Add_bulk_langs_wiki() {fxt.Init_langs().Test_add_bulk_langs("wiki", fxt.xwiki_("en", "en.wikipedia.org", "http://en.wikipedia.org/wiki/~{0}"), fxt.xwiki_("de", "de.wikipedia.org", "http://de.wikipedia.org/wiki/~{0}"), fxt.xwiki_("fr", "fr.wikipedia.org", "http://fr.wikipedia.org/wiki/~{0}"), fxt.xwiki_("ja", "ja.wikipedia.org", "http://ja.wikipedia.org/wiki/~{0}"));}
@Test public void Add_bulk_langs_grps() {fxt.Init_langs().Test_add_bulk_langs("europe_west~asia_east", fxt.xwiki_("de", "de.wikipedia.org", "http://de.wikipedia.org/wiki/~{0}"), fxt.xwiki_("fr", "fr.wikipedia.org", "http://fr.wikipedia.org/wiki/~{0}"), fxt.xwiki_("ja", "ja.wikipedia.org", "http://ja.wikipedia.org/wiki/~{0}"));}
@Test public void Add_bulk_langs_grp_itm() {fxt.Init_langs().Test_add_bulk_langs("europe_west~ja", fxt.xwiki_("de", "de.wikipedia.org", "http://de.wikipedia.org/wiki/~{0}"), fxt.xwiki_("fr", "fr.wikipedia.org", "http://fr.wikipedia.org/wiki/~{0}"), fxt.xwiki_("ja", "ja.wikipedia.org", "http://ja.wikipedia.org/wiki/~{0}"));}
@Test public void Add_bulk_langs_grp_commons() {
fxt.Init_langs();
fxt.Wiki().Xwiki_mgr().Add_bulk_langs(Bry_.new_a7("europe_west"), Xow_domain_type_.Tid_wikipedia);
fxt.Wiki().Xwiki_mgr().Add_bulk_langs(Bry_.new_a7("europe_west"), Xow_domain_type_.Int__wikipedia);
fxt.Tst_itms(fxt.xwiki_("de", "de.wikipedia.org", "http://de.wikipedia.org/wiki/~{0}"), fxt.xwiki_("fr", "fr.wikipedia.org", "http://fr.wikipedia.org/wiki/~{0}"));
}
@Test public void Add_bulk_peers() {fxt.Init_peers().Test_add_bulk_peers("peer", fxt.xwiki_null_("commons"), fxt.xwiki_null_("m"), fxt.xwiki_("wikt", "en.wiktionary.org", "http://en.wiktionary.org/wiki/~{0}"), fxt.xwiki_("wiktionary", "en.wiktionary.org", "http://en.wiktionary.org/wiki/~{0}"), fxt.xwiki_("s", "en.wikisource.org", "http://en.wikisource.org/wiki/~{0}"));}
@@ -55,10 +56,10 @@ class Xow_xwiki_mgr_fxt {
lang_mgr.Clear();
}
public Xowe_wiki Wiki() {return wiki;}
// Test helper: builds an xwiki item for "key" with an empty url format, an unknown language id,
// the "other" domain type and an empty domain.
// NOTE(review): rendered diff without +/- markers; the "Tid_other" line and the "Int__other"
// line appear to be the before and after forms of the same method.
public Xow_xwiki_itm xwiki_null_(String key) {return Xow_xwiki_itm.new_(Bry_.new_u8(key), Bry_.Empty, Xol_lang_itm_.Id__unknown, Xow_domain_type_.Tid_other, Bry_.Empty);}
public Xow_xwiki_itm xwiki_null_(String key) {return Xow_xwiki_itm.new_(Bry_.new_u8(key), Bry_.Empty, Xol_lang_itm_.Id__unknown, Xow_domain_type_.Int__other, Bry_.Empty);}
// Test helper: builds an xwiki item from a key, a domain string and a url format.
// The domain string is parsed, and the item's language id, domain type and domain bytes are taken
// from the parsed domain rather than passed in by the caller.
// NOTE(review): rendered diff without +/- markers; the "Xow_domain" pair of lines and the
// "Xow_domain_itm" pair appear to be the before and after forms of the same two statements.
public Xow_xwiki_itm xwiki_(String key, String domain_str, String url_fmt) {
Xow_domain domain = Xow_domain_.parse(Bry_.new_u8(domain_str));
return Xow_xwiki_itm.new_(Bry_.new_u8(key), Bry_.new_u8(url_fmt), domain.Lang_itm().Id(), domain.Domain_tid(), domain.Domain_bry());
Xow_domain_itm domain = Xow_domain_itm_.parse(Bry_.new_u8(domain_str));
return Xow_xwiki_itm.new_(Bry_.new_u8(key), Bry_.new_u8(url_fmt), domain.Lang_actl_itm().Id(), domain.Domain_type_id(), domain.Domain_bry());
}
public Xow_xwiki_mgr_fxt Test_add_bulk(String raw, int lang_tid, int wiki_tid, String alias, String fmt, String domain) {
Xow_xwiki_itm itm = xwiki_mgr.Add_bulk_row(Xol_lang_itm_.Regy(), Bry_.new_a7(raw));