1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2024-10-27 20:34:16 +00:00

DownloadCentral: Fix archive.org url and task name for wikidata

This commit is contained in:
gnosygnu 2016-10-19 16:22:30 -04:00
parent e55e3971d9
commit 74e84fd758
3 changed files with 22 additions and 8 deletions

View File

@ -29,4 +29,6 @@ public class Xobc_task_regy_itm {
public byte[] Key() {return key;} private final byte[] key; public byte[] Key() {return key;} private final byte[] key;
public byte[] Name() {return name;} private final byte[] name; public byte[] Name() {return name;} private final byte[] name;
public int Step_count() {return step_count;} private final int step_count; public int Step_count() {return step_count;} private final int step_count;
// Task-type keys; callers pass these as a task's type string and compare against them when choosing a display label.
public static final String Type__text = "text", Type__html = "html", Type__file = "file";
} }

View File

@ -27,10 +27,16 @@ public class Host_eval_wkr implements Bry_eval_wkr {
int type = hash.Get_as_byte_or(src, args_bgn, args_end, Byte_.Max_value_127); int type = hash.Get_as_byte_or(src, args_bgn, args_end, Byte_.Max_value_127);
switch (type) { switch (type) {
case Type__wiki_abrv: case Type__wiki_abrv:
// handle wikidata separately; DATE:2016-10-19
if (String_.Eq(domain_itm.Domain_str(), "www.wikidata.org"))
rv.Add_str_a7("wikidatawiki");
// do not use Abrv_mw(); all other wikis will be "generalized" to their language url; EX:"en.wiktionary.org" -> "enwiki" x> "enwiktionary"
else {
byte[] lang_key = domain_itm.Lang_orig_key(); byte[] lang_key = domain_itm.Lang_orig_key();
if (lang_key == Bry_.Empty) lang_key = Bry_.new_a7("en"); // handle species if (lang_key == Bry_.Empty) lang_key = Bry_.new_a7("en"); // handle species
rv.Add(lang_key); rv.Add(lang_key);
rv.Add_str_a7("wiki"); rv.Add_str_a7("wiki");
}
break; break;
default: throw Err_.new_unhandled_default(type); default: throw Err_.new_unhandled_default(type);
} }

View File

@ -52,11 +52,11 @@ public class Pack_file_mgr {
// build tasks // build tasks
if (cfg.Pack_text()) // right now, only for wikidata if (cfg.Pack_text()) // right now, only for wikidata
Make_task(tmp_bfr, wiki, wiki_date, bc_db, hash, "text", Xobc_import_type.Tid__file__core, Xobc_import_type.Tid__wiki__text, Xobc_import_type.Tid__wiki__ctg, Xobc_import_type.Tid__wiki__wbase); Make_task(tmp_bfr, wiki, wiki_date, bc_db, hash, Xobc_task_regy_itm.Type__text, Xobc_import_type.Tid__file__core, Xobc_import_type.Tid__wiki__text, Xobc_import_type.Tid__wiki__ctg, Xobc_import_type.Tid__wiki__wbase);
if (cfg.Pack_html()) if (cfg.Pack_html())
Make_task(tmp_bfr, wiki, wiki_date, bc_db, hash, "html", Xobc_import_type.Tid__wiki__core, Xobc_import_type.Tid__wiki__srch, Xobc_import_type.Tid__wiki__html, Xobc_import_type.Tid__wiki__ctg); Make_task(tmp_bfr, wiki, wiki_date, bc_db, hash, Xobc_task_regy_itm.Type__html, Xobc_import_type.Tid__wiki__core, Xobc_import_type.Tid__wiki__srch, Xobc_import_type.Tid__wiki__html, Xobc_import_type.Tid__wiki__ctg);
if (cfg.Pack_file()) if (cfg.Pack_file())
Make_task(tmp_bfr, wiki, wiki_date, bc_db, hash, "file", Xobc_import_type.Tid__file__core, Xobc_import_type.Tid__file__data); // , Xobc_import_type.Tid__fsdb__delete Make_task(tmp_bfr, wiki, wiki_date, bc_db, hash, Xobc_task_regy_itm.Type__file, Xobc_import_type.Tid__file__core, Xobc_import_type.Tid__file__data); // , Xobc_import_type.Tid__fsdb__delete
if (cfg.Pack_custom()) if (cfg.Pack_custom())
Make_task(tmp_bfr, wiki, wiki_date, bc_db, hash, cfg.Pack_custom_name(), Xobc_import_type.Tid__misc); Make_task(tmp_bfr, wiki, wiki_date, bc_db, hash, cfg.Pack_custom_name(), Xobc_import_type.Tid__misc);
bc_conn.Txn_end(); bc_conn.Txn_end();
@ -117,11 +117,17 @@ public class Pack_file_mgr {
? Bry_.Empty ? Bry_.Empty
: Bry_.Add(gplx.xowa.langs.Xol_lang_stub_.Get_by_key_or_null(lang_key).Canonical_name(), Byte_ascii.Space); // EX: "Deutsch " : Bry_.Add(gplx.xowa.langs.Xol_lang_stub_.Get_by_key_or_null(lang_key).Canonical_name(), Byte_ascii.Space); // EX: "Deutsch "
byte[] wiki_name = wiki.Domain_itm().Domain_type().Display_bry(); // EX: Wikipedia byte[] wiki_name = wiki.Domain_itm().Domain_type().Display_bry(); // EX: Wikipedia
String type_name = String_.Eq(task_type, "html") ? "Articles" : "Images"; String type_name = Get_task_name_by_task_type(task_type);
wiki_date = String_.Replace(wiki_date, ".", "-"); wiki_date = String_.Replace(wiki_date, ".", "-");
String file_size = gplx.core.ios.Io_size_.To_str_new(tmp_bfr, raw_len, 2); String file_size = gplx.core.ios.Io_size_.To_str_new(tmp_bfr, raw_len, 2);
return String_.Format("{0}{1} - {2} ({3}) [{4}]", lang_name, wiki_name, type_name, wiki_date, file_size); return String_.Format("{0}{1} - {2} ({3}) [{4}]", lang_name, wiki_name, type_name, wiki_date, file_size);
} }
// Translates a task type key into the label placed in the task's display name.
// Known keys are checked in order (html, file, text); any other value is returned unchanged.
private static String Get_task_name_by_task_type(String task_type) {
	String[] known_types = {Xobc_task_regy_itm.Type__html, Xobc_task_regy_itm.Type__file, Xobc_task_regy_itm.Type__text};
	String[] labels = {"Articles", "Images", "Wikitext"};
	for (int idx = 0; idx < known_types.length; idx++) {
		if (String_.Eq(task_type, known_types[idx])) {
			return labels[idx];
		}
	}
	return task_type;
}
private static void Make_pack(Xowe_wiki wiki, Io_url wiki_dir, byte[] wiki_abrv, String wiki_date, Xobc_data_db bc_db, Hash_algo hash_algo, Bry_bfr tmp_bfr, Pack_itm itm, int task_id) { private static void Make_pack(Xowe_wiki wiki, Io_url wiki_dir, byte[] wiki_abrv, String wiki_date, Xobc_data_db bc_db, Hash_algo hash_algo, Bry_bfr tmp_bfr, Pack_itm itm, int task_id) {
// hash raws // hash raws
Io_url zip_url = itm.Zip_url(); Io_url zip_url = itm.Zip_url();