1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2024-10-27 20:34:16 +00:00

Full-text search: Add unpack lucene

This commit is contained in:
gnosygnu 2017-03-26 21:06:42 -04:00
parent a43c0d17ba
commit e6ae41d9fd
4 changed files with 10 additions and 5 deletions

View File

@ -61,6 +61,12 @@ public class Xobc_step_factory {
Io_url zip_file_url = Eval_url(Bry_eval_wkr__builder_central.Make_str(Bry_eval_wkr__builder_central.Type__download_fil, wiki_domain, file_name)); Io_url zip_file_url = Eval_url(Bry_eval_wkr__builder_central.Make_str(Bry_eval_wkr__builder_central.Type__download_fil, wiki_domain, file_name));
Io_url unzip_dir_url = Eval_url(Bry_eval_wkr__builder_central.Make_str(Bry_eval_wkr__builder_central.Type__unzip_dir, wiki_domain, file_name)); Io_url unzip_dir_url = Eval_url(Bry_eval_wkr__builder_central.Make_str(Bry_eval_wkr__builder_central.Type__unzip_dir, wiki_domain, file_name));
Io_url wiki_dir_url = Eval_url(Bry_eval_wkr__builder_central.Make_str(Bry_eval_wkr__builder_central.Type__wiki_dir, wiki_domain, file_name)); Io_url wiki_dir_url = Eval_url(Bry_eval_wkr__builder_central.Make_str(Bry_eval_wkr__builder_central.Type__wiki_dir, wiki_domain, file_name));
// if lucene, move to /data/search/
if (import_itm.Import_type == Xobc_import_type.Tid__wiki__lucene) {
wiki_dir_url = gplx.xowa.addons.wikis.fulltexts.Xosearch_fulltext_addon.Get_index_dir(wiki_dir_url);
}
Io_url checksum_url = unzip_dir_url.GenSubFil(file_name + ".md5"); Io_url checksum_url = unzip_dir_url.GenSubFil(file_name + ".md5");
int cmd_idx = 0; int cmd_idx = 0;
list.Add(new Xobc_cmd__download (task_mgr, task_id, step_id, cmd_idx++, src_http_url, zip_file_url, import_itm.Import_size_zip)); list.Add(new Xobc_cmd__download (task_mgr, task_id, step_id, cmd_idx++, src_http_url, zip_file_url, import_itm.Import_size_zip));
@ -69,7 +75,6 @@ public class Xobc_step_factory {
list.Add(new Xobc_cmd__verify_dir (task_mgr, task_id, step_id, cmd_idx++, checksum_url, zip_file_url)); list.Add(new Xobc_cmd__verify_dir (task_mgr, task_id, step_id, cmd_idx++, checksum_url, zip_file_url));
// list.Add(new Xobc_cmd__wiki_merge (task_mgr, task_id, step_id, cmd_idx++, merge_mgr, wiki_domain, unzip_dir_url, import_itm.Import_prog_data_max, import_itm.Import_prog_row_max, step_seqn)); // list.Add(new Xobc_cmd__wiki_merge (task_mgr, task_id, step_id, cmd_idx++, merge_mgr, wiki_domain, unzip_dir_url, import_itm.Import_prog_data_max, import_itm.Import_prog_row_max, step_seqn));
list.Add(new Xobc_cmd__move_fils (task_mgr, task_id, step_id, cmd_idx++, unzip_dir_url, wiki_dir_url)); list.Add(new Xobc_cmd__move_fils (task_mgr, task_id, step_id, cmd_idx++, unzip_dir_url, wiki_dir_url));
switch (import_itm.Import_type) { switch (import_itm.Import_type) {
case Xobc_import_type.Tid__wiki__core: list.Add(new Xobc_cmd__wiki_reg (task_mgr, task_id, step_id, cmd_idx++, wiki_dir_url, wiki_domain)); break; case Xobc_import_type.Tid__wiki__core: list.Add(new Xobc_cmd__wiki_reg (task_mgr, task_id, step_id, cmd_idx++, wiki_dir_url, wiki_domain)); break;
case Xobc_import_type.Tid__fsdb__delete: list.Add(new Xobc_cmd__fsdb_delete (task_mgr, task_id, step_id, cmd_idx++, Pack_zip_name_bldr.To_wiki_url(wiki_dir_url, zip_file_url.OwnerDir()))); break; case Xobc_import_type.Tid__fsdb__delete: list.Add(new Xobc_cmd__fsdb_delete (task_mgr, task_id, step_id, cmd_idx++, Pack_zip_name_bldr.To_wiki_url(wiki_dir_url, zip_file_url.OwnerDir()))); break;

View File

@ -52,7 +52,7 @@ public class Pack_file_mgr {
if (cfg.Pack_text()) // right now, only for wikidata if (cfg.Pack_text()) // right now, only for wikidata
Make_task(tmp_bfr, wiki, wiki_date, bc_db, hash, Xobc_task_regy_itm.Type__text, Xobc_import_type.Tid__file__core, Xobc_import_type.Tid__wiki__text, Xobc_import_type.Tid__wiki__ctg, Xobc_import_type.Tid__wiki__wbase); Make_task(tmp_bfr, wiki, wiki_date, bc_db, hash, Xobc_task_regy_itm.Type__text, Xobc_import_type.Tid__file__core, Xobc_import_type.Tid__wiki__text, Xobc_import_type.Tid__wiki__ctg, Xobc_import_type.Tid__wiki__wbase);
if (cfg.Pack_html()) if (cfg.Pack_html())
Make_task(tmp_bfr, wiki, wiki_date, bc_db, hash, Xobc_task_regy_itm.Type__html, Xobc_import_type.Tid__wiki__core, Xobc_import_type.Tid__wiki__srch, Xobc_import_type.Tid__wiki__html, Xobc_import_type.Tid__wiki__ctg); Make_task(tmp_bfr, wiki, wiki_date, bc_db, hash, Xobc_task_regy_itm.Type__html, Xobc_import_type.Tid__wiki__core, Xobc_import_type.Tid__wiki__srch, Xobc_import_type.Tid__wiki__html, Xobc_import_type.Tid__wiki__ctg, Xobc_import_type.Tid__wiki__lucene);
if (cfg.Pack_file()) if (cfg.Pack_file())
Make_task(tmp_bfr, wiki, wiki_date, bc_db, hash, Xobc_task_regy_itm.Type__file, Xobc_import_type.Tid__file__core, Xobc_import_type.Tid__file__data); // , Xobc_import_type.Tid__fsdb__delete Make_task(tmp_bfr, wiki, wiki_date, bc_db, hash, Xobc_task_regy_itm.Type__file, Xobc_import_type.Tid__file__core, Xobc_import_type.Tid__file__data); // , Xobc_import_type.Tid__fsdb__delete
if (cfg.Pack_custom()) if (cfg.Pack_custom())

View File

@ -106,7 +106,7 @@ class Pack_hash_bldr {
// calc size_new // calc size_new
long size_new = size_cur + fil.Size(); long size_new = size_cur + fil.Size();
// if last file, set size_new to max // if last file, set size_new to max and add file
boolean add_file = true; boolean add_file = true;
if (fil_idx == fils_len - 1) { if (fil_idx == fils_len - 1) {
size_new = size_max; size_new = size_max;
@ -125,7 +125,7 @@ class Pack_hash_bldr {
size_cur = size_new; size_cur = size_new;
} }
// add file to list // add file to list; ignore if last file and added above
if (add_file) if (add_file)
url_list.Add(fil.Url()); url_list.Add(fil.Url());
fil_idx++; fil_idx++;

View File

@ -39,7 +39,7 @@ public class Pack_zip_name_bldr { // en.wikipedia.org-file-ns.000-db.001.xowa ->
} }
public Io_url Bld_by_suffix(String suffix, int pack_num) { public Io_url Bld_by_suffix(String suffix, int pack_num) {
// make fil_name EX: "Xowa_enwiki_2017-03" + "_" + "xtn.fulltext_search.001" + .zip // make fil_name EX: "Xowa_enwiki_2017-03" + "_" + "xtn.fulltext_search.001" + .zip
String fil_name = String_.new_u8(zip_name_prefix) + "_" + suffix + "." + Int_.To_str_pad_bgn_zero(pack_num, 3) + ".zip"; String fil_name = String_.new_u8(zip_name_prefix) + "_" + suffix + "." + Int_.To_str_pad_bgn_zero(pack_num + List_adp_.Base1, 3) + ".zip";
return pack_dir.GenSubFil(fil_name); return pack_dir.GenSubFil(fil_name);
} }
public static Io_url To_wiki_url(Io_url wiki_dir, Io_url zip_dir) { public static Io_url To_wiki_url(Io_url wiki_dir, Io_url zip_dir) {