mirror of
https://github.com/gnosygnu/xowa.git
synced 2024-10-27 20:34:16 +00:00
Full-text search: Add unpack lucene
This commit is contained in:
parent
a43c0d17ba
commit
e6ae41d9fd
@ -61,6 +61,12 @@ public class Xobc_step_factory {
|
|||||||
Io_url zip_file_url = Eval_url(Bry_eval_wkr__builder_central.Make_str(Bry_eval_wkr__builder_central.Type__download_fil, wiki_domain, file_name));
|
Io_url zip_file_url = Eval_url(Bry_eval_wkr__builder_central.Make_str(Bry_eval_wkr__builder_central.Type__download_fil, wiki_domain, file_name));
|
||||||
Io_url unzip_dir_url = Eval_url(Bry_eval_wkr__builder_central.Make_str(Bry_eval_wkr__builder_central.Type__unzip_dir, wiki_domain, file_name));
|
Io_url unzip_dir_url = Eval_url(Bry_eval_wkr__builder_central.Make_str(Bry_eval_wkr__builder_central.Type__unzip_dir, wiki_domain, file_name));
|
||||||
Io_url wiki_dir_url = Eval_url(Bry_eval_wkr__builder_central.Make_str(Bry_eval_wkr__builder_central.Type__wiki_dir, wiki_domain, file_name));
|
Io_url wiki_dir_url = Eval_url(Bry_eval_wkr__builder_central.Make_str(Bry_eval_wkr__builder_central.Type__wiki_dir, wiki_domain, file_name));
|
||||||
|
|
||||||
|
// if lucene, move to /data/search/
|
||||||
|
if (import_itm.Import_type == Xobc_import_type.Tid__wiki__lucene) {
|
||||||
|
wiki_dir_url = gplx.xowa.addons.wikis.fulltexts.Xosearch_fulltext_addon.Get_index_dir(wiki_dir_url);
|
||||||
|
}
|
||||||
|
|
||||||
Io_url checksum_url = unzip_dir_url.GenSubFil(file_name + ".md5");
|
Io_url checksum_url = unzip_dir_url.GenSubFil(file_name + ".md5");
|
||||||
int cmd_idx = 0;
|
int cmd_idx = 0;
|
||||||
list.Add(new Xobc_cmd__download (task_mgr, task_id, step_id, cmd_idx++, src_http_url, zip_file_url, import_itm.Import_size_zip));
|
list.Add(new Xobc_cmd__download (task_mgr, task_id, step_id, cmd_idx++, src_http_url, zip_file_url, import_itm.Import_size_zip));
|
||||||
@ -69,7 +75,6 @@ public class Xobc_step_factory {
|
|||||||
list.Add(new Xobc_cmd__verify_dir (task_mgr, task_id, step_id, cmd_idx++, checksum_url, zip_file_url));
|
list.Add(new Xobc_cmd__verify_dir (task_mgr, task_id, step_id, cmd_idx++, checksum_url, zip_file_url));
|
||||||
// list.Add(new Xobc_cmd__wiki_merge (task_mgr, task_id, step_id, cmd_idx++, merge_mgr, wiki_domain, unzip_dir_url, import_itm.Import_prog_data_max, import_itm.Import_prog_row_max, step_seqn));
|
// list.Add(new Xobc_cmd__wiki_merge (task_mgr, task_id, step_id, cmd_idx++, merge_mgr, wiki_domain, unzip_dir_url, import_itm.Import_prog_data_max, import_itm.Import_prog_row_max, step_seqn));
|
||||||
list.Add(new Xobc_cmd__move_fils (task_mgr, task_id, step_id, cmd_idx++, unzip_dir_url, wiki_dir_url));
|
list.Add(new Xobc_cmd__move_fils (task_mgr, task_id, step_id, cmd_idx++, unzip_dir_url, wiki_dir_url));
|
||||||
|
|
||||||
switch (import_itm.Import_type) {
|
switch (import_itm.Import_type) {
|
||||||
case Xobc_import_type.Tid__wiki__core: list.Add(new Xobc_cmd__wiki_reg (task_mgr, task_id, step_id, cmd_idx++, wiki_dir_url, wiki_domain)); break;
|
case Xobc_import_type.Tid__wiki__core: list.Add(new Xobc_cmd__wiki_reg (task_mgr, task_id, step_id, cmd_idx++, wiki_dir_url, wiki_domain)); break;
|
||||||
case Xobc_import_type.Tid__fsdb__delete: list.Add(new Xobc_cmd__fsdb_delete (task_mgr, task_id, step_id, cmd_idx++, Pack_zip_name_bldr.To_wiki_url(wiki_dir_url, zip_file_url.OwnerDir()))); break;
|
case Xobc_import_type.Tid__fsdb__delete: list.Add(new Xobc_cmd__fsdb_delete (task_mgr, task_id, step_id, cmd_idx++, Pack_zip_name_bldr.To_wiki_url(wiki_dir_url, zip_file_url.OwnerDir()))); break;
|
||||||
|
@ -52,7 +52,7 @@ public class Pack_file_mgr {
|
|||||||
if (cfg.Pack_text()) // right now, only for wikidata
|
if (cfg.Pack_text()) // right now, only for wikidata
|
||||||
Make_task(tmp_bfr, wiki, wiki_date, bc_db, hash, Xobc_task_regy_itm.Type__text, Xobc_import_type.Tid__file__core, Xobc_import_type.Tid__wiki__text, Xobc_import_type.Tid__wiki__ctg, Xobc_import_type.Tid__wiki__wbase);
|
Make_task(tmp_bfr, wiki, wiki_date, bc_db, hash, Xobc_task_regy_itm.Type__text, Xobc_import_type.Tid__file__core, Xobc_import_type.Tid__wiki__text, Xobc_import_type.Tid__wiki__ctg, Xobc_import_type.Tid__wiki__wbase);
|
||||||
if (cfg.Pack_html())
|
if (cfg.Pack_html())
|
||||||
Make_task(tmp_bfr, wiki, wiki_date, bc_db, hash, Xobc_task_regy_itm.Type__html, Xobc_import_type.Tid__wiki__core, Xobc_import_type.Tid__wiki__srch, Xobc_import_type.Tid__wiki__html, Xobc_import_type.Tid__wiki__ctg);
|
Make_task(tmp_bfr, wiki, wiki_date, bc_db, hash, Xobc_task_regy_itm.Type__html, Xobc_import_type.Tid__wiki__core, Xobc_import_type.Tid__wiki__srch, Xobc_import_type.Tid__wiki__html, Xobc_import_type.Tid__wiki__ctg, Xobc_import_type.Tid__wiki__lucene);
|
||||||
if (cfg.Pack_file())
|
if (cfg.Pack_file())
|
||||||
Make_task(tmp_bfr, wiki, wiki_date, bc_db, hash, Xobc_task_regy_itm.Type__file, Xobc_import_type.Tid__file__core, Xobc_import_type.Tid__file__data); // , Xobc_import_type.Tid__fsdb__delete
|
Make_task(tmp_bfr, wiki, wiki_date, bc_db, hash, Xobc_task_regy_itm.Type__file, Xobc_import_type.Tid__file__core, Xobc_import_type.Tid__file__data); // , Xobc_import_type.Tid__fsdb__delete
|
||||||
if (cfg.Pack_custom())
|
if (cfg.Pack_custom())
|
||||||
|
@ -106,7 +106,7 @@ class Pack_hash_bldr {
|
|||||||
// calc size_new
|
// calc size_new
|
||||||
long size_new = size_cur + fil.Size();
|
long size_new = size_cur + fil.Size();
|
||||||
|
|
||||||
// if last file, set size_new to max
|
// if last file, set size_new to max and add file
|
||||||
boolean add_file = true;
|
boolean add_file = true;
|
||||||
if (fil_idx == fils_len - 1) {
|
if (fil_idx == fils_len - 1) {
|
||||||
size_new = size_max;
|
size_new = size_max;
|
||||||
@ -125,7 +125,7 @@ class Pack_hash_bldr {
|
|||||||
size_cur = size_new;
|
size_cur = size_new;
|
||||||
}
|
}
|
||||||
|
|
||||||
// add file to list
|
// add file to list; ignore if last file and added above
|
||||||
if (add_file)
|
if (add_file)
|
||||||
url_list.Add(fil.Url());
|
url_list.Add(fil.Url());
|
||||||
fil_idx++;
|
fil_idx++;
|
||||||
|
@ -39,7 +39,7 @@ public class Pack_zip_name_bldr { // en.wikipedia.org-file-ns.000-db.001.xowa ->
|
|||||||
}
|
}
|
||||||
public Io_url Bld_by_suffix(String suffix, int pack_num) {
|
public Io_url Bld_by_suffix(String suffix, int pack_num) {
|
||||||
// make fil_name EX: "Xowa_enwiki_2017-03" + "_" + "xtn.fulltext_search.001" + .zip
|
// make fil_name EX: "Xowa_enwiki_2017-03" + "_" + "xtn.fulltext_search.001" + .zip
|
||||||
String fil_name = String_.new_u8(zip_name_prefix) + "_" + suffix + "." + Int_.To_str_pad_bgn_zero(pack_num, 3) + ".zip";
|
String fil_name = String_.new_u8(zip_name_prefix) + "_" + suffix + "." + Int_.To_str_pad_bgn_zero(pack_num + List_adp_.Base1, 3) + ".zip";
|
||||||
return pack_dir.GenSubFil(fil_name);
|
return pack_dir.GenSubFil(fil_name);
|
||||||
}
|
}
|
||||||
public static Io_url To_wiki_url(Io_url wiki_dir, Io_url zip_dir) {
|
public static Io_url To_wiki_url(Io_url wiki_dir, Io_url zip_dir) {
|
||||||
|
Loading…
Reference in New Issue
Block a user