From e6ae41d9fd59f0d8d644d1ab54580553693a8b41 Mon Sep 17 00:00:00 2001 From: gnosygnu Date: Sun, 26 Mar 2017 21:06:42 -0400 Subject: [PATCH] Full-text search: Add unpack lucene --- .../addons/bldrs/centrals/steps/Xobc_step_factory.java | 7 ++++++- .../addons/bldrs/exports/packs/files/Pack_file_mgr.java | 2 +- .../addons/bldrs/exports/packs/files/Pack_hash_bldr.java | 4 ++-- .../bldrs/exports/packs/files/Pack_zip_name_bldr.java | 2 +- 4 files changed, 10 insertions(+), 5 deletions(-) diff --git a/400_xowa/src/gplx/xowa/addons/bldrs/centrals/steps/Xobc_step_factory.java b/400_xowa/src/gplx/xowa/addons/bldrs/centrals/steps/Xobc_step_factory.java index 006b9562e..b5ad98fba 100644 --- a/400_xowa/src/gplx/xowa/addons/bldrs/centrals/steps/Xobc_step_factory.java +++ b/400_xowa/src/gplx/xowa/addons/bldrs/centrals/steps/Xobc_step_factory.java @@ -61,6 +61,12 @@ public class Xobc_step_factory { Io_url zip_file_url = Eval_url(Bry_eval_wkr__builder_central.Make_str(Bry_eval_wkr__builder_central.Type__download_fil, wiki_domain, file_name)); Io_url unzip_dir_url = Eval_url(Bry_eval_wkr__builder_central.Make_str(Bry_eval_wkr__builder_central.Type__unzip_dir, wiki_domain, file_name)); Io_url wiki_dir_url = Eval_url(Bry_eval_wkr__builder_central.Make_str(Bry_eval_wkr__builder_central.Type__wiki_dir, wiki_domain, file_name)); + + // if lucene, move to /data/search/ + if (import_itm.Import_type == Xobc_import_type.Tid__wiki__lucene) { + wiki_dir_url = gplx.xowa.addons.wikis.fulltexts.Xosearch_fulltext_addon.Get_index_dir(wiki_dir_url); + } + Io_url checksum_url = unzip_dir_url.GenSubFil(file_name + ".md5"); int cmd_idx = 0; list.Add(new Xobc_cmd__download (task_mgr, task_id, step_id, cmd_idx++, src_http_url, zip_file_url, import_itm.Import_size_zip)); @@ -69,7 +75,6 @@ public class Xobc_step_factory { list.Add(new Xobc_cmd__verify_dir (task_mgr, task_id, step_id, cmd_idx++, checksum_url, zip_file_url)); // list.Add(new Xobc_cmd__wiki_merge (task_mgr, task_id, step_id, cmd_idx++, merge_mgr, wiki_domain, unzip_dir_url, import_itm.Import_prog_data_max, import_itm.Import_prog_row_max, step_seqn)); list.Add(new Xobc_cmd__move_fils (task_mgr, task_id, step_id, cmd_idx++, unzip_dir_url, wiki_dir_url)); - switch (import_itm.Import_type) { case Xobc_import_type.Tid__wiki__core: list.Add(new Xobc_cmd__wiki_reg (task_mgr, task_id, step_id, cmd_idx++, wiki_dir_url, wiki_domain)); break; case Xobc_import_type.Tid__fsdb__delete: list.Add(new Xobc_cmd__fsdb_delete (task_mgr, task_id, step_id, cmd_idx++, Pack_zip_name_bldr.To_wiki_url(wiki_dir_url, zip_file_url.OwnerDir()))); break; diff --git a/400_xowa/src/gplx/xowa/addons/bldrs/exports/packs/files/Pack_file_mgr.java b/400_xowa/src/gplx/xowa/addons/bldrs/exports/packs/files/Pack_file_mgr.java index b60705e67..740fb07a0 100644 --- a/400_xowa/src/gplx/xowa/addons/bldrs/exports/packs/files/Pack_file_mgr.java +++ b/400_xowa/src/gplx/xowa/addons/bldrs/exports/packs/files/Pack_file_mgr.java @@ -52,7 +52,7 @@ public class Pack_file_mgr { if (cfg.Pack_text()) // right now, only for wikidata Make_task(tmp_bfr, wiki, wiki_date, bc_db, hash, Xobc_task_regy_itm.Type__text, Xobc_import_type.Tid__file__core, Xobc_import_type.Tid__wiki__text, Xobc_import_type.Tid__wiki__ctg, Xobc_import_type.Tid__wiki__wbase); if (cfg.Pack_html()) - Make_task(tmp_bfr, wiki, wiki_date, bc_db, hash, Xobc_task_regy_itm.Type__html, Xobc_import_type.Tid__wiki__core, Xobc_import_type.Tid__wiki__srch, Xobc_import_type.Tid__wiki__html, Xobc_import_type.Tid__wiki__ctg); + Make_task(tmp_bfr, wiki, wiki_date, bc_db, hash, Xobc_task_regy_itm.Type__html, Xobc_import_type.Tid__wiki__core, Xobc_import_type.Tid__wiki__srch, Xobc_import_type.Tid__wiki__html, Xobc_import_type.Tid__wiki__ctg, Xobc_import_type.Tid__wiki__lucene); if (cfg.Pack_file()) Make_task(tmp_bfr, wiki, wiki_date, bc_db, hash, Xobc_task_regy_itm.Type__file, Xobc_import_type.Tid__file__core, Xobc_import_type.Tid__file__data); // , Xobc_import_type.Tid__fsdb__delete if (cfg.Pack_custom()) diff --git a/400_xowa/src/gplx/xowa/addons/bldrs/exports/packs/files/Pack_hash_bldr.java b/400_xowa/src/gplx/xowa/addons/bldrs/exports/packs/files/Pack_hash_bldr.java index 914194688..90c83d63f 100644 --- a/400_xowa/src/gplx/xowa/addons/bldrs/exports/packs/files/Pack_hash_bldr.java +++ b/400_xowa/src/gplx/xowa/addons/bldrs/exports/packs/files/Pack_hash_bldr.java @@ -106,7 +106,7 @@ class Pack_hash_bldr { // calc size_new long size_new = size_cur + fil.Size(); - // if last file, set size_new to max + // if last file, set size_new to max and add file boolean add_file = true; if (fil_idx == fils_len - 1) { size_new = size_max; @@ -125,7 +125,7 @@ class Pack_hash_bldr { size_cur = size_new; } - // add file to list + // add file to list; ignore if last file and added above if (add_file) url_list.Add(fil.Url()); fil_idx++; diff --git a/400_xowa/src/gplx/xowa/addons/bldrs/exports/packs/files/Pack_zip_name_bldr.java b/400_xowa/src/gplx/xowa/addons/bldrs/exports/packs/files/Pack_zip_name_bldr.java index b2a888704..4b93cae27 100644 --- a/400_xowa/src/gplx/xowa/addons/bldrs/exports/packs/files/Pack_zip_name_bldr.java +++ b/400_xowa/src/gplx/xowa/addons/bldrs/exports/packs/files/Pack_zip_name_bldr.java @@ -39,7 +39,7 @@ public class Pack_zip_name_bldr { // en.wikipedia.org-file-ns.000-db.001.xowa -> } public Io_url Bld_by_suffix(String suffix, int pack_num) { // make fil_name EX: "Xowa_enwiki_2017-03" + "_" + "xtn.fulltext_search.001" + .zip - String fil_name = String_.new_u8(zip_name_prefix) + "_" + suffix + "." + Int_.To_str_pad_bgn_zero(pack_num, 3) + ".zip"; + String fil_name = String_.new_u8(zip_name_prefix) + "_" + suffix + "." + Int_.To_str_pad_bgn_zero(pack_num + List_adp_.Base1, 3) + ".zip"; return pack_dir.GenSubFil(fil_name); } public static Io_url To_wiki_url(Io_url wiki_dir, Io_url zip_dir) {