From a9afa7a827b101ebfbc7cd5b896fec4ec8cc3ac5 Mon Sep 17 00:00:00 2001 From: gnosygnu Date: Wed, 22 Mar 2017 09:30:45 -0400 Subject: [PATCH] Full-text search: Add IndexOptions to Indexer --- .../parses/mgrs/Xomp_parse_mgr.java | 2 +- .../parses/mgrs/Xomp_parse_mgr_cfg.java | 2 + .../bldrs/Xofulltext_indexer_args.java | 11 ++++- .../bldrs/Xofulltext_indexer_mgr.java | 4 +- .../bldrs/Xofulltext_indexer_wkr.java | 6 ++- .../specials/Xofulltext_indexer_doc.java | 36 -------------- .../specials/Xofulltext_indexer_html.java | 20 ++++++-- .../specials/Xofulltext_indexer_special.java | 1 + .../searchers/mgrs/Xofulltext_args_qry.java | 7 +++ .../gflucenes/Xofulltext_highlighter_mgr.java | 1 + .../Xofulltext_searcher__lucene.java | 2 + .../specials/Xofulltext_searcher_html.java | 3 +- .../specials/Xofulltext_searcher_special.java | 2 +- .../svcs/Xofulltext_searcher_bridge.java | 4 +- .../svcs/Xofulltext_searcher_svc.java | 12 +++++ .../gflucene/core/Gflucene_index_data.java | 2 +- .../gflucene/indexers/Gflucene_idx_opt.java | 49 +++++++++++++++++++ .../indexers/Gflucene_indexer_mgr.java | 17 +++++-- 18 files changed, 128 insertions(+), 53 deletions(-) delete mode 100644 400_xowa/src/gplx/xowa/addons/wikis/fulltexts/indexers/specials/Xofulltext_indexer_doc.java create mode 100644 gplx.gflucene/src/gplx/gflucene/indexers/Gflucene_idx_opt.java diff --git a/400_xowa/src/gplx/xowa/addons/bldrs/mass_parses/parses/mgrs/Xomp_parse_mgr.java b/400_xowa/src/gplx/xowa/addons/bldrs/mass_parses/parses/mgrs/Xomp_parse_mgr.java index ebd441b39..c24b40ec5 100644 --- a/400_xowa/src/gplx/xowa/addons/bldrs/mass_parses/parses/mgrs/Xomp_parse_mgr.java +++ b/400_xowa/src/gplx/xowa/addons/bldrs/mass_parses/parses/mgrs/Xomp_parse_mgr.java @@ -61,7 +61,7 @@ public class Xomp_parse_mgr { // init indexer Xofulltext_indexer_wkr indexer = cfg.Indexer_enabled() ? new Xofulltext_indexer_wkr() : null; - if (indexer != null) indexer.Init(wiki); + if (indexer != null) indexer.Init(wiki, cfg.Indexer_opt()); // init parse_wkrs for (int i = 0; i < wkr_len; ++i) { diff --git a/400_xowa/src/gplx/xowa/addons/bldrs/mass_parses/parses/mgrs/Xomp_parse_mgr_cfg.java b/400_xowa/src/gplx/xowa/addons/bldrs/mass_parses/parses/mgrs/Xomp_parse_mgr_cfg.java index f402f3183..925c13335 100644 --- a/400_xowa/src/gplx/xowa/addons/bldrs/mass_parses/parses/mgrs/Xomp_parse_mgr_cfg.java +++ b/400_xowa/src/gplx/xowa/addons/bldrs/mass_parses/parses/mgrs/Xomp_parse_mgr_cfg.java @@ -37,6 +37,7 @@ public class Xomp_parse_mgr_cfg implements Gfo_invk { public String Wkr_machine_name() {return wkr_machine_name;} private String wkr_machine_name; public boolean Show_msg__fetched_pool() {return show_msg__fetched_pool;} private boolean show_msg__fetched_pool; public boolean Indexer_enabled() {return indexer_enabled;} private boolean indexer_enabled; + public String Indexer_opt() {return indexer_opt;} private String indexer_opt = gplx.gflucene.indexers.Gflucene_idx_opt.Docs_and_freqs.Key(); public void Init(Xowe_wiki wiki) { if (num_wkrs == -1) num_wkrs = gplx.core.envs.Runtime_.Cpu_count(); if (num_pages_in_pool == -1) num_pages_in_pool = num_wkrs * 1000; @@ -66,6 +67,7 @@ public class Xomp_parse_mgr_cfg implements Gfo_invk { else if (ctx.Match(k, Invk__hdump_catboxes_)) hdump_catboxs = m.ReadYn("v"); else if (ctx.Match(k, Invk__log_math_)) log_math = m.ReadYn("v"); else if (ctx.Match(k, "indexer_enabled_")) indexer_enabled = m.ReadYn("v"); + else if (ctx.Match(k, "indexer_opt_")) indexer_opt = m.ReadStr("v"); else return Gfo_invk_.Rv_unhandled; return this; } diff --git a/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/indexers/bldrs/Xofulltext_indexer_args.java b/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/indexers/bldrs/Xofulltext_indexer_args.java index a49f7fbcd..e7925b094 100644 --- a/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/indexers/bldrs/Xofulltext_indexer_args.java +++ b/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/indexers/bldrs/Xofulltext_indexer_args.java @@ -15,9 +15,11 @@ Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt */ package gplx.xowa.addons.wikis.fulltexts.indexers.bldrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.indexers.*; import gplx.xowa.wikis.nss.*; +import gplx.gflucene.indexers.*; public class Xofulltext_indexer_args implements Gfo_invk { public byte[] wikis; public String ns_ids; + public String idx_opt; public void Init_by_wiki(Xowe_wiki wiki) { // wikis: null if (wikis == null) @@ -34,15 +36,21 @@ public class Xofulltext_indexer_args implements Gfo_invk { Xow_ns ns = ns_ary[i]; int ns_id = ns.Id(); if (ns_id < 0) continue; // ignore media, special - if (i != 0) bfr.Add_byte(Byte_ascii.Pipe); + if (i != 0) bfr.Add_byte(Byte_ascii.Comma); bfr.Add_int_variable(ns_id); } ns_ids = bfr.To_str_and_clear(); } + + // idx_opt + if (idx_opt == null) { + idx_opt = Gflucene_idx_opt.Docs_and_freqs.Key(); + } } public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) { if (ctx.Match(k, "wikis_")) this.wikis = m.ReadBryOr("v", null); else if (ctx.Match(k, "ns_ids")) this.ns_ids = m.ReadStrOr("v", null); + else if (ctx.Match(k, "idx_opt")) this.idx_opt = m.ReadStrOr("v", null); else return Gfo_invk_.Rv_unhandled; return this; } @@ -50,6 +58,7 @@ public class Xofulltext_indexer_args implements Gfo_invk { Xofulltext_indexer_args rv = new Xofulltext_indexer_args(); rv.wikis = args.Get_as_bry("wikis"); rv.ns_ids = args.Get_as_str("ns_ids"); + rv.idx_opt = args.Get_as_str("idx_opt"); return rv; } } diff --git a/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/indexers/bldrs/Xofulltext_indexer_mgr.java b/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/indexers/bldrs/Xofulltext_indexer_mgr.java index c8771e09d..213f416f7 100644 --- a/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/indexers/bldrs/Xofulltext_indexer_mgr.java +++ b/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/indexers/bldrs/Xofulltext_indexer_mgr.java @@ -23,7 +23,7 @@ public class Xofulltext_indexer_mgr { public void Exec(Xowe_wiki wiki, Xofulltext_indexer_ui ui, Xofulltext_indexer_args args) { // init indexer Xofulltext_indexer_wkr indexer = new Xofulltext_indexer_wkr(); - indexer.Init(wiki); + indexer.Init(wiki, args.idx_opt); // get page tbl Xow_db_file core_db = wiki.Data__core_mgr().Db__core(); @@ -39,7 +39,7 @@ public class Xofulltext_indexer_mgr { Db_rdr rdr = conn.Exec_rdr(Db_sql_.Make_by_fmt(String_.Ary ( "SELECT page_id, page_score, page_namespace, page_title, page_html_db_id" , "FROM page" - , "WHERE page_namespace IN ({0});"), String_.Replace(args.ns_ids, "|", ","))); + , "WHERE page_namespace IN ({0});"), args.ns_ids)); while (rdr.Move_next()) { // read vars int page_namespace = rdr.Read_int("page_namespace"); diff --git a/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/indexers/bldrs/Xofulltext_indexer_wkr.java b/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/indexers/bldrs/Xofulltext_indexer_wkr.java index ca73b3448..8ff62acff 100644 --- a/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/indexers/bldrs/Xofulltext_indexer_wkr.java +++ b/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/indexers/bldrs/Xofulltext_indexer_wkr.java @@ -20,7 +20,7 @@ import gplx.xowa.addons.wikis.fulltexts.core.*; public class Xofulltext_indexer_wkr { private final Gflucene_indexer_mgr index_wtr = new Gflucene_indexer_mgr(); private final Xofulltext_extractor extractor = new Xofulltext_extractor(); - public void Init(Xow_wiki wiki) { + public void Init(Xow_wiki wiki, String idx_opt) { // delete existing dir Io_url index_dir = Xosearch_fulltext_addon.Get_index_dir(wiki); Io_mgr.Instance.DeleteDirDeep(index_dir); @@ -28,7 +28,9 @@ public class Xofulltext_indexer_wkr { // init index_dir index_wtr.Init(new Gflucene_index_data ( Gflucene_analyzer_data.New_data_from_locale(wiki.Lang().Key_str()) - , index_dir.Xto_api())); + , index_dir.Xto_api()) + , idx_opt + ); } public void Index(Xoae_page wpg) { byte[] html = extractor.Extract(wpg.Db().Html().Html_bry()); diff --git a/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/indexers/specials/Xofulltext_indexer_doc.java b/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/indexers/specials/Xofulltext_indexer_doc.java deleted file mode 100644 index 269167cfa..000000000 --- a/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/indexers/specials/Xofulltext_indexer_doc.java +++ /dev/null @@ -1,36 +0,0 @@ -/* -XOWA: the XOWA Offline Wiki Application -Copyright (C) 2012-2017 gnosygnu@gmail.com - -XOWA is licensed under the terms of the General Public License (GPL) Version 3, -or alternatively under the terms of the Apache License Version 2.0. - -You may use XOWA according to either of these licenses as is most appropriate -for your project on a case-by-case basis. - -The terms of each license can be found in the source code repository: - -GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt -Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt -*/ -package gplx.xowa.addons.wikis.fulltexts.indexers.specials; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.indexers.*; -import gplx.langs.mustaches.*; -public class Xofulltext_indexer_doc implements Mustache_doc_itm { - private final String wikis_bry, ns_ids; - public Xofulltext_indexer_doc(String wikis_bry, String ns_ids) { - this.wikis_bry = wikis_bry; - this.ns_ids = ns_ids; - } - public boolean Mustache__write(String key, Mustache_bfr bfr) { - if (String_.Eq(key, "wikis")) - bfr.Add_str_u8(wikis_bry); - else if (String_.Eq(key, "ns_ids")) - bfr.Add_str_u8(ns_ids); - else - return false; - return true; - } - public Mustache_doc_itm[] Mustache__subs(String key) { - return Mustache_doc_itm_.Ary__empty; - } -} diff --git a/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/indexers/specials/Xofulltext_indexer_html.java b/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/indexers/specials/Xofulltext_indexer_html.java index 003eb696e..d2a21d076 100644 --- a/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/indexers/specials/Xofulltext_indexer_html.java +++ b/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/indexers/specials/Xofulltext_indexer_html.java @@ -16,16 +16,28 @@ Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt package gplx.xowa.addons.wikis.fulltexts.indexers.specials; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.indexers.*; import gplx.xowa.specials.*; import gplx.langs.mustaches.*; import gplx.xowa.wikis.pages.*; import gplx.xowa.wikis.pages.tags.*; import gplx.dbs.*; -class Xofulltext_indexer_html extends Xow_special_wtr__base { - private final String wikis_bry, ns_ids; - public Xofulltext_indexer_html(String wikis_bry, String ns_ids) { +class Xofulltext_indexer_html extends Xow_special_wtr__base implements Mustache_doc_itm { + private final String wikis_bry, ns_ids, idx_opt; + public Xofulltext_indexer_html(String wikis_bry, String ns_ids, String idx_opt) { this.wikis_bry = wikis_bry; this.ns_ids = ns_ids; + this.idx_opt = idx_opt; } + public boolean Mustache__write(String key, Mustache_bfr bfr) { + if (String_.Eq(key, "wikis")) bfr.Add_str_u8(wikis_bry); + else if (String_.Eq(key, "ns_ids")) bfr.Add_str_u8(ns_ids); + else if (String_.Eq(key, "idx_opt")) bfr.Add_str_u8(idx_opt); + else return false; + return true; + } + public Mustache_doc_itm[] Mustache__subs(String key) { + return Mustache_doc_itm_.Ary__empty; + } + @Override protected Io_url Get_addon_dir(Xoa_app app) {return Addon_dir(app);} @Override protected Io_url Get_mustache_fil(Io_url addon_dir) {return addon_dir.GenSubFil_nest("bin", "xofulltext_indexer.template.html");} @Override protected Mustache_doc_itm Bld_mustache_root(Xoa_app app) { - return new Xofulltext_indexer_doc(wikis_bry, ns_ids); + return this; } @Override protected void Bld_tags(Xoa_app app, Io_url addon_dir, Xopage_html_data page_data) { Xopg_tag_mgr head_tags = page_data.Head_tags(); diff --git a/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/indexers/specials/Xofulltext_indexer_special.java b/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/indexers/specials/Xofulltext_indexer_special.java index 05ed7f6b8..e109c8e19 100644 --- a/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/indexers/specials/Xofulltext_indexer_special.java +++ b/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/indexers/specials/Xofulltext_indexer_special.java @@ -26,6 +26,7 @@ public class Xofulltext_indexer_special implements Xow_special_page { new Xofulltext_indexer_html ( url_args.Read_str_or("wikis", wiki.Domain_str()) , url_args.Read_str_or("ns_ids", "0") + , url_args.Read_str_or("idx_opt", gplx.gflucene.indexers.Gflucene_idx_opt.Docs_and_freqs.Key()) ).Bld_page_by_mustache(wiki.App(), page, this); } Xofulltext_indexer_special(Xow_special_meta special__meta) {this.special__meta = special__meta;} diff --git a/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/searchers/mgrs/Xofulltext_args_qry.java b/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/searchers/mgrs/Xofulltext_args_qry.java index 153f6a75b..0db171b67 100644 --- a/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/searchers/mgrs/Xofulltext_args_qry.java +++ b/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/searchers/mgrs/Xofulltext_args_qry.java @@ -28,10 +28,17 @@ public class Xofulltext_args_qry { public boolean auto_wildcard_end; public boolean expand_matches_section; public boolean show_all_matches; + private boolean canceled; public byte[] Qry_key(byte[] wiki, byte[] ns_ids) { return Bry_.Add_w_dlm(Byte_ascii.Nl, wiki, ns_ids, search_text); // EX: "en.wikipedia.org\n0|4\nearth" } + public void Cancel() { + synchronized (this) { + canceled = true; + } + } + public boolean Canceled() {return canceled;} public static Xofulltext_args_qry New_by_json(Json_nde args) { Xofulltext_args_qry rv = new Xofulltext_args_qry(); diff --git a/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/searchers/mgrs/gflucenes/Xofulltext_highlighter_mgr.java b/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/searchers/mgrs/gflucenes/Xofulltext_highlighter_mgr.java index d77d0f943..1c9a6e68a 100644 --- a/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/searchers/mgrs/gflucenes/Xofulltext_highlighter_mgr.java +++ b/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/searchers/mgrs/gflucenes/Xofulltext_highlighter_mgr.java @@ -46,6 +46,7 @@ class Xofulltext_highlighter_mgr implements Gfo_invk { // loop items int len = list.Len(); for (int i = 0; i < len; i++) { + if (searcher_args.Canceled()) return; Gflucene_doc_data item = (Gflucene_doc_data)list.Get_at(i); try { Highlight_item(item); diff --git a/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/searchers/mgrs/gflucenes/Xofulltext_searcher__lucene.java b/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/searchers/mgrs/gflucenes/Xofulltext_searcher__lucene.java index db0522500..c68bb26e3 100644 --- a/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/searchers/mgrs/gflucenes/Xofulltext_searcher__lucene.java +++ b/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/searchers/mgrs/gflucenes/Xofulltext_searcher__lucene.java @@ -48,10 +48,12 @@ public class Xofulltext_searcher__lucene implements Xofulltext_searcher { int found = 0; Gflucene_searcher_qry searcher_data = new Gflucene_searcher_qry(String_.new_u8(args.search_text), 100); while (found < needed_len) { + if (args.Canceled()) return; searcher.Exec(temp_list, searcher_data); int temp_list_len = temp_list.Len(); for (int i = 0; i < temp_list_len; i++) { + if (args.Canceled()) return; Gflucene_doc_data doc_data = (Gflucene_doc_data)temp_list.Get_at(i); if (!page_list.Has(doc_data.page_id)) { // load page diff --git a/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/searchers/specials/Xofulltext_searcher_html.java b/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/searchers/specials/Xofulltext_searcher_html.java index 735629c0a..e6f5ad112 100644 --- a/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/searchers/specials/Xofulltext_searcher_html.java +++ b/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/searchers/specials/Xofulltext_searcher_html.java @@ -21,7 +21,8 @@ import gplx.xowa.addons.apps.cfgs.*; class Xofulltext_searcher_html extends Xow_special_wtr__base implements Mustache_doc_itm { private final boolean case_match, auto_wildcard_bgn, auto_wildcard_end, expand_matches_section, show_all_matches; private final Hash_adp props = Hash_adp_.New(); - public Xofulltext_searcher_html(Xocfg_mgr cfg_mgr, Gfo_qarg_mgr url_args, Xow_wiki wiki) { + public Xofulltext_searcher_html(Xocfg_mgr cfg_mgr, Gfo_qarg_mgr url_args, Xow_wiki wiki, Guid_adp page_guid) { + props.Add("page_guid", page_guid.To_str()); props.Add("cur_wiki", wiki.Domain_str()); props.Add("search", url_args.Read_str_or("search", "")); props_Add(cfg_mgr, url_args, "wikis" , wiki.Domain_str()); diff --git a/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/searchers/specials/Xofulltext_searcher_special.java b/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/searchers/specials/Xofulltext_searcher_special.java index 08600af49..1eb73e983 100644 --- a/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/searchers/specials/Xofulltext_searcher_special.java +++ b/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/searchers/specials/Xofulltext_searcher_special.java @@ -23,7 +23,7 @@ public class Xofulltext_searcher_special implements Xow_special_page { Xocfg_mgr cfg_mgr = wiki.App().Cfg(); // create page - Xofulltext_searcher_html html = new Xofulltext_searcher_html(cfg_mgr, url_args, wiki); + Xofulltext_searcher_html html = new Xofulltext_searcher_html(cfg_mgr, url_args, wiki, page.Page_guid()); html.Bld_page_by_mustache(wiki.App(), page, this); } Xofulltext_searcher_special(Xow_special_meta special__meta) {this.special__meta = special__meta;} diff --git a/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/searchers/svcs/Xofulltext_searcher_bridge.java b/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/searchers/svcs/Xofulltext_searcher_bridge.java index 5372ee47c..b1d3d2b93 100644 --- a/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/searchers/svcs/Xofulltext_searcher_bridge.java +++ b/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/searchers/svcs/Xofulltext_searcher_bridge.java @@ -26,15 +26,17 @@ public class Xofulltext_searcher_bridge implements Bridge_cmd_itm { Json_nde args = data.Get_kv(Bridge_cmd_mgr.Msg__args).Val_as_nde(); switch (proc_id) { case Proc__search: svc.Search(args); break; + case Proc__cancel: svc.Cancel(args); break; case Proc__get_lines_rest: svc.Get_lines_rest(args); break; default: throw Err_.new_unhandled_default(proc_id); } return ""; } - private static final byte Proc__search = 0, Proc__get_lines_rest = 1; + private static final byte Proc__search = 0, Proc__cancel = 1, Proc__get_lines_rest = 2; private static final Hash_adp_bry proc_hash = Hash_adp_bry.cs() .Add_str_byte("search" , Proc__search) + .Add_str_byte("cancel" , Proc__cancel) .Add_str_byte("get_lines_rest" , Proc__get_lines_rest) ; diff --git a/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/searchers/svcs/Xofulltext_searcher_svc.java b/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/searchers/svcs/Xofulltext_searcher_svc.java index 6d3ec9d44..5f201b4c1 100644 --- a/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/searchers/svcs/Xofulltext_searcher_svc.java +++ b/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/searchers/svcs/Xofulltext_searcher_svc.java @@ -29,13 +29,23 @@ import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.gflucenes.*; import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.brutes.*; class Xofulltext_searcher_svc implements Gfo_invk { private final Xoa_app app; + private final Hash_adp hash = Hash_adp_.New(); public Xofulltext_searcher_svc(Xoa_app app) { this.app = app; } + public void Cancel(Json_nde args) {this.Cancel(args.Get_as_str("page_guid"));} + private void Cancel(String page_guid) { + Xofulltext_args_qry prv_args = (Xofulltext_args_qry)hash.Get_by(page_guid); + if (prv_args != null) { + prv_args.Cancel(); + } + } public void Search(Json_nde args) { // get search_args Xofulltext_args_qry search_args = Xofulltext_args_qry.New_by_json(args); search_args.cache_mgr = this.Cache_mgr(); + + this.Cancel(search_args.page_guid); // autosave any changes if enabled Xocfg_mgr cfg_mgr = app.Cfg(); @@ -49,6 +59,8 @@ class Xofulltext_searcher_svc implements Gfo_invk { // cfg_mgr.Set_bry_app ("xowa.addon.search.fulltext.special.namespaces", search_args.namespaces); } + hash.Add(search_args.page_guid, search_args); + // launch thread gplx.core.threads.Thread_adp_.Start_by_val("search", Cancelable_.Never, this, Invk__search, search_args); } diff --git a/gplx.gflucene/src/gplx/gflucene/core/Gflucene_index_data.java b/gplx.gflucene/src/gplx/gflucene/core/Gflucene_index_data.java index 66949c925..0eb524274 100644 --- a/gplx.gflucene/src/gplx/gflucene/core/Gflucene_index_data.java +++ b/gplx.gflucene/src/gplx/gflucene/core/Gflucene_index_data.java @@ -18,8 +18,8 @@ import gplx.gflucene.analyzers.*; public class Gflucene_index_data { public final Gflucene_analyzer_data analyzer_data; public final String index_dir; + public final float max_merged_segments = 1500; // "limits" maximum file size - public final boolean positional_enabled = false; public Gflucene_index_data(Gflucene_analyzer_data analyzer_data, String index_dir) { this.analyzer_data = analyzer_data; this.index_dir = index_dir; diff --git a/gplx.gflucene/src/gplx/gflucene/indexers/Gflucene_idx_opt.java b/gplx.gflucene/src/gplx/gflucene/indexers/Gflucene_idx_opt.java new file mode 100644 index 000000000..32c8c0f7e --- /dev/null +++ b/gplx.gflucene/src/gplx/gflucene/indexers/Gflucene_idx_opt.java @@ -0,0 +1,49 @@ +/* +XOWA: the XOWA Offline Wiki Application +Copyright (C) 2012-2017 gnosygnu@gmail.com + +XOWA is licensed under the terms of the General Public License (GPL) Version 3, +or alternatively under the terms of the Apache License Version 2.0. + +You may use XOWA according to either of these licenses as is most appropriate +for your project on a case-by-case basis. + +The terms of each license can be found in the source code repository: + +GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt +Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt +*/ +package gplx.gflucene.indexers; import gplx.*; import gplx.gflucene.*; +public class Gflucene_idx_opt { + public Gflucene_idx_opt(int uid, String key, String name) { + this.uid = uid; + this.key = key; + this.name = name; + } + public int Uid() {return uid;} private final int uid; + public String Key() {return key;} private final String key; + public String Name() {return name;} private final String name; + + public static final int + Uid_docs = 0 // basic inverted word index; number of words is always 1 per doc + , Uid_docs_and_freqs = 1 // freqs needed for number of words per doc + , Uid_docs_and_freqs_and_positions = 2 // positions needed for proximity queries + , Uid_docs_and_freqs_and_positions_and_offsets = 3 // offsets needed for highlighter + ; + + private static final Hash_adp parse_hash = Hash_adp_.New(); + public static final Gflucene_idx_opt + Docs = New(Uid_docs, "d", "Documents") + , Docs_and_freqs = New(Uid_docs_and_freqs, "df", "Documents / Frequencies") + , Docs_and_freqs_and_positions = New(Uid_docs_and_freqs_and_positions, "dfp", "Documents / Frequencies / Positions") + , Docs_and_freqs_and_positions_and_offsets = New(Uid_docs_and_freqs_and_positions_and_offsets, "dfpo", "Documents / Frequencies / Positions / Offsets") + ; + private static Gflucene_idx_opt New(int uid, String key, String name) { + Gflucene_idx_opt rv = new Gflucene_idx_opt(uid, key, name); + parse_hash.Add(key, rv); + return rv; + } + public static Gflucene_idx_opt Parse(String key) { + return (Gflucene_idx_opt)parse_hash.Get_by_or_fail(key); + } +} diff --git a/gplx.gflucene/src/gplx/gflucene/indexers/Gflucene_indexer_mgr.java b/gplx.gflucene/src/gplx/gflucene/indexers/Gflucene_indexer_mgr.java index 1205534f2..c2f2f74bd 100644 --- a/gplx.gflucene/src/gplx/gflucene/indexers/Gflucene_indexer_mgr.java +++ b/gplx.gflucene/src/gplx/gflucene/indexers/Gflucene_indexer_mgr.java @@ -40,7 +40,7 @@ public class Gflucene_indexer_mgr { public Gflucene_indexer_mgr() { } - public void Init(Gflucene_index_data idx_data) { + public void Init(Gflucene_index_data idx_data, String idx_opt) { // create analyzer this.analyzer = Gflucene_analyzer_mgr_.New_analyzer(idx_data.analyzer_data.key); this.config = new IndexWriterConfig(analyzer); @@ -67,7 +67,7 @@ public class Gflucene_indexer_mgr { // create field for body this.body_fld_type = new FieldType(); - IndexOptions index_options = idx_data.positional_enabled ? IndexOptions.DOCS_AND_FREQS_AND_POSITIONS : IndexOptions.DOCS_AND_FREQS; + IndexOptions index_options = To_index_options(idx_opt); body_fld_type.setIndexOptions(index_options); body_fld_type.setTokenized(true); body_fld_type.setStored(false); @@ -110,4 +110,15 @@ public class Gflucene_indexer_mgr { throw Err_.new_exc(e, "lucene_index", "failed to close writer"); } } -} + + private static IndexOptions To_index_options(String key) { + Gflucene_idx_opt opt = Gflucene_idx_opt.Parse(key); + switch (opt.Uid()) { + case Gflucene_idx_opt.Uid_docs: return IndexOptions.DOCS; + case Gflucene_idx_opt.Uid_docs_and_freqs: return IndexOptions.DOCS_AND_FREQS; + case Gflucene_idx_opt.Uid_docs_and_freqs_and_positions: return IndexOptions.DOCS_AND_FREQS_AND_POSITIONS; + case Gflucene_idx_opt.Uid_docs_and_freqs_and_positions_and_offsets: return IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS; + default: throw Err_.new_unhandled_default(opt.Uid()); + } + } + }