From 8c31c8fd62ac5ee4435672b9aa13bf27f1547535 Mon Sep 17 00:00:00 2001 From: gnosygnu Date: Wed, 15 Mar 2017 19:51:58 -0400 Subject: [PATCH] Full-text search: Change to lucene 6.4.2 --- .../fulltexts/Xosearch_fulltext_addon.java | 5 ++ .../bldrs/Xofulltext_indexer_wkr.java | 10 ++- .../indexers/svcs/Xofulltext_indexer_svc.java | 2 +- .../caches/Xofulltext_cache_mgr.java | 12 ++- .../brutes/Xofulltext_searcher__brute.java | 7 +- .../Xofulltext_finder_cbk__highlight.java | 77 +++++++------------ .../gflucenes/Xofulltext_highlighter_mgr.java | 10 +-- .../Xofulltext_searcher__lucene.java | 4 +- .../mgrs/uis/Xofulltext_searcher_line.java | 6 +- .../mgrs/uis/Xofulltext_searcher_ui.java | 56 ++++++++++++-- .../mgrs/uis/Xofulltext_searcher_ui__gui.java | 54 ------------- .../svcs/Xofulltext_searcher_svc.java | 16 ++-- gplx.gflucene/.classpath | 10 +-- .../Gflucene_highlighter_mgr.java | 16 +++- 14 files changed, 136 insertions(+), 149 deletions(-) delete mode 100644 400_xowa/src/gplx/xowa/addons/wikis/fulltexts/searchers/mgrs/uis/Xofulltext_searcher_ui__gui.java diff --git a/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/Xosearch_fulltext_addon.java b/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/Xosearch_fulltext_addon.java index d10300df3..e381f97ea 100644 --- a/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/Xosearch_fulltext_addon.java +++ b/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/Xosearch_fulltext_addon.java @@ -36,4 +36,9 @@ public class Xosearch_fulltext_addon implements Xoax_addon_itm, Xoax_addon_itm__ } public String Addon__key() {return ADDON__KEY;} private static final String ADDON__KEY = "xowa.wiki.fulltext"; + + public static Io_url Get_index_dir(Xow_wiki wiki) {return Get_index_dir(wiki.Fsys_mgr().Root_dir());} + public static Io_url Get_index_dir(Io_url wiki_dir) { + return wiki_dir.GenSubDir_nest("data", "search", "java8-v1"); + } } diff --git a/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/indexers/bldrs/Xofulltext_indexer_wkr.java b/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/indexers/bldrs/Xofulltext_indexer_wkr.java index 15a5d4017..b64847767 100644 --- a/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/indexers/bldrs/Xofulltext_indexer_wkr.java +++ b/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/indexers/bldrs/Xofulltext_indexer_wkr.java @@ -19,12 +19,14 @@ import gplx.gflucene.indexers.*; public class Xofulltext_indexer_wkr { private final Gflucene_indexer_mgr index_wtr = new Gflucene_indexer_mgr(); public void Init(Xow_wiki wiki) { - Io_url search_dir = wiki.Fsys_mgr().Root_dir().GenSubDir_nest("data", "search"); - Io_mgr.Instance.DeleteDirDeep(search_dir); - ; + // delete existing dir + Io_url index_dir = Xosearch_fulltext_addon.Get_index_dir(wiki); + Io_mgr.Instance.DeleteDirDeep(index_dir); + + // init index_dir index_wtr.Init(new Gflucene_index_data ( Gflucene_analyzer_data.New_data_from_locale(wiki.Lang().Key_str()) - , search_dir.Xto_api())); + , index_dir.Xto_api())); } public void Index(Xoae_page wpg) { // TODO: skip if not main_ns diff --git a/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/indexers/svcs/Xofulltext_indexer_svc.java b/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/indexers/svcs/Xofulltext_indexer_svc.java index 514adfdfb..9c18c9440 100644 --- a/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/indexers/svcs/Xofulltext_indexer_svc.java +++ b/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/indexers/svcs/Xofulltext_indexer_svc.java @@ -48,7 +48,7 @@ class Xofulltext_indexer_svc implements Gfo_invk { } wiki.Init_by_wiki(); - Io_url search_dir = wiki.Fsys_mgr().Root_dir().GenSubDir_nest("data", "search"); + Io_url search_dir = Xosearch_fulltext_addon.Get_index_dir(wiki); if (Io_mgr.Instance.ExistsDir(search_dir)) { app.Gui__cbk_mgr().Send_json(cbk_trg, "xo.fulltext_indexer.status__note__recv", gplx.core.gfobjs.Gfobj_nde.New() .Add_str("note", Datetime_now.Get().XtoStr_fmt_yyyy_MM_dd_HH_mm_ss() + ": search dir already exists; please delete it manually before reindexing; " + String_.new_u8(domain))); diff --git a/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/searchers/caches/Xofulltext_cache_mgr.java b/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/searchers/caches/Xofulltext_cache_mgr.java index 54dc8b6e6..a70b49ef3 100644 --- a/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/searchers/caches/Xofulltext_cache_mgr.java +++ b/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/searchers/caches/Xofulltext_cache_mgr.java @@ -20,13 +20,19 @@ public class Xofulltext_cache_mgr { public void Clear() { qry_hash.Clear(); } - public void Add(int query_id, byte[] query, byte[] wiki_bry, int page_seq, int page_id, int line_seq, byte[] line_html) { - // get qry + public void Add(int query_id, byte[] query) { Xofulltext_cache_qry qry = (Xofulltext_cache_qry)qry_hash.Get_by(query_id); if (qry == null) { qry = new Xofulltext_cache_qry(query_id, query); qry_hash.Add(query_id, qry); } + } + public void Add(int query_id, byte[] wiki_bry, int page_id, int line_seq, byte[] line_html) { + // get qry + Xofulltext_cache_qry qry = (Xofulltext_cache_qry)qry_hash.Get_by(query_id); + if (qry == null) { + throw Err_.new_wo_type("query not found; query_id=~{0}", query_id); + } // get wiki Xofulltext_cache_wiki wiki = (Xofulltext_cache_wiki)qry.Wikis().Get_by(wiki_bry); @@ -38,7 +44,7 @@ public class Xofulltext_cache_mgr { // get page Xofulltext_cache_page page = (Xofulltext_cache_page)wiki.Pages().Get_by(page_id); if (page == null) { - page = new Xofulltext_cache_page(page_id, page_seq); + page = new Xofulltext_cache_page(page_id, wiki.Pages().Count()); wiki.Pages().Add(page_id, page); } diff --git a/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/searchers/mgrs/brutes/Xofulltext_searcher__brute.java b/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/searchers/mgrs/brutes/Xofulltext_searcher__brute.java index bbeb3c865..a0504755d 100644 --- a/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/searchers/mgrs/brutes/Xofulltext_searcher__brute.java +++ b/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/searchers/mgrs/brutes/Xofulltext_searcher__brute.java @@ -22,10 +22,7 @@ import gplx.xowa.addons.wikis.fulltexts.searchers.caches.*; public class Xofulltext_searcher__brute implements Xofulltext_searcher { private final Xofulltext_finder_mgr finder = new Xofulltext_finder_mgr(); private final Xofulltext_finder_cbk__eval cbk_eval = new Xofulltext_finder_cbk__eval(); - private final Xofulltext_finder_cbk__highlight cbk_highlight; - public Xofulltext_searcher__brute(Xoa_app app, Xog_cbk_trg cbk_trg, Xofulltext_cache_mgr cache_mgr) { - this.cbk_highlight = new Xofulltext_finder_cbk__highlight(app, cbk_trg, cache_mgr); - } + private final Xofulltext_finder_cbk__highlight cbk_highlight = new Xofulltext_finder_cbk__highlight(); public void Search(Xofulltext_searcher_ui ui, Xow_wiki wiki, Xofulltext_searcher_args args) { // get pages from db Db_conn page_conn = wiki.Data__core_mgr().Tbl__page().Conn(); @@ -62,7 +59,7 @@ public class Xofulltext_searcher__brute implements Xofulltext_searcher { // do highlight if (found <= args.max_pages_per_wiki) { - cbk_highlight.Init(args.query, args.query_id, wiki, page_id, ttl.Full_db(), args.show_all_matches); + cbk_highlight.Init(ui, args.query_id, wiki, page_id, ttl.Full_db(), args.show_all_matches); ui.Send_page_add(new Xofulltext_searcher_page ( args.query_id , String_.new_u8(wiki_domain) diff --git a/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/searchers/mgrs/brutes/finders/Xofulltext_finder_cbk__highlight.java b/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/searchers/mgrs/brutes/finders/Xofulltext_finder_cbk__highlight.java index dbed15889..aead9f489 100644 --- a/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/searchers/mgrs/brutes/finders/Xofulltext_finder_cbk__highlight.java +++ b/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/searchers/mgrs/brutes/finders/Xofulltext_finder_cbk__highlight.java @@ -16,25 +16,18 @@ Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt package gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.brutes.finders; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.searchers.*; import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.*; import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.brutes.*; import gplx.xowa.guis.cbks.*; import gplx.xowa.addons.wikis.fulltexts.searchers.caches.*; +import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.uis.*; public class Xofulltext_finder_cbk__highlight implements Xofulltext_finder_cbk { - private final Xog_cbk_trg cbk_trg; - private final Xoa_app app; - private final Xofulltext_cache_mgr cache_mgr; + private Xofulltext_searcher_ui ui; private Xow_wiki wiki; - private byte[] qry; private int qry_id; private int page_id; private final Bry_bfr tmp_bfr = Bry_bfr_.New(); public int found; private boolean show_all_matches; - public Xofulltext_finder_cbk__highlight(Xoa_app app, Xog_cbk_trg cbk_trg, Xofulltext_cache_mgr cache_mgr) { - this.app = app; - this.cbk_trg = cbk_trg; - this.cache_mgr = cache_mgr; - } public byte[] Page_ttl() {return page_ttl;} private byte[] page_ttl; - public void Init(byte[] qry, int qry_id, Xow_wiki wiki, int page_id, byte[] page_ttl, boolean show_all_matches) { - this.qry = qry; + public void Init(Xofulltext_searcher_ui ui, int qry_id, Xow_wiki wiki, int page_id, byte[] page_ttl, boolean show_all_matches) { + this.ui = ui; this.qry_id = qry_id; this.wiki = wiki; this.page_id = page_id; @@ -43,50 +36,34 @@ public class Xofulltext_finder_cbk__highlight implements Xofulltext_finder_cbk { found = 0; } public void Process_item_found(byte[] src, int hook_bgn, int hook_end, int word_bgn, int word_end, Xofulltext_word_node term) { -// if (found < max_snips_per_page) { - // get snip bounds by finding flanking 50 chars and then expanding to word-bounds - int snip_bgn = hook_bgn - 50; - if (snip_bgn < 0) - snip_bgn = 0; - else { - snip_bgn = Bry_find_.Find_bwd_ws(src, snip_bgn, 0) + 1; - } - int snip_end = hook_end + 50; - if (snip_end >= src.length) + // get snip bounds by finding flanking 50 chars and then expanding to word-bounds + int snip_bgn = hook_bgn - 50; + if (snip_bgn < 0) + snip_bgn = 0; + else { + snip_bgn = Bry_find_.Find_bwd_ws(src, snip_bgn, 0) + 1; + } + int snip_end = hook_end + 50; + if (snip_end >= src.length) + snip_end = src.length; + else { + snip_end = Bry_find_.Find_fwd_until_ws(src, snip_end, src.length); + if (snip_end == Bry_find_.Not_found) { // when snip_end == src.length snip_end = src.length; - else { - snip_end = Bry_find_.Find_fwd_until_ws(src, snip_end, src.length); - if (snip_end == Bry_find_.Not_found) { // when snip_end == src.length - snip_end = src.length; - } } + } - // build snip - Add_snip(tmp_bfr, src, snip_bgn, hook_bgn); - tmp_bfr.Add_str_a7(""); - Add_snip(tmp_bfr, src, hook_bgn, hook_end); - tmp_bfr.Add_str_a7(""); - Add_snip(tmp_bfr, src, hook_end, snip_end); + // build snip + Add_snip(tmp_bfr, src, snip_bgn, hook_bgn); + tmp_bfr.Add_str_a7(""); + Add_snip(tmp_bfr, src, hook_bgn, hook_end); + tmp_bfr.Add_str_a7(""); + Add_snip(tmp_bfr, src, hook_end, snip_end); - // send notification - byte[] line_html = tmp_bfr.To_bry_and_clear(); - if (found == 0 || show_all_matches) { - app.Gui__cbk_mgr().Send_json(cbk_trg, "xo.fulltext_searcher.results__line__add__recv", gplx.core.gfobjs.Gfobj_nde.New() - .Add_bry("wiki", wiki.Domain_bry()) - .Add_int("page_id", page_id) - .Add_int("line", found + 1) - .Add_bry("html", line_html) - ); - } - cache_mgr.Add(qry_id, qry, wiki.Domain_bry(), -1, page_id, found, line_html); -// } + // send notification + byte[] line_html = tmp_bfr.To_bry_and_clear(); + ui.Send_line_add(show_all_matches, qry_id, wiki.Domain_bry(), page_id, found, line_html); found++; - app.Gui__cbk_mgr().Send_json(cbk_trg, "xo.fulltext_searcher.results__page__update__recv", gplx.core.gfobjs.Gfobj_nde.New() - .Add_bry("wiki", wiki.Domain_bry()) - .Add_int("page_id", page_id) - .Add_int("found", found) - .Add_bool("show_all_matches", show_all_matches) - ); } private static final byte[] Angle_bgn_escaped = Bry_.new_a7("<"); private void Add_snip(Bry_bfr bfr, byte[] src, int bgn, int end) { diff --git a/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/searchers/mgrs/gflucenes/Xofulltext_highlighter_mgr.java b/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/searchers/mgrs/gflucenes/Xofulltext_highlighter_mgr.java index 735ff140b..a27a2ad20 100644 --- a/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/searchers/mgrs/gflucenes/Xofulltext_highlighter_mgr.java +++ b/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/searchers/mgrs/gflucenes/Xofulltext_highlighter_mgr.java @@ -23,17 +23,17 @@ import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.uis.*; class Xofulltext_highlighter_mgr implements Gfo_invk { private final Xofulltext_searcher_ui ui; private final Xow_wiki wiki; - private final String wiki_domain; - private final List_adp list; + private final Xofulltext_searcher_args searcher_args; private final Gflucene_analyzer_data analyzer_data; private final Gflucene_searcher_qry searcher_data; private final Gflucene_highlighter_mgr highlighter_mgr = new Gflucene_highlighter_mgr(); private final Xoh_page hpg = new Xoh_page(); private final Xowd_page_itm tmp_page_row = new Xowd_page_itm(); - public Xofulltext_highlighter_mgr(Xofulltext_searcher_ui ui, Xow_wiki wiki, Gflucene_analyzer_data analyzer_data, Gflucene_searcher_qry searcher_data, List_adp list) { + private final List_adp list; + public Xofulltext_highlighter_mgr(Xofulltext_searcher_ui ui, Xow_wiki wiki, Xofulltext_searcher_args searcher_args, Gflucene_analyzer_data analyzer_data, Gflucene_searcher_qry searcher_data, List_adp list) { this.ui = ui; this.wiki = wiki; - this.wiki_domain = wiki.Domain_str(); + this.searcher_args = searcher_args; this.analyzer_data = analyzer_data; this.searcher_data = searcher_data; this.list = list; @@ -76,7 +76,7 @@ class Xofulltext_highlighter_mgr implements Gfo_invk { int page_id = item.page_id; Gflucene_highlighter_item[] lines = highlighter_mgr.Exec(searcher_data, item); for (Gflucene_highlighter_item line : lines) { - ui.Send_line_add(new Xofulltext_searcher_line(wiki_domain, page_id, line.num, line.text)); + ui.Send_line_add(searcher_args.show_all_matches, searcher_args.query_id, wiki.Domain_bry(), page_id, line.num, Bry_.new_u8(line.text)); } } public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) { diff --git a/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/searchers/mgrs/gflucenes/Xofulltext_searcher__lucene.java b/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/searchers/mgrs/gflucenes/Xofulltext_searcher__lucene.java index cad1bc857..2a1687ca9 100644 --- a/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/searchers/mgrs/gflucenes/Xofulltext_searcher__lucene.java +++ b/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/searchers/mgrs/gflucenes/Xofulltext_searcher__lucene.java @@ -30,7 +30,7 @@ public class Xofulltext_searcher__lucene implements Xofulltext_searcher { Gflucene_analyzer_data analyzer_data = Gflucene_analyzer_data.New_data_from_locale(wiki.Lang().Key_str()); searcher.Init(new Gflucene_index_data ( analyzer_data - , wiki.Fsys_mgr().Root_dir().GenSubDir_nest("data", "search").Xto_api())); + , Xosearch_fulltext_addon.Get_index_dir(wiki).Xto_api())); // exec search Gflucene_searcher_qry searcher_data = new Gflucene_searcher_qry(String_.new_u8(args.query), args.max_pages_per_wiki); @@ -50,7 +50,7 @@ public class Xofulltext_searcher__lucene implements Xofulltext_searcher { } // create highlighter thread and launch it - Xofulltext_highlighter_mgr highlighter_mgr = new Xofulltext_highlighter_mgr(ui, wiki, analyzer_data, searcher_data, list); + Xofulltext_highlighter_mgr highlighter_mgr = new Xofulltext_highlighter_mgr(ui, wiki, args, analyzer_data, searcher_data, list); gplx.core.threads.Thread_adp_.Start_by_key("highlighter", Cancelable_.Never, highlighter_mgr, Xofulltext_highlighter_mgr.Invk__highlight); } } diff --git a/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/searchers/mgrs/uis/Xofulltext_searcher_line.java b/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/searchers/mgrs/uis/Xofulltext_searcher_line.java index c602deb80..964152590 100644 --- a/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/searchers/mgrs/uis/Xofulltext_searcher_line.java +++ b/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/searchers/mgrs/uis/Xofulltext_searcher_line.java @@ -15,14 +15,14 @@ Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt */ package gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.uis; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.searchers.*; import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.*; public class Xofulltext_searcher_line { - public Xofulltext_searcher_line(String wiki_domain, int page_id, int found_idx, String excerpt) { + public Xofulltext_searcher_line(byte[] wiki_domain, int page_id, int found_idx, byte[] excerpt) { this.wiki_domain = wiki_domain; this.page_id = page_id; this.found_idx = found_idx; this.excerpt = excerpt; } - public String Wiki_domain() {return wiki_domain;} private final String wiki_domain; + public byte[] Wiki_domain() {return wiki_domain;} private final byte[] wiki_domain; public int Page_id() {return page_id;} private final int page_id; public int Found_idx() {return found_idx;} private final int found_idx; - public String Excerpt() {return excerpt;} private final String excerpt; + public byte[] Excerpt() {return excerpt;} private final byte[] excerpt; } diff --git a/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/searchers/mgrs/uis/Xofulltext_searcher_ui.java b/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/searchers/mgrs/uis/Xofulltext_searcher_ui.java index 5e7789e7a..e6591ba0a 100644 --- a/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/searchers/mgrs/uis/Xofulltext_searcher_ui.java +++ b/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/searchers/mgrs/uis/Xofulltext_searcher_ui.java @@ -14,9 +14,55 @@ GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt */ package gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.uis; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.searchers.*; import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.*; -public interface Xofulltext_searcher_ui { - void Send_wiki_add(byte[] wiki_domain); - void Send_wiki_update(byte[] wiki, int found, int searched); - void Send_page_add(Xofulltext_searcher_page page); - void Send_line_add(Xofulltext_searcher_line line); +import gplx.xowa.guis.cbks.*; +import gplx.xowa.addons.wikis.fulltexts.searchers.caches.*; +public class Xofulltext_searcher_ui { + private final Xog_cbk_mgr cbk_mgr; + private final Xog_cbk_trg cbk_trg; + private final Xofulltext_cache_mgr cache_mgr; + public Xofulltext_searcher_ui(Xofulltext_cache_mgr cache_mgr, Xog_cbk_mgr cbk_mgr, Xog_cbk_trg cbk_trg) { + this.cache_mgr = cache_mgr; + this.cbk_mgr = cbk_mgr; + this.cbk_trg = cbk_trg; + } + public void Send_wiki_add(byte[] wiki_domain) { + cbk_mgr.Send_json(cbk_trg, "xo.fulltext_searcher.results__wiki__add__recv", gplx.core.gfobjs.Gfobj_nde.New() + .Add_bry("wiki", wiki_domain) + ); + } + public void Send_wiki_update(byte[] wiki, int found, int searched) { + cbk_mgr.Send_json(cbk_trg, "xo.fulltext_searcher.results__wiki__update__recv", gplx.core.gfobjs.Gfobj_nde.New() + .Add_bry("wiki", wiki) + .Add_int("found", found) + .Add_int("searched", searched) + ); + } + public void Send_page_add(Xofulltext_searcher_page page) { + cbk_mgr.Send_json(cbk_trg, "xo.fulltext_searcher.results__page__add__recv", gplx.core.gfobjs.Gfobj_nde.New() + .Add_int("query_id", page.Query_id()) + .Add_str("wiki", page.Wiki_domain()) + .Add_int("page_id", page.Page_id()) + .Add_str("page_ttl", page.Page_title()) + .Add_bool("expand_matches_section", page.Expand_matches_section()) + ); + } + public void Send_line_add(boolean show_all_matches, int qry_id, byte[] wiki_domain, int page_id, int line_sort_order, byte[] line_html) { + cache_mgr.Add(qry_id, wiki_domain, page_id, line_sort_order, line_html); + + line_sort_order += List_adp_.Base1; // NOTE: increment after cache_mgr + if (line_sort_order == 1 || show_all_matches) { + cbk_mgr.Send_json(cbk_trg, "xo.fulltext_searcher.results__line__add__recv", gplx.core.gfobjs.Gfobj_nde.New() + .Add_bry("wiki", wiki_domain) + .Add_int("page_id", page_id) + .Add_int("line", line_sort_order) + .Add_bry("html", line_html) + ); + } + cbk_mgr.Send_json(cbk_trg, "xo.fulltext_searcher.results__page__update__recv", gplx.core.gfobjs.Gfobj_nde.New() + .Add_bry("wiki", wiki_domain) + .Add_int("page_id", page_id) + .Add_int("found", line_sort_order) + .Add_bool("show_all_matches", show_all_matches) + ); + } } diff --git a/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/searchers/mgrs/uis/Xofulltext_searcher_ui__gui.java b/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/searchers/mgrs/uis/Xofulltext_searcher_ui__gui.java deleted file mode 100644 index 6104cdac5..000000000 --- a/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/searchers/mgrs/uis/Xofulltext_searcher_ui__gui.java +++ /dev/null @@ -1,54 +0,0 @@ -/* -XOWA: the XOWA Offline Wiki Application -Copyright (C) 2012-2017 gnosygnu@gmail.com - -XOWA is licensed under the terms of the General Public License (GPL) Version 3, -or alternatively under the terms of the Apache License Version 2.0. - -You may use XOWA according to either of these licenses as is most appropriate -for your project on a case-by-case basis. - -The terms of each license can be found in the source code repository: - -GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt -Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt -*/ -package gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.uis; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.searchers.*; import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.*; -import gplx.xowa.guis.cbks.*; -public class Xofulltext_searcher_ui__gui implements Xofulltext_searcher_ui { - private final Xog_cbk_mgr cbk_mgr; - private final Xog_cbk_trg cbk_trg; - public Xofulltext_searcher_ui__gui(Xog_cbk_mgr cbk_mgr, Xog_cbk_trg cbk_trg) { - this.cbk_mgr = cbk_mgr; - this.cbk_trg = cbk_trg; - } - public void Send_wiki_add(byte[] wiki_domain) { - cbk_mgr.Send_json(cbk_trg, "xo.fulltext_searcher.results__wiki__add__recv", gplx.core.gfobjs.Gfobj_nde.New() - .Add_bry("wiki", wiki_domain) - ); - } - public void Send_wiki_update(byte[] wiki, int found, int searched) { - cbk_mgr.Send_json(cbk_trg, "xo.fulltext_searcher.results__wiki__update__recv", gplx.core.gfobjs.Gfobj_nde.New() - .Add_bry("wiki", wiki) - .Add_int("found", found) - .Add_int("searched", searched) - ); - } - public void Send_page_add(Xofulltext_searcher_page page) { - cbk_mgr.Send_json(cbk_trg, "xo.fulltext_searcher.results__page__add__recv", gplx.core.gfobjs.Gfobj_nde.New() - .Add_int("query_id", page.Query_id()) - .Add_str("wiki", page.Wiki_domain()) - .Add_int("page_id", page.Page_id()) - .Add_str("page_ttl", page.Page_title()) - .Add_bool("expand_matches_section", page.Expand_matches_section()) - ); - } - public void Send_line_add(Xofulltext_searcher_line match) { - cbk_mgr.Send_json(cbk_trg, "xo.fulltext_searcher.results__line__add__recv", gplx.core.gfobjs.Gfobj_nde.New() - .Add_str("wiki", match.Wiki_domain()) - .Add_int("page_id", match.Page_id()) - .Add_int("line", match.Found_idx()) - .Add_str("html", match.Excerpt()) - ); - } -} diff --git a/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/searchers/svcs/Xofulltext_searcher_svc.java b/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/searchers/svcs/Xofulltext_searcher_svc.java index d9b9ef542..f23403e04 100644 --- a/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/searchers/svcs/Xofulltext_searcher_svc.java +++ b/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/searchers/svcs/Xofulltext_searcher_svc.java @@ -31,10 +31,10 @@ class Xofulltext_searcher_svc implements Gfo_invk { private final Xoa_app app; private final Xog_cbk_trg cbk_trg = Xog_cbk_trg.New(Xofulltext_searcher_special.Prototype.Special__meta().Ttl_bry()); private final Xofulltext_cache_mgr cache_mgr = new Xofulltext_cache_mgr(); - private final Xofulltext_searcher_ui searcher_cbk; + private final Xofulltext_searcher_ui searcher_ui; public Xofulltext_searcher_svc(Xoa_app app) { this.app = app; - this.searcher_cbk = new Xofulltext_searcher_ui__gui(app.Gui__cbk_mgr(), cbk_trg); + this.searcher_ui = new Xofulltext_searcher_ui(cache_mgr, app.Gui__cbk_mgr(), cbk_trg); } public void Search(Json_nde args) { // for now, always clear cache; "get_lines_rest" will only work for latest search @@ -43,6 +43,7 @@ class Xofulltext_searcher_svc implements Gfo_invk { // get search_args Xofulltext_searcher_args search_args = Xofulltext_searcher_args.New_by_json(args); search_args.query_id = cache_mgr.Next_qry_id(); + cache_mgr.Add(search_args.query_id, search_args.query); // autosave any changes if enabled Xocfg_mgr cfg_mgr = app.Cfg(); @@ -66,11 +67,11 @@ class Xofulltext_searcher_svc implements Gfo_invk { for (byte[] wiki_domain : wiki_domains) { // get wiki and notify Xow_wiki wiki = app.Wiki_mgri().Get_by_or_make_init_y(wiki_domain); - searcher_cbk.Send_wiki_add(wiki_domain); + searcher_ui.Send_wiki_add(wiki_domain); // get searcher and search Xofulltext_searcher searcher = Get_searcher(wiki); - searcher.Search(searcher_cbk, wiki, args); + searcher.Search(searcher_ui, wiki, args); } } catch (Exception exc) { if (app.Tid_is_edit()) @@ -83,16 +84,15 @@ class Xofulltext_searcher_svc implements Gfo_invk { private void Get_lines_rest(int qry_id, byte[] wiki_bry, int page_id) { Xofulltext_cache_line[] lines = cache_mgr.Get_lines_rest(qry_id, wiki_bry, page_id); for (Xofulltext_cache_line line : lines) { - Xofulltext_searcher_line match = new Xofulltext_searcher_line(String_.new_u8(wiki_bry), page_id, line.Line_seq() + 1, String_.new_u8(line.Line_html())); - searcher_cbk.Send_line_add(match); + searcher_ui.Send_line_add(true, qry_id, wiki_bry, page_id, line.Line_seq(), line.Line_html()); } } private Xofulltext_searcher Get_searcher(Xow_wiki wiki) { - if (Io_mgr.Instance.ExistsDir(wiki.Fsys_mgr().Root_dir().GenSubDir_nest("data", "search"))) { + if (Io_mgr.Instance.ExistsDir(Xosearch_fulltext_addon.Get_index_dir(wiki))) { return new Xofulltext_searcher__lucene(); } else { - return new Xofulltext_searcher__brute(app, cbk_trg, cache_mgr); + return new Xofulltext_searcher__brute(); } } diff --git a/gplx.gflucene/.classpath b/gplx.gflucene/.classpath index 4d90c328f..43bad2c2d 100644 --- a/gplx.gflucene/.classpath +++ b/gplx.gflucene/.classpath @@ -3,10 +3,10 @@ - - - - - + + + + + diff --git a/gplx.gflucene/src/gplx/gflucene/highlighters/Gflucene_highlighter_mgr.java b/gplx.gflucene/src/gplx/gflucene/highlighters/Gflucene_highlighter_mgr.java index d11149392..abd52d293 100644 --- a/gplx.gflucene/src/gplx/gflucene/highlighters/Gflucene_highlighter_mgr.java +++ b/gplx.gflucene/src/gplx/gflucene/highlighters/Gflucene_highlighter_mgr.java @@ -32,10 +32,13 @@ import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.highlight.Formatter; +import org.apache.lucene.search.highlight.Fragmenter; import org.apache.lucene.search.highlight.Highlighter; import org.apache.lucene.search.highlight.InvalidTokenOffsetsException; import org.apache.lucene.search.highlight.QueryScorer; +import org.apache.lucene.search.highlight.SimpleFragmenter; import org.apache.lucene.search.highlight.SimpleHTMLFormatter; +import org.apache.lucene.search.highlight.SimpleSpanFragmenter; import org.apache.lucene.search.highlight.TextFragment; import org.apache.lucene.search.highlight.TokenSources; import org.apache.lucene.store.FSDirectory; @@ -60,16 +63,21 @@ public class Gflucene_highlighter_mgr { // create highlighter SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter("", ""); - Highlighter highlighter = new Highlighter(htmlFormatter, new QueryScorer(query)); + QueryScorer scorer = new QueryScorer(query); + scorer.setExpandMultiTermQuery(false); + Highlighter highlighter = new Highlighter(htmlFormatter, scorer); + SimpleFragmenter fragmenter = new SimpleFragmenter(100); + highlighter.setTextFragmenter(fragmenter); // get token stream String text = doc_data.body; TokenStream tokenStream = analyzer.tokenStream("body", text); // get fragments from stream - String[] frags; + TextFragment[] frags; try { - frags = highlighter.getBestFragments(tokenStream, text, 10); +// frags = highlighter.getBestTextFragments(tokenStream, text, false, 1000); + frags = highlighter.getBestTextFragments(tokenStream, text, true, 10); } catch (IOException e) { throw Err_.new_exc(e, "lucene_index", "failed to get best", "query", qry_data.query); } catch (InvalidTokenOffsetsException e) { @@ -80,7 +88,7 @@ public class Gflucene_highlighter_mgr { int frags_len = frags.length; Gflucene_highlighter_item[] array = new Gflucene_highlighter_item[frags_len]; for (int i = 0; i < frags_len; i++) { - String frag = frags[i]; + String frag = frags[i].toString(); array[i] = new Gflucene_highlighter_item(i, frag); } return array;