diff --git a/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/searchers/caches/Xofulltext_cache_mgr.java b/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/searchers/caches/Xofulltext_cache_mgr.java index 05c8a0609..05852b2b1 100644 --- a/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/searchers/caches/Xofulltext_cache_mgr.java +++ b/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/searchers/caches/Xofulltext_cache_mgr.java @@ -55,6 +55,9 @@ public class Xofulltext_cache_mgr { Xofulltext_cache_line line = new Xofulltext_cache_line(line_seq, line_html); page.Lines().Add(line); } + public Xofulltext_cache_qry Get_or_null(int qry_id) { + return (Xofulltext_cache_qry)qry_hash.Get_by(qry_id); + } public Xofulltext_cache_page[] Get_pages_rng(int qry_id, int page_seq_bgn, int page_seq_end) { Xofulltext_cache_qry qry = (Xofulltext_cache_qry)qry_hash.Get_by(qry_id); if (qry == null) return null; diff --git a/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/searchers/caches/Xofulltext_cache_qry.java b/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/searchers/caches/Xofulltext_cache_qry.java index 22d6df7c4..3638fe7c5 100644 --- a/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/searchers/caches/Xofulltext_cache_qry.java +++ b/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/searchers/caches/Xofulltext_cache_qry.java @@ -22,4 +22,5 @@ public class Xofulltext_cache_qry { public int Id() {return id;} private final int id; public byte[] Text() {return text;} private final byte[] text; public Ordered_hash Pages() {return pages;} private final Ordered_hash pages = Ordered_hash_.New(); + public boolean done; } diff --git a/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/searchers/caches/Xofulltext_cache_wiki.java b/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/searchers/caches/Xofulltext_cache_wiki.java deleted file mode 100644 index 58dbcde4a..000000000 --- a/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/searchers/caches/Xofulltext_cache_wiki.java +++ /dev/null @@ -1,23 +0,0 @@ -/* -XOWA: the XOWA Offline Wiki Application -Copyright (C) 2012-2017 gnosygnu@gmail.com - -XOWA is licensed under the terms of the General Public License (GPL) Version 3, -or alternatively under the terms of the Apache License Version 2.0. - -You may use XOWA according to either of these licenses as is most appropriate -for your project on a case-by-case basis. - -The terms of each license can be found in the source code repository: - -GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt -Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt -*/ -package gplx.xowa.addons.wikis.fulltexts.searchers.caches; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.searchers.*; -public class Xofulltext_cache_wiki { - public Xofulltext_cache_wiki(byte[] wiki) { - this.wiki = wiki; - } - public byte[] Wiki() {return wiki;} private final byte[] wiki; - public Ordered_hash Pages() {return pages;} private final Ordered_hash pages = Ordered_hash_.New(); -} diff --git a/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/searchers/mgrs/Xofulltext_args_wiki.java b/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/searchers/mgrs/Xofulltext_args_wiki.java index 6a67f1eb8..a2c70093f 100644 --- a/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/searchers/mgrs/Xofulltext_args_wiki.java +++ b/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/searchers/mgrs/Xofulltext_args_wiki.java @@ -17,15 +17,23 @@ package gplx.xowa.addons.wikis.fulltexts.searchers.mgrs; import gplx.*; import g public class Xofulltext_args_wiki { public byte[] wiki; public byte[] ns_ids; - public int offset; - public int limit; + public Hash_adp ns_hash = Hash_adp_.New(); + public int bgn; + public int len; public Xofulltext_args_wiki(byte[] wiki) { this.wiki = wiki; } public void Init_by_json(String key, byte[] val) { - if (String_.Eq(key, "ns_ids")) this.ns_ids = val; - else if (String_.Eq(key, "offsets")) this.offset = Bry_.To_int(val); - else if (String_.Eq(key, "limits")) this.limit = Bry_.To_int(val); + if (String_.Eq(key, "ns_ids")) { + this.ns_ids = val; + byte[][] ns_ary = Bry_split_.Split(ns_ids, Byte_ascii.Comma, true); + for (byte[] ns_id : ns_ary) { + int ns_int = Bry_.To_int(ns_id); + ns_hash.Add_if_dupe_use_1st(ns_int, ns_int); + } + } + else if (String_.Eq(key, "offsets")) this.bgn = Bry_.To_int(val); + else if (String_.Eq(key, "limits")) this.len = Bry_.To_int(val); } } diff --git a/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/searchers/mgrs/Xofulltext_searcher.java b/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/searchers/mgrs/Xofulltext_searcher.java index ea9cb790f..0cd354cf2 100644 --- a/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/searchers/mgrs/Xofulltext_searcher.java +++ b/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/searchers/mgrs/Xofulltext_searcher.java @@ -14,7 +14,8 @@ GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt */ package gplx.xowa.addons.wikis.fulltexts.searchers.mgrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.searchers.*; +import gplx.xowa.addons.wikis.fulltexts.searchers.caches.*; import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.uis.*; public interface Xofulltext_searcher { - void Search(Xofulltext_searcher_ui ui, Xow_wiki wiki, Xofulltext_args_qry qry_args, Xofulltext_args_wiki wiki_args); + void Search(Xofulltext_searcher_ui ui, Xow_wiki wiki, Xofulltext_cache_qry qry, Xofulltext_args_qry qry_args, Xofulltext_args_wiki wiki_args); } diff --git a/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/searchers/mgrs/brutes/Xofulltext_searcher__brute.java b/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/searchers/mgrs/brutes/Xofulltext_searcher__brute.java index 501d2dba3..994605c32 100644 --- a/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/searchers/mgrs/brutes/Xofulltext_searcher__brute.java +++ b/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/searchers/mgrs/brutes/Xofulltext_searcher__brute.java @@ -23,7 +23,7 @@ public class Xofulltext_searcher__brute implements Xofulltext_searcher { private final Xofulltext_finder_mgr finder = new Xofulltext_finder_mgr(); private final Xofulltext_finder_cbk__eval cbk_eval = new Xofulltext_finder_cbk__eval(); private final Xofulltext_finder_cbk__highlight cbk_highlight = new Xofulltext_finder_cbk__highlight(); - public void Search(Xofulltext_searcher_ui ui, Xow_wiki wiki, Xofulltext_args_qry args, Xofulltext_args_wiki wiki_args) { + public void Search(Xofulltext_searcher_ui ui, Xow_wiki wiki, Xofulltext_cache_qry qry, Xofulltext_args_qry args, Xofulltext_args_wiki wiki_args) { // get pages from db Db_conn page_conn = wiki.Data__core_mgr().Tbl__page().Conn(); Db_rdr page_rdr = page_conn.Stmt_sql("SELECT * FROM page WHERE page_namespace IN (0) ORDER BY page_score DESC").Exec_select__rls_auto(); @@ -58,7 +58,7 @@ public class Xofulltext_searcher__brute implements Xofulltext_searcher { ui.Send_wiki_update(wiki_domain, found, searched); // do highlight - if (found <= wiki_args.limit) { + if (found <= wiki_args.len) { cbk_highlight.Init(ui, args.qry_id, wiki, page_id, ttl.Full_db(), args.show_all_matches); ui.Send_page_add(new Xofulltext_searcher_page ( args.qry_id diff --git a/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/searchers/mgrs/gflucenes/Xofulltext_searcher__lucene.java b/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/searchers/mgrs/gflucenes/Xofulltext_searcher__lucene.java index e779ef1be..db0522500 100644 --- a/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/searchers/mgrs/gflucenes/Xofulltext_searcher__lucene.java +++ b/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/searchers/mgrs/gflucenes/Xofulltext_searcher__lucene.java @@ -21,11 +21,14 @@ import gplx.gflucene.searchers.*; import gplx.gflucene.highlighters.*; import gplx.xowa.wikis.data.tbls.*; import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.uis.*; +import gplx.xowa.addons.wikis.fulltexts.searchers.caches.*; public class Xofulltext_searcher__lucene implements Xofulltext_searcher { private final Gflucene_searcher_mgr searcher = new Gflucene_searcher_mgr(); - public void Search(Xofulltext_searcher_ui ui, Xow_wiki wiki, Xofulltext_args_qry args, Xofulltext_args_wiki wiki_args) { - // create list - Ordered_hash list = Ordered_hash_.New(); + public void Search(Xofulltext_searcher_ui ui, Xow_wiki wiki, Xofulltext_cache_qry qry, Xofulltext_args_qry args, Xofulltext_args_wiki wiki_args) { + // create lists + Ordered_hash full_list = Ordered_hash_.New(); + Ordered_hash temp_list = Ordered_hash_.New(); + Ordered_hash page_list = qry.Pages(); // init searcher with wiki Gflucene_analyzer_data analyzer_data = Gflucene_analyzer_data.New_data_from_locale(wiki.Lang().Key_str()); @@ -33,41 +36,56 @@ public class Xofulltext_searcher__lucene implements Xofulltext_searcher { ( analyzer_data , Xosearch_fulltext_addon.Get_index_dir(wiki).Xto_api())); - // exec search - Gflucene_searcher_qry searcher_data = new Gflucene_searcher_qry(String_.new_u8(args.search_text), wiki_args.limit); - searcher.Exec(list, searcher_data); - - // term - searcher.Term(); - // get page_load vars Xowd_page_itm tmp_page_row = new Xowd_page_itm(); Xowd_page_tbl page_tbl = wiki.Data__core_mgr().Db__core().Tbl__page(); - // loop list and load pages - int len = list.Len(); - for (int i = 0; i < len; i++) { - Gflucene_doc_data doc_data = (Gflucene_doc_data)list.Get_at(i); + // exec search + int needed_bgn = wiki_args.bgn; + if (needed_bgn < page_list.Len()) needed_bgn = page_list.Len(); + int needed_end = wiki_args.bgn + wiki_args.len; + int needed_len = needed_end - needed_bgn; + int found = 0; + Gflucene_searcher_qry searcher_data = new Gflucene_searcher_qry(String_.new_u8(args.search_text), 100); + while (found < needed_len) { + searcher.Exec(temp_list, searcher_data); - // load page - if (!page_tbl.Select_by_id(tmp_page_row, doc_data.page_id)) { - Gfo_usr_dlg_.Instance.Warn_many("", "", "searcher.lucene: could not find page; page_id=~{0}", doc_data.page_id); - continue; + int temp_list_len = temp_list.Len(); + for (int i = 0; i < temp_list_len; i++) { + Gflucene_doc_data doc_data = (Gflucene_doc_data)temp_list.Get_at(i); + if (!page_list.Has(doc_data.page_id)) { + // load page + if (!page_tbl.Select_by_id(tmp_page_row, doc_data.page_id)) { + Gfo_usr_dlg_.Instance.Warn_many("", "", "searcher.lucene: could not find page; page_id=~{0}", doc_data.page_id); + continue; + } + + // make page_ttl + Xoa_ttl page_ttl = wiki.Ttl_parse(tmp_page_row.Ns_id(), tmp_page_row.Ttl_page_db()); + doc_data.ns_id = tmp_page_row.Ns_id(); + doc_data.page_full_db = page_ttl.Full_db(); + + if (!wiki_args.ns_hash.Has(doc_data.ns_id)) continue; + + // call page doc_data + Xofulltext_searcher_page page = new Xofulltext_searcher_page(args.qry_id, wiki.Domain_bry(), doc_data.page_id, doc_data.page_full_db, args.expand_matches_section); + ui.Send_page_add(page); + + full_list.Add(doc_data.page_id, doc_data); + found++; + if (found >= needed_len) break; + } } - - // make page_ttl - Xoa_ttl page_ttl = wiki.Ttl_parse(tmp_page_row.Ns_id(), tmp_page_row.Ttl_page_db()); - doc_data.ns_id = tmp_page_row.Ns_id(); - doc_data.page_full_db = page_ttl.Full_db(); - - // call page doc_data - Xofulltext_searcher_page page = new Xofulltext_searcher_page(args.qry_id, wiki.Domain_bry(), doc_data.page_id, doc_data.page_full_db, args.expand_matches_section); - ui.Send_page_add(page); + temp_list.Clear(); } - ui.Send_wiki_update(wiki.Domain_bry(), len + List_adp_.Base1, -1); + + // term + searcher.Term(); + + ui.Send_wiki_update(wiki.Domain_bry(), page_list.Len(), -1); // create highlighter thread and launch it - Xofulltext_highlighter_mgr highlighter_mgr = new Xofulltext_highlighter_mgr(ui, wiki, args, analyzer_data, searcher_data, list); + Xofulltext_highlighter_mgr highlighter_mgr = new Xofulltext_highlighter_mgr(ui, wiki, args, analyzer_data, searcher_data, full_list); gplx.core.threads.Thread_adp_.Start_by_key("highlighter", Cancelable_.Never, highlighter_mgr, Xofulltext_highlighter_mgr.Invk__highlight); } } diff --git a/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/searchers/svcs/Xofulltext_searcher_svc.java b/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/searchers/svcs/Xofulltext_searcher_svc.java index ca782cbe7..6d3ec9d44 100644 --- a/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/searchers/svcs/Xofulltext_searcher_svc.java +++ b/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/searchers/svcs/Xofulltext_searcher_svc.java @@ -77,34 +77,49 @@ class Xofulltext_searcher_svc implements Gfo_invk { // try to get from cache byte[] qry_key = args.Qry_key(wiki_domain, wiki_args.ns_ids); int qry_id = cache_mgr.Ids__get_or_neg1(qry_key); + Xofulltext_cache_qry qry = null; if (qry_id == -1) { qry_id = cache_mgr.Ids__next(); cache_mgr.Add(qry_id, qry_key); + qry = cache_mgr.Get_or_null(qry_id); } else { - Xofulltext_cache_page[] cached_pages = cache_mgr.Get_pages_rng(qry_id, wiki_args.offset, wiki_args.limit); - if (cached_pages != null) { - for (Xofulltext_cache_page page : cached_pages) { - ui.Send_page_add(new Xofulltext_searcher_page(qry_id, wiki.Domain_bry(), page.Page_id(), page.Page_ttl(), args.expand_matches_section)); - int len = page.Lines().Len(); - for (int i = 0; i < len; i++) { - Xofulltext_cache_line line = (Xofulltext_cache_line)page.Lines().Get_at(i); - ui.Send_line_add(args.show_all_matches, qry_id, wiki.Domain_bry(), page.Page_id(), line.Line_seq(), line.Line_html()); - } - } - return; + qry = cache_mgr.Get_or_null(qry_id); + if (qry != null) { + boolean all_shown = Display_cached_qry(args, ui, wiki, qry, qry_id, wiki_args); + if (all_shown || qry.done) + return; } } args.qry_id = qry_id; // do search Xofulltext_searcher searcher = Get_searcher(wiki); - searcher.Search(ui, wiki, args, wiki_args); + searcher.Search(ui, wiki, qry, args, wiki_args); } catch (Exception exc) { Gfo_usr_dlg_.Instance.Warn_many("", "", "failed to search_wiki; err=~{0}", Err_.Message_gplx_log(exc)); } } + private boolean Display_cached_qry(Xofulltext_args_qry args, Xofulltext_searcher_ui ui, Xow_wiki wiki, Xofulltext_cache_qry qry, int qry_id, Xofulltext_args_wiki wiki_args) { + int bgn = wiki_args.bgn; + int len = wiki_args.len; + int end = bgn + len; + int max = qry.Pages().Len(); + for (int i = bgn; i < end; i++) { + if (i >= max) return false; // more pages requested than available + Xofulltext_cache_page page = (Xofulltext_cache_page)qry.Pages().Get_at(i); + ui.Send_page_add(new Xofulltext_searcher_page(qry_id, wiki.Domain_bry(), page.Page_id(), page.Page_ttl(), args.expand_matches_section)); + + // loop lines + int lines_len = page.Lines().Len(); + for (int j = 0; j < lines_len; j++) { + Xofulltext_cache_line line = (Xofulltext_cache_line)page.Lines().Get_at(j); + ui.Send_line_add(args.show_all_matches, qry_id, wiki.Domain_bry(), page.Page_id(), line.Line_seq(), line.Line_html()); + } + } + return true; + } public void Get_lines_rest(Json_nde args) { Get_lines_rest(args.Get_as_int("qry_id"), args.Get_as_bry("wiki"), args.Get_as_int("page_id"), args.Get_as_str("page_guid")); diff --git a/gplx.gflucene/src/gplx/gflucene/searchers/Gflucene_searcher_mgr.java b/gplx.gflucene/src/gplx/gflucene/searchers/Gflucene_searcher_mgr.java index cdf7a43b8..3008f8a48 100644 --- a/gplx.gflucene/src/gplx/gflucene/searchers/Gflucene_searcher_mgr.java +++ b/gplx.gflucene/src/gplx/gflucene/searchers/Gflucene_searcher_mgr.java @@ -82,9 +82,10 @@ public class Gflucene_searcher_mgr { String docTitle = d.get("title"); Gflucene_doc_data doc = (Gflucene_doc_data)list.Get_by(docTitle); if (doc == null) { - doc = new Gflucene_doc_data(Integer.parseInt(d.get("page_id")), 0, docTitle, ""); + int doc_id = Integer.parseInt(d.get("page_id")); + doc = new Gflucene_doc_data(doc_id, 0, docTitle, ""); doc.lucene_score = hits[i].score; - list.Add(docTitle, doc); + list.Add(doc_id, doc); } // Tfds.Write(doc.lucene_score, doc.title); }