mirror of
https://github.com/gnosygnu/xowa.git
synced 2024-10-27 20:34:16 +00:00
Full-text search: Implement basic paging
This commit is contained in:
parent
7924e26522
commit
49924110f4
@ -55,6 +55,9 @@ public class Xofulltext_cache_mgr {
|
||||
Xofulltext_cache_line line = new Xofulltext_cache_line(line_seq, line_html);
|
||||
page.Lines().Add(line);
|
||||
}
|
||||
// Looks up the cached query stored in qry_hash under qry_id and returns it
// cast to Xofulltext_cache_qry.
// NOTE(review): per the method name, the result is presumably null when no
// query is registered for qry_id; this relies on qry_hash.Get_by returning
// null on a miss, which is not visible here -- confirm.
public Xofulltext_cache_qry Get_or_null(int qry_id) {
|
||||
return (Xofulltext_cache_qry)qry_hash.Get_by(qry_id);
|
||||
}
|
||||
public Xofulltext_cache_page[] Get_pages_rng(int qry_id, int page_seq_bgn, int page_seq_end) {
|
||||
Xofulltext_cache_qry qry = (Xofulltext_cache_qry)qry_hash.Get_by(qry_id);
|
||||
if (qry == null) return null;
|
||||
|
@ -22,4 +22,5 @@ public class Xofulltext_cache_qry {
|
||||
public int Id() {return id;} private final int id;
|
||||
public byte[] Text() {return text;} private final byte[] text;
|
||||
public Ordered_hash Pages() {return pages;} private final Ordered_hash pages = Ordered_hash_.New();
|
||||
public boolean done;
|
||||
}
|
||||
|
@ -1,23 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||
|
||||
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||
or alternatively under the terms of the Apache License Version 2.0.
|
||||
|
||||
You may use XOWA according to either of these licenses as is most appropriate
|
||||
for your project on a case-by-case basis.
|
||||
|
||||
The terms of each license can be found in the source code repository:
|
||||
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.addons.wikis.fulltexts.searchers.caches; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.searchers.*;
|
||||
// Holder that pairs a wiki identifier (raw bytes) with an ordered hash of pages.
// NOTE(review): the hunk header preceding this listing is "-1,23 +0,0", i.e. the
// whole file is removed by this commit; the listing shows the deleted content.
public class Xofulltext_cache_wiki {
|
||||
// Stores the given byte array as-is (no defensive copy is made).
public Xofulltext_cache_wiki(byte[] wiki) {
|
||||
this.wiki = wiki;
|
||||
}
|
||||
// Returns the same byte array that was passed to the constructor.
public byte[] Wiki() {return wiki;} private final byte[] wiki;
|
||||
// Returns the per-instance Ordered_hash created at construction; it starts out
// as whatever Ordered_hash_.New() produces and is exposed directly to callers.
public Ordered_hash Pages() {return pages;} private final Ordered_hash pages = Ordered_hash_.New();
|
||||
}
|
@ -17,15 +17,23 @@ package gplx.xowa.addons.wikis.fulltexts.searchers.mgrs; import gplx.*; import g
|
||||
public class Xofulltext_args_wiki {
|
||||
public byte[] wiki;
|
||||
public byte[] ns_ids;
|
||||
public int offset;
|
||||
public int limit;
|
||||
public Hash_adp ns_hash = Hash_adp_.New();
|
||||
public int bgn;
|
||||
public int len;
|
||||
|
||||
public Xofulltext_args_wiki(byte[] wiki) {
|
||||
this.wiki = wiki;
|
||||
}
|
||||
public void Init_by_json(String key, byte[] val) {
|
||||
if (String_.Eq(key, "ns_ids")) this.ns_ids = val;
|
||||
else if (String_.Eq(key, "offsets")) this.offset = Bry_.To_int(val);
|
||||
else if (String_.Eq(key, "limits")) this.limit = Bry_.To_int(val);
|
||||
if (String_.Eq(key, "ns_ids")) {
|
||||
this.ns_ids = val;
|
||||
byte[][] ns_ary = Bry_split_.Split(ns_ids, Byte_ascii.Comma, true);
|
||||
for (byte[] ns_id : ns_ary) {
|
||||
int ns_int = Bry_.To_int(ns_id);
|
||||
ns_hash.Add_if_dupe_use_1st(ns_int, ns_int);
|
||||
}
|
||||
}
|
||||
else if (String_.Eq(key, "offsets")) this.bgn = Bry_.To_int(val);
|
||||
else if (String_.Eq(key, "limits")) this.len = Bry_.To_int(val);
|
||||
}
|
||||
}
|
||||
|
@ -14,7 +14,8 @@ GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.addons.wikis.fulltexts.searchers.mgrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.searchers.*;
|
||||
import gplx.xowa.addons.wikis.fulltexts.searchers.caches.*;
|
||||
import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.uis.*;
|
||||
public interface Xofulltext_searcher {
|
||||
void Search(Xofulltext_searcher_ui ui, Xow_wiki wiki, Xofulltext_args_qry qry_args, Xofulltext_args_wiki wiki_args);
|
||||
void Search(Xofulltext_searcher_ui ui, Xow_wiki wiki, Xofulltext_cache_qry qry, Xofulltext_args_qry qry_args, Xofulltext_args_wiki wiki_args);
|
||||
}
|
||||
|
@ -23,7 +23,7 @@ public class Xofulltext_searcher__brute implements Xofulltext_searcher {
|
||||
private final Xofulltext_finder_mgr finder = new Xofulltext_finder_mgr();
|
||||
private final Xofulltext_finder_cbk__eval cbk_eval = new Xofulltext_finder_cbk__eval();
|
||||
private final Xofulltext_finder_cbk__highlight cbk_highlight = new Xofulltext_finder_cbk__highlight();
|
||||
public void Search(Xofulltext_searcher_ui ui, Xow_wiki wiki, Xofulltext_args_qry args, Xofulltext_args_wiki wiki_args) {
|
||||
public void Search(Xofulltext_searcher_ui ui, Xow_wiki wiki, Xofulltext_cache_qry qry, Xofulltext_args_qry args, Xofulltext_args_wiki wiki_args) {
|
||||
// get pages from db
|
||||
Db_conn page_conn = wiki.Data__core_mgr().Tbl__page().Conn();
|
||||
Db_rdr page_rdr = page_conn.Stmt_sql("SELECT * FROM page WHERE page_namespace IN (0) ORDER BY page_score DESC").Exec_select__rls_auto();
|
||||
@ -58,7 +58,7 @@ public class Xofulltext_searcher__brute implements Xofulltext_searcher {
|
||||
ui.Send_wiki_update(wiki_domain, found, searched);
|
||||
|
||||
// do highlight
|
||||
if (found <= wiki_args.limit) {
|
||||
if (found <= wiki_args.len) {
|
||||
cbk_highlight.Init(ui, args.qry_id, wiki, page_id, ttl.Full_db(), args.show_all_matches);
|
||||
ui.Send_page_add(new Xofulltext_searcher_page
|
||||
( args.qry_id
|
||||
|
@ -21,11 +21,14 @@ import gplx.gflucene.searchers.*;
|
||||
import gplx.gflucene.highlighters.*;
|
||||
import gplx.xowa.wikis.data.tbls.*;
|
||||
import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.uis.*;
|
||||
import gplx.xowa.addons.wikis.fulltexts.searchers.caches.*;
|
||||
public class Xofulltext_searcher__lucene implements Xofulltext_searcher {
|
||||
private final Gflucene_searcher_mgr searcher = new Gflucene_searcher_mgr();
|
||||
public void Search(Xofulltext_searcher_ui ui, Xow_wiki wiki, Xofulltext_args_qry args, Xofulltext_args_wiki wiki_args) {
|
||||
// create list
|
||||
Ordered_hash list = Ordered_hash_.New();
|
||||
public void Search(Xofulltext_searcher_ui ui, Xow_wiki wiki, Xofulltext_cache_qry qry, Xofulltext_args_qry args, Xofulltext_args_wiki wiki_args) {
|
||||
// create lists
|
||||
Ordered_hash full_list = Ordered_hash_.New();
|
||||
Ordered_hash temp_list = Ordered_hash_.New();
|
||||
Ordered_hash page_list = qry.Pages();
|
||||
|
||||
// init searcher with wiki
|
||||
Gflucene_analyzer_data analyzer_data = Gflucene_analyzer_data.New_data_from_locale(wiki.Lang().Key_str());
|
||||
@ -33,41 +36,56 @@ public class Xofulltext_searcher__lucene implements Xofulltext_searcher {
|
||||
( analyzer_data
|
||||
, Xosearch_fulltext_addon.Get_index_dir(wiki).Xto_api()));
|
||||
|
||||
// exec search
|
||||
Gflucene_searcher_qry searcher_data = new Gflucene_searcher_qry(String_.new_u8(args.search_text), wiki_args.limit);
|
||||
searcher.Exec(list, searcher_data);
|
||||
|
||||
// term
|
||||
searcher.Term();
|
||||
|
||||
// get page_load vars
|
||||
Xowd_page_itm tmp_page_row = new Xowd_page_itm();
|
||||
Xowd_page_tbl page_tbl = wiki.Data__core_mgr().Db__core().Tbl__page();
|
||||
|
||||
// loop list and load pages
|
||||
int len = list.Len();
|
||||
for (int i = 0; i < len; i++) {
|
||||
Gflucene_doc_data doc_data = (Gflucene_doc_data)list.Get_at(i);
|
||||
// exec search
|
||||
int needed_bgn = wiki_args.bgn;
|
||||
if (needed_bgn < page_list.Len()) needed_bgn = page_list.Len();
|
||||
int needed_end = wiki_args.bgn + wiki_args.len;
|
||||
int needed_len = needed_end - needed_bgn;
|
||||
int found = 0;
|
||||
Gflucene_searcher_qry searcher_data = new Gflucene_searcher_qry(String_.new_u8(args.search_text), 100);
|
||||
while (found < needed_len) {
|
||||
searcher.Exec(temp_list, searcher_data);
|
||||
|
||||
// load page
|
||||
if (!page_tbl.Select_by_id(tmp_page_row, doc_data.page_id)) {
|
||||
Gfo_usr_dlg_.Instance.Warn_many("", "", "searcher.lucene: could not find page; page_id=~{0}", doc_data.page_id);
|
||||
continue;
|
||||
int temp_list_len = temp_list.Len();
|
||||
for (int i = 0; i < temp_list_len; i++) {
|
||||
Gflucene_doc_data doc_data = (Gflucene_doc_data)temp_list.Get_at(i);
|
||||
if (!page_list.Has(doc_data.page_id)) {
|
||||
// load page
|
||||
if (!page_tbl.Select_by_id(tmp_page_row, doc_data.page_id)) {
|
||||
Gfo_usr_dlg_.Instance.Warn_many("", "", "searcher.lucene: could not find page; page_id=~{0}", doc_data.page_id);
|
||||
continue;
|
||||
}
|
||||
|
||||
// make page_ttl
|
||||
Xoa_ttl page_ttl = wiki.Ttl_parse(tmp_page_row.Ns_id(), tmp_page_row.Ttl_page_db());
|
||||
doc_data.ns_id = tmp_page_row.Ns_id();
|
||||
doc_data.page_full_db = page_ttl.Full_db();
|
||||
|
||||
if (!wiki_args.ns_hash.Has(doc_data.ns_id)) continue;
|
||||
|
||||
// call page doc_data
|
||||
Xofulltext_searcher_page page = new Xofulltext_searcher_page(args.qry_id, wiki.Domain_bry(), doc_data.page_id, doc_data.page_full_db, args.expand_matches_section);
|
||||
ui.Send_page_add(page);
|
||||
|
||||
full_list.Add(doc_data.page_id, doc_data);
|
||||
found++;
|
||||
if (found >= needed_len) break;
|
||||
}
|
||||
}
|
||||
|
||||
// make page_ttl
|
||||
Xoa_ttl page_ttl = wiki.Ttl_parse(tmp_page_row.Ns_id(), tmp_page_row.Ttl_page_db());
|
||||
doc_data.ns_id = tmp_page_row.Ns_id();
|
||||
doc_data.page_full_db = page_ttl.Full_db();
|
||||
|
||||
// call page doc_data
|
||||
Xofulltext_searcher_page page = new Xofulltext_searcher_page(args.qry_id, wiki.Domain_bry(), doc_data.page_id, doc_data.page_full_db, args.expand_matches_section);
|
||||
ui.Send_page_add(page);
|
||||
temp_list.Clear();
|
||||
}
|
||||
ui.Send_wiki_update(wiki.Domain_bry(), len + List_adp_.Base1, -1);
|
||||
|
||||
// term
|
||||
searcher.Term();
|
||||
|
||||
ui.Send_wiki_update(wiki.Domain_bry(), page_list.Len(), -1);
|
||||
|
||||
// create highlighter thread and launch it
|
||||
Xofulltext_highlighter_mgr highlighter_mgr = new Xofulltext_highlighter_mgr(ui, wiki, args, analyzer_data, searcher_data, list);
|
||||
Xofulltext_highlighter_mgr highlighter_mgr = new Xofulltext_highlighter_mgr(ui, wiki, args, analyzer_data, searcher_data, full_list);
|
||||
gplx.core.threads.Thread_adp_.Start_by_key("highlighter", Cancelable_.Never, highlighter_mgr, Xofulltext_highlighter_mgr.Invk__highlight);
|
||||
}
|
||||
}
|
||||
|
@ -77,34 +77,49 @@ class Xofulltext_searcher_svc implements Gfo_invk {
|
||||
// try to get from cache
|
||||
byte[] qry_key = args.Qry_key(wiki_domain, wiki_args.ns_ids);
|
||||
int qry_id = cache_mgr.Ids__get_or_neg1(qry_key);
|
||||
Xofulltext_cache_qry qry = null;
|
||||
if (qry_id == -1) {
|
||||
qry_id = cache_mgr.Ids__next();
|
||||
cache_mgr.Add(qry_id, qry_key);
|
||||
qry = cache_mgr.Get_or_null(qry_id);
|
||||
}
|
||||
else {
|
||||
Xofulltext_cache_page[] cached_pages = cache_mgr.Get_pages_rng(qry_id, wiki_args.offset, wiki_args.limit);
|
||||
if (cached_pages != null) {
|
||||
for (Xofulltext_cache_page page : cached_pages) {
|
||||
ui.Send_page_add(new Xofulltext_searcher_page(qry_id, wiki.Domain_bry(), page.Page_id(), page.Page_ttl(), args.expand_matches_section));
|
||||
int len = page.Lines().Len();
|
||||
for (int i = 0; i < len; i++) {
|
||||
Xofulltext_cache_line line = (Xofulltext_cache_line)page.Lines().Get_at(i);
|
||||
ui.Send_line_add(args.show_all_matches, qry_id, wiki.Domain_bry(), page.Page_id(), line.Line_seq(), line.Line_html());
|
||||
}
|
||||
}
|
||||
return;
|
||||
qry = cache_mgr.Get_or_null(qry_id);
|
||||
if (qry != null) {
|
||||
boolean all_shown = Display_cached_qry(args, ui, wiki, qry, qry_id, wiki_args);
|
||||
if (all_shown || qry.done)
|
||||
return;
|
||||
}
|
||||
}
|
||||
args.qry_id = qry_id;
|
||||
|
||||
// do search
|
||||
Xofulltext_searcher searcher = Get_searcher(wiki);
|
||||
searcher.Search(ui, wiki, args, wiki_args);
|
||||
searcher.Search(ui, wiki, qry, args, wiki_args);
|
||||
}
|
||||
catch (Exception exc) {
|
||||
Gfo_usr_dlg_.Instance.Warn_many("", "", "failed to search_wiki; err=~{0}", Err_.Message_gplx_log(exc));
|
||||
}
|
||||
}
|
||||
// Replays already-cached results of qry to the ui for the requested window
// [wiki_args.bgn, wiki_args.bgn + wiki_args.len).
//
// For every cached page inside the window it sends one page-add message followed
// by one line-add message per cached line of that page.
//
// Returns true when every index of the window was present in qry.Pages().
// Returns false as soon as an index reaches qry.Pages().Len(); note that pages
// located before that index have already been sent to the ui at that point.
private boolean Display_cached_qry(Xofulltext_args_qry args, Xofulltext_searcher_ui ui, Xow_wiki wiki, Xofulltext_cache_qry qry, int qry_id, Xofulltext_args_wiki wiki_args) {
|
||||
int bgn = wiki_args.bgn;
|
||||
int len = wiki_args.len;
|
||||
// exclusive upper bound of the requested window
int end = bgn + len;
|
||||
// number of pages currently held in the cache for this query
int max = qry.Pages().Len();
|
||||
for (int i = bgn; i < end; i++) {
|
||||
if (i >= max) return false; // more pages requested than available
|
||||
Xofulltext_cache_page page = (Xofulltext_cache_page)qry.Pages().Get_at(i);
|
||||
// announce the page itself before any of its lines
ui.Send_page_add(new Xofulltext_searcher_page(qry_id, wiki.Domain_bry(), page.Page_id(), page.Page_ttl(), args.expand_matches_section));
|
||||
|
||||
// loop lines
|
||||
int lines_len = page.Lines().Len();
|
||||
for (int j = 0; j < lines_len; j++) {
|
||||
Xofulltext_cache_line line = (Xofulltext_cache_line)page.Lines().Get_at(j);
|
||||
ui.Send_line_add(args.show_all_matches, qry_id, wiki.Domain_bry(), page.Page_id(), line.Line_seq(), line.Line_html());
|
||||
}
|
||||
}
|
||||
// whole window was served from the cache
return true;
|
||||
}
|
||||
|
||||
public void Get_lines_rest(Json_nde args) {
|
||||
Get_lines_rest(args.Get_as_int("qry_id"), args.Get_as_bry("wiki"), args.Get_as_int("page_id"), args.Get_as_str("page_guid"));
|
||||
|
@ -82,9 +82,10 @@ public class Gflucene_searcher_mgr {
|
||||
String docTitle = d.get("title");
|
||||
Gflucene_doc_data doc = (Gflucene_doc_data)list.Get_by(docTitle);
|
||||
if (doc == null) {
|
||||
doc = new Gflucene_doc_data(Integer.parseInt(d.get("page_id")), 0, docTitle, "");
|
||||
int doc_id = Integer.parseInt(d.get("page_id"));
|
||||
doc = new Gflucene_doc_data(doc_id, 0, docTitle, "");
|
||||
doc.lucene_score = hits[i].score;
|
||||
list.Add(docTitle, doc);
|
||||
list.Add(doc_id, doc);
|
||||
}
|
||||
// Tfds.Write(doc.lucene_score, doc.title);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user