Full-text search: Implement basic paging

pull/620/head
gnosygnu 7 years ago
parent 7924e26522
commit 49924110f4

@@ -55,6 +55,9 @@ public class Xofulltext_cache_mgr {
 		Xofulltext_cache_line line = new Xofulltext_cache_line(line_seq, line_html);
 		page.Lines().Add(line);
 	}
+	public Xofulltext_cache_qry Get_or_null(int qry_id) {
+		return (Xofulltext_cache_qry)qry_hash.Get_by(qry_id);
+	}
 	public Xofulltext_cache_page[] Get_pages_rng(int qry_id, int page_seq_bgn, int page_seq_end) {
 		Xofulltext_cache_qry qry = (Xofulltext_cache_qry)qry_hash.Get_by(qry_id);
 		if (qry == null) return null;

@@ -22,4 +22,5 @@ public class Xofulltext_cache_qry {
 	public int Id() {return id;} private final int id;
 	public byte[] Text() {return text;} private final byte[] text;
 	public Ordered_hash Pages() {return pages;} private final Ordered_hash pages = Ordered_hash_.New();
+	public boolean done;
 }

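For orientation, here is a minimal sketch of the nesting this cache relies on: a cached query owns an ordered set of pages, each page owns its highlighted lines, and the new done flag lets the service skip re-searching a query that has already returned everything it will (see the service change further down). The sketch uses plain Java collections; CachedQuery, CachedPage and CachedLine are hypothetical stand-ins, not the Xofulltext_cache_* types.

import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

// Hypothetical restatement of the cache shape: query -> pages -> lines.
class CachedLine {
	final int seq; final String html;
	CachedLine(int seq, String html) { this.seq = seq; this.html = html; }
}
class CachedPage {
	final int pageId; final String title;
	final List<CachedLine> lines = new ArrayList<>();
	CachedPage(int pageId, String title) { this.pageId = pageId; this.title = title; }
}
class CachedQuery {
	final int id; final String text;
	final Map<Integer, CachedPage> pages = new LinkedHashMap<>(); // insertion-ordered, keyed by page_id
	boolean done; // set once the search has returned everything it will return
	CachedQuery(int id, String text) { this.id = id; this.text = text; }
}
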
@@ -1,23 +0,0 @@
-/*
-XOWA: the XOWA Offline Wiki Application
-Copyright (C) 2012-2017 gnosygnu@gmail.com
-XOWA is licensed under the terms of the General Public License (GPL) Version 3,
-or alternatively under the terms of the Apache License Version 2.0.
-You may use XOWA according to either of these licenses as is most appropriate
-for your project on a case-by-case basis.
-The terms of each license can be found in the source code repository:
-GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
-Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
-*/
-package gplx.xowa.addons.wikis.fulltexts.searchers.caches; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.searchers.*;
-public class Xofulltext_cache_wiki {
-	public Xofulltext_cache_wiki(byte[] wiki) {
-		this.wiki = wiki;
-	}
-	public byte[] Wiki() {return wiki;} private final byte[] wiki;
-	public Ordered_hash Pages() {return pages;} private final Ordered_hash pages = Ordered_hash_.New();
-}

@@ -17,15 +17,23 @@ package gplx.xowa.addons.wikis.fulltexts.searchers.mgrs; import gplx.*; import g
 public class Xofulltext_args_wiki {
 	public byte[] wiki;
 	public byte[] ns_ids;
-	public int offset;
-	public int limit;
+	public Hash_adp ns_hash = Hash_adp_.New();
+	public int bgn;
+	public int len;
 	public Xofulltext_args_wiki(byte[] wiki) {
 		this.wiki = wiki;
 	}
 	public void Init_by_json(String key, byte[] val) {
-		if (String_.Eq(key, "ns_ids")) this.ns_ids = val;
-		else if (String_.Eq(key, "offsets")) this.offset = Bry_.To_int(val);
-		else if (String_.Eq(key, "limits")) this.limit = Bry_.To_int(val);
+		if (String_.Eq(key, "ns_ids")) {
+			this.ns_ids = val;
+			byte[][] ns_ary = Bry_split_.Split(ns_ids, Byte_ascii.Comma, true);
+			for (byte[] ns_id : ns_ary) {
+				int ns_int = Bry_.To_int(ns_id);
+				ns_hash.Add_if_dupe_use_1st(ns_int, ns_int);
+			}
+		}
+		else if (String_.Eq(key, "offsets")) this.bgn = Bry_.To_int(val);
+		else if (String_.Eq(key, "limits")) this.len = Bry_.To_int(val);
 	}
 }

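The change above does two things: it splits the comma-separated ns_ids value into a namespace lookup (ns_hash), and it renames offset/limit to bgn/len, i.e. the start index and length of the result window requested via the "offsets" and "limits" JSON keys. A rough plain-Java equivalent, using String/Set instead of the gplx byte-array helpers (WikiArgsSketch and its members are illustrative names only, not the XOWA API):

import java.util.HashSet;
import java.util.Set;

// Illustrative stand-in for Xofulltext_args_wiki.Init_by_json (not the gplx API).
class WikiArgsSketch {
	final Set<Integer> nsIds = new HashSet<>(); // parsed "ns_ids", e.g. "0,4,14"
	int bgn;  // "offsets": index of the first requested result
	int len;  // "limits" : number of requested results

	void initByJson(String key, String val) {
		if ("ns_ids".equals(key)) {
			for (String id : val.split(",")) {
				if (!id.trim().isEmpty()) nsIds.add(Integer.parseInt(id.trim()));
			}
		}
		else if ("offsets".equals(key)) bgn = Integer.parseInt(val);
		else if ("limits".equals(key))  len = Integer.parseInt(val);
	}
}

So, for example, offsets=20 and limits=20 asks for results 20 through 39 of the query.
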
@@ -14,7 +14,8 @@ GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
 Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
 */
 package gplx.xowa.addons.wikis.fulltexts.searchers.mgrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.searchers.*;
+import gplx.xowa.addons.wikis.fulltexts.searchers.caches.*;
 import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.uis.*;
 public interface Xofulltext_searcher {
-	void Search(Xofulltext_searcher_ui ui, Xow_wiki wiki, Xofulltext_args_qry qry_args, Xofulltext_args_wiki wiki_args);
+	void Search(Xofulltext_searcher_ui ui, Xow_wiki wiki, Xofulltext_cache_qry qry, Xofulltext_args_qry qry_args, Xofulltext_args_wiki wiki_args);
 }

@@ -23,7 +23,7 @@ public class Xofulltext_searcher__brute implements Xofulltext_searcher {
 	private final Xofulltext_finder_mgr finder = new Xofulltext_finder_mgr();
 	private final Xofulltext_finder_cbk__eval cbk_eval = new Xofulltext_finder_cbk__eval();
 	private final Xofulltext_finder_cbk__highlight cbk_highlight = new Xofulltext_finder_cbk__highlight();
-	public void Search(Xofulltext_searcher_ui ui, Xow_wiki wiki, Xofulltext_args_qry args, Xofulltext_args_wiki wiki_args) {
+	public void Search(Xofulltext_searcher_ui ui, Xow_wiki wiki, Xofulltext_cache_qry qry, Xofulltext_args_qry args, Xofulltext_args_wiki wiki_args) {
 		// get pages from db
 		Db_conn page_conn = wiki.Data__core_mgr().Tbl__page().Conn();
 		Db_rdr page_rdr = page_conn.Stmt_sql("SELECT * FROM page WHERE page_namespace IN (0) ORDER BY page_score DESC").Exec_select__rls_auto();
@@ -58,7 +58,7 @@ public class Xofulltext_searcher__brute implements Xofulltext_searcher {
 				ui.Send_wiki_update(wiki_domain, found, searched);
 				// do highlight
-				if (found <= wiki_args.limit) {
+				if (found <= wiki_args.len) {
 					cbk_highlight.Init(ui, args.qry_id, wiki, page_id, ttl.Full_db(), args.show_all_matches);
 					ui.Send_page_add(new Xofulltext_searcher_page
 						( args.qry_id

@@ -21,11 +21,14 @@ import gplx.gflucene.searchers.*;
 import gplx.gflucene.highlighters.*;
 import gplx.xowa.wikis.data.tbls.*;
 import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.uis.*;
+import gplx.xowa.addons.wikis.fulltexts.searchers.caches.*;
 public class Xofulltext_searcher__lucene implements Xofulltext_searcher {
 	private final Gflucene_searcher_mgr searcher = new Gflucene_searcher_mgr();
-	public void Search(Xofulltext_searcher_ui ui, Xow_wiki wiki, Xofulltext_args_qry args, Xofulltext_args_wiki wiki_args) {
-		// create list
-		Ordered_hash list = Ordered_hash_.New();
+	public void Search(Xofulltext_searcher_ui ui, Xow_wiki wiki, Xofulltext_cache_qry qry, Xofulltext_args_qry args, Xofulltext_args_wiki wiki_args) {
+		// create lists
+		Ordered_hash full_list = Ordered_hash_.New();
+		Ordered_hash temp_list = Ordered_hash_.New();
+		Ordered_hash page_list = qry.Pages();
 		// init searcher with wiki
 		Gflucene_analyzer_data analyzer_data = Gflucene_analyzer_data.New_data_from_locale(wiki.Lang().Key_str());
@@ -33,41 +36,56 @@ public class Xofulltext_searcher__lucene implements Xofulltext_searcher {
 			( analyzer_data
 			, Xosearch_fulltext_addon.Get_index_dir(wiki).Xto_api()));
-		// exec search
-		Gflucene_searcher_qry searcher_data = new Gflucene_searcher_qry(String_.new_u8(args.search_text), wiki_args.limit);
-		searcher.Exec(list, searcher_data);
-		// term
-		searcher.Term();
 		// get page_load vars
 		Xowd_page_itm tmp_page_row = new Xowd_page_itm();
 		Xowd_page_tbl page_tbl = wiki.Data__core_mgr().Db__core().Tbl__page();
-		// loop list and load pages
-		int len = list.Len();
-		for (int i = 0; i < len; i++) {
-			Gflucene_doc_data doc_data = (Gflucene_doc_data)list.Get_at(i);
+		// exec search
+		int needed_bgn = wiki_args.bgn;
+		if (needed_bgn < page_list.Len()) needed_bgn = page_list.Len();
+		int needed_end = wiki_args.bgn + wiki_args.len;
+		int needed_len = needed_end - needed_bgn;
+		int found = 0;
+		Gflucene_searcher_qry searcher_data = new Gflucene_searcher_qry(String_.new_u8(args.search_text), 100);
+		while (found < needed_len) {
+			searcher.Exec(temp_list, searcher_data);
-			// load page
-			if (!page_tbl.Select_by_id(tmp_page_row, doc_data.page_id)) {
-				Gfo_usr_dlg_.Instance.Warn_many("", "", "searcher.lucene: could not find page; page_id=~{0}", doc_data.page_id);
-				continue;
-			}
+			int temp_list_len = temp_list.Len();
+			for (int i = 0; i < temp_list_len; i++) {
+				Gflucene_doc_data doc_data = (Gflucene_doc_data)temp_list.Get_at(i);
+				if (!page_list.Has(doc_data.page_id)) {
+					// load page
+					if (!page_tbl.Select_by_id(tmp_page_row, doc_data.page_id)) {
+						Gfo_usr_dlg_.Instance.Warn_many("", "", "searcher.lucene: could not find page; page_id=~{0}", doc_data.page_id);
+						continue;
+					}
-			// make page_ttl
-			Xoa_ttl page_ttl = wiki.Ttl_parse(tmp_page_row.Ns_id(), tmp_page_row.Ttl_page_db());
-			doc_data.ns_id = tmp_page_row.Ns_id();
-			doc_data.page_full_db = page_ttl.Full_db();
+					// make page_ttl
+					Xoa_ttl page_ttl = wiki.Ttl_parse(tmp_page_row.Ns_id(), tmp_page_row.Ttl_page_db());
+					doc_data.ns_id = tmp_page_row.Ns_id();
+					doc_data.page_full_db = page_ttl.Full_db();
-			// call page doc_data
-			Xofulltext_searcher_page page = new Xofulltext_searcher_page(args.qry_id, wiki.Domain_bry(), doc_data.page_id, doc_data.page_full_db, args.expand_matches_section);
-			ui.Send_page_add(page);
+					if (!wiki_args.ns_hash.Has(doc_data.ns_id)) continue;
+					// call page doc_data
+					Xofulltext_searcher_page page = new Xofulltext_searcher_page(args.qry_id, wiki.Domain_bry(), doc_data.page_id, doc_data.page_full_db, args.expand_matches_section);
+					ui.Send_page_add(page);
+					full_list.Add(doc_data.page_id, doc_data);
+					found++;
+					if (found >= needed_len) break;
+				}
+			}
+			temp_list.Clear();
 		}
-		ui.Send_wiki_update(wiki.Domain_bry(), len + List_adp_.Base1, -1);
+		// term
+		searcher.Term();
+		ui.Send_wiki_update(wiki.Domain_bry(), page_list.Len(), -1);
 		// create highlighter thread and launch it
-		Xofulltext_highlighter_mgr highlighter_mgr = new Xofulltext_highlighter_mgr(ui, wiki, args, analyzer_data, searcher_data, list);
+		Xofulltext_highlighter_mgr highlighter_mgr = new Xofulltext_highlighter_mgr(ui, wiki, args, analyzer_data, searcher_data, full_list);
 		gplx.core.threads.Thread_adp_.Start_by_key("highlighter", Cancelable_.Never, highlighter_mgr, Xofulltext_highlighter_mgr.Invk__highlight);
 	}
 }

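The heart of the Lucene change is the window bookkeeping: pages already cached for this query count toward the requested offset, each batch of hits is filtered against the cache and the namespace filter, and collection stops once the remainder of the window [bgn, bgn + len) is filled. A compact sketch of that arithmetic with plain collections (Hit, fillWindow and the other names here are hypothetical, not the Gflucene types):

import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Set;

class PagingSketch {
	static class Hit { final int pageId, nsId; Hit(int p, int n) { pageId = p; nsId = n; } }

	// Collect just enough new hits to fill the requested window [bgn, bgn + len),
	// where `cached` plays the role of qry.Pages() and `allowedNs` of wiki_args.ns_hash.
	static List<Hit> fillWindow(Map<Integer, Hit> cached, Set<Integer> allowedNs,
	                            List<List<Hit>> batches, int bgn, int len) {
		int neededBgn = Math.max(bgn, cached.size()); // cached pages already cover the front of the window
		int neededLen = (bgn + len) - neededBgn;      // how many new pages the UI still needs
		List<Hit> found = new ArrayList<>();
		for (List<Hit> batch : batches) {             // the real code re-executes the search with a batch size of 100
			for (Hit hit : batch) {
				if (found.size() >= neededLen) return found;
				if (cached.containsKey(hit.pageId)) continue; // already sent on an earlier request
				if (!allowedNs.contains(hit.nsId)) continue;  // namespace filter
				found.add(hit);
			}
		}
		return found;
	}
}
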
@@ -77,34 +77,49 @@ class Xofulltext_searcher_svc implements Gfo_invk {
 			// try to get from cache
 			byte[] qry_key = args.Qry_key(wiki_domain, wiki_args.ns_ids);
 			int qry_id = cache_mgr.Ids__get_or_neg1(qry_key);
+			Xofulltext_cache_qry qry = null;
 			if (qry_id == -1) {
 				qry_id = cache_mgr.Ids__next();
 				cache_mgr.Add(qry_id, qry_key);
+				qry = cache_mgr.Get_or_null(qry_id);
 			}
 			else {
-				Xofulltext_cache_page[] cached_pages = cache_mgr.Get_pages_rng(qry_id, wiki_args.offset, wiki_args.limit);
-				if (cached_pages != null) {
-					for (Xofulltext_cache_page page : cached_pages) {
-						ui.Send_page_add(new Xofulltext_searcher_page(qry_id, wiki.Domain_bry(), page.Page_id(), page.Page_ttl(), args.expand_matches_section));
-						int len = page.Lines().Len();
-						for (int i = 0; i < len; i++) {
-							Xofulltext_cache_line line = (Xofulltext_cache_line)page.Lines().Get_at(i);
-							ui.Send_line_add(args.show_all_matches, qry_id, wiki.Domain_bry(), page.Page_id(), line.Line_seq(), line.Line_html());
-						}
-					}
-					return;
+				qry = cache_mgr.Get_or_null(qry_id);
+				if (qry != null) {
+					boolean all_shown = Display_cached_qry(args, ui, wiki, qry, qry_id, wiki_args);
+					if (all_shown || qry.done)
+						return;
 				}
 			}
 			args.qry_id = qry_id;
 			// do search
 			Xofulltext_searcher searcher = Get_searcher(wiki);
-			searcher.Search(ui, wiki, args, wiki_args);
+			searcher.Search(ui, wiki, qry, args, wiki_args);
 		}
 		catch (Exception exc) {
 			Gfo_usr_dlg_.Instance.Warn_many("", "", "failed to search_wiki; err=~{0}", Err_.Message_gplx_log(exc));
 		}
 	}
+	private boolean Display_cached_qry(Xofulltext_args_qry args, Xofulltext_searcher_ui ui, Xow_wiki wiki, Xofulltext_cache_qry qry, int qry_id, Xofulltext_args_wiki wiki_args) {
+		int bgn = wiki_args.bgn;
+		int len = wiki_args.len;
+		int end = bgn + len;
+		int max = qry.Pages().Len();
+		for (int i = bgn; i < end; i++) {
+			if (i >= max) return false; // more pages requested than available
+			Xofulltext_cache_page page = (Xofulltext_cache_page)qry.Pages().Get_at(i);
+			ui.Send_page_add(new Xofulltext_searcher_page(qry_id, wiki.Domain_bry(), page.Page_id(), page.Page_ttl(), args.expand_matches_section));
+			// loop lines
+			int lines_len = page.Lines().Len();
+			for (int j = 0; j < lines_len; j++) {
+				Xofulltext_cache_line line = (Xofulltext_cache_line)page.Lines().Get_at(j);
+				ui.Send_line_add(args.show_all_matches, qry_id, wiki.Domain_bry(), page.Page_id(), line.Line_seq(), line.Line_html());
+			}
+		}
+		return true;
+	}
 	public void Get_lines_rest(Json_nde args) {
 		Get_lines_rest(args.Get_as_int("qry_id"), args.Get_as_bry("wiki"), args.Get_as_int("page_id"), args.Get_as_str("page_guid"));

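Display_cached_qry above answers one question: did the cache cover the whole requested window? It emits whatever cached pages fall into [bgn, bgn + len) and returns false as soon as it runs out, which makes the caller fall through to a fresh search unless qry.done says there is nothing more to find. A stripped-down sketch of that contract (hypothetical types; the println calls stand in for ui.Send_page_add / Send_line_add):

import java.util.List;

class CacheWindowSketch {
	// Returns true only if the cache held every page in the requested window.
	static boolean displayCachedRange(List<String> cachedPageTitles, int bgn, int len) {
		int end = bgn + len;
		for (int i = bgn; i < end; i++) {
			if (i >= cachedPageTitles.size()) return false; // more pages requested than cached
			System.out.println(cachedPageTitles.get(i));    // stand-in for ui.Send_page_add(...)
		}
		return true;
	}
}

When this returns false and the query is not marked done, the service re-runs the searcher with the same cache query object, so the search effectively resumes after the pages already cached.
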
@@ -82,9 +82,10 @@ public class Gflucene_searcher_mgr {
 				String docTitle = d.get("title");
 				Gflucene_doc_data doc = (Gflucene_doc_data)list.Get_by(docTitle);
 				if (doc == null) {
-					doc = new Gflucene_doc_data(Integer.parseInt(d.get("page_id")), 0, docTitle, "");
+					int doc_id = Integer.parseInt(d.get("page_id"));
+					doc = new Gflucene_doc_data(doc_id, 0, docTitle, "");
 					doc.lucene_score = hits[i].score;
-					list.Add(docTitle, doc);
+					list.Add(doc_id, doc);
 				}
 				// Tfds.Write(doc.lucene_score, doc.title);
 			}
