1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2024-09-29 23:10:52 +00:00

Full-text search: Implement basic paging

This commit is contained in:
gnosygnu 2017-03-21 10:42:47 -04:00
parent 7924e26522
commit 49924110f4
9 changed files with 98 additions and 74 deletions

View File

@ -55,6 +55,9 @@ public class Xofulltext_cache_mgr {
Xofulltext_cache_line line = new Xofulltext_cache_line(line_seq, line_html); Xofulltext_cache_line line = new Xofulltext_cache_line(line_seq, line_html);
page.Lines().Add(line); page.Lines().Add(line);
} }
/**
 * Looks up the cached query registered under the given id.
 *
 * @param qry_id id used as the key into qry_hash
 * @return the entry stored under qry_id, cast to Xofulltext_cache_qry;
 *         presumably null when nothing is stored for that id (Get_pages_rng
 *         null-checks the same lookup) -- confirm against Hash_adp.Get_by
 */
public Xofulltext_cache_qry Get_or_null(int qry_id) {
	// keep the raw lookup result separate from the downcast
	Object cached = qry_hash.Get_by(qry_id);
	return (Xofulltext_cache_qry)cached;
}
public Xofulltext_cache_page[] Get_pages_rng(int qry_id, int page_seq_bgn, int page_seq_end) { public Xofulltext_cache_page[] Get_pages_rng(int qry_id, int page_seq_bgn, int page_seq_end) {
Xofulltext_cache_qry qry = (Xofulltext_cache_qry)qry_hash.Get_by(qry_id); Xofulltext_cache_qry qry = (Xofulltext_cache_qry)qry_hash.Get_by(qry_id);
if (qry == null) return null; if (qry == null) return null;

View File

@ -22,4 +22,5 @@ public class Xofulltext_cache_qry {
public int Id() {return id;} private final int id; public int Id() {return id;} private final int id;
public byte[] Text() {return text;} private final byte[] text; public byte[] Text() {return text;} private final byte[] text;
public Ordered_hash Pages() {return pages;} private final Ordered_hash pages = Ordered_hash_.New(); public Ordered_hash Pages() {return pages;} private final Ordered_hash pages = Ordered_hash_.New();
public boolean done;
} }

View File

@ -1,23 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.addons.wikis.fulltexts.searchers.caches; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.searchers.*;
/**
 * Cache holder keyed to a single wiki: keeps the wiki identifier it was
 * created with plus an Ordered_hash exposed through {@link #Pages()}.
 */
public class Xofulltext_cache_wiki {
	// identifier supplied by the creator; stored as-is (no defensive copy)
	private final byte[] wiki;
	// created once per instance; callers add to / read from it via Pages()
	private final Ordered_hash pages = Ordered_hash_.New();

	/**
	 * @param wiki identifier bytes for the wiki this cache entry belongs to
	 */
	public Xofulltext_cache_wiki(byte[] wiki) {
		this.wiki = wiki;
	}

	/** @return the same byte array that was passed to the constructor */
	public byte[] Wiki() {
		return wiki;
	}

	/** @return the Ordered_hash owned by this instance (always the same object) */
	public Ordered_hash Pages() {
		return pages;
	}
}

View File

@ -17,15 +17,23 @@ package gplx.xowa.addons.wikis.fulltexts.searchers.mgrs; import gplx.*; import g
public class Xofulltext_args_wiki { public class Xofulltext_args_wiki {
public byte[] wiki; public byte[] wiki;
public byte[] ns_ids; public byte[] ns_ids;
public int offset; public Hash_adp ns_hash = Hash_adp_.New();
public int limit; public int bgn;
public int len;
public Xofulltext_args_wiki(byte[] wiki) { public Xofulltext_args_wiki(byte[] wiki) {
this.wiki = wiki; this.wiki = wiki;
} }
public void Init_by_json(String key, byte[] val) { public void Init_by_json(String key, byte[] val) {
if (String_.Eq(key, "ns_ids")) this.ns_ids = val; if (String_.Eq(key, "ns_ids")) {
else if (String_.Eq(key, "offsets")) this.offset = Bry_.To_int(val); this.ns_ids = val;
else if (String_.Eq(key, "limits")) this.limit = Bry_.To_int(val); byte[][] ns_ary = Bry_split_.Split(ns_ids, Byte_ascii.Comma, true);
for (byte[] ns_id : ns_ary) {
int ns_int = Bry_.To_int(ns_id);
ns_hash.Add_if_dupe_use_1st(ns_int, ns_int);
}
}
else if (String_.Eq(key, "offsets")) this.bgn = Bry_.To_int(val);
else if (String_.Eq(key, "limits")) this.len = Bry_.To_int(val);
} }
} }

View File

@ -14,7 +14,8 @@ GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/ */
package gplx.xowa.addons.wikis.fulltexts.searchers.mgrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.searchers.*; package gplx.xowa.addons.wikis.fulltexts.searchers.mgrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.searchers.*;
import gplx.xowa.addons.wikis.fulltexts.searchers.caches.*;
import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.uis.*; import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.uis.*;
public interface Xofulltext_searcher { public interface Xofulltext_searcher {
void Search(Xofulltext_searcher_ui ui, Xow_wiki wiki, Xofulltext_args_qry qry_args, Xofulltext_args_wiki wiki_args); void Search(Xofulltext_searcher_ui ui, Xow_wiki wiki, Xofulltext_cache_qry qry, Xofulltext_args_qry qry_args, Xofulltext_args_wiki wiki_args);
} }

View File

@ -23,7 +23,7 @@ public class Xofulltext_searcher__brute implements Xofulltext_searcher {
private final Xofulltext_finder_mgr finder = new Xofulltext_finder_mgr(); private final Xofulltext_finder_mgr finder = new Xofulltext_finder_mgr();
private final Xofulltext_finder_cbk__eval cbk_eval = new Xofulltext_finder_cbk__eval(); private final Xofulltext_finder_cbk__eval cbk_eval = new Xofulltext_finder_cbk__eval();
private final Xofulltext_finder_cbk__highlight cbk_highlight = new Xofulltext_finder_cbk__highlight(); private final Xofulltext_finder_cbk__highlight cbk_highlight = new Xofulltext_finder_cbk__highlight();
public void Search(Xofulltext_searcher_ui ui, Xow_wiki wiki, Xofulltext_args_qry args, Xofulltext_args_wiki wiki_args) { public void Search(Xofulltext_searcher_ui ui, Xow_wiki wiki, Xofulltext_cache_qry qry, Xofulltext_args_qry args, Xofulltext_args_wiki wiki_args) {
// get pages from db // get pages from db
Db_conn page_conn = wiki.Data__core_mgr().Tbl__page().Conn(); Db_conn page_conn = wiki.Data__core_mgr().Tbl__page().Conn();
Db_rdr page_rdr = page_conn.Stmt_sql("SELECT * FROM page WHERE page_namespace IN (0) ORDER BY page_score DESC").Exec_select__rls_auto(); Db_rdr page_rdr = page_conn.Stmt_sql("SELECT * FROM page WHERE page_namespace IN (0) ORDER BY page_score DESC").Exec_select__rls_auto();
@ -58,7 +58,7 @@ public class Xofulltext_searcher__brute implements Xofulltext_searcher {
ui.Send_wiki_update(wiki_domain, found, searched); ui.Send_wiki_update(wiki_domain, found, searched);
// do highlight // do highlight
if (found <= wiki_args.limit) { if (found <= wiki_args.len) {
cbk_highlight.Init(ui, args.qry_id, wiki, page_id, ttl.Full_db(), args.show_all_matches); cbk_highlight.Init(ui, args.qry_id, wiki, page_id, ttl.Full_db(), args.show_all_matches);
ui.Send_page_add(new Xofulltext_searcher_page ui.Send_page_add(new Xofulltext_searcher_page
( args.qry_id ( args.qry_id

View File

@ -21,11 +21,14 @@ import gplx.gflucene.searchers.*;
import gplx.gflucene.highlighters.*; import gplx.gflucene.highlighters.*;
import gplx.xowa.wikis.data.tbls.*; import gplx.xowa.wikis.data.tbls.*;
import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.uis.*; import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.uis.*;
import gplx.xowa.addons.wikis.fulltexts.searchers.caches.*;
public class Xofulltext_searcher__lucene implements Xofulltext_searcher { public class Xofulltext_searcher__lucene implements Xofulltext_searcher {
private final Gflucene_searcher_mgr searcher = new Gflucene_searcher_mgr(); private final Gflucene_searcher_mgr searcher = new Gflucene_searcher_mgr();
public void Search(Xofulltext_searcher_ui ui, Xow_wiki wiki, Xofulltext_args_qry args, Xofulltext_args_wiki wiki_args) { public void Search(Xofulltext_searcher_ui ui, Xow_wiki wiki, Xofulltext_cache_qry qry, Xofulltext_args_qry args, Xofulltext_args_wiki wiki_args) {
// create list // create lists
Ordered_hash list = Ordered_hash_.New(); Ordered_hash full_list = Ordered_hash_.New();
Ordered_hash temp_list = Ordered_hash_.New();
Ordered_hash page_list = qry.Pages();
// init searcher with wiki // init searcher with wiki
Gflucene_analyzer_data analyzer_data = Gflucene_analyzer_data.New_data_from_locale(wiki.Lang().Key_str()); Gflucene_analyzer_data analyzer_data = Gflucene_analyzer_data.New_data_from_locale(wiki.Lang().Key_str());
@ -33,41 +36,56 @@ public class Xofulltext_searcher__lucene implements Xofulltext_searcher {
( analyzer_data ( analyzer_data
, Xosearch_fulltext_addon.Get_index_dir(wiki).Xto_api())); , Xosearch_fulltext_addon.Get_index_dir(wiki).Xto_api()));
// exec search
Gflucene_searcher_qry searcher_data = new Gflucene_searcher_qry(String_.new_u8(args.search_text), wiki_args.limit);
searcher.Exec(list, searcher_data);
// term
searcher.Term();
// get page_load vars // get page_load vars
Xowd_page_itm tmp_page_row = new Xowd_page_itm(); Xowd_page_itm tmp_page_row = new Xowd_page_itm();
Xowd_page_tbl page_tbl = wiki.Data__core_mgr().Db__core().Tbl__page(); Xowd_page_tbl page_tbl = wiki.Data__core_mgr().Db__core().Tbl__page();
// loop list and load pages // exec search
int len = list.Len(); int needed_bgn = wiki_args.bgn;
for (int i = 0; i < len; i++) { if (needed_bgn < page_list.Len()) needed_bgn = page_list.Len();
Gflucene_doc_data doc_data = (Gflucene_doc_data)list.Get_at(i); int needed_end = wiki_args.bgn + wiki_args.len;
int needed_len = needed_end - needed_bgn;
int found = 0;
Gflucene_searcher_qry searcher_data = new Gflucene_searcher_qry(String_.new_u8(args.search_text), 100);
while (found < needed_len) {
searcher.Exec(temp_list, searcher_data);
// load page int temp_list_len = temp_list.Len();
if (!page_tbl.Select_by_id(tmp_page_row, doc_data.page_id)) { for (int i = 0; i < temp_list_len; i++) {
Gfo_usr_dlg_.Instance.Warn_many("", "", "searcher.lucene: could not find page; page_id=~{0}", doc_data.page_id); Gflucene_doc_data doc_data = (Gflucene_doc_data)temp_list.Get_at(i);
continue; if (!page_list.Has(doc_data.page_id)) {
// load page
if (!page_tbl.Select_by_id(tmp_page_row, doc_data.page_id)) {
Gfo_usr_dlg_.Instance.Warn_many("", "", "searcher.lucene: could not find page; page_id=~{0}", doc_data.page_id);
continue;
}
// make page_ttl
Xoa_ttl page_ttl = wiki.Ttl_parse(tmp_page_row.Ns_id(), tmp_page_row.Ttl_page_db());
doc_data.ns_id = tmp_page_row.Ns_id();
doc_data.page_full_db = page_ttl.Full_db();
if (!wiki_args.ns_hash.Has(doc_data.ns_id)) continue;
// call page doc_data
Xofulltext_searcher_page page = new Xofulltext_searcher_page(args.qry_id, wiki.Domain_bry(), doc_data.page_id, doc_data.page_full_db, args.expand_matches_section);
ui.Send_page_add(page);
full_list.Add(doc_data.page_id, doc_data);
found++;
if (found >= needed_len) break;
}
} }
temp_list.Clear();
// make page_ttl
Xoa_ttl page_ttl = wiki.Ttl_parse(tmp_page_row.Ns_id(), tmp_page_row.Ttl_page_db());
doc_data.ns_id = tmp_page_row.Ns_id();
doc_data.page_full_db = page_ttl.Full_db();
// call page doc_data
Xofulltext_searcher_page page = new Xofulltext_searcher_page(args.qry_id, wiki.Domain_bry(), doc_data.page_id, doc_data.page_full_db, args.expand_matches_section);
ui.Send_page_add(page);
} }
ui.Send_wiki_update(wiki.Domain_bry(), len + List_adp_.Base1, -1);
// term
searcher.Term();
ui.Send_wiki_update(wiki.Domain_bry(), page_list.Len(), -1);
// create highlighter thread and launch it // create highlighter thread and launch it
Xofulltext_highlighter_mgr highlighter_mgr = new Xofulltext_highlighter_mgr(ui, wiki, args, analyzer_data, searcher_data, list); Xofulltext_highlighter_mgr highlighter_mgr = new Xofulltext_highlighter_mgr(ui, wiki, args, analyzer_data, searcher_data, full_list);
gplx.core.threads.Thread_adp_.Start_by_key("highlighter", Cancelable_.Never, highlighter_mgr, Xofulltext_highlighter_mgr.Invk__highlight); gplx.core.threads.Thread_adp_.Start_by_key("highlighter", Cancelable_.Never, highlighter_mgr, Xofulltext_highlighter_mgr.Invk__highlight);
} }
} }

View File

@ -77,34 +77,49 @@ class Xofulltext_searcher_svc implements Gfo_invk {
// try to get from cache // try to get from cache
byte[] qry_key = args.Qry_key(wiki_domain, wiki_args.ns_ids); byte[] qry_key = args.Qry_key(wiki_domain, wiki_args.ns_ids);
int qry_id = cache_mgr.Ids__get_or_neg1(qry_key); int qry_id = cache_mgr.Ids__get_or_neg1(qry_key);
Xofulltext_cache_qry qry = null;
if (qry_id == -1) { if (qry_id == -1) {
qry_id = cache_mgr.Ids__next(); qry_id = cache_mgr.Ids__next();
cache_mgr.Add(qry_id, qry_key); cache_mgr.Add(qry_id, qry_key);
qry = cache_mgr.Get_or_null(qry_id);
} }
else { else {
Xofulltext_cache_page[] cached_pages = cache_mgr.Get_pages_rng(qry_id, wiki_args.offset, wiki_args.limit); qry = cache_mgr.Get_or_null(qry_id);
if (cached_pages != null) { if (qry != null) {
for (Xofulltext_cache_page page : cached_pages) { boolean all_shown = Display_cached_qry(args, ui, wiki, qry, qry_id, wiki_args);
ui.Send_page_add(new Xofulltext_searcher_page(qry_id, wiki.Domain_bry(), page.Page_id(), page.Page_ttl(), args.expand_matches_section)); if (all_shown || qry.done)
int len = page.Lines().Len(); return;
for (int i = 0; i < len; i++) {
Xofulltext_cache_line line = (Xofulltext_cache_line)page.Lines().Get_at(i);
ui.Send_line_add(args.show_all_matches, qry_id, wiki.Domain_bry(), page.Page_id(), line.Line_seq(), line.Line_html());
}
}
return;
} }
} }
args.qry_id = qry_id; args.qry_id = qry_id;
// do search // do search
Xofulltext_searcher searcher = Get_searcher(wiki); Xofulltext_searcher searcher = Get_searcher(wiki);
searcher.Search(ui, wiki, args, wiki_args); searcher.Search(ui, wiki, qry, args, wiki_args);
} }
catch (Exception exc) { catch (Exception exc) {
Gfo_usr_dlg_.Instance.Warn_many("", "", "failed to search_wiki; err=~{0}", Err_.Message_gplx_log(exc)); Gfo_usr_dlg_.Instance.Warn_many("", "", "failed to search_wiki; err=~{0}", Err_.Message_gplx_log(exc));
} }
} }
/**
 * Replays already-cached results for a query to the UI.
 *
 * Walks the cached pages of qry for the window
 * [wiki_args.bgn, wiki_args.bgn + wiki_args.len) and, for each page, sends
 * the page followed by every cached line of that page.
 *
 * @param args      supplies expand_matches_section and show_all_matches
 * @param ui        receiver of Send_page_add / Send_line_add
 * @param wiki      supplies Domain_bry() for every message
 * @param qry       cached query whose Pages() are replayed
 * @param qry_id    id placed on every page / line message
 * @param wiki_args supplies the requested window (bgn, len)
 * @return true when every index in the window was present in the cache;
 *         false when the window runs past the cached page count. Pages
 *         before the shortfall have already been sent in that case.
 */
private boolean Display_cached_qry(Xofulltext_args_qry args, Xofulltext_searcher_ui ui, Xow_wiki wiki, Xofulltext_cache_qry qry, int qry_id, Xofulltext_args_wiki wiki_args) {
	Ordered_hash cached_pages = qry.Pages();
	int cached_count = cached_pages.Len(); // read once, before anything is sent
	int window_end = wiki_args.bgn + wiki_args.len;
	int page_idx = wiki_args.bgn;

	// advance until the window is exhausted or the cache runs dry
	while (page_idx < window_end && page_idx < cached_count) {
		Xofulltext_cache_page cached_page = (Xofulltext_cache_page)cached_pages.Get_at(page_idx);
		ui.Send_page_add(new Xofulltext_searcher_page(qry_id, wiki.Domain_bry(), cached_page.Page_id(), cached_page.Page_ttl(), args.expand_matches_section));

		// replay the lines cached for this page, in stored order
		int line_count = cached_page.Lines().Len();
		int line_idx = 0;
		while (line_idx < line_count) {
			Xofulltext_cache_line cached_line = (Xofulltext_cache_line)cached_page.Lines().Get_at(line_idx);
			ui.Send_line_add(args.show_all_matches, qry_id, wiki.Domain_bry(), cached_page.Page_id(), cached_line.Line_seq(), cached_line.Line_html());
			line_idx++;
		}
		page_idx++;
	}

	// reaching window_end means nothing was missing; stopping short means
	// more pages were requested than the cache holds
	return page_idx >= window_end;
}
public void Get_lines_rest(Json_nde args) { public void Get_lines_rest(Json_nde args) {
Get_lines_rest(args.Get_as_int("qry_id"), args.Get_as_bry("wiki"), args.Get_as_int("page_id"), args.Get_as_str("page_guid")); Get_lines_rest(args.Get_as_int("qry_id"), args.Get_as_bry("wiki"), args.Get_as_int("page_id"), args.Get_as_str("page_guid"));

View File

@ -82,9 +82,10 @@ public class Gflucene_searcher_mgr {
String docTitle = d.get("title"); String docTitle = d.get("title");
Gflucene_doc_data doc = (Gflucene_doc_data)list.Get_by(docTitle); Gflucene_doc_data doc = (Gflucene_doc_data)list.Get_by(docTitle);
if (doc == null) { if (doc == null) {
doc = new Gflucene_doc_data(Integer.parseInt(d.get("page_id")), 0, docTitle, ""); int doc_id = Integer.parseInt(d.get("page_id"));
doc = new Gflucene_doc_data(doc_id, 0, docTitle, "");
doc.lucene_score = hits[i].score; doc.lucene_score = hits[i].score;
list.Add(docTitle, doc); list.Add(doc_id, doc);
} }
// Tfds.Write(doc.lucene_score, doc.title); // Tfds.Write(doc.lucene_score, doc.title);
} }