mirror of
https://github.com/gnosygnu/xowa.git
synced 2024-10-27 20:34:16 +00:00
Full-text search: Implement basic paging
This commit is contained in:
parent
7924e26522
commit
49924110f4
@ -55,6 +55,9 @@ public class Xofulltext_cache_mgr {
|
|||||||
Xofulltext_cache_line line = new Xofulltext_cache_line(line_seq, line_html);
|
Xofulltext_cache_line line = new Xofulltext_cache_line(line_seq, line_html);
|
||||||
page.Lines().Add(line);
|
page.Lines().Add(line);
|
||||||
}
|
}
|
||||||
|
// Looks up the cached query stored in qry_hash under qry_id and returns it cast to Xofulltext_cache_qry.
// The result can be null: Get_pages_rng performs the same qry_hash.Get_by lookup and null-checks it.
public Xofulltext_cache_qry Get_or_null(int qry_id) {
|
||||||
|
return (Xofulltext_cache_qry)qry_hash.Get_by(qry_id);
|
||||||
|
}
|
||||||
public Xofulltext_cache_page[] Get_pages_rng(int qry_id, int page_seq_bgn, int page_seq_end) {
|
public Xofulltext_cache_page[] Get_pages_rng(int qry_id, int page_seq_bgn, int page_seq_end) {
|
||||||
Xofulltext_cache_qry qry = (Xofulltext_cache_qry)qry_hash.Get_by(qry_id);
|
Xofulltext_cache_qry qry = (Xofulltext_cache_qry)qry_hash.Get_by(qry_id);
|
||||||
if (qry == null) return null;
|
if (qry == null) return null;
|
||||||
|
@ -22,4 +22,5 @@ public class Xofulltext_cache_qry {
|
|||||||
public int Id() {return id;} private final int id;
|
public int Id() {return id;} private final int id;
|
||||||
public byte[] Text() {return text;} private final byte[] text;
|
public byte[] Text() {return text;} private final byte[] text;
|
||||||
public Ordered_hash Pages() {return pages;} private final Ordered_hash pages = Ordered_hash_.New();
|
public Ordered_hash Pages() {return pages;} private final Ordered_hash pages = Ordered_hash_.New();
|
||||||
|
public boolean done;
|
||||||
}
|
}
|
||||||
|
@ -1,23 +0,0 @@
|
|||||||
/*
|
|
||||||
XOWA: the XOWA Offline Wiki Application
|
|
||||||
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
|
||||||
|
|
||||||
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
|
||||||
or alternatively under the terms of the Apache License Version 2.0.
|
|
||||||
|
|
||||||
You may use XOWA according to either of these licenses as is most appropriate
|
|
||||||
for your project on a case-by-case basis.
|
|
||||||
|
|
||||||
The terms of each license can be found in the source code repository:
|
|
||||||
|
|
||||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
|
||||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
|
||||||
*/
|
|
||||||
package gplx.xowa.addons.wikis.fulltexts.searchers.caches; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.searchers.*;
|
|
||||||
// Cache holder for one wiki: keeps the wiki bytes given to the constructor and an Ordered_hash of pages.
public class Xofulltext_cache_wiki {
|
|
||||||
// stores the supplied wiki bytes as-is (no copy is made)
public Xofulltext_cache_wiki(byte[] wiki) {
|
|
||||||
this.wiki = wiki;
|
|
||||||
}
|
|
||||||
// accessor for the wiki bytes passed to the constructor
public byte[] Wiki() {return wiki;} private final byte[] wiki;
|
|
||||||
// accessor for the pages hash; created empty via Ordered_hash_.New() and returned directly, so callers mutate the same instance
public Ordered_hash Pages() {return pages;} private final Ordered_hash pages = Ordered_hash_.New();
|
|
||||||
}
|
|
@ -17,15 +17,23 @@ package gplx.xowa.addons.wikis.fulltexts.searchers.mgrs; import gplx.*; import g
|
|||||||
public class Xofulltext_args_wiki {
|
public class Xofulltext_args_wiki {
|
||||||
public byte[] wiki;
|
public byte[] wiki;
|
||||||
public byte[] ns_ids;
|
public byte[] ns_ids;
|
||||||
public int offset;
|
public Hash_adp ns_hash = Hash_adp_.New();
|
||||||
public int limit;
|
public int bgn;
|
||||||
|
public int len;
|
||||||
|
|
||||||
public Xofulltext_args_wiki(byte[] wiki) {
|
public Xofulltext_args_wiki(byte[] wiki) {
|
||||||
this.wiki = wiki;
|
this.wiki = wiki;
|
||||||
}
|
}
|
||||||
public void Init_by_json(String key, byte[] val) {
|
public void Init_by_json(String key, byte[] val) {
|
||||||
if (String_.Eq(key, "ns_ids")) this.ns_ids = val;
|
if (String_.Eq(key, "ns_ids")) {
|
||||||
else if (String_.Eq(key, "offsets")) this.offset = Bry_.To_int(val);
|
this.ns_ids = val;
|
||||||
else if (String_.Eq(key, "limits")) this.limit = Bry_.To_int(val);
|
byte[][] ns_ary = Bry_split_.Split(ns_ids, Byte_ascii.Comma, true);
|
||||||
|
for (byte[] ns_id : ns_ary) {
|
||||||
|
int ns_int = Bry_.To_int(ns_id);
|
||||||
|
ns_hash.Add_if_dupe_use_1st(ns_int, ns_int);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (String_.Eq(key, "offsets")) this.bgn = Bry_.To_int(val);
|
||||||
|
else if (String_.Eq(key, "limits")) this.len = Bry_.To_int(val);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -14,7 +14,8 @@ GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
|||||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||||
*/
|
*/
|
||||||
package gplx.xowa.addons.wikis.fulltexts.searchers.mgrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.searchers.*;
|
package gplx.xowa.addons.wikis.fulltexts.searchers.mgrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.searchers.*;
|
||||||
|
import gplx.xowa.addons.wikis.fulltexts.searchers.caches.*;
|
||||||
import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.uis.*;
|
import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.uis.*;
|
||||||
public interface Xofulltext_searcher {
|
public interface Xofulltext_searcher {
|
||||||
void Search(Xofulltext_searcher_ui ui, Xow_wiki wiki, Xofulltext_args_qry qry_args, Xofulltext_args_wiki wiki_args);
|
void Search(Xofulltext_searcher_ui ui, Xow_wiki wiki, Xofulltext_cache_qry qry, Xofulltext_args_qry qry_args, Xofulltext_args_wiki wiki_args);
|
||||||
}
|
}
|
||||||
|
@ -23,7 +23,7 @@ public class Xofulltext_searcher__brute implements Xofulltext_searcher {
|
|||||||
private final Xofulltext_finder_mgr finder = new Xofulltext_finder_mgr();
|
private final Xofulltext_finder_mgr finder = new Xofulltext_finder_mgr();
|
||||||
private final Xofulltext_finder_cbk__eval cbk_eval = new Xofulltext_finder_cbk__eval();
|
private final Xofulltext_finder_cbk__eval cbk_eval = new Xofulltext_finder_cbk__eval();
|
||||||
private final Xofulltext_finder_cbk__highlight cbk_highlight = new Xofulltext_finder_cbk__highlight();
|
private final Xofulltext_finder_cbk__highlight cbk_highlight = new Xofulltext_finder_cbk__highlight();
|
||||||
public void Search(Xofulltext_searcher_ui ui, Xow_wiki wiki, Xofulltext_args_qry args, Xofulltext_args_wiki wiki_args) {
|
public void Search(Xofulltext_searcher_ui ui, Xow_wiki wiki, Xofulltext_cache_qry qry, Xofulltext_args_qry args, Xofulltext_args_wiki wiki_args) {
|
||||||
// get pages from db
|
// get pages from db
|
||||||
Db_conn page_conn = wiki.Data__core_mgr().Tbl__page().Conn();
|
Db_conn page_conn = wiki.Data__core_mgr().Tbl__page().Conn();
|
||||||
Db_rdr page_rdr = page_conn.Stmt_sql("SELECT * FROM page WHERE page_namespace IN (0) ORDER BY page_score DESC").Exec_select__rls_auto();
|
Db_rdr page_rdr = page_conn.Stmt_sql("SELECT * FROM page WHERE page_namespace IN (0) ORDER BY page_score DESC").Exec_select__rls_auto();
|
||||||
@ -58,7 +58,7 @@ public class Xofulltext_searcher__brute implements Xofulltext_searcher {
|
|||||||
ui.Send_wiki_update(wiki_domain, found, searched);
|
ui.Send_wiki_update(wiki_domain, found, searched);
|
||||||
|
|
||||||
// do highlight
|
// do highlight
|
||||||
if (found <= wiki_args.limit) {
|
if (found <= wiki_args.len) {
|
||||||
cbk_highlight.Init(ui, args.qry_id, wiki, page_id, ttl.Full_db(), args.show_all_matches);
|
cbk_highlight.Init(ui, args.qry_id, wiki, page_id, ttl.Full_db(), args.show_all_matches);
|
||||||
ui.Send_page_add(new Xofulltext_searcher_page
|
ui.Send_page_add(new Xofulltext_searcher_page
|
||||||
( args.qry_id
|
( args.qry_id
|
||||||
|
@ -21,11 +21,14 @@ import gplx.gflucene.searchers.*;
|
|||||||
import gplx.gflucene.highlighters.*;
|
import gplx.gflucene.highlighters.*;
|
||||||
import gplx.xowa.wikis.data.tbls.*;
|
import gplx.xowa.wikis.data.tbls.*;
|
||||||
import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.uis.*;
|
import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.uis.*;
|
||||||
|
import gplx.xowa.addons.wikis.fulltexts.searchers.caches.*;
|
||||||
public class Xofulltext_searcher__lucene implements Xofulltext_searcher {
|
public class Xofulltext_searcher__lucene implements Xofulltext_searcher {
|
||||||
private final Gflucene_searcher_mgr searcher = new Gflucene_searcher_mgr();
|
private final Gflucene_searcher_mgr searcher = new Gflucene_searcher_mgr();
|
||||||
public void Search(Xofulltext_searcher_ui ui, Xow_wiki wiki, Xofulltext_args_qry args, Xofulltext_args_wiki wiki_args) {
|
public void Search(Xofulltext_searcher_ui ui, Xow_wiki wiki, Xofulltext_cache_qry qry, Xofulltext_args_qry args, Xofulltext_args_wiki wiki_args) {
|
||||||
// create list
|
// create lists
|
||||||
Ordered_hash list = Ordered_hash_.New();
|
Ordered_hash full_list = Ordered_hash_.New();
|
||||||
|
Ordered_hash temp_list = Ordered_hash_.New();
|
||||||
|
Ordered_hash page_list = qry.Pages();
|
||||||
|
|
||||||
// init searcher with wiki
|
// init searcher with wiki
|
||||||
Gflucene_analyzer_data analyzer_data = Gflucene_analyzer_data.New_data_from_locale(wiki.Lang().Key_str());
|
Gflucene_analyzer_data analyzer_data = Gflucene_analyzer_data.New_data_from_locale(wiki.Lang().Key_str());
|
||||||
@ -33,41 +36,56 @@ public class Xofulltext_searcher__lucene implements Xofulltext_searcher {
|
|||||||
( analyzer_data
|
( analyzer_data
|
||||||
, Xosearch_fulltext_addon.Get_index_dir(wiki).Xto_api()));
|
, Xosearch_fulltext_addon.Get_index_dir(wiki).Xto_api()));
|
||||||
|
|
||||||
// exec search
|
|
||||||
Gflucene_searcher_qry searcher_data = new Gflucene_searcher_qry(String_.new_u8(args.search_text), wiki_args.limit);
|
|
||||||
searcher.Exec(list, searcher_data);
|
|
||||||
|
|
||||||
// term
|
|
||||||
searcher.Term();
|
|
||||||
|
|
||||||
// get page_load vars
|
// get page_load vars
|
||||||
Xowd_page_itm tmp_page_row = new Xowd_page_itm();
|
Xowd_page_itm tmp_page_row = new Xowd_page_itm();
|
||||||
Xowd_page_tbl page_tbl = wiki.Data__core_mgr().Db__core().Tbl__page();
|
Xowd_page_tbl page_tbl = wiki.Data__core_mgr().Db__core().Tbl__page();
|
||||||
|
|
||||||
// loop list and load pages
|
// exec search
|
||||||
int len = list.Len();
|
int needed_bgn = wiki_args.bgn;
|
||||||
for (int i = 0; i < len; i++) {
|
if (needed_bgn < page_list.Len()) needed_bgn = page_list.Len();
|
||||||
Gflucene_doc_data doc_data = (Gflucene_doc_data)list.Get_at(i);
|
int needed_end = wiki_args.bgn + wiki_args.len;
|
||||||
|
int needed_len = needed_end - needed_bgn;
|
||||||
|
int found = 0;
|
||||||
|
Gflucene_searcher_qry searcher_data = new Gflucene_searcher_qry(String_.new_u8(args.search_text), 100);
|
||||||
|
while (found < needed_len) {
|
||||||
|
searcher.Exec(temp_list, searcher_data);
|
||||||
|
|
||||||
// load page
|
int temp_list_len = temp_list.Len();
|
||||||
if (!page_tbl.Select_by_id(tmp_page_row, doc_data.page_id)) {
|
for (int i = 0; i < temp_list_len; i++) {
|
||||||
Gfo_usr_dlg_.Instance.Warn_many("", "", "searcher.lucene: could not find page; page_id=~{0}", doc_data.page_id);
|
Gflucene_doc_data doc_data = (Gflucene_doc_data)temp_list.Get_at(i);
|
||||||
continue;
|
if (!page_list.Has(doc_data.page_id)) {
|
||||||
|
// load page
|
||||||
|
if (!page_tbl.Select_by_id(tmp_page_row, doc_data.page_id)) {
|
||||||
|
Gfo_usr_dlg_.Instance.Warn_many("", "", "searcher.lucene: could not find page; page_id=~{0}", doc_data.page_id);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// make page_ttl
|
||||||
|
Xoa_ttl page_ttl = wiki.Ttl_parse(tmp_page_row.Ns_id(), tmp_page_row.Ttl_page_db());
|
||||||
|
doc_data.ns_id = tmp_page_row.Ns_id();
|
||||||
|
doc_data.page_full_db = page_ttl.Full_db();
|
||||||
|
|
||||||
|
if (!wiki_args.ns_hash.Has(doc_data.ns_id)) continue;
|
||||||
|
|
||||||
|
// call page doc_data
|
||||||
|
Xofulltext_searcher_page page = new Xofulltext_searcher_page(args.qry_id, wiki.Domain_bry(), doc_data.page_id, doc_data.page_full_db, args.expand_matches_section);
|
||||||
|
ui.Send_page_add(page);
|
||||||
|
|
||||||
|
full_list.Add(doc_data.page_id, doc_data);
|
||||||
|
found++;
|
||||||
|
if (found >= needed_len) break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
temp_list.Clear();
|
||||||
// make page_ttl
|
|
||||||
Xoa_ttl page_ttl = wiki.Ttl_parse(tmp_page_row.Ns_id(), tmp_page_row.Ttl_page_db());
|
|
||||||
doc_data.ns_id = tmp_page_row.Ns_id();
|
|
||||||
doc_data.page_full_db = page_ttl.Full_db();
|
|
||||||
|
|
||||||
// call page doc_data
|
|
||||||
Xofulltext_searcher_page page = new Xofulltext_searcher_page(args.qry_id, wiki.Domain_bry(), doc_data.page_id, doc_data.page_full_db, args.expand_matches_section);
|
|
||||||
ui.Send_page_add(page);
|
|
||||||
}
|
}
|
||||||
ui.Send_wiki_update(wiki.Domain_bry(), len + List_adp_.Base1, -1);
|
|
||||||
|
// term
|
||||||
|
searcher.Term();
|
||||||
|
|
||||||
|
ui.Send_wiki_update(wiki.Domain_bry(), page_list.Len(), -1);
|
||||||
|
|
||||||
// create highlighter thread and launch it
|
// create highlighter thread and launch it
|
||||||
Xofulltext_highlighter_mgr highlighter_mgr = new Xofulltext_highlighter_mgr(ui, wiki, args, analyzer_data, searcher_data, list);
|
Xofulltext_highlighter_mgr highlighter_mgr = new Xofulltext_highlighter_mgr(ui, wiki, args, analyzer_data, searcher_data, full_list);
|
||||||
gplx.core.threads.Thread_adp_.Start_by_key("highlighter", Cancelable_.Never, highlighter_mgr, Xofulltext_highlighter_mgr.Invk__highlight);
|
gplx.core.threads.Thread_adp_.Start_by_key("highlighter", Cancelable_.Never, highlighter_mgr, Xofulltext_highlighter_mgr.Invk__highlight);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -77,34 +77,49 @@ class Xofulltext_searcher_svc implements Gfo_invk {
|
|||||||
// try to get from cache
|
// try to get from cache
|
||||||
byte[] qry_key = args.Qry_key(wiki_domain, wiki_args.ns_ids);
|
byte[] qry_key = args.Qry_key(wiki_domain, wiki_args.ns_ids);
|
||||||
int qry_id = cache_mgr.Ids__get_or_neg1(qry_key);
|
int qry_id = cache_mgr.Ids__get_or_neg1(qry_key);
|
||||||
|
Xofulltext_cache_qry qry = null;
|
||||||
if (qry_id == -1) {
|
if (qry_id == -1) {
|
||||||
qry_id = cache_mgr.Ids__next();
|
qry_id = cache_mgr.Ids__next();
|
||||||
cache_mgr.Add(qry_id, qry_key);
|
cache_mgr.Add(qry_id, qry_key);
|
||||||
|
qry = cache_mgr.Get_or_null(qry_id);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
Xofulltext_cache_page[] cached_pages = cache_mgr.Get_pages_rng(qry_id, wiki_args.offset, wiki_args.limit);
|
qry = cache_mgr.Get_or_null(qry_id);
|
||||||
if (cached_pages != null) {
|
if (qry != null) {
|
||||||
for (Xofulltext_cache_page page : cached_pages) {
|
boolean all_shown = Display_cached_qry(args, ui, wiki, qry, qry_id, wiki_args);
|
||||||
ui.Send_page_add(new Xofulltext_searcher_page(qry_id, wiki.Domain_bry(), page.Page_id(), page.Page_ttl(), args.expand_matches_section));
|
if (all_shown || qry.done)
|
||||||
int len = page.Lines().Len();
|
return;
|
||||||
for (int i = 0; i < len; i++) {
|
|
||||||
Xofulltext_cache_line line = (Xofulltext_cache_line)page.Lines().Get_at(i);
|
|
||||||
ui.Send_line_add(args.show_all_matches, qry_id, wiki.Domain_bry(), page.Page_id(), line.Line_seq(), line.Line_html());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
args.qry_id = qry_id;
|
args.qry_id = qry_id;
|
||||||
|
|
||||||
// do search
|
// do search
|
||||||
Xofulltext_searcher searcher = Get_searcher(wiki);
|
Xofulltext_searcher searcher = Get_searcher(wiki);
|
||||||
searcher.Search(ui, wiki, args, wiki_args);
|
searcher.Search(ui, wiki, qry, args, wiki_args);
|
||||||
}
|
}
|
||||||
catch (Exception exc) {
|
catch (Exception exc) {
|
||||||
Gfo_usr_dlg_.Instance.Warn_many("", "", "failed to search_wiki; err=~{0}", Err_.Message_gplx_log(exc));
|
Gfo_usr_dlg_.Instance.Warn_many("", "", "failed to search_wiki; err=~{0}", Err_.Message_gplx_log(exc));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// Replays cached results for the requested window [wiki_args.bgn, wiki_args.bgn + wiki_args.len) to the ui.
// For each cached page in the window it sends one Send_page_add, then one Send_line_add per cached line of that page.
// Returns true if every index in the window had a cached page; returns false as soon as an index is past the
// end of qry.Pages(). NOTE: pages before that index have already been sent to the ui when false is returned.
// NOTE(review): bgn is not checked for negative values here; presumably the caller guarantees bgn >= 0 — confirm.
private boolean Display_cached_qry(Xofulltext_args_qry args, Xofulltext_searcher_ui ui, Xow_wiki wiki, Xofulltext_cache_qry qry, int qry_id, Xofulltext_args_wiki wiki_args) {
|
||||||
|
int bgn = wiki_args.bgn;
|
||||||
|
int len = wiki_args.len;
|
||||||
|
int end = bgn + len;
|
||||||
|
// number of pages currently cached for this query
int max = qry.Pages().Len();
|
||||||
|
for (int i = bgn; i < end; i++) {
|
||||||
|
if (i >= max) return false; // more pages requested than available
|
||||||
|
Xofulltext_cache_page page = (Xofulltext_cache_page)qry.Pages().Get_at(i);
|
||||||
|
// announce the page itself before any of its lines
ui.Send_page_add(new Xofulltext_searcher_page(qry_id, wiki.Domain_bry(), page.Page_id(), page.Page_ttl(), args.expand_matches_section));
|
||||||
|
|
||||||
|
// loop lines
|
||||||
|
int lines_len = page.Lines().Len();
|
||||||
|
for (int j = 0; j < lines_len; j++) {
|
||||||
|
Xofulltext_cache_line line = (Xofulltext_cache_line)page.Lines().Get_at(j);
|
||||||
|
ui.Send_line_add(args.show_all_matches, qry_id, wiki.Domain_bry(), page.Page_id(), line.Line_seq(), line.Line_html());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// whole requested window was served from the cache
return true;
|
||||||
|
}
|
||||||
|
|
||||||
public void Get_lines_rest(Json_nde args) {
|
public void Get_lines_rest(Json_nde args) {
|
||||||
Get_lines_rest(args.Get_as_int("qry_id"), args.Get_as_bry("wiki"), args.Get_as_int("page_id"), args.Get_as_str("page_guid"));
|
Get_lines_rest(args.Get_as_int("qry_id"), args.Get_as_bry("wiki"), args.Get_as_int("page_id"), args.Get_as_str("page_guid"));
|
||||||
|
@ -82,9 +82,10 @@ public class Gflucene_searcher_mgr {
|
|||||||
String docTitle = d.get("title");
|
String docTitle = d.get("title");
|
||||||
Gflucene_doc_data doc = (Gflucene_doc_data)list.Get_by(docTitle);
|
Gflucene_doc_data doc = (Gflucene_doc_data)list.Get_by(docTitle);
|
||||||
if (doc == null) {
|
if (doc == null) {
|
||||||
doc = new Gflucene_doc_data(Integer.parseInt(d.get("page_id")), 0, docTitle, "");
|
int doc_id = Integer.parseInt(d.get("page_id"));
|
||||||
|
doc = new Gflucene_doc_data(doc_id, 0, docTitle, "");
|
||||||
doc.lucene_score = hits[i].score;
|
doc.lucene_score = hits[i].score;
|
||||||
list.Add(docTitle, doc);
|
list.Add(doc_id, doc);
|
||||||
}
|
}
|
||||||
// Tfds.Write(doc.lucene_score, doc.title);
|
// Tfds.Write(doc.lucene_score, doc.title);
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user