Full-text search: Add 'expand matches' and 'show more'

pull/620/head
gnosygnu 8 years ago
parent d5d3c68350
commit 06acdd7335

@ -0,0 +1,24 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.addons.wikis.searchs.fulltexts.caches; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.searchs.*; import gplx.xowa.addons.wikis.searchs.fulltexts.*;
/**
 * Read-only holder for one cached search-result line: a sequence number plus the
 * html bytes for that line.
 */
public class Xosearch_cache_line {
	private final int line_seq;
	private final byte[] line_html;

	/**
	 * @param line_seq  sequence number stored for this line
	 * @param line_html html bytes stored for this line; the array is kept by reference, not copied
	 */
	public Xosearch_cache_line(int line_seq, byte[] line_html) {
		this.line_html = line_html;
		this.line_seq = line_seq;
	}

	/** Returns the sequence number given to the constructor. */
	public int Line_seq() {
		return line_seq;
	}

	/** Returns the same byte array instance that was given to the constructor. */
	public byte[] Line_html() {
		return line_html;
	}
}

@ -0,0 +1,73 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.addons.wikis.searchs.fulltexts.caches; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.searchs.*; import gplx.xowa.addons.wikis.searchs.fulltexts.*;
/**
 * In-memory cache of full-text search hits, organized as query -> wiki -> page -> lines.
 * Entries are created lazily by {@link #Add} and dropped all at once by {@link #Clear}.
 */
public class Xosearch_cache_mgr {
	// shared result for lookups that miss; zero-length, so there is nothing a caller can mutate
	private static final Xosearch_cache_line[] Lines_empty = new Xosearch_cache_line[0];
	private final Ordered_hash qry_hash = Ordered_hash_.New();
	private int next_qry_id;

	/** Returns the current query id and then advances the counter by one. */
	public int Next_qry_id() {return next_qry_id++;}

	/** Removes every cached query. The query-id counter is not reset. */
	public void Clear() {
		qry_hash.Clear();
	}

	/**
	 * Appends one line to the cache, creating the query, wiki and page entries on first use.
	 *
	 * @param query_id  key of the query entry
	 * @param query     query bytes; only used when the query entry is created
	 * @param wiki_bry  key of the wiki entry within the query
	 * @param page_seq  page sequence; only used when the page entry is created
	 * @param page_id   key of the page entry within the wiki
	 * @param line_seq  sequence number stored on the new line
	 * @param line_html html bytes stored on the new line
	 */
	public void Add(int query_id, byte[] query, byte[] wiki_bry, int page_seq, int page_id, int line_seq, byte[] line_html) {
		// get qry
		Xosearch_cache_qry qry = (Xosearch_cache_qry)qry_hash.Get_by(query_id);
		if (qry == null) {
			qry = new Xosearch_cache_qry(query_id, query);
			qry_hash.Add(query_id, qry);
		}

		// get wiki
		Xosearch_cache_wiki wiki = (Xosearch_cache_wiki)qry.Wikis().Get_by(wiki_bry);
		if (wiki == null) {
			wiki = new Xosearch_cache_wiki(wiki_bry);
			qry.Wikis().Add(wiki_bry, wiki);
		}

		// get page
		Xosearch_cache_page page = (Xosearch_cache_page)wiki.Pages().Get_by(page_id);
		if (page == null) {
			page = new Xosearch_cache_page(page_id, page_seq);
			wiki.Pages().Add(page_id, page);
		}

		// add line
		Xosearch_cache_line line = new Xosearch_cache_line(line_seq, line_html);
		page.Lines().Add(line);
	}

	/**
	 * Not implemented yet: ignores its arguments and always returns null.
	 * The commented-out body below is the unfinished draft.
	 */
	public Object Get_pages_rng(int qry_id, byte[] wiki, int page_seq_bgn, int page_seq_end) {
		// List_adp list = List_adp_.New();
		// for (int i = page_seq_bgn; i < page_seq_end; i++) {
		//   Xosearch_cache_qry page = (Xosearch_cache_qry)qry_hash.Get_at(i);
		//   list.Add(page);
		// }
		// return list.To_ary_and_clear(typeof(Xosearch_cache_itm));
		return null;
	}

	/**
	 * Returns every cached line of a page except the first one (index 0).
	 *
	 * @param qry_id   key of the query entry
	 * @param wiki_bry key of the wiki entry within the query
	 * @param page_id  key of the page entry within the wiki
	 * @return the lines at index 1..n-1 in insertion order; an empty array when the query,
	 *         wiki or page is not in the cache, or when the page has fewer than two lines
	 */
	public Xosearch_cache_line[] Get_lines_rest(int qry_id, byte[] wiki_bry, int page_id) {
		// get page; each level can be absent (for example Clear() ran after the id was handed out),
		// so return an empty result instead of dereferencing null
		Xosearch_cache_qry qry = (Xosearch_cache_qry)qry_hash.Get_by(qry_id);
		if (qry == null) return Lines_empty;
		Xosearch_cache_wiki wiki = (Xosearch_cache_wiki)qry.Wikis().Get_by(wiki_bry);
		if (wiki == null) return Lines_empty;
		Xosearch_cache_page page = (Xosearch_cache_page)wiki.Pages().Get_by(page_id);
		if (page == null) return Lines_empty;

		// loop lines from 1 to n; index 0 is deliberately skipped
		List_adp list = List_adp_.New();
		int lines_len = page.Lines().Len();
		for (int i = 1; i < lines_len; i++) {
			Xosearch_cache_line line = (Xosearch_cache_line)page.Lines().Get_at(i);
			list.Add(line);
		}
		return (Xosearch_cache_line[])list.To_ary_and_clear(Xosearch_cache_line.class);
	}
}

@ -0,0 +1,25 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.addons.wikis.searchs.fulltexts.caches; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.searchs.*; import gplx.xowa.addons.wikis.searchs.fulltexts.*;
/**
 * Cache entry for one page: its id, its sequence number, and a growable list of lines.
 */
public class Xosearch_cache_page {
	private final List_adp lines = List_adp_.New();
	private final int page_id;
	private final int page_seq;

	/**
	 * @param page_id  id stored for this page
	 * @param page_seq sequence number stored for this page
	 */
	public Xosearch_cache_page(int page_id, int page_seq) {
		this.page_seq = page_seq;
		this.page_id = page_id;
	}

	/** Returns the page id given to the constructor. */
	public int Page_id() {
		return page_id;
	}

	/** Returns the page sequence number given to the constructor. */
	public int Page_seq() {
		return page_seq;
	}

	/** Returns the live list of lines for this page; it starts empty and callers add to it directly. */
	public List_adp Lines() {
		return lines;
	}
}

@ -0,0 +1,25 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.addons.wikis.searchs.fulltexts.caches; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.searchs.*; import gplx.xowa.addons.wikis.searchs.fulltexts.*;
/**
 * Cache entry for one query: its id, the query bytes, and a hash of per-wiki entries
 * keyed by byte array.
 */
public class Xosearch_cache_qry {
	// created via Hash_adp_bry.cs() -- presumably the case-sensitive variant; confirm against Hash_adp_bry
	private final Hash_adp_bry wikis = Hash_adp_bry.cs();
	private final int id;
	private final byte[] qry;

	/**
	 * @param id  id stored for this query
	 * @param qry query bytes; kept by reference, not copied
	 */
	public Xosearch_cache_qry(int id, byte[] qry) {
		this.qry = qry;
		this.id = id;
	}

	/** Returns the query id given to the constructor. */
	public int Id() {
		return id;
	}

	/** Returns the same query byte array that was given to the constructor. */
	public byte[] Qry() {
		return qry;
	}

	/** Returns the live hash of wiki entries for this query; it starts empty. */
	public Hash_adp_bry Wikis() {
		return wikis;
	}
}

@ -0,0 +1,23 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.addons.wikis.searchs.fulltexts.caches; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.searchs.*; import gplx.xowa.addons.wikis.searchs.fulltexts.*;
/**
 * Cache entry for one wiki: the wiki's byte-array key and an ordered hash of page entries.
 */
public class Xosearch_cache_wiki {
	private final Ordered_hash pages = Ordered_hash_.New();
	private final byte[] wiki;

	/**
	 * @param wiki bytes identifying the wiki; kept by reference, not copied
	 */
	public Xosearch_cache_wiki(byte[] wiki) {
		this.wiki = wiki;
	}

	/** Returns the same byte array that was given to the constructor. */
	public byte[] Wiki() {
		return wiki;
	}

	/** Returns the live ordered hash of page entries for this wiki; it starts empty. */
	public Ordered_hash Pages() {
		return pages;
	}
}

@ -26,14 +26,16 @@ public class Xosearch_fulltext_bridge implements Bridge_cmd_itm {
Json_nde args = data.Get_kv(Bridge_cmd_mgr.Msg__args).Val_as_nde();
switch (proc_id) {
case Proc__search: svc.Search(args); break;
case Proc__get_lines_rest: svc.Get_lines_rest(args); break;
default: throw Err_.new_unhandled_default(proc_id);
}
return "";
}
private static final byte Proc__search = 0;
private static final byte Proc__search = 0, Proc__get_lines_rest = 1;
private static final Hash_adp_bry proc_hash = Hash_adp_bry.cs()
.Add_str_byte("search" , Proc__search)
.Add_str_byte("get_lines_rest" , Proc__get_lines_rest)
;
public byte[] Key() {return BRIDGE_KEY;} public static final byte[] BRIDGE_KEY = Bry_.new_a7("xowa.wiki.search.fulltext");

@ -19,6 +19,7 @@ import gplx.langs.jsons.*;
import gplx.dbs.*; import gplx.xowa.wikis.data.tbls.*;
import gplx.xowa.addons.wikis.searchs.fulltexts.specials.*;
import gplx.xowa.addons.wikis.searchs.fulltexts.finders.*;
import gplx.xowa.addons.wikis.searchs.fulltexts.caches.*;
import gplx.xowa.addons.wikis.searchs.searchers.crts.*;
import gplx.xowa.addons.wikis.searchs.searchers.crts.visitors.*;
class Xosearch_fulltext_svc implements Gfo_invk {
@ -27,11 +28,13 @@ class Xosearch_fulltext_svc implements Gfo_invk {
private final Xosearch_finder_mgr finder = new Xosearch_finder_mgr();
private final Xosearch_finder_cbk__eval cbk_eval = new Xosearch_finder_cbk__eval();
private final Xosearch_finder_cbk__highlight cbk_highlight;
private final Xosearch_cache_mgr cache_mgr = new Xosearch_cache_mgr();
public Xosearch_fulltext_svc(Xoa_app app) {
this.app = app;
cbk_highlight = new Xosearch_finder_cbk__highlight(app, cbk_trg);
cbk_highlight = new Xosearch_finder_cbk__highlight(app, cbk_trg, cache_mgr);
}
public void Search(Json_nde args) {
cache_mgr.Clear();
gplx.core.threads.Thread_adp_.Start_by_val("search", Cancelable_.Never, this, Invk__search, Xosearch_search_args.New_by_json(args));
}
private void Search(Xosearch_search_args args) {
@ -39,14 +42,28 @@ class Xosearch_fulltext_svc implements Gfo_invk {
byte[][] wiki_domains = Bry_split_.Split(args.wikis, Byte_ascii.Pipe_bry);
for (byte[] wiki_domain : wiki_domains) {
Xow_wiki wiki = app.Wiki_mgri().Get_by_or_make_init_y(wiki_domain);
Search_wiki(wiki, args.query, args.case_match, args.auto_wildcard_bgn, args.auto_wildcard_end, args.max_pages_per_wiki, args.max_snips_per_page);
Search_wiki(wiki, args);
}
} catch (Exception exc) {
if (app.Tid_is_edit())
((Xoae_app)app).Gui_mgr().Kit().Ask_ok("", "", Err_.Message_gplx_full(exc));
}
}
private void Search_wiki(Xow_wiki wiki, byte[] query, boolean case_match, boolean auto_wildcard_bgn, boolean auto_wildcard_end, int max_pages_per_wiki, int max_snips_per_page) {
/**
 * Bridge entry point: reads "qry_id", "wiki" and "page_id" from the json args and
 * forwards them to the typed overload.
 */
public void Get_lines_rest(Json_nde args) {
	// read in the same order as the arguments are passed on
	int qry_id = args.Get_as_int("qry_id");
	byte[] wiki_bry = args.Get_as_bry("wiki");
	int page_id = args.Get_as_int("page_id");
	Get_lines_rest(qry_id, wiki_bry, page_id);
}
/**
 * Fetches the remaining cached lines for a page from the cache manager and sends one
 * "results__line__add__recv" json message per line to the callback target.
 */
private void Get_lines_rest(int qry_id, byte[] wiki_bry, int page_id) {
	Xosearch_cache_line[] rest = cache_mgr.Get_lines_rest(qry_id, wiki_bry, page_id);
	int rest_len = rest.length;
	for (int i = 0; i < rest_len; i++) {
		Xosearch_cache_line itm = rest[i];
		// the cached sequence number is sent as "line" after adding 1
		app.Gui__cbk_mgr().Send_json(cbk_trg, "xo.search_fulltext.results__line__add__recv", gplx.core.gfobjs.Gfobj_nde.New()
			.Add_bry("wiki", wiki_bry)
			.Add_int("page_id", page_id)
			.Add_int("line", itm.Line_seq() + 1)
			.Add_bry("html", itm.Line_html())
			);
	}
}
private void Search_wiki(Xow_wiki wiki, Xosearch_search_args args) {
byte[] wiki_domain = wiki.Domain_bry();
Db_conn page_conn = wiki.Data__core_mgr().Tbl__page().Conn();
Db_rdr page_rdr = page_conn.Stmt_sql("SELECT * FROM page WHERE page_namespace IN (0) ORDER BY page_score DESC").Exec_select__rls_auto();
@ -56,7 +73,8 @@ class Xosearch_fulltext_svc implements Gfo_invk {
.Add_long("page_count", 0)
);
finder.Init(query, case_match, auto_wildcard_bgn, auto_wildcard_end, Byte_ascii.Star, Byte_ascii.Dash);
finder.Init(args.query, args.case_match, args.auto_wildcard_bgn, args.auto_wildcard_end, Byte_ascii.Star, Byte_ascii.Dash);
int query_id = cache_mgr.Next_qry_id();
try {
int found = 0;
int searched = 0;
@ -78,12 +96,14 @@ class Xosearch_fulltext_svc implements Gfo_invk {
Notify_pages_found_and_searched(wiki_domain, found, searched);
// do highlight
if (found <= max_pages_per_wiki) {
cbk_highlight.Init(wiki, page_id, ttl.Full_db(), max_snips_per_page);
if (found <= args.max_pages_per_wiki) {
cbk_highlight.Init(args.query, query_id, wiki, page_id, ttl.Full_db(), args.show_all_matches);
app.Gui__cbk_mgr().Send_json(cbk_trg, "xo.search_fulltext.results__page__add__recv", gplx.core.gfobjs.Gfobj_nde.New()
.Add_int("query_id", query_id)
.Add_bry("wiki", wiki_domain)
.Add_int("page_id", page_id)
.Add_bry("page_ttl", ttl.Full_db())
.Add_bool("expand_matches_section", args.expand_matches_section)
);
finder.Match(text_mcase, 0, text_mcase.length, cbk_highlight);
@ -116,6 +136,8 @@ class Xosearch_search_args {
public boolean case_match;
public boolean auto_wildcard_bgn;
public boolean auto_wildcard_end;
public boolean expand_matches_section;
public boolean show_all_matches;
public int max_pages_per_wiki;
public int max_snips_per_page;
public byte[] wikis;
@ -125,6 +147,8 @@ class Xosearch_search_args {
rv.case_match = args.Get_as_bool_or("case_match", false);
rv.auto_wildcard_bgn = args.Get_as_bool_or("auto_wildcard_bgn", false);
rv.auto_wildcard_end = args.Get_as_bool_or("auto_wildcard_end", false);
rv.expand_matches_section = args.Get_as_bool_or("expand_matches_section", false);
rv.show_all_matches = args.Get_as_bool_or("show_all_matches", false);
rv.max_pages_per_wiki = args.Get_as_int_or("max_pages_per_wiki", 25);
rv.max_snips_per_page = args.Get_as_int_or("max_snips_per_page", 10);
rv.wikis = args.Get_as_bry("wikis");

@ -15,29 +15,35 @@ Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.addons.wikis.searchs.fulltexts.finders; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.searchs.*; import gplx.xowa.addons.wikis.searchs.fulltexts.*;
import gplx.xowa.guis.cbks.*;
import gplx.xowa.addons.wikis.searchs.fulltexts.caches.*;
public class Xosearch_finder_cbk__highlight implements Xosearch_finder_cbk {
private final Xog_cbk_trg cbk_trg;
private final Xoa_app app;
private final Xosearch_cache_mgr cache_mgr;
private Xow_wiki wiki;
private byte[] qry;
private int qry_id;
private int page_id;
private final Bry_bfr tmp_bfr = Bry_bfr_.New();
public int found;
private int max_snips_per_page;
public Xosearch_finder_cbk__highlight(Xoa_app app, Xog_cbk_trg cbk_trg) {
private boolean show_all_matches;
public Xosearch_finder_cbk__highlight(Xoa_app app, Xog_cbk_trg cbk_trg, Xosearch_cache_mgr cache_mgr) {
this.app = app;
this.cbk_trg = cbk_trg;
this.cache_mgr = cache_mgr;
}
public byte[] Page_ttl() {return page_ttl;} private byte[] page_ttl;
public void Init(Xow_wiki wiki, int page_id, byte[] page_ttl, int max_snips_per_page) {
public void Init(byte[] qry, int qry_id, Xow_wiki wiki, int page_id, byte[] page_ttl, boolean show_all_matches) {
this.qry = qry;
this.qry_id = qry_id;
this.wiki = wiki;
this.page_id = page_id;
this.page_ttl= page_ttl;
this.max_snips_per_page = max_snips_per_page;
this.show_all_matches = show_all_matches;
found = 0;
}
public void Process_item_found(byte[] src, int hook_bgn, int hook_end, int word_bgn, int word_end, Xosearch_word_node term) {
++found;
if (found <= max_snips_per_page) {
// if (found < max_snips_per_page) {
// get snip bounds by finding flanking 50 chars and then expanding to word-bounds
int snip_bgn = hook_bgn - 50;
if (snip_bgn < 0)
@ -63,17 +69,23 @@ public class Xosearch_finder_cbk__highlight implements Xosearch_finder_cbk {
Add_snip(tmp_bfr, src, hook_end, snip_end);
// send notification
app.Gui__cbk_mgr().Send_json(cbk_trg, "xo.search_fulltext.results__line__add__recv", gplx.core.gfobjs.Gfobj_nde.New()
.Add_bry("wiki", wiki.Domain_bry())
.Add_int("page_id", page_id)
.Add_int("line", found)
.Add_bry("html", tmp_bfr.To_bry_and_clear())
);
}
byte[] line_html = tmp_bfr.To_bry_and_clear();
if (found == 0 || show_all_matches) {
app.Gui__cbk_mgr().Send_json(cbk_trg, "xo.search_fulltext.results__line__add__recv", gplx.core.gfobjs.Gfobj_nde.New()
.Add_bry("wiki", wiki.Domain_bry())
.Add_int("page_id", page_id)
.Add_int("line", found + 1)
.Add_bry("html", line_html)
);
}
cache_mgr.Add(qry_id, qry, wiki.Domain_bry(), -1, page_id, found, line_html);
// }
found++;
app.Gui__cbk_mgr().Send_json(cbk_trg, "xo.search_fulltext.results__page__update__recv", gplx.core.gfobjs.Gfobj_nde.New()
.Add_bry("wiki", wiki.Domain_bry())
.Add_int("page_id", page_id)
.Add_int("found", found)
.Add_bool("show_all_matches", show_all_matches)
);
}
private static final byte[] Angle_bgn_escaped = Bry_.new_a7("&lt;");

Loading…
Cancel
Save