1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2024-10-27 20:34:16 +00:00

Full-text search: Change to lucene 6.4.2

This commit is contained in:
gnosygnu 2017-03-15 19:51:58 -04:00
parent 8f8e414c80
commit 8c31c8fd62
14 changed files with 136 additions and 149 deletions

View File

@ -36,4 +36,9 @@ public class Xosearch_fulltext_addon implements Xoax_addon_itm, Xoax_addon_itm__
} }
public String Addon__key() {return ADDON__KEY;} private static final String ADDON__KEY = "xowa.wiki.fulltext"; public String Addon__key() {return ADDON__KEY;} private static final String ADDON__KEY = "xowa.wiki.fulltext";
/** Returns the full-text index directory for {@code wiki} by delegating to the Io_url overload with the wiki's root directory. */
public static Io_url Get_index_dir(Xow_wiki wiki) {
  Io_url wiki_root_dir = wiki.Fsys_mgr().Root_dir();
  return Get_index_dir(wiki_root_dir);
}
/**
 * Returns the nested sub-directory "data" / "search" / "java8-v1" under {@code wiki_dir}.
 * Single place where the index location is defined, so indexer and searcher agree on it.
 */
public static Io_url Get_index_dir(Io_url wiki_dir) {
  Io_url index_dir = wiki_dir.GenSubDir_nest("data", "search", "java8-v1");
  return index_dir;
}
} }

View File

@ -19,12 +19,14 @@ import gplx.gflucene.indexers.*;
public class Xofulltext_indexer_wkr { public class Xofulltext_indexer_wkr {
private final Gflucene_indexer_mgr index_wtr = new Gflucene_indexer_mgr(); private final Gflucene_indexer_mgr index_wtr = new Gflucene_indexer_mgr();
public void Init(Xow_wiki wiki) { public void Init(Xow_wiki wiki) {
Io_url search_dir = wiki.Fsys_mgr().Root_dir().GenSubDir_nest("data", "search"); // delete existing dir
Io_mgr.Instance.DeleteDirDeep(search_dir); Io_url index_dir = Xosearch_fulltext_addon.Get_index_dir(wiki);
; Io_mgr.Instance.DeleteDirDeep(index_dir);
// init index_dir
index_wtr.Init(new Gflucene_index_data index_wtr.Init(new Gflucene_index_data
( Gflucene_analyzer_data.New_data_from_locale(wiki.Lang().Key_str()) ( Gflucene_analyzer_data.New_data_from_locale(wiki.Lang().Key_str())
, search_dir.Xto_api())); , index_dir.Xto_api()));
} }
public void Index(Xoae_page wpg) { public void Index(Xoae_page wpg) {
// TODO: skip if not main_ns // TODO: skip if not main_ns

View File

@ -48,7 +48,7 @@ class Xofulltext_indexer_svc implements Gfo_invk {
} }
wiki.Init_by_wiki(); wiki.Init_by_wiki();
Io_url search_dir = wiki.Fsys_mgr().Root_dir().GenSubDir_nest("data", "search"); Io_url search_dir = Xosearch_fulltext_addon.Get_index_dir(wiki);
if (Io_mgr.Instance.ExistsDir(search_dir)) { if (Io_mgr.Instance.ExistsDir(search_dir)) {
app.Gui__cbk_mgr().Send_json(cbk_trg, "xo.fulltext_indexer.status__note__recv", gplx.core.gfobjs.Gfobj_nde.New() app.Gui__cbk_mgr().Send_json(cbk_trg, "xo.fulltext_indexer.status__note__recv", gplx.core.gfobjs.Gfobj_nde.New()
.Add_str("note", Datetime_now.Get().XtoStr_fmt_yyyy_MM_dd_HH_mm_ss() + ": search dir already exists; please delete it manually before reindexing; " + String_.new_u8(domain))); .Add_str("note", Datetime_now.Get().XtoStr_fmt_yyyy_MM_dd_HH_mm_ss() + ": search dir already exists; please delete it manually before reindexing; " + String_.new_u8(domain)));

View File

@ -20,13 +20,19 @@ public class Xofulltext_cache_mgr {
public void Clear() { public void Clear() {
qry_hash.Clear(); qry_hash.Clear();
} }
public void Add(int query_id, byte[] query, byte[] wiki_bry, int page_seq, int page_id, int line_seq, byte[] line_html) { public void Add(int query_id, byte[] query) {
// get qry
Xofulltext_cache_qry qry = (Xofulltext_cache_qry)qry_hash.Get_by(query_id); Xofulltext_cache_qry qry = (Xofulltext_cache_qry)qry_hash.Get_by(query_id);
if (qry == null) { if (qry == null) {
qry = new Xofulltext_cache_qry(query_id, query); qry = new Xofulltext_cache_qry(query_id, query);
qry_hash.Add(query_id, qry); qry_hash.Add(query_id, qry);
} }
}
public void Add(int query_id, byte[] wiki_bry, int page_id, int line_seq, byte[] line_html) {
// get qry
Xofulltext_cache_qry qry = (Xofulltext_cache_qry)qry_hash.Get_by(query_id);
if (qry == null) {
throw Err_.new_wo_type("query not found; query_id=~{0}", query_id);
}
// get wiki // get wiki
Xofulltext_cache_wiki wiki = (Xofulltext_cache_wiki)qry.Wikis().Get_by(wiki_bry); Xofulltext_cache_wiki wiki = (Xofulltext_cache_wiki)qry.Wikis().Get_by(wiki_bry);
@ -38,7 +44,7 @@ public class Xofulltext_cache_mgr {
// get page // get page
Xofulltext_cache_page page = (Xofulltext_cache_page)wiki.Pages().Get_by(page_id); Xofulltext_cache_page page = (Xofulltext_cache_page)wiki.Pages().Get_by(page_id);
if (page == null) { if (page == null) {
page = new Xofulltext_cache_page(page_id, page_seq); page = new Xofulltext_cache_page(page_id, wiki.Pages().Count());
wiki.Pages().Add(page_id, page); wiki.Pages().Add(page_id, page);
} }

View File

@ -22,10 +22,7 @@ import gplx.xowa.addons.wikis.fulltexts.searchers.caches.*;
public class Xofulltext_searcher__brute implements Xofulltext_searcher { public class Xofulltext_searcher__brute implements Xofulltext_searcher {
private final Xofulltext_finder_mgr finder = new Xofulltext_finder_mgr(); private final Xofulltext_finder_mgr finder = new Xofulltext_finder_mgr();
private final Xofulltext_finder_cbk__eval cbk_eval = new Xofulltext_finder_cbk__eval(); private final Xofulltext_finder_cbk__eval cbk_eval = new Xofulltext_finder_cbk__eval();
private final Xofulltext_finder_cbk__highlight cbk_highlight; private final Xofulltext_finder_cbk__highlight cbk_highlight = new Xofulltext_finder_cbk__highlight();
/** Creates the brute-force searcher; the three arguments are only forwarded to the highlight callback. */
public Xofulltext_searcher__brute(Xoa_app app, Xog_cbk_trg cbk_trg, Xofulltext_cache_mgr cache_mgr) {
  Xofulltext_finder_cbk__highlight highlight_cbk = new Xofulltext_finder_cbk__highlight(app, cbk_trg, cache_mgr);
  this.cbk_highlight = highlight_cbk;
}
public void Search(Xofulltext_searcher_ui ui, Xow_wiki wiki, Xofulltext_searcher_args args) { public void Search(Xofulltext_searcher_ui ui, Xow_wiki wiki, Xofulltext_searcher_args args) {
// get pages from db // get pages from db
Db_conn page_conn = wiki.Data__core_mgr().Tbl__page().Conn(); Db_conn page_conn = wiki.Data__core_mgr().Tbl__page().Conn();
@ -62,7 +59,7 @@ public class Xofulltext_searcher__brute implements Xofulltext_searcher {
// do highlight // do highlight
if (found <= args.max_pages_per_wiki) { if (found <= args.max_pages_per_wiki) {
cbk_highlight.Init(args.query, args.query_id, wiki, page_id, ttl.Full_db(), args.show_all_matches); cbk_highlight.Init(ui, args.query_id, wiki, page_id, ttl.Full_db(), args.show_all_matches);
ui.Send_page_add(new Xofulltext_searcher_page ui.Send_page_add(new Xofulltext_searcher_page
( args.query_id ( args.query_id
, String_.new_u8(wiki_domain) , String_.new_u8(wiki_domain)

View File

@ -16,25 +16,18 @@ Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
package gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.brutes.finders; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.searchers.*; import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.*; import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.brutes.*; package gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.brutes.finders; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.searchers.*; import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.*; import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.brutes.*;
import gplx.xowa.guis.cbks.*; import gplx.xowa.guis.cbks.*;
import gplx.xowa.addons.wikis.fulltexts.searchers.caches.*; import gplx.xowa.addons.wikis.fulltexts.searchers.caches.*;
import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.uis.*;
public class Xofulltext_finder_cbk__highlight implements Xofulltext_finder_cbk { public class Xofulltext_finder_cbk__highlight implements Xofulltext_finder_cbk {
private final Xog_cbk_trg cbk_trg; private Xofulltext_searcher_ui ui;
private final Xoa_app app;
private final Xofulltext_cache_mgr cache_mgr;
private Xow_wiki wiki; private Xow_wiki wiki;
private byte[] qry;
private int qry_id; private int qry_id;
private int page_id; private int page_id;
private final Bry_bfr tmp_bfr = Bry_bfr_.New(); private final Bry_bfr tmp_bfr = Bry_bfr_.New();
public int found; public int found;
private boolean show_all_matches; private boolean show_all_matches;
/** Stores the app, the gui callback target and the cache manager for later use by this callback. */
public Xofulltext_finder_cbk__highlight(Xoa_app app, Xog_cbk_trg cbk_trg, Xofulltext_cache_mgr cache_mgr) {
  // plain field captures; the assignments are independent of each other
  this.cache_mgr = cache_mgr;
  this.cbk_trg = cbk_trg;
  this.app = app;
}
public byte[] Page_ttl() {return page_ttl;} private byte[] page_ttl; public byte[] Page_ttl() {return page_ttl;} private byte[] page_ttl;
public void Init(byte[] qry, int qry_id, Xow_wiki wiki, int page_id, byte[] page_ttl, boolean show_all_matches) { public void Init(Xofulltext_searcher_ui ui, int qry_id, Xow_wiki wiki, int page_id, byte[] page_ttl, boolean show_all_matches) {
this.qry = qry; this.ui = ui;
this.qry_id = qry_id; this.qry_id = qry_id;
this.wiki = wiki; this.wiki = wiki;
this.page_id = page_id; this.page_id = page_id;
@ -43,7 +36,6 @@ public class Xofulltext_finder_cbk__highlight implements Xofulltext_finder_cbk {
found = 0; found = 0;
} }
public void Process_item_found(byte[] src, int hook_bgn, int hook_end, int word_bgn, int word_end, Xofulltext_word_node term) { public void Process_item_found(byte[] src, int hook_bgn, int hook_end, int word_bgn, int word_end, Xofulltext_word_node term) {
// if (found < max_snips_per_page) {
// get snip bounds by finding flanking 50 chars and then expanding to word-bounds // get snip bounds by finding flanking 50 chars and then expanding to word-bounds
int snip_bgn = hook_bgn - 50; int snip_bgn = hook_bgn - 50;
if (snip_bgn < 0) if (snip_bgn < 0)
@ -70,23 +62,8 @@ public class Xofulltext_finder_cbk__highlight implements Xofulltext_finder_cbk {
// send notification // send notification
byte[] line_html = tmp_bfr.To_bry_and_clear(); byte[] line_html = tmp_bfr.To_bry_and_clear();
if (found == 0 || show_all_matches) { ui.Send_line_add(show_all_matches, qry_id, wiki.Domain_bry(), page_id, found, line_html);
app.Gui__cbk_mgr().Send_json(cbk_trg, "xo.fulltext_searcher.results__line__add__recv", gplx.core.gfobjs.Gfobj_nde.New()
.Add_bry("wiki", wiki.Domain_bry())
.Add_int("page_id", page_id)
.Add_int("line", found + 1)
.Add_bry("html", line_html)
);
}
cache_mgr.Add(qry_id, qry, wiki.Domain_bry(), -1, page_id, found, line_html);
// }
found++; found++;
app.Gui__cbk_mgr().Send_json(cbk_trg, "xo.fulltext_searcher.results__page__update__recv", gplx.core.gfobjs.Gfobj_nde.New()
.Add_bry("wiki", wiki.Domain_bry())
.Add_int("page_id", page_id)
.Add_int("found", found)
.Add_bool("show_all_matches", show_all_matches)
);
} }
private static final byte[] Angle_bgn_escaped = Bry_.new_a7("&lt;"); private static final byte[] Angle_bgn_escaped = Bry_.new_a7("&lt;");
private void Add_snip(Bry_bfr bfr, byte[] src, int bgn, int end) { private void Add_snip(Bry_bfr bfr, byte[] src, int bgn, int end) {

View File

@ -23,17 +23,17 @@ import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.uis.*;
class Xofulltext_highlighter_mgr implements Gfo_invk { class Xofulltext_highlighter_mgr implements Gfo_invk {
private final Xofulltext_searcher_ui ui; private final Xofulltext_searcher_ui ui;
private final Xow_wiki wiki; private final Xow_wiki wiki;
private final String wiki_domain; private final Xofulltext_searcher_args searcher_args;
private final List_adp list;
private final Gflucene_analyzer_data analyzer_data; private final Gflucene_analyzer_data analyzer_data;
private final Gflucene_searcher_qry searcher_data; private final Gflucene_searcher_qry searcher_data;
private final Gflucene_highlighter_mgr highlighter_mgr = new Gflucene_highlighter_mgr(); private final Gflucene_highlighter_mgr highlighter_mgr = new Gflucene_highlighter_mgr();
private final Xoh_page hpg = new Xoh_page(); private final Xoh_page hpg = new Xoh_page();
private final Xowd_page_itm tmp_page_row = new Xowd_page_itm(); private final Xowd_page_itm tmp_page_row = new Xowd_page_itm();
public Xofulltext_highlighter_mgr(Xofulltext_searcher_ui ui, Xow_wiki wiki, Gflucene_analyzer_data analyzer_data, Gflucene_searcher_qry searcher_data, List_adp list) { private final List_adp list;
public Xofulltext_highlighter_mgr(Xofulltext_searcher_ui ui, Xow_wiki wiki, Xofulltext_searcher_args searcher_args, Gflucene_analyzer_data analyzer_data, Gflucene_searcher_qry searcher_data, List_adp list) {
this.ui = ui; this.ui = ui;
this.wiki = wiki; this.wiki = wiki;
this.wiki_domain = wiki.Domain_str(); this.searcher_args = searcher_args;
this.analyzer_data = analyzer_data; this.analyzer_data = analyzer_data;
this.searcher_data = searcher_data; this.searcher_data = searcher_data;
this.list = list; this.list = list;
@ -76,7 +76,7 @@ class Xofulltext_highlighter_mgr implements Gfo_invk {
int page_id = item.page_id; int page_id = item.page_id;
Gflucene_highlighter_item[] lines = highlighter_mgr.Exec(searcher_data, item); Gflucene_highlighter_item[] lines = highlighter_mgr.Exec(searcher_data, item);
for (Gflucene_highlighter_item line : lines) { for (Gflucene_highlighter_item line : lines) {
ui.Send_line_add(new Xofulltext_searcher_line(wiki_domain, page_id, line.num, line.text)); ui.Send_line_add(searcher_args.show_all_matches, searcher_args.query_id, wiki.Domain_bry(), page_id, line.num, Bry_.new_u8(line.text));
} }
} }
public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) { public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {

View File

@ -30,7 +30,7 @@ public class Xofulltext_searcher__lucene implements Xofulltext_searcher {
Gflucene_analyzer_data analyzer_data = Gflucene_analyzer_data.New_data_from_locale(wiki.Lang().Key_str()); Gflucene_analyzer_data analyzer_data = Gflucene_analyzer_data.New_data_from_locale(wiki.Lang().Key_str());
searcher.Init(new Gflucene_index_data searcher.Init(new Gflucene_index_data
( analyzer_data ( analyzer_data
, wiki.Fsys_mgr().Root_dir().GenSubDir_nest("data", "search").Xto_api())); , Xosearch_fulltext_addon.Get_index_dir(wiki).Xto_api()));
// exec search // exec search
Gflucene_searcher_qry searcher_data = new Gflucene_searcher_qry(String_.new_u8(args.query), args.max_pages_per_wiki); Gflucene_searcher_qry searcher_data = new Gflucene_searcher_qry(String_.new_u8(args.query), args.max_pages_per_wiki);
@ -50,7 +50,7 @@ public class Xofulltext_searcher__lucene implements Xofulltext_searcher {
} }
// create highlighter thread and launch it // create highlighter thread and launch it
Xofulltext_highlighter_mgr highlighter_mgr = new Xofulltext_highlighter_mgr(ui, wiki, analyzer_data, searcher_data, list); Xofulltext_highlighter_mgr highlighter_mgr = new Xofulltext_highlighter_mgr(ui, wiki, args, analyzer_data, searcher_data, list);
gplx.core.threads.Thread_adp_.Start_by_key("highlighter", Cancelable_.Never, highlighter_mgr, Xofulltext_highlighter_mgr.Invk__highlight); gplx.core.threads.Thread_adp_.Start_by_key("highlighter", Cancelable_.Never, highlighter_mgr, Xofulltext_highlighter_mgr.Invk__highlight);
} }
} }

View File

@ -15,14 +15,14 @@ Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/ */
package gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.uis; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.searchers.*; import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.*; package gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.uis; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.searchers.*; import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.*;
public class Xofulltext_searcher_line { public class Xofulltext_searcher_line {
public Xofulltext_searcher_line(String wiki_domain, int page_id, int found_idx, String excerpt) { public Xofulltext_searcher_line(byte[] wiki_domain, int page_id, int found_idx, byte[] excerpt) {
this.wiki_domain = wiki_domain; this.wiki_domain = wiki_domain;
this.page_id = page_id; this.page_id = page_id;
this.found_idx = found_idx; this.found_idx = found_idx;
this.excerpt = excerpt; this.excerpt = excerpt;
} }
public String Wiki_domain() {return wiki_domain;} private final String wiki_domain; public byte[] Wiki_domain() {return wiki_domain;} private final byte[] wiki_domain;
public int Page_id() {return page_id;} private final int page_id; public int Page_id() {return page_id;} private final int page_id;
public int Found_idx() {return found_idx;} private final int found_idx; public int Found_idx() {return found_idx;} private final int found_idx;
public String Excerpt() {return excerpt;} private final String excerpt; public byte[] Excerpt() {return excerpt;} private final byte[] excerpt;
} }

View File

@ -14,9 +14,55 @@ GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/ */
package gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.uis; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.searchers.*; import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.*; package gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.uis; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.searchers.*; import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.*;
public interface Xofulltext_searcher_ui { import gplx.xowa.guis.cbks.*;
void Send_wiki_add(byte[] wiki_domain); import gplx.xowa.addons.wikis.fulltexts.searchers.caches.*;
void Send_wiki_update(byte[] wiki, int found, int searched); public class Xofulltext_searcher_ui {
void Send_page_add(Xofulltext_searcher_page page); private final Xog_cbk_mgr cbk_mgr;
void Send_line_add(Xofulltext_searcher_line line); private final Xog_cbk_trg cbk_trg;
private final Xofulltext_cache_mgr cache_mgr;
/**
 * Creates the ui notifier.
 * cache_mgr receives every line passed to Send_line_add; cbk_mgr / cbk_trg are used for all Send_json calls.
 */
public Xofulltext_searcher_ui(Xofulltext_cache_mgr cache_mgr, Xog_cbk_mgr cbk_mgr, Xog_cbk_trg cbk_trg) {
  // plain field captures; the assignments are independent of each other
  this.cbk_mgr = cbk_mgr;
  this.cbk_trg = cbk_trg;
  this.cache_mgr = cache_mgr;
}
/** Sends a "results__wiki__add__recv" json message whose only field is the wiki domain. */
public void Send_wiki_add(byte[] wiki_domain) {
  final String evt = "xo.fulltext_searcher.results__wiki__add__recv";
  cbk_mgr.Send_json(cbk_trg, evt, gplx.core.gfobjs.Gfobj_nde.New().Add_bry("wiki", wiki_domain));
}
/** Sends a "results__wiki__update__recv" json message carrying the wiki plus its found / searched counters. */
public void Send_wiki_update(byte[] wiki, int found, int searched) {
  final String evt = "xo.fulltext_searcher.results__wiki__update__recv";
  cbk_mgr.Send_json
    ( cbk_trg
    , evt
    , gplx.core.gfobjs.Gfobj_nde.New()
        .Add_bry("wiki", wiki)
        .Add_int("found", found)
        .Add_int("searched", searched)
    );
}
/**
 * Sends a "results__page__add__recv" json message for one page.
 * Fields are read from {@code page}: query id, wiki domain, page id, page title and the expand-matches flag.
 */
public void Send_page_add(Xofulltext_searcher_page page) {
  final String evt = "xo.fulltext_searcher.results__page__add__recv";
  cbk_mgr.Send_json
    ( cbk_trg
    , evt
    , gplx.core.gfobjs.Gfobj_nde.New()
        .Add_int("query_id", page.Query_id())
        .Add_str("wiki", page.Wiki_domain())
        .Add_int("page_id", page.Page_id())
        .Add_str("page_ttl", page.Page_title())
        .Add_bool("expand_matches_section", page.Expand_matches_section())
    );
}
/**
 * Records one matched line in the cache and notifies the gui.
 *
 * Steps, in order:
 *   1. the line is added to cache_mgr under the caller's (unshifted) line_sort_order;
 *   2. a "results__line__add__recv" message is sent only when show_all_matches is set
 *      or the shifted line number equals 1;
 *   3. a "results__page__update__recv" message is always sent, with "found" set to the shifted line number.
 *
 * NOTE(review): the cache write is unconditional. A caller that replays lines it has just read
 * back from cache_mgr will therefore add them a second time -- confirm cache_mgr.Add tolerates that.
 */
public void Send_line_add(boolean show_all_matches, int qry_id, byte[] wiki_domain, int page_id, int line_sort_order, byte[] line_html) {
  // cache first: the cache keeps the unshifted sort order
  cache_mgr.Add(qry_id, wiki_domain, page_id, line_sort_order, line_html);

  // the gui gets the sort order shifted by List_adp_.Base1
  int ui_line = line_sort_order;
  ui_line += List_adp_.Base1;

  boolean emit_line = show_all_matches || ui_line == 1;
  if (emit_line) {
    final String line_evt = "xo.fulltext_searcher.results__line__add__recv";
    cbk_mgr.Send_json
      ( cbk_trg
      , line_evt
      , gplx.core.gfobjs.Gfobj_nde.New()
          .Add_bry("wiki", wiki_domain)
          .Add_int("page_id", page_id)
          .Add_int("line", ui_line)
          .Add_bry("html", line_html)
      );
  }

  // the page counter is refreshed even when the line itself was not sent
  final String page_evt = "xo.fulltext_searcher.results__page__update__recv";
  cbk_mgr.Send_json
    ( cbk_trg
    , page_evt
    , gplx.core.gfobjs.Gfobj_nde.New()
        .Add_bry("wiki", wiki_domain)
        .Add_int("page_id", page_id)
        .Add_int("found", ui_line)
        .Add_bool("show_all_matches", show_all_matches)
    );
}
} }

View File

@ -1,54 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.uis; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.searchers.*; import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.*;
import gplx.xowa.guis.cbks.*;
/**
 * Gui-backed implementation of Xofulltext_searcher_ui.
 * Each Send_* method packs its arguments into a json node and sends it to cbk_trg through cbk_mgr.
 */
public class Xofulltext_searcher_ui__gui implements Xofulltext_searcher_ui {
  private final Xog_cbk_mgr cbk_mgr;
  private final Xog_cbk_trg cbk_trg;

  /** Stores the callback manager and the target that every message is sent to. */
  public Xofulltext_searcher_ui__gui(Xog_cbk_mgr cbk_mgr, Xog_cbk_trg cbk_trg) {
    this.cbk_trg = cbk_trg;
    this.cbk_mgr = cbk_mgr;
  }

  /** Sends "results__wiki__add__recv" with the wiki domain as its only field. */
  public void Send_wiki_add(byte[] wiki_domain) {
    final String evt = "xo.fulltext_searcher.results__wiki__add__recv";
    cbk_mgr.Send_json(cbk_trg, evt, gplx.core.gfobjs.Gfobj_nde.New().Add_bry("wiki", wiki_domain));
  }

  /** Sends "results__wiki__update__recv" with the wiki and its found / searched counters. */
  public void Send_wiki_update(byte[] wiki, int found, int searched) {
    final String evt = "xo.fulltext_searcher.results__wiki__update__recv";
    cbk_mgr.Send_json
      ( cbk_trg
      , evt
      , gplx.core.gfobjs.Gfobj_nde.New()
          .Add_bry("wiki", wiki)
          .Add_int("found", found)
          .Add_int("searched", searched)
      );
  }

  /** Sends "results__page__add__recv" with the fields read from {@code page}. */
  public void Send_page_add(Xofulltext_searcher_page page) {
    final String evt = "xo.fulltext_searcher.results__page__add__recv";
    cbk_mgr.Send_json
      ( cbk_trg
      , evt
      , gplx.core.gfobjs.Gfobj_nde.New()
          .Add_int("query_id", page.Query_id())
          .Add_str("wiki", page.Wiki_domain())
          .Add_int("page_id", page.Page_id())
          .Add_str("page_ttl", page.Page_title())
          .Add_bool("expand_matches_section", page.Expand_matches_section())
      );
  }

  /** Sends "results__line__add__recv" with the wiki, page id, found index and excerpt of {@code match}. */
  public void Send_line_add(Xofulltext_searcher_line match) {
    final String evt = "xo.fulltext_searcher.results__line__add__recv";
    cbk_mgr.Send_json
      ( cbk_trg
      , evt
      , gplx.core.gfobjs.Gfobj_nde.New()
          .Add_str("wiki", match.Wiki_domain())
          .Add_int("page_id", match.Page_id())
          .Add_int("line", match.Found_idx())
          .Add_str("html", match.Excerpt())
      );
  }
}

View File

@ -31,10 +31,10 @@ class Xofulltext_searcher_svc implements Gfo_invk {
private final Xoa_app app; private final Xoa_app app;
private final Xog_cbk_trg cbk_trg = Xog_cbk_trg.New(Xofulltext_searcher_special.Prototype.Special__meta().Ttl_bry()); private final Xog_cbk_trg cbk_trg = Xog_cbk_trg.New(Xofulltext_searcher_special.Prototype.Special__meta().Ttl_bry());
private final Xofulltext_cache_mgr cache_mgr = new Xofulltext_cache_mgr(); private final Xofulltext_cache_mgr cache_mgr = new Xofulltext_cache_mgr();
private final Xofulltext_searcher_ui searcher_cbk; private final Xofulltext_searcher_ui searcher_ui;
public Xofulltext_searcher_svc(Xoa_app app) { public Xofulltext_searcher_svc(Xoa_app app) {
this.app = app; this.app = app;
this.searcher_cbk = new Xofulltext_searcher_ui__gui(app.Gui__cbk_mgr(), cbk_trg); this.searcher_ui = new Xofulltext_searcher_ui(cache_mgr, app.Gui__cbk_mgr(), cbk_trg);
} }
public void Search(Json_nde args) { public void Search(Json_nde args) {
// for now, always clear cache; "get_lines_rest" will only work for latest search // for now, always clear cache; "get_lines_rest" will only work for latest search
@ -43,6 +43,7 @@ class Xofulltext_searcher_svc implements Gfo_invk {
// get search_args // get search_args
Xofulltext_searcher_args search_args = Xofulltext_searcher_args.New_by_json(args); Xofulltext_searcher_args search_args = Xofulltext_searcher_args.New_by_json(args);
search_args.query_id = cache_mgr.Next_qry_id(); search_args.query_id = cache_mgr.Next_qry_id();
cache_mgr.Add(search_args.query_id, search_args.query);
// autosave any changes if enabled // autosave any changes if enabled
Xocfg_mgr cfg_mgr = app.Cfg(); Xocfg_mgr cfg_mgr = app.Cfg();
@ -66,11 +67,11 @@ class Xofulltext_searcher_svc implements Gfo_invk {
for (byte[] wiki_domain : wiki_domains) { for (byte[] wiki_domain : wiki_domains) {
// get wiki and notify // get wiki and notify
Xow_wiki wiki = app.Wiki_mgri().Get_by_or_make_init_y(wiki_domain); Xow_wiki wiki = app.Wiki_mgri().Get_by_or_make_init_y(wiki_domain);
searcher_cbk.Send_wiki_add(wiki_domain); searcher_ui.Send_wiki_add(wiki_domain);
// get searcher and search // get searcher and search
Xofulltext_searcher searcher = Get_searcher(wiki); Xofulltext_searcher searcher = Get_searcher(wiki);
searcher.Search(searcher_cbk, wiki, args); searcher.Search(searcher_ui, wiki, args);
} }
} catch (Exception exc) { } catch (Exception exc) {
if (app.Tid_is_edit()) if (app.Tid_is_edit())
@ -83,16 +84,15 @@ class Xofulltext_searcher_svc implements Gfo_invk {
private void Get_lines_rest(int qry_id, byte[] wiki_bry, int page_id) { private void Get_lines_rest(int qry_id, byte[] wiki_bry, int page_id) {
Xofulltext_cache_line[] lines = cache_mgr.Get_lines_rest(qry_id, wiki_bry, page_id); Xofulltext_cache_line[] lines = cache_mgr.Get_lines_rest(qry_id, wiki_bry, page_id);
for (Xofulltext_cache_line line : lines) { for (Xofulltext_cache_line line : lines) {
Xofulltext_searcher_line match = new Xofulltext_searcher_line(String_.new_u8(wiki_bry), page_id, line.Line_seq() + 1, String_.new_u8(line.Line_html())); searcher_ui.Send_line_add(true, qry_id, wiki_bry, page_id, line.Line_seq(), line.Line_html());
searcher_cbk.Send_line_add(match);
} }
} }
private Xofulltext_searcher Get_searcher(Xow_wiki wiki) { private Xofulltext_searcher Get_searcher(Xow_wiki wiki) {
if (Io_mgr.Instance.ExistsDir(wiki.Fsys_mgr().Root_dir().GenSubDir_nest("data", "search"))) { if (Io_mgr.Instance.ExistsDir(Xosearch_fulltext_addon.Get_index_dir(wiki))) {
return new Xofulltext_searcher__lucene(); return new Xofulltext_searcher__lucene();
} }
else { else {
return new Xofulltext_searcher__brute(app, cbk_trg, cache_mgr); return new Xofulltext_searcher__brute();
} }
} }

View File

@ -3,10 +3,10 @@
<classpathentry kind="src" path="src"/> <classpathentry kind="src" path="src"/>
<classpathentry combineaccessrules="false" kind="src" path="/100_core"/> <classpathentry combineaccessrules="false" kind="src" path="/100_core"/>
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/> <classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/>
<classpathentry kind="lib" path="lib/lucene-analyzers-common-6.4.2.jar"/> <classpathentry kind="lib" path="lib/6.4.2/lucene-analyzers-common-6.4.2.jar"/>
<classpathentry kind="lib" path="lib/lucene-core-6.4.2.jar"/> <classpathentry kind="lib" path="lib/6.4.2/lucene-core-6.4.2.jar"/>
<classpathentry kind="lib" path="lib/lucene-highlighter-6.4.2.jar"/> <classpathentry kind="lib" path="lib/6.4.2/lucene-highlighter-6.4.2.jar"/>
<classpathentry kind="lib" path="lib/lucene-memory-6.4.2.jar"/> <classpathentry kind="lib" path="lib/6.4.2/lucene-memory-6.4.2.jar"/>
<classpathentry kind="lib" path="lib/lucene-queryparser-6.4.2.jar"/> <classpathentry kind="lib" path="lib/6.4.2/lucene-queryparser-6.4.2.jar"/>
<classpathentry kind="output" path="bin"/> <classpathentry kind="output" path="bin"/>
</classpath> </classpath>

View File

@ -32,10 +32,13 @@ import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Formatter; import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.highlight.Fragmenter;
import org.apache.lucene.search.highlight.Highlighter; import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException; import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer; import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter; import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.search.highlight.SimpleSpanFragmenter;
import org.apache.lucene.search.highlight.TextFragment; import org.apache.lucene.search.highlight.TextFragment;
import org.apache.lucene.search.highlight.TokenSources; import org.apache.lucene.search.highlight.TokenSources;
import org.apache.lucene.store.FSDirectory; import org.apache.lucene.store.FSDirectory;
@ -60,16 +63,21 @@ public class Gflucene_highlighter_mgr {
// create highlighter // create highlighter
SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter("<span class='snip_highlight'>", "</span>"); SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter("<span class='snip_highlight'>", "</span>");
Highlighter highlighter = new Highlighter(htmlFormatter, new QueryScorer(query)); QueryScorer scorer = new QueryScorer(query);
scorer.setExpandMultiTermQuery(false);
Highlighter highlighter = new Highlighter(htmlFormatter, scorer);
SimpleFragmenter fragmenter = new SimpleFragmenter(100);
highlighter.setTextFragmenter(fragmenter);
// get token stream // get token stream
String text = doc_data.body; String text = doc_data.body;
TokenStream tokenStream = analyzer.tokenStream("body", text); TokenStream tokenStream = analyzer.tokenStream("body", text);
// get fragments from stream // get fragments from stream
String[] frags; TextFragment[] frags;
try { try {
frags = highlighter.getBestFragments(tokenStream, text, 10); // frags = highlighter.getBestTextFragments(tokenStream, text, false, 1000);
frags = highlighter.getBestTextFragments(tokenStream, text, true, 10);
} catch (IOException e) { } catch (IOException e) {
throw Err_.new_exc(e, "lucene_index", "failed to get best", "query", qry_data.query); throw Err_.new_exc(e, "lucene_index", "failed to get best", "query", qry_data.query);
} catch (InvalidTokenOffsetsException e) { } catch (InvalidTokenOffsetsException e) {
@ -80,7 +88,7 @@ public class Gflucene_highlighter_mgr {
int frags_len = frags.length; int frags_len = frags.length;
Gflucene_highlighter_item[] array = new Gflucene_highlighter_item[frags_len]; Gflucene_highlighter_item[] array = new Gflucene_highlighter_item[frags_len];
for (int i = 0; i < frags_len; i++) { for (int i = 0; i < frags_len; i++) {
String frag = frags[i]; String frag = frags[i].toString();
array[i] = new Gflucene_highlighter_item(i, frag); array[i] = new Gflucene_highlighter_item(i, frag);
} }
return array; return array;