Full-text search: Change to lucene 6.4.2

pull/620/head
gnosygnu 7 years ago
parent 8f8e414c80
commit 8c31c8fd62

@ -36,4 +36,9 @@ public class Xosearch_fulltext_addon implements Xoax_addon_itm, Xoax_addon_itm__
}
public String Addon__key() {return ADDON__KEY;} private static final String ADDON__KEY = "xowa.wiki.fulltext";
// Resolves the full-text index dir for a wiki by passing the wiki's root dir to the Io_url overload.
public static Io_url Get_index_dir(Xow_wiki wiki) {
	Io_url wiki_root_dir = wiki.Fsys_mgr().Root_dir();
	return Get_index_dir(wiki_root_dir);
}
// Builds the index dir as the nested sub-directories "data", "search", "java8-v1" of the given wiki dir.
public static Io_url Get_index_dir(Io_url wiki_dir) {
	Io_url index_dir = wiki_dir.GenSubDir_nest("data", "search", "java8-v1");
	return index_dir;
}
}

@ -19,12 +19,14 @@ import gplx.gflucene.indexers.*;
public class Xofulltext_indexer_wkr {
private final Gflucene_indexer_mgr index_wtr = new Gflucene_indexer_mgr();
// Prepares the on-disk location of the wiki's full-text index and initializes the index writer.
// The target directory is removed with Io_mgr.Instance.DeleteDirDeep before index_wtr.Init is called
// with analyzer data created from the wiki's language key (wiki.Lang().Key_str()) and the directory path.
public void Init(Xow_wiki wiki) {
Io_url search_dir = wiki.Fsys_mgr().Root_dir().GenSubDir_nest("data", "search");
Io_mgr.Instance.DeleteDirDeep(search_dir);
;
// delete existing dir
Io_url index_dir = Xosearch_fulltext_addon.Get_index_dir(wiki);
Io_mgr.Instance.DeleteDirDeep(index_dir);
// init index_dir
index_wtr.Init(new Gflucene_index_data
( Gflucene_analyzer_data.New_data_from_locale(wiki.Lang().Key_str())
, search_dir.Xto_api()));
, index_dir.Xto_api()));
}
public void Index(Xoae_page wpg) {
// TODO: skip if not main_ns

@ -48,7 +48,7 @@ class Xofulltext_indexer_svc implements Gfo_invk {
}
wiki.Init_by_wiki();
Io_url search_dir = wiki.Fsys_mgr().Root_dir().GenSubDir_nest("data", "search");
Io_url search_dir = Xosearch_fulltext_addon.Get_index_dir(wiki);
if (Io_mgr.Instance.ExistsDir(search_dir)) {
app.Gui__cbk_mgr().Send_json(cbk_trg, "xo.fulltext_indexer.status__note__recv", gplx.core.gfobjs.Gfobj_nde.New()
.Add_str("note", Datetime_now.Get().XtoStr_fmt_yyyy_MM_dd_HH_mm_ss() + ": search dir already exists; please delete it manually before reindexing; " + String_.new_u8(domain)));

@ -20,13 +20,19 @@ public class Xofulltext_cache_mgr {
// Empties the cache by clearing qry_hash, the collection that holds the cached queries by query id.
public void Clear() {
qry_hash.Clear();
}
public void Add(int query_id, byte[] query, byte[] wiki_bry, int page_seq, int page_id, int line_seq, byte[] line_html) {
// get qry
// Registers a query under query_id; when an entry for that id is already cached it is left untouched.
public void Add(int query_id, byte[] query) {
	Xofulltext_cache_qry cached = (Xofulltext_cache_qry)qry_hash.Get_by(query_id);
	if (cached != null) return; // already registered; nothing to do
	qry_hash.Add(query_id, new Xofulltext_cache_qry(query_id, query));
}
public void Add(int query_id, byte[] wiki_bry, int page_id, int line_seq, byte[] line_html) {
// get qry
Xofulltext_cache_qry qry = (Xofulltext_cache_qry)qry_hash.Get_by(query_id);
if (qry == null) {
throw Err_.new_wo_type("query not found; query_id=~{0}", query_id);
}
// get wiki
Xofulltext_cache_wiki wiki = (Xofulltext_cache_wiki)qry.Wikis().Get_by(wiki_bry);
@ -38,7 +44,7 @@ public class Xofulltext_cache_mgr {
// get page
Xofulltext_cache_page page = (Xofulltext_cache_page)wiki.Pages().Get_by(page_id);
if (page == null) {
page = new Xofulltext_cache_page(page_id, page_seq);
page = new Xofulltext_cache_page(page_id, wiki.Pages().Count());
wiki.Pages().Add(page_id, page);
}

@ -22,10 +22,7 @@ import gplx.xowa.addons.wikis.fulltexts.searchers.caches.*;
public class Xofulltext_searcher__brute implements Xofulltext_searcher {
private final Xofulltext_finder_mgr finder = new Xofulltext_finder_mgr();
private final Xofulltext_finder_cbk__eval cbk_eval = new Xofulltext_finder_cbk__eval();
private final Xofulltext_finder_cbk__highlight cbk_highlight;
public Xofulltext_searcher__brute(Xoa_app app, Xog_cbk_trg cbk_trg, Xofulltext_cache_mgr cache_mgr) {
this.cbk_highlight = new Xofulltext_finder_cbk__highlight(app, cbk_trg, cache_mgr);
}
private final Xofulltext_finder_cbk__highlight cbk_highlight = new Xofulltext_finder_cbk__highlight();
public void Search(Xofulltext_searcher_ui ui, Xow_wiki wiki, Xofulltext_searcher_args args) {
// get pages from db
Db_conn page_conn = wiki.Data__core_mgr().Tbl__page().Conn();
@ -62,7 +59,7 @@ public class Xofulltext_searcher__brute implements Xofulltext_searcher {
// do highlight
if (found <= args.max_pages_per_wiki) {
cbk_highlight.Init(args.query, args.query_id, wiki, page_id, ttl.Full_db(), args.show_all_matches);
cbk_highlight.Init(ui, args.query_id, wiki, page_id, ttl.Full_db(), args.show_all_matches);
ui.Send_page_add(new Xofulltext_searcher_page
( args.query_id
, String_.new_u8(wiki_domain)

@ -16,25 +16,18 @@ Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
package gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.brutes.finders; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.searchers.*; import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.*; import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.brutes.*;
import gplx.xowa.guis.cbks.*;
import gplx.xowa.addons.wikis.fulltexts.searchers.caches.*;
import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.uis.*;
public class Xofulltext_finder_cbk__highlight implements Xofulltext_finder_cbk {
private final Xog_cbk_trg cbk_trg;
private final Xoa_app app;
private final Xofulltext_cache_mgr cache_mgr;
private Xofulltext_searcher_ui ui;
private Xow_wiki wiki;
private byte[] qry;
private int qry_id;
private int page_id;
private final Bry_bfr tmp_bfr = Bry_bfr_.New();
public int found;
private boolean show_all_matches;
public Xofulltext_finder_cbk__highlight(Xoa_app app, Xog_cbk_trg cbk_trg, Xofulltext_cache_mgr cache_mgr) {
this.app = app;
this.cbk_trg = cbk_trg;
this.cache_mgr = cache_mgr;
}
public byte[] Page_ttl() {return page_ttl;} private byte[] page_ttl;
public void Init(byte[] qry, int qry_id, Xow_wiki wiki, int page_id, byte[] page_ttl, boolean show_all_matches) {
this.qry = qry;
public void Init(Xofulltext_searcher_ui ui, int qry_id, Xow_wiki wiki, int page_id, byte[] page_ttl, boolean show_all_matches) {
this.ui = ui;
this.qry_id = qry_id;
this.wiki = wiki;
this.page_id = page_id;
@ -43,50 +36,34 @@ public class Xofulltext_finder_cbk__highlight implements Xofulltext_finder_cbk {
found = 0;
}
// Callback invoked for a match inside src; builds an HTML excerpt around the match and reports it.
// - hook_bgn / hook_end bound the matched text in src, which is wrapped in <span class='snip_highlight'>
// - the excerpt starts 50 positions before hook_bgn and ends 50 positions after hook_end (clamped to
//   the bounds of src) and is then adjusted with Bry_find_.Find_bwd_ws / Find_fwd_until_ws
// - the excerpt is assembled in tmp_bfr via Add_snip, and found is incremented once per call
// NOTE(review): word_bgn, word_end and term are not read in the visible body
public void Process_item_found(byte[] src, int hook_bgn, int hook_end, int word_bgn, int word_end, Xofulltext_word_node term) {
// if (found < max_snips_per_page) {
// get snip bounds by finding flanking 50 chars and then expanding to word-bounds
int snip_bgn = hook_bgn - 50;
if (snip_bgn < 0)
snip_bgn = 0;
else {
snip_bgn = Bry_find_.Find_bwd_ws(src, snip_bgn, 0) + 1;
}
int snip_end = hook_end + 50;
if (snip_end >= src.length)
// get snip bounds by finding flanking 50 chars and then expanding to word-bounds
int snip_bgn = hook_bgn - 50;
if (snip_bgn < 0)
snip_bgn = 0;
else {
snip_bgn = Bry_find_.Find_bwd_ws(src, snip_bgn, 0) + 1;
}
int snip_end = hook_end + 50;
if (snip_end >= src.length)
snip_end = src.length;
else {
snip_end = Bry_find_.Find_fwd_until_ws(src, snip_end, src.length);
if (snip_end == Bry_find_.Not_found) { // when snip_end == src.length
snip_end = src.length;
else {
snip_end = Bry_find_.Find_fwd_until_ws(src, snip_end, src.length);
if (snip_end == Bry_find_.Not_found) { // when snip_end == src.length
snip_end = src.length;
}
}
}
// build snip
Add_snip(tmp_bfr, src, snip_bgn, hook_bgn);
tmp_bfr.Add_str_a7("<span class='snip_highlight'>");
Add_snip(tmp_bfr, src, hook_bgn, hook_end);
tmp_bfr.Add_str_a7("</span>");
Add_snip(tmp_bfr, src, hook_end, snip_end);
// build snip
Add_snip(tmp_bfr, src, snip_bgn, hook_bgn);
tmp_bfr.Add_str_a7("<span class='snip_highlight'>");
Add_snip(tmp_bfr, src, hook_bgn, hook_end);
tmp_bfr.Add_str_a7("</span>");
Add_snip(tmp_bfr, src, hook_end, snip_end);
// send notification
byte[] line_html = tmp_bfr.To_bry_and_clear();
if (found == 0 || show_all_matches) {
app.Gui__cbk_mgr().Send_json(cbk_trg, "xo.fulltext_searcher.results__line__add__recv", gplx.core.gfobjs.Gfobj_nde.New()
.Add_bry("wiki", wiki.Domain_bry())
.Add_int("page_id", page_id)
.Add_int("line", found + 1)
.Add_bry("html", line_html)
);
}
cache_mgr.Add(qry_id, qry, wiki.Domain_bry(), -1, page_id, found, line_html);
// }
// send notification
byte[] line_html = tmp_bfr.To_bry_and_clear();
ui.Send_line_add(show_all_matches, qry_id, wiki.Domain_bry(), page_id, found, line_html);
found++;
app.Gui__cbk_mgr().Send_json(cbk_trg, "xo.fulltext_searcher.results__page__update__recv", gplx.core.gfobjs.Gfobj_nde.New()
.Add_bry("wiki", wiki.Domain_bry())
.Add_int("page_id", page_id)
.Add_int("found", found)
.Add_bool("show_all_matches", show_all_matches)
);
}
private static final byte[] Angle_bgn_escaped = Bry_.new_a7("&lt;");
private void Add_snip(Bry_bfr bfr, byte[] src, int bgn, int end) {

@ -23,17 +23,17 @@ import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.uis.*;
class Xofulltext_highlighter_mgr implements Gfo_invk {
private final Xofulltext_searcher_ui ui;
private final Xow_wiki wiki;
private final String wiki_domain;
private final List_adp list;
private final Xofulltext_searcher_args searcher_args;
private final Gflucene_analyzer_data analyzer_data;
private final Gflucene_searcher_qry searcher_data;
private final Gflucene_highlighter_mgr highlighter_mgr = new Gflucene_highlighter_mgr();
private final Xoh_page hpg = new Xoh_page();
private final Xowd_page_itm tmp_page_row = new Xowd_page_itm();
public Xofulltext_highlighter_mgr(Xofulltext_searcher_ui ui, Xow_wiki wiki, Gflucene_analyzer_data analyzer_data, Gflucene_searcher_qry searcher_data, List_adp list) {
private final List_adp list;
public Xofulltext_highlighter_mgr(Xofulltext_searcher_ui ui, Xow_wiki wiki, Xofulltext_searcher_args searcher_args, Gflucene_analyzer_data analyzer_data, Gflucene_searcher_qry searcher_data, List_adp list) {
this.ui = ui;
this.wiki = wiki;
this.wiki_domain = wiki.Domain_str();
this.searcher_args = searcher_args;
this.analyzer_data = analyzer_data;
this.searcher_data = searcher_data;
this.list = list;
@ -76,7 +76,7 @@ class Xofulltext_highlighter_mgr implements Gfo_invk {
int page_id = item.page_id;
Gflucene_highlighter_item[] lines = highlighter_mgr.Exec(searcher_data, item);
for (Gflucene_highlighter_item line : lines) {
ui.Send_line_add(new Xofulltext_searcher_line(wiki_domain, page_id, line.num, line.text));
ui.Send_line_add(searcher_args.show_all_matches, searcher_args.query_id, wiki.Domain_bry(), page_id, line.num, Bry_.new_u8(line.text));
}
}
public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {

@ -30,7 +30,7 @@ public class Xofulltext_searcher__lucene implements Xofulltext_searcher {
Gflucene_analyzer_data analyzer_data = Gflucene_analyzer_data.New_data_from_locale(wiki.Lang().Key_str());
searcher.Init(new Gflucene_index_data
( analyzer_data
, wiki.Fsys_mgr().Root_dir().GenSubDir_nest("data", "search").Xto_api()));
, Xosearch_fulltext_addon.Get_index_dir(wiki).Xto_api()));
// exec search
Gflucene_searcher_qry searcher_data = new Gflucene_searcher_qry(String_.new_u8(args.query), args.max_pages_per_wiki);
@ -50,7 +50,7 @@ public class Xofulltext_searcher__lucene implements Xofulltext_searcher {
}
// create highlighter thread and launch it
Xofulltext_highlighter_mgr highlighter_mgr = new Xofulltext_highlighter_mgr(ui, wiki, analyzer_data, searcher_data, list);
Xofulltext_highlighter_mgr highlighter_mgr = new Xofulltext_highlighter_mgr(ui, wiki, args, analyzer_data, searcher_data, list);
gplx.core.threads.Thread_adp_.Start_by_key("highlighter", Cancelable_.Never, highlighter_mgr, Xofulltext_highlighter_mgr.Invk__highlight);
}
}

@ -15,14 +15,14 @@ Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.uis; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.searchers.*; import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.*;
// Immutable holder for one result line of a full-text search: the wiki domain, the page id,
// the index of the match (found_idx) and the excerpt for that match.
// All values are set once in the constructor and exposed through read-only accessors.
public class Xofulltext_searcher_line {
public Xofulltext_searcher_line(String wiki_domain, int page_id, int found_idx, String excerpt) {
public Xofulltext_searcher_line(byte[] wiki_domain, int page_id, int found_idx, byte[] excerpt) {
this.wiki_domain = wiki_domain;
this.page_id = page_id;
this.found_idx = found_idx;
this.excerpt = excerpt;
}
public String Wiki_domain() {return wiki_domain;} private final String wiki_domain;
public byte[] Wiki_domain() {return wiki_domain;} private final byte[] wiki_domain;
public int Page_id() {return page_id;} private final int page_id;
public int Found_idx() {return found_idx;} private final int found_idx;
public String Excerpt() {return excerpt;} private final String excerpt;
public byte[] Excerpt() {return excerpt;} private final byte[] excerpt;
}

@ -14,9 +14,55 @@ GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.uis; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.searchers.*; import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.*;
public interface Xofulltext_searcher_ui {
void Send_wiki_add(byte[] wiki_domain);
void Send_wiki_update(byte[] wiki, int found, int searched);
void Send_page_add(Xofulltext_searcher_page page);
void Send_line_add(Xofulltext_searcher_line line);
import gplx.xowa.guis.cbks.*;
import gplx.xowa.addons.wikis.fulltexts.searchers.caches.*;
// Reports full-text search progress to the GUI as JSON callback messages sent through cbk_mgr to cbk_trg,
// and records every reported result line in cache_mgr.
public class Xofulltext_searcher_ui {
	private final Xofulltext_cache_mgr cache_mgr;
	private final Xog_cbk_mgr cbk_mgr;
	private final Xog_cbk_trg cbk_trg;
	public Xofulltext_searcher_ui(Xofulltext_cache_mgr cache_mgr, Xog_cbk_mgr cbk_mgr, Xog_cbk_trg cbk_trg) {
		this.cbk_trg = cbk_trg;
		this.cbk_mgr = cbk_mgr;
		this.cache_mgr = cache_mgr;
	}
	// Sends the "wiki add" message for the given wiki domain.
	public void Send_wiki_add(byte[] wiki_domain) {
		gplx.core.gfobjs.Gfobj_nde msg = gplx.core.gfobjs.Gfobj_nde.New()
			.Add_bry("wiki", wiki_domain);
		cbk_mgr.Send_json(cbk_trg, "xo.fulltext_searcher.results__wiki__add__recv", msg);
	}
	// Sends the "wiki update" message carrying the found / searched counters for a wiki.
	public void Send_wiki_update(byte[] wiki, int found, int searched) {
		gplx.core.gfobjs.Gfobj_nde msg = gplx.core.gfobjs.Gfobj_nde.New()
			.Add_bry("wiki", wiki)
			.Add_int("found", found)
			.Add_int("searched", searched);
		cbk_mgr.Send_json(cbk_trg, "xo.fulltext_searcher.results__wiki__update__recv", msg);
	}
	// Sends the "page add" message built from the page's query id, wiki domain, page id, title and expand flag.
	public void Send_page_add(Xofulltext_searcher_page page) {
		gplx.core.gfobjs.Gfobj_nde msg = gplx.core.gfobjs.Gfobj_nde.New()
			.Add_int("query_id", page.Query_id())
			.Add_str("wiki", page.Wiki_domain())
			.Add_int("page_id", page.Page_id())
			.Add_str("page_ttl", page.Page_title())
			.Add_bool("expand_matches_section", page.Expand_matches_section());
		cbk_mgr.Send_json(cbk_trg, "xo.fulltext_searcher.results__page__add__recv", msg);
	}
	// Caches a result line, then notifies the GUI.
	// The line message is only sent for the first line of a page or when show_all_matches is set;
	// the page update message is always sent.
	public void Send_line_add(boolean show_all_matches, int qry_id, byte[] wiki_domain, int page_id, int line_sort_order, byte[] line_html) {
		// the cache receives the sort order exactly as passed in; List_adp_.Base1 is only added afterwards
		cache_mgr.Add(qry_id, wiki_domain, page_id, line_sort_order, line_html);

		int line_num = line_sort_order + List_adp_.Base1;
		boolean send_line = show_all_matches || line_num == 1;
		if (send_line) {
			gplx.core.gfobjs.Gfobj_nde line_msg = gplx.core.gfobjs.Gfobj_nde.New()
				.Add_bry("wiki", wiki_domain)
				.Add_int("page_id", page_id)
				.Add_int("line", line_num)
				.Add_bry("html", line_html);
			cbk_mgr.Send_json(cbk_trg, "xo.fulltext_searcher.results__line__add__recv", line_msg);
		}

		gplx.core.gfobjs.Gfobj_nde page_msg = gplx.core.gfobjs.Gfobj_nde.New()
			.Add_bry("wiki", wiki_domain)
			.Add_int("page_id", page_id)
			.Add_int("found", line_num)
			.Add_bool("show_all_matches", show_all_matches);
		cbk_mgr.Send_json(cbk_trg, "xo.fulltext_searcher.results__page__update__recv", page_msg);
	}
}

@ -1,54 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.uis; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.searchers.*; import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.*;
import gplx.xowa.guis.cbks.*;
// GUI-backed implementation of Xofulltext_searcher_ui: every notification is turned into a JSON node
// and sent through cbk_mgr to cbk_trg.
public class Xofulltext_searcher_ui__gui implements Xofulltext_searcher_ui {
	private final Xog_cbk_mgr cbk_mgr;
	private final Xog_cbk_trg cbk_trg;
	public Xofulltext_searcher_ui__gui(Xog_cbk_mgr cbk_mgr, Xog_cbk_trg cbk_trg) {
		this.cbk_trg = cbk_trg;
		this.cbk_mgr = cbk_mgr;
	}
	// Sends the "wiki add" message for the given wiki domain.
	public void Send_wiki_add(byte[] wiki_domain) {
		gplx.core.gfobjs.Gfobj_nde msg = gplx.core.gfobjs.Gfobj_nde.New()
			.Add_bry("wiki", wiki_domain);
		cbk_mgr.Send_json(cbk_trg, "xo.fulltext_searcher.results__wiki__add__recv", msg);
	}
	// Sends the "wiki update" message carrying the found / searched counters for a wiki.
	public void Send_wiki_update(byte[] wiki, int found, int searched) {
		gplx.core.gfobjs.Gfobj_nde msg = gplx.core.gfobjs.Gfobj_nde.New()
			.Add_bry("wiki", wiki)
			.Add_int("found", found)
			.Add_int("searched", searched);
		cbk_mgr.Send_json(cbk_trg, "xo.fulltext_searcher.results__wiki__update__recv", msg);
	}
	// Sends the "page add" message built from the page's query id, wiki domain, page id, title and expand flag.
	public void Send_page_add(Xofulltext_searcher_page page) {
		gplx.core.gfobjs.Gfobj_nde msg = gplx.core.gfobjs.Gfobj_nde.New()
			.Add_int("query_id", page.Query_id())
			.Add_str("wiki", page.Wiki_domain())
			.Add_int("page_id", page.Page_id())
			.Add_str("page_ttl", page.Page_title())
			.Add_bool("expand_matches_section", page.Expand_matches_section());
		cbk_mgr.Send_json(cbk_trg, "xo.fulltext_searcher.results__page__add__recv", msg);
	}
	// Sends the "line add" message built from the match's wiki domain, page id, found index and excerpt.
	public void Send_line_add(Xofulltext_searcher_line match) {
		gplx.core.gfobjs.Gfobj_nde msg = gplx.core.gfobjs.Gfobj_nde.New()
			.Add_str("wiki", match.Wiki_domain())
			.Add_int("page_id", match.Page_id())
			.Add_int("line", match.Found_idx())
			.Add_str("html", match.Excerpt());
		cbk_mgr.Send_json(cbk_trg, "xo.fulltext_searcher.results__line__add__recv", msg);
	}
}

@ -31,10 +31,10 @@ class Xofulltext_searcher_svc implements Gfo_invk {
private final Xoa_app app;
private final Xog_cbk_trg cbk_trg = Xog_cbk_trg.New(Xofulltext_searcher_special.Prototype.Special__meta().Ttl_bry());
private final Xofulltext_cache_mgr cache_mgr = new Xofulltext_cache_mgr();
private final Xofulltext_searcher_ui searcher_cbk;
private final Xofulltext_searcher_ui searcher_ui;
// Creates the search service for the given app and builds the UI notifier from the app's GUI callback
// manager (app.Gui__cbk_mgr()) and this service's callback target (cbk_trg).
public Xofulltext_searcher_svc(Xoa_app app) {
this.app = app;
this.searcher_cbk = new Xofulltext_searcher_ui__gui(app.Gui__cbk_mgr(), cbk_trg);
this.searcher_ui = new Xofulltext_searcher_ui(cache_mgr, app.Gui__cbk_mgr(), cbk_trg);
}
public void Search(Json_nde args) {
// for now, always clear cache; "get_lines_rest" will only work for latest search
@ -43,6 +43,7 @@ class Xofulltext_searcher_svc implements Gfo_invk {
// get search_args
Xofulltext_searcher_args search_args = Xofulltext_searcher_args.New_by_json(args);
search_args.query_id = cache_mgr.Next_qry_id();
cache_mgr.Add(search_args.query_id, search_args.query);
// autosave any changes if enabled
Xocfg_mgr cfg_mgr = app.Cfg();
@ -66,11 +67,11 @@ class Xofulltext_searcher_svc implements Gfo_invk {
for (byte[] wiki_domain : wiki_domains) {
// get wiki and notify
Xow_wiki wiki = app.Wiki_mgri().Get_by_or_make_init_y(wiki_domain);
searcher_cbk.Send_wiki_add(wiki_domain);
searcher_ui.Send_wiki_add(wiki_domain);
// get searcher and search
Xofulltext_searcher searcher = Get_searcher(wiki);
searcher.Search(searcher_cbk, wiki, args);
searcher.Search(searcher_ui, wiki, args);
}
} catch (Exception exc) {
if (app.Tid_is_edit())
@ -83,16 +84,15 @@ class Xofulltext_searcher_svc implements Gfo_invk {
// Reads the lines returned by cache_mgr.Get_lines_rest for the given query / wiki / page
// and sends each of them to the UI as a "line add" notification.
private void Get_lines_rest(int qry_id, byte[] wiki_bry, int page_id) {
Xofulltext_cache_line[] lines = cache_mgr.Get_lines_rest(qry_id, wiki_bry, page_id);
for (Xofulltext_cache_line line : lines) {
Xofulltext_searcher_line match = new Xofulltext_searcher_line(String_.new_u8(wiki_bry), page_id, line.Line_seq() + 1, String_.new_u8(line.Line_html()));
searcher_cbk.Send_line_add(match);
// NOTE(review): the six-argument Send_line_add also calls cache_mgr.Add with the same line, so lines
// read back from the cache are handed to the cache again -- confirm this cannot duplicate entries
searcher_ui.Send_line_add(true, qry_id, wiki_bry, page_id, line.Line_seq(), line.Line_html());
}
}
// Picks the searcher implementation for a wiki: the Lucene searcher when the wiki's index directory
// exists on disk (Io_mgr.Instance.ExistsDir), otherwise the brute-force searcher.
private Xofulltext_searcher Get_searcher(Xow_wiki wiki) {
if (Io_mgr.Instance.ExistsDir(wiki.Fsys_mgr().Root_dir().GenSubDir_nest("data", "search"))) {
if (Io_mgr.Instance.ExistsDir(Xosearch_fulltext_addon.Get_index_dir(wiki))) {
return new Xofulltext_searcher__lucene();
}
else {
return new Xofulltext_searcher__brute(app, cbk_trg, cache_mgr);
return new Xofulltext_searcher__brute();
}
}

@ -3,10 +3,10 @@
<classpathentry kind="src" path="src"/>
<classpathentry combineaccessrules="false" kind="src" path="/100_core"/>
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/>
<classpathentry kind="lib" path="lib/lucene-analyzers-common-6.4.2.jar"/>
<classpathentry kind="lib" path="lib/lucene-core-6.4.2.jar"/>
<classpathentry kind="lib" path="lib/lucene-highlighter-6.4.2.jar"/>
<classpathentry kind="lib" path="lib/lucene-memory-6.4.2.jar"/>
<classpathentry kind="lib" path="lib/lucene-queryparser-6.4.2.jar"/>
<classpathentry kind="lib" path="lib/6.4.2/lucene-analyzers-common-6.4.2.jar"/>
<classpathentry kind="lib" path="lib/6.4.2/lucene-core-6.4.2.jar"/>
<classpathentry kind="lib" path="lib/6.4.2/lucene-highlighter-6.4.2.jar"/>
<classpathentry kind="lib" path="lib/6.4.2/lucene-memory-6.4.2.jar"/>
<classpathentry kind="lib" path="lib/6.4.2/lucene-queryparser-6.4.2.jar"/>
<classpathentry kind="output" path="bin"/>
</classpath>

@ -32,10 +32,13 @@ import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.highlight.Fragmenter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.search.highlight.SimpleSpanFragmenter;
import org.apache.lucene.search.highlight.TextFragment;
import org.apache.lucene.search.highlight.TokenSources;
import org.apache.lucene.store.FSDirectory;
@ -60,16 +63,21 @@ public class Gflucene_highlighter_mgr {
// create highlighter
SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter("<span class='snip_highlight'>", "</span>");
Highlighter highlighter = new Highlighter(htmlFormatter, new QueryScorer(query));
QueryScorer scorer = new QueryScorer(query);
scorer.setExpandMultiTermQuery(false);
Highlighter highlighter = new Highlighter(htmlFormatter, scorer);
SimpleFragmenter fragmenter = new SimpleFragmenter(100);
highlighter.setTextFragmenter(fragmenter);
// get token stream
String text = doc_data.body;
TokenStream tokenStream = analyzer.tokenStream("body", text);
// get fragments from stream
String[] frags;
TextFragment[] frags;
try {
frags = highlighter.getBestFragments(tokenStream, text, 10);
// frags = highlighter.getBestTextFragments(tokenStream, text, false, 1000);
frags = highlighter.getBestTextFragments(tokenStream, text, true, 10);
} catch (IOException e) {
throw Err_.new_exc(e, "lucene_index", "failed to get best", "query", qry_data.query);
} catch (InvalidTokenOffsetsException e) {
@ -80,7 +88,7 @@ public class Gflucene_highlighter_mgr {
int frags_len = frags.length;
Gflucene_highlighter_item[] array = new Gflucene_highlighter_item[frags_len];
for (int i = 0; i < frags_len; i++) {
String frag = frags[i];
String frag = frags[i].toString();
array[i] = new Gflucene_highlighter_item(i, frag);
}
return array;

Loading…
Cancel
Save