1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2026-03-02 03:49:30 +00:00

Full-text search: Add analyzers for non-English languages

This commit is contained in:
gnosygnu
2017-03-13 10:05:21 -04:00
parent 77de7215ce
commit 3b6cc45084
10 changed files with 181 additions and 39 deletions

View File

@@ -14,20 +14,24 @@ GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.addons.wikis.fulltexts.indexers.bldrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.indexers.*;
import gplx.gflucene.*;
import gplx.gflucene.core.*;
import gplx.gflucene.indexers.*;
public class Xofulltext_indexer_wkr {
private final Gflucene_index_bldr index_wtr = new Gflucene_index_bldr();
private final Gflucene_indexer_mgr index_wtr = new Gflucene_indexer_mgr();
// Prepares the indexer for one wiki: builds the "data/search" sub-directory url under the wiki's root dir,
// passes it to Io_mgr DeleteDirDeep (presumably to discard any previous index -- confirm), then initializes index_wtr.
// NOTE(review): this text appears to be a commit diff rendered without +/- markers, so both the old and new Init calls are listed below; confirm which one is live
public void Init(Xow_wiki wiki) {
Io_url search_dir = wiki.Fsys_mgr().Root_dir().GenSubDir_nest("data", "search");
Io_mgr.Instance.DeleteDirDeep(search_dir);
// looks like the pre-change call: passes only the search dir
index_wtr.Init(search_dir.Xto_api());
;
// looks like the post-change call: passes analyzer data derived from the wiki's language key together with the search dir
index_wtr.Init(new Gflucene_index_data
( Gflucene_analyzer_data.New_data_from_locale(wiki.Lang().Key_str())
, search_dir.Xto_api()));
}
// Convenience overload: reads id, score, title text and html bytes from the page and forwards them to Index(int, int, byte[], byte[])
public void Index(Xoae_page wpg) {
// TODO: skip if not main_ns
int page_id = wpg.Db().Page().Id();
int score = wpg.Db().Page().Score();
byte[] ttl = wpg.Ttl().Page_txt();
byte[] html = wpg.Db().Html().Html_bry();
Index(page_id, score, ttl, html);
}
// Indexes one page: converts the title and html bytes to strings via String_.new_u8, wraps them with page_id and score in a data object, and hands that object to index_wtr.Exec
public void Index(int page_id, int score, byte[] ttl, byte[] html) {
// NOTE(review): two declarations of `data` are listed back to back; this looks like the removed and added lines of a diff shown without +/- markers -- only one can exist in compilable source; confirm which
Gflucene_index_data data = new Gflucene_index_data(page_id, score, String_.new_u8(ttl), String_.new_u8(html));
Gflucene_doc_data data = new Gflucene_doc_data(page_id, score, String_.new_u8(ttl), String_.new_u8(html));
index_wtr.Exec(data);
}
public void Term() {

View File

@@ -15,18 +15,23 @@ Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.gflucenes; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.searchers.*; import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.*;
import gplx.gflucene.*;
import gplx.gflucene.core.*;
import gplx.gflucene.indexers.*;
import gplx.gflucene.searchers.*;
import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.uis.*;
public class Xofulltext_searcher__lucene implements Xofulltext_searcher {
private final Gflucene_searcher searcher = new Gflucene_searcher();
private final Gflucene_searcher_mgr searcher = new Gflucene_searcher_mgr();
public void Search(Xofulltext_searcher_ui cbk, Xow_wiki wiki, Xofulltext_searcher_args args) {
// create list
List_adp list = List_adp_.New();
// init searcher with wiki
searcher.Init(wiki.Fsys_mgr().Root_dir().GenSubDir_nest("data", "search").Xto_api());
searcher.Init(new Gflucene_index_data
( Gflucene_analyzer_data.New_data_from_locale(wiki.Lang().Key_str())
, wiki.Fsys_mgr().Root_dir().GenSubDir_nest("data", "search").Xto_api()));
// exec search
searcher.Exec(list, new Gflucene_searcher_data(String_.new_u8(args.query), args.max_pages_per_wiki));
searcher.Exec(list, new Gflucene_searcher_qry(String_.new_u8(args.query), args.max_pages_per_wiki));
// term
searcher.Term();
@@ -34,7 +39,7 @@ public class Xofulltext_searcher__lucene implements Xofulltext_searcher {
// loop list
int len = list.Len();
for (int i = 0; i < len; i++) {
Gflucene_index_data found = (Gflucene_index_data)list.Get_at(i);
Gflucene_doc_data found = (Gflucene_doc_data)list.Get_at(i);
// call page found
Xofulltext_searcher_page page = new Xofulltext_searcher_page(args.query_id, wiki.Domain_str(), found.page_id, found.title, false);