mirror of
https://github.com/gnosygnu/xowa.git
synced 2026-03-02 03:49:30 +00:00
Full-text search: Add lucene indexer
This commit is contained in:
@@ -0,0 +1,39 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||
|
||||
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||
or alternatively under the terms of the Apache License Version 2.0.
|
||||
|
||||
You may use XOWA according to either of these licenses as is most appropriate
|
||||
for your project on a case-by-case basis.
|
||||
|
||||
The terms of each license can be found in the source code repository:
|
||||
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.addons.wikis.fulltexts; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*;
|
||||
import gplx.xowa.bldrs.wkrs.*;
|
||||
import gplx.xowa.specials.*; import gplx.xowa.htmls.bridges.*;
|
||||
public class Xosearch_fulltext_addon implements Xoax_addon_itm, Xoax_addon_itm__special, Xoax_addon_itm__json, Xoax_addon_itm__bldr {
|
||||
public Xob_cmd[] Bldr_cmds() {
|
||||
return new Xob_cmd[]
|
||||
{ gplx.xowa.addons.wikis.fulltexts.indexers.bldrs.Xofulltext_indexer_cmd.Prototype
|
||||
};
|
||||
}
|
||||
public Xow_special_page[] Special_pages() {
|
||||
return new Xow_special_page[]
|
||||
{ gplx.xowa.addons.wikis.fulltexts.searchers.specials.Xofulltext_searcher_special.Prototype
|
||||
, gplx.xowa.addons.wikis.fulltexts.indexers.specials.Xofulltext_indexer_special.Prototype
|
||||
};
|
||||
}
|
||||
public Bridge_cmd_itm[] Json_cmds() {
|
||||
return new Bridge_cmd_itm[]
|
||||
{ gplx.xowa.addons.wikis.fulltexts.searchers.svcs.Xofulltext_searcher_bridge.Prototype
|
||||
, gplx.xowa.addons.wikis.fulltexts.indexers.svcs.Xofulltext_indexer_bridge.Prototype
|
||||
};
|
||||
}
|
||||
|
||||
public String Addon__key() {return ADDON__KEY;} private static final String ADDON__KEY = "xowa.wiki.fulltext";
|
||||
}
|
||||
@@ -0,0 +1,28 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||
|
||||
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||
or alternatively under the terms of the Apache License Version 2.0.
|
||||
|
||||
You may use XOWA according to either of these licenses as is most appropriate
|
||||
for your project on a case-by-case basis.
|
||||
|
||||
The terms of each license can be found in the source code repository:
|
||||
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.addons.wikis.fulltexts.indexers.bldrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.indexers.*;
|
||||
import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.wkrs.*;
|
||||
public class Xofulltext_indexer_cmd extends Xob_cmd__base {
|
||||
public Xofulltext_indexer_cmd(Xob_bldr bldr, Xowe_wiki wiki) {super(bldr, wiki);}
|
||||
@Override public void Cmd_run() {
|
||||
wiki.Init_assert();
|
||||
new Xofulltext_indexer_mgr().Exec(wiki, null);
|
||||
}
|
||||
|
||||
@Override public String Cmd_key() {return "search.index";}
|
||||
public static final Xob_cmd Prototype = new Xofulltext_indexer_cmd(null, null);
|
||||
@Override public Xob_cmd Cmd_clone(Xob_bldr bldr, Xowe_wiki wiki) {return new Xofulltext_indexer_cmd(bldr, wiki);}
|
||||
}
|
||||
@@ -0,0 +1,68 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||
|
||||
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||
or alternatively under the terms of the Apache License Version 2.0.
|
||||
|
||||
You may use XOWA according to either of these licenses as is most appropriate
|
||||
for your project on a case-by-case basis.
|
||||
|
||||
The terms of each license can be found in the source code repository:
|
||||
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.addons.wikis.fulltexts.indexers.bldrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.indexers.*;
|
||||
import gplx.dbs.*;
|
||||
import gplx.xowa.htmls.*;
|
||||
import gplx.xowa.wikis.data.*;
|
||||
import gplx.xowa.htmls.core.dbs.*;
|
||||
import gplx.xowa.addons.wikis.fulltexts.indexers.svcs.*;
|
||||
public class Xofulltext_indexer_mgr {
|
||||
public void Exec(Xowe_wiki wiki, Xofulltext_indexer_ui ui) {
|
||||
Xow_db_file core_db = wiki.Data__core_mgr().Db__core();
|
||||
gplx.xowa.wikis.data.tbls.Xowd_page_tbl page_tbl = core_db.Tbl__page();
|
||||
|
||||
Xoh_page hpg = new Xoh_page();
|
||||
|
||||
Xofulltext_indexer_wkr indexer = new Xofulltext_indexer_wkr();
|
||||
indexer.Init(wiki);
|
||||
|
||||
Db_conn conn = page_tbl.Conn();
|
||||
Db_rdr rdr = conn.Exec_rdr("SELECT page_id, page_score, page_namespace, page_title, page_html_db_id FROM page WHERE page_namespace = 0;");
|
||||
int count = 0;
|
||||
while (rdr.Move_next()) {
|
||||
int page_namespace = rdr.Read_int("page_namespace");
|
||||
byte[] page_ttl_bry = rdr.Read_bry_by_str("page_title");
|
||||
int page_id = rdr.Read_int("page_id");
|
||||
int page_score = rdr.Read_int("page_score");
|
||||
int html_db_id = rdr.Read_int("page_html_db_id");
|
||||
|
||||
// ignore redirects
|
||||
if (html_db_id == -1) continue;
|
||||
try {
|
||||
// load page
|
||||
Xoa_ttl page_ttl = wiki.Ttl_parse(page_namespace, page_ttl_bry);
|
||||
if (page_ttl == null)
|
||||
continue;
|
||||
Xow_db_file html_db = html_db_id == -1 ? core_db : wiki.Data__core_mgr().Dbs__get_by_id_or_fail(html_db_id);
|
||||
hpg.Ctor_by_hview(wiki, wiki.Utl__url_parser().Parse(page_ttl.Full_db()), page_ttl, page_id);
|
||||
if (!html_db.Tbl__html().Select_by_page(hpg))
|
||||
continue;
|
||||
byte[] html_text = wiki.Html__hdump_mgr().Load_mgr().Parse(hpg, hpg.Db().Html().Zip_tid(), hpg.Db().Html().Hzip_tid(), hpg.Db().Html().Html_bry());
|
||||
|
||||
indexer.Index(page_id, page_score, page_ttl.Page_txt(), html_text);
|
||||
if ((++count % 10000) == 0) {
|
||||
Gfo_usr_dlg_.Instance.Prog_many("", "", "indexing page: ~{0}", count);
|
||||
if (ui != null)
|
||||
ui.Send_prog(Datetime_now.Get().XtoStr_fmt_yyyy_MM_dd_HH_mm_ss() + ": indexing page: " + count);
|
||||
}
|
||||
} catch (Exception e) {
|
||||
Gfo_usr_dlg_.Instance.Warn_many("", "", "err: ~{0}", Err_.Message_gplx_log(e));
|
||||
}
|
||||
}
|
||||
|
||||
indexer.Term();
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,36 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||
|
||||
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||
or alternatively under the terms of the Apache License Version 2.0.
|
||||
|
||||
You may use XOWA according to either of these licenses as is most appropriate
|
||||
for your project on a case-by-case basis.
|
||||
|
||||
The terms of each license can be found in the source code repository:
|
||||
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.addons.wikis.fulltexts.indexers.bldrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.indexers.*;
|
||||
import gplx.gflucene.*;
|
||||
public class Xofulltext_indexer_wkr {
|
||||
private final Gflucene_index_bldr index_wtr = new Gflucene_index_bldr();
|
||||
public void Init(Xow_wiki wiki) {
|
||||
Io_url search_dir = wiki.Fsys_mgr().Root_dir().GenSubDir_nest("data", "search");
|
||||
Io_mgr.Instance.DeleteDirDeep(search_dir);
|
||||
index_wtr.Init(search_dir.Xto_api());
|
||||
}
|
||||
public void Index(Xoae_page wpg) {
|
||||
// TODO: skip if not main_ns
|
||||
Index(wpg.Db().Page().Id(), wpg.Db().Page().Score(), wpg.Ttl().Page_txt(), wpg.Db().Html().Html_bry());
|
||||
}
|
||||
public void Index(int page_id, int score, byte[] ttl, byte[] html) {
|
||||
Gflucene_index_data data = new Gflucene_index_data(page_id, score, String_.new_u8(ttl), String_.new_u8(html));
|
||||
index_wtr.Exec(data);
|
||||
}
|
||||
public void Term() {
|
||||
index_wtr.Term();
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,34 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||
|
||||
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||
or alternatively under the terms of the Apache License Version 2.0.
|
||||
|
||||
You may use XOWA according to either of these licenses as is most appropriate
|
||||
for your project on a case-by-case basis.
|
||||
|
||||
The terms of each license can be found in the source code repository:
|
||||
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.addons.wikis.fulltexts.indexers.specials; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.indexers.*;
|
||||
import gplx.langs.mustaches.*;
|
||||
public class Xofulltext_indexer_doc implements Mustache_doc_itm {
|
||||
private final byte[] wikis_bry;
|
||||
public Xofulltext_indexer_doc
|
||||
( byte[] wikis_bry) {
|
||||
this.wikis_bry = wikis_bry;
|
||||
}
|
||||
public boolean Mustache__write(String key, Mustache_bfr bfr) {
|
||||
if (String_.Eq(key, "wikis"))
|
||||
bfr.Add_bry(wikis_bry);
|
||||
else
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
public Mustache_doc_itm[] Mustache__subs(String key) {
|
||||
return Mustache_doc_itm_.Ary__empty;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,50 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||
|
||||
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||
or alternatively under the terms of the Apache License Version 2.0.
|
||||
|
||||
You may use XOWA according to either of these licenses as is most appropriate
|
||||
for your project on a case-by-case basis.
|
||||
|
||||
The terms of each license can be found in the source code repository:
|
||||
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.addons.wikis.fulltexts.indexers.specials; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.indexers.*;
|
||||
import gplx.xowa.specials.*; import gplx.langs.mustaches.*; import gplx.xowa.wikis.pages.*; import gplx.xowa.wikis.pages.tags.*;
|
||||
import gplx.dbs.*;
|
||||
class Xofulltext_indexer_html extends Xow_special_wtr__base {
|
||||
private final byte[] wikis_bry;
|
||||
public Xofulltext_indexer_html
|
||||
( byte[] wikis_bry) {
|
||||
this.wikis_bry = wikis_bry;
|
||||
}
|
||||
@Override protected Io_url Get_addon_dir(Xoa_app app) {return Addon_dir(app);}
|
||||
@Override protected Io_url Get_mustache_fil(Io_url addon_dir) {return addon_dir.GenSubFil_nest("bin", "xofulltext_indexer.template.html");}
|
||||
@Override protected Mustache_doc_itm Bld_mustache_root(Xoa_app app) {
|
||||
return new Xofulltext_indexer_doc(wikis_bry);
|
||||
}
|
||||
@Override protected void Bld_tags(Xoa_app app, Io_url addon_dir, Xopage_html_data page_data) {
|
||||
Xopg_tag_mgr head_tags = page_data.Head_tags();
|
||||
Xopg_tag_wtr_.Add__xoelem (head_tags, app.Fsys_mgr().Http_root());
|
||||
|
||||
Xopg_tag_wtr_.Add__xocss (head_tags, app.Fsys_mgr().Http_root());
|
||||
Xopg_tag_wtr_.Add__xohelp (head_tags, app.Fsys_mgr().Http_root());
|
||||
Xopg_tag_wtr_.Add__xolog (head_tags, app.Fsys_mgr().Http_root());
|
||||
Xopg_tag_wtr_.Add__xoajax (head_tags, app.Fsys_mgr().Http_root(), app);
|
||||
Xopg_tag_wtr_.Add__jquery (head_tags, app.Fsys_mgr().Http_root());
|
||||
Xopg_tag_wtr_.Add__xonotify (head_tags, app.Fsys_mgr().Http_root());
|
||||
Xopg_alertify_.Add_tags (head_tags, app.Fsys_mgr().Http_root());
|
||||
|
||||
head_tags.Add(Xopg_tag_itm.New_css_file(addon_dir.GenSubFil_nest("bin", "xofulltext_indexer.css")));
|
||||
head_tags.Add(Xopg_tag_itm.New_js_file(addon_dir.GenSubFil_nest("bin", "xofulltext_indexer.js")));
|
||||
|
||||
page_data.Js_enabled_y_();
|
||||
}
|
||||
public static Io_url Addon_dir(Xoa_app app) {
|
||||
return app.Fsys_mgr().Http_root().GenSubDir_nest("bin", "any", "xowa", "addon", "wiki", "fulltext", "indexer");
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,35 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||
|
||||
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||
or alternatively under the terms of the Apache License Version 2.0.
|
||||
|
||||
You may use XOWA according to either of these licenses as is most appropriate
|
||||
for your project on a case-by-case basis.
|
||||
|
||||
The terms of each license can be found in the source code repository:
|
||||
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.addons.wikis.fulltexts.indexers.specials; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.indexers.*;
|
||||
import gplx.xowa.specials.*; import gplx.core.net.qargs.*;
|
||||
import gplx.xowa.addons.apps.cfgs.*;
|
||||
public class Xofulltext_indexer_special implements Xow_special_page {
|
||||
public void Special__gen(Xow_wiki wiki, Xoa_page page, Xoa_url url, Xoa_ttl ttl) {
|
||||
// get qry if any
|
||||
Gfo_qarg_mgr url_args = new Gfo_qarg_mgr().Init(url.Qargs_ary());
|
||||
byte[] wikis_bry = url_args.Read_bry_or("wikis", Bry_.Empty);
|
||||
|
||||
// get options and create page
|
||||
// Xocfg_mgr cfg_mgr = wiki.App().Cfg();
|
||||
new Xofulltext_indexer_html
|
||||
( wikis_bry
|
||||
).Bld_page_by_mustache(wiki.App(), page, this);
|
||||
}
|
||||
Xofulltext_indexer_special(Xow_special_meta special__meta) {this.special__meta = special__meta;}
|
||||
public Xow_special_meta Special__meta() {return special__meta;} private final Xow_special_meta special__meta;
|
||||
public Xow_special_page Special__clone() {return this;}
|
||||
public static final Xow_special_page Prototype = new Xofulltext_indexer_special(Xow_special_meta.New_xo("XowaSearchBuilder", "Indexer"));
|
||||
}
|
||||
@@ -0,0 +1,41 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||
|
||||
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||
or alternatively under the terms of the Apache License Version 2.0.
|
||||
|
||||
You may use XOWA according to either of these licenses as is most appropriate
|
||||
for your project on a case-by-case basis.
|
||||
|
||||
The terms of each license can be found in the source code repository:
|
||||
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.addons.wikis.fulltexts.indexers.svcs; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.indexers.*;
|
||||
import gplx.langs.jsons.*;
|
||||
import gplx.xowa.htmls.bridges.*;
|
||||
public class Xofulltext_indexer_bridge implements Bridge_cmd_itm {
|
||||
private Xofulltext_indexer_svc svc;
|
||||
public void Init_by_app(Xoa_app app) {
|
||||
this.svc = new Xofulltext_indexer_svc(app);
|
||||
}
|
||||
public String Exec(Json_nde data) {
|
||||
byte proc_id = proc_hash.Get_as_byte_or(data.Get_as_bry_or(Bridge_cmd_mgr.Msg__proc, null), Byte_ascii.Max_7_bit);
|
||||
Json_nde args = data.Get_kv(Bridge_cmd_mgr.Msg__args).Val_as_nde();
|
||||
switch (proc_id) {
|
||||
case Proc__index: svc.Index(args); break;
|
||||
default: throw Err_.new_unhandled_default(proc_id);
|
||||
}
|
||||
return "";
|
||||
}
|
||||
|
||||
private static final byte Proc__index = 0;
|
||||
private static final Hash_adp_bry proc_hash = Hash_adp_bry.cs()
|
||||
.Add_str_byte("index" , Proc__index)
|
||||
;
|
||||
|
||||
public byte[] Key() {return BRIDGE_KEY;} public static final byte[] BRIDGE_KEY = Bry_.new_a7("xowa.wiki.fulltext.indexer");
|
||||
public static final Xofulltext_indexer_bridge Prototype = new Xofulltext_indexer_bridge(); Xofulltext_indexer_bridge() {}
|
||||
}
|
||||
@@ -0,0 +1,80 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||
|
||||
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||
or alternatively under the terms of the Apache License Version 2.0.
|
||||
|
||||
You may use XOWA according to either of these licenses as is most appropriate
|
||||
for your project on a case-by-case basis.
|
||||
|
||||
The terms of each license can be found in the source code repository:
|
||||
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.addons.wikis.fulltexts.indexers.svcs; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.indexers.*;
|
||||
import gplx.core.btries.*;
|
||||
import gplx.langs.jsons.*;
|
||||
import gplx.dbs.*; import gplx.xowa.wikis.data.tbls.*;
|
||||
import gplx.xowa.guis.cbks.*;
|
||||
import gplx.xowa.addons.apps.cfgs.*;
|
||||
import gplx.xowa.addons.wikis.fulltexts.indexers.specials.*;
|
||||
import gplx.xowa.addons.wikis.fulltexts.indexers.bldrs.*;
|
||||
class Xofulltext_indexer_svc implements Gfo_invk {
|
||||
private final Xoa_app app;
|
||||
private final Xog_cbk_trg cbk_trg = Xog_cbk_trg.New(Xofulltext_indexer_special.Prototype.Special__meta().Ttl_bry());
|
||||
public Xofulltext_indexer_svc(Xoa_app app) {
|
||||
this.app = app;
|
||||
}
|
||||
public void Index(Json_nde args) {
|
||||
// create args
|
||||
byte[] wikis_bry = args.Get_as_bry("wikis");
|
||||
Xofulltext_indexer_args indexer_args = new Xofulltext_indexer_args(wikis_bry);
|
||||
|
||||
// launch thread
|
||||
gplx.core.threads.Thread_adp_.Start_by_val("index", Cancelable_.Never, this, Invk__index, indexer_args);
|
||||
}
|
||||
private void Index(Xofulltext_indexer_args args) {
|
||||
// loop wikis
|
||||
byte[][] domain_ary = Bry_split_.Split(args.wikis, Byte_ascii.Pipe);
|
||||
for (byte[] domain : domain_ary) {
|
||||
// get wiki
|
||||
Xow_wiki wiki = app.Wiki_mgri().Get_by_or_make_init_n(domain);
|
||||
if (!Io_mgr.Instance.ExistsDir(wiki.Fsys_mgr().Root_dir())) {
|
||||
app.Gui__cbk_mgr().Send_json(cbk_trg, "xo.fulltext_indexer.status__note__recv", gplx.core.gfobjs.Gfobj_nde.New()
|
||||
.Add_str("note", Datetime_now.Get().XtoStr_fmt_yyyy_MM_dd_HH_mm_ss() + ": wiki does not exist: " + String_.new_u8(domain)));
|
||||
continue;
|
||||
}
|
||||
|
||||
wiki.Init_by_wiki();
|
||||
Io_url search_dir = wiki.Fsys_mgr().Root_dir().GenSubDir_nest("data", "search");
|
||||
if (Io_mgr.Instance.ExistsDir(search_dir)) {
|
||||
app.Gui__cbk_mgr().Send_json(cbk_trg, "xo.fulltext_indexer.status__note__recv", gplx.core.gfobjs.Gfobj_nde.New()
|
||||
.Add_str("note", Datetime_now.Get().XtoStr_fmt_yyyy_MM_dd_HH_mm_ss() + ": search dir already exists; please delete it manually before reindexing; " + String_.new_u8(domain)));
|
||||
continue;
|
||||
}
|
||||
|
||||
app.Gui__cbk_mgr().Send_json(cbk_trg, "xo.fulltext_indexer.status__note__recv", gplx.core.gfobjs.Gfobj_nde.New()
|
||||
.Add_str("note", Datetime_now.Get().XtoStr_fmt_yyyy_MM_dd_HH_mm_ss() + ": wiki index started: " + String_.new_u8(domain)));
|
||||
|
||||
new Xofulltext_indexer_mgr().Exec((Xowe_wiki)wiki, new Xofulltext_indexer_ui(app.Gui__cbk_mgr(), cbk_trg));
|
||||
|
||||
app.Gui__cbk_mgr().Send_json(cbk_trg, "xo.fulltext_indexer.status__note__recv", gplx.core.gfobjs.Gfobj_nde.New()
|
||||
.Add_str("note", Datetime_now.Get().XtoStr_fmt_yyyy_MM_dd_HH_mm_ss() + ": wiki index ended: " + String_.new_u8(domain)));
|
||||
}
|
||||
}
|
||||
|
||||
public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
|
||||
if (ctx.Match(k, Invk__index)) this.Index((Xofulltext_indexer_args)m.ReadObj("v"));
|
||||
else return Gfo_invk_.Rv_unhandled;
|
||||
return this;
|
||||
}
|
||||
private static final String Invk__index = "index";
|
||||
}
|
||||
/** Argument holder passed to the indexing thread. */
class Xofulltext_indexer_args {
	/** Raw "wikis" value as received; the indexing routine splits it on the pipe character. */
	public byte[] wikis;

	public Xofulltext_indexer_args(byte[] wikis) {
		this.wikis = wikis;
	}
}
|
||||
@@ -0,0 +1,30 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||
|
||||
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||
or alternatively under the terms of the Apache License Version 2.0.
|
||||
|
||||
You may use XOWA according to either of these licenses as is most appropriate
|
||||
for your project on a case-by-case basis.
|
||||
|
||||
The terms of each license can be found in the source code repository:
|
||||
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.addons.wikis.fulltexts.indexers.svcs; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.indexers.*;
|
||||
import gplx.xowa.guis.cbks.*;
|
||||
public class Xofulltext_indexer_ui {
|
||||
private final Xog_cbk_mgr cbk_mgr;
|
||||
private final Xog_cbk_trg cbk_trg;
|
||||
public Xofulltext_indexer_ui(Xog_cbk_mgr cbk_mgr, Xog_cbk_trg cbk_trg) {
|
||||
this.cbk_mgr = cbk_mgr;
|
||||
this.cbk_trg = cbk_trg;
|
||||
}
|
||||
public void Send_prog(String prog) {
|
||||
cbk_mgr.Send_json(cbk_trg, "xo.fulltext_indexer.status__prog__recv", gplx.core.gfobjs.Gfobj_nde.New()
|
||||
.Add_str("prog", prog)
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,24 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||
|
||||
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||
or alternatively under the terms of the Apache License Version 2.0.
|
||||
|
||||
You may use XOWA according to either of these licenses as is most appropriate
|
||||
for your project on a case-by-case basis.
|
||||
|
||||
The terms of each license can be found in the source code repository:
|
||||
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.addons.wikis.fulltexts.searchers.caches; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.searchers.*;
|
||||
/** Immutable cache entry for one result line: its sequence number and its html. */
public class Xofulltext_cache_line {
	private final int line_seq;
	private final byte[] line_html;

	public Xofulltext_cache_line(int line_seq, byte[] line_html) {
		this.line_seq = line_seq;
		this.line_html = line_html;
	}

	/** Sequence number given at construction. */
	public int Line_seq() {
		return line_seq;
	}

	/** Html bytes given at construction (same array instance, not a copy). */
	public byte[] Line_html() {
		return line_html;
	}
}
|
||||
@@ -0,0 +1,67 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||
|
||||
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||
or alternatively under the terms of the Apache License Version 2.0.
|
||||
|
||||
You may use XOWA according to either of these licenses as is most appropriate
|
||||
for your project on a case-by-case basis.
|
||||
|
||||
The terms of each license can be found in the source code repository:
|
||||
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.addons.wikis.fulltexts.searchers.caches; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.searchers.*;
|
||||
public class Xofulltext_cache_mgr {
|
||||
private final Ordered_hash qry_hash = Ordered_hash_.New();
|
||||
public int Next_qry_id() {return next_qry_id++;} private int next_qry_id;
|
||||
public void Clear() {
|
||||
qry_hash.Clear();
|
||||
}
|
||||
public void Add(int query_id, byte[] query, byte[] wiki_bry, int page_seq, int page_id, int line_seq, byte[] line_html) {
|
||||
// get qry
|
||||
Xofulltext_cache_qry qry = (Xofulltext_cache_qry)qry_hash.Get_by(query_id);
|
||||
if (qry == null) {
|
||||
qry = new Xofulltext_cache_qry(query_id, query);
|
||||
qry_hash.Add(query_id, qry);
|
||||
}
|
||||
|
||||
// get wiki
|
||||
Xofulltext_cache_wiki wiki = (Xofulltext_cache_wiki)qry.Wikis().Get_by(wiki_bry);
|
||||
if (wiki == null) {
|
||||
wiki = new Xofulltext_cache_wiki(wiki_bry);
|
||||
qry.Wikis().Add(wiki_bry, wiki);
|
||||
}
|
||||
|
||||
// get page
|
||||
Xofulltext_cache_page page = (Xofulltext_cache_page)wiki.Pages().Get_by(page_id);
|
||||
if (page == null) {
|
||||
page = new Xofulltext_cache_page(page_id, page_seq);
|
||||
wiki.Pages().Add(page_id, page);
|
||||
}
|
||||
|
||||
// add line
|
||||
Xofulltext_cache_line line = new Xofulltext_cache_line(line_seq, line_html);
|
||||
page.Lines().Add(line);
|
||||
}
|
||||
public Object Get_pages_rng(int qry_id, byte[] wiki, int page_seq_bgn, int page_seq_end) {
|
||||
return null;
|
||||
}
|
||||
public Xofulltext_cache_line[] Get_lines_rest(int qry_id, byte[] wiki_bry, int page_id) {
|
||||
// get page
|
||||
Xofulltext_cache_qry qry = (Xofulltext_cache_qry)qry_hash.Get_by(qry_id);
|
||||
Xofulltext_cache_wiki wiki = (Xofulltext_cache_wiki)qry.Wikis().Get_by(wiki_bry);
|
||||
Xofulltext_cache_page page = (Xofulltext_cache_page)wiki.Pages().Get_by(page_id);
|
||||
|
||||
// loop lines from 1 to n; note "1" b/c results will always show at least 1st line
|
||||
List_adp list = List_adp_.New();
|
||||
int lines_len = page.Lines().Len();
|
||||
for (int i = 1; i < lines_len; i++) {
|
||||
Xofulltext_cache_line line = (Xofulltext_cache_line)page.Lines().Get_at(i);
|
||||
list.Add(line);
|
||||
}
|
||||
return (Xofulltext_cache_line[])list.To_ary_and_clear(Xofulltext_cache_line.class);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,25 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||
|
||||
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||
or alternatively under the terms of the Apache License Version 2.0.
|
||||
|
||||
You may use XOWA according to either of these licenses as is most appropriate
|
||||
for your project on a case-by-case basis.
|
||||
|
||||
The terms of each license can be found in the source code repository:
|
||||
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.addons.wikis.fulltexts.searchers.caches; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.searchers.*;
|
||||
public class Xofulltext_cache_page {
|
||||
public Xofulltext_cache_page(int page_id, int page_seq) {
|
||||
this.page_id = page_id;
|
||||
this.page_seq = page_seq;
|
||||
}
|
||||
public int Page_id() {return page_id;} private final int page_id;
|
||||
public int Page_seq() {return page_seq;} private final int page_seq;
|
||||
public List_adp Lines() {return lines;} private final List_adp lines = List_adp_.New();
|
||||
}
|
||||
@@ -0,0 +1,25 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||
|
||||
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||
or alternatively under the terms of the Apache License Version 2.0.
|
||||
|
||||
You may use XOWA according to either of these licenses as is most appropriate
|
||||
for your project on a case-by-case basis.
|
||||
|
||||
The terms of each license can be found in the source code repository:
|
||||
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.addons.wikis.fulltexts.searchers.caches; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.searchers.*;
|
||||
public class Xofulltext_cache_qry {
|
||||
public Xofulltext_cache_qry(int id, byte[] qry) {
|
||||
this.id = id;
|
||||
this.qry = qry;
|
||||
}
|
||||
public int Id() {return id;} private final int id;
|
||||
public byte[] Qry() {return qry;} private final byte[] qry;
|
||||
public Hash_adp_bry Wikis() {return wikis;} private final Hash_adp_bry wikis = Hash_adp_bry.cs();
|
||||
}
|
||||
@@ -0,0 +1,23 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||
|
||||
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||
or alternatively under the terms of the Apache License Version 2.0.
|
||||
|
||||
You may use XOWA according to either of these licenses as is most appropriate
|
||||
for your project on a case-by-case basis.
|
||||
|
||||
The terms of each license can be found in the source code repository:
|
||||
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.addons.wikis.fulltexts.searchers.caches; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.searchers.*;
|
||||
public class Xofulltext_cache_wiki {
|
||||
public Xofulltext_cache_wiki(byte[] wiki) {
|
||||
this.wiki = wiki;
|
||||
}
|
||||
public byte[] Wiki() {return wiki;} private final byte[] wiki;
|
||||
public Ordered_hash Pages() {return pages;} private final Ordered_hash pages = Ordered_hash_.New();
|
||||
}
|
||||
@@ -0,0 +1,20 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||
|
||||
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||
or alternatively under the terms of the Apache License Version 2.0.
|
||||
|
||||
You may use XOWA according to either of these licenses as is most appropriate
|
||||
for your project on a case-by-case basis.
|
||||
|
||||
The terms of each license can be found in the source code repository:
|
||||
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.addons.wikis.fulltexts.searchers.mgrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.searchers.*;
|
||||
import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.uis.*;
|
||||
public interface Xofulltext_searcher {
|
||||
void Search(Xofulltext_searcher_ui ui, Xow_wiki wiki, Xofulltext_searcher_args args);
|
||||
}
|
||||
@@ -0,0 +1,42 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||
|
||||
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||
or alternatively under the terms of the Apache License Version 2.0.
|
||||
|
||||
You may use XOWA according to either of these licenses as is most appropriate
|
||||
for your project on a case-by-case basis.
|
||||
|
||||
The terms of each license can be found in the source code repository:
|
||||
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.addons.wikis.fulltexts.searchers.mgrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.searchers.*;
|
||||
import gplx.langs.jsons.*;
|
||||
public class Xofulltext_searcher_args {
|
||||
public boolean case_match;
|
||||
public boolean auto_wildcard_bgn;
|
||||
public boolean auto_wildcard_end;
|
||||
public boolean expand_matches_section;
|
||||
public boolean show_all_matches;
|
||||
public int max_pages_per_wiki;
|
||||
public byte[] wikis;
|
||||
public byte[] query;
|
||||
public String namespaces;
|
||||
public int query_id;
|
||||
public static Xofulltext_searcher_args New_by_json(Json_nde args) {
|
||||
Xofulltext_searcher_args rv = new Xofulltext_searcher_args();
|
||||
rv.case_match = args.Get_as_bool_or("case_match", false);
|
||||
rv.auto_wildcard_bgn = args.Get_as_bool_or("auto_wildcard_bgn", false);
|
||||
rv.auto_wildcard_end = args.Get_as_bool_or("auto_wildcard_end", false);
|
||||
rv.expand_matches_section = args.Get_as_bool_or("expand_matches_section", false);
|
||||
rv.show_all_matches = args.Get_as_bool_or("show_all_matches", false);
|
||||
rv.max_pages_per_wiki = args.Get_as_int_or("max_pages_per_wiki", 25);
|
||||
rv.wikis = args.Get_as_bry("wikis");
|
||||
rv.query = args.Get_as_bry("query");
|
||||
rv.namespaces = args.Get_as_str("namespaces");
|
||||
return rv;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,90 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||
|
||||
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||
or alternatively under the terms of the Apache License Version 2.0.
|
||||
|
||||
You may use XOWA according to either of these licenses as is most appropriate
|
||||
for your project on a case-by-case basis.
|
||||
|
||||
The terms of each license can be found in the source code repository:
|
||||
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.brutes; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.searchers.*; import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.*;
|
||||
import gplx.dbs.*;
|
||||
import gplx.xowa.guis.cbks.*;
|
||||
import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.uis.*;
|
||||
import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.brutes.finders.*;
|
||||
import gplx.xowa.addons.wikis.fulltexts.searchers.caches.*;
|
||||
public class Xofulltext_searcher__brute implements Xofulltext_searcher {
|
||||
private final Xofulltext_finder_mgr finder = new Xofulltext_finder_mgr();
|
||||
private final Xofulltext_finder_cbk__eval cbk_eval = new Xofulltext_finder_cbk__eval();
|
||||
private final Xofulltext_finder_cbk__highlight cbk_highlight;
|
||||
public Xofulltext_searcher__brute(Xoa_app app, Xog_cbk_trg cbk_trg, Xofulltext_cache_mgr cache_mgr) {
|
||||
this.cbk_highlight = new Xofulltext_finder_cbk__highlight(app, cbk_trg, cache_mgr);
|
||||
}
|
||||
public void Search(Xofulltext_searcher_ui ui, Xow_wiki wiki, Xofulltext_searcher_args args) {
|
||||
// get pages from db
|
||||
Db_conn page_conn = wiki.Data__core_mgr().Tbl__page().Conn();
|
||||
Db_rdr page_rdr = page_conn.Stmt_sql("SELECT * FROM page WHERE page_namespace IN (0) ORDER BY page_score DESC").Exec_select__rls_auto();
|
||||
|
||||
// init finder
|
||||
finder.Init(args.query, args.case_match, args.auto_wildcard_bgn, args.auto_wildcard_end, Byte_ascii.Star, Byte_ascii.Dash);
|
||||
|
||||
// loop
|
||||
byte[] wiki_domain = wiki.Domain_bry();
|
||||
int found = 0;
|
||||
int searched = 0;
|
||||
try {
|
||||
while (page_rdr.Move_next()) {
|
||||
// read data from reader
|
||||
int page_id = page_rdr.Read_int("page_id");
|
||||
int text_db_id = page_rdr.Read_int("page_text_db_id");
|
||||
byte[] text_mcase = wiki.Data__core_mgr().Dbs__get_by_id_or_fail(text_db_id).Tbl__text().Select(page_id);
|
||||
int ns_id = page_rdr.Read_int("page_namespace");
|
||||
byte[] ttl_bry = page_rdr.Read_bry_by_str("page_title");
|
||||
Xoa_ttl ttl = wiki.Ttl_parse(ns_id, ttl_bry);
|
||||
|
||||
// eval query
|
||||
cbk_eval.Init(ttl.Full_db());
|
||||
finder.Match(text_mcase, 0, text_mcase.length, cbk_eval);
|
||||
searched++;
|
||||
|
||||
// check if page matches query
|
||||
if (cbk_eval.found) {
|
||||
++found;
|
||||
|
||||
// update pages found
|
||||
ui.Send_wiki_update(wiki_domain, found, searched);
|
||||
|
||||
// do highlight
|
||||
if (found <= args.max_pages_per_wiki) {
|
||||
cbk_highlight.Init(args.query, args.query_id, wiki, page_id, ttl.Full_db(), args.show_all_matches);
|
||||
ui.Send_page_add(new Xofulltext_searcher_page
|
||||
( args.query_id
|
||||
, String_.new_u8(wiki_domain)
|
||||
, page_id
|
||||
, String_.new_u8(ttl.Full_db())
|
||||
, args.expand_matches_section
|
||||
));
|
||||
finder.Match(text_mcase, 0, text_mcase.length, cbk_highlight);
|
||||
}
|
||||
}
|
||||
|
||||
// update update pages found every 100 pages
|
||||
if (searched % 100 == 0) {
|
||||
ui.Send_wiki_update(wiki_domain, found, searched);
|
||||
}
|
||||
}
|
||||
}
|
||||
finally {
|
||||
page_rdr.Rls();
|
||||
}
|
||||
|
||||
// update one last time for final searched
|
||||
ui.Send_wiki_update(wiki_domain, found, searched);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,22 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||
|
||||
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||
or alternatively under the terms of the Apache License Version 2.0.
|
||||
|
||||
You may use XOWA according to either of these licenses as is most appropriate
|
||||
for your project on a case-by-case basis.
|
||||
|
||||
The terms of each license can be found in the source code repository:
|
||||
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.brutes.finders; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.searchers.*; import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.*; import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.brutes.*;
|
||||
import gplx.xowa.guis.cbks.*;
|
||||
public interface Xofulltext_finder_cbk {
|
||||
byte[] Page_ttl();
|
||||
void Process_item_found(byte[] src, int hook_bgn, int hook_end, int word_bgn, int word_end, Xofulltext_word_node term);
|
||||
void Process_page_done(byte[] src, Xofulltext_word_node tree_root);
|
||||
}
|
||||
@@ -0,0 +1,30 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||
|
||||
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||
or alternatively under the terms of the Apache License Version 2.0.
|
||||
|
||||
You may use XOWA according to either of these licenses as is most appropriate
|
||||
for your project on a case-by-case basis.
|
||||
|
||||
The terms of each license can be found in the source code repository:
|
||||
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.brutes.finders; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.searchers.*; import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.*; import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.brutes.*;
|
||||
public class Xofulltext_finder_cbk__eval implements Xofulltext_finder_cbk {
|
||||
public boolean found;
|
||||
public byte[] Page_ttl() {return page_ttl;} private byte[] page_ttl;
|
||||
public void Init(byte[] page_ttl) {
|
||||
this.found = false;
|
||||
this.page_ttl = page_ttl;
|
||||
}
|
||||
public void Process_item_found(byte[] src, int hook_bgn, int hook_end, int word_bgn, int word_end, Xofulltext_word_node term) {
|
||||
term.found = true;
|
||||
}
|
||||
public void Process_page_done(byte[] src, Xofulltext_word_node root) {
|
||||
this.found = root.Eval();
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,122 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||
|
||||
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||
or alternatively under the terms of the Apache License Version 2.0.
|
||||
|
||||
You may use XOWA according to either of these licenses as is most appropriate
|
||||
for your project on a case-by-case basis.
|
||||
|
||||
The terms of each license can be found in the source code repository:
|
||||
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.brutes.finders; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.searchers.*; import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.*; import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.brutes.*;
|
||||
import org.junit.*; import gplx.core.tests.*;
|
||||
public class Xofulltext_finder_cbk__eval__tst {
|
||||
private final Xofulltext_finder_cbk__eval__fxt fxt = new Xofulltext_finder_cbk__eval__fxt();
|
||||
@Test public void Exact() {
|
||||
fxt.Init__search("a");
|
||||
// y: basic match
|
||||
fxt.Test__eval_y("a");
|
||||
// n: no match
|
||||
fxt.Test__eval_n("z");
|
||||
// n: wildcard_bgn not enabled
|
||||
fxt.Test__eval_n("az");
|
||||
}
|
||||
@Test public void Or() {
|
||||
fxt.Init__search("a, c");
|
||||
// y: lone char
|
||||
fxt.Test__eval_y("a" , "c");
|
||||
// y: one char
|
||||
fxt.Test__eval_y("a b", "b c");
|
||||
// y: both chars
|
||||
fxt.Test__eval_y("a c", "a b c");
|
||||
// n: no chars
|
||||
fxt.Test__eval_n("b");
|
||||
}
|
||||
@Test public void And() {
|
||||
fxt.Init__search("a + c");
|
||||
// y: both chars
|
||||
fxt.Test__eval_y("a c", "a b c");
|
||||
// n: one char only
|
||||
fxt.Test__eval_n("a", "c", "a b", "b c");
|
||||
}
|
||||
@Test public void And__shorthand() {
|
||||
fxt.Init__search("a c");
|
||||
// y: both chars
|
||||
fxt.Test__eval_y("a b c");
|
||||
// n: one char only
|
||||
fxt.Test__eval_n("a", "c");
|
||||
}
|
||||
@Test public void Not() {
|
||||
fxt.Init__search("-a");
|
||||
// y: no chars
|
||||
fxt.Test__eval_y("b");
|
||||
// n: char exists
|
||||
fxt.Test__eval_n("a");
|
||||
}
|
||||
@Test public void Trim_end() {
|
||||
fxt.Init__search("a");
|
||||
// y: single
|
||||
fxt.Test__eval_y("a!");
|
||||
// y: many
|
||||
fxt.Test__eval_y("a!!!");
|
||||
}
|
||||
@Test public void Trim_bgn() {
|
||||
fxt.Init__search("a");
|
||||
// y: single
|
||||
fxt.Test__eval_y("!a");
|
||||
// y: many
|
||||
fxt.Test__eval_y("!!!a");
|
||||
}
|
||||
@Test public void Trim_both() {
|
||||
fxt.Init__search("a");
|
||||
// y: single
|
||||
fxt.Test__eval_y("'a'");
|
||||
// y: many
|
||||
fxt.Test__eval_y("'''a'''");
|
||||
}
|
||||
@Test public void Slash() {
|
||||
fxt.Init__search("a");
|
||||
// y: slash before, after
|
||||
fxt.Test__eval_y("a/b/c", "b/a/c", "b/c/a");
|
||||
}
|
||||
@Test public void Brack() {
|
||||
fxt.Init__search("a");
|
||||
// y
|
||||
fxt.Test__eval_y("[[a]]");
|
||||
}
|
||||
// .
|
||||
// ...
|
||||
// -
|
||||
// a'b
|
||||
// https://site/page
|
||||
// ()
|
||||
// []
|
||||
// <>
|
||||
}
|
||||
class Xofulltext_finder_cbk__eval__fxt {
|
||||
private boolean case_match = false;
|
||||
private boolean auto_wildcard_bgn = false;
|
||||
private boolean auto_wildcard_end = false;
|
||||
private byte wildcard_byte = Byte_ascii.Star;
|
||||
private byte not_byte = Byte_ascii.Dash;
|
||||
private final Xofulltext_finder_mgr finder = new Xofulltext_finder_mgr();
|
||||
private final Xofulltext_finder_cbk__eval cbk = new Xofulltext_finder_cbk__eval();
|
||||
public void Init__search(String query) {
|
||||
finder.Init(Bry_.new_u8(query), case_match, auto_wildcard_bgn, auto_wildcard_end, wildcard_byte, not_byte);
|
||||
}
|
||||
public void Test__eval_y(String... texts) {Test__eval(Bool_.Y, texts);}
|
||||
public void Test__eval_n(String... texts) {Test__eval(Bool_.N, texts);}
|
||||
public void Test__eval(boolean expd, String... texts) {
|
||||
for (String text : texts) {
|
||||
byte[] text_bry = Bry_.new_u8(text);
|
||||
cbk.found = false;
|
||||
finder.Match(text_bry, 0, text_bry.length, cbk);
|
||||
Gftest.Eq__bool(expd, cbk.found, "query={0} text={1}", finder.Query(), text);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,109 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||
|
||||
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||
or alternatively under the terms of the Apache License Version 2.0.
|
||||
|
||||
You may use XOWA according to either of these licenses as is most appropriate
|
||||
for your project on a case-by-case basis.
|
||||
|
||||
The terms of each license can be found in the source code repository:
|
||||
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.brutes.finders; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.searchers.*; import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.*; import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.brutes.*;
|
||||
import gplx.xowa.guis.cbks.*;
|
||||
import gplx.xowa.addons.wikis.fulltexts.searchers.caches.*;
|
||||
public class Xofulltext_finder_cbk__highlight implements Xofulltext_finder_cbk {
|
||||
private final Xog_cbk_trg cbk_trg;
|
||||
private final Xoa_app app;
|
||||
private final Xofulltext_cache_mgr cache_mgr;
|
||||
private Xow_wiki wiki;
|
||||
private byte[] qry;
|
||||
private int qry_id;
|
||||
private int page_id;
|
||||
private final Bry_bfr tmp_bfr = Bry_bfr_.New();
|
||||
public int found;
|
||||
private boolean show_all_matches;
|
||||
public Xofulltext_finder_cbk__highlight(Xoa_app app, Xog_cbk_trg cbk_trg, Xofulltext_cache_mgr cache_mgr) {
|
||||
this.app = app;
|
||||
this.cbk_trg = cbk_trg;
|
||||
this.cache_mgr = cache_mgr;
|
||||
}
|
||||
public byte[] Page_ttl() {return page_ttl;} private byte[] page_ttl;
|
||||
public void Init(byte[] qry, int qry_id, Xow_wiki wiki, int page_id, byte[] page_ttl, boolean show_all_matches) {
|
||||
this.qry = qry;
|
||||
this.qry_id = qry_id;
|
||||
this.wiki = wiki;
|
||||
this.page_id = page_id;
|
||||
this.page_ttl= page_ttl;
|
||||
this.show_all_matches = show_all_matches;
|
||||
found = 0;
|
||||
}
|
||||
public void Process_item_found(byte[] src, int hook_bgn, int hook_end, int word_bgn, int word_end, Xofulltext_word_node term) {
|
||||
// if (found < max_snips_per_page) {
|
||||
// get snip bounds by finding flanking 50 chars and then expanding to word-bounds
|
||||
int snip_bgn = hook_bgn - 50;
|
||||
if (snip_bgn < 0)
|
||||
snip_bgn = 0;
|
||||
else {
|
||||
snip_bgn = Bry_find_.Find_bwd_ws(src, snip_bgn, 0) + 1;
|
||||
}
|
||||
int snip_end = hook_end + 50;
|
||||
if (snip_end >= src.length)
|
||||
snip_end = src.length;
|
||||
else {
|
||||
snip_end = Bry_find_.Find_fwd_until_ws(src, snip_end, src.length);
|
||||
if (snip_end == Bry_find_.Not_found) { // when snip_end == src.length
|
||||
snip_end = src.length;
|
||||
}
|
||||
}
|
||||
|
||||
// build snip
|
||||
Add_snip(tmp_bfr, src, snip_bgn, hook_bgn);
|
||||
tmp_bfr.Add_str_a7("<span class='snip_highlight'>");
|
||||
Add_snip(tmp_bfr, src, hook_bgn, hook_end);
|
||||
tmp_bfr.Add_str_a7("</span>");
|
||||
Add_snip(tmp_bfr, src, hook_end, snip_end);
|
||||
|
||||
// send notification
|
||||
byte[] line_html = tmp_bfr.To_bry_and_clear();
|
||||
if (found == 0 || show_all_matches) {
|
||||
app.Gui__cbk_mgr().Send_json(cbk_trg, "xo.fulltext_searcher.results__line__add__recv", gplx.core.gfobjs.Gfobj_nde.New()
|
||||
.Add_bry("wiki", wiki.Domain_bry())
|
||||
.Add_int("page_id", page_id)
|
||||
.Add_int("line", found + 1)
|
||||
.Add_bry("html", line_html)
|
||||
);
|
||||
}
|
||||
cache_mgr.Add(qry_id, qry, wiki.Domain_bry(), -1, page_id, found, line_html);
|
||||
// }
|
||||
found++;
|
||||
app.Gui__cbk_mgr().Send_json(cbk_trg, "xo.fulltext_searcher.results__page__update__recv", gplx.core.gfobjs.Gfobj_nde.New()
|
||||
.Add_bry("wiki", wiki.Domain_bry())
|
||||
.Add_int("page_id", page_id)
|
||||
.Add_int("found", found)
|
||||
.Add_bool("show_all_matches", show_all_matches)
|
||||
);
|
||||
}
|
||||
private static final byte[] Angle_bgn_escaped = Bry_.new_a7("<");
|
||||
private void Add_snip(Bry_bfr bfr, byte[] src, int bgn, int end) {
|
||||
for (int i = bgn; i < end; i++) {
|
||||
byte b = src[i];
|
||||
switch (b) {
|
||||
case Byte_ascii.Angle_bgn:
|
||||
bfr.Add(Angle_bgn_escaped);
|
||||
break;
|
||||
case Byte_ascii.Nl:
|
||||
bfr.Add(gplx.langs.htmls.Gfh_tag_.Br_inl);
|
||||
break;
|
||||
default:
|
||||
bfr.Add_byte(b);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
public void Process_page_done(byte[] src, Xofulltext_word_node tree_root) {}
|
||||
}
|
||||
@@ -0,0 +1,81 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||
|
||||
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||
or alternatively under the terms of the Apache License Version 2.0.
|
||||
|
||||
You may use XOWA according to either of these licenses as is most appropriate
|
||||
for your project on a case-by-case basis.
|
||||
|
||||
The terms of each license can be found in the source code repository:
|
||||
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.brutes.finders; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.searchers.*; import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.*; import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.brutes.*;
|
||||
import gplx.xowa.guis.cbks.*;
|
||||
import gplx.core.btries.*;
|
||||
import gplx.xowa.addons.wikis.searchs.searchers.crts.*;
|
||||
public class Xofulltext_finder_mgr {
|
||||
private Btrie_slim_mgr hook_trie;
|
||||
private Xofulltext_word_node tree_root;
|
||||
private final Srch_crt_parser parser = new Srch_crt_parser(Srch_crt_scanner_syms.Dflt);
|
||||
private final Btrie_rv trv = new Btrie_rv();
|
||||
private final Xofulltext_word_lang lang = new Xofulltext_word_lang();
|
||||
private final Xofulltext_word_bounds word_bounds = new Xofulltext_word_bounds();
|
||||
|
||||
public byte[] Query() {return query;} private byte[] query;
|
||||
public void Init(byte[] query, boolean case_match, boolean auto_wildcard_bgn, boolean auto_wildcard_end, byte wildchar_byte, byte not_byte) {
|
||||
this.query = query;
|
||||
// create a new hook_trie based on case_match
|
||||
this.hook_trie = case_match ? Btrie_slim_mgr.cs() : Btrie_slim_mgr.ci_u8();
|
||||
|
||||
// create a new tree_root for eval
|
||||
this.tree_root = Xofulltext_word_node_.New_root(parser.Parse_or_invalid(query).Root, hook_trie, auto_wildcard_bgn, auto_wildcard_end, wildchar_byte, not_byte);
|
||||
}
|
||||
public void Match(byte[] src, int src_bgn, int src_end, Xofulltext_finder_cbk cbk) {
|
||||
// init and clear
|
||||
int cur = 0;
|
||||
tree_root.Clear();
|
||||
|
||||
// scan through text one-byte at a time
|
||||
// NOTE: skipping ahead to word-start instead of going byte-by-byte may seem more performant, but will still need to do substring analysis b/c of wildcards and punctuation; EX: "abc" and " 'abc' "; "*abc" and " xyzabc. "
|
||||
while (cur <= src_end) {
|
||||
// check each byte against hook_trie
|
||||
Object hook_obj = hook_trie.Match_at(trv, src, cur, src_end);
|
||||
|
||||
// current byte matches no hooks; go to next byte
|
||||
if (hook_obj == null) {
|
||||
cur++;
|
||||
continue;
|
||||
}
|
||||
|
||||
// current byte matches a hook; get hook and hook_end
|
||||
Xofulltext_word_node hook = (Xofulltext_word_node)hook_obj;
|
||||
int hook_bgn = cur;
|
||||
int hook_end = cur + hook.word_hook.length;
|
||||
|
||||
try {
|
||||
// get word_bounds
|
||||
lang.Get_word_bounds(word_bounds, trv, src, src_end, hook_bgn, hook_end);
|
||||
int word_bgn = word_bounds.word_bgn;
|
||||
int word_end = word_bounds.word_end;
|
||||
|
||||
// check if current word matches criteria-word
|
||||
if (hook.Match_word(lang, src, hook_bgn, hook_end, word_bgn, word_end)) {
|
||||
cbk.Process_item_found(src, hook_bgn, hook_end, word_bgn, word_end, hook);
|
||||
}
|
||||
|
||||
// update position to word_end
|
||||
cur = word_end;
|
||||
} catch (Exception e) {
|
||||
cur = hook_end;
|
||||
Gfo_usr_dlg_.Instance.Warn_many("", "", "fatal error in match; page=~{0} hook=~{1} src=~{2}", cbk.Page_ttl(), hook.word_orig, Err_.Message_gplx_log(e));
|
||||
}
|
||||
}
|
||||
|
||||
// mark page done
|
||||
cbk.Process_page_done(src, tree_root);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,24 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||
|
||||
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||
or alternatively under the terms of the Apache License Version 2.0.
|
||||
|
||||
You may use XOWA according to either of these licenses as is most appropriate
|
||||
for your project on a case-by-case basis.
|
||||
|
||||
The terms of each license can be found in the source code repository:
|
||||
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.brutes.finders; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.searchers.*; import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.*; import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.brutes.*;
|
||||
// Reusable out-parameter holding the byte range of a word: word_bgn and word_end.
public class Xofulltext_word_bounds {
  public int word_bgn;
  public int word_end;

  // overwrites both bounds
  public void Init(int word_bgn, int word_end) {
    this.word_end = word_end;
    this.word_bgn = word_bgn;
  }
}
|
||||
@@ -0,0 +1,119 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||
|
||||
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||
or alternatively under the terms of the Apache License Version 2.0.
|
||||
|
||||
You may use XOWA according to either of these licenses as is most appropriate
|
||||
for your project on a case-by-case basis.
|
||||
|
||||
The terms of each license can be found in the source code repository:
|
||||
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.brutes.finders; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.searchers.*; import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.*; import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.brutes.*;
|
||||
import gplx.core.btries.*;
|
||||
import gplx.core.intls.*;
|
||||
public class Xofulltext_word_lang {
|
||||
private final Btrie_slim_mgr ws_bgn = Btrie_slim_mgr.cs()
|
||||
.Add_many_str("\t", "\n", "\r", " ", "/", "(", ")", "[", "]", "<", ">");
|
||||
private final Btrie_slim_mgr ws_end;
|
||||
private final Btrie_slim_mgr punct_bgn = Btrie_slim_mgr.cs()
|
||||
.Add_many_str(".", ",", "?", "!", ":", ";", "'", "\"", "-")
|
||||
;
|
||||
private final Btrie_slim_mgr punct_end;
|
||||
public Xofulltext_word_lang() {
|
||||
this.ws_end = ws_bgn;
|
||||
this.punct_end = punct_bgn;
|
||||
}
|
||||
public void Get_word_bounds(Xofulltext_word_bounds word_bounds, Btrie_rv trv, byte[] src, int src_end, int hook_bgn, int hook_end) {
|
||||
int tmp_pos = -1;
|
||||
Object tmp_obj = null;
|
||||
|
||||
// find word_bgn
|
||||
int word_bgn = hook_bgn;
|
||||
tmp_pos = word_bgn;
|
||||
while (true) {
|
||||
// stop if BOS
|
||||
if (tmp_pos == 0) break;
|
||||
|
||||
// move back one char
|
||||
tmp_pos = Utf8_.Get_prv_char_pos0(src, tmp_pos);
|
||||
|
||||
// check if char is ws
|
||||
tmp_obj = ws_bgn.Match_at(trv, src, tmp_pos, hook_end);
|
||||
|
||||
// char is ws -> stop
|
||||
if (tmp_obj != null) break;
|
||||
|
||||
// char is not ws -> update word_end
|
||||
word_bgn = tmp_pos;
|
||||
}
|
||||
|
||||
// find word_end
|
||||
int word_end = hook_end;
|
||||
tmp_pos = word_end;
|
||||
while (true) {
|
||||
// stop if passed EOS
|
||||
if (tmp_pos >= src_end) break;
|
||||
|
||||
// check if char is ws
|
||||
tmp_obj = ws_end.Match_at(trv, src, tmp_pos, src_end);
|
||||
|
||||
// stop if ws
|
||||
if (tmp_obj != null) break;
|
||||
|
||||
// increment before
|
||||
tmp_pos++;
|
||||
|
||||
// update word_end
|
||||
word_end = tmp_pos;
|
||||
}
|
||||
|
||||
// trim punct at bgn; EX: "'abc" -> "abc"
|
||||
if (word_bgn < hook_bgn) {
|
||||
tmp_pos = word_bgn;
|
||||
while (true) {
|
||||
// stop if passed hook-end
|
||||
if (tmp_pos >= hook_bgn) break;
|
||||
|
||||
// check if char is punct
|
||||
tmp_obj = punct_bgn.Match_at(trv, src, tmp_pos, word_end);
|
||||
|
||||
// stop if not a punct
|
||||
if (tmp_obj == null) break;
|
||||
|
||||
// increment before
|
||||
tmp_pos++;
|
||||
|
||||
// update word_end
|
||||
word_bgn = tmp_pos;
|
||||
}
|
||||
}
|
||||
|
||||
// trim punct at end; EX: "abc." -> "abc"
|
||||
if (word_end > hook_end) {
|
||||
tmp_pos = word_end;
|
||||
while (true) {
|
||||
// scan bwd one char
|
||||
tmp_pos = Utf8_.Get_prv_char_pos0(src, tmp_pos);
|
||||
|
||||
// stop if passed hook-end
|
||||
if (tmp_pos < hook_end) break;
|
||||
|
||||
// check if char is punct
|
||||
tmp_obj = punct_end.Match_at(trv, src, tmp_pos, word_end);
|
||||
|
||||
// stop if not a punct
|
||||
if (tmp_obj == null) break;
|
||||
|
||||
// update word_end
|
||||
word_end = tmp_pos;
|
||||
}
|
||||
}
|
||||
|
||||
word_bounds.Init(word_bgn, word_end);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,69 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||
|
||||
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||
or alternatively under the terms of the Apache License Version 2.0.
|
||||
|
||||
You may use XOWA according to either of these licenses as is most appropriate
|
||||
for your project on a case-by-case basis.
|
||||
|
||||
The terms of each license can be found in the source code repository:
|
||||
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.brutes.finders; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.searchers.*; import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.*; import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.brutes.*;
|
||||
import gplx.core.btries.*;
|
||||
import gplx.xowa.addons.wikis.searchs.searchers.crts.*;
|
||||
public class Xofulltext_word_node {
|
||||
public int tid;
|
||||
public Xofulltext_word_node[] subs;
|
||||
public byte[] word_orig;
|
||||
public byte[] word_hook;
|
||||
public boolean wildcard_at_bgn;
|
||||
public boolean wildcard_at_end;
|
||||
public boolean found;
|
||||
|
||||
public boolean Match_word(Xofulltext_word_lang ctx, byte[] src, int hook_bgn, int hook_end, int word_bgn, int word_end) {
|
||||
// if no wildcard at bgn, hook_bgn must match word_bgn
|
||||
if ( !wildcard_at_bgn
|
||||
&& hook_bgn != word_bgn)
|
||||
return false;
|
||||
|
||||
// if no wildcard at end, hook_end must match word_end
|
||||
if ( !wildcard_at_end
|
||||
&& hook_end != word_end)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
public void Clear() {
|
||||
found = false;
|
||||
for (Xofulltext_word_node sub : subs)
|
||||
sub.Clear();
|
||||
}
|
||||
public boolean Eval() {
|
||||
switch (tid) {
|
||||
case Srch_crt_itm.Tid__and: {
|
||||
for (Xofulltext_word_node sub : subs)
|
||||
if (!sub.Eval())
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
case Srch_crt_itm.Tid__or: {
|
||||
for (Xofulltext_word_node sub : subs)
|
||||
if (sub.Eval())
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
case Srch_crt_itm.Tid__word:
|
||||
case Srch_crt_itm.Tid__word_quote:
|
||||
return found;
|
||||
case Srch_crt_itm.Tid__not:
|
||||
return !subs[0].Eval();
|
||||
case Srch_crt_itm.Tid__invalid: return false; // should not happen
|
||||
default: throw Err_.new_unhandled_default(tid);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,77 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||
|
||||
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||
or alternatively under the terms of the Apache License Version 2.0.
|
||||
|
||||
You may use XOWA according to either of these licenses as is most appropriate
|
||||
for your project on a case-by-case basis.
|
||||
|
||||
The terms of each license can be found in the source code repository:
|
||||
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.brutes.finders; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.searchers.*; import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.*; import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.brutes.*;
|
||||
import gplx.core.btries.*;
|
||||
import gplx.xowa.addons.wikis.searchs.searchers.crts.*;
|
||||
public class Xofulltext_word_node_ {
|
||||
public static Xofulltext_word_node New_root(Srch_crt_itm src, Btrie_slim_mgr word_trie, boolean auto_wildcard_bgn, boolean auto_wildcard_end, byte wildchar_byte, byte not_byte) {
|
||||
Xofulltext_word_node trg = new Xofulltext_word_node();
|
||||
trg.tid = src.Tid;
|
||||
|
||||
// set word-related props
|
||||
switch (trg.tid) {
|
||||
case Srch_crt_itm.Tid__word:
|
||||
case Srch_crt_itm.Tid__word_quote:
|
||||
// get word_orig; EX: "abc*"
|
||||
byte[] word_orig = src.Raw;
|
||||
int word_orig_len = word_orig.length;
|
||||
|
||||
// init hook_bgn / hook_end
|
||||
int hook_bgn = 0;
|
||||
int hook_end = word_orig_len;
|
||||
|
||||
// handle wildcard at bgn; EX: "*a"
|
||||
boolean wildcard_at_bgn = auto_wildcard_bgn;
|
||||
if (word_orig_len > hook_bgn + 1 && word_orig[hook_bgn] == wildchar_byte) {
|
||||
wildcard_at_bgn = true;
|
||||
hook_bgn++;
|
||||
}
|
||||
|
||||
// handle wildcard at end; EX: "a*"
|
||||
boolean wildcard_at_end = auto_wildcard_end;
|
||||
if (word_orig_len > hook_bgn + 1 && word_orig[hook_end - 1] == wildchar_byte) {
|
||||
wildcard_at_end = true;
|
||||
hook_end--;
|
||||
}
|
||||
|
||||
// get hook
|
||||
byte[] word_hook = wildcard_at_bgn || wildcard_at_end ? Bry_.Mid(word_orig, hook_bgn, hook_end) : word_orig;
|
||||
|
||||
// assign to trg
|
||||
trg.word_orig = word_orig;
|
||||
trg.word_hook = word_hook;
|
||||
trg.wildcard_at_bgn = wildcard_at_bgn;
|
||||
trg.wildcard_at_end = wildcard_at_end;
|
||||
|
||||
// add to trie
|
||||
if (word_trie.Match_exact(word_hook) == null) { // don't add if exists
|
||||
word_trie.Add_obj(word_hook, trg);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
// set subs
|
||||
Srch_crt_itm[] src_subs = src.Subs;
|
||||
Xofulltext_word_node[] trg_subs = new Xofulltext_word_node[src_subs.length];
|
||||
trg.subs = trg_subs;
|
||||
int len = src_subs.length;
|
||||
for (int i = 0; i < len; i++) {
|
||||
trg.subs[i] = New_root(src_subs[i], word_trie, auto_wildcard_bgn, auto_wildcard_end, wildchar_byte, not_byte);
|
||||
}
|
||||
|
||||
return trg;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,44 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||
|
||||
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||
or alternatively under the terms of the Apache License Version 2.0.
|
||||
|
||||
You may use XOWA according to either of these licenses as is most appropriate
|
||||
for your project on a case-by-case basis.
|
||||
|
||||
The terms of each license can be found in the source code repository:
|
||||
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.gflucenes; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.searchers.*; import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.*;
|
||||
import gplx.gflucene.*;
|
||||
import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.uis.*;
|
||||
public class Xofulltext_searcher__lucene implements Xofulltext_searcher {
|
||||
private final Gflucene_searcher searcher = new Gflucene_searcher();
|
||||
public void Search(Xofulltext_searcher_ui cbk, Xow_wiki wiki, Xofulltext_searcher_args args) {
|
||||
// create list
|
||||
List_adp list = List_adp_.New();
|
||||
|
||||
// init searcher with wiki
|
||||
searcher.Init(wiki.Fsys_mgr().Root_dir().GenSubDir_nest("data", "search").Xto_api());
|
||||
|
||||
// exec search
|
||||
searcher.Exec(list, new Gflucene_searcher_data(String_.new_u8(args.query), args.max_pages_per_wiki));
|
||||
|
||||
// term
|
||||
searcher.Term();
|
||||
|
||||
// loop list
|
||||
int len = list.Len();
|
||||
for (int i = 0; i < len; i++) {
|
||||
Gflucene_index_data found = (Gflucene_index_data)list.Get_at(i);
|
||||
|
||||
// call page found
|
||||
Xofulltext_searcher_page page = new Xofulltext_searcher_page(args.query_id, wiki.Domain_str(), found.page_id, found.title, false);
|
||||
cbk.Send_page_add(page);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,28 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||
|
||||
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||
or alternatively under the terms of the Apache License Version 2.0.
|
||||
|
||||
You may use XOWA according to either of these licenses as is most appropriate
|
||||
for your project on a case-by-case basis.
|
||||
|
||||
The terms of each license can be found in the source code repository:
|
||||
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.uis; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.searchers.*; import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.*;
|
||||
/** Immutable value holder for one matched line: the wiki domain, page id, found index and excerpt. */
public class Xofulltext_searcher_line {
	private final String wiki_domain;
	private final int page_id;
	private final int found_idx;
	private final String excerpt;

	/** Stores the given values as-is; no validation is done. */
	public Xofulltext_searcher_line(String wiki_domain, int page_id, int found_idx, String excerpt) {
		this.wiki_domain = wiki_domain;
		this.page_id = page_id;
		this.found_idx = found_idx;
		this.excerpt = excerpt;
	}

	/** @return the wiki domain passed to the constructor */
	public String Wiki_domain() {
		return wiki_domain;
	}

	/** @return the page id passed to the constructor */
	public int Page_id() {
		return page_id;
	}

	/** @return the found index passed to the constructor */
	public int Found_idx() {
		return found_idx;
	}

	/** @return the excerpt passed to the constructor */
	public String Excerpt() {
		return excerpt;
	}
}
|
||||
@@ -0,0 +1,30 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||
|
||||
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||
or alternatively under the terms of the Apache License Version 2.0.
|
||||
|
||||
You may use XOWA according to either of these licenses as is most appropriate
|
||||
for your project on a case-by-case basis.
|
||||
|
||||
The terms of each license can be found in the source code repository:
|
||||
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.uis; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.searchers.*; import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.*;
|
||||
/** Immutable value holder for one found page: query id, wiki domain, page id, page title and the expand-matches flag. */
public class Xofulltext_searcher_page {
	private final int query_id;
	private final String wiki_domain;
	private final int page_id;
	private final String page_title;
	private final boolean expand_matches_section;

	/** Stores the given values as-is; no validation is done. */
	public Xofulltext_searcher_page(int query_id, String wiki_domain, int page_id, String page_title, boolean expand_matches_section) {
		this.query_id = query_id;
		this.wiki_domain = wiki_domain;
		this.page_id = page_id;
		this.page_title = page_title;
		this.expand_matches_section = expand_matches_section;
	}

	/** @return the query id passed to the constructor */
	public int Query_id() {
		return query_id;
	}

	/** @return the wiki domain passed to the constructor */
	public String Wiki_domain() {
		return wiki_domain;
	}

	/** @return the page id passed to the constructor */
	public int Page_id() {
		return page_id;
	}

	/** @return the page title passed to the constructor */
	public String Page_title() {
		return page_title;
	}

	/** @return the expand_matches_section flag passed to the constructor */
	public boolean Expand_matches_section() {
		return expand_matches_section;
	}
}
|
||||
@@ -0,0 +1,22 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||
|
||||
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||
or alternatively under the terms of the Apache License Version 2.0.
|
||||
|
||||
You may use XOWA according to either of these licenses as is most appropriate
|
||||
for your project on a case-by-case basis.
|
||||
|
||||
The terms of each license can be found in the source code repository:
|
||||
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.uis; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.searchers.*; import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.*;
|
||||
public interface Xofulltext_searcher_ui {
|
||||
void Send_wiki_add(byte[] wiki_domain);
|
||||
void Send_wiki_update(byte[] wiki, int found, int searched);
|
||||
void Send_page_add(Xofulltext_searcher_page page);
|
||||
void Send_line_add(Xofulltext_searcher_line line);
|
||||
}
|
||||
@@ -0,0 +1,54 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||
|
||||
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||
or alternatively under the terms of the Apache License Version 2.0.
|
||||
|
||||
You may use XOWA according to either of these licenses as is most appropriate
|
||||
for your project on a case-by-case basis.
|
||||
|
||||
The terms of each license can be found in the source code repository:
|
||||
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.uis; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.searchers.*; import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.*;
|
||||
import gplx.xowa.guis.cbks.*;
|
||||
public class Xofulltext_searcher_ui__gui implements Xofulltext_searcher_ui {
|
||||
private final Xog_cbk_mgr cbk_mgr;
|
||||
private final Xog_cbk_trg cbk_trg;
|
||||
public Xofulltext_searcher_ui__gui(Xog_cbk_mgr cbk_mgr, Xog_cbk_trg cbk_trg) {
|
||||
this.cbk_mgr = cbk_mgr;
|
||||
this.cbk_trg = cbk_trg;
|
||||
}
|
||||
public void Send_wiki_add(byte[] wiki_domain) {
|
||||
cbk_mgr.Send_json(cbk_trg, "xo.fulltext_searcher.results__wiki__add__recv", gplx.core.gfobjs.Gfobj_nde.New()
|
||||
.Add_bry("wiki", wiki_domain)
|
||||
);
|
||||
}
|
||||
public void Send_wiki_update(byte[] wiki, int found, int searched) {
|
||||
cbk_mgr.Send_json(cbk_trg, "xo.fulltext_searcher.results__wiki__update__recv", gplx.core.gfobjs.Gfobj_nde.New()
|
||||
.Add_bry("wiki", wiki)
|
||||
.Add_int("found", found)
|
||||
.Add_int("searched", searched)
|
||||
);
|
||||
}
|
||||
public void Send_page_add(Xofulltext_searcher_page page) {
|
||||
cbk_mgr.Send_json(cbk_trg, "xo.fulltext_searcher.results__page__add__recv", gplx.core.gfobjs.Gfobj_nde.New()
|
||||
.Add_int("query_id", page.Query_id())
|
||||
.Add_str("wiki", page.Wiki_domain())
|
||||
.Add_int("page_id", page.Page_id())
|
||||
.Add_str("page_ttl", page.Page_title())
|
||||
.Add_bool("expand_matches_section", page.Expand_matches_section())
|
||||
);
|
||||
}
|
||||
public void Send_line_add(Xofulltext_searcher_line match) {
|
||||
cbk_mgr.Send_json(cbk_trg, "xo.fulltext_searcher.results__line__add__recv", gplx.core.gfobjs.Gfobj_nde.New()
|
||||
.Add_str("wiki", match.Wiki_domain())
|
||||
.Add_int("page_id", match.Page_id())
|
||||
.Add_int("line", match.Found_idx())
|
||||
.Add_str("html", match.Excerpt())
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,64 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||
|
||||
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||
or alternatively under the terms of the Apache License Version 2.0.
|
||||
|
||||
You may use XOWA according to either of these licenses as is most appropriate
|
||||
for your project on a case-by-case basis.
|
||||
|
||||
The terms of each license can be found in the source code repository:
|
||||
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.addons.wikis.fulltexts.searchers.specials; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.searchers.*;
|
||||
import gplx.langs.mustaches.*;
|
||||
public class Xofulltext_searcher_doc implements Mustache_doc_itm {
|
||||
private final byte[] query;
|
||||
private final boolean case_match, auto_wildcard_bgn, auto_wildcard_end, expand_matches_section, show_all_matches;
|
||||
private final int max_pages_per_wiki;
|
||||
private final String wikis, namespaces;
|
||||
public Xofulltext_searcher_doc
|
||||
( byte[] query, boolean case_match, boolean auto_wildcard_bgn, boolean auto_wildcard_end
|
||||
, boolean expand_matches_section, boolean show_all_matches
|
||||
, int max_pages_per_wiki
|
||||
, String wikis, String namespaces) {
|
||||
this.query = query;
|
||||
this.case_match = case_match;
|
||||
this.auto_wildcard_bgn = auto_wildcard_bgn;
|
||||
this.auto_wildcard_end = auto_wildcard_end;
|
||||
this.expand_matches_section = expand_matches_section;
|
||||
this.show_all_matches = show_all_matches;
|
||||
this.max_pages_per_wiki = max_pages_per_wiki;
|
||||
this.wikis = wikis;
|
||||
this.namespaces = namespaces;
|
||||
}
|
||||
public boolean Mustache__write(String key, Mustache_bfr bfr) {
|
||||
if (String_.Eq(key, "wikis"))
|
||||
bfr.Add_str_u8(wikis);
|
||||
else if (String_.Eq(key, "namespaces"))
|
||||
bfr.Add_str_u8(namespaces);
|
||||
else if (String_.Eq(key, "max_pages_per_wiki"))
|
||||
bfr.Add_int(max_pages_per_wiki);
|
||||
else if (String_.Eq(key, "query"))
|
||||
bfr.Add_bry(query);
|
||||
else
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
public Mustache_doc_itm[] Mustache__subs(String key) {
|
||||
if (String_.Eq(key, "case_match"))
|
||||
return Mustache_doc_itm_.Ary__bool(case_match);
|
||||
else if (String_.Eq(key, "auto_wildcard_bgn"))
|
||||
return Mustache_doc_itm_.Ary__bool(auto_wildcard_bgn);
|
||||
else if (String_.Eq(key, "auto_wildcard_end"))
|
||||
return Mustache_doc_itm_.Ary__bool(auto_wildcard_end);
|
||||
else if (String_.Eq(key, "expand_matches_section"))
|
||||
return Mustache_doc_itm_.Ary__bool(expand_matches_section);
|
||||
else if (String_.Eq(key, "show_all_matches"))
|
||||
return Mustache_doc_itm_.Ary__bool(show_all_matches);
|
||||
return Mustache_doc_itm_.Ary__empty;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,67 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||
|
||||
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||
or alternatively under the terms of the Apache License Version 2.0.
|
||||
|
||||
You may use XOWA according to either of these licenses as is most appropriate
|
||||
for your project on a case-by-case basis.
|
||||
|
||||
The terms of each license can be found in the source code repository:
|
||||
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.addons.wikis.fulltexts.searchers.specials; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.searchers.*;
|
||||
import gplx.xowa.specials.*; import gplx.langs.mustaches.*; import gplx.xowa.wikis.pages.*; import gplx.xowa.wikis.pages.tags.*;
|
||||
import gplx.dbs.*;
|
||||
class Xofulltext_searcher_html extends Xow_special_wtr__base {
|
||||
private final byte[] query;
|
||||
private final boolean case_match, auto_wildcard_bgn, auto_wildcard_end, expand_matches_section, show_all_matches;
|
||||
private final int max_pages_per_wiki;
|
||||
private final String wikis, namespaces;
|
||||
public Xofulltext_searcher_html
|
||||
( byte[] query, boolean case_match, boolean auto_wildcard_bgn, boolean auto_wildcard_end
|
||||
, boolean expand_matches_section, boolean show_all_matches
|
||||
, int max_pages_per_wiki
|
||||
, String wikis, String namespaces) {
|
||||
this.query = query;
|
||||
this.case_match = case_match;
|
||||
this.auto_wildcard_bgn = auto_wildcard_bgn;
|
||||
this.auto_wildcard_end = auto_wildcard_end;
|
||||
this.expand_matches_section = expand_matches_section;
|
||||
this.show_all_matches = show_all_matches;
|
||||
this.max_pages_per_wiki = max_pages_per_wiki;
|
||||
this.wikis = wikis;
|
||||
this.namespaces = namespaces;
|
||||
}
|
||||
@Override protected Io_url Get_addon_dir(Xoa_app app) {return Addon_dir(app);}
|
||||
@Override protected Io_url Get_mustache_fil(Io_url addon_dir) {return addon_dir.GenSubFil_nest("bin", "xofulltext_searcher.template.html");}
|
||||
@Override protected Mustache_doc_itm Bld_mustache_root(Xoa_app app) {
|
||||
return new Xofulltext_searcher_doc
|
||||
( query, case_match, auto_wildcard_bgn, auto_wildcard_end
|
||||
, expand_matches_section, show_all_matches
|
||||
, max_pages_per_wiki, wikis, namespaces);
|
||||
}
|
||||
@Override protected void Bld_tags(Xoa_app app, Io_url addon_dir, Xopage_html_data page_data) {
|
||||
Xopg_tag_mgr head_tags = page_data.Head_tags();
|
||||
Xopg_tag_wtr_.Add__xoelem (head_tags, app.Fsys_mgr().Http_root());
|
||||
|
||||
Xopg_tag_wtr_.Add__xocss (head_tags, app.Fsys_mgr().Http_root());
|
||||
Xopg_tag_wtr_.Add__xohelp (head_tags, app.Fsys_mgr().Http_root());
|
||||
Xopg_tag_wtr_.Add__xolog (head_tags, app.Fsys_mgr().Http_root());
|
||||
Xopg_tag_wtr_.Add__xoajax (head_tags, app.Fsys_mgr().Http_root(), app);
|
||||
Xopg_tag_wtr_.Add__jquery (head_tags, app.Fsys_mgr().Http_root());
|
||||
Xopg_tag_wtr_.Add__xonotify (head_tags, app.Fsys_mgr().Http_root());
|
||||
Xopg_alertify_.Add_tags (head_tags, app.Fsys_mgr().Http_root());
|
||||
|
||||
head_tags.Add(Xopg_tag_itm.New_css_file(addon_dir.GenSubFil_nest("bin", "xofulltext_searcher.css")));
|
||||
head_tags.Add(Xopg_tag_itm.New_js_file(addon_dir.GenSubFil_nest("bin", "xofulltext_searcher.js")));
|
||||
|
||||
page_data.Js_enabled_y_();
|
||||
}
|
||||
public static Io_url Addon_dir(Xoa_app app) {
|
||||
return app.Fsys_mgr().Http_root().GenSubDir_nest("bin", "any", "xowa", "addon", "wiki", "fulltext", "searcher");
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,43 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||
|
||||
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||
or alternatively under the terms of the Apache License Version 2.0.
|
||||
|
||||
You may use XOWA according to either of these licenses as is most appropriate
|
||||
for your project on a case-by-case basis.
|
||||
|
||||
The terms of each license can be found in the source code repository:
|
||||
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.addons.wikis.fulltexts.searchers.specials; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.searchers.*;
|
||||
import gplx.xowa.specials.*; import gplx.core.net.qargs.*;
|
||||
import gplx.xowa.addons.apps.cfgs.*;
|
||||
public class Xofulltext_searcher_special implements Xow_special_page {
|
||||
public void Special__gen(Xow_wiki wiki, Xoa_page page, Xoa_url url, Xoa_ttl ttl) {
|
||||
// get qry if any
|
||||
Gfo_qarg_mgr url_args = new Gfo_qarg_mgr().Init(url.Qargs_ary());
|
||||
byte[] query = url_args.Read_bry_or("query", Bry_.Empty);
|
||||
|
||||
// get options and create page
|
||||
Xocfg_mgr cfg_mgr = wiki.App().Cfg();
|
||||
new Xofulltext_searcher_html
|
||||
( query
|
||||
, cfg_mgr.Get_bool_app_or("xowa.addon.search.fulltext.special.case_match", false)
|
||||
, cfg_mgr.Get_bool_app_or("xowa.addon.search.fulltext.special.auto_wildcard_bgn", false)
|
||||
, cfg_mgr.Get_bool_app_or("xowa.addon.search.fulltext.special.auto_wildcard_end", false)
|
||||
, cfg_mgr.Get_bool_app_or("xowa.addon.search.fulltext.special.expand_matches_section", false)
|
||||
, cfg_mgr.Get_bool_app_or("xowa.addon.search.fulltext.special.show_all_matches", false)
|
||||
, cfg_mgr.Get_int_app_or ("xowa.addon.search.fulltext.special.max_pages_per_wiki", 100)
|
||||
, wiki.Domain_str()
|
||||
, cfg_mgr.Get_str_app_or ("xowa.addon.search.fulltext.special.namespaces", "0|4")
|
||||
).Bld_page_by_mustache(wiki.App(), page, this);
|
||||
}
|
||||
Xofulltext_searcher_special(Xow_special_meta special__meta) {this.special__meta = special__meta;}
|
||||
public Xow_special_meta Special__meta() {return special__meta;} private final Xow_special_meta special__meta;
|
||||
public Xow_special_page Special__clone() {return this;}
|
||||
public static final Xow_special_page Prototype = new Xofulltext_searcher_special(Xow_special_meta.New_xo("XowaSearch", "Search"));
|
||||
}
|
||||
@@ -0,0 +1,43 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||
|
||||
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||
or alternatively under the terms of the Apache License Version 2.0.
|
||||
|
||||
You may use XOWA according to either of these licenses as is most appropriate
|
||||
for your project on a case-by-case basis.
|
||||
|
||||
The terms of each license can be found in the source code repository:
|
||||
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.addons.wikis.fulltexts.searchers.svcs; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.searchers.*;
|
||||
import gplx.langs.jsons.*;
|
||||
import gplx.xowa.htmls.bridges.*;
|
||||
public class Xofulltext_searcher_bridge implements Bridge_cmd_itm {
|
||||
private Xofulltext_searcher_svc svc;
|
||||
public void Init_by_app(Xoa_app app) {
|
||||
this.svc = new Xofulltext_searcher_svc(app);
|
||||
}
|
||||
public String Exec(Json_nde data) {
|
||||
byte proc_id = proc_hash.Get_as_byte_or(data.Get_as_bry_or(Bridge_cmd_mgr.Msg__proc, null), Byte_ascii.Max_7_bit);
|
||||
Json_nde args = data.Get_kv(Bridge_cmd_mgr.Msg__args).Val_as_nde();
|
||||
switch (proc_id) {
|
||||
case Proc__search: svc.Search(args); break;
|
||||
case Proc__get_lines_rest: svc.Get_lines_rest(args); break;
|
||||
default: throw Err_.new_unhandled_default(proc_id);
|
||||
}
|
||||
return "";
|
||||
}
|
||||
|
||||
private static final byte Proc__search = 0, Proc__get_lines_rest = 1;
|
||||
private static final Hash_adp_bry proc_hash = Hash_adp_bry.cs()
|
||||
.Add_str_byte("search" , Proc__search)
|
||||
.Add_str_byte("get_lines_rest" , Proc__get_lines_rest)
|
||||
;
|
||||
|
||||
public byte[] Key() {return BRIDGE_KEY;} public static final byte[] BRIDGE_KEY = Bry_.new_a7("xowa.wiki.fulltext.searcher");
|
||||
public static final Xofulltext_searcher_bridge Prototype = new Xofulltext_searcher_bridge(); Xofulltext_searcher_bridge() {}
|
||||
}
|
||||
@@ -0,0 +1,104 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||
|
||||
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||
or alternatively under the terms of the Apache License Version 2.0.
|
||||
|
||||
You may use XOWA according to either of these licenses as is most appropriate
|
||||
for your project on a case-by-case basis.
|
||||
|
||||
The terms of each license can be found in the source code repository:
|
||||
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.addons.wikis.fulltexts.searchers.svcs; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.searchers.*;
|
||||
import gplx.core.btries.*;
|
||||
import gplx.langs.jsons.*;
|
||||
import gplx.dbs.*; import gplx.xowa.wikis.data.tbls.*;
|
||||
import gplx.xowa.guis.cbks.*;
|
||||
import gplx.xowa.addons.apps.cfgs.*;
|
||||
import gplx.xowa.addons.wikis.fulltexts.searchers.specials.*;
|
||||
import gplx.xowa.addons.wikis.fulltexts.searchers.caches.*;
|
||||
import gplx.xowa.addons.wikis.searchs.searchers.crts.*;
|
||||
import gplx.xowa.addons.wikis.searchs.searchers.crts.visitors.*;
|
||||
import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.*;
|
||||
import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.uis.*;
|
||||
import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.gflucenes.*;
|
||||
import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.brutes.*;
|
||||
class Xofulltext_searcher_svc implements Gfo_invk {
|
||||
private final Xoa_app app;
|
||||
private final Xog_cbk_trg cbk_trg = Xog_cbk_trg.New(Xofulltext_searcher_special.Prototype.Special__meta().Ttl_bry());
|
||||
private final Xofulltext_cache_mgr cache_mgr = new Xofulltext_cache_mgr();
|
||||
private final Xofulltext_searcher_ui searcher_cbk;
|
||||
public Xofulltext_searcher_svc(Xoa_app app) {
|
||||
this.app = app;
|
||||
this.searcher_cbk = new Xofulltext_searcher_ui__gui(app.Gui__cbk_mgr(), cbk_trg);
|
||||
}
|
||||
public void Search(Json_nde args) {
|
||||
// for now, always clear cache; "get_lines_rest" will only work for latest search
|
||||
cache_mgr.Clear();
|
||||
|
||||
// get search_args
|
||||
Xofulltext_searcher_args search_args = Xofulltext_searcher_args.New_by_json(args);
|
||||
search_args.query_id = cache_mgr.Next_qry_id();
|
||||
|
||||
// autosave any changes if enabled
|
||||
Xocfg_mgr cfg_mgr = app.Cfg();
|
||||
if (cfg_mgr.Get_bool_app_or("xowa.addon.search.fulltext.options.autosave_enabled", true)) {
|
||||
cfg_mgr.Set_bool_app("xowa.addon.search.fulltext.special.case_match", search_args.case_match);
|
||||
cfg_mgr.Set_bool_app("xowa.addon.search.fulltext.special.auto_wildcard_bgn", search_args.auto_wildcard_bgn);
|
||||
cfg_mgr.Set_bool_app("xowa.addon.search.fulltext.special.auto_wildcard_end", search_args.auto_wildcard_end);
|
||||
cfg_mgr.Set_bool_app("xowa.addon.search.fulltext.special.expand_matches_section", search_args.expand_matches_section);
|
||||
cfg_mgr.Set_bool_app("xowa.addon.search.fulltext.special.show_all_matches", search_args.show_all_matches);
|
||||
cfg_mgr.Get_int_app_or ("xowa.addon.search.fulltext.special.max_pages_per_wiki", search_args.max_pages_per_wiki);
|
||||
cfg_mgr.Get_str_app_or ("xowa.addon.search.fulltext.special.namespaces", search_args.namespaces);
|
||||
}
|
||||
|
||||
// launch thread
|
||||
gplx.core.threads.Thread_adp_.Start_by_val("search", Cancelable_.Never, this, Invk__search, search_args);
|
||||
}
|
||||
private void Search(Xofulltext_searcher_args args) {
|
||||
try {
|
||||
// loop wikis
|
||||
byte[][] wiki_domains = Bry_split_.Split(args.wikis, Byte_ascii.Pipe_bry);
|
||||
for (byte[] wiki_domain : wiki_domains) {
|
||||
// get wiki and notify
|
||||
Xow_wiki wiki = app.Wiki_mgri().Get_by_or_make_init_y(wiki_domain);
|
||||
searcher_cbk.Send_wiki_add(wiki_domain);
|
||||
|
||||
// get searcher and search
|
||||
Xofulltext_searcher searcher = Get_searcher(wiki);
|
||||
searcher.Search(searcher_cbk, wiki, args);
|
||||
}
|
||||
} catch (Exception exc) {
|
||||
if (app.Tid_is_edit())
|
||||
((Xoae_app)app).Gui_mgr().Kit().Ask_ok("", "", Err_.Message_gplx_full(exc));
|
||||
}
|
||||
}
|
||||
public void Get_lines_rest(Json_nde args) {
|
||||
Get_lines_rest(args.Get_as_int("qry_id"), args.Get_as_bry("wiki"), args.Get_as_int("page_id"));
|
||||
}
|
||||
private void Get_lines_rest(int qry_id, byte[] wiki_bry, int page_id) {
|
||||
Xofulltext_cache_line[] lines = cache_mgr.Get_lines_rest(qry_id, wiki_bry, page_id);
|
||||
for (Xofulltext_cache_line line : lines) {
|
||||
Xofulltext_searcher_line match = new Xofulltext_searcher_line(String_.new_u8(wiki_bry), page_id, line.Line_seq() + 1, String_.new_u8(line.Line_html()));
|
||||
searcher_cbk.Send_line_add(match);
|
||||
}
|
||||
}
|
||||
private Xofulltext_searcher Get_searcher(Xow_wiki wiki) {
|
||||
if (Io_mgr.Instance.ExistsDir(wiki.Fsys_mgr().Root_dir().GenSubDir_nest("data", "search"))) {
|
||||
return new Xofulltext_searcher__lucene();
|
||||
}
|
||||
else {
|
||||
return new Xofulltext_searcher__brute(app, cbk_trg, cache_mgr);
|
||||
}
|
||||
}
|
||||
|
||||
public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
|
||||
if (ctx.Match(k, Invk__search)) this.Search((Xofulltext_searcher_args)m.ReadObj("v"));
|
||||
else return Gfo_invk_.Rv_unhandled;
|
||||
return this;
|
||||
} private static final String Invk__search = "search";
|
||||
}
|
||||
Reference in New Issue
Block a user