1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2026-03-02 03:49:30 +00:00

Full-text search: Add Lucene indexer

This commit is contained in:
gnosygnu
2017-03-12 22:57:42 -04:00
parent ae9d0fccd3
commit 77de7215ce
47 changed files with 688 additions and 196 deletions

View File

@@ -0,0 +1,39 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.addons.wikis.fulltexts; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*;
import gplx.xowa.bldrs.wkrs.*;
import gplx.xowa.specials.*; import gplx.xowa.htmls.bridges.*;
/**
 * Addon registration for full-text search.
 * Exposes the prototypes of the indexer builder command, the searcher / indexer special pages
 * and the searcher / indexer JSON bridge commands.
 */
public class Xosearch_fulltext_addon implements Xoax_addon_itm, Xoax_addon_itm__special, Xoax_addon_itm__json, Xoax_addon_itm__bldr {
  private static final String ADDON__KEY = "xowa.wiki.fulltext";

  /** Returns the key of this addon. */
  public String Addon__key() {
    return ADDON__KEY;
  }

  /** Returns the builder commands of this addon: the indexer command prototype. */
  public Xob_cmd[] Bldr_cmds() {
    Xob_cmd[] cmds = new Xob_cmd[1];
    cmds[0] = gplx.xowa.addons.wikis.fulltexts.indexers.bldrs.Xofulltext_indexer_cmd.Prototype;
    return cmds;
  }

  /** Returns the special pages of this addon: searcher first, then indexer. */
  public Xow_special_page[] Special_pages() {
    Xow_special_page[] pages = new Xow_special_page[2];
    pages[0] = gplx.xowa.addons.wikis.fulltexts.searchers.specials.Xofulltext_searcher_special.Prototype;
    pages[1] = gplx.xowa.addons.wikis.fulltexts.indexers.specials.Xofulltext_indexer_special.Prototype;
    return pages;
  }

  /** Returns the JSON bridge commands of this addon: searcher first, then indexer. */
  public Bridge_cmd_itm[] Json_cmds() {
    Bridge_cmd_itm[] cmds = new Bridge_cmd_itm[2];
    cmds[0] = gplx.xowa.addons.wikis.fulltexts.searchers.svcs.Xofulltext_searcher_bridge.Prototype;
    cmds[1] = gplx.xowa.addons.wikis.fulltexts.indexers.svcs.Xofulltext_indexer_bridge.Prototype;
    return cmds;
  }
}

View File

@@ -0,0 +1,28 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.addons.wikis.fulltexts.indexers.bldrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.indexers.*;
import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.wkrs.*;
/**
 * Builder command registered under the key "search.index".
 * Running it asserts that the wiki is initialized and then runs {@link Xofulltext_indexer_mgr}
 * without a ui (null is passed for the progress receiver).
 */
public class Xofulltext_indexer_cmd extends Xob_cmd__base {
  /** Prototype instance; real instances are produced through {@link #Cmd_clone}. */
  public static final Xob_cmd Prototype = new Xofulltext_indexer_cmd(null, null);

  public Xofulltext_indexer_cmd(Xob_bldr bldr, Xowe_wiki wiki) {
    super(bldr, wiki);
  }

  @Override public String Cmd_key() {
    return "search.index";
  }

  @Override public Xob_cmd Cmd_clone(Xob_bldr bldr, Xowe_wiki wiki) {
    return new Xofulltext_indexer_cmd(bldr, wiki);
  }

  @Override public void Cmd_run() {
    wiki.Init_assert();
    Xofulltext_indexer_mgr mgr = new Xofulltext_indexer_mgr();
    mgr.Exec(wiki, null);
  }
}

View File

@@ -0,0 +1,68 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.addons.wikis.fulltexts.indexers.bldrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.indexers.*;
import gplx.dbs.*;
import gplx.xowa.htmls.*;
import gplx.xowa.wikis.data.*;
import gplx.xowa.htmls.core.dbs.*;
import gplx.xowa.addons.wikis.fulltexts.indexers.svcs.*;
/**
 * Indexes the pages of one wiki for full-text search.
 */
public class Xofulltext_indexer_mgr {
  /**
   * Reads every namespace-0 row of the page table, loads the stored html of each page
   * and hands it to a {@link Xofulltext_indexer_wkr}.
   *
   * A failure while loading or indexing a single page is logged as a warning and does not stop the run.
   * The reader and the indexer are always released, even when the run aborts with an exception.
   *
   * @param wiki wiki whose pages are indexed
   * @param ui   receives a progress message after every 10000 indexed pages; may be null
   */
  public void Exec(Xowe_wiki wiki, Xofulltext_indexer_ui ui) {
    Xow_db_file core_db = wiki.Data__core_mgr().Db__core();
    gplx.xowa.wikis.data.tbls.Xowd_page_tbl page_tbl = core_db.Tbl__page();
    Xoh_page hpg = new Xoh_page();

    Xofulltext_indexer_wkr indexer = new Xofulltext_indexer_wkr();
    indexer.Init(wiki);
    try {
      Db_conn conn = page_tbl.Conn();
      Db_rdr rdr = conn.Exec_rdr("SELECT page_id, page_score, page_namespace, page_title, page_html_db_id FROM page WHERE page_namespace = 0;");
      try {
        int count = 0;
        while (rdr.Move_next()) {
          int page_namespace = rdr.Read_int("page_namespace");
          byte[] page_ttl_bry = rdr.Read_bry_by_str("page_title");
          int page_id = rdr.Read_int("page_id");
          int page_score = rdr.Read_int("page_score");
          int html_db_id = rdr.Read_int("page_html_db_id");

          // ignore redirects
          if (html_db_id == -1) continue;

          try {
            // load page
            Xoa_ttl page_ttl = wiki.Ttl_parse(page_namespace, page_ttl_bry);
            if (page_ttl == null)
              continue;

            // html_db_id is never -1 at this point (filtered above), so the html db is always looked up by id
            Xow_db_file html_db = wiki.Data__core_mgr().Dbs__get_by_id_or_fail(html_db_id);
            hpg.Ctor_by_hview(wiki, wiki.Utl__url_parser().Parse(page_ttl.Full_db()), page_ttl, page_id);
            if (!html_db.Tbl__html().Select_by_page(hpg))
              continue;
            byte[] html_text = wiki.Html__hdump_mgr().Load_mgr().Parse(hpg, hpg.Db().Html().Zip_tid(), hpg.Db().Html().Hzip_tid(), hpg.Db().Html().Html_bry());

            indexer.Index(page_id, page_score, page_ttl.Page_txt(), html_text);

            // report progress periodically
            if ((++count % 10000) == 0) {
              Gfo_usr_dlg_.Instance.Prog_many("", "", "indexing page: ~{0}", count);
              if (ui != null)
                ui.Send_prog(Datetime_now.Get().XtoStr_fmt_yyyy_MM_dd_HH_mm_ss() + ": indexing page: " + count);
            }
          } catch (Exception e) {
            // best-effort: one bad page must not abort the whole index run
            Gfo_usr_dlg_.Instance.Warn_many("", "", "err: ~{0}", Err_.Message_gplx_log(e));
          }
        }
      } finally {
        // release the reader even if iteration fails
        rdr.Rls();
      }
    } finally {
      // always terminate the indexer so that it is not left open after a failure
      indexer.Term();
    }
  }
}

View File

@@ -0,0 +1,36 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.addons.wikis.fulltexts.indexers.bldrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.indexers.*;
import gplx.gflucene.*;
/**
 * Thin wrapper around a {@link Gflucene_index_bldr} that writes the index of one wiki
 * to the "data/search" directory below the wiki's root directory.
 */
public class Xofulltext_indexer_wkr {
  private final Gflucene_index_bldr index_wtr = new Gflucene_index_bldr();

  /**
   * Prepares the index writer for the given wiki.
   * NOTE: any existing "data/search" directory of the wiki is deleted first.
   */
  public void Init(Xow_wiki wiki) {
    Io_url root_dir = wiki.Fsys_mgr().Root_dir();
    Io_url search_dir = root_dir.GenSubDir_nest("data", "search");
    Io_mgr.Instance.DeleteDirDeep(search_dir);
    index_wtr.Init(search_dir.Xto_api());
  }

  /** Indexes a loaded page using its id, score, title text and html. */
  public void Index(Xoae_page wpg) {
    // TODO: skip if not main_ns
    int page_id = wpg.Db().Page().Id();
    int score = wpg.Db().Page().Score();
    byte[] ttl = wpg.Ttl().Page_txt();
    byte[] html = wpg.Db().Html().Html_bry();
    Index(page_id, score, ttl, html);
  }

  /** Indexes one page; title and html are converted from UTF-8 bytes to strings before being handed to the writer. */
  public void Index(int page_id, int score, byte[] ttl, byte[] html) {
    String ttl_str = String_.new_u8(ttl);
    String html_str = String_.new_u8(html);
    index_wtr.Exec(new Gflucene_index_data(page_id, score, ttl_str, html_str));
  }

  /** Terminates the underlying index writer. */
  public void Term() {
    index_wtr.Term();
  }
}

View File

@@ -0,0 +1,34 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.addons.wikis.fulltexts.indexers.specials; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.indexers.*;
import gplx.langs.mustaches.*;
/**
 * Mustache root document for the indexer special page.
 * Supplies a single key, "wikis", and has no sub-documents.
 */
public class Xofulltext_indexer_doc implements Mustache_doc_itm {
  private final byte[] wikis_bry;

  public Xofulltext_indexer_doc(byte[] wikis_bry) {
    this.wikis_bry = wikis_bry;
  }

  /** Writes the value for "wikis" and returns true; returns false for any other key. */
  public boolean Mustache__write(String key, Mustache_bfr bfr) {
    if (!String_.Eq(key, "wikis")) {
      return false;
    }
    bfr.Add_bry(wikis_bry);
    return true;
  }

  /** Always returns the empty array: this document has no nested items. */
  public Mustache_doc_itm[] Mustache__subs(String key) {
    return Mustache_doc_itm_.Ary__empty;
  }
}

View File

@@ -0,0 +1,50 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.addons.wikis.fulltexts.indexers.specials; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.indexers.*;
import gplx.xowa.specials.*; import gplx.langs.mustaches.*; import gplx.xowa.wikis.pages.*; import gplx.xowa.wikis.pages.tags.*;
import gplx.dbs.*;
/**
 * Page writer for the indexer special page.
 * Renders "xofulltext_indexer.template.html" from the addon's "bin" directory
 * and registers the head tags (shared xowa scripts plus the addon's own css / js).
 */
class Xofulltext_indexer_html extends Xow_special_wtr__base {
  private final byte[] wikis_bry;

  public Xofulltext_indexer_html(byte[] wikis_bry) {
    this.wikis_bry = wikis_bry;
  }

  /** Returns the addon directory: bin/any/xowa/addon/wiki/fulltext/indexer below the app's http root. */
  public static Io_url Addon_dir(Xoa_app app) {
    Io_url http_root = app.Fsys_mgr().Http_root();
    return http_root.GenSubDir_nest("bin", "any", "xowa", "addon", "wiki", "fulltext", "indexer");
  }

  @Override protected Io_url Get_addon_dir(Xoa_app app) {
    return Addon_dir(app);
  }

  @Override protected Io_url Get_mustache_fil(Io_url addon_dir) {
    return addon_dir.GenSubFil_nest("bin", "xofulltext_indexer.template.html");
  }

  @Override protected Mustache_doc_itm Bld_mustache_root(Xoa_app app) {
    return new Xofulltext_indexer_doc(wikis_bry);
  }

  @Override protected void Bld_tags(Xoa_app app, Io_url addon_dir, Xopage_html_data page_data) {
    Io_url http_root = app.Fsys_mgr().Http_root();
    Xopg_tag_mgr head_tags = page_data.Head_tags();

    // shared tags
    Xopg_tag_wtr_.Add__xoelem(head_tags, http_root);
    Xopg_tag_wtr_.Add__xocss(head_tags, http_root);
    Xopg_tag_wtr_.Add__xohelp(head_tags, http_root);
    Xopg_tag_wtr_.Add__xolog(head_tags, http_root);
    Xopg_tag_wtr_.Add__xoajax(head_tags, http_root, app);
    Xopg_tag_wtr_.Add__jquery(head_tags, http_root);
    Xopg_tag_wtr_.Add__xonotify(head_tags, http_root);
    Xopg_alertify_.Add_tags(head_tags, http_root);

    // addon-specific css and js
    Io_url css_fil = addon_dir.GenSubFil_nest("bin", "xofulltext_indexer.css");
    head_tags.Add(Xopg_tag_itm.New_css_file(css_fil));
    Io_url js_fil = addon_dir.GenSubFil_nest("bin", "xofulltext_indexer.js");
    head_tags.Add(Xopg_tag_itm.New_js_file(js_fil));

    page_data.Js_enabled_y_();
  }
}

View File

@@ -0,0 +1,35 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.addons.wikis.fulltexts.indexers.specials; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.indexers.*;
import gplx.xowa.specials.*; import gplx.core.net.qargs.*;
import gplx.xowa.addons.apps.cfgs.*;
/**
 * Special page for the full-text indexer, registered with the meta ("XowaSearchBuilder", "Indexer").
 * Reads the optional "wikis" url argument and renders the page through {@link Xofulltext_indexer_html}.
 */
public class Xofulltext_indexer_special implements Xow_special_page {
  public static final Xow_special_page Prototype = new Xofulltext_indexer_special(Xow_special_meta.New_xo("XowaSearchBuilder", "Indexer"));

  private final Xow_special_meta special__meta;

  Xofulltext_indexer_special(Xow_special_meta special__meta) {
    this.special__meta = special__meta;
  }

  public Xow_special_meta Special__meta() {
    return special__meta;
  }

  /** Returns this same instance. */
  public Xow_special_page Special__clone() {
    return this;
  }

  public void Special__gen(Xow_wiki wiki, Xoa_page page, Xoa_url url, Xoa_ttl ttl) {
    // read the "wikis" query argument; defaults to empty when absent
    Gfo_qarg_mgr url_args = new Gfo_qarg_mgr().Init(url.Qargs_ary());
    byte[] wikis_bry = url_args.Read_bry_or("wikis", Bry_.Empty);

    // build the page from the mustache template
    Xofulltext_indexer_html html_wtr = new Xofulltext_indexer_html(wikis_bry);
    html_wtr.Bld_page_by_mustache(wiki.App(), page, this);
  }
}

View File

@@ -0,0 +1,41 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.addons.wikis.fulltexts.indexers.svcs; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.indexers.*;
import gplx.langs.jsons.*;
import gplx.xowa.htmls.bridges.*;
/**
 * JSON bridge command with the key "xowa.wiki.fulltext.indexer".
 * Dispatches the "index" proc to {@link Xofulltext_indexer_svc}; any other proc is rejected.
 */
public class Xofulltext_indexer_bridge implements Bridge_cmd_itm {
  private static final byte Proc__index = 0;
  private static final Hash_adp_bry proc_hash = Hash_adp_bry.cs()
    .Add_str_byte("index", Proc__index);
  public static final byte[] BRIDGE_KEY = Bry_.new_a7("xowa.wiki.fulltext.indexer");
  public static final Xofulltext_indexer_bridge Prototype = new Xofulltext_indexer_bridge();

  private Xofulltext_indexer_svc svc;

  Xofulltext_indexer_bridge() {}

  public byte[] Key() {
    return BRIDGE_KEY;
  }

  /** Creates the service that handles the requests of this bridge. */
  public void Init_by_app(Xoa_app app) {
    this.svc = new Xofulltext_indexer_svc(app);
  }

  /**
   * Runs the proc named in the message with the message's args.
   * Returns an empty string; throws the "unhandled default" error for an unknown proc.
   */
  public String Exec(Json_nde data) {
    // unknown or missing proc names map to Max_7_bit, which matches no proc
    byte[] proc_bry = data.Get_as_bry_or(Bridge_cmd_mgr.Msg__proc, null);
    byte proc_id = proc_hash.Get_as_byte_or(proc_bry, Byte_ascii.Max_7_bit);
    Json_nde args = data.Get_kv(Bridge_cmd_mgr.Msg__args).Val_as_nde();

    if (proc_id != Proc__index) {
      throw Err_.new_unhandled_default(proc_id);
    }
    svc.Index(args);
    return "";
  }
}

View File

@@ -0,0 +1,80 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.addons.wikis.fulltexts.indexers.svcs; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.indexers.*;
import gplx.core.btries.*;
import gplx.langs.jsons.*;
import gplx.dbs.*; import gplx.xowa.wikis.data.tbls.*;
import gplx.xowa.guis.cbks.*;
import gplx.xowa.addons.apps.cfgs.*;
import gplx.xowa.addons.wikis.fulltexts.indexers.specials.*;
import gplx.xowa.addons.wikis.fulltexts.indexers.bldrs.*;
/**
 * Service behind the indexer bridge: starts an indexing run on a separate thread
 * and reports status notes to the indexer special page through the gui callback manager.
 */
class Xofulltext_indexer_svc implements Gfo_invk {
  private static final String Invk__index = "index";

  private final Xoa_app app;
  private final Xog_cbk_trg cbk_trg = Xog_cbk_trg.New(Xofulltext_indexer_special.Prototype.Special__meta().Ttl_bry());

  public Xofulltext_indexer_svc(Xoa_app app) {
    this.app = app;
  }

  /** Reads the "wikis" argument from the request and launches the indexing thread. */
  public void Index(Json_nde args) {
    // create args
    Xofulltext_indexer_args indexer_args = new Xofulltext_indexer_args(args.Get_as_bry("wikis"));

    // launch thread
    gplx.core.threads.Thread_adp_.Start_by_val("index", Cancelable_.Never, this, Invk__index, indexer_args);
  }

  /** Indexes each pipe-delimited wiki domain in turn; wikis that are missing or already have a search dir are skipped with a note. */
  private void Index(Xofulltext_indexer_args args) {
    byte[][] domain_ary = Bry_split_.Split(args.wikis, Byte_ascii.Pipe);
    for (int i = 0; i < domain_ary.length; i++) {
      byte[] domain = domain_ary[i];

      // get wiki; skip if its root dir is not on disk
      Xow_wiki wiki = app.Wiki_mgri().Get_by_or_make_init_n(domain);
      boolean wiki_exists = Io_mgr.Instance.ExistsDir(wiki.Fsys_mgr().Root_dir());
      if (!wiki_exists) {
        Send_note(": wiki does not exist: ", domain);
        continue;
      }
      wiki.Init_by_wiki();

      // skip if an index is already present
      Io_url search_dir = wiki.Fsys_mgr().Root_dir().GenSubDir_nest("data", "search");
      if (Io_mgr.Instance.ExistsDir(search_dir)) {
        Send_note(": search dir already exists; please delete it manually before reindexing; ", domain);
        continue;
      }

      // run indexer
      Send_note(": wiki index started: ", domain);
      Xofulltext_indexer_ui ui = new Xofulltext_indexer_ui(app.Gui__cbk_mgr(), cbk_trg);
      new Xofulltext_indexer_mgr().Exec((Xowe_wiki)wiki, ui);
      Send_note(": wiki index ended: ", domain);
    }
  }

  /** Sends a status note of the form "{timestamp}{msg}{domain}" to the page. */
  private void Send_note(String msg, byte[] domain) {
    String note = Datetime_now.Get().XtoStr_fmt_yyyy_MM_dd_HH_mm_ss() + msg + String_.new_u8(domain);
    app.Gui__cbk_mgr().Send_json(cbk_trg, "xo.fulltext_indexer.status__note__recv", gplx.core.gfobjs.Gfobj_nde.New()
      .Add_str("note", note));
  }

  public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
    if (!ctx.Match(k, Invk__index)) {
      return Gfo_invk_.Rv_unhandled;
    }
    this.Index((Xofulltext_indexer_args)m.ReadObj("v"));
    return this;
  }
}
/** Arguments of one indexing request. */
class Xofulltext_indexer_args {
  /** Raw "wikis" value of the request; the indexer service splits it on the pipe character. */
  public byte[] wikis;

  public Xofulltext_indexer_args(byte[] wikis_bry) {
    wikis = wikis_bry;
  }
}

View File

@@ -0,0 +1,30 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.addons.wikis.fulltexts.indexers.svcs; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.indexers.*;
import gplx.xowa.guis.cbks.*;
/** Sends indexing progress messages to a callback target through the gui callback manager. */
public class Xofulltext_indexer_ui {
  private final Xog_cbk_mgr cbk_mgr;
  private final Xog_cbk_trg cbk_trg;

  public Xofulltext_indexer_ui(Xog_cbk_mgr cbk_mgr, Xog_cbk_trg cbk_trg) {
    this.cbk_mgr = cbk_mgr;
    this.cbk_trg = cbk_trg;
  }

  /** Sends the text as the "prog" property of a "xo.fulltext_indexer.status__prog__recv" message. */
  public void Send_prog(String prog) {
    gplx.core.gfobjs.Gfobj_nde msg = gplx.core.gfobjs.Gfobj_nde.New().Add_str("prog", prog);
    cbk_mgr.Send_json(cbk_trg, "xo.fulltext_indexer.status__prog__recv", msg);
  }
}

View File

@@ -0,0 +1,24 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.addons.wikis.fulltexts.searchers.caches; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.searchers.*;
/** One cached result line: its sequence number and its html. */
public class Xofulltext_cache_line {
  private final int line_seq;
  private final byte[] line_html;

  public Xofulltext_cache_line(int line_seq, byte[] line_html) {
    this.line_seq = line_seq;
    this.line_html = line_html;
  }

  /** Returns the sequence number given at construction. */
  public int Line_seq() {
    return line_seq;
  }

  /** Returns the html bytes given at construction (same array, not a copy). */
  public byte[] Line_html() {
    return line_html;
  }
}

View File

@@ -0,0 +1,67 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.addons.wikis.fulltexts.searchers.caches; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.searchers.*;
/**
 * In-memory cache of search results, organized as query -> wiki -> page -> lines.
 */
public class Xofulltext_cache_mgr {
  private static final Xofulltext_cache_line[] LINES__EMPTY = new Xofulltext_cache_line[0];
  private final Ordered_hash qry_hash = Ordered_hash_.New();
  private int next_qry_id;

  /** Returns the next query id; ids start at 0 and increase by 1 per call. */
  public int Next_qry_id() {return next_qry_id++;}

  /** Removes all cached queries. The query-id counter is not reset. */
  public void Clear() {
    qry_hash.Clear();
  }

  /**
   * Adds one result line, creating the query / wiki / page entries on first use.
   *
   * @param query_id  id of the query the line belongs to
   * @param query     query text; only stored when the query entry is first created
   * @param wiki_bry  key of the wiki entry
   * @param page_seq  sequence of the page; only stored when the page entry is first created
   * @param page_id   key of the page entry
   * @param line_seq  sequence of the line
   * @param line_html html of the line
   */
  public void Add(int query_id, byte[] query, byte[] wiki_bry, int page_seq, int page_id, int line_seq, byte[] line_html) {
    // get qry
    Xofulltext_cache_qry qry = (Xofulltext_cache_qry)qry_hash.Get_by(query_id);
    if (qry == null) {
      qry = new Xofulltext_cache_qry(query_id, query);
      qry_hash.Add(query_id, qry);
    }

    // get wiki
    Xofulltext_cache_wiki wiki = (Xofulltext_cache_wiki)qry.Wikis().Get_by(wiki_bry);
    if (wiki == null) {
      wiki = new Xofulltext_cache_wiki(wiki_bry);
      qry.Wikis().Add(wiki_bry, wiki);
    }

    // get page
    Xofulltext_cache_page page = (Xofulltext_cache_page)wiki.Pages().Get_by(page_id);
    if (page == null) {
      page = new Xofulltext_cache_page(page_id, page_seq);
      wiki.Pages().Add(page_id, page);
    }

    // add line
    page.Lines().Add(new Xofulltext_cache_line(line_seq, line_html));
  }

  /** Not implemented yet: always returns null. */
  public Object Get_pages_rng(int qry_id, byte[] wiki, int page_seq_bgn, int page_seq_end) {
    return null;
  }

  /**
   * Returns all cached lines of a page except the first one.
   *
   * @return lines 1..n of the page; an empty array when the query, wiki or page is not cached
   *         (for example after {@link #Clear()}) or when the page has at most one line
   */
  public Xofulltext_cache_line[] Get_lines_rest(int qry_id, byte[] wiki_bry, int page_id) {
    // get page; a cache miss at any level yields "no more lines" instead of a NullPointerException
    Xofulltext_cache_qry qry = (Xofulltext_cache_qry)qry_hash.Get_by(qry_id);
    if (qry == null) return LINES__EMPTY;
    Xofulltext_cache_wiki wiki = (Xofulltext_cache_wiki)qry.Wikis().Get_by(wiki_bry);
    if (wiki == null) return LINES__EMPTY;
    Xofulltext_cache_page page = (Xofulltext_cache_page)wiki.Pages().Get_by(page_id);
    if (page == null) return LINES__EMPTY;

    // loop lines from 1 to n; note "1" b/c results will always show at least 1st line
    int lines_len = page.Lines().Len();
    if (lines_len <= 1) return LINES__EMPTY;
    Xofulltext_cache_line[] rv = new Xofulltext_cache_line[lines_len - 1];
    for (int i = 1; i < lines_len; i++) {
      rv[i - 1] = (Xofulltext_cache_line)page.Lines().Get_at(i);
    }
    return rv;
  }
}

View File

@@ -0,0 +1,25 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.addons.wikis.fulltexts.searchers.caches; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.searchers.*;
/** Cached page entry: page id, page sequence and the list of its cached lines. */
public class Xofulltext_cache_page {
  private final int page_id;
  private final int page_seq;
  private final List_adp lines = List_adp_.New();

  public Xofulltext_cache_page(int page_id, int page_seq) {
    this.page_id = page_id;
    this.page_seq = page_seq;
  }

  public int Page_id() {
    return page_id;
  }

  public int Page_seq() {
    return page_seq;
  }

  /** Returns the live list of lines; callers add to it directly. */
  public List_adp Lines() {
    return lines;
  }
}

View File

@@ -0,0 +1,25 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.addons.wikis.fulltexts.searchers.caches; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.searchers.*;
/** Cached query entry: query id, query text and the wikis with results, keyed by byte array. */
public class Xofulltext_cache_qry {
  private final int id;
  private final byte[] qry;
  private final Hash_adp_bry wikis = Hash_adp_bry.cs();

  public Xofulltext_cache_qry(int id, byte[] qry) {
    this.id = id;
    this.qry = qry;
  }

  public int Id() {
    return id;
  }

  public byte[] Qry() {
    return qry;
  }

  /** Returns the live hash of wiki entries; callers add to it directly. */
  public Hash_adp_bry Wikis() {
    return wikis;
  }
}

View File

@@ -0,0 +1,23 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.addons.wikis.fulltexts.searchers.caches; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.searchers.*;
/** Cached wiki entry: the wiki key and an ordered hash of its cached pages. */
public class Xofulltext_cache_wiki {
  private final byte[] wiki;
  private final Ordered_hash pages = Ordered_hash_.New();

  public Xofulltext_cache_wiki(byte[] wiki) {
    this.wiki = wiki;
  }

  public byte[] Wiki() {
    return wiki;
  }

  /** Returns the live hash of page entries; callers add to it directly. */
  public Ordered_hash Pages() {
    return pages;
  }
}

View File

@@ -0,0 +1,20 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.addons.wikis.fulltexts.searchers.mgrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.searchers.*;
import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.uis.*;
/**
 * Strategy interface for full-text search implementations.
 */
public interface Xofulltext_searcher {
/**
 * Searches one wiki.
 *
 * @param ui   receiver for search output; NOTE(review): presumably progress updates and found pages, confirm against implementations
 * @param wiki wiki to search
 * @param args search options (query text, flags, limits)
 */
void Search(Xofulltext_searcher_ui ui, Xow_wiki wiki, Xofulltext_searcher_args args);
}

View File

@@ -0,0 +1,42 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.addons.wikis.fulltexts.searchers.mgrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.searchers.*;
import gplx.langs.jsons.*;
/**
 * Options of one search request, populated from the JSON args of the request.
 * All fields are public and mutable.
 */
public class Xofulltext_searcher_args {
  // matching flags; each defaults to false when absent from the json
  public boolean case_match;
  public boolean auto_wildcard_bgn;
  public boolean auto_wildcard_end;

  // display flags; each defaults to false when absent from the json
  public boolean expand_matches_section;
  public boolean show_all_matches;

  // defaults to 25 when absent from the json
  public int max_pages_per_wiki;

  public byte[] wikis;
  public byte[] query;
  public String namespaces;

  // NOTE: not read from the json by New_by_json; left at 0 there
  public int query_id;

  /** Creates the args from the json node, reading each property by its field name. */
  public static Xofulltext_searcher_args New_by_json(Json_nde args) {
    Xofulltext_searcher_args parsed = new Xofulltext_searcher_args();

    // flags
    parsed.case_match = args.Get_as_bool_or("case_match", false);
    parsed.auto_wildcard_bgn = args.Get_as_bool_or("auto_wildcard_bgn", false);
    parsed.auto_wildcard_end = args.Get_as_bool_or("auto_wildcard_end", false);
    parsed.expand_matches_section = args.Get_as_bool_or("expand_matches_section", false);
    parsed.show_all_matches = args.Get_as_bool_or("show_all_matches", false);

    // limits
    parsed.max_pages_per_wiki = args.Get_as_int_or("max_pages_per_wiki", 25);

    // search scope and text
    parsed.wikis = args.Get_as_bry("wikis");
    parsed.query = args.Get_as_bry("query");
    parsed.namespaces = args.Get_as_str("namespaces");
    return parsed;
  }
}

View File

@@ -0,0 +1,90 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.brutes; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.searchers.*; import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.*;
import gplx.dbs.*;
import gplx.xowa.guis.cbks.*;
import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.uis.*;
import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.brutes.finders.*;
import gplx.xowa.addons.wikis.fulltexts.searchers.caches.*;
// Brute-force full-text searcher: reads every page row from the wiki's page table, loads the page text,
// and runs the parsed query against it. Matching pages are reported to the ui; the first
// max_pages_per_wiki matching pages are also run through the highlight callback.
public class Xofulltext_searcher__brute implements Xofulltext_searcher {
	private final Xofulltext_finder_mgr finder = new Xofulltext_finder_mgr();
	private final Xofulltext_finder_cbk__eval cbk_eval = new Xofulltext_finder_cbk__eval();
	private final Xofulltext_finder_cbk__highlight cbk_highlight;
	public Xofulltext_searcher__brute(Xoa_app app, Xog_cbk_trg cbk_trg, Xofulltext_cache_mgr cache_mgr) {
		this.cbk_highlight = new Xofulltext_finder_cbk__highlight(app, cbk_trg, cache_mgr);
	}
	// Searches one wiki.
	//   ui:   receives progress updates and found pages
	//   wiki: wiki whose page and text tables are scanned
	//   args: query, matching options and display limits
	public void Search(Xofulltext_searcher_ui ui, Xow_wiki wiki, Xofulltext_searcher_args args) {
		// init finder BEFORE opening the reader; if query parsing fails, no reader is left unreleased
		finder.Init(args.query, args.case_match, args.auto_wildcard_bgn, args.auto_wildcard_end, Byte_ascii.Star, Byte_ascii.Dash);

		byte[] wiki_domain = wiki.Domain_bry();
		int found = 0;
		int searched = 0;

		// get pages from db
		// NOTE(review): namespace filter is hard-coded to (0); args.namespaces is not consulted here
		Db_conn page_conn = wiki.Data__core_mgr().Tbl__page().Conn();
		Db_rdr page_rdr = page_conn.Stmt_sql("SELECT * FROM page WHERE page_namespace IN (0) ORDER BY page_score DESC").Exec_select__rls_auto();

		// loop
		try {
			while (page_rdr.Move_next()) {
				// read data from reader
				int page_id = page_rdr.Read_int("page_id");
				int text_db_id = page_rdr.Read_int("page_text_db_id");
				byte[] text_mcase = wiki.Data__core_mgr().Dbs__get_by_id_or_fail(text_db_id).Tbl__text().Select(page_id);
				int ns_id = page_rdr.Read_int("page_namespace");
				byte[] ttl_bry = page_rdr.Read_bry_by_str("page_title");
				Xoa_ttl ttl = wiki.Ttl_parse(ns_id, ttl_bry);

				// eval query
				cbk_eval.Init(ttl.Full_db());
				finder.Match(text_mcase, 0, text_mcase.length, cbk_eval);
				searched++;

				// check if page matches query
				if (cbk_eval.found) {
					++found;

					// update pages found
					ui.Send_wiki_update(wiki_domain, found, searched);

					// do highlight, but only for the first max_pages_per_wiki matching pages
					if (found <= args.max_pages_per_wiki) {
						cbk_highlight.Init(args.query, args.query_id, wiki, page_id, ttl.Full_db(), args.show_all_matches);
						ui.Send_page_add(new Xofulltext_searcher_page
						( args.query_id
						, String_.new_u8(wiki_domain)
						, page_id
						, String_.new_u8(ttl.Full_db())
						, args.expand_matches_section
						));
						// second pass over the same text; this time each match is sent as a highlighted snip
						finder.Match(text_mcase, 0, text_mcase.length, cbk_highlight);
					}
				}

				// update pages found every 100 pages
				if (searched % 100 == 0) {
					ui.Send_wiki_update(wiki_domain, found, searched);
				}
			}
		}
		finally {
			page_rdr.Rls();
		}

		// update one last time for final searched
		ui.Send_wiki_update(wiki_domain, found, searched);
	}
}

View File

@@ -0,0 +1,22 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.brutes.finders; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.searchers.*; import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.*; import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.brutes.*;
import gplx.xowa.guis.cbks.*;
// Callback invoked by Xofulltext_finder_mgr.Match while it scans one page's text.
public interface Xofulltext_finder_cbk {
// Title of the page being scanned; the finder includes it in the warning it logs when matching throws.
byte[] Page_ttl();
// Called for each hook occurrence whose surrounding word satisfies the term's wildcard rules.
//   src:                text being scanned
//   hook_bgn, hook_end: bounds of the matched hook within src
//   word_bgn, word_end: bounds of the word containing the hook (as computed by Xofulltext_word_lang)
//   term:               query node that owns the hook
void Process_item_found(byte[] src, int hook_bgn, int hook_end, int word_bgn, int word_end, Xofulltext_word_node term);
// Called once after the whole text has been scanned; tree_root is the root of the query tree.
void Process_page_done(byte[] src, Xofulltext_word_node tree_root);
}

View File

@@ -0,0 +1,30 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.brutes.finders; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.searchers.*; import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.*; import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.brutes.*;
// Finder callback which decides whether a page satisfies the query:
// every matched term is flagged, and the query tree is evaluated once the page is done.
public class Xofulltext_finder_cbk__eval implements Xofulltext_finder_cbk {
	// true if the last scanned page satisfied the query; set in Process_page_done
	public boolean found;
	private byte[] page_ttl;

	public byte[] Page_ttl() {
		return page_ttl;
	}
	// prepares the callback for a new page
	public void Init(byte[] page_ttl) {
		this.page_ttl = page_ttl;
		this.found = false;
	}
	public void Process_item_found(byte[] src, int hook_bgn, int hook_end, int word_bgn, int word_end, Xofulltext_word_node term) {
		// flag the term; the flags are combined later by Eval
		term.found = true;
	}
	public void Process_page_done(byte[] src, Xofulltext_word_node root) {
		// evaluate the and / or / not tree over the flagged terms
		found = root.Eval();
	}
}

View File

@@ -0,0 +1,122 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.brutes.finders; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.searchers.*; import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.*; import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.brutes.*;
import org.junit.*; import gplx.core.tests.*;
// Tests query evaluation (Xofulltext_finder_mgr + Xofulltext_finder_cbk__eval) against short texts.
// The fixture runs with case_match, auto_wildcard_bgn and auto_wildcard_end all disabled.
public class Xofulltext_finder_cbk__eval__tst {
private final Xofulltext_finder_cbk__eval__fxt fxt = new Xofulltext_finder_cbk__eval__fxt();
@Test public void Exact() {
fxt.Init__search("a");
// y: basic match
fxt.Test__eval_y("a");
// n: no match
fxt.Test__eval_n("z");
// n: wildcard_end not enabled; "a" starts the word "az" but does not end it
fxt.Test__eval_n("az");
}
@Test public void Or() {
fxt.Init__search("a, c");
// y: lone char
fxt.Test__eval_y("a" , "c");
// y: one char
fxt.Test__eval_y("a b", "b c");
// y: both chars
fxt.Test__eval_y("a c", "a b c");
// n: no chars
fxt.Test__eval_n("b");
}
@Test public void And() {
fxt.Init__search("a + c");
// y: both chars
fxt.Test__eval_y("a c", "a b c");
// n: one char only
fxt.Test__eval_n("a", "c", "a b", "b c");
}
@Test public void And__shorthand() {
// a space between words is treated the same as an explicit "+"
fxt.Init__search("a c");
// y: both chars
fxt.Test__eval_y("a b c");
// n: one char only
fxt.Test__eval_n("a", "c");
}
@Test public void Not() {
fxt.Init__search("-a");
// y: no chars
fxt.Test__eval_y("b");
// n: char exists
fxt.Test__eval_n("a");
}
@Test public void Trim_end() {
// punctuation after the word is trimmed before comparing word bounds
fxt.Init__search("a");
// y: single
fxt.Test__eval_y("a!");
// y: many
fxt.Test__eval_y("a!!!");
}
@Test public void Trim_bgn() {
// punctuation before the word is trimmed before comparing word bounds
fxt.Init__search("a");
// y: single
fxt.Test__eval_y("!a");
// y: many
fxt.Test__eval_y("!!!a");
}
@Test public void Trim_both() {
fxt.Init__search("a");
// y: single
fxt.Test__eval_y("'a'");
// y: many
fxt.Test__eval_y("'''a'''");
}
@Test public void Slash() {
fxt.Init__search("a");
// y: slash before, after
fxt.Test__eval_y("a/b/c", "b/a/c", "b/c/a");
}
@Test public void Brack() {
fxt.Init__search("a");
// y: brackets act as word separators
fxt.Test__eval_y("[[a]]");
}
// NOTE(review): the list below looks like inputs that still need tests -- confirm
// .
// ...
// -
// a'b
// https://site/page
// ()
// []
// <>
}
// Test fixture: parses a query once, then checks whether each given text satisfies it.
class Xofulltext_finder_cbk__eval__fxt {
	private final Xofulltext_finder_mgr finder = new Xofulltext_finder_mgr();
	private final Xofulltext_finder_cbk__eval cbk = new Xofulltext_finder_cbk__eval();
	// fixed search options: case-insensitive, no automatic wildcards, "*" as wildcard, "-" as not
	private final boolean case_match = false;
	private final boolean auto_wildcard_bgn = false;
	private final boolean auto_wildcard_end = false;
	private final byte wildcard_byte = Byte_ascii.Star;
	private final byte not_byte = Byte_ascii.Dash;

	// parses the query and prepares the finder
	public void Init__search(String query) {
		byte[] query_bry = Bry_.new_u8(query);
		finder.Init(query_bry, case_match, auto_wildcard_bgn, auto_wildcard_end, wildcard_byte, not_byte);
	}
	// asserts that every text satisfies the query
	public void Test__eval_y(String... texts) {
		Test__eval(Bool_.Y, texts);
	}
	// asserts that no text satisfies the query
	public void Test__eval_n(String... texts) {
		Test__eval(Bool_.N, texts);
	}
	public void Test__eval(boolean expd, String... texts) {
		int texts_len = texts.length;
		for (int i = 0; i < texts_len; i++) {
			String text = texts[i];
			byte[] text_bry = Bry_.new_u8(text);
			// reset result from previous text
			cbk.found = false;
			finder.Match(text_bry, 0, text_bry.length, cbk);
			Gftest.Eq__bool(expd, cbk.found, "query={0} text={1}", finder.Query(), text);
		}
	}
}

View File

@@ -0,0 +1,109 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.brutes.finders; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.searchers.*; import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.*; import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.brutes.*;
import gplx.xowa.guis.cbks.*;
import gplx.xowa.addons.wikis.fulltexts.searchers.caches.*;
// Finder callback which turns each match into an html snip (matched text wrapped in a highlight span),
// stores the snip in the cache, and notifies the gui.
public class Xofulltext_finder_cbk__highlight implements Xofulltext_finder_cbk {
	private static final byte[] Angle_bgn_escaped = Bry_.new_a7("&lt;");
	private final Xoa_app app;
	private final Xog_cbk_trg cbk_trg;
	private final Xofulltext_cache_mgr cache_mgr;
	private final Bry_bfr tmp_bfr = Bry_bfr_.New();
	// per-page state; set by Init
	private Xow_wiki wiki;
	private byte[] qry;
	private int qry_id;
	private int page_id;
	private byte[] page_ttl;
	private boolean show_all_matches;
	// number of matches processed so far for the current page
	public int found;

	public Xofulltext_finder_cbk__highlight(Xoa_app app, Xog_cbk_trg cbk_trg, Xofulltext_cache_mgr cache_mgr) {
		this.app = app;
		this.cbk_trg = cbk_trg;
		this.cache_mgr = cache_mgr;
	}
	public byte[] Page_ttl() {
		return page_ttl;
	}
	// prepares the callback for a new page and resets the match counter
	public void Init(byte[] qry, int qry_id, Xow_wiki wiki, int page_id, byte[] page_ttl, boolean show_all_matches) {
		this.qry = qry;
		this.qry_id = qry_id;
		this.wiki = wiki;
		this.page_id = page_id;
		this.page_ttl = page_ttl;
		this.show_all_matches = show_all_matches;
		this.found = 0;
	}
	public void Process_item_found(byte[] src, int hook_bgn, int hook_end, int word_bgn, int word_end, Xofulltext_word_node term) {
		// snip covers roughly 50 bytes on each side of the hook, widened to whitespace
		int snip_bgn = Calc_snip_bgn(src, hook_bgn);
		int snip_end = Calc_snip_end(src, hook_end);

		// build snip: text before hook, highlighted hook, text after hook
		Add_snip(tmp_bfr, src, snip_bgn, hook_bgn);
		tmp_bfr.Add_str_a7("<span class='snip_highlight'>");
		Add_snip(tmp_bfr, src, hook_bgn, hook_end);
		tmp_bfr.Add_str_a7("</span>");
		Add_snip(tmp_bfr, src, hook_end, snip_end);
		byte[] line_html = tmp_bfr.To_bry_and_clear();

		// send the line to the gui; only the first line is sent unless show_all_matches is set
		boolean send_line = show_all_matches || found == 0;
		if (send_line) {
			app.Gui__cbk_mgr().Send_json(cbk_trg, "xo.fulltext_searcher.results__line__add__recv", gplx.core.gfobjs.Gfobj_nde.New()
				.Add_bry("wiki", wiki.Domain_bry())
				.Add_int("page_id", page_id)
				.Add_int("line", found + 1)
				.Add_bry("html", line_html)
				);
		}

		// every line is cached, whether or not it was sent
		cache_mgr.Add(qry_id, qry, wiki.Domain_bry(), -1, page_id, found, line_html);

		// bump count and tell the gui how many matches the page has so far
		found++;
		app.Gui__cbk_mgr().Send_json(cbk_trg, "xo.fulltext_searcher.results__page__update__recv", gplx.core.gfobjs.Gfobj_nde.New()
			.Add_bry("wiki", wiki.Domain_bry())
			.Add_int("page_id", page_id)
			.Add_int("found", found)
			.Add_bool("show_all_matches", show_all_matches)
			);
	}
	public void Process_page_done(byte[] src, Xofulltext_word_node tree_root) {}

	// start of snip: 50 bytes before the hook, moved back to just after the preceding whitespace; 0 if that falls before the start of src
	private static int Calc_snip_bgn(byte[] src, int hook_bgn) {
		int rv = hook_bgn - 50;
		return rv < 0
			? 0
			: Bry_find_.Find_bwd_ws(src, rv, 0) + 1;
	}
	// end of snip: 50 bytes after the hook, moved forward to the next whitespace; src.length if that falls at or past the end of src
	private static int Calc_snip_end(byte[] src, int hook_end) {
		int src_len = src.length;
		int rv = hook_end + 50;
		if (rv >= src_len) return src_len;
		rv = Bry_find_.Find_fwd_until_ws(src, rv, src_len);
		return rv == Bry_find_.Not_found ? src_len : rv;	// no whitespace found before end of src
	}
	// copies src[bgn..end) to bfr, escaping "<" and converting new-lines to inline br tags
	private void Add_snip(Bry_bfr bfr, byte[] src, int bgn, int end) {
		int pos = bgn;
		while (pos < end) {
			byte b = src[pos++];
			if (b == Byte_ascii.Angle_bgn)
				bfr.Add(Angle_bgn_escaped);
			else if (b == Byte_ascii.Nl)
				bfr.Add(gplx.langs.htmls.Gfh_tag_.Br_inl);
			else
				bfr.Add_byte(b);
		}
	}
}

View File

@@ -0,0 +1,81 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.brutes.finders; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.searchers.*; import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.*; import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.brutes.*;
import gplx.xowa.guis.cbks.*;
import gplx.core.btries.*;
import gplx.xowa.addons.wikis.searchs.searchers.crts.*;
// Parses a search query into a tree of word nodes and scans text for those words.
// Each word's "hook" (the word minus any wildcard bytes) is registered in a trie; Match walks the text,
// looks up hooks in the trie, and reports qualifying occurrences to a callback.
public class Xofulltext_finder_mgr {
	private Btrie_slim_mgr hook_trie;
	private Xofulltext_word_node tree_root;
	private final Srch_crt_parser parser = new Srch_crt_parser(Srch_crt_scanner_syms.Dflt);
	private final Btrie_rv trv = new Btrie_rv();
	private final Xofulltext_word_lang lang = new Xofulltext_word_lang();
	private final Xofulltext_word_bounds word_bounds = new Xofulltext_word_bounds();
	public byte[] Query() {return query;} private byte[] query;
	// Prepares the finder for a new query. Must be called before Match.
	//   query:                               raw query text
	//   case_match:                          true for a case-sensitive trie; false for a case-insensitive one
	//   auto_wildcard_bgn, auto_wildcard_end: treat every word as if it had a wildcard at its start / end
	//   wildchar_byte, not_byte:             bytes used for wildcard and negation
	public void Init(byte[] query, boolean case_match, boolean auto_wildcard_bgn, boolean auto_wildcard_end, byte wildchar_byte, byte not_byte) {
		this.query = query;

		// create a new hook_trie based on case_match
		this.hook_trie = case_match ? Btrie_slim_mgr.cs() : Btrie_slim_mgr.ci_u8();

		// create a new tree_root for eval
		this.tree_root = Xofulltext_word_node_.New_root(parser.Parse_or_invalid(query).Root, hook_trie, auto_wildcard_bgn, auto_wildcard_end, wildchar_byte, not_byte);
	}
	// Scans src[src_bgn..src_end) and reports matches to cbk.
	// cbk.Process_item_found is called for each qualifying word; cbk.Process_page_done is called once at the end.
	public void Match(byte[] src, int src_bgn, int src_end, Xofulltext_finder_cbk cbk) {
		// init and clear; start at src_bgn (was previously hard-coded to 0, ignoring the argument)
		int cur = src_bgn;
		tree_root.Clear();

		// scan through text one-byte at a time
		// NOTE: skipping ahead to word-start instead of going byte-by-byte may seem more performant, but will still need to do substring analysis b/c of wildcards and punctuation; EX: "abc" and " 'abc' "; "*abc" and " xyzabc. "
		// loop stops at src_end; there is no byte at src_end to match against
		while (cur < src_end) {
			// check each byte against hook_trie
			Object hook_obj = hook_trie.Match_at(trv, src, cur, src_end);

			// current byte matches no hooks; go to next byte
			if (hook_obj == null) {
				cur++;
				continue;
			}

			// current byte matches a hook; get hook and hook_end
			Xofulltext_word_node hook = (Xofulltext_word_node)hook_obj;
			int hook_bgn = cur;
			int hook_end = cur + hook.word_hook.length;

			try {
				// get word_bounds
				lang.Get_word_bounds(word_bounds, trv, src, src_end, hook_bgn, hook_end);
				int word_bgn = word_bounds.word_bgn;
				int word_end = word_bounds.word_end;

				// check if current word matches criteria-word
				if (hook.Match_word(lang, src, hook_bgn, hook_end, word_bgn, word_end)) {
					cbk.Process_item_found(src, hook_bgn, hook_end, word_bgn, word_end, hook);
				}

				// update position to word_end
				cur = word_end;
			} catch (Exception e) {
				// best-effort: log, skip past the hook, and keep scanning the rest of the page
				cur = hook_end;
				Gfo_usr_dlg_.Instance.Warn_many("", "", "fatal error in match; page=~{0} hook=~{1} src=~{2}", cbk.Page_ttl(), hook.word_orig, Err_.Message_gplx_log(e));
			}
		}

		// mark page done
		cbk.Process_page_done(src, tree_root);
	}
}

View File

@@ -0,0 +1,24 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.brutes.finders; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.searchers.*; import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.*; import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.brutes.*;
// Mutable holder for the bounds of one word; reused across calls to avoid allocating a new object per match.
public class Xofulltext_word_bounds {
	// position of the word's first byte
	public int word_bgn;
	// position just after the word's last byte
	public int word_end;

	// overwrites both bounds
	public void Init(int bgn, int end) {
		word_bgn = bgn;
		word_end = end;
	}
}

View File

@@ -0,0 +1,119 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.brutes.finders; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.searchers.*; import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.*; import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.brutes.*;
import gplx.core.btries.*;
import gplx.core.intls.*;
// Finds the bounds of the word surrounding a hook: expands outwards to the nearest separator on each side,
// then trims punctuation from both ends.
public class Xofulltext_word_lang {
// separators which end a word: whitespace, plus slash, parens, brackets and angle-brackets
private final Btrie_slim_mgr ws_bgn = Btrie_slim_mgr.cs()
.Add_many_str("\t", "\n", "\r", " ", "/", "(", ")", "[", "]", "<", ">");
private final Btrie_slim_mgr ws_end;
// punctuation which is trimmed from the ends of a word
private final Btrie_slim_mgr punct_bgn = Btrie_slim_mgr.cs()
.Add_many_str(".", ",", "?", "!", ":", ";", "'", "\"", "-")
;
private final Btrie_slim_mgr punct_end;
public Xofulltext_word_lang() {
// the same tries are used for both the bgn and end side
this.ws_end = ws_bgn;
this.punct_end = punct_bgn;
}
// Computes the word bounds around src[hook_bgn..hook_end) and stores them in word_bounds.
//   word_bounds:        receives the result via Init
//   trv:                scratch object passed to the trie lookups
//   src, src_end:       text being scanned and its end position
//   hook_bgn, hook_end: bounds of the matched hook
public void Get_word_bounds(Xofulltext_word_bounds word_bounds, Btrie_rv trv, byte[] src, int src_end, int hook_bgn, int hook_end) {
int tmp_pos = -1;
Object tmp_obj = null;
// find word_bgn
int word_bgn = hook_bgn;
tmp_pos = word_bgn;
while (true) {
// stop if BOS
if (tmp_pos == 0) break;
// move back one char
tmp_pos = Utf8_.Get_prv_char_pos0(src, tmp_pos);
// check if char is ws
tmp_obj = ws_bgn.Match_at(trv, src, tmp_pos, hook_end);
// char is ws -> stop
if (tmp_obj != null) break;
// char is not ws -> update word_bgn
word_bgn = tmp_pos;
}
// find word_end
int word_end = hook_end;
tmp_pos = word_end;
while (true) {
// stop if passed EOS
if (tmp_pos >= src_end) break;
// check if char is ws
tmp_obj = ws_end.Match_at(trv, src, tmp_pos, src_end);
// stop if ws
if (tmp_obj != null) break;
// move forward one byte
tmp_pos++;
// update word_end
word_end = tmp_pos;
}
// trim punct at bgn; EX: "'abc" -> "abc"
if (word_bgn < hook_bgn) {
tmp_pos = word_bgn;
while (true) {
// stop if reached hook-bgn; never trim into the hook itself
if (tmp_pos >= hook_bgn) break;
// check if char is punct
tmp_obj = punct_bgn.Match_at(trv, src, tmp_pos, word_end);
// stop if not a punct
if (tmp_obj == null) break;
// move forward one byte
tmp_pos++;
// update word_bgn
word_bgn = tmp_pos;
}
}
// trim punct at end; EX: "abc." -> "abc"
if (word_end > hook_end) {
tmp_pos = word_end;
while (true) {
// scan bwd one char
tmp_pos = Utf8_.Get_prv_char_pos0(src, tmp_pos);
// stop if passed hook-end; never trim into the hook itself
if (tmp_pos < hook_end) break;
// check if char is punct
tmp_obj = punct_end.Match_at(trv, src, tmp_pos, word_end);
// stop if not a punct
if (tmp_obj == null) break;
// update word_end
word_end = tmp_pos;
}
}
word_bounds.Init(word_bgn, word_end);
}
}

View File

@@ -0,0 +1,69 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.brutes.finders; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.searchers.*; import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.*; import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.brutes.*;
import gplx.core.btries.*;
import gplx.xowa.addons.wikis.searchs.searchers.crts.*;
// One node of the parsed query tree: either a word (leaf) or an and / or / not operator over sub-nodes.
public class Xofulltext_word_node {
	// node type; one of the Srch_crt_itm.Tid__* values
	public int tid;
	// child nodes
	public Xofulltext_word_node[] subs;
	// word as typed in the query, including any wildcard bytes
	public byte[] word_orig;
	// word with leading / trailing wildcard bytes removed; this is what gets registered in the trie
	public byte[] word_hook;
	public boolean wildcard_at_bgn;
	public boolean wildcard_at_end;
	// set by the eval callback when the word is found on the current page
	public boolean found;

	// Returns true if the hook's position within the word is acceptable:
	// without a wildcard on a side, the hook must reach the word boundary on that side.
	public boolean Match_word(Xofulltext_word_lang ctx, byte[] src, int hook_bgn, int hook_end, int word_bgn, int word_end) {
		boolean bgn_ok = wildcard_at_bgn || hook_bgn == word_bgn;
		boolean end_ok = wildcard_at_end || hook_end == word_end;
		return bgn_ok && end_ok;
	}
	// resets the found flag on this node and all nodes below it
	public void Clear() {
		found = false;
		int subs_len = subs.length;
		for (int i = 0; i < subs_len; i++)
			subs[i].Clear();
	}
	// evaluates this node against the found flags set during the scan
	public boolean Eval() {
		switch (tid) {
			case Srch_crt_itm.Tid__word:
			case Srch_crt_itm.Tid__word_quote:
				return found;
			case Srch_crt_itm.Tid__not:
				return !subs[0].Eval();
			case Srch_crt_itm.Tid__and:
				// true unless some sub is false
				return !Any_sub_evals_to(false);
			case Srch_crt_itm.Tid__or:
				// true if some sub is true
				return Any_sub_evals_to(true);
			case Srch_crt_itm.Tid__invalid:
				return false; // should not happen
			default:
				throw Err_.new_unhandled_default(tid);
		}
	}
	// evaluates subs in order and stops at the first one whose result equals val
	private boolean Any_sub_evals_to(boolean val) {
		for (Xofulltext_word_node sub : subs) {
			if (sub.Eval() == val)
				return true;
		}
		return false;
	}
}

View File

@@ -0,0 +1,77 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.brutes.finders; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.searchers.*; import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.*; import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.brutes.*;
import gplx.core.btries.*;
import gplx.xowa.addons.wikis.searchs.searchers.crts.*;
// Factory which converts the parser's criteria tree (Srch_crt_itm) into a tree of Xofulltext_word_node.
public class Xofulltext_word_node_ {
	// Recursively builds a node for src and all of its subs. Word nodes also register their hook in word_trie.
	//   src:                                  criteria item produced by the query parser
	//   word_trie:                            trie which receives one entry per distinct hook
	//   auto_wildcard_bgn, auto_wildcard_end: initial wildcard flags applied to every word
	//   wildchar_byte:                        byte which marks a wildcard at the start / end of a word
	//   not_byte:                             passed through to recursive calls; not otherwise used here
	public static Xofulltext_word_node New_root(Srch_crt_itm src, Btrie_slim_mgr word_trie, boolean auto_wildcard_bgn, boolean auto_wildcard_end, byte wildchar_byte, byte not_byte) {
		Xofulltext_word_node trg = new Xofulltext_word_node();
		trg.tid = src.Tid;

		// set word-related props
		boolean is_word = trg.tid == Srch_crt_itm.Tid__word || trg.tid == Srch_crt_itm.Tid__word_quote;
		if (is_word)
			Init_word(trg, src.Raw, word_trie, auto_wildcard_bgn, auto_wildcard_end, wildchar_byte);

		// set subs
		Srch_crt_itm[] src_subs = src.Subs;
		int subs_len = src_subs.length;
		Xofulltext_word_node[] trg_subs = new Xofulltext_word_node[subs_len];
		trg.subs = trg_subs;
		for (int i = 0; i < subs_len; i++)
			trg_subs[i] = New_root(src_subs[i], word_trie, auto_wildcard_bgn, auto_wildcard_end, wildchar_byte, not_byte);
		return trg;
	}
	// fills the word props of trg from word_orig (EX: "abc*") and registers the hook in word_trie
	private static void Init_word(Xofulltext_word_node trg, byte[] word_orig, Btrie_slim_mgr word_trie, boolean auto_wildcard_bgn, boolean auto_wildcard_end, byte wildchar_byte) {
		int word_len = word_orig.length;
		int hook_bgn = 0;
		int hook_end = word_len;

		// handle wildcard at bgn; EX: "*a"; a lone wildcard byte is kept as a literal
		boolean has_wildcard_bgn = auto_wildcard_bgn;
		if (word_len > hook_bgn + 1 && word_orig[hook_bgn] == wildchar_byte) {
			has_wildcard_bgn = true;
			hook_bgn++;
		}

		// handle wildcard at end; EX: "a*"; the length check uses the updated hook_bgn so at least one byte remains
		boolean has_wildcard_end = auto_wildcard_end;
		if (word_len > hook_bgn + 1 && word_orig[hook_end - 1] == wildchar_byte) {
			has_wildcard_end = true;
			hook_end--;
		}

		// get hook; only slice when some wildcard flag is set
		byte[] word_hook = word_orig;
		if (has_wildcard_bgn || has_wildcard_end)
			word_hook = Bry_.Mid(word_orig, hook_bgn, hook_end);

		// assign to trg
		trg.word_orig = word_orig;
		trg.word_hook = word_hook;
		trg.wildcard_at_bgn = has_wildcard_bgn;
		trg.wildcard_at_end = has_wildcard_end;

		// add to trie; don't add if exists
		// NOTE(review): when two words share a hook, only the first node is registered -- confirm this is intended
		if (word_trie.Match_exact(word_hook) == null)
			word_trie.Add_obj(word_hook, trg);
	}
}

View File

@@ -0,0 +1,44 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.gflucenes; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.searchers.*; import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.*;
import gplx.gflucene.*;
import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.uis.*;
// Full-text searcher which delegates to a Gflucene index stored under the wiki's "data/search" directory.
public class Xofulltext_searcher__lucene implements Xofulltext_searcher {
	private final Gflucene_searcher searcher = new Gflucene_searcher();
	// Runs the query against the wiki's index and sends each found page to cbk.
	//   cbk:  receives one Send_page_add per result
	//   wiki: wiki whose index directory is searched
	//   args: supplies query, max_pages_per_wiki and query_id
	public void Search(Xofulltext_searcher_ui cbk, Xow_wiki wiki, Xofulltext_searcher_args args) {
		// create list
		List_adp list = List_adp_.New();

		// init searcher with wiki
		searcher.Init(wiki.Fsys_mgr().Root_dir().GenSubDir_nest("data", "search").Xto_api());

		try {
			// exec search
			searcher.Exec(list, new Gflucene_searcher_data(String_.new_u8(args.query), args.max_pages_per_wiki));
		}
		finally {
			// term; in finally so that the searcher is always terminated, even when Exec throws
			searcher.Term();
		}

		// loop list
		int len = list.Len();
		for (int i = 0; i < len; i++) {
			Gflucene_index_data found = (Gflucene_index_data)list.Get_at(i);

			// call page found; expand_matches_section is always false for this searcher
			Xofulltext_searcher_page page = new Xofulltext_searcher_page(args.query_id, wiki.Domain_str(), found.page_id, found.title, false);
			cbk.Send_page_add(page);
		}
	}
}

View File

@@ -0,0 +1,28 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.uis; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.searchers.*; import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.*;
// Immutable value object describing one matching line (excerpt) within a found page.
public class Xofulltext_searcher_line {
	private final String wiki_domain;
	private final int page_id;
	private final int found_idx;
	private final String excerpt;

	public Xofulltext_searcher_line(String wiki_domain, int page_id, int found_idx, String excerpt) {
		this.wiki_domain = wiki_domain;
		this.page_id = page_id;
		this.found_idx = found_idx;
		this.excerpt = excerpt;
	}
	// domain of the wiki which contains the page
	public String Wiki_domain() {
		return wiki_domain;
	}
	// id of the page which contains the line
	public int Page_id() {
		return page_id;
	}
	// index of this match within the page
	public int Found_idx() {
		return found_idx;
	}
	// text of the excerpt
	public String Excerpt() {
		return excerpt;
	}
}

View File

@@ -0,0 +1,30 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.uis; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.searchers.*; import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.*;
// Immutable value object describing one page found by a search.
public class Xofulltext_searcher_page {
	private final int query_id;
	private final String wiki_domain;
	private final int page_id;
	private final String page_title;
	private final boolean expand_matches_section;

	public Xofulltext_searcher_page(int query_id, String wiki_domain, int page_id, String page_title, boolean expand_matches_section) {
		this.query_id = query_id;
		this.wiki_domain = wiki_domain;
		this.page_id = page_id;
		this.page_title = page_title;
		this.expand_matches_section = expand_matches_section;
	}
	// id of the query which produced this result
	public int Query_id() {
		return query_id;
	}
	// domain of the wiki which contains the page
	public String Wiki_domain() {
		return wiki_domain;
	}
	public int Page_id() {
		return page_id;
	}
	public String Page_title() {
		return page_title;
	}
	// display option which is passed through to the gui
	public boolean Expand_matches_section() {
		return expand_matches_section;
	}
}

View File

@@ -0,0 +1,22 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.uis; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.searchers.*; import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.*;
// Receives search progress and results from a searcher so that they can be shown to the user.
public interface Xofulltext_searcher_ui {
// Announces a wiki which will appear in the results.
void Send_wiki_add(byte[] wiki_domain);
// Reports running totals for a wiki: number of pages found and number of pages searched so far.
void Send_wiki_update(byte[] wiki, int found, int searched);
// Reports a page which matched the query.
void Send_page_add(Xofulltext_searcher_page page);
// Reports one matching line (excerpt) within a found page.
void Send_line_add(Xofulltext_searcher_line line);
}

View File

@@ -0,0 +1,54 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.uis; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.searchers.*; import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.*;
import gplx.xowa.guis.cbks.*;
/**
 * Xofulltext_searcher_ui implementation that packs every notification into a Gfobj_nde and
 * hands it to the callback manager's Send_json, addressed to a fixed callback target.
 */
public class Xofulltext_searcher_ui__gui implements Xofulltext_searcher_ui {
	// receiver function names passed as the second argument of Send_json
	private static final String Recv__wiki__add    = "xo.fulltext_searcher.results__wiki__add__recv";
	private static final String Recv__wiki__update = "xo.fulltext_searcher.results__wiki__update__recv";
	private static final String Recv__page__add    = "xo.fulltext_searcher.results__page__add__recv";
	private static final String Recv__line__add    = "xo.fulltext_searcher.results__line__add__recv";

	private final Xog_cbk_mgr cbk_mgr;
	private final Xog_cbk_trg cbk_trg;

	/**
	 * @param cbk_mgr callback manager used to send every message
	 * @param cbk_trg callback target every message is addressed to
	 */
	public Xofulltext_searcher_ui__gui(Xog_cbk_mgr cbk_mgr, Xog_cbk_trg cbk_trg) {
		this.cbk_mgr = cbk_mgr;
		this.cbk_trg = cbk_trg;
	}

	/** Sends a "wiki add" message whose only field is "wiki". */
	public void Send_wiki_add(byte[] wiki_domain) {
		gplx.core.gfobjs.Gfobj_nde payload = gplx.core.gfobjs.Gfobj_nde.New()
			.Add_bry("wiki", wiki_domain);
		Dispatch(Recv__wiki__add, payload);
	}

	/** Sends a "wiki update" message with fields "wiki", "found" and "searched". */
	public void Send_wiki_update(byte[] wiki, int found, int searched) {
		gplx.core.gfobjs.Gfobj_nde payload = gplx.core.gfobjs.Gfobj_nde.New()
			.Add_bry("wiki", wiki)
			.Add_int("found", found)
			.Add_int("searched", searched);
		Dispatch(Recv__wiki__update, payload);
	}

	/** Sends a "page add" message built from the accessors of the given page. */
	public void Send_page_add(Xofulltext_searcher_page page) {
		gplx.core.gfobjs.Gfobj_nde payload = gplx.core.gfobjs.Gfobj_nde.New()
			.Add_int("query_id", page.Query_id())
			.Add_str("wiki", page.Wiki_domain())
			.Add_int("page_id", page.Page_id())
			.Add_str("page_ttl", page.Page_title())
			.Add_bool("expand_matches_section", page.Expand_matches_section());
		Dispatch(Recv__page__add, payload);
	}

	/** Sends a "line add" message; the excerpt is transmitted under the key "html". */
	public void Send_line_add(Xofulltext_searcher_line match) {
		gplx.core.gfobjs.Gfobj_nde payload = gplx.core.gfobjs.Gfobj_nde.New()
			.Add_str("wiki", match.Wiki_domain())
			.Add_int("page_id", match.Page_id())
			.Add_int("line", match.Found_idx())
			.Add_str("html", match.Excerpt());
		Dispatch(Recv__line__add, payload);
	}

	// single exit point for all outgoing messages
	private void Dispatch(String recv_func, gplx.core.gfobjs.Gfobj_nde payload) {
		cbk_mgr.Send_json(cbk_trg, recv_func, payload);
	}
}

View File

@@ -0,0 +1,64 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.addons.wikis.fulltexts.searchers.specials; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.searchers.*;
import gplx.langs.mustaches.*;
/**
 * Mustache document backing the full-text search special page.
 * Text-like options are written through Mustache__write; boolean options are exposed as sections through Mustache__subs.
 */
public class Xofulltext_searcher_doc implements Mustache_doc_itm {
	private final byte[] query;
	private final boolean case_match;
	private final boolean auto_wildcard_bgn;
	private final boolean auto_wildcard_end;
	private final boolean expand_matches_section;
	private final boolean show_all_matches;
	private final int max_pages_per_wiki;
	private final String wikis;
	private final String namespaces;

	/** Stores every argument unchanged; no validation is performed. */
	public Xofulltext_searcher_doc
		( byte[] query, boolean case_match, boolean auto_wildcard_bgn, boolean auto_wildcard_end
		, boolean expand_matches_section, boolean show_all_matches
		, int max_pages_per_wiki
		, String wikis, String namespaces) {
		this.query = query;
		this.case_match = case_match;
		this.auto_wildcard_bgn = auto_wildcard_bgn;
		this.auto_wildcard_end = auto_wildcard_end;
		this.expand_matches_section = expand_matches_section;
		this.show_all_matches = show_all_matches;
		this.max_pages_per_wiki = max_pages_per_wiki;
		this.wikis = wikis;
		this.namespaces = namespaces;
	}

	/**
	 * Writes the value for "wikis", "namespaces", "max_pages_per_wiki" or "query" to the buffer.
	 * @return true when the key was one of the four handled keys; false otherwise (nothing is written)
	 */
	public boolean Mustache__write(String key, Mustache_bfr bfr) {
		if (String_.Eq(key, "wikis")) {
			bfr.Add_str_u8(wikis);
			return true;
		}
		if (String_.Eq(key, "namespaces")) {
			bfr.Add_str_u8(namespaces);
			return true;
		}
		if (String_.Eq(key, "max_pages_per_wiki")) {
			bfr.Add_int(max_pages_per_wiki);
			return true;
		}
		if (String_.Eq(key, "query")) {
			bfr.Add_bry(query);
			return true;
		}
		return false;
	}

	/**
	 * Resolves a boolean option by name and wraps it with Mustache_doc_itm_.Ary__bool.
	 * @return the wrapped flag for a known key; Mustache_doc_itm_.Ary__empty for any other key
	 */
	public Mustache_doc_itm[] Mustache__subs(String key) {
		boolean flag;
		if      (String_.Eq(key, "case_match"))             flag = case_match;
		else if (String_.Eq(key, "auto_wildcard_bgn"))      flag = auto_wildcard_bgn;
		else if (String_.Eq(key, "auto_wildcard_end"))      flag = auto_wildcard_end;
		else if (String_.Eq(key, "expand_matches_section")) flag = expand_matches_section;
		else if (String_.Eq(key, "show_all_matches"))       flag = show_all_matches;
		else
			return Mustache_doc_itm_.Ary__empty;
		return Mustache_doc_itm_.Ary__bool(flag);
	}
}

View File

@@ -0,0 +1,67 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.addons.wikis.fulltexts.searchers.specials; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.searchers.*;
import gplx.xowa.specials.*; import gplx.langs.mustaches.*; import gplx.xowa.wikis.pages.*; import gplx.xowa.wikis.pages.tags.*;
import gplx.dbs.*;
/**
 * Page writer for the full-text search special page: supplies the addon directory, the mustache
 * template, the root mustache document and the head tags (scripts / stylesheets) for the page.
 */
class Xofulltext_searcher_html extends Xow_special_wtr__base {
	private final byte[] query;
	private final boolean case_match;
	private final boolean auto_wildcard_bgn;
	private final boolean auto_wildcard_end;
	private final boolean expand_matches_section;
	private final boolean show_all_matches;
	private final int max_pages_per_wiki;
	private final String wikis;
	private final String namespaces;

	/** Stores the search options; they are later handed unchanged to Xofulltext_searcher_doc. */
	public Xofulltext_searcher_html
		( byte[] query, boolean case_match, boolean auto_wildcard_bgn, boolean auto_wildcard_end
		, boolean expand_matches_section, boolean show_all_matches
		, int max_pages_per_wiki
		, String wikis, String namespaces) {
		this.query = query;
		this.case_match = case_match;
		this.auto_wildcard_bgn = auto_wildcard_bgn;
		this.auto_wildcard_end = auto_wildcard_end;
		this.expand_matches_section = expand_matches_section;
		this.show_all_matches = show_all_matches;
		this.max_pages_per_wiki = max_pages_per_wiki;
		this.wikis = wikis;
		this.namespaces = namespaces;
	}

	@Override protected Io_url Get_addon_dir(Xoa_app app) {
		return Addon_dir(app);
	}

	@Override protected Io_url Get_mustache_fil(Io_url addon_dir) {
		return addon_dir.GenSubFil_nest("bin", "xofulltext_searcher.template.html");
	}

	@Override protected Mustache_doc_itm Bld_mustache_root(Xoa_app app) {
		Mustache_doc_itm root = new Xofulltext_searcher_doc
			( query, case_match, auto_wildcard_bgn, auto_wildcard_end
			, expand_matches_section, show_all_matches
			, max_pages_per_wiki, wikis, namespaces);
		return root;
	}

	@Override protected void Bld_tags(Xoa_app app, Io_url addon_dir, Xopage_html_data page_data) {
		Xopg_tag_mgr head_tags = page_data.Head_tags();
		Io_url http_root = app.Fsys_mgr().Http_root();

		// shared helpers, registered in the same order as before
		Xopg_tag_wtr_.Add__xoelem(head_tags, http_root);
		Xopg_tag_wtr_.Add__xocss(head_tags, http_root);
		Xopg_tag_wtr_.Add__xohelp(head_tags, http_root);
		Xopg_tag_wtr_.Add__xolog(head_tags, http_root);
		Xopg_tag_wtr_.Add__xoajax(head_tags, http_root, app);
		Xopg_tag_wtr_.Add__jquery(head_tags, http_root);
		Xopg_tag_wtr_.Add__xonotify(head_tags, http_root);
		Xopg_alertify_.Add_tags(head_tags, http_root);

		// addon-specific stylesheet and script
		Io_url css_fil = addon_dir.GenSubFil_nest("bin", "xofulltext_searcher.css");
		head_tags.Add(Xopg_tag_itm.New_css_file(css_fil));
		Io_url js_fil = addon_dir.GenSubFil_nest("bin", "xofulltext_searcher.js");
		head_tags.Add(Xopg_tag_itm.New_js_file(js_fil));

		page_data.Js_enabled_y_();
	}

	/** Returns the searcher addon directory: bin/any/xowa/addon/wiki/fulltext/searcher under the app's http root. */
	public static Io_url Addon_dir(Xoa_app app) {
		Io_url http_root = app.Fsys_mgr().Http_root();
		return http_root.GenSubDir_nest("bin", "any", "xowa", "addon", "wiki", "fulltext", "searcher");
	}
}

View File

@@ -0,0 +1,43 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.addons.wikis.fulltexts.searchers.specials; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.searchers.*;
import gplx.xowa.specials.*; import gplx.core.net.qargs.*;
import gplx.xowa.addons.apps.cfgs.*;
/**
 * Special page ("XowaSearch") that renders the full-text search form.
 * The query comes from the url's "query" argument; the remaining options come from app-level config.
 */
public class Xofulltext_searcher_special implements Xow_special_page {
	private final Xow_special_meta special__meta;

	Xofulltext_searcher_special(Xow_special_meta special__meta) {
		this.special__meta = special__meta;
	}

	/** Reads the query and the saved options, then builds the page through the mustache writer. */
	public void Special__gen(Xow_wiki wiki, Xoa_page page, Xoa_url url, Xoa_ttl ttl) {
		// query text, if the url carries one; otherwise empty
		Gfo_qarg_mgr qargs = new Gfo_qarg_mgr().Init(url.Qargs_ary());
		byte[] query = qargs.Read_bry_or("query", Bry_.Empty);

		// saved options, each with its fallback value
		Xocfg_mgr cfg_mgr = wiki.App().Cfg();
		boolean case_match             = cfg_mgr.Get_bool_app_or("xowa.addon.search.fulltext.special.case_match", false);
		boolean auto_wildcard_bgn      = cfg_mgr.Get_bool_app_or("xowa.addon.search.fulltext.special.auto_wildcard_bgn", false);
		boolean auto_wildcard_end      = cfg_mgr.Get_bool_app_or("xowa.addon.search.fulltext.special.auto_wildcard_end", false);
		boolean expand_matches_section = cfg_mgr.Get_bool_app_or("xowa.addon.search.fulltext.special.expand_matches_section", false);
		boolean show_all_matches       = cfg_mgr.Get_bool_app_or("xowa.addon.search.fulltext.special.show_all_matches", false);
		int max_pages_per_wiki         = cfg_mgr.Get_int_app_or ("xowa.addon.search.fulltext.special.max_pages_per_wiki", 100);
		String wikis                   = wiki.Domain_str();	// the current wiki is the only one pre-selected
		String namespaces              = cfg_mgr.Get_str_app_or ("xowa.addon.search.fulltext.special.namespaces", "0|4");

		// create page
		Xofulltext_searcher_html html_wtr = new Xofulltext_searcher_html
			( query
			, case_match, auto_wildcard_bgn, auto_wildcard_end
			, expand_matches_section, show_all_matches
			, max_pages_per_wiki
			, wikis, namespaces);
		html_wtr.Bld_page_by_mustache(wiki.App(), page, this);
	}

	public Xow_special_meta Special__meta() {
		return special__meta;
	}

	/** Returns this same instance rather than a copy. */
	public Xow_special_page Special__clone() {
		return this;
	}

	public static final Xow_special_page Prototype = new Xofulltext_searcher_special(Xow_special_meta.New_xo("XowaSearch", "Search"));
}

View File

@@ -0,0 +1,43 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.addons.wikis.fulltexts.searchers.svcs; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.searchers.*;
import gplx.langs.jsons.*;
import gplx.xowa.htmls.bridges.*;
/**
 * Bridge command registered under the key "xowa.wiki.fulltext.searcher".
 * It maps the proc name found in an incoming JSON message to a call on Xofulltext_searcher_svc.
 */
public class Xofulltext_searcher_bridge implements Bridge_cmd_itm {
	private Xofulltext_searcher_svc svc;

	/** Creates the service that Exec delegates to. */
	public void Init_by_app(Xoa_app app) {
		this.svc = new Xofulltext_searcher_svc(app);
	}

	/**
	 * Dispatches one message: "search" and "get_lines_rest" are forwarded to the service with the message's args node.
	 * Any other proc name resolves to the fallback id and raises the unhandled-default error.
	 * @return always the empty string
	 */
	public String Exec(Json_nde data) {
		byte[] proc_name = data.Get_as_bry_or(Bridge_cmd_mgr.Msg__proc, null);
		byte proc_id = proc_hash.Get_as_byte_or(proc_name, Byte_ascii.Max_7_bit);
		Json_nde args = data.Get_kv(Bridge_cmd_mgr.Msg__args).Val_as_nde();

		if (proc_id == Proc__search) {
			svc.Search(args);
		}
		else if (proc_id == Proc__get_lines_rest) {
			svc.Get_lines_rest(args);
		}
		else {
			throw Err_.new_unhandled_default(proc_id);
		}
		return "";
	}

	// proc ids and the name -> id lookup used by Exec
	private static final byte Proc__search = 0;
	private static final byte Proc__get_lines_rest = 1;
	private static final Hash_adp_bry proc_hash = Hash_adp_bry.cs()
		.Add_str_byte("search", Proc__search)
		.Add_str_byte("get_lines_rest", Proc__get_lines_rest)
		;

	public byte[] Key() {
		return BRIDGE_KEY;
	}
	public static final byte[] BRIDGE_KEY = Bry_.new_a7("xowa.wiki.fulltext.searcher");

	public static final Xofulltext_searcher_bridge Prototype = new Xofulltext_searcher_bridge();
	Xofulltext_searcher_bridge() {}
}

View File

@@ -0,0 +1,104 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.addons.wikis.fulltexts.searchers.svcs; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.searchers.*;
import gplx.core.btries.*;
import gplx.langs.jsons.*;
import gplx.dbs.*; import gplx.xowa.wikis.data.tbls.*;
import gplx.xowa.guis.cbks.*;
import gplx.xowa.addons.apps.cfgs.*;
import gplx.xowa.addons.wikis.fulltexts.searchers.specials.*;
import gplx.xowa.addons.wikis.fulltexts.searchers.caches.*;
import gplx.xowa.addons.wikis.searchs.searchers.crts.*;
import gplx.xowa.addons.wikis.searchs.searchers.crts.visitors.*;
import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.*;
import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.uis.*;
import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.gflucenes.*;
import gplx.xowa.addons.wikis.fulltexts.searchers.mgrs.brutes.*;
/**
 * Service behind the full-text search bridge.
 * Search parses the JSON args, optionally saves the chosen options to config, and runs the search
 * on a separate thread; Get_lines_rest replays cached lines for one page of the latest search.
 */
class Xofulltext_searcher_svc implements Gfo_invk {
	private final Xoa_app app;
	private final Xog_cbk_trg cbk_trg = Xog_cbk_trg.New(Xofulltext_searcher_special.Prototype.Special__meta().Ttl_bry());
	private final Xofulltext_cache_mgr cache_mgr = new Xofulltext_cache_mgr();
	private final Xofulltext_searcher_ui searcher_cbk;
	public Xofulltext_searcher_svc(Xoa_app app) {
		this.app = app;
		this.searcher_cbk = new Xofulltext_searcher_ui__gui(app.Gui__cbk_mgr(), cbk_trg);
	}
	/**
	 * Starts a new search from the JSON args sent by the page.
	 * The cache is cleared first, so cached lines of any earlier search are no longer available.
	 */
	public void Search(Json_nde args) {
		// for now, always clear cache; "get_lines_rest" will only work for latest search
		cache_mgr.Clear();

		// get search_args
		Xofulltext_searcher_args search_args = Xofulltext_searcher_args.New_by_json(args);
		search_args.query_id = cache_mgr.Next_qry_id();

		// autosave any changes if enabled
		Xocfg_mgr cfg_mgr = app.Cfg();
		if (cfg_mgr.Get_bool_app_or("xowa.addon.search.fulltext.options.autosave_enabled", true)) {
			cfg_mgr.Set_bool_app("xowa.addon.search.fulltext.special.case_match", search_args.case_match);
			cfg_mgr.Set_bool_app("xowa.addon.search.fulltext.special.auto_wildcard_bgn", search_args.auto_wildcard_bgn);
			cfg_mgr.Set_bool_app("xowa.addon.search.fulltext.special.auto_wildcard_end", search_args.auto_wildcard_end);
			cfg_mgr.Set_bool_app("xowa.addon.search.fulltext.special.expand_matches_section", search_args.expand_matches_section);
			cfg_mgr.Set_bool_app("xowa.addon.search.fulltext.special.show_all_matches", search_args.show_all_matches);
			// these two must be written, not read: a getter here discards its result and the option is never saved
			cfg_mgr.Set_int_app ("xowa.addon.search.fulltext.special.max_pages_per_wiki", search_args.max_pages_per_wiki);
			cfg_mgr.Set_str_app ("xowa.addon.search.fulltext.special.namespaces", search_args.namespaces);
		}

		// launch thread; it re-enters this object through Invk with Invk__search
		gplx.core.threads.Thread_adp_.Start_by_val("search", Cancelable_.Never, this, Invk__search, search_args);
	}
	/** Thread body: searches every wiki listed (pipe-separated) in args.wikis, one after the other. */
	private void Search(Xofulltext_searcher_args args) {
		try {
			// loop wikis
			byte[][] wiki_domains = Bry_split_.Split(args.wikis, Byte_ascii.Pipe_bry);
			for (byte[] wiki_domain : wiki_domains) {
				// get wiki and notify
				Xow_wiki wiki = app.Wiki_mgri().Get_by_or_make_init_y(wiki_domain);
				searcher_cbk.Send_wiki_add(wiki_domain);

				// get searcher and search
				Xofulltext_searcher searcher = Get_searcher(wiki);
				searcher.Search(searcher_cbk, wiki, args);
			}
		} catch (Exception exc) {
			// the error is only shown when app.Tid_is_edit() is true; in any other mode it is dropped without a trace
			if (app.Tid_is_edit())
				((Xoae_app)app).Gui_mgr().Kit().Ask_ok("", "", Err_.Message_gplx_full(exc));
		}
	}
	/** Sends the cached lines for one page; reads "qry_id", "wiki" and "page_id" from the JSON args. */
	public void Get_lines_rest(Json_nde args) {
		Get_lines_rest(args.Get_as_int("qry_id"), args.Get_as_bry("wiki"), args.Get_as_int("page_id"));
	}
	private void Get_lines_rest(int qry_id, byte[] wiki_bry, int page_id) {
		Xofulltext_cache_line[] lines = cache_mgr.Get_lines_rest(qry_id, wiki_bry, page_id);
		for (Xofulltext_cache_line line : lines) {
			// the cached sequence number is sent incremented by one
			Xofulltext_searcher_line match = new Xofulltext_searcher_line(String_.new_u8(wiki_bry), page_id, line.Line_seq() + 1, String_.new_u8(line.Line_html()));
			searcher_cbk.Send_line_add(match);
		}
	}
	/** Chooses the lucene searcher when the wiki has a "data/search" directory; otherwise the brute-force searcher. */
	private Xofulltext_searcher Get_searcher(Xow_wiki wiki) {
		if (Io_mgr.Instance.ExistsDir(wiki.Fsys_mgr().Root_dir().GenSubDir_nest("data", "search"))) {
			return new Xofulltext_searcher__lucene();
		}
		else {
			return new Xofulltext_searcher__brute(app, cbk_trg, cache_mgr);
		}
	}
	public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
		if		(ctx.Match(k, Invk__search))		this.Search((Xofulltext_searcher_args)m.ReadObj("v"));
		else	return Gfo_invk_.Rv_unhandled;
		return this;
	}	private static final String Invk__search = "search";
}