mirror of
https://github.com/gnosygnu/xowa.git
synced 2024-10-27 20:34:16 +00:00
Full-text search: Specify namespaces for indexing
This commit is contained in:
parent
10d13a3cd9
commit
a4380b6d48
@ -0,0 +1,55 @@
|
|||||||
|
/*
|
||||||
|
XOWA: the XOWA Offline Wiki Application
|
||||||
|
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||||
|
|
||||||
|
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||||
|
or alternatively under the terms of the Apache License Version 2.0.
|
||||||
|
|
||||||
|
You may use XOWA according to either of these licenses as is most appropriate
|
||||||
|
for your project on a case-by-case basis.
|
||||||
|
|
||||||
|
The terms of each license can be found in the source code repository:
|
||||||
|
|
||||||
|
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||||
|
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||||
|
*/
|
||||||
|
package gplx.xowa.addons.wikis.fulltexts.indexers.bldrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.indexers.*;
|
||||||
|
import gplx.xowa.wikis.nss.*;
|
||||||
|
public class Xofulltext_indexer_args implements Gfo_invk {
|
||||||
|
public byte[] wikis;
|
||||||
|
public String ns_ids;
|
||||||
|
public void Init_by_wiki(Xowe_wiki wiki) {
|
||||||
|
// wikis: null
|
||||||
|
if (wikis == null)
|
||||||
|
wikis = wiki.Domain_bry();
|
||||||
|
|
||||||
|
// ns: null / *
|
||||||
|
if (ns_ids == null)
|
||||||
|
ns_ids = "0";
|
||||||
|
else if (String_.Eq(ns_ids, "*")) {
|
||||||
|
Xow_ns[] ns_ary = wiki.Ns_mgr().Ords_ary();
|
||||||
|
int len = ns_ary.length;
|
||||||
|
Bry_bfr bfr = Bry_bfr_.New();
|
||||||
|
for (int i = 0; i < len; i++) {
|
||||||
|
Xow_ns ns = ns_ary[i];
|
||||||
|
int ns_id = ns.Id();
|
||||||
|
if (ns_id < 0) continue; // ignore media, special
|
||||||
|
if (i != 0) bfr.Add_byte(Byte_ascii.Pipe);
|
||||||
|
bfr.Add_int_variable(ns_id);
|
||||||
|
}
|
||||||
|
ns_ids = bfr.To_str_and_clear();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
|
||||||
|
if (ctx.Match(k, "wikis_")) this.wikis = m.ReadBryOr("v", null);
|
||||||
|
else if (ctx.Match(k, "ns_ids")) this.ns_ids = m.ReadStrOr("v", null);
|
||||||
|
else return Gfo_invk_.Rv_unhandled;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
public static Xofulltext_indexer_args New_by_json(gplx.langs.jsons.Json_nde args) {
|
||||||
|
Xofulltext_indexer_args rv = new Xofulltext_indexer_args();
|
||||||
|
rv.wikis = args.Get_as_bry("wikis");
|
||||||
|
rv.ns_ids = args.Get_as_str("ns_ids");
|
||||||
|
return rv;
|
||||||
|
}
|
||||||
|
}
|
@ -16,10 +16,15 @@ Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
|||||||
package gplx.xowa.addons.wikis.fulltexts.indexers.bldrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.indexers.*;
|
package gplx.xowa.addons.wikis.fulltexts.indexers.bldrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.indexers.*;
|
||||||
import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.wkrs.*;
|
import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.wkrs.*;
|
||||||
public class Xofulltext_indexer_cmd extends Xob_cmd__base {
|
public class Xofulltext_indexer_cmd extends Xob_cmd__base {
|
||||||
|
private final Xofulltext_indexer_args args = new Xofulltext_indexer_args();
|
||||||
public Xofulltext_indexer_cmd(Xob_bldr bldr, Xowe_wiki wiki) {super(bldr, wiki);}
|
public Xofulltext_indexer_cmd(Xob_bldr bldr, Xowe_wiki wiki) {super(bldr, wiki);}
|
||||||
@Override public void Cmd_run() {
|
@Override public void Cmd_run() {
|
||||||
wiki.Init_assert();
|
wiki.Init_assert();
|
||||||
new Xofulltext_indexer_mgr().Exec(wiki, null);
|
new Xofulltext_indexer_mgr().Exec(wiki, null, args);
|
||||||
|
}
|
||||||
|
@Override public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
|
||||||
|
if (ctx.Match(k, "args")) return args;
|
||||||
|
else return Gfo_invk_.Rv_unhandled;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override public String Cmd_key() {return "search.index";}
|
@Override public String Cmd_key() {return "search.index";}
|
||||||
|
@ -20,19 +20,28 @@ import gplx.xowa.wikis.data.*;
|
|||||||
import gplx.xowa.htmls.core.dbs.*;
|
import gplx.xowa.htmls.core.dbs.*;
|
||||||
import gplx.xowa.addons.wikis.fulltexts.indexers.svcs.*;
|
import gplx.xowa.addons.wikis.fulltexts.indexers.svcs.*;
|
||||||
public class Xofulltext_indexer_mgr {
|
public class Xofulltext_indexer_mgr {
|
||||||
public void Exec(Xowe_wiki wiki, Xofulltext_indexer_ui ui) {
|
public void Exec(Xowe_wiki wiki, Xofulltext_indexer_ui ui, Xofulltext_indexer_args args) {
|
||||||
Xow_db_file core_db = wiki.Data__core_mgr().Db__core();
|
// init indexer
|
||||||
gplx.xowa.wikis.data.tbls.Xowd_page_tbl page_tbl = core_db.Tbl__page();
|
|
||||||
|
|
||||||
Xoh_page hpg = new Xoh_page();
|
|
||||||
|
|
||||||
Xofulltext_indexer_wkr indexer = new Xofulltext_indexer_wkr();
|
Xofulltext_indexer_wkr indexer = new Xofulltext_indexer_wkr();
|
||||||
indexer.Init(wiki);
|
indexer.Init(wiki);
|
||||||
|
|
||||||
Db_conn conn = page_tbl.Conn();
|
// get page tbl
|
||||||
Db_rdr rdr = conn.Exec_rdr("SELECT page_id, page_score, page_namespace, page_title, page_html_db_id FROM page WHERE page_namespace = 0;");
|
Xow_db_file core_db = wiki.Data__core_mgr().Db__core();
|
||||||
|
gplx.xowa.wikis.data.tbls.Xowd_page_tbl page_tbl = core_db.Tbl__page();
|
||||||
|
|
||||||
|
// init args
|
||||||
|
args.Init_by_wiki(wiki);
|
||||||
int count = 0;
|
int count = 0;
|
||||||
|
Xoh_page hpg = new Xoh_page();
|
||||||
|
|
||||||
|
// get rdr and loop
|
||||||
|
Db_conn conn = page_tbl.Conn();
|
||||||
|
Db_rdr rdr = conn.Exec_rdr(Db_sql_.Make_by_fmt(String_.Ary
|
||||||
|
( "SELECT page_id, page_score, page_namespace, page_title, page_html_db_id"
|
||||||
|
, "FROM page"
|
||||||
|
, "WHERE page_namespace IN ({0});"), String_.Replace(args.ns_ids, "|", ",")));
|
||||||
while (rdr.Move_next()) {
|
while (rdr.Move_next()) {
|
||||||
|
// read vars
|
||||||
int page_namespace = rdr.Read_int("page_namespace");
|
int page_namespace = rdr.Read_int("page_namespace");
|
||||||
byte[] page_ttl_bry = rdr.Read_bry_by_str("page_title");
|
byte[] page_ttl_bry = rdr.Read_bry_by_str("page_title");
|
||||||
int page_id = rdr.Read_int("page_id");
|
int page_id = rdr.Read_int("page_id");
|
||||||
@ -52,7 +61,10 @@ public class Xofulltext_indexer_mgr {
|
|||||||
continue;
|
continue;
|
||||||
byte[] html_text = wiki.Html__hdump_mgr().Load_mgr().Parse(hpg, hpg.Db().Html().Zip_tid(), hpg.Db().Html().Hzip_tid(), hpg.Db().Html().Html_bry());
|
byte[] html_text = wiki.Html__hdump_mgr().Load_mgr().Parse(hpg, hpg.Db().Html().Zip_tid(), hpg.Db().Html().Hzip_tid(), hpg.Db().Html().Html_bry());
|
||||||
|
|
||||||
|
// run index
|
||||||
indexer.Index(page_id, page_score, page_ttl.Page_txt(), html_text);
|
indexer.Index(page_id, page_score, page_ttl.Page_txt(), html_text);
|
||||||
|
|
||||||
|
// notify
|
||||||
if ((++count % 10000) == 0) {
|
if ((++count % 10000) == 0) {
|
||||||
Gfo_usr_dlg_.Instance.Prog_many("", "", "indexing page: ~{0}", count);
|
Gfo_usr_dlg_.Instance.Prog_many("", "", "indexing page: ~{0}", count);
|
||||||
if (ui != null)
|
if (ui != null)
|
||||||
@ -63,6 +75,7 @@ public class Xofulltext_indexer_mgr {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// term indexer
|
||||||
indexer.Term();
|
indexer.Term();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -16,14 +16,16 @@ Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
|||||||
package gplx.xowa.addons.wikis.fulltexts.indexers.specials; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.indexers.*;
|
package gplx.xowa.addons.wikis.fulltexts.indexers.specials; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.fulltexts.*; import gplx.xowa.addons.wikis.fulltexts.indexers.*;
|
||||||
import gplx.langs.mustaches.*;
|
import gplx.langs.mustaches.*;
|
||||||
public class Xofulltext_indexer_doc implements Mustache_doc_itm {
|
public class Xofulltext_indexer_doc implements Mustache_doc_itm {
|
||||||
private final byte[] wikis_bry;
|
private final String wikis_bry, ns_ids;
|
||||||
public Xofulltext_indexer_doc
|
public Xofulltext_indexer_doc(String wikis_bry, String ns_ids) {
|
||||||
( byte[] wikis_bry) {
|
|
||||||
this.wikis_bry = wikis_bry;
|
this.wikis_bry = wikis_bry;
|
||||||
|
this.ns_ids = ns_ids;
|
||||||
}
|
}
|
||||||
public boolean Mustache__write(String key, Mustache_bfr bfr) {
|
public boolean Mustache__write(String key, Mustache_bfr bfr) {
|
||||||
if (String_.Eq(key, "wikis"))
|
if (String_.Eq(key, "wikis"))
|
||||||
bfr.Add_bry(wikis_bry);
|
bfr.Add_str_u8(wikis_bry);
|
||||||
|
else if (String_.Eq(key, "ns_ids"))
|
||||||
|
bfr.Add_str_u8(ns_ids);
|
||||||
else
|
else
|
||||||
return false;
|
return false;
|
||||||
return true;
|
return true;
|
||||||
|
@ -17,15 +17,15 @@ package gplx.xowa.addons.wikis.fulltexts.indexers.specials; import gplx.*; impor
|
|||||||
import gplx.xowa.specials.*; import gplx.langs.mustaches.*; import gplx.xowa.wikis.pages.*; import gplx.xowa.wikis.pages.tags.*;
|
import gplx.xowa.specials.*; import gplx.langs.mustaches.*; import gplx.xowa.wikis.pages.*; import gplx.xowa.wikis.pages.tags.*;
|
||||||
import gplx.dbs.*;
|
import gplx.dbs.*;
|
||||||
class Xofulltext_indexer_html extends Xow_special_wtr__base {
|
class Xofulltext_indexer_html extends Xow_special_wtr__base {
|
||||||
private final byte[] wikis_bry;
|
private final String wikis_bry, ns_ids;
|
||||||
public Xofulltext_indexer_html
|
public Xofulltext_indexer_html(String wikis_bry, String ns_ids) {
|
||||||
( byte[] wikis_bry) {
|
|
||||||
this.wikis_bry = wikis_bry;
|
this.wikis_bry = wikis_bry;
|
||||||
|
this.ns_ids = ns_ids;
|
||||||
}
|
}
|
||||||
@Override protected Io_url Get_addon_dir(Xoa_app app) {return Addon_dir(app);}
|
@Override protected Io_url Get_addon_dir(Xoa_app app) {return Addon_dir(app);}
|
||||||
@Override protected Io_url Get_mustache_fil(Io_url addon_dir) {return addon_dir.GenSubFil_nest("bin", "xofulltext_indexer.template.html");}
|
@Override protected Io_url Get_mustache_fil(Io_url addon_dir) {return addon_dir.GenSubFil_nest("bin", "xofulltext_indexer.template.html");}
|
||||||
@Override protected Mustache_doc_itm Bld_mustache_root(Xoa_app app) {
|
@Override protected Mustache_doc_itm Bld_mustache_root(Xoa_app app) {
|
||||||
return new Xofulltext_indexer_doc(wikis_bry);
|
return new Xofulltext_indexer_doc(wikis_bry, ns_ids);
|
||||||
}
|
}
|
||||||
@Override protected void Bld_tags(Xoa_app app, Io_url addon_dir, Xopage_html_data page_data) {
|
@Override protected void Bld_tags(Xoa_app app, Io_url addon_dir, Xopage_html_data page_data) {
|
||||||
Xopg_tag_mgr head_tags = page_data.Head_tags();
|
Xopg_tag_mgr head_tags = page_data.Head_tags();
|
||||||
|
@ -20,12 +20,12 @@ public class Xofulltext_indexer_special implements Xow_special_page {
|
|||||||
public void Special__gen(Xow_wiki wiki, Xoa_page page, Xoa_url url, Xoa_ttl ttl) {
|
public void Special__gen(Xow_wiki wiki, Xoa_page page, Xoa_url url, Xoa_ttl ttl) {
|
||||||
// get qry if any
|
// get qry if any
|
||||||
Gfo_qarg_mgr url_args = new Gfo_qarg_mgr().Init(url.Qargs_ary());
|
Gfo_qarg_mgr url_args = new Gfo_qarg_mgr().Init(url.Qargs_ary());
|
||||||
byte[] wikis_bry = url_args.Read_bry_or("wikis", Bry_.Empty);
|
|
||||||
|
|
||||||
// get options and create page
|
// get options and create page
|
||||||
// Xocfg_mgr cfg_mgr = wiki.App().Cfg();
|
// Xocfg_mgr cfg_mgr = wiki.App().Cfg();
|
||||||
new Xofulltext_indexer_html
|
new Xofulltext_indexer_html
|
||||||
( wikis_bry
|
( url_args.Read_str_or("wikis", wiki.Domain_str())
|
||||||
|
, url_args.Read_str_or("ns_ids", "0")
|
||||||
).Bld_page_by_mustache(wiki.App(), page, this);
|
).Bld_page_by_mustache(wiki.App(), page, this);
|
||||||
}
|
}
|
||||||
Xofulltext_indexer_special(Xow_special_meta special__meta) {this.special__meta = special__meta;}
|
Xofulltext_indexer_special(Xow_special_meta special__meta) {this.special__meta = special__meta;}
|
||||||
|
@ -29,8 +29,7 @@ class Xofulltext_indexer_svc implements Gfo_invk {
|
|||||||
}
|
}
|
||||||
public void Index(Json_nde args) {
|
public void Index(Json_nde args) {
|
||||||
// create args
|
// create args
|
||||||
byte[] wikis_bry = args.Get_as_bry("wikis");
|
Xofulltext_indexer_args indexer_args = Xofulltext_indexer_args.New_by_json(args);
|
||||||
Xofulltext_indexer_args indexer_args = new Xofulltext_indexer_args(wikis_bry);
|
|
||||||
|
|
||||||
// launch thread
|
// launch thread
|
||||||
gplx.core.threads.Thread_adp_.Start_by_val("index", Cancelable_.Never, this, Invk__index, indexer_args);
|
gplx.core.threads.Thread_adp_.Start_by_val("index", Cancelable_.Never, this, Invk__index, indexer_args);
|
||||||
@ -47,6 +46,7 @@ class Xofulltext_indexer_svc implements Gfo_invk {
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// check if dir exists
|
||||||
wiki.Init_by_wiki();
|
wiki.Init_by_wiki();
|
||||||
Io_url search_dir = Xosearch_fulltext_addon.Get_index_dir(wiki);
|
Io_url search_dir = Xosearch_fulltext_addon.Get_index_dir(wiki);
|
||||||
if (Io_mgr.Instance.ExistsDir(search_dir)) {
|
if (Io_mgr.Instance.ExistsDir(search_dir)) {
|
||||||
@ -55,11 +55,14 @@ class Xofulltext_indexer_svc implements Gfo_invk {
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// notify bgn
|
||||||
app.Gui__cbk_mgr().Send_json(cbk_trg, "xo.fulltext_indexer.status__note__recv", gplx.core.gfobjs.Gfobj_nde.New()
|
app.Gui__cbk_mgr().Send_json(cbk_trg, "xo.fulltext_indexer.status__note__recv", gplx.core.gfobjs.Gfobj_nde.New()
|
||||||
.Add_str("note", Datetime_now.Get().XtoStr_fmt_yyyy_MM_dd_HH_mm_ss() + ": wiki index started: " + String_.new_u8(domain)));
|
.Add_str("note", Datetime_now.Get().XtoStr_fmt_yyyy_MM_dd_HH_mm_ss() + ": wiki index started: " + String_.new_u8(domain)));
|
||||||
|
|
||||||
new Xofulltext_indexer_mgr().Exec((Xowe_wiki)wiki, new Xofulltext_indexer_ui(app.Gui__cbk_mgr(), cbk_trg));
|
// run index
|
||||||
|
new Xofulltext_indexer_mgr().Exec((Xowe_wiki)wiki, new Xofulltext_indexer_ui(app.Gui__cbk_mgr(), cbk_trg), args);
|
||||||
|
|
||||||
|
// notify end
|
||||||
app.Gui__cbk_mgr().Send_json(cbk_trg, "xo.fulltext_indexer.status__note__recv", gplx.core.gfobjs.Gfobj_nde.New()
|
app.Gui__cbk_mgr().Send_json(cbk_trg, "xo.fulltext_indexer.status__note__recv", gplx.core.gfobjs.Gfobj_nde.New()
|
||||||
.Add_str("note", Datetime_now.Get().XtoStr_fmt_yyyy_MM_dd_HH_mm_ss() + ": wiki index ended: " + String_.new_u8(domain)));
|
.Add_str("note", Datetime_now.Get().XtoStr_fmt_yyyy_MM_dd_HH_mm_ss() + ": wiki index ended: " + String_.new_u8(domain)));
|
||||||
}
|
}
|
||||||
@ -72,9 +75,3 @@ class Xofulltext_indexer_svc implements Gfo_invk {
|
|||||||
}
|
}
|
||||||
private static final String Invk__index = "index";
|
private static final String Invk__index = "index";
|
||||||
}
|
}
|
||||||
class Xofulltext_indexer_args {
|
|
||||||
public byte[] wikis;
|
|
||||||
public Xofulltext_indexer_args(byte[] wikis) {
|
|
||||||
this.wikis = wikis;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
@ -63,13 +63,13 @@ public class Gflucene_searcher_mgr {
|
|||||||
IndexReader reader = DirectoryReader.open(index);
|
IndexReader reader = DirectoryReader.open(index);
|
||||||
IndexSearcher searcher = new IndexSearcher(reader);
|
IndexSearcher searcher = new IndexSearcher(reader);
|
||||||
|
|
||||||
Query query = new QueryParser("body", analyzer).parse(data.query);
|
// Query query = new QueryParser("body", analyzer).parse(data.query);
|
||||||
// Query multi_query = MultiFieldQueryParser.parse(data.query, new String[] {"body"}, new BooleanClause.Occur []{BooleanClause.Occur.SHOULD}, analyzer);
|
Query multi_query = MultiFieldQueryParser.parse(data.query, new String[] {"body"}, new BooleanClause.Occur []{BooleanClause.Occur.SHOULD}, analyzer);
|
||||||
|
|
||||||
// Query body_query = new QueryParser("body", analyzer).parse(data.query);
|
// Query body_query = new QueryParser("body", analyzer).parse(data.query);
|
||||||
// Query title_query = new QueryParser("title", analyzer).parse(data.query);
|
// Query title_query = new QueryParser("title", analyzer).parse(data.query);
|
||||||
// FunctionQuery boost_query = new FunctionQuery(new LongFieldSource("page_score"));
|
FunctionQuery boost_query = new FunctionQuery(new LongFieldSource("page_score"));
|
||||||
// CustomScoreQuery query = new CustomScoreQuery(multi_query, boost_query);
|
CustomScoreQuery query = new CustomScoreQuery(multi_query, boost_query);
|
||||||
|
|
||||||
// TopDocs docs = searcher.search(query, reader.maxDoc());
|
// TopDocs docs = searcher.search(query, reader.maxDoc());
|
||||||
TopDocs docs = searcher.search(query, data.match_max);
|
TopDocs docs = searcher.search(query, data.match_max);
|
||||||
|
Loading…
Reference in New Issue
Block a user