1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2026-03-02 03:49:30 +00:00

Full-text search: Add Lucene index generation

This commit is contained in:
gnosygnu
2017-03-10 13:24:41 -05:00
parent 062d958ead
commit c91416801b
8 changed files with 76 additions and 5 deletions

View File

@@ -25,6 +25,9 @@ public class Xow_wiki_utl_ {
rv.File_mgr().Repo_mgr().Clone(wiki.File_mgr().Repo_mgr());
rv.File__fsdb_mode().Tid__v2__bld__y_();
// copy other members
rv.Sys_cfg().Copy(wiki.Sys_cfg());
Clone_repos(wiki);
return rv;
}

View File

@@ -18,6 +18,7 @@ import gplx.core.threads.*; import gplx.core.threads.utils.*;
import gplx.core.caches.*; import gplx.xowa.wikis.caches.*;
import gplx.xowa.addons.bldrs.mass_parses.parses.wkrs.*; import gplx.xowa.addons.bldrs.mass_parses.dbs.*; import gplx.xowa.addons.bldrs.mass_parses.parses.pools.*; import gplx.xowa.addons.bldrs.mass_parses.parses.utls.*;
import gplx.xowa.addons.bldrs.wmdumps.imglinks.*;
import gplx.xowa.addons.wikis.searchs.fulltexts.indexers.*;
public class Xomp_parse_mgr {
private Gfo_countdown_latch latch;
// accessor for this mgr's cfg object; the backing field is final and created inline, so the same instance is always returned
public Xomp_parse_mgr_cfg Cfg() {return cfg;} private final Xomp_parse_mgr_cfg cfg = new Xomp_parse_mgr_cfg();
@@ -58,6 +59,10 @@ public class Xomp_parse_mgr {
// init ns_ord_mgr
Xomp_ns_ord_mgr ns_ord_mgr = new Xomp_ns_ord_mgr(Int_.Ary_parse(mgr_db.Tbl__cfg().Select_str("", Xomp_parse_wkr.Cfg__ns_ids), "|"));
// init indexer
Xosearch_indexer indexer = cfg.Indexer_enabled() ? new Xosearch_indexer() : null;
if (indexer != null) indexer.Init(wiki);
// init parse_wkrs
for (int i = 0; i < wkr_len; ++i) {
// make wiki
@@ -65,7 +70,7 @@ public class Xomp_parse_mgr {
wkr_wiki.Cache_mgr().Page_cache_(page_cache).Commons_cache_(commons_cache).Ifexist_cache_(ifexist_cache);
// make wkr
Xomp_parse_wkr wkr = new Xomp_parse_wkr(this, cfg, mgr_db, page_pool, prog_mgr, file_orig_wkr, ns_ord_mgr, wkr_wiki, i + wkr_uid_bgn);
Xomp_parse_wkr wkr = new Xomp_parse_wkr(this, cfg, mgr_db, page_pool, prog_mgr, file_orig_wkr, ns_ord_mgr, wkr_wiki, indexer, i + wkr_uid_bgn);
wkrs[i] = wkr;
}
@@ -78,6 +83,7 @@ public class Xomp_parse_mgr {
// wait until wkrs are done
latch.Await();
page_pool.Rls();
if (indexer != null) indexer.Term();
// print stats
Bry_bfr bfr = Bry_bfr_.New();

View File

@@ -36,6 +36,7 @@ public class Xomp_parse_mgr_cfg implements Gfo_invk {
// getter for mgr_url; the field has no initializer on this line, so it is null until assigned elsewhere
// NOTE(review): presumably the location of the mass_parse mgr data -- confirm against the code that assigns it
public Io_url Mgr_url() {return mgr_url;} private Io_url mgr_url;
// getter for wkr_machine_name; the field has no initializer on this line, so it is null until assigned elsewhere
// NOTE(review): presumably identifies the machine running the parse wkrs -- confirm against the code that assigns it
public String Wkr_machine_name() {return wkr_machine_name;} private String wkr_machine_name;
// getter for the show_msg__fetched_pool flag; false by default (no initializer)
// the flag is set by the Invk__show_msg__fetched_pool_ branch of the invk dispatcher, which reads a yes/no "v" arg
public boolean Show_msg__fetched_pool() {return show_msg__fetched_pool;} private boolean show_msg__fetched_pool;
// getter for the indexer_enabled flag; false by default (no initializer), so indexing is opt-in
// the flag is set by the "indexer_enabled_" branch of the invk dispatcher, which reads a yes/no "v" arg
// Xomp_parse_mgr only creates a Xosearch_indexer when this returns true; otherwise it uses a null indexer
public boolean Indexer_enabled() {return indexer_enabled;} private boolean indexer_enabled;
public void Init(Xowe_wiki wiki) {
if (num_wkrs == -1) num_wkrs = gplx.core.envs.Runtime_.Cpu_count();
if (num_pages_in_pool == -1) num_pages_in_pool = num_wkrs * 1000;
@@ -64,6 +65,7 @@ public class Xomp_parse_mgr_cfg implements Gfo_invk {
else if (ctx.Match(k, Invk__show_msg__fetched_pool_)) show_msg__fetched_pool = m.ReadYn("v");
else if (ctx.Match(k, Invk__hdump_catboxes_)) hdump_catboxs = m.ReadYn("v");
else if (ctx.Match(k, Invk__log_math_)) log_math = m.ReadYn("v");
else if (ctx.Match(k, "indexer_enabled_")) indexer_enabled = m.ReadYn("v");
else return Gfo_invk_.Rv_unhandled;
return this;
}

View File

@@ -19,6 +19,7 @@ import gplx.xowa.files.origs.*;
import gplx.xowa.htmls.core.bldrs.*;
import gplx.xowa.parsers.*; import gplx.xowa.parsers.logs.*;
import gplx.xowa.addons.bldrs.mass_parses.parses.mgrs.*; import gplx.xowa.addons.bldrs.mass_parses.parses.utls.*; import gplx.xowa.addons.bldrs.mass_parses.parses.*; import gplx.xowa.addons.bldrs.mass_parses.parses.pools.*;
import gplx.xowa.addons.wikis.searchs.fulltexts.indexers.*;
public class Xomp_parse_wkr implements Gfo_invk {
// mgr vars
private final Xomp_parse_mgr mgr;
@@ -39,13 +40,19 @@ public class Xomp_parse_wkr implements Gfo_invk {
private final int uid;
private Xomp_wkr_db wkr_db;
private final Xosearch_indexer indexer;
private final List_adp list = List_adp_.New(); private int list_idx = 0, list_len = 0;
private int done_count; private long done_time;
public Xomp_parse_wkr(Xomp_parse_mgr mgr, Xomp_parse_mgr_cfg cfg, Xomp_mgr_db mgr_db, Xomp_page_pool page_pool, Xomp_prog_mgr prog_mgr, Xof_orig_wkr file_orig_wkr, Xomp_ns_ord_mgr ns_ord_mgr, Xowe_wiki wiki, int uid) {
public Xomp_parse_wkr(Xomp_parse_mgr mgr, Xomp_parse_mgr_cfg cfg
, Xomp_mgr_db mgr_db, Xomp_page_pool page_pool
, Xomp_prog_mgr prog_mgr, Xof_orig_wkr file_orig_wkr, Xomp_ns_ord_mgr ns_ord_mgr
, Xowe_wiki wiki, Xosearch_indexer indexer, int uid) {
// mgr vars
this.mgr = mgr; this.mgr_db = mgr_db;
this.page_pool = page_pool; this.prog_mgr = prog_mgr; this.file_orig_wkr = file_orig_wkr;
this.ns_ord_mgr = ns_ord_mgr;
this.indexer = indexer;
// cfg vars
this.cfg = cfg;
@@ -125,6 +132,9 @@ public class Xomp_parse_wkr implements Gfo_invk {
// gen_html
hdump_bldr.Insert(pctx, wpg);
// index
if (indexer != null) indexer.Index(wpg);
// mark done for sake of progress
prog_mgr.Mark_done(ppg.Id());