From 680e6b88d6f46340aceac6cce3002367a6addc06 Mon Sep 17 00:00:00 2001 From: gnosygnu Date: Sat, 8 Apr 2017 08:14:21 -0400 Subject: [PATCH] Mass_parse: Do not fail when generating full-text search indexes --- .../addons/wikis/fulltexts/core/Xofulltext_extractor.java | 1 - .../fulltexts/indexers/bldrs/Xofulltext_indexer_wkr.java | 7 ++++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/core/Xofulltext_extractor.java b/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/core/Xofulltext_extractor.java index a9b019b70..f91e182e5 100644 --- a/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/core/Xofulltext_extractor.java +++ b/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/core/Xofulltext_extractor.java @@ -66,7 +66,6 @@ public class Xofulltext_extractor implements Mwh_doc_wkr { // add to bfr bfr.Add_mid(src, itm_bgn, itm_end); } - public byte[] Extract(byte[] src) { doc_parser.Parse(this, src, 0, src.length); return bfr.To_bry_and_clear(); diff --git a/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/indexers/bldrs/Xofulltext_indexer_wkr.java b/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/indexers/bldrs/Xofulltext_indexer_wkr.java index 8ff62acff..9f6be0fbc 100644 --- a/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/indexers/bldrs/Xofulltext_indexer_wkr.java +++ b/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/indexers/bldrs/Xofulltext_indexer_wkr.java @@ -33,9 +33,10 @@ public class Xofulltext_indexer_wkr { ); } public void Index(Xoae_page wpg) { - byte[] html = extractor.Extract(wpg.Db().Html().Html_bry()); - - Index(wpg.Db().Page().Id(), wpg.Db().Page().Score(), wpg.Ttl().Page_txt(), html); + synchronized (index_wtr) {// NOTE:synchronized needed for mass_parse; don't launch separate indexer per mp_thread b/c (a) lucene may not handle it well; (b) everything needs to be serialized to the same lucene dir, so no real performance benefits; DATE:2017-04-08 + byte[] html = extractor.Extract(wpg.Db().Html().Html_bry()); + Index(wpg.Db().Page().Id(), wpg.Db().Page().Score(), wpg.Ttl().Page_txt(), html); + } } public void Index(int page_id, int score, byte[] ttl, byte[] html) { Gflucene_doc_data data = new Gflucene_doc_data(page_id, score, String_.new_u8(ttl), String_.new_u8(html));