1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2025-05-30 14:04:56 +00:00

Mass_parse: Do not fail when generating full-text search indexes

This commit is contained in:
gnosygnu 2017-04-08 08:14:21 -04:00
parent 5fef0af3a5
commit 680e6b88d6
2 changed files with 4 additions and 4 deletions

View File

@ -66,7 +66,6 @@ public class Xofulltext_extractor implements Mwh_doc_wkr {
// add to bfr
bfr.Add_mid(src, itm_bgn, itm_end);
}
public byte[] Extract(byte[] src) {
doc_parser.Parse(this, src, 0, src.length);
return bfr.To_bry_and_clear();

View File

@ -33,9 +33,10 @@ public class Xofulltext_indexer_wkr {
);
}
/**
 * Indexes one page: runs the page's stored html through the extractor and hands the
 * result, together with the page's id, score and title text, to
 * Index(int, int, byte[], byte[]).
 *
 * The whole operation runs while holding the monitor of index_wtr, and it runs exactly once
 * per call. No extraction or indexing happens outside the lock.
 *
 * @param wpg page to index; its html is read from wpg.Db().Html().Html_bry()
 */
public void Index(Xoae_page wpg) {
    // NOTE:synchronized needed for mass_parse; don't launch separate indexer per mp_thread b/c
    //   (a) lucene may not handle it well;
    //   (b) everything needs to be serialized to the same lucene dir, so no real performance benefits;
    // DATE:2017-04-08
    synchronized (index_wtr) {
        // Extraction is kept inside the lock as well: the extractor is a shared member of this
        // worker (presumably stateful -- confirm against Xofulltext_extractor), so concurrent
        // mass_parse threads must not call it at the same time.
        byte[] html = extractor.Extract(wpg.Db().Html().Html_bry());
        Index(wpg.Db().Page().Id(), wpg.Db().Page().Score(), wpg.Ttl().Page_txt(), html);
    }
}
public void Index(int page_id, int score, byte[] ttl, byte[] html) {
Gflucene_doc_data data = new Gflucene_doc_data(page_id, score, String_.new_u8(ttl), String_.new_u8(html));