mirror of
https://github.com/gnosygnu/xowa.git
synced 2025-05-30 14:04:56 +00:00
Mass_parse: Do not fail when generating full-text search indexes
This commit is contained in:
parent
5fef0af3a5
commit
680e6b88d6
@@ -66,7 +66,6 @@ public class Xofulltext_extractor implements Mwh_doc_wkr {
|
||||
// add to bfr
|
||||
bfr.Add_mid(src, itm_bgn, itm_end);
|
||||
}
|
||||
|
||||
public byte[] Extract(byte[] src) {
|
||||
doc_parser.Parse(this, src, 0, src.length);
|
||||
return bfr.To_bry_and_clear();
|
||||
|
@@ -33,9 +33,10 @@ public class Xofulltext_indexer_wkr {
|
||||
);
|
||||
}
|
||||
public void Index(Xoae_page wpg) {
|
||||
byte[] html = extractor.Extract(wpg.Db().Html().Html_bry());
|
||||
|
||||
Index(wpg.Db().Page().Id(), wpg.Db().Page().Score(), wpg.Ttl().Page_txt(), html);
|
||||
synchronized (index_wtr) {// NOTE:synchronized needed for mass_parse; don't launch separate indexer per mp_thread b/c (a) lucene may not handle it well; (b) everything needs to be serialized to the same lucene dir, so no real performance benefits; DATE:2017-04-08
|
||||
byte[] html = extractor.Extract(wpg.Db().Html().Html_bry());
|
||||
Index(wpg.Db().Page().Id(), wpg.Db().Page().Score(), wpg.Ttl().Page_txt(), html);
|
||||
}
|
||||
}
|
||||
public void Index(int page_id, int score, byte[] ttl, byte[] html) {
|
||||
Gflucene_doc_data data = new Gflucene_doc_data(page_id, score, String_.new_u8(ttl), String_.new_u8(html));
|
||||
|
Loading…
Reference in New Issue
Block a user