mirror of
https://github.com/gnosygnu/xowa.git
synced 2025-05-31 22:44:34 +00:00
Mass_parse: Do not fail when generating full-text search indexes
This commit is contained in:
parent
5fef0af3a5
commit
680e6b88d6
@ -66,7 +66,6 @@ public class Xofulltext_extractor implements Mwh_doc_wkr {
|
|||||||
// add to bfr
|
// add to bfr
|
||||||
bfr.Add_mid(src, itm_bgn, itm_end);
|
bfr.Add_mid(src, itm_bgn, itm_end);
|
||||||
}
|
}
|
||||||
|
|
||||||
public byte[] Extract(byte[] src) {
|
public byte[] Extract(byte[] src) {
|
||||||
doc_parser.Parse(this, src, 0, src.length);
|
doc_parser.Parse(this, src, 0, src.length);
|
||||||
return bfr.To_bry_and_clear();
|
return bfr.To_bry_and_clear();
|
||||||
|
@ -33,9 +33,10 @@ public class Xofulltext_indexer_wkr {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
public void Index(Xoae_page wpg) {
|
public void Index(Xoae_page wpg) {
|
||||||
byte[] html = extractor.Extract(wpg.Db().Html().Html_bry());
|
synchronized (index_wtr) {// NOTE:synchronized needed for mass_parse; don't launch separate indexer per mp_thread b/c (a) lucene may not handle it well; (b) everything needs to be serialized to the same lucene dir, so no real performance benefits; DATE:2017-04-08
|
||||||
|
byte[] html = extractor.Extract(wpg.Db().Html().Html_bry());
|
||||||
Index(wpg.Db().Page().Id(), wpg.Db().Page().Score(), wpg.Ttl().Page_txt(), html);
|
Index(wpg.Db().Page().Id(), wpg.Db().Page().Score(), wpg.Ttl().Page_txt(), html);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
public void Index(int page_id, int score, byte[] ttl, byte[] html) {
|
public void Index(int page_id, int score, byte[] ttl, byte[] html) {
|
||||||
Gflucene_doc_data data = new Gflucene_doc_data(page_id, score, String_.new_u8(ttl), String_.new_u8(html));
|
Gflucene_doc_data data = new Gflucene_doc_data(page_id, score, String_.new_u8(ttl), String_.new_u8(html));
|
||||||
|
Loading…
Reference in New Issue
Block a user