mirror of
https://github.com/gnosygnu/xowa.git
synced 2024-10-27 20:34:16 +00:00
Full-text search: Adjust indexing weights and scores
This commit is contained in:
parent
8524120a14
commit
6ccee10526
@ -8,5 +8,6 @@
|
||||
<classpathentry kind="lib" path="lib/6.4.2/lucene-highlighter-6.4.2.jar"/>
|
||||
<classpathentry kind="lib" path="lib/6.4.2/lucene-memory-6.4.2.jar"/>
|
||||
<classpathentry kind="lib" path="lib/6.4.2/lucene-queryparser-6.4.2.jar"/>
|
||||
<classpathentry kind="lib" path="lib/6.4.2/lucene-queries-6.4.2.jar"/>
|
||||
<classpathentry kind="output" path="bin"/>
|
||||
</classpath>
|
||||
|
@ -35,7 +35,7 @@ public class Gflucene_indexer_mgr {
|
||||
private IndexWriterConfig config;
|
||||
private Directory index;
|
||||
private IndexWriter wtr;
|
||||
private FieldType body_fld;
|
||||
private FieldType body_fld_type;
|
||||
|
||||
public Gflucene_indexer_mgr() {
|
||||
}
|
||||
@ -61,28 +61,37 @@ public class Gflucene_indexer_mgr {
|
||||
// create writer
|
||||
try {
|
||||
wtr = new IndexWriter(index, config);
|
||||
// ((TieredMergePolicy)config.getMergePolicy()).
|
||||
} catch (IOException e) {
|
||||
throw Err_.new_exc(e, "lucene_index", "failed to create writer");
|
||||
}
|
||||
|
||||
// create field for body
|
||||
this.body_fld = new FieldType();
|
||||
body_fld.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
|
||||
this.body_fld_type = new FieldType();
|
||||
body_fld_type.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
|
||||
body_fld_type.setTokenized(true);
|
||||
body_fld_type.setStored(false);
|
||||
// body_fld.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
|
||||
// body_fld.setStored(true);
|
||||
body_fld.setTokenized(true);
|
||||
// body_fld.setStoreTermVectors(true);
|
||||
// body_fld.setStoreTermVectorOffsets(true);
|
||||
}
|
||||
public void Exec(Gflucene_doc_data doc_data) {
|
||||
// org.apache.lucene.document.
|
||||
Document doc = new Document();
|
||||
// doc.add(new SortedNumericDocValuesField("page_score", data.score));
|
||||
doc.add(new StoredField("page_score", doc_data.score));
|
||||
Document doc = new Document();
|
||||
|
||||
doc.add(new StoredField("page_id", doc_data.page_id));
|
||||
doc.add(new TextField("title", doc_data.title, Field.Store.YES));
|
||||
doc.add(new Field("body", doc_data.body, body_fld));
|
||||
doc.add(new NumericDocValuesField("page_score", doc_data.score));
|
||||
|
||||
// float score = ((float)doc_data.score / 1000000);
|
||||
// float score = doc_data.score;
|
||||
|
||||
TextField title_field = new TextField("title", doc_data.title, Field.Store.YES);
|
||||
// title_field.setBoost(score * 1024);
|
||||
// title_field.setBoost(score);
|
||||
doc.add(title_field);
|
||||
|
||||
Field body_field = new Field("body", doc_data.body, body_fld_type);
|
||||
// body_field.setBoost(score);
|
||||
doc.add(body_field);
|
||||
|
||||
try {
|
||||
wtr.addDocument(doc);
|
||||
} catch (IOException e) {
|
||||
|
@ -24,7 +24,13 @@ import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.queries.CustomScoreQuery;
|
||||
import org.apache.lucene.queries.function.FunctionQuery;
|
||||
import org.apache.lucene.queries.function.valuesource.LongFieldSource;
|
||||
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
|
||||
import org.apache.lucene.queryparser.classic.QueryParser;
|
||||
import org.apache.lucene.search.BooleanClause;
|
||||
import org.apache.lucene.search.BooleanQuery;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.ScoreDoc;
|
||||
@ -57,8 +63,14 @@ public class Gflucene_searcher_mgr {
|
||||
IndexReader reader = DirectoryReader.open(index);
|
||||
IndexSearcher searcher = new IndexSearcher(reader);
|
||||
|
||||
|
||||
Query query = new QueryParser("body", analyzer).parse(data.query);
|
||||
// Query multi_query = MultiFieldQueryParser.parse(data.query, new String[] {"body"}, new BooleanClause.Occur []{BooleanClause.Occur.SHOULD}, analyzer);
|
||||
|
||||
// Query body_query = new QueryParser("body", analyzer).parse(data.query);
|
||||
// Query title_query = new QueryParser("title", analyzer).parse(data.query);
|
||||
// FunctionQuery boost_query = new FunctionQuery(new LongFieldSource("page_score"));
|
||||
// CustomScoreQuery query = new CustomScoreQuery(multi_query, boost_query);
|
||||
|
||||
// TopDocs docs = searcher.search(query, reader.maxDoc());
|
||||
TopDocs docs = searcher.search(query, data.match_max);
|
||||
ScoreDoc[] hits = docs.scoreDocs;
|
||||
@ -66,8 +78,10 @@ public class Gflucene_searcher_mgr {
|
||||
for(int i = 0; i < hits.length; i++) {
|
||||
int docId = hits[i].doc;
|
||||
Document d = searcher.doc(docId);
|
||||
Gflucene_doc_data doc = new Gflucene_doc_data(Integer.parseInt(d.get("page_id")), Integer.parseInt(d.get("page_score")), d.get("title"), "");
|
||||
// Gflucene_doc_data doc = new Gflucene_doc_data(Integer.parseInt(d.get("page_id")), Integer.parseInt(d.get("page_score")), d.get("title"), "");
|
||||
Gflucene_doc_data doc = new Gflucene_doc_data(Integer.parseInt(d.get("page_id")), 0, d.get("title"), "");
|
||||
doc.lucene_score = hits[i].score;
|
||||
// Tfds.Write(doc.lucene_score, doc.title);
|
||||
list.Add(doc);
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user