Full-text search: Adjust indexing weights and scores

pull/620/head
gnosygnu 7 years ago
parent 8524120a14
commit 6ccee10526

@ -8,5 +8,6 @@
<classpathentry kind="lib" path="lib/6.4.2/lucene-highlighter-6.4.2.jar"/>
<classpathentry kind="lib" path="lib/6.4.2/lucene-memory-6.4.2.jar"/>
<classpathentry kind="lib" path="lib/6.4.2/lucene-queryparser-6.4.2.jar"/>
<classpathentry kind="lib" path="lib/6.4.2/lucene-queries-6.4.2.jar"/>
<classpathentry kind="output" path="bin"/>
</classpath>

@ -35,7 +35,7 @@ public class Gflucene_indexer_mgr {
private IndexWriterConfig config;
private Directory index;
private IndexWriter wtr;
private FieldType body_fld;
private FieldType body_fld_type;
public Gflucene_indexer_mgr() {
}
@ -61,28 +61,37 @@ public class Gflucene_indexer_mgr {
// create writer
try {
wtr = new IndexWriter(index, config);
// ((TieredMergePolicy)config.getMergePolicy()).
} catch (IOException e) {
throw Err_.new_exc(e, "lucene_index", "failed to create writer");
}
// create field for body
this.body_fld = new FieldType();
body_fld.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
this.body_fld_type = new FieldType();
body_fld_type.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
body_fld_type.setTokenized(true);
body_fld_type.setStored(false);
// body_fld.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
// body_fld.setStored(true);
body_fld.setTokenized(true);
// body_fld.setStoreTermVectors(true);
// body_fld.setStoreTermVectorOffsets(true);
}
public void Exec(Gflucene_doc_data doc_data) {
// org.apache.lucene.document.
Document doc = new Document();
// doc.add(new SortedNumericDocValuesField("page_score", data.score));
doc.add(new StoredField("page_score", doc_data.score));
Document doc = new Document();
doc.add(new StoredField("page_id", doc_data.page_id));
doc.add(new TextField("title", doc_data.title, Field.Store.YES));
doc.add(new Field("body", doc_data.body, body_fld));
doc.add(new NumericDocValuesField("page_score", doc_data.score));
// float score = ((float)doc_data.score / 1000000);
// float score = doc_data.score;
TextField title_field = new TextField("title", doc_data.title, Field.Store.YES);
// title_field.setBoost(score * 1024);
// title_field.setBoost(score);
doc.add(title_field);
Field body_field = new Field("body", doc_data.body, body_fld_type);
// body_field.setBoost(score);
doc.add(body_field);
try {
wtr.addDocument(doc);
} catch (IOException e) {

@ -24,7 +24,13 @@ import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queries.CustomScoreQuery;
import org.apache.lucene.queries.function.FunctionQuery;
import org.apache.lucene.queries.function.valuesource.LongFieldSource;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
@ -56,9 +62,15 @@ public class Gflucene_searcher_mgr {
try {
IndexReader reader = DirectoryReader.open(index);
IndexSearcher searcher = new IndexSearcher(reader);
Query query = new QueryParser("body", analyzer).parse(data.query);
// Query multi_query = MultiFieldQueryParser.parse(data.query, new String[] {"body"}, new BooleanClause.Occur []{BooleanClause.Occur.SHOULD}, analyzer);
// Query body_query = new QueryParser("body", analyzer).parse(data.query);
// Query title_query = new QueryParser("title", analyzer).parse(data.query);
// FunctionQuery boost_query = new FunctionQuery(new LongFieldSource("page_score"));
// CustomScoreQuery query = new CustomScoreQuery(multi_query, boost_query);
// TopDocs docs = searcher.search(query, reader.maxDoc());
TopDocs docs = searcher.search(query, data.match_max);
ScoreDoc[] hits = docs.scoreDocs;
@ -66,8 +78,10 @@ public class Gflucene_searcher_mgr {
for(int i = 0; i < hits.length; i++) {
int docId = hits[i].doc;
Document d = searcher.doc(docId);
Gflucene_doc_data doc = new Gflucene_doc_data(Integer.parseInt(d.get("page_id")), Integer.parseInt(d.get("page_score")), d.get("title"), "");
// Gflucene_doc_data doc = new Gflucene_doc_data(Integer.parseInt(d.get("page_id")), Integer.parseInt(d.get("page_score")), d.get("title"), "");
Gflucene_doc_data doc = new Gflucene_doc_data(Integer.parseInt(d.get("page_id")), 0, d.get("title"), "");
doc.lucene_score = hits[i].score;
// Tfds.Write(doc.lucene_score, doc.title);
list.Add(doc);
}

Loading…
Cancel
Save