Full-text search: Adjust indexing weights and scores

2026-01-23 09:19:40 +00:00 · 2017-03-16 17:02:19 -04:00 · 2017-03-16 17:02:19 -04:00 · 6ccee10526
commit 6ccee10526
parent 8524120a14
3 changed files with 39 additions and 15 deletions
--- a/gplx.gflucene/.classpath
+++ b/gplx.gflucene/.classpath
@@ -8,5 +8,6 @@
 	<classpathentry kind="lib" path="lib/6.4.2/lucene-highlighter-6.4.2.jar"/>
 	<classpathentry kind="lib" path="lib/6.4.2/lucene-memory-6.4.2.jar"/>
 	<classpathentry kind="lib" path="lib/6.4.2/lucene-queryparser-6.4.2.jar"/>
+	<classpathentry kind="lib" path="lib/6.4.2/lucene-queries-6.4.2.jar"/>
 	<classpathentry kind="output" path="bin"/>
 </classpath>
--- a/gplx.gflucene/src/gplx/gflucene/indexers/Gflucene_indexer_mgr.java
+++ b/gplx.gflucene/src/gplx/gflucene/indexers/Gflucene_indexer_mgr.java
@@ -35,7 +35,7 @@ public class Gflucene_indexer_mgr {
    private IndexWriterConfig config;
 	private Directory index;
    private IndexWriter wtr;
-    private FieldType body_fld;
+    private FieldType body_fld_type;
 	
 		public Gflucene_indexer_mgr() {
 	}
@@ -61,28 +61,37 @@ public class Gflucene_indexer_mgr {
        // create writer
        try {
 			wtr = new IndexWriter(index, config);
-//			((TieredMergePolicy)config.getMergePolicy()).
 		} catch (IOException e) {
 			throw Err_.new_exc(e, "lucene_index", "failed to create writer");
 		}
        
        // create field for body
-		this.body_fld = new FieldType();
-		body_fld.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
+		this.body_fld_type = new FieldType();
+		body_fld_type.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
+		body_fld_type.setTokenized(true);
+		body_fld_type.setStored(false);
 //		body_fld.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
-//		body_fld.setStored(true);
-		body_fld.setTokenized(true);
 //		body_fld.setStoreTermVectors(true);
 //		body_fld.setStoreTermVectorOffsets(true);
        	}
 	public void Exec(Gflucene_doc_data doc_data) {
-		//		org.apache.lucene.document.
-	    Document doc = new Document();
-//	    doc.add(new SortedNumericDocValuesField("page_score", data.score));
-	    doc.add(new StoredField("page_score", doc_data.score));
+			    Document doc = new Document();
+	    
 	    doc.add(new StoredField("page_id", doc_data.page_id));
-	    doc.add(new TextField("title", doc_data.title, Field.Store.YES));
-	    doc.add(new Field("body", doc_data.body, body_fld));
+	    doc.add(new NumericDocValuesField("page_score", doc_data.score));
+
+//	    float score = ((float)doc_data.score / 1000000);
+//	    float score = doc_data.score;
+
+	    TextField title_field = new TextField("title", doc_data.title, Field.Store.YES);
+//	    title_field.setBoost(score * 1024);
+//	    title_field.setBoost(score);
+	    doc.add(title_field);
+	    
+	    Field body_field = new Field("body", doc_data.body, body_fld_type);
+//	    body_field.setBoost(score);
+	    doc.add(body_field);
+	    
 	    try {
 			wtr.addDocument(doc);
 		} catch (IOException e) {
--- a/gplx.gflucene/src/gplx/gflucene/searchers/Gflucene_searcher_mgr.java
+++ b/gplx.gflucene/src/gplx/gflucene/searchers/Gflucene_searcher_mgr.java
@@ -24,7 +24,13 @@ import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.queries.CustomScoreQuery;
+import org.apache.lucene.queries.function.FunctionQuery;
+import org.apache.lucene.queries.function.valuesource.LongFieldSource;
+import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
 import org.apache.lucene.queryparser.classic.QueryParser;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.ScoreDoc;
@@ -57,8 +63,14 @@ public class Gflucene_searcher_mgr {
 			IndexReader reader = DirectoryReader.open(index);
 			IndexSearcher searcher = new IndexSearcher(reader);

-			
 			Query query = new QueryParser("body", analyzer).parse(data.query);
+//			Query multi_query = MultiFieldQueryParser.parse(data.query, new String[] {"body"}, new BooleanClause.Occur []{BooleanClause.Occur.SHOULD}, analyzer);
+			
+//			Query body_query = new QueryParser("body", analyzer).parse(data.query);
+//			Query title_query = new QueryParser("title", analyzer).parse(data.query);
+//			FunctionQuery boost_query = new FunctionQuery(new LongFieldSource("page_score"));			
+//			CustomScoreQuery query = new CustomScoreQuery(multi_query, boost_query);
+ 
 //			TopDocs docs = searcher.search(query, reader.maxDoc());
 			TopDocs docs = searcher.search(query, data.match_max);
 			ScoreDoc[] hits = docs.scoreDocs;
@@ -66,8 +78,10 @@ public class Gflucene_searcher_mgr {
 			for(int i = 0; i < hits.length; i++) {
 				int docId = hits[i].doc;
 				Document d = searcher.doc(docId);
-				Gflucene_doc_data doc = new Gflucene_doc_data(Integer.parseInt(d.get("page_id")), Integer.parseInt(d.get("page_score")), d.get("title"), "");
+//				Gflucene_doc_data doc = new Gflucene_doc_data(Integer.parseInt(d.get("page_id")), Integer.parseInt(d.get("page_score")), d.get("title"), "");
+				Gflucene_doc_data doc = new Gflucene_doc_data(Integer.parseInt(d.get("page_id")), 0, d.get("title"), "");
 				doc.lucene_score = hits[i].score;
+//				Tfds.Write(doc.lucene_score, doc.title);
 				list.Add(doc);
 			}