1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2026-03-02 03:49:30 +00:00

Embeddable: Create core dbs in proper subdirectory

This commit is contained in:
gnosygnu
2017-10-23 20:50:50 -04:00
parent 1336d44f34
commit 66877212bf
4537 changed files with 311750 additions and 0 deletions

View File

@@ -13,3 +13,45 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.gflucene.analyzers; import gplx.*; import gplx.gflucene.*;
import gplx.gflucene.core.*;
import org.apache.lucene.analysis.Analyzer;
public class Gflucene_analyzer_mgr_ {
    /**
     * Factory: maps an analyzer key (usually a 2-letter language code, or "standard" / "cjk")
     * to a new language-specific Lucene {@code Analyzer} instance.
     * Throws via {@code Err_.new_unhandled_default} when the key is unknown.
     */
    public static Analyzer New_analyzer(String key) {
        Analyzer rv = null;
        if (String_.Eq(key, "standard")) rv = new org.apache.lucene.analysis.standard.StandardAnalyzer();
        else if (String_.Eq(key, "ar"))  rv = new org.apache.lucene.analysis.ar.ArabicAnalyzer();
        else if (String_.Eq(key, "bg"))  rv = new org.apache.lucene.analysis.bg.BulgarianAnalyzer();
        // else if (String_.Eq(key, "br")) rv = new org.apache.lucene.analysis.br.BrazilianAnalyzer();
        else if (String_.Eq(key, "ca"))  rv = new org.apache.lucene.analysis.ca.CatalanAnalyzer();
        else if (String_.Eq(key, "cjk")) rv = new org.apache.lucene.analysis.cjk.CJKAnalyzer();
        else if (String_.Eq(key, "ckb")) rv = new org.apache.lucene.analysis.ckb.SoraniAnalyzer();
        else if (String_.Eq(key, "cz"))  rv = new org.apache.lucene.analysis.cz.CzechAnalyzer();
        else if (String_.Eq(key, "da"))  rv = new org.apache.lucene.analysis.da.DanishAnalyzer();
        else if (String_.Eq(key, "de"))  rv = new org.apache.lucene.analysis.de.GermanAnalyzer();
        else if (String_.Eq(key, "el"))  rv = new org.apache.lucene.analysis.el.GreekAnalyzer();
        else if (String_.Eq(key, "en"))  rv = new org.apache.lucene.analysis.en.EnglishAnalyzer();
        else if (String_.Eq(key, "es"))  rv = new org.apache.lucene.analysis.es.SpanishAnalyzer();
        else if (String_.Eq(key, "eu"))  rv = new org.apache.lucene.analysis.eu.BasqueAnalyzer();
        else if (String_.Eq(key, "fa"))  rv = new org.apache.lucene.analysis.fa.PersianAnalyzer();
        else if (String_.Eq(key, "fi"))  rv = new org.apache.lucene.analysis.fi.FinnishAnalyzer();
        else if (String_.Eq(key, "fr"))  rv = new org.apache.lucene.analysis.fr.FrenchAnalyzer();
        else if (String_.Eq(key, "ga"))  rv = new org.apache.lucene.analysis.ga.IrishAnalyzer();
        else if (String_.Eq(key, "gl"))  rv = new org.apache.lucene.analysis.gl.GalicianAnalyzer();
        else if (String_.Eq(key, "hi"))  rv = new org.apache.lucene.analysis.hi.HindiAnalyzer();
        else if (String_.Eq(key, "hu"))  rv = new org.apache.lucene.analysis.hu.HungarianAnalyzer();
        else if (String_.Eq(key, "hy"))  rv = new org.apache.lucene.analysis.hy.ArmenianAnalyzer();
        else if (String_.Eq(key, "id"))  rv = new org.apache.lucene.analysis.id.IndonesianAnalyzer();
        else if (String_.Eq(key, "it"))  rv = new org.apache.lucene.analysis.it.ItalianAnalyzer();
        else if (String_.Eq(key, "lt"))  rv = new org.apache.lucene.analysis.lt.LithuanianAnalyzer();
        else if (String_.Eq(key, "lv"))  rv = new org.apache.lucene.analysis.lv.LatvianAnalyzer();
        else if (String_.Eq(key, "nl"))  rv = new org.apache.lucene.analysis.nl.DutchAnalyzer();
        else if (String_.Eq(key, "no"))  rv = new org.apache.lucene.analysis.no.NorwegianAnalyzer();
        else if (String_.Eq(key, "pt"))  rv = new org.apache.lucene.analysis.pt.PortugueseAnalyzer();
        else if (String_.Eq(key, "ro"))  rv = new org.apache.lucene.analysis.ro.RomanianAnalyzer();
        else if (String_.Eq(key, "ru"))  rv = new org.apache.lucene.analysis.ru.RussianAnalyzer();
        else if (String_.Eq(key, "sv"))  rv = new org.apache.lucene.analysis.sv.SwedishAnalyzer();
        else if (String_.Eq(key, "th"))  rv = new org.apache.lucene.analysis.th.ThaiAnalyzer();
        else if (String_.Eq(key, "tr"))  rv = new org.apache.lucene.analysis.tr.TurkishAnalyzer();
        if (rv == null) throw Err_.new_unhandled_default(key); // unknown key is a programming error
        return rv;
    }
}

View File

@@ -13,3 +13,23 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.gflucene.core; import gplx.*; import gplx.gflucene.*;
public class Gflucene_analyzer_data {
    // Analyzer key consumed by Gflucene_analyzer_mgr_.New_analyzer; e.g. "standard", "cjk", "de"
    public final String key;
    public Gflucene_analyzer_data(String key) {
        this.key = key;
    }
    /**
     * Maps a wiki locale (ISO 639-1 style language code) to an analyzer key.
     * Unknown locales fall back to "standard".
     */
    public static Gflucene_analyzer_data New_data_from_locale(String locale) {
        String key = null;
        if (String_.Eq(locale, "en")) key = "standard"; // NOTE: en exists but use standard for now
        else if (String_.Eq(locale, "cs")) key = "cz"; // FIX: ISO 639-1 code for Czech is "cs"; Lucene's Czech analyzer lives under "cz"
        else if (String_.EqAny(locale
            , "ar", "bg", "ca", "ckb", "cz", "da", "de", "el", "es", "eu", "fa", "fi", "fr", "ga", "gl", "hi"
            , "hu", "hy", "id", "it", "lt", "lv", "nl", "no", "pt", "ro", "ru", "sv", "th", "tr")
            ) key = locale; // locale code matches the analyzer key directly
        else if (String_.EqAny(locale
            , "zh", "ja", "ko")
            ) key = "cjk"; // Chinese / Japanese / Korean share the CJK bigram analyzer
        else key = "standard"; // safe fallback for unsupported languages
        return new Gflucene_analyzer_data(key);
    }
}

View File

@@ -13,3 +13,19 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.gflucene.core; import gplx.*; import gplx.gflucene.*;
public class Gflucene_doc_data {
    // Mutable carrier for one searchable page: identity, rank inputs, and text.
    public String title;          // page title; indexed and stored
    public String body;           // page body text; indexed only
    public int ns_id;             // namespace id; set by callers after construction
    public int page_id;           // wiki page id
    public byte[] page_full_db;   // full title bytes; set by callers after construction
    public int score;             // page score used to boost ranking
    public float lucene_score = 0; // relevance score filled in after a search
    public Gflucene_doc_data(int page_id, int score, String title, String body) {
        this.body = body;
        this.title = title;
        this.score = score;
        this.page_id = page_id;
    }
}

View File

@@ -13,3 +13,15 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.gflucene.core; import gplx.*; import gplx.gflucene.*;
import gplx.gflucene.analyzers.*;
public class Gflucene_index_data {
    // Immutable settings bundle for opening a Lucene index.
    public final Gflucene_analyzer_data analyzer_data; // which analyzer to build the index with
    public final String index_dir;                     // filesystem directory holding the index
    public final float max_merged_segments = 1500;     // "limits" maximum file size
    public Gflucene_index_data(Gflucene_analyzer_data analyzer_data, String index_dir) {
        this.index_dir = index_dir;
        this.analyzer_data = analyzer_data;
    }
}

View File

@@ -13,3 +13,12 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.gflucene.highlighters; import gplx.*; import gplx.gflucene.*;
public class Gflucene_highlighter_item {
    // One highlighted snippet: its ordinal within the result set and its HTML text.
    public int num;     // 0-based fragment index
    public String text; // snippet text with highlight markup
    public Gflucene_highlighter_item(int num, String text) {
        this.text = text;
        this.num = num;
    }
}

View File

@@ -13,3 +13,91 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.gflucene.highlighters; import gplx.*; import gplx.gflucene.*;
import gplx.gflucene.core.*;
import gplx.gflucene.analyzers.*;
import gplx.gflucene.searchers.*;
import java.io.IOException;
import java.nio.file.Paths;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.highlight.Fragmenter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.search.highlight.SimpleSpanFragmenter;
import org.apache.lucene.search.highlight.TextFragment;
import org.apache.lucene.search.highlight.TokenSources;
import org.apache.lucene.store.FSDirectory;
public class Gflucene_highlighter_mgr {
    // Produces highlighted snippets for a search hit's body text.
    private Analyzer analyzer;
    public Gflucene_highlighter_mgr() {
    }
    /** Builds the analyzer matching the index's language; must be called before Exec. */
    public void Init(Gflucene_index_data idx_data) {
        this.analyzer = Gflucene_analyzer_mgr_.New_analyzer(idx_data.analyzer_data.key);
    }
    /**
     * Runs the query against the doc's body text and returns up to 10 merged,
     * HTML-marked fragments (span class 'snip_highlight'), each ~100 chars.
     */
    public Gflucene_highlighter_item[] Exec(Gflucene_searcher_qry qry_data, Gflucene_doc_data doc_data) {
        // create query
        QueryParser parser = new QueryParser("body", analyzer);
        Query query = null;
        try {
            query = parser.parse(qry_data.query);
        } catch (ParseException e) {
            throw Err_.new_exc(e, "lucene_index", "failed to parse", "query", qry_data.query);
        }
        // create highlighter
        SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter("<span class='snip_highlight'>", "</span>");
        QueryScorer scorer = new QueryScorer(query);
        scorer.setExpandMultiTermQuery(false); // skip expensive expansion of wildcard/prefix queries
        Highlighter highlighter = new Highlighter(htmlFormatter, scorer);
        SimpleFragmenter fragmenter = new SimpleFragmenter(100);
        highlighter.setTextFragmenter(fragmenter);
        // get token stream
        String text = doc_data.body;
        TokenStream tokenStream = null;
        try {
            tokenStream = analyzer.tokenStream("body", text);
        } catch (IOException e) {
            throw Err_.new_exc(e, "lucene_index", "failed to get stream", "query", qry_data.query);
        }
        // get fragments from stream; Highlighter closes the token stream itself
        TextFragment[] frags;
        try {
            // frags = highlighter.getBestTextFragments(tokenStream, text, false, 1000);
            frags = highlighter.getBestTextFragments(tokenStream, text, true, 10);
        } catch (IOException | InvalidTokenOffsetsException e) { // multi-catch: both had identical handling
            throw Err_.new_exc(e, "lucene_index", "failed to get best", "query", qry_data.query);
        }
        // convert fragments to highlighter items
        int frags_len = frags.length;
        Gflucene_highlighter_item[] array = new Gflucene_highlighter_item[frags_len];
        for (int i = 0; i < frags_len; i++) {
            String frag = frags[i].toString();
            array[i] = new Gflucene_highlighter_item(i, frag);
        }
        return array;
    }
    public void Term() {
    }
}

View File

@@ -13,3 +13,37 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.gflucene.indexers; import gplx.*; import gplx.gflucene.*;
// Enumerates Lucene IndexOptions choices (docs / freqs / positions / offsets) as
// typed constants with a uid, a short key ("d".."dfpo"), and a display name.
public class Gflucene_idx_opt {
public Gflucene_idx_opt(int uid, String key, String name) {
this.uid = uid;
this.key = key;
this.name = name;
}
public int Uid() {return uid;} private final int uid;
public String Key() {return key;} private final String key;
public String Name() {return name;} private final String name;
public static final int
  Uid_docs = 0 // basic inverted word index; number of words is always 1 per doc
, Uid_docs_and_freqs = 1 // freqs needed for number of words per doc
, Uid_docs_and_freqs_and_positions = 2 // positions needed for proximity queries
, Uid_docs_and_freqs_and_positions_and_offsets = 3 // offsets needed for highlighter
;
// NOTE: must be declared before the constants below; static initializers run in
// declaration order, and New() adds each constant into this hash.
private static final Hash_adp parse_hash = Hash_adp_.New();
public static final Gflucene_idx_opt
  Docs = New(Uid_docs, "d", "Documents")
, Docs_and_freqs = New(Uid_docs_and_freqs, "df", "Documents / Frequencies")
, Docs_and_freqs_and_positions = New(Uid_docs_and_freqs_and_positions, "dfp", "Documents / Frequencies / Positions")
, Docs_and_freqs_and_positions_and_offsets = New(Uid_docs_and_freqs_and_positions_and_offsets, "dfpo", "Documents / Frequencies / Positions / Offsets")
;
// Registers the new constant under its key for later Parse() lookup.
private static Gflucene_idx_opt New(int uid, String key, String name) {
Gflucene_idx_opt rv = new Gflucene_idx_opt(uid, key, name);
parse_hash.Add(key, rv);
return rv;
}
// Resolves a short key ("d", "df", "dfp", "dfpo"); fails on unknown keys.
public static Gflucene_idx_opt Parse(String key) {
return (Gflucene_idx_opt)parse_hash.Get_by_or_fail(key);
}
}

View File

@@ -13,3 +13,112 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.gflucene.indexers; import gplx.*; import gplx.gflucene.*;
import gplx.gflucene.core.*;
import java.io.IOException;
import org.lukhnos.portmobile.file.Path;
import org.lukhnos.portmobile.file.Paths;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.*;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.TieredMergePolicy;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import gplx.gflucene.analyzers.*;;
public class Gflucene_indexer_mgr {
    // Writes page documents into a Lucene index: Init -> Exec (per page) -> Term.
    private Analyzer analyzer;
    private IndexWriterConfig config;
    private Directory index;
    private IndexWriter wtr;
    private FieldType body_fld_type;
    public Gflucene_indexer_mgr() {
    }
    /**
     * Opens the index directory and creates the writer.
     * @param idx_data analyzer + directory settings
     * @param idx_opt  key parsed by Gflucene_idx_opt ("d", "df", "dfp", "dfpo")
     */
    public void Init(Gflucene_index_data idx_data, String idx_opt) {
        // create analyzer
        this.analyzer = Gflucene_analyzer_mgr_.New_analyzer(idx_data.analyzer_data.key);
        this.config = new IndexWriterConfig(analyzer);
        // limit max size by setting merge policy
        TieredMergePolicy merge_policy = new TieredMergePolicy();
        merge_policy.setMaxMergedSegmentMB(idx_data.max_merged_segments);
        config.setMergePolicy(merge_policy);
        // create index
        Path path = Paths.get(idx_data.index_dir);
        try {
            this.index = FSDirectory.open(path);
        } catch (IOException e) {
            throw Err_.new_exc(e, "lucene_index", "failed to open lucene index", "path", path);
        }
        // create writer
        try {
            wtr = new IndexWriter(index, config);
        } catch (IOException e) {
            throw Err_.new_exc(e, "lucene_index", "failed to create writer");
        }
        // create field for body; not stored (only indexed) to keep the index small
        this.body_fld_type = new FieldType();
        IndexOptions index_options = To_index_options(idx_opt);
        body_fld_type.setIndexOptions(index_options);
        body_fld_type.setTokenized(true);
        body_fld_type.setStored(false);
        // body_fld.setStoreTermVectors(true);
        // body_fld.setStoreTermVectorOffsets(true);
    }
    /** Adds one page: stored page_id, page_score doc-value for boosting, indexed title + body. */
    public void Exec(Gflucene_doc_data doc_data) {
        Document doc = new Document();
        doc.add(new StoredField("page_id", doc_data.page_id));
        doc.add(new NumericDocValuesField("page_score", doc_data.score));
        // float score = ((float)doc_data.score / 1000000);
        // float score = doc_data.score;
        TextField title_field = new TextField("title", doc_data.title, Field.Store.YES);
        // title_field.setBoost(score * 1024);
        // title_field.setBoost(score);
        doc.add(title_field);
        Field body_field = new Field("body", doc_data.body, body_fld_type);
        // body_field.setBoost(score);
        doc.add(body_field);
        try {
            wtr.addDocument(doc);
        } catch (IOException e) {
            throw Err_.new_exc(e, "lucene_index", "failed to add document", "title", doc_data.title);
        }
    }
    /** Closes the writer (commits pending docs) then the index directory. */
    public void Term() {
        try {
            wtr.close();
        } catch (IOException e) {
            throw Err_.new_exc(e, "lucene_index", "failed to close writer");
        }
        try {
            index.close();
        } catch (IOException e) {
            throw Err_.new_exc(e, "lucene_index", "failed to close index"); // FIX: was "failed to close writer"
        }
    }
    // Maps the project's idx_opt key onto Lucene's IndexOptions enum.
    private static IndexOptions To_index_options(String key) {
        Gflucene_idx_opt opt = Gflucene_idx_opt.Parse(key);
        switch (opt.Uid()) {
            case Gflucene_idx_opt.Uid_docs: return IndexOptions.DOCS;
            case Gflucene_idx_opt.Uid_docs_and_freqs: return IndexOptions.DOCS_AND_FREQS;
            case Gflucene_idx_opt.Uid_docs_and_freqs_and_positions: return IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
            case Gflucene_idx_opt.Uid_docs_and_freqs_and_positions_and_offsets: return IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
            default: throw Err_.new_unhandled_default(opt.Uid());
        }
    }
}

View File

@@ -13,3 +13,86 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.gflucene.searchers; import gplx.*; import gplx.gflucene.*;
import gplx.gflucene.core.*;
import gplx.gflucene.analyzers.*;
import java.io.IOException;
import org.lukhnos.portmobile.file.Path;
import org.lukhnos.portmobile.file.Paths;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queries.CustomScoreQuery;
import org.apache.lucene.queries.function.FunctionQuery;
import org.apache.lucene.queries.function.valuesource.LongFieldSource;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import gplx.gflucene.indexers.*;
public class Gflucene_searcher_mgr {
    // Runs queries against an existing Lucene index: Init -> Exec (per query) -> Term.
    private Analyzer analyzer;
    private Directory index;
    public Gflucene_searcher_mgr() {
    }
    /** Opens the index directory and builds the matching analyzer. */
    public void Init(Gflucene_index_data idx_data) {
        // create analyzer
        this.analyzer = Gflucene_analyzer_mgr_.New_analyzer(idx_data.analyzer_data.key);
        // get index
        Path path = Paths.get(idx_data.index_dir);
        try {
            this.index = FSDirectory.open(path);
        } catch (IOException e) {
            throw Err_.new_exc(e, "lucene_index", "failed to init searcher", "dir", idx_data.index_dir);
        }
    }
    /**
     * Executes the query and appends up to data.match_max hits to list,
     * boosting relevance by the stored page_score doc-value.
     */
    public void Exec(Ordered_hash list, Gflucene_searcher_qry data) {
        try {
            IndexReader reader = DirectoryReader.open(index);
            try {
                IndexSearcher searcher = new IndexSearcher(reader);
                // Query query = new QueryParser("body", analyzer).parse(data.query);
                // creates query that boosts by page_score; not sure if this is needed, but 1st release of fts uses this
                Query multi_query = MultiFieldQueryParser.parse(data.query, new String[] {"body"}, new BooleanClause.Occur []{BooleanClause.Occur.SHOULD}, analyzer);
                FunctionQuery boost_query = new FunctionQuery(new LongFieldSource("page_score"));
                CustomScoreQuery query = new CustomScoreQuery(multi_query, boost_query);
                TopDocs docs = searcher.search(query, data.match_max);
                ScoreDoc[] hits = docs.scoreDocs;
                for (int i = 0; i < hits.length; i++) {
                    int docId = hits[i].doc;
                    Document d = searcher.doc(docId);
                    // Gflucene_doc_data doc = new Gflucene_doc_data(Integer.parseInt(d.get("page_id")), Integer.parseInt(d.get("page_score")), d.get("title"), "");
                    String docTitle = d.get("title");
                    // NOTE(review): lookup is keyed by title but Add() below keys by page_id, so
                    // this Get_by likely never finds prior entries — confirm intended key with callers
                    Gflucene_doc_data doc = (Gflucene_doc_data)list.Get_by(docTitle);
                    if (doc == null) {
                        int doc_id = Integer.parseInt(d.get("page_id"));
                        doc = new Gflucene_doc_data(doc_id, 0, docTitle, "");
                        doc.lucene_score = hits[i].score;
                        list.Add(doc_id, doc);
                    }
                    // Tfds.Write(doc.lucene_score, doc.title);
                }
            } finally {
                reader.close(); // FIX: close even when search/doc-fetch throws; was leaked on exception
            }
        } catch (Exception e) {
            throw Err_.new_exc(e, "lucene_index", "failed to exec search", "query", data.query); // FIX: typo "seearch"
        }
    }
    /** Closes the index directory opened by Init. */
    public void Term() {
        if (index == null) return; // Init never ran or failed before assignment
        try {
            index.close(); // FIX: directory was never closed (resource leak)
        } catch (IOException e) {
            throw Err_.new_exc(e, "lucene_index", "failed to close index");
        }
    }
}

View File

@@ -13,3 +13,12 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.gflucene.searchers; import gplx.*; import gplx.gflucene.*;
public class Gflucene_searcher_qry {
    // Search request: the raw query string plus the maximum number of hits to return.
    public String query;   // user query text, parsed by the searcher
    public int match_max;  // cap on hits passed to IndexSearcher.search
    public Gflucene_searcher_qry(String query, int match_max) {
        this.match_max = match_max;
        this.query = query;
    }
}
}