1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2026-03-02 03:49:30 +00:00

Embeddable: Create core dbs in proper subdirectory

This commit is contained in:
gnosygnu
2017-10-23 20:50:50 -04:00
parent 1336d44f34
commit 66877212bf
4537 changed files with 311750 additions and 0 deletions

View File

@@ -13,3 +13,12 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.gflucene.highlighters; import gplx.*; import gplx.gflucene.*;
/** One highlighted search-result snippet: an ordinal position plus the snippet's HTML text. */
public class Gflucene_highlighter_item {
	public int num;      // 0-based ordinal of this snippet within the result set
	public String text;  // snippet markup as produced by the highlighter

	/**
	 * Builds a snippet item.
	 * @param ordinal 0-based position of the snippet
	 * @param snippet highlighted snippet text
	 */
	public Gflucene_highlighter_item(int ordinal, String snippet) {
		this.text = snippet;
		this.num = ordinal;
	}
}

View File

@@ -13,3 +13,91 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.gflucene.highlighters; import gplx.*; import gplx.gflucene.*;
import gplx.gflucene.core.*;
import gplx.gflucene.analyzers.*;
import gplx.gflucene.searchers.*;
import java.io.IOException;
import java.nio.file.Paths;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.highlight.Fragmenter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.search.highlight.SimpleSpanFragmenter;
import org.apache.lucene.search.highlight.TextFragment;
import org.apache.lucene.search.highlight.TokenSources;
import org.apache.lucene.store.FSDirectory;
/**
 * Produces highlighted snippets ("body" field) for a document that matched a Lucene query.
 * Lifecycle: {@link #Init} builds the analyzer, {@link #Exec} may be called repeatedly,
 * {@link #Term} releases the analyzer. Not thread-safe (single mutable analyzer field).
 */
public class Gflucene_highlighter_mgr {
	private Analyzer analyzer;
	public Gflucene_highlighter_mgr() {
	}
	/** Creates the analyzer matching the index's analyzer key; must be called before {@link #Exec}. */
	public void Init(Gflucene_index_data idx_data) {
		this.analyzer = Gflucene_analyzer_mgr_.New_analyzer(idx_data.analyzer_data.key);
	}
	/**
	 * Highlights query matches inside {@code doc_data.body}.
	 * @param qry_data holds the raw query string to parse against the "body" field
	 * @param doc_data holds the document body to fragment and highlight
	 * @return matching fragments wrapped as items; fragments that did not match
	 *         (null or score &lt;= 0) are skipped, so the array may be empty
	 * @throws gplx.Err wrapping ParseException / IOException / InvalidTokenOffsetsException
	 */
	public Gflucene_highlighter_item[] Exec(Gflucene_searcher_qry qry_data, Gflucene_doc_data doc_data) {
		// create query
		QueryParser parser = new QueryParser("body", analyzer);
		Query query = null;
		try {
			query = parser.parse(qry_data.query);
		} catch (ParseException e) {
			throw Err_.new_exc(e, "lucene_index", "failed to parse", "query", qry_data.query);
		}
		// create highlighter; spans wrapped for CSS styling; ~100-char fragments
		SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter("<span class='snip_highlight'>", "</span>");
		QueryScorer scorer = new QueryScorer(query);
		scorer.setExpandMultiTermQuery(false); // skip costly multi-term expansion (wildcards etc.)
		Highlighter highlighter = new Highlighter(htmlFormatter, scorer);
		SimpleFragmenter fragmenter = new SimpleFragmenter(100);
		highlighter.setTextFragmenter(fragmenter);
		// get token stream; getBestTextFragments closes the stream internally
		String text = doc_data.body;
		TokenStream tokenStream = null;
		try {
			tokenStream = analyzer.tokenStream("body", text);
		} catch (IOException e) {
			throw Err_.new_exc(e, "lucene_index", "failed to get stream", "query", qry_data.query);
		}
		// get fragments from stream; mergeContiguousFragments=true, max 10
		TextFragment[] frags;
		try {
			frags = highlighter.getBestTextFragments(tokenStream, text, true, 10);
		} catch (IOException | InvalidTokenOffsetsException e) { // identical handling; multi-catch
			throw Err_.new_exc(e, "lucene_index", "failed to get best", "query", qry_data.query);
		}
		// convert fragments to highlighter items, keeping only real matches;
		// per the Lucene highlighter example, entries can be null or have score <= 0
		int valid_count = 0;
		for (TextFragment frag : frags) {
			if (frag != null && frag.getScore() > 0) {
				valid_count++;
			}
		}
		Gflucene_highlighter_item[] array = new Gflucene_highlighter_item[valid_count];
		int idx = 0;
		for (TextFragment frag : frags) {
			if (frag != null && frag.getScore() > 0) {
				array[idx] = new Gflucene_highlighter_item(idx, frag.toString());
				idx++;
			}
		}
		return array;
	}
	/** Releases the analyzer created by {@link #Init}; safe to call when never initialized. */
	public void Term() {
		if (analyzer != null) {
			analyzer.close(); // Analyzer is Closeable; release its reuse strategy resources
			analyzer = null;
		}
	}
}