mirror of
https://github.com/gnosygnu/xowa.git
synced 2026-03-02 03:49:30 +00:00
Full-text search: Add lucene indexer
This commit is contained in:
@@ -3,9 +3,9 @@
|
||||
<classpathentry kind="src" path="src"/>
|
||||
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.8"/>
|
||||
<classpathentry combineaccessrules="false" kind="src" path="/100_core"/>
|
||||
<classpathentry kind="lib" path="C:/000/200_dev/110_java/gplx.gflucene/lib/lucene-core-6.4.2.jar"/>
|
||||
<classpathentry kind="lib" path="C:/000/200_dev/110_java/gplx.gflucene/lib/lucene-memory-6.4.2.jar"/>
|
||||
<classpathentry kind="lib" path="C:/000/200_dev/110_java/gplx.gflucene/lib/lucene-highlighter-6.4.2.jar"/>
|
||||
<classpathentry kind="lib" path="C:/000/200_dev/110_java/gplx.gflucene/lib/lucene-queryparser-6.4.2.jar"/>
|
||||
<classpathentry kind="lib" path="lib/lucene-core-6.4.2.jar"/>
|
||||
<classpathentry kind="lib" path="lib/lucene-memory-6.4.2.jar"/>
|
||||
<classpathentry kind="lib" path="lib/lucene-highlighter-6.4.2.jar"/>
|
||||
<classpathentry kind="lib" path="lib/lucene-queryparser-6.4.2.jar"/>
|
||||
<classpathentry kind="output" path="bin"/>
|
||||
</classpath>
|
||||
|
||||
91
gplx.gflucene/src/gplx/gflucene/Gflucene_index_bldr.java
Normal file
91
gplx.gflucene/src/gplx/gflucene/Gflucene_index_bldr.java
Normal file
@@ -0,0 +1,91 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||
|
||||
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||
or alternatively under the terms of the Apache License Version 2.0.
|
||||
|
||||
You may use XOWA according to either of these licenses as is most appropriate
|
||||
for your project on a case-by-case basis.
|
||||
|
||||
The terms of each license can be found in the source code repository:
|
||||
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.gflucene; import gplx.*;
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.Paths;
|
||||
|
||||
import org.apache.lucene.analysis.standard.StandardAnalyzer;
|
||||
import org.apache.lucene.document.*;
|
||||
import org.apache.lucene.index.IndexOptions;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.IndexWriterConfig;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.FSDirectory;
|
||||
public class Gflucene_index_bldr {
|
||||
private final StandardAnalyzer analyzer = new StandardAnalyzer();
|
||||
private final IndexWriterConfig config;
|
||||
private Directory index;
|
||||
private IndexWriter wtr;
|
||||
private FieldType body_fld;
|
||||
|
||||
public Gflucene_index_bldr() {
|
||||
this.config = new IndexWriterConfig(analyzer);
|
||||
}
|
||||
|
||||
public void Init(String index_dir) {
|
||||
|
||||
// create index
|
||||
Path path = Paths.get(index_dir);
|
||||
try {
|
||||
this.index = FSDirectory.open(path);
|
||||
} catch (IOException e) {
|
||||
throw Err_.new_exc(e, "lucene_index", "failed to open lucene index", "path", path);
|
||||
}
|
||||
|
||||
// create writer
|
||||
try {
|
||||
wtr = new IndexWriter(index, config);
|
||||
} catch (IOException e) {
|
||||
throw Err_.new_exc(e, "lucene_index", "failed to create writer");
|
||||
}
|
||||
|
||||
// create field for body
|
||||
this.body_fld = new FieldType();
|
||||
body_fld.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
|
||||
// body_fld.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
|
||||
// body_fld.setStored(true);
|
||||
body_fld.setTokenized(true);
|
||||
// body_fld.setStoreTermVectors(true);
|
||||
// body_fld.setStoreTermVectorOffsets(true);
|
||||
}
|
||||
public void Exec(Gflucene_index_data data) {
|
||||
// org.apache.lucene.document.
|
||||
Document doc = new Document();
|
||||
// doc.add(new SortedNumericDocValuesField("page_score", data.score));
|
||||
doc.add(new StoredField("page_score", data.score));
|
||||
doc.add(new StoredField("page_id", data.page_id));
|
||||
doc.add(new TextField("title", data.title, Field.Store.YES));
|
||||
doc.add(new Field("body", data.body, body_fld));
|
||||
try {
|
||||
wtr.addDocument(doc);
|
||||
} catch (IOException e) {
|
||||
throw Err_.new_exc(e, "lucene_index", "failed to add document", "title", data.title);
|
||||
}
|
||||
}
|
||||
public void Term() {
|
||||
try {
|
||||
wtr.close();
|
||||
} catch (IOException e) {
|
||||
throw Err_.new_exc(e, "lucene_index", "failed to close writer");
|
||||
}
|
||||
try {
|
||||
index.close();
|
||||
} catch (IOException e) {
|
||||
throw Err_.new_exc(e, "lucene_index", "failed to close writer");
|
||||
}
|
||||
}
|
||||
}
|
||||
29
gplx.gflucene/src/gplx/gflucene/Gflucene_index_data.java
Normal file
29
gplx.gflucene/src/gplx/gflucene/Gflucene_index_data.java
Normal file
@@ -0,0 +1,29 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||
|
||||
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||
or alternatively under the terms of the Apache License Version 2.0.
|
||||
|
||||
You may use XOWA according to either of these licenses as is most appropriate
|
||||
for your project on a case-by-case basis.
|
||||
|
||||
The terms of each license can be found in the source code repository:
|
||||
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.gflucene; import gplx.*;
|
||||
/**
 * Plain data holder for one page: used as the input when indexing and as the
 * result item when searching.
 */
public class Gflucene_index_data {
	// page identity and ranking, written to the index as "page_id" / "page_score"
	public int page_id;
	public int score;

	// page text; searcher results are created with an empty body
	public String title;
	public String body;

	// score of a search hit; stays 0 until a searcher assigns it
	public float lucene_score = 0f;

	/**
	 * @param page_id id of the page
	 * @param score   page score stored with the document
	 * @param title   page title
	 * @param body    page body text
	 */
	public Gflucene_index_data(int page_id, int score, String title, String body) {
		this.title = title;
		this.body = body;
		this.page_id = page_id;
		this.score = score;
	}
}
|
||||
73
gplx.gflucene/src/gplx/gflucene/Gflucene_searcher.java
Normal file
73
gplx.gflucene/src/gplx/gflucene/Gflucene_searcher.java
Normal file
@@ -0,0 +1,73 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||
|
||||
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||
or alternatively under the terms of the Apache License Version 2.0.
|
||||
|
||||
You may use XOWA according to either of these licenses as is most appropriate
|
||||
for your project on a case-by-case basis.
|
||||
|
||||
The terms of each license can be found in the source code repository:
|
||||
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.gflucene; import gplx.*;
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.Paths;
|
||||
|
||||
import org.apache.lucene.analysis.standard.StandardAnalyzer;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.queryparser.classic.QueryParser;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.ScoreDoc;
|
||||
import org.apache.lucene.search.TopDocs;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.FSDirectory;
|
||||
public class Gflucene_searcher {
|
||||
private final StandardAnalyzer analyzer = new StandardAnalyzer();
|
||||
private Directory index;
|
||||
|
||||
public Gflucene_searcher() {
|
||||
}
|
||||
|
||||
public void Init(String index_dir) {
|
||||
Path path = Paths.get(index_dir);
|
||||
try {
|
||||
this.index = FSDirectory.open(path);
|
||||
} catch (IOException e) {
|
||||
throw Err_.new_exc(e, "lucene_index", "failed to init searcher", "dir", index_dir);
|
||||
}
|
||||
}
|
||||
public void Exec(List_adp list, Gflucene_searcher_data data) {
|
||||
try {
|
||||
IndexReader reader = DirectoryReader.open(index);
|
||||
IndexSearcher searcher = new IndexSearcher(reader);
|
||||
|
||||
|
||||
Query query = new QueryParser("body", analyzer).parse(data.query);
|
||||
// TopDocs docs = searcher.search(query, reader.maxDoc());
|
||||
TopDocs docs = searcher.search(query, data.match_max);
|
||||
ScoreDoc[] hits = docs.scoreDocs;
|
||||
|
||||
for(int i = 0; i < hits.length; i++) {
|
||||
int docId = hits[i].doc;
|
||||
Document d = searcher.doc(docId);
|
||||
Gflucene_index_data doc = new Gflucene_index_data(Integer.parseInt(d.get("page_id")), Integer.parseInt(d.get("page_score")), d.get("title"), "");
|
||||
doc.lucene_score = hits[i].score;
|
||||
list.Add(doc);
|
||||
}
|
||||
|
||||
reader.close();
|
||||
} catch (Exception e) {
|
||||
throw Err_.new_exc(e, "lucene_index", "failed to exec seearch", "query", data.query);
|
||||
}
|
||||
}
|
||||
public void Term() {
|
||||
}
|
||||
}
|
||||
24
gplx.gflucene/src/gplx/gflucene/Gflucene_searcher_data.java
Normal file
24
gplx.gflucene/src/gplx/gflucene/Gflucene_searcher_data.java
Normal file
@@ -0,0 +1,24 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||
|
||||
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||
or alternatively under the terms of the Apache License Version 2.0.
|
||||
|
||||
You may use XOWA according to either of these licenses as is most appropriate
|
||||
for your project on a case-by-case basis.
|
||||
|
||||
The terms of each license can be found in the source code repository:
|
||||
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.gflucene; import gplx.*;
|
||||
/**
 * Plain data holder describing one search request.
 */
public class Gflucene_searcher_data {
	// maximum number of hits the searcher asks Lucene for
	public int match_max;

	// raw query text; the searcher parses it against the "body" field
	public String query;

	/**
	 * @param query     query text
	 * @param match_max maximum number of hits to return
	 */
	public Gflucene_searcher_data(String query, int match_max) {
		this.match_max = match_max;
		this.query = query;
	}
}
|
||||
Reference in New Issue
Block a user