mirror of
https://github.com/gnosygnu/xowa.git
synced 2026-03-02 03:49:30 +00:00
Full-text search: Add IndexOptions to Indexer
This commit is contained in:
@@ -18,8 +18,8 @@ import gplx.gflucene.analyzers.*;
|
||||
public class Gflucene_index_data {
|
||||
public final Gflucene_analyzer_data analyzer_data;
|
||||
public final String index_dir;
|
||||
|
||||
public final float max_merged_segments = 1500; // "limits" maximum file size
|
||||
public final boolean positional_enabled = false;
|
||||
public Gflucene_index_data(Gflucene_analyzer_data analyzer_data, String index_dir) {
|
||||
this.analyzer_data = analyzer_data;
|
||||
this.index_dir = index_dir;
|
||||
|
||||
@@ -0,0 +1,49 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||
|
||||
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||
or alternatively under the terms of the Apache License Version 2.0.
|
||||
|
||||
You may use XOWA according to either of these licenses as is most appropriate
|
||||
for your project on a case-by-case basis.
|
||||
|
||||
The terms of each license can be found in the source code repository:
|
||||
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.gflucene.indexers; import gplx.*; import gplx.gflucene.*;
|
||||
public class Gflucene_idx_opt {
|
||||
public Gflucene_idx_opt(int uid, String key, String name) {
|
||||
this.uid = uid;
|
||||
this.key = key;
|
||||
this.name = name;
|
||||
}
|
||||
public int Uid() {return uid;} private final int uid;
|
||||
public String Key() {return key;} private final String key;
|
||||
public String Name() {return name;} private final String name;
|
||||
|
||||
public static final int
|
||||
Uid_docs = 0 // basic inverted word index; number of words is always 1 per doc
|
||||
, Uid_docs_and_freqs = 1 // freqs needed for number of words per doc
|
||||
, Uid_docs_and_freqs_and_positions = 2 // positions needed for proximity queries
|
||||
, Uid_docs_and_freqs_and_positions_and_offsets = 3 // offsets needed for highlighter
|
||||
;
|
||||
|
||||
private static final Hash_adp parse_hash = Hash_adp_.New();
|
||||
public static final Gflucene_idx_opt
|
||||
Docs = New(Uid_docs, "d", "Documents")
|
||||
, Docs_and_freqs = New(Uid_docs_and_freqs, "df", "Documents / Frequencies")
|
||||
, Docs_and_freqs_and_positions = New(Uid_docs_and_freqs_and_positions, "dfp", "Documents / Frequencies / Positions")
|
||||
, Docs_and_freqs_and_positions_and_offsets = New(Uid_docs_and_freqs_and_positions_and_offsets, "dfpo", "Documents / Frequencies / Positions / Offsets")
|
||||
;
|
||||
private static Gflucene_idx_opt New(int uid, String key, String name) {
|
||||
Gflucene_idx_opt rv = new Gflucene_idx_opt(uid, key, name);
|
||||
parse_hash.Add(key, rv);
|
||||
return rv;
|
||||
}
|
||||
public static Gflucene_idx_opt Parse(String key) {
|
||||
return (Gflucene_idx_opt)parse_hash.Get_by_or_fail(key);
|
||||
}
|
||||
}
|
||||
@@ -40,7 +40,7 @@ public class Gflucene_indexer_mgr {
|
||||
// no-arg constructor; intentionally empty
public Gflucene_indexer_mgr() {}
|
||||
|
||||
public void Init(Gflucene_index_data idx_data) {
|
||||
public void Init(Gflucene_index_data idx_data, String idx_opt) {
|
||||
// create analyzer
|
||||
this.analyzer = Gflucene_analyzer_mgr_.New_analyzer(idx_data.analyzer_data.key);
|
||||
this.config = new IndexWriterConfig(analyzer);
|
||||
@@ -67,7 +67,7 @@ public class Gflucene_indexer_mgr {
|
||||
|
||||
// create field for body
|
||||
this.body_fld_type = new FieldType();
|
||||
IndexOptions index_options = idx_data.positional_enabled ? IndexOptions.DOCS_AND_FREQS_AND_POSITIONS : IndexOptions.DOCS_AND_FREQS;
|
||||
IndexOptions index_options = To_index_options(idx_opt);
|
||||
body_fld_type.setIndexOptions(index_options);
|
||||
body_fld_type.setTokenized(true);
|
||||
body_fld_type.setStored(false);
|
||||
@@ -110,4 +110,15 @@ public class Gflucene_indexer_mgr {
|
||||
throw Err_.new_exc(e, "lucene_index", "failed to close writer");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static IndexOptions To_index_options(String key) {
|
||||
Gflucene_idx_opt opt = Gflucene_idx_opt.Parse(key);
|
||||
switch (opt.Uid()) {
|
||||
case Gflucene_idx_opt.Uid_docs: return IndexOptions.DOCS;
|
||||
case Gflucene_idx_opt.Uid_docs_and_freqs: return IndexOptions.DOCS_AND_FREQS;
|
||||
case Gflucene_idx_opt.Uid_docs_and_freqs_and_positions: return IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
|
||||
case Gflucene_idx_opt.Uid_docs_and_freqs_and_positions_and_offsets: return IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
|
||||
default: throw Err_.new_unhandled_default(opt.Uid());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user