1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2026-03-02 03:49:30 +00:00

Full-text search: Add IndexOptions to Indexer

This commit is contained in:
gnosygnu
2017-03-22 09:30:45 -04:00
parent 49924110f4
commit a9afa7a827
18 changed files with 128 additions and 53 deletions

View File

@@ -18,8 +18,8 @@ import gplx.gflucene.analyzers.*;
public class Gflucene_index_data {
public final Gflucene_analyzer_data analyzer_data;
public final String index_dir;
public final float max_merged_segments = 1500; // "limits" maximum file size
public final boolean positional_enabled = false;
public Gflucene_index_data(Gflucene_analyzer_data analyzer_data, String index_dir) {
this.analyzer_data = analyzer_data;
this.index_dir = index_dir;

View File

@@ -0,0 +1,49 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.gflucene.indexers; import gplx.*; import gplx.gflucene.*;
/**
 * One selectable index option for the full-text indexer.
 * Each option carries a numeric uid (usable in a switch), a short key (used by Parse) and a human-readable name.
 * The predefined instances below are registered by key while the class is being initialized.
 */
public class Gflucene_idx_opt {
	private final int uid;
	private final String key;
	private final String name;

	/** Creates an option; note that only instances built through the private New factory are registered for Parse. */
	public Gflucene_idx_opt(int uid, String key, String name) {
		this.uid = uid;
		this.key = key;
		this.name = name;
	}

	/** Numeric id; one of the Uid_* constants for the predefined options. */
	public int Uid() {return uid;}
	/** Short key under which the option is registered (for example "df"). */
	public String Key() {return key;}
	/** Human-readable name (for example "Documents / Frequencies"). */
	public String Name() {return name;}

	// uid values; each level adds more per-term data to the index
	public static final int Uid_docs = 0;                                     // basic inverted word index; number of words is always 1 per doc
	public static final int Uid_docs_and_freqs = 1;                           // freqs needed for number of words per doc
	public static final int Uid_docs_and_freqs_and_positions = 2;             // positions needed for proximity queries
	public static final int Uid_docs_and_freqs_and_positions_and_offsets = 3; // offsets needed for highlighter

	// NOTE: keep this declaration above the instances below; static initializers run in textual order and New() adds to this hash
	private static final Hash_adp parse_hash = Hash_adp_.New();

	public static final Gflucene_idx_opt Docs = New(Uid_docs, "d", "Documents");
	public static final Gflucene_idx_opt Docs_and_freqs = New(Uid_docs_and_freqs, "df", "Documents / Frequencies");
	public static final Gflucene_idx_opt Docs_and_freqs_and_positions = New(Uid_docs_and_freqs_and_positions, "dfp", "Documents / Frequencies / Positions");
	public static final Gflucene_idx_opt Docs_and_freqs_and_positions_and_offsets = New(Uid_docs_and_freqs_and_positions_and_offsets, "dfpo", "Documents / Frequencies / Positions / Offsets");

	/** Builds an option and registers it in parse_hash under its key. */
	private static Gflucene_idx_opt New(int uid, String key, String name) {
		Gflucene_idx_opt opt = new Gflucene_idx_opt(uid, key, name);
		parse_hash.Add(key, opt);
		return opt;
	}

	/**
	 * Looks up a registered option by its key ("d", "df", "dfp", "dfpo").
	 * The lookup goes through Get_by_or_fail, so an unregistered key is presumably reported as an error rather than returned as null.
	 */
	public static Gflucene_idx_opt Parse(String key) {
		Object found = parse_hash.Get_by_or_fail(key);
		return (Gflucene_idx_opt)found;
	}
}

View File

@@ -40,7 +40,7 @@ public class Gflucene_indexer_mgr {
// Empty constructor: nothing is set up here; the analyzer and writer config are created later in Init
public Gflucene_indexer_mgr() {
}
public void Init(Gflucene_index_data idx_data) {
public void Init(Gflucene_index_data idx_data, String idx_opt) {
// create analyzer
this.analyzer = Gflucene_analyzer_mgr_.New_analyzer(idx_data.analyzer_data.key);
this.config = new IndexWriterConfig(analyzer);
@@ -67,7 +67,7 @@ public class Gflucene_indexer_mgr {
// create field for body
this.body_fld_type = new FieldType();
IndexOptions index_options = idx_data.positional_enabled ? IndexOptions.DOCS_AND_FREQS_AND_POSITIONS : IndexOptions.DOCS_AND_FREQS;
IndexOptions index_options = To_index_options(idx_opt);
body_fld_type.setIndexOptions(index_options);
body_fld_type.setTokenized(true);
body_fld_type.setStored(false);
@@ -110,4 +110,15 @@ public class Gflucene_indexer_mgr {
throw Err_.new_exc(e, "lucene_index", "failed to close writer");
}
}
}
/**
 * Translates an index-option key into the matching Lucene IndexOptions value.
 * The key is resolved through Gflucene_idx_opt.Parse and the resulting uid selects the IndexOptions constant;
 * a uid with no mapping is reported through Err_.new_unhandled_default.
 */
private static IndexOptions To_index_options(String key) {
	int uid = Gflucene_idx_opt.Parse(key).Uid();
	if (uid == Gflucene_idx_opt.Uid_docs)
		return IndexOptions.DOCS;
	if (uid == Gflucene_idx_opt.Uid_docs_and_freqs)
		return IndexOptions.DOCS_AND_FREQS;
	if (uid == Gflucene_idx_opt.Uid_docs_and_freqs_and_positions)
		return IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
	if (uid == Gflucene_idx_opt.Uid_docs_and_freqs_and_positions_and_offsets)
		return IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
	// uid is not one of the known Uid_* constants
	throw Err_.new_unhandled_default(uid);
}
}