1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2026-03-02 03:49:30 +00:00

Refactor: Pull more classes into baselib

This commit is contained in:
gnosygnu
2021-12-19 16:19:19 -05:00
parent 48559edffe
commit 0e80d7ef6d
7999 changed files with 1375876 additions and 1365947 deletions

View File

@@ -1,57 +1,58 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.gflucene.analyzers; import gplx.*; import gplx.gflucene.*;
import gplx.gflucene.core.*;
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.gflucene.analyzers;
import gplx.types.basics.utls.StringUtl;
import gplx.types.errs.ErrUtl;
import org.apache.lucene.analysis.Analyzer;
/**
 * Static factory that maps an analyzer key to a newly constructed Lucene {@link Analyzer}.
 *
 * NOTE(review): every branch below appears twice in this listing, once written with
 * String_ / Err_ and once with StringUtl / ErrUtl. Presumably the String_ / Err_ lines are
 * the pre-refactor form of the same chain; confirm before treating this as compilable code.
 */
public class Gflucene_analyzer_mgr_ {
/**
 * Returns a new analyzer for the given key.
 *
 * The key is compared against "standard", "cjk" and the language codes listed below;
 * the first match returns a fresh instance of the analyzer class paired with it.
 * When no branch matches, the error built by Err_.new_unhandled_default(key) /
 * ErrUtl.NewUnhandled(key) is thrown.
 *
 * @param key analyzer key, e.g. "standard", "cjk", "de"
 * @return a newly constructed analyzer for the matched key
 */
public static Analyzer New_analyzer(String key) {
if (String_.Eq(key, "standard")) return new org.apache.lucene.analysis.standard.StandardAnalyzer();
else if (String_.Eq(key, "ar")) return new org.apache.lucene.analysis.ar.ArabicAnalyzer();
else if (String_.Eq(key, "bg")) return new org.apache.lucene.analysis.bg.BulgarianAnalyzer();
// same three branches, spelled with StringUtl
if (StringUtl.Eq(key, "standard")) return new org.apache.lucene.analysis.standard.StandardAnalyzer();
else if (StringUtl.Eq(key, "ar")) return new org.apache.lucene.analysis.ar.ArabicAnalyzer();
else if (StringUtl.Eq(key, "bg")) return new org.apache.lucene.analysis.bg.BulgarianAnalyzer();
// the "br" (Brazilian) branch is commented out, so "br" falls through to the unhandled error
// else if (String_.Eq(key, "br")) return new org.apache.lucene.analysis.br.BrazilianAnalyzer();
else if (String_.Eq(key, "ca")) return new org.apache.lucene.analysis.ca.CatalanAnalyzer();
else if (String_.Eq(key, "cjk")) return new org.apache.lucene.analysis.cjk.CJKAnalyzer();
else if (String_.Eq(key, "ckb")) return new org.apache.lucene.analysis.ckb.SoraniAnalyzer();
else if (String_.Eq(key, "cz")) return new org.apache.lucene.analysis.cz.CzechAnalyzer();
else if (String_.Eq(key, "da")) return new org.apache.lucene.analysis.da.DanishAnalyzer();
else if (String_.Eq(key, "de")) return new org.apache.lucene.analysis.de.GermanAnalyzer();
else if (String_.Eq(key, "el")) return new org.apache.lucene.analysis.el.GreekAnalyzer();
else if (String_.Eq(key, "en")) return new org.apache.lucene.analysis.en.EnglishAnalyzer();
else if (String_.Eq(key, "es")) return new org.apache.lucene.analysis.es.SpanishAnalyzer();
else if (String_.Eq(key, "eu")) return new org.apache.lucene.analysis.eu.BasqueAnalyzer();
else if (String_.Eq(key, "fa")) return new org.apache.lucene.analysis.fa.PersianAnalyzer();
else if (String_.Eq(key, "fi")) return new org.apache.lucene.analysis.fi.FinnishAnalyzer();
else if (String_.Eq(key, "fr")) return new org.apache.lucene.analysis.fr.FrenchAnalyzer();
else if (String_.Eq(key, "ga")) return new org.apache.lucene.analysis.ga.IrishAnalyzer();
else if (String_.Eq(key, "gl")) return new org.apache.lucene.analysis.gl.GalicianAnalyzer();
else if (String_.Eq(key, "hi")) return new org.apache.lucene.analysis.hi.HindiAnalyzer();
else if (String_.Eq(key, "hu")) return new org.apache.lucene.analysis.hu.HungarianAnalyzer();
else if (String_.Eq(key, "hy")) return new org.apache.lucene.analysis.hy.ArmenianAnalyzer();
else if (String_.Eq(key, "id")) return new org.apache.lucene.analysis.id.IndonesianAnalyzer();
else if (String_.Eq(key, "it")) return new org.apache.lucene.analysis.it.ItalianAnalyzer();
else if (String_.Eq(key, "lt")) return new org.apache.lucene.analysis.lt.LithuanianAnalyzer();
else if (String_.Eq(key, "lv")) return new org.apache.lucene.analysis.lv.LatvianAnalyzer();
else if (String_.Eq(key, "nl")) return new org.apache.lucene.analysis.nl.DutchAnalyzer();
else if (String_.Eq(key, "no")) return new org.apache.lucene.analysis.no.NorwegianAnalyzer();
else if (String_.Eq(key, "pt")) return new org.apache.lucene.analysis.pt.PortugueseAnalyzer();
else if (String_.Eq(key, "ro")) return new org.apache.lucene.analysis.ro.RomanianAnalyzer();
else if (String_.Eq(key, "ru")) return new org.apache.lucene.analysis.ru.RussianAnalyzer();
else if (String_.Eq(key, "sv")) return new org.apache.lucene.analysis.sv.SwedishAnalyzer();
else if (String_.Eq(key, "th")) return new org.apache.lucene.analysis.th.ThaiAnalyzer();
else if (String_.Eq(key, "tr")) return new org.apache.lucene.analysis.tr.TurkishAnalyzer();
else throw Err_.new_unhandled_default(key);
// remainder of the chain ("ca" through "tr" plus the unhandled fallback), spelled with StringUtl / ErrUtl
else if (StringUtl.Eq(key, "ca")) return new org.apache.lucene.analysis.ca.CatalanAnalyzer();
else if (StringUtl.Eq(key, "cjk")) return new org.apache.lucene.analysis.cjk.CJKAnalyzer();
else if (StringUtl.Eq(key, "ckb")) return new org.apache.lucene.analysis.ckb.SoraniAnalyzer();
else if (StringUtl.Eq(key, "cz")) return new org.apache.lucene.analysis.cz.CzechAnalyzer();
else if (StringUtl.Eq(key, "da")) return new org.apache.lucene.analysis.da.DanishAnalyzer();
else if (StringUtl.Eq(key, "de")) return new org.apache.lucene.analysis.de.GermanAnalyzer();
else if (StringUtl.Eq(key, "el")) return new org.apache.lucene.analysis.el.GreekAnalyzer();
else if (StringUtl.Eq(key, "en")) return new org.apache.lucene.analysis.en.EnglishAnalyzer();
else if (StringUtl.Eq(key, "es")) return new org.apache.lucene.analysis.es.SpanishAnalyzer();
else if (StringUtl.Eq(key, "eu")) return new org.apache.lucene.analysis.eu.BasqueAnalyzer();
else if (StringUtl.Eq(key, "fa")) return new org.apache.lucene.analysis.fa.PersianAnalyzer();
else if (StringUtl.Eq(key, "fi")) return new org.apache.lucene.analysis.fi.FinnishAnalyzer();
else if (StringUtl.Eq(key, "fr")) return new org.apache.lucene.analysis.fr.FrenchAnalyzer();
else if (StringUtl.Eq(key, "ga")) return new org.apache.lucene.analysis.ga.IrishAnalyzer();
else if (StringUtl.Eq(key, "gl")) return new org.apache.lucene.analysis.gl.GalicianAnalyzer();
else if (StringUtl.Eq(key, "hi")) return new org.apache.lucene.analysis.hi.HindiAnalyzer();
else if (StringUtl.Eq(key, "hu")) return new org.apache.lucene.analysis.hu.HungarianAnalyzer();
else if (StringUtl.Eq(key, "hy")) return new org.apache.lucene.analysis.hy.ArmenianAnalyzer();
else if (StringUtl.Eq(key, "id")) return new org.apache.lucene.analysis.id.IndonesianAnalyzer();
else if (StringUtl.Eq(key, "it")) return new org.apache.lucene.analysis.it.ItalianAnalyzer();
else if (StringUtl.Eq(key, "lt")) return new org.apache.lucene.analysis.lt.LithuanianAnalyzer();
else if (StringUtl.Eq(key, "lv")) return new org.apache.lucene.analysis.lv.LatvianAnalyzer();
else if (StringUtl.Eq(key, "nl")) return new org.apache.lucene.analysis.nl.DutchAnalyzer();
else if (StringUtl.Eq(key, "no")) return new org.apache.lucene.analysis.no.NorwegianAnalyzer();
else if (StringUtl.Eq(key, "pt")) return new org.apache.lucene.analysis.pt.PortugueseAnalyzer();
else if (StringUtl.Eq(key, "ro")) return new org.apache.lucene.analysis.ro.RomanianAnalyzer();
else if (StringUtl.Eq(key, "ru")) return new org.apache.lucene.analysis.ru.RussianAnalyzer();
else if (StringUtl.Eq(key, "sv")) return new org.apache.lucene.analysis.sv.SwedishAnalyzer();
else if (StringUtl.Eq(key, "th")) return new org.apache.lucene.analysis.th.ThaiAnalyzer();
else if (StringUtl.Eq(key, "tr")) return new org.apache.lucene.analysis.tr.TurkishAnalyzer();
else throw ErrUtl.NewUnhandled(key);
}
}

View File

@@ -13,7 +13,8 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.gflucene.core; import gplx.*; import gplx.gflucene.*;
package gplx.gflucene.core;
import gplx.types.basics.utls.StringUtl;
public class Gflucene_analyzer_data {
public final String key;
public Gflucene_analyzer_data(String key) {
@@ -21,12 +22,12 @@ public class Gflucene_analyzer_data {
}
public static Gflucene_analyzer_data New_data_from_locale(String locale) {
String key = null;
if (String_.Eq(locale, "en")) key = "standard"; // NOTE: en exists but use standard for now
else if (String_.EqAny(locale
if (StringUtl.Eq(locale, "en")) key = "standard"; // NOTE: en exists but use standard for now
else if (StringUtl.EqAny(locale
, "ar", "bg", "ca", "ckb", "cz", "da", "de", "el", "es", "eu", "fa", "fi", "fr", "ga", "gl", "hi"
, "hu", "hy", "id", "it", "lt", "lv", "nl", "no", "pt", "ro", "ru", "sv", "th", "tr")
) key = locale;
else if (String_.EqAny(locale
else if (StringUtl.EqAny(locale
, "zh", "ja", "ko")
) key = "cjk";
else key = "standard";

View File

@@ -1,47 +1,35 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.gflucene.highlighters; import gplx.*; import gplx.gflucene.*;
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.gflucene.highlighters;
import gplx.gflucene.core.*;
import gplx.gflucene.analyzers.*;
import gplx.gflucene.searchers.*;
import java.io.IOException;
import java.nio.file.Paths;
import gplx.types.errs.ErrUtl;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.highlight.Fragmenter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.search.highlight.SimpleSpanFragmenter;
import org.apache.lucene.search.highlight.TextFragment;
import org.apache.lucene.search.highlight.TokenSources;
import org.apache.lucene.store.FSDirectory;
public class Gflucene_highlighter_mgr {
private Analyzer analyzer;
@@ -58,7 +46,7 @@ public class Gflucene_highlighter_mgr {
try {
query = parser.parse(qry_data.query);
} catch (ParseException e) {
throw Err_.new_exc(e, "lucene_index", "failed to parse", "query", qry_data.query);
throw ErrUtl.NewArgs(e, "failed to parse", "query", qry_data.query);
}
// create highlighter
@@ -75,7 +63,7 @@ public class Gflucene_highlighter_mgr {
try {
tokenStream = analyzer.tokenStream("body", text);
} catch (IOException e) {
throw Err_.new_exc(e, "lucene_index", "failed to get stream", "query", qry_data.query);
throw ErrUtl.NewArgs(e, "failed to get stream", "query", qry_data.query);
}
// get fragments from stream
@@ -84,9 +72,9 @@ public class Gflucene_highlighter_mgr {
// frags = highlighter.getBestTextFragments(tokenStream, text, false, 1000);
frags = highlighter.getBestTextFragments(tokenStream, text, true, 10);
} catch (IOException e) {
throw Err_.new_exc(e, "lucene_index", "failed to get best", "query", qry_data.query);
throw ErrUtl.NewArgs(e, "failed to get best", "query", qry_data.query);
} catch (InvalidTokenOffsetsException e) {
throw Err_.new_exc(e, "lucene_index", "failed to get best", "query", qry_data.query);
throw ErrUtl.NewArgs(e, "failed to get best", "query", qry_data.query);
}
// convert fragments to highlighter items

View File

@@ -13,7 +13,9 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.gflucene.indexers; import gplx.*;
package gplx.gflucene.indexers;
import gplx.types.basics.lists.Hash_adp;
import gplx.types.basics.lists.Hash_adp_;
public class Gflucene_idx_opt {
public Gflucene_idx_opt(int uid, String key, String name) {
this.uid = uid;

View File

@@ -1,35 +1,40 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.gflucene.indexers; import gplx.*; import gplx.gflucene.*;
import gplx.gflucene.core.*;
import java.io.IOException;
import org.lukhnos.portmobile.file.Path;
import org.lukhnos.portmobile.file.Paths;
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.gflucene.indexers;
import gplx.gflucene.analyzers.Gflucene_analyzer_mgr_;
import gplx.gflucene.core.Gflucene_doc_data;
import gplx.gflucene.core.Gflucene_index_data;
import gplx.types.errs.ErrUtl;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.*;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.TieredMergePolicy;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import gplx.gflucene.analyzers.*;;
import org.lukhnos.portmobile.file.Path;
import org.lukhnos.portmobile.file.Paths;
import java.io.IOException;
;
public class Gflucene_indexer_mgr {
private Analyzer analyzer;
private IndexWriterConfig config;
@@ -55,14 +60,14 @@ public class Gflucene_indexer_mgr {
try {
this.index = FSDirectory.open(path);
} catch (IOException e) {
throw Err_.new_exc(e, "lucene_index", "failed to open lucene index", "path", path);
}
throw ErrUtl.NewArgs(e, "failed to open lucene index", "path", path);
}
// create writer
try {
wtr = new IndexWriter(index, config);
} catch (IOException e) {
throw Err_.new_exc(e, "lucene_index", "failed to create writer");
throw ErrUtl.NewArgs(e, "failed to create writer");
}
// create field for body
@@ -95,19 +100,19 @@ public class Gflucene_indexer_mgr {
try {
wtr.addDocument(doc);
} catch (IOException e) {
throw Err_.new_exc(e, "lucene_index", "failed to add document", "title", doc_data.title);
throw ErrUtl.NewArgs(e, "failed to add document", "title", doc_data.title);
}
}
/**
 * Shuts the indexer down: closes the index writer (wtr) first, then the index directory (index).
 * An IOException from either close is wrapped and rethrown.
 *
 * NOTE(review): each throw appears in two spellings in this listing (Err_.new_exc and
 * ErrUtl.NewArgs); presumably the Err_ line is the pre-refactor form. Confirm.
 */
public void Term() {
try {
wtr.close();
} catch (IOException e) {
throw Err_.new_exc(e, "lucene_index", "failed to close writer");
throw ErrUtl.NewArgs(e, "failed to close writer");
}
// NOTE(review): if wtr.close() throws, the rethrow above exits before index.close() is reached
try {
index.close();
} catch (IOException e) {
// NOTE(review): this guards index.close(), yet the message still reads "failed to close writer"; looks copy-pasted
throw Err_.new_exc(e, "lucene_index", "failed to close writer");
throw ErrUtl.NewArgs(e, "failed to close writer");
}
}
@@ -118,7 +123,7 @@ public class Gflucene_indexer_mgr {
case Gflucene_idx_opt.Uid_docs_and_freqs: return IndexOptions.DOCS_AND_FREQS;
case Gflucene_idx_opt.Uid_docs_and_freqs_and_positions: return IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
case Gflucene_idx_opt.Uid_docs_and_freqs_and_positions_and_offsets: return IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
default: throw Err_.new_unhandled_default(opt.Uid());
default: throw ErrUtl.NewUnhandled(opt.Uid());
}
}
}

View File

@@ -13,10 +13,12 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.gflucene.searchers; import gplx.*;
package gplx.gflucene.searchers;
import gplx.gflucene.core.*;
import gplx.gflucene.analyzers.*;
import java.io.IOException;
import gplx.types.errs.ErrUtl;
import gplx.types.basics.lists.Ordered_hash;
import org.lukhnos.portmobile.file.Path;
import org.lukhnos.portmobile.file.Paths;
@@ -51,7 +53,7 @@ public class Gflucene_searcher_mgr {
try {
this.index = FSDirectory.open(path);
} catch (IOException e) {
throw Err_.new_exc(e, "lucene_index", "failed to init searcher", "dir", idx_data.index_dir);
throw ErrUtl.NewArgs(e, "failed to init searcher", "dir", idx_data.index_dir);
}
}
public void Exec(Ordered_hash list, Gflucene_searcher_qry data) {
@@ -86,7 +88,7 @@ public class Gflucene_searcher_mgr {
reader.close();
} catch (Exception e) {
throw Err_.new_exc(e, "lucene_index", "failed to exec seearch", "query", data.query);
throw ErrUtl.NewArgs(e, "failed to exec seearch", "query", data.query);
}
}
public void Term() {