1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2024-10-27 20:34:16 +00:00

Search: Add NOT logic

This commit is contained in:
gnosygnu 2017-03-01 11:29:43 -05:00
parent 8be6311cd6
commit c702745bc0
7 changed files with 112 additions and 71 deletions

View File

@ -1,27 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
//namespace gplx.xowa.addons.wikis.fulltexts {
// using gplx.xowa.bldrs.wkrs;
// public class Xofts_addon : Xoax_addon_itm, Xoax_addon_itm__bldr {
// public Xob_cmd[] Bldr_cmds() {
// return new Xob_cmd[]
// { Xofts_make_cmd.Prototype
// };
// }
//
// public String Addon__key() {return "xowa.wikis.fulltexts";}
// }
//}

View File

@ -1,29 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
//namespace gplx.xowa.addons.wikis.fulltexts {
// using gplx.dbs;
// using gplx.xowa.bldrs; using gplx.xowa.bldrs.wkrs; using gplx.xowa.htmls.core.htmls;
// public class Xofts_make_cmd : Xob_cmd__base {
// public Xofts_make_cmd(Xob_bldr bldr, Xowe_wiki wiki) : super(bldr, wiki) {}
// public override void Cmd_run() {
// }
//
// public static final String BLDR_CMD_KEY = "fulltext.make";
// public override String Cmd_key() {return BLDR_CMD_KEY;}
// public static final Xob_cmd Prototype = new Xofts_make_cmd(null, null);
// public override Xob_cmd Cmd_clone(Xob_bldr bldr, Xowe_wiki wiki) {return new Xofts_make_cmd(bldr, wiki);}
// }
//}

View File

@ -51,7 +51,7 @@ class Xosearch_fulltext_svc {
.Add_long("page_count", 0) .Add_long("page_count", 0)
); );
finder.Init(query_lcase, false, false, Byte_ascii.Star); finder.Init(query_lcase, false, false, Byte_ascii.Star, Byte_ascii.Dash);
try { try {
int found = 0; int found = 0;
while (page_rdr.Move_next()) { while (page_rdr.Move_next()) {

View File

@ -0,0 +1,82 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.addons.wikis.searchs.fulltexts.finders; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.searchs.*; import gplx.xowa.addons.wikis.searchs.fulltexts.*;
import org.junit.*; import gplx.core.tests.*;
/**
 * Checks how parsed search queries evaluate against sample texts.
 * Each test sets one query on the fixture, then lists texts that must match
 * (Test__eval_y) followed by texts that must not match (Test__eval_n).
 */
public class Xosearch_finder_cbk__eval__tst {
	private final Xosearch_finder_cbk__eval__fxt fxt = new Xosearch_finder_cbk__eval__fxt();

	/** Single bare word. */
	@Test public void Exact() {
		fxt.Init__search("a");
		fxt.Test__eval_y
			( "a"       // identical word
			);
		fxt.Test__eval_n
			( "z"       // different word
			, "az"      // longer word starting with "a"; the query has no trailing wildcard
			);
	}

	/** Comma-separated words: either word is enough. */
	@Test public void Or() {
		fxt.Init__search("a, c");
		fxt.Test__eval_y
			( "a"       // 1st word by itself
			, "c"       // 2nd word by itself
			, "a b"     // 1st word among others
			, "b c"     // 2nd word among others
			, "a c"     // both words
			, "a b c"   // both words with filler between
			);
		fxt.Test__eval_n
			( "b"       // neither word
			);
	}

	/** Plus-separated words: every word is required. */
	@Test public void And() {
		fxt.Init__search("a + c");
		fxt.Test__eval_y
			( "a c"     // both words
			, "a b c"   // both words with filler between
			);
		fxt.Test__eval_n
			( "a"       // 1st word only
			, "c"       // 2nd word only
			, "a b"     // 1st word plus filler
			, "b c"     // 2nd word plus filler
			);
	}

	/** Space-separated words behave the same as an explicit "+". */
	@Test public void And__shorthand() {
		fxt.Init__search("a c");
		fxt.Test__eval_y
			( "a b c"   // both words
			);
		fxt.Test__eval_n
			( "a"       // 1st word only
			, "c"       // 2nd word only
			);
	}

	/** Dash-prefixed word: text matches only when the word is absent. */
	@Test public void Not() {
		fxt.Init__search("-a");
		fxt.Test__eval_y
			( "b"       // excluded word is absent
			);
		fxt.Test__eval_n
			( "a"       // excluded word is present
			);
	}
}
/**
 * Test fixture: builds a finder for a query string and asserts whether the
 * eval callback reports a match for each supplied text.
 */
class Xosearch_finder_cbk__eval__fxt {
	// collaborators under test
	private final Xosearch_finder_mgr finder = new Xosearch_finder_mgr();
	private final Xosearch_finder_cbk__eval cbk = new Xosearch_finder_cbk__eval();

	// options handed to finder.Init for every query
	private boolean case_match = false;
	private boolean auto_wildcard = false;
	private byte wildcard_byte = Byte_ascii.Star;
	private byte not_byte = Byte_ascii.Dash;

	/** Initializes the finder with the given query and the fixture's options. */
	public void Init__search(String query) {
		byte[] query_bry = Bry_.new_u8(query);
		finder.Init(query_bry, case_match, auto_wildcard, wildcard_byte, not_byte);
	}

	/** Asserts that every text is reported as found. */
	public void Test__eval_y(String... texts) {
		Test__eval(Bool_.Y, texts);
	}

	/** Asserts that every text is reported as not found. */
	public void Test__eval_n(String... texts) {
		Test__eval(Bool_.N, texts);
	}

	/**
	 * Runs the finder over each text in order and compares cbk.found to expd.
	 * The assertion message carries the query and the text being checked.
	 */
	public void Test__eval(boolean expd, String... texts) {
		int texts_len = texts.length;
		for (int i = 0; i < texts_len; i++) {
			String text = texts[i];
			byte[] src = Bry_.new_u8(text);

			// clear the flag so a result from the previous text cannot leak into this one
			cbk.found = false;
			finder.Match(src, 0, src.length, cbk);

			Gftest.Eq__bool(expd, cbk.found, "query={0}, text={1}", finder.Query(), text);
		}
	}
}

View File

@ -23,12 +23,14 @@ public class Xosearch_finder_mgr {
private final Srch_crt_parser parser = new Srch_crt_parser(Srch_crt_scanner_syms.Dflt); private final Srch_crt_parser parser = new Srch_crt_parser(Srch_crt_scanner_syms.Dflt);
private final Btrie_rv trv = new Btrie_rv(); private final Btrie_rv trv = new Btrie_rv();
public void Init(byte[] query_mcase, boolean case_match, boolean auto_wildcard, byte wildchar_byte) { public byte[] Query() {return query;} private byte[] query;
public void Init(byte[] query, boolean case_match, boolean auto_wildcard, byte wildchar_byte, byte not_byte) {
this.query = query;
// create a new hook_trie based on case_match // create a new hook_trie based on case_match
this.hook_trie = case_match ? Btrie_slim_mgr.cs() : Btrie_slim_mgr.ci_u8(); this.hook_trie = case_match ? Btrie_slim_mgr.cs() : Btrie_slim_mgr.ci_u8();
// create a new tree_root for eval // create a new tree_root for eval
this.tree_root = Xosearch_word_node_.New_root(parser.Parse_or_invalid(query_mcase, auto_wildcard).Root, hook_trie, wildchar_byte); this.tree_root = Xosearch_word_node_.New_root(parser.Parse_or_invalid(query, auto_wildcard).Root, hook_trie, wildchar_byte, not_byte);
} }
public void Match(byte[] src, int src_bgn, int src_end, Xosearch_finder_cbk cbk) { public void Match(byte[] src, int src_bgn, int src_end, Xosearch_finder_cbk cbk) {
// init and clear // init and clear

View File

@ -34,7 +34,7 @@ public class Xosearch_word_node {
// if no wildcard at end, hook_end must match word_end // if no wildcard at end, hook_end must match word_end
if ( !wildcard_at_end if ( !wildcard_at_end
&& hook_bgn != word_end) && hook_end != word_end)
return false; return false;
return true; return true;
@ -62,7 +62,7 @@ public class Xosearch_word_node {
case Srch_crt_itm.Tid__word_quote: case Srch_crt_itm.Tid__word_quote:
return found; return found;
case Srch_crt_itm.Tid__not: case Srch_crt_itm.Tid__not:
return !found; return !subs[0].Eval();
case Srch_crt_itm.Tid__invalid: return false; // should not happen case Srch_crt_itm.Tid__invalid: return false; // should not happen
default: throw Err_.new_unhandled_default(tid); default: throw Err_.new_unhandled_default(tid);
} }

View File

@ -17,7 +17,7 @@ package gplx.xowa.addons.wikis.searchs.fulltexts.finders; import gplx.*; import
import gplx.core.btries.*; import gplx.core.btries.*;
import gplx.xowa.addons.wikis.searchs.searchers.crts.*; import gplx.xowa.addons.wikis.searchs.searchers.crts.*;
public class Xosearch_word_node_ { public class Xosearch_word_node_ {
public static Xosearch_word_node New_root(Srch_crt_itm src, Btrie_slim_mgr word_trie, byte wildchar_byte) { public static Xosearch_word_node New_root(Srch_crt_itm src, Btrie_slim_mgr word_trie, byte wildchar_byte, byte not_byte) {
Xosearch_word_node trg = new Xosearch_word_node(); Xosearch_word_node trg = new Xosearch_word_node();
trg.tid = src.Tid; trg.tid = src.Tid;
@ -25,16 +25,29 @@ public class Xosearch_word_node_ {
switch (trg.tid) { switch (trg.tid) {
case Srch_crt_itm.Tid__word: case Srch_crt_itm.Tid__word:
case Srch_crt_itm.Tid__word_quote: case Srch_crt_itm.Tid__word_quote:
byte[] word_orig = src.Raw; // EX: "abc*" // get word_orig; EX: "abc*"
byte[] word_orig = src.Raw;
// determine if wildcards at bgn / end
int word_orig_len = word_orig.length; int word_orig_len = word_orig.length;
boolean wildcard_at_bgn = word_orig_len > 1 && word_orig[0] == wildchar_byte;
boolean wildcard_at_end = word_orig_len > 1 && word_orig[word_orig_len - 1] == wildchar_byte; // init hook_bgn / hook_end
int hook_bgn = 0;
int hook_end = word_orig_len;
// handle wildcard at bgn; EX: "*a"
boolean wildcard_at_bgn = false;
if (word_orig_len > hook_bgn + 1 && word_orig[hook_bgn] == wildchar_byte) {
wildcard_at_bgn = true;
hook_bgn++;
}
// handle wildcard at end; EX: "a*"
boolean wildcard_at_end = false;
if (word_orig_len > hook_bgn + 1 && word_orig[hook_end - 1] == wildchar_byte) {
wildcard_at_end = true;
hook_end--;
}
// get hook // get hook
int hook_bgn = wildcard_at_bgn ? 1 : 0;
int hook_end = wildcard_at_end ? word_orig_len - 1 : word_orig_len;
byte[] word_hook = wildcard_at_bgn || wildcard_at_end ? Bry_.Mid(word_orig, hook_bgn, hook_end) : word_orig; byte[] word_hook = wildcard_at_bgn || wildcard_at_end ? Bry_.Mid(word_orig, hook_bgn, hook_end) : word_orig;
// assign to trg // assign to trg
@ -43,7 +56,7 @@ public class Xosearch_word_node_ {
trg.wildcard_at_bgn = wildcard_at_bgn; trg.wildcard_at_bgn = wildcard_at_bgn;
trg.wildcard_at_end = wildcard_at_end; trg.wildcard_at_end = wildcard_at_end;
// add to hash, trie // add to trie
if (word_trie.Match_exact(word_hook) == null) { // don't add if exists if (word_trie.Match_exact(word_hook) == null) { // don't add if exists
word_trie.Add_obj(word_hook, trg); word_trie.Add_obj(word_hook, trg);
} }
@ -56,7 +69,7 @@ public class Xosearch_word_node_ {
trg.subs = trg_subs; trg.subs = trg_subs;
int len = src_subs.length; int len = src_subs.length;
for (int i = 0; i < len; i++) { for (int i = 0; i < len; i++) {
trg.subs[i] = New_root(src_subs[i], word_trie, wildchar_byte); trg.subs[i] = New_root(src_subs[i], word_trie, wildchar_byte, not_byte);
} }
return trg; return trg;