mirror of https://github.com/gnosygnu/xowa
parent
7f04fc5f74
commit
c10cf2ca15
@ -0,0 +1,21 @@
|
|||||||
|
/*
|
||||||
|
XOWA: the XOWA Offline Wiki Application
|
||||||
|
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||||
|
|
||||||
|
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||||
|
or alternatively under the terms of the Apache License Version 2.0.
|
||||||
|
|
||||||
|
You may use XOWA according to either of these licenses as is most appropriate
|
||||||
|
for your project on a case-by-case basis.
|
||||||
|
|
||||||
|
The terms of each license can be found in the source code repository:
|
||||||
|
|
||||||
|
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||||
|
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||||
|
*/
|
||||||
|
package gplx.xowa.addons.wikis.searchs.fulltexts.finders; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.searchs.*; import gplx.xowa.addons.wikis.searchs.fulltexts.*;
|
||||||
|
import gplx.xowa.guis.cbks.*;
|
||||||
|
public interface Xosearch_finder_cbk {
|
||||||
|
void Process_item_found(byte[] src, int hook_bgn, int hook_end, int word_bgn, int word_end, Xosearch_word_node term);
|
||||||
|
void Process_page_done(byte[] src, Xosearch_word_node tree_root);
|
||||||
|
}
|
@ -0,0 +1,25 @@
|
|||||||
|
/*
|
||||||
|
XOWA: the XOWA Offline Wiki Application
|
||||||
|
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||||
|
|
||||||
|
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||||
|
or alternatively under the terms of the Apache License Version 2.0.
|
||||||
|
|
||||||
|
You may use XOWA according to either of these licenses as is most appropriate
|
||||||
|
for your project on a case-by-case basis.
|
||||||
|
|
||||||
|
The terms of each license can be found in the source code repository:
|
||||||
|
|
||||||
|
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||||
|
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||||
|
*/
|
||||||
|
package gplx.xowa.addons.wikis.searchs.fulltexts.finders; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.searchs.*; import gplx.xowa.addons.wikis.searchs.fulltexts.*;
|
||||||
|
public class Xosearch_finder_cbk__eval implements Xosearch_finder_cbk {
|
||||||
|
public boolean found;
|
||||||
|
public void Process_item_found(byte[] src, int hook_bgn, int hook_end, int word_bgn, int word_end, Xosearch_word_node term) {
|
||||||
|
term.found = true;
|
||||||
|
}
|
||||||
|
public void Process_page_done(byte[] src, Xosearch_word_node root) {
|
||||||
|
this.found = root.Eval();
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,82 @@
|
|||||||
|
/*
|
||||||
|
XOWA: the XOWA Offline Wiki Application
|
||||||
|
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||||
|
|
||||||
|
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||||
|
or alternatively under the terms of the Apache License Version 2.0.
|
||||||
|
|
||||||
|
You may use XOWA according to either of these licenses as is most appropriate
|
||||||
|
for your project on a case-by-case basis.
|
||||||
|
|
||||||
|
The terms of each license can be found in the source code repository:
|
||||||
|
|
||||||
|
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||||
|
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||||
|
*/
|
||||||
|
package gplx.xowa.addons.wikis.searchs.fulltexts.finders; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.searchs.*; import gplx.xowa.addons.wikis.searchs.fulltexts.*;
|
||||||
|
import gplx.xowa.guis.cbks.*;
|
||||||
|
public class Xosearch_finder_cbk__highlight implements Xosearch_finder_cbk {
|
||||||
|
private final Xog_cbk_trg cbk_trg;
|
||||||
|
private final Xoa_app app;
|
||||||
|
private Xow_wiki wiki;
|
||||||
|
private Xoa_ttl ttl;
|
||||||
|
private final Bry_bfr tmp_bfr = Bry_bfr_.New();
|
||||||
|
public int found;
|
||||||
|
public Xosearch_finder_cbk__highlight(Xoa_app app, Xog_cbk_trg cbk_trg) {
|
||||||
|
this.app = app;
|
||||||
|
this.cbk_trg = cbk_trg;
|
||||||
|
}
|
||||||
|
public void Init(Xow_wiki wiki, Xoa_ttl ttl) {
|
||||||
|
this.wiki = wiki;
|
||||||
|
this.ttl = ttl;
|
||||||
|
found = 0;
|
||||||
|
}
|
||||||
|
public void Process_item_found(byte[] src, int hook_bgn, int hook_end, int word_bgn, int word_end, Xosearch_word_node term) {
|
||||||
|
// get snip bounds by finding flanking 50 chars and then expanding to word-bounds
|
||||||
|
int snip_bgn = hook_bgn - 50;
|
||||||
|
if (snip_bgn < 0)
|
||||||
|
snip_bgn = 0;
|
||||||
|
else {
|
||||||
|
snip_bgn = Bry_find_.Find_bwd_ws(src, snip_bgn, 0) + 1;
|
||||||
|
}
|
||||||
|
int snip_end = hook_end + 50;
|
||||||
|
if (snip_end >= src.length)
|
||||||
|
snip_end = src.length;
|
||||||
|
else {
|
||||||
|
snip_end = Bry_find_.Find_fwd_until_ws(src, snip_end, src.length);
|
||||||
|
if (snip_end == Bry_find_.Not_found) { // when snip_end == src.length
|
||||||
|
snip_end = src.length;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// build snip
|
||||||
|
Add_snip(tmp_bfr, src, snip_bgn, hook_bgn);
|
||||||
|
tmp_bfr.Add_str_a7("<span class='snip_highlight'>");
|
||||||
|
Add_snip(tmp_bfr, src, hook_bgn, hook_end);
|
||||||
|
tmp_bfr.Add_str_a7("</span>");
|
||||||
|
Add_snip(tmp_bfr, src, hook_end, snip_end);
|
||||||
|
|
||||||
|
// send notification
|
||||||
|
app.Gui__cbk_mgr().Send_json(cbk_trg, "xo.search_fulltext.results__line__add__recv", gplx.core.gfobjs.Gfobj_nde.New()
|
||||||
|
.Add_bry("wiki", wiki.Domain_bry())
|
||||||
|
.Add_bry("page", ttl.Full_db())
|
||||||
|
.Add_int("line", ++found)
|
||||||
|
.Add_bry("html", tmp_bfr.To_bry_and_clear())
|
||||||
|
);
|
||||||
|
app.Gui__cbk_mgr().Send_json(cbk_trg, "xo.search_fulltext.results__page__update__recv", gplx.core.gfobjs.Gfobj_nde.New()
|
||||||
|
.Add_bry("wiki", wiki.Domain_bry())
|
||||||
|
.Add_bry("page", ttl.Full_db())
|
||||||
|
.Add_int("found", found)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
private void Add_snip(Bry_bfr bfr, byte[] src, int bgn, int end) {
|
||||||
|
for (int i = bgn; i < end; i++) {
|
||||||
|
byte b = src[i];
|
||||||
|
if (b == Byte_ascii.Nl)
|
||||||
|
bfr.Add(gplx.langs.htmls.Gfh_tag_.Br_inl);
|
||||||
|
else
|
||||||
|
bfr.Add_byte(b);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
public void Process_page_done(byte[] src, Xosearch_word_node tree_root) {}
|
||||||
|
}
|
@ -0,0 +1,71 @@
|
|||||||
|
/*
|
||||||
|
XOWA: the XOWA Offline Wiki Application
|
||||||
|
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||||
|
|
||||||
|
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||||
|
or alternatively under the terms of the Apache License Version 2.0.
|
||||||
|
|
||||||
|
You may use XOWA according to either of these licenses as is most appropriate
|
||||||
|
for your project on a case-by-case basis.
|
||||||
|
|
||||||
|
The terms of each license can be found in the source code repository:
|
||||||
|
|
||||||
|
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||||
|
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||||
|
*/
|
||||||
|
package gplx.xowa.addons.wikis.searchs.fulltexts.finders; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.searchs.*; import gplx.xowa.addons.wikis.searchs.fulltexts.*;
|
||||||
|
import gplx.xowa.guis.cbks.*;
|
||||||
|
import gplx.core.btries.*;
|
||||||
|
import gplx.xowa.addons.wikis.searchs.searchers.crts.*;
|
||||||
|
public class Xosearch_finder_mgr {
|
||||||
|
private Btrie_slim_mgr hook_trie;
|
||||||
|
private Xosearch_word_node tree_root;
|
||||||
|
private final Srch_crt_parser parser = new Srch_crt_parser(Srch_crt_scanner_syms.Dflt);
|
||||||
|
private final Btrie_rv trv = new Btrie_rv();
|
||||||
|
|
||||||
|
public void Init(byte[] query_mcase, boolean case_match, boolean auto_wildcard, byte wildchar_byte) {
|
||||||
|
// create a new hook_trie based on case_match
|
||||||
|
this.hook_trie = case_match ? Btrie_slim_mgr.cs() : Btrie_slim_mgr.ci_u8();
|
||||||
|
|
||||||
|
// create a new tree_root for eval
|
||||||
|
this.tree_root = Xosearch_word_node_.New_root(parser.Parse_or_invalid(query_mcase, auto_wildcard).Root, hook_trie, wildchar_byte);
|
||||||
|
}
|
||||||
|
public void Match(byte[] src, int src_bgn, int src_end, Xosearch_finder_cbk cbk) {
|
||||||
|
// init and clear
|
||||||
|
int cur = 0;
|
||||||
|
tree_root.Clear();
|
||||||
|
|
||||||
|
// scan through text one-byte at a time
|
||||||
|
// NOTE: skipping ahead to word-start instead of going byte-by-byte may seem more performant, but will still need to do substring analysis b/c of wildcards and punctuation; EX: "abc" and " 'abc' "; "*abc" and " xyzabc. "
|
||||||
|
while (cur <= src_end) {
|
||||||
|
// check each byte against hook_trie
|
||||||
|
Object hook_obj = hook_trie.Match_at(trv, src, cur, src_end);
|
||||||
|
|
||||||
|
// current byte matches no hooks; go to next byte
|
||||||
|
if (hook_obj == null) {
|
||||||
|
cur++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// current byte matches a hook; get hook and hook_end
|
||||||
|
Xosearch_word_node hook = (Xosearch_word_node)hook_obj;
|
||||||
|
int hook_end = cur + hook.word_hook.length;
|
||||||
|
|
||||||
|
// get current word bounds by finding flanking ws
|
||||||
|
int word_bgn = Bry_find_.Find_bwd_ws(src, cur, 0) + 1;
|
||||||
|
int word_end = Bry_find_.Find_fwd_until_ws(src, hook_end, src_end);
|
||||||
|
if (word_end == -1) word_end = src_end; // WORKAROUND: no match returns -1 instead of src_end
|
||||||
|
|
||||||
|
// check if current word matches criteria-word
|
||||||
|
if (hook.Match_word(src, cur, hook_end, word_bgn, word_end)) {
|
||||||
|
cbk.Process_item_found(src, cur, hook_end, word_bgn, word_end, hook);
|
||||||
|
}
|
||||||
|
|
||||||
|
// update position to word_end
|
||||||
|
cur = word_end;
|
||||||
|
}
|
||||||
|
|
||||||
|
// mark page done
|
||||||
|
cbk.Process_page_done(src, tree_root);
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,70 @@
|
|||||||
|
/*
|
||||||
|
XOWA: the XOWA Offline Wiki Application
|
||||||
|
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||||
|
|
||||||
|
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||||
|
or alternatively under the terms of the Apache License Version 2.0.
|
||||||
|
|
||||||
|
You may use XOWA according to either of these licenses as is most appropriate
|
||||||
|
for your project on a case-by-case basis.
|
||||||
|
|
||||||
|
The terms of each license can be found in the source code repository:
|
||||||
|
|
||||||
|
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||||
|
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||||
|
*/
|
||||||
|
package gplx.xowa.addons.wikis.searchs.fulltexts.finders; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.searchs.*; import gplx.xowa.addons.wikis.searchs.fulltexts.*;
|
||||||
|
import gplx.xowa.addons.wikis.searchs.searchers.crts.*;
|
||||||
|
public class Xosearch_word_node {
|
||||||
|
public int tid;
|
||||||
|
public Xosearch_word_node[] subs;
|
||||||
|
public byte[] word_orig;
|
||||||
|
public byte[] word_hook;
|
||||||
|
public boolean wildcard_at_bgn;
|
||||||
|
public boolean wildcard_at_end;
|
||||||
|
public boolean found;
|
||||||
|
|
||||||
|
public boolean Match_word(byte[] src, int hook_bgn, int hook_end, int word_bgn, int word_end) {
|
||||||
|
// TODO.XO: handle punctuation
|
||||||
|
|
||||||
|
// if no wildcard at bgn, hook_bgn must match word_bgn
|
||||||
|
if ( !wildcard_at_bgn
|
||||||
|
&& hook_bgn != word_bgn)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
// if no wildcard at end, hook_end must match word_end
|
||||||
|
if ( !wildcard_at_end
|
||||||
|
&& hook_bgn != word_end)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
public void Clear() {
|
||||||
|
found = false;
|
||||||
|
for (Xosearch_word_node sub : subs)
|
||||||
|
sub.Clear();
|
||||||
|
}
|
||||||
|
public boolean Eval() {
|
||||||
|
switch (tid) {
|
||||||
|
case Srch_crt_itm.Tid__and: {
|
||||||
|
for (Xosearch_word_node sub : subs)
|
||||||
|
if (!sub.Eval())
|
||||||
|
return false;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
case Srch_crt_itm.Tid__or: {
|
||||||
|
for (Xosearch_word_node sub : subs)
|
||||||
|
if (sub.Eval())
|
||||||
|
return true;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
case Srch_crt_itm.Tid__word:
|
||||||
|
case Srch_crt_itm.Tid__word_quote:
|
||||||
|
return found;
|
||||||
|
case Srch_crt_itm.Tid__not:
|
||||||
|
return !found;
|
||||||
|
case Srch_crt_itm.Tid__invalid: return false; // should not happen
|
||||||
|
default: throw Err_.new_unhandled_default(tid);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,64 @@
|
|||||||
|
/*
|
||||||
|
XOWA: the XOWA Offline Wiki Application
|
||||||
|
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||||
|
|
||||||
|
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||||
|
or alternatively under the terms of the Apache License Version 2.0.
|
||||||
|
|
||||||
|
You may use XOWA according to either of these licenses as is most appropriate
|
||||||
|
for your project on a case-by-case basis.
|
||||||
|
|
||||||
|
The terms of each license can be found in the source code repository:
|
||||||
|
|
||||||
|
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||||
|
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||||
|
*/
|
||||||
|
package gplx.xowa.addons.wikis.searchs.fulltexts.finders; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.searchs.*; import gplx.xowa.addons.wikis.searchs.fulltexts.*;
|
||||||
|
import gplx.core.btries.*;
|
||||||
|
import gplx.xowa.addons.wikis.searchs.searchers.crts.*;
|
||||||
|
public class Xosearch_word_node_ {
|
||||||
|
public static Xosearch_word_node New_root(Srch_crt_itm src, Btrie_slim_mgr word_trie, byte wildchar_byte) {
|
||||||
|
Xosearch_word_node trg = new Xosearch_word_node();
|
||||||
|
trg.tid = src.Tid;
|
||||||
|
|
||||||
|
// set word-related props
|
||||||
|
switch (trg.tid) {
|
||||||
|
case Srch_crt_itm.Tid__word:
|
||||||
|
case Srch_crt_itm.Tid__word_quote:
|
||||||
|
byte[] word_orig = src.Raw; // EX: "abc*"
|
||||||
|
|
||||||
|
// determine if wildcards at bgn / end
|
||||||
|
int word_orig_len = word_orig.length;
|
||||||
|
boolean wildcard_at_bgn = word_orig_len > 1 && word_orig[0] == wildchar_byte;
|
||||||
|
boolean wildcard_at_end = word_orig_len > 1 && word_orig[word_orig_len - 1] == wildchar_byte;
|
||||||
|
|
||||||
|
// get hook
|
||||||
|
int hook_bgn = wildcard_at_bgn ? 1 : 0;
|
||||||
|
int hook_end = wildcard_at_end ? word_orig_len - 1 : word_orig_len;
|
||||||
|
byte[] word_hook = wildcard_at_bgn || wildcard_at_end ? Bry_.Mid(word_orig, hook_bgn, hook_end) : word_orig;
|
||||||
|
|
||||||
|
// assign to trg
|
||||||
|
trg.word_orig = word_orig;
|
||||||
|
trg.word_hook = word_hook;
|
||||||
|
trg.wildcard_at_bgn = wildcard_at_bgn;
|
||||||
|
trg.wildcard_at_end = wildcard_at_end;
|
||||||
|
|
||||||
|
// add to hash, trie
|
||||||
|
if (word_trie.Match_exact(word_hook) == null) { // don't add if exists
|
||||||
|
word_trie.Add_obj(word_hook, trg);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
// set subs
|
||||||
|
Srch_crt_itm[] src_subs = src.Subs;
|
||||||
|
Xosearch_word_node[] trg_subs = new Xosearch_word_node[src_subs.length];
|
||||||
|
trg.subs = trg_subs;
|
||||||
|
int len = src_subs.length;
|
||||||
|
for (int i = 0; i < len; i++) {
|
||||||
|
trg.subs[i] = New_root(src_subs[i], word_trie, wildchar_byte);
|
||||||
|
}
|
||||||
|
|
||||||
|
return trg;
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in new issue