diff --git a/400_xowa/src/gplx/xowa/addons/wikis/searchs/fulltexts/cbks/Xosearch_fulltext_svc.java b/400_xowa/src/gplx/xowa/addons/wikis/searchs/fulltexts/cbks/Xosearch_fulltext_svc.java
index a143b0135..4dea08627 100644
--- a/400_xowa/src/gplx/xowa/addons/wikis/searchs/fulltexts/cbks/Xosearch_fulltext_svc.java
+++ b/400_xowa/src/gplx/xowa/addons/wikis/searchs/fulltexts/cbks/Xosearch_fulltext_svc.java
@@ -14,124 +14,73 @@ GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
 Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
 */
 package gplx.xowa.addons.wikis.searchs.fulltexts.cbks; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.searchs.*; import gplx.xowa.addons.wikis.searchs.fulltexts.*;
+import gplx.core.btries.*;
 import gplx.langs.jsons.*;
-import gplx.dbs.*;
+import gplx.dbs.*; import gplx.xowa.wikis.data.tbls.*;
 import gplx.xowa.addons.wikis.searchs.fulltexts.specials.*;
-import gplx.xowa.wikis.data.tbls.*;
+import gplx.xowa.addons.wikis.searchs.fulltexts.finders.*;
+import gplx.xowa.addons.wikis.searchs.searchers.crts.*;
+import gplx.xowa.addons.wikis.searchs.searchers.crts.visitors.*;
 class Xosearch_fulltext_svc {
-	private gplx.xowa.guis.cbks.Xog_cbk_trg cbk_trg = gplx.xowa.guis.cbks.Xog_cbk_trg.New(Xosearch_fulltext_special.Prototype.Special__meta().Ttl_bry());
 	private final Xoa_app app;
-	private final Bry_bfr tmp_bfr = Bry_bfr_.New();
+	private final gplx.xowa.guis.cbks.Xog_cbk_trg cbk_trg = gplx.xowa.guis.cbks.Xog_cbk_trg.New(Xosearch_fulltext_special.Prototype.Special__meta().Ttl_bry()); // callback target passed to every Send_json call in this class
+	private final Xosearch_finder_mgr finder = new Xosearch_finder_mgr(); // matcher; re-initialized with the query once per wiki in Search_wiki
+	private final Xosearch_finder_cbk__eval cbk_eval = new Xosearch_finder_cbk__eval(); // pass 1 callback: reports whether a page satisfies the whole query
+	private final Xosearch_finder_cbk__highlight cbk_highlight; // pass 2 callback: sends one snippet notification per matched word
 	public Xosearch_fulltext_svc(Xoa_app app) {
 		this.app = app;
+		cbk_highlight = new Xosearch_finder_cbk__highlight(app, cbk_trg); // created here because it needs app as well as cbk_trg
 	}
 	public void Search(Json_nde args) {
 		String wikis = args.Get_as_str("wikis");
-		byte[] wildcard = Bry_.new_a7("%");
-		byte[] query_raw = args.Get_as_bry("query");
-		byte[] query_sql = Bry_.Add(wildcard, query_raw, wildcard);
+		byte[] query_mcase = args.Get_as_bry("query"); // query bytes exactly as received ("mcase" presumably means mixed-case)
 		String[] wikis_ary = String_.Split(wikis, "|");
 		for (String wiki_domain : wikis_ary) {
 			Xow_wiki wiki = app.Wiki_mgri().Get_by_or_make_init_y(Bry_.new_u8(wiki_domain));
-			Search_wiki(wiki, query_raw, query_sql);
+			byte[] query_lcase = wiki.Case_mgr().Case_build_lower(query_mcase); // lower-case the query with this wiki's case manager before searching
+			Search_wiki(wiki, query_lcase); // wikis are searched one after another, in the order given
 		}
 	}
-	private void Search_wiki(Xow_wiki wiki, byte[] query_raw, byte[] query_sql) {
+	private void Search_wiki(Xow_wiki wiki, byte[] query_lcase) { // reads every namespace-0 page of wiki (see SQL below) and streams matches to the GUI
 		Db_conn page_conn = wiki.Data__core_mgr().Tbl__page().Conn();
 		Db_rdr page_rdr = page_conn.Stmt_sql("SELECT * FROM page WHERE page_namespace IN (0) ORDER BY page_score DESC").Exec_select__rls_auto();
 		app.Gui__cbk_mgr().Send_json(cbk_trg, "xo.search_fulltext.results__wiki__add__recv", gplx.core.gfobjs.Gfobj_nde.New()
 			.Add_bry("wiki", wiki.Domain_bry())
-			.Add_long("page_count", wiki.Stats().Num_pages())
+			.Add_long("page_count", 0) // NOTE(review): constant 0 replaces wiki.Stats().Num_pages(); confirm the receiver does not need the real count
 		);
+		finder.Init(query_lcase, false, false, Byte_ascii.Star); // arguments are case_match=false, auto_wildcard=false, wildchar_byte=Byte_ascii.Star
 		try {
-			int found =0;
+			int found = 0; // number of pages in this wiki that matched so far
 			while (page_rdr.Move_next()) {
 				int page_id = page_rdr.Read_int("page_id");
 				int text_db_id = page_rdr.Read_int("page_text_db_id");
-				byte[] text = wiki.Data__core_mgr().Dbs__get_by_id_or_fail(text_db_id).Tbl__text().Select(page_id);
-				if (Bry_.Has(text, query_raw)) {
-					Xowd_text_row text_row = new Xowd_text_row(page_id, text);
+				byte[] text_mcase = wiki.Data__core_mgr().Dbs__get_by_id_or_fail(text_db_id).Tbl__text().Select(page_id); // page text with its case left as stored; the finder is initialized case-insensitive above
+
+				cbk_eval.found = false; // clear the result left over from the previous page
+				finder.Match(text_mcase, 0, text_mcase.length, cbk_eval); // pass 1: scan the whole page and evaluate the query
+				if (cbk_eval.found) { // only matching pages are reported and highlighted
 					int ns_id = page_rdr.Read_int("page_namespace");
 					byte[] ttl_bry = page_rdr.Read_bry_by_str("page_title");
 					app.Gui__cbk_mgr().Send_json(cbk_trg, "xo.search_fulltext.results__wiki__update__recv", gplx.core.gfobjs.Gfobj_nde.New()
 						.Add_bry("wiki", wiki.Domain_bry())
 						.Add_int("found", ++found)
 					);
-					Write(wiki, query_raw, wiki.Ttl_parse(ns_id, ttl_bry), text_row);
+
+					Xoa_ttl ttl = wiki.Ttl_parse(ns_id, ttl_bry);
+					cbk_highlight.Init(wiki, ttl); // sets the page context and resets the highlighter's per-page counter
+					app.Gui__cbk_mgr().Send_json(cbk_trg, "xo.search_fulltext.results__page__add__recv", gplx.core.gfobjs.Gfobj_nde.New()
+						.Add_bry("wiki", wiki.Domain_bry())
+						.Add_bry("page", ttl.Full_db())
+						.Add_int("found", 0) // starts at 0; the highlighter sends page-update notifications as it finds lines
+					);
+					finder.Match(text_mcase, 0, text_mcase.length, cbk_highlight); // pass 2: re-scan the same text, this time emitting snippets
 				}
 			}
 		} finally {
 			page_rdr.Rls();
 		}
 	}
-	private void Write(Xow_wiki wiki, byte[] query_raw, Xoa_ttl ttl, Xowd_text_row text_row) {
-		app.Gui__cbk_mgr().Send_json(cbk_trg, "xo.search_fulltext.results__page__add__recv", gplx.core.gfobjs.Gfobj_nde.New()
-			.Add_bry("wiki", wiki.Domain_bry())
-			.Add_bry("page", ttl.Full_db())
-			.Add_int("found", 0)
-		);
-		byte[] text_orig = text_row.text;
-		byte[] text_lcase = wiki.Lang().Case_mgr().Case_build_lower(text_orig);
-		int pos = 0;
-		int found = 0;
-		while (true) {
-			int find_bgn = Bry_find_.Find_fwd(text_lcase, query_raw, pos);
-			if (find_bgn == Bry_find_.Not_found)
-				break;
-
-			int snip_bgn = find_bgn - 50;
-			if (snip_bgn < 0)
-				snip_bgn = 0;
-			else {
-				snip_bgn = Bry_find_.Find_bwd_ws(text_orig, snip_bgn, 0) + 1;
-			}
-			int find_end = find_bgn + query_raw.length;
-			int snip_end = find_end + 50;
-			if (snip_end >= text_lcase.length)
-				snip_end = text_lcase.length;
-			else
-				snip_end = Bry_find_.Find_fwd_until_ws(text_orig, snip_end, text_orig.length);
-
-			Add_snip(tmp_bfr, text_orig, text_lcase, snip_bgn, snip_end, query_raw);
-			app.Gui__cbk_mgr().Send_json(cbk_trg, "xo.search_fulltext.results__line__add__recv", gplx.core.gfobjs.Gfobj_nde.New()
-				.Add_bry("wiki", wiki.Domain_bry())
-				.Add_bry("page", ttl.Full_db())
-				.Add_int("line", ++found)
-				.Add_bry("html", tmp_bfr.To_bry_and_clear())
-			);
-			pos = snip_end;
-
-			app.Gui__cbk_mgr().Send_json(cbk_trg, "xo.search_fulltext.results__page__update__recv", gplx.core.gfobjs.Gfobj_nde.New()
-				.Add_bry("wiki", wiki.Domain_bry())
-				.Add_bry("page", ttl.Full_db())
-				.Add_int("found", found)
-			);
-		}
-	}
-	private void Add_snip(Bry_bfr bfr, byte[] src_orig, byte[] src_lcase, int bgn, int end, byte[] qry) {
-		for (int i = bgn; i < end; i++) {
-			byte b = src_orig[i];
-			if (b == Byte_ascii.Nl)
-				bfr.Add(gplx.langs.htmls.Gfh_tag_.Br_inl);
-			else {
-				int qry_end = i + qry.length;
-				if (Bry_.Eq(src_lcase, i, qry_end, qry)) {
-					bfr.Add_str_a7("");
-					bfr.Add_mid(src_orig, i, qry_end);
-					bfr.Add_str_a7("");
-					i = qry_end - 1;
-				}
-				else
-					bfr.Add_byte(b);
-			}
-		}
-	}
 }
-// class Xosearch_result_wiki {
-// public final byte[] wiki;
-// public final byte[] page_db;
-// public byte
-// }
diff --git a/400_xowa/src/gplx/xowa/addons/wikis/searchs/fulltexts/finders/Xosearch_finder_cbk.java b/400_xowa/src/gplx/xowa/addons/wikis/searchs/fulltexts/finders/Xosearch_finder_cbk.java
new file mode 100644
index 000000000..82eeb5456
--- /dev/null
+++ b/400_xowa/src/gplx/xowa/addons/wikis/searchs/fulltexts/finders/Xosearch_finder_cbk.java
@@ -0,0 +1,21 @@
+/*
+XOWA: the XOWA Offline Wiki Application
+Copyright (C) 2012-2017 gnosygnu@gmail.com
+
+XOWA is licensed under the terms of the General Public License (GPL) Version 3,
+or alternatively under the terms of the Apache License Version 2.0.
+
+You may use XOWA according to either of these licenses as is most appropriate
+for your project on a case-by-case basis.
+
+The terms of each license can be found in the source code repository:
+
+GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
+Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
+*/
+package gplx.xowa.addons.wikis.searchs.fulltexts.finders; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.searchs.*; import gplx.xowa.addons.wikis.searchs.fulltexts.*;
+import gplx.xowa.guis.cbks.*;
+public interface Xosearch_finder_cbk { // callback contract driven by Xosearch_finder_mgr.Match; one implementation evaluates the query, another builds highlighted snippets
+	void Process_item_found(byte[] src, int hook_bgn, int hook_end, int word_bgn, int word_end, Xosearch_word_node term); // called by Match for each hook hit accepted by term.Match_word; hook_end is hook_bgn + hook length, word_bgn / word_end are the bounds of the surrounding whitespace-delimited word, term is the query node that owns the hook
+	void Process_page_done(byte[] src, Xosearch_word_node tree_root); // called by Match exactly once, after the scan of src has finished; tree_root is the root of the query tree
+}
diff --git a/400_xowa/src/gplx/xowa/addons/wikis/searchs/fulltexts/finders/Xosearch_finder_cbk__eval.java b/400_xowa/src/gplx/xowa/addons/wikis/searchs/fulltexts/finders/Xosearch_finder_cbk__eval.java
new file mode 100644
index 000000000..b2922466f
--- /dev/null
+++ b/400_xowa/src/gplx/xowa/addons/wikis/searchs/fulltexts/finders/Xosearch_finder_cbk__eval.java
@@ -0,0 +1,25 @@
+/*
+XOWA: the XOWA Offline Wiki Application
+Copyright (C) 2012-2017 gnosygnu@gmail.com
+
+XOWA is licensed under the terms of the General Public License (GPL) Version 3,
+or alternatively under the terms of the Apache License Version 2.0.
+
+You may use XOWA according to either of these licenses as is most appropriate
+for your project on a case-by-case basis.
+
+The terms of each license can be found in the source code repository:
+
+GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
+Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
+*/
+package gplx.xowa.addons.wikis.searchs.fulltexts.finders; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.searchs.*; import gplx.xowa.addons.wikis.searchs.fulltexts.*;
+public class Xosearch_finder_cbk__eval implements Xosearch_finder_cbk { // finder callback that flags every matched term and, at page end, evaluates the whole query tree
+	public boolean found; // outcome of the most recent Process_page_done; the search service resets it before, and reads it after, each Xosearch_finder_mgr.Match call
+	public void Process_item_found(byte[] src, int hook_bgn, int hook_end, int word_bgn, int word_end, Xosearch_word_node term) { // src and the positions are not needed for evaluation; only the term is used
+		term.found = true; // flag is read later by Xosearch_word_node.Eval
+	}
+	public void Process_page_done(byte[] src, Xosearch_word_node root) { // src is unused
+		this.found = root.Eval(); // combine the per-term flags set during the scan into a single page-level result
+	}
+}
diff --git a/400_xowa/src/gplx/xowa/addons/wikis/searchs/fulltexts/finders/Xosearch_finder_cbk__highlight.java b/400_xowa/src/gplx/xowa/addons/wikis/searchs/fulltexts/finders/Xosearch_finder_cbk__highlight.java
new file mode 100644
index 000000000..770d39c35
--- /dev/null
+++ b/400_xowa/src/gplx/xowa/addons/wikis/searchs/fulltexts/finders/Xosearch_finder_cbk__highlight.java
@@ -0,0 +1,82 @@
+/*
+XOWA: the XOWA Offline Wiki Application
+Copyright (C) 2012-2017 gnosygnu@gmail.com
+
+XOWA is licensed under the terms of the General Public License (GPL) Version 3,
+or alternatively under the terms of the Apache License Version 2.0.
+
+You may use XOWA according to either of these licenses as is most appropriate
+for your project on a case-by-case basis.
+
+The terms of each license can be found in the source code repository:
+
+GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
+Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
+*/
+package gplx.xowa.addons.wikis.searchs.fulltexts.finders; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.searchs.*; import gplx.xowa.addons.wikis.searchs.fulltexts.*;
+import gplx.xowa.guis.cbks.*;
+public class Xosearch_finder_cbk__highlight implements Xosearch_finder_cbk { // finder callback that turns each hit into a snippet and sends it to the GUI
+	private final Xog_cbk_trg cbk_trg; // callback target used for every Send_json call
+	private final Xoa_app app;
+	private Xow_wiki wiki; // wiki of the page being highlighted; set by Init
+	private Xoa_ttl ttl; // title of the page being highlighted; set by Init
+	private final Bry_bfr tmp_bfr = Bry_bfr_.New(); // scratch buffer for one snippet; drained by To_bry_and_clear after each hit
+	public int found; // number of hits reported for the current page; reset by Init
+	public Xosearch_finder_cbk__highlight(Xoa_app app, Xog_cbk_trg cbk_trg) {
+		this.app = app;
+		this.cbk_trg = cbk_trg;
+	}
+	public void Init(Xow_wiki wiki, Xoa_ttl ttl) { // must be called once per page, before the finder is run with this callback
+		this.wiki = wiki;
+		this.ttl = ttl;
+		found = 0;
+	}
+	public void Process_item_found(byte[] src, int hook_bgn, int hook_end, int word_bgn, int word_end, Xosearch_word_node term) { // builds a snippet around the hook and sends a line-add plus a page-update notification; word_bgn, word_end and term are unused
+		// get snip bounds by finding flanking 50 chars and then expanding to word-bounds
+		int snip_bgn = hook_bgn - 50; // tentative start: 50 bytes before the hook
+		if (snip_bgn < 0)
+			snip_bgn = 0; // hook is within 50 bytes of the start of src
+		else {
+			snip_bgn = Bry_find_.Find_bwd_ws(src, snip_bgn, 0) + 1; // presumably the byte after the nearest preceding whitespace; a not-found result of -1 would give 0
+		}
+		int snip_end = hook_end + 50; // tentative end: 50 bytes after the hook
+		if (snip_end >= src.length)
+			snip_end = src.length;
+		else {
+			snip_end = Bry_find_.Find_fwd_until_ws(src, snip_end, src.length);
+			if (snip_end == Bry_find_.Not_found) { // when snip_end == src.length
+				snip_end = src.length;
+			}
+		}
+
+		// build snip
+		Add_snip(tmp_bfr, src, snip_bgn, hook_bgn); // text before the hook
+		tmp_bfr.Add_str_a7(""); // NOTE(review): empty literal appends nothing; this looks like the place for opening highlight markup - confirm the literal was not lost
+		Add_snip(tmp_bfr, src, hook_bgn, hook_end); // the matched hook itself
+		tmp_bfr.Add_str_a7(""); // NOTE(review): empty literal appends nothing; this looks like the place for closing highlight markup - confirm
+		Add_snip(tmp_bfr, src, hook_end, snip_end); // text after the hook
+
+		// send notification
+		app.Gui__cbk_mgr().Send_json(cbk_trg, "xo.search_fulltext.results__line__add__recv", gplx.core.gfobjs.Gfobj_nde.New()
+			.Add_bry("wiki", wiki.Domain_bry())
+			.Add_bry("page", ttl.Full_db())
+			.Add_int("line", ++found) // 1-based index of this snippet within the current page
+			.Add_bry("html", tmp_bfr.To_bry_and_clear()) // also empties tmp_bfr for the next hit
+		);
+		app.Gui__cbk_mgr().Send_json(cbk_trg, "xo.search_fulltext.results__page__update__recv", gplx.core.gfobjs.Gfobj_nde.New()
+			.Add_bry("wiki", wiki.Domain_bry())
+			.Add_bry("page", ttl.Full_db())
+			.Add_int("found", found) // running hit count for the page, already incremented above
+		);
+	}
+	private void Add_snip(Bry_bfr bfr, byte[] src, int bgn, int end) { // appends src bytes from bgn up to (not including) end to bfr, replacing each newline byte with Gfh_tag_.Br_inl
+		for (int i = bgn; i < end; i++) {
+			byte b = src[i];
+			if (b == Byte_ascii.Nl)
+				bfr.Add(gplx.langs.htmls.Gfh_tag_.Br_inl);
+			else
+				bfr.Add_byte(b); // NOTE(review): bytes are copied without HTML escaping although the result is sent under the "html" key - confirm this is safe for page text
+		}
+	}
+	public void Process_page_done(byte[] src, Xosearch_word_node tree_root) {} // nothing to do at page end; snippets are sent as hits arrive
+}
diff --git a/400_xowa/src/gplx/xowa/addons/wikis/searchs/fulltexts/finders/Xosearch_finder_mgr.java b/400_xowa/src/gplx/xowa/addons/wikis/searchs/fulltexts/finders/Xosearch_finder_mgr.java
new file mode 100644
index 000000000..dfb5defca
--- /dev/null
+++ b/400_xowa/src/gplx/xowa/addons/wikis/searchs/fulltexts/finders/Xosearch_finder_mgr.java
@@ -0,0 +1,71 @@
+/*
+XOWA: the XOWA Offline Wiki Application
+Copyright (C) 2012-2017 gnosygnu@gmail.com
+
+XOWA is licensed under the terms of the General Public License (GPL) Version 3,
+or alternatively under the terms of the Apache License Version 2.0.
+
+You may use XOWA according to either of these licenses as is most appropriate
+for your project on a case-by-case basis.
+
+The terms of each license can be found in the source code repository:
+
+GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
+Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
+*/
+package gplx.xowa.addons.wikis.searchs.fulltexts.finders; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.searchs.*; import gplx.xowa.addons.wikis.searchs.fulltexts.*;
+import gplx.xowa.guis.cbks.*;
+import gplx.core.btries.*;
+import gplx.xowa.addons.wikis.searchs.searchers.crts.*;
+public class Xosearch_finder_mgr { // scans text for the words of a parsed query and reports accepted hits to a Xosearch_finder_cbk
+	private Btrie_slim_mgr hook_trie; // maps each query word's hook bytes to its Xosearch_word_node; rebuilt by Init
+	private Xosearch_word_node tree_root; // root of the query tree built by Init; handed to the callback at page end
+	private final Srch_crt_parser parser = new Srch_crt_parser(Srch_crt_scanner_syms.Dflt);
+	private final Btrie_rv trv = new Btrie_rv(); // reusable result holder passed to hook_trie.Match_at
+
+	public void Init(byte[] query_mcase, boolean case_match, boolean auto_wildcard, byte wildchar_byte) { // must be called before Match; replaces any previously loaded query
+		// create a new hook_trie based on case_match
+		this.hook_trie = case_match ? Btrie_slim_mgr.cs() : Btrie_slim_mgr.ci_u8();
+
+		// create a new tree_root for eval
+		this.tree_root = Xosearch_word_node_.New_root(parser.Parse_or_invalid(query_mcase, auto_wildcard).Root, hook_trie, wildchar_byte); // New_root also registers every word hook in hook_trie
+	}
+	public void Match(byte[] src, int src_bgn, int src_end, Xosearch_finder_cbk cbk) { // scans src from src_bgn up to (not including) src_end; calls cbk.Process_item_found per accepted hit, then cbk.Process_page_done once
+		// init and clear
+		int cur = src_bgn; // start at the caller-supplied position rather than always at 0
+		tree_root.Clear(); // reset the found flags left by the previous page
+
+		// scan through text one-byte at a time
+		// NOTE: skipping ahead to word-start instead of going byte-by-byte may seem more performant, but will still need to do substring analysis b/c of wildcards and punctuation; EX: "abc" and " 'abc' "; "*abc" and " xyzabc. "
+		while (cur < src_end) { // src_end is exclusive (callers pass src.length), so there is no byte to test at cur == src_end
+			// check each byte against hook_trie
+			Object hook_obj = hook_trie.Match_at(trv, src, cur, src_end);
+
+			// current byte matches no hooks; go to next byte
+			if (hook_obj == null) {
+				cur++;
+				continue;
+			}
+
+			// current byte matches a hook; get hook and hook_end
+			Xosearch_word_node hook = (Xosearch_word_node)hook_obj;
+			int hook_end = cur + hook.word_hook.length;
+
+			// get current word bounds by finding flanking ws
+			int word_bgn = Bry_find_.Find_bwd_ws(src, cur, 0) + 1;
+			int word_end = Bry_find_.Find_fwd_until_ws(src, hook_end, src_end);
+			if (word_end == -1) word_end = src_end; // WORKAROUND: no match returns -1 instead of src_end
+
+			// check if current word matches criteria-word
+			if (hook.Match_word(src, cur, hook_end, word_bgn, word_end)) {
+				cbk.Process_item_found(src, cur, hook_end, word_bgn, word_end, hook);
+			}
+
+			// update position to word_end; the rest of the current word is skipped whether or not it was accepted
+			cur = word_end > cur ? word_end : cur + 1; // fallback guarantees forward progress should a hook ever be empty
+		}
+
+		// mark page done
+		cbk.Process_page_done(src, tree_root);
+	}
+}
diff --git a/400_xowa/src/gplx/xowa/addons/wikis/searchs/fulltexts/finders/Xosearch_word_node.java b/400_xowa/src/gplx/xowa/addons/wikis/searchs/fulltexts/finders/Xosearch_word_node.java
new file mode 100644
index 000000000..dd195626c
--- /dev/null
+++ b/400_xowa/src/gplx/xowa/addons/wikis/searchs/fulltexts/finders/Xosearch_word_node.java
@@ -0,0 +1,70 @@
+/*
+XOWA: the XOWA Offline Wiki Application
+Copyright (C) 2012-2017 gnosygnu@gmail.com
+
+XOWA is licensed under the terms of the General Public License (GPL) Version 3,
+or alternatively under the terms of the Apache License Version 2.0.
+
+You may use XOWA according to either of these licenses as is most appropriate
+for your project on a case-by-case basis.
+
+The terms of each license can be found in the source code repository:
+
+GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
+Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
+*/
+package gplx.xowa.addons.wikis.searchs.fulltexts.finders; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.searchs.*; import gplx.xowa.addons.wikis.searchs.fulltexts.*;
+import gplx.xowa.addons.wikis.searchs.searchers.crts.*;
+public class Xosearch_word_node { // one node of the query tree: either a word (leaf) or an operator (and / or / not) over subs
+	public int tid; // node type; one of the Srch_crt_itm.Tid__* constants handled in Eval
+	public Xosearch_word_node[] subs; // child nodes; Clear and Eval iterate it, so it must be non-null (may be empty)
+	public byte[] word_orig; // word as written in the query, wildcards included; only set for word nodes
+	public byte[] word_hook; // word_orig without its leading / trailing wildcard; the bytes actually searched for
+	public boolean wildcard_at_bgn; // true when the word may be preceded by other characters within the same text word
+	public boolean wildcard_at_end; // true when the word may be followed by other characters within the same text word
+	public boolean found; // set by the finder callback when this word was matched in the current page; reset by Clear
+
+	public boolean Match_word(byte[] src, int hook_bgn, int hook_end, int word_bgn, int word_end) { // decides whether a hook hit at hook_bgn..hook_end counts, given the bounds of the text word around it; src is not inspected yet
+		// TODO.XO: handle punctuation
+
+		// if no wildcard at bgn, hook_bgn must match word_bgn
+		if (	!wildcard_at_bgn
+			&&	hook_bgn != word_bgn)
+			return false;
+
+		// if no wildcard at end, hook_end must match word_end
+		if (	!wildcard_at_end
+			&&	hook_end != word_end) // compare the END of the hook; comparing hook_bgn here rejected every word that had no trailing wildcard
+			return false;
+
+		return true;
+	}
+	public void Clear() { // resets found on this node and, recursively, on all descendants
+		found = false;
+		for (Xosearch_word_node sub : subs)
+			sub.Clear();
+	}
+	public boolean Eval() { // evaluates this subtree from the found flags set during the scan of one page
+		switch (tid) {
+			case Srch_crt_itm.Tid__and: { // true only if every sub is true
+				for (Xosearch_word_node sub : subs)
+					if (!sub.Eval())
+						return false;
+				return true;
+			}
+			case Srch_crt_itm.Tid__or: { // true if any sub is true
+				for (Xosearch_word_node sub : subs)
+					if (sub.Eval())
+						return true;
+				return false;
+			}
+			case Srch_crt_itm.Tid__word:
+			case Srch_crt_itm.Tid__word_quote:
+				return found;
+			case Srch_crt_itm.Tid__not: // a NOT node is never registered in the trie, so its own found flag is never set; negate its operand instead
+				return subs.length == 0 || !subs[0].Eval(); // assumes NOT is unary with the negated item at subs[0] - TODO confirm against Srch_crt_parser; no operand keeps the previous result (true)
+			case Srch_crt_itm.Tid__invalid: return false; // should not happen
+			default: throw Err_.new_unhandled_default(tid);
+		}
+	}
+}
diff --git a/400_xowa/src/gplx/xowa/addons/wikis/searchs/fulltexts/finders/Xosearch_word_node_.java
b/400_xowa/src/gplx/xowa/addons/wikis/searchs/fulltexts/finders/Xosearch_word_node_.java
new file mode 100644
index 000000000..90c9810fb
--- /dev/null
+++ b/400_xowa/src/gplx/xowa/addons/wikis/searchs/fulltexts/finders/Xosearch_word_node_.java
@@ -0,0 +1,64 @@
+/*
+XOWA: the XOWA Offline Wiki Application
+Copyright (C) 2012-2017 gnosygnu@gmail.com
+
+XOWA is licensed under the terms of the General Public License (GPL) Version 3,
+or alternatively under the terms of the Apache License Version 2.0.
+
+You may use XOWA according to either of these licenses as is most appropriate
+for your project on a case-by-case basis.
+
+The terms of each license can be found in the source code repository:
+
+GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
+Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
+*/
+package gplx.xowa.addons.wikis.searchs.fulltexts.finders; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.searchs.*; import gplx.xowa.addons.wikis.searchs.fulltexts.*;
+import gplx.core.btries.*;
+import gplx.xowa.addons.wikis.searchs.searchers.crts.*;
+public class Xosearch_word_node_ { // factory for Xosearch_word_node trees
+	public static Xosearch_word_node New_root(Srch_crt_itm src, Btrie_slim_mgr word_trie, byte wildchar_byte) { // recursively converts the parsed criteria item src into a node tree and registers each word's hook in word_trie; returns the node for src
+		Xosearch_word_node trg = new Xosearch_word_node();
+		trg.tid = src.Tid;
+
+		// set word-related props
+		switch (trg.tid) { // only word and quoted-word items carry text; all other types fall through with the word fields left unset
+			case Srch_crt_itm.Tid__word:
+			case Srch_crt_itm.Tid__word_quote:
+				byte[] word_orig = src.Raw; // EX: "abc*"
+
+				// determine if wildcards at bgn / end
+				int word_orig_len = word_orig.length;
+				boolean wildcard_at_bgn = word_orig_len > 1 && word_orig[0] == wildchar_byte; // the length check means a word that is only the wildcard byte is treated as a literal
+				boolean wildcard_at_end = word_orig_len > 1 && word_orig[word_orig_len - 1] == wildchar_byte;
+
+				// get hook
+				int hook_bgn = wildcard_at_bgn ? 1 : 0; // skip the leading wildcard byte
+				int hook_end = wildcard_at_end ? word_orig_len - 1 : word_orig_len; // drop the trailing wildcard byte
+				byte[] word_hook = wildcard_at_bgn || wildcard_at_end ? Bry_.Mid(word_orig, hook_bgn, hook_end) : word_orig; // NOTE(review): a two-byte word made of two wildcards yields an empty hook - confirm the trie accepts that
+
+				// assign to trg
+				trg.word_orig = word_orig;
+				trg.word_hook = word_hook;
+				trg.wildcard_at_bgn = wildcard_at_bgn;
+				trg.wildcard_at_end = wildcard_at_end;
+
+				// add to hash, trie
+				if (word_trie.Match_exact(word_hook) == null) { // don't add if exists; NOTE(review): a node skipped here is never returned by the trie, so its found flag presumably never gets set when a query repeats a hook - confirm
+					word_trie.Add_obj(word_hook, trg); // the trie value is the node itself, so a trie hit yields the node to flag
+				}
+				break;
+		}
+
+		// set subs
+		Srch_crt_itm[] src_subs = src.Subs;
+		Xosearch_word_node[] trg_subs = new Xosearch_word_node[src_subs.length]; // always allocated, so subs is non-null even for leaf nodes
+		trg.subs = trg_subs;
+		int len = src_subs.length;
+		for (int i = 0; i < len; i++) {
+			trg.subs[i] = New_root(src_subs[i], word_trie, wildchar_byte); // children share the same trie and wildcard byte
+		}
+
+		return trg;
+	}
+}
diff --git a/400_xowa/src/gplx/xowa/xtns/gallery/Gallery_parser.java b/400_xowa/src/gplx/xowa/xtns/gallery/Gallery_parser.java
index f6125f69b..5247f6a37 100644
--- a/400_xowa/src/gplx/xowa/xtns/gallery/Gallery_parser.java
+++ b/400_xowa/src/gplx/xowa/xtns/gallery/Gallery_parser.java
@@ -172,6 +172,7 @@ public class Gallery_parser {
 				cur_itm.Ttl_end_(fld_end);
 				byte[] ttl_bry = Bry_.Mid(src, cur_itm.Ttl_bgn(), fld_end);
 				ttl_bry = gplx.langs.htmls.encoders.Gfo_url_encoder_.Http_url_ttl.Decode(ttl_bry); // NOTE: must decode url-encoded entries; EX: "A%28b%29.png" -> "A(b).png"; DATE:2014-01-01
+				if (gplx.core.envs.Env_.Mode_testing() && wiki == null) return; // TEST: else one test will throw benign null ref exception; DATE:2017-03-01
 				Xoa_ttl ttl = Xoa_ttl.Parse(wiki, ttl_bry);
 				if (	ttl == null // invalid ttl; EX: ""
 					||	ttl.Anch_bgn() == 1 // anchor-only ttl; EX: "#invalid"; DATE:2014-03-18