Fulltext indexer: parse the "ns_ids" argument into an int[] and select pages through a
prepared statement with one bound "?" per namespace id, instead of formatting the id list
directly into the SQL text.

diff --git a/140_dbs/src/gplx/dbs/Db_sql_.java b/140_dbs/src/gplx/dbs/Db_sql_.java
index d3b37de76..508a53df6 100644
--- a/140_dbs/src/gplx/dbs/Db_sql_.java
+++ b/140_dbs/src/gplx/dbs/Db_sql_.java
@@ -48,4 +48,15 @@ public class Db_sql_ {
 		}
 		return dirty ? bfr.To_bry_and_clear() : raw;
 	}
+	// Builds the placeholder list for a prepared "IN (...)" clause: one "?" per array item, separated by commas; EX: 3 items -> "?,?,?"
+	// NOTE: a zero-length array yields ""; the caller must decide whether an empty "IN ()" is acceptable for its database
+	public static String Prep_in_from_ary(Object ary) {
+		Bry_bfr bfr = Bry_bfr_.New();
+		int len = Array_.Len(ary);
+		for (int i = 0; i < len; i++) {
+			if (i != 0) bfr.Add_byte(Byte_ascii.Comma);
+			bfr.Add_byte(Byte_ascii.Question);
+		}
+		return bfr.To_str_and_clear();
+	}
 }
diff --git a/140_dbs/src/gplx/dbs/Db_stmt_.java b/140_dbs/src/gplx/dbs/Db_stmt_.java
index d1c97e428..354349257 100644
--- a/140_dbs/src/gplx/dbs/Db_stmt_.java
+++ b/140_dbs/src/gplx/dbs/Db_stmt_.java
@@ -46,6 +46,11 @@ public class Db_stmt_ {
 	public static Db_stmt new_select_as_rdr(Db_conn conn, String sql) {
 		return conn.Stmt_new(Db_qry_sql.rdr_(sql));
 	}
+	// Creates a stmt from SQL passed as separate lines; the lines are joined with "\n" and wrapped in a Db_qry_sql
+	public static Db_stmt New_sql_lines(Db_conn conn, String... lines) {
+		Db_qry qry = Db_qry_sql.sql_(String_.Concat_with_str("\n", lines));
+		return conn.Stmt_new(qry);
+	}
 	public static Err err_(Exception e, Db_stmt stmt, String proc) {
 		throw Err_.new_exc(e, "db", "db stmt failed", "proc", proc);
 	}
diff --git a/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/indexers/bldrs/Xofulltext_indexer_args.java b/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/indexers/bldrs/Xofulltext_indexer_args.java
index e7925b094..30bfe5691 100644
--- a/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/indexers/bldrs/Xofulltext_indexer_args.java
+++ b/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/indexers/bldrs/Xofulltext_indexer_args.java
@@ -18,29 +18,38 @@ import gplx.xowa.wikis.nss.*;
 import gplx.gflucene.indexers.*;
 public class Xofulltext_indexer_args implements Gfo_invk {
 	public byte[] wikis;
-	public String ns_ids;
 	public String idx_opt;
+	private String ns_ids_str; // raw "ns_ids" arg as received: null, "*", or comma-separated ns ids
+	public int[] ns_ids_ary; // ns ids resolved from ns_ids_str; assigned by Init_by_wiki
 	public void Init_by_wiki(Xowe_wiki wiki) {
 		// wikis: null
 		if (wikis == null)
 			wikis = wiki.Domain_bry();
 
-		// ns: null / *
-		if (ns_ids == null)
-			ns_ids = "0";
-		else if (String_.Eq(ns_ids, "*")) {
+		// ns: null or *
+		// if null, use Main namespace
+		List_adp temp_ns_list = List_adp_.New();
+		if (ns_ids_str == null)
+			temp_ns_list.Add(Xow_ns_.Tid__main);
+		// if *, use all namespaces
+		else if (String_.Eq(ns_ids_str, "*")) {
 			Xow_ns[] ns_ary = wiki.Ns_mgr().Ords_ary();
 			int len = ns_ary.length;
-			Bry_bfr bfr = Bry_bfr_.New();
 			for (int i = 0; i < len; i++) {
 				Xow_ns ns = ns_ary[i];
 				int ns_id = ns.Id();
 				if (ns_id < 0) continue; // ignore media, special
-				if (i != 0) bfr.Add_byte(Byte_ascii.Comma);
-				bfr.Add_int_variable(ns_id);
+				temp_ns_list.Add(ns_id);
 			}
-			ns_ids = bfr.To_str_and_clear();
 		}
+		// else, parse ns
+		else {
+			byte[][] ns_bry_ary = Bry_split_.Split(Bry_.new_u8(ns_ids_str), Byte_ascii.Comma, true);
+			for (byte[] ns_bry : ns_bry_ary) {
+				temp_ns_list.Add(Bry_.To_int(ns_bry));
+			}
+		}
+		ns_ids_ary = (int[])temp_ns_list.To_ary_and_clear(int.class);
 
 		// idx_opt
 		if (idx_opt == null) {
@@ -49,7 +58,7 @@ public class Xofulltext_indexer_args implements Gfo_invk {
 	}
 	public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
 		if (ctx.Match(k, "wikis_")) this.wikis = m.ReadBryOr("v", null);
-		else if (ctx.Match(k, "ns_ids")) this.ns_ids = m.ReadStrOr("v", null);
+		else if (ctx.Match(k, "ns_ids")) this.ns_ids_str = m.ReadStrOr("v", null);
 		else if (ctx.Match(k, "idx_opt")) this.idx_opt = m.ReadStrOr("v", null);
 		else return Gfo_invk_.Rv_unhandled;
 		return this;
@@ -57,7 +66,7 @@ public class Xofulltext_indexer_args implements Gfo_invk {
 	public static Xofulltext_indexer_args New_by_json(gplx.langs.jsons.Json_nde args) {
 		Xofulltext_indexer_args rv = new Xofulltext_indexer_args();
 		rv.wikis = args.Get_as_bry("wikis");
-		rv.ns_ids = args.Get_as_str("ns_ids");
+		rv.ns_ids_str = args.Get_as_str("ns_ids");
 		rv.idx_opt = args.Get_as_str("idx_opt");
 		return rv;
 	}
diff --git a/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/indexers/bldrs/Xofulltext_indexer_mgr.java b/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/indexers/bldrs/Xofulltext_indexer_mgr.java
index 3d5663a86..b378fa8b8 100644
--- a/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/indexers/bldrs/Xofulltext_indexer_mgr.java
+++ b/400_xowa/src/gplx/xowa/addons/wikis/fulltexts/indexers/bldrs/Xofulltext_indexer_mgr.java
@@ -36,44 +36,57 @@ public class Xofulltext_indexer_mgr {
 
 		// get rdr and loop
 		Db_conn conn = page_tbl.Conn();
-		Db_rdr rdr = conn.Exec_rdr(Db_sql_.Make_by_fmt(String_.Ary
-		( "SELECT page_id, page_score, page_namespace, page_title, page_html_db_id"
-		, "FROM page"
-		, "WHERE page_namespace IN ({0})"
-		), args.ns_ids));
-		while (rdr.Move_next()) {
-			// read vars
-			int page_namespace = rdr.Read_int("page_namespace");
-			byte[] page_ttl_bry = rdr.Read_bry_by_str("page_title");
-			int page_id = rdr.Read_int("page_id");
-			int page_score = rdr.Read_int("page_score");
-			int html_db_id = rdr.Read_int("page_html_db_id");
-
-			// ignore redirects
-			if (html_db_id == -1) continue;
-			try {
-				// load page
-				Xoa_ttl page_ttl = wiki.Ttl_parse(page_namespace, page_ttl_bry);
-				if (page_ttl == null)
-					continue;
-				Xow_db_file html_db = html_db_id == -1 ? core_db : wiki.Data__core_mgr().Dbs__get_by_id_or_fail(html_db_id);
-				hpg.Ctor_by_hview(wiki, wiki.Utl__url_parser().Parse(page_ttl.Full_db()), page_ttl, page_id);
-				if (!html_db.Tbl__html().Select_by_page(hpg))
-					continue;
-				byte[] html_text = wiki.Html__hdump_mgr().Load_mgr().Parse(hpg, hpg.Db().Html().Zip_tid(), hpg.Db().Html().Hzip_tid(), hpg.Db().Html().Html_bry());
-
-				// run index
-				indexer.Index(page_id, page_score, page_ttl.Page_txt(), html_text);
-
-				// notify
-				if ((++count % 10000) == 0) {
-					Gfo_usr_dlg_.Instance.Prog_many("", "", "indexing page: ~{0}", count);
-					if (ui != null)
-						ui.Send_prog(Datetime_now.Get().XtoStr_fmt_yyyy_MM_dd_HH_mm_ss() + ": indexing page: " + count);
-				}
-			} catch (Exception e) {
-				Gfo_usr_dlg_.Instance.Warn_many("", "", "err: ~{0}", Err_.Message_gplx_log(e));
+		int[] ns_ids = args.ns_ids_ary;
+		Db_stmt stmt = Db_stmt_.Null; // placeholders so the finally block can call Rls() even if stmt / rdr creation fails
+		Db_rdr rdr = Db_rdr_.Empty;
+		try {
+			stmt = Db_stmt_.New_sql_lines(conn
+			, "SELECT page_id, page_score, page_namespace, page_title, page_html_db_id"
+			, "FROM page"
+			, "WHERE page_namespace IN (" + Db_sql_.Prep_in_from_ary(ns_ids) + ")" // one "?" per ns id; values are bound below
+			);
+			for (int ns_id : ns_ids) {
+				stmt.Crt_int("page_namespace", ns_id);
 			}
+			rdr = stmt.Exec_select__rls_auto();
+			while (rdr.Move_next()) {
+				// read vars
+				int page_namespace = rdr.Read_int("page_namespace");
+				byte[] page_ttl_bry = rdr.Read_bry_by_str("page_title");
+				int page_id = rdr.Read_int("page_id");
+				int page_score = rdr.Read_int("page_score");
+				int html_db_id = rdr.Read_int("page_html_db_id");
+
+				// ignore redirects
+				if (html_db_id == -1) continue;
+				try {
+					// load page
+					Xoa_ttl page_ttl = wiki.Ttl_parse(page_namespace, page_ttl_bry);
+					if (page_ttl == null)
+						continue;
+					Xow_db_file html_db = wiki.Data__core_mgr().Dbs__get_by_id_or_fail(html_db_id); // html_db_id is never -1 here; those rows were skipped above
+					hpg.Ctor_by_hview(wiki, wiki.Utl__url_parser().Parse(page_ttl.Full_db()), page_ttl, page_id);
+					if (!html_db.Tbl__html().Select_by_page(hpg))
+						continue;
+					byte[] html_text = wiki.Html__hdump_mgr().Load_mgr().Parse(hpg, hpg.Db().Html().Zip_tid(), hpg.Db().Html().Hzip_tid(), hpg.Db().Html().Html_bry());
+
+					// run index
+					indexer.Index(page_id, page_score, page_ttl.Page_txt(), html_text);
+
+					// notify
+					if ((++count % 10000) == 0) {
+						Gfo_usr_dlg_.Instance.Prog_many("", "", "indexing page: ~{0}", count);
+						if (ui != null)
+							ui.Send_prog(Datetime_now.Get().XtoStr_fmt_yyyy_MM_dd_HH_mm_ss() + ": indexing page: " + count);
+					}
+				} catch (Exception e) {
+					Gfo_usr_dlg_.Instance.Warn_many("", "", "err: ~{0}", Err_.Message_gplx_log(e)); // log and keep going; one bad page should not stop the run
+				}
+			}
+		}
+		finally {
+			try {rdr.Rls();}
+			finally {stmt.Rls();} // still release stmt if rdr.Rls() throws
 		}
 
 		// term indexer