mirror of
https://github.com/gnosygnu/xowa.git
synced 2024-10-27 20:34:16 +00:00
Db: Add exec reader by prepared statement
This commit is contained in:
parent
7269befb14
commit
66f744b11d
@ -48,4 +48,13 @@ public class Db_sql_ {
|
||||
}
|
||||
return dirty ? bfr.To_bry_and_clear() : raw;
|
||||
}
|
||||
public static String Prep_in_from_ary(Object ary) {
|
||||
Bry_bfr bfr = Bry_bfr_.New();
|
||||
int len = Array_.Len(ary);
|
||||
for (int i = 0; i < len; i++) {
|
||||
if (i != 0) bfr.Add_byte(Byte_ascii.Comma);
|
||||
bfr.Add_byte(Byte_ascii.Question);
|
||||
}
|
||||
return bfr.To_str_and_clear();
|
||||
}
|
||||
}
|
||||
|
@ -46,6 +46,10 @@ public class Db_stmt_ {
|
||||
/**
 * Creates a statement on conn from raw sql text.
 * The sql is wrapped with Db_qry_sql.rdr_ and the resulting query is handed to conn.Stmt_new.
 * NOTE(review): presumably rdr_ flags the query as one that yields a reader (SELECT); confirm in Db_qry_sql.
 *
 * @param conn connection that creates the statement
 * @param sql  raw sql text passed to Db_qry_sql.rdr_
 * @return the statement returned by conn.Stmt_new
 */
public static Db_stmt new_select_as_rdr(Db_conn conn, String sql) {
|
||||
return conn.Stmt_new(Db_qry_sql.rdr_(sql));
|
||||
}
|
||||
public static Db_stmt New_sql_lines(Db_conn conn, String... lines) {
|
||||
Db_qry qry = Db_qry_sql.sql_(String_.Concat_with_str("\n", lines));
|
||||
return conn.Stmt_new(qry);
|
||||
}
|
||||
/**
 * Raises a "db stmt failed" error built by Err_.new_exc, wrapping e and recording proc under the "proc" key.
 * This method always throws; the declared Err return value is never produced.
 * NOTE(review): presumably the return type exists so callers can write "throw err_(...)"; confirm against callers.
 *
 * @param e    underlying exception passed to Err_.new_exc
 * @param stmt accepted but not used in the body
 * @param proc name recorded in the error under "proc"
 */
public static Err err_(Exception e, Db_stmt stmt, String proc) {
|
||||
throw Err_.new_exc(e, "db", "db stmt failed", "proc", proc);
|
||||
}
|
||||
|
@ -18,29 +18,38 @@ import gplx.xowa.wikis.nss.*;
|
||||
import gplx.gflucene.indexers.*;
|
||||
public class Xofulltext_indexer_args implements Gfo_invk {
|
||||
public byte[] wikis;
|
||||
public String ns_ids;
|
||||
public String idx_opt;
|
||||
private String ns_ids_str;
|
||||
public int[] ns_ids_ary;
|
||||
public void Init_by_wiki(Xowe_wiki wiki) {
|
||||
// wikis: null
|
||||
if (wikis == null)
|
||||
wikis = wiki.Domain_bry();
|
||||
|
||||
// ns: null / *
|
||||
if (ns_ids == null)
|
||||
ns_ids = "0";
|
||||
else if (String_.Eq(ns_ids, "*")) {
|
||||
// ns: null or *
|
||||
// if null, use Main namespace
|
||||
List_adp temp_ns_list = List_adp_.New();
|
||||
if (ns_ids_str == null)
|
||||
temp_ns_list.Add(Xow_ns_.Tid__main);
|
||||
// if *, use all namespaces
|
||||
else if (String_.Eq(ns_ids_str, "*")) {
|
||||
Xow_ns[] ns_ary = wiki.Ns_mgr().Ords_ary();
|
||||
int len = ns_ary.length;
|
||||
Bry_bfr bfr = Bry_bfr_.New();
|
||||
for (int i = 0; i < len; i++) {
|
||||
Xow_ns ns = ns_ary[i];
|
||||
int ns_id = ns.Id();
|
||||
if (ns_id < 0) continue; // ignore media, special
|
||||
if (i != 0) bfr.Add_byte(Byte_ascii.Comma);
|
||||
bfr.Add_int_variable(ns_id);
|
||||
temp_ns_list.Add(ns_id);
|
||||
}
|
||||
ns_ids = bfr.To_str_and_clear();
|
||||
}
|
||||
// else, parse ns
|
||||
else {
|
||||
byte[][] ns_bry_ary = Bry_split_.Split(Bry_.new_u8(ns_ids_str), Byte_ascii.Comma, true);
|
||||
for (byte[] ns_bry : ns_bry_ary) {
|
||||
temp_ns_list.Add(Bry_.To_int(ns_bry));
|
||||
}
|
||||
}
|
||||
ns_ids_ary = (int[])temp_ns_list.To_ary_and_clear(int.class);
|
||||
|
||||
// idx_opt
|
||||
if (idx_opt == null) {
|
||||
@ -49,7 +58,7 @@ public class Xofulltext_indexer_args implements Gfo_invk {
|
||||
}
|
||||
public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
|
||||
if (ctx.Match(k, "wikis_")) this.wikis = m.ReadBryOr("v", null);
|
||||
else if (ctx.Match(k, "ns_ids")) this.ns_ids = m.ReadStrOr("v", null);
|
||||
else if (ctx.Match(k, "ns_ids")) this.ns_ids_str = m.ReadStrOr("v", null);
|
||||
else if (ctx.Match(k, "idx_opt")) this.idx_opt = m.ReadStrOr("v", null);
|
||||
else return Gfo_invk_.Rv_unhandled;
|
||||
return this;
|
||||
@ -57,7 +66,7 @@ public class Xofulltext_indexer_args implements Gfo_invk {
|
||||
public static Xofulltext_indexer_args New_by_json(gplx.langs.jsons.Json_nde args) {
|
||||
Xofulltext_indexer_args rv = new Xofulltext_indexer_args();
|
||||
rv.wikis = args.Get_as_bry("wikis");
|
||||
rv.ns_ids = args.Get_as_str("ns_ids");
|
||||
rv.ns_ids_str = args.Get_as_str("ns_ids");
|
||||
rv.idx_opt = args.Get_as_str("idx_opt");
|
||||
return rv;
|
||||
}
|
||||
|
@ -36,44 +36,57 @@ public class Xofulltext_indexer_mgr {
|
||||
|
||||
// get rdr and loop
|
||||
Db_conn conn = page_tbl.Conn();
|
||||
Db_rdr rdr = conn.Exec_rdr(Db_sql_.Make_by_fmt(String_.Ary
|
||||
( "SELECT page_id, page_score, page_namespace, page_title, page_html_db_id"
|
||||
, "FROM page"
|
||||
, "WHERE page_namespace IN ({0})"
|
||||
), args.ns_ids));
|
||||
while (rdr.Move_next()) {
|
||||
// read vars
|
||||
int page_namespace = rdr.Read_int("page_namespace");
|
||||
byte[] page_ttl_bry = rdr.Read_bry_by_str("page_title");
|
||||
int page_id = rdr.Read_int("page_id");
|
||||
int page_score = rdr.Read_int("page_score");
|
||||
int html_db_id = rdr.Read_int("page_html_db_id");
|
||||
|
||||
// ignore redirects
|
||||
if (html_db_id == -1) continue;
|
||||
try {
|
||||
// load page
|
||||
Xoa_ttl page_ttl = wiki.Ttl_parse(page_namespace, page_ttl_bry);
|
||||
if (page_ttl == null)
|
||||
continue;
|
||||
Xow_db_file html_db = html_db_id == -1 ? core_db : wiki.Data__core_mgr().Dbs__get_by_id_or_fail(html_db_id);
|
||||
hpg.Ctor_by_hview(wiki, wiki.Utl__url_parser().Parse(page_ttl.Full_db()), page_ttl, page_id);
|
||||
if (!html_db.Tbl__html().Select_by_page(hpg))
|
||||
continue;
|
||||
byte[] html_text = wiki.Html__hdump_mgr().Load_mgr().Parse(hpg, hpg.Db().Html().Zip_tid(), hpg.Db().Html().Hzip_tid(), hpg.Db().Html().Html_bry());
|
||||
|
||||
// run index
|
||||
indexer.Index(page_id, page_score, page_ttl.Page_txt(), html_text);
|
||||
|
||||
// notify
|
||||
if ((++count % 10000) == 0) {
|
||||
Gfo_usr_dlg_.Instance.Prog_many("", "", "indexing page: ~{0}", count);
|
||||
if (ui != null)
|
||||
ui.Send_prog(Datetime_now.Get().XtoStr_fmt_yyyy_MM_dd_HH_mm_ss() + ": indexing page: " + count);
|
||||
}
|
||||
} catch (Exception e) {
|
||||
Gfo_usr_dlg_.Instance.Warn_many("", "", "err: ~{0}", Err_.Message_gplx_log(e));
|
||||
int[] ns_ids = args.ns_ids_ary;
|
||||
Db_stmt stmt = Db_stmt_.Null;
|
||||
Db_rdr rdr = Db_rdr_.Empty;
|
||||
try {
|
||||
stmt = Db_stmt_.New_sql_lines(conn
|
||||
, "SELECT page_id, page_score, page_namespace, page_title, page_html_db_id"
|
||||
, "FROM page"
|
||||
, "WHERE page_namespace IN (" + Db_sql_.Prep_in_from_ary(ns_ids) + ")"
|
||||
);
|
||||
for (int ns_id : ns_ids) {
|
||||
stmt.Crt_int("page_namespace", ns_id);
|
||||
}
|
||||
rdr = stmt.Exec_select__rls_auto();
|
||||
while (rdr.Move_next()) {
|
||||
// read vars
|
||||
int page_namespace = rdr.Read_int("page_namespace");
|
||||
byte[] page_ttl_bry = rdr.Read_bry_by_str("page_title");
|
||||
int page_id = rdr.Read_int("page_id");
|
||||
int page_score = rdr.Read_int("page_score");
|
||||
int html_db_id = rdr.Read_int("page_html_db_id");
|
||||
|
||||
// ignore redirects
|
||||
if (html_db_id == -1) continue;
|
||||
try {
|
||||
// load page
|
||||
Xoa_ttl page_ttl = wiki.Ttl_parse(page_namespace, page_ttl_bry);
|
||||
if (page_ttl == null)
|
||||
continue;
|
||||
Xow_db_file html_db = html_db_id == -1 ? core_db : wiki.Data__core_mgr().Dbs__get_by_id_or_fail(html_db_id);
|
||||
hpg.Ctor_by_hview(wiki, wiki.Utl__url_parser().Parse(page_ttl.Full_db()), page_ttl, page_id);
|
||||
if (!html_db.Tbl__html().Select_by_page(hpg))
|
||||
continue;
|
||||
byte[] html_text = wiki.Html__hdump_mgr().Load_mgr().Parse(hpg, hpg.Db().Html().Zip_tid(), hpg.Db().Html().Hzip_tid(), hpg.Db().Html().Html_bry());
|
||||
|
||||
// run index
|
||||
indexer.Index(page_id, page_score, page_ttl.Page_txt(), html_text);
|
||||
|
||||
// notify
|
||||
if ((++count % 10000) == 0) {
|
||||
Gfo_usr_dlg_.Instance.Prog_many("", "", "indexing page: ~{0}", count);
|
||||
if (ui != null)
|
||||
ui.Send_prog(Datetime_now.Get().XtoStr_fmt_yyyy_MM_dd_HH_mm_ss() + ": indexing page: " + count);
|
||||
}
|
||||
} catch (Exception e) {
|
||||
Gfo_usr_dlg_.Instance.Warn_many("", "", "err: ~{0}", Err_.Message_gplx_log(e));
|
||||
}
|
||||
}
|
||||
}
|
||||
finally {
|
||||
rdr.Rls();
|
||||
stmt.Rls();
|
||||
}
|
||||
|
||||
// term indexer
|
||||
|
Loading…
Reference in New Issue
Block a user