mirror of
https://github.com/gnosygnu/xowa.git
synced 2024-10-27 20:34:16 +00:00
Db: Add exec reader by prepared statement
This commit is contained in:
parent
7269befb14
commit
66f744b11d
@ -48,4 +48,13 @@ public class Db_sql_ {
|
|||||||
}
|
}
|
||||||
return dirty ? bfr.To_bry_and_clear() : raw;
|
return dirty ? bfr.To_bry_and_clear() : raw;
|
||||||
}
|
}
|
||||||
|
// Builds a comma-separated run of placeholder bytes (Byte_ascii.Question), one per element of ary.
// Only the element count of ary is used; the element values are never read here.
// In this diff the result is spliced into a "WHERE page_namespace IN (...)" clause by the indexer.
// NOTE(review): a zero-length ary adds nothing to the buffer, so the caller would presumably get an empty IN list -- confirm that is acceptable.
public static String Prep_in_from_ary(Object ary) {
|
||||||
|
Bry_bfr bfr = Bry_bfr_.New();
|
||||||
|
int len = Array_.Len(ary);
|
||||||
|
for (int i = 0; i < len; i++) {
|
||||||
|
// separator goes before every placeholder except the first
if (i != 0) bfr.Add_byte(Byte_ascii.Comma);
|
||||||
|
bfr.Add_byte(Byte_ascii.Question);
|
||||||
|
}
|
||||||
|
return bfr.To_str_and_clear();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -46,6 +46,10 @@ public class Db_stmt_ {
|
|||||||
// Wraps the sql text with Db_qry_sql.rdr_ and asks conn to create a statement for it.
// (Unchanged context rows: each code row appears once per diff column.)
public static Db_stmt new_select_as_rdr(Db_conn conn, String sql) {
|
public static Db_stmt new_select_as_rdr(Db_conn conn, String sql) {
|
||||||
return conn.Stmt_new(Db_qry_sql.rdr_(sql));
|
return conn.Stmt_new(Db_qry_sql.rdr_(sql));
|
||||||
}
|
}
|
||||||
|
// Joins the given lines with "\n" into a single SQL string, wraps it with Db_qry_sql.sql_,
// and returns the statement that conn creates for that query.
public static Db_stmt New_sql_lines(Db_conn conn, String... lines) {
|
||||||
|
Db_qry qry = Db_qry_sql.sql_(String_.Concat_with_str("\n", lines));
|
||||||
|
return conn.Stmt_new(qry);
|
||||||
|
}
|
||||||
// Always throws: wraps e via Err_.new_exc with the "db stmt failed" message and the proc name.
// The declared Err return type is never actually returned, and the stmt parameter is not used in the body.
// (Unchanged context rows: each code row appears once per diff column.)
public static Err err_(Exception e, Db_stmt stmt, String proc) {
|
public static Err err_(Exception e, Db_stmt stmt, String proc) {
|
||||||
throw Err_.new_exc(e, "db", "db stmt failed", "proc", proc);
|
throw Err_.new_exc(e, "db", "db stmt failed", "proc", proc);
|
||||||
}
|
}
|
||||||
|
@ -18,29 +18,38 @@ import gplx.xowa.wikis.nss.*;
|
|||||||
import gplx.gflucene.indexers.*;
|
import gplx.gflucene.indexers.*;
|
||||||
public class Xofulltext_indexer_args implements Gfo_invk {
|
public class Xofulltext_indexer_args implements Gfo_invk {
|
||||||
public byte[] wikis;
|
public byte[] wikis;
|
||||||
public String ns_ids;
|
|
||||||
public String idx_opt;
|
public String idx_opt;
|
||||||
|
private String ns_ids_str;
|
||||||
|
public int[] ns_ids_ary;
|
||||||
public void Init_by_wiki(Xowe_wiki wiki) {
|
public void Init_by_wiki(Xowe_wiki wiki) {
|
||||||
// wikis: null
|
// wikis: null
|
||||||
if (wikis == null)
|
if (wikis == null)
|
||||||
wikis = wiki.Domain_bry();
|
wikis = wiki.Domain_bry();
|
||||||
|
|
||||||
// ns: null / *
|
// ns: null or *
|
||||||
if (ns_ids == null)
|
// if null, use Main namespace
|
||||||
ns_ids = "0";
|
List_adp temp_ns_list = List_adp_.New();
|
||||||
else if (String_.Eq(ns_ids, "*")) {
|
if (ns_ids_str == null)
|
||||||
|
temp_ns_list.Add(Xow_ns_.Tid__main);
|
||||||
|
// if *, use all namespaces
|
||||||
|
else if (String_.Eq(ns_ids_str, "*")) {
|
||||||
Xow_ns[] ns_ary = wiki.Ns_mgr().Ords_ary();
|
Xow_ns[] ns_ary = wiki.Ns_mgr().Ords_ary();
|
||||||
int len = ns_ary.length;
|
int len = ns_ary.length;
|
||||||
Bry_bfr bfr = Bry_bfr_.New();
|
|
||||||
for (int i = 0; i < len; i++) {
|
for (int i = 0; i < len; i++) {
|
||||||
Xow_ns ns = ns_ary[i];
|
Xow_ns ns = ns_ary[i];
|
||||||
int ns_id = ns.Id();
|
int ns_id = ns.Id();
|
||||||
if (ns_id < 0) continue; // ignore media, special
|
if (ns_id < 0) continue; // ignore media, special
|
||||||
if (i != 0) bfr.Add_byte(Byte_ascii.Comma);
|
temp_ns_list.Add(ns_id);
|
||||||
bfr.Add_int_variable(ns_id);
|
|
||||||
}
|
}
|
||||||
ns_ids = bfr.To_str_and_clear();
|
|
||||||
}
|
}
|
||||||
|
// else, parse ns
|
||||||
|
else {
|
||||||
|
byte[][] ns_bry_ary = Bry_split_.Split(Bry_.new_u8(ns_ids_str), Byte_ascii.Comma, true);
|
||||||
|
for (byte[] ns_bry : ns_bry_ary) {
|
||||||
|
temp_ns_list.Add(Bry_.To_int(ns_bry));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
ns_ids_ary = (int[])temp_ns_list.To_ary_and_clear(int.class);
|
||||||
|
|
||||||
// idx_opt
|
// idx_opt
|
||||||
if (idx_opt == null) {
|
if (idx_opt == null) {
|
||||||
@ -49,7 +58,7 @@ public class Xofulltext_indexer_args implements Gfo_invk {
|
|||||||
}
|
}
|
||||||
public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
|
public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
|
||||||
if (ctx.Match(k, "wikis_")) this.wikis = m.ReadBryOr("v", null);
|
if (ctx.Match(k, "wikis_")) this.wikis = m.ReadBryOr("v", null);
|
||||||
else if (ctx.Match(k, "ns_ids")) this.ns_ids = m.ReadStrOr("v", null);
|
else if (ctx.Match(k, "ns_ids")) this.ns_ids_str = m.ReadStrOr("v", null);
|
||||||
else if (ctx.Match(k, "idx_opt")) this.idx_opt = m.ReadStrOr("v", null);
|
else if (ctx.Match(k, "idx_opt")) this.idx_opt = m.ReadStrOr("v", null);
|
||||||
else return Gfo_invk_.Rv_unhandled;
|
else return Gfo_invk_.Rv_unhandled;
|
||||||
return this;
|
return this;
|
||||||
@ -57,7 +66,7 @@ public class Xofulltext_indexer_args implements Gfo_invk {
|
|||||||
// Creates a new Xofulltext_indexer_args and fills it from the JSON node keys "wikis", "ns_ids" and "idx_opt".
// Rows are shown once per diff column; the only row that differs is the "ns_ids" assignment.
public static Xofulltext_indexer_args New_by_json(gplx.langs.jsons.Json_nde args) {
|
public static Xofulltext_indexer_args New_by_json(gplx.langs.jsons.Json_nde args) {
|
||||||
Xofulltext_indexer_args rv = new Xofulltext_indexer_args();
|
Xofulltext_indexer_args rv = new Xofulltext_indexer_args();
|
||||||
rv.wikis = args.Get_as_bry("wikis");
|
rv.wikis = args.Get_as_bry("wikis");
|
||||||
// old side: raw "ns_ids" string stored in the ns_ids field
rv.ns_ids = args.Get_as_str("ns_ids");
|
// new side: same raw string stored in ns_ids_str instead
rv.ns_ids_str = args.Get_as_str("ns_ids");
|
||||||
rv.idx_opt = args.Get_as_str("idx_opt");
|
rv.idx_opt = args.Get_as_str("idx_opt");
|
||||||
return rv;
|
return rv;
|
||||||
}
|
}
|
||||||
|
@ -36,44 +36,57 @@ public class Xofulltext_indexer_mgr {
|
|||||||
|
|
||||||
// get rdr and loop
|
// get rdr and loop
|
||||||
Db_conn conn = page_tbl.Conn();
|
Db_conn conn = page_tbl.Conn();
|
||||||
Db_rdr rdr = conn.Exec_rdr(Db_sql_.Make_by_fmt(String_.Ary
|
int[] ns_ids = args.ns_ids_ary;
|
||||||
( "SELECT page_id, page_score, page_namespace, page_title, page_html_db_id"
|
Db_stmt stmt = Db_stmt_.Null;
|
||||||
, "FROM page"
|
Db_rdr rdr = Db_rdr_.Empty;
|
||||||
, "WHERE page_namespace IN ({0})"
|
try {
|
||||||
), args.ns_ids));
|
stmt = Db_stmt_.New_sql_lines(conn
|
||||||
while (rdr.Move_next()) {
|
, "SELECT page_id, page_score, page_namespace, page_title, page_html_db_id"
|
||||||
// read vars
|
, "FROM page"
|
||||||
int page_namespace = rdr.Read_int("page_namespace");
|
, "WHERE page_namespace IN (" + Db_sql_.Prep_in_from_ary(ns_ids) + ")"
|
||||||
byte[] page_ttl_bry = rdr.Read_bry_by_str("page_title");
|
);
|
||||||
int page_id = rdr.Read_int("page_id");
|
for (int ns_id : ns_ids) {
|
||||||
int page_score = rdr.Read_int("page_score");
|
stmt.Crt_int("page_namespace", ns_id);
|
||||||
int html_db_id = rdr.Read_int("page_html_db_id");
|
|
||||||
|
|
||||||
// ignore redirects
|
|
||||||
if (html_db_id == -1) continue;
|
|
||||||
try {
|
|
||||||
// load page
|
|
||||||
Xoa_ttl page_ttl = wiki.Ttl_parse(page_namespace, page_ttl_bry);
|
|
||||||
if (page_ttl == null)
|
|
||||||
continue;
|
|
||||||
Xow_db_file html_db = html_db_id == -1 ? core_db : wiki.Data__core_mgr().Dbs__get_by_id_or_fail(html_db_id);
|
|
||||||
hpg.Ctor_by_hview(wiki, wiki.Utl__url_parser().Parse(page_ttl.Full_db()), page_ttl, page_id);
|
|
||||||
if (!html_db.Tbl__html().Select_by_page(hpg))
|
|
||||||
continue;
|
|
||||||
byte[] html_text = wiki.Html__hdump_mgr().Load_mgr().Parse(hpg, hpg.Db().Html().Zip_tid(), hpg.Db().Html().Hzip_tid(), hpg.Db().Html().Html_bry());
|
|
||||||
|
|
||||||
// run index
|
|
||||||
indexer.Index(page_id, page_score, page_ttl.Page_txt(), html_text);
|
|
||||||
|
|
||||||
// notify
|
|
||||||
if ((++count % 10000) == 0) {
|
|
||||||
Gfo_usr_dlg_.Instance.Prog_many("", "", "indexing page: ~{0}", count);
|
|
||||||
if (ui != null)
|
|
||||||
ui.Send_prog(Datetime_now.Get().XtoStr_fmt_yyyy_MM_dd_HH_mm_ss() + ": indexing page: " + count);
|
|
||||||
}
|
|
||||||
} catch (Exception e) {
|
|
||||||
Gfo_usr_dlg_.Instance.Warn_many("", "", "err: ~{0}", Err_.Message_gplx_log(e));
|
|
||||||
}
|
}
|
||||||
|
rdr = stmt.Exec_select__rls_auto();
|
||||||
|
while (rdr.Move_next()) {
|
||||||
|
// read vars
|
||||||
|
int page_namespace = rdr.Read_int("page_namespace");
|
||||||
|
byte[] page_ttl_bry = rdr.Read_bry_by_str("page_title");
|
||||||
|
int page_id = rdr.Read_int("page_id");
|
||||||
|
int page_score = rdr.Read_int("page_score");
|
||||||
|
int html_db_id = rdr.Read_int("page_html_db_id");
|
||||||
|
|
||||||
|
// ignore redirects
|
||||||
|
if (html_db_id == -1) continue;
|
||||||
|
try {
|
||||||
|
// load page
|
||||||
|
Xoa_ttl page_ttl = wiki.Ttl_parse(page_namespace, page_ttl_bry);
|
||||||
|
if (page_ttl == null)
|
||||||
|
continue;
|
||||||
|
Xow_db_file html_db = html_db_id == -1 ? core_db : wiki.Data__core_mgr().Dbs__get_by_id_or_fail(html_db_id);
|
||||||
|
hpg.Ctor_by_hview(wiki, wiki.Utl__url_parser().Parse(page_ttl.Full_db()), page_ttl, page_id);
|
||||||
|
if (!html_db.Tbl__html().Select_by_page(hpg))
|
||||||
|
continue;
|
||||||
|
byte[] html_text = wiki.Html__hdump_mgr().Load_mgr().Parse(hpg, hpg.Db().Html().Zip_tid(), hpg.Db().Html().Hzip_tid(), hpg.Db().Html().Html_bry());
|
||||||
|
|
||||||
|
// run index
|
||||||
|
indexer.Index(page_id, page_score, page_ttl.Page_txt(), html_text);
|
||||||
|
|
||||||
|
// notify
|
||||||
|
if ((++count % 10000) == 0) {
|
||||||
|
Gfo_usr_dlg_.Instance.Prog_many("", "", "indexing page: ~{0}", count);
|
||||||
|
if (ui != null)
|
||||||
|
ui.Send_prog(Datetime_now.Get().XtoStr_fmt_yyyy_MM_dd_HH_mm_ss() + ": indexing page: " + count);
|
||||||
|
}
|
||||||
|
} catch (Exception e) {
|
||||||
|
Gfo_usr_dlg_.Instance.Warn_many("", "", "err: ~{0}", Err_.Message_gplx_log(e));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
finally {
|
||||||
|
rdr.Rls();
|
||||||
|
stmt.Rls();
|
||||||
}
|
}
|
||||||
|
|
||||||
// term indexer
|
// term indexer
|
||||||
|
Loading…
Reference in New Issue
Block a user