mirror of
https://github.com/gnosygnu/xowa.git
synced 2026-03-02 03:49:30 +00:00
'v3.8.5.1'
This commit is contained in:
@@ -17,12 +17,14 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.addons.bldrs.centrals.cmds; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.centrals.*;
|
||||
import gplx.core.gfobjs.*; import gplx.core.progs.*; import gplx.core.progs.rates.*;
|
||||
import gplx.xowa.apps.apis.*;
|
||||
public abstract class Xobc_cmd__base implements Xobc_cmd_itm {
|
||||
private final Xobc_task_mgr task_mgr;
|
||||
private final Gfo_rate_list rate_list; private final long notify_delay = 1000;
|
||||
private final double delta_threshold = .25d; // allow variance of up to 25% before updating rate
|
||||
private long time_prv;
|
||||
private double rate_cur;
|
||||
private boolean log_verbose;
|
||||
public Xobc_cmd__base(Xobc_task_mgr task_mgr, int task_id, int step_id, int cmd_id) {
|
||||
this.task_mgr = task_mgr; this.task_id = task_id; this.step_id = step_id; this.cmd_id = cmd_id;
|
||||
this.cmd_uid = String_.Concat_with_str(":", Int_.To_str(task_id), Int_.To_str(step_id), Int_.To_str(cmd_id));
|
||||
@@ -51,9 +53,12 @@ public abstract class Xobc_cmd__base implements Xobc_cmd_itm {
|
||||
|
||||
public void Cmd_exec(Xobc_cmd_ctx ctx) {
|
||||
// rate_list.Clear(); this.rate_cur = 0; // TOMBSTONE: do not reset rate else pause and resume will show different numbers
|
||||
Xoapi_root api_root = task_mgr.App().Api_root();
|
||||
if (api_root != null)
|
||||
this.log_verbose = api_root.Addon().Bldr().Central().Log_verbose();
|
||||
try {
|
||||
Gfo_log_.Instance.Info("xobc_cmd task bgn", "task_id", task_id, "step_id", step_id, "cmd_id", cmd_id);
|
||||
this.time_prv = gplx.core.envs.Env_.TickCount();
|
||||
this.time_prv = gplx.core.envs.System_.Ticks();
|
||||
this.status = Gfo_prog_ui_.Status__working;
|
||||
this.Cmd_exec_hook(ctx);
|
||||
Gfo_log_.Instance.Info("xobc_cmd task end", "task_id", task_id, "step_id", step_id, "cmd_id", cmd_id);
|
||||
@@ -106,7 +111,7 @@ public abstract class Xobc_cmd__base implements Xobc_cmd_itm {
|
||||
|
||||
public boolean Prog_notify_and_chk_if_suspended(long new_data_cur, long new_data_end) {
|
||||
if (status == Gfo_prog_ui_.Status__suspended) return true; // task paused by ui; exit now;
|
||||
long time_cur = gplx.core.envs.Env_.TickCount();
|
||||
long time_cur = gplx.core.envs.System_.Ticks();
|
||||
if (time_cur < time_prv + notify_delay) return false; // message came too soon. ignore it
|
||||
|
||||
// update rate
|
||||
@@ -130,6 +135,8 @@ public abstract class Xobc_cmd__base implements Xobc_cmd_itm {
|
||||
|
||||
task_mgr.Send_json("xo.bldr.work.prog__update__recv", Gfobj_nde.New()
|
||||
.Add_int ("task_id", task_id).Add_long("prog_data_cur", data_cur).Add_long("prog_data_end", data_end).Add_int("prog_rate", (int)rate_cur));
|
||||
if (log_verbose)
|
||||
Gfo_usr_dlg_.Instance.Note_many("", "", "xobc:notify: task_id=~{0} cmd_id=~{1} prog_data_cur=~{2} prog_data_end=~{3} rate_cur=~{4}", task_id, cmd_id, data_cur, data_end, rate_cur);
|
||||
return false;
|
||||
}
|
||||
public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
|
||||
|
||||
@@ -43,7 +43,7 @@ public class Merge2_mgr {
|
||||
// fails b/c no Main_Page; Gfo_invk_.Invk_by_msg(wiki.App().Gui__tab_mgr(), gplx.xowa.guis.tabs.Xog_tab_mgr_.Invk__new_tab, GfoMsg_.new_cast_("").Add("focus", true).Add("site", wiki.Domain_str()).Add("page", String_.new_u8(wiki.Props().Main_page())));
|
||||
}
|
||||
public void Merge_data(Xow_wiki wiki, Io_url src_url, int idx_cur) {
|
||||
long all_time_bgn = gplx.core.envs.Env_.TickCount();
|
||||
long all_time_bgn = gplx.core.envs.System_.Ticks();
|
||||
wiki.Init_by_wiki();
|
||||
Db_conn src_conn = Db_conn_bldr.Instance.Get_or_autocreate(false, src_url);
|
||||
ctx.Init(wiki, src_conn);
|
||||
@@ -54,12 +54,12 @@ public class Merge2_mgr {
|
||||
if (prog_wkr.Canceled()) break;
|
||||
Merge2_wkr wkr = (Merge2_wkr)wkr_hash.Get_at(i);
|
||||
// if (prog_wkr.Checkpoint__skip_wkr(src_url, wkr.Tbl_name())) continue;
|
||||
long wkr_time_bgn = gplx.core.envs.Env_.TickCount();
|
||||
long wkr_time_bgn = gplx.core.envs.System_.Ticks();
|
||||
wkr.Merge_data(ctx, prog_wkr);
|
||||
Gfo_log_.Instance.Info("merge.wkr.done", "data", src_url.NameAndExt() + "|" + wkr.Tbl().Tbl_name() + "|" + gplx.core.envs.Env_.TickCount_elapsed_in_frac(wkr_time_bgn));
|
||||
Gfo_log_.Instance.Info("merge.wkr.done", "data", src_url.NameAndExt() + "|" + wkr.Tbl().Tbl_name() + "|" + gplx.core.envs.System_.Ticks__elapsed_in_frac(wkr_time_bgn));
|
||||
}
|
||||
if (ctx.Heap__copy_to_wiki()) ctx.Heap__increment_nxt();
|
||||
Gfo_log_.Instance.Info("merge.wkr.done", "data", src_url.NameAndExt() + "|-1|" + gplx.core.envs.Env_.TickCount_elapsed_in_frac(all_time_bgn));
|
||||
Gfo_log_.Instance.Info("merge.wkr.done", "data", src_url.NameAndExt() + "|-1|" + gplx.core.envs.System_.Ticks__elapsed_in_frac(all_time_bgn));
|
||||
src_conn.Rls_conn(); // NOTE: must close conn else pack_conn will stay open
|
||||
}
|
||||
private static Ordered_hash Make_wkrs(Merge2_wkr... wkrs) {
|
||||
|
||||
@@ -45,7 +45,7 @@ public class Merge_prog_wkr implements Db_bulk_prog {
|
||||
this.cur_fil = fil;
|
||||
this.cur_wkr_tid = wkr_tid;
|
||||
this.Checkpoint__save();
|
||||
time_nxt = gplx.core.envs.Env_.TickCount() + time_gap;
|
||||
time_nxt = gplx.core.envs.System_.Ticks() + time_gap;
|
||||
return false;
|
||||
}
|
||||
public void Checkpoint__save() {
|
||||
@@ -54,7 +54,7 @@ public class Merge_prog_wkr implements Db_bulk_prog {
|
||||
}
|
||||
public boolean Prog__insert_and_stop_if_suspended(int row_size) {
|
||||
++prog_count_cur;
|
||||
long time_cur = gplx.core.envs.Env_.TickCount();
|
||||
long time_cur = gplx.core.envs.System_.Ticks();
|
||||
if (time_cur < time_nxt) return false;
|
||||
// gplx.core.threads.Thread_adp_.Sleep(10);
|
||||
time_nxt = time_cur + time_gap;
|
||||
|
||||
@@ -88,7 +88,7 @@ public class Xobldr__fsdb_db__create_data extends Xob_cmd__base implements Xob_c
|
||||
}
|
||||
@Override public void Cmd_run() {
|
||||
Init_bldr_bmks();
|
||||
this.time_bgn = Env_.TickCount();
|
||||
this.time_bgn = System_.Ticks();
|
||||
int total_pending = Xob_xfer_regy_tbl.Select_total_pending(bldr_conn);
|
||||
// if (total_pending > 250000 && src_bin_mgr__fsdb_version == null)
|
||||
usr_dlg.Note_many("", "", "total pending: ~{0}", total_pending);
|
||||
@@ -277,7 +277,7 @@ public class Xobldr__fsdb_db__create_data extends Xob_cmd__base implements Xob_c
|
||||
if (exit_after_commit) exit_now = true;
|
||||
}
|
||||
@Override public void Cmd_end() {
|
||||
usr_dlg.Note_many("", "", "fsdb_make.done: count=~{0} rate=~{1}", exec_count, Decimal_adp_.divide_safe_(exec_count, Env_.TickCount_elapsed_in_sec(time_bgn)).To_str("#,###.000"));
|
||||
usr_dlg.Note_many("", "", "fsdb_make.done: count=~{0} rate=~{1}", exec_count, Decimal_adp_.divide_safe_(exec_count, System_.Ticks__elapsed_in_sec(time_bgn)).To_str("#,###.000"));
|
||||
if (src_fsdb_wkr != null) {
|
||||
src_fsdb_wkr.Mnt_mgr().Mnts__get_main().Txn_end(); // NOTE: src_fsdb_wkr will be null if no src db defined
|
||||
}
|
||||
@@ -295,7 +295,7 @@ public class Xobldr__fsdb_db__create_data extends Xob_cmd__base implements Xob_c
|
||||
bldr_conn.Rls_conn();
|
||||
}
|
||||
private void Print_progress(Xodb_tbl_oimg_xfer_itm itm) {
|
||||
int time_elapsed = Env_.TickCount_elapsed_in_sec(time_bgn);
|
||||
int time_elapsed = System_.Ticks__elapsed_in_sec(time_bgn);
|
||||
usr_dlg.Prog_many("", "", "prog: num=~{0} err=~{1} time=~{2} rate=~{3} page=~{4} lnki=~{5} ttl=~{6}", exec_count, exec_fail, time_elapsed, Math_.Div_safe_as_int(exec_count, time_elapsed), page_id_val, lnki_id_val, itm.Orig_ttl());
|
||||
}
|
||||
private void Delete_files() {}// TODO_OLD: purge /xowa/file/ dir to free up hard disk space
|
||||
|
||||
@@ -26,7 +26,8 @@ import gplx.fsdb.meta.*; import gplx.xowa.files.fsdb.*; import gplx.fsdb.*;
|
||||
import gplx.xowa.langs.vnts.*; import gplx.xowa.parsers.vnts.*;
|
||||
import gplx.xowa.parsers.lnkis.files.*;
|
||||
import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.cmds.*; import gplx.xowa.bldrs.wkrs.*;
|
||||
import gplx.xowa.addons.bldrs.files.dbs.*; import gplx.xowa.addons.bldrs.mass_parses.parses.*;
|
||||
import gplx.xowa.addons.bldrs.files.dbs.*; import gplx.xowa.addons.bldrs.mass_parses.parses.*; import gplx.xowa.addons.bldrs.mass_parses.parses.utls.*;
|
||||
import gplx.xowa.addons.bldrs.wmdumps.imglinks.*;
|
||||
public class Xobldr__lnki_temp__create extends Xob_dump_mgr_base implements gplx.xowa.parsers.lnkis.files.Xop_file_logger {
|
||||
private Xob_lnki_temp_tbl tbl; private boolean wdata_enabled = true, xtn_ref_enabled = true, gen_html, gen_hdump;
|
||||
private Xop_log_invoke_wkr invoke_wkr; private Xop_log_property_wkr property_wkr;
|
||||
@@ -61,6 +62,13 @@ public class Xobldr__lnki_temp__create extends Xob_dump_mgr_base implements gplx
|
||||
wiki.File__bin_mgr().Wkrs__del(gplx.xowa.files.bins.Xof_bin_wkr_.Key_http_wmf); // remove wmf wkr, else will try to download images during parsing
|
||||
commons_wiki = app.Wiki_mgr().Get_by_or_make(Xow_domain_itm_.Bry__commons);
|
||||
|
||||
// create imglinks
|
||||
Xof_orig_wkr__img_links orig_wkr = new Xof_orig_wkr__img_links(wiki);
|
||||
wiki.File__orig_mgr().Wkrs__set(orig_wkr);
|
||||
Xof_orig_wkr__img_links_.Load_all(orig_wkr);
|
||||
|
||||
Xow_wiki_utl_.Clone_repos(wiki);
|
||||
|
||||
// init log_mgr / property_wkr
|
||||
Xop_log_mgr log_mgr = ctx.App().Log_mgr();
|
||||
log_mgr.Log_dir_(wiki.Fsys_mgr().Root_dir()); // put log in wiki dir, instead of user.temp
|
||||
@@ -140,7 +148,7 @@ public class Xobldr__lnki_temp__create extends Xob_dump_mgr_base implements gplx
|
||||
Xof_ext ext = Xof_ext_.new_by_ttl_(ttl);
|
||||
double lnki_time = lnki.Time();
|
||||
int lnki_page = lnki.Page();
|
||||
byte[] ttl_commons = Xomp_file_logger.To_commons_ttl(ns_file_is_case_match_all, commons_wiki, ttl);
|
||||
byte[] ttl_commons = Xomp_lnki_temp_wkr.To_commons_ttl(ns_file_is_case_match_all, commons_wiki, ttl);
|
||||
if ( Xof_lnki_page.Null_n(lnki_page) // page set
|
||||
&& Xof_lnki_time.Null_n(lnki_time)) // thumbtime set
|
||||
usr_dlg.Warn_many("", "", "page and thumbtime both set; this may be an issue with fsdb: page=~{0} ttl=~{1}", ctx.Page().Ttl().Page_db_as_str(), String_.new_u8(ttl));
|
||||
|
||||
@@ -38,6 +38,6 @@ class Xobldr__lnki_temp__create__fxt {
|
||||
return this;
|
||||
}
|
||||
public void Test__to_commons(String ttl, String expd) {
|
||||
Tfds.Eq(expd, String_.new_u8(gplx.xowa.addons.bldrs.mass_parses.parses.Xomp_file_logger.To_commons_ttl(wiki_ns_file_is_case_match_all, commons_wiki, Bry_.new_u8(ttl))));
|
||||
Tfds.Eq(expd, String_.new_u8(gplx.xowa.addons.bldrs.mass_parses.parses.utls.Xomp_lnki_temp_wkr.To_commons_ttl(wiki_ns_file_is_case_match_all, commons_wiki, Bry_.new_u8(ttl))));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -42,6 +42,7 @@ public class Xob_lnki_temp_tbl implements Db_tbl {
|
||||
public String Tbl_name() {return tbl_name;}
|
||||
public void Create_tbl() {conn.Meta_tbl_create(Dbmeta_tbl_itm.New(tbl_name, flds));}
|
||||
public void Insert_bgn() {conn.Txn_bgn("bldr__lnki_temp"); stmt_insert = conn.Stmt_insert(tbl_name, flds);}
|
||||
public void Insert_stmt_make() {stmt_insert = conn.Stmt_insert(tbl_name, flds);}
|
||||
public void Insert_commit() {conn.Txn_sav();}
|
||||
public void Insert_end() {conn.Txn_end(); stmt_insert = Db_stmt_.Rls(stmt_insert);}
|
||||
public void Insert_cmd_by_batch(int tier_id, int page_id, byte[] ttl, byte[] ttl_commons, byte ext_id, byte img_type, byte lnki_src_tid, int w, int h, double upright, double time, int page) {
|
||||
|
||||
@@ -54,7 +54,7 @@ class Dumpdiff_mgr {
|
||||
if (diff != null) {
|
||||
log_tbl.Insert_by_batch(page_id, diff[0], diff[1]);
|
||||
++diff_count;
|
||||
Gfo_usr_dlg_.Instance.Warn_many("", "", "hdump diff: pages=~{0} diffs=~{1} page_id=~{2} cur=~{3} prv=~{4}", page_count, diff_count, page_id, diff[0], diff[1]);
|
||||
Gfo_usr_dlg_.Instance.Warn_many("", "", "hdump diff: pages=~{0} diffs=~{1} page_id=~{2} lhs=~{3} rhs=~{4}", page_count, diff_count, page_id, diff[0], diff[1]);
|
||||
}
|
||||
}
|
||||
list.Clear();
|
||||
|
||||
@@ -37,13 +37,14 @@ class Xobc_info_doc implements Mustache_doc_itm {
|
||||
}
|
||||
}
|
||||
class Xobc_info_url implements Mustache_doc_itm {
|
||||
private final String url; private final byte[] size;
|
||||
public Xobc_info_url(String url, byte[] size) {
|
||||
this.url = url; this.size = size;
|
||||
private final String url, md5; private final byte[] size;
|
||||
public Xobc_info_url(String url, byte[] size, String md5) {
|
||||
this.url = url; this.size = size; this.md5 = md5;
|
||||
}
|
||||
public boolean Mustache__write(String key, Mustache_bfr bfr) {
|
||||
if (String_.Eq(key, "url")) bfr.Add_str_u8(url);
|
||||
else if (String_.Eq(key, "size")) bfr.Add_bry(size);
|
||||
else if (String_.Eq(key, "md5")) bfr.Add_str_u8(md5);
|
||||
else return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -48,7 +48,7 @@ class Xobc_info_html extends Xow_special_wtr__base {
|
||||
String src_fil = host_eval.Eval_src_fil(data_db, host_id, wiki_domain, step_itm.Import_name);
|
||||
Io_size_.To_bfr_new(tmp_size_bfr, step_itm.Import_size_zip, 2);
|
||||
total_size += step_itm.Import_size_raw;
|
||||
Xobc_info_url step_url = new Xobc_info_url(src_fil, tmp_size_bfr.To_bry_and_clear());
|
||||
Xobc_info_url step_url = new Xobc_info_url(src_fil, tmp_size_bfr.To_bry_and_clear(), step_itm.Import_md5);
|
||||
step_urls[i] = step_url;
|
||||
}
|
||||
|
||||
@@ -66,7 +66,7 @@ class Xobc_info_html extends Xow_special_wtr__base {
|
||||
String src_dir = host_eval.Eval_src_dir(data_db, host_id, wiki_domain);
|
||||
String[] ary = String_.Split(key, "|");
|
||||
ary[0] = String_.new_u8(wiki_domain.Abrv_wm());
|
||||
torrent_fil = String_.Format("{0}/Xowa_{1}_{2}_{3}.torrent", src_dir, ary[0], ary[1], ary[2]); // EX: http://archive.org/download/Xowa_enwiki_latest/Xowa_enwiktionary_2016-06_core.zip
|
||||
torrent_fil = String_.Format("{0}Xowa_{1}_latest_archive.torrent", src_dir, wiki_domain.Abrv_wm(), ary[1]); // EX: https://archive.org/download/Xowa_dewiki_latest/Xowa_dewiki_latest_archive.torrent
|
||||
}
|
||||
|
||||
return new Xobc_info_doc
|
||||
|
||||
@@ -1,60 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.addons.bldrs.mass_parses.dbs; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.mass_parses.*;
|
||||
import gplx.dbs.*;
|
||||
public class Xomp_db_core {
|
||||
private final Object thread_lock = new Object();
|
||||
private final Io_url root_dir;
|
||||
Xomp_db_core(Io_url root_dir) {
|
||||
this.root_dir = root_dir;
|
||||
Io_url mgr_url = root_dir.GenSubFil("xomp.sqlite3");
|
||||
this.mgr_db = new Xomp_mgr_db(mgr_url);
|
||||
}
|
||||
public Db_conn Conn() {return mgr_db.Conn();}
|
||||
public Xomp_mgr_db Mgr_db() {return mgr_db;} private Xomp_mgr_db mgr_db;
|
||||
public Xomp_wkr_db Wkr_db(boolean delete, int idx) {
|
||||
Io_url wkr_url = root_dir.GenSubFil_nest("xomp_" + Int_.To_str_fmt(idx, "000"), "xomp_wkr.sqlite3");
|
||||
if (delete) Io_mgr.Instance.DeleteFil(wkr_url);
|
||||
return new Xomp_wkr_db(idx, wkr_url);
|
||||
}
|
||||
public int Wkr_count() {
|
||||
Io_url[] wkr_dirs = Io_mgr.Instance.QueryDir_args(root_dir).DirOnly_().ExecAsUrlAry();
|
||||
return wkr_dirs.length;
|
||||
}
|
||||
public void Update_wkr_id(int idx, Db_conn wkr_conn) {
|
||||
synchronized (thread_lock) {
|
||||
Db_attach_mgr attach_mgr = new Db_attach_mgr(mgr_db.Conn(), new Db_attach_itm("wkr_db", wkr_conn));
|
||||
attach_mgr.Exec_sql_w_msg("updating page_regy: wkr_id=" + idx, String_.Concat_lines_nl_skip_last // ANSI.Y
|
||||
( "UPDATE xomp_page"
|
||||
, "SET xomp_wkr_id = " + Int_.To_str(idx)
|
||||
, ", html_len = (SELECT length(body) FROM <wkr_db>html h WHERE h.page_id = xomp_page.page_id)"
|
||||
, "WHERE page_id IN (SELECT page_id FROM <wkr_db>html h)"
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
public static Xomp_db_core New__make(Xowe_wiki wiki) {
|
||||
Io_url root_dir = wiki.Fsys_mgr().Root_dir().GenSubDir_nest("tmp", "xomp");
|
||||
Io_mgr.Instance.DeleteDirDeep(root_dir);
|
||||
return new Xomp_db_core(root_dir);
|
||||
}
|
||||
public static Xomp_db_core New__load(Xowe_wiki wiki) {
|
||||
Io_url root_dir = wiki.Fsys_mgr().Root_dir().GenSubDir_nest("tmp", "xomp");
|
||||
return new Xomp_db_core(root_dir);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,52 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.addons.bldrs.mass_parses.dbs; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.mass_parses.*;
|
||||
import gplx.dbs.*;
|
||||
public class Xomp_lock_req_tbl implements Db_tbl {
|
||||
private final String fld_machine_name, fld_req_time;
|
||||
private final Db_conn conn;
|
||||
public Xomp_lock_req_tbl(Db_conn conn) {
|
||||
this.conn = conn;
|
||||
this.tbl_name = "xomp_lock_req";
|
||||
this.fld_machine_name = flds.Add_str("machine_name", 255); // EX: "MACHINE1"
|
||||
this.fld_req_time = flds.Add_str("req_time", 32); // EX: 20160801 010203
|
||||
conn.Rls_reg(this);
|
||||
}
|
||||
public String Tbl_name() {return tbl_name;} private final String tbl_name;
|
||||
public Dbmeta_fld_list Flds() {return flds;} private final Dbmeta_fld_list flds = new Dbmeta_fld_list();
|
||||
public void Create_tbl() {
|
||||
conn.Meta_tbl_create(Dbmeta_tbl_itm.New(tbl_name, flds
|
||||
, Dbmeta_idx_itm.new_normal_by_tbl(tbl_name, "req_time", fld_req_time)
|
||||
));
|
||||
}
|
||||
public void Insert(String machine_name) {
|
||||
conn.Stmt_insert(tbl_name, flds).Clear().Val_str(fld_machine_name, machine_name).Val_str(fld_req_time, Datetime_now.Get_force().XtoStr_fmt_yyyyMMdd_HHmmss()).Exec_insert();
|
||||
}
|
||||
public String Select_1st() {
|
||||
String sql = String_.Format("SELECT * FROM {0} ORDER BY {1} DESC", tbl_name, fld_machine_name); // ANSI.y
|
||||
Db_rdr rdr = conn.Stmt_sql(sql).Exec_select__rls_auto();
|
||||
try {
|
||||
if (!rdr.Move_next()) throw Err_.new_wo_type("xomp_lock_req has no rows");
|
||||
return rdr.Read_str(fld_machine_name);}
|
||||
finally {rdr.Rls();}
|
||||
}
|
||||
public void Delete(String machine_name) {
|
||||
conn.Stmt_delete(tbl_name, fld_machine_name).Clear().Crt_str(fld_machine_name, machine_name).Exec_delete();
|
||||
}
|
||||
public void Rls() {}
|
||||
}
|
||||
@@ -0,0 +1,46 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.addons.bldrs.mass_parses.dbs; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.mass_parses.*;
|
||||
import gplx.dbs.*;
|
||||
public class Xomp_lock_tbl implements Db_tbl {
|
||||
private final String fld_uid_prv;
|
||||
private final Db_conn conn;
|
||||
public Xomp_lock_tbl(Db_conn conn) {
|
||||
this.conn = conn;
|
||||
this.tbl_name = "xomp_lock";
|
||||
this.fld_uid_prv = flds.Add_int("uid_prv"); // EX: -1
|
||||
conn.Rls_reg(this);
|
||||
}
|
||||
public String Tbl_name() {return tbl_name;} private final String tbl_name;
|
||||
public Dbmeta_fld_list Flds() {return flds;} private final Dbmeta_fld_list flds = new Dbmeta_fld_list();
|
||||
public void Create_tbl() {
|
||||
conn.Meta_tbl_create(Dbmeta_tbl_itm.New(tbl_name, flds));
|
||||
conn.Stmt_insert(tbl_name, flds).Clear().Val_int(fld_uid_prv, -1).Exec_insert(); // always add default record when creating table
|
||||
}
|
||||
public int Select() {
|
||||
Db_rdr rdr = conn.Stmt_select(tbl_name, flds).Exec_select__rls_auto();
|
||||
try {
|
||||
if (!rdr.Move_next()) throw Err_.new_wo_type("xomp_lock has no rows");
|
||||
return rdr.Read_int(fld_uid_prv);}
|
||||
finally {rdr.Rls();}
|
||||
}
|
||||
public void Update(int uid_prv) {
|
||||
conn.Stmt_update(tbl_name, String_.Ary_empty, fld_uid_prv).Clear().Val_int(fld_uid_prv, uid_prv).Exec_update();
|
||||
}
|
||||
public void Rls() {}
|
||||
}
|
||||
@@ -17,13 +17,36 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.addons.bldrs.mass_parses.dbs; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.mass_parses.*;
|
||||
import gplx.dbs.*;
|
||||
import gplx.xowa.addons.bldrs.mass_parses.parses.pools.*; import gplx.xowa.addons.bldrs.mass_parses.parses.locks.*;
|
||||
public class Xomp_mgr_db {
|
||||
public Xomp_mgr_db(Io_url url) {
|
||||
this.url = url;
|
||||
this.conn = Db_conn_bldr.Instance.Get_or_autocreate(true, url);
|
||||
this.page_tbl = new Xomp_page_tbl(conn);
|
||||
this.wkr_tbl = new Xomp_wkr_tbl(conn);
|
||||
// this.lock_mgr = new Xomp_lock_mgr__db(conn, 5000);
|
||||
this.lock_mgr = new Xomp_lock_mgr__fsys(5000, this.Dir());
|
||||
}
|
||||
public Db_conn Conn() {return conn;} private final Db_conn conn;
|
||||
public Io_url Url() {return url;} private final Io_url url;
|
||||
public Io_url Dir() {return url.OwnerDir();}
|
||||
public Xomp_page_tbl Page_tbl() {return page_tbl;} private final Xomp_page_tbl page_tbl;
|
||||
public Xomp_wkr_tbl Wkr_tbl() {return wkr_tbl;} private final Xomp_wkr_tbl wkr_tbl;
|
||||
public Xomp_lock_mgr Lock_mgr() {return lock_mgr;} private final Xomp_lock_mgr lock_mgr;
|
||||
|
||||
public void Remake() {
|
||||
conn.Meta_tbl_remake_many(page_tbl, wkr_tbl);
|
||||
lock_mgr.Remake();
|
||||
}
|
||||
|
||||
public static Io_url New__url(Xowe_wiki wiki) {return wiki.Fsys_mgr().Root_dir().GenSubDir_nest("tmp", "xomp");}
|
||||
public static Xomp_mgr_db New__make(Xowe_wiki wiki) {
|
||||
Io_url root_dir = New__url(wiki);
|
||||
Io_mgr.Instance.DeleteDirDeep(root_dir);
|
||||
return new Xomp_mgr_db(root_dir.GenSubFil("xomp.sqlite3"));
|
||||
}
|
||||
public static Xomp_mgr_db New__load(Xowe_wiki wiki) {return New__load(wiki.Fsys_mgr().Root_dir().GenSubDir_nest("tmp", "xomp"));}
|
||||
public static Xomp_mgr_db New__load(Io_url root_dir) {
|
||||
return new Xomp_mgr_db(root_dir.GenSubFil("xomp.sqlite3"));
|
||||
}
|
||||
public Io_url Url() {return url;} private Io_url url;
|
||||
public Db_conn Conn() {return conn;} private Db_conn conn;
|
||||
public Xomp_page_tbl Page_tbl() {return page_tbl;} private Xomp_page_tbl page_tbl;
|
||||
}
|
||||
|
||||
@@ -18,8 +18,9 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
package gplx.xowa.addons.bldrs.mass_parses.dbs; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.mass_parses.*;
|
||||
import gplx.dbs.*;
|
||||
public class Xomp_page_tbl implements Db_tbl {
|
||||
// private final String fld_page_id, fld_page_status, fld_page_mgr_id;
|
||||
private final Object thread_lock = new Object();
|
||||
private final Db_conn conn;
|
||||
// private final String fld_page_id, fld_page_status, fld_page_mgr_id;
|
||||
public Xomp_page_tbl(Db_conn conn) {
|
||||
this.conn = conn;
|
||||
this.tbl_name = "xomp_page";
|
||||
@@ -33,6 +34,7 @@ public class Xomp_page_tbl implements Db_tbl {
|
||||
}
|
||||
public String Tbl_name() {return tbl_name;} private final String tbl_name;
|
||||
public Dbmeta_fld_list Flds() {return flds;} private final Dbmeta_fld_list flds = new Dbmeta_fld_list();
|
||||
|
||||
public void Create_tbl() {
|
||||
conn.Meta_tbl_create
|
||||
( Dbmeta_tbl_itm.New(tbl_name, flds
|
||||
@@ -40,5 +42,16 @@ public class Xomp_page_tbl implements Db_tbl {
|
||||
, Dbmeta_idx_itm.new_normal_by_tbl("xomp_page", "page_ns__page_id" , "page_ns", "page_id") // for make
|
||||
));
|
||||
}
|
||||
public void Update_wkr_uid(int wkr_uid, Db_conn wkr_conn) {
|
||||
synchronized (thread_lock) { // LOCK:used by multiple threads
|
||||
Db_attach_mgr attach_mgr = new Db_attach_mgr(conn, new Db_attach_itm("wkr_db", wkr_conn));
|
||||
attach_mgr.Exec_sql_w_msg("updating page_regy: wkr_id=" + wkr_uid, String_.Concat_lines_nl_skip_last // ANSI.Y
|
||||
( "UPDATE xomp_page"
|
||||
, "SET xomp_wkr_id = " + Int_.To_str(wkr_uid)
|
||||
, ", html_len = (SELECT length(body) FROM <wkr_db>html h WHERE h.page_id = xomp_page.page_id)"
|
||||
, "WHERE page_id IN (SELECT page_id FROM <wkr_db>html h)"
|
||||
));
|
||||
}
|
||||
}
|
||||
public void Rls() {}
|
||||
}
|
||||
|
||||
@@ -30,4 +30,9 @@ public class Xomp_wkr_db {
|
||||
public Io_url Url() {return url;} private Io_url url;
|
||||
public Db_conn Conn() {return conn;} private Db_conn conn;
|
||||
public Xowd_html_tbl Html_tbl() {return html_tbl;} private final Xowd_html_tbl html_tbl;
|
||||
|
||||
public static Xomp_wkr_db New(Io_url root_dir, int uid) {
|
||||
Io_url url = root_dir.GenSubFil_nest("xomp_" + Int_.To_str_fmt(uid, "000"), "xomp_wkr.sqlite3");
|
||||
return new Xomp_wkr_db(uid, url);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,99 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.addons.bldrs.mass_parses.dbs; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.mass_parses.*;
|
||||
import gplx.dbs.*;
|
||||
public class Xomp_wkr_tbl implements Db_tbl {
|
||||
private final String fld_wkr_uid, fld_wkr_url, fld_wkr_status, fld_wkr_status_time, fld_wkr_exec_count, fld_wkr_exec_time;
|
||||
private final Db_conn conn;
|
||||
private final Object thread_lock = new Object();
|
||||
public Xomp_wkr_tbl(Db_conn conn) {
|
||||
this.conn = conn;
|
||||
this.tbl_name = "xomp_wkr";
|
||||
fld_wkr_uid = flds.Add_int_pkey("wkr_uid"); // EX: 0
|
||||
fld_wkr_url = flds.Add_str("wkr_url", 255); // EX: //MACHINE/C:/xowa/wiki/en.wikipedia.org/tmp/xomp
|
||||
fld_wkr_status = flds.Add_int("wkr_status"); // EX: running; waiting
|
||||
fld_wkr_status_time = flds.Add_str("wkr_status_time", 255); // EX: 20160801 010203
|
||||
fld_wkr_exec_count = flds.Add_int("wkr_exec_count"); // EX: 1000
|
||||
fld_wkr_exec_time = flds.Add_int("wkr_exec_time"); // EX: 123
|
||||
conn.Rls_reg(this);
|
||||
}
|
||||
public String Tbl_name() {return tbl_name;} private final String tbl_name;
|
||||
public Dbmeta_fld_list Flds() {return flds;} private final Dbmeta_fld_list flds = new Dbmeta_fld_list();
|
||||
public void Create_tbl() {conn.Meta_tbl_create(Dbmeta_tbl_itm.New(tbl_name, flds));}
|
||||
|
||||
public int Init_wkrs(String wkr_url, int wkr_len) {
|
||||
// delete all with machine_name
|
||||
conn.Stmt_delete(tbl_name, fld_wkr_url).Crt_str(fld_wkr_url, wkr_url).Exec_delete();
|
||||
|
||||
// get bgn_uid / end_uid
|
||||
int bgn_uid = conn.Exec_select_max_as_int(tbl_name, fld_wkr_uid, -1) + 1;
|
||||
int end_uid = bgn_uid + wkr_len;
|
||||
|
||||
// insert into tbl
|
||||
Db_stmt insert_stmt = conn.Stmt_insert(tbl_name, flds);
|
||||
for (int i = bgn_uid; i < end_uid; ++i)
|
||||
Insert(insert_stmt, i, wkr_url);
|
||||
insert_stmt.Rls();
|
||||
return bgn_uid;
|
||||
}
|
||||
public int Select_count() {
|
||||
return conn.Exec_select_count_as_int(tbl_name, 0);
|
||||
}
|
||||
|
||||
private void Insert(Db_stmt stmt, int wkr_uid, String wkr_url) {
|
||||
stmt.Clear()
|
||||
.Val_int(fld_wkr_uid, wkr_uid)
|
||||
.Val_str(fld_wkr_url, wkr_url).Val_int(fld_wkr_status, Status__running).Val_str(fld_wkr_status_time, Datetime_now.Get_force().XtoStr_fmt_yyyyMMdd_HHmmss())
|
||||
.Val_int(fld_wkr_exec_count, 0).Val_int(fld_wkr_exec_time, 0)
|
||||
.Exec_insert();
|
||||
}
|
||||
public void Update_exec(int wkr_uid, int wkr_exec_count, long wkr_exec_time) {
|
||||
synchronized (thread_lock) { // LOCK:wkr_tbl is shared by multiple threads / machines
|
||||
int attempts = 0;
|
||||
while (true) {
|
||||
if (++attempts > 10) {
|
||||
Gfo_usr_dlg_.Instance.Warn_many("", "", "failed to update status; try=~{0}", attempts);
|
||||
break;
|
||||
}
|
||||
try {
|
||||
conn.Stmt_update(tbl_name, String_.Ary(fld_wkr_uid), fld_wkr_status, fld_wkr_status_time, fld_wkr_exec_count, fld_wkr_exec_time).Clear()
|
||||
.Val_int(fld_wkr_status, Status__running).Val_str(fld_wkr_status_time, Datetime_now.Get_force().XtoStr_fmt_yyyyMMdd_HHmmss())
|
||||
.Val_int(fld_wkr_exec_count, wkr_exec_count).Val_int(fld_wkr_exec_time, (int)(wkr_exec_time / 1000))
|
||||
.Crt_int(fld_wkr_uid, wkr_uid)
|
||||
.Exec_update();
|
||||
break; // exit loop
|
||||
} catch (Exception e) {
|
||||
Gfo_usr_dlg_.Instance.Warn_many("", "", "unable to update status; try=~{0} err=~{1}", attempts, Err_.Message_gplx_log(e));
|
||||
gplx.core.threads.Thread_adp_.Sleep(10000);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
public void Update_status(int wkr_uid, int status) {
|
||||
synchronized (thread_lock) { // LOCK:wkr_tbl is shared by multiple threads
|
||||
conn.Stmt_update(tbl_name, String_.Ary(fld_wkr_uid), fld_wkr_status, fld_wkr_status_time).Clear()
|
||||
.Val_int(fld_wkr_status, status).Val_str(fld_wkr_status_time, Datetime_now.Get_force().XtoStr_fmt_yyyyMMdd_HHmmss())
|
||||
.Crt_int(fld_wkr_uid, wkr_uid)
|
||||
.Exec_update();
|
||||
}
|
||||
}
|
||||
// Empty implementation: performs no cleanup.
public void Rls() {}
|
||||
|
||||
public static final int Status__running = 1, Status__sleeping = 2;
|
||||
}
|
||||
@@ -18,16 +18,14 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
package gplx.xowa.addons.bldrs.mass_parses.inits; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.mass_parses.*;
|
||||
import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.wkrs.*;
|
||||
public class Xomp_init_cmd extends Xob_cmd__base {
|
||||
private final Xomp_init_mgr mgr;
|
||||
public Xomp_init_cmd(Xob_bldr bldr, Xowe_wiki wiki) {super(bldr, wiki);
|
||||
mgr = new Xomp_init_mgr(wiki);
|
||||
}
|
||||
private final Xomp_init_mgr mgr = new Xomp_init_mgr();
|
||||
public Xomp_init_cmd(Xob_bldr bldr, Xowe_wiki wiki) {super(bldr, wiki);}
|
||||
@Override public void Cmd_run() {
|
||||
wiki.Init_assert();
|
||||
mgr.Exec();
|
||||
mgr.Exec(wiki);
|
||||
}
|
||||
@Override public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
|
||||
if (ctx.Match(k, Invk__cfg)) return this;
|
||||
if (ctx.Match(k, Invk__cfg)) return mgr.Cfg();
|
||||
else return super.Invk(ctx, ikey, k, m);
|
||||
} private static final String Invk__cfg = "cfg";
|
||||
|
||||
|
||||
@@ -20,31 +20,31 @@ import gplx.dbs.*;
|
||||
import gplx.xowa.bldrs.*;
|
||||
import gplx.xowa.addons.bldrs.mass_parses.parses.*; import gplx.xowa.addons.bldrs.mass_parses.dbs.*;
|
||||
class Xomp_init_mgr {
|
||||
private final Xowe_wiki wiki;
|
||||
public Xomp_init_mgr(Xowe_wiki wiki) {this.wiki = wiki;}
|
||||
public void Exec() {
|
||||
// init
|
||||
Xomp_db_core db_core = Xomp_db_core.New__make(wiki);
|
||||
Xomp_page_tbl page_tbl = db_core.Mgr_db().Page_tbl();
|
||||
public Xomp_init_mgr_cfg Cfg() {return cfg;} private final Xomp_init_mgr_cfg cfg = new Xomp_init_mgr_cfg();
|
||||
public void Exec(Xowe_wiki wiki) {
|
||||
// init vars
|
||||
cfg.Init(wiki);
|
||||
Xomp_mgr_db mgr_db = Xomp_mgr_db.New__make(wiki);
|
||||
Db_conn mgr_conn = mgr_db.Conn();
|
||||
|
||||
// rebuild table
|
||||
Db_conn mgr_conn = db_core.Mgr_db().Conn();
|
||||
mgr_conn.Meta_tbl_remake(page_tbl);
|
||||
// remake all
|
||||
mgr_db.Remake();
|
||||
|
||||
// fill table
|
||||
// fill page tbl
|
||||
Db_attach_mgr attach_mgr = new Db_attach_mgr(mgr_conn, new Db_attach_itm("page_db", wiki.Data__core_mgr().Db__core().Conn()));
|
||||
int[] ns_ary = new int[] {0, 4, 14};
|
||||
int[] ns_ary = cfg.Ns_ids();
|
||||
int len = ns_ary.length;
|
||||
String sql = String_.Concat_lines_nl_skip_last
|
||||
String sql = String_.Concat_lines_nl_skip_last // ANSI.Y
|
||||
( "INSERT INTO xomp_page (page_id, page_ns, page_status, html_len, xomp_wkr_id)"
|
||||
, "SELECT p.page_id, p.page_namespace, 0, 0, 0"
|
||||
, "FROM <page_db>page p"
|
||||
, "WHERE p.page_namespace = {0}"
|
||||
, "AND p.page_is_redirect = 0"
|
||||
, "ORDER BY p.page_id"
|
||||
);
|
||||
for (int i = 0; i < len; ++i) {
|
||||
int ns_id = ns_ary[i];
|
||||
attach_mgr.Exec_sql_w_msg("adding rows for xomp_page: ns=" + ns_id, sql, ns_id);// ANSI.Y
|
||||
attach_mgr.Exec_sql_w_msg("adding rows for xomp_page: ns=" + ns_id, sql, ns_id);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,40 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.addons.bldrs.mass_parses.inits; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.mass_parses.*;
|
||||
import gplx.xowa.wikis.nss.*;
|
||||
class Xomp_init_mgr_cfg implements Gfo_invk {
|
||||
public int[] Ns_ids() {return ns_ids;} private int[] ns_ids = new int[] {0, 4, 14, 100};
|
||||
public void Init(Xowe_wiki wiki) {
|
||||
if (ns_ids == null) ns_ids = Ns_ids(wiki.Ns_mgr());
|
||||
}
|
||||
private static int[] Ns_ids(Xow_ns_mgr ns_mgr) {
|
||||
int ns_ids_len = ns_mgr.Ids_len();
|
||||
int[] rv = new int[ns_ids_len];
|
||||
for (int i = 0; i < ns_ids_len; ++i)
|
||||
rv[i] = ns_mgr.Ids_get_at(i).Id();
|
||||
return rv;
|
||||
}
|
||||
public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
|
||||
if (ctx.Match(k, Invk__ns_ids_)) ns_ids = Int_.Ary_parse(m.ReadStr("v"), "|");
|
||||
else return Gfo_invk_.Rv_unhandled;
|
||||
return this;
|
||||
}
|
||||
private static final String
|
||||
Invk__ns_ids_ = "ns_ids_"
|
||||
;
|
||||
}
|
||||
@@ -18,7 +18,6 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
package gplx.xowa.addons.bldrs.mass_parses.makes; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.mass_parses.*;
|
||||
import gplx.dbs.*;
|
||||
class Xob_lnki_temp_row implements CompareAble {
|
||||
public int Xomp_uid() {return xomp_uid;} private int xomp_uid;
|
||||
public int Lnki_id() {return lnki_id;} private int lnki_id;
|
||||
public int Lnki_tier_id() {return lnki_tier_id;} private int lnki_tier_id;
|
||||
public int Lnki_page_id() {return lnki_page_id;} private int lnki_page_id;
|
||||
@@ -32,9 +31,8 @@ class Xob_lnki_temp_row implements CompareAble {
|
||||
public double Lnki_upright() {return lnki_upright;} private double lnki_upright;
|
||||
public double Lnki_time() {return lnki_time;} private double lnki_time;
|
||||
public int Lnki_page() {return lnki_page;} private int lnki_page;
|
||||
public void Load(Db_rdr rdr, int xomp_uid) {
|
||||
this.xomp_uid = xomp_uid;
|
||||
this.lnki_id = rdr.Read_int("lnki_id");
|
||||
public void Load(Db_rdr rdr, int lnki_id) {
|
||||
this.lnki_id = lnki_id;
|
||||
this.lnki_tier_id = rdr.Read_int("lnki_tier_id");
|
||||
this.lnki_page_id = rdr.Read_int("lnki_page_id");
|
||||
this.lnki_ttl = rdr.Read_bry_by_str("lnki_ttl");
|
||||
@@ -50,8 +48,6 @@ class Xob_lnki_temp_row implements CompareAble {
|
||||
}
|
||||
public int compareTo(Object obj) {
|
||||
Xob_lnki_temp_row comp = (Xob_lnki_temp_row)obj;
|
||||
int rv = Int_.Compare(xomp_uid, comp.xomp_uid);
|
||||
if (rv != CompareAble_.Same) return rv;
|
||||
return Int_.Compare(lnki_id, comp.lnki_id);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -20,17 +20,17 @@ import gplx.dbs.*; import gplx.xowa.htmls.core.dbs.*;
|
||||
import gplx.xowa.addons.bldrs.mass_parses.dbs.*;
|
||||
class Xomp_html_db_rdr {
|
||||
private final Xowd_html_tbl[] src_tbls;
|
||||
private final Xomp_db_core db;
|
||||
private final Xomp_mgr_db mgr_db;
|
||||
public Xomp_html_db_rdr(Xowe_wiki wiki) {
|
||||
this.db = Xomp_db_core.New__load(wiki);
|
||||
this.src_tbls = new Xowd_html_tbl[db.Wkr_count()];
|
||||
this.mgr_db = Xomp_mgr_db.New__load(wiki);
|
||||
this.src_tbls = new Xowd_html_tbl[mgr_db.Wkr_tbl().Select_count()];
|
||||
}
|
||||
public void Rows__get(Xowd_html_row rv, int wkr_id, int page_id) {
|
||||
Xowd_html_tbl src_tbl = src_tbls[wkr_id];
|
||||
public void Rows__get(Xowd_html_row rv, int wkr_uid, int page_id) {
|
||||
Xowd_html_tbl src_tbl = src_tbls[wkr_uid];
|
||||
if (src_tbl == null) {
|
||||
Db_conn wkr_conn = db.Wkr_db(Bool_.N, wkr_id).Conn();
|
||||
Db_conn wkr_conn = Xomp_wkr_db.New(mgr_db.Dir(), wkr_uid).Conn();
|
||||
src_tbl = new Xowd_html_tbl(wkr_conn);
|
||||
src_tbls[wkr_id] = src_tbl;
|
||||
src_tbls[wkr_uid] = src_tbl;
|
||||
}
|
||||
src_tbl.Select_as_row(rv, page_id);
|
||||
}
|
||||
|
||||
@@ -21,8 +21,8 @@ public class Xomp_make_cmd extends Xob_cmd__base {
|
||||
public Xomp_make_cmd(Xob_bldr bldr, Xowe_wiki wiki) {super(bldr, wiki);}
|
||||
@Override public void Cmd_run() {
|
||||
wiki.Init_assert();
|
||||
new Xomp_make_wkr(wiki).Exec();
|
||||
new Xomp_make_lnki().Exec(wiki, 1000);
|
||||
new Xomp_make_html().Exec(wiki);
|
||||
new Xomp_make_lnki().Exec(wiki, 10000);
|
||||
}
|
||||
|
||||
@Override public String Cmd_key() {return BLDR_CMD_KEY;} private static final String BLDR_CMD_KEY = "wiki.mass_parse.make";
|
||||
|
||||
@@ -18,44 +18,48 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
package gplx.xowa.addons.bldrs.mass_parses.makes; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.mass_parses.*;
|
||||
import gplx.core.brys.*;
|
||||
import gplx.dbs.*; import gplx.xowa.htmls.core.dbs.*; import gplx.xowa.addons.bldrs.mass_parses.dbs.*;
|
||||
class Xomp_make_wkr {
|
||||
private final Db_conn mgr_conn;
|
||||
private final Xomp_html_db_wtr html_db_wtr;
|
||||
private final Xomp_html_db_rdr html_db_rdr;
|
||||
class Xomp_make_html {
|
||||
private final Int_flag_bldr src_body_flag_bldr = Xowd_html_tbl.Make_body_flag_bldr();
|
||||
public Xomp_make_wkr(Xowe_wiki wiki) {
|
||||
this.db = Xomp_db_core.New__load(wiki);
|
||||
this.mgr_conn = db.Mgr_db().Conn();
|
||||
this.html_db_rdr = new Xomp_html_db_rdr(wiki);
|
||||
this.html_db_wtr = new Xomp_html_db_wtr(wiki);
|
||||
}
|
||||
public Xomp_db_core Db() {return db;} private Xomp_db_core db;
|
||||
public void Exec() {
|
||||
Xowd_html_row src_row = new Xowd_html_row();
|
||||
public void Exec(Xowe_wiki wiki) {
|
||||
// init
|
||||
Xomp_mgr_db mgr_db = Xomp_mgr_db.New__load(wiki);
|
||||
Db_conn mgr_conn = mgr_db.Conn();
|
||||
|
||||
int[] ns_ary = new int[] {0, 4, 14};
|
||||
int ns_ary_len = ns_ary.length;
|
||||
for (int i = 0; i < ns_ary_len; ++i) {
|
||||
int ns_id = ns_ary[i];
|
||||
String sql = String_.Format("SELECT * FROM xomp_page WHERE page_ns = {0} AND html_len != 0 ORDER BY page_id;", ns_id); // NOTE: html_len == 0 when page failed
|
||||
int count = 0;
|
||||
Db_rdr rdr = mgr_conn.Stmt_sql(sql).Exec_select__rls_auto(); // ANSI.Y
|
||||
try {
|
||||
while (rdr.Move_next()) {
|
||||
Make_page(rdr, src_row, ns_id);
|
||||
if (++count % 10000 == 0)
|
||||
Gfo_usr_dlg_.Instance.Prog_many("", "", "xomp.html.insert: ns=~{0} db=~{1} count=~{2}", Int_.To_str_pad_bgn_space(ns_id, 3), Int_.To_str_pad_bgn_space(html_db_wtr.Cur_db_id(), 3), Int_.To_str_pad_bgn_space(count, 8));
|
||||
}
|
||||
} finally {rdr.Rls();}
|
||||
// update wkr_uid; note that this cannot be done in parse_wkr, b/c multiple-writer-errors for xomp.db|page
|
||||
int wkrs_len = mgr_db.Wkr_tbl().Select_count();
|
||||
for (int i = 0; i < wkrs_len; ++i) {
|
||||
Xomp_wkr_db wkr_db = Xomp_wkr_db.New(mgr_db.Dir(), i);
|
||||
mgr_db.Page_tbl().Update_wkr_uid(i, wkr_db.Conn());
|
||||
}
|
||||
|
||||
this.Rls();
|
||||
// init more
|
||||
Xomp_html_db_rdr html_db_rdr = new Xomp_html_db_rdr(wiki);
|
||||
Xomp_html_db_wtr html_db_wtr = new Xomp_html_db_wtr(wiki);
|
||||
Xowd_html_row src_row = new Xowd_html_row();
|
||||
|
||||
// loop xomp|page and generate html dbs
|
||||
String sql = String_.Format("SELECT * FROM xomp_page WHERE html_len != 0 ORDER BY xomp_uid;"); // NOTE: html_len == 0 when page failed
|
||||
int count = 0;
|
||||
Db_rdr rdr = mgr_conn.Stmt_sql(sql).Exec_select__rls_auto(); // ANSI.Y
|
||||
try {
|
||||
while (rdr.Move_next()) {
|
||||
Make_page(html_db_rdr, rdr, html_db_wtr, src_row);
|
||||
if (++count % 10000 == 0)
|
||||
Gfo_usr_dlg_.Instance.Prog_many("", "", "xomp.html.insert: db=~{0} count=~{1}", Int_.To_str_pad_bgn_space(html_db_wtr.Cur_db_id(), 3), Int_.To_str_pad_bgn_space(count, 8));
|
||||
}
|
||||
} finally {rdr.Rls();}
|
||||
|
||||
// cleanup
|
||||
mgr_conn.Rls_conn();
|
||||
html_db_rdr.Rls();
|
||||
html_db_wtr.Rls();
|
||||
}
|
||||
private void Make_page(Db_rdr rdr, Xowd_html_row src_row, int ns_id) {
|
||||
private void Make_page(Xomp_html_db_rdr html_db_rdr, Db_rdr rdr, Xomp_html_db_wtr html_db_wtr, Xowd_html_row src_row) {
|
||||
// get src_row
|
||||
int page_id = rdr.Read_int("page_id");
|
||||
int html_len = rdr.Read_int("html_len");
|
||||
int wkr_id = rdr.Read_int("xomp_wkr_id");
|
||||
int ns_id = rdr.Read_int("page_ns");
|
||||
html_db_rdr.Rows__get(src_row, wkr_id, page_id);
|
||||
src_body_flag_bldr.Decode(src_row.Body_flag());
|
||||
|
||||
@@ -63,9 +67,4 @@ class Xomp_make_wkr {
|
||||
Xowd_html_tbl trg_tbl = html_db_wtr.Tbls__get_or_new(ns_id, html_len);
|
||||
trg_tbl.Insert(src_row.Page_id(), src_row.Head_flag(), src_body_flag_bldr.Get_as_int(0), src_body_flag_bldr.Get_as_int(1), src_row.Display_ttl(), src_row.Content_sub(), src_row.Sidebar_div(), src_row.Body());
|
||||
}
|
||||
private void Rls() {
|
||||
mgr_conn.Rls_conn();
|
||||
html_db_rdr.Rls();
|
||||
html_db_wtr.Rls();
|
||||
}
|
||||
}
|
||||
@@ -16,36 +16,45 @@ You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.addons.bldrs.mass_parses.makes; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.mass_parses.*;
|
||||
import gplx.core.brys.*;
|
||||
import gplx.core.brys.*; import gplx.core.primitives.*;
|
||||
import gplx.dbs.*; import gplx.xowa.htmls.core.dbs.*; import gplx.xowa.addons.bldrs.mass_parses.dbs.*; import gplx.xowa.addons.bldrs.files.dbs.*;
|
||||
import gplx.xowa.bldrs.*;
|
||||
class Xomp_make_lnki {
|
||||
public void Exec(Xowe_wiki wiki, int uid_count) {
|
||||
// init
|
||||
Xomp_db_core xomp_db = Xomp_db_core.New__load(wiki);
|
||||
Xomp_mgr_db xomp_db = Xomp_mgr_db.New__load(wiki);
|
||||
|
||||
Xob_db_file make_db = Xob_db_file.New__file_make(wiki.Fsys_mgr().Root_dir());
|
||||
Xob_lnki_temp_tbl lnki_temp_tbl = new Xob_lnki_temp_tbl(make_db.Conn());
|
||||
make_db.Conn().Meta_tbl_remake(lnki_temp_tbl);
|
||||
lnki_temp_tbl.Insert_bgn();
|
||||
|
||||
// add index
|
||||
int wkr_count = xomp_db.Wkr_count();
|
||||
// create ary; add index
|
||||
int wkr_count = xomp_db.Wkr_tbl().Select_count();
|
||||
Xomp_wkr_db[] db_ary = new Xomp_wkr_db[wkr_count];
|
||||
for (int i = 0; i < wkr_count; ++i) {
|
||||
Xomp_wkr_db wkr_db = xomp_db.Wkr_db(Bool_.N, i);
|
||||
Xomp_wkr_db wkr_db = Xomp_wkr_db.New(xomp_db.Dir(), i);
|
||||
db_ary[i] = wkr_db;
|
||||
wkr_db.Conn().Meta_idx_assert("lnki_temp", "lnki_page_id", "lnki_page_id");
|
||||
}
|
||||
|
||||
// loop
|
||||
// get max xomp_uid; note that xomp_uid is (a) per page; (b) ordered by page_ns, page_id; (c) starts from 1
|
||||
int max_xomp_uid = xomp_db.Conn().Exec_select_max_as_int("xomp_page", "xomp_uid", -1);
|
||||
|
||||
// loop over wkrs using range of xomp_uid
|
||||
List_adp rows = List_adp_.New();
|
||||
int uid_bgn = -1;
|
||||
int cur_xomp_uid = -1; Int_obj_ref lnki_id_ref = Int_obj_ref.New_zero();
|
||||
while (true) {
|
||||
int count = 0;
|
||||
int tmp_xomp_uid_max = -1; // maximum uid for a grp of wkrs; EX: looping over 8 wkrs with xomp_uid range of 1 - 1000; max xomp_uid may only be 990 b/c pages are missing / failed
|
||||
for (int i = 0; i < wkr_count; ++i) {
|
||||
Xomp_wkr_db wkr_db = xomp_db.Wkr_db(Bool_.N, i);
|
||||
count += Load_rows(rows, xomp_db, wkr_db, uid_bgn, uid_bgn + uid_count);
|
||||
Xomp_wkr_db wkr_db = db_ary[i];
|
||||
int wkr_uid_max = Load_rows(rows, xomp_db, wkr_db, cur_xomp_uid, cur_xomp_uid + uid_count, lnki_id_ref);
|
||||
if (wkr_uid_max > tmp_xomp_uid_max)
|
||||
tmp_xomp_uid_max = wkr_uid_max;
|
||||
}
|
||||
if (count < 0) break;
|
||||
uid_bgn += count;
|
||||
if (tmp_xomp_uid_max >= max_xomp_uid || tmp_xomp_uid_max == -1) break; // if max_xomp_uid seen, break; note that ">" necessary because max_xomp_uid may not be in set of wkrs;
|
||||
cur_xomp_uid += uid_count; // note that this sequentially counts up by uid_count (1000), so inevitable that cur_xomp_uid will exceed wkr_uid_max
|
||||
Gfo_usr_dlg_.Instance.Prog_many("", "", "building lnki_temp; cur_xomp_uid=~{0}", cur_xomp_uid);
|
||||
Save_rows(rows, lnki_temp_tbl);
|
||||
}
|
||||
|
||||
@@ -54,16 +63,17 @@ class Xomp_make_lnki {
|
||||
xomp_db.Conn().Rls_conn();
|
||||
make_db.Conn().Rls_conn();
|
||||
}
|
||||
private int Load_rows(List_adp rows, Xomp_db_core xomp_db, Xomp_wkr_db wkr_db, int uid_bgn, int uid_end) {
|
||||
private int Load_rows(List_adp rows, Xomp_mgr_db xomp_db, Xomp_wkr_db wkr_db, int uid_bgn, int uid_end, Int_obj_ref lnki_id) {
|
||||
// build sql
|
||||
Db_attach_mgr attach_mgr = new Db_attach_mgr(xomp_db.Conn());
|
||||
attach_mgr.Conn_links_(new Db_attach_itm("wkr_db", wkr_db.Conn()));
|
||||
String sql = String_.Format(String_.Concat_lines_nl_skip_last
|
||||
( "SELECT mgr.xomp_uid"
|
||||
, ", wkr.*"
|
||||
, ", wkr.lnki_tier_id, wkr.lnki_page_id, wkr.lnki_ttl, wkr.lnki_commons_ttl, wkr.lnki_ext, wkr.lnki_type, wkr.lnki_src_tid, wkr.lnki_w, wkr.lnki_h, wkr.lnki_upright, wkr.lnki_time, wkr.lnki_page"
|
||||
, "FROM <wkr_db>lnki_temp wkr"
|
||||
, " JOIN xomp_page mgr ON wkr.lnki_page_id = mgr.page_id"
|
||||
, "WHERE mgr.xomp_uid > {0} AND mgr.xomp_uid <= {1}"
|
||||
, "WHERE mgr.xomp_uid > {0} AND mgr.xomp_uid <= {1}" // mgr.xomp_uid will sort pages by ns_id, page_id
|
||||
)
|
||||
, uid_bgn
|
||||
, uid_end
|
||||
@@ -77,7 +87,7 @@ class Xomp_make_lnki {
|
||||
while (rdr.Move_next()) {
|
||||
rv = rdr.Read_int("xomp_uid");
|
||||
Xob_lnki_temp_row row = new Xob_lnki_temp_row();
|
||||
row.Load(rdr, rv);
|
||||
row.Load(rdr, lnki_id.Val_add_pre());
|
||||
rows.Add(row);
|
||||
}
|
||||
} finally {rdr.Rls();}
|
||||
@@ -90,8 +100,9 @@ class Xomp_make_lnki {
|
||||
for (int i = 0; i < len; ++i) {
|
||||
Xob_lnki_temp_row row = (Xob_lnki_temp_row)rows.Get_at(i);
|
||||
lnki_temp_tbl.Insert_cmd_by_batch(row.Lnki_tier_id(), row.Lnki_page_id(), row.Lnki_ttl(), row.Lnki_commons_ttl()
|
||||
, row.Lnki_ext(), row.Lnki_src_tid(), row.Lnki_src_tid(), row.Lnki_w(), row.Lnki_h(), row.Lnki_upright()
|
||||
, row.Lnki_ext(), row.Lnki_type(), row.Lnki_src_tid(), row.Lnki_w(), row.Lnki_h(), row.Lnki_upright()
|
||||
, row.Lnki_time(), row.Lnki_page());
|
||||
}
|
||||
rows.Clear();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -17,6 +17,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.addons.bldrs.mass_parses.parses; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.mass_parses.*;
|
||||
import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.wkrs.*;
|
||||
import gplx.xowa.addons.bldrs.mass_parses.parses.mgrs.*;
|
||||
public class Xomp_parse_cmd extends Xob_cmd__base {
|
||||
private final Xomp_parse_mgr mgr = new Xomp_parse_mgr();
|
||||
public Xomp_parse_cmd(Xob_bldr bldr, Xowe_wiki wiki) {super(bldr, wiki);}
|
||||
|
||||
@@ -17,6 +17,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.addons.bldrs.mass_parses.parses; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.mass_parses.*;
|
||||
import gplx.xowa.langs.*; import gplx.xowa.langs.cases.*;
|
||||
import gplx.xowa.files.*;
|
||||
public class Xow_wiki_utl_ {
|
||||
public static Xowe_wiki Clone_wiki(Xowe_wiki wiki, Io_url wiki_dir) {
|
||||
Xol_lang_itm lang = new Xol_lang_itm(wiki.App().Lang_mgr(), wiki.Lang().Key_bry());
|
||||
@@ -25,6 +26,16 @@ public class Xow_wiki_utl_ {
|
||||
rv.Init_by_wiki();
|
||||
rv.File_mgr().Repo_mgr().Clone(wiki.File_mgr().Repo_mgr());
|
||||
rv.File__fsdb_mode().Tid__v2__bld__y_();
|
||||
|
||||
Clone_repos(wiki);
|
||||
return rv;
|
||||
}
|
||||
public static void Clone_repos(Xowe_wiki wiki) {
|
||||
// force all repos to be lnx; will not convert characters like *,",? to _; also force long titles
|
||||
Xoa_repo_mgr repo_mgr = wiki.Appe().File_mgr().Repo_mgr();
|
||||
int len = repo_mgr.Count();
|
||||
for (int i = 0; i < len; ++i)
|
||||
repo_mgr.Get_at(i).Fsys_is_wnt_(Bool_.N).Shorten_ttl_(Bool_.N);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,23 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.addons.bldrs.mass_parses.parses.locks; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.mass_parses.*; import gplx.xowa.addons.bldrs.mass_parses.parses.*;
|
||||
// Lock used to hand a single "previous uid" value from one machine to the next.
// Two implementations exist alongside this interface: Xomp_lock_mgr__db and Xomp_lock_mgr__fsys.
public interface Xomp_lock_mgr {
|
||||
// Resets the lock's backing storage (the db impl remakes its tbls; the fsys impl rewrites its uid file).
void Remake();
|
||||
// Waits until machine_name holds the lock, then returns the stored uid.
// The fsys impl may also return -1 (its Uid__eos) instead of acquiring the lock.
int Uid_prv__get(String machine_name);
|
||||
// Stores uid_prv as the new value and gives up the lock held for machine_name.
void Uid_prv__rls(String machine_name, int uid_prv);
|
||||
}
|
||||
@@ -0,0 +1,57 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.addons.bldrs.mass_parses.parses.locks; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.mass_parses.*; import gplx.xowa.addons.bldrs.mass_parses.parses.*;
|
||||
import gplx.dbs.*; import gplx.xowa.addons.bldrs.mass_parses.dbs.*;
|
||||
public class Xomp_lock_mgr__db implements Xomp_lock_mgr {
|
||||
private final Db_conn conn;
|
||||
private final Xomp_lock_tbl lock_tbl;
|
||||
private final Xomp_lock_req_tbl req_tbl;
|
||||
private final int wait_time;
|
||||
public Xomp_lock_mgr__db(Db_conn conn, int wait_time) {
|
||||
this.conn = conn;
|
||||
this.lock_tbl = new Xomp_lock_tbl(conn);
|
||||
this.req_tbl = new Xomp_lock_req_tbl(conn);
|
||||
this.wait_time = wait_time;
|
||||
}
|
||||
public void Remake() {
|
||||
conn.Meta_tbl_remake(lock_tbl);
|
||||
conn.Meta_tbl_remake(req_tbl);
|
||||
}
|
||||
public int Uid_prv__get(String machine_name) {
|
||||
// insert into req_tbl
|
||||
req_tbl.Insert(machine_name);
|
||||
|
||||
// loop until req is 1st record in req_tbl
|
||||
while (true) {
|
||||
String machine_name_1st = req_tbl.Select_1st();
|
||||
if (String_.Eq(machine_name, machine_name_1st))
|
||||
break;
|
||||
else {
|
||||
Gfo_usr_dlg_.Instance.Note_many("", "", "waiting for lock: ~{0}", machine_name);
|
||||
gplx.core.threads.Thread_adp_.Sleep(wait_time);
|
||||
}
|
||||
}
|
||||
|
||||
// get next uid and fill pages
|
||||
return lock_tbl.Select();
|
||||
}
|
||||
public void Uid_prv__rls(String machine_name, int uid_prv) {
|
||||
lock_tbl.Update(uid_prv);
|
||||
req_tbl.Delete(machine_name);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,91 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.addons.bldrs.mass_parses.parses.locks; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.mass_parses.*; import gplx.xowa.addons.bldrs.mass_parses.parses.*;
|
||||
public class Xomp_lock_mgr__fsys implements Xomp_lock_mgr {
|
||||
private final Io_url root_dir, uid_fil, active_fil;
|
||||
private final int wait_time;
|
||||
private Io_url stop_fil;
|
||||
public Xomp_lock_mgr__fsys(int wait_time, Io_url root_dir) {
|
||||
this.wait_time = wait_time;
|
||||
this.root_dir = root_dir;
|
||||
this.uid_fil = root_dir.GenSubFil("xomp.semaphore.uid.txt");
|
||||
this.active_fil = root_dir.GenSubFil("xomp.semaphore.active.txt");
|
||||
}
|
||||
public void Remake() {
|
||||
Io_url[] fils = Io_mgr.Instance.QueryDir_fils(root_dir);
|
||||
for (Io_url fil : fils) {
|
||||
if (String_.Has_at_end(fil.NameAndExt(), ".sempahore.txt"))
|
||||
Io_mgr.Instance.DeleteFil(fil);
|
||||
}
|
||||
Io_mgr.Instance.SaveFilStr(uid_fil, Int_.To_str(Uid__bos));
|
||||
}
|
||||
public int Uid_prv__get(String machine_name) {
|
||||
// return -1 if stop file exists; note that -1 will stop machine
|
||||
if (stop_fil == null) this.stop_fil = root_dir.GenSubFil("xomp.semaphore.stop." + machine_name + ".txt");
|
||||
if (Io_mgr.Instance.ExistsFil(stop_fil)) return Uid__eos;
|
||||
|
||||
// loop until permit is acquired
|
||||
int tries = 0;
|
||||
while (true) {
|
||||
++tries;
|
||||
if (tries > 10) {
|
||||
Gfo_usr_dlg_.Instance.Warn_many("", "", "failed to acquire permit");
|
||||
return Uid__eos; // too many tries; just exit now
|
||||
}
|
||||
|
||||
// if active_file exists, then assume another machine is reading;
|
||||
if (Io_mgr.Instance.ExistsFil(active_fil)) {
|
||||
Sleep(machine_name, "active file exists");
|
||||
continue;
|
||||
}
|
||||
|
||||
// write to the active_file
|
||||
Io_mgr.Instance.SaveFilStr(active_fil, machine_name);
|
||||
|
||||
// now read it to make sure it's the same
|
||||
String cur_active = String_.new_u8(Io_mgr.Instance.LoadFilBryOr(active_fil, Bry_.Empty));
|
||||
if (!String_.Eq(cur_active, machine_name)) {
|
||||
Sleep(machine_name, "active file differs: " + cur_active);
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
// get next uid and fill pages
|
||||
byte[] cur_uid_bry = Io_mgr.Instance.LoadFilBryOr(uid_fil, null);
|
||||
if (cur_uid_bry == null) return 0; // file is empty; should only occur on 1st run; return 0, which will start from beginning;
|
||||
|
||||
int cur_uid = Int_.Min_value;
|
||||
if (cur_uid_bry != null)
|
||||
cur_uid = Bry_.To_int_or(cur_uid_bry, Int_.Min_value);
|
||||
if (cur_uid == Int_.Min_value) {
|
||||
Gfo_usr_dlg_.Instance.Warn_many("", "", "uid fil has bad data: data:~{0}", cur_uid_bry);
|
||||
return Uid__eos;
|
||||
}
|
||||
return cur_uid;
|
||||
}
|
||||
private void Sleep(String machine_name, String reason) {
|
||||
Gfo_usr_dlg_.Instance.Note_many("", "", "waiting for permit: machine:~{0} reason:~{1}", machine_name, reason);
|
||||
gplx.core.threads.Thread_adp_.Sleep(wait_time);
|
||||
}
|
||||
public void Uid_prv__rls(String machine_name, int uid_prv) {
|
||||
Io_mgr.Instance.SaveFilStr(uid_fil, Int_.To_str(uid_prv));
|
||||
Io_mgr.Instance.DeleteFil(active_fil);
|
||||
}
|
||||
public static final int Uid__bos = 0, Uid__eos = -1;
|
||||
}
|
||||
@@ -15,57 +15,64 @@ GNU Affero General Public License for more details.
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.addons.bldrs.mass_parses.parses; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.mass_parses.*;
|
||||
import gplx.core.threads.*; import gplx.core.threads.utils.*; import gplx.core.caches.*;
|
||||
import gplx.xowa.langs.*; import gplx.xowa.langs.cases.*;
|
||||
import gplx.xowa.addons.bldrs.mass_parses.dbs.*;
|
||||
import gplx.xowa.wikis.caches.*;
|
||||
package gplx.xowa.addons.bldrs.mass_parses.parses.mgrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.mass_parses.*; import gplx.xowa.addons.bldrs.mass_parses.parses.*;
|
||||
import gplx.core.threads.*; import gplx.core.threads.utils.*;
|
||||
import gplx.core.caches.*; import gplx.xowa.wikis.caches.*;
|
||||
import gplx.xowa.addons.bldrs.mass_parses.parses.wkrs.*; import gplx.xowa.addons.bldrs.mass_parses.dbs.*; import gplx.xowa.addons.bldrs.mass_parses.parses.pools.*; import gplx.xowa.addons.bldrs.mass_parses.parses.utls.*;
|
||||
import gplx.xowa.addons.bldrs.wmdumps.imglinks.*;
|
||||
class Xomp_parse_mgr {
|
||||
public class Xomp_parse_mgr {
|
||||
private Gfo_countdown_latch latch;
|
||||
public Xomp_parse_mgr_cfg Cfg() {return cfg;} private final Xomp_parse_mgr_cfg cfg = new Xomp_parse_mgr_cfg();
|
||||
public Xomp_db_core Db_core() {return db_core;} private Xomp_db_core db_core;
|
||||
public Xomp_prog_mgr Prog_mgr() {return prog_mgr;} private final Xomp_prog_mgr prog_mgr = new Xomp_prog_mgr();
|
||||
public void Wkrs_done_add_1() {latch.Countdown();}
|
||||
public void Run(Xowe_wiki wiki) {
|
||||
// init db, pool_loader, pool, prog_mgr
|
||||
// init db
|
||||
cfg.Init(wiki);
|
||||
this.db_core = Xomp_db_core.New__load(wiki);
|
||||
Xomp_page_pool_loader pool_loader = new Xomp_page_pool_loader(wiki, db_core.Mgr_db().Conn(), cfg.Num_pages_in_pool());
|
||||
Xomp_page_pool page_pool = new Xomp_page_pool(pool_loader, cfg.Num_pages_per_wkr());
|
||||
prog_mgr.Init(pool_loader.Get_pending_count(), cfg.Progress_interval());
|
||||
Xomp_mgr_db mgr_db = Xomp_mgr_db.New__load(cfg.Mgr_url());
|
||||
|
||||
// init page_pool
|
||||
Xomp_page_pool_loader page_pool_loader = new Xomp_page_pool_loader(wiki, mgr_db.Conn(), cfg.Num_pages_in_pool());
|
||||
Xomp_page_pool page_pool = new Xomp_page_pool(page_pool_loader, cfg.Num_pages_per_wkr());
|
||||
Xomp_prog_mgr prog_mgr = new Xomp_prog_mgr();
|
||||
prog_mgr.Init(page_pool_loader.Get_pending_count(), cfg.Progress_interval());
|
||||
|
||||
// cache: disable general settings
|
||||
wiki.App().User().User_db_mgr().Cache_mgr().Enabled_n_(); // disable db lookups of cache
|
||||
Xow_page_cache page_cache = Xomp_tmpl_cache_bldr.New(wiki, cfg.Load_all_templates());
|
||||
Gfo_cache_mgr commons_cache = new Gfo_cache_mgr().Max_size_(Int_.Max_value).Reduce_by_(Int_.Max_value);
|
||||
Gfo_cache_mgr ifexist_cache = new Gfo_cache_mgr().Max_size_(Int_.Max_value).Reduce_by_(Int_.Max_value);
|
||||
Xof_orig_wkr__img_links orig_wkr = Xof_orig_wkr__img_links_.Load_all(wiki);
|
||||
|
||||
// cache: preload tmpls and imglinks
|
||||
Xow_page_cache page_cache = Xomp_tmpl_cache_bldr.New(wiki, cfg.Load_all_templates());
|
||||
Xof_orig_wkr__img_links file_orig_wkr = new Xof_orig_wkr__img_links(wiki);
|
||||
if (cfg.Load_all_imglnks()) Xof_orig_wkr__img_links_.Load_all(file_orig_wkr);
|
||||
|
||||
// load_wkr: init and start
|
||||
// Xomp_load_wkr load_wkr = new Xomp_load_wkr(wiki, db_core.Mgr_db().Conn(), cfg.Num_pages_in_pool(), cfg.Num_wkrs());
|
||||
// Xomp_load_wkr load_wkr = new Xomp_load_wkr(wiki, db_mgr.Mgr_db().Conn(), cfg.Num_pages_in_pool(), cfg.Num_wkrs());
|
||||
// Thread_adp_.Start_by_key("xomp.load", Cancelable_.Never, load_wkr, Xomp_load_wkr.Invk__exec);
|
||||
|
||||
// init parse_wkrs
|
||||
// assert wkr_tbl
|
||||
int wkr_len = cfg.Num_wkrs();
|
||||
int wkr_uid_bgn = mgr_db.Wkr_tbl().Init_wkrs(cfg.Wkr_machine_name(), wkr_len);
|
||||
latch = new Gfo_countdown_latch(wkr_len);
|
||||
Xomp_parse_wkr[] wkrs = new Xomp_parse_wkr[wkr_len];
|
||||
|
||||
// init parse_wkrs
|
||||
for (int i = 0; i < wkr_len; ++i) {
|
||||
// make wiki
|
||||
Xowe_wiki wkr_wiki = Xow_wiki_utl_.Clone_wiki(wiki, wiki.Fsys_mgr().Root_dir());
|
||||
Xomp_parse_wkr wkr = new Xomp_parse_wkr(this, wkr_wiki, orig_wkr, page_pool, i, cfg, cfg.Cleanup_interval(), cfg.Progress_interval(), cfg.Log_file_lnkis());
|
||||
wkr_wiki.Cache_mgr().Page_cache_(page_cache).Commons_cache_(commons_cache).Ifexist_cache_(ifexist_cache);
|
||||
// remove wmf wkr, else will try to download images during parsing
|
||||
if (wkr_wiki.File__bin_mgr() != null)
|
||||
wkr_wiki.File__bin_mgr().Wkrs__del(gplx.xowa.files.bins.Xof_bin_wkr_.Key_http_wmf);
|
||||
wkr.Hdump_bldr().Enabled_(cfg.Hdump_enabled()).Hzip_enabled_(cfg.Hzip_enabled()).Hzip_diff_(cfg.Hdiff_enabled());
|
||||
wkr_wiki.Cache_mgr().Page_cache_(page_cache).Commons_cache_(commons_cache).Ifexist_cache_(ifexist_cache);
|
||||
|
||||
// make wkr
|
||||
Xomp_parse_wkr wkr = new Xomp_parse_wkr(this, cfg, mgr_db, page_pool, prog_mgr, file_orig_wkr, wkr_wiki, i + wkr_uid_bgn);
|
||||
wkrs[i] = wkr;
|
||||
}
|
||||
|
||||
// start threads; done separately b/c thread issues when done right after init
|
||||
// start threads; done separately from init b/c thread issues
|
||||
for (int i = 0; i < wkr_len; ++i) {
|
||||
Xomp_parse_wkr wkr = wkrs[i];
|
||||
Thread_adp_.Start_by_key("xomp." + Int_.To_str_fmt(i, "000"), Cancelable_.Never, wkr, Xomp_parse_wkr.Invk__exec);
|
||||
}
|
||||
|
||||
// wait until wkrs are wkrs_done
|
||||
// wait until wkrs are done
|
||||
latch.Await();
|
||||
page_pool.Rls();
|
||||
|
||||
@@ -15,11 +15,11 @@ GNU Affero General Public License for more details.
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.addons.bldrs.mass_parses.parses; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.mass_parses.*;
|
||||
package gplx.xowa.addons.bldrs.mass_parses.parses.mgrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.mass_parses.*; import gplx.xowa.addons.bldrs.mass_parses.parses.*;
|
||||
import gplx.core.ios.streams.*;
|
||||
class Xomp_parse_mgr_cfg implements Gfo_invk {
|
||||
public int Num_wkrs() {return num_wkrs;} private int num_wkrs = -1; // use env.available_processors
|
||||
public int Num_pages_in_pool() {return num_pages_in_pool;} private int num_pages_in_pool = 1000;
|
||||
public class Xomp_parse_mgr_cfg implements Gfo_invk {
|
||||
public int Num_wkrs() {return num_wkrs;} private int num_wkrs = -1;
|
||||
public int Num_pages_in_pool() {return num_pages_in_pool;} private int num_pages_in_pool = -1;
|
||||
public int Num_pages_per_wkr() {return num_pages_per_wkr;} private int num_pages_per_wkr = 1000;
|
||||
public int Progress_interval() {return progress_interval;} private int progress_interval = 1000;
|
||||
public int Commit_interval() {return commit_interval;} private int commit_interval = 10000;
|
||||
@@ -29,15 +29,21 @@ class Xomp_parse_mgr_cfg implements Gfo_invk {
|
||||
public boolean Hdiff_enabled() {return hdiff_enabled;} private boolean hdiff_enabled = true;
|
||||
public boolean Log_file_lnkis() {return log_file_lnkis;} private boolean log_file_lnkis = true;
|
||||
public boolean Load_all_templates() {return load_all_templates;} private boolean load_all_templates = true;
|
||||
public boolean Load_all_imglnks() {return load_all_imglnks;} private boolean load_all_imglnks = true;
|
||||
public byte Zip_tid() {return zip_tid;} private byte zip_tid = Io_stream_.Tid_gzip;
|
||||
public Io_url Mgr_url() {return mgr_url;} private Io_url mgr_url;
|
||||
public String Wkr_machine_name() {return wkr_machine_name;} private String wkr_machine_name;
|
||||
// Fills in defaults for every setting that is still at its "unset" sentinel
// (-1 for the int settings, null for the object settings). Values that were
// already assigned are left untouched.
// NOTE(review): this span comes from a rendered diff; the two num_wkrs lines
// look like the pre-change (Env_) and post-change (Runtime_) versions of the
// same statement - confirm against the real file. As written, the second is a
// no-op whenever the first assigned a value other than -1.
public void Init(Xowe_wiki wiki) {
|
||||
if (num_wkrs == -1) num_wkrs = gplx.core.envs.Env_.System_cpu_count();
|
||||
if (num_wkrs == -1) num_wkrs = gplx.core.envs.Runtime_.Cpu_count();
|
||||
// pool size default is derived from num_wkrs, so it must be computed after num_wkrs is resolved
if (num_pages_in_pool == -1) num_pages_in_pool = num_wkrs * 1000;
|
||||
// default mgr location: "tmp/xomp" nested under the wiki's root dir
if (mgr_url == null) mgr_url = wiki.Fsys_mgr().Root_dir().GenSubDir_nest("tmp", "xomp");
|
||||
if (wkr_machine_name == null) wkr_machine_name = gplx.core.envs.System_.Env__machine_name();
|
||||
}
|
||||
|
||||
public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
|
||||
if (ctx.Match(k, Invk__num_wkrs_)) num_wkrs = m.ReadInt("v");
|
||||
else if (ctx.Match(k, Invk__num_pages_in_pool_)) num_pages_in_pool = m.ReadInt("v");
|
||||
else if (ctx.Match(k, Invk__num_pages_per_wkr_)) num_pages_per_wkr = m.ReadInt("v");
|
||||
else if (ctx.Match(k, Invk__num_pages_per_wkr_)) num_pages_per_wkr = m.ReadInt("v");
|
||||
else if (ctx.Match(k, Invk__progress_interval_)) progress_interval = m.ReadInt("v");
|
||||
else if (ctx.Match(k, Invk__commit_interval_)) commit_interval = m.ReadInt("v");
|
||||
else if (ctx.Match(k, Invk__cleanup_interval_)) cleanup_interval = m.ReadInt("v");
|
||||
@@ -46,7 +52,10 @@ class Xomp_parse_mgr_cfg implements Gfo_invk {
|
||||
else if (ctx.Match(k, Invk__hdiff_enabled_)) hdiff_enabled = m.ReadBool("v");
|
||||
else if (ctx.Match(k, Invk__zip_tid_)) zip_tid = m.ReadByte("v");
|
||||
else if (ctx.Match(k, Invk__load_all_templates_)) load_all_templates = m.ReadYn("v");
|
||||
else if (ctx.Match(k, Invk__load_all_imglnks_)) load_all_imglnks = m.ReadYn("v");
|
||||
else if (ctx.Match(k, Invk__manual_now_)) Datetime_now.Manual_and_freeze_(m.ReadDate("v"));
|
||||
else if (ctx.Match(k, Invk__mgr_url_)) mgr_url = m.ReadIoUrl("v");
|
||||
else if (ctx.Match(k, Invk__wkr_machine_name_)) wkr_machine_name = m.ReadStr("v");
|
||||
else return Gfo_invk_.Rv_unhandled;
|
||||
return this;
|
||||
}
|
||||
@@ -54,6 +63,7 @@ class Xomp_parse_mgr_cfg implements Gfo_invk {
|
||||
Invk__num_wkrs_ = "num_wkrs_", Invk__num_pages_in_pool_ = "num_pages_in_pool_", Invk__num_pages_per_wkr_ = "num_pages_per_wkr_"
|
||||
, Invk__progress_interval_ = "progress_interval_", Invk__commit_interval_ = "commit_interval_", Invk__cleanup_interval_ = "cleanup_interval_"
|
||||
, Invk__hdump_enabled_ = "hdump_enabled_", Invk__hzip_enabled_ = "hzip_enabled_", Invk__hdiff_enabled_ = "hdiff_enabled_", Invk__zip_tid_ = "zip_tid_"
|
||||
, Invk__load_all_templates_ = "load_all_templates_", Invk__manual_now_ = "manual_now_"
|
||||
, Invk__load_all_templates_ = "load_all_templates_", Invk__load_all_imglnks_ = "load_all_imglnks_", Invk__manual_now_ = "manual_now_"
|
||||
, Invk__mgr_url_ = "mgr_url_", Invk__wkr_machine_name_ = "wkr_machine_name_"
|
||||
;
|
||||
}
|
||||
@@ -15,8 +15,8 @@ GNU Affero General Public License for more details.
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.addons.bldrs.mass_parses.parses; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.mass_parses.*;
|
||||
class Xomp_prog_mgr {
|
||||
package gplx.xowa.addons.bldrs.mass_parses.parses.mgrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.mass_parses.*; import gplx.xowa.addons.bldrs.mass_parses.parses.*;
|
||||
public class Xomp_prog_mgr {
|
||||
private final Object thread_lock = new Object();
|
||||
private int progress_interval;
|
||||
private int pages_done, pages_total;
|
||||
@@ -24,14 +24,14 @@ class Xomp_prog_mgr {
|
||||
private final Bry_bfr prog_bfr = Bry_bfr_.New();
|
||||
// Stores the reporting interval and the total page count, and stamps both
// time_bgn and time_prv with the current tick value.
// pages_total:       stored as-is; Mark_done subtracts pages_done from it.
// progress_interval: stored as-is; Mark_done uses it as a modulus on pages_done.
// NOTE(review): this span comes from a rendered diff; the two time_bgn lines
// look like the pre-change (Env_.TickCount) and post-change (System_.Ticks)
// versions of one statement - confirm against the real file.
public void Init(int pages_total, int progress_interval) {
|
||||
this.progress_interval = progress_interval;
|
||||
this.time_bgn = this.time_prv = gplx.core.envs.Env_.TickCount();
|
||||
this.time_bgn = this.time_prv = gplx.core.envs.System_.Ticks();
|
||||
this.pages_total = pages_total;
|
||||
}
|
||||
public void Mark_done(int id) {
|
||||
synchronized (thread_lock) {
|
||||
pages_done += 1;
|
||||
if (pages_done % progress_interval == 0) {
|
||||
long time_cur = gplx.core.envs.Env_.TickCount();
|
||||
long time_cur = gplx.core.envs.System_.Ticks();
|
||||
int pages_left = pages_total - pages_done;
|
||||
time_done += (time_cur - time_prv);
|
||||
double rate_cur = pages_done / (time_done / Time_span_.Ratio_f_to_s);
|
||||
@@ -15,9 +15,9 @@ GNU Affero General Public License for more details.
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.addons.bldrs.mass_parses.parses; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.mass_parses.*;
|
||||
import gplx.dbs.*; import gplx.core.threads.utils.*;
|
||||
class Xomp_load_wkr implements Gfo_invk {
|
||||
package gplx.xowa.addons.bldrs.mass_parses.parses.pools; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.mass_parses.*; import gplx.xowa.addons.bldrs.mass_parses.parses.*;
|
||||
import gplx.dbs.*; import gplx.core.threads.utils.*; import gplx.xowa.addons.bldrs.mass_parses.parses.utls.*;
|
||||
public class Xomp_load_wkr implements Gfo_invk {
|
||||
private final Object thread_lock = new Object();
|
||||
private final Xow_wiki wiki;
|
||||
private final Db_conn mgr_conn;
|
||||
@@ -34,7 +34,7 @@ class Xomp_load_wkr implements Gfo_invk {
|
||||
this.attach_mgr = new Db_attach_mgr(mgr_conn);
|
||||
this.queue = new Gfo_blocking_queue(num_pages_in_pool);
|
||||
this.num_wkrs = num_wkrs;
|
||||
this.time_bgn = this.time_prv = gplx.core.envs.Env_.TickCount();
|
||||
this.time_bgn = this.time_prv = gplx.core.envs.System_.Ticks();
|
||||
this.pages_total = this.Get_pending_count();
|
||||
}
|
||||
public int Get_pending_count() {
|
||||
@@ -103,7 +103,7 @@ class Xomp_load_wkr implements Gfo_invk {
|
||||
synchronized (thread_lock) {
|
||||
pages_done += 1;
|
||||
if (pages_done % 1000 == 0) {
|
||||
long time_cur = gplx.core.envs.Env_.TickCount();
|
||||
long time_cur = gplx.core.envs.System_.Ticks();
|
||||
int pages_left = pages_total - pages_done;
|
||||
time_done += (time_cur - time_prv);
|
||||
double rate_cur = pages_done / (time_done / Time_span_.Ratio_f_to_s);
|
||||
@@ -15,9 +15,9 @@ GNU Affero General Public License for more details.
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.addons.bldrs.mass_parses.parses; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.mass_parses.*;
|
||||
package gplx.xowa.addons.bldrs.mass_parses.parses.pools; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.mass_parses.*; import gplx.xowa.addons.bldrs.mass_parses.parses.*;
|
||||
import gplx.xowa.wikis.data.tbls.*;
|
||||
class Xomp_page_itm implements Xowd_text_bry_owner {
|
||||
public class Xomp_page_itm implements Xowd_text_bry_owner {
|
||||
public Xomp_page_itm(int id) {this.id = id;}
|
||||
public int Id() {return id;} private final int id;
|
||||
public int Ns_id() {return ns_id;} private int ns_id;
|
||||
@@ -15,9 +15,9 @@ GNU Affero General Public License for more details.
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.addons.bldrs.mass_parses.parses; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.mass_parses.*;
|
||||
import gplx.dbs.*;
|
||||
class Xomp_page_pool {
|
||||
package gplx.xowa.addons.bldrs.mass_parses.parses.pools; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.mass_parses.*; import gplx.xowa.addons.bldrs.mass_parses.parses.*;
|
||||
import gplx.dbs.*; import gplx.xowa.addons.bldrs.mass_parses.dbs.*;
|
||||
public class Xomp_page_pool {
|
||||
private final Object thread_lock = new Object();
|
||||
private final Xomp_page_pool_loader loader;
|
||||
private final int num_pages_per_wkr;
|
||||
@@ -26,15 +26,15 @@ class Xomp_page_pool {
|
||||
this.loader = loader; this.num_pages_per_wkr = num_pages_per_wkr;
|
||||
}
|
||||
public boolean Empty() {synchronized (thread_lock) {return empty;}} private boolean empty = false;
|
||||
public void Get_next(List_adp wkr_list) {
|
||||
synchronized (thread_lock) {
|
||||
// pool already marked exhausted by another wkr; return;
|
||||
public void Get_next(Xomp_mgr_db mgr_db, String machine_name, List_adp wkr_list) {
|
||||
synchronized (thread_lock) { // LOCK:shared by multiple wkrs
|
||||
// all pages read; "empty" marked done by another wkr; return;
|
||||
if (empty) return;
|
||||
int wkr_end = pool_idx + num_pages_per_wkr;
|
||||
|
||||
// need pages to fulfill request
|
||||
if (wkr_end > pool_len) {
|
||||
this.pool = loader.Load(pool, pool_idx, pool_len);
|
||||
this.pool = loader.Load(mgr_db, machine_name, pool, pool_idx, pool_len);
|
||||
this.pool_idx = 0;
|
||||
this.pool_len = pool.Len();
|
||||
if (pool_len == 0) { // no more pages; return;
|
||||
@@ -15,13 +15,13 @@ GNU Affero General Public License for more details.
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.addons.bldrs.mass_parses.parses; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.mass_parses.*;
|
||||
import gplx.dbs.*;
|
||||
class Xomp_page_pool_loader {
|
||||
package gplx.xowa.addons.bldrs.mass_parses.parses.pools; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.mass_parses.*; import gplx.xowa.addons.bldrs.mass_parses.parses.*;
|
||||
import gplx.dbs.*; import gplx.xowa.addons.bldrs.mass_parses.dbs.*;
|
||||
import gplx.xowa.addons.bldrs.mass_parses.parses.wkrs.*; import gplx.xowa.addons.bldrs.mass_parses.parses.utls.*; import gplx.xowa.addons.bldrs.mass_parses.parses.locks.*;
|
||||
public class Xomp_page_pool_loader {
|
||||
private final Xow_wiki wiki;
|
||||
private final int num_pages_per_load;
|
||||
private final Db_attach_mgr attach_mgr;
|
||||
private int prv_uid = -1;
|
||||
public Xomp_page_pool_loader(Xow_wiki wiki, Db_conn make_conn, int num_pages_per_load) {
|
||||
this.wiki = wiki;
|
||||
this.make_conn = make_conn;
|
||||
@@ -35,18 +35,25 @@ class Xomp_page_pool_loader {
|
||||
return rdr.Move_next() ? rdr.Read_int("Count_of") : 0;
|
||||
} finally {rdr.Rls();}
|
||||
}
|
||||
public List_adp Load(List_adp list, int list_idx, int list_len) {
|
||||
// Builds and returns a new pool list.
//   mgr_db:       supplies the lock manager used to get / release the previous uid
//   machine_name: passed to the lock manager on both the get and the release call
//   list:         the old pool; items in [list_idx, list_len) are carried over first
// Returns a new List_adp; the list passed in is only read, never modified here.
// NOTE(review): this span comes from a rendered diff; the one-arg
// "this.Load_from_db(rv);" call looks like the pre-change line that the
// lock_mgr sequence below it replaced - confirm against the real file.
public List_adp Load(Xomp_mgr_db mgr_db, String machine_name, List_adp list, int list_idx, int list_len) {
|
||||
List_adp rv = List_adp_.New();
|
||||
|
||||
// add remaining pages from old pool to new_pool;
|
||||
for (int i = list_idx; i < list_len; ++i) {
|
||||
rv.Add((Xomp_page_itm)list.Get_at(i));
|
||||
}
|
||||
|
||||
// load pages into new pool
|
||||
this.Load_from_db(rv);
|
||||
Xomp_lock_mgr lock_mgr = mgr_db.Lock_mgr();
|
||||
// fetch the uid recorded for the previous load; the matching release is in the finally below
int uid_db = lock_mgr.Uid_prv__get(machine_name);
|
||||
// end-of-stream marker: return only the carried-over items.
// NOTE(review): this early return does not call Uid_prv__rls - presumably no release is needed for Uid__eos; verify against Xomp_lock_mgr
if (uid_db == Xomp_lock_mgr__fsys.Uid__eos) return rv; // assert that uids must be incrementally larger; handle one machine reaching end, and putting -1 in queue;
|
||||
// stays 0 if Load_from_db throws; that 0 is then what gets passed to Uid_prv__rls
int uid_new = 0;
|
||||
try {uid_new = this.Load_from_db(rv, uid_db);}
|
||||
// always hand the uid back, even when the load failed
finally {lock_mgr.Uid_prv__rls(machine_name, uid_new);}
|
||||
Gfo_usr_dlg_.Instance.Note_many("", "", "fetched new pool: old=~{0} new=~{1}", uid_db, uid_new);
|
||||
return rv;
|
||||
}
|
||||
private void Load_from_db(List_adp list) {
|
||||
private int Load_from_db(List_adp list, int uid_prv) {
|
||||
// prepare for page_tbl
|
||||
String sql = String_.Format(String_.Concat_lines_nl_skip_last // ANSI.Y
|
||||
( "SELECT mp.xomp_uid"
|
||||
@@ -59,17 +66,18 @@ class Xomp_page_pool_loader {
|
||||
, "WHERE mp.xomp_uid > {0}"
|
||||
, "AND mp.page_status = 0"
|
||||
, "LIMIT {1}"
|
||||
), prv_uid, num_pages_per_load);
|
||||
), uid_prv, num_pages_per_load);
|
||||
this.attach_mgr.Conn_links_(new Db_attach_itm("page_db", wiki.Data__core_mgr().Db__core().Conn()));
|
||||
sql = attach_mgr.Resolve_sql(sql);
|
||||
|
||||
// run page_tbl
|
||||
int rv = -1;
|
||||
Xomp_text_db_loader text_db_loader = new Xomp_text_db_loader(wiki);
|
||||
attach_mgr.Attach();
|
||||
Db_rdr rdr = make_conn.Stmt_sql(sql).Exec_select__rls_auto();
|
||||
try {
|
||||
while (rdr.Move_next()) {
|
||||
prv_uid = rdr.Read_int("xomp_uid");
|
||||
rv = rdr.Read_int("xomp_uid");
|
||||
int text_db_id = rdr.Read_int("page_text_db_id");
|
||||
Xomp_page_itm ppg = new Xomp_page_itm(rdr.Read_int("page_id"));
|
||||
ppg.Init_by_page
|
||||
@@ -84,5 +92,6 @@ class Xomp_page_pool_loader {
|
||||
attach_mgr.Detach();
|
||||
|
||||
text_db_loader.Load();
|
||||
return rv;
|
||||
}
|
||||
}
|
||||
@@ -15,40 +15,36 @@ GNU Affero General Public License for more details.
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.addons.bldrs.mass_parses.parses; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.mass_parses.*;
|
||||
package gplx.xowa.addons.bldrs.mass_parses.parses.utls; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.mass_parses.*; import gplx.xowa.addons.bldrs.mass_parses.parses.*;
|
||||
import gplx.dbs.*;
|
||||
import gplx.xowa.parsers.*; import gplx.xowa.parsers.lnkis.*; import gplx.xowa.parsers.lnkis.files.*;
|
||||
import gplx.xowa.files.*; import gplx.xowa.addons.bldrs.files.cmds.*; import gplx.xowa.addons.bldrs.files.dbs.*;
|
||||
import gplx.xowa.wikis.nss.*; import gplx.xowa.wikis.domains.*;
|
||||
public class Xomp_file_logger implements Xop_file_logger {
|
||||
public class Xomp_lnki_temp_wkr implements Xop_file_logger {
|
||||
private final Xob_lnki_temp_tbl tbl;
|
||||
private final Xowe_wiki commons_wiki;
|
||||
private boolean ns_file_is_case_match_all = true;
|
||||
public Xomp_file_logger(Xowe_wiki wiki, Db_conn wkr_conn) {
|
||||
public Xomp_lnki_temp_wkr(Xowe_wiki wiki, Db_conn wkr_conn) {
|
||||
this.tbl = new Xob_lnki_temp_tbl(wkr_conn); wkr_conn.Meta_tbl_assert(tbl);
|
||||
this.commons_wiki = wiki.Appe().Wiki_mgr().Get_by_or_make(Xow_domain_itm_.Bry__commons);
|
||||
this.ns_file_is_case_match_all = wiki.Init_assert().Ns_mgr().Ns_file().Case_match() == Xow_ns_case_.Tid__all; // NOTE: wiki must be init'd;
|
||||
}
|
||||
public void Bgn() {
|
||||
tbl.Insert_bgn();
|
||||
tbl.Insert_stmt_make();
|
||||
}
|
||||
// Records one file lnki by inserting a row through tbl.Insert_cmd_by_batch.
//   ctx:        supplies the current page (ns ord and page id) for the row
//   lnki:       the link token; its title, type, size, upright, time and page values go into the row
//   caller_tid: stored in the row, but overridden with Tid__media when the lnki is in the Media namespace
// Literal links are skipped and produce no row.
// NOTE(review): this span comes from a rendered diff; the duplicated
// caller_tid check and the two ttl_commons declarations look like pre-change /
// post-change pairs - confirm against the real file.
public void Log_file(Xop_ctx ctx, Xop_lnki_tkn lnki, byte caller_tid) {
|
||||
if (lnki.Ttl().ForceLiteralLink()) return; // ignore literal links which create a link to file, but do not show the image; EX: [[:File:A.png|thumb|120px]] creates a link to File:A.png, regardless of other display-oriented args
|
||||
|
||||
// get caller_tid / ttl
|
||||
if (lnki.Ns_id() == Xow_ns_.Tid__media) caller_tid = Xop_file_logger_.Tid__media;
|
||||
|
||||
// get lnki_data
|
||||
byte[] ttl = lnki.Ttl().Page_db();
|
||||
Xof_ext ext = Xof_ext_.new_by_ttl_(ttl);
|
||||
byte[] ttl_commons = Xomp_file_logger.To_commons_ttl(ns_file_is_case_match_all, commons_wiki, ttl);
|
||||
byte[] ttl_commons = Xomp_lnki_temp_wkr.To_commons_ttl(ns_file_is_case_match_all, commons_wiki, ttl);
|
||||
if (lnki.Ns_id() == Xow_ns_.Tid__media) caller_tid = Xop_file_logger_.Tid__media;
|
||||
|
||||
// do insert
|
||||
tbl.Insert_cmd_by_batch(ctx.Page().Bldr__ns_ord(), ctx.Page().Db().Page().Id(), ttl, ttl_commons, Byte_.By_int(ext.Id()), lnki.Lnki_type(), caller_tid, lnki.W(), lnki.H(), lnki.Upright(), lnki.Time(), lnki.Page());
|
||||
}
|
||||
public void End() {
|
||||
tbl.Insert_end();
|
||||
}
|
||||
public void End() {}
|
||||
public static byte[] To_commons_ttl(boolean ns_file_is_case_match_all, Xowe_wiki commons_wiki, byte[] ttl_bry) { // handle case-sensitive wikis (en.d) vs case-insensitive commons
|
||||
if (!ns_file_is_case_match_all) return null; // return "" if wiki matches common
|
||||
Xoa_ttl ttl = Xoa_ttl.Parse(commons_wiki, Xow_ns_.Tid__file, ttl_bry);
|
||||
@@ -15,11 +15,11 @@ GNU Affero General Public License for more details.
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.addons.bldrs.mass_parses.parses; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.mass_parses.*;
|
||||
package gplx.xowa.addons.bldrs.mass_parses.parses.utls; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.mass_parses.*; import gplx.xowa.addons.bldrs.mass_parses.parses.*;
|
||||
import gplx.dbs.*;
|
||||
import gplx.core.ios.*;
|
||||
import gplx.xowa.wikis.data.tbls.*;
|
||||
class Xomp_text_db_loader {
|
||||
public class Xomp_text_db_loader {
|
||||
private final Xow_wiki wiki;
|
||||
private final Ordered_hash text_db_hash = Ordered_hash_.New();
|
||||
private final Io_stream_zip_mgr zip_mgr;
|
||||
@@ -15,10 +15,10 @@ GNU Affero General Public License for more details.
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.addons.bldrs.mass_parses.parses; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.mass_parses.*;
|
||||
package gplx.xowa.addons.bldrs.mass_parses.parses.utls; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.mass_parses.*; import gplx.xowa.addons.bldrs.mass_parses.parses.*;
|
||||
import gplx.dbs.*;
|
||||
import gplx.xowa.wikis.caches.*;
|
||||
class Xomp_tmpl_cache_bldr {
|
||||
public class Xomp_tmpl_cache_bldr {
|
||||
public static Xow_page_cache New(Xowe_wiki wiki, boolean fill_all) {
|
||||
Xow_page_cache rv = new Xow_page_cache(wiki);
|
||||
if (fill_all) Fill_all(rv, wiki);
|
||||
@@ -15,7 +15,7 @@ GNU Affero General Public License for more details.
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.addons.bldrs.mass_parses.parses; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.mass_parses.*;
|
||||
package gplx.xowa.addons.bldrs.mass_parses.parses.wkrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.mass_parses.*; import gplx.xowa.addons.bldrs.mass_parses.parses.*;
|
||||
import gplx.dbs.*;
|
||||
import gplx.xowa.wikis.nss.*; import gplx.xowa.htmls.core.bldrs.*; import gplx.xowa.htmls.core.dbs.*;
|
||||
class Xob_hdump_tbl_retriever__xomp implements Xob_hdump_tbl_retriever {
|
||||
@@ -15,33 +15,48 @@ GNU Affero General Public License for more details.
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.addons.bldrs.mass_parses.parses; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.mass_parses.*;
|
||||
package gplx.xowa.addons.bldrs.mass_parses.parses.wkrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.mass_parses.*; import gplx.xowa.addons.bldrs.mass_parses.parses.*;
|
||||
import gplx.dbs.*; import gplx.xowa.addons.bldrs.mass_parses.dbs.*;
|
||||
import gplx.xowa.files.origs.*;
|
||||
import gplx.xowa.htmls.core.bldrs.*;
|
||||
import gplx.xowa.parsers.*;
|
||||
class Xomp_parse_wkr implements Gfo_invk {
|
||||
private final Xomp_parse_mgr mgr;
|
||||
import gplx.xowa.parsers.*;
|
||||
import gplx.xowa.addons.bldrs.mass_parses.parses.mgrs.*; import gplx.xowa.addons.bldrs.mass_parses.parses.utls.*; import gplx.xowa.addons.bldrs.mass_parses.parses.*; import gplx.xowa.addons.bldrs.mass_parses.parses.pools.*;
|
||||
public class Xomp_parse_wkr implements Gfo_invk {
|
||||
// mgr vars
|
||||
private final Xomp_parse_mgr mgr;
|
||||
private final Xomp_mgr_db mgr_db;
|
||||
private final Xomp_prog_mgr prog_mgr;
|
||||
private final Xomp_page_pool page_pool;
|
||||
private final Xof_orig_wkr orig_wkr;
|
||||
private final int idx;
|
||||
private final List_adp list = List_adp_.New(); private int list_idx = 0, list_len = 0;
|
||||
private final Xof_orig_wkr file_orig_wkr;
|
||||
|
||||
// cfg vars
|
||||
private final Xomp_parse_mgr_cfg cfg;
|
||||
private int done_count; private long done_time;
|
||||
private Xomp_wkr_db wkr_db; private int cleanup_interval, commit_interval;
|
||||
private int cleanup_interval, commit_interval;
|
||||
private boolean log_file_lnkis;
|
||||
public Xomp_parse_wkr(Xomp_parse_mgr mgr, Xowe_wiki wiki, Xof_orig_wkr orig_wkr, Xomp_page_pool page_pool, int idx, Xomp_parse_mgr_cfg cfg, int cleanup_interval, int commit_interval, boolean log_file_lnkis) {
|
||||
this.mgr = mgr; this.wiki = wiki; this.orig_wkr = orig_wkr;
|
||||
this.page_pool = page_pool;
|
||||
this.idx = idx;
|
||||
this.wkr_db = mgr.Db_core().Wkr_db(Bool_.Y, idx); // NOTE: must go in ctor, or else thread issues
|
||||
|
||||
// wkr vars
|
||||
private final Xowe_wiki wiki;
|
||||
private final Xob_hdump_bldr hdump_bldr = new Xob_hdump_bldr();
|
||||
private final int uid;
|
||||
private Xomp_wkr_db wkr_db;
|
||||
|
||||
private final List_adp list = List_adp_.New(); private int list_idx = 0, list_len = 0;
|
||||
private int done_count; private long done_time;
|
||||
// Constructor: stores the shared manager-level objects, copies the interval /
// logging settings out of cfg, and creates this worker's own db from the wiki
// and uid.
//   mgr, mgr_db, page_pool, prog_mgr, file_orig_wkr: stored as-is in fields of the same name
//   cfg:  stored, and also read for Cleanup_interval / Commit_interval / Log_file_lnkis
//   wiki: stored; also passed to Xomp_mgr_db.New__url when creating wkr_db
//   uid:  stored; also passed to Xomp_wkr_db.New
// NOTE(review): this span comes from a rendered diff; the three
// "this.x = x;" lines under "cfg vars" reference names that are not parameters
// of this signature and look like pre-change lines replaced by the
// "this.x = cfg.X();" lines after them - confirm against the real file.
public Xomp_parse_wkr(Xomp_parse_mgr mgr, Xomp_parse_mgr_cfg cfg, Xomp_mgr_db mgr_db, Xomp_page_pool page_pool, Xomp_prog_mgr prog_mgr, Xof_orig_wkr file_orig_wkr, Xowe_wiki wiki, int uid) {
|
||||
// mgr vars
|
||||
this.mgr = mgr; this.mgr_db = mgr_db;
|
||||
this.page_pool = page_pool; this.prog_mgr = prog_mgr; this.file_orig_wkr = file_orig_wkr;
|
||||
|
||||
// cfg vars
|
||||
this.cfg = cfg;
|
||||
this.cleanup_interval = cleanup_interval;
|
||||
this.commit_interval = commit_interval;
|
||||
this.log_file_lnkis = log_file_lnkis;
|
||||
this.cleanup_interval = cfg.Cleanup_interval();
|
||||
this.commit_interval = cfg.Commit_interval();
|
||||
this.log_file_lnkis = cfg.Log_file_lnkis();
|
||||
|
||||
// wkr-specific vars
|
||||
this.wiki = wiki; this.uid = uid;
|
||||
// worker db is created here in the constructor, keyed by this worker's uid
this.wkr_db = Xomp_wkr_db.New(Xomp_mgr_db.New__url(wiki), uid);
|
||||
}
|
||||
public Xowe_wiki Wiki() {return wiki;} private final Xowe_wiki wiki;
|
||||
public Xob_hdump_bldr Hdump_bldr() {return hdump_bldr;} private final Xob_hdump_bldr hdump_bldr = new Xob_hdump_bldr();
|
||||
public void Exec() {
|
||||
// init
|
||||
Xow_parser_mgr parser_mgr = wiki.Parser_mgr();
|
||||
@@ -49,30 +64,41 @@ class Xomp_parse_wkr implements Gfo_invk {
|
||||
// disable file download
|
||||
wiki.File_mgr().Init_file_mgr_by_load(wiki); // must happen after fsdb.make
|
||||
wiki.File__bin_mgr().Wkrs__del(gplx.xowa.files.bins.Xof_bin_wkr_.Key_http_wmf); // must happen after init_file_mgr_by_load; remove wmf wkr, else will try to download images during parsing
|
||||
wiki.File__orig_mgr().Wkrs__set(orig_wkr);
|
||||
wiki.File__orig_mgr().Wkrs__set(file_orig_wkr);
|
||||
wiki.File_mgr().Fsdb_mode().Tid__v2__mp__y_();
|
||||
|
||||
// disable categories else progress messages written (also for PERF)
|
||||
wiki.Html_mgr().Page_wtr_mgr().Wkr(gplx.xowa.wikis.pages.Xopg_page_.Tid_read).Ctgs_enabled_(false);
|
||||
|
||||
// enable lnki_temp
|
||||
Xomp_file_logger logger = null;
|
||||
Xomp_lnki_temp_wkr logger = null;
|
||||
if (log_file_lnkis) {
|
||||
logger = new Xomp_file_logger(wiki, wkr_db.Conn());
|
||||
logger = new Xomp_lnki_temp_wkr(wiki, wkr_db.Conn());
|
||||
parser_mgr.Ctx().Lnki().File_logger_(logger);
|
||||
logger.Bgn();
|
||||
}
|
||||
|
||||
// enable hdump
|
||||
hdump_bldr.Enabled_(cfg.Hdump_enabled()).Hzip_enabled_(cfg.Hzip_enabled()).Hzip_diff_(cfg.Hzip_enabled()).Zip_tid_(cfg.Zip_tid()).Init(wiki, wkr_db.Conn(), new Xob_hdump_tbl_retriever__xomp(wkr_db.Html_tbl()));
|
||||
hdump_bldr.Enabled_(cfg.Hdump_enabled()).Hzip_enabled_(cfg.Hzip_enabled()).Hzip_diff_(cfg.Hdiff_enabled()).Zip_tid_(cfg.Zip_tid());
|
||||
hdump_bldr.Init(wiki, wkr_db.Conn(), new Xob_hdump_tbl_retriever__xomp(wkr_db.Html_tbl()));
|
||||
wkr_db.Conn().Txn_bgn("xomp");
|
||||
|
||||
// set status to running
|
||||
mgr_db.Wkr_tbl().Update_status(uid, Xomp_wkr_tbl.Status__running);
|
||||
|
||||
// main loop
|
||||
while (true) {
|
||||
Xomp_page_itm ppg = Get_next(); if (ppg == Xomp_page_itm.Null) break; // no more pages
|
||||
// get page from page pool
|
||||
Xomp_page_itm ppg = Get_next();
|
||||
if (ppg == Xomp_page_itm.Null) {
|
||||
mgr_db.Wkr_tbl().Update_status(uid, Xomp_wkr_tbl.Status__sleeping);
|
||||
break; // no more pages
|
||||
}
|
||||
if (ppg.Text() == null) continue; // some pages have no text; ignore them else null ref; PAGE: it.d:miercuri DATE:2015-12-05
|
||||
|
||||
try {
|
||||
// init page
|
||||
long done_bgn = gplx.core.envs.Env_.TickCount();
|
||||
long done_bgn = gplx.core.envs.System_.Ticks();
|
||||
Xoa_ttl ttl = wiki.Ttl_parse(ppg.Ns_id(), ppg.Ttl_bry());
|
||||
Xoae_page wpg = Xoae_page.New(wiki, ttl);
|
||||
wpg.Db().Text().Text_bry_(ppg.Text());
|
||||
@@ -87,10 +113,10 @@ class Xomp_parse_wkr implements Gfo_invk {
|
||||
hdump_bldr.Insert(pctx, wpg);
|
||||
|
||||
// mark done for sake of progress
|
||||
mgr.Prog_mgr().Mark_done(ppg.Id());
|
||||
prog_mgr.Mark_done(ppg.Id());
|
||||
|
||||
// update stats
|
||||
long time_cur = gplx.core.envs.Env_.TickCount();
|
||||
long time_cur = gplx.core.envs.System_.Ticks();
|
||||
done_time += time_cur - done_bgn;
|
||||
done_bgn = time_cur;
|
||||
++done_count;
|
||||
@@ -111,18 +137,15 @@ class Xomp_parse_wkr implements Gfo_invk {
|
||||
}
|
||||
}
|
||||
|
||||
if (logger != null)
|
||||
logger.End();
|
||||
|
||||
// cleanup
|
||||
wkr_db.Conn().Txn_end(); // NOTE: must end txn before running update wkr_id
|
||||
mgr.Db_core().Update_wkr_id(idx, wkr_db.Conn());
|
||||
mgr.Wkrs_done_add_1();
|
||||
if (logger != null) logger.End();
|
||||
wkr_db.Conn().Txn_end();
|
||||
wkr_db.Conn().Rls_conn();
|
||||
mgr.Wkrs_done_add_1(); // NOTE: must release latch last else thread errors
|
||||
}
|
||||
public void Bld_stats(Bry_bfr bfr) {
|
||||
int done_time_in_sec = (int)(done_time / 1000); if (done_time_in_sec == 0) done_time_in_sec = 1;
|
||||
bfr.Add_int_pad_bgn(Byte_ascii.Space, 4, idx );
|
||||
bfr.Add_int_pad_bgn(Byte_ascii.Space, 4, uid );
|
||||
bfr.Add_int_pad_bgn(Byte_ascii.Space, 8, (int)(done_count / done_time_in_sec));
|
||||
bfr.Add_int_pad_bgn(Byte_ascii.Space, 8, done_count);
|
||||
bfr.Add_int_pad_bgn(Byte_ascii.Space, 8, done_time_in_sec);
|
||||
@@ -130,8 +153,9 @@ class Xomp_parse_wkr implements Gfo_invk {
|
||||
}
|
||||
private Xomp_page_itm Get_next() {
|
||||
if (list_idx == list_len) {
|
||||
mgr_db.Wkr_tbl().Update_exec(uid, done_count, done_time);
|
||||
list.Clear();
|
||||
page_pool.Get_next(list);
|
||||
page_pool.Get_next(mgr_db, cfg.Wkr_machine_name(), list);
|
||||
list_len = list.Len();
|
||||
if (list_len == 0) return Xomp_page_itm.Null;
|
||||
list_idx = 0;
|
||||
@@ -32,9 +32,15 @@ public class Imglnk_reg_tbl implements Db_tbl {
|
||||
}
|
||||
// Returns the Db_conn held in this table's conn field.
public Db_conn Conn() {return conn;}
|
||||
// Returns the table name held in the tbl_name field.
public String Tbl_name() {return tbl_name;}
|
||||
// Returns the field name held in fld__img_src; the same field is used as the criteria of Select_by_ttl_stmt.
public String Fld__img_src() {return fld__img_src;}
|
||||
// Creates the table by passing a Dbmeta_tbl_itm built from tbl_name and flds to conn.Meta_tbl_create.
public void Create_tbl() {conn.Meta_tbl_create(Dbmeta_tbl_itm.New(tbl_name, flds));}
|
||||
// Creates an index on this table through conn.Meta_idx_create, passing fld__img_src twice and then fld__img_repo.
// NOTE(review): presumably the first fld__img_src names the index and the remaining args are the indexed columns (img_src, img_repo) -- confirm against Meta_idx_create.
public void Create_idx__src_ttl() {conn.Meta_idx_create(tbl_name, fld__img_src, fld__img_src, fld__img_repo);}
|
||||
// Creates an index on this table through conn.Meta_idx_create, passing fld__img_trg twice and then fld__img_repo.
// NOTE(review): presumably the first fld__img_trg names the index and the remaining args are the indexed columns (img_trg, img_repo) -- confirm against Meta_idx_create.
public void Create_idx__trg_ttl() {conn.Meta_idx_create(tbl_name, fld__img_trg, fld__img_trg, fld__img_repo);}
|
||||
public Db_stmt Select_by_ttl_stmt() {
|
||||
if (select_by_ttl_stmt == null)
|
||||
select_by_ttl_stmt = conn.Stmt_select(tbl_name, flds, fld__img_src);
|
||||
return select_by_ttl_stmt;
|
||||
} private Db_stmt select_by_ttl_stmt;
|
||||
public void Insert(Db_conn conn, byte repo_id, Xowe_wiki wiki) {
|
||||
String repo_id_str = Byte_.To_str(repo_id);
|
||||
Db_attach_mgr attach_mgr = new Db_attach_mgr(conn);
|
||||
@@ -53,17 +59,20 @@ public class Imglnk_reg_tbl implements Db_tbl {
|
||||
);
|
||||
attach_mgr.Exec_sql_w_msg("imglnk_reg.insert.redirect: repo=" + repo_id_str, sql);
|
||||
|
||||
attach_mgr.Conn_links_(new Db_attach_itm("page_db", wiki.Data__core_mgr().Db__core().Tbl__page().Conn()));
|
||||
Xob_db_file image_db = Xob_db_file.New__wiki_image(wiki.Fsys_mgr().Root_dir());
|
||||
attach_mgr.Conn_links_(new Db_attach_itm("image_db", image_db.Conn()));
|
||||
sql = String_.Concat_lines_nl_skip_last // ANSI.Y
|
||||
( "INSERT INTO imglnk_reg (img_src, img_trg, img_repo, img_count)"
|
||||
, "SELECT ilt.img_name, ilt.img_name, " + repo_id_str + ", Count(ilt.img_name)"
|
||||
, "FROM imglnk_tmp ilt"
|
||||
, " JOIN <page_db>page p ON p.page_namespace = 6 AND p.page_title = ilt.img_name"
|
||||
, " JOIN <image_db>image i ON i.img_name = ilt.img_name"
|
||||
, " LEFT JOIN imglnk_reg il ON il.img_src = ilt.img_name"
|
||||
, "WHERE il.img_src IS NULL"
|
||||
, "GROUP BY ilt.img_name"
|
||||
);
|
||||
attach_mgr.Exec_sql_w_msg("imglnk_reg.insert.direct: repo=" + repo_id_str, sql);
|
||||
}
|
||||
public void Rls() {}
|
||||
public void Rls() {
|
||||
select_by_ttl_stmt = Db_stmt_.Rls(select_by_ttl_stmt);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -16,17 +16,61 @@ You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.addons.bldrs.wmdumps.imglinks; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.wmdumps.*;
|
||||
import gplx.xowa.files.origs.*;
|
||||
import gplx.dbs.*; import gplx.xowa.bldrs.*;
|
||||
import gplx.xowa.files.origs.*; import gplx.xowa.files.repos.*;
|
||||
public class Xof_orig_wkr__img_links implements Xof_orig_wkr {
|
||||
private final Ordered_hash hash = Ordered_hash_.New_bry();
|
||||
private final Hash_adp_bry hash = Hash_adp_bry.cs();
|
||||
private Db_conn imglnk_conn;
|
||||
public Xof_orig_wkr__img_links(Xowe_wiki wiki) {
|
||||
this.wiki = wiki;
|
||||
}
|
||||
public byte Tid() {return Xof_orig_wkr_.Tid_xowa_img_links;}
|
||||
public Xof_orig_itm Find_as_itm(byte[] ttl, int list_idx, int list_len) {return (Xof_orig_itm)hash.Get_by(ttl);}
|
||||
public Xof_orig_itm Find_as_itm(byte[] ttl, int list_idx, int list_len) {
|
||||
Xof_orig_itm rv = (Xof_orig_itm)hash.Get_by(ttl);
|
||||
if (rv == Missing) return Xof_orig_itm.Null;
|
||||
else if (rv == null) rv = Load_from_db(ttl);
|
||||
return rv == Missing ? Xof_orig_itm.Null : rv;
|
||||
}
|
||||
public void Find_by_list(Ordered_hash rv, List_adp itms) {throw Err_.new_unimplemented();}
|
||||
public boolean Add_orig(byte repo, byte[] page, int ext_id, int w, int h, byte[] redirect) {return false;}
|
||||
public void Db_txn_save() {}
|
||||
public void Db_rls() {}
|
||||
|
||||
public Xowe_wiki Wiki() {return wiki;} private final Xowe_wiki wiki;
|
||||
public Imglnk_reg_tbl Tbl__imglnk_reg() {
|
||||
if (tbl__imglnk_reg == null)
|
||||
this.tbl__imglnk_reg = new Imglnk_reg_tbl(imglnk_conn);
|
||||
return tbl__imglnk_reg;
|
||||
} private Imglnk_reg_tbl tbl__imglnk_reg;
|
||||
public Db_stmt Stmt__image__select(byte repo, Xowe_wiki wiki) {
|
||||
Db_stmt rv = stmt__image__select[repo];
|
||||
if (rv == null) {
|
||||
rv = Make__stmt__image__select(repo, wiki);
|
||||
stmt__image__select[repo] = rv;
|
||||
}
|
||||
return rv;
|
||||
} private Db_stmt[] stmt__image__select = new Db_stmt[2];
|
||||
private Db_stmt Make__stmt__image__select(byte repo, Xowe_wiki wiki) {
|
||||
Xob_db_file image_db = Xob_db_file.New__wiki_image(wiki.Fsys_mgr().Root_dir());
|
||||
return image_db.Conn().Stmt_select
|
||||
( "image"
|
||||
, String_.Ary("img_media_type", "img_minor_mime", "img_size", "img_width", "img_height", "img_bits", "img_ext_id", "img_timestamp")
|
||||
, String_.Ary("img_name")
|
||||
);
|
||||
}
|
||||
public void Add_by_db(Xof_orig_itm itm) {
|
||||
hash.Add(itm.Ttl(), itm);
|
||||
}
|
||||
private Xof_orig_itm Load_from_db(byte[] ttl) {
|
||||
if (imglnk_conn == null)
|
||||
imglnk_conn = Xob_db_file.New__img_link(wiki).Conn();
|
||||
Xof_orig_itm rv = Xof_orig_wkr__img_links_.Load_itm(this, imglnk_conn, wiki, ttl);
|
||||
if (rv == Xof_orig_itm.Null)
|
||||
rv = Missing;
|
||||
synchronized (hash) { // LOCK:used by multiple threads in xomp
|
||||
hash.Add(ttl, rv);
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
private static final Xof_orig_itm Missing = new Xof_orig_itm(Byte_.Max_value_127, Bry_.Empty, -1, -1, -1, Bry_.Empty);
|
||||
}
|
||||
|
||||
@@ -20,16 +20,46 @@ import gplx.dbs.*;
|
||||
import gplx.xowa.bldrs.*;
|
||||
import gplx.xowa.files.repos.*; import gplx.xowa.files.origs.*;
|
||||
public class Xof_orig_wkr__img_links_ {
|
||||
public static Xof_orig_wkr__img_links Load_all(Xowe_wiki wiki) {
|
||||
Xof_orig_wkr__img_links rv = new Xof_orig_wkr__img_links();
|
||||
|
||||
public static void Load_all(Xof_orig_wkr__img_links wkr) {
|
||||
Xowe_wiki wiki = wkr.Wiki();
|
||||
Db_conn conn = Xob_db_file.New__img_link(wiki).Conn();
|
||||
Load_by_wiki(rv, conn, Xof_repo_itm_.Repo_local , wiki);
|
||||
Load_by_wiki(rv, conn, Xof_repo_itm_.Repo_remote, wiki.Appe().Wiki_mgr().Wiki_commons());
|
||||
|
||||
return rv;
|
||||
Load_all_by_wiki(wkr, conn, Xof_repo_itm_.Repo_local , wiki);
|
||||
Load_all_by_wiki(wkr, conn, Xof_repo_itm_.Repo_remote, wiki.Appe().Wiki_mgr().Wiki_commons());
|
||||
}
|
||||
private static void Load_by_wiki(Xof_orig_wkr__img_links rv, Db_conn conn, byte repo_id, Xowe_wiki wiki) {
|
||||
public static Xof_orig_itm Load_itm(Xof_orig_wkr__img_links wkr, Db_conn conn, Xowe_wiki wiki, byte[] ttl) {
|
||||
Imglnk_reg_tbl imglnk_reg_tbl = wkr.Tbl__imglnk_reg();
|
||||
Db_rdr rdr = imglnk_reg_tbl.Select_by_ttl_stmt().Clear().Crt_bry_as_str("img_src", ttl).Exec_select__rls_manual();
|
||||
byte img_repo = Byte_.Max_value_127;
|
||||
byte[] img_trg = null;
|
||||
try {
|
||||
if (rdr.Move_next()) {
|
||||
img_repo = rdr.Read_byte("img_repo");
|
||||
img_trg = rdr.Read_bry_by_str("img_trg");
|
||||
}
|
||||
else // ttl missing; EX:</*_File:Chehov_v_serpuhove11.JPG; DATE:2016-08-10
|
||||
return Xof_orig_itm.Null;
|
||||
} finally {rdr.Rls();}
|
||||
Xowe_wiki image_wiki = img_repo == Xof_repo_itm_.Repo_local ? wiki : wiki.Appe().Wiki_mgr().Wiki_commons();
|
||||
return Load_itm_by_wiki(wkr, conn, image_wiki, img_repo, ttl, img_trg);
|
||||
}
|
||||
private static Xof_orig_itm Load_itm_by_wiki(Xof_orig_wkr__img_links wkr, Db_conn conn, Xowe_wiki wiki, byte repo_id, byte[] img_src, byte[] img_trg) {
|
||||
Db_stmt stmt = wkr.Stmt__image__select(repo_id, wiki);
|
||||
Db_rdr rdr = stmt.Clear().Crt_bry_as_str("img_name", img_trg).Exec_select__rls_manual();
|
||||
try {
|
||||
return rdr.Move_next()
|
||||
? new Xof_orig_itm
|
||||
( repo_id
|
||||
, img_trg
|
||||
, rdr.Read_int("img_ext_id")
|
||||
, rdr.Read_int("img_width")
|
||||
, rdr.Read_int("img_height")
|
||||
, img_src
|
||||
)
|
||||
: Xof_orig_itm.Null;
|
||||
} finally {rdr.Rls();}
|
||||
}
|
||||
|
||||
private static void Load_all_by_wiki(Xof_orig_wkr__img_links rv, Db_conn conn, byte repo_id, Xowe_wiki wiki) {
|
||||
String sql = String_.Concat_lines_nl_skip_last // ANSI.Y
|
||||
( "SELECT ilr.img_repo, ilr.img_src, i.img_media_type, i.img_minor_mime, i.img_size, i.img_width, i.img_height, i.img_bits, i.img_ext_id, i.img_timestamp, ilr.img_trg AS img_redirect"
|
||||
, "FROM imglnk_reg ilr"
|
||||
|
||||
Reference in New Issue
Block a user