1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2026-03-02 03:49:30 +00:00

'v3.7.3.1'

This commit is contained in:
gnosygnu
2016-07-17 21:10:59 -04:00
parent b333db45f8
commit 7a851a41a5
290 changed files with 3048 additions and 2124 deletions

View File

@@ -18,8 +18,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
package gplx.xowa.addons.bldrs.files; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*;
import gplx.xowa.bldrs.wkrs.*;
import gplx.xowa.addons.bldrs.files.cmds.*;
import gplx.xowa.addons.bldrs.mass_parses.inits.*;
import gplx.xowa.addons.bldrs.mass_parses.makes.*;
import gplx.xowa.addons.bldrs.mass_parses.inits.*; import gplx.xowa.addons.bldrs.mass_parses.parses.*; import gplx.xowa.addons.bldrs.mass_parses.makes.*;
public class Xoax_builds_files_addon implements Xoax_addon_itm, Xoax_addon_itm__bldr {
public Xob_cmd[] Bldr_cmds() {
return new Xob_cmd[]
@@ -43,6 +42,7 @@ public class Xoax_builds_files_addon implements Xoax_addon_itm, Xoax_addon_itm__
, Xomp_init_cmd.Prototype
, Xomp_parse_cmd.Prototype
, Xomp_make_cmd.Prototype
};
}

View File

@@ -111,11 +111,11 @@ public class Xobldr__lnki_temp__create extends Xob_dump_mgr_base implements gplx
parser.Parse_text_to_defn_obj(ctx, ctx.Tkn_mkr(), wiki.Ns_mgr().Ns_template(), ttl_bry, page_src);
else {
parser.Parse_page_all_clear(root, ctx, ctx.Tkn_mkr(), page_src);
if ( gen_html
if ( gen_html
&& page.Redirect().Itms__len() == 0) // don't generate html for redirected pages
wiki.Html_mgr().Page_wtr_mgr().Gen(ctx.Page().Root_(root), Xopg_page_.Tid_read);
if (gen_hdump)
hdump_bldr.Insert(page.Root_(root));
hdump_bldr.Insert(ctx, page.Root_(root));
root.Clear();
}
}

View File

@@ -0,0 +1,59 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.addons.bldrs.mass_parses.dbs; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.mass_parses.*;
import gplx.dbs.*;
// Entry point to the mass-parse ("xomp") databases kept under a wiki's "tmp/xomp" directory:
// one manager db ("xomp.sqlite3") plus one worker db per "xomp_NNN" sub-directory.
public class Xomp_db_core {
	private final Object thread_lock = new Object();	// serializes Update_wkr_id calls against the manager db
	private final Io_url root_dir;
	private Xomp_mgr_db mgr_db;

	// Package-private: instances are obtained through New__make / New__load.
	Xomp_db_core(Io_url root_dir) {
		this.root_dir = root_dir;
		this.mgr_db = new Xomp_mgr_db(root_dir.GenSubFil("xomp.sqlite3"));
	}

	// Manager db wrapper created in the constructor.
	public Xomp_mgr_db Mgr_db() {
		return mgr_db;
	}

	// Returns a wrapper for worker db number idx; when delete is true the db file is deleted first.
	public Xomp_wkr_db Wkr_db(boolean delete, int idx) {
		String wkr_dir_name = "xomp_" + Int_.To_str_fmt(idx, "000");
		Io_url wkr_url = root_dir.GenSubFil_nest(wkr_dir_name, "xomp_wkr.sqlite3");
		if (delete) {
			Io_mgr.Instance.DeleteFil(wkr_url);
		}
		return new Xomp_wkr_db(idx, wkr_url);
	}

	// Number of entries returned by a directories-only query on root_dir (presumably one per worker).
	public int Wkr_count() {
		return Io_mgr.Instance.QueryDir_args(root_dir).DirOnly_().ExecAsUrlAry().length;
	}

	// Stamps xomp_page rows in the manager db with the worker id and html length,
	// for every page_id present in the worker db's html table.
	public void Update_wkr_id(int idx, Db_conn wkr_conn) {
		synchronized (thread_lock) {
			Db_attach_mgr attach_mgr = new Db_attach_mgr(mgr_db.Conn(), new Db_attach_itm("wkr_db", wkr_conn));
			String msg = "updating page_regy: wkr_id=" + idx;
			String sql = String_.Concat_lines_nl_skip_last // ANSI.Y
			( "UPDATE xomp_page"
			, "SET xomp_wkr_id = " + Int_.To_str(idx)
			, ", html_len = (SELECT length(body) FROM <wkr_db>html h WHERE h.page_id = xomp_page.page_id)"
			, "WHERE page_id IN (SELECT page_id FROM <wkr_db>html h)"
			);
			attach_mgr.Exec_sql_w_msg(msg, sql);
		}
	}

	// Deletes any existing tmp/xomp directory tree, then returns a core rooted there.
	public static Xomp_db_core New__make(Xowe_wiki wiki) {
		Io_url dir = Root_dir_of(wiki);
		Io_mgr.Instance.DeleteDirDeep(dir);
		return new Xomp_db_core(dir);
	}

	// Returns a core rooted at the existing tmp/xomp directory without deleting anything.
	public static Xomp_db_core New__load(Xowe_wiki wiki) {
		return new Xomp_db_core(Root_dir_of(wiki));
	}

	// Resolves "<wiki root>/tmp/xomp".
	private static Io_url Root_dir_of(Xowe_wiki wiki) {
		return wiki.Fsys_mgr().Root_dir().GenSubDir_nest("tmp", "xomp");
	}
}

View File

@@ -0,0 +1,29 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.addons.bldrs.mass_parses.dbs; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.mass_parses.*;
import gplx.dbs.*;
// Wrapper around the mass-parse manager database: its url, its connection and its xomp_page table.
public class Xomp_mgr_db {
	private Io_url url;
	private Db_conn conn;
	private Xomp_page_tbl page_tbl;

	// Opens (or auto-creates) the db at url and binds the page table to that connection.
	public Xomp_mgr_db(Io_url url) {
		this.url = url;
		Db_conn opened = Db_conn_bldr.Instance.Get_or_autocreate(true, url);
		this.conn = opened;
		this.page_tbl = new Xomp_page_tbl(opened);
	}

	public Io_url Url() {
		return url;
	}

	public Db_conn Conn() {
		return conn;
	}

	public Xomp_page_tbl Page_tbl() {
		return page_tbl;
	}
}

View File

@@ -0,0 +1,39 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.addons.bldrs.mass_parses.dbs; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.mass_parses.*;
import gplx.dbs.*;
// Table definition for "xomp_page": one row per page queued for mass parsing.
public class Xomp_page_tbl implements Db_tbl {
	private final Db_conn conn;
	private final String tbl_name;
	private final Dbmeta_fld_list flds = new Dbmeta_fld_list();

	// Declares the table's columns and registers this table with the connection via Rls_reg.
	public Xomp_page_tbl(Db_conn conn) {
		this.conn = conn;
		this.tbl_name = "xomp_page";
		Define_flds();
		conn.Rls_reg(this);
	}

	public String Tbl_name() {
		return tbl_name;
	}

	public Dbmeta_fld_list Flds() {
		return flds;
	}

	// Creates the table on the bound connection from the declared columns.
	public void Create_tbl() {
		Dbmeta_tbl_itm meta = Dbmeta_tbl_itm.New(tbl_name, flds);
		conn.Meta_tbl_create(meta);
	}

	// Nothing to release.
	public void Rls() {}

	// Column order matters: it is the order in which the fields are added to the list.
	private void Define_flds() {
		flds.Add_int_pkey("page_id");
		flds.Add_int("page_ns");
		flds.Add_byte("page_status");			// 0=wait; 1=done; 2=fail
		flds.Add_int_dflt("html_len", -1);
		flds.Add_int_dflt("xomp_wkr_id", -1);
	}
}

View File

@@ -0,0 +1,33 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.addons.bldrs.mass_parses.dbs; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.mass_parses.*;
import gplx.dbs.*;
import gplx.xowa.htmls.core.dbs.*;
// Wrapper around one mass-parse worker database: its index, url, connection and html table.
public class Xomp_wkr_db {
	private final int idx;
	private Io_url url;
	private Db_conn conn;
	private final Xowd_html_tbl html_tbl;

	// Opens (or auto-creates) the db at url, binds an html table to it and asserts that table on the connection.
	public Xomp_wkr_db(int idx, Io_url url) {
		this.idx = idx;
		this.url = url;
		Db_conn opened = Db_conn_bldr.Instance.Get_or_autocreate(true, url);
		Xowd_html_tbl tbl = new Xowd_html_tbl(opened);
		this.conn = opened;
		this.html_tbl = tbl;
		opened.Meta_tbl_assert(tbl);
	}

	public int Idx() {
		return idx;
	}

	public Io_url Url() {
		return url;
	}

	public Db_conn Conn() {
		return conn;
	}

	public Xowd_html_tbl Html_tbl() {
		return html_tbl;
	}
}

View File

@@ -18,35 +18,33 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
package gplx.xowa.addons.bldrs.mass_parses.inits; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.mass_parses.*;
import gplx.dbs.*;
import gplx.xowa.bldrs.*;
import gplx.xowa.addons.bldrs.mass_parses.parses.*; import gplx.xowa.addons.bldrs.mass_parses.dbs.*;
class Xomp_init_mgr {
private final Xow_wiki wiki;
public Xomp_init_mgr(Xow_wiki wiki) {this.wiki = wiki;}
private final Xowe_wiki wiki;
public Xomp_init_mgr(Xowe_wiki wiki) {this.wiki = wiki;}
public void Exec() {
Xob_db_file make_db = Xob_db_file.New__file_make(wiki.Fsys_mgr().Root_dir());
Db_conn conn = make_db.Conn();
// init
Xomp_db_core db_core = Xomp_db_core.New__make(wiki);
Xomp_page_tbl page_tbl = db_core.Mgr_db().Page_tbl();
// make table
conn.Meta_tbl_remake(Dbmeta_tbl_itm.New("mp_page", new Dbmeta_fld_itm[]
{ Dbmeta_fld_itm.new_int("page_id").Primary_y_()
, Dbmeta_fld_itm.new_bool("page_done")
}
, Dbmeta_idx_itm.new_normal_by_tbl("mp_page", "page_id__page_done", "page_id", "page_done")
));
// rebuild table
Db_conn mgr_conn = db_core.Mgr_db().Conn();
mgr_conn.Meta_tbl_remake(page_tbl);
// fill table
Db_attach_mgr attach_mgr = new Db_attach_mgr(conn, new Db_attach_itm("page_db", wiki.Data__core_mgr().Db__core().Conn()));
Db_attach_mgr attach_mgr = new Db_attach_mgr(mgr_conn, new Db_attach_itm("page_db", wiki.Data__core_mgr().Db__core().Conn()));
int[] ns_ary = new int[] {0, 4, 14};
int len = ns_ary.length;
String sql = String_.Concat_lines_nl_skip_last
( "INSERT INTO mp_page (page_id, page_done)"
, "SELECT page_id, 0"
, "FROM <page_db>page"
, "WHERE page_namespace = {0}"
, "AND page_is_redirect = 0"
( "INSERT INTO xomp_page (page_id, page_ns, page_status, html_len, xomp_wkr_id)"
, "SELECT p.page_id, p.page_namespace, 0, 0, 0"
, "FROM <page_db>page p"
, "WHERE p.page_namespace = {0}"
, "AND p.page_is_redirect = 0"
);
for (int i = 0; i < len; ++i) {
int ns_id = ns_ary[i];
attach_mgr.Exec_sql_w_msg("adding rows for mp_page: ns=" + ns_id, sql, ns_id);// ANSI.Y
attach_mgr.Exec_sql_w_msg("adding rows for xomp_page: ns=" + ns_id, sql, ns_id);// ANSI.Y
}
}
}

View File

@@ -0,0 +1,41 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.addons.bldrs.mass_parses.makes; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.mass_parses.*;
import gplx.dbs.*; import gplx.xowa.htmls.core.dbs.*;
import gplx.xowa.addons.bldrs.mass_parses.dbs.*;
// Reads html rows back out of the per-worker databases.
// Worker html tables are opened lazily, one slot per worker index, and cached for reuse.
class Xomp_html_db_rdr {
	private final Xowd_html_tbl[] src_tbls;	// indexed by wkr_id; a slot stays null until that worker is first read
	private final Xomp_db_core db;

	// Loads the existing xomp db core for the wiki and sizes the cache by its worker count.
	public Xomp_html_db_rdr(Xowe_wiki wiki) {
		this.db = Xomp_db_core.New__load(wiki);
		this.src_tbls = new Xowd_html_tbl[db.Wkr_count()];
	}

	// Fills rv with the html row for page_id from worker wkr_id, opening that worker's table on first use.
	public void Rows__get(Xowd_html_row rv, int wkr_id, int page_id) {
		Xowd_html_tbl src_tbl = src_tbls[wkr_id];
		if (src_tbl == null) {
			Db_conn wkr_conn = db.Wkr_db(Bool_.N, wkr_id).Conn();
			src_tbl = new Xowd_html_tbl(wkr_conn);
			src_tbls[wkr_id] = src_tbl;
		}
		src_tbl.Select_as_row(rv, page_id);
	}

	// Releases the connection of every worker table that was actually opened.
	public void Rls() {
		for (Xowd_html_tbl src_tbl : src_tbls) {
			// slots are filled lazily by Rows__get; a worker that was never read is still null
			if (src_tbl == null) continue;
			src_tbl.Conn().Rls_conn();
		}
	}
}

View File

@@ -0,0 +1,66 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.addons.bldrs.mass_parses.makes; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.mass_parses.*;
import gplx.dbs.*; import gplx.xowa.htmls.core.dbs.*; import gplx.xowa.wikis.data.*;
// Writes merged html rows into the wiki's html databases, starting a new database
// whenever the running html length would exceed the configured maximum.
class Xomp_html_db_wtr {
	private final long len_max;				// max accumulated html length per db; read from the import settings
	private final Xowe_wiki wiki; private final Xow_db_mgr db_mgr;
	private long len_cur;					// html length accumulated in the current db
	private Xow_db_file html_db; private Xowd_html_tbl html_tbl;

	// Captures the wiki's db manager and size limit; unless the html layout is "all",
	// existing html-data databases are deleted so the merge starts clean.
	public Xomp_html_db_wtr(Xowe_wiki wiki) {
		this.wiki = wiki; this.db_mgr = wiki.Data__core_mgr();
		this.len_max = wiki.Appe().Api_root().Bldr().Wiki().Import().Html_db_max();

		// delete all existing tbls
		if (!db_mgr.Props().Layout_html().Tid_is_all())
			wiki.Data__core_mgr().Dbs__delete_by_tid(Xow_db_file_.Tid__html_data);
	}

	// Id of the db currently being written; only valid after the first Tbls__get_or_new call.
	public int Cur_db_id() {return html_db.Id();}

	// Returns the table that the next row of html_len should be inserted into.
	// A db is opened on the first call; afterwards a new db is made whenever adding html_len would exceed len_max.
	// ns_id is accepted for the caller's convenience but not used here.
	public Xowd_html_tbl Tbls__get_or_new(int ns_id, long html_len) {
		boolean is_first = html_tbl == null;
		// len_cur > 0 prevents rolling over an empty db when a single row is larger than len_max
		boolean is_full = !is_first && len_cur > 0 && len_cur + html_len > len_max;
		if (is_first || is_full) {
			Commit();	// no-op on the first call; otherwise finalizes the db being closed

			// Only the first call may reuse an existing html db; a rollover must always make a new one,
			// else the lookup would hand back the db that was just committed and released.
			Xow_db_file existing = is_first ? wiki.Data__core_mgr().Dbs__get_by_tid_or_null(Xow_db_file_.Tid__html_data) : null;
			if (existing == null) {
				this.html_db = wiki.Data__core_mgr().Dbs__make_by_tid(Xow_db_file_.Tid__html_data);
				html_db.Conn().Txn_bgn("xomp.html_db_wtr");
				this.html_tbl = new Xowd_html_tbl(html_db.Conn());
				html_tbl.Create_tbl();
			}
			else {
				// Previously html_tbl was left unassigned on this path and null was returned to the caller.
				this.html_db = existing;
				html_db.Conn().Txn_bgn("xomp.html_db_wtr");
				this.html_tbl = new Xowd_html_tbl(html_db.Conn());
				// NOTE(review): assumes Meta_tbl_assert creates the table only when missing, as its use in Xomp_wkr_db suggests -- confirm
				html_db.Conn().Meta_tbl_assert(html_tbl);
			}
			len_cur = 0;	// restart the size counter for the db just opened
		}
		len_cur += html_len;
		return html_tbl;
	}

	// Commits the db currently being written.
	public void Rls() {
		this.Commit();
	}

	// Ends the transaction on the current html db, releases its connection,
	// and points the core db's page rows at this html db by id.
	private void Commit() {
		if (html_tbl == null) return;
		html_tbl.Conn().Txn_end();
		html_tbl.Conn().Rls_conn();

		// update page_ids
		String sql = String_.Format(String_.Concat_lines_nl_skip_last	// ANSI.Y
		( "UPDATE page"
		, "SET page_html_db_id = {0}"
		, "WHERE page_id IN (SELECT page_id FROM <html_db>html h)"
		), html_db.Id());
		Db_attach_mgr attach_mgr = new Db_attach_mgr(db_mgr.Db__core().Conn(), new Db_attach_itm("html_db", html_db.Conn()));
		attach_mgr.Exec_sql_w_msg("updating page_ids: " + Int_.To_str(html_db.Id()), sql);
	}
}

View File

@@ -0,0 +1,31 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.addons.bldrs.mass_parses.makes; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.mass_parses.*;
import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.wkrs.*;
// Builder command "wiki.mass_parse.make": runs Xomp_make_wkr against the wiki.
public class Xomp_make_cmd extends Xob_cmd__base {
	private static final String BLDR_CMD_KEY = "wiki.mass_parse.make";

	// Prototype instance with null builder and wiki; real instances come from Cmd_clone.
	public static final Xob_cmd Prototype = new Xomp_make_cmd(null, null);

	public Xomp_make_cmd(Xob_bldr bldr, Xowe_wiki wiki) {
		super(bldr, wiki);
	}

	@Override public String Cmd_key() {
		return BLDR_CMD_KEY;
	}

	@Override public Xob_cmd Cmd_clone(Xob_bldr bldr, Xowe_wiki wiki) {
		return new Xomp_make_cmd(bldr, wiki);
	}

	// Asserts the wiki is initialized, then executes the make worker.
	@Override public void Cmd_run() {
		wiki.Init_assert();
		new Xomp_make_wkr(wiki).Exec();
	}
}

View File

@@ -0,0 +1,71 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.addons.bldrs.mass_parses.makes; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.mass_parses.*;
import gplx.core.brys.*;
import gplx.dbs.*; import gplx.xowa.htmls.core.dbs.*; import gplx.xowa.addons.bldrs.mass_parses.dbs.*;
// Merges the html rows produced by the mass-parse workers into the wiki's html databases,
// walking xomp_page in the manager db one namespace at a time.
class Xomp_make_wkr {
	private final Int_flag_bldr src_body_flag_bldr = Xowd_html_tbl.Make_body_flag_bldr();
	private final Db_conn mgr_conn;
	private final Xomp_html_db_rdr html_db_rdr;
	private final Xomp_html_db_wtr html_db_wtr;
	private Xomp_db_core db;

	// Loads the existing xomp db core and creates the reader / writer pair.
	public Xomp_make_wkr(Xowe_wiki wiki) {
		this.db = Xomp_db_core.New__load(wiki);
		this.mgr_conn = db.Mgr_db().Conn();
		this.html_db_rdr = new Xomp_html_db_rdr(wiki);
		this.html_db_wtr = new Xomp_html_db_wtr(wiki);
	}

	public Xomp_db_core Db() {
		return db;
	}

	// Copies every parsed page of namespaces 0, 4 and 14, then releases all resources.
	public void Exec() {
		Xowd_html_row shared_row = new Xowd_html_row();	// one row object reused for every page
		for (int ns_id : new int[] {0, 4, 14}) {
			Make_ns(shared_row, ns_id);
		}
		this.Rls();
	}

	// Copies all xomp_page rows of one namespace in page_id order, reporting progress every 10000 rows.
	private void Make_ns(Xowd_html_row src_row, int ns_id) {
		String sql = String_.Format("SELECT * FROM xomp_page WHERE page_ns = {0} AND html_len != 0 ORDER BY page_id;", ns_id);	// NOTE: html_len == 0 when page failed
		Db_rdr rdr = mgr_conn.Stmt_sql(sql).Exec_select__rls_auto();	// ANSI.Y
		try {
			int count = 0;
			while (rdr.Move_next()) {
				Make_page(rdr, src_row, ns_id);
				count++;
				if (count % 10000 != 0) continue;
				String ns_str = Int_.To_str_pad_bgn_space(ns_id, 3);
				String db_str = Int_.To_str_pad_bgn_space(html_db_wtr.Cur_db_id(), 3);
				String count_str = Int_.To_str_pad_bgn_space(count, 8);
				Gfo_usr_dlg_.Instance.Prog_many("", "", "xomp.html.insert: ns=~{0} db=~{1} count=~{2}", ns_str, db_str, count_str);
			}
		}
		finally {
			rdr.Rls();
		}
	}

	// Reads one page's html row from its worker db and inserts it into the current target table.
	private void Make_page(Db_rdr rdr, Xowd_html_row src_row, int ns_id) {
		// locate and load the source row
		int page_id = rdr.Read_int("page_id");
		int html_len = rdr.Read_int("html_len");
		int wkr_id = rdr.Read_int("xomp_wkr_id");
		html_db_rdr.Rows__get(src_row, wkr_id, page_id);
		src_body_flag_bldr.Decode(src_row.Body_flag());

		// pick the target table (may roll over to a new db) and write
		Xowd_html_tbl trg_tbl = html_db_wtr.Tbls__get_or_new(ns_id, html_len);
		int body_flag_0 = src_body_flag_bldr.Get_as_int(0);
		int body_flag_1 = src_body_flag_bldr.Get_as_int(1);
		trg_tbl.Insert(src_row.Page_id(), src_row.Head_flag(), body_flag_0, body_flag_1, src_row.Display_ttl(), src_row.Content_sub(), src_row.Sidebar_div(), src_row.Body());
	}

	// Releases the manager connection, then the reader, then the writer.
	private void Rls() {
		mgr_conn.Rls_conn();
		html_db_rdr.Rls();
		html_db_wtr.Rls();
	}
}

View File

@@ -1,84 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.addons.bldrs.mass_parses.makes; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.mass_parses.*;
import gplx.dbs.*;
// Shared pool of pages handed out to parse workers in slices, with progress reporting.
// All mutable state is guarded by thread_lock.
class Xomp_page_pool {
	private final Object thread_lock = new Object();
	private Xomp_page_pool_loader loader;
	private List_adp pool = List_adp_.New(); private int pool_idx = 0, pool_len = 0;
	private Db_conn make_conn;
	private final Bry_bfr prog_bfr = Bry_bfr_.New();
	private int pages_done, pages_total;
	private long time_bgn, time_prv, time_done;
	private boolean empty = false;

	// Opens the make db connection for the wiki, creates the loader and resets the progress counters.
	public void Init(Xow_wiki wiki, int num_pages_per_load) {
		this.make_conn = gplx.xowa.bldrs.Xob_db_file.New__file_make(wiki.Fsys_mgr().Root_dir()).Conn();
		this.loader = new Xomp_page_pool_loader(wiki, make_conn, num_pages_per_load);
		this.pages_done = 0;
		this.time_bgn = this.time_prv = gplx.core.envs.Env_.TickCount();
		this.pages_total = loader.Get_pending_count();
	}

	// True once a load returned no pages.
	// Read under thread_lock because Get_next writes the flag under the same lock from other threads.
	public boolean Empty() {
		synchronized (thread_lock) {
			return empty;
		}
	}

	// Appends up to num_pages_per_wkr pages to wkr_list, reloading the pool when it cannot satisfy the request.
	// Adds nothing when the pool is exhausted.
	public void Get_next(List_adp wkr_list, int num_pages_per_wkr) {
		synchronized (thread_lock) {
			// pool already marked exhausted by another wkr; return;
			if (empty) return;

			int wkr_end = pool_idx + num_pages_per_wkr;
			// need pages to fulfill request
			if (wkr_end > pool_len) {
				this.pool = loader.Load(pool, pool_idx, pool_len);
				this.pool_idx = 0;
				this.pool_len = pool.Len();
				if (pool_len == 0) {	// no more pages; return;
					empty = true;
					return;
				}
				wkr_end = num_pages_per_wkr;	// recalc wkr_end
			}

			// reset wkr_end; needed for very last set
			if (wkr_end >= pool_len)
				wkr_end = pool_len;

			// add pages to wkr_list
			for (int i = pool_idx; i < wkr_end; ++i) {
				Xomp_page_itm page = (Xomp_page_itm)pool.Get_at(i);
				wkr_list.Add(page);
			}
			pool_idx = wkr_end;
		}
	}

	// Counts one finished page and, every 1000 pages, reports done / left / rate / elapsed / remaining.
	// The id argument is not used.
	public void Mark_done(int id) {
		synchronized (thread_lock) {
			pages_done += 1;
			if (pages_done % 1000 == 0) {
				long time_cur = gplx.core.envs.Env_.TickCount();
				int pages_left = pages_total - pages_done;
				time_done += (time_cur - time_prv);
				double rate_cur = pages_done / (time_done / Time_span_.Ratio_f_to_s);
				String time_past = gplx.xowa.addons.bldrs.centrals.utils.Time_dhms_.To_str(prog_bfr, (int)((time_cur - time_bgn) / 1000), true, 0);
				String time_left = gplx.xowa.addons.bldrs.centrals.utils.Time_dhms_.To_str(prog_bfr, (int)(pages_left / rate_cur), true, 0);
				Gfo_usr_dlg_.Instance.Prog_many("", "", "done=~{0} left=~{1} rate=~{2} time_past=~{3} time_left=~{4}", pages_done, pages_left, (int)rate_cur, time_past, time_left);
				time_prv = time_cur;
			}
		}
	}

	// Releases the make db connection opened by Init.
	public void Rls() {
		make_conn.Rls_conn();
	}
}

View File

@@ -15,16 +15,15 @@ GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.addons.bldrs.mass_parses.makes; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.mass_parses.*;
package gplx.xowa.addons.bldrs.mass_parses.parses; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.mass_parses.*;
import gplx.dbs.*;
import gplx.xowa.wikis.nss.*; import gplx.xowa.htmls.core.bldrs.*; import gplx.xowa.htmls.core.dbs.*;
class Xob_hdump_tbl_retriever__xomp implements Xob_hdump_tbl_retriever {
private final Db_conn conn;
private final Xowd_html_tbl tbl;
public Xob_hdump_tbl_retriever__xomp(Db_conn conn) {
this.conn = conn;
this.tbl = new Xowd_html_tbl(conn);
conn.Meta_tbl_assert(tbl);
public Xob_hdump_tbl_retriever__xomp(Xowd_html_tbl tbl) {
this.tbl = tbl;
this.conn = tbl.Conn();
}
public Xowd_html_tbl Get_html_tbl(Xow_ns ns, int prv_row_len) {
return tbl;

View File

@@ -0,0 +1,126 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.addons.bldrs.mass_parses.parses; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.mass_parses.*;
import gplx.dbs.*; import gplx.core.threads.utils.*;
// Producer side of the mass parse: pages pending rows out of xomp_page in batches,
// loads their text and feeds them to the workers through a bounded queue.
class Xomp_load_wkr implements Gfo_invk {
	private final Object thread_lock = new Object();	// guards the progress counters in Mark_done
	private final Xow_wiki wiki;
	private final Db_conn mgr_conn;
	private final Db_attach_mgr attach_mgr;
	private final Gfo_blocking_queue queue;
	private final int num_wkrs;
	private final Bry_bfr prog_bfr = Bry_bfr_.New();
	private int pages_done, pages_total;
	private long time_bgn, time_prv, time_done;

	// num_pages_in_pool is the queue capacity and also the batch size of each load query;
	// num_wkrs is the number of end-of-input markers queued once loading finishes.
	public Xomp_load_wkr(Xow_wiki wiki, Db_conn mgr_conn, int num_pages_in_pool, int num_wkrs) {
		this.wiki = wiki;
		this.mgr_conn = mgr_conn;
		this.attach_mgr = new Db_attach_mgr(mgr_conn);
		this.queue = new Gfo_blocking_queue(num_pages_in_pool);
		this.num_wkrs = num_wkrs;
		this.time_bgn = this.time_prv = gplx.core.envs.Env_.TickCount();
		this.pages_total = this.Get_pending_count();
	}

	// Number of xomp_page rows with page_status = 0; returns 0 when the query yields no row.
	public int Get_pending_count() {
		Db_rdr rdr = mgr_conn.Stmt_sql("SELECT Count(*) AS Count_of FROM xomp_page mp WHERE mp.page_status = 0").Exec_select__rls_auto();
		try		{return rdr.Move_next() ? rdr.Read_int("Count_of") : 0;}
		finally {rdr.Rls();}
	}

	// Takes the next page from the queue; Xomp_page_itm.Null marks the end of input.
	public Xomp_page_itm Take() {return (Xomp_page_itm)queue.Take();}

	// Loads batches until one comes back empty, then queues one Null marker per worker.
	private void Exec() {
		int prv_page_id = 0;
		while (prv_page_id != -1) {
			prv_page_id = Load_pages(prv_page_id);
		}
		for (int i = 0; i < num_wkrs; ++i)
			queue.Put(Xomp_page_itm.Null);
	}

	// Loads the next batch of pending pages with page_id greater than prv_page_id and queues them.
	// Returns the last page_id read, or -1 when the batch was empty.
	private int Load_pages(int prv_page_id) {
		// page_tbl.prep_sql
		// ORDER BY is required: paging resumes from the last page_id read, which is only valid
		// when rows arrive in ascending page_id order. Without it the row order is undefined and pages can be skipped.
		String sql = String_.Format(String_.Concat_lines_nl_skip_last	// ANSI.Y
		( "SELECT mp.page_id"
		, ", pp.page_namespace"
		, ", pp.page_title"
		, ", pp.page_text_db_id"
		, "FROM xomp_page mp"
		, " JOIN <page_db>page pp ON mp.page_id = pp.page_id"
		, "WHERE mp.page_id > {0}"
		, "AND mp.page_status = 0"
		, "ORDER BY mp.page_id"
		, "LIMIT {1}"
		), prv_page_id, queue.Capacity());
		this.attach_mgr.Conn_others_(new Db_attach_itm("page_db", wiki.Data__core_mgr().Db__core().Conn()));
		sql = attach_mgr.Resolve_sql(sql);

		// page_tbl.load_sql
		Xomp_text_db_loader text_db_loader = new Xomp_text_db_loader(wiki);
		List_adp list = List_adp_.New();
		int count = 0;
		attach_mgr.Attach();
		try {
			Db_rdr rdr = mgr_conn.Stmt_sql(sql).Exec_select__rls_auto();
			try {
				while (rdr.Move_next()) {
					prv_page_id = rdr.Read_int("page_id");
					int text_db_id = rdr.Read_int("page_text_db_id");
					Xomp_page_itm ppg = new Xomp_page_itm(prv_page_id);
					ppg.Init_by_page
					( rdr.Read_int("page_namespace")
					, rdr.Read_bry_by_str("page_title")
					, text_db_id
					);
					list.Add(ppg);
					text_db_loader.Add(text_db_id, ppg);
					++count;
				}
			} finally {rdr.Rls();}
		}
		// detach even when the read fails, so the page db does not stay attached to mgr_conn
		finally {attach_mgr.Detach();}

		// load text for the whole batch, then hand the pages to the workers
		text_db_loader.Load();
		int len = list.Len();
		for (int i = 0; i < len; ++i) {
			queue.Put((Xomp_page_itm)list.Get_at(i));
		}
		return count == 0 ? -1 : prv_page_id;
	}

	// Counts one finished page and, every 1000 pages, reports done / left / rate / elapsed / remaining.
	// The id argument is not used.
	public void Mark_done(int id) {
		synchronized (thread_lock) {
			pages_done += 1;
			if (pages_done % 1000 == 0) {
				long time_cur = gplx.core.envs.Env_.TickCount();
				int pages_left = pages_total - pages_done;
				time_done += (time_cur - time_prv);
				double rate_cur = pages_done / (time_done / Time_span_.Ratio_f_to_s);
				String time_past = gplx.xowa.addons.bldrs.centrals.utils.Time_dhms_.To_str(prog_bfr, (int)((time_cur - time_bgn) / 1000), true, 0);
				String time_left = gplx.xowa.addons.bldrs.centrals.utils.Time_dhms_.To_str(prog_bfr, (int)(pages_left / rate_cur), true, 0);
				Gfo_usr_dlg_.Instance.Prog_many("", "", "done=~{0} left=~{1} rate=~{2} time_past=~{3} time_left=~{4}", pages_done, pages_left, (int)rate_cur, time_past, time_left);
				time_prv = time_cur;
			}
		}
	}

	// Releases the manager db connection.
	public void Rls() {
		mgr_conn.Rls_conn();
	}

	// Dispatches the "exec" message to Exec; any other key is reported as unhandled.
	public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
		if		(ctx.Match(k, Invk__exec))		this.Exec();
		else	return Gfo_invk_.Rv_unhandled;
		return this;
	}
	public static final String Invk__exec = "exec";
}

View File

@@ -15,8 +15,9 @@ GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.addons.bldrs.mass_parses.makes; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.mass_parses.*;
class Xomp_page_itm {
package gplx.xowa.addons.bldrs.mass_parses.parses; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.mass_parses.*;
import gplx.xowa.wikis.data.tbls.*;
class Xomp_page_itm implements Xowd_text_bry_owner {
public Xomp_page_itm(int id) {this.id = id;}
public int Id() {return id;} private final int id;
public int Ns_id() {return ns_id;} private int ns_id;
@@ -29,9 +30,9 @@ class Xomp_page_itm {
this.ttl_bry = ttl_bry;
this.text_db_id = text_db_id;
}
public void Init_by_text(byte[] text) {
this.text = text;
}
public int Page_id() {return id;}
public void Set_text_bry_by_db(byte[] v) {this.text = v;}
public static final Xomp_page_itm Null = new Xomp_page_itm(-1);
}

View File

@@ -0,0 +1,62 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.addons.bldrs.mass_parses.parses; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.mass_parses.*;
import gplx.dbs.*;
// Shared pool of pages handed out to parse workers in fixed-size slices.
// All mutable state is guarded by thread_lock.
class Xomp_page_pool {
	private final Object thread_lock = new Object();
	private final Xomp_page_pool_loader loader;
	private final int num_pages_per_wkr;
	private List_adp pool = List_adp_.New();
	private int pool_idx = 0, pool_len = 0;
	private boolean empty = false;

	public Xomp_page_pool(Xomp_page_pool_loader loader, int num_pages_per_wkr) {
		this.loader = loader;
		this.num_pages_per_wkr = num_pages_per_wkr;
	}

	// True once a load returned no pages.
	public boolean Empty() {
		synchronized (thread_lock) {
			return empty;
		}
	}

	// Appends the next slice of at most num_pages_per_wkr pages to wkr_list,
	// reloading the pool first when it cannot satisfy a full slice. Adds nothing once the pool is exhausted.
	public void Get_next(List_adp wkr_list) {
		synchronized (thread_lock) {
			if (empty) return;	// another wkr already found the pool exhausted

			int slice_end = pool_idx + num_pages_per_wkr;
			boolean needs_reload = slice_end > pool_len;
			if (needs_reload) {
				this.pool = loader.Load(pool, pool_idx, pool_len);
				this.pool_idx = 0;
				this.pool_len = pool.Len();
				if (pool_len == 0) {
					// nothing left to load
					empty = true;
					return;
				}
				slice_end = num_pages_per_wkr;	// slice now starts at index 0
			}

			// clamp to the pool size; matters for the very last slice
			slice_end = Math.min(slice_end, pool_len);

			for (int i = pool_idx; i < slice_end; ++i) {
				wkr_list.Add((Xomp_page_itm)pool.Get_at(i));
			}
			pool_idx = slice_end;
		}
	}

	// Releases the loader's db connection.
	public void Rls() {
		loader.Conn().Rls_conn();
	}
}

View File

@@ -15,22 +15,22 @@ GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.addons.bldrs.mass_parses.makes; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.mass_parses.*;
package gplx.xowa.addons.bldrs.mass_parses.parses; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.mass_parses.*;
import gplx.dbs.*;
class Xomp_page_pool_loader {
private final Xow_wiki wiki;
private int prv_page_id = -1;
private final Db_conn make_conn;
private final int num_pages_per_load;
private final Db_attach_mgr attach_mgr;
private int prv_page_id = -1;
public Xomp_page_pool_loader(Xow_wiki wiki, Db_conn make_conn, int num_pages_per_load) {
this.wiki = wiki;
this.make_conn = make_conn;
this.attach_mgr = new Db_attach_mgr(make_conn);
this.num_pages_per_load = num_pages_per_load;
}
public Db_conn Conn() {return make_conn;} private final Db_conn make_conn;
public int Get_pending_count() {
Db_rdr rdr = make_conn.Stmt_sql("SELECT Count(*) AS Count_of FROM mp_page mp WHERE mp.page_done = 0").Exec_select__rls_auto();
Db_rdr rdr = make_conn.Stmt_sql("SELECT Count(*) AS Count_of FROM xomp_page mp WHERE mp.page_status = 0").Exec_select__rls_auto();
try {
return rdr.Move_next() ? rdr.Read_int("Count_of") : 0;
} finally {rdr.Rls();}
@@ -53,10 +53,10 @@ class Xomp_page_pool_loader {
, ", pp.page_namespace"
, ", pp.page_title"
, ", pp.page_text_db_id"
, "FROM mp_page mp"
, "FROM xomp_page mp"
, " JOIN <page_db>page pp ON mp.page_id = pp.page_id"
, "WHERE mp.page_id > {0}"
, "AND mp.page_done = 0"
, "AND mp.page_status = 0"
, "LIMIT {1}"
), prv_page_id, num_pages_per_load);
this.attach_mgr.Conn_others_(new Db_attach_itm("page_db", wiki.Data__core_mgr().Db__core().Conn()));

View File

@@ -15,7 +15,7 @@ GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.addons.bldrs.mass_parses.makes; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.mass_parses.*;
package gplx.xowa.addons.bldrs.mass_parses.parses; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.mass_parses.*;
import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.wkrs.*;
public class Xomp_parse_cmd extends Xob_cmd__base {
private final Xomp_parse_mgr mgr = new Xomp_parse_mgr();

View File

@@ -15,24 +15,38 @@ GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.addons.bldrs.mass_parses.makes; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.mass_parses.*;
import gplx.core.threads.*;
import gplx.xowa.langs.*;
package gplx.xowa.addons.bldrs.mass_parses.parses; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.mass_parses.*;
import gplx.core.threads.*; import gplx.core.threads.utils.*;
import gplx.xowa.langs.*; import gplx.xowa.langs.cases.*;
import gplx.xowa.addons.bldrs.mass_parses.dbs.*;
import gplx.xowa.wikis.caches.*;
class Xomp_parse_mgr {
private final Xomp_page_pool page_pool = new Xomp_page_pool();
private Gfo_countdown_latch latch;
public Xomp_parse_mgr_cfg Cfg() {return cfg;} private final Xomp_parse_mgr_cfg cfg = new Xomp_parse_mgr_cfg();
private int wkrs_done;
public void Wkrs_done_add_1() {synchronized (page_pool) {++wkrs_done;}}
public Xomp_db_core Db_core() {return db_core;} private Xomp_db_core db_core;
public Xomp_prog_mgr Prog_mgr() {return prog_mgr;} private final Xomp_prog_mgr prog_mgr = new Xomp_prog_mgr();
public void Wkrs_done_add_1() {latch.Countdown();}
public void Run(Xowe_wiki wiki) {
// init pool
// init db, pool_loader, pool, prog_mgr
cfg.Init(wiki);
page_pool.Init(wiki, cfg.Num_pages_in_pool());
this.db_core = Xomp_db_core.New__load(wiki);
Xomp_page_pool_loader pool_loader = new Xomp_page_pool_loader(wiki, db_core.Mgr_db().Conn(), cfg.Num_pages_in_pool());
Xomp_page_pool page_pool = new Xomp_page_pool(pool_loader, cfg.Num_pages_per_wkr());
prog_mgr.Init(pool_loader.Get_pending_count());
Xow_page_cache page_cache = Xomp_tmpl_cache_bldr.New(wiki, true);
wiki.App().User().User_db_mgr().Cache_mgr().Enabled_n_(); // disable db lookups of cache
// init threads
// load_wkr: init and start
// Xomp_load_wkr load_wkr = new Xomp_load_wkr(wiki, db_core.Mgr_db().Conn(), cfg.Num_pages_in_pool(), cfg.Num_wkrs());
// Thread_adp_.Start_by_key("xomp.load", Cancelable_.Never, load_wkr, Xomp_load_wkr.Invk__exec);
// init parse_wkrs
int wkr_len = cfg.Num_wkrs();
latch = new Gfo_countdown_latch(wkr_len);
Xomp_parse_wkr[] wkrs = new Xomp_parse_wkr[wkr_len];
for (int i = 0; i < wkr_len; ++i) {
Xomp_parse_wkr wkr = new Xomp_parse_wkr(this, Clone_wiki(wiki), page_pool, i, cfg.Num_pages_per_wkr());
Xomp_parse_wkr wkr = new Xomp_parse_wkr(this, Clone_wiki(wiki), page_pool, i);
wkr.Wiki().Cache_mgr().Page_cache_(page_cache);
wkrs[i] = wkr;
}
@@ -43,12 +57,7 @@ class Xomp_parse_mgr {
}
// wait until wkrs are wkrs_done
while (true) {
synchronized (page_pool) {
if (wkrs_done == wkr_len) break;
}
Thread_adp_.Sleep(1000);
}
latch.Await();
page_pool.Rls();
// print stats
@@ -61,7 +70,7 @@ class Xomp_parse_mgr {
private static Xowe_wiki Clone_wiki(Xowe_wiki wiki) {
Xol_lang_itm lang = new Xol_lang_itm(wiki.App().Lang_mgr(), wiki.Lang().Key_bry());
Xol_lang_itm_.Lang_init(lang);
Xowe_wiki rv = new Xowe_wiki(wiki.Appe(), lang, wiki.Ns_mgr(), wiki.Domain_itm(), wiki.Fsys_mgr().Root_dir());
Xowe_wiki rv = new Xowe_wiki(wiki.Appe(), lang, gplx.xowa.wikis.nss.Xow_ns_mgr_.default_(lang.Case_mgr()), wiki.Domain_itm(), wiki.Fsys_mgr().Root_dir());
rv.Init_by_wiki();
return rv;
}

View File

@@ -15,14 +15,14 @@ GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.addons.bldrs.mass_parses.makes; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.mass_parses.*;
package gplx.xowa.addons.bldrs.mass_parses.parses; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.mass_parses.*;
class Xomp_parse_mgr_cfg implements Gfo_invk {
public Io_url Root_dir() {return root_dir;} private Io_url root_dir;
// public Io_url Root_dir() {return root_dir;} private Io_url root_dir;
public int Num_wkrs() {return num_wkrs;} private int num_wkrs = -1;
public int Num_pages_in_pool() {return num_pages_in_pool;} private int num_pages_in_pool = 1000;
public int Num_pages_per_wkr() {return num_pages_per_wkr;} private int num_pages_per_wkr = 1000;
public void Init(Xowe_wiki wiki) {
if (root_dir == null) root_dir = wiki.Fsys_mgr().Root_dir().GenSubDir_nest("tmp", "xomp");
// if (root_dir == null) root_dir = wiki.Fsys_mgr().Root_dir().GenSubDir_nest("tmp", "xomp");
if (num_wkrs == -1) num_wkrs = gplx.core.envs.Env_.System_cpu_count();
}
public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
@@ -30,11 +30,11 @@ class Xomp_parse_mgr_cfg implements Gfo_invk {
else if (ctx.Match(k, Invk__num_pages_in_pool_)) num_pages_in_pool = m.ReadInt("v");
else if (ctx.Match(k, Invk__num_pages_per_wkr_)) num_pages_per_wkr = m.ReadInt("v");
else if (ctx.Match(k, Invk__num_pages_per_wkr_)) num_pages_per_wkr = m.ReadInt("v");
else if (ctx.Match(k, Invk__root_dir_)) root_dir = m.ReadIoUrl("v");
// else if (ctx.Match(k, Invk__root_dir_)) root_dir = m.ReadIoUrl("v");
else return Gfo_invk_.Rv_unhandled;
return this;
}
private static final String Invk__num_wkrs_ = "num_wkrs_", Invk__num_pages_in_pool_ = "num_pages_in_pool_", Invk__num_pages_per_wkr_ = "num_pages_per_wkr_"
, Invk__root_dir_ = "root_dir_"
// , Invk__root_dir_ = "root_dir_"
;
}

View File

@@ -15,33 +15,33 @@ GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.addons.bldrs.mass_parses.makes; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.mass_parses.*;
import gplx.dbs.*;
package gplx.xowa.addons.bldrs.mass_parses.parses; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.mass_parses.*;
import gplx.dbs.*; import gplx.xowa.addons.bldrs.mass_parses.dbs.*;
import gplx.xowa.htmls.core.bldrs.*;
import gplx.xowa.parsers.*;
import gplx.xowa.parsers.*;
class Xomp_parse_wkr implements Gfo_invk {
private final Xomp_parse_mgr mgr;
private final Xowe_wiki wiki;
private final Xomp_parse_mgr mgr;
private final Xomp_page_pool page_pool;
private final List_adp list = List_adp_.New(); private int list_idx = 0, list_len = 0;
private final int num_pages_per_wkr;
private final int idx;
private final List_adp list = List_adp_.New(); private int list_idx = 0, list_len = 0;
private final Xob_hdump_bldr hdump_bldr = new Xob_hdump_bldr();
// private boolean gen_html = true;// gen_hdump = true;
private int done_count; private long done_time;
public Xomp_parse_wkr(Xomp_parse_mgr mgr, Xowe_wiki wiki, Xomp_page_pool page_pool, int idx, int num_pages_per_wkr) {
private Xomp_wkr_db wkr_db;
public Xomp_parse_wkr(Xomp_parse_mgr mgr, Xowe_wiki wiki, Xomp_page_pool page_pool, int idx) {
this.mgr = mgr; this.wiki = wiki;
this.page_pool = page_pool; this.num_pages_per_wkr = num_pages_per_wkr;
this.page_pool = page_pool;
this.idx = idx;
this.wkr_db = mgr.Db_core().Wkr_db(Bool_.Y, idx); // NOTE: must go in ctor, or else thread issues
}
public Xowe_wiki Wiki() {return wiki;} private final Xowe_wiki wiki;
public void Exec() {
// init
Db_conn wkr_conn = Db_conn_bldr.Instance.Get_or_autocreate(true, mgr.Cfg().Root_dir().GenSubFil_nest("xomp_" + Int_.To_str_fmt(idx, "000"), "xomp_wkr.sqlite3"));
Xow_parser_mgr parser = new Xow_parser_mgr(wiki);
Xow_parser_mgr parser_mgr = new Xow_parser_mgr(wiki);
wiki.Html_mgr().Page_wtr_mgr().Wkr(gplx.xowa.wikis.pages.Xopg_page_.Tid_read).Ctgs_enabled_(false); // disable categories else progress messages written (also for PERF)
if (wiki.File__bin_mgr() != null)
wiki.File__bin_mgr().Wkrs__del(gplx.xowa.files.bins.Xof_bin_wkr_.Key_http_wmf); // remove wmf wkr, else will try to download images during parsing
hdump_bldr.Init(wiki, wkr_conn, new Xob_hdump_tbl_retriever__xomp(wkr_conn));
hdump_bldr.Enabled_(true).Hzip_enabled_(true).Hzip_diff_(true).Init(wiki, wkr_db.Conn(), new Xob_hdump_tbl_retriever__xomp(wkr_db.Html_tbl()));
wkr_db.Conn().Txn_bgn("xomp");
while (true) {
Xomp_page_itm ppg = Get_next(); if (ppg == Xomp_page_itm.Null) break; // no more pages
@@ -56,18 +56,15 @@ class Xomp_parse_wkr implements Gfo_invk {
wpg.Db().Page().Id_(ppg.Id());
// parse page
parser.Ctx().Clear_all();
parser.Parse(wpg, true);
Xop_ctx pctx = parser_mgr.Ctx();
pctx.Clear_all();
parser_mgr.Parse(wpg, true);
// gen_html
// if ( gen_html
// && wpg.Redirect().Itms__len() == 0) // don't generate html for redirected pages
// wiki.Html_mgr().Page_wtr_mgr().Gen(wpg, gplx.xowa.wikis.pages.Xopg_page_.Tid_read);
// if (gen_hdump)
// hdump_bldr.Insert(wpg);
hdump_bldr.Insert(pctx, wpg);
// mark done for sake of progress
page_pool.Mark_done(ppg.Id());
mgr.Prog_mgr().Mark_done(ppg.Id());
// update stats
long time_cur = gplx.core.envs.Env_.TickCount();
@@ -80,15 +77,17 @@ class Xomp_parse_wkr implements Gfo_invk {
if (wiki.Cache_mgr().Tmpl_result_cache().Count() > 50000)
wiki.Cache_mgr().Tmpl_result_cache().Clear();
if (done_count % 50 == 0) {
wiki.Cache_mgr().Free_mem_all();
wiki.Cache_mgr().Free_mem_all(Bool_.N);
wiki.Parser_mgr().Scrib().Core_term();
}
} catch (Exception e) {
Gfo_usr_dlg_.Instance.Warn_many("", "", "mass_parse.fail:ns=~{0} ttl=~{1} err=~{2}", ppg.Ns_id(), ppg.Ttl_bry(), Err_.Message_gplx_log(e));
}
}
wkr_db.Conn().Txn_end(); // NOTE: must end txn before running update wkr_id
mgr.Db_core().Update_wkr_id(idx, wkr_db.Conn());
mgr.Wkrs_done_add_1();
wkr_conn.Rls_conn();
wkr_db.Conn().Rls_conn();
}
public void Bld_stats(Bry_bfr bfr) {
int done_time_in_sec = (int)(done_time / 1000); if (done_time_in_sec == 0) done_time_in_sec = 1;
@@ -101,7 +100,7 @@ class Xomp_parse_wkr implements Gfo_invk {
private Xomp_page_itm Get_next() {
if (list_idx == list_len) {
list.Clear();
page_pool.Get_next(list, num_pages_per_wkr);
page_pool.Get_next(list);
list_len = list.Len();
if (list_len == 0) return Xomp_page_itm.Null;
list_idx = 0;

View File

@@ -0,0 +1,43 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.addons.bldrs.mass_parses.parses; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.mass_parses.*;
// Counts parsed pages across all wkrs and writes a progress message every 1000 pages.
// Mark_done is guarded by thread_lock so that multiple wkr threads can report concurrently.
class Xomp_prog_mgr {
	private final Object thread_lock = new Object();
	private int pages_done, pages_total;
	private long time_bgn, time_prv, time_done;
	private final Bry_bfr prog_bfr = Bry_bfr_.New();

	// Records the start tick and the number of pages that are expected to be parsed.
	public void Init(int pages_total) {
		long now = gplx.core.envs.Env_.TickCount();
		this.time_bgn = now;
		this.time_prv = now;
		this.pages_total = pages_total;
	}

	// Registers one finished page; on every 1000th page also reports done / left / rate / elapsed / estimated remaining.
	public void Mark_done(int id) {
		synchronized (thread_lock) {
			pages_done++;
			if (pages_done % 1000 != 0) return;	// only report on multiples of 1000

			long time_cur = gplx.core.envs.Env_.TickCount();
			time_done += (time_cur - time_prv);	// accumulate time since the previous report
			int pages_left = pages_total - pages_done;

			// NOTE(review): ticks are presumably milliseconds (see the "/ 1000" below), which would make rate_cur pages-per-second; confirm against Env_.TickCount and Time_span_.Ratio_f_to_s
			double rate_cur = pages_done / (time_done / Time_span_.Ratio_f_to_s);

			int secs_past = (int)((time_cur - time_bgn) / 1000);
			int secs_left = (int)(pages_left / rate_cur);
			String time_past = gplx.xowa.addons.bldrs.centrals.utils.Time_dhms_.To_str(prog_bfr, secs_past, true, 0);
			String time_left = gplx.xowa.addons.bldrs.centrals.utils.Time_dhms_.To_str(prog_bfr, secs_left, true, 0);

			// NOTE(review): placeholders start at ~{1}, so id (the first arg) appears not to be printed; confirm whether that is intended
			Gfo_usr_dlg_.Instance.Prog_many("", "", "done=~{1} left=~{2} rate=~{3} time_past=~{4} time_left=~{5}", id, pages_done, pages_left, (int)rate_cur, time_past, time_left);

			time_prv = time_cur;
		}
	}
}

View File

@@ -15,9 +15,10 @@ GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.addons.bldrs.mass_parses.makes; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.mass_parses.*;
package gplx.xowa.addons.bldrs.mass_parses.parses; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.mass_parses.*;
import gplx.dbs.*;
import gplx.core.ios.*;
import gplx.xowa.wikis.data.tbls.*;
class Xomp_text_db_loader {
private final Xow_wiki wiki;
private final Ordered_hash text_db_hash = Ordered_hash_.New();
@@ -25,7 +26,7 @@ class Xomp_text_db_loader {
public Xomp_text_db_loader(Xow_wiki wiki) {
this.wiki = wiki;
}
public void Add(int text_db_id, Xomp_page_itm ppg) {
public void Add(int text_db_id, Xowd_text_bry_owner ppg) {
Xomp_text_db_itm itm = (Xomp_text_db_itm)text_db_hash.Get_by(text_db_id);
if (itm == null) {
itm = new Xomp_text_db_itm(text_db_id);
@@ -53,8 +54,8 @@ class Xomp_text_db_loader {
}
// build WHERE IN for page_ids; EX: "1, 2, 3, 4"
Xomp_page_itm ppg = (Xomp_page_itm)list.Get_at(i);
int page_id = ppg.Id();
Xowd_text_bry_owner ppg = (Xowd_text_bry_owner)list.Get_at(i);
int page_id = ppg.Page_id();
if (batch_idx != 0) bry.Add_byte_comma();
bry.Add_int_variable(page_id);
page_hash.Add(page_id, ppg);
@@ -77,8 +78,8 @@ class Xomp_text_db_loader {
int page_id = rdr.Read_int("page_id");
byte[] text_data = rdr.Read_bry("text_data");
text_data = zip_mgr.Unzip(zip_tid, text_data);
Xomp_page_itm ppg = (Xomp_page_itm)page_hash.Get_by(page_id);
ppg.Init_by_text(text_data);
Xowd_text_bry_owner ppg = (Xowd_text_bry_owner)page_hash.Get_by(page_id);
ppg.Set_text_bry_by_db(text_data);
}
}
finally {

View File

@@ -0,0 +1,81 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.addons.bldrs.mass_parses.parses; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.mass_parses.*;
import gplx.dbs.*;
import gplx.xowa.wikis.caches.*;
// Builds the page cache that is shared by all parse wkrs.
class Xomp_tmpl_cache_bldr {
	// Creates a new page cache for wiki; when fill_all is true, preloads it via Fill_all.
	public static Xow_page_cache New(Xowe_wiki wiki, boolean fill_all) {
		Xow_page_cache cache = new Xow_page_cache(wiki);
		if (!fill_all) return cache;
		Fill_all(cache, wiki);
		return cache;
	}

	// Loads every page in namespaces 10 and 828 into cache, then loads their wikitext and resolves redirects.
	// NOTE(review): 10 / 828 are presumably the Template and Module namespaces; confirm against the wiki's ns_mgr
	private static void Fill_all(Xow_page_cache cache, Xowe_wiki wiki) {
		String sql = String_.Concat_lines_nl_skip_last	// ANSI.Y
		( "SELECT  pp.page_id"
		, ",       pp.page_namespace"
		, ",       pp.page_title"
		, ",       pp.page_text_db_id"
		, ",       pp.page_redirect_id"
		, "FROM    page pp"
		, "WHERE   pp.page_namespace IN (10, 828)"
		);
		Xomp_text_db_loader text_db_loader = new Xomp_text_db_loader(wiki);

		// pass 1: create one cache itm per row; text is filled in later by text_db_loader
		int loaded = 0;
		List_adp redirect_list = List_adp_.New();
		Ordered_hash page_regy = Ordered_hash_.New();	// page_id -> itm; used to look up redirect targets
		Db_rdr rdr = wiki.Data__core_mgr().Db__core().Tbl__page().Conn().Stmt_sql(sql).Exec_select__rls_auto();
		try {
			while (rdr.Move_next()) {
				// build ttl from ns and title
				int ns_id = rdr.Read_int("page_namespace");
				Xoa_ttl page_ttl = wiki.Ttl_parse(ns_id, rdr.Read_bry_by_str("page_title"));

				// build itm with empty text placeholders
				int page_id = rdr.Read_int("page_id");
				int page_redirect_id = rdr.Read_int("page_redirect_id");
				Xow_page_cache_itm itm = new Xow_page_cache_itm(page_ttl, Bry_.Empty, Bry_.Empty);
				itm.Set_page_ids(page_id, page_redirect_id);

				// register itm with text loader, cache and id registry
				int text_db_id = rdr.Read_int("page_text_db_id");
				text_db_loader.Add(text_db_id, itm);
				cache.Add(page_ttl.Full_db(), itm);
				page_regy.Add(page_id, itm);

				// -1 means "not a redirect"; anything else is resolved in pass 3
				boolean is_redirect = page_redirect_id != -1;
				if (is_redirect)
					redirect_list.Add(itm);

				loaded++;
				if (loaded % 10000 == 0)
					Gfo_usr_dlg_.Instance.Prog_many("", "", "loading tmpls: ~{0}", loaded);
			}
		} finally {
			rdr.Rls();
		}

		// pass 2: load wikitext for all registered itms
		text_db_loader.Load();

		// pass 3: point each redirect at its target's wikitext
		Resolve_redirects(redirect_list, page_regy);
	}

	// Sets the redirect text of each itm in redirect_list to the wikitext of its target; targets not in page_regy are reported and skipped.
	private static void Resolve_redirects(List_adp redirect_list, Ordered_hash page_regy) {
		int redirect_len = redirect_list.Len();
		for (int idx = 0; idx < redirect_len; idx++) {
			Xow_page_cache_itm src_itm = (Xow_page_cache_itm)redirect_list.Get_at(idx);
			Xow_page_cache_itm trg_itm = (Xow_page_cache_itm)page_regy.Get_by(src_itm.Redirect_id());
			if (trg_itm != null) {
				src_itm.Set_redirect_bry(trg_itm.Wtxt__direct());
			}
			else {
				// target was not among the loaded pages
				Gfo_usr_dlg_.Instance.Prog_many("", "", "missing redirect for tmpl: ~{0}", src_itm.Ttl().Full_db());
			}
		}
	}
}