mirror of https://github.com/gnosygnu/xowa
parent
9a5c70b506
commit
df45f141ca
@ -0,0 +1,30 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.addons.bldrs.files.checks; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.files.*;
|
||||
import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.wkrs.*;
|
||||
public class Xocheck_cmd extends Xob_cmd__base { // checks fsdb; needed for en.w and multiple monthly updates
|
||||
public Xocheck_cmd(Xob_bldr bldr, Xowe_wiki wiki) {super(bldr, wiki);}
|
||||
@Override public void Cmd_run() {
|
||||
wiki.Init_assert();
|
||||
new Xocheck_mgr().Exec(wiki);
|
||||
}
|
||||
|
||||
@Override public String Cmd_key() {return BLDR_CMD_KEY;} private static final String BLDR_CMD_KEY = "fsdb.check";
|
||||
public static final Xob_cmd Prototype = new Xocheck_cmd(null, null);
|
||||
@Override public Xob_cmd Cmd_clone(Xob_bldr bldr, Xowe_wiki wiki) {return new Xocheck_cmd(bldr, wiki);}
|
||||
}
|
@ -0,0 +1,89 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.addons.bldrs.files.checks; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.files.*;
|
||||
import gplx.core.ios.streams.*;
|
||||
import gplx.dbs.*;
|
||||
import gplx.xowa.wikis.data.*; import gplx.xowa.wikis.data.tbls.*;
|
||||
import gplx.xowa.files.*; import gplx.xowa.files.repos.*; import gplx.xowa.files.origs.*;
|
||||
import gplx.xowa.addons.bldrs.wmdumps.imglinks.*;
|
||||
import gplx.xowa.htmls.*;
|
||||
// TODO.XO:cache files in memory, else commonly used files (Wiki.png) will be loaded from fsdb for every usage on page
|
||||
// TODO.XO:save results to db to verify unused images (images in fsdb, but not loaded during this code)
|
||||
class Xocheck_mgr {
|
||||
private final Xof_url_bldr url_bldr = Xof_url_bldr.new_v2(); private final Xof_img_size img_size = new Xof_img_size();
|
||||
private Xowe_wiki wiki;
|
||||
public void Exec(Xowe_wiki wiki) {
|
||||
// init
|
||||
this.wiki = wiki;
|
||||
wiki.File__bin_mgr().Wkrs__del(gplx.xowa.files.bins.Xof_bin_wkr_.Key_http_wmf); // must happen after init_file_mgr_by_load; remove wmf wkr, else will try to download images during parsing
|
||||
wiki.File_mgr().Fsdb_mode().Tid__v2__mp__y_();
|
||||
wiki.App().Cfg().Set_bool_app("xowa.app.web.enabled", false); // never enable inet; rely solely on local dbs;
|
||||
|
||||
// select list of pages
|
||||
Xoh_page hpg = new Xoh_page();
|
||||
Xowd_page_tbl page_tbl = wiki.Data__core_mgr().Db__core().Tbl__page();
|
||||
Db_rdr rdr = page_tbl.Conn().Stmt_sql("SELECT page_id, page_namespace, page_title, page_html_db_id FROM page WHERE page_html_db_id != -1;").Exec_select__rls_auto();
|
||||
int page_count = 0, file_count = 0;
|
||||
|
||||
// loop over each page
|
||||
while (rdr.Move_next()) {
|
||||
// init page meta
|
||||
Xoa_ttl page_ttl = wiki.Ttl_parse(rdr.Read_int("page_namespace"), rdr.Read_bry_by_str("page_title"));
|
||||
Xoa_url page_url = Xoa_url.New(wiki, page_ttl);
|
||||
Xow_db_file html_db = wiki.Data__core_mgr().Dbs__get_by_id_or_fail(rdr.Read_int("page_html_db_id"));
|
||||
int page_id = rdr.Read_int("page_id");
|
||||
|
||||
// load html
|
||||
hpg.Ctor_by_hview(wiki, page_url, page_ttl, page_id);
|
||||
if (!html_db.Tbl__html().Select_by_page(hpg)) {
|
||||
Gfo_usr_dlg_.Instance.Warn_many("", "", "could not load html for page; page_id=~{0}", page_id);
|
||||
continue;
|
||||
}
|
||||
wiki.Html__hdump_mgr().Load_mgr().Parse(hpg, hpg.Db().Html().Zip_tid(), hpg.Db().Html().Hzip_tid(), hpg.Db().Html().Html_bry());
|
||||
|
||||
// load images
|
||||
int imgs_len = hpg.Img_mgr().Len();
|
||||
for (int i = 0; i < imgs_len; i++) {
|
||||
Xof_fsdb_itm fsdb = hpg.Img_mgr().Get_at(i);
|
||||
try {Check_images(page_ttl, fsdb);}
|
||||
catch (Exception e) {
|
||||
Gfo_usr_dlg_.Instance.Warn_many("", "", "file failed; page_ttl=~{0} img_name=~{1} err=~{2}", page_ttl.Page_db(), fsdb.Lnki_ttl(), Err_.Message_gplx_log(e));
|
||||
}
|
||||
file_count++;
|
||||
}
|
||||
|
||||
// prog
|
||||
page_count++;
|
||||
if ((page_count % 10000) == 0) {
|
||||
Gfo_usr_dlg_.Instance.Prog_many("", "", "checking pages; pages=~{0} files=~{1}", page_count, file_count);
|
||||
}
|
||||
}
|
||||
}
|
||||
private void Check_images(Xoa_ttl page_ttl, Xof_fsdb_itm fsdb) {
|
||||
// get orig
|
||||
Xof_orig_itm orig = wiki.File__orig_mgr().Find_by_ttl_or_null(fsdb.Lnki_ttl());
|
||||
if (orig == null) {
|
||||
Gfo_usr_dlg_.Instance.Warn_many("", "", "file missing; page_ttl=~{0} img_name=~{1}", page_ttl.Page_db(), fsdb.Lnki_ttl());
|
||||
return;
|
||||
}
|
||||
Xof_file_wkr.Eval_orig(orig, fsdb, url_bldr, wiki.File__repo_mgr(), img_size);
|
||||
|
||||
Io_stream_rdr img_rdr = wiki.File__bin_mgr().Find_as_rdr(Xof_exec_tid.Tid_wiki_page, fsdb);
|
||||
img_rdr.Rls();
|
||||
}
|
||||
}
|
Loading…
Reference in new issue