Fsdb.check: Add new command to check for missing images

pull/620/head
gnosygnu 8 years ago
parent 9a5c70b506
commit df45f141ca

@ -19,7 +19,7 @@ package gplx.xowa.addons.bldrs.files; import gplx.*; import gplx.xowa.*; import
import gplx.xowa.bldrs.wkrs.*; import gplx.xowa.bldrs.wkrs.*;
import gplx.xowa.addons.bldrs.files.cmds.*; import gplx.xowa.addons.bldrs.files.cmds.*;
import gplx.xowa.addons.bldrs.mass_parses.inits.*; import gplx.xowa.addons.bldrs.mass_parses.parses.*; import gplx.xowa.addons.bldrs.mass_parses.makes.*; import gplx.xowa.addons.bldrs.mass_parses.resumes.*; import gplx.xowa.addons.bldrs.mass_parses.inits.*; import gplx.xowa.addons.bldrs.mass_parses.parses.*; import gplx.xowa.addons.bldrs.mass_parses.makes.*; import gplx.xowa.addons.bldrs.mass_parses.resumes.*;
import gplx.xowa.addons.bldrs.files.cksums.*; import gplx.xowa.addons.bldrs.files.cksums.*; import gplx.xowa.addons.bldrs.files.checks.*;
import gplx.xowa.addons.bldrs.app_cfgs.wm_server_cfgs.*; import gplx.xowa.addons.bldrs.app_cfgs.wm_server_cfgs.*;
public class Xoax_builds_files_addon implements Xoax_addon_itm, Xoax_addon_itm__bldr { public class Xoax_builds_files_addon implements Xoax_addon_itm, Xoax_addon_itm__bldr {
public Xob_cmd[] Bldr_cmds() { public Xob_cmd[] Bldr_cmds() {
@ -48,6 +48,7 @@ public class Xoax_builds_files_addon implements Xoax_addon_itm, Xoax_addon_itm__
, Xomp_make_cmd.Prototype , Xomp_make_cmd.Prototype
, Xomp_resume_cmd.Prototype , Xomp_resume_cmd.Prototype
, Xocksum_calc_cmd.Prototype , Xocksum_calc_cmd.Prototype
, Xocheck_cmd.Prototype
, Xowm_server_cfg_cmd.Prototype , Xowm_server_cfg_cmd.Prototype
}; };

@ -0,0 +1,30 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.addons.bldrs.files.checks; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.files.*;
import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.wkrs.*;
// Builder command registered under the key "fsdb.check".
// Checks fsdb; needed for en.w and multiple monthly updates.
public class Xocheck_cmd extends Xob_cmd__base {
	// key under which this command is exposed by Cmd_key()
	private static final String BLDR_CMD_KEY = "fsdb.check";

	// template instance; real instances are produced through Cmd_clone
	public static final Xob_cmd Prototype = new Xocheck_cmd(null, null);

	public Xocheck_cmd(Xob_bldr bldr, Xowe_wiki wiki) {
		super(bldr, wiki);
	}

	@Override public String Cmd_key() {
		return BLDR_CMD_KEY;
	}

	@Override public Xob_cmd Cmd_clone(Xob_bldr bldr, Xowe_wiki wiki) {
		return new Xocheck_cmd(bldr, wiki);
	}

	// asserts that the wiki is initialized, then hands it to a fresh Xocheck_mgr
	@Override public void Cmd_run() {
		wiki.Init_assert();
		Xocheck_mgr check_mgr = new Xocheck_mgr();
		check_mgr.Exec(wiki);
	}
}

@ -0,0 +1,89 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.addons.bldrs.files.checks; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.files.*;
import gplx.core.ios.streams.*;
import gplx.dbs.*;
import gplx.xowa.wikis.data.*; import gplx.xowa.wikis.data.tbls.*;
import gplx.xowa.files.*; import gplx.xowa.files.repos.*; import gplx.xowa.files.origs.*;
import gplx.xowa.addons.bldrs.wmdumps.imglinks.*;
import gplx.xowa.htmls.*;
// TODO.XO:cache files in memory; otherwise commonly used files (e.g. Wiki.png) are loaded from fsdb again for every usage on every page
// TODO.XO:save results to db so that unused images (present in fsdb, but never loaded by this check) can be identified
// Checks the images referenced by stored html pages against the wiki's local file data.
// For every row in the page table with page_html_db_id != -1, the page's html is loaded and parsed,
// and each image item collected on the page is looked up in the orig manager and then requested
// from the bin manager. Missing origs and per-file failures are logged as warnings.
class Xocheck_mgr {
	private final Xof_url_bldr url_bldr = Xof_url_bldr.new_v2();
	private final Xof_img_size img_size = new Xof_img_size();
	private Xowe_wiki wiki;

	// Runs the check over all pages of the given wiki.
	// wiki: wiki whose pages and files are checked; also stored on this instance for Check_images.
	public void Exec(Xowe_wiki wiki) {
		// init
		this.wiki = wiki;
		wiki.File__bin_mgr().Wkrs__del(gplx.xowa.files.bins.Xof_bin_wkr_.Key_http_wmf);	// must happen after init_file_mgr_by_load; remove wmf wkr, else will try to download images during parsing
		wiki.File_mgr().Fsdb_mode().Tid__v2__mp__y_();
		wiki.App().Cfg().Set_bool_app("xowa.app.web.enabled", false);	// never enable inet; rely solely on local dbs;

		// select list of pages
		Xoh_page hpg = new Xoh_page();	// one instance, re-initialized per row via Ctor_by_hview
		Xowd_page_tbl page_tbl = wiki.Data__core_mgr().Db__core().Tbl__page();
		Db_rdr rdr = page_tbl.Conn().Stmt_sql("SELECT page_id, page_namespace, page_title, page_html_db_id FROM page WHERE page_html_db_id != -1;").Exec_select__rls_auto();
		int page_count = 0, file_count = 0;
		try {
			// loop over each page
			while (rdr.Move_next()) {
				// init page meta
				Xoa_ttl page_ttl = wiki.Ttl_parse(rdr.Read_int("page_namespace"), rdr.Read_bry_by_str("page_title"));
				Xoa_url page_url = Xoa_url.New(wiki, page_ttl);
				Xow_db_file html_db = wiki.Data__core_mgr().Dbs__get_by_id_or_fail(rdr.Read_int("page_html_db_id"));
				int page_id = rdr.Read_int("page_id");

				// load html
				hpg.Ctor_by_hview(wiki, page_url, page_ttl, page_id);
				if (!html_db.Tbl__html().Select_by_page(hpg)) {
					Gfo_usr_dlg_.Instance.Warn_many("", "", "could not load html for page; page_id=~{0}", page_id);
					continue;
				}
				// return value is not used; Parse is called so that hpg's image list can be read below
				wiki.Html__hdump_mgr().Load_mgr().Parse(hpg, hpg.Db().Html().Zip_tid(), hpg.Db().Html().Hzip_tid(), hpg.Db().Html().Html_bry());

				// load images
				int imgs_len = hpg.Img_mgr().Len();
				for (int i = 0; i < imgs_len; i++) {
					Xof_fsdb_itm fsdb = hpg.Img_mgr().Get_at(i);
					// a failure on one file is logged and must not stop the remaining files / pages
					try {Check_images(page_ttl, fsdb);}
					catch (Exception e) {
						Gfo_usr_dlg_.Instance.Warn_many("", "", "file failed; page_ttl=~{0} img_name=~{1} err=~{2}", page_ttl.Page_db(), fsdb.Lnki_ttl(), Err_.Message_gplx_log(e));
					}
					file_count++;	// counts every image item visited, including missing / failed ones
				}

				// prog
				page_count++;
				if ((page_count % 10000) == 0) {
					Gfo_usr_dlg_.Instance.Prog_many("", "", "checking pages; pages=~{0} files=~{1}", page_count, file_count);
				}
			}
		}
		finally {
			// always release the reader, including when an exception escapes the page loop
			rdr.Rls();
		}
	}

	// Checks one image item found on a page.
	// page_ttl: title of the page that references the image; used only in the warning message.
	// fsdb: image item taken from the page's image list.
	private void Check_images(Xoa_ttl page_ttl, Xof_fsdb_itm fsdb) {
		// get orig
		Xof_orig_itm orig = wiki.File__orig_mgr().Find_by_ttl_or_null(fsdb.Lnki_ttl());
		if (orig == null) {
			Gfo_usr_dlg_.Instance.Warn_many("", "", "file missing; page_ttl=~{0} img_name=~{1}", page_ttl.Page_db(), fsdb.Lnki_ttl());
			return;
		}
		Xof_file_wkr.Eval_orig(orig, fsdb, url_bldr, wiki.File__repo_mgr(), img_size);

		// request the binary and release it immediately; the content itself is not inspected
		// NOTE(review): a binary that cannot be found is only reported if Find_as_rdr throws — confirm what Find_as_rdr returns on a miss
		Io_stream_rdr img_rdr = wiki.File__bin_mgr().Find_as_rdr(Xof_exec_tid.Tid_wiki_page, fsdb);
		img_rdr.Rls();
	}
}

@ -38,7 +38,7 @@ public class Xod_page_mgr {
Xoh_page hpg = new Xoh_page(); Xoh_page hpg = new Xoh_page();
hpg.Ctor_by_hview(wiki, Xoa_url.New(wiki, ttl), ttl, 1); hpg.Ctor_by_hview(wiki, Xoa_url.New(wiki, ttl), ttl, 1);
rv.Init_by_hpg(hpg); rv.Init_by_hpg(hpg);
wiki.Html__hdump_mgr().Load_mgr().Load(hpg, ttl); wiki.Html__hdump_mgr().Load_mgr().Load_by_xowh(hpg, ttl, Bool_.Y);
Load_sections(rv, hpg); Load_sections(rv, hpg);
return rv; return rv;
} }

@ -39,12 +39,12 @@ public class Xow_hdump_mgr__load implements Gfo_invk {
} }
public void Load_by_xowe(Xoae_page wpg) { public void Load_by_xowe(Xoae_page wpg) {
tmp_hpg.Ctor_by_hview(wpg.Wiki(), wpg.Url(), wpg.Ttl(), wpg.Db().Page().Id()); tmp_hpg.Ctor_by_hview(wpg.Wiki(), wpg.Url(), wpg.Ttl(), wpg.Db().Page().Id());
Load(tmp_hpg, wpg.Ttl()); Load_by_xowh(tmp_hpg, wpg.Ttl(), Bool_.Y);
wpg.Db().Html().Html_bry_(tmp_hpg.Db().Html().Html_bry()); wpg.Db().Html().Html_bry_(tmp_hpg.Db().Html().Html_bry());
wpg.Root_(new gplx.xowa.parsers.Xop_root_tkn()); // HACK: set root, else load page will fail wpg.Root_(new gplx.xowa.parsers.Xop_root_tkn()); // HACK: set root, else load page will fail
Fill_page(wpg, tmp_hpg); Fill_page(wpg, tmp_hpg);
} }
public boolean Load(Xoh_page hpg, Xoa_ttl ttl) { public boolean Load_by_xowh(Xoh_page hpg, Xoa_ttl ttl, boolean load_ctg) {
synchronized (tmp_dbpg) { synchronized (tmp_dbpg) {
if (override_mgr__page == null) { if (override_mgr__page == null) {
Io_url override_root_url = wiki.Fsys_mgr().Root_dir().GenSubDir_nest("data", "wiki"); Io_url override_root_url = wiki.Fsys_mgr().Root_dir().GenSubDir_nest("data", "wiki");
@ -64,19 +64,21 @@ public class Xow_hdump_mgr__load implements Gfo_invk {
byte[] src = Parse(hpg, hpg.Db().Html().Zip_tid(), hpg.Db().Html().Hzip_tid(), hpg.Db().Html().Html_bry()); byte[] src = Parse(hpg, hpg.Db().Html().Zip_tid(), hpg.Db().Html().Hzip_tid(), hpg.Db().Html().Html_bry());
// write ctgs // write ctgs
if (load_ctg) {
Xoctg_pagebox_itm[] pagebox_itms = wiki.Ctg__pagebox_wtr().Get_catlinks_by_page(wiki, hpg); Xoctg_pagebox_itm[] pagebox_itms = wiki.Ctg__pagebox_wtr().Get_catlinks_by_page(wiki, hpg);
if (pagebox_itms.length > 0) { if (pagebox_itms.length > 0) {
tmp_bfr.Add(src); tmp_bfr.Add(src);
wiki.Ctg__pagebox_wtr().Write_pagebox(tmp_bfr, wiki, hpg, pagebox_itms); wiki.Ctg__pagebox_wtr().Write_pagebox(tmp_bfr, wiki, hpg, pagebox_itms);
src = tmp_bfr.To_bry_and_clear(); src = tmp_bfr.To_bry_and_clear();
} }
}
hpg.Db().Html().Html_bry_(src); hpg.Db().Html().Html_bry_(src);
return true; return true;
} }
} }
public byte[] Decode_as_bry(Bry_bfr bfr, Xoh_page hpg, byte[] src, boolean mode_is_diff) {hzip_mgr.Hctx().Mode_is_diff_(mode_is_diff); hzip_mgr.Decode(bfr, wiki, hpg, src); return bfr.To_bry_and_clear();} public byte[] Decode_as_bry(Bry_bfr bfr, Xoh_page hpg, byte[] src, boolean mode_is_diff) {hzip_mgr.Hctx().Mode_is_diff_(mode_is_diff); hzip_mgr.Decode(bfr, wiki, hpg, src); return bfr.To_bry_and_clear();}
private byte[] Parse(Xoh_page hpg, int zip_tid, int hzip_tid, byte[] src) { public byte[] Parse(Xoh_page hpg, int zip_tid, int hzip_tid, byte[] src) {
if (zip_tid > gplx.core.ios.streams.Io_stream_tid_.Tid__raw) if (zip_tid > gplx.core.ios.streams.Io_stream_tid_.Tid__raw)
src = zip_mgr.Unzip((byte)zip_tid, src); src = zip_mgr.Unzip((byte)zip_tid, src);
switch (hzip_tid) { switch (hzip_tid) {
@ -84,6 +86,7 @@ public class Xow_hdump_mgr__load implements Gfo_invk {
src = make_mgr.Parse(src, hpg, hpg.Wiki()); src = make_mgr.Parse(src, hpg, hpg.Wiki());
break; break;
case Xoh_hzip_dict_.Hzip__v1: case Xoh_hzip_dict_.Hzip__v1:
if (override_mgr__html != null) // null when Parse is called directly
src = override_mgr__html.Get_or_same(hpg.Ttl().Page_db(), src); src = override_mgr__html.Get_or_same(hpg.Ttl().Page_db(), src);
hpg.Section_mgr().Add(0, 2, Bry_.Empty, Bry_.Empty).Content_bgn_(0); // +1 to skip \n hpg.Section_mgr().Add(0, 2, Bry_.Empty, Bry_.Empty).Content_bgn_(0); // +1 to skip \n
src = Decode_as_bry(tmp_bfr.Clear(), hpg, src, Bool_.N); src = Decode_as_bry(tmp_bfr.Clear(), hpg, src, Bool_.N);

@ -137,7 +137,7 @@ public class Xol_msg_mgr_ {
Xoh_page hpg = new Xoh_page(); Xoh_page hpg = new Xoh_page();
pg = hpg; pg = hpg;
hpg.Ctor_by_hview(wiki, Xoa_url.New(wiki, ttl), ttl, -1); hpg.Ctor_by_hview(wiki, Xoa_url.New(wiki, ttl), ttl, -1);
wiki.Html__hdump_mgr().Load_mgr().Load(hpg, ttl); wiki.Html__hdump_mgr().Load_mgr().Load_by_xowh(hpg, ttl, Bool_.N);
pg.Db().Text().Text_bry_(pg.Db().Html().Html_bry()); pg.Db().Text().Text_bry_(pg.Db().Html().Html_bry());
} }
return pg.Db().Page().Exists() ? pg.Db().Text().Text_bry() : null; return pg.Db().Page().Exists() ? pg.Db().Text().Text_bry() : null;

@ -129,7 +129,7 @@ public class Xowv_wiki implements Xow_wiki, Xow_ttl_parser, Gfo_invk {
if (ttl.Ns().Id_is_special()) if (ttl.Ns().Id_is_special())
special_mgr.Get_by_ttl(rv, url, ttl); special_mgr.Get_by_ttl(rv, url, ttl);
else else
html__hdump_mgr.Load_mgr().Load(rv, ttl); html__hdump_mgr.Load_mgr().Load_by_xowh(rv, ttl, Bool_.Y);
} }
public Xoa_ttl Ttl_parse(byte[] ttl) {return Ttl_parse(ttl, 0, ttl.length);} public Xoa_ttl Ttl_parse(byte[] ttl) {return Ttl_parse(ttl, 0, ttl.length);}
public Xoa_ttl Ttl_parse(byte[] src, int src_bgn, int src_end) {return Xoa_ttl.Parse(app.Utl_amp_mgr(), app.Utl_case_mgr(), xwiki_mgr, ns_mgr, src, src_bgn, src_end);} public Xoa_ttl Ttl_parse(byte[] src, int src_bgn, int src_end) {return Xoa_ttl.Parse(app.Utl_amp_mgr(), app.Utl_case_mgr(), xwiki_mgr, ns_mgr, src, src_bgn, src_end);}

Loading…
Cancel
Save