mirror of
https://github.com/gnosygnu/xowa.git
synced 2024-10-27 20:34:16 +00:00
Fsdb.check: Add new command to check for missing images
This commit is contained in:
parent
9a5c70b506
commit
df45f141ca
@ -19,7 +19,7 @@ package gplx.xowa.addons.bldrs.files; import gplx.*; import gplx.xowa.*; import
|
||||
import gplx.xowa.bldrs.wkrs.*;
|
||||
import gplx.xowa.addons.bldrs.files.cmds.*;
|
||||
import gplx.xowa.addons.bldrs.mass_parses.inits.*; import gplx.xowa.addons.bldrs.mass_parses.parses.*; import gplx.xowa.addons.bldrs.mass_parses.makes.*; import gplx.xowa.addons.bldrs.mass_parses.resumes.*;
|
||||
import gplx.xowa.addons.bldrs.files.cksums.*;
|
||||
import gplx.xowa.addons.bldrs.files.cksums.*; import gplx.xowa.addons.bldrs.files.checks.*;
|
||||
import gplx.xowa.addons.bldrs.app_cfgs.wm_server_cfgs.*;
|
||||
public class Xoax_builds_files_addon implements Xoax_addon_itm, Xoax_addon_itm__bldr {
|
||||
public Xob_cmd[] Bldr_cmds() {
|
||||
@ -48,6 +48,7 @@ public class Xoax_builds_files_addon implements Xoax_addon_itm, Xoax_addon_itm__
|
||||
, Xomp_make_cmd.Prototype
|
||||
, Xomp_resume_cmd.Prototype
|
||||
, Xocksum_calc_cmd.Prototype
|
||||
, Xocheck_cmd.Prototype
|
||||
|
||||
, Xowm_server_cfg_cmd.Prototype
|
||||
};
|
||||
|
@ -0,0 +1,30 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.addons.bldrs.files.checks; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.files.*;
|
||||
import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.wkrs.*;
|
||||
public class Xocheck_cmd extends Xob_cmd__base { // checks fsdb; needed for en.w and multiple monthly updates
|
||||
public Xocheck_cmd(Xob_bldr bldr, Xowe_wiki wiki) {super(bldr, wiki);}
|
||||
@Override public void Cmd_run() {
|
||||
wiki.Init_assert();
|
||||
new Xocheck_mgr().Exec(wiki);
|
||||
}
|
||||
|
||||
@Override public String Cmd_key() {return BLDR_CMD_KEY;} private static final String BLDR_CMD_KEY = "fsdb.check";
|
||||
public static final Xob_cmd Prototype = new Xocheck_cmd(null, null);
|
||||
@Override public Xob_cmd Cmd_clone(Xob_bldr bldr, Xowe_wiki wiki) {return new Xocheck_cmd(bldr, wiki);}
|
||||
}
|
@ -0,0 +1,89 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.addons.bldrs.files.checks; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.files.*;
|
||||
import gplx.core.ios.streams.*;
|
||||
import gplx.dbs.*;
|
||||
import gplx.xowa.wikis.data.*; import gplx.xowa.wikis.data.tbls.*;
|
||||
import gplx.xowa.files.*; import gplx.xowa.files.repos.*; import gplx.xowa.files.origs.*;
|
||||
import gplx.xowa.addons.bldrs.wmdumps.imglinks.*;
|
||||
import gplx.xowa.htmls.*;
|
||||
// TODO.XO:cache files in memory, else commonly used files (Wiki.png) will be loaded from fsdb for every usage on page
|
||||
// TODO.XO:save results to db to verify unused images (images in fsdb, but not loaded during this code)
|
||||
class Xocheck_mgr {
|
||||
private final Xof_url_bldr url_bldr = Xof_url_bldr.new_v2(); private final Xof_img_size img_size = new Xof_img_size();
|
||||
private Xowe_wiki wiki;
|
||||
public void Exec(Xowe_wiki wiki) {
|
||||
// init
|
||||
this.wiki = wiki;
|
||||
wiki.File__bin_mgr().Wkrs__del(gplx.xowa.files.bins.Xof_bin_wkr_.Key_http_wmf); // must happen after init_file_mgr_by_load; remove wmf wkr, else will try to download images during parsing
|
||||
wiki.File_mgr().Fsdb_mode().Tid__v2__mp__y_();
|
||||
wiki.App().Cfg().Set_bool_app("xowa.app.web.enabled", false); // never enable inet; rely solely on local dbs;
|
||||
|
||||
// select list of pages
|
||||
Xoh_page hpg = new Xoh_page();
|
||||
Xowd_page_tbl page_tbl = wiki.Data__core_mgr().Db__core().Tbl__page();
|
||||
Db_rdr rdr = page_tbl.Conn().Stmt_sql("SELECT page_id, page_namespace, page_title, page_html_db_id FROM page WHERE page_html_db_id != -1;").Exec_select__rls_auto();
|
||||
int page_count = 0, file_count = 0;
|
||||
|
||||
// loop over each page
|
||||
while (rdr.Move_next()) {
|
||||
// init page meta
|
||||
Xoa_ttl page_ttl = wiki.Ttl_parse(rdr.Read_int("page_namespace"), rdr.Read_bry_by_str("page_title"));
|
||||
Xoa_url page_url = Xoa_url.New(wiki, page_ttl);
|
||||
Xow_db_file html_db = wiki.Data__core_mgr().Dbs__get_by_id_or_fail(rdr.Read_int("page_html_db_id"));
|
||||
int page_id = rdr.Read_int("page_id");
|
||||
|
||||
// load html
|
||||
hpg.Ctor_by_hview(wiki, page_url, page_ttl, page_id);
|
||||
if (!html_db.Tbl__html().Select_by_page(hpg)) {
|
||||
Gfo_usr_dlg_.Instance.Warn_many("", "", "could not load html for page; page_id=~{0}", page_id);
|
||||
continue;
|
||||
}
|
||||
wiki.Html__hdump_mgr().Load_mgr().Parse(hpg, hpg.Db().Html().Zip_tid(), hpg.Db().Html().Hzip_tid(), hpg.Db().Html().Html_bry());
|
||||
|
||||
// load images
|
||||
int imgs_len = hpg.Img_mgr().Len();
|
||||
for (int i = 0; i < imgs_len; i++) {
|
||||
Xof_fsdb_itm fsdb = hpg.Img_mgr().Get_at(i);
|
||||
try {Check_images(page_ttl, fsdb);}
|
||||
catch (Exception e) {
|
||||
Gfo_usr_dlg_.Instance.Warn_many("", "", "file failed; page_ttl=~{0} img_name=~{1} err=~{2}", page_ttl.Page_db(), fsdb.Lnki_ttl(), Err_.Message_gplx_log(e));
|
||||
}
|
||||
file_count++;
|
||||
}
|
||||
|
||||
// prog
|
||||
page_count++;
|
||||
if ((page_count % 10000) == 0) {
|
||||
Gfo_usr_dlg_.Instance.Prog_many("", "", "checking pages; pages=~{0} files=~{1}", page_count, file_count);
|
||||
}
|
||||
}
|
||||
}
|
||||
private void Check_images(Xoa_ttl page_ttl, Xof_fsdb_itm fsdb) {
|
||||
// get orig
|
||||
Xof_orig_itm orig = wiki.File__orig_mgr().Find_by_ttl_or_null(fsdb.Lnki_ttl());
|
||||
if (orig == null) {
|
||||
Gfo_usr_dlg_.Instance.Warn_many("", "", "file missing; page_ttl=~{0} img_name=~{1}", page_ttl.Page_db(), fsdb.Lnki_ttl());
|
||||
return;
|
||||
}
|
||||
Xof_file_wkr.Eval_orig(orig, fsdb, url_bldr, wiki.File__repo_mgr(), img_size);
|
||||
|
||||
Io_stream_rdr img_rdr = wiki.File__bin_mgr().Find_as_rdr(Xof_exec_tid.Tid_wiki_page, fsdb);
|
||||
img_rdr.Rls();
|
||||
}
|
||||
}
|
@ -38,7 +38,7 @@ public class Xod_page_mgr {
|
||||
Xoh_page hpg = new Xoh_page();
|
||||
hpg.Ctor_by_hview(wiki, Xoa_url.New(wiki, ttl), ttl, 1);
|
||||
rv.Init_by_hpg(hpg);
|
||||
wiki.Html__hdump_mgr().Load_mgr().Load(hpg, ttl);
|
||||
wiki.Html__hdump_mgr().Load_mgr().Load_by_xowh(hpg, ttl, Bool_.Y);
|
||||
Load_sections(rv, hpg);
|
||||
return rv;
|
||||
}
|
||||
|
@ -39,12 +39,12 @@ public class Xow_hdump_mgr__load implements Gfo_invk {
|
||||
}
|
||||
public void Load_by_xowe(Xoae_page wpg) {
|
||||
tmp_hpg.Ctor_by_hview(wpg.Wiki(), wpg.Url(), wpg.Ttl(), wpg.Db().Page().Id());
|
||||
Load(tmp_hpg, wpg.Ttl());
|
||||
Load_by_xowh(tmp_hpg, wpg.Ttl(), Bool_.Y);
|
||||
wpg.Db().Html().Html_bry_(tmp_hpg.Db().Html().Html_bry());
|
||||
wpg.Root_(new gplx.xowa.parsers.Xop_root_tkn()); // HACK: set root, else load page will fail
|
||||
Fill_page(wpg, tmp_hpg);
|
||||
}
|
||||
public boolean Load(Xoh_page hpg, Xoa_ttl ttl) {
|
||||
public boolean Load_by_xowh(Xoh_page hpg, Xoa_ttl ttl, boolean load_ctg) {
|
||||
synchronized (tmp_dbpg) {
|
||||
if (override_mgr__page == null) {
|
||||
Io_url override_root_url = wiki.Fsys_mgr().Root_dir().GenSubDir_nest("data", "wiki");
|
||||
@ -64,11 +64,13 @@ public class Xow_hdump_mgr__load implements Gfo_invk {
|
||||
byte[] src = Parse(hpg, hpg.Db().Html().Zip_tid(), hpg.Db().Html().Hzip_tid(), hpg.Db().Html().Html_bry());
|
||||
|
||||
// write ctgs
|
||||
Xoctg_pagebox_itm[] pagebox_itms = wiki.Ctg__pagebox_wtr().Get_catlinks_by_page(wiki, hpg);
|
||||
if (pagebox_itms.length > 0) {
|
||||
tmp_bfr.Add(src);
|
||||
wiki.Ctg__pagebox_wtr().Write_pagebox(tmp_bfr, wiki, hpg, pagebox_itms);
|
||||
src = tmp_bfr.To_bry_and_clear();
|
||||
if (load_ctg) {
|
||||
Xoctg_pagebox_itm[] pagebox_itms = wiki.Ctg__pagebox_wtr().Get_catlinks_by_page(wiki, hpg);
|
||||
if (pagebox_itms.length > 0) {
|
||||
tmp_bfr.Add(src);
|
||||
wiki.Ctg__pagebox_wtr().Write_pagebox(tmp_bfr, wiki, hpg, pagebox_itms);
|
||||
src = tmp_bfr.To_bry_and_clear();
|
||||
}
|
||||
}
|
||||
|
||||
hpg.Db().Html().Html_bry_(src);
|
||||
@ -76,7 +78,7 @@ public class Xow_hdump_mgr__load implements Gfo_invk {
|
||||
}
|
||||
}
|
||||
public byte[] Decode_as_bry(Bry_bfr bfr, Xoh_page hpg, byte[] src, boolean mode_is_diff) {hzip_mgr.Hctx().Mode_is_diff_(mode_is_diff); hzip_mgr.Decode(bfr, wiki, hpg, src); return bfr.To_bry_and_clear();}
|
||||
private byte[] Parse(Xoh_page hpg, int zip_tid, int hzip_tid, byte[] src) {
|
||||
public byte[] Parse(Xoh_page hpg, int zip_tid, int hzip_tid, byte[] src) {
|
||||
if (zip_tid > gplx.core.ios.streams.Io_stream_tid_.Tid__raw)
|
||||
src = zip_mgr.Unzip((byte)zip_tid, src);
|
||||
switch (hzip_tid) {
|
||||
@ -84,7 +86,8 @@ public class Xow_hdump_mgr__load implements Gfo_invk {
|
||||
src = make_mgr.Parse(src, hpg, hpg.Wiki());
|
||||
break;
|
||||
case Xoh_hzip_dict_.Hzip__v1:
|
||||
src = override_mgr__html.Get_or_same(hpg.Ttl().Page_db(), src);
|
||||
if (override_mgr__html != null) // null when Parse is called directly
|
||||
src = override_mgr__html.Get_or_same(hpg.Ttl().Page_db(), src);
|
||||
hpg.Section_mgr().Add(0, 2, Bry_.Empty, Bry_.Empty).Content_bgn_(0); // +1 to skip \n
|
||||
src = Decode_as_bry(tmp_bfr.Clear(), hpg, src, Bool_.N);
|
||||
hpg.Section_mgr().Set_content(hpg.Section_mgr().Len() - 1, src, src.length);
|
||||
|
@ -137,7 +137,7 @@ public class Xol_msg_mgr_ {
|
||||
Xoh_page hpg = new Xoh_page();
|
||||
pg = hpg;
|
||||
hpg.Ctor_by_hview(wiki, Xoa_url.New(wiki, ttl), ttl, -1);
|
||||
wiki.Html__hdump_mgr().Load_mgr().Load(hpg, ttl);
|
||||
wiki.Html__hdump_mgr().Load_mgr().Load_by_xowh(hpg, ttl, Bool_.N);
|
||||
pg.Db().Text().Text_bry_(pg.Db().Html().Html_bry());
|
||||
}
|
||||
return pg.Db().Page().Exists() ? pg.Db().Text().Text_bry() : null;
|
||||
|
@ -129,7 +129,7 @@ public class Xowv_wiki implements Xow_wiki, Xow_ttl_parser, Gfo_invk {
|
||||
if (ttl.Ns().Id_is_special())
|
||||
special_mgr.Get_by_ttl(rv, url, ttl);
|
||||
else
|
||||
html__hdump_mgr.Load_mgr().Load(rv, ttl);
|
||||
html__hdump_mgr.Load_mgr().Load_by_xowh(rv, ttl, Bool_.Y);
|
||||
}
|
||||
public Xoa_ttl Ttl_parse(byte[] ttl) {return Ttl_parse(ttl, 0, ttl.length);}
|
||||
public Xoa_ttl Ttl_parse(byte[] src, int src_bgn, int src_end) {return Xoa_ttl.Parse(app.Utl_amp_mgr(), app.Utl_case_mgr(), xwiki_mgr, ns_mgr, src, src_bgn, src_end);}
|
||||
|
Loading…
Reference in New Issue
Block a user