1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2026-03-02 03:49:30 +00:00

Bldr: Add more implementation for missing origs

This commit is contained in:
gnosygnu
2017-03-04 09:44:22 -05:00
parent ef11577d08
commit 3e39b2fe77
12 changed files with 216 additions and 44 deletions

View File

@@ -19,6 +19,7 @@ import gplx.xowa.addons.bldrs.files.cmds.*;
import gplx.xowa.addons.bldrs.mass_parses.inits.*; import gplx.xowa.addons.bldrs.mass_parses.parses.*; import gplx.xowa.addons.bldrs.mass_parses.makes.*; import gplx.xowa.addons.bldrs.mass_parses.resumes.*;
import gplx.xowa.addons.bldrs.files.cksums.*; import gplx.xowa.addons.bldrs.files.checks.*;
import gplx.xowa.addons.bldrs.app_cfgs.wm_server_cfgs.*;
import gplx.xowa.addons.bldrs.files.missing_origs.*;
public class Xoax_builds_files_addon implements Xoax_addon_itm, Xoax_addon_itm__bldr {
public Xob_cmd[] Bldr_cmds() {
return new Xob_cmd[]
@@ -26,7 +27,7 @@ public class Xoax_builds_files_addon implements Xoax_addon_itm, Xoax_addon_itm__
, Xobldr__lnki_regy__create.Prototype
, Xobldr__page_regy__create.Prototype
, Xobldr__orig_regy__create.Prototype
, Xobldr__orig_regy__find_missing.Prototype
, Xobldr_missing_origs_cmd.Prototype
, Xobldr__xfer_temp__insert_thm.Prototype
, Xobldr__xfer_temp__insert_orig.Prototype
, Xobldr__xfer_regy__create.Prototype

View File

@@ -13,21 +13,21 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.addons.bldrs.files.cmds; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.files.*;
package gplx.xowa.addons.bldrs.files.missing_origs; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.files.*;
import gplx.dbs.*;
import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.wkrs.*;
import gplx.xowa.files.*; import gplx.xowa.files.origs.*; import gplx.xowa.apps.wms.apis.origs.*;
import gplx.xowa.addons.bldrs.files.dbs.*;
public class Xobldr__orig_regy__find_missing extends Xob_cmd__base {
public class Xobldr_missing_origs_cmd extends Xob_cmd__base {
private int fail_max = 100000;
public Xobldr__orig_regy__find_missing(Xob_bldr bldr, Xowe_wiki wiki) {super(bldr, wiki);}
public Xobldr_missing_origs_cmd(Xob_bldr bldr, Xowe_wiki wiki) {super(bldr, wiki);}
@Override public void Cmd_run() {
// got orig_tbl
Db_conn conn = Xob_db_file.New__file_make(wiki.Fsys_mgr().Root_dir()).Conn();
Xob_orig_regy_tbl.Create_table(conn);
// get counts; fail if too many
int fail_count = conn.Exec_select_count_as_int("orig_regy", 0);
int fail_count = conn.Exec_sql(Db_sql_.Make_by_fmt(String_.Ary("SELECT Count(lnki_ttl) FROM orig_regy WHERE orig_page_id IS NULL")));
if (fail_count > fail_max) throw Err_.new_wo_type("bldr.find_missing: too many missing: missing=~{0} max=~{1}", fail_count, fail_max);
Gfo_usr_dlg_.Instance.Note_many("", "", "bldr.find_missing: found=~{0}", fail_count);
@@ -70,7 +70,8 @@ public class Xobldr__orig_regy__find_missing extends Xob_cmd__base {
for (int i = 0; i < len; i++) {
Xof_fsdb_itm itm = (Xof_fsdb_itm)list.Get_at(i);
update_stmt
.Val_int("orig_w", itm.Orig_w()).Val_int("orig_h", itm.Orig_h())
.Val_int("orig_w", itm.Orig_w())
.Val_int("orig_h", itm.Orig_h())
.Crt_bry_as_str("lnki_ttl", itm.Lnki_ttl()).Exec_update();
}
conn.Txn_end();
@@ -83,6 +84,6 @@ public class Xobldr__orig_regy__find_missing extends Xob_cmd__base {
public static final String BLDR_CMD_KEY = "file.orig_regy.find_missing";
@Override public String Cmd_key() {return BLDR_CMD_KEY;}
public static final Xob_cmd Prototype = new Xobldr__orig_regy__find_missing(null, null);
@Override public Xob_cmd Cmd_clone(Xob_bldr bldr, Xowe_wiki wiki) {return new Xobldr__orig_regy__find_missing(bldr, wiki);}
public static final Xob_cmd Prototype = new Xobldr_missing_origs_cmd(null, null);
@Override public Xob_cmd Cmd_clone(Xob_bldr bldr, Xowe_wiki wiki) {return new Xobldr_missing_origs_cmd(bldr, wiki);}
}

View File

@@ -0,0 +1,95 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.addons.bldrs.files.missing_origs; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.files.*;
import gplx.langs.htmls.encoders.*;
import gplx.langs.jsons.*;
import gplx.xowa.files.repos.*;
import gplx.xowa.files.downloads.*;
import gplx.xowa.apps.wms.apis.origs.*;
/**
 * Builds WMF imageinfo api queries for a list of missing origs.
 *
 * Work in progress: the download / json-parse / copy-back steps are still commented out,
 * so at present this class only assembles the query url for each batch of titles.
 */
public class Xobldr_missing_origs_wmfapi {
	// max number of titles placed in a single api call
	// NOTE(review): the MediaWiki api caps "titles" at 50 for standard clients (500 needs apihighlimits) -- confirm before enabling the download
	private static final int Titles_per_call = 500;
	// members below are disabled until the api call is wired in
	// private final Xoapi_orig_base orig_api;
	// private final Xof_download_wkr download_wkr;
	// private final Xow_repo_mgr repo_mgr;
	// private final byte[] wiki_domain;
	// private final Xoapi_orig_rslts api_rv = new Xoapi_orig_rslts();
	/** Args are accepted for the eventual implementation; none are stored yet. */
	public Xobldr_missing_origs_wmfapi(Xoapi_orig_base orig_api, Xof_download_wkr download_wkr, Xow_repo_mgr repo_mgr, byte[] wiki_domain) {
		// this.orig_api = orig_api;
		// this.download_wkr = download_wkr;
		// this.repo_mgr = repo_mgr;
		// this.wiki_domain = wiki_domain;
	}
	/**
	 * Walks src from idx to its end in batches of at most Titles_per_call items and builds one imageinfo query url per batch.
	 *
	 * @param src        hash of Xobldr_missing_origs_item; read by position
	 * @param trg        not used yet; presumably receives the found items once the api results are parsed
	 * @param api_domain host of the wiki api; EX: commons.wikimedia.org
	 * @param idx        position in src of the first item to process
	 *
	 * Fails with an Err if web access is disabled. Any other exception raised while batching is logged as a warning
	 * (with the start idx of the failing batch) and ends the call.
	 */
	public void Find_by_list(Ordered_hash src, Ordered_hash trg, String api_domain, int idx) {
		// fail if web access disabled
		if (!gplx.core.ios.IoEngine_system.Web_access_enabled) {
			throw Err_.new_wo_type("web access must be enabled for missing_origs cmd");
		}

		// Json_parser parser = new Json_parser();
		Gfo_url_encoder encoder = Gfo_url_encoder_.New__http_url().Make();
		Bry_bfr bfr = Bry_bfr_.New();
		int len = src.Len();
		try {
			// loop until all titles found
			while (idx < len) {
				// clamp batch to end of list; otherwise Get_at is called past the last item when fewer than Titles_per_call remain
				int batch_end = Math.min(idx + Titles_per_call, len);

				// generate super api; EX: https://commons.wikimedia.org/w/api.php?action=query&format=json&iilimit=1&redirects&prop=imageinfo&iiprop=size|url|mediatype|mime|bitdepth|timestamp|size|sha1&titles=
				bfr.Add_str_a7("https://");
				bfr.Add_str_a7(api_domain);
				bfr.Add_str_a7("/w/api.php?action=query");
				bfr.Add_str_a7("&format=json");  // json easier to use than xml
				bfr.Add_str_a7("&iilimit=1");    // limit to 1 revision history (default will return more); EX:File:Different_Faces_Neptune.jpg
				bfr.Add_str_a7("&redirects");    // show redirects
				bfr.Add_str_a7("&prop=imageinfo&iiprop=size|url|mediatype|mime|bitdepth|timestamp|size|sha1"); // list of props
				bfr.Add_str_a7("&titles=");

				// add titles; EX: File:A.png|File:B.png
				for (int i = idx; i < batch_end; i++) {
					Xobldr_missing_origs_item item = (Xobldr_missing_origs_item)src.Get_at(i);
					Xoa_ttl ttl = item.Lnki_ttl();

					// skip "|" if first
					if (i != idx) bfr.Add_byte_pipe();

					// make ttl_bry so (a) namespace is present (EX:File:); (b) spaces are present (not underscores)
					byte[] ttl_bry = ttl.Full_txt_wo_qarg();
					ttl_bry = encoder.Encode(ttl_bry);
					bfr.Add(ttl_bry);
				}

				// take the url and reset the bfr so the next batch does not append to this one
				byte[] api_url = bfr.To_bry_and_clear();

				// call api
				// byte[] rslt = download_wkr.Download_xrg().Exec_as_bry(api_url);

				// deserialize
				// Json_doc jdoc = parser.Parse(rslt);

				// loop over /query/pages
				// for each node, deserialize orig info and add to hash by "title"

				// loop over /query/redirects
				// for each node, retrieve from hash by "to"; add "from" as prop

				// loop over hash
				// for each item, retrieve from src; copy props over

				// advance to next batch; without this the while loop never terminates
				idx = batch_end;
			}
		} catch (Exception e) {
			Gfo_usr_dlg_.Instance.Warn_many("", "", "missing_origs:failure while calling wmf_api; domain=~{0} idx=~{1} err=~{2}", api_domain, idx, Err_.Message_gplx_log(e));
		}
	}
}
/** Immutable entry for one missing orig; at present it carries only the lnki title. */
class Xobldr_missing_origs_item {
	// title given at construction; never reassigned
	private final Xoa_ttl lnki_ttl;

	/** Stores the given title as-is (no validation or copying). */
	public Xobldr_missing_origs_item(Xoa_ttl lnki_ttl) {
		this.lnki_ttl = lnki_ttl;
	}

	/** Returns the title passed to the constructor. */
	public Xoa_ttl Lnki_ttl() {
		return this.lnki_ttl;
	}
}