1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2024-10-27 20:34:16 +00:00

Page_sync: Sync recently-created pages

This commit is contained in:
gnosygnu 2017-05-07 11:47:53 -04:00
parent 417fd8b990
commit bee71c22fe
11 changed files with 180 additions and 60 deletions

View File

@ -30,8 +30,8 @@ public class Xoa_app_ {
}
}
public static final String Name = "xowa";
public static final int Version_id = 524;
public static final String Version = "4.5.2.1704";
public static final int Version_id = 525;
public static final String Version = "4.5.3.1705";
public static String Build_date = "2012-12-30 00:00:00";
public static String Build_date_fmt = "yyyy-MM-dd HH:mm:ss";
public static String Op_sys_str;

View File

@ -19,10 +19,10 @@ import gplx.xowa.htmls.*;
import gplx.xowa.addons.wikis.pages.syncs.dbs.*;
import gplx.xowa.apps.apis.xowa.addons.bldrs.*;
import gplx.xowa.wikis.data.tbls.*;
public class Xosync_read_mgr implements Gfo_invk {
private boolean auto_enabled = false;
import gplx.xowa.wikis.domains.*;
import gplx.xowa.addons.wikis.pages.syncs.wmapis.*;
public class Xosync_read_mgr implements Gfo_invk {
private int auto_interval = 60 * 24; // in minutes
private final Xowd_page_itm tmp_dbpg = new Xowd_page_itm();
private Db_conn sync_conn; private Xosync_sync_tbl sync_tbl;
private final Xopg_match_mgr auto_page_matcher = new Xopg_match_mgr();
private final Xosync_update_mgr update_mgr = new Xosync_update_mgr();
@ -30,42 +30,95 @@ public class Xosync_read_mgr implements Gfo_invk {
this.Auto_scope_("*:Main_Page");
wiki.App().Cfg().Bind_many_wiki(this, wiki, Cfg__manual__enabled, Cfg__auto__enabled, Cfg__auto__interval, Cfg__auto__scope);
}
public boolean Auto_enabled() {return auto_enabled;} private boolean auto_enabled = false;
public boolean Manual_enabled() {return manual_enabled;} private boolean manual_enabled;
public boolean Auto_update(Xow_wiki wiki, Xoa_page page, Xoa_ttl page_ttl) {
if (wiki.Domain_itm().Domain_type_id() == gplx.xowa.wikis.domains.Xow_domain_tid_.Tid__home) return false;
if (wiki.Domain_itm().Domain_type_id() == gplx.xowa.wikis.domains.Xow_domain_tid_.Tid__other) return false;
if (page_ttl.Ns().Id_is_special()) return false;
public Xoa_ttl Auto_update(Xow_wiki wiki, Xoa_page page, Xoa_ttl page_ttl) {
// skip if not enabled
if (!auto_enabled) return null;
if (!auto_enabled) return false;
if (!auto_page_matcher.Match(wiki, page_ttl.Full_db())) return false;
// skip if home or other
if (Int_.In
( wiki.Domain_itm().Domain_type_id()
, Xow_domain_tid_.Tid__home
, Xow_domain_tid_.Tid__other))
return null;
wiki.Data__core_mgr().Db__core().Tbl__page().Select_by_ttl(tmp_dbpg, page_ttl.Ns(), page_ttl.Page_db());
// init some vars
Xoa_ttl rv = null;
int prv_page_id = -1;
Hash_adp pages = Hash_adp_.New();
if (sync_conn == null) {
Io_url sync_db_url = wiki.Fsys_mgr().Root_dir().GenSubFil(wiki.Domain_str() + "-sync.xowa");
Gfo_usr_dlg_.Instance.Log_many("", "", "page_sync: loading database for page_sync_data; url=~{0}", sync_db_url.Raw());
sync_conn = Db_conn_bldr.Instance.Get_or_autocreate(true, sync_db_url);
sync_tbl = new Xosync_sync_tbl(sync_conn);
sync_conn.Meta_tbl_assert(sync_tbl);
// loop to handle redirects
while (true) {
// exit early...
if (pages.Has(page_ttl.Full_db()) // ... if circular redirect; EX: A -> B -> A
|| pages.Count() == 3) { // ... or too many redirects EX: A -> B -> C -> D
return rv;
}
// else, add to list of pages
else {
pages.Add_as_key_and_val(page_ttl.Full_db());
}
// skip if special
if (page_ttl.Ns().Id_is_special()) return rv;
// skip if it doesn't match criteria in Options
if (!auto_page_matcher.Match(wiki, page_ttl.Full_db())) return rv;
// get page data based on title (ns + page_db)
Xowd_page_itm tmp_dbpg = new Xowd_page_itm();
wiki.Data__core_mgr().Db__core().Tbl__page().Select_by_ttl(tmp_dbpg, page_ttl.Ns(), page_ttl.Page_db());
// get sync conn
if (sync_conn == null) {
Io_url sync_db_url = wiki.Fsys_mgr().Root_dir().GenSubFil(wiki.Domain_str() + "-sync.xowa");
Gfo_usr_dlg_.Instance.Log_many("", "", "page_sync: loading database for page_sync_data; url=~{0}", sync_db_url.Raw());
sync_conn = Db_conn_bldr.Instance.Get_or_autocreate(true, sync_db_url);
sync_tbl = new Xosync_sync_tbl(sync_conn);
sync_conn.Meta_tbl_assert(sync_tbl);
}
// get sync_date and check if sync needed
DateAdp sync_date = sync_tbl.Select_sync_date_or_min(tmp_dbpg.Id());
if (Datetime_now.Get().Diff(sync_date).Total_mins().To_int() <= auto_interval) {
Gfo_usr_dlg_.Instance.Log_many("", "", "page_sync: skipping auto-sync for page; wiki=~{0} page=~{1} sync_date=~{2}", wiki.Domain_bry(), page_ttl.Full_db(), sync_date.XtoStr_fmt_yyyy_MM_dd_HH_mm_ss());
return rv;
}
else {
Gfo_usr_dlg_.Instance.Log_many("", "", "page_sync: running auto-sync for page; wiki=~{0} page=~{1} sync_date=~{2}", wiki.Domain_bry(), page_ttl.Full_db(), sync_date.XtoStr_fmt_yyyy_MM_dd_HH_mm_ss());
}
// auto-sync page
Xoa_app app = wiki.App();
Xoh_page hpg = new Xoh_page();
update_mgr.Init_by_app(app);
update_mgr.Init_by_page(wiki, hpg);
Xowm_parse_data parse_data = update_mgr.Update(app.Wmf_mgr().Download_wkr(), wiki, page_ttl);
if (parse_data == null)
return rv;
// insert into sync_db
Gfo_usr_dlg_.Instance.Log_many("", "", "page_sync: updating sync table; page=~{0}", page_ttl.Full_db());
sync_tbl.Upsert(parse_data.Page_id(), Datetime_now.Get());
// redirect occurred; EX: A -> B will have A,B in pages
if (pages.Count() > 1) {
wiki.Data__core_mgr().Tbl__page().Update__redirect(parse_data.Page_id(), prv_page_id);
}
// NOTE: set rv to last good page; EX: A -> B; A exists but B doesn't; show A, not B; DATE:2017-05-07
rv = page_ttl;
// no redirects
if (parse_data.Redirect_to_ttl == null)
return rv;
// redirect occurred;
else {
prv_page_id = parse_data.Page_id();
page_ttl = wiki.Ttl_parse(parse_data.Redirect_to_ttl);
}
}
DateAdp sync_date = sync_tbl.Select_sync_date_or_min(tmp_dbpg.Id());
if (Datetime_now.Get().Diff(sync_date).Total_mins().To_int() <= auto_interval) {
Gfo_usr_dlg_.Instance.Log_many("", "", "page_sync: skipping auto-sync for page; wiki=~{0} page=~{1} sync_date=~{2}", wiki.Domain_bry(), page_ttl.Full_db(), sync_date.XtoStr_fmt_yyyy_MM_dd_HH_mm_ss());
return false;
}
else {
Gfo_usr_dlg_.Instance.Log_many("", "", "page_sync: running auto-sync for page; wiki=~{0} page=~{1} sync_date=~{2}", wiki.Domain_bry(), page_ttl.Full_db(), sync_date.XtoStr_fmt_yyyy_MM_dd_HH_mm_ss());
}
Xoa_app app = wiki.App();
Xoh_page hpg = new Xoh_page();
update_mgr.Init_by_app(app);
update_mgr.Init_by_page(wiki, hpg);
update_mgr.Update(app.Wmf_mgr().Download_wkr(), wiki, page_ttl);
Gfo_usr_dlg_.Instance.Log_many("", "", "page_sync: updating sync table; page=~{0}", page_ttl.Full_db());
sync_tbl.Upsert(tmp_dbpg.Id(), Datetime_now.Get());
return true;
}
private void Auto_scope_(String v) {
auto_page_matcher.Set(v);

View File

@ -15,7 +15,7 @@ Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.addons.wikis.pages.syncs.core; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.pages.*; import gplx.xowa.addons.wikis.pages.syncs.*;
import gplx.xowa.files.downloads.*;
import gplx.xowa.wikis.data.*;
import gplx.xowa.wikis.data.*; import gplx.xowa.wikis.data.tbls.*;
import gplx.xowa.htmls.*; import gplx.langs.htmls.docs.*;
import gplx.xowa.htmls.core.wkrs.*; import gplx.xowa.htmls.core.wkrs.txts.*; import gplx.xowa.htmls.core.hzips.*;
import gplx.xowa.htmls.core.dbs.*;
@ -39,21 +39,43 @@ public class Xosync_update_mgr {
hctx.Init_by_page(wiki, page);
page.Hdump_mgr().Clear();
}
public void Update(Xof_download_wkr download_wkr, Xow_wiki wiki, Xoa_ttl page_ttl) {
public Xowm_parse_data Update(Xof_download_wkr download_wkr, Xow_wiki wiki, Xoa_ttl page_ttl) {
Xoh_page hpg = (Xoh_page)hctx.Page();
// call wmf api
Xowm_parse_wmf parse_wkr = new Xowm_parse_wmf();
Xowm_parse_data data = parse_wkr.Get_parse_or_null(download_wkr, wiki, page_ttl);
if (data == null) return;
if (data == null) return null;
// parse
// parse html to fix images
Gfo_usr_dlg_.Instance.Log_many("", "", "page_sync: parsing page; page=~{0}", page_ttl.Full_db());
Parse(hpg, wiki, hctx.Page__url(), data.Revn_html());
// get existing html_tbl
Xow_db_file html_db = html_tbl_mgr.Get_html_db(wiki);
// init some vars
byte[] html_bry = hpg.Db().Html().Html_bry();
Xow_db_file html_db = html_tbl_mgr.Get_html_db(wiki);
Xow_db_file core_db = wiki.Data__core_mgr().Db__core();
Xowd_page_tbl page_tbl = core_db.Tbl__page();
// create entry in page_tbl if it does not exist; DATE:2017-05-06
Xowd_page_itm page_itm = new Xowd_page_itm();
if (!page_tbl.Select_by_id(page_itm, data.Page_id())) {
// update random
int ns_id = page_ttl.Ns().Id();
int next_random_id = core_db.Tbl__ns().Select_ns_count(ns_id) + 1;
core_db.Tbl__ns().Update_ns_count(ns_id, next_random_id);
// insert into page_tbl
page_tbl.Insert_bgn();
try {
page_tbl.Insert_cmd_by_batch(data.Page_id(), ns_id, page_ttl.Page_db(), false, Datetime_now.Get()
, html_bry.length, next_random_id, -1, html_db.Id(), -1);
} finally {
page_tbl.Insert_end();
}
}
// save html
Gfo_usr_dlg_.Instance.Log_many("", "", "page_sync: saving html; page=~{0} html_len=~{1}", page_ttl.Full_db(), Bry_.Len(html_bry));
html_tbl_mgr.Save_html(wiki, html_db, data.Page_id(), data.Revn_id(), html_bry);
@ -67,6 +89,8 @@ public class Xosync_update_mgr {
gplx.core.threads.Gfo_thread_pool thread_pool = new gplx.core.threads.Gfo_thread_pool();
thread_pool.Add_at_end(file_thread);
thread_pool.Run();
return data;
}
public void Parse(Xoh_page hpg, Xow_wiki wiki, byte[] page_url, byte[] src) {
int src_len = src.length;

View File

@ -16,12 +16,11 @@ Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
package gplx.xowa.addons.wikis.pages.syncs.wmapis; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.pages.*; import gplx.xowa.addons.wikis.pages.syncs.*;
public class Xowm_parse_data {
public Xowm_parse_data(byte[] wiki_domain
, int page_id, byte[] page_ttl, byte[] redirect_src
, int page_id, byte[] page_ttl
, int revn_id, byte[] revn_html) {
this.wiki_domain = wiki_domain;
this.page_id = page_id;
this.page_ttl = page_ttl;
this.redirect_src = redirect_src;
this.revn_id = revn_id;
this.revn_html = revn_html;
}
@ -29,8 +28,9 @@ public class Xowm_parse_data {
public int Page_id() {return page_id;} private final int page_id;
public byte[] Page_ttl() {return page_ttl;} private final byte[] page_ttl;
public byte[] Redirect_src() {return redirect_src;} private final byte[] redirect_src;
public int Revn_id() {return revn_id;} private final int revn_id;
public byte[] Revn_html() {return revn_html;} private final byte[] revn_html;
public byte[] Redirect_to_ttl = null;
}

View File

@ -24,19 +24,27 @@ public class Xowm_parse_wmf {
if (!gplx.core.ios.IoEngine_system.Web_access_enabled) return null;
byte[] wiki_domain = wiki.Domain_bry();
byte[] page_full_db = page_ttl.Full_db();
byte[] json = Download(bfr, download_wkr, wiki_domain, page_full_db);
return Parse(json_parser, wiki_domain, page_full_db, json);
// get core json
byte[] core_json = Core_download(bfr, download_wkr, wiki_domain, page_full_db);
Xowm_parse_data rv = Core_parse(json_parser, wiki_domain, page_full_db, core_json);
// get redirect json
byte[] redirect_json = Redirect_download(bfr, download_wkr, wiki_domain, page_full_db);
Redirect_parse(rv, json_parser, wiki_domain, page_full_db, redirect_json);
return rv;
}
private static byte[] Download(Bry_bfr bfr, Xof_download_wkr download_wkr, byte[] wiki_domain, byte[] page_full_db) {
private static byte[] Core_download(Bry_bfr bfr, Xof_download_wkr download_wkr, byte[] wiki_domain, byte[] page_full_db) {
// build url; EX: "https://en.wikipedia.org/w/api.php?action=parse&format=json&redirects=1&page=Wikipedia:Main%20Page"
Xowm_api_bldr.Bld_bgn(bfr, wiki_domain);
bfr.Add_str_a7("action=parse&format=json&redirects=1&page="); // NOTE:redirects=1 needed to resolve redirects
bfr.Add_str_a7("action=parse&format=json&page="); // NOTE:do not add redirects=1; want to get "actual" html, not "redirected" html; DATE:2017-05-06
bfr.Add(page_full_db);
// download
return download_wkr.Download_xrg().Exec_as_bry(bfr.To_str_and_clear());
}
private static Xowm_parse_data Parse(Json_parser json_parser, byte[] wiki_domain, byte[] page_full_db, byte[] json) {
private static Xowm_parse_data Core_parse(Json_parser json_parser, byte[] wiki_domain, byte[] page_full_db, byte[] json) {
Json_doc jdoc = json_parser.Parse(json);
// get data
@ -45,9 +53,31 @@ public class Xowm_parse_wmf {
int page_id = parse_nde.Get_as_int("pageid");
int revn_id = parse_nde.Get_as_int("revid");
byte[] page_ttl = Xoa_ttl.Replace_spaces(parse_nde.Get_as_bry("title"));
byte[] redirect_src = Bry_.Eq(page_ttl, page_full_db) ? null : page_full_db;
byte[] revn_html = parse_nde.Get_as_nde("text").Get_as_bry("*");
return new Xowm_parse_data(wiki_domain, page_id, page_ttl, redirect_src, revn_id, revn_html);
return new Xowm_parse_data(wiki_domain, page_id, page_ttl, revn_id, revn_html);
}
/**
 * Requests only the redirect metadata for a page and returns the raw response bytes.
 *
 * @param bfr          scratch buffer used to assemble the request url; it is cleared when the url is extracted
 * @param download_wkr supplies the download request object that executes the call
 * @param wiki_domain  domain handed to Xowm_api_bldr.Bld_bgn to start the url
 * @param page_full_db full db title of the page; appended as the value of "page="
 * @return whatever Exec_as_bry returns for the assembled url
 */
private static byte[] Redirect_download(Bry_bfr bfr, Xof_download_wkr download_wkr, byte[] wiki_domain, byte[] page_full_db) {
	// assemble request; EX: "https://en.wikipedia.org/w/api.php?action=parse&format=json&redirects=1&prop=&page=EARTH"
	Xowm_api_bldr.Bld_bgn(bfr, wiki_domain);
	// NOTE: the empty "prop=" drops all data from the response except for the redirect data
	bfr.Add_str_a7("action=parse&format=json&redirects=1&prop=&page=");
	bfr.Add(page_full_db);

	// flush buffer into the url, then download
	String api_url = bfr.To_str_and_clear();
	return download_wkr.Download_xrg().Exec_as_bry(api_url);
}
/**
 * Reads the first entry of "parse.redirects" from the api json and, when that entry has a "to" value,
 * stores it in rv.Redirect_to_ttl. In every other case rv is left untouched.
 *
 * Expected json shape: "parse { redirects [{from, to}] }"
 *
 * @param rv           parse data to annotate; may be null when the core parse produced nothing, in which case this is a no-op
 * @param json_parser  parser used to turn json into a document
 * @param wiki_domain  not used by this method; kept so the signature parallels Core_parse
 * @param page_full_db not used by this method; kept so the signature parallels Core_parse
 * @param json         raw response of the redirect request
 */
private static void Redirect_parse(Xowm_parse_data rv, Json_parser json_parser, byte[] wiki_domain, byte[] page_full_db, byte[] json) {
	// nothing to annotate when there is no parse data; prevents a NullPointerException on the field write below
	if (rv == null) return;

	Json_doc jdoc = json_parser.Parse(json);

	// get data; "parse { redirects [{from, to}] }"
	Json_nde parse_nde = jdoc.Root_nde().Get_as_nde("parse");
	if (parse_nde == null) return; // handle pages that don't exist such as s.w:File:AnyFile.png; DATE:2016-11-15

	// no "redirects" array, or an empty one, means the page is not a redirect
	Json_ary redirects_nde = parse_nde.Get_as_ary("redirects");
	if (redirects_nde == null || redirects_nde.Len() == 0) return;

	// only the first redirect entry is consulted
	Json_nde redirect_nde = redirects_nde.Get_as_nde(0);
	byte[] redirect_to_bry = redirect_nde.Get_as_bry("to");
	if (redirect_to_bry != null)
		rv.Redirect_to_ttl = redirect_to_bry;
}
}

View File

@ -155,7 +155,8 @@ public class Xog_tab_itm implements Gfo_invk {
wiki.Parser_mgr().Ctx().Page_(page);
if ( page.Db().Page().Exists_n()
&& !page.Commons_mgr().Xowa_mockup()) { // do not enter "missing" section if File_mockup; EX:en.wikipedia.org/wiki/File:Protoplanetary-disk.jpg DATE:2016-11-13
if (wiki.Db_mgr().Save_mgr().Create_enabled()) {
if (wiki.Db_mgr().Save_mgr().Create_enabled()
|| wiki.Page_mgr().Sync_mgr().Auto_enabled()) {
page = Xoae_page.New_edit(wiki, ttl);
view_mode = Xopg_page_.Tid_edit;
history_mgr.Add(page); // NOTE: must put new_page on stack so that pressing back will pop new_page, not previous page

View File

@ -369,6 +369,15 @@ public class Xowd_page_tbl implements Db_tbl {
.Exec_update()
;
}
/**
 * Flags one page row as a redirect and records the page it redirects to.
 *
 * NOTE: argument order is (target, source): the row that gets updated is the one identified by the SECOND argument.
 *
 * @param redirect_to_id value written to the redirect-id field (the page being redirected to)
 * @param page_id        id of the row to update (the page that is the redirect)
 */
public void Update__redirect(int redirect_to_id, int page_id) {
// NOTE(review): String_.Ary(fld_id) is presumably the criteria-column list, followed by the columns to set -- confirm against Stmt_update
conn.Stmt_update(tbl_name, String_.Ary(fld_id), fld_is_redirect, fld_redirect_id)
// set is_redirect to the "yes" int value
.Val_int(fld_is_redirect, Bool_.Y_int)
// set redirect_id to the target page
.Val_int(fld_redirect_id, redirect_to_id)
// restrict the update to the row whose id is page_id
.Crt_int(fld_id, page_id)
.Exec_update()
;
}
public void Delete(int page_id) {
Gfo_usr_dlg_.Instance.Log_many("", "", "db.page: delete started: page_id=~{0}", page_id);
conn.Stmt_delete(tbl_name, fld_id).Crt_int(fld_id, page_id).Exec_delete();

View File

@ -57,6 +57,9 @@ public class Xodb_load_mgr_sql implements Xodb_load_mgr {
db_mgr.Core_data_mgr().Tbl__page().Select_in__ns_ttl(cancelable, rv, db_mgr.Wiki().Ns_mgr(), fill_idx_fields_only, bgn, end);
}
public void Load_page(Xowd_page_itm rv, Xow_ns ns) {
if (rv.Text_db_id() == -1) return; // NOTE: page_sync will create pages with -1 text_db_id; DATE:2017-05-06
// get text
Xowd_text_tbl text_tbl = db_mgr.Core_data_mgr().Dbs__get_by_id_or_fail(rv.Text_db_id()).Tbl__text();
byte[] text_bry = text_tbl.Select(rv.Id());
rv.Text_(text_bry);

View File

@ -20,11 +20,10 @@ import gplx.xowa.addons.wikis.pages.syncs.core.*;
import gplx.xowa.wikis.data.tbls.*;
public class Xowe_page_mgr {
private final Xowe_wiki wiki;
private final Xosync_read_mgr read_mgr = new Xosync_read_mgr();
private final Bry_bfr tmp_bfr = Bry_bfr_.New();
private final Gfo_qarg_mgr tmp_qarg_mgr = new Gfo_qarg_mgr();
public Xowe_page_mgr(Xowe_wiki wiki) {this.wiki = wiki;}
public Xosync_read_mgr Sync_mgr() {return read_mgr;}
public Xosync_read_mgr Sync_mgr() {return read_mgr;} private final Xosync_read_mgr read_mgr = new Xosync_read_mgr();
// Hands the wiki to the page-sync read manager by calling its Init_by_wiki.
// NOTE: the wiki parameter shadows the wiki field of this class; only the parameter is used here.
public void Init_by_wiki(Xowe_wiki wiki) {
read_mgr.Init_by_wiki(wiki);
}
@ -64,10 +63,13 @@ public class Xowe_page_mgr {
Wait_for_popups(wiki.App());
// auto-update
if (read_mgr.Auto_update(wiki, page, ttl)) {
// page-sync occurred; reload metadata, especially to pick up Html_db_id; DATE:2017-03-13
Xoa_ttl redirect_ttl = read_mgr.Auto_update(wiki, page, ttl);
if (redirect_ttl != null) {
// page-sync occurred; update ttl to handle any redirection; DATE:2017-05-07
ttl = redirect_ttl;
// reload metadata, needed to pick up Html_db_id; DATE:2017-03-13
page = wiki.Data_mgr().Load_page_and_parse(url, ttl, wiki.Lang(), tab, false);
ttl = page.Ttl(); // note that Load_page_and_parse can redirect ttl; EX: Special:Random -> A; DATE:2017-01-05
}
// load from html_db

View File

@ -51,7 +51,6 @@ class XomwMediaWikiTitleCodecFxt {
/**
 * Builds an XomwLanguage for tests.
 * The language item is keyed by Xol_lang_itm_.Key_en and uses the Lang_mgr of an app
 * created by Xoa_app_fxt.Make__app__edit().
 *
 * @return a new XomwLanguage wrapping that language item
 */
public XomwLanguage Make_lang() {
	Xoae_app app = Xoa_app_fxt.Make__app__edit();
	Xol_lang_itm lang = new Xol_lang_itm(app.Lang_mgr(), Xol_lang_itm_.Key_en);
	// NOTE: the XomwEnv local that used to be created here was never read, so it has been dropped
	return new XomwLanguage(lang);
}
public void Test_splitTitleString(XomwMediaWikiTitleCodec codec, String src, XomwMediaWikiTitleCodecParts expd) {

View File

@ -118,7 +118,6 @@ class XomwLanguageFxt {
/**
 * Creates the fixture and its XomwLanguage.
 * The language item is built with the key "en" and the Lang_mgr of an app
 * created by Xoa_app_fxt.Make__app__edit().
 */
public XomwLanguageFxt() {
	Xoae_app app = Xoa_app_fxt.Make__app__edit();
	Xol_lang_itm xoLang = new Xol_lang_itm(app.Lang_mgr(), Bry_.new_a7("en"));
	// NOTE: the XomwEnv local that used to be created here was never read, so it has been dropped
	this.lang = new XomwLanguage(xoLang);
}
public void Init_digitGroupingPattern(String digitGroupingPattern) {