mirror of
https://github.com/gnosygnu/xowa.git
synced 2024-10-27 20:34:16 +00:00
HTML Databases: Make plain-text parser thread-safe; Simplify code; Fix gallery not working [#320]
This commit is contained in:
parent
2b4320b302
commit
42d15b726c
@ -16,7 +16,6 @@ Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
package gplx.langs.htmls.docs; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
|
||||
import gplx.core.btries.*;
|
||||
public class Gfh_doc_parser {
|
||||
private final Btrie_rv trv = new Btrie_rv();
|
||||
private final Btrie_slim_mgr trie = Btrie_slim_mgr.cs();
|
||||
private final Gfh_txt_wkr txt_wkr;
|
||||
public Gfh_doc_parser(Gfh_txt_wkr txt_wkr, Gfh_doc_wkr... wkr_ary) {
|
||||
@ -27,6 +26,7 @@ public class Gfh_doc_parser {
|
||||
public void Parse(byte[] page_url, byte[] src, int src_bgn, int src_end) {
|
||||
int txt_bgn = -1;
|
||||
int pos = src_bgn;
|
||||
Btrie_rv trv = new Btrie_rv();
|
||||
while (pos < src_end) {
|
||||
Object o = trie.Match_at(trv, src, pos, src_end);
|
||||
if (o == null) { // not a known hook; add to txt
|
||||
|
@ -26,10 +26,11 @@ public class Gfh_tag_rdr {
|
||||
public int Src_end() {return src_end;} private int src_end;
|
||||
public Bry_err_wkr Err_wkr() {return err_wkr;} private final Bry_err_wkr err_wkr = new Bry_err_wkr();
|
||||
public Gfh_tag_rdr Reg(String tag_name, int tag_id) {name_hash.Add_str_int(tag_name, tag_id); return this;}
|
||||
public void Init(byte[] ctx_name, byte[] src, int src_bgn, int src_end) {
|
||||
public Gfh_tag_rdr Init(byte[] ctx_name, byte[] src, int src_bgn, int src_end) {
|
||||
this.src = src; this.pos = src_bgn; this.src_end = src_end;
|
||||
tag__eos.Init(this, src, Bool_.N, Bool_.N, src_end, src_end, src_end, src_end, Gfh_tag_.Id__eos, Bry_.Empty);
|
||||
err_wkr.Init_by_page(String_.new_u8(ctx_name), src);
|
||||
return this;
|
||||
}
|
||||
public void Src_rng_(int src_bgn, int src_end) {
|
||||
this.pos = src_bgn; this.src_end = src_end;
|
||||
|
@ -91,8 +91,7 @@ public class Xosync_read_mgr implements Gfo_invk {
|
||||
|
||||
// auto-sync page
|
||||
Xoa_app app = wiki.App();
|
||||
update_mgr.Init_by_app(app);
|
||||
Xowm_parse_data parse_data = update_mgr.Update(app.Wmf_mgr().Download_wkr(), wiki, page_ttl);
|
||||
Xowm_parse_data parse_data = update_mgr.Update(app.Wmf_mgr().Download_wkr(), wiki, page.Url_bry_safe(), page_ttl);
|
||||
if (parse_data == null)
|
||||
return rv;
|
||||
|
||||
|
@ -15,28 +15,15 @@ Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.addons.wikis.pages.syncs.core; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.pages.*; import gplx.xowa.addons.wikis.pages.syncs.*;
|
||||
import gplx.xowa.files.downloads.*;
|
||||
import gplx.xowa.wikis.data.*; import gplx.xowa.wikis.data.tbls.*;
|
||||
import gplx.xowa.wikis.data.*; import gplx.xowa.wikis.data.tbls.*; import gplx.xowa.wikis.domains.*;
|
||||
import gplx.xowa.htmls.*; import gplx.langs.htmls.docs.*;
|
||||
import gplx.xowa.htmls.core.wkrs.*; import gplx.xowa.htmls.core.wkrs.txts.*; import gplx.xowa.htmls.core.hzips.*;
|
||||
import gplx.xowa.htmls.core.dbs.*;
|
||||
import gplx.xowa.addons.wikis.pages.syncs.wmapis.*;
|
||||
import gplx.xowa.addons.wikis.pages.syncs.core.parsers.*;
|
||||
public class Xosync_update_mgr {
|
||||
private final Xoh_hzip_bfr bfr = new Xoh_hzip_bfr(Io_mgr.Len_kb, Bool_.N, Byte_.Max_value_127);
|
||||
private final Gfh_doc_parser hdoc_parser_mgr;
|
||||
private final Xoh_hdoc_ctx hctx = new Xoh_hdoc_ctx();
|
||||
private final Xosync_hdoc_wtr hdoc_bldr = new Xosync_hdoc_wtr();
|
||||
private final Xosync_hdoc_parser hdoc_parser_wkr;
|
||||
private final Xowd_html_tbl_mgr html_tbl_mgr = new Xowd_html_tbl_mgr();
|
||||
public Xosync_update_mgr() {
|
||||
hdoc_parser_wkr = new Xosync_hdoc_parser(hdoc_bldr);
|
||||
hdoc_parser_mgr = new Gfh_doc_parser(new Xoh_txt_parser(hdoc_bldr), hdoc_parser_wkr);
|
||||
}
|
||||
public void Init_by_app(Xoa_app app) {
|
||||
hctx.Init_by_app(app);
|
||||
}
|
||||
public Xowm_parse_data Update(Xof_download_wkr download_wkr, Xow_wiki wiki, Xoa_ttl page_ttl) {
|
||||
Xoh_page hpg = (Xoh_page)hctx.Page();
|
||||
public Xowm_parse_data Update(Xof_download_wkr download_wkr, Xow_wiki wiki, byte[] page_url, Xoa_ttl page_ttl) {
|
||||
Xoh_page hpg = new Xoh_page();
|
||||
|
||||
// call wmf api
|
||||
Xowm_parse_wmf parse_wkr = new Xowm_parse_wmf();
|
||||
@ -45,10 +32,11 @@ public class Xosync_update_mgr {
|
||||
|
||||
// parse html to fix images
|
||||
Gfo_usr_dlg_.Instance.Log_many("", "", "page_sync: parsing page; page=~{0}", page_ttl.Full_db());
|
||||
Parse(hpg, wiki, hctx.Page__url(), data.Revn_html());
|
||||
Xosync_hdoc_parser hdoc_parser_wkr = new Xosync_hdoc_parser();
|
||||
byte[] html_bry = hdoc_parser_wkr.Parse_hdoc(wiki.Domain_itm(), page_url, hpg.Hdump_mgr().Imgs(), data.Revn_html());
|
||||
|
||||
// init some vars
|
||||
byte[] html_bry = hpg.Db().Html().Html_bry();
|
||||
Xowd_html_tbl_mgr html_tbl_mgr = new Xowd_html_tbl_mgr();
|
||||
Xow_db_file html_db = html_tbl_mgr.Get_html_db(wiki);
|
||||
Xow_db_file core_db = wiki.Data__core_mgr().Db__core();
|
||||
Xowd_page_tbl page_tbl = core_db.Tbl__page();
|
||||
@ -88,17 +76,4 @@ public class Xosync_update_mgr {
|
||||
|
||||
return data;
|
||||
}
|
||||
public void Parse(Xoh_page hpg, Xow_wiki wiki, byte[] page_url, byte[] src) {
|
||||
int src_len = src.length;
|
||||
|
||||
// init_by_page for bldr, parser, hdoc
|
||||
hctx.Init_by_page(wiki, hpg);
|
||||
hpg.Hdump_mgr().Clear();
|
||||
hdoc_bldr.Init_by_page(bfr, hpg, hctx, src, 0, src_len);
|
||||
hdoc_parser_wkr.Init_by_page(hctx, src, 0, src_len);
|
||||
|
||||
// parse
|
||||
hdoc_parser_mgr.Parse(page_url, src, 0, src_len);
|
||||
hpg.Db().Html().Html_bry_(bfr.To_bry_and_clear());
|
||||
}
|
||||
}
|
||||
|
@ -74,7 +74,7 @@ public class Xosync_page_loader {
|
||||
img_src_val = Bry_.Replace(img_src_val, src_find, src_repl);
|
||||
|
||||
// parse src
|
||||
img_src_parser.Parse(err_wkr, hctx, wiki.Domain_bry(), img_src_atr.Src(), img_src_atr.Val_bgn(), img_src_atr.Val_end());
|
||||
img_src_parser.Parse(err_wkr, wiki.Domain_bry(), img_src_atr.Src(), img_src_atr.Val_bgn(), img_src_atr.Val_end());
|
||||
if (img_src_parser.File_ttl_bry() == null) return null; // skip images that don't follow format of "commons.wikimedia.org/thumb/7/70/A.png"; for example, enlarge buttons
|
||||
|
||||
// create img
|
||||
@ -103,7 +103,7 @@ public class Xosync_page_loader {
|
||||
// if (path_tid == Xosync_img_src_parser.Path__file)
|
||||
img.Init_at_gallery_end(img_tag.Atrs__get_as_int_or(Gfh_atr_.Bry__width,0), img_tag.Atrs__get_as_int_or(Gfh_atr_.Bry__height, 0), html_view_url, html_view_url);
|
||||
|
||||
Xosync_hdoc_parser.Write_img_tag(tmp_bfr, img_tag, img_src_val, img.Html_uid());
|
||||
Xosync_hdoc_parser.Write_img_tag(tmp_bfr, img_tag, img.Html_uid(), img_src_val);
|
||||
return img;
|
||||
}
|
||||
}
|
@ -14,23 +14,47 @@ GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.addons.wikis.pages.syncs.core.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.pages.*; import gplx.xowa.addons.wikis.pages.syncs.*; import gplx.xowa.addons.wikis.pages.syncs.core.*;
|
||||
import gplx.langs.htmls.*; import gplx.langs.htmls.docs.*; import gplx.xowa.htmls.core.wkrs.*;
|
||||
public class Xosync_hdoc_parser implements Gfh_doc_wkr {
|
||||
private final Xosync_hdoc_wtr hdoc_wtr;
|
||||
private final Gfh_tag_rdr tag_rdr = Gfh_tag_rdr.New__html();
|
||||
private final Bry_bfr tmp_bfr = Bry_bfr_.New();
|
||||
private final Xosync_img_src_parser img_src_parser = new Xosync_img_src_parser();
|
||||
import gplx.langs.htmls.*; import gplx.langs.htmls.docs.*;
|
||||
import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.wkrs.*;
|
||||
import gplx.xowa.wikis.domains.*;
|
||||
public class Xosync_hdoc_parser {
|
||||
public byte[] Parse_hdoc(Xow_domain_itm wiki_domain, byte[] page_url, List_adp imgs, byte[] src) {
|
||||
// init
|
||||
Bry_bfr bfr = Bry_bfr_.New();
|
||||
Xosync_img_src_parser img_src_parser = new Xosync_img_src_parser().Init_by_page(wiki_domain, page_url, imgs);
|
||||
int cur = 0, src_len = src.length;
|
||||
Gfh_tag_rdr tag_rdr = Gfh_tag_rdr.New__html().Init(page_url, src, cur, src_len);
|
||||
|
||||
public Xosync_hdoc_parser(Xosync_hdoc_wtr hdoc_wtr) {this.hdoc_wtr = hdoc_wtr;}
|
||||
public byte[] Hook() {return Byte_ascii.Angle_bgn_bry;}
|
||||
public void Init_by_page(Xoh_hdoc_ctx hctx, byte[] src, int src_bgn, int src_end) {
|
||||
tag_rdr.Init(hctx.Page__url(), src, src_bgn, src_end);
|
||||
img_src_parser.Init_by_page(hctx);
|
||||
// loop src
|
||||
while (true) {
|
||||
// look for "<"
|
||||
int find = Bry_find_.Find_fwd(src, Byte_ascii.Angle_bgn, cur, src_len);
|
||||
|
||||
// "<" not found; add rest of src and stop
|
||||
if (find == Bry_find_.Not_found) {
|
||||
bfr.Add_mid(src, cur, src_len);
|
||||
break;
|
||||
}
|
||||
public int Parse(byte[] src, int src_bgn, int src_end, int pos) {
|
||||
|
||||
// "<" found; add everything between cur and "<"
|
||||
bfr.Add_mid(src, cur, find);
|
||||
|
||||
// parse "<"
|
||||
cur = Parse_tag(bfr, tag_rdr, img_src_parser, src, src_len, find);
|
||||
}
|
||||
|
||||
return bfr.To_bry_and_clear();
|
||||
}
|
||||
private int Parse_tag(Bry_bfr bfr, Gfh_tag_rdr tag_rdr, Xosync_img_src_parser img_src_parser, byte[] src, int src_len, int pos) {
|
||||
// note that entry point is at "<"
|
||||
tag_rdr.Pos_(pos);
|
||||
int nxt_pos = tag_rdr.Pos() + 1; if (nxt_pos == src_end) return src_end;
|
||||
int nxt_pos = tag_rdr.Pos() + 1;
|
||||
|
||||
// "<" is at EOS
|
||||
if (nxt_pos == src_len) {
|
||||
bfr.Add_byte(Byte_ascii.Angle_bgn);
|
||||
return src_len;
|
||||
}
|
||||
|
||||
// check if head or tail; EX: "<a>" vs "</a>"
|
||||
byte nxt_byte = src[nxt_pos];
|
||||
@ -39,7 +63,7 @@ public class Xosync_hdoc_parser implements Gfh_doc_wkr {
|
||||
int end_comm = Bry_find_.Move_fwd(src, Gfh_tag_.Comm_end, nxt_pos);
|
||||
if (end_comm == Bry_find_.Not_found) {
|
||||
Gfo_usr_dlg_.Instance.Warn_many("", "", "end comment not found; src=~{0}", String_.new_u8(src));
|
||||
end_comm = src_end;
|
||||
end_comm = src_len;
|
||||
}
|
||||
return end_comm;
|
||||
}
|
||||
@ -55,15 +79,15 @@ public class Xosync_hdoc_parser implements Gfh_doc_wkr {
|
||||
}
|
||||
break;
|
||||
case Gfh_tag_.Id__img: // rewrite src for XOWA; especially necessary for relative protocol; EX: "//upload.wikimedia.org"; note do not use <super> tag b/c of issues with anchors like "href=#section"
|
||||
return Parse_img_src(cur);
|
||||
return Parse_img_src(bfr, img_src_parser, cur);
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
hdoc_wtr.On_txt(cur.Src_bgn(), cur.Src_end());
|
||||
bfr.Add_mid(src, cur.Src_bgn(), cur.Src_end());
|
||||
return cur.Src_end();
|
||||
}
|
||||
private int Parse_img_src(Gfh_tag img_tag) {
|
||||
private int Parse_img_src(Bry_bfr bfr, Xosync_img_src_parser img_src_parser, Gfh_tag img_tag) {
|
||||
// get @src and parse it
|
||||
Gfh_atr src_atr = img_tag.Atrs__get_by_or_empty(Gfh_atr_.Bry__src);
|
||||
img_src_parser.Parse(src_atr.Val());
|
||||
@ -71,21 +95,20 @@ public class Xosync_hdoc_parser implements Gfh_doc_wkr {
|
||||
// if error, write comment; EX: <!--error--><img ...>
|
||||
String err_msg = img_src_parser.Err_msg();
|
||||
if (err_msg != null) {
|
||||
hdoc_wtr.Add_bry(Gfh_tag_.Comm_bgn);
|
||||
hdoc_wtr.Add_str(img_src_parser.Err_msg());
|
||||
hdoc_wtr.Add_bry(Gfh_tag_.Comm_end);
|
||||
bfr.Add(Gfh_tag_.Comm_bgn);
|
||||
bfr.Add_str_u8(img_src_parser.Err_msg());
|
||||
bfr.Add(Gfh_tag_.Comm_end);
|
||||
}
|
||||
|
||||
// get img_src; use img_src_parser if no error, else use original value
|
||||
byte[] img_src_val = err_msg == null ? img_src_parser.To_bry() : src_atr.Val();
|
||||
|
||||
// write html
|
||||
Write_img_tag(tmp_bfr, img_tag, img_src_val, -1);
|
||||
hdoc_wtr.Add_bfr(tmp_bfr);
|
||||
Write_img_tag(bfr, img_tag, -1, img_src_val);
|
||||
|
||||
return img_tag.Src_end();
|
||||
}
|
||||
public static void Write_img_tag(Bry_bfr bfr, Gfh_tag img_tag, byte[] img_src_val, int uid) {
|
||||
public static void Write_img_tag(Bry_bfr bfr, Gfh_tag img_tag, int uid, byte[] img_src_val) {
|
||||
// rewrite <img> tag with custom img_src_val
|
||||
int atrs_len = img_tag.Atrs__len();
|
||||
bfr.Add(Byte_ascii.Angle_bgn_bry);
|
||||
|
@ -18,7 +18,6 @@ import gplx.core.tests.*;
|
||||
import gplx.langs.htmls.*; import gplx.xowa.htmls.*;
|
||||
import gplx.xowa.files.*; import gplx.xowa.files.repos.*;
|
||||
public class Xosync_hdoc_parser__fxt {
|
||||
private final Xosync_update_mgr mgr = new Xosync_update_mgr();
|
||||
private final Bry_bfr tmp_bfr = Bry_bfr_.New();
|
||||
private final Xoh_page hpg = new Xoh_page();
|
||||
private Xowe_wiki wiki;
|
||||
@ -28,13 +27,14 @@ public class Xosync_hdoc_parser__fxt {
|
||||
Xoae_app app = Xoa_app_fxt.Make__app__edit();
|
||||
this.wiki = Xoa_app_fxt.Make__wiki__edit(app);
|
||||
Xoa_app_fxt.repo2_(app, wiki);
|
||||
mgr.Init_by_app(app);
|
||||
}
|
||||
public void Term() {
|
||||
Gfo_usr_dlg_.Instance = Gfo_usr_dlg_.Noop;
|
||||
}
|
||||
public Xosync_hdoc_parser__fxt Exec__parse(String raw) {
|
||||
mgr.Parse(hpg, wiki, Bry_.Empty, Bry_.new_u8(raw));
|
||||
Xosync_hdoc_parser parser = new Xosync_hdoc_parser();
|
||||
byte[] result = parser.Parse_hdoc(wiki.Domain_itm(), Bry_.Empty, hpg.Hdump_mgr().Imgs(), Bry_.new_u8(raw));
|
||||
hpg.Db().Html().Html_bry_(result);
|
||||
return this;
|
||||
}
|
||||
public Xosync_hdoc_parser__fxt Test__html(String expd) {
|
||||
|
@ -1,41 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||
|
||||
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||
or alternatively under the terms of the Apache License Version 2.0.
|
||||
|
||||
You may use XOWA according to either of these licenses as is most appropriate
|
||||
for your project on a case-by-case basis.
|
||||
|
||||
The terms of each license can be found in the source code repository:
|
||||
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.addons.wikis.pages.syncs.core.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.pages.*; import gplx.xowa.addons.wikis.pages.syncs.*; import gplx.xowa.addons.wikis.pages.syncs.core.*;
|
||||
import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.wkrs.*; import gplx.xowa.htmls.core.hzips.*;
|
||||
public class Xosync_hdoc_wtr implements Xoh_hdoc_wkr {
|
||||
private Xoh_hzip_bfr bfr;
|
||||
private byte[] src;
|
||||
|
||||
public void Init_by_page(Xoh_hzip_bfr bfr, Xoh_page hpg, Xoh_hdoc_ctx hctx, byte[] src, int src_bgn, int src_end) {
|
||||
this.On_new_page(bfr, hpg, hctx, src, src_bgn, src_end);
|
||||
}
|
||||
public void On_new_page(Xoh_hzip_bfr bfr, Xoh_page hpg, Xoh_hdoc_ctx hctx, byte[] src, int src_bgn, int src_end) {
|
||||
this.bfr = bfr;
|
||||
this.src = src;
|
||||
}
|
||||
public void On_txt (int rng_bgn, int rng_end) {bfr.Add_mid(src, rng_bgn, rng_end);}
|
||||
public void Add_bfr (Bry_bfr v) {bfr.Add_bfr_and_clear(v);}
|
||||
public void Add_str (String v) {bfr.Add_str_u8(v);}
|
||||
public void Add_bry (byte[] v) {bfr.Add(v);}
|
||||
|
||||
// not used
|
||||
public void On_escape (gplx.xowa.htmls.core.wkrs.escapes.Xoh_escape_data data) {}
|
||||
public void On_xnde (gplx.xowa.htmls.core.wkrs.xndes.Xoh_xnde_parser parser) {}
|
||||
public void On_lnki (gplx.xowa.htmls.core.wkrs.lnkis.Xoh_lnki_data parser) {}
|
||||
public void On_thm (gplx.xowa.htmls.core.wkrs.thms.Xoh_thm_data parser) {}
|
||||
public void On_gly (gplx.xowa.htmls.core.wkrs.glys.Xoh_gly_grp_data parser) {}
|
||||
public boolean Process_parse(Xoh_data_itm data) {return false;}
|
||||
}
|
@ -25,7 +25,8 @@ public class Xosync_img_src_parser {
|
||||
private final byte[] wiki_abrv_commons;
|
||||
private final Xoh_img_src_data img_src_parser = new Xoh_img_src_data();
|
||||
|
||||
private Xoh_hdoc_ctx hctx;
|
||||
private Xow_domain_itm wiki_domain;
|
||||
private List_adp imgs;
|
||||
private byte path_tid;
|
||||
private byte[] img_src_bgn_local, img_src_bgn_remote;
|
||||
private byte[] page_url, repo_local;
|
||||
@ -46,13 +47,16 @@ public class Xosync_img_src_parser {
|
||||
img_src_bgn_remote = tmp_bfr.Add(Bry__xowa_file).Add(Xow_domain_itm_.Bry__commons).Add_byte_slash().To_bry_and_clear();
|
||||
wiki_abrv_commons = Xow_abrv_xo_.To_bry(Xow_domain_itm_.Bry__commons);
|
||||
}
|
||||
public void Init_by_page(Xoh_hdoc_ctx hctx) {
|
||||
this.hctx = hctx;
|
||||
this.page_url = hctx.Page__url();
|
||||
public Xosync_img_src_parser Init_by_page(Xow_domain_itm wiki_domain, byte[] page_url, List_adp imgs) {
|
||||
this.wiki_domain = wiki_domain;
|
||||
this.page_url = page_url;
|
||||
this.imgs = imgs;
|
||||
this.imgs.Clear();
|
||||
this.path_tid = Path__unknown;
|
||||
this.repo_local = To_wmf_repo_or_null(tmp_bfr, hctx.Wiki__domain_itm());
|
||||
if (repo_local == null) Gfo_usr_dlg_.Instance.Warn_many("", "", "unsupported wmf repo; domain=~{0}", hctx.Wiki__domain_itm().Domain_bry());
|
||||
img_src_bgn_local = tmp_bfr.Add(Bry__xowa_file).Add(hctx.Wiki__domain_bry()).Add_byte_slash().To_bry_and_clear(); // EX: "xowa:/file/en.wikipedia.org/"
|
||||
this.repo_local = To_wmf_repo_or_null(tmp_bfr, wiki_domain);
|
||||
if (repo_local == null) Gfo_usr_dlg_.Instance.Warn_many("", "", "unsupported wmf repo; domain=~{0}", wiki_domain.Domain_bry());
|
||||
img_src_bgn_local = tmp_bfr.Add(Bry__xowa_file).Add(wiki_domain.Domain_bry()).Add_byte_slash().To_bry_and_clear(); // EX: "xowa:/file/en.wikipedia.org/"
|
||||
return this;
|
||||
}
|
||||
public boolean Parse(byte[] raw) {
|
||||
// init
|
||||
@ -82,7 +86,7 @@ public class Xosync_img_src_parser {
|
||||
}
|
||||
private boolean Parse_file_xo() {
|
||||
img_src_parser.Clear();
|
||||
boolean rv = img_src_parser.Parse(rdr.Err_wkr(), hctx, hctx.Wiki__domain_bry(), raw, 0, raw_len);
|
||||
boolean rv = img_src_parser.Parse(rdr.Err_wkr(), wiki_domain.Domain_bry(), raw, 0, raw_len);
|
||||
if (rv) {
|
||||
this.repo_is_commons = img_src_parser.Repo_is_commons();
|
||||
this.file_is_orig = img_src_parser.File_is_orig();
|
||||
@ -95,7 +99,7 @@ public class Xosync_img_src_parser {
|
||||
if (img_src_parser.File_time_exists())
|
||||
this.file_time = img_src_parser.File_time();
|
||||
}
|
||||
Add_img(hctx.Wiki__domain_itm().Abrv_xo());
|
||||
Add_img(wiki_domain.Abrv_xo());
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
@ -147,7 +151,7 @@ public class Xosync_img_src_parser {
|
||||
}
|
||||
|
||||
// register image
|
||||
Add_img(hctx.Wiki__domain_itm().Abrv_xo());
|
||||
Add_img(wiki_domain.Abrv_xo());
|
||||
return true;
|
||||
}
|
||||
private boolean Parse_math() {
|
||||
@ -162,7 +166,7 @@ public class Xosync_img_src_parser {
|
||||
}
|
||||
private void Add_img(byte[] wiki_abrv) {
|
||||
Xof_fsdb_itm itm = new Xof_fsdb_itm();
|
||||
hctx.Page().Hdump_mgr().Imgs().Add(itm);
|
||||
imgs.Add(itm);
|
||||
itm.Init_by_wm_parse(wiki_abrv, repo_is_commons, file_is_orig, file_ttl_bry, file_ext, file_w, file_time, file_page);
|
||||
}
|
||||
public byte[] To_bry() {
|
||||
|
@ -29,8 +29,7 @@ public class Sync_html_special implements Xow_special_page {
|
||||
|
||||
// update
|
||||
Xosync_update_mgr updater = new Xosync_update_mgr();
|
||||
updater.Init_by_app(wiki.App());
|
||||
updater.Update(wiki.App().Wmf_mgr().Download_wkr(), wiki, redirect_ttl);
|
||||
updater.Update(wiki.App().Wmf_mgr().Download_wkr(), wiki, page.Url_bry_safe(), redirect_ttl);
|
||||
((Xowe_wiki)wiki).Data_mgr().Redirect((Xoae_page)page, redirect_bry); // HACK: should call page.Redirect_trail() below, but need to handle Display_ttl
|
||||
// page.Redirect_trail().Itms__add__article(redirect_url, redirect_ttl, null);
|
||||
}
|
||||
|
@ -27,9 +27,6 @@ public class Xow_hdump_mgr {
|
||||
public Xow_hdump_mgr__save Save_mgr() {return save_mgr;} private Xow_hdump_mgr__save save_mgr;
|
||||
public Xow_hdump_mgr__load Load_mgr() {return load_mgr;} private Xow_hdump_mgr__load load_mgr;
|
||||
public Xoh_hzip_mgr Hzip_mgr() {return hzip_mgr;} private final Xoh_hzip_mgr hzip_mgr = new Xoh_hzip_mgr();
|
||||
public void Init_by_app(Xoae_app app) {
|
||||
save_mgr.Init_by_app(app);
|
||||
}
|
||||
public void Init_by_db(Xow_wiki wiki) {
|
||||
byte dflt_zip_tid = gplx.core.ios.streams.Io_stream_tid_.Tid__raw;
|
||||
boolean dflt_hzip_enable = false;
|
||||
|
@ -81,8 +81,13 @@ public class Xow_hdump_mgr__load implements Gfo_invk {
|
||||
src = zip_mgr.Unzip((byte)zip_tid, src);
|
||||
switch (hzip_tid) {
|
||||
case Xoh_hzip_dict_.Hzip__none:
|
||||
case Xoh_hzip_dict_.Hzip__plain:
|
||||
src = make_mgr.Parse(src, hpg, hpg.Wiki());
|
||||
break;
|
||||
// case Xoh_hzip_dict_.Hzip__plain:
|
||||
// gplx.xowa.addons.wikis.pages.syncs.core.loaders.Xosync_page_loader page_loader = new gplx.xowa.addons.wikis.pages.syncs.core.loaders.Xosync_page_loader();
|
||||
// src = page_loader.Parse(wiki, hpg, src);
|
||||
// break;
|
||||
case Xoh_hzip_dict_.Hzip__v1:
|
||||
if (override_mgr__html != null) // null when Parse is called directly
|
||||
src = override_mgr__html.Get_or_same(hpg.Ttl().Page_db(), src);
|
||||
@ -90,10 +95,6 @@ public class Xow_hdump_mgr__load implements Gfo_invk {
|
||||
src = Decode_as_bry(tmp_bfr.Clear(), hpg, src, Bool_.N);
|
||||
hpg.Section_mgr().Set_content(hpg.Section_mgr().Len() - 1, src, src.length);
|
||||
break;
|
||||
case Xoh_hzip_dict_.Hzip__plain:
|
||||
gplx.xowa.addons.wikis.pages.syncs.core.loaders.Xosync_page_loader page_loader = new gplx.xowa.addons.wikis.pages.syncs.core.loaders.Xosync_page_loader();
|
||||
src = page_loader.Parse(wiki, hpg, src);
|
||||
break;
|
||||
}
|
||||
return src;
|
||||
}
|
||||
|
@ -16,18 +16,15 @@ Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
package gplx.xowa.htmls.core; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*;
|
||||
import gplx.xowa.htmls.core.htmls.*; import gplx.xowa.htmls.core.wkrs.*; import gplx.xowa.htmls.core.hzips.*; import gplx.xowa.htmls.heads.*; import gplx.xowa.htmls.core.dbs.*;
|
||||
import gplx.core.ios.*; import gplx.core.primitives.*; import gplx.xowa.wikis.data.*; import gplx.xowa.wikis.pages.*;
|
||||
import gplx.xowa.addons.wikis.pages.syncs.core.*;
|
||||
import gplx.xowa.addons.wikis.pages.syncs.core.parsers.*;
|
||||
public class Xow_hdump_mgr__save {
|
||||
private final Xow_wiki wiki; private final Xoh_hzip_mgr hzip_mgr; private final Io_stream_zip_mgr zip_mgr;
|
||||
private final Xosync_update_mgr update_mgr = new Xosync_update_mgr();
|
||||
private final Xosync_hdoc_parser plain_parser = new Xosync_hdoc_parser();
|
||||
private final Xoh_page tmp_hpg; private final Xoh_hzip_bfr tmp_bfr = Xoh_hzip_bfr.New_txt(32); private Bool_obj_ref html_db_is_new = Bool_obj_ref.n_();
|
||||
private int dflt_zip_tid, dflt_hzip_tid;
|
||||
public Xow_hdump_mgr__save(Xow_wiki wiki, Xoh_hzip_mgr hzip_mgr, Io_stream_zip_mgr zip_mgr, Xoh_page tmp_hpg) {
|
||||
this.wiki = wiki; this.hzip_mgr = hzip_mgr; this.zip_mgr = zip_mgr; this.tmp_hpg = tmp_hpg;
|
||||
}
|
||||
public void Init_by_app(Xoae_app app) {
|
||||
update_mgr.Init_by_app(app);
|
||||
}
|
||||
public void Init_by_db(int dflt_zip_tid, int dflt_hzip_tid, boolean mode_is_b256) {
|
||||
this.dflt_zip_tid = dflt_zip_tid; this.dflt_hzip_tid = dflt_hzip_tid; tmp_bfr.Mode_is_b256_(mode_is_b256);
|
||||
}
|
||||
@ -55,8 +52,7 @@ public class Xow_hdump_mgr__save {
|
||||
private byte[] Write(Xoh_hzip_bfr bfr, Xow_wiki wiki, Xoae_page page, Xoh_page hpg, Xoh_hzip_mgr hzip_mgr, Io_stream_zip_mgr zip_mgr, int zip_tid, int hzip_tid, byte[] src) {
|
||||
switch (hzip_tid) {
|
||||
case Xoh_hzip_dict_.Hzip__none:
|
||||
update_mgr.Parse(hpg, wiki, page.Url_bry_safe(), src);
|
||||
src = hpg.Db().Html().Html_bry();
|
||||
src = plain_parser.Parse_hdoc(wiki.Domain_itm(), page.Url_bry_safe(), hpg.Hdump_mgr().Imgs(), src);
|
||||
break;
|
||||
case Xoh_hzip_dict_.Hzip__v1:
|
||||
src = hzip_mgr.Encode_as_bry((Xoh_hzip_bfr)bfr.Clear(), wiki, hpg, src);
|
||||
|
@ -40,7 +40,6 @@ public class Xob_hdump_bldr implements Gfo_invk {
|
||||
this.toc_label = wiki.Msg_mgr().Val_by_id(gplx.xowa.langs.msgs.Xol_msg_itm_.Id_toc);
|
||||
|
||||
if (zip_tid == Byte_.Max_value_127) zip_tid = Xobldr_cfg.Zip_mode__html(wiki.App());
|
||||
hdump_mgr.Init_by_app(wiki.Appe());
|
||||
hdump_mgr.Init_by_db(zip_tid, hzip_enabled, hzip_b256);
|
||||
return true;
|
||||
}
|
||||
|
@ -37,6 +37,7 @@ public class Xoh_hdoc_wkr__make implements Xoh_hdoc_wkr {
|
||||
}
|
||||
public void On_gly (gplx.xowa.htmls.core.wkrs.glys.Xoh_gly_grp_data data) {
|
||||
bfr.Add_mid(src, data.Src_bgn(), data.Src_end());
|
||||
hpg.Xtn__gallery_exists_y_();
|
||||
}
|
||||
public boolean Process_parse(Xoh_data_itm data) {
|
||||
switch (data.Tid()) {
|
||||
|
@ -85,7 +85,7 @@ public class Xoh_img_data implements Xoh_data_itm {
|
||||
img_alt_bgn = img_alt.Val_bgn(); img_alt_end = img_alt.Val_end();
|
||||
img_cls.Init_by_parse(err_wkr, src, img_tag); // class='thumbborder'
|
||||
img_alt__diff_anch_title = !Bry_.Match(src, img_alt_bgn, img_alt_end, src, anch_title_bgn, anch_title_end);
|
||||
if (!img_src.Parse(err_wkr, hctx, hctx.Wiki__domain_bry(), img_tag)) return false; // src='...'
|
||||
if (!img_src.Parse(err_wkr, hctx.Wiki__domain_bry(), img_tag)) return false; // src='...'
|
||||
if (anch_xo_ttl.Val_is_empty()) {
|
||||
anch_xo_ttl.Val_(img_src.File_ttl_bry());
|
||||
if (anch_xo_ttl.Val_is_empty())
|
||||
|
@ -47,12 +47,12 @@ public class Xoh_img_src_data implements Bfr_arg_clearable, Xoh_itm_parser {
|
||||
repo_tid = Xof_repo_tid_.Tid__null;
|
||||
file_ttl_bry = null;
|
||||
}
|
||||
public boolean Parse(Bry_err_wkr err_wkr, Xoh_hdoc_ctx hctx, byte[] domain_bry, Gfh_tag tag) {
|
||||
public boolean Parse(Bry_err_wkr err_wkr, byte[] domain_bry, Gfh_tag tag) {
|
||||
this.Clear();
|
||||
Gfh_atr atr = tag.Atrs__get_by_or_empty(Gfh_atr_.Bry__src);
|
||||
return Parse(err_wkr, hctx, domain_bry, atr.Src(), atr.Val_bgn(), atr.Val_end());
|
||||
return Parse(err_wkr, domain_bry, atr.Src(), atr.Val_bgn(), atr.Val_end());
|
||||
}
|
||||
public boolean Parse(Bry_err_wkr err_wkr, Xoh_hdoc_ctx hctx, byte[] domain_bry, byte[] src_bry, int src_bgn, int src_end) { // EX: src="file:///C:/xowa/file/commons.wikimedia.org/thumb/7/0/1/2/A.png/220px.png"
|
||||
public boolean Parse(Bry_err_wkr err_wkr, byte[] domain_bry, byte[] src_bry, int src_bgn, int src_end) { // EX: src="file:///C:/xowa/file/commons.wikimedia.org/thumb/7/0/1/2/A.png/220px.png"
|
||||
this.Clear();
|
||||
this.src_bry = src_bry;
|
||||
this.src_bgn = src_bgn; this.src_end = src_end;
|
||||
|
@ -61,6 +61,6 @@ class Xoh_img_src_data_fxt extends Xoh_itm_parser_fxt { private final Xoh_im
|
||||
}
|
||||
@Override public void Exec_parse_hook(Bry_err_wkr err_wkr, Xoh_hdoc_ctx hctx, byte[] src, int src_bgn, int src_end) {
|
||||
parser.Fail_throws_err_(true);
|
||||
parser.Parse(err_wkr, new Xoh_hdoc_ctx(), Xow_domain_itm_.Bry__enwiki, src, src_bgn, src_end);
|
||||
parser.Parse(err_wkr, Xow_domain_itm_.Bry__enwiki, src, src_bgn, src_end);
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user