1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2024-10-27 20:34:16 +00:00

HTML Databases: Make plain-text parser thread-safe; Simplify code; Fix gallery not working [#320]

This commit is contained in:
gnosygnu 2019-01-06 21:27:33 -05:00
parent 2b4320b302
commit 42d15b726c
18 changed files with 91 additions and 137 deletions

View File

@ -16,7 +16,6 @@ Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
package gplx.langs.htmls.docs; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*;
import gplx.core.btries.*;
public class Gfh_doc_parser {
private final Btrie_rv trv = new Btrie_rv();
private final Btrie_slim_mgr trie = Btrie_slim_mgr.cs();
private final Gfh_txt_wkr txt_wkr;
public Gfh_doc_parser(Gfh_txt_wkr txt_wkr, Gfh_doc_wkr... wkr_ary) {
@ -27,6 +26,7 @@ public class Gfh_doc_parser {
public void Parse(byte[] page_url, byte[] src, int src_bgn, int src_end) {
int txt_bgn = -1;
int pos = src_bgn;
Btrie_rv trv = new Btrie_rv();
while (pos < src_end) {
Object o = trie.Match_at(trv, src, pos, src_end);
if (o == null) { // not a known hook; add to txt

View File

@ -26,10 +26,11 @@ public class Gfh_tag_rdr {
public int Src_end() {return src_end;} private int src_end;
public Bry_err_wkr Err_wkr() {return err_wkr;} private final Bry_err_wkr err_wkr = new Bry_err_wkr();
public Gfh_tag_rdr Reg(String tag_name, int tag_id) {name_hash.Add_str_int(tag_name, tag_id); return this;}
public void Init(byte[] ctx_name, byte[] src, int src_bgn, int src_end) {
public Gfh_tag_rdr Init(byte[] ctx_name, byte[] src, int src_bgn, int src_end) {
this.src = src; this.pos = src_bgn; this.src_end = src_end;
tag__eos.Init(this, src, Bool_.N, Bool_.N, src_end, src_end, src_end, src_end, Gfh_tag_.Id__eos, Bry_.Empty);
err_wkr.Init_by_page(String_.new_u8(ctx_name), src);
return this;
}
public void Src_rng_(int src_bgn, int src_end) {
this.pos = src_bgn; this.src_end = src_end;

View File

@ -91,8 +91,7 @@ public class Xosync_read_mgr implements Gfo_invk {
// auto-sync page
Xoa_app app = wiki.App();
update_mgr.Init_by_app(app);
Xowm_parse_data parse_data = update_mgr.Update(app.Wmf_mgr().Download_wkr(), wiki, page_ttl);
Xowm_parse_data parse_data = update_mgr.Update(app.Wmf_mgr().Download_wkr(), wiki, page.Url_bry_safe(), page_ttl);
if (parse_data == null)
return rv;

View File

@ -15,28 +15,15 @@ Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.addons.wikis.pages.syncs.core; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.pages.*; import gplx.xowa.addons.wikis.pages.syncs.*;
import gplx.xowa.files.downloads.*;
import gplx.xowa.wikis.data.*; import gplx.xowa.wikis.data.tbls.*;
import gplx.xowa.wikis.data.*; import gplx.xowa.wikis.data.tbls.*; import gplx.xowa.wikis.domains.*;
import gplx.xowa.htmls.*; import gplx.langs.htmls.docs.*;
import gplx.xowa.htmls.core.wkrs.*; import gplx.xowa.htmls.core.wkrs.txts.*; import gplx.xowa.htmls.core.hzips.*;
import gplx.xowa.htmls.core.dbs.*;
import gplx.xowa.addons.wikis.pages.syncs.wmapis.*;
import gplx.xowa.addons.wikis.pages.syncs.core.parsers.*;
public class Xosync_update_mgr {
private final Xoh_hzip_bfr bfr = new Xoh_hzip_bfr(Io_mgr.Len_kb, Bool_.N, Byte_.Max_value_127);
private final Gfh_doc_parser hdoc_parser_mgr;
private final Xoh_hdoc_ctx hctx = new Xoh_hdoc_ctx();
private final Xosync_hdoc_wtr hdoc_bldr = new Xosync_hdoc_wtr();
private final Xosync_hdoc_parser hdoc_parser_wkr;
private final Xowd_html_tbl_mgr html_tbl_mgr = new Xowd_html_tbl_mgr();
public Xosync_update_mgr() {
hdoc_parser_wkr = new Xosync_hdoc_parser(hdoc_bldr);
hdoc_parser_mgr = new Gfh_doc_parser(new Xoh_txt_parser(hdoc_bldr), hdoc_parser_wkr);
}
// One-time app-level setup: hands the app to the shared hdoc context (hctx) via hctx.Init_by_app.
// Callers visible in this diff invoke it right before Update.
public void Init_by_app(Xoa_app app) {
hctx.Init_by_app(app);
}
public Xowm_parse_data Update(Xof_download_wkr download_wkr, Xow_wiki wiki, Xoa_ttl page_ttl) {
Xoh_page hpg = (Xoh_page)hctx.Page();
public Xowm_parse_data Update(Xof_download_wkr download_wkr, Xow_wiki wiki, byte[] page_url, Xoa_ttl page_ttl) {
Xoh_page hpg = new Xoh_page();
// call wmf api
Xowm_parse_wmf parse_wkr = new Xowm_parse_wmf();
@ -45,10 +32,11 @@ public class Xosync_update_mgr {
// parse html to fix images
Gfo_usr_dlg_.Instance.Log_many("", "", "page_sync: parsing page; page=~{0}", page_ttl.Full_db());
Parse(hpg, wiki, hctx.Page__url(), data.Revn_html());
Xosync_hdoc_parser hdoc_parser_wkr = new Xosync_hdoc_parser();
byte[] html_bry = hdoc_parser_wkr.Parse_hdoc(wiki.Domain_itm(), page_url, hpg.Hdump_mgr().Imgs(), data.Revn_html());
// init some vars
byte[] html_bry = hpg.Db().Html().Html_bry();
Xowd_html_tbl_mgr html_tbl_mgr = new Xowd_html_tbl_mgr();
Xow_db_file html_db = html_tbl_mgr.Get_html_db(wiki);
Xow_db_file core_db = wiki.Data__core_mgr().Db__core();
Xowd_page_tbl page_tbl = core_db.Tbl__page();
@ -88,17 +76,4 @@ public class Xosync_update_mgr {
return data;
}
// Runs the html-doc parser over all of src (range 0 .. src.length) and stores the bytes accumulated in bfr on hpg
// via hpg.Db().Html().Html_bry_(...).
//   hpg      : page object; its Hdump_mgr is cleared first and it receives the resulting html
//   wiki     : passed to hctx.Init_by_page together with hpg
//   page_url : forwarded to hdoc_parser_mgr.Parse
//   src      : html bytes to parse; must be non-null because src.length is read on the first line
// NOTE(review): every step goes through instance-level fields (hctx, bfr, hdoc_bldr, hdoc_parser_wkr, hdoc_parser_mgr),
// so concurrent calls on one instance would presumably interfere with each other -- confirm before sharing an instance.
public void Parse(Xoh_page hpg, Xow_wiki wiki, byte[] page_url, byte[] src) {
int src_len = src.length;
// init_by_page for bldr, parser, hdoc
// order matters: hctx is initialized first because it is then handed to the bldr and the parser wkr
hctx.Init_by_page(wiki, hpg);
hpg.Hdump_mgr().Clear();
hdoc_bldr.Init_by_page(bfr, hpg, hctx, src, 0, src_len);
hdoc_parser_wkr.Init_by_page(hctx, src, 0, src_len);
// parse
hdoc_parser_mgr.Parse(page_url, src, 0, src_len);
// move the accumulated output out of bfr (To_bry_and_clear) and store it on the page
hpg.Db().Html().Html_bry_(bfr.To_bry_and_clear());
}
}

View File

@ -74,7 +74,7 @@ public class Xosync_page_loader {
img_src_val = Bry_.Replace(img_src_val, src_find, src_repl);
// parse src
img_src_parser.Parse(err_wkr, hctx, wiki.Domain_bry(), img_src_atr.Src(), img_src_atr.Val_bgn(), img_src_atr.Val_end());
img_src_parser.Parse(err_wkr, wiki.Domain_bry(), img_src_atr.Src(), img_src_atr.Val_bgn(), img_src_atr.Val_end());
if (img_src_parser.File_ttl_bry() == null) return null; // skip images that don't follow format of "commons.wikimedia.org/thumb/7/70/A.png"; for example, enlarge buttons
// create img
@ -103,7 +103,7 @@ public class Xosync_page_loader {
// if (path_tid == Xosync_img_src_parser.Path__file)
img.Init_at_gallery_end(img_tag.Atrs__get_as_int_or(Gfh_atr_.Bry__width,0), img_tag.Atrs__get_as_int_or(Gfh_atr_.Bry__height, 0), html_view_url, html_view_url);
Xosync_hdoc_parser.Write_img_tag(tmp_bfr, img_tag, img_src_val, img.Html_uid());
Xosync_hdoc_parser.Write_img_tag(tmp_bfr, img_tag, img.Html_uid(), img_src_val);
return img;
}
}

View File

@ -14,23 +14,47 @@ GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.addons.wikis.pages.syncs.core.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.pages.*; import gplx.xowa.addons.wikis.pages.syncs.*; import gplx.xowa.addons.wikis.pages.syncs.core.*;
import gplx.langs.htmls.*; import gplx.langs.htmls.docs.*; import gplx.xowa.htmls.core.wkrs.*;
public class Xosync_hdoc_parser implements Gfh_doc_wkr {
private final Xosync_hdoc_wtr hdoc_wtr;
private final Gfh_tag_rdr tag_rdr = Gfh_tag_rdr.New__html();
private final Bry_bfr tmp_bfr = Bry_bfr_.New();
private final Xosync_img_src_parser img_src_parser = new Xosync_img_src_parser();
import gplx.langs.htmls.*; import gplx.langs.htmls.docs.*;
import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.wkrs.*;
import gplx.xowa.wikis.domains.*;
public class Xosync_hdoc_parser {
public byte[] Parse_hdoc(Xow_domain_itm wiki_domain, byte[] page_url, List_adp imgs, byte[] src) {
// init
Bry_bfr bfr = Bry_bfr_.New();
Xosync_img_src_parser img_src_parser = new Xosync_img_src_parser().Init_by_page(wiki_domain, page_url, imgs);
int cur = 0, src_len = src.length;
Gfh_tag_rdr tag_rdr = Gfh_tag_rdr.New__html().Init(page_url, src, cur, src_len);
public Xosync_hdoc_parser(Xosync_hdoc_wtr hdoc_wtr) {this.hdoc_wtr = hdoc_wtr;}
public byte[] Hook() {return Byte_ascii.Angle_bgn_bry;}
public void Init_by_page(Xoh_hdoc_ctx hctx, byte[] src, int src_bgn, int src_end) {
tag_rdr.Init(hctx.Page__url(), src, src_bgn, src_end);
img_src_parser.Init_by_page(hctx);
// loop src
while (true) {
// look for "<"
int find = Bry_find_.Find_fwd(src, Byte_ascii.Angle_bgn, cur, src_len);
// "<" not found; add rest of src and stop
if (find == Bry_find_.Not_found) {
bfr.Add_mid(src, cur, src_len);
break;
}
// "<" found; add everything between cur and "<"
bfr.Add_mid(src, cur, find);
// parse "<"
cur = Parse_tag(bfr, tag_rdr, img_src_parser, src, src_len, find);
}
return bfr.To_bry_and_clear();
}
public int Parse(byte[] src, int src_bgn, int src_end, int pos) {
private int Parse_tag(Bry_bfr bfr, Gfh_tag_rdr tag_rdr, Xosync_img_src_parser img_src_parser, byte[] src, int src_len, int pos) {
// note that entry point is at "<"
tag_rdr.Pos_(pos);
int nxt_pos = tag_rdr.Pos() + 1; if (nxt_pos == src_end) return src_end;
int nxt_pos = tag_rdr.Pos() + 1;
// "<" is at EOS
if (nxt_pos == src_len) {
bfr.Add_byte(Byte_ascii.Angle_bgn);
return src_len;
}
// check if head or tail; EX: "<a>" vs "</a>"
byte nxt_byte = src[nxt_pos];
@ -39,7 +63,7 @@ public class Xosync_hdoc_parser implements Gfh_doc_wkr {
int end_comm = Bry_find_.Move_fwd(src, Gfh_tag_.Comm_end, nxt_pos);
if (end_comm == Bry_find_.Not_found) {
Gfo_usr_dlg_.Instance.Warn_many("", "", "end comment not found; src=~{0}", String_.new_u8(src));
end_comm = src_end;
end_comm = src_len;
}
return end_comm;
}
@ -55,15 +79,15 @@ public class Xosync_hdoc_parser implements Gfh_doc_wkr {
}
break;
case Gfh_tag_.Id__img: // rewrite src for XOWA; especially necessary for relative protocol; EX: "//upload.wikimedia.org"; note do not use <super> tag b/c of issues with anchors like "href=#section"
return Parse_img_src(cur);
return Parse_img_src(bfr, img_src_parser, cur);
default:
break;
}
}
hdoc_wtr.On_txt(cur.Src_bgn(), cur.Src_end());
bfr.Add_mid(src, cur.Src_bgn(), cur.Src_end());
return cur.Src_end();
}
private int Parse_img_src(Gfh_tag img_tag) {
private int Parse_img_src(Bry_bfr bfr, Xosync_img_src_parser img_src_parser, Gfh_tag img_tag) {
// get @src and parse it
Gfh_atr src_atr = img_tag.Atrs__get_by_or_empty(Gfh_atr_.Bry__src);
img_src_parser.Parse(src_atr.Val());
@ -71,21 +95,20 @@ public class Xosync_hdoc_parser implements Gfh_doc_wkr {
// if error, write comment; EX: <!--error--><img ...>
String err_msg = img_src_parser.Err_msg();
if (err_msg != null) {
hdoc_wtr.Add_bry(Gfh_tag_.Comm_bgn);
hdoc_wtr.Add_str(img_src_parser.Err_msg());
hdoc_wtr.Add_bry(Gfh_tag_.Comm_end);
bfr.Add(Gfh_tag_.Comm_bgn);
bfr.Add_str_u8(img_src_parser.Err_msg());
bfr.Add(Gfh_tag_.Comm_end);
}
// get img_src; use img_src_parser if no error, else use original value
byte[] img_src_val = err_msg == null ? img_src_parser.To_bry() : src_atr.Val();
// write html
Write_img_tag(tmp_bfr, img_tag, img_src_val, -1);
hdoc_wtr.Add_bfr(tmp_bfr);
Write_img_tag(bfr, img_tag, -1, img_src_val);
return img_tag.Src_end();
}
public static void Write_img_tag(Bry_bfr bfr, Gfh_tag img_tag, byte[] img_src_val, int uid) {
public static void Write_img_tag(Bry_bfr bfr, Gfh_tag img_tag, int uid, byte[] img_src_val) {
// rewrite <img> tag with custom img_src_val
int atrs_len = img_tag.Atrs__len();
bfr.Add(Byte_ascii.Angle_bgn_bry);

View File

@ -18,7 +18,6 @@ import gplx.core.tests.*;
import gplx.langs.htmls.*; import gplx.xowa.htmls.*;
import gplx.xowa.files.*; import gplx.xowa.files.repos.*;
public class Xosync_hdoc_parser__fxt {
private final Xosync_update_mgr mgr = new Xosync_update_mgr();
private final Bry_bfr tmp_bfr = Bry_bfr_.New();
private final Xoh_page hpg = new Xoh_page();
private Xowe_wiki wiki;
@ -28,13 +27,14 @@ public class Xosync_hdoc_parser__fxt {
Xoae_app app = Xoa_app_fxt.Make__app__edit();
this.wiki = Xoa_app_fxt.Make__wiki__edit(app);
Xoa_app_fxt.repo2_(app, wiki);
mgr.Init_by_app(app);
}
// Fixture teardown: points the global Gfo_usr_dlg_.Instance at Gfo_usr_dlg_.Noop.
// NOTE(review): presumably undoes a logging/test dialog installed during fixture setup -- setup is not fully visible here; confirm.
public void Term() {
Gfo_usr_dlg_.Instance = Gfo_usr_dlg_.Noop;
}
public Xosync_hdoc_parser__fxt Exec__parse(String raw) {
mgr.Parse(hpg, wiki, Bry_.Empty, Bry_.new_u8(raw));
Xosync_hdoc_parser parser = new Xosync_hdoc_parser();
byte[] result = parser.Parse_hdoc(wiki.Domain_itm(), Bry_.Empty, hpg.Hdump_mgr().Imgs(), Bry_.new_u8(raw));
hpg.Db().Html().Html_bry_(result);
return this;
}
public Xosync_hdoc_parser__fxt Test__html(String expd) {

View File

@ -1,41 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.addons.wikis.pages.syncs.core.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.pages.*; import gplx.xowa.addons.wikis.pages.syncs.*; import gplx.xowa.addons.wikis.pages.syncs.core.*;
import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.wkrs.*; import gplx.xowa.htmls.core.hzips.*;
/**
 * Xoh_hdoc_wkr implementation that only appends to an output buffer: ranges of the page source,
 * other buffers, strings, or raw byte arrays. All structured-node handlers are empty.
 */
public class Xosync_hdoc_wtr implements Xoh_hdoc_wkr {
	/** Output buffer; assigned in {@link #On_new_page}. */
	private Xoh_hzip_bfr bfr;
	/** Source bytes that {@link #On_txt} copies ranges from; assigned in {@link #On_new_page}. */
	private byte[] src;

	/** Alias for {@link #On_new_page}; passes every argument through unchanged. */
	public void Init_by_page(Xoh_hzip_bfr bfr, Xoh_page hpg, Xoh_hdoc_ctx hctx, byte[] src, int src_bgn, int src_end) {
		On_new_page(bfr, hpg, hctx, src, src_bgn, src_end);
	}

	/** Stores the buffer and source for the page; hpg, hctx, src_bgn and src_end are not used by this class. */
	public void On_new_page(Xoh_hzip_bfr bfr, Xoh_page hpg, Xoh_hdoc_ctx hctx, byte[] src, int src_bgn, int src_end) {
		this.bfr = bfr;
		this.src = src;
	}

	/** Appends the source range rng_bgn .. rng_end to the buffer. */
	public void On_txt(int rng_bgn, int rng_end) {
		bfr.Add_mid(src, rng_bgn, rng_end);
	}

	/** Appends another buffer through Add_bfr_and_clear (by its name, v is emptied afterwards). */
	public void Add_bfr(Bry_bfr v) {
		bfr.Add_bfr_and_clear(v);
	}

	/** Appends a String through Add_str_u8. */
	public void Add_str(String v) {
		bfr.Add_str_u8(v);
	}

	/** Appends a byte array as-is. */
	public void Add_bry(byte[] v) {
		bfr.Add(v);
	}

	// ---- handlers below are deliberately empty: this writer does not act on structured nodes ----

	public void On_escape(gplx.xowa.htmls.core.wkrs.escapes.Xoh_escape_data data) {
	}

	public void On_xnde(gplx.xowa.htmls.core.wkrs.xndes.Xoh_xnde_parser parser) {
	}

	public void On_lnki(gplx.xowa.htmls.core.wkrs.lnkis.Xoh_lnki_data parser) {
	}

	public void On_thm(gplx.xowa.htmls.core.wkrs.thms.Xoh_thm_data parser) {
	}

	public void On_gly(gplx.xowa.htmls.core.wkrs.glys.Xoh_gly_grp_data parser) {
	}

	/** Always returns false. */
	public boolean Process_parse(Xoh_data_itm data) {
		return false;
	}
}

View File

@ -25,7 +25,8 @@ public class Xosync_img_src_parser {
private final byte[] wiki_abrv_commons;
private final Xoh_img_src_data img_src_parser = new Xoh_img_src_data();
private Xoh_hdoc_ctx hctx;
private Xow_domain_itm wiki_domain;
private List_adp imgs;
private byte path_tid;
private byte[] img_src_bgn_local, img_src_bgn_remote;
private byte[] page_url, repo_local;
@ -46,13 +47,16 @@ public class Xosync_img_src_parser {
img_src_bgn_remote = tmp_bfr.Add(Bry__xowa_file).Add(Xow_domain_itm_.Bry__commons).Add_byte_slash().To_bry_and_clear();
wiki_abrv_commons = Xow_abrv_xo_.To_bry(Xow_domain_itm_.Bry__commons);
}
public void Init_by_page(Xoh_hdoc_ctx hctx) {
this.hctx = hctx;
this.page_url = hctx.Page__url();
public Xosync_img_src_parser Init_by_page(Xow_domain_itm wiki_domain, byte[] page_url, List_adp imgs) {
this.wiki_domain = wiki_domain;
this.page_url = page_url;
this.imgs = imgs;
this.imgs.Clear();
this.path_tid = Path__unknown;
this.repo_local = To_wmf_repo_or_null(tmp_bfr, hctx.Wiki__domain_itm());
if (repo_local == null) Gfo_usr_dlg_.Instance.Warn_many("", "", "unsupported wmf repo; domain=~{0}", hctx.Wiki__domain_itm().Domain_bry());
img_src_bgn_local = tmp_bfr.Add(Bry__xowa_file).Add(hctx.Wiki__domain_bry()).Add_byte_slash().To_bry_and_clear(); // EX: "xowa:/file/en.wikipedia.org/"
this.repo_local = To_wmf_repo_or_null(tmp_bfr, wiki_domain);
if (repo_local == null) Gfo_usr_dlg_.Instance.Warn_many("", "", "unsupported wmf repo; domain=~{0}", wiki_domain.Domain_bry());
img_src_bgn_local = tmp_bfr.Add(Bry__xowa_file).Add(wiki_domain.Domain_bry()).Add_byte_slash().To_bry_and_clear(); // EX: "xowa:/file/en.wikipedia.org/"
return this;
}
public boolean Parse(byte[] raw) {
// init
@ -82,7 +86,7 @@ public class Xosync_img_src_parser {
}
private boolean Parse_file_xo() {
img_src_parser.Clear();
boolean rv = img_src_parser.Parse(rdr.Err_wkr(), hctx, hctx.Wiki__domain_bry(), raw, 0, raw_len);
boolean rv = img_src_parser.Parse(rdr.Err_wkr(), wiki_domain.Domain_bry(), raw, 0, raw_len);
if (rv) {
this.repo_is_commons = img_src_parser.Repo_is_commons();
this.file_is_orig = img_src_parser.File_is_orig();
@ -95,7 +99,7 @@ public class Xosync_img_src_parser {
if (img_src_parser.File_time_exists())
this.file_time = img_src_parser.File_time();
}
Add_img(hctx.Wiki__domain_itm().Abrv_xo());
Add_img(wiki_domain.Abrv_xo());
}
return rv;
}
@ -147,7 +151,7 @@ public class Xosync_img_src_parser {
}
// register image
Add_img(hctx.Wiki__domain_itm().Abrv_xo());
Add_img(wiki_domain.Abrv_xo());
return true;
}
private boolean Parse_math() {
@ -162,7 +166,7 @@ public class Xosync_img_src_parser {
}
private void Add_img(byte[] wiki_abrv) {
Xof_fsdb_itm itm = new Xof_fsdb_itm();
hctx.Page().Hdump_mgr().Imgs().Add(itm);
imgs.Add(itm);
itm.Init_by_wm_parse(wiki_abrv, repo_is_commons, file_is_orig, file_ttl_bry, file_ext, file_w, file_time, file_page);
}
public byte[] To_bry() {

View File

@ -29,8 +29,7 @@ public class Sync_html_special implements Xow_special_page {
// update
Xosync_update_mgr updater = new Xosync_update_mgr();
updater.Init_by_app(wiki.App());
updater.Update(wiki.App().Wmf_mgr().Download_wkr(), wiki, redirect_ttl);
updater.Update(wiki.App().Wmf_mgr().Download_wkr(), wiki, page.Url_bry_safe(), redirect_ttl);
((Xowe_wiki)wiki).Data_mgr().Redirect((Xoae_page)page, redirect_bry); // HACK: should call page.Redirect_trail() below, but need to handle Display_ttl
// page.Redirect_trail().Itms__add__article(redirect_url, redirect_ttl, null);
}

View File

@ -27,9 +27,6 @@ public class Xow_hdump_mgr {
public Xow_hdump_mgr__save Save_mgr() {return save_mgr;} private Xow_hdump_mgr__save save_mgr;
public Xow_hdump_mgr__load Load_mgr() {return load_mgr;} private Xow_hdump_mgr__load load_mgr;
public Xoh_hzip_mgr Hzip_mgr() {return hzip_mgr;} private final Xoh_hzip_mgr hzip_mgr = new Xoh_hzip_mgr();
// App-level init: forwards the app to save_mgr.Init_by_app; nothing else is initialized here.
public void Init_by_app(Xoae_app app) {
save_mgr.Init_by_app(app);
}
public void Init_by_db(Xow_wiki wiki) {
byte dflt_zip_tid = gplx.core.ios.streams.Io_stream_tid_.Tid__raw;
boolean dflt_hzip_enable = false;

View File

@ -81,8 +81,13 @@ public class Xow_hdump_mgr__load implements Gfo_invk {
src = zip_mgr.Unzip((byte)zip_tid, src);
switch (hzip_tid) {
case Xoh_hzip_dict_.Hzip__none:
case Xoh_hzip_dict_.Hzip__plain:
src = make_mgr.Parse(src, hpg, hpg.Wiki());
break;
// case Xoh_hzip_dict_.Hzip__plain:
// gplx.xowa.addons.wikis.pages.syncs.core.loaders.Xosync_page_loader page_loader = new gplx.xowa.addons.wikis.pages.syncs.core.loaders.Xosync_page_loader();
// src = page_loader.Parse(wiki, hpg, src);
// break;
case Xoh_hzip_dict_.Hzip__v1:
if (override_mgr__html != null) // null when Parse is called directly
src = override_mgr__html.Get_or_same(hpg.Ttl().Page_db(), src);
@ -90,10 +95,6 @@ public class Xow_hdump_mgr__load implements Gfo_invk {
src = Decode_as_bry(tmp_bfr.Clear(), hpg, src, Bool_.N);
hpg.Section_mgr().Set_content(hpg.Section_mgr().Len() - 1, src, src.length);
break;
case Xoh_hzip_dict_.Hzip__plain:
gplx.xowa.addons.wikis.pages.syncs.core.loaders.Xosync_page_loader page_loader = new gplx.xowa.addons.wikis.pages.syncs.core.loaders.Xosync_page_loader();
src = page_loader.Parse(wiki, hpg, src);
break;
}
return src;
}

View File

@ -16,18 +16,15 @@ Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
package gplx.xowa.htmls.core; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*;
import gplx.xowa.htmls.core.htmls.*; import gplx.xowa.htmls.core.wkrs.*; import gplx.xowa.htmls.core.hzips.*; import gplx.xowa.htmls.heads.*; import gplx.xowa.htmls.core.dbs.*;
import gplx.core.ios.*; import gplx.core.primitives.*; import gplx.xowa.wikis.data.*; import gplx.xowa.wikis.pages.*;
import gplx.xowa.addons.wikis.pages.syncs.core.*;
import gplx.xowa.addons.wikis.pages.syncs.core.parsers.*;
public class Xow_hdump_mgr__save {
private final Xow_wiki wiki; private final Xoh_hzip_mgr hzip_mgr; private final Io_stream_zip_mgr zip_mgr;
private final Xosync_update_mgr update_mgr = new Xosync_update_mgr();
private final Xosync_hdoc_parser plain_parser = new Xosync_hdoc_parser();
private final Xoh_page tmp_hpg; private final Xoh_hzip_bfr tmp_bfr = Xoh_hzip_bfr.New_txt(32); private Bool_obj_ref html_db_is_new = Bool_obj_ref.n_();
private int dflt_zip_tid, dflt_hzip_tid;
/**
 * Stores the collaborators handed in by the caller; performs no other work.
 *
 * @param wiki     wiki this save manager belongs to
 * @param hzip_mgr hzip manager kept for later use
 * @param zip_mgr  stream zip manager kept for later use
 * @param tmp_hpg  page object kept in the tmp_hpg field
 */
public Xow_hdump_mgr__save(Xow_wiki wiki, Xoh_hzip_mgr hzip_mgr, Io_stream_zip_mgr zip_mgr, Xoh_page tmp_hpg) {
	this.wiki = wiki;
	this.hzip_mgr = hzip_mgr;
	this.zip_mgr = zip_mgr;
	this.tmp_hpg = tmp_hpg;
}
// App-level init: forwards the app to the embedded update_mgr (update_mgr.Init_by_app); no other state is touched.
public void Init_by_app(Xoae_app app) {
update_mgr.Init_by_app(app);
}
/**
 * Records the default zip / hzip type ids and applies the b256 mode flag to the temp buffer.
 *
 * @param dflt_zip_tid  value stored in the dflt_zip_tid field
 * @param dflt_hzip_tid value stored in the dflt_hzip_tid field
 * @param mode_is_b256  passed to tmp_bfr.Mode_is_b256_
 */
public void Init_by_db(int dflt_zip_tid, int dflt_hzip_tid, boolean mode_is_b256) {
	// keep the two default type ids
	this.dflt_zip_tid = dflt_zip_tid;
	this.dflt_hzip_tid = dflt_hzip_tid;

	// configure the shared temp buffer
	tmp_bfr.Mode_is_b256_(mode_is_b256);
}
@ -55,8 +52,7 @@ public class Xow_hdump_mgr__save {
private byte[] Write(Xoh_hzip_bfr bfr, Xow_wiki wiki, Xoae_page page, Xoh_page hpg, Xoh_hzip_mgr hzip_mgr, Io_stream_zip_mgr zip_mgr, int zip_tid, int hzip_tid, byte[] src) {
switch (hzip_tid) {
case Xoh_hzip_dict_.Hzip__none:
update_mgr.Parse(hpg, wiki, page.Url_bry_safe(), src);
src = hpg.Db().Html().Html_bry();
src = plain_parser.Parse_hdoc(wiki.Domain_itm(), page.Url_bry_safe(), hpg.Hdump_mgr().Imgs(), src);
break;
case Xoh_hzip_dict_.Hzip__v1:
src = hzip_mgr.Encode_as_bry((Xoh_hzip_bfr)bfr.Clear(), wiki, hpg, src);

View File

@ -40,7 +40,6 @@ public class Xob_hdump_bldr implements Gfo_invk {
this.toc_label = wiki.Msg_mgr().Val_by_id(gplx.xowa.langs.msgs.Xol_msg_itm_.Id_toc);
if (zip_tid == Byte_.Max_value_127) zip_tid = Xobldr_cfg.Zip_mode__html(wiki.App());
hdump_mgr.Init_by_app(wiki.Appe());
hdump_mgr.Init_by_db(zip_tid, hzip_enabled, hzip_b256);
return true;
}

View File

@ -37,6 +37,7 @@ public class Xoh_hdoc_wkr__make implements Xoh_hdoc_wkr {
}
// Gallery handler: copies the gallery's source range (data.Src_bgn() .. data.Src_end()) into bfr unchanged,
// then calls hpg.Xtn__gallery_exists_y_().
// NOTE(review): by its name that call presumably flags the page as containing a gallery; what depends on the flag is not visible here -- confirm.
public void On_gly (gplx.xowa.htmls.core.wkrs.glys.Xoh_gly_grp_data data) {
bfr.Add_mid(src, data.Src_bgn(), data.Src_end());
hpg.Xtn__gallery_exists_y_();
}
public boolean Process_parse(Xoh_data_itm data) {
switch (data.Tid()) {

View File

@ -85,7 +85,7 @@ public class Xoh_img_data implements Xoh_data_itm {
img_alt_bgn = img_alt.Val_bgn(); img_alt_end = img_alt.Val_end();
img_cls.Init_by_parse(err_wkr, src, img_tag); // class='thumbborder'
img_alt__diff_anch_title = !Bry_.Match(src, img_alt_bgn, img_alt_end, src, anch_title_bgn, anch_title_end);
if (!img_src.Parse(err_wkr, hctx, hctx.Wiki__domain_bry(), img_tag)) return false; // src='...'
if (!img_src.Parse(err_wkr, hctx.Wiki__domain_bry(), img_tag)) return false; // src='...'
if (anch_xo_ttl.Val_is_empty()) {
anch_xo_ttl.Val_(img_src.File_ttl_bry());
if (anch_xo_ttl.Val_is_empty())

View File

@ -47,12 +47,12 @@ public class Xoh_img_src_data implements Bfr_arg_clearable, Xoh_itm_parser {
repo_tid = Xof_repo_tid_.Tid__null;
file_ttl_bry = null;
}
public boolean Parse(Bry_err_wkr err_wkr, Xoh_hdoc_ctx hctx, byte[] domain_bry, Gfh_tag tag) {
public boolean Parse(Bry_err_wkr err_wkr, byte[] domain_bry, Gfh_tag tag) {
this.Clear();
Gfh_atr atr = tag.Atrs__get_by_or_empty(Gfh_atr_.Bry__src);
return Parse(err_wkr, hctx, domain_bry, atr.Src(), atr.Val_bgn(), atr.Val_end());
return Parse(err_wkr, domain_bry, atr.Src(), atr.Val_bgn(), atr.Val_end());
}
public boolean Parse(Bry_err_wkr err_wkr, Xoh_hdoc_ctx hctx, byte[] domain_bry, byte[] src_bry, int src_bgn, int src_end) { // EX: src="file:///C:/xowa/file/commons.wikimedia.org/thumb/7/0/1/2/A.png/220px.png"
public boolean Parse(Bry_err_wkr err_wkr, byte[] domain_bry, byte[] src_bry, int src_bgn, int src_end) { // EX: src="file:///C:/xowa/file/commons.wikimedia.org/thumb/7/0/1/2/A.png/220px.png"
this.Clear();
this.src_bry = src_bry;
this.src_bgn = src_bgn; this.src_end = src_end;

View File

@ -61,6 +61,6 @@ class Xoh_img_src_data_fxt extends Xoh_itm_parser_fxt { private final Xoh_im
}
@Override public void Exec_parse_hook(Bry_err_wkr err_wkr, Xoh_hdoc_ctx hctx, byte[] src, int src_bgn, int src_end) {
parser.Fail_throws_err_(true);
parser.Parse(err_wkr, new Xoh_hdoc_ctx(), Xow_domain_itm_.Bry__enwiki, src, src_bgn, src_end);
parser.Parse(err_wkr, Xow_domain_itm_.Bry__enwiki, src, src_bgn, src_end);
}
}