From 42d15b726c2f1f4061ce71f67755c0ec66aeeb78 Mon Sep 17 00:00:00 2001 From: gnosygnu Date: Sun, 6 Jan 2019 21:27:33 -0500 Subject: [PATCH] HTML Databases: Make plain-text parser thread-safe; Simplify code; Fix gallery not working [#320] --- .../gplx/langs/htmls/docs/Gfh_doc_parser.java | 2 +- .../gplx/langs/htmls/docs/Gfh_tag_rdr.java | 3 +- .../pages/syncs/core/Xosync_read_mgr.java | 3 +- .../pages/syncs/core/Xosync_update_mgr.java | 37 ++-------- .../core/loaders/Xosync_page_loader.java | 4 +- .../core/parsers/Xosync_hdoc_parser.java | 69 ++++++++++++------- .../core/parsers/Xosync_hdoc_parser__fxt.java | 6 +- .../syncs/core/parsers/Xosync_hdoc_wtr.java | 41 ----------- .../core/parsers/Xosync_img_src_parser.java | 26 ++++--- .../syncs/specials/Sync_html_special.java | 3 +- .../gplx/xowa/htmls/core/Xow_hdump_mgr.java | 3 - .../xowa/htmls/core/Xow_hdump_mgr__load.java | 9 +-- .../xowa/htmls/core/Xow_hdump_mgr__save.java | 10 +-- .../xowa/htmls/core/bldrs/Xob_hdump_bldr.java | 1 - .../htmls/core/wkrs/Xoh_hdoc_wkr__make.java | 1 + .../htmls/core/wkrs/imgs/Xoh_img_data.java | 2 +- .../core/wkrs/imgs/atrs/Xoh_img_src_data.java | 6 +- .../wkrs/imgs/atrs/Xoh_img_src_data_tst.java | 2 +- 18 files changed, 91 insertions(+), 137 deletions(-) delete mode 100644 400_xowa/src/gplx/xowa/addons/wikis/pages/syncs/core/parsers/Xosync_hdoc_wtr.java diff --git a/400_xowa/src/gplx/langs/htmls/docs/Gfh_doc_parser.java b/400_xowa/src/gplx/langs/htmls/docs/Gfh_doc_parser.java index 957529f15..c56838210 100644 --- a/400_xowa/src/gplx/langs/htmls/docs/Gfh_doc_parser.java +++ b/400_xowa/src/gplx/langs/htmls/docs/Gfh_doc_parser.java @@ -16,7 +16,6 @@ Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt package gplx.langs.htmls.docs; import gplx.*; import gplx.langs.*; import gplx.langs.htmls.*; import gplx.core.btries.*; public class Gfh_doc_parser { - private final Btrie_rv trv = new Btrie_rv(); private final Btrie_slim_mgr trie = Btrie_slim_mgr.cs(); private final Gfh_txt_wkr txt_wkr; public Gfh_doc_parser(Gfh_txt_wkr txt_wkr, Gfh_doc_wkr... wkr_ary) { @@ -27,6 +26,7 @@ public class Gfh_doc_parser { public void Parse(byte[] page_url, byte[] src, int src_bgn, int src_end) { int txt_bgn = -1; int pos = src_bgn; + Btrie_rv trv = new Btrie_rv(); while (pos < src_end) { Object o = trie.Match_at(trv, src, pos, src_end); if (o == null) { // not a known hook; add to txt diff --git a/400_xowa/src/gplx/langs/htmls/docs/Gfh_tag_rdr.java b/400_xowa/src/gplx/langs/htmls/docs/Gfh_tag_rdr.java index dac2ff808..a542a2a71 100644 --- a/400_xowa/src/gplx/langs/htmls/docs/Gfh_tag_rdr.java +++ b/400_xowa/src/gplx/langs/htmls/docs/Gfh_tag_rdr.java @@ -26,10 +26,11 @@ public class Gfh_tag_rdr { public int Src_end() {return src_end;} private int src_end; public Bry_err_wkr Err_wkr() {return err_wkr;} private final Bry_err_wkr err_wkr = new Bry_err_wkr(); public Gfh_tag_rdr Reg(String tag_name, int tag_id) {name_hash.Add_str_int(tag_name, tag_id); return this;} - public void Init(byte[] ctx_name, byte[] src, int src_bgn, int src_end) { + public Gfh_tag_rdr Init(byte[] ctx_name, byte[] src, int src_bgn, int src_end) { this.src = src; this.pos = src_bgn; this.src_end = src_end; tag__eos.Init(this, src, Bool_.N, Bool_.N, src_end, src_end, src_end, src_end, Gfh_tag_.Id__eos, Bry_.Empty); err_wkr.Init_by_page(String_.new_u8(ctx_name), src); + return this; } public void Src_rng_(int src_bgn, int src_end) { this.pos = src_bgn; this.src_end = src_end; diff --git a/400_xowa/src/gplx/xowa/addons/wikis/pages/syncs/core/Xosync_read_mgr.java b/400_xowa/src/gplx/xowa/addons/wikis/pages/syncs/core/Xosync_read_mgr.java index d44d1b64d..d91c57d26 100644 --- a/400_xowa/src/gplx/xowa/addons/wikis/pages/syncs/core/Xosync_read_mgr.java +++ b/400_xowa/src/gplx/xowa/addons/wikis/pages/syncs/core/Xosync_read_mgr.java @@ -91,8 +91,7 @@ public class Xosync_read_mgr implements Gfo_invk { // auto-sync page Xoa_app app = wiki.App(); - update_mgr.Init_by_app(app); - Xowm_parse_data parse_data = update_mgr.Update(app.Wmf_mgr().Download_wkr(), wiki, page_ttl); + Xowm_parse_data parse_data = update_mgr.Update(app.Wmf_mgr().Download_wkr(), wiki, page.Url_bry_safe(), page_ttl); if (parse_data == null) return rv; diff --git a/400_xowa/src/gplx/xowa/addons/wikis/pages/syncs/core/Xosync_update_mgr.java b/400_xowa/src/gplx/xowa/addons/wikis/pages/syncs/core/Xosync_update_mgr.java index 2d0c37e27..a5359a7a2 100644 --- a/400_xowa/src/gplx/xowa/addons/wikis/pages/syncs/core/Xosync_update_mgr.java +++ b/400_xowa/src/gplx/xowa/addons/wikis/pages/syncs/core/Xosync_update_mgr.java @@ -15,28 +15,15 @@ Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt */ package gplx.xowa.addons.wikis.pages.syncs.core; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.pages.*; import gplx.xowa.addons.wikis.pages.syncs.*; import gplx.xowa.files.downloads.*; -import gplx.xowa.wikis.data.*; import gplx.xowa.wikis.data.tbls.*; +import gplx.xowa.wikis.data.*; import gplx.xowa.wikis.data.tbls.*; import gplx.xowa.wikis.domains.*; import gplx.xowa.htmls.*; import gplx.langs.htmls.docs.*; import gplx.xowa.htmls.core.wkrs.*; import gplx.xowa.htmls.core.wkrs.txts.*; import gplx.xowa.htmls.core.hzips.*; import gplx.xowa.htmls.core.dbs.*; import gplx.xowa.addons.wikis.pages.syncs.wmapis.*; import gplx.xowa.addons.wikis.pages.syncs.core.parsers.*; public class Xosync_update_mgr { - private final Xoh_hzip_bfr bfr = new Xoh_hzip_bfr(Io_mgr.Len_kb, Bool_.N, Byte_.Max_value_127); - private final Gfh_doc_parser hdoc_parser_mgr; - private final Xoh_hdoc_ctx hctx = new Xoh_hdoc_ctx(); - private final Xosync_hdoc_wtr hdoc_bldr = new Xosync_hdoc_wtr(); - private final Xosync_hdoc_parser hdoc_parser_wkr; - private final Xowd_html_tbl_mgr html_tbl_mgr = new Xowd_html_tbl_mgr(); - public Xosync_update_mgr() { - hdoc_parser_wkr = new Xosync_hdoc_parser(hdoc_bldr); - hdoc_parser_mgr = new Gfh_doc_parser(new Xoh_txt_parser(hdoc_bldr), hdoc_parser_wkr); - } - public void Init_by_app(Xoa_app app) { - hctx.Init_by_app(app); - } - public Xowm_parse_data Update(Xof_download_wkr download_wkr, Xow_wiki wiki, Xoa_ttl page_ttl) { - Xoh_page hpg = (Xoh_page)hctx.Page(); + public Xowm_parse_data Update(Xof_download_wkr download_wkr, Xow_wiki wiki, byte[] page_url, Xoa_ttl page_ttl) { + Xoh_page hpg = new Xoh_page(); // call wmf api Xowm_parse_wmf parse_wkr = new Xowm_parse_wmf(); @@ -45,10 +32,11 @@ public class Xosync_update_mgr { // parse html to fix images Gfo_usr_dlg_.Instance.Log_many("", "", "page_sync: parsing page; page=~{0}", page_ttl.Full_db()); - Parse(hpg, wiki, hctx.Page__url(), data.Revn_html()); + Xosync_hdoc_parser hdoc_parser_wkr = new Xosync_hdoc_parser(); + byte[] html_bry = hdoc_parser_wkr.Parse_hdoc(wiki.Domain_itm(), page_url, hpg.Hdump_mgr().Imgs(), data.Revn_html()); // init some vars - byte[] html_bry = hpg.Db().Html().Html_bry(); + Xowd_html_tbl_mgr html_tbl_mgr = new Xowd_html_tbl_mgr(); Xow_db_file html_db = html_tbl_mgr.Get_html_db(wiki); Xow_db_file core_db = wiki.Data__core_mgr().Db__core(); Xowd_page_tbl page_tbl = core_db.Tbl__page(); @@ -88,17 +76,4 @@ public class Xosync_update_mgr { return data; } - public void Parse(Xoh_page hpg, Xow_wiki wiki, byte[] page_url, byte[] src) { - int src_len = src.length; - - // init_by_page for bldr, parser, hdoc - hctx.Init_by_page(wiki, hpg); - hpg.Hdump_mgr().Clear(); - hdoc_bldr.Init_by_page(bfr, hpg, hctx, src, 0, src_len); - hdoc_parser_wkr.Init_by_page(hctx, src, 0, src_len); - - // parse - hdoc_parser_mgr.Parse(page_url, src, 0, src_len); - hpg.Db().Html().Html_bry_(bfr.To_bry_and_clear()); - } } diff --git a/400_xowa/src/gplx/xowa/addons/wikis/pages/syncs/core/loaders/Xosync_page_loader.java b/400_xowa/src/gplx/xowa/addons/wikis/pages/syncs/core/loaders/Xosync_page_loader.java index 3aa7c2c3f..e305e732e 100644 --- a/400_xowa/src/gplx/xowa/addons/wikis/pages/syncs/core/loaders/Xosync_page_loader.java +++ b/400_xowa/src/gplx/xowa/addons/wikis/pages/syncs/core/loaders/Xosync_page_loader.java @@ -74,7 +74,7 @@ public class Xosync_page_loader { img_src_val = Bry_.Replace(img_src_val, src_find, src_repl); // parse src - img_src_parser.Parse(err_wkr, hctx, wiki.Domain_bry(), img_src_atr.Src(), img_src_atr.Val_bgn(), img_src_atr.Val_end()); + img_src_parser.Parse(err_wkr, wiki.Domain_bry(), img_src_atr.Src(), img_src_atr.Val_bgn(), img_src_atr.Val_end()); if (img_src_parser.File_ttl_bry() == null) return null; // skip images that don't follow format of "commons.wikimedia.org/thumb/7/70/A.png"; for example, enlarge buttons // create img @@ -103,7 +103,7 @@ public class Xosync_page_loader { // if (path_tid == Xosync_img_src_parser.Path__file) img.Init_at_gallery_end(img_tag.Atrs__get_as_int_or(Gfh_atr_.Bry__width,0), img_tag.Atrs__get_as_int_or(Gfh_atr_.Bry__height, 0), html_view_url, html_view_url); - Xosync_hdoc_parser.Write_img_tag(tmp_bfr, img_tag, img_src_val, img.Html_uid()); + Xosync_hdoc_parser.Write_img_tag(tmp_bfr, img_tag, img.Html_uid(), img_src_val); return img; } } \ No newline at end of file diff --git a/400_xowa/src/gplx/xowa/addons/wikis/pages/syncs/core/parsers/Xosync_hdoc_parser.java b/400_xowa/src/gplx/xowa/addons/wikis/pages/syncs/core/parsers/Xosync_hdoc_parser.java index d991bd04d..f310e443b 100644 --- a/400_xowa/src/gplx/xowa/addons/wikis/pages/syncs/core/parsers/Xosync_hdoc_parser.java +++ b/400_xowa/src/gplx/xowa/addons/wikis/pages/syncs/core/parsers/Xosync_hdoc_parser.java @@ -14,23 +14,47 @@ GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt */ package gplx.xowa.addons.wikis.pages.syncs.core.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.pages.*; import gplx.xowa.addons.wikis.pages.syncs.*; import gplx.xowa.addons.wikis.pages.syncs.core.*; -import gplx.langs.htmls.*; import gplx.langs.htmls.docs.*; import gplx.xowa.htmls.core.wkrs.*; -public class Xosync_hdoc_parser implements Gfh_doc_wkr { - private final Xosync_hdoc_wtr hdoc_wtr; - private final Gfh_tag_rdr tag_rdr = Gfh_tag_rdr.New__html(); - private final Bry_bfr tmp_bfr = Bry_bfr_.New(); - private final Xosync_img_src_parser img_src_parser = new Xosync_img_src_parser(); +import gplx.langs.htmls.*; import gplx.langs.htmls.docs.*; +import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.wkrs.*; +import gplx.xowa.wikis.domains.*; +public class Xosync_hdoc_parser { + public byte[] Parse_hdoc(Xow_domain_itm wiki_domain, byte[] page_url, List_adp imgs, byte[] src) { + // init + Bry_bfr bfr = Bry_bfr_.New(); + Xosync_img_src_parser img_src_parser = new Xosync_img_src_parser().Init_by_page(wiki_domain, page_url, imgs); + int cur = 0, src_len = src.length; + Gfh_tag_rdr tag_rdr = Gfh_tag_rdr.New__html().Init(page_url, src, cur, src_len); - public Xosync_hdoc_parser(Xosync_hdoc_wtr hdoc_wtr) {this.hdoc_wtr = hdoc_wtr;} - public byte[] Hook() {return Byte_ascii.Angle_bgn_bry;} - public void Init_by_page(Xoh_hdoc_ctx hctx, byte[] src, int src_bgn, int src_end) { - tag_rdr.Init(hctx.Page__url(), src, src_bgn, src_end); - img_src_parser.Init_by_page(hctx); + // loop src + while (true) { + // look for "<" + int find = Bry_find_.Find_fwd(src, Byte_ascii.Angle_bgn, cur, src_len); + + // "<" not found; add rest of src and stop + if (find == Bry_find_.Not_found) { + bfr.Add_mid(src, cur, src_len); + break; + } + + // "<" found; add everything between cur and "<" + bfr.Add_mid(src, cur, find); + + // parse "<" + cur = Parse_tag(bfr, tag_rdr, img_src_parser, src, src_len, find); + } + + return bfr.To_bry_and_clear(); } - public int Parse(byte[] src, int src_bgn, int src_end, int pos) { + private int Parse_tag(Bry_bfr bfr, Gfh_tag_rdr tag_rdr, Xosync_img_src_parser img_src_parser, byte[] src, int src_len, int pos) { // note that entry point is at "<" tag_rdr.Pos_(pos); - int nxt_pos = tag_rdr.Pos() + 1; if (nxt_pos == src_end) return src_end; + int nxt_pos = tag_rdr.Pos() + 1; + + // "<" is at EOS + if (nxt_pos == src_len) { + bfr.Add_byte(Byte_ascii.Angle_bgn); + return src_len; + } // check if head or tail; EX: "" vs "" byte nxt_byte = src[nxt_pos]; @@ -39,7 +63,7 @@ public class Xosync_hdoc_parser implements Gfh_doc_wkr { int end_comm = Bry_find_.Move_fwd(src, Gfh_tag_.Comm_end, nxt_pos); if (end_comm == Bry_find_.Not_found) { Gfo_usr_dlg_.Instance.Warn_many("", "", "end comment not found; src=~{0}", String_.new_u8(src)); - end_comm = src_end; + end_comm = src_len; } return end_comm; } @@ -55,15 +79,15 @@ public class Xosync_hdoc_parser implements Gfh_doc_wkr { } break; case Gfh_tag_.Id__img: // rewrite src for XOWA; especially necessary for relative protocol; EX: "//upload.wikimedia.org"; note do not use tag b/c of issues with anchors like "href=#section" - return Parse_img_src(cur); + return Parse_img_src(bfr, img_src_parser, cur); default: break; } } - hdoc_wtr.On_txt(cur.Src_bgn(), cur.Src_end()); + bfr.Add_mid(src, cur.Src_bgn(), cur.Src_end()); return cur.Src_end(); } - private int Parse_img_src(Gfh_tag img_tag) { + private int Parse_img_src(Bry_bfr bfr, Xosync_img_src_parser img_src_parser, Gfh_tag img_tag) { // get @src and parse it Gfh_atr src_atr = img_tag.Atrs__get_by_or_empty(Gfh_atr_.Bry__src); img_src_parser.Parse(src_atr.Val()); @@ -71,21 +95,20 @@ public class Xosync_hdoc_parser implements Gfh_doc_wkr { // if error, write comment; EX: String err_msg = img_src_parser.Err_msg(); if (err_msg != null) { - hdoc_wtr.Add_bry(Gfh_tag_.Comm_bgn); - hdoc_wtr.Add_str(img_src_parser.Err_msg()); - hdoc_wtr.Add_bry(Gfh_tag_.Comm_end); + bfr.Add(Gfh_tag_.Comm_bgn); + bfr.Add_str_u8(img_src_parser.Err_msg()); + bfr.Add(Gfh_tag_.Comm_end); } // get img_src; use img_src_parser if no error, else use original value byte[] img_src_val = err_msg == null ? img_src_parser.To_bry() : src_atr.Val(); // write html - Write_img_tag(tmp_bfr, img_tag, img_src_val, -1); - hdoc_wtr.Add_bfr(tmp_bfr); + Write_img_tag(bfr, img_tag, -1, img_src_val); return img_tag.Src_end(); } - public static void Write_img_tag(Bry_bfr bfr, Gfh_tag img_tag, byte[] img_src_val, int uid) { + public static void Write_img_tag(Bry_bfr bfr, Gfh_tag img_tag, int uid, byte[] img_src_val) { // rewrite tag with custom img_src_val int atrs_len = img_tag.Atrs__len(); bfr.Add(Byte_ascii.Angle_bgn_bry); diff --git a/400_xowa/src/gplx/xowa/addons/wikis/pages/syncs/core/parsers/Xosync_hdoc_parser__fxt.java b/400_xowa/src/gplx/xowa/addons/wikis/pages/syncs/core/parsers/Xosync_hdoc_parser__fxt.java index bd04ae2e5..5ea86143d 100644 --- a/400_xowa/src/gplx/xowa/addons/wikis/pages/syncs/core/parsers/Xosync_hdoc_parser__fxt.java +++ b/400_xowa/src/gplx/xowa/addons/wikis/pages/syncs/core/parsers/Xosync_hdoc_parser__fxt.java @@ -18,7 +18,6 @@ import gplx.core.tests.*; import gplx.langs.htmls.*; import gplx.xowa.htmls.*; import gplx.xowa.files.*; import gplx.xowa.files.repos.*; public class Xosync_hdoc_parser__fxt { - private final Xosync_update_mgr mgr = new Xosync_update_mgr(); private final Bry_bfr tmp_bfr = Bry_bfr_.New(); private final Xoh_page hpg = new Xoh_page(); private Xowe_wiki wiki; @@ -28,13 +27,14 @@ public class Xosync_hdoc_parser__fxt { Xoae_app app = Xoa_app_fxt.Make__app__edit(); this.wiki = Xoa_app_fxt.Make__wiki__edit(app); Xoa_app_fxt.repo2_(app, wiki); - mgr.Init_by_app(app); } public void Term() { Gfo_usr_dlg_.Instance = Gfo_usr_dlg_.Noop; } public Xosync_hdoc_parser__fxt Exec__parse(String raw) { - mgr.Parse(hpg, wiki, Bry_.Empty, Bry_.new_u8(raw)); + Xosync_hdoc_parser parser = new Xosync_hdoc_parser(); + byte[] result = parser.Parse_hdoc(wiki.Domain_itm(), Bry_.Empty, hpg.Hdump_mgr().Imgs(), Bry_.new_u8(raw)); + hpg.Db().Html().Html_bry_(result); return this; } public Xosync_hdoc_parser__fxt Test__html(String expd) { diff --git a/400_xowa/src/gplx/xowa/addons/wikis/pages/syncs/core/parsers/Xosync_hdoc_wtr.java b/400_xowa/src/gplx/xowa/addons/wikis/pages/syncs/core/parsers/Xosync_hdoc_wtr.java deleted file mode 100644 index 98078977b..000000000 --- a/400_xowa/src/gplx/xowa/addons/wikis/pages/syncs/core/parsers/Xosync_hdoc_wtr.java +++ /dev/null @@ -1,41 +0,0 @@ -/* -XOWA: the XOWA Offline Wiki Application -Copyright (C) 2012-2017 gnosygnu@gmail.com - -XOWA is licensed under the terms of the General Public License (GPL) Version 3, -or alternatively under the terms of the Apache License Version 2.0. - -You may use XOWA according to either of these licenses as is most appropriate -for your project on a case-by-case basis. - -The terms of each license can be found in the source code repository: - -GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt -Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt -*/ -package gplx.xowa.addons.wikis.pages.syncs.core.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.pages.*; import gplx.xowa.addons.wikis.pages.syncs.*; import gplx.xowa.addons.wikis.pages.syncs.core.*; -import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.wkrs.*; import gplx.xowa.htmls.core.hzips.*; -public class Xosync_hdoc_wtr implements Xoh_hdoc_wkr { - private Xoh_hzip_bfr bfr; - private byte[] src; - - public void Init_by_page(Xoh_hzip_bfr bfr, Xoh_page hpg, Xoh_hdoc_ctx hctx, byte[] src, int src_bgn, int src_end) { - this.On_new_page(bfr, hpg, hctx, src, src_bgn, src_end); - } - public void On_new_page(Xoh_hzip_bfr bfr, Xoh_page hpg, Xoh_hdoc_ctx hctx, byte[] src, int src_bgn, int src_end) { - this.bfr = bfr; - this.src = src; - } - public void On_txt (int rng_bgn, int rng_end) {bfr.Add_mid(src, rng_bgn, rng_end);} - public void Add_bfr (Bry_bfr v) {bfr.Add_bfr_and_clear(v);} - public void Add_str (String v) {bfr.Add_str_u8(v);} - public void Add_bry (byte[] v) {bfr.Add(v);} - - // not used - public void On_escape (gplx.xowa.htmls.core.wkrs.escapes.Xoh_escape_data data) {} - public void On_xnde (gplx.xowa.htmls.core.wkrs.xndes.Xoh_xnde_parser parser) {} - public void On_lnki (gplx.xowa.htmls.core.wkrs.lnkis.Xoh_lnki_data parser) {} - public void On_thm (gplx.xowa.htmls.core.wkrs.thms.Xoh_thm_data parser) {} - public void On_gly (gplx.xowa.htmls.core.wkrs.glys.Xoh_gly_grp_data parser) {} - public boolean Process_parse(Xoh_data_itm data) {return false;} -} diff --git a/400_xowa/src/gplx/xowa/addons/wikis/pages/syncs/core/parsers/Xosync_img_src_parser.java b/400_xowa/src/gplx/xowa/addons/wikis/pages/syncs/core/parsers/Xosync_img_src_parser.java index c374c6a9f..7c90de66d 100644 --- a/400_xowa/src/gplx/xowa/addons/wikis/pages/syncs/core/parsers/Xosync_img_src_parser.java +++ b/400_xowa/src/gplx/xowa/addons/wikis/pages/syncs/core/parsers/Xosync_img_src_parser.java @@ -25,7 +25,8 @@ public class Xosync_img_src_parser { private final byte[] wiki_abrv_commons; private final Xoh_img_src_data img_src_parser = new Xoh_img_src_data(); - private Xoh_hdoc_ctx hctx; + private Xow_domain_itm wiki_domain; + private List_adp imgs; private byte path_tid; private byte[] img_src_bgn_local, img_src_bgn_remote; private byte[] page_url, repo_local; @@ -46,13 +47,16 @@ public class Xosync_img_src_parser { img_src_bgn_remote = tmp_bfr.Add(Bry__xowa_file).Add(Xow_domain_itm_.Bry__commons).Add_byte_slash().To_bry_and_clear(); wiki_abrv_commons = Xow_abrv_xo_.To_bry(Xow_domain_itm_.Bry__commons); } - public void Init_by_page(Xoh_hdoc_ctx hctx) { - this.hctx = hctx; - this.page_url = hctx.Page__url(); + public Xosync_img_src_parser Init_by_page(Xow_domain_itm wiki_domain, byte[] page_url, List_adp imgs) { + this.wiki_domain = wiki_domain; + this.page_url = page_url; + this.imgs = imgs; + this.imgs.Clear(); this.path_tid = Path__unknown; - this.repo_local = To_wmf_repo_or_null(tmp_bfr, hctx.Wiki__domain_itm()); - if (repo_local == null) Gfo_usr_dlg_.Instance.Warn_many("", "", "unsupported wmf repo; domain=~{0}", hctx.Wiki__domain_itm().Domain_bry()); - img_src_bgn_local = tmp_bfr.Add(Bry__xowa_file).Add(hctx.Wiki__domain_bry()).Add_byte_slash().To_bry_and_clear(); // EX: "xowa:/file/en.wikipedia.org/" + this.repo_local = To_wmf_repo_or_null(tmp_bfr, wiki_domain); + if (repo_local == null) Gfo_usr_dlg_.Instance.Warn_many("", "", "unsupported wmf repo; domain=~{0}", wiki_domain.Domain_bry()); + img_src_bgn_local = tmp_bfr.Add(Bry__xowa_file).Add(wiki_domain.Domain_bry()).Add_byte_slash().To_bry_and_clear(); // EX: "xowa:/file/en.wikipedia.org/" + return this; } public boolean Parse(byte[] raw) { // init @@ -82,7 +86,7 @@ public class Xosync_img_src_parser { } private boolean Parse_file_xo() { img_src_parser.Clear(); - boolean rv = img_src_parser.Parse(rdr.Err_wkr(), hctx, hctx.Wiki__domain_bry(), raw, 0, raw_len); + boolean rv = img_src_parser.Parse(rdr.Err_wkr(), wiki_domain.Domain_bry(), raw, 0, raw_len); if (rv) { this.repo_is_commons = img_src_parser.Repo_is_commons(); this.file_is_orig = img_src_parser.File_is_orig(); @@ -95,7 +99,7 @@ public class Xosync_img_src_parser { if (img_src_parser.File_time_exists()) this.file_time = img_src_parser.File_time(); } - Add_img(hctx.Wiki__domain_itm().Abrv_xo()); + Add_img(wiki_domain.Abrv_xo()); } return rv; } @@ -147,7 +151,7 @@ public class Xosync_img_src_parser { } // register image - Add_img(hctx.Wiki__domain_itm().Abrv_xo()); + Add_img(wiki_domain.Abrv_xo()); return true; } private boolean Parse_math() { @@ -162,7 +166,7 @@ public class Xosync_img_src_parser { } private void Add_img(byte[] wiki_abrv) { Xof_fsdb_itm itm = new Xof_fsdb_itm(); - hctx.Page().Hdump_mgr().Imgs().Add(itm); + imgs.Add(itm); itm.Init_by_wm_parse(wiki_abrv, repo_is_commons, file_is_orig, file_ttl_bry, file_ext, file_w, file_time, file_page); } public byte[] To_bry() { diff --git a/400_xowa/src/gplx/xowa/addons/wikis/pages/syncs/specials/Sync_html_special.java b/400_xowa/src/gplx/xowa/addons/wikis/pages/syncs/specials/Sync_html_special.java index 7543a3249..aeae70776 100644 --- a/400_xowa/src/gplx/xowa/addons/wikis/pages/syncs/specials/Sync_html_special.java +++ b/400_xowa/src/gplx/xowa/addons/wikis/pages/syncs/specials/Sync_html_special.java @@ -29,8 +29,7 @@ public class Sync_html_special implements Xow_special_page { // update Xosync_update_mgr updater = new Xosync_update_mgr(); - updater.Init_by_app(wiki.App()); - updater.Update(wiki.App().Wmf_mgr().Download_wkr(), wiki, redirect_ttl); + updater.Update(wiki.App().Wmf_mgr().Download_wkr(), wiki, page.Url_bry_safe(), redirect_ttl); ((Xowe_wiki)wiki).Data_mgr().Redirect((Xoae_page)page, redirect_bry); // HACK: should call page.Redirect_trail() below, but need to handle Display_ttl // page.Redirect_trail().Itms__add__article(redirect_url, redirect_ttl, null); } diff --git a/400_xowa/src/gplx/xowa/htmls/core/Xow_hdump_mgr.java b/400_xowa/src/gplx/xowa/htmls/core/Xow_hdump_mgr.java index ad9e2f9e1..aaff7d7ec 100644 --- a/400_xowa/src/gplx/xowa/htmls/core/Xow_hdump_mgr.java +++ b/400_xowa/src/gplx/xowa/htmls/core/Xow_hdump_mgr.java @@ -27,9 +27,6 @@ public class Xow_hdump_mgr { public Xow_hdump_mgr__save Save_mgr() {return save_mgr;} private Xow_hdump_mgr__save save_mgr; public Xow_hdump_mgr__load Load_mgr() {return load_mgr;} private Xow_hdump_mgr__load load_mgr; public Xoh_hzip_mgr Hzip_mgr() {return hzip_mgr;} private final Xoh_hzip_mgr hzip_mgr = new Xoh_hzip_mgr(); - public void Init_by_app(Xoae_app app) { - save_mgr.Init_by_app(app); - } public void Init_by_db(Xow_wiki wiki) { byte dflt_zip_tid = gplx.core.ios.streams.Io_stream_tid_.Tid__raw; boolean dflt_hzip_enable = false; diff --git a/400_xowa/src/gplx/xowa/htmls/core/Xow_hdump_mgr__load.java b/400_xowa/src/gplx/xowa/htmls/core/Xow_hdump_mgr__load.java index d07b6f923..c4949a64c 100644 --- a/400_xowa/src/gplx/xowa/htmls/core/Xow_hdump_mgr__load.java +++ b/400_xowa/src/gplx/xowa/htmls/core/Xow_hdump_mgr__load.java @@ -81,8 +81,13 @@ public class Xow_hdump_mgr__load implements Gfo_invk { src = zip_mgr.Unzip((byte)zip_tid, src); switch (hzip_tid) { case Xoh_hzip_dict_.Hzip__none: + case Xoh_hzip_dict_.Hzip__plain: src = make_mgr.Parse(src, hpg, hpg.Wiki()); break; +// case Xoh_hzip_dict_.Hzip__plain: +// gplx.xowa.addons.wikis.pages.syncs.core.loaders.Xosync_page_loader page_loader = new gplx.xowa.addons.wikis.pages.syncs.core.loaders.Xosync_page_loader(); +// src = page_loader.Parse(wiki, hpg, src); +// break; case Xoh_hzip_dict_.Hzip__v1: if (override_mgr__html != null) // null when Parse is called directly src = override_mgr__html.Get_or_same(hpg.Ttl().Page_db(), src); @@ -90,10 +95,6 @@ public class Xow_hdump_mgr__load implements Gfo_invk { src = Decode_as_bry(tmp_bfr.Clear(), hpg, src, Bool_.N); hpg.Section_mgr().Set_content(hpg.Section_mgr().Len() - 1, src, src.length); break; - case Xoh_hzip_dict_.Hzip__plain: - gplx.xowa.addons.wikis.pages.syncs.core.loaders.Xosync_page_loader page_loader = new gplx.xowa.addons.wikis.pages.syncs.core.loaders.Xosync_page_loader(); - src = page_loader.Parse(wiki, hpg, src); - break; } return src; } diff --git a/400_xowa/src/gplx/xowa/htmls/core/Xow_hdump_mgr__save.java b/400_xowa/src/gplx/xowa/htmls/core/Xow_hdump_mgr__save.java index 40becd047..3a0430063 100644 --- a/400_xowa/src/gplx/xowa/htmls/core/Xow_hdump_mgr__save.java +++ b/400_xowa/src/gplx/xowa/htmls/core/Xow_hdump_mgr__save.java @@ -16,18 +16,15 @@ Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt package gplx.xowa.htmls.core; import gplx.*; import gplx.xowa.*; import gplx.xowa.htmls.*; import gplx.xowa.htmls.core.htmls.*; import gplx.xowa.htmls.core.wkrs.*; import gplx.xowa.htmls.core.hzips.*; import gplx.xowa.htmls.heads.*; import gplx.xowa.htmls.core.dbs.*; import gplx.core.ios.*; import gplx.core.primitives.*; import gplx.xowa.wikis.data.*; import gplx.xowa.wikis.pages.*; -import gplx.xowa.addons.wikis.pages.syncs.core.*; +import gplx.xowa.addons.wikis.pages.syncs.core.parsers.*; public class Xow_hdump_mgr__save { private final Xow_wiki wiki; private final Xoh_hzip_mgr hzip_mgr; private final Io_stream_zip_mgr zip_mgr; - private final Xosync_update_mgr update_mgr = new Xosync_update_mgr(); + private final Xosync_hdoc_parser plain_parser = new Xosync_hdoc_parser(); private final Xoh_page tmp_hpg; private final Xoh_hzip_bfr tmp_bfr = Xoh_hzip_bfr.New_txt(32); private Bool_obj_ref html_db_is_new = Bool_obj_ref.n_(); private int dflt_zip_tid, dflt_hzip_tid; public Xow_hdump_mgr__save(Xow_wiki wiki, Xoh_hzip_mgr hzip_mgr, Io_stream_zip_mgr zip_mgr, Xoh_page tmp_hpg) { this.wiki = wiki; this.hzip_mgr = hzip_mgr; this.zip_mgr = zip_mgr; this.tmp_hpg = tmp_hpg; } - public void Init_by_app(Xoae_app app) { - update_mgr.Init_by_app(app); - } public void Init_by_db(int dflt_zip_tid, int dflt_hzip_tid, boolean mode_is_b256) { this.dflt_zip_tid = dflt_zip_tid; this.dflt_hzip_tid = dflt_hzip_tid; tmp_bfr.Mode_is_b256_(mode_is_b256); } @@ -55,8 +52,7 @@ public class Xow_hdump_mgr__save { private byte[] Write(Xoh_hzip_bfr bfr, Xow_wiki wiki, Xoae_page page, Xoh_page hpg, Xoh_hzip_mgr hzip_mgr, Io_stream_zip_mgr zip_mgr, int zip_tid, int hzip_tid, byte[] src) { switch (hzip_tid) { case Xoh_hzip_dict_.Hzip__none: - update_mgr.Parse(hpg, wiki, page.Url_bry_safe(), src); - src = hpg.Db().Html().Html_bry(); + src = plain_parser.Parse_hdoc(wiki.Domain_itm(), page.Url_bry_safe(), hpg.Hdump_mgr().Imgs(), src); break; case Xoh_hzip_dict_.Hzip__v1: src = hzip_mgr.Encode_as_bry((Xoh_hzip_bfr)bfr.Clear(), wiki, hpg, src); diff --git a/400_xowa/src/gplx/xowa/htmls/core/bldrs/Xob_hdump_bldr.java b/400_xowa/src/gplx/xowa/htmls/core/bldrs/Xob_hdump_bldr.java index d662390be..d30e923cd 100644 --- a/400_xowa/src/gplx/xowa/htmls/core/bldrs/Xob_hdump_bldr.java +++ b/400_xowa/src/gplx/xowa/htmls/core/bldrs/Xob_hdump_bldr.java @@ -40,7 +40,6 @@ public class Xob_hdump_bldr implements Gfo_invk { this.toc_label = wiki.Msg_mgr().Val_by_id(gplx.xowa.langs.msgs.Xol_msg_itm_.Id_toc); if (zip_tid == Byte_.Max_value_127) zip_tid = Xobldr_cfg.Zip_mode__html(wiki.App()); - hdump_mgr.Init_by_app(wiki.Appe()); hdump_mgr.Init_by_db(zip_tid, hzip_enabled, hzip_b256); return true; } diff --git a/400_xowa/src/gplx/xowa/htmls/core/wkrs/Xoh_hdoc_wkr__make.java b/400_xowa/src/gplx/xowa/htmls/core/wkrs/Xoh_hdoc_wkr__make.java index d8d8e6781..1323bc1e0 100644 --- a/400_xowa/src/gplx/xowa/htmls/core/wkrs/Xoh_hdoc_wkr__make.java +++ b/400_xowa/src/gplx/xowa/htmls/core/wkrs/Xoh_hdoc_wkr__make.java @@ -37,6 +37,7 @@ public class Xoh_hdoc_wkr__make implements Xoh_hdoc_wkr { } public void On_gly (gplx.xowa.htmls.core.wkrs.glys.Xoh_gly_grp_data data) { bfr.Add_mid(src, data.Src_bgn(), data.Src_end()); + hpg.Xtn__gallery_exists_y_(); } public boolean Process_parse(Xoh_data_itm data) { switch (data.Tid()) { diff --git a/400_xowa/src/gplx/xowa/htmls/core/wkrs/imgs/Xoh_img_data.java b/400_xowa/src/gplx/xowa/htmls/core/wkrs/imgs/Xoh_img_data.java index 1fe0170e8..f5e5af004 100644 --- a/400_xowa/src/gplx/xowa/htmls/core/wkrs/imgs/Xoh_img_data.java +++ b/400_xowa/src/gplx/xowa/htmls/core/wkrs/imgs/Xoh_img_data.java @@ -85,7 +85,7 @@ public class Xoh_img_data implements Xoh_data_itm { img_alt_bgn = img_alt.Val_bgn(); img_alt_end = img_alt.Val_end(); img_cls.Init_by_parse(err_wkr, src, img_tag); // class='thumbborder' img_alt__diff_anch_title = !Bry_.Match(src, img_alt_bgn, img_alt_end, src, anch_title_bgn, anch_title_end); - if (!img_src.Parse(err_wkr, hctx, hctx.Wiki__domain_bry(), img_tag)) return false; // src='...' + if (!img_src.Parse(err_wkr, hctx.Wiki__domain_bry(), img_tag)) return false; // src='...' if (anch_xo_ttl.Val_is_empty()) { anch_xo_ttl.Val_(img_src.File_ttl_bry()); if (anch_xo_ttl.Val_is_empty()) diff --git a/400_xowa/src/gplx/xowa/htmls/core/wkrs/imgs/atrs/Xoh_img_src_data.java b/400_xowa/src/gplx/xowa/htmls/core/wkrs/imgs/atrs/Xoh_img_src_data.java index 98a2751ac..081571b7c 100644 --- a/400_xowa/src/gplx/xowa/htmls/core/wkrs/imgs/atrs/Xoh_img_src_data.java +++ b/400_xowa/src/gplx/xowa/htmls/core/wkrs/imgs/atrs/Xoh_img_src_data.java @@ -47,12 +47,12 @@ public class Xoh_img_src_data implements Bfr_arg_clearable, Xoh_itm_parser { repo_tid = Xof_repo_tid_.Tid__null; file_ttl_bry = null; } - public boolean Parse(Bry_err_wkr err_wkr, Xoh_hdoc_ctx hctx, byte[] domain_bry, Gfh_tag tag) { + public boolean Parse(Bry_err_wkr err_wkr, byte[] domain_bry, Gfh_tag tag) { this.Clear(); Gfh_atr atr = tag.Atrs__get_by_or_empty(Gfh_atr_.Bry__src); - return Parse(err_wkr, hctx, domain_bry, atr.Src(), atr.Val_bgn(), atr.Val_end()); + return Parse(err_wkr, domain_bry, atr.Src(), atr.Val_bgn(), atr.Val_end()); } - public boolean Parse(Bry_err_wkr err_wkr, Xoh_hdoc_ctx hctx, byte[] domain_bry, byte[] src_bry, int src_bgn, int src_end) { // EX: src="file:///C:/xowa/file/commons.wikimedia.org/thumb/7/0/1/2/A.png/220px.png" + public boolean Parse(Bry_err_wkr err_wkr, byte[] domain_bry, byte[] src_bry, int src_bgn, int src_end) { // EX: src="file:///C:/xowa/file/commons.wikimedia.org/thumb/7/0/1/2/A.png/220px.png" this.Clear(); this.src_bry = src_bry; this.src_bgn = src_bgn; this.src_end = src_end; diff --git a/400_xowa/src/gplx/xowa/htmls/core/wkrs/imgs/atrs/Xoh_img_src_data_tst.java b/400_xowa/src/gplx/xowa/htmls/core/wkrs/imgs/atrs/Xoh_img_src_data_tst.java index b85f76cf9..c38854702 100644 --- a/400_xowa/src/gplx/xowa/htmls/core/wkrs/imgs/atrs/Xoh_img_src_data_tst.java +++ b/400_xowa/src/gplx/xowa/htmls/core/wkrs/imgs/atrs/Xoh_img_src_data_tst.java @@ -61,6 +61,6 @@ class Xoh_img_src_data_fxt extends Xoh_itm_parser_fxt { private final Xoh_im } @Override public void Exec_parse_hook(Bry_err_wkr err_wkr, Xoh_hdoc_ctx hctx, byte[] src, int src_bgn, int src_end) { parser.Fail_throws_err_(true); - parser.Parse(err_wkr, new Xoh_hdoc_ctx(), Xow_domain_itm_.Bry__enwiki, src, src_bgn, src_end); + parser.Parse(err_wkr, Xow_domain_itm_.Bry__enwiki, src, src_bgn, src_end); } }