1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2024-10-27 20:34:16 +00:00

Parser: Do not dump build-path for imgs [#553]

This commit is contained in:
gnosygnu 2019-09-25 23:37:49 -04:00
parent 924b5fd17b
commit d362597d8f
4 changed files with 13 additions and 18 deletions

View File

@ -16,7 +16,7 @@ Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
package gplx.xowa.addons.bldrs.mass_parses.parses.wkrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.mass_parses.*; import gplx.xowa.addons.bldrs.mass_parses.parses.*; package gplx.xowa.addons.bldrs.mass_parses.parses.wkrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.bldrs.*; import gplx.xowa.addons.bldrs.mass_parses.*; import gplx.xowa.addons.bldrs.mass_parses.parses.*;
import gplx.dbs.*; import gplx.xowa.addons.bldrs.mass_parses.dbs.*; import gplx.dbs.*; import gplx.xowa.addons.bldrs.mass_parses.dbs.*;
import gplx.xowa.files.origs.*; import gplx.xowa.files.origs.*;
import gplx.xowa.htmls.core.bldrs.*; import gplx.xowa.htmls.hxtns.pages.*; import gplx.xowa.htmls.core.htmls.*; import gplx.xowa.htmls.core.bldrs.*; import gplx.xowa.htmls.hxtns.pages.*; import gplx.xowa.htmls.core.hzips.*;
import gplx.xowa.wikis.pages.*; import gplx.xowa.wikis.pages.*;
import gplx.xowa.parsers.*; import gplx.xowa.parsers.logs.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.logs.*;
import gplx.xowa.addons.bldrs.mass_parses.parses.mgrs.*; import gplx.xowa.addons.bldrs.mass_parses.parses.utls.*; import gplx.xowa.addons.bldrs.mass_parses.parses.*; import gplx.xowa.addons.bldrs.mass_parses.parses.pools.*; import gplx.xowa.addons.bldrs.mass_parses.parses.mgrs.*; import gplx.xowa.addons.bldrs.mass_parses.parses.utls.*; import gplx.xowa.addons.bldrs.mass_parses.parses.*; import gplx.xowa.addons.bldrs.mass_parses.parses.pools.*;
@ -101,6 +101,7 @@ public class Xomp_parse_wkr implements Gfo_invk {
wkr_db.Conn().Txn_bgn("xomp"); wkr_db.Conn().Txn_bgn("xomp");
stat_tbl.Stmt_new(); stat_tbl.Stmt_new();
hxtn_mgr.Insert_bgn(false); hxtn_mgr.Insert_bgn(false);
Xoh_wtr_ctx hctx = Xoh_wtr_ctx.Hdump_by_hzip_tid(cfg.Hzip_enabled() ? Xoh_hzip_dict_.Hdb__hzip : Xoh_hzip_dict_.Hdb__htxt); // ISSUE#:553; DATE:2019-09-25
// set status to running // set status to running
mgr_db.Tbl__wkr().Update_status(uid, Xomp_wkr_tbl.Status__running); mgr_db.Tbl__wkr().Update_status(uid, Xomp_wkr_tbl.Status__running);
@ -140,7 +141,7 @@ public class Xomp_parse_wkr implements Gfo_invk {
parser_mgr.Parse(wpg, true); parser_mgr.Parse(wpg, true);
// gen_html // gen_html
hdump_bldr.Insert(pctx, wpg); hdump_bldr.Insert(pctx, wpg, hctx);
// index // index
long fulltext_time = 0; long fulltext_time = 0;

View File

@ -42,7 +42,7 @@ public class Xob_hdump_bldr implements Gfo_invk {
hdump_mgr.Init_by_db(zip_tid, hzip_enabled, hzip_b256); hdump_mgr.Init_by_db(zip_tid, hzip_enabled, hzip_b256);
return true; return true;
} }
public void Insert(Xop_ctx ctx, Xoae_page wpg) { public void Insert(Xop_ctx ctx, Xoae_page wpg, Xoh_wtr_ctx hctx) {
// clear // clear
tmp_hpg.Clear(); // NOTE: must clear tmp_hpg or else will leak memory during mass build; DATE:2016-01-09 tmp_hpg.Clear(); // NOTE: must clear tmp_hpg or else will leak memory during mass build; DATE:2016-01-09
wpg.File_queue().Clear(); // need to reset uid to 0, else xowa_file_# will resume from last wpg.File_queue().Clear(); // need to reset uid to 0, else xowa_file_# will resume from last
@ -52,7 +52,7 @@ public class Xob_hdump_bldr implements Gfo_invk {
boolean is_wikitext = Xow_page_tid.Identify(wpg.Wiki().Domain_tid(), ttl.Ns().Id(), ttl.Page_db()) == Xow_page_tid.Tid_wikitext; boolean is_wikitext = Xow_page_tid.Identify(wpg.Wiki().Domain_tid(), ttl.Ns().Id(), ttl.Page_db()) == Xow_page_tid.Tid_wikitext;
byte[] orig_bry = Bry_.Empty; byte[] orig_bry = Bry_.Empty;
if (is_wikitext) { if (is_wikitext) {
wiki.Html_mgr().Page_wtr_mgr().Wkr(Xopg_view_mode_.Tid__read).Write_hdump(tmp_bfr, ctx, Xoh_wtr_ctx.Hdump, wpg); wiki.Html_mgr().Page_wtr_mgr().Wkr(Xopg_view_mode_.Tid__read).Write_hdump(tmp_bfr, ctx, hctx, wpg);
orig_bry = tmp_bfr.To_bry_and_clear(); orig_bry = tmp_bfr.To_bry_and_clear();
wpg.Db().Html().Html_bry_(orig_bry); wpg.Db().Html().Html_bry_(orig_bry);
} }

View File

@ -27,15 +27,9 @@ public class Xoh_file_fmtr__hdump extends Xoh_file_fmtr__basic { private final
byte[] data_xowa_title = Gfh_atr_.Make(tmp_bfr, Xoh_img_xoimg_data.Bry__data_xowa_title, a_xowa_title); byte[] data_xowa_title = Gfh_atr_.Make(tmp_bfr, Xoh_img_xoimg_data.Bry__data_xowa_title, a_xowa_title);
byte[] data_xowa_image = Bld_xowa_image_data(tmp_bfr, xfer_itm.Lnki_type(), xfer_itm.Lnki_w(), xfer_itm.Lnki_h(), xfer_itm.Lnki_upright(), xfer_itm.Lnki_time(), xfer_itm.Lnki_page()); byte[] data_xowa_image = Bld_xowa_image_data(tmp_bfr, xfer_itm.Lnki_type(), xfer_itm.Lnki_w(), xfer_itm.Lnki_h(), xfer_itm.Lnki_upright(), xfer_itm.Lnki_time(), xfer_itm.Lnki_page());
// only write src="..." width="..." height="..." if orig is present or if Hzip__none (Hzip__none should always write title and image) // always null out w, h, src; Hdb__hzip and Hdb__htxt should never write src; Hdb__page_sync will never come here; ISSUE#:553; DATE:2019-09-25
if (!xfer_itm.Orig_exists() || hctx.Hzip_tid() == gplx.xowa.htmls.core.hzips.Xoh_hzip_dict_.Hzip__none) { img_w = img_h = 0;
img_w = img_h = 0; img_src = Bry_.Empty;
img_src = Bry_.Empty;
}
else {
data_xowa_title = Bry_.Empty;
data_xowa_image = Bry_.Empty;
}
// bld bfr // bld bfr
if (Bry_.Len_eq_0(a_href)) if (Bry_.Len_eq_0(a_href))
@ -44,12 +38,12 @@ public class Xoh_file_fmtr__hdump extends Xoh_file_fmtr__basic { private final
if (a_href_is_file) a_href = Bry_.Empty; if (a_href_is_file) a_href = Bry_.Empty;
fmt__anch_y.Bld_many(bfr fmt__anch_y.Bld_many(bfr
, a_href, Xoh_lnki_consts.A_cls_to_bry(a_cls), Xoh_lnki_consts.A_rel_to_bry(a_rel), a_title, a_xowa_title , a_href, Xoh_lnki_consts.A_cls_to_bry(a_cls), Xoh_lnki_consts.A_rel_to_bry(a_rel), a_title, a_xowa_title
, data_xowa_title, data_xowa_image, img_src, img_w, img_h, Xoh_img_cls_.To_html(img_cls, img_cls_other), Gfh_utl.Escape_html_as_bry(img_alt) , data_xowa_title, data_xowa_image, Gfh_utl.Escape_html_as_bry(img_alt), img_src, img_w, img_h, Xoh_img_cls_.To_html(img_cls, img_cls_other)
); );
} }
} }
public static void Add_anch_n(Bry_bfr bfr, byte[] data_xowa_title, byte[] data_xowa_image, byte[] img_src, int img_w, int img_h, byte img_cls, byte[] img_cls_other, byte[] img_alt, byte[] img_xtra_atrs) { public static void Add_anch_n(Bry_bfr bfr, byte[] data_xowa_title, byte[] data_xowa_image, byte[] img_src, int img_w, int img_h, byte img_cls, byte[] img_cls_other, byte[] img_alt, byte[] img_xtra_atrs) {
fmt__anch_n.Bld_many(bfr, data_xowa_title, data_xowa_image, img_src, img_w, img_h, Xoh_img_cls_.To_html(img_cls, img_cls_other), Gfh_utl.Escape_html_as_bry(img_alt), img_xtra_atrs); fmt__anch_n.Bld_many(bfr, data_xowa_title, data_xowa_image, Gfh_utl.Escape_html_as_bry(img_alt), img_src, img_w, img_h, Xoh_img_cls_.To_html(img_cls, img_cls_other), img_xtra_atrs);
} }
public static byte[] Bld_xowa_image_data(Bry_bfr bfr, byte tid, int w, int h, double upright, double time, int page) { public static byte[] Bld_xowa_image_data(Bry_bfr bfr, byte tid, int w, int h, double upright, double time, int page) {
bfr.Add_byte_space().Add(Xoh_img_xoimg_data.Bry__data_xowa_image).Add_byte_eq().Add_byte_quote(); bfr.Add_byte_space().Add(Xoh_img_xoimg_data.Bry__data_xowa_image).Add_byte_eq().Add_byte_quote();
@ -63,11 +57,11 @@ public class Xoh_file_fmtr__hdump extends Xoh_file_fmtr__basic { private final
} }
private static final Bry_fmt private static final Bry_fmt
fmt__anch_n = Bry_fmt.Auto fmt__anch_n = Bry_fmt.Auto
( "<img~{data_xowa_title}~{data_xowa_image} src=\"~{img_src}\" width=\"~{img_w}\" height=\"~{img_h}\"~{img_cls} alt=\"~{img_alt}\"~{img_xtra_atrs}/>" ( "<img~{data_xowa_title}~{data_xowa_image} alt=\"~{img_alt}\" src=\"~{img_src}\" width=\"~{img_w}\" height=\"~{img_h}\"~{img_cls}~{img_xtra_atrs}/>"
) )
, fmt__anch_y = Bry_fmt.Auto , fmt__anch_y = Bry_fmt.Auto
( "<a href=\"~{a_href}\"~{a_class}~{a_rel}~{a_title} xowa_title=\"~{a_xowa_title}\">" ( "<a href=\"~{a_href}\"~{a_class}~{a_rel}~{a_title} xowa_title=\"~{a_xowa_title}\">"
+ "<img~{data_xowa_title}~{data_xowa_image} src=\"~{img_src}\" width=\"~{img_w}\" height=\"~{img_h}\"~{img_cls} alt=\"~{img_alt}\"/>" + "<img~{data_xowa_title}~{data_xowa_image} alt=\"~{img_alt}\" src=\"~{img_src}\" width=\"~{img_w}\" height=\"~{img_h}\"~{img_cls}/>"
+ "</a>" + "</a>"
); );
} }

View File

@ -72,7 +72,7 @@ public class Xop_lnki_wkr implements Xop_ctx_wkr, Xop_arg_wkr {
} }
if (lnki_is_file) { if (lnki_is_file) {
ctx.Page().Lnki_list().Add(lnki); ctx.Page().Lnki_list().Add(lnki);
lnki_logger.Log_file(ctx, lnki, Xop_file_logger_.Tid__file); lnki_logger.Log_file(Xop_file_logger_.Tid__file, ctx, lnki);
} }
Xoa_ttl lnki_ttl = lnki.Ttl(); Xoa_ttl lnki_ttl = lnki.Ttl();
if ( lnki_ttl.Wik_bgn() != -1 // lnki is xwiki if ( lnki_ttl.Wik_bgn() != -1 // lnki is xwiki