From 9e97a7ea9f42d42563398bf60526a61690ea0f45 Mon Sep 17 00:00:00 2001 From: gnosygnu Date: Mon, 25 Nov 2019 05:26:39 -0500 Subject: [PATCH] Html_dump: Show images for files with url-encoded characters (space, symbols, non-ascii) [#547] --- .../wkrs/imgs/Xoh_img_make__dump__tst.java | 15 ++++++++++-- .../htmls/core/wkrs/imgs/Xoh_img_wtr.java | 23 ++++++++++++------- .../libs/Scrib_lib_ustring__gsub__tst.java | 3 +++ 3 files changed, 31 insertions(+), 10 deletions(-) diff --git a/400_xowa/src/gplx/xowa/htmls/core/wkrs/imgs/Xoh_img_make__dump__tst.java b/400_xowa/src/gplx/xowa/htmls/core/wkrs/imgs/Xoh_img_make__dump__tst.java index 3b5e064f5..f1cc4522c 100644 --- a/400_xowa/src/gplx/xowa/htmls/core/wkrs/imgs/Xoh_img_make__dump__tst.java +++ b/400_xowa/src/gplx/xowa/htmls/core/wkrs/imgs/Xoh_img_make__dump__tst.java @@ -28,9 +28,20 @@ public class Xoh_img_make__dump__tst { ); } @Test public void Utf8() { + // fsdb_itm.lnki_ttl comes from data-xowa-title String - orig = "abc" - , expd = "abc" + orig = "abc" + , expd = "abc" + ; + fxt.Test__make(orig, fxt.Page_chkr().Body_(expd) + .Imgs__add("en.w", "Aé_b.png", Xop_lnki_type.Id_null, 0.5, 220, 110, -1, -1) + ); + } + @Test public void Utf8_src() { + // fsdb_itm.lnki_ttl comes from src (unsupported use-case for make which should assume no pre-existing files) + String + orig = "abc" + , expd = "abc" ; fxt.Test__make(orig, fxt.Page_chkr().Body_(expd) .Imgs__add("en.w", "Aé_b.png", Xop_lnki_type.Id_null, 0.5, 220, 110, -1, -1) diff --git a/400_xowa/src/gplx/xowa/htmls/core/wkrs/imgs/Xoh_img_wtr.java b/400_xowa/src/gplx/xowa/htmls/core/wkrs/imgs/Xoh_img_wtr.java index 26ee1be74..6abcd1103 100644 --- a/400_xowa/src/gplx/xowa/htmls/core/wkrs/imgs/Xoh_img_wtr.java +++ b/400_xowa/src/gplx/xowa/htmls/core/wkrs/imgs/Xoh_img_wtr.java @@ -83,11 +83,18 @@ public class Xoh_img_wtr implements Bfr_arg, Xoh_wtr_itm { this.img_wo_anch = data.Img_wo_anch(); this.fsdb_itm = hpg.Img_mgr().Make_img(data.Img_is_gallery()); - byte[] file_ttl_bry 
= data.Anch_xo_ttl().Val(); - byte[] img_src_bry_temp = data.Img_src().File_ttl_bry(); - byte[] lnki_ttl = img_src_bry_temp == null // img_src will be empty for htxt; use file_ttl_bry instead; EX: '' DATE:2019-03-10 - ? file_ttl_bry - : Xoa_ttl.Replace_spaces(Gfo_url_encoder_.Href_quotes.Decode(img_src_bry_temp)); // NOTE: must decode for fsdb.lnki_ttl as well as xowa_title; EX: A%C3%A9b -> Aéb + byte[] xowa_title = data.Anch_xo_ttl().Val(); + + // set lnki_ttl to img_src.file_ttl; + byte[] lnki_ttl = data.Img_src().File_ttl_bry(); + if (lnki_ttl == null) { + // in most cases, @src; set lnki_ttl from @xowa_title instead; EX: '' DATE:2019-03-10 + lnki_ttl = xowa_title; + } + + // url-decode; must decode since HTML will be url-encoded, but fsdb dbs will be actual characters; EX: A%C3%A9b -> Aéb + if (lnki_ttl != null) // NOTE: @src and @xowa_title will be null for score + lnki_ttl = Gfo_url_encoder_.Mw_wfUrlencode.Decode(lnki_ttl); boolean write_xowa_file_title = true; if (data.Img_pgbnr().Exists()) { @@ -125,11 +132,11 @@ public class Xoh_img_wtr implements Bfr_arg, Xoh_wtr_itm { anch_cls.Set_by_arg(data.Anch_cls()); anch_title.Set_by_mid_or_null(src, data.Anch_title_bgn(), data.Anch_title_end()); if ( data.Img_wo_anch() // anchor-less image - || Bry_.Len_gt_0(file_ttl_bry)) // regular anch with image - anch_xowa_title.Set_by_bry(file_ttl_bry); + || Bry_.Len_gt_0(xowa_title)) // regular anch with image + anch_xowa_title.Set_by_bry(xowa_title); if (write_xowa_file_title) - img_xowa_title.Set_by_bry(file_ttl_bry); + img_xowa_title.Set_by_bry(xowa_title); img_alt.Set_by_mid_or_empty(src, data.Img_alt_bgn(), data.Img_alt_end()); img_cls.Set_by_arg(data.Img_cls()); if (data.Img_imap_idx() != -1) img_imap_usemap.Set(data.Img_imap_idx()); diff --git a/400_xowa/src/gplx/xowa/xtns/scribunto/libs/Scrib_lib_ustring__gsub__tst.java b/400_xowa/src/gplx/xowa/xtns/scribunto/libs/Scrib_lib_ustring__gsub__tst.java index c0ef1215c..3d4767937 100644 ---
a/400_xowa/src/gplx/xowa/xtns/scribunto/libs/Scrib_lib_ustring__gsub__tst.java +++ b/400_xowa/src/gplx/xowa/xtns/scribunto/libs/Scrib_lib_ustring__gsub__tst.java @@ -52,6 +52,9 @@ public class Scrib_lib_ustring__gsub__tst { @Test public void Replace__initial() { // PURPOSE:whitespace being replaced during gsub replacement; DATE:2019-04-21 Exec_gsub("a b c", "^%s*", -1, "x", "xa b c;1"); // fails if xabxc } + @Test public void Replace__digit__superscript() {// PURPOSE: ¹ is not a \d; PAGE:en.w:Vilnius ISSUE#:617; DATE:2019-11-24; + Exec_gsub("1796¹", "([%d]+).*", 1, "%1", "1796;1"); + } @Test public void Replace__table() { Exec_gsub("abcd", "[ac]" , -1, Scrib_kv_utl_.flat_many_("a", "A", "c", "C") , "AbCd;2"); Exec_gsub("abc" , "[ab]" , -1, Scrib_kv_utl_.flat_many_("a", "A") , "Abc;2"); // PURPOSE: match not in regex should still print itself; in this case [c] is not in tbl regex; DATE:2014-03-31