Html_dump: Show images for files with url-encoded characters (space, symbols, non-ascii) [#547]

pull/620/head
gnosygnu 5 years ago
parent 966b0eed59
commit 9e97a7ea9f

@ -28,9 +28,20 @@ public class Xoh_img_make__dump__tst {
);
}
@Test public void Utf8() {
// fsdb_itm.lnki_ttl comes from data-xowa-title
String
orig = "<a href='/wiki/File:A%C3%A9_b.png' class='image' title='abc' xowa_title='Aé_b.png'><img data-xowa-title='Aé_b.png' data-xoimg='0|220|110|0.5|-1|-1' src='file:///mem/xowa/file/en.wikipedia.org/thumb/7/0/A%C3%A9_b.png/220px.png' width='220' height='110' alt='abc'></a>"
, expd = "<a href='/wiki/File:A%C3%A9_b.png' class='image' title='abc' xowa_title='Aé_b.png'><img id='xoimg_0' data-xowa-title='Aé_b.png' data-xoimg='0|220|110|0.5|-1|-1' src='' width='0' height='0' alt='abc'></a>"
orig = "<a href='/wiki/File:A%C3%A9_b.png' class='image' title='abc' xowa_title='A%C3%A9_b.png'><img data-xowa-title='A%C3%A9_b.png' data-xoimg='0|220|110|0.5|-1|-1' src='' width='220' height='110' alt='abc'></a>"
, expd = "<a href='/wiki/File:A%C3%A9_b.png' class='image' title='abc' xowa_title='A%C3%A9_b.png'><img id='xoimg_0' data-xowa-title='A%C3%A9_b.png' data-xoimg='0|220|110|0.5|-1|-1' src='' width='0' height='0' alt='abc'></a>"
;
fxt.Test__make(orig, fxt.Page_chkr().Body_(expd)
.Imgs__add("en.w", "Aé_b.png", Xop_lnki_type.Id_null, 0.5, 220, 110, -1, -1)
);
}
@Test public void Utf8_src() {
// fsdb_itm.lnki_ttl comes from src (unsupported use-case for make which should assume no pre-existing files)
String
orig = "<a href='/wiki/File:A%C3%A9_b.png' class='image' title='abc' xowa_title='A%C3%A9_b.png'><img data-xowa-title='A%C3%A9_b.png' data-xoimg='0|220|110|0.5|-1|-1' src='file:///mem/xowa/file/en.wikipedia.org/thumb/7/0/A%C3%A9_b.png/220px.png' width='220' height='110' alt='abc'></a>"
, expd = "<a href='/wiki/File:A%C3%A9_b.png' class='image' title='abc' xowa_title='A%C3%A9_b.png'><img id='xoimg_0' data-xowa-title='A%C3%A9_b.png' data-xoimg='0|220|110|0.5|-1|-1' src='' width='0' height='0' alt='abc'></a>"
;
fxt.Test__make(orig, fxt.Page_chkr().Body_(expd)
.Imgs__add("en.w", "Aé_b.png", Xop_lnki_type.Id_null, 0.5, 220, 110, -1, -1)

@ -83,11 +83,18 @@ public class Xoh_img_wtr implements Bfr_arg, Xoh_wtr_itm {
this.img_wo_anch = data.Img_wo_anch();
this.fsdb_itm = hpg.Img_mgr().Make_img(data.Img_is_gallery());
byte[] file_ttl_bry = data.Anch_xo_ttl().Val();
byte[] img_src_bry_temp = data.Img_src().File_ttl_bry();
byte[] lnki_ttl = img_src_bry_temp == null // img_src will be empty for htxt; use file_ttl_bry instead; EX: '<img src="">' DATE:2019-03-10
? file_ttl_bry
: Xoa_ttl.Replace_spaces(Gfo_url_encoder_.Href_quotes.Decode(img_src_bry_temp)); // NOTE: must decode for fsdb.lnki_ttl as well as xowa_title; EX: A%C3%A9b -> A<>b
byte[] xowa_title = data.Anch_xo_ttl().Val();
// set lnki_ttl to img_src.file_ttl;
byte[] lnki_ttl = data.Img_src().File_ttl_bry();
if (lnki_ttl == null) {
// in most cases, @src is empty; set lnki_ttl from @xowa_title instead; EX: '<img src="" xowa-title="A.png">' DATE:2019-03-10
lnki_ttl = xowa_title;
}
// url-decode; must decode since HTML will be url-encoded, but fsdb dbs will be actual characters; EX: A%C3%A9b -> Aéb
if (lnki_ttl != null) // NOTE: @src and @xowa_title will be null for score
lnki_ttl = Gfo_url_encoder_.Mw_wfUrlencode.Decode(lnki_ttl);
boolean write_xowa_file_title = true;
if (data.Img_pgbnr().Exists()) {
@ -125,11 +132,11 @@ public class Xoh_img_wtr implements Bfr_arg, Xoh_wtr_itm {
anch_cls.Set_by_arg(data.Anch_cls());
anch_title.Set_by_mid_or_null(src, data.Anch_title_bgn(), data.Anch_title_end());
if ( data.Img_wo_anch() // anchor-less image
|| Bry_.Len_gt_0(file_ttl_bry)) // regular anch with image
anch_xowa_title.Set_by_bry(file_ttl_bry);
|| Bry_.Len_gt_0(xowa_title)) // regular anch with image
anch_xowa_title.Set_by_bry(xowa_title);
if (write_xowa_file_title)
img_xowa_title.Set_by_bry(file_ttl_bry);
img_xowa_title.Set_by_bry(xowa_title);
img_alt.Set_by_mid_or_empty(src, data.Img_alt_bgn(), data.Img_alt_end());
img_cls.Set_by_arg(data.Img_cls());
if (data.Img_imap_idx() != -1) img_imap_usemap.Set(data.Img_imap_idx());

@ -52,6 +52,9 @@ public class Scrib_lib_ustring__gsub__tst {
// Regression test: the start-anchored pattern "^%s*" must match only at the beginning of the input,
// so the replacement "x" is inserted once at the front and the interior spaces are left alone.
// NOTE(review): Exec_gsub is defined outside this view; its arguments appear to be
// (text, pattern, limit, replacement, expected), with the expected value formatted as "result;count" -- confirm.
@Test public void Replace__initial() { // PURPOSE:whitespace being replaced during gsub replacement; DATE:2019-04-21
Exec_gsub("a b c", "^%s*", -1, "x", "xa b c;1"); // fails if xabxc (i.e. if the interior spaces were also replaced)
}
// Regression test: the superscript digit '¹' must not be matched by the %d class,
// so the capture "([%d]+)" stops at "1796" and the trailing ".*" consumes the '¹'.
// NOTE(review): Exec_gsub is defined outside this view; the expected value appears to be "result;count" -- confirm.
@Test public void Replace__digit__superscript() {// PURPOSE: ¹ is not a \d; PAGE:en.w:Vilnius ISSUE#:617; DATE:2019-11-24;
Exec_gsub("1796¹", "([%d]+).*", 1, "%1", "1796;1");
}
@Test public void Replace__table() {
Exec_gsub("abcd", "[ac]" , -1, Scrib_kv_utl_.flat_many_("a", "A", "c", "C") , "AbCd;2");
Exec_gsub("abc" , "[ab]" , -1, Scrib_kv_utl_.flat_many_("a", "A") , "Abc;2"); // PURPOSE: match not in regex should still print itself; in this case [c] is not in tbl regex; DATE:2014-03-31

Loading…
Cancel
Save