mirror of
https://github.com/gnosygnu/xowa.git
synced 2024-10-27 20:34:16 +00:00
Html_dump: Show images for files with url-encoded characters (space, symbols, non-ascii) [#547]
This commit is contained in:
parent
966b0eed59
commit
9e97a7ea9f
@ -28,9 +28,20 @@ public class Xoh_img_make__dump__tst {
|
||||
);
|
||||
}
|
||||
@Test public void Utf8() {
|
||||
// fsdb_itm.lnki_ttl comes from data-xowa-title
|
||||
String
|
||||
orig = "<a href='/wiki/File:A%C3%A9_b.png' class='image' title='abc' xowa_title='Aé_b.png'><img data-xowa-title='Aé_b.png' data-xoimg='0|220|110|0.5|-1|-1' src='file:///mem/xowa/file/en.wikipedia.org/thumb/7/0/A%C3%A9_b.png/220px.png' width='220' height='110' alt='abc'></a>"
|
||||
, expd = "<a href='/wiki/File:A%C3%A9_b.png' class='image' title='abc' xowa_title='Aé_b.png'><img id='xoimg_0' data-xowa-title='Aé_b.png' data-xoimg='0|220|110|0.5|-1|-1' src='' width='0' height='0' alt='abc'></a>"
|
||||
orig = "<a href='/wiki/File:A%C3%A9_b.png' class='image' title='abc' xowa_title='A%C3%A9_b.png'><img data-xowa-title='A%C3%A9_b.png' data-xoimg='0|220|110|0.5|-1|-1' src='' width='220' height='110' alt='abc'></a>"
|
||||
, expd = "<a href='/wiki/File:A%C3%A9_b.png' class='image' title='abc' xowa_title='A%C3%A9_b.png'><img id='xoimg_0' data-xowa-title='A%C3%A9_b.png' data-xoimg='0|220|110|0.5|-1|-1' src='' width='0' height='0' alt='abc'></a>"
|
||||
;
|
||||
fxt.Test__make(orig, fxt.Page_chkr().Body_(expd)
|
||||
.Imgs__add("en.w", "Aé_b.png", Xop_lnki_type.Id_null, 0.5, 220, 110, -1, -1)
|
||||
);
|
||||
}
|
||||
@Test public void Utf8_src() {
|
||||
// fsdb_itm.lnki_ttl comes from src (unsupported use-case for make which should assume no pre-existing files)
|
||||
String
|
||||
orig = "<a href='/wiki/File:A%C3%A9_b.png' class='image' title='abc' xowa_title='A%C3%A9_b.png'><img data-xowa-title='A%C3%A9_b.png' data-xoimg='0|220|110|0.5|-1|-1' src='file:///mem/xowa/file/en.wikipedia.org/thumb/7/0/A%C3%A9_b.png/220px.png' width='220' height='110' alt='abc'></a>"
|
||||
, expd = "<a href='/wiki/File:A%C3%A9_b.png' class='image' title='abc' xowa_title='A%C3%A9_b.png'><img id='xoimg_0' data-xowa-title='A%C3%A9_b.png' data-xoimg='0|220|110|0.5|-1|-1' src='' width='0' height='0' alt='abc'></a>"
|
||||
;
|
||||
fxt.Test__make(orig, fxt.Page_chkr().Body_(expd)
|
||||
.Imgs__add("en.w", "Aé_b.png", Xop_lnki_type.Id_null, 0.5, 220, 110, -1, -1)
|
||||
|
@ -83,11 +83,18 @@ public class Xoh_img_wtr implements Bfr_arg, Xoh_wtr_itm {
|
||||
this.img_wo_anch = data.Img_wo_anch();
|
||||
this.fsdb_itm = hpg.Img_mgr().Make_img(data.Img_is_gallery());
|
||||
|
||||
byte[] file_ttl_bry = data.Anch_xo_ttl().Val();
|
||||
byte[] img_src_bry_temp = data.Img_src().File_ttl_bry();
|
||||
byte[] lnki_ttl = img_src_bry_temp == null // img_src will be empty for htxt; use file_ttl_bry instead; EX: '<img src="">' DATE:2019-03-10
|
||||
? file_ttl_bry
|
||||
: Xoa_ttl.Replace_spaces(Gfo_url_encoder_.Href_quotes.Decode(img_src_bry_temp)); // NOTE: must decode for fsdb.lnki_ttl as well as xowa_title; EX: A%C3%A9b -> Aéb
|
||||
byte[] xowa_title = data.Anch_xo_ttl().Val();
|
||||
|
||||
// set lnki_ttl to img_src.file_ttl;
|
||||
byte[] lnki_ttl = data.Img_src().File_ttl_bry();
|
||||
if (lnki_ttl == null) {
|
||||
// in most cases, @src will be empty; set lnki_ttl from @xowa_title instead; EX: '<img src="" xowa-title="A.png">' DATE:2019-03-10
|
||||
lnki_ttl = xowa_title;
|
||||
}
|
||||
|
||||
// url-decode; must decode since HTML will be url-encoded, but fsdb dbs will be actual characters; EX: A%C3%A9b -> Aéb
|
||||
if (lnki_ttl != null) // NOTE: @src and @xowa_title will be null for score
|
||||
lnki_ttl = Gfo_url_encoder_.Mw_wfUrlencode.Decode(lnki_ttl);
|
||||
|
||||
boolean write_xowa_file_title = true;
|
||||
if (data.Img_pgbnr().Exists()) {
|
||||
@ -125,11 +132,11 @@ public class Xoh_img_wtr implements Bfr_arg, Xoh_wtr_itm {
|
||||
anch_cls.Set_by_arg(data.Anch_cls());
|
||||
anch_title.Set_by_mid_or_null(src, data.Anch_title_bgn(), data.Anch_title_end());
|
||||
if ( data.Img_wo_anch() // anchor-less image
|
||||
|| Bry_.Len_gt_0(file_ttl_bry)) // regular anch with image
|
||||
anch_xowa_title.Set_by_bry(file_ttl_bry);
|
||||
|| Bry_.Len_gt_0(xowa_title)) // regular anch with image
|
||||
anch_xowa_title.Set_by_bry(xowa_title);
|
||||
|
||||
if (write_xowa_file_title)
|
||||
img_xowa_title.Set_by_bry(file_ttl_bry);
|
||||
img_xowa_title.Set_by_bry(xowa_title);
|
||||
img_alt.Set_by_mid_or_empty(src, data.Img_alt_bgn(), data.Img_alt_end());
|
||||
img_cls.Set_by_arg(data.Img_cls());
|
||||
if (data.Img_imap_idx() != -1) img_imap_usemap.Set(data.Img_imap_idx());
|
||||
|
@ -52,6 +52,9 @@ public class Scrib_lib_ustring__gsub__tst {
|
||||
@Test public void Replace__initial() { // PURPOSE:whitespace being replaced during gsub replacement; DATE:2019-04-21
|
||||
Exec_gsub("a b c", "^%s*", -1, "x", "xa b c;1"); // fails if xabxc
|
||||
}
|
||||
@Test public void Replace__digit__superscript() {// PURPOSE: ¹ is not a \d; PAGE:en.w:Vilnius ISSUE#:617; DATE:2019-11-24;
|
||||
Exec_gsub("1796¹", "([%d]+).*", 1, "%1", "1796;1");
|
||||
}
|
||||
@Test public void Replace__table() {
|
||||
Exec_gsub("abcd", "[ac]" , -1, Scrib_kv_utl_.flat_many_("a", "A", "c", "C") , "AbCd;2");
|
||||
Exec_gsub("abc" , "[ab]" , -1, Scrib_kv_utl_.flat_many_("a", "A") , "Abc;2"); // PURPOSE: match not in regex should still print itself; in this case [c] is not in tbl regex; DATE:2014-03-31
|
||||
|
Loading…
Reference in New Issue
Block a user