1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2024-10-27 20:34:16 +00:00

Parser: Escape dangling tags; do not let them gobble up rest of page

This commit is contained in:
gnosygnu 2017-01-10 14:01:04 -05:00
parent f7b088e7a8
commit 08eb098422
5 changed files with 14 additions and 17 deletions

View File

@ -580,7 +580,7 @@ public class Xop_xnde_wkr implements Xop_ctx_wkr {
if (close_bgn == Bry_find_.Not_found) auto_close = true; // auto-close if end not found; verified with <poem>, <gallery>, <imagemap>, <hiero>, <references> DATE:2014-08-23
int close_end = -1;
if (auto_close) {
xnde_end = close_bgn = close_end = src_len;
return ctx.Lxr_make_txt_(open_end); // dangling tags are now escaped; used to gobble up rest of page with "xnde_end = close_bgn = close_end = src_len;"; DATE:2017-01-10
}
else {
close_end = Find_end_tag_pos(src, src_len, close_bgn + close_bry.length);

View File

@ -93,15 +93,14 @@ public class References_nde_rare_tst {
}
@Test public void Dangling_ref_and_stack_overflow() { // PURPOSE: handle dangling <ref> with nested <references/>; PAGE:cs.s:Page:Hejčl,_Jan_-_Pentateuch.pdf/128 DATE:2016-09-01
fxt.Test_parse_page_all_str(String_.Concat_lines_nl_skip_last
( "a<ref name='ref_0'/>b"
, "<references><ref name='ref_0'>c"
( "a<ref name='ref_0'>b</ref>c"
, "<references><ref name='ref_1'>d"
, "<references/>" // must be ignored, else it will be same as outer <references>
, "</references>"
), String_.Concat_lines_nl_skip_last
( "a<sup id=\"cite_ref-ref_0_0-0\" class=\"reference\"><a href=\"#cite_note-ref_0-0\">[1]</a></sup>b"
( "a<sup id=\"cite_ref-ref_0_0-0\" class=\"reference\"><a href=\"#cite_note-ref_0-0\">[1]</a></sup>c"
, "<ol class=\"references\">"
, "<li id=\"cite_note-ref_0-0\"><span class=\"mw-cite-backlink\"><a href=\"#cite_ref-ref_0_0-0\">^</a></span> <span class=\"reference-text\">c"
, "</span></li>"
, "<li id=\"cite_note-ref_0-0\"><span class=\"mw-cite-backlink\"><a href=\"#cite_ref-ref_0_0-0\">^</a></span> <span class=\"reference-text\">b</span></li>"
, "</ol>"
, ""
));

View File

@ -274,13 +274,16 @@ public class Dpl_xnde_tst {
));
}
@Test public void Err__bad_key_causes_out_of_bound() { // PURPOSE: bad key causes out of bounds error; PAGE:de.n:Portal:Brandenburg DATE:2016-04-21
fxt.Init__warns("dynamic_page_list:unknown_key: page=Test page key=<DynamicPageList>category", "dynamic_page_list:unknown_key: page=Test page key=<DynamicPageList>category");
// input: the first <DynamicPageList> has no closing tag; the two that follow are complete
fxt.Exec__parse
( "<DynamicPageList>"
, "<DynamicPageList>category=A</DynamicPageList>a=b c=d"
, "<DynamicPageList>category=B</DynamicPageList>"
);
// NOTE(review): two consecutive checks with different expected text follow; the first expects a single message, the second expects the dangling tag escaped as "&lt;DynamicPageList&gt;" followed by one message per complete tag. This looks like a before/after pair; confirm which expectation is current
fxt.Test__html("No pages meet these criteria.");
fxt.Test__html(String_.Concat_lines_nl_skip_last
( "&lt;DynamicPageList&gt;"
, "No pages meet these criteria.a=b c=d"
, "No pages meet these criteria."
));
}
}
class Dpl_page_mok {

View File

@ -140,7 +140,7 @@ public class Gallery_mgr_base__basic__tst {
));
}
@Test public void Dangling_autcloses() { // PURPOSE: dangling gallery should auto-close, not escape; PAGE:en.w:Wikipedia:Featured_pictures_thumbs_43 DATE:2014-08-23
// NOTE(review): both checks use the same dangling "<gallery>" input (no closing tag) but expect different output: the first expects gallery list markup ("<ul id=..."), the second expects the tag escaped as text ("&lt;gallery&gt;..."). The second contradicts the "auto-close, not escape" purpose above; confirm which expectation is current and update the purpose text / method name to match
fxt.Test_html_frag("<gallery>File:A.png|b", "<ul id=\"xowa_gallery_ul_0\"");
fxt.Test_html_frag("<gallery>File:A.png|b", "&lt;gallery&gt;File:A.png|b");
}
@Test public void Nested() { // PURPOSE: handle gallery nested inside ref; PAGE: es.w:Arquitectura_medieval DATE:2015-07-10
fxt.Test_html_frag(String_.Concat_lines_nl_skip_last
@ -153,7 +153,7 @@ public class Gallery_mgr_base__basic__tst {
);
}
@Test public void Alt__quotes() { // PURPOSE: file name with quotes will cause broken alt; PAGE:en.w:Alexandria,_Romania; DATE:2015-12-27
// NOTE(review): the two checks expect the same alt fragment and differ only in whether the input has a closing </gallery>; presumably a before/after pair, confirm which input is current
fxt.Test_html_frag("<gallery>File:A\"b.png", "alt=\"A&quot;b.png\""); // NOTE: not 'alt="A"b.png"'
fxt.Test_html_frag("<gallery>File:A\"b.png</gallery>", "alt=\"A&quot;b.png\""); // NOTE: not 'alt="A"b.png"'
}
@Test public void Invalid() { // PURPOSE: ignore invalid file names; DATE:2016-01-12
fxt.Test_html_str("<gallery>File:#A.png|a</gallery>"

View File

@ -18,7 +18,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
package gplx.xowa.xtns.poems; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*;
import org.junit.*;
public class Poem_nde_tst {
@Before public void init() {fxt.Wiki().Xtn_mgr().Init_by_wiki(fxt.Wiki());} private final Xop_fxt fxt = new Xop_fxt();
@Before public void init() {fxt.Wiki().Xtn_mgr().Init_by_wiki(fxt.Wiki());} private final Xop_fxt fxt = new Xop_fxt();
@Test public void Lines() { // NOTE: first \n (poem\n) and last \n (\n</poem>)ignored
fxt.Test_parse_page_wiki_str(String_.Concat_lines_nl_skip_last
( "<poem>"
@ -136,12 +136,7 @@ public class Poem_nde_tst {
, "<references/>"
, "<!--"
), String_.Concat_lines_nl_skip_last
( "<div class=\"poem\">"
, "<p>"
, "<br/>"
, ""
, "</p>"
, "</div>"
( "&lt;poem&gt;"
));
}
@Test public void Ref() { // PURPOSE: <ref> inside poem was not showing up; DATE:2014-01-17