1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2024-10-27 20:34:16 +00:00

Parser: Auto-close dangling references tag [#583]

This commit is contained in:
gnosygnu 2019-10-05 12:41:48 -04:00
parent 4955748f6d
commit a10c2cd1dc
3 changed files with 63 additions and 16 deletions

View File

@ -554,6 +554,8 @@ public class Xop_xnde_wkr implements Xop_ctx_wkr {
// Finds the position of the closing tag for an xtn node by handing the search to Xop_xnde_wkr_.Find_xtn_end.
// The search window passed on is [open_end, src_len); open_bry / close_bry are the opening / closing tag bytes to look for.
// NOTE: tag and open_bgn are accepted but not used by this wrapper.
// Returns whatever Find_xtn_end returns; the caller compares the result against Bry_find_.Not_found.
private int Find_xtn_end_lhs(Xop_ctx ctx, Xop_xnde_tag tag, byte[] src, int src_len, int open_bgn, int open_end, byte[] open_bry, byte[] close_bry) {
	int search_bgn = open_end;	// begin scanning at open_end (presumably just past the opening tag)
	int search_end = src_len;	// scan up to the end of src
	int close_pos = Xop_xnde_wkr_.Find_xtn_end(ctx, src, search_bgn, search_end, open_bry, close_bry); // UNIQ; DATE:2017-03-31
	return close_pos;
}
// Modes used by Make_xnde_xtn to decide what to do after searching for the closing </xtn> tag:
//   XTN_CLOSE_MODE__MAKE             : initial mode; kept when the closing tag was found; the ">" of the closing tag is located and the xnde pair is created
//   XTN_CLOSE_MODE__ESCAPE           : closing tag not found; the opening tag is returned as text via ctx.Lxr_make_txt_(open_end)
//   XTN_CLOSE_MODE__ESCAPE_AND_CLOSE : closing tag not found and tag is <references>; close positions are set to src_len and,
//                                      in wtxt parse mode, the escaped opening tag plus the rest of src are added to root; ISSUE#:583
private static final byte XTN_CLOSE_MODE__MAKE = 0, XTN_CLOSE_MODE__ESCAPE = 1, XTN_CLOSE_MODE__ESCAPE_AND_CLOSE = 2;
private int Make_xnde_xtn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, Xop_xnde_tag tag, int open_bgn, int open_end, int name_bgn, int name_end, int atrs_bgn, int atrs_end, Mwh_atr_itm[] atrs, boolean inline, boolean pre2_hack) {
// NOTE: find end_tag that exactly matches bgnTag; must be case sensitive;
int xnde_end = open_end;
@ -563,33 +565,64 @@ public class Xop_xnde_wkr implements Xop_ctx_wkr {
xnde.Tag_close_rng_(open_end, open_end); // NOTE: inline tag, so set TagClose to open_end; should noop
}
else {
byte[] close_bry = tag.Xtn_end_tag_tmp(); // get tmp bry (so as not to new)
if (tag.Langs() != null) { // cur tag has langs; EX:<section>; DATE:2014-07-18
// build </xtn> for search
byte[] close_bry = tag.Xtn_end_tag_tmp(); // get tmp bry (so as not to new)
// handle tag with lang translations; EX:<section> is <trecho> in pt; DATE:2014-07-18
if (tag.Langs() != null) {
Xop_xnde_tag_lang tag_lang = tag.Langs_get(ctx.Lang().Case_mgr(), ctx.Page().Lang().Lang_id(), src, name_bgn, name_end);
if (tag_lang == null) // tag does not match lang; EX:<trecho> and lang=de;
if (tag_lang == null) // tag does not match lang; EX:<trecho> tag but current lang is de;
return ctx.Lxr_make_txt_(open_end);
if (tag_lang != Xop_xnde_tag_lang.Instance) // tag matches; note Xop_xnde_tag_lang._ is a wildcard match; EX:<section>
if (tag_lang != Xop_xnde_tag_lang.Instance) // tag matches; note Xop_xnde_tag_lang.Instance is a wildcard match; EX:<section>
close_bry = tag_lang.Xtn_end_tag_tmp();
}
int src_offset = open_bgn - 1; // open bgn to start at <; -2 to ignore </ ; +1 to include <
// fill </xtn>
int src_offset = open_bgn - 1; // open bgn to start at <; -2 to ignore </ ; +1 to include <
int close_ary_len = close_bry.length;
for (int i = 2; i < close_ary_len; i++) // 2 to ignore </
close_bry[i] = src[src_offset + i];
boolean auto_close = false;
// search for </xtn>
byte close_mode = XTN_CLOSE_MODE__MAKE;
int close_bgn = Find_xtn_end_lhs(ctx, tag, src, src_len, open_bgn, open_end, tag.Xtn_bgn_tag(), close_bry);
if (close_bgn == Bry_find_.Not_found) auto_close = true; // auto-close if end not found; verified with <poem>, <gallery>, <imagemap>, <hiero>, <references> DATE:2014-08-23
int close_end = -1;
if (auto_close) {
return ctx.Lxr_make_txt_(open_end); // dangling tags are now escaped; used to gobble up rest of page with "xnde_end = close_bgn = close_end = src_len;"; DATE:2017-01-10
}
else {
close_end = Find_end_tag_pos(src, src_len, close_bgn + close_bry.length);
if (close_end == Bry_find_.Not_found) return ctx.Lxr_make_log_(Xop_xnde_log.Xtn_end_not_found, src, open_bgn, open_end);
xnde_end = close_end;
if (close_bgn == Bry_find_.Not_found) {// </xtn> not found
close_mode = (tag.Id() == Xop_xnde_tag_.Tag__references.Id()) // dangling <references> has partial auto-close behavior; ISSUE#:583; DATE:2019-10-05
? XTN_CLOSE_MODE__ESCAPE_AND_CLOSE
: XTN_CLOSE_MODE__ESCAPE; // escape if end not found; verified with <poem>, <gallery>, <imagemap>, <hiero>; DATE:2014-08-23; DATE:2019-10-05
}
// handle close_mode
int close_end = -1;
switch (close_mode) {
case XTN_CLOSE_MODE__ESCAPE_AND_CLOSE: // <references> gobble up to EOS; ISSUE#:583; DATE:2019-10-05
xnde_end = close_bgn = close_end = src_len;
break;
case XTN_CLOSE_MODE__ESCAPE: // dangling tags are escaped; used to gobble up rest of page with "xnde_end = close_bgn = close_end = src_len;"; DATE:2017-01-10
return ctx.Lxr_make_txt_(open_end);
case XTN_CLOSE_MODE__MAKE:
close_end = Find_end_tag_pos(src, src_len, close_bgn + close_bry.length); // search for ">"
if (close_end == Bry_find_.Not_found)
return ctx.Lxr_make_log_(Xop_xnde_log.Xtn_end_not_found, src, open_bgn, open_end);
xnde_end = close_end;
break;
default: throw Err_.new_unhandled_default(close_mode);
}
// pre2_hack
if (pre2_hack)
return ctx.Lxr_make_txt_(close_end);
// add &lt;references and rest of document
if ( close_mode == XTN_CLOSE_MODE__ESCAPE_AND_CLOSE
&& ctx.Parse_tid() == Xop_parser_tid_.Tid__wtxt) { // do not add if tmpl mode;
byte[] escaped_tag_bgn = gplx.xowa.xtns.cites.References_nde.ESCAPED_TAG_BGN;
root.Subs_add(tkn_mkr.Bry_raw(0, escaped_tag_bgn.length, escaped_tag_bgn));
root.Subs_add(tkn_mkr.Bry_mid(src, open_end, src_len));
root.Subs_add(tkn_mkr.Bry_raw(0, 1, Byte_ascii.Nl_bry)); // add \n for EOS; adding because <xtn> may depend on preceding \n; EX: en.d:tepilli; ISSUE#:583; DATE:2019-10-05
}
// create xnde tag which will be everything between <xtn></xtn>
xnde = New_xnde_pair(ctx, root, tkn_mkr, tag, open_bgn, open_end, close_bgn, close_end);
xnde.Atrs_rng_(atrs_bgn, atrs_end);
xnde.Atrs_ary_(atrs);

View File

@ -64,4 +64,5 @@ public class References_nde implements Xox_xnde, Mwh_atr_itm_owner1 {
// NOTE(review): public mutable static flag, defaults to true; the code that reads it is not visible here -- confirm intended scope
public static boolean Enabled = true;
// attribute lookup table: maps the "group" attribute name to References_nde.Xatr_id_group
private static final Hash_adp_bry xatrs_hash = Hash_adp_bry.ci_a7()
.Add_str_obj("group", Byte_obj_val.new_(References_nde.Xatr_id_group));
// text emitted by the xnde parser in place of a dangling <references> opening tag; only the "<" is entity-escaped, the ">" is left as is; ISSUE#:583
public static final byte[] ESCAPED_TAG_BGN = Bry_.new_a7("&lt;references>");
}

View File

@ -15,7 +15,7 @@ Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.xtns.cites; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*;
import org.junit.*;
public class References_nde_rare_tst {
public class References_nde_rare_tst {
// parser fixture shared by every test in this class
private final Xop_fxt fxt = new Xop_fxt();
// runs before each test: clears the fixture's ref mgr, then resets the fixture
@Before public void init() {
	fxt.Clear_ref_mgr();
	fxt.Reset();
}
// runs after each test: calls Init_para_n_ on the fixture
@After public void term() {
	fxt.Init_para_n_();
}
@Test public void Recursive() { // PURPOSE: handle recursive situations; EX: ja.w:Kソリューション ; ja.w:Template:cite web; DATE:2014-03-05
@ -103,4 +103,17 @@ public class References_nde_rare_tst {
, ""
));
}
// PURPOSE: a <references> tag with no closing tag should be escaped AND still auto-closed; ISSUE#:583; DATE:2019-10-05
@Test public void Dangling_references() {
	// wikitext: one named ref, followed by an unclosed <references> with trailing text
	String wtxt = String_.Concat_lines_nl_skip_last
	( "a<ref name='ref_0'>b</ref>c"
	, "<references>xyz"
	);
	// expected html: the tag is shown escaped with its trailing text, and the reference list is still generated after it
	String expd = String_.Concat_lines_nl_skip_last
	( "a<sup id=\"cite_ref-ref_0_0-0\" class=\"reference\"><a href=\"#cite_note-ref_0-0\">[1]</a></sup>c"
	, "&lt;references>xyz"
	, "<ol class=\"references\">"
	, "<li id=\"cite_note-ref_0-0\"><span class=\"mw-cite-backlink\"><a href=\"#cite_ref-ref_0_0-0\">^</a></span> <span class=\"reference-text\">b</span></li>"
	, "</ol>"
	, ""
	);
	fxt.Test_parse_page_all_str(wtxt, expd);
}
}