mirror of
https://github.com/gnosygnu/xowa.git
synced 2024-10-27 20:34:16 +00:00
Parser: Auto-close dangling references tag [#583]
This commit is contained in:
parent
4955748f6d
commit
a10c2cd1dc
@ -554,6 +554,8 @@ public class Xop_xnde_wkr implements Xop_ctx_wkr {
|
||||
// Locates the closing tag of an extension node by delegating to Xop_xnde_wkr_.Find_xtn_end.
// open_end and src_len are passed as the position arguments (presumably search start and search limit -- confirm against Find_xtn_end).
// The tag and open_bgn parameters are accepted but not used in this body.
// The caller visible in this file compares the result to Bry_find_.Not_found to detect a dangling open tag.
private int Find_xtn_end_lhs(Xop_ctx ctx, Xop_xnde_tag tag, byte[] src, int src_len, int open_bgn, int open_end, byte[] open_bry, byte[] close_bry) {
|
||||
return Xop_xnde_wkr_.Find_xtn_end(ctx, src, open_end, src_len, open_bry, close_bry); // UNIQ; DATE:2017-03-31
|
||||
}
|
||||
|
||||
// Close modes chosen by Make_xnde_xtn after searching for </xtn>:
//   XTN_CLOSE_MODE__MAKE             : default; closing tag was found, so the ">" of the close tag is located and the node is built
//   XTN_CLOSE_MODE__ESCAPE           : closing tag not found; the method returns early via ctx.Lxr_make_txt_(open_end)
//   XTN_CLOSE_MODE__ESCAPE_AND_CLOSE : closing tag not found and tag is <references>; the node is extended to src_len; ISSUE#:583
private static final byte XTN_CLOSE_MODE__MAKE = 0, XTN_CLOSE_MODE__ESCAPE = 1, XTN_CLOSE_MODE__ESCAPE_AND_CLOSE = 2;
|
||||
private int Make_xnde_xtn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, Xop_xnde_tag tag, int open_bgn, int open_end, int name_bgn, int name_end, int atrs_bgn, int atrs_end, Mwh_atr_itm[] atrs, boolean inline, boolean pre2_hack) {
|
||||
// NOTE: find end_tag that exactly matches bgnTag; must be case sensitive;
|
||||
int xnde_end = open_end;
|
||||
@ -563,33 +565,64 @@ public class Xop_xnde_wkr implements Xop_ctx_wkr {
|
||||
xnde.Tag_close_rng_(open_end, open_end); // NOTE: inline tag, so set TagClose to open_end; should noop
|
||||
}
|
||||
else {
|
||||
byte[] close_bry = tag.Xtn_end_tag_tmp(); // get tmp bry (so as not to new)
|
||||
if (tag.Langs() != null) { // cur tag has langs; EX:<section>; DATE:2014-07-18
|
||||
// build </xtn> for search
|
||||
byte[] close_bry = tag.Xtn_end_tag_tmp(); // get tmp bry (so as not to new)
|
||||
|
||||
// handle tag with lang translations; EX:<section> is <trecho> in pt; DATE:2014-07-18
|
||||
if (tag.Langs() != null) {
|
||||
Xop_xnde_tag_lang tag_lang = tag.Langs_get(ctx.Lang().Case_mgr(), ctx.Page().Lang().Lang_id(), src, name_bgn, name_end);
|
||||
if (tag_lang == null) // tag does not match lang; EX:<trecho> and lang=de;
|
||||
if (tag_lang == null) // tag does not match lang; EX:<trecho> tag but current lang is de;
|
||||
return ctx.Lxr_make_txt_(open_end);
|
||||
if (tag_lang != Xop_xnde_tag_lang.Instance) // tag matches; note Xop_xnde_tag_lang._ is a wildcard match; EX:<section>
|
||||
if (tag_lang != Xop_xnde_tag_lang.Instance) // tag matches; note Xop_xnde_tag_lang.Instance is a wildcard match; EX:<section>
|
||||
close_bry = tag_lang.Xtn_end_tag_tmp();
|
||||
}
|
||||
int src_offset = open_bgn - 1; // open bgn to start at <; -2 to ignore </ ; +1 to include <
|
||||
|
||||
// fill </xtn>
|
||||
int src_offset = open_bgn - 1; // open bgn to start at <; -2 to ignore </ ; +1 to include <
|
||||
int close_ary_len = close_bry.length;
|
||||
for (int i = 2; i < close_ary_len; i++) // 2 to ignore </
|
||||
close_bry[i] = src[src_offset + i];
|
||||
boolean auto_close = false;
|
||||
|
||||
// search for </xtn>
|
||||
byte close_mode = XTN_CLOSE_MODE__MAKE;
|
||||
int close_bgn = Find_xtn_end_lhs(ctx, tag, src, src_len, open_bgn, open_end, tag.Xtn_bgn_tag(), close_bry);
|
||||
if (close_bgn == Bry_find_.Not_found) auto_close = true; // auto-close if end not found; verified with <poem>, <gallery>, <imagemap>, <hiero>, <references> DATE:2014-08-23
|
||||
int close_end = -1;
|
||||
if (auto_close) {
|
||||
return ctx.Lxr_make_txt_(open_end); // dangling tags are now escaped; used to gobble up rest of page with "xnde_end = close_bgn = close_end = src_len;"; DATE:2017-01-10
|
||||
}
|
||||
else {
|
||||
close_end = Find_end_tag_pos(src, src_len, close_bgn + close_bry.length);
|
||||
if (close_end == Bry_find_.Not_found) return ctx.Lxr_make_log_(Xop_xnde_log.Xtn_end_not_found, src, open_bgn, open_end);
|
||||
xnde_end = close_end;
|
||||
if (close_bgn == Bry_find_.Not_found) {// </xtn> not found
|
||||
close_mode = (tag.Id() == Xop_xnde_tag_.Tag__references.Id()) // dangling <references> has partial auto-close behavior; ISSUE#:583; DATE:2019-10-05
|
||||
? XTN_CLOSE_MODE__ESCAPE_AND_CLOSE
|
||||
: XTN_CLOSE_MODE__ESCAPE; // escape if end not found; verified with <poem>, <gallery>, <imagemap>, <hiero>; DATE:2014-08-23; DATE:2019-10-05
|
||||
}
|
||||
|
||||
// handle close_mode
|
||||
int close_end = -1;
|
||||
switch (close_mode) {
|
||||
case XTN_CLOSE_MODE__ESCAPE_AND_CLOSE: // <references> gobble up to EOS; ISSUE#:583; DATE:2019-10-05
|
||||
xnde_end = close_bgn = close_end = src_len;
|
||||
break;
|
||||
case XTN_CLOSE_MODE__ESCAPE: // dangling tags are escaped; used to gobble up rest of page with "xnde_end = close_bgn = close_end = src_len;"; DATE:2017-01-10
|
||||
return ctx.Lxr_make_txt_(open_end);
|
||||
case XTN_CLOSE_MODE__MAKE:
|
||||
close_end = Find_end_tag_pos(src, src_len, close_bgn + close_bry.length); // search for ">"
|
||||
if (close_end == Bry_find_.Not_found)
|
||||
return ctx.Lxr_make_log_(Xop_xnde_log.Xtn_end_not_found, src, open_bgn, open_end);
|
||||
xnde_end = close_end;
|
||||
break;
|
||||
default: throw Err_.new_unhandled_default(close_mode);
|
||||
}
|
||||
|
||||
// pre2_hack
|
||||
if (pre2_hack)
|
||||
return ctx.Lxr_make_txt_(close_end);
|
||||
|
||||
// add <references and rest of document
|
||||
if ( close_mode == XTN_CLOSE_MODE__ESCAPE_AND_CLOSE
|
||||
&& ctx.Parse_tid() == Xop_parser_tid_.Tid__wtxt) { // do not add if tmpl mode;
|
||||
byte[] escaped_tag_bgn = gplx.xowa.xtns.cites.References_nde.ESCAPED_TAG_BGN;
|
||||
root.Subs_add(tkn_mkr.Bry_raw(0, escaped_tag_bgn.length, escaped_tag_bgn));
|
||||
root.Subs_add(tkn_mkr.Bry_mid(src, open_end, src_len));
|
||||
root.Subs_add(tkn_mkr.Bry_raw(0, 1, Byte_ascii.Nl_bry)); // add \n for EOS; adding because <xtn> may depend on preceding \n; EX: en.d:tepilli; ISSUE#:583; DATE:2019-10-05
|
||||
}
|
||||
|
||||
// create xnde tag which will be everything between <xtn></xtn>
|
||||
xnde = New_xnde_pair(ctx, root, tkn_mkr, tag, open_bgn, open_end, close_bgn, close_end);
|
||||
xnde.Atrs_rng_(atrs_bgn, atrs_end);
|
||||
xnde.Atrs_ary_(atrs);
|
||||
|
@ -64,4 +64,5 @@ public class References_nde implements Xox_xnde, Mwh_atr_itm_owner1 {
|
||||
public static boolean Enabled = true;
|
||||
private static final Hash_adp_bry xatrs_hash = Hash_adp_bry.ci_a7()
|
||||
.Add_str_obj("group", Byte_obj_val.new_(References_nde.Xatr_id_group));
|
||||
// Opening-tag bytes that Xop_xnde_wkr adds as a raw token, ahead of the remaining source, when it auto-closes a dangling <references>; ISSUE#:583
// NOTE(review): the name says "escaped" but the literal shown here is a plain "<references>" -- confirm the intended bytes
public static final byte[] ESCAPED_TAG_BGN = Bry_.new_a7("<references>");
|
||||
}
|
||||
|
@ -15,7 +15,7 @@ Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.xtns.cites; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*;
|
||||
import org.junit.*;
|
||||
public class References_nde_rare_tst {
|
||||
public class References_nde_rare_tst {
|
||||
// Runs before each test: clears the fixture's ref mgr and resets the fixture. The shared Xop_fxt instance is declared at the end of this same line.
@Before public void init() {fxt.Clear_ref_mgr(); fxt.Reset();} private final Xop_fxt fxt = new Xop_fxt();
|
||||
// Runs after each test: calls fxt.Init_para_n_() (presumably switches paragraph handling back off for later tests -- confirm).
@After public void term() {fxt.Init_para_n_();}
|
||||
@Test public void Recursive() { // PURPOSE: handle recursive situations; EX: ja.w:Kソリューション ; ja.w:Template:cite web。; DATE:2014-03-05
|
||||
@ -103,4 +103,17 @@ public class References_nde_rare_tst {
|
||||
, ""
|
||||
));
|
||||
}
|
||||
// Regression test for ISSUE#:583.
// Input   : one named <ref>, followed on the next line by "<references>xyz" with no closing </references>.
// Expected: the ref renders as a superscript link; the dangling tag and trailing text appear literally as "<references>xyz";
//           the references list (<ol class="references">) is still emitted after it, followed by a trailing empty line.
@Test public void Dangling_references() { // PURPOSE:dangling references should escape AND auto-close ISSUE#:583; DATE:2019-10-05
|
||||
fxt.Test_parse_page_all_str(String_.Concat_lines_nl_skip_last
|
||||
( "a<ref name='ref_0'>b</ref>c"
|
||||
, "<references>xyz"
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "a<sup id=\"cite_ref-ref_0_0-0\" class=\"reference\"><a href=\"#cite_note-ref_0-0\">[1]</a></sup>c"
|
||||
, "<references>xyz"
|
||||
, "<ol class=\"references\">"
|
||||
, "<li id=\"cite_note-ref_0-0\"><span class=\"mw-cite-backlink\"><a href=\"#cite_ref-ref_0_0-0\">^</a></span> <span class=\"reference-text\">b</span></li>"
|
||||
, "</ol>"
|
||||
, ""
|
||||
));
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user