From 9f0cfc27bb04b9f84fb59fec7c8ef5651cb0a3f8 Mon Sep 17 00:00:00 2001 From: gnosygnu Date: Sun, 6 Oct 2019 08:40:26 -0400 Subject: [PATCH] Parser: Auto-inline dangling references tag [#583] --- .../gplx/xowa/parsers/xndes/Xop_xnde_wkr.java | 37 +++++++++++-------- .../xtns/cites/References_nde_rare_tst.java | 6 +-- 2 files changed, 24 insertions(+), 19 deletions(-) diff --git a/400_xowa/src/gplx/xowa/parsers/xndes/Xop_xnde_wkr.java b/400_xowa/src/gplx/xowa/parsers/xndes/Xop_xnde_wkr.java index 417d60b1f..ad5b183b6 100644 --- a/400_xowa/src/gplx/xowa/parsers/xndes/Xop_xnde_wkr.java +++ b/400_xowa/src/gplx/xowa/parsers/xndes/Xop_xnde_wkr.java @@ -555,7 +555,7 @@ public class Xop_xnde_wkr implements Xop_ctx_wkr { return Xop_xnde_wkr_.Find_xtn_end(ctx, src, open_end, src_len, open_bry, close_bry); // UNIQ; DATE:2017-03-31 } - private static final byte XTN_CLOSE_MODE__MAKE = 0, XTN_CLOSE_MODE__ESCAPE = 1, XTN_CLOSE_MODE__ESCAPE_AND_CLOSE = 2; + private static final byte XTN_CLOSE_MODE__MAKE = 0, XTN_CLOSE_MODE__ESCAPE = 1, XTN_CLOSE_MODE__AUTO_INLINE = 2; private int Make_xnde_xtn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, Xop_xnde_tag tag, int open_bgn, int open_end, int name_bgn, int name_end, int atrs_bgn, int atrs_end, Mwh_atr_itm[] atrs, boolean inline, boolean pre2_hack) { // NOTE: find end_tag that exactly matches bgnTag; must be case sensitive; int xnde_end = open_end; @@ -588,15 +588,15 @@ public class Xop_xnde_wkr implements Xop_ctx_wkr { int close_bgn = Find_xtn_end_lhs(ctx, tag, src, src_len, open_bgn, open_end, tag.Xtn_bgn_tag(), close_bry); if (close_bgn == Bry_find_.Not_found) {// not found close_mode = (tag.Id() == Xop_xnde_tag_.Tag__references.Id()) // dangling has partial auto-close behavior; ISSUE#:583; DATE:2019-10-05 - ? XTN_CLOSE_MODE__ESCAPE_AND_CLOSE + ? XTN_CLOSE_MODE__AUTO_INLINE : XTN_CLOSE_MODE__ESCAPE; // escape if end not found; verified with , , , ; DATE:2014-08-23; DATE:2019-10-05 } // handle close_mode int close_end = -1; switch (close_mode) { - case XTN_CLOSE_MODE__ESCAPE_AND_CLOSE: // gobble up to EOS; ISSUE#:583; DATE:2019-10-05 - xnde_end = close_bgn = close_end = src_len; + case XTN_CLOSE_MODE__AUTO_INLINE: // is converted to auto-inline; ISSUE#:583; DATE:2019-10-05 + xnde_end = close_bgn = close_end = open_end; break; case XTN_CLOSE_MODE__ESCAPE: // dangling tags are escaped; used to gobble up rest of page with "xnde_end = close_bgn = close_end = src_len;"; DATE:2017-01-10 return ctx.Lxr_make_txt_(open_end); @@ -613,21 +613,26 @@ public class Xop_xnde_wkr implements Xop_ctx_wkr { if (pre2_hack) return ctx.Lxr_make_txt_(close_end); - // add <references and rest of document - if ( close_mode == XTN_CLOSE_MODE__ESCAPE_AND_CLOSE - && ctx.Parse_tid() == Xop_parser_tid_.Tid__wtxt) { // do not add if tmpl mode; + if ( close_mode == XTN_CLOSE_MODE__AUTO_INLINE) { + // add <references + if (ctx.Parse_tid() == Xop_parser_tid_.Tid__wtxt) { // do not add if tmpl mode; byte[] escaped_tag_bgn = gplx.xowa.xtns.cites.References_nde.ESCAPED_TAG_BGN; root.Subs_add(tkn_mkr.Bry_raw(0, escaped_tag_bgn.length, escaped_tag_bgn)); - root.Subs_add(tkn_mkr.Bry_mid(src, open_end, src_len)); - root.Subs_add(tkn_mkr.Bry_raw(0, 1, Byte_ascii.Nl_bry)); // add \n for EOS; adding because may depend on preceding \n; EX: en.d:tepilli; ISSUE#:583; DATE:2019-10-05 - } + } - // create xnde tag which will be everything between - xnde = New_xnde_pair(ctx, root, tkn_mkr, tag, open_bgn, open_end, close_bgn, close_end); - xnde.Atrs_rng_(atrs_bgn, atrs_end); - xnde.Atrs_ary_(atrs); - if (close_bgn - open_end > 0) - xnde.Subs_add(tkn_mkr.Txt(open_end, close_bgn)); + // fake inline + xnde = Xnde_bgn(ctx, tkn_mkr, root, tag, Xop_xnde_tkn.CloseMode_inline, src, open_bgn, open_end, atrs_bgn, atrs_end, atrs); + xnde.Tag_close_rng_(open_end, open_end); // NOTE: inline tag, so set TagClose to open_end; should noop + root.Subs_add(tkn_mkr.Bry_mid(src, open_end, open_end)); + } + else { + // create xnde tag which will be everything between + xnde = New_xnde_pair(ctx, root, tkn_mkr, tag, open_bgn, open_end, close_bgn, close_end); + xnde.Atrs_rng_(atrs_bgn, atrs_end); + xnde.Atrs_ary_(atrs); + if (close_bgn - open_end > 0) + xnde.Subs_add(tkn_mkr.Txt(open_end, close_bgn)); + } } switch (ctx.Parse_tid()) { case Xop_parser_tid_.Tid__tmpl: { diff --git a/400_xowa/src/gplx/xowa/xtns/cites/References_nde_rare_tst.java b/400_xowa/src/gplx/xowa/xtns/cites/References_nde_rare_tst.java index 9a43fd10a..bdf8068f1 100644 --- a/400_xowa/src/gplx/xowa/xtns/cites/References_nde_rare_tst.java +++ b/400_xowa/src/gplx/xowa/xtns/cites/References_nde_rare_tst.java @@ -107,13 +107,13 @@ public class References_nde_rare_tst { fxt.Test_parse_page_all_str(String_.Concat_lines_nl_skip_last ( "abc" , "xyz" + , "[[Category:A]]" // [[Category]] should be processed by category parser; should not show up in output below ), String_.Concat_lines_nl_skip_last ( "a[1]c" - , "<references>xyz" - , "
    " + , "<references>
      " , "
    1. ^ b
    2. " , "
    " - , "" + , "xyz" )); } }