From a10c2cd1dc3e4a92cccb6a33d9b12616d2aaa268 Mon Sep 17 00:00:00 2001 From: gnosygnu Date: Sat, 5 Oct 2019 12:41:48 -0400 Subject: [PATCH] Parser: Auto-close dangling references tag [#583] --- .../gplx/xowa/parsers/xndes/Xop_xnde_wkr.java | 63 ++++++++++++++----- .../gplx/xowa/xtns/cites/References_nde.java | 1 + .../xtns/cites/References_nde_rare_tst.java | 15 ++++- 3 files changed, 63 insertions(+), 16 deletions(-) diff --git a/400_xowa/src/gplx/xowa/parsers/xndes/Xop_xnde_wkr.java b/400_xowa/src/gplx/xowa/parsers/xndes/Xop_xnde_wkr.java index 6a94b1933..417d60b1f 100644 --- a/400_xowa/src/gplx/xowa/parsers/xndes/Xop_xnde_wkr.java +++ b/400_xowa/src/gplx/xowa/parsers/xndes/Xop_xnde_wkr.java @@ -554,6 +554,8 @@ public class Xop_xnde_wkr implements Xop_ctx_wkr { private int Find_xtn_end_lhs(Xop_ctx ctx, Xop_xnde_tag tag, byte[] src, int src_len, int open_bgn, int open_end, byte[] open_bry, byte[] close_bry) { return Xop_xnde_wkr_.Find_xtn_end(ctx, src, open_end, src_len, open_bry, close_bry); // UNIQ; DATE:2017-03-31 } + + private static final byte XTN_CLOSE_MODE__MAKE = 0, XTN_CLOSE_MODE__ESCAPE = 1, XTN_CLOSE_MODE__ESCAPE_AND_CLOSE = 2; private int Make_xnde_xtn(Xop_ctx ctx, Xop_tkn_mkr tkn_mkr, Xop_root_tkn root, byte[] src, int src_len, Xop_xnde_tag tag, int open_bgn, int open_end, int name_bgn, int name_end, int atrs_bgn, int atrs_end, Mwh_atr_itm[] atrs, boolean inline, boolean pre2_hack) { // NOTE: find end_tag that exactly matches bgnTag; must be case sensitive; int xnde_end = open_end; @@ -563,33 +565,64 @@ public class Xop_xnde_wkr implements Xop_ctx_wkr { xnde.Tag_close_rng_(open_end, open_end); // NOTE: inline tag, so set TagClose to open_end; should noop } else { - byte[] close_bry = tag.Xtn_end_tag_tmp(); // get tmp bry (so as not to new) - if (tag.Langs() != null) { // cur tag has langs; EX:
; DATE:2014-07-18 + // build for search + byte[] close_bry = tag.Xtn_end_tag_tmp(); // get tmp bry (so as not to new) + + // handle tag with lang translations; EX:
is in pt; DATE:2014-07-18 + if (tag.Langs() != null) { Xop_xnde_tag_lang tag_lang = tag.Langs_get(ctx.Lang().Case_mgr(), ctx.Page().Lang().Lang_id(), src, name_bgn, name_end); - if (tag_lang == null) // tag does not match lang; EX: and lang=de; + if (tag_lang == null) // tag does not match lang; EX: tag but current lang is de; return ctx.Lxr_make_txt_(open_end); - if (tag_lang != Xop_xnde_tag_lang.Instance) // tag matches; note Xop_xnde_tag_lang._ is a wildcard match; EX:
+ if (tag_lang != Xop_xnde_tag_lang.Instance) // tag matches; note Xop_xnde_tag_lang.Instance is a wildcard match; EX:
close_bry = tag_lang.Xtn_end_tag_tmp(); } - int src_offset = open_bgn - 1; // open bgn to start at <; -2 to ignore + int src_offset = open_bgn - 1; // open bgn to start at <; -2 to ignore + byte close_mode = XTN_CLOSE_MODE__MAKE; int close_bgn = Find_xtn_end_lhs(ctx, tag, src, src_len, open_bgn, open_end, tag.Xtn_bgn_tag(), close_bry); - if (close_bgn == Bry_find_.Not_found) auto_close = true; // auto-close if end not found; verified with , , , , DATE:2014-08-23 - int close_end = -1; - if (auto_close) { - return ctx.Lxr_make_txt_(open_end); // dangling tags are now escaped; used to gobble up rest of page with "xnde_end = close_bgn = close_end = src_len;"; DATE:2017-01-10 - } - else { - close_end = Find_end_tag_pos(src, src_len, close_bgn + close_bry.length); - if (close_end == Bry_find_.Not_found) return ctx.Lxr_make_log_(Xop_xnde_log.Xtn_end_not_found, src, open_bgn, open_end); - xnde_end = close_end; + if (close_bgn == Bry_find_.Not_found) {// not found + close_mode = (tag.Id() == Xop_xnde_tag_.Tag__references.Id()) // dangling has partial auto-close behavior; ISSUE#:583; DATE:2019-10-05 + ? XTN_CLOSE_MODE__ESCAPE_AND_CLOSE + : XTN_CLOSE_MODE__ESCAPE; // escape if end not found; verified with , , , ; DATE:2014-08-23; DATE:2019-10-05 } + // handle close_mode + int close_end = -1; + switch (close_mode) { + case XTN_CLOSE_MODE__ESCAPE_AND_CLOSE: // gobble up to EOS; ISSUE#:583; DATE:2019-10-05 + xnde_end = close_bgn = close_end = src_len; + break; + case XTN_CLOSE_MODE__ESCAPE: // dangling tags are escaped; used to gobble up rest of page with "xnde_end = close_bgn = close_end = src_len;"; DATE:2017-01-10 + return ctx.Lxr_make_txt_(open_end); + case XTN_CLOSE_MODE__MAKE: + close_end = Find_end_tag_pos(src, src_len, close_bgn + close_bry.length); // search for ">" + if (close_end == Bry_find_.Not_found) + return ctx.Lxr_make_log_(Xop_xnde_log.Xtn_end_not_found, src, open_bgn, open_end); + xnde_end = close_end; + break; + default: throw Err_.new_unhandled_default(close_mode); + } + + // pre2_hack if (pre2_hack) return ctx.Lxr_make_txt_(close_end); + + // add <references and rest of document + if ( close_mode == XTN_CLOSE_MODE__ESCAPE_AND_CLOSE + && ctx.Parse_tid() == Xop_parser_tid_.Tid__wtxt) { // do not add if tmpl mode; + byte[] escaped_tag_bgn = gplx.xowa.xtns.cites.References_nde.ESCAPED_TAG_BGN; + root.Subs_add(tkn_mkr.Bry_raw(0, escaped_tag_bgn.length, escaped_tag_bgn)); + root.Subs_add(tkn_mkr.Bry_mid(src, open_end, src_len)); + root.Subs_add(tkn_mkr.Bry_raw(0, 1, Byte_ascii.Nl_bry)); // add \n for EOS; adding because may depend on preceding \n; EX: en.d:tepilli; ISSUE#:583; DATE:2019-10-05 + } + + // create xnde tag which will be everything between xnde = New_xnde_pair(ctx, root, tkn_mkr, tag, open_bgn, open_end, close_bgn, close_end); xnde.Atrs_rng_(atrs_bgn, atrs_end); xnde.Atrs_ary_(atrs); diff --git a/400_xowa/src/gplx/xowa/xtns/cites/References_nde.java b/400_xowa/src/gplx/xowa/xtns/cites/References_nde.java index 6abe2ed81..5bac30fbd 100644 --- a/400_xowa/src/gplx/xowa/xtns/cites/References_nde.java +++ b/400_xowa/src/gplx/xowa/xtns/cites/References_nde.java @@ -64,4 +64,5 @@ public class References_nde implements Xox_xnde, Mwh_atr_itm_owner1 { public static boolean Enabled = true; private static final Hash_adp_bry xatrs_hash = Hash_adp_bry.ci_a7() .Add_str_obj("group", Byte_obj_val.new_(References_nde.Xatr_id_group)); + public static final byte[] ESCAPED_TAG_BGN = Bry_.new_a7("<references>"); } diff --git a/400_xowa/src/gplx/xowa/xtns/cites/References_nde_rare_tst.java b/400_xowa/src/gplx/xowa/xtns/cites/References_nde_rare_tst.java index 3e77ac8a1..9a43fd10a 100644 --- a/400_xowa/src/gplx/xowa/xtns/cites/References_nde_rare_tst.java +++ b/400_xowa/src/gplx/xowa/xtns/cites/References_nde_rare_tst.java @@ -15,7 +15,7 @@ Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt */ package gplx.xowa.xtns.cites; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; import org.junit.*; -public class References_nde_rare_tst { +public class References_nde_rare_tst { @Before public void init() {fxt.Clear_ref_mgr(); fxt.Reset();} private final Xop_fxt fxt = new Xop_fxt(); @After public void term() {fxt.Init_para_n_();} @Test public void Recursive() { // PURPOSE: handle recursive situations; EX: ja.w:Kソリューション ; ja.w:Template:cite web。; DATE:2014-03-05 @@ -103,4 +103,17 @@ public class References_nde_rare_tst { , "" )); } + @Test public void Dangling_references() { // PURPOSE:dangling references should escape AND auto-close ISSUE#:583; DATE:2019-10-05 + fxt.Test_parse_page_all_str(String_.Concat_lines_nl_skip_last + ( "abc" + , "xyz" + ), String_.Concat_lines_nl_skip_last + ( "a[1]c" + , "<references>xyz" + , "
    " + , "
  1. ^ b
  2. " + , "
" + , "" + )); + } }