Mw_parse: Handle interrupt and separator logic for magiclinks

2024-10-27 20:34:16 +00:00 · 2017-01-28 02:47:22 -05:00 · 2017-01-28 02:47:22 -05:00 · 7e27b5415d
commit 7e27b5415d
parent e231df0ce1
6 changed files with 263 additions and 70 deletions
--- a/400_xowa/src/gplx/core/primitives/Bool_ary_bldr.java
+++ b/400_xowa/src/gplx/core/primitives/Bool_ary_bldr.java
@ -0,0 +1,39 @@
 /*
 XOWA: the XOWA Offline Wiki Application
 Copyright (C) 2012 gnosygnu@gmail.com
 This program is free software: you can redistribute it and/or modify
 it under the terms of the GNU Affero General Public License as
 published by the Free Software Foundation, either version 3 of the
 License, or (at your option) any later version.
 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU Affero General Public License for more details.
 You should have received a copy of the GNU Affero General Public License
 along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
 package gplx.core.primitives; import gplx.*; import gplx.core.*;
 public class Bool_ary_bldr {
 	private final    boolean[] ary;
 	public Bool_ary_bldr(int len) {
 		this.ary = new boolean[len];
 	}
 	public Bool_ary_bldr Set_many(int... v) {
 		int len = v.length;
 		for (int i = 0; i < len; i++)
 			ary[v[i]] = true;
 		return this;
 	}
 	public Bool_ary_bldr Set_rng(int bgn, int end) {
 		for (int i = bgn; i <= end; i++)
 			ary[i] = true;
 		return this;
 	}
 	public boolean[] To_ary() {
 		return ary;
 	}
 	public static Bool_ary_bldr New_u8() {return new Bool_ary_bldr(256);}
 }
--- a/400_xowa/src/gplx/langs/phps/utls/Php_str_.java
+++ b/400_xowa/src/gplx/langs/phps/utls/Php_str_.java
@ -39,7 +39,7 @@ public class Php_str_ {
 		int end = len < 0 ? src_len + len : bgn + len;
 		if (end > src.length) end = src.length;; // handle out of bounds;
 		return src[bgn];
-	}		
+	}
 	public static int Strspn_fwd__ary(byte[] src, boolean[] find, int bgn, int max, int src_len) {
 		if (max == -1) max = src_len;
 		int rv = 0;
@ -90,6 +90,17 @@ public class Php_str_ {
 		}
 		return rv;
 	}
 	// Backward strspn: counts the consecutive bytes directly before "bgn" whose value is flagged in "find"
 	//   src  : bytes to scan
 	//   find : lookup table indexed by unsigned byte value (0 - 255); presumably built with 256 slots, EX: Bool_ary_bldr.New_u8()
 	//   bgn  : scanning starts at bgn - 1 and moves toward index 0
 	//   max  : maximum count to return; -1 means no limit
 	// returns the number of matching bytes; 0 if the byte at bgn - 1 is not flagged or bgn is 0
 	public static int Strspn_bwd__ary(byte[] src, boolean[] find, int bgn, int max) {
 		if (max == -1) max = Int_.Max_value;
 		int rv = 0;
 		for (int i = bgn - 1; i > -1; i--) {
 			// PATCH.JAVA: bytes are signed; mask to 0 - 255 so that bytes >= 0x80 (EX: part of a UTF-8 multi-byte char) do not become a negative index
 			if (find[src[i] & 0xFF] && rv < max) 
 				rv++;
 			else
 				break;
 		}
 		return rv;
 	}
 	public static int Strspn_bwd__space_or_tab(byte[] src, int bgn, int max) {
 		if (max == -1) max = Int_.Max_value;
 		int rv = 0;
--- a/400_xowa/src/gplx/xowa/mws/Xomw_sanitizer.java
+++ b/400_xowa/src/gplx/xowa/mws/Xomw_sanitizer.java
@ -16,7 +16,7 @@ You should have received a copy of the GNU Affero General Public License
 along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
 package gplx.xowa.mws; import gplx.*; import gplx.xowa.*;
-import gplx.core.encoders.*; import gplx.langs.htmls.entitys.*;
+import gplx.core.encoders.*; import gplx.core.primitives.*; import gplx.langs.htmls.entitys.*;
 import gplx.xowa.parsers.htmls.*;
 import gplx.xowa.mws.parsers.*;
 public class Xomw_sanitizer {
@ -515,24 +515,3 @@ class Xomw_html_ent {
 	public final    byte[] html;
 	public static final byte Type__null = 0, Type__alias = 1, Type__char = 2, Type__entity = 3;
 }
 class Bool_ary_bldr {
 	private final    boolean[] ary;
 	public Bool_ary_bldr(int len) {
 		this.ary = new boolean[len];
 	}
 	public Bool_ary_bldr Set_many(int... v) {
 		int len = v.length;
 		for (int i = 0; i < len; i++)
 			ary[v[i]] = true;
 		return this;
 	}
 	public Bool_ary_bldr Set_rng(int bgn, int end) {
 		for (int i = bgn; i <= end; i++)
 			ary[i] = true;
 		return this;
 	}
 	public boolean[] To_ary() {
 		return ary;
 	}
 	public static Bool_ary_bldr New_u8() {return new Bool_ary_bldr(256);}
 }
--- a/400_xowa/src/gplx/xowa/mws/parsers/Xomw_regex_.java
+++ b/400_xowa/src/gplx/xowa/mws/parsers/Xomw_regex_.java
@ -20,7 +20,7 @@ import gplx.core.btries.*;
 public class Xomw_regex_ {
 	public static int Find_fwd_while(Btrie_slim_mgr trie, Btrie_rv trv, byte[] src, int src_bgn, int src_end) {
 		int cur = src_bgn;
-		while (true) {
+		while (cur < src_end) {
 			byte b = src[cur];
 			Object o = trie.Match_at_w_b0(trv, b, src, cur, src_end);
 			if (o == null)
@ -32,7 +32,7 @@ public class Xomw_regex_ {
 	}
 	public static int Find_fwd_until(Btrie_slim_mgr trie, Btrie_rv trv, byte[] src, int src_bgn, int src_end) {
 		int cur = src_bgn;
-		while (true) {
+		while (cur < src_end) {
 			byte b = src[cur];
 			Object o = trie.Match_at_w_b0(trv, b, src, cur, src_end);
 			if (o == null)
--- a/400_xowa/src/gplx/xowa/mws/parsers/magiclinks/Xomw_magiclinks_wkr.java
+++ b/400_xowa/src/gplx/xowa/mws/parsers/magiclinks/Xomw_magiclinks_wkr.java
@ -23,12 +23,20 @@ public class Xomw_magiclinks_wkr {
 	private final    Btrie_slim_mgr regex_trie = Btrie_slim_mgr.ci_a7(); // NOTE: must be ci to handle protocols; EX: "https:" and "HTTPS:"
 	private final    Btrie_rv trv = new Btrie_rv();
 	private static byte[] Tag__anch__rhs;
 	private boolean[] url_separators;
 	private static Xomw_regex_link_interrupt regex_link_interrupt;
 	private Xomw_regex_boundary regex_boundary;
 	private Xomw_regex_url regex_url;
 	private Xomw_linker linker;
 	private byte[] page_title;
 	private static final byte Regex__anch = 1, Regex__elem = 2, Regex__free = 3;
 	public Xomw_magiclinks_wkr() {
 		// punctuation that can be moved from the end of a url into the trail; MW: $sep = ',;\.:!?'
 		Bool_ary_bldr sep_bldr = Bool_ary_bldr.New_u8();
 		sep_bldr.Set_many
 			( Byte_ascii.Comma, Byte_ascii.Semic, Byte_ascii.Dot
 			, Byte_ascii.Colon, Byte_ascii.Bang, Byte_ascii.Question
 			);
 		this.url_separators = sep_bldr.To_ary();
 	}
 	public void Init_by_wiki(Xomw_linker linker, Xomw_regex_boundary regex_boundary, Xomw_regex_url regex_url) {
 		this.linker = linker;
 		this.regex_boundary = regex_boundary;
@ -46,6 +54,7 @@ public class Xomw_magiclinks_wkr {
 		if (Tag__anch__rhs == null) {
 			synchronized (Type_adp_.ClassOf_obj(this)) {
 				Tag__anch__rhs = Bry_.new_a7("</a>");
 				regex_link_interrupt = new Xomw_regex_link_interrupt();
 			}
 		}
 	}
@ -64,19 +73,19 @@ public class Xomw_magiclinks_wkr {
 		int prv = cur;
 		boolean dirty = true;
 		// PORTED.REGEX: handle below
-		// NOTE: not handling RFC|PMID|ISBN b/c of upcoming obsolescence: https://www.mediawiki.org/wiki/Requests_for_comment/Future_of_magic_links
+		// XO.MW.UNSUPPORTED.OBSOLETE: not handling RFC|PMID|ISBN b/c of upcoming obsolescence: https://www.mediawiki.org/wiki/Requests_for_comment/Future_of_magic_links
-		//'!(?:                            // Start cases
+		//'!(?:                                    // Start cases
-		//	(<a[ \t\r\n>].*?</a>) |      // m[1]: Skip link text
+		//	(<a[ \t\r\n>].*?</a>) |                // m[1]: Skip link text
-		//	(<.*?>) |                    // m[2]: Skip stuff inside
+		//	(<.*?>) |                              // m[2]: Skip stuff inside
-		//									//       HTML elements' . "
+		//                                         //       HTML elements' . "
-		//	(\b(?i:$prots)($addr$urlChar*)) | // m[3]: Free external links
+		//	(\b(?i:$prots)($addr$urlChar*)) |      // m[3]: Free external links
-		//									// m[4]: Post-protocol path
+		//                                         // m[4]: Post-protocol path
-		//	\b(?:RFC|PMID) $spaces       // m[5]: RFC or PMID, capture number
+		//	\b(?:RFC|PMID) $spaces                 // m[5]: RFC or PMID, capture number
 		//		([0-9]+)\b |
-		//	\bISBN $spaces (            // m[6]: ISBN, capture number
+		//	\bISBN $spaces (                       // m[6]: ISBN, capture number
-		//		(?: 97[89] $spdash?)?   //  optional 13-digit ISBN prefix
+		//		(?: 97[89] $spdash?)?              //  optional 13-digit ISBN prefix
-		//		(?: [0-9]  $spdash?){9} //  9 digits with opt. delimiters
+		//		(?: [0-9]  $spdash?){9}            //  9 digits with opt. delimiters
-		//		[0-9Xx]                  //  check digit
+		//		[0-9Xx]                            //  check digit
 		//	)\b
 		while (true) {
 			if (cur == src_end) {
@ -173,50 +182,44 @@ public class Xomw_magiclinks_wkr {
 	// Make a free external link, given a user-supplied URL
 	public void Make_free_external_link(Bry_bfr bfr, byte[] url, int num_post_proto) {
-//			byte[] trail = Bry_.Empty;
+		byte[] trail = Bry_.Empty;
 		// The characters '<' and '>' (which were escaped by
 		// removeHTMLtags()) should not be included in
 		// URLs, per RFC 2396.
 		// Make &nbsp; terminate a URL as well (bug T84937)
-
+		int separator_bgn = regex_link_interrupt.Find(trv, url, 0, url.length);
-//			$m2 = [];
+		if (separator_bgn != Bry_find_.Not_found) {
-//			if (preg_match(
+			trail = Bry_.Mid(url, separator_bgn);
-//				'/&(lt|gt|nbsp|#x0*(3[CcEe]|[Aa]0)|#0*(60|62|160));/',
+			url = Bry_.Mid(url, 0, separator_bgn);
-//				$url,
+		}
 //				$m2,
 //				PREG_OFFSET_CAPTURE
 //			)) {
 //				trail = substr($url, $m2[0][1]) . $trail;
 //				$url = substr($url, 0, $m2[0][1]);
 //			}
 		// Move trailing punctuation to $trail
-//			$sep = ',;\.:!?';
+		int url_len = url.length;
 		// If there is no left bracket, then consider right brackets fair game too
-//			if (strpos($url, '(') === false) {
+		// XO.MW: if (strpos($url, '(') === false) {$sep .= ')';}
-//				$sep .= ')';
+		url_separators[Byte_ascii.Paren_end] = Bry_find_.Find_fwd(url, Byte_ascii.Paren_bgn, 0, url_len) == Bry_find_.Not_found;
-//			}
+		
-
+		int num_sep_chars = Php_str_.Strspn_bwd__ary(url, url_separators, url_len, -1);
 //			$urlRev = strrev($url);
 //			$numSepChars = strspn($urlRev, $sep);
 		// Don't break a trailing HTML entity by moving the ; into $trail
 		// This is in hot code, so use substr_compare to avoid having to
 		// create a new String Object for the comparison
-
+		// XO.MW.NOTE: ignore semic if part of entity; EX: "http://a.org&apos;!."
-//			if ($numSepChars && substr_compare($url, ";", -$numSepChars, 1) === 0) {
+		if (num_sep_chars > 0 && Php_str_.Substr_byte(url, -num_sep_chars) == Byte_ascii.Semic) {
 			// more optimization: instead of running preg_match with a $
 			// anchor, which can be slow, do the match on the reversed
 			// String starting at the desired offset.
 			// un-reversed regexp is: /&([a-z]+|#x[\da-f]+|#\d+)$/i
-//				if (preg_match('/\G([a-z]+|[\da-f]+x#|\d+#)&/i', $urlRev, $m2, 0, $numSepChars)) {
+			// if (preg_match('/\G([a-z]+|[\da-f]+x#|\d+#)&/i', $urlRev, $m2, 0, num_sep_chars)) {
-//					$numSepChars--;
+			if (Xomw_regex_html_entity.Match_bwd(url, url_len - num_sep_chars, 0)) {
-//				}
+				num_sep_chars--;
-//			}
+			}
-//			if ($numSepChars) {
+		}
-//				$trail = substr($url, -$numSepChars) . $trail;
+
-//				$url = substr($url, 0, -$numSepChars);
+		if (num_sep_chars > 0) {
-//			}
+			trail = Bry_.Add(Php_str_.Substr(url, -num_sep_chars), trail);
 			url = Php_str_.Substr(url, 0, -num_sep_chars);
 		}
 		// Verify that we still have a real URL after trail removal, and
 		// not just lone protocol
@ -226,7 +229,8 @@ public class Xomw_magiclinks_wkr {
 //			$url = Sanitizer::cleanUrl($url);
-		// Is this an external image?
+		// XO.MW.UNSUPPORTED.NON-WMF: not supporting images from freeform url; (EX: "http://a.org/image.png" -> "<img>"); haven't seen this used on WMF wikis
 		// Is this an external image?			
 		byte[] text = null; // $this->maybeMakeExternalImage($url);
 		if (text == null) {
 			// Not an image, make a link
@ -235,11 +239,130 @@ public class Xomw_magiclinks_wkr {
 				, true, Bry_.new_a7("free")
 				, new Xomwh_atr_mgr()	// $this->getExternalLinkAttribs($url)
 				, page_title);
 			// XO.MW.UNSUPPORTED.HOOK: registers link for processing by other extensions?
 			// Register it in the output Object...
 			// Replace unnecessary URL escape codes with their equivalent characters
-//				$pasteurized = self::normalizeLinkUrl($url);
+			// $pasteurized = self::normalizeLinkUrl($url);
-//				$this->mOutput->addExternalLink($pasteurized);
+			// $this->mOutput->addExternalLink($pasteurized);
 		}
-//			return $text . $trail;
+		bfr.Add(trail);
 	}
 }
 class Xomw_regex_html_entity {
 	// if (preg_match('/\G([a-z]+|[\da-f]+x#|\d+#)&/i', $urlRev, $m2, 0, num_sep_chars)) {
 	// REGEX: (letters | hex + "#" | dec + "x#") + "&"
 	// \G means "stop if matching breaks"; so, using a reversed example, "http://&#amp;&#!lt;" will not match "&#amp;" b/c "&#!lt;" breaks match
 	//   http://www.php.net/manual/en/regexp.reference.escape.php
 	//   http://stackoverflow.com/questions/14897949/what-is-the-use-of-g-anchor-in-regex
 	public static boolean Match_bwd(byte[] src, int src_bgn, int src_end) {
 		int cur = src_bgn - 1;
 		int numbers = 0;
 		int letters = 0;
 		while (cur >= src_end) {
 			int b_bgn = gplx.core.intls.Utf8_.Get_pos0_of_char_bwd(src, cur);
 			switch (src[b_bgn]) {
 				case Byte_ascii.Ltr_A: case Byte_ascii.Ltr_B: case Byte_ascii.Ltr_C: case Byte_ascii.Ltr_D: case Byte_ascii.Ltr_E:
 				case Byte_ascii.Ltr_F: case Byte_ascii.Ltr_G: case Byte_ascii.Ltr_H: case Byte_ascii.Ltr_I: case Byte_ascii.Ltr_J:
 				case Byte_ascii.Ltr_K: case Byte_ascii.Ltr_L: case Byte_ascii.Ltr_M: case Byte_ascii.Ltr_N: case Byte_ascii.Ltr_O:
 				case Byte_ascii.Ltr_P: case Byte_ascii.Ltr_Q: case Byte_ascii.Ltr_R: case Byte_ascii.Ltr_S: case Byte_ascii.Ltr_T:
 				case Byte_ascii.Ltr_U: case Byte_ascii.Ltr_V: case Byte_ascii.Ltr_W: case Byte_ascii.Ltr_X: case Byte_ascii.Ltr_Y: case Byte_ascii.Ltr_Z:
 				case Byte_ascii.Ltr_a: case Byte_ascii.Ltr_b: case Byte_ascii.Ltr_c: case Byte_ascii.Ltr_d: case Byte_ascii.Ltr_e:
 				case Byte_ascii.Ltr_f: case Byte_ascii.Ltr_g: case Byte_ascii.Ltr_h: case Byte_ascii.Ltr_i: case Byte_ascii.Ltr_j:
 				case Byte_ascii.Ltr_k: case Byte_ascii.Ltr_l: case Byte_ascii.Ltr_m: case Byte_ascii.Ltr_n: case Byte_ascii.Ltr_o:
 				case Byte_ascii.Ltr_p: case Byte_ascii.Ltr_q: case Byte_ascii.Ltr_r: case Byte_ascii.Ltr_s: case Byte_ascii.Ltr_t:
 				case Byte_ascii.Ltr_u: case Byte_ascii.Ltr_v: case Byte_ascii.Ltr_w: case Byte_ascii.Ltr_x: case Byte_ascii.Ltr_y: case Byte_ascii.Ltr_z:
 					letters++;
 					break;
 				case Byte_ascii.Num_0: case Byte_ascii.Num_1: case Byte_ascii.Num_2: case Byte_ascii.Num_3: case Byte_ascii.Num_4:
 				case Byte_ascii.Num_5: case Byte_ascii.Num_6: case Byte_ascii.Num_7: case Byte_ascii.Num_8: case Byte_ascii.Num_9:
 					numbers++;
 					break;
 				case Byte_ascii.Hash:
 					// next must be &; EX: "&#" and "&#x"
 					int prv = cur - 1;
 					if (prv >= src_end && src[prv] == Byte_ascii.Amp) {
 						// if hex, num | ltr is fine
 						byte hex_byte = src[cur + 1];
 						if (hex_byte == Byte_ascii.Ltr_X || hex_byte == Byte_ascii.Ltr_x) {
 							return numbers > 0 || letters > 1;	// 1 to ignore "x"
 						}
 						// if dec, no letters allowed
 						else {
 							return numbers > 0 && letters == 0;
 						}
 					}
 					return false;
 				case Byte_ascii.Amp:
 					// if entity, no numbers
 					return letters > 0 && numbers == 0;
 				default:
 					return false;
 			}
 			cur--;
 		}
 		return false;
 	}
 }
 class Xomw_regex_link_interrupt {
 	// ids stored in bgn_trie: the opening part of an interrupting entity
 	private static final byte Bgn__ent__lt = 0, Bgn__ent__gt = 1, Bgn__ent__nbsp = 2, Bgn__hex = 3, Bgn__dec = 4;
 	// ids stored in end_trie: the closing part of a numeric entity; hex ids come first, then dec ids
 	private static final byte End__hex__lt = 0, End__hex__gt = 1, End__hex__nbsp = 2, End__dec__lt = 3, End__dec__gt = 4, End__dec__nbsp = 5;
 	private final    Btrie_slim_mgr bgn_trie = Btrie_slim_mgr.cs();
 	private final    Btrie_slim_mgr end_trie = Btrie_slim_mgr.ci_a7();
 	public Xomw_regex_link_interrupt() {
 		// MW.REGEX: &(lt|gt|nbsp|#x0*(3[CcEe]|[Aa]0)|#0*(60|62|160));
 		// opening parts
 		bgn_trie.Add_str_byte("&lt;", Bgn__ent__lt);
 		bgn_trie.Add_str_byte("&gt;", Bgn__ent__gt);
 		bgn_trie.Add_str_byte("&nbsp;", Bgn__ent__nbsp);
 		bgn_trie.Add_str_byte("&#x", Bgn__hex);	// 3C | 3E | A0
 		bgn_trie.Add_str_byte("&#", Bgn__dec);	// 60 | 62 | 160
 
 		// closing parts for the numeric forms
 		end_trie.Add_str_byte("3c;", End__hex__lt);
 		end_trie.Add_str_byte("3e;", End__hex__gt);
 		end_trie.Add_str_byte("a0;", End__hex__nbsp);
 		end_trie.Add_str_byte("60;", End__dec__lt);
 		end_trie.Add_str_byte("62;", End__dec__gt);
 		end_trie.Add_str_byte("160;", End__dec__nbsp);
 	}
 	// Returns the position of the first "&lt;", "&gt;", "&nbsp;" (or numeric equivalent) between src_bgn and src_end; else Bry_find_.Not_found
 	public int Find(Btrie_rv trv, byte[] src, int src_bgn, int src_end) {
 		int cur = src_bgn;
 		while (cur < src_end) {
 			byte b = src[cur];
 			Object bgn_obj = bgn_trie.Match_at_w_b0(trv, b, src, cur, src_end);
 			if (bgn_obj != null) {
 				byte bgn_tid = ((Byte_obj_val)bgn_obj).Val();
 				int code_bgn = trv.Pos();
 				switch (bgn_tid) {
 					case Bgn__ent__lt:
 					case Bgn__ent__gt:
 					case Bgn__ent__nbsp:
 						// named form is complete as matched
 						return cur;
 					case Bgn__hex:
 					case Bgn__dec:
 						if (Match_code(trv, bgn_tid, src, code_bgn, src_end))
 							return cur;
 						break;
 				}
 			}
 			// nothing starts here; step to the next char
 			cur += gplx.core.intls.Utf8_.Len_of_char_by_1st_byte(b);
 		}
 		return Bry_find_.Not_found;
 	}
 	// Matches the rest of a numeric sequence after "&#x" or "&#"; EX: "3c;", "60;" etc.
 	private boolean Match_code(Btrie_rv trv, byte bgn_tid, byte[] src, int code_bgn, int src_end) {
 		// skip leading zeroes; MW.REGEX: "0*"
 		int nonzero_bgn = Bry_find_.Find_fwd_while(src, code_bgn, src_end, Byte_ascii.Num_0);
 		Object end_obj = end_trie.Match_at(trv, src, nonzero_bgn, src_end);
 		if (end_obj == null) return false;
 
 		// make sure that hex-dec matches; EX: "&#x60;" and "&#3c;" are invalid
 		byte end_tid = ((Byte_obj_val)end_obj).Val();
 		if (bgn_tid == Bgn__hex)
 			return Int_.Between(end_tid, End__hex__lt, End__hex__nbsp);
 		if (bgn_tid == Bgn__dec)
 			return Int_.Between(end_tid, End__dec__lt, End__dec__nbsp);
 		return false;
 	}
 }
--- a/400_xowa/src/gplx/xowa/mws/parsers/magiclinks/Xomw_magiclinks_wkr__tst.java
+++ b/400_xowa/src/gplx/xowa/mws/parsers/magiclinks/Xomw_magiclinks_wkr__tst.java
@ -20,6 +20,47 @@ import org.junit.*;
 // Fixture-driven tests for free external links ("magiclinks"): each case gives raw text and the exact html expected from the parse
 public class Xomw_magiclinks_wkr__tst {
 	private final    Xomw_magiclinks_wkr__fxt fxt = new Xomw_magiclinks_wkr__fxt();
 	// bare url surrounded by text becomes an "external free" anchor
 	@Test   public void Basic() {fxt.Test__parse("a https://b.org c", "a <a class='external free' rel='nofollow' href='https://b.org'>https://b.org</a> c");}
 	// "&lt;" in named, hex and dec form ends the url; the entity and everything after it stay outside the anchor
 	@Test   public void Interrupt() {
 		// ent
 		fxt.Test__parse("a https://b.org&lt;c"   , "a <a class='external free' rel='nofollow' href='https://b.org'>https://b.org</a>&lt;c");
 		// hex
 		fxt.Test__parse("a https://b.org&#x3c;c" , "a <a class='external free' rel='nofollow' href='https://b.org'>https://b.org</a>&#x3c;c");
 		// dec
 		fxt.Test__parse("a https://b.org&#60;c"  , "a <a class='external free' rel='nofollow' href='https://b.org'>https://b.org</a>&#60;c");
 	}
 	// a hex prefix with a dec code (or vice versa) is not an interrupt; the sequence stays inside the url
 	@Test   public void Interrupt__hex_dec() {// implementation specific test for mixed hex / dec
 		// hex-dec
 		fxt.Test__parse("a https://b.org&#x60;c" , "a <a class='external free' rel='nofollow' href='https://b.org&amp;#x60;c'>https://b.org&amp;#x60;c</a>");
 		// dec-hex
 		fxt.Test__parse("a https://b.org&#3c;c"  , "a <a class='external free' rel='nofollow' href='https://b.org&amp;#3c;c'>https://b.org&amp;#3c;c</a>");
 	}
 	// trailing punctuation is moved out of the anchor; ")" and ";" are kept only in the cases noted below
 	@Test   public void Separator() {
 		// basic
 		fxt.Test__parse("a https://b.org.:!? c"      , "a <a class='external free' rel='nofollow' href='https://b.org'>https://b.org</a>.:!? c");
 		// ")" excluded b/c url has no "("
 		fxt.Test__parse("a https://b.org).:!? c"     , "a <a class='external free' rel='nofollow' href='https://b.org'>https://b.org</a>).:!? c");
 		// ")" included b/c "(" exists
 		fxt.Test__parse("a https://b.org().:!? c"    , "a <a class='external free' rel='nofollow' href='https://b.org()'>https://b.org()</a>.:!? c");
 		// ";" excluded b/c it does not close an entity
 		fxt.Test__parse("a https://b.org;.:!? c"     , "a <a class='external free' rel='nofollow' href='https://b.org'>https://b.org</a>;.:!? c");
 		// ";" included b/c of ent
 		fxt.Test__parse("a https://b.org&abc;.:!? c" , "a <a class='external free' rel='nofollow' href='https://b.org&amp;abc;'>https://b.org&amp;abc;</a>.:!? c");
 		// ";" included b/c of hex
 		fxt.Test__parse("a https://b.org&#xB1;.:!? c", "a <a class='external free' rel='nofollow' href='https://b.org&amp;#xB1;'>https://b.org&amp;#xB1;</a>.:!? c");
 		// ";" included b/c of dec
 		fxt.Test__parse("a https://b.org&#123;.:!? c", "a <a class='external free' rel='nofollow' href='https://b.org&amp;#123;'>https://b.org&amp;#123;</a>.:!? c");
 		// ";" excluded b/c of invalid.ent; named entity can not contain digits
 		fxt.Test__parse("a https://b.org&a1b;.:!? c" , "a <a class='external free' rel='nofollow' href='https://b.org&amp;a1b'>https://b.org&amp;a1b</a>;.:!? c");
 		// ";" excluded b/c of invalid.hex; no digits after "x"
 		fxt.Test__parse("a https://b.org&#x;.:!? c"  , "a <a class='external free' rel='nofollow' href='https://b.org&amp;#x'>https://b.org&amp;#x</a>;.:!? c");
 		// ";" excluded b/c of invalid.dec; letter where digit expected
 		fxt.Test__parse("a https://b.org&#a;.:!? c"  , "a <a class='external free' rel='nofollow' href='https://b.org&amp;#a'>https://b.org&amp;#a</a>;.:!? c");
 	}
 /*
 TODO: inputs listed for the anchor / element parts of the regex; no test method exists for them yet
 TESTS: regex
 "<a https://a.org>"
 "<img https://a.org>"
 */
 	// "_" directly before the protocol: text is returned unchanged with no anchor
 	@Test   public void Invalid() {fxt.Test__parse("a _https://b.org c", "a _https://b.org c");}
 }
 class Xomw_magiclinks_wkr__fxt {