Mw_parse: Add basic implementation for magiclinks

2024-10-27 20:34:16 +00:00 · 2017-01-27 07:18:34 -05:00 · 2017-01-27 07:18:34 -05:00 · aa1f1ec801
commit aa1f1ec801
parent 7bd176f51f
14 changed files with 543 additions and 386 deletions
--- a/100_core/src/gplx/langs/regxs/Regx_adp.java
+++ b/100_core/src/gplx/langs/regxs/Regx_adp.java
@ -40,6 +40,7 @@ public class Regx_adp {
 		return (Regx_match[])rv.To_ary(Regx_match.class);
 	}
 		private Pattern under;
+	// Exposes the compiled java.util.regex.Pattern stored in "under" (the field assigned by Under_sync).
+	public Pattern Under() {return under;}
 	void Under_sync() {
 		try {under = Pattern.compile(pattern, Pattern.DOTALL | Pattern.UNICODE_CHARACTER_CLASS);}	// JRE.7:UNICODE_CHARACTER_CLASS; added during %w fix for en.w:A#; DATE:2015-06-10 
 		catch (Exception e) {	// NOTE: if invalid, then default to empty pattern (which should return nothing); EX:d:〆る generates [^]; DATE:2013-10-20
--- a/100_core/src/gplx/langs/regxs/Regx_group.java
+++ b/100_core/src/gplx/langs/regxs/Regx_group.java
@ -17,10 +17,21 @@ along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
 package gplx.langs.regxs; import gplx.*; import gplx.langs.*;
 public class Regx_group {
-	public Regx_group(boolean rslt, int bgn, int end, String val) {this.rslt = rslt; this.bgn = bgn; this.end = end; this.val = val;}
-	public boolean Rslt() {return rslt;} private boolean rslt;
-	public int Bgn() {return bgn;} int bgn;
-	public int End() {return end;} int end;
-	public String Val() {return val;} private String val;
-	public static final Regx_group[] Ary_empty = new Regx_group[0];
+	private boolean rslt;
+	private int bgn;
+	private int end;
+	private String val;
+	// Creates a group from a result flag, a begin / end position pair and a value.
+	public Regx_group(boolean rslt, int bgn, int end, String val) {
+		this.val = val;
+		this.end = end;
+		this.bgn = bgn;
+		this.rslt = rslt;
+	}
+	// Overwrites all four members of an existing instance.
+	public void Init(boolean rslt, int bgn, int end, String val) {
+		this.val = val;
+		this.end = end;
+		this.bgn = bgn;
+		this.rslt = rslt;
+	}
+	// result flag passed to the constructor or to Init
+	public boolean Rslt() {
+		return rslt;
+	}
+	// begin position passed to the constructor or to Init
+	public int Bgn() {
+		return bgn;
+	}
+	// end position passed to the constructor or to Init
+	public int End() {
+		return end;
+	}
+	// value passed to the constructor or to Init; may be null
+	public String Val() {
+		return val;
+	}
+	public static final    Regx_group[] Ary_empty = new Regx_group[0];
 }
--- a/100_core/src/gplx/langs/regxs/Regx_match.java
+++ b/100_core/src/gplx/langs/regxs/Regx_match.java
@ -24,5 +24,5 @@ public class Regx_match {
 	public int Find_end() {return find_end;} int find_end;
 	public int Find_len() {return find_end - find_bgn;}
 	public Regx_group[] Groups() {return groups;} Regx_group[] groups = Regx_group.Ary_empty;
-	public static final Regx_match[] Ary_empty = new Regx_match[0];
+	public static final    Regx_match[] Ary_empty = new Regx_match[0];
 }
--- a/100_core/src/gplx/langs/regxs/Regx_rslt.java
+++ b/100_core/src/gplx/langs/regxs/Regx_rslt.java
@ -0,0 +1,46 @@
+/*
+XOWA: the XOWA Offline Wiki Application
+Copyright (C) 2012 gnosygnu@gmail.com
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as
+published by the Free Software Foundation, either version 3 of the
+License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+package gplx.langs.regxs; import gplx.*; import gplx.langs.*;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+public class Regx_rslt {// THREAD.UNSAFE
+	// position at which the next call to Match_next starts searching
+	private int src_pos;
+	// single reusable group; overwritten by every call to Groups__get_at
+	private Regx_group tmp_grp = new Regx_group(false, -1, -1, null);
+	public Matcher match;
+	public int             Groups__len()         {return match.groupCount() + 1;}	// +1 to include group=0 which is entire pattern
+	// NOTE: returns the same Regx_group instance on every call; read Bgn / End before asking for another group
+	public Regx_group      Groups__get_at(int i) {
+		tmp_grp.Init(true, match.start(i), match.end(i), null);
+		return tmp_grp;
+	}
+	// Creates a Matcher for src from the pattern held by regex and sets the position of the first search to src_bgn.
+	public void Init(Regx_adp regex, String src, int src_bgn) {
+		match = regex.Under().matcher(src);
+		this.src_pos = src_bgn;
+	}
+	// Searches for the next match starting at the current position.
+	// Returns true and updates Find_bgn / Find_end when a match is found; returns false when there are no more matches.
+	public boolean Match_next() {
+		// Matcher.find(int) throws IndexOutOfBoundsException when the start is past the end of the input;
+		// that position is only reached after a zero-length match at the very end, so report "no more matches" instead
+		if (src_pos > match.regionEnd()) {
+			this.found = false;
+			return false;
+		}
+		this.found = match.find(src_pos);
+		if (found) {
+			this.find_bgn = match.start();
+			this.find_end = match.end();
+			// a zero-length match leaves find_end == find_bgn; resuming at find_end would return the same match forever,
+			// so step one char past it (same rule Matcher.find() applies internally)
+			this.src_pos = find_end == find_bgn ? find_end + 1 : find_end;
+		}
+		return found;
+	}
+	public boolean         Found()      {return found;}      private boolean found;
+	public int             Find_bgn()   {return find_bgn;}   private int find_bgn;
+	public int             Find_end()   {return find_end;}   private int find_end;
+}
--- a/400_xowa/src/gplx/xowa/mws/parsers/Xomw_parser.java
+++ b/400_xowa/src/gplx/xowa/mws/parsers/Xomw_parser.java
@ -19,7 +19,7 @@ package gplx.xowa.mws.parsers; import gplx.*; import gplx.xowa.*; import gplx.xo
 import gplx.core.btries.*; import gplx.core.net.*;
 import gplx.xowa.mws.parsers.prepros.*; import gplx.xowa.mws.parsers.headings.*;
 import gplx.xowa.mws.parsers.quotes.*; import gplx.xowa.mws.parsers.tables.*; import gplx.xowa.mws.parsers.hrs.*; import gplx.xowa.mws.parsers.nbsps.*;
-import gplx.xowa.mws.parsers.lnkes.*; import gplx.xowa.mws.parsers.lnkis.*;
+import gplx.xowa.mws.parsers.lnkes.*; import gplx.xowa.mws.parsers.lnkis.*; import gplx.xowa.mws.parsers.magiclinks.*;
 import gplx.xowa.mws.utls.*; import gplx.xowa.mws.linkers.*;
 public class Xomw_parser {
 	private final    Xomw_parser_ctx pctx = new Xomw_parser_ctx();
@ -29,10 +29,13 @@ public class Xomw_parser {
 	private final    Xomw_nbsp_wkr nbsp_wkr = new Xomw_nbsp_wkr();
 	private final    Xomw_block_level_pass block_wkr = new Xomw_block_level_pass();
 	private final    Xomw_heading_wkr heading_wkr = new Xomw_heading_wkr();
+	private final    Xomw_magiclinks_wkr magiclinks_wkr = new Xomw_magiclinks_wkr();
 	private final    Xomw_link_renderer link_renderer = new Xomw_link_renderer();
 	private final    Xomw_link_holders holders;
 	private final    Xomw_heading_cbk__html heading_wkr_cbk;
 	private final    Btrie_slim_mgr protocols_trie;
+	private static Xomw_regex_space regex_space;
+	private static Xomw_regex_url regex_url;
 	private final    Btrie_rv trv = new Btrie_rv();
 	private int marker_index = 0;
 	// private final    Xomw_prepro_wkr prepro_wkr = new Xomw_prepro_wkr();
@ -51,10 +54,16 @@ public class Xomw_parser {
 		this.lnke_wkr = new Xomw_lnke_wkr(this);
 		this.lnki_wkr = new Xomw_lnki_wkr(this, holders, link_renderer, protocols_trie);
 		this.heading_wkr_cbk = new Xomw_heading_cbk__html();
+		// lazily create the regex helpers that are shared by all parser instances
+		if (regex_space == null || regex_url == null) {
+			synchronized (Type_adp_.ClassOf_obj(this)) {
+				// re-check under the lock: a thread that raced past the outer check must not rebuild the helpers
+				if (regex_space == null || regex_url == null) {
+					Xomw_regex_space new_space = new Xomw_regex_space();
+					regex_url = new Xomw_regex_url(new_space);
+					regex_space = new_space;	// assigned after regex_url so regex_space is not set while regex_url is still missing
+				}
+				// NOTE(review): the static fields are not volatile; confirm whether stricter publication is required
+			}
+		}
 	}
+	// Passes wiki-specific data and the shared regex helpers to the linker, lnke and lnki workers.
 	public void Init_by_wiki(Xowe_wiki wiki) {
 		linker.Init_by_wiki(wiki.Lang().Lnki_trail_mgr().Trie());
-		lnke_wkr.Init_by_wiki(protocols_trie);
+		lnke_wkr.Init_by_wiki(protocols_trie, regex_url, regex_space);
 		lnki_wkr.Init_by_wiki(wiki);
+		// NOTE(review): magiclinks_wkr is not initialized in this method although Internal_parse calls magiclinks_wkr.Do_magic_links; confirm whether its Init_by_wiki is invoked elsewhere
 	}
 	public void Internal_parse(Xomw_parser_bfr pbfr, byte[] text) {
@ -107,8 +116,8 @@ public class Xomw_parser {
 		// replaceInternalLinks may sometimes leave behind
 		// absolute URLs, which have to be masked to hide them from replaceExternalLinks
 		Xomw_parser_bfr_.Replace(pbfr, Bry__marker__noparse, Bry_.Empty);
+		magiclinks_wkr.Do_magic_links(pctx, pbfr);

-//			$text = $this->doMagicLinks($text);
 //			$text = $this->formatHeadings($text, $origText, $isMain);
 	}

--- a/400_xowa/src/gplx/xowa/mws/parsers/Xomw_parser__tst.java
+++ b/400_xowa/src/gplx/xowa/mws/parsers/Xomw_parser__tst.java
@ -21,37 +21,9 @@ public class Xomw_parser__tst {
 	private final    Xomw_parser__fxt fxt = new Xomw_parser__fxt();
 	@Test  public void Basic() {
 		fxt.Test__parse(String_.Concat_lines_nl_skip_last
-		( "== heading_1 =="
-		, "para_1"
-		, "== heading_2 =="
-		, "para_2"
-		, "-----"
-		, "{|"
-		, "|-"
-		, "|a"
-		, "|}"
-		, "''italics''"
-		, "[https://a.org b]"
-		, "[[A|abc]]"
-		, "a »b« &#160;!important c"
+		("a https://c.org b"
 		), String_.Concat_lines_nl_skip_last
-		( "<h2> heading_1 </h2>"
-		, "<p>para_1"
-		, "</p>"
-		, "<h2> heading_2 </h2>"
-		, "<p>para_2"
-		, "</p>"
-		, "<hr />"
-		, "<table>"
-		, ""
-		, "<tr>"
-		, "<td>a"
-		, "</td></tr></table>"
-		, "<p><i>italics</i>"
-		, "<a class=\"external text\" rel=\"nofollow\" href=\"https://a.org\">b</a>"
-		, "<a href=\"/wiki/A\" title=\"A\">abc</a>"
-		, "a&#160;»b«&#160; !important c"
-		, "</p>"
+		( ""
 		));
 	}		
 }
--- a/400_xowa/src/gplx/xowa/mws/parsers/Xomw_regex_.java
+++ b/400_xowa/src/gplx/xowa/mws/parsers/Xomw_regex_.java
@ -0,0 +1,45 @@
+/*
+XOWA: the XOWA Offline Wiki Application
+Copyright (C) 2012 gnosygnu@gmail.com
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as
+published by the Free Software Foundation, either version 3 of the
+License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+package gplx.xowa.mws.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
+import gplx.core.btries.*;
+public class Xomw_regex_ {
+	// Scans forward from src_bgn while the char at the current position matches an entry in trie.
+	// Returns the position of the first char that does not match, or src_end if every remaining char matches.
+	public static int Find_fwd_while(Btrie_slim_mgr trie, Btrie_rv trv, byte[] src, int src_bgn, int src_end) {
+		int cur = src_bgn;
+		while (cur < src_end) {	// never read at or past src_end; previously the loop ran off the end when all remaining chars matched
+			byte b = src[cur];
+			Object o = trie.Match_at_w_b0(trv, b, src, cur, src_end);
+			if (o == null)
+				break;
+			cur += gplx.core.intls.Utf8_.Len_of_char_by_1st_byte(b);
+			if (cur > src_end) return src_end;	// truncated multi-byte char at end of src
+		}
+		return cur;
+	}
+	// Scans forward from src_bgn until the char at the current position matches an entry in trie.
+	// Returns the position of the first char that matches, or src_end if no remaining char matches.
+	public static int Find_fwd_until(Btrie_slim_mgr trie, Btrie_rv trv, byte[] src, int src_bgn, int src_end) {
+		int cur = src_bgn;
+		while (cur < src_end) {	// never read at or past src_end; previously the loop ran off the end when no char matched
+			byte b = src[cur];
+			Object o = trie.Match_at_w_b0(trv, b, src, cur, src_end);
+			if (o != null)
+				break;
+			cur += gplx.core.intls.Utf8_.Len_of_char_by_1st_byte(b);
+			if (cur > src_end) return src_end;	// truncated multi-byte char at end of src
+		}
+		return cur;
+	}
+}
--- a/400_xowa/src/gplx/xowa/mws/parsers/Xomw_regex_boundary.java
+++ b/400_xowa/src/gplx/xowa/mws/parsers/Xomw_regex_boundary.java
@ -0,0 +1,39 @@
+/*
+XOWA: the XOWA Offline Wiki Application
+Copyright (C) 2012 gnosygnu@gmail.com
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as
+published by the Free Software Foundation, either version 3 of the
+License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+package gplx.xowa.mws.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
+import gplx.core.btries.*;
+public class Xomw_regex_boundary {
+	// NOTE(review): the original comment marked this class THREAD.SAFE ("trv is only for consistent interface"), but the single trv instance is passed to every match call; confirm
+	private final    Btrie_slim_mgr trie = Btrie_slim_mgr.cs();
+	private final    Btrie_rv trv = new Btrie_rv();
+	// Naive boundary check: the byte sequences from space.Ws() and space.Zs() are the only chars registered as boundaries.
+	public Xomw_regex_boundary(Xomw_regex_space space) {
+		Add_all(space.Ws());
+		Add_all(space.Zs());
+	}
+	// Registers each key in the trie; the value is only a marker.
+	private void Add_all(byte[][] keys) {
+		int keys_len = keys.length;
+		for (int i = 0; i < keys_len; i++)
+			trie.Add_bry_byte(keys[i], Byte_.Zero);
+	}
+	// Returns true if pos is the beginning of src, or if the char that ends at pos is one of the registered boundary chars.
+	public boolean Is_boundary_prv(byte[] src, int pos) {
+		if (pos != 0) {
+			// locate the first byte of the char immediately before pos, then look that char up
+			int prv_bgn = gplx.core.intls.Utf8_.Get_pos0_of_char_bwd(src, pos - 1);
+			byte prv_b0 = src[prv_bgn];
+			return trie.Match_at_w_b0(trv, prv_b0, src, prv_bgn, pos) != null;
+		}
+		return true; // BOS is true
+	}
+}
--- a/400_xowa/src/gplx/xowa/mws/parsers/Xomw_regex_space.java
+++ b/400_xowa/src/gplx/xowa/mws/parsers/Xomw_regex_space.java
@ -0,0 +1,64 @@
+/*
+XOWA: the XOWA Offline Wiki Application
+Copyright (C) 2012 gnosygnu@gmail.com
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as
+published by the Free Software Foundation, either version 3 of the
+License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+package gplx.xowa.mws.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
+import gplx.core.btries.*;
+public class Xomw_regex_space {
+	// holds every byte sequence from ws and zs; the value is only a marker
+	private final    Btrie_slim_mgr trie = Btrie_slim_mgr.cs();
+	// Builds the ASCII whitespace table (ws), the Unicode space-separator table (zs) and a trie containing both.
+	public Xomw_regex_space() {
+		byte[] space = Bry_.New_by_ints(32);
+		ws = new byte[][]
+		{ space                                 // U+0020 SPACE
+		, Bry_.New_by_ints(9)                   // tab
+		, Bry_.New_by_ints(10)                  // line feed
+		, Bry_.New_by_ints(13)                  // carriage return
+		};
+		// Zs; REF:http://www.fileformat.info/info/unicode/category/Zs/list.htm
+		zs = new byte[][]
+		{ space                                 // U+0020 SPACE
+		, Bry_.New_by_ints(194, 160)            // U+00A0 NO-BREAK SPACE
+		, Bry_.New_by_ints(225, 154, 128)       // U+1680 OGHAM SPACE MARK
+		, Bry_.New_by_ints(226, 128, 128)       // U+2000 EN QUAD; was missing from the original table
+		, Bry_.New_by_ints(226, 128, 129)       // U+2001 EM QUAD
+		, Bry_.New_by_ints(226, 128, 130)       // U+2002 EN SPACE
+		, Bry_.New_by_ints(226, 128, 131)       // U+2003 EM SPACE
+		, Bry_.New_by_ints(226, 128, 132)       // U+2004 THREE-PER-EM SPACE
+		, Bry_.New_by_ints(226, 128, 133)       // U+2005 FOUR-PER-EM SPACE
+		, Bry_.New_by_ints(226, 128, 134)       // U+2006 SIX-PER-EM SPACE
+		, Bry_.New_by_ints(226, 128, 135)       // U+2007 FIGURE SPACE
+		, Bry_.New_by_ints(226, 128, 136)       // U+2008 PUNCTUATION SPACE
+		, Bry_.New_by_ints(226, 128, 137)       // U+2009 THIN SPACE
+		, Bry_.New_by_ints(226, 128, 138)       // U+200A HAIR SPACE
+		, Bry_.New_by_ints(226, 128, 175)       // U+202F NARROW NO-BREAK SPACE
+		, Bry_.New_by_ints(226, 129, 159)       // U+205F MEDIUM MATHEMATICAL SPACE
+		, Bry_.New_by_ints(227, 128, 128)       // U+3000 IDEOGRAPHIC SPACE
+		};
+
+		byte[][] ary = ws;
+		for (byte[] bry : ary) {
+			trie.Add_bry_byte(bry, Byte_.Zero);
+		}
+		ary = zs;
+		for (byte[] bry : ary) {
+			trie.Add_bry_byte(bry, Byte_.Zero);
+		}
+	}
+	// UTF-8 byte sequences for space, tab, line feed and carriage return
+	public byte[][] Ws() {return ws;} private byte[][] ws;
+	// UTF-8 byte sequences for the chars in Unicode category Zs
+	public byte[][] Zs() {return zs;} private byte[][] zs;
+	// Delegates to Xomw_regex_.Find_fwd_while with the combined ws + zs trie.
+	public int Find_fwd_while(Btrie_rv trv, byte[] src, int src_bgn, int src_end) {
+		return Xomw_regex_.Find_fwd_while(trie, trv, src, src_bgn, src_end);
+	}
+}
--- a/400_xowa/src/gplx/xowa/mws/parsers/Xomw_regex_url.java
+++ b/400_xowa/src/gplx/xowa/mws/parsers/Xomw_regex_url.java
@ -0,0 +1,39 @@
+/*
+XOWA: the XOWA Offline Wiki Application
+Copyright (C) 2012 gnosygnu@gmail.com
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as
+published by the Free Software Foundation, either version 3 of the
+License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+package gplx.xowa.mws.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
+import gplx.core.btries.*;
+public class Xomw_regex_url {
+	// holds every char that is NOT allowed in a url; the value is only a marker
+	private final    Btrie_slim_mgr trie;
+	// Builds the set of chars excluded by REGEX:[^][<>"\\x00-\\x20\\x7F\p{Zs}]
+	public Xomw_regex_url(Xomw_regex_space regex_space) {
+		Btrie_slim_mgr rv = Btrie_slim_mgr.cs();
+		// brackets, angle brackets and double-quote
+		rv.Add_str_byte__many(Byte_.Zero, "[", "]", "<", ">", "\"");
+		// x00 - x20
+		for (int i = 0; i <= 32; i++)
+			rv.Add_bry_byte(new byte[] {(byte)i}, Byte_.Zero);
+		// x7F
+		rv.Add_bry_byte(Bry_.New_by_ints(127), Byte_.Zero);
+		// \p{Zs}: taken from the shared space table
+		byte[][] zs_ary = regex_space.Zs();
+		int zs_len = zs_ary.length;
+		for (int i = 0; i < zs_len; i++)
+			rv.Add_bry_byte(zs_ary[i], Byte_.Zero);
+		this.trie = rv;
+	}
+	// NOTE: despite the name, this delegates to Xomw_regex_.Find_fwd_until; the trie holds the excluded chars, so the scan goes forward until one of them is found
+	public int Find_fwd_while(Btrie_rv trv, byte[] src, int src_bgn, int src_end) {
+		return Xomw_regex_.Find_fwd_until(trie, trv, src, src_bgn, src_end);
+	}
+}
--- a/400_xowa/src/gplx/xowa/mws/parsers/lnkes/Xomw_lnke_wkr.java
+++ b/400_xowa/src/gplx/xowa/mws/parsers/lnkes/Xomw_lnke_wkr.java
@ -26,12 +26,16 @@ public class Xomw_lnke_wkr {// THREAD.UNSAFE: caching for repeated calls
 	private int autonumber;
 	private final    Xomw_linker linker;
 	private final    Xomwh_atr_mgr attribs = new Xomwh_atr_mgr();
+	private Xomw_regex_url regex_url;
+	private Xomw_regex_space regex_space;
 	public Xomw_lnke_wkr(Xomw_parser mgr) {
 		this.tmp = mgr.Tmp();
 		this.linker = mgr.Linker();
 	}
-	public void Init_by_wiki(Btrie_slim_mgr protocol_trie) {
+	// Stores the protocol trie and the two regex helpers on this worker for later use.
+	public void Init_by_wiki(Btrie_slim_mgr protocol_trie, Xomw_regex_url regex_url, Xomw_regex_space regex_space) {
 		this.protocol_trie = protocol_trie;
+		this.regex_url = regex_url;
+		this.regex_space = regex_space;
 	}
 	public void Replace_external_links(Xomw_parser_ctx pctx, Xomw_parser_bfr pbfr) {
 		// XO.PBFR
@ -101,14 +105,7 @@ public class Xomw_lnke_wkr {// THREAD.UNSAFE: caching for repeated calls
 			
 			// check for one-or-more url chars; [^][<>"\\x00-\\x20\\x7F\p{Zs}]
 			int domain_bgn = cur;
-			while (true) {
-				byte b = src[cur];
-				Object url_char_byte = invalid_url_chars_trie.Match_at_w_b0(trv, b, src, cur, src_end);
-				if (url_char_byte == null)
-					cur += gplx.core.intls.Utf8_.Len_of_char_by_1st_byte(b);
-				else
-					break;
-			}
+			cur = regex_url.Find_fwd_while(trv, src, domain_bgn, src_end);
 			if (cur - domain_bgn == 0) {
 				bfr.Add_mid(src, prv, cur);
 				prv = cur;
@ -116,14 +113,8 @@ public class Xomw_lnke_wkr {// THREAD.UNSAFE: caching for repeated calls
 			}
 			int url_end = cur;

-			// get ws (if any)
-			int ws_bgn = -1;
-			while (true) {
-				Object space_byte = space_chars_trie.Match_at(trv, src, cur, src_end);
-				if (space_byte == null) break;
-				if (ws_bgn == -1) ws_bgn = cur;
-				cur += ((Int_obj_val)space_byte).Val();
-			}
+			// skip ws
+			cur = regex_space.Find_fwd_while(trv, src, cur, src_end);

 			// get text (if any)
 			int text_bgn = -1, text_end = -1;
@ -244,27 +235,7 @@ public class Xomw_lnke_wkr {// THREAD.UNSAFE: caching for repeated calls
 	, Link_type__autonumber     = Bry_.new_a7("autonumber")
 	;

-	private static final    Btrie_slim_mgr
-	  invalid_url_chars_trie  = New__invalid_url_chars_trie()
-	, space_chars_trie        = New__space_chars_trie()
-	, invalid_text_chars_trie = New__invalid_text_chars_trie()
-	;
-	private static Btrie_slim_mgr New__invalid_url_chars_trie() {	// REGEX:[^][<>"\\x00-\\x20\\x7F\p{Zs}]; NOTE: val is just a marker
-		Btrie_slim_mgr rv = Btrie_slim_mgr.cs();
-		rv.Add_str_byte__many(Byte_.Zero, "[", "]", "<", ">", "\"");
-		for (byte i = 0; i < 33; i++) {
-			rv.Add_bry_byte(new byte[] {i}, Byte_.Zero);
-		}
-		rv.Add_bry_byte(Bry_.New_by_ints(127), Byte_.Zero);	// x7F
-		rv.Add_bry_byte(Bry_.New_by_ints(227, 128, 128), Byte_.Zero);	// \p{Zs}	// e3 80 80; https://phabricator.wikimedia.org/T21052
-		return rv;
-	}
-	private static Btrie_slim_mgr New__space_chars_trie() { // REGEX:\p{Zs}; NOTE: val is key.length
-		Btrie_slim_mgr rv = Btrie_slim_mgr.cs();
-		New__trie_itm__by_len(rv, 32);
-		New__trie_itm__by_len(rv, 227, 128, 128);  // \p{Zs}	// e3 80 80; https://phabricator.wikimedia.org/T21052
-		return rv;
-	}
+	private static final    Btrie_slim_mgr invalid_text_chars_trie = New__invalid_text_chars_trie();
 	private static Btrie_slim_mgr New__invalid_text_chars_trie() { // REGEX:([^\]\\x00-\\x08\\x0a-\\x1F]*?); NOTE: val is key.length
 		Btrie_slim_mgr rv = Btrie_slim_mgr.cs();
 		New__trie_itm__by_len(rv, Byte_ascii.Brack_end);
--- a/400_xowa/src/gplx/xowa/mws/parsers/lnkes/Xomw_lnke_wkr__tst.java
+++ b/400_xowa/src/gplx/xowa/mws/parsers/lnkes/Xomw_lnke_wkr__tst.java
@ -45,7 +45,8 @@ class Xomw_lnke_wkr__fxt {
 	private final    Xomw_parser_bfr pbfr = new Xomw_parser_bfr();
 	private boolean apos = true;
 	public Xomw_lnke_wkr__fxt() {
-		wkr.Init_by_wiki(Xomw_parser.Protocols__dflt());
+		// one Xomw_regex_space instance is used both to build the url regex and as the worker's space regex
+		Xomw_regex_space regex_space = new Xomw_regex_space();
+		wkr.Init_by_wiki(Xomw_parser.Protocols__dflt(), new Xomw_regex_url(regex_space), regex_space);
 	}
 	public void Test__parse(String src_str, String expd) {
 		byte[] src_bry = Bry_.new_u8(src_str);
--- a/400_xowa/src/gplx/xowa/mws/parsers/magiclinks/Xomw_magiclinks_wkr.java
+++ b/400_xowa/src/gplx/xowa/mws/parsers/magiclinks/Xomw_magiclinks_wkr.java
@ -17,315 +17,229 @@ along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
 package gplx.xowa.mws.parsers.magiclinks; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
 import gplx.core.primitives.*; import gplx.core.btries.*; import gplx.core.net.*;
-import gplx.langs.phps.utls.*;
-//	public class Xomw_magiclinks_wkr {
-//		private final    Btrie_slim_mgr regex_trie = Btrie_slim_mgr.ci_a7(); // NOTE: must be ci to handle protocols; EX: "https:" and "HTTPS:"
-//		private final    Btrie_rv trv = new Btrie_rv();
-//		public Xomw_magiclinks_wkr() {
-//		}
-//		private static byte[] Tag__anch__rhs, Prefix__rfc, Prefix__pmid;
-//
-//		private static final byte Space__tab = 1, Space__nbsp_ent = 2, Space__nbsp_dec = 3, Space__nbsp_hex = 4;
-//		private static Btrie_slim_mgr space_trie;
-//		//	static final SPACE_NOT_NL = '(?:\t|&nbsp;|&\#0*160;|&\#[Xx]0*[Aa]0;|\p{Zs})';
-////		public void Test() {
-////			regex.Add("\t", Space__tab);
-////			regex.Add("&nbsp;", Space__nbsp__ent);
-////			regex.Add(Regex.Make("&#").Star("0").Add("160;"), Space__nbsp__dec);
-////			regex.Add(Regex.Make("&#").Brack("X", "x").Star("0").Brack("A", "a").Add("0"), Space__nbsp__hex);
-////		}
-//		public int Find_fwd_space(byte[] src, int cur, int src_end) {
-//			return -1;
-//		}
-//
-//		private static final byte Regex__anch = 1, Regex__elem = 2, Regex__free = 3, Regex__rfc = 5, Regex__isbn = 6, Regex__pmid = 7;
-//		public void Init_by_wiki() {
-//			regex_trie.Add_str_byte("<a", Regex__anch);
-//			regex_trie.Add_str_byte("<" , Regex__elem);
-//			
-//			Gfo_protocol_itm[] protocol_ary = Gfo_protocol_itm.Ary();
-//			int protocol_len = protocol_ary.length;
-//			for (int i = 0; i < protocol_len; i++) {
-//				Gfo_protocol_itm itm = protocol_ary[i];
-//				regex_trie.Add_bry_byte(itm.Key_w_colon_bry(), Regex__free);
-//			}
-//			regex_trie.Add_str_byte("RFC " , Regex__rfc);
-//			regex_trie.Add_str_byte("PMID " , Regex__rfc);
-//			regex_trie.Add_str_byte("ISBN ", Regex__rfc);
-//
-//			if (Tag__anch__rhs == null) {
-//				synchronized (Type_adp_.ClassOf_obj(this)) {
-//					Tag__anch__rhs = Bry_.new_a7("</a>");
-//					Prefix__rfc = Bry_.new_a7("RFC");
-//					Prefix__pmid = Bry_.new_a7("PMID");
-//					space_trie = Btrie_slim_mgr.ci_a7()
-//					.Add_str_byte("\t", Space__tab)
-//					.Add_str_byte("&nbsp;", Space__nbsp_ent)
-//					.Add_str_byte("&#", Space__nbsp_dec)
-//					.Add_str_byte("&x", Space__nbsp_hex)
-//					;
-//				}
-//			}
-//		}
-//
-//		// Replace special strings like "ISBN xxx" and "RFC xxx" with
-//		// magic external links.
-//		public void Do_magic_links(Xomw_parser_ctx pctx, Xomw_parser_bfr pbfr) {
-//			// XO.PBFR
-//			Bry_bfr src_bfr = pbfr.Src();
-//			byte[] src = src_bfr.Bfr();
-//			int src_bgn = 0;
-//			int src_end = src_bfr.Len();
-//			Bry_bfr bfr = pbfr.Trg();
-//
-//			int cur = src_bgn;
-//			int prv = cur;
-//			boolean dirty = true;
-//			while (true) {
-//				if (cur == src_end) {
-//					if (dirty)
-//						bfr.Add_mid(src, prv, src_end);
-//					break;
-//				}
-//
-//				byte b = src[cur];
-//				Object o = regex_trie.Match_at_w_b0(trv, b, src, cur, src_end);
-//				// current byte doesn't look like magiclink; continue;
-//				if (o == null) {
-//					cur++;
-//					continue;
-//				}
-//				// looks like magiclink; do additional processing
-//				byte regex_tid = ((Byte_obj_ref)o).Val();
-//				int trv_pos = trv.Pos();
-//				int nxt_pos = trv_pos;
-//				boolean regex_valid = true;
-//				switch (regex_tid) {
-//					case Regex__anch:	// (<a[ \t\r\n>].*?</a>) |      // m[1]: Skip link text
-//						if (trv_pos < src_end) {
-//							// find ws in "[ \t\r\n>]"
-//							byte ws_byte = src[cur];
-//							switch (ws_byte) {
-//								case Byte_ascii.Space:
-//								case Byte_ascii.Tab:
-//								case Byte_ascii.Cr:
-//								case Byte_ascii.Nl:
-//									break;
-//								default:
-//									regex_valid = false;
-//									break;
-//							}
-//							if (regex_valid) {
-//								// find </a>
-//								nxt_pos++;
-//								int anch_end = Bry_find_.Find_fwd(src, Tag__anch__rhs, nxt_pos, src_end);
-//								if (anch_end == Bry_find_.Not_found) {
-//									regex_valid = false;
-//								}
-//								else {
-//									cur = anch_end + Tag__anch__rhs.length;
-//								}
-//							}
-//						}
-//						else {
-//							regex_valid = false;
-//						}
-//						break;
-//					case Regex__elem: // (<.*?>) |                    // m[2]: Skip stuff inside
-//						// just find ">"
-//						int elem_end = Bry_find_.Find_fwd(src, Byte_ascii.Angle_end, nxt_pos, src_end);
-//						if (elem_end == Bry_find_.Not_found)
-//							regex_valid = false;
-//						else
-//							cur = elem_end + 1;
-//						break;
-//					case Regex__free:
-//						// addr; urlchar
-//						break;
-//					case Regex__rfc:
-//					case Regex__pmid:
-//						// byte[] prefix = regex == Regex__rfc ? Prefix__rfc : Prefix__pmid;
-//						// match previous for case sensitivity
-////						if (Bry_.Eq(src, trv_pos - prefix.length - 1, trv_pos - 1, prefix)) {
-////
-////						}
-////						else {
-////							regex_valid = false;
-////						}
-//						break;
-//				}
-//				
-////				'!(?:                            // Start cases
-////					(<a[ \t\r\n>].*?</a>) |      // m[1]: Skip link text
-////					(<.*?>) |                    // m[2]: Skip stuff inside
-////												 //       HTML elements' . "
-////					(\b(?i:$prots)($addr$urlChar*)) | // m[3]: Free external links
-////												 // m[4]: Post-protocol path
-////					\b(?:RFC|PMID) $spaces       // m[5]: RFC or PMID, capture number
-////						([0-9]+)\b |
-////					\bISBN $spaces (            // m[6]: ISBN, capture number
-////						(?: 97[89] $spdash?)?   //  optional 13-digit ISBN prefix
-////						(?: [0-9]  $spdash?){9} //  9 digits with opt. delimiters
-////						[0-9Xx]                  //  check digit
-////					)\b
-//
-//			}
-//			if (dirty)
-//				pbfr.Switch();
+import gplx.langs.phps.utls.*; import gplx.xowa.mws.htmls.*;
+import gplx.langs.regxs.*;
+public class Xomw_magiclinks_wkr {
+	private final    Btrie_slim_mgr regex_trie = Btrie_slim_mgr.ci_a7(); // NOTE: must be ci to handle protocols; EX: "https:" and "HTTPS:"
+	private final    Btrie_rv trv = new Btrie_rv();
+	private static byte[] Tag__anch__rhs;
+	private Xomw_regex_boundary regex_boundary;
+	private Xomw_regex_url regex_url;
+	private Xomw_linker linker;
+	private byte[] page_title;

-//			$prots = wfUrlProtocolsWithoutProtRel();
-//			$urlChar = self::EXT_LINK_URL_CLASS;
-//			$addr = self::EXT_LINK_ADDR;
-//			$space = self::SPACE_NOT_NL; //  non-newline space
-//			$spdash = "(?:-|$space)"; // a dash or a non-newline space
-//			$spaces = "$space++"; // possessive match of 1 or more spaces
-//			$text = preg_replace_callback(
-//				'!(?:                            // Start cases
-//					(<a[ \t\r\n>].*?</a>) |      // m[1]: Skip link text
-//					(<.*?>) |                    // m[2]: Skip stuff inside
-//												 //       HTML elements' . "
-//					(\b(?i:$prots)($addr$urlChar*)) | // m[3]: Free external links
-//												 // m[4]: Post-protocol path
-//					\b(?:RFC|PMID) $spaces       // m[5]: RFC or PMID, capture number
-//						([0-9]+)\b |
-//					\bISBN $spaces (            // m[6]: ISBN, capture number
-//						(?: 97[89] $spdash?)?   //  optional 13-digit ISBN prefix
-//						(?: [0-9]  $spdash?){9} //  9 digits with opt. delimiters
-//						[0-9Xx]                  //  check digit
-//					)\b
-//				)!xu", [ &$this, 'magicLinkCallback' ], $text);
-//			return $text;
-//		}
+	private static final byte Regex__anch = 1, Regex__elem = 2, Regex__free = 3;
+	public void Init_by_wiki(Xomw_linker linker, Xomw_regex_boundary regex_boundary, Xomw_regex_url regex_url) {
+		this.linker = linker;
+		this.regex_boundary = regex_boundary;
+		this.regex_url = regex_url;
+		regex_trie.Add_str_byte("<a", Regex__anch);
+		regex_trie.Add_str_byte("<" , Regex__elem);
 		
-//		public function magicLinkCallback($m) {
-//			if (isset($m[1]) && $m[1] !== '') {
-//				// Skip anchor
-//				return $m[0];
-//			} else if (isset($m[2]) && $m[2] !== '') {
-//				// Skip HTML element
-//				return $m[0];
-//			} else if (isset($m[3]) && $m[3] !== '') {
-//				// Free external link
-//				return $this->makeFreeExternalLink($m[0], strlen($m[4]));
-//			} else if (isset($m[5]) && $m[5] !== '') {
-//				// RFC or PMID
-//				if (substr($m[0], 0, 3) === 'RFC') {
-//					if (!$this->mOptions->getMagicRFCLinks()) {
-//						return $m[0];
-//					}
-//					$keyword = 'RFC';
-//					$urlmsg = 'rfcurl';
-//					$cssClass = 'mw-magiclink-rfc';
-//					$trackingCat = 'magiclink-tracking-rfc';
-//					$id = $m[5];
-//				} else if (substr($m[0], 0, 4) === 'PMID') {
-//					if (!$this->mOptions->getMagicPMIDLinks()) {
-//						return $m[0];
-//					}
-//					$keyword = 'PMID';
-//					$urlmsg = 'pubmedurl';
-//					$cssClass = 'mw-magiclink-pmid';
-//					$trackingCat = 'magiclink-tracking-pmid';
-//					$id = $m[5];
-//				} else {
-//					throw new MWException(__METHOD__ . ': unrecognised match type "' .
-//						substr($m[0], 0, 20) . '"');
-//				}
-//				$url = wfMessage($urlmsg, $id)->inContentLanguage()->text();
-//				$this->addTrackingCategory($trackingCat);
-//				return Linker::makeExternalLink($url, "{$keyword} {$id}", true, $cssClass, [], $this->mTitle);
-//			} else if (isset($m[6]) && $m[6] !== ''
-//				&& $this->mOptions->getMagicISBNLinks()
-//			) {
-//				// ISBN
-//				$isbn = $m[6];
-//				$space = self::SPACE_NOT_NL; //  non-newline space
-//				$isbn = preg_replace("/$space/", ' ', $isbn);
-//				$num = strtr($isbn, [
-//					'-' => '',
-//					' ' => '',
-//					'x' => 'X',
-//				]);
-//				$this->addTrackingCategory('magiclink-tracking-isbn');
-//				return $this->getLinkRenderer()->makeKnownLink(
-//					SpecialPage::getTitleFor('Booksources', $num),
-//					"ISBN $isbn",
-//					[
-//						'class' => '@gplx.Internal protected mw-magiclink-isbn',
-//						'title' => false // suppress title attribute
-//					]
-//				);
-//			} else {
-//				return $m[0];
+		Gfo_protocol_itm[] protocol_ary = Gfo_protocol_itm.Ary();
+		int protocol_len = protocol_ary.length;
+		for (int i = 0; i < protocol_len; i++) {
+			Gfo_protocol_itm itm = protocol_ary[i];
+			regex_trie.Add_bry_byte(itm.Text_bry(), Regex__free);
+		}
+
+		if (Tag__anch__rhs == null) {
+			synchronized (Type_adp_.ClassOf_obj(this)) {
+				Tag__anch__rhs = Bry_.new_a7("</a>");
+			}
+		}
+	}
+
+	// Replace special strings like "ISBN xxx" and "RFC xxx" with
+	// magic external links.
+	// XO: only free external links (EX: "https://b.org") are converted; RFC|PMID|ISBN are not handled (see NOTE below)
+	//   pctx: parser context; its page title is stored in this.page_title when a free link is made
+	//   pbfr: parser buffer; text is read from Src() and written to Trg(); Switch() is called at the end
+	public void Do_magic_links(Xomw_parser_ctx pctx, Xomw_parser_bfr pbfr) {
+		// XO.PBFR
+		Bry_bfr src_bfr = pbfr.Src();
+		byte[] src = src_bfr.Bfr();
+		int src_bgn = 0;
+		int src_end = src_bfr.Len();
+		Bry_bfr bfr = pbfr.Trg();
+
+		int cur = src_bgn;		// position of byte being examined
+		int prv = cur;			// start of text which has not yet been copied to bfr
+		boolean dirty = true;	// NOTE: starts as true, so trailing text is always copied and Switch is always called
+		// PORTED.REGEX: handle below
+		// NOTE: not handling RFC|PMID|ISBN b/c of upcoming obsolescence: https://www.mediawiki.org/wiki/Requests_for_comment/Future_of_magic_links
+		//'!(?:                            // Start cases
+		//	(<a[ \t\r\n>].*?</a>) |      // m[1]: Skip link text
+		//	(<.*?>) |                    // m[2]: Skip stuff inside
+		//									//       HTML elements' . "
+		//	(\b(?i:$prots)($addr$urlChar*)) | // m[3]: Free external links
+		//									// m[4]: Post-protocol path
+		//	\b(?:RFC|PMID) $spaces       // m[5]: RFC or PMID, capture number
+		//		([0-9]+)\b |
+		//	\bISBN $spaces (            // m[6]: ISBN, capture number
+		//		(?: 97[89] $spdash?)?   //  optional 13-digit ISBN prefix
+		//		(?: [0-9]  $spdash?){9} //  9 digits with opt. delimiters
+		//		[0-9Xx]                  //  check digit
+		//	)\b
+		while (true) {
+			// reached end of src; copy remaining text and stop
+			if (cur == src_end) {
+				if (dirty)
+					bfr.Add_mid(src, prv, src_end);
+				break;
+			}
+
+			byte b = src[cur];
+			Object o = regex_trie.Match_at_w_b0(trv, b, src, cur, src_end);
+			// current byte doesn't look like magiclink; continue;
+			if (o == null) {
+				cur++;
+				continue;
+			}
+
+			// looks like magiclink; do additional processing
+			byte regex_tid = ((Byte_obj_val)o).Val();
+			int old_pos = cur;			// start of the trie match
+			int trv_pos = trv.Pos();	// presumably the position just after the matched trie key
+			int nxt_pos = trv_pos;
+			boolean regex_valid = true;
+			switch (regex_tid) {
+				case Regex__anch:	// (<a[ \t\r\n>].*?</a>) |      // m[1]: Skip link text
+					if (trv_pos < src_end) {
+						// find delimiter in "[ \t\r\n>]"
+						// NOTE: check byte at trv_pos, not cur; src[cur] is the 1st byte of the matched key and can never be ws
+						byte ws_byte = src[trv_pos];
+						switch (ws_byte) {
+							case Byte_ascii.Space:
+							case Byte_ascii.Tab:
+							case Byte_ascii.Cr:
+							case Byte_ascii.Nl:
+							case Byte_ascii.Angle_end:	// ">" is also in the regex's char class; EX: "<a>"
+								break;
+							default:
+								regex_valid = false;
+								break;
+						}
+						if (regex_valid) {
+							// find </a>
+							nxt_pos++;	// skip delimiter byte
+							int anch_end = Bry_find_.Find_fwd(src, Tag__anch__rhs, nxt_pos, src_end);
+							if (anch_end == Bry_find_.Not_found) {
+								regex_valid = false;
+							}
+							else {
+								cur = anch_end + Tag__anch__rhs.length;
+							}
+						}
+					}
+					else {
+						regex_valid = false;
+					}
+					break;
+				case Regex__elem: // (<.*?>) |                    // m[2]: Skip stuff inside
+					// just find ">"
+					int elem_end = Bry_find_.Find_fwd(src, Byte_ascii.Angle_end, nxt_pos, src_end);
+					if (elem_end == Bry_find_.Not_found)
+						regex_valid = false;
+					else
+						cur = elem_end + 1;
+					break;
+				case Regex__free:
+					// protocol must start at a boundary; EX: "_https://b.org" is not linked
+					if (regex_boundary.Is_boundary_prv(src, cur)) {
+						int url_end = regex_url.Find_fwd_while(trv, src, nxt_pos, src_end);
+						if (url_end == nxt_pos) {	// nothing after protocol
+							regex_valid = false;
+						}
+						else
+							cur = url_end;
+					}
+					else
+						regex_valid = false;
+					break;
+			}
+
+			// not a real match; move to next byte
+			if (!regex_valid) {
+				cur++;
+				continue;
+			}
+
+			// anch / elem: cur was moved past the skipped text; prv is unchanged so the text is copied as is later
+			if (regex_tid == Regex__free) {
+				this.page_title = pctx.Page_title().Full_db();
+				dirty = true;
+				bfr.Add_mid(src, prv, old_pos);	// copy text before url
+				this.Make_free_external_link(bfr, Bry_.Mid(src, old_pos, cur), 0);
+				prv = cur;
+			}
+		}
+		if (dirty) {
+			pbfr.Switch();
+		}
+	}
+
+	// Make a free external link, given a user-supplied URL
+	// XO: partial port; the trail handling, Sanitizer::cleanUrl and external-image steps are still commented out below
+	//   bfr: buffer passed to linker.Make_external_link
+	//   url: bytes of the url; passed as is for both the 2nd and 3rd arguments of linker.Make_external_link
+	//   num_post_proto: not read by the ported code; the check that uses $numPostProto is still commented out
+	public void Make_free_external_link(Bry_bfr bfr, byte[] url, int num_post_proto) {
+//			byte[] trail = Bry_.Empty;
+
+		// The characters '<' and '>' (which were escaped by
+		// removeHTMLtags()) should not be included in
+		// URLs, per RFC 2396.
+		// Make &nbsp; terminate a URL as well (bug T84937)
+
+//			$m2 = [];
+//			if (preg_match(
+//				'/&(lt|gt|nbsp|#x0*(3[CcEe]|[Aa]0)|#0*(60|62|160));/',
+//				$url,
+//				$m2,
+//				PREG_OFFSET_CAPTURE
+//			)) {
+//				trail = substr($url, $m2[0][1]) . $trail;
+//				$url = substr($url, 0, $m2[0][1]);
 //			}

-		// Make a free external link, given a user-supplied URL
-//			public void Make_free_external_link(byte[] url, int num_post_proto) {
-//				byte[] trail = Bry_.Empty;
-
-			// The characters '<' and '>' (which were escaped by
-			// removeHTMLtags()) should not be included in
-			// URLs, per RFC 2396.
-			// Make &nbsp; terminate a URL as well (bug T84937)
-//				$m2 = [];
-//				if (preg_match(
-//					'/&(lt|gt|nbsp|#x0*(3[CcEe]|[Aa]0)|#0*(60|62|160));/',
-//					$url,
-//					$m2,
-//					PREG_OFFSET_CAPTURE
-//				)) {
-//					trail = substr($url, $m2[0][1]) . $trail;
-//					$url = substr($url, 0, $m2[0][1]);
-//				}
-
-			// Move trailing punctuation to $trail
-//				$sep = ',;\.:!?';
-			// If there is no left bracket, then consider right brackets fair game too
-//				if (strpos($url, '(') === false) {
-//					$sep .= ')';
-//				}
-
-//				$urlRev = strrev($url);
-//				$numSepChars = strspn($urlRev, $sep);
-			// Don't break a trailing HTML entity by moving the ; into $trail
-			// This is in hot code, so use substr_compare to avoid having to
-			// create a new String Object for the comparison
-//				if ($numSepChars && substr_compare($url, ";", -$numSepChars, 1) === 0) {
-				// more optimization: instead of running preg_match with a $
-				// anchor, which can be slow, do the match on the reversed
-				// String starting at the desired offset.
-				// un-reversed regexp is: /&([a-z]+|#x[\da-f]+|#\d+)$/i
-//					if (preg_match('/\G([a-z]+|[\da-f]+x#|\d+#)&/i', $urlRev, $m2, 0, $numSepChars)) {
-//						$numSepChars--;
-//					}
-//				}
-//				if ($numSepChars) {
-//					$trail = substr($url, -$numSepChars) . $trail;
-//					$url = substr($url, 0, -$numSepChars);
-//				}
-
-			// Verify that we still have a real URL after trail removal, and
-			// not just lone protocol
-//				if (strlen($trail) >= $numPostProto) {
-//					return $url . $trail;
-//				}
-
-//				$url = Sanitizer::cleanUrl($url);
-
-			// Is this an external image?
-//				$text = $this->maybeMakeExternalImage($url);
-//				if ($text === false) {
-				// Not an image, make a link
-//					$text = Linker::makeExternalLink($url,
-//						$this->getConverterLanguage()->markNoConversion($url, true),
-//						true, 'free',
-//						$this->getExternalLinkAttribs($url), $this->mTitle);
-				// Register it in the output Object...
-				// Replace unnecessary URL escape codes with their equivalent characters
-//					$pasteurized = self::normalizeLinkUrl($url);
-//					$this->mOutput->addExternalLink($pasteurized);
-//				}
-//				return $text . $trail;
+		// Move trailing punctuation to $trail
+//			$sep = ',;\.:!?';
+		// If there is no left bracket, then consider right brackets fair game too
+//			if (strpos($url, '(') === false) {
+//				$sep .= ')';
 //			}
-//		}
-//	}
+
+//			$urlRev = strrev($url);
+//			$numSepChars = strspn($urlRev, $sep);
+		// Don't break a trailing HTML entity by moving the ; into $trail
+		// This is in hot code, so use substr_compare to avoid having to
+		// create a new String Object for the comparison
+
+//			if ($numSepChars && substr_compare($url, ";", -$numSepChars, 1) === 0) {
+			// more optimization: instead of running preg_match with a $
+			// anchor, which can be slow, do the match on the reversed
+			// String starting at the desired offset.
+			// un-reversed regexp is: /&([a-z]+|#x[\da-f]+|#\d+)$/i
+//				if (preg_match('/\G([a-z]+|[\da-f]+x#|\d+#)&/i', $urlRev, $m2, 0, $numSepChars)) {
+//					$numSepChars--;
+//				}
+//			}
+//			if ($numSepChars) {
+//				$trail = substr($url, -$numSepChars) . $trail;
+//				$url = substr($url, 0, -$numSepChars);
+//			}
+
+		// Verify that we still have a real URL after trail removal, and
+		// not just lone protocol
+//			if (strlen($trail) >= $numPostProto) {
+//				return $url . $trail;
+//			}
+
+//			$url = Sanitizer::cleanUrl($url);
+
+		// Is this an external image?
+		// XO: maybeMakeExternalImage is not ported; text is always null, so the link branch below always runs
+		byte[] text = null; // $this->maybeMakeExternalImage($url);
+		if (text == null) {
+			// Not an image, make a link
+			linker.Make_external_link(bfr, url
+				, url	// $this->getConverterLanguage()->markNoConversion($url, true),
+				, true, Bry_.new_a7("free")
+				, new Xomwh_atr_mgr()	// $this->getExternalLinkAttribs($url)
+				, page_title);
+			// Register it in the output Object...
+			// Replace unnecessary URL escape codes with their equivalent characters
+//				$pasteurized = self::normalizeLinkUrl($url);
+//				$this->mOutput->addExternalLink($pasteurized);
+		}
+//			return $text . $trail;
+	}
+}
--- a/400_xowa/src/gplx/xowa/mws/parsers/magiclinks/Xomw_magiclinks_wkr__tst.java
+++ b/400_xowa/src/gplx/xowa/mws/parsers/magiclinks/Xomw_magiclinks_wkr__tst.java
@ -0,0 +1,45 @@
+/*
+XOWA: the XOWA Offline Wiki Application
+Copyright (C) 2012 gnosygnu@gmail.com
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as
+published by the Free Software Foundation, either version 3 of the
+License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+package gplx.xowa.mws.parsers.magiclinks; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
+import org.junit.*;
+public class Xomw_magiclinks_wkr__tst {
+	private final    Xomw_magiclinks_wkr__fxt fxt = new Xomw_magiclinks_wkr__fxt();
+	@Test   public void Basic() {fxt.Test__parse("a https://b.org c", "a <a class='external free' rel='nofollow' href='https://b.org'>https://b.org</a> c");}
+	@Test   public void Invalid() {fxt.Test__parse("a _https://b.org c", "a _https://b.org c");}
+}
+// Fixture for Xomw_magiclinks_wkr tests; builds a worker over a test wiki and compares parse results
+class Xomw_magiclinks_wkr__fxt {
+	private final    Xomw_magiclinks_wkr wkr = new Xomw_magiclinks_wkr();
+	private final    Xomw_parser_ctx pctx = new Xomw_parser_ctx();
+	private final    Xomw_parser_bfr pbfr = new Xomw_parser_bfr();
+	private boolean apos = true;	// when true, expd is passed through Gfh_utl.Replace_apos before comparing
+	// Creates the test app / wiki, sets the page to "Page_1" and inits the worker
+	public Xomw_magiclinks_wkr__fxt() {
+		Xowe_wiki wiki = Xoa_app_fxt.Make__wiki__edit(Xoa_app_fxt.Make__app__edit());
+
+		Xomw_regex_space space = new Xomw_regex_space();
+		pctx.Init_by_page(wiki.Ttl_parse(Bry_.new_a7("Page_1")));
+
+		// regex helpers share the same Xomw_regex_space
+		Xomw_linker linker = new Xomw_linker();
+		Xomw_regex_boundary boundary = new Xomw_regex_boundary(space);
+		Xomw_regex_url url = new Xomw_regex_url(space);
+		wkr.Init_by_wiki(linker, boundary, url);
+	}
+	// Runs Do_magic_links on src_str and compares the result against expd
+	public void Test__parse(String src_str, String expd) {
+		pbfr.Init(Bry_.new_u8(src_str));
+		wkr.Do_magic_links(pctx, pbfr);
+
+		String expd_str = apos ? gplx.langs.htmls.Gfh_utl.Replace_apos(expd) : expd;
+		String actl_str = pbfr.Rslt().To_str_and_clear();
+		Tfds.Eq_str_lines(expd_str, actl_str, src_str);
+	}
+}