diff --git a/100_core/src/gplx/langs/regxs/Regx_adp.java b/100_core/src/gplx/langs/regxs/Regx_adp.java
index 5d8bfd335..515fc200e 100644
--- a/100_core/src/gplx/langs/regxs/Regx_adp.java
+++ b/100_core/src/gplx/langs/regxs/Regx_adp.java
@@ -40,6 +40,7 @@ public class Regx_adp {
return (Regx_match[])rv.To_ary(Regx_match.class);
}
private Pattern under;
+ public Pattern Under() {return under;} // exposes the compiled java.util.regex.Pattern held in the "under" field (assigned by Under_sync)
void Under_sync() {
try {under = Pattern.compile(pattern, Pattern.DOTALL | Pattern.UNICODE_CHARACTER_CLASS);} // JRE.7:UNICODE_CHARACTER_CLASS; added during %w fix for en.w:A#; DATE:2015-06-10
catch (Exception e) { // NOTE: if invalid, then default to empty pattern (which should return nothing); EX:d:〆る generates [^]; DATE:2013-10-20
diff --git a/100_core/src/gplx/langs/regxs/Regx_group.java b/100_core/src/gplx/langs/regxs/Regx_group.java
index eb32176f7..fb46c72a3 100644
--- a/100_core/src/gplx/langs/regxs/Regx_group.java
+++ b/100_core/src/gplx/langs/regxs/Regx_group.java
@@ -17,10 +17,21 @@ along with this program. If not, see .
*/
package gplx.langs.regxs; import gplx.*; import gplx.langs.*;
public class Regx_group {
- public Regx_group(boolean rslt, int bgn, int end, String val) {this.rslt = rslt; this.bgn = bgn; this.end = end; this.val = val;}
- public boolean Rslt() {return rslt;} private boolean rslt;
- public int Bgn() {return bgn;} int bgn;
- public int End() {return end;} int end;
- public String Val() {return val;} private String val;
- public static final Regx_group[] Ary_empty = new Regx_group[0];
+ public Regx_group(boolean rslt, int bgn, int end, String val) { // stores all four values exactly as passed in
+ this.rslt = rslt;
+ this.bgn = bgn;
+ this.end = end;
+ this.val = val;
+ }
+ public boolean Rslt() {return rslt;} private boolean rslt; // flag supplied by the creator; NOTE(review): presumably "group matched" -- confirm against callers
+ public int Bgn() {return bgn;} private int bgn; // bgn offset as supplied by the creator
+ public int End() {return end;} private int end; // end offset as supplied by the creator
+ public String Val() {return val;} private String val; // text as supplied by the creator; Regx_rslt passes null
+ public void Init(boolean rslt, int bgn, int end, String val) { // overwrites all four values in place so that one instance can be reused
+ this.rslt = rslt;
+ this.bgn = bgn;
+ this.end = end;
+ this.val = val;
+ }
+ public static final Regx_group[] Ary_empty = new Regx_group[0]; // shared zero-length array
}
diff --git a/100_core/src/gplx/langs/regxs/Regx_match.java b/100_core/src/gplx/langs/regxs/Regx_match.java
index 34617151f..2a71106e1 100644
--- a/100_core/src/gplx/langs/regxs/Regx_match.java
+++ b/100_core/src/gplx/langs/regxs/Regx_match.java
@@ -24,5 +24,5 @@ public class Regx_match {
public int Find_end() {return find_end;} int find_end;
public int Find_len() {return find_end - find_bgn;}
public Regx_group[] Groups() {return groups;} Regx_group[] groups = Regx_group.Ary_empty;
- public static final Regx_match[] Ary_empty = new Regx_match[0];
+ public static final Regx_match[] Ary_empty = new Regx_match[0];
}
diff --git a/100_core/src/gplx/langs/regxs/Regx_rslt.java b/100_core/src/gplx/langs/regxs/Regx_rslt.java
new file mode 100644
index 000000000..5ebc86522
--- /dev/null
+++ b/100_core/src/gplx/langs/regxs/Regx_rslt.java
@@ -0,0 +1,46 @@
+/*
+XOWA: the XOWA Offline Wiki Application
+Copyright (C) 2012 gnosygnu@gmail.com
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as
+published by the Free Software Foundation, either version 3 of the
+License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see .
+*/
+package gplx.langs.regxs; import gplx.*; import gplx.langs.*;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+public class Regx_rslt {// THREAD.UNSAFE: one Matcher and one scratch Regx_group are reused across calls
+	private int src_pos, src_len; // src_pos: offset where the next search starts; src_len: length of the String bound by Init
+	private final Regx_group tmp_grp = new Regx_group(false, -1, -1, null); // scratch group; overwritten by every Groups__get_at call
+	public Matcher match; // underlying matcher; null until Init is called
+	public int Groups__len() {return match.groupCount() + 1;} // +1 to include group=0 which is entire pattern
+	public Regx_group Groups__get_at(int i) {// NOTE: returns the shared scratch group; read Bgn / End before asking for another group
+		tmp_grp.Init(true, match.start(i), match.end(i), null);
+		return tmp_grp;
+	}
+	public void Init(Regx_adp regex, String src, int src_bgn) {// binds regex to src; the 1st Match_next searches from src_bgn
+		match = regex.Under().matcher(src);
+		this.src_pos = src_bgn; this.src_len = src.length();
+	}
+	public boolean Match_next() {// finds the next match at or after the current offset; returns false once no match remains
+		this.found = src_pos <= src_len && match.find(src_pos); // FIX: Matcher.find(int) throws IndexOutOfBoundsException when the offset is greater than the input length
+		if (found) {
+			this.find_bgn = match.start();
+			this.find_end = match.end();
+			this.src_pos = find_end == find_bgn ? find_end + 1 : find_end; // FIX: step over a zero-length match; otherwise find(src_pos) returns the same empty match forever
+		}
+		return found;
+	}
+	public boolean Found() {return found;} private boolean found;
+	public int Find_bgn() {return find_bgn;} private int find_bgn;
+	public int Find_end() {return find_end;} private int find_end;
+}
\ No newline at end of file
diff --git a/400_xowa/src/gplx/xowa/mws/parsers/Xomw_parser.java b/400_xowa/src/gplx/xowa/mws/parsers/Xomw_parser.java
index 48fd4a3ab..9aa4dd415 100644
--- a/400_xowa/src/gplx/xowa/mws/parsers/Xomw_parser.java
+++ b/400_xowa/src/gplx/xowa/mws/parsers/Xomw_parser.java
@@ -19,7 +19,7 @@ package gplx.xowa.mws.parsers; import gplx.*; import gplx.xowa.*; import gplx.xo
import gplx.core.btries.*; import gplx.core.net.*;
import gplx.xowa.mws.parsers.prepros.*; import gplx.xowa.mws.parsers.headings.*;
import gplx.xowa.mws.parsers.quotes.*; import gplx.xowa.mws.parsers.tables.*; import gplx.xowa.mws.parsers.hrs.*; import gplx.xowa.mws.parsers.nbsps.*;
-import gplx.xowa.mws.parsers.lnkes.*; import gplx.xowa.mws.parsers.lnkis.*;
+import gplx.xowa.mws.parsers.lnkes.*; import gplx.xowa.mws.parsers.lnkis.*; import gplx.xowa.mws.parsers.magiclinks.*;
import gplx.xowa.mws.utls.*; import gplx.xowa.mws.linkers.*;
public class Xomw_parser {
private final Xomw_parser_ctx pctx = new Xomw_parser_ctx();
@@ -29,10 +29,13 @@ public class Xomw_parser {
private final Xomw_nbsp_wkr nbsp_wkr = new Xomw_nbsp_wkr();
private final Xomw_block_level_pass block_wkr = new Xomw_block_level_pass();
private final Xomw_heading_wkr heading_wkr = new Xomw_heading_wkr();
+ private final Xomw_magiclinks_wkr magiclinks_wkr = new Xomw_magiclinks_wkr();
private final Xomw_link_renderer link_renderer = new Xomw_link_renderer();
private final Xomw_link_holders holders;
private final Xomw_heading_cbk__html heading_wkr_cbk;
private final Btrie_slim_mgr protocols_trie;
+ private static Xomw_regex_space regex_space;
+ private static Xomw_regex_url regex_url;
private final Btrie_rv trv = new Btrie_rv();
private int marker_index = 0;
// private final Xomw_prepro_wkr prepro_wkr = new Xomw_prepro_wkr();
@@ -51,10 +54,16 @@ public class Xomw_parser {
this.lnke_wkr = new Xomw_lnke_wkr(this);
this.lnki_wkr = new Xomw_lnki_wkr(this, holders, link_renderer, protocols_trie);
this.heading_wkr_cbk = new Xomw_heading_cbk__html();
+ if (regex_space == null) {
+ synchronized (Type_adp_.ClassOf_obj(this)) {
+ regex_space = new Xomw_regex_space();
+ regex_url = new Xomw_regex_url(regex_space);
+ }
+ }
}
public void Init_by_wiki(Xowe_wiki wiki) {
linker.Init_by_wiki(wiki.Lang().Lnki_trail_mgr().Trie());
- lnke_wkr.Init_by_wiki(protocols_trie);
+ lnke_wkr.Init_by_wiki(protocols_trie, regex_url, regex_space); // passes the static url / space matchers to the lnke worker; NOTE(review): magiclinks_wkr.Init_by_wiki is not called in this method -- confirm it is initialized before Do_magic_links runs
lnki_wkr.Init_by_wiki(wiki);
}
public void Internal_parse(Xomw_parser_bfr pbfr, byte[] text) {
@@ -107,8 +116,8 @@ public class Xomw_parser {
// replaceInternalLinks may sometimes leave behind
// absolute URLs, which have to be masked to hide them from replaceExternalLinks
Xomw_parser_bfr_.Replace(pbfr, Bry__marker__noparse, Bry_.Empty);
+ magiclinks_wkr.Do_magic_links(pctx, pbfr);
-// $text = $this->doMagicLinks($text);
// $text = $this->formatHeadings($text, $origText, $isMain);
}
diff --git a/400_xowa/src/gplx/xowa/mws/parsers/Xomw_parser__tst.java b/400_xowa/src/gplx/xowa/mws/parsers/Xomw_parser__tst.java
index 1fd74535b..b6a66e4cb 100644
--- a/400_xowa/src/gplx/xowa/mws/parsers/Xomw_parser__tst.java
+++ b/400_xowa/src/gplx/xowa/mws/parsers/Xomw_parser__tst.java
@@ -21,37 +21,9 @@ public class Xomw_parser__tst {
private final Xomw_parser__fxt fxt = new Xomw_parser__fxt();
@Test public void Basic() {
fxt.Test__parse(String_.Concat_lines_nl_skip_last
- ( "== heading_1 =="
- , "para_1"
- , "== heading_2 =="
- , "para_2"
- , "-----"
- , "{|"
- , "|-"
- , "|a"
- , "|}"
- , "''italics''"
- , "[https://a.org b]"
- , "[[A|abc]]"
- , "a »b« !important c"
+ ("a https://c.org b"
), String_.Concat_lines_nl_skip_last
- ( "
heading_1
"
- , "para_1"
- , "
"
- , " heading_2
"
- , "para_2"
- , "
"
- , "
"
- , ""
- , ""
- , ""
- , "a"
- , " |
"
- , "italics"
- , "b"
- , "abc"
- , "a »b« !important c"
- , "
"
+ ( ""
));
}
}
diff --git a/400_xowa/src/gplx/xowa/mws/parsers/Xomw_regex_.java b/400_xowa/src/gplx/xowa/mws/parsers/Xomw_regex_.java
new file mode 100644
index 000000000..e098ff240
--- /dev/null
+++ b/400_xowa/src/gplx/xowa/mws/parsers/Xomw_regex_.java
@@ -0,0 +1,45 @@
+/*
+XOWA: the XOWA Offline Wiki Application
+Copyright (C) 2012 gnosygnu@gmail.com
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as
+published by the Free Software Foundation, either version 3 of the
+License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see .
+*/
+package gplx.xowa.mws.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
+import gplx.core.btries.*;
+public class Xomw_regex_ {
+	// Scans fwd from src_bgn for as long as trie matches at the current char.
+	// Returns the pos of the 1st char at which trie does not match, or src_end if every char up to src_end matches.
+	// trv is the scratch obj passed through to the trie; its contents are not read here.
+	public static int Find_fwd_while(Btrie_slim_mgr trie, Btrie_rv trv, byte[] src, int src_bgn, int src_end) {
+		return Find_fwd(trie, trv, src, src_bgn, src_end, true);
+	}
+	// Scans fwd from src_bgn until trie matches at the current char.
+	// Returns the pos of the 1st char at which trie matches, or src_end if no char up to src_end matches.
+	// trv is the scratch obj passed through to the trie; its contents are not read here.
+	public static int Find_fwd_until(Btrie_slim_mgr trie, Btrie_rv trv, byte[] src, int src_bgn, int src_end) {
+		return Find_fwd(trie, trv, src, src_bgn, src_end, false);
+	}
+	// Shared loop for both scans: advances one UTF-8 char at a time for as long as "trie matches at cur" equals continue_on_match.
+	// Never reads src at or beyond src_end, and never returns a pos beyond src_end when src_bgn <= src_end.
+	private static int Find_fwd(Btrie_slim_mgr trie, Btrie_rv trv, byte[] src, int src_bgn, int src_end, boolean continue_on_match) {
+		int cur = src_bgn;
+		while (cur < src_end) { // FIX: was "while (true)", which read src[cur] past src_end whenever the run reached the end of the text
+			byte b = src[cur];
+			boolean matched = trie.Match_at_w_b0(trv, b, src, cur, src_end) != null;
+			if (matched != continue_on_match) break;
+			cur = Math.min(src_end, cur + gplx.core.intls.Utf8_.Len_of_char_by_1st_byte(b)); // clamp: a multi-byte char cut off by src_end must not push cur past src_end
+		}
+		return cur;
+	}
+}
diff --git a/400_xowa/src/gplx/xowa/mws/parsers/Xomw_regex_boundary.java b/400_xowa/src/gplx/xowa/mws/parsers/Xomw_regex_boundary.java
new file mode 100644
index 000000000..0d519f987
--- /dev/null
+++ b/400_xowa/src/gplx/xowa/mws/parsers/Xomw_regex_boundary.java
@@ -0,0 +1,39 @@
+/*
+XOWA: the XOWA Offline Wiki Application
+Copyright (C) 2012 gnosygnu@gmail.com
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as
+published by the Free Software Foundation, either version 3 of the
+License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see .
+*/
+package gplx.xowa.mws.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
+import gplx.core.btries.*;
+public class Xomw_regex_boundary { // THREAD.SAFE: trv is only for consistent interface (its contents are never read in this class)
+	private final Btrie_slim_mgr trie = Btrie_slim_mgr.cs();
+	private final Btrie_rv trv = new Btrie_rv();
+	public Xomw_regex_boundary(Xomw_regex_space space) {
+		// naive implementation of is_boundary: a pos is a boundary only when the preceding char is one of the Ws / Zs space chars
+		for (byte[][] space_set : new byte[][][] {space.Ws(), space.Zs()}) { // same registration order as before: all of Ws, then all of Zs
+			for (byte[] space_bry : space_set) {
+				trie.Add_bry_byte(space_bry, Byte_.Zero); // val is never read; only the presence of the key matters
+			}
+		}
+	}
+	// Returns true when pos is 0 (BOS), or when the trie of space chars matches at the char located just before pos
+	public boolean Is_boundary_prv(byte[] src, int pos) {
+		if (pos != 0) {
+			int prv_bgn = gplx.core.intls.Utf8_.Get_pos0_of_char_bwd(src, pos - 1); // presumably the 1st byte of the UTF-8 char which ends at pos -- confirm against Utf8_
+			return trie.Match_at_w_b0(trv, src[prv_bgn], src, prv_bgn, pos) != null;
+		}
+		return true; // BOS is true
+	}
+}
diff --git a/400_xowa/src/gplx/xowa/mws/parsers/Xomw_regex_space.java b/400_xowa/src/gplx/xowa/mws/parsers/Xomw_regex_space.java
new file mode 100644
index 000000000..21c7eef1c
--- /dev/null
+++ b/400_xowa/src/gplx/xowa/mws/parsers/Xomw_regex_space.java
@@ -0,0 +1,64 @@
+/*
+XOWA: the XOWA Offline Wiki Application
+Copyright (C) 2012 gnosygnu@gmail.com
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as
+published by the Free Software Foundation, either version 3 of the
+License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see .
+*/
+package gplx.xowa.mws.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
+import gplx.core.btries.*;
+public class Xomw_regex_space {
+	private final Btrie_slim_mgr trie = Btrie_slim_mgr.cs();
+	// Builds the two space sets (as UTF-8 byte sequences) and registers every member in the trie used by Find_fwd_while
+	public Xomw_regex_space() {
+		byte[] space = Bry_.New_by_ints(32);
+		// ASCII whitespace: space, tab, line-feed, carriage-return
+		ws = new byte[][]
+		{ space
+		, Bry_.New_by_ints(9)
+		, Bry_.New_by_ints(10)
+		, Bry_.New_by_ints(13)
+		};
+		// Zs; REF:http://www.fileformat.info/info/unicode/category/Zs/list.htm
+		// FIX: U+2000 (EN QUAD) was missing from the table, so it was not treated as a space char
+		zs = new byte[][]
+		{ space                             // U+0020
+		, Bry_.New_by_ints(194, 160)        // U+00A0
+		, Bry_.New_by_ints(225, 154, 128)   // U+1680
+		, Bry_.New_by_ints(226, 128, 128)   // U+2000
+		, Bry_.New_by_ints(226, 128, 129)   // U+2001
+		, Bry_.New_by_ints(226, 128, 130)   // U+2002
+		, Bry_.New_by_ints(226, 128, 131)   // U+2003
+		, Bry_.New_by_ints(226, 128, 132)   // U+2004
+		, Bry_.New_by_ints(226, 128, 133)   // U+2005
+		, Bry_.New_by_ints(226, 128, 134)   // U+2006
+		, Bry_.New_by_ints(226, 128, 135)   // U+2007
+		, Bry_.New_by_ints(226, 128, 136)   // U+2008
+		, Bry_.New_by_ints(226, 128, 137)   // U+2009
+		, Bry_.New_by_ints(226, 128, 138)   // U+200A
+		, Bry_.New_by_ints(226, 128, 175)   // U+202F
+		, Bry_.New_by_ints(226, 129, 159)   // U+205F
+		, Bry_.New_by_ints(227, 128, 128)   // U+3000
+		};
+		// NOTE: space is a member of both sets, so it is registered twice (unchanged from the original loops)
+		for (byte[] bry : ws) {trie.Add_bry_byte(bry, Byte_.Zero);}
+		for (byte[] bry : zs) {trie.Add_bry_byte(bry, Byte_.Zero);}
+	}
+	// NOTE: both getters return the internal arrays, not copies; callers must not modify them
+	public byte[][] Ws() {return ws;} private final byte[][] ws;
+	public byte[][] Zs() {return zs;} private final byte[][] zs;
+	// Skips the run of Ws / Zs chars which starts at src_bgn; returns the pos of the 1st char which is not one of them (delegates to Xomw_regex_.Find_fwd_while)
+	public int Find_fwd_while(Btrie_rv trv, byte[] src, int src_bgn, int src_end) {
+		return Xomw_regex_.Find_fwd_while(trie, trv, src, src_bgn, src_end);
+	}
+}
diff --git a/400_xowa/src/gplx/xowa/mws/parsers/Xomw_regex_url.java b/400_xowa/src/gplx/xowa/mws/parsers/Xomw_regex_url.java
new file mode 100644
index 000000000..fbcba0bbe
--- /dev/null
+++ b/400_xowa/src/gplx/xowa/mws/parsers/Xomw_regex_url.java
@@ -0,0 +1,39 @@
+/*
+XOWA: the XOWA Offline Wiki Application
+Copyright (C) 2012 gnosygnu@gmail.com
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as
+published by the Free Software Foundation, either version 3 of the
+License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see .
+*/
+package gplx.xowa.mws.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
+import gplx.core.btries.*;
+public class Xomw_regex_url {
+	private final Btrie_slim_mgr trie = Btrie_slim_mgr.cs(); // holds every char which can NOT be part of a url
+	// Registers the chars excluded by REGEX:[^][<>"\\x00-\\x20\\x7F\p{Zs}]; NOTE: val is just a marker
+	public Xomw_regex_url(Xomw_regex_space regex_space) {
+		trie.Add_str_byte__many(Byte_.Zero, "[", "]", "<", ">", "\"");
+		int ctl = 0;
+		while (ctl <= 32) { // x00-x20
+			trie.Add_bry_byte(new byte[] {(byte)ctl}, Byte_.Zero);
+			ctl++;
+		}
+		trie.Add_bry_byte(Bry_.New_by_ints(127), Byte_.Zero); // x7F
+		for (byte[] zs : regex_space.Zs()) { // \p{Zs}
+			trie.Add_bry_byte(zs, Byte_.Zero);
+		}
+	}
+	// Returns the pos of the 1st excluded char at or after src_bgn; i.e.: scanning "while" chars are url chars is done by scanning "until" an excluded char
+	public int Find_fwd_while(Btrie_rv trv, byte[] src, int src_bgn, int src_end) {
+		return Xomw_regex_.Find_fwd_until(trie, trv, src, src_bgn, src_end);
+	}
+}
diff --git a/400_xowa/src/gplx/xowa/mws/parsers/lnkes/Xomw_lnke_wkr.java b/400_xowa/src/gplx/xowa/mws/parsers/lnkes/Xomw_lnke_wkr.java
index 8c0311721..78e3f911b 100644
--- a/400_xowa/src/gplx/xowa/mws/parsers/lnkes/Xomw_lnke_wkr.java
+++ b/400_xowa/src/gplx/xowa/mws/parsers/lnkes/Xomw_lnke_wkr.java
@@ -26,12 +26,16 @@ public class Xomw_lnke_wkr {// THREAD.UNSAFE: caching for repeated calls
private int autonumber;
private final Xomw_linker linker;
private final Xomwh_atr_mgr attribs = new Xomwh_atr_mgr();
+ private Xomw_regex_url regex_url;
+ private Xomw_regex_space regex_space;
public Xomw_lnke_wkr(Xomw_parser mgr) {
this.tmp = mgr.Tmp();
this.linker = mgr.Linker();
}
- public void Init_by_wiki(Btrie_slim_mgr protocol_trie) {
+ public void Init_by_wiki(Btrie_slim_mgr protocol_trie, Xomw_regex_url regex_url, Xomw_regex_space regex_space) { // stores the protocol trie and the two matchers this worker scans with
this.protocol_trie = protocol_trie;
+ this.regex_url = regex_url; // used to find where the run of url chars ends
+ this.regex_space = regex_space; // used to skip ws which follows the url
}
public void Replace_external_links(Xomw_parser_ctx pctx, Xomw_parser_bfr pbfr) {
// XO.PBFR
@@ -101,14 +105,7 @@ public class Xomw_lnke_wkr {// THREAD.UNSAFE: caching for repeated calls
// check for one-or-more url chars; [^][<>"\\x00-\\x20\\x7F\p{Zs}]
int domain_bgn = cur;
- while (true) {
- byte b = src[cur];
- Object url_char_byte = invalid_url_chars_trie.Match_at_w_b0(trv, b, src, cur, src_end);
- if (url_char_byte == null)
- cur += gplx.core.intls.Utf8_.Len_of_char_by_1st_byte(b);
- else
- break;
- }
+ cur = regex_url.Find_fwd_while(trv, src, domain_bgn, src_end);
if (cur - domain_bgn == 0) {
bfr.Add_mid(src, prv, cur);
prv = cur;
@@ -116,14 +113,8 @@ public class Xomw_lnke_wkr {// THREAD.UNSAFE: caching for repeated calls
}
int url_end = cur;
- // get ws (if any)
- int ws_bgn = -1;
- while (true) {
- Object space_byte = space_chars_trie.Match_at(trv, src, cur, src_end);
- if (space_byte == null) break;
- if (ws_bgn == -1) ws_bgn = cur;
- cur += ((Int_obj_val)space_byte).Val();
- }
+ // skip ws
+ cur = regex_space.Find_fwd_while(trv, src, cur, src_end);
// get text (if any)
int text_bgn = -1, text_end = -1;
@@ -244,27 +235,7 @@ public class Xomw_lnke_wkr {// THREAD.UNSAFE: caching for repeated calls
, Link_type__autonumber = Bry_.new_a7("autonumber")
;
- private static final Btrie_slim_mgr
- invalid_url_chars_trie = New__invalid_url_chars_trie()
- , space_chars_trie = New__space_chars_trie()
- , invalid_text_chars_trie = New__invalid_text_chars_trie()
- ;
- private static Btrie_slim_mgr New__invalid_url_chars_trie() { // REGEX:[^][<>"\\x00-\\x20\\x7F\p{Zs}]; NOTE: val is just a marker
- Btrie_slim_mgr rv = Btrie_slim_mgr.cs();
- rv.Add_str_byte__many(Byte_.Zero, "[", "]", "<", ">", "\"");
- for (byte i = 0; i < 33; i++) {
- rv.Add_bry_byte(new byte[] {i}, Byte_.Zero);
- }
- rv.Add_bry_byte(Bry_.New_by_ints(127), Byte_.Zero); // x7F
- rv.Add_bry_byte(Bry_.New_by_ints(227, 128, 128), Byte_.Zero); // \p{Zs} // e3 80 80; https://phabricator.wikimedia.org/T21052
- return rv;
- }
- private static Btrie_slim_mgr New__space_chars_trie() { // REGEX:\p{Zs}; NOTE: val is key.length
- Btrie_slim_mgr rv = Btrie_slim_mgr.cs();
- New__trie_itm__by_len(rv, 32);
- New__trie_itm__by_len(rv, 227, 128, 128); // \p{Zs} // e3 80 80; https://phabricator.wikimedia.org/T21052
- return rv;
- }
+ private static final Btrie_slim_mgr invalid_text_chars_trie = New__invalid_text_chars_trie();
private static Btrie_slim_mgr New__invalid_text_chars_trie() { // REGEX:([^\]\\x00-\\x08\\x0a-\\x1F]*?); NOTE: val is key.length
Btrie_slim_mgr rv = Btrie_slim_mgr.cs();
New__trie_itm__by_len(rv, Byte_ascii.Brack_end);
diff --git a/400_xowa/src/gplx/xowa/mws/parsers/lnkes/Xomw_lnke_wkr__tst.java b/400_xowa/src/gplx/xowa/mws/parsers/lnkes/Xomw_lnke_wkr__tst.java
index d189771cc..28311b621 100644
--- a/400_xowa/src/gplx/xowa/mws/parsers/lnkes/Xomw_lnke_wkr__tst.java
+++ b/400_xowa/src/gplx/xowa/mws/parsers/lnkes/Xomw_lnke_wkr__tst.java
@@ -45,7 +45,8 @@ class Xomw_lnke_wkr__fxt {
private final Xomw_parser_bfr pbfr = new Xomw_parser_bfr();
private boolean apos = true;
public Xomw_lnke_wkr__fxt() {
- wkr.Init_by_wiki(Xomw_parser.Protocols__dflt());
+ Xomw_regex_space regex_space = new Xomw_regex_space(); // one instance is passed to both the url matcher and the worker
+ wkr.Init_by_wiki(Xomw_parser.Protocols__dflt(), new Xomw_regex_url(regex_space), regex_space);
}
public void Test__parse(String src_str, String expd) {
byte[] src_bry = Bry_.new_u8(src_str);
diff --git a/400_xowa/src/gplx/xowa/mws/parsers/magiclinks/Xomw_magiclinks_wkr.java b/400_xowa/src/gplx/xowa/mws/parsers/magiclinks/Xomw_magiclinks_wkr.java
index 54323a173..576d684f1 100644
--- a/400_xowa/src/gplx/xowa/mws/parsers/magiclinks/Xomw_magiclinks_wkr.java
+++ b/400_xowa/src/gplx/xowa/mws/parsers/magiclinks/Xomw_magiclinks_wkr.java
@@ -17,315 +17,229 @@ along with this program. If not, see .
*/
package gplx.xowa.mws.parsers.magiclinks; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
import gplx.core.primitives.*; import gplx.core.btries.*; import gplx.core.net.*;
-import gplx.langs.phps.utls.*;
-// public class Xomw_magiclinks_wkr {
-// private final Btrie_slim_mgr regex_trie = Btrie_slim_mgr.ci_a7(); // NOTE: must be ci to handle protocols; EX: "https:" and "HTTPS:"
-// private final Btrie_rv trv = new Btrie_rv();
-// public Xomw_magiclinks_wkr() {
-// }
-// private static byte[] Tag__anch__rhs, Prefix__rfc, Prefix__pmid;
-//
-// private static final byte Space__tab = 1, Space__nbsp_ent = 2, Space__nbsp_dec = 3, Space__nbsp_hex = 4;
-// private static Btrie_slim_mgr space_trie;
-// // static final SPACE_NOT_NL = '(?:\t| |&\#0*160;|&\#[Xx]0*[Aa]0;|\p{Zs})';
-//// public void Test() {
-//// regex.Add("\t", Space__tab);
-//// regex.Add(" ", Space__nbsp__ent);
-//// regex.Add(Regex.Make("").Star("0").Add("160;"), Space__nbsp__dec);
-//// regex.Add(Regex.Make("").Brack("X", "x").Star("0").Brack("A", "a").Add("0"), Space__nbsp__hex);
-//// }
-// public int Find_fwd_space(byte[] src, int cur, int src_end) {
-// return -1;
-// }
-//
-// private static final byte Regex__anch = 1, Regex__elem = 2, Regex__free = 3, Regex__rfc = 5, Regex__isbn = 6, Regex__pmid = 7;
-// public void Init_by_wiki() {
-// regex_trie.Add_str_byte("");
-// Prefix__rfc = Bry_.new_a7("RFC");
-// Prefix__pmid = Bry_.new_a7("PMID");
-// space_trie = Btrie_slim_mgr.ci_a7()
-// .Add_str_byte("\t", Space__tab)
-// .Add_str_byte(" ", Space__nbsp_ent)
-// .Add_str_byte("", Space__nbsp_dec)
-// .Add_str_byte("&x", Space__nbsp_hex)
-// ;
-// }
-// }
-// }
-//
-// // Replace special strings like "ISBN xxx" and "RFC xxx" with
-// // magic external links.
-// public void Do_magic_links(Xomw_parser_ctx pctx, Xomw_parser_bfr pbfr) {
-// // XO.PBFR
-// Bry_bfr src_bfr = pbfr.Src();
-// byte[] src = src_bfr.Bfr();
-// int src_bgn = 0;
-// int src_end = src_bfr.Len();
-// Bry_bfr bfr = pbfr.Trg();
-//
-// int cur = src_bgn;
-// int prv = cur;
-// boolean dirty = true;
-// while (true) {
-// if (cur == src_end) {
-// if (dirty)
-// bfr.Add_mid(src, prv, src_end);
-// break;
-// }
-//
-// byte b = src[cur];
-// Object o = regex_trie.Match_at_w_b0(trv, b, src, cur, src_end);
-// // current byte doesn't look like magiclink; continue;
-// if (o == null) {
-// cur++;
-// continue;
-// }
-// // looks like magiclink; do additional processing
-// byte regex_tid = ((Byte_obj_ref)o).Val();
-// int trv_pos = trv.Pos();
-// int nxt_pos = trv_pos;
-// boolean regex_valid = true;
-// switch (regex_tid) {
-// case Regex__anch: // (].*?) | // m[1]: Skip link text
-// if (trv_pos < src_end) {
-// // find ws in "[ \t\r\n>]"
-// byte ws_byte = src[cur];
-// switch (ws_byte) {
-// case Byte_ascii.Space:
-// case Byte_ascii.Tab:
-// case Byte_ascii.Cr:
-// case Byte_ascii.Nl:
-// break;
-// default:
-// regex_valid = false;
-// break;
-// }
-// if (regex_valid) {
-// // find
-// nxt_pos++;
-// int anch_end = Bry_find_.Find_fwd(src, Tag__anch__rhs, nxt_pos, src_end);
-// if (anch_end == Bry_find_.Not_found) {
-// regex_valid = false;
-// }
-// else {
-// cur = anch_end + Tag__anch__rhs.length;
-// }
-// }
-// }
-// else {
-// regex_valid = false;
-// }
-// break;
-// case Regex__elem: // (<.*?>) | // m[2]: Skip stuff inside
-// // just find ">"
-// int elem_end = Bry_find_.Find_fwd(src, Byte_ascii.Angle_end, nxt_pos, src_end);
-// if (elem_end == Bry_find_.Not_found)
-// regex_valid = false;
-// else
-// cur = elem_end + 1;
-// break;
-// case Regex__free:
-// // addr; urlchar
-// break;
-// case Regex__rfc:
-// case Regex__pmid:
-// // byte[] prefix = regex == Regex__rfc ? Prefix__rfc : Prefix__pmid;
-// // match previous for case sensitivity
-//// if (Bry_.Eq(src, trv_pos - prefix.length - 1, trv_pos - 1, prefix)) {
-////
-//// }
-//// else {
-//// regex_valid = false;
-//// }
-// break;
-// }
-//
-//// '!(?: // Start cases
-//// (].*?) | // m[1]: Skip link text
-//// (<.*?>) | // m[2]: Skip stuff inside
-//// // HTML elements' . "
-//// (\b(?i:$prots)($addr$urlChar*)) | // m[3]: Free external links
-//// // m[4]: Post-protocol path
-//// \b(?:RFC|PMID) $spaces // m[5]: RFC or PMID, capture number
-//// ([0-9]+)\b |
-//// \bISBN $spaces ( // m[6]: ISBN, capture number
-//// (?: 97[89] $spdash?)? // optional 13-digit ISBN prefix
-//// (?: [0-9] $spdash?){9} // 9 digits with opt. delimiters
-//// [0-9Xx] // check digit
-//// )\b
-//
-// }
-// if (dirty)
-// pbfr.Switch();
+import gplx.langs.phps.utls.*; import gplx.xowa.mws.htmls.*;
+import gplx.langs.regxs.*;
+public class Xomw_magiclinks_wkr {
+ private final Btrie_slim_mgr regex_trie = Btrie_slim_mgr.ci_a7(); // NOTE: must be ci to handle protocols; EX: "https:" and "HTTPS:"
+ private final Btrie_rv trv = new Btrie_rv();
+ private static byte[] Tag__anch__rhs;
+ private Xomw_regex_boundary regex_boundary;
+ private Xomw_regex_url regex_url;
+ private Xomw_linker linker;
+ private byte[] page_title;
-// $prots = wfUrlProtocolsWithoutProtRel();
-// $urlChar = self::EXT_LINK_URL_CLASS;
-// $addr = self::EXT_LINK_ADDR;
-// $space = self::SPACE_NOT_NL; // non-newline space
-// $spdash = "(?:-|$space)"; // a dash or a non-newline space
-// $spaces = "$space++"; // possessive match of 1 or more spaces
-// $text = preg_replace_callback(
-// '!(?: // Start cases
-// (].*?) | // m[1]: Skip link text
-// (<.*?>) | // m[2]: Skip stuff inside
-// // HTML elements' . "
-// (\b(?i:$prots)($addr$urlChar*)) | // m[3]: Free external links
-// // m[4]: Post-protocol path
-// \b(?:RFC|PMID) $spaces // m[5]: RFC or PMID, capture number
-// ([0-9]+)\b |
-// \bISBN $spaces ( // m[6]: ISBN, capture number
-// (?: 97[89] $spdash?)? // optional 13-digit ISBN prefix
-// (?: [0-9] $spdash?){9} // 9 digits with opt. delimiters
-// [0-9Xx] // check digit
-// )\b
-// )!xu", [ &$this, 'magicLinkCallback' ], $text);
-// return $text;
-// }
+ private static final byte Regex__anch = 1, Regex__elem = 2, Regex__free = 3;
+ public void Init_by_wiki(Xomw_linker linker, Xomw_regex_boundary regex_boundary, Xomw_regex_url regex_url) {
+ this.linker = linker;
+ this.regex_boundary = regex_boundary;
+ this.regex_url = regex_url;
+ regex_trie.Add_str_byte("makeFreeExternalLink($m[0], strlen($m[4]));
-// } else if (isset($m[5]) && $m[5] !== '') {
-// // RFC or PMID
-// if (substr($m[0], 0, 3) === 'RFC') {
-// if (!$this->mOptions->getMagicRFCLinks()) {
-// return $m[0];
-// }
-// $keyword = 'RFC';
-// $urlmsg = 'rfcurl';
-// $cssClass = 'mw-magiclink-rfc';
-// $trackingCat = 'magiclink-tracking-rfc';
-// $id = $m[5];
-// } else if (substr($m[0], 0, 4) === 'PMID') {
-// if (!$this->mOptions->getMagicPMIDLinks()) {
-// return $m[0];
-// }
-// $keyword = 'PMID';
-// $urlmsg = 'pubmedurl';
-// $cssClass = 'mw-magiclink-pmid';
-// $trackingCat = 'magiclink-tracking-pmid';
-// $id = $m[5];
-// } else {
-// throw new MWException(__METHOD__ . ': unrecognised match type "' .
-// substr($m[0], 0, 20) . '"');
-// }
-// $url = wfMessage($urlmsg, $id)->inContentLanguage()->text();
-// $this->addTrackingCategory($trackingCat);
-// return Linker::makeExternalLink($url, "{$keyword} {$id}", true, $cssClass, [], $this->mTitle);
-// } else if (isset($m[6]) && $m[6] !== ''
-// && $this->mOptions->getMagicISBNLinks()
-// ) {
-// // ISBN
-// $isbn = $m[6];
-// $space = self::SPACE_NOT_NL; // non-newline space
-// $isbn = preg_replace("/$space/", ' ', $isbn);
-// $num = strtr($isbn, [
-// '-' => '',
-// ' ' => '',
-// 'x' => 'X',
-// ]);
-// $this->addTrackingCategory('magiclink-tracking-isbn');
-// return $this->getLinkRenderer()->makeKnownLink(
-// SpecialPage::getTitleFor('Booksources', $num),
-// "ISBN $isbn",
-// [
-// 'class' => '@gplx.Internal protected mw-magiclink-isbn',
-// 'title' => false // suppress title attribute
-// ]
-// );
-// } else {
-// return $m[0];
-// }
+ if (Tag__anch__rhs == null) {
+ synchronized (Type_adp_.ClassOf_obj(this)) {
+ Tag__anch__rhs = Bry_.new_a7("");
+ }
+ }
+ }
- // Make a free external link, given a user-supplied URL
-// public void Make_free_external_link(byte[] url, int num_post_proto) {
-// byte[] trail = Bry_.Empty;
+ // Replace special strings like "ISBN xxx" and "RFC xxx" with
+ // magic external links.
+ public void Do_magic_links(Xomw_parser_ctx pctx, Xomw_parser_bfr pbfr) {
+ // XO.PBFR
+ Bry_bfr src_bfr = pbfr.Src();
+ byte[] src = src_bfr.Bfr();
+ int src_bgn = 0;
+ int src_end = src_bfr.Len();
+ Bry_bfr bfr = pbfr.Trg();
- // The characters '<' and '>' (which were escaped by
- // removeHTMLtags()) should not be included in
- // URLs, per RFC 2396.
- // Make terminate a URL as well (bug T84937)
-// $m2 = [];
-// if (preg_match(
-// '/&(lt|gt|nbsp|#x0*(3[CcEe]|[Aa]0)|#0*(60|62|160));/',
-// $url,
-// $m2,
-// PREG_OFFSET_CAPTURE
-// )) {
-// trail = substr($url, $m2[0][1]) . $trail;
-// $url = substr($url, 0, $m2[0][1]);
-// }
+ int cur = src_bgn;
+ int prv = cur;
+ boolean dirty = true;
+ // PORTED.REGEX: handle below
+ // NOTE: not handling RFC|PMID|ISBN b/c of upcoming obsolescence: https://www.mediawiki.org/wiki/Requests_for_comment/Future_of_magic_links
+ //'!(?: // Start cases
+ // (].*?) | // m[1]: Skip link text
+ // (<.*?>) | // m[2]: Skip stuff inside
+ // // HTML elements' . "
+ // (\b(?i:$prots)($addr$urlChar*)) | // m[3]: Free external links
+ // // m[4]: Post-protocol path
+ // \b(?:RFC|PMID) $spaces // m[5]: RFC or PMID, capture number
+ // ([0-9]+)\b |
+ // \bISBN $spaces ( // m[6]: ISBN, capture number
+ // (?: 97[89] $spdash?)? // optional 13-digit ISBN prefix
+ // (?: [0-9] $spdash?){9} // 9 digits with opt. delimiters
+ // [0-9Xx] // check digit
+ // )\b
+ while (true) {
+ if (cur == src_end) {
+ if (dirty)
+ bfr.Add_mid(src, prv, src_end);
+ break;
+ }
- // Move trailing punctuation to $trail
-// $sep = ',;\.:!?';
- // If there is no left bracket, then consider right brackets fair game too
-// if (strpos($url, '(') === false) {
-// $sep .= ')';
-// }
+ byte b = src[cur];
+ Object o = regex_trie.Match_at_w_b0(trv, b, src, cur, src_end);
+ // current byte doesn't look like magiclink; continue;
+ if (o == null) {
+ cur++;
+ continue;
+ }
-// $urlRev = strrev($url);
-// $numSepChars = strspn($urlRev, $sep);
- // Don't break a trailing HTML entity by moving the ; into $trail
- // This is in hot code, so use substr_compare to avoid having to
- // create a new String Object for the comparison
-// if ($numSepChars && substr_compare($url, ";", -$numSepChars, 1) === 0) {
- // more optimization: instead of running preg_match with a $
- // anchor, which can be slow, do the match on the reversed
- // String starting at the desired offset.
- // un-reversed regexp is: /&([a-z]+|#x[\da-f]+|#\d+)$/i
-// if (preg_match('/\G([a-z]+|[\da-f]+x#|\d+#)&/i', $urlRev, $m2, 0, $numSepChars)) {
-// $numSepChars--;
-// }
-// }
-// if ($numSepChars) {
-// $trail = substr($url, -$numSepChars) . $trail;
-// $url = substr($url, 0, -$numSepChars);
-// }
+ // looks like magiclink; do additional processing
+ byte regex_tid = ((Byte_obj_val)o).Val();
+ int old_pos = cur;
+ int trv_pos = trv.Pos();
+ int nxt_pos = trv_pos;
+ boolean regex_valid = true;
+ switch (regex_tid) {
+ case Regex__anch: // (<a[ \t\r\n>].*?</a>) | // m[1]: Skip link text
+ if (trv_pos < src_end) {
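+ // find ws in "[ \t\r\n>]"; NOTE(review): the guard above tests trv_pos, but the byte read below is src[cur] (where the trie match was started), and '>' has no case in the switch -- confirm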
+ byte ws_byte = src[cur];
+ switch (ws_byte) {
+ case Byte_ascii.Space:
+ case Byte_ascii.Tab:
+ case Byte_ascii.Cr:
+ case Byte_ascii.Nl:
+ break;
+ default:
+ regex_valid = false;
+ break;
+ }
+ if (regex_valid) {
+ // find end of anchor: search forward for Tag__anch__rhs (presumably "</a>")
+ nxt_pos++;
+ int anch_end = Bry_find_.Find_fwd(src, Tag__anch__rhs, nxt_pos, src_end);
+ if (anch_end == Bry_find_.Not_found) {
+ regex_valid = false;
+ }
+ else {
+ cur = anch_end + Tag__anch__rhs.length;
+ }
+ }
+ }
+ else {
+ regex_valid = false;
+ }
+ break;
+ case Regex__elem: // (<.*?>) | // m[2]: Skip stuff inside
+ // just find ">"
+ int elem_end = Bry_find_.Find_fwd(src, Byte_ascii.Angle_end, nxt_pos, src_end);
+ if (elem_end == Bry_find_.Not_found)
+ regex_valid = false;
+ else
+ cur = elem_end + 1;
+ break;
+ case Regex__free:
+ if (regex_boundary.Is_boundary_prv(src, cur)) {
+ int url_end = regex_url.Find_fwd_while(trv, src, nxt_pos, src_end);
+ if (url_end == nxt_pos) {
+ regex_valid = false;
+ }
+ else
+ cur = url_end;
+ }
+ else
+ regex_valid = false;
+ break;
+ }
+ if (!regex_valid) {
+ cur++;
+ }
+ else {
+ if (regex_tid == Regex__free) {
+ this.page_title = pctx.Page_title().Full_db();
+ dirty = true;
+ bfr.Add_mid(src, prv, old_pos);
+ this.Make_free_external_link(bfr, Bry_.Mid(src, old_pos, cur), 0);
+ prv = cur;
+ }
+ else {
+ }
+ }
+ }
+ if (dirty) {
+ pbfr.Switch();
+ }
+ }
- // Verify that we still have a real URL after trail removal, and
- // not just lone protocol
-// if (strlen($trail) >= $numPostProto) {
-// return $url . $trail;
-// }
+ // Make a free external link, given a user-supplied URL; currently passes bfr, url (as both target and caption), "free" and page_title straight to linker.Make_external_link
+ public void Make_free_external_link(Bry_bfr bfr, byte[] url, int num_post_proto) { // NOTE: num_post_proto is not read yet; the trail-handling code that uses it is still commented-out PHP below
+// byte[] trail = Bry_.Empty;
+
+ // The characters '<' and '>' (which were escaped by
+ // removeHTMLtags()) should not be included in
+ // URLs, per RFC 2396.
+ // Make &nbsp; terminate a URL as well (bug T84937)
+
+// $m2 = [];
+// if (preg_match(
+// '/&(lt|gt|nbsp|#x0*(3[CcEe]|[Aa]0)|#0*(60|62|160));/',
+// $url,
+// $m2,
+// PREG_OFFSET_CAPTURE
+// )) {
+// trail = substr($url, $m2[0][1]) . $trail;
+// $url = substr($url, 0, $m2[0][1]);
+// }
+
+ // Move trailing punctuation to $trail
+// $sep = ',;\.:!?';
+ // If there is no left bracket, then consider right brackets fair game too
+// if (strpos($url, '(') === false) {
+// $sep .= ')';
+// }
-// $url = Sanitizer::cleanUrl($url);
+// $urlRev = strrev($url);
+// $numSepChars = strspn($urlRev, $sep);
+ // Don't break a trailing HTML entity by moving the ; into $trail
+ // This is in hot code, so use substr_compare to avoid having to
+ // create a new String Object for the comparison
- // Is this an external image?
-// $text = $this->maybeMakeExternalImage($url);
-// if ($text === false) {
- // Not an image, make a link
-// $text = Linker::makeExternalLink($url,
-// $this->getConverterLanguage()->markNoConversion($url, true),
-// true, 'free',
-// $this->getExternalLinkAttribs($url), $this->mTitle);
- // Register it in the output Object...
- // Replace unnecessary URL escape codes with their equivalent characters
-// $pasteurized = self::normalizeLinkUrl($url);
-// $this->mOutput->addExternalLink($pasteurized);
+// if ($numSepChars && substr_compare($url, ";", -$numSepChars, 1) === 0) {
+ // more optimization: instead of running preg_match with a $
+ // anchor, which can be slow, do the match on the reversed
+ // String starting at the desired offset.
+ // un-reversed regexp is: /&([a-z]+|#x[\da-f]+|#\d+)$/i
+// if (preg_match('/\G([a-z]+|[\da-f]+x#|\d+#)&/i', $urlRev, $m2, 0, $numSepChars)) {
+// $numSepChars--;
// }
-// return $text . $trail;
// }
-// }
-// }
+// if ($numSepChars) {
+// $trail = substr($url, -$numSepChars) . $trail;
+// $url = substr($url, 0, -$numSepChars);
+// }
+
+ // Verify that we still have a real URL after trail removal, and
+ // not just lone protocol
+// if (strlen($trail) >= $numPostProto) {
+// return $url . $trail;
+// }
+
+// $url = Sanitizer::cleanUrl($url);
+
+ // Is this an external image?
+ byte[] text = null; // $this->maybeMakeExternalImage($url); NOTE: not ported, so text is always null and the branch below always runs
+ if (text == null) {
+ // Not an image, make a link
+ linker.Make_external_link(bfr, url
+ , url // $this->getConverterLanguage()->markNoConversion($url, true),
+ , true, Bry_.new_a7("free")
+ , new Xomwh_atr_mgr() // $this->getExternalLinkAttribs($url)
+ , page_title);
+ // Register it in the output Object...
+ // Replace unnecessary URL escape codes with their equivalent characters
+// $pasteurized = self::normalizeLinkUrl($url);
+// $this->mOutput->addExternalLink($pasteurized);
+ }
+// return $text . $trail;
+ }
+}
diff --git a/400_xowa/src/gplx/xowa/mws/parsers/magiclinks/Xomw_magiclinks_wkr__tst.java b/400_xowa/src/gplx/xowa/mws/parsers/magiclinks/Xomw_magiclinks_wkr__tst.java
new file mode 100644
index 000000000..842a00271
--- /dev/null
+++ b/400_xowa/src/gplx/xowa/mws/parsers/magiclinks/Xomw_magiclinks_wkr__tst.java
@@ -0,0 +1,45 @@
+/*
+XOWA: the XOWA Offline Wiki Application
+Copyright (C) 2012 gnosygnu@gmail.com
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as
+published by the Free Software Foundation, either version 3 of the
+License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+package gplx.xowa.mws.parsers.magiclinks; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
+import org.junit.*;
+public class Xomw_magiclinks_wkr__tst { // JUnit tests that run Xomw_magiclinks_wkr.Do_magic_links (via the fixture below) on text containing free external links
+ private final Xomw_magiclinks_wkr__fxt fxt = new Xomw_magiclinks_wkr__fxt();
+ @Test public void Basic() {fxt.Test__parse("a https://b.org c", "a https://b.org c");} // NOTE(review): expected equals the input although the URL is preceded by a space; presumably the expected anchor markup was lost from this copy -- confirm against the linker output
+ @Test public void Invalid() {fxt.Test__parse("a _https://b.org c", "a _https://b.org c");} // "_" sits directly before the protocol; expected output is the unchanged input (presumably the boundary check rejects it)
+}
+class Xomw_magiclinks_wkr__fxt { // test fixture: runs Xomw_magiclinks_wkr.Do_magic_links over a String and compares the result to expected text
+ private final Xomw_magiclinks_wkr wkr = new Xomw_magiclinks_wkr();
+ private final Xomw_parser_ctx pctx = new Xomw_parser_ctx();
+ private final Xomw_parser_bfr pbfr = new Xomw_parser_bfr();
+ private boolean apos = true; // when true, expected text goes through Gfh_utl.Replace_apos before the comparison
+ public Xomw_magiclinks_wkr__fxt() {
+ // app and wiki come from Xoa_app_fxt; the wiki is used below to parse the page title given to pctx
+ Xoae_app fxt_app = Xoa_app_fxt.Make__app__edit();
+ Xowe_wiki fxt_wiki = Xoa_app_fxt.Make__wiki__edit(fxt_app);
+ Xomw_regex_space space = new Xomw_regex_space();
+ pctx.Init_by_page(fxt_wiki.Ttl_parse(Bry_.new_a7("Page_1")));
+ wkr.Init_by_wiki(new Xomw_linker(), new Xomw_regex_boundary(space), new Xomw_regex_url(space));
+ }
+ public void Test__parse(String src_str, String expd) {
+ pbfr.Init(Bry_.new_u8(src_str));
+ wkr.Do_magic_links(pctx, pbfr);
+ String expd_final = apos ? gplx.langs.htmls.Gfh_utl.Replace_apos(expd) : expd;
+ String actl = pbfr.Rslt().To_str_and_clear();
+ Tfds.Eq_str_lines(expd_final, actl, src_str);
+ }
+}