]*)>)(.*)(<\/div>)([^<]*)(<\/\\2>)/' =>
+// '\\1\\3<div\\5>\\6</div>\\8\\9',
+// // remove empty italic or bold tag pairs, some
+// // introduced by rules above
+// '/<([bi])><\/\\1>/' => '',
+// ];
+
+// $text = preg_replace(
+// array_keys( $tidyregs ),
+// array_values( $tidyregs ),
+// $text );
+// }
+
+ // MW.HOOK:ParserAfterTidy
+ }
+ // Armors URL protocols in src[src_bgn, src_end) so that later passes do not treat them as links:
+ // every protocol matched by protocols_trie is emitted as Xomw_strip_state.Bry__marker__bgn + "NOPARSE" + protocol.
+ // Two calling modes:
+ // * trg == null: returns the armored bytes; if nothing matched, returns src itself when the range
+ //   covers all of src, else a copy of the range
+ // * trg != null: appends the armored (or unchanged) range to trg and always returns null
+ public byte[] Armor_links(Bry_bfr trg, byte[] src, int src_bgn, int src_end) {
+   // PORTED:preg_replace( '/\b((?i)' . $this->mUrlProtocols . ')/', self::MARKER_PREFIX . "NOPARSE$1", $text )
+   // NOTE(review): the "\b" word-boundary of the PHP regex is not checked below; confirm whether that is intended
+   int cur = src_bgn;
+   int prv = cur; // start of the text that has been scanned but not yet copied to trg
+   boolean dirty = false;
+   boolean called_by_bry = trg == null;
+   while (true) {
+     // exit if EOS
+     if (cur == src_end) {
+       // if dirty, add rest of String
+       if (dirty)
+         trg.Add_mid(src, prv, src_end);
+       break;
+     }
+
+     // check if cur matches protocol
+     Object protocol_obj = protocols_trie.Match_at(trv, src, cur, src_end);
+     // no match; continue
+     if (protocol_obj == null) {
+       cur++;
+     }
+     // match; add to bfr
+     else {
+       dirty = true;
+       byte[] protocol_bry = (byte[])protocol_obj;
+       // create the bfr only once; re-creating it on every match would throw away earlier output
+       if (trg == null) trg = Bry_bfr_.New();
+       // copy the plain text between the previous match and this one; without this it is lost
+       trg.Add_mid(src, prv, cur);
+       trg.Add_bry_many(Xomw_strip_state.Bry__marker__bgn, Bry__noparse, protocol_bry);
+       cur += protocol_bry.length;
+       prv = cur;
+     }
+   }
+   if (called_by_bry) {
+     if (dirty)
+       return trg.To_bry_and_clear();
+     else {
+       // nothing changed: avoid a copy when the range is the whole array
+       if (src_bgn == 0 && src_end == src.length)
+         return src;
+       else
+         return Bry_.Mid(src, src_bgn, src_end);
+     }
+   }
+   else {
+     if (dirty)
+       return null;
+     else {
+       // nothing changed: caller still expects the range in trg
+       trg.Add_mid(src, src_bgn, src_end);
+       return null;
+     }
+   }
+ }
+ // Registers text in the strip state under a freshly generated marker and returns that marker.
+ public byte[] Insert_strip_item(byte[] text) {
+   // marker = marker-prefix + "-item-" + running index + marker-suffix
+   tmp.Add_bry_many(Xomw_strip_state.Bry__marker__bgn, Bry__strip_state_item);
+   tmp.Add_int_variable(marker_index++);
+   tmp.Add(Xomw_strip_state.Bry__marker__end);
+   byte[] rv = tmp.To_bry_and_clear();
+
+   // store as a "general" item
+   strip_state.Add_general(rv, text);
+   return rv;
+ }
+ private static final byte[] Bry__strip_state_item = Bry_.new_a7("-item-"), Bry__noparse = Bry_.new_a7("NOPARSE");
+ private static final byte[] Bry__marker__noparse = Bry_.Add(Xomw_strip_state.Bry__marker__bgn, Bry__noparse);
+ // Builds a case-insensitive trie in which each protocol from Gfo_protocol_itm.Ary() maps to its own text.
+ public static Btrie_slim_mgr Protocols__dflt() {
+   Btrie_slim_mgr trie = Btrie_slim_mgr.ci_a7();
+   Gfo_protocol_itm[] itms = Gfo_protocol_itm.Ary();
+   int len = itms.length;
+   for (int i = 0; i < len; i++) {
+     byte[] protocol = itms[i].Text_bry(); // EX: "https://"
+     trie.Add_obj(protocol, protocol);
+   }
+   return trie;
+ }
+}
diff --git a/400_xowa/src/gplx/xowa/mws/parsers/Xomw_parser__tst.java b/400_xowa/src/gplx/xowa/mws/parsers/Xomw_parser__tst.java
new file mode 100644
index 000000000..1fd74535b
--- /dev/null
+++ b/400_xowa/src/gplx/xowa/mws/parsers/Xomw_parser__tst.java
@@ -0,0 +1,72 @@
+/*
+XOWA: the XOWA Offline Wiki Application
+Copyright (C) 2012 gnosygnu@gmail.com
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as
+published by the Free Software Foundation, either version 3 of the
+License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+package gplx.xowa.mws.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
+import org.junit.*;
+public class Xomw_parser__tst {
+ private final Xomw_parser__fxt fxt = new Xomw_parser__fxt();
+ @Test public void Basic() {
+ fxt.Test__parse(String_.Concat_lines_nl_skip_last
+ ( "== heading_1 =="
+ , "para_1"
+ , "== heading_2 =="
+ , "para_2"
+ , "-----"
+ , "{|"
+ , "|-"
+ , "|a"
+ , "|}"
+ , "''italics''"
+ , "[https://a.org b]"
+ , "[[A|abc]]"
+ , "a »b« !important c"
+ ), String_.Concat_lines_nl_skip_last
+ ( "
heading_1
"
+ , "
para_1"
+ , "
"
+ , "
heading_2
"
+ , "
para_2"
+ , "
"
+ , "
"
+ , "
"
+ , ""
+ , ""
+ , "a"
+ , " |
"
+ , "
italics"
+ , "b"
+ , "abc"
+ , "a »b« !important c"
+ , "
"
+ ));
+ }
+}
+// Test fixture: wires a Xomw_parser to an "edit" test wiki and compares parser output against expected text.
+class Xomw_parser__fxt {
+  private final Xomw_parser mgr = new Xomw_parser();
+  private final Xomw_parser_bfr pbfr = new Xomw_parser_bfr();
+  public Xomw_parser__fxt() {
+    Xoae_app app = Xoa_app_fxt.Make__app__edit();
+    mgr.Init_by_wiki(Xoa_app_fxt.Make__wiki__edit(app));
+  }
+  public void Test__parse(String src_str, String expd) {
+    // run both parse passes over the same parser-bfr
+    mgr.Internal_parse(pbfr, Bry_.new_u8(src_str));
+    mgr.Internal_parse_half_parsed(pbfr, true, true);
+
+    // compare line by line; src_str serves as the failure message
+    String actl = pbfr.Rslt().To_str_and_clear();
+    Tfds.Eq_str_lines(expd, actl, src_str);
+  }
+}
diff --git a/400_xowa/src/gplx/xowa/mws/parsers/Xomw_parser_bfr.java b/400_xowa/src/gplx/xowa/mws/parsers/Xomw_parser_bfr.java
new file mode 100644
index 000000000..457d29ab8
--- /dev/null
+++ b/400_xowa/src/gplx/xowa/mws/parsers/Xomw_parser_bfr.java
@@ -0,0 +1,48 @@
+/*
+XOWA: the XOWA Offline Wiki Application
+Copyright (C) 2012 gnosygnu@gmail.com
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as
+published by the Free Software Foundation, either version 3 of the
+License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+package gplx.xowa.mws.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
+public class Xomw_parser_bfr { // manages 2 bfrs to eliminate multiple calls to new memory allocations ("return bfr.To_bry_and_clear()")
+  private final Bry_bfr bfr_1 = Bry_bfr_.New(), bfr_2 = Bry_bfr_.New();
+  // src holds the current text; trg receives the output of the next pass
+  private Bry_bfr src = bfr_1, trg = bfr_2;
+  public Xomw_parser_bfr() {}
+  public Bry_bfr Src()  {return src;}
+  public Bry_bfr Trg()  {return trg;}
+  public Bry_bfr Rslt() {return src;} // same bfr as Src()
+  // Loads text into src and empties trg; returns this for chaining.
+  public Xomw_parser_bfr Init(byte[] text) {
+    // resize each bfr once by guessing that html_len = text_len * 2
+    int capacity = text.length * 2;
+
+    // src: resize, clear, then add text
+    src.Resize(capacity);
+    src.Clear();
+    src.Add(text);
+
+    // trg: resize and clear
+    trg.Resize(capacity);
+    trg.Clear();
+    return this;
+  }
+  // Swaps src and trg, then clears the bfr that has become trg.
+  public void Switch() {
+    Bry_bfr old_src = this.src;
+    this.src = this.trg;
+    this.trg = old_src;
+    old_src.Clear();
+  }
+}
diff --git a/400_xowa/src/gplx/xowa/mws/parsers/Xomw_parser_bfr_.java b/400_xowa/src/gplx/xowa/mws/parsers/Xomw_parser_bfr_.java
new file mode 100644
index 000000000..b27219230
--- /dev/null
+++ b/400_xowa/src/gplx/xowa/mws/parsers/Xomw_parser_bfr_.java
@@ -0,0 +1,69 @@
+/*
+XOWA: the XOWA Offline Wiki Application
+Copyright (C) 2012 gnosygnu@gmail.com
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as
+published by the Free Software Foundation, either version 3 of the
+License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+package gplx.xowa.mws.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
+// Static helpers that operate on a Xomw_parser_bfr.
+public class Xomw_parser_bfr_ {
+  // Replaces every occurrence of find with repl in pbfr.Src().
+  // Output is written to pbfr.Trg() and the bfrs are switched only if at least one replacement was made.
+  public static void Replace(Xomw_parser_bfr pbfr, byte[] find, byte[] repl) {
+    // XO.PBFR
+    Bry_bfr src_bfr = pbfr.Src();
+    byte[] src = src_bfr.Bfr();
+    int src_bgn = 0;
+    int src_end = src_bfr.Len();
+    Bry_bfr bfr = pbfr.Trg();
+
+    if (Replace(bfr, Bool_.N, src, src_bgn, src_end, find, repl) != null)
+      pbfr.Switch();
+  }
+  // Core replace over src[src_bgn, src_end).
+  // * bfr == null: returns the replaced bytes; if nothing was found, returns src itself when the range
+  //   covers all of src, else a copy of the range
+  // * bfr != null: writes to bfr and returns Bry_.Empty if something was replaced, else null;
+  //   when nothing was replaced the unchanged range is added to bfr only if lone_bfr is true
+  private static byte[] Replace(Bry_bfr bfr, boolean lone_bfr, byte[] src, int src_bgn, int src_end, byte[] find, byte[] repl) {
+    boolean dirty = false;
+    int cur = src_bgn;
+    boolean called_by_bry = bfr == null;
+    int find_len = find.length;
+
+    // an empty find can never advance cur; treat it as "nothing to replace" instead of looping forever
+    while (find_len > 0) {
+      // bound the search by src_end; src may be a bfr's backing array with stale bytes after src_end
+      int find_bgn = Bry_find_.Find_fwd(src, find, cur, src_end);
+      if (find_bgn == Bry_find_.Not_found)
+        break;
+
+      // create the bfr only once; re-creating it on every match would throw away earlier output
+      if (bfr == null) bfr = Bry_bfr_.New();
+      bfr.Add_mid(src, cur, find_bgn); // text before the match
+      bfr.Add(repl);                   // the replacement itself
+      cur = find_bgn + find_len;       // resume after the match
+      dirty = true;
+    }
+
+    if (dirty) {
+      // add remainder after the last match
+      bfr.Add_mid(src, cur, src_end);
+      if (called_by_bry)
+        return bfr.To_bry_and_clear();
+      else
+        return Bry_.Empty; // non-null signals "replaced" to the pbfr caller
+    }
+    else {
+      if (called_by_bry) {
+        if (src_bgn == 0 && src_end == src.length)
+          return src;
+        else
+          return Bry_.Mid(src, src_bgn, src_end);
+      }
+      else {
+        if (lone_bfr)
+          bfr.Add_mid(src, src_bgn, src_end);
+        return null;
+      }
+    }
+  }
+}
diff --git a/400_xowa/src/gplx/xowa/parsers/mws/Xomw_parser_ctx.java b/400_xowa/src/gplx/xowa/mws/parsers/Xomw_parser_ctx.java
similarity index 63%
rename from 400_xowa/src/gplx/xowa/parsers/mws/Xomw_parser_ctx.java
rename to 400_xowa/src/gplx/xowa/mws/parsers/Xomw_parser_ctx.java
index 60ebb57c7..3afa3dbd2 100644
--- a/400_xowa/src/gplx/xowa/parsers/mws/Xomw_parser_ctx.java
+++ b/400_xowa/src/gplx/xowa/mws/parsers/Xomw_parser_ctx.java
@@ -15,13 +15,13 @@ GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see
.
*/
-package gplx.xowa.parsers.mws; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*;
-import gplx.xowa.parsers.htmls.*;
-import gplx.xowa.parsers.mws.utils.*;
-import gplx.xowa.parsers.uniqs.*;
+package gplx.xowa.mws.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
public class Xomw_parser_ctx {
- public Xomw_sanitizer_mgr Sanitizer() {return sanitizer;} private final Xomw_sanitizer_mgr sanitizer = new Xomw_sanitizer_mgr();
- public Xop_uniq_mgr Uniq_mgr() {return uniq_mgr;} private final Xop_uniq_mgr uniq_mgr = new Xop_uniq_mgr();
-
+ public Xoa_ttl Page_title() {return page_title;} private Xoa_ttl page_title;
+
+ public void Init_by_page(Xoa_ttl page_title) {
+ this.page_title = page_title;
+ }
+
public static final int Pos__bos = -1;
}
diff --git a/400_xowa/src/gplx/xowa/mws/parsers/Xomw_strip_state.java b/400_xowa/src/gplx/xowa/mws/parsers/Xomw_strip_state.java
new file mode 100644
index 000000000..01e71eb69
--- /dev/null
+++ b/400_xowa/src/gplx/xowa/mws/parsers/Xomw_strip_state.java
@@ -0,0 +1,139 @@
+/*
+XOWA: the XOWA Offline Wiki Application
+Copyright (C) 2012 gnosygnu@gmail.com
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as
+published by the Free Software Foundation, either version 3 of the
+License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+package gplx.xowa.mws.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
+import gplx.core.btries.*;
+// Stores marker -> value pairs ("strip items") and substitutes the values back into text ("unstrip").
+// Items are of type general or nowiki; an unstrip call selects one type or both.
+public class Xomw_strip_state { // REF.MW:/parser/StripState.php
+  private final Btrie_slim_mgr trie = Btrie_slim_mgr.cs();
+  private final Btrie_rv trv = new Btrie_rv();
+  private final Bry_bfr tmp_1 = Bry_bfr_.New(); // used by the byte[] overload of Unstrip
+  private final Bry_bfr tmp_2 = Bry_bfr_.New(); // used by the first level of recursion
+  private boolean tmp_2_used = false;
+  private int general_len, nowiki_len;
+  public void Clear() {
+    trie.Clear();
+    general_len = nowiki_len = 0;
+    tmp_2_used = false;
+  }
+  public void Add_general(byte[] marker, byte[] val) {Add_item(Tid__general, marker, val);}
+  public void Add_nowiki (byte[] marker, byte[] val) {Add_item(Tid__nowiki, marker, val);}
+  // Registers val under marker; any tid other than Tid__general is counted as nowiki.
+  public void Add_item(byte tid, byte[] marker, byte[] val) {
+    trie.Add_obj(marker, new Xomw_strip_item(tid, marker, val));
+    if (tid == Tid__general)
+      general_len++;
+    else
+      nowiki_len++;
+  }
+  public byte[] Unstrip_general(byte[] text) {return Unstrip(Tid__general, text);}
+  public byte[] Unstrip_nowiki (byte[] text) {return Unstrip(Tid__nowiki , text);}
+  public byte[] Unstrip_both   (byte[] text) {return Unstrip(Tid__both   , text);}
+  // Returns text with all markers of the given type(s) replaced; returns text itself if nothing was replaced.
+  public byte[] Unstrip(byte tid, byte[] text) {
+    boolean dirty = Unstrip(tid, tmp_1, text, 0, text.length);
+    return dirty ? tmp_1.To_bry_and_clear() : text;
+  }
+  public void Unstrip_general(Xomw_parser_bfr pbfr) {Unstrip(Tid__general, pbfr);}
+  public void Unstrip_nowiki (Xomw_parser_bfr pbfr) {Unstrip(Tid__nowiki , pbfr);}
+  public void Unstrip_both   (Xomw_parser_bfr pbfr) {Unstrip(Tid__both   , pbfr);}
+  // Unstrips pbfr.Src() into pbfr.Trg(); switches the bfrs only if something was replaced.
+  private boolean Unstrip(byte tid, Xomw_parser_bfr pbfr) {
+    // XO.PBFR
+    Bry_bfr src_bfr = pbfr.Src();
+    byte[] src = src_bfr.Bfr();
+    boolean dirty = Unstrip(tid, pbfr.Trg(), src, 0, src_bfr.Len());
+    if (dirty)
+      pbfr.Switch();
+    return dirty;
+  }
+  // Scans src[src_bgn, src_end) for markers and writes the unstripped text to trg.
+  // Returns false, leaving trg untouched, if no marker was replaced.
+  // NOTE(review): there is no guard against an item whose value contains its own marker; that would recurse without end
+  private boolean Unstrip(byte tid, Bry_bfr trg, byte[] src, int src_bgn, int src_end) {
+    // exit early if there are no items for any requested type;
+    // for Tid__both this requires both counts to be 0 (checking only general_len would skip nowiki-only states)
+    boolean wants_general = (tid & Tid__general) == Tid__general;
+    boolean wants_nowiki  = (tid & Tid__nowiki)  == Tid__nowiki;
+    boolean general_empty = !wants_general || general_len == 0;
+    boolean nowiki_empty  = !wants_nowiki  || nowiki_len  == 0;
+    if (general_empty && nowiki_empty)
+      return false;
+
+    int cur = src_bgn;
+    int prv = cur; // start of the text that has been scanned but not yet copied to trg
+    boolean dirty = false;
+    // loop over each src char
+    while (true) {
+      // EOS: exit
+      if (cur == src_end) {
+        if (dirty) // add remainder if dirty
+          trg.Add_mid(src, prv, src_end);
+        break;
+      }
+
+      // check if current pos matches strip state
+      Object o = trie.Match_at(trv, src, cur, src_end);
+      if (o != null) { // match
+        Xomw_strip_item item = (Xomw_strip_item)o;
+        byte item_tid = item.Tid();
+        if ((tid & item_tid) == item_tid) { // check if types match
+          // get bfr for recursion: reuse tmp_2 if it is free, else allocate
+          Bry_bfr nested_bfr = null;
+          boolean tmp_2_release = false;
+          if (tmp_2_used) {
+            nested_bfr = Bry_bfr_.New();
+          }
+          else {
+            nested_bfr = tmp_2;
+            tmp_2_used = true;
+            tmp_2_release = true;
+          }
+
+          // recurse: the item's value may itself contain markers
+          byte[] item_val = item.Val();
+          if (Unstrip(tid, nested_bfr, item_val, 0, item_val.length))
+            item_val = nested_bfr.To_bry_and_clear();
+          if (tmp_2_release)
+            tmp_2_used = false;
+
+          // add to trg
+          trg.Add_mid(src, prv, cur);
+          trg.Add(item_val);
+
+          // update vars
+          dirty = true;
+          cur += item.Key().length;
+          prv = cur;
+          continue;
+        }
+      }
+      cur++;
+    }
+    return dirty;
+  }
+  public static final String Str__marker_bgn = "\u007f'\"`UNIQ-";
+  public static final byte[]
+    Bry__marker__bgn = Bry_.new_a7(Str__marker_bgn)
+  , Bry__marker__end = Bry_.new_a7("-QINU`\"'\u007f")
+  ;
+  // bit flags: Tid__both == Tid__general | Tid__nowiki
+  public static final byte Tid__general = 1, Tid__nowiki = 2, Tid__both = 3;
+}
+// Immutable strip-state entry: the item type, the marker (key) and the text it stands for (val).
+class Xomw_strip_item {
+  private final byte tid;
+  private final byte[] key;
+  private final byte[] val;
+  public Xomw_strip_item(byte tid, byte[] key, byte[] val) {
+    this.tid = tid; this.key = key; this.val = val;
+  }
+  public byte   Tid() {return tid;}
+  public byte[] Key() {return key;}
+  public byte[] Val() {return val;}
+}
diff --git a/400_xowa/src/gplx/xowa/mws/parsers/Xomw_strip_state__tst.java b/400_xowa/src/gplx/xowa/mws/parsers/Xomw_strip_state__tst.java
new file mode 100644
index 000000000..3e7140995
--- /dev/null
+++ b/400_xowa/src/gplx/xowa/mws/parsers/Xomw_strip_state__tst.java
@@ -0,0 +1,44 @@
+/*
+XOWA: the XOWA Offline Wiki Application
+Copyright (C) 2012 gnosygnu@gmail.com
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as
+published by the Free Software Foundation, either version 3 of the
+License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+package gplx.xowa.mws.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
+import org.junit.*; import gplx.core.tests.*;
+public class Xomw_strip_state__tst {
+ private final Xomw_strip_state__fxt fxt = new Xomw_strip_state__fxt();
+ @Test public void Basic() {
+ fxt.Init__add (Xomw_strip_state.Tid__general, "\u007f'\"`UNIQ-key-1-QINU`\"'\u007f", "val-1");
+ fxt.Test__nostrip(Xomw_strip_state.Tid__nowiki , "a \u007f'\"`UNIQ-key-1-QINU`\"'\u007f b");
+ fxt.Test__unstrip(Xomw_strip_state.Tid__general, "a \u007f'\"`UNIQ-key-1-QINU`\"'\u007f b", "a val-1 b");
+ fxt.Test__unstrip(Xomw_strip_state.Tid__both , "a \u007f'\"`UNIQ-key-1-QINU`\"'\u007f b", "a val-1 b");
+ }
+ @Test public void Recurse() {
+ fxt.Init__add (Xomw_strip_state.Tid__general, "\u007f'\"`UNIQ-key-1-QINU`\"'\u007f", "val-1");
+ fxt.Init__add (Xomw_strip_state.Tid__general, "\u007f'\"`UNIQ-key-2-QINU`\"'\u007f", "\u007f'\"`UNIQ-key-1-QINU`\"'\u007f");
+ fxt.Test__unstrip(Xomw_strip_state.Tid__general, "a \u007f'\"`UNIQ-key-2-QINU`\"'\u007f b", "a val-1 b");
+ }
+}
+class Xomw_strip_state__fxt {
+ private final Xomw_strip_state strip_state = new Xomw_strip_state();
+ public void Init__add(byte tid, String marker, String val) {
+ strip_state.Add_item(tid, Bry_.new_u8(marker), Bry_.new_u8(val));
+ }
+ public void Test__nostrip(byte tid, String src) {Test__unstrip(tid, src, src);}
+ public void Test__unstrip(byte tid, String src, String expd) {
+ byte[] actl = strip_state.Unstrip(tid, Bry_.new_u8(src));
+ Gftest.Eq__str(expd, String_.new_u8(actl));
+ }
+}
diff --git a/400_xowa/src/gplx/xowa/mws/parsers/doubleunders/Xomw_doubleunder_wkr.java b/400_xowa/src/gplx/xowa/mws/parsers/doubleunders/Xomw_doubleunder_wkr.java
new file mode 100644
index 000000000..accd90101
--- /dev/null
+++ b/400_xowa/src/gplx/xowa/mws/parsers/doubleunders/Xomw_doubleunder_wkr.java
@@ -0,0 +1,84 @@
+/*
+XOWA: the XOWA Offline Wiki Application
+Copyright (C) 2012 gnosygnu@gmail.com
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as
+published by the Free Software Foundation, either version 3 of the
+License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+package gplx.xowa.mws.parsers.doubleunders; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
+// Partial port of MediaWiki's double-underscore handling (EX: __TOC__, __NOTOC__).
+// The commented-out PHP lines are the reference code that has not been ported yet.
+class Xomw_doubleunder_wkr {
+ public boolean show_toc;
+ public boolean force_toc_position;
+ public boolean output__no_gallery ;
+ public Xomw_doubleunder_data doubleunderscore_data = new Xomw_doubleunder_data();
+ // NOTE(review): stub; only resets the flags. text is never examined, so no flag is ever set to true here
+ private void Match_and_remove(byte[] text, Xomw_doubleunder_data doubleunderscore_data) {
+ doubleunderscore_data.Reset();
+ }
+ // Updates show_toc / force_toc_position / output__no_gallery from the double-underscore flags.
+ // text is only passed on to Match_and_remove; it is neither modified nor returned.
+ public void Do_double_underscore(byte[] text) {
+ // The position of __TOC__ needs to be recorded
+// $mw = MagicWord::get( 'toc' );
+// if ( $mw->match( $text ) ) {
+ // NOTE(review): the guarding "if" above is still commented out, so both flags are set on every call
+ this.show_toc = true;
+ this.force_toc_position = true;
+
+ // Set a placeholder. At the end we'll fill it in with the TOC.
+// $text = $mw->replace( '', $text, 1 );
+
+ // Only keep the first one.
+// $text = $mw->replace( '', $text );
+// }
+
+ // Now match and remove the rest of them
+// $mwa = MagicWord::getDoubleUnderscoreArray();
+ Match_and_remove(text, doubleunderscore_data);
+
+ if (doubleunderscore_data.no_gallery) {
+ output__no_gallery = true;
+ }
+ // force_toc_position is always true at this point (see above), so this branch cannot run yet
+ if (doubleunderscore_data.no_toc && !force_toc_position) {
+ this.show_toc = false;
+ }
+ // the remaining branches are placeholders; their bodies are unported PHP
+ if ( doubleunderscore_data.hidden_cat
+ // && $this->mTitle->getNamespace() == NS_CATEGORY
+ ) {
+ //$this->addTrackingCategory( 'hidden-category-category' );
+ }
+ // (T10068) Allow control over whether robots index a page.
+ // __INDEX__ always overrides __NOINDEX__, see T16899
+ if (doubleunderscore_data.no_index // && $this->mTitle->canUseNoindex()
+ ) {
+ // $this->mOutput->setIndexPolicy( 'noindex' );
+ // $this->addTrackingCategory( 'noindex-category' );
+ }
+ if (doubleunderscore_data.index //&& $this->mTitle->canUseNoindex()
+ ) {
+ // $this->mOutput->setIndexPolicy( 'index' );
+ // $this->addTrackingCategory( 'index-category' );
+ }
+
+ // Cache all double underscores in the database
+ // foreach ( $this->mDoubleUnderscores as $key => $val ) {
+ // $this->mOutput->setProperty( $key, '' );
+ // }
+ }
+}
+// Flags for the double-underscore words found on a page; all false after Reset.
+class Xomw_doubleunder_data {
+  public boolean no_gallery;
+  public boolean no_toc;
+  public boolean hidden_cat;
+  public boolean no_index;
+  public boolean index;
+  public void Reset() {
+    index      = false;
+    no_index   = false;
+    hidden_cat = false;
+    no_toc     = false;
+    no_gallery = false;
+  }
+}
diff --git a/400_xowa/src/gplx/xowa/mws/parsers/headings/Xomw_heading_cbk.java b/400_xowa/src/gplx/xowa/mws/parsers/headings/Xomw_heading_cbk.java
new file mode 100644
index 000000000..b89ff670e
--- /dev/null
+++ b/400_xowa/src/gplx/xowa/mws/parsers/headings/Xomw_heading_cbk.java
@@ -0,0 +1,22 @@
+/*
+XOWA: the XOWA Offline Wiki Application
+Copyright (C) 2012 gnosygnu@gmail.com
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as
+published by the Free Software Foundation, either version 3 of the
+License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+package gplx.xowa.mws.parsers.headings; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
+// Callback used with Xomw_heading_wkr; both methods receive the parser ctx and the wkr itself.
+public interface Xomw_heading_cbk {
+ // presumably called by the wkr for each heading it finds - confirm against Xomw_heading_wkr.Parse
+ void On_hdr_seen(Xomw_parser_ctx pctx, Xomw_heading_wkr wkr);
+ // presumably called once when the wkr reaches the end of src - confirm against Xomw_heading_wkr.Parse
+ void On_src_done(Xomw_parser_ctx pctx, Xomw_heading_wkr wkr);
+}
diff --git a/400_xowa/src/gplx/xowa/parsers/mws/wkrs/Xomw_hdr_cbk__html.java b/400_xowa/src/gplx/xowa/mws/parsers/headings/Xomw_heading_cbk__html.java
similarity index 76%
rename from 400_xowa/src/gplx/xowa/parsers/mws/wkrs/Xomw_hdr_cbk__html.java
rename to 400_xowa/src/gplx/xowa/mws/parsers/headings/Xomw_heading_cbk__html.java
index 12029bd30..80619cec5 100644
--- a/400_xowa/src/gplx/xowa/parsers/mws/wkrs/Xomw_hdr_cbk__html.java
+++ b/400_xowa/src/gplx/xowa/mws/parsers/headings/Xomw_heading_cbk__html.java
@@ -15,10 +15,14 @@ GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see
.
*/
-package gplx.xowa.parsers.mws.wkrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.mws.*;
-public class Xomw_hdr_cbk__html implements Xomw_hdr_cbk {
- public Bry_bfr Bfr() {return bfr;} private final Bry_bfr bfr = Bry_bfr_.New();
- public void On_hdr_seen(Xomw_parser_ctx pctx, Xomw_hdr_wkr wkr) {
+package gplx.xowa.mws.parsers.headings; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
+public class Xomw_heading_cbk__html implements Xomw_heading_cbk {
+ public Bry_bfr Bfr() {return bfr;} private Bry_bfr bfr;
+ public Xomw_heading_cbk__html Bfr_(Bry_bfr bfr) {
+ this.bfr = bfr;
+ return this;
+ }
+ public void On_hdr_seen(Xomw_parser_ctx pctx, Xomw_heading_wkr wkr) {
// add from txt_bgn to hdr_bgn; EX: "abc\n==A==\n"; "\n==" seen -> add "abc"
byte[] src = wkr.Src();
int hdr_bgn = wkr.Hdr_bgn(), txt_bgn = wkr.Txt_bgn();
@@ -34,7 +38,7 @@ public class Xomw_hdr_cbk__html implements Xomw_hdr_cbk {
bfr.Add_mid(wkr.Src(), wkr.Hdr_lhs_end(), wkr.Hdr_rhs_bgn());
bfr.Add(Tag__rhs).Add_int_digits(1, hdr_num).Add(Byte_ascii.Angle_end_bry); //
}
- public void On_src_done(Xomw_parser_ctx pctx, Xomw_hdr_wkr wkr) {
+ public void On_src_done(Xomw_parser_ctx pctx, Xomw_heading_wkr wkr) {
// add from txt_bgn to EOS;
byte[] src = wkr.Src();
int txt_bgn = wkr.Txt_bgn(), src_end = wkr.Src_end();
diff --git a/400_xowa/src/gplx/xowa/parsers/mws/wkrs/Xomw_hdr_wkr.java b/400_xowa/src/gplx/xowa/mws/parsers/headings/Xomw_heading_wkr.java
similarity index 80%
rename from 400_xowa/src/gplx/xowa/parsers/mws/wkrs/Xomw_hdr_wkr.java
rename to 400_xowa/src/gplx/xowa/mws/parsers/headings/Xomw_heading_wkr.java
index da63bfa8b..f71fbd52e 100644
--- a/400_xowa/src/gplx/xowa/parsers/mws/wkrs/Xomw_hdr_wkr.java
+++ b/400_xowa/src/gplx/xowa/mws/parsers/headings/Xomw_heading_wkr.java
@@ -15,11 +15,11 @@ GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see
.
*/
-package gplx.xowa.parsers.mws.wkrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.mws.*;
+package gplx.xowa.mws.parsers.headings; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
import gplx.core.btries.*; import gplx.xowa.langs.*;
-public class Xomw_hdr_wkr {
+public class Xomw_heading_wkr {
private Xomw_parser_ctx pctx;
- private Xomw_hdr_cbk cbk;
+ private Xomw_heading_cbk cbk;
public byte[] Src() {return src;} private byte[] src;
public int Src_end() {return src_end;} private int src_end;
public int Txt_bgn() {return txt_bgn;} private int txt_bgn;
@@ -30,13 +30,27 @@ public class Xomw_hdr_wkr {
public int Hdr_lhs_end() {return hdr_lhs_end;} private int hdr_lhs_end;
public int Hdr_rhs_bgn() {return hdr_rhs_bgn;} private int hdr_rhs_bgn;
public int Hdr_rhs_end() {return hdr_rhs_end;} private int hdr_rhs_end;
- public void Parse(Xomw_parser_ctx pctx, byte[] src, int src_bgn, int src_end, Xomw_hdr_cbk cbk) { // REF.MW: /includes/parser/Parser.php|doHeadings
+ public void Do_headings(Xomw_parser_ctx pctx, Xomw_parser_bfr pbfr, Xomw_heading_cbk__html cbk) {
+ Bry_bfr src_bfr = pbfr.Src();
+ byte[] src_bry = src_bfr.Bfr();
+ int src_end = src_bfr.Len();
+ cbk.Bfr_(pbfr.Trg());
+ pbfr.Switch();
+ Parse(pctx, src_bry, 0, src_end, cbk);
+ }
+ public void Parse(Xomw_parser_ctx pctx, byte[] src, int src_bgn, int src_end, Xomw_heading_cbk cbk) { // REF.MW: /includes/parser/Parser.php|doHeadings
// init members
this.pctx = pctx;
this.src = src;
this.src_end = src_end;
this.cbk = cbk;
+ // PORTED:
+ // for ( $i = 6; $i >= 1; --$i ) {
+ // $h = str_repeat( '=', $i );
+ // $text = preg_replace( "/^$h(.+)$h\\s*$/m", "
\\1", $text );
+ // }
+
// do loop
int pos = src_bgn;
this.txt_bgn = pos == Xomw_parser_ctx.Pos__bos ? 0 : pos;
@@ -92,7 +106,3 @@ public class Xomw_hdr_wkr {
return nl_rhs;
}
}
-// for ( $i = 6; $i >= 1; --$i ) {
-// $h = str_repeat( '=', $i );
-// $text = preg_replace( "/^$h(.+)$h\\s*$/m", "
\\1", $text );
-// }
diff --git a/400_xowa/src/gplx/xowa/parsers/mws/wkrs/Xomw_hdr_wkr_tst.java b/400_xowa/src/gplx/xowa/mws/parsers/headings/Xomw_heading_wkr__tst.java
similarity index 74%
rename from 400_xowa/src/gplx/xowa/parsers/mws/wkrs/Xomw_hdr_wkr_tst.java
rename to 400_xowa/src/gplx/xowa/mws/parsers/headings/Xomw_heading_wkr__tst.java
index c089ba8bf..81e86f911 100644
--- a/400_xowa/src/gplx/xowa/parsers/mws/wkrs/Xomw_hdr_wkr_tst.java
+++ b/400_xowa/src/gplx/xowa/mws/parsers/headings/Xomw_heading_wkr__tst.java
@@ -15,10 +15,10 @@ GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see
.
*/
-package gplx.xowa.parsers.mws.wkrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.mws.*;
+package gplx.xowa.mws.parsers.headings; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
import org.junit.*;
-public class Xomw_hdr_wkr_tst {
- private final Xomw_hdr_wkr_fxt fxt = new Xomw_hdr_wkr_fxt();
+public class Xomw_heading_wkr__tst {
+ private final Xomw_heading_wkr__fxt fxt = new Xomw_heading_wkr__fxt();
@Test public void Basic() {
fxt.Test__parse("==A==" , "
A
");
fxt.Test__parse("abc\n==A==\ndef" , "abc\n
A
\ndef");
@@ -28,10 +28,11 @@ public class Xomw_hdr_wkr_tst {
fxt.Test__parse("abc\n==" , "abc\n
");
}
}
-class Xomw_hdr_wkr_fxt {
- private final Xomw_hdr_wkr wkr = new Xomw_hdr_wkr();
- private final Xomw_hdr_cbk__html cbk = new Xomw_hdr_cbk__html();
+class Xomw_heading_wkr__fxt {
+ private final Xomw_heading_wkr wkr = new Xomw_heading_wkr();
+ private final Xomw_heading_cbk__html cbk = new Xomw_heading_cbk__html().Bfr_(Bry_bfr_.New());
private final Xomw_parser_ctx pctx = new Xomw_parser_ctx();
+
public void Test__parse(String src_str, String expd) {
byte[] src_bry = Bry_.new_u8(src_str);
wkr.Parse(pctx, src_bry, -1, src_bry.length, cbk);
diff --git a/400_xowa/src/gplx/xowa/mws/parsers/hrs/Xomw_hr_wkr.java b/400_xowa/src/gplx/xowa/mws/parsers/hrs/Xomw_hr_wkr.java
new file mode 100644
index 000000000..b19fef885
--- /dev/null
+++ b/400_xowa/src/gplx/xowa/mws/parsers/hrs/Xomw_hr_wkr.java
@@ -0,0 +1,81 @@
+/*
+XOWA: the XOWA Offline Wiki Application
+Copyright (C) 2012 gnosygnu@gmail.com
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as
+published by the Free Software Foundation, either version 3 of the
+License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+package gplx.xowa.mws.parsers.hrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
+import gplx.langs.phps.utls.*;
+// Replaces wikitext horizontal rules (5 or more dashes at the start of the text or after a newline) with "<hr />".
+public class Xomw_hr_wkr {// THREAD.UNSAFE: caching for repeated calls
+  private Bry_bfr bfr; // target bfr of the current Replace_hrs call; a member so that Replace_hr can write to it
+  // Reads pbfr.Src(), writes to pbfr.Trg() and switches the bfrs only if at least one rule was replaced.
+  public void Replace_hrs(Xomw_parser_ctx pctx, Xomw_parser_bfr pbfr) { // REF.MW: text = preg_replace('/(^|\n)-----*/', '\\1<hr />', text);
+    // XO.PBFR
+    Bry_bfr src_bfr = pbfr.Src();
+    byte[] src = src_bfr.Bfr();
+    int src_bgn = 0;
+    int src_end = src_bfr.Len();
+    this.bfr = pbfr.Trg();
+
+    boolean dirty = false;
+
+    // do separate check for "-----" at start of String;
+    // require src_end >= 5 first: src is the bfr's backing array and may hold stale bytes after src_end
+    int cur = 0;
+    if (  src_end >= Len__wtxt__hr__bos
+       && Bry_.Eq(src, 0, Len__wtxt__hr__bos, Bry__wtxt__hr__bos)) {
+      cur = Replace_hr(Bool_.N, src, src_bgn, src_end, 0, Len__wtxt__hr__bos);
+      dirty = true;
+    }
+
+    // loop
+    while (true) {
+      // find next "\n-----"
+      int find_bgn = Bry_find_.Find_fwd(src, Bry__wtxt__hr__mid, cur, src_end);
+
+      // nothing found; exit
+      if (find_bgn == Bry_find_.Not_found) {
+        if (dirty) {
+          bfr.Add_mid(src, cur, src_end);
+        }
+        break;
+      }
+
+      // something found
+      cur = Replace_hr(Bool_.Y, src, cur, src_end, find_bgn, Len__wtxt__hr__mid);
+      dirty = true;
+    }
+    if (dirty)
+      pbfr.Switch();
+  }
+  // Writes one rule to bfr and returns the position in src after the rule and any extra dashes.
+  // mid: true if the token is "\n-----" (the text before it and the "\n" are added first); false for "-----" at start
+  private int Replace_hr(boolean mid, byte[] src, int cur, int src_end, int find_bgn, int tkn_len) {
+    // something found; add to bfr
+    if (mid) {
+      bfr.Add_mid(src, cur, find_bgn); // add everything before "\n-----"
+      bfr.Add_byte_nl();
+    }
+    bfr.Add(Bry__html__hr);
+
+    // set dirty / cur and continue
+    cur = find_bgn + tkn_len;
+    cur = Bry_find_.Find_fwd_while(src, cur, src_end, Byte_ascii.Dash); // gobble up trailing "-"; the "*" in "-----*" from the regex above
+    return cur;
+  }
+  private static final byte[]
+    Bry__wtxt__hr__mid = Bry_.new_a7("\n-----")
+  , Bry__wtxt__hr__bos = Bry_.new_a7("-----")
+  , Bry__html__hr      = Bry_.new_a7("<hr />")
+  ;
+  private static final int
+    Len__wtxt__hr__mid = Bry__wtxt__hr__mid.length
+  , Len__wtxt__hr__bos = Bry__wtxt__hr__bos.length
+  ;
+}
diff --git a/400_xowa/src/gplx/xowa/mws/parsers/hrs/Xomw_hr_wkr__tst.java b/400_xowa/src/gplx/xowa/mws/parsers/hrs/Xomw_hr_wkr__tst.java
new file mode 100644
index 000000000..c51641d9b
--- /dev/null
+++ b/400_xowa/src/gplx/xowa/mws/parsers/hrs/Xomw_hr_wkr__tst.java
@@ -0,0 +1,36 @@
+/*
+XOWA: the XOWA Offline Wiki Application
+Copyright (C) 2012 gnosygnu@gmail.com
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as
+published by the Free Software Foundation, either version 3 of the
+License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+package gplx.xowa.mws.parsers.hrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
+import org.junit.*;
+public class Xomw_hr_wkr__tst {
+ // Each test feeds wikitext to Xomw_hr_wkr.Replace_hrs and compares the result buffer to the expected text.
+ private final Xomw_hr_wkr__fxt fxt = new Xomw_hr_wkr__fxt();
+ @Test public void Basic() {fxt.Test__parse("a\n-----b" , "a\n<hr />b");}              // five dashes after "\n"
+ @Test public void Extend() {fxt.Test__parse("a\n------b" , "a\n<hr />b");}            // extra dashes are swallowed
+ @Test public void Not_found() {fxt.Test__parse("a\n----b" , "a\n----b");}             // four dashes are left untouched by this port
+ @Test public void Bos() {fxt.Test__parse("-----a" , "<hr />a");}                      // dashes at start of text
+ @Test public void Bos_and_mid() {fxt.Test__parse("-----a\n-----b" , "<hr />a\n<hr />b");}
+}
+class Xomw_hr_wkr__fxt {
+ private final Xomw_parser_bfr pbfr = new Xomw_parser_bfr();
+ private final Xomw_hr_wkr wkr = new Xomw_hr_wkr();
+ public void Test__parse(String src_str, String expd) {
+ byte[] src_bry = Bry_.new_u8(src_str);
+ wkr.Replace_hrs(new Xomw_parser_ctx(), pbfr.Init(src_bry));
+ Tfds.Eq_str_lines(expd, pbfr.Rslt().To_str_and_clear(), src_str);
+ }
+}
diff --git a/400_xowa/src/gplx/xowa/mws/parsers/lnkes/Xomw_lnke_wkr.java b/400_xowa/src/gplx/xowa/mws/parsers/lnkes/Xomw_lnke_wkr.java
new file mode 100644
index 000000000..8c0311721
--- /dev/null
+++ b/400_xowa/src/gplx/xowa/mws/parsers/lnkes/Xomw_lnke_wkr.java
@@ -0,0 +1,282 @@
+/*
+XOWA: the XOWA Offline Wiki Application
+Copyright (C) 2012 gnosygnu@gmail.com
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as
+published by the Free Software Foundation, either version 3 of the
+License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+package gplx.xowa.mws.parsers.lnkes; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
+import gplx.core.btries.*; import gplx.core.primitives.*;
+import gplx.langs.phps.utls.*;
+import gplx.xowa.mws.htmls.*;
+// TODO.XO: add proto-rel; EX: [//a.org b]
+public class Xomw_lnke_wkr {// THREAD.UNSAFE: caching for repeated calls
+ private final Bry_bfr tmp;
+ private Btrie_slim_mgr protocol_trie; private final Btrie_rv trv = new Btrie_rv();
+ private int autonumber;
+ private final Xomw_linker linker;
+ private final Xomwh_atr_mgr attribs = new Xomwh_atr_mgr();
+ public Xomw_lnke_wkr(Xomw_parser mgr) {
+ // Borrows the scratch buffer and the linker from the given parser instead of creating its own.
+ this.tmp = mgr.Tmp();
+ this.linker = mgr.Linker();
+ }
+ public void Init_by_wiki(Btrie_slim_mgr protocol_trie) {
+ // Stores the trie used by Replace_external_links to recognize a protocol directly after "["; must be called before parsing.
+ this.protocol_trie = protocol_trie;
+ }
+ public void Replace_external_links(Xomw_parser_ctx pctx, Xomw_parser_bfr pbfr) {
+     // Scans pbfr.Src() for bracketed external links ("[" + protocol + url + optional text + "]") and hands each one to
+     // linker.Make_external_link; everything else is copied to pbfr.Trg() unchanged. The buffers are always switched.
+     // XO.PBFR
+     Bry_bfr src_bfr = pbfr.Src();
+     byte[] src = src_bfr.Bfr();
+     int src_bgn = 0;
+     int src_end = src_bfr.Len(); // valid text is src[src_bgn..src_end); src itself comes from Bfr() and must not be read at or past src_end
+     Bry_bfr bfr = pbfr.Trg();
+     pbfr.Switch();
+
+     int cur = src_bgn;
+     this.autonumber = 1;
+
+     // find regex
+     int prv = 0; // start of text not yet copied to bfr; every path below leaves prv == cur before looping
+     while (true) {
+         // PORTED.BGN: $bits = preg_split( $this->mExtLinkBracketedRegex, $text, -1, PREG_SPLIT_DELIM_CAPTURE );
+
+         // $this->mExtLinkBracketedRegex = '/\[(((?i)' . $this->mUrlProtocols . ')' .
+         // self::EXT_LINK_ADDR .
+         // self::EXT_LINK_URL_CLASS . '*)\p{Zs}*([^\]\\x00-\\x08\\x0a-\\x1F]*?)\]/Su';
+         //
+         // REGEX: "[" + "protocol" + "url-char"* + "space"* + "text"* + "]";
+         // protocol -> ((?i)' . $this->mUrlProtocols . ') -> "http://", "HTTps://"
+         // url-char* -> (EXT_LINK_ADDR . EXT_LINK_URL_CLASS*) -> "255.255.255.255", "a.b.c"; NOTE: "http:///" is valid
+         // space* -> \p{Zs}*
+         // text -> ([^\]\\x00-\\x08\\x0a-\\x1F]*?) -> "abcd"
+         // NOTE: /S=extra analysis of pattern /u = unicode support; REF.MW:http://php.net/manual/en/reference.pcre.pattern.modifiers.php
+
+         // Simplified expression to match an IPv4 or IPv6 address, or
+         // at least one character of a host name (embeds EXT_LINK_URL_CLASS)
+         // static final EXT_LINK_ADDR = '(?:[0-9.]+|\\[(?i:[0-9a-f:.]+)\\]|[^][<>"\\x00-\\x20\\x7F\p{Zs}])';
+         //
+         // REGEX: "IPv4" | "IPv6" | "url-char"
+         // IPv4 -> [0-9.]+ -> "255."
+         // IPv6 -> \\[(?i:[0-9a-f:.]+)\\] -> "2001:"
+         // url-char -> [^][<>"\\x00-\\x20\\x7F\p{Zs}] -> "abcde"
+
+         // Constants needed for external link processing
+         // Everything except bracket, space, or control characters
+         // \p{Zs} is unicode 'separator, space' category. It covers the space 0x20
+         // as well as U+3000 is IDEOGRAPHIC SPACE for T21052
+         // static final EXT_LINK_URL_CLASS = '[^][<>"\\x00-\\x20\\x7F\p{Zs}]';
+         //
+         // REGEX: NOT [ "symbols" | "control" | "whitespace" ]
+         // symbols -> ^][<>"
+         // control -> \\x00-\\x20\\x7F
+         // whitespace -> \p{Zs}
+
+         // search for "["
+         int lnke_bgn = Bry_find_.Find_fwd(src, Byte_ascii.Brack_bgn, cur, src_end);
+         if (lnke_bgn == Bry_find_.Not_found) {
+             bfr.Add_mid(src, prv, src_end);
+             break; // no more "["; stop
+         }
+
+         // check for protocol; EX: "https://"
+         cur = lnke_bgn + 1;
+         int url_bgn = cur;
+         Object protocol_bry = cur < src_end ? protocol_trie.Match_at(trv, src, cur, src_end) : null; // "[" may be the last char of the text
+         if (protocol_bry == null) {
+             bfr.Add_mid(src, prv, cur);
+             prv = cur;
+             continue;// unknown protocol; ignore "["
+         }
+         cur += ((byte[])protocol_bry).length;
+
+         // check for one-or-more url chars; [^][<>"\\x00-\\x20\\x7F\p{Zs}]
+         int domain_bgn = cur;
+         while (cur < src_end) { // stop at end of text; an unterminated link must not read past src_end
+             byte b = src[cur];
+             Object url_char_byte = invalid_url_chars_trie.Match_at_w_b0(trv, b, src, cur, src_end);
+             if (url_char_byte != null) break;
+             cur += gplx.core.intls.Utf8_.Len_of_char_by_1st_byte(b);
+         }
+         if (cur > src_end) cur = src_end; // truncated multi-byte char at end of text
+         if (cur - domain_bgn == 0) {
+             bfr.Add_mid(src, prv, cur);
+             prv = cur;
+             continue; // no chars found; invalid; EX: "[https://"abcde"]"
+         }
+         int url_end = cur;
+
+         // get ws (if any)
+         int ws_bgn = -1;
+         while (cur < src_end) {
+             Object space_byte = space_chars_trie.Match_at(trv, src, cur, src_end);
+             if (space_byte == null) break;
+             if (ws_bgn == -1) ws_bgn = cur;
+             cur += ((Int_obj_val)space_byte).Val();
+         }
+
+         // get text (if any)
+         int text_bgn = -1, text_end = -1;
+         while (cur < src_end) {
+             byte b = src[cur];
+             Object invalid_text_char = invalid_text_chars_trie.Match_at_w_b0(trv, b, src, cur, src_end);
+             if (invalid_text_char != null) break;
+             if (text_bgn == -1) text_bgn = cur;
+             cur += gplx.core.intls.Utf8_.Len_of_char_by_1st_byte(b);
+             if (cur > src_end) cur = src_end; // truncated multi-byte char at end of text
+             text_end = cur;
+         }
+
+         // check for "]"; reaching end of text without one means the link is unterminated
+         if (cur >= src_end || src[cur] != Byte_ascii.Brack_end) {
+             bfr.Add_mid(src, prv, cur);
+             prv = cur;
+             continue;
+         }
+         cur++;
+         // PORTED.END: $bits = preg_split( $this->mExtLinkBracketedRegex, $text, -1, PREG_SPLIT_DELIM_CAPTURE );
+
+         byte[] url = Bry_.Mid(src, url_bgn, url_end);
+         boolean text_missing = text_bgn == -1;
+         byte[] text = text_missing ? Bry_.Empty : Bry_.Mid(src, text_bgn, text_end);
+
+         // The characters '<' and '>' (which were escaped by
+         // removeHTMLtags()) should not be included in
+         // URLs, per RFC 2396.
+         // TODO.XO:
+         //$m2 = [];
+         //if ( preg_match( '/&(lt|gt);/', $url, $m2, PREG_OFFSET_CAPTURE ) ) {
+         // $text = substr( $url, $m2[0][1] ) . ' ' . $text;
+         // $url = substr( $url, 0, $m2[0][1] );
+         //}
+
+         // If the link text is an image URL, replace it with an <img> tag
+         // This happened by accident in the original parser, but some people used it extensively
+         // TODO.XO:
+         //$img = $this->maybeMakeExternalImage( $text );
+         //if ( $img !== false ) {
+         // $text = $img;
+         //}
+         //
+         //$dtrail = '';
+
+         // Set linktype for CSS - if URL==text, link is essentially free
+         byte[] link_type = Bry_.Eq(text, url) ? Link_type__free : Link_type__text;
+
+         // No link text, e.g. [http://domain.tld/some.link]
+         if (text_missing) {
+             // Autonumber; EX: "[123]"
+             tmp.Add_byte(Byte_ascii.Brack_bgn);
+             tmp.Add_int_variable(autonumber++); // TODO.XO:$langObj->formatNum( ++$this->mAutonumber );
+             tmp.Add_byte(Byte_ascii.Brack_end);
+             text = tmp.To_bry_and_clear(); // the number IS the link text; also leaves the shared tmp empty for its next user
+             link_type = Link_type__autonumber;
+         }
+         else {
+             // Have link text, e.g. [http://domain.tld/some.link text]s
+             // Check for trail
+             // TODO.XO:
+             // list( $dtrail, $trail ) = Linker::splitTrail( $trail );
+         }
+
+         // TODO.XO:
+         // $text = $this->getConverterLanguage()->markNoConversion( $text );
+
+         // TODO.XO:
+         // $url = Sanitizer::cleanUrl( $url );
+
+         bfr.Add_mid(src, prv, lnke_bgn);
+         prv = cur;
+         // Use the encoded URL
+         // This means that users can paste URLs directly into the text
+         // Funny characters like ö aren't valid in URLs anyway
+         // This was changed in August 2004
+         // TODO.XO:getExternalLinkAttribs
+         attribs.Clear();
+         linker.Make_external_link(bfr, url, text, Bool_.N, link_type, attribs, Bry_.Empty);
+
+         // Register link in the output Object.
+         // Replace unnecessary URL escape codes with the referenced character
+         // This prevents spammers from hiding links from the filters
+         // $pasteurized = self::normalizeLinkUrl( $url );
+         // $this->mOutput->addExternalLink( $pasteurized );
+     }
+ }
+// public function getExternalLinkAttribs( $url ) {
+// $attribs = [];
+// $rel = self::getExternalLinkRel( $url, $this->mTitle );
+//
+// $target = $this->mOptions->getExternalLinkTarget();
+// if ( $target ) {
+// $attribs['target'] = $target;
+// if ( !in_array( $target, [ '_self', '_parent', '_top' ] ) ) {
+// // T133507. New windows can navigate parent cross-origin.
+// // Including noreferrer due to lacking browser
+// // support of noopener. Eventually noreferrer should be removed.
+// if ( $rel !== '' ) {
+// $rel .= ' ';
+// }
+// $rel .= 'noreferrer noopener';
+// }
+// }
+// $attribs['rel'] = $rel;
+// return $attribs;
+// }
+// public static function getExternalLinkRel( $url = false, $title = null ) {
+// global $wgNoFollowLinks, $wgNoFollowNsExceptions, $wgNoFollowDomainExceptions;
+// $ns = $title ? $title->getNamespace() : false;
+// if ( $wgNoFollowLinks && !in_array( $ns, $wgNoFollowNsExceptions )
+// && !wfMatchesDomainList( $url, $wgNoFollowDomainExceptions )
+// ) {
+// return 'nofollow';
+// }
+// return null;
+// }
+
+ private static final byte[]
+ Link_type__free = Bry_.new_a7("free")
+ , Link_type__text = Bry_.new_a7("text")
+ , Link_type__autonumber = Bry_.new_a7("autonumber")
+ ;
+
+ private static final Btrie_slim_mgr
+ invalid_url_chars_trie = New__invalid_url_chars_trie()
+ , space_chars_trie = New__space_chars_trie()
+ , invalid_text_chars_trie = New__invalid_text_chars_trie()
+ ;
+ private static Btrie_slim_mgr New__invalid_url_chars_trie() { // REGEX:[^][<>"\\x00-\\x20\\x7F\p{Zs}]; NOTE: val is just a marker
+     // Holds every byte sequence that ends a url; a match in this trie means "not a url char".
+     Btrie_slim_mgr trie = Btrie_slim_mgr.cs();
+     // symbols
+     trie.Add_str_byte__many(Byte_.Zero, "[", "]", "<", ">", "\"");
+     // control chars and space: x00-x20
+     for (int ctl = 0; ctl <= 32; ctl++) {
+         trie.Add_bry_byte(new byte[] {(byte)ctl}, Byte_.Zero);
+     }
+     trie.Add_bry_byte(Bry_.New_by_ints(127), Byte_.Zero); // x7F
+     trie.Add_bry_byte(Bry_.New_by_ints(227, 128, 128), Byte_.Zero); // \p{Zs} // e3 80 80; https://phabricator.wikimedia.org/T21052
+     return trie;
+ }
+ private static Btrie_slim_mgr New__space_chars_trie() { // REGEX:\p{Zs}; NOTE: val is key.length
+     // Only two members of \p{Zs} are registered: the ASCII space and U+3000 (utf8: e3 80 80)
+     Btrie_slim_mgr trie = Btrie_slim_mgr.cs();
+     New__trie_itm__by_len(trie, 0x20);
+     New__trie_itm__by_len(trie, 0xE3, 0x80, 0x80); // \p{Zs} // e3 80 80; https://phabricator.wikimedia.org/T21052
+     return trie;
+ }
+ private static Btrie_slim_mgr New__invalid_text_chars_trie() { // REGEX:([^\]\\x00-\\x08\\x0a-\\x1F]*?); NOTE: val is key.length
+     // Holds every byte that ends the link text: "]" and the control chars x00-x1F except tab (x09)
+     Btrie_slim_mgr trie = Btrie_slim_mgr.cs();
+     New__trie_itm__by_len(trie, Byte_ascii.Brack_end);
+     for (int ctl = 0; ctl < 32; ctl++) {
+         if (ctl == 9) continue; // x09 is allowed in text; regex ranges are x00-x08 and x0a-x1F
+         New__trie_itm__by_len(trie, ctl);
+     }
+     return trie;
+ }
+ private static void New__trie_itm__by_len(Btrie_slim_mgr mgr, int... ary) {
+ // Registers the byte sequence given as ints; the stored value is the sequence's length, which callers add to their cursor on a match.
+ mgr.Add_obj(Bry_.New_by_ints(ary), new Int_obj_val(ary.length));
+ }
+}
diff --git a/400_xowa/src/gplx/xowa/mws/parsers/lnkes/Xomw_lnke_wkr__tst.java b/400_xowa/src/gplx/xowa/mws/parsers/lnkes/Xomw_lnke_wkr__tst.java
new file mode 100644
index 000000000..d189771cc
--- /dev/null
+++ b/400_xowa/src/gplx/xowa/mws/parsers/lnkes/Xomw_lnke_wkr__tst.java
@@ -0,0 +1,56 @@
+/*
+XOWA: the XOWA Offline Wiki Application
+Copyright (C) 2012 gnosygnu@gmail.com
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as
+published by the Free Software Foundation, either version 3 of the
+License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+package gplx.xowa.mws.parsers.lnkes; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
+import org.junit.*;
+public class Xomw_lnke_wkr__tst {
+// NOTE(review): the expected-html arguments of Basic and Many appear to have lost their anchor markup in this copy
+// (the expected strings break across lines where a tag would be); restore them from the original commit before running.
+ private final Xomw_lnke_wkr__fxt fxt = new Xomw_lnke_wkr__fxt();
+ @Test public void Basic() {fxt.Test__parse("[https://a.org b]" , "
 b");}
+ // the "Invaild" cases expect the wikitext to be returned unchanged
+ @Test public void Invaild__protocol() {fxt.Test__parse("[httpz:a.org]" , "[httpz:a.org]");}
+ @Test public void Invaild__protocol_slash() {fxt.Test__parse("[https:a.org]" , "[https:a.org]");}
+ @Test public void Invaild__urlchars__0() {fxt.Test__parse("[https://]" , "[https://]");}
+ @Test public void Invaild__urlchars__bad() {fxt.Test__parse("[https://\"]" , "[https://\"]");}
+ @Test public void Many() {
+ fxt.Test__parse(String_.Concat_lines_nl_apos_skip_last
+ ( "a"
+ , "[https://b.org c]"
+ , "d"
+ , "[https://e.org f]"
+ , "g"
+ ), String_.Concat_lines_nl_apos_skip_last
+ ( "a"
+ , "
 c"
+ , "d"
+ , "
 f"
+ , "g"
+ ));
+ }
+}
+class Xomw_lnke_wkr__fxt {
+ private final Xomw_lnke_wkr wkr = new Xomw_lnke_wkr(new Xomw_parser());
+ private final Xomw_parser_bfr pbfr = new Xomw_parser_bfr();
+ private boolean apos = true;
+ public Xomw_lnke_wkr__fxt() {
+ wkr.Init_by_wiki(Xomw_parser.Protocols__dflt());
+ }
+ public void Test__parse(String src_str, String expd) {
+ byte[] src_bry = Bry_.new_u8(src_str);
+ wkr.Replace_external_links(new Xomw_parser_ctx(), pbfr.Init(src_bry));
+ if (apos) expd = gplx.langs.htmls.Gfh_utl.Replace_apos(expd);
+ Tfds.Eq_str_lines(expd, pbfr.Rslt().To_str_and_clear(), src_str);
+ }
+}
diff --git a/400_xowa/src/gplx/xowa/mws/parsers/lnkis/Xomw_lnki_wkr.java b/400_xowa/src/gplx/xowa/mws/parsers/lnkis/Xomw_lnki_wkr.java
new file mode 100644
index 000000000..e9db20cc7
--- /dev/null
+++ b/400_xowa/src/gplx/xowa/mws/parsers/lnkis/Xomw_lnki_wkr.java
@@ -0,0 +1,462 @@
+/*
+XOWA: the XOWA Offline Wiki Application
+Copyright (C) 2012 gnosygnu@gmail.com
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as
+published by the Free Software Foundation, either version 3 of the
+License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+package gplx.xowa.mws.parsers.lnkis; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
+import gplx.core.btries.*; import gplx.core.primitives.*;
+import gplx.langs.phps.utls.*;
+import gplx.xowa.wikis.nss.*; import gplx.xowa.wikis.xwikis.*;
+import gplx.xowa.mws.parsers.*; import gplx.xowa.mws.parsers.quotes.*;
+import gplx.xowa.mws.htmls.*; import gplx.xowa.mws.linkers.*;
+import gplx.xowa.mws.utls.*;
+import gplx.xowa.parsers.uniqs.*;
+public class Xomw_lnki_wkr {// THREAD.UNSAFE: caching for repeated calls
+ private final Xomw_link_holders holders;
+ private final Xomw_linker linker;
+ private final Xomw_link_renderer link_renderer;
+ // private final Btrie_slim_mgr protocols_trie;
+ private final Xomw_quote_wkr quote_wkr;
+ private final Xomw_strip_state strip_state;
+ private Xow_wiki wiki;
+ private Xoa_ttl page_title;
+ private final Xomw_linker__normalize_subpage_link normalize_subpage_link = new Xomw_linker__normalize_subpage_link();
+ private final Bry_bfr tmp;
+ private final Xomw_parser parser;
+ private final Xomwh_atr_mgr extra_atrs = new Xomwh_atr_mgr();
+ public Xomw_lnki_wkr(Xomw_parser parser, Xomw_link_holders holders, Xomw_link_renderer link_renderer, Btrie_slim_mgr protocols_trie) {
+ // Stores the collaborators passed in and pulls the remaining ones (linker, quote worker, scratch buffer, strip state) from the parser.
+ // NOTE: protocols_trie is currently unused; the field and its assignment are commented out.
+ this.parser = parser;
+ this.holders = holders;
+ this.link_renderer = link_renderer;
+ // this.protocols_trie = protocols_trie;
+
+ this.linker = parser.Linker();
+ this.quote_wkr = parser.Quote_wkr();
+ this.tmp = parser.Tmp();
+ this.strip_state = parser.Strip_state();
+ }
+ public void Init_by_wiki(Xow_wiki wiki) {
+ this.wiki = wiki;
+ if (title_chars_for_lnki == null) {
+ title_chars_for_lnki = (boolean[])Array_.Clone(Xomw_ttl_utl.Title_chars_valid());
+ // the % is needed to support urlencoded titles as well
+ title_chars_for_lnki[Byte_ascii.Hash] = true;
+ title_chars_for_lnki[Byte_ascii.Percent] = true;
+ }
+ }
+ public void Clear_state() {
+ // Clears the link holders collected so far.
+ holders.Clear();
+ }
+ public void Replace_internal_links(Xomw_parser_ctx pctx, Xomw_parser_bfr pbfr) {
+     // Buffer-based entry point: remembers the current page title, then parses pbfr.Src() into pbfr.Trg().
+     this.page_title = pctx.Page_title();
+
+     // XO.PBFR; NOTE: src bytes and length are captured before Switch is called
+     Bry_bfr src_bfr = pbfr.Src();
+     byte[] src_bry = src_bfr.Bfr();
+     int src_len = src_bfr.Len();
+     Bry_bfr trg_bfr = pbfr.Trg();
+     pbfr.Switch();
+
+     Replace_internal_links(trg_bfr, src_bry, 0, src_len);
+ }
+ public void Replace_internal_links(Bry_bfr bfr, byte[] src, int src_bgn, int src_end) {
+     // Scans src[src_bgn..src_end) for "[[...]]" links and writes the result to bfr: valid links are written as
+     // self-links, known links or link holders; invalid ones are copied through as raw wikitext.
+     // Bookkeeping: "prv" is the start of source text not yet written to bfr and "cur" is the scan position.
+     // Every branch that ends an iteration first writes src[prv..lnki_bgn) (or the raw text) and then sets prv = cur,
+     // so no source text is dropped or written twice.
+     // PORTED: regex for tc move to header; e1 and e1_img moved to code
+     // split the entire text String on occurrences of [[
+     int cur = src_bgn;
+     int prv = cur;
+     while (true) {
+         int lnki_bgn = Bry_find_.Find_fwd(src, Bry__wtxt__lnki__bgn, cur, src_end); // $a = StringUtils::explode('[[', ' ' . $s);
+         if (lnki_bgn == Bry_find_.Not_found) { // no more "[["; stop loop
+             bfr.Add_mid(src, prv, src_end);
+             break;
+         }
+         cur = lnki_bgn + 2; // 2="[[".length
+
+         // IGNORE: handles strange split logic of adding space to String; "$s = substr($s, 1);"
+
+         // TODO.XO:lnke_bgn; EX: b[[A]]
+         // $useLinkPrefixExtension = $this->getTargetLanguage()->linkPrefixExtension();
+         // $e2 = null;
+         // if ($useLinkPrefixExtension) {
+         // // Match the end of a line for a word that's not followed by whitespace,
+         // // e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
+         // global $wgContLang;
+         // $charset = $wgContLang->linkPrefixCharset();
+         // $e2 = "/^((?>.*[^$charset]|))(.+)$/sDu";
+         // }
+
+         // IGNORE: throw new MWException(__METHOD__ . ": \$this->mTitle is null\n");
+
+         // $nottalk = !$this->mTitle->isTalkPage();
+
+         // TODO.XO:lnke_bgn
+         byte[] prefix = Bry_.Empty;
+         //if ($useLinkPrefixExtension) {
+         // $m = [];
+         // if (preg_match($e2, $s, $m)) {
+         // $first_prefix = $m[2];
+         // } else {
+         // $first_prefix = false;
+         // }
+         //} else {
+         // $prefix = '';
+         //}
+
+         // IGNORE: "Check for excessive memory usage"
+
+         // TODO.XO:lnke_bgn; EX: b[[A]]
+         //if ($useLinkPrefixExtension) {
+         // if (preg_match($e2, $s, $m)) {
+         // $prefix = $m[2];
+         // $s = $m[1];
+         // } else {
+         // $prefix = '';
+         // }
+         // // first link
+         // if ($first_prefix) {
+         // $prefix = $first_prefix;
+         // $first_prefix = false;
+         // }
+         //}
+
+         // PORTED.BGN: if (preg_match($e1, $line, $m)) && else if (preg_match($e1_img, $line, $m))
+         // NOTE: both e1 and e1_img are effectively the same; e1_img allows nested "[["; EX: "[[A|b[[c]]d]]" will stop at "[[A|b"
+         int ttl_bgn = cur;
+         int ttl_end = Xomw_ttl_utl.Find_fwd_while_title(src, cur, src_end, title_chars_for_lnki);
+         cur = ttl_end;
+         int capt_bgn = -1, capt_end = -1;
+         int nxt_lnki = -1;
+
+         boolean might_be_img = false;
+         if (ttl_end > ttl_bgn && ttl_end < src_end) { // at least one valid title-char found and text continues; check for "|" or "]]" EX: "[[a"
+             byte nxt_byte = src[ttl_end];
+             if (nxt_byte == Byte_ascii.Pipe) { // handles lnki with capt ([[A|a]])and lnki with file ([[File:A.png|b|c|d]])
+                 cur = ttl_end + 1;
+
+                 // find next "[["
+                 nxt_lnki = Bry_find_.Find_fwd(src, Bry__wtxt__lnki__bgn, cur, src_end);
+                 if (nxt_lnki == Bry_find_.Not_found)
+                     nxt_lnki = src_end;
+
+                 // find end "]]"
+                 capt_bgn = cur;
+                 capt_end = Bry_find_.Find_fwd(src, Bry__wtxt__lnki__end, cur, nxt_lnki);
+                 if (capt_end == Bry_find_.Not_found) {
+                     capt_end = nxt_lnki;
+                     cur = nxt_lnki;
+                     might_be_img = true;
+                 }
+                 else {
+                     cur = capt_end + Bry__wtxt__lnki__end.length;
+                 }
+             }
+             else if (  ttl_end + Bry__wtxt__lnki__end.length <= src_end
+                     && Bry_.Match(src, ttl_end, ttl_end + 2, Bry__wtxt__lnki__end)) { // handles simple lnki; EX: [[A]]
+                 cur = ttl_end + 2;
+             }
+             else {
+                 ttl_end = -1;
+             }
+         }
+         else
+             ttl_end = -1;
+         if (ttl_end == -1) { // either (a) no valid title-chars ("[[<") or (b) title char, but has stray "]" ("[[a]b]]") or (c) text ends inside the title
+             // Invalid form; output directly
+             // cur is at the end of the title chars (or directly after "[["); write the raw text up to there and resume scanning
+             bfr.Add_mid(src, prv, cur);
+             prv = cur;
+             continue;
+         }
+         // PORTED.END: if (preg_match($e1, $line, $m)) && else if (preg_match($e1_img, $line, $m))
+
+         byte[] text = capt_bgn == -1 ? Bry_.Empty : Bry_.Mid(src, capt_bgn, capt_end); // no "|" means no caption
+         byte[] trail = Bry_.Empty;
+         if (!might_be_img) {
+             // If we get a ] at the beginning of $m[3] that means we have a link that's something like:
+             // [[Image:Foo.jpg|[http://example.com desc]]] <- having three ] in a row fucks up,
+             // the real problem is with the $e1 regex
+             // See T1500.
+             // Still some problems for cases where the ] is meant to be outside punctuation,
+             // and no image is in sight. See T4095.
+//            if ($text !== ''
+//                && substr($m[3], 0, 1) === ']'
+//                && strpos($text, '[') !== false
+//            ) {
+//                $text .= ']'; // so that replaceExternalLinks($text) works later
+//                $m[3] = substr($m[3], 1);
+//            }
+
+             // fix up urlencoded title texts
+//            if (strpos($m[1], '%') !== false) {
+//                // Should anchors '#' also be rejected?
+//                $m[1] = str_replace([ '<', '>' ], [ '&lt;', '&gt;' ], rawurldecode($m[1]));
+//            }
+//            $trail = $m[3];
+         }
+         else {
+             // Invalid, but might be an image with a link in its caption
+//            $text = $m[2];
+//            if (strpos($m[1], '%') !== false) {
+//                $m[1] = str_replace([ '<', '>' ], [ '&lt;', '&gt;' ], rawurldecode($m[1]));
+//            }
+//            $trail = "";
+         }
+
+         byte[] orig_link = Bry_.Mid(src, ttl_bgn, ttl_end);
+
+         // TODO.XO: handle "[[http://a.org]]"
+         // Don't allow internal links to pages containing
+         // PROTO: where PROTO is a valid URL protocol; these
+         // should be external links.
+         // if (preg_match('/^(?i:' . $this->mUrlProtocols . ')/', $origLink)) {
+         // $s .= $prefix . '[[' . $line;
+         // continue;
+         // }
+
+         byte[] link = orig_link;
+         boolean no_force = orig_link[0] != Byte_ascii.Colon;
+         if (!no_force) {
+             // Strip off leading ':'
+             link = Bry_.Mid(link, 1);
+         }
+         Xoa_ttl nt = wiki.Ttl_parse(link);
+
+         // Make subpage if necessary
+         // NOTE: nt is null when the title cannot be parsed; in that case skip straight to the null check below
+         boolean subpages_enabled = nt != null && nt.Ns().Subpages_enabled();
+         if (subpages_enabled) {
+             Maybe_do_subpage_link(normalize_subpage_link, orig_link, text);
+             link = normalize_subpage_link.link;
+             text = normalize_subpage_link.text;
+             nt = wiki.Ttl_parse(link);
+         }
+         // IGNORE: handled in rewrite above
+         // else {
+         // link = orig_link;
+         // }
+
+         byte[] unstrip = strip_state.Unstrip_nowiki(link);
+         if (!Bry_.Eq(unstrip, link))
+             nt = wiki.Ttl_parse(unstrip);
+         if (nt == null) {
+             bfr.Add_mid(src, prv, lnki_bgn + 2); // $s .= $prefix . '[[' . $line;
+             cur = lnki_bgn + 2;
+             prv = cur;
+             continue;
+         }
+
+         Xow_ns ns = nt.Ns();
+         Xow_xwiki_itm iw = nt.Wik_itm();
+
+         if (might_be_img) { // if this is actually an invalid link
+             // both branches below write the link raw; flush the text before it and mark the examined source as handled
+             bfr.Add_mid(src, prv, lnki_bgn);
+             prv = cur;
+             if (ns.Id_is_file() && no_force) { // but might be an image
+                 boolean found = false;
+//                while (true) {
+//                    // look at the next 'line' to see if we can close it there
+//                    a->next();
+//                    next_line = a->current();
+//                    if (next_line === false || next_line === null) {
+//                        break;
+//                    }
+//                    m = explode(']]', next_line, 3);
+//                    if (count(m) == 3) {
+//                        // the first ]] closes the inner link, the second the image
+//                        found = true;
+//                        text .= "[[{m[0]}]]{m[1]}";
+//                        trail = m[2];
+//                        break;
+//                    } else if (count(m) == 2) {
+//                        // if there's exactly one ]] that's fine, we'll keep looking
+//                        text .= "[[{m[0]}]]{m[1]}";
+//                    } else {
+//                        // if next_line is invalid too, we need look no further
+//                        text .= '[[' . next_line;
+//                        break;
+//                    }
+//                }
+                 if (!found) {
+                     // we couldn't find the end of this imageLink, so output it raw
+                     // but don't ignore what might be perfectly normal links in the text we've examined
+                     // NOTE(review): the nested result is released without being used; MW replaces $text with it. Confirm.
+                     Bry_bfr nested = wiki.Utl__bfr_mkr().Get_b128();
+                     this.Replace_internal_links(nested, text, 0, text.length);
+                     nested.Mkr_rls();
+                     bfr.Add(prefix).Add(Bry__wtxt__lnki__bgn).Add(link).Add_byte_pipe().Add(text); // s .= "{prefix}[[link|text";
+                     // note: no trail, because without an end, there *is* no trail
+                     continue;
+                 }
+             }
+             else { // it's not an image, so output it raw
+                 bfr.Add(prefix).Add(Bry__wtxt__lnki__bgn).Add(link).Add_byte_pipe().Add(text); // s .= "{prefix}[[link|text";
+                 // note: no trail, because without an end, there *is* no trail
+                 continue;
+             }
+         }
+
+         boolean was_blank = text.length == 0;
+         if (was_blank) {
+             text = link;
+         }
+         else {
+             // T6598 madness. Handle the quotes only if they come from the alternate part
+             // [[Lista d''e paise d''o munno]] -> <a href="...">Lista d''e paise d''o munno</a>
+             // [[Criticism of Harry Potter|Criticism of ''Harry Potter'']]
+             // -> <a href="Criticism of Harry Potter">Criticism of <i>Harry Potter</i></a>
+             text = quote_wkr.Do_quotes(tmp, text);
+         }
+
+         // Link not escaped by : , create the various objects
+         if (no_force) { // TODO.XO: && !nt->wasLocalInterwiki(); NOTE: "[[:Category:A]]" must fall through and render as a normal link
+             // Interwikis
+//            if (
+//                iw && this->mOptions->getInterwikiMagic() && nottalk && (
+//                    Language::fetchLanguageName(iw, null, 'mw') ||
+//                    in_array(iw, wgExtraInterlanguageLinkPrefixes)
+//                )
+//            ) {
+                 // T26502: filter duplicates
+//                if (!isset(this->mLangLinkLanguages[iw])) {
+//                    this->mLangLinkLanguages[iw] = true;
+//                    this->mOutput->addLanguageLink(nt->getFullText());
+//                }
+//
+//                s = rtrim(s . prefix);
+//                s .= trim(trail, "\n") == '' ? '': prefix . trail;
+//                continue;
+//            }
+//
+             if (ns.Id_is_file()) {
+//                if (!wfIsBadImage(nt->getDBkey(), this->mTitle)) {
+//                    if (wasblank) {
+//                        // if no parameters were passed, text
+//                        // becomes something like "File:Foo.png",
+//                        // which we don't want to pass on to the
+//                        // image generator
+//                        text = '';
+//                    } else {
+//                        // recursively parse links inside the image caption
+//                        // actually, this will parse them in any other parameters, too,
+//                        // but it might be hard to fix that, and it doesn't matter ATM
+//                        text = this->replaceExternalLinks(text);
+//                        holders->merge(this->replaceInternalLinks2(text));
+//                    }
+//                    // cloak any absolute URLs inside the image markup, so replaceExternalLinks() won't touch them
+//                    s .= prefix . this->armorLinks(
+//                        this->makeImage(nt, text, holders)) . trail;
+//                    continue;
+//                }
+             }
+             else if (ns.Id_is_ctg()) {
+                 // the category link itself produces no output; flush the text before it so the trim below applies to it
+                 bfr.Add_mid(src, prv, lnki_bgn);
+                 prv = cur;
+                 bfr.Trim_end_ws(); // s = rtrim(s . "\n"); // T2087
+
+                 if (was_blank) {
+//                    sortkey = this->getDefaultSort();
+                 }
+                 else {
+//                    sortkey = text;
+                 }
+//                sortkey = Sanitizer::decodeCharReferences(sortkey);
+//                sortkey = str_replace("\n", '', sortkey);
+//                sortkey = this->getConverterLanguage()->convertCategoryKey(sortkey);
+//                this->mOutput->addCategory(nt->getDBkey(), sortkey);
+//
+                 // Strip the whitespace Category links produce, see T2087
+//                s .= trim(prefix . trail, "\n") == '' ? '' : prefix . trail;
+
+                 continue;
+             }
+         }
+
+         // Self-link checking. For some languages, variants of the title are checked in
+         // LinkHolderArray::doVariants() to allow batching the existence checks necessary
+         // for linking to a different variant.
+         if (!ns.Id_is_special() && nt.Eq_full_db(page_title) && !nt.Has_fragment()) {
+             bfr.Add_mid(src, prv, lnki_bgn);
+             prv = cur;
+             bfr.Add(prefix);
+             linker.Make_self_link_obj(bfr, nt, text, Bry_.Empty, trail, Bry_.Empty);
+             continue;
+         }
+
+         // NS_MEDIA is a pseudo-namespace for linking directly to a file
+         // @todo FIXME: Should do batch file existence checks, see comment below
+         if (ns.Id_is_media()) {
+             // Give extensions a chance to select the file revision for us
+//            options = [];
+//            descQuery = false;
+             // MW.HOOK:BeforeParserFetchFileAndTitle
+             // Fetch and register the file (file title may be different via hooks)
+//            list(file, nt) = this->fetchFileAndTitle(nt, options);
+             // Cloak with NOPARSE to avoid replacement in replaceExternalLinks
+//            s .= prefix . this->armorLinks(
+//                Linker::makeMediaLinkFile(nt, file, text)) . trail;
+//            continue;
+         }
+
+         // Some titles, such as valid special pages or files in foreign repos, should
+         // be shown as bluelinks even though they're not included in the page table
+         // @todo FIXME: isAlwaysKnown() can be expensive for file links; we should really do
+         // batch file existence checks for NS_FILE and NS_MEDIA
+         bfr.Add_mid(src, prv, lnki_bgn);
+         prv = cur;
+         if (iw == null && nt.Is_always_known()) {
+             // this->mOutput->addLink(nt);
+             Make_known_link_holder(bfr, nt, text, trail, prefix);
+         }
+         else {
+             // Links will be added to the output link list after checking
+             holders.Make_holder(bfr, nt, text, Bry_.Ary_empty, trail, prefix);
+         }
+     }
+ }
+ public void Maybe_do_subpage_link(Xomw_linker__normalize_subpage_link rv, byte[] target, byte[] text) {
+ // Delegates to the linker, using the current page title as context; the result is returned through rv (callers read rv.link and rv.text).
+ linker.Normalize_subpage_link(rv, page_title, target, text);
+ }
+ public void Replace_link_holders(Xomw_parser_ctx pctx, Xomw_parser_bfr pbfr) {
+ // Delegates to the link holders collected by Replace_internal_links.
+ holders.Replace(pctx, pbfr);
+ }
+ public void Make_known_link_holder(Bry_bfr bfr, Xoa_ttl nt, byte[] text, byte[] trail, byte[] prefix) {
+     // Writes a link to a known page into bfr: the part of the trail that belongs inside the link is split off,
+     // the link is rendered and passed through parser.Armor_links, and the remaining trail is appended after it.
+     byte[][] split_trail = linker.Split_trail(trail);
+     byte[] inside = split_trail[0];
+     trail = split_trail[1];
+
+     // MW: if ($text == ''); compare by content, not by reference, so any empty array (not just Bry_.Empty) counts as blank
+     if (text == null || text.length == 0) {
+         text = Bry_.Escape_html(nt.Get_prefixed_text());
+     }
+
+     // PORTED:new HtmlArmor( "$prefix$text$inside" )
+     // NOTE(review): a text produced by Bry_.Escape_html above is passed through Add_bry_escape_html again; confirm it is not escaped twice
+     tmp.Add_bry_escape_html(prefix);
+     tmp.Add_bry_escape_html(text);
+     tmp.Add_bry_escape_html(inside);
+     text = tmp.To_bry_and_clear();
+
+     link_renderer.Make_known_link(bfr, nt, text, extra_atrs, Bry_.Empty);
+     // NOTE(review): bfr is the caller's output buffer; To_bry_and_clear presumably returns everything written to it so far
+     // (not only this link), so earlier output is run through Armor_links again on every call. Confirm and render into a separate buffer if so.
+     byte[] link = bfr.To_bry_and_clear();
+     parser.Armor_links(bfr, link, 0, link.length);
+     bfr.Add(trail);
+ }
+
+ private static boolean[] title_chars_for_lnki;
+ private static final byte[] Bry__wtxt__lnki__bgn = Bry_.new_a7("[["), Bry__wtxt__lnki__end = Bry_.new_a7("]]");
+
+ // $e1 = "/^([{$tc}]+)(?:\\|(.+?))?]](.*)\$/sD";
+ //
+ // REGEX: "title-char"(1+) + "pipe"(0-1) + "]]"(0-1) + "other chars up to next [["
+ // title-char -> ([{$tc}]+)
+ // pipe -> (?:\\|(.+?))?
+ // ]] -> ?]]
+ // other chars... -> (.*)
+
+ // $e1_img = "/^([{$tc}]+)\\|(.*)\$/sD";
+ //
+ // REGEX: "title-char"(1+) + "pipe"(0-1) + "other chars up to next [["
+ // title-char -> ([{$tc}]+)
+ // pipe -> \\|
+ // other chars... -> (.*)
+}
diff --git a/400_xowa/src/gplx/xowa/mws/parsers/lnkis/Xomw_lnki_wkr__tst.java b/400_xowa/src/gplx/xowa/mws/parsers/lnkis/Xomw_lnki_wkr__tst.java
new file mode 100644
index 000000000..8ac4bde78
--- /dev/null
+++ b/400_xowa/src/gplx/xowa/mws/parsers/lnkis/Xomw_lnki_wkr__tst.java
@@ -0,0 +1,63 @@
+/*
+XOWA: the XOWA Offline Wiki Application
+Copyright (C) 2012 gnosygnu@gmail.com
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as
+published by the Free Software Foundation, either version 3 of the
+License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+package gplx.xowa.mws.parsers.lnkis; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
+import org.junit.*;
+// JUnit tests for Xomw_lnki_wkr. Each active case passes wikitext to the fixture and compares the produced text with the expected string;
+// Test__parse only replaces internal links, Test__to_html also replaces the link holders afterwards.
+// The fixture state is cleared before every test. Cases prefixed with "//" are disabled.
+// NOTE(review): several expected strings here look as if markup was stripped from them (and some lines are split) -- verify against the upstream file before relying on them.
+public class Xomw_lnki_wkr__tst {
+ private final Xomw_lnki_wkr__fxt fxt = new Xomw_lnki_wkr__fxt();
+ @Before public void init() {fxt.Clear();}
+// @Test public void Basic() {fxt.Test__parse("[[A]]" , "");}
+ @Test public void Text() {fxt.Test__parse("a [[A]] z" , "a z");}
+ @Test public void Capt() {fxt.Test__parse("a [[A|a]] z" , "a z");}
+// @Test public void Text() {fxt.Test__parse("a [[A]] z" , "a
A z");}
+// @Test public void Capt() {fxt.Test__parse("a [[A|a]] z" , "a
a z");}
+// @Test public void Text() {fxt.Test__parse("a [[A]] z" , "a z");}
+// @Test public void Invalid__char() {fxt.Test__parse("[[
]]" , "[[]]");}
+ @Test public void Self() {fxt.Test__to_html("[[Page_1]]" , "Page_1");}
+}
+// Test fixture for Xomw_lnki_wkr: builds an edit-mode app / wiki, a parser initialized by that wiki, and a parser context whose page is "Page_1".
+class Xomw_lnki_wkr__fxt {
+  private final Xomw_lnki_wkr wkr;
+  private final Xomw_parser_ctx pctx;
+  private final Xomw_parser_bfr pbfr = new Xomw_parser_bfr();
+  private boolean apos = true;  // when true, expected strings are passed through Gfh_utl.Replace_apos before comparison
+  public Xomw_lnki_wkr__fxt() {
+    Xoae_app app = Xoa_app_fxt.Make__app__edit();
+    Xowe_wiki wiki = Xoa_app_fxt.Make__wiki__edit(app);
+
+    // the worker is taken from the parser first; the parser is initialized by the wiki afterwards
+    Xomw_parser parser = new Xomw_parser();
+    this.wkr = parser.Lnki_wkr();
+    parser.Init_by_wiki(wiki);
+
+    Xomw_parser_ctx ctx = new Xomw_parser_ctx();
+    ctx.Init_by_page(wiki.Ttl_parse(Bry_.new_a7("Page_1")));
+    this.pctx = ctx;
+  }
+  // Resets the worker's state
+  public void Clear() {
+    wkr.Clear_state();
+  }
+  // Replaces internal links in src_str and compares the result with expd
+  public void Test__parse(String src_str, String expd) {
+    Run_and_check(src_str, expd, false);
+  }
+  // Same as Test__parse, but also replaces the link holders before comparing
+  public void Test__to_html(String src_str, String expd) {
+    Run_and_check(src_str, expd, true);
+  }
+  // Shared driver: runs the worker over src_str, optionally replaces link holders, then asserts on the buffer's result
+  private void Run_and_check(String src_str, String expd, boolean replace_holders) {
+    wkr.Replace_internal_links(pctx, pbfr.Init(Bry_.new_u8(src_str)));
+    if (replace_holders)
+      wkr.Replace_link_holders(pctx, pbfr);
+    String expd_str = apos ? gplx.langs.htmls.Gfh_utl.Replace_apos(expd) : expd;
+    String actl_str = pbfr.Rslt().To_str_and_clear();
+    Tfds.Eq_str_lines(expd_str, actl_str, src_str);
+  }
+}
diff --git a/400_xowa/src/gplx/xowa/mws/parsers/magiclinks/Xomw_magiclinks_wkr.java b/400_xowa/src/gplx/xowa/mws/parsers/magiclinks/Xomw_magiclinks_wkr.java
new file mode 100644
index 000000000..54323a173
--- /dev/null
+++ b/400_xowa/src/gplx/xowa/mws/parsers/magiclinks/Xomw_magiclinks_wkr.java
@@ -0,0 +1,331 @@
+/*
+XOWA: the XOWA Offline Wiki Application
+Copyright (C) 2012 gnosygnu@gmail.com
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as
+published by the Free Software Foundation, either version 3 of the
+License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+package gplx.xowa.mws.parsers.magiclinks; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
+import gplx.core.primitives.*; import gplx.core.btries.*; import gplx.core.net.*;
+import gplx.langs.phps.utls.*;
+// public class Xomw_magiclinks_wkr {
+// private final Btrie_slim_mgr regex_trie = Btrie_slim_mgr.ci_a7(); // NOTE: must be ci to handle protocols; EX: "https:" and "HTTPS:"
+// private final Btrie_rv trv = new Btrie_rv();
+// public Xomw_magiclinks_wkr() {
+// }
+// private static byte[] Tag__anch__rhs, Prefix__rfc, Prefix__pmid;
+//
+// private static final byte Space__tab = 1, Space__nbsp_ent = 2, Space__nbsp_dec = 3, Space__nbsp_hex = 4;
+// private static Btrie_slim_mgr space_trie;
+// // static final SPACE_NOT_NL = '(?:\t| |&\#0*160;|&\#[Xx]0*[Aa]0;|\p{Zs})';
+//// public void Test() {
+//// regex.Add("\t", Space__tab);
+//// regex.Add(" ", Space__nbsp__ent);
+//// regex.Add(Regex.Make("").Star("0").Add("160;"), Space__nbsp__dec);
+//// regex.Add(Regex.Make("").Brack("X", "x").Star("0").Brack("A", "a").Add("0"), Space__nbsp__hex);
+//// }
+// public int Find_fwd_space(byte[] src, int cur, int src_end) {
+// return -1;
+// }
+//
+// private static final byte Regex__anch = 1, Regex__elem = 2, Regex__free = 3, Regex__rfc = 5, Regex__isbn = 6, Regex__pmid = 7;
+// public void Init_by_wiki() {
+// regex_trie.Add_str_byte("");
+// Prefix__rfc = Bry_.new_a7("RFC");
+// Prefix__pmid = Bry_.new_a7("PMID");
+// space_trie = Btrie_slim_mgr.ci_a7()
+// .Add_str_byte("\t", Space__tab)
+// .Add_str_byte(" ", Space__nbsp_ent)
+// .Add_str_byte("", Space__nbsp_dec)
+// .Add_str_byte("&x", Space__nbsp_hex)
+// ;
+// }
+// }
+// }
+//
+// // Replace special strings like "ISBN xxx" and "RFC xxx" with
+// // magic external links.
+// public void Do_magic_links(Xomw_parser_ctx pctx, Xomw_parser_bfr pbfr) {
+// // XO.PBFR
+// Bry_bfr src_bfr = pbfr.Src();
+// byte[] src = src_bfr.Bfr();
+// int src_bgn = 0;
+// int src_end = src_bfr.Len();
+// Bry_bfr bfr = pbfr.Trg();
+//
+// int cur = src_bgn;
+// int prv = cur;
+// boolean dirty = true;
+// while (true) {
+// if (cur == src_end) {
+// if (dirty)
+// bfr.Add_mid(src, prv, src_end);
+// break;
+// }
+//
+// byte b = src[cur];
+// Object o = regex_trie.Match_at_w_b0(trv, b, src, cur, src_end);
+// // current byte doesn't look like magiclink; continue;
+// if (o == null) {
+// cur++;
+// continue;
+// }
+// // looks like magiclink; do additional processing
+// byte regex_tid = ((Byte_obj_ref)o).Val();
+// int trv_pos = trv.Pos();
+// int nxt_pos = trv_pos;
+// boolean regex_valid = true;
+// switch (regex_tid) {
+// case Regex__anch: // (<a[ \t\r\n>].*?</a>) | // m[1]: Skip link text
+// if (trv_pos < src_end) {
+// // find ws in "[ \t\r\n>]"
+// byte ws_byte = src[cur];
+// switch (ws_byte) {
+// case Byte_ascii.Space:
+// case Byte_ascii.Tab:
+// case Byte_ascii.Cr:
+// case Byte_ascii.Nl:
+// break;
+// default:
+// regex_valid = false;
+// break;
+// }
+// if (regex_valid) {
+// // find
+// nxt_pos++;
+// int anch_end = Bry_find_.Find_fwd(src, Tag__anch__rhs, nxt_pos, src_end);
+// if (anch_end == Bry_find_.Not_found) {
+// regex_valid = false;
+// }
+// else {
+// cur = anch_end + Tag__anch__rhs.length;
+// }
+// }
+// }
+// else {
+// regex_valid = false;
+// }
+// break;
+// case Regex__elem: // (<.*?>) | // m[2]: Skip stuff inside
+// // just find ">"
+// int elem_end = Bry_find_.Find_fwd(src, Byte_ascii.Angle_end, nxt_pos, src_end);
+// if (elem_end == Bry_find_.Not_found)
+// regex_valid = false;
+// else
+// cur = elem_end + 1;
+// break;
+// case Regex__free:
+// // addr; urlchar
+// break;
+// case Regex__rfc:
+// case Regex__pmid:
+// // byte[] prefix = regex == Regex__rfc ? Prefix__rfc : Prefix__pmid;
+// // match previous for case sensitivity
+//// if (Bry_.Eq(src, trv_pos - prefix.length - 1, trv_pos - 1, prefix)) {
+////
+//// }
+//// else {
+//// regex_valid = false;
+//// }
+// break;
+// }
+//
+//// '!(?: // Start cases
+//// (<a[ \t\r\n>].*?</a>) | // m[1]: Skip link text
+//// (<.*?>) | // m[2]: Skip stuff inside
+//// // HTML elements' . "
+//// (\b(?i:$prots)($addr$urlChar*)) | // m[3]: Free external links
+//// // m[4]: Post-protocol path
+//// \b(?:RFC|PMID) $spaces // m[5]: RFC or PMID, capture number
+//// ([0-9]+)\b |
+//// \bISBN $spaces ( // m[6]: ISBN, capture number
+//// (?: 97[89] $spdash?)? // optional 13-digit ISBN prefix
+//// (?: [0-9] $spdash?){9} // 9 digits with opt. delimiters
+//// [0-9Xx] // check digit
+//// )\b
+//
+// }
+// if (dirty)
+// pbfr.Switch();
+
+// $prots = wfUrlProtocolsWithoutProtRel();
+// $urlChar = self::EXT_LINK_URL_CLASS;
+// $addr = self::EXT_LINK_ADDR;
+// $space = self::SPACE_NOT_NL; // non-newline space
+// $spdash = "(?:-|$space)"; // a dash or a non-newline space
+// $spaces = "$space++"; // possessive match of 1 or more spaces
+// $text = preg_replace_callback(
+// '!(?: // Start cases
+// (<a[ \t\r\n>].*?</a>) | // m[1]: Skip link text
+// (<.*?>) | // m[2]: Skip stuff inside
+// // HTML elements' . "
+// (\b(?i:$prots)($addr$urlChar*)) | // m[3]: Free external links
+// // m[4]: Post-protocol path
+// \b(?:RFC|PMID) $spaces // m[5]: RFC or PMID, capture number
+// ([0-9]+)\b |
+// \bISBN $spaces ( // m[6]: ISBN, capture number
+// (?: 97[89] $spdash?)? // optional 13-digit ISBN prefix
+// (?: [0-9] $spdash?){9} // 9 digits with opt. delimiters
+// [0-9Xx] // check digit
+// )\b
+// )!xu", [ &$this, 'magicLinkCallback' ], $text);
+// return $text;
+// }
+
+// public function magicLinkCallback($m) {
+// if (isset($m[1]) && $m[1] !== '') {
+// // Skip anchor
+// return $m[0];
+// } else if (isset($m[2]) && $m[2] !== '') {
+// // Skip HTML element
+// return $m[0];
+// } else if (isset($m[3]) && $m[3] !== '') {
+// // Free external link
+// return $this->makeFreeExternalLink($m[0], strlen($m[4]));
+// } else if (isset($m[5]) && $m[5] !== '') {
+// // RFC or PMID
+// if (substr($m[0], 0, 3) === 'RFC') {
+// if (!$this->mOptions->getMagicRFCLinks()) {
+// return $m[0];
+// }
+// $keyword = 'RFC';
+// $urlmsg = 'rfcurl';
+// $cssClass = 'mw-magiclink-rfc';
+// $trackingCat = 'magiclink-tracking-rfc';
+// $id = $m[5];
+// } else if (substr($m[0], 0, 4) === 'PMID') {
+// if (!$this->mOptions->getMagicPMIDLinks()) {
+// return $m[0];
+// }
+// $keyword = 'PMID';
+// $urlmsg = 'pubmedurl';
+// $cssClass = 'mw-magiclink-pmid';
+// $trackingCat = 'magiclink-tracking-pmid';
+// $id = $m[5];
+// } else {
+// throw new MWException(__METHOD__ . ': unrecognised match type "' .
+// substr($m[0], 0, 20) . '"');
+// }
+// $url = wfMessage($urlmsg, $id)->inContentLanguage()->text();
+// $this->addTrackingCategory($trackingCat);
+// return Linker::makeExternalLink($url, "{$keyword} {$id}", true, $cssClass, [], $this->mTitle);
+// } else if (isset($m[6]) && $m[6] !== ''
+// && $this->mOptions->getMagicISBNLinks()
+// ) {
+// // ISBN
+// $isbn = $m[6];
+// $space = self::SPACE_NOT_NL; // non-newline space
+// $isbn = preg_replace("/$space/", ' ', $isbn);
+// $num = strtr($isbn, [
+// '-' => '',
+// ' ' => '',
+// 'x' => 'X',
+// ]);
+// $this->addTrackingCategory('magiclink-tracking-isbn');
+// return $this->getLinkRenderer()->makeKnownLink(
+// SpecialPage::getTitleFor('Booksources', $num),
+// "ISBN $isbn",
+// [
+// 'class' => 'internal mw-magiclink-isbn',
+// 'title' => false // suppress title attribute
+// ]
+// );
+// } else {
+// return $m[0];
+// }
+
+ // Make a free external link, given a user-supplied URL
+// public void Make_free_external_link(byte[] url, int num_post_proto) {
+// byte[] trail = Bry_.Empty;
+
+ // The characters '<' and '>' (which were escaped by
+ // removeHTMLtags()) should not be included in
+ // URLs, per RFC 2396.
+ // Make &nbsp; terminate a URL as well (bug T84937)
+// $m2 = [];
+// if (preg_match(
+// '/&(lt|gt|nbsp|#x0*(3[CcEe]|[Aa]0)|#0*(60|62|160));/',
+// $url,
+// $m2,
+// PREG_OFFSET_CAPTURE
+// )) {
+// trail = substr($url, $m2[0][1]) . $trail;
+// $url = substr($url, 0, $m2[0][1]);
+// }
+
+ // Move trailing punctuation to $trail
+// $sep = ',;\.:!?';
+ // If there is no left bracket, then consider right brackets fair game too
+// if (strpos($url, '(') === false) {
+// $sep .= ')';
+// }
+
+// $urlRev = strrev($url);
+// $numSepChars = strspn($urlRev, $sep);
+ // Don't break a trailing HTML entity by moving the ; into $trail
+ // This is in hot code, so use substr_compare to avoid having to
+ // create a new String Object for the comparison
+// if ($numSepChars && substr_compare($url, ";", -$numSepChars, 1) === 0) {
+ // more optimization: instead of running preg_match with a $
+ // anchor, which can be slow, do the match on the reversed
+ // String starting at the desired offset.
+ // un-reversed regexp is: /&([a-z]+|#x[\da-f]+|#\d+)$/i
+// if (preg_match('/\G([a-z]+|[\da-f]+x#|\d+#)&/i', $urlRev, $m2, 0, $numSepChars)) {
+// $numSepChars--;
+// }
+// }
+// if ($numSepChars) {
+// $trail = substr($url, -$numSepChars) . $trail;
+// $url = substr($url, 0, -$numSepChars);
+// }
+
+ // Verify that we still have a real URL after trail removal, and
+ // not just lone protocol
+// if (strlen($trail) >= $numPostProto) {
+// return $url . $trail;
+// }
+
+// $url = Sanitizer::cleanUrl($url);
+
+ // Is this an external image?
+// $text = $this->maybeMakeExternalImage($url);
+// if ($text === false) {
+ // Not an image, make a link
+// $text = Linker::makeExternalLink($url,
+// $this->getConverterLanguage()->markNoConversion($url, true),
+// true, 'free',
+// $this->getExternalLinkAttribs($url), $this->mTitle);
+ // Register it in the output Object...
+ // Replace unnecessary URL escape codes with their equivalent characters
+// $pasteurized = self::normalizeLinkUrl($url);
+// $this->mOutput->addExternalLink($pasteurized);
+// }
+// return $text . $trail;
+// }
+// }
+// }
diff --git a/400_xowa/src/gplx/xowa/mws/parsers/nbsps/Xomw_nbsp_wkr.java b/400_xowa/src/gplx/xowa/mws/parsers/nbsps/Xomw_nbsp_wkr.java
new file mode 100644
index 000000000..4808a8351
--- /dev/null
+++ b/400_xowa/src/gplx/xowa/mws/parsers/nbsps/Xomw_nbsp_wkr.java
@@ -0,0 +1,134 @@
+/*
+XOWA: the XOWA Offline Wiki Application
+Copyright (C) 2012 gnosygnu@gmail.com
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as
+published by the Free Software Foundation, either version 3 of the
+License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+package gplx.xowa.mws.parsers.nbsps; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
+import gplx.core.btries.*;
+// Ports the "$fixtags" step of MediaWiki's parser: puts a non-breaking space ("&#160;") next to French punctuation
+// and takes it away again in front of the CSS "!important" keyword.
+public class Xomw_nbsp_wkr {
+  private final Btrie_rv trv = new Btrie_rv();
+  // Reads the text in pbfr.Src(), writes the transformed text to pbfr.Trg() and then calls pbfr.Switch().
+  // pctx is not used by this worker.
+  public void Do_nbsp(Xomw_parser_ctx pctx, Xomw_parser_bfr pbfr) {
+    // PORTED:
+    // Clean up special characters, only run once, next-to-last before doBlockLevels
+    // $fixtags = [
+    //   // French spaces, last one Guillemet-left
+    //   // only if there is something before the space
+    //   '/(.) (?=\\?|:|;|!|%|\\302\\273)/' => '\\1&#160;',
+    //   // french spaces, Guillemet-right
+    //   '/(\\302\\253) /' => '\\1&#160;',
+    //   '/&#160;(!\s*important)/' => ' \\1', // Beware of CSS magic word !important, T13874.
+    // ];
+    // $text = preg_replace( array_keys( $fixtags ), array_values( $fixtags ), $text );
+    // XO.PBFR
+    Bry_bfr src_bfr = pbfr.Src();
+    byte[] src = src_bfr.Bfr();
+    int src_bgn = 0;
+    int src_end = src_bfr.Len();
+    Bry_bfr bfr = pbfr.Trg();
+
+    int cur = src_bgn;
+    int prv = cur;
+    // NOTE: starts as true (as before), so the unmatched tail is always copied and the buffers are always switched, even if nothing matched
+    boolean dirty = true;
+    // search forward for...
+    //   "\s" before ? : ; ! % 302,273; EX: "a :"
+    //   "\s" after 302,253
+    //   "&#160;!\simportant"
+    while (true) {
+      if (cur == src_end) {
+        if (dirty)
+          bfr.Add_mid(src, prv, src_end);
+        break;
+      }
+      Object o = trie.Match_at(trv, src, cur, src_end);
+      if (o == null) {
+        cur++;
+        continue;
+      }
+      Xomw_nbsp_itm itm = (Xomw_nbsp_itm)o;
+      int key_end = cur + itm.Key().length;
+
+      // the trie only matches the literal key; the rest of each regex is checked here.
+      // a rejected match MUST still advance cur, else the loop never terminates
+      if (!Rule_applies(itm, src, src_bgn, cur, key_end, src_end)) {
+        cur++;
+        continue;
+      }
+
+      dirty = true;
+      bfr.Add_mid(src, prv, cur);
+      switch (itm.Tid()) {
+        case Tid__space_lhs:
+          bfr.Add_bry_many(Bry__nbsp, itm.Val());
+          break;
+        case Tid__space_rhs:
+          bfr.Add_bry_many(itm.Val(), Bry__nbsp);
+          break;
+        case Tid__important:
+          bfr.Add(Bry__important__repl);
+          break;
+      }
+      cur = key_end;
+      prv = cur;
+    }
+    if (dirty)
+      pbfr.Switch();
+  }
+  // Checks the parts of each rule that the literal trie key cannot express.
+  private static boolean Rule_applies(Xomw_nbsp_itm itm, byte[] src, int src_bgn, int cur, int key_end, int src_end) {
+    switch (itm.Tid()) {
+      case Tid__space_lhs: {
+        // "(.) ": there must be a char before the space, and "." does not match "\n"
+        if (cur == src_bgn || src[cur - 1] == '\n') return false;
+        // " !important": PHP applies the rules one after another, so the 3rd rule removes the nbsp that the 1st rule added;
+        // a single pass gets the same net result by leaving the text alone
+        byte[] val = itm.Val();
+        if (val.length == 1 && val[0] == '!' && Important__end(src, key_end, src_end) != -1) return false;
+        return true;
+      }
+      case Tid__important:
+        // "&#160;!" only counts when followed by optional spaces and "important"
+        return Important__end(src, key_end, src_end) != -1;
+      default:
+        return true;
+    }
+  }
+  // Skips spaces from bgn and returns the position just after "important", or -1 if "important" does not follow.
+  private static int Important__end(byte[] src, int bgn, int src_end) {
+    int word_bgn = Bry_find_.Find_fwd_while(src, bgn, src_end, Byte_ascii.Space);
+    int word_end = word_bgn + Bry__important.length;
+    // src is the buffer's backing array and may hold stale bytes past src_end; never compare beyond it
+    if (word_end > src_end) return -1;
+    return Bry_.Match(src, word_bgn, word_end, Bry__important) ? word_end : -1;
+  }
+  private static final byte Tid__space_lhs = 0, Tid__space_rhs = 1, Tid__important = 2;
+  // built once during class initialization: fully populated before any thread can see it
+  private static final Btrie_slim_mgr trie = Trie__new();
+  private static Btrie_slim_mgr Trie__new() {
+    Btrie_slim_mgr rv = Btrie_slim_mgr.cs();
+    Trie__add(rv, Tid__space_lhs, " ?");
+    Trie__add(rv, Tid__space_lhs, " :");
+    Trie__add(rv, Tid__space_lhs, " ;");
+    Trie__add(rv, Tid__space_lhs, " !");
+    Trie__add(rv, Tid__space_lhs, " %");
+    Trie__add(rv, Tid__space_lhs, " »");
+    Trie__add(rv, Tid__space_rhs, "« ");
+    Trie__add(rv, Tid__important, "&#160;!");
+    return rv;
+  }
+  // Registers key_str in the trie. The item's val is the part of the key that is kept in the output:
+  // lhs drops the leading space, rhs drops the trailing space, important keeps the whole key.
+  private static void Trie__add(Btrie_slim_mgr trie, byte tid, String key_str) {
+    byte[] key_bry = Bry_.new_u8(key_str);
+    byte[] val_bry = null;
+    switch (tid) {
+      case Tid__space_lhs:
+        val_bry = Bry_.Mid(key_bry, 1);
+        break;
+      case Tid__space_rhs:
+        val_bry = Bry_.Mid(key_bry, 0, key_bry.length - 1);
+        break;
+      case Tid__important:
+        val_bry = key_bry;
+        break;
+    }
+    Xomw_nbsp_itm itm = new Xomw_nbsp_itm(tid, key_bry, val_bry);
+    trie.Add_obj(key_bry, itm);
+  }
+  private static final byte[] Bry__nbsp = Bry_.new_a7("&#160;"), Bry__important = Bry_.new_a7("important"), Bry__important__repl = Bry_.new_a7(" !");
+}
+// Immutable trie entry of the nbsp worker: a rule type id, the matched key and the value stored with it.
+class Xomw_nbsp_itm {
+  private final byte tid;
+  private final byte[] key;
+  private final byte[] val;
+  public Xomw_nbsp_itm(byte tid, byte[] key, byte[] val) {
+    this.tid = tid;
+    this.key = key;
+    this.val = val;
+  }
+  // rule type id
+  public byte Tid() {
+    return tid;
+  }
+  // key bytes, as passed to the constructor (not copied)
+  public byte[] Key() {
+    return key;
+  }
+  // value bytes, as passed to the constructor (not copied)
+  public byte[] Val() {
+    return val;
+  }
+}
diff --git a/400_xowa/src/gplx/xowa/mws/parsers/nbsps/Xomw_nbsp_wkr__tst.java b/400_xowa/src/gplx/xowa/mws/parsers/nbsps/Xomw_nbsp_wkr__tst.java
new file mode 100644
index 000000000..1fe633da4
--- /dev/null
+++ b/400_xowa/src/gplx/xowa/mws/parsers/nbsps/Xomw_nbsp_wkr__tst.java
@@ -0,0 +1,40 @@
+/*
+XOWA: the XOWA Offline Wiki Application
+Copyright (C) 2012 gnosygnu@gmail.com
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as
+published by the Free Software Foundation, either version 3 of the
+License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+package gplx.xowa.mws.parsers.nbsps; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
+import org.junit.*;
+// JUnit tests for Xomw_nbsp_wkr. Each case gives the input text and the text expected after Do_nbsp.
+// The expected values spell the non-breaking space as the literal entity "&#160;"; without it they would equal the inputs and assert nothing.
+public class Xomw_nbsp_wkr__tst {
+ private final Xomw_nbsp_wkr__fxt fxt = new Xomw_nbsp_wkr__fxt();
+ // no rule matches: text passes through unchanged
+ @Test public void Noop() {fxt.Test__parse("abc" , "abc");}
+ // space before ":" becomes an nbsp
+ @Test public void Space_lhs__colon() {fxt.Test__parse("a :b c" , "a&#160;:b c");}
+ // space before "»" becomes an nbsp
+ @Test public void Space_lhs__laquo() {fxt.Test__parse("a »b c" , "a&#160;»b c");}
+ // space after "«" becomes an nbsp
+ @Test public void Space_rhs() {fxt.Test__parse("a« b c" , "a«&#160;b c");}
+ // "&#160;" before "! important" is turned back into a plain space (the space in front of it is kept, hence two spaces)
+ @Test public void Important() {fxt.Test__parse("a &#160;! important b" , "a  ! important b");}
+}
+// Test fixture for Xomw_nbsp_wkr: runs Do_nbsp over a String and asserts on the parser buffer's result.
+class Xomw_nbsp_wkr__fxt {
+  private final Xomw_nbsp_wkr wkr = new Xomw_nbsp_wkr();
+  private final Xomw_parser_ctx pctx = new Xomw_parser_ctx();
+  private final Xomw_parser_bfr pbfr = new Xomw_parser_bfr();
+  private boolean apos = true;  // when true, the expected string is passed through Gfh_utl.Replace_apos before comparison
+  // Loads src_str into the parser buffer, applies the worker, and compares the buffer's result with expd
+  public void Test__parse(String src_str, String expd) {
+    pbfr.Init(Bry_.new_u8(src_str));
+    wkr.Do_nbsp(pctx, pbfr);
+    String expd_str = apos ? gplx.langs.htmls.Gfh_utl.Replace_apos(expd) : expd;
+    String actl_str = pbfr.Rslt().To_str_and_clear();
+    Tfds.Eq_str_lines(expd_str, actl_str, src_str);
+  }
+}
diff --git a/400_xowa/src/gplx/xowa/parsers/mws/wkrs/Xomw_hdr_cbk.java b/400_xowa/src/gplx/xowa/mws/parsers/prepros/Xomw_frame_itm.java
similarity index 71%
rename from 400_xowa/src/gplx/xowa/parsers/mws/wkrs/Xomw_hdr_cbk.java
rename to 400_xowa/src/gplx/xowa/mws/parsers/prepros/Xomw_frame_itm.java
index f8b0397aa..2790eaa1c 100644
--- a/400_xowa/src/gplx/xowa/parsers/mws/wkrs/Xomw_hdr_cbk.java
+++ b/400_xowa/src/gplx/xowa/mws/parsers/prepros/Xomw_frame_itm.java
@@ -15,8 +15,9 @@ GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
 along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-package gplx.xowa.parsers.mws.wkrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.mws.*;
-public interface Xomw_hdr_cbk {
- void On_hdr_seen(Xomw_parser_ctx pctx, Xomw_hdr_wkr wkr);
- void On_src_done(Xomw_parser_ctx pctx, Xomw_hdr_wkr wkr);
+package gplx.xowa.mws.parsers.prepros; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
+// Placeholder frame item: Expand does not look at ttl yet and always returns null.
+// NOTE(review): presumably meant to mirror PPFrame::expand from MediaWiki's preprocessor -- confirm once implemented.
+public class Xomw_frame_itm {
+ public byte[] Expand(byte[] ttl) {
+ return null;
+ }
 }
diff --git a/400_xowa/src/gplx/xowa/mws/parsers/prepros/Xomw_frame_wkr.java b/400_xowa/src/gplx/xowa/mws/parsers/prepros/Xomw_frame_wkr.java
new file mode 100644
index 000000000..1a22c8881
--- /dev/null
+++ b/400_xowa/src/gplx/xowa/mws/parsers/prepros/Xomw_frame_wkr.java
@@ -0,0 +1,564 @@
+/*
+XOWA: the XOWA Offline Wiki Application
+Copyright (C) 2012 gnosygnu@gmail.com
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as
+published by the Free Software Foundation, either version 3 of the
+License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+package gplx.xowa.mws.parsers.prepros; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
+// public class Xomw_frame_wkr { // THREAD.UNSAFE: caching for repeated calls
+// private final Xomw_parser parser;
+// public Xomw_frame_wkr(Xomw_parser parser) {
+// this.parser = parser;
+// }
+// \\ Replace magic variables, templates, and template arguments
+// \\ with the appropriate text. Templates are substituted recursively,
+// \\ taking care to avoid infinite loops.
+// \\
+// \\ Note that the substitution depends on value of $mOutputType:
+// \\ self::OT_WIKI: only {{subst:}} templates
+// \\ self::OT_PREPROCESS: templates but not extension tags
+// \\ self::OT_HTML: all templates and extension tags
+// \\
+// \\ @param String $text The text to transform
+// \\ @param boolean|PPFrame $frame Object describing the arguments passed to the
+// \\ template. Arguments may also be provided as an associative array, as
+// \\ was the usual case before MW1.12. Providing arguments this way may be
+// \\ useful for extensions wishing to perform variable replacement
+// \\ explicitly.
+// \\ @param boolean $argsOnly Only do argument (triple-brace) expansion, not
+// \\ double-brace expansion.
+// \\ @return String
+// public function replaceVariables($text, $frame = false, $argsOnly = false) {
+// // Is there any text? Also, Prevent too big inclusions!
+// $textSize = strlen($text);
+// if ($textSize < 1 || $textSize > $this->mOptions->getMaxIncludeSize()) {
+// return $text;
+// }
+//
+// if ($frame == false) {
+// $frame = $this->getPreprocessor()->newFrame();
+// } elseif (!($frame instanceof PPFrame)) {
+// wfDebug(__METHOD__ . " called using plain parameters instead of "
+// . "a PPFrame instance. Creating custom frame.\n");
+// $frame = $this->getPreprocessor()->newCustomFrame($frame);
+// }
+//
+// $dom = $this->preprocessToDom($text);
+// $flags = $argsOnly ? PPFrame::NO_TEMPLATES : 0;
+// $text = $frame->expand($dom, $flags);
+//
+// return $text;
+// }
+//
+// \\ Clean up argument array - refactored in 1.9 so parserfunctions can use it, too.
+// public static function createAssocArgs($args) {
+// $assocArgs = [];
+// $index = 1;
+// foreach ($args as $arg) {
+// $eqpos = strpos($arg, '=');
+// if ($eqpos == false) {
+// $assocArgs[$index++] = $arg;
+// } else {
+// $name = trim(substr($arg, 0, $eqpos));
+// $value = trim(substr($arg, $eqpos + 1));
+// if ($value == false) {
+// $value = '';
+// }
+// if ($name != false) {
+// $assocArgs[$name] = $value;
+// }
+// }
+// }
+//
+// return $assocArgs;
+// }
+
+// \\ Return the text of a template, after recursively
+// \\ replacing any variables or templates within the template.
+// \\
+// \\ @param array $piece The parts of the template
+// \\ $piece['title']: the title, i.e. the part before the |
+// \\ $piece['parts']: the parameter array
+// \\ $piece['lineStart']: whether the brace was at the start of a line
+// \\ @param PPFrame $frame The current frame, contains template arguments
+// \\ @throws Exception
+// \\ @return String The text of the template
+// public void Brace_substitution(Xomw_prepro_node__template piece, Xomw_frame_itm frame) {
+// // Flags
+//
+// // $text has been filled
+// boolean found = false;
+// // wiki markup in $text should be escaped
+// boolean nowiki = false;
+// // $text is HTML, armour it against wikitext transformation
+// boolean is_html = false;
+// // Force interwiki transclusion to be done in raw mode not rendered
+// boolean force_raw_interwiki = false;
+// // $text is a DOM node needing expansion in a child frame
+// boolean is_child_obj = false;
+// // $text is a DOM node needing expansion in the current frame
+// boolean is_local_obj = false;
+//
+// // Title Object, where $text came from
+// byte[] title = null;
+//
+// // $part1 is the bit before the first |, and must contain only title characters.
+// // Various prefixes will be stripped from it later.
+// byte[] title_with_spaces = frame.Expand(piece.Title());
+// byte[] part1 = Bry_.Trim(title_with_spaces);
+// byte[] title_text = null;
+//
+// // Original title text preserved for various purposes
+// byte[] originalTitle = part1;
+//
+// // $args is a list of argument nodes, starting from index 0, not including $part1
+// // @todo FIXME: If piece['parts'] is null then the call to getLength()
+// // below won't work b/c this $args isn't an Object
+// Xomw_prepro_node__part[] args = (null == piece.Parts()) ? null : piece.Parts();
+//
+// byte[] profile_section = null; // profile templates
+//
+// Tfds.Write(nowiki, is_html, force_raw_interwiki, is_child_obj, is_local_obj, title, title_text, profile_section);
+// // SUBST
+// if (!found) {
+// String subst_match = null; // $this->mSubstWords->matchStartAndRemove($part1);
+// boolean literal = false;
+//
+// // Possibilities for substMatch: "subst", "safesubst" or FALSE
+// // Decide whether to expand template or keep wikitext as-is.
+// if (parser.Output_type__wiki()) {
+// if (subst_match == null) {
+// literal = true; // literal when in PST with no prefix
+// }
+// else {
+// literal = false; // expand when in PST with subst: or safesubst:
+// }
+// }
+// else {
+// if (subst_match == "subst") {
+// literal = true; // literal when not in PST with plain subst:
+// }
+// else {
+// literal = false; // expand when not in PST with safesubst: or no prefix
+// }
+// }
+// if (literal) {
+//// $text = $frame->virtualBracketedImplode('{{', '|', '}}', title_with_spaces, $args);
+// is_local_obj = true;
+// found = true;
+// }
+// }
+//
+// // Variables
+// if (!found && args.length == 0) {
+//// $id = $this->mVariables->matchStartToEnd($part1);
+//// if ($id != false) {
+//// $text = $this->getVariableValue($id, $frame);
+//// if (MagicWord::getCacheTTL($id) > -1) {
+//// $this->mOutput->updateCacheExpiry(MagicWord::getCacheTTL($id));
+//// }
+// found = true;
+//// }
+// }
+//
+// // MSG, MSGNW and RAW
+// if (!found) {
+// // Check for MSGNW:
+//// $mwMsgnw = MagicWord::get('msgnw');
+//// if ($mwMsgnw->matchStartAndRemove($part1)) {
+// nowiki = true;
+//// }
+//// else {
+// // Remove obsolete MSG:
+//// $mwMsg = MagicWord::get('msg');
+//// $mwMsg->matchStartAndRemove($part1);
+//// }
+//
+// // Check for RAW:
+//// $mwRaw = MagicWord::get('raw');
+//// if ($mwRaw->matchStartAndRemove($part1)) {
+//// force_raw_interwiki = true;
+//// }
+// }
+
+ // Parser functions
+// if (!found) {
+// $colonPos = strpos($part1, ':');
+// if ($colonPos != false) {
+// $func = substr($part1, 0, $colonPos);
+// $funcArgs = [ trim(substr($part1, $colonPos + 1)) ];
+// $argsLength = $args->getLength();
+// for ($i = 0; $i < $argsLength; $i++) {
+// $funcArgs[] = $args->item($i);
+// }
+// try {
+// $result = $this->callParserFunction($frame, $func, $funcArgs);
+// } catch (Exception $ex) {
+// throw $ex;
+// }
+
+ // The interface for parser functions allows for extracting
+ // flags into the local scope. Extract any forwarded flags
+ // here.
+// extract($result);
+// }
+// }
+
+ // Finish mangling title and then check for loops.
+ // Set title to a Title Object and $title_text to the PDBK
+// if (!found) {
+// $ns = NS_TEMPLATE;
+ // Split the title into page and subpage
+// $subpage = '';
+// $relative = $this->maybeDoSubpageLink($part1, $subpage);
+// if ($part1 != $relative) {
+// $part1 = $relative;
+// $ns = $this->mTitle->getNamespace();
+// }
+// title = Title::newFromText($part1, $ns);
+// if (title) {
+// $title_text = title->getPrefixedText();
+// // Check for language variants if the template is not found
+// if ($this->getConverterLanguage()->hasVariants() && title->getArticleID() == 0) {
+// $this->getConverterLanguage()->findVariantLink($part1, title, true);
+// }
+// // Do recursion depth check
+// $limit = $this->mOptions->getMaxTemplateDepth();
+// if ($frame->depth >= $limit) {
+// found = true;
+// $text = '
'
+// . wfMessage('parser-template-recursion-depth-warning')
+// ->numParams($limit)->inContentLanguage()->text()
+// . '';
+// }
+// }
+// }
+
+ // Load from database
+// if (!found && title) {
+// $profile_section = $this->mProfiler->scopedProfileIn(title->getPrefixedDBkey());
+// if (!title->isExternal()) {
+// if (title->isSpecialPage()
+// && $this->mOptions->getAllowSpecialInclusion()
+// && $this->ot['html']
+// ) {
+// $specialPage = SpecialPageFactory::getPage(title->getDBkey());
+// // Pass the template arguments as URL parameters.
+// // "uselang" will have no effect since the Language Object
+// // is forced to the one defined in ParserOptions.
+// $pageArgs = [];
+// $argsLength = $args->getLength();
+// for ($i = 0; $i < $argsLength; $i++) {
+// $bits = $args->item($i)->splitArg();
+// if (strval($bits['index']) == '') {
+// $name = trim($frame->expand($bits['name'], PPFrame::STRIP_COMMENTS));
+// $value = trim($frame->expand($bits['value']));
+// $pageArgs[$name] = $value;
+// }
+// }
+//
+// // Create a new context to execute the special page
+// $context = new RequestContext;
+// $context->setTitle(title);
+// $context->setRequest(new FauxRequest($pageArgs));
+// if ($specialPage && $specialPage->maxIncludeCacheTime() == 0) {
+// $context->setUser($this->getUser());
+// } else {
+// // If this page is cached, then we better not be per user.
+// $context->setUser(User::newFromName('127.0.0.1', false));
+// }
+// $context->setLanguage($this->mOptions->getUserLangObj());
+// $ret = SpecialPageFactory::capturePath(
+// title, $context, $this->getLinkRenderer());
+// if ($ret) {
+// $text = $context->getOutput()->getHTML();
+// $this->mOutput->addOutputPageMetadata($context->getOutput());
+// found = true;
+// is_html = true;
+// if ($specialPage && $specialPage->maxIncludeCacheTime() != false) {
+// $this->mOutput->updateRuntimeAdaptiveExpiry(
+// $specialPage->maxIncludeCacheTime()
+// );
+// }
+// }
+// } elseif (MWNamespace::isNonincludable(title->getNamespace())) {
+// found = false; // access denied
+// wfDebug(__METHOD__ . ": template inclusion denied for " .
+// title->getPrefixedDBkey() . "\n");
+// } else {
+// list($text, title) = $this->getTemplateDom(title);
+// if ($text != false) {
+// found = true;
+// is_child_obj = true;
+// }
+// }
+//
+// // If the title is valid but undisplayable, make a link to it
+// if (!found && ($this->ot['html'] || $this->ot['pre'])) {
+// $text = "[[:$title_text]]";
+// found = true;
+// }
+// } elseif (title->isTrans()) {
+// // Interwiki transclusion
+// if ($this->ot['html'] && !force_raw_interwiki) {
+// $text = $this->interwikiTransclude(title, 'render');
+// is_html = true;
+// } else {
+// $text = $this->interwikiTransclude(title, 'raw');
+// // Preprocess it like a template
+// $text = $this->preprocessToDom($text, self::PTD_FOR_INCLUSION);
+// is_child_obj = true;
+// }
+// found = true;
+// }
+//
+// // Do infinite loop check
+// // This has to be done after redirect resolution to avoid infinite loops via redirects
+// if (!$frame->loopCheck(title)) {
+// found = true;
+// $text = '<span class="error">'
+// . wfMessage('parser-template-loop-warning', $title_text)->inContentLanguage()->text()
+// . '</span>';
+// wfDebug(__METHOD__ . ": template loop broken at '$title_text'\n");
+// }
+// }
+
+ // If we haven't found text to substitute by now, we're done
+ // Recover the source wikitext and return it
+// if (!found) {
+// $text = $frame->virtualBracketedImplode('{{', '|', '}}', title_with_spaces, $args);
+// if ($profile_section) {
+// $this->mProfiler->scopedProfileOut($profile_section);
+// }
+// return [ 'Object' => $text ];
+// }
+
+ // Expand DOM-style return values in a child frame
+// if (is_child_obj) {
+// // Clean up argument array
+// $newFrame = $frame->newChild($args, title);
+//
+// if (nowiki) {
+// $text = $newFrame->expand($text, PPFrame::RECOVER_ORIG);
+// } elseif ($title_text != false && $newFrame->isEmpty()) {
+// // Expansion is eligible for the empty-frame cache
+// $text = $newFrame->cachedExpand($title_text, $text);
+// } else {
+// // Uncached expansion
+// $text = $newFrame->expand($text);
+// }
+// }
+// if (is_local_obj && nowiki) {
+// $text = $frame->expand($text, PPFrame::RECOVER_ORIG);
+// is_local_obj = false;
+// }
+
+// if ($profile_section) {
+// $this->mProfiler->scopedProfileOut($profile_section);
+// }
+
+ // Replace raw HTML by a placeholder
+// if (is_html) {
+// $text = $this->insertStripItem($text);
+// } elseif (nowiki && ($this->ot['html'] || $this->ot['pre'])) {
+// // Escape nowiki-style return values
+// $text = wfEscapeWikiText($text);
+// } elseif (is_string($text)
+// && !$piece['lineStart']
+// && preg_match('/^(?:{\\||:|;|#|\*)/', $text)
+// ) {
+// // T2529: if the template begins with a table or block-level
+// // element, it should be treated as beginning a new line.
+// // This behavior is somewhat controversial.
+// $text = "\n" . $text;
+// }
+
+// if (is_string($text) && !$this->incrementIncludeSize('post-expand', strlen($text))) {
+// // Error, oversize inclusion
+// if ($title_text != false) {
+// // Make a working, properly escaped link if possible (T25588)
+// $text = "[[:$title_text]]";
+// } else {
+// // This will probably not be a working link, but at least it may
+// // provide some hint of where the problem is
+// preg_replace('/^:/', '', $originalTitle);
+// $text = "[[:$originalTitle]]";
+// }
+// $text .= $this->insertStripItem('<!-- WARNING: template omitted, post-expand include size too large -->');
+// $this->limitationWarn('post-expand-template-inclusion');
+// }
+//
+// if (is_local_obj) {
+// $ret = [ 'Object' => $text ];
+// } else {
+// $ret = [ 'text' => $text ];
+// }
+
+// return $ret;
+// }
+
+// \\ Triple brace replacement -- used for template arguments
+// public function argSubstitution($piece, $frame) {
+//
+// $error = false;
+// $parts = $piece['parts'];
+// $nameWithSpaces = $frame->expand($piece['title']);
+// $argName = trim($nameWithSpaces);
+// $Object = false;
+// $text = $frame->getArgument($argName);
+// if ($text == false && $parts->getLength() > 0
+// && ($this->ot['html']
+// || $this->ot['pre']
+// || ($this->ot['wiki'] && $frame->isTemplate())
+// )
+// ) {
+// // No match in frame, use the supplied default
+// $Object = $parts->item(0)->getChildren();
+// }
+// if (!$this->incrementIncludeSize('arg', strlen($text))) {
+// $error = '<!-- WARNING: argument omitted, expansion size too large -->';
+// $this->limitationWarn('post-expand-template-argument');
+// }
+//
+// if ($text == false && $Object == false) {
+// // No match anywhere
+// $Object = $frame->virtualBracketedImplode('{{{', '|', '}}}', $nameWithSpaces, $parts);
+// }
+// if ($error != false) {
+// $text .= $error;
+// }
+// if ($Object != false) {
+// $ret = [ 'Object' => $Object ];
+// } else {
+// $ret = [ 'text' => $text ];
+// }
+//
+// return $ret;
+// }
+//
+// /**
+// \\ Return the text to be used for a given extension tag.
+// \\ This is the ghost of strip().
+// \\
+// \\ @param array $params Associative array of parameters:
+// \\ name PPNode for the tag name
+// \\ attr PPNode for unparsed text where tag attributes are thought to be
+// \\ attributes Optional associative array of parsed attributes
+// \\ inner Contents of extension element
+// \\ noClose Original text did not have a close tag
+// \\ @param PPFrame $frame
+// \\
+// \\ @throws MWException
+// \\ @return String
+// \\/
+// public function extensionSubstitution($params, $frame) {
+// static $errorStr = '<span class="error">';
+// static $errorLen = 20;
+//
+// $name = $frame->expand($params['name']);
+// if (substr($name, 0, $errorLen) == $errorStr) {
+// // Probably expansion depth or node count exceeded. Just punt the
+// // error up.
+// return $name;
+// }
+//
+// $attrText = !isset($params['attr']) ? null : $frame->expand($params['attr']);
+// if (substr($attrText, 0, $errorLen) == $errorStr) {
+// // See above
+// return $attrText;
+// }
+//
+// // We can't safely check if the expansion for $content resulted in an
+// // error, because the content could happen to be the error String
+// // (T149622).
+// $content = !isset($params['inner']) ? null : $frame->expand($params['inner']);
+//
+// $marker = self::MARKER_PREFIX . "-$name-"
+// . sprintf('%08X', $this->mMarkerIndex++) . self::MARKER_SUFFIX;
+//
+// $isFunctionTag = isset($this->mFunctionTagHooks[strtolower($name)]) &&
+// ($this->ot['html'] || $this->ot['pre']);
+// if ($isFunctionTag) {
+// $markerType = 'none';
+// } else {
+// $markerType = 'general';
+// }
+// if ($this->ot['html'] || $isFunctionTag) {
+// $name = strtolower($name);
+// $attributes = Sanitizer::decodeTagAttributes($attrText);
+// if (isset($params['attributes'])) {
+// $attributes = $attributes + $params['attributes'];
+// }
+//
+// if (isset($this->mTagHooks[$name])) {
+// // Workaround for PHP bug 35229 and similar
+// if (!is_callable($this->mTagHooks[$name])) {
+// throw new MWException("Tag hook for $name is not callable\n");
+// }
+// $output = call_user_func_array($this->mTagHooks[$name],
+// [ $content, $attributes, $this, $frame ]);
+// } elseif (isset($this->mFunctionTagHooks[$name])) {
+// list($callback,) = $this->mFunctionTagHooks[$name];
+// if (!is_callable($callback)) {
+// throw new MWException("Tag hook for $name is not callable\n");
+// }
+//
+// $output = call_user_func_array($callback, [ &$this, $frame, $content, $attributes ]);
+// } else {
+// $output = '<span class="error">Invalid tag extension name: ' .
+// htmlspecialchars($name) . '</span>';
+// }
+//
+// if (is_array($output)) {
+// // Extract flags to local scope (to override $markerType)
+// $flags = $output;
+// $output = $flags[0];
+// unset($flags[0]);
+// extract($flags);
+// }
+// } else {
+// if (is_null($attrText)) {
+// $attrText = '';
+// }
+// if (isset($params['attributes'])) {
+// foreach ($params['attributes'] as $attrName => $attrValue) {
+// $attrText .= ' ' . htmlspecialchars($attrName) . '="' .
+// htmlspecialchars($attrValue) . '"';
+// }
+// }
+// if ($content == null) {
+// $output = "<$name$attrText/>";
+// } else {
+// $close = is_null($params['close']) ? '' : $frame->expand($params['close']);
+// if (substr($close, 0, $errorLen) == $errorStr) {
+// // See above
+// return $close;
+// }
+// $output = "<$name$attrText>$content$close";
+// }
+// }
+//
+// if ($markerType == 'none') {
+// return $output;
+// } elseif ($markerType == 'nowiki') {
+// $this->mStripState->addNoWiki($marker, $output);
+// } elseif ($markerType == 'general') {
+// $this->mStripState->addGeneral($marker, $output);
+// } else {
+// throw new MWException(__METHOD__ . ': invalid marker type');
+// }
+// return $marker;
+// }
+// }
diff --git a/400_xowa/src/gplx/xowa/mws/parsers/prepros/Xomw_prepro_node.java b/400_xowa/src/gplx/xowa/mws/parsers/prepros/Xomw_prepro_node.java
new file mode 100644
index 000000000..fca0fa81f
--- /dev/null
+++ b/400_xowa/src/gplx/xowa/mws/parsers/prepros/Xomw_prepro_node.java
@@ -0,0 +1,98 @@
+/*
+XOWA: the XOWA Offline Wiki Application
+Copyright (C) 2012 gnosygnu@gmail.com
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as
+published by the Free Software Foundation, either version 3 of the
+License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+package gplx.xowa.mws.parsers.prepros; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
+public interface Xomw_prepro_node { // contract shared by the preprocessor node classes in this package
+ int Subs__len(); // number of child nodes
+ Xomw_prepro_node Subs__get_at(int i); // child node at position i
+ void Subs__add(Xomw_prepro_node sub); // adds sub as a child of this node
+ void To_xml(Bry_bfr bfr); // writes this node's XML form into bfr
+}
+class Xomw_prepro_node__text extends Xomw_prepro_node__base { // node wrapping a byte[] that is written out as-is
+ public Xomw_prepro_node__text(byte[] bry) {
+ this.bry = bry;
+ }
+ public byte[] Bry() {return bry;} protected final byte[] bry; // stored by reference; no defensive copy is made
+ @Override public void To_xml(Bry_bfr bfr) {
+ bfr.Add(bry); // no element wrapper and no escaping is applied here
+ }
+}
+class Xomw_prepro_node__comment extends Xomw_prepro_node__base { // comment node; serialized as a <comment> element
+	public Xomw_prepro_node__comment(byte[] bry) {
+		this.bry = bry;
+	}
+	public byte[] Bry() {return bry;} protected final byte[] bry; // comment bytes; stored by reference and written as-is
+	@Override public void To_xml(Bry_bfr bfr) {
+		bfr.Add_str_a7("<comment>"); // tag literals restored; they had been reduced to empty strings
+		bfr.Add(bry); // no escaping is applied here
+		bfr.Add_str_a7("</comment>");
+	}
+}
+class Xomw_prepro_node__ext extends Xomw_prepro_node__base { // extension-tag node; serialized as <ext> wrapping name / attr / inner / close
+	public Xomw_prepro_node__ext(byte[] name, byte[] attr, byte[] inner, byte[] close) {
+		this.name = name;
+		this.attr = attr;
+		this.inner = inner;
+		this.close = close;
+	}
+	public byte[] Name() {return name;} private final byte[] name;
+	public byte[] Attr() {return attr;} private final byte[] attr;
+	public byte[] Inner() {return inner;} private final byte[] inner;
+	public byte[] Close() {return close;} private final byte[] close;
+	@Override public void To_xml(Bry_bfr bfr) { // all four children are always written; none is skipped when its array is empty
+		bfr.Add_str_a7("<ext>"); // tag literals restored; they had been reduced to empty strings
+		bfr.Add_str_a7("<name>").Add(name).Add_str_a7("</name>");
+		bfr.Add_str_a7("<attr>").Add(attr).Add_str_a7("</attr>");
+		bfr.Add_str_a7("<inner>").Add(inner).Add_str_a7("</inner>");
+		bfr.Add_str_a7("<close>").Add(close).Add_str_a7("</close>");
+		bfr.Add_str_a7("</ext>");
+	}
+}
+class Xomw_prepro_node__heading extends Xomw_prepro_node__base { // heading node carrying two int indexes and the heading text
+ public Xomw_prepro_node__heading(int heading_index, int title_index, byte[] text) {
+ this.heading_index = heading_index;
+ this.title_index = title_index;
+ this.text = text;
+ }
+ public int Heading_index() {return heading_index;} private final int heading_index;
+ public int Title_index() {return title_index;} private final int title_index;
+ public byte[] Text() {return text;} private final byte[] text; // stored by reference; no defensive copy is made
+ @Override public void To_xml(Bry_bfr bfr) {
+ bfr.Add_str_a7(""); // NOTE(review): empty literal looks like a stripped opening tag (presumably an <h ...> element built from the two indexes) - restore from the upstream file
+ bfr.Add(text);
+ bfr.Add_str_a7(""); // NOTE(review): empty literal looks like a stripped closing tag (presumably </h>) - restore from the upstream file
+ }
+}
+class Xomw_prepro_node__tplarg extends Xomw_prepro_node__base { // triple-brace argument node; serialized as <tplarg> with a <title> and its parts
+	public Xomw_prepro_node__tplarg(byte[] title, Xomw_prepro_node__part[] parts) {
+		this.title = title; this.parts = parts;
+	}
+	public byte[] Title() {return title;} private final byte[] title;
+	public Xomw_prepro_node__part[] Parts() {return parts;} private final Xomw_prepro_node__part[] parts; // iterated directly in To_xml, so it must not be null
+	@Override public void To_xml(Bry_bfr bfr) {
+		bfr.Add_str_a7("<tplarg>"); // tag literals restored; they had been reduced to empty strings
+		bfr.Add_str_a7("<title>").Add(title);
+		bfr.Add_str_a7("</title>");
+		for (Xomw_prepro_node__part part : parts) // each part writes its own <part> element, in array order
+			part.To_xml(bfr);
+
+		bfr.Add_str_a7("</tplarg>");
+	}
+}
diff --git a/400_xowa/src/gplx/xowa/mws/parsers/prepros/Xomw_prepro_node__base.java b/400_xowa/src/gplx/xowa/mws/parsers/prepros/Xomw_prepro_node__base.java
new file mode 100644
index 000000000..ba4c2af4c
--- /dev/null
+++ b/400_xowa/src/gplx/xowa/mws/parsers/prepros/Xomw_prepro_node__base.java
@@ -0,0 +1,28 @@
+/*
+XOWA: the XOWA Offline Wiki Application
+Copyright (C) 2012 gnosygnu@gmail.com
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as
+published by the Free Software Foundation, either version 3 of the
+License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+package gplx.xowa.mws.parsers.prepros; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
+public abstract class Xomw_prepro_node__base implements Xomw_prepro_node { // shared child-list handling for the node classes
+ private List_adp subs; // created lazily by Subs__add; stays null while the node has no children
+ public int Subs__len() {return subs == null ? 0 : subs.Len();} // 0 when no child was ever added
+ public Xomw_prepro_node Subs__get_at(int i) {return subs == null ? null : (Xomw_prepro_node)subs.Get_at(i);} // returns null (does not throw) when no child was ever added
+ public void Subs__add(Xomw_prepro_node sub) {
+ if (subs == null) subs = List_adp_.New(); // first child: allocate the list
+ subs.Add(sub);
+ }
+ public abstract void To_xml(Bry_bfr bfr); // each subclass writes its own XML form into bfr
+}
diff --git a/400_xowa/src/gplx/xowa/mws/parsers/prepros/Xomw_prepro_node__part.java b/400_xowa/src/gplx/xowa/mws/parsers/prepros/Xomw_prepro_node__part.java
new file mode 100644
index 000000000..19ea6344d
--- /dev/null
+++ b/400_xowa/src/gplx/xowa/mws/parsers/prepros/Xomw_prepro_node__part.java
@@ -0,0 +1,45 @@
+/*
+XOWA: the XOWA Offline Wiki Application
+Copyright (C) 2012 gnosygnu@gmail.com
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as
+published by the Free Software Foundation, either version 3 of the
+License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+package gplx.xowa.mws.parsers.prepros; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
+public class Xomw_prepro_node__part extends Xomw_prepro_node__base { // one argument of a template / tplarg; serialized as a <part> element
+	public Xomw_prepro_node__part(int idx, byte[] key, byte[] val) {
+		this.idx = idx;
+		this.key = key;
+		this.val = val;
+	}
+	public int Idx() {return idx;} private final int idx; // a value > 0 makes To_xml write an index attribute instead of the key
+	public byte[] Key() {return key;} private final byte[] key; // only written when idx <= 0
+	public byte[] Val() {return val;} private final byte[] val;
+	@Override public void To_xml(Bry_bfr bfr) {
+		bfr.Add_str_a7("<part>"); // tag literals restored; they had been reduced to empty strings
+		bfr.Add_str_a7("<name"); // left open: the branch below either adds an attribute or closes the tag
+		if (idx > 0) { // restored: this condition had been merged into the literal above, leaving an unterminated string
+			bfr.Add_str_a7(" index=\"").Add_int_variable(idx).Add_str_a7("\" />");
+		}
+		else {
+			bfr.Add_str_a7(">");
+			bfr.Add(key);
+			bfr.Add_str_a7("</name>");
+			bfr.Add_str_a7("="); // separator between name and value, written only in this branch
+		}
+		bfr.Add_str_a7("<value>");
+		bfr.Add(val);
+		bfr.Add_str_a7("</value>");
+		bfr.Add_str_a7("</part>");
+	}
+}
diff --git a/400_xowa/src/gplx/xowa/mws/parsers/prepros/Xomw_prepro_node__template.java b/400_xowa/src/gplx/xowa/mws/parsers/prepros/Xomw_prepro_node__template.java
new file mode 100644
index 000000000..9d2c7d622
--- /dev/null
+++ b/400_xowa/src/gplx/xowa/mws/parsers/prepros/Xomw_prepro_node__template.java
@@ -0,0 +1,36 @@
+/*
+XOWA: the XOWA Offline Wiki Application
+Copyright (C) 2012 gnosygnu@gmail.com
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as
+published by the Free Software Foundation, either version 3 of the
+License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+package gplx.xowa.mws.parsers.prepros; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
+public class Xomw_prepro_node__template extends Xomw_prepro_node__base { // template node; serialized as <template> with a <title> and its parts
+	public Xomw_prepro_node__template(byte[] title, Xomw_prepro_node__part[] parts, int line_start) {
+		this.title = title; this.parts = parts; this.line_start = line_start;
+	}
+	public byte[] Title() {return title;} private final byte[] title;
+	public Xomw_prepro_node__part[] Parts() {return parts;} private final Xomw_prepro_node__part[] parts; // iterated directly in To_xml, so it must not be null
+	public int Line_start() {return line_start;} private final int line_start; // written as the lineStart attribute only when > 0
+	@Override public void To_xml(Bry_bfr bfr) {
+		bfr.Add_str_a7("<template"); // left open so the optional attribute can follow; restored after being merged into the next line
+		if (line_start > 0) bfr.Add_str_a7(" lineStart=\"").Add_int_variable(line_start).Add_byte_quote();
+		bfr.Add_byte(Byte_ascii.Angle_end); // closes the opening <template ...> tag
+		bfr.Add_str_a7("<title>").Add(title);
+		bfr.Add_str_a7("</title>");
+		for (Xomw_prepro_node__part part : parts) // each part writes its own <part> element, in array order
+			part.To_xml(bfr);
+		bfr.Add_str_a7("</template>");
+	}
+}
diff --git a/400_xowa/src/gplx/xowa/parsers/mws/prepros/Xomw_prepro_rule.java b/400_xowa/src/gplx/xowa/mws/parsers/prepros/Xomw_prepro_rule.java
similarity index 92%
rename from 400_xowa/src/gplx/xowa/parsers/mws/prepros/Xomw_prepro_rule.java
rename to 400_xowa/src/gplx/xowa/mws/parsers/prepros/Xomw_prepro_rule.java
index a24f39e23..ccdff14d5 100644
--- a/400_xowa/src/gplx/xowa/parsers/mws/prepros/Xomw_prepro_rule.java
+++ b/400_xowa/src/gplx/xowa/mws/parsers/prepros/Xomw_prepro_rule.java
@@ -15,7 +15,7 @@ GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see .
*/
-package gplx.xowa.parsers.mws.prepros; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.mws.*;
+package gplx.xowa.mws.parsers.prepros; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
class Xomw_prepro_rule {
public Xomw_prepro_rule(byte[] bgn, byte[] end, int min, int max, int[] names) {
this.bgn = bgn;
diff --git a/400_xowa/src/gplx/xowa/parsers/mws/prepros/Xomw_prepro_stack.java b/400_xowa/src/gplx/xowa/mws/parsers/prepros/Xomw_prepro_stack.java
similarity index 94%
rename from 400_xowa/src/gplx/xowa/parsers/mws/prepros/Xomw_prepro_stack.java
rename to 400_xowa/src/gplx/xowa/mws/parsers/prepros/Xomw_prepro_stack.java
index f67ebeb7a..238867e0b 100644
--- a/400_xowa/src/gplx/xowa/parsers/mws/prepros/Xomw_prepro_stack.java
+++ b/400_xowa/src/gplx/xowa/mws/parsers/prepros/Xomw_prepro_stack.java
@@ -15,7 +15,7 @@ GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see .
*/
-package gplx.xowa.parsers.mws.prepros; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.mws.*;
+package gplx.xowa.mws.parsers.prepros; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
class Xomw_prepro_stack {
public List_adp stack = List_adp_.New();
public Xomw_prepro_piece top;
diff --git a/400_xowa/src/gplx/xowa/parsers/mws/prepros/Xomw_prepro_wkr.java b/400_xowa/src/gplx/xowa/mws/parsers/prepros/Xomw_prepro_wkr.java
similarity index 97%
rename from 400_xowa/src/gplx/xowa/parsers/mws/prepros/Xomw_prepro_wkr.java
rename to 400_xowa/src/gplx/xowa/mws/parsers/prepros/Xomw_prepro_wkr.java
index 1bb59841a..d619af805 100644
--- a/400_xowa/src/gplx/xowa/parsers/mws/prepros/Xomw_prepro_wkr.java
+++ b/400_xowa/src/gplx/xowa/mws/parsers/prepros/Xomw_prepro_wkr.java
@@ -15,7 +15,7 @@ GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see .
*/
-package gplx.xowa.parsers.mws.prepros; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.mws.*;
+package gplx.xowa.mws.parsers.prepros; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
import gplx.core.btries.*;
import gplx.langs.phps.utls.*;
public class Xomw_prepro_wkr { // THREAD.UNSAFE: caching for repeated calls
diff --git a/400_xowa/src/gplx/xowa/parsers/mws/prepros/Xomw_prepro_wkr__tst.java b/400_xowa/src/gplx/xowa/mws/parsers/prepros/Xomw_prepro_wkr__tst.java
similarity index 94%
rename from 400_xowa/src/gplx/xowa/parsers/mws/prepros/Xomw_prepro_wkr__tst.java
rename to 400_xowa/src/gplx/xowa/mws/parsers/prepros/Xomw_prepro_wkr__tst.java
index 8b343d030..ba27ecbe9 100644
--- a/400_xowa/src/gplx/xowa/parsers/mws/prepros/Xomw_prepro_wkr__tst.java
+++ b/400_xowa/src/gplx/xowa/mws/parsers/prepros/Xomw_prepro_wkr__tst.java
@@ -15,7 +15,7 @@ GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see .
*/
-package gplx.xowa.parsers.mws.prepros; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.mws.*;
+package gplx.xowa.mws.parsers.prepros; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
import org.junit.*;
public class Xomw_prepro_wkr__tst {
private final Xomw_prepro_wkr__fxt fxt = new Xomw_prepro_wkr__fxt();
@@ -55,6 +55,9 @@ public class Xomw_prepro_wkr__tst {
@Test public void Tplarg() {
fxt.Test__parse("a{{{b}}}c", "abc");
}
+ @Test public void Tplarg__dflt() {
+ fxt.Test__parse("a{{{b|c}}}d", "abcd");
+ }
@Test public void Comment() {
fxt.Test__parse("ac", "a<!--b-->c");
}
diff --git a/400_xowa/src/gplx/xowa/parsers/mws/quotes/Xomw_quote_wkr.java b/400_xowa/src/gplx/xowa/mws/parsers/quotes/Xomw_quote_wkr.java
similarity index 78%
rename from 400_xowa/src/gplx/xowa/parsers/mws/quotes/Xomw_quote_wkr.java
rename to 400_xowa/src/gplx/xowa/mws/parsers/quotes/Xomw_quote_wkr.java
index a14427fd3..16b62f5d4 100644
--- a/400_xowa/src/gplx/xowa/parsers/mws/quotes/Xomw_quote_wkr.java
+++ b/400_xowa/src/gplx/xowa/mws/parsers/quotes/Xomw_quote_wkr.java
@@ -15,26 +15,53 @@ GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see .
*/
-package gplx.xowa.parsers.mws.quotes; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.mws.*;
+package gplx.xowa.mws.parsers.quotes; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
import gplx.langs.phps.utls.*;
import gplx.xowa.parsers.htmls.*;
import gplx.core.primitives.*;
-public class Xomw_quote_wkr implements gplx.core.brys.Bry_split_wkr {// THREAD.UNSAFE: caching for repeated calls
- private final Bry_bfr bfr = Bry_bfr_.New();
- private final Bry_bfr tmp = Bry_bfr_.New();
+public class Xomw_quote_wkr {// THREAD.UNSAFE: caching for repeated calls
+ private Bry_bfr tmp;
private final Int_list apos_pos_ary = new Int_list(32);
- public byte[] Do_all_quotes(byte[] src) {
- Bry_split_.Split(src, 0, src.length, Byte_ascii.Nl, Bool_.N, this); // PORTED.SPLIT: $lines = StringUtils::explode( "\n", $text );
- bfr.Del_by_1(); // REF.MW: $outtext = substr( $outtext, 0, -1 );
+ public Xomw_quote_wkr(Xomw_parser mgr) {
+ this.tmp = mgr.Tmp();
+ }
+ public void Do_all_quotes(Xomw_parser_ctx pctx, Xomw_parser_bfr pbfr) {
+ Bry_bfr src_bfr = pbfr.Src();
+ byte[] src = src_bfr.Bfr();
+ int src_bgn = 0;
+ int src_end = src_bfr.Len();
+ Bry_bfr bfr = pbfr.Trg();
+ pbfr.Switch();
+
+ int cur = src_bgn;
+ int line_bgn = cur;
+ while (true) {
+ int line_end = Bry_find_.Find_fwd(src, Byte_ascii.Nl, line_bgn, src_end);
+ if (line_end == Bry_find_.Not_found) {
+ line_end = src_end;
+ }
+ Do_quotes(bfr, Bool_.Y, src, line_bgn, line_end);
+ if (line_end == src_end)
+ break;
+ else
+ line_bgn = line_end + 1; // 1=\n.length
+ }
+ // Bry_split_.Split(src, src_bgn, src_end, Byte_ascii.Nl, Bool_.N, this); // PORTED.SPLIT: $lines = StringUtils::explode( "\n", $text );
+ if (bfr.Match_end_byt(Byte_ascii.Nl))
+ bfr.Del_by_1(); // REF.MW: $outtext = substr( $outtext, 0, -1 );
apos_pos_ary.Clear();
- return bfr.To_bry_and_clear();
}
- private static final byte[] Wtxt__apos = Bry_.new_a7("''");
- public int Split(byte[] src, int itm_bgn, int itm_end) {
- byte[][] arr = Php_preg_.Split(apos_pos_ary, src, itm_bgn, itm_end, Wtxt__apos, Bool_.Y); // PORTED.REGX: arr = preg_split("/(''+)/", text, -1, PREG_SPLIT_DELIM_CAPTURE);
+ public byte[] Do_quotes(Bry_bfr tmp, byte[] src) {
+ boolean found = Do_quotes(tmp, Bool_.N, src, 0, src.length);
+ return found ? tmp.To_bry_and_clear() : src;
+ }
+ private boolean Do_quotes(Bry_bfr bfr, boolean all_quotes_mode, byte[] src, int line_bgn, int line_end) {
+ byte[][] arr = Php_preg_.Split(apos_pos_ary, src, line_bgn, line_end, Wtxt__apos, Bool_.Y); // PORTED.REGX: arr = preg_split("/(''+)/", text, -1, PREG_SPLIT_DELIM_CAPTURE);
if (arr == null) {
- bfr.Add_mid(src, itm_bgn, itm_end).Add_byte_nl();
- return Bry_split_.Rv__ok;
+ if (all_quotes_mode) {
+ bfr.Add_mid(src, line_bgn, line_end).Add_byte_nl();
+ }
+ return false;
}
int arr_len = arr.length;
@@ -226,7 +253,7 @@ public class Xomw_quote_wkr implements gplx.core.brys.Bry_split_wkr {// THREAD.U
bfr.Add_str_a7("").Add_bfr_and_clear(tmp).Add_str_a7("");
}
bfr.Add_byte_nl();
- return Bry_split_.Rv__ok;
+ return true;
}
private static final int
State__empty = 0
@@ -236,4 +263,5 @@ public class Xomw_quote_wkr implements gplx.core.brys.Bry_split_wkr {// THREAD.U
, State__ib = 4
, State__both = 5
;
+ private static final byte[] Wtxt__apos = Bry_.new_a7("''");
}
diff --git a/400_xowa/src/gplx/xowa/parsers/mws/quotes/Xomw_quote_wkr__tst.java b/400_xowa/src/gplx/xowa/mws/parsers/quotes/Xomw_quote_wkr__tst.java
similarity index 84%
rename from 400_xowa/src/gplx/xowa/parsers/mws/quotes/Xomw_quote_wkr__tst.java
rename to 400_xowa/src/gplx/xowa/mws/parsers/quotes/Xomw_quote_wkr__tst.java
index e96847945..2b675799c 100644
--- a/400_xowa/src/gplx/xowa/parsers/mws/quotes/Xomw_quote_wkr__tst.java
+++ b/400_xowa/src/gplx/xowa/mws/parsers/quotes/Xomw_quote_wkr__tst.java
@@ -15,7 +15,7 @@ GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see .
*/
-package gplx.xowa.parsers.mws.quotes; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.mws.*;
+package gplx.xowa.mws.parsers.quotes; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
import org.junit.*;
public class Xomw_quote_wkr__tst {
private final Xomw_quote_wkr__fxt fxt = new Xomw_quote_wkr__fxt();
@@ -32,12 +32,14 @@ public class Xomw_quote_wkr__tst {
@Test public void Dangling__b() {fxt.Test__parse("a'''b" , "ab");} // COVERS: "if (state == State__b || state == State__ib)"
@Test public void Dangling__i() {fxt.Test__parse("a''b" , "ab");} // COVERS: "if (state == State__i || state == State__bi || state == State__ib)"
@Test public void Dangling__lone(){fxt.Test__parse("a'''''b" , "ab");} // COVERS: "There might be lonely ''''', so make sure we have a buffer"
+ @Test public void Nl__text() {fxt.Test__parse("a\nb''c''d\n\ne" , "a\nbcd\n\ne");}
}
class Xomw_quote_wkr__fxt {
- private final Xomw_quote_wkr wkr = new Xomw_quote_wkr();
+ private final Xomw_quote_wkr wkr = new Xomw_quote_wkr(new Xomw_parser());
+ private final Xomw_parser_bfr pbfr = new Xomw_parser_bfr();
public void Test__parse(String src_str, String expd) {
byte[] src_bry = Bry_.new_u8(src_str);
- byte[] actl = wkr.Do_all_quotes(src_bry);
- Tfds.Eq_str_lines(expd, String_.new_u8(actl), src_str);
+ wkr.Do_all_quotes(new Xomw_parser_ctx(), pbfr.Init(src_bry));
+ Tfds.Eq_str_lines(expd, pbfr.Rslt().To_str_and_clear(), src_str);
}
}
diff --git a/400_xowa/src/gplx/xowa/parsers/mws/tables/Xomw_table_wkr.java b/400_xowa/src/gplx/xowa/mws/parsers/tables/Xomw_table_wkr.java
similarity index 70%
rename from 400_xowa/src/gplx/xowa/parsers/mws/tables/Xomw_table_wkr.java
rename to 400_xowa/src/gplx/xowa/mws/parsers/tables/Xomw_table_wkr.java
index a4553c98d..0c93e4c9d 100644
--- a/400_xowa/src/gplx/xowa/parsers/mws/tables/Xomw_table_wkr.java
+++ b/400_xowa/src/gplx/xowa/mws/parsers/tables/Xomw_table_wkr.java
@@ -15,12 +15,14 @@ GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see .
*/
-package gplx.xowa.parsers.mws.tables; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.mws.*;
+package gplx.xowa.mws.parsers.tables; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
import gplx.langs.phps.utls.*;
import gplx.xowa.parsers.htmls.*;
-import gplx.xowa.parsers.mws.utils.*; import gplx.xowa.parsers.uniqs.*;
+import gplx.xowa.mws.utls.*; import gplx.xowa.parsers.uniqs.*;
public class Xomw_table_wkr implements gplx.core.brys.Bry_split_wkr {// THREAD.UNSAFE: caching for repeated calls
- private final Bry_bfr bfr = Bry_bfr_.New(), tmp_bfr = Bry_bfr_.New();
+ private final Bry_bfr tmp;
+ private Bry_bfr bfr;
+ private final Xomw_sanitizer sanitizer; private final Xomw_strip_state strip_state;
private final List_adp
td_history = List_adp_.New() // Is currently a td tag open?
, last_tag_history = List_adp_.New() // Save history of last lag activated (td, th or caption)
@@ -30,14 +32,22 @@ public class Xomw_table_wkr implements gplx.core.brys.Bry_split_wkr {// THREAD.U
;
private int indent_level = 0; // indent level of the table
private byte[] first_2 = new byte[2];
- private Xomw_sanitizer_mgr sanitizer;
- private Xop_uniq_mgr uniq_mgr;
- public byte[] Do_table_stuff(Xomw_parser_ctx ctx, byte[] src) {
- this.sanitizer = ctx.Sanitizer();
- this.uniq_mgr = ctx.Uniq_mgr();
+ public Xomw_table_wkr(Xomw_parser parser) {
+ this.tmp = parser.Tmp();
+ this.sanitizer = parser.Sanitizer();
+ this.strip_state = parser.Strip_state();
+ }
+ public void Do_table_stuff(Xomw_parser_ctx pctx, Xomw_parser_bfr pbfr) {
+ Bry_bfr src_bfr = pbfr.Src();
+ byte[] src = src_bfr.Bfr();
+ int src_bgn = 0;
+ int src_end = src_bfr.Len();
+ this.bfr = pbfr.Trg();
+ pbfr.Switch();
+
indent_level = 0;
- Bry_split_.Split(src, 0, src.length, Byte_ascii.Nl, Bool_.N, this); // PORTED.SPLIT: $lines = StringUtils::explode("\n", $text);
+ Bry_split_.Split(src, src_bgn, src_end, Byte_ascii.Nl, Bool_.N, this); // PORTED.SPLIT: $lines = StringUtils::explode("\n", $text);
// Closing open td, tr && table
while (td_history.Len() > 0) {
@@ -62,9 +72,8 @@ public class Xomw_table_wkr implements gplx.core.brys.Bry_split_wkr {// THREAD.U
if ( bfr.Len() == Len__tb__empty
&& Bry_.Eq(bfr.Bfr(), 0, Len__tb__empty, Html__tb__empty)) {
bfr.Clear();
- return Bry_.Empty;
+ return;
}
- return bfr.To_bry_and_clear();
}
public int Split(byte[] src, int itm_bgn, int itm_end) {
byte[] out_line = Bry_.Mid(src, itm_bgn, itm_end); // MW: "$outLine"
@@ -78,7 +87,7 @@ public class Xomw_table_wkr implements gplx.core.brys.Bry_split_wkr {// THREAD.U
byte first_char = line[0];
first_2[0] = line[0];
- if (line_len > 1) first_2[1] = line[1];
+ first_2[1] = line_len == 1 ? Byte_ascii.Null : line[1];
// PORTED: preg_match('/^(:*)\s*\{\|(.*)$/', $line, $matches)
byte[] tblw_atrs = null;
@@ -94,15 +103,15 @@ public class Xomw_table_wkr implements gplx.core.brys.Bry_split_wkr {// THREAD.U
// First check if we are starting a new table
indent_level = colons_end;
- tblw_atrs = uniq_mgr.Convert(tblw_atrs);
+ tblw_atrs = strip_state.Unstrip_both(tblw_atrs);
// PORTED: out_line = str_repeat('- ', $indent_level) . "
";
for (int j = 0; j < indent_level; j++)
- tmp_bfr.Add(Html__dl__bgn);
- tmp_bfr.Add_str_a7("").Add_mid(line, 2, line.length).To_bry_and_clear();
+ line = tmp.Add_str_a7("
").Add_mid(line, 2, line.length).To_bry_and_clear();
byte[] last_tag = Php_ary_.Pop_bry_or_null(last_tag_history);
if (!Php_ary_.Pop_bool_or_n(has_opened_tr)) {
- line = tmp_bfr.Add_str_a7(" |
").Add(line).To_bry_and_clear();
+ line = tmp.Add_str_a7(" |
").Add(line).To_bry_and_clear();
}
if (Php_ary_.Pop_bool_or_n(tr_history)) {
- line = tmp_bfr.Add_str_a7("").Add(line).To_bry_and_clear();
+ line = tmp.Add_str_a7("").Add(line).To_bry_and_clear();
}
if (Php_ary_.Pop_bool_or_n(td_history)) {
- line = tmp_bfr.Add_str_a7("").Add(last_tag).Add_byte(Byte_ascii.Angle_end).Add(line).To_bry_and_clear();
+ line = tmp.Add_str_a7("").Add(last_tag).Add_byte(Byte_ascii.Angle_end).Add(line).To_bry_and_clear();
}
Php_ary_.Pop_bry_or_null(tr_attributes);
// PORTED:$outLine = $line . str_repeat( '', $indent_level );
- tmp_bfr.Add(line);
+ tmp.Add(line);
for (int j = 0; j < indent_level; j++)
- tmp_bfr.Add(Html__dl__end);
- out_line = tmp_bfr.To_bry_and_clear();
+ tmp.Add(Html__dl__end);
+ out_line = tmp.To_bry_and_clear();
}
else if (Bry_.Eq(first_2, Wtxt__tr)) {
// Now we have a table row
line = Bry_.Mid(line, 2); // PORTED: $line = preg_replace('#^\|-+#', '', $line);
// Whats after the tag is now only attributes
- byte[] atrs = uniq_mgr.Unstrip_both(line);
- sanitizer.Fix_tag_attributes(tmp_bfr, Name__tr, atrs);
- atrs = tmp_bfr.To_bry_and_clear();
+ byte[] atrs = strip_state.Unstrip_both(line);
+ sanitizer.Fix_tag_attributes(tmp, Name__tr, atrs);
+ atrs = tmp.To_bry_and_clear();
Php_ary_.Pop_bry_or_null(tr_attributes);
tr_attributes.Add(atrs);
@@ -159,7 +168,7 @@ public class Xomw_table_wkr implements gplx.core.brys.Bry_split_wkr {// THREAD.U
}
if (Php_ary_.Pop_bool_or_n(td_history)) {
- line = tmp_bfr.Add_str_a7("").Add(last_tag).Add_byte(Byte_ascii.Gt).Add(line).To_bry_and_clear();
+ line = tmp.Add_str_a7("").Add(last_tag).Add_byte(Byte_ascii.Gt).Add(line).To_bry_and_clear();
}
out_line = line;
@@ -181,13 +190,14 @@ public class Xomw_table_wkr implements gplx.core.brys.Bry_split_wkr {// THREAD.U
// Implies both are valid for table headings.
if (first_char == Byte_ascii.Bang) {
- Xomw_string_utils.Replace_markup(line, 0, line.length, Wtxt__th2, Wtxt__td2); // $line = StringUtils::replaceMarkup('!!', '||', $line);
+ Xomw_string_utl.Replace_markup(line, 0, line.length, Wtxt__th2, Wtxt__td2); // $line = StringUtils::replaceMarkup('!!', '||', $line);
}
// Split up multiple cells on the same line.
// FIXME : This can result in improper nesting of tags processed
// by earlier parser steps.
byte[][] cells = Bry_split_.Split(line, Wtxt__td2);
+ if (cells.length == 0) cells = Cells__empty; // handle "\n|\n" which should still generate " |
", not ""; see TEST
out_line = Bry_.Empty;
@@ -200,7 +210,7 @@ public class Xomw_table_wkr implements gplx.core.brys.Bry_split_wkr {// THREAD.U
if (first_char != Byte_ascii.Plus) {
byte[] tr_after = Php_ary_.Pop_bry_or_null(tr_attributes);
if (!Php_ary_.Pop_bool_or_n(tr_history)) {
- previous = tmp_bfr.Add_str_a7("\n").To_bry_and_clear();
+ previous = tmp.Add_str_a7("
\n").To_bry_and_clear();
}
tr_history.Add(true);
tr_attributes.Add(Bry_.Empty);
@@ -211,7 +221,7 @@ public class Xomw_table_wkr implements gplx.core.brys.Bry_split_wkr {// THREAD.U
byte[] last_tag = Php_ary_.Pop_bry_or_null(last_tag_history);
if (Php_ary_.Pop_bool_or_n(td_history)) {
- previous = tmp_bfr.Add_str_a7("").Add(last_tag).Add_str_a7(">\n").Add(previous).To_bry_and_clear();
+ previous = tmp.Add_str_a7("").Add(last_tag).Add_str_a7(">\n").Add(previous).To_bry_and_clear();
}
if (first_char == Byte_ascii.Pipe) {
@@ -237,17 +247,17 @@ public class Xomw_table_wkr implements gplx.core.brys.Bry_split_wkr {// THREAD.U
byte[] cell_data_0 = cell_data[0];
byte[] cell_data_1 = cell_data[1];
if (Bry_find_.Find_fwd(cell_data_0, Wtxt__lnki__bgn) != Bry_find_.Not_found) {
- cell = tmp_bfr.Add(previous).Add_byte(Byte_ascii.Angle_bgn).Add(last_tag).Add_byte(Byte_ascii.Angle_end).Add(cell).To_bry_and_clear();
+ cell = tmp.Add(previous).Add_byte(Byte_ascii.Angle_bgn).Add(last_tag).Add_byte(Byte_ascii.Angle_end).Add(cell).To_bry_and_clear();
}
else if (cell_data_1 == null) {
- cell = tmp_bfr.Add(previous).Add_byte(Byte_ascii.Angle_bgn).Add(last_tag).Add_byte(Byte_ascii.Angle_end).Add(cell_data_0).To_bry_and_clear();
+ cell = tmp.Add(previous).Add_byte(Byte_ascii.Angle_bgn).Add(last_tag).Add_byte(Byte_ascii.Angle_end).Add(cell_data_0).To_bry_and_clear();
}
else {
- byte[] atrs = uniq_mgr.Unstrip_both(cell_data_0);
- tmp_bfr.Add(previous).Add_byte(Byte_ascii.Angle_bgn).Add(last_tag);
- sanitizer.Fix_tag_attributes(tmp_bfr, last_tag, atrs);
- tmp_bfr.Add_byte(Byte_ascii.Angle_end).Add(cell_data_1);
- cell = tmp_bfr.To_bry_and_clear();
+ byte[] atrs = strip_state.Unstrip_both(cell_data_0);
+ tmp.Add(previous).Add_byte(Byte_ascii.Angle_bgn).Add(last_tag);
+ sanitizer.Fix_tag_attributes(tmp, last_tag, atrs);
+ tmp.Add_byte(Byte_ascii.Angle_end).Add(cell_data_1);
+ cell = tmp.To_bry_and_clear();
}
out_line = Bry_.Add(out_line, cell);
@@ -278,4 +288,5 @@ public class Xomw_table_wkr implements gplx.core.brys.Bry_split_wkr {// THREAD.U
, Html__tb__empty = Bry_.new_a7("")
;
private static final int Len__tb__empty = Html__tb__empty.length;
+ private static final byte[][] Cells__empty = new byte[][] {Bry_.Empty};
}
diff --git a/400_xowa/src/gplx/xowa/parsers/mws/tables/Xomw_table_wkr__tst.java b/400_xowa/src/gplx/xowa/mws/parsers/tables/Xomw_table_wkr__tst.java
similarity index 74%
rename from 400_xowa/src/gplx/xowa/parsers/mws/tables/Xomw_table_wkr__tst.java
rename to 400_xowa/src/gplx/xowa/mws/parsers/tables/Xomw_table_wkr__tst.java
index b73d7072f..e05ba26c0 100644
--- a/400_xowa/src/gplx/xowa/parsers/mws/tables/Xomw_table_wkr__tst.java
+++ b/400_xowa/src/gplx/xowa/mws/parsers/tables/Xomw_table_wkr__tst.java
@@ -15,7 +15,7 @@ GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see .
*/
-package gplx.xowa.parsers.mws.tables; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.mws.*;
+package gplx.xowa.mws.parsers.tables; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
import org.junit.*;
public class Xomw_table_wkr__tst {
private final Xomw_table_wkr__fxt fxt = new Xomw_table_wkr__fxt();
@@ -101,13 +101,29 @@ public class Xomw_table_wkr__tst {
, "
|
"
));
}
+ @Test public void Td__empty() { // PURPOSE: handles (a) failure due to "first_2" array not handling "\n|\n"; (b) missing |
+ fxt.Test__parse(String_.Concat_lines_nl_skip_last
+ ( "{|"
+ , "|-"
+ , "|"
+ , "|}"
+ ), String_.Concat_lines_nl_skip_last
+ ( ""
+ , ""
+ , ""
+ , ""
+ , " |
"
+ ));
+ }
}
class Xomw_table_wkr__fxt {
- private final Xomw_parser_ctx ctx = new Xomw_parser_ctx();
- private final Xomw_table_wkr wkr = new Xomw_table_wkr();
+ private final Xomw_parser_bfr parser_bfr = new Xomw_parser_bfr();
+ private final Xomw_parser_ctx pctx = new Xomw_parser_ctx();
+ private final Xomw_table_wkr wkr = new Xomw_table_wkr(new Xomw_parser());
public void Test__parse(String src_str, String expd) {
byte[] src_bry = Bry_.new_u8(src_str);
- byte[] actl = wkr.Do_table_stuff(ctx, src_bry);
- Tfds.Eq_str_lines(expd, String_.new_u8(actl), src_str);
+ parser_bfr.Init(src_bry);
+ wkr.Do_table_stuff(pctx, parser_bfr);
+ Tfds.Eq_str_lines(expd, parser_bfr.Rslt().To_str_and_clear(), src_str);
}
}
diff --git a/400_xowa/src/gplx/xowa/parsers/mws/utils/Xomw_string_utils.java b/400_xowa/src/gplx/xowa/mws/utls/Xomw_string_utl.java
similarity index 91%
rename from 400_xowa/src/gplx/xowa/parsers/mws/utils/Xomw_string_utils.java
rename to 400_xowa/src/gplx/xowa/mws/utls/Xomw_string_utl.java
index b969ee57b..d557b0ee7 100644
--- a/400_xowa/src/gplx/xowa/parsers/mws/utils/Xomw_string_utils.java
+++ b/400_xowa/src/gplx/xowa/mws/utls/Xomw_string_utl.java
@@ -15,8 +15,8 @@ GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see .
*/
-package gplx.xowa.parsers.mws.utils; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.mws.*;
-public class Xomw_string_utils {
+package gplx.xowa.mws.utls; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
+public class Xomw_string_utl {
public static void Replace_markup(byte[] src, int src_bgn, int src_end, byte[] find, byte[] repl) { // REF:/includes/libs/StringUtils.php|replaceMarkup
// PORTED: avoiding multiple regex calls / String creations
// $placeholder = "\x00";
diff --git a/400_xowa/src/gplx/xowa/parsers/mws/utils/Xomw_string_utils__tst.java b/400_xowa/src/gplx/xowa/mws/utls/Xomw_string_utl__tst.java
similarity index 77%
rename from 400_xowa/src/gplx/xowa/parsers/mws/utils/Xomw_string_utils__tst.java
rename to 400_xowa/src/gplx/xowa/mws/utls/Xomw_string_utl__tst.java
index 7b5b9c3be..6912502ef 100644
--- a/400_xowa/src/gplx/xowa/parsers/mws/utils/Xomw_string_utils__tst.java
+++ b/400_xowa/src/gplx/xowa/mws/utls/Xomw_string_utl__tst.java
@@ -15,10 +15,10 @@ GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see .
*/
-package gplx.xowa.parsers.mws.utils; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.mws.*;
-import org.junit.*;
-public class Xomw_string_utils__tst {
- private final Xomw_string_utils__fxt fxt = new Xomw_string_utils__fxt();
+package gplx.xowa.mws.utls; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
+import org.junit.*; import gplx.core.tests.*;
+public class Xomw_string_utl__tst {
+ private final Xomw_string_utl__fxt fxt = new Xomw_string_utl__fxt();
@Test public void Basic() {
fxt.Test__replace_markup("a!!b" , "!!", "||", "a||b");
}
@@ -38,10 +38,10 @@ public class Xomw_string_utils__tst {
fxt.Test__replace_markup("a!!b!!>!!c" , "!!", "||", "a||b||>||c"); // NOTE: should probably be "!!>!!>", but unmatched ">" are escaped to ">"
}
}
-class Xomw_string_utils__fxt {
+class Xomw_string_utl__fxt {
public void Test__replace_markup(String src_str, String find, String repl, String expd) {
byte[] src_bry = Bry_.new_u8(src_str);
- Xomw_string_utils.Replace_markup(src_bry, 0, src_bry.length, Bry_.new_a7(find), Bry_.new_a7(repl));
- Tfds.Eq_str(expd, src_bry);
+ Xomw_string_utl.Replace_markup(src_bry, 0, src_bry.length, Bry_.new_a7(find), Bry_.new_a7(repl));
+ Gftest.Eq__str(expd, src_bry);
}
}
diff --git a/400_xowa/src/gplx/xowa/mws/utls/Xomw_ttl_utl.java b/400_xowa/src/gplx/xowa/mws/utls/Xomw_ttl_utl.java
new file mode 100644
index 000000000..b707c532d
--- /dev/null
+++ b/400_xowa/src/gplx/xowa/mws/utls/Xomw_ttl_utl.java
@@ -0,0 +1,120 @@
+/*
+XOWA: the XOWA Offline Wiki Application
+Copyright (C) 2012 gnosygnu@gmail.com
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as
+published by the Free Software Foundation, either version 3 of the
+License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see .
+*/
+package gplx.xowa.mws.utls; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
+public class Xomw_ttl_utl {
+ // REF.MW: DefaultSettings.php
+ // Allowed title characters -- regex character class
+ // Don't change this unless you know what you're doing
+ //
+ // Problematic punctuation:
+ // - []{}|# Are needed for link syntax, never enable these
+ // - <> Causes problems with HTML escaping, don't use
+ // - % Enabled by default, minor problems with path to query rewrite rules, see below
+ // - + Enabled by default, but doesn't work with path to query rewrite rules,
+ // corrupted by apache
+ // - ? Enabled by default, but doesn't work with path to PATH_INFO rewrites
+ //
+ // All three of these punctuation problems can be avoided by using an alias,
+ // instead of a rewrite rule of either variety.
+ //
+ // The problem with % is that when using a path to query rewrite rule, URLs are
+ // double-unescaped: once by Apache's path conversion code, and again by PHP. So
+ // %253F, for example, becomes "?". Our code does not double-escape to compensate
+ // for this, indeed double escaping would break if the double-escaped title was
+ // passed in the query String rather than the path. This is a minor security issue
+ // because articles can be created such that they are hard to view or edit.
+ //
+ // In some rare cases you may wish to remove + for compatibility with old links.
+ //
+ // Theoretically 0x80-0x9F of ISO 8859-1 should be disallowed, but
+ // this breaks interlanguage links
+ // $wgLegalTitleChars = " %!\"$&'()*,\\-.\\/0-9:;=?@A-Z\\\\^_`a-z~\\x80-\\xFF+";
+ //
+ // REGEX:
+ // without-backslash escaping --> \s%!"$&'()*,-./0-9:;=?@A-Z\^_`a-z~x80-xFF+
+ // rearranged
+ // letters --> 0-9A-Za-z
+ // unicode-chars --> x80-xFF
+ // symbols --> \s%!"$&'()*,-./:;=?@\^_`~+
+ // deliberately ignores
+ // control chars: 00-31,127
+ // []{}|#<>
+ public static int Find_fwd_while_title(byte[] src, int src_bgn, int src_end, boolean[] valid) {
+   // Returns the position of the first byte in src[src_bgn..src_end) that is not a legal title char, or src_end if every byte is legal; valid is a lookup table indexed by ASCII code 0-127 (see Title_chars_valid)
+   int cur = src_bgn;
+   while (cur < src_end) { // "<" rather than "==" so an empty or inverted range exits immediately instead of reading past the array
+     byte b = src[cur];
+     int b_len = gplx.core.intls.Utf8_.Len_of_char_by_1st_byte(b);
+     if (b_len == 1) { // single-byte char
+       // only consult the table when b >= 0: Java bytes are signed, so a byte >= 0x80 reported as length 1 would index valid[] negatively; such bytes are legal per $wgLegalTitleChars (\x80-\xFF)
+       if (b >= 0 && !valid[b]) break; // invalid; EX: "<"
+       cur++; // valid; EX: "a0A B&$"
+     }
+     else { // Multi-byte UTF8; NOTE: all sequences are valid
+       cur += b_len;
+       if (cur > src_end) cur = src_end; // sequence truncated at end of range: clamp so the result never exceeds src_end
+     }
+   }
+   return cur;
+ }
+ private static boolean[] title_chars_valid; // lazily-built lookup table: index = ASCII code (0-127); true = char is allowed in a title
+ public static boolean[] Title_chars_valid() { // returns the shared table; callers must treat it as read-only since the same array is handed to everyone
+   if (title_chars_valid == null) {
+     boolean[] rv = new boolean[128]; // fill a local first; the field was previously assigned before being filled, so a concurrent caller could receive a table still being built
+     // add num and alpha
+     for (int i = Byte_ascii.Num_0; i <= Byte_ascii.Num_9; i++)
+       rv[i] = true;
+     for (int i = Byte_ascii.Ltr_A; i <= Byte_ascii.Ltr_Z; i++)
+       rv[i] = true;
+     for (int i = Byte_ascii.Ltr_a; i <= Byte_ascii.Ltr_z; i++)
+       rv[i] = true;
+     // add symbols: \s%!"$&'()*,-./:;=?@\^_`~+
+     byte[] symbols = new byte[]
+     { Byte_ascii.Space
+     , Byte_ascii.Percent
+     , Byte_ascii.Bang
+     , Byte_ascii.Quote
+     , Byte_ascii.Dollar // was missing even though "$" is in $wgLegalTitleChars and in the symbol list above
+     , Byte_ascii.Amp
+     , Byte_ascii.Apos
+     , Byte_ascii.Paren_bgn
+     , Byte_ascii.Paren_end
+     , Byte_ascii.Star
+     , Byte_ascii.Comma
+     , Byte_ascii.Dash
+     , Byte_ascii.Dot
+     , Byte_ascii.Slash
+     , Byte_ascii.Colon
+     , Byte_ascii.Semic
+     , Byte_ascii.Eq
+     , Byte_ascii.Question
+     , Byte_ascii.At
+     , Byte_ascii.Backslash
+     , Byte_ascii.Pow
+     , Byte_ascii.Underline
+     , Byte_ascii.Tick
+     , Byte_ascii.Tilde
+     , Byte_ascii.Plus
+     };
+     for (int i = 0; i < symbols.length; i++)
+       rv[symbols[i]] = true;
+     title_chars_valid = rv; // publish only once every entry is set
+   }
+   return title_chars_valid;
+ }
+}
diff --git a/400_xowa/src/gplx/xowa/mws/utls/Xomw_ttl_utl__tst.java b/400_xowa/src/gplx/xowa/mws/utls/Xomw_ttl_utl__tst.java
new file mode 100644
index 000000000..905083328
--- /dev/null
+++ b/400_xowa/src/gplx/xowa/mws/utls/Xomw_ttl_utl__tst.java
@@ -0,0 +1,30 @@
+/*
+XOWA: the XOWA Offline Wiki Application
+Copyright (C) 2012 gnosygnu@gmail.com
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as
+published by the Free Software Foundation, either version 3 of the
+License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with this program. If not, see .
+*/
+package gplx.xowa.mws.utls; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
+import org.junit.*; import gplx.core.tests.*;
+public class Xomw_ttl_utl__tst { // unit tests for Xomw_ttl_utl.Find_fwd_while_title using the default Title_chars_valid table
+ private final Xomw_ttl_utl__fxt fxt = new Xomw_ttl_utl__fxt();
+ @Test public void Alphanum() {fxt.Test__find_fwd_while_title("0aB" , 3);} // digits and letters are all valid, so the scan runs to the end of the input (3)
+ @Test public void Angle() {fxt.Test__find_fwd_while_title("0a<" , 2);} // "<" is not in the valid table, so the scan stops at its index (2)
+}
+class Xomw_ttl_utl__fxt { // test fixture: converts String input to bytes and checks the scan result
+ public void Test__find_fwd_while_title(String src_str, int expd) { // expd = expected return value, i.e. index of the first non-title byte or the input length
+ byte[] src_bry = Bry_.new_u8(src_str);
+ Gftest.Eq__int(expd, Xomw_ttl_utl.Find_fwd_while_title(src_bry, 0, src_bry.length, Xomw_ttl_utl.Title_chars_valid())); // scans the whole input with the default table
+ }
+}
diff --git a/400_xowa/src/gplx/xowa/parsers/hdrs/sections/Xop_section_list.java b/400_xowa/src/gplx/xowa/parsers/hdrs/sections/Xop_section_list.java
index 38cb134e5..c290b3295 100644
--- a/400_xowa/src/gplx/xowa/parsers/hdrs/sections/Xop_section_list.java
+++ b/400_xowa/src/gplx/xowa/parsers/hdrs/sections/Xop_section_list.java
@@ -16,10 +16,10 @@ You should have received a copy of the GNU Affero General Public License
along with this program. If not, see .
*/
package gplx.xowa.parsers.hdrs.sections; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.hdrs.*;
-import gplx.xowa.parsers.mws.*; import gplx.xowa.parsers.mws.wkrs.*;
+import gplx.xowa.mws.parsers.*; import gplx.xowa.mws.parsers.headings.*;
import gplx.xowa.addons.htmls.tocs.*; import gplx.xowa.htmls.core.htmls.tidy.*;
-class Xop_section_list implements Xomw_hdr_cbk {
- private final Xomw_hdr_wkr hdr_wkr = new Xomw_hdr_wkr();
+class Xop_section_list implements Xomw_heading_cbk {
+ private final Xomw_heading_wkr hdr_wkr = new Xomw_heading_wkr();
private final Ordered_hash hash = Ordered_hash_.New_bry();
private final Xoh_toc_mgr toc_mgr = new Xoh_toc_mgr();
private byte[] src;
@@ -92,7 +92,7 @@ class Xop_section_list implements Xomw_hdr_cbk {
return new int[] {src_bgn, src_end};
}
- public void On_hdr_seen(Xomw_parser_ctx pctx, Xomw_hdr_wkr wkr) {
+ public void On_hdr_seen(Xomw_parser_ctx pctx, Xomw_heading_wkr wkr) {
// get key by taking everything between ==; EX: "== abc ==" -> " abc "
byte[] src = wkr.Src();
int hdr_txt_bgn = wkr.Hdr_lhs_end();
@@ -117,5 +117,5 @@ class Xop_section_list implements Xomw_hdr_cbk {
Xop_section_itm itm = new Xop_section_itm(hash.Count(), num, key, wkr.Hdr_bgn(), wkr.Hdr_end());
hash.Add(key, itm);
}
- public void On_src_done(Xomw_parser_ctx pctx, Xomw_hdr_wkr wkr) {}
+ public void On_src_done(Xomw_parser_ctx pctx, Xomw_heading_wkr wkr) {}
}
diff --git a/400_xowa/src/gplx/xowa/parsers/hdrs/sections/Xop_section_mgr.java b/400_xowa/src/gplx/xowa/parsers/hdrs/sections/Xop_section_mgr.java
index 90a71aa4c..5c0086a01 100644
--- a/400_xowa/src/gplx/xowa/parsers/hdrs/sections/Xop_section_mgr.java
+++ b/400_xowa/src/gplx/xowa/parsers/hdrs/sections/Xop_section_mgr.java
@@ -17,7 +17,7 @@ along with this program. If not, see .
*/
package gplx.xowa.parsers.hdrs.sections; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.hdrs.*;
import gplx.langs.htmls.*;
-import gplx.xowa.parsers.mws.*; import gplx.xowa.parsers.mws.wkrs.*; import gplx.xowa.parsers.hdrs.*; import gplx.xowa.htmls.core.htmls.tidy.*;
+import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*; import gplx.xowa.parsers.hdrs.*; import gplx.xowa.htmls.core.htmls.tidy.*;
public class Xop_section_mgr implements Gfo_invk {
private Xoae_app app; private Xowe_wiki wiki;
private Xow_tidy_mgr_interface tidy_mgr;
diff --git a/400_xowa/src/gplx/xowa/parsers/mws/blocks/Xomw_block_wkr.java b/400_xowa/src/gplx/xowa/parsers/mws/blocks/Xomw_block_wkr.java
deleted file mode 100644
index 14727a688..000000000
--- a/400_xowa/src/gplx/xowa/parsers/mws/blocks/Xomw_block_wkr.java
+++ /dev/null
@@ -1,261 +0,0 @@
-/*
-XOWA: the XOWA Offline Wiki Application
-Copyright (C) 2012 gnosygnu@gmail.com
-
-This program is free software: you can redistribute it and/or modify
-it under the terms of the GNU Affero General Public License as
-published by the Free Software Foundation, either version 3 of the
-License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU Affero General Public License for more details.
-
-You should have received a copy of the GNU Affero General Public License
-along with this program. If not, see .
-*/
-package gplx.xowa.parsers.mws.blocks; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.mws.*;
-import gplx.langs.phps.utls.*;
-public class Xomw_block_wkr implements gplx.core.brys.Bry_split_wkr {// THREAD.UNSAFE: caching for repeated calls
- private final Bry_bfr bfr = Bry_bfr_.New();
- private byte[] last_prefix, last_section;
- private boolean line_start, dt_open, in_block_elem, para_stack, in_blockquote, in_pre = false;
- private int prefix_len;
- private int src_len;
- public byte[] Do_block_levels(byte[] src, boolean line_start) {
- this.src_len = src.length;
- this.line_start = line_start;
- // Parsing through the text line by line. The main thing
- // happening here is handling of block-level elements p, pre,
- // and making lists from lines starting with * # : etc.
- this.last_prefix = Bry_.Empty;
- bfr.Clear();
- this.dt_open = this.in_block_elem = false;
- this.prefix_len = 0;
- this.para_stack = false;
- this.in_blockquote = false;
-
- // PORTED.SPLIT: $textLines = StringUtils::explode("\n", $text);
- Bry_split_.Split(src, 0, src_len, Byte_ascii.Nl, Bool_.N, this);
-
- while (prefix_len > 0) {
- // bfr .= this.closeList(prefix2[prefix_len - 1]);
- prefix_len--;
- if (prefix_len > 0) {
- bfr.Add_byte_nl();
- }
- }
- if (Bry_.Len_gt_0(last_section)) {
- bfr.Add_str_a7("").Add(last_section).Add_str_a7(">");
- this.last_section = Bry_.Empty;
- }
-
- if (dt_open || in_block_elem || para_stack || in_blockquote || in_pre) {
- }
- return bfr.To_bry_and_clear();
- }
- public int Split(byte[] src, int itm_bgn, int itm_end) {
- // Fix up line_start
- if (!line_start) {
- bfr.Add_mid(src, itm_bgn, itm_end);
- line_start = true;
- return Bry_split_.Rv__ok;
- }
-
- // * = ul
- // # = ol
- // ; = dt
- // : = dd
- int last_prefix_len = last_prefix.length;
- boolean pre_close_match = false; //preg_match('/<\\/pre/i', $oLine);
- boolean pre_open_match = false; //preg_match('/ element, scan for and figure out what prefixes are there.
- if (!in_pre) {
- // Multiple prefixes may abut each other for nested lists.
- prefix_len = 0;// strspn($oLine, '*#:;');
- prefix = Php_str_.Substr(src, itm_bgn, prefix_len);
-
- // eh?
- // ; and : are both from definition-lists, so they're equivalent
- // for the purposes of determining whether or not we need to open/close
- // elements.
- prefix2 = Bry_.Replace(prefix, Byte_ascii.Semic, Byte_ascii.Colon);
- t = Bry_.Mid(src, itm_bgn + prefix_len, itm_end);
-// this.in_pre = (boolean)pre_open_match;
- }
- else {
- // Don't interpret any other prefixes in preformatted text
- prefix_len = 0;
- prefix = prefix2 = Bry_.Empty;
- t = Bry_.Mid(src, itm_bgn, itm_end);
- }
-
- // List generation
- byte[] term = null, t2 = null;
- int common_prefix_len = -1;
- if (prefix_len > 0 && Bry_.Eq(last_prefix, prefix2)) {
- // Same as the last item, so no need to deal with nesting or opening stuff
-// bfr .= this.nextItem(substr(prefix, -1));
- para_stack = false;
-
- if (prefix_len > 0 && prefix[prefix_len - 1] == Byte_ascii.Semic) {
- // The one nasty exception: definition lists work like this:
- // ; title : definition text
- // So we check for : in the remainder text to split up the
- // title and definition, without b0rking links.
- term = t2 = Bry_.Empty;
-// if (this.findColonNoLinks(t, term, t2) !== false) {
- t = t2;
- bfr.Add(term); // . this.nextItem(':');
-// }
- }
- }
- else if (prefix_len > 0 || last_prefix_len > 0) {
- // We need to open or close prefixes, or both.
-
- // Either open or close a level...
-// common_prefix_len = this.getCommon(prefix, last_prefix);
- para_stack = false;
-
- // Close all the prefixes which aren't shared.
- while (common_prefix_len < last_prefix_len) {
-// bfr .= this.closeList(last_prefix[last_prefix_len - 1]);
- last_prefix_len--;
- }
-//
- // Continue the current prefix if appropriate.
- if (prefix_len <= common_prefix_len && common_prefix_len > 0) {
-// bfr .= this.nextItem(prefix[common_prefix_len - 1]);
- }
-
- // Open prefixes where appropriate.
- if (Bry_.Len_gt_0(last_prefix) && prefix_len > common_prefix_len) {
- bfr.Add_byte_nl();
- }
- while (prefix_len > common_prefix_len) {
-// $char = substr(prefix, common_prefix_len, 1);
-// bfr .= this.openList($char);
-//
-// if (';' == $char) {
-// // @todo FIXME: This is dupe of code above
-// if (this.findColonNoLinks(t, term, t2) !== false) {
-// t = t2;
-// bfr .= term . this.nextItem(':');
-// }
-// }
- ++common_prefix_len;
- }
- if (prefix_len == 0 && Bry_.Len_gt_0(last_prefix)) {
- bfr.Add_byte_nl();
- }
- last_prefix = prefix2;
- }
-
- // If we have no prefixes, go to paragraph mode.
- if (0 == prefix_len) {
- // No prefix (not in list)--go to paragraph mode
- // XXX: use a stack for nestable elements like span, table and div
- boolean open_match = false, close_match = false;
-// open_match = preg_match(
-// '/(?:
]/i', t,
-// $bqMatch, PREG_OFFSET_CAPTURE, $bqOffset)
-// ) {
-// in_blockquote = !$bqMatch[1][0]; // is this a close tag?
-// $bqOffset = $bqMatch[0][1] + strlen($bqMatch[0][0]);
-// }
- in_block_elem = !close_match;
- }
- else if (!in_block_elem && !this.in_pre) {
- if ( Byte_ascii.Space == t[0]
-// && (last_section == 'pre' || trim(t) != '')
- && !in_blockquote
- ) {
- // pre
-// if (this.last_section !== 'pre') {
- para_stack = false;
-// bfr .= this.closeParagraph() . '';
-// this.last_section = 'pre';
-// }
- t = Bry_.Mid(t, 1);
- }
- else {
- // paragraph
-// if (trim(t) == '') {
- if (para_stack) {
-// bfr .= para_stack . '
';
- para_stack = false;
-// this.last_section = 'p';
- }
- else {
-// if (this.last_section !== 'p') {
-// bfr .= this.closeParagraph();
-// this.last_section = '';
-// para_stack = '';
-// }
-// else {
-// para_stack = '
';
-// }
- }
-// }
-// else {
- if (para_stack) {
-// bfr .= para_stack;
- para_stack = false;
-// this.last_section = 'p';
- }
-// else if (this.last_section !== 'p') {
-// bfr .= this.closeParagraph() . '
';
-// this.last_section = 'p';
-// }
-// }
- }
- }
- }
- // somewhere above we forget to get out of pre block (bug 785)
- if (pre_close_match && this.in_pre) {
- this.in_pre = false;
- }
- if (para_stack == false) {
- bfr.Add(t);
- if (prefix_len == 0) {
- bfr.Add_byte_nl();
- }
- }
-
- if (last_prefix_len == -1 || common_prefix_len == -1) {
- }
- return Bry_split_.Rv__ok;
- }
-// private static final int
-// Para_stack_none = 0 // false
-// , Para_stack_bgn = 1 //
-// , Para_stack_mid = 2 //
-// ;
-// private static final byte
-// Mode_none = 0 // ''
-// , Mode_para = 1 // p
-// , Mode_pre = 2 // pre
-// ;
-}
diff --git a/400_xowa/src/gplx/xowa/parsers/mws/utils/Xomw_sanitizer_mgr.java b/400_xowa/src/gplx/xowa/parsers/mws/utils/Xomw_sanitizer_mgr.java
deleted file mode 100644
index 1842bae56..000000000
--- a/400_xowa/src/gplx/xowa/parsers/mws/utils/Xomw_sanitizer_mgr.java
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
-XOWA: the XOWA Offline Wiki Application
-Copyright (C) 2012 gnosygnu@gmail.com
-
-This program is free software: you can redistribute it and/or modify
-it under the terms of the GNU Affero General Public License as
-published by the Free Software Foundation, either version 3 of the
-License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU Affero General Public License for more details.
-
-You should have received a copy of the GNU Affero General Public License
-along with this program. If not, see .
-*/
-package gplx.xowa.parsers.mws.utils; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.mws.*;
-import gplx.xowa.parsers.htmls.*;
-public class Xomw_sanitizer_mgr {
- private final Mwh_doc_wkr__atr_bldr atr_bldr = new Mwh_doc_wkr__atr_bldr();
- private final Mwh_atr_parser atr_parser = new Mwh_atr_parser();
- public void Fix_tag_attributes(Bry_bfr bfr, byte[] tag_name, byte[] atrs) {
- atr_bldr.Atrs__clear();
- atr_parser.Parse(atr_bldr, -1, -1, atrs, 0, atrs.length);
- int len = atr_bldr.Atrs__len();
-
- // PORTED: Sanitizer.php|safeEncodeTagAttributes
- for (int i = 0; i < len; i++) {
- // $encAttribute = htmlspecialchars( $attribute );
- // $encValue = Sanitizer::safeEncodeAttribute( $value );
- // $attribs[] = "$encAttribute=\"$encValue\"";
- Mwh_atr_itm itm = atr_bldr.Atrs__get_at(i);
- bfr.Add_byte_space(); // "return count( $attribs ) ? ' ' . implode( ' ', $attribs ) : '';"
- bfr.Add_bry_escape_html(itm.Key_bry(), itm.Key_bgn(), itm.Key_end());
- bfr.Add_byte_eq().Add_byte_quote();
- bfr.Add(itm.Val_as_bry()); // TODO.XO:Sanitizer::encode
- bfr.Add_byte_quote();
- }
- }
-}
diff --git a/400_xowa/src/gplx/xowa/parsers/uniqs/Xop_uniq_mgr.java b/400_xowa/src/gplx/xowa/parsers/uniqs/Xop_uniq_mgr.java
index 4b2553d7b..32464a421 100644
--- a/400_xowa/src/gplx/xowa/parsers/uniqs/Xop_uniq_mgr.java
+++ b/400_xowa/src/gplx/xowa/parsers/uniqs/Xop_uniq_mgr.java
@@ -22,6 +22,7 @@ public class Xop_uniq_mgr { // REF.MW:/parser/StripState.php
private final Bry_bfr key_bfr = Bry_bfr_.New_w_size(32);
private int idx = -1;
public void Clear() {idx = -1; general_trie.Clear();}
+ public byte[] Get(byte[] key) {return (byte[])general_trie.Match_exact(key, 0, key.length);}
public byte[] Add(byte[] val) { // "" -> "\u007fUNIQ-item-1--QINU\u007f"
byte[] key = key_bfr
.Add(Bry__uniq__add__bgn)
@@ -30,10 +31,6 @@ public class Xop_uniq_mgr { // REF.MW:/parser/StripState.php
general_trie.Add_bry_bry(key, val);
return key;
}
- public byte[] Get(byte[] key) {return (byte[])general_trie.Match_exact(key, 0, key.length);}
- public byte[] Unstrip_both(byte[] src) {
- return Convert(src);
- }
public byte[] Convert(byte[] src) {
if (general_trie.Count() == 0) return src;