1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2026-03-02 03:49:30 +00:00

Embeddable: Create core dbs in proper subdirectory

This commit is contained in:
gnosygnu
2017-10-23 20:50:50 -04:00
parent 1336d44f34
commit 66877212bf
4537 changed files with 311750 additions and 0 deletions

View File

@@ -13,3 +13,55 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.core.lists; import gplx.*; import gplx.core.*;
import gplx.core.primitives.*;
/**
 * Map from primitive int keys to arbitrary values, backed by an Ordered_hash.
 * Entries can be read either by key or by position (Get_at_or_null).
 * Lookups by key reuse a single Int_obj_ref instance, so they synchronize on it.
 */
public class HashByInt {
  private final Ordered_hash hash = Ordered_hash_.New();
  // scratch key reused by every keyed lookup to avoid allocating an Int_obj_ref per call;
  // it is mutated via Val_(key), hence the synchronized blocks below
  private final Int_obj_ref tmp_key = Int_obj_ref.New_neg1();

  /** Removes all entries. */
  public void Clear() {
    hash.Clear();
  }

  /** @return number of entries */
  public int Len() {
    return hash.Len();
  }

  /**
   * Returns the value stored at position idx.
   *
   * @param idx zero-based position
   * @return the value, or null if idx is outside [0, Len())
   */
  public Object Get_at_or_null(int idx) {
    // honor the "or_null" contract explicitly instead of relying on how the backing hash treats a bad index
    if (idx < 0 || idx >= hash.Len()) return null;
    HashByIntItem item = (HashByIntItem)hash.Get_at(idx);
    return item == null ? null : item.val;
  }

  /**
   * Returns the value stored under key; a missing key is handled by Ordered_hash.Get_by_or_fail.
   */
  public Object Get_by_or_fail(int key) {
    synchronized (tmp_key) {
      HashByIntItem item = (HashByIntItem)hash.Get_by_or_fail(tmp_key.Val_(key));
      return item.val;
    }
  }

  /**
   * Returns the value stored under key, or null if the hash has no entry for it.
   */
  public Object Get_by_or_null(int key) {
    synchronized (tmp_key) {
      HashByIntItem item = (HashByIntItem)hash.Get_by(tmp_key.Val_(key));
      return item == null ? null : item.val;
    }
  }

  /** Converts val with Bry_.new_u8 and adds the resulting byte[] under key. */
  public HashByInt Add_as_bry(int key, String val) {return Add(key, Bry_.new_u8(val));}

  /**
   * Adds a new entry; a fresh Int_obj_ref is allocated as the stored key.
   *
   * @return this, for chaining
   */
  public HashByInt Add(int key, Object val) {
    HashByIntItem item = new HashByIntItem(key, val);
    hash.Add(Int_obj_ref.New(key), item);
    return this;
  }

  /**
   * Builds a new HashByInt holding the same key / value pairs in the same positional order.
   * Values are not copied; both instances reference the same value objects.
   */
  public HashByInt Clone() {
    HashByInt rv = new HashByInt();
    int len = hash.Len();
    for (int i = 0; i < len; i++) {
      HashByIntItem item = (HashByIntItem)hash.Get_at(i);
      rv.Add(item.key, item.val);
    }
    return rv;
  }
}
/** Immutable key / value pair stored inside HashByInt. */
class HashByIntItem {
  /** int key the item was added under */
  public final int key;
  /** value associated with the key; may be any Object, including null */
  public final Object val;

  public HashByIntItem(int key, Object val) {
    this.val = val;
    this.key = key;
  }
}

View File

@@ -13,3 +13,40 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki; import gplx.*; import gplx.xowa.*;
import gplx.xowa.mediawiki.includes.*;
import gplx.xowa.mediawiki.includes.interwiki.*;
import gplx.xowa.mediawiki.includes.filerepo.file.*; import gplx.xowa.mediawiki.includes.media.*;
import gplx.xowa.mediawiki.includes.site.*;
import gplx.xowa.mediawiki.languages.*;
import gplx.xowa.langs.*;
/**
 * Environment object bundling the settings and service objects used by the XOWA port of MediaWiki code.
 * Settings are exposed as public mutable fields; services are created once per instance and exposed through accessors.
 */
public class XomwEnv {
  // settings; field names mirror the MediaWiki source they stand in for
  public byte[] Lang__align_end = Bry_.new_a7("right");
  public int User__default__thumbsize = 220;
  public int Global__wgSVGMaxSize = 5120;
  public double Global__wgThumbUpright = .75d;
  public int[] Global__wgThumbLimits = new int[] {120, 150, 180, 200, 250, 300};

  // services
  private final XomwLanguage language;
  private final XomwMagicWordMgr magic_word_mgr = new XomwMagicWordMgr();
  private final XomwMessageMgr message_mgr = new XomwMessageMgr();
  private XomwFileFinder file_finder = new XomwFileFinderNoop(); // replaceable through File_finder_
  private final XomwMediaHandlerFactory mediaHandlerFactory = new XomwMediaHandlerFactory();
  private final XomwMediaWikiServices mediaWikiServices;

  /**
   * Creates the environment for one language.
   *
   * @param xoLang XOWA language item wrapped by the XomwLanguage exposed via Language()
   */
  public XomwEnv(Xol_lang_itm xoLang) {
    this.language = new XomwLanguage(xoLang);

    byte[][] localInterwikis = new byte[0][]; // TODO.XO: pass in to XomwEnv or retrieve from datastore
    XomwSiteLookup siteLookup = new XomwXowaSiteLookup();
    XomwInterwikiLookup interwikiLookup = new XomwInterwikiLookupAdapter(siteLookup);
    this.mediaWikiServices = new XomwMediaWikiServices(this, interwikiLookup, language, localInterwikis);
  }

  public XomwLanguage Language() {
    return language;
  }
  public XomwMagicWordMgr Magic_word_mgr() {
    return magic_word_mgr;
  }
  public XomwMessageMgr Message_mgr() {
    return message_mgr;
  }
  public XomwFileFinder File_finder() {
    return file_finder;
  }
  public XomwMediaHandlerFactory MediaHandlerFactory() {
    return mediaHandlerFactory;
  }
  public XomwMediaWikiServices MediaWikiServices() {
    return mediaWikiServices;
  }

  /** Replaces the file finder (a XomwFileFinderNoop by default); returns this for chaining. */
  public XomwEnv File_finder_(XomwFileFinder v) {
    this.file_finder = v;
    return this;
  }

  /** Test factory: builds an env on top of the app returned by Xoa_app_fxt.Make__app__edit(). */
  public static XomwEnv NewTest() {
    return NewTestByApp(Xoa_app_fxt.Make__app__edit());
  }

  /** Test factory: builds an env using the language made by Xol_lang_itm_.Lang_en_make for the given app. */
  public static XomwEnv NewTestByApp(Xoae_app app) {
    Xol_lang_itm lang = Xol_lang_itm_.Lang_en_make(app.Lang_mgr());
    return new XomwEnv(lang);
  }
}

View File

@@ -13,3 +13,33 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki; import gplx.*; import gplx.xowa.*;
/**
 * Static helpers named after PHP array functions, operating on XOWA's List_adp and Ordered_hash.
 */
public class XophpArray {
  /** Pops from the list via List_adp_.Pop_or with a default of false, and casts the result to boolean. */
  public static boolean popBoolOrN(List_adp list) {
    Object popped = List_adp_.Pop_or(list, false);
    return Bool_.Cast(popped);
  }

  /** Pops from the list via List_adp_.Pop_or with a default of null, and casts the result to byte[]. */
  public static byte[] popBryOrNull(List_adp list) {
    Object popped = List_adp_.Pop_or(list, null);
    return (byte[])popped;
  }

  /**
   * Copies every positional element of the hash into a String[].
   * NOTE(review): elements are read with Get_at and cast to String; whether Get_at yields keys or values depends on Ordered_hash -- confirm.
   */
  public static String[] array_keys_str(Ordered_hash array) {
    int count = array.Len();
    String[] keys = new String[count];
    int idx = 0;
    while (idx < count) {
      keys[idx] = (String)array.Get_at(idx);
      idx++;
    }
    return keys;
  }

  /**
   * Copies every positional element of the hash into a byte[][].
   * NOTE(review): same Get_at caveat as array_keys_str; elements are cast to byte[].
   */
  public static byte[][] array_keys_bry(Ordered_hash array) {
    int count = array.Len();
    byte[][] keys = new byte[count][];
    int idx = 0;
    while (idx < count) {
      keys[idx] = (byte[])array.Get_at(idx);
      idx++;
    }
    return keys;
  }

  // each overload delegates to Ordered_hash.Has; the int key is boxed before lookup
  public static boolean array_key_exists(int key, Ordered_hash array) {
    return array.Has(key);
  }
  public static boolean array_key_exists(String key, Ordered_hash array) {
    return array.Has(key);
  }
  public static boolean array_key_exists(byte[] key, Ordered_hash array) {
    return array.Has(key);
  }

  /** @return true if the hash has no elements */
  public static boolean array_is_empty(Ordered_hash array) {
    int count = array.Len();
    return count == 0;
  }

  /** Removes the entry for key by calling Ordered_hash.Del. */
  public static void unset(Ordered_hash array, Object key) {
    array.Del(key);
  }
}

View File

@@ -13,3 +13,9 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki; import gplx.*; import gplx.xowa.*;
/** Static helpers named after PHP encoding functions. */
public class XophpEncode {
  /**
   * Encodes v with the Gfo_url_encoder_.Php_rawurlencode encoder.
   *
   * @param v bytes to encode
   * @return encoded bytes, as produced by the encoder's Encode method
   */
  public static byte[] rawurlencode(byte[] v) {
    byte[] encoded = gplx.langs.htmls.encoders.Gfo_url_encoder_.Php_rawurlencode.Encode(v);
    return encoded;
  }
}

View File

@@ -13,3 +13,6 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki; import gplx.*; import gplx.xowa.*;
/**
 * Error type named after PHP's InvalidArgumentException.
 */
public class XophpInvalidArgumentException extends Err {
  /**
   * @param text message; forwarded to Err as both its third and fourth constructor arguments
   *             (the first two are fixed at false and ""; their meaning is defined by Err)
   */
  public XophpInvalidArgumentException(String text) {
    super(false, "", text, text);
  }
}

View File

@@ -13,3 +13,15 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki; import gplx.*; import gplx.xowa.*;
/** Static helpers named after PHP math functions. */
public class XophpMath {
  /**
   * Rounds v in the manner of PHP's round($val, $precision).
   *
   * @param v value to round
   * @param places number of decimal places to keep; a negative value rounds to a power of ten instead
   *               (-1 rounds to the nearest 10, -2 to the nearest 100, etc.)
   * @return rounded value; tie-breaking is whatever Math_.Round implements
   */
  public static double round(double v, int places) {
    if (places < 0) { // -1 means round to 10; -2 means round to 100; etc..
      int factor = (int)Math_.Pow(10, places * -1);
      // scale down, round, then scale back up; EX: 127 -> 12.7 -> 13 -> 130; 123 -> 12.3 -> 12 -> 120
      // (previously the scaled value was cast to int, which truncated 127 to 120 and overflowed for large values)
      return Math_.Round(v / factor, 0) * factor;
    }
    else {
      return Math_.Round(v, places);
    }
  }
}

View File

@@ -13,3 +13,100 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki; import gplx.*; import gplx.xowa.*;
import gplx.core.btries.*; import gplx.core.brys.*;
import gplx.core.primitives.*;
/**
 * Byte-array helpers standing in for a few uses of PHP's preg_* functions.
 * None of them evaluates a regular expression: split takes a literal delimiter, while match and replace take a prebuilt Btrie_slim_mgr.
 */
public class XophpPreg {
  /**
   * Splits src[src_bgn..src_end) on a literal delimiter, returning text and delimiters as alternating elements.
   * EX: "a''b''c" split on "''" -> "a", "''", "b", "''", "c"
   *
   * @param list scratch list used to collect segment offsets; always cleared before returning.
   *             Presumably expected to be empty on entry -- TODO confirm with callers
   * @param src source bytes
   * @param src_bgn inclusive start offset
   * @param src_end exclusive end offset
   * @param dlm literal delimiter; must not be empty because its last byte is read up front
   * @param extend if true, a matched delimiter is extended over the following run of bytes equal to the delimiter's last byte;
   *               EX: dlm "''" matches all of "'''"
   * @return alternating text / delimiter segments (a trailing empty text segment is dropped), or null if no delimiter was found
   */
  public static byte[][] split(Int_list list, byte[] src, int src_bgn, int src_end, byte[] dlm, boolean extend) {
    // find delimiters; list becomes: src_bgn, (dlm_bgn, dlm_end)*, src_end
    int dlm_len = dlm.length;
    byte dlm_nth = dlm[dlm_len - 1];
    int i = src_bgn;
    list.Add(src_bgn);
    while (i < src_end) { // "<" rather than "!=" so that src_bgn > src_end cannot spin past the end
      int dlm_end = i + dlm_len;
      if (dlm_end <= src_end && Bry_.Eq(src, i, dlm_end, dlm)) {
        if (extend) {
          dlm_end = Bry_find_.Find_fwd_while(src, i, src_end, dlm_nth);
        }
        list.Add(i);
        list.Add(dlm_end);
        i = dlm_end;
      }
      else
        i++;
    }
    list.Add(src_end);

    // create brys; n offsets describe n - 1 consecutive segments
    int rv_len = list.Len() - 1;
    if (rv_len == 1) { // only src_bgn and src_end were recorded; no delimiter found
      list.Clear();
      return null;
    }
    if (list.Get_at(list.Len() - 2) == src_end) { // if 2nd to last elem == src_end, then last item is Bry_.Empty; ignore it; EX: "a''" -> "a", "''" x> "a", "''", ""
      rv_len--;
    }
    byte[][] rv = new byte[rv_len][];
    for (i = 0; i < rv_len; i += 2) {
      rv[i] = Bry_.Mid(src, list.Get_at(i + 0), list.Get_at(i + 1)); // text segment
      if (i + 1 == rv_len) break;
      rv[i + 1] = Bry_.Mid(src, list.Get_at(i + 1), list.Get_at(i + 2)); // delimiter segment
    }
    list.Clear();
    return rv;
  }

  /**
   * Scans src[src_bgn..src_end) for the first position at which the trie matches.
   * Non-matching positions are skipped one UTF-8 character at a time (length taken from the lead byte).
   *
   * @param trv match state; Match_bgn is reset to -1 on entry and set to the match offset on success
   * @return the object returned by the trie for the match, or null if nothing matched
   */
  public static Object match(Btrie_slim_mgr trie, Btrie_rv trv, byte[] src, int src_bgn, int src_end) {
    trv.Match_bgn = -1;
    int cur = src_bgn;
    while (cur < src_end) {
      byte b = src[cur];
      Object o = trie.Match_at_w_b0(trv, b, src, cur, src_end);
      if (o == null)
        cur += gplx.core.intls.Utf8_.Len_of_char_by_1st_byte(b);
      else {
        trv.Match_bgn = cur;
        return o;
      }
    }
    return null;
  }

  /**
   * Replaces every trie match inside bry's current range with repl_bry.
   * bry is only modified (via Set_by_bfr) if at least one match was found.
   *
   * @param bry holder of the source bytes and range (src, src_bgn, src_end); receives the result
   * @param tmp buffer used to assemble the result; it is only written to once a match has been found
   * @param find_trie trie of byte sequences to find
   * @param trv match state; trv.Pos() is read after each match as the position to resume from
   * @param repl_bry replacement written in place of each match
   */
  public static void replace(Bry_tmp bry, Bry_bfr tmp, Btrie_slim_mgr find_trie, Btrie_rv trv, byte[] repl_bry) {
    byte[] src = bry.src;
    int src_bgn = bry.src_bgn;
    int src_end = bry.src_end;
    int cur = src_bgn;
    int prv = cur; // start of the text not yet copied to tmp
    boolean dirty = false;
    while (true) {
      // eos; ">=" because skipping by UTF-8 char length can step past src_end when the range ends mid-character
      if (cur >= src_end) {
        if (dirty) {
          tmp.Add_mid(src, prv, src_end);
        }
        break;
      }
      byte b = src[cur];
      Object o = find_trie.Match_at_w_b0(trv, b, src, cur, src_end);
      if (o == null) {
        cur += gplx.core.intls.Utf8_.Len_of_char_by_1st_byte(b);
      }
      else {
        dirty = true;
        tmp.Add_mid(src, prv, cur);
        tmp.Add(repl_bry);
        cur = trv.Pos();
        prv = cur;
      }
    }
    if (dirty) {
      bry.Set_by_bfr(tmp);
    }
  }
}

View File

@@ -13,3 +13,19 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki; import gplx.*; import gplx.xowa.*;
import org.junit.*; import gplx.core.tests.*;
/** Tests for XophpPreg.split. */
public class XophpPregTest {
  private final XophpPregFxt fxt = new XophpPregFxt();

  // delimiters are returned as their own elements between the text segments
  @Test public void Basic() {
    fxt.Test_split("a''b''c", "''", Bool_.Y, "a", "''", "b", "''", "c");
  }

  // extend=y: a third apostrophe is absorbed into the delimiter
  @Test public void Extend() {
    fxt.Test_split("a'''b'''c", "''", Bool_.Y, "a", "'''", "b", "'''", "c");
  }

  // delimiter at end of String: no trailing empty element
  @Test public void Eos() {
    fxt.Test_split("a''", "''", Bool_.Y, "a", "''");
  }
}
/** Fixture for XophpPregTest; owns the scratch Int_list passed to XophpPreg.split. */
class XophpPregFxt {
  private final gplx.core.primitives.Int_list rv = new gplx.core.primitives.Int_list();

  /** Splits all of src (0 to String_.Len(src)) and compares against expd. */
  public void Test_split(String src, String dlm, boolean extend, String... expd) {
    Test_split(src, 0, String_.Len(src), dlm, extend, expd);
  }

  /** Splits src between src_bgn and src_end and compares the result, converted to Strings, against expd. */
  public void Test_split(String src, int src_bgn, int src_end, String dlm, boolean extend, String... expd) {
    byte[] src_bry = Bry_.new_u8(src);
    byte[] dlm_bry = Bry_.new_u8(dlm);
    byte[][] actl = XophpPreg.split(rv, src_bry, src_bgn, src_end, dlm_bry, extend);
    Gftest.Eq__ary(expd, String_.Ary(actl), "find_failed");
  }
}

View File

@@ -13,3 +13,149 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki; import gplx.*; import gplx.xowa.*;
import gplx.core.btries.*;
/**
 * Static helpers named after PHP string functions, operating on byte arrays.
 */
public class XophpString {
  /** Finds the first occurrence of find in all of src; see the ranged overload. */
  public static int strpos(byte[] src, byte find) {return strpos(src, find, 0, src.length);}

  /** Delegates to Bry_find_.Find_fwd(src, find, bgn, end) and returns its result unchanged. */
  public static int strpos(byte[] src, byte find, int bgn, int end) {
    return Bry_find_.Find_fwd(src, find, bgn, end);
  }

  /** Returns src from bgn to the end; a negative bgn counts from the end of src. */
  public static byte[] substr(byte[] src, int bgn) {return substr(src, bgn, src.length);}

  /**
   * Returns a slice of src, following the argument conventions of PHP's substr.
   *
   * @param src source bytes
   * @param bgn start offset; a negative value counts from the end of src
   * @param len slice length; a negative value means "stop that many bytes before the end of src"
   * @return Bry_.Mid of the resulting range; offsets are clamped to src so an out-of-range request
   *         produces a zero-length range instead of an inverted one
   */
  public static byte[] substr(byte[] src, int bgn, int len) {
    int src_len = src.length;
    if (bgn < 0) bgn = src_len + bgn; // handle negative
    if (bgn < 0) bgn = 0; // handle out of bounds; EX: ("a", -1, -1)
    if (bgn > src_len) bgn = src_len; // handle out of bounds; EX: ("a", 5)
    int end = len < 0 ? src_len + len : bgn + len;
    if (end > src_len) end = src_len; // handle out of bounds;
    if (end < bgn) end = bgn; // never hand Bry_.Mid an inverted range; EX: ("abc", 2, -2)
    return Bry_.Mid(src, bgn, end);
  }

  /** Returns the byte at bgn; see the three-argument overload. */
  public static byte substr_byte(byte[] src, int bgn) {return substr_byte(src, bgn, src.length);}

  /**
   * Returns the single byte at bgn.
   *
   * @param src source bytes
   * @param bgn offset; a negative value counts from the end of src
   * @param len accepted for parity with substr but not consulted
   * @return src[bgn], or Byte_ascii.Null if src is empty or bgn lies beyond the last byte
   */
  public static byte substr_byte(byte[] src, int bgn, int len) {
    int src_len = src.length;
    if (src_len == 0) return Byte_ascii.Null;
    if (bgn < 0) bgn = src_len + bgn; // handle negative
    if (bgn < 0) bgn = 0; // handle out of bounds; EX: ("a", -1, -1)
    if (bgn >= src_len) return Byte_ascii.Null; // handle out of bounds; same result as for an empty src
    return src[bgn];
  }

  /**
   * Counts consecutive bytes, starting at bgn and moving forward, that are flagged in find.
   *
   * @param find lookup table indexed by unsigned byte value; presumably 256 entries -- TODO confirm with callers
   * @param max maximum count to return; -1 means no limit other than src_len
   * @param src_len exclusive end offset of the scan
   */
  public static int strspn_fwd__ary(byte[] src, boolean[] find, int bgn, int max, int src_len) {
    if (max == -1) max = src_len;
    int rv = 0;
    for (int i = bgn; i < src_len; i++) {
      if (find[src[i] & 0xFF] && rv < max) // PATCH.JAVA:need to convert to unsigned byte
        rv++;
      else
        break;
    }
    return rv;
  }

  /** Same as strspn_fwd__ary, but counts bytes equal to find. */
  public static int strspn_fwd__byte(byte[] src, byte find, int bgn, int max, int src_len) {
    if (max == -1) max = src_len;
    int rv = 0;
    for (int i = bgn; i < src_len; i++) {
      if (find == src[i] && rv < max)
        rv++;
      else
        break;
    }
    return rv;
  }

  /** Same as strspn_fwd__ary, but counts bytes that are Byte_ascii.Space or Byte_ascii.Tab. */
  public static int strspn_fwd__space_or_tab(byte[] src, int bgn, int max, int src_len) {
    if (max == -1) max = src_len;
    int rv = 0;
    for (int i = bgn; i < src_len; i++) {
      byte b = src[i];
      boolean is_ws = b == Byte_ascii.Space || b == Byte_ascii.Tab;
      if (!is_ws || rv >= max) break;
      rv++;
    }
    return rv;
  }

  /**
   * Counts consecutive bytes equal to find, starting at bgn - 1 and moving backward to the start of src.
   *
   * @param bgn exclusive upper offset; the first byte examined is src[bgn - 1]
   * @param max maximum count to return; -1 means no limit
   */
  public static int strspn_bwd__byte(byte[] src, byte find, int bgn, int max) {
    if (max == -1) max = Int_.Max_value;
    int rv = 0;
    for (int i = bgn - 1; i > -1; i--) {
      if (find == src[i] && rv < max)
        rv++;
      else
        break;
    }
    return rv;
  }

  /**
   * Same as strspn_bwd__byte, but counts bytes flagged in find.
   *
   * @param find lookup table indexed by unsigned byte value; presumably 256 entries -- TODO confirm with callers
   */
  public static int strspn_bwd__ary(byte[] src, boolean[] find, int bgn, int max) {
    if (max == -1) max = Int_.Max_value;
    int rv = 0;
    for (int i = bgn - 1; i > -1; i--) {
      // PATCH.JAVA:need to convert to unsigned byte; the mask applies to the byte value, not to the index
      if (find[src[i] & 0xFF] && rv < max)
        rv++;
      else
        break;
    }
    return rv;
  }

  /** Same as strspn_bwd__byte, but counts bytes that are Byte_ascii.Space or Byte_ascii.Tab. */
  public static int strspn_bwd__space_or_tab(byte[] src, int bgn, int max) {
    if (max == -1) max = Int_.Max_value;
    int rv = 0;
    for (int i = bgn - 1; i > -1; i--) {
      byte b = src[i];
      boolean is_ws = b == Byte_ascii.Space || b == Byte_ascii.Tab;
      if (!is_ws || rv >= max) break;
      rv++;
    }
    return rv;
  }

  /**
   * Replaces every byte sequence matched by trie with the byte[] the trie stores for it.
   *
   * @param src source bytes
   * @param trie find sequences; each stored value must be a byte[] (it is cast and appended)
   * @param tmp buffer used to assemble the result; only written to once a match has been found
   * @param trv match state; trv.Pos() is read after each match as the position to resume from
   * @return src itself if nothing matched; otherwise the new bytes taken from tmp
   */
  public static byte[] strtr(byte[] src, Btrie_slim_mgr trie, Bry_bfr tmp, Btrie_rv trv) {
    boolean dirty = false;
    int src_bgn = 0;
    int src_end = src.length;
    int i = src_bgn;
    while (true) {
      if (i == src_end) break;
      byte b = src[i];
      Object o = trie.Match_at_w_b0(trv, b, src, i, src_end);
      if (o == null) {
        if (dirty) {
          tmp.Add_byte(b);
        }
        i++;
      }
      else {
        if (!dirty) { // first match: copy everything skipped so far
          dirty = true;
          tmp.Add_mid(src, 0, i);
        }
        tmp.Add((byte[])o);
        i = trv.Pos();
      }
    }
    return dirty ? tmp.To_bry_and_clear() : src;
  }

  /** Single-byte form of strtr; delegates to Bry_.Replace over all of src. */
  public static byte[] strtr(byte[] src, byte find, byte repl) {
    return Bry_.Replace(src, 0, src.length, find, repl);
  }

  /** Single-byte form of str_replace (PHP argument order); delegates to Bry_.Replace over all of src. */
  public static byte[] str_replace(byte find, byte repl, byte[] src) {
    return Bry_.Replace(src, 0, src.length, find, repl);
  }

  /** Multi-byte form of str_replace (PHP argument order); delegates to Bry_.Replace(src, find, repl). */
  public static byte[] str_replace(byte[] find, byte[] repl, byte[] src) {
    return Bry_.Replace(src, find, repl);
  }

  /**
   * @return the part of src starting at the first occurrence of find, or null if
   *         Bry_find_.Find_fwd reports Bry_find_.Not_found
   */
  public static byte[] strstr(byte[] src, byte[] find) {
    int pos = Bry_find_.Find_fwd(src, find);
    return pos == Bry_find_.Not_found ? null : Bry_.Mid(src, pos, src.length);
  }

  /** @return length of src in bytes */
  public static int strlen(byte[] src) {return src.length;}
}

View File

@@ -13,3 +13,73 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki; import gplx.*; import gplx.xowa.*;
import org.junit.*; import gplx.core.tests.*; import gplx.core.btries.*;
/** Tests for XophpString. */
public class XophpStringTest {
  private final XophpStringFxt fxt = new XophpStringFxt();

  @Test public void Strspn_fwd__byte() {
    fxt.Test_strspn_fwd__byte("aaaaab", Byte_ascii.Ltr_a, 0, -1, 5); // basic
    fxt.Test_strspn_fwd__byte("aaaaab", Byte_ascii.Ltr_a, 1, -1, 4); // bgn
    fxt.Test_strspn_fwd__byte("aaaaab", Byte_ascii.Ltr_a, 1, 2, 2); // max
  }

  // input needs five leading spaces to produce the expected spans of 5 / 4 / 2 (mirrors "aaaaab" above)
  @Test public void Strspn_fwd__space_or_tab() {
    fxt.Test_strspn_fwd__space_or_tab("     a", 0, -1, 5); // basic
    fxt.Test_strspn_fwd__space_or_tab("     a", 1, -1, 4); // bgn
    fxt.Test_strspn_fwd__space_or_tab("     a", 1, 2, 2); // max
  }

  @Test public void Strspn_bwd__byte() {
    fxt.Test_strspn_bwd__byte("aaaaab", Byte_ascii.Ltr_a, 5, -1, 5); // basic
    fxt.Test_strspn_bwd__byte("aaaaab", Byte_ascii.Ltr_a, 4, -1, 4); // bgn
    fxt.Test_strspn_bwd__byte("aaaaab", Byte_ascii.Ltr_a, 4, 2, 2); // max
  }

  // input needs five leading spaces; bgn is exclusive, so bgn=5 starts the backward scan at the last space
  @Test public void Strspn_bwd__space_or_tab() {
    fxt.Test_strspn_bwd__space_or_tab("     a", 5, -1, 5); // basic
    fxt.Test_strspn_bwd__space_or_tab("     a", 4, -1, 4); // bgn
    fxt.Test_strspn_bwd__space_or_tab("     a", 4, 2, 2); // max
  }

  @Test public void Substr__bgn_is_neg() {
    fxt.Test_substr("abcde", -1, "e");
    fxt.Test_substr("abcde", -3, -1, "cd");
  }

  @Test public void Strtr() {
    fxt.Init_strtr_by_trie("01", "89", "02", "79");
    fxt.Test_strtr_by_trie("abc", "abc"); // found=none
    fxt.Test_strtr_by_trie("ab_01_cd", "ab_89_cd"); // found=one
    fxt.Test_strtr_by_trie("ab_01_cd_02_ef", "ab_89_cd_79_ef"); // found=many
    fxt.Test_strtr_by_trie("01_ab", "89_ab"); // BOS
    fxt.Test_strtr_by_trie("ab_01", "ab_89"); // EOS
  }
}
/** Fixture for XophpStringTest: converts String arguments to byte arrays and asserts on the XophpString result. */
class XophpStringFxt {
  // lazily created by Init_strtr_by_trie; shared by all Test_strtr_by_trie calls on this fixture
  private Btrie_slim_mgr strtr_trie;

  public void Test_strspn_fwd__byte(String src_str, byte find, int bgn, int max, int expd) {
    byte[] src_bry = Bry_.new_u8(src_str);
    int actl = XophpString.strspn_fwd__byte(src_bry, find, bgn, max, src_bry.length);
    Gftest.Eq__int(expd, actl);
  }

  public void Test_strspn_fwd__space_or_tab(String src_str, int bgn, int max, int expd) {
    byte[] src_bry = Bry_.new_u8(src_str);
    int actl = XophpString.strspn_fwd__space_or_tab(src_bry, bgn, max, src_bry.length);
    Gftest.Eq__int(expd, actl);
  }

  public void Test_strspn_bwd__byte(String src_str, byte find, int bgn, int max, int expd) {
    byte[] src_bry = Bry_.new_u8(src_str);
    int actl = XophpString.strspn_bwd__byte(src_bry, find, bgn, max);
    Gftest.Eq__int(expd, actl);
  }

  public void Test_strspn_bwd__space_or_tab(String src_str, int bgn, int max, int expd) {
    byte[] src_bry = Bry_.new_u8(src_str);
    int actl = XophpString.strspn_bwd__space_or_tab(src_bry, bgn, max);
    Gftest.Eq__int(expd, actl);
  }

  /** Uses String_.Len(src_str) as the len argument. */
  public void Test_substr(String src_str, int bgn, String expd) {
    Test_substr(src_str, bgn, String_.Len(src_str), expd);
  }

  public void Test_substr(String src_str, int bgn, int len, String expd) {
    byte[] actl = XophpString.substr(Bry_.new_u8(src_str), bgn, len);
    Gftest.Eq__str(expd, actl);
  }

  /**
   * Registers find / replace pairs in the trie, creating it on first use.
   *
   * @param kvs alternating find and replace Strings; length must be even
   */
  public void Init_strtr_by_trie(String... kvs) {
    if (strtr_trie == null) strtr_trie = Btrie_slim_mgr.cs();
    int kvs_len = kvs.length;
    int idx = 0;
    while (idx < kvs_len) {
      strtr_trie.Add_str_str(kvs[idx], kvs[idx + 1]);
      idx += 2;
    }
  }

  /** Runs XophpString.strtr with the registered trie, using a new buffer and match state per call. */
  public void Test_strtr_by_trie(String src, String expd) {
    Bry_bfr tmp = Bry_bfr_.New();
    Btrie_rv trv = new Btrie_rv();
    byte[] actl = XophpString.strtr(Bry_.new_u8(src), strtr_trie, tmp, trv);
    Gftest.Eq__str(expd, actl);
  }
}

View File

@@ -13,3 +13,29 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki; import gplx.*; import gplx.xowa.*;
import gplx.core.net.*;
/** Static helpers named after PHP url functions. */
public class XophpUrl {
  // component selectors accepted by parse_url
  // NOTE(review): PHP itself defines PHP_URL_HOST as 1, not 3; these values are only meaningful when passed to parse_url below
  public static final int
    PHP_URL_SCHEME = 0
  , PHP_URL_HOST = 3
  , PHP_URL_PATH = 5
  ;

  /**
   * Parses url with Gfo_url_parser and returns one component of it.
   *
   * @param url url to parse
   * @param type one of PHP_URL_SCHEME, PHP_URL_HOST, PHP_URL_PATH
   * @return SCHEME: the parsed url's Protocol_bry; HOST: its first segment; PATH: every segment after the first, each preceded by "/"
   * @throws Err (from Err_.new_unhandled_default) for any other type; the url is parsed before type is checked
   */
  public static String parse_url(String url, int type) {
    Gfo_url_parser parser = new Gfo_url_parser();
    Gfo_url url_itm = parser.Parse(Bry_.new_u8(url));

    if (type == PHP_URL_SCHEME) {
      return String_.new_u8(url_itm.Protocol_bry());
    }
    if (type == PHP_URL_HOST) {
      return String_.new_u8(url_itm.Segs__get_at_1st());
    }
    if (type == PHP_URL_PATH) {
      Bry_bfr bfr = Bry_bfr_.New();
      byte[][] segs = url_itm.Segs();
      int segs_len = segs.length;
      int idx = 1; // segment 0 is what PHP_URL_HOST returns; the path is everything after it
      while (idx < segs_len) {
        bfr.Add_byte_slash().Add(segs[idx]);
        idx++;
      }
      return bfr.To_str_and_clear();
    }
    throw Err_.new_unhandled_default(type);
  }
}

View File

@@ -13,3 +13,33 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki; import gplx.*; import gplx.xowa.*;
/**
 * Static helpers named after PHP language constructs (empty, isset, is_numeric, is_null).
 * Because Java primitives cannot be null, "unset" ints and doubles are represented by the sentinel values NULL_INT and NULL_DOUBLE.
 */
public class XophpUtility {
  /** Sentinel standing in for an unset int. */
  public static final int NULL_INT = Int_.Max_value;
  /** Sentinel standing in for an unset double. */
  public static final double NULL_DOUBLE = Double_.MinValue;
  /** Unset byte array; plain null. */
  public static final byte[] NULL_BRY = null;

  /** @return true if v is null or zero-length */
  public static boolean empty(byte[] v) {return v == null || v.length == 0;}
  /** @return true if v is false */
  public static boolean empty(boolean v) {return v == false;}
  /** @return true if v is 0 */
  public static boolean empty(int v) {return v == 0;}

  /** @return true if v is not null */
  public static boolean isset(byte[] v) {return v != null;}
  /** @return true if v is not the NULL_INT sentinel */
  public static boolean isset(int v) {return v != NULL_INT;}
  /** @return true if v is not the NULL_DOUBLE sentinel */
  public static boolean isset(double v) {return v != NULL_DOUBLE;}

  /**
   * @return true if v is not the NULL_INT sentinel.
   * NOTE(review): this is the same test as isset(int), so 0 is reported as true -- confirm that is intended
   */
  public static boolean istrue(int v) {return v != NULL_INT;}

  /**
   * Checks whether src consists solely of ASCII digits.
   * This is narrower than PHP's is_numeric: signs, decimal points, exponents and whitespace are all rejected.
   *
   * @return true if src has at least one byte and every byte is a digit; false for null or an empty array
   */
  public static boolean isnumeric(byte[] src) {
    if (src == null) return false;
    int len = src.length;
    if (len == 0) return false; // an empty String is not a number; PHP's is_numeric("") is also false
    for (int i = 0; i < len; i++) {
      byte b = src[i];
      switch (b) {
        case Byte_ascii.Num_0: case Byte_ascii.Num_1: case Byte_ascii.Num_2: case Byte_ascii.Num_3: case Byte_ascii.Num_4:
        case Byte_ascii.Num_5: case Byte_ascii.Num_6: case Byte_ascii.Num_7: case Byte_ascii.Num_8: case Byte_ascii.Num_9:
          break;
        default:
          return false;
      }
    }
    return true;
  }

  /** @return true if v is the NULL_INT sentinel */
  public static boolean is_null(int v) {return v == NULL_INT;}
}

View File

@@ -13,3 +13,297 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki.includes; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*;
/**
 * Java counterpart of MediaWiki's Defines.php.
 * Only the namespace ids and the content-format MIME types are declared as constants here;
 * the remaining PHP define() calls are kept below as comments for reference.
 */
public class XomwDefines {
// /**
// * @defgroup Constants MediaWiki constants
// */
//
// /**@{
// * Database related constants
// */
// define( 'DBO_DEBUG', 1 );
// define( 'DBO_NOBUFFER', 2 );
// define( 'DBO_IGNORE', 4 );
// define( 'DBO_TRX', 8 ); // automatically start transaction on first query
// define( 'DBO_DEFAULT', 16 );
// define( 'DBO_PERSISTENT', 32 );
// define( 'DBO_SYSDBA', 64 ); // for oracle maintenance
// define( 'DBO_DDLMODE', 128 ); // when using schema files: mostly for Oracle
// define( 'DBO_SSL', 256 );
// define( 'DBO_COMPRESS', 512 );
// /**@}*/
//
// /**@{
// * Valid database indexes
// * Operation-based indexes
// */
// define( 'DB_SLAVE', -1 ); # Read from the slave (or only server)
// define( 'DB_MASTER', -2 ); # Write to master (or only server)
// /**@}*/
//
// # Obsolete aliases
// define( 'DB_READ', -1 );
// define( 'DB_WRITE', -2 );
/**@{
* Virtual namespaces; don't appear in the page database
*/
public static final int NS_MEDIA = -2;
public static final int NS_SPECIAL = -1;
/**@}*/
/**@{
* Real namespaces
*
* Number 100 and beyond are reserved for custom namespaces;
* DO NOT assign standard namespaces at 100 or beyond.
* DO NOT Change integer values as they are most probably hardcoded everywhere
* see bug #696 which talked about that.
*
* In the constants below, each even id is immediately followed by its "_TALK" counterpart at id + 1.
*/
public static final int NS_MAIN = 0;
public static final int NS_TALK = 1;
public static final int NS_USER = 2;
public static final int NS_USER_TALK = 3;
public static final int NS_PROJECT = 4;
public static final int NS_PROJECT_TALK = 5;
public static final int NS_FILE = 6;
public static final int NS_FILE_TALK = 7;
public static final int NS_MEDIAWIKI = 8;
public static final int NS_MEDIAWIKI_TALK = 9;
public static final int NS_TEMPLATE = 10;
public static final int NS_TEMPLATE_TALK = 11;
public static final int NS_HELP = 12;
public static final int NS_HELP_TALK = 13;
public static final int NS_CATEGORY = 14;
public static final int NS_CATEGORY_TALK = 15;
// /**
// * NS_IMAGE and NS_IMAGE_TALK are the pre-v1.14 names for NS_FILE and
// * NS_FILE_TALK respectively, and are kept for compatibility.
// *
// * When writing code that should be compatible with older MediaWiki
// * versions, either stick to the old names or define the new constants
// * yourself, if they're not defined already.
// */
// define( 'NS_IMAGE', NS_FILE );
// define( 'NS_IMAGE_TALK', NS_FILE_TALK );
// /**@}*/
//
// /**@{
// * Cache type
// */
// define( 'CACHE_ANYTHING', -1 ); // Use anything, as long as it works
// define( 'CACHE_NONE', 0 ); // Do not cache
// define( 'CACHE_DB', 1 ); // Store cache objects in the DB
// define( 'CACHE_MEMCACHED', 2 ); // MemCached, must specify servers in $wgMemCacheServers
// define( 'CACHE_ACCEL', 3 ); // APC, XCache or WinCache
// /**@}*/
//
// /**@{
// * Media types.
// * This defines constants for the value returned by File::getMediaType()
// */
// // unknown format
// define( 'MEDIATYPE_UNKNOWN', 'UNKNOWN' );
// // some bitmap image or image source (like psd, etc). Can't scale up.
// define( 'MEDIATYPE_BITMAP', 'BITMAP' );
// // some vector drawing (SVG, WMF, PS, ...) or image source (oo-draw, etc). Can scale up.
// define( 'MEDIATYPE_DRAWING', 'DRAWING' );
// // simple audio file (ogg, mp3, wav, midi, whatever)
// define( 'MEDIATYPE_AUDIO', 'AUDIO' );
// // simple video file (ogg, mpg, etc;
// // no not include formats here that may contain executable sections or scripts!)
// define( 'MEDIATYPE_VIDEO', 'VIDEO' );
// // Scriptable Multimedia (flash, advanced video container formats, etc)
// define( 'MEDIATYPE_MULTIMEDIA', 'MULTIMEDIA' );
// // Office Documents, Spreadsheets (office formats possibly containing apples, scripts, etc)
// define( 'MEDIATYPE_OFFICE', 'OFFICE' );
// // Plain text (possibly containing program code or scripts)
// define( 'MEDIATYPE_TEXT', 'TEXT' );
// // binary executable
// define( 'MEDIATYPE_EXECUTABLE', 'EXECUTABLE' );
// // archive file (zip, tar, etc)
// define( 'MEDIATYPE_ARCHIVE', 'ARCHIVE' );
// /**@}*/
//
// /**@{
// * Antivirus result codes, for use in $wgAntivirusSetup.
// */
// define( 'AV_NO_VIRUS', 0 ); # scan ok, no virus found
// define( 'AV_VIRUS_FOUND', 1 ); # virus found!
// define( 'AV_SCAN_ABORTED', -1 ); # scan aborted, the file is probably immune
// define( 'AV_SCAN_FAILED', false ); # scan failed (scanner not found or error in scanner)
// /**@}*/
//
// /**@{
// * Anti-lock flags
// * Was used by $wgAntiLockFlags, which was removed with 1.25
// * Constants kept to not have warnings when used in LocalSettings
// */
// define( 'ALF_PRELOAD_LINKS', 1 ); // unused
// define( 'ALF_PRELOAD_EXISTENCE', 2 ); // unused
// define( 'ALF_NO_LINK_LOCK', 4 ); // unused
// define( 'ALF_NO_BLOCK_LOCK', 8 ); // unused
// /**@}*/
//
// /**@{
// * Date format selectors; used in user preference storage and by
// * Language::date() and co.
// */
// define( 'MW_DATE_DEFAULT', 'default' );
// define( 'MW_DATE_MDY', 'mdy' );
// define( 'MW_DATE_DMY', 'dmy' );
// define( 'MW_DATE_YMD', 'ymd' );
// define( 'MW_DATE_ISO', 'ISO 8601' );
// /**@}*/
//
// /**@{
// * RecentChange type identifiers
// */
// define( 'RC_EDIT', 0 );
// define( 'RC_NEW', 1 );
// define( 'RC_LOG', 3 );
// define( 'RC_EXTERNAL', 5 );
// define( 'RC_CATEGORIZE', 6 );
// /**@}*/
//
// /**@{
// * Article edit flags
// */
// define( 'EDIT_NEW', 1 );
// define( 'EDIT_UPDATE', 2 );
// define( 'EDIT_MINOR', 4 );
// define( 'EDIT_SUPPRESS_RC', 8 );
// define( 'EDIT_FORCE_BOT', 16 );
// define( 'EDIT_DEFER_UPDATES', 32 ); // Unused since 1.27
// define( 'EDIT_AUTOSUMMARY', 64 );
// /**@}*/
//
// /**@{
// * Flags for Database::makeList()
// * These are also available as Database class constants
// */
// define( 'LIST_COMMA', 0 );
// define( 'LIST_AND', 1 );
// define( 'LIST_SET', 2 );
// define( 'LIST_NAMES', 3 );
// define( 'LIST_OR', 4 );
// /**@}*/
//
// /**
// * Unicode and normalisation related
// */
// require_once __DIR__ . '/compat/normal/UtfNormalDefines.php';
//
// /**@{
// * Hook support constants
// */
// define( 'MW_SUPPORTS_PARSERFIRSTCALLINIT', 1 );
// define( 'MW_SUPPORTS_LOCALISATIONCACHE', 1 );
// define( 'MW_SUPPORTS_CONTENTHANDLER', 1 );
// define( 'MW_EDITFILTERMERGED_SUPPORTS_API', 1 );
// /**@}*/
//
// /** Support for $wgResourceModules */
// define( 'MW_SUPPORTS_RESOURCE_MODULES', 1 );
//
// /**@{
// * Allowed values for Parser::$mOutputType
// * Parameter to Parser::startExternalParse().
// * Use of Parser consts is preferred:
// * - Parser::OT_HTML
// * - Parser::OT_WIKI
// * - Parser::OT_PREPROCESS
// * - Parser::OT_MSG
// * - Parser::OT_PLAIN
// */
// define( 'OT_HTML', 1 );
// define( 'OT_WIKI', 2 );
// define( 'OT_PREPROCESS', 3 );
// define( 'OT_MSG', 3 ); // b/c alias for OT_PREPROCESS
// define( 'OT_PLAIN', 4 );
// /**@}*/
//
// /**@{
// * Flags for Parser::setFunctionHook
// * Use of Parser consts is preferred:
// * - Parser::SFH_NO_HASH
// * - Parser::SFH_OBJECT_ARGS
// */
// define( 'SFH_NO_HASH', 1 );
// define( 'SFH_OBJECT_ARGS', 2 );
// /**@}*/
//
// /**@{
// * Autopromote conditions (must be here and not in Autopromote.php, so that
// * they're loaded for DefaultSettings.php before AutoLoader.php)
// */
// define( 'APCOND_EDITCOUNT', 1 );
// define( 'APCOND_AGE', 2 );
// define( 'APCOND_EMAILCONFIRMED', 3 );
// define( 'APCOND_INGROUPS', 4 );
// define( 'APCOND_ISIP', 5 );
// define( 'APCOND_IPINRANGE', 6 );
// define( 'APCOND_AGE_FROM_EDIT', 7 );
// define( 'APCOND_BLOCKED', 8 );
// define( 'APCOND_ISBOT', 9 );
// /**@}*/
//
// /** @{
// * Protocol constants for wfExpandUrl()
// */
// define( 'PROTO_HTTP', 'http://' );
// define( 'PROTO_HTTPS', 'https://' );
// define( 'PROTO_RELATIVE', '//' );
// define( 'PROTO_CURRENT', null );
// define( 'PROTO_CANONICAL', 1 );
// define( 'PROTO_INTERNAL', 2 );
// /**@}*/
//
// /**@{
// * Content model ids, used by Content and ContentHandler.
// * These IDs will be exposed in the API and XML dumps.
// *
// * Extensions that define their own content model IDs should take
// * care to avoid conflicts. Using the extension name as a prefix is recommended,
// * for example 'myextension-somecontent'.
// */
// define( 'CONTENT_MODEL_WIKITEXT', 'wikitext' );
// define( 'CONTENT_MODEL_JAVASCRIPT', 'javascript' );
// define( 'CONTENT_MODEL_CSS', 'css' );
// define( 'CONTENT_MODEL_TEXT', 'text' );
// define( 'CONTENT_MODEL_JSON', 'json' );
// /**@}*/
/**@{
* Content formats, used by Content and ContentHandler.
* These should be MIME types, and will be exposed in the API and XML dumps.
*
* Extensions are free to use the below formats, or define their own.
* It is recommended to stick with the conventions for MIME types.
*/
// wikitext
public static final String CONTENT_FORMAT_WIKITEXT = "text/x-wiki";
// for js pages
public static final String CONTENT_FORMAT_JAVASCRIPT = "text/javascript";
// for css pages
public static final String CONTENT_FORMAT_CSS = "text/css";
// for future use, e.g. with some plain-html messages.
public static final String CONTENT_FORMAT_TEXT = "text/plain";
// for future use, e.g. with some plain-html messages.
public static final String CONTENT_FORMAT_HTML = "text/html";
// for future use with the api and for extensions
public static final String CONTENT_FORMAT_SERIALIZED = "application/vnd.php.serialized";
// for future use with the api, and for use by extensions
public static final String CONTENT_FORMAT_JSON = "application/json";
// for future use with the api, and for use by extensions
public static final String CONTENT_FORMAT_XML = "application/xml";
/**@}*/
// /**@{
// * Max string length for shell invocations; based on binfmts.h
// */
// define( 'SHELL_MAX_ARG_STRLEN', '100000' );
// /**@}*/
}

View File

@@ -13,3 +13,9 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki.includes; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*;
import gplx.core.btries.*;
/**
 * Holder for reusable temporary objects that callers pass into XomwHtml methods
 * (EX: XomwHtml.expandAttributes(bfr, temp, atrs)).
 */
public class XomwHtmlTemp {
// scratch byte buffer
public final Bry_bfr bfr = Bry_bfr_.New();
// scratch trie match state
public final Btrie_rv trv = new Btrie_rv();
}

View File

@@ -13,3 +13,31 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki.includes; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*;
import org.junit.*; import gplx.core.tests.*;
import gplx.xowa.mediawiki.includes.xohtml.*;
/**
* JUnit tests for XomwHtml.expandAttributes, driven through XomwHtml_expandAttributesFxt.
* Each call passes the expected output first, followed by a flat list of key / value pairs.
*/
public class XomwHtml_expandAttributesTest {
private final XomwHtml_expandAttributesFxt fxt = new XomwHtml_expandAttributesFxt();
// one key / value pair is expected to render as ` a="b"` (note the leading space)
@Test public void Basic() {
fxt.Test__expand_attributes(" a=\"b\"", "a", "b");
}
// a key whose value is null is expected to render nothing at all
@Test public void NullVal() {
fxt.Test__expand_attributes("", "a", null);
}
}
/**
 * Test fixture for XomwHtml.expandAttributes: builds an attribute manager from
 * plain strings, runs the expansion and compares the rendered text.
 */
class XomwHtml_expandAttributesFxt {
  private final Bry_bfr bfr = Bry_bfr_.New();
  private final XomwHtmlTemp temp = new XomwHtmlTemp();

  /**
   * @param expd expected rendered attribute String
   * @param kvs  flat list of pairs: key at each even index, its value directly after it
   */
  public void Test__expand_attributes(String expd, String... kvs) {
    Xomw_atr_mgr atr_mgr = new Xomw_atr_mgr();
    // walk the flat list two entries at a time
    int idx = 0;
    while (idx < kvs.length) {
      byte[] atr_key = Bry_.new_a7(kvs[idx]);
      byte[] atr_val = Bry_.new_a7(kvs[idx + 1]);
      atr_mgr.Add(new Xomw_atr_itm(-1, atr_key, atr_val));
      idx += 2;
    }

    XomwHtml.expandAttributes(bfr, temp, atr_mgr);

    // To_str_and_clear also resets the buffer for the next call
    String actl = bfr.To_str_and_clear();
    Gftest.Eq__str(expd, actl);
  }
}

View File

@@ -13,3 +13,13 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki.includes; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*;
/**
* Mutable holder for a (link, text) pair.
* It is passed as the first argument to XomwLinker.normalizeSubpageLink and read back
* afterwards, so it serves as that method's two-value result.
*/
public class XomwLinker_NormalizeSubpageLink {
// link target
public byte[] link;
// link caption
public byte[] text;
/**
* Overwrites both members and returns this instance so calls can be chained.
*/
public XomwLinker_NormalizeSubpageLink Init(byte[] link, byte[] text) {
this.link = link;
this.text = text;
return this;
}
}

View File

@@ -13,3 +13,28 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki.includes; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*;
import org.junit.*; import gplx.core.tests.*;
/**
* JUnit tests for XomwLinker.normalizeSubpageLink.
* Argument columns, left to right: current page title, link as written, text as written,
* expected link, expected text.
*/
public class XomwLinker_NormalizeSubpageLinkTest {
private final XomwLinker_NormalizeSubpageLinkFxt fxt = new XomwLinker_NormalizeSubpageLinkFxt();
// link without leading "/" or "../" is expected to pass through unchanged
@Test public void None() {fxt.Test__normalize_subpage_link("A/B/C" , "Z" , "" , "Z" , "");}
// leading "/" with a "#" fragment: page title is prepended and the fragment is kept
@Test public void Hash() {fxt.Test__normalize_subpage_link("A/B/C" , "/Y#Z" , "" , "A/B/C/Y#Z" , "/Y#Z");}
// leading "/": page title is prepended; empty text becomes the link as written
@Test public void Slash__basic() {fxt.Test__normalize_subpage_link("A/B/C" , "/Z" , "" , "A/B/C/Z" , "/Z");}
// leading and trailing "/": trailing slash is dropped and the text loses both slashes
@Test public void Slash__slash() {fxt.Test__normalize_subpage_link("A/B/C" , "/Z/" , "" , "A/B/C/Z" , "Z");}
// "../" alone: one level up from the page title
@Test public void Dot2__empty() {fxt.Test__normalize_subpage_link("A/B/C" , "../" , "" , "A/B" , "");}
// repeated "../": one level up per occurrence; explicit text is kept
@Test public void Dot2__many() {fxt.Test__normalize_subpage_link("A/B/C" , "../../Z" , "z1" , "A/Z" , "z1");}
// repeated "../" with trailing "/": trailing slash is dropped and text becomes the last segment
@Test public void Dot2__trailing() {fxt.Test__normalize_subpage_link("A/B/C" , "../../Z/" , "" , "A/Z" , "Z");}
}
/**
 * Test fixture for XomwLinker.normalizeSubpageLink: converts String arguments to
 * byte arrays, runs the linker and compares the resulting link / text pair.
 */
class XomwLinker_NormalizeSubpageLinkFxt {
  private final XomwEnv env;
  private final XomwLinker linker = new XomwLinker(new gplx.xowa.mediawiki.includes.linkers.XomwLinkRenderer(new XomwSanitizer()));
  private final XomwLinker_NormalizeSubpageLink result = new XomwLinker_NormalizeSubpageLink();

  public XomwLinker_NormalizeSubpageLinkFxt() {
    env = XomwEnv.NewTest();
  }

  /**
   * @param page_title_str title of the page the link appears on
   * @param link           link target as written
   * @param text           link caption as written
   * @param expd_link      expected normalized link
   * @param expd_text      expected normalized caption
   */
  public void Test__normalize_subpage_link(String page_title_str, String link, String text, String expd_link, String expd_text) {
    byte[] page_title_bry = Bry_.new_u8(page_title_str);
    linker.normalizeSubpageLink(
        result,
        XomwTitle.newFromText(env, page_title_bry),
        Bry_.new_u8(link),
        Bry_.new_u8(text));

    // the linker reports its output through the shared result holder
    String actl_link = String_.new_u8(result.link);
    Gftest.Eq__str(expd_link, actl_link);
    String actl_text = String_.new_u8(result.text);
    Gftest.Eq__str(expd_text, actl_text);
  }
}

View File

@@ -13,3 +13,25 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki.includes; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*;
import org.junit.*; import gplx.core.tests.*; import gplx.core.btries.*; import gplx.xowa.mediawiki.includes.parsers.*;
/**
* JUnit tests for XomwLinker.splitTrail.
* Argument columns, left to right: trail text, expected "inside" part, expected remaining trail.
* The fixture registers only the letters "a" through "f" in the trie it gives to the linker.
*/
public class XomwLinker_SplitTrailTest {
private final XomwLinker_SplitTrailFxt fxt = new XomwLinker_SplitTrailFxt();
// leading run of registered letters is split off; the rest (starting at the space) stays in the trail
@Test public void Basic() {fxt.Test__split_trail("abc def" , "abc" , " def");}
// text that starts with an unregistered character yields a null "inside" part
@Test public void None() {fxt.Test__split_trail(" abc" , null , " abc");}
}
/**
 * Test fixture for XomwLinker.splitTrail: initializes a linker with a small
 * case-sensitive trie and compares both halves of the split.
 */
class XomwLinker_SplitTrailFxt {
  private final XomwLinker linker = new XomwLinker(new gplx.xowa.mediawiki.includes.linkers.XomwLinkRenderer(new XomwSanitizer()));
  private final Btrie_slim_mgr trie = Btrie_slim_mgr.cs();

  public XomwLinker_SplitTrailFxt() {
    // register each letter under itself; these are the only characters the trie knows
    String[] letters = new String[] {"a", "b", "c", "d", "e", "f"};
    for (int i = 0; i < letters.length; i++) {
      String letter = letters[i];
      trie.Add_str_str(letter, letter);
    }
    linker.Init_by_wiki(XomwEnv.NewTest(), trie);
  }

  /**
   * @param trail_str   text following a link
   * @param expd_inside expected element 0 of the split (may be null)
   * @param expd_trail  expected element 1 of the split
   */
  public void Test__split_trail(String trail_str, String expd_inside, String expd_trail) {
    byte[][] parts = linker.splitTrail(Bry_.new_u8(trail_str));
    String actl_inside = String_.new_u8(parts[0]);
    Gftest.Eq__str(expd_inside, actl_inside);
    String actl_trail = String_.new_u8(parts[1]);
    Gftest.Eq__str(expd_trail , actl_trail);
  }
}

View File

@@ -13,3 +13,19 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki.includes; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*;
/**
 * A magic word: its name, its case-sensitivity flag and one XomwMagicWordSynonym
 * per raw synonym text.
 */
public class XomwMagicWord {
  public boolean case_match;
  public byte[] name;
  public XomwMagicWordSynonym[] synonyms;

  /**
   * @param name         magic word name; also handed to every synonym
   * @param case_match   case-sensitivity flag; also handed to every synonym
   * @param synonyms_ary raw synonym texts, wrapped in the same order
   */
  public XomwMagicWord(byte[] name, boolean case_match, byte[][] synonyms_ary) {
    this.name = name;
    this.case_match = case_match;

    XomwMagicWordSynonym[] wrapped = new XomwMagicWordSynonym[synonyms_ary.length];
    int idx = 0;
    for (byte[] synonym_text : synonyms_ary) {
      wrapped[idx++] = new XomwMagicWordSynonym(name, case_match, synonym_text);
    }
    this.synonyms = wrapped;
  }
}

View File

@@ -13,3 +13,362 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki.includes; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*;
import gplx.core.btries.*; import gplx.core.primitives.*;
public class XomwMagicWordArray {
private Btrie_slim_mgr fwd_trie;
private Btrie_bwd_mgr bwd_trie;
private final Btrie_rv trv = new Btrie_rv();
// private final XomwMagicWordMgr magic_word_mgr;
public final byte[][] names;
// /** @var array */
// private hash;
// private baseRegex;
// private regex;
/**
 * Builds the lookup tries for a group of magic words.
 * Synonyms without "$1", or with "$1" at the end, go into the forward trie; synonyms with
 * "$1" at the start go into the backward trie; any other shape is logged and skipped.
 * Names that the manager does not know are skipped silently.
 *
 * @param magic_word_mgr registry used to resolve each name
 * @param names          names of the magic words in this group
 */
public XomwMagicWordArray(XomwMagicWordMgr magic_word_mgr, byte[][] names) {
  // this.magic_word_mgr = magic_word_mgr;
  this.names = names;

  // ASSUME: all magic words in a group have the same case sensitivity
  // (each trie is created lazily from the first word that needs it)
  for (int name_idx = 0; name_idx < names.length; name_idx++) {
    XomwMagicWord word = magic_word_mgr.Get(names[name_idx]);
    if (word == null) continue;

    for (XomwMagicWordSynonym synonym : word.synonyms) {
      byte tid = synonym.arg1_tid;
      if (tid == XomwMagicWordSynonym.Arg1__nil || tid == XomwMagicWordSynonym.Arg1__end) {
        // literal text is a prefix of the source; match left-to-right
        if (fwd_trie == null) {
          fwd_trie = word.case_match ? Btrie_slim_mgr.cs() : Btrie_slim_mgr.ci_u8();
        }
        fwd_trie.Add_obj(synonym.text_wo_arg1, synonym);
      }
      else if (tid == XomwMagicWordSynonym.Arg1__bgn) {
        // literal text is a suffix of the source; match right-to-left
        if (bwd_trie == null) {
          bwd_trie = Btrie_bwd_mgr.c__(word.case_match);
        }
        bwd_trie.Add(synonym.text_wo_arg1, synonym);
      }
      else if (tid == XomwMagicWordSynonym.Arg1__mid || tid == XomwMagicWordSynonym.Arg1__mix) {
        // ignore if mid / mix
        Gfo_usr_dlg_.Instance.Warn_many("", "", "MagicWordArray: unsupported arg_1_tid: tid=~{0}", tid);
      }
    }
  }
}
// /**
// * Add a magic word by name
// *
// * @param String name
// */
// public function add(name) {
// this->names[] = name;
// this->hash = this->baseRegex = this->regex = null;
// }
//
// /**
// * Add a number of magic words by name
// *
// * @param array names
// */
// public function addArray(names) {
// this->names = array_merge(this->names, array_values(names));
// this->hash = this->baseRegex = this->regex = null;
// }
//
// /**
// * Get a 2-d hashtable for this array
// * @return array
// */
// public function getHash() {
// if (is_null(this->hash)) {
// global wgContLang;
// this->hash = [ 0 => [], 1 => [] ];
// foreach (this->names as name) {
// magic = MagicWord::get(name);
// case = intval(magic->isCaseSensitive());
// foreach (magic->getSynonyms() as syn) {
// if (!case) {
// syn = wgContLang->lc(syn);
// }
// this->hash[case][syn] = name;
// }
// }
// }
// return this->hash;
// }
//
// /**
// * Get the super regex
// * @return array
// */
// public function getBaseRegex() {
// if (is_null(this->baseRegex)) {
// this->baseRegex = [ 0 => '', 1 => '' ];
// foreach (this->names as name) {
// magic = MagicWord::get(name);
// case = intval(magic->isCaseSensitive());
// foreach (magic->getSynonyms() as i => syn) {
// // Group name must start with a non-digit in PCRE 8.34+
// it = strtr(i, '0123456789', 'abcdefghij');
// group = "(?P<{it}_{name}>" . preg_quote(syn, '/') . ')';
// if (this->baseRegex[case] === '') {
// this->baseRegex[case] = group;
// } else {
// this->baseRegex[case] .= '|' . group;
// }
// }
// }
// }
// return this->baseRegex;
// }
//
// /**
// * Get an unanchored regex that does not match parameters
// * @return array
// */
// public function getRegex() {
// if (is_null(this->regex)) {
// super = this->getBaseRegex();
// this->regex = [ '', '' ];
// if (this->baseRegex[0] !== '') {
// this->regex[0] = "/{super[0]}/iuS";
// }
// if (this->baseRegex[1] !== '') {
// this->regex[1] = "/{super[1]}/S";
// }
// }
// return this->regex;
// }
//
// /**
// * Get a regex for matching variables with parameters
// *
// * @return String
// */
// public function getVariableRegex() {
// return str_replace("\\1", "(.*?)", this->getRegex());
// }
//
// /**
// * Get a regex anchored to the start of the String that does not match parameters
// *
// * @return array
// */
// public function getRegexStart() {
// super = this->getBaseRegex();
// newRegex = [ '', '' ];
// if (super[0] !== '') {
// newRegex[0] = "/^(?:{super[0]})/iuS";
// }
// if (super[1] !== '') {
// newRegex[1] = "/^(?:{super[1]})/S";
// }
// return newRegex;
// }
//
// /**
// * Get an anchored regex for matching variables with parameters
// *
// * @return array
// */
// public function getVariableStartToEndRegex() {
// super = this->getBaseRegex();
// newRegex = [ '', '' ];
// if (super[0] !== '') {
// newRegex[0] = str_replace("\\1", "(.*?)", "/^(?:{super[0]})/iuS");
// }
// if (super[1] !== '') {
// newRegex[1] = str_replace("\\1", "(.*?)", "/^(?:{super[1]})/S");
// }
// return newRegex;
// }
//
// /**
// * @since 1.20
// * @return array
// */
// public function getNames() {
// return this->names;
// }
//
// /**
// * Parse a match array from preg_match
// * Returns array(magic word ID, parameter value)
// * If there is no parameter value, that element will be false.
// *
// * @param array m
// *
// * @throws MWException
// * @return array
// */
// public function parseMatch(m) {
// reset(m);
// while (list(key, value) = each(m)) {
// if (key === 0 || value === '') {
// continue;
// }
// parts = explode('_', key, 2);
// if (count(parts) != 2) {
// // This shouldn't happen
// // continue;
// throw new MWException(__METHOD__ . ': bad parameter name');
// }
// list(/* synIndex */, magicName) = parts;
// paramValue = next(m);
// return [ magicName, paramValue ];
// }
// // This shouldn't happen either
// throw new MWException(__METHOD__ . ': parameter not found');
// }
/**
 * Match some text, with parameter capture.
 * Writes the magic word name to rv[0] and the captured parameter to rv[1].
 * Both elements are null if there was no match; the parameter is Bry_.Empty when
 * the word matched but captured nothing.
 *
 * The match is anchored at both ends: a synonym without "$1" must cover the whole
 * text, a "text$1" synonym must start the text and a "$1text" synonym must end it.
 * If both a forward and a backward synonym match, the backward one wins.
 *
 * @param rv  two-element output array: [0] = magic word name, [1] = parameter
 * @param src text to match
 */
public void matchVariableStartToEnd(byte[][] rv, byte[] src) {
  int src_end = src.length;
  if (src_end == 0) {
    rv[0] = rv[1] = null;
    return;
  }

  byte[] name = null;
  int val_bgn = -1, val_end = -1;

  // check fwd; EX: "thumb=$1"
  if (fwd_trie != null) {
    Object o = fwd_trie.Match_at(trv, src, 0, src_end);
    if (o != null) {
      XomwMagicWordSynonym syn = ((XomwMagicWordSynonym)o);
      // if "nil", then must be full match; EX: "thumbx" does not match "thumb"
      // A partial "nil" hit is simply discarded (not a hard failure) so that the
      // bwd check below still runs; EX: "uprightpx" must still match "$1px"
      boolean partial_nil = syn.arg1_tid == XomwMagicWordSynonym.Arg1__nil
        && syn.text_wo_arg1.length != src_end;
      if (!partial_nil) {
        name = syn.magic_name;
        val_bgn = trv.Pos();
        val_end = src_end;
      }
    }
  }

  // check bwd; EX: "$1px"
  if (bwd_trie != null) {
    Object o = bwd_trie.Match_at(trv, src, src_end - 1, -1);
    if (o != null) {
      XomwMagicWordSynonym syn = ((XomwMagicWordSynonym)o);
      name = syn.magic_name;
      val_bgn = 0;
      val_end = src_end - syn.text_wo_arg1.length;
    }
  }

  // no match: report null for both elements, same as the empty-text case
  if (name == null) {
    rv[0] = rv[1] = null;
    return;
  }

  rv[0] = name;
  rv[1] = val_end - val_bgn == 0 ? Bry_.Empty : Bry_.Mid(src, val_bgn, val_end);
}
// /**
// * Match some text, without parameter capture
// * Returns the magic word name, or false if there was no capture
// *
// * @param String text
// *
// * @return String|boolean False on failure
// */
// public function matchStartToEnd(text) {
// hash = this->getHash();
// if (isset(hash[1][text])) {
// return hash[1][text];
// }
// global wgContLang;
// lc = wgContLang->lc(text);
// if (isset(hash[0][lc])) {
// return hash[0][lc];
// }
// return false;
// }
//
// /**
// * Returns an associative array, ID => param value, for all items that match
// * Removes the matched items from the input String (passed by reference)
// *
// * @param String text
// *
// * @return array
// */
// public function matchAndRemove(&text) {
// found = [];
// regexes = this->getRegex();
// foreach (regexes as regex) {
// if (regex === '') {
// continue;
// }
// matches = [];
// res = preg_match_all(regex, text, matches, PREG_SET_ORDER);
// if (res === false) {
// LoggerFactory::getInstance('parser')->warning('preg_match_all returned false', [
// 'code' => preg_last_error(),
// 'regex' => regex,
// 'text' => text,
// ]);
// } elseif (res) {
// foreach (matches as m) {
// list(name, param) = this->parseMatch(m);
// found[name] = param;
// }
// }
// res = preg_replace(regex, '', text);
// if (res === null) {
// LoggerFactory::getInstance('parser')->warning('preg_replace returned null', [
// 'code' => preg_last_error(),
// 'regex' => regex,
// 'text' => text,
// ]);
// }
// text = res;
// }
// return found;
// }
//
// /**
// * Return the ID of the magic word at the start of text, and remove
// * the prefix from text.
// * Return false if no match found and text is not modified.
// * Does not match parameters.
// *
// * @param String text
// *
// * @return int|boolean False on failure
// */
// public function matchStartAndRemove(&text) {
// regexes = this->getRegexStart();
// foreach (regexes as regex) {
// if (regex === '') {
// continue;
// }
// if (preg_match(regex, text, m)) {
// list(id,) = this->parseMatch(m);
// if (strlen(m[0]) >= strlen(text)) {
// text = '';
// } else {
// text = substr(text, strlen(m[0]));
// }
// return id;
// }
// }
// return false;
// }
}

View File

@@ -13,3 +13,50 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki.includes; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*;
import org.junit.*; import gplx.core.tests.*;
/**
* JUnit tests for XomwMagicWordArray.matchVariableStartToEnd.
* Init__word registers a magic word with its synonyms, Init__ary builds the array from
* word names, and Test__matchVariableStartToEnd takes (text, expected name, expected parameter).
*/
public class XomwMagicWordArrayTest {
private final XomwMagicWordArrayFxt fxt = new XomwMagicWordArrayFxt();
// synonym without "$1": only the exact text matches; longer text yields null / null
@Test public void Nil() {
fxt.Init__word(Bool_.Y, "img_nil", "nil");
fxt.Init__ary("img_nil");
fxt.Test__matchVariableStartToEnd("nil", "img_nil", "");
fxt.Test__matchVariableStartToEnd("nila", null, null);
}
// synonym with literal text first and "$1" last: parameter is whatever follows the literal
@Test public void Bgn() {
fxt.Init__word(Bool_.Y, "img_bgn", "bgn$1");
fxt.Init__ary("img_bgn");
fxt.Test__matchVariableStartToEnd("bgna", "img_bgn", "a");
fxt.Test__matchVariableStartToEnd("bgn", "img_bgn", "");
}
// synonym with "$1" first and literal text last: parameter is whatever precedes the literal
@Test public void End() {
fxt.Init__word(Bool_.Y, "img_end", "$1end");
fxt.Init__ary("img_end");
fxt.Test__matchVariableStartToEnd("aend", "img_end", "a");
fxt.Test__matchVariableStartToEnd("end", "img_end", "");
}
// two words in one array, one matched from the front and one from the back
@Test public void Smoke() {
fxt.Init__word(Bool_.Y, "img_upright", "upright", "upright=$1", "upright $1");
fxt.Init__word(Bool_.Y, "img_width", "$1px");
fxt.Init__ary("img_upright", "img_width");
fxt.Test__matchVariableStartToEnd("upright=123", "img_upright", "123");
fxt.Test__matchVariableStartToEnd("123px", "img_width", "123");
}
}
/**
 * Test fixture for XomwMagicWordArray: registers words in a private manager,
 * builds the array from them and checks matchVariableStartToEnd results.
 */
class XomwMagicWordArrayFxt {
  private final XomwMagicWordMgr magic_word_mgr = new XomwMagicWordMgr();
  private XomwMagicWordArray magic_word_ary;

  /** Registers one magic word with its synonyms in the fixture's manager. */
  public void Init__word(boolean cs, String word, String... synonyms) {
    byte[] word_bry = Bry_.new_u8(word);
    byte[][] synonyms_bry = Bry_.Ary(synonyms);
    magic_word_mgr.Add(word_bry, cs, synonyms_bry);
  }

  /** Builds the array under test; call after every Init__word it depends on. */
  public void Init__ary(String... words) {
    byte[][] names = Bry_.Ary(words);
    magic_word_ary = new XomwMagicWordArray(magic_word_mgr, names);
  }

  /**
   * @param src       text to match
   * @param expd_name expected magic word name, or null for no match
   * @param expd_val  expected captured parameter, or null for no match
   */
  public void Test__matchVariableStartToEnd(String src, String expd_name, String expd_val) {
    // output slots: [0] = name, [1] = parameter
    byte[][] match = new byte[2][];
    byte[] src_bry = Bry_.new_u8(src);
    magic_word_ary.matchVariableStartToEnd(match, src_bry);
    Gftest.Eq__str(expd_name, match[0], expd_name);
    Gftest.Eq__str(expd_val , match[1], expd_val);
  }
}

View File

@@ -13,3 +13,14 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki.includes; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*;
/**
 * Registry of magic words keyed by name (case-sensitive byte-array hash).
 */
public class XomwMagicWordMgr {
  private final Hash_adp_bry hash = Hash_adp_bry.cs();

  /**
   * Creates a magic word from the given pieces and registers it under its name.
   *
   * @param name     magic word name; used as the hash key
   * @param cs       case-sensitivity flag passed to the word
   * @param synonyms raw synonym texts
   */
  public void Add(byte[] name, boolean cs, byte[]... synonyms) {
    hash.Add(name, new XomwMagicWord(name, cs, synonyms));
  }

  /**
   * @return the word registered under name, as returned by the hash lookup
   */
  public XomwMagicWord Get(byte[] name) {
    Object found = hash.Get_by(name);
    return (XomwMagicWord)found;
  }
}

View File

@@ -13,3 +13,77 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki.includes; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*;
/**
 * One synonym of a magic word, pre-analyzed for where its "$1" placeholder sits.
 */
public class XomwMagicWordSynonym {
  /** name of the magic word this synonym belongs to */
  public final byte[] magic_name;
  /** case-sensitivity flag copied from the owning magic word */
  public final boolean case_match;
  /** synonym text exactly as supplied, including any "$1" */
  public final byte[] text;
  /** synonym text with a leading or trailing "$1" removed; same as text for all other shapes */
  public final byte[] text_wo_arg1;
  /** placement of "$1" inside text; one of the Arg1__ constants */
  public final byte arg1_tid;

  /**
   * @param magic_name name of the owning magic word
   * @param case_match case-sensitivity flag of the owning magic word
   * @param text       synonym text; may contain "$1"
   */
  public XomwMagicWordSynonym(byte[] magic_name, boolean case_match, byte[] text) {
    this.magic_name = magic_name;
    this.case_match = case_match;
    this.text = text;
    this.arg1_tid = Get_arg1_tid(text);
    switch (arg1_tid) {
      case Arg1__bgn:
        // drop the leading "$1"
        text_wo_arg1 = Bry_.Mid(text, 2);
        break;
      case Arg1__end:
        // drop the trailing "$1"
        text_wo_arg1 = Bry_.Mid(text, 0, text.length - 2);
        break;
      default:
        text_wo_arg1 = text;
        break;
    }
  }

  /**
   * Scans src for "$1" and classifies its placement.
   * A single "$1" yields bgn / end / mid according to its position (a text that is only
   * "$1" counts as bgn); no "$1" yields nil; two or more yield mix.
   */
  private static byte Get_arg1_tid(byte[] src) {
    int len = src.length;
    byte rv = Arg1__nil;
    int cur = 0;
    while (cur < len) {
      boolean is_arg1 = src[cur] == Byte_ascii.Dollar
        && cur + 1 < len
        && src[cur + 1] == Byte_ascii.Num_1;
      if (!is_arg1) {
        cur += 1;
        continue;
      }

      // "$1" matched
      if (cur == 0) {
        rv = Arg1__bgn;
      }
      else if (cur == len - 2) {
        rv = rv == Arg1__nil ? Arg1__end : Arg1__mix;
      }
      else {
        // any earlier "$1" (including one at the start) makes this a mix;
        // EX: "$1a$1b" must not be reported as bgn
        rv = rv == Arg1__nil ? Arg1__mid : Arg1__mix;
      }
      cur += 2;
    }
    return rv;
  }

  public static final byte
    Arg1__nil = 0 // EX: "thumb"
  , Arg1__bgn = 1 // EX: "$1px"
  , Arg1__end = 2 // EX: "thumb=$1"
  , Arg1__mid = 3 // EX: "a$1b"
  , Arg1__mix = 4 // EX: "a$1b$1c"
  ;
}

View File

@@ -13,3 +13,589 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki.includes; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*;
import gplx.xowa.mediawiki.languages.*;
import gplx.xowa.mediawiki.includes.interwiki.*;
import gplx.xowa.mediawiki.includes.title.*;
/**
* MediaWikiServices is the service locator for the application scope of MediaWiki.
* It's implemented as a simple configurable DI container.
* MediaWikiServices acts as a top level factory/registry for top level services, and builds
* the network of service objects that defines MediaWiki's application logic.
* It acts as an entry point to MediaWiki's dependency injection mechanism.
*
* Services are defined in the "wiring" array passed to the constructor,
* or by calling defineService().
*
* @see docs/injection.txt for an overview of using dependency injection in the
* MediaWiki code base.
*/
public class XomwMediaWikiServices {
// XO.MW.SKIP:remove global getInstance(). See XomwEnv
private final XomwMediaWikiTitleCodec titleParser;
private final XomwInterwikiLookup interwikiLookup;
public XomwEnv env;
/**
* Creates the service container for one environment.
* env and interwikiLookup are stored as given; language and localInterwikis are only
* forwarded to the XomwMediaWikiTitleCodec that is created here.
*/
public XomwMediaWikiServices(XomwEnv env, XomwInterwikiLookup interwikiLookup, XomwLanguage language, byte[][] localInterwikis) {
this.env = env;
this.interwikiLookup = interwikiLookup;
// NOTE(review): `this` is handed to the codec before construction has finished.
// Keep this assignment last so env / interwikiLookup are already set if the codec
// reads them in its constructor -- confirm whether it does.
this.titleParser = new XomwMediaWikiTitleCodec(this, language, localInterwikis);
}
// /**
// * Replaces the global MediaWikiServices instance.
// *
// * @since 1.28
// *
// * @note This is for use in PHPUnit tests only!
// *
// * @throws MWException if called outside of PHPUnit tests.
// *
// * @param MediaWikiServices $services The new MediaWikiServices Object.
// *
// * @return MediaWikiServices The old MediaWikiServices Object, so it can be restored later.
// */
// public static function forceGlobalInstance( MediaWikiServices $services ) {
// if ( !defined( 'MW_PHPUNIT_TEST' ) ) {
// throw new MWException( __METHOD__ . ' must not be used outside unit tests.' );
// }
//
// $old = self::getInstance();
// self::$instance = $services;
//
// return $old;
// }
//
// /**
// * Creates a new instance of MediaWikiServices and sets it as the global default
// * instance. getInstance() will return a different MediaWikiServices Object
// * after every call to resetGlobalInstance().
// *
// * @since 1.28
// *
// * @warning This should not be used during normal operation. It is intended for use
// * when the configuration has changed significantly since bootstrap time, e.g.
// * during the installation process or during testing.
// *
// * @warning Calling resetGlobalInstance() may leave the application in an inconsistent
// * state. Calling this is only safe under the ASSUMPTION that NO REFERENCE to
// * any of the services managed by MediaWikiServices exist. If any service objects
// * managed by the old MediaWikiServices instance remain in use, they may INTERFERE
// * with the operation of the services managed by the new MediaWikiServices.
// * Operating with a mix of services created by the old and the new
// * MediaWikiServices instance may lead to INCONSISTENCIES and even DATA LOSS!
// * Any class implementing LAZY LOADING is especially prone to this problem,
// * since instances would typically retain a reference to a storage layer service.
// *
// * @see forceGlobalInstance()
// * @see resetGlobalInstance()
// * @see resetBetweenTest()
// *
// * @param Config|null $bootstrapConfig The Config Object to be registered as the
// * 'BootstrapConfig' service. This has to contain at least the information
// * needed to set up the 'ConfigFactory' service. If not given, the bootstrap
// * config of the old instance of MediaWikiServices will be re-used. If there
// * was no previous instance, a new GlobalVarConfig Object will be used to
// * bootstrap the services.
// *
// * @param String $quick Set this to "quick" to allow expensive resources to be re-used.
// * See SalvageableService for details.
// *
// * @throws MWException If called after MW_SERVICE_BOOTSTRAP_COMPLETE has been defined in
// * Setup.php (unless MW_PHPUNIT_TEST or MEDIAWIKI_INSTALL or RUN_MAINTENANCE_IF_MAIN
// * is defined).
// */
// public static function resetGlobalInstance( Config $bootstrapConfig = null, $quick = '' ) {
// if ( self::$instance === null ) {
// // no global instance yet, nothing to reset
// return;
// }
//
// self::failIfResetNotAllowed( __METHOD__ );
//
// if ( $bootstrapConfig === null ) {
// $bootstrapConfig = self::$instance->getBootstrapConfig();
// }
//
// $oldInstance = self::$instance;
//
// self::$instance = self::newInstance( $bootstrapConfig, 'load' );
// self::$instance->importWiring( $oldInstance, [ 'BootstrapConfig' ] );
//
// if ( $quick === 'quick' ) {
// self::$instance->salvage( $oldInstance );
// } else {
// $oldInstance->destroy();
// }
// }
//
// /**
// * Salvages the state of any salvageable service instances in $other.
// *
// * @note $other will have been destroyed when salvage() returns.
// *
// * @param MediaWikiServices $other
// */
// private function salvage( self $other ) {
// foreach ( this.getServiceNames() as $name ) {
// // The service could be new in the new instance and not registered in the
// // other instance (e.g. an extension that was loaded after the instantiation of
// // the other instance. Skip this service in this case. See T143974
// try {
// $oldService = $other->peekService( $name );
// } catch ( NoSuchServiceException $e ) {
// continue;
// }
//
// if ( $oldService instanceof SalvageableService ) {
// /** @var SalvageableService $newService */
// $newService = this.getService( $name );
// $newService->salvage( $oldService );
// }
// }
//
// $other->destroy();
// }
//
// /**
// * Creates a new MediaWikiServices instance and initializes it according to the
// * given $bootstrapConfig. In particular, all wiring files defined in the
// * ServiceWiringFiles setting are loaded, and the MediaWikiServices hook is called.
// *
// * @param Config|null $bootstrapConfig The Config Object to be registered as the
// * 'BootstrapConfig' service.
// *
// * @param String $loadWiring set this to 'load' to load the wiring files specified
// * in the 'ServiceWiringFiles' setting in $bootstrapConfig.
// *
// * @return MediaWikiServices
// * @throws MWException
// * @throws \FatalError
// */
// private static function newInstance( Config $bootstrapConfig, $loadWiring = '' ) {
// $instance = new self( $bootstrapConfig );
//
// // Load the default wiring from the specified files.
// if ( $loadWiring === 'load' ) {
// $wiringFiles = $bootstrapConfig->get( 'ServiceWiringFiles' );
// $instance->loadWiringFiles( $wiringFiles );
// }
//
// // Provide a traditional hook point to allow extensions to configure services.
// Hooks::run( 'MediaWikiServices', [ $instance ] );
//
// return $instance;
// }
//
// /**
// * Disables all storage layer services. After calling this, any attempt to access the
// * storage layer will result in an error. Use resetGlobalInstance() to restore normal
// * operation.
// *
// * @since 1.28
// *
// * @warning This is intended for extreme situations only and should never be used
// * while serving normal web requests. Legitimate use cases for this method include
// * the installation process. Test fixtures may also use this, if the fixture relies
// * on globalState.
// *
// * @see resetGlobalInstance()
// * @see resetChildProcessServices()
// */
// public static function disableStorageBackend() {
// // TODO: also disable some Caches, JobQueues, etc
// $destroy = [ 'DBLoadBalancer', 'DBLoadBalancerFactory' ];
// $services = self::getInstance();
//
// foreach ( $destroy as $name ) {
// $services->disableService( $name );
// }
//
// ObjectCache::clear();
// }
//
// /**
// * Resets any services that may have become stale after a child process
// * returns from after pcntl_fork(). It's also safe, but generally unnecessary,
// * to call this method from the parent process.
// *
// * @since 1.28
// *
// * @note This is intended for use in the context of process forking only!
// *
// * @see resetGlobalInstance()
// * @see disableStorageBackend()
// */
// public static function resetChildProcessServices() {
// // NOTE: for now, just reset everything. Since we don't know the interdependencies
// // between services, we can't do this more selectively at this time.
// self::resetGlobalInstance();
//
// // Child, reseed because there is no bug in PHP:
// // https://bugs.php.net/bug.php?id=42465
// mt_srand( getmypid() );
// }
//
// /**
// * Resets the given service for testing purposes.
// *
// * @since 1.28
// *
// * @warning This is generally unsafe! Other services may still retain references
// * to the stale service instance, leading to failures and inconsistencies. Subclasses
// * may use this method to reset specific services under specific instances, but
// * it should not be exposed to application logic.
// *
// * @note With proper dependency injection used throughout the codebase, this method
// * should not be needed. It is provided to allow tests that pollute global service
// * instances to clean up.
// *
// * @param String $name
// * @param boolean $destroy Whether the service instance should be destroyed if it exists.
// * When set to false, any existing service instance will effectively be detached
// * from the container.
// *
// * @throws MWException if called outside of PHPUnit tests.
// */
// public function resetServiceForTesting( $name, $destroy = true ) {
// if ( !defined( 'MW_PHPUNIT_TEST' ) && !defined( 'MW_PARSER_TEST' ) ) {
// throw new MWException( 'resetServiceForTesting() must not be used outside unit tests.' );
// }
//
// this.resetService( $name, $destroy );
// }
//
// /**
// * Convenience method that throws an exception unless it is called during a phase in which
// * resetting of global services is allowed. In general, services should not be reset
// * individually, since that may introduce inconsistencies.
// *
// * @since 1.28
// *
// * This method will throw an exception if:
// *
// * - self::$resetInProgress is false (to allow all services to be reset together
// * via resetGlobalInstance)
// * - and MEDIAWIKI_INSTALL is not defined (to allow services to be reset during installation)
// * - and MW_PHPUNIT_TEST is not defined (to allow services to be reset during testing)
// *
// * This method is intended to be used to safeguard against accidentally resetting
// * global service instances that are not yet managed by MediaWikiServices. It is
// * defined here in the MediaWikiServices services class to have a central place
// * for managing service bootstrapping and resetting.
// *
// * @param String $method the name of the caller method, as given by __METHOD__.
// *
// * @throws MWException if called outside bootstrap mode.
// *
// * @see resetGlobalInstance()
// * @see forceGlobalInstance()
// * @see disableStorageBackend()
// */
// public static function failIfResetNotAllowed( $method ) {
// if ( !defined( 'MW_PHPUNIT_TEST' )
// && !defined( 'MW_PARSER_TEST' )
// && !defined( 'MEDIAWIKI_INSTALL' )
// && !defined( 'RUN_MAINTENANCE_IF_MAIN' )
// && defined( 'MW_SERVICE_BOOTSTRAP_COMPLETE' )
// ) {
// throw new MWException( $method . ' may only be called during bootstrapping and unit tests!' );
// }
// }
//
// /**
// * @param Config $config The Config Object to be registered as the 'BootstrapConfig' service.
// * This has to contain at least the information needed to set up the 'ConfigFactory'
// * service.
// */
// public function __construct( Config $config ) {
// parent::__construct();
//
// // Register the given Config Object as the bootstrap config service.
// this.defineService( 'BootstrapConfig', function() use ( $config ) {
// return $config;
// } );
// }
//
// // CONVENIENCE GETTERS ////////////////////////////////////////////////////
//
// /**
// * Returns the Config Object containing the bootstrap configuration.
// * Bootstrap configuration would typically include database credentials
// * and other information that may be needed before the ConfigFactory
// * service can be instantiated.
// *
// * @note This should only be used during bootstrapping, in particular
// * when creating the MainConfig service. Application logic should
// * use getMainConfig() to get a Config instance.
// *
// * @since 1.27
// * @return Config
// */
// public function getBootstrapConfig() {
// return this.getService( 'BootstrapConfig' );
// }
//
// /**
// * @since 1.27
// * @return ConfigFactory
// */
// public function getConfigFactory() {
// return this.getService( 'ConfigFactory' );
// }
//
// /**
// * Returns the Config Object that provides configuration for MediaWiki core.
// * This may or may not be the same Object that is returned by getBootstrapConfig().
// *
// * @since 1.27
// * @return Config
// */
// public function getMainConfig() {
// return this.getService( 'MainConfig' );
// }
//
// /**
// * @since 1.27
// * @return SiteLookup
// */
// public function getSiteLookup() {
// return this.getService( 'SiteLookup' );
// }
//
// /**
// * @since 1.27
// * @return SiteStore
// */
// public function getSiteStore() {
// return this.getService( 'SiteStore' );
// }
/**
 * Returns the interwiki lookup held by this services object.
 * The XOWA port hands back the instance stored in the {@code interwikiLookup}
 * field of this class.
 *
 * @since 1.28
 * @return the XomwInterwikiLookup stored on this instance
 */
public XomwInterwikiLookup getInterwikiLookup() {
    return this.interwikiLookup;
}
// /**
// * @since 1.27
// * @return StatsdDataFactory
// */
// public function getStatsdDataFactory() {
// return this.getService( 'StatsdDataFactory' );
// }
//
// /**
// * @since 1.27
// * @return EventRelayerGroup
// */
// public function getEventRelayerGroup() {
// return this.getService( 'EventRelayerGroup' );
// }
//
// /**
// * @since 1.27
// * @return SearchEngine
// */
// public function newSearchEngine() {
// // New engine Object every time, since they keep state
// return this.getService( 'SearchEngineFactory' )->create();
// }
//
// /**
// * @since 1.27
// * @return SearchEngineFactory
// */
// public function getSearchEngineFactory() {
// return this.getService( 'SearchEngineFactory' );
// }
//
// /**
// * @since 1.27
// * @return SearchEngineConfig
// */
// public function getSearchEngineConfig() {
// return this.getService( 'SearchEngineConfig' );
// }
//
// /**
// * @since 1.27
// * @return SkinFactory
// */
// public function getSkinFactory() {
// return this.getService( 'SkinFactory' );
// }
//
// /**
// * @since 1.28
// * @return LBFactory
// */
// public function getDBLoadBalancerFactory() {
// return this.getService( 'DBLoadBalancerFactory' );
// }
//
// /**
// * @since 1.28
// * @return LoadBalancer The main DB load balancer for the local wiki.
// */
// public function getDBLoadBalancer() {
// return this.getService( 'DBLoadBalancer' );
// }
//
// /**
// * @since 1.28
// * @return WatchedItemStore
// */
// public function getWatchedItemStore() {
// return this.getService( 'WatchedItemStore' );
// }
//
// /**
// * @since 1.28
// * @return WatchedItemQueryService
// */
// public function getWatchedItemQueryService() {
// return this.getService( 'WatchedItemQueryService' );
// }
//
// /**
// * @since 1.28
// * @return CryptRand
// */
// public function getCryptRand() {
// return this.getService( 'CryptRand' );
// }
//
// /**
// * @since 1.28
// * @return CryptHKDF
// */
// public function getCryptHKDF() {
// return this.getService( 'CryptHKDF' );
// }
//
// /**
// * @since 1.28
// * @return MediaHandlerFactory
// */
// public function getMediaHandlerFactory() {
// return this.getService( 'MediaHandlerFactory' );
// }
//
// /**
// * @since 1.28
// * @return MimeAnalyzer
// */
// public function getMimeAnalyzer() {
// return this.getService( 'MimeAnalyzer' );
// }
//
// /**
// * @since 1.28
// * @return ProxyLookup
// */
// public function getProxyLookup() {
// return this.getService( 'ProxyLookup' );
// }
//
// /**
// * @since 1.29
// * @return Parser
// */
// public function getParser() {
// return this.getService( 'Parser' );
// }
//
// /**
// * @since 1.28
// * @return GenderCache
// */
// public function getGenderCache() {
// return this.getService( 'GenderCache' );
// }
//
// /**
// * @since 1.28
// * @return LinkCache
// */
// public function getLinkCache() {
// return this.getService( 'LinkCache' );
// }
//
// /**
// * @since 1.28
// * @return LinkRendererFactory
// */
// public function getLinkRendererFactory() {
// return this.getService( 'LinkRendererFactory' );
// }
//
// /**
// * LinkRenderer instance that can be used
// * if no custom options are needed
// *
// * @since 1.28
// * @return LinkRenderer
// */
// public function getLinkRenderer() {
// return this.getService( 'LinkRenderer' );
// }
/**
 * Returns the title formatter.
 * In this port the formatter is the same XomwMediaWikiTitleCodec instance
 * that getTitleParser() returns (the {@code titleParser} field).
 *
 * @since 1.28
 * @return the shared XomwMediaWikiTitleCodec
 */
public XomwMediaWikiTitleCodec getTitleFormatter() {
    // MW equivalent: return this.getService( 'TitleFormatter' );
    return this.titleParser;
}
/**
 * Returns the title parser.
 * The value comes straight from the {@code titleParser} field of this class.
 *
 * @since 1.28
 * @return the XomwMediaWikiTitleCodec stored on this instance
 */
public XomwMediaWikiTitleCodec getTitleParser() {
    // MW equivalent: return this.getService( 'TitleParser' );
    return this.titleParser;
}
// /**
// * @since 1.28
// * @return \BagOStuff
// */
// public function getMainObjectStash() {
// return this.getService( 'MainObjectStash' );
// }
//
// /**
// * @since 1.28
// * @return \WANObjectCache
// */
// public function getMainWANObjectCache() {
// return this.getService( 'MainWANObjectCache' );
// }
//
// /**
// * @since 1.28
// * @return \BagOStuff
// */
// public function getLocalServerObjectCache() {
// return this.getService( 'LocalServerObjectCache' );
// }
//
// /**
// * @since 1.28
// * @return VirtualRESTServiceClient
// */
// public function getVirtualRESTServiceClient() {
// return this.getService( 'VirtualRESTServiceClient' );
// }
//
// ///////////////////////////////////////////////////////////////////////////
// // NOTE: When adding a service getter here, don't forget to add a test
// // case for it in MediaWikiServicesTest::provideGetters() and in
// // MediaWikiServicesTest::provideGetService()!
// ///////////////////////////////////////////////////////////////////////////
}

View File

@@ -13,3 +13,12 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki.includes; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*;
import gplx.xowa.mediawiki.languages.*;
/**
 * Registry of XomwMessage objects keyed by their String message key.
 */
public class XomwMessageMgr {
    // key (String) -> XomwMessage
    private final Hash_adp messages = Hash_adp_.New();

    /**
     * Wraps {@code val} in a new XomwMessage bound to {@code language} and
     * stores it under {@code key}.
     *
     * @param key      message key used for later lookup
     * @param val      message text; converted to bytes via Bry_.new_u8
     * @param language language handed to the XomwMessage constructor
     */
    public void Add(String key, String val, XomwLanguage language) {
        byte[] valBry = Bry_.new_u8(val);
        XomwMessage message = new XomwMessage(valBry, language);
        messages.Add(key, message);
    }

    /**
     * Looks up the message stored under {@code key}.
     *
     * @param key message key
     * @return whatever the underlying hash returns for the key, cast to XomwMessage
     */
    public XomwMessage Get_by_str(String key) {
        Object found = messages.Get_by(key);
        return (XomwMessage)found;
    }
}

View File

@@ -13,3 +13,492 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki.includes; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*;
public class XomwNamespace {
// /**
// * These namespaces should always be first-letter capitalized, now and
// * forevermore. Historically, they could've probably been lowercased too,
// * but some things are just too ingrained now. :)
// */
// private static $alwaysCapitalizedNamespaces = [ NS_SPECIAL, NS_USER, NS_MEDIAWIKI ];
//
// /**
// * Throw an exception when trying to get the subject or talk page
// * for a given namespace where it does not make sense.
// * Special namespaces are defined in includes/Defines.php and have
// * a value below 0 (ex: NS_SPECIAL = -1 , NS_MEDIA = -2)
// *
// * @param int $index
// * @param String $method
// *
// * @throws MWException
// * @return boolean
// */
// private static function isMethodValidFor($index, $method) {
// if ($index < NS_MAIN) {
// throw new MWException("$method does not make any sense for given namespace $index");
// }
// return true;
// }
//
// /**
// * Can pages in the given namespace be moved?
// *
// * @param int $index Namespace index
// * @return boolean
// */
// public static function isMovable($index) {
// global $wgAllowImageMoving;
//
// $result = !($index < NS_MAIN || ($index == NS_FILE && !$wgAllowImageMoving));
//
// /**
// * @since 1.20
// */
// Hooks::run('NamespaceIsMovable', [ $index, &$result ]);
//
// return $result;
// }
//
// /**
// * Is the given namespace a subject (non-talk) namespace?
// *
// * @param int $index Namespace index
// * @return boolean
// * @since 1.19
// */
// public static function isSubject($index) {
// return !self::isTalk($index);
// }
//
// /**
// * Is the given namespace a talk namespace?
// *
// * @param int $index Namespace index
// * @return boolean
// */
// public static function isTalk($index) {
// return $index > NS_MAIN
// && $index % 2;
// }
//
// /**
// * Get the talk namespace index for a given namespace
// *
// * @param int $index Namespace index
// * @return int
// */
// public static function getTalk($index) {
// self::isMethodValidFor($index, __METHOD__);
// return self::isTalk($index)
// ? $index
// : $index + 1;
// }
//
// /**
// * Get the subject namespace index for a given namespace
// * Special namespaces (NS_MEDIA, NS_SPECIAL) are always the subject.
// *
// * @param int $index Namespace index
// * @return int
// */
// public static function getSubject($index) {
// # Handle special namespaces
// if ($index < NS_MAIN) {
// return $index;
// }
//
// return self::isTalk($index)
// ? $index - 1
// : $index;
// }
//
// /**
// * Get the associated namespace.
// * For talk namespaces, returns the subject (non-talk) namespace
// * For subject (non-talk) namespaces, returns the talk namespace
// *
// * @param int $index Namespace index
// * @return int|null If no associated namespace could be found
// */
// public static function getAssociated($index) {
// self::isMethodValidFor($index, __METHOD__);
//
// if (self::isSubject($index)) {
// return self::getTalk($index);
// } elseif (self::isTalk($index)) {
// return self::getSubject($index);
// } else {
// return null;
// }
// }
//
// /**
// * Returns whether the specified namespace exists
// *
// * @param int $index
// *
// * @return boolean
// * @since 1.19
// */
// public static function exists($index) {
// $nslist = self::getCanonicalNamespaces();
// return isset($nslist[$index]);
// }
//
// /**
// * Returns whether the specified namespaces are the same namespace
// *
// * @note It's possible that in the future we may start using something
// * other than just namespace indexes. Under that circumstance making use
// * of this function rather than directly doing comparison will make
// * sure that code will not potentially break.
// *
// * @param int $ns1 The first namespace index
// * @param int $ns2 The second namespace index
// *
// * @return boolean
// * @since 1.19
// */
// public static function equals($ns1, $ns2) {
// return $ns1 == $ns2;
// }
//
// /**
// * Returns whether the specified namespaces share the same subject.
// * eg: NS_USER and NS_USER will return true, as well
// * NS_USER and NS_USER_TALK will return true.
// *
// * @param int $ns1 The first namespace index
// * @param int $ns2 The second namespace index
// *
// * @return boolean
// * @since 1.19
// */
// public static function subjectEquals($ns1, $ns2) {
// return self::getSubject($ns1) == self::getSubject($ns2);
// }
// Cached table of canonical namespaces; populated on first request or when a rebuild is asked for.
private static XomwNamespacesById namespaces = null;

/**
 * Returns all defined namespaces with their canonical (English) names,
 * reusing the cached table when one exists.
 *
 * @return the cached namespace table
 * @since 1.17
 */
public static XomwNamespacesById getCanonicalNamespaces() {return getCanonicalNamespaces(false);}

/**
 * Returns all defined namespaces with their canonical (English) names.
 *
 * The table is a clone of XomwSetup.wgCanonicalNamespaceNames with an extra
 * entry for NS_MAIN (empty name). It is assembled in a local variable and
 * only stored in the static cache once complete, so a failure while
 * populating it never leaves a half-built table cached.
 *
 * @param rebuild Rebuild namespace list (default = false). Used for testing.
 *
 * @return the cached namespace table
 * @since 1.17
 */
public static XomwNamespacesById getCanonicalNamespaces(boolean rebuild) {
    if (namespaces == null || rebuild) {
        // global $wgExtraNamespaces, $wgCanonicalNamespaceNames;
        XomwNamespacesById built = XomwSetup.wgCanonicalNamespaceNames.Clone();
        built.Add(XomwDefines.NS_MAIN, "");
        // // Add extension namespaces
        // $namespaces += ExtensionRegistry::getInstance()->getAttribute('ExtensionNamespaces');
        // if (is_array($wgExtraNamespaces)) {
        // $namespaces += $wgExtraNamespaces;
        // }
        // Hooks::run('CanonicalNamespaces', [ &$namespaces ]);
        namespaces = built; // publish only after the table is fully populated
    }
    return namespaces;
}
// /**
// * Returns the canonical (English) name for a given index
// *
// * @param int $index Namespace index
// * @return String|boolean If no canonical definition.
// */
// public static function getCanonicalName($index) {
// $nslist = self::getCanonicalNamespaces();
// if (isset($nslist[$index])) {
// return $nslist[$index];
// } else {
// return false;
// }
// }
// Lazily built lookup from lower-cased canonical namespace name to its XomwNamespaceItem.
private static Hash_adp xNamespaces = null;

/**
 * Returns the index for a given canonical name, or NULL_NS_ID when the name
 * is not a canonical namespace.
 * The input *must* be converted to lower case first
 *
 * The lookup table is built on first use from getCanonicalNamespaces(). It is
 * filled in a local variable and stored in the static field only when
 * complete, so an exception part-way through cannot leave a partial index
 * cached for every later call.
 *
 * @param name Namespace name (already lower-cased)
 * @return namespace id, or XomwNamespace.NULL_NS_ID if not found
 */
public static int getCanonicalIndex(byte[] name) {
    if (xNamespaces == null) {
        Hash_adp index = Hash_adp_bry.cs();
        XomwNamespacesById namespacesHash = getCanonicalNamespaces();
        int len = namespacesHash.Len();
        for (int i = 0; i < len; i++) {
            XomwNamespaceItem item = namespacesHash.GetAtOrNull(i);
            index.Add(Bry_.Lcase__all(item.name), item); // NOTE: MW does "strtolower($text)"; canonical namespaces are always ascii
        }
        xNamespaces = index; // publish only after every namespace has been added
    }
    XomwNamespaceItem xNs = (XomwNamespaceItem)xNamespaces.Get_by(name);
    return xNs == null ? XomwNamespace.NULL_NS_ID : xNs.id;
}
// /**
// * Returns an array of the namespaces (by integer id) that exist on the
// * wiki. Used primarily by the api in help documentation.
// * @return array
// */
// public static function getValidNamespaces() {
// static $mValidNamespaces = null;
//
// if (is_null($mValidNamespaces)) {
// foreach (array_keys(self::getCanonicalNamespaces()) as $ns) {
// if ($ns >= 0) {
// $mValidNamespaces[] = $ns;
// }
// }
// // T109137: sort numerically
// sort($mValidNamespaces, SORT_NUMERIC);
// }
//
// return $mValidNamespaces;
// }
//
// /**
// * Can this namespace ever have a talk namespace?
// *
// * @param int $index Namespace index
// * @return boolean
// */
// public static function canTalk($index) {
// return $index >= NS_MAIN;
// }
//
// /**
// * Does this namespace contain content, for the purposes of calculating
// * statistics, etc?
// *
// * @param int $index Index to check
// * @return boolean
// */
// public static function isContent($index) {
// global $wgContentNamespaces;
// return $index == NS_MAIN || in_array($index, $wgContentNamespaces);
// }
//
// /**
// * Might pages in this namespace require the use of the Signature button on
// * the edit toolbar?
// *
// * @param int $index Index to check
// * @return boolean
// */
// public static function wantSignatures($index) {
// global $wgExtraSignatureNamespaces;
// return self::isTalk($index) || in_array($index, $wgExtraSignatureNamespaces);
// }
//
// /**
// * Can pages in a namespace be watched?
// *
// * @param int $index
// * @return boolean
// */
// public static function isWatchable($index) {
// return $index >= NS_MAIN;
// }
//
// /**
// * Does the namespace allow subpages?
// *
// * @param int $index Index to check
// * @return boolean
// */
// public static function hasSubpages($index) {
// global $wgNamespacesWithSubpages;
// return !empty($wgNamespacesWithSubpages[$index]);
// }
//
// /**
// * Get a list of all namespace indices which are considered to contain content
// * @return array Array of namespace indices
// */
// public static function getContentNamespaces() {
// global $wgContentNamespaces;
// if (!is_array($wgContentNamespaces) || $wgContentNamespaces == []) {
// return [ NS_MAIN ];
// } elseif (!in_array(NS_MAIN, $wgContentNamespaces)) {
// // always force NS_MAIN to be part of array (to match the algorithm used by isContent)
// return array_merge([ NS_MAIN ], $wgContentNamespaces);
// } else {
// return $wgContentNamespaces;
// }
// }
//
// /**
// * List all namespace indices which are considered subject, aka not a talk
// * or special namespace. See also XomwNamespace::isSubject
// *
// * @return array Array of namespace indices
// */
// public static function getSubjectNamespaces() {
// return array_filter(
// XomwNamespace::getValidNamespaces(),
// 'XomwNamespace::isSubject'
// );
// }
//
// /**
// * List all namespace indices which are considered talks, aka not a subject
// * or special namespace. See also XomwNamespace::isTalk
// *
// * @return array Array of namespace indices
// */
// public static function getTalkNamespaces() {
// return array_filter(
// XomwNamespace::getValidNamespaces(),
// 'XomwNamespace::isTalk'
// );
// }
//
// /**
// * Is the namespace first-letter capitalized?
// *
// * @param int $index Index to check
// * @return boolean
// */
// public static function isCapitalized($index) {
// global $wgCapitalLinks, $wgCapitalLinkOverrides;
// // Turn NS_MEDIA into NS_FILE
// $index = $index == NS_MEDIA ? NS_FILE : $index;
//
// // Make sure to get the subject of our namespace
// $index = self::getSubject($index);
//
// // Some namespaces are special and should always be upper case
// if (in_array($index, self::$alwaysCapitalizedNamespaces)) {
// return true;
// }
// if (isset($wgCapitalLinkOverrides[$index])) {
// // $wgCapitalLinkOverrides is explicitly set
// return $wgCapitalLinkOverrides[$index];
// }
// // Default to the global setting
// return $wgCapitalLinks;
// }
//
// /**
// * Does the namespace (potentially) have different aliases for different
// * genders. Not all languages make a distinction here.
// *
// * @since 1.18
// * @param int $index Index to check
// * @return boolean
// */
// public static function hasGenderDistinction($index) {
// return $index == NS_USER || $index == NS_USER_TALK;
// }
//
// /**
// * It is not possible to use pages from this namespace as template?
// *
// * @since 1.20
// * @param int $index Index to check
// * @return boolean
// */
// public static function isNonincludable($index) {
// global $wgNonincludableNamespaces;
// return $wgNonincludableNamespaces && in_array($index, $wgNonincludableNamespaces);
// }
//
// /**
// * Get the default content model for a namespace
// * This does not mean that all pages in that namespace have the model
// *
// * @since 1.21
// * @param int $index Index to check
// * @return null|String Default model name for the given namespace, if set
// */
// public static function getNamespaceContentModel($index) {
// global $wgNamespaceContentModels;
// return isset($wgNamespaceContentModels[$index])
// ? $wgNamespaceContentModels[$index]
// : null;
// }
//
// /**
// * Determine which restriction levels it makes sense to use in a namespace,
// * optionally filtered by a user's rights.
// *
// * @since 1.23
// * @param int $index Index to check
// * @param User $user User to check
// * @return array
// */
// public static function getRestrictionLevels($index, User $user = null) {
// global $wgNamespaceProtection, $wgRestrictionLevels;
//
// if (!isset($wgNamespaceProtection[$index])) {
// // All levels are valid if there's no namespace restriction.
// // But still filter by user, if necessary
// $levels = $wgRestrictionLevels;
// if ($user) {
// $levels = array_values(array_filter($levels, function ($level) use ($user) {
// $right = $level;
// if ($right == 'sysop') {
// $right = 'editprotected'; // BC
// }
// if ($right == 'autoconfirmed') {
// $right = 'editsemiprotected'; // BC
// }
// return ($right == '' || $user->isAllowed($right));
// }));
// }
// return $levels;
// }
//
// // First, get the list of groups that can edit this namespace.
// $namespaceGroups = [];
// $combine = 'array_merge';
// foreach ((array)$wgNamespaceProtection[$index] as $right) {
// if ($right == 'sysop') {
// $right = 'editprotected'; // BC
// }
// if ($right == 'autoconfirmed') {
// $right = 'editsemiprotected'; // BC
// }
// if ($right != '') {
// $namespaceGroups = call_user_func($combine, $namespaceGroups,
// User::getGroupsWithPermission($right));
// $combine = 'array_intersect';
// }
// }
//
// // Now, keep only those restriction levels where there is at least one
// // group that can edit the namespace but would be blocked by the
// // restriction.
// $usableLevels = [ '' ];
// foreach ($wgRestrictionLevels as $level) {
// $right = $level;
// if ($right == 'sysop') {
// $right = 'editprotected'; // BC
// }
// if ($right == 'autoconfirmed') {
// $right = 'editsemiprotected'; // BC
// }
// if ($right != '' && (!$user || $user->isAllowed($right)) &&
// array_diff($namespaceGroups, User::getGroupsWithPermission($right))
// ) {
// $usableLevels[] = $level;
// }
// }
//
// return $usableLevels;
// }
// Sentinel id returned by getCanonicalIndex when the name matches no canonical namespace; same value as XophpUtility.NULL_INT.
public static final int NULL_NS_ID = XophpUtility.NULL_INT;
}

View File

@@ -13,3 +13,12 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki.includes; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*;
/**
 * Value holder pairing a namespace id with its name bytes.
 * Both fields are final and set once by the constructor.
 */
public class XomwNamespaceItem {
    /** Numeric namespace id. */
    public final int id;

    /** Namespace name as bytes; the array reference is stored as given, not copied. */
    public final byte[] name;

    /**
     * @param id   numeric namespace id
     * @param name namespace name bytes (kept by reference)
     */
    public XomwNamespaceItem(int id, byte[] name) {
        this.name = name;
        this.id = id;
    }
}

View File

@@ -13,3 +13,25 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki.includes; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*;
import gplx.core.lists.*;
/**
 * Collection of XomwNamespaceItem entries addressable by namespace id or by
 * position, backed by a HashByInt.
 */
public class XomwNamespacesById {
    private final HashByInt itemsById;

    /** Creates an empty collection. */
    public XomwNamespacesById() {
        this(new HashByInt());
    }

    // Used by Clone() to wrap an already-copied backing hash.
    private XomwNamespacesById(HashByInt itemsById) {
        this.itemsById = itemsById;
    }

    /** @return number of entries in the backing hash */
    public int Len() {
        return itemsById.Len();
    }

    /**
     * @param id namespace id
     * @return the name bytes registered for {@code id}, or null when the id is absent
     */
    public byte[] GetNameOrNull(int id) {
        Object found = itemsById.Get_by_or_null(id);
        if (found == null) {
            return null;
        }
        return ((XomwNamespaceItem)found).name;
    }

    /**
     * @param idx position of the entry
     * @return the entry at {@code idx}, as returned by HashByInt.Get_at_or_null
     */
    public XomwNamespaceItem GetAtOrNull(int idx) {
        Object found = itemsById.Get_at_or_null(idx);
        return (XomwNamespaceItem)found;
    }

    /**
     * Registers a namespace.
     *
     * @param id   namespace id, used as the key
     * @param text namespace name; converted to bytes via Bry_.new_u8
     * @return this, for chaining
     */
    public XomwNamespacesById Add(int id, String text) {
        byte[] name = Bry_.new_u8(text);
        itemsById.Add(id, new XomwNamespaceItem(id, name));
        return this;
    }

    /**
     * @return a new collection whose backing hash is a clone of this one's
     */
    public XomwNamespacesById Clone() {
        return new XomwNamespacesById(itemsById.Clone());
    }
}

View File

@@ -13,3 +13,23 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki.includes; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*;
/**
 * Collection of XomwNamespaceItem entries addressable by name bytes or by
 * position, backed by an Ordered_hash created with Ordered_hash_.New_bry().
 */
public class XomwNamespacesByName {
    private final Ordered_hash itemsByName = Ordered_hash_.New_bry();

    /** @return number of entries in the backing hash */
    public int Len() {
        return itemsByName.Len();
    }

    /**
     * @param name namespace name bytes
     * @return the id registered for {@code name}, or XophpUtility.NULL_INT when absent
     */
    public int GetAsIdOrNullInt(byte[] name) {
        Object found = itemsByName.Get_by(name);
        if (found == null) {
            return XophpUtility.NULL_INT;
        }
        return ((XomwNamespaceItem)found).id;
    }

    /**
     * @param idx position of the entry
     * @return the entry at {@code idx}, as returned by Ordered_hash.Get_at
     */
    // NOTE(review): out-of-range behaviour is whatever Ordered_hash.Get_at does; confirm it matches the "OrNull" name.
    public XomwNamespaceItem GetAtOrNull(int idx) {
        Object found = itemsByName.Get_at(idx);
        return (XomwNamespaceItem)found;
    }

    /**
     * Registers an existing item under {@code name}.
     *
     * @param name key bytes
     * @param item item to store
     */
    public void Add(byte[] name, XomwNamespaceItem item) {
        itemsByName.Add(name, item);
    }

    /**
     * Creates and registers an item for the given name and id; the same byte
     * array serves as both the key and the item's name.
     *
     * @param name namespace name; converted to bytes via Bry_.new_u8
     * @param id   namespace id
     * @return this, for chaining
     */
    public XomwNamespacesByName Add(String name, int id) {
        byte[] nameBry = Bry_.new_u8(name);
        XomwNamespaceItem item = new XomwNamespaceItem(id, nameBry);
        itemsByName.Add(nameBry, item);
        return this;
    }
}

View File

@@ -13,3 +13,55 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki.includes; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*;
/**
* Variant of the Message cls.
*
* Rather than treating the message key as a lookup
* value (which is passed to the MessageCache and
* translated as necessary), a RawMessage key is
* treated as the actual message.
*
* All other functionality (parsing, escaping, etc.)
* is preserved.
*
* NOTE: this Java class is currently an empty placeholder. It declares no
* members and does not extend XomwMessage; the original PHP is kept below,
* commented out, as reference for a future port.
*
* @since 1.21
*/
class XomwRawMessage { // : XomwMessage
//
// /**
// * Call the parent constructor, then store the key as
// * the message.
// *
// * @see Message::__construct
// *
// * @param String $text Message to use.
// * @param array $params Parameters for the message.
// *
// * @throws InvalidArgumentException
// */
// public function __construct( $text, $params = [] ) {
// if ( !is_string( $text ) ) {
// throw new InvalidArgumentException( '$text must be a String' );
// }
//
// parent::__construct( $text, $params );
//
// // The key is the message.
// $this->message = $text;
// }
//
// /**
// * Fetch the message (in this case, the key).
// *
// * @return String
// */
// public function fetchMessage() {
// // Just in case the message is unset somewhere.
// if ( $this->message === null ) {
// $this->message = $this->key;
// }
//
// return $this->message;
// }
}

View File

@@ -13,3 +13,164 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki.includes; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*;
import org.junit.*; import gplx.core.tests.*; import gplx.core.btries.*; import gplx.xowa.mediawiki.includes.xohtml.*;
/**
 * Tests for XomwSanitizer and its helper regex classes, driven through XomwSanitizerFxt.
 *
 * Literals that depend on invisible characters (DEL, right-to-left mark,
 * soft hyphen) are written as explicit unicode escapes so the intent survives
 * editors, diff viewers and copy/paste.
 */
public class XomwSanitizerTest {
    private final XomwSanitizerFxt fxt = new XomwSanitizerFxt();

    @Test public void Normalize__text()             {fxt.Test__normalize_char_references("abc", "abc");}
    // NOTE(review): the names of the next two tests look swapped. "&#08;" is the case whose
    // ampersand gets escaped, while "&#09;" is kept and only loses its leading zero.
    @Test public void Normalize__dec()              {fxt.Test__normalize_char_references("&#08;", "&amp;#08;");}
    @Test public void Normalize__dec__invalid()     {fxt.Test__normalize_char_references("&#09;", "&#9;");}
    @Test public void Normalize__hex()              {fxt.Test__normalize_char_references("&#xFF;", "&#xff;");}
    @Test public void Normalize__entity()           {fxt.Test__normalize_char_references("&alpha;", "&#945;");}
    @Test public void Normalize__entity__lt()       {fxt.Test__normalize_char_references("&lt;", "&lt;");}
    @Test public void Normalize__entity__alias()    {fxt.Test__normalize_char_references("&רלמ;", "&rlm;");}
    @Test public void Normalize__amp()              {fxt.Test__normalize_char_references("a&b", "a&amp;b");}
    @Test public void Normalize__invalid()          {fxt.Test__normalize_char_references("&(invalid);", "&amp;(invalid);");}
    @Test public void Normalize__many() {
        fxt.Test__normalize_char_references
        ( "a &#09; b &alpha; c &#xFF; d &(invalid); e"
        , "a &#9; b &#945; c &#xff; d &amp;(invalid); e"
        );
    }

    @Test public void Regex__domain() {
        Xomw_regex_find_domain regex_domain = new Xomw_regex_find_domain();
        // normal
        fxt.Test__regex_domain_y(regex_domain, "https://a.org/bcd", "https:", "//a.org", "/bcd");
        // trailing slash
        fxt.Test__regex_domain_y(regex_domain, "https://a.org/", "https:", "//a.org", "/");
        // domain only
        fxt.Test__regex_domain_y(regex_domain, "https://a.org", "https:", "//a.org", "");
        // colon not found
        fxt.Test__regex_domain_n(regex_domain, "https//a.org/bcd");
        // host_bgn.eos
        fxt.Test__regex_domain_n(regex_domain, "https:");
        // host_bgn.//
        fxt.Test__regex_domain_n(regex_domain, "https:a//");
        // host_bgn.///
        fxt.Test__regex_domain_n(regex_domain, "https:///a.org/b");
    }

    @Test public void Regex__clean_url() {
        Xomw_regex_escape_invalid regex = new Xomw_regex_escape_invalid();
        // noop
        fxt.Test__regex_escape_invalid(regex, "https://a.org/bcd", Bool_.N, "");
        // symbols; the input ends with DEL (U+007F), which is what produces the trailing %7F
        fxt.Test__regex_escape_invalid(regex, "[]<>\"|\u007F", Bool_.Y, "%5B%5D%3C%3E%22%7C%7F");
        // range: 00 - 32
        fxt.Test__regex_escape_invalid(regex, "\t\n ", Bool_.Y, "%09%0A+");
    }

    @Test public void Regex__ipv6_brack() {
        Xomw_regex_ipv6_brack regex = new Xomw_regex_ipv6_brack();
        // basic
        fxt.Test__regex_ipv6_brack(regex, Bool_.Y, "//%5B0a.1b:12%5D:123");
        // port: none
        fxt.Test__regex_ipv6_brack(regex, Bool_.Y, "//%5Ba%5D");
        // port: multiple
        fxt.Test__regex_ipv6_brack(regex, Bool_.Y, "//%5Ba%5D:1:2:3");
        // "//%5B" missing
        fxt.Test__regex_ipv6_brack(regex, Bool_.N, "abc");
        // ipv6: invalid
        fxt.Test__regex_ipv6_brack(regex, Bool_.N, "//%5Ba!%5D:1");
        // ipv6: 0-len
        fxt.Test__regex_ipv6_brack(regex, Bool_.N, "//%5B%5D:1");
        // port: invalid
        fxt.Test__regex_ipv6_brack(regex, Bool_.N, "//%5Ba%5D:a");
        // port: 0-len
        fxt.Test__regex_ipv6_brack(regex, Bool_.N, "//%5Ba%5D:");
    }

    @Test public void Decode() {
        // dec
        fxt.Test__decode_char_references("&#33;", "!");
        // hex
        fxt.Test__decode_char_references("&#x23;", "#");
        // entity
        fxt.Test__decode_char_references("&alpha;", "α");
        // entity:lt
        fxt.Test__decode_char_references("&lt;", "<");
        // entity:rlm; the Hebrew alias decodes to the right-to-left mark (U+200F), which is invisible when typed literally
        fxt.Test__decode_char_references("&רלמ;", "\u200F");
        // entity:invalid
        fxt.Test__decode_char_references("&invalid;", "&invalid;");
        // amp
        fxt.Test__decode_char_references("a&b", "a&b");
    }

    @Test public void Clean_url() {
        // entity
        fxt.Test__clean_url("http://a.org/b&amp;c", "http://a.org/b&c");
        // entity: escape
        fxt.Test__clean_url("http://a.org/b&quot;c", "http://a.org/b%22c");
        // domain=n; make sure &quot; is changed, but not soft-hyphen (U+00AD)
        fxt.Test__clean_url("a&quot;\u00ADz", "a%22\u00ADz");
        // host: invalid idn; the hyphen-like character is removed from the host but kept in the path
        fxt.Test__clean_url("http://a᠆b.org/c᠆d", "http://ab.org/c᠆d");
        // ipv6_brack
        fxt.Test__clean_url("http://[0a.1b:12]:123/cd", "http://[0a.1b:12]:123/cd");
    }

    @Test public void Merge_atrs() {
        Xomw_atr_mgr src_atrs = new Xomw_atr_mgr();
        Xomw_atr_mgr trg_atrs = new Xomw_atr_mgr();
        Xomw_atr_mgr expd_atrs = new Xomw_atr_mgr();
        String cls = "class";
        // basic: k1 + k2
        fxt.Test__merge_attributes(src_atrs.Clear().Add_many("k1", "v1"), trg_atrs.Clear().Add_many("k2", "v2"), expd_atrs.Clear().Add_many("k1", "v1", "k2", "v2"));
        // overwrite: k1 + k1
        fxt.Test__merge_attributes(src_atrs.Clear().Add_many("k1", "v1"), trg_atrs.Clear().Add_many("k1", "v1a"), expd_atrs.Clear().Add_many("k1", "v1a"));
        // cls: many
        fxt.Test__merge_attributes(src_atrs.Clear().Add_many(cls, "v1 v2"), trg_atrs.Clear().Add_many(cls, "v3 v4"), expd_atrs.Clear().Add_many(cls, "v1 v2 v3 v4"));
        // cls: src.empty
        fxt.Test__merge_attributes(src_atrs.Clear(), trg_atrs.Clear().Add_many(cls, "v1"), expd_atrs.Clear().Add_many(cls, "v1"));
        // cls: ws
        fxt.Test__merge_attributes(src_atrs.Clear().Add_many(cls, " v1 v2 "), trg_atrs.Clear().Add_many(cls, " v3 v4 "), expd_atrs.Clear().Add_many(cls, "v1 v2 v3 v4"));
    }

    @Test public void normalizeWhitespace() {
        // each of \r\n, \r, \n and \t is expected to come back as a single space
        fxt.Test_normalizeWhitespace("a\r\nb", "a b");
        fxt.Test_normalizeWhitespace("a\rb", "a b");
        fxt.Test_normalizeWhitespace("a\nb", "a b");
        fxt.Test_normalizeWhitespace("a\tb", "a b");
    }
}
/**
 * Test fixture for XomwSanitizerTest: owns one XomwSanitizer plus a reusable
 * byte buffer, and wraps each sanitizer / regex call with the matching
 * Gftest assertion.
 */
class XomwSanitizerFxt {
    private final XomwSanitizer sanitizer = new XomwSanitizer();
    // Scratch buffer shared by all helpers; each helper clears it when reading the result.
    private final Bry_bfr tmp = Bry_bfr_.New();

    /** Runs normalizeCharReferences over the whole of {@code src_str} and compares the buffer contents to {@code expd}. */
    public void Test__normalize_char_references(String src_str, String expd) {
        byte[] src_bry = Bry_.new_u8(src_str);
        sanitizer.normalizeCharReferences(tmp, Bool_.Y, src_bry, 0, src_bry.length);
        Gftest.Eq__str(expd, tmp.To_str_and_clear());
    }

    /** Asserts that {@code regex_domain} matches {@code src_str} and that its prot / host / rest spans equal the expected substrings. */
    public void Test__regex_domain_y(Xomw_regex_find_domain regex_domain, String src_str, String expd_prot, String expd_host, String expd_rest) {
        byte[] src_bry = Bry_.new_u8(src_str);
        Gftest.Eq__bool(true, regex_domain.Match(src_bry, 0, src_bry.length), src_str);
        Gftest.Eq__str(expd_prot, Bry_.Mid(src_bry, regex_domain.prot_bgn, regex_domain.prot_end));
        Gftest.Eq__str(expd_host, Bry_.Mid(src_bry, regex_domain.host_bgn, regex_domain.host_end));
        Gftest.Eq__str(expd_rest, Bry_.Mid(src_bry, regex_domain.rest_bgn, regex_domain.rest_end));
    }

    /** Asserts that {@code regex_domain} does not match {@code src_str}. */
    public void Test__regex_domain_n(Xomw_regex_find_domain regex_domain, String src_str) {
        byte[] src_bry = Bry_.new_u8(src_str);
        Gftest.Eq__bool(false, regex_domain.Match(src_bry, 0, src_bry.length), src_str);
    }

    /** Runs {@code regex.Escape} over {@code src_str}; checks both its boolean result and what it wrote to the buffer. */
    public void Test__regex_escape_invalid(Xomw_regex_escape_invalid regex, String src_str, boolean expd_rslt, String expd_str) {
        byte[] src_bry = Bry_.new_u8(src_str);
        Gftest.Eq__bool(expd_rslt, regex.Escape(tmp, src_bry, 0, src_bry.length));
        Gftest.Eq__str(expd_str, tmp.To_bry_and_clear());
    }

    /** Asserts the match result of {@code regex} against {@code src_str}. */
    public void Test__regex_ipv6_brack(Xomw_regex_ipv6_brack regex, boolean expd_rslt, String src_str) {
        byte[] src_bry = Bry_.new_u8(src_str);
        Gftest.Eq__bool(expd_rslt, regex.Match(src_bry, 0, src_bry.length));
    }

    /** Runs decodeCharReferences over the whole of {@code src_str} and compares the buffer contents to {@code expd}. */
    public void Test__decode_char_references(String src_str, String expd) {
        byte[] src_bry = Bry_.new_u8(src_str);
        sanitizer.decodeCharReferences(tmp, Bool_.Y, src_bry, 0, src_bry.length);
        Gftest.Eq__str(expd, tmp.To_str_and_clear());
    }

    /** Compares the result of cleanUrl on {@code src_str} to {@code expd}. */
    public void Test__clean_url(String src_str, String expd) {
        byte[] src_bry = Bry_.new_u8(src_str);
        Gftest.Eq__str(expd, sanitizer.cleanUrl(src_bry));
    }

    /** Calls mergeAttributes(src, trg), then compares the resulting {@code src} with {@code expd} via their To_str output. */
    public void Test__merge_attributes(Xomw_atr_mgr src, Xomw_atr_mgr trg, Xomw_atr_mgr expd) {
        sanitizer.mergeAttributes(src, trg);
        Gftest.Eq__ary__lines(expd.To_str(tmp), src.To_str(tmp), "merge_atrs");
    }

    /** Compares the result of normalizeWhitespace on {@code src_str} to {@code expd}. */
    public void Test_normalizeWhitespace(String src_str, String expd) {
        // The label previously read "merge_atrs" (copied from the helper above), which would mislabel a failure.
        Gftest.Eq__str(expd, sanitizer.normalizeWhitespace(Bry_.new_u8(src_str)), "normalizeWhitespace");
    }
}

View File

@@ -13,3 +13,887 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki.includes; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*;
/**
* Include most things that are needed to make MediaWiki work.
*
* This file is included by WebStart.php and doMaintenance.php so that both
* web and maintenance scripts share a final set up phase to include necessary
* files and create global object variables.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
* http://www.gnu.org/copyleft/gpl.html
*
* @file
*/
public class XomwSetup {
// /**
// * This file is not a valid entry point, perform no further processing unless
// * MEDIAWIKI is defined
// */
// if ( !defined( 'MEDIAWIKI' ) ) {
// exit( 1 );
// }
//
// $fname = 'Setup.php';
// $ps_setup = Profiler::instance()->scopedProfileIn( $fname );
//
// // Load queued extensions
// ExtensionRegistry::getInstance()->loadFromQueue();
// // Don't let any other extensions load
// ExtensionRegistry::getInstance()->finish();
//
// // Check to see if we are at the file scope
// if ( !isset( $wgVersion ) ) {
// echo "Error, Setup.php must be included from the file scope, after DefaultSettings.php\n";
// die( 1 );
// }
//
// mb_internal_encoding( 'UTF-8' );
//
// // Set various default paths sensibly...
// $ps_default = Profiler::instance()->scopedProfileIn( $fname . '-defaults' );
//
// if ( $wgScript === false ) {
// $wgScript = "$wgScriptPath/index.php";
// }
// if ( $wgLoadScript === false ) {
// $wgLoadScript = "$wgScriptPath/load.php";
// }
//
// if ( $wgArticlePath === false ) {
// if ( $wgUsePathInfo ) {
// $wgArticlePath = "$wgScript/$1";
// } else {
// $wgArticlePath = "$wgScript?title=$1";
// }
// }
//
// if ( !empty( $wgActionPaths ) && !isset( $wgActionPaths['view'] ) ) {
// // 'view' is assumed the default action path everywhere in the code
// // but is rarely filled in $wgActionPaths
// $wgActionPaths['view'] = $wgArticlePath;
// }
//
// if ( $wgResourceBasePath === null ) {
// $wgResourceBasePath = $wgScriptPath;
// }
// if ( $wgStylePath === false ) {
// $wgStylePath = "$wgResourceBasePath/skins";
// }
// if ( $wgLocalStylePath === false ) {
// // Avoid wgResourceBasePath here since that may point to a different domain (e.g. CDN)
// $wgLocalStylePath = "$wgScriptPath/skins";
// }
// if ( $wgExtensionAssetsPath === false ) {
// $wgExtensionAssetsPath = "$wgResourceBasePath/extensions";
// }
//
// if ( $wgLogo === false ) {
// $wgLogo = "$wgResourceBasePath/resources/assets/wiki.png";
// }
//
// if ( $wgUploadPath === false ) {
// $wgUploadPath = "$wgScriptPath/images";
// }
// if ( $wgUploadDirectory === false ) {
// $wgUploadDirectory = "$IP/images";
// }
// if ( $wgReadOnlyFile === false ) {
// $wgReadOnlyFile = "{$wgUploadDirectory}/lock_yBgMBwiR";
// }
// if ( $wgFileCacheDirectory === false ) {
// $wgFileCacheDirectory = "{$wgUploadDirectory}/cache";
// }
// if ( $wgDeletedDirectory === false ) {
// $wgDeletedDirectory = "{$wgUploadDirectory}/deleted";
// }
//
// if ( $wgGitInfoCacheDirectory === false && $wgCacheDirectory !== false ) {
// $wgGitInfoCacheDirectory = "{$wgCacheDirectory}/gitinfo";
// }
//
// if ( $wgEnableParserCache === false ) {
// $wgParserCacheType = CACHE_NONE;
// }
//
// // Fix path to icon images after they were moved in 1.24
// if ( $wgRightsIcon ) {
// $wgRightsIcon = str_replace(
// "{$wgStylePath}/common/images/",
// "{$wgResourceBasePath}/resources/assets/licenses/",
// $wgRightsIcon
// );
// }
//
// if ( isset( $wgFooterIcons['copyright']['copyright'] )
// && $wgFooterIcons['copyright']['copyright'] === []
// ) {
// if ( $wgRightsIcon || $wgRightsText ) {
// $wgFooterIcons['copyright']['copyright'] = [
// 'url' => $wgRightsUrl,
// 'src' => $wgRightsIcon,
// 'alt' => $wgRightsText,
// ];
// }
// }
//
// if ( isset( $wgFooterIcons['poweredby'] )
// && isset( $wgFooterIcons['poweredby']['mediawiki'] )
// && $wgFooterIcons['poweredby']['mediawiki']['src'] === null
// ) {
// $wgFooterIcons['poweredby']['mediawiki']['src'] =
// "$wgResourceBasePath/resources/assets/poweredby_mediawiki_88x31.png";
// $wgFooterIcons['poweredby']['mediawiki']['srcset'] =
// "$wgResourceBasePath/resources/assets/poweredby_mediawiki_132x47.png 1.5x, " .
// "$wgResourceBasePath/resources/assets/poweredby_mediawiki_176x62.png 2x";
// }
//
// /**
// * Unconditional protection for NS_MEDIAWIKI since otherwise it's too easy for a
// * sysadmin to set $wgNamespaceProtection incorrectly and leave the wiki insecure.
// *
// * Note that this is the definition of editinterface and it can be granted to
// * all users if desired.
// */
// $wgNamespaceProtection[NS_MEDIAWIKI] = 'editinterface';
/**
 * Backwards-compatibility aliases for namespaces 6 and 7.
 * As of v1.14 their canonical names are "File" and "File_talk"; the older
 * "Image" and "Image_talk" names are kept here and map to the same namespace ids.
 */
public static final XomwNamespacesByName wgNamespaceAliases = New_namespace_aliases();
// Builds the alias table: legacy name -> namespace id
private static XomwNamespacesByName New_namespace_aliases() {
  return new XomwNamespacesByName()
    .Add("Image", XomwDefines.NS_FILE)
    .Add("Image_talk", XomwDefines.NS_FILE_TALK);
}
// /**
// * Initialise $wgLockManagers to include basic FS version
// */
// $wgLockManagers[] = [
// 'name' => 'fsLockManager',
// 'class' => 'FSLockManager',
// 'lockDirectory' => "{$wgUploadDirectory}/lockdir",
// ];
// $wgLockManagers[] = [
// 'name' => 'nullLockManager',
// 'class' => 'NullLockManager',
// ];
//
// /**
// * Initialise $wgLocalFileRepo from backwards-compatible settings
// */
// if ( !$wgLocalFileRepo ) {
// $wgLocalFileRepo = [
// 'class' => 'LocalRepo',
// 'name' => 'local',
// 'directory' => $wgUploadDirectory,
// 'scriptDirUrl' => $wgScriptPath,
// 'scriptExtension' => '.php',
// 'url' => $wgUploadBaseUrl ? $wgUploadBaseUrl . $wgUploadPath : $wgUploadPath,
// 'hashLevels' => $wgHashedUploadDirectory ? 2 : 0,
// 'thumbScriptUrl' => $wgThumbnailScriptPath,
// 'transformVia404' => !$wgGenerateThumbnailOnParse,
// 'deletedDir' => $wgDeletedDirectory,
// 'deletedHashLevels' => $wgHashedUploadDirectory ? 3 : 0
// ];
// }
// /**
// * Initialise shared repo from backwards-compatible settings
// */
// if ( $wgUseSharedUploads ) {
// if ( $wgSharedUploadDBname ) {
// $wgForeignFileRepos[] = [
// 'class' => 'ForeignDBRepo',
// 'name' => 'shared',
// 'directory' => $wgSharedUploadDirectory,
// 'url' => $wgSharedUploadPath,
// 'hashLevels' => $wgHashedSharedUploadDirectory ? 2 : 0,
// 'thumbScriptUrl' => $wgSharedThumbnailScriptPath,
// 'transformVia404' => !$wgGenerateThumbnailOnParse,
// 'dbType' => $wgDBtype,
// 'dbServer' => $wgDBserver,
// 'dbUser' => $wgDBuser,
// 'dbPassword' => $wgDBpassword,
// 'dbName' => $wgSharedUploadDBname,
// 'dbFlags' => ( $wgDebugDumpSql ? DBO_DEBUG : 0 ) | DBO_DEFAULT,
// 'tablePrefix' => $wgSharedUploadDBprefix,
// 'hasSharedCache' => $wgCacheSharedUploads,
// 'descBaseUrl' => $wgRepositoryBaseUrl,
// 'fetchDescription' => $wgFetchCommonsDescriptions,
// ];
// } else {
// $wgForeignFileRepos[] = [
// 'class' => 'FileRepo',
// 'name' => 'shared',
// 'directory' => $wgSharedUploadDirectory,
// 'url' => $wgSharedUploadPath,
// 'hashLevels' => $wgHashedSharedUploadDirectory ? 2 : 0,
// 'thumbScriptUrl' => $wgSharedThumbnailScriptPath,
// 'transformVia404' => !$wgGenerateThumbnailOnParse,
// 'descBaseUrl' => $wgRepositoryBaseUrl,
// 'fetchDescription' => $wgFetchCommonsDescriptions,
// ];
// }
// }
// if ( $wgUseInstantCommons ) {
// $wgForeignFileRepos[] = [
// 'class' => 'ForeignAPIRepo',
// 'name' => 'wikimediacommons',
// 'apibase' => 'https://commons.wikimedia.org/w/api.php',
// 'url' => 'https://upload.wikimedia.org/wikipedia/commons',
// 'thumbUrl' => 'https://upload.wikimedia.org/wikipedia/commons/thumb',
// 'hashLevels' => 2,
// 'transformVia404' => true,
// 'fetchDescription' => true,
// 'descriptionCacheExpiry' => 43200,
// 'apiThumbCacheExpiry' => 0,
// ];
// }
// /*
// * Add on default file backend config for file repos.
// * FileBackendGroup will handle initializing the backends.
// */
// if ( !isset( $wgLocalFileRepo['backend'] ) ) {
// $wgLocalFileRepo['backend'] = $wgLocalFileRepo['name'] . '-backend';
// }
// foreach ( $wgForeignFileRepos as &$repo ) {
// if ( !isset( $repo['directory'] ) && $repo['class'] === 'ForeignAPIRepo' ) {
// $repo['directory'] = $wgUploadDirectory; // b/c
// }
// if ( !isset( $repo['backend'] ) ) {
// $repo['backend'] = $repo['name'] . '-backend';
// }
// }
// unset( $repo ); // no global pollution; destroy reference
//
// $rcMaxAgeDays = $wgRCMaxAge / ( 3600 * 24 );
// if ( $wgRCFilterByAge ) {
// // Trim down $wgRCLinkDays so that it only lists links which are valid
// // as determined by $wgRCMaxAge.
// // Note that we allow 1 link higher than the max for things like 56 days but a 60 day link.
// sort( $wgRCLinkDays );
//
// // @codingStandardsIgnoreStart Generic.CodeAnalysis.ForLoopWithTestFunctionCall.NotAllowed
// for ( $i = 0; $i < count( $wgRCLinkDays ); $i++ ) {
// // @codingStandardsIgnoreEnd
// if ( $wgRCLinkDays[$i] >= $rcMaxAgeDays ) {
// $wgRCLinkDays = array_slice( $wgRCLinkDays, 0, $i + 1, false );
// break;
// }
// }
// }
// // Ensure that default user options are not invalid, since that breaks Special:Preferences
// $wgDefaultUserOptions['rcdays'] = min(
// $wgDefaultUserOptions['rcdays'],
// ceil( $rcMaxAgeDays )
// );
// $wgDefaultUserOptions['watchlistdays'] = min(
// $wgDefaultUserOptions['watchlistdays'],
// ceil( $rcMaxAgeDays )
// );
// unset( $rcMaxAgeDays );
//
// if ( $wgSkipSkin ) {
// $wgSkipSkins[] = $wgSkipSkin;
// }
//
// $wgSkipSkins[] = 'fallback';
// $wgSkipSkins[] = 'apioutput';
//
// if ( $wgLocalInterwiki ) {
// array_unshift( $wgLocalInterwikis, $wgLocalInterwiki );
// }
//
// // Set default shared prefix
// if ( $wgSharedPrefix === false ) {
// $wgSharedPrefix = $wgDBprefix;
// }
//
// // Set default shared schema
// if ( $wgSharedSchema === false ) {
// $wgSharedSchema = $wgDBmwschema;
// }
//
// if ( !$wgCookiePrefix ) {
// if ( $wgSharedDB && $wgSharedPrefix && in_array( 'user', $wgSharedTables ) ) {
// $wgCookiePrefix = $wgSharedDB . '_' . $wgSharedPrefix;
// } elseif ( $wgSharedDB && in_array( 'user', $wgSharedTables ) ) {
// $wgCookiePrefix = $wgSharedDB;
// } elseif ( $wgDBprefix ) {
// $wgCookiePrefix = $wgDBname . '_' . $wgDBprefix;
// } else {
// $wgCookiePrefix = $wgDBname;
// }
// }
// $wgCookiePrefix = strtr( $wgCookiePrefix, '=,; +."\'\\[', '__________' );
//
// if ( $wgEnableEmail ) {
// $wgUseEnotif = $wgEnotifUserTalk || $wgEnotifWatchlist;
// } else {
// // Disable all other email settings automatically if $wgEnableEmail
// // is set to false. - bug 63678
// $wgAllowHTMLEmail = false;
// $wgEmailAuthentication = false; // do not require auth if you're not sending email anyway
// $wgEnableUserEmail = false;
// $wgEnotifFromEditor = false;
// $wgEnotifImpersonal = false;
// $wgEnotifMaxRecips = 0;
// $wgEnotifMinorEdits = false;
// $wgEnotifRevealEditorAddress = false;
// $wgEnotifUseRealName = false;
// $wgEnotifUserTalk = false;
// $wgEnotifWatchlist = false;
// unset( $wgGroupPermissions['user']['sendemail'] );
// $wgUseEnotif = false;
// $wgUserEmailUseReplyTo = false;
// $wgUsersNotifiedOnAllChanges = [];
// }
//
// if ( $wgMetaNamespace === false ) {
// $wgMetaNamespace = str_replace( ' ', '_', $wgSitename );
// }
//
// // Default value is 2000 or the suhosin limit if it is between 1 and 2000
// if ( $wgResourceLoaderMaxQueryLength === false ) {
// $suhosinMaxValueLength = (int)ini_get( 'suhosin.get.max_value_length' );
// if ( $suhosinMaxValueLength > 0 && $suhosinMaxValueLength < 2000 ) {
// $wgResourceLoaderMaxQueryLength = $suhosinMaxValueLength;
// } else {
// $wgResourceLoaderMaxQueryLength = 2000;
// }
// unset( $suhosinMaxValueLength );
// }
//
// // Ensure the minimum chunk size is less than PHP upload limits or the maximum
// // upload size.
// $wgMinUploadChunkSize = min(
// $wgMinUploadChunkSize,
// UploadBase::getMaxUploadSize( 'file' ),
// UploadBase::getMaxXophpUploadSize(),
// ( wfShorthandToInteger(
// ini_get( 'post_max_size' ) ?: ini_get( 'hhvm.server.max_post_size' ),
// PHP_INT_MAX
// ) ?: PHP_INT_MAX ) - 1024 // Leave some room for other POST parameters
// );
/**
 * Canonical namespace names keyed by namespace id.
 * The NS_ constants are defined in Defines.php (XomwDefines in this port).
 * NOTE(review): unlike wgNamespaceAliases this field is not final; confirm whether reassignment is intended.
 * @private
 */
public static XomwNamespacesById wgCanonicalNamespaceNames = New_canonical_namespace_names();
// Builds the id -> canonical name table in the same order as MediaWiki's Setup.php
private static XomwNamespacesById New_canonical_namespace_names() {
  return new XomwNamespacesById()
    .Add(XomwDefines.NS_MEDIA, "Media")
    .Add(XomwDefines.NS_SPECIAL, "Special")
    .Add(XomwDefines.NS_TALK, "Talk")
    .Add(XomwDefines.NS_USER, "User")
    .Add(XomwDefines.NS_USER_TALK, "User_talk")
    .Add(XomwDefines.NS_PROJECT, "Project")
    .Add(XomwDefines.NS_PROJECT_TALK, "Project_talk")
    .Add(XomwDefines.NS_FILE, "File")
    .Add(XomwDefines.NS_FILE_TALK, "File_talk")
    .Add(XomwDefines.NS_MEDIAWIKI, "MediaWiki")
    .Add(XomwDefines.NS_MEDIAWIKI_TALK, "MediaWiki_talk")
    .Add(XomwDefines.NS_TEMPLATE, "Template")
    .Add(XomwDefines.NS_TEMPLATE_TALK, "Template_talk")
    .Add(XomwDefines.NS_HELP, "Help")
    .Add(XomwDefines.NS_HELP_TALK, "Help_talk")
    .Add(XomwDefines.NS_CATEGORY, "Category")
    .Add(XomwDefines.NS_CATEGORY_TALK, "Category_talk");
}
// /// @todo UGLY UGLY
// if ( is_array( $wgExtraNamespaces ) ) {
// $wgCanonicalNamespaceNames = $wgCanonicalNamespaceNames + $wgExtraNamespaces;
// }
//
// // These are now the same, always
// // To determine the user language, use $wgLang->getCode()
// $wgContLanguageCode = $wgLanguageCode;
//
// // Easy to forget to falsify $wgDebugToolbar for static caches.
// // If file cache or CDN cache is on, just disable this (DWIMD).
// if ( $wgUseFileCache || $wgUseSquid ) {
// $wgDebugToolbar = false;
// }
//
// // We always output HTML5 since 1.22, overriding these is no longer supported
// // we set them here for extensions that depend on its value.
// $wgHtml5 = true;
// $wgXhtmlDefaultNamespace = 'http://www.w3.org/1999/xhtml';
// $wgJsMimeType = 'text/javascript';
//
// // Blacklisted file extensions shouldn't appear on the "allowed" list
// $wgFileExtensions = array_values( array_diff( $wgFileExtensions, $wgFileBlacklist ) );
//
// if ( $wgInvalidateCacheOnLocalSettingsChange ) {
// MediaWiki\suppressWarnings();
// $wgCacheEpoch = max( $wgCacheEpoch, gmdate( 'YmdHis', filemtime( "$IP/LocalSettings.php" ) ) );
// MediaWiki\restoreWarnings();
// }
//
// if ( $wgNewUserLog ) {
// // Add a new log type
// $wgLogTypes[] = 'newusers';
// $wgLogNames['newusers'] = 'newuserlogpage';
// $wgLogHeaders['newusers'] = 'newuserlogpagetext';
// $wgLogActionsHandlers['newusers/newusers'] = 'NewUsersLogFormatter';
// $wgLogActionsHandlers['newusers/create'] = 'NewUsersLogFormatter';
// $wgLogActionsHandlers['newusers/create2'] = 'NewUsersLogFormatter';
// $wgLogActionsHandlers['newusers/byemail'] = 'NewUsersLogFormatter';
// $wgLogActionsHandlers['newusers/autocreate'] = 'NewUsersLogFormatter';
// }
//
// if ( $wgPageLanguageUseDB ) {
// $wgLogTypes[] = 'pagelang';
// $wgLogActionsHandlers['pagelang/pagelang'] = 'PageLangLogFormatter';
// }
//
// if ( $wgCookieSecure === 'detect' ) {
// $wgCookieSecure = ( WebRequest::detectProtocol() === 'https' );
// }
//
// if ( $wgProfileOnly ) {
// $wgDebugLogGroups['profileoutput'] = $wgDebugLogFile;
// $wgDebugLogFile = '';
// }
//
// // Backwards compatibility with old password limits
// if ( $wgMinimalPasswordLength !== false ) {
// $wgPasswordPolicy['policies']['default']['MinimalPasswordLength'] = $wgMinimalPasswordLength;
// }
//
// if ( $wgMaximalPasswordLength !== false ) {
// $wgPasswordPolicy['policies']['default']['MaximalPasswordLength'] = $wgMaximalPasswordLength;
// }
//
// // Backwards compatibility warning
// if ( !$wgSessionsInObjectCache ) {
// wfDeprecated( '$wgSessionsInObjectCache = false', '1.27' );
// if ( $wgSessionHandler ) {
// wfDeprecated( '$wgSessionsHandler', '1.27' );
// }
// $cacheType = get_class( ObjectCache::getInstance( $wgSessionCacheType ) );
// wfDebugLog(
// 'caches',
// "Session data will be stored in \"$cacheType\" cache with " .
// "expiry $wgObjectCacheSessionExpiry seconds"
// );
// }
// $wgSessionsInObjectCache = true;
//
// if ( $wgPHPSessionHandling !== 'enable' &&
// $wgPHPSessionHandling !== 'warn' &&
// $wgPHPSessionHandling !== 'disable'
// ) {
// $wgPHPSessionHandling = 'warn';
// }
// if ( defined( 'MW_NO_SESSION' ) ) {
// // If the entry point wants no session, force 'disable' here unless they
// // specifically set it to the (undocumented) 'warn'.
// $wgPHPSessionHandling = MW_NO_SESSION === 'warn' ? 'warn' : 'disable';
// }
//
// Profiler::instance()->scopedProfileOut( $ps_default );
//
// // Disable MWDebug for command line mode, this prevents MWDebug from eating up
// // all the memory from logging SQL queries on maintenance scripts
// global $wgCommandLineMode;
// if ( $wgDebugToolbar && !$wgCommandLineMode ) {
// MWDebug::init();
// }
//
// // Reset the global service locator, so any services that have already been created will be
// // re-created while taking into account any custom settings and extensions.
// MediaWikiServices::resetGlobalInstance( new GlobalVarConfig(), 'quick' );
//
// if ( $wgSharedDB && $wgSharedTables ) {
// // Apply $wgSharedDB table aliases for the local LB (all non-foreign DB connections)
// MediaWikiServices::getInstance()->getDBLoadBalancer()->setTableAliases(
// array_fill_keys(
// $wgSharedTables,
// [
// 'dbname' => $wgSharedDB,
// 'schema' => $wgSharedSchema,
// 'prefix' => $wgSharedPrefix
// ]
// )
// );
// }
//
// // Define a constant that indicates that the bootstrapping of the service locator
// // is complete.
// define( 'MW_SERVICE_BOOTSTRAP_COMPLETE', 1 );
//
// // Install a header callback to prevent caching of responses with cookies (T127993)
// if ( !$wgCommandLineMode ) {
// header_register_callback( function () {
// $headers = [];
// foreach ( headers_list() as $header ) {
// list( $name, $value ) = explode( ':', $header, 2 );
// $headers[strtolower( trim( $name ) )][] = trim( $value );
// }
//
// if ( isset( $headers['set-cookie'] ) ) {
// $cacheControl = isset( $headers['cache-control'] )
// ? implode( ', ', $headers['cache-control'] )
// : '';
//
// if ( !preg_match( '/(?:^|,)\s*(?:private|no-cache|no-store)\s*(?:$|,)/i', $cacheControl ) ) {
// header( 'Expires: Thu, 01 Jan 1970 00:00:00 GMT' );
// header( 'Cache-Control: private, max-age=0, s-maxage=0' );
// MediaWiki\Logger\LoggerFactory::getInstance( 'cache-cookies' )->warning(
// 'Cookies set on {url} with Cache-Control "{cache-control}"', [
// 'url' => WebRequest::getGlobalRequestURL(),
// 'cookies' => $headers['set-cookie'],
// 'cache-control' => $cacheControl ?: '<not set>',
// ]
// );
// }
// }
// } );
// }
//
// MWExceptionHandler::installHandler();
//
// require_once "$IP/includes/compat/normal/UtfNormalUtil.php";
//
// $ps_validation = Profiler::instance()->scopedProfileIn( $fname . '-validation' );
//
// // T48998: Bail out early if $wgArticlePath is non-absolute
// foreach ( [ 'wgArticlePath', 'wgVariantArticlePath' ] as $varName ) {
// if ( $$varName && !preg_match( '/^(https?:\/\/|\/)/', $$varName ) ) {
// throw new FatalError(
// "If you use a relative URL for \$$varName, it must start " .
// 'with a slash (<code>/</code>).<br><br>See ' .
// "<a href=\"https://www.mediawiki.org/wiki/Manual:\$$varName\">" .
// "https://www.mediawiki.org/wiki/Manual:\$$varName</a>."
// );
// }
// }
//
// Profiler::instance()->scopedProfileOut( $ps_validation );
//
// $ps_default2 = Profiler::instance()->scopedProfileIn( $fname . '-defaults2' );
//
// if ( $wgCanonicalServer === false ) {
// $wgCanonicalServer = wfExpandUrl( $wgServer, PROTO_HTTP );
// }
//
// // Set server name
// $serverParts = wfParseUrl( $wgCanonicalServer );
// if ( $wgServerName !== false ) {
// wfWarn( '$wgServerName should be derived from $wgCanonicalServer, '
// . 'not customized. Overwriting $wgServerName.' );
// }
// $wgServerName = $serverParts['host'];
// unset( $serverParts );
//
// // Set defaults for configuration variables
// // that are derived from the server name by default
// // Note: $wgEmergencyContact and $wgPasswordSender may be false or empty String (T104142)
// if ( !$wgEmergencyContact ) {
// $wgEmergencyContact = 'wikiadmin@' . $wgServerName;
// }
// if ( !$wgPasswordSender ) {
// $wgPasswordSender = 'apache@' . $wgServerName;
// }
// if ( !$wgNoReplyAddress ) {
// $wgNoReplyAddress = $wgPasswordSender;
// }
//
// if ( $wgSecureLogin && substr( $wgServer, 0, 2 ) !== '//' ) {
// $wgSecureLogin = false;
// wfWarn( 'Secure login was enabled on a server that only supports '
// . 'HTTP or HTTPS. Disabling secure login.' );
// }
//
// $wgVirtualRestConfig['global']['domain'] = $wgCanonicalServer;
//
// // Now that GlobalFunctions is loaded, set defaults that depend on it.
// if ( $wgTmpDirectory === false ) {
// $wgTmpDirectory = wfTempDir();
// }
//
// // We don't use counters anymore. Left here for extensions still
// // expecting this to exist. Should be removed sometime 1.26 or later.
// if ( !isset( $wgDisableCounters ) ) {
// $wgDisableCounters = true;
// }
//
// if ( $wgMainWANCache === false ) {
// // Setup a WAN cache from $wgMainCacheType with no relayer.
// // Sites using multiple datacenters can configure a relayer.
// $wgMainWANCache = 'mediawiki-main-default';
// $wgWANObjectCaches[$wgMainWANCache] = [
// 'class' => 'WANObjectCache',
// 'cacheId' => $wgMainCacheType,
// 'channels' => [ 'purge' => 'wancache-main-default-purge' ]
// ];
// }
//
// Profiler::instance()->scopedProfileOut( $ps_default2 );
//
// $ps_misc = Profiler::instance()->scopedProfileIn( $fname . '-misc1' );
//
// // Raise the memory limit if it's too low
// wfMemoryLimit();
//
// /**
// * Set up the timezone, suppressing the pseudo-security warning in PHP 5.1+
// * that happens whenever you use a date function without the timezone being
// * explicitly set. Inspired by phpMyAdmin's treatment of the problem.
// */
// if ( is_null( $wgLocaltimezone ) ) {
// MediaWiki\suppressWarnings();
// $wgLocaltimezone = date_default_timezone_get();
// MediaWiki\restoreWarnings();
// }
//
// date_default_timezone_set( $wgLocaltimezone );
// if ( is_null( $wgLocalTZoffset ) ) {
// $wgLocalTZoffset = date( 'Z' ) / 60;
// }
// // The part after the System| is ignored, but rest of MW fills it
// // out as the local offset.
// $wgDefaultUserOptions['timecorrection'] = "System|$wgLocalTZoffset";
//
// if ( !$wgDBerrorLogTZ ) {
// $wgDBerrorLogTZ = $wgLocaltimezone;
// }
//
// // initialize the request Object in $wgRequest
// $wgRequest = RequestContext::getMain()->getRequest(); // BackCompat
// // Set user IP/agent information for causal consistency purposes
// MediaWikiServices::getInstance()->getDBLoadBalancerFactory()->setRequestInfo( [
// 'IPAddress' => $wgRequest->getIP(),
// 'UserAgent' => $wgRequest->getHeader( 'User-Agent' ),
// 'ChronologyProtection' => $wgRequest->getHeader( 'ChronologyProtection' )
// ] );
//
// // Useful debug output
// if ( $wgCommandLineMode ) {
// wfDebug( "\n\nStart command line script $self\n" );
// } else {
// $debug = "\n\nStart request {$wgRequest->getMethod()} {$wgRequest->getRequestURL()}\n";
//
// if ( $wgDebugPrintHttpHeaders ) {
// $debug .= "HTTP HEADERS:\n";
//
// foreach ( $wgRequest->getAllHeaders() as $name => $value ) {
// $debug .= "$name: $value\n";
// }
// }
// wfDebug( $debug );
// }
//
// Profiler::instance()->scopedProfileOut( $ps_misc );
// $ps_memcached = Profiler::instance()->scopedProfileIn( $fname . '-memcached' );
//
// $wgMemc = wfGetMainCache();
// $messageMemc = wfGetMessageCacheStorage();
// $parserMemc = wfGetParserCacheStorage();
//
// wfDebugLog( 'caches',
// 'cluster: ' . get_class( $wgMemc ) .
// ', WAN: ' . ( $wgMainWANCache === CACHE_NONE ? 'CACHE_NONE' : $wgMainWANCache ) .
// ', stash: ' . $wgMainStash .
// ', message: ' . get_class( $messageMemc ) .
// ', parser: ' . get_class( $parserMemc ) .
// ', session: ' . get_class( ObjectCache::getInstance( $wgSessionCacheType ) )
// );
//
// Profiler::instance()->scopedProfileOut( $ps_memcached );
//
// // Most of the config is out, some might want to run hooks here.
// Hooks::run( 'SetupAfterCache' );
//
// $ps_globals = Profiler::instance()->scopedProfileIn( $fname . '-globals' );
//
// /**
// * @var Language $wgContLang
// */
// $wgContLang = Language::factory( $wgLanguageCode );
// $wgContLang->initContLang();
//
// // Now that variant lists may be available...
// $wgRequest->interpolateTitle();
//
// if ( !is_object( $wgAuth ) ) {
// $wgAuth = new MediaWiki\Auth\AuthManagerAuthPlugin;
// Hooks::run( 'AuthPluginSetup', [ &$wgAuth ] );
// }
// if ( $wgAuth && !$wgAuth instanceof MediaWiki\Auth\AuthManagerAuthPlugin ) {
// MediaWiki\Auth\AuthManager::singleton()->forcePrimaryAuthenticationProviders( [
// new MediaWiki\Auth\TemporaryPasswordPrimaryAuthenticationProvider( [
// 'authoritative' => false,
// ] ),
// new MediaWiki\Auth\AuthPluginPrimaryAuthenticationProvider( $wgAuth ),
// new MediaWiki\Auth\LocalPasswordPrimaryAuthenticationProvider( [
// 'authoritative' => true,
// ] ),
// ], '$wgAuth is ' . get_class( $wgAuth ) );
// }
//
// // Set up the session
// $ps_session = Profiler::instance()->scopedProfileIn( $fname . '-session' );
// /**
// * @var MediaWiki\Session\SessionId|null $wgInitialSessionId The persistent
// * session ID (if any) loaded at startup
// */
// $wgInitialSessionId = null;
// if ( !defined( 'MW_NO_SESSION' ) && !$wgCommandLineMode ) {
// // If session.auto_start is there, we can't touch session name
// if ( $wgPHPSessionHandling !== 'disable' && !wfIniGetBool( 'session.auto_start' ) ) {
// session_name( $wgSessionName ? $wgSessionName : $wgCookiePrefix . '_session' );
// }
//
// // Create the SessionManager singleton and set up our session handler,
// // unless we're specifically asked not to.
// if ( !defined( 'MW_NO_SESSION_HANDLER' ) ) {
// MediaWiki\Session\PHPSessionHandler::install(
// MediaWiki\Session\SessionManager::singleton()
// );
// }
//
// // Initialize the session
// try {
// $session = MediaWiki\Session\SessionManager::getGlobalSession();
// } catch ( OverflowException $ex ) {
// if ( isset( $ex->sessionInfos ) && count( $ex->sessionInfos ) >= 2 ) {
// // The exception is because the request had multiple possible
// // sessions tied for top priority. Report this to the user.
// $list = [];
// foreach ( $ex->sessionInfos as $info ) {
// $list[] = $info->getProvider()->describe( $wgContLang );
// }
// $list = $wgContLang->listToText( $list );
// throw new HttpError( 400,
// Message::newFromKey( 'sessionmanager-tie', $list )->inLanguage( $wgContLang )->plain()
// );
// }
//
// // Not the one we want, rethrow
// throw $ex;
// }
//
// if ( $session->isPersistent() ) {
// $wgInitialSessionId = $session->getSessionId();
// }
//
// $session->renew();
// if ( MediaWiki\Session\PHPSessionHandler::isEnabled() &&
// ( $session->isPersistent() || $session->shouldRememberUser() )
// ) {
// // Start the PHP-session for backwards compatibility
// session_id( $session->getId() );
// MediaWiki\quietCall( 'session_start' );
// }
//
// unset( $session );
// } else {
// // Even if we didn't set up a global Session, still install our session
// // handler unless specifically requested not to.
// if ( !defined( 'MW_NO_SESSION_HANDLER' ) ) {
// MediaWiki\Session\PHPSessionHandler::install(
// MediaWiki\Session\SessionManager::singleton()
// );
// }
// }
// Profiler::instance()->scopedProfileOut( $ps_session );
//
// /**
// * @var User $wgUser
// */
// $wgUser = RequestContext::getMain()->getUser(); // BackCompat
//
// /**
// * @var Language $wgLang
// */
// $wgLang = new StubUserLang;
//
// /**
// * @var OutputPage $wgOut
// */
// $wgOut = RequestContext::getMain()->getOutput(); // BackCompat
//
// /**
// * @var Parser $wgParser
// */
// $wgParser = new StubObject( 'wgParser', function () {
// return MediaWikiServices::getInstance()->getParser();
// } );
//
// /**
// * @var Title $wgTitle
// */
// $wgTitle = null;
//
// Profiler::instance()->scopedProfileOut( $ps_globals );
// $ps_extensions = Profiler::instance()->scopedProfileIn( $fname . '-extensions' );
//
// // Extension setup functions
// // Entries should be added to this variable during the inclusion
// // of the extension file. This allows the extension to perform
// // any necessary initialisation in the fully initialised environment
// foreach ( $wgExtensionFunctions as $func ) {
// // Allow closures in PHP 5.3+
// if ( is_object( $func ) && $func instanceof Closure ) {
// $profName = $fname . '-extensions-closure';
// } elseif ( is_array( $func ) ) {
// if ( is_object( $func[0] ) ) {
// $profName = $fname . '-extensions-' . get_class( $func[0] ) . '::' . $func[1];
// } else {
// $profName = $fname . '-extensions-' . implode( '::', $func );
// }
// } else {
// $profName = $fname . '-extensions-' . strval( $func );
// }
//
// $ps_ext_func = Profiler::instance()->scopedProfileIn( $profName );
// call_user_func( $func );
// Profiler::instance()->scopedProfileOut( $ps_ext_func );
// }
//
// // If the session user has a 0 id but a valid name, that means we need to
// // autocreate it.
// if ( !defined( 'MW_NO_SESSION' ) && !$wgCommandLineMode ) {
// $sessionUser = MediaWiki\Session\SessionManager::getGlobalSession()->getUser();
// if ( $sessionUser->getId() === 0 && User::isValidUserName( $sessionUser->getName() ) ) {
// $ps_autocreate = Profiler::instance()->scopedProfileIn( $fname . '-autocreate' );
// $res = MediaWiki\Auth\AuthManager::singleton()->autoCreateUser(
// $sessionUser,
// MediaWiki\Auth\AuthManager::AUTOCREATE_SOURCE_SESSION,
// true
// );
// Profiler::instance()->scopedProfileOut( $ps_autocreate );
// \MediaWiki\Logger\LoggerFactory::getInstance( 'authevents' )->info( 'Autocreation attempt', [
// 'event' => 'autocreate',
// 'status' => $res,
// ] );
// unset( $res );
// }
// unset( $sessionUser );
// }
//
// if ( !$wgCommandLineMode ) {
// Pingback::schedulePingback();
// }
//
// $wgFullyInitialised = true;
//
// Profiler::instance()->scopedProfileOut( $ps_extensions );
// Profiler::instance()->scopedProfileOut( $ps_setup );
}

View File

@@ -13,3 +13,16 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki.includes; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*;
import org.junit.*; import gplx.core.tests.*;
/** Tests for XomwTitle.Find_fwd_while_title, driven through XomwTitleFxt. */
public class XomwTitleTest {
  private final XomwTitleFxt fxt = new XomwTitleFxt();

  // digits and letters only: expected result equals the input length
  @Test public void Alphanum() {
    fxt.Test__find_fwd_while_title("0aB", 3);
  }

  // '<' at index 2: expected result is the position of the angle bracket
  @Test public void Angle() {
    fxt.Test__find_fwd_while_title("0a<", 2);
  }
}
/** Fixture that runs XomwTitle.Find_fwd_while_title over a whole String and checks the returned position. */
class XomwTitleFxt {
  /**
   * @param src_str text to scan; converted to bytes with Bry_.new_u8 and scanned from 0 to its length
   * @param expd expected return value of Find_fwd_while_title
   */
  public void Test__find_fwd_while_title(String src_str, int expd) {
    byte[] src = Bry_.new_u8(src_str);
    int actl = XomwTitle.Find_fwd_while_title(src, 0, src.length, XomwTitle.Title_chars_valid());
    Gftest.Eq__int(expd, actl);
  }
}

View File

@@ -13,3 +13,71 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki.includes; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*;
public class XomwXml {
  // Format an XML element with given attributes and, optionally, text content.
  // Element and attribute names are assumed to be ready for literal inclusion.
  // Strings are assumed to not contain XML-illegal characters; special
  // characters (<, >, &) are escaped but illegals are not touched.
  // ARGS:
  //   bfr             : buffer that receives the generated markup
  //   element         : element name, written as-is
  //   attribs         : flat name / value list (see Expand_attributes); may be null
  //   contents        : null => only the open tag is written; defaults to "" in MW
  //   allow_short_tag : if true, empty contents are written as " />"
  // XO.MW:SYNC:1.29; DATE:2017-02-03
  public static void Element(Bry_bfr bfr, byte[] element, List_adp attribs, byte[] contents, boolean allow_short_tag) {
    bfr.Add_byte(Byte_ascii.Angle_bgn).Add(element);
    Expand_attributes(bfr, attribs); // adds nothing for null or empty lists
    if (contents == null) {
      // XO.MW: is_null($contents) => open tag only
      bfr.Add_byte(Byte_ascii.Angle_end);
    }
    else if (allow_short_tag && contents.length == 0) {
      // XO.MW: "$contents === ''" is a by-value check, so test the length;
      // a reference check against Bry_.Empty misses other empty arrays
      bfr.Add_str_a7(" />");
    }
    else {
      bfr.Add_byte(Byte_ascii.Angle_end);
      bfr.Add_bry_escape_html(contents);
      Close_element(bfr, element);
    }
  }
  // Given a list of attribute names and values, it generates the code
  // to set the XML attributes : attributename="value".
  // The values are passed to XomwSanitizer.encodeAttribute.
  // attribs is read as a flat list of byte[] items: the name at index i and
  // its value at index i + 1, so it must hold an even number of items.
  // Nothing is added to bfr if attribs is null (MW returns null in that case) or empty.
  // XO.MW:SYNC:1.29; DATE:2017-02-03
  public static void Expand_attributes(Bry_bfr bfr, List_adp attribs) {
    if (attribs == null) return;
    int attribs_len = attribs.Len();
    for (int i = 0; i < attribs_len; i += 2) {
      // XO.MW: $out .= " {$name}=\"" . Sanitizer::encodeAttribute( $val ) . '"';
      byte[] name = (byte[])attribs.Get_at(i);
      byte[] val = (byte[])attribs.Get_at(i + 1);
      bfr.Add_byte_space();
      bfr.Add(name);
      bfr.Add_byte_eq().Add_byte_quote();
      XomwSanitizer.encodeAttribute(bfr, val);
      bfr.Add_byte_quote();
    }
  }
  // This opens an XML element: "<element attribs>"
  // XO.MW:SYNC:1.29; DATE:2017-02-03
  public static void Open_element(Bry_bfr bfr, byte[] element, List_adp attribs) {
    bfr.Add_byte(Byte_ascii.Angle_bgn).Add(element);
    Expand_attributes(bfr, attribs);
    bfr.Add_byte(Byte_ascii.Angle_end);
  }
  // Shortcut to close an XML element: "</element>"
  // XO.MW:SYNC:1.29; DATE:2017-02-03
  public static void Close_element(Bry_bfr bfr, byte[] element) {
    bfr.Add_byte(Byte_ascii.Angle_bgn).Add_byte(Byte_ascii.Slash).Add(element).Add_byte(Byte_ascii.Angle_end);
  }
  // Same as Xml::element(), but does not escape contents. Handy when the
  // content you have is already valid xml.
  // XO.MW:SYNC:1.29; DATE:2017-02-03
  public static void Tags(Bry_bfr bfr, byte[] element, List_adp attribs, byte[] contents) {
    Open_element(bfr, element, attribs);
    bfr.Add(contents);
    Close_element(bfr, element);
  }
}

View File

@@ -13,3 +13,563 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki.includes.content; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*;
import gplx.xowa.mediawiki.includes.exception.*;
import gplx.xowa.mediawiki.includes.parsers.*;
/**
* A content Object represents page content, e.g. the text to show on a page.
* Content objects have no knowledge about how they relate to Wiki pages.
*/
/**
* Base implementation for content objects.
*
* @ingroup Content
*/
public abstract class XomwAbstractContent implements XomwContent {
/**
 * Id of the content model this Content Object represents.
 * Use with CONTENT_MODEL_XXX constants.
 * XO.MW: MediaWiki documents this as a String; this port stores it as an int.
 * It is only assigned in the constructor, so it is declared final.
 *
 * @since 1.21
 */
private final int model_id;
/**
 * Stores the id of the content model handled by this Content Object.
 *
 * @param modelId id of the content model (an int in this port)
 *
 * @since 1.21
 */
public XomwAbstractContent(int modelId) {
  this.model_id = modelId;
}
/**
* Returns the content model id that was passed to the constructor.
*
* @since 1.21
*
* @return int the model id (MediaWiki uses a String; this port uses an int)
*
* @see Content::getModel
*/
public int getModel() {
return this.model_id;
}
/**
 * Verifies that modelId is the id of the content model of this Content Object.
 *
 * @since 1.21
 *
 * @param modelId The model to check (an int in this port)
 *
 * @throws XomwMWException If the provided ID is not the ID of the content model supported by this
 * Content Object.
 */
protected void checkModelID(int modelId) {
  if (modelId != this.model_id) {
    // XO.MW: "expected {$this->model_id} but got $modelId."; the space before "but" keeps the two ids apart
    throw new XomwMWException(
      "Bad content model: " +
      "expected " + this.model_id + " " +
      "but got " + modelId + "."
    );
  }
}
/**
 * Looks up the handler for this Content Object via XomwContentHandler.getForContent.
 *
 * @since 1.21
 *
 * @see Content::getContentHandler
 */
public XomwContentHandler getContentHandler() {
  XomwContentHandler handler = XomwContentHandler.getForContent(this);
  return handler;
}
/**
 * Returns the default format reported by this object's content handler.
 *
 * @since 1.21
 *
 * @see Content::getDefaultFormat
 */
public String getDefaultFormat() {
  XomwContentHandler handler = this.getContentHandler();
  return handler.getDefaultFormat();
}
/**
 * Returns the formats reported as supported by this object's content handler.
 *
 * @since 1.21
 *
 * @see Content::getSupportedFormats
 */
public String[] getSupportedFormats() {
  XomwContentHandler handler = this.getContentHandler();
  return handler.getSupportedFormats();
}
/**
 * Tells whether format can be used with this Content Object.
 * A null format stands for "use the default" and is always accepted;
 * any other value is checked by the content handler.
 *
 * @since 1.21
 *
 * @param format the serialization format to check; may be null
 *
 * @return boolean
 *
 * @see Content::isSupportedFormat
 */
public boolean isSupportedFormat(String format) {
  // null means "use the default"; the handler is only consulted for non-null formats
  return format == null || this.getContentHandler().isSupportedFormat(format);
}
/**
 * Fails if format is not accepted by isSupportedFormat.
 *
 * @since 1.21
 *
 * @param format The serialization format to check.
 *
 * @throws XomwMWException If the format is not supported by this content handler.
 */
public void checkFormat(String format) {
  if (this.isSupportedFormat(format)) {
    return;
  }
  String msg = "Format " + format + " is not supported for content model " + this.getModel();
  throw new XomwMWException(msg);
}
/**
* XO.MW: not ported; this method always throws Err_.new_unimplemented().
* The MediaWiki call it stands in for is kept as a comment in the body.
*
* @since 1.21
*
* @param String $format
*
* @return String
*
* @see Content::serialize
*/
public String serialize(String format) {
// XO.MW: original MediaWiki logic, kept for reference
// return this.getContentHandler().serializeContent(this, format);
throw Err_.new_unimplemented();
}
/**
 * Reports whether getSize() is 0.
 *
 * @since 1.21
 *
 * @return boolean
 *
 * @see Content::isEmpty
 */
public boolean isEmpty() {
  int size = this.getSize();
  return size == 0;
}
/**
* Subclasses may override this to implement (light weight) validation.
*
* @since 1.21
*
* @return boolean Always true.
*
* @see Content::isValid
*/
@gplx.Virtual public boolean isValid() {
return true;
}
/**
 * Checks whether this Content Object is equal to that.
 * Returns false if that is null or has a different model, true if that is
 * this same object, and otherwise compares the native data of both objects.
 *
 * NOTE: this overloads, but does not override, Object.equals(Object).
 *
 * @since 1.21
 *
 * @param that the Content Object to compare to; may be null
 *
 * @return boolean
 *
 * @see Content::equals
 */
public boolean equals(XomwContent that) {
  if (that == null) {
    return false;
  }
  if (that == this) {
    return true;
  }
  if (that.getModel() != this.getModel()) {
    return false;
  }
  // XO.MW: PHP compares the native data with "===", which is by value for strings.
  // Java "==" on Objects only compares references, so compare the contents instead.
  Object lhs = this.getNativeData();
  Object rhs = that.getNativeData();
  if (lhs == rhs) {
    return true; // same reference, or both null
  }
  if (lhs == null || rhs == null) {
    return false;
  }
  if (lhs instanceof byte[] && rhs instanceof byte[]) {
    // arrays do not override equals(), so compare them element by element
    return java.util.Arrays.equals((byte[])lhs, (byte[])rhs);
  }
  return lhs.equals(rhs);
}
/**
* Returns a list of DataUpdate objects for recording information about this
* Content in some secondary data store.
*
* This default implementation returns a LinksUpdate Object and calls the
* SecondaryDataUpdates hook.
*
* Subclasses may override this to determine the secondary data updates more
* efficiently, preferably without the need to generate a parser output Object.
* They should however make sure to call SecondaryDataUpdates to give extensions
* a chance to inject additional updates.
*
* @since 1.21
*
* @param Title $title
* @param Content $old
* @param boolean $recursive
* @param ParserOutput parserOutput
*
* @return DataUpdate[]
*
* @see Content::getSecondaryDataUpdates()
*/
// recursive=true
// public XomwDataUpdate[] getSecondaryDataUpdates(Title title, Content old,
// boolean recursive, ParserOutput parserOutput
// ) {
// if (parserOutput == null) {
// parserOutput = this.getParserOutput(title, null, null, false);
// }
//
// XomwDataUpdate[] updates = new XomwDataUpdate[] {
// new LinksUpdate(title, parserOutput, recursive)
// };
//
// Hooks::run('SecondaryDataUpdates', [ $title, $old, $recursive, parserOutput, &$updates ]);
//
// return updates;
// }
/**
* XO.MW: not ported; this method always throws Err_.new_unimplemented().
* The partially translated MediaWiki logic is kept as comments in the body.
*
* @since 1.21
*
* @return Title[]|null
*
* @see Content::getRedirectChain
*/
public XomwTitle[] getRedirectChain() {
// XO.MW: reference logic below is commented out and never runs
// XomwTitle title = this.getRedirectTarget();
// if (title == null) {
// return null;
// }
// // recursive check to follow double redirects
// int recurse = XomwDefaultSettings.wgMaxRedirects;
//
// List_adp titles = List_adp_.New_by_many(title);
// while (--recurse > 0) {
// XomwTitle newtitle = null;
// if (title.isRedirect()) {
// $page = WikiPage::factory(title);
// $newtitle = $page.getRedirectTarget();
// } else {
// break;
// }
// // Redirects to some special pages are not permitted
// if (Type_.Eq_by_obj(newtitle, typeof(XomwTitle)) && newtitle.isValidRedirectTarget()) {
// // The new title passes the checks, so make that our current
// // title so that further recursion can be checked
// title = newtitle;
// titles.Add(newtitle);
// } else {
// break;
// }
// }
//
// return (XomwTitle[])titles.To_ary_and_clear(typeof(XomwTitle));
throw Err_.new_unimplemented();
}
// /**
// * Subclasses that implement redirects should override this.
// *
// * @since 1.21
// *
// * @return Title|null
// *
// * @see Content::getRedirectTarget
// */
// public function getRedirectTarget() {
// return null;
// }
//
// /**
// * @note Migrated here from Title::newFromRedirectRecurse.
// *
// * @since 1.21
// *
// * @return Title|null
// *
// * @see Content::getUltimateRedirectTarget
// */
// public function getUltimateRedirectTarget() {
// $titles = this.getRedirectChain();
//
// return $titles ? array_pop($titles) : null;
// }
//
// /**
// * @since 1.21
// *
// * @return boolean
// *
// * @see Content::isRedirect
// */
// public function isRedirect() {
// return this.getRedirectTarget() != null;
// }
//
// /**
// * This default implementation always returns $this.
// * Subclasses that implement redirects should override this.
// *
// * @since 1.21
// *
// * @param Title $target
// *
// * @return Content $this
// *
// * @see Content::updateRedirect
// */
// public function updateRedirect(Title $target) {
// return $this;
// }
//
// /**
// * @since 1.21
// *
// * @return null
// *
// * @see Content::getSection
// */
// public function getSection($sectionId) {
// return null;
// }
//
// /**
// * @since 1.21
// *
// * @return null
// *
// * @see Content::replaceSection
// */
// public function replaceSection($sectionId, Content $with, $sectionTitle = '') {
// return null;
// }
//
// /**
// * @since 1.21
// *
// * @return Content $this
// *
// * @see Content::preSaveTransform
// */
// public function preSaveTransform(Title $title, User $user, ParserOptions $popts) {
// return $this;
// }
//
// /**
// * @since 1.21
// *
// * @return Content $this
// *
// * @see Content::addSectionHeader
// */
// public function addSectionHeader($header) {
// return $this;
// }
//
// /**
// * @since 1.21
// *
// * @return Content $this
// *
// * @see Content::preloadTransform
// */
// public function preloadTransform(Title $title, ParserOptions $popts, $params = []) {
// return $this;
// }
//
// /**
// * @since 1.21
// *
// * @return Status
// *
// * @see Content::prepareSave
// */
// public function prepareSave(WikiPage $page, $flags, $parentRevId, User $user) {
// if (this.isValid()) {
// return Status::newGood();
// } else {
// return Status::newFatal("invalid-content-data");
// }
// }
//
// /**
// * @since 1.21
// *
// * @param WikiPage $page
// * @param ParserOutput parserOutput
// *
// * @return LinksDeletionUpdate[]
// *
// * @see Content::getDeletionUpdates
// */
// public function getDeletionUpdates(WikiPage $page, ParserOutput parserOutput = null) {
// return [
// new LinksDeletionUpdate($page),
// ];
// }
/**
* This default implementation always returns false. Subclasses may override
* this to supply matching logic.
*
* @since 1.21
*
* @param MagicWord $word
*
* @return boolean Always false.
*
* @see Content::matchMagicWord
*/
@gplx.Virtual public boolean matchMagicWord(XomwMagicWord word) {
return false;
}
// /**
// * This super implementation calls the hook ConvertContent to enable custom conversions.
// * Subclasses may override this to implement conversion for "their" content model.
// *
// * @param String $toModel
// * @param String $lossy
// *
// * @return Content|boolean
// *
// * @see Content::convert()
// */
// public function convert($toModel, $lossy = '') {
// if (this.getModel() == $toModel) {
// // nothing to do, shorten out.
// return $this;
// }
//
// $lossy = ($lossy == 'lossy'); // String flag, convert to boolean for convenience
// $result = false;
//
// Hooks::run('ConvertContent', [ $this, $toModel, $lossy, &$result ]);
//
// return $result;
// }
//
// /**
// * Returns a ParserOutput Object containing information derived from this content.
// * Most importantly, unless $generateHtml was false, the return value contains an
// * HTML representation of the content.
// *
// * Subclasses that want to control the parser output may override this, but it is
// * preferred to override fillParserOutput() instead.
// *
// * Subclasses that override getParserOutput() itself should take care to call the
// * ContentGetParserOutput hook.
// *
// * @since 1.24
// *
// * @param Title $title Context title for parsing
// * @param int|null $revId Revision ID (for {{REVISIONID}})
// * @param ParserOptions|null $options Parser options
// * @param boolean $generateHtml Whether or not to generate HTML
// *
// * @return ParserOutput Containing information derived from this content.
// */
// public function getParserOutput(Title $title, $revId = null,
// ParserOptions $options = null, $generateHtml = true
// ) {
// if ($options == null) {
// $options = this.getContentHandler().makeParserOptions('canonical');
// }
//
// $po = new ParserOutput();
//
// if (Hooks::run('ContentGetParserOutput',
// [ $this, $title, $revId, $options, $generateHtml, &$po ])) {
//
// // Save and restore the old value, just in case something is reusing
// // the ParserOptions Object in some weird way.
// $oldRedir = $options.getRedirectTarget();
// $options.setRedirectTarget(this.getRedirectTarget());
// this.fillParserOutput($title, $revId, $options, $generateHtml, $po);
// $options.setRedirectTarget($oldRedir);
// }
//
// Hooks::run('ContentAlterParserOutput', [ $this, $title, $po ]);
//
// return $po;
// }
//
// /**
// * Fills the provided ParserOutput with information derived from the content.
// * Unless $generateHtml was false, this includes an HTML representation of the content.
// *
// * This is called by getParserOutput() after consulting the ContentGetParserOutput hook.
// * Subclasses are expected to override this method (or getParserOutput(), if need be).
// * Subclasses of TextContent should generally override getHtml() instead.
// *
// * This placeholder implementation always throws an exception.
// *
// * @since 1.24
// *
// * @param Title $title Context title for parsing
// * @param int|null $revId Revision ID (for {{REVISIONID}})
// * @param ParserOptions $options Parser options
// * @param boolean $generateHtml Whether or not to generate HTML
// * @param ParserOutput &$output The output Object to fill (reference).
// *
// * @throws MWException
// */
// protected function fillParserOutput(Title $title, $revId,
// ParserOptions $options, $generateHtml, ParserOutput &$output
// ) {
// // Don't make abstract, so subclasses that override getParserOutput() directly don't fail.
// throw new MWException('Subclasses of AbstractContent must override fillParserOutput!');
// }
// XO.MW: members for which this base class supplies no implementation;
// they are declared abstract here, so concrete subclasses must provide them.
public abstract byte[] getTextForSearchIndex();
public abstract byte[] getWikitextForTransclusion();
public abstract byte[] getTextForSummary(int maxLength);
public abstract Object getNativeData();
public abstract int getSize();
public abstract XomwContent copy();
public abstract boolean isCountable(boolean hasLinks);
public abstract XomwParserOutput getParserOutput(XomwTitle title, int revId,
XomwParserOptions options, boolean generateHtml);
public abstract Object getSecondaryDataUpdates(XomwTitle title, XomwContent old,
boolean recursive, XomwParserOutput parserOutput);
public abstract XomwTitle getRedirectTarget();
public abstract XomwTitle getUltimateRedirectTarget();
public abstract boolean isRedirect();
public abstract XomwContent updateRedirect(XomwTitle target);
public abstract XomwContent getSection(String sectionId);
public abstract byte[] replaceSection(String sectionId, XomwContent with, String sectionTitle);
public abstract XomwContent preSaveTransform(XomwTitle title, Object user, XomwParserOptions parserOptions);
public abstract XomwContent addSectionHeader(byte[] header);
public abstract XomwContent preloadTransform(XomwTitle title, XomwParserOptions parserOptions, Object[] ary);
public abstract Object prepareSave(Object page, int flags, int parentRevId, Object user);
public abstract Object getDeletionUpdates(Object page,
XomwParserOutput parserOutput);
public abstract XomwContent convert(byte[] toModel, byte[] lossy);
}

View File

@@ -13,3 +13,511 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki.includes.content; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*;
import gplx.xowa.mediawiki.includes.parsers.*;
/**
* A content Object represents page content, e.g. the text to show on a page.
* Content objects have no knowledge about how they relate to wiki pages.
*/
/**
* Base interface for content objects.
*
* @ingroup Content
*/
// XO.MW: the Javadoc below is carried over from MediaWiki's PHP Content interface.
// Where a PHP type in the text differs from the Java signature, the Java signature is authoritative.
public interface XomwContent {
/**
* @since 1.21
*
* @return String A String representing the content in a way useful for
* building a full text search index. If no useful representation exists,
* this method returns an empty String.
* XO.MW: declared as byte[] in this port.
*
* @todo Test that this actually works
* @todo Make sure this also works with LuceneSearch / WikiSearch
*/
byte[] getTextForSearchIndex();
/**
* @since 1.21
*
* @return String|boolean The wikitext to include when another page includes this
* content, or false if the content is not includable in a wikitext page.
* XO.MW: declared as byte[] in this port, so "false" cannot be returned;
* presumably null takes its place -- TODO confirm against implementations.
*
* @todo Allow native handling, bypassing wikitext representation, like
* for includable special pages.
* @todo Allow transclusion into other content models than Wikitext!
* @todo Used in WikiPage and MessageCache to get message text. Not so
* nice. What should we use instead?!
*/
byte[] getWikitextForTransclusion();
/**
* Returns a textual representation of the content suitable for use in edit
* summaries and log messages.
*
* @since 1.21
*
* @param int $maxLength Maximum length of the summary text.
*
* @return String The summary text. XO.MW: declared as byte[] in this port.
*/
// DFLT:maxLength=250
byte[] getTextForSummary(int maxLength);
/**
* Returns native representation of the data. Interpretation depends on
* the data model used, as given by getDataModel().
*
* @since 1.21
*
* @return mixed The native representation of the content. Could be a
* String, a nested array structure, an Object, a binary blob...
* anything, really.
*
* @note Caller must be aware of content model!
*/
Object getNativeData();
/**
* Returns the content's nominal size in "bogo-bytes".
*
* @return int
*/
int getSize();
/**
* Returns the ID of the content model used by this Content Object.
* Corresponds to the CONTENT_MODEL_XXX constants.
*
* @since 1.21
*
* @return int The model id (MediaWiki uses a String; this port declares an int)
*/
int getModel();
/**
* Convenience method that returns the ContentHandler singleton for handling
* the content model that this Content Object uses.
*
* Shorthand for ContentHandler::getForContent( $this )
*
* @since 1.21
*
* @return ContentHandler
*/
XomwContentHandler getContentHandler();
/**
* Convenience method that returns the default serialization format for the
* content model that this Content Object uses.
*
* Shorthand for $this->getContentHandler()->getDefaultFormat()
*
* @since 1.21
*
* @return String
*/
String getDefaultFormat();
/**
* Convenience method that returns the list of serialization formats
* supported for the content model that this Content Object uses.
*
* Shorthand for $this->getContentHandler()->getSupportedFormats()
*
* @since 1.21
*
* @return String[] List of supported serialization formats
*/
String[] getSupportedFormats();
/**
* Returns true if $format is a supported serialization format for this
* Content Object, false if it isn't.
*
* Note that this should always return true if $format is null, because null
* stands for the default serialization.
*
* Shorthand for $this->getContentHandler()->isSupportedFormat( $format )
*
* @since 1.21
*
* @param String $format The serialization format to check.
*
* @return boolean Whether the format is supported
*/
boolean isSupportedFormat(String format);
/**
* Convenience method for serializing this Content Object.
*
* Shorthand for $this->getContentHandler()->serializeContent( $this, $format )
*
* @since 1.21
*
* @param String $format The desired serialization format, or null for the default format.
*
* @return String Serialized form of this Content Object.
*/
String serialize(String format);
/**
* Returns true if this Content Object represents empty content.
*
* @since 1.21
*
* @return boolean Whether this Content Object is empty
*/
boolean isEmpty();
/**
* Returns whether the content is valid. This is intended for local validity
* checks, not considering global consistency.
*
* Content needs to be valid before it can be saved.
*
* This default implementation always returns true.
*
* @since 1.21
*
* @return boolean
*/
boolean isValid();
/**
* Returns true if this Content Object is conceptually equivalent to the
* given Content Object.
*
* Contract:
*
* - Will return false if $that is null.
* - Will return true if $that === $this.
* - Will return false if $that->getModel() != $this->getModel().
* - Will return false if $that->getNativeData() is not equal to $this->getNativeData(),
* where the meaning of "equal" depends on the actual data model.
*
* Implementations should be careful to make equals() transitive and reflexive:
*
* - $a->equals( $b ) <=> $b->equals( $a )
* - $a->equals( $b ) && $b->equals( $c ) ==> $a->equals( $c )
*
* XO.MW: this takes an XomwContent, so it overloads rather than overrides Object.equals(Object).
*
* @since 1.21
*
* @param Content $that The Content Object to compare to.
*
* @return boolean True if this Content Object is equal to $that, false otherwise.
*/
boolean equals(XomwContent that);
/**
* Return a copy of this Content Object. The following must be true for the
* Object returned:
*
* if $copy = $original->copy()
*
* - get_class($original) === get_class($copy)
* - $original->getModel() === $copy->getModel()
* - $original->equals( $copy )
*
* If and only if the Content Object is immutable, the copy() method can and
* should return $this. That is, $copy === $original may be true, but only
* for immutable content objects.
*
* @since 1.21
*
* @return Content A copy of this Object
*/
XomwContent copy();
/**
* Returns true if this content is countable as a "real" wiki page, provided
* that it's also in a countable location (e.g. a current revision in the
* main namespace).
*
* @since 1.21
*
* @param boolean|null $hasLinks If it is known whether this content contains
* links, provide this information here, to avoid redundant parsing to
* find out.
* XO.MW: declared as a primitive boolean in this port, so "unknown" (null) cannot be passed.
*
* @return boolean
*/
boolean isCountable(boolean hasLinks);
/**
* Parse the Content Object and generate a ParserOutput from the result.
* $result->getText() can be used to obtain the generated HTML. If no HTML
* is needed, $generateHtml can be set to false; in that case,
* $result->getText() may return null.
*
* @note To control which options are used in the cache key for the
* generated parser output, implementations of this method
* may call ParserOutput::recordOption() on the output Object.
*
* @param Title $title The page title to use as a context for rendering.
* @param int $revId Optional revision ID being rendered.
* @param ParserOptions $options Any parser options.
* @param boolean $generateHtml Whether to generate HTML (default: true). If false,
* the result of calling getText() on the ParserOutput Object returned by
* this method is undefined.
*
* @since 1.21
*
* @return ParserOutput
*/
// generateHtml = true
XomwParserOutput getParserOutput(XomwTitle title, int revId,
XomwParserOptions options, boolean generateHtml);
// TODO: make RenderOutput and RenderOptions super classes
/**
* Returns a list of DataUpdate objects for recording information about this
* Content in some secondary data store. If the optional second argument,
* $old, is given, the updates may model only the changes that need to be
* made to replace information about the old content with information about
* the new content.
*
* This default implementation calls
* $this->getParserOutput( $content, $title, null, null, false ),
* and then calls getSecondaryDataUpdates( $title, $recursive ) on the
* resulting ParserOutput Object.
*
* Subclasses may implement this to determine the necessary updates more
* efficiently, or make use of information about the old content.
*
* @note Implementations should call the SecondaryDataUpdates hook, like
* AbstractContent does.
*
* @param Title $title The context for determining the necessary updates
* @param Content $old An optional Content Object representing the
* previous content, i.e. the content being replaced by this Content
* Object.
* @param boolean $recursive Whether to include recursive updates (default:
* false).
* @param ParserOutput $parserOutput Optional ParserOutput Object.
* Provide if you have one handy, to avoid re-parsing of the content.
*
* @return DataUpdate[] A list of DataUpdate objects for putting information
* about this content Object somewhere.
* XO.MW: declared as Object in this port.
*
* @since 1.21
*/
// DFLT: recursive = true
Object getSecondaryDataUpdates(XomwTitle title, XomwContent old,
boolean recursive, XomwParserOutput parserOutput);
/**
* Construct the redirect destination from this content and return an
* array of Titles, or null if this content doesn't represent a redirect.
* The last element in the array is the final destination after all redirects
* have been resolved (up to $wgMaxRedirects times).
*
* @since 1.21
*
* @return Title[]|null List of Titles, with the destination last.
*/
XomwTitle[] getRedirectChain();
/**
* Construct the redirect destination from this content and return a Title,
* or null if this content doesn't represent a redirect.
* This will only return the immediate redirect target, useful for
* the redirect table and other checks that don't need full recursion.
*
* @since 1.21
*
* @return Title|null The corresponding Title.
*/
XomwTitle getRedirectTarget();
/**
* Construct the redirect destination from this content and return the
* Title, or null if this content doesn't represent a redirect.
*
* This will recurse down $wgMaxRedirects times or until a non-redirect
* target is hit in order to provide (hopefully) the Title of the final
* destination instead of another redirect.
*
* There is usually no need to override the default behavior; subclasses that
* want to implement redirects should override getRedirectTarget().
*
* @since 1.21
*
* @return Title|null
*/
XomwTitle getUltimateRedirectTarget();
/**
* Returns whether this Content represents a redirect.
* Shorthand for getRedirectTarget() !== null.
*
* @since 1.21
*
* @return boolean
*/
boolean isRedirect();
/**
* If this Content Object is a redirect, this method updates the redirect target.
* Otherwise, it does nothing.
*
* @since 1.21
*
* @param Title $target The new redirect target
*
* @return Content A new Content Object with the updated redirect (or $this
* if this Content Object isn't a redirect)
*/
XomwContent updateRedirect(XomwTitle target);
/**
* Returns the section with the given ID.
*
* @since 1.21
*
* @param String|int $sectionId Section identifier as a number or String
* (e.g. 0, 1 or 'T-1'). The ID "0" retrieves the section before the first heading, "1" the
* text between the first heading (included) and the second heading (excluded), etc.
* XO.MW: declared as String in this port.
*
* @return Content|boolean|null The section, or false if no such section
* exist, or null if sections are not supported.
* XO.MW: declared as XomwContent in this port, so "false" cannot be returned.
*/
XomwContent getSection(String sectionId);
/**
* Replaces a section of the content and returns a Content Object with the
* section replaced.
*
* @since 1.21
*
* @param String|int|null|boolean $sectionId Section identifier as a number or String
* (e.g. 0, 1 or 'T-1'), null/false or an empty String for the whole page
* or 'new' for a new section.
* @param Content $with New content of the section
* @param String $sectionTitle New section's subject, only if $section is 'new'
*
* @return String|null Complete article text, or null if error
* XO.MW: declared as byte[] in this port.
*/
byte[] replaceSection(String sectionId, XomwContent with, String sectionTitle);
/**
* Returns a Content Object with pre-save transformations applied (or this
* Object if no transformations apply).
*
* @since 1.21
*
* @param Title $title
* @param User $user XO.MW: declared as Object in this port.
* @param ParserOptions $parserOptions
*
* @return Content
*/
XomwContent preSaveTransform(XomwTitle title, Object user, XomwParserOptions parserOptions );
/**
* Returns a new WikitextContent Object with the given section heading
* prepended, if supported. The default implementation just returns this
* Content Object unmodified, ignoring the section header.
*
* @since 1.21
*
* @param String $header XO.MW: declared as byte[] in this port.
*
* @return Content
*/
XomwContent addSectionHeader(byte[] header);
/**
* Returns a Content Object with preload transformations applied (or this
* Object if no transformations apply).
*
* @since 1.21
*
* @param Title $title
* @param ParserOptions $parserOptions
* @param array $params XO.MW: named "ary" and declared as Object[] in this port.
*
* @return Content
*/
XomwContent preloadTransform(XomwTitle title, XomwParserOptions parserOptions, Object[] ary);
/**
* Prepare Content for saving. Called before Content is saved by WikiPage::doEditContent() and in
* similar places.
*
* This may be used to check the content's consistency with global state. This function should
* NOT write any information to the database.
*
* Note that this method will usually be called inside the same transaction
* bracket that will be used to save the new revision.
*
* Note that this method is called before any update to the page table is
* performed. This means that $page may not yet know a page ID.
*
* @since 1.21
*
* @param WikiPage $page The page to be saved.
* @param int $flags Bitfield for use with EDIT_XXX constants, see WikiPage::doEditContent()
* @param int $parentRevId The ID of the current revision
* @param User $user
*
* @return Status A status Object indicating whether the content was
* successfully prepared for saving. If the returned status indicates
* an error, a rollback will be performed and the transaction aborted.
* XO.MW: page, user and the return value are declared as Object in this port.
*
* @see WikiPage::doEditContent()
*/
Object prepareSave(Object page, int flags, int parentRevId, Object user);
/**
* Returns a list of updates to perform when this content is deleted.
* The necessary updates may be taken from the Content Object, or depend on
* the current state of the database.
*
* @since 1.21
*
* @param WikiPage $page The deleted page
* @param ParserOutput $parserOutput Optional parser output Object
* for efficient access to meta-information about the content Object.
* Provide if you have one handy.
*
* @return DataUpdate[] A list of DataUpdate instances that will clean up the
* database after deletion.
* XO.MW: page and the return value are declared as Object in this port.
*/
Object getDeletionUpdates(Object page,
XomwParserOutput parserOutput);
/**
* Returns true if this Content Object matches the given magic word.
*
* @since 1.21
*
* @param MagicWord $word The magic word to match
*
* @return boolean Whether this Content Object matches the given magic word.
*/
boolean matchMagicWord(XomwMagicWord word);
/**
* Converts this content Object into another content Object with the given content model,
* if that is possible.
*
* @param String $toModel The desired content model, use the CONTENT_MODEL_XXX flags.
* @param String $lossy Optional flag, set to "lossy" to allow lossy conversion. If lossy
* conversion is not allowed, full round-trip conversion is expected to work without losing
* information.
*
* @return Content|boolean A content Object with the content model $toModel, or false if
* that conversion is not supported.
* XO.MW: declared as XomwContent in this port, so "false" cannot be returned.
* NOTE(review): toModel is a byte[] here while getModel() returns an int -- confirm which form callers use.
*/
XomwContent convert(byte[] toModel, byte[] lossy);
// @todo ImagePage and CategoryPage interfere with per-content action handlers
// @todo nice&sane integration of GeSHi syntax highlighting
// [11:59] <vvv> Hooks are ugly; make CodeHighlighter interface and a
// config to set the class which handles syntax highlighting
// [12:00] <vvv> And default it to a DummyHighlighter
}

View File

@@ -13,3 +13,156 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki.includes.content; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*;
// <?php
// /**
// * Wrapper content Object allowing to handle a system message as a Content Object.
// */
//
// /**
// * Wrapper allowing us to handle a system message as a Content Object.
// * Note that this is generally *not* used to represent content from the
// * MediaWiki namespace, and that there is no MessageContentHandler.
// * MessageContent is just intended as glue for wrapping a message programmatically.
// *
// * @ingroup Content
// */
// class MessageContent extends AbstractContent {
//
// /**
// * @var Message
// */
// protected $mMessage;
//
// /**
// * @param Message|String $msg A Message Object, or a message key.
// * @param String[] $params An optional array of message parameters.
// */
// public function __construct( $msg, $params = null ) {
// # XXX: messages may be wikitext, html or plain text! and maybe even something else entirely.
// parent::__construct( CONTENT_MODEL_WIKITEXT );
//
// if ( is_string( $msg ) ) {
// $this->mMessage = wfMessage( $msg );
// } else {
// $this->mMessage = clone $msg;
// }
//
// if ( $params ) {
// $this->mMessage = $this->mMessage->params( $params );
// }
// }
//
// /**
// * Fully parse the text from wikitext to HTML.
// *
// * @return String Parsed HTML.
// */
// public function getHtml() {
// return $this->mMessage->parse();
// }
//
// /**
// * Returns the message text. {{-transformation is done.
// *
// * @return String Unescaped message text.
// */
// public function getWikitext() {
// return $this->mMessage->text();
// }
//
// /**
// * Returns the message Object, with any parameters already substituted.
// *
// * @return Message The message Object.
// */
// public function getNativeData() {
// // NOTE: Message objects are mutable. Cloning here makes MessageContent immutable.
// return clone $this->mMessage;
// }
//
// /**
// * @return String
// *
// * @see Content::getTextForSearchIndex
// */
// public function getTextForSearchIndex() {
// return $this->mMessage->plain();
// }
//
// /**
// * @return String
// *
// * @see Content::getWikitextForTransclusion
// */
// public function getWikitextForTransclusion() {
// return $this->getWikitext();
// }
//
// /**
// * @param int $maxlength Maximum length of the summary text, defaults to 250.
// *
// * @return String The summary text.
// *
// * @see Content::getTextForSummary
// */
// public function getTextForSummary( $maxlength = 250 ) {
// return substr( $this->mMessage->plain(), 0, $maxlength );
// }
//
// /**
// * @return int
// *
// * @see Content::getSize
// */
// public function getSize() {
// return strlen( $this->mMessage->plain() );
// }
//
// /**
// * @return Content A copy of this Object
// *
// * @see Content::copy
// */
// public function copy() {
// // MessageContent is immutable (because getNativeData() returns a clone of the Message Object)
// return $this;
// }
//
// /**
// * @param boolean|null $hasLinks
// *
// * @return boolean Always false.
// *
// * @see Content::isCountable
// */
// public function isCountable( $hasLinks = null ) {
// return false;
// }
//
// /**
// * @param Title $title Unused.
// * @param int $revId Unused.
// * @param ParserOptions $options Unused.
// * @param boolean $generateHtml Whether to generate HTML (default: true).
// *
// * @return ParserOutput
// *
// * @see Content::getParserOutput
// */
// public function getParserOutput( Title $title, $revId = null,
// ParserOptions $options = null, $generateHtml = true ) {
// if ( $generateHtml ) {
// $html = $this->getHtml();
// } else {
// $html = '';
// }
//
// $po = new ParserOutput( $html );
// // Message objects are in the user language.
// $po->recordOption( 'userlang' );
//
// return $po;
// }
//
// }

View File

@@ -13,3 +13,138 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki.includes.content; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*;
/**
* Base content handler implementation for flat text contents.
*
* @ingroup Content
*/
class XomwTextContentHandler extends XomwContentHandler {
/** Creates the handler with model id CONTENT_MODEL_TEXT and the single format CONTENT_FORMAT_TEXT. */
public XomwTextContentHandler() {
	super(XomwDefaultSettings.CONTENT_MODEL_TEXT, XomwDefines.CONTENT_FORMAT_TEXT);
}
/**
 * Creates the handler for a caller-chosen content model; used by subclasses.
 *
 * @param modelId content model id forwarded unchanged to XomwContentHandler
 * @param formats serialization formats forwarded unchanged to XomwContentHandler
 */
public XomwTextContentHandler(int modelId, String... formats) {
	super(modelId, formats);
}
// /**
// * Returns the content's text as-is.
// *
// * @param Content $content
// * @param String $format The serialization format to check
// *
// * @return mixed
// */
// public function serializeContent( Content $content, $format = null ) {
// $this->checkFormat( $format );
//
// return $content->getNativeData();
// }
//
// /**
// * Attempts to merge differences between three versions. Returns a new
// * Content Object for a clean merge and false for failure or a conflict.
// *
// * All three Content objects passed as parameters must have the same
// * content model.
// *
// * This text-based implementation uses wfMerge().
// *
// * @param Content $oldContent The page's previous content.
// * @param Content $myContent One of the page's conflicting contents.
// * @param Content $yourContent One of the page's conflicting contents.
// *
// * @return Content|boolean
// */
// public function merge3( Content $oldContent, Content $myContent, Content $yourContent ) {
// $this->checkModelID( $oldContent->getModel() );
// $this->checkModelID( $myContent->getModel() );
// $this->checkModelID( $yourContent->getModel() );
//
// $format = $this->getDefaultFormat();
//
// $old = $this->serializeContent( $oldContent, $format );
// $mine = $this->serializeContent( $myContent, $format );
// $yours = $this->serializeContent( $yourContent, $format );
//
// $ok = wfMerge( $old, $mine, $yours, $result );
//
// if ( !$ok ) {
// return false;
// }
//
// if ( !$result ) {
// return $this->makeEmptyContent();
// }
//
// $mergedContent = $this->unserializeContent( $result, $format );
//
// return $mergedContent;
// }
//
// /**
// * Returns the name of the associated Content class, to
// * be used when creating new objects. Override expected
// * by subclasses.
// *
// * @since 1.24
// *
// * @return String
// */
// protected function getContentClass() {
// return TextContent::class;
// }
//
// /**
// * Unserializes a Content Object of the type supported by this ContentHandler.
// *
// * @since 1.21
// *
// * @param String $text Serialized form of the content
// * @param String $format The format used for serialization
// *
// * @return Content The TextContent Object wrapping $text
// */
// public function unserializeContent( $text, $format = null ) {
// $this->checkFormat( $format );
//
// $class = $this->getContentClass();
// return new $class( $text );
// }
//
// /**
// * Creates an empty TextContent Object.
// *
// * @since 1.21
// *
// * @return Content A new TextContent Object with empty text.
// */
// public function makeEmptyContent() {
// $class = $this->getContentClass();
// return new $class( '' );
// }
//
// /**
// * @see ContentHandler::supportsDirectEditing
// *
// * @return boolean Default is true for TextContent and derivatives.
// */
// public function supportsDirectEditing() {
// return true;
// }
//
// public function getFieldsForSearchIndex( SearchEngine $engine ) {
// $fields = parent::getFieldsForSearchIndex( $engine );
// $fields['language'] =
// $engine->makeSearchFieldMapping( 'language', SearchIndexField::INDEX_TYPE_KEYWORD );
//
// return $fields;
// }
//
// public function getDataForSearchIndex( WikiPage $page, ParserOutput $output,
// SearchEngine $engine ) {
// $fields = parent::getDataForSearchIndex( $page, $output, $engine );
// $fields['language'] =
// $this->getPageLanguage( $page->getTitle(), $page->getContent() )->getCode();
// return $fields;
// }
}

View File

@@ -13,3 +13,136 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki.includes.content; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*;
/**
* Content handler for wiki text pages.
*
* @ingroup Content
*/
class XomwWikitextContentHandler extends XomwTextContentHandler {
/** Creates the handler with model id CONTENT_MODEL_WIKITEXT and the single format CONTENT_FORMAT_WIKITEXT. */
public XomwWikitextContentHandler() {
	super(XomwDefaultSettings.CONTENT_MODEL_WIKITEXT, XomwDefines.CONTENT_FORMAT_WIKITEXT);
}
// protected function getContentClass() {
// return WikitextContent::class;
// }
//
// /**
// * Returns a WikitextContent Object representing a redirect to the given destination page.
// *
// * @param Title $destination The page to redirect to.
// * @param String $text Text to include in the redirect, if possible.
// *
// * @return Content
// *
// * @see ContentHandler::makeRedirectContent
// */
// public function makeRedirectContent( Title $destination, $text = '' ) {
// $optionalColon = '';
//
// if ( $destination->getNamespace() == NS_CATEGORY ) {
// $optionalColon = ':';
// } else {
// $iw = $destination->getInterwiki();
// if ( $iw && Language::fetchLanguageName( $iw, null, 'mw' ) ) {
// $optionalColon = ':';
// }
// }
//
// $mwRedir = MagicWord::get( 'redirect' );
// $redirectText = $mwRedir->getSynonym( 0 ) .
// ' [[' . $optionalColon . $destination->getFullText() . ']]';
//
// if ( $text != '' ) {
// $redirectText .= "\n" . $text;
// }
//
// $class = $this->getContentClass();
// return new $class( $redirectText );
// }
//
// /**
// * Returns true because wikitext supports redirects.
// *
// * @return boolean Always true.
// *
// * @see ContentHandler::supportsRedirects
// */
// public function supportsRedirects() {
// return true;
// }
//
// /**
// * Returns true because wikitext supports sections.
// *
// * @return boolean Always true.
// *
// * @see ContentHandler::supportsSections
// */
// public function supportsSections() {
// return true;
// }
//
// /**
// * Returns true, because wikitext supports caching using the
// * ParserCache mechanism.
// *
// * @since 1.21
// *
// * @return boolean Always true.
// *
// * @see ContentHandler::isParserCacheSupported
// */
// public function isParserCacheSupported() {
// return true;
// }
//
// /**
// * Get file handler
// * @return FileContentHandler
// */
// protected function getFileHandler() {
// return new FileContentHandler();
// }
//
// public function getFieldsForSearchIndex( SearchEngine $engine ) {
// $fields = parent::getFieldsForSearchIndex( $engine );
//
// $fields['heading'] =
// $engine->makeSearchFieldMapping( 'heading', SearchIndexField::INDEX_TYPE_TEXT );
// $fields['heading']->setFlag( SearchIndexField::FLAG_SCORING );
//
// $fields['auxiliary_text'] =
// $engine->makeSearchFieldMapping( 'auxiliary_text', SearchIndexField::INDEX_TYPE_TEXT );
//
// $fields['opening_text'] =
// $engine->makeSearchFieldMapping( 'opening_text', SearchIndexField::INDEX_TYPE_TEXT );
// $fields['opening_text']->setFlag( SearchIndexField::FLAG_SCORING |
// SearchIndexField::FLAG_NO_HIGHLIGHT );
// // Until we have full first-class content handler for files, we invoke it explicitly here
// $fields = array_merge( $fields, $this->getFileHandler()->getFieldsForSearchIndex( $engine ) );
//
// return $fields;
// }
//
// public function getDataForSearchIndex( WikiPage $page, ParserOutput $parserOutput,
// SearchEngine $engine ) {
// $fields = parent::getDataForSearchIndex( $page, $parserOutput, $engine );
//
// $structure = new WikiTextStructure( $parserOutput );
// $fields['heading'] = $structure->headings();
// // text fields
// $fields['opening_text'] = $structure->getOpeningText();
// $fields['text'] = $structure->getMainText(); // overwrites one from ContentHandler
// $fields['auxiliary_text'] = $structure->getAuxiliaryText();
// $fields['defaultsort'] = $structure->getDefaultSort();
//
// // Until we have full first-class content handler for files, we invoke it explicitly here
// if ( NS_FILE == $page->getTitle()->getNamespace() ) {
// $fields = array_merge( $fields,
// $this->getFileHandler()->getDataForSearchIndex( $page, $parserOutput, $engine ) );
// }
// return $fields;
// }
}

View File

@@ -13,3 +13,8 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki.includes.exception; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*;
/**
 * Port of MediaWiki's MWException, built on the gplx Err type.
 */
public class XomwMWException extends Err {
	/**
	 * @param msg message text; forwarded to the Err constructor as its last argument,
	 *            preceded by the fixed values true, "" and ""
	 */
	public XomwMWException(String msg) {
		super(true, "", "", msg);
	}
}

View File

@@ -13,3 +13,7 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki.includes.filerepo.file; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*; import gplx.xowa.mediawiki.includes.filerepo.*;
/**
* Strategy for resolving a title to a file Object.
*/
public interface XomwFileFinder {
/**
* Looks up the file for the given title.
*
* @param ttl title to resolve
* @return the matching file. NOTE(review): the implementations visible alongside this
* interface return null when nothing matches -- callers should expect null.
*/
XomwFile Find_file(XomwTitle ttl);
}

View File

@@ -13,3 +13,19 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki.includes.filerepo.file; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*; import gplx.xowa.mediawiki.includes.filerepo.*;
import gplx.xowa.mediawiki.includes.parsers.*;
/**
 * Mock XomwFileFinder backed by an in-memory hash of explicitly registered files.
 *
 * NOTE(review): Add() keys each entry by the raw title text, whereas Find_file() looks up by
 * ttl.getPrefixedDBkey(). The two only agree when the registered text is already in that
 * form -- confirm that callers register titles accordingly.
 */
public class XomwFileFinderMock implements XomwFileFinder {
	private final XomwEnv env;
	private final Hash_adp_bry hash = Hash_adp_bry.cs();
	public XomwFileFinderMock(XomwEnv env) {
		this.env = env;
	}
	/** Removes every registered file. */
	public void Clear() {
		hash.Clear();
	}
	/** Returns the file registered under the title's prefixed DB key, as returned by the hash lookup. */
	public XomwFile Find_file(XomwTitle ttl) {
		Object found = hash.Get_by(ttl.getPrefixedDBkey());
		return (XomwFile)found;
	}
	/**
	 * Registers a local file under the given title text; a later registration with the same
	 * text is stored via Add_if_dupe_use_nth.
	 */
	public void Add(String title, XomwFileRepo repo, int w, int h, byte[] mime) {
		byte[] key = Bry_.new_u8(title);
		XomwTitle parsed = XomwTitle.newFromText(env, key);
		XomwLocalFile entry = new XomwLocalFile(env, parsed, repo, w, h, mime);
		hash.Add_if_dupe_use_nth(key, entry);
	}
}

View File

@@ -13,3 +13,7 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki.includes.filerepo.file; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*; import gplx.xowa.mediawiki.includes.filerepo.*;
/**
 * XomwFileFinder that never finds anything.
 */
public class XomwFileFinderNoop implements XomwFileFinder {
	/** Always returns null, whatever the title. */
	public XomwFile Find_file(XomwTitle ttl) {
		return null;
	}
}

View File

@@ -13,3 +13,170 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki.includes.interwiki; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*;
/**
* Value Object for representing interwiki records.
*/
public class XomwInterwiki {
/** @var String The interwiki prefix, (e.g. "Meatball", or the language prefix "de") */
private final byte[] mPrefix;
/** @var String The URL of the wiki, with "$1" as a placeholder for an article name. */
private final byte[] mURL;
/** @var String The URL of the file api.php */
private final byte[] mAPI;
/** @var String The name of the database (for a connection to be established
 * with wfGetLB('wikiid'))
 */
private final byte[] mWikiID;
/** @var boolean Whether the wiki is in this project */
private final boolean mLocal;
/** @var boolean Whether interwiki transclusions are allowed */
private final boolean mTrans;
/**
 * XO: key under which this record is stored in an interwiki map.
 * It is not assigned by the constructor; whoever builds the record must set it.
 */
public byte[] interwikiId;
/**
 * Creates an immutable interwiki record. The arrays are stored as passed, not copied.
 *
 * @param prefix interwiki prefix
 * @param url    URL of the wiki, with "$1" as the article-name placeholder
 * @param api    URL of the wiki's api.php
 * @param wikiId database name of the wiki
 * @param local  whether the wiki is in this project
 * @param trans  whether interwiki transclusions are allowed
 */
public XomwInterwiki(byte[] prefix, byte[] url, byte[] api, byte[] wikiId, boolean local, boolean trans) {
	this.mPrefix = prefix;
	this.mURL = url;
	this.mAPI = api;
	this.mWikiID = wikiId;
	this.mLocal = local;
	this.mTrans = trans;
}
/**
 * Check whether an interwiki prefix exists, by asking the interwiki lookup of the given services Object.
 *
 * [@]deprecated since 1.28, use InterwikiLookup instead
 *
 * @param XomwMediaWikiServices mws Services Object supplying the interwiki lookup
 * @param String prefix Interwiki prefix to use
 * @return boolean Whether it exists
 */
public static boolean isValidInterwiki(XomwMediaWikiServices mws, byte[] prefix) {
	// MW: MediaWikiServices::getInstance().getInterwikiLookup().isValidInterwiki(prefix);
	XomwInterwikiLookup lookup = mws.getInterwikiLookup();
	return lookup.isValidInterwiki(prefix);
}
// /**
// * Fetch an Interwiki Object
// *
// * @deprecated since 1.28, use InterwikiLookup instead
// *
// * @param String prefix Interwiki prefix to use
// * @return Interwiki|null|boolean
// */
// public static function fetch(prefix) {
// return MediaWikiServices::getInstance().getInterwikiLookup().fetch(prefix);
// }
//
// /**
// * Purge the cache (local and persistent) for an interwiki prefix.
// *
// * @param String prefix
// * @since 1.26
// */
// public static function invalidateCache(prefix) {
// return MediaWikiServices::getInstance().getInterwikiLookup().invalidateCache(prefix);
// }
//
// /**
// * Returns all interwiki prefixes
// *
// * @deprecated since 1.28, unused. Use InterwikiLookup instead.
// *
// * @param String|null local If set, limits output to local/non-local interwikis
// * @return array List of prefixes
// * @since 1.19
// */
// public static function getAllPrefixes(local = null) {
// return MediaWikiServices::getInstance().getInterwikiLookup().getAllPrefixes(local);
// }
/**
 * Get the URL for a particular title (or with the "$1" placeholder left in place if no title is given)
 *
 * @param String title What text to put for the article name; may be null
 * @return String The URL; when title is non-null, "$1" is replaced by the url-encoded title
 * @note Prior to 1.19 the getURL with an argument was broken.
 *       If you use this arg in an extension that supports MW earlier
 *       than 1.19 please wfUrlencode and substitute $1 on your own.
 */
// title=null
public byte[] getURL(byte[] title) {
	if (title == null) {
		return this.mURL;
	}
	return XophpString.str_replace(ARG_1, XomwGlobalFunctions.wfUrlencode(title), this.mURL);
}
/**
 * Get the API URL for this wiki, exactly as passed to the constructor.
 *
 * @return String The URL
 */
public byte[] getAPI() {
	return mAPI;
}
/**
 * Get the DB name for this wiki, exactly as passed to the constructor.
 *
 * @return String The DB name
 */
public byte[] getWikiID() {
	return mWikiID;
}
/**
 * Is this a local link from a sister project, or is
 * it something outside, like Google
 *
 * @return boolean the "local" flag passed to the constructor
 */
public boolean isLocal() {
	return mLocal;
}
/**
 * Can pages from this wiki be transcluded?
 * Still requires wgEnableScaryTransclusion
 *
 * @return boolean the "trans" flag passed to the constructor
 */
public boolean isTranscludable() {
	return mTrans;
}
/**
* Get the name for the interwiki site
*
* XO: not yet ported. The MediaWiki message lookup is commented out below; the method
* currently passes mPrefix to Tfds.Write and always returns null, so callers must
* handle a null result. The env argument is unused.
*
* @return String
*/
public byte[] getName(XomwEnv env) {
// XomwMessage msg = XomwGlobalFunctions.wfMessage(env, "interwiki-name-" + this.mPrefix).inContentLanguage();
//
// return !msg.exists() ? Bry_.Empty : msg.text();
Tfds.Write(mPrefix);
return null;
}
// /**
// * Get a description for this interwiki
// *
// * @return String
// */
// public function getDescription() {
// msg = wfMessage('interwiki-desc-' . this.mPrefix).inContentLanguage();
//
// return !msg.exists() ? '' : msg.text();
// }
private static final byte[] ARG_1 = Bry_.new_a7("$1");
}

View File

@@ -13,3 +13,40 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki.includes.interwiki; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*;
/**
* Service interface for looking up Interwiki records.
*
* XO: prefixes are passed and returned as byte[] rather than String.
*
* @since 1.28
*/
public interface XomwInterwikiLookup {
/**
* Check whether an interwiki prefix exists
*
* @param String $prefix Interwiki prefix to use
* @return boolean Whether it exists
*/
boolean isValidInterwiki(byte[] prefix);
/**
* Fetch an Interwiki Object
*
* XO: the return type is XomwInterwiki, so the PHP "false" result cannot be expressed.
* NOTE(review): presumably null covers both the "null" and "false" cases -- confirm
* against the implementing classes.
*
* @param String $prefix Interwiki prefix to use
* @return Interwiki|null|boolean
*/
XomwInterwiki fetch(byte[] prefix);
/**
* Returns all interwiki prefixes
*
* XO: $local is a primitive boolean here, so the PHP "not set" (null) case, which
* returns every prefix, cannot be requested through this signature.
*
* @param String|null $local If set, limits output to local/non-local interwikis
* @return String[] List of prefixes
*/
byte[][] getAllPrefixes(boolean local);
// /**
// * Purge the in-process and persistent Object cache for an interwiki prefix
// * @param String $prefix
// */
// void invalidateCache(byte[] prefix);
}

View File

@@ -13,3 +13,157 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki.includes.interwiki; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*;
import gplx.xowa.mediawiki.includes.site.*;
/**
 * XomwInterwikiLookup that derives its interwiki records from the sites supplied by a XomwSiteLookup.
 * The complete interwiki map is built on first use and then cached in this instance.
 */
public class XomwInterwikiLookupAdapter implements XomwInterwikiLookup {
	/**
	 * @var SiteLookup
	 */
	private final XomwSiteLookup siteLookup;
	/**
	 * @var Interwiki[]|null associative array mapping interwiki prefixes to Interwiki objects.
	 * Stays null until getInterwikiMap() loads it. It must not be pre-populated with an
	 * empty hash, or the lazy load would never run.
	 */
	private Ordered_hash interwikiMap;
	public XomwInterwikiLookupAdapter (
		XomwSiteLookup siteLookup
		// Ordered_hash interwikiMap
	) {
		this.siteLookup = siteLookup;
	}
	/**
	 * See InterwikiLookup::isValidInterwiki
	 * It loads the whole interwiki map.
	 *
	 * @param String $prefix Interwiki prefix to use
	 * @return boolean Whether it exists
	 */
	public boolean isValidInterwiki(byte[] prefix) {
		return XophpArray.array_key_exists(prefix, this.getInterwikiMap());
	}
	/**
	 * See InterwikiLookup::fetch
	 * It loads the whole interwiki map.
	 *
	 * @param String $prefix Interwiki prefix to use
	 * @return Interwiki|null|boolean the record, or null when the prefix is null, empty or unknown
	 */
	public XomwInterwiki fetch(byte[] prefix) {
		// PHP: $prefix == ''. Arrays must be tested by length; a reference comparison with
		// Bry_.Empty only matches that one shared instance.
		if (prefix == null || prefix.length == 0) {
			return null;
		}
		if (!this.isValidInterwiki(prefix)) {
			return null;
		}
		return (XomwInterwiki)this.interwikiMap.Get_by(prefix);
	}
	/**
	 * See InterwikiLookup::getAllPrefixes
	 *
	 * XO: local is a primitive boolean, so the PHP "null returns every prefix" case does not
	 * exist here; the result is always filtered by isLocal() == local.
	 * NOTE(review): an earlier version computed array_keys of the whole map for !local and
	 * discarded it. If "false" was meant to return all prefixes, restore that as a return.
	 *
	 * @param String|null $local If set, limits output to local/non-local interwikis
	 * @return String[] List of prefixes
	 */
	public byte[][] getAllPrefixes(boolean local) {
		List_adp res = List_adp_.New();
		Ordered_hash hash = this.getInterwikiMap();
		int len = hash.Len();
		for (int i = 0; i < len; i++) {
			XomwInterwiki interwiki = (XomwInterwiki)hash.Get_at(i);
			if (interwiki.isLocal() == local) {
				res.Add(interwiki.interwikiId);
			}
		}
		return (byte[][])res.To_ary_and_clear(byte[].class);
	}
	// /**
	// * See InterwikiLookup::invalidateCache
	// *
	// * @param String $prefix
	// */
	// public function invalidateCache($prefix) {
	// if (!isset(this.interwikiMap[$prefix])) {
	// return;
	// }
	// $globalId = this.interwikiMap[$prefix].getWikiID();
	// unset(this.interwikiMap[$prefix]);
	//
	// // Reload the interwiki
	// site = this.siteLookup.getSites().getSite($globalId);
	// interwikis = this.getSiteInterwikis(site);
	// this.interwikiMap = array_merge(this.interwikiMap, [ interwikis[$prefix] ]);
	// }
	/**
	 * Load interwiki map to use as cache
	 *
	 * @return the freshly built map, which is also stored in this.interwikiMap
	 */
	private Ordered_hash loadInterwikiMap() {
		// keys are byte[] prefixes, so the map must compare keys by content (New_bry), not by identity
		Ordered_hash map = Ordered_hash_.New_bry();
		// without a site lookup there is nothing to load; keep the map empty, as it was before lazy loading worked
		if (this.siteLookup != null) {
			XomwSiteList siteList = this.siteLookup.getSites();
			int len = siteList.Len();
			for (int i = 0; i < len; i++) {
				XomwSite site = siteList.GetAt(i);
				XomwInterwiki[] interwikis = this.getSiteInterwikis(site);
				// interwikiMap = array_merge(interwikiMap, interwikis);
				for (XomwInterwiki interwiki : interwikis) {
					map.Add(interwiki.interwikiId, interwiki);
				}
			}
		}
		this.interwikiMap = map;
		return map;
	}
	/**
	 * Get interwikiMap attribute, load if needed.
	 *
	 * @return Interwiki[]
	 */
	private Ordered_hash getInterwikiMap() {
		if (this.interwikiMap == null) {
			this.loadInterwikiMap();
		}
		return this.interwikiMap;
	}
	/**
	 * Load interwikis for the given site
	 *
	 * Every returned record has interwikiId set to its prefix, because the array returned
	 * here drops the hash keys and loadInterwikiMap() needs the id to key the map.
	 *
	 * @param Site site
	 * @return Interwiki[]
	 */
	private XomwInterwiki[] getSiteInterwikis(XomwSite site) {
		Ordered_hash interwikis = Ordered_hash_.New();
		Ordered_hash hash = site.getInterwikiIds();
		int len = hash.Len();
		for (int i = 0; i < len; i++) {
			String interwiki = (String)hash.Get_at(i);
			String url = site.getPageUrl();
			String path = null;
			// PHP: $site instanceof MediaWikiSite (subclasses included)
			if (site instanceof XomwMediaWikiSite) {
				path = ((XomwMediaWikiSite)site).getFileUrl("api.php");
			} else {
				path = "";
			}
			boolean local = String_.Eq(site.getSource(), "local");
			byte[] prefix = Bry_.new_u8(interwiki);
			// TODO: How to adapt trans?
			XomwInterwiki itm = new XomwInterwiki(
				prefix,
				Bry_.new_u8(url),
				Bry_.new_u8(path),
				Bry_.new_u8(site.getGlobalId()),
				local
				, false
			);
			itm.interwikiId = prefix;
			interwikis.Add(interwiki, itm);
		}
		return (XomwInterwiki[])interwikis.To_ary_and_clear(XomwInterwiki.class);
	}
}

View File

@@ -13,3 +13,27 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki.includes.libs; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*;
// Minimal stand-in for PHP's ArrayObject, backed by an Ordered_hash.
// REF:http://php.net/manual/en/class.arrayobject.php
// NOTE(review): offsetSet and Add_or_update both call hash.Add; whether that replaces or
// rejects an existing key depends on Ordered_hash -- confirm it matches PHP's overwrite semantics.
public abstract class XomwArrayObject {
	private final Ordered_hash hash = Ordered_hash_.New();
	/** Returns whether an entry is stored under key. */
	public boolean offsetExists(Object key) {return hash.Has(key);}
	/** Returns the value stored under key, as returned by the hash lookup. */
	public Object offsetGet(Object key) {return hash.Get_by(key);}
	/** Removes the entry stored under key. */
	public void offsetUnset(Object key) {hash.Del(key);}
	/** Stores val under the integer key; subclasses may override to intercept additions. */
	@gplx.Virtual public void offsetSet(int key, Object val) {hash.Add(key, val);}
	/** Returns the number of stored entries. */
	public int count() {
		return hash.Len();
	}
	/** Returns the i-th value in insertion order. */
	public Object Get_at(int i) {
		return hash.Get_at(i);
	}
	/** Adds val under a key equal to the current entry count. */
	public void Add_or_update(Object val) {
		int nextKey = hash.Count();
		hash.Add(nextKey, val);
	}
}

View File

@@ -13,3 +13,219 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki.includes.libs; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*;
/**
* Extends ArrayObject and does two things:
*
* Allows deriving classes to easily intercept additions
* and deletions for purposes such as additional indexing.
*
* Enforces the objects to be of a certain type, so this
* can be relied upon, much as if this had true support
* for generics, which sadly enough is not possible in PHP.
*/
public abstract class XomwGenericArrayObject extends XomwArrayObject { /**
* Returns the name of an interface/class that the element should implement/extend.
*
* @since 1.20
*
* @return String
*/
abstract public Class<?> getObjectType();
/**
* @see SiteList::getNewOffset()
* @since 1.20
* @var integer
*/
protected int indexOffset = 0;
/**
 * Finds the offset to use when appending an element: indexOffset is advanced past every
 * offset that is already occupied and the first free one is returned.
 * The super class does this, so it would be better to integrate,
 * but there does not appear to be any way to do this...
 *
 * @since 1.20
 *
 * @return integer
 */
protected int getNewOffset() {
	for (; this.offsetExists(this.indexOffset); this.indexOffset++) {
		// occupied: try the next offset
	}
	return this.indexOffset;
}
/**
* Constructor.
* @see ArrayObject::__construct
*
* XO: the Java constructor takes no arguments. The PHP parameters listed below are not
* ported, and the loop that would seed the Object from $input is commented out, so a
* new instance starts without any elements added by this constructor.
*
* @since 1.20
*
* @param null|array $input
* @param int $flags
* @param String $iterator_class
*/
public XomwGenericArrayObject() {
// if (input != null) {
// int len = Array_.Len(input);
// for (int i = 0; i < len; i++) {
// Object val = Array_.Get_at(input, i);
// this.offsetSet(i, val);
// }
// }
}
/**
 * Appends a value: setElement is called with the XophpUtility.NULL_INT index so that it
 * resolves the offset itself.
 * @see ArrayObject::append
 *
 * @since 1.20
 *
 * @param mixed $value
 */
public void append(Object val) {
	int unassigned = XophpUtility.NULL_INT;
	setElement(unassigned, val);
}
/**
 * Routes every indexed set through setElement so type checking and the preSetElement hook apply.
 * @see ArrayObject::offsetSet()
 *
 * @since 1.20
 *
 * @param mixed $index
 * @param mixed $value
 */
@Override public void offsetSet(int index, Object val) {
	setElement(index, val);
}
/**
 * Returns if the provided value is an instance of the type returned by getObjectType,
 * i.e. it is of that class, extends it or implements it (PHP: is_a).
 *
 * A null value, or a null Object type, is never valid.
 *
 * @since 1.20
 *
 * @param mixed $value
 *
 * @return boolean
 */
protected boolean hasValidType(Object val) {
	Class<?> cls = this.getObjectType();
	// isInstance accepts subclasses and implementers, matching the documented
	// "implement/extend" contract; an exact-class comparison would reject them
	return cls != null && cls.isInstance(val);
}
/**
 * Common path for every set operation: checks the value's type, resolves the offset
 * when none was given, and stores the value unless preSetElement vetoes it.
 *
 * If you want to do additional indexing or have code that
 * otherwise needs to be executed whenever an element is added,
 * you can overload @see preSetElement.
 *
 * @since 1.20
 *
 * @param mixed $index offset to store at; a value for which XophpUtility.is_null is true means "append"
 * @param mixed $value
 *
 * @throws InvalidArgumentException when the value fails hasValidType
 */
protected void setElement(int index, Object val) {
	boolean typeOk = this.hasValidType(val);
	if (!typeOk) {
		String msg = "Can only add " + Type_.Canonical_name(this.getObjectType()) + " implementing objects to "
			+ Type_.Type_by_obj(this) + ".";
		throw new XophpInvalidArgumentException(msg);
	}
	int offset = XophpUtility.is_null(index) ? this.getNewOffset() : index;
	if (!this.preSetElement(offset, val)) {
		return;
	}
	super.offsetSet(offset, val);
}
/**
* Gets called before a new element is added to the ArrayObject.
*
* At this point the index is always set (ie not null) and the
* value is always of the type returned by @see getObjectType.
*
* Should return a boolean. When false is returned the element
* does not get added to the ArrayObject.
*
* This base implementation accepts every element; subclasses override it to
* intercept or veto additions.
*
* @since 1.20
*
* @param integer|String $index
* @param mixed $value
*
* @return boolean
*/
protected boolean preSetElement(int index, Object val) {
return true;
}
// /**
// * @see Serializable::serialize
// *
// * @since 1.20
// *
// * @return String
// */
// public function serialize() {
// return serialize(this.getSerializationData());
// }
//
// /**
// * Returns an array holding all the data that should go into serialization calls.
// * This is intended to allow overloading without having to reimplement the
// * behavior of this super class.
// *
// * @since 1.20
// *
// * @return array
// */
// protected function getSerializationData() {
// return [
// 'data' => this.getArrayCopy(),
// 'index' => this.indexOffset,
// ];
// }
//
// /**
// * @see Serializable::unserialize
// *
// * @since 1.20
// *
// * @param String $serialization
// *
// * @return array
// */
// public function unserialize($serialization) {
// $serializationData = unserialize($serialization);
//
// foreach ($serializationData['data'] as $offset => $value) {
// // Just set the element, bypassing checks and offset resolving,
// // as these elements have already gone through this.
// parent::offsetSet($offset, $value);
// }
//
// this.indexOffset = $serializationData['index'];
//
// return $serializationData;
// }
/**
 * Returns if the ArrayObject has no elements, i.e. count() is zero.
 *
 * @since 1.20
 *
 * @return boolean
 */
@gplx.Virtual public boolean isEmpty() {
	int size = this.count();
	return size == 0;
}
}

View File

@@ -13,3 +13,356 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki.includes.libs; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*;
import gplx.core.btries.*;
import gplx.xowa.mediawiki.includes.libs.replacers.*;
/**
* A collection of static methods to play with strings.
*/
public class XomwStringUtils {
// /**
// * Test whether a String is valid UTF-8.
// *
// * The function check for invalid byte sequences, overlong encoding but
// * not for different normalisations.
// *
// * @note In MediaWiki 1.21, this function did not provide proper UTF-8 validation.
// * In particular, the pure PHP code path did not in fact check for overlong forms.
// * Beware of this when backporting code to that version of MediaWiki.
// *
// * @since 1.21
// * @param String $value String to check
// * @return boolean Whether the given $value is a valid UTF-8 encoded String
// */
// static function isUtf8($value) {
// $value = (String)$value;
//
// // HHVM 3.4 and older come with an outdated version of libmbfl that
// // incorrectly allows values above U+10FFFF, so we have to check
// // for them separately. (This issue also exists in PHP 5.3 and
// // older, which are no longer supported.)
// static $newPHP;
// if ($newPHP === null) {
// $newPHP = !mb_check_encoding("\xf4\x90\x80\x80", 'UTF-8');
// }
//
// return mb_check_encoding($value, 'UTF-8') &&
// ($newPHP || preg_match("/\xf4[\x90-\xbf]|[\xf5-\xff]/S", $value) === 0);
// }
// Token ids stored as values in the trie below; delimiterExplode branches on them.
private static final byte DELIMITER_EXPLODE__SEP = 0, DELIMITER_EXPLODE__BGN = 1, DELIMITER_EXPLODE__END = 2;
// Maps the three hard-coded tokens ("|", "-{", "}-") to the ids above.
// NOTE(review): cs() presumably builds a case-sensitive trie -- confirm against Btrie_slim_mgr.
private static final Btrie_slim_mgr delimiter_explode_trie = Btrie_slim_mgr.cs()
.Add_str_byte("|" , DELIMITER_EXPLODE__SEP)
.Add_str_byte("-{", DELIMITER_EXPLODE__BGN)
.Add_str_byte("}-", DELIMITER_EXPLODE__END)
;
/**
 * Splits src on "|", ignoring any "|" that sits inside a (possibly nested) "-{" ... "}-" region.
 *
 * XO.MW: NOTE: the MediaWiki function takes startDelim / endDelim / separator / nested as arguments;
 * it is only used in two places, so XO hard-codes (a) nested=true; (b) bgn="-{" end="}-" sep="|"
 * XO.MW.PORTED: entire proc rewritten; see PHP for source
 *
 * @param tmp scratch list; pieces are added to it and it is emptied by To_ary_and_clear before returning
 * @param trv trie result holder; read via Pos() after each successful match
 * @param src bytes to split
 * @return the pieces, in order; always at least one item (the text after the last top-level separator)
 */
public static byte[][] delimiterExplode(List_adp tmp, Btrie_rv trv, byte[] src) {
	final int len = src.length;
	int nesting = 0;    // incremented on "-{", decremented on "}-"
	int itm_bgn = 0;    // start of the piece currently being accumulated
	int pos = 0;
	while (pos < len) {
		Object match = delimiter_explode_trie.Match_at(trv, src, pos, len);
		if (match == null) {
			// ordinary byte; keep scanning
			pos++;
			continue;
		}
		byte tid = ((gplx.core.primitives.Byte_obj_val)match).Val();
		if (tid == DELIMITER_EXPLODE__SEP) {
			// only a separator outside every "-{" ... "}-" region ends a piece
			if (nesting == 0) {
				tmp.Add(Bry_.Mid(src, itm_bgn, pos));
				itm_bgn = pos + 1;
			}
		}
		else if (tid == DELIMITER_EXPLODE__BGN) {
			nesting++;
		}
		else if (tid == DELIMITER_EXPLODE__END) {
			nesting--;
		}
		// resume after the matched token
		pos = trv.Pos();
	}
	// end of input: whatever follows the last top-level separator is the final piece
	tmp.Add(Bry_.Mid(src, itm_bgn, len));
	return (byte[][])tmp.To_ary_and_clear(byte[].class);
}
// /**
// * Perform an operation equivalent to `preg_replace()`
// *
// * Matches this code:
// *
// * preg_replace("!$startDelim(.*?)$endDelim!", $replace, $subject);
// *
// * ..except that it's worst-case O(N) instead of O(N^2). Compared to delimiterReplace(), this
// * implementation is fast but memory-hungry and inflexible. The memory requirements are such
// * that I don't recommend using it on anything but guaranteed small chunks of text.
// *
// * @param String $startDelim
// * @param String $endDelim
// * @param String $replace
// * @param String $subject
// * @return String
// */
// static function hungryDelimiterReplace($startDelim, $endDelim, $replace, $subject) {
// $segments = explode($startDelim, $subject);
// $output = array_shift($segments);
// foreach ($segments as $s) {
// $endDelimPos = strpos($s, $endDelim);
// if ($endDelimPos === false) {
// $output .= $startDelim . $s;
// } else {
// $output .= $replace . substr($s, $endDelimPos + strlen($endDelim));
// }
// }
//
// return $output;
// }
/**
 * Copies src to bfr, handing every region enclosed by bgn ... end to the callback instead of copying it.
 *
 * Modeled on MediaWiki's StringUtils::delimiterReplaceCallback, which is documented as equivalent to
 *
 *   preg_replace_callback("!$startDelim(.*)$endDelim!s$flags", $callback, $subject);
 *
 * Differences from that regex model:
 * - If the start delimiter ends with an initial substring of the end delimiter (as with C-style
 *   comments), the end must share no characters with the start. A comment opener immediately followed
 *   by "/" is therefore not treated as both the start and the end of a comment.
 * - START START END matches the outer pair. EX: "(a(b)" has match of "a(b"
 * - An END without a preceding START is copied through unchanged.
 * - A START without a following END is copied through unchanged, delimiter included.
 *
 * The delimiters are literal byte sequences, not regular expressions.
 * XO.MW: flags not supported; MW passes them directly to the regex (EX: "i" for case-insensitive)
 *
 * @param bfr      output buffer; receives the unmatched text and whatever the callback writes
 * @param bgn      start delimiter; must not be empty
 * @param end      end delimiter; must not be empty
 * @param callback invoked once per match as cb(bfr, src, content_bgn, content_end), where the
 *                 range covers the text between the delimiters (delimiters excluded)
 * @param src      text to scan
 */
public static void delimiterReplaceCallback(Bry_bfr bfr, byte[] bgn, byte[] end, XomwReplacer callback,
	byte[] src
) {
	// XO.MW.PORTED: MW runs a start/end regex over the subject and passes find / replace substrings
	// to the callback; XO scans the bytes directly and passes offsets instead.
	int srcLen = src.length;
	int bgnLen = bgn.length;
	int endLen = end.length;
	// an empty end delimiter matches at every offset without advancing pos, which would never terminate
	if (bgnLen == 0 || endLen == 0) throw Err_.new_wo_type("delimiters should not be empty");

	int pos = 0;
	int prv = 0;          // outside a match: start of text not yet written; inside a match: start of the content
	int bgnPos = -1;      // offset of the pending start delimiter; only meaningful while foundStart is true
	boolean foundStart = false;
	while (true) {
		if (pos >= srcLen) {
			// Flush the tail. If a start delimiter is still pending, it never found its end:
			// copy from the delimiter itself so the unmatched text is left intact.
			bfr.Add_mid(src, foundStart ? bgnPos : prv, srcLen);
			break;
		}
		boolean tokenTypeIsStart;
		if (Bry_.Eq(src, pos, pos + bgnLen, bgn)) {
			tokenTypeIsStart = true;
		}
		else if (Bry_.Eq(src, pos, pos + endLen, end)) {
			tokenTypeIsStart = false;
		}
		else {
			pos++;
			continue;
		}

		if (tokenTypeIsStart) {
			// Only move the start position if we haven't already found a start
			// This means that START START END matches outer pair
			if (!foundStart) {
				// Found start; write out the non-matching section before it
				bfr.Add_mid(src, prv, pos);
				bgnPos = pos;
				pos += bgnLen;
				prv = pos;
				foundStart = true;
			} else {
				// Move the input position past the *first character* of START,
				// to protect against missing END when it overlaps with START
				pos++;
			}
		} else {
			if (foundStart) {
				// Found match; the callback writes the replacement for src[prv..pos)
				callback.cb(bfr, src, prv, pos);
				foundStart = false;
			} else {
				// Non-matching end, write it out together with the text before it
				bfr.Add_mid(src, prv, pos + endLen);
			}
			pos += endLen;
			prv = pos;
		}
	}
}
/**
 * Replaces every startDelim ... endDelim region of subject with a fixed replacement, writing the result to bfr.
 *
 * MediaWiki documents the original as matching
 *
 *   preg_replace("!$startDelim(.*)$endDelim!$flags", $replace, $subject);
 *
 * XO.MW: removed flags=''
 *
 * @param bfr        output buffer
 * @param startDelim start delimiter
 * @param endDelim   end delimiter
 * @param replace    replacement bytes; wrapped in an XomwRegexlikeReplacer
 * @param subject    text to search
 */
public static void delimiterReplace(Bry_bfr bfr, byte[] startDelim, byte[] endDelim, byte[] replace, byte[] subject) {
	delimiterReplaceCallback(bfr, startDelim, endDelim, new XomwRegexlikeReplacer(replace), subject);
}
// /**
// * More or less "markup-safe" explode()
// * Ignores any instances of the separator inside `<...>`
// * @param String $separator
// * @param String $text
// * @return array
// */
// static function explodeMarkup($separator, $text) {
// $placeholder = "\x00";
//
// // Remove placeholder instances
// $text = str_replace($placeholder, '', $text);
//
// // Replace instances of the separator inside HTML-like tags with the placeholder
// $replacer = new DoubleReplacer($separator, $placeholder);
// $cleaned = StringUtils::delimiterReplaceCallback('<', '>', $replacer->cb(), $text);
//
// // Explode, then put the replaced separators back in
// $items = explode($separator, $cleaned);
// foreach ($items as $i => $str) {
// $items[$i] = str_replace($placeholder, $separator, $str);
// }
//
// return $items;
// }
/**
 * More or less "markup-safe" str_replace(): replaces find with repl in place inside src[src_bgn..src_end),
 * skipping any occurrence that sits between "<" and ">".
 *
 * REF:/includes/libs/StringUtils.php|replaceMarkup
 * XO.MW.PORTED: MW swaps occurrences inside tags for a "\x00" placeholder, runs str_replace, then
 * restores the placeholder. XO avoids the extra regex calls / String creations by overwriting the
 * bytes directly, which is why find and repl must have the same length. EX: "!!" -> "||"
 *
 * @param src     bytes to scan; modified in place
 * @param src_bgn first index to scan (inclusive)
 * @param src_end last index to scan (exclusive); bytes at or after it are never overwritten
 * @param find    bytes to search for; an empty array makes this a no-op
 * @param repl    replacement bytes; must be the same length as find
 */
public static void replaceMarkup(byte[] src, int src_bgn, int src_end, byte[] find, byte[] repl) {
	int find_len = find.length;
	int repl_len = repl.length;
	if (find_len != repl_len) throw Err_.new_wo_type("find and repl should be same length");
	// nothing to search for; also avoids reading find[0] from an empty array
	if (find_len == 0) return;

	byte find_0 = find[0];
	byte dlm_bgn = Byte_ascii.Angle_bgn;
	byte dlm_end = Byte_ascii.Angle_end;
	boolean repl_active = true;    // false while inside "<" ... ">"

	// loop every char in array
	for (int i = src_bgn; i < src_end; i++) {
		byte b = src[i];
		if (   repl_active
			&& b == find_0
			&& i + find_len <= src_end    // a match must fit entirely inside the requested range
			&& Bry_.Match(src, i + 1, i + find_len, find, 1, find_len)
			) {
			Bry_.Set(src, i, i + find_len, repl);
		}
		else if (b == dlm_bgn) {
			repl_active = false;
		}
		else if (b == dlm_end) {
			repl_active = true;
		}
	}
}
// /**
// * Escape a String to make it suitable for inclusion in a preg_replace()
// * replacement parameter.
// *
// * @param String $String
// * @return String
// */
// static function escapeRegexReplacement($String) {
// $String = str_replace('\\', '\\\\', $String);
// $String = str_replace('$', '\\$', $String);
// return $String;
// }
//
// /**
// * Workalike for explode() with limited memory usage.
// *
// * @param String $separator
// * @param String $subject
// * @return ArrayIterator|ExplodeIterator
// */
// static function explode($separator, $subject) {
// if (substr_count($subject, $separator) > 1000) {
// return new ExplodeIterator($separator, $subject);
// } else {
// return new ArrayIterator(explode($separator, $subject));
// }
// }
}

View File

@@ -13,3 +13,62 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki.includes.libs; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*;
import org.junit.*; import gplx.core.tests.*;
// JUnit tests for XomwStringUtils. Each case goes through XomwStringUtilsFxt, which converts the
// String arguments to byte[] and compares the actual result with the expected one.
public class XomwStringUtilsTest {
private final XomwStringUtilsFxt fxt = new XomwStringUtilsFxt();
@Test public void Delimiter_explode() {
// basic: split on every "|"
fxt.Test_delimiter_explode("a|b|c" , "a", "b", "c");
// empty: leading, trailing and adjacent separators produce empty items
fxt.Test_delimiter_explode("|a||c|" , "", "a", "", "c", "");
// nest_1: a "|" inside "-{" ... "}-" does not split
fxt.Test_delimiter_explode("a|-{b|c}-|d" , "a", "-{b|c}-", "d");
// nest_many: nested "-{" ... "}-" regions are tracked by depth
fxt.Test_delimiter_explode("a|-{b-{c|d}-e}-|f" , "a", "-{b-{c|d}-e}-", "f");
}
@Test public void Replace_markup() {
// basic
fxt.Test_replace_markup("a!!b" , "!!", "||", "a||b");
// missing: no occurrence of find; src is unchanged
fxt.Test_replace_markup("abcd" , "!!", "||", "abcd");
// eos: match ends exactly at end of src
fxt.Test_replace_markup("a!!" , "!!", "||", "a||");
// ignore: occurrences between "<" and ">" are not replaced
fxt.Test_replace_markup("a!!b<!!>!!c" , "!!", "||", "a||b<!!>||c");
// ignore asym_lhs: extra "<" before the closing ">"
fxt.Test_replace_markup("a!!b<!!<!!>!!c" , "!!", "||", "a||b<!!<!!>||c");
// ignore asym_rhs: extra ">" after the closing ">"
fxt.Test_replace_markup("a!!b<!!>!!>!!c" , "!!", "||", "a||b<!!>||>||c"); // NOTE: should probably be "!!>!!>", but unmatched ">" are escaped to "&gt;"
}
@Test public void delimiterReplace() {
// basic
fxt.Test_delimiterReplace("/*", "*/", "a/*0*/c" , "9", "a9c");
// overlapping; "/*/"
fxt.Test_delimiterReplace("/*", "*/", "a/*/0/*/c" , "9", "a9c");
// dangling bgn; "/* /*": the outer start pairs with the end
fxt.Test_delimiterReplace("/*", "*/", "a/*0/*1*/c" , "9", "a9c"); // fails if "a/*9c"
// dangling end; "*/ */": the unmatched second end is copied through
fxt.Test_delimiterReplace("/*", "*/", "a/*0*/1*/c" , "9", "a91*/c");
}
}
// Test fixture for XomwStringUtilsTest: converts String arguments to byte[], calls XomwStringUtils, and asserts on the result.
class XomwStringUtilsFxt {
	// Explodes src_str and expects the pieces to equal expd.
	public void Test_delimiter_explode(String src_str, String... expd) {
		byte[][] actl = XomwStringUtils.delimiterExplode(List_adp_.New(), new gplx.core.btries.Btrie_rv(), Bry_.new_u8(src_str));
		Gftest.Eq__ary(expd, actl, "src=~{0}", src_str);
	}
	// Runs the in-place replace over the whole of src_str and expects the mutated bytes to equal expd.
	public void Test_replace_markup(String src_str, String find, String repl, String expd) {
		byte[] actl = Bry_.new_u8(src_str);
		byte[] find_bry = Bry_.new_a7(find);
		byte[] repl_bry = Bry_.new_a7(repl);
		XomwStringUtils.replaceMarkup(actl, 0, actl.length, find_bry, repl_bry);
		Gftest.Eq__str(expd, actl);
	}
	// Replaces every bgn ... end region of src with repl and expects the buffer contents to equal expd.
	public void Test_delimiterReplace(String bgn, String end, String src, String repl, String expd) {
		Bry_bfr out = Bry_bfr_.New();
		byte[] bgn_bry = Bry_.new_u8(bgn);
		byte[] end_bry = Bry_.new_u8(end);
		byte[] repl_bry = Bry_.new_u8(repl);
		byte[] src_bry = Bry_.new_u8(src);
		XomwStringUtils.delimiterReplace(out, bgn_bry, end_bry, repl_bry, src_bry);
		Gftest.Eq__str(expd, out.To_str_and_clear());
	}
}

View File

@@ -13,3 +13,13 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki.includes.libs.replacers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*; import gplx.xowa.mediawiki.includes.libs.*;
/**
 * Replacer that writes the same fixed bytes for every match.
 * The matched range is ignored, so the replacement is emitted verbatim.
 */
public class XomwRegexlikeReplacer implements XomwReplacer {
	private final byte[] replacement;
	public XomwRegexlikeReplacer(byte[] replace) {
		this.replacement = replace;
	}
	public void cb(Bry_bfr bfr, byte[] src, int find_bgn, int find_end) {
		// src / find_bgn / find_end are unused: the output does not depend on what was matched
		bfr.Add(replacement);
	}
}

View File

@@ -13,3 +13,11 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki.includes.libs.replacers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*; import gplx.xowa.mediawiki.includes.libs.*;
/**
 * Base type for "replacers": the XO counterpart of the objects MediaWiki uses in
 * preg_replace_callback() and StringUtils::delimiterReplaceCallback()
 */
public interface XomwReplacer {
// Called once per match. Implementations write the replacement for the match to bfr.
// XomwStringUtils.delimiterReplaceCallback passes the range of the text between the delimiters
// (find_bgn inclusive, find_end exclusive; the delimiters themselves are outside the range).
void cb(Bry_bfr bfr, byte[] src, int find_bgn, int find_end);
}

View File

@@ -13,3 +13,465 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki.includes.linkers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*;
import gplx.langs.htmls.*;
import gplx.xowa.mediawiki.includes.xohtml.*;
/* TODO.XO
* P7: $html = HtmlArmor::getHtml($text);
* P3: getLinkUrl [alternate urls? EX: mw/wiki/index.php/title?]
* P2: titleFormatter->getPrefixedText [depends on redlinks]
* P1: getLinkClasses [depends on redlinks]
*/
/**
* Class that generates HTML <a> links for pages.
*
* @see https://www.mediawiki.org/wiki/Manual:LinkRenderer
* @since 1.28
*/
public class XomwLinkRenderer {
/**
 * Whether to force the pretty article path
 *
 * NOTE(review): stored and exposed through the accessors, but getLinkUrl does not read it yet
 * (the MW logic is commented out there).
 *
 * @var boolean
 */
private boolean forceArticlePath = false;
/**
 * MW: a PROTO_* constant or false.
 * XO: narrowed to a boolean; passed as the last argument of XomwTitle.getLinkURL.
 *
 * @var String|boolean|int
 */
private boolean expandUrls = false;
/**
 * Stub threshold; only referenced by the commented-out MW logic in getLinkClasses.
 *
 * @var int
 */
private int stubThreshold = 0;
/**
 * @var TitleFormatter
 */
// private $titleFormatter;
/**
 * @var LinkCache
 */
// private $linkCache;
/**
 * Whether to run the legacy Linker hooks
 *
 * @var boolean
 */
// private boolean runLegacyBeginHook = true;
// XO: reusable per-instance scratch objects. attribs is cleared and refilled on every make*Link call.
private final XomwHtmlTemp htmlTemp = new XomwHtmlTemp();
private final Xomw_atr_mgr attribs = new Xomw_atr_mgr();
private final List_adp tmp_merge_deleted = List_adp_.New();
private final XomwSanitizer sanitizer;
// /**
// * @param TitleFormatter $titleFormatter
// * @param LinkCache $linkCache
// */
// XO: takes the sanitizer used by mergeAttribs; the MW arguments (titleFormatter, linkCache) are not ported yet.
public XomwLinkRenderer(XomwSanitizer sanitizer) { // TitleFormatter $titleFormatter, LinkCache $linkCache
// this.titleFormatter = $titleFormatter;
// this.linkCache = $linkCache;
this.sanitizer = sanitizer;
}
/**
 * Sets whether the pretty article path should be forced.
 *
 * @param force new value of the flag
 */
public void setForceArticlePath(boolean force) {
	forceArticlePath = force;
}
/**
 * @return whether the pretty article path is forced
 */
public boolean getForceArticlePath() {
	return forceArticlePath;
}
/**
 * Sets the URL-expansion flag (MW: a PROTO_* constant or false; XO: boolean only).
 *
 * @param expand new value of the flag
 */
public void setExpandURLs(boolean expand) {
	expandUrls = expand;
}
/**
 * @return the URL-expansion flag (MW: a PROTO_* constant or false; XO: boolean only)
 */
public boolean getExpandURLs() {
	return expandUrls;
}
/**
 * Sets the stub threshold.
 *
 * @param threshold new threshold
 */
public void setStubThreshold(int threshold) {
	stubThreshold = threshold;
}
/**
 * @return the stub threshold
 */
public int getStubThreshold() {
	return stubThreshold;
}
/**
* @param boolean $run
*/
// public void setRunLegacyBeginHook(boolean run) {
// this.runLegacyBeginHook = run;
// }
/**
 * Writes an <a> element for target to bfr, choosing the known or broken form from target.isKnown().
 *
 * @param bfr          output buffer
 * @param target       link target
 * @param text         link text; null means "derive it from the target"
 * @param extraAttribs extra attributes to merge into the element
 * @param query        query-string arguments for the url
 */
public void makeLink(Bry_bfr bfr,
	XomwTitle target, byte[] text, Xomw_atr_mgr extraAttribs, Xomw_qry_mgr query) {
	// $title = Title::newFromLinkTarget($target); // does db lookup?
	if (!target.isKnown()) {
		makeBrokenLink(bfr, target, text, extraAttribs, query);
		return;
	}
	makeKnownLink(bfr, target, text, extraAttribs, query);
}
/**
* Get the options in the legacy format
*
* @param boolean $isKnown Whether the link is known or broken
* @return array
*/
// private function getLegacyOptions($isKnown) {
// $options = [ 'stubThreshold' => this.stubThreshold ];
// if (this.forceArticlePath) {
// $options[] = 'forcearticlepath';
// }
// if (this.expandUrls === PROTO_HTTP) {
// $options[] = 'http';
// } elseif (this.expandUrls === PROTO_HTTPS) {
// $options[] = 'https';
// }
//
// $options[] = $isKnown ? 'known' : 'broken';
//
// return $options;
// }
//
// private function runBeginHook(LinkTarget $target, &$text, &$extraAttribs, &$query, $isKnown) {
// $ret = null;
// if (!Hooks::run('HtmlPageLinkRendererBegin',
// [ $this, $target, &$text, &$extraAttribs, &$query, &$ret ])
// ) {
// return $ret;
// }
//
// // Now run the legacy hook
// return this.runLegacyBeginHook($target, $text, $extraAttribs, $query, $isKnown);
// }
//
// private function runLegacyBeginHook(LinkTarget $target, &$text, &$extraAttribs, &$query,
// $isKnown
// ) {
// if (!this.runLegacyBeginHook || !Hooks::isRegistered('LinkBegin')) {
// // Disabled, or nothing registered
// return null;
// }
//
// $realOptions = $options = this.getLegacyOptions($isKnown);
// $ret = null;
// $dummy = new DummyLinker();
// $title = Title::newFromLinkTarget($target);
// if ($text !== null) {
// $realHtml = $html = HtmlArmor::getHtml($text);
// } else {
// $realHtml = $html = null;
// }
// if (!Hooks::run('LinkBegin',
// [ $dummy, $title, &$html, &$extraAttribs, &$query, &$options, &$ret ])
// ) {
// return $ret;
// }
//
// if ($html !== null && $html !== $realHtml) {
// // &$html was modified, so re-armor it as $text
// $text = new HtmlArmor($html);
// }
//
// // Check if they changed any of the options, hopefully not!
// if ($options !== $realOptions) {
// $factory = MediaWikiServices::getInstance()->getLinkRendererFactory();
// // They did, so create a separate instance and have that take over the rest
// $newRenderer = $factory->createFromLegacyOptions($options);
// // Don't recurse the hook...
// $newRenderer->setRunLegacyBeginHook(false);
// if (in_array('known', $options, true)) {
// return $newRenderer->makeKnownLink($title, $text, $extraAttribs, $query);
// } elseif (in_array('broken', $options, true)) {
// return $newRenderer->makeBrokenLink($title, $text, $extraAttribs, $query);
// } else {
// return $newRenderer->makeLink($title, $text, $extraAttribs, $query);
// }
// }
//
// return null;
// }
/**
 * Writes an <a> element for target to bfr using CSS classes the caller has already worked out.
 * If you have already looked up the proper CSS classes using getLinkClasses()
 * or some other method, use this to avoid looking it up again.
 *
 * @param bfr          output buffer
 * @param target       link target
 * @param text         link text; null means "derive it from the target"
 * @param classes      CSS classes to add; null or empty adds no class attribute
 * @param extraAttribs extra attributes to merge into the element
 * @param query        query-string arguments for the url
 */
public void makePreloadedLink(Bry_bfr bfr,
	XomwTitle target, byte[] text, byte[] classes, Xomw_atr_mgr extraAttribs, Xomw_qry_mgr query) {
	// XO.MW.HOOK: this.runBeginHook --> 'HtmlPageLinkRendererBegin', 'LinkBegin'

	target = this.normalizeTarget(target);
	byte[] url = this.getLinkUrl(target, query);
	attribs.Clear();
	attribs.Add(Gfh_atr_.Bry__href, url); // XO.MW: add url 1st; MW does attribs["url", url] + attribs + extra_attribs
	if (classes != null && classes.length > 0) // XO.MW: do not bother adding if empty
		attribs.Add(Gfh_atr_.Bry__class, classes);

	// MW: if ($prefixedText !== ''); test the content, not array identity, so that an empty array
	// that is not the Bry_.Empty instance is also treated as "no title"
	byte[] prefixed_text = target.getPrefixedText();
	if (prefixed_text != null && prefixed_text.length > 0) {
		attribs.Add(Gfh_atr_.Bry__title, prefixed_text);
	}

	this.mergeAttribs(attribs, extraAttribs); // XO.MW: changed to not always create another array

	if (text == null) {
		text = this.getLinkText(target);
	}

	this.buildAElement(bfr, target, text, attribs, true);
}
/**
 * Writes an <a> element for a target that is known to exist.
 * Works out the CSS classes ("extiw" for external targets, plus whatever getLinkClasses returns)
 * and delegates to makePreloadedLink.
 *
 * @param bfr          output buffer
 * @param target       link target
 * @param text         link text; null means "derive it from the target"
 * @param extraAttribs extra attributes to merge into the element
 * @param query        query-string arguments for the url
 */
public void makeKnownLink(Bry_bfr bfr,
	XomwTitle target, byte[] text, Xomw_atr_mgr extraAttribs, Xomw_qry_mgr query) {
	byte[] classes = target.isExternal() ? Bry__classes__extiw : Bry_.Empty;
	byte[] colour = this.getLinkClasses(target);
	if (colour != null && colour.length > 0) {
		// MW: implode(' ', $classes); only insert the space when there is a class before it
		classes = classes.length == 0
			? colour
			: Bry_.Add(classes, Byte_ascii.Space_bry, colour);
	}

	this.makePreloadedLink(bfr,
		target,
		text,
		classes,
		extraAttribs,
		query);
}
/**
 * Writes an <a> element with class "new" for a target that does not exist.
 *
 * NOTE: when query.action is unset and the target is not in the Special namespace, this sets
 * query.action and query.redlink on the object passed in by the caller.
 *
 * @param bfr          output buffer
 * @param target       link target; any fragment is dropped
 * @param text         link text; null means "derive it from the target"
 * @param extraAttribs extra attributes to merge into the element
 * @param query        query-string arguments for the url; may be modified (see NOTE)
 */
public void makeBrokenLink(Bry_bfr bfr,
	XomwTitle target, byte[] text, Xomw_atr_mgr extraAttribs, Xomw_qry_mgr query) {
	// XO.MW.HOOK: Run legacy hook

	// We don't want to include fragments for broken links, because they
	// generally make no sense.
	XomwTitle trg = target;
	if (trg.hasFragment()) {
		trg = trg.createFragmentTarget(Bry_.Empty);
	}
	trg = this.normalizeTarget(trg);

	// default to an "edit" redlink unless the caller chose an action or the target is a special page
	boolean action_missing = !XophpUtility.isset(query.action);
	if (action_missing && trg.getNamespace() != XomwDefines.NS_SPECIAL) {
		query.action = Bry__action__edit;
		query.redlink = 1;
	}

	byte[] href = this.getLinkUrl(trg, query);
	attribs.Clear();
	attribs.Add(Gfh_atr_.Bry__href, href); // $attribs = ['href' => $url,] + this.mergeAttribs($attribs, $extraAttribs);
	attribs.Add(Gfh_atr_.Bry__class, Bry__class__new);

	// $prefixedText = this.titleFormatter->getPrefixedText($target);
	// if ($prefixedText !== '') {
	//   // This ends up in parser cache!
	//   $attribs['title'] = wfMessage('red-link-title', $prefixedText)
	//     ->inContentLanguage()
	//     ->text();
	// }

	this.mergeAttribs(attribs, extraAttribs);

	byte[] caption = (text == null) ? this.getLinkText(trg) : text;
	this.buildAElement(bfr, trg, caption, attribs, false);
}
/**
 * Builds the final <a> element and writes it to bfr.
 *
 * @param bfr     output buffer
 * @param target  link target; unused until the MW hooks are ported
 * @param text    inner html of the element; written as-is
 * @param attribs attributes of the element
 * @param isKnown whether the link is known or broken; unused until the MW hooks are ported
 */
private void buildAElement(Bry_bfr bfr, XomwTitle target, byte[] text, Xomw_atr_mgr attribs, boolean isKnown) {
	// XO.MW.HOOK:HtmlPageLinkRendererEnd
	// XO.MW.HOOK:LinkEnd
	// MW: $html = HtmlArmor::getHtml($text); XO passes text through unchanged
	XomwHtml.rawElement(bfr, htmlTemp, Gfh_tag_.Bry__a, attribs, text);
}
/**
 * Returns the default text for a link to target.
 * XO.MW:SYNC:1.29; DATE:2017-01-31
 *
 * @param target link target
 * @return non-escaped text: the fragment when the target has no title text but has a fragment;
 *         otherwise the prefixed title text
 */
private byte[] getLinkText(XomwTitle target) {
	byte[] prefixed_text = target.getPrefixedText();
	// If the target is just a fragment, with no title, we return the fragment
	// text. Otherwise, we return the title text itself.
	// MW: $prefixedText === ''; test the content, not array identity, so that an empty array
	// that is not the Bry_.Empty instance also counts as "no title"
	boolean no_title = prefixed_text == null || prefixed_text.length == 0;
	if (no_title && target.hasFragment()) {
		return target.getFragment();
	}
	return prefixed_text;
}
// Returns the url for target by delegating to XomwTitle.getLinkURL with this renderer's expandUrls flag.
// XO.MW: forceArticlePath is not applied yet; the MW logic is kept below for reference.
private byte[] getLinkUrl(XomwTitle target, Xomw_qry_mgr query) {
	// TODO: Use a LinkTargetResolver service instead of Title
	// $title = Title::newFromLinkTarget($target);
	// if (this.forceArticlePath) {
	//   $realQuery = $query;
	//   $query = [];
	// }
	// else {
	//   $realQuery = [];
	// }
	// $url = $title->getLinkURL($query, false, this.expandUrls);
	// if (this.forceArticlePath && $realQuery) {
	//   $url = wfAppendQuery($url, $realQuery);
	// }
	return target.getLinkURL(query, false, this.expandUrls);
}
/**
 * Normalizes the provided target by delegating to XomwLinker.normaliseSpecialPage.
 *
 * @todo move the code from Linker actually here
 * @param target link target
 * @return the target returned by XomwLinker.normaliseSpecialPage
 */
private XomwTitle normalizeTarget(XomwTitle target) {
return XomwLinker.normaliseSpecialPage(target);
}
/**
 * Merges attribs into defaults, then removes every attribute whose value is null.
 *
 * MW returns a new merged array with all "false" attributes dropped. XO merges in place:
 * callers pass their working attribute set as defaults and render that same object afterwards,
 * so the null-valued attributes are pruned from defaults.
 *
 * @param defaults attributes to merge into; holds the merged result on return
 * @param attribs  extra attributes supplied by the caller
 */
private void mergeAttribs(Xomw_atr_mgr defaults, Xomw_atr_mgr attribs) {
	// XO.MW: ignore; defaults is always non-null and empty; if attribs exists, it will be merged below
	// if (!$attribs) {
	//   return $defaults;
	// }

	// Merge the custom attribs with the default ones
	// NOTE(review): assumes mergeAttributes merges its 2nd argument into its 1st -- confirm against XomwSanitizer
	sanitizer.mergeAttributes(defaults, attribs);

	// XO.MW.PORTED.BGN:MW removes "false" values; XO removes "null" values
	// foreach ($merged as $key => $val) {
	//   # A false value suppresses the attribute
	//   if ($val !== false) {
	//     $ret[$key] = $val;
	//   }
	// }
	// Pass 1: collect the null-valued attributes; deleting while iterating by index would skip items
	int len = defaults.Len();
	for (int i = 0; i < len; i++) {
		Xomw_atr_itm atr = defaults.Get_at(i);
		// A null value suppresses the attribute
		if (atr.Val() == null) {
			tmp_merge_deleted.Add(atr);
		}
	}

	// Pass 2: delete exactly the collected attributes, by key
	int deleted_len = tmp_merge_deleted.Len();
	for (int i = 0; i < deleted_len; i++) {
		Xomw_atr_itm atr = (Xomw_atr_itm)tmp_merge_deleted.Get_at(i);
		defaults.Del(atr.Key_bry());
	}
	tmp_merge_deleted.Clear();
	// XO.MW.PORTED.END
}
/**
 * Return the CSS classes of a known link
 *
 * XO.MW: not ported yet; always returns Bry_.Empty. The MW logic (redirect / stub detection
 * through the link cache) is kept below for reference.
 *
 * @param target link target; currently unused
 * @return CSS class; currently always Bry_.Empty
 */
public byte[] getLinkClasses(XomwTitle target) {
// Make sure the target is in the cache
// $id = this.linkCache->addLinkObj($target);
// if ($id == 0) {
// // Doesn't exist
// return '';
// }
// if (this.linkCache->getGoodLinkFieldObj($target, 'redirect')) {
// Page is a redirect
// return 'mw-redirect';
// }
// elseif (this.stubThreshold > 0 && XomwNamespace::isContent($target->getNamespace())
// && this.linkCache->getGoodLinkFieldObj($target, 'length') < this.stubThreshold
// ) {
// Page is a stub
// return 'stub';
// }
return Bry_.Empty;
}
// Byte constants used by this class:
// "extiw" is the CSS class added by makeKnownLink for external targets;
// "new" is the CSS class added by makeBrokenLink;
// "edit" is the query action makeBrokenLink sets when none was given.
private static final byte[]
Bry__classes__extiw = Bry_.new_a7("extiw")
, Bry__class__new = Bry_.new_a7("new")
, Bry__action__edit = Bry_.new_a7("edit")
;
}

View File

@@ -13,3 +13,21 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki.includes.linkers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*;
// import org.junit.*;
// public class XomwLinkRendererTest {
// private final XomwLinkRendererFxt fxt = new XomwLinkRendererFxt();
/*
Make_broken_link
target.Has_fragment()
*/
// }
// class XomwLinkRendererFxt {
// private final XomwLinkRenderer wkr = new XomwLinkRenderer(new Xomw_parser());
// public void Test__parse(String src_str, String expd) {
// byte[] src_bry = Bry_.new_u8(src_str);
// wkr.Replace_external_links(new XomwParserCtx(), pbfr.Init(src_bry));
// if (apos) expd = gplx.langs.htmls.Gfh_utl.Replace_apos(expd);
// Tfds.Eq_str_lines(expd, pbfr.Rslt().To_str_and_clear(), src_str);
// }
// }

View File

@@ -13,3 +13,289 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki.includes.media; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*;
import gplx.xowa.mediawiki.includes.filerepo.file.*; import gplx.xowa.mediawiki.includes.parsers.lnkis.*;
/* XO.TODO:
* validateThumbParams
*/
// MEMORY:only one instance per wiki
public abstract class XomwImageHandler extends XomwMediaHandler { private final Xomw_param_map paramMap = new Xomw_param_map();
// Registers the one handler parameter this class maps: MW "img_width" -> "width".
public XomwImageHandler(byte[] key) {
	super(key);
	paramMap.Add(Xomw_param_itm.Mw__img_width, Xomw_param_map.Type__handler, Xomw_param_itm.Name_bry__width);
}
/**
 * Reports whether file can be rendered: both its width and its height must be "true" per XophpUtility.istrue.
 *
 * @param file file to check
 * @return true when both dimensions pass XophpUtility.istrue
 */
@Override public boolean canRender(XomwFile file) {
	if (!XophpUtility.istrue(file.getWidth())) {
		return false;
	}
	return XophpUtility.istrue(file.getHeight());
}
// Returns the parameter map filled in by the constructor.
@Override public Xomw_param_map getParamMap() {
// XO.MW: defined above: "return [ 'img_width' => 'width' ];"
return paramMap;
}
// Accepts only the width and height parameters, and only with a positive integer value.
// val_bry is not consulted.
@Override public boolean validateParam(int name_uid, byte[] val_bry, int val_int) {
	boolean is_dimension = name_uid == Xomw_param_itm.Name__width || name_uid == Xomw_param_itm.Name__height;
	return is_dimension && val_int > 0;
}
// Builds the "<width>px" parameter string, preferring physicalWidth over width.
// Throws when neither is set.
@Override public byte[] makeParamString(Xomw_params_handler handlerParams) {
	boolean has_physical = XophpUtility.isset(handlerParams.physicalWidth);
	if (!has_physical && !XophpUtility.isset(handlerParams.width)) {
		throw Err_.new_wo_type("No width specified to makeParamString");
	}
	int width = has_physical ? handlerParams.physicalWidth : handlerParams.width;

	// Removed for ProofreadPage
	// width = intval(width);
	return Bry_.Add(Int_.To_bry(width), Xomw_lnki_wkr.Bry__px);
}
// public Xomw_param_map parseParamString(byte[] src) {
// int len = src.length;
// // XO.MW.REGEX: if (preg_match('/^(\d+)px/', str, m)) {
// if ( len > 0 // at least one char
// && Byte_ascii.Is_num(src[0])) // 1st char is numeric
// {
// pos = Bry_find_.Find_fwd_while_num(src, 1, len); // skip numeric
// if (Bry_.Match(src, pos, len, Xomw_lnki_wkr.Bry__px)) { // matches "px"
// Xomw_params_handler rv = new Xomw_params_handler();
// rv.width = Bry_.To_int_or(src, 0, pos, XophpUtility.NULL_INT);
// return rv;
// }
// }
// return null;
// }
// function getScriptParams(paramsVar) {
// return [ 'width' => paramsVar['width'] ];
// }
/**
 * Fills in and validates the thumbnail dimensions in handlerParams for image.
 * Updates handlerParams in place: page, width, height, physicalWidth and physicalHeight may all be set.
 *
 * @param image         source file; supplies the mime type and the per-page width / height
 * @param handlerParams requested parameters; width must be set
 * @return false when width is not set or validateThumbParams rejects the result; true otherwise
 */
@Override public boolean normaliseParams(XomwFile image, Xomw_params_handler handlerParams) {
// read up front; only used by the validateThumbParams call at the end
byte[] mimeType = image.getMimeType();
if (!XophpUtility.isset(handlerParams.width)) {
return false;
}
// default to the first page
if (!XophpUtility.isset(handlerParams.page)) {
handlerParams.page = 1;
}
else {
// XO.MW: clamping of an explicit page number is not ported yet; MW logic kept for reference
// handlerParams.page = intval(handlerParams.page);
// if (handlerParams.page > image.pageCount()) {
// handlerParams.page = image.pageCount();
// }
//
// if (handlerParams.page < 1) {
// handlerParams.page = 1;
// }
}
int srcWidth = image.getWidth(handlerParams.page);
int srcHeight = image.getHeight(handlerParams.page);
// -1 is treated the same as "height not set", both here and further down
if (XophpUtility.isset(handlerParams.height) && handlerParams.height != -1) {
// Height & width were both set
// cross-multiplied aspect-ratio comparison: true when the requested box is wider than the source
if (handlerParams.width * srcHeight > handlerParams.height * srcWidth) {
// Height is the relative smaller dimension, so scale width accordingly
// NOTE(review): fitBoxWidth is defined outside this view; presumably returns the width that fits the given height -- confirm
handlerParams.width = fitBoxWidth(srcWidth, srcHeight, handlerParams.height);
if (handlerParams.width == 0) {
// Very small image, so we need to rely on client side scaling :(
handlerParams.width = 1;
}
handlerParams.physicalWidth = handlerParams.width;
} else {
// Height was crap, unset it so that it will be calculated later
handlerParams.height = XophpUtility.NULL_INT;
}
}
if (!XophpUtility.isset(handlerParams.physicalWidth)) {
// Passed all validations, so set the physicalWidth
handlerParams.physicalWidth = handlerParams.width;
}
// Because thumbs are only referred to by width, the height always needs
// to be scaled by the width to keep the thumbnail sizes consistent,
// even if it was set inside the if block above
handlerParams.physicalHeight = XomwFile.scaleHeight(srcWidth, srcHeight,
handlerParams.physicalWidth);
// Set the height if it was not validated in the if block higher up
if (!XophpUtility.isset(handlerParams.height) || handlerParams.height == -1) {
handlerParams.height = handlerParams.physicalHeight;
}
// final sanity check on the computed physical dimensions
if (!this.validateThumbParams(handlerParams, srcWidth, srcHeight, mimeType)
) {
return false;
}
return true;
}
/**
 * Validate thumbnail parameters and fill in the correct physical height.
 *
 * Reads handlerParams.physicalWidth as the destination width and, on success,
 * stores the height scaled from it (at least 1) in handlerParams.physicalHeight.
 * handlerParams.width and handlerParams.height (the client-side size) are not touched.
 *
 * @param handlerParams parameters holding physicalWidth (input) and physicalHeight (output)
 * @param srcWidth Width of the source image
 * @param srcHeight Height of the source image
 * @param mimeType Unused
 * @return boolean False to indicate that an error should be returned to the user.
 */
// XO.MW.NOTE: MW passes w and h by ref, but only changes h; XO will pass handlerParams directly
// XO.MW.NOTE: the by-ref h in MW is params['physicalHeight'], so the result must be written there, not to height
private boolean validateThumbParams(Xomw_params_handler handlerParams, int srcWidth, int srcHeight, byte[] mimeType) {
	int width = handlerParams.physicalWidth;
	// width = intval(width);

	// Sanity check width
	// NOTE(review): the messages below log the literal words "width" / "srcWidth";
	// MW appears to interpolate the offending value here -- confirm the Warn_many format syntax before changing
	if (width <= 0) {
		Gfo_usr_dlg_.Instance.Warn_many("", "", "validateThumbParams: Invalid destination width: width");
		return false;
	}
	if (srcWidth <= 0) {
		Gfo_usr_dlg_.Instance.Warn_many("", "", "validateThumbParams: Invalid source width: srcWidth");
		return false;
	}

	int height = XomwFile.scaleHeight(srcWidth, srcHeight, width);
	if (height == 0) {
		// Force height to be at least 1 pixel
		height = 1;
	}
	handlerParams.physicalHeight = height;
	return true;
}
// /**
// * @param File image
// * @param String script
// * @param array paramsVar
// * @return boolean|MediaTransformOutput
// */
// function getScriptedTransform(image, script, paramsVar) {
// if (!this.normaliseParams(image, paramsVar)) {
// return false;
// }
// url = wfAppendQuery(script, this.getScriptParams(paramsVar));
//
// if (image.mustRender() || paramsVar['width'] < image.getWidth()) {
// return new ThumbnailImage(image, url, false, paramsVar);
// }
// }
//
// function getImageSize(image, path) {
// MediaWiki\suppressWarnings();
// gis = getimagesize(path);
// MediaWiki\restoreWarnings();
//
// return gis;
// }
//
// /**
// * Function that returns the number of pixels to be thumbnailed.
// * Intended for animated GIFs to multiply by the number of frames.
// *
// * If the file doesn't support a notion of "area" return 0.
// *
// * @param File image
// * @return int
// */
// function getImageArea(image) {
// return image.getWidth() * image.getHeight();
// }
//
// /**
// * @param File file
// * @return String
// */
// function getShortDesc(file) {
// global wgLang;
// nbytes = htmlspecialchars(wgLang.formatSize(file.getSize()));
// widthheight = wfMessage('widthheight')
// .numParams(file.getWidth(), file.getHeight()).escaped();
//
// return "widthheight (nbytes)";
// }
//
// /**
// * @param File file
// * @return String
// */
// function getLongDesc(file) {
// global wgLang;
// pages = file.pageCount();
// size = htmlspecialchars(wgLang.formatSize(file.getSize()));
// if (pages === false || pages <= 1) {
// msg = wfMessage('file-info-size').numParams(file.getWidth(),
// file.getHeight()).paramsVar(size,
// '<span class="mime-type">' . file.getMimeType() . '</span>').parse();
// } else {
// msg = wfMessage('file-info-size-pages').numParams(file.getWidth(),
// file.getHeight()).paramsVar(size,
// '<span class="mime-type">' . file.getMimeType() . '</span>').numParams(pages).parse();
// }
//
// return msg;
// }
//
// /**
// * @param File file
// * @return String
// */
// function getDimensionsString(file) {
// pages = file.pageCount();
// if (pages > 1) {
// return wfMessage('widthheightpage')
// .numParams(file.getWidth(), file.getHeight(), pages).text();
// } else {
// return wfMessage('widthheight')
// .numParams(file.getWidth(), file.getHeight()).text();
// }
// }
//
// public function sanitizeParamsForBucketing(paramsVar) {
// paramsVar = parent::sanitizeParamsForBucketing(paramsVar);
//
// // We unset the height parameters in order to let normaliseParams recalculate them
// // Otherwise there might be a height discrepancy
// if (isset(paramsVar['height'])) {
// unset(paramsVar['height']);
// }
//
// if (isset(paramsVar['physicalHeight'])) {
// unset(paramsVar['physicalHeight']);
// }
//
// return paramsVar;
// }
}

View File

@@ -13,3 +13,49 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki.includes.media; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*;
import org.junit.*; import gplx.core.tests.*;
import gplx.xowa.mediawiki.includes.parsers.*; import gplx.xowa.mediawiki.includes.parsers.lnkis.*;
import gplx.xowa.mediawiki.includes.filerepo.*; import gplx.xowa.mediawiki.includes.filerepo.file.*;
import gplx.xowa.mediawiki.languages.*;
public class XomwImageHandlerTest {
	private final XomwImageHandler_fxt fxt = new XomwImageHandler_fxt();

	/** Registers a 400x200 "A.png" with the fixture before each test. */
	@Before public void init() {
		fxt.Init__file("A.png", 400, 200);
	}

	/** Width-only input: height and both physical dimensions are derived from the source size. */
	@Test public void normaliseParams() {
		// widthOnly; "Because thumbs are only referred to by width, the height always needs"
		Xomw_params_handler actl = fxt.Make__handlerParams(200);
		Xomw_params_handler expd = fxt.Make__handlerParams(200, 100, 200, 100);
		fxt.Test__normaliseParams(actl, expd);
	}
}
/** Test fixture: builds handler params and a local file, then checks normaliseParams output. */
class XomwImageHandler_fxt {
	private final XomwImageHandler handler;
	private final XomwFileRepo repo = new XomwFileRepo(Bry_.new_a7("/orig"), Bry_.new_a7("/thumb"));
	private final XomwEnv env = XomwEnv.NewTest();
	private XomwFile file;

	public XomwImageHandler_fxt() {
		this.handler = new XomwTransformationalImageHandler(Bry_.new_a7("test_handler"));
	}

	/** Params with only a width; height and the physical dimensions are left at NULL_INT. */
	public Xomw_params_handler Make__handlerParams(int w) {
		int unset = XophpUtility.NULL_INT;
		return Make__handlerParams(w, unset, unset, unset);
	}

	/** Params with client and physical dimensions all given explicitly. */
	public Xomw_params_handler Make__handlerParams(int w, int h, int phys_w, int phys_h) {
		Xomw_params_handler prms = new Xomw_params_handler();
		prms.physicalWidth = phys_w;
		prms.physicalHeight = phys_h;
		prms.width = w;
		prms.height = h;
		return prms;
	}

	/** Replaces the file under test with a png of the given title and dimensions. */
	public void Init__file(String title, int w, int h) {
		byte[] titleBry = Bry_.new_u8(title);
		this.file = new XomwLocalFile(env, XomwTitle.newFromText(env, titleBry), repo, w, h, XomwMediaHandlerFactory.Mime__image__png);
	}

	/** Runs normaliseParams on prms and compares all four dimensions against expd. */
	public void Test__normaliseParams(Xomw_params_handler prms, Xomw_params_handler expd) {
		// exec: prms is modified in place; the boolean result is not checked
		handler.normaliseParams(file, prms);

		// test: client size first, then physical size
		Gftest.Eq__int(expd.width, prms.width);
		Gftest.Eq__int(expd.height, prms.height);
		Gftest.Eq__int(expd.physicalWidth, prms.physicalWidth);
		Gftest.Eq__int(expd.physicalHeight, prms.physicalHeight);
	}
}

View File

@@ -13,3 +13,854 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki.includes.media; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*;
import gplx.xowa.mediawiki.includes.filerepo.file.*;
import gplx.xowa.mediawiki.includes.parsers.lnkis.*;
public abstract class XomwMediaHandler {
/** Flag for doTransform: when set, the transform itself is deferred. */
private static final int TRANSFORM_LATER = 1;

/** Key supplied at construction; exposed through Key(). */
private byte[] key;

/**
 * @param key handler key, stored as given (no copy is made)
 */
public XomwMediaHandler(byte[] key) {
	this.key = key;
}

/** @return the key passed to the constructor */
public byte[] Key() {
	return key;
}
// static final METADATA_GOOD = true;
// static final METADATA_BAD = false;
// static final METADATA_COMPATIBLE = 2; // for old but backwards compatible.
// /**
// * Max length of error logged by logErrorForExternalProcess()
// */
// static final MAX_ERR_LOG_SIZE = 65535;
//
// /**
// * Get a MediaHandler for a given MIME type from the instance cache
// *
// * @param String $type
// * @return MediaHandler|boolean
// */
// static function getHandler($type) {
// return MediaWikiServices::getInstance()
// ->getMediaHandlerFactory()->getHandler($type);
// }
/**
 * Get the mapping of magic word IDs to parameter names.
 * Will be used by the parser to identify parameters.
 *
 * @return the mapping, as a Xomw_param_map (MW returns an associative array here)
 */
public abstract Xomw_param_map getParamMap();
/**
 * Validate a thumbnail parameter at parse time.
 * Return true to accept the parameter, and false to reject it.
 * If you return false, the parser will do something quiet and forgiving.
 *
 * XO.MW.NOTE: MW takes (String $name, mixed $value); this port takes an int id plus two forms of the value.
 *
 * @param name_uid presumably the integer id of the parameter name -- TODO confirm against Xomw_param_map
 * @param val_bry presumably the raw value as bytes -- TODO confirm
 * @param val_int presumably the value parsed as an int -- TODO confirm
 * @return true to accept the parameter; false to reject it
 */
public abstract boolean validateParam(int name_uid, byte[] val_bry, int val_int);
/**
 * Merge the handler parameters into a String appropriate for inclusion in filenames
 * (the MW docs give "122px" as an example of such a String).
 *
 * @param handlerParams parameters that have been through normaliseParams
 * @return the parameter String, as bytes
 */
public abstract byte[] makeParamString(Xomw_params_handler handlerParams);
// /**
// * Parse a param String made with makeParamString back into an array
// *
// * @param String $str The parameter String without file name (e.g. 122px)
// * @return array|boolean Array of parameters or false on failure.
// */
// abstract public function parseParamString($str);
/**
 * Changes the parameters as necessary, ready for transformation.
 * Should be idempotent.
 *
 * @param image file the parameters apply to
 * @param handlerParams parameters to normalise; implementations modify this object in place
 * @return false if the parameters are unacceptable and the transform should fail
 */
public abstract boolean normaliseParams(XomwFile image, Xomw_params_handler handlerParams);
// /**
// * Get an image size array like that returned by getimagesize(), or false if it
// * can't be determined.
// *
// * This function is used for determining the width, height and bitdepth directly
// * from an image. The results are stored in the database in the img_width,
// * img_height, img_bits fields.
// *
// * @note If this is a multipage file, return the width and height of the
// * first page.
// *
// * @param File|FSFile $image The image Object, or false if there isn't one.
// * Warning, FSFile::getPropsFromPath might pass an FSFile instead of File (!)
// * @param String $path The filename
// * @return array|boolean Follow the format of PHP getimagesize() @gplx.Internal protected function.
// * See https://secure.php.net/getimagesize. MediaWiki will only ever use the
// * first two array keys (the width and height), and the 'bits' associative
// * key. All other array keys are ignored. Returning a 'bits' key is optional
// * as not all formats have a notion of "bitdepth". Returns false on failure.
// */
// abstract function getImageSize($image, $path);
//
// /**
// * Get handler-specific metadata which will be saved in the img_metadata field.
// *
// * @param File|FSFile $image The image Object, or false if there isn't one.
// * Warning, FSFile::getPropsFromPath might pass an FSFile instead of File (!)
// * @param String $path The filename
// * @return String A String of metadata in php serialized form (Run through serialize())
// */
// function getMetadata($image, $path) {
// return '';
// }
//
// /**
// * Get metadata version.
// *
// * This is not used for validating metadata, this is used for the api when returning
// * metadata, since api content formats should stay the same over time, and so things
// * using ForeignApiRepo can keep backwards compatibility
// *
// * All core media handlers share a common version number, and extensions can
// * use the GetMetadataVersion hook to append to the array (they should append a unique
// * String so not to get confusing). If there was a media handler named 'foo' with metadata
// * version 3 it might add to the end of the array the element 'foo=3'. if the core metadata
// * version is 2, the end version String would look like '2;foo=3'.
// *
// * @return String Version String
// */
// static function getMetadataVersion() {
// $version = [ '2' ]; // core metadata version
// Hooks::run('GetMetadataVersion', [ &$version ]);
//
// return implode(';', $version);
// }
//
// /**
// * Convert metadata version.
// *
// * By default just returns $metadata, but can be used to allow
// * media handlers to convert between metadata versions.
// *
// * @param String|array $metadata Metadata array (serialized if String)
// * @param int $version Target version
// * @return array Serialized metadata in specified version, or $metadata on fail.
// */
// function convertMetadataVersion($metadata, $version = 1) {
// if (!is_array($metadata)) {
//
// // unserialize to keep return parameter consistent.
// MediaWiki\suppressWarnings();
// $ret = unserialize($metadata);
// MediaWiki\restoreWarnings();
//
// return $ret;
// }
//
// return $metadata;
// }
//
// /**
// * Get a String describing the type of metadata, for display purposes.
// *
// * @note This method is currently unused.
// * @param File $image
// * @return String
// */
// function getMetadataType($image) {
// return false;
// }
//
// /**
// * Check if the metadata String is valid for this handler.
// * If it returns MediaHandler::METADATA_BAD (or false), Image
// * will reload the metadata from the file and update the database.
// * MediaHandler::METADATA_GOOD for if the metadata is a-ok,
// * MediaHandler::METADATA_COMPATIBLE if metadata is old but backwards
// * compatible (which may or may not trigger a metadata reload).
// *
// * @note Returning self::METADATA_BAD will trigger a metadata reload from
// * file on page view. Always returning this from a broken file, or suddenly
// * triggering as bad metadata for a large number of files can cause
// * performance problems.
// * @param File $image
// * @param String $metadata The metadata in serialized form
// * @return boolean
// */
// function isMetadataValid($image, $metadata) {
// return self::METADATA_GOOD;
// }
//
// /**
// * Get an array of standard (FormatMetadata type) metadata values.
// *
// * The returned data is largely the same as that from getMetadata(),
// * but formatted in a standard, stable, handler-independent way.
// * The idea being that some values like ImageDescription or Artist
// * are universal and should be retrievable in a handler generic way.
// *
// * The specific properties are the type of properties that can be
// * handled by the FormatMetadata class. These values are exposed to the
// * user via the filemetadata parser function.
// *
// * Details of the response format of this function can be found at
// * https://www.mediawiki.org/wiki/Manual:File_metadata_handling
// * tl/dr: the response is an associative array of
// * properties keyed by name, but the value can be complex. You probably
// * want to call one of the FormatMetadata::flatten* functions on the
// * property values before using them, or call
// * FormatMetadata::getFormattedData() on the full response array, which
// * transforms all values into prettified, human-readable text.
// *
// * Subclasses overriding this function must return a value which is a
// * valid API response fragment (all associative array keys are valid
// * XML tagnames).
// *
// * Note, if the file simply has no metadata, but the handler supports
// * this interface, it should return an empty array, not false.
// *
// * @param File $file
// * @return array|boolean False if interface not supported
// * @since 1.23
// */
// public function getCommonMetaArray(File $file) {
// return false;
// }
//
// /**
// * Get a MediaTransformOutput Object representing an alternate of the transformed
// * output which will call an intermediary thumbnail assist script.
// *
// * Used when the repository has a thumbnailScriptUrl option configured.
// *
// * Return false to fall back to the regular getTransform().
// * @param File $image
// * @param String $script
// * @param array $paramsVar
// * @return boolean|ThumbnailImage
// */
// function getScriptedTransform($image, $script, $paramsVar) {
// return false;
// }
/**
 * Get a MediaTransformOutput Object representing the transformed output. Does not
 * actually do the transform: it delegates to doTransform with TRANSFORM_LATER.
 *
 * @param image The image Object
 * @param dstPath Filesystem destination path
 * @param dstUrl Destination URL to use in output HTML
 * @param handlerParams Arbitrary set of parameters validated by this.validateParam()
 * @return MediaTransformOutput
 */
public XomwMediaTransformOutput getTransform(XomwFile image, byte[] dstPath, byte[] dstUrl, Xomw_params_handler handlerParams) {
	// defer the actual transform; only the output description is wanted
	int flags = TRANSFORM_LATER;
	return doTransform(image, dstPath, dstUrl, handlerParams, flags);
}
/**
 * Convenience overload of doTransform that passes no flags (0), standing in for
 * MW's default argument. With no TRANSFORM_LATER flag the transform is requested
 * immediately.
 *
 * @param image The image Object
 * @param dstPath Filesystem destination path
 * @param dstUrl Destination URL to use in output HTML
 * @param handlerParams Arbitrary set of parameters validated by this.validateParam()
 *   Note: These parameters have *not* gone through this.normaliseParams()
 * @return MediaTransformOutput
 */
public XomwMediaTransformOutput doTransform(XomwFile image, byte[] dstPath, byte[] dstUrl, Xomw_params_handler handlerParams) {
	int noFlags = 0;
	return this.doTransform(image, dstPath, dstUrl, handlerParams, noFlags);
}
/**
 * Get a MediaTransformOutput Object representing the transformed output. Does the
 * transform unless flags contains TRANSFORM_LATER.
 *
 * @param image The image Object
 * @param dstPath Filesystem destination path
 * @param dstUrl Destination URL to use in output HTML
 * @param handlerParams Arbitrary set of parameters validated by this.validateParam()
 *   Note: These parameters have *not* gone through this.normaliseParams()
 * @param flags A bitfield, may contain TRANSFORM_LATER; the 4-arg overload passes 0
 * @return MediaTransformOutput
 */
public abstract XomwMediaTransformOutput doTransform(XomwFile image, byte[] dstPath, byte[] dstUrl, Xomw_params_handler handlerParams, int flags);
// /**
// * Get the thumbnail extension and MIME type for a given source MIME type
// *
// * @param String $ext Extension of original file
// * @param String $mime MIME type of original file
// * @param array $paramsVar Handler specific rendering parameters
// * @return array Thumbnail extension and MIME type
// */
// function getThumbType($ext, $mime, $paramsVar = null) {
// $magic = MimeMagic::singleton();
// if (!$ext || $magic->isMatchingExtension($ext, $mime) === false) {
// // The extension is not valid for this MIME type and we do
// // recognize the MIME type
// $extensions = $magic->getExtensionsForType($mime);
// if ($extensions) {
// return [ strtok($extensions, ' '), $mime ];
// }
// }
//
// // The extension is correct (true) or the MIME type is unknown to
// // MediaWiki (null)
// return [ $ext, $mime ];
// }
//
// /**
// * Get useful response headers for GET/HEAD requests for a file with the given metadata
// *
// * @param mixed $metadata Result of the getMetadata() function of this handler for a file
// * @return array
// */
// public function getStreamHeaders($metadata) {
// return [];
// }
/**
 * True if the handled types can be transformed.
 * This base implementation always returns true and does not inspect file.
 *
 * @param file file to check (unused here)
 * @return boolean always true in this base implementation
 */
// NOTE(review): @gplx.Virtual presumably marks this as overridable for subclasses -- confirm
@gplx.Virtual public boolean canRender(XomwFile file) {
return true;
}
/**
 * True if handled types cannot be displayed directly in a browser
 * but can be rendered.
 * This base implementation always returns false and does not inspect file.
 *
 * @param file file to check (unused here)
 * @return boolean always false in this base implementation
 */
public boolean mustRender(XomwFile file) {
return false;
}
// /**
// * True if the type has multi-page capabilities
// *
// * @param File $file
// * @return boolean
// */
// public function isMultiPage($file) {
// return false;
// }
//
// /**
// * Page count for a multi-page document, false if unsupported or unknown
// *
// * @param File $file
// * @return boolean
// */
// function pageCount(File $file) {
// return false;
// }
//
// /**
// * The material is vectorized and thus scaling is lossless
// *
// * @param File $file
// * @return boolean
// */
// function isVectorized($file) {
// return false;
// }
//
// /**
// * The material is an image, and is animated.
// * In particular, video material need not return true.
// * @note Before 1.20, this was a method of ImageHandler only
// *
// * @param File $file
// * @return boolean
// */
// function isAnimatedImage($file) {
// return false;
// }
//
// /**
// * If the material is animated, we can animate the thumbnail
// * @since 1.20
// *
// * @param File $file
// * @return boolean If material is not animated, handler may return any value.
// */
// function canAnimateThumbnail($file) {
// return true;
// }
//
// /**
// * False if the handler is disabled for all files
// * @return boolean
// */
// function isEnabled() {
// return true;
// }
//
// /**
// * Get an associative array of page dimensions
// * Currently "width" and "height" are understood, but this might be
// * expanded in the future.
// * Returns false if unknown.
// *
// * It is expected that handlers for paged media (e.g. DjVuHandler)
// * will override this method so that it gives the correct results
// * for each specific page of the file, using the $page argument.
// *
// * @note For non-paged media, use getImageSize.
// *
// * @param File $image
// * @param int $page What page to get dimensions of
// * @return array|boolean
// */
// function getPageDimensions(File $image, $page) {
// $gis = $this->getImageSize($image, $image->getLocalRefPath());
// if ($gis) {
// return [
// 'width' => $gis[0],
// 'height' => $gis[1]
// ];
// } else {
// return false;
// }
// }
//
// /**
// * Generic getter for text layer.
// * Currently overloaded by PDF and DjVu handlers
// * @param File $image
// * @param int $page Page number to get information for
// * @return boolean|String Page text or false when no text found or if
// * unsupported.
// */
// function getPageText(File $image, $page) {
// return false;
// }
//
// /**
// * Get the text of the entire document.
// * @param File $file
// * @return boolean|String The text of the document or false if unsupported.
// */
// public function getEntireText(File $file) {
// $numPages = $file->pageCount();
// if (!$numPages) {
// // Not a multipage document
// return $this->getPageText($file, 1);
// }
// $document = '';
// for ($i = 1; $i <= $numPages; $i++) {
// $curPage = $this->getPageText($file, $i);
// if (is_string($curPage)) {
// $document .= $curPage . "\n";
// }
// }
// if ($document !== '') {
// return $document;
// }
// return false;
// }
//
// /**
// * Get an array structure that looks like this:
// *
// * [
// * 'visible' => [
// * 'Human-readable name' => 'Human readable value',
// * ...
// * ],
// * 'collapsed' => [
// * 'Human-readable name' => 'Human readable value',
// * ...
// * ]
// * ]
// * The UI will format this into a table where the visible fields are always
// * visible, and the collapsed fields are optionally visible.
// *
// * The function should return false if there is no metadata to display.
// */
//
// /**
// * @todo FIXME: This interface is not very flexible. The media handler
// * should generate HTML instead. It can do all the formatting according
// * to some standard. That makes it possible to do things like visual
// * indication of grouped and chained streams in ogg container files.
// * @param File $image
// * @param boolean|IContextSource $context Context to use (optional)
// * @return array|boolean
// */
// function formatMetadata($image, $context = false) {
// return false;
// }
//
// /** sorts the visible/invisible field.
// * Split off from ImageHandler::formatMetadata, as used by more than
// * one type of handler.
// *
// * This is used by the media handlers that use the FormatMetadata class
// *
// * @param array $metadataArray Metadata array
// * @param boolean|IContextSource $context Context to use (optional)
// * @return array Array for use displaying metadata.
// */
// function formatMetadataHelper($metadataArray, $context = false) {
// $result = [
// 'visible' => [],
// 'collapsed' => []
// ];
//
// $formatted = FormatMetadata::getFormattedData($metadataArray, $context);
// // Sort fields into visible and collapsed
// $visibleFields = $this->visibleMetadataFields();
// foreach ($formatted as $name => $value) {
// $tag = strtolower($name);
// self::addMeta($result,
// in_array($tag, $visibleFields) ? 'visible' : 'collapsed',
// 'exif',
// $tag,
// $value
// );
// }
//
// return $result;
// }
//
// /**
// * Get a list of metadata items which should be displayed when
// * the metadata table is collapsed.
// *
// * @return array Array of strings
// */
// protected function visibleMetadataFields() {
// return FormatMetadata::getVisibleFields();
// }
//
// /**
// * This is used to generate an array element for each metadata value
// * That array is then used to generate the table of metadata values
// * on the image page
// *
// * @param array &$array An array containing elements for each type of visibility
// * and each of those elements being an array of metadata items. This function adds
// * a value to that array.
// * @param String $visibility ('visible' or 'collapsed') if this value is hidden
// * by default.
// * @param String $type Type of metadata tag (currently always 'exif')
// * @param String $id The name of the metadata tag (like 'artist' for example).
// * its name in the table displayed is the message "$type-$id" (Ex exif-artist).
// * @param String $value Thingy goes into a wikitext table; it used to be escaped but
// * that was incompatible with previous practise of customized display
// * with wikitext formatting via messages such as 'exif-model-value'.
// * So the escaping is taken back out, but generally this seems a confusing
// * interface.
// * @param boolean|String $param Value to pass to the message for the name of the field
// * as $1. Currently this parameter doesn't seem to ever be used.
// *
// * Note, everything here is passed through the parser later on (!)
// */
// protected static function addMeta(&$array, $visibility, $type, $id, $value, $param = false) {
// $msg = wfMessage("$type-$id", $param);
// if ($msg->exists()) {
// $name = $msg->text();
// } else {
// // This is for future compatibility when using instant commons.
// // So as to not display as ugly a name if a new metadata
// // property is defined that we don't know about
// // (not a major issue since such a property would be collapsed
// // by default).
// wfDebug(__METHOD__ . ' Unknown metadata name: ' . $id . "\n");
// $name = wfEscapeWikiText($id);
// }
// $array[$visibility][] = [
// 'id' => "$type-$id",
// 'name' => $name,
// 'value' => $value
// ];
// }
//
// /**
// * Short description. Shown on Special:Search results.
// *
// * @param File $file
// * @return String
// */
// function getShortDesc($file) {
// return self::getGeneralShortDesc($file);
// }
//
// /**
// * Long description. Shown under image on image description page surrounded by ().
// *
// * @param File $file
// * @return String
// */
// function getLongDesc($file) {
// return self::getGeneralLongDesc($file);
// }
//
// /**
// * Used instead of getShortDesc if there is no handler registered for file.
// *
// * @param File $file
// * @return String
// */
// static function getGeneralShortDesc($file) {
// global $wgLang;
//
// return htmlspecialchars($wgLang->formatSize($file->getSize()));
// }
//
// /**
// * Used instead of getLongDesc if there is no handler registered for file.
// *
// * @param File $file
// * @return String
// */
// static function getGeneralLongDesc($file) {
// return wfMessage('file-info')->sizeParams($file->getSize())
// ->paramsVar('<span class="mime-type">' . $file->getMimeType() . '</span>')->parse();
// }
/**
 * Calculate the largest thumbnail width for a given original file size
 * such that the thumbnail's height is at most maxHeight.
 *
 * XO.MW.NOTE: PHP "/" is float division; every division here is done in double
 * so the ideal width keeps its fractional part before ceil / floor are applied.
 * Integer division would truncate first, e.g. (366, 300, 17) would give 20 instead of 21.
 *
 * @param boxWidth Width of the thumbnail box.
 * @param boxHeight Height of the thumbnail box.
 * @param maxHeight Maximum height expected for the thumbnail.
 * @return int
 * @throws ArithmeticException if boxWidth or boxHeight is 0 (same exception type the
 *   integer divisions raised before)
 */
public static int fitBoxWidth(int boxWidth, int boxHeight, int maxHeight) {
	if (boxWidth == 0 || boxHeight == 0) {
		throw new ArithmeticException("fitBoxWidth: box dimensions must be non-zero; boxWidth=" + boxWidth + " boxHeight=" + boxHeight);
	}
	// cast before multiplying: avoids int overflow as well as truncating division
	double idealWidth = (double)boxWidth * maxHeight / boxHeight;
	int roundedUp = (int)Math.ceil(idealWidth);
	// scale the rounded-up width back to a height; if that overshoots maxHeight, round down instead
	if (Math.round((double)roundedUp * boxHeight / boxWidth) > maxHeight) {
		return (int)Math.floor(idealWidth);
	}
	return roundedUp;
}
// /**
// * Shown in file history box on image description page.
// *
// * @param File $file
// * @return String Dimensions
// */
// function getDimensionsString($file) {
// return '';
// }
//
// /**
// * Modify the parser Object post-transform.
// *
// * This is often used to do $parser->addOutputHook(),
// * in order to add some javascript to render a viewer.
// * See TimedMediaHandler or OggHandler for an example.
// *
// * @param Parser $parser
// * @param File $file
// */
// function parserTransformHook($parser, $file) {
// }
//
// /**
// * File validation hook called on upload.
// *
// * If the file at the given local path is not valid, or its MIME type does not
// * match the handler class, a Status Object should be returned containing
// * relevant errors.
// *
// * @param String $fileName The local path to the file.
// * @return Status
// */
// function verifyUpload($fileName) {
// return Status::newGood();
// }
//
// /**
// * Check for zero-sized thumbnails. These can be generated when
// * no disk space is available or some other error occurs
// *
// * @param String $dstPath The location of the suspect file
// * @param int $retval Return value of some shell process, file will be deleted if this is non-zero
// * @return boolean True if removed, false otherwise
// */
// function removeBadFile($dstPath, $retval = 0) {
// if (file_exists($dstPath)) {
// $thumbstat = stat($dstPath);
// if ($thumbstat['size'] == 0 || $retval != 0) {
// $result = unlink($dstPath);
//
// if ($result) {
// wfDebugLog('thumbnail',
// sprintf('Removing bad %d-byte thumbnail "%s". unlink() succeeded',
// $thumbstat['size'], $dstPath));
// } else {
// wfDebugLog('thumbnail',
// sprintf('Removing bad %d-byte thumbnail "%s". unlink() failed',
// $thumbstat['size'], $dstPath));
// }
//
// return true;
// }
// }
//
// return false;
// }
//
// /**
// * Remove files from the purge list.
// *
// * This is used by some video handlers to prevent ?action=purge
// * from removing a transcoded video, which is expensive to
// * regenerate.
// *
// * @see LocalFile::purgeThumbnails
// *
// * @param array $files
// * @param array $options Purge options. Currently will always be
// * an array with a single key 'forThumbRefresh' set to true.
// */
// public function filterThumbnailPurgeList(&$files, $options) {
// // Do nothing
// }
//
// /**
// * True if the handler can rotate the media
// * @since 1.24 non-static. From 1.21-1.23 was static
// * @return boolean
// */
// public function canRotate() {
// return false;
// }
//
// /**
// * On supporting image formats, try to read out the low-level orientation
// * of the file and return the angle that the file needs to be rotated to
// * be viewed.
// *
// * This information is only useful when manipulating the original file;
// * the width and height we normally work with is logical, and will match
// * any produced output views.
// *
// * For files we don't know, we return 0.
// *
// * @param File $file
// * @return int 0, 90, 180 or 270
// */
// public function getRotation($file) {
// return 0;
// }
//
// /**
// * Log an error that occurred in an external process
// *
// * Moved from BitmapHandler to MediaHandler with MediaWiki 1.23
// *
// * @since 1.23
// * @param int $retval
// * @param String $err Error reported by command. Anything longer than
// * MediaHandler::MAX_ERR_LOG_SIZE is stripped off.
// * @param String $cmd
// */
// protected function logErrorForExternalProcess($retval, $err, $cmd) {
// # Keep error output limited (bug 57985)
// $errMessage = trim(substr($err, 0, self::MAX_ERR_LOG_SIZE));
//
// wfDebugLog('thumbnail',
// sprintf('thumbnail failed on %s: error %d "%s" from "%s"',
// wfHostname(), $retval, $errMessage, $cmd));
// }
//
// /**
// * Get list of languages file can be viewed in.
// *
// * @param File $file
// * @return String[] Array of language codes, or empty array if unsupported.
// * @since 1.23
// */
// public function getAvailableLanguages(File $file) {
// return [];
// }
//
// /**
// * On file types that support renderings in multiple languages,
// * which language is used by default if unspecified.
// *
// * If getAvailableLanguages returns a non-empty array, this must return
// * a valid language code. Otherwise can return null if files of this
// * type do not support alternative language renderings.
// *
// * @param File $file
// * @return String|null Language code or null if multi-language not supported for filetype.
// * @since 1.23
// */
// public function getDefaultRenderLanguage(File $file) {
// return null;
// }
//
// /**
// * If it's an audio file, return the length of the file. Otherwise 0.
// *
// * File::getLength() existed for a long time, but was calling a method
// * that only existed in some subclasses of this class (The TMH ones).
// *
// * @param File $file
// * @return float length in seconds
// * @since 1.23
// */
// public function getLength($file) {
// return 0.0;
// }
//
// /**
// * True if creating thumbnails from the file is large or otherwise resource-intensive.
// * @param File $file
// * @return boolean
// */
// public function isExpensiveToThumbnail($file) {
// return false;
// }
//
// /**
// * Returns whether or not this handler supports the chained generation of thumbnails according
// * to buckets
// * @return boolean
// * @since 1.24
// */
// public function supportsBucketing() {
// return false;
// }
//
// /**
// * Returns a normalised paramsVar array for which parameters have been cleaned up for bucketing
// * purposes
// * @param array $paramsVar
// * @return array
// */
// public function sanitizeParamsForBucketing($paramsVar) {
// return $paramsVar;
// }
//
// /**
// * Gets configuration for the file warning message. Return value of
// * the following structure:
// * [
// * // Required, module with messages loaded for the client
// * 'module' => 'example.filewarning.messages',
// * // Required, array of names of messages
// * 'messages' => [
// * // Required, main warning message
// * 'main' => 'example-filewarning-main',
// * // Optional, header for warning dialog
// * 'header' => 'example-filewarning-header',
// * // Optional, footer for warning dialog
// * 'footer' => 'example-filewarning-footer',
// * // Optional, text for more-information link (see below)
// * 'info' => 'example-filewarning-info',
// * ],
// * // Optional, link for more information
// * 'link' => 'http://example.com',
// * ]
// *
// * Returns null if no warning is necessary.
// * @param File $file
// * @return array|null
// */
// public function getWarningConfig($file) {
// return null;
// }
}

View File

@@ -13,3 +13,49 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki.includes.media; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*;
// XO.MW:MW has registry and instance cache; XO only has instance
// XO.MW:SYNC:1.29; DATE:2017-02-05
/**
 * Looks up the media handler registered for a MIME type.
 *
 * Handlers are created once in the constructor and kept in a byte[]-keyed hash.
 * Only the image/png handler is registered so far; the remaining MediaWiki core
 * handlers are listed as comments in the constructor and are not ported yet.
 */
public class XomwMediaHandlerFactory {
	// MIME type (byte[]) -> handler instance
	private final Hash_adp_bry handlers = Hash_adp_bry.cs();
	// XO.MW:SYNC:1.29; DATE:2017-02-05
	public XomwMediaHandlerFactory() {
		// Default, MediaWiki core media handlers
		// 'image/jpeg' => JpegHandler::class,
		handlers.Add(Mime__image__png, new XomwTransformationalImageHandler(Mime__image__png)); // PngHandler
		// 'image/gif' => GIFHandler::class,
		// 'image/tiff' => TiffHandler::class,
		// 'image/webp' => WebPHandler::class,
		// 'image/x-ms-bmp' => BmpHandler::class,
		// 'image/x-bmp' => BmpHandler::class,
		// 'image/x-xcf' => XCFHandler::class,
		// 'image/svg+xml' => SvgHandler::class, // official
		// 'image/svg' => SvgHandler::class, // compat
		// 'image/vnd.djvu' => DjVuHandler::class, // official
		// 'image/x.djvu' => DjVuHandler::class, // compat
		// 'image/x-djvu' => DjVuHandler::class, // compat
	}
	/**
	 * Returns the handler registered for the given MIME type.
	 *
	 * @param type MIME type as bytes, e.g. {@link #Mime__image__png}
	 * @return whatever the hash holds for that key, cast to XomwMediaHandler
	 *   NOTE(review): presumably null when no handler is registered for the type -- confirm against Hash_adp_bry.Get_by
	 */
	// XO.MW:SYNC:1.29; DATE:2017-02-05
	public XomwMediaHandler getHandler(byte[] type) {
		return (XomwMediaHandler)handlers.Get_by(type);
	}
	// MIME-type keys. Declared final so the references cannot be rebound by other code;
	// the array contents are still mutable, so callers must treat them as read-only.
	public static final byte[]
	  Mime__image__jpeg        = Bry_.new_a7("image/jpeg")
	, Mime__image__png         = Bry_.new_a7("image/png")
	, Mime__image__gif         = Bry_.new_a7("image/gif")
	, Mime__image__tiff        = Bry_.new_a7("image/tiff")
	, Mime__image__webp        = Bry_.new_a7("image/webp")
	, Mime__image__x_ms_bmp    = Bry_.new_a7("image/x-ms-bmp")
	, Mime__image__x_bmp       = Bry_.new_a7("image/x-bmp")
	, Mime__image__x_xcf       = Bry_.new_a7("image/x-xcf")
	, Mime__image__svg_xml     = Bry_.new_a7("image/svg+xml")
	, Mime__image__svg         = Bry_.new_a7("image/svg")
	, Mime__image__vnd_djvu    = Bry_.new_a7("image/vnd.djvu")
	, Mime__image__x_djvu_dot  = Bry_.new_a7("image/x.djvu")
	, Mime__image__x_djvu_dash = Bry_.new_a7("image/x-djvu")
	;
}

View File

@@ -13,3 +13,266 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki.includes.media; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*;
import gplx.langs.htmls.*;
import gplx.xowa.mediawiki.includes.parsers.lnkis.*;
import gplx.xowa.mediawiki.includes.filerepo.file.*;
/**
 * Base class for the result of a media transform.
 *
 * Holds the source file, the thumb URL and the output box size; subclasses
 * provide the HTML rendering through toHtml. Members of the PHP class that
 * are not ported yet are kept below as commented-out reference code.
 */
public abstract class XomwMediaTransformOutput {
	// value of the class attribute added by getDescLinkAttribs
	private static final byte[] Bry__class__image = Bry_.new_a7("image");

	// /** @var array Associative array mapping optional supplementary image files
	// * from pixel density (eg 1.5 or 2) to additional URLs.
	// */
	// public $responsiveUrls = [];
	/** @var File */
	private final XomwFile file;
	/** @var String URL path to the thumb */
	protected final byte[] url;
	/** @var int Image width */
	protected final int width;
	/** @var int Image height */
	protected final int height;
	// /** @var boolean|String */
	// protected $page;
	//
	// /** @var boolean|String Filesystem path to the thumb */
	// protected $path;
	//
	// /** @var boolean|String Language code, false if not set */
	// protected $lang;
	//
	// /** @var boolean|String Permanent storage path */
	// protected $storagePath = false;

	/**
	 * @param file source file of the transform
	 * @param url URL path to the thumb
	 * @param path accepted for signature parity with the PHP class but not stored (the $path member is not ported)
	 * @param width width of the output box
	 * @param height height of the output box
	 */
	public XomwMediaTransformOutput(XomwFile file, byte[] url, byte[] path, int width, int height) {
		this.file   = file;
		this.url    = url;
		this.width  = width;
		this.height = height;
	}
	/**
	 * @return int Width of the output box
	 */
	public int getWidth() {return width;}
	/**
	 * @return int Height of the output box
	 */
	public int getHeight() {return height;}
	// /**
	// * @return File
	// */
	// public function getFile() {
	// return $this->file;
	// }
	//
	// /**
	// * Get the final extension of the thumbnail.
	// * Returns false for scripted transformations.
	// * @return String|boolean
	// */
	// public function getExtension() {
	// return $this->path ? FileBackend::extensionFromPath( $this->path ) : false;
	// }
	//
	// /**
	// * @return String|boolean The thumbnail URL
	// */
	// public function getUrl() {
	// return $this->url;
	// }
	//
	// /**
	// * @return String|boolean The permanent thumbnail storage path
	// */
	// public function getStoragePath() {
	// return $this->storagePath;
	// }
	//
	// /**
	// * @param String $storagePath The permanent storage path
	// * @return void
	// */
	// public function setStoragePath( $storagePath ) {
	// $this->storagePath = $storagePath;
	// if ( $this->path === false ) {
	// $this->path = $storagePath;
	// }
	// }
	/**
	 * Produce the HTML for this transform output.
	 *
	 * Unlike the PHP original nothing is returned; the method receives buffers instead.
	 * XomwThumbnailImage, for example, builds the img element in tmp and writes the final markup to bfr.
	 *
	 * @param bfr buffer that receives the HTML
	 * @param tmp second buffer handed to the implementation for intermediate output
	 * @param options Options object. Boolean options
	 *     should be indicated with a value of true for true, and false or
	 *     absent for false.
	 *
	 *     alt          Alternate text or caption
	 *     desc-link    Boolean, show a description link
	 *     file-link    Boolean, show a file download link
	 *     custom-url-link    Custom URL to link to
	 *     custom-title-link  Custom Title Object to link to
	 *     valign       vertical-align property, if the output is an inline element
	 *     img-class    Class applied to the "<img>" tag, if there is such a tag
	 *
	 * For images, desc-link and file-link are implemented as a click-through. For
	 * sounds and videos, they may be displayed in other ways.
	 */
	public abstract void toHtml(Bry_bfr bfr, Bry_bfr tmp, Xomw_params_mto options);
	// /**
	// * This will be overridden to return true in error classes
	// * @return boolean
	// */
	// public function isError() {
	// return false;
	// }
	//
	// /**
	// * Check if an output thumbnail file actually exists.
	// *
	// * This will return false if there was an error, the
	// * thumbnail is to be handled client-side only, or if
	// * transformation was deferred via TRANSFORM_LATER.
	// * This file may exist as a new file in /tmp, a file
	// * in permanent storage, or even refer to the original.
	// *
	// * @return boolean
	// */
	// public function hasFile() {
	// // If TRANSFORM_LATER, $this->path will be false.
	// // Note: a null path means "use the source file".
	// return ( !$this->isError() && ( $this->path || $this->path === null ) );
	// }
	//
	// /**
	// * Check if the output thumbnail is the same as the source.
	// * This can occur if the requested width was bigger than the source.
	// *
	// * @return boolean
	// */
	// public function fileIsSource() {
	// return ( !$this->isError() && $this->path === null );
	// }
	//
	// /**
	// * Get the path of a file system copy of the thumbnail.
	// * Callers should never write to this path.
	// *
	// * @return String|boolean Returns false if there isn't one
	// */
	// public function getLocalCopyPath() {
	// if ( $this->isError() ) {
	// return false;
	// } elseif ( $this->path === null ) {
	// return $this->file->getLocalRefPath(); // assume thumb was not scaled
	// } elseif ( FileBackend::isStoragePath( $this->path ) ) {
	// $be = $this->file->getRepo()->getBackend();
	// // The temp file will be process cached by FileBackend
	// $fsFile = $be->getLocalReference( [ 'src' => $this->path ] );
	//
	// return $fsFile ? $fsFile->getPath() : false;
	// } else {
	// return $this->path; // may return false
	// }
	// }
	//
	// /**
	// * Stream the file if there were no errors
	// *
	// * @param array $headers Additional HTTP headers to send on success
	// * @return Status
	// * @since 1.27
	// */
	// public function streamFileWithStatus( $headers = [] ) {
	// if ( !$this->path ) {
	// return Status::newFatal( 'backend-fail-stream', '<no path>' );
	// } elseif ( FileBackend::isStoragePath( $this->path ) ) {
	// $be = $this->file->getRepo()->getBackend();
	// return $be->streamFile( [ 'src' => $this->path, 'headers' => $headers ] );
	// } else { // FS-file
	// $success = StreamFile::stream( $this->getLocalCopyPath(), $headers );
	// return $success ? Status::newGood() : Status::newFatal( 'backend-fail-stream', $this->path );
	// }
	// }
	//
	// /**
	// * Stream the file if there were no errors
	// *
	// * @deprecated since 1.26, use streamFileWithStatus
	// * @param array $headers Additional HTTP headers to send on success
	// * @return boolean Success
	// */
	// public function streamFile( $headers = [] ) {
	// $this->streamFileWithStatus( $headers )->isOK();
	// }
	//
	// /**
	// * Wrap some XHTML text in an anchor tag with the given attributes
	// *
	// * @param array $linkAttribs
	// * @param String $contents
	// * @return String
	// */
	// protected function linkWrap( $linkAttribs, $contents ) {
	// if ( $linkAttribs ) {
	// return Xml::tags( 'a', $linkAttribs, $contents );
	// } else {
	// return $contents;
	// }
	// }
	/**
	 * Fill attribs with the attributes of a link to the file's description page.
	 *
	 * The list is cleared first, then receives href, class ("image") and, when title is not null, title.
	 * The query-building part of the PHP original is not ported: prms is currently ignored
	 * and the local URL is always requested with an empty query.
	 *
	 * @param attribs list that is cleared and then filled with name/value pairs
	 * @param title value for the title attribute; null to omit the attribute
	 * @param prms Query parameters to add (currently unused)
	 */
	public void getDescLinkAttribs(List_adp attribs, byte[] title, List_adp prms) {
		byte[] url_query = Bry_.Empty;
		// if ( is_array( prms ) ) {
		// $query = prms;
		// } else {
		// $query = [];
		// }
		// if ( $this->page && $this->page !== 1 ) {
		// $query['page'] = $this->page;
		// }
		// if ( $this->lang ) {
		// $query['lang'] = $this->lang;
		// }
		//
		// if ( is_string( prms ) && prms !== '' ) {
		// $query = prms . '&' . wfArrayToCgi( $query );
		// }
		attribs.Clear();
		attribs.Add_many(Gfh_atr_.Bry__href, file.getTitle().getLocalURL(url_query));
		attribs.Add_many(Gfh_atr_.Bry__class, Bry__class__image);
		if (title == null) {
			return;
		}
		attribs.Add_many(Gfh_atr_.Bry__title, title);
	}
	/**
	 * Wrap some XHTML text in an anchor tag with the given attributes.
	 *
	 * @param bfr buffer that receives the output
	 * @param link_attribs attributes for the anchor; when null, contents is written without an anchor
	 * @param contents inner markup
	 */
	// XO.MW:SYNC:1.29; DATE:2017-02-03
	protected void Link_wrap(Bry_bfr bfr, List_adp link_attribs, byte[] contents) {
		if (link_attribs == null) {
			bfr.Add(contents);
			return;
		}
		XomwXml.Tags(bfr, Gfh_tag_.Bry__a, link_attribs, contents);
	}
}

View File

@@ -13,3 +13,199 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki.includes.media; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*;
import gplx.langs.htmls.*;
import gplx.xowa.mediawiki.includes.parsers.lnkis.*;
import gplx.xowa.mediawiki.includes.filerepo.file.*;
// Media transform output for images
public class XomwThumbnailImage extends XomwMediaTransformOutput {
	// Attribute lists are per-instance and reused by every toHtml call; toHtml resets both before use.
	private final List_adp attribs = List_adp_.New(), link_attribs = List_adp_.New();
	private static final byte[] Bry__vertical_align = Bry_.new_a7("vertical-align: ");

	/**
	 * Create a thumbnail with explicit dimensions.
	 *
	 * @param file source file
	 * @param url URL path to the thumb
	 * @param path filesystem path to the thumb; forwarded to the superclass constructor
	 * @param w width of the output box
	 * @param h height of the output box
	 */
	public XomwThumbnailImage(XomwFile file, byte[] url, byte[] path, int w, int h) {
		super(file, url, path, w, h);
	}
	/**
	 * Get a thumbnail Object from a file and parameters.
	 * If path is set to null, the output file is treated as a source copy.
	 * If path is set to false, no output file will be created.
	 * parameters should include, as a minimum, (file) 'width' and 'height'.
	 * It may also include a 'page' parameter for multipage files.
	 *
	 * XO: only parameters.width and parameters.height are read; the page / lang handling
	 * of the PHP constructor is kept below as commented-out reference code.
	 *
	 * @param file source file
	 * @param url URL path to the thumb
	 * @param path Filesystem path to the thumb
	 * @param parameters handler parameters supplying width and height
	 */
	public XomwThumbnailImage(XomwFile file, byte[] url, byte[] path, Xomw_params_handler parameters) {
		super(file, url, path, parameters.width, parameters.height);
		// defaults = [
		// 'page' => false,
		// 'lang' => false
		// ];
		//
		// if (is_array(parameters)) {
		// actualParams = parameters + defaults;
		// } else {
		// // Using old format, should convert. Later a warning could be added here.
		// numArgs = func_num_args();
		// actualParams = [
		// 'width' => path,
		// 'height' => parameters,
		// 'page' => (numArgs > 5) ? func_get_arg(5) : false
		// ] + defaults;
		// path = (numArgs > 4) ? func_get_arg(4) : false;
		// }
		// this->file = file;
		// this->url = url;
		// this->path = path;
		// These should be integers when they get here.
		// If not, there's a bug somewhere. But let's at
		// least produce valid HTML code regardless.
		// this->width = round(actualParams['width']);
		// this->height = round(actualParams['height']);
		// this->page = actualParams['page'];
		// this->lang = actualParams['lang'];
	}
	/**
	 * Write the HTML img tag for the thumbnail to bfr, optionally wrapped in an anchor.
	 * The tag includes width and height attributes and the alt text from options.
	 *
	 * @param bfr buffer that receives the final markup
	 * @param tmp scratch buffer; the img element is built in it and it is cleared before returning
	 * @param options Options object. Boolean options
	 *     should be indicated with a value of true for true, and false or
	 *     absent for false.
	 *
	 *     alt          HTML alt attribute
	 *     title        HTML title attribute
	 *     desc-link    Boolean, show a description link
	 *     file-link    Boolean, show a file download link
	 *     valign       vertical-align property, if the output is an inline element
	 *     img-class    Class applied to the "img" tag, if there is such a tag
	 *     desc-query   String, description link query prms
	 *     override-width     Override width attribute. Should generally not set
	 *     override-height    Override height attribute. Should generally not set
	 *     no-dimensions      Boolean, skip width and height attributes (useful if
	 *       set in CSS)
	 *     custom-url-link    Custom URL to link to
	 *     custom-title-link  Custom Title Object to link to
	 *     custom target-link Value of the target attribute, for custom-target-link
	 *     parser-extlink-*   Attributes added by parser for external links:
	 *          parser-extlink-rel: add rel="nofollow"
	 *          parser-extlink-target: link target, but overridden by custom-target-link
	 *
	 * For images, desc-link and file-link are implemented as a click-through. For
	 * sounds and videos, they may be displayed in other ways.
	 */
	// XO.MW:SYNC:1.29; DATE:2017-02-03
	@Override public void toHtml(Bry_bfr bfr, Bry_bfr tmp, Xomw_params_mto options) {
		byte[] alt = options.alt;
		// byte[] query = options.desc_query;
		attribs.Clear();
		// Reset the link attributes as well: the custom-title-link and file-link branches below are
		// not ported and add nothing, so without this they would re-emit attributes from an earlier call.
		link_attribs.Clear();
		attribs.Add_many(Gfh_atr_.Bry__alt, alt);
		attribs.Add_many(Gfh_atr_.Bry__src, url);
		boolean link_attribs_is_null = false;
		if (!XophpUtility.empty(options.custom_url_link)) {
			link_attribs.Add_many(Gfh_atr_.Bry__href, options.custom_url_link);
			if (!XophpUtility.empty(options.title)) {
				link_attribs.Add_many(Gfh_atr_.Bry__title, options.title);
			}
			// target: custom-target-link wins over parser-extlink-target; each is used only when it has a value
			if (!XophpUtility.empty(options.custom_target_link)) {
				link_attribs.Add_many(Gfh_atr_.Bry__target, options.custom_target_link);
			}
			else if (!XophpUtility.empty(options.parser_extlink_target)) {
				link_attribs.Add_many(Gfh_atr_.Bry__target, options.parser_extlink_target);
			}
			if (!XophpUtility.empty(options.parser_extlink_rel)) {
				link_attribs.Add_many(Gfh_atr_.Bry__rel, options.parser_extlink_rel);
			}
		}
		else if (!XophpUtility.empty(options.custom_title_link)) {
			// XO: not ported yet
			// byte[] title = options.custom_title_link;
			// link_attribs.Clear();
			// link_attribs.Add_many(Gfh_atr_.Bry__href, title.Get_link_url());
			// byte[] options_title = options.title;
			// link_attribs.Add_many(Gfh_atr_.Bry__title, XophpUtility.empty(options_title) ? title.Get_full_text() : options_title);
		}
		else if (!XophpUtility.empty(options.desc_link)) {
			// link_attribs = this.getDescLinkAttribs(
			// empty(options['title']) ? null : options['title'],
			// $query
			// );
			this.getDescLinkAttribs(link_attribs,
				XophpUtility.empty(options.title) ? null : options.title,
				null);
		}
		else if (!XophpUtility.empty(options.file_link)) {
			// XO: not ported yet
			// link_attribs.Clear();
			// link_attribs.Add_many(Gfh_atr_.Bry__href, file.Get_url());
		}
		else {
			// no link at all: the title goes on the img element itself
			link_attribs_is_null = true;
			if (!XophpUtility.empty(options.title)) {
				attribs.Add_many(Gfh_atr_.Bry__title, options.title);
			}
		}
		if (XophpUtility.empty(options.no_dimensions)) {
			attribs.Add_many(Gfh_atr_.Bry__width, Int_.To_bry(width));
			attribs.Add_many(Gfh_atr_.Bry__height, Int_.To_bry(height));
		}
		if (!XophpUtility.empty(options.valign)) {
			attribs.Add_many(Gfh_atr_.Bry__style, Bry_.Add(Bry__vertical_align, options.valign));
		}
		if (!XophpUtility.empty(options.img_cls)) {
			attribs.Add_many(Gfh_atr_.Bry__class, options.img_cls);
		}
		// NOTE(review): in PHP these assignments replace the width / height keys set above; here a second
		// pair is appended to the list. Whether XomwXml.Element collapses repeated names is not visible
		// from this class -- confirm.
		if (XophpUtility.isset(options.override_height)) {
			attribs.Add_many(Gfh_atr_.Bry__height, options.override_height);
		}
		if (XophpUtility.isset(options.override_width)) {
			attribs.Add_many(Gfh_atr_.Bry__width, options.override_width);
		}
		// Additional densities for responsive images, if specified.
		// If any of these urls is the same as src url, it'll be excluded.
		// $responsiveUrls = array_diff(this.responsiveUrls, [ this.url ]);
		// if (!XophpUtility.empty($responsiveUrls)) {
		// $attribs['srcset'] = Html::srcSet($responsiveUrls);
		// }
		// XO.MW.HOOK:ThumbnailBeforeProduceHTML
		XomwXml.Element(tmp, Gfh_tag_.Bry__img, attribs, Bry_.Empty, Bool_.Y);
		Link_wrap(bfr, link_attribs_is_null ? null : link_attribs, tmp.To_bry_and_clear());
	}
}

View File

@@ -13,3 +13,597 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki.includes.media; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*;
import gplx.xowa.mediawiki.includes.filerepo.file.*;
import gplx.xowa.mediawiki.includes.parsers.lnkis.*;
public class XomwTransformationalImageHandler extends XomwImageHandler { public XomwTransformationalImageHandler(byte[] key) {super(key);
}
/**
* @param File image
* @param array paramsVar Transform parameters. Entries with the keys 'width'
* and 'height' are the respective screen width and height, while the keys
* 'physicalWidth' and 'physicalHeight' indicate the thumbnail dimensions.
* @return boolean
*/
@Override public boolean normaliseParams(XomwFile image, Xomw_params_handler prms) {
	// the parent handler validates first; its rejection is final
	boolean parent_ok = super.normaliseParams(image, prms);
	if (!parent_ok) return false;

	// Obtain the source, pre-rotation dimensions
	final int source_w = image.getWidth(prms.page);
	final int source_h = image.getHeight(prms.page);

	// Don't make an image bigger than the source: clamp the physical size to the source size
	boolean at_least_source_width = prms.physicalWidth >= source_w;
	if (at_least_source_width) {
		prms.physicalWidth  = source_w;
		prms.physicalHeight = source_h;
		// Skip scaling limit checks if no scaling is required
		// due to requested size being bigger than source.
		// (no limit checks exist below in this port, so both paths currently return true)
		if (!image.mustRender()) {
			return true;
		}
	}
	return true;
}
// /**
// * Extracts the width/height if the image will be scaled before rotating
// *
// * This will match the physical size/aspect ratio of the original image
// * prior to application of the rotation -- so for a portrait image that's
// * stored as raw landscape with 90-degrees rotation, the resulting size
// * will be wider than it is tall.
// *
// * @param array paramsVar Parameters as returned by normaliseParams
// * @param int rotation The rotation angle that will be applied
// * @return array (width, height) array
// */
// public function extractPreRotationDimensions(paramsVar, rotation) {
// if (rotation == 90 || rotation == 270) {
// // We'll resize before rotation, so swap the dimensions again
// width = paramsVar['physicalHeight'];
// height = paramsVar['physicalWidth'];
// } else {
// width = paramsVar['physicalWidth'];
// height = paramsVar['physicalHeight'];
// }
//
// return [ width, height ];
// }
//
/**
* Create a thumbnail.
*
* This sets up various parameters, and then calls a helper method
* based on this.getScalerType in order to scale the image.
*
* @param File image
* @param String dstPath
* @param String dstUrl
* @param array paramsVar
* @param int flags
* @return MediaTransformError|ThumbnailImage|TransformParameterError
*/
@Override public XomwMediaTransformOutput doTransform(XomwFile image, byte[] dstPath, byte[] dstUrl, Xomw_params_handler prms, int flags) {
	// XO: normaliseParams is not invoked from here; the PHP call is kept for reference
	// if (!this.normaliseParams(image, paramsVar)) {
	// return new TransformParameterError(paramsVar);
	// }
	//
	// Create a parameter object to pass to the scaler
	Xomw_params_scalar scaler = new Xomw_params_scalar();
	// The size to which the image will be resized
	scaler.physicalWidth  = prms.physicalWidth;
	scaler.physicalHeight = prms.physicalHeight;
	// 'physicalDimensions' => "{paramsVar['physicalWidth']}x{paramsVar['physicalHeight']}",
	// The size of the image on the page
	scaler.clientWidth  = prms.width;
	scaler.clientHeight = prms.height;
	// Comment as will be added to the Exif of the thumbnail
	// 'comment' => isset(paramsVar['descriptionUrl'])
	// ? "File source: {paramsVar['descriptionUrl']}"
	// : '',
	// Properties of the original image
	scaler.srcWidth  = image.getWidth();
	scaler.srcHeight = image.getHeight();
	scaler.mimeType  = image.getMimeType();
	scaler.dstPath   = dstPath;
	scaler.dstUrl    = dstUrl;
	// 'interlace' => isset(paramsVar['interlace']) ? paramsVar['interlace'] : false,
	// if (isset(paramsVar['quality']) && paramsVar['quality'] === 'low') {
	// scalerParams['quality'] = 30;
	// }
	// For subclasses that might be paged.
	// if (image.isMultipage() && isset(paramsVar['page'])) {
	// scalerParams['page'] = intval(paramsVar['page']);
	// }
	// Determine scaler type
	// scaler = this.getScalerType(dstPath);
	//
	// if (is_array(scaler)) {
	// scalerName = get_class(scaler[0]);
	// } else {
	// scalerName = scaler;
	// }
	//
	// wfDebug(__METHOD__ . ": creating {scalerParams['physicalDimensions']} " .
	// "thumbnail at dstPath using scaler scalerName\n");

	// A real transform is needed when the file must be rendered or the target size differs from the source
	boolean needs_transform = image.mustRender()
		|| scaler.physicalWidth != scaler.srcWidth
		|| scaler.physicalHeight != scaler.srcHeight
		// || isset(scalerParams['quality'])
		;
	if (!needs_transform) {
		// normaliseParams (or the user) wants us to return the unscaled image
		// wfDebug(__METHOD__ . ": returning unscaled image\n");
		return this.getClientScalingThumbnailImage(image, scaler);
	}
	// if (scaler == 'client') {
	// // Client-side image scaling, use the source URL
	// // Using the destination URL in a TRANSFORM_LATER request would be incorrect
	// return this.getClientScalingThumbnailImage(image, scalerParams);
	// }
	//
	// if (image.isTransformedLocally() && !this.isImageAreaOkForThumbnaling(image, paramsVar)) {
	// global wgMaxImageArea;
	// return new TransformTooBigImageAreaError(paramsVar, wgMaxImageArea);
	// }
	//
	// XO: flags is not inspected; the TRANSFORM_LATER result is always returned
	// if (flags & self::TRANSFORM_LATER) {
	// wfDebug(__METHOD__ . ": Transforming later per flags.\n");
	// newParams = [
	// 'width' => scalerParams['clientWidth'],
	// 'height' => scalerParams['clientHeight']
	// ];
	// if (isset(paramsVar['quality'])) {
	// newParams['quality'] = paramsVar['quality'];
	// }
	// if (isset(paramsVar['page']) && paramsVar['page']) {
	// newParams['page'] = paramsVar['page'];
	// }
	// return new XomwThumbnailImage(image, dstUrl, null, newParams);
	return new XomwThumbnailImage(image, dstUrl, null, prms);
	// }
	//
	// // Try to make a target path for the thumbnail
	// if (!wfMkdirParents(dirname(dstPath), null, __METHOD__)) {
	// wfDebug(__METHOD__ . ": Unable to create thumbnail destination " .
	// "directory, falling back to client scaling\n");
	//
	// return this.getClientScalingThumbnailImage(image, scalerParams);
	// }
	//
	// // Transform functions and binaries need a FS source file
	// thumbnailSource = this.getThumbnailSource(image, paramsVar);
	//
	// // If the source isn't the original, disable EXIF rotation because it's already been applied
	// if (scalerParams['srcWidth'] != thumbnailSource['width']
	// || scalerParams['srcHeight'] != thumbnailSource['height']) {
	// scalerParams['disableRotation'] = true;
	// }
	//
	// scalerParams['srcPath'] = thumbnailSource['path'];
	// scalerParams['srcWidth'] = thumbnailSource['width'];
	// scalerParams['srcHeight'] = thumbnailSource['height'];
	//
	// if (scalerParams['srcPath'] === false) { // Failed to get local copy
	// wfDebugLog('thumbnail',
	// sprintf('Thumbnail failed on %s: could not get local copy of "%s"',
	// wfHostname(), image.getName()));
	//
	// return new MediaTransformError('thumbnail_error',
	// scalerParams['clientWidth'], scalerParams['clientHeight'],
	// wfMessage('filemissing')
	// );
	// }
	//
	// // Try a hook. Called "Bitmap" for historical reasons.
	// /** @var mto MediaTransformOutput */
	// mto = null;
	// Hooks::run('BitmapHandlerTransform', [ this, image, &scalerParams, &mto ]);
	// if (!is_null(mto)) {
	// wfDebug(__METHOD__ . ": Hook to BitmapHandlerTransform created an mto\n");
	// scaler = 'hookaborted';
	// }
	//
	// // scaler will return a MediaTransformError on failure, or false on success.
	// // If the scaler is successful, it will have created a thumbnail at the destination
	// // path.
	// if (is_array(scaler) && is_callable(scaler)) {
	// // Allow subclasses to specify their own rendering methods.
	// err = call_user_func(scaler, image, scalerParams);
	// } else {
	// switch (scaler) {
	// case 'hookaborted':
	// // Handled by the hook above
	// err = mto.isError() ? mto : false;
	// break;
	// case 'im':
	// err = this.transformImageMagick(image, scalerParams);
	// break;
	// case 'custom':
	// err = this.transformCustom(image, scalerParams);
	// break;
	// case 'imext':
	// err = this.transformImageMagickExt(image, scalerParams);
	// break;
	// case 'gd':
	// default:
	// err = this.transformGd(image, scalerParams);
	// break;
	// }
	// }
	//
	// // Remove the file if a zero-byte thumbnail was created, or if there was an error
	// removed = this.removeBadFile(dstPath, (boolean)err);
	// if (err) {
	// // transform returned MediaTransformError
	// return err;
	// } elseif (removed) {
	// // Thumbnail was zero-byte and had to be removed
	// return new MediaTransformError('thumbnail_error',
	// scalerParams['clientWidth'], scalerParams['clientHeight'],
	// wfMessage('unknown-error')
	// );
	// } elseif (mto) {
	// return mto;
	// } else {
	// newParams = [
	// 'width' => scalerParams['clientWidth'],
	// 'height' => scalerParams['clientHeight']
	// ];
	// if (isset(paramsVar['quality'])) {
	// newParams['quality'] = paramsVar['quality'];
	// }
	// if (isset(paramsVar['page']) && paramsVar['page']) {
	// newParams['page'] = paramsVar['page'];
	// }
	// return new ThumbnailImage(image, dstUrl, dstPath, newParams);
	// }
	// return null;
}
// /**
// * Get the source file for the transform
// *
// * @param File file
// * @param array paramsVar
// * @return array Array with keys width, height and path.
// */
// protected function getThumbnailSource(file, paramsVar) {
// return file.getThumbnailSource(paramsVar);
// }
//
// /**
// * Returns what sort of scaler type should be used.
// *
// * Values can be one of client, im, custom, gd, imext, or an array
// * of Object, method-name to call that specific method.
// *
// * If specifying a custom scaler command with [ Obj, method ],
// * the method in question should take 2 parameters, a File Object,
// * and a scalerParams array with various options (See doTransform
// * for what is in scalerParams). On error it should return a
// * MediaTransformError Object. On success it should return false,
// * and simply make sure the thumbnail file is located at
// * scalerParams['dstPath'].
// *
// * If there is a problem with the output path, it returns "client"
// * to do client side scaling.
// *
// * @param String dstPath
// * @param boolean checkDstPath Check that dstPath is valid
// * @return String|Callable One of client, im, custom, gd, imext, or a Callable array.
// */
// abstract protected function getScalerType(dstPath, checkDstPath = true);
/**
* Get a ThumbnailImage that represents an image that will be scaled
* client side
*
* @param File image File associated with this thumbnail
* @param array scalerParams Array with scaler paramsVar
* @return ThumbnailImage
*
* @todo FIXME: No rotation support
*/
private XomwThumbnailImage getClientScalingThumbnailImage(XomwFile image, Xomw_params_scalar scalerParams) {
	// Only the on-page (client) box size is carried over from the scaler parameters
	Xomw_params_handler client_prms = new Xomw_params_handler();
	client_prms.width  = scalerParams.clientWidth;
	client_prms.height = scalerParams.clientHeight;
	// The thumbnail points at the source image's own URL and is created with a null path
	byte[] source_url = image.getUrl();
	XomwThumbnailImage thumb = new XomwThumbnailImage(image, source_url, null, client_prms);
	return thumb;
}
// /**
// * Transform an image using ImageMagick
// *
// * This is a stub method. The real method is in BitmapHandler.
// *
// * @param File image File associated with this thumbnail
// * @param array paramsVar Array with scaler paramsVar
// *
// * @return MediaTransformError Error Object if error occurred, false (=no error) otherwise
// */
// protected function transformImageMagick(image, paramsVar) {
// return this.getMediaTransformError(paramsVar, "Unimplemented");
// }
//
// /**
// * Transform an image using the Imagick PHP extension
// *
// * This is a stub method. The real method is in BitmapHander.
// *
// * @param File image File associated with this thumbnail
// * @param array paramsVar Array with scaler paramsVar
// *
// * @return MediaTransformError Error Object if error occurred, false (=no error) otherwise
// */
// protected function transformImageMagickExt(image, paramsVar) {
// return this.getMediaTransformError(paramsVar, "Unimplemented");
// }
//
// /**
// * Transform an image using a custom command
// *
// * This is a stub method. The real method is in BitmapHander.
// *
// * @param File image File associated with this thumbnail
// * @param array paramsVar Array with scaler paramsVar
// *
// * @return MediaTransformError Error Object if error occurred, false (=no error) otherwise
// */
// protected function transformCustom(image, paramsVar) {
// return this.getMediaTransformError(paramsVar, "Unimplemented");
// }
//
// /**
// * Get a MediaTransformError with error 'thumbnail_error'
// *
// * @param array paramsVar Parameter array as passed to the transform* functions
// * @param String errMsg Error message
// * @return MediaTransformError
// */
// public function getMediaTransformError(paramsVar, errMsg) {
// return new MediaTransformError('thumbnail_error', paramsVar['clientWidth'],
// paramsVar['clientHeight'], errMsg);
// }
//
// /**
// * Transform an image using the built in GD library
// *
// * This is a stub method. The real method is in BitmapHander.
// *
// * @param File image File associated with this thumbnail
// * @param array paramsVar Array with scaler paramsVar
// *
// * @return MediaTransformError Error Object if error occurred, false (=no error) otherwise
// */
// protected function transformGd(image, paramsVar) {
// return this.getMediaTransformError(paramsVar, "Unimplemented");
// }
//
// /**
// * Escape a String for ImageMagick's property input (e.g. -set -comment)
// * See InterpretImageProperties() in magick/property.c
// * @param String s
// * @return String
// */
// function escapeMagickProperty(s) {
// // Double the backslashes
// s = str_replace('\\', '\\\\', s);
// // Double the percents
// s = str_replace('%', '%%', s);
// // Escape initial - or @
// if (strlen(s) > 0 && (s[0] === '-' || s[0] === '@')) {
// s = '\\' . s;
// }
//
// return s;
// }
//
// /**
// * Escape a String for ImageMagick's input filenames. See ExpandFilenames()
// * and GetPathComponent() in magick/utility.c.
// *
// * This won't work with an initial ~ or @, so input files should be prefixed
// * with the directory name.
// *
// * Glob character unescaping is broken in ImageMagick before 6.6.1-5, but
// * it's broken in a way that doesn't involve trying to convert every file
// * in a directory, so we're better off escaping and waiting for the bugfix
// * to filter down to users.
// *
// * @param String path The file path
// * @param boolean|String scene The scene specification, or false if there is none
// * @throws MWException
// * @return String
// */
// function escapeMagickInput(path, scene = false) {
// // Die on initial metacharacters (caller should prepend path)
// firstChar = substr(path, 0, 1);
// if (firstChar === '~' || firstChar === '@') {
// throw new MWException(__METHOD__ . ': cannot escape this path name');
// }
//
// // Escape glob chars
// path = preg_replace('/[*?\[\]{}]/', '\\\\\0', path);
//
// return this.escapeMagickPath(path, scene);
// }
//
// /**
// * Escape a String for ImageMagick's output filename. See
// * InterpretImageFilename() in magick/image.c.
// * @param String path The file path
// * @param boolean|String scene The scene specification, or false if there is none
// * @return String
// */
// function escapeMagickOutput(path, scene = false) {
// path = str_replace('%', '%%', path);
//
// return this.escapeMagickPath(path, scene);
// }
//
// /**
// * Armour a String against ImageMagick's GetPathComponent(). This is a
// * helper function for escapeMagickInput() and escapeMagickOutput().
// *
// * @param String path The file path
// * @param boolean|String scene The scene specification, or false if there is none
// * @throws MWException
// * @return String
// */
// protected function escapeMagickPath(path, scene = false) {
// // Die on format specifiers (other than drive letters). The regex is
// // meant to match all the formats you get from "convert -list format"
// if (preg_match('/^([a-zA-Z0-9-]+):/', path, m)) {
// if (wfIsWindows() && is_dir(m[0])) {
// // OK, it's a drive letter
// // ImageMagick has a similar exception, see IsMagickConflict()
// } else {
// throw new MWException(__METHOD__ . ': unexpected colon character in path name');
// }
// }
//
// // If there are square brackets, add a do-nothing scene specification
// // to force a literal interpretation
// if (scene === false) {
// if (strpos(path, '[') !== false) {
// path .= '[0--1]';
// }
// } else {
// path .= "[scene]";
// }
//
// return path;
// }
//
// /**
// * Retrieve the version of the installed ImageMagick
// * You can use PHPs version_compare() to use this value
// * Value is cached for one hour.
// * @return String|boolean Representing the IM version; false on error
// */
// protected function getMagickVersion() {
// cache = MediaWikiServices::getInstance().getLocalServerObjectCache();
// method = __METHOD__;
// return cache.getWithSetCallback(
// 'imagemagick-version',
// cache::TTL_HOUR,
// function () use (method) {
// global wgImageMagickConvertCommand;
//
// cmd = wfEscapeShellArg(wgImageMagickConvertCommand) . ' -version';
// wfDebug(method . ": Running convert -version\n");
// retval = '';
// return = wfShellExec(cmd, retval);
// x = preg_match(
// '/Version: ImageMagick ([0-9]*\.[0-9]*\.[0-9]*)/', return, matches
// );
// if (x != 1) {
// wfDebug(method . ": ImageMagick version check failed\n");
// return false;
// }
//
// return matches[1];
// }
// );
// }
//
// /**
// * Returns whether the current scaler supports rotation.
// *
// * @since 1.24 No longer static
// * @return boolean
// */
// public function canRotate() {
// return false;
// }
//
// /**
// * Should we automatically rotate an image based on exif
// *
// * @since 1.24 No longer static
// * @see wgEnableAutoRotation
// * @return boolean Whether auto rotation is enabled
// */
// public function autoRotateEnabled() {
// return false;
// }
//
// /**
// * Rotate a thumbnail.
// *
// * This is a stub. See BitmapHandler::rotate.
// *
// * @param File file
// * @param array paramsVar Rotate parameters.
// * 'rotation' clockwise rotation in degrees, allowed are multiples of 90
// * @since 1.24 Is non-static. From 1.21 it was static
// * @return boolean|MediaTransformError
// */
// public function rotate(file, paramsVar) {
// return new MediaTransformError('thumbnail_error', 0, 0,
// get_class(this) . ' rotation not implemented');
// }
//
// /**
// * Returns whether the file needs to be rendered. Returns true if the
// * file requires rotation and we are able to rotate it.
// *
// * @param File file
// * @return boolean
// */
// public function mustRender(file) {
// return this.canRotate() && this.getRotation(file) != 0;
// }
//
// /**
// * Check if the file is smaller than the maximum image area for thumbnailing.
// *
// * Runs the 'BitmapHandlerCheckImageArea' hook.
// *
// * @param File file
// * @param array paramsVar
// * @return boolean
// * @since 1.25
// */
// public function isImageAreaOkForThumbnaling(file, &paramsVar) {
// global wgMaxImageArea;
//
// // For historical reasons, hook starts with BitmapHandler
// checkImageAreaHookResult = null;
// Hooks::run(
// 'BitmapHandlerCheckImageArea',
// [ file, &paramsVar, &checkImageAreaHookResult ]
// );
//
// if (!is_null(checkImageAreaHookResult)) {
// // was set by hook, so return that value
// return (boolean)checkImageAreaHookResult;
// }
//
// srcWidth = file.getWidth(paramsVar['page']);
// srcHeight = file.getHeight(paramsVar['page']);
//
// if (srcWidth * srcHeight > wgMaxImageArea
// && !(file.getMimeType() == 'image/jpeg'
// && this.getScalerType(false, false) == 'im')
// ) {
// // Only ImageMagick can efficiently downsize jpg images without loading
// // the entire file in memory
// return false;
// }
// return true;
// }
}

View File

@@ -13,3 +13,652 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki.includes.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*;
import gplx.core.btries.*;
import gplx.langs.htmls.*;
/**
* This is the part of the wikitext parser which handles automatic paragraphs
* and conversion of start-of-line prefixes to HTML lists.
*/
public class XomwBlockLevelPass {
private boolean DTopen = false;
private boolean inPre = false;
private int lastSection = LAST_SECTION_NONE;
private boolean linestart;
// private $text;
private final Bry_bfr tmp = Bry_bfr_.New();
private final Btrie_rv trv = new Btrie_rv();
private byte[] find_colon_no_links__before, find_colon_no_links__after;
// State constants for the definition list colon extraction
private static final int
COLON_STATE_TEXT = 0
, COLON_STATE_TAG = 1
, COLON_STATE_TAGSTART = 2
, COLON_STATE_CLOSETAG = 3
, COLON_STATE_TAGSLASH = 4
, COLON_STATE_COMMENT = 5
, COLON_STATE_COMMENTDASH = 6
, COLON_STATE_COMMENTDASHDASH = 7
;
/**
* Make lists from lines starting with ':', '*', '#', etc.
*
* @param String $text
* @param boolean $linestart Whether or not this is at the start of a line.
* @return String The lists rendered as HTML
*/
// public static function doBlockLevels($text, $linestart) {
// $pass = new self($text, $linestart);
// return $pass->execute();
// }
public void doBlockLevels(XomwParserCtx pctx, XomwParserBfr pbfr, boolean linestart) {
	// XO: MW builds a new pass object per call (new self($text, $linestart));
	// XO reuses this instance, so the linestart flag is stored on the instance before running the pass
	this.linestart = linestart;
	this.execute(pctx, pbfr, linestart);
}
// /**
// * Private constructor
// */
// private function __construct($text, $linestart) {
// $this->text = $text;
// $this->linestart = $linestart;
// }
/**
* If a pre or p is open, return the corresponding close tag and update
* the state. If no tag is open, return an empty String.
* @return String
*/
private byte[] closeParagraph() {
	// XO: returns the closing tag (plus "\n") of the currently open section, or an empty array if none is open;
	// inPre and lastSection are reset in both cases
	byte[] closeTag = Bry_.Empty;
	if (lastSection != LAST_SECTION_NONE) {
		// MW: $result = '</' . $this->lastSection . ">\n";
		tmp.Add(lastSection == LAST_SECTION_PARA ? Gfh_tag_.P_rhs : Gfh_tag_.Pre_rhs);
		tmp.Add_byte_nl();
		closeTag = tmp.To_bry_and_clear();
	}
	lastSection = LAST_SECTION_NONE;
	inPre = false;
	return closeTag;
}
/**
* getCommon() returns the length of the longest common substring
* of both arguments, starting at the beginning of both.
*
* @param String $st1
* @param String $st2
*
* @return int
*/
// getCommon() returns the length of the longest common substring
// of both arguments, starting at the beginning of both.
private int getCommon(byte[] st1, byte[] st2) {
	// only the overlap of the two arrays can match
	int limit = Math.min(st1.length, st2.length);
	int matched = 0;
	// advance while both arrays hold the same byte; stops at the first difference or at the shorter length
	while (matched < limit && st1[matched] == st2[matched]) {
		matched++;
	}
	return matched;
}
/**
* Open the list item element identified by the prefix character.
*
* @param String $char
*
* @return String
*/
private byte[] openList(byte c) {
	// any open <p> / <pre> is closed first; its close tag is prepended to the list-open tags
	byte[] closedPara = this.closeParagraph();
	String openTags;
	switch (c) {
		case Byte_ascii.Star:
			openTags = "<ul><li>";
			break;
		case Byte_ascii.Hash:
			openTags = "<ol><li>";
			break;
		case Byte_ascii.Colon:
			openTags = "<dl><dd>";
			break;
		case Byte_ascii.Semic:
			openTags = "<dl><dt>";
			this.DTopen = true;	// remember that a <dt> is open so the matching close emits </dt>
			break;
		default:
			// unknown prefix char: the error marker replaces the result (the paragraph close tag is dropped)
			return Bry_.new_a7("<!-- ERR 1 -->");
	}
	return Bry_.Add(closedPara, Bry_.new_a7(openTags));
}
/**
* Close the current list item and open the next one.
* @param String $char
*
* @return String
*/
private byte[] nextItem(byte c) {
	// bulleted / numbered lists: plain </li><li> transition
	if (c == Byte_ascii.Star || c == Byte_ascii.Hash) {
		return Bry_.new_a7("</li>\n<li>");
	}
	boolean isTerm = c == Byte_ascii.Semic;
	if (!isTerm && c != Byte_ascii.Colon) {
		return Bry_.new_a7("<!-- ERR 2 -->");
	}
	// definition lists: close whichever of <dt> / <dd> is open, then open the element for this prefix
	String closeTag = this.DTopen ? "</dt>\n" : "</dd>\n";
	String openTag = isTerm ? "<dt>" : "<dd>";
	this.DTopen = isTerm;
	return Bry_.Add(Bry_.new_a7(closeTag), Bry_.new_a7(openTag));
}
/**
* Close the current list item identified by the prefix character.
* @param String $char
*
* @return String
*/
private byte[] closeList(byte c) {
	if (c == Byte_ascii.Star) {
		return Bry_.new_a7("</li></ul>");
	}
	if (c == Byte_ascii.Hash) {
		return Bry_.new_a7("</li></ol>");
	}
	if (c != Byte_ascii.Colon) {
		// note that ';' also lands here; execute() only passes prefixes in which ';' was replaced by ':'
		return Bry_.new_a7("<!-- ERR 3 -->");
	}
	// definition list: the closing tag depends on whether a <dt> is still open; the flag is cleared either way
	boolean termWasOpen = this.DTopen;
	this.DTopen = false;
	return Bry_.new_a7(termWasOpen ? "</dt></dl>" : "</dd></dl>");
}
/**
* Execute the pass.
* @return String
*/
public void execute(XomwParserCtx pctx, XomwParserBfr pbfr, boolean linestart) {
	// XO: reads the text from pbfr.Src(), walks it line by line and writes the result to pbfr.Trg():
	// start-of-line prefixes (* # : ;) become <ul> / <ol> / <dl> markup; other lines are wrapped in <p> / <pre>.
	// linestart: if false, the first line is copied through untouched and block processing starts at the second line.
	// pctx is not referenced by this method.
	// XO: honor the argument even when execute is called directly instead of through doBlockLevels
	this.linestart = linestart;
	// XO.PBFR
	Bry_bfr src_bfr = pbfr.Src();
	byte[] src = src_bfr.Bfr();
	int src_bgn = 0;
	int src_end = src_bfr.Len();
	Bry_bfr bfr = pbfr.Trg();
	pbfr.Switch();
	// XO.STATIC
	if (block_chars_ary == null) {
		synchronized (Type_.Type_by_obj(this)) {
			// re-check inside the lock so that only one caller builds the tables
			if (block_chars_ary == null) {
				openMatchTrie = Btrie_slim_mgr.ci_a7().Add_many_str
				( "<table", "<h1", "<h2", "<h3", "<h4", "<h5", "<h6", "<pre", "<tr"
				, "<p", "<ul", "<ol", "<dl", "<li", "</tr", "</td", "</th");
				closeMatchTrie = Btrie_slim_mgr.ci_a7().Add_many_str
				( "</table", "</h1", "</h2", "</h3", "</h4", "</h5", "</h6"
				, "<td", "<th", "<blockquote", "</blockquote", "<div", "</div", "<hr", "</pre", "</p", "</mw:"
				, XomwParser.MARKER_PREFIX_STR + "-pre"
				, "</li", "</ul", "</ol", "</dl", "<center", "</center");
				blockquoteTrie = Btrie_slim_mgr.ci_a7().Add_many_str("<blockquote", "</blockquote");
				pre_trie = Btrie_slim_mgr.ci_a7().Add_str_int("<pre", PRE_BGN).Add_str_int("</pre", PRE_END);
				// block_chars_ary is the "already initialized" guard, so it is assigned last, after the tries exist
				// NOTE(review): the static fields are not volatile; confirm whether concurrent first use must be supported
				block_chars_ary = Block_chars_ary__new();
			}
		}
	}
	// clear state
	this.inPre = false;
	this.lastSection = LAST_SECTION_NONE;
	byte[] prefix2 = null;
	bfr.Clear();
	// Parsing through the text line by line. The main thing
	// happening here is handling of block-level elements p, pre,
	// and making lists from lines starting with * # : etc.
	byte[] lastPrefix = Bry_.Empty;
	this.DTopen = false;
	boolean inBlockElem = false;
	int prefixLen = 0;
	byte pendingPTag = PARA_STACK_NONE;
	boolean inBlockquote = false;
	// PORTED.SPLIT: $textLines = StringUtils::explode("\n", $text);
	int lineBgn = src_bgn;
	while (lineBgn < src_end) {
		int lineEnd = Bry_find_.Find_fwd(src, Byte_ascii.Nl, lineBgn);
		if (lineEnd == Bry_find_.Not_found)
			lineEnd = src_end;
		// Fix up linestart
		if (!this.linestart) {
			bfr.Add_mid(src, lineBgn, lineEnd);
			this.linestart = true;
			// XO: MW's "continue" moves its foreach to the next line; this loop must advance lineBgn itself,
			// else the same line is read again and emitted a second time as a normal line
			lineBgn = lineEnd + 1;
			continue;
		}
		// * = ul
		// # = ol
		// ; = dt
		// : = dd
		int lastPrefixLen = lastPrefix.length;
		// PORTED.BGN: preCloseMatch = preg_match('/<\\/pre/i', $oLine); preOpenMatch = preg_match('/<pre/i', $oLine);
		int preCur = lineBgn;
		boolean preCloseMatch = false;
		boolean preOpenMatch = false;
		while (true) {
			if (preCur >= lineEnd)
				break;
			Object o = pre_trie.Match_at(trv, src, preCur, lineEnd);
			if (o == null)
				preCur++;
			else {
				int pre_tid = Int_.Cast(o);
				if (pre_tid == PRE_BGN)
					preOpenMatch = true;
				else if (pre_tid == PRE_END)
					preCloseMatch = true;
				preCur = trv.Pos();
			}
		}
		// PORTED.END
		byte[] prefix = null, t = null;
		// If not in a <pre> element, scan for and figure out what prefixes are there.
		if (!this.inPre) {
			// Multiple prefixes may abut each other for nested lists.
			prefixLen = XophpString.strspn_fwd__ary(src, block_chars_ary, lineBgn, lineEnd, lineEnd); // strspn($oLine, '*#:;');
			prefix = XophpString.substr(src, lineBgn, prefixLen);
			// eh?
			// ; and : are both from definition-lists, so they're equivalent
			// for the purposes of determining whether or not we need to open/close
			// elements.
			// substr($inputLine, $prefixLength);
			prefix2 = Bry_.Replace(prefix, Byte_ascii.Semic, Byte_ascii.Colon);
			t = Bry_.Mid(src, lineBgn + prefixLen, lineEnd);
			this.inPre = preOpenMatch;
		}
		else {
			// Don't interpret any other prefixes in preformatted text
			prefixLen = 0;
			prefix = prefix2 = Bry_.Empty;
			t = Bry_.Mid(src, lineBgn, lineEnd);
		}
		// List generation
		byte[] term = null, t2 = null;
		int commonPrefixLen = -1;
		if (prefixLen > 0 && Bry_.Eq(lastPrefix, prefix2)) {
			// Same as the last item, so no need to deal with nesting or opening stuff
			bfr.Add(this.nextItem(XophpString.substr_byte(prefix, -1)));
			pendingPTag = PARA_STACK_NONE;
			if (prefixLen > 0 && prefix[prefixLen - 1] == Byte_ascii.Semic) {
				// The one nasty exception: definition lists work like this:
				// ; title : definition text
				// So we check for : in the remainder text to split up the
				// title and definition, without b0rking links.
				term = t2 = Bry_.Empty;
				if (this.findColonNoLinks(t, term, t2) != Bry_find_.Not_found) {
					// XO: the split halves are returned through member fields, not through the arguments
					term = find_colon_no_links__before;
					t2 = find_colon_no_links__after;
					t = t2;
					bfr.Add(term).Add(nextItem(Byte_ascii.Colon));
				}
			}
		}
		else if (prefixLen > 0 || lastPrefixLen > 0) {
			// We need to open or close prefixes, or both.
			// Either open or close a level...
			commonPrefixLen = this.getCommon(prefix, lastPrefix);
			pendingPTag = PARA_STACK_NONE;
			// Close all the prefixes which aren't shared.
			while (commonPrefixLen < lastPrefixLen) {
				bfr.Add(this.closeList(lastPrefix[lastPrefixLen - 1]));
				--lastPrefixLen;
			}
			// Continue the current prefix if appropriate.
			if (prefixLen <= commonPrefixLen && commonPrefixLen > 0) {
				bfr.Add(this.nextItem(prefix[commonPrefixLen - 1]));
			}
			// Open prefixes where appropriate.
			if (Bry_.Len_gt_0(lastPrefix) && prefixLen > commonPrefixLen) {
				bfr.Add_byte_nl();
			}
			while (prefixLen > commonPrefixLen) {
				byte c = XophpString.substr_byte(prefix, commonPrefixLen, 1);
				bfr.Add(this.openList(c));
				if (c == Byte_ascii.Semic) {
					// @todo FIXME: This is dupe of code above
					if (findColonNoLinks(t, term, t2) != Bry_find_.Not_found) {
						term = find_colon_no_links__before;
						t2 = find_colon_no_links__after;
						t = t2;
						bfr.Add(term).Add(nextItem(Byte_ascii.Colon));
					}
				}
				++commonPrefixLen;
			}
			if (prefixLen == 0 && Bry_.Len_gt_0(lastPrefix)) {
				bfr.Add_byte_nl();
			}
			lastPrefix = prefix2;
		}
		// If we have no prefixes, go to paragraph mode.
		if (0 == prefixLen) {
			// No prefix (not in list)--go to paragraph mode
			// @todo consider using a stack for nestable elements like span, table and div
			int tLen = t.length;
			// XO.MW.PORTED.BGN:
			boolean openMatch = XophpPreg.match(openMatchTrie, trv, t, 0, tLen) != null;
			boolean closeMatch = XophpPreg.match(closeMatchTrie, trv, t, 0, tLen) != null;
			// XO.MW.PORTED.END
			if (openMatch || closeMatch) {
				pendingPTag = PARA_STACK_NONE;
				// @todo bug 5718: paragraph closed
				bfr.Add(this.closeParagraph());
				if (preOpenMatch && !preCloseMatch) {
					this.inPre = true;
				}
				int bqOffset = 0;
				// PORTED:preg_match('/<(\\/?)blockquote[\s>]/i', t, $bqMatch, PREG_OFFSET_CAPTURE, $bqOffset)
				while (true) {
					Object o = XophpPreg.match(blockquoteTrie, trv, t, bqOffset, tLen);
					if (o == null) { // no more blockquotes found; exit
						break;
					}
					else {
						byte[] bq_bry = (byte[])o;
						inBlockquote = bq_bry[1] != Byte_ascii.Slash; // is this a close tag?
						bqOffset = trv.Pos();
					}
				}
				// PORTED:END
				inBlockElem = !closeMatch;
			}
			else if (!inBlockElem && !this.inPre) {
				// XO: MW tests trim($t) == ''; test the trimmed length instead of comparing array identity
				// against Bry_.Empty, which only works if Bry_.Trim hands back that exact instance
				boolean tIsBlank = !Bry_.Len_gt_0(Bry_.Trim(t));
				if (XophpString.substr_byte(t, 0) == Byte_ascii.Space
					&& (this.lastSection == LAST_SECTION_PRE || !tIsBlank)
					&& !inBlockquote
				) {
					// pre
					if (this.lastSection != LAST_SECTION_PRE) {
						pendingPTag = PARA_STACK_NONE;
						bfr.Add(closeParagraph()).Add(Gfh_tag_.Pre_lhs);
						this.lastSection = LAST_SECTION_PRE;
					}
					t = Bry_.Mid(t, 1);
				}
				else {
					// paragraph
					if (tIsBlank) {
						if (pendingPTag != PARA_STACK_NONE) {
							ParaStackAdd(bfr, pendingPTag);
							bfr.Add_str_a7("<br />");
							pendingPTag = PARA_STACK_NONE;
							this.lastSection = LAST_SECTION_PARA;
						}
						else {
							if (this.lastSection != LAST_SECTION_PARA) {
								bfr.Add(this.closeParagraph());
								this.lastSection = LAST_SECTION_NONE;
								pendingPTag = PARA_STACK_BGN;
							}
							else {
								pendingPTag = PARA_STACK_MID;
							}
						}
					}
					else {
						if (pendingPTag != PARA_STACK_NONE) {
							ParaStackAdd(bfr, pendingPTag);
							pendingPTag = PARA_STACK_NONE;
							this.lastSection = LAST_SECTION_PARA;
						}
						else if (lastSection != LAST_SECTION_PARA) {
							bfr.Add(this.closeParagraph()).Add(Gfh_tag_.P_lhs);
							this.lastSection = LAST_SECTION_PARA;
						}
					}
				}
			}
		}
		// somewhere above we forget to get out of pre block (bug 785)
		if (preCloseMatch && this.inPre) {
			this.inPre = false;
		}
		if (pendingPTag == PARA_STACK_NONE) {
			bfr.Add(t);
			if (prefixLen == 0) {
				bfr.Add_byte_nl();
			}
		}
		lineBgn = lineEnd + 1;
	}
	// close any lists still open after the last line
	// NOTE(review): upstream MediaWiki appears to append the "\n" once the last list is closed
	// (prefix length reaches 0) rather than between closes -- confirm against BlockLevelPass.php before changing
	while (prefixLen > 0) {
		bfr.Add(this.closeList(prefix2[prefixLen - 1]));
		--prefixLen;
		if (prefixLen > 0) {
			bfr.Add_byte_nl();
		}
	}
	if (this.lastSection != LAST_SECTION_NONE) {
		bfr.Add(this.lastSection == LAST_SECTION_PARA ? Gfh_tag_.P_rhs : Gfh_tag_.Pre_rhs);
		this.lastSection = LAST_SECTION_NONE;
	}
}
/**
* Split up a String on ':', ignoring any occurrences inside tags
* to prevent illegal overlapping.
*
* @param String $str The String to split
* @param String &$before Set to everything before the ':'
* @param String &$after Set to everything after the ':'
* @throws MWException
* @return int A non-negative position if a ':' was found, or Bry_find_.Not_found if none found (MW: false)
*/
private int findColonNoLinks(byte[] str, byte[] before, byte[] after) {
	// XO: looks for the first ':' in str that is not inside a tag, an element or an html comment.
	// On success the two halves are stored in find_colon_no_links__before / find_colon_no_links__after
	// (the before / after arguments are never written; MW passes them by reference) and a non-negative
	// index is returned. Callers should only compare the result against Bry_find_.Not_found: one
	// branch below returns the scan index rather than the colon position, as MW does.
	int len = str.length;
	int colonPos = XophpString.strpos(str, Byte_ascii.Colon, 0, len);
	if (colonPos == Bry_find_.Not_found) {
		// Nothing to find!
		return Bry_find_.Not_found;
	}
	int ltPos = XophpString.strpos(str, Byte_ascii.Angle_bgn, 0, len);
	if (ltPos == Bry_find_.Not_found || ltPos > colonPos) {
		// Easy; no tag nesting to worry about
		// XOMW: MW passes before / after by reference; XO: changes member and depends on callers to update
		find_colon_no_links__before = XophpString.substr(str, 0, colonPos);
		find_colon_no_links__after = XophpString.substr(str, colonPos + 1);
		return colonPos;
	}
	// Ugly state machine to walk through avoiding tags.
	int state = COLON_STATE_TEXT;
	int level = 0;	// nesting depth: +1 per completed <tag>, -1 per completed </tag>
	for (int i = 0; i < len; i++) {
		byte c = str[i];
		switch (state) {
			case COLON_STATE_TEXT:
				switch (c) {
					case Byte_ascii.Angle_bgn:
						// Could be either a <start> tag or an </end> tag
						state = COLON_STATE_TAGSTART;
						break;
					case Byte_ascii.Colon:
						if (level == 0) {
							// We found it!
							find_colon_no_links__before = XophpString.substr(str, 0, i);
							find_colon_no_links__after = XophpString.substr(str, i + 1);
							return i;
						}
						// Embedded in a tag; don't break it.
						break;
					default:
						// Skip ahead looking for something interesting
						colonPos = XophpString.strpos(str, Byte_ascii.Colon, i, len);
						if (colonPos == Bry_find_.Not_found) {
							// Nothing else interesting
							return Bry_find_.Not_found;
						}
						ltPos = XophpString.strpos(str, Byte_ascii.Angle_bgn, i, len);
						if (level == 0) {
							if (ltPos == Bry_find_.Not_found || colonPos < ltPos) {
								// We found it!
								find_colon_no_links__before = XophpString.substr(str, 0, colonPos);
								find_colon_no_links__after = XophpString.substr(str, colonPos + 1);
								return i;
							}
						}
						if (ltPos == Bry_find_.Not_found) {
							// Nothing else interesting to find; abort!
							// We're nested, but there's no close tags left. Abort!
							i = len; // break 2
							break;
						}
						// Skip ahead to next tag start
						i = ltPos;
						state = COLON_STATE_TAGSTART;
						break;
				}
				break;
			case COLON_STATE_TAG:
				// In a <tag>
				switch (c) {
					case Byte_ascii.Angle_end:
						level++;
						state = COLON_STATE_TEXT;
						break;
					case Byte_ascii.Slash:
						// Slash may be followed by >?
						state = COLON_STATE_TAGSLASH;
						break;
					default:
						// ignore
						break;
				}
				break;
			case COLON_STATE_TAGSTART:
				switch (c) {
					case Byte_ascii.Slash:
						state = COLON_STATE_CLOSETAG;
						break;
					case Byte_ascii.Bang:
						state = COLON_STATE_COMMENT;
						break;
					case Byte_ascii.Angle_end:
						// Illegal early close? This shouldn't happen D:
						state = COLON_STATE_TEXT;
						break;
					default:
						state = COLON_STATE_TAG;
						break;
				}
				break;
			case COLON_STATE_CLOSETAG:
				// In a </tag>
				if (c == Byte_ascii.Angle_end) {
					level--;
					if (level < 0) {
						Gfo_usr_dlg_.Instance.Warn_many("", "", "Invalid input; too many close tags");
						return Bry_find_.Not_found;
					}
					state = COLON_STATE_TEXT;
				}
				break;
			case COLON_STATE_TAGSLASH:
				if (c == Byte_ascii.Angle_end) {
					// Yes, a self-closed tag <blah/>
					state = COLON_STATE_TEXT;
				}
				else {
					// Probably we're jumping the gun, and this is an attribute
					state = COLON_STATE_TAG;
				}
				break;
			case COLON_STATE_COMMENT:
				if (c == Byte_ascii.Dash) {
					state = COLON_STATE_COMMENTDASH;
				}
				break;
			case COLON_STATE_COMMENTDASH:
				if (c == Byte_ascii.Dash) {
					state = COLON_STATE_COMMENTDASHDASH;
				}
				else {
					state = COLON_STATE_COMMENT;
				}
				break;
			case COLON_STATE_COMMENTDASHDASH:
				// a comment ends with "-->": after the two dashes only '>' closes it.
				// XO: this used to test for '<', so the state machine never left a comment
				if (c == Byte_ascii.Angle_end) {
					state = COLON_STATE_TEXT;
				}
				else {
					state = COLON_STATE_COMMENT;
				}
				break;
			default:
				throw Err_.new_wo_type("State machine error");
		}
	}
	if (level > 0) {
		Gfo_usr_dlg_.Instance.Warn_many("", "", "Invalid input; not enough close tags (level ~{0}, state ~{1})", level, state);
		return Bry_find_.Not_found;
	}
	return Bry_find_.Not_found;
}
private static final byte
LAST_SECTION_NONE = 0 // ''
, LAST_SECTION_PARA = 1 // p
, LAST_SECTION_PRE = 2 // pre
;
private static final byte
PARA_STACK_NONE = 0 // false
, PARA_STACK_BGN = 1 // <p>
, PARA_STACK_MID = 2 // </p><p>
;
private static final int PRE_BGN = 0, PRE_END = 1;
private static Btrie_slim_mgr pre_trie;
private static boolean[] block_chars_ary;
private static boolean[] Block_chars_ary__new() {
	// lookup table indexed by byte value: true marks the list-prefix chars * # : ;
	byte[] prefixChars = new byte[] {Byte_ascii.Star, Byte_ascii.Hash, Byte_ascii.Colon, Byte_ascii.Semic};
	boolean[] flags = new boolean[256];
	for (byte prefixChar : prefixChars) {
		flags[prefixChar] = true;
	}
	return flags;
}
private static Btrie_slim_mgr openMatchTrie, closeMatchTrie, blockquoteTrie;
private static void ParaStackAdd(Bry_bfr bfr, int id) {
	// writes the pending paragraph tag(s) for the given PARA_STACK_* id; any other id is an error
	if (id == PARA_STACK_BGN) {
		bfr.Add_str_a7("<p>");
	}
	else if (id == PARA_STACK_MID) {
		bfr.Add_str_a7("</p><p>");
	}
	else {
		throw Err_.new_unhandled_default(id);
	}
}
}

View File

@@ -13,3 +13,28 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki.includes.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*;
import org.junit.*; import gplx.core.tests.*;
import gplx.xowa.mediawiki.includes.linkers.*;
public class XomwBlockLevelPassTest {
	private final XomwBlockLevelPassFxt fxt = new XomwBlockLevelPassFxt();
	// a single line of plain text is wrapped in <p> ... </p>
	@Test public void Basic() {
		String wikitext = String_.Concat_lines_nl_skip_last("a");
		String expectedHtml = String_.Concat_lines_nl_skip_last("<p>a", "</p>");
		fxt.Test__do_block_levels(wikitext, expectedHtml);
	}
}
class XomwBlockLevelPassFxt {
	private final XomwBlockLevelPass block_level_pass = new XomwBlockLevelPass();
	private final XomwParserCtx pctx = new XomwParserCtx();
	private final XomwParserBfr pbfr = new XomwParserBfr();
	// when true, the expected text is passed through Gfh_utl.Replace_apos before comparing
	private boolean apos = true;
	// runs doBlockLevels over src (with linestart = true) and compares the result buffer against expd
	public void Test__do_block_levels(String src, String expd) {
		String expected = apos ? gplx.langs.htmls.Gfh_utl.Replace_apos(expd) : expd;
		XomwParserBfr input = pbfr.Init(Bry_.new_u8(src));
		block_level_pass.doBlockLevels(pctx, input, true);
		Gftest.Eq__str(expected, pbfr.Rslt().To_str_and_clear());
	}
}

View File

@@ -13,3 +13,767 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki.includes.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*;
import gplx.langs.htmls.*;
import gplx.xowa.mediawiki.includes.xohtml.*;
import gplx.xowa.mediawiki.includes.linkers.*;
/**
* Holder of replacement pairs for wiki links
*/
public class XomwLinkHolderArray {
private final XomwLinkHolderList internals = new XomwLinkHolderList();
// public $interwikis = [];
// private int size = 0;
private final Bry_bfr tmp = Bry_bfr_.New();
private final Xomw_atr_mgr extraAtrs = new Xomw_atr_mgr();
private final Xomw_qry_mgr query = new Xomw_qry_mgr();
/**
* @var Parser
*/
private final XomwParserIface parent;
// protected $tempIdOffset;
/**
* @param Parser $parent
*/
public XomwLinkHolderArray(XomwParserIface parent) {
// XO: keeps the parser; makeHolder asks it for link ids through parent.nextLinkID()
this.parent = parent;
}
/**
* Reduce memory usage to reduce the impact of circular references
*/
// public function __destruct() {
// foreach ( $this as $name => $value ) {
// unset( this.$name );
// }
// }
/**
* Don't serialize the parent Object, it is big, and not needed when it is
* a parameter to mergeForeign(), which is the only application of
* serializing at present.
*
* Compact the titles, only serialize the text form.
* @return array
*/
// public function __sleep() {
// foreach ( this.internals as &$nsLinks ) {
// foreach ( $nsLinks as &$entry ) {
// unset( $entry['title'] );
// }
// }
// unset( $nsLinks );
// unset( $entry );
//
// foreach ( this.interwikis as &$entry ) {
// unset( $entry['title'] );
// }
// unset( $entry );
//
// return [ 'internals', 'interwikis', 'size' ];
// }
/**
* Recreate the Title objects
*/
// public function __wakeup() {
// foreach ( this.internals as &$nsLinks ) {
// foreach ( $nsLinks as &$entry ) {
// $entry['title'] = Title::newFromText( $entry['pdbk'] );
// }
// }
// unset( $nsLinks );
// unset( $entry );
//
// foreach ( this.interwikis as &$entry ) {
// $entry['title'] = Title::newFromText( $entry['pdbk'] );
// }
// unset( $entry );
// }
// /**
// * Merge another LinkHolderArray into this one
// * @param LinkHolderArray $other
// */
// public function merge( $other ) {
// foreach ( $other->internals as $ns => $entries ) {
// this.size += count( $entries );
// if ( !isset( this.internals[$ns] ) ) {
// this.internals[$ns] = $entries;
// } else {
// this.internals[$ns] += $entries;
// }
// }
// this.interwikis += $other->interwikis;
// }
//
// /**
// * Merge a LinkHolderArray from another parser instance into this one. The
// * keys will not be preserved. Any text which went with the old
// * LinkHolderArray and needs to work with the new one should be passed in
// * the $texts array. The strings in this array will have their link holders
// * converted for use in the destination link holder. The resulting array of
// * strings will be returned.
// *
// * @param LinkHolderArray $other
// * @param array $texts Array of strings
// * @return array
// */
// public function mergeForeign( $other, $texts ) {
// this.tempIdOffset = $idOffset = this.parent->nextLinkID();
// $maxId = 0;
//
// # Renumber @gplx.Internal protected links
// foreach ( $other->internals as $ns => $nsLinks ) {
// foreach ( $nsLinks as $key => $entry ) {
// $newKey = $idOffset + $key;
// this.internals[$ns][$newKey] = $entry;
// $maxId = $newKey > $maxId ? $newKey : $maxId;
// }
// }
// $texts = preg_replace_callback( '/(<!--LINK \d+:)(\d+)(-->)/',
// [ $this, 'mergeForeignCallback' ], $texts );
//
// # Renumber interwiki links
// foreach ( $other->interwikis as $key => $entry ) {
// $newKey = $idOffset + $key;
// this.interwikis[$newKey] = $entry;
// $maxId = $newKey > $maxId ? $newKey : $maxId;
// }
// $texts = preg_replace_callback( '/(<!--IWLINK )(\d+)(-->)/',
// [ $this, 'mergeForeignCallback' ], $texts );
//
// # Set the parent link ID to be beyond the highest used ID
// this.parent->setLinkID( $maxId + 1 );
// this.tempIdOffset = null;
// return $texts;
// }
//
// /**
// * @param array $m
// * @return String
// */
// protected function mergeForeignCallback( $m ) {
// return $m[1] . ( $m[2] + this.tempIdOffset ) . $m[3];
// }
//
// /**
// * Get a subset of the current LinkHolderArray which is sufficient to
// * interpret the given text.
// * @param String $text
// * @return LinkHolderArray
// */
// public function getSubArray( $text ) {
// $sub = new LinkHolderArray( this.parent );
//
// # Internal links
// $pos = 0;
// while ( $pos < strlen( $text ) ) {
// if ( !preg_match( '/<!--LINK (\d+):(\d+)-->/',
// $text, $m, PREG_OFFSET_CAPTURE, $pos )
// ) {
// break;
// }
// $ns = $m[1][0];
// $key = $m[2][0];
// $sub->internals[$ns][$key] = this.internals[$ns][$key];
// $pos = $m[0][1] + strlen( $m[0][0] );
// }
//
// # Interwiki links
// $pos = 0;
// while ( $pos < strlen( $text ) ) {
// if ( !preg_match( '/<!--IWLINK (\d+)-->/', $text, $m, PREG_OFFSET_CAPTURE, $pos ) ) {
// break;
// }
// $key = $m[1][0];
// $sub->interwikis[$key] = this.interwikis[$key];
// $pos = $m[0][1] + strlen( $m[0][0] );
// }
// return $sub;
// }
//
// /**
// * Returns true if the memory requirements of this Object are getting large
// * @return boolean
// */
// public function isBig() {
// global $wgLinkHolderBatchSize;
// return this.size > $wgLinkHolderBatchSize;
// }
/**
* Clear all stored link holders.
* Make sure you don't have any text left using these link holders, before you call this
*/
public void clear() {
	// XO.MW: only the internal link holders are ported; the PHP resets of the
	// interwiki list and the size counter are kept below for reference only.
	internals.Clear();
	// this.interwikis = [];
	// this.size = 0;
}
/**
* Make a link placeholder. The text returned can be later resolved to a real link with
* replaceLinkHolders(). This is done for two reasons: firstly to avoid further
* parsing of interwiki links, and secondly to allow all existence checks and
* article length checks (for stub links) to be bundled into a single query.
*
* @param Title $nt
* @param String $text
* @param array $query [optional]
* @param String $trail [optional]
* @param String $prefix [optional]
* @return String
*/
public void makeHolder(Bry_bfr bfr, XomwTitle nt, byte[] text, byte[][] query, byte[] trail, byte[] prefix) {
	// XO.MW: unlike the PHP original, the placeholder is appended to bfr instead of being returned.
	if (nt == null) {
		// Fail gracefully
		bfr.Add_str_a7("<!-- ERROR -->").Add(prefix).Add(text).Add(trail);
		return;
	}

	// Separate the link trail from the rest of the link
	// list( $inside, $trail ) = Linker::splitTrail( $trail );
	byte[] inside = Bry_.Empty;	// splitTrail is not ported, so nothing is moved from the trail into the link
	byte[] display = tmp.Add_bry_many(prefix, text, inside).To_bry_and_clear();
	XomwLinkHolderItem holder = new XomwLinkHolderItem(nt, display, query);

	if (nt.isExternal()) {
		// interwiki holders are not ported: nothing is registered and nothing is written to bfr
		// Use a globally unique ID to keep the objects mergable
		// $key = this.parent->nextLinkID();
		// this.interwikis[$key] = $entry;
		// $retVal = "<!--IWLINK $key-->{$trail}";
		return;
	}

	// register the holder under a fresh link id and emit "<!--LINK {id}-->{trail}"
	int link_id = this.parent.nextLinkID();
	this.internals.Add(link_id, holder);
	bfr.Add(Bry__link__bgn).Add_int_variable(link_id).Add(Gfh_tag_.Comm_end).Add(trail); // "<!--LINK $ns:$key-->{$trail}";
}
/**
* Replace <!--LINK--> link placeholders with actual links, in the buffer
*
* @param String $text
*/
public boolean replace(XomwParserBfr pbfr) {
	// Only internal placeholders are handled; the interwiki pass is not ported.
	// The returned flag is whatever replaceInternal reports.
	boolean replaced = this.replaceInternal(pbfr);
	// $this->replaceInterwiki( $text );
	return replaced;
}
public byte[] replace(XomwParserBfr pbfr, byte[] text) {
	// Convenience overload: loads text into pbfr, runs the placeholder replacement and
	// returns the resulting bytes (the result buffer is cleared afterwards).
	//
	// The result is always in pbfr.Rslt() (the current source buffer):
	// - when nothing was replaced, no switch happened and the source still holds the original text;
	// - when links were replaced, replaceInternal switched the buffers before writing, so the
	//   written buffer is the new source, and the target is the old, already-cleared source.
	// Reading the target after a successful replacement therefore yielded empty output.
	this.replace(pbfr.Init(text));
	return pbfr.Rslt().To_bry_and_clear();
}
/**
* Replace internal links
* @param String $text
*/
private boolean replaceInternal(XomwParserBfr pbfr) {
	// Scans the source buffer of pbfr for "<!--LINK {key}-->" placeholders and writes the text,
	// with each known placeholder rendered as a link, to the other buffer.
	// Returns false (pbfr untouched) when no holders are registered; otherwise switches the
	// buffers of pbfr and returns true, leaving the output in pbfr.Src() / pbfr.Rslt().
	// Placeholders that are malformed or whose key is not registered are left in the text as-is.
	if (internals.Len() == 0) {
		return false;
	}
	// SKIP:Replace_internals does db lookup to identify redlinks;
	// global $wgContLang;
	//
	// $colours = [];
	// $linkCache = LinkCache::singleton();
	// $output = this.parent->getOutput();
	XomwLinkRenderer linkRenderer = this.parent.getLinkRenderer();
	//
	// $dbr = wfGetDB( DB_REPLICA );
	//
	// # Sort by namespace
	// ksort( this.internals );
	//
	// $linkcolour_ids = [];
	//
	// # Generate query
	// $lb = new LinkBatch();
	// $lb->setCaller( __METHOD__ );
	//
	// foreach ( this.internals as $ns => $entries ) {
	// foreach ( $entries as $entry ) {
	// /** @var Title $title */
	// $title = $entry['title'];
	// $pdbk = $entry['pdbk'];
	//
	// # Skip invalid entries.
	// # Result will be ugly, but prevents crash.
	// if ( is_null( $title ) ) {
	// continue;
	// }
	//
	// # Check if it's a static known link, e.g. interwiki
	// if ( $title->isAlwaysKnown() ) {
	// $colours[$pdbk] = '';
	// } elseif ( $ns == NS_SPECIAL ) {
	// $colours[$pdbk] = 'new';
	// } else {
	// $id = $linkCache->getGoodLinkID( $pdbk );
	// if ( $id != 0 ) {
	// $colours[$pdbk] = $linkRenderer->getLinkClasses( $title );
	// $output->addLink( $title, $id );
	// $linkcolour_ids[$id] = $pdbk;
	// } elseif ( $linkCache->isBadLink( $pdbk ) ) {
	// $colours[$pdbk] = 'new';
	// } else {
	// # Not in the link cache, add it to the query
	// $lb->addObj( $title );
	// }
	// }
	// }
	// }
	// if ( !$lb->isEmpty() ) {
	// $fields = array_merge(
	// LinkCache::getSelectFields(),
	// [ 'page_namespace', 'page_title' ]
	// );
	//
	// $res = $dbr->select(
	// 'page',
	// $fields,
	// $lb->constructSet( 'page', $dbr ),
	// __METHOD__
	// );
	//
	// # Fetch data and form into an associative array
	// # non-existent = broken
	// foreach ( $res as $s ) {
	// $title = Title::makeTitle( $s->page_namespace, $s->page_title );
	// $pdbk = $title->getPrefixedDBkey();
	// $linkCache->addGoodLinkObjFromRow( $title, $s );
	// $output->addLink( $title, $s->page_id );
	// $colours[$pdbk] = $linkRenderer->getLinkClasses( $title );
	// // add id to the extension todolist
	// $linkcolour_ids[$s->page_id] = $pdbk;
	// }
	// unset( $res );
	// }
	// if ( count( $linkcolour_ids ) ) {
	// // pass an array of page_ids to an extension
	// Hooks::run( 'GetLinkColours', [ $linkcolour_ids, &$colours ] );
	// }
	//
	// # Do a second query for different language variants of links and categories
	// if ( $wgContLang->hasVariants() ) {
	// this.doVariants( $colours );
	// }
	// Construct search and replace arrays
	// XO.PBFR: capture the source bytes and length BEFORE switching, because Switch() clears
	// the old source buffer; the captured array is what gets scanned below.
	Bry_bfr src_bfr = pbfr.Src();
	byte[] src = src_bfr.Bfr();
	int src_bgn = 0;
	int src_end = src_bfr.Len();
	Bry_bfr bfr = pbfr.Trg();
	pbfr.Switch();
	int cur = src_bgn;	// position from which the next placeholder search starts
	int prv = src_bgn;	// start of the text not yet copied to bfr
	while (true) {
		int link_bgn = Bry_find_.Find_fwd(src, Bry__link__bgn, cur, src_end);
		if (link_bgn == Bry_find_.Not_found) {
			// no more placeholders: copy the remaining text and stop
			bfr.Add_mid(src, prv, src_end);
			break;
		}
		int key_bgn = link_bgn + Bry__link__bgn.length;
		int key_end = Bry_find_.Find_fwd_while_num(src, key_bgn, src_end);
		int link_key = Bry_.To_int_or(src, key_bgn, key_end, -1);

		// A placeholder is well-formed only if it has at least one digit, parses to a
		// non-negative key, and the digits are immediately followed by "-->".
		int comm_end = key_end + Gfh_tag_.Comm_end_len;
		boolean well_formed = key_end > key_bgn && link_key >= 0 && comm_end <= src_end;
		for (int i = 0; well_formed && i < Gfh_tag_.Comm_end_len; i++) {
			if (src[key_end + i] != Gfh_tag_.Comm_end[i]) {
				well_formed = false;
			}
		}
		XomwLinkHolderItem item = well_formed ? internals.Get_by(link_key) : null;
		if (item == null) {
			// Malformed or unknown placeholder: keep it in the text instead of failing.
			// prv is not advanced, so these bytes are copied verbatim by a later Add_mid.
			// NOTE(review): believed to match MediaWiki's HashtableReplacer, which returns
			// the original match when the key is missing - confirm.
			cur = key_bgn;
			continue;
		}
		// $pdbk = $entry['pdbk'];
		// $title = $entry['title'];
		// $query = isset( $entry['query'] ) ? $entry['query'] : [];
		// $key = "$ns:$index";
		// $searchkey = "<!--LINK $key-->";
		// $displayText = $entry['text'];
		// if ( isset( $entry['selflink'] ) ) {
		// $replacePairs[$searchkey] = Linker::makeSelfLinkObj( $title, $displayText, $query );
		// continue;
		// }
		// if ( $displayText === '' ) {
		// $displayText = null;
		// } else {
		// $displayText = new HtmlArmor( $displayText );
		// }
		// if ( !isset( $colours[$pdbk] ) ) {
		// $colours[$pdbk] = 'new';
		// }
		// $attribs = [];
		// if ( $colours[$pdbk] == 'new' ) {
		// $linkCache->addBadLinkObj( $title );
		// $output->addLink( $title, 0 );
		// $link = $linkRenderer->makeBrokenLink(
		// $title, $displayText, $attribs, $query
		// );
		// } else {
		// $link = $linkRenderer->makePreloadedLink(
		// $title, $displayText, $colours[$pdbk], $attribs, $query
		// );
		// }
		//
		// $replacePairs[$searchkey] = $link;
		// }
		// }
		// copy the text preceding the placeholder, then render the link in its place
		bfr.Add_mid(src, prv, link_bgn);
		linkRenderer.makePreloadedLink(bfr, item.Title(), item.Text(), Bry_.Empty, extraAtrs, query.Clear());
		cur = comm_end;
		prv = cur;
	}
	// $replacer = new HashtableReplacer( $replacePairs, 1 );
	//
	// # Do the thing
	// $text = preg_replace_callback(
	// '/(<!--LINK .*?-->)/',
	// $replacer->cb(),
	// $text
	// );
	return true;
}
// /**
// * Replace interwiki links
// * @param String $text
// */
// protected function replaceInterwiki( &$text ) {
// if ( empty( this.interwikis ) ) {
// return;
// }
//
// # Make interwiki link HTML
// $output = this.parent->getOutput();
// $replacePairs = [];
// $linkRenderer = this.parent->getLinkRenderer();
// foreach ( this.interwikis as $key => $link ) {
// $replacePairs[$key] = $linkRenderer->makeLink(
// $link['title'],
// new HtmlArmor( $link['text'] )
// );
// $output->addInterwikiLink( $link['title'] );
// }
// $replacer = new HashtableReplacer( $replacePairs, 1 );
//
// $text = preg_replace_callback(
// '/<!--IWLINK (.*?)-->/',
// $replacer->cb(),
// $text );
// }
//
// /**
// * Modify this.internals and $colours according to language variant linking rules
// * @param array $colours
// */
// protected function doVariants( &$colours ) {
// global $wgContLang;
// $linkBatch = new LinkBatch();
// $variantMap = []; // maps $pdbkey_Variant => $keys (of link holders)
// $output = this.parent->getOutput();
// $linkCache = LinkCache::singleton();
// $titlesToBeConverted = '';
// $titlesAttrs = [];
//
// // Concatenate titles to a single String, thus we only need auto convert the
// // single String to all variants. This would improve parser's performance
// // significantly.
// foreach ( this.internals as $ns => $entries ) {
// if ( $ns == NS_SPECIAL ) {
// continue;
// }
// foreach ( $entries as $index => $entry ) {
// $pdbk = $entry['pdbk'];
// // we only deal with new links (in its first query)
// if ( !isset( $colours[$pdbk] ) || $colours[$pdbk] === 'new' ) {
// $titlesAttrs[] = [ $index, $entry['title'] ];
// // separate titles with \0 because it would never appears
// // in a valid title
// $titlesToBeConverted .= $entry['title']->getText() . "\0";
// }
// }
// }
//
// // Now do the conversion and explode String to text of titles
// $titlesAllVariants = $wgContLang->autoConvertToAllVariants( rtrim( $titlesToBeConverted, "\0" ) );
// $allVariantsName = array_keys( $titlesAllVariants );
// foreach ( $titlesAllVariants as &$titlesVariant ) {
// $titlesVariant = explode( "\0", $titlesVariant );
// }
//
// // Then add variants of links to link batch
// $parentTitle = this.parent->getTitle();
// foreach ( $titlesAttrs as $i => $attrs ) {
// /** @var Title $title */
// list( $index, $title ) = $attrs;
// $ns = $title->getNamespace();
// $text = $title->getText();
//
// foreach ( $allVariantsName as $variantName ) {
// $textVariant = $titlesAllVariants[$variantName][$i];
// if ( $textVariant === $text ) {
// continue;
// }
//
// $variantTitle = Title::makeTitle( $ns, $textVariant );
//
// // Self-link checking for mixed/different variant titles. At this point, we
// // already know the exact title does not exist, so the link cannot be to a
// // variant of the current title that exists as a separate page.
// if ( $variantTitle->equals( $parentTitle ) && !$title->hasFragment() ) {
// this.internals[$ns][$index]['selflink'] = true;
// continue 2;
// }
//
// $linkBatch->addObj( $variantTitle );
// $variantMap[$variantTitle->getPrefixedDBkey()][] = "$ns:$index";
// }
// }
//
// // process categories, check if a category exists in some variant
// $categoryMap = []; // maps $category_variant => $category (dbkeys)
// $varCategories = []; // category replacements oldDBkey => newDBkey
// foreach ( $output->getCategoryLinks() as $category ) {
// $categoryTitle = Title::makeTitleSafe( NS_CATEGORY, $category );
// $linkBatch->addObj( $categoryTitle );
// $variants = $wgContLang->autoConvertToAllVariants( $category );
// foreach ( $variants as $variant ) {
// if ( $variant !== $category ) {
// $variantTitle = Title::makeTitleSafe( NS_CATEGORY, $variant );
// if ( is_null( $variantTitle ) ) {
// continue;
// }
// $linkBatch->addObj( $variantTitle );
// $categoryMap[$variant] = [ $category, $categoryTitle ];
// }
// }
// }
//
// if ( !$linkBatch->isEmpty() ) {
// // construct query
// $dbr = wfGetDB( DB_REPLICA );
// $fields = array_merge(
// LinkCache::getSelectFields(),
// [ 'page_namespace', 'page_title' ]
// );
//
// $varRes = $dbr->select( 'page',
// $fields,
// $linkBatch->constructSet( 'page', $dbr ),
// __METHOD__
// );
//
// $linkcolour_ids = [];
// $linkRenderer = this.parent->getLinkRenderer();
//
// // for each found variants, figure out link holders and replace
// foreach ( $varRes as $s ) {
// $variantTitle = Title::makeTitle( $s->page_namespace, $s->page_title );
// $varPdbk = $variantTitle->getPrefixedDBkey();
// $vardbk = $variantTitle->getDBkey();
//
// $holderKeys = [];
// if ( isset( $variantMap[$varPdbk] ) ) {
// $holderKeys = $variantMap[$varPdbk];
// $linkCache->addGoodLinkObjFromRow( $variantTitle, $s );
// $output->addLink( $variantTitle, $s->page_id );
// }
//
// // loop over link holders
// foreach ( $holderKeys as $key ) {
// list( $ns, $index ) = explode( ':', $key, 2 );
// $entry =& this.internals[$ns][$index];
// $pdbk = $entry['pdbk'];
//
// if ( !isset( $colours[$pdbk] ) || $colours[$pdbk] === 'new' ) {
// // found link in some of the variants, replace the link holder data
// $entry['title'] = $variantTitle;
// $entry['pdbk'] = $varPdbk;
//
// // set pdbk and colour
// $colours[$varPdbk] = $linkRenderer->getLinkClasses( $variantTitle );
// $linkcolour_ids[$s->page_id] = $pdbk;
// }
// }
//
// // check if the Object is a variant of a category
// if ( isset( $categoryMap[$vardbk] ) ) {
// list( $oldkey, $oldtitle ) = $categoryMap[$vardbk];
// if ( !isset( $varCategories[$oldkey] ) && !$oldtitle->exists() ) {
// $varCategories[$oldkey] = $vardbk;
// }
// }
// }
// Hooks::run( 'GetLinkColours', [ $linkcolour_ids, &$colours ] );
//
// // rebuild the categories in original order (if there are replacements)
// if ( count( $varCategories ) > 0 ) {
// $newCats = [];
// $originalCats = $output->getCategories();
// foreach ( $originalCats as $cat => $sortkey ) {
// // make the replacement
// if ( array_key_exists( $cat, $varCategories ) ) {
// $newCats[$varCategories[$cat]] = $sortkey;
// } else {
// $newCats[$cat] = $sortkey;
// }
// }
// $output->setCategoryLinks( $newCats );
// }
// }
// }
//
// /**
// * Replace <!--LINK--> link placeholders with plain text of links
// * (not HTML-formatted).
// *
// * @param String $text
// * @return String
// */
// public function replaceText( $text ) {
// $text = preg_replace_callback(
// '/<!--(LINK|IWLINK) (.*?)-->/',
// [ &$this, 'replaceTextCallback' ],
// $text );
//
// return $text;
// }
//
// /**
// * Callback for replaceText()
// *
// * @param array $matches
// * @return String
// * @private
// */
// public function replaceTextCallback( $matches ) {
// $type = $matches[1];
// $key = $matches[2];
// if ( $type == 'LINK' ) {
// list( $ns, $index ) = explode( ':', $key, 2 );
// if ( isset( this.internals[$ns][$index]['text'] ) ) {
// return this.internals[$ns][$index]['text'];
// }
// } elseif ( $type == 'IWLINK' ) {
// if ( isset( this.interwikis[$key]['text'] ) ) {
// return this.interwikis[$key]['text'];
// }
// }
// return $matches[0];
// }
// private void Replace_internal__db() {
// // Generate query
// $lb = new LinkBatch();
// $lb->setCaller( __METHOD__ );
//
// foreach ( $this->internals as $ns => $entries ) {
// foreach ( $entries as $entry ) {
// /** @var Title $title */
// $title = $entry['title'];
// $pdbk = $entry['pdbk'];
//
// # Skip invalid entries.
// # Result will be ugly, but prevents crash.
// if ( is_null( $title ) ) {
// continue;
// }
//
// # Check if it's a static known link, e.g. interwiki
// if ( $title->isAlwaysKnown() ) {
// $colours[$pdbk] = '';
// } elseif ( $ns == NS_SPECIAL ) {
// $colours[$pdbk] = 'new';
// } else {
// $id = $linkCache->getGoodLinkID( $pdbk );
// if ( $id != 0 ) {
// $colours[$pdbk] = $linkRenderer->getLinkClasses( $title );
// $output->addLink( $title, $id );
// $linkcolour_ids[$id] = $pdbk;
// } elseif ( $linkCache->isBadLink( $pdbk ) ) {
// $colours[$pdbk] = 'new';
// } else {
// # Not in the link cache, add it to the query
// $lb->addObj( $title );
// }
// }
// }
// }
// if ( !$lb->isEmpty() ) {
// $fields = array_merge(
// LinkCache::getSelectFields(),
// [ 'page_namespace', 'page_title' ]
// );
//
// $res = $dbr->select(
// 'page',
// $fields,
// $lb->constructSet( 'page', $dbr ),
// __METHOD__
// );
//
// # Fetch data and form into an associative array
// # non-existent = broken
// foreach ( $res as $s ) {
// $title = Title::makeTitle( $s->page_namespace, $s->page_title );
// $pdbk = $title->getPrefixedDBkey();
// $linkCache->addGoodLinkObjFromRow( $title, $s );
// $output->addLink( $title, $s->page_id );
// $colours[$pdbk] = $linkRenderer->getLinkClasses( $title );
// // add id to the extension todolist
// $linkcolour_ids[$s->page_id] = $pdbk;
// }
// unset( $res );
// }
// if ( count( $linkcolour_ids ) ) {
// // pass an array of page_ids to an extension
// Hooks::run( 'GetLinkColours', [ $linkcolour_ids, &$colours ] );
// }
//
// # Do a second query for different language variants of links and categories
// if ( $wgContLang->hasVariants() ) {
// $this->doVariants( $colours );
// }
// }
public void Test__add(XomwTitle ttl, byte[] capt) {
	// Test hook: registers a holder for ttl / capt (with an empty query) under the next link id,
	// without emitting any placeholder text.
	internals.Add(parent.nextLinkID(), new XomwLinkHolderItem(ttl, capt, Bry_.Ary_empty));
}
private static final byte[] Bry__link__bgn = Bry_.new_a7("<!--LINK ");
}
/**
 * Array-backed lookup of link holders, indexed directly by link key.
 * The backing array grows on demand; slots with no holder are null.
 */
class XomwLinkHolderList {
	private static final int Ary_init = 128;	// initial (and post-Clear) capacity
	private int ary_len = 0, ary_max = Ary_init;
	private XomwLinkHolderItem[] ary = new XomwLinkHolderItem[Ary_init];
	/** Number of Add calls since construction or the last Clear. */
	public int Len() {return ary_len;}
	/**
	 * Removes all holders. A grown array is replaced by one of the initial capacity
	 * (and the tracked capacity is reset with it); otherwise the slots are nulled
	 * so that no stale holder can be returned by Get_by.
	 */
	public void Clear() {
		ary_len = 0;
		if (ary_max > Ary_init) {
			ary = new XomwLinkHolderItem[Ary_init];
			ary_max = Ary_init;	// keep the tracked capacity in sync with the new array
		}
		else {
			for (int i = 0; i < ary_max; i++) {
				ary[i] = null;
			}
		}
	}
	/**
	 * Stores item under key, growing the backing array if needed.
	 * @param key  non-negative link key; a negative key throws ArrayIndexOutOfBoundsException
	 * @param item holder to store
	 */
	public void Add(int key, XomwLinkHolderItem item) {
		if (key >= ary_max) {
			// at least double, but always enough to hold key (one doubling may not be enough)
			int new_max = Math.max(ary_max * 2, key + 1);
			ary = (XomwLinkHolderItem[])Array_.Resize(ary, new_max);
			ary_max = new_max;
		}
		ary[key] = item;
		ary_len++;
	}
	/** Returns the holder stored under key, or null when key is out of range or has no holder. */
	public XomwLinkHolderItem Get_by(int key) {
		return (key < 0 || key >= ary_max) ? null : ary[key];
	}
}
/** Immutable record of one link holder: the target title, the display text and the query args. */
class XomwLinkHolderItem {
	private final XomwTitle title;
	private final byte[] text;
	private final byte[][] query;

	public XomwLinkHolderItem(XomwTitle title, byte[] text, byte[][] query) {
		this.title = title;
		this.text = text;
		this.query = query;
	}

	/** Title the link points to. */
	public XomwTitle Title() {
		return title;
	}
	/** Display text of the link. */
	public byte[] Text() {
		return text;
	}
	/** Prefixed db key, taken from the title on each call. */
	public byte[] Pdbk() {
		return title.getPrefixedDBkey();
	}
	/** Query arguments as passed to the constructor. */
	public byte[][] Query() {
		return query;
	}
}

View File

@@ -13,3 +13,32 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki.includes.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*;
import org.junit.*; import gplx.core.tests.*;
import gplx.xowa.mediawiki.includes.linkers.*;
public class XomwLinkHolderArrayTest {
	private final XomwLinkHolderArrayFxt fxt = new XomwLinkHolderArrayFxt();
	// A single registered holder (title "A", caption "a") replaces the "<!--LINK 0-->" placeholder;
	// the text around the placeholder is kept.
	@Test public void Replace__basic() {
		String src  = "a <!--LINK 0--> b";
		String expd = "a <a href='/wiki/A' title='A'>a</a> b";
		fxt.Init__add("A", "a");
		fxt.Test__replace(src, expd);
	}
}
/** Test fixture: owns a XomwLinkHolderArray backed by a test parser, plus the parser buffer it runs against. */
class XomwLinkHolderArrayFxt {
	private final XomwParserBfr pbfr = new XomwParserBfr();
	private final XomwEnv env;
	private final XomwLinkHolderArray holders;
	private boolean apos = true;	// when true, the expected text is passed through Gfh_utl.Replace_apos before comparing

	public XomwLinkHolderArrayFxt() {
		XomwParser parser = new XomwParser(XomwEnv.NewTest());
		this.env = parser.Env();
		this.holders = new XomwLinkHolderArray(parser);
	}

	/** Registers a holder for the page titled ttl with display text capt. */
	public void Init__add(String ttl, String capt) {
		XomwTitle title = XomwTitle.newFromText(env, Bry_.new_u8(ttl));
		byte[] capt_bry = Bry_.new_u8(capt);
		holders.Test__add(title, capt_bry);
	}

	/** Runs the replacement over src and checks the result buffer against expd. */
	public void Test__replace(String src, String expd) {
		String expd_norm = apos ? gplx.langs.htmls.Gfh_utl.Replace_apos(expd) : expd;
		holders.replace(pbfr.Init(Bry_.new_u8(src)));
		String actl = pbfr.Rslt().To_str_and_clear();
		Gftest.Eq__str(expd_norm, actl);
	}
}

View File

@@ -13,3 +13,34 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki.includes.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*;
/**
 * Pair of buffers used alternately as source and target by the parser passes,
 * to eliminate multiple calls to new memory allocations ("return bfr.To_bry_and_clear()").
 * A pass reads from Src(), writes to Trg() and calls Switch(); afterwards the written
 * buffer is the source (and the result) and the old source is the cleared target.
 */
public class XomwParserBfr {
	private final Bry_bfr bfr_1 = Bry_bfr_.New(), bfr_2 = Bry_bfr_.New();
	private Bry_bfr src = bfr_1, trg = bfr_2;

	public XomwParserBfr() {
	}

	/** Buffer holding the current text. */
	public Bry_bfr Src() {
		return src;
	}
	/** Buffer that the next pass writes to. */
	public Bry_bfr Trg() {
		return trg;
	}
	/** Buffer holding the result so far; this is the same buffer as Src(). */
	public Bry_bfr Rslt() {
		return src;
	}

	/** Resets both buffers and loads text into the source buffer. Returns this for chaining. */
	public XomwParserBfr Init(byte[] text) {
		// resize each bfr once by guessing that html_len = text_len * 2
		int html_len = text.length * 2;
		Prep(src, html_len);
		Prep(trg, html_len);
		src.Add(text);
		return this;
	}

	/** Swaps source and target, then clears the new target (the old source). */
	public void Switch() {
		Bry_bfr new_trg = this.src;
		this.src = this.trg;
		this.trg = new_trg;
		new_trg.Clear();
	}

	// resizes bfr to capacity, then empties it
	private static void Prep(Bry_bfr bfr, int capacity) {
		bfr.Resize(capacity);
		bfr.Clear();
	}
}

View File

@@ -13,3 +13,55 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki.includes.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*;
/** Static helpers operating on a XomwParserBfr. */
public class XomwParserBfr_ {
	/**
	 * Replaces every occurrence of find in the source buffer of pbfr with repl.
	 * The output is written to the target buffer and the buffers are switched only
	 * when at least one occurrence was found; otherwise pbfr is left as it was.
	 */
	public static void Replace(XomwParserBfr pbfr, byte[] find, byte[] repl) {
		// XO.PBFR
		Bry_bfr src_bfr = pbfr.Src();
		byte[] src = src_bfr.Bfr();
		int src_bgn = 0;
		int src_end = src_bfr.Len();	// only the first Len() bytes of the backing array are valid text
		Bry_bfr bfr = pbfr.Trg();
		if (Replace(bfr, Bool_.N, src, src_bgn, src_end, find, repl) != null)
			pbfr.Switch();
	}
	/**
	 * Core replace over src[src_bgn..src_end).
	 *
	 * @param bfr      buffer to write to; null means "called by bry": a buffer is created on the
	 *                 first match and the result is returned as a byte array
	 * @param lone_bfr when bfr is supplied and nothing matched, copy the unchanged range to bfr anyway
	 * @return with a null bfr: the replaced bytes, or the original range when nothing matched;
	 *         with a supplied bfr: Bry_.Empty (non-null marker) when something was replaced,
	 *         null when nothing matched
	 */
	private static byte[] Replace(Bry_bfr bfr, boolean lone_bfr, byte[] src, int src_bgn, int src_end, byte[] find, byte[] repl) {
		boolean dirty = false;
		int cur = src_bgn;
		boolean called_by_bry = bfr == null;
		int find_len = find.length;
		// an empty find can never advance the cursor, so treat it as "no match" instead of looping forever
		while (find_len > 0) {
			// bound the search by src_end: bytes past it are not part of the text
			int find_bgn = Bry_find_.Find_fwd(src, find, cur, src_end);
			if (find_bgn == Bry_find_.Not_found) {
				if (dirty)
					bfr.Add_mid(src, cur, src_end);	// copy the tail after the last match
				break;
			}
			if (bfr == null) bfr = Bry_bfr_.New();	// create once, on the first match only
			bfr.Add_mid(src, cur, find_bgn);	// text before the match
			bfr.Add(repl);						// the replacement itself
			cur = find_bgn + find_len;			// resume right after the match
			dirty = true;
		}
		if (dirty) {
			if (called_by_bry)
				return bfr.To_bry_and_clear();
			else
				return Bry_.Empty;
		}
		else {
			if (called_by_bry) {
				if (src_bgn == 0 && src_end == src.length)
					return src;
				else
					return Bry_.Mid(src, src_bgn, src_end);
			}
			else {
				if (lone_bfr)
					bfr.Add_mid(src, src_bgn, src_end);
				return null;
			}
		}
	}
}

View File

@@ -13,3 +13,18 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki.includes.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*;
import gplx.xowa.mediawiki.includes.parsers.lnkis.*;
/** Per-parse context: the title of the current page plus publicly shared working objects. */
public class XomwParserCtx {
	// title of the page being parsed; null until Init_by_page is called
	private XomwTitle page_title;

	// Shared working objects, allocated once per context.
	// NOTE(review): the names suggest reuse by Lnki_wkr.make_image and Linker.makeImageLink - confirm against callers.
	public Xomw_image_params Lnki_wkr__make_image__img_params = new Xomw_image_params();
	public byte[][] Lnki_wkr__make_image__match_magic_word = new byte[2][];
	public int[] Lnki_wkr__make_image__img_size = new int[2];
	public Xomw_params_mto Linker__makeImageLink__prms = new Xomw_params_mto();

	// sentinel position value (-1)
	public static final int Pos__bos = -1;

	/** Title last passed to Init_by_page. */
	public XomwTitle Page_title() {
		return page_title;
	}

	/** Sets the title of the page about to be parsed. */
	public void Init_by_page(XomwTitle page_title) {
		this.page_title = page_title;
	}
}

View File

@@ -13,3 +13,15 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki.includes.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*;
import gplx.xowa.mediawiki.includes.xohtml.*;
import gplx.xowa.mediawiki.includes.linkers.*;
// Parser operations exposed through an interface.
// NOTE(review): presumably implemented by XomwParser and mirroring the MediaWiki Parser methods of the same names - confirm.
public interface XomwParserIface {
// Returns a link id; XomwLinkHolderArray calls a method of this name on its parent to key each holder it stores.
int nextLinkID();
// Returns the parser options.
XomwParserOptions getOptions();
// Returns the link renderer; XomwLinkHolderArray obtains one via a method of this name to render held links.
XomwLinkRenderer getLinkRenderer();
// NOTE(review): presumably the port of Parser::armorLinks, operating on src[src_bgn..src_end) with trg as output buffer - confirm.
byte[] armorLinks(Bry_bfr trg, byte[] src, int src_bgn, int src_end);
// NOTE(review): presumably the port of Parser::getExternalLinkAttribs; whether atrs is modified in place is not visible here - confirm.
Xomw_atr_mgr getExternalLinkAttribs(Xomw_atr_mgr atrs);
// NOTE(review): presumably the port of Parser::stripAltText; holders is likely used to resolve link placeholders in caption - confirm.
byte[] stripAltText(byte[] caption, XomwLinkHolderArray holders);
}

View File

@@ -13,3 +13,919 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki.includes.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*;
public class XomwParserOptions {
public XomwParserOptions() {
this.mThumbSize = 220;
}
// /**
// * Interlanguage links are removed and returned in an array
// */
// private $mInterwikiMagic;
//
// /**
// * Allow external images inline?
// */
// private $mAllowExternalImages;
//
// /**
// * If not, any exception?
// */
// private $mAllowExternalImagesFrom;
//
// /**
// * If not or it doesn't match, should we check an on-wiki whitelist?
// */
// private $mEnableImageWhitelist;
//
// /**
// * Date format index
// */
// private $mDateFormat = null;
//
// /**
// * Create "edit section" links?
// */
// private $mEditSection = true;
//
// /**
// * Allow inclusion of special pages?
// */
// private $mAllowSpecialInclusion;
//
// /**
// * Use tidy to cleanup output HTML?
// */
// private $mTidy = false;
//
// /**
// * Which lang to call for PLURAL and GRAMMAR
// */
// private $mInterfaceMessage = false;
//
// /**
// * Overrides $mInterfaceMessage with arbitrary language
// */
// private $mTargetLanguage = null;
//
// /**
// * Maximum size of template expansions, in bytes
// */
// private $mMaxIncludeSize;
//
// /**
// * Maximum number of nodes touched by PPFrame::expand()
// */
// private $mMaxPPNodeCount;
//
// /**
// * Maximum number of nodes generated by Preprocessor::preprocessToObj()
// */
// private $mMaxGeneratedPPNodeCount;
//
// /**
// * Maximum recursion depth in PPFrame::expand()
// */
// private $mMaxPPExpandDepth;
//
// /**
// * Maximum recursion depth for templates within templates
// */
// private $mMaxTemplateDepth;
//
// /**
// * Maximum number of calls per parse to expensive parser functions
// */
// private $mExpensiveParserFunctionLimit;
//
// /**
// * Remove HTML comments. ONLY APPLIES TO PREPROCESS OPERATIONS
// */
// private $mRemoveComments = true;
//
// /**
// * @var callable Callback for current revision fetching; first argument to call_user_func().
// */
// private $mCurrentRevisionCallback =
// [ 'Parser', 'statelessFetchRevision' ];
//
// /**
// * @var callable Callback for template fetching; first argument to call_user_func().
// */
// private $mTemplateCallback =
// [ 'Parser', 'statelessFetchTemplate' ];
//
// /**
// * @var callable|null Callback to generate a guess for {{REVISIONID}}
// */
// private $mSpeculativeRevIdCallback;
//
// /**
// * Enable limit report in an HTML comment on output
// */
// private $mEnableLimitReport = false;
//
// /**
// * Timestamp used for {{CURRENTDAY}} etc.
// */
// private $mTimestamp;
//
// /**
// * Target attribute for external links
// */
// private $mExternalLinkTarget;
//
// /**
// * Clean up signature texts?
// * @see Parser::cleanSig
// */
// private $mCleanSignatures;
//
// /**
// * Transform wiki markup when saving the page?
// */
// private $mPreSaveTransform = true;
//
// /**
// * Whether content conversion should be disabled
// */
// private $mDisableContentConversion;
//
// /**
// * Whether title conversion should be disabled
// */
// private $mDisableTitleConversion;
//
// /**
// * Automatically number headings?
// */
// private $mNumberHeadings;
/**
* Thumb size preferred by the user.
*/
private int mThumbSize;
// /**
// * Maximum article size of an article to be marked as "stub"
// */
// private $mStubThreshold;
//
// /**
// * Language Object of the User language.
// */
// private $mUserLang;
//
// /**
// * @var User
// * Stored user Object
// */
// private $mUser;
//
// /**
// * Parsing the page for a "preview" operation?
// */
// private $mIsPreview = false;
//
// /**
// * Parsing the page for a "preview" operation on a single section?
// */
// private $mIsSectionPreview = false;
//
// /**
// * Parsing the printable version of the page?
// */
// private $mIsPrintable = false;
//
// /**
// * Extra key that should be present in the caching key.
// */
// private $mExtraKey = '';
//
// /**
// * Are magic ISBN links enabled?
// */
// private $mMagicISBNLinks = true;
//
// /**
// * Are magic PMID links enabled?
// */
// private $mMagicPMIDLinks = true;
//
// /**
// * Are magic RFC links enabled?
// */
// private $mMagicRFCLinks = true;
//
// /**
// * Function to be called when an option is accessed.
// */
// private $onAccessCallback = null;
//
// /**
// * If the page being parsed is a redirect, this should hold the redirect
// * target.
// * @var Title|null
// */
// private $redirectTarget = null;
//
// public function getInterwikiMagic() {
// return this.mInterwikiMagic;
// }
//
// public function getAllowExternalImages() {
// return this.mAllowExternalImages;
// }
//
// public function getAllowExternalImagesFrom() {
// return this.mAllowExternalImagesFrom;
// }
//
// public function getEnableImageWhitelist() {
// return this.mEnableImageWhitelist;
// }
//
// public function getEditSection() {
// return this.mEditSection;
// }
//
// public function getNumberHeadings() {
// this.optionUsed( 'numberheadings' );
//
// return this.mNumberHeadings;
// }
//
// public function getAllowSpecialInclusion() {
// return this.mAllowSpecialInclusion;
// }
//
// public function getTidy() {
// return this.mTidy;
// }
//
// public function getInterfaceMessage() {
// return this.mInterfaceMessage;
// }
//
// public function getTargetLanguage() {
// return this.mTargetLanguage;
// }
//
// public function getMaxIncludeSize() {
// return this.mMaxIncludeSize;
// }
//
// public function getMaxPPNodeCount() {
// return this.mMaxPPNodeCount;
// }
//
// public function getMaxGeneratedPPNodeCount() {
// return this.mMaxGeneratedPPNodeCount;
// }
//
// public function getMaxPPExpandDepth() {
// return this.mMaxPPExpandDepth;
// }
//
// public function getMaxTemplateDepth() {
// return this.mMaxTemplateDepth;
// }
//
// /* @since 1.20 */
// public function getExpensiveParserFunctionLimit() {
// return this.mExpensiveParserFunctionLimit;
// }
//
// public function getRemoveComments() {
// return this.mRemoveComments;
// }
//
// /* @since 1.24 */
// public function getCurrentRevisionCallback() {
// return this.mCurrentRevisionCallback;
// }
//
// public function getTemplateCallback() {
// return this.mTemplateCallback;
// }
//
// /** @since 1.28 */
// public function getSpeculativeRevIdCallback() {
// return this.mSpeculativeRevIdCallback;
// }
//
// public function getEnableLimitReport() {
// return this.mEnableLimitReport;
// }
//
// public function getCleanSignatures() {
// return this.mCleanSignatures;
// }
//
// public function getExternalLinkTarget() {
// return this.mExternalLinkTarget;
// }
//
// public function getDisableContentConversion() {
// return this.mDisableContentConversion;
// }
//
// public function getDisableTitleConversion() {
// return this.mDisableTitleConversion;
// }
public int getThumbSize() {
// this.optionUsed( 'thumbsize' );
return this.mThumbSize;
}
// public function getStubThreshold() {
// this.optionUsed( 'stubthreshold' );
//
// return this.mStubThreshold;
// }
//
// public function getIsPreview() {
// return this.mIsPreview;
// }
//
// public function getIsSectionPreview() {
// return this.mIsSectionPreview;
// }
//
// public function getIsPrintable() {
// this.optionUsed( 'printable' );
//
// return this.mIsPrintable;
// }
//
// public function getUser() {
// return this.mUser;
// }
//
// public function getPreSaveTransform() {
// return this.mPreSaveTransform;
// }
//
// public function getDateFormat() {
// this.optionUsed( 'dateformat' );
// if ( !isset( this.mDateFormat ) ) {
// this.mDateFormat = this.mUser->getDatePreference();
// }
// return this.mDateFormat;
// }
//
// public function getTimestamp() {
// if ( !isset( this.mTimestamp ) ) {
// this.mTimestamp = wfTimestampNow();
// }
// return this.mTimestamp;
// }
//
// /**
// * Get the user language used by the parser for this page and split the parser cache.
// *
// * @warning: Calling this causes the parser cache to be fragmented by user language!
// * To avoid cache fragmentation, output should not depend on the user language.
// * Use Parser::getFunctionLang() or Parser::getTargetLanguage() instead!
// *
// * @note This function will trigger a cache fragmentation by recording the
// * 'userlang' option, see optionUsed(). This is done to avoid cache pollution
// * when the page is rendered based on the language of the user.
// *
// * @note When saving, this will return the default language instead of the user's.
// * {{int: }} uses this which used to produce inconsistent link tables (bug 14404).
// *
// * @return Language
// * @since 1.19
// */
// public function getUserLangObj() {
// this.optionUsed( 'userlang' );
// return this.mUserLang;
// }
//
// /**
// * Same as getUserLangObj() but returns a String instead.
// *
// * @warning: Calling this causes the parser cache to be fragmented by user language!
// * To avoid cache fragmentation, output should not depend on the user language.
// * Use Parser::getFunctionLang() or Parser::getTargetLanguage() instead!
// *
// * @see getUserLangObj()
// *
// * @return String Language code
// * @since 1.17
// */
// public function getUserLang() {
// return this.getUserLangObj()->getCode();
// }
//
// /**
// * @since 1.28
// * @return boolean
// */
// public function getMagicISBNLinks() {
// return this.mMagicISBNLinks;
// }
//
// /**
// * @since 1.28
// * @return boolean
// */
// public function getMagicPMIDLinks() {
// return this.mMagicPMIDLinks;
// }
// /**
// * @since 1.28
// * @return boolean
// */
// public function getMagicRFCLinks() {
// return this.mMagicRFCLinks;
// }
// public function setInterwikiMagic( $x ) {
// return wfSetVar( this.mInterwikiMagic, $x );
// }
//
// public function setAllowExternalImages( $x ) {
// return wfSetVar( this.mAllowExternalImages, $x );
// }
//
// public function setAllowExternalImagesFrom( $x ) {
// return wfSetVar( this.mAllowExternalImagesFrom, $x );
// }
//
// public function setEnableImageWhitelist( $x ) {
// return wfSetVar( this.mEnableImageWhitelist, $x );
// }
//
// public function setDateFormat( $x ) {
// return wfSetVar( this.mDateFormat, $x );
// }
//
// public function setEditSection( $x ) {
// return wfSetVar( this.mEditSection, $x );
// }
//
// public function setNumberHeadings( $x ) {
// return wfSetVar( this.mNumberHeadings, $x );
// }
//
// public function setAllowSpecialInclusion( $x ) {
// return wfSetVar( this.mAllowSpecialInclusion, $x );
// }
//
// public function setTidy( $x ) {
// return wfSetVar( this.mTidy, $x );
// }
//
// public function setInterfaceMessage( $x ) {
// return wfSetVar( this.mInterfaceMessage, $x );
// }
//
// public function setTargetLanguage( $x ) {
// return wfSetVar( this.mTargetLanguage, $x, true );
// }
//
// public function setMaxIncludeSize( $x ) {
// return wfSetVar( this.mMaxIncludeSize, $x );
// }
//
// public function setMaxPPNodeCount( $x ) {
// return wfSetVar( this.mMaxPPNodeCount, $x );
// }
//
// public function setMaxGeneratedPPNodeCount( $x ) {
// return wfSetVar( this.mMaxGeneratedPPNodeCount, $x );
// }
//
// public function setMaxTemplateDepth( $x ) {
// return wfSetVar( this.mMaxTemplateDepth, $x );
// }
//
// /* @since 1.20 */
// public function setExpensiveParserFunctionLimit( $x ) {
// return wfSetVar( this.mExpensiveParserFunctionLimit, $x );
// }
//
// public function setRemoveComments( $x ) {
// return wfSetVar( this.mRemoveComments, $x );
// }
//
// /* @since 1.24 */
// public function setCurrentRevisionCallback( $x ) {
// return wfSetVar( this.mCurrentRevisionCallback, $x );
// }
//
// /** @since 1.28 */
// public function setSpeculativeRevIdCallback( $x ) {
// return wfSetVar( this.mSpeculativeRevIdCallback, $x );
// }
//
// public function setTemplateCallback( $x ) {
// return wfSetVar( this.mTemplateCallback, $x );
// }
//
// public function enableLimitReport( $x = true ) {
// return wfSetVar( this.mEnableLimitReport, $x );
// }
//
// public function setTimestamp( $x ) {
// return wfSetVar( this.mTimestamp, $x );
// }
//
// public function setCleanSignatures( $x ) {
// return wfSetVar( this.mCleanSignatures, $x );
// }
//
// public function setExternalLinkTarget( $x ) {
// return wfSetVar( this.mExternalLinkTarget, $x );
// }
//
// public function disableContentConversion( $x = true ) {
// return wfSetVar( this.mDisableContentConversion, $x );
// }
//
// public function disableTitleConversion( $x = true ) {
// return wfSetVar( this.mDisableTitleConversion, $x );
// }
//
// public function setUserLang( $x ) {
// if ( is_string( $x ) ) {
// $x = Language::factory( $x );
// }
//
// return wfSetVar( this.mUserLang, $x );
// }
//
// public function setThumbSize( $x ) {
// return wfSetVar( this.mThumbSize, $x );
// }
//
// public function setStubThreshold( $x ) {
// return wfSetVar( this.mStubThreshold, $x );
// }
//
// public function setPreSaveTransform( $x ) {
// return wfSetVar( this.mPreSaveTransform, $x );
// }
//
// public function setIsPreview( $x ) {
// return wfSetVar( this.mIsPreview, $x );
// }
//
// public function setIsSectionPreview( $x ) {
// return wfSetVar( this.mIsSectionPreview, $x );
// }
//
// public function setIsPrintable( $x ) {
// return wfSetVar( this.mIsPrintable, $x );
// }
//
// /**
// * Set the redirect target.
// *
// * Note that setting or changing this does not *make* the page a redirect
// * or change its target, it merely records the information for reference
// * during the parse.
// *
// * @since 1.24
// * @param Title|null $title
// */
// function setRedirectTarget( $title ) {
// this.redirectTarget = $title;
// }
//
// /**
// * Get the previously-set redirect target.
// *
// * @since 1.24
// * @return Title|null
// */
// function getRedirectTarget() {
// return this.redirectTarget;
// }
//
// /**
// * Extra key that should be present in the parser cache key.
// * @param String $key
// */
// public function addExtraKey( $key ) {
// this.mExtraKey .= '!' . $key;
// }
//
// /**
// * Constructor
// * @param User $user
// * @param Language $lang
// */
// public function __construct( $user = null, $lang = null ) {
// if ( $user === null ) {
// global $wgUser;
// if ( $wgUser === null ) {
// $user = new User;
// } else {
// $user = $wgUser;
// }
// }
// if ( $lang === null ) {
// global $wgLang;
// if ( !StubObject::isRealObject( $wgLang ) ) {
// $wgLang->_unstub();
// }
// $lang = $wgLang;
// }
// this.initialiseFromUser( $user, $lang );
// }
//
// /**
// * Get a ParserOptions Object for an anonymous user
// * @since 1.27
// * @return ParserOptions
// */
// public static function newFromAnon() {
// global $wgContLang;
// return new ParserOptions( new User, $wgContLang );
// }
//
// /**
// * Get a ParserOptions Object from a given user.
// * Language will be taken from $wgLang.
// *
// * @param User $user
// * @return ParserOptions
// */
// public static function newFromUser( $user ) {
// return new ParserOptions( $user );
// }
//
// /**
// * Get a ParserOptions Object from a given user and language
// *
// * @param User $user
// * @param Language $lang
// * @return ParserOptions
// */
// public static function newFromUserAndLang( User $user, Language $lang ) {
// return new ParserOptions( $user, $lang );
// }
//
// /**
// * Get a ParserOptions Object from a IContextSource Object
// *
// * @param IContextSource $context
// * @return ParserOptions
// */
// public static function newFromContext( IContextSource $context ) {
// return new ParserOptions( $context->getUser(), $context->getLanguage() );
// }
//
// /**
// * Get user options
// *
// * @param User $user
// * @param Language $lang
// */
// private function initialiseFromUser( $user, $lang ) {
// global $wgInterwikiMagic, $wgAllowExternalImages,
// $wgAllowExternalImagesFrom, $wgEnableImageWhitelist, $wgAllowSpecialInclusion,
// $wgMaxArticleSize, $wgMaxPPNodeCount, $wgMaxTemplateDepth, $wgMaxPPExpandDepth,
// $wgCleanSignatures, $wgExternalLinkTarget, $wgExpensiveParserFunctionLimit,
// $wgMaxGeneratedPPNodeCount, $wgDisableLangConversion, $wgDisableTitleConversion,
// $wgEnableMagicLinks;
//
// // *UPDATE* ParserOptions::matches() if any of this changes as needed
// this.mInterwikiMagic = $wgInterwikiMagic;
// this.mAllowExternalImages = $wgAllowExternalImages;
// this.mAllowExternalImagesFrom = $wgAllowExternalImagesFrom;
// this.mEnableImageWhitelist = $wgEnableImageWhitelist;
// this.mAllowSpecialInclusion = $wgAllowSpecialInclusion;
// this.mMaxIncludeSize = $wgMaxArticleSize * 1024;
// this.mMaxPPNodeCount = $wgMaxPPNodeCount;
// this.mMaxGeneratedPPNodeCount = $wgMaxGeneratedPPNodeCount;
// this.mMaxPPExpandDepth = $wgMaxPPExpandDepth;
// this.mMaxTemplateDepth = $wgMaxTemplateDepth;
// this.mExpensiveParserFunctionLimit = $wgExpensiveParserFunctionLimit;
// this.mCleanSignatures = $wgCleanSignatures;
// this.mExternalLinkTarget = $wgExternalLinkTarget;
// this.mDisableContentConversion = $wgDisableLangConversion;
// this.mDisableTitleConversion = $wgDisableLangConversion || $wgDisableTitleConversion;
// this.mMagicISBNLinks = $wgEnableMagicLinks['ISBN'];
// this.mMagicPMIDLinks = $wgEnableMagicLinks['PMID'];
// this.mMagicRFCLinks = $wgEnableMagicLinks['RFC'];
//
// this.mUser = $user;
// this.mNumberHeadings = $user->getOption( 'numberheadings' );
// this.mThumbSize = $user->getOption( 'thumbsize' );
// this.mStubThreshold = $user->getStubThreshold();
// this.mUserLang = $lang;
// }
//
// /**
// * Check if these options match that of another options set
// *
// * This ignores report limit settings that only affect HTML comments
// *
// * @param ParserOptions $other
// * @return boolean
// * @since 1.25
// */
// public function matches( ParserOptions $other ) {
// $fields = array_keys( get_class_vars( __CLASS__ ) );
// $fields = array_diff( $fields, [
// 'mEnableLimitReport', // only effects HTML comments
// 'onAccessCallback', // only used for ParserOutput option tracking
// ] );
// foreach ( $fields as $field ) {
// if ( !is_object( this.$field ) && this.$field !== $other->$field ) {
// return false;
// }
// }
// // Check the Object and lazy-loaded options
// return (
// this.mUserLang->equals( $other->mUserLang ) &&
// this.getDateFormat() === $other->getDateFormat()
// );
// }
//
// /**
// * Registers a callback for tracking which ParserOptions which are used.
// * This is a private API with the parser.
// * @param callable $callback
// */
// public function registerWatcher( $callback ) {
// this.onAccessCallback = $callback;
// }
//
// /**
// * Called when an option is accessed.
// * Calls the watcher that was set using registerWatcher().
// * Typically, the watcher callback is ParserOutput::registerOption().
// * The information registered that way will be used by ParserCache::save().
// *
// * @param String $optionName Name of the option
// */
// public function optionUsed( $optionName ) {
// if ( this.onAccessCallback ) {
// call_user_func( this.onAccessCallback, $optionName );
// }
// }
//
// /**
// * Returns the full array of options that would have been used by
// * in 1.16.
// * Used to get the old parser cache entries when available.
// * @return array
// */
// public static function legacyOptions() {
// return [
// 'stubthreshold',
// 'numberheadings',
// 'userlang',
// 'thumbsize',
// 'editsection',
// 'printable'
// ];
// }
//
// /**
// * Generate a hash String with the values set on these ParserOptions
// * for the keys given in the array.
// * This will be used as part of the hash key for the parser cache,
// * so users sharing the options with vary for the same page share
// * the same cached data safely.
// *
// * Extensions which require it should install 'PageRenderingHash' hook,
// * which will give them a chance to modify this key based on their own
// * settings.
// *
// * @since 1.17
// * @param array $forOptions
// * @param Title $title Used to get the content language of the page (since r97636)
// * @return String Page rendering hash
// */
// public function optionsHash( $forOptions, $title = null ) {
// global $wgRenderHashAppend;
//
// // FIXME: Once the cache key is reorganized this argument
// // can be dropped. It was used when the math extension was
// // part of core.
// $confstr = '*';
//
// // Space assigned for the stubthreshold but unused
// // since it disables the parser cache, its value will always
// // be 0 when this function is called by parsercache.
// if ( in_array( 'stubthreshold', $forOptions ) ) {
// $confstr .= '!' . this.mStubThreshold;
// } else {
// $confstr .= '!*';
// }
//
// if ( in_array( 'dateformat', $forOptions ) ) {
// $confstr .= '!' . this.getDateFormat();
// }
//
// if ( in_array( 'numberheadings', $forOptions ) ) {
// $confstr .= '!' . ( this.mNumberHeadings ? '1' : '' );
// } else {
// $confstr .= '!*';
// }
//
// if ( in_array( 'userlang', $forOptions ) ) {
// $confstr .= '!' . this.mUserLang->getCode();
// } else {
// $confstr .= '!*';
// }
//
// if ( in_array( 'thumbsize', $forOptions ) ) {
// $confstr .= '!' . this.mThumbSize;
// } else {
// $confstr .= '!*';
// }
//
// // add in language specific options, if any
// // @todo FIXME: This is just a way of retrieving the url/user preferred variant
// if ( !is_null( $title ) ) {
// $confstr .= $title->getPageLanguage()->getExtraHashOptions();
// } else {
// global $wgContLang;
// $confstr .= $wgContLang->getExtraHashOptions();
// }
//
// $confstr .= $wgRenderHashAppend;
//
// // @note: as of Feb 2015, core never sets the editsection flag, since it uses
// // <mw:editsection> tags to inject editsections on the fly. However, extensions
// // may be using it by calling ParserOption::optionUsed resp. ParserOutput::registerOption
// // directly. At least Wikibase does at this point in time.
// if ( !in_array( 'editsection', $forOptions ) ) {
// $confstr .= '!*';
// } elseif ( !this.mEditSection ) {
// $confstr .= '!edit=0';
// }
//
// if ( this.mIsPrintable && in_array( 'printable', $forOptions ) ) {
// $confstr .= '!printable=1';
// }
//
// if ( this.mExtraKey != '' ) {
// $confstr .= this.mExtraKey;
// }
//
// // Give a chance for extensions to modify the hash, if they have
// // extra options or other effects on the parser cache.
// Hooks::run( 'PageRenderingHash', [ &$confstr, this.getUser(), &$forOptions ] );
//
// // Make it a valid memcached key fragment
// $confstr = str_replace( ' ', '_', $confstr );
//
// return $confstr;
// }
//
// /**
// * Sets a hook to force that a page exists, and sets a current revision callback to return
// * a revision with custom content when the current revision of the page is requested.
// *
// * @since 1.25
// * @param Title $title
// * @param Content $content
// * @param User $user The user that the fake revision is attributed to
// * @return ScopedCallback to unset the hook
// */
// public function setupFakeRevision( $title, $content, $user ) {
// $oldCallback = this.setCurrentRevisionCallback(
// function (
// $titleToCheck, $parser = false ) use ( $title, $content, $user, &$oldCallback
// ) {
// if ( $titleToCheck->equals( $title ) ) {
// return new Revision( [
// 'page' => $title->getArticleID(),
// 'user_text' => $user->getName(),
// 'user' => $user->getId(),
// 'parent_id' => $title->getLatestRevID(),
// 'title' => $title,
// 'content' => $content
// ] );
// } else {
// return call_user_func( $oldCallback, $titleToCheck, $parser );
// }
// }
// );
//
// global $wgHooks;
// $wgHooks['TitleExists'][] =
// function ( $titleToCheck, &$exists ) use ( $title ) {
// if ( $titleToCheck->equals( $title ) ) {
// $exists = true;
// }
// };
// end( $wgHooks['TitleExists'] );
// $key = key( $wgHooks['TitleExists'] );
// LinkCache::singleton()->clearBadLink( $title->getPrefixedDBkey() );
// return new ScopedCallback( function () use ( $title, $key ) {
// global $wgHooks;
// unset( $wgHooks['TitleExists'][$key] );
// LinkCache::singleton()->clearLink( $title );
// } );
// }
}

View File

@@ -13,3 +13,66 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki.includes.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*;
import org.junit.*;
/**
 * End-to-end smoke test for the XomwParser port: one wikitext document containing headings,
 * paragraphs, a horizontal rule, a table, italics, a TOC marker, external / internal / free
 * links and an "&#160;" entity is parsed and compared against the expected HTML.
 */
public class XomwParserTest {
	private final XomwParserFxt fxt = new XomwParserFxt();
	@Test public void Basic() {
		// wikitext fed to the parser
		String wikitext = String_.Concat_lines_nl_skip_last
		( "== heading_1 =="
		, "para_1"
		, "== heading_2 =="
		, "para_2"
		, "-----"
		, "{|"
		, "|-"
		, "|a"
		, "|}"
		, "''italics''"
		, "__TOC__"
		, "[https://a.org b]"
		, "[[A|abc]]"
		, "https://c.org"
		, "a »b« &#160;!important c"
		);
		// html expected back from the parser
		String html = String_.Concat_lines_nl_skip_last
		( "<h2> heading_1 </h2>"
		, "<p>para_1"
		, "</p>"
		, "<h2> heading_2 </h2>"
		, "<p>para_2"
		, "</p>"
		, "<hr />"
		, "<table>"
		, ""
		, "<tr>"
		, "<td>a"
		, "</td></tr></table>"
		, "<p><i>italics</i>"
		, "<!--MWTOC-->"
		, "<a rel=\"nofollow\" class=\"external text\" href=\"https://a.org\">b</a>"
		, "<a href=\"/wiki/A\" title=\"A\">abc</a>"
		, "<a rel=\"nofollow\" class=\"external free\" href=\"https://c.org\">https://c.org</a>"
		, "a&#160;»b«&#160; !important c"
		, "</p>"
		);
		fxt.Test__parse(wikitext, html);
	}
}
/**
 * Test fixture for XomwParser: builds a parser plus parser context on top of a test app / wiki,
 * both initialized for a page titled "Page_1", and runs the two parse phases on demand.
 */
class XomwParserFxt {
	private static final String PAGE_NAME = "Page_1";
	private final XomwParser parser;
	private final XomwParserCtx pctx;
	private final XomwParserBfr pbfr;
	public XomwParserFxt() {
		// context and buffer are created first, matching field-initializer order
		this.pctx = new XomwParserCtx();
		this.pbfr = new XomwParserBfr();

		Xoae_app test_app = Xoa_app_fxt.Make__app__edit();
		Xowe_wiki test_wiki = Xoa_app_fxt.Make__wiki__edit(test_app);
		this.parser = new XomwParser(XomwEnv.NewTestByApp(test_app));
		parser.Init_by_wiki(test_wiki);

		// parser and context each receive their own title instance
		parser.Init_by_page(XomwTitle.newFromText(parser.Env(), Bry_.new_a7(PAGE_NAME)));
		pctx.Init_by_page(XomwTitle.newFromText(parser.Env(), Bry_.new_a7(PAGE_NAME)));
	}
	/**
	 * Parses src_str (internalParse followed by internalParseHalfParsed) and compares the
	 * buffer's result, line by line, with expd; src_str is passed on as the third argument
	 * of the comparison.
	 */
	public void Test__parse(String src_str, String expd) {
		parser.internalParse(pbfr, pctx, Bry_.new_u8(src_str));
		parser.internalParseHalfParsed(pbfr, pctx, true, true);
		Tfds.Eq_str_lines(expd, pbfr.Rslt().To_str_and_clear(), src_str);
	}
}

View File

@@ -13,3 +13,334 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki.includes.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*;
import gplx.core.btries.*;
/**
 * XO.MW port of MediaWiki's StripState: remembers strip markers together with the text they
 * stand for, and substitutes that text back into a source ("unstripping").
 *
 * All items live in a single trie keyed by the full marker bytes; every item carries a type
 * (TYPE_GENERAL or TYPE_NOWIKI) so that one scan can unstrip one type or both (TYPE_BOTH).
 *
 * The instance reuses scratch state (trv, tmp_1, tmp_2) across calls without synchronization.
 */
public class XomwStripState {
//	protected $prefix;
//	protected $data;
//	protected $regex;
//
//	protected $tempType, $tempMergePrefix;
//	protected $circularRefGuard;
//	protected $recursionLevel = 0;
//
//	static final UNSTRIP_RECURSION_LIMIT = 20;
	private final Btrie_slim_mgr trie = Btrie_slim_mgr.cs();
	private final Btrie_rv trv = new Btrie_rv();
	private final Bry_bfr tmp_1 = Bry_bfr_.New();       // target buffer for the byte[] overload of unstripType
	private final Bry_bfr tmp_2 = Bry_bfr_.New();       // buffer for the first level of nested unstripping
	private boolean tmp_2_used = false;                 // true while tmp_2 is claimed by a recursion level
	private int generalLen, nowikiLen;                  // number of items added per type; used only for the early exit
//	/**
//	* @param String|null $prefix
//	* @since 1.26 The prefix argument should be omitted, as the strip marker
//	*  prefix String is now a constant.
//	*/
//	public function __construct($prefix = null) {
//		if ($prefix !== null) {
//			wfDeprecated(__METHOD__ . ' with called with $prefix argument' .
//				' (call with no arguments instead)', '1.26');
//		}
//		this.data = [
//			'nowiki' => [],
//			'general' => []
//		];
//		this.regex = '/' . Parser::MARKER_PREFIX . "([^\x7f<>&'\"]+)" . Parser::MARKER_SUFFIX . '/';
//		this.circularRefGuard = [];
//	}
//	public void Clear() {
//		trie.Clear();
//		generalLen = nowikiLen = 0;
//		tmp_2_used = false;
//	}
	/**
	 * Adds a nowiki strip item.
	 * @param marker full marker bytes that will be searched for in the text
	 * @param val replacement bytes for the marker
	 */
	public void addNoWiki(byte[] marker, byte[] val) {
		this.addItem(TYPE_NOWIKI, marker, val);
	}
	/**
	 * Adds a general strip item.
	 * @param marker full marker bytes that will be searched for in the text
	 * @param val replacement bytes for the marker
	 */
	public void addGeneral(byte[] marker, byte[] val) {
		this.addItem(TYPE_GENERAL, marker, val);
	}
	/**
	 * Registers a strip item of the given type and bumps the per-type counter.
	 * Any type other than TYPE_GENERAL is counted as nowiki.
	 * @param type TYPE_GENERAL or TYPE_NOWIKI
	 * @param marker full marker bytes; used as the trie key
	 * @param val replacement bytes for the marker
	 */
	public void addItem(byte type, byte[] marker, byte[] val) {
//		if (!preg_match(this.regex, $marker, $m)) {
//			throw new MWException("Invalid marker: $marker");
//		}
		// XO.MW:ported
		// this.data[$type][$m[1]] = $value;
		trie.Add_obj(marker, new XomwStripItem(type, marker, val));
		if (type == TYPE_GENERAL)
			generalLen++;
		else
			nowikiLen++;
	}
	/**
	 * Unstrips general items only.
	 * @param text source bytes
	 * @return new bytes if any marker was replaced; else the same text instance
	 */
	public byte[] unstripGeneral(byte[] text) {
		return this.unstripType(TYPE_GENERAL, text);
	}
	/**
	 * Unstrips nowiki items only.
	 * @param text source bytes
	 * @return new bytes if any marker was replaced; else the same text instance
	 */
	public byte[] unstripNoWiki(byte[] text) {
		return this.unstripType(TYPE_NOWIKI, text);
	}
	/**
	 * Unstrips general and nowiki items in a single pass.
	 * @param text source bytes
	 * @return new bytes if any marker was replaced; else the same text instance
	 */
	public byte[] unstripBoth(byte[] text) {
		// $text = this.unstripType('general', $text);
		// $text = this.unstripType('nowiki', $text);
		return this.unstripType(TYPE_BOTH, text);
	}
	/**
	 * Unstrips items whose type is included in tid.
	 * @param tid TYPE_GENERAL, TYPE_NOWIKI or TYPE_BOTH
	 * @param text source bytes
	 * @return new bytes if any marker was replaced; else the same text instance
	 */
	public byte[] unstripType(byte tid, byte[] text) {
		boolean dirty = unstripType(tid, tmp_1, text, 0, text.length);
		return dirty ? tmp_1.To_bry_and_clear() : text;
	}
	// XOWA
	public void unstripGeneral(XomwParserBfr pbfr) {unstripType(TYPE_GENERAL, pbfr);}
	public void unstripNoWiki(XomwParserBfr pbfr)  {unstripType(TYPE_NOWIKI , pbfr);}
	public void unstripBoth(XomwParserBfr pbfr)    {unstripType(TYPE_BOTH   , pbfr);}
	/**
	 * Unstrips from pbfr's source buffer into its target buffer; the buffers are switched
	 * only when at least one marker was replaced.
	 * @return true if any marker was replaced
	 */
	private boolean unstripType(byte tid, XomwParserBfr pbfr) {
		// XO.PBFR
		Bry_bfr src_bfr = pbfr.Src();
		byte[] src = src_bfr.Bfr();
		boolean dirty = unstripType(tid, pbfr.Trg(), src, 0, src_bfr.Len());
		if (dirty)
			pbfr.Switch();
		return dirty;
	}
	/**
	 * Core scan: copies src[src_bgn..src_end) to trg, replacing every marker whose item type
	 * is included in tid by its (recursively unstripped) value.
	 * Nothing is written to trg unless at least one marker is replaced.
	 *
	 * NOTE: the circularRefGuard / UNSTRIP_RECURSION_LIMIT checks of the MediaWiki
	 * unstripCallback (see commented code below) are not ported; the recursion here has no
	 * depth or cycle check.
	 *
	 * @param tid TYPE_GENERAL, TYPE_NOWIKI or TYPE_BOTH
	 * @param trg buffer receiving the output when the result is dirty
	 * @param src source bytes
	 * @param src_bgn start of the range to scan (inclusive)
	 * @param src_end end of the range to scan (exclusive)
	 * @return true if any marker was replaced
	 */
	private boolean unstripType(byte tid, Bry_bfr trg, byte[] src, int src_bgn, int src_end) {
//		// Shortcut
//		if (!count(this.data[$type])) {
//			return $text;
//		}
		// exit early only when NONE of the requested types has items
		// FIX: the former "if general ... else if nowiki ..." chain returned early for TYPE_BOTH
		// whenever there were no general items, even if nowiki items existed
		boolean has_general = (tid & TYPE_GENERAL) == TYPE_GENERAL && generalLen > 0;
		boolean has_nowiki  = (tid & TYPE_NOWIKI)  == TYPE_NOWIKI  && nowikiLen  > 0;
		if (!has_general && !has_nowiki)
			return false;
		// XO.MW:PORTED
		// $oldType = this.tempType;
		// this.tempType = $type;
		// $text = preg_replace_callback(this.regex, [ $this, 'unstripCallback' ], $text);
		// this.tempType = $oldType;
		// return $text;
		int cur = src_bgn;
		int prv = cur;          // start of the pending, not-yet-copied literal run
		boolean dirty = false;
		// loop over each src char
		while (true) {
			// EOS: exit
			if (cur == src_end) {
				if (dirty)	// add remainder if dirty
					trg.Add_mid(src, prv, src_end);
				break;
			}
			// check if current pos matches strip state
			Object o = trie.Match_at(trv, src, cur, src_end);
			if (o != null) {	// match
				XomwStripItem item = (XomwStripItem)o;
				byte item_tid = item.Type();
				if ((tid & item_tid) == item_tid) {	// check if types match
					// get bfr for recursion: reuse tmp_2 if free; else allocate a new one
					Bry_bfr nested_bfr = null;
					boolean tmp_2_release = false;
					if (tmp_2_used) {
						nested_bfr = Bry_bfr_.New();
					}
					else {
						nested_bfr = tmp_2;
						tmp_2_used = true;
						tmp_2_release = true;
					}
					// recurse: the item's value may itself contain markers
					byte[] item_val = item.Val();
					if (unstripType(tid, nested_bfr, item_val, 0, item_val.length))
						item_val = nested_bfr.To_bry_and_clear();
					if (tmp_2_release)
						tmp_2_used = false;
					// add to trg: pending literal run, then the replacement
					trg.Add_mid(src, prv, cur);
					trg.Add(item_val);
					// update vars: skip over the marker
					dirty = true;
					cur += item.Key().length;
					prv = cur;
					continue;
				}
			}
			cur++;
		}
		return dirty;
	}
//	/**
//	* @param array $m
//	* @return array
//	*/
//	protected function unstripCallback($m) {
//		$marker = $m[1];
//		if (isset(this.data[this.tempType][$marker])) {
//			if (isset(this.circularRefGuard[$marker])) {
//				return '<span class="error">'
//					. wfMessage('parser-unstrip-loop-warning')->inContentLanguage()->text()
//					. '</span>';
//			}
//			if (this.recursionLevel >= self::UNSTRIP_RECURSION_LIMIT) {
//				return '<span class="error">' .
//					wfMessage('parser-unstrip-recursion-limit')
//						->numParams(self::UNSTRIP_RECURSION_LIMIT)->inContentLanguage()->text() .
//					'</span>';
//			}
//			this.circularRefGuard[$marker] = true;
//			this.recursionLevel++;
//			$value = this.data[this.tempType][$marker];
//			if ($value instanceof Closure) {
//				$value = $value();
//			}
//			$ret = this.unstripType(this.tempType, $value);
//			this.recursionLevel--;
//			unset(this.circularRefGuard[$marker]);
//			return $ret;
//		} else {
//			return $m[0];
//		}
//	}
//	/**
//	* Get a StripState Object which is sufficient to unstrip the given text.
//	* It will contain the minimum subset of strip items necessary.
//	*
//	* @param String $text
//	*
//	* @return StripState
//	*/
//	public function getSubState($text) {
//		$subState = new StripState();
//		$pos = 0;
//		while (true) {
//			$startPos = strpos($text, Parser::MARKER_PREFIX, $pos);
//			$endPos = strpos($text, Parser::MARKER_SUFFIX, $pos);
//			if ($startPos === false || $endPos === false) {
//				break;
//			}
//
//			$endPos += strlen(Parser::MARKER_SUFFIX);
//			$marker = substr($text, $startPos, $endPos - $startPos);
//			if (!preg_match(this.regex, $marker, $m)) {
//				continue;
//			}
//
//			$key = $m[1];
//			if (isset(this.data['nowiki'][$key])) {
//				$subState->data['nowiki'][$key] = this.data['nowiki'][$key];
//			} elseif (isset(this.data['general'][$key])) {
//				$subState->data['general'][$key] = this.data['general'][$key];
//			}
//			$pos = $endPos;
//		}
//		return $subState;
//	}
//
//	/**
//	* Merge another StripState Object into this one. The strip marker keys
//	* will not be preserved. The strings in the $texts array will have their
//	* strip markers rewritten, the resulting array of strings will be returned.
//	*
//	* @param StripState $otherState
//	* @param array $texts
//	* @return array
//	*/
//	public function merge($otherState, $texts) {
//		$mergePrefix = wfRandomString(16);
//
//		foreach ($otherState->data as $type => $items) {
//			foreach ($items as $key => $value) {
//				this.data[$type]["$mergePrefix-$key"] = $value;
//			}
//		}
//
//		this.tempMergePrefix = $mergePrefix;
//		$texts = preg_replace_callback($otherState->regex, [ $this, 'mergeCallback' ], $texts);
//		this.tempMergePrefix = null;
//		return $texts;
//	}
//
//	/**
//	* @param array $m
//	* @return String
//	*/
//	protected function mergeCallback($m) {
//		$key = $m[1];
//		return Parser::MARKER_PREFIX . this.tempMergePrefix . '-' . $key . Parser::MARKER_SUFFIX;
//	}
//
//	/**
//	* Remove any strip markers found in the given text.
//	*
//	* @param String $text Input String
//	* @return String
//	*/
//	public function killMarkers($text) {
//		return preg_replace(this.regex, '', $text);
//	}
//	public static final String Str__marker_bgn = "\u007f'\"`UNIQ-";
//	public static final byte[]
//	  Bry__marker__bgn = Bry_.new_a7(Str__marker_bgn)
//	, Bry__marker__end = Bry_.new_a7("-QINU`\"'\u007f")
//	;
	/** Item / request types; TYPE_BOTH is the bitwise OR of TYPE_GENERAL and TYPE_NOWIKI. */
	public static final byte TYPE_GENERAL = 1, TYPE_NOWIKI = 2, TYPE_BOTH = 3;
}
/**
 * Immutable holder for one strip-state entry: the item type, the marker bytes used as key,
 * and the replacement bytes. Arrays are stored and returned as-is (no copies are made).
 */
class XomwStripItem {
	private final byte tid;
	private final byte[] key;
	private final byte[] val;

	public XomwStripItem(byte tid, byte[] key, byte[] val) {
		this.tid = tid;
		this.key = key;
		this.val = val;
	}

	/** Type id given at construction. */
	public byte Type() {
		return tid;
	}

	/** Marker bytes given at construction. */
	public byte[] Key() {
		return key;
	}

	/** Replacement bytes given at construction. */
	public byte[] Val() {
		return val;
	}
}

View File

@@ -13,3 +13,30 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki.includes.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*;
import org.junit.*; import gplx.core.tests.*;
/**
 * Tests for XomwStripState: unstripping by type, and unstripping of a marker whose value is
 * itself another marker.
 */
public class XomwStripStateTest {
	// markers used by the tests; compile-time constants so the concatenations below are too
	private static final String
	  MARKER_1 = "\u007f'\"`UNIQ-key-1-QINU`\"'\u007f"
	, MARKER_2 = "\u007f'\"`UNIQ-key-2-QINU`\"'\u007f"
	;
	private final XomwStripStateFxt fxt = new XomwStripStateFxt();
	@Test public void Basic() {
		// a general item is left alone by a nowiki request, but replaced by general / both
		fxt.Init__add    (XomwStripState.TYPE_GENERAL, MARKER_1, "val-1");
		fxt.Test__nostrip(XomwStripState.TYPE_NOWIKI , "a " + MARKER_1 + " b");
		fxt.Test__unstrip(XomwStripState.TYPE_GENERAL, "a " + MARKER_1 + " b", "a val-1 b");
		fxt.Test__unstrip(XomwStripState.TYPE_BOTH   , "a " + MARKER_1 + " b", "a val-1 b");
	}
	@Test public void Recurse() {
		// key-2 resolves to key-1, which resolves to "val-1"
		fxt.Init__add    (XomwStripState.TYPE_GENERAL, MARKER_1, "val-1");
		fxt.Init__add    (XomwStripState.TYPE_GENERAL, MARKER_2, MARKER_1);
		fxt.Test__unstrip(XomwStripState.TYPE_GENERAL, "a " + MARKER_2 + " b", "a val-1 b");
	}
}
/** Test fixture wrapping one XomwStripState; converts between Strings and byte arrays. */
class XomwStripStateFxt {
	private final XomwStripState stripState = new XomwStripState();

	/** Registers a marker / value pair of the given type. */
	public void Init__add(byte tid, String marker, String val) {
		byte[] marker_bry = Bry_.new_u8(marker);
		byte[] val_bry = Bry_.new_u8(val);
		stripState.addItem(tid, marker_bry, val_bry);
	}

	/** Asserts that unstripping src with tid leaves it unchanged. */
	public void Test__nostrip(byte tid, String src) {
		Test__unstrip(tid, src, src);
	}

	/** Asserts that unstripping src with tid yields expd. */
	public void Test__unstrip(byte tid, String src, String expd) {
		byte[] src_bry = Bry_.new_u8(src);
		byte[] actl_bry = stripState.unstripType(tid, src_bry);
		Gftest.Eq__str(expd, String_.new_u8(actl_bry));
	}
}

View File

@@ -13,3 +13,13 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki.includes.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*;
/**
 * Output-type ids for the parser.
 * NOTE: Tid__preprocess and Tid__msg deliberately keep the same value (3), exactly as declared
 * originally; presumably this mirrors the OT_* constants of MediaWiki's Parser - confirm before
 * changing either one.
 */
public class Xomw_output_type {
	public static final byte Tid__html       = 1;	// like parse()
	public static final byte Tid__wiki       = 2;	// like preSaveTransform()
	public static final byte Tid__preprocess = 3;	// like preprocess()
	public static final byte Tid__msg        = 3;
	public static final byte Tid__plain      = 4;	// like extractSections() - portions of the original are returned unchanged.
}

View File

@@ -13,3 +13,31 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki.includes.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*;
import gplx.core.btries.*;
/**
 * Trie-based stand-ins for simple regex scans over a byte array.
 * Both scans step forward by gplx.core.intls.Utf8_.Len_of_char_by_1st_byte of the byte at the
 * current position, so the returned position is not clamped to src_end.
 */
public class Xomw_regex_ {
	/**
	 * Scans forward from src_bgn for as long as the trie matches at the current position.
	 * @return first position at which the trie does not match, or the position reached once
	 *         it is no longer less than src_end
	 */
	public static int Find_fwd_while(Btrie_slim_mgr trie, Btrie_rv trv, byte[] src, int src_bgn, int src_end) {
		int pos = src_bgn;
		while (pos < src_end) {
			byte first = src[pos];
			if (trie.Match_at_w_b0(trv, first, src, pos, src_end) == null)
				return pos;	// no match: stop here
			pos += gplx.core.intls.Utf8_.Len_of_char_by_1st_byte(first);
		}
		return pos;
	}
	/**
	 * Scans forward from src_bgn until the trie matches at the current position.
	 * @return first position at which the trie matches, or the position reached once it is
	 *         no longer less than src_end
	 */
	public static int Find_fwd_until(Btrie_slim_mgr trie, Btrie_rv trv, byte[] src, int src_bgn, int src_end) {
		int pos = src_bgn;
		while (pos < src_end) {
			byte first = src[pos];
			if (trie.Match_at_w_b0(trv, first, src, pos, src_end) != null)
				return pos;	// match: stop here
			pos += gplx.core.intls.Utf8_.Len_of_char_by_1st_byte(first);
		}
		return pos;
	}
}

View File

@@ -13,3 +13,25 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki.includes.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*;
import gplx.core.btries.*;
/**
 * Naive boundary check: a position counts as a boundary when it is the start of the source
 * or when the char just before it is one of the sequences supplied by Xomw_regex_space
 * (its Ws() and Zs() arrays).
 */
public class Xomw_regex_boundary {	// THREAD.SAFE: trv is only for consistent interface
	private final Btrie_slim_mgr trie = Btrie_slim_mgr.cs();
	private final Btrie_rv trv = new Btrie_rv();
	public Xomw_regex_boundary(Xomw_regex_space space) {
		// naive implementation of is_boundary; ignore all ws and underscore
		Reg_all(space.Ws());
		Reg_all(space.Zs());
	}
	/** Registers every sequence of ary in the trie (the stored value is Byte_.Zero). */
	private void Reg_all(byte[][] ary) {
		int len = ary.length;
		for (int i = 0; i < len; i++)
			trie.Add_bry_byte(ary[i], Byte_.Zero);
	}
	/**
	 * @param src source bytes
	 * @param pos position to test
	 * @return true if pos is 0, or if the trie matches the char that ends at pos
	 */
	public boolean Is_boundary_prv(byte[] src, int pos) {
		if (pos == 0) return true;	// BOS is true
		int prv_bgn = gplx.core.intls.Utf8_.Get_prv_char_pos0(src, pos);
		return trie.Match_at_w_b0(trv, src[prv_bgn], src, prv_bgn, pos) != null;
	}
}

View File

@@ -13,3 +13,87 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki.includes.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*;
/**
 * Builds an array of byte sequences from a small regex-like notation.
 *
 * Each item may contain plain characters plus two escapes:
 *   \s   -> one space
 *   \xHH -> the single byte with hex value HH; EX: "\xc2\xa0" -> new byte[] {(byte)194, (byte)160}
 * Any other escape throws. Results of successive Add_ary / Add_rng calls are appended and
 * exposed through Rslt().
 */
public class Xomw_regex_parser {
	private Bry_bfr tmp;	// scratch buffer; created lazily
	/** Sequences accumulated so far; null until the first Add_ary / Add_rng call. */
	public byte[][] Rslt() {return rslt;} private byte[][] rslt;
	/** Compiles each item of ary and appends the compiled sequences to the result. */
	public Xomw_regex_parser Add_ary(String... ary) {return Set_or_add(Parse_ary(ary));}
	private byte[][] Parse_ary(String... ary) {
		if (tmp == null) tmp = Bry_bfr_.New();
		int ary_len = ary.length;
		byte[][] rv = new byte[ary_len][];
		for (int i = 0; i < ary_len; i++) {
			rv[i] = Compile_itm(tmp, Bry_.new_u8(ary[i]));
		}
		return rv;
	}
	/**
	 * Compiles bgn and end, decodes each to an int, and appends one encoded sequence for every
	 * value from bgn to end (both inclusive).
	 * Throws if the decoded end is less than the decoded bgn.
	 */
	public Xomw_regex_parser Add_rng(String bgn, String end) {return Set_or_add(Parse_rng(bgn, end));}
	private byte[][] Parse_rng(String bgn, String end) {
		if (tmp == null) tmp = Bry_bfr_.New();
		byte[] bgn_bry = Compile_itm(tmp, Bry_.new_u8(bgn));
		int bgn_val = gplx.core.intls.Utf16_.Decode_to_int(bgn_bry, 0);
		byte[] end_bry = Compile_itm(tmp, Bry_.new_u8(end));
		int end_val = gplx.core.intls.Utf16_.Decode_to_int(end_bry, 0);
		// FIX: a reversed range used to surface as NegativeArraySizeException
		if (end_val < bgn_val) throw Err_.new_wo_type("regex range failed: end is less than bgn", "bgn", bgn, "end", end);
		int rv_len = end_val - bgn_val + 1;
		byte[][] rv = new byte[rv_len][];
		for (int i = 0; i < rv_len; i++) {
			rv[i] = gplx.core.intls.Utf16_.Encode_int_to_bry(i + bgn_val);
		}
		return rv;
	}
	private Xomw_regex_parser Set_or_add(byte[][] val) {
		rslt = rslt == null ? val : Bry_.Ary_add(rslt, val);
		return this;
	}
	/**
	 * Compiles one item: resolves the \s and \xHH escapes and copies everything else as-is.
	 * @param tmp scratch buffer; only written to when src contains an escape
	 * @param src item bytes
	 * @return src itself if it contains no escape; else a new array with the escapes resolved
	 */
	private static byte[] Compile_itm(Bry_bfr tmp, byte[] src) {
		// parse each itm
		int src_end = src.length;
		int cur = 0;
		int prv = cur;			// start of the pending, not-yet-copied literal run
		boolean dirty = false;
		while (true) {
			// eos
			if (cur == src_end) {
				if (dirty)	// flush trailing literal run
					tmp.Add_mid(src, prv, src_end);
				break;
			}
			// look at byte
			byte b = src[cur];
			switch (b) {	// escape
				case Byte_ascii.Backslash: {
					int nxt = cur + 1;
					if (nxt >= src_end) throw Err_.new_wo_type("regex escape failed: no more chars left", "src", src, "pos", nxt);
					byte nxt_byte = src[nxt];
					switch (nxt_byte) {
						case Byte_ascii.Ltr_s:	// \s -> " "
							// FIX: used to replace the entire item with a space, dropping any surrounding text
							tmp.Add_mid(src, prv, cur);
							tmp.Add(Byte_ascii.Space_bry);
							dirty = true;
							cur = nxt + 1;
							prv = cur;
							break;
						case Byte_ascii.Ltr_x: {	// \x -> one byte given as two hex digits; EX: "\xc2\xa0" -> new byte[] {194, 160}
							// read next two bytes
							int hex_bgn = nxt + 1;
							int hex_end = hex_bgn + 2;
							if (hex_end > src_end) throw Err_.new_wo_type("utf8 escape failed: no more chars left", "src", src, "pos", hex_bgn);
							int hex_val = gplx.core.encoders.Hex_utl_.Parse_or(src, hex_bgn, hex_end, -1);
							// FIX: an unparseable pair used to be written silently as byte 0xFF
							if (hex_val == -1) throw Err_.new_wo_type("utf8 escape failed: invalid hex", "src", src, "pos", hex_bgn);
							// FIX: flush literal bytes that precede the escape; they used to be dropped
							tmp.Add_mid(src, prv, cur);
							tmp.Add_byte((byte)hex_val);
							dirty = true;
							cur = hex_end;
							prv = cur;
							break;
						}
						default:
							throw Err_.new_wo_type("regex escape failed: unknown char", "src", src, "pos", nxt);
					}
					break;
				}
				default:	// handles ascii only
					// NOTE(review): b is a signed byte, so "b > 127" can never be true and non-ascii
					// bytes are currently copied through unchanged; left as-is because enabling the
					// check could break callers that are not visible from here
					if (b > 127)
						throw Err_.new_wo_type("regex compiled failed: unknown char", "src", src, "pos", cur);
					cur++;
					break;
			}
		}
		// set item
		return dirty ? tmp.To_bry_and_clear() : src;
	}
}

View File

@@ -13,3 +13,28 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki.includes.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*;
import org.junit.*; import gplx.core.tests.*;
/** Tests for Xomw_regex_parser: escape expansion in Add_ary and range expansion in Add_rng. */
public class Xomw_regex_parser__tst {
  private final Xomw_regex_parser__fxt fxt = new Xomw_regex_parser__fxt();
  @Test public void Ary__space() {
    fxt.Test__parse_ary(String_.Ary("\\s"), String_.Ary(" "));
  }
  @Test public void Ary__utf8() {
    // bytes C2 A7 are U+00A7 (section sign); bytes E0 B9 90 are U+0E50 (Thai digit zero)
    fxt.Test__parse_ary(String_.Ary("\\xc2\\xa7", "\\xe0\\xb9\\x90"), String_.Ary("§", "\u0E50"));
  }
  @Test public void Rng__ascii() {
    fxt.Test__parse_rng("a", "c", String_.Ary("a", "b", "c"));
  }
}
/** Test fixture: feeds input to a Xomw_regex_parser and compares Rslt() against expected strings. */
class Xomw_regex_parser__fxt {
  private final Xomw_regex_parser parser = new Xomw_regex_parser();
  /** Adds ary via Add_ary and checks that the accumulated result equals expd. */
  public void Test__parse_ary(String[] ary, String[] expd) {
    parser.Add_ary(ary);
    Gftest.Eq__ary(expd, String_.Ary(parser.Rslt()));
  }
  /** Adds the range bgn..end via Add_rng and checks that the accumulated result equals expd. */
  public void Test__parse_rng(String bgn, String end, String[] expd) {
    // use the caller's bounds; they were previously ignored in favor of a hard-coded "a".."c"
    parser.Add_rng(bgn, end);
    Gftest.Eq__ary(expd, String_.Ary(parser.Rslt()));
  }
}

View File

@@ -13,3 +13,50 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki.includes.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*;
import gplx.core.btries.*;
/**
 * Holds the byte sequences treated as whitespace: an ASCII set (Ws) and the
 * Unicode "Space Separator" category (Zs), both as UTF-8 bytes. All sequences
 * are also loaded into a trie used by Find_fwd_while.
 */
public class Xomw_regex_space {
  private final Btrie_slim_mgr trie = Btrie_slim_mgr.cs();
  private final byte[][] ws;
  private final byte[][] zs;
  public Xomw_regex_space() {
    byte[] space = Bry_.New_by_ints(32);
    ws = new byte[][]
    { space
    , Bry_.New_by_ints(9)               // tab
    , Bry_.New_by_ints(10)              // line feed
    , Bry_.New_by_ints(13)              // carriage return
    };
    // Zs; REF:http://www.fileformat.info/info/unicode/category/Zs/list.htm
    zs = new byte[][]
    { space                             // U+0020
    , Bry_.New_by_ints(194, 160)        // U+00A0
    , Bry_.New_by_ints(225, 154, 128)   // U+1680
    , Bry_.New_by_ints(226, 128, 128)   // U+2000; was missing from the table although it belongs to Zs
    , Bry_.New_by_ints(226, 128, 129)   // U+2001
    , Bry_.New_by_ints(226, 128, 130)   // U+2002
    , Bry_.New_by_ints(226, 128, 131)   // U+2003
    , Bry_.New_by_ints(226, 128, 132)   // U+2004
    , Bry_.New_by_ints(226, 128, 133)   // U+2005
    , Bry_.New_by_ints(226, 128, 134)   // U+2006
    , Bry_.New_by_ints(226, 128, 135)   // U+2007
    , Bry_.New_by_ints(226, 128, 136)   // U+2008
    , Bry_.New_by_ints(226, 128, 137)   // U+2009
    , Bry_.New_by_ints(226, 128, 138)   // U+200A
    , Bry_.New_by_ints(226, 128, 175)   // U+202F
    , Bry_.New_by_ints(226, 129, 159)   // U+205F
    , Bry_.New_by_ints(227, 128, 128)   // U+3000
    };
    // load both sets into the trie; the stored value is only a marker
    for (byte[] bry : ws) {
      trie.Add_bry_byte(bry, Byte_.Zero);
    }
    for (byte[] bry : zs) {
      trie.Add_bry_byte(bry, Byte_.Zero);
    }
  }
  /** @return the ASCII whitespace sequences (space, tab, LF, CR); the internal array is returned, not a copy */
  public byte[][] Ws() {return ws;}
  /** @return the Zs sequences as UTF-8 bytes; the internal array is returned, not a copy */
  public byte[][] Zs() {return zs;}
  /** Delegates to Xomw_regex_.Find_fwd_while with this class's trie over src[src_bgn..src_end). */
  public int Find_fwd_while(Btrie_rv trv, byte[] src, int src_bgn, int src_end) {
    return Xomw_regex_.Find_fwd_while(trie, trv, src, src_bgn, src_end);
  }
}

View File

@@ -13,3 +13,26 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki.includes.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*;
import gplx.core.btries.*;
/**
 * Trie of the bytes / sequences excluded by the URL character class
 *   [^][<>"\\x00-\\x20\\x7F\p{Zs}]
 * The value stored with each key is only a marker.
 */
public class Xomw_regex_url {
  private final Btrie_slim_mgr trie;

  /** @param regex_space supplies the Zs sequences that are registered alongside the ASCII exclusions */
  public Xomw_regex_url(Xomw_regex_space regex_space) {
    // [^][<>"\\x00-\\x20\\x7F\|]
    // REGEX:[^][<>"\\x00-\\x20\\x7F\p{Zs}]; NOTE: val is just a marker
    this.trie = Btrie_slim_mgr.cs();

    // brackets, angle brackets and double-quote
    trie.Add_str_byte__many(Byte_.Zero, "[", "]", "<", ">", "\"");

    // x00 - x20
    for (int code = 0; code <= 32; code++) {
      byte[] key = new byte[] {(byte)code};
      trie.Add_bry_byte(key, Byte_.Zero);
    }

    // x7F
    trie.Add_bry_byte(Bry_.New_by_ints(127), Byte_.Zero);

    // \p{Zs}
    byte[][] zs_ary = regex_space.Zs();
    int zs_len = zs_ary.length;
    for (int i = 0; i < zs_len; i++) {
      trie.Add_bry_byte(zs_ary[i], Byte_.Zero);
    }
  }

  /** Despite its name, this delegates to Xomw_regex_.Find_fwd_until with this class's trie. */
  public int Find_fwd_while(Btrie_rv trv, byte[] src, int src_bgn, int src_end) {
    return Xomw_regex_.Find_fwd_until(trie, trv, src, src_bgn, src_end);
  }
}

View File

@@ -13,3 +13,42 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki.includes.parsers.doubleunders; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*; import gplx.xowa.mediawiki.includes.parsers.*;
/** Flags recording which double-underscore keywords were seen; all flags are cleared by Reset(). */
public class Xomw_doubleunder_data {
  // XO.MW: MW stores these as mDoubleUnderscores in Parser
  public boolean toc;
  public boolean no_toc;
  public boolean force_toc;
  public boolean no_gallery;
  public boolean force_gallery;
  public boolean no_title_convert;
  public boolean no_content_convert;
  public boolean no_edit_section;
  public boolean new_section_link;
  public boolean static_redirect;
  public boolean hidden_cat;
  public boolean index;
  public boolean no_index;
  // XO.MW: MW stores these as member variables in Parser
  public boolean show_toc;
  public boolean force_toc_position;

  /** Sets every flag back to false. */
  public void Reset() {
    // keyword flags
    toc = false;
    no_toc = false;
    force_toc = false;
    no_gallery = false;
    force_gallery = false;
    no_title_convert = false;
    no_content_convert = false;
    no_edit_section = false;
    new_section_link = false;
    static_redirect = false;
    hidden_cat = false;
    index = false;
    no_index = false;
    // parser-state flags
    show_toc = false;
    force_toc_position = false;
  }
}

View File

@@ -13,3 +13,190 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki.includes.parsers.doubleunders; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*; import gplx.xowa.mediawiki.includes.parsers.*;
import gplx.core.btries.*;
import gplx.xowa.langs.*; import gplx.xowa.langs.kwds.*;
/**
 * Finds double-underscore keywords (EX: __NOTOC__, __TOC__) in the parser buffer,
 * records each one as a flag on Xomw_doubleunder_data and removes it from the text.
 * The first __TOC__ is replaced with the placeholder "<!--MWTOC-->"; later ones are dropped.
 */
public class Xomw_doubleunder_wkr {
  private final Btrie_slim_mgr trie = Btrie_slim_mgr.ci_u8();
  private final Btrie_rv trv = new Btrie_rv();
  private Xomw_doubleunder_data data;

  /**
   * Registers the language's keyword texts for every supported double-underscore group.
   *
   * @param data receives the flags set by doDoubleUnderscore
   * @param lang supplies the keyword manager from which keyword texts are read
   */
  public void Init_by_wiki(Xomw_doubleunder_data data, Xol_lang_itm lang) {
    this.data = data;
    Reg(trie, lang.Kwd_mgr()
    , Xol_kwd_grp_.Id_notoc
    , Xol_kwd_grp_.Id_nogallery
    , Xol_kwd_grp_.Id_forcetoc
    , Xol_kwd_grp_.Id_toc
    , Xol_kwd_grp_.Id_noeditsection
    , Xol_kwd_grp_.Id_newsectionlink
    , Xol_kwd_grp_.Id_hiddencat
    , Xol_kwd_grp_.Id_index
    , Xol_kwd_grp_.Id_noindex
    , Xol_kwd_grp_.Id_staticredirect
    , Xol_kwd_grp_.Id_notitleconvert
    , Xol_kwd_grp_.Id_nocontentconvert
    );
  }

  /**
   * Scans pbfr.Src() once. When at least one keyword is removed, the rewritten text is
   * written to pbfr.Trg() and the buffers are switched; otherwise pbfr is left untouched.
   * The data flags are reset at the start of every call.
   */
  public void doDoubleUnderscore(XomwParserCtx pctx, XomwParserBfr pbfr) {
    // XO.PBFR
    Bry_bfr src_bfr = pbfr.Src();
    byte[] src = src_bfr.Bfr();
    int src_bgn = 0;
    int src_end = src_bfr.Len();
    Bry_bfr bfr = pbfr.Trg();
    data.Reset();
    // XO.MW: MW does TOC before others; XO does it at the same time
    // Now match and remove the rest of them
    // XO.MW.BGN: $this->mDoubleUnderscores = $mwa->matchAndRemove( $text );
    int cur = src_bgn;
    int prv = cur; // start of the text run not yet copied to bfr
    boolean dirty = false;
    while (true) {
      // reached end; stop
      // NOTE: ">=" not "=="; cur advances by the char length taken from the lead byte, so a
      // truncated multi-byte char at the end of the text can move it beyond src_end
      if (cur >= src_end) {
        if (dirty) {
          bfr.Add_mid(src, prv, src_end);
        }
        break;
      }
      // no match; keep searching
      byte b = src[cur];
      Object o = trie.Match_at_w_b0(trv, b, src, cur, src_end);
      if (o == null) {
        cur += gplx.core.intls.Utf8_.Len_of_char_by_1st_byte(b);
        continue;
      }
      // if cs, ensure exact-match (trie is case-insensitive)
      int kwd_end = trv.Pos();
      Xomw_doubleunder_itm itm = (Xomw_doubleunder_itm)o;
      if (itm.case_match && !Bry.Match(src, cur, kwd_end, itm.val)) {
        cur = kwd_end;
        continue;
      }
      // match; replace __KWD__ with "" (or "<!--MWTOC-->" if __TOC__)
      dirty = true;
      bfr.Add_mid(src, prv, cur);
      switch (itm.tid) {
        case Xol_kwd_grp_.Id_toc:
          // The position of __TOC__ needs to be recorded
          // show_toc is still false only for the first __TOC__ of this call
          boolean is_first_toc = !data.show_toc;
          data.toc = true;
          data.show_toc = true;
          data.force_toc_position = true;
          if (is_first_toc) { // Set a placeholder. At the end we'll fill it in with the TOC.
            bfr.Add_str_a7("<!--MWTOC-->");
          }
          else { // Only keep the first one. XO.MW:ignore by not adding anything to bfr
          }
          break;
        // XO.MW: MW adds boolean to hash_table; XO uses boolean props; note that "remove" is done by not adding to bfr
        case Xol_kwd_grp_.Id_notoc: data.no_toc = true; break;
        case Xol_kwd_grp_.Id_nogallery: data.no_gallery = true; break;
        case Xol_kwd_grp_.Id_forcetoc: data.force_toc = true; break;
        case Xol_kwd_grp_.Id_noeditsection: data.no_edit_section = true; break;
        case Xol_kwd_grp_.Id_newsectionlink: data.new_section_link = true; break;
        case Xol_kwd_grp_.Id_hiddencat: data.hidden_cat = true; break;
        case Xol_kwd_grp_.Id_index: data.index = true; break;
        case Xol_kwd_grp_.Id_noindex: data.no_index = true; break;
        case Xol_kwd_grp_.Id_staticredirect: data.static_redirect = true; break;
        case Xol_kwd_grp_.Id_notitleconvert: data.no_title_convert = true; break;
        case Xol_kwd_grp_.Id_nocontentconvert: data.no_content_convert = true; break;
        default: throw Err_.new_unhandled_default(itm.tid);
      }
      cur = kwd_end;
      prv = cur;
    }
    // XO.MW.END: $this->mDoubleUnderscores = $mwa->matchAndRemove( $text );
    if (data.no_toc && !data.force_toc_position) {
      data.show_toc = false;
    }
    // XO.MW.EDIT: hidden_cat, index, noindex are used to add to tracking category
    if (dirty)
      pbfr.Switch();
  }
  // /**
  // * Strip double-underscore items like __NOGALLERY__ and __NOTOC__
  // * Fills this.mDoubleUnderscores, returns the modified text
  // *
  // * @param String $text
  // *
  // * @return String
  // */
  // public function doDoubleUnderscore($text) {
  //
  // # The position of __TOC__ needs to be recorded
  // $mw = MagicWord::get('toc');
  // if ($mw->match($text)) {
  // this.mShowToc = true;
  // this.mForceTocPosition = true;
  //
  // # Set a placeholder. At the end we'll fill it in with the TOC.
  // $text = $mw->replace('<!--MWTOC-->', $text, 1);
  //
  // # Only keep the first one.
  // $text = $mw->replace('', $text);
  // }
  //
  // # Now match and remove the rest of them
  // $mwa = MagicWord::getDoubleUnderscoreArray();
  // this.mDoubleUnderscores = $mwa->matchAndRemove($text);
  //
  // if (isset(this.mDoubleUnderscores['nogallery'])) {
  // this.mOutput->mNoGallery = true;
  // }
  // if (isset(this.mDoubleUnderscores['notoc']) && !this.mForceTocPosition) {
  // this.mShowToc = false;
  // }
  // if (isset(this.mDoubleUnderscores['hiddencat'])
  // && this.mTitle->getNamespace() == NS_CATEGORY
  // ) {
  // this.addTrackingCategory('hidden-category-category');
  // }
  // # (T10068) Allow control over whether robots index a page.
  // # __INDEX__ always overrides __NOINDEX__, see T16899
  // if (isset(this.mDoubleUnderscores['noindex']) && this.mTitle->canUseNoindex()) {
  // this.mOutput->setIndexPolicy('noindex');
  // this.addTrackingCategory('noindex-category');
  // }
  // if (isset(this.mDoubleUnderscores['index']) && this.mTitle->canUseNoindex()) {
  // this.mOutput->setIndexPolicy('index');
  // this.addTrackingCategory('index-category');
  // }
  //
  // # Cache all double underscores in the database
  // foreach (this.mDoubleUnderscores as $key => $val) {
  // this.mOutput->setProperty($key, '');
  // }
  //
  // return $text;
  // }

  /** Adds every keyword text of each listed group to the trie, tagged with its group id and case rule. */
  private static void Reg(Btrie_slim_mgr trie, Xol_kwd_mgr mgr, int... ids) {
    for (int id : ids) {
      Xol_kwd_grp grp = mgr.Get_or_new(id);
      Xol_kwd_itm[] itms = grp.Itms();
      for (Xol_kwd_itm itm : itms) {
        byte[] val = itm.Val();
        trie.Add_obj(val, new Xomw_doubleunder_itm(id, grp.Case_match(), val));
      }
    }
  }
}
/** Trie payload for one double-underscore keyword text. */
class Xomw_doubleunder_itm {
  /** keyword group id the text belongs to */
  public int tid;
  /** true when the source text must equal val byte-for-byte */
  public boolean case_match;
  /** keyword text as registered */
  public byte[] val;

  public Xomw_doubleunder_itm(int tid, boolean case_match, byte[] val) {
    this.val = val;
    this.case_match = case_match;
    this.tid = tid;
  }
}

View File

@@ -13,3 +13,38 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki.includes.parsers.doubleunders; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*; import gplx.xowa.mediawiki.includes.parsers.*;
import org.junit.*; import gplx.core.tests.*;
/**
 * Tests for Xomw_doubleunder_wkr.
 * NOTE: a removed keyword leaves both of its surrounding spaces in place, so
 * "a __KWD__ b" becomes "a  b" (two spaces).
 */
public class Xomw_doubleunder_wkr__tst {
  private final Xomw_doubleunder_wkr__fxt fxt = new Xomw_doubleunder_wkr__fxt();
  @Test public void No_match() {fxt.Test__parse("a b c", "a b c");}
  @Test public void Force_toc() {fxt.Test__parse("a __FORCETOC__ b", "a  b").Test__prop_y(fxt.data.force_toc);}
  @Test public void Toc() {fxt.Test__parse("a __TOC__ b __TOC__ c", "a <!--MWTOC--> b  c").Test__prop_y(fxt.data.toc, fxt.data.show_toc, fxt.data.force_toc_position);}
  @Test public void Notoc_only() {fxt.Test__parse("a __NOTOC__ b", "a  b").Test__prop_y(fxt.data.no_toc).Test__prop_n(fxt.data.show_toc);} // show_toc is false
  @Test public void Notoc_w_toc() {fxt.Test__parse("a __TOC__ b __NOTOC__ c", "a <!--MWTOC--> b  c").Test__prop_y(fxt.data.toc, fxt.data.show_toc, fxt.data.force_toc_position);} // show_toc is true
  @Test public void Case_match() {fxt.Test__parse("a __index__ b", "a __index__ b");}
}
/** Test fixture: runs Xomw_doubleunder_wkr over a string and exposes the resulting flags through data. */
class Xomw_doubleunder_wkr__fxt {
  private final XomwParserCtx pctx = new XomwParserCtx();
  private final XomwParserBfr pbfr = new XomwParserBfr();
  private final Xomw_doubleunder_wkr wkr = new Xomw_doubleunder_wkr();
  public Xomw_doubleunder_data data = new Xomw_doubleunder_data();

  /** Builds an edit-mode app and wiki and initializes the worker with the wiki's language. */
  public Xomw_doubleunder_wkr__fxt() {
    Xoae_app app = Xoa_app_fxt.Make__app__edit();
    Xowe_wiki wiki = Xoa_app_fxt.Make__wiki__edit(app);
    wkr.Init_by_wiki(data, wiki.Lang());
  }

  /** Parses src_str and checks that the resulting text equals expd. */
  public Xomw_doubleunder_wkr__fxt Test__parse(String src_str, String expd) {
    wkr.doDoubleUnderscore(pctx, pbfr.Init(Bry_.new_u8(src_str)));
    String actl = pbfr.Rslt().To_str_and_clear();
    Gftest.Eq__str(expd, actl, src_str);
    return this;
  }

  /** Checks that every given value is true. */
  public Xomw_doubleunder_wkr__fxt Test__prop_y(boolean... ary) {return Test__prop(Bool_.Y, ary);}
  /** Checks that every given value is false. */
  public Xomw_doubleunder_wkr__fxt Test__prop_n(boolean... ary) {return Test__prop(Bool_.N, ary);}

  private Xomw_doubleunder_wkr__fxt Test__prop(boolean expd, boolean... ary) {
    int len = ary.length;
    for (int i = 0; i < len; i++) {
      Gftest.Eq__bool(expd, ary[i]);
    }
    return this;
  }
}

View File

@@ -13,3 +13,8 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki.includes.parsers.headings; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*; import gplx.xowa.mediawiki.includes.parsers.*;
/**
 * Callback for events raised by Xomw_heading_wkr while it scans text for headings.
 * Implementations read positions (Src, Txt_bgn, Hdr_bgn, ...) from the wkr passed in.
 */
public interface Xomw_heading_cbk {
// called by Xomw_heading_wkr each time a heading has been located and its positions have been set on wkr
void On_hdr_seen(XomwParserCtx pctx, Xomw_heading_wkr wkr);
// called by Xomw_heading_wkr once when the scan reaches the end of the source
void On_src_done(XomwParserCtx pctx, Xomw_heading_wkr wkr);
}

View File

@@ -13,3 +13,38 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki.includes.parsers.headings; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*; import gplx.xowa.mediawiki.includes.parsers.*;
/**
 * Heading callback that writes HTML: text between headings is copied as-is and
 * each heading is emitted as "<hN>...</hN>", where N is the worker's Hdr_num().
 */
public class Xomw_heading_cbk__html implements Xomw_heading_cbk {
  private static final byte[] Tag__lhs = Bry_.new_a7("<h");
  private static final byte[] Tag__rhs = Bry_.new_a7("</h");
  private Bry_bfr bfr;

  /** @return the buffer currently receiving output */
  public Bry_bfr Bfr() {return bfr;}
  /** Sets the buffer that receives output; returns this for chaining. */
  public Xomw_heading_cbk__html Bfr_(Bry_bfr bfr) {
    this.bfr = bfr;
    return this;
  }

  public void On_hdr_seen(XomwParserCtx pctx, Xomw_heading_wkr wkr) {
    byte[] src = wkr.Src();
    int txt_bgn = wkr.Txt_bgn();
    int hdr_bgn = wkr.Hdr_bgn();

    // copy pending text that precedes the heading; EX: "abc\n==A==\n"; "\n==" seen -> add "abc"
    if (txt_bgn < hdr_bgn) {
      bfr.Add_mid(src, txt_bgn, hdr_bgn);
    }

    // add "\n" unless BOS
    if (hdr_bgn != XomwParserCtx.Pos__bos) {
      bfr.Add_byte_nl();
    }

    // <h2>...</h2>
    int hdr_num = wkr.Hdr_num();
    Add_tag(Tag__lhs, hdr_num);
    bfr.Add_mid(src, wkr.Hdr_lhs_end(), wkr.Hdr_rhs_bgn());
    Add_tag(Tag__rhs, hdr_num);
  }

  public void On_src_done(XomwParserCtx pctx, Xomw_heading_wkr wkr) {
    int txt_bgn = wkr.Txt_bgn();
    int src_end = wkr.Src_end();
    // PERF: nothing left to copy when the last heading ends at EOS
    if (txt_bgn == src_end) return;
    // add from txt_bgn to EOS
    bfr.Add_mid(wkr.Src(), txt_bgn, src_end);
  }

  /** Writes tag_bgn, the heading level as one digit, and the closing ">". */
  private void Add_tag(byte[] tag_bgn, int hdr_num) {
    bfr.Add(tag_bgn).Add_int_digits(1, hdr_num).Add(Byte_ascii.Angle_end_bry);
  }
}

View File

@@ -13,3 +13,110 @@ The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki.includes.parsers.headings; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*; import gplx.xowa.mediawiki.includes.parsers.*;
import gplx.core.btries.*; import gplx.xowa.langs.*;
/**
 * Scans source text for wiki headings (lines starting with one or more "=") and
 * reports them to a Xomw_heading_cbk. The positions of the current heading are
 * exposed through the Hdr_* accessors so the callback can read them.
 */
public class Xomw_heading_wkr {
  private XomwParserCtx pctx;
  private Xomw_heading_cbk cbk;
  public byte[] Src() {return src;} private byte[] src;
  public int Src_end() {return src_end;} private int src_end;
  public int Txt_bgn() {return txt_bgn;} private int txt_bgn; // start of text not yet handed to the callback
  public int Hdr_bgn() {return hdr_bgn;} private int hdr_bgn; // pos of the "\n" that precedes the heading
  public int Hdr_end() {return hdr_end;} private int hdr_end; // pos of the "\n" that ends the heading line, or src_end
  public int Hdr_num() {return hdr_num;} private int hdr_num; // heading level: smaller of the lhs / rhs "=" counts
  public int Hdr_lhs_bgn() {return hdr_lhs_bgn;} private int hdr_lhs_bgn;
  public int Hdr_lhs_end() {return hdr_lhs_end;} private int hdr_lhs_end;
  public int Hdr_rhs_bgn() {return hdr_rhs_bgn;} private int hdr_rhs_bgn;
  public int Hdr_rhs_end() {return hdr_rhs_end;} private int hdr_rhs_end;

  /**
   * Parses pbfr.Src() and writes HTML to pbfr.Trg() through cbk.
   * The buffers are always switched, whether or not a heading is found.
   */
  public void doHeadings(XomwParserCtx pctx, XomwParserBfr pbfr, Xomw_heading_cbk__html cbk) {
    Bry_bfr src_bfr = pbfr.Src();
    byte[] src_bry = src_bfr.Bfr();
    int src_end = src_bfr.Len();
    cbk.Bfr_(pbfr.Trg());
    pbfr.Switch();
    Parse(pctx, src_bry, 0, src_end, cbk);
  }

  /**
   * Scans src from src_bgn to src_end. cbk.On_hdr_seen is called for every heading
   * and cbk.On_src_done exactly once when the end is reached.
   *
   * @param src_bgn start position; XomwParserCtx.Pos__bos is accepted and maps Txt_bgn to 0
   * @param src_end logical end of src; src itself may be longer
   */
  public void Parse(XomwParserCtx pctx, byte[] src, int src_bgn, int src_end, Xomw_heading_cbk cbk) { // REF.MW: /includes/parser/Parser.php|doHeadings
    // init members
    this.pctx = pctx;
    this.src = src;
    this.src_end = src_end;
    this.cbk = cbk;
    // PORTED:
    // for ( $i = 6; $i >= 1; --$i ) {
    // $h = str_repeat( '=', $i );
    // $text = preg_replace( "/^$h(.+)$h\\s*$/m", "<h$i>\\1</h$i>", $text );
    // }
    // do loop
    int pos = src_bgn;
    this.txt_bgn = pos == XomwParserCtx.Pos__bos ? 0 : pos;
    // b starts as "\n", so the first iteration checks src[src_bgn + 1] without reading src[src_bgn]
    byte b = Byte_ascii.Nl;
    while (true) {
      int nxt = pos + 1;
      // check if (a) cur is \n; (b) nxt is '='
      if ( b == Byte_ascii.Nl
        && nxt < src_end
        && src[nxt] == Byte_ascii.Eq
        ) {
        pos = Parse_hdr_nl(pos, nxt + 1);
        this.txt_bgn = pos;
      }
      else
        ++pos;
      // EOS; add all text after last "==\n"
      // NOTE: ">=" not "=="; for empty input (src_bgn == src_end) the "++pos" above already
      // moves past src_end, and "==" would let the scan run on beyond the end of the text
      if (pos >= src_end) {
        cbk.On_src_done(pctx, this);
        break;
      }
      b = src[pos];
    }
  }

  /**
   * Computes all Hdr_* positions for the heading line that follows nl_lhs and notifies the callback.
   *
   * @param nl_lhs pos of the "\n" (or start pos) before the heading
   * @param pos    pos directly after the first "="
   * @return pos of the "\n" that ends the heading line, or src_end if there is none
   */
  private int Parse_hdr_nl(int nl_lhs, int pos) {
    // calc lhs vars
    this.hdr_bgn = nl_lhs;
    this.hdr_lhs_bgn = nl_lhs == 0 ? 0 : nl_lhs + 1; // set pos of 1st "="; note that "==" can be at BOS;
    this.hdr_lhs_end = Bry_find_.Find_fwd_while(src, pos, src_end, Byte_ascii.Eq);
    // calc rhs vars
    int nl_rhs = Bry_find_.Find_fwd_or(src, Byte_ascii.Nl, hdr_lhs_end + 1, src_end, src_end); // if no "\n", src_end is rest of text; EX: "\n==<text>EOS
    this.hdr_end = nl_rhs;
    this.hdr_rhs_end = Bry_find_.Find_bwd__skip_ws(src, nl_rhs, hdr_lhs_end);
    this.hdr_rhs_bgn = Bry_find_.Find_bwd__skip(src, hdr_rhs_end - 1, hdr_lhs_end, Byte_ascii.Eq);
    int hdr_lhs_len = hdr_lhs_end - hdr_lhs_bgn;
    int hdr_rhs_len = hdr_rhs_end - hdr_rhs_bgn;
    // handle rare situations like "\n====\n"; split the run of "=" between lhs and rhs
    if (hdr_rhs_len == 0) {
      int hdr_lhs_len_half = hdr_lhs_len / 2;
      hdr_rhs_len = hdr_lhs_len - hdr_lhs_len_half;
      hdr_lhs_len = hdr_lhs_len_half;
      this.hdr_lhs_end = hdr_lhs_bgn + hdr_lhs_len;
      this.hdr_rhs_bgn = hdr_lhs_end;
    }
    this.hdr_num = hdr_lhs_len < hdr_rhs_len ? hdr_lhs_len : hdr_rhs_len;
    cbk.On_hdr_seen(pctx, this);
    return nl_rhs;
  }
  // /**
  // * Parse headers and return html
  // *
  // * @private
  // *
  // * @param String $text
  // *
  // * @return String
  // */
  // public function doHeadings($text) {
  // for ($i = 6; $i >= 1; --$i) {
  // $h = str_repeat('=', $i);
  // $text = preg_replace("/^$h(.+)$h\\s*$/m", "<h$i>\\1</h$i>", $text);
  // }
  // return $text;
  // }
}

Some files were not shown because too many files have changed in this diff Show More