1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2026-03-02 03:49:30 +00:00

Xomw: Change Php classes to Xophp; move to root namespace

This commit is contained in:
gnosygnu
2017-02-24 07:56:49 -05:00
parent a8c7f27ff5
commit d8c2eaba1d
42 changed files with 408 additions and 864 deletions

View File

@@ -14,7 +14,6 @@ GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki.includes; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*;
import gplx.xowa.mediawiki.includes.utls.*;
import gplx.xowa.mediawiki.includes.title.*;
/**
* Represents a title within MediaWiki.
@@ -308,7 +307,7 @@ public class XomwTitle {
byte[] filteredText = text;
XomwTitle t = new XomwTitle();
t.mDbkeyform = Php_str_.strtr(filteredText, Byte_ascii.Space, Byte_ascii.Underline);
t.mDbkeyform = XophpString.strtr(filteredText, Byte_ascii.Space, Byte_ascii.Underline);
t.mDefaultNamespace = defaultNamespace;
t.secureAndSplit();
@@ -1434,7 +1433,7 @@ public class XomwTitle {
*/
public byte[] getPrefixedDBkey() {
// run the stored db-key form through this.prefix(...)
byte[] s = this.prefix(this.mDbkeyform);
// NOTE(review): the next two statements look like the removed/added pair of a diff hunk
// (Php_str_ renamed to XophpString); presumably only the XophpString call is in the committed file -- confirm
// both calls pass (Space, Underline); presumably this mirrors PHP strtr and swaps each space for an underscore -- verify against XophpString
s = Php_str_.strtr(s, Byte_ascii.Space, Byte_ascii.Underline);
s = XophpString.strtr(s, Byte_ascii.Space, Byte_ascii.Underline);
return s;
}
@@ -1447,7 +1446,7 @@ public class XomwTitle {
public byte[] getPrefixedText() {
if (this.mPrefixedText == null) {
byte[] s = this.prefix(this.mTextform);
s = Php_str_.strtr(s, Byte_ascii.Underline, Byte_ascii.Space);
s = XophpString.strtr(s, Byte_ascii.Underline, Byte_ascii.Space);
this.mPrefixedText = s;
}
return this.mPrefixedText;
@@ -3378,7 +3377,7 @@ public class XomwTitle {
this.mDbkeyform = parts.dbkey;
// this.mUrlform = wfUrlencode(this.mDbkeyform);
this.mTextform = Php_str_.strtr(this.mDbkeyform, Byte_ascii.Underline, Byte_ascii.Space);
this.mTextform = XophpString.strtr(this.mDbkeyform, Byte_ascii.Underline, Byte_ascii.Space);
// We already know that some pages won't be in the database!
if (this.isExternal() || this.mNamespace == XomwDefines.NS_SPECIAL) {
@@ -4836,4 +4835,105 @@ public class XomwTitle {
// this.mTextform = strtr(this.mDbkeyform, '_', ' ');
// }
private static final byte[] Bry__wgArticlePath__wiki = Bry_.new_a7("/wiki/");
// REF.MW: DefaultSettings.php
// Allowed title characters -- regex character class
// Don't change this unless you know what you're doing
//
// Problematic punctuation:
// - []{}|# Are needed for link syntax, never enable these
// - <> Causes problems with HTML escaping, don't use
// - % Enabled by default, minor problems with path to query rewrite rules, see below
// - + Enabled by default, but doesn't work with path to query rewrite rules,
// corrupted by apache
// - ? Enabled by default, but doesn't work with path to PATH_INFO rewrites
//
// All three of these punctuation problems can be avoided by using an alias,
// instead of a rewrite rule of either variety.
//
// The problem with % is that when using a path to query rewrite rule, URLs are
// double-unescaped: once by Apache's path conversion code, and again by PHP. So
// %253F, for example, becomes "?". Our code does not double-escape to compensate
// for this, indeed double escaping would break if the double-escaped title was
// passed in the query String rather than the path. This is a minor security issue
// because articles can be created such that they are hard to view or edit.
//
// In some rare cases you may wish to remove + for compatibility with old links.
//
// Theoretically 0x80-0x9F of ISO 8859-1 should be disallowed, but
// this breaks interlanguage links
// $wgLegalTitleChars = " %!\"$&'()*,\\-.\\/0-9:;=?@A-Z\\\\^_`a-z~\\x80-\\xFF+";
//
// REGEX:
// without-backslash escaping --> \s%!"$&'()*,-./0-9:;=?@A-Z\^_`a-z~x80-xFF+
// rearranged
// letters --> 0-9A-Za-z
// unicode-chars --> x80-xFF
// symbols --> \s%!"$&'()*,-./:;=?@\^_`~+
// deliberately ignores
// control chars: 00-31,127
// []{}|#<>
/**
 * Scans forward from src_bgn and returns the position of the first byte that is not allowed in a title.
 *
 * Single-byte characters are checked against the valid table; multi-byte UTF-8 sequences are always accepted
 * and skipped as a unit.
 *
 * @param src     bytes to scan
 * @param src_bgn position to start scanning at (inclusive)
 * @param src_end position to stop scanning at (exclusive)
 * @param valid   lookup table indexed by unsigned byte value; true means the byte is allowed
 * @return position of the first disallowed single-byte character, or src_end if none was found;
 *         never greater than src_end when src_bgn <= src_end
 */
public static int Find_fwd_while_title(byte[] src, int src_bgn, int src_end, boolean[] valid) {
	int cur = src_bgn;
	// "<" instead of "==": a multi-byte step can never skip over the terminating condition
	while (cur < src_end) {
		byte b = src[cur];
		int b_len = gplx.core.intls.Utf8_.Len_of_char_by_1st_byte(b);
		if (b_len == 1) {	// single-byte char
			int idx = b & 0xFF;	// PATCH.JAVA:need to convert to unsigned byte
			// invalid; EX: "<title>"
			// bytes beyond the table (EX: 0x80-0xFF against a 128-entry table) are accepted instead of overflowing it;
			// $wgLegalTitleChars allows \x80-\xFF
			if (idx < valid.length && !valid[idx])
				break;
			cur++;				// valid; EX: "a0A B&$"
		}
		else {					// Multi-byte UTF8; NOTE: all sequences are valid
			// clamp so that a sequence truncated by src_end does not push the scan outside the requested range
			cur = Math.min(cur + b_len, src_end);
		}
	}
	return cur;
}
// lazily-built lookup table; index is the ASCII byte value, true means the byte is allowed in a title
private static boolean[] title_chars_valid;
/**
 * Returns the table of ASCII bytes (0-127) that are allowed in a title, building it on first use.
 *
 * Allowed: digits, upper- and lower-case letters, and the symbols \s%!"$&'()*,-./:;=?@\^_`~+
 * Everything else (control chars, []{}|#<>) stays false.
 *
 * @return the shared 128-entry table; callers must not modify it
 */
public static boolean[] Title_chars_valid() {
	if (title_chars_valid == null) {
		// fill a local table and assign the field last; assigning first would hand a partially filled table
		// to any caller arriving mid-initialization
		// NOTE: the field is not volatile, so this is not a complete thread-safety guarantee
		boolean[] rv = new boolean[128];

		// add num and alpha
		for (int i = Byte_ascii.Num_0; i <= Byte_ascii.Num_9; i++)
			rv[i] = true;
		for (int i = Byte_ascii.Ltr_A; i <= Byte_ascii.Ltr_Z; i++)
			rv[i] = true;
		for (int i = Byte_ascii.Ltr_a; i <= Byte_ascii.Ltr_z; i++)
			rv[i] = true;

		// add symbols: \s%!"$&'()*,-./:;=?@\^_`~+
		byte[] symbols = new byte[]
		{ Byte_ascii.Space
		, Byte_ascii.Percent
		, Byte_ascii.Bang
		, Byte_ascii.Quote
		, Byte_ascii.Dollar		// was missing, though $wgLegalTitleChars lists "$"
		, Byte_ascii.Amp
		, Byte_ascii.Apos
		, Byte_ascii.Paren_bgn
		, Byte_ascii.Paren_end
		, Byte_ascii.Star
		, Byte_ascii.Comma
		, Byte_ascii.Dash
		, Byte_ascii.Dot
		, Byte_ascii.Slash
		, Byte_ascii.Colon
		, Byte_ascii.Semic
		, Byte_ascii.Eq
		, Byte_ascii.Question
		, Byte_ascii.At
		, Byte_ascii.Backslash
		, Byte_ascii.Pow
		, Byte_ascii.Underline
		, Byte_ascii.Tick
		, Byte_ascii.Tilde
		, Byte_ascii.Plus
		};
		for (byte symbol : symbols)
			rv[symbol] = true;

		title_chars_valid = rv;
	}
	return title_chars_valid;
}
}