1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2025-06-04 00:14:14 +00:00

Xomw: Add wfUrlencode

This commit is contained in:
gnosygnu 2017-02-25 18:06:47 -05:00
parent 4781529d12
commit 2b2f93b766
3 changed files with 903 additions and 917 deletions

View File

@ -60,19 +60,6 @@ public class Gfo_url_encoder_ {
return new Gfo_url_encoder_mkr().Init(Byte_ascii.Percent).Init_common(Bool_.N)
.Init__diff__one(Byte_ascii.Space, Byte_ascii.Plus);
}
public static Gfo_url_encoder_mkr New__php_urlencode() {
	// REF: http://php.net/manual/en/function.urlencode.php;
	// PHP manual contract: "Returns a String in which all non-alphanumeric characters except -_. have been replaced with a percent (%) sign followed by two hex digits and spaces encoded as plus (+) signs"
	Gfo_url_encoder_mkr mkr = new Gfo_url_encoder_mkr().Init(Byte_ascii.Percent);
	mkr = mkr.Init_common(Bool_.Y);
	// space is the one byte registered with a different replacement byte (plus)
	return mkr.Init__diff__one(Byte_ascii.Space, Byte_ascii.Plus);
}
public static Gfo_url_encoder_mkr New__php_rawurlencode() {
	// REF: http://php.net/manual/en/function.rawurlencode.php
	// "Returns a String in which all non-alphanumeric characters except -_.~ have been replaced with a percent (%) sign followed by two hex digits. "
	// NOTE: unlike urlencode, rawurlencode does not encode spaces as plus (+) signs; space is deliberately not registered here,
	// so it is treated like any other non-safe byte (presumably emitted as "%20" via the Byte_ascii.Percent marker; confirm against Gfo_url_encoder_mkr)
	return new Gfo_url_encoder_mkr().Init(Byte_ascii.Percent).Init_common(Bool_.Y)
		.Init__same__many(Byte_ascii.Tilde);
}
private static Gfo_url_encoder_mkr New__http_url_ttl() {
	// builder initialized with Byte_ascii.Percent plus the common setup only; no additional byte mappings are registered here
	Gfo_url_encoder_mkr mkr = new Gfo_url_encoder_mkr().Init(Byte_ascii.Percent);
	return mkr.Init_common(Bool_.Y);
}
@ -103,6 +90,31 @@ public class Gfo_url_encoder_ {
.Init__diff__one(Byte_ascii.Space, Byte_ascii.Underline)
;
}
public static Gfo_url_encoder_mkr New__php_urlencode() {
	// REF: http://php.net/manual/en/function.urlencode.php;
	// PHP manual contract: "Returns a String in which all non-alphanumeric characters except -_. have been replaced with a percent (%) sign followed by two hex digits and spaces encoded as plus (+) signs"
	Gfo_url_encoder_mkr mkr = new Gfo_url_encoder_mkr().Init(Byte_ascii.Percent);
	mkr = mkr.Init_common(Bool_.Y);
	// space is the one byte registered with a different replacement byte (plus)
	return mkr.Init__diff__one(Byte_ascii.Space, Byte_ascii.Plus);
}
public static Gfo_url_encoder_mkr New__wfUrlencode() {
	// REF: GlobalFunctions.php|wfUrlencode
	// same setup as php_urlencode, followed by registering ";:@$!*(),/~" as bytes that are not encoded
	Gfo_url_encoder_mkr mkr = new Gfo_url_encoder_mkr().Init(Byte_ascii.Percent);
	mkr = mkr.Init_common(Bool_.Y);
	mkr = mkr.Init__diff__one(Byte_ascii.Space, Byte_ascii.Plus);
	// NOTE: MW doesn't unescape colon if IIS. However, all of WMF servers run on non-IIS boxes, so colon is included as the last entry
	return mkr.Init__same__many
		( Byte_ascii.Semic, Byte_ascii.At, Byte_ascii.Dollar, Byte_ascii.Bang, Byte_ascii.Star
		, Byte_ascii.Paren_bgn, Byte_ascii.Paren_end, Byte_ascii.Comma, Byte_ascii.Slash
		, Byte_ascii.Tilde
		, Byte_ascii.Colon
		);
}
public static Gfo_url_encoder_mkr New__php_rawurlencode() {
	// REF: http://php.net/manual/en/function.rawurlencode.php
	// "Returns a String in which all non-alphanumeric characters except -_.~ have been replaced with a percent (%) sign followed by two hex digits. "
	// NOTE: unlike urlencode, rawurlencode does not encode spaces as plus (+) signs; space is deliberately not registered here,
	// so it is treated like any other non-safe byte (presumably emitted as "%20" via the Byte_ascii.Percent marker; confirm against Gfo_url_encoder_mkr)
	return new Gfo_url_encoder_mkr().Init(Byte_ascii.Percent).Init_common(Bool_.Y)
		.Init__same__many(Byte_ascii.Tilde);
}
public static final Gfo_url_encoder
Id = Gfo_url_encoder_.New__html_id().Make()
, Href = Gfo_url_encoder_.New__html_href_mw(Bool_.Y).Make()
@ -116,5 +128,6 @@ public class Gfo_url_encoder_ {
, Mw_ttl = Gfo_url_encoder_.New__mw_ttl().Make()
, Php_urlencode = Gfo_url_encoder_.New__php_urlencode().Make()
, Php_rawurlencode = Gfo_url_encoder_.New__php_rawurlencode().Make()
, Mw_wfUrlencode = Gfo_url_encoder_.New__wfUrlencode().Make()
;
}

View File

@ -14,39 +14,13 @@ GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki.includes; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*;
import gplx.core.btries.*;
import gplx.langs.htmls.encoders.*;
import gplx.xowa.mediawiki.includes.parsers.*;
/**
* Global functions used everywhere.
*/
public class XomwGlobalFunctions {
// <?php
// /**
// * Global functions used everywhere.
// *
// * This program is free software; you can redistribute it and/or modify
// * it under the terms of the GNU General Public License as published by
// * the Free Software Foundation; either version 2 of the License, or
// * (at your option) any later version.
// *
// * This program is distributed in the hope that it will be useful,
// * but WITHOUT ANY WARRANTY; without even the implied warranty of
// * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// * GNU General Public License for more details.
// *
// * You should have received a copy of the GNU General Public License along
// * with this program; if not, write to the Free Software Foundation, Inc.,
// * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
// * http://www.gnu.org/copyleft/gpl.html
// *
// * @file
// */
//
// if ( !defined( 'MEDIAWIKI' ) ) {
// die( "This file is part of MediaWiki, it is not a valid entry point" );
// }
//
// use Liuggio\StatsdClient\Sender\SocketSender;
// use MediaWiki\Logger\LoggerFactory;
// use MediaWiki\Session\SessionManager;
// use Wikimedia\ScopedCallback;
//
// // Hide compatibility functions from Doxygen
// /// @cond
// /**
@ -357,44 +331,43 @@ public class XomwGlobalFunctions {
// }
// return substr($str, 0, $length);
// }
//
// /**
// * We want some things to be included as literal characters in our title URLs
// * for prettiness, which urlencode encodes by default. According to RFC 1738,
// * all of the following should be safe:
// *
// * ;:@&=$-_.+!*'(),
// *
// * RFC 1738 says ~ is unsafe, however RFC 3986 considers it an unreserved
// * character which should not be encoded. More importantly, google chrome
// * always converts %7E back to ~, and converting it in this function can
// * cause a redirect loop (T105265).
// *
// * But + is not safe because it's used to indicate a space; &= are only safe in
// * paths and not in queries (and we don't distinguish here); ' seems kind of
// * scary; and urlencode() doesn't touch -_. to begin with. Plus, although /
// * is reserved, we don't care. So the list we unescape is:
// *
// * ;:@$!*(),/~
// *
// * However, IIS7 redirects fail when the url contains a colon (see T24709),
// * so no fancy : for IIS7.
// *
// * %2F in the page titles seems to fatally break for some reason.
// *
// * @param String $s
// * @return String
// */
// function wfUrlencode( $s ) {
// static $needle;
//
// if ( is_null( $s ) ) {
/**
* We want some things to be included as literal characters in our title URLs
* for prettiness, which urlencode encodes by default. According to RFC 1738,
* all of the following should be safe:
*
* ;:@&=$-_.+!*'(),
*
* RFC 1738 says ~ is unsafe, however RFC 3986 considers it an unreserved
* character which should not be encoded. More importantly, google chrome
* always converts %7E back to ~, and converting it in this function can
* cause a redirect loop (T105265).
*
* But + is not safe because it's used to indicate a space; &= are only safe in
* paths and not in queries (and we don't distinguish here); ' seems kind of
* scary; and urlencode() doesn't touch -_. to begin with. Plus, although /
* is reserved, we don't care. So the list we unescape is:
*
* ;:@$!*(),/~
*
* However, IIS7 redirects fail when the url contains a colon (see T24709),
* so no fancy : for IIS7.
*
* %2F in the page titles seems to fatally break for some reason.
*
* @param String $s
* @return String
*/
public static byte[] wfUrlencode(byte[] s) {
if (s == null) {
// $needle = null;
// return '';
// }
//
// if ( is_null( $needle ) ) {
// $needle = [ '%3B', '%40', '%24', '%21', '%2A', '%28', '%29', '%2C', '%2F', '%7E' ];
return Bry_.Empty;
}
// SKIP: needle / str_ireplace logic is handled directly by the encoder (Gfo_url_encoder_.Mw_wfUrlencode)
// if (needle == null) {
// needle = [ '%3B', '%40', '%24', '%21', '%2A', '%28', '%29', '%2C', '%2F', '%7E' ];
// if (!isset($_SERVER['SERVER_SOFTWARE']) ||
// (strpos($_SERVER['SERVER_SOFTWARE'], 'Microsoft-IIS/7') === false)
// ) {
@ -408,10 +381,10 @@ public class XomwGlobalFunctions {
// [ ';', '@', '$', '!', '*', '(', ')', ',', '/', '~', ':' ],
// $s
// );
//
// return $s;
// }
//
return Gfo_url_encoder_.Mw_wfUrlencode.Encode(s);
}
// /**
// * This function takes one or two arrays as input, and returns a CGI-style String, e.g.
// * "days=7&limit=100". Options in the first array override options in the second.

View File

@ -50,8 +50,8 @@ public class XomwTitle {
/** @var String Text form (spaces not underscores) of the main part */
private byte[] mTextform = Bry_.Empty;
// /** @var String URL-encoded form of the main part */
// public $mUrlform = Bry_.Empty;
/** @var String URL-encoded form of the main part */
private byte[] mUrlform = Bry_.Empty;
/** @var String Main part with underscores */
// XO: EX: "Help_talk:A_b" . "A_b"
@ -878,14 +878,14 @@ public class XomwTitle {
return this.mTextform;
}
// /**
// * Get the URL-encoded form of the main part
// *
// * @return String Main part of the title, URL-encoded
// */
// public function getPartialURL() {
// return this.mUrlform;
// }
/**
* Returns the URL-encoded form of the main part of the title.
*
* @return String Main part of the title, URL-encoded (the value held in mUrlform)
*/
public byte[] getPartialURL() {
	return mUrlform;
}
/**
* Get the main part with underscores
@ -3376,7 +3376,7 @@ public class XomwTitle {
this.mUserCaseDBKey = parts.user_case_dbkey;
this.mDbkeyform = parts.dbkey;
// this.mUrlform = wfUrlencode(this.mDbkeyform);
this.mUrlform = XomwGlobalFunctions.wfUrlencode(this.mDbkeyform);
this.mTextform = XophpString.strtr(this.mDbkeyform, Byte_ascii.Underline, Byte_ascii.Space);
// We already know that some pages won't be in the database!
@ -4828,7 +4828,7 @@ public class XomwTitle {
// 'mDefaultNamespace',
// ];
// }
//
// public function __wakeup() {
// this.mArticleID = (this.mNamespace >= 0) ? -1 : 0;
// this.mUrlform = wfUrlencode(this.mDbkeyform);