1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2025-06-05 17:04:15 +00:00

Xomw: Add wfUrlencode

This commit is contained in:
gnosygnu 2017-02-25 18:06:47 -05:00
parent 4781529d12
commit 2b2f93b766
3 changed files with 903 additions and 917 deletions

View File

@ -60,19 +60,6 @@ public class Gfo_url_encoder_ {
return new Gfo_url_encoder_mkr().Init(Byte_ascii.Percent).Init_common(Bool_.N) return new Gfo_url_encoder_mkr().Init(Byte_ascii.Percent).Init_common(Bool_.N)
.Init__diff__one(Byte_ascii.Space, Byte_ascii.Plus); .Init__diff__one(Byte_ascii.Space, Byte_ascii.Plus);
} }
/**
 * Creates the encoder maker modeled on PHP's urlencode().
 *
 * REF: http://php.net/manual/en/function.urlencode.php
 * "Returns a String in which all non-alphanumeric characters except -_. have been replaced
 * with a percent (%) sign followed by two hex digits and spaces encoded as plus (+) signs"
 *
 * @return maker initialized with the percent marker, the common settings, and the Space -> Plus mapping
 */
public static Gfo_url_encoder_mkr New__php_urlencode() {
    Gfo_url_encoder_mkr mkr = new Gfo_url_encoder_mkr().Init(Byte_ascii.Percent).Init_common(Bool_.Y);
    // urlencode-specific: register space so that it is emitted as "+"
    mkr = mkr.Init__diff__one(Byte_ascii.Space, Byte_ascii.Plus);
    return mkr;
}
/**
 * Creates the encoder maker modeled on PHP's rawurlencode().
 *
 * REF: http://php.net/manual/en/function.rawurlencode.php
 * "Returns a String in which all non-alphanumeric characters except -_.~ have been replaced
 * with a percent (%) sign followed by two hex digits."
 *
 * @return maker initialized with the percent marker, the common settings, and tilde registered as unescaped
 */
public static Gfo_url_encoder_mkr New__php_rawurlencode() {
    // FIX: unlike urlencode(), rawurlencode() never writes space as "+"; the Space -> Plus mapping
    // that used to be registered here has been removed.
    // NOTE(review): with no explicit mapping, space is expected to fall through to the default
    // percent-hex encoding ("%20") -- confirm against Gfo_url_encoder_mkr defaults.
    return new Gfo_url_encoder_mkr().Init(Byte_ascii.Percent).Init_common(Bool_.Y)
        .Init__same__many(Byte_ascii.Tilde);
}
private static Gfo_url_encoder_mkr New__http_url_ttl() { private static Gfo_url_encoder_mkr New__http_url_ttl() {
return new Gfo_url_encoder_mkr().Init(Byte_ascii.Percent).Init_common(Bool_.Y); return new Gfo_url_encoder_mkr().Init(Byte_ascii.Percent).Init_common(Bool_.Y);
} }
@ -103,6 +90,31 @@ public class Gfo_url_encoder_ {
.Init__diff__one(Byte_ascii.Space, Byte_ascii.Underline) .Init__diff__one(Byte_ascii.Space, Byte_ascii.Underline)
; ;
} }
/**
 * Creates the encoder maker modeled on PHP's urlencode().
 *
 * REF: http://php.net/manual/en/function.urlencode.php
 * "Returns a String in which all non-alphanumeric characters except -_. have been replaced
 * with a percent (%) sign followed by two hex digits and spaces encoded as plus (+) signs"
 *
 * @return maker initialized with the percent marker, the common settings, and the Space -> Plus mapping
 */
public static Gfo_url_encoder_mkr New__php_urlencode() {
    Gfo_url_encoder_mkr mkr = new Gfo_url_encoder_mkr().Init(Byte_ascii.Percent).Init_common(Bool_.Y);
    // urlencode-specific: register space so that it is emitted as "+"
    mkr = mkr.Init__diff__one(Byte_ascii.Space, Byte_ascii.Plus);
    return mkr;
}
/**
 * Creates the encoder maker for MediaWiki's wfUrlencode.
 *
 * REF: GlobalFunctions.php|wfUrlencode
 * Configured the same way as New__php_urlencode (percent marker, common settings, Space -> Plus),
 * and additionally registers the bytes ";:@$!*(),/~" so that they are not encoded.
 *
 * @return maker carrying the wfUrlencode configuration
 */
public static Gfo_url_encoder_mkr New__wfUrlencode() {
    Gfo_url_encoder_mkr mkr = new Gfo_url_encoder_mkr().Init(Byte_ascii.Percent).Init_common(Bool_.Y);
    mkr = mkr.Init__diff__one(Byte_ascii.Space, Byte_ascii.Plus);
    // Bytes that wfUrlencode leaves as-is.
    // NOTE: MW keeps the colon escaped when running on IIS; all WMF servers run on non-IIS boxes,
    // so the colon is included in the list.
    mkr = mkr.Init__same__many
        ( Byte_ascii.Semic, Byte_ascii.At, Byte_ascii.Dollar, Byte_ascii.Bang, Byte_ascii.Star
        , Byte_ascii.Paren_bgn, Byte_ascii.Paren_end, Byte_ascii.Comma, Byte_ascii.Slash
        , Byte_ascii.Tilde
        , Byte_ascii.Colon
        );
    return mkr;
}
/**
 * Creates the encoder maker modeled on PHP's rawurlencode().
 *
 * REF: http://php.net/manual/en/function.rawurlencode.php
 * "Returns a String in which all non-alphanumeric characters except -_.~ have been replaced
 * with a percent (%) sign followed by two hex digits."
 *
 * @return maker initialized with the percent marker, the common settings, and tilde registered as unescaped
 */
public static Gfo_url_encoder_mkr New__php_rawurlencode() {
    // FIX: unlike urlencode(), rawurlencode() never writes space as "+"; the Space -> Plus mapping
    // that used to be registered here has been removed.
    // NOTE(review): with no explicit mapping, space is expected to fall through to the default
    // percent-hex encoding ("%20") -- confirm against Gfo_url_encoder_mkr defaults.
    return new Gfo_url_encoder_mkr().Init(Byte_ascii.Percent).Init_common(Bool_.Y)
        .Init__same__many(Byte_ascii.Tilde);
}
public static final Gfo_url_encoder public static final Gfo_url_encoder
Id = Gfo_url_encoder_.New__html_id().Make() Id = Gfo_url_encoder_.New__html_id().Make()
, Href = Gfo_url_encoder_.New__html_href_mw(Bool_.Y).Make() , Href = Gfo_url_encoder_.New__html_href_mw(Bool_.Y).Make()
@ -116,5 +128,6 @@ public class Gfo_url_encoder_ {
, Mw_ttl = Gfo_url_encoder_.New__mw_ttl().Make() , Mw_ttl = Gfo_url_encoder_.New__mw_ttl().Make()
, Php_urlencode = Gfo_url_encoder_.New__php_urlencode().Make() , Php_urlencode = Gfo_url_encoder_.New__php_urlencode().Make()
, Php_rawurlencode = Gfo_url_encoder_.New__php_rawurlencode().Make() , Php_rawurlencode = Gfo_url_encoder_.New__php_rawurlencode().Make()
, Mw_wfUrlencode = Gfo_url_encoder_.New__wfUrlencode().Make()
; ;
} }

View File

@ -14,39 +14,13 @@ GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/ */
package gplx.xowa.mediawiki.includes; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; package gplx.xowa.mediawiki.includes; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*;
import gplx.core.btries.*;
import gplx.langs.htmls.encoders.*;
import gplx.xowa.mediawiki.includes.parsers.*; import gplx.xowa.mediawiki.includes.parsers.*;
/**
* Global functions used everywhere.
*/
public class XomwGlobalFunctions { public class XomwGlobalFunctions {
// <?php
// /**
// * Global functions used everywhere.
// *
// * This program is free software; you can redistribute it and/or modify
// * it under the terms of the GNU General Public License as published by
// * the Free Software Foundation; either version 2 of the License, or
// * (at your option) any later version.
// *
// * This program is distributed in the hope that it will be useful,
// * but WITHOUT ANY WARRANTY; without even the implied warranty of
// * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// * GNU General Public License for more details.
// *
// * You should have received a copy of the GNU General Public License along
// * with this program; if not, write to the Free Software Foundation, Inc.,
// * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
// * http://www.gnu.org/copyleft/gpl.html
// *
// * @file
// */
//
// if ( !defined( 'MEDIAWIKI' ) ) {
// die( "This file is part of MediaWiki, it is not a valid entry point" );
// }
//
// use Liuggio\StatsdClient\Sender\SocketSender;
// use MediaWiki\Logger\LoggerFactory;
// use MediaWiki\Session\SessionManager;
// use Wikimedia\ScopedCallback;
//
// // Hide compatibility functions from Doxygen // // Hide compatibility functions from Doxygen
// /// @cond // /// @cond
// /** // /**
@ -357,44 +331,43 @@ public class XomwGlobalFunctions {
// } // }
// return substr($str, 0, $length); // return substr($str, 0, $length);
// } // }
//
// /** /**
// * We want some things to be included as literal characters in our title URLs * We want some things to be included as literal characters in our title URLs
// * for prettiness, which urlencode encodes by default. According to RFC 1738, * for prettiness, which urlencode encodes by default. According to RFC 1738,
// * all of the following should be safe: * all of the following should be safe:
// * *
// * ;:@&=$-_.+!*'(), * ;:@&=$-_.+!*'(),
// * *
// * RFC 1738 says ~ is unsafe, however RFC 3986 considers it an unreserved * RFC 1738 says ~ is unsafe, however RFC 3986 considers it an unreserved
// * character which should not be encoded. More importantly, google chrome * character which should not be encoded. More importantly, google chrome
// * always converts %7E back to ~, and converting it in this function can * always converts %7E back to ~, and converting it in this function can
// * cause a redirect loop (T105265). * cause a redirect loop (T105265).
// * *
// * But + is not safe because it's used to indicate a space; &= are only safe in * But + is not safe because it's used to indicate a space; &= are only safe in
// * paths and not in queries (and we don't distinguish here); ' seems kind of * paths and not in queries (and we don't distinguish here); ' seems kind of
// * scary; and urlencode() doesn't touch -_. to begin with. Plus, although / * scary; and urlencode() doesn't touch -_. to begin with. Plus, although /
// * is reserved, we don't care. So the list we unescape is: * is reserved, we don't care. So the list we unescape is:
// * *
// * ;:@$!*(),/~ * ;:@$!*(),/~
// * *
// * However, IIS7 redirects fail when the url contains a colon (see T24709), * However, IIS7 redirects fail when the url contains a colon (see T24709),
// * so no fancy : for IIS7. * so no fancy : for IIS7.
// * *
// * %2F in the page titles seems to fatally break for some reason. * %2F in the page titles seems to fatally break for some reason.
// * *
// * @param String $s * @param String $s
// * @return String * @return String
// */ */
// function wfUrlencode( $s ) { public static byte[] wfUrlencode(byte[] s) {
// static $needle; if (s == null) {
//
// if ( is_null( $s ) ) {
// $needle = null; // $needle = null;
// return ''; return Bry_.Empty;
// } }
//
// if ( is_null( $needle ) ) { // SKIP: handled directly in Gfo_url_encoder_.New__wfUrlencode
// $needle = [ '%3B', '%40', '%24', '%21', '%2A', '%28', '%29', '%2C', '%2F', '%7E' ]; // if (needle == null) {
// needle = [ '%3B', '%40', '%24', '%21', '%2A', '%28', '%29', '%2C', '%2F', '%7E' ];
// if (!isset($_SERVER['SERVER_SOFTWARE']) || // if (!isset($_SERVER['SERVER_SOFTWARE']) ||
// (strpos($_SERVER['SERVER_SOFTWARE'], 'Microsoft-IIS/7') === false) // (strpos($_SERVER['SERVER_SOFTWARE'], 'Microsoft-IIS/7') === false)
// ) { // ) {
@ -408,10 +381,10 @@ public class XomwGlobalFunctions {
// [ ';', '@', '$', '!', '*', '(', ')', ',', '/', '~', ':' ], // [ ';', '@', '$', '!', '*', '(', ')', ',', '/', '~', ':' ],
// $s // $s
// ); // );
//
// return $s; return Gfo_url_encoder_.Mw_wfUrlencode.Encode(s);
// } }
//
// /** // /**
// * This function takes one or two arrays as input, and returns a CGI-style String, e.g. // * This function takes one or two arrays as input, and returns a CGI-style String, e.g.
// * "days=7&limit=100". Options in the first array override options in the second. // * "days=7&limit=100". Options in the first array override options in the second.

View File

@ -50,8 +50,8 @@ public class XomwTitle {
/** @var String Text form (spaces not underscores) of the main part */ /** @var String Text form (spaces not underscores) of the main part */
private byte[] mTextform = Bry_.Empty; private byte[] mTextform = Bry_.Empty;
// /** @var String URL-encoded form of the main part */ /** @var String URL-encoded form of the main part */
// public $mUrlform = Bry_.Empty; private byte[] mUrlform = Bry_.Empty;
/** @var String Main part with underscores */ /** @var String Main part with underscores */
// XO: EX: "Help_talk:A_b" . "A_b" // XO: EX: "Help_talk:A_b" . "A_b"
@ -878,14 +878,14 @@ public class XomwTitle {
return this.mTextform; return this.mTextform;
} }
// /** /**
// * Get the URL-encoded form of the main part * Get the URL-encoded form of the main part
// * *
// * @return String Main part of the title, URL-encoded * @return String Main part of the title, URL-encoded
// */ */
// public function getPartialURL() { public byte[] getPartialURL() {
// return this.mUrlform; return this.mUrlform;
// } }
/** /**
* Get the main part with underscores * Get the main part with underscores
@ -3376,7 +3376,7 @@ public class XomwTitle {
this.mUserCaseDBKey = parts.user_case_dbkey; this.mUserCaseDBKey = parts.user_case_dbkey;
this.mDbkeyform = parts.dbkey; this.mDbkeyform = parts.dbkey;
// this.mUrlform = wfUrlencode(this.mDbkeyform); this.mUrlform = XomwGlobalFunctions.wfUrlencode(this.mDbkeyform);
this.mTextform = XophpString.strtr(this.mDbkeyform, Byte_ascii.Underline, Byte_ascii.Space); this.mTextform = XophpString.strtr(this.mDbkeyform, Byte_ascii.Underline, Byte_ascii.Space);
// We already know that some pages won't be in the database! // We already know that some pages won't be in the database!
@ -4828,7 +4828,7 @@ public class XomwTitle {
// 'mDefaultNamespace', // 'mDefaultNamespace',
// ]; // ];
// } // }
//
// public function __wakeup() { // public function __wakeup() {
// this.mArticleID = (this.mNamespace >= 0) ? -1 : 0; // this.mArticleID = (this.mNamespace >= 0) ? -1 : 0;
// this.mUrlform = wfUrlencode(this.mDbkeyform); // this.mUrlform = wfUrlencode(this.mDbkeyform);