From 1f10e613716eb332a651fa65b734cfa0fa924efd Mon Sep 17 00:00:00 2001 From: gnosygnu Date: Mon, 27 Feb 2017 08:36:18 -0500 Subject: [PATCH] Xomw: Add XomwMessage class and related --- .../src/gplx/xowa/mediawiki/XophpString.java | 3 + .../xowa/mediawiki/includes/XomwHtml.java | 2 +- .../XomwHtml_expandAttributesTest.java | 2 +- .../xowa/mediawiki/includes/XomwLinker.java | 2 +- .../xowa/mediawiki/includes/XomwMessage.java | 1321 +++++++++++++++++ .../mediawiki/includes/XomwRawMessage.java | 67 + .../mediawiki/includes/XomwSanitizer.java | 2 +- .../mediawiki/includes/XomwSanitizerTest.java | 2 +- .../includes/cache/XomwMessageCache.java | 1282 ++++++++++++++++ .../includes/interwiki/XomwInterwiki.java | 179 +++ .../interwiki/XomwInterwikiLookup.java | 52 + .../interwiki/XomwInterwikiLookupAdapter.java | 156 ++ .../includes/linkers/XomwLinkRenderer.java | 2 +- .../includes/parsers/XomwLinkHolderArray.java | 2 +- .../includes/parsers/XomwParser.java | 2 +- .../includes/parsers/XomwParserIface.java | 2 +- .../includes/parsers/lnkes/Xomw_lnke_wkr.java | 2 +- .../includes/parsers/lnkis/Xomw_lnki_wkr.java | 2 +- .../magiclinks/Xomw_magiclinks_wkr.java | 2 +- .../{htmls => xohtml}/Xomw_atr_itm.java | 2 +- .../{htmls => xohtml}/Xomw_atr_mgr.java | 2 +- .../{htmls => xohtml}/Xomw_html_elem.java | 2 +- .../{htmls => xohtml}/Xomw_opt_mgr.java | 2 +- .../{htmls => xohtml}/Xomw_qry_mgr.java | 2 +- 24 files changed, 3077 insertions(+), 17 deletions(-) create mode 100644 gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/XomwRawMessage.java create mode 100644 gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/cache/XomwMessageCache.java create mode 100644 gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/interwiki/XomwInterwiki.java create mode 100644 gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/interwiki/XomwInterwikiLookup.java create mode 100644 gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/interwiki/XomwInterwikiLookupAdapter.java rename 
gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/{htmls => xohtml}/Xomw_atr_itm.java (85%) rename gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/{htmls => xohtml}/Xomw_atr_mgr.java (92%) rename gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/{htmls => xohtml}/Xomw_html_elem.java (86%) rename gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/{htmls => xohtml}/Xomw_opt_mgr.java (82%) rename gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/{htmls => xohtml}/Xomw_qry_mgr.java (82%) diff --git a/gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/XophpString.java b/gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/XophpString.java index e1ef2a303..329c45164 100644 --- a/gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/XophpString.java +++ b/gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/XophpString.java @@ -150,4 +150,7 @@ public class XophpString { public static byte[] str_replace(byte find, byte repl, byte[] src) { return Bry_.Replace(src, 0, src.length, find, repl); } + public static byte[] str_replace(byte[] find, byte[] repl, byte[] src) { + return Bry_.Replace(src, find, repl); + } } diff --git a/gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/XomwHtml.java b/gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/XomwHtml.java index dc0c81743..231bd7f4e 100644 --- a/gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/XomwHtml.java +++ b/gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/XomwHtml.java @@ -15,7 +15,7 @@ Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt */ package gplx.xowa.mediawiki.includes; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.core.btries.*; -import gplx.xowa.mediawiki.includes.htmls.*; +import gplx.xowa.mediawiki.includes.xohtml.*; /* TODO.XO: * handle spaceSeparatedListAttributes; EX: "cls=a cls=b" -> "cls='a b'" * self::dropDefaults($element, $attribs) diff --git a/gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/XomwHtml_expandAttributesTest.java 
b/gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/XomwHtml_expandAttributesTest.java index e6672deda..af255b7c8 100644 --- a/gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/XomwHtml_expandAttributesTest.java +++ b/gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/XomwHtml_expandAttributesTest.java @@ -15,7 +15,7 @@ Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt */ package gplx.xowa.mediawiki.includes; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import org.junit.*; import gplx.core.tests.*; -import gplx.xowa.mediawiki.includes.htmls.*; +import gplx.xowa.mediawiki.includes.xohtml.*; public class XomwHtml_expandAttributesTest { private final XomwHtml_expandAttributesFxt fxt = new XomwHtml_expandAttributesFxt(); @Test public void Basic() { diff --git a/gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/XomwLinker.java b/gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/XomwLinker.java index a71971b7b..861829821 100644 --- a/gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/XomwLinker.java +++ b/gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/XomwLinker.java @@ -16,7 +16,7 @@ Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt package gplx.xowa.mediawiki.includes; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.core.btries.*; import gplx.langs.htmls.*; -import gplx.xowa.mediawiki.includes.htmls.*; import gplx.xowa.mediawiki.includes.linkers.*; import gplx.xowa.mediawiki.includes.parsers.*; +import gplx.xowa.mediawiki.includes.xohtml.*; import gplx.xowa.mediawiki.includes.linkers.*; import gplx.xowa.mediawiki.includes.parsers.*; import gplx.xowa.mediawiki.includes.filerepo.file.*; import gplx.xowa.mediawiki.includes.media.*; import gplx.xowa.mediawiki.includes.parsers.lnkis.*; /* TODO.XO diff --git a/gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/XomwMessage.java 
b/gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/XomwMessage.java index 1ac8761da..fb5734b72 100644 --- a/gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/XomwMessage.java +++ b/gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/XomwMessage.java @@ -14,7 +14,1328 @@ GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt */ package gplx.xowa.mediawiki.includes; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; +/** +* The Message cls provides methods which fulfil two basic services: +* - fetching interface messages +* - processing messages into a variety of formats +* +* First implemented with MediaWiki 1.17, the Message cls is intended to +* replace the old wfMsg* functions that over time grew unusable. +* @see https://www.mediawiki.org/wiki/Manual:Messages_API for equivalences +* between old and new functions. +* +* You should use the wfMessage() global function which acts as a wrapper for +* the Message cls. The wrapper let you pass parameters as arguments. +* +* The most basic usage cases would be: +* +* @code +* // Initialize a Message Object using the 'some_key' message key +* $message = wfMessage( 'some_key' ); +* +* // Using two parameters those values are strings 'value1' and 'value2': +* $message = wfMessage( 'some_key', +* 'value1', 'value2' +* ); +* @endcode +* +* @section message_global_fn Global function wrapper: +* +* Since wfMessage() returns a Message instance, you can chain its call with +* a method. Some of them return a Message instance too so you can chain them. +* You will find below several examples of wfMessage() usage. 
+* +* Fetching a message text for interface message: +* +* @code +* $button = Xml::button( +* wfMessage( 'submit' )->text() +* ); +* @endcode +* +* A Message instance can be passed parameters after it has been constructed, +* use the params() method to do so: +* +* @code +* wfMessage( 'welcome-to' ) +* ->params( $wgSitename ) +* ->text(); +* @endcode +* +* {{GRAMMAR}} and friends work correctly: +* +* @code +* wfMessage( 'are-friends', +* $user, $friend +* ); +* wfMessage( 'bad-message' ) +* ->rawParams( '' ) +* ->escaped(); +* @endcode +* +* @section message_language Changing language: +* +* Messages can be requested in a different language or in whatever current +* content language is being used. The methods are: +* - Message->inContentLanguage() +* - Message->inLanguage() +* +* Sometimes the message text ends up in the database, so content language is +* needed: +* +* @code +* wfMessage( 'file-log', +* $user, $filename +* )->inContentLanguage()->text(); +* @endcode +* +* Checking whether a message exists: +* +* @code +* wfMessage( 'mysterious-message' )->exists() +* // returns a boolean whether the 'mysterious-message' key exist. +* @endcode +* +* If you want to use a different language: +* +* @code +* $userLanguage = $user->getOption( 'language' ); +* wfMessage( 'email-header' ) +* ->inLanguage( $userLanguage ) +* ->plain(); +* @endcode +* +* @note You can parse the text only in the content or interface languages +* +* @section message_compare_old Comparison with old wfMsg* functions: +* +* Use full parsing: +* +* @code +* // old style: +* wfMsgExt( 'key', [ 'parseinline' ], 'apple' ); +* // new style: +* wfMessage( 'key', 'apple' )->parse(); +* @endcode +* +* Parseinline is used because it is more useful when pre-building HTML. +* In normal use it is better to use OutputPage::(add|wrap)WikiMsg. +* +* Places where HTML cannot be used. {{-transformation is done. 
+* @code +* // old style: +* wfMsgExt( 'key', [ 'parsemag' ], 'apple', 'pear' ); +* // new style: +* wfMessage( 'key', 'apple', 'pear' )->text(); +* @endcode +* +* Shortcut for escaping the message too, similar to wfMsgHTML(), but +* parameters are not replaced after escaping by default. +* @code +* $escaped = wfMessage( 'key' ) +* ->rawParams( 'apple' ) +* ->escaped(); +* @endcode +* +* @section message_appendix Appendix: +* +* @todo +* - test, can we have tests? +* - this documentation needs to be extended +* +* @see https://www.mediawiki.org/wiki/WfMessage() +* @see https://www.mediawiki.org/wiki/New_messages_API +* @see https://www.mediawiki.org/wiki/Localisation +* +* @since 1.17 +*/ public class XomwMessage { +// /** Use message text as-is */ +// static final FORMAT_PLAIN = 'plain'; +// /** Use normal wikitext -> HTML parsing (the result will be wrapped in a block-level HTML tag) */ +// static final FORMAT_BLOCK_PARSE = 'block-parse'; +// /** Use normal wikitext -> HTML parsing but strip the block-level wrapper */ +// static final FORMAT_PARSE = 'parse'; +// /** Transform {{..}} constructs but don't transform to HTML */ +// static final FORMAT_TEXT = 'text'; +// /** Transform {{..}} constructs, HTML-escape the result */ +// static final FORMAT_ESCAPED = 'escaped'; +// +// /** +// * Mapping from Message::listParam() types to Language methods. +// * @var array +// */ +// protected static $listTypeMap = [ +// 'comma' => 'commaList', +// 'semicolon' => 'semicolonList', +// 'pipe' => 'pipeList', +// 'text' => 'listToText', +// ]; +// +// /** +// * In which language to get this message. True, which is the default, +// * means the current user language, false content language. +// * +// * @var boolean +// */ +// protected $interface = true; +// +// /** +// * In which language to get this message. Overrides the $interface setting. 
+// * +// * @var Language|boolean Explicit language Object, or false for user language +// */ +// protected $language = false; +// +// /** +// * @var String The message key. If $keysToTry has more than one element, +// * this may change to one of the keys to try when fetching the message text. +// */ +// protected $key; +// +// /** +// * @var String... List of keys to try when fetching the message. +// */ +// protected $keysToTry; +// +// /** +// * @var array List of parameters which will be substituted into the message. +// */ +// protected $parameters = []; +// +// /** +// * @var String +// * @deprecated +// */ +// protected $format = 'parse'; +// +// /** +// * @var boolean Whether database can be used. +// */ +// protected $useDatabase = true; +// +// /** +// * @var Title Title Object to use as context. +// */ +// protected $title = null; +// +// /** +// * @var Content Content Object representing the message. +// */ +// protected $content = null; +// +// /** +// * @var String +// */ +// protected $message; +// +// /** +// * @since 1.17 +// * @param String|String[]|MessageSpecifier $key Message key, or array of +// * message keys to try and use the first non-empty message for, or a +// * MessageSpecifier to copy from. +// * @param array $params Message parameters. +// * @param Language $language [optional] Language to use (defaults to current user language). 
+// * @throws InvalidArgumentException +// */ +// public function __construct( $key, $params = [], Language $language = null ) { +// if ( $key instanceof MessageSpecifier ) { +// if ( $params ) { +// throw new InvalidArgumentException( +// '$params must be empty if $key is a MessageSpecifier' +// ); +// } +// $params = $key->getParams(); +// $key = $key->getKey(); +// } +// +// if ( !is_string( $key ) && !is_array( $key ) ) { +// throw new InvalidArgumentException( '$key must be a String or an array' ); +// } +// +// $this->keysToTry = (array)$key; +// +// if ( empty( $this->keysToTry ) ) { +// throw new InvalidArgumentException( '$key must not be an empty list' ); +// } +// +// $this->key = reset( $this->keysToTry ); +// +// $this->parameters = array_values( $params ); +// // User language is only resolved in getLanguage(). This helps preserve the +// // semantic intent of "user language" across serialize() and unserialize(). +// $this->language = $language ?: false; +// } +// +// /** +// * @see Serializable::serialize() +// * @since 1.26 +// * @return String +// */ +// public function serialize() { +// return serialize( [ +// 'interface' => $this->interface, +// 'language' => $this->language ? $this->language->getCode() : false, +// 'key' => $this->key, +// 'keysToTry' => $this->keysToTry, +// 'parameters' => $this->parameters, +// 'format' => $this->format, +// 'useDatabase' => $this->useDatabase, +// 'title' => $this->title, +// ] ); +// } +// +// /** +// * @see Serializable::unserialize() +// * @since 1.26 +// * @param String $serialized +// */ +// public function unserialize( $serialized ) { +// $data = unserialize( $serialized ); +// $this->interface = $data['interface']; +// $this->key = $data['key']; +// $this->keysToTry = $data['keysToTry']; +// $this->parameters = $data['parameters']; +// $this->format = $data['format']; +// $this->useDatabase = $data['useDatabase']; +// $this->language = $data['language'] ? 
Language::factory( $data['language'] ) : false; +// $this->title = $data['title']; +// } +// +// /** +// * @since 1.24 +// * +// * @return boolean True if this is a multi-key message, that is, if the key provided to the +// * constructor was a fallback list of keys to try. +// */ +// public function isMultiKey() { +// return count( $this->keysToTry ) > 1; +// } +// +// /** +// * @since 1.24 +// * +// * @return String[] The list of keys to try when fetching the message text, +// * in order of preference. +// */ +// public function getKeysToTry() { +// return $this->keysToTry; +// } +// +// /** +// * Returns the message key. +// * +// * If a list of multiple possible keys was supplied to the constructor, this method may +// * return any of these keys. After the message has been fetched, this method will return +// * the key that was actually used to fetch the message. +// * +// * @since 1.21 +// * +// * @return String +// */ +// public function getKey() { +// return $this->key; +// } +// +// /** +// * Returns the message parameters. +// * +// * @since 1.21 +// * +// * @return array +// */ +// public function getParams() { +// return $this->parameters; +// } +// +// /** +// * Returns the message format. +// * +// * @since 1.21 +// * +// * @return String +// * @deprecated since 1.29 formatting is not stateful +// */ +// public function getFormat() { +// wfDeprecated( __METHOD__, '1.29' ); +// return $this->format; +// } +// +// /** +// * Returns the Language of the Message. +// * +// * @since 1.23 +// * +// * @return Language +// */ +// public function getLanguage() { +// // Defaults to false which means current user language +// return $this->language ?: RequestContext::getMain()->getLanguage(); +// } +// +// /** +// * Factory function that is just wrapper for the real constructor. It is +// * intended to be used instead of the real constructor, because it allows +// * chaining method calls, while new objects don't. 
+// * +// * @since 1.17 +// * +// * @param String|String[]|MessageSpecifier $key +// * @param mixed $param,... Parameters as strings. +// * +// * @return Message +// */ +// public static function newFromKey( $key /*...*/ ) { +// $params = func_get_args(); +// array_shift( $params ); +// return new self( $key, $params ); +// } +// +// /** +// * Transform a MessageSpecifier or a primitive value used interchangeably with +// * specifiers (a message key String, or a key + params array) into a proper Message. +// * +// * Also accepts a MessageSpecifier inside an array: that's not considered a valid format +// * but is an easy error to make due to how StatusValue stores messages internally. +// * Further array elements are ignored in that case. +// * +// * @param String|array|MessageSpecifier $value +// * @return Message +// * @throws InvalidArgumentException +// * @since 1.27 +// */ +// public static function newFromSpecifier( $value ) { +// $params = []; +// if ( is_array( $value ) ) { +// $params = $value; +// $value = array_shift( $params ); +// } +// +// if ( $value instanceof Message ) { // Message, RawMessage, ApiMessage, etc +// $message = clone( $value ); +// } elseif ( $value instanceof MessageSpecifier ) { +// $message = new Message( $value ); +// } elseif ( is_string( $value ) ) { +// $message = new Message( $value, $params ); +// } else { +// throw new InvalidArgumentException( __METHOD__ . ': invalid argument type ' +// . gettype( $value ) ); +// } +// +// return $message; +// } +// +// /** +// * Factory function accepting multiple message keys and returning a message instance +// * for the first message which is non-empty. If all messages are empty then an +// * instance of the first message key is returned. +// * +// * @since 1.18 +// * +// * @param String|String[] $keys,... Message keys, or first argument as an array of all the +// * message keys. 
+// * +// * @return Message +// */ +// public static function newFallbackSequence( /*...*/ ) { +// $keys = func_get_args(); +// if ( func_num_args() == 1 ) { +// if ( is_array( $keys[0] ) ) { +// // Allow an array to be passed as the first argument instead +// $keys = array_values( $keys[0] ); +// } else { +// // Optimize a single String to not need special fallback handling +// $keys = $keys[0]; +// } +// } +// return new self( $keys ); +// } +// +// /** +// * Get a title Object for a mediawiki message, where it can be found in the mediawiki namespace. +// * The title will be for the current language, if the message key is in +// * $wgForceUIMsgAsContentMsg it will be append with the language code (except content +// * language), because Message::inContentLanguage will also return in user language. +// * +// * @see $wgForceUIMsgAsContentMsg +// * @return Title +// * @since 1.26 +// */ +// public function getTitle() { +// global $wgContLang, $wgForceUIMsgAsContentMsg; +// +// $title = $this->key; +// if ( +// !$this->language->equals( $wgContLang ) +// && in_array( $this->key, (array)$wgForceUIMsgAsContentMsg ) +// ) { +// $code = $this->language->getCode(); +// $title .= '/' . $code; +// } +// +// return Title::makeTitle( NS_MEDIAWIKI, $wgContLang->ucfirst( strtr( $title, ' ', '_' ) ) ); +// } +// +// /** +// * Adds parameters to the parameter list of this message. +// * +// * @since 1.17 +// * +// * @param mixed ... Parameters as strings or arrays from +// * Message::numParam() and the like, or a single array of parameters. +// * +// * @return Message $this +// */ +// public function params( /*...*/ ) { +// $args = func_get_args(); +// +// // If $args has only one entry and it's an array, then it's either a +// // non-varargs call or it happens to be a call with just a single +// // "special" parameter. Since the "special" parameters don't have any +// // numeric keys, we'll test that to differentiate the cases. 
+// if ( count( $args ) === 1 && isset( $args[0] ) && is_array( $args[0] ) ) { +// if ( $args[0] === [] ) { +// $args = []; +// } else { +// foreach ( $args[0] as $key => $value ) { +// if ( is_int( $key ) ) { +// $args = $args[0]; +// break; +// } +// } +// } +// } +// +// $this->parameters = array_merge( $this->parameters, array_values( $args ) ); +// return $this; +// } +// +// /** +// * Add parameters that are substituted after parsing or escaping. +// * In other words the parsing process cannot access the contents +// * of this type of parameter, and you need to make sure it is +// * sanitized beforehand. The parser will see "$n", instead. +// * +// * @since 1.17 +// * +// * @param mixed $params,... Raw parameters as strings, or a single argument that is +// * an array of raw parameters. +// * +// * @return Message $this +// */ +// public function rawParams( /*...*/ ) { +// $params = func_get_args(); +// if ( isset( $params[0] ) && is_array( $params[0] ) ) { +// $params = $params[0]; +// } +// foreach ( $params as $param ) { +// $this->parameters[] = self::rawParam( $param ); +// } +// return $this; +// } +// +// /** +// * Add parameters that are numeric and will be passed through +// * Language::formatNum before substitution +// * +// * @since 1.18 +// * +// * @param mixed $param,... Numeric parameters, or a single argument that is +// * an array of numeric parameters. +// * +// * @return Message $this +// */ +// public function numParams( /*...*/ ) { +// $params = func_get_args(); +// if ( isset( $params[0] ) && is_array( $params[0] ) ) { +// $params = $params[0]; +// } +// foreach ( $params as $param ) { +// $this->parameters[] = self::numParam( $param ); +// } +// return $this; +// } +// +// /** +// * Add parameters that are durations of time and will be passed through +// * Language::formatDuration before substitution +// * +// * @since 1.22 +// * +// * @param int|int[] $param,... 
Duration parameters, or a single argument that is +// * an array of duration parameters. +// * +// * @return Message $this +// */ +// public function durationParams( /*...*/ ) { +// $params = func_get_args(); +// if ( isset( $params[0] ) && is_array( $params[0] ) ) { +// $params = $params[0]; +// } +// foreach ( $params as $param ) { +// $this->parameters[] = self::durationParam( $param ); +// } +// return $this; +// } +// +// /** +// * Add parameters that are expiration times and will be passed through +// * Language::formatExpiry before substitution +// * +// * @since 1.22 +// * +// * @param String|String[] $param,... Expiry parameters, or a single argument that is +// * an array of expiry parameters. +// * +// * @return Message $this +// */ +// public function expiryParams( /*...*/ ) { +// $params = func_get_args(); +// if ( isset( $params[0] ) && is_array( $params[0] ) ) { +// $params = $params[0]; +// } +// foreach ( $params as $param ) { +// $this->parameters[] = self::expiryParam( $param ); +// } +// return $this; +// } +// +// /** +// * Add parameters that are time periods and will be passed through +// * Language::formatTimePeriod before substitution +// * +// * @since 1.22 +// * +// * @param int|int[] $param,... Time period parameters, or a single argument that is +// * an array of time period parameters. +// * +// * @return Message $this +// */ +// public function timeperiodParams( /*...*/ ) { +// $params = func_get_args(); +// if ( isset( $params[0] ) && is_array( $params[0] ) ) { +// $params = $params[0]; +// } +// foreach ( $params as $param ) { +// $this->parameters[] = self::timeperiodParam( $param ); +// } +// return $this; +// } +// +// /** +// * Add parameters that are file sizes and will be passed through +// * Language::formatSize before substitution +// * +// * @since 1.22 +// * +// * @param int|int[] $param,... Size parameters, or a single argument that is +// * an array of size parameters. 
+// * +// * @return Message $this +// */ +// public function sizeParams( /*...*/ ) { +// $params = func_get_args(); +// if ( isset( $params[0] ) && is_array( $params[0] ) ) { +// $params = $params[0]; +// } +// foreach ( $params as $param ) { +// $this->parameters[] = self::sizeParam( $param ); +// } +// return $this; +// } +// +// /** +// * Add parameters that are bitrates and will be passed through +// * Language::formatBitrate before substitution +// * +// * @since 1.22 +// * +// * @param int|int[] $param,... Bit rate parameters, or a single argument that is +// * an array of bit rate parameters. +// * +// * @return Message $this +// */ +// public function bitrateParams( /*...*/ ) { +// $params = func_get_args(); +// if ( isset( $params[0] ) && is_array( $params[0] ) ) { +// $params = $params[0]; +// } +// foreach ( $params as $param ) { +// $this->parameters[] = self::bitrateParam( $param ); +// } +// return $this; +// } +// +// /** +// * Add parameters that are plaintext and will be passed through without +// * the content being evaluated. Plaintext parameters are not valid as +// * arguments to parser functions. This differs from self::rawParams in +// * that the Message cls handles escaping to match the output format. +// * +// * @since 1.25 +// * +// * @param String|String[] $param,... plaintext parameters, or a single argument that is +// * an array of plaintext parameters. 
+// * +// * @return Message $this +// */ +// public function plaintextParams( /*...*/ ) { +// $params = func_get_args(); +// if ( isset( $params[0] ) && is_array( $params[0] ) ) { +// $params = $params[0]; +// } +// foreach ( $params as $param ) { +// $this->parameters[] = self::plaintextParam( $param ); +// } +// return $this; +// } +// +// /** +// * Set the language and the title from a context Object +// * +// * @since 1.19 +// * +// * @param IContextSource $context +// * +// * @return Message $this +// */ +// public function setContext( IContextSource $context ) { +// $this->inLanguage( $context->getLanguage() ); +// $this->title( $context->getTitle() ); +// $this->interface = true; +// +// return $this; +// } +// +// /** +// * Request the message in any language that is supported. +// * +// * As a side effect interface message status is unconditionally +// * turned off. +// * +// * @since 1.17 +// * @param Language|String $lang Language code or Language Object. +// * @return Message $this +// * @throws MWException +// */ +// public function inLanguage( $lang ) { +// if ( $lang instanceof Language ) { +// $this->language = $lang; +// } elseif ( is_string( $lang ) ) { +// if ( !$this->language instanceof Language || $this->language->getCode() != $lang ) { +// $this->language = Language::factory( $lang ); +// } +// } elseif ( $lang instanceof StubUserLang ) { +// $this->language = false; +// } else { +// $type = gettype( $lang ); +// throw new MWException( __METHOD__ . " must be " +// . "passed a String or Language Object; $type given" +// ); +// } +// $this->message = null; +// $this->interface = false; +// return $this; +// } +// +// /** +// * Request the message in the wiki's content language, +// * unless it is disabled for this message. 
+// * +// * @since 1.17 +// * @see $wgForceUIMsgAsContentMsg +// * +// * @return Message $this +// */ +// public function inContentLanguage() { +// global $wgForceUIMsgAsContentMsg; +// if ( in_array( $this->key, (array)$wgForceUIMsgAsContentMsg ) ) { +// return $this; +// } +// +// global $wgContLang; +// $this->inLanguage( $wgContLang ); +// return $this; +// } +// +// /** +// * Allows manipulating the interface message flag directly. +// * Can be used to restore the flag after setting a language. +// * +// * @since 1.20 +// * +// * @param boolean $interface +// * +// * @return Message $this +// */ +// public function setInterfaceMessageFlag( $interface ) { +// $this->interface = (boolean)$interface; +// return $this; +// } +// +// /** +// * Enable or disable database use. +// * +// * @since 1.17 +// * +// * @param boolean $useDatabase +// * +// * @return Message $this +// */ +// public function useDatabase( $useDatabase ) { +// $this->useDatabase = (boolean)$useDatabase; +// $this->message = null; +// return $this; +// } +// +// /** +// * Set the Title Object to use as context when transforming the message +// * +// * @since 1.18 +// * +// * @param Title $title +// * +// * @return Message $this +// */ +// public function title( $title ) { +// $this->title = $title; +// return $this; +// } +// +// /** +// * Returns the message as a Content Object. +// * +// * @return Content +// */ +// public function content() { +// if ( !$this->content ) { +// $this->content = new MessageContent( $this ); +// } +// +// return $this->content; +// } +// +// /** +// * Returns the message parsed from wikitext to HTML. +// * +// * @since 1.17 +// * +// * @param String|null $format One of the FORMAT_* constants. Null means use whatever was used +// * the last time (this is for B/C and should be avoided). +// * +// * @return String HTML +// */ +// public function toString( $format = null ) { +// if ( $format === null ) { +// $ex = new LogicException( __METHOD__ . 
' using implicit format: ' . $this->format ); +// \MediaWiki\Logger\LoggerFactory::getInstance( 'message-format' )->warning( +// $ex->getMessage(), [ 'exception' => $ex, 'format' => $this->format, 'key' => $this->key ] ); +// $format = $this->format; +// } +// $String = $this->fetchMessage(); +// +// if ( $String === false ) { +// // Err on the side of safety, ensure that the output +// // is always html safe in the event the message key is +// // missing, since in that case its highly likely the +// // message key is user-controlled. +// // '⧼' is used instead of '<' to side-step any +// // double-escaping issues. +// // (Keep synchronised with mw.Message#toString in JS.) +// return '⧼' . htmlspecialchars( $this->key ) . '⧽'; +// } +// +// # Replace $* with a list of parameters for &uselang=qqx. +// if ( strpos( $String, '$*' ) !== false ) { +// $paramlist = ''; +// if ( $this->parameters !== [] ) { +// $paramlist = ': $' . implode( ', $', range( 1, count( $this->parameters ) ) ); +// } +// $String = str_replace( '$*', $paramlist, $String ); +// } +// +// # Replace parameters before text parsing +// $String = $this->replaceParameters( $String, 'before', $format ); +// +// # Maybe transform using the full parser +// if ( $format === self::FORMAT_PARSE ) { +// $String = $this->parseText( $String ); +// $String = Parser::stripOuterParagraph( $String ); +// } elseif ( $format === self::FORMAT_BLOCK_PARSE ) { +// $String = $this->parseText( $String ); +// } elseif ( $format === self::FORMAT_TEXT ) { +// $String = $this->transformText( $String ); +// } elseif ( $format === self::FORMAT_ESCAPED ) { +// $String = $this->transformText( $String ); +// $String = htmlspecialchars( $String, ENT_QUOTES, 'UTF-8', false ); +// } +// +// # Raw parameter replacement +// $String = $this->replaceParameters( $String, 'after', $format ); +// +// return $String; +// } +// +// /** +// * Magic method implementation of the above (for PHP >= 5.2.0), so we can do, eg: +// * $foo = new 
Message( $key ); +// * $String = "$foo"; +// * +// * @since 1.18 +// * +// * @return String +// */ +// public function __toString() { +// // PHP doesn't allow __toString to throw exceptions and will +// // trigger a fatal error if it does. So, catch any exceptions. +// +// try { +// return $this->toString( self::FORMAT_PARSE ); +// } catch ( Exception $ex ) { +// try { +// trigger_error( "Exception caught in " . __METHOD__ . " (message " . $this->key . "): " +// . $ex, E_USER_WARNING ); +// } catch ( Exception $ex ) { +// // Doh! Cause a fatal error after all? +// } +// +// return '⧼' . htmlspecialchars( $this->key ) . '⧽'; +// } +// } +// +// /** +// * Fully parse the text from wikitext to HTML. +// * +// * @since 1.17 +// * +// * @return String Parsed HTML. +// */ +// public function parse() { +// $this->format = self::FORMAT_PARSE; +// return $this->toString( self::FORMAT_PARSE ); +// } +// +// /** +// * Returns the message text. {{-transformation is done. +// * +// * @since 1.17 +// * +// * @return String Unescaped message text. +// */ +// public function text() { +// $this->format = self::FORMAT_TEXT; +// return $this->toString( self::FORMAT_TEXT ); +// } +// +// /** +// * Returns the message text as-is, only parameters are substituted. +// * +// * @since 1.17 +// * +// * @return String Unescaped untransformed message text. +// */ +// public function plain() { +// $this->format = self::FORMAT_PLAIN; +// return $this->toString( self::FORMAT_PLAIN ); +// } +// +// /** +// * Returns the parsed message text which is always surrounded by a block element. +// * +// * @since 1.17 +// * +// * @return String HTML +// */ +// public function parseAsBlock() { +// $this->format = self::FORMAT_BLOCK_PARSE; +// return $this->toString( self::FORMAT_BLOCK_PARSE ); +// } +// +// /** +// * Returns the message text. {{-transformation is done and the result +// * is escaped excluding any raw parameters. +// * +// * @since 1.17 +// * +// * @return String Escaped message text. 
+// */ +// public function escaped() { +// $this->format = self::FORMAT_ESCAPED; +// return $this->toString( self::FORMAT_ESCAPED ); +// } +// +// /** +// * Check whether a message key has been defined currently. +// * +// * @since 1.17 +// * +// * @return boolean +// */ +// public function exists() { +// return $this->fetchMessage() !== false; +// } +// +// /** +// * Check whether a message does not exist, or is an empty String +// * +// * @since 1.18 +// * @todo FIXME: Merge with isDisabled()? +// * +// * @return boolean +// */ +// public function isBlank() { +// $message = $this->fetchMessage(); +// return $message === false || $message === ''; +// } +// +// /** +// * Check whether a message does not exist, is an empty String, or is "-". +// * +// * @since 1.18 +// * +// * @return boolean +// */ +// public function isDisabled() { +// $message = $this->fetchMessage(); +// return $message === false || $message === '' || $message === '-'; +// } +// +// /** +// * @since 1.17 +// * +// * @param mixed $raw +// * +// * @return array Array with a single "raw" key. +// */ +// public static function rawParam( $raw ) { +// return [ 'raw' => $raw ]; +// } +// +// /** +// * @since 1.18 +// * +// * @param mixed $num +// * +// * @return array Array with a single "num" key. +// */ +// public static function numParam( $num ) { +// return [ 'num' => $num ]; +// } +// +// /** +// * @since 1.22 +// * +// * @param int $duration +// * +// * @return int[] Array with a single "duration" key. +// */ +// public static function durationParam( $duration ) { +// return [ 'duration' => $duration ]; +// } +// +// /** +// * @since 1.22 +// * +// * @param String $expiry +// * +// * @return String[] Array with a single "expiry" key. +// */ +// public static function expiryParam( $expiry ) { +// return [ 'expiry' => $expiry ]; +// } +// +// /** +// * @since 1.22 +// * +// * @param int $period +// * +// * @return int[] Array with a single "period" key. 
+// */ +// public static function timeperiodParam( $period ) { +// return [ 'period' => $period ]; +// } +// +// /** +// * @since 1.22 +// * +// * @param int $size +// * +// * @return int[] Array with a single "size" key. +// */ +// public static function sizeParam( $size ) { +// return [ 'size' => $size ]; +// } +// +// /** +// * @since 1.22 +// * +// * @param int $bitrate +// * +// * @return int[] Array with a single "bitrate" key. +// */ +// public static function bitrateParam( $bitrate ) { +// return [ 'bitrate' => $bitrate ]; +// } +// +// /** +// * @since 1.25 +// * +// * @param String $plaintext +// * +// * @return String[] Array with a single "plaintext" key. +// */ +// public static function plaintextParam( $plaintext ) { +// return [ 'plaintext' => $plaintext ]; +// } +// +// /** +// * @since 1.29 +// * +// * @param array $list +// * @param String $type 'comma', 'semicolon', 'pipe', 'text' +// * @return array Array with "list" and "type" keys. +// */ +// public static function listParam( array $list, $type = 'text' ) { +// if ( !isset( self::$listTypeMap[$type] ) ) { +// throw new InvalidArgumentException( +// "Invalid type '$type'. Known types are: " . join( ', ', array_keys( self::$listTypeMap ) ) +// ); +// } +// return [ 'list' => $list, 'type' => $type ]; +// } +// +// /** +// * Substitutes any parameters into the message text. +// * +// * @since 1.17 +// * +// * @param String $message The message text. +// * @param String $type Either "before" or "after". +// * @param String $format One of the FORMAT_* constants. +// * +// * @return String +// */ +// protected function replaceParameters( $message, $type = 'before', $format ) { +// $replacementKeys = []; +// foreach ( $this->parameters as $n => $param ) { +// list( $paramType, $value ) = $this->extractParam( $param, $format ); +// if ( $type === $paramType ) { +// $replacementKeys['$' . 
( $n + 1 )] = $value; +// } +// } +// $message = strtr( $message, $replacementKeys ); +// return $message; +// } +// +// /** +// * Extracts the parameter type and preprocessed the value if needed. +// * +// * @since 1.18 +// * +// * @param mixed $param Parameter as defined in this cls. +// * @param String $format One of the FORMAT_* constants. +// * +// * @return array Array with the parameter type (either "before" or "after") and the value. +// */ +// protected function extractParam( $param, $format ) { +// if ( is_array( $param ) ) { +// if ( isset( $param['raw'] ) ) { +// return [ 'after', $param['raw'] ]; +// } elseif ( isset( $param['num'] ) ) { +// // Replace number params always in before step for now. +// // No support for combined raw and num params +// return [ 'before', $this->getLanguage()->formatNum( $param['num'] ) ]; +// } elseif ( isset( $param['duration'] ) ) { +// return [ 'before', $this->getLanguage()->formatDuration( $param['duration'] ) ]; +// } elseif ( isset( $param['expiry'] ) ) { +// return [ 'before', $this->getLanguage()->formatExpiry( $param['expiry'] ) ]; +// } elseif ( isset( $param['period'] ) ) { +// return [ 'before', $this->getLanguage()->formatTimePeriod( $param['period'] ) ]; +// } elseif ( isset( $param['size'] ) ) { +// return [ 'before', $this->getLanguage()->formatSize( $param['size'] ) ]; +// } elseif ( isset( $param['bitrate'] ) ) { +// return [ 'before', $this->getLanguage()->formatBitrate( $param['bitrate'] ) ]; +// } elseif ( isset( $param['plaintext'] ) ) { +// return [ 'after', $this->formatPlaintext( $param['plaintext'], $format ) ]; +// } elseif ( isset( $param['list'] ) ) { +// return $this->formatListParam( $param['list'], $param['type'], $format ); +// } else { +// $warning = 'Invalid parameter for message "' . $this->getKey() . '": ' . +// htmlspecialchars( serialize( $param ) ); +// trigger_error( $warning, E_USER_WARNING ); +// $e = new Exception; +// wfDebugLog( 'Bug58676', $warning . "\n" . 
$e->getTraceAsString() ); +// +// return [ 'before', '[INVALID]' ]; +// } +// } elseif ( $param instanceof Message ) { +// // Match language, flags, etc. to the current message. +// $msg = clone $param; +// if ( $msg->language !== $this->language || $msg->useDatabase !== $this->useDatabase ) { +// // Cache depends on these parameters +// $msg->message = null; +// } +// $msg->interface = $this->interface; +// $msg->language = $this->language; +// $msg->useDatabase = $this->useDatabase; +// $msg->title = $this->title; +// +// // DWIM +// if ( $format === 'block-parse' ) { +// $format = 'parse'; +// } +// $msg->format = $format; +// +// // Message objects should not be before parameters because +// // then they'll get double escaped. If the message needs to be +// // escaped, it'll happen right here when we call toString(). +// return [ 'after', $msg->toString( $format ) ]; +// } else { +// return [ 'before', $param ]; +// } +// } +// +// /** +// * Wrapper for what ever method we use to parse wikitext. +// * +// * @since 1.17 +// * +// * @param String $String Wikitext message contents. +// * +// * @return String Wikitext parsed into HTML. +// */ +// protected function parseText( $String ) { +// $out = MessageCache::singleton()->parse( +// $String, +// $this->title, +// /*linestart*/true, +// $this->interface, +// $this->getLanguage() +// ); +// +// return $out instanceof ParserOutput ? $out->getText() : $out; +// } +// +// /** +// * Wrapper for what ever method we use to {{-transform wikitext. +// * +// * @since 1.17 +// * +// * @param String $String Wikitext message contents. +// * +// * @return String Wikitext with {{-constructs replaced with their values. +// */ +// protected function transformText( $String ) { +// return MessageCache::singleton()->transform( +// $String, +// $this->interface, +// $this->getLanguage(), +// $this->title +// ); +// } +// +// /** +// * Wrapper for what ever method we use to get message contents. 
+// * +// * @since 1.17 +// * +// * @return String +// * @throws MWException If message key array is empty. +// */ +// protected function fetchMessage() { +// if ( $this->message === null ) { +// $cache = MessageCache::singleton(); +// +// foreach ( $this->keysToTry as $key ) { +// $message = $cache->get( $key, $this->useDatabase, $this->getLanguage() ); +// if ( $message !== false && $message !== '' ) { +// break; +// } +// } +// +// // NOTE: The constructor makes sure keysToTry isn't empty, +// // so we know that $key and $message are initialized. +// $this->key = $key; +// $this->message = $message; +// } +// return $this->message; +// } +// +// /** +// * Formats a message parameter wrapped with 'plaintext'. Ensures that +// * the entire String is displayed unchanged when displayed in the output +// * format. +// * +// * @since 1.25 +// * +// * @param String $plaintext String to ensure plaintext output of +// * @param String $format One of the FORMAT_* constants. +// * +// * @return String Input plaintext encoded for output to $format +// */ +// protected function formatPlaintext( $plaintext, $format ) { +// switch ( $format ) { +// case self::FORMAT_TEXT: +// case self::FORMAT_PLAIN: +// return $plaintext; +// +// case self::FORMAT_PARSE: +// case self::FORMAT_BLOCK_PARSE: +// case self::FORMAT_ESCAPED: +// default: +// return htmlspecialchars( $plaintext, ENT_QUOTES ); +// +// } +// } +// +// /** +// * Formats a list of parameters as a concatenated String. +// * @since 1.29 +// * @param array $params +// * @param String $listType +// * @param String $format One of the FORMAT_* constants. +// * @return array Array with the parameter type (either "before" or "after") and the value. +// */ +// protected function formatListParam( array $params, $listType, $format ) { +// if ( !isset( self::$listTypeMap[$listType] ) ) { +// $warning = 'Invalid list type for message "' . $this->getKey() . '": ' +// . htmlspecialchars( $listType ) +// . ' (params are ' . 
htmlspecialchars( serialize( $params ) ) . ')'; +// trigger_error( $warning, E_USER_WARNING ); +// $e = new Exception; +// wfDebugLog( 'Bug58676', $warning . "\n" . $e->getTraceAsString() ); +// return [ 'before', '[INVALID]' ]; +// } +// $func = self::$listTypeMap[$listType]; +// +// // Handle an empty list sensibly +// if ( !$params ) { +// return [ 'before', $this->getLanguage()->$func( [] ) ]; +// } +// +// // First, determine what kinds of list items we have +// $types = []; +// $vars = []; +// $list = []; +// foreach ( $params as $n => $p ) { +// list( $type, $value ) = $this->extractParam( $p, $format ); +// $types[$type] = true; +// $list[] = $value; +// $vars[] = '$' . ( $n + 1 ); +// } +// +// // Easy case: all are 'before' or 'after', so just join the +// // values and use the same type. +// if ( count( $types ) === 1 ) { +// return [ key( $types ), $this->getLanguage()->$func( $list ) ]; +// } +// +// // Hard case: We need to process each value per its type, then +// // return the concatenated values as 'after'. We handle this by turning +// // the list into a RawMessage and processing that as a parameter. +// $vars = $this->getLanguage()->$func( $vars ); +// return $this->extractParam( new RawMessage( $vars, $params ), $format ); +// } public XomwMessage(byte[] textBry) { this.textBry = textBry; } diff --git a/gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/XomwRawMessage.java b/gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/XomwRawMessage.java new file mode 100644 index 000000000..b54d48c2f --- /dev/null +++ b/gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/XomwRawMessage.java @@ -0,0 +1,67 @@ +/* +XOWA: the XOWA Offline Wiki Application +Copyright (C) 2012-2017 gnosygnu@gmail.com + +XOWA is licensed under the terms of the General Public License (GPL) Version 3, +or alternatively under the terms of the Apache License Version 2.0. 
+ +You may use XOWA according to either of these licenses as is most appropriate +for your project on a case-by-case basis. + +The terms of each license can be found in the source code repository: + +GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt +Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt +*/ +package gplx.xowa.mediawiki.includes; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; +/** +* Variant of the Message cls. +* +* Rather than treating the message key as a lookup +* value (which is passed to the MessageCache and +* translated as necessary), a RawMessage key is +* treated as the actual message. +* +* All other functionality (parsing, escaping, etc.) +* is preserved. +* +* @since 1.21 +*/ +class XomwRawMessage { // : XomwMessage +// +// /** +// * Call the parent constructor, then store the key as +// * the message. +// * +// * @see Message::__construct +// * +// * @param String $text Message to use. +// * @param array $params Parameters for the message. +// * +// * @throws InvalidArgumentException +// */ +// public function __construct( $text, $params = [] ) { +// if ( !is_string( $text ) ) { +// throw new InvalidArgumentException( '$text must be a String' ); +// } +// +// parent::__construct( $text, $params ); +// +// // The key is the message. +// $this->message = $text; +// } +// +// /** +// * Fetch the message (in this case, the key). +// * +// * @return String +// */ +// public function fetchMessage() { +// // Just in case the message is unset somewhere. 
+// if ( $this->message === null ) { +// $this->message = $this->key; +// } +// +// return $this->message; +// } +} diff --git a/gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/XomwSanitizer.java b/gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/XomwSanitizer.java index cc84f8794..dcaaf7dff 100644 --- a/gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/XomwSanitizer.java +++ b/gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/XomwSanitizer.java @@ -16,7 +16,7 @@ Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt package gplx.xowa.mediawiki.includes; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.core.brys.*; import gplx.core.btries.*; import gplx.core.encoders.*; import gplx.core.primitives.*; import gplx.langs.htmls.entitys.*; import gplx.xowa.parsers.htmls.*; -import gplx.langs.htmls.*; import gplx.xowa.mediawiki.includes.htmls.*; import gplx.xowa.mediawiki.includes.parsers.*; +import gplx.langs.htmls.*; import gplx.xowa.mediawiki.includes.xohtml.*; import gplx.xowa.mediawiki.includes.parsers.*; import gplx.xowa.mediawiki.includes.libs.*; public class XomwSanitizer { private final Mwh_doc_wkr__atr_bldr atr_bldr = new Mwh_doc_wkr__atr_bldr(); diff --git a/gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/XomwSanitizerTest.java b/gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/XomwSanitizerTest.java index 529d6a99d..c8717ef22 100644 --- a/gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/XomwSanitizerTest.java +++ b/gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/XomwSanitizerTest.java @@ -14,7 +14,7 @@ GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt */ package gplx.xowa.mediawiki.includes; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; -import org.junit.*; import gplx.core.tests.*; import gplx.core.btries.*; import 
gplx.xowa.mediawiki.includes.htmls.*; +import org.junit.*; import gplx.core.tests.*; import gplx.core.btries.*; import gplx.xowa.mediawiki.includes.xohtml.*; public class XomwSanitizerTest { private final XomwSanitizerFxt fxt = new XomwSanitizerFxt(); @Test public void Normalize__text() {fxt.Test__normalize_char_references("abc" , "abc");} diff --git a/gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/cache/XomwMessageCache.java b/gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/cache/XomwMessageCache.java new file mode 100644 index 000000000..0c6071c30 --- /dev/null +++ b/gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/cache/XomwMessageCache.java @@ -0,0 +1,1282 @@ +/* +XOWA: the XOWA Offline Wiki Application +Copyright (C) 2012-2017 gnosygnu@gmail.com + +XOWA is licensed under the terms of the General Public License (GPL) Version 3, +or alternatively under the terms of the Apache License Version 2.0. + +You may use XOWA according to either of these licenses as is most appropriate +for your project on a case-by-case basis. + +The terms of each license can be found in the source code repository: + +GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt +Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt +*/ +package gplx.xowa.mediawiki.includes.cache; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*; +/** +* Message cache +* Performs various MediaWiki namespace-related functions +* @ingroup Cache +*/ +public class XomwMessageCache { +// static final FOR_UPDATE = 1; // force message reload +// +// /** How long to wait for memcached locks */ +// static final WAIT_SEC = 15; +// /** How long memcached locks last */ +// static final LOCK_TTL = 30; +// +// /** +// * Process local cache of loaded messages that are defined in +// * MediaWiki namespace. 
First array level is a language code, +// * second level is message key and the values are either message +// * content prefixed with space, or !NONEXISTENT for negative +// * caching. +// * @var array $mCache +// */ +// protected $mCache; +// +// /** +// * @var boolean[] Map of (language code => boolean) +// */ +// protected $mCacheVolatile = []; +// +// /** +// * Should mean that database cannot be used, but check +// * @var boolean $mDisable +// */ +// protected $mDisable; +// +// /** +// * Lifetime for cache, used by Object caching. +// * Set on construction, see __construct(). +// */ +// protected $mExpiry; +// +// /** +// * Message cache has its own parser which it uses to transform messages +// * @var ParserOptions +// */ +// protected $mParserOptions; +// /** @var Parser */ +// protected $mParser; +// +// /** +// * Variable for tracking which variables are already loaded +// * @var array $mLoadedLanguages +// */ +// protected $mLoadedLanguages = []; +// +// /** +// * @var boolean $mInParser +// */ +// protected $mInParser = false; +// +// /** @var WANObjectCache */ +// protected $wanCache; +// /** @var BagOStuff */ +// protected $clusterCache; +// /** @var BagOStuff */ +// protected $srvCache; +// +// /** +// * Singleton instance +// * +// * @var MessageCache $instance +// */ +// private static $instance; +// +// /** +// * Get the signleton instance of this class +// * +// * @since 1.18 +// * @return MessageCache +// */ +// public static function singleton() { +// if ( self::$instance === null ) { +// global $wgUseDatabaseMessages, $wgMsgCacheExpiry, $wgUseLocalMessageCache; +// self::$instance = new self( +// MediaWikiServices::getInstance()->getMainWANObjectCache(), +// wfGetMessageCacheStorage(), +// $wgUseLocalMessageCache +// ? 
MediaWikiServices::getInstance()->getLocalServerObjectCache() +// : new EmptyBagOStuff(), +// $wgUseDatabaseMessages, +// $wgMsgCacheExpiry +// ); +// } +// +// return self::$instance; +// } +// +// /** +// * Destroy the singleton instance +// * +// * @since 1.18 +// */ +// public static function destroyInstance() { +// self::$instance = null; +// } +// +// /** +// * Normalize message key input +// * +// * @param String $key Input message key to be normalized +// * @return String Normalized message key +// */ +// public static function normalizeKey( $key ) { +// global $wgContLang; +// +// $lckey = strtr( $key, ' ', '_' ); +// if ( ord( $lckey ) < 128 ) { +// $lckey[0] = strtolower( $lckey[0] ); +// } else { +// $lckey = $wgContLang->lcfirst( $lckey ); +// } +// +// return $lckey; +// } +// +// /** +// * @param WANObjectCache $wanCache WAN cache instance +// * @param BagOStuff $clusterCache Cluster cache instance +// * @param BagOStuff $srvCache Server cache instance +// * @param boolean $useDB Whether to look for message overrides (e.g. MediaWiki: pages) +// * @param int $expiry Lifetime for cache. @see $mExpiry. +// */ +// public function __construct( +// WANObjectCache $wanCache, +// BagOStuff $clusterCache, +// BagOStuff $srvCache, +// $useDB, +// $expiry +// ) { +// $this->wanCache = $wanCache; +// $this->clusterCache = $clusterCache; +// $this->srvCache = $srvCache; +// +// $this->mDisable = !$useDB; +// $this->mExpiry = $expiry; +// } +// +// /** +// * ParserOptions is lazy initialised. +// * +// * @return ParserOptions +// */ +// function getParserOptions() { +// global $wgUser; +// +// if ( !$this->mParserOptions ) { +// if ( !$wgUser->isSafeToLoad() ) { +// // $wgUser isn't unstubbable yet, so don't try to get a +// // ParserOptions for it. And don't cache this ParserOptions +// // either. 
+// $po = ParserOptions::newFromAnon(); +// $po->setEditSection( false ); +// return $po; +// } +// +// $this->mParserOptions = new ParserOptions; +// $this->mParserOptions->setEditSection( false ); +// } +// +// return $this->mParserOptions; +// } +// +// /** +// * Try to load the cache from APC. +// * +// * @param String $code Optional language code, see documenation of load(). +// * @return array|boolean The cache array, or false if not in cache. +// */ +// protected function getLocalCache( $code ) { +// $cacheKey = wfMemcKey( __CLASS__, $code ); +// +// return $this->srvCache->get( $cacheKey ); +// } +// +// /** +// * Save the cache to APC. +// * +// * @param String $code +// * @param array $cache The cache array +// */ +// protected function saveToLocalCache( $code, $cache ) { +// $cacheKey = wfMemcKey( __CLASS__, $code ); +// $this->srvCache->set( $cacheKey, $cache ); +// } +// +// /** +// * Loads messages from caches or from database in this order: +// * (1) local message cache (if $wgUseLocalMessageCache is enabled) +// * (2) memcached +// * (3) from the database. +// * +// * When succesfully loading from (2) or (3), all higher level caches are +// * updated for the newest version. +// * +// * Nothing is loaded if member variable mDisable is true, either manually +// * set by calling code or if message loading fails (is this possible?). +// * +// * Returns true if cache is already populated or it was succesfully populated, +// * or false if populating empty cache fails. Also returns true if MessageCache +// * is disabled. +// * +// * @param String $code Language to which load messages +// * @param integer $mode Use MessageCache::FOR_UPDATE to skip process cache [optional] +// * @throws MWException +// * @return boolean +// */ +// protected function load( $code, $mode = null ) { +// if ( !is_string( $code ) ) { +// throw new InvalidArgumentException( "Missing language code" ); +// } +// +// # Don't do double loading... 
+// if ( isset( $this->mLoadedLanguages[$code] ) && $mode != self::FOR_UPDATE ) { +// return true; +// } +// +// # 8 lines of code just to say (once) that message cache is disabled +// if ( $this->mDisable ) { +// static $shownDisabled = false; +// if ( !$shownDisabled ) { +// wfDebug( __METHOD__ . ": disabled\n" ); +// $shownDisabled = true; +// } +// +// return true; +// } +// +// # Loading code starts +// $success = false; # Keep track of success +// $staleCache = false; # a cache array with expired data, or false if none has been loaded +// $where = []; # Debug info, delayed to avoid spamming debug log too much +// +// # Hash of the contents is stored in memcache, to detect if data-center cache +// # or local cache goes out of date (e.g. due to replace() on some other server) +// list( $hash, $hashVolatile ) = $this->getValidationHash( $code ); +// $this->mCacheVolatile[$code] = $hashVolatile; +// +// # Try the local cache and check against the cluster hash key... +// $cache = $this->getLocalCache( $code ); +// if ( !$cache ) { +// $where[] = 'local cache is empty'; +// } elseif ( !isset( $cache['HASH'] ) || $cache['HASH'] !== $hash ) { +// $where[] = 'local cache has the wrong hash'; +// $staleCache = $cache; +// } elseif ( $this->isCacheExpired( $cache ) ) { +// $where[] = 'local cache is expired'; +// $staleCache = $cache; +// } elseif ( $hashVolatile ) { +// $where[] = 'local cache validation key is expired/volatile'; +// $staleCache = $cache; +// } else { +// $where[] = 'got from local cache'; +// $success = true; +// $this->mCache[$code] = $cache; +// } +// +// if ( !$success ) { +// $cacheKey = wfMemcKey( 'messages', $code ); # Key in memc for messages +// # Try the global cache. If it is empty, try to acquire a synchronized. If +// # the synchronized can't be acquired, wait for the other thread to finish +// # and then try the global cache a second time. 
+// for ( $failedAttempts = 0; $failedAttempts <= 1; $failedAttempts++ ) { +// if ( $hashVolatile && $staleCache ) { +// # Do not bother fetching the whole cache blob to avoid I/O. +// # Instead, just try to get the non-blocking $statusKey synchronized +// # below, and use the local stale value if it was not acquired. +// $where[] = 'global cache is presumed expired'; +// } else { +// $cache = $this->clusterCache->get( $cacheKey ); +// if ( !$cache ) { +// $where[] = 'global cache is empty'; +// } elseif ( $this->isCacheExpired( $cache ) ) { +// $where[] = 'global cache is expired'; +// $staleCache = $cache; +// } elseif ( $hashVolatile ) { +// # DB results are replica DB lag prone until the holdoff TTL passes. +// # By then, updates should be reflected in loadFromDBWithLock(). +// # One thread renerates the cache while others use old values. +// $where[] = 'global cache is expired/volatile'; +// $staleCache = $cache; +// } else { +// $where[] = 'got from global cache'; +// $this->mCache[$code] = $cache; +// $this->saveToCaches( $cache, 'local-only', $code ); +// $success = true; +// } +// } +// +// if ( $success ) { +// # Done, no need to retry +// break; +// } +// +// # We need to call loadFromDB. Limit the concurrency to one process. +// # This prevents the site from going down when the cache expires. +// # Note that the DB slam protection synchronized here is non-blocking. +// $loadStatus = $this->loadFromDBWithLock( $code, $where, $mode ); +// if ( $loadStatus === true ) { +// $success = true; +// break; +// } elseif ( $staleCache ) { +// # Use the stale cache while some other thread constructs the new one +// $where[] = 'using stale cache'; +// $this->mCache[$code] = $staleCache; +// $success = true; +// break; +// } elseif ( $failedAttempts > 0 ) { +// # Already blocked once, so avoid another synchronized/unlock cycle. +// # This case will typically be hit if memcached is down, or if +// # loadFromDB() takes longer than LOCK_WAIT. 
+// $where[] = "could not acquire status key."; +// break; +// } elseif ( $loadStatus === 'cantacquire' ) { +// # Wait for the other thread to finish, then retry. Normally, +// # the memcached get() will then yeild the other thread's result. +// $where[] = 'waited for other thread to complete'; +// $this->getReentrantScopedLock( $cacheKey ); +// } else { +// # Disable cache; $loadStatus is 'disabled' +// break; +// } +// } +// } +// +// if ( !$success ) { +// $where[] = 'loading FAILED - cache is disabled'; +// $this->mDisable = true; +// $this->mCache = false; +// wfDebugLog( 'MessageCacheError', __METHOD__ . ": Failed to load $code\n" ); +// # This used to throw an exception, but that led to nasty side effects like +// # the whole wiki being instantly down if the memcached server died +// } else { +// # All good, just record the success +// $this->mLoadedLanguages[$code] = true; +// } +// +// $info = implode( ', ', $where ); +// wfDebugLog( 'MessageCache', __METHOD__ . ": Loading $code... $info\n" ); +// +// return $success; +// } +// +// /** +// * @param String $code +// * @param array $where List of wfDebug() comments +// * @param integer $mode Use MessageCache::FOR_UPDATE to use DB_MASTER +// * @return boolean|String True on success or one of ("cantacquire", "disabled") +// */ +// protected function loadFromDBWithLock( $code, array &$where, $mode = null ) { +// # If cache updates on all levels fail, give up on message overrides. +// # This is to avoid easy site outages; see $saveSuccess comments below. +// $statusKey = wfMemcKey( 'messages', $code, 'status' ); +// $status = $this->clusterCache->get( $statusKey ); +// if ( $status === 'error' ) { +// $where[] = "could not load; method is still globally disabled"; +// return 'disabled'; +// } +// +// # Now let's regenerate +// $where[] = 'loading from database'; +// +// # Lock the cache to prevent conflicting writes. +// # This synchronized is non-blocking so stale cache can quickly be used. 
+// # Note that load() will call a blocking getReentrantScopedLock() +// # after this if it really need to wait for any current thread. +// $cacheKey = wfMemcKey( 'messages', $code ); +// $scopedLock = $this->getReentrantScopedLock( $cacheKey, 0 ); +// if ( !$scopedLock ) { +// $where[] = 'could not acquire main synchronized'; +// return 'cantacquire'; +// } +// +// $cache = $this->loadFromDB( $code, $mode ); +// $this->mCache[$code] = $cache; +// $saveSuccess = $this->saveToCaches( $cache, 'all', $code ); +// +// if ( !$saveSuccess ) { +// /** +// * Cache save has failed. +// * +// * There are two main scenarios where this could be a problem: +// * - The cache is more than the maximum size (typically 1MB compressed). +// * - Memcached has no space remaining in the relevant slab class. This is +// * unlikely with recent versions of memcached. +// * +// * Either way, if there is a local cache, nothing bad will happen. If there +// * is no local cache, disabling the message cache for all requests avoids +// * incurring a loadFromDB() overhead on every request, and thus saves the +// * wiki from complete downtime under moderate traffic conditions. +// */ +// if ( $this->srvCache instanceof EmptyBagOStuff ) { +// $this->clusterCache->set( $statusKey, 'error', 60 * 5 ); +// $where[] = 'could not save cache, disabled globally for 5 minutes'; +// } else { +// $where[] = "could not save global cache"; +// } +// } +// +// return true; +// } +// +// /** +// * Loads cacheable messages from the database. Messages bigger than +// * $wgMaxMsgCacheEntrySize are assigned a special value, and are loaded +// * on-demand from the database later. 
+// * +// * @param String $code Language code +// * @param integer $mode Use MessageCache::FOR_UPDATE to skip process cache +// * @return array Loaded messages for storing in caches +// */ +// protected function loadFromDB( $code, $mode = null ) { +// global $wgMaxMsgCacheEntrySize, $wgLanguageCode, $wgAdaptiveMessageCache; +// +// $dbr = wfGetDB( ( $mode == self::FOR_UPDATE ) ? DB_MASTER : DB_REPLICA ); +// +// $cache = []; +// +// # Common conditions +// $conds = [ +// 'page_is_redirect' => 0, +// 'page_namespace' => NS_MEDIAWIKI, +// ]; +// +// $mostused = []; +// if ( $wgAdaptiveMessageCache && $code !== $wgLanguageCode ) { +// if ( !isset( $this->mCache[$wgLanguageCode] ) ) { +// $this->load( $wgLanguageCode ); +// } +// $mostused = array_keys( $this->mCache[$wgLanguageCode] ); +// foreach ( $mostused as $key => $value ) { +// $mostused[$key] = "$value/$code"; +// } +// } +// +// if ( count( $mostused ) ) { +// $conds['page_title'] = $mostused; +// } elseif ( $code !== $wgLanguageCode ) { +// $conds[] = 'page_title' . $dbr->buildLike( $dbr->anyString(), '/', $code ); +// } else { +// # Effectively disallows use of '/' character in NS_MEDIAWIKI for uses +// # other than language code. +// $conds[] = 'page_title NOT' . $dbr->buildLike( $dbr->anyString(), '/', $dbr->anyString() ); +// } +// +// # Conditions to fetch oversized pages to ignore them +// $bigConds = $conds; +// $bigConds[] = 'page_len > ' . intval( $wgMaxMsgCacheEntrySize ); +// +// # Load titles for all oversized pages in the MediaWiki namespace +// $res = $dbr->select( +// 'page', +// [ 'page_title', 'page_latest' ], +// $bigConds, +// __METHOD__ . 
"($code)-big" +// ); +// foreach ( $res as $row ) { +// $cache[$row->page_title] = '!TOO BIG'; +// // At least include revision ID so page changes are reflected in the hash +// $cache['EXCESSIVE'][$row->page_title] = $row->page_latest; +// } +// +// # Conditions to load the remaining pages with their contents +// $smallConds = $conds; +// $smallConds[] = 'page_latest=rev_id'; +// $smallConds[] = 'rev_text_id=old_id'; +// $smallConds[] = 'page_len <= ' . intval( $wgMaxMsgCacheEntrySize ); +// +// $res = $dbr->select( +// [ 'page', 'revision', 'text' ], +// [ 'page_title', 'old_text', 'old_flags' ], +// $smallConds, +// __METHOD__ . "($code)-small" +// ); +// +// foreach ( $res as $row ) { +// $text = Revision::getRevisionText( $row ); +// if ( $text === false ) { +// // Failed to fetch data; possible ES errors? +// // Store a marker to fetch on-demand as a workaround... +// // TODO Use a differnt marker +// $entry = '!TOO BIG'; +// wfDebugLog( +// 'MessageCache', +// __METHOD__ +// . ": failed to load message page text for {$row->page_title} ($code)" +// ); +// } else { +// $entry = ' ' . $text; +// } +// $cache[$row->page_title] = $entry; +// } +// +// $cache['VERSION'] = MSG_CACHE_VERSION; +// ksort( $cache ); +// +// # Hash for validating local cache (APC). No need to take into account +// # messages larger than $wgMaxMsgCacheEntrySize, since those are only +// # stored and fetched from memcache. +// $cache['HASH'] = md5( serialize( $cache ) ); +// $cache['EXPIRY'] = wfTimestamp( TS_MW, time() + $this->mExpiry ); +// +// return $cache; +// } +// +// /** +// * Updates cache as necessary when message page is changed +// * +// * @param String $title Message cache key with initial uppercase letter. 
+// * @param String|boolean $text New contents of the page (false if deleted) +// */ +// public function replace( $title, $text ) { +// global $wgLanguageCode; +// +// if ( $this->mDisable ) { +// return; +// } +// +// list( $msg, $code ) = $this->figureMessage( $title ); +// if ( strpos( $title, '/' ) !== false && $code === $wgLanguageCode ) { +// // Content language overrides do not use the / suffix +// return; +// } +// +// // (a) Update the process cache with the new message text +// if ( $text === false ) { +// // Page deleted +// $this->mCache[$code][$title] = '!NONEXISTENT'; +// } else { +// // Ignore $wgMaxMsgCacheEntrySize so the process cache is up to date +// $this->mCache[$code][$title] = ' ' . $text; +// } +// +// // (b) Update the shared caches in a deferred update with a fresh DB snapshot +// DeferredUpdates::addCallableUpdate( +// function () use ( $title, $msg, $code ) { +// global $wgContLang, $wgMaxMsgCacheEntrySize; +// // Allow one caller at a time to avoid race conditions +// $scopedLock = $this->getReentrantScopedLock( wfMemcKey( 'messages', $code ) ); +// if ( !$scopedLock ) { +// LoggerFactory::getInstance( 'MessageCache' )->error( +// __METHOD__ . 
': could not acquire synchronized to update {title} ({code})', +// [ 'title' => $title, 'code' => $code ] ); +// return; +// } +// // Load the messages from the master DB to avoid race conditions +// $this->loadFromDB( $code, self::FOR_UPDATE ); +// // Load the process cache values and set the per-title cache keys +// $page = WikiPage::factory( Title::makeTitle( NS_MEDIAWIKI, $title ) ); +// $page->loadPageData( $page::READ_LATEST ); +// $text = $this->getMessageTextFromContent( $page->getContent() ); +// // Check if an individual cache key should exist and update cache accordingly +// $titleKey = $this->wanCache->makeKey( +// 'messages-big', $this->mCache[$code]['HASH'], $title ); +// if ( is_string( $text ) && strlen( $text ) > $wgMaxMsgCacheEntrySize ) { +// $this->wanCache->set( $titleKey, ' ' . $text, $this->mExpiry ); +// } +// // Mark this cache as definitely being "latest" (non-volatile) so +// // load() calls do try to refresh the cache with replica DB data +// $this->mCache[$code]['LATEST'] = time(); +// // Pre-emptively update the local datacenter cache so things like edit filter and +// // blacklist changes are reflect immediately, as these often use MediaWiki: pages. +// // The datacenter handling replace() calls should be the same one handling edits +// // as they require HTTP POST. +// $this->saveToCaches( $this->mCache[$code], 'all', $code ); +// // Release the synchronized now that the cache is saved +// ScopedCallback::consume( $scopedLock ); +// +// // Relay the purge. Touching this check key expires cache contents +// // and local cache (APC) validation hash across all datacenters. +// $this->wanCache->touchCheckKey( wfMemcKey( 'messages', $code ) ); +// // Also delete cached sidebar... just in case it is affected +// // @TODO: shouldn't this be $code === $wgLanguageCode? +// if ( $code === 'en' ) { +// // Purge all language sidebars, e.g. 
on ?action=purge to the sidebar messages +// $codes = array_keys( Language::fetchLanguageNames() ); +// } else { +// // Purge only the sidebar for this language +// $codes = [ $code ]; +// } +// foreach ( $codes as $code ) { +// $this->wanCache->delete( wfMemcKey( 'sidebar', $code ) ); +// } +// +// // Purge the message in the message blob store +// $resourceloader = RequestContext::getMain()->getOutput()->getResourceLoader(); +// $blobStore = $resourceloader->getMessageBlobStore(); +// $blobStore->updateMessage( $wgContLang->lcfirst( $msg ) ); +// +// Hooks::run( 'MessageCacheReplace', [ $title, $text ] ); +// }, +// DeferredUpdates::PRESEND +// ); +// } +// +// /** +// * Is the given cache array expired due to time passing or a version change? +// * +// * @param array $cache +// * @return boolean +// */ +// protected function isCacheExpired( $cache ) { +// if ( !isset( $cache['VERSION'] ) || !isset( $cache['EXPIRY'] ) ) { +// return true; +// } +// if ( $cache['VERSION'] != MSG_CACHE_VERSION ) { +// return true; +// } +// if ( wfTimestampNow() >= $cache['EXPIRY'] ) { +// return true; +// } +// +// return false; +// } +// +// /** +// * Shortcut to update caches. +// * +// * @param array $cache Cached messages with a version. +// * @param String $dest Either "local-only" to save to local caches only +// * or "all" to save to all caches. 
+// * @param String|boolean $code Language code (default: false) +// * @return boolean +// */ +// protected function saveToCaches( array $cache, $dest, $code = false ) { +// if ( $dest === 'all' ) { +// $cacheKey = wfMemcKey( 'messages', $code ); +// $success = $this->clusterCache->set( $cacheKey, $cache ); +// $this->setValidationHash( $code, $cache ); +// } else { +// $success = true; +// } +// +// $this->saveToLocalCache( $code, $cache ); +// +// return $success; +// } +// +// /** +// * Get the md5 used to validate the local APC cache +// * +// * @param String $code +// * @return array (hash or false, boolean expiry/volatility status) +// */ +// protected function getValidationHash( $code ) { +// $curTTL = null; +// $value = $this->wanCache->get( +// $this->wanCache->makeKey( 'messages', $code, 'hash', 'v1' ), +// $curTTL, +// [ wfMemcKey( 'messages', $code ) ] +// ); +// +// if ( $value ) { +// $hash = $value['hash']; +// if ( ( time() - $value['latest'] ) < WANObjectCache::TTL_MINUTE ) { +// // Cache was recently updated via replace() and should be up-to-date. +// // That method is only called in the primary datacenter and uses FOR_UPDATE. +// // Also, it is unlikely that the current datacenter is *now* secondary one. +// $expired = false; +// } else { +// // See if the "check" key was bumped after the hash was generated +// $expired = ( $curTTL < 0 ); +// } +// } else { +// // No hash found at all; cache must regenerate to be safe +// $hash = false; +// $expired = true; +// } +// +// return [ $hash, $expired ]; +// } +// +// /** +// * Set the md5 used to validate the local disk cache +// * +// * If $cache has a 'LATEST' UNIX timestamp key, then the hash will not +// * be treated as "volatile" by getValidationHash() for the next few seconds. +// * This is triggered when $cache is generated using FOR_UPDATE mode. 
+// * +// * @param String $code +// * @param array $cache Cached messages with a version +// */ +// protected function setValidationHash( $code, array $cache ) { +// $this->wanCache->set( +// $this->wanCache->makeKey( 'messages', $code, 'hash', 'v1' ), +// [ +// 'hash' => $cache['HASH'], +// 'latest' => isset( $cache['LATEST'] ) ? $cache['LATEST'] : 0 +// ], +// WANObjectCache::TTL_INDEFINITE +// ); +// } +// +// /** +// * @param String $key A language message cache key that stores blobs +// * @param integer $timeout Wait timeout in seconds +// * @return null|ScopedCallback +// */ +// protected function getReentrantScopedLock( $key, $timeout = self::WAIT_SEC ) { +// return $this->clusterCache->getScopedLock( $key, $timeout, self::LOCK_TTL, __METHOD__ ); +// } +// +// /** +// * Get a message from either the content language or the user language. +// * +// * First, assemble a list of languages to attempt getting the message from. This +// * chain begins with the requested language and its fallbacks and then continues with +// * the content language and its fallbacks. For each language in the chain, the following +// * process will occur (in this order): +// * 1. If a language-specific override, i.e., [[MW:msg/lang]], is available, use that. +// * Note: for the content language, there is no /lang subpage. +// * 2. Fetch from the static CDB cache. +// * 3. If available, check the database for fallback language overrides. +// * +// * This process provides a number of guarantees. When changing this code, make sure all +// * of these guarantees are preserved. +// * * If the requested language is *not* the content language, then the CDB cache for that +// * specific language will take precedence over the root database page ([[MW:msg]]). +// * * Fallbacks will be just that: fallbacks. A fallback language will never be reached if +// * the message is available *anywhere* in the language for which it is a fallback. 
+// * +// * @param String $key The message key +// * @param boolean $useDB If true, look for the message in the DB, false +// * to use only the compiled l10n cache. +// * @param boolean|String|Object $langcode Code of the language to get the message for. +// * - If String and a valid code, will create a standard language Object +// * - If String but not a valid code, will create a basic language Object +// * - If boolean and false, create Object from the current users language +// * - If boolean and true, create Object from the wikis content language +// * - If language Object, use it as given +// * @param boolean $isFullKey Specifies whether $key is a two part key "msg/lang". +// * +// * @throws MWException When given an invalid key +// * @return String|boolean False if the message doesn't exist, otherwise the +// * message (which can be empty) +// */ +// function get( $key, $useDB = true, $langcode = true, $isFullKey = false ) { +// if ( is_int( $key ) ) { +// // Fix numerical strings that somehow become ints +// // on their way here +// $key = (String)$key; +// } elseif ( !is_string( $key ) ) { +// throw new MWException( 'Non-String key given' ); +// } elseif ( $key === '' ) { +// // Shortcut: the empty key is always missing +// return false; +// } +// +// // For full keys, get the language code from the key +// $pos = strrpos( $key, '/' ); +// if ( $isFullKey && $pos !== false ) { +// $langcode = substr( $key, $pos + 1 ); +// $key = substr( $key, 0, $pos ); +// } +// +// // Normalise title-case input (with some inlining) +// $lckey = MessageCache::normalizeKey( $key ); +// +// Hooks::run( 'MessageCache::get', [ &$lckey ] ); +// +// // Loop through each language in the fallback list until we find something useful +// $lang = wfGetLangObj( $langcode ); +// $message = $this->getMessageFromFallbackChain( +// $lang, +// $lckey, +// !$this->mDisable && $useDB +// ); +// +// // If we still have no message, maybe the key was in fact a full key so try that +// if ( 
$message === false ) { +// $parts = explode( '/', $lckey ); +// // We may get calls for things that are http-urls from sidebar +// // Let's not load nonexistent languages for those +// // They usually have more than one slash. +// if ( count( $parts ) == 2 && $parts[1] !== '' ) { +// $message = Language::getMessageFor( $parts[0], $parts[1] ); +// if ( $message === null ) { +// $message = false; +// } +// } +// } +// +// // Post-processing if the message exists +// if ( $message !== false ) { +// // Fix whitespace +// $message = str_replace( +// [ +// # Fix for trailing whitespace, removed by textarea +// ' ', +// # Fix for NBSP, converted to space by firefox +// ' ', +// ' ', +// '­' +// ], +// [ +// ' ', +// "\xc2\xa0", +// "\xc2\xa0", +// "\xc2\xad" +// ], +// $message +// ); +// } +// +// return $message; +// } +// +// /** +// * Given a language, try and fetch messages from that language. +// * +// * Will also consider fallbacks of that language, the site language, and fallbacks for +// * the site language. +// * +// * @see MessageCache::get +// * @param Language|StubObject $lang Preferred language +// * @param String $lckey Lowercase key for the message (as for localisation cache) +// * @param boolean $useDB Whether to include messages from the wiki database +// * @return String|boolean The message, or false if not found +// */ +// protected function getMessageFromFallbackChain( $lang, $lckey, $useDB ) { +// global $wgContLang; +// +// $alreadyTried = []; +// +// // First try the requested language. +// $message = $this->getMessageForLang( $lang, $lckey, $useDB, $alreadyTried ); +// if ( $message !== false ) { +// return $message; +// } +// +// // Now try checking the site language. +// $message = $this->getMessageForLang( $wgContLang, $lckey, $useDB, $alreadyTried ); +// return $message; +// } +// +// /** +// * Given a language, try and fetch messages from that language and its fallbacks. 
+// * +// * @see MessageCache::get +// * @param Language|StubObject $lang Preferred language +// * @param String $lckey Lowercase key for the message (as for localisation cache) +// * @param boolean $useDB Whether to include messages from the wiki database +// * @param boolean[] $alreadyTried Contains true for each language that has been tried already +// * @return String|boolean The message, or false if not found +// */ +// private function getMessageForLang( $lang, $lckey, $useDB, &$alreadyTried ) { +// global $wgContLang; +// +// $langcode = $lang->getCode(); +// +// // Try checking the database for the requested language +// if ( $useDB ) { +// $uckey = $wgContLang->ucfirst( $lckey ); +// +// if ( !isset( $alreadyTried[ $langcode ] ) ) { +// $message = $this->getMsgFromNamespace( +// $this->getMessagePageName( $langcode, $uckey ), +// $langcode +// ); +// +// if ( $message !== false ) { +// return $message; +// } +// $alreadyTried[ $langcode ] = true; +// } +// } else { +// $uckey = null; +// } +// +// // Check the CDB cache +// $message = $lang->getMessage( $lckey ); +// if ( $message !== null ) { +// return $message; +// } +// +// // Try checking the database for all of the fallback languages +// if ( $useDB ) { +// $fallbackChain = Language::getFallbacksFor( $langcode ); +// +// foreach ( $fallbackChain as $code ) { +// if ( isset( $alreadyTried[ $code ] ) ) { +// continue; +// } +// +// $message = $this->getMsgFromNamespace( +// $this->getMessagePageName( $code, $uckey ), $code ); +// +// if ( $message !== false ) { +// return $message; +// } +// $alreadyTried[ $code ] = true; +// } +// } +// +// return false; +// } +// +// /** +// * Get the message page name for a given language +// * +// * @param String $langcode +// * @param String $uckey Uppercase key for the message +// * @return String The page name +// */ +// private function getMessagePageName( $langcode, $uckey ) { +// global $wgLanguageCode; +// +// if ( $langcode === $wgLanguageCode ) { +// // 
Messages created in the content language will not have the /lang extension +// return $uckey; +// } else { +// return "$uckey/$langcode"; +// } +// } +// +// /** +// * Get a message from the MediaWiki namespace, with caching. The key must +// * first be converted to two-part lang/msg form if necessary. +// * +// * Unlike self::get(), this function doesn't resolve fallback chains, and +// * some callers require this behavior. LanguageConverter::parseCachedTable() +// * and self::get() are some examples in core. +// * +// * @param String $title Message cache key with initial uppercase letter. +// * @param String $code Code denoting the language to try. +// * @return String|boolean The message, or false if it does not exist or on error +// */ +// public function getMsgFromNamespace( $title, $code ) { +// $this->load( $code ); +// +// if ( isset( $this->mCache[$code][$title] ) ) { +// $entry = $this->mCache[$code][$title]; +// if ( substr( $entry, 0, 1 ) === ' ' ) { +// // The message exists, so make sure a String is returned. +// return (String)substr( $entry, 1 ); +// } elseif ( $entry === '!NONEXISTENT' ) { +// return false; +// } elseif ( $entry === '!TOO BIG' ) { +// // Fall through and try invididual message cache below +// } +// } else { +// // XXX: This is not cached in process cache, should it? +// $message = false; +// Hooks::run( 'MessagesPreLoad', [ $title, &$message, $code ] ); +// if ( $message !== false ) { +// return $message; +// } +// +// return false; +// } +// +// // Try the individual message cache +// $titleKey = $this->wanCache->makeKey( 'messages-big', $this->mCache[$code]['HASH'], $title ); +// +// if ( $this->mCacheVolatile[$code] ) { +// $entry = false; +// // Make sure that individual keys respect the WAN cache holdoff period too +// LoggerFactory::getInstance( 'MessageCache' )->debug( +// __METHOD__ . 
': loading volatile key \'{titleKey}\'', +// [ 'titleKey' => $titleKey, 'code' => $code ] ); +// } else { +// $entry = $this->wanCache->get( $titleKey ); +// } +// +// if ( $entry !== false ) { +// if ( substr( $entry, 0, 1 ) === ' ' ) { +// $this->mCache[$code][$title] = $entry; +// // The message exists, so make sure a String is returned +// return (String)substr( $entry, 1 ); +// } elseif ( $entry === '!NONEXISTENT' ) { +// $this->mCache[$code][$title] = '!NONEXISTENT'; +// +// return false; +// } else { +// // Corrupt/obsolete entry, delete it +// $this->wanCache->delete( $titleKey ); +// } +// } +// +// // Try loading the message from the database +// $dbr = wfGetDB( DB_REPLICA ); +// $cacheOpts = Database::getCacheSetOptions( $dbr ); +// // Use newKnownCurrent() to avoid querying revision/user tables +// $titleObj = Title::makeTitle( NS_MEDIAWIKI, $title ); +// if ( $titleObj->getLatestRevID() ) { +// $revision = Revision::newKnownCurrent( +// $dbr, +// $titleObj->getArticleID(), +// $titleObj->getLatestRevID() +// ); +// } else { +// $revision = false; +// } +// +// if ( $revision ) { +// $content = $revision->getContent(); +// if ( $content ) { +// $message = $this->getMessageTextFromContent( $content ); +// if ( is_string( $message ) ) { +// $this->mCache[$code][$title] = ' ' . $message; +// $this->wanCache->set( $titleKey, ' ' . $message, $this->mExpiry, $cacheOpts ); +// } +// } else { +// // A possibly temporary loading failure +// LoggerFactory::getInstance( 'MessageCache' )->warning( +// __METHOD__ . 
': failed to load message page text for \'{titleKey}\'', +// [ 'titleKey' => $titleKey, 'code' => $code ] ); +// $message = null; // no negative caching +// } +// } else { +// $message = false; // negative caching +// } +// +// if ( $message === false ) { // negative caching +// $this->mCache[$code][$title] = '!NONEXISTENT'; +// $this->wanCache->set( $titleKey, '!NONEXISTENT', $this->mExpiry, $cacheOpts ); +// } +// +// return $message; +// } +// +// /** +// * @param String $message +// * @param boolean $interface +// * @param String $language Language code +// * @param Title $title +// * @return String +// */ +// function transform( $message, $interface = false, $language = null, $title = null ) { +// // Avoid creating parser if nothing to transform +// if ( strpos( $message, '{{' ) === false ) { +// return $message; +// } +// +// if ( $this->mInParser ) { +// return $message; +// } +// +// $parser = $this->getParser(); +// if ( $parser ) { +// $popts = $this->getParserOptions(); +// $popts->setInterfaceMessage( $interface ); +// $popts->setTargetLanguage( $language ); +// +// $userlang = $popts->setUserLang( $language ); +// $this->mInParser = true; +// $message = $parser->transformMsg( $message, $popts, $title ); +// $this->mInParser = false; +// $popts->setUserLang( $userlang ); +// } +// +// return $message; +// } +// +// /** +// * @return Parser +// */ +// function getParser() { +// global $wgParser, $wgParserConf; +// +// if ( !$this->mParser && isset( $wgParser ) ) { +// # Do some initialisation so that we don't have to do it twice +// $wgParser->firstCallInit(); +// # Clone it and store it +// $class = $wgParserConf['class']; +// if ( $class == 'ParserDiffTest' ) { +// # Uncloneable +// $this->mParser = new $class( $wgParserConf ); +// } else { +// $this->mParser = clone $wgParser; +// } +// } +// +// return $this->mParser; +// } +// +// /** +// * @param String $text +// * @param Title $title +// * @param boolean $linestart Whether or not this is at the 
start of a line +// * @param boolean $interface Whether this is an interface message +// * @param Language|String $language Language code +// * @return ParserOutput|String +// */ +// public function parse( $text, $title = null, $linestart = true, +// $interface = false, $language = null +// ) { +// global $wgTitle; +// +// if ( $this->mInParser ) { +// return htmlspecialchars( $text ); +// } +// +// $parser = $this->getParser(); +// $popts = $this->getParserOptions(); +// $popts->setInterfaceMessage( $interface ); +// +// if ( is_string( $language ) ) { +// $language = Language::factory( $language ); +// } +// $popts->setTargetLanguage( $language ); +// +// if ( !$title || !$title instanceof Title ) { +// wfDebugLog( 'GlobalTitleFail', __METHOD__ . ' called by ' . +// wfGetAllCallers( 6 ) . ' with no title set.' ); +// $title = $wgTitle; +// } +// // Sometimes $wgTitle isn't set either... +// if ( !$title ) { +// # It's not uncommon having a null $wgTitle in scripts. See r80898 +// # Create a ghost title in such case +// $title = Title::makeTitle( NS_SPECIAL, 'Badtitle/title not set in ' . __METHOD__ ); +// } +// +// $this->mInParser = true; +// $res = $parser->parse( $text, $title, $popts, $linestart ); +// $this->mInParser = false; +// +// return $res; +// } +// +// function disable() { +// $this->mDisable = true; +// } +// +// function enable() { +// $this->mDisable = false; +// } +// +// /** +// * Whether DB/cache usage is disabled for determining messages +// * +// * If so, this typically indicates either: +// * - a) load() failed to find a cached copy nor query the DB +// * - b) we are in a special context or error mode that cannot use the DB +// * If the DB is ignored, any derived HTML output or cached objects may be wrong. +// * To avoid long-term cache pollution, TTLs can be adjusted accordingly. 
+// * +// * @return boolean +// * @since 1.27 +// */ +// public function isDisabled() { +// return $this->mDisable; +// } +// +// /** +// * Clear all stored messages. Mainly used after a mass rebuild. +// */ +// function clear() { +// $langs = Language::fetchLanguageNames( null, 'mw' ); +// foreach ( array_keys( $langs ) as $code ) { +// # Global and local caches +// $this->wanCache->touchCheckKey( wfMemcKey( 'messages', $code ) ); +// } +// +// $this->mLoadedLanguages = []; +// } +// +// /** +// * @param String $key +// * @return array +// */ +// public function figureMessage( $key ) { +// global $wgLanguageCode; +// +// $pieces = explode( '/', $key ); +// if ( count( $pieces ) < 2 ) { +// return [ $key, $wgLanguageCode ]; +// } +// +// $lang = array_pop( $pieces ); +// if ( !Language::fetchLanguageName( $lang, null, 'mw' ) ) { +// return [ $key, $wgLanguageCode ]; +// } +// +// $message = implode( '/', $pieces ); +// +// return [ $message, $lang ]; +// } +// +// /** +// * Get all message keys stored in the message cache for a given language. +// * If $code is the content language code, this will return all message keys +// * for which MediaWiki:msgkey exists. If $code is another language code, this +// * will ONLY return message keys for which MediaWiki:msgkey/$code exists. +// * @param String $code Language code +// * @return array Array of message keys (strings) +// */ +// public function getAllMessageKeys( $code ) { +// global $wgContLang; +// +// $this->load( $code ); +// if ( !isset( $this->mCache[$code] ) ) { +// // Apparently load() failed +// return null; +// } +// // Remove administrative keys +// $cache = $this->mCache[$code]; +// unset( $cache['VERSION'] ); +// unset( $cache['EXPIRY'] ); +// unset( $cache['EXCESSIVE'] ); +// // Remove any !NONEXISTENT keys +// $cache = array_diff( $cache, [ '!NONEXISTENT' ] ); +// +// // Keys may appear with a capital first letter. lcfirst them. 
+// return array_map( [ $wgContLang, 'lcfirst' ], array_keys( $cache ) ); +// } +// +// /** +// * Purge message caches when a MediaWiki: page is created, updated, or deleted +// * +// * @param Title $title Message page title +// * @param Content|null $content New content for edit/create, null on deletion +// * @since 1.29 +// */ +// public function updateMessageOverride( Title $title, Content $content = null ) { +// global $wgContLang; +// +// $msgText = $this->getMessageTextFromContent( $content ); +// if ( $msgText === null ) { +// $msgText = false; // treat as not existing +// } +// +// $this->replace( $title->getDBkey(), $msgText ); +// +// if ( $wgContLang->hasVariants() ) { +// $wgContLang->updateConversionTable( $title ); +// } +// } +// +// /** +// * @param Content|null $content Content or null if the message page does not exist +// * @return String|boolean|null Returns false if $content is null and null on error +// */ +// private function getMessageTextFromContent( Content $content = null ) { +// // @TODO: could skip pseudo-messages like js/css here, based on content model +// if ( $content ) { +// // Message page exists... +// // XXX: Is this the right way to turn a Content Object into a message? +// // NOTE: $content is typically either WikitextContent, JavaScriptContent or +// // CssContent. MessageContent is *not* used for storing messages, it's +// // only used for wrapping them when needed. +// $msgText = $content->getWikitextForTransclusion(); +// if ( $msgText === false || $msgText === null ) { +// // This might be due to some kind of misconfiguration... +// $msgText = null; +// LoggerFactory::getInstance( 'MessageCache' )->warning( +// __METHOD__ . ": message content doesn't provide wikitext " +// . "(content model: " . $content->getModel() . ")" ); +// } +// } else { +// // Message page does not exist... 
+// $msgText = false; +// } +// +// return $msgText; +// } +} diff --git a/gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/interwiki/XomwInterwiki.java b/gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/interwiki/XomwInterwiki.java new file mode 100644 index 000000000..3420cb74e --- /dev/null +++ b/gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/interwiki/XomwInterwiki.java @@ -0,0 +1,179 @@ +/* +XOWA: the XOWA Offline Wiki Application +Copyright (C) 2012-2017 gnosygnu@gmail.com + +XOWA is licensed under the terms of the General Public License (GPL) Version 3, +or alternatively under the terms of the Apache License Version 2.0. + +You may use XOWA according to either of these licenses as is most appropriate +for your project on a case-by-case basis. + +The terms of each license can be found in the source code repository: + +GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt +Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt +*/ +package gplx.xowa.mediawiki.includes.interwiki; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*; +/** +* Value Object for representing interwiki records. +*/ +public class XomwInterwiki { + + /** @var String The interwiki prefix, (e.g. "Meatball", or the language prefix "de") */ + private byte[] mPrefix; + + /** @var String The URL of the wiki, with "$1" as a placeholder for an article name.
*/ + private byte[] mURL; + + /** @var String The URL of the file api.php */ + private byte[] mAPI; + + /** @var String The name of the database (for a connection to be established + * with wfGetLB('wikiid')) + */ + private byte[] mWikiID; + + /** @var boolean Whether the wiki is in this project */ + private boolean mLocal; + + /** @var boolean Whether interwiki transclusions are allowed */ + private boolean mTrans; + + public XomwInterwiki(byte[] prefix, byte[] url, byte[] api, byte[] wikiId, boolean local, boolean trans) { + this.mPrefix = prefix; + this.mURL = url; + this.mAPI = api; + this.mWikiID = wikiId; + this.mLocal = local; + this.mTrans = trans; + } + +// /** +// * Check whether an interwiki prefix exists +// * +// * @deprecated since 1.28, use InterwikiLookup instead +// * +// * @param String prefix Interwiki prefix to use +// * @return boolean Whether it exists +// */ +// public static function isValidInterwiki(prefix) { +// return MediaWikiServices::getInstance().getInterwikiLookup().isValidInterwiki(prefix); +// } +// +// /** +// * Fetch an Interwiki Object +// * +// * @deprecated since 1.28, use InterwikiLookup instead +// * +// * @param String prefix Interwiki prefix to use +// * @return Interwiki|null|boolean +// */ +// public static function fetch(prefix) { +// return MediaWikiServices::getInstance().getInterwikiLookup().fetch(prefix); +// } +// +// /** +// * Purge the cache (local and persistent) for an interwiki prefix. +// * +// * @param String prefix +// * @since 1.26 +// */ +// public static function invalidateCache(prefix) { +// return MediaWikiServices::getInstance().getInterwikiLookup().invalidateCache(prefix); +// } +// +// /** +// * Returns all interwiki prefixes +// * +// * @deprecated since 1.28, unused. Use InterwikiLookup instead.
+// * +// * @param String|null local If set, limits output to local/non-local interwikis +// * @return array List of prefixes +// * @since 1.19 +// */ +// public static function getAllPrefixes(local = null) { +// return MediaWikiServices::getInstance().getInterwikiLookup().getAllPrefixes(local); +// } + + /** + * Get the URL for a particular title (or with $1 if no title given) + * + * @param String title What text to put for the article name (url-encoded with wfUrlencode before it replaces $1) + * @return String The URL + * @note Prior to 1.19 The getURL with an argument was broken. + * If you use this arg in an extension that supports MW earlier + * than 1.19 please wfUrlencode and substitute $1 on your own. + */ + // PHP default is title=null; a null title returns the stored URL with the $1 placeholder left in place + public byte[] getURL(byte[] title) { + byte[] url = this.mURL; + if (title != null) { + url = XophpString.str_replace(ARG_1, XomwGlobalFunctions.wfUrlencode(title), url); + } + + return url; + } + + /** + * Get the API URL for this wiki + * + * @return String The URL + */ + public byte[] getAPI() { + return this.mAPI; + } + + /** + * Get the DB name for this wiki + * + * @return String The DB name + */ + public byte[] getWikiID() { + return this.mWikiID; + } + + /** + * Is this a local link from a sister project, or is + * it something outside, like Google + * + * @return boolean + */ + public boolean isLocal() { + return this.mLocal; + } + + /** + * Can pages from this wiki be transcluded? + * Still requires wgEnableScaryTransclusion + * + * @return boolean + */ + public boolean isTranscludable() { + return this.mTrans; + } + + /** + * Get the name for the interwiki site + * + * @return String (TODO.XO: stub; currently calls Tfds.Write(mPrefix) and always returns null because the XomwMessage lookup is commented out) + */ + public byte[] getName(XomwEnv env) { +// XomwMessage msg = XomwGlobalFunctions.wfMessage(env, "interwiki-name-" + this.mPrefix).inContentLanguage(); +// +// return !msg.exists() ?
Bry_.Empty : msg.text(); + Tfds.Write(mPrefix); + return null; + } + +// /** +// * Get a description for this interwiki +// * +// * @return String +// */ +// public function getDescription() { +// msg = wfMessage('interwiki-desc-' . this.mPrefix).inContentLanguage(); +// +// return !msg.exists() ? '' : msg.text(); +// } + private static final byte[] ARG_1 = Bry_.new_a7("$1"); +} diff --git a/gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/interwiki/XomwInterwikiLookup.java b/gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/interwiki/XomwInterwikiLookup.java new file mode 100644 index 000000000..9fefff40e --- /dev/null +++ b/gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/interwiki/XomwInterwikiLookup.java @@ -0,0 +1,52 @@ +/* +XOWA: the XOWA Offline Wiki Application +Copyright (C) 2012-2017 gnosygnu@gmail.com + +XOWA is licensed under the terms of the General Public License (GPL) Version 3, +or alternatively under the terms of the Apache License Version 2.0. + +You may use XOWA according to either of these licenses as is most appropriate +for your project on a case-by-case basis. + +The terms of each license can be found in the source code repository: + +GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt +Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt +*/ +package gplx.xowa.mediawiki.includes.interwiki; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*; +/** +* Service interface for looking up Interwiki records.
+* +* @since 1.28 +*/ +public interface XomwInterwikiLookup { + /** + * Check whether an interwiki prefix exists + * + * @param String $prefix Interwiki prefix to use + * @return boolean Whether it exists + */ + boolean isValidInterwiki(byte[] prefix); + + /** + * Fetch an Interwiki Object + * + * @param String $prefix Interwiki prefix to use + * @return Interwiki|null|boolean (PHP contract. NOTE(review): the Java return type is XomwInterwiki, so the PHP "false" result presumably has to be reported as null; confirm with implementers) + */ + XomwInterwiki fetch(byte[] prefix); + + /** + * Returns all interwiki prefixes + * + * @param String|null $local If set, limits output to local/non-local interwikis + * @return String[] List of prefixes + */ + byte[][] getAllPrefixes(byte[] local); + + /** + * Purge the in-process and persistent object cache for an interwiki prefix + * @param String $prefix + */ + void invalidateCache(byte[] prefix); +} diff --git a/gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/interwiki/XomwInterwikiLookupAdapter.java b/gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/interwiki/XomwInterwikiLookupAdapter.java new file mode 100644 index 000000000..5ba24229c --- /dev/null +++ b/gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/interwiki/XomwInterwikiLookupAdapter.java @@ -0,0 +1,156 @@ +/* +XOWA: the XOWA Offline Wiki Application +Copyright (C) 2012-2017 gnosygnu@gmail.com + +XOWA is licensed under the terms of the General Public License (GPL) Version 3, +or alternatively under the terms of the Apache License Version 2.0. + +You may use XOWA according to either of these licenses as is most appropriate +for your project on a case-by-case basis.
+ +The terms of each license can be found in the source code repository: + +GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt +Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt +*/ +package gplx.xowa.mediawiki.includes.interwiki; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*; +class XomwInterwikiLookupAdapter { +// /** +// * @var SiteLookup +// */ +// private $siteLookup; +// +// /** +// * @var Interwiki[]|null associative array mapping interwiki prefixes to Interwiki objects +// */ +// private $interwikiMap; +// +// function __construct( +// SiteLookup $siteLookup, +// array $interwikiMap = null +// ) { +// $this->siteLookup = $siteLookup; +// $this->interwikiMap = $interwikiMap; +// } +// +// /** +// * See InterwikiLookup::isValidInterwiki +// * It loads the whole interwiki map. +// * +// * @param String $prefix Interwiki prefix to use +// * @return boolean Whether it exists +// */ +// public function isValidInterwiki( $prefix ) { +// +// return array_key_exists( $prefix, $this->getInterwikiMap() ); +// } +// +// /** +// * See InterwikiLookup::fetch +// * It loads the whole interwiki map. 
+// * +// * @param String $prefix Interwiki prefix to use +// * @return Interwiki|null|boolean +// */ +// public function fetch( $prefix ) { +// if ( $prefix == '' ) { +// return null; +// } +// +// if ( !$this->isValidInterwiki( $prefix ) ) { +// return false; +// } +// +// return $this->interwikiMap[$prefix]; +// } +// +// /** +// * See InterwikiLookup::getAllPrefixes +// * +// * @param String|null $local If set, limits output to local/non-local interwikis +// * @return String[] List of prefixes +// */ +// public function getAllPrefixes( $local = null ) { +// if ( $local === null ) { +// return array_keys( $this->getInterwikiMap() ); +// } +// $res = []; +// foreach ( $this->getInterwikiMap() as $interwikiId => $interwiki ) { +// if ( $interwiki->isLocal() === $local ) { +// $res[] = $interwikiId; +// } +// } +// return $res; +// } +// +// /** +// * See InterwikiLookup::invalidateCache +// * +// * @param String $prefix +// */ +// public function invalidateCache( $prefix ) { +// if ( !isset( $this->interwikiMap[$prefix] ) ) { +// return; +// } +// $globalId = $this->interwikiMap[$prefix]->getWikiID(); +// unset( $this->interwikiMap[$prefix] ); +// +// // Reload the interwiki +// $site = $this->siteLookup->getSites()->getSite( $globalId ); +// $interwikis = $this->getSiteInterwikis( $site ); +// $this->interwikiMap = array_merge( $this->interwikiMap, [ $interwikis[$prefix] ] ); +// } +// +// /** +// * Load interwiki map to use as cache +// */ +// private function loadInterwikiMap() { +// $interwikiMap = []; +// $siteList = $this->siteLookup->getSites(); +// foreach ( $siteList as $site ) { +// $interwikis = $this->getSiteInterwikis( $site ); +// $interwikiMap = array_merge( $interwikiMap, $interwikis ); +// } +// $this->interwikiMap = $interwikiMap; +// } +// +// /** +// * Get interwikiMap attribute, load if needed. 
+// * +// * @return Interwiki[] +// */ +// private function getInterwikiMap() { +// if ( $this->interwikiMap === null ) { +// $this->loadInterwikiMap(); +// } +// return $this->interwikiMap; +// } +// +// /** +// * Load interwikis for the given site +// * +// * @param Site $site +// * @return Interwiki[] +// */ +// private function getSiteInterwikis( Site $site ) { +// $interwikis = []; +// foreach ( $site->getInterwikiIds() as $interwiki ) { +// $url = $site->getPageUrl(); +// if ( $site instanceof MediaWikiSite ) { +// $path = $site->getFileUrl( 'api.php' ); +// } else { +// $path = ''; +// } +// $local = $site->getSource() === 'local'; +// // TODO: How to adapt trans? +// $interwikis[$interwiki] = new Interwiki( +// $interwiki, +// $url, +// $path, +// $site->getGlobalId(), +// $local +// ); +// } +// return $interwikis; +// } +} diff --git a/gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/linkers/XomwLinkRenderer.java b/gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/linkers/XomwLinkRenderer.java index e12ac494e..4d4f8172f 100644 --- a/gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/linkers/XomwLinkRenderer.java +++ b/gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/linkers/XomwLinkRenderer.java @@ -15,7 +15,7 @@ Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt */ package gplx.xowa.mediawiki.includes.linkers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*; import gplx.langs.htmls.*; -import gplx.xowa.mediawiki.includes.htmls.*; +import gplx.xowa.mediawiki.includes.xohtml.*; /* TODO.XO * P7: $html = HtmlArmor::getHtml($text); * P3: getLinkUrl [alternate urls? EX: mw/wiki/index.php/title?] 
diff --git a/gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/parsers/XomwLinkHolderArray.java b/gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/parsers/XomwLinkHolderArray.java index 2d18b0cb7..f0f14429d 100644 --- a/gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/parsers/XomwLinkHolderArray.java +++ b/gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/parsers/XomwLinkHolderArray.java @@ -15,7 +15,7 @@ Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt */ package gplx.xowa.mediawiki.includes.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*; import gplx.langs.htmls.*; -import gplx.xowa.mediawiki.includes.htmls.*; +import gplx.xowa.mediawiki.includes.xohtml.*; import gplx.xowa.mediawiki.includes.linkers.*; /** * Holder of replacement pairs for wiki links diff --git a/gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/parsers/XomwParser.java b/gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/parsers/XomwParser.java index 1b195474d..285d0db62 100644 --- a/gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/parsers/XomwParser.java +++ b/gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/parsers/XomwParser.java @@ -16,7 +16,7 @@ Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt package gplx.xowa.mediawiki.includes.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*; import gplx.core.btries.*; import gplx.core.net.*; -import gplx.xowa.mediawiki.includes.htmls.*; +import gplx.xowa.mediawiki.includes.xohtml.*; import gplx.xowa.mediawiki.includes.linkers.*; import gplx.xowa.mediawiki.includes.parsers.tables.*; import gplx.xowa.mediawiki.includes.parsers.hrs.*; diff --git a/gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/parsers/XomwParserIface.java b/gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/parsers/XomwParserIface.java index 7c7e44ba3..898ae4acd 100644 
--- a/gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/parsers/XomwParserIface.java +++ b/gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/parsers/XomwParserIface.java @@ -14,7 +14,7 @@ GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt */ package gplx.xowa.mediawiki.includes.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*; -import gplx.xowa.mediawiki.includes.htmls.*; +import gplx.xowa.mediawiki.includes.xohtml.*; import gplx.xowa.mediawiki.includes.linkers.*; public interface XomwParserIface { int nextLinkID(); diff --git a/gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/parsers/lnkes/Xomw_lnke_wkr.java b/gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/parsers/lnkes/Xomw_lnke_wkr.java index a9f010401..386b725a6 100644 --- a/gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/parsers/lnkes/Xomw_lnke_wkr.java +++ b/gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/parsers/lnkes/Xomw_lnke_wkr.java @@ -15,7 +15,7 @@ Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt */ package gplx.xowa.mediawiki.includes.parsers.lnkes; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*; import gplx.xowa.mediawiki.includes.parsers.*; import gplx.core.btries.*; import gplx.core.primitives.*; -import gplx.xowa.mediawiki.includes.htmls.*; +import gplx.xowa.mediawiki.includes.xohtml.*; /* TODO.XO * P3: $langObj->formatNum( ++$this->mAutonumber ); * P2: $this->getConverterLanguage()->markNoConversion( $text ); diff --git a/gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/parsers/lnkis/Xomw_lnki_wkr.java b/gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/parsers/lnkis/Xomw_lnki_wkr.java index b34725aa9..f56ee542a 100644 --- a/gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/parsers/lnkis/Xomw_lnki_wkr.java 
+++ b/gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/parsers/lnkis/Xomw_lnki_wkr.java @@ -17,7 +17,7 @@ package gplx.xowa.mediawiki.includes.parsers.lnkis; import gplx.*; import gplx.x import gplx.core.btries.*; import gplx.core.primitives.*; import gplx.xowa.wikis.nss.*; import gplx.xowa.wikis.xwikis.*; import gplx.xowa.mediawiki.includes.parsers.*; import gplx.xowa.mediawiki.includes.parsers.quotes.*; -import gplx.xowa.mediawiki.includes.htmls.*; import gplx.xowa.mediawiki.includes.linkers.*; +import gplx.xowa.mediawiki.includes.xohtml.*; import gplx.xowa.mediawiki.includes.linkers.*; import gplx.xowa.mediawiki.includes.libs.*; import gplx.xowa.mediawiki.includes.media.*; import gplx.xowa.mediawiki.includes.filerepo.file.*; import gplx.xowa.parsers.uniqs.*; diff --git a/gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/parsers/magiclinks/Xomw_magiclinks_wkr.java b/gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/parsers/magiclinks/Xomw_magiclinks_wkr.java index 741b5d7be..122796c1c 100644 --- a/gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/parsers/magiclinks/Xomw_magiclinks_wkr.java +++ b/gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/parsers/magiclinks/Xomw_magiclinks_wkr.java @@ -15,7 +15,7 @@ Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt */ package gplx.xowa.mediawiki.includes.parsers.magiclinks; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*; import gplx.xowa.mediawiki.includes.parsers.*; import gplx.core.primitives.*; import gplx.core.btries.*; import gplx.core.net.*; -import gplx.xowa.mediawiki.includes.htmls.*; +import gplx.xowa.mediawiki.includes.xohtml.*; import gplx.langs.regxs.*; // TODO.XO: this->getConverterLanguage()->markNoConversion($url, true), public class Xomw_magiclinks_wkr { diff --git a/gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/htmls/Xomw_atr_itm.java 
b/gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/xohtml/Xomw_atr_itm.java similarity index 85% rename from gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/htmls/Xomw_atr_itm.java rename to gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/xohtml/Xomw_atr_itm.java index b3312e549..8bcd7e80b 100644 --- a/gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/htmls/Xomw_atr_itm.java +++ b/gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/xohtml/Xomw_atr_itm.java @@ -13,7 +13,7 @@ The terms of each license can be found in the source code repository: GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt */ -package gplx.xowa.mediawiki.includes.htmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*; +package gplx.xowa.mediawiki.includes.xohtml; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*; public class Xomw_atr_itm { public Xomw_atr_itm(int key_int, byte[] key, byte[] val) { this.key_int = key_int; diff --git a/gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/htmls/Xomw_atr_mgr.java b/gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/xohtml/Xomw_atr_mgr.java similarity index 92% rename from gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/htmls/Xomw_atr_mgr.java rename to gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/xohtml/Xomw_atr_mgr.java index e1bcd4c39..21694a8d9 100644 --- a/gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/htmls/Xomw_atr_mgr.java +++ b/gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/xohtml/Xomw_atr_mgr.java @@ -13,7 +13,7 @@ The terms of each license can be found in the source code repository: GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt */ -package 
gplx.xowa.mediawiki.includes.htmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*; +package gplx.xowa.mediawiki.includes.xohtml; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*; public class Xomw_atr_mgr { private final Ordered_hash hash = Ordered_hash_.New_bry(); public int Len() {return hash.Len();} diff --git a/gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/htmls/Xomw_html_elem.java b/gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/xohtml/Xomw_html_elem.java similarity index 86% rename from gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/htmls/Xomw_html_elem.java rename to gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/xohtml/Xomw_html_elem.java index 156071487..baee9ac29 100644 --- a/gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/htmls/Xomw_html_elem.java +++ b/gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/xohtml/Xomw_html_elem.java @@ -13,7 +13,7 @@ The terms of each license can be found in the source code repository: GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt */ -package gplx.xowa.mediawiki.includes.htmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*; +package gplx.xowa.mediawiki.includes.xohtml; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*; public class Xomw_html_elem { public Xomw_html_elem(byte[] name) { this.name = name; diff --git a/gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/htmls/Xomw_opt_mgr.java b/gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/xohtml/Xomw_opt_mgr.java similarity index 82% rename from gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/htmls/Xomw_opt_mgr.java rename to gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/xohtml/Xomw_opt_mgr.java 
index 17ee83864..06b3047f9 100644 --- a/gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/htmls/Xomw_opt_mgr.java +++ b/gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/xohtml/Xomw_opt_mgr.java @@ -13,7 +13,7 @@ The terms of each license can be found in the source code repository: GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt */ -package gplx.xowa.mediawiki.includes.htmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*; +package gplx.xowa.mediawiki.includes.xohtml; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*; public class Xomw_opt_mgr { public boolean known; public boolean broken; diff --git a/gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/htmls/Xomw_qry_mgr.java b/gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/xohtml/Xomw_qry_mgr.java similarity index 82% rename from gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/htmls/Xomw_qry_mgr.java rename to gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/xohtml/Xomw_qry_mgr.java index 11b47b4ab..8fc4cb37d 100644 --- a/gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/htmls/Xomw_qry_mgr.java +++ b/gplx.xowa.mediawiki/src/gplx/xowa/mediawiki/includes/xohtml/Xomw_qry_mgr.java @@ -13,7 +13,7 @@ The terms of each license can be found in the source code repository: GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt */ -package gplx.xowa.mediawiki.includes.htmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*; +package gplx.xowa.mediawiki.includes.xohtml; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*; public class Xomw_qry_mgr { public byte[] action; public 
int redlink;