You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
gnosygnu_xowa/400_xowa/src/gplx/xowa/mediawiki/includes/parsers/lnkis/Xomw_lnki_wkr.java

1163 lines
44 KiB

/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki.includes.parsers.lnkis; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*; import gplx.xowa.mediawiki.includes.parsers.*;
import gplx.core.btries.*; import gplx.core.primitives.*;
import gplx.xowa.wikis.nss.*; import gplx.xowa.wikis.xwikis.*;
import gplx.xowa.mediawiki.includes.parsers.*; import gplx.xowa.mediawiki.includes.parsers.quotes.*;
import gplx.xowa.mediawiki.includes.xohtml.*; import gplx.xowa.mediawiki.includes.linkers.*;
import gplx.xowa.mediawiki.includes.libs.*;
import gplx.xowa.mediawiki.includes.media.*; import gplx.xowa.mediawiki.includes.filerepo.file.*;
import gplx.xowa.parsers.uniqs.*;
/* TODO.XO
* P7: multi-line links; // look at the next 'line' to see if we can close it there
* P7: interwiki
* P7: [[File:]]
* P7: [[Category:]]
* P6: [[Media:]]
* P4: handle "]]]"; "If we get a ] at the beginning of $m[3]"
* P4: handle "[[http://a.org]]"
* P3: $langObj->formatNum( ++this.mAutonumber );
* P2: this.getConverterLanguage()->markNoConversion( $text );
* P1: link_prefix; EX: b[[A]]; [not enabled on enwiki]
*/
public class Xomw_lnki_wkr {// THREAD.UNSAFE: caching for repeated calls
// Placeholder store for links; cleared by Clear_state(), resolved by replaceLinkHolders().
private final XomwLinkHolderArray holders;
// Used for self-links, trail splitting, subpage normalization and image links.
private final XomwLinker linker;
// Renders known links in Make_known_link_holder().
private final XomwLinkRenderer link_renderer;
// private final Btrie_slim_mgr protocols_trie;
// Applies quote handling to link captions (see T6598 note in replaceInternalLinks).
private final Xomw_quote_wkr quote_wkr;
// Used to unstrip nowiki markers in link targets before building a title.
private final XomwStripState strip_state;
// Set by Init_by_wiki(); used for title creation, file lookup and magic words.
private XomwEnv env;
// Set by Init_by_wiki(); supplies pooled buffers for nested link parsing.
private Xow_wiki wiki;
// Title of the page being parsed; set by replaceInternalLinks(XomwParserBfr, ...) and used for self-link detection.
private XomwTitleOld mPageTitle;
// private final XomwLinker_NormalizeSubpageLink normalize_subpage_link = new XomwLinker_NormalizeSubpageLink();
// Shared scratch buffer passed in by the constructor's caller.
private final Bry_bfr tmp;
// Owning parser; used for stripAltText, armorLinks and parser options.
private final XomwParserIface parser;
// Reusable (never populated in this class) extra attributes / query passed to link_renderer.makeKnownLink().
private final Xomw_atr_mgr extra_atrs = new Xomw_atr_mgr();
private final Xomw_qry_mgr query = new Xomw_qry_mgr();
// Reusable scratch objects passed to XomwStringUtils.delimiterExplode() in makeImage().
private final Btrie_rv trv = new Btrie_rv();
private final List_adp tmp_list = List_adp_.New();
// Per-handler caches filled by getImageParams(): handler key -> Xomw_param_map / XomwMagicWordArray.
private final Hash_adp mImageParams = Hash_adp_bry.cs();
private final Hash_adp mImageParamsMagicArray = Hash_adp_bry.cs();
/**
 * Creates the worker and stores its collaborators.
 *
 * @param protocols_trie accepted for signature compatibility; currently not stored (the protocol check is not ported yet)
 * @param tmp scratch buffer shared with the caller
 */
public Xomw_lnki_wkr(XomwParserIface parser, XomwLinkHolderArray holders, XomwLinkRenderer link_renderer, Btrie_slim_mgr protocols_trie
, XomwLinker linker, Xomw_quote_wkr quote_wkr, Bry_bfr tmp, XomwStripState strip_state
) {
	// parser-level collaborators
	this.parser = parser;
	this.strip_state = strip_state;
	this.tmp = tmp;
	// link-level collaborators
	this.holders = holders;
	this.linker = linker;
	this.link_renderer = link_renderer;
	this.quote_wkr = quote_wkr;
	// this.protocols_trie = protocols_trie;
}
/**
 * Binds this worker to an environment and wiki, and lazily builds the shared
 * table of bytes that are accepted inside a link target.
 */
public void Init_by_wiki(XomwEnv env, Xow_wiki wiki) {
	this.env = env;
	this.wiki = wiki;
	if (title_chars_for_lnki != null) return;	// shared table already built
	boolean[] lnki_chars = (boolean[])Array_.Clone(XomwTitleOld.Title_chars_valid());
	// link targets also accept '#' and '%'; the % is needed to support urlencoded titles as well
	lnki_chars[Byte_ascii.Hash] = true;
	lnki_chars[Byte_ascii.Percent] = true;
	title_chars_for_lnki = lnki_chars;
}
/** Discards all link holders collected so far. */
public void Clear_state() {
	this.holders.clear();
}
/**
 * Buffer-pair entry point: reads wikitext from the parser buffer's source side,
 * writes the converted text to its target side, and switches the pair.
 * Also records the current page title, which is used for self-link detection.
 */
public void replaceInternalLinks(XomwParserBfr pbfr, XomwEnv env, XomwParserCtx pctx) {
	// XO.PBFR
	// capture the input bytes / length and the output buffer before switching the pair
	Bry_bfr input = pbfr.Src();
	byte[] input_bry = input.Bfr();
	int input_len = input.Len();
	Bry_bfr output = pbfr.Trg();
	pbfr.Switch();

	this.mPageTitle = pctx.Page_title();
	replaceInternalLinks(env, pctx, output, input_bry, 0, input_len);
}
// XO.MW:SYNC:1.29; DATE:2017-02-02
/**
 * Scans src[src_bgn..src_end) for "[[" wikilinks and writes the converted text to bfr.
 *
 * Text outside of links is copied unchanged. For each "[[":
 * - a target that is not followed by "|" or "]]", or that cannot be made into a title, is output raw
 * - an unclosed "[[target|text" is output raw
 * - File-namespace links are rendered through makeImage()
 * - Category-namespace links emit nothing and trim trailing whitespace already written to bfr
 * - links to the current page are rendered through linker.makeSelfLinkObj()
 * - all other links go to Make_known_link_holder() or holders.makeHolder()
 *
 * Many MediaWiki branches (link prefix, interwiki, Media:, multi-line image captions) are not
 * ported yet; the corresponding PHP is kept as comments.
 *
 * @param bfr     output buffer; appended to
 * @param src     wikitext bytes
 * @param src_bgn first position to scan
 * @param src_end position after the last byte to scan
 */
public void replaceInternalLinks(XomwEnv env, XomwParserCtx pctx, Bry_bfr bfr, byte[] src, int src_bgn, int src_end) {
	// XO.MW: regex for tc move to header; e1 and e1_img moved to code
	// the % is needed to support urlencoded titles as well

	// XO.MW.BGN: split the entire text String on occurrences of [[
	int cur = src_bgn;
	int prv = cur;	// start of the text that has not been copied to bfr yet; equal to cur at the top of every iteration
	while (true) {
		int lnki_bgn = Bry_find_.Find_fwd(src, Bry__wtxt__lnki__bgn, cur, src_end); // $a = StringUtils::explode('[[', ' ' . $s);
		if (lnki_bgn == Bry_find_.Not_found) { // no more "[["; stop loop
			bfr.Add_mid(src, cur, src_end);
			break;
		}
		cur = lnki_bgn + 2; // 2="[[".length

		// XO.MW.IGNORE: handles strange split logic of adding space to String; "$s = substr($s, 1);"

		// TODO.XO:link_prefix; EX: b[[A]]
		// $useLinkPrefixExtension = this.getTargetLanguage()->linkPrefixExtension();
		// $e2 = null;
		// if ($useLinkPrefixExtension) {
		//   // Match the end of a line for a word that's not followed by whitespace,
		//   // e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
		//   global $wgContLang;
		//   $charset = $wgContLang->linkPrefixCharset();
		//   $e2 = "/^((?>.*[^$charset]|))(.+)$/sDu";
		// }

		// IGNORE: throw new MWException(__METHOD__ . ": \this.mTitle is null\n");

		// $nottalk = !this.mTitle->isTalkPage();

		// TODO.XO:link_prefix
		byte[] prefix = Bry_.Empty;
		//if ($useLinkPrefixExtension) {
		//  $m = [];
		//  if (preg_match($e2, $s, $m)) {
		//    $first_prefix = $m[2];
		//  } else {
		//    $first_prefix = false;
		//  }
		//} else {
		//  $prefix = '';
		//}

		// TODO.XO:link_prefix; EX: b[[A]]
		//if ($useLinkPrefixExtension) {
		//  if (preg_match($e2, $s, $m)) {
		//    $prefix = $m[2];
		//    $s = $m[1];
		//  } else {
		//    $prefix = '';
		//  }
		//  // first link
		//  if ($first_prefix) {
		//    $prefix = $first_prefix;
		//    $first_prefix = false;
		//  }
		//}

		// PORTED.BGN: if (preg_match($e1, $line, $m)) && else if (preg_match($e1_img, $line, $m))
		// NOTE: both e1 and e1_img are effectively the same; e1_img allows nested "[["; EX: "[[A|b[[c]]d]]" will stop at "[[A|b"
		int ttl_bgn = cur;
		int ttl_end = XomwTitleOld.Find_fwd_while_title(src, cur, src_end, title_chars_for_lnki);
		cur = ttl_end;
		int capt_bgn = -1, capt_end = -1;
		int nxt_lnki = -1;
		boolean might_be_img = false;
		// at least one valid title-char found AND at least one more byte to inspect; check for "|" or "]]" EX: "[[a"
		// (a title that runs to src_end has neither, so it is an invalid form; this also avoids reading src[src_end])
		if (ttl_end > ttl_bgn && ttl_end < src_end) {
			byte nxt_byte = src[ttl_end];
			if (nxt_byte == Byte_ascii.Pipe) { // handles lnki with capt ([[A|a]])and lnki with file ([[File:A.png|b|c|d]])
				cur = ttl_end + 1;

				// find next "[["
				nxt_lnki = Bry_find_.Find_fwd(src, Bry__wtxt__lnki__bgn, cur, src_end);
				if (nxt_lnki == Bry_find_.Not_found)
					nxt_lnki = src_end;

				// find end "]]"
				capt_bgn = cur;
				capt_end = Bry_find_.Find_fwd(src, Bry__wtxt__lnki__end, cur, nxt_lnki);
				if (capt_end == Bry_find_.Not_found) {
					capt_end = nxt_lnki;
					cur = nxt_lnki;
					might_be_img = true;
				}
				else {
					cur = capt_end + Bry__wtxt__lnki__end.length;
				}
			}
			else if (ttl_end + 2 <= src_end && Bry_.Match(src, ttl_end, ttl_end + 2, Bry__wtxt__lnki__end)) { // handles simple lnki; EX: [[A]]
				cur = ttl_end + 2;
			}
			else {
				ttl_end = -1;
			}
		}
		else
			ttl_end = -1;

		if (ttl_end == -1) { // either (a) no valid title-chars ("[[<") or (b) title char, but has stray "]" ("[[a]b]]")
			// Invalid form; output everything up to and including the "[[" and resume scanning right after it
			bfr.Add_mid(src, prv, ttl_bgn);	// ttl_bgn == lnki_bgn + 2
			prv = cur = ttl_bgn;
			continue;
		}
		// PORTED.END: if (preg_match($e1, $line, $m)) && else if (preg_match($e1_img, $line, $m))

		byte[] text = Bry_.Mid(src, capt_bgn, capt_end);
		byte[] trail = Bry_.Empty;
		if (!might_be_img) {
			// TODO.XO:
			// If we get a ] at the beginning of $m[3] that means we have a link that's something like:
			// [[Image:Foo.jpg|[http://example.com desc]]] <- having three ] in a row breaks things,
			// the real problem is with the $e1 regex
			// See T1500.
			// Still some problems for cases where the ] is meant to be outside punctuation,
			// and no image is in sight. See T4095.
			//	if ($text !== ''
			//		&& substr($m[3], 0, 1) === ']'
			//		&& strpos($text, '[') !== false
			//	) {
			//		$text .= ']'; // so that replaceExternalLinks($text) works later
			//		$m[3] = substr($m[3], 1);
			//	}
			// fix up urlencoded title texts
			//	if (strpos($m[1], '%') !== false) {
			//		// Should anchors '#' also be rejected?
			//		$m[1] = str_replace([ '<', '>' ], [ '&lt;', '&gt;' ], rawurldecode($m[1]));
			//	}
			//	$trail = $m[3];
		}
		else {
			// Invalid, but might be an image with a link in its caption
			//	$text = $m[2];
			//	if (strpos($m[1], '%') !== false) {
			//		$m[1] = str_replace([ '<', '>' ], [ '&lt;', '&gt;' ], rawurldecode($m[1]));
			//	}
			//	$trail = "";
		}

		byte[] orig_link = Bry_.Mid(src, ttl_bgn, ttl_end);

		// TODO.XO: handle "[[http://a.org]]"
		// Don't allow @gplx.Internal protected links to pages containing
		// PROTO: where PROTO is a valid URL protocol; these
		// should be external links.
		// if (preg_match('/^(?i:' . this.mUrlProtocols . ')/', $origLink)) {
		//	$s .= $prefix . '[[' . $line;
		//	continue;
		// }

		byte[] link = orig_link;
		boolean no_force = orig_link[0] != Byte_ascii.Colon;
		if (!no_force) {
			// Strip off leading ':'
			link = Bry_.Mid(link, 1);
		}

		// $nt = is_string( $unstrip ) ? Title::newFromText( $unstrip ) : null;
		XomwTitleOld nt = XomwTitleOld.newFromText(env, link);

		// Make subpage if necessary
		//	boolean useSubpages = nt.Ns().Subpages_enabled();
		//	if (useSubpages) {
		//		Maybe_do_subpage_link(normalize_subpage_link, orig_link, text);
		//		link = normalize_subpage_link.link;
		//		text = normalize_subpage_link.text;
		//		nt = XomwTitleOld.newFromText(link);
		//	}
		// IGNORE: handled in rewrite above
		// else {
		//	link = orig_link;
		// }

		byte[] unstrip = strip_state.unstripNoWiki(link);
		if (!Bry_.Eq(unstrip, link))
			nt = XomwTitleOld.newFromText(env, unstrip);
		if (nt == null) {
			bfr.Add_mid(src, prv, lnki_bgn + 2); // $s .= $prefix . '[[' . $line;
			prv = cur = lnki_bgn + 2;
			continue;
		}

		// The link is now known to be well-formed enough to be consumed up to cur.
		// Copy the plain text that precedes it and mark it as consumed BEFORE any of the branches
		// below write to bfr; otherwise the branches that "continue" would drop the preceding text
		// and the next flush would re-emit this link's raw markup.
		bfr.Add_mid(src, prv, lnki_bgn);
		prv = cur;

		byte[] iw = nt.getInterwiki();
		int ns = nt.getNamespace();

		if (might_be_img) { // if this is actually an invalid link
			if (ns == XomwDefines.NS_FILE && no_force) { // but might be an image
				boolean found = false;
				//	while (true) {
				//		// look at the next 'line' to see if we can close it there
				//		a->next();
				//		next_line = a->current();
				//		if (next_line === false || next_line === null) {
				//			break;
				//		}
				//		m = explode(']]', next_line, 3);
				//		if (count(m) == 3) {
				//			// the first ]] closes the inner link, the second the image
				//			found = true;
				//			text .= "[[{m[0]}]]{m[1]}";
				//			trail = m[2];
				//			break;
				//		} else if (count(m) == 2) {
				//			// if there's exactly one ]] that's fine, we'll keep looking
				//			text .= "[[{m[0]}]]{m[1]}";
				//		} else {
				//			// if next_line is invalid too, we need look no further
				//			text .= '[[' . next_line;
				//			break;
				//		}
				//	}
				if (!found) {
					// we couldn't find the end of this imageLink, so output it raw
					// but don't ignore what might be perfectly normal links in the text we've examined
					Bry_bfr nested = wiki.Utl__bfr_mkr().Get_b128();
					this.replaceInternalLinks(env, pctx, nested, text, 0, text.length);
					nested.Mkr_rls();
					bfr.Add(prefix).Add(Bry__wtxt__lnki__bgn).Add(link).Add_byte_pipe().Add(text); // s .= "{prefix}[[link|text";
					// note: no trail, because without an end, there *is* no trail
					continue;
				}
			}
			else { // it's not an image, so output it raw
				bfr.Add(prefix).Add(Bry__wtxt__lnki__bgn).Add(link).Add_byte_pipe().Add(text); // s .= "{prefix}[[link|text";
				// note: no trail, because without an end, there *is* no trail
				continue;
			}
		}

		boolean was_blank = text.length == 0;
		if (was_blank) {
			text = link;
		}
		else {
			// T6598 madness. Handle the quotes only if they come from the alternate part
			// [[Lista d''e paise d''o munno]] -> <a href="...">Lista d''e paise d''o munno</a>
			// [[Criticism of Harry Potter|Criticism of ''Harry Potter'']]
			//    -> <a href="Criticism of Harry Potter">Criticism of <i>Harry Potter</i></a>
			text = quote_wkr.Do_quotes(tmp, text);
		}

		// Link not escaped by : , create the various objects
		//	if (no_force && !nt->wasLocalInterwiki()) {
		// Interwikis
		//		if (
		//			iw && this->mOptions->getInterwikiMagic() && nottalk && (
		//				Language::fetchLanguageName(iw, null, 'mw') ||
		//				in_array(iw, wgExtraInterlanguageLinkPrefixes)
		//			)
		//		) {
		// T26502: filter duplicates
		//			if (!isset(this->mLangLinkLanguages[iw])) {
		//				this->mLangLinkLanguages[iw] = true;
		//				this->mOutput->addLanguageLink(nt->getFullText());
		//			}
		//
		//			s = rtrim(s . prefix);
		//			s .= trim(trail, "\n") == '' ? '': prefix . trail;
		//			continue;
		//		}
		//
		if (ns == XomwDefines.NS_FILE) {
			// boolean is_good_image = !wfIsBadImage(nt->getDBkey(), this->mTitle)
			boolean is_good_image = true;
			if (is_good_image) {
				if (was_blank) {
					// if no parameters were passed, text
					// becomes something like "File:Foo.png",
					// which we don't want to pass on to the
					// image generator
					text = Bry_.Empty;
				}
				else {
					// recursively parse links inside the image caption
					// actually, this will parse them in any other parameters, too,
					// but it might be hard to fix that, and it doesn't matter ATM
					//	text = this->replaceExternalLinks(text);
					//	holders->merge(this->replaceInternalLinks2(text));
				}
				// cloak any absolute URLs inside the image markup, so replaceExternalLinks() won't touch them
				bfr.Add(prefix);
				// Armor_links(Make_image(bfr, nt, text, holders))
				this.makeImage(env, pctx, bfr, nt, text, holders);
				bfr.Add(trail);
				continue;
			}
		}
		else if (ns == XomwDefines.NS_CATEGORY) {
			bfr.Trim_end_ws(); // s = rtrim(s . "\n"); // T2087

			if (was_blank) {
				//	sortkey = this->getDefaultSort();
			}
			else {
				//	sortkey = text;
			}
			//	sortkey = Sanitizer::decodeCharReferences(sortkey);
			//	sortkey = str_replace("\n", '', sortkey);
			//	sortkey = this->getConverterLanguage()->convertCategoryKey(sortkey);
			//	this->mOutput->addCategory(nt->getDBkey(), sortkey);
			//
			// Strip the whitespace Category links produce, see T2087
			//	s .= trim(prefix . trail, "\n") == '' ? '' : prefix . trail;
			continue;
		}
		//	}

		// Self-link checking. For some languages, variants of the title are checked in
		// LinkHolderArray::doVariants() to allow batching the existence checks necessary
		// for linking to a different variant.
		if (ns != XomwDefines.NS_SPECIAL && nt.equals(mPageTitle) && !nt.hasFragment()) {
			bfr.Add(prefix);
			linker.makeSelfLinkObj(bfr, nt, text, Bry_.Empty, trail, Bry_.Empty);
			continue;
		}

		// NS_MEDIA is a pseudo-namespace for linking directly to a file
		// @todo FIXME: Should do batch file existence checks, see comment below
		if (ns == XomwDefines.NS_MEDIA) {
			// Give extensions a chance to select the file revision for us
			//	options = [];
			//	desc_query = false;
			// MW.HOOK:BeforeParserFetchFileAndTitle
			// Fetch and register the file (file title may be different via hooks)
			//	list(file, nt) = this->fetchFileAndTitle(nt, options);
			// Cloak with NOPARSE to avoid replacement in replaceExternalLinks
			//	s .= prefix . this->armorLinks(
			//	Linker::makeMediaLinkFile(nt, file, text)) . trail;
			//	continue;
		}

		// Some titles, such as valid special pages or files in foreign repos, should
		// be shown as bluelinks even though they're not included in the page table
		// @todo FIXME: isAlwaysKnown() can be expensive for file links; we should really do
		// batch file existence checks for NS_FILE and NS_MEDIA
		if (iw == null && nt.isAlwaysKnown()) {
			// this->mOutput->addLink(nt);
			Make_known_link_holder(bfr, nt, text, trail, prefix);
		}
		else {
			// Links will be added to the output link list after checking
			holders.makeHolder(bfr, nt, text, Bry_.Ary_empty, trail, prefix);
		}
	}
}
/**
 * Parses the options of an image link ("options|alt text") and renders the image to bfr.
 *
 * Each "|"-separated part is matched against the image magic words. Parts that validate are stored
 * in the parameter map; the last part that does not validate becomes the caption. The alt / title
 * text is then derived from the caption and the result is handed to linker.makeImageLink().
 *
 * @param bfr             output buffer; appended to
 * @param title           title of the file
 * @param options_at_link text after the first "|" of the link; may be empty
 * @param holders         link holders passed through to parser.stripAltText()
 */
public void makeImage(XomwEnv env, XomwParserCtx pctx, Bry_bfr bfr, XomwTitleOld title, byte[] options_at_link, XomwLinkHolderArray holders) {
	// Check if the options text is of the form "options|alt text"
	// Options are:
	//  * thumbnail  make a thumbnail with enlarge-icon and caption, alignment depends on lang
	//  * left       no resizing, just left align. label is used for alt= only
	//  * right      same, but right aligned
	//  * none       same, but not aligned
	//  * ___px      scale to ___ pixels width, no aligning. e.g. use in taxobox
	//  * center     center the image
	//  * frame      Keep original image size, no magnify-button.
	//  * framed     Same as "frame"
	//  * frameless  like 'thumb' but without a frame. Keeps user preferences for width
	//  * upright    reduce width for upright images, rounded to full __0 px
	//  * border     draw a 1px border around the image
	//  * alt        Text for HTML alt attribute (defaults to empty)
	//  * class      Set a class for img node
	//  * link       Set the target of the image link. Can be external, interwiki, or local
	// vertical-align values (no % or length right now):
	//  * baseline
	//  * sub
	//  * super
	//  * top
	//  * text-top
	//  * middle
	//  * bottom
	//  * text-bottom

	// Protect LanguageConverter markup when splitting into parts
	byte[][] parts = XomwStringUtils.delimiterExplode(tmp_list, trv, options_at_link);

	// Give extensions a chance to select the file revision for us
	// $options = [];
	byte[] desc_query = null;
	// XO.MW.HOOK:BeforeParserFetchFileAndTitle

	// Fetch and register the file (file title may be different via hooks)
	// list($file, $title) = this.fetchFileAndTitle($title, $options);
	XomwFile file = fetchFileAndTitle(title, null);

	// Get parameter map
	XomwMediaHandler handler = file == null ? null : file.getHandler();

	Xomw_image_params tmp_img_params = pctx.Lnki_wkr__make_image__img_params;
	this.getImageParams(tmp_img_params, handler);
	Xomw_param_map paramMap = tmp_img_params.paramMap;
	XomwMagicWordArray mwArray = tmp_img_params.mwArray;

	// XO.MW.UNSUPPORTED.TrackingCategory: if (!$file) this.addTrackingCategory('broken-file-category');

	// Process the input parameters
	byte[] caption = Bry_.Empty;
	// XO.MW: $params = [ 'frame' => [], 'handler' => [], 'horizAlign' => [], 'vertAlign' => [] ];
	Xomw_params_frame frameParams = paramMap.Frame.Clear();
	Xomw_params_handler handlerParams = paramMap.Handler.Clear();
	// Xomw_params_horizAlign horizAlignParams = paramMap.HorizAlign.Clear();
	// Xomw_params_vertAlign vertAlignParams = paramMap.VertAlign.Clear();
	boolean seen_format = false;

	// reusable 2-slot result of matchVariableStartToEnd: [0]=magic word name, [1]=value
	byte[][] tmp_match_word = pctx.Lnki_wkr__make_image__match_magic_word;
	int parts_len = parts.length;
	for (int i = 0; i < parts_len; i++) {
		byte[] part = parts[i];
		part = Bry_.Trim(part);
		mwArray.matchVariableStartToEnd(tmp_match_word, part);
		byte[] magic_name = tmp_match_word[0];
		byte[] val = tmp_match_word[1];
		boolean validated = false;

		Xomw_param_itm param_item = paramMap.Get_by(magic_name);
		if (param_item != null) {
			int typeUid = param_item.type_uid;
			int paramNameUid = param_item.name_uid;
			// Special case; width and height come in one variable together
			if (typeUid == Xomw_param_map.Type__handler && paramNameUid == Xomw_param_itm.Name__width) {
				int[] tmp_img_size = pctx.Lnki_wkr__make_image__img_size;
				this.parseWidthParam(tmp_img_size, val);
				int parsedW = tmp_img_size[0];
				int parsedH = tmp_img_size[1];
				if (parsedW != 0) {
					if (handler.validateParam(Xomw_param_itm.Name__width, null, parsedW)) {
						paramMap.Set(typeUid, Xomw_param_itm.Name__width, null, parsedW);
						validated = true;
					}
				}
				if (parsedH != 0) {
					if (handler.validateParam(Xomw_param_itm.Name__height, null, parsedH)) {
						paramMap.Set(typeUid, Xomw_param_itm.Name__height, null, parsedH);
						validated = true;
					}
				}
				// else no validation -- T15436
			}
			else {
				if (typeUid == Xomw_param_map.Type__handler) {
					// Validate handler parameter
					// validated = $handler->validateParam($paramName, $value);
				}
				else {
					// Validate @gplx.Internal protected parameters
					switch (paramNameUid) {
						case Xomw_param_itm.Name__manual_thumb:
						case Xomw_param_itm.Name__alt:
						case Xomw_param_itm.Name__class:
							// @todo FIXME: Possibly check validity here for
							// manualthumb? downstream behavior seems odd with
							// missing manual thumbs.
							validated = true;
							val = parser.stripAltText(val, holders);
							break;
						case Xomw_param_itm.Name__link:
							// NOTE: link validation is not ported yet; every link value is accepted
							//	$chars = self::EXT_LINK_URL_CLASS;
							//	$addr = self::EXT_LINK_ADDR;
							//	$prots = this.mUrlProtocols;
							//	if ($value === '') {
							//		$paramName = 'no-link';
							//		$value = true;
									validated = true;
							//	}
							//	else if (preg_match("/^((?i)$prots)/", $value)) {
							//		if (preg_match("/^((?i)$prots)$addr$chars*$/u", $value, $m)) {
							//			$paramName = 'link-url';
							//			this.mOutput->addExternalLink($value);
							//			if (this.mOptions->getExternalLinkTarget()) {
							//				$params[$type]['link-target'] = this.mOptions->getExternalLinkTarget();
							//			}
										validated = true;
							//		}
							//	} else {
							//		$linkTitle = Title::newFromText($value);
							//		if ($linkTitle) {
							//			$paramName = 'link-title';
							//			$value = $linkTitle;
							//			this.mOutput->addLink($linkTitle);
										validated = true;
							//		}
							//	}
							break;
						case Xomw_param_itm.Name__frameless:
						case Xomw_param_itm.Name__framed:
						case Xomw_param_itm.Name__thumbnail:
							// use first appearing option, discard others.
							validated = !seen_format;
							seen_format = true;
							break;
						default:
							// Most other things appear to be empty or numeric...
							validated = (val == null || XophpObject_.isnumeric(Bry_.Trim(val)));
							break;
					}
				}

				if (validated) {
					paramMap.Set(typeUid, paramNameUid, val, -1);
				}
			}
		}
		if (!validated) {
			caption = part;
		}
	}

	// Process alignment parameters
	// NOTE: the lookups are kept for parity with MW, but their results are not applied yet
	Xomw_param_itm align_itm = paramMap.Get_by(Xomw_param_map.Type__horizAlign);
	if (align_itm != null) {
		// frameParams.align = align_itm.val;
	}
	align_itm = paramMap.Get_by(Xomw_param_map.Type__vertAlign);
	if (align_itm != null) {
		// frameParams.valign = align_itm.val;
	}

	frameParams.caption = caption;

	boolean image_is_framed
		=  frameParams.frame != null
		|| frameParams.framed != null
		|| frameParams.thumbnail != null
		|| frameParams.manualthumb != null
		;

	// Will the image be presented in a frame, with the caption below?
	// In the old days, [[Image:Foo|text...]] would set alt text.  Later it
	// came to also set the caption, ordinary text after the image -- which
	// makes no sense, because that just repeats the text multiple times in
	// screen readers.  It *also* came to set the title attribute.
	// Now that we have an alt attribute, we should not set the alt text to
	// equal the caption: that's worse than useless, it just repeats the
	// text.  This is the framed/thumbnail case.  If there's no caption, we
	// use the unnamed parameter for alt text as well, just for the time be-
	// ing, if the unnamed param is set and the alt param is not.
	// For the future, we need to figure out if we want to tweak this more,
	// e.g., introducing a title= parameter for the title; ignoring the un-
	// named parameter entirely for images without a caption; adding an ex-
	// plicit caption= parameter and preserving the old magic unnamed para-
	// meter for BC; ...

	// MW tests "$caption === ''"; test by length, because a zero-length caption
	// is not necessarily the same array instance as Bry_.Empty
	boolean caption_is_empty = caption.length == 0;
	if (image_is_framed) { // Framed image
		if (caption_is_empty && frameParams.alt == null) {
			// No caption or alt text, add the filename as the alt text so
			// that screen readers at least get some description of the image
			frameParams.alt = title.getText();
		}
		// Do not set $params['frame']['title'] because tooltips don't make sense
		// for framed images
	}
	else { // Inline image
		if (frameParams.alt == null) {
			// No alt text, use the "caption" for the alt text
			if (!caption_is_empty) {
				frameParams.alt = parser.stripAltText(caption, holders);
			}
			else {
				// No caption, fall back to using the filename for the
				// alt text
				frameParams.alt = title.getText();
			}
		}
		// Use the "caption" for the tooltip text
		frameParams.title = parser.stripAltText(caption, holders);
	}

	// MW.HOOK:ParserMakeImageParams

	// Linker does the rest
	// byte[] time = options.time;
	Object time = null;
	linker.makeImageLink(bfr, env, pctx, parser, title, file, frameParams, handlerParams, time, desc_query, parser.getOptions().getThumbSize());

	// Give the handler a chance to modify the parser Object
	//	if (handler != null) {
	//		$handler->parserTransformHook($this, $file);
	//	}
}
// Built on first use by getImageParams(): the built-in (non-handler) image parameter names and their map.
private static Xomw_param_list[] internalParamNames;
private static Xomw_param_map internalParamMap;
/**
 * Fills rv with the parameter map and magic-word array for the given handler.
 * Both are built once per handler key (an empty key is used when there is no handler)
 * and cached in mImageParams / mImageParamsMagicArray.
 */
private void getImageParams(Xomw_image_params rv, XomwMediaHandler handler) {
	byte[] handlerClass = (handler != null) ? handler.Key() : Bry_.Empty;

	// cache hit: both entries were stored together on an earlier call
	rv.paramMap = (Xomw_param_map)mImageParams.Get_by(handlerClass);
	if (rv.paramMap != null) {
		rv.mwArray = (XomwMagicWordArray)mImageParamsMagicArray.Get_by(handlerClass);
		return;
	}

	// NOTE: lazy-init; code below can be inefficient
	// Initialise static lists
	if (internalParamNames == null) {
		internalParamNames = new Xomw_param_list[]
		{ Xomw_param_list.New(Xomw_param_map.Type__horizAlign, "horizAlign", "left", "right", "center", "none")
		, Xomw_param_list.New(Xomw_param_map.Type__vertAlign , "vertAlign", "baseline", "sub", "super", "top", "text-top", "middle", "bottom", "text-bottom")
		, Xomw_param_list.New(Xomw_param_map.Type__frame     , "frame", "thumbnail", "manual_thumb", "framed", "frameless", "upright", "border", "link", "alt", "class")
		};

		internalParamMap = new Xomw_param_map();
		byte[] magic_prefix = Bry_.new_a7("img_");
		int lists_len = internalParamNames.length;
		for (int list_idx = 0; list_idx < lists_len; list_idx++) {
			Xomw_param_list param_list = internalParamNames[list_idx];
			for (byte[] name : param_list.names) {
				// magic word name is "img_" + name, with dashes changed to underscores
				byte[] name_with_underlines = Bry_.Replace(name, Byte_ascii.Dash, Byte_ascii.Underline);
				internalParamMap.Add(Bry_.Add(magic_prefix, name_with_underlines), param_list.type_uid, name);
			}
		}
	}

	// Add handler params
	Xomw_param_map paramMap = internalParamMap.Clone();
	if (handler != null) {
		Xomw_param_map handlerParamMap = handler.getParamMap();
		int handlerParamMapLen = handlerParamMap.Len();
		for (int itm_idx = 0; itm_idx < handlerParamMapLen; itm_idx++) {
			Xomw_param_itm handler_itm = (Xomw_param_itm)handlerParamMap.Get_at(itm_idx);
			paramMap.Add(handler_itm.magic, handler_itm.type_uid, handler_itm.name);
		}
	}
	this.mImageParams.Add(handlerClass, paramMap);
	rv.paramMap = paramMap;

	XomwMagicWordArray mw_array = new XomwMagicWordArray(env.Magic_word_mgr(), paramMap.Keys());
	this.mImageParamsMagicArray.Add(handlerClass, mw_array);
	rv.mwArray = mw_array;
}
/**
 * Parses a width param of an image link like "300px" or "200x300px".
 * XO.MW.NOTE: for MW, "" -> null, null while "AxB" -> 0x0
 *
 * @param img_size result array; [0] receives the width and [1] the height.
 *                 Both are XophpObject_.NULL_INT when src is Bry_.Empty.
 * @param src      raw value; a trailing "px" is ignored
 */
public void parseWidthParam(int[] img_size, byte[] src) {
	img_size[1] = XophpObject_.NULL_INT;
	img_size[0] = XophpObject_.NULL_INT;
	if (src == Bry_.Empty) return;

	// (T15500) In both cases (width/height and width only),
	// permit trailing "px" for backward compatibility.
	// XO: "px" is optional; if exists at end, ignore it
	int num_end = Bry_.Has_at_end(src, Bry__px) ? src.length - 2 : src.length;

	// XO.MW: if ( preg_match( '/^([0-9]*)x([0-9]*)\s*(?:px)?\s*$/', $value, $m ) ) {
	int width_end = Bry_find_.Find_fwd_while_num(src, 0, num_end);
	int height_bgn = -1, height_end = -1;
	boolean has_height = width_end < num_end && src[width_end] == Byte_ascii.Ltr_x;
	if (has_height) {
		height_bgn = width_end + 1;
		height_end = Bry_find_.Find_fwd_while_num(src, height_bgn, num_end);
	}
	img_size[0] = Bry_.To_int_or(src, 0, width_end, 0);
	img_size[1] = Bry_.To_int_or(src, height_bgn, height_end, 0);
}
/** Optional unit suffix of a width param. */
public static final byte[] Bry__px = Bry_.new_a7("px");
/**
 * Fetch a file for a title.
 * In MW this also registers a reference to the file and may update the title;
 * that part is not ported (see the PHP kept below).
 *
 * @param title   title of the file
 * @param options options for the lookup; passed through to fetchFileNoRegister
 * @return the file as returned by fetchFileNoRegister
 */
public XomwFile fetchFileAndTitle(XomwTitleOld title, Hash_adp options) {
	//$time = $file ? $file->getTimestamp() : false;
	//$sha1 = $file ? $file->getSha1() : false;
	//# Register the file as a dependency...
	//this.mOutput->addImage( $title->getDBkey(), $time, $sha1 );
	//if ( $file && !$title->equals( $file->getTitle() ) ) {
	//	# Update fetched file title
	//	$title = $file->getTitle();
	//	this.mOutput->addImage( $title->getDBkey(), $time, $sha1 );
	//}
	return this.fetchFileNoRegister(title, options);
}
/**
 * Helper function for fetchFileAndTitle.
 *
 * Also useful if you need to fetch a file but not use it yet,
 * for example to get the file's handler.
 *
 * @param title   title of the file
 * @param options currently unused; the 'broken' / 'sha1' options of MW are not ported
 * @return the result of the environment's file finder for the title
 */
private XomwFile fetchFileNoRegister(XomwTitleOld title, Hash_adp options) {
	//	if ( isset( $options['broken'] ) ) {
	//		file = false; // broken thumbnail forced by hook
	//	} elseif ( isset( $options['sha1'] ) ) { // get by (sha1,timestamp)
	//		file = RepoGroup::singleton()->findFileFromKey( $options['sha1'], $options );
	//	} else { // get by (name,timestamp)
	//		(ported below)  // $options
	//	}
	return env.File_finder().Find_file(title);
}
/** Normalizes a possible subpage link relative to the current page title; the result is written to rv. */
public void Maybe_do_subpage_link(XomwLinker_NormalizeSubpageLink rv, byte[] target, byte[] text) {
	this.linker.normalizeSubpageLink(rv, this.mPageTitle, target, text);
}
/** Delegates to the link-holder array to replace the holders in the given parser buffer. */
public void replaceLinkHolders(XomwParserBfr pbfr) {
	this.holders.replace(pbfr);
}
/**
 * Renders a link to a page that is known to exist and appends it, armored, to bfr, followed by
 * the part of the trail that does not belong inside the link.
 *
 * The link is rendered into the scratch buffer and only the armored result is appended to bfr,
 * so content already in bfr is neither removed nor passed through parser.armorLinks().
 *
 * @param bfr    output buffer; appended to
 * @param nt     link target
 * @param text   link text; when null or empty, the prefixed title text is used
 * @param trail  text following the link; split by linker.splitTrail()
 * @param prefix text preceding the link that is placed inside the link
 */
public void Make_known_link_holder(Bry_bfr bfr, XomwTitleOld nt, byte[] text, byte[] trail, byte[] prefix) {
	byte[][] split_trail = linker.splitTrail(trail);
	byte[] inside = split_trail[0];
	trail = split_trail[1];

	// MW tests "$text == ''"; test by length instead of by identity with Bry_.Empty
	if (text == null || text.length == 0) {
		text = Bry_.Escape_html(nt.getPrefixedText());
	}

	// PORTED:new HtmlArmor( "$prefix$text$inside" )
	tmp.Add_bry_escape_html(prefix);
	tmp.Add_bry_escape_html(text);
	tmp.Add_bry_escape_html(inside);
	text = tmp.To_bry_and_clear();

	// render into the (now empty) scratch buffer, so that only the link itself is armored
	link_renderer.makeKnownLink(tmp, nt, text, extra_atrs, query);
	byte[] link = tmp.To_bry_and_clear();
	parser.armorLinks(bfr, link, 0, link.length);
	bfr.Add(trail);
}
// Lookup table of bytes accepted inside a link target; built on first call to Init_by_wiki()
// from XomwTitleOld.Title_chars_valid() plus '#' and '%'.
private static boolean[] title_chars_for_lnki;
// Wikitext delimiters that open and close an internal link.
private static final byte[] Bry__wtxt__lnki__bgn = Bry_.new_a7("[["), Bry__wtxt__lnki__end = Bry_.new_a7("]]");
// $e1 = "/^([{$tc}]+)(?:\\|(.+?))?]](.*)\$/sD";
//
// REGEX: "title-char"(1+) + "pipe"(0-1) + "]]"(0-1) + "other chars up to next [["
// title-char -> ([{$tc}]+)
// pipe -> (?:\\|(.+?))?
// ]] -> ?]]
// other chars... -> (.*)
// $e1_img = "/^([{$tc}]+)\\|(.*)\$/sD";
//
// REGEX: "title-char"(1+) + "pipe"(0-1) + "other chars up to next [["
// title-char -> ([{$tc}]+)
// pipe -> \\|
// other chars... -> (.*)
//
// /**
// * Process [[ ]] wikilinks
// *
// * @param String $s
// *
// * @return String Processed text
// *
// * @private
// */
// public function replaceInternalLinks($s) {
// this.mLinkHolders->merge(this.replaceInternalLinks2($s));
// return $s;
// }
//
// /**
// * Process [[ ]] wikilinks (RIL)
// * @param String $s
// * @throws MWException
// * @return LinkHolderArray
// *
// * @private
// */
// public function replaceInternalLinks2(&$s) {
// global $wgExtraInterlanguageLinkPrefixes;
//
// static $tc = false, $e1, $e1_img;
// # the % is needed to support urlencoded titles as well
// if (!$tc) {
// $tc = Title::legalChars() . '#%';
// # Match a link having the form [[namespace:link|alternate]]trail
// $e1 = "/^([{$tc}]+)(?:\\|(.+?))?]](.*)\$/sD";
// # Match cases where there is no "]]", which might still be images
// $e1_img = "/^([{$tc}]+)\\|(.*)\$/sD";
// }
//
// $holders = new LinkHolderArray($this);
//
// # split the entire text String on occurrences of [[
// $a = StringUtils::explode('[[', ' ' . $s);
// # get the first element (all text up to first [[), and remove the space we added
// $s = $a->current();
// $a->next();
// $line = $a->current(); # Workaround for broken ArrayIterator::next() that returns "void"
// $s = substr($s, 1);
//
// $useLinkPrefixExtension = this.getTargetLanguage()->linkPrefixExtension();
// $e2 = null;
// if ($useLinkPrefixExtension) {
// # Match the end of a line for a word that's not followed by whitespace,
// # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
// global $wgContLang;
// $charset = $wgContLang->linkPrefixCharset();
// $e2 = "/^((?>.*[^$charset]|))(.+)$/sDu";
// }
//
// if (is_null(this.mTitle)) {
// throw new MWException(__METHOD__ . ": \this.mTitle is null\n");
// }
// $nottalk = !this.mTitle->isTalkPage();
//
// if ($useLinkPrefixExtension) {
// $m = [];
// if (preg_match($e2, $s, $m)) {
// $first_prefix = $m[2];
// } else {
// $first_prefix = false;
// }
// } else {
// $prefix = '';
// }
//
// $useSubpages = this.areSubpagesAllowed();
//
// // @codingStandardsIgnoreStart Squiz.WhiteSpace.SemicolonSpacing.Incorrect
// # Loop for each link
// for (; $line !== false && $line !== null; $a->next(), $line = $a->current()) {
// // @codingStandardsIgnoreEnd
//
// # Check for excessive memory usage
// if ($holders->isBig()) {
// # Too big
// # Do the existence check, replace the link holders and clear the array
// $holders->replace($s);
// $holders->clear();
// }
//
// if ($useLinkPrefixExtension) {
// if (preg_match($e2, $s, $m)) {
// $prefix = $m[2];
// $s = $m[1];
// } else {
// $prefix = '';
// }
// # first link
// if ($first_prefix) {
// $prefix = $first_prefix;
// $first_prefix = false;
// }
// }
//
// $might_be_img = false;
//
// if (preg_match($e1, $line, $m)) { # page with normal text or alt
// $text = $m[2];
// # If we get a ] at the beginning of $m[3] that means we have a link that's something like:
// # [[Image:Foo.jpg|[http://example.com desc]]] <- having three ] in a row fucks up,
// # the real problem is with the $e1 regex
// # See T1500.
// # Still some problems for cases where the ] is meant to be outside punctuation,
// # and no image is in sight. See T4095.
// if ($text !== ''
// && substr($m[3], 0, 1) === ']'
// && strpos($text, '[') !== false
// ) {
// $text .= ']'; # so that replaceExternalLinks($text) works later
// $m[3] = substr($m[3], 1);
// }
// # fix up urlencoded title texts
// if (strpos($m[1], '%') !== false) {
// # Should anchors '#' also be rejected?
// $m[1] = str_replace([ '<', '>' ], [ '&lt;', '&gt;' ], rawurldecode($m[1]));
// }
// $trail = $m[3];
// } elseif (preg_match($e1_img, $line, $m)) {
// # Invalid, but might be an image with a link in its caption
// $might_be_img = true;
// $text = $m[2];
// if (strpos($m[1], '%') !== false) {
// $m[1] = str_replace([ '<', '>' ], [ '&lt;', '&gt;' ], rawurldecode($m[1]));
// }
// $trail = "";
// } else { # Invalid form; output directly
// $s .= $prefix . '[[' . $line;
// continue;
// }
//
// $origLink = ltrim($m[1], ' ');
//
// # Don't allow internal links to pages containing
// # PROTO: where PROTO is a valid URL protocol; these
// # should be external links.
// if (preg_match('/^(?i:' . this.mUrlProtocols . ')/', $origLink)) {
// $s .= $prefix . '[[' . $line;
// continue;
// }
//
// # Make subpage if necessary
// if ($useSubpages) {
// $link = this.maybeDoSubpageLink($origLink, $text);
// } else {
// $link = $origLink;
// }
//
// $noforce = (substr($origLink, 0, 1) !== ':');
// if (!$noforce) {
// # Strip off leading ':'
// $link = substr($link, 1);
// }
//
// $unstrip = this.mStripState->unstripNoWiki($link);
// $nt = is_string($unstrip) ? Title::newFromText($unstrip) : null;
// if ($nt === null) {
// $s .= $prefix . '[[' . $line;
// continue;
// }
//
// $ns = $nt->getNamespace();
// $iw = $nt->getInterwiki();
//
// if ($might_be_img) { # if this is actually an invalid link
// if ($ns == NS_FILE && $noforce) { # but might be an image
// $found = false;
// while (true) {
// # look at the next 'line' to see if we can close it there
// $a->next();
// $next_line = $a->current();
// if ($next_line === false || $next_line === null) {
// break;
// }
// $m = explode(']]', $next_line, 3);
// if (count($m) == 3) {
// # the first ]] closes the inner link, the second the image
// $found = true;
// $text .= "[[{$m[0]}]]{$m[1]}";
// $trail = $m[2];
// break;
// } elseif (count($m) == 2) {
// # if there's exactly one ]] that's fine, we'll keep looking
// $text .= "[[{$m[0]}]]{$m[1]}";
// } else {
// # if $next_line is invalid too, we need look no further
// $text .= '[[' . $next_line;
// break;
// }
// }
// if (!$found) {
// # we couldn't find the end of this imageLink, so output it raw
// # but don't ignore what might be perfectly normal links in the text we've examined
// $holders->merge(this.replaceInternalLinks2($text));
// $s .= "{$prefix}[[$link|$text";
// # note: no $trail, because without an end, there *is* no trail
// continue;
// }
// } else { # it's not an image, so output it raw
// $s .= "{$prefix}[[$link|$text";
// # note: no $trail, because without an end, there *is* no trail
// continue;
// }
// }
//
// $wasblank = ($text == '');
// if ($wasblank) {
// $text = $link;
// } else {
// # T6598 madness. Handle the quotes only if they come from the alternate part
// # [[Lista d''e paise d''o munno]] -> <a href="...">Lista d''e paise d''o munno</a>
// # [[Criticism of Harry Potter|Criticism of ''Harry Potter'']]
// # -> <a href="Criticism of Harry Potter">Criticism of <i>Harry Potter</i></a>
// $text = this.doQuotes($text);
// }
//
// # Link not escaped by : , create the various objects
// if ($noforce && !$nt->wasLocalInterwiki()) {
// # Interwikis
// if (
// $iw && this.mOptions->getInterwikiMagic() && $nottalk && (
// Language::fetchLanguageName($iw, null, 'mw') ||
// in_array($iw, $wgExtraInterlanguageLinkPrefixes)
// )
// ) {
// # T26502: filter duplicates
// if (!isset(this.mLangLinkLanguages[$iw])) {
// this.mLangLinkLanguages[$iw] = true;
// this.mOutput->addLanguageLink($nt->getFullText());
// }
//
// $s = rtrim($s . $prefix);
// $s .= trim($trail, "\n") == '' ? '': $prefix . $trail;
// continue;
// }
//
// if ($ns == NS_FILE) {
// if (!wfIsBadImage($nt->getDBkey(), this.mTitle)) {
// if ($wasblank) {
// # if no parameters were passed, $text
// # becomes something like "File:Foo.png",
// # which we don't want to pass on to the
// # image generator
// $text = '';
// } else {
// # recursively parse links inside the image caption
// # actually, this will parse them in any other parameters, too,
// # but it might be hard to fix that, and it doesn't matter ATM
// $text = this.replaceExternalLinks($text);
// $holders->merge(this.replaceInternalLinks2($text));
// }
// # cloak any absolute URLs inside the image markup, so replaceExternalLinks() won't touch them
// $s .= $prefix . this.armorLinks(
// this.makeImage($nt, $text, $holders)) . $trail;
// continue;
// }
// } elseif ($ns == NS_CATEGORY) {
// $s = rtrim($s . "\n"); # T2087
//
// if ($wasblank) {
// $sortkey = this.getDefaultSort();
// } else {
// $sortkey = $text;
// }
// $sortkey = Sanitizer::decodeCharReferences($sortkey);
// $sortkey = str_replace("\n", '', $sortkey);
// $sortkey = this.getConverterLanguage()->convertCategoryKey($sortkey);
// this.mOutput->addCategory($nt->getDBkey(), $sortkey);
//
// /**
// * Strip the whitespace Category links produce, see T2087
// */
// $s .= trim($prefix . $trail, "\n") == '' ? '' : $prefix . $trail;
//
// continue;
// }
// }
//
// # Self-link checking. For some languages, variants of the title are checked in
// # LinkHolderArray::doVariants() to allow batching the existence checks necessary
// # for linking to a different variant.
// if ($ns != NS_SPECIAL && $nt->equals(this.mTitle) && !$nt->hasFragment()) {
// $s .= $prefix . Linker::makeSelfLinkObj($nt, $text, '', $trail);
// continue;
// }
//
// # NS_MEDIA is a pseudo-namespace for linking directly to a file
// # @todo FIXME: Should do batch file existence checks, see comment below
// if ($ns == NS_MEDIA) {
// # Give extensions a chance to select the file revision for us
// $options = [];
// $descQuery = false;
// Hooks::run('BeforeParserFetchFileAndTitle',
// [ $this, $nt, &$options, &$descQuery ]);
// # Fetch and register the file (file title may be different via hooks)
// list($file, $nt) = this.fetchFileAndTitle($nt, $options);
// # Cloak with NOPARSE to avoid replacement in replaceExternalLinks
// $s .= $prefix . this.armorLinks(
// Linker::makeMediaLinkFile($nt, $file, $text)) . $trail;
// continue;
// }
//
// # Some titles, such as valid special pages or files in foreign repos, should
// # be shown as bluelinks even though they're not included in the page table
// # @todo FIXME: isAlwaysKnown() can be expensive for file links; we should really do
// # batch file existence checks for NS_FILE and NS_MEDIA
// if ($iw == '' && $nt->isAlwaysKnown()) {
// this.mOutput->addLink($nt);
// $s .= this.makeKnownLinkHolder($nt, $text, $trail, $prefix);
// } else {
// # Links will be added to the output link list after checking
// $s .= $holders->makeHolder($nt, $text, [], $trail, $prefix);
// }
// }
// return $holders;
// }
}