mirror of
https://github.com/gnosygnu/xowa.git
synced 2024-10-27 20:34:16 +00:00
Xomw: Convert Parser; also support caption
This commit is contained in:
parent
b6abbf2ffe
commit
09dbfc894e
@ -332,8 +332,7 @@ public class XomwLinker {
|
||||
// @since 1.20
|
||||
// @return String HTML for an image, with links, wrappers, etc.
|
||||
// XO.MW:SYNC:1.29; DATE:2017-02-08
|
||||
public void makeImageLink(Bry_bfr bfr, Xomw_parser_ctx pctx, Xomw_parser parser, XomwTitle title, XomwFile file, Xomw_params_frame frameParams, Xomw_params_handler handlerParams, Object time, byte[] query, int widthOption) {
|
||||
Xomw_parser_env env = parser.Env();
|
||||
public void makeImageLink(Bry_bfr bfr, Xomw_parser_env env, Xomw_parser_ctx pctx, XomwParserIface parser, XomwTitle title, XomwFile file, Xomw_params_frame frameParams, Xomw_params_handler handlerParams, Object time, byte[] query, int widthOption) {
|
||||
// XO.MW.HOOK:ImageBeforeProduceHTML
|
||||
|
||||
if (file != null && !file.allowInlineDisplay()) {
|
||||
@ -413,7 +412,7 @@ public class XomwLinker {
|
||||
// If a thumbnail width has not been provided, it is set
|
||||
// to the default user option as specified in Language*.php
|
||||
if (frameParams.align == Bry_.Empty) {
|
||||
frameParams.align = parser.Env().Lang__align_end;
|
||||
frameParams.align = env.Lang__align_end;
|
||||
}
|
||||
bfr.Add(prefix);
|
||||
makeThumbLink2(bfr, env, pctx, title, file, frameParams, handlerParams, time, query);
|
||||
@ -482,7 +481,7 @@ public class XomwLinker {
|
||||
// @param Parser|null $parser
|
||||
// @return array
|
||||
// XO.MW:SYNC:1.29; DATE:2017-02-08
|
||||
private static void getImageLinkMTOParams(Xomw_params_mto rv, Xomw_params_frame frameParams, byte[] query, Xomw_parser parser) {
|
||||
private static void getImageLinkMTOParams(Xomw_params_mto rv, Xomw_params_frame frameParams, byte[] query, XomwParserIface parser) {
|
||||
if (Php_utl_.isset(frameParams.link_url) && frameParams.link_url != Bry_.Empty) {
|
||||
rv.custom_url_link = frameParams.link_url;
|
||||
if (Php_utl_.isset(frameParams.link_target)) {
|
||||
|
@ -31,13 +31,13 @@ public class XomwLinkHolderArray {
|
||||
/**
|
||||
* @var Parser
|
||||
*/
|
||||
private final Xomw_parser parent;
|
||||
private final XomwParserIface parent;
|
||||
// protected $tempIdOffset;
|
||||
|
||||
/**
|
||||
* @param Parser $parent
|
||||
*/
|
||||
public XomwLinkHolderArray(Xomw_parser parent) {
|
||||
public XomwLinkHolderArray(XomwParserIface parent) {
|
||||
this.parent = parent;
|
||||
}
|
||||
|
||||
@ -261,18 +261,22 @@ public class XomwLinkHolderArray {
|
||||
*
|
||||
* @param String $text
|
||||
*/
|
||||
public void replace(Xomw_parser_bfr pbfr) {
|
||||
this.replaceInternal(pbfr);
|
||||
public boolean replace(Xomw_parser_bfr pbfr) {
|
||||
return this.replaceInternal(pbfr);
|
||||
// $this->replaceInterwiki( $text );
|
||||
}
|
||||
public byte[] replace(Xomw_parser_bfr pbfr, byte[] text) {
|
||||
boolean rv = this.replace(pbfr.Init(text));
|
||||
return rv ? pbfr.Trg().To_bry_and_clear() : pbfr.Src().To_bry_and_clear();
|
||||
}
|
||||
|
||||
/**
|
||||
* Replace internal links
|
||||
* @param String $text
|
||||
*/
|
||||
private void replaceInternal(Xomw_parser_bfr pbfr) {
|
||||
private boolean replaceInternal(Xomw_parser_bfr pbfr) {
|
||||
if (internals.Len() == 0) {
|
||||
return;
|
||||
return false;
|
||||
}
|
||||
|
||||
// SKIP:Replace_internals does db lookup to identify redlinks;
|
||||
@ -430,6 +434,7 @@ public class XomwLinkHolderArray {
|
||||
// $replacer->cb(),
|
||||
// $text
|
||||
// );
|
||||
return true;
|
||||
}
|
||||
|
||||
// /**
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,27 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||
|
||||
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||
or alternatively under the terms of the Apache License Version 2.0.
|
||||
|
||||
You may use XOWA according to either of these licenses as is most appropriate
|
||||
for your project on a case-by-case basis.
|
||||
|
||||
The terms of each license can be found in the source code repository:
|
||||
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.mediawiki.includes.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*;
|
||||
import gplx.xowa.mediawiki.includes.htmls.*;
|
||||
import gplx.xowa.mediawiki.includes.linkers.*;
|
||||
/**
 * Subset of parser operations exposed through an interface.
 * In this change XomwLinker and XomwLinkHolderArray take a XomwParserIface instead of the concrete
 * Xomw_parser class, and Xomw_parser is declared as implementing it.
 */
public interface XomwParserIface {
|
||||
// NOTE(review): no implementation is visible in this diff; presumably hands out the next link-holder id -- confirm
int nextLinkID();
|
||||
// Xomw_parser implements this by returning its XomwParserOptions field
XomwParserOptions getOptions();
|
||||
// NOTE(review): no implementation is visible in this diff; presumably the renderer used to build link HTML -- confirm
Xomw_link_renderer getLinkRenderer();
|
||||

||||
// Xomw_parser's version is a port of MW's preg_replace that inserts MARKER_PREFIX + "NOPARSE" in front of URL protocols
byte[] armorLinks(Bry_bfr trg, byte[] src, int src_bgn, int src_end);
|
||||
// Xomw_parser's version clears atrs before filling it; the remainder of that method is not visible in this diff
Xomw_atr_mgr getExternalLinkAttribs(Xomw_atr_mgr atrs);
|
||||
// Xomw_parser's version is currently a stub that returns caption unchanged
byte[] stripAltText(byte[] caption, XomwLinkHolderArray holders);
|
||||
}
|
@ -14,8 +14,8 @@ GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.mediawiki.includes.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*;
|
||||
public class Xomw_parser_options {
|
||||
public Xomw_parser_options() {
|
||||
public class XomwParserOptions {
|
||||
public XomwParserOptions() {
|
||||
this.mThumbSize = 220;
|
||||
}
|
||||
// /**
|
@ -15,8 +15,8 @@ Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.mediawiki.includes.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*;
|
||||
import org.junit.*;
|
||||
public class Xomw_parser__tst {
|
||||
private final Xomw_parser__fxt fxt = new Xomw_parser__fxt();
|
||||
public class XomwParserTest {
|
||||
private final XomwParserFxt fxt = new XomwParserFxt();
|
||||
@Test public void Basic() {
|
||||
fxt.Test__parse(String_.Concat_lines_nl_skip_last
|
||||
( "== heading_1 =="
|
||||
@ -57,19 +57,21 @@ public class Xomw_parser__tst {
|
||||
));
|
||||
}
|
||||
}
|
||||
class Xomw_parser__fxt {
|
||||
private final Xomw_parser mgr = new Xomw_parser();
|
||||
class XomwParserFxt {
|
||||
private final XomwParser parser = new XomwParser();
|
||||
private final Xomw_parser_ctx pctx = new Xomw_parser_ctx();
|
||||
private final Xomw_parser_bfr pbfr = new Xomw_parser_bfr();
|
||||
public Xomw_parser__fxt() {
|
||||
public XomwParserFxt() {
|
||||
Xoae_app app = Xoa_app_fxt.Make__app__edit();
|
||||
Xowe_wiki wiki = Xoa_app_fxt.Make__wiki__edit(app);
|
||||
mgr.Init_by_wiki(wiki);
|
||||
mgr.Init_by_page(XomwTitle.newFromText(Bry_.new_a7("Page_1")));
|
||||
parser.Init_by_wiki(wiki);
|
||||
parser.Init_by_page(XomwTitle.newFromText(Bry_.new_a7("Page_1")));
|
||||
pctx.Init_by_page(XomwTitle.newFromText(Bry_.new_a7("Page_1")));
|
||||
}
|
||||
public void Test__parse(String src_str, String expd) {
|
||||
byte[] src_bry = Bry_.new_u8(src_str);
|
||||
mgr.Internal_parse(pbfr, src_bry);
|
||||
mgr.Internal_parse_half_parsed(pbfr, true, true);
|
||||
parser.internalParse(pbfr, pctx, src_bry);
|
||||
parser.internalParseHalfParsed(pbfr, pctx, true, true);
|
||||
Tfds.Eq_str_lines(expd, pbfr.Rslt().To_str_and_clear(), src_str);
|
||||
}
|
||||
}
|
@ -0,0 +1,346 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||
|
||||
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||
or alternatively under the terms of the Apache License Version 2.0.
|
||||
|
||||
You may use XOWA according to either of these licenses as is most appropriate
|
||||
for your project on a case-by-case basis.
|
||||
|
||||
The terms of each license can be found in the source code repository:
|
||||
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.mediawiki.includes.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*;
|
||||
import gplx.core.btries.*;
|
||||
public class XomwStripState {
|
||||
// protected $prefix;
|
||||
// protected $data;
|
||||
// protected $regex;
|
||||
//
|
||||
// protected $tempType, $tempMergePrefix;
|
||||
// protected $circularRefGuard;
|
||||
// protected $recursionLevel = 0;
|
||||
//
|
||||
// static final UNSTRIP_RECURSION_LIMIT = 20;
|
||||
|
||||
private final Btrie_slim_mgr trie = Btrie_slim_mgr.cs();
|
||||
private final Btrie_rv trv = new Btrie_rv();
|
||||
private final Bry_bfr tmp_1 = Bry_bfr_.New();
|
||||
private final Bry_bfr tmp_2 = Bry_bfr_.New();
|
||||
private boolean tmp_2_used = false;
|
||||
private int generalLen, nowikiLen;
|
||||
|
||||
// /**
|
||||
// * @param String|null $prefix
|
||||
// * @since 1.26 The prefix argument should be omitted, as the strip marker
|
||||
// * prefix String is now a constant.
|
||||
// */
|
||||
// public function __construct($prefix = null) {
|
||||
// if ($prefix !== null) {
|
||||
// wfDeprecated(__METHOD__ . ' with called with $prefix argument' .
|
||||
// ' (call with no arguments instead)', '1.26');
|
||||
// }
|
||||
// this.data = [
|
||||
// 'nowiki' => [],
|
||||
// 'general' => []
|
||||
// ];
|
||||
// this.regex = '/' . Parser::MARKER_PREFIX . "([^\x7f<>&'\"]+)" . Parser::MARKER_SUFFIX . '/';
|
||||
// this.circularRefGuard = [];
|
||||
// }
|
||||
// public void Clear() {
|
||||
// trie.Clear();
|
||||
// generalLen = nowikiLen = 0;
|
||||
// tmp_2_used = false;
|
||||
// }
|
||||
|
||||
/**
|
||||
* Add a nowiki strip item
|
||||
* @param String $marker
|
||||
* @param String $value
|
||||
*/
|
||||
public void addNoWiki(byte[] marker, byte[] val) {
|
||||
this.addItem(TYPE_NOWIKI, marker, val);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param String $marker
|
||||
* @param String $value
|
||||
*/
|
||||
public void addGeneral(byte[] marker, byte[] val) {
|
||||
this.addItem(TYPE_GENERAL, marker, val);
|
||||
}
|
||||
|
||||
/**
|
||||
* @throws MWException
|
||||
* @param String $type
|
||||
* @param String $marker
|
||||
* @param String $value
|
||||
*/
|
||||
public void addItem(byte type, byte[] marker, byte[] val) {
|
||||
// if (!preg_match(this.regex, $marker, $m)) {
|
||||
// throw new MWException("Invalid marker: $marker");
|
||||
// }
|
||||
|
||||
// XO.MW:ported
|
||||
// this.data[$type][$m[1]] = $value;
|
||||
trie.Add_obj(marker, new XomwStripItem(type, marker, val));
|
||||
if (type == TYPE_GENERAL)
|
||||
generalLen++;
|
||||
else
|
||||
nowikiLen++;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param String $text
|
||||
* @return mixed
|
||||
*/
|
||||
public byte[] unstripGeneral(byte[] text) {
|
||||
return this.unstripType(TYPE_GENERAL, text);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param String $text
|
||||
* @return mixed
|
||||
*/
|
||||
public byte[] unstripNoWiki(byte[] text) {
|
||||
return this.unstripType(TYPE_NOWIKI, text);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param String $text
|
||||
* @return mixed
|
||||
*/
|
||||
public byte[] unstripBoth(byte[] text) {
|
||||
// $text = this.unstripType('general', $text);
|
||||
// $text = this.unstripType('nowiki', $text);
|
||||
return this.unstripType(TYPE_BOTH, text);
|
||||
}
|
||||
|
||||
public byte[] unstripType(byte tid, byte[] text) {
|
||||
boolean dirty = unstripType(tid, tmp_1, text, 0, text.length);
|
||||
return dirty ? tmp_1.To_bry_and_clear() : text;
|
||||
}
|
||||
|
||||
// XOWA
|
||||
// Parser-buffer variant of unstripGeneral; the dirty flag returned by the private overload is discarded.
public void unstripGeneral(Xomw_parser_bfr pbfr) {
  unstripType(TYPE_GENERAL, pbfr);
}
|
||||
// Parser-buffer variant of unstripNoWiki; the dirty flag returned by the private overload is discarded.
public void unstripNoWiki(Xomw_parser_bfr pbfr) {
  unstripType(TYPE_NOWIKI, pbfr);
}
|
||||
// Parser-buffer variant of unstripBoth; the dirty flag returned by the private overload is discarded.
public void unstripBoth(Xomw_parser_bfr pbfr) {
  unstripType(TYPE_BOTH, pbfr);
}
|
||||
private boolean unstripType(byte tid, Xomw_parser_bfr pbfr) {
|
||||
// XO.PBFR
|
||||
Bry_bfr src_bfr = pbfr.Src();
|
||||
byte[] src = src_bfr.Bfr();
|
||||
boolean dirty = unstripType(tid, pbfr.Trg(), src, 0, src_bfr.Len());
|
||||
if (dirty)
|
||||
pbfr.Switch();
|
||||
return dirty;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param String $type
|
||||
* @param String $text
|
||||
* @return mixed
|
||||
*/
|
||||
private boolean unstripType(byte tid, Bry_bfr trg, byte[] src, int src_bgn, int src_end) {
|
||||
// // Shortcut
|
||||
// if (!count(this.data[$type])) {
|
||||
// return $text;
|
||||
// }
|
||||
// exit early if no items for type
|
||||
if ((tid & TYPE_GENERAL) == TYPE_GENERAL) {
|
||||
if (generalLen == 0)
|
||||
return false;
|
||||
}
|
||||
else if ((tid & TYPE_NOWIKI) == TYPE_NOWIKI) {
|
||||
if (nowikiLen == 0)
|
||||
return false;
|
||||
}
|
||||
|
||||
// XO.MW:PORTED
|
||||
// $oldType = this.tempType;
|
||||
// this.tempType = $type;
|
||||
// $text = preg_replace_callback(this.regex, [ $this, 'unstripCallback' ], $text);
|
||||
// this.tempType = $oldType;
|
||||
// return $text;
|
||||
int cur = src_bgn;
|
||||
int prv = cur;
|
||||
boolean dirty = false;
|
||||
// loop over each src char
|
||||
while (true) {
|
||||
// EOS: exit
|
||||
if (cur == src_end) {
|
||||
if (dirty) // add remainder if dirty
|
||||
trg.Add_mid(src, prv, src_end);
|
||||
break;
|
||||
}
|
||||
|
||||
// check if current pos matches strip state
|
||||
Object o = trie.Match_at(trv, src, cur, src_end);
|
||||
if (o != null) { // match
|
||||
XomwStripItem item = (XomwStripItem)o;
|
||||
byte item_tid = item.Type();
|
||||
if ((tid & item_tid) == item_tid) { // check if types match
|
||||
// get bfr for recursion
|
||||
Bry_bfr nested_bfr = null;
|
||||
boolean tmp_2_release = false;
|
||||
if (tmp_2_used) {
|
||||
nested_bfr = Bry_bfr_.New();
|
||||
}
|
||||
else {
|
||||
nested_bfr = tmp_2;
|
||||
tmp_2_used = true;
|
||||
tmp_2_release = true;
|
||||
}
|
||||
|
||||
// recurse
|
||||
byte[] item_val = item.Val();
|
||||
if (unstripType(tid, nested_bfr, item_val, 0, item_val.length))
|
||||
item_val = nested_bfr.To_bry_and_clear();
|
||||
if (tmp_2_release)
|
||||
tmp_2_used = false;
|
||||
|
||||
// add to trg
|
||||
trg.Add_mid(src, prv, cur);
|
||||
trg.Add(item_val);
|
||||
|
||||
// update vars
|
||||
dirty = true;
|
||||
cur += item.Key().length;
|
||||
prv = cur;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
cur++;
|
||||
}
|
||||
return dirty;
|
||||
}
|
||||
|
||||
// /**
|
||||
// * @param array $m
|
||||
// * @return array
|
||||
// */
|
||||
// protected function unstripCallback($m) {
|
||||
// $marker = $m[1];
|
||||
// if (isset(this.data[this.tempType][$marker])) {
|
||||
// if (isset(this.circularRefGuard[$marker])) {
|
||||
// return '<span class="error">'
|
||||
// . wfMessage('parser-unstrip-loop-warning')->inContentLanguage()->text()
|
||||
// . '</span>';
|
||||
// }
|
||||
// if (this.recursionLevel >= self::UNSTRIP_RECURSION_LIMIT) {
|
||||
// return '<span class="error">' .
|
||||
// wfMessage('parser-unstrip-recursion-limit')
|
||||
// ->numParams(self::UNSTRIP_RECURSION_LIMIT)->inContentLanguage()->text() .
|
||||
// '</span>';
|
||||
// }
|
||||
// this.circularRefGuard[$marker] = true;
|
||||
// this.recursionLevel++;
|
||||
// $value = this.data[this.tempType][$marker];
|
||||
// if ($value instanceof Closure) {
|
||||
// $value = $value();
|
||||
// }
|
||||
// $ret = this.unstripType(this.tempType, $value);
|
||||
// this.recursionLevel--;
|
||||
// unset(this.circularRefGuard[$marker]);
|
||||
// return $ret;
|
||||
// } else {
|
||||
// return $m[0];
|
||||
// }
|
||||
// }
|
||||
|
||||
// /**
|
||||
// * Get a StripState Object which is sufficient to unstrip the given text.
|
||||
// * It will contain the minimum subset of strip items necessary.
|
||||
// *
|
||||
// * @param String $text
|
||||
// *
|
||||
// * @return StripState
|
||||
// */
|
||||
// public function getSubState($text) {
|
||||
// $subState = new StripState();
|
||||
// $pos = 0;
|
||||
// while (true) {
|
||||
// $startPos = strpos($text, Parser::MARKER_PREFIX, $pos);
|
||||
// $endPos = strpos($text, Parser::MARKER_SUFFIX, $pos);
|
||||
// if ($startPos === false || $endPos === false) {
|
||||
// break;
|
||||
// }
|
||||
//
|
||||
// $endPos += strlen(Parser::MARKER_SUFFIX);
|
||||
// $marker = substr($text, $startPos, $endPos - $startPos);
|
||||
// if (!preg_match(this.regex, $marker, $m)) {
|
||||
// continue;
|
||||
// }
|
||||
//
|
||||
// $key = $m[1];
|
||||
// if (isset(this.data['nowiki'][$key])) {
|
||||
// $subState->data['nowiki'][$key] = this.data['nowiki'][$key];
|
||||
// } elseif (isset(this.data['general'][$key])) {
|
||||
// $subState->data['general'][$key] = this.data['general'][$key];
|
||||
// }
|
||||
// $pos = $endPos;
|
||||
// }
|
||||
// return $subState;
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Merge another StripState Object into this one. The strip marker keys
|
||||
// * will not be preserved. The strings in the $texts array will have their
|
||||
// * strip markers rewritten, the resulting array of strings will be returned.
|
||||
// *
|
||||
// * @param StripState $otherState
|
||||
// * @param array $texts
|
||||
// * @return array
|
||||
// */
|
||||
// public function merge($otherState, $texts) {
|
||||
// $mergePrefix = wfRandomString(16);
|
||||
//
|
||||
// foreach ($otherState->data as $type => $items) {
|
||||
// foreach ($items as $key => $value) {
|
||||
// this.data[$type]["$mergePrefix-$key"] = $value;
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// this.tempMergePrefix = $mergePrefix;
|
||||
// $texts = preg_replace_callback($otherState->regex, [ $this, 'mergeCallback' ], $texts);
|
||||
// this.tempMergePrefix = null;
|
||||
// return $texts;
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * @param array $m
|
||||
// * @return String
|
||||
// */
|
||||
// protected function mergeCallback($m) {
|
||||
// $key = $m[1];
|
||||
// return Parser::MARKER_PREFIX . this.tempMergePrefix . '-' . $key . Parser::MARKER_SUFFIX;
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Remove any strip markers found in the given text.
|
||||
// *
|
||||
// * @param String $text Input String
|
||||
// * @return String
|
||||
// */
|
||||
// public function killMarkers($text) {
|
||||
// return preg_replace(this.regex, '', $text);
|
||||
// }
|
||||
public static final String Str__marker_bgn = "\u007f'\"`UNIQ-";
|
||||
public static final byte[]
|
||||
Bry__marker__bgn = Bry_.new_a7(Str__marker_bgn)
|
||||
, Bry__marker__end = Bry_.new_a7("-QINU`\"'\u007f")
|
||||
;
|
||||
public static final byte TYPE_GENERAL = 1, TYPE_NOWIKI = 2, TYPE_BOTH = 3;
|
||||
}
|
||||
/**
 * Immutable entry stored in the strip-state trie: an item type, the marker used as key,
 * and the value written in place of that marker.
 * The arrays are stored and returned by reference (no copies are made).
 */
class XomwStripItem {
  private final byte tid;
  private final byte[] key;
  private final byte[] val;

  public XomwStripItem(byte tid, byte[] key, byte[] val) {
    this.tid = tid;
    this.key = key;
    this.val = val;
  }

  /** Item type as passed to the constructor. */
  public byte Type() {
    return tid;
  }

  /** Marker bytes as passed to the constructor. */
  public byte[] Key() {
    return key;
  }

  /** Replacement bytes as passed to the constructor. */
  public byte[] Val() {
    return val;
  }
}
|
@ -15,28 +15,28 @@ Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.mediawiki.includes.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*;
|
||||
import org.junit.*; import gplx.core.tests.*;
|
||||
public class Xomw_strip_state__tst {
|
||||
private final Xomw_strip_state__fxt fxt = new Xomw_strip_state__fxt();
|
||||
public class XomwStripStateTest {
|
||||
private final XomwStripStateFxt fxt = new XomwStripStateFxt();
|
||||
@Test public void Basic() {
|
||||
fxt.Init__add (Xomw_strip_state.Tid__general, "\u007f'\"`UNIQ-key-1-QINU`\"'\u007f", "val-1");
|
||||
fxt.Test__nostrip(Xomw_strip_state.Tid__nowiki , "a \u007f'\"`UNIQ-key-1-QINU`\"'\u007f b");
|
||||
fxt.Test__unstrip(Xomw_strip_state.Tid__general, "a \u007f'\"`UNIQ-key-1-QINU`\"'\u007f b", "a val-1 b");
|
||||
fxt.Test__unstrip(Xomw_strip_state.Tid__both , "a \u007f'\"`UNIQ-key-1-QINU`\"'\u007f b", "a val-1 b");
|
||||
fxt.Init__add (XomwStripState.TYPE_GENERAL, "\u007f'\"`UNIQ-key-1-QINU`\"'\u007f", "val-1");
|
||||
fxt.Test__nostrip(XomwStripState.TYPE_NOWIKI , "a \u007f'\"`UNIQ-key-1-QINU`\"'\u007f b");
|
||||
fxt.Test__unstrip(XomwStripState.TYPE_GENERAL, "a \u007f'\"`UNIQ-key-1-QINU`\"'\u007f b", "a val-1 b");
|
||||
fxt.Test__unstrip(XomwStripState.TYPE_BOTH , "a \u007f'\"`UNIQ-key-1-QINU`\"'\u007f b", "a val-1 b");
|
||||
}
|
||||
@Test public void Recurse() {
|
||||
fxt.Init__add (Xomw_strip_state.Tid__general, "\u007f'\"`UNIQ-key-1-QINU`\"'\u007f", "val-1");
|
||||
fxt.Init__add (Xomw_strip_state.Tid__general, "\u007f'\"`UNIQ-key-2-QINU`\"'\u007f", "\u007f'\"`UNIQ-key-1-QINU`\"'\u007f");
|
||||
fxt.Test__unstrip(Xomw_strip_state.Tid__general, "a \u007f'\"`UNIQ-key-2-QINU`\"'\u007f b", "a val-1 b");
|
||||
fxt.Init__add (XomwStripState.TYPE_GENERAL, "\u007f'\"`UNIQ-key-1-QINU`\"'\u007f", "val-1");
|
||||
fxt.Init__add (XomwStripState.TYPE_GENERAL, "\u007f'\"`UNIQ-key-2-QINU`\"'\u007f", "\u007f'\"`UNIQ-key-1-QINU`\"'\u007f");
|
||||
fxt.Test__unstrip(XomwStripState.TYPE_GENERAL, "a \u007f'\"`UNIQ-key-2-QINU`\"'\u007f b", "a val-1 b");
|
||||
}
|
||||
}
|
||||
class Xomw_strip_state__fxt {
|
||||
private final Xomw_strip_state strip_state = new Xomw_strip_state();
|
||||
class XomwStripStateFxt {
|
||||
private final XomwStripState stripState = new XomwStripState();
|
||||
public void Init__add(byte tid, String marker, String val) {
|
||||
strip_state.Add_item(tid, Bry_.new_u8(marker), Bry_.new_u8(val));
|
||||
stripState.addItem(tid, Bry_.new_u8(marker), Bry_.new_u8(val));
|
||||
}
|
||||
public void Test__nostrip(byte tid, String src) {Test__unstrip(tid, src, src);}
|
||||
public void Test__unstrip(byte tid, String src, String expd) {
|
||||
byte[] actl = strip_state.Unstrip(tid, Bry_.new_u8(src));
|
||||
byte[] actl = stripState.unstripType(tid, Bry_.new_u8(src));
|
||||
Gftest.Eq__str(expd, String_.new_u8(actl));
|
||||
}
|
||||
}
|
@ -24,7 +24,7 @@ public class Xomw_block_level_pass {
|
||||
private int last_section;
|
||||
private byte[] find_colon_no_links__before, find_colon_no_links__after;
|
||||
|
||||
public void Do_block_levels(Xomw_parser_ctx pctx, Xomw_parser_bfr pbfr, boolean line_start) {
|
||||
public void doBlockLevels(Xomw_parser_ctx pctx, Xomw_parser_bfr pbfr, boolean line_start) {
|
||||
// XO.PBFR
|
||||
Bry_bfr src_bfr = pbfr.Src();
|
||||
byte[] src = src_bfr.Bfr();
|
||||
@ -41,7 +41,7 @@ public class Xomw_block_level_pass {
|
||||
("<table", "<h1", "<h2", "<h3", "<h4", "<h5", "<h6", "<pre", "<tr", "<p", "<ul", "<ol", "<dl", "<li", "</tr", "</td", "</th");
|
||||
close_match_trie = Btrie_slim_mgr.ci_a7().Add_many_str
|
||||
( "</table", "</h1", "</h2", "</h3", "</h4", "</h5", "</h6", "<td", "<th", "<blockquote", "</blockquote", "<div", "</div", "<hr"
|
||||
, "</pre", "</p", "</mw:", Xomw_strip_state.Str__marker_bgn + "-pre", "</li", "</ul", "</ol", "</dl", "<center", "</center");
|
||||
, "</pre", "</p", "</mw:", XomwStripState.Str__marker_bgn + "-pre", "</li", "</ul", "</ol", "</dl", "<center", "</center");
|
||||
blockquote_trie = Btrie_slim_mgr.ci_a7().Add_many_str("<blockquote", "</blockquote");
|
||||
pre_trie = Btrie_slim_mgr.ci_a7().Add_str_int("<pre", Pre__bgn).Add_str_int("</pre", Pre__end);
|
||||
}
|
||||
|
@ -34,7 +34,7 @@ class Xomw_block_level_pass__fxt {
|
||||
private boolean apos = true;
|
||||
public void Test__do_block_levels(String src, String expd) {
|
||||
if (apos) expd = gplx.langs.htmls.Gfh_utl.Replace_apos(expd);
|
||||
block_level_pass.Do_block_levels(pctx, pbfr.Init(Bry_.new_u8(src)), true);
|
||||
block_level_pass.doBlockLevels(pctx, pbfr.Init(Bry_.new_u8(src)), true);
|
||||
Gftest.Eq__str(expd, pbfr.Rslt().To_str_and_clear());
|
||||
}
|
||||
}
|
||||
|
@ -20,7 +20,7 @@ import gplx.xowa.mediawiki.includes.parsers.quotes.*; import gplx.xowa.mediawiki
|
||||
import gplx.xowa.mediawiki.includes.parsers.lnkes.*; import gplx.xowa.mediawiki.includes.parsers.lnkis.*; import gplx.xowa.mediawiki.includes.parsers.magiclinks.*; import gplx.xowa.mediawiki.includes.parsers.doubleunders.*;
|
||||
import gplx.xowa.mediawiki.includes.utls.*; import gplx.xowa.mediawiki.includes.linkers.*;
|
||||
import gplx.xowa.mediawiki.includes.htmls.*;
|
||||
public class Xomw_parser {
|
||||
public class Xomw_parser implements XomwParserIface {
|
||||
private final Xomw_parser_ctx pctx = new Xomw_parser_ctx();
|
||||
private final Xomw_table_wkr table_wkr;
|
||||
private final Xomw_hr_wkr hr_wkr = new Xomw_hr_wkr();
|
||||
@ -42,8 +42,8 @@ public class Xomw_parser {
|
||||
private int marker_index = 0;
|
||||
// private final Xomw_prepro_wkr prepro_wkr = new Xomw_prepro_wkr();
|
||||
public Xomw_parser_env Env() {return env;} private final Xomw_parser_env env = new Xomw_parser_env();
|
||||
public Xomw_parser_options Options() {return options;} private final Xomw_parser_options options = new Xomw_parser_options();
|
||||
public Xomw_strip_state Strip_state() {return strip_state;} private final Xomw_strip_state strip_state = new Xomw_strip_state();
|
||||
public XomwParserOptions getOptions() {return options;} private final XomwParserOptions options = new XomwParserOptions();
|
||||
public XomwStripState Strip_state() {return strip_state;} private final XomwStripState strip_state = new XomwStripState();
|
||||
public XomwSanitizer Sanitizer() {return sanitizer;} private final XomwSanitizer sanitizer = new XomwSanitizer();
|
||||
public XomwLinker Linker() {return linker;} private final XomwLinker linker;
|
||||
public Bry_bfr Tmp() {return tmp;} private final Bry_bfr tmp = Bry_bfr_.New();
|
||||
@ -92,10 +92,10 @@ public class Xomw_parser {
|
||||
this.linker = new XomwLinker(link_renderer);
|
||||
this.protocols_trie = Xomw_parser.Protocols__dflt();
|
||||
this.holders = new XomwLinkHolderArray(this);
|
||||
this.table_wkr = new Xomw_table_wkr(this);
|
||||
this.quote_wkr = new Xomw_quote_wkr(this);
|
||||
this.lnke_wkr = new Xomw_lnke_wkr(this);
|
||||
this.lnki_wkr = new Xomw_lnki_wkr(this, holders, link_renderer, protocols_trie);
|
||||
this.table_wkr = new Xomw_table_wkr(tmp, sanitizer, strip_state);
|
||||
this.quote_wkr = new Xomw_quote_wkr(tmp);
|
||||
this.lnke_wkr = new Xomw_lnke_wkr(this, tmp, linker, sanitizer);
|
||||
this.lnki_wkr = new Xomw_lnki_wkr(this, holders, link_renderer, protocols_trie, linker, quote_wkr, tmp, strip_state);
|
||||
this.heading_wkr_cbk = new Xomw_heading_cbk__html();
|
||||
this.magiclinks_wkr = new Xomw_magiclinks_wkr(this, sanitizer, linker, regex_boundary, regex_url);
|
||||
}
|
||||
@ -146,33 +146,34 @@ public class Xomw_parser {
|
||||
// properly; putting them before other transformations should keep
|
||||
// exciting things like link expansions from showing up in surprising
|
||||
// places.
|
||||
table_wkr.Do_table_stuff(pctx, pbfr);
|
||||
hr_wkr.Replace_hrs(pctx, pbfr);
|
||||
table_wkr.doTableStuff(pctx, pbfr);
|
||||
|
||||
doubleunder_wkr.Do_double_underscore(pctx, pbfr); // DONE: DATE:2017-01-27
|
||||
hr_wkr.replaceHrs(pctx, pbfr);
|
||||
|
||||
heading_wkr.Do_headings(pctx, pbfr, heading_wkr_cbk);
|
||||
lnki_wkr.Replace_internal_links(pctx, pbfr);
|
||||
quote_wkr.Do_all_quotes(pctx, pbfr);
|
||||
lnke_wkr.Replace_external_links(pctx, pbfr);
|
||||
doubleunder_wkr.doDoubleUnderscore(pctx, pbfr); // DONE: DATE:2017-01-27
|
||||
|
||||
heading_wkr.doHeadings(pctx, pbfr, heading_wkr_cbk);
|
||||
lnki_wkr.replaceInternalLinks(pbfr, env, pctx);
|
||||
quote_wkr.doAllQuotes(pctx, pbfr);
|
||||
lnke_wkr.replaceExternalLinks(pctx, pbfr);
|
||||
|
||||
// replaceInternalLinks may sometimes leave behind
|
||||
// absolute URLs, which have to be masked to hide them from replaceExternalLinks
|
||||
Xomw_parser_bfr_.Replace(pbfr, Bry__marker__noparse, Bry_.Empty);
|
||||
magiclinks_wkr.Do_magic_links(pctx, pbfr);
|
||||
magiclinks_wkr.doMagicLinks(pctx, pbfr);
|
||||
|
||||
// $text = $this->formatHeadings($text, $origText, $isMain);
|
||||
}
|
||||
|
||||
public void Internal_parse_half_parsed(Xomw_parser_bfr pbfr, boolean is_main, boolean line_start) {
|
||||
strip_state.Unstrip_general(pbfr);
|
||||
strip_state.unstripGeneral(pbfr);
|
||||
|
||||
// MW.HOOK:ParserAfterUnstrip
|
||||
|
||||
// Clean up special characters, only run once, next-to-last before doBlockLevels
|
||||
nbsp_wkr.Do_nbsp(pctx, pbfr);
|
||||
nbsp_wkr.doNbsp(pctx, pbfr);
|
||||
|
||||
block_wkr.Do_block_levels(pctx, pbfr, line_start);
|
||||
block_wkr.doBlockLevels(pctx, pbfr, line_start);
|
||||
|
||||
lnki_wkr.replaceLinkHolders(pbfr);
|
||||
|
||||
@ -192,12 +193,12 @@ public class Xomw_parser {
|
||||
// }
|
||||
// }
|
||||
|
||||
strip_state.Unstrip_nowiki(pbfr);
|
||||
strip_state.unstripNoWiki(pbfr);
|
||||
|
||||
// MW.HOOK:ParserBeforeTidy
|
||||
|
||||
// $text = $this->replaceTransparentTags( $text );
|
||||
strip_state.Unstrip_general(pbfr);
|
||||
strip_state.unstripGeneral(pbfr);
|
||||
|
||||
sanitizer.Normalize_char_references(pbfr);
|
||||
|
||||
@ -236,7 +237,11 @@ public class Xomw_parser {
|
||||
|
||||
// MW.HOOK:ParserAfterTidy
|
||||
}
|
||||
public byte[] Armor_links(Bry_bfr trg, byte[] src, int src_bgn, int src_end) {
|
||||
public byte[] stripAltText(byte[] caption, XomwLinkHolderArray holders) {
|
||||
return caption;
|
||||
}
|
||||
|
||||
public byte[] armorLinks(Bry_bfr trg, byte[] src, int src_bgn, int src_end) {
|
||||
// PORTED:preg_replace( '/\b((?i)' . $this->mUrlProtocols . ')/', self::MARKER_PREFIX . "NOPARSE$1", $text )
|
||||
int cur = src_bgn;
|
||||
int prv = cur;
|
||||
@ -262,7 +267,7 @@ public class Xomw_parser {
|
||||
dirty = true;
|
||||
byte[] protocol_bry = (byte[])protocol_obj;
|
||||
if (called_by_bry) trg = Bry_bfr_.New();
|
||||
trg.Add_bry_many(Xomw_strip_state.Bry__marker__bgn, Bry__noparse, protocol_bry);
|
||||
trg.Add_bry_many(XomwStripState.Bry__marker__bgn, Bry__noparse, protocol_bry);
|
||||
cur += protocol_bry.length;
|
||||
prv = cur;
|
||||
}
|
||||
@ -287,15 +292,15 @@ public class Xomw_parser {
|
||||
}
|
||||
}
|
||||
public byte[] Insert_strip_item(byte[] text) {
|
||||
tmp.Add_bry_many(Xomw_strip_state.Bry__marker__bgn, Bry__strip_state_item);
|
||||
tmp.Add_bry_many(XomwStripState.Bry__marker__bgn, Bry__strip_state_item);
|
||||
tmp.Add_int_variable(marker_index);
|
||||
tmp.Add(Xomw_strip_state.Bry__marker__end);
|
||||
tmp.Add(XomwStripState.Bry__marker__end);
|
||||
byte[] marker = tmp.To_bry_and_clear();
|
||||
marker_index++;
|
||||
strip_state.Add_general(marker, text);
|
||||
strip_state.addGeneral(marker, text);
|
||||
return marker;
|
||||
}
|
||||
public Xomw_atr_mgr Get_external_link_attribs(Xomw_atr_mgr atrs) {
|
||||
public Xomw_atr_mgr getExternalLinkAttribs(Xomw_atr_mgr atrs) {
|
||||
atrs.Clear();
|
||||
byte[] rel = Get_external_link_rel;
|
||||
|
||||
@ -309,7 +314,7 @@ public class Xomw_parser {
|
||||
public byte[] Get_external_link_rel;
|
||||
private static byte[] Atr__rel;
|
||||
private static final byte[] Bry__strip_state_item = Bry_.new_a7("-item-"), Bry__noparse = Bry_.new_a7("NOPARSE");
|
||||
private static final byte[] Bry__marker__noparse = Bry_.Add(Xomw_strip_state.Bry__marker__bgn, Bry__noparse);
|
||||
private static final byte[] Bry__marker__noparse = Bry_.Add(XomwStripState.Bry__marker__bgn, Bry__noparse);
|
||||
public static Btrie_slim_mgr Protocols__dflt() {
|
||||
Btrie_slim_mgr rv = Btrie_slim_mgr.ci_a7();
|
||||
Gfo_protocol_itm[] ary = Gfo_protocol_itm.Ary();
|
||||
|
@ -1,137 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||
|
||||
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||
or alternatively under the terms of the Apache License Version 2.0.
|
||||
|
||||
You may use XOWA according to either of these licenses as is most appropriate
|
||||
for your project on a case-by-case basis.
|
||||
|
||||
The terms of each license can be found in the source code repository:
|
||||
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.mediawiki.includes.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*;
|
||||
import gplx.core.btries.*;
|
||||
public class Xomw_strip_state { // REF.MW:/parser/StripState.php
|
||||
private final Btrie_slim_mgr trie = Btrie_slim_mgr.cs();
|
||||
private final Btrie_rv trv = new Btrie_rv();
|
||||
private final Bry_bfr tmp_1 = Bry_bfr_.New();
|
||||
private final Bry_bfr tmp_2 = Bry_bfr_.New();
|
||||
private boolean tmp_2_used = false;
|
||||
private int general_len, nowiki_len;
|
||||
public void Clear() {
|
||||
trie.Clear();
|
||||
general_len = nowiki_len = 0;
|
||||
tmp_2_used = false;
|
||||
}
|
||||
public void Add_general(byte[] marker, byte[] val) {Add_item(Tid__general, marker, val);}
|
||||
public void Add_nowiki (byte[] marker, byte[] val) {Add_item(Tid__nowiki, marker, val);}
|
||||
public void Add_item(byte tid, byte[] marker, byte[] val) {
|
||||
trie.Add_obj(marker, new Xomw_strip_item(tid, marker, val));
|
||||
if (tid == Tid__general)
|
||||
general_len++;
|
||||
else
|
||||
nowiki_len++;
|
||||
}
|
||||
public byte[] Unstrip_general(byte[] text) {return Unstrip(Tid__general, text);}
|
||||
public byte[] Unstrip_nowiki (byte[] text) {return Unstrip(Tid__nowiki , text);}
|
||||
public byte[] Unstrip_both (byte[] text) {return Unstrip(Tid__both , text);}
|
||||
public byte[] Unstrip(byte tid, byte[] text) {
|
||||
boolean dirty = Unstrip(tid, tmp_1, text, 0, text.length);
|
||||
return dirty ? tmp_1.To_bry_and_clear() : text;
|
||||
}
|
||||
public void Unstrip_general(Xomw_parser_bfr pbfr) {Unstrip(Tid__general, pbfr);}
|
||||
public void Unstrip_nowiki (Xomw_parser_bfr pbfr) {Unstrip(Tid__nowiki , pbfr);}
|
||||
public void Unstrip_both (Xomw_parser_bfr pbfr) {Unstrip(Tid__both , pbfr);}
|
||||
private boolean Unstrip(byte tid, Xomw_parser_bfr pbfr) {
|
||||
// XO.PBFR
|
||||
Bry_bfr src_bfr = pbfr.Src();
|
||||
byte[] src = src_bfr.Bfr();
|
||||
boolean dirty = Unstrip(tid, pbfr.Trg(), src, 0, src_bfr.Len());
|
||||
if (dirty)
|
||||
pbfr.Switch();
|
||||
return dirty;
|
||||
}
|
||||
private boolean Unstrip(byte tid, Bry_bfr trg, byte[] src, int src_bgn, int src_end) {
|
||||
// exit early if no items for type
|
||||
if ((tid & Tid__general) == Tid__general) {
|
||||
if (general_len == 0)
|
||||
return false;
|
||||
}
|
||||
else if ((tid & Tid__nowiki) == Tid__nowiki) {
|
||||
if (nowiki_len == 0)
|
||||
return false;
|
||||
}
|
||||
|
||||
int cur = src_bgn;
|
||||
int prv = cur;
|
||||
boolean dirty = false;
|
||||
// loop over each src char
|
||||
while (true) {
|
||||
// EOS: exit
|
||||
if (cur == src_end) {
|
||||
if (dirty) // add remainder if dirty
|
||||
trg.Add_mid(src, prv, src_end);
|
||||
break;
|
||||
}
|
||||
|
||||
// check if current pos matches strip state
|
||||
Object o = trie.Match_at(trv, src, cur, src_end);
|
||||
if (o != null) { // match
|
||||
Xomw_strip_item item = (Xomw_strip_item)o;
|
||||
byte item_tid = item.Tid();
|
||||
if ((tid & item_tid) == item_tid) { // check if types match
|
||||
// get bfr for recursion
|
||||
Bry_bfr nested_bfr = null;
|
||||
boolean tmp_2_release = false;
|
||||
if (tmp_2_used) {
|
||||
nested_bfr = Bry_bfr_.New();
|
||||
}
|
||||
else {
|
||||
nested_bfr = tmp_2;
|
||||
tmp_2_used = true;
|
||||
tmp_2_release = true;
|
||||
}
|
||||
|
||||
// recurse
|
||||
byte[] item_val = item.Val();
|
||||
if (Unstrip(tid, nested_bfr, item_val, 0, item_val.length))
|
||||
item_val = nested_bfr.To_bry_and_clear();
|
||||
if (tmp_2_release)
|
||||
tmp_2_used = false;
|
||||
|
||||
// add to trg
|
||||
trg.Add_mid(src, prv, cur);
|
||||
trg.Add(item_val);
|
||||
|
||||
// update vars
|
||||
dirty = true;
|
||||
cur += item.Key().length;
|
||||
prv = cur;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
cur++;
|
||||
}
|
||||
return dirty;
|
||||
}
|
||||
public static final String Str__marker_bgn = "\u007f'\"`UNIQ-";
|
||||
public static final byte[]
|
||||
Bry__marker__bgn = Bry_.new_a7(Str__marker_bgn)
|
||||
, Bry__marker__end = Bry_.new_a7("-QINU`\"'\u007f")
|
||||
;
|
||||
public static final byte Tid__general = 1, Tid__nowiki = 2, Tid__both = 3;
|
||||
}
|
||||
/** Immutable record of one stripped item: its type id, its marker (key) and the text it stands for (val). */
class Xomw_strip_item {
	private final byte tid;
	private final byte[] key;
	private final byte[] val;

	public Xomw_strip_item(byte tid, byte[] key, byte[] val) {
		this.tid = tid;
		this.key = key;
		this.val = val;
	}

	/** Type id passed to the constructor. */
	public byte Tid() {
		return this.tid;
	}

	/** Marker bytes passed to the constructor (same array instance, not a copy). */
	public byte[] Key() {
		return this.key;
	}

	/** Replacement bytes passed to the constructor (same array instance, not a copy). */
	public byte[] Val() {
		return this.val;
	}
}
|
@ -37,7 +37,7 @@ public class Xomw_doubleunder_wkr {
|
||||
, Xol_kwd_grp_.Id_nocontentconvert
|
||||
);
|
||||
}
|
||||
public void Do_double_underscore(Xomw_parser_ctx pctx, Xomw_parser_bfr pbfr) {
|
||||
public void doDoubleUnderscore(Xomw_parser_ctx pctx, Xomw_parser_bfr pbfr) {
|
||||
// XO.PBFR
|
||||
Bry_bfr src_bfr = pbfr.Src();
|
||||
byte[] src = src_bfr.Bfr();
|
||||
@ -123,6 +123,62 @@ public class Xomw_doubleunder_wkr {
|
||||
if (dirty)
|
||||
pbfr.Switch();
|
||||
}
|
||||
// /**
|
||||
// * Strip double-underscore items like __NOGALLERY__ and __NOTOC__
|
||||
// * Fills this.mDoubleUnderscores, returns the modified text
|
||||
// *
|
||||
// * @param String $text
|
||||
// *
|
||||
// * @return String
|
||||
// */
|
||||
// public function doDoubleUnderscore($text) {
|
||||
//
|
||||
// # The position of __TOC__ needs to be recorded
|
||||
// $mw = MagicWord::get('toc');
|
||||
// if ($mw->match($text)) {
|
||||
// this.mShowToc = true;
|
||||
// this.mForceTocPosition = true;
|
||||
//
|
||||
// # Set a placeholder. At the end we'll fill it in with the TOC.
|
||||
// $text = $mw->replace('<!--MWTOC-->', $text, 1);
|
||||
//
|
||||
// # Only keep the first one.
|
||||
// $text = $mw->replace('', $text);
|
||||
// }
|
||||
//
|
||||
// # Now match and remove the rest of them
|
||||
// $mwa = MagicWord::getDoubleUnderscoreArray();
|
||||
// this.mDoubleUnderscores = $mwa->matchAndRemove($text);
|
||||
//
|
||||
// if (isset(this.mDoubleUnderscores['nogallery'])) {
|
||||
// this.mOutput->mNoGallery = true;
|
||||
// }
|
||||
// if (isset(this.mDoubleUnderscores['notoc']) && !this.mForceTocPosition) {
|
||||
// this.mShowToc = false;
|
||||
// }
|
||||
// if (isset(this.mDoubleUnderscores['hiddencat'])
|
||||
// && this.mTitle->getNamespace() == NS_CATEGORY
|
||||
// ) {
|
||||
// this.addTrackingCategory('hidden-category-category');
|
||||
// }
|
||||
// # (T10068) Allow control over whether robots index a page.
|
||||
// # __INDEX__ always overrides __NOINDEX__, see T16899
|
||||
// if (isset(this.mDoubleUnderscores['noindex']) && this.mTitle->canUseNoindex()) {
|
||||
// this.mOutput->setIndexPolicy('noindex');
|
||||
// this.addTrackingCategory('noindex-category');
|
||||
// }
|
||||
// if (isset(this.mDoubleUnderscores['index']) && this.mTitle->canUseNoindex()) {
|
||||
// this.mOutput->setIndexPolicy('index');
|
||||
// this.addTrackingCategory('index-category');
|
||||
// }
|
||||
//
|
||||
// # Cache all double underscores in the database
|
||||
// foreach (this.mDoubleUnderscores as $key => $val) {
|
||||
// this.mOutput->setProperty($key, '');
|
||||
// }
|
||||
//
|
||||
// return $text;
|
||||
// }
|
||||
private static void Reg(Btrie_slim_mgr trie, Xol_kwd_mgr mgr, int... ids) {
|
||||
for (int id : ids) {
|
||||
Xol_kwd_grp grp = mgr.Get_or_new(id);
|
||||
|
@ -36,7 +36,7 @@ class Xomw_doubleunder_wkr__fxt {
|
||||
}
|
||||
public Xomw_doubleunder_wkr__fxt Test__parse(String src_str, String expd) {
|
||||
byte[] src_bry = Bry_.new_u8(src_str);
|
||||
wkr.Do_double_underscore(pctx, pbfr.Init(src_bry));
|
||||
wkr.doDoubleUnderscore(pctx, pbfr.Init(src_bry));
|
||||
Gftest.Eq__str(expd, pbfr.Rslt().To_str_and_clear(), src_str);
|
||||
return this;
|
||||
}
|
||||
|
@ -28,7 +28,7 @@ public class Xomw_heading_wkr {
|
||||
public int Hdr_lhs_end() {return hdr_lhs_end;} private int hdr_lhs_end;
|
||||
public int Hdr_rhs_bgn() {return hdr_rhs_bgn;} private int hdr_rhs_bgn;
|
||||
public int Hdr_rhs_end() {return hdr_rhs_end;} private int hdr_rhs_end;
|
||||
public void Do_headings(Xomw_parser_ctx pctx, Xomw_parser_bfr pbfr, Xomw_heading_cbk__html cbk) {
|
||||
public void doHeadings(Xomw_parser_ctx pctx, Xomw_parser_bfr pbfr, Xomw_heading_cbk__html cbk) {
|
||||
Bry_bfr src_bfr = pbfr.Src();
|
||||
byte[] src_bry = src_bfr.Bfr();
|
||||
int src_end = src_bfr.Len();
|
||||
@ -103,4 +103,20 @@ public class Xomw_heading_wkr {
|
||||
cbk.On_hdr_seen(pctx, this);
|
||||
return nl_rhs;
|
||||
}
|
||||
// /**
|
||||
// * Parse headers and return html
|
||||
// *
|
||||
// * @private
|
||||
// *
|
||||
// * @param String $text
|
||||
// *
|
||||
// * @return String
|
||||
// */
|
||||
// public function doHeadings($text) {
|
||||
// for ($i = 6; $i >= 1; --$i) {
|
||||
// $h = str_repeat('=', $i);
|
||||
// $text = preg_replace("/^$h(.+)$h\\s*$/m", "<h$i>\\1</h$i>", $text);
|
||||
// }
|
||||
// return $text;
|
||||
// }
|
||||
}
|
||||
|
@ -17,7 +17,7 @@ package gplx.xowa.mediawiki.includes.parsers.hrs; import gplx.*; import gplx.xow
|
||||
import gplx.xowa.mediawiki.includes.utls.*;
|
||||
public class Xomw_hr_wkr {// THREAD.UNSAFE: caching for repeated calls
|
||||
private Bry_bfr bfr;
|
||||
public void Replace_hrs(Xomw_parser_ctx pctx, Xomw_parser_bfr pbfr) { // REF.MW: text = preg_replace('/(^|\n)-----*/', '\\1<hr />', text);
|
||||
public void replaceHrs(Xomw_parser_ctx pctx, Xomw_parser_bfr pbfr) { // REF.MW: text = preg_replace('/(^|\n)-----*/', '\\1<hr />', text);
|
||||
// XO.PBFR
|
||||
Bry_bfr src_bfr = pbfr.Src();
|
||||
byte[] src = src_bfr.Bfr();
|
||||
|
@ -28,7 +28,7 @@ class Xomw_hr_wkr__fxt {
|
||||
private final Xomw_hr_wkr wkr = new Xomw_hr_wkr();
|
||||
public void Test__parse(String src_str, String expd) {
|
||||
byte[] src_bry = Bry_.new_u8(src_str);
|
||||
wkr.Replace_hrs(new Xomw_parser_ctx(), pbfr.Init(src_bry));
|
||||
wkr.replaceHrs(new Xomw_parser_ctx(), pbfr.Init(src_bry));
|
||||
Tfds.Eq_str_lines(expd, pbfr.Rslt().To_str_and_clear(), src_str);
|
||||
}
|
||||
}
|
||||
|
@ -25,17 +25,17 @@ public class Xomw_lnke_wkr {// THREAD.UNSAFE: caching for repeated calls
|
||||
private final Bry_bfr tmp;
|
||||
private Btrie_slim_mgr protocol_trie; private final Btrie_rv trv = new Btrie_rv();
|
||||
private int autonumber;
|
||||
private final Xomw_parser parser;
|
||||
private final XomwParserIface parser;
|
||||
private final XomwLinker linker;
|
||||
private final XomwSanitizer sanitizer;
|
||||
private final Xomw_atr_mgr attribs = new Xomw_atr_mgr();
|
||||
private Xomw_regex_url regex_url;
|
||||
private Xomw_regex_space regex_space;
|
||||
public Xomw_lnke_wkr(Xomw_parser parser) {
|
||||
public Xomw_lnke_wkr(XomwParserIface parser, Bry_bfr tmp, XomwLinker linker, XomwSanitizer sanitizer) {
|
||||
this.parser = parser;
|
||||
this.tmp = parser.Tmp();
|
||||
this.linker = parser.Linker();
|
||||
this.sanitizer = parser.Sanitizer();
|
||||
this.tmp = tmp;
|
||||
this.linker = linker;
|
||||
this.sanitizer = sanitizer;
|
||||
|
||||
if (angle_entities_trie == null) {
|
||||
synchronized (Type_adp_.ClassOf_obj(this)) {
|
||||
@ -63,7 +63,7 @@ public class Xomw_lnke_wkr {// THREAD.UNSAFE: caching for repeated calls
|
||||
this.regex_space = regex_space;
|
||||
}
|
||||
// XO.MW:SYNC:1.29; DATE:2017-02-01
|
||||
public void Replace_external_links(Xomw_parser_ctx pctx, Xomw_parser_bfr pbfr) {
|
||||
public void replaceExternalLinks(Xomw_parser_ctx pctx, Xomw_parser_bfr pbfr) {
|
||||
// XO.PBFR
|
||||
Bry_bfr src_bfr = pbfr.Src();
|
||||
byte[] src = src_bfr.Bfr();
|
||||
@ -211,7 +211,7 @@ public class Xomw_lnke_wkr {// THREAD.UNSAFE: caching for repeated calls
|
||||
// This means that users can paste URLs directly into the text
|
||||
// Funny characters like ö aren't valid in URLs anyway
|
||||
// This was changed in August 2004
|
||||
linker.makeExternalLink(bfr, url, Bry_.Mid(src, text_bgn, text_end), Bool_.N, link_type, parser.Get_external_link_attribs(attribs), Bry_.Empty);
|
||||
linker.makeExternalLink(bfr, url, Bry_.Mid(src, text_bgn, text_end), Bool_.N, link_type, parser.getExternalLinkAttribs(attribs), Bry_.Empty);
|
||||
|
||||
// XO.MW.UNSUPPORTED.HOOK: registers link for processing by other extensions?
|
||||
// Register link in the output Object.
|
||||
|
@ -53,16 +53,18 @@ public class Xomw_lnke_wkr__tst {
|
||||
}
|
||||
}
|
||||
class Xomw_lnke_wkr__fxt {
|
||||
private final Xomw_lnke_wkr wkr = new Xomw_lnke_wkr(new Xomw_parser());
|
||||
private final Xomw_lnke_wkr wkr;
|
||||
private final Xomw_parser_bfr pbfr = new Xomw_parser_bfr();
|
||||
private boolean apos = true;
|
||||
// Builds the worker under test from a fresh Xomw_parser and initializes it for parsing.
public Xomw_lnke_wkr__fxt() {
|
||||
Xomw_parser parser = new Xomw_parser();
|
||||
// the worker takes its collaborators explicitly; here they all come from the same parser instance
this.wkr = new Xomw_lnke_wkr(parser, parser.Tmp(), parser.Linker(), parser.Sanitizer());
|
||||
Xomw_regex_space regex_space = new Xomw_regex_space();
|
||||
// default protocols; the same regex_space instance is shared with the url regex
wkr.Init_by_wiki(Xomw_parser.Protocols__dflt(), new Xomw_regex_url(regex_space), regex_space);
|
||||
}
|
||||
public void Test__parse(String src_str, String expd) {
|
||||
byte[] src_bry = Bry_.new_u8(src_str);
|
||||
wkr.Replace_external_links(new Xomw_parser_ctx(), pbfr.Init(src_bry));
|
||||
wkr.replaceExternalLinks(new Xomw_parser_ctx(), pbfr.Init(src_bry));
|
||||
if (apos) expd = gplx.langs.htmls.Gfh_utl.Replace_apos(expd);
|
||||
Tfds.Eq_str_lines(expd, pbfr.Rslt().To_str_and_clear(), src_str);
|
||||
}
|
||||
|
@ -30,8 +30,8 @@ import gplx.xowa.parsers.uniqs.*;
|
||||
* P6: [[Media:]]
|
||||
* P4: handle "]]]"; "If we get a ] at the beginning of $m[3]"
|
||||
* P4: handle "[[http://a.org]]"
|
||||
* P3: $langObj->formatNum( ++$this->mAutonumber );
|
||||
* P2: $this->getConverterLanguage()->markNoConversion( $text );
|
||||
* P3: $langObj->formatNum( ++this.mAutonumber );
|
||||
* P2: this.getConverterLanguage()->markNoConversion( $text );
|
||||
* P1: link_prefix; EX: b[[A]]; [not enabled on enwiki]
|
||||
*/
|
||||
public class Xomw_lnki_wkr {// THREAD.UNSAFE: caching for repeated calls
|
||||
@ -40,29 +40,31 @@ public class Xomw_lnki_wkr {// THREAD.UNSAFE: caching for repeated calls
|
||||
private final Xomw_link_renderer link_renderer;
|
||||
// private final Btrie_slim_mgr protocols_trie;
|
||||
private final Xomw_quote_wkr quote_wkr;
|
||||
private final Xomw_strip_state strip_state;
|
||||
private final XomwStripState strip_state;
|
||||
private Xomw_parser_env env;
|
||||
private Xow_wiki wiki;
|
||||
private XomwTitle mPageTitle;
|
||||
// private final XomwLinker_NormalizeSubpageLink normalize_subpage_link = new XomwLinker_NormalizeSubpageLink();
|
||||
private final Bry_bfr tmp;
|
||||
private final Xomw_parser parser;
|
||||
private final XomwParserIface parser;
|
||||
private final Xomw_atr_mgr extra_atrs = new Xomw_atr_mgr();
|
||||
private final Xomw_qry_mgr query = new Xomw_qry_mgr();
|
||||
private final Btrie_rv trv = new Btrie_rv();
|
||||
private final List_adp tmp_list = List_adp_.New();
|
||||
private final Hash_adp mImageParams = Hash_adp_bry.cs();
|
||||
private final Hash_adp mImageParamsMagicArray = Hash_adp_bry.cs();
|
||||
public Xomw_lnki_wkr(Xomw_parser parser, XomwLinkHolderArray holders, Xomw_link_renderer link_renderer, Btrie_slim_mgr protocols_trie) {
|
||||
public Xomw_lnki_wkr(XomwParserIface parser, XomwLinkHolderArray holders, Xomw_link_renderer link_renderer, Btrie_slim_mgr protocols_trie
|
||||
, XomwLinker linker, Xomw_quote_wkr quote_wkr, Bry_bfr tmp, XomwStripState strip_state
|
||||
) {
|
||||
this.parser = parser;
|
||||
this.holders = holders;
|
||||
this.link_renderer = link_renderer;
|
||||
// this.protocols_trie = protocols_trie;
|
||||
|
||||
this.linker = parser.Linker();
|
||||
this.quote_wkr = parser.Quote_wkr();
|
||||
this.tmp = parser.Tmp();
|
||||
this.strip_state = parser.Strip_state();
|
||||
this.linker = linker;
|
||||
this.quote_wkr = quote_wkr;
|
||||
this.tmp = tmp;
|
||||
this.strip_state = strip_state;
|
||||
}
|
||||
public void Init_by_wiki(Xomw_parser_env env, Xow_wiki wiki) {
|
||||
this.env = env;
|
||||
@ -77,7 +79,7 @@ public class Xomw_lnki_wkr {// THREAD.UNSAFE: caching for repeated calls
|
||||
// Resets this worker's link state by calling clear() on the link-holder array.
public void Clear_state() {
|
||||
holders.clear();
|
||||
}
|
||||
public void Replace_internal_links(Xomw_parser_ctx pctx, Xomw_parser_bfr pbfr) {
|
||||
public void replaceInternalLinks(Xomw_parser_bfr pbfr, Xomw_parser_env env, Xomw_parser_ctx pctx) {
|
||||
// XO.PBFR
|
||||
Bry_bfr src_bfr = pbfr.Src();
|
||||
byte[] src = src_bfr.Bfr();
|
||||
@ -88,10 +90,10 @@ public class Xomw_lnki_wkr {// THREAD.UNSAFE: caching for repeated calls
|
||||
|
||||
this.mPageTitle = pctx.Page_title();
|
||||
|
||||
Replace_internal_links(pctx, bfr, src, src_bgn, src_end);
|
||||
replaceInternalLinks(env, pctx, bfr, src, src_bgn, src_end);
|
||||
}
|
||||
// XO.MW:SYNC:1.29; DATE:2017-02-02
|
||||
public void Replace_internal_links(Xomw_parser_ctx pctx, Bry_bfr bfr, byte[] src, int src_bgn, int src_end) {
|
||||
public void replaceInternalLinks(Xomw_parser_env env, Xomw_parser_ctx pctx, Bry_bfr bfr, byte[] src, int src_bgn, int src_end) {
|
||||
// XO.MW: regex for tc move to header; e1 and e1_img moved to code
|
||||
// the % is needed to support urlencoded titles as well
|
||||
|
||||
@ -109,7 +111,7 @@ public class Xomw_lnki_wkr {// THREAD.UNSAFE: caching for repeated calls
|
||||
// XO.MW.IGNORE: handles strange split logic of adding space to String; "$s = substr($s, 1);"
|
||||
|
||||
// TODO.XO:link_prefix; EX: b[[A]]
|
||||
// $useLinkPrefixExtension = $this->getTargetLanguage()->linkPrefixExtension();
|
||||
// $useLinkPrefixExtension = this.getTargetLanguage()->linkPrefixExtension();
|
||||
// $e2 = null;
|
||||
// if ($useLinkPrefixExtension) {
|
||||
// // Match the end of a line for a word that's not followed by whitespace,
|
||||
@ -119,9 +121,9 @@ public class Xomw_lnki_wkr {// THREAD.UNSAFE: caching for repeated calls
|
||||
// $e2 = "/^((?>.*[^$charset]|))(.+)$/sDu";
|
||||
// }
|
||||
|
||||
// IGNORE: throw new MWException(__METHOD__ . ": \$this->mTitle is null\n");
|
||||
// IGNORE: throw new MWException(__METHOD__ . ": \this.mTitle is null\n");
|
||||
|
||||
// $nottalk = !$this->mTitle->isTalkPage();
|
||||
// $nottalk = !this.mTitle->isTalkPage();
|
||||
|
||||
// TODO.XO:link_prefix
|
||||
byte[] prefix = Bry_.Empty;
|
||||
@ -240,7 +242,7 @@ public class Xomw_lnki_wkr {// THREAD.UNSAFE: caching for repeated calls
|
||||
// Don't allow @gplx.Internal protected links to pages containing
|
||||
// PROTO: where PROTO is a valid URL protocol; these
|
||||
// should be external links.
|
||||
// if (preg_match('/^(?i:' . $this->mUrlProtocols . ')/', $origLink)) {
|
||||
// if (preg_match('/^(?i:' . this.mUrlProtocols . ')/', $origLink)) {
|
||||
// $s .= $prefix . '[[' . $line;
|
||||
// continue;
|
||||
// }
|
||||
@ -267,7 +269,7 @@ public class Xomw_lnki_wkr {// THREAD.UNSAFE: caching for repeated calls
|
||||
// link = orig_link;
|
||||
// }
|
||||
|
||||
byte[] unstrip = strip_state.Unstrip_nowiki(link);
|
||||
byte[] unstrip = strip_state.unstripNoWiki(link);
|
||||
if (!Bry_.Eq(unstrip, link))
|
||||
nt = XomwTitle.newFromText(unstrip);
|
||||
if (nt == null) {
|
||||
@ -309,7 +311,7 @@ public class Xomw_lnki_wkr {// THREAD.UNSAFE: caching for repeated calls
|
||||
// we couldn't find the end of this imageLink, so output it raw
|
||||
// but don't ignore what might be perfectly normal links in the text we've examined
|
||||
Bry_bfr nested = wiki.Utl__bfr_mkr().Get_b128();
|
||||
this.Replace_internal_links(pctx, nested, text, 0, text.length);
|
||||
this.replaceInternalLinks(env, pctx, nested, text, 0, text.length);
|
||||
nested.Mkr_rls();
|
||||
bfr.Add(prefix).Add(Bry__wtxt__lnki__bgn).Add(link).Add_byte_pipe().Add(text); // s .= "{prefix}[[link|text";
|
||||
// note: no trail, because without an end, there *is* no trail
|
||||
@ -376,7 +378,7 @@ public class Xomw_lnki_wkr {// THREAD.UNSAFE: caching for repeated calls
|
||||
// cloak any absolute URLs inside the image markup, so replaceExternalLinks() won't touch them
|
||||
bfr.Add(prefix);
|
||||
// Armor_links(Make_image(bfr, nt, text, holders))
|
||||
this.makeImage(pctx, bfr, nt, text, holders);
|
||||
this.makeImage(env, pctx, bfr, nt, text, holders);
|
||||
bfr.Add(trail);
|
||||
continue;
|
||||
}
|
||||
@ -442,7 +444,7 @@ public class Xomw_lnki_wkr {// THREAD.UNSAFE: caching for repeated calls
|
||||
}
|
||||
}
|
||||
}
|
||||
public void makeImage(Xomw_parser_ctx pctx, Bry_bfr bfr, XomwTitle title, byte[] options_at_link, XomwLinkHolderArray holders) {
|
||||
public void makeImage(Xomw_parser_env env, Xomw_parser_ctx pctx, Bry_bfr bfr, XomwTitle title, byte[] options_at_link, XomwLinkHolderArray holders) {
|
||||
// Check if the options text is of the form "options|alt text"
|
||||
// Options are:
|
||||
// * thumbnail make a thumbnail with enlarge-icon and caption, alignment depends on lang
|
||||
@ -478,7 +480,7 @@ public class Xomw_lnki_wkr {// THREAD.UNSAFE: caching for repeated calls
|
||||
// XO.MW.HOOK:BeforeParserFetchFileAndTitle
|
||||
|
||||
// Fetch and register the file (file title may be different via hooks)
|
||||
// list($file, $title) = $this->fetchFileAndTitle($title, $options);
|
||||
// list($file, $title) = this.fetchFileAndTitle($title, $options);
|
||||
XomwFile file = fetchFileAndTitle(title, null);
|
||||
|
||||
// Get parameter map
|
||||
@ -489,7 +491,7 @@ public class Xomw_lnki_wkr {// THREAD.UNSAFE: caching for repeated calls
|
||||
Xomw_param_map paramMap = tmp_img_params.paramMap;
|
||||
XomwMagicWordArray mwArray = tmp_img_params.mwArray;
|
||||
|
||||
// XO.MW.UNSUPPORTED.TrackingCategory: if (!$file) $this->addTrackingCategory('broken-file-category');
|
||||
// XO.MW.UNSUPPORTED.TrackingCategory: if (!$file) this.addTrackingCategory('broken-file-category');
|
||||
|
||||
// Process the input parameters
|
||||
byte[] caption = Bry_.Empty;
|
||||
@ -549,12 +551,12 @@ public class Xomw_lnki_wkr {// THREAD.UNSAFE: caching for repeated calls
|
||||
// manualthumb? downstream behavior seems odd with
|
||||
// missing manual thumbs.
|
||||
validated = true;
|
||||
// $value = $this->stripAltText($value, $holders);
|
||||
val = parser.stripAltText(val, holders);
|
||||
break;
|
||||
case Xomw_param_itm.Name__link:
|
||||
// $chars = self::EXT_LINK_URL_CLASS;
|
||||
// $addr = self::EXT_LINK_ADDR;
|
||||
// $prots = $this->mUrlProtocols;
|
||||
// $prots = this.mUrlProtocols;
|
||||
// if ($value === '') {
|
||||
// $paramName = 'no-link';
|
||||
// $value = true;
|
||||
@ -563,9 +565,9 @@ public class Xomw_lnki_wkr {// THREAD.UNSAFE: caching for repeated calls
|
||||
// else if (preg_match("/^((?i)$prots)/", $value)) {
|
||||
// if (preg_match("/^((?i)$prots)$addr$chars*$/u", $value, $m)) {
|
||||
// $paramName = 'link-url';
|
||||
// $this->mOutput->addExternalLink($value);
|
||||
// if ($this->mOptions->getExternalLinkTarget()) {
|
||||
// $params[$type]['link-target'] = $this->mOptions->getExternalLinkTarget();
|
||||
// this.mOutput->addExternalLink($value);
|
||||
// if (this.mOptions->getExternalLinkTarget()) {
|
||||
// $params[$type]['link-target'] = this.mOptions->getExternalLinkTarget();
|
||||
// }
|
||||
validated = true;
|
||||
// }
|
||||
@ -574,7 +576,7 @@ public class Xomw_lnki_wkr {// THREAD.UNSAFE: caching for repeated calls
|
||||
// if ($linkTitle) {
|
||||
// $paramName = 'link-title';
|
||||
// $value = $linkTitle;
|
||||
// $this->mOutput->addLink($linkTitle);
|
||||
// this.mOutput->addLink($linkTitle);
|
||||
validated = true;
|
||||
// }
|
||||
// }
|
||||
@ -649,7 +651,7 @@ public class Xomw_lnki_wkr {// THREAD.UNSAFE: caching for repeated calls
|
||||
if (frameParams.alt == null) {
|
||||
// No alt text, use the "caption" for the alt text
|
||||
if (caption != Bry_.Empty) {
|
||||
// frameParams.alt = $this->stripAltText(caption, $holders);
|
||||
frameParams.alt = parser.stripAltText(caption, holders);
|
||||
}
|
||||
else {
|
||||
// No caption, fall back to using the filename for the
|
||||
@ -658,7 +660,7 @@ public class Xomw_lnki_wkr {// THREAD.UNSAFE: caching for repeated calls
|
||||
}
|
||||
}
|
||||
// Use the "caption" for the tooltip text
|
||||
// frameParams.title = $this->stripAltText(caption, $holders);
|
||||
frameParams.title = parser.stripAltText(caption, holders);
|
||||
}
|
||||
|
||||
// MW.HOOK:ParserMakeImageParams
|
||||
@ -666,33 +668,13 @@ public class Xomw_lnki_wkr {// THREAD.UNSAFE: caching for repeated calls
|
||||
// Linker does the rest
|
||||
// byte[] time = options.time;
|
||||
Object time = null;
|
||||
linker.makeImageLink(bfr, pctx, parser, title, file, frameParams, handlerParams, time, desc_query, parser.Options().getThumbSize());
|
||||
linker.makeImageLink(bfr, env, pctx, parser, title, file, frameParams, handlerParams, time, desc_query, parser.getOptions().getThumbSize());
|
||||
|
||||
// Give the handler a chance to modify the parser Object
|
||||
// if (handler != null) {
|
||||
// $handler->parserTransformHook($this, $file);
|
||||
// }
|
||||
}
|
||||
// private byte[] stripAltText(byte[] caption, XomwLinkHolderArray holders) {
|
||||
// // Strip bad stuff out of the title (tooltip). We can't just use
|
||||
// // replaceLinkHoldersText() here, because if this function is called
|
||||
// // from replaceInternalLinks2(), mLinkHolders won't be up-to-date.
|
||||
// byte[] tooltip;
|
||||
// if (holders != null) {
|
||||
// tooltip = holders.replace(caption);
|
||||
// } else {
|
||||
// tooltip = this.replace_link_holders(caption);
|
||||
// }
|
||||
//
|
||||
// // make sure there are no placeholders in thumbnail attributes
|
||||
// // that are later expanded to html- so expand them now and
|
||||
// // remove the tags
|
||||
//// $tooltip = $this->mStripState->unstripBoth( $tooltip );
|
||||
//// $tooltip = Sanitizer::stripAllTags( $tooltip );
|
||||
////
|
||||
//// return $tooltip;
|
||||
// return null;
|
||||
// }
|
||||
|
||||
private static Xomw_param_list[] internalParamNames;
|
||||
private static Xomw_param_map internalParamMap;
|
||||
@ -783,11 +765,11 @@ public class Xomw_lnki_wkr {// THREAD.UNSAFE: caching for repeated calls
|
||||
//$time = $file ? $file->getTimestamp() : false;
|
||||
//$sha1 = $file ? $file->getSha1() : false;
|
||||
//# Register the file as a dependency...
|
||||
//$this->mOutput->addImage( $title->getDBkey(), $time, $sha1 );
|
||||
//this.mOutput->addImage( $title->getDBkey(), $time, $sha1 );
|
||||
//if ( $file && !$title->equals( $file->getTitle() ) ) {
|
||||
// # Update fetched file title
|
||||
// $title = $file->getTitle();
|
||||
// $this->mOutput->addImage( $title->getDBkey(), $time, $sha1 );
|
||||
// this.mOutput->addImage( $title->getDBkey(), $time, $sha1 );
|
||||
//}
|
||||
return file;
|
||||
}
|
||||
@ -835,7 +817,7 @@ public class Xomw_lnki_wkr {// THREAD.UNSAFE: caching for repeated calls
|
||||
|
||||
link_renderer.Make_known_link(bfr, nt, text, extra_atrs, query);
|
||||
byte[] link = bfr.To_bry_and_clear();
|
||||
parser.Armor_links(bfr, link, 0, link.length);
|
||||
parser.armorLinks(bfr, link, 0, link.length);
|
||||
bfr.Add(trail);
|
||||
}
|
||||
|
||||
@ -856,4 +838,326 @@ public class Xomw_lnki_wkr {// THREAD.UNSAFE: caching for repeated calls
|
||||
// title-char -> ([{$tc}]+)
|
||||
// pipe -> \\|
|
||||
// other chars... -> (.*)
|
||||
//
|
||||
// /**
|
||||
// * Process [[ ]] wikilinks
|
||||
// *
|
||||
// * @param String $s
|
||||
// *
|
||||
// * @return String Processed text
|
||||
// *
|
||||
// * @private
|
||||
// */
|
||||
// public function replaceInternalLinks($s) {
|
||||
// this.mLinkHolders->merge(this.replaceInternalLinks2($s));
|
||||
// return $s;
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Process [[ ]] wikilinks (RIL)
|
||||
// * @param String $s
|
||||
// * @throws MWException
|
||||
// * @return LinkHolderArray
|
||||
// *
|
||||
// * @private
|
||||
// */
|
||||
// public function replaceInternalLinks2(&$s) {
|
||||
// global $wgExtraInterlanguageLinkPrefixes;
|
||||
//
|
||||
// static $tc = false, $e1, $e1_img;
|
||||
// # the % is needed to support urlencoded titles as well
|
||||
// if (!$tc) {
|
||||
// $tc = Title::legalChars() . '#%';
|
||||
// # Match a link having the form [[namespace:link|alternate]]trail
|
||||
// $e1 = "/^([{$tc}]+)(?:\\|(.+?))?]](.*)\$/sD";
|
||||
// # Match cases where there is no "]]", which might still be images
|
||||
// $e1_img = "/^([{$tc}]+)\\|(.*)\$/sD";
|
||||
// }
|
||||
//
|
||||
// $holders = new LinkHolderArray($this);
|
||||
//
|
||||
// # split the entire text String on occurrences of [[
|
||||
// $a = StringUtils::explode('[[', ' ' . $s);
|
||||
// # get the first element (all text up to first [[), and remove the space we added
|
||||
// $s = $a->current();
|
||||
// $a->next();
|
||||
// $line = $a->current(); # Workaround for broken ArrayIterator::next() that returns "void"
|
||||
// $s = substr($s, 1);
|
||||
//
|
||||
// $useLinkPrefixExtension = this.getTargetLanguage()->linkPrefixExtension();
|
||||
// $e2 = null;
|
||||
// if ($useLinkPrefixExtension) {
|
||||
// # Match the end of a line for a word that's not followed by whitespace,
|
||||
// # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
|
||||
// global $wgContLang;
|
||||
// $charset = $wgContLang->linkPrefixCharset();
|
||||
// $e2 = "/^((?>.*[^$charset]|))(.+)$/sDu";
|
||||
// }
|
||||
//
|
||||
// if (is_null(this.mTitle)) {
|
||||
// throw new MWException(__METHOD__ . ": \this.mTitle is null\n");
|
||||
// }
|
||||
// $nottalk = !this.mTitle->isTalkPage();
|
||||
//
|
||||
// if ($useLinkPrefixExtension) {
|
||||
// $m = [];
|
||||
// if (preg_match($e2, $s, $m)) {
|
||||
// $first_prefix = $m[2];
|
||||
// } else {
|
||||
// $first_prefix = false;
|
||||
// }
|
||||
// } else {
|
||||
// $prefix = '';
|
||||
// }
|
||||
//
|
||||
// $useSubpages = this.areSubpagesAllowed();
|
||||
//
|
||||
// // @codingStandardsIgnoreStart Squiz.WhiteSpace.SemicolonSpacing.Incorrect
|
||||
// # Loop for each link
|
||||
// for (; $line !== false && $line !== null; $a->next(), $line = $a->current()) {
|
||||
// // @codingStandardsIgnoreEnd
|
||||
//
|
||||
// # Check for excessive memory usage
|
||||
// if ($holders->isBig()) {
|
||||
// # Too big
|
||||
// # Do the existence check, replace the link holders and clear the array
|
||||
// $holders->replace($s);
|
||||
// $holders->clear();
|
||||
// }
|
||||
//
|
||||
// if ($useLinkPrefixExtension) {
|
||||
// if (preg_match($e2, $s, $m)) {
|
||||
// $prefix = $m[2];
|
||||
// $s = $m[1];
|
||||
// } else {
|
||||
// $prefix = '';
|
||||
// }
|
||||
// # first link
|
||||
// if ($first_prefix) {
|
||||
// $prefix = $first_prefix;
|
||||
// $first_prefix = false;
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// $might_be_img = false;
|
||||
//
|
||||
// if (preg_match($e1, $line, $m)) { # page with normal text or alt
|
||||
// $text = $m[2];
|
||||
// # If we get a ] at the beginning of $m[3] that means we have a link that's something like:
|
||||
// # [[Image:Foo.jpg|[http://example.com desc]]] <- having three ] in a row fucks up,
|
||||
// # the real problem is with the $e1 regex
|
||||
// # See T1500.
|
||||
// # Still some problems for cases where the ] is meant to be outside punctuation,
|
||||
// # and no image is in sight. See T4095.
|
||||
// if ($text !== ''
|
||||
// && substr($m[3], 0, 1) === ']'
|
||||
// && strpos($text, '[') !== false
|
||||
// ) {
|
||||
// $text .= ']'; # so that replaceExternalLinks($text) works later
|
||||
// $m[3] = substr($m[3], 1);
|
||||
// }
|
||||
// # fix up urlencoded title texts
|
||||
// if (strpos($m[1], '%') !== false) {
|
||||
// # Should anchors '#' also be rejected?
|
||||
// $m[1] = str_replace([ '<', '>' ], [ '&lt;', '&gt;' ], rawurldecode($m[1]));
|
||||
// }
|
||||
// $trail = $m[3];
|
||||
// } elseif (preg_match($e1_img, $line, $m)) {
|
||||
// # Invalid, but might be an image with a link in its caption
|
||||
// $might_be_img = true;
|
||||
// $text = $m[2];
|
||||
// if (strpos($m[1], '%') !== false) {
|
||||
// $m[1] = str_replace([ '<', '>' ], [ '&lt;', '&gt;' ], rawurldecode($m[1]));
|
||||
// }
|
||||
// $trail = "";
|
||||
// } else { # Invalid form; output directly
|
||||
// $s .= $prefix . '[[' . $line;
|
||||
// continue;
|
||||
// }
|
||||
//
|
||||
// $origLink = ltrim($m[1], ' ');
|
||||
//
|
||||
// # Don't allow internal links to pages containing
|
||||
// # PROTO: where PROTO is a valid URL protocol; these
|
||||
// # should be external links.
|
||||
// if (preg_match('/^(?i:' . this.mUrlProtocols . ')/', $origLink)) {
|
||||
// $s .= $prefix . '[[' . $line;
|
||||
// continue;
|
||||
// }
|
||||
//
|
||||
// # Make subpage if necessary
|
||||
// if ($useSubpages) {
|
||||
// $link = this.maybeDoSubpageLink($origLink, $text);
|
||||
// } else {
|
||||
// $link = $origLink;
|
||||
// }
|
||||
//
|
||||
// $noforce = (substr($origLink, 0, 1) !== ':');
|
||||
// if (!$noforce) {
|
||||
// # Strip off leading ':'
|
||||
// $link = substr($link, 1);
|
||||
// }
|
||||
//
|
||||
// $unstrip = this.mStripState->unstripNoWiki($link);
|
||||
// $nt = is_string($unstrip) ? Title::newFromText($unstrip) : null;
|
||||
// if ($nt === null) {
|
||||
// $s .= $prefix . '[[' . $line;
|
||||
// continue;
|
||||
// }
|
||||
//
|
||||
// $ns = $nt->getNamespace();
|
||||
// $iw = $nt->getInterwiki();
|
||||
//
|
||||
// if ($might_be_img) { # if this is actually an invalid link
|
||||
// if ($ns == NS_FILE && $noforce) { # but might be an image
|
||||
// $found = false;
|
||||
// while (true) {
|
||||
// # look at the next 'line' to see if we can close it there
|
||||
// $a->next();
|
||||
// $next_line = $a->current();
|
||||
// if ($next_line === false || $next_line === null) {
|
||||
// break;
|
||||
// }
|
||||
// $m = explode(']]', $next_line, 3);
|
||||
// if (count($m) == 3) {
|
||||
// # the first ]] closes the inner link, the second the image
|
||||
// $found = true;
|
||||
// $text .= "[[{$m[0]}]]{$m[1]}";
|
||||
// $trail = $m[2];
|
||||
// break;
|
||||
// } elseif (count($m) == 2) {
|
||||
// # if there's exactly one ]] that's fine, we'll keep looking
|
||||
// $text .= "[[{$m[0]}]]{$m[1]}";
|
||||
// } else {
|
||||
// # if $next_line is invalid too, we need look no further
|
||||
// $text .= '[[' . $next_line;
|
||||
// break;
|
||||
// }
|
||||
// }
|
||||
// if (!$found) {
|
||||
// # we couldn't find the end of this imageLink, so output it raw
|
||||
// # but don't ignore what might be perfectly normal links in the text we've examined
|
||||
// $holders->merge(this.replaceInternalLinks2($text));
|
||||
// $s .= "{$prefix}[[$link|$text";
|
||||
// # note: no $trail, because without an end, there *is* no trail
|
||||
// continue;
|
||||
// }
|
||||
// } else { # it's not an image, so output it raw
|
||||
// $s .= "{$prefix}[[$link|$text";
|
||||
// # note: no $trail, because without an end, there *is* no trail
|
||||
// continue;
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// $wasblank = ($text == '');
|
||||
// if ($wasblank) {
|
||||
// $text = $link;
|
||||
// } else {
|
||||
// # T6598 madness. Handle the quotes only if they come from the alternate part
|
||||
// # [[Lista d''e paise d''o munno]] -> <a href="...">Lista d''e paise d''o munno</a>
|
||||
// # [[Criticism of Harry Potter|Criticism of ''Harry Potter'']]
|
||||
// # -> <a href="Criticism of Harry Potter">Criticism of <i>Harry Potter</i></a>
|
||||
// $text = this.doQuotes($text);
|
||||
// }
|
||||
//
|
||||
// # Link not escaped by : , create the various objects
|
||||
// if ($noforce && !$nt->wasLocalInterwiki()) {
|
||||
// # Interwikis
|
||||
// if (
|
||||
// $iw && this.mOptions->getInterwikiMagic() && $nottalk && (
|
||||
// Language::fetchLanguageName($iw, null, 'mw') ||
|
||||
// in_array($iw, $wgExtraInterlanguageLinkPrefixes)
|
||||
// )
|
||||
// ) {
|
||||
// # T26502: filter duplicates
|
||||
// if (!isset(this.mLangLinkLanguages[$iw])) {
|
||||
// this.mLangLinkLanguages[$iw] = true;
|
||||
// this.mOutput->addLanguageLink($nt->getFullText());
|
||||
// }
|
||||
//
|
||||
// $s = rtrim($s . $prefix);
|
||||
// $s .= trim($trail, "\n") == '' ? '': $prefix . $trail;
|
||||
// continue;
|
||||
// }
|
||||
//
|
||||
// if ($ns == NS_FILE) {
|
||||
// if (!wfIsBadImage($nt->getDBkey(), this.mTitle)) {
|
||||
// if ($wasblank) {
|
||||
// # if no parameters were passed, $text
|
||||
// # becomes something like "File:Foo.png",
|
||||
// # which we don't want to pass on to the
|
||||
// # image generator
|
||||
// $text = '';
|
||||
// } else {
|
||||
// # recursively parse links inside the image caption
|
||||
// # actually, this will parse them in any other parameters, too,
|
||||
// # but it might be hard to fix that, and it doesn't matter ATM
|
||||
// $text = this.replaceExternalLinks($text);
|
||||
// $holders->merge(this.replaceInternalLinks2($text));
|
||||
// }
|
||||
// # cloak any absolute URLs inside the image markup, so replaceExternalLinks() won't touch them
|
||||
// $s .= $prefix . this.armorLinks(
|
||||
// this.makeImage($nt, $text, $holders)) . $trail;
|
||||
// continue;
|
||||
// }
|
||||
// } elseif ($ns == NS_CATEGORY) {
|
||||
// $s = rtrim($s . "\n"); # T2087
|
||||
//
|
||||
// if ($wasblank) {
|
||||
// $sortkey = this.getDefaultSort();
|
||||
// } else {
|
||||
// $sortkey = $text;
|
||||
// }
|
||||
// $sortkey = Sanitizer::decodeCharReferences($sortkey);
|
||||
// $sortkey = str_replace("\n", '', $sortkey);
|
||||
// $sortkey = this.getConverterLanguage()->convertCategoryKey($sortkey);
|
||||
// this.mOutput->addCategory($nt->getDBkey(), $sortkey);
|
||||
//
|
||||
// /**
|
||||
// * Strip the whitespace Category links produce, see T2087
|
||||
// */
|
||||
// $s .= trim($prefix . $trail, "\n") == '' ? '' : $prefix . $trail;
|
||||
//
|
||||
// continue;
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// # Self-link checking. For some languages, variants of the title are checked in
|
||||
// # LinkHolderArray::doVariants() to allow batching the existence checks necessary
|
||||
// # for linking to a different variant.
|
||||
// if ($ns != NS_SPECIAL && $nt->equals(this.mTitle) && !$nt->hasFragment()) {
|
||||
// $s .= $prefix . Linker::makeSelfLinkObj($nt, $text, '', $trail);
|
||||
// continue;
|
||||
// }
|
||||
//
|
||||
// # NS_MEDIA is a pseudo-namespace for linking directly to a file
|
||||
// # @todo FIXME: Should do batch file existence checks, see comment below
|
||||
// if ($ns == NS_MEDIA) {
|
||||
// # Give extensions a chance to select the file revision for us
|
||||
// $options = [];
|
||||
// $descQuery = false;
|
||||
// Hooks::run('BeforeParserFetchFileAndTitle',
|
||||
// [ $this, $nt, &$options, &$descQuery ]);
|
||||
// # Fetch and register the file (file title may be different via hooks)
|
||||
// list($file, $nt) = this.fetchFileAndTitle($nt, $options);
|
||||
// # Cloak with NOPARSE to avoid replacement in replaceExternalLinks
|
||||
// $s .= $prefix . this.armorLinks(
|
||||
// Linker::makeMediaLinkFile($nt, $file, $text)) . $trail;
|
||||
// continue;
|
||||
// }
|
||||
//
|
||||
// # Some titles, such as valid special pages or files in foreign repos, should
|
||||
// # be shown as bluelinks even though they're not included in the page table
|
||||
// # @todo FIXME: isAlwaysKnown() can be expensive for file links; we should really do
|
||||
// # batch file existence checks for NS_FILE and NS_MEDIA
|
||||
// if ($iw == '' && $nt->isAlwaysKnown()) {
|
||||
// this.mOutput->addLink($nt);
|
||||
// $s .= this.makeKnownLinkHolder($nt, $text, $trail, $prefix);
|
||||
// } else {
|
||||
// # Links will be added to the output link list after checking
|
||||
// $s .= $holders->makeHolder($nt, $text, [], $trail, $prefix);
|
||||
// }
|
||||
// }
|
||||
// return $holders;
|
||||
// }
|
||||
}
|
||||
|
@ -23,11 +23,19 @@ public class Xomw_lnki_wkr__file__tst {
|
||||
fxt.Clear();
|
||||
fxt.Init__file("File:A.png", 300, 200);
|
||||
}
|
||||
@Test public void Plain() {
|
||||
@Test public void Orig() {
|
||||
// basic
|
||||
fxt.Test__to_html("[[File:A.png]]", "<a href='/wiki/File:A.png' class='image'><img alt='A.png' src='/orig/7/70/A.png' width='300' height='200' /></a>");
|
||||
|
||||
// caption
|
||||
fxt.Test__to_html("[[File:A.png|abc]]", "<a href='/wiki/File:A.png' class='image' title='abc'><img alt='abc' src='/orig/7/70/A.png' width='300' height='200' /></a>");
|
||||
}
|
||||
@Test public void Thumb() {
|
||||
// basic
|
||||
fxt.Test__to_html("[[File:A.png|thumb]]", "<div class='thumb tright'><div class='thumbinner' style='width:222px;'><a href='/wiki/File:A.png' class='image'><img alt='A.png' src='/thumb/7/70/A.png/220px-A.png' width='220' height='146' class='thumbimage' /></a> <div class='thumbcaption'><div class='magnify'><a href='/wiki/File:A.png' class='internal' title='enlarge'></a></div></div></div></div>");
|
||||
|
||||
// caption
|
||||
fxt.Test__to_html("[[File:A.png|thumb|abc]]", "<div class='thumb tright'><div class='thumbinner' style='width:222px;'><a href='/wiki/File:A.png' class='image'><img alt='' src='/thumb/7/70/A.png/220px-A.png' width='220' height='146' class='thumbimage' /></a> <div class='thumbcaption'><div class='magnify'><a href='/wiki/File:A.png' class='internal' title='enlarge'></a></div>abc</div></div></div>");
|
||||
}
|
||||
@Test public void Size() {
|
||||
fxt.Test__to_html("[[File:A.png|123x456px]]", "<a href='/wiki/File:A.png' class='image'><img alt='A.png' src='/thumb/7/70/A.png/123px-A.png' width='123' height='82' /></a>");
|
||||
@ -66,17 +74,19 @@ class Xomw_lnki_wkr__fxt {
|
||||
private final Xomw_lnki_wkr wkr;
|
||||
private final Xomw_parser_ctx pctx;
|
||||
private final Xomw_parser_bfr pbfr = new Xomw_parser_bfr();
|
||||
private final Xomw_parser_env env;
|
||||
private final XomwFileFinderMock file_finder;
|
||||
private final XomwFileRepo repo = new XomwFileRepo(Bry_.new_a7("/orig"), Bry_.new_a7("/thumb"));
|
||||
private boolean apos = true;
|
||||
public Xomw_lnki_wkr__fxt() {
|
||||
Xoae_app app = Xoa_app_fxt.Make__app__edit();
|
||||
Xowe_wiki wiki = Xoa_app_fxt.Make__wiki__edit(app);
|
||||
Xomw_parser parser = new Xomw_parser();
|
||||
XomwParser parser = new XomwParser();
|
||||
wkr = parser.Lnki_wkr();
|
||||
|
||||
// env
|
||||
file_finder = new XomwFileFinderMock(parser.Env());
|
||||
env = parser.Env();
|
||||
parser.Env().File_finder_(file_finder);
|
||||
parser.Env().Magic_word_mgr().Add(Bry_.new_u8("img_thumbnail"), Bool_.Y, Bry_.Ary("thumb"));
|
||||
parser.Env().Magic_word_mgr().Add(Bry_.new_u8("img_width"), Bool_.Y, Bry_.Ary("$1px"));
|
||||
@ -95,7 +105,7 @@ class Xomw_lnki_wkr__fxt {
|
||||
}
|
||||
public void Test__parse(String src_str, String expd) {
|
||||
byte[] src_bry = Bry_.new_u8(src_str);
|
||||
wkr.Replace_internal_links(pctx, pbfr.Init(src_bry));
|
||||
wkr.replaceInternalLinks(pbfr.Init(src_bry), env, pctx);
|
||||
if (apos) expd = gplx.langs.htmls.Gfh_utl.Replace_apos(expd);
|
||||
Gftest.Eq__ary__lines(expd, pbfr.Rslt().To_str_and_clear(), src_str);
|
||||
}
|
||||
@ -109,7 +119,7 @@ class Xomw_lnki_wkr__fxt {
|
||||
}
|
||||
private String Exec__to_html(String src_str) {
|
||||
byte[] src_bry = Bry_.new_u8(src_str);
|
||||
wkr.Replace_internal_links(pctx, pbfr.Init(src_bry));
|
||||
wkr.replaceInternalLinks(pbfr.Init(src_bry), env, pctx);
|
||||
wkr.replaceLinkHolders(pbfr);
|
||||
return pbfr.Rslt().To_str_and_clear();
|
||||
}
|
||||
|
@ -24,7 +24,7 @@ public class Xomw_magiclinks_wkr {
|
||||
private static byte[] Tag__anch__rhs;
|
||||
private boolean[] url_separators;
|
||||
private static Xomw_regex_link_interrupt regex_link_interrupt;
|
||||
private final Xomw_parser parser;
|
||||
private final XomwParserIface parser;
|
||||
private final Xomw_regex_boundary regex_boundary;
|
||||
private final Xomw_regex_url regex_url;
|
||||
private final XomwSanitizer sanitizer;
|
||||
@ -33,7 +33,7 @@ public class Xomw_magiclinks_wkr {
|
||||
private byte[] page_title;
|
||||
|
||||
private static final byte Regex__anch = 1, Regex__elem = 2, Regex__free = 3;
|
||||
public Xomw_magiclinks_wkr(Xomw_parser parser, XomwSanitizer sanitizer, XomwLinker linker, Xomw_regex_boundary regex_boundary, Xomw_regex_url regex_url) {
|
||||
public Xomw_magiclinks_wkr(XomwParserIface parser, XomwSanitizer sanitizer, XomwLinker linker, Xomw_regex_boundary regex_boundary, Xomw_regex_url regex_url) {
|
||||
this.parser = parser;
|
||||
this.sanitizer = sanitizer;
|
||||
this.linker = linker;
|
||||
@ -66,7 +66,7 @@ public class Xomw_magiclinks_wkr {
|
||||
|
||||
// Replace special strings like "ISBN xxx" and "RFC xxx" with
|
||||
// magic external links.
|
||||
public void Do_magic_links(Xomw_parser_ctx pctx, Xomw_parser_bfr pbfr) {
|
||||
public void doMagicLinks(Xomw_parser_ctx pctx, Xomw_parser_bfr pbfr) {
|
||||
// XO.PBFR
|
||||
Bry_bfr src_bfr = pbfr.Src();
|
||||
byte[] src = src_bfr.Bfr();
|
||||
@ -262,7 +262,7 @@ public class Xomw_magiclinks_wkr {
|
||||
linker.makeExternalLink(bfr, url
|
||||
, url // $this->getConverterLanguage()->markNoConversion($url, true),
|
||||
, true, Bry_.new_a7("free")
|
||||
, parser.Get_external_link_attribs(atrs)
|
||||
, parser.getExternalLinkAttribs(atrs)
|
||||
, page_title);
|
||||
|
||||
// XO.MW.UNSUPPORTED.HOOK: registers link for processing by other extensions?
|
||||
@ -390,4 +390,107 @@ class Xomw_regex_link_interrupt {
|
||||
}
|
||||
return Bry_find_.Not_found;
|
||||
}
|
||||
// /**
|
||||
// * Replace special strings like "ISBN xxx" and "RFC xxx" with
|
||||
// * magic external links.
|
||||
// *
|
||||
// * DML
|
||||
// * @private
|
||||
// *
|
||||
// * @param String $text
|
||||
// *
|
||||
// * @return String
|
||||
// */
|
||||
// public function doMagicLinks($text) {
|
||||
// $prots = wfUrlProtocolsWithoutProtRel();
|
||||
// $urlChar = self::EXT_LINK_URL_CLASS;
|
||||
// $addr = self::EXT_LINK_ADDR;
|
||||
// $space = self::SPACE_NOT_NL; # non-newline space
|
||||
// $spdash = "(?:-|$space)"; # a dash or a non-newline space
|
||||
// $spaces = "$space++"; # possessive match of 1 or more spaces
|
||||
// $text = preg_replace_callback(
|
||||
// '!(?: # Start cases
|
||||
// (<a[ \t\r\n>].*?</a>) | # m[1]: Skip link text
|
||||
// (<.*?>) | # m[2]: Skip stuff inside
|
||||
// # HTML elements' . "
|
||||
// (\b(?i:$prots)($addr$urlChar*)) | # m[3]: Free external links
|
||||
// # m[4]: Post-protocol path
|
||||
// \b(?:RFC|PMID) $spaces # m[5]: RFC or PMID, capture number
|
||||
// ([0-9]+)\b |
|
||||
// \bISBN $spaces ( # m[6]: ISBN, capture number
|
||||
// (?: 97[89] $spdash?)? # optional 13-digit ISBN prefix
|
||||
// (?: [0-9] $spdash?){9} # 9 digits with opt. delimiters
|
||||
// [0-9Xx] # check digit
|
||||
// )\b
|
||||
// )!xu", [ &$this, 'magicLinkCallback' ], $text);
|
||||
// return $text;
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * @throws MWException
|
||||
// * @param array $m
|
||||
// * @return HTML|String
|
||||
// */
|
||||
// public function magicLinkCallback($m) {
|
||||
// if (isset($m[1]) && $m[1] !== '') {
|
||||
// # Skip anchor
|
||||
// return $m[0];
|
||||
// } elseif (isset($m[2]) && $m[2] !== '') {
|
||||
// # Skip HTML element
|
||||
// return $m[0];
|
||||
// } elseif (isset($m[3]) && $m[3] !== '') {
|
||||
// # Free external link
|
||||
// return this.makeFreeExternalLink($m[0], strlen($m[4]));
|
||||
// } elseif (isset($m[5]) && $m[5] !== '') {
|
||||
// # RFC or PMID
|
||||
// if (substr($m[0], 0, 3) === 'RFC') {
|
||||
// if (!this.mOptions->getMagicRFCLinks()) {
|
||||
// return $m[0];
|
||||
// }
|
||||
// $keyword = 'RFC';
|
||||
// $urlmsg = 'rfcurl';
|
||||
// $cssClass = 'mw-magiclink-rfc';
|
||||
// $trackingCat = 'magiclink-tracking-rfc';
|
||||
// $id = $m[5];
|
||||
// } elseif (substr($m[0], 0, 4) === 'PMID') {
|
||||
// if (!this.mOptions->getMagicPMIDLinks()) {
|
||||
// return $m[0];
|
||||
// }
|
||||
// $keyword = 'PMID';
|
||||
// $urlmsg = 'pubmedurl';
|
||||
// $cssClass = 'mw-magiclink-pmid';
|
||||
// $trackingCat = 'magiclink-tracking-pmid';
|
||||
// $id = $m[5];
|
||||
// } else {
|
||||
// throw new MWException(__METHOD__ . ': unrecognised match type "' .
|
||||
// substr($m[0], 0, 20) . '"');
|
||||
// }
|
||||
// $url = wfMessage($urlmsg, $id)->inContentLanguage()->text();
|
||||
// this.addTrackingCategory($trackingCat);
|
||||
// return Linker::makeExternalLink($url, "{$keyword} {$id}", true, $cssClass, [], this.mTitle);
|
||||
// } elseif (isset($m[6]) && $m[6] !== ''
|
||||
// && this.mOptions->getMagicISBNLinks()
|
||||
// ) {
|
||||
// # ISBN
|
||||
// $isbn = $m[6];
|
||||
// $space = self::SPACE_NOT_NL; # non-newline space
|
||||
// $isbn = preg_replace("/$space/", ' ', $isbn);
|
||||
// $num = strtr($isbn, [
|
||||
// '-' => '',
|
||||
// ' ' => '',
|
||||
// 'x' => 'X',
|
||||
// ]);
|
||||
// this.addTrackingCategory('magiclink-tracking-isbn');
|
||||
// return this.getLinkRenderer()->makeKnownLink(
|
||||
// SpecialPage::getTitleFor('Booksources', $num),
|
||||
// "ISBN $isbn",
|
||||
// [
|
||||
// 'class' => 'internal mw-magiclink-isbn',
|
||||
// 'title' => false // suppress title attribute
|
||||
// ]
|
||||
// );
|
||||
// } else {
|
||||
// return $m[0];
|
||||
// }
|
||||
// }
|
||||
}
|
||||
|
@ -79,7 +79,7 @@ class Xomw_magiclinks_wkr__fxt {
|
||||
public void Test__parse(boolean apos, String src_str, String expd) {
|
||||
byte[] src_bry = Bry_.new_u8(src_str);
|
||||
pbfr.Init(src_bry);
|
||||
wkr.Do_magic_links(pctx, pbfr);
|
||||
wkr.doMagicLinks(pctx, pbfr);
|
||||
if (apos) expd = gplx.langs.htmls.Gfh_utl.Replace_apos(expd);
|
||||
Tfds.Eq_str_lines(expd, pbfr.Rslt().To_str_and_clear(), src_str);
|
||||
}
|
||||
|
@ -17,7 +17,7 @@ package gplx.xowa.mediawiki.includes.parsers.nbsps; import gplx.*; import gplx.x
|
||||
import gplx.core.btries.*;
|
||||
public class Xomw_nbsp_wkr {
|
||||
private final Btrie_rv trv = new Btrie_rv();
|
||||
public void Do_nbsp(Xomw_parser_ctx pctx, Xomw_parser_bfr pbfr) {
|
||||
public void doNbsp(Xomw_parser_ctx pctx, Xomw_parser_bfr pbfr) {
|
||||
// PORTED:
|
||||
// Clean up special characters, only run once, next-to-last before doBlockLevels
|
||||
// $fixtags = [
|
||||
|
@ -31,7 +31,7 @@ class Xomw_nbsp_wkr__fxt {
|
||||
public void Test__parse(String src_str, String expd) {
|
||||
byte[] src_bry = Bry_.new_u8(src_str);
|
||||
pbfr.Init(src_bry);
|
||||
wkr.Do_nbsp(pctx, pbfr);
|
||||
wkr.doNbsp(pctx, pbfr);
|
||||
if (apos) expd = gplx.langs.htmls.Gfh_utl.Replace_apos(expd);
|
||||
Tfds.Eq_str_lines(expd, pbfr.Rslt().To_str_and_clear(), src_str);
|
||||
}
|
||||
|
@ -20,10 +20,10 @@ import gplx.core.primitives.*;
|
||||
public class Xomw_quote_wkr {// THREAD.UNSAFE: caching for repeated calls
|
||||
private Bry_bfr tmp;
|
||||
private final Int_list apos_pos_ary = new Int_list(32);
|
||||
public Xomw_quote_wkr(Xomw_parser mgr) {
|
||||
this.tmp = mgr.Tmp();
|
||||
public Xomw_quote_wkr(Bry_bfr tmp) {
|
||||
this.tmp = tmp;
|
||||
}
|
||||
public void Do_all_quotes(Xomw_parser_ctx pctx, Xomw_parser_bfr pbfr) {
|
||||
public void doAllQuotes(Xomw_parser_ctx pctx, Xomw_parser_bfr pbfr) {
|
||||
Bry_bfr src_bfr = pbfr.Src();
|
||||
byte[] src = src_bfr.Bfr();
|
||||
int src_bgn = 0;
|
||||
@ -262,4 +262,203 @@ public class Xomw_quote_wkr {// THREAD.UNSAFE: caching for repeated calls
|
||||
, State__both = 5
|
||||
;
|
||||
private static final byte[] Wtxt__apos = Bry_.new_a7("''");
|
||||
// /**
|
||||
// * Replace single quotes with HTML markup
|
||||
// * @private
|
||||
// *
|
||||
// * @param String $text
|
||||
// *
|
||||
// * @return String The altered text
|
||||
// */
|
||||
// public function doAllQuotes($text) {
|
||||
// $outtext = '';
|
||||
// $lines = StringUtils::explode("\n", $text);
|
||||
// foreach ($lines as $line) {
|
||||
// $outtext .= this.doQuotes($line) . "\n";
|
||||
// }
|
||||
// $outtext = substr($outtext, 0, -1);
|
||||
// return $outtext;
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Helper function for doAllQuotes()
|
||||
// *
|
||||
// * @param String $text
|
||||
// *
|
||||
// * @return String
|
||||
// */
|
||||
// public function doQuotes($text) {
|
||||
// $arr = preg_split("/(''+)/", $text, -1, PREG_SPLIT_DELIM_CAPTURE);
|
||||
// $countarr = count($arr);
|
||||
// if ($countarr == 1) {
|
||||
// return $text;
|
||||
// }
|
||||
//
|
||||
// // First, do some preliminary work. This may shift some apostrophes from
|
||||
// // being mark-up to being text. It also counts the number of occurrences
|
||||
// // of bold and italics mark-ups.
|
||||
// $numbold = 0;
|
||||
// $numitalics = 0;
|
||||
// for ($i = 1; $i < $countarr; $i += 2) {
|
||||
// $thislen = strlen($arr[$i]);
|
||||
// // If there are ever four apostrophes, assume the first is supposed to
|
||||
// // be text, and the remaining three constitute mark-up for bold text.
|
||||
// // (T15227: ''''foo'''' turns into ' ''' foo ' ''')
|
||||
// if ($thislen == 4) {
|
||||
// $arr[$i - 1] .= "'";
|
||||
// $arr[$i] = "'''";
|
||||
// $thislen = 3;
|
||||
// } elseif ($thislen > 5) {
|
||||
// // If there are more than 5 apostrophes in a row, assume they're all
|
||||
// // text except for the last 5.
|
||||
// // (T15227: ''''''foo'''''' turns into ' ''''' foo ' ''''')
|
||||
// $arr[$i - 1] .= str_repeat("'", $thislen - 5);
|
||||
// $arr[$i] = "'''''";
|
||||
// $thislen = 5;
|
||||
// }
|
||||
// // Count the number of occurrences of bold and italics mark-ups.
|
||||
// if ($thislen == 2) {
|
||||
// $numitalics++;
|
||||
// } elseif ($thislen == 3) {
|
||||
// $numbold++;
|
||||
// } elseif ($thislen == 5) {
|
||||
// $numitalics++;
|
||||
// $numbold++;
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// // If there is an odd number of both bold and italics, it is likely
|
||||
// // that one of the bold ones was meant to be an apostrophe followed
|
||||
// // by italics. Which one we cannot know for certain, but it is more
|
||||
// // likely to be one that has a single-letter word before it.
|
||||
// if (($numbold % 2 == 1) && ($numitalics % 2 == 1)) {
|
||||
// $firstsingleletterword = -1;
|
||||
// $firstmultiletterword = -1;
|
||||
// $firstspace = -1;
|
||||
// for ($i = 1; $i < $countarr; $i += 2) {
|
||||
// if (strlen($arr[$i]) == 3) {
|
||||
// $x1 = substr($arr[$i - 1], -1);
|
||||
// $x2 = substr($arr[$i - 1], -2, 1);
|
||||
// if ($x1 === ' ') {
|
||||
// if ($firstspace == -1) {
|
||||
// $firstspace = $i;
|
||||
// }
|
||||
// } elseif ($x2 === ' ') {
|
||||
// $firstsingleletterword = $i;
|
||||
// // if $firstsingleletterword is set, we don't
|
||||
// // look at the other options, so we can bail early.
|
||||
// break;
|
||||
// } else {
|
||||
// if ($firstmultiletterword == -1) {
|
||||
// $firstmultiletterword = $i;
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// // If there is a single-letter word, use it!
|
||||
// if ($firstsingleletterword > -1) {
|
||||
// $arr[$firstsingleletterword] = "''";
|
||||
// $arr[$firstsingleletterword - 1] .= "'";
|
||||
// } elseif ($firstmultiletterword > -1) {
|
||||
// // If not, but there's a multi-letter word, use that one.
|
||||
// $arr[$firstmultiletterword] = "''";
|
||||
// $arr[$firstmultiletterword - 1] .= "'";
|
||||
// } elseif ($firstspace > -1) {
|
||||
// // ... otherwise use the first one that has neither.
|
||||
// // (notice that it is possible for all three to be -1 if, for example,
|
||||
// // there is only one pentuple-apostrophe in the line)
|
||||
// $arr[$firstspace] = "''";
|
||||
// $arr[$firstspace - 1] .= "'";
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// // Now let's actually convert our apostrophic mush to HTML!
|
||||
// $output = '';
|
||||
// $buffer = '';
|
||||
// $state = '';
|
||||
// $i = 0;
|
||||
// foreach ($arr as $r) {
|
||||
// if (($i % 2) == 0) {
|
||||
// if ($state === 'both') {
|
||||
// $buffer .= $r;
|
||||
// } else {
|
||||
// $output .= $r;
|
||||
// }
|
||||
// } else {
|
||||
// $thislen = strlen($r);
|
||||
// if ($thislen == 2) {
|
||||
// if ($state === 'i') {
|
||||
// $output .= '</i>';
|
||||
// $state = '';
|
||||
// } elseif ($state === 'bi') {
|
||||
// $output .= '</i>';
|
||||
// $state = 'b';
|
||||
// } elseif ($state === 'ib') {
|
||||
// $output .= '</b></i><b>';
|
||||
// $state = 'b';
|
||||
// } elseif ($state === 'both') {
|
||||
// $output .= '<b><i>' . $buffer . '</i>';
|
||||
// $state = 'b';
|
||||
// } else { // $state can be 'b' or ''
|
||||
// $output .= '<i>';
|
||||
// $state .= 'i';
|
||||
// }
|
||||
// } elseif ($thislen == 3) {
|
||||
// if ($state === 'b') {
|
||||
// $output .= '</b>';
|
||||
// $state = '';
|
||||
// } elseif ($state === 'bi') {
|
||||
// $output .= '</i></b><i>';
|
||||
// $state = 'i';
|
||||
// } elseif ($state === 'ib') {
|
||||
// $output .= '</b>';
|
||||
// $state = 'i';
|
||||
// } elseif ($state === 'both') {
|
||||
// $output .= '<i><b>' . $buffer . '</b>';
|
||||
// $state = 'i';
|
||||
// } else { // $state can be 'i' or ''
|
||||
// $output .= '<b>';
|
||||
// $state .= 'b';
|
||||
// }
|
||||
// } elseif ($thislen == 5) {
|
||||
// if ($state === 'b') {
|
||||
// $output .= '</b><i>';
|
||||
// $state = 'i';
|
||||
// } elseif ($state === 'i') {
|
||||
// $output .= '</i><b>';
|
||||
// $state = 'b';
|
||||
// } elseif ($state === 'bi') {
|
||||
// $output .= '</i></b>';
|
||||
// $state = '';
|
||||
// } elseif ($state === 'ib') {
|
||||
// $output .= '</b></i>';
|
||||
// $state = '';
|
||||
// } elseif ($state === 'both') {
|
||||
// $output .= '<i><b>' . $buffer . '</b></i>';
|
||||
// $state = '';
|
||||
// } else { // ($state == '')
|
||||
// $buffer = '';
|
||||
// $state = 'both';
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// $i++;
|
||||
// }
|
||||
// // Now close all remaining tags. Notice that the order is important.
|
||||
// if ($state === 'b' || $state === 'ib') {
|
||||
// $output .= '</b>';
|
||||
// }
|
||||
// if ($state === 'i' || $state === 'bi' || $state === 'ib') {
|
||||
// $output .= '</i>';
|
||||
// }
|
||||
// if ($state === 'bi') {
|
||||
// $output .= '</b>';
|
||||
// }
|
||||
// // There might be lonely ''''', so make sure we have a buffer
|
||||
// if ($state === 'both' && $buffer) {
|
||||
// $output .= '<b><i>' . $buffer . '</i></b>';
|
||||
// }
|
||||
// return $output;
|
||||
// }
|
||||
}
|
||||
|
@ -33,11 +33,11 @@ public class Xomw_quote_wkr__tst {
|
||||
@Test public void Nl__text() {fxt.Test__parse("a\nb''c''d\n\ne" , "a\nb<i>c</i>d\n\ne");}
|
||||
}
|
||||
class Xomw_quote_wkr__fxt {
|
||||
private final Xomw_quote_wkr wkr = new Xomw_quote_wkr(new Xomw_parser());
|
||||
private final Xomw_quote_wkr wkr = new Xomw_quote_wkr(Bry_bfr_.New());
|
||||
private final Xomw_parser_bfr pbfr = new Xomw_parser_bfr();
|
||||
public void Test__parse(String src_str, String expd) {
|
||||
byte[] src_bry = Bry_.new_u8(src_str);
|
||||
wkr.Do_all_quotes(new Xomw_parser_ctx(), pbfr.Init(src_bry));
|
||||
wkr.doAllQuotes(new Xomw_parser_ctx(), pbfr.Init(src_bry));
|
||||
Tfds.Eq_str_lines(expd, pbfr.Rslt().To_str_and_clear(), src_str);
|
||||
}
|
||||
}
|
||||
|
@ -20,7 +20,7 @@ import gplx.xowa.mediawiki.includes.libs.*; import gplx.xowa.parsers.uniqs.*;
|
||||
public class Xomw_table_wkr implements gplx.core.brys.Bry_split_wkr {// THREAD.UNSAFE: caching for repeated calls
|
||||
private final Bry_bfr tmp;
|
||||
private Bry_bfr bfr;
|
||||
private final XomwSanitizer sanitizer; private final Xomw_strip_state strip_state;
|
||||
private final XomwSanitizer sanitizer; private final XomwStripState strip_state;
|
||||
private final List_adp
|
||||
td_history = List_adp_.New() // Is currently a td tag open?
|
||||
, last_tag_history = List_adp_.New() // Save history of last lag activated (td, th or caption)
|
||||
@ -30,12 +30,12 @@ public class Xomw_table_wkr implements gplx.core.brys.Bry_split_wkr {// THREAD.U
|
||||
;
|
||||
private int indent_level = 0; // indent level of the table
|
||||
private byte[] first_2 = new byte[2];
|
||||
public Xomw_table_wkr(Xomw_parser parser) {
|
||||
this.tmp = parser.Tmp();
|
||||
this.sanitizer = parser.Sanitizer();
|
||||
this.strip_state = parser.Strip_state();
|
||||
public Xomw_table_wkr(Bry_bfr tmp, XomwSanitizer sanitizer, XomwStripState stripState) {
|
||||
this.tmp = tmp;
|
||||
this.sanitizer = sanitizer;
|
||||
this.strip_state = stripState;
|
||||
}
|
||||
public void Do_table_stuff(Xomw_parser_ctx pctx, Xomw_parser_bfr pbfr) {
|
||||
public void doTableStuff(Xomw_parser_ctx pctx, Xomw_parser_bfr pbfr) {
|
||||
Bry_bfr src_bfr = pbfr.Src();
|
||||
byte[] src = src_bfr.Bfr();
|
||||
int src_bgn = 0;
|
||||
@ -101,7 +101,7 @@ public class Xomw_table_wkr implements gplx.core.brys.Bry_split_wkr {// THREAD.U
|
||||
// First check if we are starting a new table
|
||||
indent_level = colons_end;
|
||||
|
||||
tblw_atrs = strip_state.Unstrip_both(tblw_atrs);
|
||||
tblw_atrs = strip_state.unstripBoth(tblw_atrs);
|
||||
|
||||
// PORTED: out_line = str_repeat('<dl><dd>', $indent_level) . "<table{atrs}>";
|
||||
for (int j = 0; j < indent_level; j++)
|
||||
@ -149,7 +149,7 @@ public class Xomw_table_wkr implements gplx.core.brys.Bry_split_wkr {// THREAD.U
|
||||
line = Bry_.Mid(line, 2); // PORTED: $line = preg_replace('#^\|-+#', '', $line);
|
||||
|
||||
// What's after the tag is now only attributes
|
||||
byte[] atrs = strip_state.Unstrip_both(line);
|
||||
byte[] atrs = strip_state.unstripBoth(line);
|
||||
sanitizer.Fix_tag_attributes(tmp, Name__tr, atrs);
|
||||
atrs = tmp.To_bry_and_clear();
|
||||
|
||||
@ -251,7 +251,7 @@ public class Xomw_table_wkr implements gplx.core.brys.Bry_split_wkr {// THREAD.U
|
||||
cell = tmp.Add(previous).Add_byte(Byte_ascii.Angle_bgn).Add(last_tag).Add_byte(Byte_ascii.Angle_end).Add(cell_data_0).To_bry_and_clear();
|
||||
}
|
||||
else {
|
||||
byte[] atrs = strip_state.Unstrip_both(cell_data_0);
|
||||
byte[] atrs = strip_state.unstripBoth(cell_data_0);
|
||||
tmp.Add(previous).Add_byte(Byte_ascii.Angle_bgn).Add(last_tag);
|
||||
sanitizer.Fix_tag_attributes(tmp, last_tag, atrs);
|
||||
tmp.Add_byte(Byte_ascii.Angle_end).Add(cell_data_1);
|
||||
@ -265,6 +265,197 @@ public class Xomw_table_wkr implements gplx.core.brys.Bry_split_wkr {// THREAD.U
|
||||
bfr.Add(out_line).Add_byte_nl();
|
||||
return Bry_split_.Rv__ok;
|
||||
}
|
||||
// public function doTableStuff($text) {
|
||||
//
|
||||
// $lines = StringUtils::explode("\n", $text);
|
||||
// $out = '';
|
||||
// $td_history = []; # Is currently a td tag open?
|
||||
// $last_tag_history = []; # Save history of last tag activated (td, th or caption)
|
||||
// $tr_history = []; # Is currently a tr tag open?
|
||||
// $tr_attributes = []; # history of tr attributes
|
||||
// $has_opened_tr = []; # Did this table open a <tr> element?
|
||||
// $indent_level = 0; # indent level of the table
|
||||
//
|
||||
// foreach ($lines as $outLine) {
|
||||
// $line = trim($outLine);
|
||||
//
|
||||
// if ($line === '') { # empty line, go to next line
|
||||
// $out .= $outLine . "\n";
|
||||
// continue;
|
||||
// }
|
||||
//
|
||||
// $first_character = $line[0];
|
||||
// $first_two = substr($line, 0, 2);
|
||||
// $matches = [];
|
||||
//
|
||||
// if (preg_match('/^(:*)\s*\{\|(.*)$/', $line, $matches)) {
|
||||
// # First check if we are starting a new table
|
||||
// $indent_level = strlen($matches[1]);
|
||||
//
|
||||
// $attributes = this.mStripState->unstripBoth($matches[2]);
|
||||
// $attributes = Sanitizer::fixTagAttributes($attributes, 'table');
|
||||
//
|
||||
// $outLine = str_repeat('<dl><dd>', $indent_level) . "<table{$attributes}>";
|
||||
// array_push($td_history, false);
|
||||
// array_push($last_tag_history, '');
|
||||
// array_push($tr_history, false);
|
||||
// array_push($tr_attributes, '');
|
||||
// array_push($has_opened_tr, false);
|
||||
// } elseif (count($td_history) == 0) {
|
||||
// # Don't do any of the following
|
||||
// $out .= $outLine . "\n";
|
||||
// continue;
|
||||
// } elseif ($first_two === '|}') {
|
||||
// # We are ending a table
|
||||
// $line = '</table>' . substr($line, 2);
|
||||
// $last_tag = array_pop($last_tag_history);
|
||||
//
|
||||
// if (!array_pop($has_opened_tr)) {
|
||||
// $line = "<tr><td></td></tr>{$line}";
|
||||
// }
|
||||
//
|
||||
// if (array_pop($tr_history)) {
|
||||
// $line = "</tr>{$line}";
|
||||
// }
|
||||
//
|
||||
// if (array_pop($td_history)) {
|
||||
// $line = "</{$last_tag}>{$line}";
|
||||
// }
|
||||
// array_pop($tr_attributes);
|
||||
// $outLine = $line . str_repeat('</dd></dl>', $indent_level);
|
||||
// } elseif ($first_two === '|-') {
|
||||
// # Now we have a table row
|
||||
// $line = preg_replace('#^\|-+#', '', $line);
|
||||
//
|
||||
// # What's after the tag is now only attributes
|
||||
// $attributes = this.mStripState->unstripBoth($line);
|
||||
// $attributes = Sanitizer::fixTagAttributes($attributes, 'tr');
|
||||
// array_pop($tr_attributes);
|
||||
// array_push($tr_attributes, $attributes);
|
||||
//
|
||||
// $line = '';
|
||||
// $last_tag = array_pop($last_tag_history);
|
||||
// array_pop($has_opened_tr);
|
||||
// array_push($has_opened_tr, true);
|
||||
//
|
||||
// if (array_pop($tr_history)) {
|
||||
// $line = '</tr>';
|
||||
// }
|
||||
//
|
||||
// if (array_pop($td_history)) {
|
||||
// $line = "</{$last_tag}>{$line}";
|
||||
// }
|
||||
//
|
||||
// $outLine = $line;
|
||||
// array_push($tr_history, false);
|
||||
// array_push($td_history, false);
|
||||
// array_push($last_tag_history, '');
|
||||
// } elseif ($first_character === '|'
|
||||
// || $first_character === '!'
|
||||
// || $first_two === '|+'
|
||||
// ) {
|
||||
// # This might be cell elements, td, th or captions
|
||||
// if ($first_two === '|+') {
|
||||
// $first_character = '+';
|
||||
// $line = substr($line, 2);
|
||||
// } else {
|
||||
// $line = substr($line, 1);
|
||||
// }
|
||||
//
|
||||
// // In a heading line, both '!!' and '||' are valid cell separators; normalize '!!' to '||'.
|
||||
// if ($first_character === '!') {
|
||||
// $line = StringUtils::replaceMarkup('!!', '||', $line);
|
||||
// }
|
||||
//
|
||||
// # Split up multiple cells on the same line.
|
||||
// # FIXME : This can result in improper nesting of tags processed
|
||||
// # by earlier parser steps.
|
||||
// $cells = explode('||', $line);
|
||||
//
|
||||
// $outLine = '';
|
||||
//
|
||||
// # Loop through each table cell
|
||||
// foreach ($cells as $cell) {
|
||||
// $previous = '';
|
||||
// if ($first_character !== '+') {
|
||||
// $tr_after = array_pop($tr_attributes);
|
||||
// if (!array_pop($tr_history)) {
|
||||
// $previous = "<tr{$tr_after}>\n";
|
||||
// }
|
||||
// array_push($tr_history, true);
|
||||
// array_push($tr_attributes, '');
|
||||
// array_pop($has_opened_tr);
|
||||
// array_push($has_opened_tr, true);
|
||||
// }
|
||||
//
|
||||
// $last_tag = array_pop($last_tag_history);
|
||||
//
|
||||
// if (array_pop($td_history)) {
|
||||
// $previous = "</{$last_tag}>\n{$previous}";
|
||||
// }
|
||||
//
|
||||
// if ($first_character === '|') {
|
||||
// $last_tag = 'td';
|
||||
// } elseif ($first_character === '!') {
|
||||
// $last_tag = 'th';
|
||||
// } elseif ($first_character === '+') {
|
||||
// $last_tag = 'caption';
|
||||
// } else {
|
||||
// $last_tag = '';
|
||||
// }
|
||||
//
|
||||
// array_push($last_tag_history, $last_tag);
|
||||
//
|
||||
// # A cell could contain both parameters and data
|
||||
// $cell_data = explode('|', $cell, 2);
|
||||
//
|
||||
// # T2553: Note that a '|' inside an invalid link should not
|
||||
// # be mistaken as delimiting cell parameters
|
||||
// # Bug T153140: Neither should language converter markup.
|
||||
// if (preg_match('/\[\[|-\{/', $cell_data[0]) === 1) {
|
||||
// $cell = "{$previous}<{$last_tag}>{$cell}";
|
||||
// } elseif (count($cell_data) == 1) {
|
||||
// $cell = "{$previous}<{$last_tag}>{$cell_data[0]}";
|
||||
// } else {
|
||||
// $attributes = this.mStripState->unstripBoth($cell_data[0]);
|
||||
// $attributes = Sanitizer::fixTagAttributes($attributes, $last_tag);
|
||||
// $cell = "{$previous}<{$last_tag}{$attributes}>{$cell_data[1]}";
|
||||
// }
|
||||
//
|
||||
// $outLine .= $cell;
|
||||
// array_push($td_history, true);
|
||||
// }
|
||||
// }
|
||||
// $out .= $outLine . "\n";
|
||||
// }
|
||||
//
|
||||
// # Closing open td, tr and table
|
||||
// while (count($td_history) > 0) {
|
||||
// if (array_pop($td_history)) {
|
||||
// $out .= "</td>\n";
|
||||
// }
|
||||
// if (array_pop($tr_history)) {
|
||||
// $out .= "</tr>\n";
|
||||
// }
|
||||
// if (!array_pop($has_opened_tr)) {
|
||||
// $out .= "<tr><td></td></tr>\n";
|
||||
// }
|
||||
//
|
||||
// $out .= "</table>\n";
|
||||
// }
|
||||
//
|
||||
// # Remove trailing line-ending (b/c)
|
||||
// if (substr($out, -1) === "\n") {
|
||||
// $out = substr($out, 0, -1);
|
||||
// }
|
||||
//
|
||||
// # special case: don't return empty table
|
||||
// if ($out === "<table>\n<tr><td></td></tr>\n</table>") {
|
||||
// $out = '';
|
||||
// }
|
||||
//
|
||||
// return $out;
|
||||
// }
|
||||
private static final byte[]
|
||||
Wtxt__tb__bgn = Bry_.new_a7("{|")
|
||||
, Wtxt__tb__end = Bry_.new_a7("|}")
|
||||
|
@ -117,11 +117,16 @@ public class Xomw_table_wkr__tst {
|
||||
class Xomw_table_wkr__fxt {
|
||||
private final Xomw_parser_bfr parser_bfr = new Xomw_parser_bfr();
|
||||
private final Xomw_parser_ctx pctx = new Xomw_parser_ctx();
|
||||
private final Xomw_table_wkr wkr = new Xomw_table_wkr(new Xomw_parser());
|
||||
private final Xomw_table_wkr wkr;
|
||||
public Xomw_table_wkr__fxt() {
|
||||
Xomw_parser parser = new Xomw_parser();
|
||||
this.wkr = new Xomw_table_wkr(parser.Tmp(), parser.Sanitizer(), parser.Strip_state());
|
||||
}
|
||||
|
||||
public void Test__parse(String src_str, String expd) {
|
||||
byte[] src_bry = Bry_.new_u8(src_str);
|
||||
parser_bfr.Init(src_bry);
|
||||
wkr.Do_table_stuff(pctx, parser_bfr);
|
||||
wkr.doTableStuff(pctx, parser_bfr);
|
||||
Tfds.Eq_str_lines(expd, parser_bfr.Rslt().To_str_and_clear(), src_str);
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user