1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2024-10-27 20:34:16 +00:00

Xomw: Convert Parser; also support caption

This commit is contained in:
gnosygnu 2017-02-22 22:46:56 -05:00
parent b6abbf2ffe
commit 09dbfc894e
29 changed files with 1872 additions and 1481 deletions

View File

@ -332,8 +332,7 @@ public class XomwLinker {
// @since 1.20
// @return String HTML for an image, with links, wrappers, etc.
// XO.MW:SYNC:1.29; DATE:2017-02-08
public void makeImageLink(Bry_bfr bfr, Xomw_parser_ctx pctx, Xomw_parser parser, XomwTitle title, XomwFile file, Xomw_params_frame frameParams, Xomw_params_handler handlerParams, Object time, byte[] query, int widthOption) {
Xomw_parser_env env = parser.Env();
public void makeImageLink(Bry_bfr bfr, Xomw_parser_env env, Xomw_parser_ctx pctx, XomwParserIface parser, XomwTitle title, XomwFile file, Xomw_params_frame frameParams, Xomw_params_handler handlerParams, Object time, byte[] query, int widthOption) {
// XO.MW.HOOK:ImageBeforeProduceHTML
if (file != null && !file.allowInlineDisplay()) {
@ -413,7 +412,7 @@ public class XomwLinker {
// If a thumbnail width has not been provided, it is set
// to the default user option as specified in Language*.php
if (frameParams.align == Bry_.Empty) {
frameParams.align = parser.Env().Lang__align_end;
frameParams.align = env.Lang__align_end;
}
bfr.Add(prefix);
makeThumbLink2(bfr, env, pctx, title, file, frameParams, handlerParams, time, query);
@ -482,7 +481,7 @@ public class XomwLinker {
// @param Parser|null $parser
// @return array
// XO.MW:SYNC:1.29; DATE:2017-02-08
private static void getImageLinkMTOParams(Xomw_params_mto rv, Xomw_params_frame frameParams, byte[] query, Xomw_parser parser) {
private static void getImageLinkMTOParams(Xomw_params_mto rv, Xomw_params_frame frameParams, byte[] query, XomwParserIface parser) {
if (Php_utl_.isset(frameParams.link_url) && frameParams.link_url != Bry_.Empty) {
rv.custom_url_link = frameParams.link_url;
if (Php_utl_.isset(frameParams.link_target)) {

View File

@ -31,13 +31,13 @@ public class XomwLinkHolderArray {
/**
* @var Parser
*/
private final Xomw_parser parent;
private final XomwParserIface parent;
// protected $tempIdOffset;
/**
* @param Parser $parent
*/
public XomwLinkHolderArray(Xomw_parser parent) {
public XomwLinkHolderArray(XomwParserIface parent) {
this.parent = parent;
}
@ -261,18 +261,22 @@ public class XomwLinkHolderArray {
*
* @param String $text
*/
public void replace(Xomw_parser_bfr pbfr) {
this.replaceInternal(pbfr);
public boolean replace(Xomw_parser_bfr pbfr) {
return this.replaceInternal(pbfr);
// $this->replaceInterwiki( $text );
}
public byte[] replace(Xomw_parser_bfr pbfr, byte[] text) {
// XO: byte[] convenience overload; loads text into pbfr and delegates to the pbfr-based replace
boolean rv = this.replace(pbfr.Init(text));
// rv == true: result is read from Trg; rv == false: the untouched input is read back from Src
// NOTE(review): reading Trg assumes replaceInternal does not call pbfr.Switch() after writing; other XO.PBFR passes in this change set switch when dirty and callers read pbfr.Rslt() -- confirm
return rv ? pbfr.Trg().To_bry_and_clear() : pbfr.Src().To_bry_and_clear();
}
/**
* Replace @gplx.Internal protected links
* @param String $text
*/
private void replaceInternal(Xomw_parser_bfr pbfr) {
private boolean replaceInternal(Xomw_parser_bfr pbfr) {
if (internals.Len() == 0) {
return;
return false;
}
// SKIP:Replace_internals does db lookup to identify redlinks;
@ -430,6 +434,7 @@ public class XomwLinkHolderArray {
// $replacer->cb(),
// $text
// );
return true;
}
// /**

View File

@ -0,0 +1,27 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki.includes.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*;
import gplx.xowa.mediawiki.includes.htmls.*;
import gplx.xowa.mediawiki.includes.linkers.*;
// Parser operations that collaborating classes are typed against instead of a concrete parser class.
// In this change set Xomw_parser declares "implements XomwParserIface", and XomwLinker / XomwLinkHolderArray take this type.
public interface XomwParserIface {
// NOTE(review): presumably hands out the next unique link id; no implementation is visible here -- confirm
int nextLinkID();
// Returns the parser's options Object
XomwParserOptions getOptions();
// Returns a link renderer; NOTE(review): whether this is a shared instance is not visible here -- confirm
Xomw_link_renderer getLinkRenderer();
// Port of MW armorLinks: in Xomw_parser, URL protocols found in src are emitted with the strip-marker prefix + "NOPARSE" in front of them
// NOTE(review): src_bgn / src_end look like a half-open range and trg like an optional output bfr; exact return contract is not visible here -- confirm
byte[] armorLinks(Bry_bfr trg, byte[] src, int src_bgn, int src_end);
// Fills atrs with the attributes for an external link and returns it; the Xomw_parser implementation clears atrs first
Xomw_atr_mgr getExternalLinkAttribs(Xomw_atr_mgr atrs);
// NOTE(review): the Xomw_parser implementation in this change set returns caption unchanged and ignores holders
byte[] stripAltText(byte[] caption, XomwLinkHolderArray holders);
}

View File

@ -14,8 +14,8 @@ GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki.includes.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*;
public class Xomw_parser_options {
public Xomw_parser_options() {
public class XomwParserOptions {
public XomwParserOptions() {
this.mThumbSize = 220;
}
// /**

View File

@ -15,8 +15,8 @@ Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki.includes.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*;
import org.junit.*;
public class Xomw_parser__tst {
private final Xomw_parser__fxt fxt = new Xomw_parser__fxt();
public class XomwParserTest {
private final XomwParserFxt fxt = new XomwParserFxt();
@Test public void Basic() {
fxt.Test__parse(String_.Concat_lines_nl_skip_last
( "== heading_1 =="
@ -57,19 +57,21 @@ public class Xomw_parser__tst {
));
}
}
class Xomw_parser__fxt {
private final Xomw_parser mgr = new Xomw_parser();
class XomwParserFxt {
private final XomwParser parser = new XomwParser();
private final Xomw_parser_ctx pctx = new Xomw_parser_ctx();
private final Xomw_parser_bfr pbfr = new Xomw_parser_bfr();
public Xomw_parser__fxt() {
public XomwParserFxt() {
Xoae_app app = Xoa_app_fxt.Make__app__edit();
Xowe_wiki wiki = Xoa_app_fxt.Make__wiki__edit(app);
mgr.Init_by_wiki(wiki);
mgr.Init_by_page(XomwTitle.newFromText(Bry_.new_a7("Page_1")));
parser.Init_by_wiki(wiki);
parser.Init_by_page(XomwTitle.newFromText(Bry_.new_a7("Page_1")));
pctx.Init_by_page(XomwTitle.newFromText(Bry_.new_a7("Page_1")));
}
public void Test__parse(String src_str, String expd) {
byte[] src_bry = Bry_.new_u8(src_str);
mgr.Internal_parse(pbfr, src_bry);
mgr.Internal_parse_half_parsed(pbfr, true, true);
parser.internalParse(pbfr, pctx, src_bry);
parser.internalParseHalfParsed(pbfr, pctx, true, true);
Tfds.Eq_str_lines(expd, pbfr.Rslt().To_str_and_clear(), src_str);
}
}

View File

@ -0,0 +1,346 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki.includes.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*;
import gplx.core.btries.*;
public class XomwStripState {
// protected $prefix;
// protected $data;
// protected $regex;
//
// protected $tempType, $tempMergePrefix;
// protected $circularRefGuard;
// protected $recursionLevel = 0;
//
// static final UNSTRIP_RECURSION_LIMIT = 20;
private final Btrie_slim_mgr trie = Btrie_slim_mgr.cs();
private final Btrie_rv trv = new Btrie_rv();
private final Bry_bfr tmp_1 = Bry_bfr_.New();
private final Bry_bfr tmp_2 = Bry_bfr_.New();
private boolean tmp_2_used = false;
private int generalLen, nowikiLen;
// /**
// * @param String|null $prefix
// * @since 1.26 The prefix argument should be omitted, as the strip marker
// * prefix String is now a constant.
// */
// public function __construct($prefix = null) {
// if ($prefix !== null) {
// wfDeprecated(__METHOD__ . ' with called with $prefix argument' .
// ' (call with no arguments instead)', '1.26');
// }
// this.data = [
// 'nowiki' => [],
// 'general' => []
// ];
// this.regex = '/' . Parser::MARKER_PREFIX . "([^\x7f<>&'\"]+)" . Parser::MARKER_SUFFIX . '/';
// this.circularRefGuard = [];
// }
// public void Clear() {
// trie.Clear();
// generalLen = nowikiLen = 0;
// tmp_2_used = false;
// }
/**
* Add a nowiki strip item
* @param String $marker
* @param String $value
*/
public void addNoWiki(byte[] marker, byte[] val) {
// XO: registers val under marker as a TYPE_NOWIKI item
this.addItem(TYPE_NOWIKI, marker, val);
}
/**
* @param String $marker
* @param String $value
*/
public void addGeneral(byte[] marker, byte[] val) {
// XO: registers val under marker as a TYPE_GENERAL item
this.addItem(TYPE_GENERAL, marker, val);
}
/**
* @throws MWException
* @param String $type
* @param String $marker
* @param String $value
*/
public void addItem(byte type, byte[] marker, byte[] val) {
	// MW validates the marker first; that check is not ported:
	// if (!preg_match(this.regex, $marker, $m)) {
	// throw new MWException("Invalid marker: $marker");
	// }
	// XO.MW:ported
	// this.data[$type][$m[1]] = $value;
	// XO: items of every type share one trie, keyed by the full marker bytes
	XomwStripItem item = new XomwStripItem(type, marker, val);
	trie.Add_obj(marker, item);

	// per-type tallies feed the early exit in unstripType; anything that is not TYPE_GENERAL is tallied as nowiki
	switch (type) {
		case TYPE_GENERAL:
			generalLen++;
			break;
		default:
			nowikiLen++;
			break;
	}
}
/**
* @param String $text
* @return mixed
*/
public byte[] unstripGeneral(byte[] text) {
// XO: expands TYPE_GENERAL markers only; text itself is returned when nothing was expanded
return this.unstripType(TYPE_GENERAL, text);
}
/**
* @param String $text
* @return mixed
*/
public byte[] unstripNoWiki(byte[] text) {
// XO: expands TYPE_NOWIKI markers only; text itself is returned when nothing was expanded
return this.unstripType(TYPE_NOWIKI, text);
}
/**
* @param String $text
* @return mixed
*/
public byte[] unstripBoth(byte[] text) {
// XO: MW runs two passes (general, then nowiki); XO makes one pass with the combined TYPE_BOTH mask
// $text = this.unstripType('general', $text);
// $text = this.unstripType('nowiki', $text);
return this.unstripType(TYPE_BOTH, text);
}
public byte[] unstripType(byte tid, byte[] text) {
	// tmp_1 only receives output when at least one marker was expanded; otherwise hand back the caller's array as-is
	if (!unstripType(tid, tmp_1, text, 0, text.length))
		return text;
	return tmp_1.To_bry_and_clear();
}
// XOWA
// XO.PBFR variants: read from pbfr.Src(), write to pbfr.Trg(), and switch the two bfrs only when a marker was expanded
public void unstripGeneral(Xomw_parser_bfr pbfr) {unstripType(TYPE_GENERAL, pbfr);}
public void unstripNoWiki(Xomw_parser_bfr pbfr) {unstripType(TYPE_NOWIKI , pbfr);}
public void unstripBoth(Xomw_parser_bfr pbfr) {unstripType(TYPE_BOTH , pbfr);}
private boolean unstripType(byte tid, Xomw_parser_bfr pbfr) {
	// XO.PBFR: scan the raw bytes of the src bfr (bounded by its Len) and write expansions to the trg bfr
	Bry_bfr srcBfr = pbfr.Src();
	byte[] srcBry = srcBfr.Bfr();
	boolean changed = unstripType(tid, pbfr.Trg(), srcBry, 0, srcBfr.Len());

	// nothing expanded: trg was not written to, so src stays the current bfr
	if (!changed)
		return false;

	pbfr.Switch();
	return true;
}
/**
 * Expands strip markers found in src between src_bgn (inclusive) and src_end (exclusive),
 * appending the expanded text to trg.
 *
 * XO.MW: port of StripState::unstripType; MW's preg_replace_callback is replaced by a
 * position-by-position trie match. The value of each expanded item is itself unstripped recursively.
 *
 * NOTE(review): MW's unstripCallback guards the recursion with circularRefGuard and
 * UNSTRIP_RECURSION_LIMIT; neither is ported here, so an item whose value leads back to its
 * own marker would recurse without bound.
 *
 * @param tid bit mask of item types to expand: TYPE_GENERAL, TYPE_NOWIKI or TYPE_BOTH
 * @param trg bfr receiving the output; nothing is appended unless at least one marker is expanded
 * @param src bytes to scan
 * @param src_bgn position of first byte to scan
 * @param src_end position after the last byte to scan
 * @return true if at least one marker was expanded; false if trg was left untouched
 */
private boolean unstripType(byte tid, Bry_bfr trg, byte[] src, int src_bgn, int src_end) {
	// // Shortcut
	// if (!count(this.data[$type])) {
	// return $text;
	// }
	// exit early only if NONE of the requested types has items;
	// TYPE_BOTH sets both bits, so it must keep going when either general or nowiki items exist
	boolean generalPending = (tid & TYPE_GENERAL) == TYPE_GENERAL && generalLen > 0;
	boolean nowikiPending = (tid & TYPE_NOWIKI) == TYPE_NOWIKI && nowikiLen > 0;
	if (!generalPending && !nowikiPending)
		return false;

	// XO.MW:PORTED
	// $oldType = this.tempType;
	// this.tempType = $type;
	// $text = preg_replace_callback(this.regex, [ $this, 'unstripCallback' ], $text);
	// this.tempType = $oldType;
	// return $text;
	int cur = src_bgn;
	int prv = cur; // start of the not-yet-copied stretch of src
	boolean dirty = false;

	// loop over each src char
	while (true) {
		// EOS: exit
		if (cur == src_end) {
			if (dirty) // add remainder if dirty
				trg.Add_mid(src, prv, src_end);
			break;
		}

		// check if current pos matches strip state
		Object o = trie.Match_at(trv, src, cur, src_end);
		if (o != null) { // match
			XomwStripItem item = (XomwStripItem)o;
			byte item_tid = item.Type();
			if ((tid & item_tid) == item_tid) { // check if types match
				// get bfr for recursion: reuse tmp_2 when free; else allocate, b/c an outer call is still using it
				Bry_bfr nested_bfr = null;
				boolean tmp_2_release = false;
				if (tmp_2_used) {
					nested_bfr = Bry_bfr_.New();
				}
				else {
					nested_bfr = tmp_2;
					tmp_2_used = true;
					tmp_2_release = true;
				}

				// recurse: the item's value may itself contain markers
				byte[] item_val = item.Val();
				if (unstripType(tid, nested_bfr, item_val, 0, item_val.length))
					item_val = nested_bfr.To_bry_and_clear();
				if (tmp_2_release)
					tmp_2_used = false;

				// add to trg: text before the marker, then the expanded value
				trg.Add_mid(src, prv, cur);
				trg.Add(item_val);

				// update vars: skip over the marker
				dirty = true;
				cur += item.Key().length;
				prv = cur;
				continue;
			}
		}
		cur++;
	}
	return dirty;
}
// /**
// * @param array $m
// * @return array
// */
// protected function unstripCallback($m) {
// $marker = $m[1];
// if (isset(this.data[this.tempType][$marker])) {
// if (isset(this.circularRefGuard[$marker])) {
// return '<span class="error">'
// . wfMessage('parser-unstrip-loop-warning')->inContentLanguage()->text()
// . '</span>';
// }
// if (this.recursionLevel >= self::UNSTRIP_RECURSION_LIMIT) {
// return '<span class="error">' .
// wfMessage('parser-unstrip-recursion-limit')
// ->numParams(self::UNSTRIP_RECURSION_LIMIT)->inContentLanguage()->text() .
// '</span>';
// }
// this.circularRefGuard[$marker] = true;
// this.recursionLevel++;
// $value = this.data[this.tempType][$marker];
// if ($value instanceof Closure) {
// $value = $value();
// }
// $ret = this.unstripType(this.tempType, $value);
// this.recursionLevel--;
// unset(this.circularRefGuard[$marker]);
// return $ret;
// } else {
// return $m[0];
// }
// }
// /**
// * Get a StripState Object which is sufficient to unstrip the given text.
// * It will contain the minimum subset of strip items necessary.
// *
// * @param String $text
// *
// * @return StripState
// */
// public function getSubState($text) {
// $subState = new StripState();
// $pos = 0;
// while (true) {
// $startPos = strpos($text, Parser::MARKER_PREFIX, $pos);
// $endPos = strpos($text, Parser::MARKER_SUFFIX, $pos);
// if ($startPos === false || $endPos === false) {
// break;
// }
//
// $endPos += strlen(Parser::MARKER_SUFFIX);
// $marker = substr($text, $startPos, $endPos - $startPos);
// if (!preg_match(this.regex, $marker, $m)) {
// continue;
// }
//
// $key = $m[1];
// if (isset(this.data['nowiki'][$key])) {
// $subState->data['nowiki'][$key] = this.data['nowiki'][$key];
// } elseif (isset(this.data['general'][$key])) {
// $subState->data['general'][$key] = this.data['general'][$key];
// }
// $pos = $endPos;
// }
// return $subState;
// }
//
// /**
// * Merge another StripState Object into this one. The strip marker keys
// * will not be preserved. The strings in the $texts array will have their
// * strip markers rewritten, the resulting array of strings will be returned.
// *
// * @param StripState $otherState
// * @param array $texts
// * @return array
// */
// public function merge($otherState, $texts) {
// $mergePrefix = wfRandomString(16);
//
// foreach ($otherState->data as $type => $items) {
// foreach ($items as $key => $value) {
// this.data[$type]["$mergePrefix-$key"] = $value;
// }
// }
//
// this.tempMergePrefix = $mergePrefix;
// $texts = preg_replace_callback($otherState->regex, [ $this, 'mergeCallback' ], $texts);
// this.tempMergePrefix = null;
// return $texts;
// }
//
// /**
// * @param array $m
// * @return String
// */
// protected function mergeCallback($m) {
// $key = $m[1];
// return Parser::MARKER_PREFIX . this.tempMergePrefix . '-' . $key . Parser::MARKER_SUFFIX;
// }
//
// /**
// * Remove any strip markers found in the given text.
// *
// * @param String $text Input String
// * @return String
// */
// public function killMarkers($text) {
// return preg_replace(this.regex, '', $text);
// }
public static final String Str__marker_bgn = "\u007f'\"`UNIQ-";
public static final byte[]
Bry__marker__bgn = Bry_.new_a7(Str__marker_bgn)
, Bry__marker__end = Bry_.new_a7("-QINU`\"'\u007f")
;
public static final byte TYPE_GENERAL = 1, TYPE_NOWIKI = 2, TYPE_BOTH = 3;
}
// Immutable entry of the strip-state trie: the item type, the marker it is keyed by, and its replacement value.
class XomwStripItem {
	private final byte tid;
	private final byte[] key;
	private final byte[] val;

	public XomwStripItem(byte tid, byte[] key, byte[] val) {
		this.tid = tid;
		this.key = key;
		this.val = val;
	}

	// item type, as passed to the constructor
	public byte Type() {
		return tid;
	}

	// marker bytes; the same array instance that was passed in (no copy)
	public byte[] Key() {
		return key;
	}

	// replacement bytes; the same array instance that was passed in (no copy)
	public byte[] Val() {
		return val;
	}
}

View File

@ -15,28 +15,28 @@ Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki.includes.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*;
import org.junit.*; import gplx.core.tests.*;
public class Xomw_strip_state__tst {
private final Xomw_strip_state__fxt fxt = new Xomw_strip_state__fxt();
public class XomwStripStateTest {
private final XomwStripStateFxt fxt = new XomwStripStateFxt();
@Test public void Basic() {
fxt.Init__add (Xomw_strip_state.Tid__general, "\u007f'\"`UNIQ-key-1-QINU`\"'\u007f", "val-1");
fxt.Test__nostrip(Xomw_strip_state.Tid__nowiki , "a \u007f'\"`UNIQ-key-1-QINU`\"'\u007f b");
fxt.Test__unstrip(Xomw_strip_state.Tid__general, "a \u007f'\"`UNIQ-key-1-QINU`\"'\u007f b", "a val-1 b");
fxt.Test__unstrip(Xomw_strip_state.Tid__both , "a \u007f'\"`UNIQ-key-1-QINU`\"'\u007f b", "a val-1 b");
fxt.Init__add (XomwStripState.TYPE_GENERAL, "\u007f'\"`UNIQ-key-1-QINU`\"'\u007f", "val-1");
fxt.Test__nostrip(XomwStripState.TYPE_NOWIKI , "a \u007f'\"`UNIQ-key-1-QINU`\"'\u007f b");
fxt.Test__unstrip(XomwStripState.TYPE_GENERAL, "a \u007f'\"`UNIQ-key-1-QINU`\"'\u007f b", "a val-1 b");
fxt.Test__unstrip(XomwStripState.TYPE_BOTH , "a \u007f'\"`UNIQ-key-1-QINU`\"'\u007f b", "a val-1 b");
}
@Test public void Recurse() {
fxt.Init__add (Xomw_strip_state.Tid__general, "\u007f'\"`UNIQ-key-1-QINU`\"'\u007f", "val-1");
fxt.Init__add (Xomw_strip_state.Tid__general, "\u007f'\"`UNIQ-key-2-QINU`\"'\u007f", "\u007f'\"`UNIQ-key-1-QINU`\"'\u007f");
fxt.Test__unstrip(Xomw_strip_state.Tid__general, "a \u007f'\"`UNIQ-key-2-QINU`\"'\u007f b", "a val-1 b");
fxt.Init__add (XomwStripState.TYPE_GENERAL, "\u007f'\"`UNIQ-key-1-QINU`\"'\u007f", "val-1");
fxt.Init__add (XomwStripState.TYPE_GENERAL, "\u007f'\"`UNIQ-key-2-QINU`\"'\u007f", "\u007f'\"`UNIQ-key-1-QINU`\"'\u007f");
fxt.Test__unstrip(XomwStripState.TYPE_GENERAL, "a \u007f'\"`UNIQ-key-2-QINU`\"'\u007f b", "a val-1 b");
}
}
class Xomw_strip_state__fxt {
private final Xomw_strip_state strip_state = new Xomw_strip_state();
class XomwStripStateFxt {
private final XomwStripState stripState = new XomwStripState();
public void Init__add(byte tid, String marker, String val) {
strip_state.Add_item(tid, Bry_.new_u8(marker), Bry_.new_u8(val));
stripState.addItem(tid, Bry_.new_u8(marker), Bry_.new_u8(val));
}
public void Test__nostrip(byte tid, String src) {Test__unstrip(tid, src, src);}
public void Test__unstrip(byte tid, String src, String expd) {
byte[] actl = strip_state.Unstrip(tid, Bry_.new_u8(src));
byte[] actl = stripState.unstripType(tid, Bry_.new_u8(src));
Gftest.Eq__str(expd, String_.new_u8(actl));
}
}

View File

@ -24,7 +24,7 @@ public class Xomw_block_level_pass {
private int last_section;
private byte[] find_colon_no_links__before, find_colon_no_links__after;
public void Do_block_levels(Xomw_parser_ctx pctx, Xomw_parser_bfr pbfr, boolean line_start) {
public void doBlockLevels(Xomw_parser_ctx pctx, Xomw_parser_bfr pbfr, boolean line_start) {
// XO.PBFR
Bry_bfr src_bfr = pbfr.Src();
byte[] src = src_bfr.Bfr();
@ -41,7 +41,7 @@ public class Xomw_block_level_pass {
("<table", "<h1", "<h2", "<h3", "<h4", "<h5", "<h6", "<pre", "<tr", "<p", "<ul", "<ol", "<dl", "<li", "</tr", "</td", "</th");
close_match_trie = Btrie_slim_mgr.ci_a7().Add_many_str
( "</table", "</h1", "</h2", "</h3", "</h4", "</h5", "</h6", "<td", "<th", "<blockquote", "</blockquote", "<div", "</div", "<hr"
, "</pre", "</p", "</mw:", Xomw_strip_state.Str__marker_bgn + "-pre", "</li", "</ul", "</ol", "</dl", "<center", "</center");
, "</pre", "</p", "</mw:", XomwStripState.Str__marker_bgn + "-pre", "</li", "</ul", "</ol", "</dl", "<center", "</center");
blockquote_trie = Btrie_slim_mgr.ci_a7().Add_many_str("<blockquote", "</blockquote");
pre_trie = Btrie_slim_mgr.ci_a7().Add_str_int("<pre", Pre__bgn).Add_str_int("</pre", Pre__end);
}

View File

@ -34,7 +34,7 @@ class Xomw_block_level_pass__fxt {
private boolean apos = true;
public void Test__do_block_levels(String src, String expd) {
if (apos) expd = gplx.langs.htmls.Gfh_utl.Replace_apos(expd);
block_level_pass.Do_block_levels(pctx, pbfr.Init(Bry_.new_u8(src)), true);
block_level_pass.doBlockLevels(pctx, pbfr.Init(Bry_.new_u8(src)), true);
Gftest.Eq__str(expd, pbfr.Rslt().To_str_and_clear());
}
}

View File

@ -20,7 +20,7 @@ import gplx.xowa.mediawiki.includes.parsers.quotes.*; import gplx.xowa.mediawiki
import gplx.xowa.mediawiki.includes.parsers.lnkes.*; import gplx.xowa.mediawiki.includes.parsers.lnkis.*; import gplx.xowa.mediawiki.includes.parsers.magiclinks.*; import gplx.xowa.mediawiki.includes.parsers.doubleunders.*;
import gplx.xowa.mediawiki.includes.utls.*; import gplx.xowa.mediawiki.includes.linkers.*;
import gplx.xowa.mediawiki.includes.htmls.*;
public class Xomw_parser {
public class Xomw_parser implements XomwParserIface {
private final Xomw_parser_ctx pctx = new Xomw_parser_ctx();
private final Xomw_table_wkr table_wkr;
private final Xomw_hr_wkr hr_wkr = new Xomw_hr_wkr();
@ -42,8 +42,8 @@ public class Xomw_parser {
private int marker_index = 0;
// private final Xomw_prepro_wkr prepro_wkr = new Xomw_prepro_wkr();
public Xomw_parser_env Env() {return env;} private final Xomw_parser_env env = new Xomw_parser_env();
public Xomw_parser_options Options() {return options;} private final Xomw_parser_options options = new Xomw_parser_options();
public Xomw_strip_state Strip_state() {return strip_state;} private final Xomw_strip_state strip_state = new Xomw_strip_state();
public XomwParserOptions getOptions() {return options;} private final XomwParserOptions options = new XomwParserOptions();
public XomwStripState Strip_state() {return strip_state;} private final XomwStripState strip_state = new XomwStripState();
public XomwSanitizer Sanitizer() {return sanitizer;} private final XomwSanitizer sanitizer = new XomwSanitizer();
public XomwLinker Linker() {return linker;} private final XomwLinker linker;
public Bry_bfr Tmp() {return tmp;} private final Bry_bfr tmp = Bry_bfr_.New();
@ -92,10 +92,10 @@ public class Xomw_parser {
this.linker = new XomwLinker(link_renderer);
this.protocols_trie = Xomw_parser.Protocols__dflt();
this.holders = new XomwLinkHolderArray(this);
this.table_wkr = new Xomw_table_wkr(this);
this.quote_wkr = new Xomw_quote_wkr(this);
this.lnke_wkr = new Xomw_lnke_wkr(this);
this.lnki_wkr = new Xomw_lnki_wkr(this, holders, link_renderer, protocols_trie);
this.table_wkr = new Xomw_table_wkr(tmp, sanitizer, strip_state);
this.quote_wkr = new Xomw_quote_wkr(tmp);
this.lnke_wkr = new Xomw_lnke_wkr(this, tmp, linker, sanitizer);
this.lnki_wkr = new Xomw_lnki_wkr(this, holders, link_renderer, protocols_trie, linker, quote_wkr, tmp, strip_state);
this.heading_wkr_cbk = new Xomw_heading_cbk__html();
this.magiclinks_wkr = new Xomw_magiclinks_wkr(this, sanitizer, linker, regex_boundary, regex_url);
}
@ -146,33 +146,34 @@ public class Xomw_parser {
// properly; putting them before other transformations should keep
// exciting things like link expansions from showing up in surprising
// places.
table_wkr.Do_table_stuff(pctx, pbfr);
hr_wkr.Replace_hrs(pctx, pbfr);
table_wkr.doTableStuff(pctx, pbfr);
doubleunder_wkr.Do_double_underscore(pctx, pbfr); // DONE: DATE:2017-01-27
hr_wkr.replaceHrs(pctx, pbfr);
heading_wkr.Do_headings(pctx, pbfr, heading_wkr_cbk);
lnki_wkr.Replace_internal_links(pctx, pbfr);
quote_wkr.Do_all_quotes(pctx, pbfr);
lnke_wkr.Replace_external_links(pctx, pbfr);
doubleunder_wkr.doDoubleUnderscore(pctx, pbfr); // DONE: DATE:2017-01-27
heading_wkr.doHeadings(pctx, pbfr, heading_wkr_cbk);
lnki_wkr.replaceInternalLinks(pbfr, env, pctx);
quote_wkr.doAllQuotes(pctx, pbfr);
lnke_wkr.replaceExternalLinks(pctx, pbfr);
// replaceInternalLinks may sometimes leave behind
// absolute URLs, which have to be masked to hide them from replaceExternalLinks
Xomw_parser_bfr_.Replace(pbfr, Bry__marker__noparse, Bry_.Empty);
magiclinks_wkr.Do_magic_links(pctx, pbfr);
magiclinks_wkr.doMagicLinks(pctx, pbfr);
// $text = $this->formatHeadings($text, $origText, $isMain);
}
public void Internal_parse_half_parsed(Xomw_parser_bfr pbfr, boolean is_main, boolean line_start) {
strip_state.Unstrip_general(pbfr);
strip_state.unstripGeneral(pbfr);
// MW.HOOK:ParserAfterUnstrip
// Clean up special characters, only run once, next-to-last before doBlockLevels
nbsp_wkr.Do_nbsp(pctx, pbfr);
nbsp_wkr.doNbsp(pctx, pbfr);
block_wkr.Do_block_levels(pctx, pbfr, line_start);
block_wkr.doBlockLevels(pctx, pbfr, line_start);
lnki_wkr.replaceLinkHolders(pbfr);
@ -192,12 +193,12 @@ public class Xomw_parser {
// }
// }
strip_state.Unstrip_nowiki(pbfr);
strip_state.unstripNoWiki(pbfr);
// MW.HOOK:ParserBeforeTidy
// $text = $this->replaceTransparentTags( $text );
strip_state.Unstrip_general(pbfr);
strip_state.unstripGeneral(pbfr);
sanitizer.Normalize_char_references(pbfr);
@ -236,7 +237,11 @@ public class Xomw_parser {
// MW.HOOK:ParserAfterTidy
}
public byte[] Armor_links(Bry_bfr trg, byte[] src, int src_bgn, int src_end) {
public byte[] stripAltText(byte[] caption, XomwLinkHolderArray holders) {
// XO: currently a pass-through; caption is returned unchanged and holders is not used
// NOTE(review): presumably a placeholder until MW's stripAltText is ported -- confirm
return caption;
}
public byte[] armorLinks(Bry_bfr trg, byte[] src, int src_bgn, int src_end) {
// PORTED:preg_replace( '/\b((?i)' . $this->mUrlProtocols . ')/', self::MARKER_PREFIX . "NOPARSE$1", $text )
int cur = src_bgn;
int prv = cur;
@ -262,7 +267,7 @@ public class Xomw_parser {
dirty = true;
byte[] protocol_bry = (byte[])protocol_obj;
if (called_by_bry) trg = Bry_bfr_.New();
trg.Add_bry_many(Xomw_strip_state.Bry__marker__bgn, Bry__noparse, protocol_bry);
trg.Add_bry_many(XomwStripState.Bry__marker__bgn, Bry__noparse, protocol_bry);
cur += protocol_bry.length;
prv = cur;
}
@ -287,15 +292,15 @@ public class Xomw_parser {
}
}
public byte[] Insert_strip_item(byte[] text) {
tmp.Add_bry_many(Xomw_strip_state.Bry__marker__bgn, Bry__strip_state_item);
tmp.Add_bry_many(XomwStripState.Bry__marker__bgn, Bry__strip_state_item);
tmp.Add_int_variable(marker_index);
tmp.Add(Xomw_strip_state.Bry__marker__end);
tmp.Add(XomwStripState.Bry__marker__end);
byte[] marker = tmp.To_bry_and_clear();
marker_index++;
strip_state.Add_general(marker, text);
strip_state.addGeneral(marker, text);
return marker;
}
public Xomw_atr_mgr Get_external_link_attribs(Xomw_atr_mgr atrs) {
public Xomw_atr_mgr getExternalLinkAttribs(Xomw_atr_mgr atrs) {
atrs.Clear();
byte[] rel = Get_external_link_rel;
@ -309,7 +314,7 @@ public class Xomw_parser {
public byte[] Get_external_link_rel;
private static byte[] Atr__rel;
private static final byte[] Bry__strip_state_item = Bry_.new_a7("-item-"), Bry__noparse = Bry_.new_a7("NOPARSE");
private static final byte[] Bry__marker__noparse = Bry_.Add(Xomw_strip_state.Bry__marker__bgn, Bry__noparse);
private static final byte[] Bry__marker__noparse = Bry_.Add(XomwStripState.Bry__marker__bgn, Bry__noparse);
public static Btrie_slim_mgr Protocols__dflt() {
Btrie_slim_mgr rv = Btrie_slim_mgr.ci_a7();
Gfo_protocol_itm[] ary = Gfo_protocol_itm.Ary();

View File

@ -1,137 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki.includes.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*;
import gplx.core.btries.*;
public class Xomw_strip_state { // REF.MW:/parser/StripState.php
	private final Btrie_slim_mgr trie = Btrie_slim_mgr.cs(); // marker bytes -> Xomw_strip_item; items of all types share this trie
	private final Btrie_rv trv = new Btrie_rv();
	private final Bry_bfr tmp_1 = Bry_bfr_.New();             // output bfr for the byte[]-based Unstrip
	private final Bry_bfr tmp_2 = Bry_bfr_.New();             // bfr for the first level of recursion; deeper levels allocate
	private boolean tmp_2_used = false;
	private int general_len, nowiki_len;                      // per-type item counts; used for the early exit in Unstrip

	// Removes all items and resets counters / bfr flag.
	public void Clear() {
		trie.Clear();
		general_len = nowiki_len = 0;
		tmp_2_used = false;
	}

	public void Add_general(byte[] marker, byte[] val) {Add_item(Tid__general, marker, val);}
	public void Add_nowiki (byte[] marker, byte[] val) {Add_item(Tid__nowiki, marker, val);}

	// Registers val under marker; any tid other than Tid__general is counted as nowiki.
	public void Add_item(byte tid, byte[] marker, byte[] val) {
		trie.Add_obj(marker, new Xomw_strip_item(tid, marker, val));
		if (tid == Tid__general)
			general_len++;
		else
			nowiki_len++;
	}

	public byte[] Unstrip_general(byte[] text) {return Unstrip(Tid__general, text);}
	public byte[] Unstrip_nowiki (byte[] text) {return Unstrip(Tid__nowiki , text);}
	public byte[] Unstrip_both   (byte[] text) {return Unstrip(Tid__both   , text);}

	// Returns text with markers of the given type(s) expanded, or text itself if nothing was expanded.
	public byte[] Unstrip(byte tid, byte[] text) {
		boolean dirty = Unstrip(tid, tmp_1, text, 0, text.length);
		return dirty ? tmp_1.To_bry_and_clear() : text;
	}

	public void Unstrip_general(Xomw_parser_bfr pbfr) {Unstrip(Tid__general, pbfr);}
	public void Unstrip_nowiki (Xomw_parser_bfr pbfr) {Unstrip(Tid__nowiki , pbfr);}
	public void Unstrip_both   (Xomw_parser_bfr pbfr) {Unstrip(Tid__both   , pbfr);}

	// Reads from pbfr.Src(), writes to pbfr.Trg(), and switches the two only when a marker was expanded.
	private boolean Unstrip(byte tid, Xomw_parser_bfr pbfr) {
		// XO.PBFR
		Bry_bfr src_bfr = pbfr.Src();
		byte[] src = src_bfr.Bfr();
		boolean dirty = Unstrip(tid, pbfr.Trg(), src, 0, src_bfr.Len());
		if (dirty)
			pbfr.Switch();
		return dirty;
	}

	// Expands markers in src between src_bgn (inclusive) and src_end (exclusive) into trg.
	// tid is a bit mask (Tid__general, Tid__nowiki, Tid__both). Returns true if at least one marker was expanded;
	// when false, nothing was appended to trg.
	// NOTE(review): recursion into item values has no circular-reference or depth guard.
	private boolean Unstrip(byte tid, Bry_bfr trg, byte[] src, int src_bgn, int src_end) {
		// exit early only if NONE of the requested types has items;
		// Tid__both sets both bits, so it must keep going when either general or nowiki items exist
		boolean general_pending = (tid & Tid__general) == Tid__general && general_len > 0;
		boolean nowiki_pending  = (tid & Tid__nowiki)  == Tid__nowiki  && nowiki_len > 0;
		if (!general_pending && !nowiki_pending)
			return false;

		int cur = src_bgn;
		int prv = cur; // start of the not-yet-copied stretch of src
		boolean dirty = false;

		// loop over each src char
		while (true) {
			// EOS: exit
			if (cur == src_end) {
				if (dirty) // add remainder if dirty
					trg.Add_mid(src, prv, src_end);
				break;
			}

			// check if current pos matches strip state
			Object o = trie.Match_at(trv, src, cur, src_end);
			if (o != null) { // match
				Xomw_strip_item item = (Xomw_strip_item)o;
				byte item_tid = item.Tid();
				if ((tid & item_tid) == item_tid) { // check if types match
					// get bfr for recursion: reuse tmp_2 when free; else allocate, b/c an outer call is still using it
					Bry_bfr nested_bfr = null;
					boolean tmp_2_release = false;
					if (tmp_2_used) {
						nested_bfr = Bry_bfr_.New();
					}
					else {
						nested_bfr = tmp_2;
						tmp_2_used = true;
						tmp_2_release = true;
					}

					// recurse: the item's value may itself contain markers
					byte[] item_val = item.Val();
					if (Unstrip(tid, nested_bfr, item_val, 0, item_val.length))
						item_val = nested_bfr.To_bry_and_clear();
					if (tmp_2_release)
						tmp_2_used = false;

					// add to trg: text before the marker, then the expanded value
					trg.Add_mid(src, prv, cur);
					trg.Add(item_val);

					// update vars: skip over the marker
					dirty = true;
					cur += item.Key().length;
					prv = cur;
					continue;
				}
			}
			cur++;
		}
		return dirty;
	}

	public static final String Str__marker_bgn = "\u007f'\"`UNIQ-";
	public static final byte[]
	  Bry__marker__bgn = Bry_.new_a7(Str__marker_bgn)
	, Bry__marker__end = Bry_.new_a7("-QINU`\"'\u007f")
	;
	public static final byte Tid__general = 1, Tid__nowiki = 2, Tid__both = 3;
}
// Immutable entry of the strip-state trie: the item type, the marker it is keyed by, and its replacement value.
class Xomw_strip_item {
	private final byte tid;
	private final byte[] key;
	private final byte[] val;

	public Xomw_strip_item(byte tid, byte[] key, byte[] val) {
		this.tid = tid;
		this.key = key;
		this.val = val;
	}

	// item type, as passed to the constructor
	public byte Tid() {
		return tid;
	}

	// marker bytes; the same array instance that was passed in (no copy)
	public byte[] Key() {
		return key;
	}

	// replacement bytes; the same array instance that was passed in (no copy)
	public byte[] Val() {
		return val;
	}
}

View File

@ -37,7 +37,7 @@ public class Xomw_doubleunder_wkr {
, Xol_kwd_grp_.Id_nocontentconvert
);
}
public void Do_double_underscore(Xomw_parser_ctx pctx, Xomw_parser_bfr pbfr) {
public void doDoubleUnderscore(Xomw_parser_ctx pctx, Xomw_parser_bfr pbfr) {
// XO.PBFR
Bry_bfr src_bfr = pbfr.Src();
byte[] src = src_bfr.Bfr();
@ -123,6 +123,62 @@ public class Xomw_doubleunder_wkr {
if (dirty)
pbfr.Switch();
}
// /**
// * Strip double-underscore items like __NOGALLERY__ and __NOTOC__
// * Fills this.mDoubleUnderscores, returns the modified text
// *
// * @param String $text
// *
// * @return String
// */
// public function doDoubleUnderscore($text) {
//
// # The position of __TOC__ needs to be recorded
// $mw = MagicWord::get('toc');
// if ($mw->match($text)) {
// this.mShowToc = true;
// this.mForceTocPosition = true;
//
// # Set a placeholder. At the end we'll fill it in with the TOC.
// $text = $mw->replace('<!--MWTOC-->', $text, 1);
//
// # Only keep the first one.
// $text = $mw->replace('', $text);
// }
//
// # Now match and remove the rest of them
// $mwa = MagicWord::getDoubleUnderscoreArray();
// this.mDoubleUnderscores = $mwa->matchAndRemove($text);
//
// if (isset(this.mDoubleUnderscores['nogallery'])) {
// this.mOutput->mNoGallery = true;
// }
// if (isset(this.mDoubleUnderscores['notoc']) && !this.mForceTocPosition) {
// this.mShowToc = false;
// }
// if (isset(this.mDoubleUnderscores['hiddencat'])
// && this.mTitle->getNamespace() == NS_CATEGORY
// ) {
// this.addTrackingCategory('hidden-category-category');
// }
// # (T10068) Allow control over whether robots index a page.
// # __INDEX__ always overrides __NOINDEX__, see T16899
// if (isset(this.mDoubleUnderscores['noindex']) && this.mTitle->canUseNoindex()) {
// this.mOutput->setIndexPolicy('noindex');
// this.addTrackingCategory('noindex-category');
// }
// if (isset(this.mDoubleUnderscores['index']) && this.mTitle->canUseNoindex()) {
// this.mOutput->setIndexPolicy('index');
// this.addTrackingCategory('index-category');
// }
//
// # Cache all double underscores in the database
// foreach (this.mDoubleUnderscores as $key => $val) {
// this.mOutput->setProperty($key, '');
// }
//
// return $text;
// }
private static void Reg(Btrie_slim_mgr trie, Xol_kwd_mgr mgr, int... ids) {
for (int id : ids) {
Xol_kwd_grp grp = mgr.Get_or_new(id);

View File

@ -36,7 +36,7 @@ class Xomw_doubleunder_wkr__fxt {
}
public Xomw_doubleunder_wkr__fxt Test__parse(String src_str, String expd) {
byte[] src_bry = Bry_.new_u8(src_str);
wkr.Do_double_underscore(pctx, pbfr.Init(src_bry));
wkr.doDoubleUnderscore(pctx, pbfr.Init(src_bry));
Gftest.Eq__str(expd, pbfr.Rslt().To_str_and_clear(), src_str);
return this;
}

View File

@ -28,7 +28,7 @@ public class Xomw_heading_wkr {
public int Hdr_lhs_end() {return hdr_lhs_end;} private int hdr_lhs_end;
public int Hdr_rhs_bgn() {return hdr_rhs_bgn;} private int hdr_rhs_bgn;
public int Hdr_rhs_end() {return hdr_rhs_end;} private int hdr_rhs_end;
public void Do_headings(Xomw_parser_ctx pctx, Xomw_parser_bfr pbfr, Xomw_heading_cbk__html cbk) {
public void doHeadings(Xomw_parser_ctx pctx, Xomw_parser_bfr pbfr, Xomw_heading_cbk__html cbk) {
Bry_bfr src_bfr = pbfr.Src();
byte[] src_bry = src_bfr.Bfr();
int src_end = src_bfr.Len();
@ -103,4 +103,20 @@ public class Xomw_heading_wkr {
cbk.On_hdr_seen(pctx, this);
return nl_rhs;
}
// /**
// * Parse headers and return html
// *
// * @private
// *
// * @param String $text
// *
// * @return String
// */
// public function doHeadings($text) {
// for ($i = 6; $i >= 1; --$i) {
// $h = str_repeat('=', $i);
// $text = preg_replace("/^$h(.+)$h\\s*$/m", "<h$i>\\1</h$i>", $text);
// }
// return $text;
// }
}

View File

@ -17,7 +17,7 @@ package gplx.xowa.mediawiki.includes.parsers.hrs; import gplx.*; import gplx.xow
import gplx.xowa.mediawiki.includes.utls.*;
public class Xomw_hr_wkr {// THREAD.UNSAFE: caching for repeated calls
private Bry_bfr bfr;
public void Replace_hrs(Xomw_parser_ctx pctx, Xomw_parser_bfr pbfr) { // REF.MW: text = preg_replace('/(^|\n)-----*/', '\\1<hr />', text);
public void replaceHrs(Xomw_parser_ctx pctx, Xomw_parser_bfr pbfr) { // REF.MW: text = preg_replace('/(^|\n)-----*/', '\\1<hr />', text);
// XO.PBFR
Bry_bfr src_bfr = pbfr.Src();
byte[] src = src_bfr.Bfr();

View File

@ -28,7 +28,7 @@ class Xomw_hr_wkr__fxt {
private final Xomw_hr_wkr wkr = new Xomw_hr_wkr();
public void Test__parse(String src_str, String expd) {
byte[] src_bry = Bry_.new_u8(src_str);
wkr.Replace_hrs(new Xomw_parser_ctx(), pbfr.Init(src_bry));
wkr.replaceHrs(new Xomw_parser_ctx(), pbfr.Init(src_bry));
Tfds.Eq_str_lines(expd, pbfr.Rslt().To_str_and_clear(), src_str);
}
}

View File

@ -25,17 +25,17 @@ public class Xomw_lnke_wkr {// THREAD.UNSAFE: caching for repeated calls
private final Bry_bfr tmp;
private Btrie_slim_mgr protocol_trie; private final Btrie_rv trv = new Btrie_rv();
private int autonumber;
private final Xomw_parser parser;
private final XomwParserIface parser;
private final XomwLinker linker;
private final XomwSanitizer sanitizer;
private final Xomw_atr_mgr attribs = new Xomw_atr_mgr();
private Xomw_regex_url regex_url;
private Xomw_regex_space regex_space;
public Xomw_lnke_wkr(Xomw_parser parser) {
public Xomw_lnke_wkr(XomwParserIface parser, Bry_bfr tmp, XomwLinker linker, XomwSanitizer sanitizer) {
this.parser = parser;
this.tmp = parser.Tmp();
this.linker = parser.Linker();
this.sanitizer = parser.Sanitizer();
this.tmp = tmp;
this.linker = linker;
this.sanitizer = sanitizer;
if (angle_entities_trie == null) {
synchronized (Type_adp_.ClassOf_obj(this)) {
@ -63,7 +63,7 @@ public class Xomw_lnke_wkr {// THREAD.UNSAFE: caching for repeated calls
this.regex_space = regex_space;
}
// XO.MW:SYNC:1.29; DATE:2017-02-01
public void Replace_external_links(Xomw_parser_ctx pctx, Xomw_parser_bfr pbfr) {
public void replaceExternalLinks(Xomw_parser_ctx pctx, Xomw_parser_bfr pbfr) {
// XO.PBFR
Bry_bfr src_bfr = pbfr.Src();
byte[] src = src_bfr.Bfr();
@ -211,7 +211,7 @@ public class Xomw_lnke_wkr {// THREAD.UNSAFE: caching for repeated calls
// This means that users can paste URLs directly into the text
// Funny characters like ö aren't valid in URLs anyway
// This was changed in August 2004
linker.makeExternalLink(bfr, url, Bry_.Mid(src, text_bgn, text_end), Bool_.N, link_type, parser.Get_external_link_attribs(attribs), Bry_.Empty);
linker.makeExternalLink(bfr, url, Bry_.Mid(src, text_bgn, text_end), Bool_.N, link_type, parser.getExternalLinkAttribs(attribs), Bry_.Empty);
// XO.MW.UNSUPPORTED.HOOK: registers link for processing by other extensions?
// Register link in the output Object.

View File

@ -53,16 +53,18 @@ public class Xomw_lnke_wkr__tst {
}
}
class Xomw_lnke_wkr__fxt {
private final Xomw_lnke_wkr wkr = new Xomw_lnke_wkr(new Xomw_parser());
private final Xomw_lnke_wkr wkr;
private final Xomw_parser_bfr pbfr = new Xomw_parser_bfr();
private boolean apos = true;
public Xomw_lnke_wkr__fxt() {
Xomw_parser parser = new Xomw_parser();
this.wkr = new Xomw_lnke_wkr(parser, parser.Tmp(), parser.Linker(), parser.Sanitizer());
Xomw_regex_space regex_space = new Xomw_regex_space();
wkr.Init_by_wiki(Xomw_parser.Protocols__dflt(), new Xomw_regex_url(regex_space), regex_space);
}
public void Test__parse(String src_str, String expd) {
byte[] src_bry = Bry_.new_u8(src_str);
wkr.Replace_external_links(new Xomw_parser_ctx(), pbfr.Init(src_bry));
wkr.replaceExternalLinks(new Xomw_parser_ctx(), pbfr.Init(src_bry));
if (apos) expd = gplx.langs.htmls.Gfh_utl.Replace_apos(expd);
Tfds.Eq_str_lines(expd, pbfr.Rslt().To_str_and_clear(), src_str);
}

View File

@ -30,8 +30,8 @@ import gplx.xowa.parsers.uniqs.*;
* P6: [[Media:]]
* P4: handle "]]]"; "If we get a ] at the beginning of $m[3]"
* P4: handle "[[http://a.org]]"
* P3: $langObj->formatNum( ++$this->mAutonumber );
* P2: $this->getConverterLanguage()->markNoConversion( $text );
* P3: $langObj->formatNum( ++this.mAutonumber );
* P2: this.getConverterLanguage()->markNoConversion( $text );
* P1: link_prefix; EX: b[[A]]; [not enabled on enwiki]
*/
public class Xomw_lnki_wkr {// THREAD.UNSAFE: caching for repeated calls
@ -40,29 +40,31 @@ public class Xomw_lnki_wkr {// THREAD.UNSAFE: caching for repeated calls
private final Xomw_link_renderer link_renderer;
// private final Btrie_slim_mgr protocols_trie;
private final Xomw_quote_wkr quote_wkr;
private final Xomw_strip_state strip_state;
private final XomwStripState strip_state;
private Xomw_parser_env env;
private Xow_wiki wiki;
private XomwTitle mPageTitle;
// private final XomwLinker_NormalizeSubpageLink normalize_subpage_link = new XomwLinker_NormalizeSubpageLink();
private final Bry_bfr tmp;
private final Xomw_parser parser;
private final XomwParserIface parser;
private final Xomw_atr_mgr extra_atrs = new Xomw_atr_mgr();
private final Xomw_qry_mgr query = new Xomw_qry_mgr();
private final Btrie_rv trv = new Btrie_rv();
private final List_adp tmp_list = List_adp_.New();
private final Hash_adp mImageParams = Hash_adp_bry.cs();
private final Hash_adp mImageParamsMagicArray = Hash_adp_bry.cs();
public Xomw_lnki_wkr(Xomw_parser parser, XomwLinkHolderArray holders, Xomw_link_renderer link_renderer, Btrie_slim_mgr protocols_trie) {
public Xomw_lnki_wkr(XomwParserIface parser, XomwLinkHolderArray holders, Xomw_link_renderer link_renderer, Btrie_slim_mgr protocols_trie
, XomwLinker linker, Xomw_quote_wkr quote_wkr, Bry_bfr tmp, XomwStripState strip_state
) {
this.parser = parser;
this.holders = holders;
this.link_renderer = link_renderer;
// this.protocols_trie = protocols_trie;
this.linker = parser.Linker();
this.quote_wkr = parser.Quote_wkr();
this.tmp = parser.Tmp();
this.strip_state = parser.Strip_state();
this.linker = linker;
this.quote_wkr = quote_wkr;
this.tmp = tmp;
this.strip_state = strip_state;
}
public void Init_by_wiki(Xomw_parser_env env, Xow_wiki wiki) {
this.env = env;
@ -77,7 +79,7 @@ public class Xomw_lnki_wkr {// THREAD.UNSAFE: caching for repeated calls
// Empties the link-holder array (holders) that this worker was constructed with.
public void Clear_state() {
holders.clear();
}
public void Replace_internal_links(Xomw_parser_ctx pctx, Xomw_parser_bfr pbfr) {
public void replaceInternalLinks(Xomw_parser_bfr pbfr, Xomw_parser_env env, Xomw_parser_ctx pctx) {
// XO.PBFR
Bry_bfr src_bfr = pbfr.Src();
byte[] src = src_bfr.Bfr();
@ -88,10 +90,10 @@ public class Xomw_lnki_wkr {// THREAD.UNSAFE: caching for repeated calls
this.mPageTitle = pctx.Page_title();
Replace_internal_links(pctx, bfr, src, src_bgn, src_end);
replaceInternalLinks(env, pctx, bfr, src, src_bgn, src_end);
}
// XO.MW:SYNC:1.29; DATE:2017-02-02
public void Replace_internal_links(Xomw_parser_ctx pctx, Bry_bfr bfr, byte[] src, int src_bgn, int src_end) {
public void replaceInternalLinks(Xomw_parser_env env, Xomw_parser_ctx pctx, Bry_bfr bfr, byte[] src, int src_bgn, int src_end) {
// XO.MW: regex for tc move to header; e1 and e1_img moved to code
// the % is needed to support urlencoded titles as well
@ -109,7 +111,7 @@ public class Xomw_lnki_wkr {// THREAD.UNSAFE: caching for repeated calls
// XO.MW.IGNORE: handles strange split logic of adding space to String; "$s = substr($s, 1);"
// TODO.XO:link_prefix; EX: b[[A]]
// $useLinkPrefixExtension = $this->getTargetLanguage()->linkPrefixExtension();
// $useLinkPrefixExtension = this.getTargetLanguage()->linkPrefixExtension();
// $e2 = null;
// if ($useLinkPrefixExtension) {
// // Match the end of a line for a word that's not followed by whitespace,
@ -119,9 +121,9 @@ public class Xomw_lnki_wkr {// THREAD.UNSAFE: caching for repeated calls
// $e2 = "/^((?>.*[^$charset]|))(.+)$/sDu";
// }
// IGNORE: throw new MWException(__METHOD__ . ": \$this->mTitle is null\n");
// IGNORE: throw new MWException(__METHOD__ . ": this.mTitle is null\n");
// $nottalk = !$this->mTitle->isTalkPage();
// $nottalk = !this.mTitle->isTalkPage();
// TODO.XO:link_prefix
byte[] prefix = Bry_.Empty;
@ -240,7 +242,7 @@ public class Xomw_lnki_wkr {// THREAD.UNSAFE: caching for repeated calls
// Don't allow internal links to pages containing
// PROTO: where PROTO is a valid URL protocol; these
// should be external links.
// if (preg_match('/^(?i:' . $this->mUrlProtocols . ')/', $origLink)) {
// if (preg_match('/^(?i:' . this.mUrlProtocols . ')/', $origLink)) {
// $s .= $prefix . '[[' . $line;
// continue;
// }
@ -267,7 +269,7 @@ public class Xomw_lnki_wkr {// THREAD.UNSAFE: caching for repeated calls
// link = orig_link;
// }
byte[] unstrip = strip_state.Unstrip_nowiki(link);
byte[] unstrip = strip_state.unstripNoWiki(link);
if (!Bry_.Eq(unstrip, link))
nt = XomwTitle.newFromText(unstrip);
if (nt == null) {
@ -309,7 +311,7 @@ public class Xomw_lnki_wkr {// THREAD.UNSAFE: caching for repeated calls
// we couldn't find the end of this imageLink, so output it raw
// but don't ignore what might be perfectly normal links in the text we've examined
Bry_bfr nested = wiki.Utl__bfr_mkr().Get_b128();
this.Replace_internal_links(pctx, nested, text, 0, text.length);
this.replaceInternalLinks(env, pctx, nested, text, 0, text.length);
nested.Mkr_rls();
bfr.Add(prefix).Add(Bry__wtxt__lnki__bgn).Add(link).Add_byte_pipe().Add(text); // s .= "{prefix}[[link|text";
// note: no trail, because without an end, there *is* no trail
@ -376,7 +378,7 @@ public class Xomw_lnki_wkr {// THREAD.UNSAFE: caching for repeated calls
// cloak any absolute URLs inside the image markup, so replaceExternalLinks() won't touch them
bfr.Add(prefix);
// Armor_links(Make_image(bfr, nt, text, holders))
this.makeImage(pctx, bfr, nt, text, holders);
this.makeImage(env, pctx, bfr, nt, text, holders);
bfr.Add(trail);
continue;
}
@ -442,7 +444,7 @@ public class Xomw_lnki_wkr {// THREAD.UNSAFE: caching for repeated calls
}
}
}
public void makeImage(Xomw_parser_ctx pctx, Bry_bfr bfr, XomwTitle title, byte[] options_at_link, XomwLinkHolderArray holders) {
public void makeImage(Xomw_parser_env env, Xomw_parser_ctx pctx, Bry_bfr bfr, XomwTitle title, byte[] options_at_link, XomwLinkHolderArray holders) {
// Check if the options text is of the form "options|alt text"
// Options are:
// * thumbnail make a thumbnail with enlarge-icon and caption, alignment depends on lang
@ -478,7 +480,7 @@ public class Xomw_lnki_wkr {// THREAD.UNSAFE: caching for repeated calls
// XO.MW.HOOK:BeforeParserFetchFileAndTitle
// Fetch and register the file (file title may be different via hooks)
// list($file, $title) = $this->fetchFileAndTitle($title, $options);
// list($file, $title) = this.fetchFileAndTitle($title, $options);
XomwFile file = fetchFileAndTitle(title, null);
// Get parameter map
@ -489,7 +491,7 @@ public class Xomw_lnki_wkr {// THREAD.UNSAFE: caching for repeated calls
Xomw_param_map paramMap = tmp_img_params.paramMap;
XomwMagicWordArray mwArray = tmp_img_params.mwArray;
// XO.MW.UNSUPPORTED.TrackingCategory: if (!$file) $this->addTrackingCategory('broken-file-category');
// XO.MW.UNSUPPORTED.TrackingCategory: if (!$file) this.addTrackingCategory('broken-file-category');
// Process the input parameters
byte[] caption = Bry_.Empty;
@ -549,12 +551,12 @@ public class Xomw_lnki_wkr {// THREAD.UNSAFE: caching for repeated calls
// manualthumb? downstream behavior seems odd with
// missing manual thumbs.
validated = true;
// $value = $this->stripAltText($value, $holders);
val = parser.stripAltText(val, holders);
break;
case Xomw_param_itm.Name__link:
// $chars = self::EXT_LINK_URL_CLASS;
// $addr = self::EXT_LINK_ADDR;
// $prots = $this->mUrlProtocols;
// $prots = this.mUrlProtocols;
// if ($value === '') {
// $paramName = 'no-link';
// $value = true;
@ -563,9 +565,9 @@ public class Xomw_lnki_wkr {// THREAD.UNSAFE: caching for repeated calls
// else if (preg_match("/^((?i)$prots)/", $value)) {
// if (preg_match("/^((?i)$prots)$addr$chars*$/u", $value, $m)) {
// $paramName = 'link-url';
// $this->mOutput->addExternalLink($value);
// if ($this->mOptions->getExternalLinkTarget()) {
// $params[$type]['link-target'] = $this->mOptions->getExternalLinkTarget();
// this.mOutput->addExternalLink($value);
// if (this.mOptions->getExternalLinkTarget()) {
// $params[$type]['link-target'] = this.mOptions->getExternalLinkTarget();
// }
validated = true;
// }
@ -574,7 +576,7 @@ public class Xomw_lnki_wkr {// THREAD.UNSAFE: caching for repeated calls
// if ($linkTitle) {
// $paramName = 'link-title';
// $value = $linkTitle;
// $this->mOutput->addLink($linkTitle);
// this.mOutput->addLink($linkTitle);
validated = true;
// }
// }
@ -649,7 +651,7 @@ public class Xomw_lnki_wkr {// THREAD.UNSAFE: caching for repeated calls
if (frameParams.alt == null) {
// No alt text, use the "caption" for the alt text
if (caption != Bry_.Empty) {
// frameParams.alt = $this->stripAltText(caption, $holders);
frameParams.alt = parser.stripAltText(caption, holders);
}
else {
// No caption, fall back to using the filename for the
@ -658,7 +660,7 @@ public class Xomw_lnki_wkr {// THREAD.UNSAFE: caching for repeated calls
}
}
// Use the "caption" for the tooltip text
// frameParams.title = $this->stripAltText(caption, $holders);
frameParams.title = parser.stripAltText(caption, holders);
}
// MW.HOOK:ParserMakeImageParams
@ -666,33 +668,13 @@ public class Xomw_lnki_wkr {// THREAD.UNSAFE: caching for repeated calls
// Linker does the rest
// byte[] time = options.time;
Object time = null;
linker.makeImageLink(bfr, pctx, parser, title, file, frameParams, handlerParams, time, desc_query, parser.Options().getThumbSize());
linker.makeImageLink(bfr, env, pctx, parser, title, file, frameParams, handlerParams, time, desc_query, parser.getOptions().getThumbSize());
// Give the handler a chance to modify the parser Object
// if (handler != null) {
// $handler->parserTransformHook($this, $file);
// }
}
// private byte[] stripAltText(byte[] caption, XomwLinkHolderArray holders) {
// // Strip bad stuff out of the title (tooltip). We can't just use
// // replaceLinkHoldersText() here, because if this function is called
// // from replaceInternalLinks2(), mLinkHolders won't be up-to-date.
// byte[] tooltip;
// if (holders != null) {
// tooltip = holders.replace(caption);
// } else {
// tooltip = this.replace_link_holders(caption);
// }
//
// // make sure there are no placeholders in thumbnail attributes
// // that are later expanded to html- so expand them now and
// // remove the tags
//// $tooltip = $this->mStripState->unstripBoth( $tooltip );
//// $tooltip = Sanitizer::stripAllTags( $tooltip );
////
//// return $tooltip;
// return null;
// }
private static Xomw_param_list[] internalParamNames;
private static Xomw_param_map internalParamMap;
@ -783,11 +765,11 @@ public class Xomw_lnki_wkr {// THREAD.UNSAFE: caching for repeated calls
//$time = $file ? $file->getTimestamp() : false;
//$sha1 = $file ? $file->getSha1() : false;
//# Register the file as a dependency...
//$this->mOutput->addImage( $title->getDBkey(), $time, $sha1 );
//this.mOutput->addImage( $title->getDBkey(), $time, $sha1 );
//if ( $file && !$title->equals( $file->getTitle() ) ) {
// # Update fetched file title
// $title = $file->getTitle();
// $this->mOutput->addImage( $title->getDBkey(), $time, $sha1 );
// this.mOutput->addImage( $title->getDBkey(), $time, $sha1 );
//}
return file;
}
@ -835,7 +817,7 @@ public class Xomw_lnki_wkr {// THREAD.UNSAFE: caching for repeated calls
link_renderer.Make_known_link(bfr, nt, text, extra_atrs, query);
byte[] link = bfr.To_bry_and_clear();
parser.Armor_links(bfr, link, 0, link.length);
parser.armorLinks(bfr, link, 0, link.length);
bfr.Add(trail);
}
@ -856,4 +838,326 @@ public class Xomw_lnki_wkr {// THREAD.UNSAFE: caching for repeated calls
// title-char -> ([{$tc}]+)
// pipe -> \\|
// other chars... -> (.*)
//
// /**
// * Process [[ ]] wikilinks
// *
// * @param String $s
// *
// * @return String Processed text
// *
// * @private
// */
// public function replaceInternalLinks($s) {
// this.mLinkHolders->merge(this.replaceInternalLinks2($s));
// return $s;
// }
//
// /**
// * Process [[ ]] wikilinks (RIL)
// * @param String $s
// * @throws MWException
// * @return LinkHolderArray
// *
// * @private
// */
// public function replaceInternalLinks2(&$s) {
// global $wgExtraInterlanguageLinkPrefixes;
//
// static $tc = false, $e1, $e1_img;
// # the % is needed to support urlencoded titles as well
// if (!$tc) {
// $tc = Title::legalChars() . '#%';
// # Match a link having the form [[namespace:link|alternate]]trail
// $e1 = "/^([{$tc}]+)(?:\\|(.+?))?]](.*)\$/sD";
// # Match cases where there is no "]]", which might still be images
// $e1_img = "/^([{$tc}]+)\\|(.*)\$/sD";
// }
//
// $holders = new LinkHolderArray($this);
//
// # split the entire text String on occurrences of [[
// $a = StringUtils::explode('[[', ' ' . $s);
// # get the first element (all text up to first [[), and remove the space we added
// $s = $a->current();
// $a->next();
// $line = $a->current(); # Workaround for broken ArrayIterator::next() that returns "void"
// $s = substr($s, 1);
//
// $useLinkPrefixExtension = this.getTargetLanguage()->linkPrefixExtension();
// $e2 = null;
// if ($useLinkPrefixExtension) {
// # Match the end of a line for a word that's not followed by whitespace,
// # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
// global $wgContLang;
// $charset = $wgContLang->linkPrefixCharset();
// $e2 = "/^((?>.*[^$charset]|))(.+)$/sDu";
// }
//
// if (is_null(this.mTitle)) {
// throw new MWException(__METHOD__ . ": this.mTitle is null\n");
// }
// $nottalk = !this.mTitle->isTalkPage();
//
// if ($useLinkPrefixExtension) {
// $m = [];
// if (preg_match($e2, $s, $m)) {
// $first_prefix = $m[2];
// } else {
// $first_prefix = false;
// }
// } else {
// $prefix = '';
// }
//
// $useSubpages = this.areSubpagesAllowed();
//
// // @codingStandardsIgnoreStart Squiz.WhiteSpace.SemicolonSpacing.Incorrect
// # Loop for each link
// for (; $line !== false && $line !== null; $a->next(), $line = $a->current()) {
// // @codingStandardsIgnoreEnd
//
// # Check for excessive memory usage
// if ($holders->isBig()) {
// # Too big
// # Do the existence check, replace the link holders and clear the array
// $holders->replace($s);
// $holders->clear();
// }
//
// if ($useLinkPrefixExtension) {
// if (preg_match($e2, $s, $m)) {
// $prefix = $m[2];
// $s = $m[1];
// } else {
// $prefix = '';
// }
// # first link
// if ($first_prefix) {
// $prefix = $first_prefix;
// $first_prefix = false;
// }
// }
//
// $might_be_img = false;
//
// if (preg_match($e1, $line, $m)) { # page with normal text or alt
// $text = $m[2];
// # If we get a ] at the beginning of $m[3] that means we have a link that's something like:
// # [[Image:Foo.jpg|[http://example.com desc]]] <- having three ] in a row fucks up,
// # the real problem is with the $e1 regex
// # See T1500.
// # Still some problems for cases where the ] is meant to be outside punctuation,
// # and no image is in sight. See T4095.
// if ($text !== ''
// && substr($m[3], 0, 1) === ']'
// && strpos($text, '[') !== false
// ) {
// $text .= ']'; # so that replaceExternalLinks($text) works later
// $m[3] = substr($m[3], 1);
// }
// # fix up urlencoded title texts
// if (strpos($m[1], '%') !== false) {
// # Should anchors '#' also be rejected?
// $m[1] = str_replace([ '<', '>' ], [ '&lt;', '&gt;' ], rawurldecode($m[1]));
// }
// $trail = $m[3];
// } elseif (preg_match($e1_img, $line, $m)) {
// # Invalid, but might be an image with a link in its caption
// $might_be_img = true;
// $text = $m[2];
// if (strpos($m[1], '%') !== false) {
// $m[1] = str_replace([ '<', '>' ], [ '&lt;', '&gt;' ], rawurldecode($m[1]));
// }
// $trail = "";
// } else { # Invalid form; output directly
// $s .= $prefix . '[[' . $line;
// continue;
// }
//
// $origLink = ltrim($m[1], ' ');
//
// # Don't allow internal links to pages containing
// # PROTO: where PROTO is a valid URL protocol; these
// # should be external links.
// if (preg_match('/^(?i:' . this.mUrlProtocols . ')/', $origLink)) {
// $s .= $prefix . '[[' . $line;
// continue;
// }
//
// # Make subpage if necessary
// if ($useSubpages) {
// $link = this.maybeDoSubpageLink($origLink, $text);
// } else {
// $link = $origLink;
// }
//
// $noforce = (substr($origLink, 0, 1) !== ':');
// if (!$noforce) {
// # Strip off leading ':'
// $link = substr($link, 1);
// }
//
// $unstrip = this.mStripState->unstripNoWiki($link);
// $nt = is_string($unstrip) ? Title::newFromText($unstrip) : null;
// if ($nt === null) {
// $s .= $prefix . '[[' . $line;
// continue;
// }
//
// $ns = $nt->getNamespace();
// $iw = $nt->getInterwiki();
//
// if ($might_be_img) { # if this is actually an invalid link
// if ($ns == NS_FILE && $noforce) { # but might be an image
// $found = false;
// while (true) {
// # look at the next 'line' to see if we can close it there
// $a->next();
// $next_line = $a->current();
// if ($next_line === false || $next_line === null) {
// break;
// }
// $m = explode(']]', $next_line, 3);
// if (count($m) == 3) {
// # the first ]] closes the inner link, the second the image
// $found = true;
// $text .= "[[{$m[0]}]]{$m[1]}";
// $trail = $m[2];
// break;
// } elseif (count($m) == 2) {
// # if there's exactly one ]] that's fine, we'll keep looking
// $text .= "[[{$m[0]}]]{$m[1]}";
// } else {
// # if $next_line is invalid too, we need look no further
// $text .= '[[' . $next_line;
// break;
// }
// }
// if (!$found) {
// # we couldn't find the end of this imageLink, so output it raw
// # but don't ignore what might be perfectly normal links in the text we've examined
// $holders->merge(this.replaceInternalLinks2($text));
// $s .= "{$prefix}[[$link|$text";
// # note: no $trail, because without an end, there *is* no trail
// continue;
// }
// } else { # it's not an image, so output it raw
// $s .= "{$prefix}[[$link|$text";
// # note: no $trail, because without an end, there *is* no trail
// continue;
// }
// }
//
// $wasblank = ($text == '');
// if ($wasblank) {
// $text = $link;
// } else {
// # T6598 madness. Handle the quotes only if they come from the alternate part
// # [[Lista d''e paise d''o munno]] -> <a href="...">Lista d''e paise d''o munno</a>
// # [[Criticism of Harry Potter|Criticism of ''Harry Potter'']]
// # -> <a href="Criticism of Harry Potter">Criticism of <i>Harry Potter</i></a>
// $text = this.doQuotes($text);
// }
//
// # Link not escaped by : , create the various objects
// if ($noforce && !$nt->wasLocalInterwiki()) {
// # Interwikis
// if (
// $iw && this.mOptions->getInterwikiMagic() && $nottalk && (
// Language::fetchLanguageName($iw, null, 'mw') ||
// in_array($iw, $wgExtraInterlanguageLinkPrefixes)
// )
// ) {
// # T26502: filter duplicates
// if (!isset(this.mLangLinkLanguages[$iw])) {
// this.mLangLinkLanguages[$iw] = true;
// this.mOutput->addLanguageLink($nt->getFullText());
// }
//
// $s = rtrim($s . $prefix);
// $s .= trim($trail, "\n") == '' ? '': $prefix . $trail;
// continue;
// }
//
// if ($ns == NS_FILE) {
// if (!wfIsBadImage($nt->getDBkey(), this.mTitle)) {
// if ($wasblank) {
// # if no parameters were passed, $text
// # becomes something like "File:Foo.png",
// # which we don't want to pass on to the
// # image generator
// $text = '';
// } else {
// # recursively parse links inside the image caption
// # actually, this will parse them in any other parameters, too,
// # but it might be hard to fix that, and it doesn't matter ATM
// $text = this.replaceExternalLinks($text);
// $holders->merge(this.replaceInternalLinks2($text));
// }
// # cloak any absolute URLs inside the image markup, so replaceExternalLinks() won't touch them
// $s .= $prefix . this.armorLinks(
// this.makeImage($nt, $text, $holders)) . $trail;
// continue;
// }
// } elseif ($ns == NS_CATEGORY) {
// $s = rtrim($s . "\n"); # T2087
//
// if ($wasblank) {
// $sortkey = this.getDefaultSort();
// } else {
// $sortkey = $text;
// }
// $sortkey = Sanitizer::decodeCharReferences($sortkey);
// $sortkey = str_replace("\n", '', $sortkey);
// $sortkey = this.getConverterLanguage()->convertCategoryKey($sortkey);
// this.mOutput->addCategory($nt->getDBkey(), $sortkey);
//
// /**
// * Strip the whitespace Category links produce, see T2087
// */
// $s .= trim($prefix . $trail, "\n") == '' ? '' : $prefix . $trail;
//
// continue;
// }
// }
//
// # Self-link checking. For some languages, variants of the title are checked in
// # LinkHolderArray::doVariants() to allow batching the existence checks necessary
// # for linking to a different variant.
// if ($ns != NS_SPECIAL && $nt->equals(this.mTitle) && !$nt->hasFragment()) {
// $s .= $prefix . Linker::makeSelfLinkObj($nt, $text, '', $trail);
// continue;
// }
//
// # NS_MEDIA is a pseudo-namespace for linking directly to a file
// # @todo FIXME: Should do batch file existence checks, see comment below
// if ($ns == NS_MEDIA) {
// # Give extensions a chance to select the file revision for us
// $options = [];
// $descQuery = false;
// Hooks::run('BeforeParserFetchFileAndTitle',
// [ $this, $nt, &$options, &$descQuery ]);
// # Fetch and register the file (file title may be different via hooks)
// list($file, $nt) = this.fetchFileAndTitle($nt, $options);
// # Cloak with NOPARSE to avoid replacement in replaceExternalLinks
// $s .= $prefix . this.armorLinks(
// Linker::makeMediaLinkFile($nt, $file, $text)) . $trail;
// continue;
// }
//
// # Some titles, such as valid special pages or files in foreign repos, should
// # be shown as bluelinks even though they're not included in the page table
// # @todo FIXME: isAlwaysKnown() can be expensive for file links; we should really do
// # batch file existence checks for NS_FILE and NS_MEDIA
// if ($iw == '' && $nt->isAlwaysKnown()) {
// this.mOutput->addLink($nt);
// $s .= this.makeKnownLinkHolder($nt, $text, $trail, $prefix);
// } else {
// # Links will be added to the output link list after checking
// $s .= $holders->makeHolder($nt, $text, [], $trail, $prefix);
// }
// }
// return $holders;
// }
}

View File

@ -23,11 +23,19 @@ public class Xomw_lnki_wkr__file__tst {
fxt.Clear();
fxt.Init__file("File:A.png", 300, 200);
}
@Test public void Plain() {
@Test public void Orig() {
// basic
fxt.Test__to_html("[[File:A.png]]", "<a href='/wiki/File:A.png' class='image'><img alt='A.png' src='/orig/7/70/A.png' width='300' height='200' /></a>");
// caption
fxt.Test__to_html("[[File:A.png|abc]]", "<a href='/wiki/File:A.png' class='image' title='abc'><img alt='abc' src='/orig/7/70/A.png' width='300' height='200' /></a>");
}
// Checks the HTML produced for the "thumb" image option: the image is wrapped in the thumb / thumbinner divs
// and the expected src points at a 220px-wide thumbnail.
@Test public void Thumb() {
// basic: no caption, so the expected alt is the file name and thumbcaption holds only the magnify link
fxt.Test__to_html("[[File:A.png|thumb]]", "<div class='thumb tright'><div class='thumbinner' style='width:222px;'><a href='/wiki/File:A.png' class='image'><img alt='A.png' src='/thumb/7/70/A.png/220px-A.png' width='220' height='146' class='thumbimage' /></a> <div class='thumbcaption'><div class='magnify'><a href='/wiki/File:A.png' class='internal' title='enlarge'></a></div></div></div></div>");
// caption: the caption text is expected inside thumbcaption after the magnify div, and the expected alt is empty
fxt.Test__to_html("[[File:A.png|thumb|abc]]", "<div class='thumb tright'><div class='thumbinner' style='width:222px;'><a href='/wiki/File:A.png' class='image'><img alt='' src='/thumb/7/70/A.png/220px-A.png' width='220' height='146' class='thumbimage' /></a> <div class='thumbcaption'><div class='magnify'><a href='/wiki/File:A.png' class='internal' title='enlarge'></a></div>abc</div></div></div>");
}
@Test public void Size() {
fxt.Test__to_html("[[File:A.png|123x456px]]", "<a href='/wiki/File:A.png' class='image'><img alt='A.png' src='/thumb/7/70/A.png/123px-A.png' width='123' height='82' /></a>");
@ -66,17 +74,19 @@ class Xomw_lnki_wkr__fxt {
private final Xomw_lnki_wkr wkr;
private final Xomw_parser_ctx pctx;
private final Xomw_parser_bfr pbfr = new Xomw_parser_bfr();
private final Xomw_parser_env env;
private final XomwFileFinderMock file_finder;
private final XomwFileRepo repo = new XomwFileRepo(Bry_.new_a7("/orig"), Bry_.new_a7("/thumb"));
private boolean apos = true;
public Xomw_lnki_wkr__fxt() {
Xoae_app app = Xoa_app_fxt.Make__app__edit();
Xowe_wiki wiki = Xoa_app_fxt.Make__wiki__edit(app);
Xomw_parser parser = new Xomw_parser();
XomwParser parser = new XomwParser();
wkr = parser.Lnki_wkr();
// env
file_finder = new XomwFileFinderMock(parser.Env());
env = parser.Env();
parser.Env().File_finder_(file_finder);
parser.Env().Magic_word_mgr().Add(Bry_.new_u8("img_thumbnail"), Bool_.Y, Bry_.Ary("thumb"));
parser.Env().Magic_word_mgr().Add(Bry_.new_u8("img_width"), Bool_.Y, Bry_.Ary("$1px"));
@ -95,7 +105,7 @@ class Xomw_lnki_wkr__fxt {
}
public void Test__parse(String src_str, String expd) {
byte[] src_bry = Bry_.new_u8(src_str);
wkr.Replace_internal_links(pctx, pbfr.Init(src_bry));
wkr.replaceInternalLinks(pbfr.Init(src_bry), env, pctx);
if (apos) expd = gplx.langs.htmls.Gfh_utl.Replace_apos(expd);
Gftest.Eq__ary__lines(expd, pbfr.Rslt().To_str_and_clear(), src_str);
}
@ -109,7 +119,7 @@ class Xomw_lnki_wkr__fxt {
}
// Runs src_str through the link worker in two steps -- the internal-link pass, then
// replaceLinkHolders -- and returns the result buffer as a String, clearing the buffer.
// NOTE(review): both Replace_internal_links(pctx, pbfr) and replaceInternalLinks(pbfr, env, pctx)
// appear below; this looks like the before/after of a rename shown together -- confirm that
// only one call is present in the compiled file.
private String Exec__to_html(String src_str) {
byte[] src_bry = Bry_.new_u8(src_str);
wkr.Replace_internal_links(pctx, pbfr.Init(src_bry));
wkr.replaceInternalLinks(pbfr.Init(src_bry), env, pctx);
wkr.replaceLinkHolders(pbfr);
return pbfr.Rslt().To_str_and_clear();
}

View File

@ -24,7 +24,7 @@ public class Xomw_magiclinks_wkr {
private static byte[] Tag__anch__rhs;
private boolean[] url_separators;
private static Xomw_regex_link_interrupt regex_link_interrupt;
private final Xomw_parser parser;
private final XomwParserIface parser;
private final Xomw_regex_boundary regex_boundary;
private final Xomw_regex_url regex_url;
private final XomwSanitizer sanitizer;
@ -33,7 +33,7 @@ public class Xomw_magiclinks_wkr {
private byte[] page_title;
private static final byte Regex__anch = 1, Regex__elem = 2, Regex__free = 3;
public Xomw_magiclinks_wkr(Xomw_parser parser, XomwSanitizer sanitizer, XomwLinker linker, Xomw_regex_boundary regex_boundary, Xomw_regex_url regex_url) {
public Xomw_magiclinks_wkr(XomwParserIface parser, XomwSanitizer sanitizer, XomwLinker linker, Xomw_regex_boundary regex_boundary, Xomw_regex_url regex_url) {
this.parser = parser;
this.sanitizer = sanitizer;
this.linker = linker;
@ -66,7 +66,7 @@ public class Xomw_magiclinks_wkr {
// Replace special strings like "ISBN xxx" and "RFC xxx" with
// magic external links.
public void Do_magic_links(Xomw_parser_ctx pctx, Xomw_parser_bfr pbfr) {
public void doMagicLinks(Xomw_parser_ctx pctx, Xomw_parser_bfr pbfr) {
// XO.PBFR
Bry_bfr src_bfr = pbfr.Src();
byte[] src = src_bfr.Bfr();
@ -262,7 +262,7 @@ public class Xomw_magiclinks_wkr {
linker.makeExternalLink(bfr, url
, url // $this->getConverterLanguage()->markNoConversion($url, true),
, true, Bry_.new_a7("free")
, parser.Get_external_link_attribs(atrs)
, parser.getExternalLinkAttribs(atrs)
, page_title);
// XO.MW.UNSUPPORTED.HOOK: registers link for processing by other extensions?
@ -390,4 +390,107 @@ class Xomw_regex_link_interrupt {
}
return Bry_find_.Not_found;
}
// /**
// * Replace special strings like "ISBN xxx" and "RFC xxx" with
// * magic external links.
// *
// * DML
// * @private
// *
// * @param String $text
// *
// * @return String
// */
// public function doMagicLinks($text) {
// $prots = wfUrlProtocolsWithoutProtRel();
// $urlChar = self::EXT_LINK_URL_CLASS;
// $addr = self::EXT_LINK_ADDR;
// $space = self::SPACE_NOT_NL; # non-newline space
// $spdash = "(?:-|$space)"; # a dash or a non-newline space
// $spaces = "$space++"; # possessive match of 1 or more spaces
// $text = preg_replace_callback(
// '!(?: # Start cases
// (<a[ \t\r\n>].*?</a>) | # m[1]: Skip link text
// (<.*?>) | # m[2]: Skip stuff inside
// # HTML elements' . "
// (\b(?i:$prots)($addr$urlChar*)) | # m[3]: Free external links
// # m[4]: Post-protocol path
// \b(?:RFC|PMID) $spaces # m[5]: RFC or PMID, capture number
// ([0-9]+)\b |
// \bISBN $spaces ( # m[6]: ISBN, capture number
// (?: 97[89] $spdash?)? # optional 13-digit ISBN prefix
// (?: [0-9] $spdash?){9} # 9 digits with opt. delimiters
// [0-9Xx] # check digit
// )\b
// )!xu", [ &$this, 'magicLinkCallback' ], $text);
// return $text;
// }
//
// /**
// * @throws MWException
// * @param array $m
// * @return HTML|String
// */
// public function magicLinkCallback($m) {
// if (isset($m[1]) && $m[1] !== '') {
// # Skip anchor
// return $m[0];
// } elseif (isset($m[2]) && $m[2] !== '') {
// # Skip HTML element
// return $m[0];
// } elseif (isset($m[3]) && $m[3] !== '') {
// # Free external link
// return this.makeFreeExternalLink($m[0], strlen($m[4]));
// } elseif (isset($m[5]) && $m[5] !== '') {
// # RFC or PMID
// if (substr($m[0], 0, 3) === 'RFC') {
// if (!this.mOptions->getMagicRFCLinks()) {
// return $m[0];
// }
// $keyword = 'RFC';
// $urlmsg = 'rfcurl';
// $cssClass = 'mw-magiclink-rfc';
// $trackingCat = 'magiclink-tracking-rfc';
// $id = $m[5];
// } elseif (substr($m[0], 0, 4) === 'PMID') {
// if (!this.mOptions->getMagicPMIDLinks()) {
// return $m[0];
// }
// $keyword = 'PMID';
// $urlmsg = 'pubmedurl';
// $cssClass = 'mw-magiclink-pmid';
// $trackingCat = 'magiclink-tracking-pmid';
// $id = $m[5];
// } else {
// throw new MWException(__METHOD__ . ': unrecognised match type "' .
// substr($m[0], 0, 20) . '"');
// }
// $url = wfMessage($urlmsg, $id)->inContentLanguage()->text();
// this.addTrackingCategory($trackingCat);
// return Linker::makeExternalLink($url, "{$keyword} {$id}", true, $cssClass, [], this.mTitle);
// } elseif (isset($m[6]) && $m[6] !== ''
// && this.mOptions->getMagicISBNLinks()
// ) {
// # ISBN
// $isbn = $m[6];
// $space = self::SPACE_NOT_NL; # non-newline space
// $isbn = preg_replace("/$space/", ' ', $isbn);
// $num = strtr($isbn, [
// '-' => '',
// ' ' => '',
// 'x' => 'X',
// ]);
// this.addTrackingCategory('magiclink-tracking-isbn');
// return this.getLinkRenderer()->makeKnownLink(
// SpecialPage::getTitleFor('Booksources', $num),
// "ISBN $isbn",
// [
// 'class' => 'internal mw-magiclink-isbn',
// 'title' => false // suppress title attribute
// ]
// );
// } else {
// return $m[0];
// }
// }
}

View File

@ -79,7 +79,7 @@ class Xomw_magiclinks_wkr__fxt {
// Loads src_str into the parser buffer, runs the magic-links pass over it, and asserts
// that the result buffer equals expd (src_str is passed along to the assertion as well).
// NOTE(review): both Do_magic_links and doMagicLinks are called below with the same arguments;
// this looks like the before/after of a rename shown together -- confirm that only one call
// is present in the compiled file.
public void Test__parse(boolean apos, String src_str, String expd) {
byte[] src_bry = Bry_.new_u8(src_str);
pbfr.Init(src_bry);
wkr.Do_magic_links(pctx, pbfr);
wkr.doMagicLinks(pctx, pbfr);
// when apos is true, expd is rewritten by Gfh_utl.Replace_apos before comparing
if (apos) expd = gplx.langs.htmls.Gfh_utl.Replace_apos(expd);
Tfds.Eq_str_lines(expd, pbfr.Rslt().To_str_and_clear(), src_str);
}

View File

@ -17,7 +17,7 @@ package gplx.xowa.mediawiki.includes.parsers.nbsps; import gplx.*; import gplx.x
import gplx.core.btries.*;
public class Xomw_nbsp_wkr {
private final Btrie_rv trv = new Btrie_rv();
public void Do_nbsp(Xomw_parser_ctx pctx, Xomw_parser_bfr pbfr) {
public void doNbsp(Xomw_parser_ctx pctx, Xomw_parser_bfr pbfr) {
// PORTED:
// Clean up special characters, only run once, next-to-last before doBlockLevels
// $fixtags = [

View File

@ -31,7 +31,7 @@ class Xomw_nbsp_wkr__fxt {
// Loads src_str into the parser buffer, runs the nbsp pass over it, and asserts that the
// result buffer equals expd (src_str is passed along to the assertion as well).
// NOTE(review): both Do_nbsp and doNbsp are called below with the same arguments; this looks
// like the before/after of a rename shown together -- confirm that only one call is present
// in the compiled file.
public void Test__parse(String src_str, String expd) {
byte[] src_bry = Bry_.new_u8(src_str);
pbfr.Init(src_bry);
wkr.Do_nbsp(pctx, pbfr);
wkr.doNbsp(pctx, pbfr);
// when the apos flag is set, expd is rewritten by Gfh_utl.Replace_apos before comparing
if (apos) expd = gplx.langs.htmls.Gfh_utl.Replace_apos(expd);
Tfds.Eq_str_lines(expd, pbfr.Rslt().To_str_and_clear(), src_str);
}

View File

@ -20,10 +20,10 @@ import gplx.core.primitives.*;
public class Xomw_quote_wkr {// THREAD.UNSAFE: caching for repeated calls
private Bry_bfr tmp;
private final Int_list apos_pos_ary = new Int_list(32);
// Constructor: stores the Bry_bfr that this worker keeps in its tmp field.
// NOTE(review): two signatures are shown -- one reads the buffer from a Xomw_parser via Tmp(),
// the other takes the Bry_bfr directly. This looks like the before/after of a signature change
// shown together; confirm that only one is present in the compiled file.
public Xomw_quote_wkr(Xomw_parser mgr) {
this.tmp = mgr.Tmp();
public Xomw_quote_wkr(Bry_bfr tmp) {
this.tmp = tmp;
}
public void Do_all_quotes(Xomw_parser_ctx pctx, Xomw_parser_bfr pbfr) {
public void doAllQuotes(Xomw_parser_ctx pctx, Xomw_parser_bfr pbfr) {
Bry_bfr src_bfr = pbfr.Src();
byte[] src = src_bfr.Bfr();
int src_bgn = 0;
@ -262,4 +262,203 @@ public class Xomw_quote_wkr {// THREAD.UNSAFE: caching for repeated calls
, State__both = 5
;
private static final byte[] Wtxt__apos = Bry_.new_a7("''");
// /**
// * Replace single quotes with HTML markup
// * @private
// *
// * @param String $text
// *
// * @return String The altered text
// */
// public function doAllQuotes($text) {
// $outtext = '';
// $lines = StringUtils::explode("\n", $text);
// foreach ($lines as $line) {
// $outtext .= this.doQuotes($line) . "\n";
// }
// $outtext = substr($outtext, 0, -1);
// return $outtext;
// }
//
// /**
// * Helper function for doAllQuotes()
// *
// * @param String $text
// *
// * @return String
// */
// public function doQuotes($text) {
// $arr = preg_split("/(''+)/", $text, -1, PREG_SPLIT_DELIM_CAPTURE);
// $countarr = count($arr);
// if ($countarr == 1) {
// return $text;
// }
//
// // First, do some preliminary work. This may shift some apostrophes from
// // being mark-up to being text. It also counts the number of occurrences
// // of bold and italics mark-ups.
// $numbold = 0;
// $numitalics = 0;
// for ($i = 1; $i < $countarr; $i += 2) {
// $thislen = strlen($arr[$i]);
// // If there are ever four apostrophes, assume the first is supposed to
// // be text, and the remaining three constitute mark-up for bold text.
// // (T15227: ''''foo'''' turns into ' ''' foo ' ''')
// if ($thislen == 4) {
// $arr[$i - 1] .= "'";
// $arr[$i] = "'''";
// $thislen = 3;
// } elseif ($thislen > 5) {
// // If there are more than 5 apostrophes in a row, assume they're all
// // text except for the last 5.
// // (T15227: ''''''foo'''''' turns into ' ''''' foo ' ''''')
// $arr[$i - 1] .= str_repeat("'", $thislen - 5);
// $arr[$i] = "'''''";
// $thislen = 5;
// }
// // Count the number of occurrences of bold and italics mark-ups.
// if ($thislen == 2) {
// $numitalics++;
// } elseif ($thislen == 3) {
// $numbold++;
// } elseif ($thislen == 5) {
// $numitalics++;
// $numbold++;
// }
// }
//
// // If there is an odd number of both bold and italics, it is likely
// // that one of the bold ones was meant to be an apostrophe followed
// // by italics. Which one we cannot know for certain, but it is more
// // likely to be one that has a single-letter word before it.
// if (($numbold % 2 == 1) && ($numitalics % 2 == 1)) {
// $firstsingleletterword = -1;
// $firstmultiletterword = -1;
// $firstspace = -1;
// for ($i = 1; $i < $countarr; $i += 2) {
// if (strlen($arr[$i]) == 3) {
// $x1 = substr($arr[$i - 1], -1);
// $x2 = substr($arr[$i - 1], -2, 1);
// if ($x1 === ' ') {
// if ($firstspace == -1) {
// $firstspace = $i;
// }
// } elseif ($x2 === ' ') {
// $firstsingleletterword = $i;
// // if $firstsingleletterword is set, we don't
// // look at the other options, so we can bail early.
// break;
// } else {
// if ($firstmultiletterword == -1) {
// $firstmultiletterword = $i;
// }
// }
// }
// }
//
// // If there is a single-letter word, use it!
// if ($firstsingleletterword > -1) {
// $arr[$firstsingleletterword] = "''";
// $arr[$firstsingleletterword - 1] .= "'";
// } elseif ($firstmultiletterword > -1) {
// // If not, but there's a multi-letter word, use that one.
// $arr[$firstmultiletterword] = "''";
// $arr[$firstmultiletterword - 1] .= "'";
// } elseif ($firstspace > -1) {
// // ... otherwise use the first one that has neither.
// // (notice that it is possible for all three to be -1 if, for example,
// // there is only one pentuple-apostrophe in the line)
// $arr[$firstspace] = "''";
// $arr[$firstspace - 1] .= "'";
// }
// }
//
// // Now let's actually convert our apostrophic mush to HTML!
// $output = '';
// $buffer = '';
// $state = '';
// $i = 0;
// foreach ($arr as $r) {
// if (($i % 2) == 0) {
// if ($state === 'both') {
// $buffer .= $r;
// } else {
// $output .= $r;
// }
// } else {
// $thislen = strlen($r);
// if ($thislen == 2) {
// if ($state === 'i') {
// $output .= '</i>';
// $state = '';
// } elseif ($state === 'bi') {
// $output .= '</i>';
// $state = 'b';
// } elseif ($state === 'ib') {
// $output .= '</b></i><b>';
// $state = 'b';
// } elseif ($state === 'both') {
// $output .= '<b><i>' . $buffer . '</i>';
// $state = 'b';
// } else { // $state can be 'b' or ''
// $output .= '<i>';
// $state .= 'i';
// }
// } elseif ($thislen == 3) {
// if ($state === 'b') {
// $output .= '</b>';
// $state = '';
// } elseif ($state === 'bi') {
// $output .= '</i></b><i>';
// $state = 'i';
// } elseif ($state === 'ib') {
// $output .= '</b>';
// $state = 'i';
// } elseif ($state === 'both') {
// $output .= '<i><b>' . $buffer . '</b>';
// $state = 'i';
// } else { // $state can be 'i' or ''
// $output .= '<b>';
// $state .= 'b';
// }
// } elseif ($thislen == 5) {
// if ($state === 'b') {
// $output .= '</b><i>';
// $state = 'i';
// } elseif ($state === 'i') {
// $output .= '</i><b>';
// $state = 'b';
// } elseif ($state === 'bi') {
// $output .= '</i></b>';
// $state = '';
// } elseif ($state === 'ib') {
// $output .= '</b></i>';
// $state = '';
// } elseif ($state === 'both') {
// $output .= '<i><b>' . $buffer . '</b></i>';
// $state = '';
// } else { // ($state == '')
// $buffer = '';
// $state = 'both';
// }
// }
// }
// $i++;
// }
// // Now close all remaining tags. Notice that the order is important.
// if ($state === 'b' || $state === 'ib') {
// $output .= '</b>';
// }
// if ($state === 'i' || $state === 'bi' || $state === 'ib') {
// $output .= '</i>';
// }
// if ($state === 'bi') {
// $output .= '</b>';
// }
// // There might be lonely ''''', so make sure we have a buffer
// if ($state === 'both' && $buffer) {
// $output .= '<b><i>' . $buffer . '</i></b>';
// }
// return $output;
// }
}

View File

@ -33,11 +33,11 @@ public class Xomw_quote_wkr__tst {
@Test public void Nl__text() {fxt.Test__parse("a\nb''c''d\n\ne" , "a\nb<i>c</i>d\n\ne");}
}
// Test fixture for Xomw_quote_wkr: owns one worker and one parser buffer and exposes a
// single parse-and-compare helper.
// NOTE(review): the wkr field and the quote-pass call each appear twice (Xomw_parser-based vs.
// Bry_bfr-based construction; Do_all_quotes vs. doAllQuotes). This looks like the before/after
// of a change shown together -- confirm that only one of each is present in the compiled file.
class Xomw_quote_wkr__fxt {
private final Xomw_quote_wkr wkr = new Xomw_quote_wkr(new Xomw_parser());
private final Xomw_quote_wkr wkr = new Xomw_quote_wkr(Bry_bfr_.New());
private final Xomw_parser_bfr pbfr = new Xomw_parser_bfr();
// Runs the quote pass over src_str with a fresh Xomw_parser_ctx and asserts that the
// result buffer equals expd.
public void Test__parse(String src_str, String expd) {
byte[] src_bry = Bry_.new_u8(src_str);
wkr.Do_all_quotes(new Xomw_parser_ctx(), pbfr.Init(src_bry));
wkr.doAllQuotes(new Xomw_parser_ctx(), pbfr.Init(src_bry));
Tfds.Eq_str_lines(expd, pbfr.Rslt().To_str_and_clear(), src_str);
}
}

View File

@ -20,7 +20,7 @@ import gplx.xowa.mediawiki.includes.libs.*; import gplx.xowa.parsers.uniqs.*;
public class Xomw_table_wkr implements gplx.core.brys.Bry_split_wkr {// THREAD.UNSAFE: caching for repeated calls
private final Bry_bfr tmp;
private Bry_bfr bfr;
private final XomwSanitizer sanitizer; private final Xomw_strip_state strip_state;
private final XomwSanitizer sanitizer; private final XomwStripState strip_state;
private final List_adp
td_history = List_adp_.New() // Is currently a td tag open?
, last_tag_history = List_adp_.New() // Save history of last lag activated (td, th or caption)
@ -30,12 +30,12 @@ public class Xomw_table_wkr implements gplx.core.brys.Bry_split_wkr {// THREAD.U
;
private int indent_level = 0; // indent level of the table
private byte[] first_2 = new byte[2];
// Constructor: stores the temp buffer, the sanitizer and the strip state used by this worker.
// NOTE(review): two signatures are shown -- one pulls the three collaborators from a Xomw_parser
// (Tmp(), Sanitizer(), Strip_state()), the other receives them as separate arguments. This looks
// like the before/after of a signature change shown together; confirm that only one is present
// in the compiled file.
public Xomw_table_wkr(Xomw_parser parser) {
this.tmp = parser.Tmp();
this.sanitizer = parser.Sanitizer();
this.strip_state = parser.Strip_state();
public Xomw_table_wkr(Bry_bfr tmp, XomwSanitizer sanitizer, XomwStripState stripState) {
this.tmp = tmp;
this.sanitizer = sanitizer;
this.strip_state = stripState;
}
public void Do_table_stuff(Xomw_parser_ctx pctx, Xomw_parser_bfr pbfr) {
public void doTableStuff(Xomw_parser_ctx pctx, Xomw_parser_bfr pbfr) {
Bry_bfr src_bfr = pbfr.Src();
byte[] src = src_bfr.Bfr();
int src_bgn = 0;
@ -101,7 +101,7 @@ public class Xomw_table_wkr implements gplx.core.brys.Bry_split_wkr {// THREAD.U
// First check if we are starting a new table
indent_level = colons_end;
tblw_atrs = strip_state.Unstrip_both(tblw_atrs);
tblw_atrs = strip_state.unstripBoth(tblw_atrs);
// PORTED: out_line = str_repeat('<dl><dd>', $indent_level) . "<table{atrs}>";
for (int j = 0; j < indent_level; j++)
@ -149,7 +149,7 @@ public class Xomw_table_wkr implements gplx.core.brys.Bry_split_wkr {// THREAD.U
line = Bry_.Mid(line, 2); // PORTED: $line = preg_replace('#^\|-+#', '', $line);
// Whats after the tag is now only attributes
byte[] atrs = strip_state.Unstrip_both(line);
byte[] atrs = strip_state.unstripBoth(line);
sanitizer.Fix_tag_attributes(tmp, Name__tr, atrs);
atrs = tmp.To_bry_and_clear();
@ -251,7 +251,7 @@ public class Xomw_table_wkr implements gplx.core.brys.Bry_split_wkr {// THREAD.U
cell = tmp.Add(previous).Add_byte(Byte_ascii.Angle_bgn).Add(last_tag).Add_byte(Byte_ascii.Angle_end).Add(cell_data_0).To_bry_and_clear();
}
else {
byte[] atrs = strip_state.Unstrip_both(cell_data_0);
byte[] atrs = strip_state.unstripBoth(cell_data_0);
tmp.Add(previous).Add_byte(Byte_ascii.Angle_bgn).Add(last_tag);
sanitizer.Fix_tag_attributes(tmp, last_tag, atrs);
tmp.Add_byte(Byte_ascii.Angle_end).Add(cell_data_1);
@ -265,6 +265,197 @@ public class Xomw_table_wkr implements gplx.core.brys.Bry_split_wkr {// THREAD.U
bfr.Add(out_line).Add_byte_nl();
return Bry_split_.Rv__ok;
}
// public function doTableStuff($text) {
//
// $lines = StringUtils::explode("\n", $text);
// $out = '';
// $td_history = []; # Is currently a td tag open?
// $last_tag_history = []; # Save history of last tag activated (td, th or caption)
// $tr_history = []; # Is currently a tr tag open?
// $tr_attributes = []; # history of tr attributes
// $has_opened_tr = []; # Did this table open a <tr> element?
// $indent_level = 0; # indent level of the table
//
// foreach ($lines as $outLine) {
// $line = trim($outLine);
//
// if ($line === '') { # empty line, go to next line
// $out .= $outLine . "\n";
// continue;
// }
//
// $first_character = $line[0];
// $first_two = substr($line, 0, 2);
// $matches = [];
//
// if (preg_match('/^(:*)\s*\{\|(.*)$/', $line, $matches)) {
// # First check if we are starting a new table
// $indent_level = strlen($matches[1]);
//
// $attributes = this.mStripState->unstripBoth($matches[2]);
// $attributes = Sanitizer::fixTagAttributes($attributes, 'table');
//
// $outLine = str_repeat('<dl><dd>', $indent_level) . "<table{$attributes}>";
// array_push($td_history, false);
// array_push($last_tag_history, '');
// array_push($tr_history, false);
// array_push($tr_attributes, '');
// array_push($has_opened_tr, false);
// } elseif (count($td_history) == 0) {
// # Don't do any of the following
// $out .= $outLine . "\n";
// continue;
// } elseif ($first_two === '|}') {
// # We are ending a table
// $line = '</table>' . substr($line, 2);
// $last_tag = array_pop($last_tag_history);
//
// if (!array_pop($has_opened_tr)) {
// $line = "<tr><td></td></tr>{$line}";
// }
//
// if (array_pop($tr_history)) {
// $line = "</tr>{$line}";
// }
//
// if (array_pop($td_history)) {
// $line = "</{$last_tag}>{$line}";
// }
// array_pop($tr_attributes);
// $outLine = $line . str_repeat('</dd></dl>', $indent_level);
// } elseif ($first_two === '|-') {
// # Now we have a table row
// $line = preg_replace('#^\|-+#', '', $line);
//
// # What's after the tag is now only attributes
// $attributes = this.mStripState->unstripBoth($line);
// $attributes = Sanitizer::fixTagAttributes($attributes, 'tr');
// array_pop($tr_attributes);
// array_push($tr_attributes, $attributes);
//
// $line = '';
// $last_tag = array_pop($last_tag_history);
// array_pop($has_opened_tr);
// array_push($has_opened_tr, true);
//
// if (array_pop($tr_history)) {
// $line = '</tr>';
// }
//
// if (array_pop($td_history)) {
// $line = "</{$last_tag}>{$line}";
// }
//
// $outLine = $line;
// array_push($tr_history, false);
// array_push($td_history, false);
// array_push($last_tag_history, '');
// } elseif ($first_character === '|'
// || $first_character === '!'
// || $first_two === '|+'
// ) {
// # This might be cell elements, td, th or captions
// if ($first_two === '|+') {
// $first_character = '+';
// $line = substr($line, 2);
// } else {
// $line = substr($line, 1);
// }
//
// // Implies both are valid for table headings.
// if ($first_character === '!') {
// $line = StringUtils::replaceMarkup('!!', '||', $line);
// }
//
// # Split up multiple cells on the same line.
// # FIXME : This can result in improper nesting of tags processed
// # by earlier parser steps.
// $cells = explode('||', $line);
//
// $outLine = '';
//
// # Loop through each table cell
// foreach ($cells as $cell) {
// $previous = '';
// if ($first_character !== '+') {
// $tr_after = array_pop($tr_attributes);
// if (!array_pop($tr_history)) {
// $previous = "<tr{$tr_after}>\n";
// }
// array_push($tr_history, true);
// array_push($tr_attributes, '');
// array_pop($has_opened_tr);
// array_push($has_opened_tr, true);
// }
//
// $last_tag = array_pop($last_tag_history);
//
// if (array_pop($td_history)) {
// $previous = "</{$last_tag}>\n{$previous}";
// }
//
// if ($first_character === '|') {
// $last_tag = 'td';
// } elseif ($first_character === '!') {
// $last_tag = 'th';
// } elseif ($first_character === '+') {
// $last_tag = 'caption';
// } else {
// $last_tag = '';
// }
//
// array_push($last_tag_history, $last_tag);
//
// # A cell could contain both parameters and data
// $cell_data = explode('|', $cell, 2);
//
// # T2553: Note that a '|' inside an invalid link should not
// # be mistaken as delimiting cell parameters
// # Bug T153140: Neither should language converter markup.
// if (preg_match('/\[\[|-\{/', $cell_data[0]) === 1) {
// $cell = "{$previous}<{$last_tag}>{$cell}";
// } elseif (count($cell_data) == 1) {
// $cell = "{$previous}<{$last_tag}>{$cell_data[0]}";
// } else {
// $attributes = this.mStripState->unstripBoth($cell_data[0]);
// $attributes = Sanitizer::fixTagAttributes($attributes, $last_tag);
// $cell = "{$previous}<{$last_tag}{$attributes}>{$cell_data[1]}";
// }
//
// $outLine .= $cell;
// array_push($td_history, true);
// }
// }
// $out .= $outLine . "\n";
// }
//
// # Closing open td, tr && table
// while (count($td_history) > 0) {
// if (array_pop($td_history)) {
// $out .= "</td>\n";
// }
// if (array_pop($tr_history)) {
// $out .= "</tr>\n";
// }
// if (!array_pop($has_opened_tr)) {
// $out .= "<tr><td></td></tr>\n";
// }
//
// $out .= "</table>\n";
// }
//
// # Remove trailing line-ending (b/c)
// if (substr($out, -1) === "\n") {
// $out = substr($out, 0, -1);
// }
//
// # special case: don't return empty table
// if ($out === "<table>\n<tr><td></td></tr>\n</table>") {
// $out = '';
// }
//
// return $out;
// }
private static final byte[]
Wtxt__tb__bgn = Bry_.new_a7("{|")
, Wtxt__tb__end = Bry_.new_a7("|}")

View File

@ -117,11 +117,16 @@ public class Xomw_table_wkr__tst {
// Test fixture for Xomw_table_wkr: owns a parser buffer, a parser context and one worker,
// and exposes a single parse-and-compare helper.
// NOTE(review): the wkr field appears twice (initialized inline from a Xomw_parser vs. assigned
// in the constructor) and the table pass is called twice (Do_table_stuff vs. doTableStuff).
// This looks like the before/after of a change shown together -- confirm that only one of each
// is present in the compiled file.
class Xomw_table_wkr__fxt {
private final Xomw_parser_bfr parser_bfr = new Xomw_parser_bfr();
private final Xomw_parser_ctx pctx = new Xomw_parser_ctx();
private final Xomw_table_wkr wkr = new Xomw_table_wkr(new Xomw_parser());
private final Xomw_table_wkr wkr;
// Builds the worker from the temp buffer, sanitizer and strip state of a new Xomw_parser.
public Xomw_table_wkr__fxt() {
Xomw_parser parser = new Xomw_parser();
this.wkr = new Xomw_table_wkr(parser.Tmp(), parser.Sanitizer(), parser.Strip_state());
}
// Loads src_str into the parser buffer, runs the table pass over it, and asserts that the
// result buffer equals expd (src_str is passed along to the assertion as well).
public void Test__parse(String src_str, String expd) {
byte[] src_bry = Bry_.new_u8(src_str);
parser_bfr.Init(src_bry);
wkr.Do_table_stuff(pctx, parser_bfr);
wkr.doTableStuff(pctx, parser_bfr);
Tfds.Eq_str_lines(expd, parser_bfr.Rslt().To_str_and_clear(), src_str);
}
}