1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2024-09-28 14:30:51 +00:00

Xomw: Convert Sanitizer, StringUtils; also, support stripAllTags

This commit is contained in:
gnosygnu 2017-02-23 09:08:03 -05:00
parent 09dbfc894e
commit 31fcfaf1bd
13 changed files with 2082 additions and 238 deletions

View File

@ -121,13 +121,20 @@ public class XomwSanitizerTest {
// cls: ws
fxt.Test__merge_attributes(src_atrs.Clear().Add_many(cls, " v1 v2 "), trg_atrs.Clear().Add_many(cls, " v3 v4 "), expd_atrs.Clear().Add_many(cls, "v1 v2 v3 v4"));
}
// Checks that normalizeWhitespace replaces each line-break / tab sequence between "a" and "b" with one space.
@Test public void normalizeWhitespace() {
fxt.Test_normalizeWhitespace("a\r\nb", "a b"); // CR+LF pair yields a single space, not two
fxt.Test_normalizeWhitespace("a\rb", "a b"); // lone CR
fxt.Test_normalizeWhitespace("a\nb", "a b"); // lone LF
fxt.Test_normalizeWhitespace("a\tb", "a b"); // tab
}
}
class XomwSanitizerFxt {
private final XomwSanitizer sanitizer = new XomwSanitizer();
private final Bry_bfr tmp = Bry_bfr_.New();
public void Test__normalize_char_references(String src_str, String expd) {
byte[] src_bry = Bry_.new_u8(src_str);
sanitizer.Normalize_char_references(tmp, Bool_.Y, src_bry, 0, src_bry.length);
sanitizer.normalizeCharReferences(tmp, Bool_.Y, src_bry, 0, src_bry.length);
Gftest.Eq__str(expd, tmp.To_str_and_clear());
}
public void Test__regex_domain_y(Xomw_regex_find_domain regex_domain, String src_str, String expd_prot, String expd_host, String expd_rest) {
@ -152,15 +159,18 @@ class XomwSanitizerFxt {
}
public void Test__decode_char_references(String src_str, String expd) {
byte[] src_bry = Bry_.new_u8(src_str);
sanitizer.Decode_char_references(tmp, Bool_.Y, src_bry, 0, src_bry.length);
sanitizer.decodeCharReferences(tmp, Bool_.Y, src_bry, 0, src_bry.length);
Gftest.Eq__str(expd, tmp.To_str_and_clear());
}
public void Test__clean_url(String src_str, String expd) {
byte[] src_bry = Bry_.new_u8(src_str);
Gftest.Eq__str(expd, sanitizer.Clean_url(src_bry));
Gftest.Eq__str(expd, sanitizer.cleanUrl(src_bry));
}
public void Test__merge_attributes(Xomw_atr_mgr src, Xomw_atr_mgr trg, Xomw_atr_mgr expd) {
sanitizer.Merge_attributes(src, trg);
sanitizer.mergeAttributes(src, trg);
Gftest.Eq__ary__lines(expd.To_str(tmp), src.To_str(tmp), "merge_atrs");
}
// Runs sanitizer.normalizeWhitespace on the UTF-8 bytes of src_str and compares the result to expd.
// The third argument is the label attached to the comparison (presumably reported on failure);
// it previously read "merge_atrs", a leftover from Test__merge_attributes.
public void Test_normalizeWhitespace(String src_str, String expd) {
    Gftest.Eq__str(expd, sanitizer.normalizeWhitespace(Bry_.new_u8(src_str)), "normalizeWhitespace");
}
}

View File

@ -53,7 +53,7 @@ public class XomwXml {
bfr.Add_byte_space();
bfr.Add((byte[])attribs.Get_at(i));
bfr.Add_byte_eq().Add_byte_quote();
XomwSanitizer.Encode_attribute(bfr, (byte[])attribs.Get_at(i + 1));
XomwSanitizer.encodeAttribute(bfr, (byte[])attribs.Get_at(i + 1));
bfr.Add_byte_quote();
}
}

View File

@ -0,0 +1,373 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki.includes.libs; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*;
import gplx.core.btries.*;
/**
* A collection of static methods to play with strings.
*/
public class XomwStringUtils {
// /**
// * Test whether a String is valid UTF-8.
// *
// * The function check for invalid byte sequences, overlong encoding but
// * not for different normalisations.
// *
// * @note In MediaWiki 1.21, this function did not provide proper UTF-8 validation.
// * In particular, the pure PHP code path did not in fact check for overlong forms.
// * Beware of this when backporting code to that version of MediaWiki.
// *
// * @since 1.21
// * @param String $value String to check
// * @return boolean Whether the given $value is a valid UTF-8 encoded String
// */
// static function isUtf8($value) {
// $value = (String)$value;
//
// // HHVM 3.4 and older come with an outdated version of libmbfl that
// // incorrectly allows values above U+10FFFF, so we have to check
// // for them separately. (This issue also exists in PHP 5.3 and
// // older, which are no longer supported.)
// static $newPHP;
// if ($newPHP === null) {
// $newPHP = !mb_check_encoding("\xf4\x90\x80\x80", 'UTF-8');
// }
//
// return mb_check_encoding($value, 'UTF-8') &&
// ($newPHP || preg_match("/\xf4[\x90-\xbf]|[\xf5-\xff]/S", $value) === 0);
// }
// token ids stored in the trie below
private static final byte DELIMITER_EXPLODE__SEP = 0;
private static final byte DELIMITER_EXPLODE__BGN = 1;
private static final byte DELIMITER_EXPLODE__END = 2;
// maps each recognized token ("|", "-{", "}-") to its id
private static final Btrie_slim_mgr delimiter_explode_trie = Btrie_slim_mgr.cs()
    .Add_str_byte("|" , DELIMITER_EXPLODE__SEP)
    .Add_str_byte("-{", DELIMITER_EXPLODE__BGN)
    .Add_str_byte("}-", DELIMITER_EXPLODE__END)
    ;
/**
 * Splits src on "|", ignoring any "|" that sits inside a "-{" ... "}-" region.
 * Regions may nest: every "-{" raises the nesting level and every "}-" lowers it,
 * and a "|" only splits while the level is exactly 0.
 *
 * Port of MediaWiki StringUtils::delimiterExplode($startDelim, $endDelim, $separator, $subject, $nested).
 * XO.MW: NOTE: function only used in two places; hard-coding (a) nested=true; (b) bgn="-{" end="}-" sep="|"
 *
 * @param tmp scratch list; segments are appended to it and it is emptied through To_ary_and_clear before returning
 * @param trv trie result holder; read through Pos() after each match to find where scanning resumes
 * @param src bytes to split
 * @return the segments in order; the text after the last split point is always added as the final segment
 */
public static byte[][] delimiterExplode(List_adp tmp, Btrie_rv trv, byte[] src) {
    // XO.MW.PORTED:entire proc rewritten; see PHP for source
    final int len = src.length;
    int nesting = 0; // current "-{" ... "}-" level
    int seg_bgn = 0; // start of the segment currently being collected
    int pos = 0;
    while (pos != len) {
        Object match = delimiter_explode_trie.Match_at(trv, src, pos, len);
        if (match == null) { // ordinary byte; move on
            pos++;
            continue;
        }
        byte tid = ((gplx.core.primitives.Byte_obj_val)match).Val();
        if (tid == DELIMITER_EXPLODE__SEP) {
            if (nesting == 0) { // only split outside of any "-{" ... "}-" region
                tmp.Add(Bry_.Mid(src, seg_bgn, pos));
                seg_bgn = pos + 1;
            }
        }
        else if (tid == DELIMITER_EXPLODE__BGN) {
            nesting++;
        }
        else if (tid == DELIMITER_EXPLODE__END) {
            nesting--;
        }
        pos = trv.Pos(); // resume at the position reported for the matched token
    }
    // end of src: whatever follows the last split point becomes the last segment
    tmp.Add(Bry_.Mid(src, seg_bgn, len));
    return (byte[][])tmp.To_ary_and_clear(byte[].class);
}
// /**
// * Perform an operation equivalent to `preg_replace()`
// *
// * Matches this code:
// *
// * preg_replace("!$startDelim(.*?)$endDelim!", $replace, $subject);
// *
// * ..except that it's worst-case O(N) instead of O(N^2). Compared to delimiterReplace(), this
// * implementation is fast but memory-hungry and inflexible. The memory requirements are such
// * that I don't recommend using it on anything but guaranteed small chunks of text.
// *
// * @param String $startDelim
// * @param String $endDelim
// * @param String $replace
// * @param String $subject
// * @return String
// */
// static function hungryDelimiterReplace($startDelim, $endDelim, $replace, $subject) {
// $segments = explode($startDelim, $subject);
// $output = array_shift($segments);
// foreach ($segments as $s) {
// $endDelimPos = strpos($s, $endDelim);
// if ($endDelimPos === false) {
// $output .= $startDelim . $s;
// } else {
// $output .= $replace . substr($s, $endDelimPos + strlen($endDelim));
// }
// }
//
// return $output;
// }
//
// /**
// * Perform an operation equivalent to `preg_replace_callback()`
// *
// * Matches this code:
// *
// * preg_replace_callback("!$startDelim(.*)$endDelim!s$flags", $callback, $subject);
// *
// * If the start delimiter ends with an initial substring of the end delimiter,
// * e.g. in the case of C-style comments, the behavior differs from the model
// * regex. In this implementation, the end must share no characters with the
// * start, so e.g. `/*\/` is not considered to be both the start and end of a
// * comment. `/*\/xy/*\/` is considered to be a single comment with contents `/xy/`.
// *
// * The implementation of delimiterReplaceCallback() is slower than hungryDelimiterReplace()
// * but uses far less memory. The delimiters are literal strings, not regular expressions.
// *
// * @param String $startDelim Start delimiter
// * @param String $endDelim End delimiter
// * @param callable $callback Function to call on each match
// * @param String $subject
// * @param String $flags Regular expression flags
// * @throws InvalidArgumentException
// * @return String
// */
// static function delimiterReplaceCallback($startDelim, $endDelim, $callback,
// $subject, $flags = ''
// ) {
// $inputPos = 0;
// $outputPos = 0;
// $contentPos = 0;
// $output = '';
// $foundStart = false;
// $encStart = preg_quote($startDelim, '!');
// $encEnd = preg_quote($endDelim, '!');
// $strcmp = strpos($flags, 'i') === false ? 'strcmp' : 'strcasecmp';
// $endLength = strlen($endDelim);
// $m = [];
//
// while ($inputPos < strlen($subject) &&
// preg_match("!($encStart)|($encEnd)!S$flags", $subject, $m, PREG_OFFSET_CAPTURE, $inputPos)
// ) {
// $tokenOffset = $m[0][1];
// if ($m[1][0] != '') {
// if ($foundStart &&
// $strcmp($endDelim, substr($subject, $tokenOffset, $endLength)) == 0
// ) {
// # An end match is present at the same location
// $tokenType = 'end';
// $tokenLength = $endLength;
// } else {
// $tokenType = 'start';
// $tokenLength = strlen($m[0][0]);
// }
// } elseif ($m[2][0] != '') {
// $tokenType = 'end';
// $tokenLength = strlen($m[0][0]);
// } else {
// throw new InvalidArgumentException('Invalid delimiter given to ' . __METHOD__);
// }
//
// if ($tokenType == 'start') {
// # Only move the start position if we haven't already found a start
// # This means that START START END matches outer pair
// if (!$foundStart) {
// # Found start
// $inputPos = $tokenOffset + $tokenLength;
// # Write out the non-matching section
// $output .= substr($subject, $outputPos, $tokenOffset - $outputPos);
// $outputPos = $tokenOffset;
// $contentPos = $inputPos;
// $foundStart = true;
// } else {
// # Move the input position past the *first character* of START,
// # to protect against missing END when it overlaps with START
// $inputPos = $tokenOffset + 1;
// }
// } elseif ($tokenType == 'end') {
// if ($foundStart) {
// # Found match
// $output .= call_user_func($callback, [
// substr($subject, $outputPos, $tokenOffset + $tokenLength - $outputPos),
// substr($subject, $contentPos, $tokenOffset - $contentPos)
// ]);
// $foundStart = false;
// } else {
// # Non-matching end, write it out
// $output .= substr($subject, $inputPos, $tokenOffset + $tokenLength - $outputPos);
// }
// $inputPos = $outputPos = $tokenOffset + $tokenLength;
// } else {
// throw new InvalidArgumentException('Invalid delimiter given to ' . __METHOD__);
// }
// }
// if ($outputPos < strlen($subject)) {
// $output .= substr($subject, $outputPos);
// }
//
// return $output;
// }
//
// /**
// * Perform an operation equivalent to `preg_replace()` with flags.
// *
// * Matches this code:
// *
// * preg_replace("!$startDelim(.*)$endDelim!$flags", $replace, $subject);
// *
// * @param String $startDelim Start delimiter regular expression
// * @param String $endDelim End delimiter regular expression
// * @param String $replace Replacement String. May contain $1, which will be
// * replaced by the text between the delimiters
// * @param String $subject String to search
// * @param String $flags Regular expression flags
// * @return String The String with the matches replaced
// */
// static function delimiterReplace($startDelim, $endDelim, $replace, $subject, $flags = '') {
// $replacer = new RegexlikeReplacer($replace);
//
// return self::delimiterReplaceCallback($startDelim, $endDelim,
// $replacer->cb(), $subject, $flags);
// }
//
// /**
// * More or less "markup-safe" explode()
// * Ignores any instances of the separator inside `<...>`
// * @param String $separator
// * @param String $text
// * @return array
// */
// static function explodeMarkup($separator, $text) {
// $placeholder = "\x00";
//
// // Remove placeholder instances
// $text = str_replace($placeholder, '', $text);
//
// // Replace instances of the separator inside HTML-like tags with the placeholder
// $replacer = new DoubleReplacer($separator, $placeholder);
// $cleaned = StringUtils::delimiterReplaceCallback('<', '>', $replacer->cb(), $text);
//
// // Explode, then put the replaced separators back in
// $items = explode($separator, $cleaned);
// foreach ($items as $i => $str) {
// $items[$i] = str_replace($placeholder, $separator, $str);
// }
//
// return $items;
// }
/**
 * More or less "markup-safe" str_replace(): overwrites occurrences of find with repl
 * directly inside src, leaving alone any occurrence that starts after a "<" and
 * before the next ">".
 *
 * Port of MediaWiki StringUtils::replaceMarkup($search, $replace, $text), restricted to
 * same-length find / repl so that the replacement can be done in place.
 *
 * @param src bytes to modify in place
 * @param src_bgn index of the first byte to examine
 * @param src_end index one past the last byte to examine; matches that would extend past it are left untouched
 * @param find bytes to search for; an empty array makes the call a no-op
 * @param repl bytes to write over each match; must be the same length as find
 * @throws an Err_ error when find and repl differ in length
 */
public static void replaceMarkup(byte[] src, int src_bgn, int src_end, byte[] find, byte[] repl) { // REF:/includes/libs/StringUtils.php|replaceMarkup
    // XO.MW.PORTED: avoiding multiple regex calls / String creations
    // $placeholder = "\x00";
    //
    // Remove placeholder instances
    // $text = str_replace($placeholder, '', $text);
    //
    // Replace instances of the separator inside HTML-like tags with the placeholder
    // $replacer = new DoubleReplacer($search, $placeholder);
    // $cleaned = StringUtils::delimiterReplaceCallback('<', '>', $replacer->cb(), $text);
    //
    // Explode, then put the replaced separators back in
    // $cleaned = str_replace($search, $replace, $cleaned);
    // $text = str_replace($placeholder, $search, $cleaned);
    // if same length find / repl, do in-place replacement; EX: "!!" -> "||"
    int find_len = find.length;
    int repl_len = repl.length;
    if (find_len != repl_len) throw Err_.new_wo_type("find and repl should be same length");
    if (find_len == 0) return; // nothing to search for; also avoids reading find[0] from an empty array
    byte find_0 = find[0];
    byte dlm_bgn = Byte_ascii.Angle_bgn;
    byte dlm_end = Byte_ascii.Angle_end;
    boolean repl_active = true; // false while between "<" and the next ">"
    // loop every char in array
    for (int i = src_bgn; i < src_end; i++) {
        byte b = src[i];
        if (   repl_active                  // cheapest check first
            && b == find_0
            && i + find_len <= src_end      // whole match must lie inside [src_bgn, src_end)
            && Bry_.Match(src, i + 1, i + find_len, find, 1, find_len)
            ) {
            Bry_.Set(src, i, i + find_len, repl);
        }
        else if (b == dlm_bgn) {
            repl_active = false;
        }
        else if (b == dlm_end) {
            repl_active = true;
        }
    }
}
// /**
// * Escape a String to make it suitable for inclusion in a preg_replace()
// * replacement parameter.
// *
// * @param String $String
// * @return String
// */
// static function escapeRegexReplacement($String) {
// $String = str_replace('\\', '\\\\', $String);
// $String = str_replace('$', '\\$', $String);
// return $String;
// }
//
// /**
// * Workalike for explode() with limited memory usage.
// *
// * @param String $separator
// * @param String $subject
// * @return ArrayIterator|ExplodeIterator
// */
// static function explode($separator, $subject) {
// if (substr_count($subject, $separator) > 1000) {
// return new ExplodeIterator($separator, $subject);
// } else {
// return new ArrayIterator(explode($separator, $subject));
// }
// }
}

View File

@ -15,8 +15,8 @@ Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki.includes.libs; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*;
import org.junit.*; import gplx.core.tests.*;
public class Xomw_string_utils__tst {
private final Xomw_string_utils__fxt fxt = new Xomw_string_utils__fxt();
public class XomwStringUtilsTest {
private final XomwStringUtilsFxt fxt = new XomwStringUtilsFxt();
@Test public void Delimiter_explode() {
// basic
fxt.Test__delimiter_explode("a|b|c" , "a", "b", "c");
@ -42,17 +42,17 @@ public class Xomw_string_utils__tst {
fxt.Test__replace_markup("a!!b<!!>!!>!!c" , "!!", "||", "a||b<!!>||>||c"); // NOTE: should probably be "!!>!!>", but unmatched ">" are escaped to "&gt;"
}
}
class Xomw_string_utils__fxt {
class XomwStringUtilsFxt {
public void Test__delimiter_explode(String src_str, String... expd) {
List_adp tmp = List_adp_.New();
gplx.core.btries.Btrie_rv trv = new gplx.core.btries.Btrie_rv();
byte[][] actl = Xomw_string_utils.Delimiter_explode(tmp, trv, Bry_.new_u8(src_str));
byte[][] actl = XomwStringUtils.delimiterExplode(tmp, trv, Bry_.new_u8(src_str));
Gftest.Eq__ary(expd, actl, "src=~{0}", src_str);
}
public void Test__replace_markup(String src_str, String find, String repl, String expd) {
byte[] src_bry = Bry_.new_u8(src_str);
Xomw_string_utils.Replace_markup(src_bry, 0, src_bry.length, Bry_.new_a7(find), Bry_.new_a7(repl));
XomwStringUtils.replaceMarkup(src_bry, 0, src_bry.length, Bry_.new_a7(find), Bry_.new_a7(repl));
Gftest.Eq__str(expd, src_bry);
}
}

View File

@ -1,123 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.mediawiki.includes.libs; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*;
import gplx.core.btries.*;
public class Xomw_string_utils {
// Explode a String, but ignore any instances of the separator inside
// the given start and end delimiters, which may optionally nest.
// The delimiters are literal strings, not regular expressions.
// @param String bgn_delim Start delimiter
// @param String end_delim End delimiter
// @param String separator Separator String for the explode.
// @param String subject Subject String to explode.
// @param boolean nested True iff the delimiters are allowed to nest.
// @return ArrayIterator
// XO.MW: hard-coding (a) nested=true; (b) bgn="-{" end="}-" sep="|"
// XO.MW:SYNC:1.29; DATE:2017-02-03
private static final byte Delimiter_explode__sep = 0, Delimiter_explode__bgn = 1, Delimiter_explode__end = 2;
private static final Btrie_slim_mgr delimiter_explode_trie = Btrie_slim_mgr.cs()
.Add_str_byte("|" , Delimiter_explode__sep)
.Add_str_byte("-{", Delimiter_explode__bgn)
.Add_str_byte("}-", Delimiter_explode__end)
;
public static byte[][] Delimiter_explode(List_adp tmp, Btrie_rv trv, byte[] src) {
int src_bgn = 0;
int src_end = src.length;
int depth = 0;
int cur = src_bgn;
int prv = cur;
while (true) {
// eos
if (cur == src_end) {
// add rest
tmp.Add(Bry_.Mid(src, prv, src_end));
break;
}
Object o = delimiter_explode_trie.Match_at(trv, src, cur, src_end);
// regular char; continue;
if (o == null) {
cur++;
continue;
}
// handle sep, bgn, end
byte tid = ((gplx.core.primitives.Byte_obj_val)o).Val();
switch (tid) {
case Delimiter_explode__sep:
if (depth == 0) {
tmp.Add(Bry_.Mid(src, prv, cur));
prv = cur + 1;
}
break;
case Delimiter_explode__bgn:
depth++;
break;
case Delimiter_explode__end:
depth--;
break;
}
cur = trv.Pos();
}
return (byte[][])tmp.To_ary_and_clear(byte[].class);
}
// More or less "markup-safe" str_replace()
// Ignores any instances of the separator inside `<...>`
public static void Replace_markup(byte[] src, int src_bgn, int src_end, byte[] find, byte[] repl) { // REF:/includes/libs/StringUtils.php|replaceMarkup
// PORTED: avoiding multiple regex calls / String creations
// $placeholder = "\x00";
// Remove placeholder instances
// $text = str_replace( $placeholder, '', $text );
// Replace instances of the separator inside HTML-like tags with the placeholder
// $replacer = new DoubleReplacer( $search, $placeholder );
// $cleaned = StringUtils::delimiterReplaceCallback( '<', '>', $replacer->cb(), $text );
// Explode, then put the replaced separators back in
// $cleaned = str_replace( $search, $replace, $cleaned );
// $text = str_replace( $placeholder, $search, $cleaned );
// if same length find / repl, do in-place replacement; EX: "!!" -> "||"
int find_len = find.length;
int repl_len = repl.length;
if (find_len != repl_len) throw Err_.new_wo_type("find and repl should be same length");
byte find_0 = find[0];
byte dlm_bgn = Byte_ascii.Angle_bgn;
byte dlm_end = Byte_ascii.Angle_end;
boolean repl_active = true;
// loop every char in array
for (int i = src_bgn; i < src_end; i++) {
byte b = src[i];
if ( b == find_0
&& Bry_.Match(src, i + 1, i + find_len, find, 1, find_len)
&& repl_active
) {
Bry_.Set(src, i, i + find_len, repl);
}
else if (b == dlm_bgn) {
repl_active = false;
}
else if (b == dlm_end) {
repl_active = true;
}
}
}
}

View File

@ -164,7 +164,7 @@ public class Xomw_link_renderer {
// Merge the custom attribs with the default ones, and iterate
// over that, deleting all "false" attributes.
sanitizer.Merge_attributes(src, trg);
sanitizer.mergeAttributes(src, trg);
// XO.MW:MW removes "false" values; XO removes "null" values
boolean deleted = false;

View File

@ -1244,7 +1244,7 @@ public class XomwParser implements XomwParserIface {
// $text = $this->replaceTransparentTags( $text );
mStripState.unstripGeneral(pbfr);
sanitizer.Normalize_char_references(pbfr);
sanitizer.normalizeCharReferences(pbfr);
// if ( MWTidy::isEnabled() ) {
// if ( $this->mOptions->getTidy() ) {
@ -4605,20 +4605,11 @@ public class XomwParser implements XomwParserIface {
// that are later expanded to html- so expand them now and
// remove the tags
tooltip = this.mStripState.unstripBoth(tooltip);
// tooltip = Sanitizer::stripAllTags( tooltip );
tooltip = sanitizer.stripAllTags(tooltip);
return tooltip;
}
// protected function stripAltText($caption, $holders) {
// # make sure there are no placeholders in thumbnail attributes
// # that are later expanded to html- so expand them now and
// # remove the tags
// $tooltip = this.mStripState->unstripBoth($tooltip);
// $tooltip = Sanitizer::stripAllTags($tooltip);
//
// return $tooltip;
// }
//
// /**
// * Set a flag in the output Object indicating that the content is dynamic and
// * shouldn't be cached.

View File

@ -200,7 +200,7 @@ public class Xomw_parser implements XomwParserIface {
// $text = $this->replaceTransparentTags( $text );
strip_state.unstripGeneral(pbfr);
sanitizer.Normalize_char_references(pbfr);
sanitizer.normalizeCharReferences(pbfr);
// if ( MWTidy::isEnabled() ) {
// if ( $this->mOptions->getTidy() ) {

View File

@ -203,7 +203,7 @@ public class Xomw_lnke_wkr {// THREAD.UNSAFE: caching for repeated calls
// $text = $this->getConverterLanguage()->markNoConversion( $text );
byte[] url = Bry_.Mid(src, url_bgn, url_end);
url = sanitizer.Clean_url(url);
url = sanitizer.cleanUrl(url);
bfr.Add_mid(src, prv, lnke_bgn);
prv = cur;

View File

@ -472,7 +472,7 @@ public class Xomw_lnki_wkr {// THREAD.UNSAFE: caching for repeated calls
// * text-bottom
// Protect LanguageConverter markup when splitting into parts
byte[][] parts = Xomw_string_utils.Delimiter_explode(tmp_list, trv, options_at_link);
byte[][] parts = XomwStringUtils.delimiterExplode(tmp_list, trv, options_at_link);
// Give extensions a chance to select the file revision for us
// $options = [];

View File

@ -252,7 +252,7 @@ public class Xomw_magiclinks_wkr {
return;
}
url = sanitizer.Clean_url(url);
url = sanitizer.cleanUrl(url);
// XO.MW.UNSUPPORTED.NON-WMF: not supporting images from free-form url; (EX: "http://a.org/image.png" -> "<img>"); haven't seen this used on WMF wikis
// Is this an external image?

View File

@ -107,7 +107,7 @@ public class Xomw_table_wkr implements gplx.core.brys.Bry_split_wkr {// THREAD.U
for (int j = 0; j < indent_level; j++)
tmp.Add(Html__dl__bgn);
tmp.Add_str_a7("<table");
sanitizer.Fix_tag_attributes(tmp, Name__table, tblw_atrs);
sanitizer.fixTagAttributes(tmp, Name__table, tblw_atrs);
tmp.Add_byte(Byte_ascii.Angle_end);
out_line = tmp.To_bry_and_clear();
td_history.Add(false);
@ -150,7 +150,7 @@ public class Xomw_table_wkr implements gplx.core.brys.Bry_split_wkr {// THREAD.U
// What's after the tag is now only attributes
byte[] atrs = strip_state.unstripBoth(line);
sanitizer.Fix_tag_attributes(tmp, Name__tr, atrs);
sanitizer.fixTagAttributes(tmp, Name__tr, atrs);
atrs = tmp.To_bry_and_clear();
Php_ary_.Pop_bry_or_null(tr_attributes);
@ -188,7 +188,7 @@ public class Xomw_table_wkr implements gplx.core.brys.Bry_split_wkr {// THREAD.U
// Implies both are valid for table headings.
if (first_char == Byte_ascii.Bang) {
Xomw_string_utils.Replace_markup(line, 0, line.length, Wtxt__th2, Wtxt__td2); // $line = StringUtils::replaceMarkup('!!', '||', $line);
XomwStringUtils.replaceMarkup(line, 0, line.length, Wtxt__th2, Wtxt__td2); // $line = StringUtils::replaceMarkup('!!', '||', $line);
}
// Split up multiple cells on the same line.
@ -253,7 +253,7 @@ public class Xomw_table_wkr implements gplx.core.brys.Bry_split_wkr {// THREAD.U
else {
byte[] atrs = strip_state.unstripBoth(cell_data_0);
tmp.Add(previous).Add_byte(Byte_ascii.Angle_bgn).Add(last_tag);
sanitizer.Fix_tag_attributes(tmp, last_tag, atrs);
sanitizer.fixTagAttributes(tmp, last_tag, atrs);
tmp.Add_byte(Byte_ascii.Angle_end).Add(cell_data_1);
cell = tmp.To_bry_and_clear();
}