mirror of https://github.com/gnosygnu/xowa
parent
6a5c114998
commit
cef2d7e2f6
@ -0,0 +1,819 @@
|
|||||||
|
/*
|
||||||
|
XOWA: the XOWA Offline Wiki Application
|
||||||
|
Copyright (C) 2012 gnosygnu@gmail.com
|
||||||
|
|
||||||
|
This program is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Affero General Public License as
|
||||||
|
published by the Free Software Foundation, either version 3 of the
|
||||||
|
License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Affero General Public License
|
||||||
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
package gplx.xowa.mws; import gplx.*; import gplx.xowa.*;
|
||||||
|
import gplx.core.btries.*;
|
||||||
|
import gplx.xowa.mws.htmls.*;
|
||||||
|
public class Xomw_linker {
|
||||||
|
private final Bry_bfr tmp = Bry_bfr_.New();
|
||||||
|
private final Linker_rel_splitter splitter = new Linker_rel_splitter();
|
||||||
|
private final Xomw_html_utl html_utl = new Xomw_html_utl();
|
||||||
|
private byte[] wg_title = null;
|
||||||
|
private final Btrie_rv trv = new Btrie_rv();
|
||||||
|
private final byte[][] split_trail_rv = new byte[2][];
|
||||||
|
private Btrie_slim_mgr split_trail_trie;
|
||||||
|
private static final byte[] Atr__class = Bry_.new_a7("class"), Atr__rel = Bry_.new_a7("rel"), Atr__href = Bry_.new_a7("href"), Rel__nofollow = Bry_.new_a7("nofollow");
|
||||||
|
public void Init_by_wiki(Btrie_slim_mgr trie) {
|
||||||
|
this.split_trail_trie = trie;
|
||||||
|
}
|
||||||
|
// /**
|
||||||
|
// * This function returns an HTML link to the given target. It serves a few
|
||||||
|
// * purposes:
|
||||||
|
// * 1) If $target is a Title, the correct URL to link to will be figured
|
||||||
|
// * out automatically.
|
||||||
|
// * 2) It automatically adds the usual classes for various types of link
|
||||||
|
// * targets: "new" for red links, "stub" for short articles, etc.
|
||||||
|
// * 3) It escapes all attribute values safely so there's no risk of XSS.
|
||||||
|
// * 4) It provides a default tooltip if the target is a Title (the page
|
||||||
|
// * name of the target).
|
||||||
|
// * link() replaces the old functions in the makeLink() family.
|
||||||
|
// *
|
||||||
|
// * @since 1.18 Method exists since 1.16 as non-static, made static in 1.18.
|
||||||
|
// * @deprecated since 1.28, use MediaWiki\Linker\LinkRenderer instead
|
||||||
|
// *
|
||||||
|
// * @param Title $target Can currently only be a Title, but this may
|
||||||
|
// * change to support Images, literal URLs, etc.
|
||||||
|
// * @param String $html The HTML contents of the <a> element, i.e.,
|
||||||
|
// * the link text. This is raw HTML and will not be escaped. If null,
|
||||||
|
// * defaults to the prefixed text of the Title; or if the Title is just a
|
||||||
|
// * fragment, the contents of the fragment.
|
||||||
|
// * @param array $customAttribs A key => value array of extra HTML attributes,
|
||||||
|
// * such as title and class. (href is ignored.) Classes will be
|
||||||
|
// * merged with the default classes, while other attributes will replace
|
||||||
|
// * default attributes. All passed attribute values will be HTML-escaped.
|
||||||
|
// * A false attribute value means to suppress that attribute.
|
||||||
|
// * @param array $query The query String to append to the URL
|
||||||
|
// * you're linking to, in key => value array form. Query keys and values
|
||||||
|
// * will be URL-encoded.
|
||||||
|
// * @param String|array $options String or array of strings:
|
||||||
|
// * 'known': Page is known to exist, so don't check if it does.
|
||||||
|
// * 'broken': Page is known not to exist, so don't check if it does.
|
||||||
|
// * 'noclasses': Don't add any classes automatically (includes "new",
|
||||||
|
// * "stub", "mw-redirect", "extiw"). Only use the class attribute
|
||||||
|
// * provided, if any, so you get a simple blue link with no funny i-
|
||||||
|
// * cons.
|
||||||
|
// * 'forcearticlepath': Use the article path always, even with a querystring.
|
||||||
|
// * Has compatibility issues on some setups, so avoid wherever possible.
|
||||||
|
// * 'http': Force a full URL with http:// as the scheme.
|
||||||
|
// * 'https': Force a full URL with https:// as the scheme.
|
||||||
|
// * 'stubThreshold' => (int): Stub threshold to use when determining link classes.
|
||||||
|
// * @return String HTML <a> attribute
|
||||||
|
// */
|
||||||
|
// public static function link(
|
||||||
|
// $target, $html = null, $customAttribs = [], $query = [], $options = []
|
||||||
|
// ) {
|
||||||
|
// if ( !$target instanceof Title ) {
|
||||||
|
// wfWarn( __METHOD__ . ': Requires $target to be a Title Object.', 2 );
|
||||||
|
// return "<!-- ERROR -->$html";
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// if ( is_string( $query ) ) {
|
||||||
|
// // some functions within core using this still hand over query strings
|
||||||
|
// wfDeprecated( __METHOD__ . ' with parameter $query as String (should be array)', '1.20' );
|
||||||
|
// $query = wfCgiToArray( $query );
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// $services = MediaWikiServices::getInstance();
|
||||||
|
// $options = (array)$options;
|
||||||
|
// if ( $options ) {
|
||||||
|
// // Custom options, create new LinkRenderer
|
||||||
|
// if ( !isset( $options['stubThreshold'] ) ) {
|
||||||
|
// $defaultLinkRenderer = $services->getLinkRenderer();
|
||||||
|
// $options['stubThreshold'] = $defaultLinkRenderer->getStubThreshold();
|
||||||
|
// }
|
||||||
|
// $linkRenderer = $services->getLinkRendererFactory()
|
||||||
|
// ->createFromLegacyOptions( $options );
|
||||||
|
// } else {
|
||||||
|
// $linkRenderer = $services->getLinkRenderer();
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// if ( $html !== null ) {
|
||||||
|
// $text = new HtmlArmor( $html );
|
||||||
|
// } else {
|
||||||
|
// $text = $html; // null
|
||||||
|
// }
|
||||||
|
// if ( in_array( 'known', $options, true ) ) {
|
||||||
|
// return $linkRenderer->makeKnownLink( $target, $text, $customAttribs, $query );
|
||||||
|
// } elseif ( in_array( 'broken', $options, true ) ) {
|
||||||
|
// return $linkRenderer->makeBrokenLink( $target, $text, $customAttribs, $query );
|
||||||
|
// } elseif ( in_array( 'noclasses', $options, true ) ) {
|
||||||
|
// return $linkRenderer->makePreloadedLink( $target, $text, '', $customAttribs, $query );
|
||||||
|
// } else {
|
||||||
|
// return $linkRenderer->makeLink( $target, $text, $customAttribs, $query );
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
public void Make_self_link_obj(Bry_bfr bfr, Xoa_ttl nt, byte[] html, byte[] query, byte[] trail, byte[] prefix) {
|
||||||
|
// MW.HOOK:SelfLinkBegin
|
||||||
|
if (html == Bry_.Empty) {
|
||||||
|
html = tmp.Add_bry_escape_html(nt.Get_prefixed_text()).To_bry_and_clear();
|
||||||
|
}
|
||||||
|
byte[] inside = Bry_.Empty;
|
||||||
|
byte[][] split_trail = Split_trail(trail);
|
||||||
|
inside = split_trail[0];
|
||||||
|
trail = split_trail[1];
|
||||||
|
bfr.Add_str_a7("<strong class=\"selflink\">");
|
||||||
|
bfr.Add_bry_many(prefix, html, inside);
|
||||||
|
bfr.Add_str_a7("</strong>");
|
||||||
|
bfr.Add(trail);
|
||||||
|
}
|
||||||
|
public void Make_external_link(Bry_bfr bfr, byte[] url, byte[] text, boolean escape, byte[] link_type, Xomwh_atr_mgr attribs, byte[] title) {
|
||||||
|
tmp.Add_str_a7("external");
|
||||||
|
if (link_type != null) {
|
||||||
|
tmp.Add_byte_space().Add(link_type);
|
||||||
|
}
|
||||||
|
Xomwh_atr_itm cls_itm = attribs.Get_by_or_make(Atr__class);
|
||||||
|
if (cls_itm.Val() != null) {
|
||||||
|
tmp.Add(cls_itm.Val());
|
||||||
|
}
|
||||||
|
cls_itm.Val_(tmp.To_bry_and_clear());
|
||||||
|
|
||||||
|
if (escape)
|
||||||
|
text = tmp.Add_bry_escape_html(text).To_bry_and_clear();
|
||||||
|
|
||||||
|
if (title == null)
|
||||||
|
title = wg_title;
|
||||||
|
|
||||||
|
byte[] new_rel = Get_external_link_rel(url, title);
|
||||||
|
Xomwh_atr_itm cur_rel_atr = attribs.Get_by_or_make(Atr__rel);
|
||||||
|
if (cur_rel_atr.Val() == null) {
|
||||||
|
cur_rel_atr.Val_(new_rel);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
// Merge the rel attributes.
|
||||||
|
byte[] cur_rel = cur_rel_atr.Val();
|
||||||
|
Bry_split_.Split(new_rel, 0, new_rel.length, Byte_ascii.Space, Bool_.N, splitter); // $newRels = explode( ' ', $newRel );
|
||||||
|
Bry_split_.Split(cur_rel, 0, cur_rel.length, Byte_ascii.Space, Bool_.N, splitter); // $oldRels = explode( ' ', $attribs['rel'] );
|
||||||
|
cur_rel_atr.Val_(splitter.To_bry()); // $attribs['rel'] = implode( ' ', $combined );
|
||||||
|
}
|
||||||
|
//$link = '';
|
||||||
|
//$success = Hooks::run( 'LinkerMakeExternalLink',
|
||||||
|
// [ &$url, &$text, &$link, &$attribs, $linktype ] );
|
||||||
|
//if ( !$success ) {
|
||||||
|
// wfDebug( "Hook LinkerMakeExternalLink changed the output of link "
|
||||||
|
// . "with url {$url} and text {$text} to {$link}\n", true );
|
||||||
|
// return $link;
|
||||||
|
//}
|
||||||
|
attribs.Set(Atr__href, url);
|
||||||
|
|
||||||
|
html_utl.Raw_element(bfr, Bry_.new_a7("a"), attribs, text);
|
||||||
|
}
|
||||||
|
private byte[] Get_external_link_rel(byte[] url, byte[] title) {
|
||||||
|
// global $wgNoFollowLinks, $wgNoFollowNsExceptions, $wgNoFollowDomainExceptions;
|
||||||
|
// $ns = $title ? $title->getNamespace() : false;
|
||||||
|
// if ( $wgNoFollowLinks && !in_array( $ns, $wgNoFollowNsExceptions )
|
||||||
|
// && !wfMatchesDomainList( $url, $wgNoFollowDomainExceptions )
|
||||||
|
// ) {
|
||||||
|
return Rel__nofollow;
|
||||||
|
// }
|
||||||
|
// return null;
|
||||||
|
}
|
||||||
|
public void Normalize_subpage_link(Xomw_linker__normalize_subpage_link rv, Xoa_ttl context_title, byte[] target, byte[] text) {
|
||||||
|
// Valid link forms:
|
||||||
|
// Foobar -- normal
|
||||||
|
// :Foobar -- override special treatment of prefix (images, language links)
|
||||||
|
// /Foobar -- convert to CurrentPage/Foobar
|
||||||
|
// /Foobar/ -- convert to CurrentPage/Foobar, strip the initial and final / from text
|
||||||
|
// ../ -- convert to CurrentPage, from CurrentPage/CurrentSubPage
|
||||||
|
// ../Foobar -- convert to CurrentPage/Foobar,
|
||||||
|
// (from CurrentPage/CurrentSubPage)
|
||||||
|
// ../Foobar/ -- convert to CurrentPage/Foobar, use 'Foobar' as text
|
||||||
|
// (from CurrentPage/CurrentSubPage)
|
||||||
|
|
||||||
|
byte[] ret = target; // default return value is no change
|
||||||
|
|
||||||
|
// Some namespaces don't allow subpages,
|
||||||
|
// so only perform processing if subpages are allowed
|
||||||
|
if (context_title != null && context_title.Ns().Subpages_enabled()) {
|
||||||
|
int hash = Bry_find_.Find_fwd(target, Byte_ascii.Hash);
|
||||||
|
byte[] suffix = null;
|
||||||
|
if (hash != Bry_find_.Not_found) {
|
||||||
|
suffix = Bry_.Mid(target, hash);
|
||||||
|
target = Bry_.Mid(target, 0, hash);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
suffix = Bry_.Empty;
|
||||||
|
}
|
||||||
|
// bug 7425
|
||||||
|
target = Bry_.Trim(target);
|
||||||
|
// Look at the first character
|
||||||
|
if (target != Bry_.Empty && target[0] == Byte_ascii.Slash) {
|
||||||
|
// / at end means we don't want the slash to be shown
|
||||||
|
int target_len = target.length;
|
||||||
|
int trailing_slashes_bgn = Bry_find_.Find_bwd_while(target, target_len, 0, Byte_ascii.Slash) + 1;
|
||||||
|
byte[] no_slash = null;
|
||||||
|
if (trailing_slashes_bgn != target_len) {
|
||||||
|
no_slash = target = Bry_.Mid(target, 1, trailing_slashes_bgn);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
no_slash = Bry_.Mid(target, 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
ret = Bry_.Add(context_title.Get_prefixed_text(), Byte_ascii.Slash_bry, Bry_.Trim(no_slash), suffix);
|
||||||
|
if (text == Bry_.Empty) {
|
||||||
|
text = Bry_.Add(target, suffix);
|
||||||
|
} // this might be changed for ugliness reasons
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
// check for .. subpage backlinks
|
||||||
|
int dot2_count = 0;
|
||||||
|
byte[] dot2_stripped = target;
|
||||||
|
while (Bry_.Match(dot2_stripped, 0, 3, Bry__dot2)) {
|
||||||
|
++dot2_count;
|
||||||
|
dot2_stripped = Bry_.Mid(dot2_stripped, 3);
|
||||||
|
}
|
||||||
|
if (dot2_count > 0) {
|
||||||
|
byte[][] exploded = Bry_split_.Split(context_title.Get_prefixed_text(), Byte_ascii.Slash);
|
||||||
|
int exploded_len = exploded.length;
|
||||||
|
if (exploded_len > dot2_count) { // not allowed to go below top level page
|
||||||
|
// PORTED: ret = implode('/', array_slice($exploded, 0, -dot2_count));
|
||||||
|
int implode_len = exploded_len - dot2_count;
|
||||||
|
for (int i = 0; i < implode_len; i++) {
|
||||||
|
if (i != 0) tmp.Add_byte(Byte_ascii.Slash);
|
||||||
|
tmp.Add(exploded[i]);
|
||||||
|
}
|
||||||
|
// / at the end means don't show full path
|
||||||
|
if (Bry_.Has_at_end(dot2_stripped, Byte_ascii.Slash)) {
|
||||||
|
dot2_stripped = Bry_.Mid(dot2_stripped, 0, dot2_stripped.length - 1);
|
||||||
|
if (text == Bry_.Empty) {
|
||||||
|
text = Bry_.Add(dot2_stripped, suffix);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
dot2_stripped = Bry_.Trim(dot2_stripped);
|
||||||
|
if (dot2_stripped != Bry_.Empty) {
|
||||||
|
tmp.Add_bry_many(Byte_ascii.Slash_bry, dot2_stripped);
|
||||||
|
}
|
||||||
|
tmp.Add(suffix);
|
||||||
|
ret = tmp.To_bry_and_clear();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
rv.Init(ret, text);
|
||||||
|
}
|
||||||
|
public byte[][] Split_trail(byte[] trail) {
|
||||||
|
int cur = 0;
|
||||||
|
int src_end = trail.length;
|
||||||
|
while (true) {
|
||||||
|
Object o = split_trail_trie.Match_at(trv, trail, cur, src_end);
|
||||||
|
if (o == null) break;
|
||||||
|
byte[] bry = (byte[])o;
|
||||||
|
cur += bry.length;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (cur == 0) { // no trail
|
||||||
|
split_trail_rv[0] = null;
|
||||||
|
split_trail_rv[1] = trail;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
split_trail_rv[0] = Bry_.Mid(trail, 0, cur);
|
||||||
|
split_trail_rv[1] = Bry_.Mid(trail, cur, src_end);
|
||||||
|
}
|
||||||
|
return split_trail_rv;
|
||||||
|
}
|
||||||
|
public void Make_image(Bry_bfr bfr, Xoa_ttl title, byte[] options, boolean holders) {
|
||||||
|
// Check if the options text is of the form "options|alt text"
|
||||||
|
// Options are:
|
||||||
|
// * thumbnail make a thumbnail with enlarge-icon and caption, alignment depends on lang
|
||||||
|
// * left no resizing, just left align. label is used for alt= only
|
||||||
|
// * right same, but right aligned
|
||||||
|
// * none same, but not aligned
|
||||||
|
// * ___px scale to ___ pixels width, no aligning. e.g. use in taxobox
|
||||||
|
// * center center the image
|
||||||
|
// * frame Keep original image size, no magnify-button.
|
||||||
|
// * framed Same as "frame"
|
||||||
|
// * frameless like 'thumb' but without a frame. Keeps user preferences for width
|
||||||
|
// * upright reduce width for upright images, rounded to full __0 px
|
||||||
|
// * border draw a 1px border around the image
|
||||||
|
// * alt Text for HTML alt attribute (defaults to empty)
|
||||||
|
// * class Set a class for img node
|
||||||
|
// * link Set the target of the image link. Can be external, interwiki, or local
|
||||||
|
// vertical-align values (no % or length right now):
|
||||||
|
// * baseline
|
||||||
|
// * sub
|
||||||
|
// * super
|
||||||
|
// * top
|
||||||
|
// * text-top
|
||||||
|
// * middle
|
||||||
|
// * bottom
|
||||||
|
// * text-bottom
|
||||||
|
|
||||||
|
// Protect LanguageConverter markup when splitting into parts
|
||||||
|
// $parts = StringUtils::delimiterExplode(
|
||||||
|
// '-{', '}-', '|', $options, true /* allow nesting */
|
||||||
|
// );
|
||||||
|
|
||||||
|
// Give extensions a chance to select the file revision for us
|
||||||
|
// $options = [];
|
||||||
|
// $descQuery = false;
|
||||||
|
// MW.HOOK:BeforeParserFetchFileAndTitle
|
||||||
|
|
||||||
|
// Fetch and register the file (file title may be different via hooks)
|
||||||
|
// list($file, $title) = $this->fetchFileAndTitle($title, $options);
|
||||||
|
|
||||||
|
// Get parameter map
|
||||||
|
// $handler = $file ? $file->getHandler() : false;
|
||||||
|
|
||||||
|
// list($paramMap, $mwArray) = $this->getImageParams($handler);
|
||||||
|
|
||||||
|
// if (!$file) {
|
||||||
|
// $this->addTrackingCategory('broken-file-category');
|
||||||
|
// }
|
||||||
|
|
||||||
|
// Process the input parameters
|
||||||
|
// $caption = '';
|
||||||
|
// $params = [ 'frame' => [], 'handler' => [],
|
||||||
|
// 'horizAlign' => [], 'vertAlign' => [] ];
|
||||||
|
// $seenformat = false;
|
||||||
|
// foreach ($parts as $part) {
|
||||||
|
// $part = trim($part);
|
||||||
|
// list($magicName, $value) = $mwArray->matchVariableStartToEnd($part);
|
||||||
|
// $validated = false;
|
||||||
|
// if (isset($paramMap[$magicName])) {
|
||||||
|
// list($type, $paramName) = $paramMap[$magicName];
|
||||||
|
|
||||||
|
// Special case; width and height come in one variable together
|
||||||
|
// if ($type === 'handler' && $paramName === 'width') {
|
||||||
|
// $parsedWidthParam = $this->parseWidthParam($value);
|
||||||
|
// if (isset($parsedWidthParam['width'])) {
|
||||||
|
// $width = $parsedWidthParam['width'];
|
||||||
|
// if ($handler->validateParam('width', $width)) {
|
||||||
|
// $params[$type]['width'] = $width;
|
||||||
|
// $validated = true;
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// if (isset($parsedWidthParam['height'])) {
|
||||||
|
// $height = $parsedWidthParam['height'];
|
||||||
|
// if ($handler->validateParam('height', $height)) {
|
||||||
|
// $params[$type]['height'] = $height;
|
||||||
|
// $validated = true;
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// else no validation -- T15436
|
||||||
|
// } else {
|
||||||
|
// if ($type === 'handler') {
|
||||||
|
// // Validate handler parameter
|
||||||
|
// $validated = $handler->validateParam($paramName, $value);
|
||||||
|
// } else {
|
||||||
|
// // Validate @gplx.Internal protected parameters
|
||||||
|
// switch ($paramName) {
|
||||||
|
// case 'manualthumb':
|
||||||
|
// case 'alt':
|
||||||
|
// case 'class':
|
||||||
|
// @todo FIXME: Possibly check validity here for
|
||||||
|
// manualthumb? downstream behavior seems odd with
|
||||||
|
// missing manual thumbs.
|
||||||
|
// $validated = true;
|
||||||
|
// $value = $this->stripAltText($value, $holders);
|
||||||
|
// break;
|
||||||
|
// case 'link':
|
||||||
|
// $chars = self::EXT_LINK_URL_CLASS;
|
||||||
|
// $addr = self::EXT_LINK_ADDR;
|
||||||
|
// $prots = $this->mUrlProtocols;
|
||||||
|
// if ($value === '') {
|
||||||
|
// $paramName = 'no-link';
|
||||||
|
// $value = true;
|
||||||
|
// $validated = true;
|
||||||
|
// } elseif (preg_match("/^((?i)$prots)/", $value)) {
|
||||||
|
// if (preg_match("/^((?i)$prots)$addr$chars*$/u", $value, $m)) {
|
||||||
|
// $paramName = 'link-url';
|
||||||
|
// $this->mOutput->addExternalLink($value);
|
||||||
|
// if ($this->mOptions->getExternalLinkTarget()) {
|
||||||
|
// $params[$type]['link-target'] = $this->mOptions->getExternalLinkTarget();
|
||||||
|
// }
|
||||||
|
// $validated = true;
|
||||||
|
// }
|
||||||
|
// } else {
|
||||||
|
// $linkTitle = Title::newFromText($value);
|
||||||
|
// if ($linkTitle) {
|
||||||
|
// $paramName = 'link-title';
|
||||||
|
// $value = $linkTitle;
|
||||||
|
// $this->mOutput->addLink($linkTitle);
|
||||||
|
// $validated = true;
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// break;
|
||||||
|
// case 'frameless':
|
||||||
|
// case 'framed':
|
||||||
|
// case 'thumbnail':
|
||||||
|
// // use first appearing option, discard others.
|
||||||
|
// $validated = !$seenformat;
|
||||||
|
// $seenformat = true;
|
||||||
|
// break;
|
||||||
|
// default:
|
||||||
|
// // Most other things appear to be empty or numeric...
|
||||||
|
// $validated = ($value === false || is_numeric(trim($value)));
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
|
||||||
|
// if ($validated) {
|
||||||
|
// $params[$type][$paramName] = $value;
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// if (!$validated) {
|
||||||
|
// $caption = $part;
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
|
||||||
|
// Process alignment parameters
|
||||||
|
// if ($params['horizAlign']) {
|
||||||
|
// $params['frame']['align'] = key($params['horizAlign']);
|
||||||
|
// }
|
||||||
|
// if ($params['vertAlign']) {
|
||||||
|
// $params['frame']['valign'] = key($params['vertAlign']);
|
||||||
|
// }
|
||||||
|
|
||||||
|
// $params['frame']['caption'] = $caption;
|
||||||
|
|
||||||
|
// Will the image be presented in a frame, with the caption below?
|
||||||
|
// $imageIsFramed = isset($params['frame']['frame'])
|
||||||
|
// || isset($params['frame']['framed'])
|
||||||
|
// || isset($params['frame']['thumbnail'])
|
||||||
|
// || isset($params['frame']['manualthumb']);
|
||||||
|
|
||||||
|
// In the old days, [[Image:Foo|text...]] would set alt text. Later it
|
||||||
|
// came to also set the caption, ordinary text after the image -- which
|
||||||
|
// makes no sense, because that just repeats the text multiple times in
|
||||||
|
// screen readers. It *also* came to set the title attribute.
|
||||||
|
// Now that we have an alt attribute, we should not set the alt text to
|
||||||
|
// equal the caption: that's worse than useless, it just repeats the
|
||||||
|
// text. This is the framed/thumbnail case. If there's no caption, we
|
||||||
|
// use the unnamed parameter for alt text as well, just for the time be-
|
||||||
|
// ing, if the unnamed param is set and the alt param is not.
|
||||||
|
// For the future, we need to figure out if we want to tweak this more,
|
||||||
|
// e.g., introducing a title= parameter for the title; ignoring the un-
|
||||||
|
// named parameter entirely for images without a caption; adding an ex-
|
||||||
|
// plicit caption= parameter and preserving the old magic unnamed para-
|
||||||
|
// meter for BC; ...
|
||||||
|
// if ($imageIsFramed) { // Framed image
|
||||||
|
// if ($caption === '' && !isset($params['frame']['alt'])) {
|
||||||
|
// // No caption or alt text, add the filename as the alt text so
|
||||||
|
// // that screen readers at least get some description of the image
|
||||||
|
// $params['frame']['alt'] = $title->getText();
|
||||||
|
// }
|
||||||
|
// Do not set $params['frame']['title'] because tooltips don't make sense
|
||||||
|
// for framed images
|
||||||
|
// } else { // Inline image
|
||||||
|
// if (!isset($params['frame']['alt'])) {
|
||||||
|
// // No alt text, use the "caption" for the alt text
|
||||||
|
// if ($caption !== '') {
|
||||||
|
// $params['frame']['alt'] = $this->stripAltText($caption, $holders);
|
||||||
|
// } else {
|
||||||
|
// // No caption, fall back to using the filename for the
|
||||||
|
// // alt text
|
||||||
|
// $params['frame']['alt'] = $title->getText();
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// Use the "caption" for the tooltip text
|
||||||
|
// $params['frame']['title'] = $this->stripAltText($caption, $holders);
|
||||||
|
// }
|
||||||
|
|
||||||
|
// MW.HOOK:ParserMakeImageParams
|
||||||
|
|
||||||
|
// Linker does the rest
|
||||||
|
// $time = isset($options['time']) ? $options['time'] : false;
|
||||||
|
// $ret = Linker::makeImageLink($this, $title, $file, $params['frame'], $params['handler'],
|
||||||
|
// $time, $descQuery, $this->mOptions->getThumbSize());
|
||||||
|
|
||||||
|
// Give the handler a chance to modify the parser Object
|
||||||
|
// if ($handler) {
|
||||||
|
// $handler->parserTransformHook($this, $file);
|
||||||
|
// }
|
||||||
|
|
||||||
|
// return $ret;
|
||||||
|
}
|
||||||
|
// public function getImageParams($handler) {
|
||||||
|
// if ($handler) {
|
||||||
|
// $handlerClass = get_class($handler);
|
||||||
|
// }
|
||||||
|
// else {
|
||||||
|
// $handlerClass = '';
|
||||||
|
// }
|
||||||
|
// if (!isset($this->mImageParams[$handlerClass])) {
|
||||||
|
// Initialise static lists
|
||||||
|
// static $internalParamNames = [
|
||||||
|
// 'horizAlign' => [ 'left', 'right', 'center', 'none' ],
|
||||||
|
// 'vertAlign' => [ 'baseline', 'sub', 'super', 'top', 'text-top', 'middle',
|
||||||
|
// 'bottom', 'text-bottom' ],
|
||||||
|
// 'frame' => [ 'thumbnail', 'manualthumb', 'framed', 'frameless',
|
||||||
|
// 'upright', 'border', 'link', 'alt', 'class' ],
|
||||||
|
// ];
|
||||||
|
// static $internalParamMap;
|
||||||
|
// if (!$internalParamMap) {
|
||||||
|
// $internalParamMap = [];
|
||||||
|
// foreach ($internalParamNames as $type => $names) {
|
||||||
|
// foreach ($names as $name) {
|
||||||
|
// $magicName = str_replace('-', '_', "img_$name");
|
||||||
|
// $internalParamMap[$magicName] = [ $type, $name ];
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
|
||||||
|
// Add handler params
|
||||||
|
// $paramMap = $internalParamMap;
|
||||||
|
// if ($handler) {
|
||||||
|
// $handlerParamMap = $handler->getParamMap();
|
||||||
|
// foreach ($handlerParamMap as $magic => $paramName) {
|
||||||
|
// $paramMap[$magic] = [ 'handler', $paramName ];
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// $this->mImageParams[$handlerClass] = $paramMap;
|
||||||
|
// $this->mImageParamsMagicArray[$handlerClass] = new MagicWordArray(array_keys($paramMap));
|
||||||
|
// }
|
||||||
|
// return [ $this->mImageParams[$handlerClass], $this->mImageParamsMagicArray[$handlerClass] ];
|
||||||
|
// }
|
||||||
|
// /**
|
||||||
|
// * Make HTML for a thumbnail including image, border and caption
|
||||||
|
// * @param Title $title
|
||||||
|
// * @param File|boolean $file File Object or false if it doesn't exist
|
||||||
|
// * @param String $label
|
||||||
|
// * @param String $alt
|
||||||
|
// * @param String $align
|
||||||
|
// * @param array $params
|
||||||
|
// * @param boolean $framed
|
||||||
|
// * @param String $manualthumb
|
||||||
|
// * @return String
|
||||||
|
// */
|
||||||
|
// public static function makeThumbLinkObj( Title $title, $file, $label = '', $alt,
|
||||||
|
// $align = 'right', $params = [], $framed = false, $manualthumb = ""
|
||||||
|
// ) {
|
||||||
|
// $frameParams = [
|
||||||
|
// 'alt' => $alt,
|
||||||
|
// 'caption' => $label,
|
||||||
|
// 'align' => $align
|
||||||
|
// ];
|
||||||
|
// if ( $framed ) {
|
||||||
|
// $frameParams['framed'] = true;
|
||||||
|
// }
|
||||||
|
// if ( $manualthumb ) {
|
||||||
|
// $frameParams['manualthumb'] = $manualthumb;
|
||||||
|
// }
|
||||||
|
// return self::makeThumbLink2( $title, $file, $frameParams, $params );
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// /**
|
||||||
|
// * @param Title $title
|
||||||
|
// * @param File $file
|
||||||
|
// * @param array $frameParams
|
||||||
|
// * @param array $handlerParams
|
||||||
|
// * @param boolean $time
|
||||||
|
// * @param String $query
|
||||||
|
// * @return String
|
||||||
|
// */
|
||||||
|
// public static function makeThumbLink2( Title $title, $file, $frameParams = [],
|
||||||
|
// $handlerParams = [], $time = false, $query = ""
|
||||||
|
// ) {
|
||||||
|
// $exists = $file && $file->exists();
|
||||||
|
//
|
||||||
|
// $page = isset( $handlerParams['page'] ) ? $handlerParams['page'] : false;
|
||||||
|
// if ( !isset( $frameParams['align'] ) ) {
|
||||||
|
// $frameParams['align'] = 'right';
|
||||||
|
// }
|
||||||
|
// if ( !isset( $frameParams['alt'] ) ) {
|
||||||
|
// $frameParams['alt'] = '';
|
||||||
|
// }
|
||||||
|
// if ( !isset( $frameParams['title'] ) ) {
|
||||||
|
// $frameParams['title'] = '';
|
||||||
|
// }
|
||||||
|
// if ( !isset( $frameParams['caption'] ) ) {
|
||||||
|
// $frameParams['caption'] = '';
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// if ( empty( $handlerParams['width'] ) ) {
|
||||||
|
// // Reduce width for upright images when parameter 'upright' is used
|
||||||
|
// $handlerParams['width'] = isset( $frameParams['upright'] ) ? 130 : 180;
|
||||||
|
// }
|
||||||
|
// $thumb = false;
|
||||||
|
// $noscale = false;
|
||||||
|
// $manualthumb = false;
|
||||||
|
//
|
||||||
|
// if ( !$exists ) {
|
||||||
|
// $outerWidth = $handlerParams['width'] + 2;
|
||||||
|
// } else {
|
||||||
|
// if ( isset( $frameParams['manualthumb'] ) ) {
|
||||||
|
// # Use manually specified thumbnail
|
||||||
|
// $manual_title = Title::makeTitleSafe( NS_FILE, $frameParams['manualthumb'] );
|
||||||
|
// if ( $manual_title ) {
|
||||||
|
// $manual_img = wfFindFile( $manual_title );
|
||||||
|
// if ( $manual_img ) {
|
||||||
|
// $thumb = $manual_img->getUnscaledThumb( $handlerParams );
|
||||||
|
// $manualthumb = true;
|
||||||
|
// } else {
|
||||||
|
// $exists = false;
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// } elseif ( isset( $frameParams['framed'] ) ) {
|
||||||
|
// // Use image dimensions, don't scale
|
||||||
|
// $thumb = $file->getUnscaledThumb( $handlerParams );
|
||||||
|
// $noscale = true;
|
||||||
|
// } else {
|
||||||
|
// # Do not present an image bigger than the source, for bitmap-style images
|
||||||
|
// # This is a hack to maintain compatibility with arbitrary pre-1.10 behavior
|
||||||
|
// $srcWidth = $file->getWidth( $page );
|
||||||
|
// if ( $srcWidth && !$file->mustRender() && $handlerParams['width'] > $srcWidth ) {
|
||||||
|
// $handlerParams['width'] = $srcWidth;
|
||||||
|
// }
|
||||||
|
// $thumb = $file->transform( $handlerParams );
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// if ( $thumb ) {
|
||||||
|
// $outerWidth = $thumb->getWidth() + 2;
|
||||||
|
// } else {
|
||||||
|
// $outerWidth = $handlerParams['width'] + 2;
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// # ThumbnailImage::toHtml() already adds page= onto the end of DjVu URLs
|
||||||
|
// # So we don't need to pass it here in $query. However, the URL for the
|
||||||
|
// # zoom icon still needs it, so we make a unique query for it. See bug 14771
|
||||||
|
// $url = $title->getLocalURL( $query );
|
||||||
|
// if ( $page ) {
|
||||||
|
// $url = wfAppendQuery( $url, [ 'page' => $page ] );
|
||||||
|
// }
|
||||||
|
// if ( $manualthumb
|
||||||
|
// && !isset( $frameParams['link-title'] )
|
||||||
|
// && !isset( $frameParams['link-url'] )
|
||||||
|
// && !isset( $frameParams['no-link'] ) ) {
|
||||||
|
// $frameParams['link-url'] = $url;
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// $s = "<div class=\"thumb t{$frameParams['align']}\">"
|
||||||
|
// . "<div class=\"thumbinner\" style=\"width:{$outerWidth}px;\">";
|
||||||
|
//
|
||||||
|
// if ( !$exists ) {
|
||||||
|
// $s .= self::makeBrokenImageLinkObj( $title, $frameParams['title'], '', '', '', $time == true );
|
||||||
|
// $zoomIcon = '';
|
||||||
|
// } elseif ( !$thumb ) {
|
||||||
|
// $s .= wfMessage( 'thumbnail_error', '' )->escaped();
|
||||||
|
// $zoomIcon = '';
|
||||||
|
// } else {
|
||||||
|
// if ( !$noscale && !$manualthumb ) {
|
||||||
|
// self::processResponsiveImages( $file, $thumb, $handlerParams );
|
||||||
|
// }
|
||||||
|
// $params = [
|
||||||
|
// 'alt' => $frameParams['alt'],
|
||||||
|
// 'title' => $frameParams['title'],
|
||||||
|
// 'img-class' => ( isset( $frameParams['class'] ) && $frameParams['class'] !== ''
|
||||||
|
// ? $frameParams['class'] . ' '
|
||||||
|
// : '' ) . 'thumbimage'
|
||||||
|
// ];
|
||||||
|
// $params = self::getImageLinkMTOParams( $frameParams, $query ) + $params;
|
||||||
|
// $s .= $thumb->toHtml( $params );
|
||||||
|
// if ( isset( $frameParams['framed'] ) ) {
|
||||||
|
// $zoomIcon = "";
|
||||||
|
// } else {
|
||||||
|
// $zoomIcon = Html::rawElement( 'div', [ 'class' => 'magnify' ],
|
||||||
|
// Html::rawElement( 'a', [
|
||||||
|
// 'href' => $url,
|
||||||
|
// 'class' => 'internal',
|
||||||
|
// 'title' => wfMessage( 'thumbnail-more' )->text() ],
|
||||||
|
// "" ) );
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// $s .= ' <div class="thumbcaption">' . $zoomIcon . $frameParams['caption'] . "</div></div></div>";
|
||||||
|
// return str_replace( "\n", ' ', $s );
|
||||||
|
// }
|
||||||
|
// /**
|
||||||
|
// * Make a "broken" link to an image
|
||||||
|
// *
|
||||||
|
// * @since 1.16.3
|
||||||
|
// * @param Title $title
|
||||||
|
// * @param String $label Link label (plain text)
|
||||||
|
// * @param String $query Query String
|
||||||
|
// * @param String $unused1 Unused parameter kept for b/c
|
||||||
|
// * @param String $unused2 Unused parameter kept for b/c
|
||||||
|
// * @param boolean $time A file of a certain timestamp was requested
|
||||||
|
// * @return String
|
||||||
|
// */
|
||||||
|
// public static function makeBrokenImageLinkObj( $title, $label = '',
|
||||||
|
// $query = '', $unused1 = '', $unused2 = '', $time = false
|
||||||
|
// ) {
|
||||||
|
// if ( !$title instanceof Title ) {
|
||||||
|
// wfWarn( __METHOD__ . ': Requires $title to be a Title Object.' );
|
||||||
|
// return "<!-- ERROR -->" . htmlspecialchars( $label );
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// global $wgEnableUploads, $wgUploadMissingFileUrl, $wgUploadNavigationUrl;
|
||||||
|
// if ( $label == '' ) {
|
||||||
|
// $label = $title->getPrefixedText();
|
||||||
|
// }
|
||||||
|
// $encLabel = htmlspecialchars( $label );
|
||||||
|
// $currentExists = $time ? ( wfFindFile( $title ) != false ) : false;
|
||||||
|
//
|
||||||
|
// if ( ( $wgUploadMissingFileUrl || $wgUploadNavigationUrl || $wgEnableUploads )
|
||||||
|
// && !$currentExists
|
||||||
|
// ) {
|
||||||
|
// $redir = RepoGroup::singleton()->getLocalRepo()->checkRedirect( $title );
|
||||||
|
//
|
||||||
|
// if ( $redir ) {
|
||||||
|
// // We already know it's a redirect, so mark it
|
||||||
|
// // accordingly
|
||||||
|
// return self::link(
|
||||||
|
// $title,
|
||||||
|
// $encLabel,
|
||||||
|
// [ 'class' => 'mw-redirect' ],
|
||||||
|
// wfCgiToArray( $query ),
|
||||||
|
// [ 'known', 'noclasses' ]
|
||||||
|
// );
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// $href = self::getUploadUrl( $title, $query );
|
||||||
|
//
|
||||||
|
// return '<a href="' . htmlspecialchars( $href ) . '" class="new" title="' .
|
||||||
|
// htmlspecialchars( $title->getPrefixedText(), ENT_QUOTES ) . '">' .
|
||||||
|
// $encLabel . '</a>';
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// return self::link( $title, $encLabel, [], wfCgiToArray( $query ), [ 'known', 'noclasses' ] );
|
||||||
|
// }
|
||||||
|
// /**
|
||||||
|
// * Create a direct link to a given uploaded file.
|
||||||
|
// *
|
||||||
|
// * @since 1.16.3
|
||||||
|
// * @param Title $title
|
||||||
|
// * @param String $html Pre-sanitized HTML
|
||||||
|
// * @param String $time MW timestamp of file creation time
|
||||||
|
// * @return String HTML
|
||||||
|
// */
|
||||||
|
// public static function makeMediaLinkObj( $title, $html = '', $time = false ) {
|
||||||
|
// $img = wfFindFile( $title, [ 'time' => $time ] );
|
||||||
|
// return self::makeMediaLinkFile( $title, $img, $html );
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// /**
|
||||||
|
// * Create a direct link to a given uploaded file.
|
||||||
|
// * This will make a broken link if $file is false.
|
||||||
|
// *
|
||||||
|
// * @since 1.16.3
|
||||||
|
// * @param Title $title
|
||||||
|
// * @param File|boolean $file File Object or false
|
||||||
|
// * @param String $html Pre-sanitized HTML
|
||||||
|
// * @return String HTML
|
||||||
|
// *
|
||||||
|
// * @todo Handle invalid or missing images better.
|
||||||
|
// */
|
||||||
|
// public static function makeMediaLinkFile( Title $title, $file, $html = '' ) {
|
||||||
|
// if ( $file && $file->exists() ) {
|
||||||
|
// $url = $file->getUrl();
|
||||||
|
// $class = 'internal';
|
||||||
|
// } else {
|
||||||
|
// $url = self::getUploadUrl( $title );
|
||||||
|
// $class = 'new';
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// $alt = $title->getText();
|
||||||
|
// if ( $html == '' ) {
|
||||||
|
// $html = $alt;
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// $ret = '';
|
||||||
|
// $attribs = [
|
||||||
|
// 'href' => $url,
|
||||||
|
// 'class' => $class,
|
||||||
|
// 'title' => $alt
|
||||||
|
// ];
|
||||||
|
//
|
||||||
|
// if ( !Hooks::run( 'LinkerMakeMediaLinkFile',
|
||||||
|
// [ $title, $file, &$html, &$attribs, &$ret ] ) ) {
|
||||||
|
// wfDebug( "Hook LinkerMakeMediaLinkFile changed the output of link "
|
||||||
|
// . "with url {$url} and text {$html} to {$ret}\n", true );
|
||||||
|
// return $ret;
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// return Html::rawElement( 'a', $attribs, $html );
|
||||||
|
// }
|
||||||
|
private static final byte[] Bry__dot2 = Bry_.new_a7("../");
|
||||||
|
}
|
||||||
|
class Linker_rel_splitter implements gplx.core.brys.Bry_split_wkr {
|
||||||
|
private final Hash_adp_bry hash = Hash_adp_bry.cs();
|
||||||
|
private final Bry_bfr bfr = Bry_bfr_.New();
|
||||||
|
public int Split(byte[] src, int itm_bgn, int itm_end) { // $combined = array_unique( array_merge( $newRels, $oldRels ) );
|
||||||
|
byte[] val = (byte[])hash.Get_by_mid(src, itm_bgn, itm_end);
|
||||||
|
if (val == null) {
|
||||||
|
val = Bry_.Mid(src, itm_bgn, itm_end);
|
||||||
|
hash.Add_as_key_and_val(val);
|
||||||
|
if (bfr.Len_gt_0()) bfr.Add_byte_space();
|
||||||
|
bfr.Add(val);
|
||||||
|
}
|
||||||
|
return Bry_split_.Rv__ok;
|
||||||
|
}
|
||||||
|
public byte[] To_bry() {
|
||||||
|
hash.Clear();
|
||||||
|
return bfr.To_bry_and_clear();
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,27 @@
|
|||||||
|
/*
|
||||||
|
XOWA: the XOWA Offline Wiki Application
|
||||||
|
Copyright (C) 2012 gnosygnu@gmail.com
|
||||||
|
|
||||||
|
This program is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Affero General Public License as
|
||||||
|
published by the Free Software Foundation, either version 3 of the
|
||||||
|
License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Affero General Public License
|
||||||
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
package gplx.xowa.mws; import gplx.*; import gplx.xowa.*;
|
||||||
|
/**
 * Mutable result holder with two byte[] slots: link and text.
 * The same instance can be re-filled by calling Init again.
 */
public class Xomw_linker__normalize_subpage_link {
	public byte[] link;
	public byte[] text;
	/**
	 * Stores both values (references are kept as-is; no copy is made).
	 * @return this instance, to allow call chaining
	 */
	public Xomw_linker__normalize_subpage_link Init(byte[] link, byte[] text) {
		this.text = text;
		this.link = link;
		return this;
	}
}
|
@ -0,0 +1,43 @@
|
|||||||
|
/*
|
||||||
|
XOWA: the XOWA Offline Wiki Application
|
||||||
|
Copyright (C) 2012 gnosygnu@gmail.com
|
||||||
|
|
||||||
|
This program is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Affero General Public License as
|
||||||
|
published by the Free Software Foundation, either version 3 of the
|
||||||
|
License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Affero General Public License
|
||||||
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
package gplx.xowa.mws; import gplx.*; import gplx.xowa.*;
|
||||||
|
import org.junit.*; import gplx.core.tests.*;
|
||||||
|
public class Xomw_linker__normalize_subpage_link__tst {
|
||||||
|
private final Xomw_linker__normalize_subpage_link__fxt fxt = new Xomw_linker__normalize_subpage_link__fxt();
|
||||||
|
@Test public void None() {fxt.Test__normalize_subpage_link("A/B/C" , "Z" , "" , "Z" , "");}
|
||||||
|
@Test public void Hash() {fxt.Test__normalize_subpage_link("A/B/C" , "/Y#Z" , "" , "A/B/C/Y#Z" , "/Y#Z");}
|
||||||
|
@Test public void Slash__basic() {fxt.Test__normalize_subpage_link("A/B/C" , "/Z" , "" , "A/B/C/Z" , "/Z");}
|
||||||
|
@Test public void Slash__slash() {fxt.Test__normalize_subpage_link("A/B/C" , "/Z/" , "" , "A/B/C/Z" , "Z");}
|
||||||
|
@Test public void Dot2__empty() {fxt.Test__normalize_subpage_link("A/B/C" , "../" , "" , "A/B" , "");}
|
||||||
|
@Test public void Dot2__many() {fxt.Test__normalize_subpage_link("A/B/C" , "../../Z" , "z1" , "A/Z" , "z1");}
|
||||||
|
@Test public void Dot2__trailing() {fxt.Test__normalize_subpage_link("A/B/C" , "../../Z/" , "" , "A/Z" , "Z");}
|
||||||
|
}
|
||||||
|
class Xomw_linker__normalize_subpage_link__fxt {
|
||||||
|
private final Xomw_linker mgr = new Xomw_linker();
|
||||||
|
private final Xowe_wiki wiki;
|
||||||
|
private final Xomw_linker__normalize_subpage_link normalize_subpage_link = new Xomw_linker__normalize_subpage_link();
|
||||||
|
public Xomw_linker__normalize_subpage_link__fxt() {
|
||||||
|
Xoae_app app = Xoa_app_fxt.Make__app__edit();
|
||||||
|
this.wiki = Xoa_app_fxt.Make__wiki__edit(app);
|
||||||
|
}
|
||||||
|
public void Test__normalize_subpage_link(String page_title_str, String link, String text, String expd_link, String expd_text) {
|
||||||
|
mgr.Normalize_subpage_link(normalize_subpage_link, wiki.Ttl_parse(Bry_.new_u8(page_title_str)), Bry_.new_u8(link), Bry_.new_u8(text));
|
||||||
|
Gftest.Eq__str(expd_link, String_.new_u8(normalize_subpage_link.link));
|
||||||
|
Gftest.Eq__str(expd_text, String_.new_u8(normalize_subpage_link.text));
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,39 @@
|
|||||||
|
/*
|
||||||
|
XOWA: the XOWA Offline Wiki Application
|
||||||
|
Copyright (C) 2012 gnosygnu@gmail.com
|
||||||
|
|
||||||
|
This program is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Affero General Public License as
|
||||||
|
published by the Free Software Foundation, either version 3 of the
|
||||||
|
License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Affero General Public License
|
||||||
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
package gplx.xowa.mws; import gplx.*; import gplx.xowa.*;
|
||||||
|
import org.junit.*; import gplx.core.tests.*; import gplx.core.btries.*;
|
||||||
|
public class Xomw_linker__split_trail__tst {
|
||||||
|
private final Xomw_linker__split_trail__fxt fxt = new Xomw_linker__split_trail__fxt();
|
||||||
|
@Test public void Basic() {fxt.Test__split_trail("abc def" , "abc" , " def");}
|
||||||
|
@Test public void None() {fxt.Test__split_trail(" abc" , null , " abc");}
|
||||||
|
}
|
||||||
|
class Xomw_linker__split_trail__fxt {
|
||||||
|
private final Xomw_linker linker = new Xomw_linker();
|
||||||
|
private final Btrie_slim_mgr trie = Btrie_slim_mgr.cs();
|
||||||
|
public Xomw_linker__split_trail__fxt() {
|
||||||
|
String[] ary = new String[] {"a", "b", "c", "d", "e", "f"};
|
||||||
|
for (String itm : ary)
|
||||||
|
trie.Add_str_str(itm, itm);
|
||||||
|
linker.Init_by_wiki(trie);
|
||||||
|
}
|
||||||
|
public void Test__split_trail(String trail_str, String expd_inside, String expd_trail) {
|
||||||
|
byte[][] split_trail = linker.Split_trail(Bry_.new_u8(trail_str));
|
||||||
|
Gftest.Eq__str(expd_inside, String_.new_u8(split_trail[0]));
|
||||||
|
Gftest.Eq__str(expd_trail , String_.new_u8(split_trail[1]));
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,538 @@
|
|||||||
|
/*
|
||||||
|
XOWA: the XOWA Offline Wiki Application
|
||||||
|
Copyright (C) 2012 gnosygnu@gmail.com
|
||||||
|
|
||||||
|
This program is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Affero General Public License as
|
||||||
|
published by the Free Software Foundation, either version 3 of the
|
||||||
|
License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Affero General Public License
|
||||||
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
package gplx.xowa.mws; import gplx.*; import gplx.xowa.*;
|
||||||
|
import gplx.core.encoders.*; import gplx.langs.htmls.entitys.*;
|
||||||
|
import gplx.xowa.parsers.htmls.*;
|
||||||
|
import gplx.xowa.mws.parsers.*;
|
||||||
|
public class Xomw_sanitizer {
|
||||||
|
private final Mwh_doc_wkr__atr_bldr atr_bldr = new Mwh_doc_wkr__atr_bldr();
|
||||||
|
private final Mwh_atr_parser atr_parser = new Mwh_atr_parser();
|
||||||
|
public void Fix_tag_attributes(Bry_bfr bfr, byte[] tag_name, byte[] atrs) {
|
||||||
|
atr_bldr.Atrs__clear();
|
||||||
|
atr_parser.Parse(atr_bldr, -1, -1, atrs, 0, atrs.length);
|
||||||
|
int len = atr_bldr.Atrs__len();
|
||||||
|
|
||||||
|
// PORTED: Sanitizer.php|safeEncodeTagAttributes
|
||||||
|
for (int i = 0; i < len; i++) {
|
||||||
|
// $encAttribute = htmlspecialchars( $attribute );
|
||||||
|
// $encValue = Sanitizer::safeEncodeAttribute( $value );
|
||||||
|
// $attribs[] = "$encAttribute=\"$encValue\"";
|
||||||
|
Mwh_atr_itm itm = atr_bldr.Atrs__get_at(i);
|
||||||
|
bfr.Add_byte_space(); // "return count( $attribs ) ? ' ' . implode( ' ', $attribs ) : '';"
|
||||||
|
bfr.Add_bry_escape_html(itm.Key_bry(), itm.Key_bgn(), itm.Key_end());
|
||||||
|
bfr.Add_byte_eq().Add_byte_quote();
|
||||||
|
bfr.Add(itm.Val_as_bry()); // TODO.XO:Sanitizer::encode
|
||||||
|
bfr.Add_byte_quote();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
public void Normalize_char_references(Xomw_parser_bfr pbfr) {
|
||||||
|
// XO.PBFR
|
||||||
|
Bry_bfr src_bfr = pbfr.Src();
|
||||||
|
byte[] src = src_bfr.Bfr();
|
||||||
|
int src_bgn = 0;
|
||||||
|
int src_end = src_bfr.Len();
|
||||||
|
Bry_bfr bfr = pbfr.Trg();
|
||||||
|
pbfr.Switch();
|
||||||
|
|
||||||
|
Normalize_char_references(bfr, Bool_.N, src, src_bgn, src_end);
|
||||||
|
}
|
||||||
|
public byte[] Normalize_char_references(Bry_bfr bfr, boolean lone_bfr, byte[] src, int src_bgn, int src_end) {
|
||||||
|
// assert static structs
|
||||||
|
if (Normalize__dec == null) {
|
||||||
|
synchronized (Xomw_sanitizer.class) {
|
||||||
|
html_entities = Html_entities_new();
|
||||||
|
Normalize__dec = Bool_ary_bldr.New_u8().Set_rng(Byte_ascii.Num_0, Byte_ascii.Num_9).To_ary();
|
||||||
|
Normalize__hex = Bool_ary_bldr.New_u8()
|
||||||
|
.Set_rng(Byte_ascii.Num_0, Byte_ascii.Num_9)
|
||||||
|
.Set_rng(Byte_ascii.Ltr_A, Byte_ascii.Ltr_Z)
|
||||||
|
.Set_rng(Byte_ascii.Ltr_a, Byte_ascii.Ltr_z)
|
||||||
|
.To_ary();
|
||||||
|
Normalize__ent = Bool_ary_bldr.New_u8()
|
||||||
|
.Set_rng(Byte_ascii.Num_0, Byte_ascii.Num_9)
|
||||||
|
.Set_rng(Byte_ascii.Ltr_A, Byte_ascii.Ltr_Z)
|
||||||
|
.Set_rng(Byte_ascii.Ltr_a, Byte_ascii.Ltr_z)
|
||||||
|
.Set_rng(128, 255)
|
||||||
|
.To_ary();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// XO.BRY_BFR
|
||||||
|
boolean dirty = false;
|
||||||
|
int cur = src_bgn;
|
||||||
|
boolean called_by_bry = bfr == null;
|
||||||
|
|
||||||
|
while (true) {
|
||||||
|
// search for "&"
|
||||||
|
int find_bgn = Bry_find_.Find_fwd(src, Byte_ascii.Amp, cur);
|
||||||
|
if (find_bgn == Bry_find_.Not_found) { // "&" not found; exit
|
||||||
|
if (dirty)
|
||||||
|
bfr.Add_mid(src, cur, src_end);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
int ent_bgn = find_bgn + 1; // +1 to skip &
|
||||||
|
|
||||||
|
// get regex; (a) dec (	); (b) hex (ÿ); (c) entity (α);
|
||||||
|
boolean[] regex = null;
|
||||||
|
// check for #;
|
||||||
|
if (ent_bgn < src_end && src[ent_bgn] == Byte_ascii.Hash) {
|
||||||
|
ent_bgn++;
|
||||||
|
if (ent_bgn < src_end) {
|
||||||
|
byte nxt = src[ent_bgn];
|
||||||
|
// check for x
|
||||||
|
if (nxt == Byte_ascii.Ltr_X || nxt == Byte_ascii.Ltr_x) {
|
||||||
|
ent_bgn++;
|
||||||
|
regex = Normalize__hex;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (regex == null)
|
||||||
|
regex = Normalize__dec;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
regex = Normalize__ent;
|
||||||
|
}
|
||||||
|
|
||||||
|
// keep looping until invalid regex
|
||||||
|
int ent_end = ent_bgn;
|
||||||
|
byte b = Byte_ascii.Null;
|
||||||
|
for (int i = ent_bgn; i < src_end; i++) {
|
||||||
|
b = src[i];
|
||||||
|
if (regex[b])
|
||||||
|
ent_end++;
|
||||||
|
else
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
// mark dirty; can optimize later by checking if "<" already exists
|
||||||
|
dirty = true;
|
||||||
|
if (bfr == null) bfr = Bry_bfr_.New();
|
||||||
|
bfr.Add_mid(src, cur, find_bgn); // add everything before &
|
||||||
|
|
||||||
|
// invalid <- regex ended, but not at semic
|
||||||
|
if (b != Byte_ascii.Semic) {
|
||||||
|
bfr.Add(Gfh_entity_.Amp_bry); // transform "&" to "&"
|
||||||
|
cur = find_bgn + 1; // position after "&"
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// do normalization
|
||||||
|
byte[] name = Bry_.Mid(src, ent_bgn, ent_end);
|
||||||
|
boolean ret = false;
|
||||||
|
if (regex == Normalize__ent) {
|
||||||
|
Normalize_entity(bfr, name);
|
||||||
|
ret = true;
|
||||||
|
}
|
||||||
|
else if (regex == Normalize__dec) {
|
||||||
|
ret = Dec_char_reference(bfr, name);
|
||||||
|
}
|
||||||
|
else if (regex == Normalize__hex) {
|
||||||
|
ret = Hex_char_reference(bfr, name);
|
||||||
|
}
|
||||||
|
if (!ret) {
|
||||||
|
bfr.Add(Gfh_entity_.Amp_bry); // transform "&" to "&"
|
||||||
|
bfr.Add_bry_escape_html(src, find_bgn + 1, ent_end + 1); // "find_bgn + 1" to start after "&"; "ent_end + 1" to include ";"
|
||||||
|
}
|
||||||
|
|
||||||
|
cur = ent_end + 1; // +1 to position after ";"
|
||||||
|
}
|
||||||
|
|
||||||
|
// XO.BRY_BFR
|
||||||
|
if (dirty) {
|
||||||
|
if (called_by_bry)
|
||||||
|
return bfr.To_bry_and_clear();
|
||||||
|
else
|
||||||
|
return Bry_.Empty;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
if (called_by_bry) {
|
||||||
|
if (src_bgn == 0 && src_end == src.length)
|
||||||
|
return src;
|
||||||
|
else
|
||||||
|
return Bry_.Mid(src, src_bgn, src_end);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
if (lone_bfr)
|
||||||
|
bfr.Add_mid(src, src_bgn, src_end);
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// If the named entity is defined in the HTML 4.0/XHTML 1.0 DTD,
|
||||||
|
// return the equivalent numeric entity reference (except for the core <
|
||||||
|
// > & "). If the entity is a MediaWiki-specific alias, returns
|
||||||
|
// the HTML equivalent. Otherwise, returns HTML-escaped text of
|
||||||
|
// pseudo-entity source (eg &foo;)
|
||||||
|
private void Normalize_entity(Bry_bfr bfr, byte[] name) {
|
||||||
|
Object o = html_entities.Get_by_bry(name);
|
||||||
|
if (o == null) {
|
||||||
|
bfr.Add_str_a7("&").Add(name).Add_byte_semic();
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
Xomw_html_ent entity = (Xomw_html_ent)o;
|
||||||
|
bfr.Add(entity.html);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean Dec_char_reference(Bry_bfr bfr, byte[] codepoint) {
|
||||||
|
int point = Bry_.To_int_or(codepoint, -1);
|
||||||
|
if (Validate_codepoint(point)) {
|
||||||
|
bfr.Add_str_a7("&#").Add_int_variable(point).Add_byte_semic();
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean Hex_char_reference(Bry_bfr bfr, byte[] codepoint) {
|
||||||
|
int point = Hex_utl_.Parse_or(codepoint, -1);
|
||||||
|
if (Validate_codepoint(point)) {
|
||||||
|
bfr.Add_str_a7("&#x");
|
||||||
|
Hex_utl_.Write_bfr(bfr, Bool_.Y, point); // sprintf( '&#x%x;', $point )
|
||||||
|
bfr.Add_byte_semic();
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean Validate_codepoint(int codepoint) {
|
||||||
|
// U+000C is valid in HTML5 but not allowed in XML.
|
||||||
|
// U+000D is valid in XML but not allowed in HTML5.
|
||||||
|
// U+007F - U+009F are disallowed in HTML5 (control characters).
|
||||||
|
return codepoint == 0x09
|
||||||
|
|| codepoint == 0x0a
|
||||||
|
|| (codepoint >= 0x20 && codepoint <= 0x7e)
|
||||||
|
|| (codepoint >= 0xa0 && codepoint <= 0xd7ff)
|
||||||
|
|| (codepoint >= 0xe000 && codepoint <= 0xfffd)
|
||||||
|
|| (codepoint >= 0x10000 && codepoint <= 0x10ffff);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static boolean[] Normalize__dec, Normalize__hex, Normalize__ent;
|
||||||
|
private static Hash_adp_bry html_entities;
|
||||||
|
private static Hash_adp_bry Html_entities_new() {
|
||||||
|
Bry_bfr tmp = Bry_bfr_.New();
|
||||||
|
Hash_adp_bry rv = Hash_adp_bry.cs();
|
||||||
|
|
||||||
|
Html_entities_set(rv, Xomw_html_ent.Type__alias, -1, "רלמ", "‏");
|
||||||
|
Html_entities_set(rv, Xomw_html_ent.Type__alias, -1, "رلم", "‏");
|
||||||
|
|
||||||
|
Html_entities_set(rv, Xomw_html_ent.Type__char, 60, "lt", "<");
|
||||||
|
Html_entities_set(rv, Xomw_html_ent.Type__char, 62, "gt", ">");
|
||||||
|
Html_entities_set(rv, Xomw_html_ent.Type__char, 38, "amp", "&");
|
||||||
|
Html_entities_set(rv, Xomw_html_ent.Type__char, 34, "quot", """);
|
||||||
|
|
||||||
|
// List of all named character entities defined in HTML 4.01
|
||||||
|
// https://www.w3.org/TR/html4/sgml/entities.html
|
||||||
|
// As well as ' which is only defined starting in XHTML1.
|
||||||
|
Html_entities_set(rv, tmp, "Aacute" , 193);
|
||||||
|
Html_entities_set(rv, tmp, "aacute" , 225);
|
||||||
|
Html_entities_set(rv, tmp, "Acirc" , 194);
|
||||||
|
Html_entities_set(rv, tmp, "acirc" , 226);
|
||||||
|
Html_entities_set(rv, tmp, "acute" , 180);
|
||||||
|
Html_entities_set(rv, tmp, "AElig" , 198);
|
||||||
|
Html_entities_set(rv, tmp, "aelig" , 230);
|
||||||
|
Html_entities_set(rv, tmp, "Agrave" , 192);
|
||||||
|
Html_entities_set(rv, tmp, "agrave" , 224);
|
||||||
|
Html_entities_set(rv, tmp, "alefsym" , 8501);
|
||||||
|
Html_entities_set(rv, tmp, "Alpha" , 913);
|
||||||
|
Html_entities_set(rv, tmp, "alpha" , 945);
|
||||||
|
Html_entities_set(rv, tmp, "amp" , 38); // XO: identical to Type__char entry; note that Type__char should be evaluated first
|
||||||
|
Html_entities_set(rv, tmp, "and" , 8743);
|
||||||
|
Html_entities_set(rv, tmp, "ang" , 8736);
|
||||||
|
Html_entities_set(rv, tmp, "apos" , 39); // New in XHTML & HTML 5; avoid in output for compatibility with IE.
|
||||||
|
Html_entities_set(rv, tmp, "Aring" , 197);
|
||||||
|
Html_entities_set(rv, tmp, "aring" , 229);
|
||||||
|
Html_entities_set(rv, tmp, "asymp" , 8776);
|
||||||
|
Html_entities_set(rv, tmp, "Atilde" , 195);
|
||||||
|
Html_entities_set(rv, tmp, "atilde" , 227);
|
||||||
|
Html_entities_set(rv, tmp, "Auml" , 196);
|
||||||
|
Html_entities_set(rv, tmp, "auml" , 228);
|
||||||
|
Html_entities_set(rv, tmp, "bdquo" , 8222);
|
||||||
|
Html_entities_set(rv, tmp, "Beta" , 914);
|
||||||
|
Html_entities_set(rv, tmp, "beta" , 946);
|
||||||
|
Html_entities_set(rv, tmp, "brvbar" , 166);
|
||||||
|
Html_entities_set(rv, tmp, "bull" , 8226);
|
||||||
|
Html_entities_set(rv, tmp, "cap" , 8745);
|
||||||
|
Html_entities_set(rv, tmp, "Ccedil" , 199);
|
||||||
|
Html_entities_set(rv, tmp, "ccedil" , 231);
|
||||||
|
Html_entities_set(rv, tmp, "cedil" , 184);
|
||||||
|
Html_entities_set(rv, tmp, "cent" , 162);
|
||||||
|
Html_entities_set(rv, tmp, "Chi" , 935);
|
||||||
|
Html_entities_set(rv, tmp, "chi" , 967);
|
||||||
|
Html_entities_set(rv, tmp, "circ" , 710);
|
||||||
|
Html_entities_set(rv, tmp, "clubs" , 9827);
|
||||||
|
Html_entities_set(rv, tmp, "cong" , 8773);
|
||||||
|
Html_entities_set(rv, tmp, "copy" , 169);
|
||||||
|
Html_entities_set(rv, tmp, "crarr" , 8629);
|
||||||
|
Html_entities_set(rv, tmp, "cup" , 8746);
|
||||||
|
Html_entities_set(rv, tmp, "curren" , 164);
|
||||||
|
Html_entities_set(rv, tmp, "dagger" , 8224);
|
||||||
|
Html_entities_set(rv, tmp, "Dagger" , 8225);
|
||||||
|
Html_entities_set(rv, tmp, "darr" , 8595);
|
||||||
|
Html_entities_set(rv, tmp, "dArr" , 8659);
|
||||||
|
Html_entities_set(rv, tmp, "deg" , 176);
|
||||||
|
Html_entities_set(rv, tmp, "Delta" , 916);
|
||||||
|
Html_entities_set(rv, tmp, "delta" , 948);
|
||||||
|
Html_entities_set(rv, tmp, "diams" , 9830);
|
||||||
|
Html_entities_set(rv, tmp, "divide" , 247);
|
||||||
|
Html_entities_set(rv, tmp, "Eacute" , 201);
|
||||||
|
Html_entities_set(rv, tmp, "eacute" , 233);
|
||||||
|
Html_entities_set(rv, tmp, "Ecirc" , 202);
|
||||||
|
Html_entities_set(rv, tmp, "ecirc" , 234);
|
||||||
|
Html_entities_set(rv, tmp, "Egrave" , 200);
|
||||||
|
Html_entities_set(rv, tmp, "egrave" , 232);
|
||||||
|
Html_entities_set(rv, tmp, "empty" , 8709);
|
||||||
|
Html_entities_set(rv, tmp, "emsp" , 8195);
|
||||||
|
Html_entities_set(rv, tmp, "ensp" , 8194);
|
||||||
|
Html_entities_set(rv, tmp, "Epsilon" , 917);
|
||||||
|
Html_entities_set(rv, tmp, "epsilon" , 949);
|
||||||
|
Html_entities_set(rv, tmp, "equiv" , 8801);
|
||||||
|
Html_entities_set(rv, tmp, "Eta" , 919);
|
||||||
|
Html_entities_set(rv, tmp, "eta" , 951);
|
||||||
|
Html_entities_set(rv, tmp, "ETH" , 208);
|
||||||
|
Html_entities_set(rv, tmp, "eth" , 240);
|
||||||
|
Html_entities_set(rv, tmp, "Euml" , 203);
|
||||||
|
Html_entities_set(rv, tmp, "euml" , 235);
|
||||||
|
Html_entities_set(rv, tmp, "euro" , 8364);
|
||||||
|
Html_entities_set(rv, tmp, "exist" , 8707);
|
||||||
|
Html_entities_set(rv, tmp, "fnof" , 402);
|
||||||
|
Html_entities_set(rv, tmp, "forall" , 8704);
|
||||||
|
Html_entities_set(rv, tmp, "frac12" , 189);
|
||||||
|
Html_entities_set(rv, tmp, "frac14" , 188);
|
||||||
|
Html_entities_set(rv, tmp, "frac34" , 190);
|
||||||
|
Html_entities_set(rv, tmp, "frasl" , 8260);
|
||||||
|
Html_entities_set(rv, tmp, "Gamma" , 915);
|
||||||
|
Html_entities_set(rv, tmp, "gamma" , 947);
|
||||||
|
Html_entities_set(rv, tmp, "ge" , 8805);
|
||||||
|
Html_entities_set(rv, tmp, "gt" , 62);
|
||||||
|
Html_entities_set(rv, tmp, "harr" , 8596);
|
||||||
|
Html_entities_set(rv, tmp, "hArr" , 8660);
|
||||||
|
Html_entities_set(rv, tmp, "hearts" , 9829);
|
||||||
|
Html_entities_set(rv, tmp, "hellip" , 8230);
|
||||||
|
Html_entities_set(rv, tmp, "Iacute" , 205);
|
||||||
|
Html_entities_set(rv, tmp, "iacute" , 237);
|
||||||
|
Html_entities_set(rv, tmp, "Icirc" , 206);
|
||||||
|
Html_entities_set(rv, tmp, "icirc" , 238);
|
||||||
|
Html_entities_set(rv, tmp, "iexcl" , 161);
|
||||||
|
Html_entities_set(rv, tmp, "Igrave" , 204);
|
||||||
|
Html_entities_set(rv, tmp, "igrave" , 236);
|
||||||
|
Html_entities_set(rv, tmp, "image" , 8465);
|
||||||
|
Html_entities_set(rv, tmp, "infin" , 8734);
|
||||||
|
Html_entities_set(rv, tmp, "int" , 8747);
|
||||||
|
Html_entities_set(rv, tmp, "Iota" , 921);
|
||||||
|
Html_entities_set(rv, tmp, "iota" , 953);
|
||||||
|
Html_entities_set(rv, tmp, "iquest" , 191);
|
||||||
|
Html_entities_set(rv, tmp, "isin" , 8712);
|
||||||
|
Html_entities_set(rv, tmp, "Iuml" , 207);
|
||||||
|
Html_entities_set(rv, tmp, "iuml" , 239);
|
||||||
|
Html_entities_set(rv, tmp, "Kappa" , 922);
|
||||||
|
Html_entities_set(rv, tmp, "kappa" , 954);
|
||||||
|
Html_entities_set(rv, tmp, "Lambda" , 923);
|
||||||
|
Html_entities_set(rv, tmp, "lambda" , 955);
|
||||||
|
Html_entities_set(rv, tmp, "lang" , 9001);
|
||||||
|
Html_entities_set(rv, tmp, "laquo" , 171);
|
||||||
|
Html_entities_set(rv, tmp, "larr" , 8592);
|
||||||
|
Html_entities_set(rv, tmp, "lArr" , 8656);
|
||||||
|
Html_entities_set(rv, tmp, "lceil" , 8968);
|
||||||
|
Html_entities_set(rv, tmp, "ldquo" , 8220);
|
||||||
|
Html_entities_set(rv, tmp, "le" , 8804);
|
||||||
|
Html_entities_set(rv, tmp, "lfloor" , 8970);
|
||||||
|
Html_entities_set(rv, tmp, "lowast" , 8727);
|
||||||
|
Html_entities_set(rv, tmp, "loz" , 9674);
|
||||||
|
Html_entities_set(rv, tmp, "lrm" , 8206);
|
||||||
|
Html_entities_set(rv, tmp, "lsaquo" , 8249);
|
||||||
|
Html_entities_set(rv, tmp, "lsquo" , 8216);
|
||||||
|
Html_entities_set(rv, tmp, "lt" , 60);
|
||||||
|
Html_entities_set(rv, tmp, "macr" , 175);
|
||||||
|
Html_entities_set(rv, tmp, "mdash" , 8212);
|
||||||
|
Html_entities_set(rv, tmp, "micro" , 181);
|
||||||
|
Html_entities_set(rv, tmp, "middot" , 183);
|
||||||
|
Html_entities_set(rv, tmp, "minus" , 8722);
|
||||||
|
Html_entities_set(rv, tmp, "Mu" , 924);
|
||||||
|
Html_entities_set(rv, tmp, "mu" , 956);
|
||||||
|
Html_entities_set(rv, tmp, "nabla" , 8711);
|
||||||
|
Html_entities_set(rv, tmp, "nbsp" , 160);
|
||||||
|
Html_entities_set(rv, tmp, "ndash" , 8211);
|
||||||
|
Html_entities_set(rv, tmp, "ne" , 8800);
|
||||||
|
Html_entities_set(rv, tmp, "ni" , 8715);
|
||||||
|
Html_entities_set(rv, tmp, "not" , 172);
|
||||||
|
Html_entities_set(rv, tmp, "notin" , 8713);
|
||||||
|
Html_entities_set(rv, tmp, "nsub" , 8836);
|
||||||
|
Html_entities_set(rv, tmp, "Ntilde" , 209);
|
||||||
|
Html_entities_set(rv, tmp, "ntilde" , 241);
|
||||||
|
Html_entities_set(rv, tmp, "Nu" , 925);
|
||||||
|
Html_entities_set(rv, tmp, "nu" , 957);
|
||||||
|
Html_entities_set(rv, tmp, "Oacute" , 211);
|
||||||
|
Html_entities_set(rv, tmp, "oacute" , 243);
|
||||||
|
Html_entities_set(rv, tmp, "Ocirc" , 212);
|
||||||
|
Html_entities_set(rv, tmp, "ocirc" , 244);
|
||||||
|
Html_entities_set(rv, tmp, "OElig" , 338);
|
||||||
|
Html_entities_set(rv, tmp, "oelig" , 339);
|
||||||
|
Html_entities_set(rv, tmp, "Ograve" , 210);
|
||||||
|
Html_entities_set(rv, tmp, "ograve" , 242);
|
||||||
|
Html_entities_set(rv, tmp, "oline" , 8254);
|
||||||
|
Html_entities_set(rv, tmp, "Omega" , 937);
|
||||||
|
Html_entities_set(rv, tmp, "omega" , 969);
|
||||||
|
Html_entities_set(rv, tmp, "Omicron" , 927);
|
||||||
|
Html_entities_set(rv, tmp, "omicron" , 959);
|
||||||
|
Html_entities_set(rv, tmp, "oplus" , 8853);
|
||||||
|
Html_entities_set(rv, tmp, "or" , 8744);
|
||||||
|
Html_entities_set(rv, tmp, "ordf" , 170);
|
||||||
|
Html_entities_set(rv, tmp, "ordm" , 186);
|
||||||
|
Html_entities_set(rv, tmp, "Oslash" , 216);
|
||||||
|
Html_entities_set(rv, tmp, "oslash" , 248);
|
||||||
|
Html_entities_set(rv, tmp, "Otilde" , 213);
|
||||||
|
Html_entities_set(rv, tmp, "otilde" , 245);
|
||||||
|
Html_entities_set(rv, tmp, "otimes" , 8855);
|
||||||
|
Html_entities_set(rv, tmp, "Ouml" , 214);
|
||||||
|
Html_entities_set(rv, tmp, "ouml" , 246);
|
||||||
|
Html_entities_set(rv, tmp, "para" , 182);
|
||||||
|
Html_entities_set(rv, tmp, "part" , 8706);
|
||||||
|
Html_entities_set(rv, tmp, "permil" , 8240);
|
||||||
|
Html_entities_set(rv, tmp, "perp" , 8869);
|
||||||
|
Html_entities_set(rv, tmp, "Phi" , 934);
|
||||||
|
Html_entities_set(rv, tmp, "phi" , 966);
|
||||||
|
Html_entities_set(rv, tmp, "Pi" , 928);
|
||||||
|
Html_entities_set(rv, tmp, "pi" , 960);
|
||||||
|
Html_entities_set(rv, tmp, "piv" , 982);
|
||||||
|
Html_entities_set(rv, tmp, "plusmn" , 177);
|
||||||
|
Html_entities_set(rv, tmp, "pound" , 163);
|
||||||
|
Html_entities_set(rv, tmp, "prime" , 8242);
|
||||||
|
Html_entities_set(rv, tmp, "Prime" , 8243);
|
||||||
|
Html_entities_set(rv, tmp, "prod" , 8719);
|
||||||
|
Html_entities_set(rv, tmp, "prop" , 8733);
|
||||||
|
Html_entities_set(rv, tmp, "Psi" , 936);
|
||||||
|
Html_entities_set(rv, tmp, "psi" , 968);
|
||||||
|
Html_entities_set(rv, tmp, "quot" , 34);
|
||||||
|
Html_entities_set(rv, tmp, "radic" , 8730);
|
||||||
|
Html_entities_set(rv, tmp, "rang" , 9002);
|
||||||
|
Html_entities_set(rv, tmp, "raquo" , 187);
|
||||||
|
Html_entities_set(rv, tmp, "rarr" , 8594);
|
||||||
|
Html_entities_set(rv, tmp, "rArr" , 8658);
|
||||||
|
Html_entities_set(rv, tmp, "rceil" , 8969);
|
||||||
|
Html_entities_set(rv, tmp, "rdquo" , 8221);
|
||||||
|
Html_entities_set(rv, tmp, "real" , 8476);
|
||||||
|
Html_entities_set(rv, tmp, "reg" , 174);
|
||||||
|
Html_entities_set(rv, tmp, "rfloor" , 8971);
|
||||||
|
Html_entities_set(rv, tmp, "Rho" , 929);
|
||||||
|
Html_entities_set(rv, tmp, "rho" , 961);
|
||||||
|
Html_entities_set(rv, tmp, "rlm" , 8207);
|
||||||
|
Html_entities_set(rv, tmp, "rsaquo" , 8250);
|
||||||
|
Html_entities_set(rv, tmp, "rsquo" , 8217);
|
||||||
|
Html_entities_set(rv, tmp, "sbquo" , 8218);
|
||||||
|
Html_entities_set(rv, tmp, "Scaron" , 352);
|
||||||
|
Html_entities_set(rv, tmp, "scaron" , 353);
|
||||||
|
Html_entities_set(rv, tmp, "sdot" , 8901);
|
||||||
|
Html_entities_set(rv, tmp, "sect" , 167);
|
||||||
|
Html_entities_set(rv, tmp, "shy" , 173);
|
||||||
|
Html_entities_set(rv, tmp, "Sigma" , 931);
|
||||||
|
Html_entities_set(rv, tmp, "sigma" , 963);
|
||||||
|
Html_entities_set(rv, tmp, "sigmaf" , 962);
|
||||||
|
Html_entities_set(rv, tmp, "sim" , 8764);
|
||||||
|
Html_entities_set(rv, tmp, "spades" , 9824);
|
||||||
|
Html_entities_set(rv, tmp, "sub" , 8834);
|
||||||
|
Html_entities_set(rv, tmp, "sube" , 8838);
|
||||||
|
Html_entities_set(rv, tmp, "sum" , 8721);
|
||||||
|
Html_entities_set(rv, tmp, "sup" , 8835);
|
||||||
|
Html_entities_set(rv, tmp, "sup1" , 185);
|
||||||
|
Html_entities_set(rv, tmp, "sup2" , 178);
|
||||||
|
Html_entities_set(rv, tmp, "sup3" , 179);
|
||||||
|
Html_entities_set(rv, tmp, "supe" , 8839);
|
||||||
|
Html_entities_set(rv, tmp, "szlig" , 223);
|
||||||
|
Html_entities_set(rv, tmp, "Tau" , 932);
|
||||||
|
Html_entities_set(rv, tmp, "tau" , 964);
|
||||||
|
Html_entities_set(rv, tmp, "there4" , 8756);
|
||||||
|
Html_entities_set(rv, tmp, "Theta" , 920);
|
||||||
|
Html_entities_set(rv, tmp, "theta" , 952);
|
||||||
|
Html_entities_set(rv, tmp, "thetasym" , 977);
|
||||||
|
Html_entities_set(rv, tmp, "thinsp" , 8201);
|
||||||
|
Html_entities_set(rv, tmp, "THORN" , 222);
|
||||||
|
Html_entities_set(rv, tmp, "thorn" , 254);
|
||||||
|
Html_entities_set(rv, tmp, "tilde" , 732);
|
||||||
|
Html_entities_set(rv, tmp, "times" , 215);
|
||||||
|
Html_entities_set(rv, tmp, "trade" , 8482);
|
||||||
|
Html_entities_set(rv, tmp, "Uacute" , 218);
|
||||||
|
Html_entities_set(rv, tmp, "uacute" , 250);
|
||||||
|
Html_entities_set(rv, tmp, "uarr" , 8593);
|
||||||
|
Html_entities_set(rv, tmp, "uArr" , 8657);
|
||||||
|
Html_entities_set(rv, tmp, "Ucirc" , 219);
|
||||||
|
Html_entities_set(rv, tmp, "ucirc" , 251);
|
||||||
|
Html_entities_set(rv, tmp, "Ugrave" , 217);
|
||||||
|
Html_entities_set(rv, tmp, "ugrave" , 249);
|
||||||
|
Html_entities_set(rv, tmp, "uml" , 168);
|
||||||
|
Html_entities_set(rv, tmp, "upsih" , 978);
|
||||||
|
Html_entities_set(rv, tmp, "Upsilon" , 933);
|
||||||
|
Html_entities_set(rv, tmp, "upsilon" , 965);
|
||||||
|
Html_entities_set(rv, tmp, "Uuml" , 220);
|
||||||
|
Html_entities_set(rv, tmp, "uuml" , 252);
|
||||||
|
Html_entities_set(rv, tmp, "weierp" , 8472);
|
||||||
|
Html_entities_set(rv, tmp, "Xi" , 926);
|
||||||
|
Html_entities_set(rv, tmp, "xi" , 958);
|
||||||
|
Html_entities_set(rv, tmp, "Yacute" , 221);
|
||||||
|
Html_entities_set(rv, tmp, "yacute" , 253);
|
||||||
|
Html_entities_set(rv, tmp, "yen" , 165);
|
||||||
|
Html_entities_set(rv, tmp, "Yuml" , 376);
|
||||||
|
Html_entities_set(rv, tmp, "yuml" , 255);
|
||||||
|
Html_entities_set(rv, tmp, "Zeta" , 918);
|
||||||
|
Html_entities_set(rv, tmp, "zeta" , 950);
|
||||||
|
Html_entities_set(rv, tmp, "zwj" , 8205);
|
||||||
|
Html_entities_set(rv, tmp, "zwnj" , 8204);
|
||||||
|
return rv;
|
||||||
|
}
|
||||||
|
private static void Html_entities_set(Hash_adp_bry rv, Bry_bfr tmp, String name_str, int code) {
|
||||||
|
byte[] html_bry = tmp.Add_str_a7("&#").Add_int_variable(code).Add_byte_semic().To_bry_and_clear();
|
||||||
|
Html_entities_set(rv, Xomw_html_ent.Type__entity, code, name_str, html_bry);
|
||||||
|
}
|
||||||
|
// Convenience overload: converts html_str with Bry_.new_u8 and forwards to the byte[] overload.
private static void Html_entities_set(Hash_adp_bry rv, byte type, int code, String name_str, String html_str) {
	byte[] html_bry = Bry_.new_u8(html_str);
	Html_entities_set(rv, type, code, name_str, html_bry);
}
|
||||||
|
private static void Html_entities_set(Hash_adp_bry rv, byte type, int code, String name_str, byte[] html_bry) {
|
||||||
|
byte[] name_bry = Bry_.new_u8(name_str);
|
||||||
|
rv.Add_if_dupe_use_1st(name_bry, new Xomw_html_ent(type, code, name_bry, html_bry)); // Add_dupe needed b/c "lt" and co. are added early; ignore subsequent call
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Immutable record describing one html entity: its kind, numeric code, name and html form.
// NOTE: the byte arrays are stored as passed (no copy), so callers must not modify them afterwards.
class Xomw_html_ent {
	public Xomw_html_ent(byte type, int code, byte[] name, byte[] html) {
		this.type = type;
		this.code = code;
		this.name = name;
		this.html = html;
	}
	public final byte type;    // one of the Type__* constants below
	public final int code;     // numeric code of the entity
	public final byte[] name;  // entity name
	public final byte[] html;  // html form emitted for the entity
	public static final byte Type__null = 0, Type__alias = 1, Type__char = 2, Type__entity = 3;
}
|
||||||
|
class Bool_ary_bldr {
|
||||||
|
private final boolean[] ary;
|
||||||
|
public Bool_ary_bldr(int len) {
|
||||||
|
this.ary = new boolean[len];
|
||||||
|
}
|
||||||
|
public Bool_ary_bldr Set_many(int... v) {
|
||||||
|
int len = v.length;
|
||||||
|
for (int i = 0; i < len; i++)
|
||||||
|
ary[v[i]] = true;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
public Bool_ary_bldr Set_rng(int bgn, int end) {
|
||||||
|
for (int i = bgn; i <= end; i++)
|
||||||
|
ary[i] = true;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
public boolean[] To_ary() {
|
||||||
|
return ary;
|
||||||
|
}
|
||||||
|
public static Bool_ary_bldr New_u8() {return new Bool_ary_bldr(256);}
|
||||||
|
}
|
@ -0,0 +1,44 @@
|
|||||||
|
/*
|
||||||
|
XOWA: the XOWA Offline Wiki Application
|
||||||
|
Copyright (C) 2012 gnosygnu@gmail.com
|
||||||
|
|
||||||
|
This program is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Affero General Public License as
|
||||||
|
published by the Free Software Foundation, either version 3 of the
|
||||||
|
License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Affero General Public License
|
||||||
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
package gplx.xowa.mws; import gplx.*; import gplx.xowa.*;
|
||||||
|
import org.junit.*; import gplx.core.tests.*; import gplx.core.btries.*;
|
||||||
|
public class Xomw_sanitizer__tst {
|
||||||
|
private final Xomw_sanitizer__fxt fxt = new Xomw_sanitizer__fxt();
|
||||||
|
@Test public void Text() {fxt.Test__normalize_char_references("abc" , "abc");}
|
||||||
|
@Test public void Dec() {fxt.Test__normalize_char_references("" , "&#08;");}
|
||||||
|
@Test public void Dec__invalid() {fxt.Test__normalize_char_references("	" , "	");}
|
||||||
|
@Test public void Hex() {fxt.Test__normalize_char_references("ÿ" , "ÿ");}
|
||||||
|
@Test public void Entity() {fxt.Test__normalize_char_references("α" , "α");}
|
||||||
|
@Test public void Entity__lt() {fxt.Test__normalize_char_references("<" , "<");}
|
||||||
|
@Test public void Invalid() {fxt.Test__normalize_char_references("&(invalid);" , "&(invalid);");}
|
||||||
|
@Test public void Many() {
|
||||||
|
fxt.Test__normalize_char_references
|
||||||
|
( "a 	 b α c ÿ d &(invalid); e"
|
||||||
|
, "a 	 b α c ÿ d &(invalid); e"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
class Xomw_sanitizer__fxt {
|
||||||
|
private final Xomw_sanitizer sanitizer = new Xomw_sanitizer();
|
||||||
|
private final Bry_bfr tmp = Bry_bfr_.New();
|
||||||
|
public void Test__normalize_char_references(String src_str, String expd) {
|
||||||
|
byte[] src_bry = Bry_.new_u8(src_str);
|
||||||
|
sanitizer.Normalize_char_references(tmp, Bool_.Y, src_bry, 0, src_bry.length);
|
||||||
|
Gftest.Eq__str(expd, tmp.To_str_and_clear());
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,26 @@
|
|||||||
|
/*
|
||||||
|
XOWA: the XOWA Offline Wiki Application
|
||||||
|
Copyright (C) 2012 gnosygnu@gmail.com
|
||||||
|
|
||||||
|
This program is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Affero General Public License as
|
||||||
|
published by the Free Software Foundation, either version 3 of the
|
||||||
|
License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Affero General Public License
|
||||||
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
package gplx.xowa.mws.htmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
|
||||||
|
// Holder for an html element name.
public class Xomw_html_elem {
	public Xomw_html_elem(byte[] name) {
		this.name = name;
	}
	// Returns the array passed to the constructor (not a copy).
	public byte[] Name() {return name;} private final byte[] name;	// EX: "a", "div", "img"

	// private static final Hash_adp_bry void_elements = Hash_adp_bry.cs().Add_many_str("area", "base", "br", "col", "embed", "hr", "img", "input", "keygen", "link", "meta", "param", "source", "track", "wbr");
}
|
@ -0,0 +1,267 @@
|
|||||||
|
/*
|
||||||
|
XOWA: the XOWA Offline Wiki Application
|
||||||
|
Copyright (C) 2012 gnosygnu@gmail.com
|
||||||
|
|
||||||
|
This program is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Affero General Public License as
|
||||||
|
published by the Free Software Foundation, either version 3 of the
|
||||||
|
License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Affero General Public License
|
||||||
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
package gplx.xowa.mws.htmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
|
||||||
|
import gplx.core.btries.*;
|
||||||
|
import gplx.langs.phps.utls.*;
|
||||||
|
public class Xomw_html_utl {
|
||||||
|
private final Bry_bfr tmp = Bry_bfr_.New();
|
||||||
|
private final Btrie_rv trv = new Btrie_rv();
|
||||||
|
public void Raw_element(Bry_bfr bfr, byte[] element, Xomwh_atr_mgr attribs, byte[] contents) {
|
||||||
|
Bry_.Lcase__all(element); // XO:lcase element
|
||||||
|
|
||||||
|
Open_element__lcased(bfr, element, attribs);
|
||||||
|
if (void_elements.Has(element)) {
|
||||||
|
bfr.Del_by_1().Add(Bry__elem__lhs__inl);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
bfr.Add(contents);
|
||||||
|
Close_element__lcased(bfr, element);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
private void Open_element__lcased(Bry_bfr bfr, byte[] element, Xomwh_atr_mgr attribs) {
|
||||||
|
// This is not required in HTML5, but let's do it anyway, for
|
||||||
|
// consistency and better compression.
|
||||||
|
// $element = strtolower($element); // XO:handled by callers
|
||||||
|
|
||||||
|
// Remove invalid input types
|
||||||
|
if (Bry_.Eq(element, Tag__input)) {
|
||||||
|
// PORTED.HEADER:valid_input_types
|
||||||
|
byte[] type_atr_val = attribs.Get_val_or_null(Atr__type);
|
||||||
|
if (type_atr_val != null && !valid_input_types.Has(type_atr_val)) {
|
||||||
|
attribs.Del(Atr__type);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// According to standard the default type for <button> elements is "submit".
|
||||||
|
// Depending on compatibility mode IE might use "button", instead.
|
||||||
|
// We enforce the standard "submit".
|
||||||
|
if (Bry_.Eq(element, Tag__button) && attribs.Get_val_or_null(Atr__type) == null) {
|
||||||
|
attribs.Set(Atr__type, Val__type__submit);
|
||||||
|
}
|
||||||
|
|
||||||
|
bfr.Add_byte(Byte_ascii.Angle_bgn).Add(element);
|
||||||
|
Expand_attributes(bfr, attribs); // TODO.XO:self::dropDefaults($element, $attribs)
|
||||||
|
bfr.Add_byte(Byte_ascii.Angle_end);
|
||||||
|
}
|
||||||
|
public void Expand_attributes(Bry_bfr bfr, Xomwh_atr_mgr atrs) {
|
||||||
|
int len = atrs.Len();
|
||||||
|
for (int i = 0; i < len; i++) {
|
||||||
|
Xomwh_atr_itm atr = (Xomwh_atr_itm)atrs.Get_at(i);
|
||||||
|
byte[] key = atr.Key_bry();
|
||||||
|
byte[] val = atr.Val();
|
||||||
|
|
||||||
|
// Support intuitive [ 'checked' => true/false ] form
|
||||||
|
if (val == null) { // TESTME
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// For boolean attributes, support [ 'foo' ] instead of
|
||||||
|
// requiring [ 'foo' => 'meaningless' ].
|
||||||
|
boolean bool_attrib = bool_attribs.Has(val);
|
||||||
|
if (atr.Key_int() != -1 && bool_attrib) {
|
||||||
|
key = val;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Not technically required in HTML5 but we'd like consistency
|
||||||
|
// and better compression anyway.
|
||||||
|
key = Bry_.Xcase__build__all(tmp, Bool_.N, key);
|
||||||
|
|
||||||
|
// PORTED.HEADER:$spaceSeparatedListAttributes
|
||||||
|
|
||||||
|
// Specific features for attributes that allow a list of space-separated values
|
||||||
|
if (space_separated_list_attributes.Has(key)) {
|
||||||
|
// Apply some normalization and remove duplicates
|
||||||
|
|
||||||
|
// Convert into correct array. Array can contain space-separated
|
||||||
|
// values. Implode/explode to get those into the main array as well.
|
||||||
|
// if (is_array($value)) {
|
||||||
|
// If input wasn't an array, we can skip this step
|
||||||
|
// $newValue = [];
|
||||||
|
// foreach ($value as $k => $v) {
|
||||||
|
// if (is_string($v)) {
|
||||||
|
// String values should be normal `array('foo')`
|
||||||
|
// Just append them
|
||||||
|
// if (!isset($value[$v])) {
|
||||||
|
// As a special case don't set 'foo' if a
|
||||||
|
// separate 'foo' => true/false exists in the array
|
||||||
|
// keys should be authoritative
|
||||||
|
// $newValue[] = $v;
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// elseif ($v) {
|
||||||
|
// If the value is truthy but not a String this is likely
|
||||||
|
// an [ 'foo' => true ], falsy values don't add strings
|
||||||
|
// $newValue[] = $k;
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// $value = implode(' ', $newValue);
|
||||||
|
// }
|
||||||
|
// $value = explode(' ', $value);
|
||||||
|
|
||||||
|
// Normalize spacing by fixing up cases where people used
|
||||||
|
// more than 1 space and/or a trailing/leading space
|
||||||
|
// $value = array_diff($value, [ '', ' ' ]);
|
||||||
|
|
||||||
|
// Remove duplicates and create the String
|
||||||
|
// $value = implode(' ', array_unique($value));
|
||||||
|
}
|
||||||
|
// DELETE
|
||||||
|
// elseif (is_array($value)) {
|
||||||
|
// throw new MWException("HTML attribute $key can not contain a list of values");
|
||||||
|
// }
|
||||||
|
|
||||||
|
if (bool_attrib) {
|
||||||
|
bfr.Add_byte_space().Add(key).Add(Bry__atr__val__empty); // $ret .= " $key=\"\"";
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
// PORTED.HEADER:atr_val_encodings
|
||||||
|
val = Php_str_.Strtr(val, atr_val_encodings, tmp, trv);
|
||||||
|
bfr.Add_byte_space().Add(key).Add(Bry__atr__val__quote).Add(val).Add_byte_quote();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void Close_element__lcased(Bry_bfr bfr, byte[] element) {
|
||||||
|
bfr.Add(Bry__elem__rhs__bgn).Add(element).Add_byte(Byte_ascii.Angle_end); // EX: "</", element, ">";
|
||||||
|
}
|
||||||
|
private static final byte[]
|
||||||
|
Bry__elem__lhs__inl = Bry_.new_a7("/>")
|
||||||
|
, Bry__elem__rhs__bgn = Bry_.new_a7("</")
|
||||||
|
, Bry__atr__val__quote = Bry_.new_a7("=\"")
|
||||||
|
, Bry__atr__val__empty = Bry_.new_a7("=\"\"")
|
||||||
|
|
||||||
|
, Tag__input = Bry_.new_a7("input")
|
||||||
|
, Tag__button = Bry_.new_a7("button")
|
||||||
|
, Atr__type = Bry_.new_a7("type")
|
||||||
|
, Val__type__submit = Bry_.new_a7("submit")
|
||||||
|
;
|
||||||
|
|
||||||
|
// List of void elements from HTML5, section 8.1.2 as of 2016-09-19
|
||||||
|
private static final Hash_adp_bry void_elements = Hash_adp_bry.cs().Add_many_str
|
||||||
|
(
|
||||||
|
"area",
|
||||||
|
"super",
|
||||||
|
"br",
|
||||||
|
"col",
|
||||||
|
"embed",
|
||||||
|
"hr",
|
||||||
|
"img",
|
||||||
|
"input",
|
||||||
|
"keygen",
|
||||||
|
"link",
|
||||||
|
"meta",
|
||||||
|
"param",
|
||||||
|
"source",
|
||||||
|
"track",
|
||||||
|
"wbr"
|
||||||
|
);
|
||||||
|
|
||||||
|
// Boolean attributes, which may have the value omitted entirely. Manually
|
||||||
|
// collected from the HTML5 spec as of 2011-08-12.
|
||||||
|
private static final Hash_adp_bry bool_attribs = Hash_adp_bry.ci_a7().Add_many_str(
|
||||||
|
"async",
|
||||||
|
"autofocus",
|
||||||
|
"autoplay",
|
||||||
|
"checked",
|
||||||
|
"controls",
|
||||||
|
"default",
|
||||||
|
"defer",
|
||||||
|
"disabled",
|
||||||
|
"formnovalidate",
|
||||||
|
"hidden",
|
||||||
|
"ismap",
|
||||||
|
// "itemscope", //XO:duplicate; added below
|
||||||
|
"loop",
|
||||||
|
"multiple",
|
||||||
|
"muted",
|
||||||
|
"novalidate",
|
||||||
|
"open",
|
||||||
|
"pubdate",
|
||||||
|
"final ",
|
||||||
|
"required",
|
||||||
|
"reversed",
|
||||||
|
"scoped",
|
||||||
|
"seamless",
|
||||||
|
"selected",
|
||||||
|
"truespeed",
|
||||||
|
"typemustmatch",
|
||||||
|
// HTML5 Microdata
|
||||||
|
"itemscope"
|
||||||
|
);
|
||||||
|
|
||||||
|
private static final Btrie_slim_mgr atr_val_encodings = Btrie_slim_mgr.cs()
|
||||||
|
// Apparently we need to entity-encode \n, \r, \t, although the
|
||||||
|
// spec doesn't mention that. Since we're doing strtr() anyway,
|
||||||
|
// we may as well not call htmlspecialchars().
|
||||||
|
// @todo FIXME: Verify that we actually need to
|
||||||
|
// escape \n\r\t here, and explain why, exactly.
|
||||||
|
// We could call Sanitizer::encodeAttribute() for this, but we
|
||||||
|
// don't because we're stubborn and like our marginal savings on
|
||||||
|
// byte size from not having to encode unnecessary quotes.
|
||||||
|
// The only difference between this transform and the one by
|
||||||
|
// Sanitizer::encodeAttribute() is ' is not encoded.
|
||||||
|
.Add_str_str("&" , "&")
|
||||||
|
.Add_str_str("\"" , """)
|
||||||
|
.Add_str_str(">" , ">")
|
||||||
|
// '<' allegedly allowed per spec
|
||||||
|
// but breaks some tools if not escaped.
|
||||||
|
.Add_str_str("<" , "<")
|
||||||
|
.Add_str_str("\n" , " ")
|
||||||
|
.Add_str_str("\r" , " ")
|
||||||
|
.Add_str_str("\t" , "	");
|
||||||
|
|
||||||
|
// https://www.w3.org/TR/html401/index/attributes.html ("space-separated")
|
||||||
|
// https://www.w3.org/TR/html5/index.html#attributes-1 ("space-separated")
|
||||||
|
private static final Hash_adp_bry space_separated_list_attributes = Hash_adp_bry.ci_a7().Add_many_str(
|
||||||
|
"class", // html4, html5
|
||||||
|
"accesskey", // as of html5, multiple space-separated values allowed
|
||||||
|
// html4-spec doesn't document rel= as space-separated
|
||||||
|
// but has been used like that and is now documented as such
|
||||||
|
// in the html5-spec.
|
||||||
|
"rel"
|
||||||
|
);
|
||||||
|
|
||||||
|
private static final Hash_adp_bry valid_input_types = Hash_adp_bry.ci_a7().Add_many_str(
|
||||||
|
// Remove invalid input types
|
||||||
|
"hidden",
|
||||||
|
"text",
|
||||||
|
"password",
|
||||||
|
"checkbox",
|
||||||
|
"radio",
|
||||||
|
"file",
|
||||||
|
"submit",
|
||||||
|
"image",
|
||||||
|
"reset",
|
||||||
|
"button",
|
||||||
|
|
||||||
|
// HTML input types
|
||||||
|
"datetime",
|
||||||
|
"datetime-local",
|
||||||
|
"date",
|
||||||
|
"month",
|
||||||
|
"time",
|
||||||
|
"week",
|
||||||
|
"number",
|
||||||
|
"range",
|
||||||
|
"email",
|
||||||
|
"url",
|
||||||
|
"search",
|
||||||
|
"tel",
|
||||||
|
"color"
|
||||||
|
);
|
||||||
|
}
|
@ -0,0 +1,39 @@
|
|||||||
|
/*
|
||||||
|
XOWA: the XOWA Offline Wiki Application
|
||||||
|
Copyright (C) 2012 gnosygnu@gmail.com
|
||||||
|
|
||||||
|
This program is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Affero General Public License as
|
||||||
|
published by the Free Software Foundation, either version 3 of the
|
||||||
|
License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Affero General Public License
|
||||||
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
package gplx.xowa.mws.htmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
|
||||||
|
import org.junit.*; import gplx.core.tests.*;
|
||||||
|
public class Xomw_html_utl__expand_attributes__tst {
|
||||||
|
private final Xomw_html_utl__expand_attributes__fxt fxt = new Xomw_html_utl__expand_attributes__fxt();
|
||||||
|
@Test public void Basic() {fxt.Test__expand_attributes(" a=\"b\"", "a", "b");}
|
||||||
|
}
|
||||||
|
class Xomw_html_utl__expand_attributes__fxt {
|
||||||
|
private final Xomw_html_utl utl = new Xomw_html_utl();
|
||||||
|
private final Bry_bfr bfr = Bry_bfr_.New();
|
||||||
|
public void Test__expand_attributes(String expd, String... kvs) {
|
||||||
|
Xomwh_atr_mgr atrs = new Xomwh_atr_mgr();
|
||||||
|
int kvs_len = kvs.length;
|
||||||
|
for (int i = 0; i < kvs_len; i += 2) {
|
||||||
|
byte[] key = Bry_.new_a7(kvs[i]);
|
||||||
|
byte[] val = Bry_.new_a7(kvs[i + 1]);
|
||||||
|
Xomwh_atr_itm itm = new Xomwh_atr_itm(-1, key, val);
|
||||||
|
atrs.Add(itm);
|
||||||
|
}
|
||||||
|
utl.Expand_attributes(bfr, atrs);
|
||||||
|
Gftest.Eq__str(expd, bfr.To_str_and_clear());
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,53 @@
|
|||||||
|
/*
|
||||||
|
XOWA: the XOWA Offline Wiki Application
|
||||||
|
Copyright (C) 2012 gnosygnu@gmail.com
|
||||||
|
|
||||||
|
This program is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Affero General Public License as
|
||||||
|
published by the Free Software Foundation, either version 3 of the
|
||||||
|
License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Affero General Public License
|
||||||
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
package gplx.xowa.mws.htmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
|
||||||
|
public class Xomwh_atr_mgr {
|
||||||
|
private final Ordered_hash hash = Ordered_hash_.New();
|
||||||
|
public int Len() {return hash.Len();}
|
||||||
|
public Xomwh_atr_itm Get_at(int i) {return (Xomwh_atr_itm)hash.Get_at(i);}
|
||||||
|
public Xomwh_atr_mgr Clear() {hash.Clear(); return this;}
|
||||||
|
public void Add(byte[] key, byte[] val) {hash.Add(key, new Xomwh_atr_itm(-1, key, val));}
|
||||||
|
public void Add(Xomwh_atr_itm itm) {hash.Add(itm.Key_bry(), itm);}
|
||||||
|
public void Del(byte[] key) {hash.Del(key);}
|
||||||
|
public void Set(byte[] key, byte[] val) {
|
||||||
|
Xomwh_atr_itm atr = Get_by_or_make(key);
|
||||||
|
atr.Val_(val);
|
||||||
|
}
|
||||||
|
public void Add_or_set(Xomwh_atr_itm src) {
|
||||||
|
Xomwh_atr_itm trg = (Xomwh_atr_itm)hash.Get_by(src.Key_bry());
|
||||||
|
if (trg == null)
|
||||||
|
this.Add(src);
|
||||||
|
else
|
||||||
|
trg.Val_(src.Val());
|
||||||
|
}
|
||||||
|
public Xomwh_atr_itm Get_by_or_null(byte[] k) {
|
||||||
|
return (Xomwh_atr_itm)hash.Get_by(k);
|
||||||
|
}
|
||||||
|
public Xomwh_atr_itm Get_by_or_make(byte[] k) {
|
||||||
|
Xomwh_atr_itm rv = (Xomwh_atr_itm)hash.Get_by(k);
|
||||||
|
if (rv == null) {
|
||||||
|
rv = new Xomwh_atr_itm(-1, k, null);
|
||||||
|
Add(rv);
|
||||||
|
}
|
||||||
|
return rv;
|
||||||
|
}
|
||||||
|
public byte[] Get_val_or_null(byte[] k) {
|
||||||
|
Xomwh_atr_itm atr = (Xomwh_atr_itm)hash.Get_by(k);
|
||||||
|
return atr == null ? null : atr.Val();
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,137 @@
|
|||||||
|
/*
|
||||||
|
XOWA: the XOWA Offline Wiki Application
|
||||||
|
Copyright (C) 2012 gnosygnu@gmail.com
|
||||||
|
|
||||||
|
This program is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Affero General Public License as
|
||||||
|
published by the Free Software Foundation, either version 3 of the
|
||||||
|
License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Affero General Public License
|
||||||
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
package gplx.xowa.mws.linkers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
|
||||||
|
import gplx.langs.htmls.*;
|
||||||
|
import gplx.xowa.mws.htmls.*;
|
||||||
|
public class Xomw_link_renderer {
|
||||||
|
private boolean expand_urls = false;
|
||||||
|
private final Xomw_html_utl html_utl = new Xomw_html_utl();
|
||||||
|
private final Xomwh_atr_mgr attribs = new Xomwh_atr_mgr();
|
||||||
|
|
||||||
|
// If you have already looked up the proper CSS classes using LinkRenderer::getLinkClasses()
|
||||||
|
// or some other method, use this to avoid looking it up again.
|
||||||
|
public void Make_preloaded_link(Bry_bfr bfr, Xoa_ttl target, byte[] text, byte[] classes, Xomwh_atr_mgr extra_atrs, byte[] query) {
|
||||||
|
// IGNORE: $this->runBeginHook --> 'HtmlPageLinkRendererBegin', 'LinkBegin'
|
||||||
|
|
||||||
|
// $target = $this->normalizeTarget( $target ); // normalizeSpecialPage
|
||||||
|
byte[] url = Get_link_url(target, query);
|
||||||
|
attribs.Clear();
|
||||||
|
attribs.Add(Gfh_atr_.Bry__href, url); // NOTE: add url 1st; MW does attribs["url", url] + attribs + extra_attribs
|
||||||
|
if (classes.length > 0) // XO:do not bother adding if empty
|
||||||
|
attribs.Add(Gfh_atr_.Bry__class, classes);
|
||||||
|
byte[] prefixed_text = target.Get_prefixed_text();
|
||||||
|
if (prefixed_text != Bry_.Empty) {
|
||||||
|
attribs.Add(Gfh_atr_.Bry__title, prefixed_text);
|
||||||
|
}
|
||||||
|
|
||||||
|
int extra_atrs_len = extra_atrs.Len();
|
||||||
|
for (int i = 0; i < extra_atrs_len; i++) {
|
||||||
|
attribs.Add_or_set(extra_atrs.Get_at(i));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (text == null) {
|
||||||
|
text = this.Get_link_text(target);
|
||||||
|
}
|
||||||
|
|
||||||
|
Build_a_element(bfr, target,text, attribs, true);
|
||||||
|
}
|
||||||
|
private void Build_a_element(Bry_bfr bfr, Xoa_ttl target, byte[] text, Xomwh_atr_mgr attribs, boolean is_known) {
|
||||||
|
// IGNORE: if ( !Hooks::run( 'HtmlPageLinkRendererEnd',
|
||||||
|
|
||||||
|
byte[] html = text;
|
||||||
|
// $html = HtmlArmor::getHtml( $text );
|
||||||
|
|
||||||
|
// IGNORE: if ( Hooks::isRegistered( 'LinkEnd' ) ) {
|
||||||
|
|
||||||
|
html_utl.Raw_element(bfr, Gfh_tag_.Bry__a, attribs, html);
|
||||||
|
}
|
||||||
|
private byte[] Get_link_url(Xoa_ttl target, byte[] query) {
|
||||||
|
// TODO: Use a LinkTargetResolver service instead of Title
|
||||||
|
// if ( $this->forceArticlePath ) {
|
||||||
|
// $realQuery = $query;
|
||||||
|
// $query = [];
|
||||||
|
// }
|
||||||
|
// else {
|
||||||
|
// $realQuery = [];
|
||||||
|
// }
|
||||||
|
byte[] url = target.Get_link_url(query, false, expand_urls);
|
||||||
|
|
||||||
|
// if ( $this->forceArticlePath && $realQuery ) {
|
||||||
|
// $url = wfAppendQuery( $url, $realQuery );
|
||||||
|
// }
|
||||||
|
return url;
|
||||||
|
}
|
||||||
|
private byte[] Get_link_text(Xoa_ttl target) {
|
||||||
|
byte[] prefixed_text = target.Get_prefixed_text();
|
||||||
|
// If the target is just a fragment, with no title, we return the fragment
|
||||||
|
// text. Otherwise, we return the title text itself.
|
||||||
|
if (prefixed_text == Bry_.Empty && target.Has_fragment()) {
|
||||||
|
return target.Get_fragment();
|
||||||
|
}
|
||||||
|
return prefixed_text;
|
||||||
|
}
|
||||||
|
// private function normalizeTarget( LinkTarget $target ) {
|
||||||
|
// return Linker::normaliseSpecialPage( $target );
|
||||||
|
// }
|
||||||
|
// public static function normaliseSpecialPage( LinkTarget $target ) {
|
||||||
|
// if ( $target->getNamespace() == NS_SPECIAL && !$target->isExternal() ) {
|
||||||
|
// list( $name, $subpage ) = SpecialPageFactory::resolveAlias( $target->getDBkey() );
|
||||||
|
// if ( !$name ) {
|
||||||
|
// return $target;
|
||||||
|
// }
|
||||||
|
// $ret = SpecialPage::getTitleValueFor( $name, $subpage, $target->getFragment() );
|
||||||
|
// return $ret;
|
||||||
|
// } else {
|
||||||
|
// return $target;
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
private static final byte[] Bry__classes__extiw = Bry_.new_a7("extiw");
|
||||||
|
public void Make_known_link(Bry_bfr bfr, Xoa_ttl target, byte[] text, Xomwh_atr_mgr extra_atrs, byte[] query) {
|
||||||
|
byte[] classes = Bry_.Empty;
|
||||||
|
if (target.Is_external()) {
|
||||||
|
classes = Bry__classes__extiw;
|
||||||
|
}
|
||||||
|
byte[] colour = Get_link_classes(target);
|
||||||
|
if (colour != Bry_.Empty) {
|
||||||
|
classes = Bry_.Add(classes, Byte_ascii.Space_bry, colour);
|
||||||
|
}
|
||||||
|
|
||||||
|
Make_preloaded_link(bfr, target, text, classes, extra_atrs, query);
|
||||||
|
}
|
||||||
|
public byte[] Get_link_classes(Xoa_ttl target) {
|
||||||
|
// Make sure the target is in the cache
|
||||||
|
// $id = $this->linkCache->addLinkObj( $target );
|
||||||
|
// if ( $id == 0 ) {
|
||||||
|
// // Doesn't exist
|
||||||
|
// return '';
|
||||||
|
// }
|
||||||
|
|
||||||
|
// if ( $this->linkCache->getGoodLinkFieldObj( $target, 'redirect' ) ) {
|
||||||
|
// Page is a redirect
|
||||||
|
// return 'mw-redirect';
|
||||||
|
// }
|
||||||
|
// elseif ( $this->stubThreshold > 0 && MWNamespace::isContent( $target->getNamespace() )
|
||||||
|
// && $this->linkCache->getGoodLinkFieldObj( $target, 'length' ) < $this->stubThreshold
|
||||||
|
// ) {
|
||||||
|
// Page is a stub
|
||||||
|
// return 'stub';
|
||||||
|
// }
|
||||||
|
|
||||||
|
return Bry_.Empty;
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,584 @@
|
|||||||
|
/*
|
||||||
|
XOWA: the XOWA Offline Wiki Application
|
||||||
|
Copyright (C) 2012 gnosygnu@gmail.com
|
||||||
|
|
||||||
|
This program is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Affero General Public License as
|
||||||
|
published by the Free Software Foundation, either version 3 of the
|
||||||
|
License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Affero General Public License
|
||||||
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
package gplx.xowa.mws.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
|
||||||
|
import gplx.core.btries.*;
|
||||||
|
import gplx.langs.htmls.*;
|
||||||
|
import gplx.langs.phps.utls.*;
|
||||||
|
public class Xomw_block_level_pass {
|
||||||
|
private final Bry_bfr tmp = Bry_bfr_.New();
|
||||||
|
private final Btrie_rv trv = new Btrie_rv();
|
||||||
|
private boolean in_pre, dt_open;
|
||||||
|
private int last_section;
|
||||||
|
private byte[] find_colon_no_links__before, find_colon_no_links__after;
|
||||||
|
|
||||||
|
public void Do_block_levels(Xomw_parser_ctx pctx, Xomw_parser_bfr pbfr, boolean line_start) {
|
||||||
|
// XO.PBFR
|
||||||
|
Bry_bfr src_bfr = pbfr.Src();
|
||||||
|
byte[] src = src_bfr.Bfr();
|
||||||
|
int src_bgn = 0;
|
||||||
|
int src_end = src_bfr.Len();
|
||||||
|
Bry_bfr bfr = pbfr.Trg();
|
||||||
|
pbfr.Switch();
|
||||||
|
|
||||||
|
// XO.STATIC
|
||||||
|
if (block_chars_ary == null) {
|
||||||
|
synchronized (Type_adp_.ClassOf_obj(this)) {
|
||||||
|
block_chars_ary = Block_chars_ary__new();
|
||||||
|
open_match_trie = Btrie_slim_mgr.ci_a7().Add_many_str
|
||||||
|
("<table", "<h1", "<h2", "<h3", "<h4", "<h5", "<h6", "<pre", "<tr", "<p", "<ul", "<ol", "<dl", "<li", "</tr", "</td", "</th");
|
||||||
|
close_match_trie = Btrie_slim_mgr.ci_a7().Add_many_str
|
||||||
|
( "</table", "</h1", "</h2", "</h3", "</h4", "</h5", "</h6", "<td", "<th", "<blockquote", "</blockquote", "<div", "</div", "<hr"
|
||||||
|
, "</pre", "</p", "</mw:", Xomw_strip_state.Str__marker_bgn + "-pre", "</li", "</ul", "</ol", "</dl", "<center", "</center");
|
||||||
|
blockquote_trie = Btrie_slim_mgr.ci_a7().Add_many_str("<blockquote", "</blockquote");
|
||||||
|
pre_trie = Btrie_slim_mgr.ci_a7().Add_str_int("<pre", Pre__bgn).Add_str_int("</pre", Pre__end);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Parsing through the text line by line. The main thing
|
||||||
|
// happening here is handling of block-level elements p, pre,
|
||||||
|
// and making lists from lines starting with * # : etc.
|
||||||
|
byte[] last_prefix = Bry_.Empty;
|
||||||
|
bfr.Clear();
|
||||||
|
this.dt_open = false;
|
||||||
|
boolean in_block_elem = false;
|
||||||
|
int prefix_len = 0;
|
||||||
|
byte para_stack = Para_stack__none;
|
||||||
|
boolean in_blockquote = false;
|
||||||
|
this.in_pre = false;
|
||||||
|
this.last_section = Last_section__none;
|
||||||
|
byte[] prefix2 = null;
|
||||||
|
|
||||||
|
// PORTED.SPLIT: $textLines = StringUtils::explode("\n", $text);
|
||||||
|
int line_bgn = src_bgn;
|
||||||
|
while (line_bgn < src_end) {
|
||||||
|
int line_end = Bry_find_.Find_fwd(src, Byte_ascii.Nl, line_bgn);
|
||||||
|
if (line_end == Bry_find_.Not_found)
|
||||||
|
line_end = src_end;
|
||||||
|
|
||||||
|
// Fix up line_start
|
||||||
|
if (!line_start) {
|
||||||
|
bfr.Add_mid(src, line_bgn, line_end);
|
||||||
|
line_start = true;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// * = ul
|
||||||
|
// # = ol
|
||||||
|
// ; = dt
|
||||||
|
// : = dd
|
||||||
|
int last_prefix_len = last_prefix.length;
|
||||||
|
|
||||||
|
// PORTED: pre_close_match = preg_match('/<\\/pre/i', $oLine); pre_open_match = preg_match('/<pre/i', $oLine);
|
||||||
|
int pre_cur = line_bgn;
|
||||||
|
boolean pre_close_match = false;
|
||||||
|
boolean pre_open_match = false;
|
||||||
|
|
||||||
|
while (true) {
|
||||||
|
if (pre_cur >= line_end)
|
||||||
|
break;
|
||||||
|
Object o = pre_trie.Match_at(trv, src, pre_cur, line_end);
|
||||||
|
if (o == null)
|
||||||
|
pre_cur++;
|
||||||
|
else {
|
||||||
|
int pre_tid = (int)o;
|
||||||
|
if (pre_tid == Pre__bgn)
|
||||||
|
pre_open_match = true;
|
||||||
|
else if (pre_tid == Pre__end)
|
||||||
|
pre_close_match = true;
|
||||||
|
pre_cur = trv.Pos();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
byte[] prefix = null, t = null;
|
||||||
|
// If not in a <pre> element, scan for and figure out what prefixes are there.
|
||||||
|
if (!in_pre) {
|
||||||
|
// Multiple prefixes may abut each other for nested lists.
|
||||||
|
prefix_len = Php_str_.Strspn_fwd__ary(src, block_chars_ary, line_bgn, line_end, line_end); // strspn($oLine, '*#:;');
|
||||||
|
prefix = Php_str_.Substr(src, line_bgn, prefix_len);
|
||||||
|
|
||||||
|
// eh?
|
||||||
|
// ; and : are both from definition-lists, so they're equivalent
|
||||||
|
// for the purposes of determining whether or not we need to open/close
|
||||||
|
// elements.
|
||||||
|
// substr( $inputLine, $prefixLength );
|
||||||
|
prefix2 = Bry_.Replace(prefix, Byte_ascii.Semic, Byte_ascii.Colon);
|
||||||
|
t = Bry_.Mid(src, line_bgn + prefix_len, line_end);
|
||||||
|
in_pre = pre_open_match;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
// Don't interpret any other prefixes in preformatted text
|
||||||
|
prefix_len = 0;
|
||||||
|
prefix = prefix2 = Bry_.Empty;
|
||||||
|
t = Bry_.Mid(src, line_bgn, line_end);
|
||||||
|
}
|
||||||
|
|
||||||
|
// List generation
|
||||||
|
byte[] term = null, t2 = null;
|
||||||
|
int common_prefix_len = -1;
|
||||||
|
if (prefix_len > 0 && Bry_.Eq(last_prefix, prefix2)) {
|
||||||
|
// Same as the last item, so no need to deal with nesting or opening stuff
|
||||||
|
bfr.Add(Next_item(Php_str_.Substr_byte(prefix, -1)));
|
||||||
|
para_stack = Para_stack__none;
|
||||||
|
|
||||||
|
if (prefix_len > 0 && prefix[prefix_len - 1] == Byte_ascii.Semic) {
|
||||||
|
// The one nasty exception: definition lists work like this:
|
||||||
|
// ; title : definition text
|
||||||
|
// So we check for : in the remainder text to split up the
|
||||||
|
// title and definition, without b0rking links.
|
||||||
|
term = t2 = Bry_.Empty;
|
||||||
|
if (Find_colon_no_links(t, term, t2) != Bry_find_.Not_found) {
|
||||||
|
term = find_colon_no_links__before;
|
||||||
|
t2 = find_colon_no_links__after;
|
||||||
|
t = t2;
|
||||||
|
bfr.Add(term).Add(Next_item(Byte_ascii.Colon));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (prefix_len > 0 || last_prefix_len > 0) {
|
||||||
|
// We need to open or close prefixes, or both.
|
||||||
|
|
||||||
|
// Either open or close a level...
|
||||||
|
common_prefix_len = Get_common(prefix, last_prefix);
|
||||||
|
para_stack = Para_stack__none;
|
||||||
|
|
||||||
|
// Close all the prefixes which aren't shared.
|
||||||
|
while (common_prefix_len < last_prefix_len) {
|
||||||
|
bfr.Add(Close_list(last_prefix[last_prefix_len - 1]));
|
||||||
|
last_prefix_len--;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Continue the current prefix if appropriate.
|
||||||
|
if (prefix_len <= common_prefix_len && common_prefix_len > 0) {
|
||||||
|
bfr.Add(Next_item(prefix[common_prefix_len - 1]));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Open prefixes where appropriate.
|
||||||
|
if (Bry_.Len_gt_0(last_prefix) && prefix_len > common_prefix_len) {
|
||||||
|
bfr.Add_byte_nl();
|
||||||
|
}
|
||||||
|
while (prefix_len > common_prefix_len) {
|
||||||
|
byte c = Php_str_.Substr_byte(prefix, common_prefix_len, 1);
|
||||||
|
bfr.Add(Open_list(c));
|
||||||
|
|
||||||
|
if (c == Byte_ascii.Semic) {
|
||||||
|
// @todo FIXME: This is dupe of code above
|
||||||
|
if (Find_colon_no_links(t, term, t2) != Bry_find_.Not_found) {
|
||||||
|
term = find_colon_no_links__before;
|
||||||
|
t2 = find_colon_no_links__after;
|
||||||
|
t = t2;
|
||||||
|
bfr.Add(term).Add(Next_item(Byte_ascii.Colon));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
++common_prefix_len;
|
||||||
|
}
|
||||||
|
if (prefix_len == 0 && Bry_.Len_gt_0(last_prefix)) {
|
||||||
|
bfr.Add_byte_nl();
|
||||||
|
}
|
||||||
|
last_prefix = prefix2;
|
||||||
|
}
|
||||||
|
|
||||||
|
// If we have no prefixes, go to paragraph mode.
|
||||||
|
if (0 == prefix_len) {
|
||||||
|
// No prefix (not in list)--go to paragraph mode
|
||||||
|
// XXX: use a stack for nestable elements like span, table and div
|
||||||
|
int t_len = t.length;
|
||||||
|
boolean open_match = Php_preg_.Match(open_match_trie, trv, t, 0, t_len) != null;
|
||||||
|
boolean close_match = Php_preg_.Match(close_match_trie, trv, t, 0, t_len) != null;
|
||||||
|
|
||||||
|
if (open_match || close_match) {
|
||||||
|
para_stack = Para_stack__none;
|
||||||
|
// @todo bug 5718: paragraph closed
|
||||||
|
bfr.Add(Close_paragraph());
|
||||||
|
if (pre_open_match && !pre_close_match) {
|
||||||
|
in_pre = true;
|
||||||
|
}
|
||||||
|
int bq_offset = 0;
|
||||||
|
// PORTED:preg_match('/<(\\/?)blockquote[\s>]/i', t, $bqMatch, PREG_OFFSET_CAPTURE, $bq_offset)
|
||||||
|
while (true) {
|
||||||
|
Object o = Php_preg_.Match(blockquote_trie, trv, t, bq_offset, t_len);
|
||||||
|
if (o == null) { // no more blockquotes found; exit
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
byte[] bq_bry = (byte[])o;
|
||||||
|
in_blockquote = bq_bry[1] != Byte_ascii.Slash; // is this a close tag?
|
||||||
|
bq_offset = trv.Pos();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
in_block_elem = !close_match;
|
||||||
|
}
|
||||||
|
else if (!in_block_elem && !in_pre) {
|
||||||
|
if ( Php_str_.Substr_byte(t, 0) == Byte_ascii.Space
|
||||||
|
&& (last_section == Last_section__pre || Bry_.Trim(t) != Bry_.Empty)
|
||||||
|
&& !in_blockquote
|
||||||
|
) {
|
||||||
|
// pre
|
||||||
|
if (last_section != Last_section__pre) {
|
||||||
|
para_stack = Para_stack__none;
|
||||||
|
bfr.Add(Close_paragraph()).Add(Gfh_tag_.Pre_lhs);
|
||||||
|
last_section = Last_section__pre;
|
||||||
|
}
|
||||||
|
t = Bry_.Mid(t, 1);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
// paragraph
|
||||||
|
if (Bry_.Trim(t) == Bry_.Empty) {
|
||||||
|
if (para_stack != Para_stack__none) {
|
||||||
|
Para_stack_bfr(bfr, para_stack);
|
||||||
|
bfr.Add_str_a7("<br />");
|
||||||
|
para_stack = Para_stack__none;
|
||||||
|
last_section = Last_section__para;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
if (last_section != Last_section__para) {
|
||||||
|
bfr.Add(Close_paragraph());
|
||||||
|
last_section = Last_section__none;
|
||||||
|
para_stack = Para_stack__bgn;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
para_stack = Para_stack__mid;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
if (para_stack != Para_stack__none) {
|
||||||
|
Para_stack_bfr(bfr, para_stack);
|
||||||
|
para_stack = Para_stack__none;
|
||||||
|
last_section = Last_section__para;
|
||||||
|
}
|
||||||
|
else if (last_section != Last_section__para) {
|
||||||
|
bfr.Add(Close_paragraph()).Add(Gfh_tag_.P_lhs);
|
||||||
|
this.last_section = Last_section__para;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// somewhere above we forget to get out of pre block (bug 785)
|
||||||
|
if (pre_close_match && in_pre) {
|
||||||
|
in_pre = false;
|
||||||
|
}
|
||||||
|
if (para_stack == Para_stack__none) {
|
||||||
|
bfr.Add(t);
|
||||||
|
if (prefix_len == 0) {
|
||||||
|
bfr.Add_byte_nl();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
line_bgn = line_end + 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
while (prefix_len > 0) {
|
||||||
|
bfr.Add(Close_list(prefix2[prefix_len - 1]));
|
||||||
|
prefix_len--;
|
||||||
|
if (prefix_len > 0) {
|
||||||
|
bfr.Add_byte_nl();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (last_section != Last_section__none) {
|
||||||
|
bfr.Add(last_section == Last_section__para ? Gfh_tag_.P_rhs : Gfh_tag_.Pre_rhs);
|
||||||
|
last_section = Last_section__none;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// If a pre or p is open, return the corresponding close tag and update
|
||||||
|
// the state. If no tag is open, return an empty String.
|
||||||
|
public byte[] Close_paragraph() {
|
||||||
|
byte[] result = Bry_.Empty;
|
||||||
|
if (last_section != Last_section__none) {
|
||||||
|
tmp.Add(last_section == Last_section__para ? Gfh_tag_.P_rhs : Gfh_tag_.Pre_rhs);
|
||||||
|
result = tmp.Add_byte_nl().To_bry_and_clear();
|
||||||
|
}
|
||||||
|
in_pre = false;
|
||||||
|
last_section = Last_section__none;
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
// getCommon() returns the length of the longest common substring
|
||||||
|
// of both arguments, starting at the beginning of both.
|
||||||
|
private int Get_common(byte[] st1, byte[] st2) {
|
||||||
|
int st1_len = st1.length, st2_len = st2.length;
|
||||||
|
int shorter = st1_len < st2_len ? st1_len : st2_len;
|
||||||
|
|
||||||
|
int i;
|
||||||
|
for (i = 0; i < shorter; i++) {
|
||||||
|
if (st1[i] != st2[i]) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return i;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Open the list item element identified by the prefix character.
|
||||||
|
private byte[] Open_list(byte c) {
|
||||||
|
byte[] result = Close_paragraph();
|
||||||
|
|
||||||
|
if (c == Byte_ascii.Star)
|
||||||
|
result = tmp.Add(result).Add_str_a7("<ul><li>").To_bry_and_clear();
|
||||||
|
else if (c == Byte_ascii.Hash)
|
||||||
|
result = tmp.Add(result).Add_str_a7("<ol><li>").To_bry_and_clear();
|
||||||
|
else if (c == Byte_ascii.Hash)
|
||||||
|
result = tmp.Add(result).Add_str_a7("<dl><dd>").To_bry_and_clear();
|
||||||
|
else if (c == Byte_ascii.Semic) {
|
||||||
|
result = tmp.Add(result).Add_str_a7("<dl><dt>").To_bry_and_clear();
|
||||||
|
dt_open = true;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
result = tmp.Add_str_a7("<!-- ERR 1 -->").To_bry_and_clear();
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Close the current list item and open the next one.
|
||||||
|
private byte[] Next_item(byte c) {
|
||||||
|
if (c == Byte_ascii.Star || c == Byte_ascii.Hash) {
|
||||||
|
return tmp.Add_str_a7("</li>\n<li>").To_bry_and_clear();
|
||||||
|
}
|
||||||
|
else if (c == Byte_ascii.Colon || c == Byte_ascii.Semic) {
|
||||||
|
byte[] close = tmp.Add_str_a7("</dd>\n").To_bry_and_clear();
|
||||||
|
if (dt_open) {
|
||||||
|
close = tmp.Add_str_a7("</dt>\n").To_bry_and_clear();
|
||||||
|
}
|
||||||
|
if (c == Byte_ascii.Semic) {
|
||||||
|
dt_open = true;
|
||||||
|
return tmp.Add(close).Add_str_a7("<dt>").To_bry_and_clear();
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
dt_open = false;
|
||||||
|
return tmp.Add(close).Add_str_a7("<dd>").To_bry_and_clear();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return tmp.Add_str_a7("<!-- ERR 2 -->").To_bry_and_clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Close the current list item identified by the prefix character.
|
||||||
|
private byte[] Close_list(byte c) {
|
||||||
|
byte[] text = null;
|
||||||
|
if (c == Byte_ascii.Star) {
|
||||||
|
text = Bry_.new_a7("</li></ul>");
|
||||||
|
}
|
||||||
|
else if (c == Byte_ascii.Hash) {
|
||||||
|
text = Bry_.new_a7("</li></ol>");
|
||||||
|
}
|
||||||
|
else if (c == Byte_ascii.Colon) {
|
||||||
|
if (dt_open) {
|
||||||
|
dt_open = false;
|
||||||
|
text = Bry_.new_a7("</dt></dl>");
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
text = Bry_.new_a7("</dd></dl>");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
return Bry_.new_a7("<!-- ERR 3 -->");
|
||||||
|
}
|
||||||
|
return text;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Split up a String on ':', ignoring any occurrences inside tags
|
||||||
|
// to prevent illegal overlapping.
|
||||||
|
private int Find_colon_no_links(byte[] str, byte[] before, byte[] after) {
|
||||||
|
int len = str.length;
|
||||||
|
int colon_pos = Php_str_.Strpos(str, Byte_ascii.Colon, 0, len);
|
||||||
|
if (colon_pos == Bry_find_.Not_found) {
|
||||||
|
// Nothing to find!
|
||||||
|
return Bry_find_.Not_found;
|
||||||
|
}
|
||||||
|
|
||||||
|
int lt_pos = Php_str_.Strpos(str, Byte_ascii.Angle_bgn, 0, len);
|
||||||
|
if (lt_pos == Bry_find_.Not_found || lt_pos > colon_pos) {
|
||||||
|
// Easy; no tag nesting to worry about
|
||||||
|
find_colon_no_links__before = Php_str_.Substr(str, 0, colon_pos);
|
||||||
|
find_colon_no_links__after = Php_str_.Substr(str, colon_pos + 1);
|
||||||
|
return colon_pos;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Ugly state machine to walk through avoiding tags.
|
||||||
|
int state = COLON_STATE_TEXT;
|
||||||
|
int level = 0;
|
||||||
|
for (int i = 0; i < len; i++) {
|
||||||
|
byte c = str[i];
|
||||||
|
|
||||||
|
switch (state) {
|
||||||
|
case COLON_STATE_TEXT:
|
||||||
|
switch (c) {
|
||||||
|
case Byte_ascii.Angle_bgn:
|
||||||
|
// Could be either a <start> tag or an </end> tag
|
||||||
|
state = COLON_STATE_TAGSTART;
|
||||||
|
break;
|
||||||
|
case Byte_ascii.Colon:
|
||||||
|
if (level == 0) {
|
||||||
|
// We found it!
|
||||||
|
find_colon_no_links__before = Php_str_.Substr(str, 0, i);
|
||||||
|
find_colon_no_links__after = Php_str_.Substr(str, i + 1);
|
||||||
|
return i;
|
||||||
|
}
|
||||||
|
// Embedded in a tag; don't break it.
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
// Skip ahead looking for something interesting
|
||||||
|
colon_pos = Php_str_.Strpos(str, Byte_ascii.Colon, i, len);
|
||||||
|
if (colon_pos == Bry_find_.Not_found) {
|
||||||
|
// Nothing else interesting
|
||||||
|
return Bry_find_.Not_found;
|
||||||
|
}
|
||||||
|
lt_pos = Php_str_.Strpos(str, Byte_ascii.Angle_bgn, i, len);
|
||||||
|
if (level == 0) {
|
||||||
|
if (lt_pos == Bry_find_.Not_found || colon_pos < lt_pos) {
|
||||||
|
// We found it!
|
||||||
|
find_colon_no_links__before = Php_str_.Substr(str, 0, colon_pos);
|
||||||
|
find_colon_no_links__after = Php_str_.Substr(str, colon_pos + 1);
|
||||||
|
return i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (lt_pos == Bry_find_.Not_found) {
|
||||||
|
// Nothing else interesting to find; abort!
|
||||||
|
// We're nested, but there's no close tags left. Abort!
|
||||||
|
i = len; // break 2
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
// Skip ahead to next tag start
|
||||||
|
i = lt_pos;
|
||||||
|
state = COLON_STATE_TAGSTART;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case COLON_STATE_TAG:
|
||||||
|
// In a <tag>
|
||||||
|
switch (c) {
|
||||||
|
case Byte_ascii.Angle_bgn:
|
||||||
|
level++;
|
||||||
|
state = COLON_STATE_TEXT;
|
||||||
|
break;
|
||||||
|
case Byte_ascii.Slash:
|
||||||
|
// Slash may be followed by >?
|
||||||
|
state = COLON_STATE_TAGSLASH;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
// ignore
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case COLON_STATE_TAGSTART:
|
||||||
|
switch (c) {
|
||||||
|
case Byte_ascii.Slash:
|
||||||
|
state = COLON_STATE_CLOSETAG;
|
||||||
|
break;
|
||||||
|
case Byte_ascii.Bang:
|
||||||
|
state = COLON_STATE_COMMENT;
|
||||||
|
break;
|
||||||
|
case Byte_ascii.Angle_bgn:
|
||||||
|
// Illegal early close? This shouldn't happen D:
|
||||||
|
state = COLON_STATE_TEXT;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
state = COLON_STATE_TAG;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case COLON_STATE_CLOSETAG:
|
||||||
|
// In a </tag>
|
||||||
|
if (c == Byte_ascii.Angle_bgn) {
|
||||||
|
level--;
|
||||||
|
if (level < 0) {
|
||||||
|
Gfo_usr_dlg_.Instance.Warn_many("", "", "Invalid input; too many close tags");
|
||||||
|
return Bry_find_.Not_found;
|
||||||
|
}
|
||||||
|
state = COLON_STATE_TEXT;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case COLON_STATE_TAGSLASH:
|
||||||
|
if (c == Byte_ascii.Angle_bgn) {
|
||||||
|
// Yes, a self-closed tag <blah/>
|
||||||
|
state = COLON_STATE_TEXT;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
// Probably we're jumping the gun, and this is an attribute
|
||||||
|
state = COLON_STATE_TAG;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case COLON_STATE_COMMENT:
|
||||||
|
if (c == Byte_ascii.Dash) {
|
||||||
|
state = COLON_STATE_COMMENTDASH;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case COLON_STATE_COMMENTDASH:
|
||||||
|
if (c == Byte_ascii.Dash) {
|
||||||
|
state = COLON_STATE_COMMENTDASHDASH;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
state = COLON_STATE_COMMENT;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case COLON_STATE_COMMENTDASHDASH:
|
||||||
|
if (c == Byte_ascii.Angle_bgn) {
|
||||||
|
state = COLON_STATE_TEXT;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
state = COLON_STATE_COMMENT;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
throw Err_.new_wo_type("State machine error");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (level > 0) {
|
||||||
|
Gfo_usr_dlg_.Instance.Warn_many("", "", "Invalid input; not enough close tags (level ~{0}, state ~{1})", level, state);
|
||||||
|
return Bry_find_.Not_found;
|
||||||
|
}
|
||||||
|
return Bry_find_.Not_found;
|
||||||
|
}
|
||||||
|
private static final int
|
||||||
|
COLON_STATE_TEXT = 0
|
||||||
|
, COLON_STATE_TAG = 1
|
||||||
|
, COLON_STATE_TAGSTART = 2
|
||||||
|
, COLON_STATE_CLOSETAG = 3
|
||||||
|
, COLON_STATE_TAGSLASH = 4
|
||||||
|
, COLON_STATE_COMMENT = 5
|
||||||
|
, COLON_STATE_COMMENTDASH = 6
|
||||||
|
, COLON_STATE_COMMENTDASHDASH = 7
|
||||||
|
;
|
||||||
|
private static final byte
|
||||||
|
Last_section__none = 0 // ''
|
||||||
|
, Last_section__para = 1 // p
|
||||||
|
, Last_section__pre = 2 // pre
|
||||||
|
;
|
||||||
|
private static final byte
|
||||||
|
Para_stack__none = 0 // false
|
||||||
|
, Para_stack__bgn = 1 // <p>
|
||||||
|
, Para_stack__mid = 2 // </p><p>
|
||||||
|
;
|
||||||
|
private static final int Pre__bgn = 0, Pre__end = 1;
|
||||||
|
private static Btrie_slim_mgr pre_trie;
|
||||||
|
private static boolean[] block_chars_ary;
|
||||||
|
private static boolean[] Block_chars_ary__new() {
|
||||||
|
boolean[] rv = new boolean[256];
|
||||||
|
rv[Byte_ascii.Star] = true;
|
||||||
|
rv[Byte_ascii.Hash] = true;
|
||||||
|
rv[Byte_ascii.Colon] = true;
|
||||||
|
rv[Byte_ascii.Semic] = true;
|
||||||
|
return rv;
|
||||||
|
}
|
||||||
|
private static Btrie_slim_mgr open_match_trie, close_match_trie, blockquote_trie;
|
||||||
|
private static void Para_stack_bfr(Bry_bfr bfr, int id) {
|
||||||
|
switch (id) {
|
||||||
|
case Para_stack__bgn: bfr.Add_str_a7("<p>"); break;
|
||||||
|
case Para_stack__mid: bfr.Add_str_a7("</p><p>"); break;
|
||||||
|
default: throw Err_.new_unhandled_default(id);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,42 @@
|
|||||||
|
/*
|
||||||
|
XOWA: the XOWA Offline Wiki Application
|
||||||
|
Copyright (C) 2012 gnosygnu@gmail.com
|
||||||
|
|
||||||
|
This program is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Affero General Public License as
|
||||||
|
published by the Free Software Foundation, either version 3 of the
|
||||||
|
License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Affero General Public License
|
||||||
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
package gplx.xowa.mws.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
|
||||||
|
import org.junit.*; import gplx.core.tests.*;
|
||||||
|
import gplx.xowa.mws.linkers.*;
|
||||||
|
public class Xomw_block_level_pass__tst {
|
||||||
|
private final Xomw_block_level_pass__fxt fxt = new Xomw_block_level_pass__fxt();
|
||||||
|
@Test public void Basic() {
|
||||||
|
fxt.Test__do_block_levels(String_.Concat_lines_nl_skip_last
|
||||||
|
( "a"
|
||||||
|
), String_.Concat_lines_nl_skip_last
|
||||||
|
( "<p>a"
|
||||||
|
, "</p>"
|
||||||
|
));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
class Xomw_block_level_pass__fxt {
|
||||||
|
private final Xomw_block_level_pass block_level_pass = new Xomw_block_level_pass();
|
||||||
|
private final Xomw_parser_ctx pctx = new Xomw_parser_ctx();
|
||||||
|
private final Xomw_parser_bfr pbfr = new Xomw_parser_bfr();
|
||||||
|
private boolean apos = true;
|
||||||
|
public void Test__do_block_levels(String src, String expd) {
|
||||||
|
if (apos) expd = gplx.langs.htmls.Gfh_utl.Replace_apos(expd);
|
||||||
|
block_level_pass.Do_block_levels(pctx, pbfr.Init(Bry_.new_u8(src)), true);
|
||||||
|
Gftest.Eq__str(expd, pbfr.Rslt().To_str_and_clear());
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,250 @@
|
|||||||
|
/*
|
||||||
|
XOWA: the XOWA Offline Wiki Application
|
||||||
|
Copyright (C) 2012 gnosygnu@gmail.com
|
||||||
|
|
||||||
|
This program is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Affero General Public License as
|
||||||
|
published by the Free Software Foundation, either version 3 of the
|
||||||
|
License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Affero General Public License
|
||||||
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
package gplx.xowa.mws.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
|
||||||
|
import gplx.langs.htmls.*;
|
||||||
|
import gplx.xowa.mws.*;
|
||||||
|
import gplx.xowa.mws.htmls.*;
|
||||||
|
import gplx.xowa.mws.linkers.*;
|
||||||
|
public class Xomw_link_holders {
|
||||||
|
private final Xomw_link_renderer link_renderer;
|
||||||
|
private final Bry_bfr tmp;
|
||||||
|
private int link_id = 0; // MOVED:Parser.php
|
||||||
|
private final Xomw_link_holder_list internals = new Xomw_link_holder_list();
|
||||||
|
private final Xomwh_atr_mgr extra_atrs = new Xomwh_atr_mgr();
|
||||||
|
public Xomw_link_holders(Xomw_link_renderer link_renderer, Bry_bfr tmp) {
|
||||||
|
this.link_renderer = link_renderer;
|
||||||
|
this.tmp = tmp;
|
||||||
|
}
|
||||||
|
public void Clear() {
|
||||||
|
internals.Clear();
|
||||||
|
|
||||||
|
link_id = 0;
|
||||||
|
}
|
||||||
|
public void Make_holder(Bry_bfr bfr, Xoa_ttl nt, byte[] text, byte[][] query, byte[] trail, byte[] prefix) {
|
||||||
|
if (nt == null) {
|
||||||
|
// Fail gracefully
|
||||||
|
bfr.Add_str_a7("<!-- ERROR -->").Add(prefix).Add(text).Add(trail);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
// Separate the link trail from the rest of the link
|
||||||
|
// list( $inside, $trail ) = Linker::splitTrail( $trail );
|
||||||
|
byte[] inside = Bry_.Empty;
|
||||||
|
|
||||||
|
Xomw_link_holder_item entry = new Xomw_link_holder_item(nt, tmp.Add_bry_many(prefix, text, inside).To_bry_and_clear(), query);
|
||||||
|
|
||||||
|
boolean is_external = false; // $nt->isExternal()
|
||||||
|
if (is_external) {
|
||||||
|
// Use a globally unique ID to keep the objects mergable
|
||||||
|
// $key = $this->parent->nextLinkID();
|
||||||
|
// $this->interwikis[$key] = $entry;
|
||||||
|
// $retVal = "<!--IWLINK $key-->{$trail}";
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
int key = link_id++;
|
||||||
|
internals.Add(key, entry);
|
||||||
|
bfr.Add(Bry__link__bgn).Add_int_variable(key).Add(Gfh_tag_.Comm_end).Add(trail); // "<!--LINK $ns:$key-->{$trail}";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
public void Test__add(Xoa_ttl ttl, byte[] capt) {
|
||||||
|
int key = link_id++;
|
||||||
|
Xomw_link_holder_item item = new Xomw_link_holder_item(ttl, capt, Bry_.Ary_empty);
|
||||||
|
internals.Add(key, item);
|
||||||
|
}
|
||||||
|
public void Replace(Xomw_parser_ctx pctx, Xomw_parser_bfr pbfr) {
|
||||||
|
this.Replace_internal(pbfr);
|
||||||
|
// $this->replaceInterwiki( $text );
|
||||||
|
}
|
||||||
|
private void Replace_internal(Xomw_parser_bfr pbfr) {
|
||||||
|
if (internals.Len() == 0)
|
||||||
|
return;
|
||||||
|
|
||||||
|
// $colours = [];
|
||||||
|
// $linkCache = LinkCache::singleton();
|
||||||
|
// $output = $this->parent->getOutput();
|
||||||
|
// $linkRenderer = $this->parent->getLinkRenderer();
|
||||||
|
|
||||||
|
// $linkcolour_ids = [];
|
||||||
|
|
||||||
|
// SKIP:Replace_internals does db lookup to identify redlinks;
|
||||||
|
|
||||||
|
// Construct search and replace arrays
|
||||||
|
Bry_bfr src_bfr = pbfr.Src();
|
||||||
|
byte[] src = src_bfr.Bfr();
|
||||||
|
int src_bgn = 0;
|
||||||
|
int src_end = src_bfr.Len();
|
||||||
|
Bry_bfr bfr = pbfr.Trg();
|
||||||
|
pbfr.Switch();
|
||||||
|
|
||||||
|
int cur = src_bgn;
|
||||||
|
int prv = 0;
|
||||||
|
while (true) {
|
||||||
|
int link_bgn = Bry_find_.Find_fwd(src, Bry__link__bgn, cur, src_end);
|
||||||
|
if (link_bgn == Bry_find_.Not_found) {
|
||||||
|
bfr.Add_mid(src, prv, src_end);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
int key_bgn = link_bgn + Bry__link__bgn.length;
|
||||||
|
int key_end = Bry_find_.Find_fwd_while_num(src, key_bgn, src_end);
|
||||||
|
int link_key = Bry_.To_int_or(src, key_bgn, key_end, -1);
|
||||||
|
Xomw_link_holder_item item = internals.Get_by(link_key);
|
||||||
|
|
||||||
|
// $pdbk = $entry['pdbk'];
|
||||||
|
// $title = $entry['title'];
|
||||||
|
// $query = isset( $entry['query'] ) ? $entry['query'] : [];
|
||||||
|
// $key = "$ns:$index";
|
||||||
|
// $searchkey = "<!--LINK $key-->";
|
||||||
|
// $displayText = $entry['text'];
|
||||||
|
// if ( isset( $entry['selflink'] ) ) {
|
||||||
|
// $replacePairs[$searchkey] = Linker::makeSelfLinkObj( $title, $displayText, $query );
|
||||||
|
// continue;
|
||||||
|
// }
|
||||||
|
// if ( $displayText === '' ) {
|
||||||
|
// $displayText = null;
|
||||||
|
// } else {
|
||||||
|
// $displayText = new HtmlArmor( $displayText );
|
||||||
|
// }
|
||||||
|
// if ( !isset( $colours[$pdbk] ) ) {
|
||||||
|
// $colours[$pdbk] = 'new';
|
||||||
|
// }
|
||||||
|
// $attribs = [];
|
||||||
|
// if ( $colours[$pdbk] == 'new' ) {
|
||||||
|
// $linkCache->addBadLinkObj( $title );
|
||||||
|
// $output->addLink( $title, 0 );
|
||||||
|
// $link = $linkRenderer->makeBrokenLink(
|
||||||
|
// $title, $displayText, $attribs, $query
|
||||||
|
// );
|
||||||
|
// } else {
|
||||||
|
// $link = $linkRenderer->makePreloadedLink(
|
||||||
|
// $title, $displayText, $colours[$pdbk], $attribs, $query
|
||||||
|
// );
|
||||||
|
// }
|
||||||
|
|
||||||
|
bfr.Add_mid(src, prv, link_bgn);
|
||||||
|
link_renderer.Make_preloaded_link(bfr, item.Title(), item.Text(), Bry_.Empty, extra_atrs, Bry_.Empty);
|
||||||
|
cur = key_end + Gfh_tag_.Comm_end_len;
|
||||||
|
prv = cur;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// private void Replace_internal__db() {
|
||||||
|
// // Generate query
|
||||||
|
// $lb = new LinkBatch();
|
||||||
|
// $lb->setCaller( __METHOD__ );
|
||||||
|
//
|
||||||
|
// foreach ( $this->internals as $ns => $entries ) {
|
||||||
|
// foreach ( $entries as $entry ) {
|
||||||
|
// /** @var Title $title */
|
||||||
|
// $title = $entry['title'];
|
||||||
|
// $pdbk = $entry['pdbk'];
|
||||||
|
//
|
||||||
|
// # Skip invalid entries.
|
||||||
|
// # Result will be ugly, but prevents crash.
|
||||||
|
// if ( is_null( $title ) ) {
|
||||||
|
// continue;
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// # Check if it's a static known link, e.g. interwiki
|
||||||
|
// if ( $title->isAlwaysKnown() ) {
|
||||||
|
// $colours[$pdbk] = '';
|
||||||
|
// } elseif ( $ns == NS_SPECIAL ) {
|
||||||
|
// $colours[$pdbk] = 'new';
|
||||||
|
// } else {
|
||||||
|
// $id = $linkCache->getGoodLinkID( $pdbk );
|
||||||
|
// if ( $id != 0 ) {
|
||||||
|
// $colours[$pdbk] = $linkRenderer->getLinkClasses( $title );
|
||||||
|
// $output->addLink( $title, $id );
|
||||||
|
// $linkcolour_ids[$id] = $pdbk;
|
||||||
|
// } elseif ( $linkCache->isBadLink( $pdbk ) ) {
|
||||||
|
// $colours[$pdbk] = 'new';
|
||||||
|
// } else {
|
||||||
|
// # Not in the link cache, add it to the query
|
||||||
|
// $lb->addObj( $title );
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// if ( !$lb->isEmpty() ) {
|
||||||
|
// $fields = array_merge(
|
||||||
|
// LinkCache::getSelectFields(),
|
||||||
|
// [ 'page_namespace', 'page_title' ]
|
||||||
|
// );
|
||||||
|
//
|
||||||
|
// $res = $dbr->select(
|
||||||
|
// 'page',
|
||||||
|
// $fields,
|
||||||
|
// $lb->constructSet( 'page', $dbr ),
|
||||||
|
// __METHOD__
|
||||||
|
// );
|
||||||
|
//
|
||||||
|
// # Fetch data and form into an associative array
|
||||||
|
// # non-existent = broken
|
||||||
|
// foreach ( $res as $s ) {
|
||||||
|
// $title = Title::makeTitle( $s->page_namespace, $s->page_title );
|
||||||
|
// $pdbk = $title->getPrefixedDBkey();
|
||||||
|
// $linkCache->addGoodLinkObjFromRow( $title, $s );
|
||||||
|
// $output->addLink( $title, $s->page_id );
|
||||||
|
// $colours[$pdbk] = $linkRenderer->getLinkClasses( $title );
|
||||||
|
// // add id to the extension todolist
|
||||||
|
// $linkcolour_ids[$s->page_id] = $pdbk;
|
||||||
|
// }
|
||||||
|
// unset( $res );
|
||||||
|
// }
|
||||||
|
// if ( count( $linkcolour_ids ) ) {
|
||||||
|
// // pass an array of page_ids to an extension
|
||||||
|
// Hooks::run( 'GetLinkColours', [ $linkcolour_ids, &$colours ] );
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// # Do a second query for different language variants of links and categories
|
||||||
|
// if ( $wgContLang->hasVariants() ) {
|
||||||
|
// $this->doVariants( $colours );
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
|
||||||
|
private static final byte[] Bry__link__bgn = Bry_.new_a7("<!--LINK ");
|
||||||
|
}
|
||||||
|
class Xomw_link_holder_list {
|
||||||
|
private int ary_len = 0, ary_max = 128;
|
||||||
|
private Xomw_link_holder_item[] ary = new Xomw_link_holder_item[128];
|
||||||
|
public int Len() {return ary_len;}
|
||||||
|
public void Clear() {
|
||||||
|
ary_len = 0;
|
||||||
|
if (ary_max > 128)
|
||||||
|
ary = new Xomw_link_holder_item[128];
|
||||||
|
}
|
||||||
|
public void Add(int key, Xomw_link_holder_item item) {
|
||||||
|
if (key >= ary_max) {
|
||||||
|
int new_max = ary_max * 2;
|
||||||
|
ary = (Xomw_link_holder_item[])Array_.Resize(ary, new_max);
|
||||||
|
ary_max = new_max;
|
||||||
|
}
|
||||||
|
ary[key] = item;
|
||||||
|
ary_len++;
|
||||||
|
}
|
||||||
|
public Xomw_link_holder_item Get_by(int key) {return ary[key];}
|
||||||
|
}
|
||||||
|
class Xomw_link_holder_item {
|
||||||
|
public Xomw_link_holder_item(Xoa_ttl title, byte[] text, byte[][] query) {
|
||||||
|
this.title = title;
|
||||||
|
this.text = text;
|
||||||
|
this.query = query;
|
||||||
|
}
|
||||||
|
public Xoa_ttl Title() {return title;} private final Xoa_ttl title;
|
||||||
|
public byte[] Text() {return text;} private final byte[] text;
|
||||||
|
public byte[] Pdbk() {return title.Get_prefixed_db_key();}
|
||||||
|
public byte[][] Query() {return query;} private final byte[][] query;
|
||||||
|
}
|
@ -0,0 +1,45 @@
|
|||||||
|
/*
|
||||||
|
XOWA: the XOWA Offline Wiki Application
|
||||||
|
Copyright (C) 2012 gnosygnu@gmail.com
|
||||||
|
|
||||||
|
This program is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Affero General Public License as
|
||||||
|
published by the Free Software Foundation, either version 3 of the
|
||||||
|
License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Affero General Public License
|
||||||
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
package gplx.xowa.mws.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
|
||||||
|
import org.junit.*; import gplx.core.tests.*;
|
||||||
|
import gplx.xowa.mws.linkers.*;
|
||||||
|
public class Xomw_link_holders__tst {
|
||||||
|
private final Xomw_link_holders__fxt fxt = new Xomw_link_holders__fxt();
|
||||||
|
@Test public void Replace__basic() {
|
||||||
|
fxt.Init__add("A", "a");
|
||||||
|
fxt.Test__replace("a <!--LINK 0--> b", "a <a href='/wiki/A' title='A'>a</a> b");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
class Xomw_link_holders__fxt {
|
||||||
|
private final Xomw_link_holders holders = new Xomw_link_holders(new Xomw_link_renderer(), Bry_bfr_.New());
|
||||||
|
private final Xomw_parser_bfr pbfr = new Xomw_parser_bfr();
|
||||||
|
private final Xowe_wiki wiki;
|
||||||
|
private boolean apos = true;
|
||||||
|
public Xomw_link_holders__fxt() {
|
||||||
|
Xoae_app app = Xoa_app_fxt.Make__app__edit();
|
||||||
|
this.wiki = Xoa_app_fxt.Make__wiki__edit(app);
|
||||||
|
}
|
||||||
|
public void Init__add(String ttl, String capt) {
|
||||||
|
holders.Test__add(wiki.Ttl_parse(Bry_.new_u8(ttl)), Bry_.new_u8(capt));
|
||||||
|
}
|
||||||
|
public void Test__replace(String src, String expd) {
|
||||||
|
if (apos) expd = gplx.langs.htmls.Gfh_utl.Replace_apos(expd);
|
||||||
|
holders.Replace(new Xomw_parser_ctx(), pbfr.Init(Bry_.new_u8(src)));
|
||||||
|
Gftest.Eq__str(expd, pbfr.Rslt().To_str_and_clear());
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,27 @@
|
|||||||
|
/*
|
||||||
|
XOWA: the XOWA Offline Wiki Application
|
||||||
|
Copyright (C) 2012 gnosygnu@gmail.com
|
||||||
|
|
||||||
|
This program is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Affero General Public License as
|
||||||
|
published by the Free Software Foundation, either version 3 of the
|
||||||
|
License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Affero General Public License
|
||||||
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
package gplx.xowa.mws.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
|
||||||
|
/**
 * Output-type ids for the parser.
 * NOTE(review): values presumably mirror MediaWiki's Parser OT_* constants; Tid__preprocess and Tid__msg intentionally share the value 3 here.
 */
public class Xomw_output_type {
	public static final byte Tid__html       = 1;	// like parse()
	public static final byte Tid__wiki       = 2;	// like preSaveTransform()
	public static final byte Tid__preprocess = 3;	// like preprocess()
	public static final byte Tid__msg        = 3;	// same value as Tid__preprocess
	public static final byte Tid__plain      = 4;	// like extractSections() - portions of the original are returned unchanged.
}
|
@ -0,0 +1,257 @@
|
|||||||
|
/*
|
||||||
|
XOWA: the XOWA Offline Wiki Application
|
||||||
|
Copyright (C) 2012 gnosygnu@gmail.com
|
||||||
|
|
||||||
|
This program is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Affero General Public License as
|
||||||
|
published by the Free Software Foundation, either version 3 of the
|
||||||
|
License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Affero General Public License
|
||||||
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
package gplx.xowa.mws.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
|
||||||
|
import gplx.core.btries.*; import gplx.core.net.*;
|
||||||
|
import gplx.xowa.mws.parsers.prepros.*; import gplx.xowa.mws.parsers.headings.*;
|
||||||
|
import gplx.xowa.mws.parsers.quotes.*; import gplx.xowa.mws.parsers.tables.*; import gplx.xowa.mws.parsers.hrs.*; import gplx.xowa.mws.parsers.nbsps.*;
|
||||||
|
import gplx.xowa.mws.parsers.lnkes.*; import gplx.xowa.mws.parsers.lnkis.*;
|
||||||
|
import gplx.xowa.mws.utls.*; import gplx.xowa.mws.linkers.*;
|
||||||
|
public class Xomw_parser {
|
||||||
|
private final Xomw_parser_ctx pctx = new Xomw_parser_ctx();
|
||||||
|
private final Xomw_table_wkr table_wkr;
|
||||||
|
private final Xomw_hr_wkr hr_wkr = new Xomw_hr_wkr();
|
||||||
|
private final Xomw_lnke_wkr lnke_wkr;
|
||||||
|
private final Xomw_nbsp_wkr nbsp_wkr = new Xomw_nbsp_wkr();
|
||||||
|
private final Xomw_block_level_pass block_wkr = new Xomw_block_level_pass();
|
||||||
|
private final Xomw_heading_wkr heading_wkr = new Xomw_heading_wkr();
|
||||||
|
private final Xomw_link_renderer link_renderer = new Xomw_link_renderer();
|
||||||
|
private final Xomw_link_holders holders;
|
||||||
|
private final Xomw_heading_cbk__html heading_wkr_cbk;
|
||||||
|
private final Btrie_slim_mgr protocols_trie;
|
||||||
|
private final Btrie_rv trv = new Btrie_rv();
|
||||||
|
private int marker_index = 0;
|
||||||
|
// private final Xomw_prepro_wkr prepro_wkr = new Xomw_prepro_wkr();
|
||||||
|
public Xomw_strip_state Strip_state() {return strip_state;} private final Xomw_strip_state strip_state = new Xomw_strip_state();
|
||||||
|
public Xomw_sanitizer Sanitizer() {return sanitizer;} private final Xomw_sanitizer sanitizer = new Xomw_sanitizer();
|
||||||
|
public Xomw_linker Linker() {return linker;} private final Xomw_linker linker = new Xomw_linker();
|
||||||
|
public Bry_bfr Tmp() {return tmp;} private final Bry_bfr tmp = Bry_bfr_.New();
|
||||||
|
public Xomw_quote_wkr Quote_wkr() {return quote_wkr;} private final Xomw_quote_wkr quote_wkr;
|
||||||
|
public Xomw_lnki_wkr Lnki_wkr() {return lnki_wkr;} private final Xomw_lnki_wkr lnki_wkr;
|
||||||
|
public boolean Output_type__wiki() {return output_type__wiki;} private final boolean output_type__wiki = false;
|
||||||
|
public Xomw_parser() {
|
||||||
|
this.protocols_trie = Xomw_parser.Protocols__dflt();
|
||||||
|
this.holders = new Xomw_link_holders(link_renderer, tmp);
|
||||||
|
this.table_wkr = new Xomw_table_wkr(this);
|
||||||
|
this.quote_wkr = new Xomw_quote_wkr(this);
|
||||||
|
this.lnke_wkr = new Xomw_lnke_wkr(this);
|
||||||
|
this.lnki_wkr = new Xomw_lnki_wkr(this, holders, link_renderer, protocols_trie);
|
||||||
|
this.heading_wkr_cbk = new Xomw_heading_cbk__html();
|
||||||
|
}
|
||||||
|
public void Init_by_wiki(Xowe_wiki wiki) {
|
||||||
|
linker.Init_by_wiki(wiki.Lang().Lnki_trail_mgr().Trie());
|
||||||
|
lnke_wkr.Init_by_wiki(protocols_trie);
|
||||||
|
lnki_wkr.Init_by_wiki(wiki);
|
||||||
|
}
|
||||||
|
public void Internal_parse(Xomw_parser_bfr pbfr, byte[] text) {
|
||||||
|
pbfr.Init(text);
|
||||||
|
// $origText = text;
|
||||||
|
|
||||||
|
// MW.HOOK:ParserBeforeInternalParse
|
||||||
|
|
||||||
|
// if ($frame) {
|
||||||
|
// use frame depth to infer how include/noinclude tags should be handled
|
||||||
|
// depth=0 means this is the top-level document; otherwise it's an included document
|
||||||
|
// boolean for_inclusion = false;
|
||||||
|
// if (!$frame->depth) {
|
||||||
|
// $flag = 0;
|
||||||
|
// } else {
|
||||||
|
// $flag = Parser::PTD_FOR_INCLUSION;
|
||||||
|
// }
|
||||||
|
// text = prepro_wkr.Preprocess_to_xml(text, for_inclusion);
|
||||||
|
// text = $frame->expand($dom);
|
||||||
|
// } else {
|
||||||
|
// // if $frame is not provided, then use old-style replaceVariables
|
||||||
|
// text = $this->replaceVariables(text);
|
||||||
|
// }
|
||||||
|
|
||||||
|
// MW.HOOK:InternalParseBeforeSanitize
|
||||||
|
// text = Sanitizer::removeHTMLtags(
|
||||||
|
// text,
|
||||||
|
// [ &$this, 'attributeStripCallback' ],
|
||||||
|
// false,
|
||||||
|
// array_keys($this->mTransparentTagHooks),
|
||||||
|
// [],
|
||||||
|
// [ &$this, 'addTrackingCategory' ]
|
||||||
|
// );
|
||||||
|
// MW.HOOK:InternalParseBeforeLinks
|
||||||
|
|
||||||
|
// Tables need to come after variable replacement for things to work
|
||||||
|
// properly; putting them before other transformations should keep
|
||||||
|
// exciting things like link expansions from showing up in surprising
|
||||||
|
// places.
|
||||||
|
table_wkr.Do_table_stuff(pctx, pbfr);
|
||||||
|
hr_wkr.Replace_hrs(pctx, pbfr);
|
||||||
|
|
||||||
|
// text = $this->doDoubleUnderscore(text);
|
||||||
|
|
||||||
|
heading_wkr.Do_headings(pctx, pbfr, heading_wkr_cbk);
|
||||||
|
lnki_wkr.Replace_internal_links(pctx, pbfr);
|
||||||
|
quote_wkr.Do_all_quotes(pctx, pbfr);
|
||||||
|
lnke_wkr.Replace_external_links(pctx, pbfr);
|
||||||
|
|
||||||
|
// replaceInternalLinks may sometimes leave behind
|
||||||
|
// absolute URLs, which have to be masked to hide them from replaceExternalLinks
|
||||||
|
Xomw_parser_bfr_.Replace(pbfr, Bry__marker__noparse, Bry_.Empty);
|
||||||
|
|
||||||
|
// $text = $this->doMagicLinks($text);
|
||||||
|
// $text = $this->formatHeadings($text, $origText, $isMain);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void Internal_parse_half_parsed(Xomw_parser_bfr pbfr, boolean is_main, boolean line_start) {
|
||||||
|
strip_state.Unstrip_general(pbfr);
|
||||||
|
|
||||||
|
// MW.HOOK:ParserAfterUnstrip
|
||||||
|
|
||||||
|
// Clean up special characters, only run once, next-to-last before doBlockLevels
|
||||||
|
nbsp_wkr.Do_nbsp(pctx, pbfr);
|
||||||
|
|
||||||
|
block_wkr.Do_block_levels(pctx, pbfr, line_start);
|
||||||
|
|
||||||
|
lnki_wkr.Replace_link_holders(pctx, pbfr);
|
||||||
|
|
||||||
|
// The input doesn't get language converted if
|
||||||
|
// a) It's disabled
|
||||||
|
// b) Content isn't converted
|
||||||
|
// c) It's a conversion table
|
||||||
|
// d) it is an interface message (which is in the user language)
|
||||||
|
// if ( !( $this->mOptions->getDisableContentConversion()
|
||||||
|
// || isset( $this->mDoubleUnderscores['nocontentconvert'] ) )
|
||||||
|
// ) {
|
||||||
|
// if ( !$this->mOptions->getInterfaceMessage() ) {
|
||||||
|
// // The position of the convert() call should not be changed. it
|
||||||
|
// // assumes that the links are all replaced and the only thing left
|
||||||
|
// // is the <nowiki> mark.
|
||||||
|
// $text = $this->getConverterLanguage()->convert( $text );
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
|
||||||
|
strip_state.Unstrip_nowiki(pbfr);
|
||||||
|
|
||||||
|
// MW.HOOK:ParserBeforeTidy
|
||||||
|
|
||||||
|
// $text = $this->replaceTransparentTags( $text );
|
||||||
|
strip_state.Unstrip_general(pbfr);
|
||||||
|
|
||||||
|
sanitizer.Normalize_char_references(pbfr);
|
||||||
|
|
||||||
|
// if ( MWTidy::isEnabled() ) {
|
||||||
|
// if ( $this->mOptions->getTidy() ) {
|
||||||
|
// $text = MWTidy::tidy( $text );
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// else {
|
||||||
|
// attempt to sanitize at least some nesting problems
|
||||||
|
// (T4702 and quite a few others)
|
||||||
|
// $tidyregs = [
|
||||||
|
// // ''Something [http://www.cool.com cool''] -->
|
||||||
|
// // <i>Something</i><a href="http://www.cool.com"..><i>cool></i></a>
|
||||||
|
// '/(<([bi])>)(<([bi])>)?([^<]*)(<\/?a[^<]*>)([^<]*)(<\/\\4>)?(<\/\\2>)/' =>
|
||||||
|
// '\\1\\3\\5\\8\\9\\6\\1\\3\\7\\8\\9',
|
||||||
|
// // fix up an anchor inside another anchor, only
|
||||||
|
// // at least for a single single nested link (T5695)
|
||||||
|
// '/(<a[^>]+>)([^<]*)(<a[^>]+>[^<]*)<\/a>(.*)<\/a>/' =>
|
||||||
|
// '\\1\\2</a>\\3</a>\\1\\4</a>',
|
||||||
|
// // fix div inside inline elements- doBlockLevels won't wrap a line which
|
||||||
|
// // contains a div, so fix it up here; replace
|
||||||
|
// // div with escaped text
|
||||||
|
// '/(<([aib]) [^>]+>)([^<]*)(<div([^>]*)>)(.*)(<\/div>)([^<]*)(<\/\\2>)/' =>
|
||||||
|
// '\\1\\3<div\\5>\\6</div>\\8\\9',
|
||||||
|
// // remove empty italic or bold tag pairs, some
|
||||||
|
// // introduced by rules above
|
||||||
|
// '/<([bi])><\/\\1>/' => '',
|
||||||
|
// ];
|
||||||
|
|
||||||
|
// $text = preg_replace(
|
||||||
|
// array_keys( $tidyregs ),
|
||||||
|
// array_values( $tidyregs ),
|
||||||
|
// $text );
|
||||||
|
// }
|
||||||
|
|
||||||
|
// MW.HOOK:ParserAfterTidy
|
||||||
|
}
|
||||||
|
public byte[] Armor_links(Bry_bfr trg, byte[] src, int src_bgn, int src_end) {
|
||||||
|
// PORTED:preg_replace( '/\b((?i)' . $this->mUrlProtocols . ')/', self::MARKER_PREFIX . "NOPARSE$1", $text )
|
||||||
|
int cur = src_bgn;
|
||||||
|
int prv = cur;
|
||||||
|
boolean dirty = false;
|
||||||
|
boolean called_by_bry = trg == null;
|
||||||
|
while (true) {
|
||||||
|
// exit if EOS
|
||||||
|
if (cur == src_end) {
|
||||||
|
// if dirty, add rest of String
|
||||||
|
if (dirty)
|
||||||
|
trg.Add_mid(src, prv, src_end);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
// check if cur matches protocol
|
||||||
|
Object protocol_obj = protocols_trie.Match_at(trv, src, cur, src_end);
|
||||||
|
// no match; continue
|
||||||
|
if (protocol_obj == null) {
|
||||||
|
cur++;
|
||||||
|
}
|
||||||
|
// match; add to bfr
|
||||||
|
else {
|
||||||
|
dirty = true;
|
||||||
|
byte[] protocol_bry = (byte[])protocol_obj;
|
||||||
|
if (called_by_bry) trg = Bry_bfr_.New();
|
||||||
|
trg.Add_bry_many(Xomw_strip_state.Bry__marker__bgn, Bry__noparse, protocol_bry);
|
||||||
|
cur += protocol_bry.length;
|
||||||
|
prv = cur;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (called_by_bry) {
|
||||||
|
if (dirty)
|
||||||
|
return trg.To_bry_and_clear();
|
||||||
|
else {
|
||||||
|
if (src_bgn == 0 && src_end == src.length)
|
||||||
|
return src;
|
||||||
|
else
|
||||||
|
return Bry_.Mid(src, src_bgn, src_end);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
if (dirty)
|
||||||
|
return null;
|
||||||
|
else {
|
||||||
|
trg.Add_mid(src, src_bgn, src_end);
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
public byte[] Insert_strip_item(byte[] text) {
|
||||||
|
tmp.Add_bry_many(Xomw_strip_state.Bry__marker__bgn, Bry__strip_state_item);
|
||||||
|
tmp.Add_int_variable(marker_index);
|
||||||
|
tmp.Add(Xomw_strip_state.Bry__marker__end);
|
||||||
|
byte[] marker = tmp.To_bry_and_clear();
|
||||||
|
marker_index++;
|
||||||
|
strip_state.Add_general(marker, text);
|
||||||
|
return marker;
|
||||||
|
}
|
||||||
|
private static final byte[] Bry__strip_state_item = Bry_.new_a7("-item-"), Bry__noparse = Bry_.new_a7("NOPARSE");
|
||||||
|
private static final byte[] Bry__marker__noparse = Bry_.Add(Xomw_strip_state.Bry__marker__bgn, Bry__noparse);
|
||||||
|
public static Btrie_slim_mgr Protocols__dflt() {
|
||||||
|
Btrie_slim_mgr rv = Btrie_slim_mgr.ci_a7();
|
||||||
|
Gfo_protocol_itm[] ary = Gfo_protocol_itm.Ary();
|
||||||
|
for (Gfo_protocol_itm itm : ary) {
|
||||||
|
byte[] key = itm.Text_bry(); // EX: "https://"
|
||||||
|
rv.Add_obj(key, key);
|
||||||
|
}
|
||||||
|
return rv;
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,72 @@
|
|||||||
|
/*
|
||||||
|
XOWA: the XOWA Offline Wiki Application
|
||||||
|
Copyright (C) 2012 gnosygnu@gmail.com
|
||||||
|
|
||||||
|
This program is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Affero General Public License as
|
||||||
|
published by the Free Software Foundation, either version 3 of the
|
||||||
|
License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Affero General Public License
|
||||||
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
package gplx.xowa.mws.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
|
||||||
|
import org.junit.*;
|
||||||
|
// End-to-end test: runs a small wikitext document through both parser halves (via the fixture) and compares the resulting html line by line.
public class Xomw_parser__tst {
	private final Xomw_parser__fxt fxt = new Xomw_parser__fxt();
	@Test public void Basic() {
		// first argument: wikitext source; second argument: expected html
		// NOTE(review): the last source line and the last expected line before "</p>" look identical here; they may differ only in
		// characters that are not distinguishable in this view (e.g. non-breaking spaces) -- confirm the literals against the upstream file
		fxt.Test__parse(String_.Concat_lines_nl_skip_last
		( "== heading_1 =="
		, "para_1"
		, "== heading_2 =="
		, "para_2"
		, "-----"
		, "{|"
		, "|-"
		, "|a"
		, "|}"
		, "''italics''"
		, "[https://a.org b]"
		, "[[A|abc]]"
		, "a »b«  !important c"
		), String_.Concat_lines_nl_skip_last
		( "<h2> heading_1 </h2>"
		, "<p>para_1"
		, "</p>"
		, "<h2> heading_2 </h2>"
		, "<p>para_2"
		, "</p>"
		, "<hr />"
		, "<table>"
		, ""
		, "<tr>"
		, "<td>a"
		, "</td></tr></table>"
		, "<p><i>italics</i>"
		, "<a class=\"external text\" rel=\"nofollow\" href=\"https://a.org\">b</a>"
		, "<a href=\"/wiki/A\" title=\"A\">abc</a>"
		, "a »b«  !important c"
		, "</p>"
		));
	}
}
|
||||||
|
class Xomw_parser__fxt {
|
||||||
|
private final Xomw_parser mgr = new Xomw_parser();
|
||||||
|
private final Xomw_parser_bfr pbfr = new Xomw_parser_bfr();
|
||||||
|
public Xomw_parser__fxt() {
|
||||||
|
Xoae_app app = Xoa_app_fxt.Make__app__edit();
|
||||||
|
Xowe_wiki wiki = Xoa_app_fxt.Make__wiki__edit(app);
|
||||||
|
mgr.Init_by_wiki(wiki);
|
||||||
|
}
|
||||||
|
public void Test__parse(String src_str, String expd) {
|
||||||
|
byte[] src_bry = Bry_.new_u8(src_str);
|
||||||
|
mgr.Internal_parse(pbfr, src_bry);
|
||||||
|
mgr.Internal_parse_half_parsed(pbfr, true, true);
|
||||||
|
Tfds.Eq_str_lines(expd, pbfr.Rslt().To_str_and_clear(), src_str);
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,48 @@
|
|||||||
|
/*
|
||||||
|
XOWA: the XOWA Offline Wiki Application
|
||||||
|
Copyright (C) 2012 gnosygnu@gmail.com
|
||||||
|
|
||||||
|
This program is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Affero General Public License as
|
||||||
|
published by the Free Software Foundation, either version 3 of the
|
||||||
|
License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Affero General Public License
|
||||||
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
package gplx.xowa.mws.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
|
||||||
|
public class Xomw_parser_bfr { // manages 2 bfrs to eliminate multiple calls to new memory allocations ("return bfr.To_bry_and_clear()")
|
||||||
|
private final Bry_bfr bfr_1 = Bry_bfr_.New(), bfr_2 = Bry_bfr_.New();
|
||||||
|
private Bry_bfr src, trg;
|
||||||
|
public Xomw_parser_bfr() {
|
||||||
|
this.src = bfr_1;
|
||||||
|
this.trg = bfr_2;
|
||||||
|
}
|
||||||
|
public Bry_bfr Src() {return src;}
|
||||||
|
public Bry_bfr Trg() {return trg;}
|
||||||
|
public Bry_bfr Rslt() {return src;}
|
||||||
|
public Xomw_parser_bfr Init(byte[] text) {
|
||||||
|
// resize each bfr once by guessing that html_len = text_len * 2
|
||||||
|
int text_len = text.length;
|
||||||
|
int html_len = text_len * 2;
|
||||||
|
src.Resize(html_len);
|
||||||
|
trg.Resize(html_len);
|
||||||
|
|
||||||
|
// clear and add
|
||||||
|
src.Clear();
|
||||||
|
trg.Clear();
|
||||||
|
src.Add(text);
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
public void Switch() {
|
||||||
|
Bry_bfr tmp = src;
|
||||||
|
this.src = trg;
|
||||||
|
this.trg = tmp;
|
||||||
|
trg.Clear();
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,69 @@
|
|||||||
|
/*
|
||||||
|
XOWA: the XOWA Offline Wiki Application
|
||||||
|
Copyright (C) 2012 gnosygnu@gmail.com
|
||||||
|
|
||||||
|
This program is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Affero General Public License as
|
||||||
|
published by the Free Software Foundation, either version 3 of the
|
||||||
|
License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Affero General Public License
|
||||||
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
package gplx.xowa.mws.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
|
||||||
|
public class Xomw_parser_bfr_ {
|
||||||
|
public static void Replace(Xomw_parser_bfr pbfr, byte[] find, byte[] repl) {
|
||||||
|
// XO.PBFR
|
||||||
|
Bry_bfr src_bfr = pbfr.Src();
|
||||||
|
byte[] src = src_bfr.Bfr();
|
||||||
|
int src_bgn = 0;
|
||||||
|
int src_end = src_bfr.Len();
|
||||||
|
Bry_bfr bfr = pbfr.Trg();
|
||||||
|
|
||||||
|
if (Replace(bfr, Bool_.N, src, src_bgn, src_end, find, repl) != null)
|
||||||
|
pbfr.Switch();
|
||||||
|
}
|
||||||
|
private static byte[] Replace(Bry_bfr bfr, boolean lone_bfr, byte[] src, int src_bgn, int src_end, byte[] find, byte[] repl) {
|
||||||
|
boolean dirty = false;
|
||||||
|
int cur = src_bgn;
|
||||||
|
boolean called_by_bry = bfr == null;
|
||||||
|
|
||||||
|
while (true) {
|
||||||
|
int find_bgn = Bry_find_.Find_fwd(src, find, cur);
|
||||||
|
if (find_bgn == Bry_find_.Not_found) {
|
||||||
|
if (dirty)
|
||||||
|
bfr.Add_mid(src, cur, src_end);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (called_by_bry) bfr = Bry_bfr_.New();
|
||||||
|
bfr.Add_mid(src, cur, find_bgn);
|
||||||
|
cur += find.length;
|
||||||
|
dirty = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (dirty) {
|
||||||
|
if (called_by_bry)
|
||||||
|
return bfr.To_bry_and_clear();
|
||||||
|
else
|
||||||
|
return Bry_.Empty;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
if (called_by_bry) {
|
||||||
|
if (src_bgn == 0 && src_end == src.length)
|
||||||
|
return src;
|
||||||
|
else
|
||||||
|
return Bry_.Mid(src, src_bgn, src_end);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
if (lone_bfr)
|
||||||
|
bfr.Add_mid(src, src_bgn, src_end);
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,139 @@
|
|||||||
|
/*
|
||||||
|
XOWA: the XOWA Offline Wiki Application
|
||||||
|
Copyright (C) 2012 gnosygnu@gmail.com
|
||||||
|
|
||||||
|
This program is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Affero General Public License as
|
||||||
|
published by the Free Software Foundation, either version 3 of the
|
||||||
|
License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Affero General Public License
|
||||||
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
package gplx.xowa.mws.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
|
||||||
|
import gplx.core.btries.*;
|
||||||
|
public class Xomw_strip_state { // REF.MW:/parser/StripState.php
|
||||||
|
private final Btrie_slim_mgr trie = Btrie_slim_mgr.cs();
|
||||||
|
private final Btrie_rv trv = new Btrie_rv();
|
||||||
|
private final Bry_bfr tmp_1 = Bry_bfr_.New();
|
||||||
|
private final Bry_bfr tmp_2 = Bry_bfr_.New();
|
||||||
|
private boolean tmp_2_used = false;
|
||||||
|
private int general_len, nowiki_len;
|
||||||
|
public void Clear() {
|
||||||
|
trie.Clear();
|
||||||
|
general_len = nowiki_len = 0;
|
||||||
|
tmp_2_used = false;
|
||||||
|
}
|
||||||
|
public void Add_general(byte[] marker, byte[] val) {Add_item(Tid__general, marker, val);}
|
||||||
|
public void Add_nowiki (byte[] marker, byte[] val) {Add_item(Tid__nowiki, marker, val);}
|
||||||
|
public void Add_item(byte tid, byte[] marker, byte[] val) {
|
||||||
|
trie.Add_obj(marker, new Xomw_strip_item(tid, marker, val));
|
||||||
|
if (tid == Tid__general)
|
||||||
|
general_len++;
|
||||||
|
else
|
||||||
|
nowiki_len++;
|
||||||
|
}
|
||||||
|
public byte[] Unstrip_general(byte[] text) {return Unstrip(Tid__general, text);}
|
||||||
|
public byte[] Unstrip_nowiki (byte[] text) {return Unstrip(Tid__nowiki , text);}
|
||||||
|
public byte[] Unstrip_both (byte[] text) {return Unstrip(Tid__both , text);}
|
||||||
|
public byte[] Unstrip(byte tid, byte[] text) {
|
||||||
|
boolean dirty = Unstrip(tid, tmp_1, text, 0, text.length);
|
||||||
|
return dirty ? tmp_1.To_bry_and_clear() : text;
|
||||||
|
}
|
||||||
|
public void Unstrip_general(Xomw_parser_bfr pbfr) {Unstrip(Tid__general, pbfr);}
|
||||||
|
public void Unstrip_nowiki (Xomw_parser_bfr pbfr) {Unstrip(Tid__nowiki , pbfr);}
|
||||||
|
public void Unstrip_both (Xomw_parser_bfr pbfr) {Unstrip(Tid__both , pbfr);}
|
||||||
|
private boolean Unstrip(byte tid, Xomw_parser_bfr pbfr) {
|
||||||
|
// XO.PBFR
|
||||||
|
Bry_bfr src_bfr = pbfr.Src();
|
||||||
|
byte[] src = src_bfr.Bfr();
|
||||||
|
boolean dirty = Unstrip(tid, pbfr.Trg(), src, 0, src_bfr.Len());
|
||||||
|
if (dirty)
|
||||||
|
pbfr.Switch();
|
||||||
|
return dirty;
|
||||||
|
}
|
||||||
|
private boolean Unstrip(byte tid, Bry_bfr trg, byte[] src, int src_bgn, int src_end) {
|
||||||
|
// exit early if no items for type
|
||||||
|
if ((tid & Tid__general) == Tid__general) {
|
||||||
|
if (general_len == 0)
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
else if ((tid & Tid__nowiki) == Tid__nowiki) {
|
||||||
|
if (nowiki_len == 0)
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
int cur = src_bgn;
|
||||||
|
int prv = cur;
|
||||||
|
boolean dirty = false;
|
||||||
|
// loop over each src char
|
||||||
|
while (true) {
|
||||||
|
// EOS: exit
|
||||||
|
if (cur == src_end) {
|
||||||
|
if (dirty) // add remainder if dirty
|
||||||
|
trg.Add_mid(src, prv, src_end);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
// check if current pos matches strip state
|
||||||
|
Object o = trie.Match_at(trv, src, cur, src_end);
|
||||||
|
if (o != null) { // match
|
||||||
|
Xomw_strip_item item = (Xomw_strip_item)o;
|
||||||
|
byte item_tid = item.Tid();
|
||||||
|
if ((tid & item_tid) == item_tid) { // check if types match
|
||||||
|
// get bfr for recursion
|
||||||
|
Bry_bfr nested_bfr = null;
|
||||||
|
boolean tmp_2_release = false;
|
||||||
|
if (tmp_2_used) {
|
||||||
|
nested_bfr = Bry_bfr_.New();
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
nested_bfr = tmp_2;
|
||||||
|
tmp_2_used = true;
|
||||||
|
tmp_2_release = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// recurse
|
||||||
|
byte[] item_val = item.Val();
|
||||||
|
if (Unstrip(tid, nested_bfr, item_val, 0, item_val.length))
|
||||||
|
item_val = nested_bfr.To_bry_and_clear();
|
||||||
|
if (tmp_2_release)
|
||||||
|
tmp_2_used = false;
|
||||||
|
|
||||||
|
// add to trg
|
||||||
|
trg.Add_mid(src, prv, cur);
|
||||||
|
trg.Add(item_val);
|
||||||
|
|
||||||
|
// update vars
|
||||||
|
dirty = true;
|
||||||
|
cur += item.Key().length;
|
||||||
|
prv = cur;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
cur++;
|
||||||
|
}
|
||||||
|
return dirty;
|
||||||
|
}
|
||||||
|
public static final String Str__marker_bgn = "\u007f'\"`UNIQ-";
|
||||||
|
public static final byte[]
|
||||||
|
Bry__marker__bgn = Bry_.new_a7(Str__marker_bgn)
|
||||||
|
, Bry__marker__end = Bry_.new_a7("-QINU`\"'\u007f")
|
||||||
|
;
|
||||||
|
public static final byte Tid__general = 1, Tid__nowiki = 2, Tid__both = 3;
|
||||||
|
}
|
||||||
|
/** One strip-state entry: its type id, the marker bytes (key) and the bytes the marker stands for (val). */
class Xomw_strip_item {
	private final byte tid;
	private final byte[] key;
	private final byte[] val;

	public Xomw_strip_item(byte tid, byte[] key, byte[] val) {
		this.tid = tid;
		this.key = key;
		this.val = val;
	}

	/** Type id given at construction. */
	public byte Tid() {
		return tid;
	}
	/** Marker bytes; the same array instance that was passed in. */
	public byte[] Key() {
		return key;
	}
	/** Replacement bytes; the same array instance that was passed in. */
	public byte[] Val() {
		return val;
	}
}
|
@ -0,0 +1,44 @@
|
|||||||
|
/*
|
||||||
|
XOWA: the XOWA Offline Wiki Application
|
||||||
|
Copyright (C) 2012 gnosygnu@gmail.com
|
||||||
|
|
||||||
|
This program is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Affero General Public License as
|
||||||
|
published by the Free Software Foundation, either version 3 of the
|
||||||
|
License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Affero General Public License
|
||||||
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
package gplx.xowa.mws.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
|
||||||
|
import org.junit.*; import gplx.core.tests.*;
|
||||||
|
public class Xomw_strip_state__tst {
|
||||||
|
private final Xomw_strip_state__fxt fxt = new Xomw_strip_state__fxt();
|
||||||
|
@Test public void Basic() {
|
||||||
|
fxt.Init__add (Xomw_strip_state.Tid__general, "\u007f'\"`UNIQ-key-1-QINU`\"'\u007f", "val-1");
|
||||||
|
fxt.Test__nostrip(Xomw_strip_state.Tid__nowiki , "a \u007f'\"`UNIQ-key-1-QINU`\"'\u007f b");
|
||||||
|
fxt.Test__unstrip(Xomw_strip_state.Tid__general, "a \u007f'\"`UNIQ-key-1-QINU`\"'\u007f b", "a val-1 b");
|
||||||
|
fxt.Test__unstrip(Xomw_strip_state.Tid__both , "a \u007f'\"`UNIQ-key-1-QINU`\"'\u007f b", "a val-1 b");
|
||||||
|
}
|
||||||
|
@Test public void Recurse() {
|
||||||
|
fxt.Init__add (Xomw_strip_state.Tid__general, "\u007f'\"`UNIQ-key-1-QINU`\"'\u007f", "val-1");
|
||||||
|
fxt.Init__add (Xomw_strip_state.Tid__general, "\u007f'\"`UNIQ-key-2-QINU`\"'\u007f", "\u007f'\"`UNIQ-key-1-QINU`\"'\u007f");
|
||||||
|
fxt.Test__unstrip(Xomw_strip_state.Tid__general, "a \u007f'\"`UNIQ-key-2-QINU`\"'\u007f b", "a val-1 b");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
class Xomw_strip_state__fxt {
|
||||||
|
private final Xomw_strip_state strip_state = new Xomw_strip_state();
|
||||||
|
public void Init__add(byte tid, String marker, String val) {
|
||||||
|
strip_state.Add_item(tid, Bry_.new_u8(marker), Bry_.new_u8(val));
|
||||||
|
}
|
||||||
|
public void Test__nostrip(byte tid, String src) {Test__unstrip(tid, src, src);}
|
||||||
|
public void Test__unstrip(byte tid, String src, String expd) {
|
||||||
|
byte[] actl = strip_state.Unstrip(tid, Bry_.new_u8(src));
|
||||||
|
Gftest.Eq__str(expd, String_.new_u8(actl));
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,84 @@
|
|||||||
|
/*
|
||||||
|
XOWA: the XOWA Offline Wiki Application
|
||||||
|
Copyright (C) 2012 gnosygnu@gmail.com
|
||||||
|
|
||||||
|
This program is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Affero General Public License as
|
||||||
|
published by the Free Software Foundation, either version 3 of the
|
||||||
|
License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Affero General Public License
|
||||||
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
package gplx.xowa.mws.parsers.doubleunders; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
|
||||||
|
class Xomw_doubleunder_wkr {
|
||||||
|
public boolean show_toc;
|
||||||
|
public boolean force_toc_position;
|
||||||
|
public boolean output__no_gallery ;
|
||||||
|
public Xomw_doubleunder_data doubleunderscore_data = new Xomw_doubleunder_data();
|
||||||
|
private void Match_and_remove(byte[] text, Xomw_doubleunder_data doubleunderscore_data) {
|
||||||
|
doubleunderscore_data.Reset();
|
||||||
|
}
|
||||||
|
public void Do_double_underscore(byte[] text) {
|
||||||
|
// The position of __TOC__ needs to be recorded
|
||||||
|
// $mw = MagicWord::get( 'toc' );
|
||||||
|
// if ( $mw->match( $text ) ) {
|
||||||
|
this.show_toc = true;
|
||||||
|
this.force_toc_position = true;
|
||||||
|
|
||||||
|
// Set a placeholder. At the end we'll fill it in with the TOC.
|
||||||
|
// $text = $mw->replace( '<!--MWTOC-->', $text, 1 );
|
||||||
|
|
||||||
|
// Only keep the first one.
|
||||||
|
// $text = $mw->replace( '', $text );
|
||||||
|
// }
|
||||||
|
|
||||||
|
// Now match and remove the rest of them
|
||||||
|
// $mwa = MagicWord::getDoubleUnderscoreArray();
|
||||||
|
Match_and_remove(text, doubleunderscore_data);
|
||||||
|
|
||||||
|
if (doubleunderscore_data.no_gallery) {
|
||||||
|
output__no_gallery = true;
|
||||||
|
}
|
||||||
|
if (doubleunderscore_data.no_toc && !force_toc_position) {
|
||||||
|
this.show_toc = false;
|
||||||
|
}
|
||||||
|
if ( doubleunderscore_data.hidden_cat
|
||||||
|
// && $this->mTitle->getNamespace() == NS_CATEGORY
|
||||||
|
) {
|
||||||
|
//$this->addTrackingCategory( 'hidden-category-category' );
|
||||||
|
}
|
||||||
|
// (T10068) Allow control over whether robots index a page.
|
||||||
|
// __INDEX__ always overrides __NOINDEX__, see T16899
|
||||||
|
if (doubleunderscore_data.no_index // && $this->mTitle->canUseNoindex()
|
||||||
|
) {
|
||||||
|
// $this->mOutput->setIndexPolicy( 'noindex' );
|
||||||
|
// $this->addTrackingCategory( 'noindex-category' );
|
||||||
|
}
|
||||||
|
if (doubleunderscore_data.index //&& $this->mTitle->canUseNoindex()
|
||||||
|
) {
|
||||||
|
// $this->mOutput->setIndexPolicy( 'index' );
|
||||||
|
// $this->addTrackingCategory( 'index-category' );
|
||||||
|
}
|
||||||
|
|
||||||
|
// Cache all double underscores in the database
|
||||||
|
// foreach ( $this->mDoubleUnderscores as $key => $val ) {
|
||||||
|
// $this->mOutput->setProperty( $key, '' );
|
||||||
|
// }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/** Plain holder for the double-underscore flags read by Xomw_doubleunder_wkr. */
class Xomw_doubleunder_data {
	public boolean no_gallery;
	public boolean no_toc;
	public boolean hidden_cat;
	public boolean no_index;
	public boolean index;

	/** Sets every flag back to false. */
	public void Reset() {
		this.no_gallery = false;
		this.no_toc     = false;
		this.hidden_cat = false;
		this.no_index   = false;
		this.index      = false;
	}
}
|
@ -0,0 +1,22 @@
|
|||||||
|
/*
|
||||||
|
XOWA: the XOWA Offline Wiki Application
|
||||||
|
Copyright (C) 2012 gnosygnu@gmail.com
|
||||||
|
|
||||||
|
This program is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Affero General Public License as
|
||||||
|
published by the Free Software Foundation, either version 3 of the
|
||||||
|
License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Affero General Public License
|
||||||
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
package gplx.xowa.mws.parsers.headings; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
|
||||||
|
/**
 * Callback interface; both methods receive the parser context and the Xomw_heading_wkr.
 * NOTE(review): judging by the method names, On_hdr_seen is presumably invoked for each heading that is found
 * and On_src_done once the source has been fully scanned; confirm against Xomw_heading_wkr, which is not visible here.
 */
public interface Xomw_heading_cbk {
|
||||||
|
void On_hdr_seen(Xomw_parser_ctx pctx, Xomw_heading_wkr wkr);
|
||||||
|
void On_src_done(Xomw_parser_ctx pctx, Xomw_heading_wkr wkr);
|
||||||
|
}
|
@ -0,0 +1,81 @@
|
|||||||
|
/*
|
||||||
|
XOWA: the XOWA Offline Wiki Application
|
||||||
|
Copyright (C) 2012 gnosygnu@gmail.com
|
||||||
|
|
||||||
|
This program is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Affero General Public License as
|
||||||
|
published by the Free Software Foundation, either version 3 of the
|
||||||
|
License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Affero General Public License
|
||||||
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
package gplx.xowa.mws.parsers.hrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
|
||||||
|
import gplx.langs.phps.utls.*;
|
||||||
|
public class Xomw_hr_wkr {// THREAD.UNSAFE: caching for repeated calls
|
||||||
|
private Bry_bfr bfr;
|
||||||
|
public void Replace_hrs(Xomw_parser_ctx pctx, Xomw_parser_bfr pbfr) { // REF.MW: text = preg_replace('/(^|\n)-----*/', '\\1<hr />', text);
|
||||||
|
// XO.PBFR
|
||||||
|
Bry_bfr src_bfr = pbfr.Src();
|
||||||
|
byte[] src = src_bfr.Bfr();
|
||||||
|
int src_bgn = 0;
|
||||||
|
int src_end = src_bfr.Len();
|
||||||
|
this.bfr = pbfr.Trg();
|
||||||
|
|
||||||
|
boolean dirty = false;
|
||||||
|
|
||||||
|
// do separate check for "-----" at start of String;
|
||||||
|
int cur = 0;
|
||||||
|
if (Bry_.Eq(src, 0, Len__wtxt__hr__bos, Bry__wtxt__hr__bos)) {
|
||||||
|
cur = Replace_hr(Bool_.N, src, src_bgn, src_end, 0, Len__wtxt__hr__bos);
|
||||||
|
dirty = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// loop
|
||||||
|
while (true) {
|
||||||
|
// find next "\n-----"
|
||||||
|
int find_bgn = Bry_find_.Find_fwd(src, Bry__wtxt__hr__mid, cur, src_end);
|
||||||
|
|
||||||
|
// nothing found; exit
|
||||||
|
if (find_bgn == Bry_find_.Not_found) {
|
||||||
|
if (dirty) {
|
||||||
|
bfr.Add_mid(src, cur, src_end);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
// something found
|
||||||
|
cur = Replace_hr(Bool_.Y, src, cur, src_end, find_bgn, Len__wtxt__hr__mid);
|
||||||
|
dirty = true;
|
||||||
|
}
|
||||||
|
if (dirty)
|
||||||
|
pbfr.Switch();
|
||||||
|
}
|
||||||
|
private int Replace_hr(boolean mid, byte[] src, int cur, int src_end, int find_bgn, int tkn_len) {
|
||||||
|
// something found; add to bfr
|
||||||
|
if (mid) {
|
||||||
|
bfr.Add_mid(src, cur, find_bgn); // add everything before "\n-----"
|
||||||
|
bfr.Add_byte_nl();
|
||||||
|
}
|
||||||
|
bfr.Add(Bry__html__hr);
|
||||||
|
|
||||||
|
// set dirty / cur and continue
|
||||||
|
cur = find_bgn + tkn_len;
|
||||||
|
cur = Bry_find_.Find_fwd_while(src, cur, src_end, Byte_ascii.Dash); // gobble up trailing "-"; the "*" in "-----*" from the regex above
|
||||||
|
return cur;
|
||||||
|
}
|
||||||
|
private static final byte[]
|
||||||
|
Bry__wtxt__hr__mid = Bry_.new_a7("\n-----")
|
||||||
|
, Bry__wtxt__hr__bos = Bry_.new_a7("-----")
|
||||||
|
, Bry__html__hr = Bry_.new_a7("<hr />")
|
||||||
|
;
|
||||||
|
private static final int
|
||||||
|
Len__wtxt__hr__mid = Bry__wtxt__hr__mid.length
|
||||||
|
, Len__wtxt__hr__bos = Bry__wtxt__hr__bos.length
|
||||||
|
;
|
||||||
|
}
|
@ -0,0 +1,36 @@
|
|||||||
|
/*
|
||||||
|
XOWA: the XOWA Offline Wiki Application
|
||||||
|
Copyright (C) 2012 gnosygnu@gmail.com
|
||||||
|
|
||||||
|
This program is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Affero General Public License as
|
||||||
|
published by the Free Software Foundation, either version 3 of the
|
||||||
|
License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Affero General Public License
|
||||||
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
package gplx.xowa.mws.parsers.hrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
|
||||||
|
import org.junit.*;
|
||||||
|
public class Xomw_hr_wkr__tst {
|
||||||
|
private final Xomw_hr_wkr__fxt fxt = new Xomw_hr_wkr__fxt();
|
||||||
|
@Test public void Basic() {fxt.Test__parse("a\n-----b" , "a\n<hr />b");}
|
||||||
|
@Test public void Extend() {fxt.Test__parse("a\n------b" , "a\n<hr />b");}
|
||||||
|
@Test public void Not_found() {fxt.Test__parse("a\n----b" , "a\n----b");}
|
||||||
|
@Test public void Bos() {fxt.Test__parse("-----a" , "<hr />a");}
|
||||||
|
@Test public void Bos_and_mid() {fxt.Test__parse("-----a\n-----b" , "<hr />a\n<hr />b");}
|
||||||
|
}
|
||||||
|
class Xomw_hr_wkr__fxt {
|
||||||
|
private final Xomw_parser_bfr pbfr = new Xomw_parser_bfr();
|
||||||
|
private final Xomw_hr_wkr wkr = new Xomw_hr_wkr();
|
||||||
|
public void Test__parse(String src_str, String expd) {
|
||||||
|
byte[] src_bry = Bry_.new_u8(src_str);
|
||||||
|
wkr.Replace_hrs(new Xomw_parser_ctx(), pbfr.Init(src_bry));
|
||||||
|
Tfds.Eq_str_lines(expd, pbfr.Rslt().To_str_and_clear(), src_str);
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,56 @@
|
|||||||
|
/*
|
||||||
|
XOWA: the XOWA Offline Wiki Application
|
||||||
|
Copyright (C) 2012 gnosygnu@gmail.com
|
||||||
|
|
||||||
|
This program is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Affero General Public License as
|
||||||
|
published by the Free Software Foundation, either version 3 of the
|
||||||
|
License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Affero General Public License
|
||||||
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
package gplx.xowa.mws.parsers.lnkes; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
|
||||||
|
import org.junit.*;
|
||||||
|
public class Xomw_lnke_wkr__tst {
|
||||||
|
private final Xomw_lnke_wkr__fxt fxt = new Xomw_lnke_wkr__fxt();
|
||||||
|
@Test public void Basic() {fxt.Test__parse("[https://a.org b]" , "<a class='external text' rel='nofollow' href='https://a.org'>b</a>");}
|
||||||
|
@Test public void Invaild__protocol() {fxt.Test__parse("[httpz:a.org]" , "[httpz:a.org]");}
|
||||||
|
@Test public void Invaild__protocol_slash() {fxt.Test__parse("[https:a.org]" , "[https:a.org]");}
|
||||||
|
@Test public void Invaild__urlchars__0() {fxt.Test__parse("[https://]" , "[https://]");}
|
||||||
|
@Test public void Invaild__urlchars__bad() {fxt.Test__parse("[https://\"]" , "[https://\"]");}
|
||||||
|
@Test public void Many() {
|
||||||
|
fxt.Test__parse(String_.Concat_lines_nl_apos_skip_last
|
||||||
|
( "a"
|
||||||
|
, "[https://b.org c]"
|
||||||
|
, "d"
|
||||||
|
, "[https://e.org f]"
|
||||||
|
, "g"
|
||||||
|
), String_.Concat_lines_nl_apos_skip_last
|
||||||
|
( "a"
|
||||||
|
, "<a class='external text' rel='nofollow' href='https://b.org'>c</a>"
|
||||||
|
, "d"
|
||||||
|
, "<a class='external text' rel='nofollow' href='https://e.org'>f</a>"
|
||||||
|
, "g"
|
||||||
|
));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
class Xomw_lnke_wkr__fxt {
|
||||||
|
private final Xomw_lnke_wkr wkr = new Xomw_lnke_wkr(new Xomw_parser());
|
||||||
|
private final Xomw_parser_bfr pbfr = new Xomw_parser_bfr();
|
||||||
|
private boolean apos = true;
|
||||||
|
public Xomw_lnke_wkr__fxt() {
|
||||||
|
wkr.Init_by_wiki(Xomw_parser.Protocols__dflt());
|
||||||
|
}
|
||||||
|
public void Test__parse(String src_str, String expd) {
|
||||||
|
byte[] src_bry = Bry_.new_u8(src_str);
|
||||||
|
wkr.Replace_external_links(new Xomw_parser_ctx(), pbfr.Init(src_bry));
|
||||||
|
if (apos) expd = gplx.langs.htmls.Gfh_utl.Replace_apos(expd);
|
||||||
|
Tfds.Eq_str_lines(expd, pbfr.Rslt().To_str_and_clear(), src_str);
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,462 @@
|
|||||||
|
/*
|
||||||
|
XOWA: the XOWA Offline Wiki Application
|
||||||
|
Copyright (C) 2012 gnosygnu@gmail.com
|
||||||
|
|
||||||
|
This program is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Affero General Public License as
|
||||||
|
published by the Free Software Foundation, either version 3 of the
|
||||||
|
License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Affero General Public License
|
||||||
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
package gplx.xowa.mws.parsers.lnkis; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
|
||||||
|
import gplx.core.btries.*; import gplx.core.primitives.*;
|
||||||
|
import gplx.langs.phps.utls.*;
|
||||||
|
import gplx.xowa.wikis.nss.*; import gplx.xowa.wikis.xwikis.*;
|
||||||
|
import gplx.xowa.mws.parsers.*; import gplx.xowa.mws.parsers.quotes.*;
|
||||||
|
import gplx.xowa.mws.htmls.*; import gplx.xowa.mws.linkers.*;
|
||||||
|
import gplx.xowa.mws.utls.*;
|
||||||
|
import gplx.xowa.parsers.uniqs.*;
|
||||||
|
public class Xomw_lnki_wkr {// THREAD.UNSAFE: caching for repeated calls
|
||||||
|
private final Xomw_link_holders holders;
|
||||||
|
private final Xomw_linker linker;
|
||||||
|
private final Xomw_link_renderer link_renderer;
|
||||||
|
// private final Btrie_slim_mgr protocols_trie;
|
||||||
|
private final Xomw_quote_wkr quote_wkr;
|
||||||
|
private final Xomw_strip_state strip_state;
|
||||||
|
private Xow_wiki wiki;
|
||||||
|
private Xoa_ttl page_title;
|
||||||
|
private final Xomw_linker__normalize_subpage_link normalize_subpage_link = new Xomw_linker__normalize_subpage_link();
|
||||||
|
private final Bry_bfr tmp;
|
||||||
|
private final Xomw_parser parser;
|
||||||
|
private final Xomwh_atr_mgr extra_atrs = new Xomwh_atr_mgr();
|
||||||
|
// Stores the supplied collaborators and pulls the linker, quote worker, scratch buffer and strip state from parser.
// protocols_trie is accepted but not stored; its field is commented out in this class.
public Xomw_lnki_wkr(Xomw_parser parser, Xomw_link_holders holders, Xomw_link_renderer link_renderer, Btrie_slim_mgr protocols_trie) {
	// passed in directly
	this.parser        = parser;
	this.holders       = holders;
	this.link_renderer = link_renderer;
	// this.protocols_trie = protocols_trie;

	// obtained from the parser
	linker      = parser.Linker();
	quote_wkr   = parser.Quote_wkr();
	tmp         = parser.Tmp();
	strip_state = parser.Strip_state();
}
|
||||||
|
public void Init_by_wiki(Xow_wiki wiki) {
|
||||||
|
this.wiki = wiki;
|
||||||
|
if (title_chars_for_lnki == null) {
|
||||||
|
title_chars_for_lnki = (boolean[])Array_.Clone(Xomw_ttl_utl.Title_chars_valid());
|
||||||
|
// the % is needed to support urlencoded titles as well
|
||||||
|
title_chars_for_lnki[Byte_ascii.Hash] = true;
|
||||||
|
title_chars_for_lnki[Byte_ascii.Percent] = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Resets per-parse state; currently this only clears the link-holder collection.
public void Clear_state() {
|
||||||
|
holders.Clear();
|
||||||
|
}
|
||||||
|
public void Replace_internal_links(Xomw_parser_ctx pctx, Xomw_parser_bfr pbfr) {
|
||||||
|
// XO.PBFR
|
||||||
|
Bry_bfr src_bfr = pbfr.Src();
|
||||||
|
byte[] src = src_bfr.Bfr();
|
||||||
|
int src_bgn = 0;
|
||||||
|
int src_end = src_bfr.Len();
|
||||||
|
Bry_bfr bfr = pbfr.Trg();
|
||||||
|
pbfr.Switch();
|
||||||
|
|
||||||
|
this.page_title = pctx.Page_title();
|
||||||
|
|
||||||
|
Replace_internal_links(bfr, src, src_bgn, src_end);
|
||||||
|
}
|
||||||
|
public void Replace_internal_links(Bry_bfr bfr, byte[] src, int src_bgn, int src_end) {
|
||||||
|
// PORTED: regex for tc move to header; e1 and e1_img moved to code
|
||||||
|
// split the entire text String on occurrences of [[
|
||||||
|
int cur = src_bgn;
|
||||||
|
int prv = cur;
|
||||||
|
while (true) {
|
||||||
|
int lnki_bgn = Bry_find_.Find_fwd(src, Bry__wtxt__lnki__bgn, cur, src_end); // $a = StringUtils::explode('[[', ' ' . $s);
|
||||||
|
if (lnki_bgn == Bry_find_.Not_found) { // no more "[["; stop loop
|
||||||
|
bfr.Add_mid(src, cur, src_end);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
cur = lnki_bgn + 2; // 2="[[".length
|
||||||
|
|
||||||
|
// IGNORE: handles strange split logic of adding space to String; "$s = substr($s, 1);"
|
||||||
|
|
||||||
|
// TODO.XO:lnke_bgn; EX: b[[A]]
|
||||||
|
// $useLinkPrefixExtension = $this->getTargetLanguage()->linkPrefixExtension();
|
||||||
|
// $e2 = null;
|
||||||
|
// if ($useLinkPrefixExtension) {
|
||||||
|
// // Match the end of a line for a word that's not followed by whitespace,
|
||||||
|
// // e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
|
||||||
|
// global $wgContLang;
|
||||||
|
// $charset = $wgContLang->linkPrefixCharset();
|
||||||
|
// $e2 = "/^((?>.*[^$charset]|))(.+)$/sDu";
|
||||||
|
// }
|
||||||
|
|
||||||
|
// IGNORE: throw new MWException(__METHOD__ . ": \$this->mTitle is null\n");
|
||||||
|
|
||||||
|
// $nottalk = !$this->mTitle->isTalkPage();
|
||||||
|
|
||||||
|
// TODO.XO:lnke_bgn
|
||||||
|
byte[] prefix = Bry_.Empty;
|
||||||
|
//if ($useLinkPrefixExtension) {
|
||||||
|
// $m = [];
|
||||||
|
// if (preg_match($e2, $s, $m)) {
|
||||||
|
// $first_prefix = $m[2];
|
||||||
|
// } else {
|
||||||
|
// $first_prefix = false;
|
||||||
|
// }
|
||||||
|
//} else {
|
||||||
|
// $prefix = '';
|
||||||
|
//}
|
||||||
|
|
||||||
|
// IGNORE: "Check for excessive memory usage"
|
||||||
|
|
||||||
|
// TODO.XO:lnke_bgn; EX: b[[A]]
|
||||||
|
//if ($useLinkPrefixExtension) {
|
||||||
|
// if (preg_match($e2, $s, $m)) {
|
||||||
|
// $prefix = $m[2];
|
||||||
|
// $s = $m[1];
|
||||||
|
// } else {
|
||||||
|
// $prefix = '';
|
||||||
|
// }
|
||||||
|
// // first link
|
||||||
|
// if ($first_prefix) {
|
||||||
|
// $prefix = $first_prefix;
|
||||||
|
// $first_prefix = false;
|
||||||
|
// }
|
||||||
|
//}
|
||||||
|
|
||||||
|
// PORTED.BGN: if (preg_match($e1, $line, $m)) && else if (preg_match($e1_img, $line, $m))
|
||||||
|
// NOTE: both e1 and e1_img are effectively the same; e1_img allows nested "[["; EX: "[[A|b[[c]]d]]" will stop at "[[A|b"
|
||||||
|
int ttl_bgn = cur;
|
||||||
|
int ttl_end = Xomw_ttl_utl.Find_fwd_while_title(src, cur, src_end, title_chars_for_lnki);
|
||||||
|
cur = ttl_end;
|
||||||
|
int capt_bgn = -1, capt_end = -1;
|
||||||
|
int nxt_lnki = -1;
|
||||||
|
|
||||||
|
boolean might_be_img = false;
|
||||||
|
if (ttl_end > ttl_bgn) { // at least one valid title-char found; check for "|" or "]]" EX: "[[a"
|
||||||
|
byte nxt_byte = src[ttl_end];
|
||||||
|
if (nxt_byte == Byte_ascii.Pipe) { // handles lnki with capt ([[A|a]])and lnki with file ([[File:A.png|b|c|d]])
|
||||||
|
cur = ttl_end + 1;
|
||||||
|
|
||||||
|
// find next "[["
|
||||||
|
nxt_lnki = Bry_find_.Find_fwd(src, Bry__wtxt__lnki__bgn, cur, src_end);
|
||||||
|
if (nxt_lnki == Bry_find_.Not_found)
|
||||||
|
nxt_lnki = src_end;
|
||||||
|
|
||||||
|
// find end "]]"
|
||||||
|
capt_bgn = cur;
|
||||||
|
capt_end = Bry_find_.Find_fwd(src, Bry__wtxt__lnki__end, cur, nxt_lnki);
|
||||||
|
if (capt_end == Bry_find_.Not_found) {
|
||||||
|
capt_end = nxt_lnki;
|
||||||
|
cur = nxt_lnki;
|
||||||
|
might_be_img = true;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
cur = capt_end + Bry__wtxt__lnki__end.length;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (Bry_.Match(src, ttl_end, ttl_end + 2, Bry__wtxt__lnki__end)) { // handles simple lnki; EX: [[A]]
|
||||||
|
cur = ttl_end + 2;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
ttl_end = -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
ttl_end = -1;
|
||||||
|
if (ttl_end == -1) { // either (a) no valid title-chars ("[[<") or (b) title char, but has stray "]" ("[[a]b]]")
|
||||||
|
// Invalid form; output directly
|
||||||
|
bfr.Add_mid(src, cur, src_end);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
// PORTED.END: if (preg_match($e1, $line, $m)) && else if (preg_match($e1_img, $line, $m))
|
||||||
|
|
||||||
|
byte[] text = Bry_.Mid(src, capt_bgn, capt_end);
|
||||||
|
byte[] trail = Bry_.Empty;
|
||||||
|
if (!might_be_img) {
|
||||||
|
// If we get a ] at the beginning of $m[3] that means we have a link that's something like:
|
||||||
|
// [[Image:Foo.jpg|[http://example.com desc]]] <- having three ] in a row fucks up,
|
||||||
|
// the real problem is with the $e1 regex
|
||||||
|
// See T1500.
|
||||||
|
// Still some problems for cases where the ] is meant to be outside punctuation,
|
||||||
|
// and no image is in sight. See T4095.
|
||||||
|
// if ($text !== ''
|
||||||
|
// && substr($m[3], 0, 1) === ']'
|
||||||
|
// && strpos($text, '[') !== false
|
||||||
|
// ) {
|
||||||
|
// $text .= ']'; // so that replaceExternalLinks($text) works later
|
||||||
|
// $m[3] = substr($m[3], 1);
|
||||||
|
// }
|
||||||
|
|
||||||
|
// fix up urlencoded title texts
|
||||||
|
// if (strpos($m[1], '%') !== false) {
|
||||||
|
// // Should anchors '#' also be rejected?
|
||||||
|
// $m[1] = str_replace([ '<', '>' ], [ '<', '>' ], rawurldecode($m[1]));
|
||||||
|
// }
|
||||||
|
// $trail = $m[3];
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
// Invalid, but might be an image with a link in its caption
|
||||||
|
// $text = $m[2];
|
||||||
|
// if (strpos($m[1], '%') !== false) {
|
||||||
|
// $m[1] = str_replace([ '<', '>' ], [ '<', '>' ], rawurldecode($m[1]));
|
||||||
|
// }
|
||||||
|
// $trail = "";
|
||||||
|
}
|
||||||
|
|
||||||
|
byte[] orig_link = Bry_.Mid(src, ttl_bgn, ttl_end);
|
||||||
|
|
||||||
|
// TODO.XO: handle "[[http://a.org]]"
|
||||||
|
// Don't allow @gplx.Internal protected links to pages containing
|
||||||
|
// PROTO: where PROTO is a valid URL protocol; these
|
||||||
|
// should be external links.
|
||||||
|
// if (preg_match('/^(?i:' . $this->mUrlProtocols . ')/', $origLink)) {
|
||||||
|
// $s .= $prefix . '[[' . $line;
|
||||||
|
// continue;
|
||||||
|
// }
|
||||||
|
|
||||||
|
byte[] link = orig_link;
|
||||||
|
boolean no_force = orig_link[0] != Byte_ascii.Colon;
|
||||||
|
if (!no_force) {
|
||||||
|
// Strip off leading ':'
|
||||||
|
link = Bry_.Mid(link, 1);
|
||||||
|
}
|
||||||
|
Xoa_ttl nt = wiki.Ttl_parse(link);
|
||||||
|
|
||||||
|
// Make subpage if necessary
|
||||||
|
boolean subpages_enabled = nt.Ns().Subpages_enabled();
|
||||||
|
if (subpages_enabled) {
|
||||||
|
Maybe_do_subpage_link(normalize_subpage_link, orig_link, text);
|
||||||
|
link = normalize_subpage_link.link;
|
||||||
|
text = normalize_subpage_link.text;
|
||||||
|
nt = wiki.Ttl_parse(link);
|
||||||
|
}
|
||||||
|
// IGNORE: handled in rewrite above
|
||||||
|
// else {
|
||||||
|
// link = orig_link;
|
||||||
|
// }
|
||||||
|
|
||||||
|
byte[] unstrip = strip_state.Unstrip_nowiki(link);
|
||||||
|
if (!Bry_.Eq(unstrip, link))
|
||||||
|
nt = wiki.Ttl_parse(unstrip);
|
||||||
|
if (nt == null) {
|
||||||
|
bfr.Add_mid(src, prv, lnki_bgn + 2); // $s .= $prefix . '[[' . $line;
|
||||||
|
cur = lnki_bgn + 2;
|
||||||
|
prv = cur;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
Xow_ns ns = nt.Ns();
|
||||||
|
Xow_xwiki_itm iw = nt.Wik_itm();
|
||||||
|
|
||||||
|
if (might_be_img) { // if this is actually an invalid link
|
||||||
|
if (ns.Id_is_file() && no_force) { // but might be an image
|
||||||
|
boolean found = false;
|
||||||
|
// while (true) {
|
||||||
|
// // look at the next 'line' to see if we can close it there
|
||||||
|
// a->next();
|
||||||
|
// next_line = a->current();
|
||||||
|
// if (next_line === false || next_line === null) {
|
||||||
|
// break;
|
||||||
|
// }
|
||||||
|
// m = explode(']]', next_line, 3);
|
||||||
|
// if (count(m) == 3) {
|
||||||
|
// // the first ]] closes the inner link, the second the image
|
||||||
|
// found = true;
|
||||||
|
// text .= "[[{m[0]}]]{m[1]}";
|
||||||
|
// trail = m[2];
|
||||||
|
// break;
|
||||||
|
// } else if (count(m) == 2) {
|
||||||
|
// // if there's exactly one ]] that's fine, we'll keep looking
|
||||||
|
// text .= "[[{m[0]}]]{m[1]}";
|
||||||
|
// } else {
|
||||||
|
// // if next_line is invalid too, we need look no further
|
||||||
|
// text .= '[[' . next_line;
|
||||||
|
// break;
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
if (!found) {
|
||||||
|
// we couldn't find the end of this imageLink, so output it raw
|
||||||
|
// but don't ignore what might be perfectly normal links in the text we've examined
|
||||||
|
Bry_bfr nested = wiki.Utl__bfr_mkr().Get_b128();
|
||||||
|
this.Replace_internal_links(nested, text, 0, text.length);
|
||||||
|
nested.Mkr_rls();
|
||||||
|
bfr.Add(prefix).Add(Bry__wtxt__lnki__bgn).Add(link).Add_byte_pipe().Add(text); // s .= "{prefix}[[link|text";
|
||||||
|
// note: no trail, because without an end, there *is* no trail
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else { // it's not an image, so output it raw
|
||||||
|
bfr.Add(prefix).Add(Bry__wtxt__lnki__bgn).Add(link).Add_byte_pipe().Add(text); // s .= "{prefix}[[link|text";
|
||||||
|
// note: no trail, because without an end, there *is* no trail
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
boolean was_blank = text.length == 0;
|
||||||
|
if (was_blank) {
|
||||||
|
text = link;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
// T6598 madness. Handle the quotes only if they come from the alternate part
|
||||||
|
// [[Lista d''e paise d''o munno]] -> <a href="...">Lista d''e paise d''o munno</a>
|
||||||
|
// [[Criticism of Harry Potter|Criticism of ''Harry Potter'']]
|
||||||
|
// -> <a href="Criticism of Harry Potter">Criticism of <i>Harry Potter</i></a>
|
||||||
|
text = quote_wkr.Do_quotes(tmp, text);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Link not escaped by : , create the various objects
|
||||||
|
// if (no_force && !nt->wasLocalInterwiki()) {
|
||||||
|
// Interwikis
|
||||||
|
// if (
|
||||||
|
// iw && this->mOptions->getInterwikiMagic() && nottalk && (
|
||||||
|
// Language::fetchLanguageName(iw, null, 'mw') ||
|
||||||
|
// in_array(iw, wgExtraInterlanguageLinkPrefixes)
|
||||||
|
// )
|
||||||
|
// ) {
|
||||||
|
// T26502: filter duplicates
|
||||||
|
// if (!isset(this->mLangLinkLanguages[iw])) {
|
||||||
|
// this->mLangLinkLanguages[iw] = true;
|
||||||
|
// this->mOutput->addLanguageLink(nt->getFullText());
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// s = rtrim(s . prefix);
|
||||||
|
// s .= trim(trail, "\n") == '' ? '': prefix . trail;
|
||||||
|
// continue;
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
if (ns.Id_is_file()) {
|
||||||
|
// if (!wfIsBadImage(nt->getDBkey(), this->mTitle)) {
|
||||||
|
// if (wasblank) {
|
||||||
|
// // if no parameters were passed, text
|
||||||
|
// // becomes something like "File:Foo.png",
|
||||||
|
// // which we don't want to pass on to the
|
||||||
|
// // image generator
|
||||||
|
// text = '';
|
||||||
|
// } else {
|
||||||
|
// // recursively parse links inside the image caption
|
||||||
|
// // actually, this will parse them in any other parameters, too,
|
||||||
|
// // but it might be hard to fix that, and it doesn't matter ATM
|
||||||
|
// text = this->replaceExternalLinks(text);
|
||||||
|
// holders->merge(this->replaceInternalLinks2(text));
|
||||||
|
// }
|
||||||
|
// // cloak any absolute URLs inside the image markup, so replaceExternalLinks() won't touch them
|
||||||
|
// s .= prefix . this->armorLinks(
|
||||||
|
// this->makeImage(nt, text, holders)) . trail;
|
||||||
|
// continue;
|
||||||
|
// }
|
||||||
|
}
|
||||||
|
else if (ns.Id_is_ctg()) {
|
||||||
|
bfr.Trim_end_ws(); // s = rtrim(s . "\n"); // T2087
|
||||||
|
|
||||||
|
if (was_blank) {
|
||||||
|
// sortkey = this->getDefaultSort();
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
// sortkey = text;
|
||||||
|
}
|
||||||
|
// sortkey = Sanitizer::decodeCharReferences(sortkey);
|
||||||
|
// sortkey = str_replace("\n", '', sortkey);
|
||||||
|
// sortkey = this->getConverterLanguage()->convertCategoryKey(sortkey);
|
||||||
|
// this->mOutput->addCategory(nt->getDBkey(), sortkey);
|
||||||
|
//
|
||||||
|
// Strip the whitespace Category links produce, see T2087
|
||||||
|
// s .= trim(prefix . trail, "\n") == '' ? '' : prefix . trail;
|
||||||
|
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
// }
|
||||||
|
|
||||||
|
// Self-link checking. For some languages, variants of the title are checked in
|
||||||
|
// LinkHolderArray::doVariants() to allow batching the existence checks necessary
|
||||||
|
// for linking to a different variant.
|
||||||
|
if (!ns.Id_is_special() && nt.Eq_full_db(page_title) && !nt.Has_fragment()) {
|
||||||
|
bfr.Add(prefix);
|
||||||
|
linker.Make_self_link_obj(bfr, nt, text, Bry_.Empty, trail, Bry_.Empty);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// NS_MEDIA is a pseudo-namespace for linking directly to a file
|
||||||
|
// @todo FIXME: Should do batch file existence checks, see comment below
|
||||||
|
if (ns.Id_is_media()) {
|
||||||
|
// Give extensions a chance to select the file revision for us
|
||||||
|
// options = [];
|
||||||
|
// descQuery = false;
|
||||||
|
// MW.HOOK:BeforeParserFetchFileAndTitle
|
||||||
|
// Fetch and register the file (file title may be different via hooks)
|
||||||
|
// list(file, nt) = this->fetchFileAndTitle(nt, options);
|
||||||
|
// Cloak with NOPARSE to avoid replacement in replaceExternalLinks
|
||||||
|
// s .= prefix . this->armorLinks(
|
||||||
|
// Linker::makeMediaLinkFile(nt, file, text)) . trail;
|
||||||
|
// continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Some titles, such as valid special pages or files in foreign repos, should
|
||||||
|
// be shown as bluelinks even though they're not included in the page table
|
||||||
|
// @todo FIXME: isAlwaysKnown() can be expensive for file links; we should really do
|
||||||
|
// batch file existence checks for NS_FILE and NS_MEDIA
|
||||||
|
bfr.Add_mid(src, prv, lnki_bgn);
|
||||||
|
prv = cur;
|
||||||
|
if (iw == null && nt.Is_always_known()) {
|
||||||
|
// this->mOutput->addLink(nt);
|
||||||
|
Make_known_link_holder(bfr, nt, text, trail, prefix);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
// Links will be added to the output link list after checking
|
||||||
|
holders.Make_holder(bfr, nt, text, Bry_.Ary_empty, trail, prefix);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Delegates to linker.Normalize_subpage_link, passing the current page_title along with target and text.
// Results are returned through rv; the caller in this class reads rv.link and rv.text afterwards.
public void Maybe_do_subpage_link(Xomw_linker__normalize_subpage_link rv, byte[] target, byte[] text) {
|
||||||
|
linker.Normalize_subpage_link(rv, page_title, target, text);
|
||||||
|
}
|
||||||
|
// Delegates to holders.Replace with the same parser context and parser buffer.
public void Replace_link_holders(Xomw_parser_ctx pctx, Xomw_parser_bfr pbfr) {
|
||||||
|
holders.Replace(pctx, pbfr);
|
||||||
|
}
|
||||||
|
public void Make_known_link_holder(Bry_bfr bfr, Xoa_ttl nt, byte[] text, byte[] trail, byte[] prefix) {
|
||||||
|
byte[][] split_trail = linker.Split_trail(trail);
|
||||||
|
byte[] inside = split_trail[0];
|
||||||
|
trail = split_trail[1];
|
||||||
|
|
||||||
|
if (text == Bry_.Empty) {
|
||||||
|
text = Bry_.Escape_html(nt.Get_prefixed_text());
|
||||||
|
}
|
||||||
|
|
||||||
|
// PORTED:new HtmlArmor( "$prefix$text$inside" )
|
||||||
|
tmp.Add_bry_escape_html(prefix);
|
||||||
|
tmp.Add_bry_escape_html(text);
|
||||||
|
tmp.Add_bry_escape_html(inside);
|
||||||
|
text = tmp.To_bry_and_clear();
|
||||||
|
|
||||||
|
link_renderer.Make_known_link(bfr, nt, text, extra_atrs, Bry_.Empty);
|
||||||
|
byte[] link = bfr.To_bry_and_clear();
|
||||||
|
parser.Armor_links(bfr, link, 0, link.length);
|
||||||
|
bfr.Add(trail);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static boolean[] title_chars_for_lnki;
|
||||||
|
private static final byte[] Bry__wtxt__lnki__bgn = Bry_.new_a7("[["), Bry__wtxt__lnki__end = Bry_.new_a7("]]");
|
||||||
|
|
||||||
|
// $e1 = "/^([{$tc}]+)(?:\\|(.+?))?]](.*)\$/sD";
|
||||||
|
//
|
||||||
|
// REGEX: "title-char"(1+) + "pipe"(0-1) + "]]"(0-1) + "other chars up to next [["
|
||||||
|
// title-char -> ([{$tc}]+)
|
||||||
|
// pipe -> (?:\\|(.+?))?
|
||||||
|
// ]] -> ?]]
|
||||||
|
// other chars... -> (.*)
|
||||||
|
|
||||||
|
// $e1_img = "/^([{$tc}]+)\\|(.*)\$/sD";
|
||||||
|
//
|
||||||
|
// REGEX: "title-char"(1+) + "pipe"(0-1) + "other chars up to next [["
|
||||||
|
// title-char -> ([{$tc}]+)
|
||||||
|
// pipe -> \\|
|
||||||
|
// other chars... -> (.*)
|
||||||
|
}
|
@ -0,0 +1,63 @@
|
|||||||
|
/*
|
||||||
|
XOWA: the XOWA Offline Wiki Application
|
||||||
|
Copyright (C) 2012 gnosygnu@gmail.com
|
||||||
|
|
||||||
|
This program is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Affero General Public License as
|
||||||
|
published by the Free Software Foundation, either version 3 of the
|
||||||
|
License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Affero General Public License
|
||||||
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
package gplx.xowa.mws.parsers.lnkis; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
|
||||||
|
import org.junit.*;
|
||||||
|
public class Xomw_lnki_wkr__tst {
|
||||||
|
private final Xomw_lnki_wkr__fxt fxt = new Xomw_lnki_wkr__fxt();
|
||||||
|
@Before public void init() {fxt.Clear();}
|
||||||
|
// @Test public void Basic() {fxt.Test__parse("[[A]]" , "<!--LINK 0-->");}
|
||||||
|
@Test public void Text() {fxt.Test__parse("a [[A]] z" , "a <!--LINK 0--> z");}
|
||||||
|
@Test public void Capt() {fxt.Test__parse("a [[A|a]] z" , "a <!--LINK 0--> z");}
|
||||||
|
// @Test public void Text() {fxt.Test__parse("a [[A]] z" , "a <a href='/wiki/A' title='A'>A</a> z");}
|
||||||
|
// @Test public void Capt() {fxt.Test__parse("a [[A|a]] z" , "a <a href='/wiki/A' title='A'>a</a> z");}
|
||||||
|
// @Test public void Text() {fxt.Test__parse("a [[A]] z" , "a <!--LINK 0--> z");}
|
||||||
|
// @Test public void Invalid__char() {fxt.Test__parse("[[<A>]]" , "[[<A>]]");}
|
||||||
|
@Test public void Self() {fxt.Test__to_html("[[Page_1]]" , "<strong class='selflink'>Page_1</strong>");}
|
||||||
|
}
|
||||||
|
class Xomw_lnki_wkr__fxt {
|
||||||
|
private final Xomw_lnki_wkr wkr;
|
||||||
|
private final Xomw_parser_ctx pctx;
|
||||||
|
private final Xomw_parser_bfr pbfr = new Xomw_parser_bfr();
|
||||||
|
private boolean apos = true;
|
||||||
|
public Xomw_lnki_wkr__fxt() {
|
||||||
|
Xoae_app app = Xoa_app_fxt.Make__app__edit();
|
||||||
|
Xowe_wiki wiki = Xoa_app_fxt.Make__wiki__edit(app);
|
||||||
|
Xomw_parser parser = new Xomw_parser();
|
||||||
|
wkr = parser.Lnki_wkr();
|
||||||
|
parser.Init_by_wiki(wiki);
|
||||||
|
|
||||||
|
pctx = new Xomw_parser_ctx();
|
||||||
|
pctx.Init_by_page(wiki.Ttl_parse(Bry_.new_a7("Page_1")));
|
||||||
|
}
|
||||||
|
public void Clear() {
|
||||||
|
wkr.Clear_state();
|
||||||
|
}
|
||||||
|
public void Test__parse(String src_str, String expd) {
|
||||||
|
byte[] src_bry = Bry_.new_u8(src_str);
|
||||||
|
wkr.Replace_internal_links(pctx, pbfr.Init(src_bry));
|
||||||
|
if (apos) expd = gplx.langs.htmls.Gfh_utl.Replace_apos(expd);
|
||||||
|
Tfds.Eq_str_lines(expd, pbfr.Rslt().To_str_and_clear(), src_str);
|
||||||
|
}
|
||||||
|
public void Test__to_html(String src_str, String expd) {
|
||||||
|
byte[] src_bry = Bry_.new_u8(src_str);
|
||||||
|
wkr.Replace_internal_links(pctx, pbfr.Init(src_bry));
|
||||||
|
wkr.Replace_link_holders(pctx, pbfr);
|
||||||
|
if (apos) expd = gplx.langs.htmls.Gfh_utl.Replace_apos(expd);
|
||||||
|
Tfds.Eq_str_lines(expd, pbfr.Rslt().To_str_and_clear(), src_str);
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,331 @@
|
|||||||
|
/*
|
||||||
|
XOWA: the XOWA Offline Wiki Application
|
||||||
|
Copyright (C) 2012 gnosygnu@gmail.com
|
||||||
|
|
||||||
|
This program is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Affero General Public License as
|
||||||
|
published by the Free Software Foundation, either version 3 of the
|
||||||
|
License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Affero General Public License
|
||||||
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
package gplx.xowa.mws.parsers.magiclinks; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
|
||||||
|
import gplx.core.primitives.*; import gplx.core.btries.*; import gplx.core.net.*;
|
||||||
|
import gplx.langs.phps.utls.*;
|
||||||
|
// public class Xomw_magiclinks_wkr {
|
||||||
|
// private final Btrie_slim_mgr regex_trie = Btrie_slim_mgr.ci_a7(); // NOTE: must be ci to handle protocols; EX: "https:" and "HTTPS:"
|
||||||
|
// private final Btrie_rv trv = new Btrie_rv();
|
||||||
|
// public Xomw_magiclinks_wkr() {
|
||||||
|
// }
|
||||||
|
// private static byte[] Tag__anch__rhs, Prefix__rfc, Prefix__pmid;
|
||||||
|
//
|
||||||
|
// private static final byte Space__tab = 1, Space__nbsp_ent = 2, Space__nbsp_dec = 3, Space__nbsp_hex = 4;
|
||||||
|
// private static Btrie_slim_mgr space_trie;
|
||||||
|
// // static final SPACE_NOT_NL = '(?:\t|&nbsp;|&\#0*160;|&\#[Xx]0*[Aa]0;|\p{Zs})';
|
||||||
|
//// public void Test() {
|
||||||
|
//// regex.Add("\t", Space__tab);
|
||||||
|
//// regex.Add(" ", Space__nbsp__ent);
|
||||||
|
//// regex.Add(Regex.Make("&#").Star("0").Add("160;"), Space__nbsp__dec);
|
||||||
|
//// regex.Add(Regex.Make("&#").Brack("X", "x").Star("0").Brack("A", "a").Add("0"), Space__nbsp__hex);
|
||||||
|
//// }
|
||||||
|
// public int Find_fwd_space(byte[] src, int cur, int src_end) {
|
||||||
|
// return -1;
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// private static final byte Regex__anch = 1, Regex__elem = 2, Regex__free = 3, Regex__rfc = 5, Regex__isbn = 6, Regex__pmid = 7;
|
||||||
|
// public void Init_by_wiki() {
|
||||||
|
// regex_trie.Add_str_byte("<a", Regex__anch);
|
||||||
|
// regex_trie.Add_str_byte("<" , Regex__elem);
|
||||||
|
//
|
||||||
|
// Gfo_protocol_itm[] protocol_ary = Gfo_protocol_itm.Ary();
|
||||||
|
// int protocol_len = protocol_ary.length;
|
||||||
|
// for (int i = 0; i < protocol_len; i++) {
|
||||||
|
// Gfo_protocol_itm itm = protocol_ary[i];
|
||||||
|
// regex_trie.Add_bry_byte(itm.Key_w_colon_bry(), Regex__free);
|
||||||
|
// }
|
||||||
|
// regex_trie.Add_str_byte("RFC " , Regex__rfc);
|
||||||
|
// regex_trie.Add_str_byte("PMID " , Regex__rfc);
|
||||||
|
// regex_trie.Add_str_byte("ISBN ", Regex__rfc);
|
||||||
|
//
|
||||||
|
// if (Tag__anch__rhs == null) {
|
||||||
|
// synchronized (Type_adp_.ClassOf_obj(this)) {
|
||||||
|
// Tag__anch__rhs = Bry_.new_a7("</a>");
|
||||||
|
// Prefix__rfc = Bry_.new_a7("RFC");
|
||||||
|
// Prefix__pmid = Bry_.new_a7("PMID");
|
||||||
|
// space_trie = Btrie_slim_mgr.ci_a7()
|
||||||
|
// .Add_str_byte("\t", Space__tab)
|
||||||
|
// .Add_str_byte(" ", Space__nbsp_ent)
|
||||||
|
// .Add_str_byte("&#", Space__nbsp_dec)
|
||||||
|
// .Add_str_byte("&x", Space__nbsp_hex)
|
||||||
|
// ;
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// // Replace special strings like "ISBN xxx" and "RFC xxx" with
|
||||||
|
// // magic external links.
|
||||||
|
// public void Do_magic_links(Xomw_parser_ctx pctx, Xomw_parser_bfr pbfr) {
|
||||||
|
// // XO.PBFR
|
||||||
|
// Bry_bfr src_bfr = pbfr.Src();
|
||||||
|
// byte[] src = src_bfr.Bfr();
|
||||||
|
// int src_bgn = 0;
|
||||||
|
// int src_end = src_bfr.Len();
|
||||||
|
// Bry_bfr bfr = pbfr.Trg();
|
||||||
|
//
|
||||||
|
// int cur = src_bgn;
|
||||||
|
// int prv = cur;
|
||||||
|
// boolean dirty = true;
|
||||||
|
// while (true) {
|
||||||
|
// if (cur == src_end) {
|
||||||
|
// if (dirty)
|
||||||
|
// bfr.Add_mid(src, prv, src_end);
|
||||||
|
// break;
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// byte b = src[cur];
|
||||||
|
// Object o = regex_trie.Match_at_w_b0(trv, b, src, cur, src_end);
|
||||||
|
// // current byte doesn't look like magiclink; continue;
|
||||||
|
// if (o == null) {
|
||||||
|
// cur++;
|
||||||
|
// continue;
|
||||||
|
// }
|
||||||
|
// // looks like magiclink; do additional processing
|
||||||
|
// byte regex_tid = ((Byte_obj_ref)o).Val();
|
||||||
|
// int trv_pos = trv.Pos();
|
||||||
|
// int nxt_pos = trv_pos;
|
||||||
|
// boolean regex_valid = true;
|
||||||
|
// switch (regex_tid) {
|
||||||
|
// case Regex__anch: // (<a[ \t\r\n>].*?</a>) | // m[1]: Skip link text
|
||||||
|
// if (trv_pos < src_end) {
|
||||||
|
// // find ws in "[ \t\r\n>]"
|
||||||
|
// byte ws_byte = src[cur];
|
||||||
|
// switch (ws_byte) {
|
||||||
|
// case Byte_ascii.Space:
|
||||||
|
// case Byte_ascii.Tab:
|
||||||
|
// case Byte_ascii.Cr:
|
||||||
|
// case Byte_ascii.Nl:
|
||||||
|
// break;
|
||||||
|
// default:
|
||||||
|
// regex_valid = false;
|
||||||
|
// break;
|
||||||
|
// }
|
||||||
|
// if (regex_valid) {
|
||||||
|
// // find </a>
|
||||||
|
// nxt_pos++;
|
||||||
|
// int anch_end = Bry_find_.Find_fwd(src, Tag__anch__rhs, nxt_pos, src_end);
|
||||||
|
// if (anch_end == Bry_find_.Not_found) {
|
||||||
|
// regex_valid = false;
|
||||||
|
// }
|
||||||
|
// else {
|
||||||
|
// cur = anch_end + Tag__anch__rhs.length;
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// else {
|
||||||
|
// regex_valid = false;
|
||||||
|
// }
|
||||||
|
// break;
|
||||||
|
// case Regex__elem: // (<.*?>) | // m[2]: Skip stuff inside
|
||||||
|
// // just find ">"
|
||||||
|
// int elem_end = Bry_find_.Find_fwd(src, Byte_ascii.Angle_end, nxt_pos, src_end);
|
||||||
|
// if (elem_end == Bry_find_.Not_found)
|
||||||
|
// regex_valid = false;
|
||||||
|
// else
|
||||||
|
// cur = elem_end + 1;
|
||||||
|
// break;
|
||||||
|
// case Regex__free:
|
||||||
|
// // addr; urlchar
|
||||||
|
// break;
|
||||||
|
// case Regex__rfc:
|
||||||
|
// case Regex__pmid:
|
||||||
|
// // byte[] prefix = regex == Regex__rfc ? Prefix__rfc : Prefix__pmid;
|
||||||
|
// // match previous for case sensitivity
|
||||||
|
//// if (Bry_.Eq(src, trv_pos - prefix.length - 1, trv_pos - 1, prefix)) {
|
||||||
|
////
|
||||||
|
//// }
|
||||||
|
//// else {
|
||||||
|
//// regex_valid = false;
|
||||||
|
//// }
|
||||||
|
// break;
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
//// '!(?: // Start cases
|
||||||
|
//// (<a[ \t\r\n>].*?</a>) | // m[1]: Skip link text
|
||||||
|
//// (<.*?>) | // m[2]: Skip stuff inside
|
||||||
|
//// // HTML elements' . "
|
||||||
|
//// (\b(?i:$prots)($addr$urlChar*)) | // m[3]: Free external links
|
||||||
|
//// // m[4]: Post-protocol path
|
||||||
|
//// \b(?:RFC|PMID) $spaces // m[5]: RFC or PMID, capture number
|
||||||
|
//// ([0-9]+)\b |
|
||||||
|
//// \bISBN $spaces ( // m[6]: ISBN, capture number
|
||||||
|
//// (?: 97[89] $spdash?)? // optional 13-digit ISBN prefix
|
||||||
|
//// (?: [0-9] $spdash?){9} // 9 digits with opt. delimiters
|
||||||
|
//// [0-9Xx] // check digit
|
||||||
|
//// )\b
|
||||||
|
//
|
||||||
|
// }
|
||||||
|
// if (dirty)
|
||||||
|
// pbfr.Switch();
|
||||||
|
|
||||||
|
// $prots = wfUrlProtocolsWithoutProtRel();
|
||||||
|
// $urlChar = self::EXT_LINK_URL_CLASS;
|
||||||
|
// $addr = self::EXT_LINK_ADDR;
|
||||||
|
// $space = self::SPACE_NOT_NL; // non-newline space
|
||||||
|
// $spdash = "(?:-|$space)"; // a dash or a non-newline space
|
||||||
|
// $spaces = "$space++"; // possessive match of 1 or more spaces
|
||||||
|
// $text = preg_replace_callback(
|
||||||
|
// '!(?: // Start cases
|
||||||
|
// (<a[ \t\r\n>].*?</a>) | // m[1]: Skip link text
|
||||||
|
// (<.*?>) | // m[2]: Skip stuff inside
|
||||||
|
// // HTML elements' . "
|
||||||
|
// (\b(?i:$prots)($addr$urlChar*)) | // m[3]: Free external links
|
||||||
|
// // m[4]: Post-protocol path
|
||||||
|
// \b(?:RFC|PMID) $spaces // m[5]: RFC or PMID, capture number
|
||||||
|
// ([0-9]+)\b |
|
||||||
|
// \bISBN $spaces ( // m[6]: ISBN, capture number
|
||||||
|
// (?: 97[89] $spdash?)? // optional 13-digit ISBN prefix
|
||||||
|
// (?: [0-9] $spdash?){9} // 9 digits with opt. delimiters
|
||||||
|
// [0-9Xx] // check digit
|
||||||
|
// )\b
|
||||||
|
// )!xu", [ &$this, 'magicLinkCallback' ], $text);
|
||||||
|
// return $text;
|
||||||
|
// }
|
||||||
|
|
||||||
|
// public function magicLinkCallback($m) {
|
||||||
|
// if (isset($m[1]) && $m[1] !== '') {
|
||||||
|
// // Skip anchor
|
||||||
|
// return $m[0];
|
||||||
|
// } else if (isset($m[2]) && $m[2] !== '') {
|
||||||
|
// // Skip HTML element
|
||||||
|
// return $m[0];
|
||||||
|
// } else if (isset($m[3]) && $m[3] !== '') {
|
||||||
|
// // Free external link
|
||||||
|
// return $this->makeFreeExternalLink($m[0], strlen($m[4]));
|
||||||
|
// } else if (isset($m[5]) && $m[5] !== '') {
|
||||||
|
// // RFC or PMID
|
||||||
|
// if (substr($m[0], 0, 3) === 'RFC') {
|
||||||
|
// if (!$this->mOptions->getMagicRFCLinks()) {
|
||||||
|
// return $m[0];
|
||||||
|
// }
|
||||||
|
// $keyword = 'RFC';
|
||||||
|
// $urlmsg = 'rfcurl';
|
||||||
|
// $cssClass = 'mw-magiclink-rfc';
|
||||||
|
// $trackingCat = 'magiclink-tracking-rfc';
|
||||||
|
// $id = $m[5];
|
||||||
|
// } else if (substr($m[0], 0, 4) === 'PMID') {
|
||||||
|
// if (!$this->mOptions->getMagicPMIDLinks()) {
|
||||||
|
// return $m[0];
|
||||||
|
// }
|
||||||
|
// $keyword = 'PMID';
|
||||||
|
// $urlmsg = 'pubmedurl';
|
||||||
|
// $cssClass = 'mw-magiclink-pmid';
|
||||||
|
// $trackingCat = 'magiclink-tracking-pmid';
|
||||||
|
// $id = $m[5];
|
||||||
|
// } else {
|
||||||
|
// throw new MWException(__METHOD__ . ': unrecognised match type "' .
|
||||||
|
// substr($m[0], 0, 20) . '"');
|
||||||
|
// }
|
||||||
|
// $url = wfMessage($urlmsg, $id)->inContentLanguage()->text();
|
||||||
|
// $this->addTrackingCategory($trackingCat);
|
||||||
|
// return Linker::makeExternalLink($url, "{$keyword} {$id}", true, $cssClass, [], $this->mTitle);
|
||||||
|
// } else if (isset($m[6]) && $m[6] !== ''
|
||||||
|
// && $this->mOptions->getMagicISBNLinks()
|
||||||
|
// ) {
|
||||||
|
// // ISBN
|
||||||
|
// $isbn = $m[6];
|
||||||
|
// $space = self::SPACE_NOT_NL; // non-newline space
|
||||||
|
// $isbn = preg_replace("/$space/", ' ', $isbn);
|
||||||
|
// $num = strtr($isbn, [
|
||||||
|
// '-' => '',
|
||||||
|
// ' ' => '',
|
||||||
|
// 'x' => 'X',
|
||||||
|
// ]);
|
||||||
|
// $this->addTrackingCategory('magiclink-tracking-isbn');
|
||||||
|
// return $this->getLinkRenderer()->makeKnownLink(
|
||||||
|
// SpecialPage::getTitleFor('Booksources', $num),
|
||||||
|
// "ISBN $isbn",
|
||||||
|
// [
|
||||||
|
// 'class' => '@gplx.Internal protected mw-magiclink-isbn',
|
||||||
|
// 'title' => false // suppress title attribute
|
||||||
|
// ]
|
||||||
|
// );
|
||||||
|
// } else {
|
||||||
|
// return $m[0];
|
||||||
|
// }
|
||||||
|
|
||||||
|
// Make a free external link, given a user-supplied URL
|
||||||
|
// public void Make_free_external_link(byte[] url, int num_post_proto) {
|
||||||
|
// byte[] trail = Bry_.Empty;
|
||||||
|
|
||||||
|
// The characters '<' and '>' (which were escaped by
|
||||||
|
// removeHTMLtags()) should not be included in
|
||||||
|
// URLs, per RFC 2396.
|
||||||
|
// Make &nbsp; terminate a URL as well (bug T84937)
|
||||||
|
// $m2 = [];
|
||||||
|
// if (preg_match(
|
||||||
|
// '/&(lt|gt|nbsp|#x0*(3[CcEe]|[Aa]0)|#0*(60|62|160));/',
|
||||||
|
// $url,
|
||||||
|
// $m2,
|
||||||
|
// PREG_OFFSET_CAPTURE
|
||||||
|
// )) {
|
||||||
|
// trail = substr($url, $m2[0][1]) . $trail;
|
||||||
|
// $url = substr($url, 0, $m2[0][1]);
|
||||||
|
// }
|
||||||
|
|
||||||
|
// Move trailing punctuation to $trail
|
||||||
|
// $sep = ',;\.:!?';
|
||||||
|
// If there is no left bracket, then consider right brackets fair game too
|
||||||
|
// if (strpos($url, '(') === false) {
|
||||||
|
// $sep .= ')';
|
||||||
|
// }
|
||||||
|
|
||||||
|
// $urlRev = strrev($url);
|
||||||
|
// $numSepChars = strspn($urlRev, $sep);
|
||||||
|
// Don't break a trailing HTML entity by moving the ; into $trail
|
||||||
|
// This is in hot code, so use substr_compare to avoid having to
|
||||||
|
// create a new String Object for the comparison
|
||||||
|
// if ($numSepChars && substr_compare($url, ";", -$numSepChars, 1) === 0) {
|
||||||
|
// more optimization: instead of running preg_match with a $
|
||||||
|
// anchor, which can be slow, do the match on the reversed
|
||||||
|
// String starting at the desired offset.
|
||||||
|
// un-reversed regexp is: /&([a-z]+|#x[\da-f]+|#\d+)$/i
|
||||||
|
// if (preg_match('/\G([a-z]+|[\da-f]+x#|\d+#)&/i', $urlRev, $m2, 0, $numSepChars)) {
|
||||||
|
// $numSepChars--;
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// if ($numSepChars) {
|
||||||
|
// $trail = substr($url, -$numSepChars) . $trail;
|
||||||
|
// $url = substr($url, 0, -$numSepChars);
|
||||||
|
// }
|
||||||
|
|
||||||
|
// Verify that we still have a real URL after trail removal, and
|
||||||
|
// not just lone protocol
|
||||||
|
// if (strlen($trail) >= $numPostProto) {
|
||||||
|
// return $url . $trail;
|
||||||
|
// }
|
||||||
|
|
||||||
|
// $url = Sanitizer::cleanUrl($url);
|
||||||
|
|
||||||
|
// Is this an external image?
|
||||||
|
// $text = $this->maybeMakeExternalImage($url);
|
||||||
|
// if ($text === false) {
|
||||||
|
// Not an image, make a link
|
||||||
|
// $text = Linker::makeExternalLink($url,
|
||||||
|
// $this->getConverterLanguage()->markNoConversion($url, true),
|
||||||
|
// true, 'free',
|
||||||
|
// $this->getExternalLinkAttribs($url), $this->mTitle);
|
||||||
|
// Register it in the output Object...
|
||||||
|
// Replace unnecessary URL escape codes with their equivalent characters
|
||||||
|
// $pasteurized = self::normalizeLinkUrl($url);
|
||||||
|
// $this->mOutput->addExternalLink($pasteurized);
|
||||||
|
// }
|
||||||
|
// return $text . $trail;
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// }
|
@ -0,0 +1,134 @@
|
|||||||
|
/*
|
||||||
|
XOWA: the XOWA Offline Wiki Application
|
||||||
|
Copyright (C) 2012 gnosygnu@gmail.com
|
||||||
|
|
||||||
|
This program is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Affero General Public License as
|
||||||
|
published by the Free Software Foundation, either version 3 of the
|
||||||
|
License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Affero General Public License
|
||||||
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
package gplx.xowa.mws.parsers.nbsps; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
|
||||||
|
import gplx.core.btries.*;
|
||||||
|
public class Xomw_nbsp_wkr {
|
||||||
|
private final Btrie_rv trv = new Btrie_rv();
|
||||||
|
public void Do_nbsp(Xomw_parser_ctx pctx, Xomw_parser_bfr pbfr) {
|
||||||
|
// PORTED:
|
||||||
|
// Clean up special characters, only run once, next-to-last before doBlockLevels
|
||||||
|
// $fixtags = [
|
||||||
|
// // French spaces, last one Guillemet-left
|
||||||
|
// // only if there is something before the space
|
||||||
|
// '/(.) (?=\\?|:|;|!|%|\\302\\273)/' => '\\1 ',
|
||||||
|
// // french spaces, Guillemet-right
|
||||||
|
// '/(\\302\\253) /' => '\\1 ',
|
||||||
|
// '/ (!\s*important)/' => ' \\1', // Beware of CSS magic word !important, T13874.
|
||||||
|
// ];
|
||||||
|
// $text = preg_replace( array_keys( $fixtags ), array_values( $fixtags ), $text );
|
||||||
|
// XO.PBFR
|
||||||
|
Bry_bfr src_bfr = pbfr.Src();
|
||||||
|
byte[] src = src_bfr.Bfr();
|
||||||
|
int src_bgn = 0;
|
||||||
|
int src_end = src_bfr.Len();
|
||||||
|
Bry_bfr bfr = pbfr.Trg();
|
||||||
|
|
||||||
|
if (trie == null) {
|
||||||
|
synchronized (this.getClass()) {
|
||||||
|
trie = Btrie_slim_mgr.cs();
|
||||||
|
Trie__add(trie, Tid__space_lhs, " ?");
|
||||||
|
Trie__add(trie, Tid__space_lhs, " :");
|
||||||
|
Trie__add(trie, Tid__space_lhs, " ;");
|
||||||
|
Trie__add(trie, Tid__space_lhs, " !");
|
||||||
|
Trie__add(trie, Tid__space_lhs, " »");
|
||||||
|
Trie__add(trie, Tid__space_rhs, "« ");
|
||||||
|
Trie__add(trie, Tid__important, " !");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int cur = src_bgn;
|
||||||
|
int prv = cur;
|
||||||
|
boolean dirty = true;
|
||||||
|
// search forward for...
|
||||||
|
// "\s" before ? : ; ! % 302,273; EX: "a :"
|
||||||
|
// "\s" after 302,253
|
||||||
|
// "&160;!\simportant"
|
||||||
|
while (true) {
|
||||||
|
if (cur == src_end) {
|
||||||
|
if (dirty)
|
||||||
|
bfr.Add_mid(src, prv, src_end);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
Object o = trie.Match_at(trv, src, cur, src_end);
|
||||||
|
if (o == null) {
|
||||||
|
cur++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
Xomw_nbsp_itm itm = (Xomw_nbsp_itm)o;
|
||||||
|
|
||||||
|
// '/ (!\s*important)/' => ' \\1'
|
||||||
|
byte itm_tid = itm.Tid();
|
||||||
|
int important_end = -1;
|
||||||
|
if (itm_tid == Tid__important) {
|
||||||
|
int space_bgn = cur + itm.Key().length;
|
||||||
|
int space_end = Bry_find_.Find_fwd_while(src, space_bgn, src_end, Byte_ascii.Space);
|
||||||
|
important_end = space_end + Bry__important.length;
|
||||||
|
if (!Bry_.Match(src, space_end, important_end, Bry__important)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
dirty = true;
|
||||||
|
bfr.Add_mid(src, prv, cur);
|
||||||
|
switch (itm_tid) {
|
||||||
|
case Tid__space_lhs:
|
||||||
|
bfr.Add_bry_many(Bry__nbsp, itm.Val());
|
||||||
|
break;
|
||||||
|
case Tid__space_rhs:
|
||||||
|
bfr.Add_bry_many(itm.Val(), Bry__nbsp);
|
||||||
|
break;
|
||||||
|
case Tid__important:
|
||||||
|
bfr.Add(Bry__important__repl);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
cur += itm.Key().length;
|
||||||
|
prv = cur;
|
||||||
|
}
|
||||||
|
if (dirty)
|
||||||
|
pbfr.Switch();
|
||||||
|
}
|
||||||
|
private static final byte Tid__space_lhs = 0, Tid__space_rhs = 1, Tid__important = 2;
|
||||||
|
private static Btrie_slim_mgr trie;
|
||||||
|
private static void Trie__add(Btrie_slim_mgr trie, byte tid, String key_str) {
|
||||||
|
byte[] key_bry = Bry_.new_u8(key_str);
|
||||||
|
byte[] val_bry = null;
|
||||||
|
switch (tid) {
|
||||||
|
case Tid__space_lhs:
|
||||||
|
val_bry = Bry_.Mid(key_bry, 1);
|
||||||
|
break;
|
||||||
|
case Tid__space_rhs:
|
||||||
|
val_bry = Bry_.Mid(key_bry, 0, key_bry.length - 1);
|
||||||
|
break;
|
||||||
|
case Tid__important:
|
||||||
|
val_bry = key_bry;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
Xomw_nbsp_itm itm = new Xomw_nbsp_itm(tid, key_bry, val_bry);
|
||||||
|
trie.Add_obj(key_bry, itm);
|
||||||
|
}
|
||||||
|
private static final byte[] Bry__nbsp = Bry_.new_a7(" "), Bry__important = Bry_.new_a7("important"), Bry__important__repl = Bry_.new_a7(" !");
|
||||||
|
}
|
||||||
|
// Immutable trie entry: a type id, the key that was matched, and the value stored with it.
class Xomw_nbsp_itm {
  private final byte tid;
  private final byte[] key;
  private final byte[] val;

  // The arrays are stored as given (no copy).
  public Xomw_nbsp_itm(byte tid, byte[] key, byte[] val) {
    this.tid = tid;
    this.key = key;
    this.val = val;
  }

  // type id of the entry
  public byte Tid() {
    return tid;
  }

  // key bytes the entry was registered under
  public byte[] Key() {
    return key;
  }

  // value bytes stored with the key
  public byte[] Val() {
    return val;
  }
}
|
@ -0,0 +1,40 @@
|
|||||||
|
/*
|
||||||
|
XOWA: the XOWA Offline Wiki Application
|
||||||
|
Copyright (C) 2012 gnosygnu@gmail.com
|
||||||
|
|
||||||
|
This program is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Affero General Public License as
|
||||||
|
published by the Free Software Foundation, either version 3 of the
|
||||||
|
License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Affero General Public License
|
||||||
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
package gplx.xowa.mws.parsers.nbsps; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
|
||||||
|
import org.junit.*;
|
||||||
|
public class Xomw_nbsp_wkr__tst {
|
||||||
|
private final Xomw_nbsp_wkr__fxt fxt = new Xomw_nbsp_wkr__fxt();
|
||||||
|
@Test public void Noop() {fxt.Test__parse("abc" , "abc");}
|
||||||
|
@Test public void Space_lhs__colon() {fxt.Test__parse("a :b c" , "a :b c");}
|
||||||
|
@Test public void Space_lhs__laquo() {fxt.Test__parse("a »b c" , "a »b c");}
|
||||||
|
@Test public void Space_rhs() {fxt.Test__parse("a« b c" , "a« b c");}
|
||||||
|
@Test public void Important() {fxt.Test__parse("a  ! important b" , "a ! important b");}
|
||||||
|
}
|
||||||
|
class Xomw_nbsp_wkr__fxt {
|
||||||
|
private final Xomw_nbsp_wkr wkr = new Xomw_nbsp_wkr();
|
||||||
|
private final Xomw_parser_ctx pctx = new Xomw_parser_ctx();
|
||||||
|
private final Xomw_parser_bfr pbfr = new Xomw_parser_bfr();
|
||||||
|
private boolean apos = true;
|
||||||
|
public void Test__parse(String src_str, String expd) {
|
||||||
|
byte[] src_bry = Bry_.new_u8(src_str);
|
||||||
|
pbfr.Init(src_bry);
|
||||||
|
wkr.Do_nbsp(pctx, pbfr);
|
||||||
|
if (apos) expd = gplx.langs.htmls.Gfh_utl.Replace_apos(expd);
|
||||||
|
Tfds.Eq_str_lines(expd, pbfr.Rslt().To_str_and_clear(), src_str);
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,564 @@
|
|||||||
|
/*
|
||||||
|
XOWA: the XOWA Offline Wiki Application
|
||||||
|
Copyright (C) 2012 gnosygnu@gmail.com
|
||||||
|
|
||||||
|
This program is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Affero General Public License as
|
||||||
|
published by the Free Software Foundation, either version 3 of the
|
||||||
|
License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Affero General Public License
|
||||||
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
package gplx.xowa.mws.parsers.prepros; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
|
||||||
|
// public class Xomw_frame_wkr { // THREAD.UNSAFE: caching for repeated calls
|
||||||
|
// private final Xomw_parser parser;
|
||||||
|
// public Xomw_frame_wkr(Xomw_parser parser) {
|
||||||
|
// this.parser = parser;
|
||||||
|
// }
|
||||||
|
// \\ Replace magic variables, templates, and template arguments
|
||||||
|
// \\ with the appropriate text. Templates are substituted recursively,
|
||||||
|
// \\ taking care to avoid infinite loops.
|
||||||
|
// \\
|
||||||
|
// \\ Note that the substitution depends on value of $mOutputType:
|
||||||
|
// \\ self::OT_WIKI: only {{subst:}} templates
|
||||||
|
// \\ self::OT_PREPROCESS: templates but not extension tags
|
||||||
|
// \\ self::OT_HTML: all templates and extension tags
|
||||||
|
// \\
|
||||||
|
// \\ @param String $text The text to transform
|
||||||
|
// \\ @param boolean|PPFrame $frame Object describing the arguments passed to the
|
||||||
|
// \\ template. Arguments may also be provided as an associative array, as
|
||||||
|
// \\ was the usual case before MW1.12. Providing arguments this way may be
|
||||||
|
// \\ useful for extensions wishing to perform variable replacement
|
||||||
|
// \\ explicitly.
|
||||||
|
// \\ @param boolean $argsOnly Only do argument (triple-brace) expansion, not
|
||||||
|
// \\ double-brace expansion.
|
||||||
|
// \\ @return String
|
||||||
|
// public function replaceVariables($text, $frame = false, $argsOnly = false) {
|
||||||
|
// // Is there any text? Also, Prevent too big inclusions!
|
||||||
|
// $textSize = strlen($text);
|
||||||
|
// if ($textSize < 1 || $textSize > $this->mOptions->getMaxIncludeSize()) {
|
||||||
|
// return $text;
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// if ($frame == false) {
|
||||||
|
// $frame = $this->getPreprocessor()->newFrame();
|
||||||
|
// } elseif (!($frame instanceof PPFrame)) {
|
||||||
|
// wfDebug(__METHOD__ . " called using plain parameters instead of "
|
||||||
|
// . "a PPFrame instance. Creating custom frame.\n");
|
||||||
|
// $frame = $this->getPreprocessor()->newCustomFrame($frame);
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// $dom = $this->preprocessToDom($text);
|
||||||
|
// $flags = $argsOnly ? PPFrame::NO_TEMPLATES : 0;
|
||||||
|
// $text = $frame->expand($dom, $flags);
|
||||||
|
//
|
||||||
|
// return $text;
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// \\ Clean up argument array - refactored in 1.9 so parserfunctions can use it, too.
|
||||||
|
// public static function createAssocArgs($args) {
|
||||||
|
// $assocArgs = [];
|
||||||
|
// $index = 1;
|
||||||
|
// foreach ($args as $arg) {
|
||||||
|
// $eqpos = strpos($arg, '=');
|
||||||
|
// if ($eqpos == false) {
|
||||||
|
// $assocArgs[$index++] = $arg;
|
||||||
|
// } else {
|
||||||
|
// $name = trim(substr($arg, 0, $eqpos));
|
||||||
|
// $value = trim(substr($arg, $eqpos + 1));
|
||||||
|
// if ($value == false) {
|
||||||
|
// $value = '';
|
||||||
|
// }
|
||||||
|
// if ($name != false) {
|
||||||
|
// $assocArgs[$name] = $value;
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// return $assocArgs;
|
||||||
|
// }
|
||||||
|
|
||||||
|
// \\ Return the text of a template, after recursively
|
||||||
|
// \\ replacing any variables or templates within the template.
|
||||||
|
// \\
|
||||||
|
// \\ @param array $piece The parts of the template
|
||||||
|
// \\ $piece['title']: the title, i.e. the part before the |
|
||||||
|
// \\ $piece['parts']: the parameter array
|
||||||
|
// \\ $piece['lineStart']: whether the brace was at the start of a line
|
||||||
|
// \\ @param PPFrame $frame The current frame, contains template arguments
|
||||||
|
// \\ @throws Exception
|
||||||
|
// \\ @return String The text of the template
|
||||||
|
// public void Brace_substitution(Xomw_prepro_node__template piece, Xomw_frame_itm frame) {
|
||||||
|
// // Flags
|
||||||
|
//
|
||||||
|
// // $text has been filled
|
||||||
|
// boolean found = false;
|
||||||
|
// // wiki markup in $text should be escaped
|
||||||
|
// boolean nowiki = false;
|
||||||
|
// // $text is HTML, armour it against wikitext transformation
|
||||||
|
// boolean is_html = false;
|
||||||
|
// // Force interwiki transclusion to be done in raw mode not rendered
|
||||||
|
// boolean force_raw_interwiki = false;
|
||||||
|
// // $text is a DOM node needing expansion in a child frame
|
||||||
|
// boolean is_child_obj = false;
|
||||||
|
// // $text is a DOM node needing expansion in the current frame
|
||||||
|
// boolean is_local_obj = false;
|
||||||
|
//
|
||||||
|
// // Title Object, where $text came from
|
||||||
|
// byte[] title = null;
|
||||||
|
//
|
||||||
|
// // $part1 is the bit before the first |, and must contain only title characters.
|
||||||
|
// // Various prefixes will be stripped from it later.
|
||||||
|
// byte[] title_with_spaces = frame.Expand(piece.Title());
|
||||||
|
// byte[] part1 = Bry_.Trim(title_with_spaces);
|
||||||
|
// byte[] title_text = null;
|
||||||
|
//
|
||||||
|
// // Original title text preserved for various purposes
|
||||||
|
// byte[] originalTitle = part1;
|
||||||
|
//
|
||||||
|
// // $args is a list of argument nodes, starting from index 0, not including $part1
|
||||||
|
// // @todo FIXME: If piece['parts'] is null then the call to getLength()
|
||||||
|
// // below won't work b/c this $args isn't an Object
|
||||||
|
// Xomw_prepro_node__part[] args = (null == piece.Parts()) ? null : piece.Parts();
|
||||||
|
//
|
||||||
|
// byte[] profile_section = null; // profile templates
|
||||||
|
//
|
||||||
|
// Tfds.Write(nowiki, is_html, force_raw_interwiki, is_child_obj, is_local_obj, title, title_text, profile_section);
|
||||||
|
// // SUBST
|
||||||
|
// if (!found) {
|
||||||
|
// String subst_match = null; // $this->mSubstWords->matchStartAndRemove($part1);
|
||||||
|
// boolean literal = false;
|
||||||
|
//
|
||||||
|
// // Possibilities for substMatch: "subst", "safesubst" or FALSE
|
||||||
|
// // Decide whether to expand template or keep wikitext as-is.
|
||||||
|
// if (parser.Output_type__wiki()) {
|
||||||
|
// if (subst_match == null) {
|
||||||
|
// literal = true; // literal when in PST with no prefix
|
||||||
|
// }
|
||||||
|
// else {
|
||||||
|
// literal = false; // expand when in PST with subst: or safesubst:
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// else {
|
||||||
|
// if (subst_match == "subst") {
|
||||||
|
// literal = true; // literal when not in PST with plain subst:
|
||||||
|
// }
|
||||||
|
// else {
|
||||||
|
// literal = false; // expand when not in PST with safesubst: or no prefix
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// if (literal) {
|
||||||
|
//// $text = $frame->virtualBracketedImplode('{{', '|', '}}', title_with_spaces, $args);
|
||||||
|
// is_local_obj = true;
|
||||||
|
// found = true;
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// // Variables
|
||||||
|
// if (!found && args.length == 0) {
|
||||||
|
//// $id = $this->mVariables->matchStartToEnd($part1);
|
||||||
|
//// if ($id != false) {
|
||||||
|
//// $text = $this->getVariableValue($id, $frame);
|
||||||
|
//// if (MagicWord::getCacheTTL($id) > -1) {
|
||||||
|
//// $this->mOutput->updateCacheExpiry(MagicWord::getCacheTTL($id));
|
||||||
|
//// }
|
||||||
|
// found = true;
|
||||||
|
//// }
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// // MSG, MSGNW and RAW
|
||||||
|
// if (!found) {
|
||||||
|
// // Check for MSGNW:
|
||||||
|
//// $mwMsgnw = MagicWord::get('msgnw');
|
||||||
|
//// if ($mwMsgnw->matchStartAndRemove($part1)) {
|
||||||
|
// nowiki = true;
|
||||||
|
//// }
|
||||||
|
//// else {
|
||||||
|
// // Remove obsolete MSG:
|
||||||
|
//// $mwMsg = MagicWord::get('msg');
|
||||||
|
//// $mwMsg->matchStartAndRemove($part1);
|
||||||
|
//// }
|
||||||
|
//
|
||||||
|
// // Check for RAW:
|
||||||
|
//// $mwRaw = MagicWord::get('raw');
|
||||||
|
//// if ($mwRaw->matchStartAndRemove($part1)) {
|
||||||
|
//// force_raw_interwiki = true;
|
||||||
|
//// }
|
||||||
|
// }
|
||||||
|
|
||||||
|
// Parser functions
|
||||||
|
// if (!found) {
|
||||||
|
// $colonPos = strpos($part1, ':');
|
||||||
|
// if ($colonPos != false) {
|
||||||
|
// $func = substr($part1, 0, $colonPos);
|
||||||
|
// $funcArgs = [ trim(substr($part1, $colonPos + 1)) ];
|
||||||
|
// $argsLength = $args->getLength();
|
||||||
|
// for ($i = 0; $i < $argsLength; $i++) {
|
||||||
|
// $funcArgs[] = $args->item($i);
|
||||||
|
// }
|
||||||
|
// try {
|
||||||
|
// $result = $this->callParserFunction($frame, $func, $funcArgs);
|
||||||
|
// } catch (Exception $ex) {
|
||||||
|
// throw $ex;
|
||||||
|
// }
|
||||||
|
|
||||||
|
// The interface for parser functions allows for extracting
|
||||||
|
// flags into the local scope. Extract any forwarded flags
|
||||||
|
// here.
|
||||||
|
// extract($result);
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
|
||||||
|
// Finish mangling title and then check for loops.
|
||||||
|
// Set title to a Title Object and $title_text to the PDBK
|
||||||
|
// if (!found) {
|
||||||
|
// $ns = NS_TEMPLATE;
|
||||||
|
// Split the title into page and subpage
|
||||||
|
// $subpage = '';
|
||||||
|
// $relative = $this->maybeDoSubpageLink($part1, $subpage);
|
||||||
|
// if ($part1 != $relative) {
|
||||||
|
// $part1 = $relative;
|
||||||
|
// $ns = $this->mTitle->getNamespace();
|
||||||
|
// }
|
||||||
|
// title = Title::newFromText($part1, $ns);
|
||||||
|
// if (title) {
|
||||||
|
// $title_text = title->getPrefixedText();
|
||||||
|
// // Check for language variants if the template is not found
|
||||||
|
// if ($this->getConverterLanguage()->hasVariants() && title->getArticleID() == 0) {
|
||||||
|
// $this->getConverterLanguage()->findVariantLink($part1, title, true);
|
||||||
|
// }
|
||||||
|
// // Do recursion depth check
|
||||||
|
// $limit = $this->mOptions->getMaxTemplateDepth();
|
||||||
|
// if ($frame->depth >= $limit) {
|
||||||
|
// found = true;
|
||||||
|
// $text = '<span class="error">'
|
||||||
|
// . wfMessage('parser-template-recursion-depth-warning')
|
||||||
|
// ->numParams($limit)->inContentLanguage()->text()
|
||||||
|
// . '</span>';
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
|
||||||
|
// Load from database
|
||||||
|
// if (!found && title) {
|
||||||
|
// $profile_section = $this->mProfiler->scopedProfileIn(title->getPrefixedDBkey());
|
||||||
|
// if (!title->isExternal()) {
|
||||||
|
// if (title->isSpecialPage()
|
||||||
|
// && $this->mOptions->getAllowSpecialInclusion()
|
||||||
|
// && $this->ot['html']
|
||||||
|
// ) {
|
||||||
|
// $specialPage = SpecialPageFactory::getPage(title->getDBkey());
|
||||||
|
// // Pass the template arguments as URL parameters.
|
||||||
|
// // "uselang" will have no effect since the Language Object
|
||||||
|
// // is forced to the one defined in ParserOptions.
|
||||||
|
// $pageArgs = [];
|
||||||
|
// $argsLength = $args->getLength();
|
||||||
|
// for ($i = 0; $i < $argsLength; $i++) {
|
||||||
|
// $bits = $args->item($i)->splitArg();
|
||||||
|
// if (strval($bits['index']) == '') {
|
||||||
|
// $name = trim($frame->expand($bits['name'], PPFrame::STRIP_COMMENTS));
|
||||||
|
// $value = trim($frame->expand($bits['value']));
|
||||||
|
// $pageArgs[$name] = $value;
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// // Create a new context to execute the special page
|
||||||
|
// $context = new RequestContext;
|
||||||
|
// $context->setTitle(title);
|
||||||
|
// $context->setRequest(new FauxRequest($pageArgs));
|
||||||
|
// if ($specialPage && $specialPage->maxIncludeCacheTime() == 0) {
|
||||||
|
// $context->setUser($this->getUser());
|
||||||
|
// } else {
|
||||||
|
// // If this page is cached, then we better not be per user.
|
||||||
|
// $context->setUser(User::newFromName('127.0.0.1', false));
|
||||||
|
// }
|
||||||
|
// $context->setLanguage($this->mOptions->getUserLangObj());
|
||||||
|
// $ret = SpecialPageFactory::capturePath(
|
||||||
|
// title, $context, $this->getLinkRenderer());
|
||||||
|
// if ($ret) {
|
||||||
|
// $text = $context->getOutput()->getHTML();
|
||||||
|
// $this->mOutput->addOutputPageMetadata($context->getOutput());
|
||||||
|
// found = true;
|
||||||
|
// is_html = true;
|
||||||
|
// if ($specialPage && $specialPage->maxIncludeCacheTime() != false) {
|
||||||
|
// $this->mOutput->updateRuntimeAdaptiveExpiry(
|
||||||
|
// $specialPage->maxIncludeCacheTime()
|
||||||
|
// );
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// } elseif (MWNamespace::isNonincludable(title->getNamespace())) {
|
||||||
|
// found = false; // access denied
|
||||||
|
// wfDebug(__METHOD__ . ": template inclusion denied for " .
|
||||||
|
// title->getPrefixedDBkey() . "\n");
|
||||||
|
// } else {
|
||||||
|
// list($text, title) = $this->getTemplateDom(title);
|
||||||
|
// if ($text != false) {
|
||||||
|
// found = true;
|
||||||
|
// is_child_obj = true;
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// // If the title is valid but undisplayable, make a link to it
|
||||||
|
// if (!found && ($this->ot['html'] || $this->ot['pre'])) {
|
||||||
|
// $text = "[[:$title_text]]";
|
||||||
|
// found = true;
|
||||||
|
// }
|
||||||
|
// } elseif (title->isTrans()) {
|
||||||
|
// // Interwiki transclusion
|
||||||
|
// if ($this->ot['html'] && !force_raw_interwiki) {
|
||||||
|
// $text = $this->interwikiTransclude(title, 'render');
|
||||||
|
// is_html = true;
|
||||||
|
// } else {
|
||||||
|
// $text = $this->interwikiTransclude(title, 'raw');
|
||||||
|
// // Preprocess it like a template
|
||||||
|
// $text = $this->preprocessToDom($text, self::PTD_FOR_INCLUSION);
|
||||||
|
// is_child_obj = true;
|
||||||
|
// }
|
||||||
|
// found = true;
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// // Do infinite loop check
|
||||||
|
// // This has to be done after redirect resolution to avoid infinite loops via redirects
|
||||||
|
// if (!$frame->loopCheck(title)) {
|
||||||
|
// found = true;
|
||||||
|
// $text = '<span class="error">'
|
||||||
|
// . wfMessage('parser-template-loop-warning', $title_text)->inContentLanguage()->text()
|
||||||
|
// . '</span>';
|
||||||
|
// wfDebug(__METHOD__ . ": template loop broken at '$title_text'\n");
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
|
||||||
|
// If we haven't found text to substitute by now, we're done
|
||||||
|
// Recover the source wikitext and return it
|
||||||
|
// if (!found) {
|
||||||
|
// $text = $frame->virtualBracketedImplode('{{', '|', '}}', title_with_spaces, $args);
|
||||||
|
// if ($profile_section) {
|
||||||
|
// $this->mProfiler->scopedProfileOut($profile_section);
|
||||||
|
// }
|
||||||
|
// return [ 'Object' => $text ];
|
||||||
|
// }
|
||||||
|
|
||||||
|
// Expand DOM-style return values in a child frame
|
||||||
|
// if (is_child_obj) {
|
||||||
|
// // Clean up argument array
|
||||||
|
// $newFrame = $frame->newChild($args, title);
|
||||||
|
//
|
||||||
|
// if (nowiki) {
|
||||||
|
// $text = $newFrame->expand($text, PPFrame::RECOVER_ORIG);
|
||||||
|
// } elseif ($title_text != false && $newFrame->isEmpty()) {
|
||||||
|
// // Expansion is eligible for the empty-frame cache
|
||||||
|
// $text = $newFrame->cachedExpand($title_text, $text);
|
||||||
|
// } else {
|
||||||
|
// // Uncached expansion
|
||||||
|
// $text = $newFrame->expand($text);
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// if (is_local_obj && nowiki) {
|
||||||
|
// $text = $frame->expand($text, PPFrame::RECOVER_ORIG);
|
||||||
|
// is_local_obj = false;
|
||||||
|
// }
|
||||||
|
|
||||||
|
// if ($profile_section) {
|
||||||
|
// $this->mProfiler->scopedProfileOut($profile_section);
|
||||||
|
// }
|
||||||
|
|
||||||
|
// Replace raw HTML by a placeholder
|
||||||
|
// if (is_html) {
|
||||||
|
// $text = $this->insertStripItem($text);
|
||||||
|
// } elseif (nowiki && ($this->ot['html'] || $this->ot['pre'])) {
|
||||||
|
// // Escape nowiki-style return values
|
||||||
|
// $text = wfEscapeWikiText($text);
|
||||||
|
// } elseif (is_string($text)
|
||||||
|
// && !$piece['lineStart']
|
||||||
|
// && preg_match('/^(?:{\\||:|;|#|\*)/', $text)
|
||||||
|
// ) {
|
||||||
|
// // T2529: if the template begins with a table or block-level
|
||||||
|
// // element, it should be treated as beginning a new line.
|
||||||
|
// // This behavior is somewhat controversial.
|
||||||
|
// $text = "\n" . $text;
|
||||||
|
// }
|
||||||
|
|
||||||
|
// if (is_string($text) && !$this->incrementIncludeSize('post-expand', strlen($text))) {
|
||||||
|
// // Error, oversize inclusion
|
||||||
|
// if ($title_text != false) {
|
||||||
|
// // Make a working, properly escaped link if possible (T25588)
|
||||||
|
// $text = "[[:$title_text]]";
|
||||||
|
// } else {
|
||||||
|
// // This will probably not be a working link, but at least it may
|
||||||
|
// // provide some hint of where the problem is
|
||||||
|
// preg_replace('/^:/', '', $originalTitle);
|
||||||
|
// $text = "[[:$originalTitle]]";
|
||||||
|
// }
|
||||||
|
// $text .= $this->insertStripItem('<!-- WARNING: template omitted, '
|
||||||
|
// . 'post-expand include size too large -->');
|
||||||
|
// $this->limitationWarn('post-expand-template-inclusion');
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// if (is_local_obj) {
|
||||||
|
// $ret = [ 'Object' => $text ];
|
||||||
|
// } else {
|
||||||
|
// $ret = [ 'text' => $text ];
|
||||||
|
// }
|
||||||
|
|
||||||
|
// return $ret;
|
||||||
|
// }
|
||||||
|
|
||||||
|
// \\ Triple brace replacement -- used for template arguments
|
||||||
|
// public function argSubstitution($piece, $frame) {
|
||||||
|
//
|
||||||
|
// $error = false;
|
||||||
|
// $parts = $piece['parts'];
|
||||||
|
// $nameWithSpaces = $frame->expand($piece['title']);
|
||||||
|
// $argName = trim($nameWithSpaces);
|
||||||
|
// $Object = false;
|
||||||
|
// $text = $frame->getArgument($argName);
|
||||||
|
// if ($text == false && $parts->getLength() > 0
|
||||||
|
// && ($this->ot['html']
|
||||||
|
// || $this->ot['pre']
|
||||||
|
// || ($this->ot['wiki'] && $frame->isTemplate())
|
||||||
|
// )
|
||||||
|
// ) {
|
||||||
|
// // No match in frame, use the supplied default
|
||||||
|
// $Object = $parts->item(0)->getChildren();
|
||||||
|
// }
|
||||||
|
// if (!$this->incrementIncludeSize('arg', strlen($text))) {
|
||||||
|
// $error = '<!-- WARNING: argument omitted, expansion size too large -->';
|
||||||
|
// $this->limitationWarn('post-expand-template-argument');
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// if ($text == false && $Object == false) {
|
||||||
|
// // No match anywhere
|
||||||
|
// $Object = $frame->virtualBracketedImplode('{{{', '|', '}}}', $nameWithSpaces, $parts);
|
||||||
|
// }
|
||||||
|
// if ($error != false) {
|
||||||
|
// $text .= $error;
|
||||||
|
// }
|
||||||
|
// if ($Object != false) {
|
||||||
|
// $ret = [ 'Object' => $Object ];
|
||||||
|
// } else {
|
||||||
|
// $ret = [ 'text' => $text ];
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// return $ret;
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// /**
|
||||||
|
// \\ Return the text to be used for a given extension tag.
|
||||||
|
// \\ This is the ghost of strip().
|
||||||
|
// \\
|
||||||
|
// \\ @param array $params Associative array of parameters:
|
||||||
|
// \\ name PPNode for the tag name
|
||||||
|
// \\ attr PPNode for unparsed text where tag attributes are thought to be
|
||||||
|
// \\ attributes Optional associative array of parsed attributes
|
||||||
|
// \\ inner Contents of extension element
|
||||||
|
// \\ noClose Original text did not have a close tag
|
||||||
|
// \\ @param PPFrame $frame
|
||||||
|
// \\
|
||||||
|
// \\ @throws MWException
|
||||||
|
// \\ @return String
|
||||||
|
// \\/
|
||||||
|
// public function extensionSubstitution($params, $frame) {
|
||||||
|
// static $errorStr = '<span class="error">';
|
||||||
|
// static $errorLen = 20;
|
||||||
|
//
|
||||||
|
// $name = $frame->expand($params['name']);
|
||||||
|
// if (substr($name, 0, $errorLen) == $errorStr) {
|
||||||
|
// // Probably expansion depth or node count exceeded. Just punt the
|
||||||
|
// // error up.
|
||||||
|
// return $name;
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// $attrText = !isset($params['attr']) ? null : $frame->expand($params['attr']);
|
||||||
|
// if (substr($attrText, 0, $errorLen) == $errorStr) {
|
||||||
|
// // See above
|
||||||
|
// return $attrText;
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// // We can't safely check if the expansion for $content resulted in an
|
||||||
|
// // error, because the content could happen to be the error String
|
||||||
|
// // (T149622).
|
||||||
|
// $content = !isset($params['inner']) ? null : $frame->expand($params['inner']);
|
||||||
|
//
|
||||||
|
// $marker = self::MARKER_PREFIX . "-$name-"
|
||||||
|
// . sprintf('%08X', $this->mMarkerIndex++) . self::MARKER_SUFFIX;
|
||||||
|
//
|
||||||
|
// $isFunctionTag = isset($this->mFunctionTagHooks[strtolower($name)]) &&
|
||||||
|
// ($this->ot['html'] || $this->ot['pre']);
|
||||||
|
// if ($isFunctionTag) {
|
||||||
|
// $markerType = 'none';
|
||||||
|
// } else {
|
||||||
|
// $markerType = 'general';
|
||||||
|
// }
|
||||||
|
// if ($this->ot['html'] || $isFunctionTag) {
|
||||||
|
// $name = strtolower($name);
|
||||||
|
// $attributes = Sanitizer::decodeTagAttributes($attrText);
|
||||||
|
// if (isset($params['attributes'])) {
|
||||||
|
// $attributes = $attributes + $params['attributes'];
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// if (isset($this->mTagHooks[$name])) {
|
||||||
|
// // Workaround for PHP bug 35229 and similar
|
||||||
|
// if (!is_callable($this->mTagHooks[$name])) {
|
||||||
|
// throw new MWException("Tag hook for $name is not callable\n");
|
||||||
|
// }
|
||||||
|
// $output = call_user_func_array($this->mTagHooks[$name],
|
||||||
|
// [ $content, $attributes, $this, $frame ]);
|
||||||
|
// } elseif (isset($this->mFunctionTagHooks[$name])) {
|
||||||
|
// list($callback,) = $this->mFunctionTagHooks[$name];
|
||||||
|
// if (!is_callable($callback)) {
|
||||||
|
// throw new MWException("Tag hook for $name is not callable\n");
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// $output = call_user_func_array($callback, [ &$this, $frame, $content, $attributes ]);
|
||||||
|
// } else {
|
||||||
|
// $output = '<span class="error">Invalid tag extension name: ' .
|
||||||
|
// htmlspecialchars($name) . '</span>';
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// if (is_array($output)) {
|
||||||
|
// // Extract flags to local scope (to override $markerType)
|
||||||
|
// $flags = $output;
|
||||||
|
// $output = $flags[0];
|
||||||
|
// unset($flags[0]);
|
||||||
|
// extract($flags);
|
||||||
|
// }
|
||||||
|
// } else {
|
||||||
|
// if (is_null($attrText)) {
|
||||||
|
// $attrText = '';
|
||||||
|
// }
|
||||||
|
// if (isset($params['attributes'])) {
|
||||||
|
// foreach ($params['attributes'] as $attrName => $attrValue) {
|
||||||
|
// $attrText .= ' ' . htmlspecialchars($attrName) . '="' .
|
||||||
|
// htmlspecialchars($attrValue) . '"';
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// if ($content == null) {
|
||||||
|
// $output = "<$name$attrText/>";
|
||||||
|
// } else {
|
||||||
|
// $close = is_null($params['close']) ? '' : $frame->expand($params['close']);
|
||||||
|
// if (substr($close, 0, $errorLen) == $errorStr) {
|
||||||
|
// // See above
|
||||||
|
// return $close;
|
||||||
|
// }
|
||||||
|
// $output = "<$name$attrText>$content$close";
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// if ($markerType == 'none') {
|
||||||
|
// return $output;
|
||||||
|
// } elseif ($markerType == 'nowiki') {
|
||||||
|
// $this->mStripState->addNoWiki($marker, $output);
|
||||||
|
// } elseif ($markerType == 'general') {
|
||||||
|
// $this->mStripState->addGeneral($marker, $output);
|
||||||
|
// } else {
|
||||||
|
// throw new MWException(__METHOD__ . ': invalid marker type');
|
||||||
|
// }
|
||||||
|
// return $marker;
|
||||||
|
// }
|
||||||
|
// }
|
@ -0,0 +1,98 @@
|
|||||||
|
/*
|
||||||
|
XOWA: the XOWA Offline Wiki Application
|
||||||
|
Copyright (C) 2012 gnosygnu@gmail.com
|
||||||
|
|
||||||
|
This program is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Affero General Public License as
|
||||||
|
published by the Free Software Foundation, either version 3 of the
|
||||||
|
License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Affero General Public License
|
||||||
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
package gplx.xowa.mws.parsers.prepros; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
|
||||||
|
public interface Xomw_prepro_node {
|
||||||
|
int Subs__len();
|
||||||
|
Xomw_prepro_node Subs__get_at(int i);
|
||||||
|
void Subs__add(Xomw_prepro_node sub);
|
||||||
|
void To_xml(Bry_bfr bfr);
|
||||||
|
}
|
||||||
|
class Xomw_prepro_node__text extends Xomw_prepro_node__base {
|
||||||
|
public Xomw_prepro_node__text(byte[] bry) {
|
||||||
|
this.bry = bry;
|
||||||
|
}
|
||||||
|
public byte[] Bry() {return bry;} protected final byte[] bry;
|
||||||
|
@Override public void To_xml(Bry_bfr bfr) {
|
||||||
|
bfr.Add(bry);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
class Xomw_prepro_node__comment extends Xomw_prepro_node__base {
|
||||||
|
public Xomw_prepro_node__comment(byte[] bry) {
|
||||||
|
this.bry = bry;
|
||||||
|
}
|
||||||
|
public byte[] Bry() {return bry;} protected final byte[] bry;
|
||||||
|
@Override public void To_xml(Bry_bfr bfr) {
|
||||||
|
bfr.Add_str_a7("<comment>");
|
||||||
|
bfr.Add(bry);
|
||||||
|
bfr.Add_str_a7("</comment>");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
class Xomw_prepro_node__ext extends Xomw_prepro_node__base {
|
||||||
|
public Xomw_prepro_node__ext(byte[] name, byte[] attr, byte[] inner, byte[] close) {
|
||||||
|
this.name = name;
|
||||||
|
this.attr = attr;
|
||||||
|
this.inner = inner;
|
||||||
|
this.close = close;
|
||||||
|
}
|
||||||
|
public byte[] Name() {return name;} private final byte[] name;
|
||||||
|
public byte[] Attr() {return attr;} private final byte[] attr;
|
||||||
|
public byte[] Inner() {return inner;} private final byte[] inner;
|
||||||
|
public byte[] Close() {return close;} private final byte[] close;
|
||||||
|
@Override public void To_xml(Bry_bfr bfr) {
|
||||||
|
bfr.Add_str_a7("<ext>");
|
||||||
|
bfr.Add_str_a7("<name>").Add(name).Add_str_a7("</name>");
|
||||||
|
bfr.Add_str_a7("<atr>").Add(attr).Add_str_a7("</atr>");
|
||||||
|
bfr.Add_str_a7("<inner>").Add(inner).Add_str_a7("</inner>");
|
||||||
|
bfr.Add_str_a7("<close>").Add(close).Add_str_a7("</close>");
|
||||||
|
bfr.Add_str_a7("</ext>");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
class Xomw_prepro_node__heading extends Xomw_prepro_node__base {
|
||||||
|
public Xomw_prepro_node__heading(int heading_index, int title_index, byte[] text) {
|
||||||
|
this.heading_index = heading_index;
|
||||||
|
this.title_index = title_index;
|
||||||
|
this.text = text;
|
||||||
|
}
|
||||||
|
public int Heading_index() {return heading_index;} private final int heading_index;
|
||||||
|
public int Title_index() {return title_index;} private final int title_index;
|
||||||
|
public byte[] Text() {return text;} private final byte[] text;
|
||||||
|
@Override public void To_xml(Bry_bfr bfr) {
|
||||||
|
bfr.Add_str_a7("<h ");
|
||||||
|
bfr.Add_str_a7(" level=\"").Add_int_variable(heading_index);
|
||||||
|
bfr.Add_str_a7("\" i=\"").Add_int_variable(title_index);
|
||||||
|
bfr.Add_str_a7("\">");
|
||||||
|
bfr.Add(text);
|
||||||
|
bfr.Add_str_a7("</h>");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
class Xomw_prepro_node__tplarg extends Xomw_prepro_node__base {
|
||||||
|
public Xomw_prepro_node__tplarg(byte[] title, Xomw_prepro_node__part[] parts) {
|
||||||
|
this.title = title; this.parts = parts;
|
||||||
|
}
|
||||||
|
public byte[] Title() {return title;} private final byte[] title;
|
||||||
|
public Xomw_prepro_node__part[] Parts() {return parts;} private final Xomw_prepro_node__part[] parts;
|
||||||
|
@Override public void To_xml(Bry_bfr bfr) {
|
||||||
|
bfr.Add_str_a7("<tplarg>");
|
||||||
|
bfr.Add_str_a7("<title>").Add(title);
|
||||||
|
bfr.Add_str_a7("</title>");
|
||||||
|
for (Xomw_prepro_node__part part : parts)
|
||||||
|
part.To_xml(bfr);
|
||||||
|
|
||||||
|
bfr.Add_str_a7("</tplarg>");
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,28 @@
|
|||||||
|
/*
|
||||||
|
XOWA: the XOWA Offline Wiki Application
|
||||||
|
Copyright (C) 2012 gnosygnu@gmail.com
|
||||||
|
|
||||||
|
This program is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Affero General Public License as
|
||||||
|
published by the Free Software Foundation, either version 3 of the
|
||||||
|
License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Affero General Public License
|
||||||
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
package gplx.xowa.mws.parsers.prepros; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
|
||||||
|
public abstract class Xomw_prepro_node__base implements Xomw_prepro_node {
|
||||||
|
private List_adp subs;
|
||||||
|
public int Subs__len() {return subs == null ? 0 : subs.Len();}
|
||||||
|
public Xomw_prepro_node Subs__get_at(int i) {return subs == null ? null : (Xomw_prepro_node)subs.Get_at(i);}
|
||||||
|
public void Subs__add(Xomw_prepro_node sub) {
|
||||||
|
if (subs == null) subs = List_adp_.New();
|
||||||
|
subs.Add(sub);
|
||||||
|
}
|
||||||
|
public abstract void To_xml(Bry_bfr bfr);
|
||||||
|
}
|
@ -0,0 +1,45 @@
|
|||||||
|
/*
|
||||||
|
XOWA: the XOWA Offline Wiki Application
|
||||||
|
Copyright (C) 2012 gnosygnu@gmail.com
|
||||||
|
|
||||||
|
This program is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Affero General Public License as
|
||||||
|
published by the Free Software Foundation, either version 3 of the
|
||||||
|
License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Affero General Public License
|
||||||
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
package gplx.xowa.mws.parsers.prepros; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
|
||||||
|
public class Xomw_prepro_node__part extends Xomw_prepro_node__base {
|
||||||
|
public Xomw_prepro_node__part(int idx, byte[] key, byte[] val) {
|
||||||
|
this.idx = idx;
|
||||||
|
this.key = key;
|
||||||
|
this.val = val;
|
||||||
|
}
|
||||||
|
public int Idx() {return idx;} private final int idx;
|
||||||
|
public byte[] Key() {return key;} private final byte[] key;
|
||||||
|
public byte[] Val() {return val;} private final byte[] val;
|
||||||
|
@Override public void To_xml(Bry_bfr bfr) {
|
||||||
|
bfr.Add_str_a7("<part>");
|
||||||
|
bfr.Add_str_a7("<name");
|
||||||
|
if (idx > 0) {
|
||||||
|
bfr.Add_str_a7(" index=\"").Add_int_variable(idx).Add_str_a7("\" />");
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
bfr.Add_str_a7(">");
|
||||||
|
bfr.Add(key);
|
||||||
|
bfr.Add_str_a7("</name>");
|
||||||
|
bfr.Add_str_a7("=");
|
||||||
|
}
|
||||||
|
bfr.Add_str_a7("<value>");
|
||||||
|
bfr.Add(val);
|
||||||
|
bfr.Add_str_a7("</value>");
|
||||||
|
bfr.Add_str_a7("</part>");
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,36 @@
|
|||||||
|
/*
|
||||||
|
XOWA: the XOWA Offline Wiki Application
|
||||||
|
Copyright (C) 2012 gnosygnu@gmail.com
|
||||||
|
|
||||||
|
This program is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Affero General Public License as
|
||||||
|
published by the Free Software Foundation, either version 3 of the
|
||||||
|
License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Affero General Public License
|
||||||
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
package gplx.xowa.mws.parsers.prepros; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
|
||||||
|
public class Xomw_prepro_node__template extends Xomw_prepro_node__base {
|
||||||
|
public Xomw_prepro_node__template(byte[] title, Xomw_prepro_node__part[] parts, int line_start) {
|
||||||
|
this.title = title; this.parts = parts; this.line_start = line_start;
|
||||||
|
}
|
||||||
|
public byte[] Title() {return title;} private final byte[] title;
|
||||||
|
public Xomw_prepro_node__part[] Parts() {return parts;} private final Xomw_prepro_node__part[] parts;
|
||||||
|
public int Line_start() {return line_start;} private final int line_start;
|
||||||
|
@Override public void To_xml(Bry_bfr bfr) {
|
||||||
|
bfr.Add_str_a7("<template");
|
||||||
|
if (line_start > 0) bfr.Add_str_a7(" lineStart=\"").Add_int_variable(line_start).Add_byte_quote();
|
||||||
|
bfr.Add_byte(Byte_ascii.Angle_end);
|
||||||
|
bfr.Add_str_a7("<title>").Add(title);
|
||||||
|
bfr.Add_str_a7("</title>");
|
||||||
|
for (Xomw_prepro_node__part part : parts)
|
||||||
|
part.To_xml(bfr);
|
||||||
|
bfr.Add_str_a7("</template>");
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,120 @@
|
|||||||
|
/*
|
||||||
|
XOWA: the XOWA Offline Wiki Application
|
||||||
|
Copyright (C) 2012 gnosygnu@gmail.com
|
||||||
|
|
||||||
|
This program is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Affero General Public License as
|
||||||
|
published by the Free Software Foundation, either version 3 of the
|
||||||
|
License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Affero General Public License
|
||||||
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
package gplx.xowa.mws.utls; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
|
||||||
|
public class Xomw_ttl_utl {
|
||||||
|
// REF.MW: DefaultSettings.php
|
||||||
|
// Allowed title characters -- regex character class
|
||||||
|
// Don't change this unless you know what you're doing
|
||||||
|
//
|
||||||
|
// Problematic punctuation:
|
||||||
|
// - []{}|# Are needed for link syntax, never enable these
|
||||||
|
// - <> Causes problems with HTML escaping, don't use
|
||||||
|
// - % Enabled by default, minor problems with path to query rewrite rules, see below
|
||||||
|
// - + Enabled by default, but doesn't work with path to query rewrite rules,
|
||||||
|
// corrupted by apache
|
||||||
|
// - ? Enabled by default, but doesn't work with path to PATH_INFO rewrites
|
||||||
|
//
|
||||||
|
// All three of these punctuation problems can be avoided by using an alias,
|
||||||
|
// instead of a rewrite rule of either variety.
|
||||||
|
//
|
||||||
|
// The problem with % is that when using a path to query rewrite rule, URLs are
|
||||||
|
// double-unescaped: once by Apache's path conversion code, and again by PHP. So
|
||||||
|
// %253F, for example, becomes "?". Our code does not double-escape to compensate
|
||||||
|
// for this, indeed double escaping would break if the double-escaped title was
|
||||||
|
// passed in the query String rather than the path. This is a minor security issue
|
||||||
|
// because articles can be created such that they are hard to view or edit.
|
||||||
|
//
|
||||||
|
// In some rare cases you may wish to remove + for compatibility with old links.
|
||||||
|
//
|
||||||
|
// Theoretically 0x80-0x9F of ISO 8859-1 should be disallowed, but
|
||||||
|
// this breaks interlanguage links
|
||||||
|
// $wgLegalTitleChars = " %!\"$&'()*,\\-.\\/0-9:;=?@A-Z\\\\^_`a-z~\\x80-\\xFF+";
|
||||||
|
//
|
||||||
|
// REGEX:
|
||||||
|
// without-backslash escaping --> \s%!"$&'()*,-./0-9:;=?@A-Z\^_`a-z~x80-xFF+
|
||||||
|
// rearranged
|
||||||
|
// letters --> 0-9A-Za-z
|
||||||
|
// unicode-chars --> x80-xFF
|
||||||
|
// symbols --> \s%!"$&'()*,-./:;=?@\^_`~+"
|
||||||
|
// deliberately ignores
|
||||||
|
// control chars: 00-31,127
|
||||||
|
// []{}|#<>
|
||||||
|
public static int Find_fwd_while_title(byte[] src, int src_bgn, int src_end, boolean[] valid) {
|
||||||
|
int cur = src_bgn;
|
||||||
|
while (true) {
|
||||||
|
if (cur == src_end) break;
|
||||||
|
byte b = src[cur];
|
||||||
|
int b_len = gplx.core.intls.Utf8_.Len_of_char_by_1st_byte(b);
|
||||||
|
if (b_len == 1) { // ASCII
|
||||||
|
if (valid[b]) // valid; EX: "a0A B&$"
|
||||||
|
cur++;
|
||||||
|
else // invalid; EX: "<title>"
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
else { // Multi-byte UTF8; NOTE: all sequences are valid
|
||||||
|
cur += b_len;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return cur;
|
||||||
|
}
|
||||||
|
private static boolean[] title_chars_valid;
|
||||||
|
public static boolean[] Title_chars_valid() {
|
||||||
|
if (title_chars_valid == null) {
|
||||||
|
title_chars_valid = new boolean[128];
|
||||||
|
// add num and alpha
|
||||||
|
for (int i = Byte_ascii.Num_0; i <= Byte_ascii.Num_9; i++)
|
||||||
|
title_chars_valid[i] = true;
|
||||||
|
for (int i = Byte_ascii.Ltr_A; i <= Byte_ascii.Ltr_Z; i++)
|
||||||
|
title_chars_valid[i] = true;
|
||||||
|
for (int i = Byte_ascii.Ltr_a; i <= Byte_ascii.Ltr_z; i++)
|
||||||
|
title_chars_valid[i] = true;
|
||||||
|
|
||||||
|
// add symbols: \s%!"$&'()*,-./:;=?@\^_`~+"
|
||||||
|
byte[] symbols = new byte[]
|
||||||
|
{ Byte_ascii.Space
|
||||||
|
, Byte_ascii.Percent
|
||||||
|
, Byte_ascii.Bang
|
||||||
|
, Byte_ascii.Quote
|
||||||
|
, Byte_ascii.Amp
|
||||||
|
, Byte_ascii.Apos
|
||||||
|
, Byte_ascii.Paren_bgn
|
||||||
|
, Byte_ascii.Paren_end
|
||||||
|
, Byte_ascii.Star
|
||||||
|
, Byte_ascii.Comma
|
||||||
|
, Byte_ascii.Dash
|
||||||
|
, Byte_ascii.Dot
|
||||||
|
, Byte_ascii.Slash
|
||||||
|
, Byte_ascii.Colon
|
||||||
|
, Byte_ascii.Semic
|
||||||
|
, Byte_ascii.Eq
|
||||||
|
, Byte_ascii.Question
|
||||||
|
, Byte_ascii.At
|
||||||
|
, Byte_ascii.Backslash
|
||||||
|
, Byte_ascii.Pow
|
||||||
|
, Byte_ascii.Underline
|
||||||
|
, Byte_ascii.Tick
|
||||||
|
, Byte_ascii.Tilde
|
||||||
|
, Byte_ascii.Plus
|
||||||
|
};
|
||||||
|
int symbols_len = symbols.length;
|
||||||
|
for (int i = 0; i < symbols_len; i++)
|
||||||
|
title_chars_valid[symbols[i]] = true;
|
||||||
|
}
|
||||||
|
return title_chars_valid;
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,30 @@
|
|||||||
|
/*
|
||||||
|
XOWA: the XOWA Offline Wiki Application
|
||||||
|
Copyright (C) 2012 gnosygnu@gmail.com
|
||||||
|
|
||||||
|
This program is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Affero General Public License as
|
||||||
|
published by the Free Software Foundation, either version 3 of the
|
||||||
|
License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Affero General Public License
|
||||||
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
package gplx.xowa.mws.utls; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
|
||||||
|
import org.junit.*; import gplx.core.tests.*;
|
||||||
|
public class Xomw_ttl_utl__tst {
|
||||||
|
private final Xomw_ttl_utl__fxt fxt = new Xomw_ttl_utl__fxt();
|
||||||
|
@Test public void Alphanum() {fxt.Test__find_fwd_while_title("0aB" , 3);}
|
||||||
|
@Test public void Angle() {fxt.Test__find_fwd_while_title("0a<" , 2);}
|
||||||
|
}
|
||||||
|
class Xomw_ttl_utl__fxt {
|
||||||
|
public void Test__find_fwd_while_title(String src_str, int expd) {
|
||||||
|
byte[] src_bry = Bry_.new_u8(src_str);
|
||||||
|
Gftest.Eq__int(expd, Xomw_ttl_utl.Find_fwd_while_title(src_bry, 0, src_bry.length, Xomw_ttl_utl.Title_chars_valid()));
|
||||||
|
}
|
||||||
|
}
|
@ -1,261 +0,0 @@
|
|||||||
/*
|
|
||||||
XOWA: the XOWA Offline Wiki Application
|
|
||||||
Copyright (C) 2012 gnosygnu@gmail.com
|
|
||||||
|
|
||||||
This program is free software: you can redistribute it and/or modify
|
|
||||||
it under the terms of the GNU Affero General Public License as
|
|
||||||
published by the Free Software Foundation, either version 3 of the
|
|
||||||
License, or (at your option) any later version.
|
|
||||||
|
|
||||||
This program is distributed in the hope that it will be useful,
|
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
GNU Affero General Public License for more details.
|
|
||||||
|
|
||||||
You should have received a copy of the GNU Affero General Public License
|
|
||||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
||||||
*/
|
|
||||||
package gplx.xowa.parsers.mws.blocks; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.mws.*;
|
|
||||||
import gplx.langs.phps.utls.*;
|
|
||||||
public class Xomw_block_wkr implements gplx.core.brys.Bry_split_wkr {// THREAD.UNSAFE: caching for repeated calls
|
|
||||||
private final Bry_bfr bfr = Bry_bfr_.New();
|
|
||||||
private byte[] last_prefix, last_section;
|
|
||||||
private boolean line_start, dt_open, in_block_elem, para_stack, in_blockquote, in_pre = false;
|
|
||||||
private int prefix_len;
|
|
||||||
private int src_len;
|
|
||||||
public byte[] Do_block_levels(byte[] src, boolean line_start) {
|
|
||||||
this.src_len = src.length;
|
|
||||||
this.line_start = line_start;
|
|
||||||
// Parsing through the text line by line. The main thing
|
|
||||||
// happening here is handling of block-level elements p, pre,
|
|
||||||
// and making lists from lines starting with * # : etc.
|
|
||||||
this.last_prefix = Bry_.Empty;
|
|
||||||
bfr.Clear();
|
|
||||||
this.dt_open = this.in_block_elem = false;
|
|
||||||
this.prefix_len = 0;
|
|
||||||
this.para_stack = false;
|
|
||||||
this.in_blockquote = false;
|
|
||||||
|
|
||||||
// PORTED.SPLIT: $textLines = StringUtils::explode("\n", $text);
|
|
||||||
Bry_split_.Split(src, 0, src_len, Byte_ascii.Nl, Bool_.N, this);
|
|
||||||
|
|
||||||
while (prefix_len > 0) {
|
|
||||||
// bfr .= this.closeList(prefix2[prefix_len - 1]);
|
|
||||||
prefix_len--;
|
|
||||||
if (prefix_len > 0) {
|
|
||||||
bfr.Add_byte_nl();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (Bry_.Len_gt_0(last_section)) {
|
|
||||||
bfr.Add_str_a7("</").Add(last_section).Add_str_a7(">");
|
|
||||||
this.last_section = Bry_.Empty;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (dt_open || in_block_elem || para_stack || in_blockquote || in_pre) {
|
|
||||||
}
|
|
||||||
return bfr.To_bry_and_clear();
|
|
||||||
}
|
|
||||||
public int Split(byte[] src, int itm_bgn, int itm_end) {
|
|
||||||
// Fix up line_start
|
|
||||||
if (!line_start) {
|
|
||||||
bfr.Add_mid(src, itm_bgn, itm_end);
|
|
||||||
line_start = true;
|
|
||||||
return Bry_split_.Rv__ok;
|
|
||||||
}
|
|
||||||
|
|
||||||
// * = ul
|
|
||||||
// # = ol
|
|
||||||
// ; = dt
|
|
||||||
// : = dd
|
|
||||||
int last_prefix_len = last_prefix.length;
|
|
||||||
boolean pre_close_match = false; //preg_match('/<\\/pre/i', $oLine);
|
|
||||||
boolean pre_open_match = false; //preg_match('/<pre/i', $oLine);
|
|
||||||
byte[] prefix = null, prefix2 = null, t = null;
|
|
||||||
// If not in a <pre> element, scan for and figure out what prefixes are there.
|
|
||||||
if (!in_pre) {
|
|
||||||
// Multiple prefixes may abut each other for nested lists.
|
|
||||||
prefix_len = 0;// strspn($oLine, '*#:;');
|
|
||||||
prefix = Php_str_.Substr(src, itm_bgn, prefix_len);
|
|
||||||
|
|
||||||
// eh?
|
|
||||||
// ; and : are both from definition-lists, so they're equivalent
|
|
||||||
// for the purposes of determining whether or not we need to open/close
|
|
||||||
// elements.
|
|
||||||
prefix2 = Bry_.Replace(prefix, Byte_ascii.Semic, Byte_ascii.Colon);
|
|
||||||
t = Bry_.Mid(src, itm_bgn + prefix_len, itm_end);
|
|
||||||
// this.in_pre = (boolean)pre_open_match;
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
// Don't interpret any other prefixes in preformatted text
|
|
||||||
prefix_len = 0;
|
|
||||||
prefix = prefix2 = Bry_.Empty;
|
|
||||||
t = Bry_.Mid(src, itm_bgn, itm_end);
|
|
||||||
}
|
|
||||||
|
|
||||||
// List generation
|
|
||||||
byte[] term = null, t2 = null;
|
|
||||||
int common_prefix_len = -1;
|
|
||||||
if (prefix_len > 0 && Bry_.Eq(last_prefix, prefix2)) {
|
|
||||||
// Same as the last item, so no need to deal with nesting or opening stuff
|
|
||||||
// bfr .= this.nextItem(substr(prefix, -1));
|
|
||||||
para_stack = false;
|
|
||||||
|
|
||||||
if (prefix_len > 0 && prefix[prefix_len - 1] == Byte_ascii.Semic) {
|
|
||||||
// The one nasty exception: definition lists work like this:
|
|
||||||
// ; title : definition text
|
|
||||||
// So we check for : in the remainder text to split up the
|
|
||||||
// title and definition, without b0rking links.
|
|
||||||
term = t2 = Bry_.Empty;
|
|
||||||
// if (this.findColonNoLinks(t, term, t2) !== false) {
|
|
||||||
t = t2;
|
|
||||||
bfr.Add(term); // . this.nextItem(':');
|
|
||||||
// }
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else if (prefix_len > 0 || last_prefix_len > 0) {
|
|
||||||
// We need to open or close prefixes, or both.
|
|
||||||
|
|
||||||
// Either open or close a level...
|
|
||||||
// common_prefix_len = this.getCommon(prefix, last_prefix);
|
|
||||||
para_stack = false;
|
|
||||||
|
|
||||||
// Close all the prefixes which aren't shared.
|
|
||||||
while (common_prefix_len < last_prefix_len) {
|
|
||||||
// bfr .= this.closeList(last_prefix[last_prefix_len - 1]);
|
|
||||||
last_prefix_len--;
|
|
||||||
}
|
|
||||||
//
|
|
||||||
// Continue the current prefix if appropriate.
|
|
||||||
if (prefix_len <= common_prefix_len && common_prefix_len > 0) {
|
|
||||||
// bfr .= this.nextItem(prefix[common_prefix_len - 1]);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Open prefixes where appropriate.
|
|
||||||
if (Bry_.Len_gt_0(last_prefix) && prefix_len > common_prefix_len) {
|
|
||||||
bfr.Add_byte_nl();
|
|
||||||
}
|
|
||||||
while (prefix_len > common_prefix_len) {
|
|
||||||
// $char = substr(prefix, common_prefix_len, 1);
|
|
||||||
// bfr .= this.openList($char);
|
|
||||||
//
|
|
||||||
// if (';' == $char) {
|
|
||||||
// // @todo FIXME: This is dupe of code above
|
|
||||||
// if (this.findColonNoLinks(t, term, t2) !== false) {
|
|
||||||
// t = t2;
|
|
||||||
// bfr .= term . this.nextItem(':');
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
++common_prefix_len;
|
|
||||||
}
|
|
||||||
if (prefix_len == 0 && Bry_.Len_gt_0(last_prefix)) {
|
|
||||||
bfr.Add_byte_nl();
|
|
||||||
}
|
|
||||||
last_prefix = prefix2;
|
|
||||||
}
|
|
||||||
|
|
||||||
// If we have no prefixes, go to paragraph mode.
|
|
||||||
if (0 == prefix_len) {
|
|
||||||
// No prefix (not in list)--go to paragraph mode
|
|
||||||
// XXX: use a stack for nestable elements like span, table and div
|
|
||||||
boolean open_match = false, close_match = false;
|
|
||||||
// open_match = preg_match(
|
|
||||||
// '/(?:<table|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|'
|
|
||||||
// . '<p|<ul|<ol|<dl|<li|<\\/tr|<\\/td|<\\/th)/iS',
|
|
||||||
// t
|
|
||||||
// );
|
|
||||||
// close_match = preg_match(
|
|
||||||
// '/(?:<\\/table|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|'
|
|
||||||
// . '<td|<th|<\\/?blockquote|<\\/?div|<hr|<\\/pre|<\\/p|<\\/mw:|'
|
|
||||||
// . self::MARKER_PREFIX
|
|
||||||
// . '-pre|<\\/li|<\\/ul|<\\/ol|<\\/dl|<\\/?center)/iS',
|
|
||||||
// t
|
|
||||||
// );
|
|
||||||
|
|
||||||
if (open_match || close_match) {
|
|
||||||
para_stack = false;
|
|
||||||
// @todo bug 5718: paragraph closed
|
|
||||||
// bfr .= this.closeParagraph();
|
|
||||||
if (pre_open_match && !pre_close_match) {
|
|
||||||
this.in_pre = true;
|
|
||||||
}
|
|
||||||
// $bqOffset = 0;
|
|
||||||
// while (preg_match('/<(\\/?)blockquote[\s>]/i', t,
|
|
||||||
// $bqMatch, PREG_OFFSET_CAPTURE, $bqOffset)
|
|
||||||
// ) {
|
|
||||||
// in_blockquote = !$bqMatch[1][0]; // is this a close tag?
|
|
||||||
// $bqOffset = $bqMatch[0][1] + strlen($bqMatch[0][0]);
|
|
||||||
// }
|
|
||||||
in_block_elem = !close_match;
|
|
||||||
}
|
|
||||||
else if (!in_block_elem && !this.in_pre) {
|
|
||||||
if ( Byte_ascii.Space == t[0]
|
|
||||||
// && (last_section == 'pre' || trim(t) != '')
|
|
||||||
&& !in_blockquote
|
|
||||||
) {
|
|
||||||
// pre
|
|
||||||
// if (this.last_section !== 'pre') {
|
|
||||||
para_stack = false;
|
|
||||||
// bfr .= this.closeParagraph() . '<pre>';
|
|
||||||
// this.last_section = 'pre';
|
|
||||||
// }
|
|
||||||
t = Bry_.Mid(t, 1);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
// paragraph
|
|
||||||
// if (trim(t) == '') {
|
|
||||||
if (para_stack) {
|
|
||||||
// bfr .= para_stack . '<br />';
|
|
||||||
para_stack = false;
|
|
||||||
// this.last_section = 'p';
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
// if (this.last_section !== 'p') {
|
|
||||||
// bfr .= this.closeParagraph();
|
|
||||||
// this.last_section = '';
|
|
||||||
// para_stack = '<p>';
|
|
||||||
// }
|
|
||||||
// else {
|
|
||||||
// para_stack = '</p><p>';
|
|
||||||
// }
|
|
||||||
}
|
|
||||||
// }
|
|
||||||
// else {
|
|
||||||
if (para_stack) {
|
|
||||||
// bfr .= para_stack;
|
|
||||||
para_stack = false;
|
|
||||||
// this.last_section = 'p';
|
|
||||||
}
|
|
||||||
// else if (this.last_section !== 'p') {
|
|
||||||
// bfr .= this.closeParagraph() . '<p>';
|
|
||||||
// this.last_section = 'p';
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// somewhere above we forget to get out of pre block (bug 785)
|
|
||||||
if (pre_close_match && this.in_pre) {
|
|
||||||
this.in_pre = false;
|
|
||||||
}
|
|
||||||
if (para_stack == false) {
|
|
||||||
bfr.Add(t);
|
|
||||||
if (prefix_len == 0) {
|
|
||||||
bfr.Add_byte_nl();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (last_prefix_len == -1 || common_prefix_len == -1) {
|
|
||||||
}
|
|
||||||
return Bry_split_.Rv__ok;
|
|
||||||
}
|
|
||||||
// private static final int
|
|
||||||
// Para_stack_none = 0 // false
|
|
||||||
// , Para_stack_bgn = 1 // <p>
|
|
||||||
// , Para_stack_mid = 2 // </p><p>
|
|
||||||
// ;
|
|
||||||
// private static final byte
|
|
||||||
// Mode_none = 0 // ''
|
|
||||||
// , Mode_para = 1 // p
|
|
||||||
// , Mode_pre = 2 // pre
|
|
||||||
// ;
|
|
||||||
}
|
|
@ -1,41 +0,0 @@
|
|||||||
/*
|
|
||||||
XOWA: the XOWA Offline Wiki Application
|
|
||||||
Copyright (C) 2012 gnosygnu@gmail.com
|
|
||||||
|
|
||||||
This program is free software: you can redistribute it and/or modify
|
|
||||||
it under the terms of the GNU Affero General Public License as
|
|
||||||
published by the Free Software Foundation, either version 3 of the
|
|
||||||
License, or (at your option) any later version.
|
|
||||||
|
|
||||||
This program is distributed in the hope that it will be useful,
|
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
GNU Affero General Public License for more details.
|
|
||||||
|
|
||||||
You should have received a copy of the GNU Affero General Public License
|
|
||||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
||||||
*/
|
|
||||||
package gplx.xowa.parsers.mws.utils; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.mws.*;
|
|
||||||
import gplx.xowa.parsers.htmls.*;
|
|
||||||
public class Xomw_sanitizer_mgr {
|
|
||||||
private final Mwh_doc_wkr__atr_bldr atr_bldr = new Mwh_doc_wkr__atr_bldr();
|
|
||||||
private final Mwh_atr_parser atr_parser = new Mwh_atr_parser();
|
|
||||||
public void Fix_tag_attributes(Bry_bfr bfr, byte[] tag_name, byte[] atrs) {
|
|
||||||
atr_bldr.Atrs__clear();
|
|
||||||
atr_parser.Parse(atr_bldr, -1, -1, atrs, 0, atrs.length);
|
|
||||||
int len = atr_bldr.Atrs__len();
|
|
||||||
|
|
||||||
// PORTED: Sanitizer.php|safeEncodeTagAttributes
|
|
||||||
for (int i = 0; i < len; i++) {
|
|
||||||
// $encAttribute = htmlspecialchars( $attribute );
|
|
||||||
// $encValue = Sanitizer::safeEncodeAttribute( $value );
|
|
||||||
// $attribs[] = "$encAttribute=\"$encValue\"";
|
|
||||||
Mwh_atr_itm itm = atr_bldr.Atrs__get_at(i);
|
|
||||||
bfr.Add_byte_space(); // "return count( $attribs ) ? ' ' . implode( ' ', $attribs ) : '';"
|
|
||||||
bfr.Add_bry_escape_html(itm.Key_bry(), itm.Key_bgn(), itm.Key_end());
|
|
||||||
bfr.Add_byte_eq().Add_byte_quote();
|
|
||||||
bfr.Add(itm.Val_as_bry()); // TODO.XO:Sanitizer::encode
|
|
||||||
bfr.Add_byte_quote();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
Loading…
Reference in new issue