mirror of
https://github.com/gnosygnu/xowa.git
synced 2026-03-02 03:49:30 +00:00
Xomw: Move Mw_parse classes into separate project
This commit is contained in:
@@ -141,7 +141,7 @@ public class Xoa_ttl { // PAGE:en.w:http://en.wikipedia.org/wiki/Help:Link; REF.
|
||||
public byte[] Get_prefixed_db_key() {return Full_db();}
|
||||
public boolean Has_fragment() {return anch_bgn != -1;}
|
||||
public byte[] Get_fragment() {return Anch_txt();}
|
||||
public byte[] Get_link_url(gplx.xowa.mws.htmls.Xomw_qry_mgr qry_mgr, boolean query2, boolean proto) {
|
||||
public byte[] Get_link_url(Object qry_mgr, boolean query2, boolean proto) {
|
||||
// if ( $this->isExternal() || $proto !== false ) {
|
||||
// $ret = $this->getFullURL( $query, $query2, $proto );
|
||||
// }
|
||||
|
||||
@@ -15,8 +15,8 @@ GNU Affero General Public License for more details.
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.parsers.headings; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
|
||||
package gplx.xowa.mediawiki.includes.parsers.headingsOld; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*; import gplx.xowa.mediawiki.includes.parsers.*;
|
||||
public interface Xomw_heading_cbk {
|
||||
void On_hdr_seen(Xomw_parser_ctx pctx, Xomw_heading_wkr wkr);
|
||||
void On_src_done(Xomw_parser_ctx pctx, Xomw_heading_wkr wkr);
|
||||
void On_hdr_seen(Xomw_heading_wkr wkr);
|
||||
void On_src_done(Xomw_heading_wkr wkr);
|
||||
}
|
||||
@@ -15,10 +15,9 @@ GNU Affero General Public License for more details.
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.parsers.headings; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
|
||||
package gplx.xowa.mediawiki.includes.parsers.headingsOld; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*; import gplx.xowa.mediawiki.includes.parsers.*;
|
||||
import gplx.core.btries.*; import gplx.xowa.langs.*;
|
||||
public class Xomw_heading_wkr {
|
||||
private Xomw_parser_ctx pctx;
|
||||
private Xomw_heading_cbk cbk;
|
||||
public byte[] Src() {return src;} private byte[] src;
|
||||
public int Src_end() {return src_end;} private int src_end;
|
||||
@@ -30,17 +29,8 @@ public class Xomw_heading_wkr {
|
||||
public int Hdr_lhs_end() {return hdr_lhs_end;} private int hdr_lhs_end;
|
||||
public int Hdr_rhs_bgn() {return hdr_rhs_bgn;} private int hdr_rhs_bgn;
|
||||
public int Hdr_rhs_end() {return hdr_rhs_end;} private int hdr_rhs_end;
|
||||
public void Do_headings(Xomw_parser_ctx pctx, Xomw_parser_bfr pbfr, Xomw_heading_cbk__html cbk) {
|
||||
Bry_bfr src_bfr = pbfr.Src();
|
||||
byte[] src_bry = src_bfr.Bfr();
|
||||
int src_end = src_bfr.Len();
|
||||
cbk.Bfr_(pbfr.Trg());
|
||||
pbfr.Switch();
|
||||
Parse(pctx, src_bry, 0, src_end, cbk);
|
||||
}
|
||||
public void Parse(Xomw_parser_ctx pctx, byte[] src, int src_bgn, int src_end, Xomw_heading_cbk cbk) { // REF.MW: /includes/parser/Parser.php|doHeadings
|
||||
public void Parse(byte[] src, int src_bgn, int src_end, Xomw_heading_cbk cbk) { // REF.MW: /includes/parser/Parser.php|doHeadings
|
||||
// init members
|
||||
this.pctx = pctx;
|
||||
this.src = src;
|
||||
this.src_end = src_end;
|
||||
this.cbk = cbk;
|
||||
@@ -53,7 +43,7 @@ public class Xomw_heading_wkr {
|
||||
|
||||
// do loop
|
||||
int pos = src_bgn;
|
||||
this.txt_bgn = pos == Xomw_parser_ctx.Pos__bos ? 0 : pos;
|
||||
this.txt_bgn = pos == -1 ? 0 : pos;
|
||||
byte b = Byte_ascii.Nl;
|
||||
while (true) {
|
||||
int nxt = pos + 1;
|
||||
@@ -70,7 +60,7 @@ public class Xomw_heading_wkr {
|
||||
|
||||
// EOS; add all text after last "==\n"
|
||||
if (pos == src_end) {
|
||||
cbk.On_src_done(pctx, this);
|
||||
cbk.On_src_done(this);
|
||||
break;
|
||||
}
|
||||
b = src[pos];
|
||||
@@ -102,7 +92,7 @@ public class Xomw_heading_wkr {
|
||||
|
||||
this.hdr_num = hdr_lhs_len < hdr_rhs_len ? hdr_lhs_len : hdr_rhs_len;
|
||||
|
||||
cbk.On_hdr_seen(pctx, this);
|
||||
cbk.On_hdr_seen(this);
|
||||
return nl_rhs;
|
||||
}
|
||||
}
|
||||
@@ -1,33 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws; import gplx.*; import gplx.xowa.*;
|
||||
public class Xomw_MagicWord {
|
||||
public boolean case_match;
|
||||
public byte[] name;
|
||||
public Xomw_MagicWordSynonym[] synonyms;
|
||||
public Xomw_MagicWord(byte[] name, boolean case_match, byte[][] synonyms_ary) {
|
||||
this.name = name;
|
||||
this.case_match = case_match;
|
||||
|
||||
int synonyms_len = synonyms_ary.length;
|
||||
this.synonyms = new Xomw_MagicWordSynonym[synonyms_len];
|
||||
for (int i = 0; i < synonyms_len; i++) {
|
||||
synonyms[i] = new Xomw_MagicWordSynonym(name, case_match, synonyms_ary[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,376 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws; import gplx.*; import gplx.xowa.*;
|
||||
import gplx.core.btries.*; import gplx.core.primitives.*;
|
||||
public class Xomw_MagicWordArray {
|
||||
private Btrie_slim_mgr fwd_trie;
|
||||
private Btrie_bwd_mgr bwd_trie;
|
||||
private final Btrie_rv trv = new Btrie_rv();
|
||||
// private final Xomw_MagicWordMgr magic_word_mgr;
|
||||
public final byte[][] names;
|
||||
|
||||
// /** @var array */
|
||||
// private hash;
|
||||
|
||||
// private baseRegex;
|
||||
|
||||
// private regex;
|
||||
|
||||
public Xomw_MagicWordArray(Xomw_MagicWordMgr magic_word_mgr, byte[][] names) {
  // Builds the lookup tries for the given magic-word names.
  // Names that magic_word_mgr does not know are skipped silently.
  // this.magic_word_mgr = magic_word_mgr;
  this.names = names;

  // ASSUME: all magic words in a group have the same case sensitivity
  // (each trie is created once, using the case flag of the first word that needs it)
  for (byte[] name : names) {
    Xomw_MagicWord word = magic_word_mgr.Get(name);
    if (word == null) continue;

    for (Xomw_MagicWordSynonym synonym : word.synonyms) {
      byte tid = synonym.arg1_tid;
      if (tid == Xomw_MagicWordSynonym.Arg1__nil || tid == Xomw_MagicWordSynonym.Arg1__end) {
        // no "$1", or "$1" at the end: the literal text is a prefix, so match forwards
        if (fwd_trie == null) {
          fwd_trie = word.case_match ? Btrie_slim_mgr.cs() : Btrie_slim_mgr.ci_u8();
        }
        fwd_trie.Add_obj(synonym.text_wo_arg1, synonym);
      }
      else if (tid == Xomw_MagicWordSynonym.Arg1__bgn) {
        // "$1" at the start: the literal text is a suffix, so match backwards
        if (bwd_trie == null) {
          bwd_trie = Btrie_bwd_mgr.c__(word.case_match);
        }
        bwd_trie.Add(synonym.text_wo_arg1, synonym);
      }
      else if (tid == Xomw_MagicWordSynonym.Arg1__mid || tid == Xomw_MagicWordSynonym.Arg1__mix) {
        // ignore if mid / mix; only log a warning
        Gfo_usr_dlg_.Instance.Warn_many("", "", "MagicWordArray: unsupported arg_1_tid: tid=~{0}", synonym.arg1_tid);
      }
    }
  }
}
|
||||
|
||||
// /**
|
||||
// * Add a magic word by name
|
||||
// *
|
||||
// * @param String name
|
||||
// */
|
||||
// public function add(name) {
|
||||
// this->names[] = name;
|
||||
// this->hash = this->baseRegex = this->regex = null;
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Add a number of magic words by name
|
||||
// *
|
||||
// * @param array names
|
||||
// */
|
||||
// public function addArray(names) {
|
||||
// this->names = array_merge(this->names, array_values(names));
|
||||
// this->hash = this->baseRegex = this->regex = null;
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Get a 2-d hashtable for this array
|
||||
// * @return array
|
||||
// */
|
||||
// public function getHash() {
|
||||
// if (is_null(this->hash)) {
|
||||
// global wgContLang;
|
||||
// this->hash = [ 0 => [], 1 => [] ];
|
||||
// foreach (this->names as name) {
|
||||
// magic = MagicWord::get(name);
|
||||
// case = intval(magic->isCaseSensitive());
|
||||
// foreach (magic->getSynonyms() as syn) {
|
||||
// if (!case) {
|
||||
// syn = wgContLang->lc(syn);
|
||||
// }
|
||||
// this->hash[case][syn] = name;
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// return this->hash;
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Get the super regex
|
||||
// * @return array
|
||||
// */
|
||||
// public function getBaseRegex() {
|
||||
// if (is_null(this->baseRegex)) {
|
||||
// this->baseRegex = [ 0 => '', 1 => '' ];
|
||||
// foreach (this->names as name) {
|
||||
// magic = MagicWord::get(name);
|
||||
// case = intval(magic->isCaseSensitive());
|
||||
// foreach (magic->getSynonyms() as i => syn) {
|
||||
// // Group name must start with a non-digit in PCRE 8.34+
|
||||
// it = strtr(i, '0123456789', 'abcdefghij');
|
||||
// group = "(?P<{it}_{name}>" . preg_quote(syn, '/') . ')';
|
||||
// if (this->baseRegex[case] === '') {
|
||||
// this->baseRegex[case] = group;
|
||||
// } else {
|
||||
// this->baseRegex[case] .= '|' . group;
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// return this->baseRegex;
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Get an unanchored regex that does not match parameters
|
||||
// * @return array
|
||||
// */
|
||||
// public function getRegex() {
|
||||
// if (is_null(this->regex)) {
|
||||
// super = this->getBaseRegex();
|
||||
// this->regex = [ '', '' ];
|
||||
// if (this->baseRegex[0] !== '') {
|
||||
// this->regex[0] = "/{super[0]}/iuS";
|
||||
// }
|
||||
// if (this->baseRegex[1] !== '') {
|
||||
// this->regex[1] = "/{super[1]}/S";
|
||||
// }
|
||||
// }
|
||||
// return this->regex;
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Get a regex for matching variables with parameters
|
||||
// *
|
||||
// * @return String
|
||||
// */
|
||||
// public function getVariableRegex() {
|
||||
// return str_replace("\\1", "(.*?)", this->getRegex());
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Get a regex anchored to the start of the String that does not match parameters
|
||||
// *
|
||||
// * @return array
|
||||
// */
|
||||
// public function getRegexStart() {
|
||||
// super = this->getBaseRegex();
|
||||
// newRegex = [ '', '' ];
|
||||
// if (super[0] !== '') {
|
||||
// newRegex[0] = "/^(?:{super[0]})/iuS";
|
||||
// }
|
||||
// if (super[1] !== '') {
|
||||
// newRegex[1] = "/^(?:{super[1]})/S";
|
||||
// }
|
||||
// return newRegex;
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Get an anchored regex for matching variables with parameters
|
||||
// *
|
||||
// * @return array
|
||||
// */
|
||||
// public function getVariableStartToEndRegex() {
|
||||
// super = this->getBaseRegex();
|
||||
// newRegex = [ '', '' ];
|
||||
// if (super[0] !== '') {
|
||||
// newRegex[0] = str_replace("\\1", "(.*?)", "/^(?:{super[0]})/iuS");
|
||||
// }
|
||||
// if (super[1] !== '') {
|
||||
// newRegex[1] = str_replace("\\1", "(.*?)", "/^(?:{super[1]})/S");
|
||||
// }
|
||||
// return newRegex;
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * @since 1.20
|
||||
// * @return array
|
||||
// */
|
||||
// public function getNames() {
|
||||
// return this->names;
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Parse a match array from preg_match
|
||||
// * Returns array(magic word ID, parameter value)
|
||||
// * If there is no parameter value, that element will be false.
|
||||
// *
|
||||
// * @param array m
|
||||
// *
|
||||
// * @throws MWException
|
||||
// * @return array
|
||||
// */
|
||||
// public function parseMatch(m) {
|
||||
// reset(m);
|
||||
// while (list(key, value) = each(m)) {
|
||||
// if (key === 0 || value === '') {
|
||||
// continue;
|
||||
// }
|
||||
// parts = explode('_', key, 2);
|
||||
// if (count(parts) != 2) {
|
||||
// // This shouldn't happen
|
||||
// // continue;
|
||||
// throw new MWException(__METHOD__ . ': bad parameter name');
|
||||
// }
|
||||
// list(/* synIndex */, magicName) = parts;
|
||||
// paramValue = next(m);
|
||||
// return [ magicName, paramValue ];
|
||||
// }
|
||||
// // This shouldn't happen either
|
||||
// throw new MWException(__METHOD__ . ': parameter not found');
|
||||
// }
|
||||
|
||||
/**
|
||||
* Match some text, with parameter capture
|
||||
* Returns an array with the magic word name in the first element and the
|
||||
* parameter in the second element.
|
||||
* Both elements are false if there was no match.
|
||||
*
|
||||
* @param String text
|
||||
*
|
||||
* @return array
|
||||
*/
|
||||
public void matchVariableStartToEnd(byte[][] rv, byte[] src) {
|
||||
int src_end = src.length;
|
||||
if (src_end == 0) {
|
||||
rv[0] = rv[1] = null;
|
||||
return;
|
||||
}
|
||||
|
||||
byte[] name = null;
|
||||
int val_bgn = -1, val_end = -1;
|
||||
|
||||
// check fwd; EX: "thumb=$1"
|
||||
if (fwd_trie != null) {
|
||||
Object o = fwd_trie.Match_at(trv, src, 0, src_end);
|
||||
if (o != null) {
|
||||
Xomw_MagicWordSynonym syn = ((Xomw_MagicWordSynonym)o);
|
||||
name = syn.magic_name;
|
||||
val_bgn = trv.Pos();
|
||||
val_end = src_end;
|
||||
|
||||
// if "nil", then must be full match; EX: "thumbx" does not match "thumb"
|
||||
if (syn.arg1_tid == Xomw_MagicWordSynonym.Arg1__nil
|
||||
&& syn.text_wo_arg1.length != src_end) {
|
||||
rv[0] = rv[1] = null;
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// check bwd; EX: "$1px"
|
||||
if (bwd_trie != null) {
|
||||
Object o = bwd_trie.Match_at(trv, src, src_end - 1, -1);
|
||||
if (o != null) {
|
||||
Xomw_MagicWordSynonym syn = ((Xomw_MagicWordSynonym)o);
|
||||
name = syn.magic_name;
|
||||
val_bgn = 0;
|
||||
val_end = src_end - syn.text_wo_arg1.length;
|
||||
}
|
||||
}
|
||||
|
||||
rv[0] = name;
|
||||
rv[1] = val_end - val_bgn == 0 ? Bry_.Empty : Bry_.Mid(src, val_bgn, val_end);
|
||||
}
|
||||
|
||||
// /**
|
||||
// * Match some text, without parameter capture
|
||||
// * Returns the magic word name, or false if there was no capture
|
||||
// *
|
||||
// * @param String text
|
||||
// *
|
||||
// * @return String|boolean False on failure
|
||||
// */
|
||||
// public function matchStartToEnd(text) {
|
||||
// hash = this->getHash();
|
||||
// if (isset(hash[1][text])) {
|
||||
// return hash[1][text];
|
||||
// }
|
||||
// global wgContLang;
|
||||
// lc = wgContLang->lc(text);
|
||||
// if (isset(hash[0][lc])) {
|
||||
// return hash[0][lc];
|
||||
// }
|
||||
// return false;
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Returns an associative array, ID => param value, for all items that match
|
||||
// * Removes the matched items from the input String (passed by reference)
|
||||
// *
|
||||
// * @param String text
|
||||
// *
|
||||
// * @return array
|
||||
// */
|
||||
// public function matchAndRemove(&text) {
|
||||
// found = [];
|
||||
// regexes = this->getRegex();
|
||||
// foreach (regexes as regex) {
|
||||
// if (regex === '') {
|
||||
// continue;
|
||||
// }
|
||||
// matches = [];
|
||||
// res = preg_match_all(regex, text, matches, PREG_SET_ORDER);
|
||||
// if (res === false) {
|
||||
// LoggerFactory::getInstance('parser')->warning('preg_match_all returned false', [
|
||||
// 'code' => preg_last_error(),
|
||||
// 'regex' => regex,
|
||||
// 'text' => text,
|
||||
// ]);
|
||||
// } elseif (res) {
|
||||
// foreach (matches as m) {
|
||||
// list(name, param) = this->parseMatch(m);
|
||||
// found[name] = param;
|
||||
// }
|
||||
// }
|
||||
// res = preg_replace(regex, '', text);
|
||||
// if (res === null) {
|
||||
// LoggerFactory::getInstance('parser')->warning('preg_replace returned null', [
|
||||
// 'code' => preg_last_error(),
|
||||
// 'regex' => regex,
|
||||
// 'text' => text,
|
||||
// ]);
|
||||
// }
|
||||
// text = res;
|
||||
// }
|
||||
// return found;
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Return the ID of the magic word at the start of text, and remove
|
||||
// * the prefix from text.
|
||||
// * Return false if no match found and text is not modified.
|
||||
// * Does not match parameters.
|
||||
// *
|
||||
// * @param String text
|
||||
// *
|
||||
// * @return int|boolean False on failure
|
||||
// */
|
||||
// public function matchStartAndRemove(&text) {
|
||||
// regexes = this->getRegexStart();
|
||||
// foreach (regexes as regex) {
|
||||
// if (regex === '') {
|
||||
// continue;
|
||||
// }
|
||||
// if (preg_match(regex, text, m)) {
|
||||
// list(id,) = this->parseMatch(m);
|
||||
// if (strlen(m[0]) >= strlen(text)) {
|
||||
// text = '';
|
||||
// } else {
|
||||
// text = substr(text, strlen(m[0]));
|
||||
// }
|
||||
// return id;
|
||||
// }
|
||||
// }
|
||||
// return false;
|
||||
// }
|
||||
}
|
||||
@@ -1,64 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws; import gplx.*; import gplx.xowa.*;
|
||||
import org.junit.*; import gplx.core.tests.*;
|
||||
public class Xomw_MagicWordArray__tst {
|
||||
private final Xomw_MagicWordArray__fxt fxt = new Xomw_MagicWordArray__fxt();
|
||||
@Test public void Nil() {
|
||||
fxt.Init__word(Bool_.Y, "img_nil", "nil");
|
||||
fxt.Init__ary("img_nil");
|
||||
fxt.Test__matchVariableStartToEnd("nil", "img_nil", "");
|
||||
fxt.Test__matchVariableStartToEnd("nila", null, null);
|
||||
}
|
||||
@Test public void Bgn() {
|
||||
fxt.Init__word(Bool_.Y, "img_bgn", "bgn$1");
|
||||
fxt.Init__ary("img_bgn");
|
||||
fxt.Test__matchVariableStartToEnd("bgna", "img_bgn", "a");
|
||||
fxt.Test__matchVariableStartToEnd("bgn", "img_bgn", "");
|
||||
}
|
||||
@Test public void End() {
|
||||
fxt.Init__word(Bool_.Y, "img_end", "$1end");
|
||||
fxt.Init__ary("img_end");
|
||||
fxt.Test__matchVariableStartToEnd("aend", "img_end", "a");
|
||||
fxt.Test__matchVariableStartToEnd("end", "img_end", "");
|
||||
}
|
||||
@Test public void Smoke() {
|
||||
fxt.Init__word(Bool_.Y, "img_upright", "upright", "upright=$1", "upright $1");
|
||||
fxt.Init__word(Bool_.Y, "img_width", "$1px");
|
||||
fxt.Init__ary("img_upright", "img_width");
|
||||
|
||||
fxt.Test__matchVariableStartToEnd("upright=123", "img_upright", "123");
|
||||
fxt.Test__matchVariableStartToEnd("123px", "img_width", "123");
|
||||
}
|
||||
}
|
||||
class Xomw_MagicWordArray__fxt {
|
||||
private final Xomw_MagicWordMgr magic_word_mgr = new Xomw_MagicWordMgr();
|
||||
private Xomw_MagicWordArray magic_word_ary;
|
||||
public void Init__word(boolean cs, String word, String... synonyms) {
|
||||
magic_word_mgr.Add(Bry_.new_u8(word), cs, Bry_.Ary(synonyms));
|
||||
}
|
||||
public void Init__ary(String... words) {
|
||||
magic_word_ary = new Xomw_MagicWordArray(magic_word_mgr, Bry_.Ary(words));
|
||||
}
|
||||
public void Test__matchVariableStartToEnd(String src, String expd_name, String expd_val) {
|
||||
byte[][] rv = new byte[2][];
|
||||
magic_word_ary.matchVariableStartToEnd(rv, Bry_.new_u8(src));
|
||||
Gftest.Eq__str(expd_name, rv[0], expd_name);
|
||||
Gftest.Eq__str(expd_val , rv[1], expd_val);
|
||||
}
|
||||
}
|
||||
@@ -1,28 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws; import gplx.*; import gplx.xowa.*;
|
||||
public class Xomw_MagicWordMgr {
|
||||
private final Hash_adp_bry hash = Hash_adp_bry.cs();
|
||||
public void Add(byte[] name, boolean cs, byte[]... synonyms) {
|
||||
Xomw_MagicWord mw = new Xomw_MagicWord(name, cs, synonyms);
|
||||
hash.Add(name, mw);
|
||||
}
|
||||
public Xomw_MagicWord Get(byte[] name) {
|
||||
return (Xomw_MagicWord)hash.Get_by(name);
|
||||
}
|
||||
}
|
||||
@@ -1,91 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws; import gplx.*; import gplx.xowa.*;
|
||||
public class Xomw_MagicWordSynonym {
|
||||
public final byte[] magic_name;
|
||||
public final boolean case_match;
|
||||
public final byte[] text;
|
||||
public final byte[] text_wo_arg1;
|
||||
public final byte arg1_tid;
|
||||
public Xomw_MagicWordSynonym(byte[] magic_name, boolean case_match, byte[] text) {
|
||||
this.magic_name = magic_name;
|
||||
this.case_match = case_match;
|
||||
this.text = text;
|
||||
this.arg1_tid = Get_arg1_tid(text);
|
||||
switch (arg1_tid) {
|
||||
case Arg1__bgn:
|
||||
text_wo_arg1 = Bry_.Mid(text, 2);
|
||||
break;
|
||||
case Arg1__end:
|
||||
text_wo_arg1 = Bry_.Mid(text, 0, text.length - 2);
|
||||
break;
|
||||
default:
|
||||
text_wo_arg1 = text;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
private static byte Get_arg1_tid(byte[] src) {
|
||||
int len = src.length;
|
||||
byte rv = Arg1__nil;
|
||||
int cur = 0;
|
||||
while (true) {
|
||||
if (cur == len) break;
|
||||
byte b = src[cur];
|
||||
// "$" matched
|
||||
if (b == Byte_ascii.Dollar) {
|
||||
// "1" matched?
|
||||
int nxt_pos = cur + 1;
|
||||
if (nxt_pos < len && src[nxt_pos] == Byte_ascii.Num_1) {
|
||||
// "$1" matched
|
||||
if (cur == 0) {
|
||||
rv = Arg1__bgn;
|
||||
}
|
||||
else if (cur == len - 2) {
|
||||
rv = rv == Arg1__nil ? Arg1__end : Arg1__mix;
|
||||
}
|
||||
else {
|
||||
if (rv == Arg1__nil)
|
||||
rv = Arg1__mid;
|
||||
else if (rv == Arg1__mid)
|
||||
rv = Arg1__mix;
|
||||
}
|
||||
cur += 2;
|
||||
continue;
|
||||
}
|
||||
else {
|
||||
cur += 1;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
else {
|
||||
cur += 1;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
|
||||
public static final byte
|
||||
Arg1__nil = 0 // EX: "thumb"
|
||||
, Arg1__bgn = 1 // EX: "$1px"
|
||||
, Arg1__end = 2 // EX: "thumb=$1"
|
||||
, Arg1__mid = 3 // EX: "a$1b"
|
||||
, Arg1__mix = 4 // EX: "a$1b$cc"
|
||||
;
|
||||
}
|
||||
@@ -1,22 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws; import gplx.*; import gplx.xowa.*;
|
||||
/** Message object whose accessors currently both return null. */
public class Xomw_Message {
  /** Always returns null in this version. */
  public byte[] text() {
    return null;
  }

  /** Always returns null in this version. */
  public byte[] escaped() {
    return null;
  }
}
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -1,27 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws; import gplx.*; import gplx.xowa.*;
|
||||
/** Mutable link/text pair; the test fixture passes it to normalizeSubpageLink and reads both fields back afterwards. */
public class Xomw_linker__normalize_subpage_link {
  public byte[] link;
  public byte[] text;

  /** Stores both values and returns this instance so calls can be chained. */
  public Xomw_linker__normalize_subpage_link Init(byte[] link, byte[] text) {
    this.text = text;
    this.link = link;
    return this;
  }
}
|
||||
@@ -1,43 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws; import gplx.*; import gplx.xowa.*;
|
||||
import org.junit.*; import gplx.core.tests.*;
|
||||
public class Xomw_linker__normalize_subpage_link__tst {
|
||||
private final Xomw_linker__normalize_subpage_link__fxt fxt = new Xomw_linker__normalize_subpage_link__fxt();
|
||||
@Test public void None() {fxt.Test__normalize_subpage_link("A/B/C" , "Z" , "" , "Z" , "");}
|
||||
@Test public void Hash() {fxt.Test__normalize_subpage_link("A/B/C" , "/Y#Z" , "" , "A/B/C/Y#Z" , "/Y#Z");}
|
||||
@Test public void Slash__basic() {fxt.Test__normalize_subpage_link("A/B/C" , "/Z" , "" , "A/B/C/Z" , "/Z");}
|
||||
@Test public void Slash__slash() {fxt.Test__normalize_subpage_link("A/B/C" , "/Z/" , "" , "A/B/C/Z" , "Z");}
|
||||
@Test public void Dot2__empty() {fxt.Test__normalize_subpage_link("A/B/C" , "../" , "" , "A/B" , "");}
|
||||
@Test public void Dot2__many() {fxt.Test__normalize_subpage_link("A/B/C" , "../../Z" , "z1" , "A/Z" , "z1");}
|
||||
@Test public void Dot2__trailing() {fxt.Test__normalize_subpage_link("A/B/C" , "../../Z/" , "" , "A/Z" , "Z");}
|
||||
}
|
||||
class Xomw_linker__normalize_subpage_link__fxt {
|
||||
private final Xomw_linker mgr = new Xomw_linker(new gplx.xowa.mws.linkers.Xomw_link_renderer(new Xomw_sanitizer()));
|
||||
private final Xowe_wiki wiki;
|
||||
private final Xomw_linker__normalize_subpage_link normalize_subpage_link = new Xomw_linker__normalize_subpage_link();
|
||||
public Xomw_linker__normalize_subpage_link__fxt() {
|
||||
Xoae_app app = Xoa_app_fxt.Make__app__edit();
|
||||
this.wiki = Xoa_app_fxt.Make__wiki__edit(app);
|
||||
}
|
||||
public void Test__normalize_subpage_link(String page_title_str, String link, String text, String expd_link, String expd_text) {
|
||||
mgr.normalizeSubpageLink(normalize_subpage_link, wiki.Ttl_parse(Bry_.new_u8(page_title_str)), Bry_.new_u8(link), Bry_.new_u8(text));
|
||||
Gftest.Eq__str(expd_link, String_.new_u8(normalize_subpage_link.link));
|
||||
Gftest.Eq__str(expd_text, String_.new_u8(normalize_subpage_link.text));
|
||||
}
|
||||
}
|
||||
@@ -1,39 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws; import gplx.*; import gplx.xowa.*;
|
||||
import org.junit.*; import gplx.core.tests.*; import gplx.core.btries.*; import gplx.xowa.mws.parsers.*;
|
||||
public class Xomw_linker__split_trail__tst {
|
||||
private final Xomw_linker__split_trail__fxt fxt = new Xomw_linker__split_trail__fxt();
|
||||
@Test public void Basic() {fxt.Test__split_trail("abc def" , "abc" , " def");}
|
||||
@Test public void None() {fxt.Test__split_trail(" abc" , null , " abc");}
|
||||
}
|
||||
class Xomw_linker__split_trail__fxt {
|
||||
private final Xomw_linker linker = new Xomw_linker(new gplx.xowa.mws.linkers.Xomw_link_renderer(new Xomw_sanitizer()));
|
||||
private final Btrie_slim_mgr trie = Btrie_slim_mgr.cs();
|
||||
public Xomw_linker__split_trail__fxt() {
|
||||
String[] ary = new String[] {"a", "b", "c", "d", "e", "f"};
|
||||
for (String itm : ary)
|
||||
trie.Add_str_str(itm, itm);
|
||||
linker.Init_by_wiki(new Xomw_parser_env(), trie);
|
||||
}
|
||||
public void Test__split_trail(String trail_str, String expd_inside, String expd_trail) {
|
||||
byte[][] split_trail = linker.splitTrail(Bry_.new_u8(trail_str));
|
||||
Gftest.Eq__str(expd_inside, String_.new_u8(split_trail[0]));
|
||||
Gftest.Eq__str(expd_trail , String_.new_u8(split_trail[1]));
|
||||
}
|
||||
}
|
||||
@@ -1,22 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws; import gplx.*; import gplx.xowa.*;
|
||||
public class Xomw_message_mgr {
|
||||
private final Hash_adp hash = Hash_adp_.New();
|
||||
public Xomw_Message Get_by_str(String key) {return (Xomw_Message)hash.Get_by(key);}
|
||||
}
|
||||
@@ -1,921 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws; import gplx.*; import gplx.xowa.*;
|
||||
import gplx.core.brys.*; import gplx.core.btries.*; import gplx.core.encoders.*; import gplx.core.primitives.*; import gplx.langs.htmls.entitys.*;
|
||||
import gplx.xowa.parsers.htmls.*;
|
||||
import gplx.langs.htmls.*; import gplx.xowa.mws.htmls.*; import gplx.xowa.mws.parsers.*; import gplx.xowa.mws.utls.*;
|
||||
public class Xomw_sanitizer {
|
||||
private final Mwh_doc_wkr__atr_bldr atr_bldr = new Mwh_doc_wkr__atr_bldr();
|
||||
private final Mwh_atr_parser atr_parser = new Mwh_atr_parser();
|
||||
private final Xomw_regex_escape_invalid regex_clean_url = new Xomw_regex_escape_invalid();
|
||||
private final Xomw_regex_find_domain regex_find_domain = new Xomw_regex_find_domain();
|
||||
private final Xomw_regex_ipv6_brack regex_ipv6_brack = new Xomw_regex_ipv6_brack();
|
||||
private final Bry_tmp tmp_host = new Bry_tmp();
|
||||
private final Bry_bfr tmp_bfr = Bry_bfr_.New();
|
||||
private final Btrie_rv trv = new Btrie_rv();
|
||||
private final Xomw_regex_url_char_cbk__normalize normalize_cbk;
|
||||
private final Xomw_regex_url_char_cbk__decode decode_cbk;
|
||||
|
||||
private static Xomw_regex_url_char regex_url_char;
|
||||
private static Btrie_slim_mgr invalid_idn_trie;
|
||||
// Creates the per-instance callbacks and, the first time it is needed, builds the shared static
// lookup structures (regex_url_char, invalid_idn_trie, html_entities).
//
// regex_url_char doubles as the "already initialized" sentinel. It is therefore re-checked under
// the lock and assigned LAST, so that a constructor that observes it as non-null never skips
// initialization while invalid_idn_trie / html_entities are still unset.
// NOTE(review): the static fields are declared without "volatile" outside this constructor;
// making the sentinel volatile would be needed for a fully safe double-checked lock - confirm.
public Xomw_sanitizer() {
  this.normalize_cbk = new Xomw_regex_url_char_cbk__normalize(this);
  this.decode_cbk = new Xomw_regex_url_char_cbk__decode(this);
  if (regex_url_char == null) {
    synchronized (Type_adp_.ClassOf_obj(this)) {
      // another constructor may have finished initialization while this one waited for the lock
      if (regex_url_char == null) {
        // Characters that will be ignored in IDNs.
        // https://tools.ietf.org/html/rfc3454#section-3.1
        // $strip = "/
        //   \\s|           // general whitespace
        //   \xc2\xad|      // 00ad SOFT HYPHEN
        //   \xe1\xa0\x86|  // 1806 MONGOLIAN TODO SOFT HYPHEN
        //   \xe2\x80\x8b|  // 200b ZERO WIDTH SPACE
        //   \xe2\x81\xa0|  // 2060 WORD JOINER
        //   \xef\xbb\xbf|  // feff ZERO WIDTH NO-BREAK SPACE
        //   \xcd\x8f|      // 034f COMBINING GRAPHEME JOINER
        //   \xe1\xa0\x8b|  // 180b MONGOLIAN FREE VARIATION SELECTOR ONE
        //   \xe1\xa0\x8c|  // 180c MONGOLIAN FREE VARIATION SELECTOR TWO
        //   \xe1\xa0\x8d|  // 180d MONGOLIAN FREE VARIATION SELECTOR THREE
        //   \xe2\x80\x8c|  // 200c ZERO WIDTH NON-JOINER
        //   \xe2\x80\x8d|  // 200d ZERO WIDTH JOINER
        //   [\xef\xb8\x80-\xef\xb8\x8f]  // fe00-fe0f VARIATION SELECTOR-1-16
        // /xuD";
        // XO.MW.REGEX:http://php.net/manual/en/reference.pcre.pattern.modifiers.php
        //   /x : ignore embedded ws
        //   /u : enabled pcre utf8
        //   /D : $ matches EOS, not NL
        invalid_idn_trie = Btrie_slim_mgr.cs()
          .Add_many_bry(new Xomw_regex_parser().Add_ary
          ( "\\s"
          , "\\xc2\\xad"      // 00ad SOFT HYPHEN
          , "\\xe1\\xa0\\x86" // 1806 MONGOLIAN TODO SOFT HYPHEN
          , "\\xe2\\x80\\x8b" // 200b ZERO WIDTH SPACE
          , "\\xe2\\x81\\xa0" // 2060 WORD JOINER
          , "\\xef\\xbb\\xbf" // feff ZERO WIDTH NO-BREAK SPACE
          , "\\xcd\\x8f"      // 034f COMBINING GRAPHEME JOINER
          , "\\xe1\\xa0\\x8b" // 180b MONGOLIAN FREE VARIATION SELECTOR ONE
          , "\\xe1\\xa0\\x8c" // 180c MONGOLIAN FREE VARIATION SELECTOR TWO
          , "\\xe1\\xa0\\x8d" // 180d MONGOLIAN FREE VARIATION SELECTOR THREE
          , "\\xe2\\x80\\x8c" // 200c ZERO WIDTH NON-JOINER
          , "\\xe2\\x80\\x8d" // 200d ZERO WIDTH JOINER
          )
          .Add_rng
          ( "\\xef\\xb8\\x80", "\\xef\\xb8\\x8f" // fe00-fe0f VARIATION SELECTOR-1-16
          )
          .Rslt());

        // assert static structs; the lock is already held, so no second synchronized block is needed
        if (html_entities == null) {
          html_entities = Html_entities_new();
        }

        // publish the sentinel only after everything else is in place
        regex_url_char = new Xomw_regex_url_char();
      }
    }
  }
}
|
||||
|
||||
// Merge two sets of HTML attributes. Conflicting items in the second set
|
||||
// will override those in the first, except for 'class' attributes which
|
||||
// will be combined (if they're both strings).
|
||||
// XO.MW: XO does src += trg; MW does rv = src + trg;
|
||||
public void Merge_attributes(Xomw_atr_mgr src, Xomw_atr_mgr trg) {
|
||||
int trg_len = trg.Len();
|
||||
for (int i = 0; i < trg_len; i++) {
|
||||
Xomw_atr_itm trg_atr = trg.Get_at(i);
|
||||
// merge trg and src
|
||||
byte[] atr_cls = Gfh_atr_.Bry__class;
|
||||
if (Bry_.Eq(trg_atr.Key_bry(), atr_cls)) {
|
||||
Xomw_atr_itm src_atr = src.Get_by_or_null(atr_cls);
|
||||
if (src_atr != null) {
|
||||
// NOTE: need byte[]-creation is unavoidable b/c src_atr and trg_atr are non-null
|
||||
Merge_atrs_combine(tmp_bfr, src_atr.Val(), Byte_ascii.Space);
|
||||
tmp_bfr.Add_byte_space();
|
||||
Merge_atrs_combine(tmp_bfr, trg_atr.Val(), Byte_ascii.Space);
|
||||
src_atr.Val_(tmp_bfr.To_bry_and_clear());
|
||||
continue;
|
||||
}
|
||||
}
|
||||
src.Add_or_set(trg_atr);
|
||||
}
|
||||
}
|
||||
private void Merge_atrs_combine(Bry_bfr trg, byte[] src, byte sep) {
|
||||
int src_len = src.length;
|
||||
for (int i = 0; i < src_len; i++) {
|
||||
byte b = src[i];
|
||||
if (b == sep) {
|
||||
// gobble ws; EX: "a b"
|
||||
int space_bgn = i;
|
||||
int space_end = Bry_find_.Find_fwd_while(src, i, src_len, sep);
|
||||
i = space_end - 1; // -1 b/c i++ above
|
||||
|
||||
// ignore ws at BOS; EX: " a"
|
||||
if (space_bgn == 0)
|
||||
continue;
|
||||
// ignore ws at EOS; EX: "a "
|
||||
if (space_end == src_len)
|
||||
break;
|
||||
}
|
||||
trg.Add_byte(b);
|
||||
}
|
||||
}
|
||||
public byte[] Clean_url(byte[] url) {
|
||||
// Normalize any HTML entities in input. They will be
|
||||
// re-escaped by makeExternalLink().
|
||||
url = Decode_char_references(null, Bool_.Y, url, 0, url.length);
|
||||
|
||||
// Escape any control characters introduced by the above step
|
||||
// XO.MW.REGEX: $url = preg_replace_callback('/[\][<>"\\x00-\\x20\\x7F\|]/', [ __CLASS__, 'cleanUrlCallback' ], $url);
|
||||
// '[]<>"' | '00 -> 32' | 127
|
||||
if (regex_clean_url.Escape(tmp_bfr, url, 0, url.length))
|
||||
url = tmp_bfr.To_bry_and_clear();
|
||||
|
||||
// XO.MW.REGEX: if (preg_match('!^([^:]+:)(//[^/]+)?(.*)$!iD', $url, $matches))
|
||||
if (regex_find_domain.Match(url, 0, url.length)) {
|
||||
// Characters that will be ignored in IDNs.
|
||||
// https://tools.ietf.org/html/rfc3454#section-3.1
|
||||
// Strip them before further processing so blacklists and such work.
|
||||
Php_preg_.Replace(tmp_host.Init(url, regex_find_domain.host_bgn, regex_find_domain.host_end), tmp_bfr, invalid_idn_trie, trv, Bry_.Empty);
|
||||
|
||||
// IPv6 host names are bracketed with []. Url-decode these.
|
||||
// if (substr_compare("//%5B", $host, 0, 5) === 0 &&
|
||||
// preg_match('!^//%5B([0-9A-Fa-f:.]+)%5D((:\d+)?)$!', $host, $matches)
|
||||
// XO.MW.REGEX:
|
||||
// !^//%5B([0-9A-Fa-f:.]+)%5D((:\d+)?)$!
|
||||
// "//%5B" + ("hex-dec" | [:.]) + "%5D" + numbers
|
||||
// EX: [ABCD]:80:12
|
||||
if (regex_ipv6_brack.Match(tmp_host.src, tmp_host.src_bgn, tmp_host.src_end)) {
|
||||
tmp_bfr.Add_str_a7("//[").Add_mid(tmp_host.src, regex_ipv6_brack.host_bgn, regex_ipv6_brack.host_end)
|
||||
.Add_byte(Byte_ascii.Brack_end).Add_mid(tmp_host.src, regex_ipv6_brack.segs_bgn, regex_ipv6_brack.segs_end);
|
||||
tmp_host.Set_by_bfr(tmp_bfr);
|
||||
}
|
||||
|
||||
// @todo FIXME: Validate hostnames here
|
||||
|
||||
tmp_bfr.Add_mid(url, regex_find_domain.prot_bgn, regex_find_domain.prot_end);
|
||||
tmp_host.Add_to_bfr(tmp_bfr);
|
||||
tmp_bfr.Add_mid(url, regex_find_domain.rest_bgn, regex_find_domain.rest_end);
|
||||
return tmp_bfr.To_bry_and_clear();
|
||||
}
|
||||
else {
|
||||
return url;
|
||||
}
|
||||
}
|
||||
public void Fix_tag_attributes(Bry_bfr bfr, byte[] tag_name, byte[] atrs) {
|
||||
atr_bldr.Atrs__clear();
|
||||
atr_parser.Parse(atr_bldr, -1, -1, atrs, 0, atrs.length);
|
||||
int len = atr_bldr.Atrs__len();
|
||||
|
||||
// PORTED: Sanitizer.php|safeEncodeTagAttributes
|
||||
for (int i = 0; i < len; i++) {
|
||||
// $encAttribute = htmlspecialchars($attribute);
|
||||
// $encValue = Sanitizer::safeEncodeAttribute($value);
|
||||
// $attribs[] = "$encAttribute=\"$encValue\"";
|
||||
Mwh_atr_itm itm = atr_bldr.Atrs__get_at(i);
|
||||
bfr.Add_byte_space(); // "return count($attribs) ? ' ' . implode(' ', $attribs) : '';"
|
||||
bfr.Add_bry_escape_html(itm.Key_bry(), itm.Key_bgn(), itm.Key_end());
|
||||
bfr.Add_byte_eq().Add_byte_quote();
|
||||
bfr.Add(itm.Val_as_bry()); // TODO.XO:Sanitizer::encode
|
||||
bfr.Add_byte_quote();
|
||||
}
|
||||
}
|
||||
public void Normalize_char_references(Xomw_parser_bfr pbfr) {
|
||||
// XO.PBFR
|
||||
Bry_bfr src_bfr = pbfr.Src();
|
||||
byte[] src = src_bfr.Bfr();
|
||||
int src_bgn = 0;
|
||||
int src_end = src_bfr.Len();
|
||||
Bry_bfr bfr = pbfr.Trg();
|
||||
pbfr.Switch();
|
||||
|
||||
Normalize_char_references(bfr, Bool_.N, src, src_bgn, src_end);
|
||||
}
|
||||
public byte[] Normalize_char_references(Bry_bfr bfr, boolean lone_bfr, byte[] src, int src_bgn, int src_end) {
|
||||
return regex_url_char.Replace_by_cbk(bfr, lone_bfr, src, src_bgn, src_end, normalize_cbk);
|
||||
}
|
||||
public byte[] Decode_char_references(Bry_bfr bfr, boolean lone_bfr, byte[] src, int src_bgn, int src_end) {
|
||||
return regex_url_char.Replace_by_cbk(bfr, lone_bfr, src, src_bgn, src_end, decode_cbk);
|
||||
}
|
||||
|
||||
public boolean Validate_codepoint(int codepoint) {
|
||||
// U+000C is valid in HTML5 but not allowed in XML.
|
||||
// U+000D is valid in XML but not allowed in HTML5.
|
||||
// U+007F - U+009F are disallowed in HTML5 (control characters).
|
||||
return codepoint == 0x09
|
||||
|| codepoint == 0x0a
|
||||
|| (codepoint >= 0x20 && codepoint <= 0x7e)
|
||||
|| (codepoint >= 0xa0 && codepoint <= 0xd7ff)
|
||||
|| (codepoint >= 0xe000 && codepoint <= 0xfffd)
|
||||
|| (codepoint >= 0x10000 && codepoint <= 0x10ffff);
|
||||
}
|
||||
// Encode an attribute value for HTML output.
|
||||
// XO.MW:SYNC:1.29; DATE:2017-02-03
|
||||
public static void Encode_attribute(Bry_bfr bfr, byte[] text) {
|
||||
// Whitespace is normalized during attribute decoding,
|
||||
// so if we've been passed non-spaces we must encode them
|
||||
// ahead of time or they won't be preserved.
|
||||
bfr.Add_bry_escape_xml(text, 0, text.length);
|
||||
}
|
||||
|
||||
public static Hash_adp_bry html_entities;
|
||||
private static Hash_adp_bry Html_entities_new() {
|
||||
Bry_bfr tmp = Bry_bfr_.New();
|
||||
Hash_adp_bry rv = Hash_adp_bry.cs();
|
||||
|
||||
Html_entities_set(rv, Xomw_html_ent.Type__alias, 8207, "רלמ", "‏");
|
||||
Html_entities_set(rv, Xomw_html_ent.Type__alias, 8207, "رلم", "‏");
|
||||
|
||||
Html_entities_set(rv, Xomw_html_ent.Type__char, 60, "lt", "<");
|
||||
Html_entities_set(rv, Xomw_html_ent.Type__char, 62, "gt", ">");
|
||||
Html_entities_set(rv, Xomw_html_ent.Type__char, 38, "amp", "&");
|
||||
Html_entities_set(rv, Xomw_html_ent.Type__char, 34, "quot", """);
|
||||
|
||||
// List of all named character entities defined in HTML 4.01
|
||||
// https://www.w3.org/TR/html4/sgml/entities.html
|
||||
// As well as ' which is only defined starting in XHTML1.
|
||||
Html_entities_set(rv, tmp, "Aacute" , 193);
|
||||
Html_entities_set(rv, tmp, "aacute" , 225);
|
||||
Html_entities_set(rv, tmp, "Acirc" , 194);
|
||||
Html_entities_set(rv, tmp, "acirc" , 226);
|
||||
Html_entities_set(rv, tmp, "acute" , 180);
|
||||
Html_entities_set(rv, tmp, "AElig" , 198);
|
||||
Html_entities_set(rv, tmp, "aelig" , 230);
|
||||
Html_entities_set(rv, tmp, "Agrave" , 192);
|
||||
Html_entities_set(rv, tmp, "agrave" , 224);
|
||||
Html_entities_set(rv, tmp, "alefsym" , 8501);
|
||||
Html_entities_set(rv, tmp, "Alpha" , 913);
|
||||
Html_entities_set(rv, tmp, "alpha" , 945);
|
||||
Html_entities_set(rv, tmp, "amp" , 38); // XO: identical to Type__char entry; note that Type__char should be evaluated first
|
||||
Html_entities_set(rv, tmp, "and" , 8743);
|
||||
Html_entities_set(rv, tmp, "ang" , 8736);
|
||||
Html_entities_set(rv, tmp, "apos" , 39); // New in XHTML & HTML 5; avoid in output for compatibility with IE.
|
||||
Html_entities_set(rv, tmp, "Aring" , 197);
|
||||
Html_entities_set(rv, tmp, "aring" , 229);
|
||||
Html_entities_set(rv, tmp, "asymp" , 8776);
|
||||
Html_entities_set(rv, tmp, "Atilde" , 195);
|
||||
Html_entities_set(rv, tmp, "atilde" , 227);
|
||||
Html_entities_set(rv, tmp, "Auml" , 196);
|
||||
Html_entities_set(rv, tmp, "auml" , 228);
|
||||
Html_entities_set(rv, tmp, "bdquo" , 8222);
|
||||
Html_entities_set(rv, tmp, "Beta" , 914);
|
||||
Html_entities_set(rv, tmp, "beta" , 946);
|
||||
Html_entities_set(rv, tmp, "brvbar" , 166);
|
||||
Html_entities_set(rv, tmp, "bull" , 8226);
|
||||
Html_entities_set(rv, tmp, "cap" , 8745);
|
||||
Html_entities_set(rv, tmp, "Ccedil" , 199);
|
||||
Html_entities_set(rv, tmp, "ccedil" , 231);
|
||||
Html_entities_set(rv, tmp, "cedil" , 184);
|
||||
Html_entities_set(rv, tmp, "cent" , 162);
|
||||
Html_entities_set(rv, tmp, "Chi" , 935);
|
||||
Html_entities_set(rv, tmp, "chi" , 967);
|
||||
Html_entities_set(rv, tmp, "circ" , 710);
|
||||
Html_entities_set(rv, tmp, "clubs" , 9827);
|
||||
Html_entities_set(rv, tmp, "cong" , 8773);
|
||||
Html_entities_set(rv, tmp, "copy" , 169);
|
||||
Html_entities_set(rv, tmp, "crarr" , 8629);
|
||||
Html_entities_set(rv, tmp, "cup" , 8746);
|
||||
Html_entities_set(rv, tmp, "curren" , 164);
|
||||
Html_entities_set(rv, tmp, "dagger" , 8224);
|
||||
Html_entities_set(rv, tmp, "Dagger" , 8225);
|
||||
Html_entities_set(rv, tmp, "darr" , 8595);
|
||||
Html_entities_set(rv, tmp, "dArr" , 8659);
|
||||
Html_entities_set(rv, tmp, "deg" , 176);
|
||||
Html_entities_set(rv, tmp, "Delta" , 916);
|
||||
Html_entities_set(rv, tmp, "delta" , 948);
|
||||
Html_entities_set(rv, tmp, "diams" , 9830);
|
||||
Html_entities_set(rv, tmp, "divide" , 247);
|
||||
Html_entities_set(rv, tmp, "Eacute" , 201);
|
||||
Html_entities_set(rv, tmp, "eacute" , 233);
|
||||
Html_entities_set(rv, tmp, "Ecirc" , 202);
|
||||
Html_entities_set(rv, tmp, "ecirc" , 234);
|
||||
Html_entities_set(rv, tmp, "Egrave" , 200);
|
||||
Html_entities_set(rv, tmp, "egrave" , 232);
|
||||
Html_entities_set(rv, tmp, "empty" , 8709);
|
||||
Html_entities_set(rv, tmp, "emsp" , 8195);
|
||||
Html_entities_set(rv, tmp, "ensp" , 8194);
|
||||
Html_entities_set(rv, tmp, "Epsilon" , 917);
|
||||
Html_entities_set(rv, tmp, "epsilon" , 949);
|
||||
Html_entities_set(rv, tmp, "equiv" , 8801);
|
||||
Html_entities_set(rv, tmp, "Eta" , 919);
|
||||
Html_entities_set(rv, tmp, "eta" , 951);
|
||||
Html_entities_set(rv, tmp, "ETH" , 208);
|
||||
Html_entities_set(rv, tmp, "eth" , 240);
|
||||
Html_entities_set(rv, tmp, "Euml" , 203);
|
||||
Html_entities_set(rv, tmp, "euml" , 235);
|
||||
Html_entities_set(rv, tmp, "euro" , 8364);
|
||||
Html_entities_set(rv, tmp, "exist" , 8707);
|
||||
Html_entities_set(rv, tmp, "fnof" , 402);
|
||||
Html_entities_set(rv, tmp, "forall" , 8704);
|
||||
Html_entities_set(rv, tmp, "frac12" , 189);
|
||||
Html_entities_set(rv, tmp, "frac14" , 188);
|
||||
Html_entities_set(rv, tmp, "frac34" , 190);
|
||||
Html_entities_set(rv, tmp, "frasl" , 8260);
|
||||
Html_entities_set(rv, tmp, "Gamma" , 915);
|
||||
Html_entities_set(rv, tmp, "gamma" , 947);
|
||||
Html_entities_set(rv, tmp, "ge" , 8805);
|
||||
Html_entities_set(rv, tmp, "gt" , 62);
|
||||
Html_entities_set(rv, tmp, "harr" , 8596);
|
||||
Html_entities_set(rv, tmp, "hArr" , 8660);
|
||||
Html_entities_set(rv, tmp, "hearts" , 9829);
|
||||
Html_entities_set(rv, tmp, "hellip" , 8230);
|
||||
Html_entities_set(rv, tmp, "Iacute" , 205);
|
||||
Html_entities_set(rv, tmp, "iacute" , 237);
|
||||
Html_entities_set(rv, tmp, "Icirc" , 206);
|
||||
Html_entities_set(rv, tmp, "icirc" , 238);
|
||||
Html_entities_set(rv, tmp, "iexcl" , 161);
|
||||
Html_entities_set(rv, tmp, "Igrave" , 204);
|
||||
Html_entities_set(rv, tmp, "igrave" , 236);
|
||||
Html_entities_set(rv, tmp, "image" , 8465);
|
||||
Html_entities_set(rv, tmp, "infin" , 8734);
|
||||
Html_entities_set(rv, tmp, "int" , 8747);
|
||||
Html_entities_set(rv, tmp, "Iota" , 921);
|
||||
Html_entities_set(rv, tmp, "iota" , 953);
|
||||
Html_entities_set(rv, tmp, "iquest" , 191);
|
||||
Html_entities_set(rv, tmp, "isin" , 8712);
|
||||
Html_entities_set(rv, tmp, "Iuml" , 207);
|
||||
Html_entities_set(rv, tmp, "iuml" , 239);
|
||||
Html_entities_set(rv, tmp, "Kappa" , 922);
|
||||
Html_entities_set(rv, tmp, "kappa" , 954);
|
||||
Html_entities_set(rv, tmp, "Lambda" , 923);
|
||||
Html_entities_set(rv, tmp, "lambda" , 955);
|
||||
Html_entities_set(rv, tmp, "lang" , 9001);
|
||||
Html_entities_set(rv, tmp, "laquo" , 171);
|
||||
Html_entities_set(rv, tmp, "larr" , 8592);
|
||||
Html_entities_set(rv, tmp, "lArr" , 8656);
|
||||
Html_entities_set(rv, tmp, "lceil" , 8968);
|
||||
Html_entities_set(rv, tmp, "ldquo" , 8220);
|
||||
Html_entities_set(rv, tmp, "le" , 8804);
|
||||
Html_entities_set(rv, tmp, "lfloor" , 8970);
|
||||
Html_entities_set(rv, tmp, "lowast" , 8727);
|
||||
Html_entities_set(rv, tmp, "loz" , 9674);
|
||||
Html_entities_set(rv, tmp, "lrm" , 8206);
|
||||
Html_entities_set(rv, tmp, "lsaquo" , 8249);
|
||||
Html_entities_set(rv, tmp, "lsquo" , 8216);
|
||||
Html_entities_set(rv, tmp, "lt" , 60);
|
||||
Html_entities_set(rv, tmp, "macr" , 175);
|
||||
Html_entities_set(rv, tmp, "mdash" , 8212);
|
||||
Html_entities_set(rv, tmp, "micro" , 181);
|
||||
Html_entities_set(rv, tmp, "middot" , 183);
|
||||
Html_entities_set(rv, tmp, "minus" , 8722);
|
||||
Html_entities_set(rv, tmp, "Mu" , 924);
|
||||
Html_entities_set(rv, tmp, "mu" , 956);
|
||||
Html_entities_set(rv, tmp, "nabla" , 8711);
|
||||
Html_entities_set(rv, tmp, "nbsp" , 160);
|
||||
Html_entities_set(rv, tmp, "ndash" , 8211);
|
||||
Html_entities_set(rv, tmp, "ne" , 8800);
|
||||
Html_entities_set(rv, tmp, "ni" , 8715);
|
||||
Html_entities_set(rv, tmp, "not" , 172);
|
||||
Html_entities_set(rv, tmp, "notin" , 8713);
|
||||
Html_entities_set(rv, tmp, "nsub" , 8836);
|
||||
Html_entities_set(rv, tmp, "Ntilde" , 209);
|
||||
Html_entities_set(rv, tmp, "ntilde" , 241);
|
||||
Html_entities_set(rv, tmp, "Nu" , 925);
|
||||
Html_entities_set(rv, tmp, "nu" , 957);
|
||||
Html_entities_set(rv, tmp, "Oacute" , 211);
|
||||
Html_entities_set(rv, tmp, "oacute" , 243);
|
||||
Html_entities_set(rv, tmp, "Ocirc" , 212);
|
||||
Html_entities_set(rv, tmp, "ocirc" , 244);
|
||||
Html_entities_set(rv, tmp, "OElig" , 338);
|
||||
Html_entities_set(rv, tmp, "oelig" , 339);
|
||||
Html_entities_set(rv, tmp, "Ograve" , 210);
|
||||
Html_entities_set(rv, tmp, "ograve" , 242);
|
||||
Html_entities_set(rv, tmp, "oline" , 8254);
|
||||
Html_entities_set(rv, tmp, "Omega" , 937);
|
||||
Html_entities_set(rv, tmp, "omega" , 969);
|
||||
Html_entities_set(rv, tmp, "Omicron" , 927);
|
||||
Html_entities_set(rv, tmp, "omicron" , 959);
|
||||
Html_entities_set(rv, tmp, "oplus" , 8853);
|
||||
Html_entities_set(rv, tmp, "or" , 8744);
|
||||
Html_entities_set(rv, tmp, "ordf" , 170);
|
||||
Html_entities_set(rv, tmp, "ordm" , 186);
|
||||
Html_entities_set(rv, tmp, "Oslash" , 216);
|
||||
Html_entities_set(rv, tmp, "oslash" , 248);
|
||||
Html_entities_set(rv, tmp, "Otilde" , 213);
|
||||
Html_entities_set(rv, tmp, "otilde" , 245);
|
||||
Html_entities_set(rv, tmp, "otimes" , 8855);
|
||||
Html_entities_set(rv, tmp, "Ouml" , 214);
|
||||
Html_entities_set(rv, tmp, "ouml" , 246);
|
||||
Html_entities_set(rv, tmp, "para" , 182);
|
||||
Html_entities_set(rv, tmp, "part" , 8706);
|
||||
Html_entities_set(rv, tmp, "permil" , 8240);
|
||||
Html_entities_set(rv, tmp, "perp" , 8869);
|
||||
Html_entities_set(rv, tmp, "Phi" , 934);
|
||||
Html_entities_set(rv, tmp, "phi" , 966);
|
||||
Html_entities_set(rv, tmp, "Pi" , 928);
|
||||
Html_entities_set(rv, tmp, "pi" , 960);
|
||||
Html_entities_set(rv, tmp, "piv" , 982);
|
||||
Html_entities_set(rv, tmp, "plusmn" , 177);
|
||||
Html_entities_set(rv, tmp, "pound" , 163);
|
||||
Html_entities_set(rv, tmp, "prime" , 8242);
|
||||
Html_entities_set(rv, tmp, "Prime" , 8243);
|
||||
Html_entities_set(rv, tmp, "prod" , 8719);
|
||||
Html_entities_set(rv, tmp, "prop" , 8733);
|
||||
Html_entities_set(rv, tmp, "Psi" , 936);
|
||||
Html_entities_set(rv, tmp, "psi" , 968);
|
||||
Html_entities_set(rv, tmp, "quot" , 34);
|
||||
Html_entities_set(rv, tmp, "radic" , 8730);
|
||||
Html_entities_set(rv, tmp, "rang" , 9002);
|
||||
Html_entities_set(rv, tmp, "raquo" , 187);
|
||||
Html_entities_set(rv, tmp, "rarr" , 8594);
|
||||
Html_entities_set(rv, tmp, "rArr" , 8658);
|
||||
Html_entities_set(rv, tmp, "rceil" , 8969);
|
||||
Html_entities_set(rv, tmp, "rdquo" , 8221);
|
||||
Html_entities_set(rv, tmp, "real" , 8476);
|
||||
Html_entities_set(rv, tmp, "reg" , 174);
|
||||
Html_entities_set(rv, tmp, "rfloor" , 8971);
|
||||
Html_entities_set(rv, tmp, "Rho" , 929);
|
||||
Html_entities_set(rv, tmp, "rho" , 961);
|
||||
Html_entities_set(rv, tmp, "rlm" , 8207);
|
||||
Html_entities_set(rv, tmp, "rsaquo" , 8250);
|
||||
Html_entities_set(rv, tmp, "rsquo" , 8217);
|
||||
Html_entities_set(rv, tmp, "sbquo" , 8218);
|
||||
Html_entities_set(rv, tmp, "Scaron" , 352);
|
||||
Html_entities_set(rv, tmp, "scaron" , 353);
|
||||
Html_entities_set(rv, tmp, "sdot" , 8901);
|
||||
Html_entities_set(rv, tmp, "sect" , 167);
|
||||
Html_entities_set(rv, tmp, "shy" , 173);
|
||||
Html_entities_set(rv, tmp, "Sigma" , 931);
|
||||
Html_entities_set(rv, tmp, "sigma" , 963);
|
||||
Html_entities_set(rv, tmp, "sigmaf" , 962);
|
||||
Html_entities_set(rv, tmp, "sim" , 8764);
|
||||
Html_entities_set(rv, tmp, "spades" , 9824);
|
||||
Html_entities_set(rv, tmp, "sub" , 8834);
|
||||
Html_entities_set(rv, tmp, "sube" , 8838);
|
||||
Html_entities_set(rv, tmp, "sum" , 8721);
|
||||
Html_entities_set(rv, tmp, "sup" , 8835);
|
||||
Html_entities_set(rv, tmp, "sup1" , 185);
|
||||
Html_entities_set(rv, tmp, "sup2" , 178);
|
||||
Html_entities_set(rv, tmp, "sup3" , 179);
|
||||
Html_entities_set(rv, tmp, "supe" , 8839);
|
||||
Html_entities_set(rv, tmp, "szlig" , 223);
|
||||
Html_entities_set(rv, tmp, "Tau" , 932);
|
||||
Html_entities_set(rv, tmp, "tau" , 964);
|
||||
Html_entities_set(rv, tmp, "there4" , 8756);
|
||||
Html_entities_set(rv, tmp, "Theta" , 920);
|
||||
Html_entities_set(rv, tmp, "theta" , 952);
|
||||
Html_entities_set(rv, tmp, "thetasym" , 977);
|
||||
Html_entities_set(rv, tmp, "thinsp" , 8201);
|
||||
Html_entities_set(rv, tmp, "THORN" , 222);
|
||||
Html_entities_set(rv, tmp, "thorn" , 254);
|
||||
Html_entities_set(rv, tmp, "tilde" , 732);
|
||||
Html_entities_set(rv, tmp, "times" , 215);
|
||||
Html_entities_set(rv, tmp, "trade" , 8482);
|
||||
Html_entities_set(rv, tmp, "Uacute" , 218);
|
||||
Html_entities_set(rv, tmp, "uacute" , 250);
|
||||
Html_entities_set(rv, tmp, "uarr" , 8593);
|
||||
Html_entities_set(rv, tmp, "uArr" , 8657);
|
||||
Html_entities_set(rv, tmp, "Ucirc" , 219);
|
||||
Html_entities_set(rv, tmp, "ucirc" , 251);
|
||||
Html_entities_set(rv, tmp, "Ugrave" , 217);
|
||||
Html_entities_set(rv, tmp, "ugrave" , 249);
|
||||
Html_entities_set(rv, tmp, "uml" , 168);
|
||||
Html_entities_set(rv, tmp, "upsih" , 978);
|
||||
Html_entities_set(rv, tmp, "Upsilon" , 933);
|
||||
Html_entities_set(rv, tmp, "upsilon" , 965);
|
||||
Html_entities_set(rv, tmp, "Uuml" , 220);
|
||||
Html_entities_set(rv, tmp, "uuml" , 252);
|
||||
Html_entities_set(rv, tmp, "weierp" , 8472);
|
||||
Html_entities_set(rv, tmp, "Xi" , 926);
|
||||
Html_entities_set(rv, tmp, "xi" , 958);
|
||||
Html_entities_set(rv, tmp, "Yacute" , 221);
|
||||
Html_entities_set(rv, tmp, "yacute" , 253);
|
||||
Html_entities_set(rv, tmp, "yen" , 165);
|
||||
Html_entities_set(rv, tmp, "Yuml" , 376);
|
||||
Html_entities_set(rv, tmp, "yuml" , 255);
|
||||
Html_entities_set(rv, tmp, "Zeta" , 918);
|
||||
Html_entities_set(rv, tmp, "zeta" , 950);
|
||||
Html_entities_set(rv, tmp, "zwj" , 8205);
|
||||
Html_entities_set(rv, tmp, "zwnj" , 8204);
|
||||
return rv;
|
||||
}
|
||||
private static void Html_entities_set(Hash_adp_bry rv, Bry_bfr tmp, String name_str, int code) {
|
||||
byte[] html_bry = tmp.Add_str_a7("&#").Add_int_variable(code).Add_byte_semic().To_bry_and_clear();
|
||||
Html_entities_set(rv, Xomw_html_ent.Type__entity, code, name_str, html_bry);
|
||||
}
|
||||
// String convenience overload: converts html_str to bytes and delegates to the byte[] overload.
private static void Html_entities_set(Hash_adp_bry rv, byte type, int code, String name_str, String html_str) {
  byte[] html_bry = Bry_.new_u8(html_str);
  Html_entities_set(rv, type, code, name_str, html_bry);
}
|
||||
private static void Html_entities_set(Hash_adp_bry rv, byte type, int code, String name_str, byte[] html_bry) {
|
||||
byte[] name_bry = Bry_.new_u8(name_str);
|
||||
rv.Add_if_dupe_use_1st(name_bry, new Xomw_html_ent(type, code, name_bry, html_bry)); // Add_dupe needed b/c "lt" and co. are added early; ignore subsequent call
|
||||
}
|
||||
}
|
||||
// Immutable record of one html entity: its kind, numeric code, name bytes and html bytes.
// The byte arrays are stored by reference (no copy is made).
class Xomw_html_ent {
  // kinds of entry
  public static final byte Type__null   = 0;
  public static final byte Type__alias  = 1;
  public static final byte Type__char   = 2;
  public static final byte Type__entity = 3;

  public final byte type;   // one of the Type__* constants
  public final int code;    // numeric code passed by the creator
  public final byte[] name; // entity name
  public final byte[] html; // html text for the entity

  public Xomw_html_ent(byte type, int code, byte[] name, byte[] html) {
    this.type = type;
    this.code = code;
    this.name = name;
    this.html = html;
  }
}
|
||||
class Xomw_regex_find_domain {
|
||||
public int prot_bgn;
|
||||
public int prot_end;
|
||||
public int host_bgn;
|
||||
public int host_end;
|
||||
public int rest_bgn;
|
||||
public int rest_end;
|
||||
public boolean Match(byte[] src, int src_bgn, int src_end) {
|
||||
// Validate hostname portion
|
||||
// XO.MW.REGEX: if (preg_match('!^([^:]+:)(//[^/]+)?(.*)$!iD', $url, $matches)) {
|
||||
// ([^:]+:)(//[^/]+)?(.*)
|
||||
// "protocol" + "host" + "rest"
|
||||
// "protocol" -> ([^:]+:) EX: "https:" anything not-colon up to colon
|
||||
// "host" -> (//[^/]+)? EX: "//abc/" anything not-slash up to slash
|
||||
// "rest" -> (.*) EX: rest"
|
||||
// /i : case-insensitive
|
||||
// /D : $ matches EOS, not NL
|
||||
|
||||
// find prot; EX: "https:"
|
||||
prot_bgn = src_bgn;
|
||||
prot_end = Bry_find_.Move_fwd(src, Byte_ascii.Colon, prot_bgn, src_end);
|
||||
// exit if not found
|
||||
if (prot_end == Bry_find_.Not_found) return false;
|
||||
|
||||
// find host: EX: "//a.org"
|
||||
host_bgn = prot_end;
|
||||
int double_slash_end = host_bgn + 2;
|
||||
// exit if eos
|
||||
if (double_slash_end >= src_end) return false;
|
||||
// exit if not "//"
|
||||
if ( src[host_bgn ] != Byte_ascii.Slash
|
||||
|| src[host_bgn + 1] != Byte_ascii.Slash
|
||||
) return false;
|
||||
host_end = Bry_find_.Find_fwd(src, Byte_ascii.Slash, double_slash_end, src_end);
|
||||
// exit if not found
|
||||
if (host_end == Bry_find_.Not_found) {
|
||||
host_end = src_end;
|
||||
rest_bgn = rest_end = -1;
|
||||
}
|
||||
// exit if only "//"
|
||||
if (host_end - host_bgn == 2) return false;
|
||||
|
||||
// set rest
|
||||
rest_bgn = host_end;
|
||||
rest_end = src_end;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
class Xomw_regex_escape_invalid {
|
||||
// [\][<>"\\x00-\\x20\\x7F\|]
|
||||
public boolean Escape(Bry_bfr bfr, byte[] src, int src_bgn, int src_end) {
|
||||
boolean dirty = false;
|
||||
int cur = src_bgn;
|
||||
int prv = cur;
|
||||
while (true) {
|
||||
// eos
|
||||
if (cur == src_end) {
|
||||
if (dirty) {
|
||||
bfr.Add_mid(src, prv, src_end);
|
||||
}
|
||||
break;
|
||||
}
|
||||
boolean match = false;
|
||||
byte b = src[cur];
|
||||
switch (b) {
|
||||
case Byte_ascii.Brack_bgn:
|
||||
case Byte_ascii.Brack_end:
|
||||
case Byte_ascii.Angle_bgn:
|
||||
case Byte_ascii.Angle_end:
|
||||
case Byte_ascii.Quote:
|
||||
case Byte_ascii.Pipe:
|
||||
case Byte_ascii.Delete:
|
||||
match = true;
|
||||
break;
|
||||
default:
|
||||
if (b >= 0 && b <= 32)
|
||||
match = true;
|
||||
break;
|
||||
}
|
||||
if (match) {
|
||||
bfr.Add_mid(src, prv, cur);
|
||||
gplx.langs.htmls.encoders.Gfo_url_encoder_.Php_urlencode.Encode(bfr, src, cur, cur + 1);
|
||||
dirty = true;
|
||||
cur++;
|
||||
prv = cur;
|
||||
}
|
||||
else
|
||||
cur++;
|
||||
}
|
||||
return dirty;
|
||||
}
|
||||
}
|
||||
class Xomw_regex_ipv6_brack {
|
||||
public int host_bgn;
|
||||
public int host_end;
|
||||
public int segs_bgn;
|
||||
public int segs_end;
|
||||
private final byte[]
|
||||
Bry__host_bgn = Bry_.new_a7("//%5B")
|
||||
, Bry__host_end = Bry_.new_a7("%5D")
|
||||
;
|
||||
public boolean Match(byte[] src, int src_bgn, int src_end) {
|
||||
// preg_match('!^//%5B([0-9A-Fa-f:.]+)%5D((:\d+)?)$!', $host, $matches)
|
||||
// XO.MW.REGEX:
|
||||
// !^//%5B([0-9A-Fa-f:.]+)%5D((:\d+)?)$!
|
||||
// "//%5B" + ("hex-dec" | [:.]) + "%5D" + numbers
|
||||
// EX: [ABCD]:80:12
|
||||
host_bgn = src_bgn + Bry__host_bgn.length;
|
||||
// exit if no match for "//%5B"
|
||||
if (!Bry_.Match(src, src_bgn, host_bgn, Bry__host_bgn)) return false;
|
||||
|
||||
// skip all [0-9A-Fa-f:.]
|
||||
host_end = host_bgn;
|
||||
while (true) {
|
||||
// exit if eos
|
||||
if (host_end == src_end) return false;
|
||||
boolean done = false;
|
||||
byte b = src[host_end];
|
||||
switch (b) {
|
||||
case Byte_ascii.Num_0: case Byte_ascii.Num_1: case Byte_ascii.Num_2: case Byte_ascii.Num_3: case Byte_ascii.Num_4:
|
||||
case Byte_ascii.Num_5: case Byte_ascii.Num_6: case Byte_ascii.Num_7: case Byte_ascii.Num_8: case Byte_ascii.Num_9:
|
||||
case Byte_ascii.Ltr_A: case Byte_ascii.Ltr_B: case Byte_ascii.Ltr_C: case Byte_ascii.Ltr_D: case Byte_ascii.Ltr_E: case Byte_ascii.Ltr_F:
|
||||
case Byte_ascii.Ltr_a: case Byte_ascii.Ltr_b: case Byte_ascii.Ltr_c: case Byte_ascii.Ltr_d: case Byte_ascii.Ltr_e: case Byte_ascii.Ltr_f:
|
||||
case Byte_ascii.Colon:
|
||||
case Byte_ascii.Dot:
|
||||
host_end++;
|
||||
break;
|
||||
case Byte_ascii.Percent:
|
||||
// matches "%5D"
|
||||
segs_bgn = host_end + Bry__host_end.length;
|
||||
if ( Bry_.Match(src, host_end, segs_bgn, Bry__host_end)
|
||||
&& host_end - host_bgn > 0) // host can't be 0-len; EX: "//%5B%5D"
|
||||
done = true;
|
||||
// exit if no match
|
||||
else {
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
// exit if no match
|
||||
default: {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
if (done) break;
|
||||
}
|
||||
// skip all (:\d+)
|
||||
segs_end = segs_bgn;
|
||||
while (true) {
|
||||
// stop if eos
|
||||
if (segs_end == src_end) return true;
|
||||
|
||||
// check if ":"
|
||||
if (src[segs_end] == Byte_ascii.Colon) {
|
||||
int num_bgn = segs_end + 1;
|
||||
int num_end = Bry_find_.Find_fwd_while_num(src, num_bgn, src_end);
|
||||
// exit if no nums found; EX:"[ABC]:80:"
|
||||
if (num_end == num_bgn) {
|
||||
return false;
|
||||
}
|
||||
segs_end = num_end;
|
||||
}
|
||||
// exit if seg doesn't start with ":"
|
||||
else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
interface Xomw_regex_url_char_cbk {
|
||||
boolean When_ent(Bry_bfr bfr, byte[] name);
|
||||
boolean When_dec(Bry_bfr bfr, byte[] name);
|
||||
boolean When_hex(Bry_bfr bfr, byte[] name);
|
||||
boolean When_amp(Bry_bfr bfr);
|
||||
}
|
||||
class Xomw_regex_url_char_cbk__normalize implements Xomw_regex_url_char_cbk {
|
||||
private final Xomw_sanitizer sanitizer;
|
||||
public Xomw_regex_url_char_cbk__normalize(Xomw_sanitizer sanitizer) {
|
||||
this.sanitizer = sanitizer;
|
||||
}
|
||||
public boolean When_ent(Bry_bfr bfr, byte[] name) { // XO.MW:normalizeEntity
|
||||
// If the named entity is defined in the HTML 4.0/XHTML 1.0 DTD,
|
||||
// return the equivalent numeric entity reference (except for the core <
|
||||
// > & "). If the entity is a MediaWiki-specific alias, returns
|
||||
// the HTML equivalent. Otherwise, returns HTML-escaped text of
|
||||
// pseudo-entity source (eg &foo;)
|
||||
Object o = Xomw_sanitizer.html_entities.Get_by_bry(name);
|
||||
if (o == null) {
|
||||
bfr.Add_str_a7("&").Add(name).Add_byte_semic();
|
||||
return false;
|
||||
}
|
||||
else {
|
||||
Xomw_html_ent entity = (Xomw_html_ent)o;
|
||||
bfr.Add(entity.html);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
public boolean When_dec(Bry_bfr bfr, byte[] name) { // XO.MW:decCharReference
|
||||
int point = Bry_.To_int_or(name, -1);
|
||||
if (sanitizer.Validate_codepoint(point)) {
|
||||
bfr.Add_str_a7("&#").Add_int_variable(point).Add_byte_semic();
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
public boolean When_hex(Bry_bfr bfr, byte[] name) { // XO.MW:hexCharReference
|
||||
int point = Hex_utl_.Parse_or(name, -1);
|
||||
if (sanitizer.Validate_codepoint(point)) {
|
||||
bfr.Add_str_a7("&#x");
|
||||
Hex_utl_.Write_bfr(bfr, Bool_.Y, point); // sprintf('&#x%x;', $point)
|
||||
bfr.Add_byte_semic();
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
public boolean When_amp(Bry_bfr bfr) {
|
||||
bfr.Add(Gfh_entity_.Amp_bry); // transform "&" to "&"
|
||||
return true;
|
||||
}
|
||||
}
|
||||
class Xomw_regex_url_char_cbk__decode implements Xomw_regex_url_char_cbk {
|
||||
private final Xomw_sanitizer sanitizer;
|
||||
public Xomw_regex_url_char_cbk__decode(Xomw_sanitizer sanitizer) {
|
||||
this.sanitizer = sanitizer;
|
||||
}
|
||||
public boolean When_ent(Bry_bfr bfr, byte[] name) {// XO.MW:decodeEntity
|
||||
// If the named entity is defined in the HTML 4.0/XHTML 1.0 DTD,
|
||||
// return the UTF-8 encoding of that character. Otherwise, returns
|
||||
// pseudo-entity source (eg "&foo;")
|
||||
Object o = Xomw_sanitizer.html_entities.Get_by_bry(name);
|
||||
if (o == null) {
|
||||
bfr.Add_byte(Byte_ascii.Amp).Add(name).Add_byte_semic();
|
||||
}
|
||||
else {
|
||||
Xomw_html_ent entity = (Xomw_html_ent)o;
|
||||
bfr.Add(gplx.core.intls.Utf16_.Encode_int_to_bry(entity.code));
|
||||
}
|
||||
return true;
|
||||
}
|
||||
public boolean When_dec(Bry_bfr bfr, byte[] name) {
|
||||
return Decode_char(bfr, Bry_.To_int(name));
|
||||
}
|
||||
public boolean When_hex(Bry_bfr bfr, byte[] name) {
|
||||
return Decode_char(bfr, gplx.core.encoders.Hex_utl_.Parse_or(name, 0, name.length, -1));
|
||||
}
|
||||
public boolean When_amp(Bry_bfr bfr) {
|
||||
bfr.Add_byte(Byte_ascii.Amp);
|
||||
return true;
|
||||
}
|
||||
private boolean Decode_char(Bry_bfr bfr, int point) {// XO.MW:decodeChar
|
||||
// Return UTF-8 String for a codepoint if that is a valid
|
||||
// character reference, otherwise U+FFFD REPLACEMENT CHARACTER.
|
||||
if (sanitizer.Validate_codepoint(point)) {
|
||||
bfr.Add(gplx.core.intls.Utf16_.Encode_int_to_bry(point));
|
||||
}
|
||||
else {
|
||||
bfr.Add(Utf8_replacement_char);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
private static final byte[] Utf8_replacement_char = Bry_.New_by_ints(255, 253); // 0xfffd
|
||||
}
|
||||
class Xomw_regex_url_char {
|
||||
// Regular expression to match various types of character references in
|
||||
// Sanitizer::normalizeCharReferences and Sanitizer::decodeCharReferences
|
||||
// static final CHAR_REFS_REGEX =
|
||||
// '/&([A-Za-z0-9\x80-\xff]+);
|
||||
// |&\#([0-9]+);
|
||||
// |&\#[xX]([0-9A-Fa-f]+);
|
||||
// |(&)/x';
|
||||
public Xomw_regex_url_char() {
|
||||
// assert static structs
|
||||
if (Normalize__dec == null) {
|
||||
synchronized (Xomw_sanitizer.class) {
|
||||
Normalize__dec = Bool_ary_bldr.New_u8().Set_rng(Byte_ascii.Num_0, Byte_ascii.Num_9).To_ary();
|
||||
Normalize__hex = Bool_ary_bldr.New_u8()
|
||||
.Set_rng(Byte_ascii.Num_0, Byte_ascii.Num_9)
|
||||
.Set_rng(Byte_ascii.Ltr_A, Byte_ascii.Ltr_Z)
|
||||
.Set_rng(Byte_ascii.Ltr_a, Byte_ascii.Ltr_z)
|
||||
.To_ary();
|
||||
Normalize__ent = Bool_ary_bldr.New_u8()
|
||||
.Set_rng(Byte_ascii.Num_0, Byte_ascii.Num_9)
|
||||
.Set_rng(Byte_ascii.Ltr_A, Byte_ascii.Ltr_Z)
|
||||
.Set_rng(Byte_ascii.Ltr_a, Byte_ascii.Ltr_z)
|
||||
.Set_rng(128, 255)
|
||||
.To_ary();
|
||||
}
|
||||
}
|
||||
}
|
||||
public byte[] Replace_by_cbk(Bry_bfr bfr, boolean lone_bfr, byte[] src, int src_bgn, int src_end, Xomw_regex_url_char_cbk cbk) {
|
||||
// XO.BRY_BFR
|
||||
boolean dirty = false;
|
||||
int cur = src_bgn;
|
||||
boolean called_by_bry = bfr == null;
|
||||
|
||||
while (true) {
|
||||
// search for "&"
|
||||
int find_bgn = Bry_find_.Find_fwd(src, Byte_ascii.Amp, cur);
|
||||
if (find_bgn == Bry_find_.Not_found) { // "&" not found; exit
|
||||
if (dirty)
|
||||
bfr.Add_mid(src, cur, src_end);
|
||||
break;
|
||||
}
|
||||
int ent_bgn = find_bgn + 1; // +1 to skip &
|
||||
|
||||
// get regex; (a) dec (	); (b) hex (ÿ); (c) entity (α);
|
||||
boolean[] regex = null;
|
||||
// check for #;
|
||||
if (ent_bgn < src_end && src[ent_bgn] == Byte_ascii.Hash) {
|
||||
ent_bgn++;
|
||||
if (ent_bgn < src_end) {
|
||||
byte nxt = src[ent_bgn];
|
||||
// check for x
|
||||
if (nxt == Byte_ascii.Ltr_X || nxt == Byte_ascii.Ltr_x) {
|
||||
ent_bgn++;
|
||||
regex = Normalize__hex;
|
||||
}
|
||||
}
|
||||
if (regex == null)
|
||||
regex = Normalize__dec;
|
||||
}
|
||||
else {
|
||||
regex = Normalize__ent;
|
||||
}
|
||||
|
||||
// keep looping until invalid regex
|
||||
int ent_end = ent_bgn;
|
||||
int b = Byte_ascii.Null;
|
||||
for (int i = ent_bgn; i < src_end; i++) {
|
||||
b = src[i] & 0xFF; // PATCH.JAVA:need to convert to unsigned byte
|
||||
if (regex[b])
|
||||
ent_end++;
|
||||
else
|
||||
break;
|
||||
}
|
||||
|
||||
// mark dirty; can optimize later by checking if "<" already exists
|
||||
dirty = true;
|
||||
if (bfr == null) bfr = Bry_bfr_.New();
|
||||
bfr.Add_mid(src, cur, find_bgn); // add everything before &
|
||||
|
||||
// invalid <- regex ended, but not at semic
|
||||
if (b != Byte_ascii.Semic) {
|
||||
cbk.When_amp(bfr);
|
||||
cur = find_bgn + 1; // position after "&"
|
||||
continue;
|
||||
}
|
||||
|
||||
// do normalization
|
||||
byte[] name = Bry_.Mid(src, ent_bgn, ent_end);
|
||||
boolean ret = false;
|
||||
if (regex == Normalize__ent) {
|
||||
cbk.When_ent(bfr, name);
|
||||
ret = true;
|
||||
}
|
||||
else if (regex == Normalize__dec) {
|
||||
ret = cbk.When_dec(bfr, name);
|
||||
}
|
||||
else if (regex == Normalize__hex) {
|
||||
ret = cbk.When_hex(bfr, name);
|
||||
}
|
||||
if (!ret) {
|
||||
cbk.When_amp(bfr);
|
||||
cur = find_bgn + 1; // position after "&"
|
||||
continue;
|
||||
}
|
||||
|
||||
cur = ent_end + 1; // +1 to position after ";"
|
||||
}
|
||||
|
||||
// XO.BRY_BFR
|
||||
if (dirty) {
|
||||
if (called_by_bry)
|
||||
return bfr.To_bry_and_clear();
|
||||
else
|
||||
return Bry_.Empty;
|
||||
}
|
||||
else {
|
||||
if (called_by_bry) {
|
||||
if (src_bgn == 0 && src_end == src.length)
|
||||
return src;
|
||||
else
|
||||
return Bry_.Mid(src, src_bgn, src_end);
|
||||
}
|
||||
else {
|
||||
if (lone_bfr)
|
||||
bfr.Add_mid(src, src_bgn, src_end);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
}
|
||||
private static boolean[] Normalize__dec, Normalize__hex, Normalize__ent;
|
||||
}
|
||||
@@ -1,168 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws; import gplx.*; import gplx.xowa.*;
|
||||
import org.junit.*; import gplx.core.tests.*; import gplx.core.btries.*; import gplx.xowa.mws.htmls.*;
|
||||
public class Xomw_sanitizer__tst {
|
||||
private final Xomw_sanitizer__fxt fxt = new Xomw_sanitizer__fxt();
|
||||
@Test public void Normalize__text() {fxt.Test__normalize_char_references("abc" , "abc");}
|
||||
@Test public void Normalize__dec() {fxt.Test__normalize_char_references("" , "&#08;");}
|
||||
@Test public void Normalize__dec__invalid() {fxt.Test__normalize_char_references("	" , "	");}
|
||||
@Test public void Normalize__hex() {fxt.Test__normalize_char_references("ÿ" , "ÿ");}
|
||||
@Test public void Normalize__entity() {fxt.Test__normalize_char_references("α" , "α");}
|
||||
@Test public void Normalize__entity__lt() {fxt.Test__normalize_char_references("<" , "<");}
|
||||
@Test public void Normalize__entity__alias() {fxt.Test__normalize_char_references("&רלמ;" , "‏");}
|
||||
@Test public void Normalize__amp() {fxt.Test__normalize_char_references("a&b" , "a&b");}
|
||||
@Test public void Normalize__invalid() {fxt.Test__normalize_char_references("&(invalid);" , "&(invalid);");}
|
||||
@Test public void Normalize__many() {
|
||||
fxt.Test__normalize_char_references
|
||||
( "a 	 b α c ÿ d &(invalid); e"
|
||||
, "a 	 b α c ÿ d &(invalid); e"
|
||||
);
|
||||
}
|
||||
@Test public void Regex__domain() {
|
||||
Xomw_regex_find_domain regex_domain = new Xomw_regex_find_domain();
|
||||
// normal
|
||||
fxt.Test__regex_domain_y(regex_domain, "https://a.org/bcd", "https:", "//a.org", "/bcd");
|
||||
// trailing backslash
|
||||
fxt.Test__regex_domain_y(regex_domain, "https://a.org/", "https:", "//a.org", "/");
|
||||
// domain only
|
||||
fxt.Test__regex_domain_y(regex_domain, "https://a.org", "https:", "//a.org", "");
|
||||
// colon not found
|
||||
fxt.Test__regex_domain_n(regex_domain, "https//a.org/bcd");
|
||||
// host_bgn.eos
|
||||
fxt.Test__regex_domain_n(regex_domain, "https:");
|
||||
// host_bgn.//
|
||||
fxt.Test__regex_domain_n(regex_domain, "https:a//");
|
||||
// host_bgn.///
|
||||
fxt.Test__regex_domain_n(regex_domain, "https:///a.org/b");
|
||||
}
|
||||
@Test public void Regex__clean_url() {
|
||||
Xomw_regex_escape_invalid regex = new Xomw_regex_escape_invalid();
|
||||
// noop
|
||||
fxt.Test__regex_escape_invalid(regex, "https://a.org/bcd", Bool_.N, "");
|
||||
// symbols
|
||||
fxt.Test__regex_escape_invalid(regex, "[]<>\"|", Bool_.Y, "%5B%5D%3C%3E%22%7C%7F");
|
||||
// range: 00 - 32
|
||||
fxt.Test__regex_escape_invalid(regex, "\t\n ", Bool_.Y, "%09%0A+");
|
||||
}
|
||||
@Test public void Regex__ipv6_brack() {
|
||||
Xomw_regex_ipv6_brack regex = new Xomw_regex_ipv6_brack();
|
||||
// basic
|
||||
fxt.Test__regex_ipv6_brack(regex, Bool_.Y, "//%5B0a.1b:12%5D:123");
|
||||
// port: none
|
||||
fxt.Test__regex_ipv6_brack(regex, Bool_.Y, "//%5Ba%5D");
|
||||
// port: multiple
|
||||
fxt.Test__regex_ipv6_brack(regex, Bool_.Y, "//%5Ba%5D:1:2:3");
|
||||
// "//%5B" missing
|
||||
fxt.Test__regex_ipv6_brack(regex, Bool_.N, "abc");
|
||||
// ipv6: invalid
|
||||
fxt.Test__regex_ipv6_brack(regex, Bool_.N, "//%5Ba!%5D:1");
|
||||
// ipv6: 0-len
|
||||
fxt.Test__regex_ipv6_brack(regex, Bool_.N, "//%5B%5D:1");
|
||||
// port: invalid
|
||||
fxt.Test__regex_ipv6_brack(regex, Bool_.N, "//%5Ba%5D:a");
|
||||
// port: 0-len
|
||||
fxt.Test__regex_ipv6_brack(regex, Bool_.N, "//%5Ba%5D:");
|
||||
}
|
||||
@Test public void Decode() {
|
||||
// dec
|
||||
fxt.Test__decode_char_references("!" , "!");
|
||||
// hex
|
||||
fxt.Test__decode_char_references("#" , "#");
|
||||
// entity
|
||||
fxt.Test__decode_char_references("α" , "α");
|
||||
// entity:lt
|
||||
fxt.Test__decode_char_references("<" , "<");
|
||||
// entity:rlm
|
||||
fxt.Test__decode_char_references("&רלמ;" , "");
|
||||
// entity:invalid
|
||||
fxt.Test__decode_char_references("&invalid;" , "&invalid;");
|
||||
// amp
|
||||
fxt.Test__decode_char_references("a&b" , "a&b");
|
||||
}
|
||||
@Test public void Clean_url() {
|
||||
// entity
|
||||
fxt.Test__clean_url("http://a.org/b&c" , "http://a.org/b&c");
|
||||
// entity: escape
|
||||
fxt.Test__clean_url("http://a.org/b"c" , "http://a.org/b%22c");
|
||||
// domain=n; make sure " is changed, but not soft-hyphen
|
||||
fxt.Test__clean_url("a"z" , "a%22z");
|
||||
// host: invalid idn
|
||||
fxt.Test__clean_url("http://a᠆b.org/c᠆d" , "http://ab.org/c᠆d");
|
||||
// ipv6_brack
|
||||
fxt.Test__clean_url("http://[0a.1b:12]:123/cd" , "http://[0a.1b:12]:123/cd");
|
||||
}
|
||||
@Test public void Merge_atrs() {
|
||||
Xomw_atr_mgr src_atrs = new Xomw_atr_mgr();
|
||||
Xomw_atr_mgr trg_atrs = new Xomw_atr_mgr();
|
||||
Xomw_atr_mgr expd_atrs = new Xomw_atr_mgr();
|
||||
String cls = "class";
|
||||
// basic: k1 + k2
|
||||
fxt.Test__merge_attributes(src_atrs.Clear().Add_many("k1", "v1"), trg_atrs.Clear().Add_many("k2", "v2"), expd_atrs.Clear().Add_many("k1", "v1", "k2", "v2"));
|
||||
// overwrite: k1 + k1
|
||||
fxt.Test__merge_attributes(src_atrs.Clear().Add_many("k1", "v1"), trg_atrs.Clear().Add_many("k1", "v1a"), expd_atrs.Clear().Add_many("k1", "v1a"));
|
||||
// cls: many
|
||||
fxt.Test__merge_attributes(src_atrs.Clear().Add_many(cls, "v1 v2"), trg_atrs.Clear().Add_many(cls, "v3 v4"), expd_atrs.Clear().Add_many(cls, "v1 v2 v3 v4"));
|
||||
// cls: src.empty
|
||||
fxt.Test__merge_attributes(src_atrs.Clear(), trg_atrs.Clear().Add_many(cls, "v1"), expd_atrs.Clear().Add_many(cls, "v1"));
|
||||
// cls: ws
|
||||
fxt.Test__merge_attributes(src_atrs.Clear().Add_many(cls, " v1 v2 "), trg_atrs.Clear().Add_many(cls, " v3 v4 "), expd_atrs.Clear().Add_many(cls, "v1 v2 v3 v4"));
|
||||
}
|
||||
}
|
||||
class Xomw_sanitizer__fxt {
|
||||
private final Xomw_sanitizer sanitizer = new Xomw_sanitizer();
|
||||
private final Bry_bfr tmp = Bry_bfr_.New();
|
||||
public void Test__normalize_char_references(String src_str, String expd) {
|
||||
byte[] src_bry = Bry_.new_u8(src_str);
|
||||
sanitizer.Normalize_char_references(tmp, Bool_.Y, src_bry, 0, src_bry.length);
|
||||
Gftest.Eq__str(expd, tmp.To_str_and_clear());
|
||||
}
|
||||
public void Test__regex_domain_y(Xomw_regex_find_domain regex_domain, String src_str, String expd_prot, String expd_host, String expd_rest) {
|
||||
byte[] src_bry = Bry_.new_u8(src_str);
|
||||
Gftest.Eq__bool(true, regex_domain.Match(src_bry, 0, src_bry.length), src_str);
|
||||
Gftest.Eq__str(expd_prot, Bry_.Mid(src_bry, regex_domain.prot_bgn, regex_domain.prot_end));
|
||||
Gftest.Eq__str(expd_host, Bry_.Mid(src_bry, regex_domain.host_bgn, regex_domain.host_end));
|
||||
Gftest.Eq__str(expd_rest, Bry_.Mid(src_bry, regex_domain.rest_bgn, regex_domain.rest_end));
|
||||
}
|
||||
public void Test__regex_domain_n(Xomw_regex_find_domain regex_domain, String src_str) {
|
||||
byte[] src_bry = Bry_.new_u8(src_str);
|
||||
Gftest.Eq__bool(false, regex_domain.Match(src_bry, 0, src_bry.length), src_str);
|
||||
}
|
||||
public void Test__regex_escape_invalid(Xomw_regex_escape_invalid regex, String src_str, boolean expd_rslt, String expd_str) {
|
||||
byte[] src_bry = Bry_.new_u8(src_str);
|
||||
Gftest.Eq__bool(expd_rslt, regex.Escape(tmp, src_bry, 0, src_bry.length));
|
||||
Gftest.Eq__str(expd_str, tmp.To_bry_and_clear());
|
||||
}
|
||||
public void Test__regex_ipv6_brack(Xomw_regex_ipv6_brack regex, boolean expd_rslt, String src_str) {
|
||||
byte[] src_bry = Bry_.new_u8(src_str);
|
||||
Gftest.Eq__bool(expd_rslt, regex.Match(src_bry, 0, src_bry.length));
|
||||
}
|
||||
public void Test__decode_char_references(String src_str, String expd) {
|
||||
byte[] src_bry = Bry_.new_u8(src_str);
|
||||
sanitizer.Decode_char_references(tmp, Bool_.Y, src_bry, 0, src_bry.length);
|
||||
Gftest.Eq__str(expd, tmp.To_str_and_clear());
|
||||
}
|
||||
public void Test__clean_url(String src_str, String expd) {
|
||||
byte[] src_bry = Bry_.new_u8(src_str);
|
||||
Gftest.Eq__str(expd, sanitizer.Clean_url(src_bry));
|
||||
}
|
||||
public void Test__merge_attributes(Xomw_atr_mgr src, Xomw_atr_mgr trg, Xomw_atr_mgr expd) {
|
||||
sanitizer.Merge_attributes(src, trg);
|
||||
Gftest.Eq__ary__lines(expd.To_str(tmp), src.To_str(tmp), "merge_atrs");
|
||||
}
|
||||
}
|
||||
@@ -1,85 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws; import gplx.*; import gplx.xowa.*;
|
||||
public class Xomw_xml {
|
||||
// Format an XML element with given attributes and, optionally, text content.
|
||||
// Element and attribute names are assumed to be ready for literal inclusion.
|
||||
// Strings are assumed to not contain XML-illegal characters; special
|
||||
// characters (<, >, &) are escaped but illegals are not touched.
|
||||
// ARGS: contents defaults to ""
|
||||
// XO.MW:SYNC:1.29; DATE:2017-02-03
|
||||
public static void Element(Bry_bfr bfr, byte[] element, List_adp attribs, byte[] contents, boolean allow_short_tag) {
|
||||
bfr.Add_byte(Byte_ascii.Angle_bgn).Add(element);
|
||||
if (attribs.Len() > 0) {
|
||||
Expand_attributes(bfr, attribs);
|
||||
}
|
||||
if (contents == null) {
|
||||
bfr.Add_byte(Byte_ascii.Angle_end);
|
||||
}
|
||||
else {
|
||||
if (allow_short_tag && contents == Bry_.Empty) {
|
||||
bfr.Add_str_a7(" />");
|
||||
}
|
||||
else {
|
||||
bfr.Add_byte(Byte_ascii.Angle_end);
|
||||
bfr.Add_bry_escape_html(contents);
|
||||
bfr.Add_byte(Byte_ascii.Angle_bgn).Add_byte(Byte_ascii.Slash).Add(element).Add_byte(Byte_ascii.Angle_end);
|
||||
}
|
||||
}
|
||||
}
|
||||
// Given an array of ('attributename' => 'value'), it generates the code
|
||||
// to set the XML attributes : attributename="value".
|
||||
// The values are passed to Sanitizer::encodeAttribute.
|
||||
// Return null if no attributes given.
|
||||
// @param array $attribs Array of attributes for an XML element
|
||||
// XO.MW:SYNC:1.29; DATE:2017-02-03
|
||||
public static void Expand_attributes(Bry_bfr bfr, List_adp attribs) {
|
||||
int attribs_len = attribs.Len();
|
||||
for (int i = 0; i < attribs_len; i += 2) {
|
||||
// XO.MW: $out .= " {$name}=\"" . Sanitizer::encodeAttribute( $val ) . '"';
|
||||
bfr.Add_byte_space();
|
||||
bfr.Add((byte[])attribs.Get_at(i));
|
||||
bfr.Add_byte_eq().Add_byte_quote();
|
||||
Xomw_sanitizer.Encode_attribute(bfr, (byte[])attribs.Get_at(i + 1));
|
||||
bfr.Add_byte_quote();
|
||||
}
|
||||
}
|
||||
|
||||
// This opens an XML element
|
||||
// XO.MW:SYNC:1.29; DATE:2017-02-03
|
||||
public static void Open_element(Bry_bfr bfr, byte[] element, List_adp attribs) {
|
||||
bfr.Add_byte(Byte_ascii.Angle_bgn).Add(element);
|
||||
Expand_attributes(bfr, attribs);
|
||||
bfr.Add_byte(Byte_ascii.Angle_end);
|
||||
}
|
||||
|
||||
// Shortcut to close an XML element
|
||||
// XO.MW:SYNC:1.29; DATE:2017-02-03
|
||||
public static void Close_element(Bry_bfr bfr, byte[] element) {
|
||||
bfr.Add_byte(Byte_ascii.Angle_bgn).Add_byte(Byte_ascii.Slash).Add(element).Add_byte(Byte_ascii.Angle_end);
|
||||
}
|
||||
|
||||
// Same as Xml::element(), but does not escape contents. Handy when the
|
||||
// content you have is already valid xml.
|
||||
// XO.MW:SYNC:1.29; DATE:2017-02-03
|
||||
public static void Tags(Bry_bfr bfr, byte[] element, List_adp attribs, byte[] contents) {
|
||||
Open_element(bfr, element, attribs);
|
||||
bfr.Add(contents);
|
||||
bfr.Add_byte(Byte_ascii.Angle_bgn).Add_byte(Byte_ascii.Slash).Add(element).Add_byte(Byte_ascii.Angle_end);
|
||||
}
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -1,21 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.filerepo.file; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.filerepo.*;
|
||||
public interface Xomw_file_finder {
|
||||
Xomw_File Find_file(Xoa_ttl ttl);
|
||||
}
|
||||
@@ -1,32 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.filerepo.file; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.filerepo.*;
|
||||
import gplx.xowa.mws.parsers.*;
|
||||
public class Xomw_file_finder__mock implements Xomw_file_finder {
|
||||
private final Xomw_parser_env env;
|
||||
public Xomw_file_finder__mock(Xomw_parser_env env) {this.env = env;}
|
||||
private final Hash_adp hash = Hash_adp_.New();
|
||||
public void Clear() {hash.Clear();}
|
||||
public Xomw_File Find_file(Xoa_ttl ttl) {
|
||||
return (Xomw_File)hash.Get_by(ttl.Page_db_as_str());
|
||||
}
|
||||
public void Add(String title, Xomw_FileRepo repo, int w, int h, byte[] mime) {
|
||||
Xomw_LocalFile file = new Xomw_LocalFile(env, Bry_.new_u8(title), repo, w, h, mime);
|
||||
hash.Add_if_dupe_use_nth(title, file);
|
||||
}
|
||||
}
|
||||
@@ -1,21 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.filerepo.file; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.filerepo.*;
|
||||
public class Xomw_file_finder__noop implements Xomw_file_finder {
|
||||
public Xomw_File Find_file(Xoa_ttl ttl) {return null;}
|
||||
}
|
||||
@@ -1,29 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.htmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
|
||||
// One attribute: an int key, a byte-array key and a replaceable value.
public class Xomw_atr_itm {
  private int key_int;
  private byte[] key_bry;
  private byte[] val;

  // stores the given references as-is; no copies are made
  public Xomw_atr_itm(int key_int, byte[] key, byte[] val) {
    this.val = val;
    this.key_bry = key;
    this.key_int = key_int;
  }
  public int Key_int() {
    return key_int;
  }
  public byte[] Key_bry() {
    return key_bry;
  }
  public byte[] Val() {
    return val;
  }
  // replaces the value; the keys are not affected
  public void Val_(byte[] v) {
    val = v;
  }
}
|
||||
@@ -1,72 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.htmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
|
||||
public class Xomw_atr_mgr {
|
||||
private final Ordered_hash hash = Ordered_hash_.New_bry();
|
||||
public int Len() {return hash.Len();}
|
||||
public Xomw_atr_itm Get_at(int i) {return (Xomw_atr_itm)hash.Get_at(i);}
|
||||
public Xomw_atr_itm Get_by_or_null(byte[] k) {return (Xomw_atr_itm)hash.Get_by(k);}
|
||||
public Xomw_atr_mgr Clear() {hash.Clear(); return this;}
|
||||
public void Del(byte[] key) {hash.Del(key);}
|
||||
public void Add(Xomw_atr_itm itm) {hash.Add(itm.Key_bry(), itm);}
|
||||
public Xomw_atr_mgr Add(byte[] key, byte[] val) {
|
||||
this.Add(new Xomw_atr_itm(-1, key, val));
|
||||
return this;
|
||||
}
|
||||
public void Add_or_set(Xomw_atr_itm src) {
|
||||
Xomw_atr_itm trg = (Xomw_atr_itm)hash.Get_by(src.Key_bry());
|
||||
if (trg == null)
|
||||
this.Add(src);
|
||||
else
|
||||
trg.Val_(src.Val());
|
||||
}
|
||||
public void Set(byte[] key, byte[] val) {
|
||||
Xomw_atr_itm atr = Get_by_or_make(key);
|
||||
atr.Val_(val);
|
||||
}
|
||||
public Xomw_atr_itm Get_by_or_make(byte[] k) {
|
||||
Xomw_atr_itm rv = (Xomw_atr_itm)hash.Get_by(k);
|
||||
if (rv == null) {
|
||||
rv = new Xomw_atr_itm(-1, k, null);
|
||||
Add(rv);
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
public byte[] Get_val_or_null(byte[] k) {
|
||||
Xomw_atr_itm atr = (Xomw_atr_itm)hash.Get_by(k);
|
||||
return atr == null ? null : atr.Val();
|
||||
}
|
||||
public Xomw_atr_mgr Add_many(String... kvs) {// TEST
|
||||
int len = kvs.length;
|
||||
for (int i = 0; i < len; i += 2) {
|
||||
byte[] key = Bry_.new_u8(kvs[i]);
|
||||
byte[] val = Bry_.new_u8(kvs[i + 1]);
|
||||
Add(key, val);
|
||||
}
|
||||
return this;
|
||||
}
|
||||
public String To_str(Bry_bfr tmp) { // TEST
|
||||
int len = this.Len();
|
||||
for (int i = 0; i < len; i++) {
|
||||
Xomw_atr_itm itm = this.Get_at(i);
|
||||
tmp.Add(itm.Key_bry()).Add_byte_eq();
|
||||
tmp.Add(itm.Val()).Add_byte_nl();
|
||||
}
|
||||
return tmp.To_str_and_clear();
|
||||
}
|
||||
}
|
||||
@@ -1,26 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.htmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
|
||||
// Immutable holder for the name of an HTML element.
public class Xomw_html_elem {
  private final byte[] name; // EX: "a", "div", "img"

  public Xomw_html_elem(byte[] name) {
    this.name = name;
  }

  // Returns the same array instance that was passed to the constructor (no copy is made).
  public byte[] Name() {
    return name;
  }

  // private static final Hash_adp_bry void_elements = Hash_adp_bry.cs().Add_many_str("area", "base", "br", "col", "embed", "hr", "img", "input", "keygen", "link", "meta", "param", "source", "track", "wbr");
}
|
||||
@@ -1,267 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.htmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
|
||||
import gplx.core.btries.*;
|
||||
import gplx.xowa.mws.utls.*;
|
||||
public class Xomw_html_utl {
|
||||
private final Bry_bfr tmp = Bry_bfr_.New();
|
||||
private final Btrie_rv trv = new Btrie_rv();
|
||||
public void Raw_element(Bry_bfr bfr, byte[] element, Xomw_atr_mgr attribs, byte[] contents) {
|
||||
Bry_.Lcase__all(element); // XO:lcase element
|
||||
|
||||
Open_element__lcased(bfr, element, attribs);
|
||||
if (void_elements.Has(element)) {
|
||||
bfr.Del_by_1().Add(Bry__elem__lhs__inl);
|
||||
}
|
||||
else {
|
||||
bfr.Add(contents);
|
||||
Close_element__lcased(bfr, element);
|
||||
}
|
||||
}
|
||||
private void Open_element__lcased(Bry_bfr bfr, byte[] element, Xomw_atr_mgr attribs) {
|
||||
// This is not required in HTML5, but let's do it anyway, for
|
||||
// consistency and better compression.
|
||||
// $element = strtolower($element); // XO:handled by callers
|
||||
|
||||
// Remove invalid input types
|
||||
if (Bry_.Eq(element, Tag__input)) {
|
||||
// PORTED.HEADER:valid_input_types
|
||||
byte[] type_atr_val = attribs.Get_val_or_null(Atr__type);
|
||||
if (type_atr_val != null && !valid_input_types.Has(type_atr_val)) {
|
||||
attribs.Del(Atr__type);
|
||||
}
|
||||
}
|
||||
|
||||
// According to standard the default type for <button> elements is "submit".
|
||||
// Depending on compatibility mode IE might use "button", instead.
|
||||
// We enforce the standard "submit".
|
||||
if (Bry_.Eq(element, Tag__button) && attribs.Get_val_or_null(Atr__type) == null) {
|
||||
attribs.Set(Atr__type, Val__type__submit);
|
||||
}
|
||||
|
||||
bfr.Add_byte(Byte_ascii.Angle_bgn).Add(element);
|
||||
Expand_attributes(bfr, attribs); // TODO.XO:self::dropDefaults($element, $attribs)
|
||||
bfr.Add_byte(Byte_ascii.Angle_end);
|
||||
}
|
||||
public void Expand_attributes(Bry_bfr bfr, Xomw_atr_mgr atrs) {
|
||||
int len = atrs.Len();
|
||||
for (int i = 0; i < len; i++) {
|
||||
Xomw_atr_itm atr = (Xomw_atr_itm)atrs.Get_at(i);
|
||||
byte[] key = atr.Key_bry();
|
||||
byte[] val = atr.Val();
|
||||
|
||||
// Support intuitive [ 'checked' => true/false ] form
|
||||
if (val == null) { // TESTME
|
||||
continue;
|
||||
}
|
||||
|
||||
// For boolean attributes, support [ 'foo' ] instead of
|
||||
// requiring [ 'foo' => 'meaningless' ].
|
||||
boolean bool_attrib = bool_attribs.Has(val);
|
||||
if (atr.Key_int() != -1 && bool_attrib) {
|
||||
key = val;
|
||||
}
|
||||
|
||||
// Not technically required in HTML5 but we'd like consistency
|
||||
// and better compression anyway.
|
||||
key = Bry_.Xcase__build__all(tmp, Bool_.N, key);
|
||||
|
||||
// PORTED.HEADER:$spaceSeparatedListAttributes
|
||||
|
||||
// Specific features for attributes that allow a list of space-separated values
|
||||
if (space_separated_list_attributes.Has(key)) {
|
||||
// Apply some normalization and remove duplicates
|
||||
|
||||
// Convert into correct array. Array can contain space-separated
|
||||
// values. Implode/explode to get those into the main array as well.
|
||||
// if (is_array($value)) {
|
||||
// If input wasn't an array, we can skip this step
|
||||
// $newValue = [];
|
||||
// foreach ($value as $k => $v) {
|
||||
// if (is_string($v)) {
|
||||
// String values should be normal `array('foo')`
|
||||
// Just append them
|
||||
// if (!isset($value[$v])) {
|
||||
// As a special case don't set 'foo' if a
|
||||
// separate 'foo' => true/false exists in the array
|
||||
// keys should be authoritative
|
||||
// $newValue[] = $v;
|
||||
// }
|
||||
// }
|
||||
// elseif ($v) {
|
||||
// If the value is truthy but not a String this is likely
|
||||
// an [ 'foo' => true ], falsy values don't add strings
|
||||
// $newValue[] = $k;
|
||||
// }
|
||||
// }
|
||||
// $value = implode(' ', $newValue);
|
||||
// }
|
||||
// $value = explode(' ', $value);
|
||||
|
||||
// Normalize spacing by fixing up cases where people used
|
||||
// more than 1 space and/or a trailing/leading space
|
||||
// $value = array_diff($value, [ '', ' ' ]);
|
||||
|
||||
// Remove duplicates and create the String
|
||||
// $value = implode(' ', array_unique($value));
|
||||
}
|
||||
// DELETE
|
||||
// elseif (is_array($value)) {
|
||||
// throw new MWException("HTML attribute $key can not contain a list of values");
|
||||
// }
|
||||
|
||||
if (bool_attrib) {
|
||||
bfr.Add_byte_space().Add(key).Add(Bry__atr__val__empty); // $ret .= " $key=\"\"";
|
||||
}
|
||||
else {
|
||||
// PORTED.HEADER:atr_val_encodings
|
||||
val = Php_str_.Strtr(val, atr_val_encodings, tmp, trv);
|
||||
bfr.Add_byte_space().Add(key).Add(Bry__atr__val__quote).Add(val).Add_byte_quote();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void Close_element__lcased(Bry_bfr bfr, byte[] element) {
|
||||
bfr.Add(Bry__elem__rhs__bgn).Add(element).Add_byte(Byte_ascii.Angle_end); // EX: "</", element, ">";
|
||||
}
|
||||
private static final byte[]
|
||||
Bry__elem__lhs__inl = Bry_.new_a7("/>")
|
||||
, Bry__elem__rhs__bgn = Bry_.new_a7("</")
|
||||
, Bry__atr__val__quote = Bry_.new_a7("=\"")
|
||||
, Bry__atr__val__empty = Bry_.new_a7("=\"\"")
|
||||
|
||||
, Tag__input = Bry_.new_a7("input")
|
||||
, Tag__button = Bry_.new_a7("button")
|
||||
, Atr__type = Bry_.new_a7("type")
|
||||
, Val__type__submit = Bry_.new_a7("submit")
|
||||
;
|
||||
|
||||
// List of void elements from HTML5, section 8.1.2 as of 2016-09-19
|
||||
private static final Hash_adp_bry void_elements = Hash_adp_bry.cs().Add_many_str
|
||||
(
|
||||
"area",
|
||||
"super",
|
||||
"br",
|
||||
"col",
|
||||
"embed",
|
||||
"hr",
|
||||
"img",
|
||||
"input",
|
||||
"keygen",
|
||||
"link",
|
||||
"meta",
|
||||
"param",
|
||||
"source",
|
||||
"track",
|
||||
"wbr"
|
||||
);
|
||||
|
||||
// Boolean attributes, which may have the value omitted entirely. Manually
|
||||
// collected from the HTML5 spec as of 2011-08-12.
|
||||
private static final Hash_adp_bry bool_attribs = Hash_adp_bry.ci_a7().Add_many_str(
|
||||
"async",
|
||||
"autofocus",
|
||||
"autoplay",
|
||||
"checked",
|
||||
"controls",
|
||||
"default",
|
||||
"defer",
|
||||
"disabled",
|
||||
"formnovalidate",
|
||||
"hidden",
|
||||
"ismap",
|
||||
// "itemscope", //XO:duplicate; added below
|
||||
"loop",
|
||||
"multiple",
|
||||
"muted",
|
||||
"novalidate",
|
||||
"open",
|
||||
"pubdate",
|
||||
"final ",
|
||||
"required",
|
||||
"reversed",
|
||||
"scoped",
|
||||
"seamless",
|
||||
"selected",
|
||||
"truespeed",
|
||||
"typemustmatch",
|
||||
// HTML5 Microdata
|
||||
"itemscope"
|
||||
);
|
||||
|
||||
private static final Btrie_slim_mgr atr_val_encodings = Btrie_slim_mgr.cs()
|
||||
// Apparently we need to entity-encode \n, \r, \t, although the
|
||||
// spec doesn't mention that. Since we're doing strtr() anyway,
|
||||
// we may as well not call htmlspecialchars().
|
||||
// @todo FIXME: Verify that we actually need to
|
||||
// escape \n\r\t here, and explain why, exactly.
|
||||
// We could call Sanitizer::encodeAttribute() for this, but we
|
||||
// don't because we're stubborn and like our marginal savings on
|
||||
// byte size from not having to encode unnecessary quotes.
|
||||
// The only difference between this transform and the one by
|
||||
// Sanitizer::encodeAttribute() is ' is not encoded.
|
||||
.Add_str_str("&" , "&")
|
||||
.Add_str_str("\"" , """)
|
||||
.Add_str_str(">" , ">")
|
||||
// '<' allegedly allowed per spec
|
||||
// but breaks some tools if not escaped.
|
||||
.Add_str_str("<" , "<")
|
||||
.Add_str_str("\n" , " ")
|
||||
.Add_str_str("\r" , " ")
|
||||
.Add_str_str("\t" , "	");
|
||||
|
||||
// https://www.w3.org/TR/html401/index/attributes.html ("space-separated")
|
||||
// https://www.w3.org/TR/html5/index.html#attributes-1 ("space-separated")
|
||||
private static final Hash_adp_bry space_separated_list_attributes = Hash_adp_bry.ci_a7().Add_many_str(
|
||||
"class", // html4, html5
|
||||
"accesskey", // as of html5, multiple space-separated values allowed
|
||||
// html4-spec doesn't document rel= as space-separated
|
||||
// but has been used like that and is now documented as such
|
||||
// in the html5-spec.
|
||||
"rel"
|
||||
);
|
||||
|
||||
private static final Hash_adp_bry valid_input_types = Hash_adp_bry.ci_a7().Add_many_str(
|
||||
// Remove invalid input types
|
||||
"hidden",
|
||||
"text",
|
||||
"password",
|
||||
"checkbox",
|
||||
"radio",
|
||||
"file",
|
||||
"submit",
|
||||
"image",
|
||||
"reset",
|
||||
"button",
|
||||
|
||||
// HTML input types
|
||||
"datetime",
|
||||
"datetime-local",
|
||||
"date",
|
||||
"month",
|
||||
"time",
|
||||
"week",
|
||||
"number",
|
||||
"range",
|
||||
"email",
|
||||
"url",
|
||||
"search",
|
||||
"tel",
|
||||
"color"
|
||||
);
|
||||
}
|
||||
@@ -1,39 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.htmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
|
||||
import org.junit.*; import gplx.core.tests.*;
|
||||
public class Xomw_html_utl__expand_attributes__tst {
|
||||
private final Xomw_html_utl__expand_attributes__fxt fxt = new Xomw_html_utl__expand_attributes__fxt();
|
||||
@Test public void Basic() {fxt.Test__expand_attributes(" a=\"b\"", "a", "b");}
|
||||
}
|
||||
class Xomw_html_utl__expand_attributes__fxt {
|
||||
private final Xomw_html_utl utl = new Xomw_html_utl();
|
||||
private final Bry_bfr bfr = Bry_bfr_.New();
|
||||
public void Test__expand_attributes(String expd, String... kvs) {
|
||||
Xomw_atr_mgr atrs = new Xomw_atr_mgr();
|
||||
int kvs_len = kvs.length;
|
||||
for (int i = 0; i < kvs_len; i += 2) {
|
||||
byte[] key = Bry_.new_a7(kvs[i]);
|
||||
byte[] val = Bry_.new_a7(kvs[i + 1]);
|
||||
Xomw_atr_itm itm = new Xomw_atr_itm(-1, key, val);
|
||||
atrs.Add(itm);
|
||||
}
|
||||
utl.Expand_attributes(bfr, atrs);
|
||||
Gftest.Eq__str(expd, bfr.To_str_and_clear());
|
||||
}
|
||||
}
|
||||
@@ -1,24 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.htmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
|
||||
// Plain option holder with public mutable fields; all flags start as false and `time` starts as null.
public class Xomw_opt_mgr {
  public boolean known, broken, no_classes;
  public byte[] time;
}
|
||||
@@ -1,27 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.htmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
|
||||
// Holder for link query arguments with public mutable fields.
public class Xomw_qry_mgr {
  public byte[] action;
  // NOTE(review): a new instance starts with redlink == 0, whereas Clear() resets it to -1 -- confirm which value means "unset"
  public int redlink;

  // Resets `action` to null and `redlink` to -1; returns this instance so calls can be chained.
  public Xomw_qry_mgr Clear() {
    this.action = null;
    this.redlink = -1;
    return this;
  }
}
|
||||
@@ -1,125 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.libs; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
|
||||
import gplx.core.btries.*;
|
||||
public class Xomw_string_utils {
|
||||
// Explode a String, but ignore any instances of the separator inside
|
||||
// the given start and end delimiters, which may optionally nest.
|
||||
// The delimiters are literal strings, not regular expressions.
|
||||
// @param String bgn_delim Start delimiter
|
||||
// @param String end_delim End delimiter
|
||||
// @param String separator Separator String for the explode.
|
||||
// @param String subject Subject String to explode.
|
||||
// @param boolean nested True iff the delimiters are allowed to nest.
|
||||
// @return ArrayIterator
|
||||
// XO.MW: hard-coding (a) nested=true; (b) bgn="-{" end="}-" sep="|"
|
||||
// XO.MW:SYNC:1.29; DATE:2017-02-03
|
||||
private static final byte Delimiter_explode__sep = 0, Delimiter_explode__bgn = 1, Delimiter_explode__end = 2;
|
||||
private static final Btrie_slim_mgr delimiter_explode_trie = Btrie_slim_mgr.cs()
|
||||
.Add_str_byte("|" , Delimiter_explode__sep)
|
||||
.Add_str_byte("-{", Delimiter_explode__bgn)
|
||||
.Add_str_byte("}-", Delimiter_explode__end)
|
||||
;
|
||||
public static byte[][] Delimiter_explode(List_adp tmp, Btrie_rv trv, byte[] src) {
|
||||
int src_bgn = 0;
|
||||
int src_end = src.length;
|
||||
|
||||
int depth = 0;
|
||||
int cur = src_bgn;
|
||||
int prv = cur;
|
||||
while (true) {
|
||||
// eos
|
||||
if (cur == src_end) {
|
||||
// add rest
|
||||
tmp.Add(Bry_.Mid(src, prv, src_end));
|
||||
break;
|
||||
}
|
||||
|
||||
Object o = delimiter_explode_trie.Match_at(trv, src, cur, src_end);
|
||||
|
||||
// regular char; continue;
|
||||
if (o == null) {
|
||||
cur++;
|
||||
continue;
|
||||
}
|
||||
|
||||
// handle sep, bgn, end
|
||||
byte tid = ((gplx.core.primitives.Byte_obj_val)o).Val();
|
||||
switch (tid) {
|
||||
case Delimiter_explode__sep:
|
||||
if (depth == 0) {
|
||||
tmp.Add(Bry_.Mid(src, prv, cur));
|
||||
prv = cur + 1;
|
||||
}
|
||||
break;
|
||||
case Delimiter_explode__bgn:
|
||||
depth++;
|
||||
break;
|
||||
case Delimiter_explode__end:
|
||||
depth--;
|
||||
break;
|
||||
}
|
||||
cur = trv.Pos();
|
||||
}
|
||||
return (byte[][])tmp.To_ary_and_clear(byte[].class);
|
||||
}
|
||||
// More or less "markup-safe" str_replace()
|
||||
// Ignores any instances of the separator inside `<...>`
|
||||
public static void Replace_markup(byte[] src, int src_bgn, int src_end, byte[] find, byte[] repl) { // REF:/includes/libs/StringUtils.php|replaceMarkup
|
||||
// PORTED: avoiding multiple regex calls / String creations
|
||||
// $placeholder = "\x00";
|
||||
|
||||
// Remove placeholder instances
|
||||
// $text = str_replace( $placeholder, '', $text );
|
||||
|
||||
// Replace instances of the separator inside HTML-like tags with the placeholder
|
||||
// $replacer = new DoubleReplacer( $search, $placeholder );
|
||||
// $cleaned = StringUtils::delimiterReplaceCallback( '<', '>', $replacer->cb(), $text );
|
||||
|
||||
// Explode, then put the replaced separators back in
|
||||
// $cleaned = str_replace( $search, $replace, $cleaned );
|
||||
// $text = str_replace( $placeholder, $search, $cleaned );
|
||||
|
||||
// if same length find / repl, do in-place replacement; EX: "!!" -> "||"
|
||||
int find_len = find.length;
|
||||
int repl_len = repl.length;
|
||||
if (find_len != repl_len) throw Err_.new_wo_type("find and repl should be same length");
|
||||
|
||||
byte find_0 = find[0];
|
||||
byte dlm_bgn = Byte_ascii.Angle_bgn;
|
||||
byte dlm_end = Byte_ascii.Angle_end;
|
||||
boolean repl_active = true;
|
||||
|
||||
// loop every char in array
|
||||
for (int i = src_bgn; i < src_end; i++) {
|
||||
byte b = src[i];
|
||||
if ( b == find_0
|
||||
&& Bry_.Match(src, i + 1, i + find_len, find, 1, find_len)
|
||||
&& repl_active
|
||||
) {
|
||||
Bry_.Set(src, i, i + find_len, repl);
|
||||
}
|
||||
else if (b == dlm_bgn) {
|
||||
repl_active = false;
|
||||
}
|
||||
else if (b == dlm_end) {
|
||||
repl_active = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,60 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.libs; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
|
||||
import org.junit.*; import gplx.core.tests.*;
|
||||
public class Xomw_string_utils__tst {
|
||||
private final Xomw_string_utils__fxt fxt = new Xomw_string_utils__fxt();
|
||||
@Test public void Delimiter_explode() {
|
||||
// basic
|
||||
fxt.Test__delimiter_explode("a|b|c" , "a", "b", "c");
|
||||
// empty
|
||||
fxt.Test__delimiter_explode("|a||c|" , "", "a", "", "c", "");
|
||||
// nest_1
|
||||
fxt.Test__delimiter_explode("a|-{b|c}-|d" , "a", "-{b|c}-", "d");
|
||||
// nest_many
|
||||
fxt.Test__delimiter_explode("a|-{b-{c|d}-e}-|f" , "a", "-{b-{c|d}-e}-", "f");
|
||||
}
|
||||
@Test public void Replace_markup() {
|
||||
// basic
|
||||
fxt.Test__replace_markup("a!!b" , "!!", "||", "a||b");
|
||||
// missing
|
||||
fxt.Test__replace_markup("abcd" , "!!", "||", "abcd");
|
||||
// eos
|
||||
fxt.Test__replace_markup("a!!" , "!!", "||", "a||");
|
||||
// ignore
|
||||
fxt.Test__replace_markup("a!!b<!!>!!c" , "!!", "||", "a||b<!!>||c");
|
||||
// ignore asym_lhs
|
||||
fxt.Test__replace_markup("a!!b<!!<!!>!!c" , "!!", "||", "a||b<!!<!!>||c");
|
||||
// ignore asym_rhs
|
||||
fxt.Test__replace_markup("a!!b<!!>!!>!!c" , "!!", "||", "a||b<!!>||>||c"); // NOTE: should probably be "!!>!!>", but unmatched ">" are escaped to "&gt;"
|
||||
}
|
||||
}
|
||||
class Xomw_string_utils__fxt {
|
||||
public void Test__delimiter_explode(String src_str, String... expd) {
|
||||
List_adp tmp = List_adp_.New();
|
||||
gplx.core.btries.Btrie_rv trv = new gplx.core.btries.Btrie_rv();
|
||||
|
||||
byte[][] actl = Xomw_string_utils.Delimiter_explode(tmp, trv, Bry_.new_u8(src_str));
|
||||
Gftest.Eq__ary(expd, actl, "src=~{0}", src_str);
|
||||
}
|
||||
public void Test__replace_markup(String src_str, String find, String repl, String expd) {
|
||||
byte[] src_bry = Bry_.new_u8(src_str);
|
||||
Xomw_string_utils.Replace_markup(src_bry, 0, src_bry.length, Bry_.new_a7(find), Bry_.new_a7(repl));
|
||||
Gftest.Eq__str(expd, src_bry);
|
||||
}
|
||||
}
|
||||
@@ -1,213 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.linkers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
|
||||
import gplx.langs.htmls.*;
|
||||
import gplx.xowa.mws.htmls.*;
|
||||
/* TODO.XO
|
||||
* P7: $html = HtmlArmor::getHtml($text);
|
||||
* P3: Get_link_url [alternate urls? EX: mw/wiki/index.php/title?]
|
||||
* P2: titleFormatter->getPrefixedText [depends on redlinks]
|
||||
* P1: Get_link_classes [depends on redlinks]
|
||||
*/
|
||||
public class Xomw_link_renderer {
|
||||
private boolean expand_urls = false;
|
||||
private final Xomw_html_utl html_utl = new Xomw_html_utl();
|
||||
private final Xomw_atr_mgr attribs = new Xomw_atr_mgr();
|
||||
private final List_adp tmp_merge_deleted = List_adp_.New();
|
||||
private final Xomw_sanitizer sanitizer;
|
||||
public Xomw_link_renderer(Xomw_sanitizer sanitizer) {
|
||||
this.sanitizer = sanitizer;
|
||||
}
|
||||
// XO.MW:SYNC:1.29; DATE:2017-01-31
|
||||
public void Make_link(Bry_bfr bfr, Xoa_ttl target, byte[] text, byte[] classes, Xomw_atr_mgr extra_atrs, Xomw_qry_mgr query) {
|
||||
if (target.Is_known()) {
|
||||
this.Make_known_link(bfr, target, text, extra_atrs, query);
|
||||
} else {
|
||||
this.Make_broken_link(bfr, target, text, extra_atrs, query);
|
||||
}
|
||||
}
|
||||
|
||||
// If you have already looked up the proper CSS classes using LinkRenderer::getLinkClasses()
|
||||
// or some other method, use this to avoid looking it up again.
|
||||
// XO.MW:SYNC:1.29; DATE:2017-01-31
|
||||
public void Make_preloaded_link(Bry_bfr bfr, Xoa_ttl target, byte[] text, byte[] classes, Xomw_atr_mgr extra_atrs, Xomw_qry_mgr query) {
|
||||
// XO.MW.HOOK: $this->runBeginHook --> 'HtmlPageLinkRendererBegin', 'LinkBegin'
|
||||
|
||||
target = Normalize_target(target);
|
||||
byte[] url = Get_link_url(target, query);
|
||||
attribs.Clear();
|
||||
attribs.Add(Gfh_atr_.Bry__href, url); // XO.MW: add url 1st; MW does attribs["url", url] + attribs + extra_attribs
|
||||
if (classes.length > 0) // XO.MW:do not bother adding if empty
|
||||
attribs.Add(Gfh_atr_.Bry__class, classes);
|
||||
byte[] prefixed_text = target.Get_prefixed_text();
|
||||
if (prefixed_text != Bry_.Empty) {
|
||||
attribs.Add(Gfh_atr_.Bry__title, prefixed_text);
|
||||
}
|
||||
|
||||
Merge_attribs(attribs, extra_atrs);
|
||||
|
||||
if (text == null) {
|
||||
text = this.Get_link_text(target);
|
||||
}
|
||||
|
||||
Build_a_element(bfr, target,text, attribs, true);
|
||||
}
|
||||
|
||||
// XO.MW:SYNC:1.29; DATE:2017-01-31
|
||||
public void Make_known_link(Bry_bfr bfr, Xoa_ttl target, byte[] text, Xomw_atr_mgr extra_atrs, Xomw_qry_mgr query) {
|
||||
byte[] classes = Bry_.Empty;
|
||||
if (target.Is_external()) {
|
||||
classes = Bry__classes__extiw;
|
||||
}
|
||||
byte[] colour = Get_link_classes(target);
|
||||
if (colour != Bry_.Empty) {
|
||||
classes = Bry_.Add(classes, Byte_ascii.Space_bry, colour);
|
||||
}
|
||||
|
||||
Make_preloaded_link(bfr, target, text, classes, extra_atrs, query);
|
||||
}
|
||||
// XO.MW:SYNC:1.29; DATE:2017-01-31
|
||||
public void Make_broken_link(Bry_bfr bfr, Xoa_ttl target, byte[] text, Xomw_atr_mgr extra_atrs, Xomw_qry_mgr query) {
|
||||
// XO.MW.HOOK: Run legacy hook
|
||||
|
||||
// We don't want to include fragments for broken links, because they
|
||||
// generally make no sense.
|
||||
if (target.Has_fragment()) {
|
||||
target = target.Create_fragment_target();
|
||||
}
|
||||
target = Normalize_target(target);
|
||||
|
||||
if (query.action == null && target.Ns().Id() != gplx.xowa.wikis.nss.Xow_ns_.Tid__special) {
|
||||
query.action = Bry_.new_a7("edit");
|
||||
query.redlink = 1;
|
||||
}
|
||||
|
||||
byte[] url = Get_link_url(target, query);
|
||||
attribs.Clear();
|
||||
attribs.Add(Gfh_atr_.Bry__href, url); // $attribs = ['href' => $url,] + $this->mergeAttribs($attribs, $extraAttribs);
|
||||
attribs.Add(Gfh_atr_.Bry__class, Bry_.new_a7("new"));
|
||||
Merge_attribs(attribs, extra_atrs);
|
||||
|
||||
// $prefixedText = $this->titleFormatter->getPrefixedText($target);
|
||||
// if ($prefixedText !== '') {
|
||||
// // This ends up in parser cache!
|
||||
// $attribs['title'] = wfMessage('red-link-title', $prefixedText)
|
||||
// ->inContentLanguage()
|
||||
// ->text();
|
||||
// }
|
||||
|
||||
if (text == null) {
|
||||
text = Get_link_text(target);
|
||||
}
|
||||
|
||||
Build_a_element(bfr, target, text, attribs, false);
|
||||
}
|
||||
// XO.MW:SYNC:1.29; DATE:2017-01-31
|
||||
private void Build_a_element(Bry_bfr bfr, Xoa_ttl target, byte[] text, Xomw_atr_mgr attribs, boolean is_known) {
|
||||
// XO.MW.HOOK:HtmlPageLinkRendererEnd
|
||||
|
||||
byte[] html = text;
|
||||
// $html = HtmlArmor::getHtml($text);
|
||||
|
||||
// XO.MW.HOOK:LinkEnd
|
||||
|
||||
html_utl.Raw_element(bfr, Gfh_tag_.Bry__a, attribs, html);
|
||||
}
|
||||
// XO.MW:SYNC:1.29; DATE:2017-01-31
|
||||
private byte[] Get_link_text(Xoa_ttl target) {
|
||||
byte[] prefixed_text = target.Get_prefixed_text();
|
||||
// If the target is just a fragment, with no title, we return the fragment
|
||||
// text. Otherwise, we return the title text itself.
|
||||
if (prefixed_text == Bry_.Empty && target.Has_fragment()) {
|
||||
return target.Get_fragment();
|
||||
}
|
||||
return prefixed_text;
|
||||
}
|
||||
private byte[] Get_link_url(Xoa_ttl target, Xomw_qry_mgr query) {
|
||||
// TODO: Use a LinkTargetResolver service instead of Title
|
||||
|
||||
// if ($this->forceArticlePath) {
|
||||
// $realQuery = $query;
|
||||
// $query = [];
|
||||
// }
|
||||
// else {
|
||||
// $realQuery = [];
|
||||
// }
|
||||
byte[] url = target.Get_link_url(query, false, expand_urls);
|
||||
|
||||
// if ($this->forceArticlePath && $realQuery) {
|
||||
// $url = wfAppendQuery($url, $realQuery);
|
||||
// }
|
||||
return url;
|
||||
}
|
||||
// XO.MW:SYNC:1.29; DATE:2017-01-31
|
||||
private Xoa_ttl Normalize_target(Xoa_ttl target) {
|
||||
return Xomw_linker.normaliseSpecialPage(target);
|
||||
}
|
||||
// XO.MW:SYNC:1.29; DATE:2017-02-01
|
||||
private void Merge_attribs(Xomw_atr_mgr src, Xomw_atr_mgr trg) {
|
||||
// XO.MW: ignore; src is always non-null and empty; if trg exists, it will be merged below
|
||||
// if (!$attribs) {return $defaults;}
|
||||
|
||||
// Merge the custom attribs with the default ones, and iterate
|
||||
// over that, deleting all "false" attributes.
|
||||
sanitizer.Merge_attributes(src, trg);
|
||||
|
||||
// XO.MW:MW removes "false" values; XO removes "null" values
|
||||
boolean deleted = false;
|
||||
int len = trg.Len();
|
||||
for (int i = 0; i < len; i++) {
|
||||
Xomw_atr_itm trg_atr = trg.Get_at(i);
|
||||
// A false value suppresses the attribute
|
||||
if (trg_atr.Val() == null) {
|
||||
tmp_merge_deleted.Add(trg_atr);
|
||||
deleted = true;
|
||||
}
|
||||
}
|
||||
if (deleted) {
|
||||
len = tmp_merge_deleted.Len();
|
||||
for (int i = 0; i < len; i++) {
|
||||
Xomw_atr_itm atr = (Xomw_atr_itm)trg.Get_at(i);
|
||||
trg.Del(atr.Key_bry());
|
||||
}
|
||||
tmp_merge_deleted.Clear();
|
||||
}
|
||||
}
|
||||
public byte[] Get_link_classes(Xoa_ttl target) {
|
||||
// Make sure the target is in the cache
|
||||
// $id = $this->linkCache->addLinkObj($target);
|
||||
// if ($id == 0) {
|
||||
// // Doesn't exist
|
||||
// return '';
|
||||
// }
|
||||
|
||||
// if ($this->linkCache->getGoodLinkFieldObj($target, 'redirect')) {
|
||||
// Page is a redirect
|
||||
// return 'mw-redirect';
|
||||
// }
|
||||
// elseif ($this->stubThreshold > 0 && MWNamespace::isContent($target->getNamespace())
|
||||
// && $this->linkCache->getGoodLinkFieldObj($target, 'length') < $this->stubThreshold
|
||||
// ) {
|
||||
// Page is a stub
|
||||
// return 'stub';
|
||||
// }
|
||||
|
||||
return Bry_.Empty;
|
||||
}
|
||||
private static final byte[] Bry__classes__extiw = Bry_.new_a7("extiw");
|
||||
}
|
||||
@@ -1,35 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.linkers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
|
||||
// import org.junit.*;
|
||||
// public class Xomw_link_renderer__tst {
|
||||
// private final Xomw_link_renderer__fxt fxt = new Xomw_link_renderer__fxt();
|
||||
/*
|
||||
Make_broken_link
|
||||
target.Has_fragment()
|
||||
*/
|
||||
// }
|
||||
// class Xomw_link_renderer__fxt {
|
||||
// private final Xomw_link_renderer wkr = new Xomw_link_renderer(new Xomw_parser());
|
||||
// public void Test__parse(String src_str, String expd) {
|
||||
// byte[] src_bry = Bry_.new_u8(src_str);
|
||||
// wkr.Replace_external_links(new Xomw_parser_ctx(), pbfr.Init(src_bry));
|
||||
// if (apos) expd = gplx.langs.htmls.Gfh_utl.Replace_apos(expd);
|
||||
// Tfds.Eq_str_lines(expd, pbfr.Rslt().To_str_and_clear(), src_str);
|
||||
// }
|
||||
// }
|
||||
@@ -1,304 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.media; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
|
||||
import gplx.xowa.mws.filerepo.file.*; import gplx.xowa.mws.parsers.lnkis.*;
|
||||
import gplx.xowa.mws.utls.*;
|
||||
/* XO.TODO:
|
||||
* validateThumbParams
|
||||
*/
|
||||
// MEMORY:only one instance per wiki
|
||||
public abstract class Xomw_ImageHandler extends Xomw_MediaHandler { private final Xomw_param_map paramMap = new Xomw_param_map();
|
||||
public Xomw_ImageHandler(byte[] key) {
	super(key);
	// register the one handler-level param once per instance: magic word img_width -> "width"
	// (MW's getParamMap returns "[ 'img_width' => 'width' ]")
	this.paramMap.Add(
		Xomw_param_itm.Mw__img_width,
		Xomw_param_map.Type__handler,
		Xomw_param_itm.Name_bry__width);
}
|
||||
/**
|
||||
* @param File file
|
||||
* @return boolean
|
||||
*/
|
||||
@Override public boolean canRender(Xomw_File file) {
|
||||
return (Php_utl_.istrue(file.getWidth()) && Php_utl_.istrue(file.getHeight()));
|
||||
}
|
||||
|
||||
@Override public Xomw_param_map getParamMap() {
|
||||
// XO.MW: defined above: "return [ 'img_width' => 'width' ];"
|
||||
return paramMap;
|
||||
}
|
||||
|
||||
@Override public boolean validateParam(int name_uid, byte[] val_bry, int val_int) {
|
||||
if (name_uid == Xomw_param_itm.Name__width || name_uid == Xomw_param_itm.Name__height) {
|
||||
if (val_int <= 0) {
|
||||
return false;
|
||||
}
|
||||
else {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
@Override public byte[] makeParamString(Xomw_params_handler handlerParams) {
|
||||
int width = 0;
|
||||
if (Php_utl_.isset(handlerParams.physicalWidth)) {
|
||||
width = handlerParams.physicalWidth;
|
||||
}
|
||||
else if (Php_utl_.isset(handlerParams.width)) {
|
||||
width = handlerParams.width;
|
||||
}
|
||||
else {
|
||||
throw Err_.new_wo_type("No width specified to makeParamString");
|
||||
}
|
||||
|
||||
// Removed for ProofreadPage
|
||||
// width = intval(width);
|
||||
return Bry_.Add(Int_.To_bry(width), Xomw_lnki_wkr.Bry__px);
|
||||
}
|
||||
|
||||
// public Xomw_param_map parseParamString(byte[] src) {
|
||||
// int len = src.length;
|
||||
// // XO.MW.REGEX: if (preg_match('/^(\d+)px/', str, m)) {
|
||||
// if ( len > 0 // at least one char
|
||||
// && Byte_ascii.Is_num(src[0])) // 1st char is numeric
|
||||
// {
|
||||
// pos = Bry_find_.Find_fwd_while_num(src, 1, len); // skip numeric
|
||||
// if (Bry_.Match(src, pos, len, Xomw_lnki_wkr.Bry__px)) { // matches "px"
|
||||
// Xomw_params_handler rv = new Xomw_params_handler();
|
||||
// rv.width = Bry_.To_int_or(src, 0, pos, Php_utl_.Null_int);
|
||||
// return rv;
|
||||
// }
|
||||
// }
|
||||
// return null;
|
||||
// }
|
||||
|
||||
// function getScriptParams(paramsVar) {
|
||||
// return [ 'width' => paramsVar['width'] ];
|
||||
// }
|
||||
|
||||
/**
|
||||
* @param File image
|
||||
* @param array paramsVar
|
||||
* @return boolean
|
||||
*/
|
||||
@Override public boolean normaliseParams(Xomw_File image, Xomw_params_handler handlerParams) {
|
||||
byte[] mimeType = image.getMimeType();
|
||||
|
||||
if (!Php_utl_.isset(handlerParams.width)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!Php_utl_.isset(handlerParams.page)) {
|
||||
handlerParams.page = 1;
|
||||
}
|
||||
else {
|
||||
// handlerParams.page = intval(handlerParams.page);
|
||||
// if (handlerParams.page > image.pageCount()) {
|
||||
// handlerParams.page = image.pageCount();
|
||||
// }
|
||||
//
|
||||
// if (handlerParams.page < 1) {
|
||||
// handlerParams.page = 1;
|
||||
// }
|
||||
}
|
||||
|
||||
int srcWidth = image.getWidth(handlerParams.page);
|
||||
int srcHeight = image.getHeight(handlerParams.page);
|
||||
|
||||
if (Php_utl_.isset(handlerParams.height) && handlerParams.height != -1) {
|
||||
// Height & width were both set
|
||||
if (handlerParams.width * srcHeight > handlerParams.height * srcWidth) {
|
||||
// Height is the relative smaller dimension, so scale width accordingly
|
||||
handlerParams.width = fitBoxWidth(srcWidth, srcHeight, handlerParams.height);
|
||||
|
||||
if (handlerParams.width == 0) {
|
||||
// Very small image, so we need to rely on client side scaling :(
|
||||
handlerParams.width = 1;
|
||||
}
|
||||
|
||||
handlerParams.physicalWidth = handlerParams.width;
|
||||
} else {
|
||||
// Height was crap, unset it so that it will be calculated later
|
||||
handlerParams.height = Php_utl_.Null_int;
|
||||
}
|
||||
}
|
||||
|
||||
if (!Php_utl_.isset(handlerParams.physicalWidth)) {
|
||||
// Passed all validations, so set the physicalWidth
|
||||
handlerParams.physicalWidth = handlerParams.width;
|
||||
}
|
||||
|
||||
// Because thumbs are only referred to by width, the height always needs
|
||||
// to be scaled by the width to keep the thumbnail sizes consistent,
|
||||
// even if it was set inside the if block above
|
||||
handlerParams.physicalHeight = Xomw_File.scaleHeight(srcWidth, srcHeight,
|
||||
handlerParams.physicalWidth);
|
||||
|
||||
// Set the height if it was not validated in the if block higher up
|
||||
if (!Php_utl_.isset(handlerParams.height) || handlerParams.height == -1) {
|
||||
handlerParams.height = handlerParams.physicalHeight;
|
||||
}
|
||||
|
||||
if (!this.validateThumbParams(handlerParams, srcWidth, srcHeight, mimeType)
|
||||
) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Validate thumbnail parameters and fill in the correct height
|
||||
*
|
||||
* @param int width Specified width (input/output)
|
||||
* @param int height Height (output only)
|
||||
* @param int srcWidth Width of the source image
|
||||
* @param int srcHeight Height of the source image
|
||||
* @param String mimeType Unused
|
||||
* @return boolean False to indicate that an error should be returned to the user.
|
||||
*/
|
||||
// XO.MW.NOTE: MW passes w and h by ref, but only changes h; XO will pass handlerParams directly
|
||||
private boolean validateThumbParams(Xomw_params_handler handlerParams, int srcWidth, int srcHeight, byte[] mimeType) {
|
||||
int width = handlerParams.physicalWidth;
|
||||
int height = handlerParams.physicalHeight;
|
||||
// width = intval(width);
|
||||
|
||||
// Sanity check width
|
||||
if (width <= 0) {
|
||||
Gfo_usr_dlg_.Instance.Warn_many("", "", "validateThumbParams: Invalid destination width: width");
|
||||
|
||||
return false;
|
||||
}
|
||||
if (srcWidth <= 0) {
|
||||
Gfo_usr_dlg_.Instance.Warn_many("", "", "validateThumbParams: Invalid source width: srcWidth");
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
height = Xomw_File.scaleHeight(srcWidth, srcHeight, width);
|
||||
if (height == 0) {
|
||||
// Force height to be at least 1 pixel
|
||||
height = 1;
|
||||
}
|
||||
handlerParams.height = height;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// /**
|
||||
// * @param File image
|
||||
// * @param String script
|
||||
// * @param array paramsVar
|
||||
// * @return boolean|MediaTransformOutput
|
||||
// */
|
||||
// function getScriptedTransform(image, script, paramsVar) {
|
||||
// if (!this.normaliseParams(image, paramsVar)) {
|
||||
// return false;
|
||||
// }
|
||||
// url = wfAppendQuery(script, this.getScriptParams(paramsVar));
|
||||
//
|
||||
// if (image.mustRender() || paramsVar['width'] < image.getWidth()) {
|
||||
// return new ThumbnailImage(image, url, false, paramsVar);
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// function getImageSize(image, path) {
|
||||
// MediaWiki\suppressWarnings();
|
||||
// gis = getimagesize(path);
|
||||
// MediaWiki\restoreWarnings();
|
||||
//
|
||||
// return gis;
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Function that returns the number of pixels to be thumbnailed.
|
||||
// * Intended for animated GIFs to multiply by the number of frames.
|
||||
// *
|
||||
// * If the file doesn't support a notion of "area" return 0.
|
||||
// *
|
||||
// * @param File image
|
||||
// * @return int
|
||||
// */
|
||||
// function getImageArea(image) {
|
||||
// return image.getWidth() * image.getHeight();
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * @param File file
|
||||
// * @return String
|
||||
// */
|
||||
// function getShortDesc(file) {
|
||||
// global wgLang;
|
||||
// nbytes = htmlspecialchars(wgLang.formatSize(file.getSize()));
|
||||
// widthheight = wfMessage('widthheight')
|
||||
// .numParams(file.getWidth(), file.getHeight()).escaped();
|
||||
//
|
||||
// return "widthheight (nbytes)";
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * @param File file
|
||||
// * @return String
|
||||
// */
|
||||
// function getLongDesc(file) {
|
||||
// global wgLang;
|
||||
// pages = file.pageCount();
|
||||
// size = htmlspecialchars(wgLang.formatSize(file.getSize()));
|
||||
// if (pages === false || pages <= 1) {
|
||||
// msg = wfMessage('file-info-size').numParams(file.getWidth(),
|
||||
// file.getHeight()).paramsVar(size,
|
||||
// '<span class="mime-type">' . file.getMimeType() . '</span>').parse();
|
||||
// } else {
|
||||
// msg = wfMessage('file-info-size-pages').numParams(file.getWidth(),
|
||||
// file.getHeight()).paramsVar(size,
|
||||
// '<span class="mime-type">' . file.getMimeType() . '</span>').numParams(pages).parse();
|
||||
// }
|
||||
//
|
||||
// return msg;
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * @param File file
|
||||
// * @return String
|
||||
// */
|
||||
// function getDimensionsString(file) {
|
||||
// pages = file.pageCount();
|
||||
// if (pages > 1) {
|
||||
// return wfMessage('widthheightpage')
|
||||
// .numParams(file.getWidth(), file.getHeight(), pages).text();
|
||||
// } else {
|
||||
// return wfMessage('widthheight')
|
||||
// .numParams(file.getWidth(), file.getHeight()).text();
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// public function sanitizeParamsForBucketing(paramsVar) {
|
||||
// paramsVar = parent::sanitizeParamsForBucketing(paramsVar);
|
||||
//
|
||||
// // We unset the height parameters in order to let normaliseParams recalculate them
|
||||
// // Otherwise there might be a height discrepancy
|
||||
// if (isset(paramsVar['height'])) {
|
||||
// unset(paramsVar['height']);
|
||||
// }
|
||||
//
|
||||
// if (isset(paramsVar['physicalHeight'])) {
|
||||
// unset(paramsVar['physicalHeight']);
|
||||
// }
|
||||
//
|
||||
// return paramsVar;
|
||||
// }
|
||||
}
|
||||
@@ -1,63 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.media; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
|
||||
import org.junit.*; import gplx.core.tests.*;
|
||||
import gplx.xowa.mws.utls.*;
|
||||
import gplx.xowa.mws.parsers.*; import gplx.xowa.mws.parsers.lnkis.*;
|
||||
import gplx.xowa.mws.filerepo.*; import gplx.xowa.mws.filerepo.file.*;
|
||||
public class Xomw_ImageHandler__tst {
|
||||
private final Xomw_ImageHandler__fxt fxt = new Xomw_ImageHandler__fxt();
|
||||
@Before public void init() {
|
||||
fxt.Init__file("A.png", 400, 200);
|
||||
}
|
||||
@Test public void normaliseParams() {
|
||||
// widthOnly; "Because thumbs are only referred to by width, the height always needs"
|
||||
fxt.Test__normaliseParams(fxt.Make__handlerParams(200), fxt.Make__handlerParams(200, 100, 200, 100));
|
||||
}
|
||||
}
|
||||
class Xomw_ImageHandler__fxt {
|
||||
private final Xomw_ImageHandler handler;
|
||||
private final Xomw_FileRepo repo = new Xomw_FileRepo(Bry_.new_a7("/orig"), Bry_.new_a7("/thumb"));
|
||||
private final Xomw_parser_env env = new Xomw_parser_env();
|
||||
private Xomw_File file;
|
||||
public Xomw_ImageHandler__fxt() {
|
||||
handler = new Xomw_TransformationalImageHandler(Bry_.new_a7("test_handler"));
|
||||
}
|
||||
public Xomw_params_handler Make__handlerParams(int w) {return Make__handlerParams(w, Php_utl_.Null_int, Php_utl_.Null_int, Php_utl_.Null_int);}
|
||||
public Xomw_params_handler Make__handlerParams(int w, int h, int phys_w, int phys_h) {
|
||||
Xomw_params_handler rv = new Xomw_params_handler();
|
||||
rv.width = w;
|
||||
rv.height = h;
|
||||
rv.physicalWidth = phys_w;
|
||||
rv.physicalHeight = phys_h;
|
||||
return rv;
|
||||
}
|
||||
public void Init__file(String title, int w, int h) {
|
||||
this.file = new Xomw_LocalFile(env, Bry_.new_u8(title), repo, w, h, Xomw_MediaHandlerFactory.Mime__image__png);
|
||||
}
|
||||
public void Test__normaliseParams(Xomw_params_handler prms, Xomw_params_handler expd) {
|
||||
// exec
|
||||
handler.normaliseParams(file, prms);
|
||||
|
||||
// test
|
||||
Gftest.Eq__int(expd.width, prms.width);
|
||||
Gftest.Eq__int(expd.height, prms.height);
|
||||
Gftest.Eq__int(expd.physicalWidth, prms.physicalWidth);
|
||||
Gftest.Eq__int(expd.physicalHeight, prms.physicalHeight);
|
||||
}
|
||||
}
|
||||
@@ -1,868 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.media; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
|
||||
import gplx.xowa.mws.filerepo.file.*;
|
||||
import gplx.xowa.mws.parsers.lnkis.*;
|
||||
public abstract class Xomw_MediaHandler {
|
||||
// handler key: stored exactly as passed to the constructor (no copy) and returned as-is by Key()
private byte[] key;
public byte[] Key() {
	return this.key;
}
public Xomw_MediaHandler(byte[] key) {
	this.key = key;
}
|
||||
|
||||
// Flag that getTransform passes as doTransform's flags argument; the doTransform doc describes flags as a bitfield and says the transform is done "unless $flags contains self::TRANSFORM_LATER".
private static final int TRANSFORM_LATER = 1;
|
||||
// static final METADATA_GOOD = true;
|
||||
// static final METADATA_BAD = false;
|
||||
// static final METADATA_COMPATIBLE = 2; // for old but backwards compatible.
|
||||
// /**
|
||||
// * Max length of error logged by logErrorForExternalProcess()
|
||||
// */
|
||||
// static final MAX_ERR_LOG_SIZE = 65535;
|
||||
//
|
||||
// /**
|
||||
// * Get a MediaHandler for a given MIME type from the instance cache
|
||||
// *
|
||||
// * @param String $type
|
||||
// * @return MediaHandler|boolean
|
||||
// */
|
||||
// static function getHandler($type) {
|
||||
// return MediaWikiServices::getInstance()
|
||||
// ->getMediaHandlerFactory()->getHandler($type);
|
||||
// }
|
||||
|
||||
/**
|
||||
* Get an associative array mapping magic word IDs to parameter names.
|
||||
* Will be used by the parser to identify parameters.
|
||||
*/
|
||||
public abstract Xomw_param_map getParamMap();
|
||||
|
||||
/**
|
||||
* Validate a thumbnail parameter at parse time.
|
||||
* Return true to accept the parameter, and false to reject it.
|
||||
* If you return false, the parser will do something quiet and forgiving.
|
||||
*
|
||||
* @param String $name
|
||||
* @param mixed $value
|
||||
*/
|
||||
public abstract boolean validateParam(int name_uid, byte[] val_bry, int val_int);
|
||||
|
||||
/**
|
||||
* Merge a parameter array into a String appropriate for inclusion in filenames
|
||||
*
|
||||
* @param array paramsVar Array of parameters that have been through normaliseParams.
|
||||
* @return String
|
||||
*/
|
||||
public abstract byte[] makeParamString(Xomw_params_handler handlerParams);
|
||||
|
||||
// /**
|
||||
// * Parse a param String made with makeParamString back into an array
|
||||
// *
|
||||
// * @param String $str The parameter String without file name (e.g. 122px)
|
||||
// * @return array|boolean Array of parameters or false on failure.
|
||||
// */
|
||||
// abstract public function parseParamString($str);
|
||||
|
||||
/**
|
||||
* Changes the parameter array as necessary, ready for transformation.
|
||||
* Should be idempotent.
|
||||
* Returns false if the parameters are unacceptable and the transform should fail
|
||||
* @param File $image
|
||||
* @param array $paramsVar
|
||||
*/
|
||||
public abstract boolean normaliseParams(Xomw_File image, Xomw_params_handler handlerParams);
|
||||
|
||||
// /**
|
||||
// * Get an image size array like that returned by getimagesize(), or false if it
|
||||
// * can't be determined.
|
||||
// *
|
||||
// * This function is used for determining the width, height and bitdepth directly
|
||||
// * from an image. The results are stored in the database in the img_width,
|
||||
// * img_height, img_bits fields.
|
||||
// *
|
||||
// * @note If this is a multipage file, return the width and height of the
|
||||
// * first page.
|
||||
// *
|
||||
// * @param File|FSFile $image The image Object, or false if there isn't one.
|
||||
// * Warning, FSFile::getPropsFromPath might pass an FSFile instead of File (!)
|
||||
// * @param String $path The filename
|
||||
// * @return array|boolean Follow the format of PHP getimagesize() internal function.
|
||||
// * See https://secure.php.net/getimagesize. MediaWiki will only ever use the
|
||||
// * first two array keys (the width and height), and the 'bits' associative
|
||||
// * key. All other array keys are ignored. Returning a 'bits' key is optional
|
||||
// * as not all formats have a notion of "bitdepth". Returns false on failure.
|
||||
// */
|
||||
// abstract function getImageSize($image, $path);
|
||||
//
|
||||
// /**
|
||||
// * Get handler-specific metadata which will be saved in the img_metadata field.
|
||||
// *
|
||||
// * @param File|FSFile $image The image Object, or false if there isn't one.
|
||||
// * Warning, FSFile::getPropsFromPath might pass an FSFile instead of File (!)
|
||||
// * @param String $path The filename
|
||||
// * @return String A String of metadata in php serialized form (Run through serialize())
|
||||
// */
|
||||
// function getMetadata($image, $path) {
|
||||
// return '';
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Get metadata version.
|
||||
// *
|
||||
// * This is not used for validating metadata, this is used for the api when returning
|
||||
// * metadata, since api content formats should stay the same over time, and so things
|
||||
// * using ForeignApiRepo can keep backwards compatibility
|
||||
// *
|
||||
// * All core media handlers share a common version number, and extensions can
|
||||
// * use the GetMetadataVersion hook to append to the array (they should append a unique
|
||||
// * String so not to get confusing). If there was a media handler named 'foo' with metadata
|
||||
// * version 3 it might add to the end of the array the element 'foo=3'. if the core metadata
|
||||
// * version is 2, the end version String would look like '2;foo=3'.
|
||||
// *
|
||||
// * @return String Version String
|
||||
// */
|
||||
// static function getMetadataVersion() {
|
||||
// $version = [ '2' ]; // core metadata version
|
||||
// Hooks::run('GetMetadataVersion', [ &$version ]);
|
||||
//
|
||||
// return implode(';', $version);
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Convert metadata version.
|
||||
// *
|
||||
// * By default just returns $metadata, but can be used to allow
|
||||
// * media handlers to convert between metadata versions.
|
||||
// *
|
||||
// * @param String|array $metadata Metadata array (serialized if String)
|
||||
// * @param int $version Target version
|
||||
// * @return array Serialized metadata in specified version, or $metadata on fail.
|
||||
// */
|
||||
// function convertMetadataVersion($metadata, $version = 1) {
|
||||
// if (!is_array($metadata)) {
|
||||
//
|
||||
// // unserialize to keep return parameter consistent.
|
||||
// MediaWiki\suppressWarnings();
|
||||
// $ret = unserialize($metadata);
|
||||
// MediaWiki\restoreWarnings();
|
||||
//
|
||||
// return $ret;
|
||||
// }
|
||||
//
|
||||
// return $metadata;
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Get a String describing the type of metadata, for display purposes.
|
||||
// *
|
||||
// * @note This method is currently unused.
|
||||
// * @param File $image
|
||||
// * @return String
|
||||
// */
|
||||
// function getMetadataType($image) {
|
||||
// return false;
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Check if the metadata String is valid for this handler.
|
||||
// * If it returns MediaHandler::METADATA_BAD (or false), Image
|
||||
// * will reload the metadata from the file and update the database.
|
||||
// * MediaHandler::METADATA_GOOD for if the metadata is a-ok,
|
||||
// * MediaHandler::METADATA_COMPATIBLE if metadata is old but backwards
|
||||
// * compatible (which may or may not trigger a metadata reload).
|
||||
// *
|
||||
// * @note Returning self::METADATA_BAD will trigger a metadata reload from
|
||||
// * file on page view. Always returning this from a broken file, or suddenly
|
||||
// * triggering as bad metadata for a large number of files can cause
|
||||
// * performance problems.
|
||||
// * @param File $image
|
||||
// * @param String $metadata The metadata in serialized form
|
||||
// * @return boolean
|
||||
// */
|
||||
// function isMetadataValid($image, $metadata) {
|
||||
// return self::METADATA_GOOD;
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Get an array of standard (FormatMetadata type) metadata values.
|
||||
// *
|
||||
// * The returned data is largely the same as that from getMetadata(),
|
||||
// * but formatted in a standard, stable, handler-independent way.
|
||||
// * The idea being that some values like ImageDescription or Artist
|
||||
// * are universal and should be retrievable in a handler generic way.
|
||||
// *
|
||||
// * The specific properties are the type of properties that can be
|
||||
// * handled by the FormatMetadata class. These values are exposed to the
|
||||
// * user via the filemetadata parser function.
|
||||
// *
|
||||
// * Details of the response format of this function can be found at
|
||||
// * https://www.mediawiki.org/wiki/Manual:File_metadata_handling
|
||||
// * tl/dr: the response is an associative array of
|
||||
// * properties keyed by name, but the value can be complex. You probably
|
||||
// * want to call one of the FormatMetadata::flatten* functions on the
|
||||
// * property values before using them, or call
|
||||
// * FormatMetadata::getFormattedData() on the full response array, which
|
||||
// * transforms all values into prettified, human-readable text.
|
||||
// *
|
||||
// * Subclasses overriding this function must return a value which is a
|
||||
// * valid API response fragment (all associative array keys are valid
|
||||
// * XML tagnames).
|
||||
// *
|
||||
// * Note, if the file simply has no metadata, but the handler supports
|
||||
// * this interface, it should return an empty array, not false.
|
||||
// *
|
||||
// * @param File $file
|
||||
// * @return array|boolean False if interface not supported
|
||||
// * @since 1.23
|
||||
// */
|
||||
// public function getCommonMetaArray(File $file) {
|
||||
// return false;
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Get a MediaTransformOutput Object representing an alternate of the transformed
|
||||
// * output which will call an intermediary thumbnail assist script.
|
||||
// *
|
||||
// * Used when the repository has a thumbnailScriptUrl option configured.
|
||||
// *
|
||||
// * Return false to fall back to the regular getTransform().
|
||||
// * @param File $image
|
||||
// * @param String $script
|
||||
// * @param array $paramsVar
|
||||
// * @return boolean|ThumbnailImage
|
||||
// */
|
||||
// function getScriptedTransform($image, $script, $paramsVar) {
|
||||
// return false;
|
||||
// }
|
||||
|
||||
/**
|
||||
* Get a MediaTransformOutput Object representing the transformed output. Does not
|
||||
* actually do the transform.
|
||||
*
|
||||
* @param File $image The image Object
|
||||
* @param String $dstPath Filesystem destination path
|
||||
* @param String $dstUrl Destination URL to use in output HTML
|
||||
* @param array $paramsVar Arbitrary set of parameters validated by $this->validateParam()
|
||||
* @return MediaTransformOutput
|
||||
*/
|
||||
public Xomw_MediaTransformOutput getTransform(Xomw_File image, byte[] dstPath, byte[] dstUrl, Xomw_params_handler handlerParams) {
|
||||
return this.doTransform(image, dstPath, dstUrl, handlerParams, TRANSFORM_LATER);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get a MediaTransformOutput Object representing the transformed output. Does the
|
||||
* transform unless $flags contains self::TRANSFORM_LATER.
|
||||
*
|
||||
* @param File $image The image Object
|
||||
* @param String $dstPath Filesystem destination path
|
||||
* @param String $dstUrl Destination URL to use in output HTML
|
||||
* @param array $paramsVar Arbitrary set of parameters validated by $this->validateParam()
|
||||
* Note: These parameters have *not* gone through $this->normaliseParams()
|
||||
* @param int $flags A bitfield, may contain self::TRANSFORM_LATER
|
||||
* @return MediaTransformOutput
|
||||
*/
|
||||
public Xomw_MediaTransformOutput doTransform(Xomw_File image, byte[] dstPath, byte[] dstUrl, Xomw_params_handler handlerParams) {
	// convenience overload: delegates to the abstract 5-arg doTransform with no flags set (flags = 0)
	final int noFlags = 0;
	return this.doTransform(image, dstPath, dstUrl, handlerParams, noFlags);
}
|
||||
public abstract Xomw_MediaTransformOutput doTransform(Xomw_File image, byte[] dstPath, byte[] dstUrl, Xomw_params_handler handlerParams, int flags);
|
||||
|
||||
// /**
|
||||
// * Get the thumbnail extension and MIME type for a given source MIME type
|
||||
// *
|
||||
// * @param String $ext Extension of original file
|
||||
// * @param String $mime MIME type of original file
|
||||
// * @param array $paramsVar Handler specific rendering parameters
|
||||
// * @return array Thumbnail extension and MIME type
|
||||
// */
|
||||
// function getThumbType($ext, $mime, $paramsVar = null) {
|
||||
// $magic = MimeMagic::singleton();
|
||||
// if (!$ext || $magic->isMatchingExtension($ext, $mime) === false) {
|
||||
// // The extension is not valid for this MIME type and we do
|
||||
// // recognize the MIME type
|
||||
// $extensions = $magic->getExtensionsForType($mime);
|
||||
// if ($extensions) {
|
||||
// return [ strtok($extensions, ' '), $mime ];
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// // The extension is correct (true) or the MIME type is unknown to
|
||||
// // MediaWiki (null)
|
||||
// return [ $ext, $mime ];
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Get useful response headers for GET/HEAD requests for a file with the given metadata
|
||||
// *
|
||||
// * @param mixed $metadata Result of the getMetadata() function of this handler for a file
|
||||
// * @return array
|
||||
// */
|
||||
// public function getStreamHeaders($metadata) {
|
||||
// return [];
|
||||
// }
|
||||
|
||||
/**
|
||||
* True if the handled types can be transformed
|
||||
*
|
||||
* @param File $file
|
||||
* @return boolean
|
||||
*/
|
||||
@gplx.Virtual public boolean canRender(Xomw_File file) {
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* True if handled types cannot be displayed directly in a browser
|
||||
* but can be rendered
|
||||
*
|
||||
* @param File $file
|
||||
* @return boolean
|
||||
*/
|
||||
public boolean mustRender(Xomw_File file) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// /**
|
||||
// * True if the type has multi-page capabilities
|
||||
// *
|
||||
// * @param File $file
|
||||
// * @return boolean
|
||||
// */
|
||||
// public function isMultiPage($file) {
|
||||
// return false;
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Page count for a multi-page document, false if unsupported or unknown
|
||||
// *
|
||||
// * @param File $file
|
||||
// * @return boolean
|
||||
// */
|
||||
// function pageCount(File $file) {
|
||||
// return false;
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * The material is vectorized and thus scaling is lossless
|
||||
// *
|
||||
// * @param File $file
|
||||
// * @return boolean
|
||||
// */
|
||||
// function isVectorized($file) {
|
||||
// return false;
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * The material is an image, and is animated.
|
||||
// * In particular, video material need not return true.
|
||||
// * @note Before 1.20, this was a method of ImageHandler only
|
||||
// *
|
||||
// * @param File $file
|
||||
// * @return boolean
|
||||
// */
|
||||
// function isAnimatedImage($file) {
|
||||
// return false;
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * If the material is animated, we can animate the thumbnail
|
||||
// * @since 1.20
|
||||
// *
|
||||
// * @param File $file
|
||||
// * @return boolean If material is not animated, handler may return any value.
|
||||
// */
|
||||
// function canAnimateThumbnail($file) {
|
||||
// return true;
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * False if the handler is disabled for all files
|
||||
// * @return boolean
|
||||
// */
|
||||
// function isEnabled() {
|
||||
// return true;
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Get an associative array of page dimensions
|
||||
// * Currently "width" and "height" are understood, but this might be
|
||||
// * expanded in the future.
|
||||
// * Returns false if unknown.
|
||||
// *
|
||||
// * It is expected that handlers for paged media (e.g. DjVuHandler)
|
||||
// * will override this method so that it gives the correct results
|
||||
// * for each specific page of the file, using the $page argument.
|
||||
// *
|
||||
// * @note For non-paged media, use getImageSize.
|
||||
// *
|
||||
// * @param File $image
|
||||
// * @param int $page What page to get dimensions of
|
||||
// * @return array|boolean
|
||||
// */
|
||||
// function getPageDimensions(File $image, $page) {
|
||||
// $gis = $this->getImageSize($image, $image->getLocalRefPath());
|
||||
// if ($gis) {
|
||||
// return [
|
||||
// 'width' => $gis[0],
|
||||
// 'height' => $gis[1]
|
||||
// ];
|
||||
// } else {
|
||||
// return false;
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Generic getter for text layer.
|
||||
// * Currently overloaded by PDF and DjVu handlers
|
||||
// * @param File $image
|
||||
// * @param int $page Page number to get information for
|
||||
// * @return boolean|String Page text or false when no text found or if
|
||||
// * unsupported.
|
||||
// */
|
||||
// function getPageText(File $image, $page) {
|
||||
// return false;
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Get the text of the entire document.
|
||||
// * @param File $file
|
||||
// * @return boolean|String The text of the document or false if unsupported.
|
||||
// */
|
||||
// public function getEntireText(File $file) {
|
||||
// $numPages = $file->pageCount();
|
||||
// if (!$numPages) {
|
||||
// // Not a multipage document
|
||||
// return $this->getPageText($file, 1);
|
||||
// }
|
||||
// $document = '';
|
||||
// for ($i = 1; $i <= $numPages; $i++) {
|
||||
// $curPage = $this->getPageText($file, $i);
|
||||
// if (is_string($curPage)) {
|
||||
// $document .= $curPage . "\n";
|
||||
// }
|
||||
// }
|
||||
// if ($document !== '') {
|
||||
// return $document;
|
||||
// }
|
||||
// return false;
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Get an array structure that looks like this:
|
||||
// *
|
||||
// * [
|
||||
// * 'visible' => [
|
||||
// * 'Human-readable name' => 'Human readable value',
|
||||
// * ...
|
||||
// * ],
|
||||
// * 'collapsed' => [
|
||||
// * 'Human-readable name' => 'Human readable value',
|
||||
// * ...
|
||||
// * ]
|
||||
// * ]
|
||||
// * The UI will format this into a table where the visible fields are always
|
||||
// * visible, and the collapsed fields are optionally visible.
|
||||
// *
|
||||
// * The function should return false if there is no metadata to display.
|
||||
// */
|
||||
//
|
||||
// /**
|
||||
// * @todo FIXME: This interface is not very flexible. The media handler
|
||||
// * should generate HTML instead. It can do all the formatting according
|
||||
// * to some standard. That makes it possible to do things like visual
|
||||
// * indication of grouped and chained streams in ogg container files.
|
||||
// * @param File $image
|
||||
// * @param boolean|IContextSource $context Context to use (optional)
|
||||
// * @return array|boolean
|
||||
// */
|
||||
// function formatMetadata($image, $context = false) {
|
||||
// return false;
|
||||
// }
|
||||
//
|
||||
// /** sorts the visible/invisible field.
|
||||
// * Split off from ImageHandler::formatMetadata, as used by more than
|
||||
// * one type of handler.
|
||||
// *
|
||||
// * This is used by the media handlers that use the FormatMetadata class
|
||||
// *
|
||||
// * @param array $metadataArray Metadata array
|
||||
// * @param boolean|IContextSource $context Context to use (optional)
|
||||
// * @return array Array for use displaying metadata.
|
||||
// */
|
||||
// function formatMetadataHelper($metadataArray, $context = false) {
|
||||
// $result = [
|
||||
// 'visible' => [],
|
||||
// 'collapsed' => []
|
||||
// ];
|
||||
//
|
||||
// $formatted = FormatMetadata::getFormattedData($metadataArray, $context);
|
||||
// // Sort fields into visible and collapsed
|
||||
// $visibleFields = $this->visibleMetadataFields();
|
||||
// foreach ($formatted as $name => $value) {
|
||||
// $tag = strtolower($name);
|
||||
// self::addMeta($result,
|
||||
// in_array($tag, $visibleFields) ? 'visible' : 'collapsed',
|
||||
// 'exif',
|
||||
// $tag,
|
||||
// $value
|
||||
// );
|
||||
// }
|
||||
//
|
||||
// return $result;
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Get a list of metadata items which should be displayed when
|
||||
// * the metadata table is collapsed.
|
||||
// *
|
||||
// * @return array Array of strings
|
||||
// */
|
||||
// protected function visibleMetadataFields() {
|
||||
// return FormatMetadata::getVisibleFields();
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * This is used to generate an array element for each metadata value
|
||||
// * That array is then used to generate the table of metadata values
|
||||
// * on the image page
|
||||
// *
|
||||
// * @param array &$array An array containing elements for each type of visibility
|
||||
// * and each of those elements being an array of metadata items. This function adds
|
||||
// * a value to that array.
|
||||
// * @param String $visibility ('visible' or 'collapsed') if this value is hidden
|
||||
// * by default.
|
||||
// * @param String $type Type of metadata tag (currently always 'exif')
|
||||
// * @param String $id The name of the metadata tag (like 'artist' for example).
|
||||
// * its name in the table displayed is the message "$type-$id" (Ex exif-artist).
|
||||
// * @param String $value Thingy goes into a wikitext table; it used to be escaped but
|
||||
// * that was incompatible with previous practise of customized display
|
||||
// * with wikitext formatting via messages such as 'exif-model-value'.
|
||||
// * So the escaping is taken back out, but generally this seems a confusing
|
||||
// * interface.
|
||||
// * @param boolean|String $param Value to pass to the message for the name of the field
|
||||
// * as $1. Currently this parameter doesn't seem to ever be used.
|
||||
// *
|
||||
// * Note, everything here is passed through the parser later on (!)
|
||||
// */
|
||||
// protected static function addMeta(&$array, $visibility, $type, $id, $value, $param = false) {
|
||||
// $msg = wfMessage("$type-$id", $param);
|
||||
// if ($msg->exists()) {
|
||||
// $name = $msg->text();
|
||||
// } else {
|
||||
// // This is for future compatibility when using instant commons.
|
||||
// // So as to not display as ugly a name if a new metadata
|
||||
// // property is defined that we don't know about
|
||||
// // (not a major issue since such a property would be collapsed
|
||||
// // by default).
|
||||
// wfDebug(__METHOD__ . ' Unknown metadata name: ' . $id . "\n");
|
||||
// $name = wfEscapeWikiText($id);
|
||||
// }
|
||||
// $array[$visibility][] = [
|
||||
// 'id' => "$type-$id",
|
||||
// 'name' => $name,
|
||||
// 'value' => $value
|
||||
// ];
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Short description. Shown on Special:Search results.
|
||||
// *
|
||||
// * @param File $file
|
||||
// * @return String
|
||||
// */
|
||||
// function getShortDesc($file) {
|
||||
// return self::getGeneralShortDesc($file);
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Long description. Shown under image on image description page surrounded by ().
|
||||
// *
|
||||
// * @param File $file
|
||||
// * @return String
|
||||
// */
|
||||
// function getLongDesc($file) {
|
||||
// return self::getGeneralLongDesc($file);
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Used instead of getShortDesc if there is no handler registered for file.
|
||||
// *
|
||||
// * @param File $file
|
||||
// * @return String
|
||||
// */
|
||||
// static function getGeneralShortDesc($file) {
|
||||
// global $wgLang;
|
||||
//
|
||||
// return htmlspecialchars($wgLang->formatSize($file->getSize()));
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Used instead of getLongDesc if there is no handler registered for file.
|
||||
// *
|
||||
// * @param File $file
|
||||
// * @return String
|
||||
// */
|
||||
// static function getGeneralLongDesc($file) {
|
||||
// return wfMessage('file-info')->sizeParams($file->getSize())
|
||||
// ->paramsVar('<span class="mime-type">' . $file->getMimeType() . '</span>')->parse();
|
||||
// }
|
||||
|
||||
/**
|
||||
* Calculate the largest thumbnail width for a given original file size
|
||||
* such that the thumbnail's height is at most $maxHeight.
|
||||
* @param int $boxWidth Width of the thumbnail box.
|
||||
* @param int $boxHeight Height of the thumbnail box.
|
||||
* @param int $maxHeight Maximum height expected for the thumbnail.
|
||||
* @return int
|
||||
*/
|
||||
public static int fitBoxWidth(int boxWidth, int boxHeight, int maxHeight) {
|
||||
double idealWidth = boxWidth * maxHeight / boxHeight;
|
||||
int roundedUp = Math_.Ceil_as_int(idealWidth);
|
||||
if (Math_.Round(roundedUp * boxHeight / boxWidth, 0) > maxHeight) {
|
||||
return Math_.Floor_as_int(idealWidth);
|
||||
} else {
|
||||
return roundedUp;
|
||||
}
|
||||
}
|
||||
|
||||
// /**
|
||||
// * Shown in file history box on image description page.
|
||||
// *
|
||||
// * @param File $file
|
||||
// * @return String Dimensions
|
||||
// */
|
||||
// function getDimensionsString($file) {
|
||||
// return '';
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Modify the parser Object post-transform.
|
||||
// *
|
||||
// * This is often used to do $parser->addOutputHook(),
|
||||
// * in order to add some javascript to render a viewer.
|
||||
// * See TimedMediaHandler or OggHandler for an example.
|
||||
// *
|
||||
// * @param Parser $parser
|
||||
// * @param File $file
|
||||
// */
|
||||
// function parserTransformHook($parser, $file) {
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * File validation hook called on upload.
|
||||
// *
|
||||
// * If the file at the given local path is not valid, or its MIME type does not
|
||||
// * match the handler class, a Status Object should be returned containing
|
||||
// * relevant errors.
|
||||
// *
|
||||
// * @param String $fileName The local path to the file.
|
||||
// * @return Status
|
||||
// */
|
||||
// function verifyUpload($fileName) {
|
||||
// return Status::newGood();
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Check for zero-sized thumbnails. These can be generated when
|
||||
// * no disk space is available or some other error occurs
|
||||
// *
|
||||
// * @param String $dstPath The location of the suspect file
|
||||
// * @param int $retval Return value of some shell process, file will be deleted if this is non-zero
|
||||
// * @return boolean True if removed, false otherwise
|
||||
// */
|
||||
// function removeBadFile($dstPath, $retval = 0) {
|
||||
// if (file_exists($dstPath)) {
|
||||
// $thumbstat = stat($dstPath);
|
||||
// if ($thumbstat['size'] == 0 || $retval != 0) {
|
||||
// $result = unlink($dstPath);
|
||||
//
|
||||
// if ($result) {
|
||||
// wfDebugLog('thumbnail',
|
||||
// sprintf('Removing bad %d-byte thumbnail "%s". unlink() succeeded',
|
||||
// $thumbstat['size'], $dstPath));
|
||||
// } else {
|
||||
// wfDebugLog('thumbnail',
|
||||
// sprintf('Removing bad %d-byte thumbnail "%s". unlink() failed',
|
||||
// $thumbstat['size'], $dstPath));
|
||||
// }
|
||||
//
|
||||
// return true;
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// return false;
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Remove files from the purge list.
|
||||
// *
|
||||
// * This is used by some video handlers to prevent ?action=purge
|
||||
// * from removing a transcoded video, which is expensive to
|
||||
// * regenerate.
|
||||
// *
|
||||
// * @see LocalFile::purgeThumbnails
|
||||
// *
|
||||
// * @param array $files
|
||||
// * @param array $options Purge options. Currently will always be
|
||||
// * an array with a single key 'forThumbRefresh' set to true.
|
||||
// */
|
||||
// public function filterThumbnailPurgeList(&$files, $options) {
|
||||
// // Do nothing
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * True if the handler can rotate the media
|
||||
// * @since 1.24 non-static. From 1.21-1.23 was static
|
||||
// * @return boolean
|
||||
// */
|
||||
// public function canRotate() {
|
||||
// return false;
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * On supporting image formats, try to read out the low-level orientation
|
||||
// * of the file and return the angle that the file needs to be rotated to
|
||||
// * be viewed.
|
||||
// *
|
||||
// * This information is only useful when manipulating the original file;
|
||||
// * the width and height we normally work with is logical, and will match
|
||||
// * any produced output views.
|
||||
// *
|
||||
// * For files we don't know, we return 0.
|
||||
// *
|
||||
// * @param File $file
|
||||
// * @return int 0, 90, 180 or 270
|
||||
// */
|
||||
// public function getRotation($file) {
|
||||
// return 0;
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Log an error that occurred in an external process
|
||||
// *
|
||||
// * Moved from BitmapHandler to MediaHandler with MediaWiki 1.23
|
||||
// *
|
||||
// * @since 1.23
|
||||
// * @param int $retval
|
||||
// * @param String $err Error reported by command. Anything longer than
|
||||
// * MediaHandler::MAX_ERR_LOG_SIZE is stripped off.
|
||||
// * @param String $cmd
|
||||
// */
|
||||
// protected function logErrorForExternalProcess($retval, $err, $cmd) {
|
||||
// # Keep error output limited (bug 57985)
|
||||
// $errMessage = trim(substr($err, 0, self::MAX_ERR_LOG_SIZE));
|
||||
//
|
||||
// wfDebugLog('thumbnail',
|
||||
// sprintf('thumbnail failed on %s: error %d "%s" from "%s"',
|
||||
// wfHostname(), $retval, $errMessage, $cmd));
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Get list of languages file can be viewed in.
|
||||
// *
|
||||
// * @param File $file
|
||||
// * @return String[] Array of language codes, or empty array if unsupported.
|
||||
// * @since 1.23
|
||||
// */
|
||||
// public function getAvailableLanguages(File $file) {
|
||||
// return [];
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * On file types that support renderings in multiple languages,
|
||||
// * which language is used by default if unspecified.
|
||||
// *
|
||||
// * If getAvailableLanguages returns a non-empty array, this must return
|
||||
// * a valid language code. Otherwise can return null if files of this
|
||||
// * type do not support alternative language renderings.
|
||||
// *
|
||||
// * @param File $file
|
||||
// * @return String|null Language code or null if multi-language not supported for filetype.
|
||||
// * @since 1.23
|
||||
// */
|
||||
// public function getDefaultRenderLanguage(File $file) {
|
||||
// return null;
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * If it's an audio file, return the length of the file. Otherwise 0.
|
||||
// *
|
||||
// * File::getLength() existed for a long time, but was calling a method
|
||||
// * that only existed in some subclasses of this class (The TMH ones).
|
||||
// *
|
||||
// * @param File $file
|
||||
// * @return float length in seconds
|
||||
// * @since 1.23
|
||||
// */
|
||||
// public function getLength($file) {
|
||||
// return 0.0;
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * True if creating thumbnails from the file is large or otherwise resource-intensive.
|
||||
// * @param File $file
|
||||
// * @return boolean
|
||||
// */
|
||||
// public function isExpensiveToThumbnail($file) {
|
||||
// return false;
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Returns whether or not this handler supports the chained generation of thumbnails according
|
||||
// * to buckets
|
||||
// * @return boolean
|
||||
// * @since 1.24
|
||||
// */
|
||||
// public function supportsBucketing() {
|
||||
// return false;
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Returns a normalised paramsVar array for which parameters have been cleaned up for bucketing
|
||||
// * purposes
|
||||
// * @param array $paramsVar
|
||||
// * @return array
|
||||
// */
|
||||
// public function sanitizeParamsForBucketing($paramsVar) {
|
||||
// return $paramsVar;
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Gets configuration for the file warning message. Return value of
|
||||
// * the following structure:
|
||||
// * [
|
||||
// * // Required, module with messages loaded for the client
|
||||
// * 'module' => 'example.filewarning.messages',
|
||||
// * // Required, array of names of messages
|
||||
// * 'messages' => [
|
||||
// * // Required, main warning message
|
||||
// * 'main' => 'example-filewarning-main',
|
||||
// * // Optional, header for warning dialog
|
||||
// * 'header' => 'example-filewarning-header',
|
||||
// * // Optional, footer for warning dialog
|
||||
// * 'footer' => 'example-filewarning-footer',
|
||||
// * // Optional, text for more-information link (see below)
|
||||
// * 'info' => 'example-filewarning-info',
|
||||
// * ],
|
||||
// * // Optional, link for more information
|
||||
// * 'link' => 'http://example.com',
|
||||
// * ]
|
||||
// *
|
||||
// * Returns null if no warning is necessary.
|
||||
// * @param File $file
|
||||
// * @return array|null
|
||||
// */
|
||||
// public function getWarningConfig($file) {
|
||||
// return null;
|
||||
// }
|
||||
}
|
||||
@@ -1,63 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.media; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
|
||||
// XO.MW:MW has registry and instance cache; XO only has instance
|
||||
// XO.MW:SYNC:1.29; DATE:2017-02-05
|
||||
public class Xomw_MediaHandlerFactory {
|
||||
private final Hash_adp_bry handlers = Hash_adp_bry.cs();
|
||||
|
||||
// XO.MW:SYNC:1.29; DATE:2017-02-05
|
||||
public Xomw_MediaHandlerFactory() {
|
||||
// Default, MediaWiki core media handlers
|
||||
// 'image/jpeg' => JpegHandler::class,
|
||||
handlers.Add(Mime__image__png, new Xomw_TransformationalImageHandler(Mime__image__png)); // PngHandler
|
||||
// 'image/gif' => GIFHandler::class,
|
||||
// 'image/tiff' => TiffHandler::class,
|
||||
// 'image/webp' => WebPHandler::class,
|
||||
// 'image/x-ms-bmp' => BmpHandler::class,
|
||||
// 'image/x-bmp' => BmpHandler::class,
|
||||
// 'image/x-xcf' => XCFHandler::class,
|
||||
// 'image/svg+xml' => SvgHandler::class, // official
|
||||
// 'image/svg' => SvgHandler::class, // compat
|
||||
// 'image/vnd.djvu' => DjVuHandler::class, // official
|
||||
// 'image/x.djvu' => DjVuHandler::class, // compat
|
||||
// 'image/x-djvu' => DjVuHandler::class, // compat
|
||||
|
||||
}
|
||||
|
||||
// XO.MW:SYNC:1.29; DATE:2017-02-05
|
||||
public Xomw_MediaHandler getHandler(byte[] type) {
|
||||
return (Xomw_MediaHandler)handlers.Get_by(type);
|
||||
}
|
||||
|
||||
public static byte[]
|
||||
Mime__image__jpeg = Bry_.new_a7("image/jpeg")
|
||||
, Mime__image__png = Bry_.new_a7("image/png")
|
||||
, Mime__image__gif = Bry_.new_a7("image/gif")
|
||||
, Mime__image__tiff = Bry_.new_a7("image/tiff")
|
||||
, Mime__image__webp = Bry_.new_a7("image/webp")
|
||||
, Mime__image__x_ms_bmp = Bry_.new_a7("image/x-ms-bmp")
|
||||
, Mime__image__x_bmp = Bry_.new_a7("image/x-bmp")
|
||||
, Mime__image__x_xcf = Bry_.new_a7("image/x-xcf")
|
||||
, Mime__image__svg_xml = Bry_.new_a7("image/svg+xml")
|
||||
, Mime__image__svg = Bry_.new_a7("image/svg")
|
||||
, Mime__image__vnd_djvu = Bry_.new_a7("image/vnd.djvu")
|
||||
, Mime__image__x_djvu_dot = Bry_.new_a7("image/x.djvu")
|
||||
, Mime__image__x_djvu_dash = Bry_.new_a7("image/x-djvu")
|
||||
;
|
||||
}
|
||||
@@ -1,281 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.media; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
|
||||
import gplx.langs.htmls.*;
|
||||
import gplx.xowa.mws.utls.*;
|
||||
import gplx.xowa.mws.parsers.lnkis.*;
|
||||
import gplx.xowa.mws.filerepo.file.*;
|
||||
public abstract class Xomw_MediaTransformOutput {
|
||||
public Xomw_MediaTransformOutput(Xomw_File file, byte[] url, byte[] path, int width, int height) {
|
||||
this.file = file;
|
||||
this.url = url;
|
||||
this.width = width;
|
||||
this.height = height;
|
||||
}
|
||||
// /** @var array Associative array mapping optional supplementary image files
|
||||
// * from pixel density (eg 1.5 or 2) to additional URLs.
|
||||
// */
|
||||
// public $responsiveUrls = [];
|
||||
|
||||
/** @var File */
|
||||
private final Xomw_File file;
|
||||
|
||||
/** @var int Image width */
|
||||
protected final int width;
|
||||
|
||||
/** @var int Image height */
|
||||
protected final int height;
|
||||
|
||||
/** @var String URL path to the thumb */
|
||||
protected final byte[] url;
|
||||
|
||||
// /** @var boolean|String */
|
||||
// protected $page;
|
||||
//
|
||||
// /** @var boolean|String Filesystem path to the thumb */
|
||||
// protected $path;
|
||||
//
|
||||
// /** @var boolean|String Language code, false if not set */
|
||||
// protected $lang;
|
||||
//
|
||||
// /** @var boolean|String Permanent storage path */
|
||||
// protected $storagePath = false;
|
||||
|
||||
|
||||
/**
|
||||
* @return int Width of the output box
|
||||
*/
|
||||
public int getWidth() {
|
||||
return this.width;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return int Height of the output box
|
||||
*/
|
||||
public int getHeight() {
|
||||
return this.height;
|
||||
}
|
||||
|
||||
// /**
|
||||
// * @return File
|
||||
// */
|
||||
// public function getFile() {
|
||||
// return $this->file;
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Get the final extension of the thumbnail.
|
||||
// * Returns false for scripted transformations.
|
||||
// * @return String|boolean
|
||||
// */
|
||||
// public function getExtension() {
|
||||
// return $this->path ? FileBackend::extensionFromPath( $this->path ) : false;
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * @return String|boolean The thumbnail URL
|
||||
// */
|
||||
// public function getUrl() {
|
||||
// return $this->url;
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * @return String|boolean The permanent thumbnail storage path
|
||||
// */
|
||||
// public function getStoragePath() {
|
||||
// return $this->storagePath;
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * @param String $storagePath The permanent storage path
|
||||
// * @return void
|
||||
// */
|
||||
// public function setStoragePath( $storagePath ) {
|
||||
// $this->storagePath = $storagePath;
|
||||
// if ( $this->path === false ) {
|
||||
// $this->path = $storagePath;
|
||||
// }
|
||||
// }
|
||||
|
||||
/**
|
||||
* Fetch HTML for this transform output
|
||||
*
|
||||
* @param array $options Associative array of options. Boolean options
|
||||
* should be indicated with a value of true for true, and false or
|
||||
* absent for false.
|
||||
*
|
||||
* alt Alternate text or caption
|
||||
* desc-link Boolean, show a description link
|
||||
* file-link Boolean, show a file download link
|
||||
* custom-url-link Custom URL to link to
|
||||
* custom-title-link Custom Title Object to link to
|
||||
* valign vertical-align property, if the output is an inline element
|
||||
* img-class Class applied to the "<img>" tag, if there is such a tag
|
||||
*
|
||||
* For images, desc-link and file-link are implemented as a click-through. For
|
||||
* sounds and videos, they may be displayed in other ways.
|
||||
*
|
||||
* @return String
|
||||
*/
|
||||
public abstract void toHtml(Bry_bfr bfr, Bry_bfr tmp, Xomw_params_mto options);
|
||||
|
||||
// /**
|
||||
// * This will be overridden to return true in error classes
|
||||
// * @return boolean
|
||||
// */
|
||||
// public function isError() {
|
||||
// return false;
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Check if an output thumbnail file actually exists.
|
||||
// *
|
||||
// * This will return false if there was an error, the
|
||||
// * thumbnail is to be handled client-side only, or if
|
||||
// * transformation was deferred via TRANSFORM_LATER.
|
||||
// * This file may exist as a new file in /tmp, a file
|
||||
// * in permanent storage, or even refer to the original.
|
||||
// *
|
||||
// * @return boolean
|
||||
// */
|
||||
// public function hasFile() {
|
||||
// // If TRANSFORM_LATER, $this->path will be false.
|
||||
// // Note: a null path means "use the source file".
|
||||
// return ( !$this->isError() && ( $this->path || $this->path === null ) );
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Check if the output thumbnail is the same as the source.
|
||||
// * This can occur if the requested width was bigger than the source.
|
||||
// *
|
||||
// * @return boolean
|
||||
// */
|
||||
// public function fileIsSource() {
|
||||
// return ( !$this->isError() && $this->path === null );
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Get the path of a file system copy of the thumbnail.
|
||||
// * Callers should never write to this path.
|
||||
// *
|
||||
// * @return String|boolean Returns false if there isn't one
|
||||
// */
|
||||
// public function getLocalCopyPath() {
|
||||
// if ( $this->isError() ) {
|
||||
// return false;
|
||||
// } elseif ( $this->path === null ) {
|
||||
// return $this->file->getLocalRefPath(); // assume thumb was not scaled
|
||||
// } elseif ( FileBackend::isStoragePath( $this->path ) ) {
|
||||
// $be = $this->file->getRepo()->getBackend();
|
||||
// // The temp file will be process cached by FileBackend
|
||||
// $fsFile = $be->getLocalReference( [ 'src' => $this->path ] );
|
||||
//
|
||||
// return $fsFile ? $fsFile->getPath() : false;
|
||||
// } else {
|
||||
// return $this->path; // may return false
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Stream the file if there were no errors
|
||||
// *
|
||||
// * @param array $headers Additional HTTP headers to send on success
|
||||
// * @return Status
|
||||
// * @since 1.27
|
||||
// */
|
||||
// public function streamFileWithStatus( $headers = [] ) {
|
||||
// if ( !$this->path ) {
|
||||
// return Status::newFatal( 'backend-fail-stream', '<no path>' );
|
||||
// } elseif ( FileBackend::isStoragePath( $this->path ) ) {
|
||||
// $be = $this->file->getRepo()->getBackend();
|
||||
// return $be->streamFile( [ 'src' => $this->path, 'headers' => $headers ] );
|
||||
// } else { // FS-file
|
||||
// $success = StreamFile::stream( $this->getLocalCopyPath(), $headers );
|
||||
// return $success ? Status::newGood() : Status::newFatal( 'backend-fail-stream', $this->path );
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Stream the file if there were no errors
|
||||
// *
|
||||
// * @deprecated since 1.26, use streamFileWithStatus
|
||||
// * @param array $headers Additional HTTP headers to send on success
|
||||
// * @return boolean Success
|
||||
// */
|
||||
// public function streamFile( $headers = [] ) {
|
||||
// $this->streamFileWithStatus( $headers )->isOK();
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Wrap some XHTML text in an anchor tag with the given attributes
|
||||
// *
|
||||
// * @param array $linkAttribs
|
||||
// * @param String $contents
|
||||
// * @return String
|
||||
// */
|
||||
// protected function linkWrap( $linkAttribs, $contents ) {
|
||||
// if ( $linkAttribs ) {
|
||||
// return Xml::tags( 'a', $linkAttribs, $contents );
|
||||
// } else {
|
||||
// return $contents;
|
||||
// }
|
||||
// }
|
||||
|
||||
/**
|
||||
* @param String $title
|
||||
* @param String|array $prms Query parameters to add
|
||||
* @return array
|
||||
*/
|
||||
public void getDescLinkAttribs(List_adp attribs, byte[] title, List_adp prms) {
|
||||
// if ( is_array( prms ) ) {
|
||||
// $query = prms;
|
||||
// } else {
|
||||
// $query = [];
|
||||
// }
|
||||
// if ( $this->page && $this->page !== 1 ) {
|
||||
// $query['page'] = $this->page;
|
||||
// }
|
||||
// if ( $this->lang ) {
|
||||
// $query['lang'] = $this->lang;
|
||||
// }
|
||||
//
|
||||
// if ( is_string( prms ) && prms !== '' ) {
|
||||
// $query = prms . '&' . wfArrayToCgi( $query );
|
||||
// }
|
||||
|
||||
attribs.Clear();
|
||||
// 'href' => $this->file->getTitle()->getLocalURL( $query ),
|
||||
attribs.Add_many(Gfh_atr_.Bry__href, this.file.getTitle());
|
||||
attribs.Add_many(Gfh_atr_.Bry__class, Bry__class__image);
|
||||
if (title != null) {
|
||||
attribs.Add_many(Gfh_atr_.Bry__title, title);
|
||||
}
|
||||
}
|
||||
|
||||
// Wrap some XHTML text in an anchor tag with the given attributes
|
||||
// XO.MW:SYNC:1.29; DATE:2017-02-03
|
||||
protected void Link_wrap(Bry_bfr bfr, List_adp link_attribs, byte[] contents) {
|
||||
if (link_attribs != null) {
|
||||
Xomw_xml.Tags(bfr, Gfh_tag_.Bry__a, link_attribs, contents);
|
||||
}
|
||||
else {
|
||||
bfr.Add(contents);
|
||||
}
|
||||
}
|
||||
private static final byte[] Bry__class__image = Bry_.new_a7("image");
|
||||
}
|
||||
@@ -1,214 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.media; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
|
||||
import gplx.langs.htmls.*;
|
||||
import gplx.xowa.mws.utls.*;
|
||||
import gplx.xowa.mws.parsers.lnkis.*;
|
||||
import gplx.xowa.mws.filerepo.file.*;
|
||||
// Media transform output for images
|
||||
public class Xomw_ThumbnailImage extends Xomw_MediaTransformOutput { private final List_adp attribs = List_adp_.New(), link_attribs = List_adp_.New();
|
||||
public Xomw_ThumbnailImage(Xomw_File file, byte[] url, byte[] path, int w, int h) {super(file, url, path, w, h);
|
||||
}
|
||||
/**
|
||||
* Get a thumbnail Object from a file and parameters.
|
||||
* If path is set to null, the output file is treated as a source copy.
|
||||
* If path is set to false, no output file will be created.
|
||||
* parameters should include, as a minimum, (file) 'width' and 'height'.
|
||||
* It may also include a 'page' parameter for multipage files.
|
||||
*
|
||||
* @param File file
|
||||
* @param String url URL path to the thumb
|
||||
* @param String|boolean path Filesystem path to the thumb
|
||||
* @param array parameters Associative array of parameters
|
||||
*/
|
||||
public Xomw_ThumbnailImage(Xomw_File file, byte[] url, byte[] path, Xomw_params_handler parameters) {
	// XO: only the "array" form of the MW ctor is ported: width / height are read from the handler params
	// and passed, together with file / url / path, to the superclass ctor.
	super(file, url, path, parameters.width, parameters.height);

	// MW reference code (not ported):
	//	defaults = [
	//		'page' => false,
	//		'lang' => false
	//	];
	//
	//	if (is_array(parameters)) {
	//		actualParams = parameters + defaults;
	//	} else {
	//		// Using old format, should convert. Later a warning could be added here.
	//		numArgs = func_num_args();
	//		actualParams = [
	//			'width' => path,
	//			'height' => parameters,
	//			'page' => (numArgs > 5) ? func_get_arg(5) : false
	//		] + defaults;
	//		path = (numArgs > 4) ? func_get_arg(4) : false;
	//	}

	//	this->file = file;
	//	this->url = url;
	//	this->path = path;

	//	These should be integers when they get here.
	//	If not, there's a bug somewhere. But let's at
	//	least produce valid HTML code regardless.
	//	this->width = round(actualParams['width']);
	//	this->height = round(actualParams['height']);

	//	this->page = actualParams['page'];
	//	this->lang = actualParams['lang'];
}
|
||||
|
||||
/**
|
||||
* Return HTML <img ... /> tag for the thumbnail, will include
|
||||
* width and height attributes and a blank alt text (as required).
|
||||
*
|
||||
* @param array options Associative array of options. Boolean options
|
||||
* should be indicated with a value of true for true, and false or
|
||||
* absent for false.
|
||||
*
|
||||
* alt HTML alt attribute
|
||||
* title HTML title attribute
|
||||
* desc-link Boolean, show a description link
|
||||
* file-link Boolean, show a file download link
|
||||
* valign vertical-align property, if the output is an inline element
|
||||
* img-class Class applied to the \<img\> tag, if there is such a tag
|
||||
* desc-query String, description link query prms
|
||||
* override-width Override width attribute. Should generally not set
|
||||
* override-height Override height attribute. Should generally not set
|
||||
* no-dimensions Boolean, skip width and height attributes (useful if
|
||||
* set in CSS)
|
||||
* custom-url-link Custom URL to link to
|
||||
* custom-title-link Custom Title Object to link to
|
||||
* custom target-link Value of the target attribute, for custom-target-link
|
||||
* parser-extlink-* Attributes added by parser for external links:
|
||||
* parser-extlink-rel: add rel="nofollow"
|
||||
* parser-extlink-target: link target, but overridden by custom-target-link
|
||||
*
|
||||
* For images, desc-link and file-link are implemented as a click-through. For
|
||||
* sounds and videos, they may be displayed in other ways.
|
||||
*
|
||||
* @throws MWException
|
||||
* @return String
|
||||
*/
|
||||
// Return HTML <img ... /> tag for the thumbnail, will include
|
||||
// width and height attributes and a blank alt text (as required).
|
||||
//
|
||||
// @param array options Associative array of options. Boolean options
|
||||
// should be indicated with a value of true for true, and false or
|
||||
// absent for false.
|
||||
//
|
||||
// alt HTML alt attribute
|
||||
// title HTML title attribute
|
||||
// desc-link Boolean, show a description link
|
||||
// file-link Boolean, show a file download link
|
||||
// valign vertical-align property, if the output is an inline element
|
||||
// img-class Class applied to the \<img\> tag, if there is such a tag
|
||||
// desc-query String, description link query prms
|
||||
// override-width Override width attribute. Should generally not set
|
||||
// override-height Override height attribute. Should generally not set
|
||||
// no-dimensions Boolean, skip width and height attributes (useful if
|
||||
// set in CSS)
|
||||
// custom-url-link Custom URL to link to
|
||||
// custom-title-link Custom Title Object to link to
|
||||
// custom target-link Value of the target attribute, for custom-target-link
|
||||
// parser-extlink-* Attributes added by parser for external links:
|
||||
// parser-extlink-rel: add rel="nofollow"
|
||||
// parser-extlink-target: link target, but overridden by custom-target-link
|
||||
//
|
||||
// For images, desc-link and file-link are implemented as a click-through. For
|
||||
// sounds and videos, they may be displayed in other ways.
|
||||
// XO.MW:SYNC:1.29; DATE:2017-02-03
|
||||
@Override public void toHtml(Bry_bfr bfr, Bry_bfr tmp, Xomw_params_mto options) {
|
||||
byte[] alt = options.alt;
|
||||
|
||||
// byte[] query = options.desc_query;
|
||||
|
||||
attribs.Clear();
|
||||
attribs.Add_many(Gfh_atr_.Bry__alt, alt);
|
||||
attribs.Add_many(Gfh_atr_.Bry__src, url);
|
||||
boolean link_attribs_is_null = false;
|
||||
if (!Php_utl_.empty(options.custom_url_link)) {
|
||||
link_attribs.Clear();
|
||||
link_attribs.Add_many(Gfh_atr_.Bry__href, options.custom_url_link);
|
||||
if (!Php_utl_.empty(options.title)) {
|
||||
link_attribs.Add_many(Gfh_atr_.Bry__title, options.title);
|
||||
}
|
||||
if (Php_utl_.empty(options.custom_target_link)) {
|
||||
link_attribs.Add_many(Gfh_atr_.Bry__target, options.custom_target_link);
|
||||
}
|
||||
else if (Php_utl_.empty(options.parser_extlink_target)) {
|
||||
link_attribs.Add_many(Gfh_atr_.Bry__target, options.parser_extlink_target);
|
||||
}
|
||||
if (Php_utl_.empty(options.parser_extlink_rel)) {
|
||||
link_attribs.Add_many(Gfh_atr_.Bry__rel, options.parser_extlink_rel);
|
||||
}
|
||||
}
|
||||
else if (!Php_utl_.empty(options.custom_title_link)) {
|
||||
// byte[] title = options.custom_title_link;
|
||||
// link_attribs.Clear();
|
||||
// link_attribs.Add_many(Gfh_atr_.Bry__href, title.Get_link_url());
|
||||
// byte[] options_title = options.title;
|
||||
// link_attribs.Add_many(Gfh_atr_.Bry__title, Php_utl_.empty(options_title) ? title.Get_full_text() : options_title);
|
||||
}
|
||||
else if (!Php_utl_.empty(options.desc_link)) {
|
||||
// link_attribs = this.getDescLinkAttribs(
|
||||
// empty(options['title']) ? null : options['title'],
|
||||
// $query
|
||||
// );
|
||||
link_attribs.Clear();
|
||||
this.getDescLinkAttribs(link_attribs,
|
||||
Php_utl_.empty(options.title) ? null : options.title,
|
||||
null);
|
||||
}
|
||||
else if (!Php_utl_.empty(options.file_link)) {
|
||||
// link_attribs.Clear();
|
||||
// link_attribs.Add_many(Gfh_atr_.Bry__href, file.Get_url());
|
||||
}
|
||||
else {
|
||||
link_attribs_is_null = true;
|
||||
if (!Php_utl_.empty(options.title)) {
|
||||
attribs.Add_many(Gfh_atr_.Bry__title, options.title);
|
||||
}
|
||||
}
|
||||
|
||||
if (!Php_utl_.empty(options.no_dimensions)) {
|
||||
attribs.Add_many(Gfh_atr_.Bry__width, Int_.To_bry(width));
|
||||
attribs.Add_many(Gfh_atr_.Bry__height, Int_.To_bry(height));
|
||||
}
|
||||
if (!Php_utl_.empty(options.valign)) {
|
||||
attribs.Add_many(Gfh_atr_.Bry__style, Bry_.Add(Bry__vertical_align, options.valign));
|
||||
}
|
||||
if (!Php_utl_.empty(options.img_cls)) {
|
||||
attribs.Add_many(Gfh_atr_.Bry__class, options.img_cls);
|
||||
}
|
||||
if (Php_utl_.isset(options.override_height)) {
|
||||
attribs.Add_many(Gfh_atr_.Bry__class, options.override_height);
|
||||
}
|
||||
if (Php_utl_.isset(options.override_width)) {
|
||||
attribs.Add_many(Gfh_atr_.Bry__width, options.override_height);
|
||||
}
|
||||
|
||||
// Additional densities for responsive images, if specified.
|
||||
// If any of these urls is the same as src url, it'll be excluded.
|
||||
// $responsiveUrls = array_diff(this.responsiveUrls, [ this.url ]);
|
||||
// if (!Php_utl_.empty($responsiveUrls)) {
|
||||
// $attribs['srcset'] = Html::srcSet($responsiveUrls);
|
||||
// }
|
||||
|
||||
// XO.MW.HOOK:ThumbnailBeforeProduceHTML
|
||||
Xomw_xml.Element(tmp, Gfh_tag_.Bry__img, attribs, Bry_.Empty, Bool_.Y);
|
||||
Link_wrap(bfr, link_attribs_is_null ? null : link_attribs, tmp.To_bry_and_clear());
|
||||
}
|
||||
private static final byte[] Bry__vertical_align = Bry_.new_a7("vertical-align: ");
|
||||
}
|
||||
@@ -1,611 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.media; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
|
||||
import gplx.xowa.mws.filerepo.file.*;
|
||||
import gplx.xowa.mws.parsers.lnkis.*;
|
||||
public class Xomw_TransformationalImageHandler extends Xomw_ImageHandler { public Xomw_TransformationalImageHandler(byte[] key) {super(key);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param File image
|
||||
* @param array paramsVar Transform parameters. Entries with the keys 'width'
|
||||
* and 'height' are the respective screen width and height, while the keys
|
||||
* 'physicalWidth' and 'physicalHeight' indicate the thumbnail dimensions.
|
||||
* @return boolean
|
||||
*/
|
||||
@Override public boolean normaliseParams(Xomw_File image, Xomw_params_handler prms) {
|
||||
if (!super.normaliseParams(image, prms)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Obtain the source, pre-rotation dimensions
|
||||
int srcWidth = image.getWidth(prms.page);
|
||||
int srcHeight = image.getHeight(prms.page);
|
||||
|
||||
// Don't make an image bigger than the source
|
||||
if (prms.physicalWidth >= srcWidth) {
|
||||
prms.physicalWidth = srcWidth;
|
||||
prms.physicalHeight = srcHeight;
|
||||
|
||||
// Skip scaling limit checks if no scaling is required
|
||||
// due to requested size being bigger than source.
|
||||
if (!image.mustRender()) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// /**
|
||||
// * Extracts the width/height if the image will be scaled before rotating
|
||||
// *
|
||||
// * This will match the physical size/aspect ratio of the original image
|
||||
// * prior to application of the rotation -- so for a portrait image that's
|
||||
// * stored as raw landscape with 90-degress rotation, the resulting size
|
||||
// * will be wider than it is tall.
|
||||
// *
|
||||
// * @param array paramsVar Parameters as returned by normaliseParams
|
||||
// * @param int rotation The rotation angle that will be applied
|
||||
// * @return array (width, height) array
|
||||
// */
|
||||
// public function extractPreRotationDimensions(paramsVar, rotation) {
|
||||
// if (rotation == 90 || rotation == 270) {
|
||||
// // We'll resize before rotation, so swap the dimensions again
|
||||
// width = paramsVar['physicalHeight'];
|
||||
// height = paramsVar['physicalWidth'];
|
||||
// } else {
|
||||
// width = paramsVar['physicalWidth'];
|
||||
// height = paramsVar['physicalHeight'];
|
||||
// }
|
||||
//
|
||||
// return [ width, height ];
|
||||
// }
|
||||
//
|
||||
/**
|
||||
* Create a thumbnail.
|
||||
*
|
||||
* This sets up various parameters, and then calls a helper method
|
||||
* based on this.getScalerType in order to scale the image.
|
||||
*
|
||||
* @param File image
|
||||
* @param String dstPath
|
||||
* @param String dstUrl
|
||||
* @param array paramsVar
|
||||
* @param int flags
|
||||
* @return MediaTransformError|ThumbnailImage|TransformParameterError
|
||||
*/
|
||||
@Override public Xomw_MediaTransformOutput doTransform(Xomw_File image, byte[] dstPath, byte[] dstUrl, Xomw_params_handler prms, int flags) {
|
||||
// if (!this.normaliseParams(image, paramsVar)) {
|
||||
// return new TransformParameterError(paramsVar);
|
||||
// }
|
||||
//
|
||||
// // Create a parameter array to pass to the scaler
|
||||
Xomw_params_scalar scalerParams = new Xomw_params_scalar();
|
||||
// // The size to which the image will be resized
|
||||
scalerParams.physicalWidth = prms.physicalWidth;
|
||||
scalerParams.physicalHeight = prms.physicalHeight;
|
||||
// 'physicalDimensions' => "{paramsVar['physicalWidth']}x{paramsVar['physicalHeight']}",
|
||||
// The size of the image on the page
|
||||
scalerParams.clientWidth = prms.width;
|
||||
scalerParams.clientHeight = prms.height;
|
||||
// Comment as will be added to the Exif of the thumbnail
|
||||
// 'comment' => isset(paramsVar['descriptionUrl'])
|
||||
// ? "File source: {paramsVar['descriptionUrl']}"
|
||||
// : '',
|
||||
// Properties of the original image
|
||||
scalerParams.srcWidth = image.getWidth();
|
||||
scalerParams.srcHeight = image.getHeight();
|
||||
scalerParams.mimeType = image.getMimeType();
|
||||
scalerParams.dstPath = dstPath;
|
||||
scalerParams.dstUrl = dstUrl;
|
||||
// 'interlace' => isset(paramsVar['interlace']) ? paramsVar['interlace'] : false,
|
||||
|
||||
// if (isset(paramsVar['quality']) && paramsVar['quality'] === 'low') {
|
||||
// scalerParams['quality'] = 30;
|
||||
// }
|
||||
|
||||
// For subclasses that might be paged.
|
||||
// if (image.isMultipage() && isset(paramsVar['page'])) {
|
||||
// scalerParams['page'] = intval(paramsVar['page']);
|
||||
// }
|
||||
|
||||
// Determine scaler type
|
||||
// scaler = this.getScalerType(dstPath);
|
||||
//
|
||||
// if (is_array(scaler)) {
|
||||
// scalerName = get_class(scaler[0]);
|
||||
// } else {
|
||||
// scalerName = scaler;
|
||||
// }
|
||||
//
|
||||
// wfDebug(__METHOD__ . ": creating {scalerParams['physicalDimensions']} " .
|
||||
// "thumbnail at dstPath using scaler scalerName\n");
|
||||
|
||||
if (!image.mustRender() &&
|
||||
scalerParams.physicalWidth == scalerParams.srcWidth
|
||||
&& scalerParams.physicalHeight == scalerParams.srcHeight
|
||||
// && !isset(scalerParams['quality'])
|
||||
) {
|
||||
|
||||
// normaliseParams (or the user) wants us to return the unscaled image
|
||||
// wfDebug(__METHOD__ . ": returning unscaled image\n");
|
||||
|
||||
return this.getClientScalingThumbnailImage(image, scalerParams);
|
||||
}
|
||||
|
||||
// if (scaler == 'client') {
|
||||
// // Client-side image scaling, use the source URL
|
||||
// // Using the destination URL in a TRANSFORM_LATER request would be incorrect
|
||||
// return this.getClientScalingThumbnailImage(image, scalerParams);
|
||||
// }
|
||||
//
|
||||
// if (image.isTransformedLocally() && !this.isImageAreaOkForThumbnaling(image, paramsVar)) {
|
||||
// global wgMaxImageArea;
|
||||
// return new TransformTooBigImageAreaError(paramsVar, wgMaxImageArea);
|
||||
// }
|
||||
//
|
||||
// if (flags & self::TRANSFORM_LATER) {
|
||||
// wfDebug(__METHOD__ . ": Transforming later per flags.\n");
|
||||
// newParams = [
|
||||
// 'width' => scalerParams['clientWidth'],
|
||||
// 'height' => scalerParams['clientHeight']
|
||||
// ];
|
||||
// if (isset(paramsVar['quality'])) {
|
||||
// newParams['quality'] = paramsVar['quality'];
|
||||
// }
|
||||
// if (isset(paramsVar['page']) && paramsVar['page']) {
|
||||
// newParams['page'] = paramsVar['page'];
|
||||
// }
|
||||
// return new Xomw_ThumbnailImage(image, dstUrl, null, newParams);
|
||||
return new Xomw_ThumbnailImage(image, dstUrl, null, prms);
|
||||
// }
|
||||
//
|
||||
// // Try to make a target path for the thumbnail
|
||||
// if (!wfMkdirParents(dirname(dstPath), null, __METHOD__)) {
|
||||
// wfDebug(__METHOD__ . ": Unable to create thumbnail destination " .
|
||||
// "directory, falling back to client scaling\n");
|
||||
//
|
||||
// return this.getClientScalingThumbnailImage(image, scalerParams);
|
||||
// }
|
||||
//
|
||||
// // Transform functions and binaries need a FS source file
|
||||
// thumbnailSource = this.getThumbnailSource(image, paramsVar);
|
||||
//
|
||||
// // If the source isn't the original, disable EXIF rotation because it's already been applied
|
||||
// if (scalerParams['srcWidth'] != thumbnailSource['width']
|
||||
// || scalerParams['srcHeight'] != thumbnailSource['height']) {
|
||||
// scalerParams['disableRotation'] = true;
|
||||
// }
|
||||
//
|
||||
// scalerParams['srcPath'] = thumbnailSource['path'];
|
||||
// scalerParams['srcWidth'] = thumbnailSource['width'];
|
||||
// scalerParams['srcHeight'] = thumbnailSource['height'];
|
||||
//
|
||||
// if (scalerParams['srcPath'] === false) { // Failed to get local copy
|
||||
// wfDebugLog('thumbnail',
|
||||
// sprintf('Thumbnail failed on %s: could not get local copy of "%s"',
|
||||
// wfHostname(), image.getName()));
|
||||
//
|
||||
// return new MediaTransformError('thumbnail_error',
|
||||
// scalerParams['clientWidth'], scalerParams['clientHeight'],
|
||||
// wfMessage('filemissing')
|
||||
// );
|
||||
// }
|
||||
//
|
||||
// // Try a hook. Called "Bitmap" for historical reasons.
|
||||
// /** @var mto MediaTransformOutput */
|
||||
// mto = null;
|
||||
// Hooks::run('BitmapHandlerTransform', [ this, image, &scalerParams, &mto ]);
|
||||
// if (!is_null(mto)) {
|
||||
// wfDebug(__METHOD__ . ": Hook to BitmapHandlerTransform created an mto\n");
|
||||
// scaler = 'hookaborted';
|
||||
// }
|
||||
//
|
||||
// // scaler will return a MediaTransformError on failure, or false on success.
|
||||
// // If the scaler is succesful, it will have created a thumbnail at the destination
|
||||
// // path.
|
||||
// if (is_array(scaler) && is_callable(scaler)) {
|
||||
// // Allow subclasses to specify their own rendering methods.
|
||||
// err = call_user_func(scaler, image, scalerParams);
|
||||
// } else {
|
||||
// switch (scaler) {
|
||||
// case 'hookaborted':
|
||||
// // Handled by the hook above
|
||||
// err = mto.isError() ? mto : false;
|
||||
// break;
|
||||
// case 'im':
|
||||
// err = this.transformImageMagick(image, scalerParams);
|
||||
// break;
|
||||
// case 'custom':
|
||||
// err = this.transformCustom(image, scalerParams);
|
||||
// break;
|
||||
// case 'imext':
|
||||
// err = this.transformImageMagickExt(image, scalerParams);
|
||||
// break;
|
||||
// case 'gd':
|
||||
// default:
|
||||
// err = this.transformGd(image, scalerParams);
|
||||
// break;
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// // Remove the file if a zero-byte thumbnail was created, or if there was an error
|
||||
// removed = this.removeBadFile(dstPath, (boolean)err);
|
||||
// if (err) {
|
||||
// // transform returned MediaTransforError
|
||||
// return err;
|
||||
// } elseif (removed) {
|
||||
// // Thumbnail was zero-byte and had to be removed
|
||||
// return new MediaTransformError('thumbnail_error',
|
||||
// scalerParams['clientWidth'], scalerParams['clientHeight'],
|
||||
// wfMessage('unknown-error')
|
||||
// );
|
||||
// } elseif (mto) {
|
||||
// return mto;
|
||||
// } else {
|
||||
// newParams = [
|
||||
// 'width' => scalerParams['clientWidth'],
|
||||
// 'height' => scalerParams['clientHeight']
|
||||
// ];
|
||||
// if (isset(paramsVar['quality'])) {
|
||||
// newParams['quality'] = paramsVar['quality'];
|
||||
// }
|
||||
// if (isset(paramsVar['page']) && paramsVar['page']) {
|
||||
// newParams['page'] = paramsVar['page'];
|
||||
// }
|
||||
// return new ThumbnailImage(image, dstUrl, dstPath, newParams);
|
||||
// }
|
||||
// return null;
|
||||
}
|
||||
|
||||
// /**
|
||||
// * Get the source file for the transform
|
||||
// *
|
||||
// * @param File file
|
||||
// * @param array paramsVar
|
||||
// * @return array Array with keys width, height and path.
|
||||
// */
|
||||
// protected function getThumbnailSource(file, paramsVar) {
|
||||
// return file.getThumbnailSource(paramsVar);
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Returns what sort of scaler type should be used.
|
||||
// *
|
||||
// * Values can be one of client, im, custom, gd, imext, or an array
|
||||
// * of Object, method-name to call that specific method.
|
||||
// *
|
||||
// * If specifying a custom scaler command with [ Obj, method ],
|
||||
// * the method in question should take 2 parameters, a File Object,
|
||||
// * and a scalerParams array with various options (See doTransform
|
||||
// * for what is in scalerParams). On error it should return a
|
||||
// * MediaTransformError Object. On success it should return false,
|
||||
// * and simply make sure the thumbnail file is located at
|
||||
// * scalerParams['dstPath'].
|
||||
// *
|
||||
// * If there is a problem with the output path, it returns "client"
|
||||
// * to do client side scaling.
|
||||
// *
|
||||
// * @param String dstPath
|
||||
// * @param boolean checkDstPath Check that dstPath is valid
|
||||
// * @return String|Callable One of client, im, custom, gd, imext, or a Callable array.
|
||||
// */
|
||||
// abstract protected function getScalerType(dstPath, checkDstPath = true);
|
||||
|
||||
/**
|
||||
* Get a ThumbnailImage that represents an image that will be scaled
|
||||
* client side
|
||||
*
|
||||
* @param File image File associated with this thumbnail
|
||||
* @param array scalerParams Array with scaler paramsVar
|
||||
* @return ThumbnailImage
|
||||
*
|
||||
* @todo FIXME: No rotation support
|
||||
*/
|
||||
private Xomw_ThumbnailImage getClientScalingThumbnailImage(Xomw_File image, Xomw_params_scalar scalerParams) {
|
||||
Xomw_params_handler prms = new Xomw_params_handler();
|
||||
prms.width = scalerParams.clientWidth;
|
||||
prms.height = scalerParams.clientHeight;
|
||||
|
||||
return new Xomw_ThumbnailImage(image, image.getUrl(), null, prms);
|
||||
}
|
||||
|
||||
// /**
|
||||
// * Transform an image using ImageMagick
|
||||
// *
|
||||
// * This is a stub method. The real method is in BitmapHander.
|
||||
// *
|
||||
// * @param File image File associated with this thumbnail
|
||||
// * @param array paramsVar Array with scaler paramsVar
|
||||
// *
|
||||
// * @return MediaTransformError Error Object if error occurred, false (=no error) otherwise
|
||||
// */
|
||||
// protected function transformImageMagick(image, paramsVar) {
|
||||
// return this.getMediaTransformError(paramsVar, "Unimplemented");
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Transform an image using the Imagick PHP extension
|
||||
// *
|
||||
// * This is a stub method. The real method is in BitmapHander.
|
||||
// *
|
||||
// * @param File image File associated with this thumbnail
|
||||
// * @param array paramsVar Array with scaler paramsVar
|
||||
// *
|
||||
// * @return MediaTransformError Error Object if error occurred, false (=no error) otherwise
|
||||
// */
|
||||
// protected function transformImageMagickExt(image, paramsVar) {
|
||||
// return this.getMediaTransformError(paramsVar, "Unimplemented");
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Transform an image using a custom command
|
||||
// *
|
||||
// * This is a stub method. The real method is in BitmapHander.
|
||||
// *
|
||||
// * @param File image File associated with this thumbnail
|
||||
// * @param array paramsVar Array with scaler paramsVar
|
||||
// *
|
||||
// * @return MediaTransformError Error Object if error occurred, false (=no error) otherwise
|
||||
// */
|
||||
// protected function transformCustom(image, paramsVar) {
|
||||
// return this.getMediaTransformError(paramsVar, "Unimplemented");
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Get a MediaTransformError with error 'thumbnail_error'
|
||||
// *
|
||||
// * @param array paramsVar Parameter array as passed to the transform* functions
|
||||
// * @param String errMsg Error message
|
||||
// * @return MediaTransformError
|
||||
// */
|
||||
// public function getMediaTransformError(paramsVar, errMsg) {
|
||||
// return new MediaTransformError('thumbnail_error', paramsVar['clientWidth'],
|
||||
// paramsVar['clientHeight'], errMsg);
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Transform an image using the built in GD library
|
||||
// *
|
||||
// * This is a stub method. The real method is in BitmapHander.
|
||||
// *
|
||||
// * @param File image File associated with this thumbnail
|
||||
// * @param array paramsVar Array with scaler paramsVar
|
||||
// *
|
||||
// * @return MediaTransformError Error Object if error occurred, false (=no error) otherwise
|
||||
// */
|
||||
// protected function transformGd(image, paramsVar) {
|
||||
// return this.getMediaTransformError(paramsVar, "Unimplemented");
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Escape a String for ImageMagick's property input (e.g. -set -comment)
|
||||
// * See InterpretImageProperties() in magick/property.c
|
||||
// * @param String s
|
||||
// * @return String
|
||||
// */
|
||||
// function escapeMagickProperty(s) {
|
||||
// // Double the backslashes
|
||||
// s = str_replace('\\', '\\\\', s);
|
||||
// // Double the percents
|
||||
// s = str_replace('%', '%%', s);
|
||||
// // Escape initial - or @
|
||||
// if (strlen(s) > 0 && (s[0] === '-' || s[0] === '@')) {
|
||||
// s = '\\' . s;
|
||||
// }
|
||||
//
|
||||
// return s;
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Escape a String for ImageMagick's input filenames. See ExpandFilenames()
|
||||
// * and GetPathComponent() in magick/utility.c.
|
||||
// *
|
||||
// * This won't work with an initial ~ or @, so input files should be prefixed
|
||||
// * with the directory name.
|
||||
// *
|
||||
// * Glob character unescaping is broken in ImageMagick before 6.6.1-5, but
|
||||
// * it's broken in a way that doesn't involve trying to convert every file
|
||||
// * in a directory, so we're better off escaping and waiting for the bugfix
|
||||
// * to filter down to users.
|
||||
// *
|
||||
// * @param String path The file path
|
||||
// * @param boolean|String scene The scene specification, or false if there is none
|
||||
// * @throws MWException
|
||||
// * @return String
|
||||
// */
|
||||
// function escapeMagickInput(path, scene = false) {
|
||||
// // Die on initial metacharacters (caller should prepend path)
|
||||
// firstChar = substr(path, 0, 1);
|
||||
// if (firstChar === '~' || firstChar === '@') {
|
||||
// throw new MWException(__METHOD__ . ': cannot escape this path name');
|
||||
// }
|
||||
//
|
||||
// // Escape glob chars
|
||||
// path = preg_replace('/[*?\[\]{}]/', '\\\\\0', path);
|
||||
//
|
||||
// return this.escapeMagickPath(path, scene);
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Escape a String for ImageMagick's output filename. See
|
||||
// * InterpretImageFilename() in magick/image.c.
|
||||
// * @param String path The file path
|
||||
// * @param boolean|String scene The scene specification, or false if there is none
|
||||
// * @return String
|
||||
// */
|
||||
// function escapeMagickOutput(path, scene = false) {
|
||||
// path = str_replace('%', '%%', path);
|
||||
//
|
||||
// return this.escapeMagickPath(path, scene);
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Armour a String against ImageMagick's GetPathComponent(). This is a
|
||||
// * helper function for escapeMagickInput() and escapeMagickOutput().
|
||||
// *
|
||||
// * @param String path The file path
|
||||
// * @param boolean|String scene The scene specification, or false if there is none
|
||||
// * @throws MWException
|
||||
// * @return String
|
||||
// */
|
||||
// protected function escapeMagickPath(path, scene = false) {
|
||||
// // Die on format specifiers (other than drive letters). The regex is
|
||||
// // meant to match all the formats you get from "convert -list format"
|
||||
// if (preg_match('/^([a-zA-Z0-9-]+):/', path, m)) {
|
||||
// if (wfIsWindows() && is_dir(m[0])) {
|
||||
// // OK, it's a drive letter
|
||||
// // ImageMagick has a similar exception, see IsMagickConflict()
|
||||
// } else {
|
||||
// throw new MWException(__METHOD__ . ': unexpected colon character in path name');
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// // If there are square brackets, add a do-nothing scene specification
|
||||
// // to force a literal interpretation
|
||||
// if (scene === false) {
|
||||
// if (strpos(path, '[') !== false) {
|
||||
// path .= '[0--1]';
|
||||
// }
|
||||
// } else {
|
||||
// path .= "[scene]";
|
||||
// }
|
||||
//
|
||||
// return path;
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Retrieve the version of the installed ImageMagick
|
||||
// * You can use PHPs version_compare() to use this value
|
||||
// * Value is cached for one hour.
|
||||
// * @return String|boolean Representing the IM version; false on error
|
||||
// */
|
||||
// protected function getMagickVersion() {
|
||||
// cache = MediaWikiServices::getInstance().getLocalServerObjectCache();
|
||||
// method = __METHOD__;
|
||||
// return cache.getWithSetCallback(
|
||||
// 'imagemagick-version',
|
||||
// cache::TTL_HOUR,
|
||||
// function () use (method) {
|
||||
// global wgImageMagickConvertCommand;
|
||||
//
|
||||
// cmd = wfEscapeShellArg(wgImageMagickConvertCommand) . ' -version';
|
||||
// wfDebug(method . ": Running convert -version\n");
|
||||
// retval = '';
|
||||
// return = wfShellExec(cmd, retval);
|
||||
// x = preg_match(
|
||||
// '/Version: ImageMagick ([0-9]*\.[0-9]*\.[0-9]*)/', return, matches
|
||||
// );
|
||||
// if (x != 1) {
|
||||
// wfDebug(method . ": ImageMagick version check failed\n");
|
||||
// return false;
|
||||
// }
|
||||
//
|
||||
// return matches[1];
|
||||
// }
|
||||
// );
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Returns whether the current scaler supports rotation.
|
||||
// *
|
||||
// * @since 1.24 No longer static
|
||||
// * @return boolean
|
||||
// */
|
||||
// public function canRotate() {
|
||||
// return false;
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Should we automatically rotate an image based on exif
|
||||
// *
|
||||
// * @since 1.24 No longer static
|
||||
// * @see wgEnableAutoRotation
|
||||
// * @return boolean Whether auto rotation is enabled
|
||||
// */
|
||||
// public function autoRotateEnabled() {
|
||||
// return false;
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Rotate a thumbnail.
|
||||
// *
|
||||
// * This is a stub. See BitmapHandler::rotate.
|
||||
// *
|
||||
// * @param File file
|
||||
// * @param array paramsVar Rotate parameters.
|
||||
// * 'rotation' clockwise rotation in degrees, allowed are multiples of 90
|
||||
// * @since 1.24 Is non-static. From 1.21 it was static
|
||||
// * @return boolean|MediaTransformError
|
||||
// */
|
||||
// public function rotate(file, paramsVar) {
|
||||
// return new MediaTransformError('thumbnail_error', 0, 0,
|
||||
// get_class(this) . ' rotation not implemented');
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Returns whether the file needs to be rendered. Returns true if the
|
||||
// * file requires rotation and we are able to rotate it.
|
||||
// *
|
||||
// * @param File file
|
||||
// * @return boolean
|
||||
// */
|
||||
// public function mustRender(file) {
|
||||
// return this.canRotate() && this.getRotation(file) != 0;
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Check if the file is smaller than the maximum image area for thumbnailing.
|
||||
// *
|
||||
// * Runs the 'BitmapHandlerCheckImageArea' hook.
|
||||
// *
|
||||
// * @param File file
|
||||
// * @param array paramsVar
|
||||
// * @return boolean
|
||||
// * @since 1.25
|
||||
// */
|
||||
// public function isImageAreaOkForThumbnaling(file, ¶msVar) {
|
||||
// global wgMaxImageArea;
|
||||
//
|
||||
// // For historical reasons, hook starts with BitmapHandler
|
||||
// checkImageAreaHookResult = null;
|
||||
// Hooks::run(
|
||||
// 'BitmapHandlerCheckImageArea',
|
||||
// [ file, ¶msVar, &checkImageAreaHookResult ]
|
||||
// );
|
||||
//
|
||||
// if (!is_null(checkImageAreaHookResult)) {
|
||||
// // was set by hook, so return that value
|
||||
// return (boolean)checkImageAreaHookResult;
|
||||
// }
|
||||
//
|
||||
// srcWidth = file.getWidth(paramsVar['page']);
|
||||
// srcHeight = file.getHeight(paramsVar['page']);
|
||||
//
|
||||
// if (srcWidth * srcHeight > wgMaxImageArea
|
||||
// && !(file.getMimeType() == 'image/jpeg'
|
||||
// && this.getScalerType(false, false) == 'im')
|
||||
// ) {
|
||||
// // Only ImageMagick can efficiently downsize jpg images without loading
|
||||
// // the entire file in memory
|
||||
// return false;
|
||||
// }
|
||||
// return true;
|
||||
// }
|
||||
}
|
||||
@@ -1,584 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
|
||||
import gplx.core.btries.*;
|
||||
import gplx.langs.htmls.*;
|
||||
import gplx.xowa.mws.utls.*;
|
||||
public class Xomw_block_level_pass {
|
||||
private final Bry_bfr tmp = Bry_bfr_.New();
|
||||
private final Btrie_rv trv = new Btrie_rv();
|
||||
private boolean in_pre, dt_open;
|
||||
private int last_section;
|
||||
private byte[] find_colon_no_links__before, find_colon_no_links__after;
|
||||
|
||||
public void Do_block_levels(Xomw_parser_ctx pctx, Xomw_parser_bfr pbfr, boolean line_start) {
|
||||
// XO.PBFR
|
||||
Bry_bfr src_bfr = pbfr.Src();
|
||||
byte[] src = src_bfr.Bfr();
|
||||
int src_bgn = 0;
|
||||
int src_end = src_bfr.Len();
|
||||
Bry_bfr bfr = pbfr.Trg();
|
||||
pbfr.Switch();
|
||||
|
||||
// XO.STATIC
|
||||
if (block_chars_ary == null) {
|
||||
synchronized (Type_adp_.ClassOf_obj(this)) {
|
||||
block_chars_ary = Block_chars_ary__new();
|
||||
open_match_trie = Btrie_slim_mgr.ci_a7().Add_many_str
|
||||
("<table", "<h1", "<h2", "<h3", "<h4", "<h5", "<h6", "<pre", "<tr", "<p", "<ul", "<ol", "<dl", "<li", "</tr", "</td", "</th");
|
||||
close_match_trie = Btrie_slim_mgr.ci_a7().Add_many_str
|
||||
( "</table", "</h1", "</h2", "</h3", "</h4", "</h5", "</h6", "<td", "<th", "<blockquote", "</blockquote", "<div", "</div", "<hr"
|
||||
, "</pre", "</p", "</mw:", Xomw_strip_state.Str__marker_bgn + "-pre", "</li", "</ul", "</ol", "</dl", "<center", "</center");
|
||||
blockquote_trie = Btrie_slim_mgr.ci_a7().Add_many_str("<blockquote", "</blockquote");
|
||||
pre_trie = Btrie_slim_mgr.ci_a7().Add_str_int("<pre", Pre__bgn).Add_str_int("</pre", Pre__end);
|
||||
}
|
||||
}
|
||||
|
||||
// Parsing through the text line by line. The main thing
|
||||
// happening here is handling of block-level elements p, pre,
|
||||
// and making lists from lines starting with * # : etc.
|
||||
byte[] last_prefix = Bry_.Empty;
|
||||
bfr.Clear();
|
||||
this.dt_open = false;
|
||||
boolean in_block_elem = false;
|
||||
int prefix_len = 0;
|
||||
byte para_stack = Para_stack__none;
|
||||
boolean in_blockquote = false;
|
||||
this.in_pre = false;
|
||||
this.last_section = Last_section__none;
|
||||
byte[] prefix2 = null;
|
||||
|
||||
// PORTED.SPLIT: $textLines = StringUtils::explode("\n", $text);
|
||||
int line_bgn = src_bgn;
|
||||
while (line_bgn < src_end) {
|
||||
int line_end = Bry_find_.Find_fwd(src, Byte_ascii.Nl, line_bgn);
|
||||
if (line_end == Bry_find_.Not_found)
|
||||
line_end = src_end;
|
||||
|
||||
// Fix up line_start
|
||||
if (!line_start) {
|
||||
bfr.Add_mid(src, line_bgn, line_end);
|
||||
line_start = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
// * = ul
|
||||
// # = ol
|
||||
// ; = dt
|
||||
// : = dd
|
||||
int last_prefix_len = last_prefix.length;
|
||||
|
||||
// PORTED: pre_close_match = preg_match('/<\\/pre/i', $oLine); pre_open_match = preg_match('/<pre/i', $oLine);
|
||||
int pre_cur = line_bgn;
|
||||
boolean pre_close_match = false;
|
||||
boolean pre_open_match = false;
|
||||
|
||||
while (true) {
|
||||
if (pre_cur >= line_end)
|
||||
break;
|
||||
Object o = pre_trie.Match_at(trv, src, pre_cur, line_end);
|
||||
if (o == null)
|
||||
pre_cur++;
|
||||
else {
|
||||
int pre_tid = (int)o;
|
||||
if (pre_tid == Pre__bgn)
|
||||
pre_open_match = true;
|
||||
else if (pre_tid == Pre__end)
|
||||
pre_close_match = true;
|
||||
pre_cur = trv.Pos();
|
||||
}
|
||||
}
|
||||
|
||||
byte[] prefix = null, t = null;
|
||||
// If not in a <pre> element, scan for and figure out what prefixes are there.
|
||||
if (!in_pre) {
|
||||
// Multiple prefixes may abut each other for nested lists.
|
||||
prefix_len = Php_str_.Strspn_fwd__ary(src, block_chars_ary, line_bgn, line_end, line_end); // strspn($oLine, '*#:;');
|
||||
prefix = Php_str_.Substr(src, line_bgn, prefix_len);
|
||||
|
||||
// eh?
|
||||
// ; and : are both from definition-lists, so they're equivalent
|
||||
// for the purposes of determining whether or not we need to open/close
|
||||
// elements.
|
||||
// substr( $inputLine, $prefixLength );
|
||||
prefix2 = Bry_.Replace(prefix, Byte_ascii.Semic, Byte_ascii.Colon);
|
||||
t = Bry_.Mid(src, line_bgn + prefix_len, line_end);
|
||||
in_pre = pre_open_match;
|
||||
}
|
||||
else {
|
||||
// Don't interpret any other prefixes in preformatted text
|
||||
prefix_len = 0;
|
||||
prefix = prefix2 = Bry_.Empty;
|
||||
t = Bry_.Mid(src, line_bgn, line_end);
|
||||
}
|
||||
|
||||
// List generation
|
||||
byte[] term = null, t2 = null;
|
||||
int common_prefix_len = -1;
|
||||
if (prefix_len > 0 && Bry_.Eq(last_prefix, prefix2)) {
|
||||
// Same as the last item, so no need to deal with nesting or opening stuff
|
||||
bfr.Add(Next_item(Php_str_.Substr_byte(prefix, -1)));
|
||||
para_stack = Para_stack__none;
|
||||
|
||||
if (prefix_len > 0 && prefix[prefix_len - 1] == Byte_ascii.Semic) {
|
||||
// The one nasty exception: definition lists work like this:
|
||||
// ; title : definition text
|
||||
// So we check for : in the remainder text to split up the
|
||||
// title and definition, without b0rking links.
|
||||
term = t2 = Bry_.Empty;
|
||||
if (Find_colon_no_links(t, term, t2) != Bry_find_.Not_found) {
|
||||
term = find_colon_no_links__before;
|
||||
t2 = find_colon_no_links__after;
|
||||
t = t2;
|
||||
bfr.Add(term).Add(Next_item(Byte_ascii.Colon));
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (prefix_len > 0 || last_prefix_len > 0) {
|
||||
// We need to open or close prefixes, or both.
|
||||
|
||||
// Either open or close a level...
|
||||
common_prefix_len = Get_common(prefix, last_prefix);
|
||||
para_stack = Para_stack__none;
|
||||
|
||||
// Close all the prefixes which aren't shared.
|
||||
while (common_prefix_len < last_prefix_len) {
|
||||
bfr.Add(Close_list(last_prefix[last_prefix_len - 1]));
|
||||
last_prefix_len--;
|
||||
}
|
||||
|
||||
// Continue the current prefix if appropriate.
|
||||
if (prefix_len <= common_prefix_len && common_prefix_len > 0) {
|
||||
bfr.Add(Next_item(prefix[common_prefix_len - 1]));
|
||||
}
|
||||
|
||||
// Open prefixes where appropriate.
|
||||
if (Bry_.Len_gt_0(last_prefix) && prefix_len > common_prefix_len) {
|
||||
bfr.Add_byte_nl();
|
||||
}
|
||||
while (prefix_len > common_prefix_len) {
|
||||
byte c = Php_str_.Substr_byte(prefix, common_prefix_len, 1);
|
||||
bfr.Add(Open_list(c));
|
||||
|
||||
if (c == Byte_ascii.Semic) {
|
||||
// @todo FIXME: This is dupe of code above
|
||||
if (Find_colon_no_links(t, term, t2) != Bry_find_.Not_found) {
|
||||
term = find_colon_no_links__before;
|
||||
t2 = find_colon_no_links__after;
|
||||
t = t2;
|
||||
bfr.Add(term).Add(Next_item(Byte_ascii.Colon));
|
||||
}
|
||||
}
|
||||
++common_prefix_len;
|
||||
}
|
||||
if (prefix_len == 0 && Bry_.Len_gt_0(last_prefix)) {
|
||||
bfr.Add_byte_nl();
|
||||
}
|
||||
last_prefix = prefix2;
|
||||
}
|
||||
|
||||
// If we have no prefixes, go to paragraph mode.
|
||||
if (0 == prefix_len) {
|
||||
// No prefix (not in list)--go to paragraph mode
|
||||
// XXX: use a stack for nestable elements like span, table and div
|
||||
int t_len = t.length;
|
||||
boolean open_match = Php_preg_.Match(open_match_trie, trv, t, 0, t_len) != null;
|
||||
boolean close_match = Php_preg_.Match(close_match_trie, trv, t, 0, t_len) != null;
|
||||
|
||||
if (open_match || close_match) {
|
||||
para_stack = Para_stack__none;
|
||||
// @todo bug 5718: paragraph closed
|
||||
bfr.Add(Close_paragraph());
|
||||
if (pre_open_match && !pre_close_match) {
|
||||
in_pre = true;
|
||||
}
|
||||
int bq_offset = 0;
|
||||
// PORTED:preg_match('/<(\\/?)blockquote[\s>]/i', t, $bqMatch, PREG_OFFSET_CAPTURE, $bq_offset)
|
||||
while (true) {
|
||||
Object o = Php_preg_.Match(blockquote_trie, trv, t, bq_offset, t_len);
|
||||
if (o == null) { // no more blockquotes found; exit
|
||||
break;
|
||||
}
|
||||
else {
|
||||
byte[] bq_bry = (byte[])o;
|
||||
in_blockquote = bq_bry[1] != Byte_ascii.Slash; // is this a close tag?
|
||||
bq_offset = trv.Pos();
|
||||
}
|
||||
}
|
||||
in_block_elem = !close_match;
|
||||
}
|
||||
else if (!in_block_elem && !in_pre) {
|
||||
if ( Php_str_.Substr_byte(t, 0) == Byte_ascii.Space
|
||||
&& (last_section == Last_section__pre || Bry_.Trim(t) != Bry_.Empty)
|
||||
&& !in_blockquote
|
||||
) {
|
||||
// pre
|
||||
if (last_section != Last_section__pre) {
|
||||
para_stack = Para_stack__none;
|
||||
bfr.Add(Close_paragraph()).Add(Gfh_tag_.Pre_lhs);
|
||||
last_section = Last_section__pre;
|
||||
}
|
||||
t = Bry_.Mid(t, 1);
|
||||
}
|
||||
else {
|
||||
// paragraph
|
||||
if (Bry_.Trim(t) == Bry_.Empty) {
|
||||
if (para_stack != Para_stack__none) {
|
||||
Para_stack_bfr(bfr, para_stack);
|
||||
bfr.Add_str_a7("<br />");
|
||||
para_stack = Para_stack__none;
|
||||
last_section = Last_section__para;
|
||||
}
|
||||
else {
|
||||
if (last_section != Last_section__para) {
|
||||
bfr.Add(Close_paragraph());
|
||||
last_section = Last_section__none;
|
||||
para_stack = Para_stack__bgn;
|
||||
}
|
||||
else {
|
||||
para_stack = Para_stack__mid;
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (para_stack != Para_stack__none) {
|
||||
Para_stack_bfr(bfr, para_stack);
|
||||
para_stack = Para_stack__none;
|
||||
last_section = Last_section__para;
|
||||
}
|
||||
else if (last_section != Last_section__para) {
|
||||
bfr.Add(Close_paragraph()).Add(Gfh_tag_.P_lhs);
|
||||
this.last_section = Last_section__para;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// somewhere above we forget to get out of pre block (bug 785)
|
||||
if (pre_close_match && in_pre) {
|
||||
in_pre = false;
|
||||
}
|
||||
if (para_stack == Para_stack__none) {
|
||||
bfr.Add(t);
|
||||
if (prefix_len == 0) {
|
||||
bfr.Add_byte_nl();
|
||||
}
|
||||
}
|
||||
|
||||
line_bgn = line_end + 1;
|
||||
}
|
||||
|
||||
while (prefix_len > 0) {
|
||||
bfr.Add(Close_list(prefix2[prefix_len - 1]));
|
||||
prefix_len--;
|
||||
if (prefix_len > 0) {
|
||||
bfr.Add_byte_nl();
|
||||
}
|
||||
}
|
||||
if (last_section != Last_section__none) {
|
||||
bfr.Add(last_section == Last_section__para ? Gfh_tag_.P_rhs : Gfh_tag_.Pre_rhs);
|
||||
last_section = Last_section__none;
|
||||
}
|
||||
}
|
||||
// If a pre or p is open, return the corresponding close tag and update
|
||||
// the state. If no tag is open, return an empty String.
|
||||
public byte[] Close_paragraph() {
|
||||
byte[] result = Bry_.Empty;
|
||||
if (last_section != Last_section__none) {
|
||||
tmp.Add(last_section == Last_section__para ? Gfh_tag_.P_rhs : Gfh_tag_.Pre_rhs);
|
||||
result = tmp.Add_byte_nl().To_bry_and_clear();
|
||||
}
|
||||
in_pre = false;
|
||||
last_section = Last_section__none;
|
||||
return result;
|
||||
}
|
||||
|
||||
// getCommon() returns the length of the longest common substring
|
||||
// of both arguments, starting at the beginning of both.
|
||||
private int Get_common(byte[] st1, byte[] st2) {
|
||||
int st1_len = st1.length, st2_len = st2.length;
|
||||
int shorter = st1_len < st2_len ? st1_len : st2_len;
|
||||
|
||||
int i;
|
||||
for (i = 0; i < shorter; i++) {
|
||||
if (st1[i] != st2[i]) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
return i;
|
||||
}
|
||||
|
||||
// Open the list item element identified by the prefix character.
|
||||
private byte[] Open_list(byte c) {
|
||||
byte[] result = Close_paragraph();
|
||||
|
||||
if (c == Byte_ascii.Star)
|
||||
result = tmp.Add(result).Add_str_a7("<ul><li>").To_bry_and_clear();
|
||||
else if (c == Byte_ascii.Hash)
|
||||
result = tmp.Add(result).Add_str_a7("<ol><li>").To_bry_and_clear();
|
||||
else if (c == Byte_ascii.Hash)
|
||||
result = tmp.Add(result).Add_str_a7("<dl><dd>").To_bry_and_clear();
|
||||
else if (c == Byte_ascii.Semic) {
|
||||
result = tmp.Add(result).Add_str_a7("<dl><dt>").To_bry_and_clear();
|
||||
dt_open = true;
|
||||
}
|
||||
else
|
||||
result = tmp.Add_str_a7("<!-- ERR 1 -->").To_bry_and_clear();
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
// Close the current list item and open the next one.
|
||||
private byte[] Next_item(byte c) {
|
||||
if (c == Byte_ascii.Star || c == Byte_ascii.Hash) {
|
||||
return tmp.Add_str_a7("</li>\n<li>").To_bry_and_clear();
|
||||
}
|
||||
else if (c == Byte_ascii.Colon || c == Byte_ascii.Semic) {
|
||||
byte[] close = tmp.Add_str_a7("</dd>\n").To_bry_and_clear();
|
||||
if (dt_open) {
|
||||
close = tmp.Add_str_a7("</dt>\n").To_bry_and_clear();
|
||||
}
|
||||
if (c == Byte_ascii.Semic) {
|
||||
dt_open = true;
|
||||
return tmp.Add(close).Add_str_a7("<dt>").To_bry_and_clear();
|
||||
}
|
||||
else {
|
||||
dt_open = false;
|
||||
return tmp.Add(close).Add_str_a7("<dd>").To_bry_and_clear();
|
||||
}
|
||||
}
|
||||
return tmp.Add_str_a7("<!-- ERR 2 -->").To_bry_and_clear();
|
||||
}
|
||||
|
||||
// Close the current list item identified by the prefix character.
|
||||
private byte[] Close_list(byte c) {
|
||||
byte[] text = null;
|
||||
if (c == Byte_ascii.Star) {
|
||||
text = Bry_.new_a7("</li></ul>");
|
||||
}
|
||||
else if (c == Byte_ascii.Hash) {
|
||||
text = Bry_.new_a7("</li></ol>");
|
||||
}
|
||||
else if (c == Byte_ascii.Colon) {
|
||||
if (dt_open) {
|
||||
dt_open = false;
|
||||
text = Bry_.new_a7("</dt></dl>");
|
||||
}
|
||||
else {
|
||||
text = Bry_.new_a7("</dd></dl>");
|
||||
}
|
||||
}
|
||||
else {
|
||||
return Bry_.new_a7("<!-- ERR 3 -->");
|
||||
}
|
||||
return text;
|
||||
}
|
||||
|
||||
// Split up a String on ':', ignoring any occurrences inside tags
|
||||
// to prevent illegal overlapping.
|
||||
private int Find_colon_no_links(byte[] str, byte[] before, byte[] after) {
|
||||
int len = str.length;
|
||||
int colon_pos = Php_str_.Strpos(str, Byte_ascii.Colon, 0, len);
|
||||
if (colon_pos == Bry_find_.Not_found) {
|
||||
// Nothing to find!
|
||||
return Bry_find_.Not_found;
|
||||
}
|
||||
|
||||
int lt_pos = Php_str_.Strpos(str, Byte_ascii.Angle_bgn, 0, len);
|
||||
if (lt_pos == Bry_find_.Not_found || lt_pos > colon_pos) {
|
||||
// Easy; no tag nesting to worry about
|
||||
find_colon_no_links__before = Php_str_.Substr(str, 0, colon_pos);
|
||||
find_colon_no_links__after = Php_str_.Substr(str, colon_pos + 1);
|
||||
return colon_pos;
|
||||
}
|
||||
|
||||
// Ugly state machine to walk through avoiding tags.
|
||||
int state = COLON_STATE_TEXT;
|
||||
int level = 0;
|
||||
for (int i = 0; i < len; i++) {
|
||||
byte c = str[i];
|
||||
|
||||
switch (state) {
|
||||
case COLON_STATE_TEXT:
|
||||
switch (c) {
|
||||
case Byte_ascii.Angle_bgn:
|
||||
// Could be either a <start> tag or an </end> tag
|
||||
state = COLON_STATE_TAGSTART;
|
||||
break;
|
||||
case Byte_ascii.Colon:
|
||||
if (level == 0) {
|
||||
// We found it!
|
||||
find_colon_no_links__before = Php_str_.Substr(str, 0, i);
|
||||
find_colon_no_links__after = Php_str_.Substr(str, i + 1);
|
||||
return i;
|
||||
}
|
||||
// Embedded in a tag; don't break it.
|
||||
break;
|
||||
default:
|
||||
// Skip ahead looking for something interesting
|
||||
colon_pos = Php_str_.Strpos(str, Byte_ascii.Colon, i, len);
|
||||
if (colon_pos == Bry_find_.Not_found) {
|
||||
// Nothing else interesting
|
||||
return Bry_find_.Not_found;
|
||||
}
|
||||
lt_pos = Php_str_.Strpos(str, Byte_ascii.Angle_bgn, i, len);
|
||||
if (level == 0) {
|
||||
if (lt_pos == Bry_find_.Not_found || colon_pos < lt_pos) {
|
||||
// We found it!
|
||||
find_colon_no_links__before = Php_str_.Substr(str, 0, colon_pos);
|
||||
find_colon_no_links__after = Php_str_.Substr(str, colon_pos + 1);
|
||||
return i;
|
||||
}
|
||||
}
|
||||
if (lt_pos == Bry_find_.Not_found) {
|
||||
// Nothing else interesting to find; abort!
|
||||
// We're nested, but there's no close tags left. Abort!
|
||||
i = len; // break 2
|
||||
break;
|
||||
}
|
||||
// Skip ahead to next tag start
|
||||
i = lt_pos;
|
||||
state = COLON_STATE_TAGSTART;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case COLON_STATE_TAG:
|
||||
// In a <tag>
|
||||
switch (c) {
|
||||
case Byte_ascii.Angle_bgn:
|
||||
level++;
|
||||
state = COLON_STATE_TEXT;
|
||||
break;
|
||||
case Byte_ascii.Slash:
|
||||
// Slash may be followed by >?
|
||||
state = COLON_STATE_TAGSLASH;
|
||||
break;
|
||||
default:
|
||||
// ignore
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case COLON_STATE_TAGSTART:
|
||||
switch (c) {
|
||||
case Byte_ascii.Slash:
|
||||
state = COLON_STATE_CLOSETAG;
|
||||
break;
|
||||
case Byte_ascii.Bang:
|
||||
state = COLON_STATE_COMMENT;
|
||||
break;
|
||||
case Byte_ascii.Angle_bgn:
|
||||
// Illegal early close? This shouldn't happen D:
|
||||
state = COLON_STATE_TEXT;
|
||||
break;
|
||||
default:
|
||||
state = COLON_STATE_TAG;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case COLON_STATE_CLOSETAG:
|
||||
// In a </tag>
|
||||
if (c == Byte_ascii.Angle_bgn) {
|
||||
level--;
|
||||
if (level < 0) {
|
||||
Gfo_usr_dlg_.Instance.Warn_many("", "", "Invalid input; too many close tags");
|
||||
return Bry_find_.Not_found;
|
||||
}
|
||||
state = COLON_STATE_TEXT;
|
||||
}
|
||||
break;
|
||||
case COLON_STATE_TAGSLASH:
|
||||
if (c == Byte_ascii.Angle_bgn) {
|
||||
// Yes, a self-closed tag <blah/>
|
||||
state = COLON_STATE_TEXT;
|
||||
}
|
||||
else {
|
||||
// Probably we're jumping the gun, and this is an attribute
|
||||
state = COLON_STATE_TAG;
|
||||
}
|
||||
break;
|
||||
case COLON_STATE_COMMENT:
|
||||
if (c == Byte_ascii.Dash) {
|
||||
state = COLON_STATE_COMMENTDASH;
|
||||
}
|
||||
break;
|
||||
case COLON_STATE_COMMENTDASH:
|
||||
if (c == Byte_ascii.Dash) {
|
||||
state = COLON_STATE_COMMENTDASHDASH;
|
||||
}
|
||||
else {
|
||||
state = COLON_STATE_COMMENT;
|
||||
}
|
||||
break;
|
||||
case COLON_STATE_COMMENTDASHDASH:
|
||||
if (c == Byte_ascii.Angle_bgn) {
|
||||
state = COLON_STATE_TEXT;
|
||||
}
|
||||
else {
|
||||
state = COLON_STATE_COMMENT;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
throw Err_.new_wo_type("State machine error");
|
||||
}
|
||||
}
|
||||
if (level > 0) {
|
||||
Gfo_usr_dlg_.Instance.Warn_many("", "", "Invalid input; not enough close tags (level ~{0}, state ~{1})", level, state);
|
||||
return Bry_find_.Not_found;
|
||||
}
|
||||
return Bry_find_.Not_found;
|
||||
}
|
||||
private static final int
|
||||
COLON_STATE_TEXT = 0
|
||||
, COLON_STATE_TAG = 1
|
||||
, COLON_STATE_TAGSTART = 2
|
||||
, COLON_STATE_CLOSETAG = 3
|
||||
, COLON_STATE_TAGSLASH = 4
|
||||
, COLON_STATE_COMMENT = 5
|
||||
, COLON_STATE_COMMENTDASH = 6
|
||||
, COLON_STATE_COMMENTDASHDASH = 7
|
||||
;
|
||||
private static final byte
|
||||
Last_section__none = 0 // ''
|
||||
, Last_section__para = 1 // p
|
||||
, Last_section__pre = 2 // pre
|
||||
;
|
||||
private static final byte
|
||||
Para_stack__none = 0 // false
|
||||
, Para_stack__bgn = 1 // <p>
|
||||
, Para_stack__mid = 2 // </p><p>
|
||||
;
|
||||
private static final int Pre__bgn = 0, Pre__end = 1;
|
||||
private static Btrie_slim_mgr pre_trie;
|
||||
private static boolean[] block_chars_ary;
|
||||
private static boolean[] Block_chars_ary__new() {
|
||||
boolean[] rv = new boolean[256];
|
||||
rv[Byte_ascii.Star] = true;
|
||||
rv[Byte_ascii.Hash] = true;
|
||||
rv[Byte_ascii.Colon] = true;
|
||||
rv[Byte_ascii.Semic] = true;
|
||||
return rv;
|
||||
}
|
||||
private static Btrie_slim_mgr open_match_trie, close_match_trie, blockquote_trie;
|
||||
private static void Para_stack_bfr(Bry_bfr bfr, int id) {
|
||||
switch (id) {
|
||||
case Para_stack__bgn: bfr.Add_str_a7("<p>"); break;
|
||||
case Para_stack__mid: bfr.Add_str_a7("</p><p>"); break;
|
||||
default: throw Err_.new_unhandled_default(id);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,42 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
|
||||
import org.junit.*; import gplx.core.tests.*;
|
||||
import gplx.xowa.mws.linkers.*;
|
||||
public class Xomw_block_level_pass__tst {
|
||||
private final Xomw_block_level_pass__fxt fxt = new Xomw_block_level_pass__fxt();
|
||||
@Test public void Basic() {
|
||||
fxt.Test__do_block_levels(String_.Concat_lines_nl_skip_last
|
||||
( "a"
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "<p>a"
|
||||
, "</p>"
|
||||
));
|
||||
}
|
||||
}
|
||||
class Xomw_block_level_pass__fxt {
|
||||
private final Xomw_block_level_pass block_level_pass = new Xomw_block_level_pass();
|
||||
private final Xomw_parser_ctx pctx = new Xomw_parser_ctx();
|
||||
private final Xomw_parser_bfr pbfr = new Xomw_parser_bfr();
|
||||
private boolean apos = true;
|
||||
public void Test__do_block_levels(String src, String expd) {
|
||||
if (apos) expd = gplx.langs.htmls.Gfh_utl.Replace_apos(expd);
|
||||
block_level_pass.Do_block_levels(pctx, pbfr.Init(Bry_.new_u8(src)), true);
|
||||
Gftest.Eq__str(expd, pbfr.Rslt().To_str_and_clear());
|
||||
}
|
||||
}
|
||||
@@ -1,251 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
|
||||
import gplx.langs.htmls.*;
|
||||
import gplx.xowa.mws.*;
|
||||
import gplx.xowa.mws.htmls.*;
|
||||
import gplx.xowa.mws.linkers.*;
|
||||
public class Xomw_link_holders {
|
||||
private final Xomw_link_renderer link_renderer;
|
||||
private final Bry_bfr tmp;
|
||||
private int link_id = 0; // MOVED:Parser.php
|
||||
private final Xomw_link_holder_list internals = new Xomw_link_holder_list();
|
||||
private final Xomw_atr_mgr extra_atrs = new Xomw_atr_mgr();
|
||||
private final Xomw_qry_mgr query = new Xomw_qry_mgr();
|
||||
// link_renderer: used by Replace to render each registered link
// tmp: scratch buffer used by Make_holder to concatenate the link text
public Xomw_link_holders(Xomw_link_renderer link_renderer, Bry_bfr tmp) {
	this.tmp = tmp;
	this.link_renderer = link_renderer;
}
|
||||
public void Clear() {
|
||||
internals.Clear();
|
||||
|
||||
link_id = 0;
|
||||
}
|
||||
public void Make_holder(Bry_bfr bfr, Xoa_ttl nt, byte[] text, byte[][] query, byte[] trail, byte[] prefix) {
|
||||
if (nt == null) {
|
||||
// Fail gracefully
|
||||
bfr.Add_str_a7("<!-- ERROR -->").Add(prefix).Add(text).Add(trail);
|
||||
}
|
||||
else {
|
||||
// Separate the link trail from the rest of the link
|
||||
// list( $inside, $trail ) = Linker::splitTrail( $trail );
|
||||
byte[] inside = Bry_.Empty;
|
||||
|
||||
Xomw_link_holder_item entry = new Xomw_link_holder_item(nt, tmp.Add_bry_many(prefix, text, inside).To_bry_and_clear(), query);
|
||||
|
||||
boolean is_external = false; // $nt->isExternal()
|
||||
if (is_external) {
|
||||
// Use a globally unique ID to keep the objects mergable
|
||||
// $key = $this->parent->nextLinkID();
|
||||
// $this->interwikis[$key] = $entry;
|
||||
// $retVal = "<!--IWLINK $key-->{$trail}";
|
||||
}
|
||||
else {
|
||||
int key = link_id++;
|
||||
internals.Add(key, entry);
|
||||
bfr.Add(Bry__link__bgn).Add_int_variable(key).Add(Gfh_tag_.Comm_end).Add(trail); // "<!--LINK $ns:$key-->{$trail}";
|
||||
}
|
||||
}
|
||||
}
|
||||
public void Test__add(Xoa_ttl ttl, byte[] capt) {
|
||||
int key = link_id++;
|
||||
Xomw_link_holder_item item = new Xomw_link_holder_item(ttl, capt, Bry_.Ary_empty);
|
||||
internals.Add(key, item);
|
||||
}
|
||||
public void Replace(Xomw_parser_ctx pctx, Xomw_parser_bfr pbfr) {
|
||||
this.Replace_internal(pbfr);
|
||||
// $this->replaceInterwiki( $text );
|
||||
}
|
||||
private void Replace_internal(Xomw_parser_bfr pbfr) {
|
||||
if (internals.Len() == 0)
|
||||
return;
|
||||
|
||||
// $colours = [];
|
||||
// $linkCache = LinkCache::singleton();
|
||||
// $output = $this->parent->getOutput();
|
||||
// $linkRenderer = $this->parent->getLinkRenderer();
|
||||
|
||||
// $linkcolour_ids = [];
|
||||
|
||||
// SKIP:Replace_internals does db lookup to identify redlinks;
|
||||
|
||||
// Construct search and replace arrays
|
||||
Bry_bfr src_bfr = pbfr.Src();
|
||||
byte[] src = src_bfr.Bfr();
|
||||
int src_bgn = 0;
|
||||
int src_end = src_bfr.Len();
|
||||
Bry_bfr bfr = pbfr.Trg();
|
||||
pbfr.Switch();
|
||||
|
||||
int cur = src_bgn;
|
||||
int prv = 0;
|
||||
while (true) {
|
||||
int link_bgn = Bry_find_.Find_fwd(src, Bry__link__bgn, cur, src_end);
|
||||
if (link_bgn == Bry_find_.Not_found) {
|
||||
bfr.Add_mid(src, prv, src_end);
|
||||
break;
|
||||
}
|
||||
int key_bgn = link_bgn + Bry__link__bgn.length;
|
||||
int key_end = Bry_find_.Find_fwd_while_num(src, key_bgn, src_end);
|
||||
int link_key = Bry_.To_int_or(src, key_bgn, key_end, -1);
|
||||
Xomw_link_holder_item item = internals.Get_by(link_key);
|
||||
|
||||
// $pdbk = $entry['pdbk'];
|
||||
// $title = $entry['title'];
|
||||
// $query = isset( $entry['query'] ) ? $entry['query'] : [];
|
||||
// $key = "$ns:$index";
|
||||
// $searchkey = "<!--LINK $key-->";
|
||||
// $displayText = $entry['text'];
|
||||
// if ( isset( $entry['selflink'] ) ) {
|
||||
// $replacePairs[$searchkey] = Linker::makeSelfLinkObj( $title, $displayText, $query );
|
||||
// continue;
|
||||
// }
|
||||
// if ( $displayText === '' ) {
|
||||
// $displayText = null;
|
||||
// } else {
|
||||
// $displayText = new HtmlArmor( $displayText );
|
||||
// }
|
||||
// if ( !isset( $colours[$pdbk] ) ) {
|
||||
// $colours[$pdbk] = 'new';
|
||||
// }
|
||||
// $attribs = [];
|
||||
// if ( $colours[$pdbk] == 'new' ) {
|
||||
// $linkCache->addBadLinkObj( $title );
|
||||
// $output->addLink( $title, 0 );
|
||||
// $link = $linkRenderer->makeBrokenLink(
|
||||
// $title, $displayText, $attribs, $query
|
||||
// );
|
||||
// } else {
|
||||
// $link = $linkRenderer->makePreloadedLink(
|
||||
// $title, $displayText, $colours[$pdbk], $attribs, $query
|
||||
// );
|
||||
// }
|
||||
|
||||
bfr.Add_mid(src, prv, link_bgn);
|
||||
link_renderer.Make_preloaded_link(bfr, item.Title(), item.Text(), Bry_.Empty, extra_atrs, query.Clear());
|
||||
cur = key_end + Gfh_tag_.Comm_end_len;
|
||||
prv = cur;
|
||||
}
|
||||
}
|
||||
// private void Replace_internal__db() {
|
||||
// // Generate query
|
||||
// $lb = new LinkBatch();
|
||||
// $lb->setCaller( __METHOD__ );
|
||||
//
|
||||
// foreach ( $this->internals as $ns => $entries ) {
|
||||
// foreach ( $entries as $entry ) {
|
||||
// /** @var Title $title */
|
||||
// $title = $entry['title'];
|
||||
// $pdbk = $entry['pdbk'];
|
||||
//
|
||||
// # Skip invalid entries.
|
||||
// # Result will be ugly, but prevents crash.
|
||||
// if ( is_null( $title ) ) {
|
||||
// continue;
|
||||
// }
|
||||
//
|
||||
// # Check if it's a static known link, e.g. interwiki
|
||||
// if ( $title->isAlwaysKnown() ) {
|
||||
// $colours[$pdbk] = '';
|
||||
// } elseif ( $ns == NS_SPECIAL ) {
|
||||
// $colours[$pdbk] = 'new';
|
||||
// } else {
|
||||
// $id = $linkCache->getGoodLinkID( $pdbk );
|
||||
// if ( $id != 0 ) {
|
||||
// $colours[$pdbk] = $linkRenderer->getLinkClasses( $title );
|
||||
// $output->addLink( $title, $id );
|
||||
// $linkcolour_ids[$id] = $pdbk;
|
||||
// } elseif ( $linkCache->isBadLink( $pdbk ) ) {
|
||||
// $colours[$pdbk] = 'new';
|
||||
// } else {
|
||||
// # Not in the link cache, add it to the query
|
||||
// $lb->addObj( $title );
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// if ( !$lb->isEmpty() ) {
|
||||
// $fields = array_merge(
|
||||
// LinkCache::getSelectFields(),
|
||||
// [ 'page_namespace', 'page_title' ]
|
||||
// );
|
||||
//
|
||||
// $res = $dbr->select(
|
||||
// 'page',
|
||||
// $fields,
|
||||
// $lb->constructSet( 'page', $dbr ),
|
||||
// __METHOD__
|
||||
// );
|
||||
//
|
||||
// # Fetch data and form into an associative array
|
||||
// # non-existent = broken
|
||||
// foreach ( $res as $s ) {
|
||||
// $title = Title::makeTitle( $s->page_namespace, $s->page_title );
|
||||
// $pdbk = $title->getPrefixedDBkey();
|
||||
// $linkCache->addGoodLinkObjFromRow( $title, $s );
|
||||
// $output->addLink( $title, $s->page_id );
|
||||
// $colours[$pdbk] = $linkRenderer->getLinkClasses( $title );
|
||||
// // add id to the extension todolist
|
||||
// $linkcolour_ids[$s->page_id] = $pdbk;
|
||||
// }
|
||||
// unset( $res );
|
||||
// }
|
||||
// if ( count( $linkcolour_ids ) ) {
|
||||
// // pass an array of page_ids to an extension
|
||||
// Hooks::run( 'GetLinkColours', [ $linkcolour_ids, &$colours ] );
|
||||
// }
|
||||
//
|
||||
// # Do a second query for different language variants of links and categories
|
||||
// if ( $wgContLang->hasVariants() ) {
|
||||
// $this->doVariants( $colours );
|
||||
// }
|
||||
// }
|
||||
|
||||
private static final byte[] Bry__link__bgn = Bry_.new_a7("<!--LINK ");
|
||||
}
|
||||
class Xomw_link_holder_list {
|
||||
private int ary_len = 0, ary_max = 128;
|
||||
private Xomw_link_holder_item[] ary = new Xomw_link_holder_item[128];
|
||||
public int Len() {return ary_len;}
|
||||
public void Clear() {
|
||||
ary_len = 0;
|
||||
if (ary_max > 128)
|
||||
ary = new Xomw_link_holder_item[128];
|
||||
}
|
||||
public void Add(int key, Xomw_link_holder_item item) {
|
||||
if (key >= ary_max) {
|
||||
int new_max = ary_max * 2;
|
||||
ary = (Xomw_link_holder_item[])Array_.Resize(ary, new_max);
|
||||
ary_max = new_max;
|
||||
}
|
||||
ary[key] = item;
|
||||
ary_len++;
|
||||
}
|
||||
public Xomw_link_holder_item Get_by(int key) {return ary[key];}
|
||||
}
|
||||
class Xomw_link_holder_item {
|
||||
public Xomw_link_holder_item(Xoa_ttl title, byte[] text, byte[][] query) {
|
||||
this.title = title;
|
||||
this.text = text;
|
||||
this.query = query;
|
||||
}
|
||||
public Xoa_ttl Title() {return title;} private final Xoa_ttl title;
|
||||
public byte[] Text() {return text;} private final byte[] text;
|
||||
public byte[] Pdbk() {return title.Get_prefixed_db_key();}
|
||||
public byte[][] Query() {return query;} private final byte[][] query;
|
||||
}
|
||||
@@ -1,45 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
|
||||
import org.junit.*; import gplx.core.tests.*;
|
||||
import gplx.xowa.mws.linkers.*;
|
||||
public class Xomw_link_holders__tst {
|
||||
private final Xomw_link_holders__fxt fxt = new Xomw_link_holders__fxt();
|
||||
@Test public void Replace__basic() {
|
||||
fxt.Init__add("A", "a");
|
||||
fxt.Test__replace("a <!--LINK 0--> b", "a <a href='/wiki/A' title='A'>a</a> b");
|
||||
}
|
||||
}
|
||||
class Xomw_link_holders__fxt {
|
||||
private final Xomw_link_holders holders = new Xomw_link_holders(new Xomw_link_renderer(new Xomw_sanitizer()), Bry_bfr_.New());
|
||||
private final Xomw_parser_bfr pbfr = new Xomw_parser_bfr();
|
||||
private final Xowe_wiki wiki;
|
||||
private boolean apos = true;
|
||||
public Xomw_link_holders__fxt() {
|
||||
Xoae_app app = Xoa_app_fxt.Make__app__edit();
|
||||
this.wiki = Xoa_app_fxt.Make__wiki__edit(app);
|
||||
}
|
||||
public void Init__add(String ttl, String capt) {
|
||||
holders.Test__add(wiki.Ttl_parse(Bry_.new_u8(ttl)), Bry_.new_u8(capt));
|
||||
}
|
||||
public void Test__replace(String src, String expd) {
|
||||
if (apos) expd = gplx.langs.htmls.Gfh_utl.Replace_apos(expd);
|
||||
holders.Replace(new Xomw_parser_ctx(), pbfr.Init(Bry_.new_u8(src)));
|
||||
Gftest.Eq__str(expd, pbfr.Rslt().To_str_and_clear());
|
||||
}
|
||||
}
|
||||
@@ -1,27 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
|
||||
/** Byte ids for the parser's output types. */
public class Xomw_output_type {
	/** like parse() */
	public static final byte Tid__html = 1;
	/** like preSaveTransform() */
	public static final byte Tid__wiki = 2;
	/** like preprocess() */
	public static final byte Tid__preprocess = 3;
	/** Same value as Tid__preprocess. NOTE(review): appears to mirror MediaWiki's OT_MSG / OT_PREPROCESS constants; confirm before changing. */
	public static final byte Tid__msg = 3;
	/** like extractSections() - portions of the original are returned unchanged. */
	public static final byte Tid__plain = 4;
}
|
||||
@@ -1,299 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
|
||||
import gplx.core.btries.*; import gplx.core.net.*;
|
||||
import gplx.xowa.mws.parsers.prepros.*; import gplx.xowa.mws.parsers.headings.*;
|
||||
import gplx.xowa.mws.parsers.quotes.*; import gplx.xowa.mws.parsers.tables.*; import gplx.xowa.mws.parsers.hrs.*; import gplx.xowa.mws.parsers.nbsps.*;
|
||||
import gplx.xowa.mws.parsers.lnkes.*; import gplx.xowa.mws.parsers.lnkis.*; import gplx.xowa.mws.parsers.magiclinks.*; import gplx.xowa.mws.parsers.doubleunders.*;
|
||||
import gplx.xowa.mws.utls.*; import gplx.xowa.mws.linkers.*;
|
||||
import gplx.xowa.mws.htmls.*;
|
||||
public class Xomw_parser {
|
||||
private final Xomw_parser_ctx pctx = new Xomw_parser_ctx();
|
||||
private final Xomw_table_wkr table_wkr;
|
||||
private final Xomw_hr_wkr hr_wkr = new Xomw_hr_wkr();
|
||||
private final Xomw_lnke_wkr lnke_wkr;
|
||||
private final Xomw_nbsp_wkr nbsp_wkr = new Xomw_nbsp_wkr();
|
||||
private final Xomw_block_level_pass block_wkr = new Xomw_block_level_pass();
|
||||
private final Xomw_heading_wkr heading_wkr = new Xomw_heading_wkr();
|
||||
private final Xomw_magiclinks_wkr magiclinks_wkr;
|
||||
private final Xomw_doubleunder_wkr doubleunder_wkr = new Xomw_doubleunder_wkr();
|
||||
private final Xomw_link_renderer link_renderer;
|
||||
private final Xomw_link_holders holders;
|
||||
private final Xomw_heading_cbk__html heading_wkr_cbk;
|
||||
private final Btrie_slim_mgr protocols_trie;
|
||||
private final Xomw_doubleunder_data doubleunder_data = new Xomw_doubleunder_data();
|
||||
private static Xomw_regex_space regex_space;
|
||||
private static Xomw_regex_boundary regex_boundary;
|
||||
private static Xomw_regex_url regex_url;
|
||||
private final Btrie_rv trv = new Btrie_rv();
|
||||
private int marker_index = 0;
|
||||
// private final Xomw_prepro_wkr prepro_wkr = new Xomw_prepro_wkr();
|
||||
public Xomw_parser_env Env() {return env;} private final Xomw_parser_env env = new Xomw_parser_env();
|
||||
public Xomw_parser_options Options() {return options;} private final Xomw_parser_options options = new Xomw_parser_options();
|
||||
public Xomw_strip_state Strip_state() {return strip_state;} private final Xomw_strip_state strip_state = new Xomw_strip_state();
|
||||
public Xomw_sanitizer Sanitizer() {return sanitizer;} private final Xomw_sanitizer sanitizer = new Xomw_sanitizer();
|
||||
public Xomw_linker Linker() {return linker;} private final Xomw_linker linker;
|
||||
public Bry_bfr Tmp() {return tmp;} private final Bry_bfr tmp = Bry_bfr_.New();
|
||||
public Xomw_quote_wkr Quote_wkr() {return quote_wkr;} private final Xomw_quote_wkr quote_wkr;
|
||||
public Xomw_lnki_wkr Lnki_wkr() {return lnki_wkr;} private final Xomw_lnki_wkr lnki_wkr;
|
||||
public boolean Output_type__wiki() {return output_type__wiki;} private final boolean output_type__wiki = false;
|
||||
/**
 * Creates a parser and wires up its workers.
 * The regex helpers and the "rel" attribute name are static and are built once, by the first instance.
 * The external-link rel value is an instance field and is set for every instance.
 */
public Xomw_parser() {
	if (regex_space == null) {
		synchronized (Type_adp_.ClassOf_obj(this)) {
			// re-check under the lock: another thread may have finished the init while this one waited
			if (regex_space == null) {
				Xomw_regex_space space = new Xomw_regex_space();
				regex_boundary = new Xomw_regex_boundary(space);
				regex_url = new Xomw_regex_url(space);
				Atr__rel = Bry_.new_a7("rel");
				// assign the guard field last so it is only non-null once the other statics are set
				// NOTE(review): the static fields are not volatile; fully safe publication would need a change at their declarations
				regex_space = space;
			}
		}
	}

	// instance field: must be assigned for EVERY parser.
	// It was previously set inside the one-time block above, which left it null on every instance after the first.
	Get_external_link_rel = Bry_.new_a7("nofollow");

	this.link_renderer = new Xomw_link_renderer(sanitizer);
	this.linker = new Xomw_linker(link_renderer);
	this.protocols_trie = Xomw_parser.Protocols__dflt();
	this.holders = new Xomw_link_holders(link_renderer, tmp);
	this.table_wkr = new Xomw_table_wkr(this);
	this.quote_wkr = new Xomw_quote_wkr(this);
	this.lnke_wkr = new Xomw_lnke_wkr(this);
	this.lnki_wkr = new Xomw_lnki_wkr(this, holders, link_renderer, protocols_trie);
	this.heading_wkr_cbk = new Xomw_heading_cbk__html();
	this.magiclinks_wkr = new Xomw_magiclinks_wkr(this, sanitizer, linker, regex_boundary, regex_url);
}
|
||||
public void Init_by_wiki(Xowe_wiki wiki) {
|
||||
linker.Init_by_wiki(env, wiki.Lang().Lnki_trail_mgr().Trie());
|
||||
lnke_wkr.Init_by_wiki(protocols_trie, regex_url, regex_space);
|
||||
lnki_wkr.Init_by_wiki(env, wiki);
|
||||
doubleunder_wkr.Init_by_wiki(doubleunder_data, wiki.Lang());
|
||||
magiclinks_wkr.Init_by_wiki();
|
||||
}
|
||||
public void Init_by_page(Xoa_ttl ttl) {
|
||||
pctx.Init_by_page(ttl);
|
||||
}
|
||||
public void Internal_parse(Xomw_parser_bfr pbfr, byte[] text) {
|
||||
pbfr.Init(text);
|
||||
// $origText = text;
|
||||
|
||||
// MW.HOOK:ParserBeforeInternalParse
|
||||
|
||||
// if ($frame) {
|
||||
// use frame depth to infer how include/noinclude tags should be handled
|
||||
// depth=0 means this is the top-level document; otherwise it's an included document
|
||||
// boolean for_inclusion = false;
|
||||
// if (!$frame->depth) {
|
||||
// $flag = 0;
|
||||
// } else {
|
||||
// $flag = Parser::PTD_FOR_INCLUSION;
|
||||
// }
|
||||
// text = prepro_wkr.Preprocess_to_xml(text, for_inclusion);
|
||||
// text = $frame->expand($dom);
|
||||
// } else {
|
||||
// // if $frame is not provided, then use old-style replaceVariables
|
||||
// text = $this->replaceVariables(text);
|
||||
// }
|
||||
|
||||
// MW.HOOK:InternalParseBeforeSanitize
|
||||
// text = Sanitizer::removeHTMLtags(
|
||||
// text,
|
||||
// [ &$this, 'attributeStripCallback' ],
|
||||
// false,
|
||||
// array_keys($this->mTransparentTagHooks),
|
||||
// [],
|
||||
// [ &$this, 'addTrackingCategory' ]
|
||||
// );
|
||||
// MW.HOOK:InternalParseBeforeLinks
|
||||
|
||||
// Tables need to come after variable replacement for things to work
|
||||
// properly; putting them before other transformations should keep
|
||||
// exciting things like link expansions from showing up in surprising
|
||||
// places.
|
||||
table_wkr.Do_table_stuff(pctx, pbfr);
|
||||
hr_wkr.Replace_hrs(pctx, pbfr);
|
||||
|
||||
doubleunder_wkr.Do_double_underscore(pctx, pbfr); // DONE: DATE:2017-01-27
|
||||
|
||||
heading_wkr.Do_headings(pctx, pbfr, heading_wkr_cbk);
|
||||
lnki_wkr.Replace_internal_links(pctx, pbfr);
|
||||
quote_wkr.Do_all_quotes(pctx, pbfr);
|
||||
lnke_wkr.Replace_external_links(pctx, pbfr);
|
||||
|
||||
// replaceInternalLinks may sometimes leave behind
|
||||
// absolute URLs, which have to be masked to hide them from replaceExternalLinks
|
||||
Xomw_parser_bfr_.Replace(pbfr, Bry__marker__noparse, Bry_.Empty);
|
||||
magiclinks_wkr.Do_magic_links(pctx, pbfr);
|
||||
|
||||
// $text = $this->formatHeadings($text, $origText, $isMain);
|
||||
}
|
||||
|
||||
public void Internal_parse_half_parsed(Xomw_parser_bfr pbfr, boolean is_main, boolean line_start) {
|
||||
strip_state.Unstrip_general(pbfr);
|
||||
|
||||
// MW.HOOK:ParserAfterUnstrip
|
||||
|
||||
// Clean up special characters, only run once, next-to-last before doBlockLevels
|
||||
nbsp_wkr.Do_nbsp(pctx, pbfr);
|
||||
|
||||
block_wkr.Do_block_levels(pctx, pbfr, line_start);
|
||||
|
||||
lnki_wkr.Replace_link_holders(pctx, pbfr);
|
||||
|
||||
// The input doesn't get language converted if
|
||||
// a) It's disabled
|
||||
// b) Content isn't converted
|
||||
// c) It's a conversion table
|
||||
// d) it is an interface message (which is in the user language)
|
||||
// if ( !( $this->mOptions->getDisableContentConversion()
|
||||
// || isset( $this->mDoubleUnderscores['nocontentconvert'] ) )
|
||||
// ) {
|
||||
// if ( !$this->mOptions->getInterfaceMessage() ) {
|
||||
// // The position of the convert() call should not be changed. it
|
||||
// // assumes that the links are all replaced and the only thing left
|
||||
// // is the <nowiki> mark.
|
||||
// $text = $this->getConverterLanguage()->convert( $text );
|
||||
// }
|
||||
// }
|
||||
|
||||
strip_state.Unstrip_nowiki(pbfr);
|
||||
|
||||
// MW.HOOK:ParserBeforeTidy
|
||||
|
||||
// $text = $this->replaceTransparentTags( $text );
|
||||
strip_state.Unstrip_general(pbfr);
|
||||
|
||||
sanitizer.Normalize_char_references(pbfr);
|
||||
|
||||
// if ( MWTidy::isEnabled() ) {
|
||||
// if ( $this->mOptions->getTidy() ) {
|
||||
// $text = MWTidy::tidy( $text );
|
||||
// }
|
||||
// }
|
||||
// else {
|
||||
// attempt to sanitize at least some nesting problems
|
||||
// (T4702 and quite a few others)
|
||||
// $tidyregs = [
|
||||
// // ''Something [http://www.cool.com cool''] -->
|
||||
// // <i>Something</i><a href="http://www.cool.com"..><i>cool></i></a>
|
||||
// '/(<([bi])>)(<([bi])>)?([^<]*)(<\/?a[^<]*>)([^<]*)(<\/\\4>)?(<\/\\2>)/' =>
|
||||
// '\\1\\3\\5\\8\\9\\6\\1\\3\\7\\8\\9',
|
||||
// // fix up an anchor inside another anchor, only
|
||||
// // at least for a single single nested link (T5695)
|
||||
// '/(<a[^>]+>)([^<]*)(<a[^>]+>[^<]*)<\/a>(.*)<\/a>/' =>
|
||||
// '\\1\\2</a>\\3</a>\\1\\4</a>',
|
||||
// // fix div inside inline elements- doBlockLevels won't wrap a line which
|
||||
// // contains a div, so fix it up here; replace
|
||||
// // div with escaped text
|
||||
// '/(<([aib]) [^>]+>)([^<]*)(<div([^>]*)>)(.*)(<\/div>)([^<]*)(<\/\\2>)/' =>
|
||||
// '\\1\\3<div\\5>\\6</div>\\8\\9',
|
||||
// // remove empty italic or bold tag pairs, some
|
||||
// // introduced by rules above
|
||||
// '/<([bi])><\/\\1>/' => '',
|
||||
// ];
|
||||
|
||||
// $text = preg_replace(
|
||||
// array_keys( $tidyregs ),
|
||||
// array_values( $tidyregs ),
|
||||
// $text );
|
||||
// }
|
||||
|
||||
// MW.HOOK:ParserAfterTidy
|
||||
}
|
||||
public byte[] Armor_links(Bry_bfr trg, byte[] src, int src_bgn, int src_end) {
|
||||
// PORTED:preg_replace( '/\b((?i)' . $this->mUrlProtocols . ')/', self::MARKER_PREFIX . "NOPARSE$1", $text )
|
||||
int cur = src_bgn;
|
||||
int prv = cur;
|
||||
boolean dirty = false;
|
||||
boolean called_by_bry = trg == null;
|
||||
while (true) {
|
||||
// exit if EOS
|
||||
if (cur == src_end) {
|
||||
// if dirty, add rest of String
|
||||
if (dirty)
|
||||
trg.Add_mid(src, prv, src_end);
|
||||
break;
|
||||
}
|
||||
|
||||
// check if cur matches protocol
|
||||
Object protocol_obj = protocols_trie.Match_at(trv, src, cur, src_end);
|
||||
// no match; continue
|
||||
if (protocol_obj == null) {
|
||||
cur++;
|
||||
}
|
||||
// match; add to bfr
|
||||
else {
|
||||
dirty = true;
|
||||
byte[] protocol_bry = (byte[])protocol_obj;
|
||||
if (called_by_bry) trg = Bry_bfr_.New();
|
||||
trg.Add_bry_many(Xomw_strip_state.Bry__marker__bgn, Bry__noparse, protocol_bry);
|
||||
cur += protocol_bry.length;
|
||||
prv = cur;
|
||||
}
|
||||
}
|
||||
if (called_by_bry) {
|
||||
if (dirty)
|
||||
return trg.To_bry_and_clear();
|
||||
else {
|
||||
if (src_bgn == 0 && src_end == src.length)
|
||||
return src;
|
||||
else
|
||||
return Bry_.Mid(src, src_bgn, src_end);
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (dirty)
|
||||
return null;
|
||||
else {
|
||||
trg.Add_mid(src, src_bgn, src_end);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
}
|
||||
public byte[] Insert_strip_item(byte[] text) {
|
||||
tmp.Add_bry_many(Xomw_strip_state.Bry__marker__bgn, Bry__strip_state_item);
|
||||
tmp.Add_int_variable(marker_index);
|
||||
tmp.Add(Xomw_strip_state.Bry__marker__end);
|
||||
byte[] marker = tmp.To_bry_and_clear();
|
||||
marker_index++;
|
||||
strip_state.Add_general(marker, text);
|
||||
return marker;
|
||||
}
|
||||
public Xomw_atr_mgr Get_external_link_attribs(Xomw_atr_mgr atrs) {
|
||||
atrs.Clear();
|
||||
byte[] rel = Get_external_link_rel;
|
||||
|
||||
// XO.MW.UNSUPPORTED: XO will assume target is blank; MW will set target of "_blank", "_self", etc. depending on global opt
|
||||
// $target = $this->mOptions->getExternalLinkTarget();
|
||||
atrs.Add(Atr__rel, rel);
|
||||
return atrs;
|
||||
}
|
||||
// XO.MW.UNSUPPORTED: XO will always assume "nofollow"; MW will return "nofollow" if (a) ns is in ns-exception list or (b) domain is in domain-exception list;
|
||||
// if ($wgNoFollowLinks && !in_array($ns, $wgNoFollowNsExceptions) && !wfMatchesDomainList($url, $wgNoFollowDomainExceptions)
|
||||
public byte[] Get_external_link_rel;
|
||||
private static byte[] Atr__rel;
|
||||
private static final byte[] Bry__strip_state_item = Bry_.new_a7("-item-"), Bry__noparse = Bry_.new_a7("NOPARSE");
|
||||
private static final byte[] Bry__marker__noparse = Bry_.Add(Xomw_strip_state.Bry__marker__bgn, Bry__noparse);
|
||||
public static Btrie_slim_mgr Protocols__dflt() {
|
||||
Btrie_slim_mgr rv = Btrie_slim_mgr.ci_a7();
|
||||
Gfo_protocol_itm[] ary = Gfo_protocol_itm.Ary();
|
||||
for (Gfo_protocol_itm itm : ary) {
|
||||
byte[] key = itm.Text_bry(); // EX: "https://"
|
||||
rv.Add_obj(key, key);
|
||||
}
|
||||
byte[] bry__relative = Bry_.new_a7("//");
|
||||
rv.Add_obj(bry__relative, bry__relative); // REF.MW: "$this->mUrlProtocols = wfUrlProtocols();"; "wfUrlProtocols( $includeProtocolRelative = true )"
|
||||
return rv;
|
||||
}
|
||||
}
|
||||
@@ -1,77 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
|
||||
import org.junit.*;
|
||||
public class Xomw_parser__tst {
|
||||
private final Xomw_parser__fxt fxt = new Xomw_parser__fxt();
|
||||
@Test public void Basic() {
|
||||
fxt.Test__parse(String_.Concat_lines_nl_skip_last
|
||||
( "== heading_1 =="
|
||||
, "para_1"
|
||||
, "== heading_2 =="
|
||||
, "para_2"
|
||||
, "-----"
|
||||
, "{|"
|
||||
, "|-"
|
||||
, "|a"
|
||||
, "|}"
|
||||
, "''italics''"
|
||||
, "__TOC__"
|
||||
, "[https://a.org b]"
|
||||
, "[[A|abc]]"
|
||||
, "https://c.org"
|
||||
, "a »b«  !important c"
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "<h2> heading_1 </h2>"
|
||||
, "<p>para_1"
|
||||
, "</p>"
|
||||
, "<h2> heading_2 </h2>"
|
||||
, "<p>para_2"
|
||||
, "</p>"
|
||||
, "<hr />"
|
||||
, "<table>"
|
||||
, ""
|
||||
, "<tr>"
|
||||
, "<td>a"
|
||||
, "</td></tr></table>"
|
||||
, "<p><i>italics</i>"
|
||||
, "<!--MWTOC-->"
|
||||
, "<a rel=\"nofollow\" class=\"external text\" href=\"https://a.org\">b</a>"
|
||||
, "<a href=\"/wiki/A\" title=\"A\">abc</a>"
|
||||
, "<a rel=\"nofollow\" class=\"external free\" href=\"https://c.org\">https://c.org</a>"
|
||||
, "a »b«  !important c"
|
||||
, "</p>"
|
||||
));
|
||||
}
|
||||
}
|
||||
class Xomw_parser__fxt {
|
||||
private final Xomw_parser mgr = new Xomw_parser();
|
||||
private final Xomw_parser_bfr pbfr = new Xomw_parser_bfr();
|
||||
public Xomw_parser__fxt() {
|
||||
Xoae_app app = Xoa_app_fxt.Make__app__edit();
|
||||
Xowe_wiki wiki = Xoa_app_fxt.Make__wiki__edit(app);
|
||||
mgr.Init_by_wiki(wiki);
|
||||
mgr.Init_by_page(wiki.Ttl_parse(Bry_.new_a7("Page_1")));
|
||||
}
|
||||
public void Test__parse(String src_str, String expd) {
|
||||
byte[] src_bry = Bry_.new_u8(src_str);
|
||||
mgr.Internal_parse(pbfr, src_bry);
|
||||
mgr.Internal_parse_half_parsed(pbfr, true, true);
|
||||
Tfds.Eq_str_lines(expd, pbfr.Rslt().To_str_and_clear(), src_str);
|
||||
}
|
||||
}
|
||||
@@ -1,48 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
|
||||
public class Xomw_parser_bfr { // manages 2 bfrs to eliminate multiple calls to new memory allocations ("return bfr.To_bry_and_clear()")
|
||||
private final Bry_bfr bfr_1 = Bry_bfr_.New(), bfr_2 = Bry_bfr_.New();
|
||||
private Bry_bfr src, trg;
|
||||
public Xomw_parser_bfr() {
|
||||
this.src = bfr_1;
|
||||
this.trg = bfr_2;
|
||||
}
|
||||
public Bry_bfr Src() {return src;}
|
||||
public Bry_bfr Trg() {return trg;}
|
||||
public Bry_bfr Rslt() {return src;}
|
||||
public Xomw_parser_bfr Init(byte[] text) {
|
||||
// resize each bfr once by guessing that html_len = text_len * 2
|
||||
int text_len = text.length;
|
||||
int html_len = text_len * 2;
|
||||
src.Resize(html_len);
|
||||
trg.Resize(html_len);
|
||||
|
||||
// clear and add
|
||||
src.Clear();
|
||||
trg.Clear();
|
||||
src.Add(text);
|
||||
return this;
|
||||
}
|
||||
public void Switch() {
|
||||
Bry_bfr tmp = src;
|
||||
this.src = trg;
|
||||
this.trg = tmp;
|
||||
trg.Clear();
|
||||
}
|
||||
}
|
||||
@@ -1,69 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
|
||||
public class Xomw_parser_bfr_ {
|
||||
public static void Replace(Xomw_parser_bfr pbfr, byte[] find, byte[] repl) {
|
||||
// XO.PBFR
|
||||
Bry_bfr src_bfr = pbfr.Src();
|
||||
byte[] src = src_bfr.Bfr();
|
||||
int src_bgn = 0;
|
||||
int src_end = src_bfr.Len();
|
||||
Bry_bfr bfr = pbfr.Trg();
|
||||
|
||||
if (Replace(bfr, Bool_.N, src, src_bgn, src_end, find, repl) != null)
|
||||
pbfr.Switch();
|
||||
}
|
||||
private static byte[] Replace(Bry_bfr bfr, boolean lone_bfr, byte[] src, int src_bgn, int src_end, byte[] find, byte[] repl) {
|
||||
boolean dirty = false;
|
||||
int cur = src_bgn;
|
||||
boolean called_by_bry = bfr == null;
|
||||
|
||||
while (true) {
|
||||
int find_bgn = Bry_find_.Find_fwd(src, find, cur);
|
||||
if (find_bgn == Bry_find_.Not_found) {
|
||||
if (dirty)
|
||||
bfr.Add_mid(src, cur, src_end);
|
||||
break;
|
||||
}
|
||||
if (called_by_bry) bfr = Bry_bfr_.New();
|
||||
bfr.Add_mid(src, cur, find_bgn);
|
||||
cur += find.length;
|
||||
dirty = true;
|
||||
}
|
||||
|
||||
if (dirty) {
|
||||
if (called_by_bry)
|
||||
return bfr.To_bry_and_clear();
|
||||
else
|
||||
return Bry_.Empty;
|
||||
}
|
||||
else {
|
||||
if (called_by_bry) {
|
||||
if (src_bgn == 0 && src_end == src.length)
|
||||
return src;
|
||||
else
|
||||
return Bry_.Mid(src, src_bgn, src_end);
|
||||
}
|
||||
else {
|
||||
if (lone_bfr)
|
||||
bfr.Add_mid(src, src_bgn, src_end);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,32 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
|
||||
import gplx.xowa.mws.parsers.lnkis.*;
|
||||
public class Xomw_parser_ctx {
|
||||
public Xoa_ttl Page_title() {return page_title;} private Xoa_ttl page_title;
|
||||
public Xomw_image_params Lnki_wkr__make_image__img_params = new Xomw_image_params();
|
||||
public byte[][] Lnki_wkr__make_image__match_magic_word = new byte[2][];
|
||||
public int[] Lnki_wkr__make_image__img_size = new int[2];
|
||||
public Xomw_params_mto Linker__makeImageLink__prms = new Xomw_params_mto();
|
||||
|
||||
public void Init_by_page(Xoa_ttl page_title) {
|
||||
this.page_title = page_title;
|
||||
}
|
||||
|
||||
public static final int Pos__bos = -1;
|
||||
}
|
||||
@@ -1,34 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
|
||||
import gplx.xowa.mws.filerepo.file.*; import gplx.xowa.mws.media.*;
|
||||
public class Xomw_parser_env {
|
||||
public byte[] Lang__align_end = Bry_.new_a7("right");
|
||||
public int User__default__thumbsize = 220;
|
||||
|
||||
public int Global__wgSVGMaxSize = 5120;
|
||||
public double Global__wgThumbUpright = .75d;
|
||||
public int[] Global__wgThumbLimits = new int[] {120, 150, 180, 200, 250, 300};
|
||||
|
||||
public Xomw_MagicWordMgr Magic_word_mgr() {return magic_word_mgr;} private final Xomw_MagicWordMgr magic_word_mgr = new Xomw_MagicWordMgr();
|
||||
public Xomw_message_mgr Message_mgr() {return message_mgr;} private final Xomw_message_mgr message_mgr = new Xomw_message_mgr();
|
||||
public Xomw_file_finder File_finder() {return file_finder;} private Xomw_file_finder file_finder = new Xomw_file_finder__noop();
|
||||
public Xomw_MediaHandlerFactory MediaHandlerFactory() {return mediaHandlerFactory;} private final Xomw_MediaHandlerFactory mediaHandlerFactory = new Xomw_MediaHandlerFactory();
|
||||
|
||||
public Xomw_parser_env File_finder_(Xomw_file_finder v) {file_finder = v; return this;}
|
||||
}
|
||||
@@ -1,933 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
|
||||
public class Xomw_parser_options {
|
||||
public Xomw_parser_options() {
|
||||
this.mThumbSize = 220;
|
||||
}
|
||||
// /**
|
||||
// * Interlanguage links are removed and returned in an array
|
||||
// */
|
||||
// private $mInterwikiMagic;
|
||||
//
|
||||
// /**
|
||||
// * Allow external images inline?
|
||||
// */
|
||||
// private $mAllowExternalImages;
|
||||
//
|
||||
// /**
|
||||
// * If not, any exception?
|
||||
// */
|
||||
// private $mAllowExternalImagesFrom;
|
||||
//
|
||||
// /**
|
||||
// * If not or it doesn't match, should we check an on-wiki whitelist?
|
||||
// */
|
||||
// private $mEnableImageWhitelist;
|
||||
//
|
||||
// /**
|
||||
// * Date format index
|
||||
// */
|
||||
// private $mDateFormat = null;
|
||||
//
|
||||
// /**
|
||||
// * Create "edit section" links?
|
||||
// */
|
||||
// private $mEditSection = true;
|
||||
//
|
||||
// /**
|
||||
// * Allow inclusion of special pages?
|
||||
// */
|
||||
// private $mAllowSpecialInclusion;
|
||||
//
|
||||
// /**
|
||||
// * Use tidy to cleanup output HTML?
|
||||
// */
|
||||
// private $mTidy = false;
|
||||
//
|
||||
// /**
|
||||
// * Which lang to call for PLURAL and GRAMMAR
|
||||
// */
|
||||
// private $mInterfaceMessage = false;
|
||||
//
|
||||
// /**
|
||||
// * Overrides $mInterfaceMessage with arbitrary language
|
||||
// */
|
||||
// private $mTargetLanguage = null;
|
||||
//
|
||||
// /**
|
||||
// * Maximum size of template expansions, in bytes
|
||||
// */
|
||||
// private $mMaxIncludeSize;
|
||||
//
|
||||
// /**
|
||||
// * Maximum number of nodes touched by PPFrame::expand()
|
||||
// */
|
||||
// private $mMaxPPNodeCount;
|
||||
//
|
||||
// /**
|
||||
// * Maximum number of nodes generated by Preprocessor::preprocessToObj()
|
||||
// */
|
||||
// private $mMaxGeneratedPPNodeCount;
|
||||
//
|
||||
// /**
|
||||
// * Maximum recursion depth in PPFrame::expand()
|
||||
// */
|
||||
// private $mMaxPPExpandDepth;
|
||||
//
|
||||
// /**
|
||||
// * Maximum recursion depth for templates within templates
|
||||
// */
|
||||
// private $mMaxTemplateDepth;
|
||||
//
|
||||
// /**
|
||||
// * Maximum number of calls per parse to expensive parser functions
|
||||
// */
|
||||
// private $mExpensiveParserFunctionLimit;
|
||||
//
|
||||
// /**
|
||||
// * Remove HTML comments. ONLY APPLIES TO PREPROCESS OPERATIONS
|
||||
// */
|
||||
// private $mRemoveComments = true;
|
||||
//
|
||||
// /**
|
||||
// * @var callable Callback for current revision fetching; first argument to call_user_func().
|
||||
// */
|
||||
// private $mCurrentRevisionCallback =
|
||||
// [ 'Parser', 'statelessFetchRevision' ];
|
||||
//
|
||||
// /**
|
||||
// * @var callable Callback for template fetching; first argument to call_user_func().
|
||||
// */
|
||||
// private $mTemplateCallback =
|
||||
// [ 'Parser', 'statelessFetchTemplate' ];
|
||||
//
|
||||
// /**
|
||||
// * @var callable|null Callback to generate a guess for {{REVISIONID}}
|
||||
// */
|
||||
// private $mSpeculativeRevIdCallback;
|
||||
//
|
||||
// /**
|
||||
// * Enable limit report in an HTML comment on output
|
||||
// */
|
||||
// private $mEnableLimitReport = false;
|
||||
//
|
||||
// /**
|
||||
// * Timestamp used for {{CURRENTDAY}} etc.
|
||||
// */
|
||||
// private $mTimestamp;
|
||||
//
|
||||
// /**
|
||||
// * Target attribute for external links
|
||||
// */
|
||||
// private $mExternalLinkTarget;
|
||||
//
|
||||
// /**
|
||||
// * Clean up signature texts?
|
||||
// * @see Parser::cleanSig
|
||||
// */
|
||||
// private $mCleanSignatures;
|
||||
//
|
||||
// /**
|
||||
// * Transform wiki markup when saving the page?
|
||||
// */
|
||||
// private $mPreSaveTransform = true;
|
||||
//
|
||||
// /**
|
||||
// * Whether content conversion should be disabled
|
||||
// */
|
||||
// private $mDisableContentConversion;
|
||||
//
|
||||
// /**
|
||||
// * Whether title conversion should be disabled
|
||||
// */
|
||||
// private $mDisableTitleConversion;
|
||||
//
|
||||
// /**
|
||||
// * Automatically number headings?
|
||||
// */
|
||||
// private $mNumberHeadings;
|
||||
|
||||
/**
|
||||
* Thumb size preferred by the user.
|
||||
*/
|
||||
private int mThumbSize;
|
||||
|
||||
// /**
|
||||
// * Maximum article size of an article to be marked as "stub"
|
||||
// */
|
||||
// private $mStubThreshold;
|
||||
//
|
||||
// /**
|
||||
// * Language Object of the User language.
|
||||
// */
|
||||
// private $mUserLang;
|
||||
//
|
||||
// /**
|
||||
// * @var User
|
||||
// * Stored user Object
|
||||
// */
|
||||
// private $mUser;
|
||||
//
|
||||
// /**
|
||||
// * Parsing the page for a "preview" operation?
|
||||
// */
|
||||
// private $mIsPreview = false;
|
||||
//
|
||||
// /**
|
||||
// * Parsing the page for a "preview" operation on a single section?
|
||||
// */
|
||||
// private $mIsSectionPreview = false;
|
||||
//
|
||||
// /**
|
||||
// * Parsing the printable version of the page?
|
||||
// */
|
||||
// private $mIsPrintable = false;
|
||||
//
|
||||
// /**
|
||||
// * Extra key that should be present in the caching key.
|
||||
// */
|
||||
// private $mExtraKey = '';
|
||||
//
|
||||
// /**
|
||||
// * Are magic ISBN links enabled?
|
||||
// */
|
||||
// private $mMagicISBNLinks = true;
|
||||
//
|
||||
// /**
|
||||
// * Are magic PMID links enabled?
|
||||
// */
|
||||
// private $mMagicPMIDLinks = true;
|
||||
//
|
||||
// /**
|
||||
// * Are magic RFC links enabled?
|
||||
// */
|
||||
// private $mMagicRFCLinks = true;
|
||||
//
|
||||
// /**
|
||||
// * Function to be called when an option is accessed.
|
||||
// */
|
||||
// private $onAccessCallback = null;
|
||||
//
|
||||
// /**
|
||||
// * If the page being parsed is a redirect, this should hold the redirect
|
||||
// * target.
|
||||
// * @var Title|null
|
||||
// */
|
||||
// private $redirectTarget = null;
|
||||
//
|
||||
// public function getInterwikiMagic() {
|
||||
// return this.mInterwikiMagic;
|
||||
// }
|
||||
//
|
||||
// public function getAllowExternalImages() {
|
||||
// return this.mAllowExternalImages;
|
||||
// }
|
||||
//
|
||||
// public function getAllowExternalImagesFrom() {
|
||||
// return this.mAllowExternalImagesFrom;
|
||||
// }
|
||||
//
|
||||
// public function getEnableImageWhitelist() {
|
||||
// return this.mEnableImageWhitelist;
|
||||
// }
|
||||
//
|
||||
// public function getEditSection() {
|
||||
// return this.mEditSection;
|
||||
// }
|
||||
//
|
||||
// public function getNumberHeadings() {
|
||||
// this.optionUsed( 'numberheadings' );
|
||||
//
|
||||
// return this.mNumberHeadings;
|
||||
// }
|
||||
//
|
||||
// public function getAllowSpecialInclusion() {
|
||||
// return this.mAllowSpecialInclusion;
|
||||
// }
|
||||
//
|
||||
// public function getTidy() {
|
||||
// return this.mTidy;
|
||||
// }
|
||||
//
|
||||
// public function getInterfaceMessage() {
|
||||
// return this.mInterfaceMessage;
|
||||
// }
|
||||
//
|
||||
// public function getTargetLanguage() {
|
||||
// return this.mTargetLanguage;
|
||||
// }
|
||||
//
|
||||
// public function getMaxIncludeSize() {
|
||||
// return this.mMaxIncludeSize;
|
||||
// }
|
||||
//
|
||||
// public function getMaxPPNodeCount() {
|
||||
// return this.mMaxPPNodeCount;
|
||||
// }
|
||||
//
|
||||
// public function getMaxGeneratedPPNodeCount() {
|
||||
// return this.mMaxGeneratedPPNodeCount;
|
||||
// }
|
||||
//
|
||||
// public function getMaxPPExpandDepth() {
|
||||
// return this.mMaxPPExpandDepth;
|
||||
// }
|
||||
//
|
||||
// public function getMaxTemplateDepth() {
|
||||
// return this.mMaxTemplateDepth;
|
||||
// }
|
||||
//
|
||||
// /* @since 1.20 */
|
||||
// public function getExpensiveParserFunctionLimit() {
|
||||
// return this.mExpensiveParserFunctionLimit;
|
||||
// }
|
||||
//
|
||||
// public function getRemoveComments() {
|
||||
// return this.mRemoveComments;
|
||||
// }
|
||||
//
|
||||
// /* @since 1.24 */
|
||||
// public function getCurrentRevisionCallback() {
|
||||
// return this.mCurrentRevisionCallback;
|
||||
// }
|
||||
//
|
||||
// public function getTemplateCallback() {
|
||||
// return this.mTemplateCallback;
|
||||
// }
|
||||
//
|
||||
// /** @since 1.28 */
|
||||
// public function getSpeculativeRevIdCallback() {
|
||||
// return this.mSpeculativeRevIdCallback;
|
||||
// }
|
||||
//
|
||||
// public function getEnableLimitReport() {
|
||||
// return this.mEnableLimitReport;
|
||||
// }
|
||||
//
|
||||
// public function getCleanSignatures() {
|
||||
// return this.mCleanSignatures;
|
||||
// }
|
||||
//
|
||||
// public function getExternalLinkTarget() {
|
||||
// return this.mExternalLinkTarget;
|
||||
// }
|
||||
//
|
||||
// public function getDisableContentConversion() {
|
||||
// return this.mDisableContentConversion;
|
||||
// }
|
||||
//
|
||||
// public function getDisableTitleConversion() {
|
||||
// return this.mDisableTitleConversion;
|
||||
// }
|
||||
|
||||
public int getThumbSize() {
|
||||
// this.optionUsed( 'thumbsize' );
|
||||
|
||||
return this.mThumbSize;
|
||||
}
|
||||
|
||||
// public function getStubThreshold() {
|
||||
// this.optionUsed( 'stubthreshold' );
|
||||
//
|
||||
// return this.mStubThreshold;
|
||||
// }
|
||||
//
|
||||
// public function getIsPreview() {
|
||||
// return this.mIsPreview;
|
||||
// }
|
||||
//
|
||||
// public function getIsSectionPreview() {
|
||||
// return this.mIsSectionPreview;
|
||||
// }
|
||||
//
|
||||
// public function getIsPrintable() {
|
||||
// this.optionUsed( 'printable' );
|
||||
//
|
||||
// return this.mIsPrintable;
|
||||
// }
|
||||
//
|
||||
// public function getUser() {
|
||||
// return this.mUser;
|
||||
// }
|
||||
//
|
||||
// public function getPreSaveTransform() {
|
||||
// return this.mPreSaveTransform;
|
||||
// }
|
||||
//
|
||||
// public function getDateFormat() {
|
||||
// this.optionUsed( 'dateformat' );
|
||||
// if ( !isset( this.mDateFormat ) ) {
|
||||
// this.mDateFormat = this.mUser->getDatePreference();
|
||||
// }
|
||||
// return this.mDateFormat;
|
||||
// }
|
||||
//
|
||||
// public function getTimestamp() {
|
||||
// if ( !isset( this.mTimestamp ) ) {
|
||||
// this.mTimestamp = wfTimestampNow();
|
||||
// }
|
||||
// return this.mTimestamp;
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Get the user language used by the parser for this page and split the parser cache.
|
||||
// *
|
||||
// * @warning: Calling this causes the parser cache to be fragmented by user language!
|
||||
// * To avoid cache fragmentation, output should not depend on the user language.
|
||||
// * Use Parser::getFunctionLang() or Parser::getTargetLanguage() instead!
|
||||
// *
|
||||
// * @note This function will trigger a cache fragmentation by recording the
|
||||
// * 'userlang' option, see optionUsed(). This is done to avoid cache pollution
|
||||
// * when the page is rendered based on the language of the user.
|
||||
// *
|
||||
// * @note When saving, this will return the default language instead of the user's.
|
||||
// * {{int: }} uses this which used to produce inconsistent link tables (bug 14404).
|
||||
// *
|
||||
// * @return Language
|
||||
// * @since 1.19
|
||||
// */
|
||||
// public function getUserLangObj() {
|
||||
// this.optionUsed( 'userlang' );
|
||||
// return this.mUserLang;
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Same as getUserLangObj() but returns a String instead.
|
||||
// *
|
||||
// * @warning: Calling this causes the parser cache to be fragmented by user language!
|
||||
// * To avoid cache fragmentation, output should not depend on the user language.
|
||||
// * Use Parser::getFunctionLang() or Parser::getTargetLanguage() instead!
|
||||
// *
|
||||
// * @see getUserLangObj()
|
||||
// *
|
||||
// * @return String Language code
|
||||
// * @since 1.17
|
||||
// */
|
||||
// public function getUserLang() {
|
||||
// return this.getUserLangObj()->getCode();
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * @since 1.28
|
||||
// * @return boolean
|
||||
// */
|
||||
// public function getMagicISBNLinks() {
|
||||
// return this.mMagicISBNLinks;
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * @since 1.28
|
||||
// * @return boolean
|
||||
// */
|
||||
// public function getMagicPMIDLinks() {
|
||||
// return this.mMagicPMIDLinks;
|
||||
// }
|
||||
// /**
|
||||
// * @since 1.28
|
||||
// * @return boolean
|
||||
// */
|
||||
// public function getMagicRFCLinks() {
|
||||
// return this.mMagicRFCLinks;
|
||||
// }
|
||||
// public function setInterwikiMagic( $x ) {
|
||||
// return wfSetVar( this.mInterwikiMagic, $x );
|
||||
// }
|
||||
//
|
||||
// public function setAllowExternalImages( $x ) {
|
||||
// return wfSetVar( this.mAllowExternalImages, $x );
|
||||
// }
|
||||
//
|
||||
// public function setAllowExternalImagesFrom( $x ) {
|
||||
// return wfSetVar( this.mAllowExternalImagesFrom, $x );
|
||||
// }
|
||||
//
|
||||
// public function setEnableImageWhitelist( $x ) {
|
||||
// return wfSetVar( this.mEnableImageWhitelist, $x );
|
||||
// }
|
||||
//
|
||||
// public function setDateFormat( $x ) {
|
||||
// return wfSetVar( this.mDateFormat, $x );
|
||||
// }
|
||||
//
|
||||
// public function setEditSection( $x ) {
|
||||
// return wfSetVar( this.mEditSection, $x );
|
||||
// }
|
||||
//
|
||||
// public function setNumberHeadings( $x ) {
|
||||
// return wfSetVar( this.mNumberHeadings, $x );
|
||||
// }
|
||||
//
|
||||
// public function setAllowSpecialInclusion( $x ) {
|
||||
// return wfSetVar( this.mAllowSpecialInclusion, $x );
|
||||
// }
|
||||
//
|
||||
// public function setTidy( $x ) {
|
||||
// return wfSetVar( this.mTidy, $x );
|
||||
// }
|
||||
//
|
||||
// public function setInterfaceMessage( $x ) {
|
||||
// return wfSetVar( this.mInterfaceMessage, $x );
|
||||
// }
|
||||
//
|
||||
// public function setTargetLanguage( $x ) {
|
||||
// return wfSetVar( this.mTargetLanguage, $x, true );
|
||||
// }
|
||||
//
|
||||
// public function setMaxIncludeSize( $x ) {
|
||||
// return wfSetVar( this.mMaxIncludeSize, $x );
|
||||
// }
|
||||
//
|
||||
// public function setMaxPPNodeCount( $x ) {
|
||||
// return wfSetVar( this.mMaxPPNodeCount, $x );
|
||||
// }
|
||||
//
|
||||
// public function setMaxGeneratedPPNodeCount( $x ) {
|
||||
// return wfSetVar( this.mMaxGeneratedPPNodeCount, $x );
|
||||
// }
|
||||
//
|
||||
// public function setMaxTemplateDepth( $x ) {
|
||||
// return wfSetVar( this.mMaxTemplateDepth, $x );
|
||||
// }
|
||||
//
|
||||
// /* @since 1.20 */
|
||||
// public function setExpensiveParserFunctionLimit( $x ) {
|
||||
// return wfSetVar( this.mExpensiveParserFunctionLimit, $x );
|
||||
// }
|
||||
//
|
||||
// public function setRemoveComments( $x ) {
|
||||
// return wfSetVar( this.mRemoveComments, $x );
|
||||
// }
|
||||
//
|
||||
// /* @since 1.24 */
|
||||
// public function setCurrentRevisionCallback( $x ) {
|
||||
// return wfSetVar( this.mCurrentRevisionCallback, $x );
|
||||
// }
|
||||
//
|
||||
// /** @since 1.28 */
|
||||
// public function setSpeculativeRevIdCallback( $x ) {
|
||||
// return wfSetVar( this.mSpeculativeRevIdCallback, $x );
|
||||
// }
|
||||
//
|
||||
// public function setTemplateCallback( $x ) {
|
||||
// return wfSetVar( this.mTemplateCallback, $x );
|
||||
// }
|
||||
//
|
||||
// public function enableLimitReport( $x = true ) {
|
||||
// return wfSetVar( this.mEnableLimitReport, $x );
|
||||
// }
|
||||
//
|
||||
// public function setTimestamp( $x ) {
|
||||
// return wfSetVar( this.mTimestamp, $x );
|
||||
// }
|
||||
//
|
||||
// public function setCleanSignatures( $x ) {
|
||||
// return wfSetVar( this.mCleanSignatures, $x );
|
||||
// }
|
||||
//
|
||||
// public function setExternalLinkTarget( $x ) {
|
||||
// return wfSetVar( this.mExternalLinkTarget, $x );
|
||||
// }
|
||||
//
|
||||
// public function disableContentConversion( $x = true ) {
|
||||
// return wfSetVar( this.mDisableContentConversion, $x );
|
||||
// }
|
||||
//
|
||||
// public function disableTitleConversion( $x = true ) {
|
||||
// return wfSetVar( this.mDisableTitleConversion, $x );
|
||||
// }
|
||||
//
|
||||
// public function setUserLang( $x ) {
|
||||
// if ( is_string( $x ) ) {
|
||||
// $x = Language::factory( $x );
|
||||
// }
|
||||
//
|
||||
// return wfSetVar( this.mUserLang, $x );
|
||||
// }
|
||||
//
|
||||
// public function setThumbSize( $x ) {
|
||||
// return wfSetVar( this.mThumbSize, $x );
|
||||
// }
|
||||
//
|
||||
// public function setStubThreshold( $x ) {
|
||||
// return wfSetVar( this.mStubThreshold, $x );
|
||||
// }
|
||||
//
|
||||
// public function setPreSaveTransform( $x ) {
|
||||
// return wfSetVar( this.mPreSaveTransform, $x );
|
||||
// }
|
||||
//
|
||||
// public function setIsPreview( $x ) {
|
||||
// return wfSetVar( this.mIsPreview, $x );
|
||||
// }
|
||||
//
|
||||
// public function setIsSectionPreview( $x ) {
|
||||
// return wfSetVar( this.mIsSectionPreview, $x );
|
||||
// }
|
||||
//
|
||||
// public function setIsPrintable( $x ) {
|
||||
// return wfSetVar( this.mIsPrintable, $x );
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Set the redirect target.
|
||||
// *
|
||||
// * Note that setting or changing this does not *make* the page a redirect
|
||||
// * or change its target, it merely records the information for reference
|
||||
// * during the parse.
|
||||
// *
|
||||
// * @since 1.24
|
||||
// * @param Title|null $title
|
||||
// */
|
||||
// function setRedirectTarget( $title ) {
|
||||
// this.redirectTarget = $title;
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Get the previously-set redirect target.
|
||||
// *
|
||||
// * @since 1.24
|
||||
// * @return Title|null
|
||||
// */
|
||||
// function getRedirectTarget() {
|
||||
// return this.redirectTarget;
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Extra key that should be present in the parser cache key.
|
||||
// * @param String $key
|
||||
// */
|
||||
// public function addExtraKey( $key ) {
|
||||
// this.mExtraKey .= '!' . $key;
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Constructor
|
||||
// * @param User $user
|
||||
// * @param Language $lang
|
||||
// */
|
||||
// public function __construct( $user = null, $lang = null ) {
|
||||
// if ( $user === null ) {
|
||||
// global $wgUser;
|
||||
// if ( $wgUser === null ) {
|
||||
// $user = new User;
|
||||
// } else {
|
||||
// $user = $wgUser;
|
||||
// }
|
||||
// }
|
||||
// if ( $lang === null ) {
|
||||
// global $wgLang;
|
||||
// if ( !StubObject::isRealObject( $wgLang ) ) {
|
||||
// $wgLang->_unstub();
|
||||
// }
|
||||
// $lang = $wgLang;
|
||||
// }
|
||||
// this.initialiseFromUser( $user, $lang );
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Get a ParserOptions Object for an anonymous user
|
||||
// * @since 1.27
|
||||
// * @return ParserOptions
|
||||
// */
|
||||
// public static function newFromAnon() {
|
||||
// global $wgContLang;
|
||||
// return new ParserOptions( new User, $wgContLang );
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Get a ParserOptions Object from a given user.
|
||||
// * Language will be taken from $wgLang.
|
||||
// *
|
||||
// * @param User $user
|
||||
// * @return ParserOptions
|
||||
// */
|
||||
// public static function newFromUser( $user ) {
|
||||
// return new ParserOptions( $user );
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Get a ParserOptions Object from a given user and language
|
||||
// *
|
||||
// * @param User $user
|
||||
// * @param Language $lang
|
||||
// * @return ParserOptions
|
||||
// */
|
||||
// public static function newFromUserAndLang( User $user, Language $lang ) {
|
||||
// return new ParserOptions( $user, $lang );
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Get a ParserOptions Object from a IContextSource Object
|
||||
// *
|
||||
// * @param IContextSource $context
|
||||
// * @return ParserOptions
|
||||
// */
|
||||
// public static function newFromContext( IContextSource $context ) {
|
||||
// return new ParserOptions( $context->getUser(), $context->getLanguage() );
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Get user options
|
||||
// *
|
||||
// * @param User $user
|
||||
// * @param Language $lang
|
||||
// */
|
||||
// private function initialiseFromUser( $user, $lang ) {
|
||||
// global $wgInterwikiMagic, $wgAllowExternalImages,
|
||||
// $wgAllowExternalImagesFrom, $wgEnableImageWhitelist, $wgAllowSpecialInclusion,
|
||||
// $wgMaxArticleSize, $wgMaxPPNodeCount, $wgMaxTemplateDepth, $wgMaxPPExpandDepth,
|
||||
// $wgCleanSignatures, $wgExternalLinkTarget, $wgExpensiveParserFunctionLimit,
|
||||
// $wgMaxGeneratedPPNodeCount, $wgDisableLangConversion, $wgDisableTitleConversion,
|
||||
// $wgEnableMagicLinks;
|
||||
//
|
||||
// // *UPDATE* ParserOptions::matches() if any of this changes as needed
|
||||
// this.mInterwikiMagic = $wgInterwikiMagic;
|
||||
// this.mAllowExternalImages = $wgAllowExternalImages;
|
||||
// this.mAllowExternalImagesFrom = $wgAllowExternalImagesFrom;
|
||||
// this.mEnableImageWhitelist = $wgEnableImageWhitelist;
|
||||
// this.mAllowSpecialInclusion = $wgAllowSpecialInclusion;
|
||||
// this.mMaxIncludeSize = $wgMaxArticleSize * 1024;
|
||||
// this.mMaxPPNodeCount = $wgMaxPPNodeCount;
|
||||
// this.mMaxGeneratedPPNodeCount = $wgMaxGeneratedPPNodeCount;
|
||||
// this.mMaxPPExpandDepth = $wgMaxPPExpandDepth;
|
||||
// this.mMaxTemplateDepth = $wgMaxTemplateDepth;
|
||||
// this.mExpensiveParserFunctionLimit = $wgExpensiveParserFunctionLimit;
|
||||
// this.mCleanSignatures = $wgCleanSignatures;
|
||||
// this.mExternalLinkTarget = $wgExternalLinkTarget;
|
||||
// this.mDisableContentConversion = $wgDisableLangConversion;
|
||||
// this.mDisableTitleConversion = $wgDisableLangConversion || $wgDisableTitleConversion;
|
||||
// this.mMagicISBNLinks = $wgEnableMagicLinks['ISBN'];
|
||||
// this.mMagicPMIDLinks = $wgEnableMagicLinks['PMID'];
|
||||
// this.mMagicRFCLinks = $wgEnableMagicLinks['RFC'];
|
||||
//
|
||||
// this.mUser = $user;
|
||||
// this.mNumberHeadings = $user->getOption( 'numberheadings' );
|
||||
// this.mThumbSize = $user->getOption( 'thumbsize' );
|
||||
// this.mStubThreshold = $user->getStubThreshold();
|
||||
// this.mUserLang = $lang;
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Check if these options match that of another options set
|
||||
// *
|
||||
// * This ignores report limit settings that only affect HTML comments
|
||||
// *
|
||||
// * @param ParserOptions $other
|
||||
// * @return boolean
|
||||
// * @since 1.25
|
||||
// */
|
||||
// public function matches( ParserOptions $other ) {
|
||||
// $fields = array_keys( get_class_vars( __CLASS__ ) );
|
||||
// $fields = array_diff( $fields, [
|
||||
// 'mEnableLimitReport', // only affects HTML comments
|
||||
// 'onAccessCallback', // only used for ParserOutput option tracking
|
||||
// ] );
|
||||
// foreach ( $fields as $field ) {
|
||||
// if ( !is_object( this.$field ) && this.$field !== $other->$field ) {
|
||||
// return false;
|
||||
// }
|
||||
// }
|
||||
// // Check the Object and lazy-loaded options
|
||||
// return (
|
||||
// this.mUserLang->equals( $other->mUserLang ) &&
|
||||
// this.getDateFormat() === $other->getDateFormat()
|
||||
// );
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Registers a callback for tracking which ParserOptions which are used.
|
||||
// * This is a private API with the parser.
|
||||
// * @param callable $callback
|
||||
// */
|
||||
// public function registerWatcher( $callback ) {
|
||||
// this.onAccessCallback = $callback;
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Called when an option is accessed.
|
||||
// * Calls the watcher that was set using registerWatcher().
|
||||
// * Typically, the watcher callback is ParserOutput::registerOption().
|
||||
// * The information registered that way will be used by ParserCache::save().
|
||||
// *
|
||||
// * @param String $optionName Name of the option
|
||||
// */
|
||||
// public function optionUsed( $optionName ) {
|
||||
// if ( this.onAccessCallback ) {
|
||||
// call_user_func( this.onAccessCallback, $optionName );
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Returns the full array of options that would have been used by
|
||||
// * in 1.16.
|
||||
// * Used to get the old parser cache entries when available.
|
||||
// * @return array
|
||||
// */
|
||||
// public static function legacyOptions() {
|
||||
// return [
|
||||
// 'stubthreshold',
|
||||
// 'numberheadings',
|
||||
// 'userlang',
|
||||
// 'thumbsize',
|
||||
// 'editsection',
|
||||
// 'printable'
|
||||
// ];
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Generate a hash String with the values set on these ParserOptions
|
||||
// * for the keys given in the array.
|
||||
// * This will be used as part of the hash key for the parser cache,
|
||||
// * so users sharing the options with vary for the same page share
|
||||
// * the same cached data safely.
|
||||
// *
|
||||
// * Extensions which require it should install 'PageRenderingHash' hook,
|
||||
// * which will give them a chance to modify this key based on their own
|
||||
// * settings.
|
||||
// *
|
||||
// * @since 1.17
|
||||
// * @param array $forOptions
|
||||
// * @param Title $title Used to get the content language of the page (since r97636)
|
||||
// * @return String Page rendering hash
|
||||
// */
|
||||
// public function optionsHash( $forOptions, $title = null ) {
|
||||
// global $wgRenderHashAppend;
|
||||
//
|
||||
// // FIXME: Once the cache key is reorganized this argument
|
||||
// // can be dropped. It was used when the math extension was
|
||||
// // part of core.
|
||||
// $confstr = '*';
|
||||
//
|
||||
// // Space assigned for the stubthreshold but unused
|
||||
// // since it disables the parser cache, its value will always
|
||||
// // be 0 when this function is called by parsercache.
|
||||
// if ( in_array( 'stubthreshold', $forOptions ) ) {
|
||||
// $confstr .= '!' . this.mStubThreshold;
|
||||
// } else {
|
||||
// $confstr .= '!*';
|
||||
// }
|
||||
//
|
||||
// if ( in_array( 'dateformat', $forOptions ) ) {
|
||||
// $confstr .= '!' . this.getDateFormat();
|
||||
// }
|
||||
//
|
||||
// if ( in_array( 'numberheadings', $forOptions ) ) {
|
||||
// $confstr .= '!' . ( this.mNumberHeadings ? '1' : '' );
|
||||
// } else {
|
||||
// $confstr .= '!*';
|
||||
// }
|
||||
//
|
||||
// if ( in_array( 'userlang', $forOptions ) ) {
|
||||
// $confstr .= '!' . this.mUserLang->getCode();
|
||||
// } else {
|
||||
// $confstr .= '!*';
|
||||
// }
|
||||
//
|
||||
// if ( in_array( 'thumbsize', $forOptions ) ) {
|
||||
// $confstr .= '!' . this.mThumbSize;
|
||||
// } else {
|
||||
// $confstr .= '!*';
|
||||
// }
|
||||
//
|
||||
// // add in language specific options, if any
|
||||
// // @todo FIXME: This is just a way of retrieving the url/user preferred variant
|
||||
// if ( !is_null( $title ) ) {
|
||||
// $confstr .= $title->getPageLanguage()->getExtraHashOptions();
|
||||
// } else {
|
||||
// global $wgContLang;
|
||||
// $confstr .= $wgContLang->getExtraHashOptions();
|
||||
// }
|
||||
//
|
||||
// $confstr .= $wgRenderHashAppend;
|
||||
//
|
||||
// // @note: as of Feb 2015, core never sets the editsection flag, since it uses
|
||||
// // <mw:editsection> tags to inject editsections on the fly. However, extensions
|
||||
// // may be using it by calling ParserOption::optionUsed resp. ParserOutput::registerOption
|
||||
// // directly. At least Wikibase does at this point in time.
|
||||
// if ( !in_array( 'editsection', $forOptions ) ) {
|
||||
// $confstr .= '!*';
|
||||
// } elseif ( !this.mEditSection ) {
|
||||
// $confstr .= '!edit=0';
|
||||
// }
|
||||
//
|
||||
// if ( this.mIsPrintable && in_array( 'printable', $forOptions ) ) {
|
||||
// $confstr .= '!printable=1';
|
||||
// }
|
||||
//
|
||||
// if ( this.mExtraKey != '' ) {
|
||||
// $confstr .= this.mExtraKey;
|
||||
// }
|
||||
//
|
||||
// // Give a chance for extensions to modify the hash, if they have
|
||||
// // extra options or other effects on the parser cache.
|
||||
// Hooks::run( 'PageRenderingHash', [ &$confstr, this.getUser(), &$forOptions ] );
|
||||
//
|
||||
// // Make it a valid memcached key fragment
|
||||
// $confstr = str_replace( ' ', '_', $confstr );
|
||||
//
|
||||
// return $confstr;
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Sets a hook to force that a page exists, and sets a current revision callback to return
|
||||
// * a revision with custom content when the current revision of the page is requested.
|
||||
// *
|
||||
// * @since 1.25
|
||||
// * @param Title $title
|
||||
// * @param Content $content
|
||||
// * @param User $user The user that the fake revision is attributed to
|
||||
// * @return ScopedCallback to unset the hook
|
||||
// */
|
||||
// public function setupFakeRevision( $title, $content, $user ) {
|
||||
// $oldCallback = this.setCurrentRevisionCallback(
|
||||
// function (
|
||||
// $titleToCheck, $parser = false ) use ( $title, $content, $user, &$oldCallback
|
||||
// ) {
|
||||
// if ( $titleToCheck->equals( $title ) ) {
|
||||
// return new Revision( [
|
||||
// 'page' => $title->getArticleID(),
|
||||
// 'user_text' => $user->getName(),
|
||||
// 'user' => $user->getId(),
|
||||
// 'parent_id' => $title->getLatestRevID(),
|
||||
// 'title' => $title,
|
||||
// 'content' => $content
|
||||
// ] );
|
||||
// } else {
|
||||
// return call_user_func( $oldCallback, $titleToCheck, $parser );
|
||||
// }
|
||||
// }
|
||||
// );
|
||||
//
|
||||
// global $wgHooks;
|
||||
// $wgHooks['TitleExists'][] =
|
||||
// function ( $titleToCheck, &$exists ) use ( $title ) {
|
||||
// if ( $titleToCheck->equals( $title ) ) {
|
||||
// $exists = true;
|
||||
// }
|
||||
// };
|
||||
// end( $wgHooks['TitleExists'] );
|
||||
// $key = key( $wgHooks['TitleExists'] );
|
||||
// LinkCache::singleton()->clearBadLink( $title->getPrefixedDBkey() );
|
||||
// return new ScopedCallback( function () use ( $title, $key ) {
|
||||
// global $wgHooks;
|
||||
// unset( $wgHooks['TitleExists'][$key] );
|
||||
// LinkCache::singleton()->clearLink( $title );
|
||||
// } );
|
||||
// }
|
||||
}
|
||||
@@ -1,45 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
|
||||
import gplx.core.btries.*;
|
||||
public class Xomw_regex_ {
|
||||
public static int Find_fwd_while(Btrie_slim_mgr trie, Btrie_rv trv, byte[] src, int src_bgn, int src_end) {
|
||||
int cur = src_bgn;
|
||||
while (cur < src_end) {
|
||||
byte b = src[cur];
|
||||
Object o = trie.Match_at_w_b0(trv, b, src, cur, src_end);
|
||||
if (o == null)
|
||||
break;
|
||||
else
|
||||
cur += gplx.core.intls.Utf8_.Len_of_char_by_1st_byte(b);
|
||||
}
|
||||
return cur;
|
||||
}
|
||||
public static int Find_fwd_until(Btrie_slim_mgr trie, Btrie_rv trv, byte[] src, int src_bgn, int src_end) {
|
||||
int cur = src_bgn;
|
||||
while (cur < src_end) {
|
||||
byte b = src[cur];
|
||||
Object o = trie.Match_at_w_b0(trv, b, src, cur, src_end);
|
||||
if (o == null)
|
||||
cur += gplx.core.intls.Utf8_.Len_of_char_by_1st_byte(b);
|
||||
else
|
||||
break;
|
||||
}
|
||||
return cur;
|
||||
}
|
||||
}
|
||||
@@ -1,39 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
|
||||
import gplx.core.btries.*;
|
||||
public class Xomw_regex_boundary { // THREAD.SAFE: trv is only for consistent interface
|
||||
private final Btrie_slim_mgr trie = Btrie_slim_mgr.cs();
|
||||
private final Btrie_rv trv = new Btrie_rv();
|
||||
public Xomw_regex_boundary(Xomw_regex_space space) {
|
||||
// naive implementation of is_boundary; ignore all ws and underscore
|
||||
byte[][] ary = space.Ws();
|
||||
for (byte[] bry : ary)
|
||||
trie.Add_bry_byte(bry, Byte_.Zero);
|
||||
ary = space.Zs();
|
||||
for (byte[] bry : ary)
|
||||
trie.Add_bry_byte(bry, Byte_.Zero);
|
||||
}
|
||||
public boolean Is_boundary_prv(byte[] src, int pos) {
|
||||
if (pos == 0) return true; // BOS is true
|
||||
int bgn = gplx.core.intls.Utf8_.Get_pos0_of_char_bwd(src, pos - 1);
|
||||
byte b = src[bgn];
|
||||
Object o = trie.Match_at_w_b0(trv, b, src, bgn, pos);
|
||||
return o != null;
|
||||
}
|
||||
}
|
||||
@@ -1,101 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
|
||||
public class Xomw_regex_parser {
|
||||
private Bry_bfr tmp;
|
||||
public byte[][] Rslt() {return rslt;} private byte[][] rslt;
|
||||
public Xomw_regex_parser Add_ary(String... ary) {return Set_or_add(Parse_ary(ary));}
|
||||
private byte[][] Parse_ary(String... ary) {
|
||||
if (tmp == null) tmp = Bry_bfr_.New();
|
||||
int ary_len = ary.length;
|
||||
byte[][] rv = new byte[ary_len][];
|
||||
for (int i = 0; i < ary_len; i++) {
|
||||
rv[i] = Compile_itm(tmp, Bry_.new_u8(ary[i]));
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
public Xomw_regex_parser Add_rng(String bgn, String end) {return Set_or_add(Parse_rng(bgn, end));}
|
||||
private byte[][] Parse_rng(String bgn, String end) {
|
||||
if (tmp == null) tmp = Bry_bfr_.New();
|
||||
byte[] bgn_bry = Compile_itm(tmp, Bry_.new_u8(bgn));
|
||||
int bgn_val = gplx.core.intls.Utf16_.Decode_to_int(bgn_bry, 0);
|
||||
byte[] end_bry = Compile_itm(tmp, Bry_.new_u8(end));
|
||||
int end_val = gplx.core.intls.Utf16_.Decode_to_int(end_bry, 0);
|
||||
|
||||
int rv_len = end_val - bgn_val + 1;
|
||||
byte[][] rv = new byte[rv_len][];
|
||||
for (int i = 0; i < rv_len; i++) {
|
||||
rv[i] = gplx.core.intls.Utf16_.Encode_int_to_bry(i + bgn_val);
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
private Xomw_regex_parser Set_or_add(byte[][] val) {
|
||||
rslt = rslt == null ? val : Bry_.Ary_add(rslt, val);
|
||||
return this;
|
||||
}
|
||||
private static byte[] Compile_itm(Bry_bfr tmp, byte[] src) {
|
||||
// parse each itm
|
||||
int src_end = src.length;
|
||||
int cur = 0;
|
||||
int prv = cur;
|
||||
boolean dirty = false;
|
||||
while (true) {
|
||||
// eos
|
||||
if (cur == src_end) {
|
||||
if (dirty)
|
||||
tmp.Add_mid(src, prv, src_end);
|
||||
break;
|
||||
}
|
||||
|
||||
// look at byte
|
||||
byte b = src[cur];
|
||||
switch (b) { // escape
|
||||
case Byte_ascii.Backslash:
|
||||
int nxt = cur + 1;
|
||||
if (nxt >= src_end) throw Err_.new_wo_type("regex escape failed: no more chars left", "src", src, "pos", nxt);
|
||||
byte nxt_byte = src[nxt];
|
||||
switch (nxt_byte) {
|
||||
case Byte_ascii.Ltr_s: // \s -> " "
|
||||
src = Byte_ascii.Space_bry;
|
||||
cur = src_end;
|
||||
break;
|
||||
case Byte_ascii.Ltr_x: // \ u -> utf8 sequence in hex-dec; EX: "\xc2\xad" -> new byte[] {194, 160}
|
||||
// read next two bytes
|
||||
dirty = true;
|
||||
nxt++;
|
||||
if (nxt + 2 > src_end) throw Err_.new_wo_type("utf8 escape failed: no more chars left", "src", src, "pos", nxt);
|
||||
tmp.Add_byte((byte)gplx.core.encoders.Hex_utl_.Parse_or(src, nxt, nxt + 2, -1));
|
||||
cur = nxt + 2;
|
||||
prv = cur;
|
||||
break;
|
||||
default:
|
||||
throw Err_.new_wo_type("regex escape failed: unknown char", "src", src, "pos", nxt);
|
||||
}
|
||||
break;
|
||||
default: // handles ascii only
|
||||
if (b > 127)
|
||||
throw Err_.new_wo_type("regex compiled failed: unknown char", "src", src, "pos", cur);
|
||||
cur++;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// set item
|
||||
return dirty ? tmp.To_bry_and_clear() : src;
|
||||
}
|
||||
}
|
||||
@@ -1,42 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
|
||||
import org.junit.*; import gplx.core.tests.*;
|
||||
public class Xomw_regex_parser__tst {
|
||||
private final Xomw_regex_parser__fxt fxt = new Xomw_regex_parser__fxt();
|
||||
@Test public void Ary__space() {
|
||||
fxt.Test__parse_ary(String_.Ary("\\s"), String_.Ary(" "));
|
||||
}
|
||||
@Test public void Ary__utf8() {
|
||||
fxt.Test__parse_ary(String_.Ary("\\xc2\\xa7", "\\xe0\\xb9\\x90"), String_.Ary("§", "๐"));
|
||||
}
|
||||
@Test public void Rng__ascii() {
|
||||
fxt.Test__parse_rng("a", "c", String_.Ary("a", "b", "c"));
|
||||
}
|
||||
}
|
||||
class Xomw_regex_parser__fxt {
|
||||
private final Xomw_regex_parser parser = new Xomw_regex_parser();
|
||||
public void Test__parse_ary(String[] ary, String[] expd) {
|
||||
parser.Add_ary(ary);
|
||||
Gftest.Eq__ary(expd, String_.Ary(parser.Rslt()));
|
||||
}
|
||||
public void Test__parse_rng(String bgn, String end, String[] expd) {
|
||||
parser.Add_rng("a", "c");
|
||||
Gftest.Eq__ary(expd, String_.Ary(parser.Rslt()));
|
||||
}
|
||||
}
|
||||
@@ -1,64 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
|
||||
import gplx.core.btries.*;
|
||||
public class Xomw_regex_space {
|
||||
private final Btrie_slim_mgr trie = Btrie_slim_mgr.cs();
|
||||
public Xomw_regex_space() {
|
||||
byte[] space = Bry_.New_by_ints(32);
|
||||
ws = new byte[][]
|
||||
{ space
|
||||
, Bry_.New_by_ints(9)
|
||||
, Bry_.New_by_ints(10)
|
||||
, Bry_.New_by_ints(13)
|
||||
};
|
||||
// Zs; REF:http://www.fileformat.info/info/unicode/category/Zs/list.htm
|
||||
zs = new byte[][]
|
||||
{ space
|
||||
, Bry_.New_by_ints(194, 160)
|
||||
, Bry_.New_by_ints(225, 154, 128)
|
||||
, Bry_.New_by_ints(226, 128, 129)
|
||||
, Bry_.New_by_ints(226, 128, 130)
|
||||
, Bry_.New_by_ints(226, 128, 131)
|
||||
, Bry_.New_by_ints(226, 128, 132)
|
||||
, Bry_.New_by_ints(226, 128, 133)
|
||||
, Bry_.New_by_ints(226, 128, 134)
|
||||
, Bry_.New_by_ints(226, 128, 135)
|
||||
, Bry_.New_by_ints(226, 128, 136)
|
||||
, Bry_.New_by_ints(226, 128, 137)
|
||||
, Bry_.New_by_ints(226, 128, 138)
|
||||
, Bry_.New_by_ints(226, 128, 175)
|
||||
, Bry_.New_by_ints(226, 129, 159)
|
||||
, Bry_.New_by_ints(227, 128, 128)
|
||||
};
|
||||
|
||||
byte[][] ary = ws;
|
||||
for (byte[] bry : ary) {
|
||||
trie.Add_bry_byte(bry, Byte_.Zero);
|
||||
}
|
||||
ary = zs;
|
||||
for (byte[] bry : ary) {
|
||||
trie.Add_bry_byte(bry, Byte_.Zero);
|
||||
}
|
||||
}
|
||||
public byte[][] Ws() {return ws;} private byte[][] ws;
|
||||
public byte[][] Zs() {return zs;} private byte[][] zs;
|
||||
public int Find_fwd_while(Btrie_rv trv, byte[] src, int src_bgn, int src_end) {
|
||||
return Xomw_regex_.Find_fwd_while(trie, trv, src, src_bgn, src_end);
|
||||
}
|
||||
}
|
||||
@@ -1,40 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
|
||||
import gplx.core.btries.*;
|
||||
public class Xomw_regex_url {
|
||||
private final Btrie_slim_mgr trie;
|
||||
public Xomw_regex_url(Xomw_regex_space regex_space) {
|
||||
// [^][<>"\\x00-\\x20\\x7F\|]
|
||||
// REGEX:[^][<>"\\x00-\\x20\\x7F\p{Zs}]; NOTE: val is just a marker
|
||||
this.trie = Btrie_slim_mgr.cs();
|
||||
trie.Add_str_byte__many(Byte_.Zero, "[", "]", "<", ">", "\"");
|
||||
for (byte i = 0; i < 33; i++) {
|
||||
trie.Add_bry_byte(new byte[] {i}, Byte_.Zero);
|
||||
}
|
||||
trie.Add_bry_byte(Bry_.New_by_ints(127), Byte_.Zero); // x7F
|
||||
|
||||
byte[][] zs_ary = regex_space.Zs();
|
||||
for (byte[] zs : zs_ary) {
|
||||
trie.Add_bry_byte(zs, Byte_.Zero);
|
||||
}
|
||||
}
|
||||
public int Find_fwd_while(Btrie_rv trv, byte[] src, int src_bgn, int src_end) {
|
||||
return Xomw_regex_.Find_fwd_until(trie, trv, src, src_bgn, src_end);
|
||||
}
|
||||
}
|
||||
@@ -1,139 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
|
||||
import gplx.core.btries.*;
|
||||
public class Xomw_strip_state { // REF.MW:/parser/StripState.php
|
||||
private final Btrie_slim_mgr trie = Btrie_slim_mgr.cs();
|
||||
private final Btrie_rv trv = new Btrie_rv();
|
||||
private final Bry_bfr tmp_1 = Bry_bfr_.New();
|
||||
private final Bry_bfr tmp_2 = Bry_bfr_.New();
|
||||
private boolean tmp_2_used = false;
|
||||
private int general_len, nowiki_len;
|
||||
public void Clear() {
|
||||
trie.Clear();
|
||||
general_len = nowiki_len = 0;
|
||||
tmp_2_used = false;
|
||||
}
|
||||
public void Add_general(byte[] marker, byte[] val) {Add_item(Tid__general, marker, val);}
|
||||
public void Add_nowiki (byte[] marker, byte[] val) {Add_item(Tid__nowiki, marker, val);}
|
||||
public void Add_item(byte tid, byte[] marker, byte[] val) {
|
||||
trie.Add_obj(marker, new Xomw_strip_item(tid, marker, val));
|
||||
if (tid == Tid__general)
|
||||
general_len++;
|
||||
else
|
||||
nowiki_len++;
|
||||
}
|
||||
public byte[] Unstrip_general(byte[] text) {return Unstrip(Tid__general, text);}
|
||||
public byte[] Unstrip_nowiki (byte[] text) {return Unstrip(Tid__nowiki , text);}
|
||||
public byte[] Unstrip_both (byte[] text) {return Unstrip(Tid__both , text);}
|
||||
public byte[] Unstrip(byte tid, byte[] text) {
|
||||
boolean dirty = Unstrip(tid, tmp_1, text, 0, text.length);
|
||||
return dirty ? tmp_1.To_bry_and_clear() : text;
|
||||
}
|
||||
public void Unstrip_general(Xomw_parser_bfr pbfr) {Unstrip(Tid__general, pbfr);}
|
||||
public void Unstrip_nowiki (Xomw_parser_bfr pbfr) {Unstrip(Tid__nowiki , pbfr);}
|
||||
public void Unstrip_both (Xomw_parser_bfr pbfr) {Unstrip(Tid__both , pbfr);}
|
||||
private boolean Unstrip(byte tid, Xomw_parser_bfr pbfr) {
|
||||
// XO.PBFR
|
||||
Bry_bfr src_bfr = pbfr.Src();
|
||||
byte[] src = src_bfr.Bfr();
|
||||
boolean dirty = Unstrip(tid, pbfr.Trg(), src, 0, src_bfr.Len());
|
||||
if (dirty)
|
||||
pbfr.Switch();
|
||||
return dirty;
|
||||
}
|
||||
private boolean Unstrip(byte tid, Bry_bfr trg, byte[] src, int src_bgn, int src_end) {
|
||||
// exit early if no items for type
|
||||
if ((tid & Tid__general) == Tid__general) {
|
||||
if (general_len == 0)
|
||||
return false;
|
||||
}
|
||||
else if ((tid & Tid__nowiki) == Tid__nowiki) {
|
||||
if (nowiki_len == 0)
|
||||
return false;
|
||||
}
|
||||
|
||||
int cur = src_bgn;
|
||||
int prv = cur;
|
||||
boolean dirty = false;
|
||||
// loop over each src char
|
||||
while (true) {
|
||||
// EOS: exit
|
||||
if (cur == src_end) {
|
||||
if (dirty) // add remainder if dirty
|
||||
trg.Add_mid(src, prv, src_end);
|
||||
break;
|
||||
}
|
||||
|
||||
// check if current pos matches strip state
|
||||
Object o = trie.Match_at(trv, src, cur, src_end);
|
||||
if (o != null) { // match
|
||||
Xomw_strip_item item = (Xomw_strip_item)o;
|
||||
byte item_tid = item.Tid();
|
||||
if ((tid & item_tid) == item_tid) { // check if types match
|
||||
// get bfr for recursion
|
||||
Bry_bfr nested_bfr = null;
|
||||
boolean tmp_2_release = false;
|
||||
if (tmp_2_used) {
|
||||
nested_bfr = Bry_bfr_.New();
|
||||
}
|
||||
else {
|
||||
nested_bfr = tmp_2;
|
||||
tmp_2_used = true;
|
||||
tmp_2_release = true;
|
||||
}
|
||||
|
||||
// recurse
|
||||
byte[] item_val = item.Val();
|
||||
if (Unstrip(tid, nested_bfr, item_val, 0, item_val.length))
|
||||
item_val = nested_bfr.To_bry_and_clear();
|
||||
if (tmp_2_release)
|
||||
tmp_2_used = false;
|
||||
|
||||
// add to trg
|
||||
trg.Add_mid(src, prv, cur);
|
||||
trg.Add(item_val);
|
||||
|
||||
// update vars
|
||||
dirty = true;
|
||||
cur += item.Key().length;
|
||||
prv = cur;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
cur++;
|
||||
}
|
||||
return dirty;
|
||||
}
|
||||
public static final String Str__marker_bgn = "\u007f'\"`UNIQ-";
|
||||
public static final byte[]
|
||||
Bry__marker__bgn = Bry_.new_a7(Str__marker_bgn)
|
||||
, Bry__marker__end = Bry_.new_a7("-QINU`\"'\u007f")
|
||||
;
|
||||
public static final byte Tid__general = 1, Tid__nowiki = 2, Tid__both = 3;
|
||||
}
|
||||
// Immutable entry of the strip-state trie: marker type, marker bytes (key) and replacement bytes (val)
class Xomw_strip_item {
  private final byte tid;
  private final byte[] key;
  private final byte[] val;
  public Xomw_strip_item(byte tid, byte[] key, byte[] val) {
    this.tid = tid;
    this.key = key;
    this.val = val;
  }
  // marker type as passed to the constructor
  public byte Tid() {
    return tid;
  }
  // marker bytes
  public byte[] Key() {
    return key;
  }
  // bytes that replace the marker
  public byte[] Val() {
    return val;
  }
}
|
||||
@@ -1,44 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
|
||||
import org.junit.*; import gplx.core.tests.*;
|
||||
public class Xomw_strip_state__tst {
|
||||
private final Xomw_strip_state__fxt fxt = new Xomw_strip_state__fxt();
|
||||
@Test public void Basic() {
|
||||
fxt.Init__add (Xomw_strip_state.Tid__general, "\u007f'\"`UNIQ-key-1-QINU`\"'\u007f", "val-1");
|
||||
fxt.Test__nostrip(Xomw_strip_state.Tid__nowiki , "a \u007f'\"`UNIQ-key-1-QINU`\"'\u007f b");
|
||||
fxt.Test__unstrip(Xomw_strip_state.Tid__general, "a \u007f'\"`UNIQ-key-1-QINU`\"'\u007f b", "a val-1 b");
|
||||
fxt.Test__unstrip(Xomw_strip_state.Tid__both , "a \u007f'\"`UNIQ-key-1-QINU`\"'\u007f b", "a val-1 b");
|
||||
}
|
||||
@Test public void Recurse() {
|
||||
fxt.Init__add (Xomw_strip_state.Tid__general, "\u007f'\"`UNIQ-key-1-QINU`\"'\u007f", "val-1");
|
||||
fxt.Init__add (Xomw_strip_state.Tid__general, "\u007f'\"`UNIQ-key-2-QINU`\"'\u007f", "\u007f'\"`UNIQ-key-1-QINU`\"'\u007f");
|
||||
fxt.Test__unstrip(Xomw_strip_state.Tid__general, "a \u007f'\"`UNIQ-key-2-QINU`\"'\u007f b", "a val-1 b");
|
||||
}
|
||||
}
|
||||
class Xomw_strip_state__fxt {
|
||||
private final Xomw_strip_state strip_state = new Xomw_strip_state();
|
||||
public void Init__add(byte tid, String marker, String val) {
|
||||
strip_state.Add_item(tid, Bry_.new_u8(marker), Bry_.new_u8(val));
|
||||
}
|
||||
public void Test__nostrip(byte tid, String src) {Test__unstrip(tid, src, src);}
|
||||
public void Test__unstrip(byte tid, String src, String expd) {
|
||||
byte[] actl = strip_state.Unstrip(tid, Bry_.new_u8(src));
|
||||
Gftest.Eq__str(expd, String_.new_u8(actl));
|
||||
}
|
||||
}
|
||||
@@ -1,56 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.parsers.doubleunders; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
|
||||
// Flags recorded while scanning a page for double-underscore keywords
public class Xomw_doubleunder_data {
  // XO.MW: MW stores these as mDoubleUnderscores in Parser
  public boolean toc;
  public boolean no_toc;
  public boolean force_toc;

  public boolean no_gallery;
  public boolean force_gallery;

  public boolean no_title_convert;
  public boolean no_content_convert;

  public boolean no_edit_section;
  public boolean new_section_link;

  public boolean static_redirect;

  public boolean hidden_cat;

  public boolean index;
  public boolean no_index;

  // XO.MW: MW stores these as member variables in Parser
  public boolean show_toc;
  public boolean force_toc_position;

  // sets every flag back to false
  public void Reset() {
    toc = false;
    no_toc = false;
    force_toc = false;

    no_gallery = false;
    force_gallery = false;

    no_title_convert = false;
    no_content_convert = false;

    no_edit_section = false;
    new_section_link = false;

    static_redirect = false;

    hidden_cat = false;

    index = false;
    no_index = false;

    show_toc = false;
    force_toc_position = false;
  }
}
|
||||
@@ -1,148 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.parsers.doubleunders; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
|
||||
import gplx.core.btries.*;
|
||||
import gplx.xowa.langs.*; import gplx.xowa.langs.kwds.*;
|
||||
public class Xomw_doubleunder_wkr {
|
||||
private final Btrie_slim_mgr trie = Btrie_slim_mgr.ci_u8();
|
||||
private final Btrie_rv trv = new Btrie_rv();
|
||||
private Xomw_doubleunder_data data;
|
||||
public void Init_by_wiki(Xomw_doubleunder_data data, Xol_lang_itm lang) {
|
||||
this.data = data;
|
||||
Reg(trie, lang.Kwd_mgr()
|
||||
, Xol_kwd_grp_.Id_notoc
|
||||
, Xol_kwd_grp_.Id_nogallery
|
||||
, Xol_kwd_grp_.Id_forcetoc
|
||||
, Xol_kwd_grp_.Id_toc
|
||||
, Xol_kwd_grp_.Id_noeditsection
|
||||
, Xol_kwd_grp_.Id_newsectionlink
|
||||
, Xol_kwd_grp_.Id_hiddencat
|
||||
, Xol_kwd_grp_.Id_index
|
||||
, Xol_kwd_grp_.Id_noindex
|
||||
, Xol_kwd_grp_.Id_staticredirect
|
||||
, Xol_kwd_grp_.Id_notitleconvert
|
||||
, Xol_kwd_grp_.Id_nocontentconvert
|
||||
);
|
||||
}
|
||||
public void Do_double_underscore(Xomw_parser_ctx pctx, Xomw_parser_bfr pbfr) {
|
||||
// XO.PBFR
|
||||
Bry_bfr src_bfr = pbfr.Src();
|
||||
byte[] src = src_bfr.Bfr();
|
||||
int src_bgn = 0;
|
||||
int src_end = src_bfr.Len();
|
||||
Bry_bfr bfr = pbfr.Trg();
|
||||
|
||||
data.Reset();
|
||||
|
||||
// XO.MW: MW does TOC before others; XO does it at the same time
|
||||
// Now match and remove the rest of them
|
||||
// XO.MW.BGN: $this->mDoubleUnderscores = $mwa->matchAndRemove( $text );
|
||||
int cur = src_bgn;
|
||||
int prv = cur;
|
||||
boolean dirty = false;
|
||||
while (true) {
|
||||
// reached end; stop
|
||||
if (cur == src_end) {
|
||||
if (dirty) {
|
||||
bfr.Add_mid(src, prv, src_end);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
// no match; keep searching
|
||||
byte b = src[cur];
|
||||
Object o = trie.Match_at_w_b0(trv, b, src, cur, src_end);
|
||||
if (o == null) {
|
||||
cur += gplx.core.intls.Utf8_.Len_of_char_by_1st_byte(b);
|
||||
continue;
|
||||
}
|
||||
|
||||
// if cs, ensure exact-match (trie is case-insensitive)
|
||||
int kwd_end = trv.Pos();
|
||||
Xomw_doubleunder_itm itm = (Xomw_doubleunder_itm)o;
|
||||
if (itm.case_match && !Bry_.Match(src, cur, kwd_end, itm.val)) {
|
||||
cur = kwd_end;
|
||||
continue;
|
||||
}
|
||||
|
||||
// match; replace __KWD__ with "" (or "<!--MWTOC-->" if __TOC__)
|
||||
dirty = true;
|
||||
bfr.Add_mid(src, prv, cur);
|
||||
switch (itm.tid) {
|
||||
case Xol_kwd_grp_.Id_toc:
|
||||
// The position of __TOC__ needs to be recorded
|
||||
boolean already_seen = !data.show_toc;
|
||||
data.toc = true;
|
||||
data.show_toc = true;
|
||||
data.force_toc_position = true;
|
||||
|
||||
if (already_seen) { // Set a placeholder. At the end we'll fill it in with the TOC.
|
||||
bfr.Add_str_a7("<!--MWTOC-->");
|
||||
}
|
||||
else { // Only keep the first one. XO.MW:ignore by not adding anything to bfr
|
||||
}
|
||||
break;
|
||||
// XO.MW: MW adds boolean to hash_table; XO uses boolean props; note that "remove" is done by not adding to bfr
|
||||
case Xol_kwd_grp_.Id_notoc: data.no_toc = true; break;
|
||||
case Xol_kwd_grp_.Id_nogallery: data.no_gallery = true; break;
|
||||
case Xol_kwd_grp_.Id_forcetoc: data.force_toc = true; break;
|
||||
case Xol_kwd_grp_.Id_noeditsection: data.no_edit_section = true; break;
|
||||
case Xol_kwd_grp_.Id_newsectionlink: data.new_section_link = true; break;
|
||||
case Xol_kwd_grp_.Id_hiddencat: data.hidden_cat = true; break;
|
||||
case Xol_kwd_grp_.Id_index: data.index = true; break;
|
||||
case Xol_kwd_grp_.Id_noindex: data.no_index = true; break;
|
||||
case Xol_kwd_grp_.Id_staticredirect: data.static_redirect = true; break;
|
||||
case Xol_kwd_grp_.Id_notitleconvert: data.no_title_convert = true; break;
|
||||
case Xol_kwd_grp_.Id_nocontentconvert: data.no_content_convert = true; break;
|
||||
default: throw Err_.new_unhandled_default(itm.tid);
|
||||
}
|
||||
cur = kwd_end;
|
||||
prv = cur;
|
||||
}
|
||||
// XO.MW.END: $this->mDoubleUnderscores = $mwa->matchAndRemove( $text );
|
||||
|
||||
if (data.no_toc && !data.force_toc_position) {
|
||||
data.show_toc = false;
|
||||
}
|
||||
|
||||
// XO.MW.EDIT: hidden_cat, index, noindex are used to add to tracking category
|
||||
|
||||
if (dirty)
|
||||
pbfr.Switch();
|
||||
}
|
||||
private static void Reg(Btrie_slim_mgr trie, Xol_kwd_mgr mgr, int... ids) {
|
||||
for (int id : ids) {
|
||||
Xol_kwd_grp grp = mgr.Get_or_new(id);
|
||||
Xol_kwd_itm[] itms = grp.Itms();
|
||||
for (Xol_kwd_itm itm : itms) {
|
||||
byte[] val = itm.Val();
|
||||
trie.Add_obj(val, new Xomw_doubleunder_itm(id, grp.Case_match(), val));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// Trie entry for one double-underscore keyword
class Xomw_doubleunder_itm {
  public Xomw_doubleunder_itm(int tid, boolean case_match, byte[] val) {
    this.val = val;
    this.case_match = case_match;
    this.tid = tid;
  }
  public int tid;             // keyword group id (one of the Xol_kwd_grp_.Id_* values passed at registration)
  public boolean case_match;  // if true, the matched text must equal val exactly
  public byte[] val;          // keyword bytes
}
|
||||
@@ -1,52 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.parsers.doubleunders; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
|
||||
import org.junit.*; import gplx.core.tests.*;
|
||||
public class Xomw_doubleunder_wkr__tst {
|
||||
private final Xomw_doubleunder_wkr__fxt fxt = new Xomw_doubleunder_wkr__fxt();
|
||||
@Test public void No_match() {fxt.Test__parse("a b c" , "a b c");}
|
||||
@Test public void Force_toc() {fxt.Test__parse("a __FORCETOC__ b" , "a b").Test__prop_y(fxt.data.force_toc);}
|
||||
@Test public void Toc() {fxt.Test__parse("a __TOC__ b __TOC__ c" , "a <!--MWTOC--> b c").Test__prop_y(fxt.data.toc, fxt.data.show_toc, fxt.data.force_toc_position);}
|
||||
@Test public void Notoc_only() {fxt.Test__parse("a __NOTOC__ b" , "a b").Test__prop_y(fxt.data.no_toc).Test__prop_n(fxt.data.show_toc);} // show_toc is false
|
||||
@Test public void Notoc_w_toc() {fxt.Test__parse("a __TOC__ b __NOTOC__ c" , "a <!--MWTOC--> b c").Test__prop_y(fxt.data.toc, fxt.data.show_toc, fxt.data.force_toc_position);} // show_toc is true
|
||||
@Test public void Case_match() {fxt.Test__parse("a __index__ b" , "a __index__ b");}
|
||||
}
|
||||
class Xomw_doubleunder_wkr__fxt {
|
||||
private final Xomw_parser_ctx pctx = new Xomw_parser_ctx();
|
||||
private final Xomw_parser_bfr pbfr = new Xomw_parser_bfr();
|
||||
private final Xomw_doubleunder_wkr wkr = new Xomw_doubleunder_wkr();
|
||||
public Xomw_doubleunder_data data = new Xomw_doubleunder_data();
|
||||
public Xomw_doubleunder_wkr__fxt() {
|
||||
Xoae_app app = Xoa_app_fxt.Make__app__edit();
|
||||
Xowe_wiki wiki = Xoa_app_fxt.Make__wiki__edit(app);
|
||||
wkr.Init_by_wiki(data, wiki.Lang());
|
||||
}
|
||||
public Xomw_doubleunder_wkr__fxt Test__parse(String src_str, String expd) {
|
||||
byte[] src_bry = Bry_.new_u8(src_str);
|
||||
wkr.Do_double_underscore(pctx, pbfr.Init(src_bry));
|
||||
Gftest.Eq__str(expd, pbfr.Rslt().To_str_and_clear(), src_str);
|
||||
return this;
|
||||
}
|
||||
public Xomw_doubleunder_wkr__fxt Test__prop_y(boolean... ary) {return Test__prop(Bool_.Y, ary);}
|
||||
public Xomw_doubleunder_wkr__fxt Test__prop_n(boolean... ary) {return Test__prop(Bool_.N, ary);}
|
||||
private Xomw_doubleunder_wkr__fxt Test__prop(boolean expd, boolean... ary) {
|
||||
for (boolean v : ary)
|
||||
Gftest.Eq__bool(expd, v);
|
||||
return this;
|
||||
}
|
||||
}
|
||||
@@ -1,52 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.parsers.headings; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
|
||||
public class Xomw_heading_cbk__html implements Xomw_heading_cbk {
|
||||
public Bry_bfr Bfr() {return bfr;} private Bry_bfr bfr;
|
||||
public Xomw_heading_cbk__html Bfr_(Bry_bfr bfr) {
|
||||
this.bfr = bfr;
|
||||
return this;
|
||||
}
|
||||
public void On_hdr_seen(Xomw_parser_ctx pctx, Xomw_heading_wkr wkr) {
|
||||
// add from txt_bgn to hdr_bgn; EX: "abc\n==A==\n"; "\n==" seen -> add "abc"
|
||||
byte[] src = wkr.Src();
|
||||
int hdr_bgn = wkr.Hdr_bgn(), txt_bgn = wkr.Txt_bgn();
|
||||
if (hdr_bgn > txt_bgn)
|
||||
bfr.Add_mid(src, txt_bgn, hdr_bgn);
|
||||
|
||||
// add "\n" unless BOS
|
||||
if (hdr_bgn != Xomw_parser_ctx.Pos__bos) bfr.Add_byte_nl();
|
||||
|
||||
// add <h2>...</h2>
|
||||
int hdr_num = wkr.Hdr_num();
|
||||
bfr.Add(Tag__lhs).Add_int_digits(1, hdr_num).Add(Byte_ascii.Angle_end_bry); // <h2>
|
||||
bfr.Add_mid(wkr.Src(), wkr.Hdr_lhs_end(), wkr.Hdr_rhs_bgn());
|
||||
bfr.Add(Tag__rhs).Add_int_digits(1, hdr_num).Add(Byte_ascii.Angle_end_bry); // </h2>
|
||||
}
|
||||
public void On_src_done(Xomw_parser_ctx pctx, Xomw_heading_wkr wkr) {
|
||||
// add from txt_bgn to EOS;
|
||||
byte[] src = wkr.Src();
|
||||
int txt_bgn = wkr.Txt_bgn(), src_end = wkr.Src_end();
|
||||
if (txt_bgn != src_end) // PERF: don't call Add_mid() if hdr is at end of EOS
|
||||
bfr.Add_mid(src, txt_bgn, src_end);
|
||||
}
|
||||
private static final byte[]
|
||||
Tag__lhs = Bry_.new_a7("<h")
|
||||
, Tag__rhs = Bry_.new_a7("</h")
|
||||
;
|
||||
}
|
||||
@@ -1,41 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.parsers.headings; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
|
||||
import org.junit.*;
|
||||
public class Xomw_heading_wkr__tst {
|
||||
private final Xomw_heading_wkr__fxt fxt = new Xomw_heading_wkr__fxt();
|
||||
@Test public void Basic() {
|
||||
fxt.Test__parse("==A==" , "<h2>A</h2>");
|
||||
fxt.Test__parse("abc\n==A==\ndef" , "abc\n<h2>A</h2>\ndef");
|
||||
|
||||
fxt.Test__parse("abc" , "abc");
|
||||
fxt.Test__parse("abc\ndef" , "abc\ndef");
|
||||
fxt.Test__parse("abc\n==" , "abc\n<h1></h1>");
|
||||
}
|
||||
}
|
||||
class Xomw_heading_wkr__fxt {
|
||||
private final Xomw_heading_wkr wkr = new Xomw_heading_wkr();
|
||||
private final Xomw_heading_cbk__html cbk = new Xomw_heading_cbk__html().Bfr_(Bry_bfr_.New());
|
||||
private final Xomw_parser_ctx pctx = new Xomw_parser_ctx();
|
||||
|
||||
public void Test__parse(String src_str, String expd) {
|
||||
byte[] src_bry = Bry_.new_u8(src_str);
|
||||
wkr.Parse(pctx, src_bry, -1, src_bry.length, cbk);
|
||||
Tfds.Eq_str_lines(expd, cbk.Bfr().To_str_and_clear(), src_str);
|
||||
}
|
||||
}
|
||||
@@ -1,81 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.parsers.hrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
|
||||
import gplx.xowa.mws.utls.*;
|
||||
public class Xomw_hr_wkr {// THREAD.UNSAFE: caching for repeated calls
|
||||
private Bry_bfr bfr;
|
||||
public void Replace_hrs(Xomw_parser_ctx pctx, Xomw_parser_bfr pbfr) { // REF.MW: text = preg_replace('/(^|\n)-----*/', '\\1<hr />', text);
|
||||
// XO.PBFR
|
||||
Bry_bfr src_bfr = pbfr.Src();
|
||||
byte[] src = src_bfr.Bfr();
|
||||
int src_bgn = 0;
|
||||
int src_end = src_bfr.Len();
|
||||
this.bfr = pbfr.Trg();
|
||||
|
||||
boolean dirty = false;
|
||||
|
||||
// do separate check for "-----" at start of String;
|
||||
int cur = 0;
|
||||
if (Bry_.Eq(src, 0, Len__wtxt__hr__bos, Bry__wtxt__hr__bos)) {
|
||||
cur = Replace_hr(Bool_.N, src, src_bgn, src_end, 0, Len__wtxt__hr__bos);
|
||||
dirty = true;
|
||||
}
|
||||
|
||||
// loop
|
||||
while (true) {
|
||||
// find next "\n-----"
|
||||
int find_bgn = Bry_find_.Find_fwd(src, Bry__wtxt__hr__mid, cur, src_end);
|
||||
|
||||
// nothing found; exit
|
||||
if (find_bgn == Bry_find_.Not_found) {
|
||||
if (dirty) {
|
||||
bfr.Add_mid(src, cur, src_end);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
// something found
|
||||
cur = Replace_hr(Bool_.Y, src, cur, src_end, find_bgn, Len__wtxt__hr__mid);
|
||||
dirty = true;
|
||||
}
|
||||
if (dirty)
|
||||
pbfr.Switch();
|
||||
}
|
||||
private int Replace_hr(boolean mid, byte[] src, int cur, int src_end, int find_bgn, int tkn_len) {
|
||||
// something found; add to bfr
|
||||
if (mid) {
|
||||
bfr.Add_mid(src, cur, find_bgn); // add everything before "\n-----"
|
||||
bfr.Add_byte_nl();
|
||||
}
|
||||
bfr.Add(Bry__html__hr);
|
||||
|
||||
// set dirty / cur and continue
|
||||
cur = find_bgn + tkn_len;
|
||||
cur = Bry_find_.Find_fwd_while(src, cur, src_end, Byte_ascii.Dash); // gobble up trailing "-"; the "*" in "-----*" from the regex above
|
||||
return cur;
|
||||
}
|
||||
private static final byte[]
|
||||
Bry__wtxt__hr__mid = Bry_.new_a7("\n-----")
|
||||
, Bry__wtxt__hr__bos = Bry_.new_a7("-----")
|
||||
, Bry__html__hr = Bry_.new_a7("<hr />")
|
||||
;
|
||||
private static final int
|
||||
Len__wtxt__hr__mid = Bry__wtxt__hr__mid.length
|
||||
, Len__wtxt__hr__bos = Bry__wtxt__hr__bos.length
|
||||
;
|
||||
}
|
||||
@@ -1,36 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.parsers.hrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
|
||||
import org.junit.*;
|
||||
public class Xomw_hr_wkr__tst {
|
||||
private final Xomw_hr_wkr__fxt fxt = new Xomw_hr_wkr__fxt();
|
||||
@Test public void Basic() {fxt.Test__parse("a\n-----b" , "a\n<hr />b");}
|
||||
@Test public void Extend() {fxt.Test__parse("a\n------b" , "a\n<hr />b");}
|
||||
@Test public void Not_found() {fxt.Test__parse("a\n----b" , "a\n----b");}
|
||||
@Test public void Bos() {fxt.Test__parse("-----a" , "<hr />a");}
|
||||
@Test public void Bos_and_mid() {fxt.Test__parse("-----a\n-----b" , "<hr />a\n<hr />b");}
|
||||
}
|
||||
class Xomw_hr_wkr__fxt {
|
||||
private final Xomw_parser_bfr pbfr = new Xomw_parser_bfr();
|
||||
private final Xomw_hr_wkr wkr = new Xomw_hr_wkr();
|
||||
public void Test__parse(String src_str, String expd) {
|
||||
byte[] src_bry = Bry_.new_u8(src_str);
|
||||
wkr.Replace_hrs(new Xomw_parser_ctx(), pbfr.Init(src_bry));
|
||||
Tfds.Eq_str_lines(expd, pbfr.Rslt().To_str_and_clear(), src_str);
|
||||
}
|
||||
}
|
||||
@@ -1,233 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.parsers.lnkes; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
|
||||
import gplx.core.btries.*; import gplx.core.primitives.*;
|
||||
import gplx.xowa.mws.utls.*;
|
||||
import gplx.xowa.mws.htmls.*;
|
||||
/* TODO.XO
|
||||
* P3: $langObj->formatNum( ++$this->mAutonumber );
|
||||
* P2: $this->getConverterLanguage()->markNoConversion( $text );
|
||||
*/
|
||||
public class Xomw_lnke_wkr {// THREAD.UNSAFE: caching for repeated calls
|
||||
private final Bry_bfr tmp;
|
||||
private Btrie_slim_mgr protocol_trie; private final Btrie_rv trv = new Btrie_rv();
|
||||
private int autonumber;
|
||||
private final Xomw_parser parser;
|
||||
private final Xomw_linker linker;
|
||||
private final Xomw_sanitizer sanitizer;
|
||||
private final Xomw_atr_mgr attribs = new Xomw_atr_mgr();
|
||||
private Xomw_regex_url regex_url;
|
||||
private Xomw_regex_space regex_space;
|
||||
public Xomw_lnke_wkr(Xomw_parser parser) {
|
||||
this.parser = parser;
|
||||
this.tmp = parser.Tmp();
|
||||
this.linker = parser.Linker();
|
||||
this.sanitizer = parser.Sanitizer();
|
||||
|
||||
if (angle_entities_trie == null) {
|
||||
synchronized (Type_adp_.ClassOf_obj(this)) {
|
||||
Link_type__free = Bry_.new_a7("free");
|
||||
Link_type__text = Bry_.new_a7("text");
|
||||
Link_type__autonumber = Bry_.new_a7("autonumber");
|
||||
|
||||
angle_entities_trie = Btrie_slim_mgr.cs().Add_many_str("<", ">");
|
||||
|
||||
// REGEX:([^\]\\x00-\\x08\\x0a-\\x1F]*?); NOTE: val is key.length
|
||||
invalid_text_chars_trie = Btrie_slim_mgr.cs();
|
||||
New__trie_itm__by_len(invalid_text_chars_trie, Byte_ascii.Brack_end);
|
||||
for (int i = 0; i <= 8; i++) { // x00-x08
|
||||
New__trie_itm__by_len(invalid_text_chars_trie, i);
|
||||
}
|
||||
for (int i = 10; i <= 31; i++) { // x0a-x1F
|
||||
New__trie_itm__by_len(invalid_text_chars_trie, i);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
public void Init_by_wiki(Btrie_slim_mgr protocol_trie, Xomw_regex_url regex_url, Xomw_regex_space regex_space) {
|
||||
this.protocol_trie = protocol_trie;
|
||||
this.regex_url = regex_url;
|
||||
this.regex_space = regex_space;
|
||||
}
|
||||
// XO.MW:SYNC:1.29; DATE:2017-02-01
|
||||
public void Replace_external_links(Xomw_parser_ctx pctx, Xomw_parser_bfr pbfr) {
|
||||
// XO.PBFR
|
||||
Bry_bfr src_bfr = pbfr.Src();
|
||||
byte[] src = src_bfr.Bfr();
|
||||
int src_bgn = 0;
|
||||
int src_end = src_bfr.Len();
|
||||
Bry_bfr bfr = pbfr.Trg();
|
||||
pbfr.Switch();
|
||||
|
||||
int cur = src_bgn;
|
||||
this.autonumber = 1;
|
||||
|
||||
// find regex
|
||||
int prv = 0;
|
||||
while (true) {
|
||||
// PORTED.BGN: $bits = preg_split( $this->mExtLinkBracketedRegex, $text, -1, PREG_SPLIT_DELIM_CAPTURE );
|
||||
|
||||
// $this->mExtLinkBracketedRegex = '/\[(((?i)' . $this->mUrlProtocols . ')' .
|
||||
// self::EXT_LINK_ADDR .
|
||||
// self::EXT_LINK_URL_CLASS . '*)\p{Zs}*([^\]\\x00-\\x08\\x0a-\\x1F]*?)\]/Su';
|
||||
//
|
||||
// REGEX: "[" + "protocol" + "url-char"* + "space"* + "text"* + "]";
|
||||
// protocol -> ((?i)' . $this->mUrlProtocols . ') -> "http://", "HTTps://"
|
||||
// url-char* -> (EXT_LINK_ADDR . EXT_LINK_URL_CLASS*) -> "255.255.255.255", "a.b.c"; NOTE: "http:///" is valid
|
||||
// space* -> \p{Zs}*
|
||||
// text -> ([^\]\\x00-\\x08\\x0a-\\x1F]*?) -> "abcd"
|
||||
// NOTE: /S=extra analysis of pattern /u = unicode support; REF.MW:http://php.net/manual/en/reference.pcre.pattern.modifiers.php
|
||||
|
||||
// Simplified expression to match an IPv4 or IPv6 address, or
|
||||
// at least one character of a host name (embeds EXT_LINK_URL_CLASS)
|
||||
// static final EXT_LINK_ADDR = '(?:[0-9.]+|\\[(?i:[0-9a-f:.]+)\\]|[^][<>"\\x00-\\x20\\x7F\p{Zs}])';
|
||||
//
|
||||
// REGEX: "IPv4" | "IPv6" | "url-char"
|
||||
// IPv4 -> [0-9.]+ -> "255."
|
||||
// IPv6 -> \\[(?i:[0-9a-f:.]+)\\] -> "2001:"
|
||||
// url-char -> [^][<>"\\x00-\\x20\\x7F\p{Zs}] -> "abcde"
|
||||
|
||||
// Constants needed for external link processing
|
||||
// Everything except bracket, space, or control characters
|
||||
// \p{Zs} is unicode 'separator, space' category. It covers the space 0x20
|
||||
// as well as U+3000 is IDEOGRAPHIC SPACE for T21052
|
||||
// static final EXT_LINK_URL_CLASS = '[^][<>"\\x00-\\x20\\x7F\p{Zs}]';
|
||||
//
|
||||
// REGEX: NOT [ "symbols" | "control" | "whitespace" ]
|
||||
// symbols -> ^][<>"
|
||||
// control -> \\x00-\\x20\\x7F
|
||||
// whitespace -> \p{Zs}
|
||||
|
||||
// search for "["
|
||||
int lnke_bgn = Bry_find_.Find_fwd(src, Byte_ascii.Brack_bgn, cur, src_end);
|
||||
if (lnke_bgn == Bry_find_.Not_found) {
|
||||
bfr.Add_mid(src, cur, src_end);
|
||||
break; // no more "["; stop
|
||||
}
|
||||
|
||||
// check for protocol; EX: "https://"
|
||||
cur = lnke_bgn + 1;
|
||||
int url_bgn = cur;
|
||||
Object protocol_bry = protocol_trie.Match_at(trv, src, cur, src_end);
|
||||
if (protocol_bry == null) {
|
||||
bfr.Add_mid(src, prv, cur);
|
||||
prv = cur;
|
||||
continue;// unknown protocol; ignore "["
|
||||
}
|
||||
cur += ((byte[])protocol_bry).length;
|
||||
|
||||
// check for one-or-more url chars; [^][<>"\\x00-\\x20\\x7F\p{Zs}]
|
||||
int domain_bgn = cur;
|
||||
cur = regex_url.Find_fwd_while(trv, src, domain_bgn, src_end);
|
||||
if (cur - domain_bgn == 0) {
|
||||
bfr.Add_mid(src, prv, cur);
|
||||
prv = cur;
|
||||
continue; // no chars found; invalid; EX: "[https://"abcde"]"
|
||||
}
|
||||
int url_end = cur;
|
||||
|
||||
// skip ws
|
||||
cur = regex_space.Find_fwd_while(trv, src, cur, src_end);
|
||||
|
||||
// get text (if any)
|
||||
int text_bgn = -1, text_end = -1;
|
||||
while (true) {
|
||||
byte b = src[cur];
|
||||
Object invalid_text_char = invalid_text_chars_trie.Match_at_w_b0(trv, b, src, cur, src_end);
|
||||
if (invalid_text_char != null) break;
|
||||
if (text_bgn == -1) text_bgn = cur;
|
||||
cur += gplx.core.intls.Utf8_.Len_of_char_by_1st_byte(b);
|
||||
text_end = cur;
|
||||
}
|
||||
|
||||
// check for "]"
|
||||
if (src[cur] != Byte_ascii.Brack_end) {
|
||||
bfr.Add_mid(src, prv, cur);
|
||||
prv = cur;
|
||||
continue;
|
||||
}
|
||||
cur++;
|
||||
// PORTED.END: $bits = preg_split( $this->mExtLinkBracketedRegex, $text, -1, PREG_SPLIT_DELIM_CAPTURE );
|
||||
|
||||
// The characters '<' and '>' (which were escaped by
|
||||
// removeHTMLtags()) should not be included in
|
||||
// URLs, per RFC 2396.
|
||||
if (Php_preg_.Match(angle_entities_trie, trv, src, url_bgn, url_end) != null) {
|
||||
int angle_bgn = trv.Match_bgn;
|
||||
text_bgn = angle_bgn;
|
||||
url_end = angle_bgn;
|
||||
}
|
||||
|
||||
// If the link text is an image URL, replace it with an <img> tag
|
||||
// This happened by accident in the original parser, but some people used it extensively
|
||||
// XO.MW.UNSUPPORTED.NON-WMF: not supporting images from freefrom url; (EX: "http://a.org/image.png" -> "<img>"); haven't seen this used on WMF wikis
|
||||
// $img = $this->maybeMakeExternalImage( $text );
|
||||
// if ($img !== false) $text = $img;
|
||||
|
||||
// XO.MW.SKIP: See "Have link text"
|
||||
//$dtrail = '';
|
||||
|
||||
// Set linktype for CSS - if URL==text, link is essentially free
|
||||
boolean text_missing = text_bgn == -1;
|
||||
byte[] link_type = text_missing ? Link_type__free : Link_type__text;
|
||||
|
||||
// No link text, e.g. [http://domain.tld/some.link]
|
||||
if (text_missing) {
|
||||
// Autonumber; EX: "[123]"
|
||||
tmp.Add_byte(Byte_ascii.Brack_bgn);
|
||||
tmp.Add_int_variable(autonumber++); // TODO.XO:$langObj->formatNum( ++$this->mAutonumber );
|
||||
tmp.Add_byte(Byte_ascii.Brack_end);
|
||||
link_type = Link_type__autonumber;
|
||||
}
|
||||
else {
|
||||
// XO.MW.SKIP: skipped b/c MW splits $trail into $dtrail and $trail but does no extra logic with variables; just concatenates later; "$this->getExternalLinkAttribs( $url ) ) . $dtrail . $trail;"
|
||||
// Have link text, e.g. [http://domain.tld/some.link text]s
|
||||
// Check for trail
|
||||
// list( $dtrail, $trail ) = Linker::splitTrail( $trail );
|
||||
}
|
||||
|
||||
// TODO.XO:
|
||||
// $text = $this->getConverterLanguage()->markNoConversion( $text );
|
||||
|
||||
byte[] url = Bry_.Mid(src, url_bgn, url_end);
|
||||
url = sanitizer.Clean_url(url);
|
||||
|
||||
bfr.Add_mid(src, prv, lnke_bgn);
|
||||
prv = cur;
|
||||
// Use the encoded URL
|
||||
// This means that users can paste URLs directly into the text
|
||||
// Funny characters like <20> aren't valid in URLs anyway
|
||||
// This was changed in August 2004
|
||||
linker.makeExternalLink(bfr, url, Bry_.Mid(src, text_bgn, text_end), Bool_.N, link_type, parser.Get_external_link_attribs(attribs), Bry_.Empty);
|
||||
|
||||
// XO.MW.UNSUPPORTED.HOOK: registers link for processing by other extensions?
|
||||
// Register link in the output Object.
|
||||
// Replace unnecessary URL escape codes with the referenced character
|
||||
// This prevents spammers from hiding links from the filters
|
||||
// $pasteurized = self::normalizeLinkUrl( $url );
|
||||
// $this->mOutput->addExternalLink( $pasteurized );
|
||||
}
|
||||
}
|
||||
|
||||
private static byte[] Link_type__free, Link_type__text, Link_type__autonumber;
|
||||
private static Btrie_slim_mgr angle_entities_trie;
|
||||
private static Btrie_slim_mgr invalid_text_chars_trie;
|
||||
private static void New__trie_itm__by_len(Btrie_slim_mgr mgr, int... ary) {
|
||||
mgr.Add_obj(Bry_.New_by_ints(ary), new Int_obj_val(ary.length));
|
||||
}
|
||||
}
|
||||
@@ -1,71 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.parsers.lnkes; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
|
||||
import org.junit.*;
|
||||
public class Xomw_lnke_wkr__tst {
|
||||
private final Xomw_lnke_wkr__fxt fxt = new Xomw_lnke_wkr__fxt();
|
||||
@Test public void Basic() {fxt.Test__parse("[https://a.org b]" , "<a rel='nofollow' class='external text' href='https://a.org'>b</a>");}
|
||||
@Test public void Invaild__protocol() {fxt.Test__parse("[httpz:a.org]" , "[httpz:a.org]");}
|
||||
@Test public void Invaild__protocol_slash() {fxt.Test__parse("[https:a.org]" , "[https:a.org]");}
|
||||
@Test public void Invaild__urlchars__0() {fxt.Test__parse("[https://]" , "[https://]");}
|
||||
@Test public void Invaild__urlchars__bad() {fxt.Test__parse("[https://\"]" , "[https://\"]");}
|
||||
@Test public void Many() {
|
||||
fxt.Test__parse(String_.Concat_lines_nl_apos_skip_last
|
||||
( "a"
|
||||
, "[https://b.org c]"
|
||||
, "d"
|
||||
, "[https://e.org f]"
|
||||
, "g"
|
||||
), String_.Concat_lines_nl_apos_skip_last
|
||||
( "a"
|
||||
, "<a rel='nofollow' class='external text' href='https://b.org'>c</a>"
|
||||
, "d"
|
||||
, "<a rel='nofollow' class='external text' href='https://e.org'>f</a>"
|
||||
, "g"
|
||||
));
|
||||
}
|
||||
@Test public void Protocol_rel() {
|
||||
fxt.Test__parse("[//a.org b]" , "<a rel='nofollow' class='external text' href='//a.org'>b</a>");
|
||||
}
|
||||
@Test public void Url_should_not_has_angle_entities() {
|
||||
fxt.Test__parse("[https://a.org/b<c z]" , "<a rel='nofollow' class='external text' href='https://a.org/b'><c z</a>");
|
||||
fxt.Test__parse("[https://a.org/b>c z]" , "<a rel='nofollow' class='external text' href='https://a.org/b'>>c z</a>");
|
||||
}
|
||||
@Test public void Link_trail() {// checks for noop via "Have link text"
|
||||
fxt.Test__parse("[https://a.org b]xyz" , "<a rel='nofollow' class='external text' href='https://a.org'>b</a>xyz");
|
||||
fxt.Test__parse("[https://a.org b]x!z" , "<a rel='nofollow' class='external text' href='https://a.org'>b</a>x!z");
|
||||
}
|
||||
@Test public void Clean_url() {
|
||||
fxt.Test__parse("[https://a"b c]" , "<a rel='nofollow' class='external text' href='https://a%22b'>c</a>");
|
||||
}
|
||||
}
|
||||
class Xomw_lnke_wkr__fxt {
|
||||
private final Xomw_lnke_wkr wkr = new Xomw_lnke_wkr(new Xomw_parser());
|
||||
private final Xomw_parser_bfr pbfr = new Xomw_parser_bfr();
|
||||
private boolean apos = true;
|
||||
public Xomw_lnke_wkr__fxt() {
|
||||
Xomw_regex_space regex_space = new Xomw_regex_space();
|
||||
wkr.Init_by_wiki(Xomw_parser.Protocols__dflt(), new Xomw_regex_url(regex_space), regex_space);
|
||||
}
|
||||
public void Test__parse(String src_str, String expd) {
|
||||
byte[] src_bry = Bry_.new_u8(src_str);
|
||||
wkr.Replace_external_links(new Xomw_parser_ctx(), pbfr.Init(src_bry));
|
||||
if (apos) expd = gplx.langs.htmls.Gfh_utl.Replace_apos(expd);
|
||||
Tfds.Eq_str_lines(expd, pbfr.Rslt().To_str_and_clear(), src_str);
|
||||
}
|
||||
}
|
||||
@@ -1,22 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.parsers.lnkis; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
|
||||
public class Xomw_image_params {
|
||||
public Xomw_param_map paramMap = null;
|
||||
public Xomw_MagicWordArray mwArray = null;
|
||||
}
|
||||
@@ -1,858 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.parsers.lnkis; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
|
||||
import gplx.core.btries.*; import gplx.core.primitives.*;
|
||||
import gplx.xowa.mws.utls.*;
|
||||
import gplx.xowa.wikis.nss.*; import gplx.xowa.wikis.xwikis.*;
|
||||
import gplx.xowa.mws.parsers.*; import gplx.xowa.mws.parsers.quotes.*;
|
||||
import gplx.xowa.mws.htmls.*; import gplx.xowa.mws.linkers.*;
|
||||
import gplx.xowa.mws.libs.*;
|
||||
import gplx.xowa.mws.media.*; import gplx.xowa.mws.filerepo.file.*;
|
||||
import gplx.xowa.parsers.uniqs.*;
|
||||
/* TODO.XO
|
||||
* P7: multi-line links; // look at the next 'line' to see if we can close it there
|
||||
* P7: interwiki
|
||||
* P7: [[File:]]
|
||||
* P7: [[Category:]]
|
||||
* P6: [[Media:]]
|
||||
* P4: handle "]]]"; "If we get a ] at the beginning of $m[3]"
|
||||
* P4: handle "[[http://a.org]]"
|
||||
* P3: $langObj->formatNum( ++$this->mAutonumber );
|
||||
* P2: $this->getConverterLanguage()->markNoConversion( $text );
|
||||
* P1: link_prefix; EX: b[[A]]; [not enabled on enwiki]
|
||||
*/
|
||||
public class Xomw_lnki_wkr {// THREAD.UNSAFE: caching for repeated calls
|
||||
private final Xomw_link_holders holders;
|
||||
private final Xomw_linker linker;
|
||||
private final Xomw_link_renderer link_renderer;
|
||||
// private final Btrie_slim_mgr protocols_trie;
|
||||
private final Xomw_quote_wkr quote_wkr;
|
||||
private final Xomw_strip_state strip_state;
|
||||
private Xomw_parser_env env;
|
||||
private Xow_wiki wiki;
|
||||
private Xoa_ttl page_title;
|
||||
private final Xomw_linker__normalize_subpage_link normalize_subpage_link = new Xomw_linker__normalize_subpage_link();
|
||||
private final Bry_bfr tmp;
|
||||
private final Xomw_parser parser;
|
||||
private final Xomw_atr_mgr extra_atrs = new Xomw_atr_mgr();
|
||||
private final Xomw_qry_mgr query = new Xomw_qry_mgr();
|
||||
private final Btrie_rv trv = new Btrie_rv();
|
||||
private final List_adp tmp_list = List_adp_.New();
|
||||
private final Hash_adp mImageParams = Hash_adp_bry.cs();
|
||||
private final Hash_adp mImageParamsMagicArray = Hash_adp_bry.cs();
|
||||
/**
 * Stores the collaborators passed in and takes the linker, quote worker, tmp buffer and strip state from the parser.
 *
 * @param parser         owning parser
 * @param holders        link holders; cleared by Clear_state
 * @param link_renderer  link renderer
 * @param protocols_trie accepted but currently not stored (the assignment is commented out)
 */
public Xomw_lnki_wkr(Xomw_parser parser, Xomw_link_holders holders, Xomw_link_renderer link_renderer, Btrie_slim_mgr protocols_trie) {
  // collaborators owned by the parser
  this.linker = parser.Linker();
  this.quote_wkr = parser.Quote_wkr();
  this.tmp = parser.Tmp();
  this.strip_state = parser.Strip_state();

  // collaborators passed in directly
  this.parser = parser;
  this.holders = holders;
  this.link_renderer = link_renderer;
  // this.protocols_trie = protocols_trie;
}
|
||||
public void Init_by_wiki(Xomw_parser_env env, Xow_wiki wiki) {
|
||||
this.env = env;
|
||||
this.wiki = wiki;
|
||||
if (title_chars_for_lnki == null) {
|
||||
title_chars_for_lnki = (boolean[])Array_.Clone(Xomw_ttl_utl.Title_chars_valid());
|
||||
// the % is needed to support urlencoded titles as well
|
||||
title_chars_for_lnki[Byte_ascii.Hash] = true;
|
||||
title_chars_for_lnki[Byte_ascii.Percent] = true;
|
||||
}
|
||||
}
|
||||
public void Clear_state() {
|
||||
holders.Clear();
|
||||
}
|
||||
public void Replace_internal_links(Xomw_parser_ctx pctx, Xomw_parser_bfr pbfr) {
|
||||
// XO.PBFR
|
||||
Bry_bfr src_bfr = pbfr.Src();
|
||||
byte[] src = src_bfr.Bfr();
|
||||
int src_bgn = 0;
|
||||
int src_end = src_bfr.Len();
|
||||
Bry_bfr bfr = pbfr.Trg();
|
||||
pbfr.Switch();
|
||||
|
||||
this.page_title = pctx.Page_title();
|
||||
|
||||
Replace_internal_links(pctx, bfr, src, src_bgn, src_end);
|
||||
}
|
||||
// XO.MW:SYNC:1.29; DATE:2017-02-02
|
||||
public void Replace_internal_links(Xomw_parser_ctx pctx, Bry_bfr bfr, byte[] src, int src_bgn, int src_end) {
|
||||
// XO.MW: regex for tc move to header; e1 and e1_img moved to code
|
||||
// the % is needed to support urlencoded titles as well
|
||||
|
||||
// XO.MW.BGN: split the entire text String on occurrences of [[
|
||||
int cur = src_bgn;
|
||||
int prv = cur;
|
||||
while (true) {
|
||||
int lnki_bgn = Bry_find_.Find_fwd(src, Bry__wtxt__lnki__bgn, cur, src_end); // $a = StringUtils::explode('[[', ' ' . $s);
|
||||
if (lnki_bgn == Bry_find_.Not_found) { // no more "[["; stop loop
|
||||
bfr.Add_mid(src, cur, src_end);
|
||||
break;
|
||||
}
|
||||
cur = lnki_bgn + 2; // 2="[[".length
|
||||
|
||||
// XO.MW.IGNORE: handles strange split logic of adding space to String; "$s = substr($s, 1);"
|
||||
|
||||
// TODO.XO:link_prefix; EX: b[[A]]
|
||||
// $useLinkPrefixExtension = $this->getTargetLanguage()->linkPrefixExtension();
|
||||
// $e2 = null;
|
||||
// if ($useLinkPrefixExtension) {
|
||||
// // Match the end of a line for a word that's not followed by whitespace,
|
||||
// // e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
|
||||
// global $wgContLang;
|
||||
// $charset = $wgContLang->linkPrefixCharset();
|
||||
// $e2 = "/^((?>.*[^$charset]|))(.+)$/sDu";
|
||||
// }
|
||||
|
||||
// IGNORE: throw new MWException(__METHOD__ . ": \$this->mTitle is null\n");
|
||||
|
||||
// $nottalk = !$this->mTitle->isTalkPage();
|
||||
|
||||
// TODO.XO:link_prefix
|
||||
byte[] prefix = Bry_.Empty;
|
||||
//if ($useLinkPrefixExtension) {
|
||||
// $m = [];
|
||||
// if (preg_match($e2, $s, $m)) {
|
||||
// $first_prefix = $m[2];
|
||||
// } else {
|
||||
// $first_prefix = false;
|
||||
// }
|
||||
//} else {
|
||||
// $prefix = '';
|
||||
//}
|
||||
|
||||
// TODO.XO:link_prefix; EX: b[[A]]
|
||||
//if ($useLinkPrefixExtension) {
|
||||
// if (preg_match($e2, $s, $m)) {
|
||||
// $prefix = $m[2];
|
||||
// $s = $m[1];
|
||||
// } else {
|
||||
// $prefix = '';
|
||||
// }
|
||||
// // first link
|
||||
// if ($first_prefix) {
|
||||
// $prefix = $first_prefix;
|
||||
// $first_prefix = false;
|
||||
// }
|
||||
//}
|
||||
|
||||
// PORTED.BGN: if (preg_match($e1, $line, $m)) && else if (preg_match($e1_img, $line, $m))
|
||||
// NOTE: both e1 and e1_img are effectively the same; e1_img allows nested "[["; EX: "[[A|b[[c]]d]]" will stop at "[[A|b"
|
||||
int ttl_bgn = cur;
|
||||
int ttl_end = Xomw_ttl_utl.Find_fwd_while_title(src, cur, src_end, title_chars_for_lnki);
|
||||
cur = ttl_end;
|
||||
int capt_bgn = -1, capt_end = -1;
|
||||
int nxt_lnki = -1;
|
||||
|
||||
boolean might_be_img = false;
|
||||
if (ttl_end > ttl_bgn) { // at least one valid title-char found; check for "|" or "]]" EX: "[[a"
|
||||
byte nxt_byte = src[ttl_end];
|
||||
if (nxt_byte == Byte_ascii.Pipe) { // handles lnki with capt ([[A|a]])and lnki with file ([[File:A.png|b|c|d]])
|
||||
cur = ttl_end + 1;
|
||||
|
||||
// find next "[["
|
||||
nxt_lnki = Bry_find_.Find_fwd(src, Bry__wtxt__lnki__bgn, cur, src_end);
|
||||
if (nxt_lnki == Bry_find_.Not_found)
|
||||
nxt_lnki = src_end;
|
||||
|
||||
// find end "]]"
|
||||
capt_bgn = cur;
|
||||
capt_end = Bry_find_.Find_fwd(src, Bry__wtxt__lnki__end, cur, nxt_lnki);
|
||||
if (capt_end == Bry_find_.Not_found) {
|
||||
capt_end = nxt_lnki;
|
||||
cur = nxt_lnki;
|
||||
might_be_img = true;
|
||||
}
|
||||
else {
|
||||
cur = capt_end + Bry__wtxt__lnki__end.length;
|
||||
}
|
||||
}
|
||||
else if (Bry_.Match(src, ttl_end, ttl_end + 2, Bry__wtxt__lnki__end)) { // handles simple lnki; EX: [[A]]
|
||||
cur = ttl_end + 2;
|
||||
}
|
||||
else {
|
||||
ttl_end = -1;
|
||||
}
|
||||
}
|
||||
else
|
||||
ttl_end = -1;
|
||||
if (ttl_end == -1) { // either (a) no valid title-chars ("[[<") or (b) title char, but has stray "]" ("[[a]b]]")
|
||||
// Invalid form; output directly
|
||||
bfr.Add_mid(src, prv, lnki_bgn + 2);
|
||||
bfr.Add_mid(src, cur, ttl_bgn);
|
||||
prv = cur = ttl_bgn;
|
||||
continue;
|
||||
}
|
||||
// PORTED.END: if (preg_match($e1, $line, $m)) && else if (preg_match($e1_img, $line, $m))
|
||||
|
||||
byte[] text = Bry_.Mid(src, capt_bgn, capt_end);
|
||||
byte[] trail = Bry_.Empty;
|
||||
if (!might_be_img) {
|
||||
// TODO.XO:
|
||||
// If we get a ] at the beginning of $m[3] that means we have a link that's something like:
|
||||
// [[Image:Foo.jpg|[http://example.com desc]]] <- having three ] in a row fucks up,
|
||||
// the real problem is with the $e1 regex
|
||||
// See T1500.
|
||||
// Still some problems for cases where the ] is meant to be outside punctuation,
|
||||
// and no image is in sight. See T4095.
|
||||
// if ($text !== ''
|
||||
// && substr($m[3], 0, 1) === ']'
|
||||
// && strpos($text, '[') !== false
|
||||
// ) {
|
||||
// $text .= ']'; // so that replaceExternalLinks($text) works later
|
||||
// $m[3] = substr($m[3], 1);
|
||||
// }
|
||||
|
||||
// fix up urlencoded title texts
|
||||
// if (strpos($m[1], '%') !== false) {
|
||||
// // Should anchors '#' also be rejected?
|
||||
// $m[1] = str_replace([ '<', '>' ], [ '<', '>' ], rawurldecode($m[1]));
|
||||
// }
|
||||
// $trail = $m[3];
|
||||
}
|
||||
else {
|
||||
// Invalid, but might be an image with a link in its caption
|
||||
// $text = $m[2];
|
||||
// if (strpos($m[1], '%') !== false) {
|
||||
// $m[1] = str_replace([ '<', '>' ], [ '<', '>' ], rawurldecode($m[1]));
|
||||
// }
|
||||
// $trail = "";
|
||||
}
|
||||
|
||||
byte[] orig_link = Bry_.Mid(src, ttl_bgn, ttl_end);
|
||||
|
||||
// TODO.XO: handle "[[http://a.org]]"
|
||||
// Don't allow @gplx.Internal protected links to pages containing
|
||||
// PROTO: where PROTO is a valid URL protocol; these
|
||||
// should be external links.
|
||||
// if (preg_match('/^(?i:' . $this->mUrlProtocols . ')/', $origLink)) {
|
||||
// $s .= $prefix . '[[' . $line;
|
||||
// continue;
|
||||
// }
|
||||
|
||||
byte[] link = orig_link;
|
||||
boolean no_force = orig_link[0] != Byte_ascii.Colon;
|
||||
if (!no_force) {
|
||||
// Strip off leading ':'
|
||||
link = Bry_.Mid(link, 1);
|
||||
}
|
||||
Xoa_ttl nt = wiki.Ttl_parse(link);
|
||||
|
||||
// Make subpage if necessary
|
||||
boolean subpages_enabled = nt.Ns().Subpages_enabled();
|
||||
if (subpages_enabled) {
|
||||
Maybe_do_subpage_link(normalize_subpage_link, orig_link, text);
|
||||
link = normalize_subpage_link.link;
|
||||
text = normalize_subpage_link.text;
|
||||
nt = wiki.Ttl_parse(link);
|
||||
}
|
||||
// IGNORE: handled in rewrite above
|
||||
// else {
|
||||
// link = orig_link;
|
||||
// }
|
||||
|
||||
byte[] unstrip = strip_state.Unstrip_nowiki(link);
|
||||
if (!Bry_.Eq(unstrip, link))
|
||||
nt = wiki.Ttl_parse(unstrip);
|
||||
if (nt == null) {
|
||||
bfr.Add_mid(src, prv, lnki_bgn + 2); // $s .= $prefix . '[[' . $line;
|
||||
prv = cur = lnki_bgn + 2;
|
||||
continue;
|
||||
}
|
||||
|
||||
Xow_ns ns = nt.Ns();
|
||||
Xow_xwiki_itm iw = nt.Wik_itm();
|
||||
|
||||
if (might_be_img) { // if this is actually an invalid link
|
||||
if (ns.Id_is_file() && no_force) { // but might be an image
|
||||
boolean found = false;
|
||||
// while (true) {
|
||||
// // look at the next 'line' to see if we can close it there
|
||||
// a->next();
|
||||
// next_line = a->current();
|
||||
// if (next_line === false || next_line === null) {
|
||||
// break;
|
||||
// }
|
||||
// m = explode(']]', next_line, 3);
|
||||
// if (count(m) == 3) {
|
||||
// // the first ]] closes the inner link, the second the image
|
||||
// found = true;
|
||||
// text .= "[[{m[0]}]]{m[1]}";
|
||||
// trail = m[2];
|
||||
// break;
|
||||
// } else if (count(m) == 2) {
|
||||
// // if there's exactly one ]] that's fine, we'll keep looking
|
||||
// text .= "[[{m[0]}]]{m[1]}";
|
||||
// } else {
|
||||
// // if next_line is invalid too, we need look no further
|
||||
// text .= '[[' . next_line;
|
||||
// break;
|
||||
// }
|
||||
// }
|
||||
if (!found) {
|
||||
// we couldn't find the end of this imageLink, so output it raw
|
||||
// but don't ignore what might be perfectly normal links in the text we've examined
|
||||
Bry_bfr nested = wiki.Utl__bfr_mkr().Get_b128();
|
||||
this.Replace_internal_links(pctx, nested, text, 0, text.length);
|
||||
nested.Mkr_rls();
|
||||
bfr.Add(prefix).Add(Bry__wtxt__lnki__bgn).Add(link).Add_byte_pipe().Add(text); // s .= "{prefix}[[link|text";
|
||||
// note: no trail, because without an end, there *is* no trail
|
||||
continue;
|
||||
}
|
||||
}
|
||||
else { // it's not an image, so output it raw
|
||||
bfr.Add(prefix).Add(Bry__wtxt__lnki__bgn).Add(link).Add_byte_pipe().Add(text); // s .= "{prefix}[[link|text";
|
||||
// note: no trail, because without an end, there *is* no trail
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
boolean was_blank = text.length == 0;
|
||||
if (was_blank) {
|
||||
text = link;
|
||||
}
|
||||
else {
|
||||
// T6598 madness. Handle the quotes only if they come from the alternate part
|
||||
// [[Lista d''e paise d''o munno]] -> <a href="...">Lista d''e paise d''o munno</a>
|
||||
// [[Criticism of Harry Potter|Criticism of ''Harry Potter'']]
|
||||
// -> <a href="Criticism of Harry Potter">Criticism of <i>Harry Potter</i></a>
|
||||
text = quote_wkr.Do_quotes(tmp, text);
|
||||
}
|
||||
|
||||
// Link not escaped by : , create the various objects
|
||||
// if (no_force && !nt->wasLocalInterwiki()) {
|
||||
// Interwikis
|
||||
// if (
|
||||
// iw && this->mOptions->getInterwikiMagic() && nottalk && (
|
||||
// Language::fetchLanguageName(iw, null, 'mw') ||
|
||||
// in_array(iw, wgExtraInterlanguageLinkPrefixes)
|
||||
// )
|
||||
// ) {
|
||||
// T26502: filter duplicates
|
||||
// if (!isset(this->mLangLinkLanguages[iw])) {
|
||||
// this->mLangLinkLanguages[iw] = true;
|
||||
// this->mOutput->addLanguageLink(nt->getFullText());
|
||||
// }
|
||||
//
|
||||
// s = rtrim(s . prefix);
|
||||
// s .= trim(trail, "\n") == '' ? '': prefix . trail;
|
||||
// continue;
|
||||
// }
|
||||
//
|
||||
if (ns.Id_is_file()) {
|
||||
// boolean is_good_image = !wfIsBadImage(nt->getDBkey(), this->mTitle)
|
||||
boolean is_good_image = true;
|
||||
if (is_good_image) {
|
||||
if (was_blank) {
|
||||
// if no parameters were passed, text
|
||||
// becomes something like "File:Foo.png",
|
||||
// which we don't want to pass on to the
|
||||
// image generator
|
||||
text = Bry_.Empty;
|
||||
}
|
||||
else {
|
||||
// recursively parse links inside the image caption
|
||||
// actually, this will parse them in any other parameters, too,
|
||||
// but it might be hard to fix that, and it doesn't matter ATM
|
||||
// text = this->replaceExternalLinks(text);
|
||||
// holders->merge(this->replaceInternalLinks2(text));
|
||||
}
|
||||
// cloak any absolute URLs inside the image markup, so replaceExternalLinks() won't touch them
|
||||
bfr.Add(prefix);
|
||||
// Armor_links(Make_image(bfr, nt, text, holders))
|
||||
this.makeImage(pctx, bfr, nt, text, holders);
|
||||
bfr.Add(trail);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
else if (ns.Id_is_ctg()) {
|
||||
bfr.Trim_end_ws(); // s = rtrim(s . "\n"); // T2087
|
||||
|
||||
if (was_blank) {
|
||||
// sortkey = this->getDefaultSort();
|
||||
}
|
||||
else {
|
||||
// sortkey = text;
|
||||
}
|
||||
// sortkey = Sanitizer::decodeCharReferences(sortkey);
|
||||
// sortkey = str_replace("\n", '', sortkey);
|
||||
// sortkey = this->getConverterLanguage()->convertCategoryKey(sortkey);
|
||||
// this->mOutput->addCategory(nt->getDBkey(), sortkey);
|
||||
//
|
||||
// Strip the whitespace Category links produce, see T2087
|
||||
// s .= trim(prefix . trail, "\n") == '' ? '' : prefix . trail;
|
||||
|
||||
continue;
|
||||
}
|
||||
// }
|
||||
|
||||
// Self-link checking. For some languages, variants of the title are checked in
|
||||
// LinkHolderArray::doVariants() to allow batching the existence checks necessary
|
||||
// for linking to a different variant.
|
||||
if (!ns.Id_is_special() && nt.Eq_full_db(page_title) && !nt.Has_fragment()) {
|
||||
bfr.Add(prefix);
|
||||
linker.makeSelfLinkObj(bfr, nt, text, Bry_.Empty, trail, Bry_.Empty);
|
||||
continue;
|
||||
}
|
||||
|
||||
// NS_MEDIA is a pseudo-namespace for linking directly to a file
|
||||
// @todo FIXME: Should do batch file existence checks, see comment below
|
||||
if (ns.Id_is_media()) {
|
||||
// Give extensions a chance to select the file revision for us
|
||||
// options = [];
|
||||
// desc_query = false;
|
||||
// MW.HOOK:BeforeParserFetchFileAndTitle
|
||||
// Fetch and register the file (file title may be different via hooks)
|
||||
// list(file, nt) = this->fetchFileAndTitle(nt, options);
|
||||
// Cloak with NOPARSE to avoid replacement in replaceExternalLinks
|
||||
// s .= prefix . this->armorLinks(
|
||||
// Linker::makeMediaLinkFile(nt, file, text)) . trail;
|
||||
// continue;
|
||||
}
|
||||
|
||||
// Some titles, such as valid special pages or files in foreign repos, should
|
||||
// be shown as bluelinks even though they're not included in the page table
|
||||
// @todo FIXME: isAlwaysKnown() can be expensive for file links; we should really do
|
||||
// batch file existence checks for NS_FILE and NS_MEDIA
|
||||
bfr.Add_mid(src, prv, lnki_bgn);
|
||||
prv = cur;
|
||||
if (iw == null && nt.Is_always_known()) {
|
||||
// this->mOutput->addLink(nt);
|
||||
Make_known_link_holder(bfr, nt, text, trail, prefix);
|
||||
}
|
||||
else {
|
||||
// Links will be added to the output link list after checking
|
||||
holders.Make_holder(bfr, nt, text, Bry_.Ary_empty, trail, prefix);
|
||||
}
|
||||
}
|
||||
}
|
||||
public void makeImage(Xomw_parser_ctx pctx, Bry_bfr bfr, Xoa_ttl title, byte[] options_at_link, Xomw_link_holders holders) {
|
||||
// Check if the options text is of the form "options|alt text"
|
||||
// Options are:
|
||||
// * thumbnail make a thumbnail with enlarge-icon and caption, alignment depends on lang
|
||||
// * left no resizing, just left align. label is used for alt= only
|
||||
// * right same, but right aligned
|
||||
// * none same, but not aligned
|
||||
// * ___px scale to ___ pixels width, no aligning. e.g. use in taxobox
|
||||
// * center center the image
|
||||
// * frame Keep original image size, no magnify-button.
|
||||
// * framed Same as "frame"
|
||||
// * frameless like 'thumb' but without a frame. Keeps user preferences for width
|
||||
// * upright reduce width for upright images, rounded to full __0 px
|
||||
// * border draw a 1px border around the image
|
||||
// * alt Text for HTML alt attribute (defaults to empty)
|
||||
// * class Set a class for img node
|
||||
// * link Set the target of the image link. Can be external, interwiki, or local
|
||||
// vertical-align values (no % or length right now):
|
||||
// * baseline
|
||||
// * sub
|
||||
// * super
|
||||
// * top
|
||||
// * text-top
|
||||
// * middle
|
||||
// * bottom
|
||||
// * text-bottom
|
||||
|
||||
// Protect LanguageConverter markup when splitting into parts
|
||||
byte[][] parts = Xomw_string_utils.Delimiter_explode(tmp_list, trv, options_at_link);
|
||||
|
||||
// Give extensions a chance to select the file revision for us
|
||||
// $options = [];
|
||||
byte[] desc_query = null;
|
||||
// XO.MW.HOOK:BeforeParserFetchFileAndTitle
|
||||
|
||||
// Fetch and register the file (file title may be different via hooks)
|
||||
// list($file, $title) = $this->fetchFileAndTitle($title, $options);
|
||||
Xomw_File file = fetchFileAndTitle(title, null);
|
||||
|
||||
// Get parameter map
|
||||
Xomw_MediaHandler handler = file == null ? null : file.getHandler();
|
||||
|
||||
Xomw_image_params tmp_img_params = pctx.Lnki_wkr__make_image__img_params;
|
||||
this.getImageParams(tmp_img_params, handler);
|
||||
Xomw_param_map paramMap = tmp_img_params.paramMap;
|
||||
Xomw_MagicWordArray mwArray = tmp_img_params.mwArray;
|
||||
|
||||
// XO.MW.UNSUPPORTED.TrackingCategory: if (!$file) $this->addTrackingCategory('broken-file-category');
|
||||
|
||||
// Process the input parameters
|
||||
byte[] caption = Bry_.Empty;
|
||||
// XO.MW: $params = [ 'frame' => [], 'handler' => [], 'horizAlign' => [], 'vertAlign' => [] ];
|
||||
Xomw_params_frame frameParams = paramMap.Frame.Clear();
|
||||
Xomw_params_handler handlerParams = paramMap.Handler.Clear();
|
||||
// Xomw_params_horizAlign horizAlignParams = paramMap.HorizAlign.Clear();
|
||||
// Xomw_params_vertAlign vertAlignParams = paramMap.VertAlign.Clear();
|
||||
boolean seen_format = false;
|
||||
|
||||
int parts_len = parts.length;
|
||||
for (int i = 0; i < parts_len; i++) {
|
||||
byte[] part = parts[i];
|
||||
part = Bry_.Trim(part);
|
||||
byte[][] tmp_match_word = pctx.Lnki_wkr__make_image__match_magic_word;
|
||||
mwArray.matchVariableStartToEnd(tmp_match_word, part);
|
||||
byte[] magic_name = tmp_match_word[0];
|
||||
byte[] val = tmp_match_word[1];
|
||||
boolean validated = false;
|
||||
|
||||
Xomw_param_itm param_item = paramMap.Get_by(magic_name);
|
||||
if (param_item != null) {
|
||||
int typeUid = param_item.type_uid;
|
||||
int paramNameUid = param_item.name_uid;
|
||||
// Special case; width and height come in one variable together
|
||||
if (typeUid == Xomw_param_map.Type__handler && paramNameUid == Xomw_param_itm.Name__width) {
|
||||
int[] tmp_img_size = pctx.Lnki_wkr__make_image__img_size;
|
||||
this.parseWidthParam(tmp_img_size, val);
|
||||
int parsedW = tmp_img_size[0];
|
||||
int parsedH = tmp_img_size[1];
|
||||
if (parsedW != 0) {
|
||||
if (handler.validateParam(Xomw_param_itm.Name__width, null, parsedW)) {
|
||||
paramMap.Set(typeUid, Xomw_param_itm.Name__width, null, parsedW);
|
||||
validated = true;
|
||||
}
|
||||
}
|
||||
if (parsedH != 0) {
|
||||
if (handler.validateParam(Xomw_param_itm.Name__height, null, parsedH)) {
|
||||
paramMap.Set(typeUid, Xomw_param_itm.Name__height, null, parsedH);
|
||||
validated = true;
|
||||
}
|
||||
}
|
||||
// else no validation -- T15436
|
||||
}
|
||||
else {
|
||||
if (typeUid == Xomw_param_map.Type__handler) {
|
||||
// Validate handler parameter
|
||||
// validated = $handler->validateParam($paramName, $value);
|
||||
}
|
||||
else {
|
||||
// Validate @gplx.Internal protected parameters
|
||||
switch (paramNameUid) {
|
||||
case Xomw_param_itm.Name__manual_thumb:
|
||||
case Xomw_param_itm.Name__alt:
|
||||
case Xomw_param_itm.Name__class:
|
||||
// @todo FIXME: Possibly check validity here for
|
||||
// manualthumb? downstream behavior seems odd with
|
||||
// missing manual thumbs.
|
||||
validated = true;
|
||||
// $value = $this->stripAltText($value, $holders);
|
||||
break;
|
||||
case Xomw_param_itm.Name__link:
|
||||
// $chars = self::EXT_LINK_URL_CLASS;
|
||||
// $addr = self::EXT_LINK_ADDR;
|
||||
// $prots = $this->mUrlProtocols;
|
||||
// if ($value === '') {
|
||||
// $paramName = 'no-link';
|
||||
// $value = true;
|
||||
validated = true;
|
||||
// }
|
||||
// else if (preg_match("/^((?i)$prots)/", $value)) {
|
||||
// if (preg_match("/^((?i)$prots)$addr$chars*$/u", $value, $m)) {
|
||||
// $paramName = 'link-url';
|
||||
// $this->mOutput->addExternalLink($value);
|
||||
// if ($this->mOptions->getExternalLinkTarget()) {
|
||||
// $params[$type]['link-target'] = $this->mOptions->getExternalLinkTarget();
|
||||
// }
|
||||
validated = true;
|
||||
// }
|
||||
// } else {
|
||||
// $linkTitle = Title::newFromText($value);
|
||||
// if ($linkTitle) {
|
||||
// $paramName = 'link-title';
|
||||
// $value = $linkTitle;
|
||||
// $this->mOutput->addLink($linkTitle);
|
||||
validated = true;
|
||||
// }
|
||||
// }
|
||||
break;
|
||||
case Xomw_param_itm.Name__frameless:
|
||||
case Xomw_param_itm.Name__framed:
|
||||
case Xomw_param_itm.Name__thumbnail:
|
||||
// use first appearing option, discard others.
|
||||
validated = !seen_format;
|
||||
seen_format = true;
|
||||
break;
|
||||
default:
|
||||
// Most other things appear to be empty or numeric...
|
||||
validated = (val == null || Php_utl_.isnumeric(Bry_.Trim(val)));
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (validated) {
|
||||
paramMap.Set(typeUid, paramNameUid, val, -1);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!validated) {
|
||||
caption = part;
|
||||
}
|
||||
}
|
||||
|
||||
// Process alignment parameters
|
||||
Xomw_param_itm tmp = paramMap.Get_by(Xomw_param_map.Type__horizAlign);
|
||||
if (tmp != null) {
|
||||
// frameParams.align = tmp.val;
|
||||
}
|
||||
tmp = paramMap.Get_by(Xomw_param_map.Type__vertAlign);
|
||||
if (tmp != null) {
|
||||
// frameParams.valign = tmp.val;
|
||||
}
|
||||
|
||||
frameParams.caption = caption;
|
||||
|
||||
boolean image_is_framed
|
||||
= frameParams.frame != null
|
||||
|| frameParams.framed != null
|
||||
|| frameParams.thumbnail != null
|
||||
|| frameParams.manualthumb != null
|
||||
;
|
||||
|
||||
// Will the image be presented in a frame, with the caption below?
|
||||
// In the old days, [[Image:Foo|text...]] would set alt text. Later it
|
||||
// came to also set the caption, ordinary text after the image -- which
|
||||
// makes no sense, because that just repeats the text multiple times in
|
||||
// screen readers. It *also* came to set the title attribute.
|
||||
// Now that we have an alt attribute, we should not set the alt text to
|
||||
// equal the caption: that's worse than useless, it just repeats the
|
||||
// text. This is the framed/thumbnail case. If there's no caption, we
|
||||
// use the unnamed parameter for alt text as well, just for the time be-
|
||||
// ing, if the unnamed param is set and the alt param is not.
|
||||
// For the future, we need to figure out if we want to tweak this more,
|
||||
// e.g., introducing a title= parameter for the title; ignoring the un-
|
||||
// named parameter entirely for images without a caption; adding an ex-
|
||||
// plicit caption= parameter and preserving the old magic unnamed para-
|
||||
// meter for BC; ...
|
||||
if (image_is_framed) { // Framed image
|
||||
if (caption == Bry_.Empty && frameParams.alt == null) {
|
||||
// No caption or alt text, add the filename as the alt text so
|
||||
// that screen readers at least get some description of the image
|
||||
frameParams.alt = title.Get_text();
|
||||
}
|
||||
// Do not set $params['frame']['title'] because tooltips don't make sense
|
||||
// for framed images
|
||||
}
|
||||
else { // Inline image
|
||||
if (frameParams.alt == null) {
|
||||
// No alt text, use the "caption" for the alt text
|
||||
if (caption != Bry_.Empty) {
|
||||
// frameParams.alt = $this->stripAltText(caption, $holders);
|
||||
}
|
||||
else {
|
||||
// No caption, fall back to using the filename for the
|
||||
// alt text
|
||||
frameParams.alt = title.Get_text();
|
||||
}
|
||||
}
|
||||
// Use the "caption" for the tooltip text
|
||||
// frameParams.title = $this->stripAltText(caption, $holders);
|
||||
}
|
||||
|
||||
// MW.HOOK:ParserMakeImageParams
|
||||
|
||||
// Linker does the rest
|
||||
// byte[] time = options.time;
|
||||
Object time = null;
|
||||
linker.makeImageLink(bfr, pctx, parser, title, file, frameParams, handlerParams, time, desc_query, parser.Options().getThumbSize());
|
||||
|
||||
// Give the handler a chance to modify the parser Object
|
||||
// if (handler != null) {
|
||||
// $handler->parserTransformHook($this, $file);
|
||||
// }
|
||||
}
|
||||
// protected function stripAltText( $caption, $holders ) {
|
||||
// // Strip bad stuff out of the title (tooltip). We can't just use
|
||||
// // replaceLinkHoldersText() here, because if this function is called
|
||||
// // from replaceInternalLinks2(), mLinkHolders won't be up-to-date.
|
||||
// if ( $holders ) {
|
||||
// $tooltip = $holders->replaceText( $caption );
|
||||
// } else {
|
||||
// $tooltip = $this->replaceLinkHoldersText( $caption );
|
||||
// }
|
||||
//
|
||||
// // make sure there are no placeholders in thumbnail attributes
|
||||
// // that are later expanded to html- so expand them now and
|
||||
// // remove the tags
|
||||
// $tooltip = $this->mStripState->unstripBoth( $tooltip );
|
||||
// $tooltip = Sanitizer::stripAllTags( $tooltip );
|
||||
//
|
||||
// return $tooltip;
|
||||
// }
|
||||
|
||||
private static Xomw_param_list[] internalParamNames;
|
||||
private static Xomw_param_map internalParamMap;
|
||||
|
||||
private void getImageParams(Xomw_image_params rv, Xomw_MediaHandler handler) {
|
||||
byte[] handlerClass = handler == null ? Bry_.Empty : handler.Key();
|
||||
rv.paramMap = (Xomw_param_map)mImageParams.Get_by(handlerClass);
|
||||
// NOTE: lazy-init; code below can be inefficent
|
||||
if (rv.paramMap == null) {
|
||||
// Initialise static lists
|
||||
if (internalParamNames == null) {
|
||||
internalParamNames = new Xomw_param_list[]
|
||||
{ Xomw_param_list.New(Xomw_param_map.Type__horizAlign, "horizAlign", "left", "right", "center", "none")
|
||||
, Xomw_param_list.New(Xomw_param_map.Type__vertAlign , "vertAlign", "baseline", "sub", "super", "top", "text-top", "middle", "bottom", "text-bottom")
|
||||
, Xomw_param_list.New(Xomw_param_map.Type__frame , "frame", "thumbnail", "manual_thumb", "framed", "frameless", "upright", "border", "link", "alt", "class")
|
||||
};
|
||||
|
||||
internalParamMap = new Xomw_param_map();
|
||||
byte[] bry_img = Bry_.new_a7("img_");
|
||||
for (Xomw_param_list param_list : internalParamNames) {
|
||||
for (byte[] name : param_list.names) {
|
||||
byte[] magic_name = Bry_.Add(bry_img, Bry_.Replace(name, Byte_ascii.Dash, Byte_ascii.Underline));
|
||||
internalParamMap.Add(magic_name, param_list.type_uid, name);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Add handler params
|
||||
Xomw_param_map paramMap = internalParamMap.Clone();
|
||||
if (handler != null) {
|
||||
Xomw_param_map handlerParamMap = handler.getParamMap();
|
||||
int handlerParamMapLen = handlerParamMap.Len();
|
||||
for (int i = 0; i < handlerParamMapLen; i++) {
|
||||
Xomw_param_itm itm = (Xomw_param_itm)handlerParamMap.Get_at(i);
|
||||
paramMap.Add(itm.magic, itm.type_uid, itm.name);
|
||||
}
|
||||
}
|
||||
this.mImageParams.Add(handlerClass, paramMap);
|
||||
rv.paramMap = paramMap;
|
||||
Xomw_MagicWordArray mw_array = new Xomw_MagicWordArray(env.Magic_word_mgr(), paramMap.Keys());
|
||||
this.mImageParamsMagicArray.Add(handlerClass, mw_array);
|
||||
rv.mwArray = mw_array;
|
||||
}
|
||||
else {
|
||||
rv.mwArray = (Xomw_MagicWordArray)mImageParamsMagicArray.Get_by(handlerClass);
|
||||
}
|
||||
}
|
||||
// Parsed a width param of imagelink like 300px or 200x300px
|
||||
// XO.MW.NOTE: for MW, "" -> null, null while "AxB" -> 0x0
|
||||
public void parseWidthParam(int[] img_size, byte[] src) {
|
||||
img_size[0] = img_size[1] = Php_utl_.Null_int;
|
||||
if (src == Bry_.Empty) {
|
||||
return;
|
||||
}
|
||||
// (T15500) In both cases (width/height and width only),
|
||||
// permit trailing "px" for backward compatibility.
|
||||
int src_bgn = 0;
|
||||
int src_end = src.length;
|
||||
// XO: "px" is optional; if exists at end, ignore it
|
||||
if (Bry_.Has_at_end(src, Bry__px)) {
|
||||
src_end -= 2;
|
||||
}
|
||||
|
||||
// XO.MW: if ( preg_match( '/^([0-9]*)x([0-9]*)\s*(?:px)?\s*$/', $value, $m ) ) {
|
||||
int w_bgn = 0;
|
||||
int w_end = Bry_find_.Find_fwd_while_num(src, src_bgn, src_end);
|
||||
int h_bgn = -1;
|
||||
int h_end = -1;
|
||||
if (w_end < src_end && src[w_end] == Byte_ascii.Ltr_x) {
|
||||
h_bgn = w_end + 1;
|
||||
h_end = Bry_find_.Find_fwd_while_num(src, h_bgn, src_end);
|
||||
}
|
||||
img_size[0] = Bry_.To_int_or(src, w_bgn, w_end, 0);
|
||||
img_size[1] = Bry_.To_int_or(src, h_bgn, h_end, 0);
|
||||
}
|
||||
public static final byte[] Bry__px = Bry_.new_a7("px");
|
||||
|
||||
/**
|
||||
* Fetch a file and its title and register a reference to it.
|
||||
* If 'broken' is a key in $options then the file will appear as a broken thumbnail.
|
||||
* @param Title $title
|
||||
* @param array $options Array of options to RepoGroup::findFile
|
||||
* @return array ( File or false, Title of file )
|
||||
*/
|
||||
public Xomw_File fetchFileAndTitle(Xoa_ttl title, Hash_adp options) {
|
||||
Xomw_File file = fetchFileNoRegister(title, options);
|
||||
|
||||
//$time = $file ? $file->getTimestamp() : false;
|
||||
//$sha1 = $file ? $file->getSha1() : false;
|
||||
//# Register the file as a dependency...
|
||||
//$this->mOutput->addImage( $title->getDBkey(), $time, $sha1 );
|
||||
//if ( $file && !$title->equals( $file->getTitle() ) ) {
|
||||
// # Update fetched file title
|
||||
// $title = $file->getTitle();
|
||||
// $this->mOutput->addImage( $title->getDBkey(), $time, $sha1 );
|
||||
//}
|
||||
return file;
|
||||
}
|
||||
/**
|
||||
* Helper function for fetchFileAndTitle.
|
||||
*
|
||||
* Also useful if you need to fetch a file but not use it yet,
|
||||
* for example to get the file's handler.
|
||||
*
|
||||
* @param Title $title
|
||||
* @param array $options Array of options to RepoGroup::findFile
|
||||
* @return File|boolean
|
||||
*/
|
||||
private Xomw_File fetchFileNoRegister(Xoa_ttl title, Hash_adp options) {
|
||||
Xomw_File file = null;
|
||||
// if ( isset( $options['broken'] ) ) {
|
||||
// file = false; // broken thumbnail forced by hook
|
||||
// } elseif ( isset( $options['sha1'] ) ) { // get by (sha1,timestamp)
|
||||
// file = RepoGroup::singleton()->findFileFromKey( $options['sha1'], $options );
|
||||
// } else { // get by (name,timestamp)
|
||||
file = env.File_finder().Find_file(title); // $options
|
||||
// }
|
||||
return file;
|
||||
}
|
||||
public void Maybe_do_subpage_link(Xomw_linker__normalize_subpage_link rv, byte[] target, byte[] text) {
|
||||
linker.normalizeSubpageLink(rv, page_title, target, text);
|
||||
}
|
||||
public void Replace_link_holders(Xomw_parser_ctx pctx, Xomw_parser_bfr pbfr) {
|
||||
holders.Replace(pctx, pbfr);
|
||||
}
|
||||
public void Make_known_link_holder(Bry_bfr bfr, Xoa_ttl nt, byte[] text, byte[] trail, byte[] prefix) {
|
||||
byte[][] split_trail = linker.splitTrail(trail);
|
||||
byte[] inside = split_trail[0];
|
||||
trail = split_trail[1];
|
||||
|
||||
if (text == Bry_.Empty) {
|
||||
text = Bry_.Escape_html(nt.Get_prefixed_text());
|
||||
}
|
||||
|
||||
// PORTED:new HtmlArmor( "$prefix$text$inside" )
|
||||
tmp.Add_bry_escape_html(prefix);
|
||||
tmp.Add_bry_escape_html(text);
|
||||
tmp.Add_bry_escape_html(inside);
|
||||
text = tmp.To_bry_and_clear();
|
||||
|
||||
link_renderer.Make_known_link(bfr, nt, text, extra_atrs, query);
|
||||
byte[] link = bfr.To_bry_and_clear();
|
||||
parser.Armor_links(bfr, link, 0, link.length);
|
||||
bfr.Add(trail);
|
||||
}
|
||||
|
||||
private static boolean[] title_chars_for_lnki;
|
||||
private static final byte[] Bry__wtxt__lnki__bgn = Bry_.new_a7("[["), Bry__wtxt__lnki__end = Bry_.new_a7("]]");
|
||||
|
||||
// $e1 = "/^([{$tc}]+)(?:\\|(.+?))?]](.*)\$/sD";
|
||||
//
|
||||
// REGEX: "title-char"(1+) + "pipe"(0-1) + "]]"(0-1) + "other chars up to next [["
|
||||
// title-char -> ([{$tc}]+)
|
||||
// pipe -> (?:\\|(.+?))?
|
||||
// ]] -> ?]]
|
||||
// other chars... -> (.*)
|
||||
|
||||
// $e1_img = "/^([{$tc}]+)\\|(.*)\$/sD";
|
||||
//
|
||||
// REGEX: "title-char"(1+) + "pipe"(0-1) + "other chars up to next [["
|
||||
// title-char -> ([{$tc}]+)
|
||||
// pipe -> \\|
|
||||
// other chars... -> (.*)
|
||||
}
|
||||
@@ -1,122 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.parsers.lnkis; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
|
||||
import org.junit.*; import gplx.core.tests.*;
|
||||
import gplx.xowa.mws.filerepo.*; import gplx.xowa.mws.filerepo.file.*;
|
||||
import gplx.xowa.mws.media.*;
|
||||
public class Xomw_lnki_wkr__file__tst {
|
||||
private final Xomw_lnki_wkr__fxt fxt = new Xomw_lnki_wkr__fxt();
|
||||
@Before public void init() {
|
||||
fxt.Clear();
|
||||
fxt.Init__file("A.png", 300, 200);
|
||||
}
|
||||
@Test public void Plain() {
|
||||
fxt.Test__to_html("[[File:A.png]]", "<a href='A.png' class='image'><img alt='A.png' src='/orig/7/70/A.png' /></a>");
|
||||
}
|
||||
@Test public void Thumb() {
|
||||
fxt.Test__to_html("[[File:A.png|thumb]]", "<div class='thumb tright'><div class='thumbinner' style='width:222px;'><a href='A.png' class='image'><img alt='A.png' src='/thumb/7/70/A.png/220px-A.png' class='thumbimage' /></a> <div class='thumbcaption'><div class='magnify'><a href='' class='internal'></a></div></div></div></div>");
|
||||
}
|
||||
@Test public void Size() {
|
||||
fxt.Test__to_html("[[File:A.png|123x456px]]", "<a href='A.png' class='image'><img alt='A.png' src='/thumb/7/70/A.png/123px-A.png' /></a>");
|
||||
}
|
||||
@Test public void fitBoxWidth() {
|
||||
// COMMENT:"Height is the relative smaller dimension, so scale width accordingly"
|
||||
// consider file of 200,100 (2:1)
|
||||
// EX_1: view is 120,40 (3:1)
|
||||
// - dimensions are either (a) 120,80 or (b) 80,40
|
||||
// - use (b) 80,40
|
||||
// EX_2: view is 120,80 (1.5:1)
|
||||
// - dimensions are either (a) 120,60 or (b) 160,80
|
||||
// - use (a) 120,60
|
||||
fxt.Init__file("A.png", 200, 100);
|
||||
fxt.Test__to_html__has("[[File:A.png|120x40px]]", "/80px-A.png");
|
||||
fxt.Test__to_html__has("[[File:A.png|120x80px]]", "/120px-A.png");
|
||||
}
|
||||
|
||||
@Test public void Test__parseWidthParam() {
|
||||
int[] img_size = new int[2];
|
||||
// WxHpx
|
||||
fxt.Test__parseWidthParam(img_size, "12x34px" , 12, 34);
|
||||
// WxH
|
||||
fxt.Test__parseWidthParam(img_size, "12x34" , 12, 34);
|
||||
// Wpx
|
||||
fxt.Test__parseWidthParam(img_size, "12px" , 12, 0);
|
||||
// W
|
||||
fxt.Test__parseWidthParam(img_size, "12" , 12, 0);
|
||||
// 12x
|
||||
fxt.Test__parseWidthParam(img_size, "12x" , 12, 0);
|
||||
// x34
|
||||
fxt.Test__parseWidthParam(img_size, "x34" , 0, 34);
|
||||
}
|
||||
}
|
||||
class Xomw_lnki_wkr__fxt {
|
||||
private final Xomw_lnki_wkr wkr;
|
||||
private final Xomw_parser_ctx pctx;
|
||||
private final Xomw_parser_bfr pbfr = new Xomw_parser_bfr();
|
||||
private final Xomw_file_finder__mock file_finder;
|
||||
private final Xomw_FileRepo repo = new Xomw_FileRepo(Bry_.new_a7("/orig"), Bry_.new_a7("/thumb"));
|
||||
private boolean apos = true;
|
||||
public Xomw_lnki_wkr__fxt() {
|
||||
Xoae_app app = Xoa_app_fxt.Make__app__edit();
|
||||
Xowe_wiki wiki = Xoa_app_fxt.Make__wiki__edit(app);
|
||||
Xomw_parser parser = new Xomw_parser();
|
||||
wkr = parser.Lnki_wkr();
|
||||
|
||||
// env
|
||||
file_finder = new Xomw_file_finder__mock(parser.Env());
|
||||
parser.Env().File_finder_(file_finder);
|
||||
parser.Env().Magic_word_mgr().Add(Bry_.new_u8("img_thumbnail"), Bool_.Y, Bry_.Ary("thumb"));
|
||||
parser.Env().Magic_word_mgr().Add(Bry_.new_u8("img_width"), Bool_.Y, Bry_.Ary("$1px"));
|
||||
parser.Init_by_wiki(wiki);
|
||||
|
||||
// ctx
|
||||
pctx = new Xomw_parser_ctx();
|
||||
pctx.Init_by_page(wiki.Ttl_parse(Bry_.new_a7("Page_1")));
|
||||
}
|
||||
public void Clear() {
|
||||
wkr.Clear_state();
|
||||
}
|
||||
public void Init__file(String title, int w, int h) {
|
||||
file_finder.Add(title, repo, w, h, Xomw_MediaHandlerFactory.Mime__image__png);
|
||||
}
|
||||
public void Test__parse(String src_str, String expd) {
|
||||
byte[] src_bry = Bry_.new_u8(src_str);
|
||||
wkr.Replace_internal_links(pctx, pbfr.Init(src_bry));
|
||||
if (apos) expd = gplx.langs.htmls.Gfh_utl.Replace_apos(expd);
|
||||
Gftest.Eq__ary__lines(expd, pbfr.Rslt().To_str_and_clear(), src_str);
|
||||
}
|
||||
public void Test__to_html(String src_str, String expd) {
|
||||
if (apos) expd = gplx.langs.htmls.Gfh_utl.Replace_apos(expd);
|
||||
Gftest.Eq__ary__lines(expd, Exec__to_html(src_str), src_str);
|
||||
}
|
||||
public void Test__to_html__has(String src_str, String expd) {
|
||||
if (apos) expd = gplx.langs.htmls.Gfh_utl.Replace_apos(expd);
|
||||
Gftest.Eq__bool_y(String_.Has(Exec__to_html(src_str), expd));
|
||||
}
|
||||
private String Exec__to_html(String src_str) {
|
||||
byte[] src_bry = Bry_.new_u8(src_str);
|
||||
wkr.Replace_internal_links(pctx, pbfr.Init(src_bry));
|
||||
wkr.Replace_link_holders(pctx, pbfr);
|
||||
return pbfr.Rslt().To_str_and_clear();
|
||||
}
|
||||
public void Test__parseWidthParam(int[] img_size, String src_str, int expd_w, int expd_h) {
|
||||
wkr.parseWidthParam(img_size, Bry_.new_u8(src_str));
|
||||
Gftest.Eq__int(expd_w, img_size[0], "w");
|
||||
Gftest.Eq__int(expd_h, img_size[1], "h");
|
||||
}
|
||||
}
|
||||
@@ -1,29 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.parsers.lnkis; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
|
||||
import org.junit.*; import gplx.xowa.mws.filerepo.*; import gplx.xowa.mws.filerepo.file.*;
|
||||
public class Xomw_lnki_wkr__text__tst {
|
||||
private final Xomw_lnki_wkr__fxt fxt = new Xomw_lnki_wkr__fxt();
|
||||
@Before public void init() {fxt.Clear();}
|
||||
@Test public void Text() {fxt.Test__parse("a [[A]] z" , "a <!--LINK 0--> z");}
|
||||
@Test public void Capt() {fxt.Test__parse("a [[A|a]] z" , "a <!--LINK 0--> z");}
|
||||
@Test public void Invalid__char() {fxt.Test__parse("a [[<A>]] z" , "a [[<A>]] z");}
|
||||
@Test public void Html__self() {fxt.Test__to_html("[[Page_1]]" , "<strong class='selflink'>Page_1</strong>");}
|
||||
@Test public void Html__text() {fxt.Test__to_html("[[A]]" , "<a href='/wiki/A' title='A'>A</a>");}
|
||||
@Test public void Html__capt() {fxt.Test__to_html("[[A|a]]" , "<a href='/wiki/A' title='A'>a</a>");}
|
||||
}
|
||||
@@ -1,57 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.parsers.lnkis; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
|
||||
public class Xomw_param_itm {
|
||||
public final byte[] magic;
|
||||
public final int type_uid;
|
||||
public final byte[] name;
|
||||
public final int name_uid;
|
||||
public Xomw_param_itm(byte[] magic, int type_uid, byte[] name) {
|
||||
this.magic = magic;
|
||||
this.type_uid = type_uid;
|
||||
this.name = name;
|
||||
this.name_uid = name_uids.Get_as_int_or(name, -1);
|
||||
}
|
||||
public static final int
|
||||
Name__width = 0
|
||||
, Name__height = 1
|
||||
, Name__manual_thumb = 2
|
||||
, Name__alt = 3
|
||||
, Name__class = 4
|
||||
, Name__link = 5
|
||||
, Name__frameless = 6
|
||||
, Name__framed = 7
|
||||
, Name__thumbnail = 8
|
||||
;
|
||||
private static final Hash_adp_bry name_uids = Hash_adp_bry.cs()
|
||||
.Add_str_int("width" , Name__width)
|
||||
.Add_str_int("manual_thumb" , Name__manual_thumb)
|
||||
.Add_str_int("alt" , Name__alt)
|
||||
.Add_str_int("class" , Name__class)
|
||||
.Add_str_int("link" , Name__link)
|
||||
.Add_str_int("frameless" , Name__frameless)
|
||||
.Add_str_int("framed" , Name__framed)
|
||||
.Add_str_int("thumbnail" , Name__thumbnail)
|
||||
;
|
||||
public static final byte[]
|
||||
Mw__img_width = Bry_.new_a7("img_width")
|
||||
;
|
||||
public static final byte[]
|
||||
Name_bry__width = Bry_.new_a7("width")
|
||||
;
|
||||
}
|
||||
@@ -1,77 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.parsers.lnkis; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
|
||||
public class Xomw_param_map {
|
||||
private final Ordered_hash hash = Ordered_hash_.New_bry();
|
||||
public final Xomw_params_frame Frame = new Xomw_params_frame();
|
||||
public final Xomw_params_handler Handler = new Xomw_params_handler();
|
||||
public final Xomw_params_horizAlign HorizAlign = new Xomw_params_horizAlign();
|
||||
public final Xomw_params_vertAlign VertAlign = new Xomw_params_vertAlign();
|
||||
public int Len() {return hash.Len();}
|
||||
public Xomw_param_itm Get_at(int i) {return (Xomw_param_itm)hash.Get_at(i);}
|
||||
public Xomw_param_itm Get_by(byte[] name) {
|
||||
return (Xomw_param_itm)hash.Get_by(name);
|
||||
}
|
||||
public Xomw_param_itm Get_by(int name_type) {
|
||||
return null;
|
||||
}
|
||||
public void Set(int type, int paramNameUid, byte[] paramBry, int paramInt) {
|
||||
switch (type) {
|
||||
case Type__frame: Frame.Set(paramNameUid, paramBry, paramInt); break;
|
||||
case Type__handler: Handler.Set(paramNameUid, paramBry, paramInt); break;
|
||||
}
|
||||
}
|
||||
public byte[][] Keys() {
|
||||
int len = hash.Len();
|
||||
byte[][] rv = new byte[len][];
|
||||
for (int i = 0; i < len; i++) {
|
||||
rv[i] = ((Xomw_param_itm)hash.Get_at(i)).magic;
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
public void Add(byte[] magic, int type_uid, byte[] name) {
|
||||
Xomw_param_itm itm = new Xomw_param_itm(magic, type_uid, name);
|
||||
hash.Add(magic, itm);
|
||||
}
|
||||
public Xomw_param_map Clone() {
|
||||
Xomw_param_map rv = new Xomw_param_map();
|
||||
int len = hash.Len();
|
||||
for (int i = 0; i < len; i++) {
|
||||
Xomw_param_itm itm = (Xomw_param_itm)hash.Get_at(i);
|
||||
rv.Add(itm.magic, itm.type_uid, itm.name);
|
||||
}
|
||||
rv.Frame.Copy_to(this.Frame);
|
||||
rv.Handler.Copy_to(this.Handler);
|
||||
return rv;
|
||||
}
|
||||
|
||||
public static final int Type__horizAlign = 0, Type__vertAlign = 1, Type__frame = 2, Type__handler = 3;
|
||||
}
|
||||
class Xomw_param_list {
|
||||
public int type_uid;
|
||||
public byte[] type;
|
||||
public byte[][] names;
|
||||
|
||||
public static Xomw_param_list New(int type_uid, String type, String... names) {
|
||||
Xomw_param_list rv = new Xomw_param_list();
|
||||
rv.type_uid = type_uid;
|
||||
rv.type = Bry_.new_u8(type);
|
||||
rv.names = Bry_.Ary(names);
|
||||
return rv;
|
||||
}
|
||||
}
|
||||
@@ -1,85 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.parsers.lnkis; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
|
||||
import gplx.xowa.mws.utls.*;
|
||||
public class Xomw_params_frame {
|
||||
public byte[] align = null;
|
||||
public byte[] valign = null;
|
||||
public byte[] caption = null;
|
||||
public byte[] frame = null;
|
||||
public byte[] framed = null;
|
||||
public byte[] frameless = null;
|
||||
public byte[] thumbnail = null;
|
||||
public byte[] manualthumb = null;
|
||||
public byte[] alt = null;
|
||||
public byte[] title = null;
|
||||
public byte[] cls = null;
|
||||
public byte[] img_cls = null;
|
||||
public byte[] link_title = null;
|
||||
public byte[] link_url = null;
|
||||
public byte[] link_target = null;
|
||||
public byte[] no_link = null;
|
||||
public byte[] border = null;
|
||||
public byte[] custom_url_link = null;
|
||||
public byte[] custom_target_link = null;
|
||||
public boolean desc_link = false;
|
||||
public byte[] desc_query = null;
|
||||
public double upright;
|
||||
public void Set(int uid, byte[] val_bry, int val_int) {
|
||||
switch (uid) {
|
||||
case Xomw_param_itm.Name__thumbnail: thumbnail = val_bry; break;
|
||||
}
|
||||
}
|
||||
public Xomw_params_frame Clear() {
|
||||
desc_link = false;
|
||||
upright = Php_utl_.Null_double;
|
||||
align = valign = caption = frame = framed = frameless
|
||||
= thumbnail = manualthumb = alt = title = cls = img_cls
|
||||
= link_title = link_url = link_target = no_link
|
||||
= custom_url_link = custom_target_link = desc_query
|
||||
= Php_utl_.Null_bry;
|
||||
return this;
|
||||
}
|
||||
public void Copy_to(Xomw_params_frame src) {
|
||||
this.desc_link = src.desc_link;
|
||||
this.upright = src.upright;
|
||||
this.align = src.align;
|
||||
this.valign = src.valign;
|
||||
this.caption = src.caption;
|
||||
this.frame = src.frame;
|
||||
this.framed = src.framed;
|
||||
this.frameless = src.frameless;
|
||||
this.thumbnail = src.thumbnail;
|
||||
this.manualthumb = src.manualthumb;
|
||||
this.alt = src.alt;
|
||||
this.title = src.title;
|
||||
this.cls = src.cls;
|
||||
this.img_cls = src.img_cls;
|
||||
this.link_title = src.link_title;
|
||||
this.link_url = src.link_url;
|
||||
this.link_target = src.link_target;
|
||||
this.no_link = src.no_link;
|
||||
this.border = src.border;
|
||||
this.custom_url_link = src.custom_url_link;
|
||||
this.custom_target_link = src.custom_target_link;
|
||||
this.desc_query = src.desc_query;
|
||||
}
|
||||
public static byte[] Cls_add(byte[] lhs, byte[] rhs) {
|
||||
return Bry_.Len_eq_0(lhs) ? rhs : Bry_.Add(lhs, Byte_ascii.Space_bry, rhs);
|
||||
}
|
||||
}
|
||||
@@ -1,45 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.parsers.lnkis; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
|
||||
import gplx.xowa.mws.utls.*;
|
||||
public class Xomw_params_handler {
|
||||
public int width;
|
||||
public int height;
|
||||
public int page;
|
||||
public int physicalWidth;
|
||||
public int physicalHeight;
|
||||
public Xomw_params_handler Clear() {
|
||||
width = height = page
|
||||
= physicalWidth = physicalHeight = Php_utl_.Null_int;
|
||||
return this;
|
||||
}
|
||||
public void Copy_to(Xomw_params_handler src) {
|
||||
this.width = src.width;
|
||||
this.height = src.height;
|
||||
this.page = src.page;
|
||||
this.physicalWidth = src.physicalWidth;
|
||||
this.physicalHeight = src.physicalHeight;
|
||||
}
|
||||
public void Set(int uid, byte[] val_bry, int val_int) {
|
||||
switch (uid) {
|
||||
case Xomw_param_itm.Name__width: width = val_int; break;
|
||||
case Xomw_param_itm.Name__height: height = val_int; break;
|
||||
default: throw Err_.new_unhandled_default(uid);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,23 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.parsers.lnkis; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
|
||||
/** Holder for horizontal-alignment parameters; it currently declares no members. */
public class Xomw_params_horizAlign {
	/**
	 * Nothing to reset, so this only hands back the receiver.
	 *
	 * @return this, for chaining
	 */
	public Xomw_params_horizAlign Clear() {
		Xomw_params_horizAlign self = this;
		return self;
	}
}
|
||||
@@ -1,44 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.parsers.lnkis; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
|
||||
/**
 * Mutable bag of parameters whose members are all public and are read /
 * written directly by callers.
 */
public class Xomw_params_mto {
	public boolean desc_link;
	public byte[] alt = null;
	public byte[] title = null;
	public byte[] img_cls = null;
	public byte[] file_link = null;
	public byte[] valign = null;
	public byte[] desc_query = null;
	public byte[] override_width = null;
	public byte[] override_height = null;
	public byte[] no_dimensions = null;
	public byte[] custom_url_link = null;
	public byte[] custom_title_link = null;
	public byte[] custom_target_link = null;
	public byte[] parser_extlink_rel = null;
	public byte[] parser_extlink_target = null;

	/**
	 * Resets every member to its initial value: {@code desc_link} to false and
	 * every {@code byte[]} member to null.
	 *
	 * @return this, for chaining
	 */
	public Xomw_params_mto Clear() {
		desc_link = false;
		// "img_cls" and "custom_target_link" were missing from this chain,
		// so values set for one use leaked into the next after Clear()
		alt = title = img_cls = file_link = valign
			= desc_query = override_width = override_height = no_dimensions
			= custom_url_link = custom_title_link = custom_target_link
			= parser_extlink_rel = parser_extlink_target
			= null;
		return this;
	}
}
|
||||
@@ -1,36 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.parsers.lnkis; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
|
||||
import gplx.xowa.mws.utls.*;
|
||||
public class Xomw_params_scalar {
|
||||
public int physicalWidth;
|
||||
public int physicalHeight;
|
||||
public byte[] physicalDimensions;
|
||||
public int clientWidth;
|
||||
public int clientHeight;
|
||||
public byte[] comment;
|
||||
public int srcWidth;
|
||||
public int srcHeight;
|
||||
public byte[] mimeType;
|
||||
public byte[] dstPath;
|
||||
public byte[] dstUrl;
|
||||
public byte[] interlace;
|
||||
public Xomw_params_scalar() {
|
||||
physicalWidth = physicalHeight = clientWidth = clientHeight = srcWidth = srcHeight = Php_utl_.Null_int;
|
||||
}
|
||||
}
|
||||
@@ -1,23 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.parsers.lnkis; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
|
||||
/** Holder for vertical-alignment parameters; it currently declares no members. */
public class Xomw_params_vertAlign {
	/**
	 * Nothing to reset, so this only hands back the receiver.
	 *
	 * @return this, for chaining
	 */
	public Xomw_params_vertAlign Clear() {
		Xomw_params_vertAlign self = this;
		return self;
	}
}
|
||||
@@ -1,395 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.parsers.magiclinks; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
|
||||
import gplx.core.primitives.*; import gplx.core.btries.*; import gplx.core.net.*;
|
||||
import gplx.xowa.mws.utls.*; import gplx.xowa.mws.htmls.*;
|
||||
import gplx.langs.regxs.*;
|
||||
// TODO.XO: this->getConverterLanguage()->markNoConversion($url, true),
|
||||
public class Xomw_magiclinks_wkr {
|
||||
private final Btrie_slim_mgr regex_trie = Btrie_slim_mgr.ci_a7(); // NOTE: must be ci to handle protocols; EX: "https:" and "HTTPS:"
|
||||
private final Btrie_rv trv = new Btrie_rv();
|
||||
private static byte[] Tag__anch__rhs;
|
||||
private boolean[] url_separators;
|
||||
private static Xomw_regex_link_interrupt regex_link_interrupt;
|
||||
private final Xomw_parser parser;
|
||||
private final Xomw_regex_boundary regex_boundary;
|
||||
private final Xomw_regex_url regex_url;
|
||||
private final Xomw_sanitizer sanitizer;
|
||||
private final Xomw_linker linker;
|
||||
private final Xomw_atr_mgr atrs = new Xomw_atr_mgr();
|
||||
private byte[] page_title;
|
||||
|
||||
private static final byte Regex__anch = 1, Regex__elem = 2, Regex__free = 3;
|
||||
public Xomw_magiclinks_wkr(Xomw_parser parser, Xomw_sanitizer sanitizer, Xomw_linker linker, Xomw_regex_boundary regex_boundary, Xomw_regex_url regex_url) {
|
||||
this.parser = parser;
|
||||
this.sanitizer = sanitizer;
|
||||
this.linker = linker;
|
||||
this.regex_boundary = regex_boundary;
|
||||
this.regex_url = regex_url;
|
||||
|
||||
// ',;\.:!?'
|
||||
url_separators = Bool_ary_bldr.New_u8()
|
||||
.Set_many(Byte_ascii.Comma,Byte_ascii.Semic, Byte_ascii.Dot, Byte_ascii.Colon, Byte_ascii.Bang, Byte_ascii.Question)
|
||||
.To_ary();
|
||||
|
||||
if (Tag__anch__rhs == null) {
|
||||
synchronized (Type_adp_.ClassOf_obj(this)) {
|
||||
Tag__anch__rhs = Bry_.new_a7("</a>");
|
||||
regex_link_interrupt = new Xomw_regex_link_interrupt();
|
||||
}
|
||||
}
|
||||
}
|
||||
public void Init_by_wiki() {
|
||||
regex_trie.Add_str_byte("<a", Regex__anch);
|
||||
regex_trie.Add_str_byte("<" , Regex__elem);
|
||||
|
||||
Gfo_protocol_itm[] protocol_ary = Gfo_protocol_itm.Ary();
|
||||
int protocol_len = protocol_ary.length;
|
||||
for (int i = 0; i < protocol_len; i++) {
|
||||
Gfo_protocol_itm itm = protocol_ary[i];
|
||||
regex_trie.Add_bry_byte(itm.Text_bry(), Regex__free);
|
||||
}
|
||||
}
|
||||
|
||||
// Replace special strings like "ISBN xxx" and "RFC xxx" with
|
||||
// magic external links.
|
||||
public void Do_magic_links(Xomw_parser_ctx pctx, Xomw_parser_bfr pbfr) {
|
||||
// XO.PBFR
|
||||
Bry_bfr src_bfr = pbfr.Src();
|
||||
byte[] src = src_bfr.Bfr();
|
||||
int src_bgn = 0;
|
||||
int src_end = src_bfr.Len();
|
||||
Bry_bfr bfr = pbfr.Trg();
|
||||
|
||||
int cur = src_bgn;
|
||||
int prv = cur;
|
||||
boolean dirty = true;
|
||||
// PORTED.REGEX: handle below
|
||||
// XO.MW.UNSUPPORTED.OBSOLETE: not handling RFC|PMID|ISBN b/c of upcoming obsolescence: https://www.mediawiki.org/wiki/Requests_for_comment/Future_of_magic_links
|
||||
//'!(?: // Start cases
|
||||
// (<a[ \t\r\n>].*?</a>) | // m[1]: Skip link text
|
||||
// (<.*?>) | // m[2]: Skip stuff inside
|
||||
// // HTML elements' . "
|
||||
// (\b(?i:$prots)($addr$urlChar*)) | // m[3]: Free external links
|
||||
// // m[4]: Post-protocol path
|
||||
// \b(?:RFC|PMID) $spaces // m[5]: RFC or PMID, capture number
|
||||
// ([0-9]+)\b |
|
||||
// \bISBN $spaces ( // m[6]: ISBN, capture number
|
||||
// (?: 97[89] $spdash?)? // optional 13-digit ISBN prefix
|
||||
// (?: [0-9] $spdash?){9} // 9 digits with opt. delimiters
|
||||
// [0-9Xx] // check digit
|
||||
// )\b
|
||||
while (true) {
|
||||
if (cur == src_end) {
|
||||
if (dirty)
|
||||
bfr.Add_mid(src, prv, src_end);
|
||||
break;
|
||||
}
|
||||
|
||||
byte b = src[cur];
|
||||
Object o = regex_trie.Match_at_w_b0(trv, b, src, cur, src_end);
|
||||
// current byte doesn't look like magiclink; continue;
|
||||
if (o == null) {
|
||||
cur++;
|
||||
continue;
|
||||
}
|
||||
|
||||
// looks like magiclink; do additional processing
|
||||
byte regex_tid = ((Byte_obj_val)o).Val();
|
||||
int hook_bgn = cur;
|
||||
int hook_end = trv.Pos();
|
||||
int tmp_pos = hook_end;
|
||||
boolean regex_valid = true;
|
||||
switch (regex_tid) {
|
||||
case Regex__anch: // (<a[ \t\r\n>].*?</a>) | // m[1]: Skip link text
|
||||
if (tmp_pos < src_end) {
|
||||
// find "[ \t\r\n>]" after "<a"; i.e.: don't match "<ab" or "<ac", etc..
|
||||
byte ws_byte = src[tmp_pos];
|
||||
switch (ws_byte) {
|
||||
// next char after "<a" is ws -> valid
|
||||
case Byte_ascii.Space:
|
||||
case Byte_ascii.Tab:
|
||||
case Byte_ascii.Cr:
|
||||
case Byte_ascii.Nl:
|
||||
break;
|
||||
// next char after "<a" is not ws -> invalid
|
||||
default:
|
||||
regex_valid = false;
|
||||
break;
|
||||
}
|
||||
if (regex_valid) {
|
||||
// find </a>
|
||||
tmp_pos++;
|
||||
int anch_end = Bry_find_.Find_fwd(src, Tag__anch__rhs, tmp_pos, src_end);
|
||||
// </a> not found -> invalid
|
||||
if (anch_end == Bry_find_.Not_found) {
|
||||
regex_valid = false;
|
||||
}
|
||||
// </a> found -> valid; set cur to after "</a>"
|
||||
else {
|
||||
cur = anch_end + Tag__anch__rhs.length;
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
regex_valid = false;
|
||||
}
|
||||
break;
|
||||
case Regex__elem: // (<.*?>) | // m[2]: Skip stuff inside
|
||||
// just find ">"
|
||||
tmp_pos = Bry_find_.Find_fwd(src, Byte_ascii.Angle_end, tmp_pos, src_end);
|
||||
// > not found -> invalid
|
||||
if (tmp_pos == Bry_find_.Not_found) {
|
||||
regex_valid = false;
|
||||
}
|
||||
// > found -> valid; set cur to after ">"
|
||||
else {
|
||||
cur = tmp_pos + 1;
|
||||
}
|
||||
break;
|
||||
case Regex__free:
|
||||
// make sure that protocol starts at word bound; EX: "ahttp://a.org" should be invalid
|
||||
if (regex_boundary.Is_boundary_prv(src, hook_bgn)) {
|
||||
// skip forward until invalid url char
|
||||
tmp_pos = regex_url.Find_fwd_while(trv, src, tmp_pos, src_end);
|
||||
// no url chars found -> invalid
|
||||
if (tmp_pos == hook_end) {
|
||||
regex_valid = false;
|
||||
}
|
||||
// url chars found -> valid; set cur to 1st invalid url-char;
|
||||
else {
|
||||
cur = tmp_pos;
|
||||
}
|
||||
}
|
||||
else
|
||||
regex_valid = false;
|
||||
break;
|
||||
}
|
||||
// regex is invalid; advance by 1 and continue;
|
||||
if (!regex_valid) {
|
||||
cur++;
|
||||
}
|
||||
// regex is valid
|
||||
else {
|
||||
// handle free
|
||||
if (regex_tid == Regex__free) {
|
||||
this.page_title = pctx.Page_title().Full_db();
|
||||
dirty = true;
|
||||
bfr.Add_mid(src, prv, hook_bgn);
|
||||
byte[] url = Bry_.Mid(src, hook_bgn, cur);
|
||||
int num_post_proto = cur - hook_end; // get length of url without proto; EX: "http://a.org" should be 5 ("a.org")
|
||||
this.Make_free_external_link(bfr, url, num_post_proto);
|
||||
prv = cur;
|
||||
}
|
||||
// "<a " and "<" just need to be ignored; note that they already update cur so noop
|
||||
else {
|
||||
}
|
||||
}
|
||||
}
|
||||
if (dirty) {
|
||||
pbfr.Switch();
|
||||
}
|
||||
}
|
||||
|
||||
// Make a free external link, given a user-supplied URL
|
||||
public void Make_free_external_link(Bry_bfr bfr, byte[] url, int num_post_proto) {
|
||||
byte[] trail = Bry_.Empty;
|
||||
|
||||
// The characters '<' and '>' (which were escaped by
|
||||
// removeHTMLtags()) should not be included in
|
||||
// URLs, per RFC 2396.
|
||||
// Make terminate a URL as well (bug T84937)
|
||||
int separator_bgn = regex_link_interrupt.Find(trv, url, 0, url.length);
|
||||
if (separator_bgn != Bry_find_.Not_found) {
|
||||
trail = Bry_.Mid(url, separator_bgn);
|
||||
url = Bry_.Mid(url, 0, separator_bgn);
|
||||
}
|
||||
|
||||
// Move trailing punctuation to $trail
|
||||
int url_len = url.length;
|
||||
// If there is no left bracket, then consider right brackets fair game too
|
||||
// XO.MW: if (strpos($url, '(') === false) {$sep .= ')';}
|
||||
url_separators[Byte_ascii.Paren_end] = Bry_find_.Find_fwd(url, Byte_ascii.Paren_bgn, 0, url_len) == Bry_find_.Not_found;
|
||||
|
||||
int num_sep_chars = Php_str_.Strspn_bwd__ary(url, url_separators, url_len, -1);
|
||||
// Don't break a trailing HTML entity by moving the ; into $trail
|
||||
// This is in hot code, so use substr_compare to avoid having to
|
||||
// create a new String Object for the comparison
|
||||
// XO.MW.NOTE: ignore semic if part of entity; EX: "http://a.org'!."
|
||||
if (num_sep_chars > 0 && Php_str_.Substr_byte(url, -num_sep_chars) == Byte_ascii.Semic) {
|
||||
// more optimization: instead of running preg_match with a $
|
||||
// anchor, which can be slow, do the match on the reversed
|
||||
// String starting at the desired offset.
|
||||
// un-reversed regexp is: /&([a-z]+|#x[\da-f]+|#\d+)$/i
|
||||
// if (preg_match('/\G([a-z]+|[\da-f]+x#|\d+#)&/i', $urlRev, $m2, 0, num_sep_chars)) {
|
||||
if (Xomw_regex_html_entity.Match_bwd(url, url_len - num_sep_chars, 0)) {
|
||||
num_sep_chars--;
|
||||
}
|
||||
}
|
||||
|
||||
if (num_sep_chars > 0) {
|
||||
trail = Bry_.Add(Php_str_.Substr(url, -num_sep_chars), trail);
|
||||
url = Php_str_.Substr(url, 0, -num_sep_chars);
|
||||
}
|
||||
|
||||
// Verify that we still have a real URL after trail removal, and
|
||||
// not just lone protocol
|
||||
if (trail.length >= num_post_proto) {
|
||||
bfr.Add_bry_many(url, trail);
|
||||
return;
|
||||
}
|
||||
|
||||
url = sanitizer.Clean_url(url);
|
||||
|
||||
// XO.MW.UNSUPPORTED.NON-WMF: not supporting images from freefrom url; (EX: "http://a.org/image.png" -> "<img>"); haven't seen this used on WMF wikis
|
||||
// Is this an external image?
|
||||
byte[] text = null; // $this->maybeMakeExternalImage($url);
|
||||
if (text == null) {
|
||||
// Not an image, make a link
|
||||
linker.makeExternalLink(bfr, url
|
||||
, url // $this->getConverterLanguage()->markNoConversion($url, true),
|
||||
, true, Bry_.new_a7("free")
|
||||
, parser.Get_external_link_attribs(atrs)
|
||||
, page_title);
|
||||
|
||||
// XO.MW.UNSUPPORTED.HOOK: registers link for processing by other extensions?
|
||||
// Register it in the output Object...
|
||||
// Replace unnecessary URL escape codes with their equivalent characters
|
||||
// $pasteurized = self::normalizeLinkUrl($url);
|
||||
// $this->mOutput->addExternalLink($pasteurized);
|
||||
}
|
||||
bfr.Add(trail);
|
||||
}
|
||||
}
|
||||
/** Backward matcher for an html entity that ends just before a given position. */
class Xomw_regex_html_entity {
	// if (preg_match('/\G([a-z]+|[\da-f]+x#|\d+#)&/i', $urlRev, $m2, 0, num_sep_chars)) {
	// REGEX (on the reversed string): (letters | hex + "x#" | dec + "#") + "&"
	//   un-reversed: "&" + (letters | "#x" + hex | "#" + dec)
	// \G means "stop if matching breaks"; i.e. every byte between the "&" and the start pos must belong to the entity
	// http://www.php.net/manual/en/regexp.reference.escape.php
	// http://stackoverflow.com/questions/14897949/what-is-the-use-of-g-anchor-in-regex
	/**
	 * Scans backward from {@code src_bgn - 1} down to {@code src_end} (inclusive) and reports
	 * whether the bytes form "&amp;name", "&amp;#digits" or "&amp;#xhexdigits" ending at {@code src_bgn}.
	 * The byte at {@code src_bgn} itself (the ";" in the caller) is not examined.
	 *
	 * @param src     bytes to scan
	 * @param src_bgn exclusive upper bound; scanning starts at {@code src_bgn - 1}
	 * @param src_end inclusive lower bound of the scan
	 * @return true if an entity body is found; false on any other byte (including every
	 *         non-ASCII byte) or when the lower bound is reached without finding "&amp;"
	 */
	public static boolean Match_bwd(byte[] src, int src_bgn, int src_end) {
		int digits = 0;		// count of [0-9]
		int hex_ltrs = 0;	// count of [a-fA-F]
		int other_ltrs = 0;	// count of [g-zG-Z]; includes the "x" marker of "&#x"
		for (int cur = src_bgn - 1; cur >= src_end; cur--) {
			byte b = src[cur];
			if (b >= '0' && b <= '9') {
				digits++;
			}
			else if (Is_hex_ltr(b)) {
				hex_ltrs++;
			}
			else if (Is_ltr(b)) {
				other_ltrs++;
			}
			else if (b == '&') {
				// named entity: letters only, at least one
				return digits == 0 && (hex_ltrs + other_ltrs) > 0;
			}
			else if (b == '#') {
				// prv must be &; EX: "&#" and "&#x"
				int amp_pos = cur - 1;
				if (amp_pos < src_end || src[amp_pos] != '&') return false;
				// only look at the byte after "#" if it was part of the scanned range
				int nxt_pos = cur + 1;
				boolean is_hex = nxt_pos < src_bgn && (src[nxt_pos] == 'x' || src[nxt_pos] == 'X');
				if (is_hex) {
					// "#x[\da-f]+": the "x" marker is the only non-hex letter allowed, and 1+ hex digits must follow it
					return other_ltrs == 1 && (digits + hex_ltrs) > 0;
				}
				// "#\d+": digits only, at least one
				return digits > 0 && hex_ltrs == 0 && other_ltrs == 0;
			}
			else {
				// any other byte breaks the match
				return false;
			}
		}
		return false;
	}

	// true for ASCII letters that are also hex digits
	private static boolean Is_hex_ltr(byte b) {
		return (b >= 'a' && b <= 'f') || (b >= 'A' && b <= 'F');
	}

	// true for any ASCII letter
	private static boolean Is_ltr(byte b) {
		return (b >= 'a' && b <= 'z') || (b >= 'A' && b <= 'Z');
	}
}
|
||||
class Xomw_regex_link_interrupt {
|
||||
private static final byte Bgn__ent__lt = 0, Bgn__ent__gt = 1, Bgn__ent__nbsp = 2, Bgn__hex = 3, Bgn__dec = 4;
|
||||
private static final byte End__hex__lt = 0, End__hex__gt = 1, End__hex__nbsp = 2, End__dec__lt = 3, End__dec__gt = 4, End__dec__nbsp = 5;
|
||||
private final Btrie_slim_mgr bgn_trie = Btrie_slim_mgr.cs();
|
||||
private final Btrie_slim_mgr end_trie = Btrie_slim_mgr.ci_a7();
|
||||
public Xomw_regex_link_interrupt() {
|
||||
// MW.REGEX: &(lt|gt|nbsp|#x0*(3[CcEe]|[Aa]0)|#0*(60|62|160));
|
||||
bgn_trie.Add_str_byte("<", Bgn__ent__lt);
|
||||
bgn_trie.Add_str_byte(">", Bgn__ent__gt);
|
||||
bgn_trie.Add_str_byte(" ", Bgn__ent__nbsp);
|
||||
bgn_trie.Add_str_byte("&#x", Bgn__hex); // 3C | 3E | A0
|
||||
bgn_trie.Add_str_byte("&#", Bgn__dec); // 60 | 62 | 160
|
||||
|
||||
end_trie.Add_str_byte("3c;", End__hex__lt);
|
||||
end_trie.Add_str_byte("3e;", End__hex__gt);
|
||||
end_trie.Add_str_byte("a0;", End__hex__nbsp);
|
||||
end_trie.Add_str_byte("60;", End__dec__lt);
|
||||
end_trie.Add_str_byte("62;", End__dec__gt);
|
||||
end_trie.Add_str_byte("160;", End__dec__nbsp);
|
||||
}
|
||||
public int Find(Btrie_rv trv, byte[] src, int src_bgn, int src_end) {
|
||||
int pos = src_bgn;
|
||||
while (true) {
|
||||
if (pos >= src_end) break;
|
||||
byte b = src[pos];
|
||||
Object bgn_obj = bgn_trie.Match_at_w_b0(trv, b, src, pos, src_end);
|
||||
if (bgn_obj == null) {
|
||||
pos += gplx.core.intls.Utf8_.Len_of_char_by_1st_byte(b);
|
||||
continue;
|
||||
}
|
||||
|
||||
byte bgn_tid = ((Byte_obj_val)bgn_obj).Val();
|
||||
int end_pos = trv.Pos();
|
||||
boolean valid = false;
|
||||
switch (bgn_tid) {
|
||||
case Bgn__ent__lt:
|
||||
case Bgn__ent__gt:
|
||||
case Bgn__ent__nbsp:
|
||||
return pos;
|
||||
case Bgn__hex:
|
||||
case Bgn__dec:
|
||||
// match rest of sequence from above; EX: "3c;", "60;" etc.
|
||||
end_pos = Bry_find_.Find_fwd_while(src, end_pos, src_end, Byte_ascii.Num_0);
|
||||
Object end_obj = end_trie.Match_at(trv, src, end_pos, src_end);
|
||||
if (end_obj != null) {
|
||||
// make sure that hex-dec matches; EX: "`" and "c;" are invalid
|
||||
byte end_tid = ((Byte_obj_val)end_obj).Val();
|
||||
if ( bgn_tid == Bgn__hex && Int_.Between(end_tid, End__hex__lt, End__hex__nbsp)
|
||||
|| bgn_tid == Bgn__dec && Int_.Between(end_tid, End__dec__lt, End__dec__nbsp)
|
||||
)
|
||||
return pos;
|
||||
}
|
||||
break;
|
||||
}
|
||||
if (valid)
|
||||
return pos;
|
||||
else
|
||||
pos += gplx.core.intls.Utf8_.Len_of_char_by_1st_byte(b);
|
||||
}
|
||||
return Bry_find_.Not_found;
|
||||
}
|
||||
}
|
||||
@@ -1,91 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.parsers.magiclinks; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
|
||||
import org.junit.*;
|
||||
public class Xomw_magiclinks_wkr__tst {
|
||||
private final Xomw_magiclinks_wkr__fxt fxt = new Xomw_magiclinks_wkr__fxt();
|
||||
@Test public void Basic() {fxt.Test__parse("a https://b.org z", "a <a rel='nofollow' class='external free' href='https://b.org'>https://b.org</a> z");}
|
||||
@Test public void Invalid() {fxt.Test__parse("a _https://b.org z", "a _https://b.org z");}
|
||||
@Test public void Tag__anch() {fxt.Test__parse("a <a title=\"https://b.org\">b</a> z", "a <a title=\"https://b.org\">b</a> z");}
|
||||
@Test public void Tag__misc() {fxt.Test__parse("a <div title=\"https://b.org\">b</div> z", "a <div title=\"https://b.org\">b</div> z");}
|
||||
@Test public void Interrupt() {
|
||||
// ent
|
||||
fxt.Test__parse("a https://b.org<z" , "a <a rel='nofollow' class='external free' href='https://b.org'>https://b.org</a><z");
|
||||
// hex
|
||||
fxt.Test__parse("a https://b.org<z" , "a <a rel='nofollow' class='external free' href='https://b.org'>https://b.org</a><z");
|
||||
// dec
|
||||
fxt.Test__parse("a https://b.org<z" , "a <a rel='nofollow' class='external free' href='https://b.org'>https://b.org</a><z");
|
||||
// num_post_proto rule
|
||||
fxt.Test__parse("a https://< z" , "a https://< z");
|
||||
}
|
||||
@Test public void Interrupt__hex_dec() {// implementation specific test for mixed hex / dec
|
||||
// dec-hex
|
||||
fxt.Test__parse("a https://b.orgc;z" , "a <a rel='nofollow' class='external free' href='https://b.org&#3c;z'>https://b.org&#3c;z</a>");
|
||||
}
|
||||
@Test public void Separator() {
|
||||
// basic; ,;.:!?
|
||||
fxt.Test__parse("a https://b.org,;.:!? z" , "a <a rel='nofollow' class='external free' href='https://b.org'>https://b.org</a>,;.:!? z");
|
||||
// ")" excluded
|
||||
fxt.Test__parse("a https://b.org).:!? z" , "a <a rel='nofollow' class='external free' href='https://b.org'>https://b.org</a>).:!? z");
|
||||
// ")" included b/c "(" exists
|
||||
fxt.Test__parse("a https://b.org().:!? z" , "a <a rel='nofollow' class='external free' href='https://b.org()'>https://b.org()</a>.:!? z");
|
||||
// ";" excluded
|
||||
fxt.Test__parse("a https://b.org;.:!? z" , "a <a rel='nofollow' class='external free' href='https://b.org'>https://b.org</a>;.:!? z");
|
||||
// ";" included b/c of ent
|
||||
fxt.Test__parse("a https://b.org&abc;.:!? z" , "a <a rel='nofollow' class='external free' href='https://b.org&abc;'>https://b.org&abc;</a>.:!? z");
|
||||
// ";" included b/c of hex; note that Clean_url changes "±" to "±"
|
||||
fxt.Test__parse("a https://b.org±.:!? z", "a <a rel='nofollow' class='external free' href='https://b.org±'>https://b.org±</a>.:!? z");
|
||||
// ";" included b/c of dec; note that Clean_url changes "{" to "{"
|
||||
fxt.Test__parse("a https://b.org{.:!? z", "a <a rel='nofollow' class='external free' href='https://b.org{'>https://b.org{</a>.:!? z");
|
||||
// ";" excluded b/c of invalid.ent
|
||||
fxt.Test__parse("a https://b.org&a1b;.:!? z" , "a <a rel='nofollow' class='external free' href='https://b.org&a1b'>https://b.org&a1b</a>;.:!? z");
|
||||
// ";" excluded b/c of invalid.hex
|
||||
fxt.Test__parse("a https://b.org&#x;.:!? z" , "a <a rel='nofollow' class='external free' href='https://b.org&#x'>https://b.org&#x</a>;.:!? z");
|
||||
// ";" excluded b/c of invalid.dec
|
||||
fxt.Test__parse("a https://b.org&#a;.:!? z" , "a <a rel='nofollow' class='external free' href='https://b.org&#a'>https://b.org&#a</a>;.:!? z");
|
||||
// num_post_proto rule
|
||||
fxt.Test__parse("a https://.:!? z" , "a https://.:!? z");
|
||||
}
|
||||
@Test public void Clean_url() {
|
||||
// basic
|
||||
fxt.Test__parse("http://a᠆b.org/c᠆d" , "<a rel='nofollow' class='external free' href='http://ab.org/c᠆d'>http://ab.org/c᠆d</a>");
|
||||
}
|
||||
}
|
||||
class Xomw_magiclinks_wkr__fxt {
|
||||
private final Xomw_magiclinks_wkr wkr;
|
||||
private final Xomw_parser_ctx pctx = new Xomw_parser_ctx();
|
||||
private final Xomw_parser_bfr pbfr = new Xomw_parser_bfr();
|
||||
public Xomw_magiclinks_wkr__fxt() {
|
||||
Xoae_app app = Xoa_app_fxt.Make__app__edit();
|
||||
Xowe_wiki wiki = Xoa_app_fxt.Make__wiki__edit(app);
|
||||
|
||||
Xomw_regex_space regex_space = new Xomw_regex_space();
|
||||
pctx.Init_by_page(wiki.Ttl_parse(Bry_.new_a7("Page_1")));
|
||||
Xomw_parser parser = new Xomw_parser();
|
||||
this.wkr = new Xomw_magiclinks_wkr(parser, parser.Sanitizer(), parser.Linker(), new Xomw_regex_boundary(regex_space), new Xomw_regex_url(regex_space));
|
||||
wkr.Init_by_wiki();
|
||||
}
|
||||
public void Test__parse(String src_str, String expd) {Test__parse(Bool_.Y, src_str, expd);}
|
||||
public void Test__parse(boolean apos, String src_str, String expd) {
|
||||
byte[] src_bry = Bry_.new_u8(src_str);
|
||||
pbfr.Init(src_bry);
|
||||
wkr.Do_magic_links(pctx, pbfr);
|
||||
if (apos) expd = gplx.langs.htmls.Gfh_utl.Replace_apos(expd);
|
||||
Tfds.Eq_str_lines(expd, pbfr.Rslt().To_str_and_clear(), src_str);
|
||||
}
|
||||
}
|
||||
@@ -1,134 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.parsers.nbsps; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
|
||||
import gplx.core.btries.*;
|
||||
public class Xomw_nbsp_wkr {
|
||||
private final Btrie_rv trv = new Btrie_rv();
|
||||
public void Do_nbsp(Xomw_parser_ctx pctx, Xomw_parser_bfr pbfr) {
|
||||
// PORTED:
|
||||
// Clean up special characters, only run once, next-to-last before doBlockLevels
|
||||
// $fixtags = [
|
||||
// // French spaces, last one Guillemet-left
|
||||
// // only if there is something before the space
|
||||
// '/(.) (?=\\?|:|;|!|%|\\302\\273)/' => '\\1 ',
|
||||
// // french spaces, Guillemet-right
|
||||
// '/(\\302\\253) /' => '\\1 ',
|
||||
// '/ (!\s*important)/' => ' \\1', // Beware of CSS magic word !important, T13874.
|
||||
// ];
|
||||
// $text = preg_replace( array_keys( $fixtags ), array_values( $fixtags ), $text );
|
||||
// XO.PBFR
|
||||
Bry_bfr src_bfr = pbfr.Src();
|
||||
byte[] src = src_bfr.Bfr();
|
||||
int src_bgn = 0;
|
||||
int src_end = src_bfr.Len();
|
||||
Bry_bfr bfr = pbfr.Trg();
|
||||
|
||||
if (trie == null) {
|
||||
synchronized (this.getClass()) {
|
||||
trie = Btrie_slim_mgr.cs();
|
||||
Trie__add(trie, Tid__space_lhs, " ?");
|
||||
Trie__add(trie, Tid__space_lhs, " :");
|
||||
Trie__add(trie, Tid__space_lhs, " ;");
|
||||
Trie__add(trie, Tid__space_lhs, " !");
|
||||
Trie__add(trie, Tid__space_lhs, " »");
|
||||
Trie__add(trie, Tid__space_rhs, "« ");
|
||||
Trie__add(trie, Tid__important, " !");
|
||||
}
|
||||
}
|
||||
|
||||
int cur = src_bgn;
|
||||
int prv = cur;
|
||||
boolean dirty = true;
|
||||
// search forward for...
|
||||
// "\s" before ? : ; ! % 302,273; EX: "a :"
|
||||
// "\s" after 302,253
|
||||
// "&160;!\simportant"
|
||||
while (true) {
|
||||
if (cur == src_end) {
|
||||
if (dirty)
|
||||
bfr.Add_mid(src, prv, src_end);
|
||||
break;
|
||||
}
|
||||
Object o = trie.Match_at(trv, src, cur, src_end);
|
||||
if (o == null) {
|
||||
cur++;
|
||||
continue;
|
||||
}
|
||||
Xomw_nbsp_itm itm = (Xomw_nbsp_itm)o;
|
||||
|
||||
// '/ (!\s*important)/' => ' \\1'
|
||||
byte itm_tid = itm.Tid();
|
||||
int important_end = -1;
|
||||
if (itm_tid == Tid__important) {
|
||||
int space_bgn = cur + itm.Key().length;
|
||||
int space_end = Bry_find_.Find_fwd_while(src, space_bgn, src_end, Byte_ascii.Space);
|
||||
important_end = space_end + Bry__important.length;
|
||||
if (!Bry_.Match(src, space_end, important_end, Bry__important)) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
dirty = true;
|
||||
bfr.Add_mid(src, prv, cur);
|
||||
switch (itm_tid) {
|
||||
case Tid__space_lhs:
|
||||
bfr.Add_bry_many(Bry__nbsp, itm.Val());
|
||||
break;
|
||||
case Tid__space_rhs:
|
||||
bfr.Add_bry_many(itm.Val(), Bry__nbsp);
|
||||
break;
|
||||
case Tid__important:
|
||||
bfr.Add(Bry__important__repl);
|
||||
break;
|
||||
}
|
||||
cur += itm.Key().length;
|
||||
prv = cur;
|
||||
}
|
||||
if (dirty)
|
||||
pbfr.Switch();
|
||||
}
|
||||
private static final byte Tid__space_lhs = 0, Tid__space_rhs = 1, Tid__important = 2;
|
||||
private static Btrie_slim_mgr trie;
|
||||
private static void Trie__add(Btrie_slim_mgr trie, byte tid, String key_str) {
|
||||
byte[] key_bry = Bry_.new_u8(key_str);
|
||||
byte[] val_bry = null;
|
||||
switch (tid) {
|
||||
case Tid__space_lhs:
|
||||
val_bry = Bry_.Mid(key_bry, 1);
|
||||
break;
|
||||
case Tid__space_rhs:
|
||||
val_bry = Bry_.Mid(key_bry, 0, key_bry.length - 1);
|
||||
break;
|
||||
case Tid__important:
|
||||
val_bry = key_bry;
|
||||
break;
|
||||
}
|
||||
Xomw_nbsp_itm itm = new Xomw_nbsp_itm(tid, key_bry, val_bry);
|
||||
trie.Add_obj(key_bry, itm);
|
||||
}
|
||||
private static final byte[] Bry__nbsp = Bry_.new_a7(" "), Bry__important = Bry_.new_a7("important"), Bry__important__repl = Bry_.new_a7(" !");
|
||||
}
|
||||
/** Immutable trie entry: a type id plus the key bytes and the val bytes registered for it. */
class Xomw_nbsp_itm {
	private final byte tid;
	private final byte[] key;
	private final byte[] val;
	/**
	 * @param tid type id of the entry
	 * @param key bytes the entry is registered under; stored by reference
	 * @param val bytes associated with the entry; stored by reference
	 */
	public Xomw_nbsp_itm(byte tid, byte[] key, byte[] val) {
		this.val = val;
		this.key = key;
		this.tid = tid;
	}
	/** Type id passed to the constructor. */
	public byte Tid() {
		return this.tid;
	}
	/** Key array passed to the constructor (same instance, not a copy). */
	public byte[] Key() {
		return this.key;
	}
	/** Val array passed to the constructor (same instance, not a copy). */
	public byte[] Val() {
		return this.val;
	}
}
|
||||
@@ -1,40 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.parsers.nbsps; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
|
||||
import org.junit.*;
|
||||
public class Xomw_nbsp_wkr__tst {
|
||||
private final Xomw_nbsp_wkr__fxt fxt = new Xomw_nbsp_wkr__fxt();
|
||||
@Test public void Noop() {fxt.Test__parse("abc" , "abc");}
|
||||
@Test public void Space_lhs__colon() {fxt.Test__parse("a :b c" , "a :b c");}
|
||||
@Test public void Space_lhs__laquo() {fxt.Test__parse("a »b c" , "a »b c");}
|
||||
@Test public void Space_rhs() {fxt.Test__parse("a« b c" , "a« b c");}
|
||||
@Test public void Important() {fxt.Test__parse("a  ! important b" , "a ! important b");}
|
||||
}
|
||||
class Xomw_nbsp_wkr__fxt {
|
||||
private final Xomw_nbsp_wkr wkr = new Xomw_nbsp_wkr();
|
||||
private final Xomw_parser_ctx pctx = new Xomw_parser_ctx();
|
||||
private final Xomw_parser_bfr pbfr = new Xomw_parser_bfr();
|
||||
private boolean apos = true;
|
||||
public void Test__parse(String src_str, String expd) {
|
||||
byte[] src_bry = Bry_.new_u8(src_str);
|
||||
pbfr.Init(src_bry);
|
||||
wkr.Do_nbsp(pctx, pbfr);
|
||||
if (apos) expd = gplx.langs.htmls.Gfh_utl.Replace_apos(expd);
|
||||
Tfds.Eq_str_lines(expd, pbfr.Rslt().To_str_and_clear(), src_str);
|
||||
}
|
||||
}
|
||||
@@ -1,23 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.parsers.prepros; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
|
||||
/** Placeholder for a preprocessor frame; expansion is not implemented. */
public class Xomw_frame_itm {
	/**
	 * Stub: ignores its argument.
	 *
	 * @param ttl title bytes to expand; not read
	 * @return always null
	 */
	public byte[] Expand(byte[] ttl) {
		final byte[] expanded = null; // no expansion logic exists yet
		return expanded;
	}
}
|
||||
@@ -1,564 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.parsers.prepros; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
|
||||
// public class Xomw_frame_wkr { // THREAD.UNSAFE: caching for repeated calls
|
||||
// private final Xomw_parser parser;
|
||||
// public Xomw_frame_wkr(Xomw_parser parser) {
|
||||
// this.parser = parser;
|
||||
// }
|
||||
// \\ Replace magic variables, templates, and template arguments
|
||||
// \\ with the appropriate text. Templates are substituted recursively,
|
||||
// \\ taking care to avoid infinite loops.
|
||||
// \\
|
||||
// \\ Note that the substitution depends on value of $mOutputType:
|
||||
// \\ self::OT_WIKI: only {{subst:}} templates
|
||||
// \\ self::OT_PREPROCESS: templates but not extension tags
|
||||
// \\ self::OT_HTML: all templates and extension tags
|
||||
// \\
|
||||
// \\ @param String $text The text to transform
|
||||
// \\ @param boolean|PPFrame $frame Object describing the arguments passed to the
|
||||
// \\ template. Arguments may also be provided as an associative array, as
|
||||
// \\ was the usual case before MW1.12. Providing arguments this way may be
|
||||
// \\ useful for extensions wishing to perform variable replacement
|
||||
// \\ explicitly.
|
||||
// \\ @param boolean $argsOnly Only do argument (triple-brace) expansion, not
|
||||
// \\ double-brace expansion.
|
||||
// \\ @return String
|
||||
// public function replaceVariables($text, $frame = false, $argsOnly = false) {
|
||||
// // Is there any text? Also, Prevent too big inclusions!
|
||||
// $textSize = strlen($text);
|
||||
// if ($textSize < 1 || $textSize > $this->mOptions->getMaxIncludeSize()) {
|
||||
// return $text;
|
||||
// }
|
||||
//
|
||||
// if ($frame == false) {
|
||||
// $frame = $this->getPreprocessor()->newFrame();
|
||||
// } elseif (!($frame instanceof PPFrame)) {
|
||||
// wfDebug(__METHOD__ . " called using plain parameters instead of "
|
||||
// . "a PPFrame instance. Creating custom frame.\n");
|
||||
// $frame = $this->getPreprocessor()->newCustomFrame($frame);
|
||||
// }
|
||||
//
|
||||
// $dom = $this->preprocessToDom($text);
|
||||
// $flags = $argsOnly ? PPFrame::NO_TEMPLATES : 0;
|
||||
// $text = $frame->expand($dom, $flags);
|
||||
//
|
||||
// return $text;
|
||||
// }
|
||||
//
|
||||
// \\ Clean up argument array - refactored in 1.9 so parserfunctions can use it, too.
|
||||
// public static function createAssocArgs($args) {
|
||||
// $assocArgs = [];
|
||||
// $index = 1;
|
||||
// foreach ($args as $arg) {
|
||||
// $eqpos = strpos($arg, '=');
|
||||
// if ($eqpos == false) {
|
||||
// $assocArgs[$index++] = $arg;
|
||||
// } else {
|
||||
// $name = trim(substr($arg, 0, $eqpos));
|
||||
// $value = trim(substr($arg, $eqpos + 1));
|
||||
// if ($value == false) {
|
||||
// $value = '';
|
||||
// }
|
||||
// if ($name != false) {
|
||||
// $assocArgs[$name] = $value;
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// return $assocArgs;
|
||||
// }
|
||||
|
||||
// \\ Return the text of a template, after recursively
|
||||
// \\ replacing any variables or templates within the template.
|
||||
// \\
|
||||
// \\ @param array $piece The parts of the template
|
||||
// \\ $piece['title']: the title, i.e. the part before the |
|
||||
// \\ $piece['parts']: the parameter array
|
||||
// \\ $piece['lineStart']: whether the brace was at the start of a line
|
||||
// \\ @param PPFrame $frame The current frame, contains template arguments
|
||||
// \\ @throws Exception
|
||||
// \\ @return String The text of the template
|
||||
// public void Brace_substitution(Xomw_prepro_node__template piece, Xomw_frame_itm frame) {
|
||||
// // Flags
|
||||
//
|
||||
// // $text has been filled
|
||||
// boolean found = false;
|
||||
// // wiki markup in $text should be escaped
|
||||
// boolean nowiki = false;
|
||||
// // $text is HTML, armour it against wikitext transformation
|
||||
// boolean is_html = false;
|
||||
// // Force interwiki transclusion to be done in raw mode not rendered
|
||||
// boolean force_raw_interwiki = false;
|
||||
// // $text is a DOM node needing expansion in a child frame
|
||||
// boolean is_child_obj = false;
|
||||
// // $text is a DOM node needing expansion in the current frame
|
||||
// boolean is_local_obj = false;
|
||||
//
|
||||
// // Title Object, where $text came from
|
||||
// byte[] title = null;
|
||||
//
|
||||
// // $part1 is the bit before the first |, and must contain only title characters.
|
||||
// // Various prefixes will be stripped from it later.
|
||||
// byte[] title_with_spaces = frame.Expand(piece.Title());
|
||||
// byte[] part1 = Bry_.Trim(title_with_spaces);
|
||||
// byte[] title_text = null;
|
||||
//
|
||||
// // Original title text preserved for various purposes
|
||||
// byte[] originalTitle = part1;
|
||||
//
|
||||
// // $args is a list of argument nodes, starting from index 0, not including $part1
|
||||
// // @todo FIXME: If piece['parts'] is null then the call to getLength()
|
||||
// // below won't work b/c this $args isn't an Object
|
||||
// Xomw_prepro_node__part[] args = (null == piece.Parts()) ? null : piece.Parts();
|
||||
//
|
||||
// byte[] profile_section = null; // profile templates
|
||||
//
|
||||
// Tfds.Write(nowiki, is_html, force_raw_interwiki, is_child_obj, is_local_obj, title, title_text, profile_section);
|
||||
// // SUBST
|
||||
// if (!found) {
|
||||
// String subst_match = null; // $this->mSubstWords->matchStartAndRemove($part1);
|
||||
// boolean literal = false;
|
||||
//
|
||||
// // Possibilities for substMatch: "subst", "safesubst" or FALSE
|
||||
// // Decide whether to expand template or keep wikitext as-is.
|
||||
// if (parser.Output_type__wiki()) {
|
||||
// if (subst_match == null) {
|
||||
// literal = true; // literal when in PST with no prefix
|
||||
// }
|
||||
// else {
|
||||
// literal = false; // expand when in PST with subst: or safesubst:
|
||||
// }
|
||||
// }
|
||||
// else {
|
||||
// if (subst_match == "subst") {
|
||||
// literal = true; // literal when not in PST with plain subst:
|
||||
// }
|
||||
// else {
|
||||
// literal = false; // expand when not in PST with safesubst: or no prefix
|
||||
// }
|
||||
// }
|
||||
// if (literal) {
|
||||
//// $text = $frame->virtualBracketedImplode('{{', '|', '}}', title_with_spaces, $args);
|
||||
// is_local_obj = true;
|
||||
// found = true;
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// // Variables
|
||||
// if (!found && args.length == 0) {
|
||||
//// $id = $this->mVariables->matchStartToEnd($part1);
|
||||
//// if ($id != false) {
|
||||
//// $text = $this->getVariableValue($id, $frame);
|
||||
//// if (MagicWord::getCacheTTL($id) > -1) {
|
||||
//// $this->mOutput->updateCacheExpiry(MagicWord::getCacheTTL($id));
|
||||
//// }
|
||||
// found = true;
|
||||
//// }
|
||||
// }
|
||||
//
|
||||
// // MSG, MSGNW and RAW
|
||||
// if (!found) {
|
||||
// // Check for MSGNW:
|
||||
//// $mwMsgnw = MagicWord::get('msgnw');
|
||||
//// if ($mwMsgnw->matchStartAndRemove($part1)) {
|
||||
// nowiki = true;
|
||||
//// }
|
||||
//// else {
|
||||
// // Remove obsolete MSG:
|
||||
//// $mwMsg = MagicWord::get('msg');
|
||||
//// $mwMsg->matchStartAndRemove($part1);
|
||||
//// }
|
||||
//
|
||||
// // Check for RAW:
|
||||
//// $mwRaw = MagicWord::get('raw');
|
||||
//// if ($mwRaw->matchStartAndRemove($part1)) {
|
||||
//// force_raw_interwiki = true;
|
||||
//// }
|
||||
// }
|
||||
|
||||
// Parser functions
|
||||
// if (!found) {
|
||||
// $colonPos = strpos($part1, ':');
|
||||
// if ($colonPos != false) {
|
||||
// $func = substr($part1, 0, $colonPos);
|
||||
// $funcArgs = [ trim(substr($part1, $colonPos + 1)) ];
|
||||
// $argsLength = $args->getLength();
|
||||
// for ($i = 0; $i < $argsLength; $i++) {
|
||||
// $funcArgs[] = $args->item($i);
|
||||
// }
|
||||
// try {
|
||||
// $result = $this->callParserFunction($frame, $func, $funcArgs);
|
||||
// } catch (Exception $ex) {
|
||||
// throw $ex;
|
||||
// }
|
||||
|
||||
// The interface for parser functions allows for extracting
|
||||
// flags into the local scope. Extract any forwarded flags
|
||||
// here.
|
||||
// extract($result);
|
||||
// }
|
||||
// }
|
||||
|
||||
// Finish mangling title and then check for loops.
|
||||
// Set title to a Title Object and $title_text to the PDBK
|
||||
// if (!found) {
|
||||
// $ns = NS_TEMPLATE;
|
||||
// Split the title into page and subpage
|
||||
// $subpage = '';
|
||||
// $relative = $this->maybeDoSubpageLink($part1, $subpage);
|
||||
// if ($part1 != $relative) {
|
||||
// $part1 = $relative;
|
||||
// $ns = $this->mTitle->getNamespace();
|
||||
// }
|
||||
// title = Title::newFromText($part1, $ns);
|
||||
// if (title) {
|
||||
// $title_text = title->getPrefixedText();
|
||||
// // Check for language variants if the template is not found
|
||||
// if ($this->getConverterLanguage()->hasVariants() && title->getArticleID() == 0) {
|
||||
// $this->getConverterLanguage()->findVariantLink($part1, title, true);
|
||||
// }
|
||||
// // Do recursion depth check
|
||||
// $limit = $this->mOptions->getMaxTemplateDepth();
|
||||
// if ($frame->depth >= $limit) {
|
||||
// found = true;
|
||||
// $text = '<span class="error">'
|
||||
// . wfMessage('parser-template-recursion-depth-warning')
|
||||
// ->numParams($limit)->inContentLanguage()->text()
|
||||
// . '</span>';
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
|
||||
// Load from database
|
||||
// if (!found && title) {
|
||||
// $profile_section = $this->mProfiler->scopedProfileIn(title->getPrefixedDBkey());
|
||||
// if (!title->isExternal()) {
|
||||
// if (title->isSpecialPage()
|
||||
// && $this->mOptions->getAllowSpecialInclusion()
|
||||
// && $this->ot['html']
|
||||
// ) {
|
||||
// $specialPage = SpecialPageFactory::getPage(title->getDBkey());
|
||||
// // Pass the template arguments as URL parameters.
|
||||
// // "uselang" will have no effect since the Language Object
|
||||
// // is forced to the one defined in ParserOptions.
|
||||
// $pageArgs = [];
|
||||
// $argsLength = $args->getLength();
|
||||
// for ($i = 0; $i < $argsLength; $i++) {
|
||||
// $bits = $args->item($i)->splitArg();
|
||||
// if (strval($bits['index']) == '') {
|
||||
// $name = trim($frame->expand($bits['name'], PPFrame::STRIP_COMMENTS));
|
||||
// $value = trim($frame->expand($bits['value']));
|
||||
// $pageArgs[$name] = $value;
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// // Create a new context to execute the special page
|
||||
// $context = new RequestContext;
|
||||
// $context->setTitle(title);
|
||||
// $context->setRequest(new FauxRequest($pageArgs));
|
||||
// if ($specialPage && $specialPage->maxIncludeCacheTime() == 0) {
|
||||
// $context->setUser($this->getUser());
|
||||
// } else {
|
||||
// // If this page is cached, then we better not be per user.
|
||||
// $context->setUser(User::newFromName('127.0.0.1', false));
|
||||
// }
|
||||
// $context->setLanguage($this->mOptions->getUserLangObj());
|
||||
// $ret = SpecialPageFactory::capturePath(
|
||||
// title, $context, $this->getLinkRenderer());
|
||||
// if ($ret) {
|
||||
// $text = $context->getOutput()->getHTML();
|
||||
// $this->mOutput->addOutputPageMetadata($context->getOutput());
|
||||
// found = true;
|
||||
// is_html = true;
|
||||
// if ($specialPage && $specialPage->maxIncludeCacheTime() != false) {
|
||||
// $this->mOutput->updateRuntimeAdaptiveExpiry(
|
||||
// $specialPage->maxIncludeCacheTime()
|
||||
// );
|
||||
// }
|
||||
// }
|
||||
// } elseif (MWNamespace::isNonincludable(title->getNamespace())) {
|
||||
// found = false; // access denied
|
||||
// wfDebug(__METHOD__ . ": template inclusion denied for " .
|
||||
// title->getPrefixedDBkey() . "\n");
|
||||
// } else {
|
||||
// list($text, title) = $this->getTemplateDom(title);
|
||||
// if ($text != false) {
|
||||
// found = true;
|
||||
// is_child_obj = true;
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// // If the title is valid but undisplayable, make a link to it
|
||||
// if (!found && ($this->ot['html'] || $this->ot['pre'])) {
|
||||
// $text = "[[:$title_text]]";
|
||||
// found = true;
|
||||
// }
|
||||
// } elseif (title->isTrans()) {
|
||||
// // Interwiki transclusion
|
||||
// if ($this->ot['html'] && !force_raw_interwiki) {
|
||||
// $text = $this->interwikiTransclude(title, 'render');
|
||||
// is_html = true;
|
||||
// } else {
|
||||
// $text = $this->interwikiTransclude(title, 'raw');
|
||||
// // Preprocess it like a template
|
||||
// $text = $this->preprocessToDom($text, self::PTD_FOR_INCLUSION);
|
||||
// is_child_obj = true;
|
||||
// }
|
||||
// found = true;
|
||||
// }
|
||||
//
|
||||
// // Do infinite loop check
|
||||
// // This has to be done after redirect resolution to avoid infinite loops via redirects
|
||||
// if (!$frame->loopCheck(title)) {
|
||||
// found = true;
|
||||
// $text = '<span class="error">'
|
||||
// . wfMessage('parser-template-loop-warning', $title_text)->inContentLanguage()->text()
|
||||
// . '</span>';
|
||||
// wfDebug(__METHOD__ . ": template loop broken at '$title_text'\n");
|
||||
// }
|
||||
// }
|
||||
|
||||
// If we haven't found text to substitute by now, we're done
|
||||
// Recover the source wikitext and return it
|
||||
// if (!found) {
|
||||
// $text = $frame->virtualBracketedImplode('{{', '|', '}}', title_with_spaces, $args);
|
||||
// if ($profile_section) {
|
||||
// $this->mProfiler->scopedProfileOut($profile_section);
|
||||
// }
|
||||
// return [ 'Object' => $text ];
|
||||
// }
|
||||
|
||||
// Expand DOM-style return values in a child frame
|
||||
// if (is_child_obj) {
|
||||
// // Clean up argument array
|
||||
// $newFrame = $frame->newChild($args, title);
|
||||
//
|
||||
// if (nowiki) {
|
||||
// $text = $newFrame->expand($text, PPFrame::RECOVER_ORIG);
|
||||
// } elseif ($title_text != false && $newFrame->isEmpty()) {
|
||||
// // Expansion is eligible for the empty-frame cache
|
||||
// $text = $newFrame->cachedExpand($title_text, $text);
|
||||
// } else {
|
||||
// // Uncached expansion
|
||||
// $text = $newFrame->expand($text);
|
||||
// }
|
||||
// }
|
||||
// if (is_local_obj && nowiki) {
|
||||
// $text = $frame->expand($text, PPFrame::RECOVER_ORIG);
|
||||
// is_local_obj = false;
|
||||
// }
|
||||
|
||||
// if ($profile_section) {
|
||||
// $this->mProfiler->scopedProfileOut($profile_section);
|
||||
// }
|
||||
|
||||
// Replace raw HTML by a placeholder
|
||||
// if (is_html) {
|
||||
// $text = $this->insertStripItem($text);
|
||||
// } elseif (nowiki && ($this->ot['html'] || $this->ot['pre'])) {
|
||||
// // Escape nowiki-style return values
|
||||
// $text = wfEscapeWikiText($text);
|
||||
// } elseif (is_string($text)
|
||||
// && !$piece['lineStart']
|
||||
// && preg_match('/^(?:{\\||:|;|#|\*)/', $text)
|
||||
// ) {
|
||||
// // T2529: if the template begins with a table or block-level
|
||||
// // element, it should be treated as beginning a new line.
|
||||
// // This behavior is somewhat controversial.
|
||||
// $text = "\n" . $text;
|
||||
// }
|
||||
|
||||
// if (is_string($text) && !$this->incrementIncludeSize('post-expand', strlen($text))) {
|
||||
// // Error, oversize inclusion
|
||||
// if ($title_text != false) {
|
||||
// // Make a working, properly escaped link if possible (T25588)
|
||||
// $text = "[[:$title_text]]";
|
||||
// } else {
|
||||
// // This will probably not be a working link, but at least it may
|
||||
// // provide some hint of where the problem is
|
||||
// preg_replace('/^:/', '', $originalTitle);
|
||||
// $text = "[[:$originalTitle]]";
|
||||
// }
|
||||
// $text .= $this->insertStripItem('<!-- WARNING: template omitted, '
|
||||
// . 'post-expand include size too large -->');
|
||||
// $this->limitationWarn('post-expand-template-inclusion');
|
||||
// }
|
||||
//
|
||||
// if (is_local_obj) {
|
||||
// $ret = [ 'Object' => $text ];
|
||||
// } else {
|
||||
// $ret = [ 'text' => $text ];
|
||||
// }
|
||||
|
||||
// return $ret;
|
||||
// }
|
||||
|
||||
// \\ Triple brace replacement -- used for template arguments
|
||||
// public function argSubstitution($piece, $frame) {
|
||||
//
|
||||
// $error = false;
|
||||
// $parts = $piece['parts'];
|
||||
// $nameWithSpaces = $frame->expand($piece['title']);
|
||||
// $argName = trim($nameWithSpaces);
|
||||
// $Object = false;
|
||||
// $text = $frame->getArgument($argName);
|
||||
// if ($text == false && $parts->getLength() > 0
|
||||
// && ($this->ot['html']
|
||||
// || $this->ot['pre']
|
||||
// || ($this->ot['wiki'] && $frame->isTemplate())
|
||||
// )
|
||||
// ) {
|
||||
// // No match in frame, use the supplied default
|
||||
// $Object = $parts->item(0)->getChildren();
|
||||
// }
|
||||
// if (!$this->incrementIncludeSize('arg', strlen($text))) {
|
||||
// $error = '<!-- WARNING: argument omitted, expansion size too large -->';
|
||||
// $this->limitationWarn('post-expand-template-argument');
|
||||
// }
|
||||
//
|
||||
// if ($text == false && $Object == false) {
|
||||
// // No match anywhere
|
||||
// $Object = $frame->virtualBracketedImplode('{{{', '|', '}}}', $nameWithSpaces, $parts);
|
||||
// }
|
||||
// if ($error != false) {
|
||||
// $text .= $error;
|
||||
// }
|
||||
// if ($Object != false) {
|
||||
// $ret = [ 'Object' => $Object ];
|
||||
// } else {
|
||||
// $ret = [ 'text' => $text ];
|
||||
// }
|
||||
//
|
||||
// return $ret;
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// \\ Return the text to be used for a given extension tag.
|
||||
// \\ This is the ghost of strip().
|
||||
// \\
|
||||
// \\ @param array $params Associative array of parameters:
|
||||
// \\ name PPNode for the tag name
|
||||
// \\ attr PPNode for unparsed text where tag attributes are thought to be
|
||||
// \\ attributes Optional associative array of parsed attributes
|
||||
// \\ inner Contents of extension element
|
||||
// \\ noClose Original text did not have a close tag
|
||||
// \\ @param PPFrame $frame
|
||||
// \\
|
||||
// \\ @throws MWException
|
||||
// \\ @return String
|
||||
// \\/
|
||||
// public function extensionSubstitution($params, $frame) {
|
||||
// static $errorStr = '<span class="error">';
|
||||
// static $errorLen = 20;
|
||||
//
|
||||
// $name = $frame->expand($params['name']);
|
||||
// if (substr($name, 0, $errorLen) == $errorStr) {
|
||||
// // Probably expansion depth or node count exceeded. Just punt the
|
||||
// // error up.
|
||||
// return $name;
|
||||
// }
|
||||
//
|
||||
// $attrText = !isset($params['attr']) ? null : $frame->expand($params['attr']);
|
||||
// if (substr($attrText, 0, $errorLen) == $errorStr) {
|
||||
// // See above
|
||||
// return $attrText;
|
||||
// }
|
||||
//
|
||||
// // We can't safely check if the expansion for $content resulted in an
|
||||
// // error, because the content could happen to be the error String
|
||||
// // (T149622).
|
||||
// $content = !isset($params['inner']) ? null : $frame->expand($params['inner']);
|
||||
//
|
||||
// $marker = self::MARKER_PREFIX . "-$name-"
|
||||
// . sprintf('%08X', $this->mMarkerIndex++) . self::MARKER_SUFFIX;
|
||||
//
|
||||
// $isFunctionTag = isset($this->mFunctionTagHooks[strtolower($name)]) &&
|
||||
// ($this->ot['html'] || $this->ot['pre']);
|
||||
// if ($isFunctionTag) {
|
||||
// $markerType = 'none';
|
||||
// } else {
|
||||
// $markerType = 'general';
|
||||
// }
|
||||
// if ($this->ot['html'] || $isFunctionTag) {
|
||||
// $name = strtolower($name);
|
||||
// $attributes = Sanitizer::decodeTagAttributes($attrText);
|
||||
// if (isset($params['attributes'])) {
|
||||
// $attributes = $attributes + $params['attributes'];
|
||||
// }
|
||||
//
|
||||
// if (isset($this->mTagHooks[$name])) {
|
||||
// // Workaround for PHP bug 35229 and similar
|
||||
// if (!is_callable($this->mTagHooks[$name])) {
|
||||
// throw new MWException("Tag hook for $name is not callable\n");
|
||||
// }
|
||||
// $output = call_user_func_array($this->mTagHooks[$name],
|
||||
// [ $content, $attributes, $this, $frame ]);
|
||||
// } elseif (isset($this->mFunctionTagHooks[$name])) {
|
||||
// list($callback,) = $this->mFunctionTagHooks[$name];
|
||||
// if (!is_callable($callback)) {
|
||||
// throw new MWException("Tag hook for $name is not callable\n");
|
||||
// }
|
||||
//
|
||||
// $output = call_user_func_array($callback, [ &$this, $frame, $content, $attributes ]);
|
||||
// } else {
|
||||
// $output = '<span class="error">Invalid tag extension name: ' .
|
||||
// htmlspecialchars($name) . '</span>';
|
||||
// }
|
||||
//
|
||||
// if (is_array($output)) {
|
||||
// // Extract flags to local scope (to override $markerType)
|
||||
// $flags = $output;
|
||||
// $output = $flags[0];
|
||||
// unset($flags[0]);
|
||||
// extract($flags);
|
||||
// }
|
||||
// } else {
|
||||
// if (is_null($attrText)) {
|
||||
// $attrText = '';
|
||||
// }
|
||||
// if (isset($params['attributes'])) {
|
||||
// foreach ($params['attributes'] as $attrName => $attrValue) {
|
||||
// $attrText .= ' ' . htmlspecialchars($attrName) . '="' .
|
||||
// htmlspecialchars($attrValue) . '"';
|
||||
// }
|
||||
// }
|
||||
// if ($content == null) {
|
||||
// $output = "<$name$attrText/>";
|
||||
// } else {
|
||||
// $close = is_null($params['close']) ? '' : $frame->expand($params['close']);
|
||||
// if (substr($close, 0, $errorLen) == $errorStr) {
|
||||
// // See above
|
||||
// return $close;
|
||||
// }
|
||||
// $output = "<$name$attrText>$content$close";
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// if ($markerType == 'none') {
|
||||
// return $output;
|
||||
// } elseif ($markerType == 'nowiki') {
|
||||
// $this->mStripState->addNoWiki($marker, $output);
|
||||
// } elseif ($markerType == 'general') {
|
||||
// $this->mStripState->addGeneral($marker, $output);
|
||||
// } else {
|
||||
// throw new MWException(__METHOD__ . ': invalid marker type');
|
||||
// }
|
||||
// return $marker;
|
||||
// }
|
||||
// }
|
||||
@@ -1,98 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.parsers.prepros; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
|
||||
public interface Xomw_prepro_node {
|
||||
int Subs__len();
|
||||
Xomw_prepro_node Subs__get_at(int i);
|
||||
void Subs__add(Xomw_prepro_node sub);
|
||||
void To_xml(Bry_bfr bfr);
|
||||
}
|
||||
class Xomw_prepro_node__text extends Xomw_prepro_node__base {
|
||||
public Xomw_prepro_node__text(byte[] bry) {
|
||||
this.bry = bry;
|
||||
}
|
||||
public byte[] Bry() {return bry;} protected final byte[] bry;
|
||||
@Override public void To_xml(Bry_bfr bfr) {
|
||||
bfr.Add(bry);
|
||||
}
|
||||
}
|
||||
class Xomw_prepro_node__comment extends Xomw_prepro_node__base {
|
||||
public Xomw_prepro_node__comment(byte[] bry) {
|
||||
this.bry = bry;
|
||||
}
|
||||
public byte[] Bry() {return bry;} protected final byte[] bry;
|
||||
@Override public void To_xml(Bry_bfr bfr) {
|
||||
bfr.Add_str_a7("<comment>");
|
||||
bfr.Add(bry);
|
||||
bfr.Add_str_a7("</comment>");
|
||||
}
|
||||
}
|
||||
class Xomw_prepro_node__ext extends Xomw_prepro_node__base {
|
||||
public Xomw_prepro_node__ext(byte[] name, byte[] attr, byte[] inner, byte[] close) {
|
||||
this.name = name;
|
||||
this.attr = attr;
|
||||
this.inner = inner;
|
||||
this.close = close;
|
||||
}
|
||||
public byte[] Name() {return name;} private final byte[] name;
|
||||
public byte[] Attr() {return attr;} private final byte[] attr;
|
||||
public byte[] Inner() {return inner;} private final byte[] inner;
|
||||
public byte[] Close() {return close;} private final byte[] close;
|
||||
@Override public void To_xml(Bry_bfr bfr) {
|
||||
bfr.Add_str_a7("<ext>");
|
||||
bfr.Add_str_a7("<name>").Add(name).Add_str_a7("</name>");
|
||||
bfr.Add_str_a7("<atr>").Add(attr).Add_str_a7("</atr>");
|
||||
bfr.Add_str_a7("<inner>").Add(inner).Add_str_a7("</inner>");
|
||||
bfr.Add_str_a7("<close>").Add(close).Add_str_a7("</close>");
|
||||
bfr.Add_str_a7("</ext>");
|
||||
}
|
||||
}
|
||||
class Xomw_prepro_node__heading extends Xomw_prepro_node__base {
|
||||
public Xomw_prepro_node__heading(int heading_index, int title_index, byte[] text) {
|
||||
this.heading_index = heading_index;
|
||||
this.title_index = title_index;
|
||||
this.text = text;
|
||||
}
|
||||
public int Heading_index() {return heading_index;} private final int heading_index;
|
||||
public int Title_index() {return title_index;} private final int title_index;
|
||||
public byte[] Text() {return text;} private final byte[] text;
|
||||
@Override public void To_xml(Bry_bfr bfr) {
|
||||
bfr.Add_str_a7("<h ");
|
||||
bfr.Add_str_a7(" level=\"").Add_int_variable(heading_index);
|
||||
bfr.Add_str_a7("\" i=\"").Add_int_variable(title_index);
|
||||
bfr.Add_str_a7("\">");
|
||||
bfr.Add(text);
|
||||
bfr.Add_str_a7("</h>");
|
||||
}
|
||||
}
|
||||
class Xomw_prepro_node__tplarg extends Xomw_prepro_node__base {
|
||||
public Xomw_prepro_node__tplarg(byte[] title, Xomw_prepro_node__part[] parts) {
|
||||
this.title = title; this.parts = parts;
|
||||
}
|
||||
public byte[] Title() {return title;} private final byte[] title;
|
||||
public Xomw_prepro_node__part[] Parts() {return parts;} private final Xomw_prepro_node__part[] parts;
|
||||
@Override public void To_xml(Bry_bfr bfr) {
|
||||
bfr.Add_str_a7("<tplarg>");
|
||||
bfr.Add_str_a7("<title>").Add(title);
|
||||
bfr.Add_str_a7("</title>");
|
||||
for (Xomw_prepro_node__part part : parts)
|
||||
part.To_xml(bfr);
|
||||
|
||||
bfr.Add_str_a7("</tplarg>");
|
||||
}
|
||||
}
|
||||
@@ -1,28 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.parsers.prepros; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
|
||||
public abstract class Xomw_prepro_node__base implements Xomw_prepro_node {
|
||||
private List_adp subs;
|
||||
public int Subs__len() {return subs == null ? 0 : subs.Len();}
|
||||
public Xomw_prepro_node Subs__get_at(int i) {return subs == null ? null : (Xomw_prepro_node)subs.Get_at(i);}
|
||||
public void Subs__add(Xomw_prepro_node sub) {
|
||||
if (subs == null) subs = List_adp_.New();
|
||||
subs.Add(sub);
|
||||
}
|
||||
public abstract void To_xml(Bry_bfr bfr);
|
||||
}
|
||||
@@ -1,45 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.parsers.prepros; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
|
||||
public class Xomw_prepro_node__part extends Xomw_prepro_node__base {
|
||||
public Xomw_prepro_node__part(int idx, byte[] key, byte[] val) {
|
||||
this.idx = idx;
|
||||
this.key = key;
|
||||
this.val = val;
|
||||
}
|
||||
public int Idx() {return idx;} private final int idx;
|
||||
public byte[] Key() {return key;} private final byte[] key;
|
||||
public byte[] Val() {return val;} private final byte[] val;
|
||||
@Override public void To_xml(Bry_bfr bfr) {
|
||||
bfr.Add_str_a7("<part>");
|
||||
bfr.Add_str_a7("<name");
|
||||
if (idx > 0) {
|
||||
bfr.Add_str_a7(" index=\"").Add_int_variable(idx).Add_str_a7("\" />");
|
||||
}
|
||||
else {
|
||||
bfr.Add_str_a7(">");
|
||||
bfr.Add(key);
|
||||
bfr.Add_str_a7("</name>");
|
||||
bfr.Add_str_a7("=");
|
||||
}
|
||||
bfr.Add_str_a7("<value>");
|
||||
bfr.Add(val);
|
||||
bfr.Add_str_a7("</value>");
|
||||
bfr.Add_str_a7("</part>");
|
||||
}
|
||||
}
|
||||
@@ -1,36 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.parsers.prepros; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
|
||||
public class Xomw_prepro_node__template extends Xomw_prepro_node__base {
|
||||
public Xomw_prepro_node__template(byte[] title, Xomw_prepro_node__part[] parts, int line_start) {
|
||||
this.title = title; this.parts = parts; this.line_start = line_start;
|
||||
}
|
||||
public byte[] Title() {return title;} private final byte[] title;
|
||||
public Xomw_prepro_node__part[] Parts() {return parts;} private final Xomw_prepro_node__part[] parts;
|
||||
public int Line_start() {return line_start;} private final int line_start;
|
||||
@Override public void To_xml(Bry_bfr bfr) {
|
||||
bfr.Add_str_a7("<template");
|
||||
if (line_start > 0) bfr.Add_str_a7(" lineStart=\"").Add_int_variable(line_start).Add_byte_quote();
|
||||
bfr.Add_byte(Byte_ascii.Angle_end);
|
||||
bfr.Add_str_a7("<title>").Add(title);
|
||||
bfr.Add_str_a7("</title>");
|
||||
for (Xomw_prepro_node__part part : parts)
|
||||
part.To_xml(bfr);
|
||||
bfr.Add_str_a7("</template>");
|
||||
}
|
||||
}
|
||||
@@ -1,66 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.parsers.prepros; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
|
||||
class Xomw_prepro_rule {
|
||||
public Xomw_prepro_rule(byte[] bgn, byte[] end, int min, int max, int[] names) {
|
||||
this.bgn = bgn;
|
||||
this.end = end;
|
||||
this.min = min;
|
||||
this.max = max;
|
||||
this.names = names;
|
||||
}
|
||||
public final byte[] bgn;
|
||||
public final byte[] end;
|
||||
public final int min;
|
||||
public final int max;
|
||||
public final int[] names;
|
||||
public boolean Names_exist(int idx) {
|
||||
return idx < names.length && names[idx] != Name__invalid;
|
||||
}
|
||||
private static final byte[] Name__tmpl_bry = Bry_.new_a7("template"), Name__targ_bry = Bry_.new_a7("tplarg");
|
||||
public static final int Name__invalid = -1, Name__null = 0, Name__tmpl = 1, Name__targ = 2;
|
||||
public static byte[] Name(int type) {
|
||||
switch (type) {
|
||||
case Name__tmpl: return Name__tmpl_bry;
|
||||
case Name__targ: return Name__targ_bry;
|
||||
default:
|
||||
case Name__invalid: return null;
|
||||
case Name__null: return null;
|
||||
}
|
||||
}
|
||||
}
|
||||
class Xomw_prepro_elem {
|
||||
private static final byte[] Bry__tag_end = Bry_.new_a7("</");
|
||||
public Xomw_prepro_elem(int type, byte[] name) {
|
||||
this.type = type;
|
||||
this.name = name;
|
||||
this.tag_end_lhs = Bry_.Add(Bry__tag_end, name);
|
||||
}
|
||||
public final int type;
|
||||
public final byte[] name;
|
||||
public final byte[] tag_end_lhs;
|
||||
public static final int Type__comment = 0, Type__other = 1;
|
||||
}
|
||||
/** Mutable pair of a byte sequence and a type code. */
class Xomw_prepro_curchar_itm {
	public byte[] bry;
	public byte type;

	/** Stores both arguments as-is; the array is not copied. */
	public Xomw_prepro_curchar_itm(byte[] bry, byte type) {
		this.type = type;
		this.bry = bry;
	}
}
|
||||
@@ -1,170 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.parsers.prepros; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
|
||||
class Xomw_prepro_stack {
|
||||
public List_adp stack = List_adp_.New();
|
||||
public Xomw_prepro_piece top;
|
||||
private Bry_bfr root_accum = Bry_bfr_.New(), accum;
|
||||
private final Xomw_prepro_flags flags = new Xomw_prepro_flags();
|
||||
|
||||
public Xomw_prepro_stack() {
|
||||
accum = root_accum;
|
||||
}
|
||||
public void Clear() {
|
||||
stack.Clear();
|
||||
accum.Clear();
|
||||
top = null;
|
||||
}
|
||||
public int Count() {return stack.Len();}
|
||||
public Bry_bfr Get_accum() {return accum;}
|
||||
public Bry_bfr Get_root_accum() {return root_accum;}
|
||||
|
||||
public Xomw_prepro_part Get_current_part() {
|
||||
if (top == null) {
|
||||
return null;
|
||||
}
|
||||
else {
|
||||
return top.Get_current_part();
|
||||
}
|
||||
}
|
||||
|
||||
public void Push(Xomw_prepro_piece item) {
|
||||
stack.Add(item);
|
||||
this.top = (Xomw_prepro_piece)stack.Get_at(stack.Len() - 1);
|
||||
accum = top.Get_accum();
|
||||
}
|
||||
|
||||
public Xomw_prepro_piece Pop() {
|
||||
int len = stack.Count();
|
||||
if (len == 0) {
|
||||
throw Err_.new_wo_type("Xomw_prepro_stack: no elements remaining");
|
||||
}
|
||||
|
||||
Xomw_prepro_piece rv = (Xomw_prepro_piece)stack.Get_at(len - 1);
|
||||
stack.Del_at(len - 1);
|
||||
len--;
|
||||
|
||||
if (len > 0) {
|
||||
this.top = (Xomw_prepro_piece)stack.Get_at(stack.Len() - 1);
|
||||
accum = top.Get_accum();
|
||||
} else {
|
||||
this.top = null;
|
||||
this.accum = root_accum;
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
|
||||
public void Add_part(byte[] bry) {
|
||||
top.Add_part(bry);
|
||||
accum = top.Get_accum();
|
||||
}
|
||||
|
||||
public Xomw_prepro_flags Get_flags() {
|
||||
if (stack.Count() == 0) {
|
||||
flags.Find_eq = false;
|
||||
flags.Find_pipe = false;
|
||||
flags.In_heading = false;
|
||||
return flags;
|
||||
}
|
||||
else {
|
||||
top.Set_flags(flags);
|
||||
return flags;
|
||||
}
|
||||
}
|
||||
}
|
||||
/** Mutable set of three scanning flags; all start out false. */
class Xomw_prepro_flags {
	/** True when pipe characters should be looked for. */
	public boolean Find_pipe = false;

	/** True when equals signs should be looked for. */
	public boolean Find_eq = false;

	/** True when the current position is inside a possible heading. */
	public boolean In_heading = false;
}
|
||||
class Xomw_prepro_piece {
|
||||
public final byte[] open; // Opening character (\n for heading)
|
||||
public final byte[] close; // Matching closing char;
|
||||
public int count; // Number of opening characters found (number of "=" for heading)
|
||||
public final boolean line_start; // True if the open char appeared at the start of the input line; Not set for headings.
|
||||
public final int start_pos;
|
||||
public List_adp parts = List_adp_.New();
|
||||
public Xomw_prepro_piece(byte[] open, byte[] close, int count, int start_pos, boolean line_start) {
|
||||
this.open = open;
|
||||
this.close = close;
|
||||
this.count = count;
|
||||
this.start_pos = start_pos;
|
||||
this.line_start = line_start;
|
||||
parts.Add(new Xomw_prepro_part(Bry_.Empty));
|
||||
}
|
||||
public void Parts__renew() {
|
||||
parts.Clear();
|
||||
this.Add_part(Bry_.Empty);
|
||||
}
|
||||
public Xomw_prepro_part Get_current_part() {
|
||||
return (Xomw_prepro_part)parts.Get_at(parts.Len() - 1);
|
||||
}
|
||||
public Bry_bfr Get_accum() {
|
||||
return Get_current_part().bfr;
|
||||
}
|
||||
public void Add_part(byte[] bry) {
|
||||
parts.Add(new Xomw_prepro_part(bry));
|
||||
}
|
||||
public static final byte[] Brack_bgn_bry = Bry_.new_a7("[");
|
||||
public void Set_flags(Xomw_prepro_flags flags) {
|
||||
int parts_len = parts.Len();
|
||||
boolean open_is_nl = Bry_.Eq(open, Byte_ascii.Nl_bry);
|
||||
boolean find_pipe = !open_is_nl && !Bry_.Eq(open, Brack_bgn_bry);
|
||||
flags.Find_pipe = find_pipe;
|
||||
flags.Find_eq = find_pipe && parts_len > 1 && ((Xomw_prepro_part)parts.Get_at(parts_len - 1)).Eqpos != -1;
|
||||
flags.In_heading = open_is_nl;
|
||||
}
|
||||
// Get the output String that would result if the close is not found.
|
||||
public byte[] Break_syntax(Bry_bfr tmp_bfr, int opening_count) {
|
||||
byte[] rv = Bry_.Empty;
|
||||
if (Bry_.Eq(open, Byte_ascii.Nl_bry)) {
|
||||
rv = ((Xomw_prepro_part)parts.Get_at(0)).bfr.To_bry();
|
||||
}
|
||||
else {
|
||||
if (opening_count == -1) {
|
||||
opening_count = count;
|
||||
}
|
||||
tmp_bfr.Add(Bry_.Repeat_bry(open, opening_count));
|
||||
|
||||
// concat parts with "|"
|
||||
boolean first = true;
|
||||
int len = parts.Len();
|
||||
for (int i = 0; i < len; i++) {
|
||||
Xomw_prepro_part part = (Xomw_prepro_part)parts.Get_at(i);
|
||||
if (first) {
|
||||
first = false;
|
||||
}
|
||||
else {
|
||||
tmp_bfr.Add_byte_pipe();
|
||||
}
|
||||
tmp_bfr.Add(part.bfr.To_bry());
|
||||
}
|
||||
rv = tmp_bfr.To_bry_and_clear();
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
}
|
||||
class Xomw_prepro_part {
|
||||
public Xomw_prepro_part(byte[] bry) {
|
||||
bfr.Add(bry);
|
||||
}
|
||||
public final Bry_bfr bfr = Bry_bfr_.New();
|
||||
public int Eqpos = -1;
|
||||
public int comment_end = -1;
|
||||
public int visual_end = -1;
|
||||
}
|
||||
@@ -1,789 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.parsers.prepros; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
|
||||
import gplx.core.btries.*;
|
||||
import gplx.xowa.mws.utls.*;
|
||||
public class Xomw_prepro_wkr { // THREAD.UNSAFE: caching for repeated calls
|
||||
private final Bry_bfr tmp_bfr = Bry_bfr_.New();
|
||||
private final List_adp comments_list = List_adp_.New();
|
||||
private final Btrie_slim_mgr elements_trie__y = Btrie_slim_mgr.ci_a7(), elements_trie__n = Btrie_slim_mgr.ci_a7();
|
||||
private final Hash_adp_bry xmlish_allow_missing_end_tag = Hash_adp_bry.cs().Add_many_str("includeonly", "noinclude", "onlyinclude");
|
||||
private final Hash_adp_bry no_more_closing_tag = Hash_adp_bry.cs();
|
||||
private final Xomw_prepro_stack stack = new Xomw_prepro_stack();
|
||||
private final Btrie_rv trv = new Btrie_rv();
|
||||
private Bry_bfr accum = Bry_bfr_.New();
|
||||
|
||||
public void Init_by_wiki(String... xmlish_elems_ary) {
|
||||
Elements_trie__init_by_wiki(elements_trie__y, ignored_tags_y, xmlish_elems_ary, "noinclude");
|
||||
Elements_trie__init_by_wiki(elements_trie__n, ignored_tags_n, xmlish_elems_ary, "includeonly");
|
||||
}
|
||||
private void Elements_trie__init_by_wiki(Btrie_slim_mgr trie, Ordered_hash ignored_tags, String[] strip_list_ary, String xmlish_elem) {
|
||||
trie.Clear();
|
||||
Elements_trie__add(trie, Bool_.Y, "!--", "comment");
|
||||
// PORTED: $xmlishElements = parser->getStripList();
|
||||
for (String itm : strip_list_ary) {
|
||||
Elements_trie__add(trie, Bool_.N, itm, itm);
|
||||
}
|
||||
// PORTED: "$xmlishElements[] = 'noinclude';" or "$xmlishElements[] = 'includeonly';"
|
||||
Elements_trie__add(trie, Bool_.N, xmlish_elem, xmlish_elem);
|
||||
|
||||
// PORTED: $xmlishRegex = implode( '|', array_merge( $xmlishElements, $ignoredTags ) );
|
||||
int ignored_tags_len = ignored_tags.Count();
|
||||
for (int j = 0; j < ignored_tags_len; j++) {
|
||||
byte[] bry = (byte[])ignored_tags.Get_at(j);
|
||||
String str = String_.new_u8(bry);
|
||||
Elements_trie__add(trie, Bool_.N, str, str);
|
||||
}
|
||||
}
|
||||
private static void Elements_trie__add(Btrie_slim_mgr trie, boolean type_is_comment, String hook, String name) {
|
||||
trie.Add_obj(hook, new Xomw_prepro_elem(type_is_comment ? Xomw_prepro_elem.Type__comment : Xomw_prepro_elem.Type__other, Bry_.new_a7(name)));
|
||||
}
|
||||
public byte[] Preprocess_to_xml(byte[] src, boolean for_inclusion) {
|
||||
// RELIC.PROC_VAR: forInclusion = $flags & Parser::PTD_FOR_INCLUSION;
|
||||
// RELIC.INIT_BY_WIKI: $xmlishElements = parser->getStripList();
|
||||
// RELIC.CLASS_VAR: $xmlishAllowMissingEndTag = [ 'includeonly', 'noinclude', 'onlyinclude' ];
|
||||
boolean enable_only_include = false;
|
||||
|
||||
// PORTED: rewritten so that all add / del is done in INIT_BY_WIKI
|
||||
Ordered_hash ignored_tags;
|
||||
Hash_adp ignored_elements;
|
||||
Btrie_slim_mgr elements_trie;
|
||||
if (for_inclusion) {
|
||||
ignored_tags = ignored_tags_y; // RELIC: $ignoredTags = [ 'includeonly', '/includeonly' ];
|
||||
ignored_elements = ignored_elements__y; // RELIC: $ignoredElements = [ 'noinclude' ];
|
||||
// RELIC.INIT_BY_WIKI: $xmlishElements[] = 'noinclude';
|
||||
if ( Bry_.Has(src, Bry__only_include_bgn)
|
||||
&& Bry_.Has(src, Bry__only_include_end)) {
|
||||
enable_only_include = true;
|
||||
}
|
||||
elements_trie = elements_trie__y;
|
||||
}
|
||||
else {
|
||||
ignored_tags = ignored_tags_n; // $ignoredTags = [ 'noinclude', '/noinclude', 'onlyinclude', '/onlyinclude' ];
|
||||
ignored_elements = ignored_elements__n; // $ignoredElements = [ 'includeonly' ];
|
||||
// RELIC.INIT_BY_WIKI: $xmlishElements[] = 'includeonly';
|
||||
elements_trie = elements_trie__n;
|
||||
}
|
||||
|
||||
// RELIC.INIT_BY_WIKI: $xmlishRegex = implode( '|', array_merge( $xmlishElements, $ignoredTags ) );
|
||||
|
||||
// RELIC.REGEX
|
||||
// Use "A" modifier (anchored) instead of "^", because ^ doesn't work with an offset
|
||||
// $elementsRegex = "~($xmlishRegex)(?:\s|\/>|>)|(!--)~iA";
|
||||
|
||||
stack.Clear();
|
||||
|
||||
// RELIC.REGEX:
|
||||
// $searchBase = "[{<\n"; # }
|
||||
|
||||
// RELIC.BRY_FIND
|
||||
// For fast reverse searches
|
||||
// $revText = strrev( $text );
|
||||
// $lengthText = strlen( $text );
|
||||
|
||||
// Input pointer, starts out pointing to a pseudo-newline before the start
|
||||
int i = 0;
|
||||
|
||||
// Current accumulator
|
||||
accum = stack.Get_accum();
|
||||
accum.Add_str_a7("<root>");
|
||||
|
||||
// True to find equals signs in arguments
|
||||
boolean find_equals = false;
|
||||
|
||||
// True to take notice of pipe characters
|
||||
boolean find_pipe = false;
|
||||
int heading_index = 1;
|
||||
|
||||
// True if $i is inside a possible heading
|
||||
boolean in_heading = false;
|
||||
|
||||
// True if there are no more greater-than (>) signs right of $i
|
||||
boolean no_more_gt = false;
|
||||
|
||||
// Map of tag name => true if there are no more closing tags of given type right of $i
|
||||
no_more_closing_tag.Clear();
|
||||
|
||||
// True to ignore all input up to the next <onlyinclude>
|
||||
boolean find_only_include = enable_only_include;
|
||||
|
||||
// Do a line-start run without outputting an LF character
|
||||
boolean fake_line_start = true;
|
||||
|
||||
// XOWA: init
|
||||
int src_len = src.length;
|
||||
int found = -1;
|
||||
byte[] cur_char = Bry_.Empty;
|
||||
byte[] cur_closing = Bry_.Empty;
|
||||
byte[] inner = null;
|
||||
Xomw_prepro_rule rule = null;
|
||||
|
||||
while (true) {
|
||||
if (find_only_include) {
|
||||
// Ignore all input up to the next <onlyinclude>
|
||||
int start_pos = Bry_find_.Find_fwd(src, Bry__only_include_bgn, i, src_len);
|
||||
if (start_pos == Bry_find_.Not_found) {
|
||||
// Ignored section runs to the end
|
||||
accum.Add_str_a7("<ignore>").Add_bry_escape_html(src, i, src_len).Add_str_a7("</ignore>");
|
||||
break;
|
||||
}
|
||||
int tag_end_pos = start_pos + Bry__only_include_bgn.length; // past-the-end
|
||||
accum.Add_str_a7("<ignore>").Add_bry_escape_html(src, i, tag_end_pos).Add_str_a7("</ignore>");
|
||||
i = tag_end_pos;
|
||||
find_only_include = false;
|
||||
}
|
||||
|
||||
if (fake_line_start) {
|
||||
found = Found__line_bgn;
|
||||
cur_char = Bry_.Empty;
|
||||
}
|
||||
else {
|
||||
// Find next opening brace, closing brace or pipe
|
||||
// RELIC.REGEX: $search = $searchBase;
|
||||
if (stack.top == null) {
|
||||
cur_closing = Bry_.Empty;
|
||||
}
|
||||
else {
|
||||
cur_closing = stack.top.close;
|
||||
// RELIC.REGEX: $search .= $currentClosing;
|
||||
}
|
||||
if (find_pipe) {
|
||||
// RELIC.REGEX: $search .= '|';
|
||||
}
|
||||
if (find_equals) {
|
||||
// First equals will be for the template
|
||||
// RELIC.REGEX: $search .= '=';
|
||||
}
|
||||
|
||||
// Output literal section, advance input counter
|
||||
// PORTED: "$literalLength = strcspn(src, $search, i)"; NOTE: no trie b/c of frequent changes to $search
|
||||
int literal_len = 0;
|
||||
boolean loop_stop = false;
|
||||
// loop chars until search_char is found
|
||||
for (int j = i; j < src_len; j++) {
|
||||
byte b = src[j];
|
||||
switch (b) { // handle '$searchBase = "[{<\n";'
|
||||
case Byte_ascii.Brack_bgn:
|
||||
case Byte_ascii.Curly_bgn:
|
||||
case Byte_ascii.Angle_bgn:
|
||||
case Byte_ascii.Nl:
|
||||
loop_stop = true;
|
||||
break;
|
||||
case Byte_ascii.Pipe: // handle "find_pipe"
|
||||
if (find_pipe) loop_stop = true;
|
||||
break;
|
||||
case Byte_ascii.Eq: // handle "find_equals"
|
||||
if (find_equals) loop_stop = true;
|
||||
break;
|
||||
default: // handle "cur_closing"; specified by piece.close and rule.close, so "\n", "}", "]" and "}-"
|
||||
if (cur_closing != Bry_.Empty) {
|
||||
byte cur_closing_0 = cur_closing[0];
|
||||
if (b == cur_closing_0) {
|
||||
if (cur_closing.length == 1) { // handle "\n", "}", "]"
|
||||
loop_stop = true;
|
||||
}
|
||||
else {// handle "}-"
|
||||
int nxt_idx = j + 1;
|
||||
if (nxt_idx < src_len && src[nxt_idx] == Byte_ascii.Dash)
|
||||
loop_stop = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
if (loop_stop)
|
||||
break;
|
||||
else
|
||||
literal_len++;
|
||||
}
|
||||
if (literal_len > 0) {
|
||||
accum.Add_bry_escape_html(src, i, i + literal_len);
|
||||
i += literal_len;
|
||||
}
|
||||
if (i >= src_len) {
|
||||
if (Bry_.Eq(cur_closing, Byte_ascii.Nl_bry)) {
|
||||
// Do a past-the-end run to finish off the heading
|
||||
cur_char = Bry_.Empty;
|
||||
found = Found__line_end;
|
||||
}
|
||||
else {
|
||||
// All done
|
||||
break;
|
||||
}
|
||||
}
|
||||
else {
|
||||
// PORTED: "if ( $curChar == '|' ) {", etc..
|
||||
Xomw_prepro_curchar_itm cur_char_itm = (Xomw_prepro_curchar_itm)cur_char_trie.Match_at(trv, src, i, src_len);
|
||||
if (cur_char_itm != null) {
|
||||
cur_char = cur_char_itm.bry;
|
||||
switch (cur_char_itm.type) {
|
||||
case Byte_ascii.Pipe: found = Found__pipe; break;
|
||||
case Byte_ascii.Eq: found = Found__equals; break;
|
||||
case Byte_ascii.Angle_bgn: found = Found__angle; break;
|
||||
case Byte_ascii.Nl: found = in_heading ? Found__line_end : Found__line_bgn; break;
|
||||
|
||||
// PORTED: "elseif ( $curChar == $currentClosing )"
|
||||
case Byte_ascii.Curly_end: found = Found__close; break;
|
||||
case Byte_ascii.Brack_end: found = Found__close; break;
|
||||
case Byte_ascii.At: found = Found__close; break; // NOTE: At is type for "}-"
|
||||
|
||||
// PORTED: "elseif ( isset( $this->rules[$curChar] ) )"
|
||||
case Byte_ascii.Curly_bgn: {found = Found__open; rule = rule_curly; break;}
|
||||
case Byte_ascii.Brack_bgn: {found = Found__open; rule = rule_brack; break;}
|
||||
case Byte_ascii.Dash: {found = Found__open; rule = rule_langv; break;}
|
||||
}
|
||||
}
|
||||
else {
|
||||
i++;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (found == Found__angle) {
|
||||
// Handle </onlyinclude>
|
||||
if ( enable_only_include
|
||||
&& Bry_.Eq(src, i, i + Len__only_include_end, Bry__only_include_end)) {
|
||||
find_only_include = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Determine element name
|
||||
// PORTED: $elementsRegex = "~($xmlishRegex)(?:\s|\/>|>)|(!--)~iA"; EX: "(pre|ref)(?:\s|\/>|>)|(!--)
|
||||
Xomw_prepro_elem element = (Xomw_prepro_elem)elements_trie.Match_at(trv, src, i + 1, src_len);
|
||||
if (element == null) {
|
||||
// Element name missing or not listed
|
||||
accum.Add(Bry__escaped_lt);
|
||||
i++;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Handle comments
|
||||
if (element.type == Xomw_prepro_elem.Type__comment) {
|
||||
// To avoid leaving blank lines, when a sequence of
|
||||
// space-separated comments is both preceded and followed by
|
||||
// a newline (ignoring spaces), then
|
||||
// trim leading and trailing spaces and the trailing newline.
|
||||
|
||||
// Find the end
|
||||
int end_pos = Bry_find_.Find_fwd(src, Bry__comment_end, i + 4, src_len);
|
||||
if (end_pos == Bry_find_.Not_found) {
|
||||
// Unclosed comment in input, runs to end
|
||||
accum.Add_str_a7("<comment>").Add_bry_escape_html(src, i, src_len).Add_str_a7("</comment>");
|
||||
i = src_len;
|
||||
}
|
||||
else {
|
||||
// Search backwards for leading whitespace
|
||||
int ws_bgn = i > 0 ? i - Php_str_.Strspn_bwd__space_or_tab(src, i, -1) : 0;
|
||||
|
||||
// Search forwards for trailing whitespace
|
||||
// $wsEnd will be the position of the last space (or the '>' if there's none)
|
||||
int ws_end = end_pos + 2 + Php_str_.Strspn_fwd__space_or_tab(src, end_pos + 3, -1, src_len);
|
||||
|
||||
// Keep looking forward as long as we're finding more
|
||||
// comments.
|
||||
comments_list.Clear();
|
||||
comments_list.Add(new int[] {ws_bgn, ws_end});
|
||||
while (ws_end + 5 < src_len && Bry_.Eq(src, ws_end + 1, ws_end + 5, Bry__comment_bgn)) {
|
||||
int cur_char_pos = Bry_find_.Find_fwd(src, Bry__comment_end, ws_end + 4);
|
||||
if (cur_char_pos == Bry_find_.Not_found) {
|
||||
break;
|
||||
}
|
||||
cur_char_pos = cur_char_pos + 2 + Php_str_.Strspn_fwd__space_or_tab(src, cur_char_pos + 3, -1, src_len);
|
||||
comments_list.Add(new int[] {ws_end + 1, cur_char_pos});
|
||||
ws_end = cur_char_pos;
|
||||
}
|
||||
|
||||
// Eat the line if possible
|
||||
// TODO: This could theoretically be done if $wsStart == 0, i.e. for comments at
|
||||
// the overall start. That's not how Sanitizer::removeHTMLcomments() did it, but
|
||||
// it's a possible beneficial b/c break.
|
||||
int bgn_pos = -1;
|
||||
if ( ws_bgn > 0
|
||||
&& Bry_.Eq(src, ws_bgn - 1, ws_bgn , Byte_ascii.Nl_bry)
|
||||
&& Bry_.Eq(src, ws_end + 1, ws_end + 2, Byte_ascii.Nl_bry)
|
||||
) {
|
||||
// Remove leading whitespace from the end of the accumulator
|
||||
// Sanity check first though
|
||||
int ws_len = i - ws_bgn;
|
||||
int accum_len = accum.Len();
|
||||
if ( ws_len > 0
|
||||
&& Php_str_.Strspn_fwd__space_or_tab(accum.Bfr(), accum_len - ws_len, -1, accum_len) == ws_len) {
|
||||
accum.Del_by(ws_len);
|
||||
}
|
||||
|
||||
// Dump all but the last comment to the accumulator
|
||||
int comments_list_len = comments_list.Len();
|
||||
for (int j = 0; j < comments_list_len; j++) {
|
||||
int[] com = (int[])comments_list.Get_at(j);
|
||||
bgn_pos = com[0];
|
||||
end_pos = com[1] + 1;
|
||||
if (j == comments_list_len - 1) {
|
||||
break;
|
||||
}
|
||||
inner = Bry_.Mid(src, bgn_pos, end_pos);
|
||||
accum.Add_str_a7("<comment>").Add_bry_escape_html(inner).Add_str_a7("</comment>");
|
||||
}
|
||||
|
||||
// Do a line-start run next time to look for headings after the comment
|
||||
fake_line_start = true;
|
||||
}
|
||||
else {
|
||||
// No line to eat, just take the comment itself
|
||||
bgn_pos = i;
|
||||
end_pos += 2;
|
||||
}
|
||||
|
||||
if (stack.top != null) {
|
||||
Xomw_prepro_part part = stack.top.Get_current_part();
|
||||
if (!(part.comment_end != -1 && part.comment_end == ws_bgn - 1)) {
|
||||
part.visual_end = ws_bgn;
|
||||
}
|
||||
// Else comments abutting, no change in visual end
|
||||
part.comment_end = end_pos;
|
||||
}
|
||||
i = end_pos + 1;
|
||||
inner = Bry_.Mid(src, bgn_pos, end_pos + 1);
|
||||
accum.Add_str_a7("<comment>").Add_bry_escape_html(inner).Add_str_a7("</comment>");
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
byte[] name = element.name;
|
||||
// RELIC.BTRIE_CI: $lowerName = strtolower( $name );
|
||||
int atr_bgn = i + name.length + 1;
|
||||
|
||||
// Find end of tag
|
||||
int tag_end_pos = no_more_gt ? Bry_find_.Not_found : Bry_find_.Find_fwd(src, Byte_ascii.Angle_end, atr_bgn);
|
||||
if (tag_end_pos == Bry_find_.Not_found) {
|
||||
// Infinite backtrack
|
||||
// Disable tag search to prevent worst-case O(N^2) performance
|
||||
no_more_gt = true;
|
||||
accum.Add(Bry__escaped_lt);
|
||||
i++;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Handle ignored tags
|
||||
if (ignored_tags.Has(name)) {
|
||||
accum.Add_str_a7("<ignore>").Add_bry_escape_html(src, i, tag_end_pos + 1).Add_str_a7("</ignore>");
|
||||
i = tag_end_pos + 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
int tag_bgn_pos = i;
|
||||
int atr_end = -1;
|
||||
byte[] close = null;
|
||||
if (src[tag_end_pos - 1] == Byte_ascii.Slash) {
|
||||
atr_end = tag_end_pos - 1;
|
||||
inner = null;
|
||||
i = tag_end_pos + 1;
|
||||
close = Bry_.Empty;
|
||||
}
|
||||
else {
|
||||
atr_end = tag_end_pos;
|
||||
// Find closing tag
|
||||
// PORTED: `preg_match( "/<\/" . preg_quote( $name, '/' ) . "\s*>/i",`
|
||||
boolean elem_end_found = false;
|
||||
int elem_end_lhs = -1, elem_end_rhs = -1;
|
||||
int elem_end_cur = tag_end_pos + 1;
|
||||
while (true) {
|
||||
// search for "</"
|
||||
elem_end_lhs = Bry_find_.Find_fwd(src, Bry__end_lhs, elem_end_cur, src_len);
|
||||
if (elem_end_lhs == Bry_find_.Not_found) {
|
||||
break;
|
||||
}
|
||||
|
||||
// verify $name
|
||||
elem_end_cur = elem_end_lhs + 2; // 2="</"
|
||||
int elem_end_tmp = elem_end_cur + name.length;
|
||||
if (!Bry_.Eq_ci_a7(name, src, elem_end_cur, elem_end_tmp)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// verify "\s*>"
|
||||
elem_end_cur = elem_end_tmp;
|
||||
elem_end_cur = Bry_find_.Find_fwd_while(src, elem_end_cur, src_len, Byte_ascii.Space);
|
||||
if (elem_end_cur == src_len) { // just "\s", but no ">"
|
||||
break;
|
||||
}
|
||||
if (src[elem_end_cur] == Byte_ascii.Gt) {
|
||||
elem_end_rhs = elem_end_cur + 1;
|
||||
elem_end_found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if ( !no_more_closing_tag.Has(name)
|
||||
&& elem_end_found) {
|
||||
inner = Bry_.Mid(src, tag_end_pos + 1, elem_end_lhs);
|
||||
i = elem_end_rhs;
|
||||
tmp_bfr.Add_str_a7("<close>").Add_bry_escape_html(src, elem_end_lhs, elem_end_rhs).Add_str_a7("</close>");
|
||||
close = tmp_bfr.To_bry_and_clear();
|
||||
}
|
||||
else {
|
||||
// No end tag
|
||||
if (xmlish_allow_missing_end_tag.Has(name)) {
|
||||
// Let it run out to the end of the src.
|
||||
inner = Bry_.Mid(src, tag_end_pos + 1);
|
||||
i = src_len;
|
||||
close = Bry_.Empty;
|
||||
}
|
||||
else {
|
||||
// Don't match the tag, treat opening tag as literal and resume parsing.
|
||||
i = tag_end_pos + 1;
|
||||
accum.Add_bry_escape_html(src, tag_bgn_pos, tag_end_pos + 1);
|
||||
// Cache results, otherwise we have O(N^2) performance for input like <foo><foo><foo>...
|
||||
no_more_closing_tag.Add_if_dupe_use_nth(name, name);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// <includeonly> and <noinclude> just become <ignore> tags
|
||||
if (ignored_elements.Has(name)) {
|
||||
accum.Add_str_a7("<ignore>").Add_bry_escape_html(src, tag_bgn_pos, i).Add_str_a7("</ignore>");
|
||||
continue;
|
||||
}
|
||||
|
||||
accum.Add_str_a7("<ext>");
|
||||
// PORTED:
|
||||
// if ( $attrEnd <= $attrStart ) {
|
||||
// $attr = '';
|
||||
// } else {
|
||||
// $attr = substr( $text, $attrStart, $attrEnd - $attrStart );
|
||||
// }
|
||||
accum.Add_str_a7("<name>").Add(name).Add_str_a7("</name>");
|
||||
// Note that the attr element contains the whitespace between name and attribute,
|
||||
// this is necessary for precise reconstruction during pre-save transform.
|
||||
accum.Add_str_a7("<attr>");
|
||||
if (atr_end > atr_bgn)
|
||||
accum.Add_bry_escape_html(src, atr_bgn, atr_end);
|
||||
accum.Add_str_a7("</attr>");
|
||||
if (inner != null) {
|
||||
accum.Add_str_a7("<inner>").Add_bry_escape_html(inner).Add_str_a7("</inner>");
|
||||
}
|
||||
accum.Add(close).Add_str_a7("</ext>");
|
||||
}
|
||||
else if (found == Found__line_bgn) {
|
||||
// Is this the start of a heading?
|
||||
// Line break belongs before the heading element in any case
|
||||
if (fake_line_start) {
|
||||
fake_line_start = false;
|
||||
} else {
|
||||
accum.Add(cur_char);
|
||||
i++;
|
||||
}
|
||||
|
||||
int count = Php_str_.Strspn_fwd__byte(src, Byte_ascii.Eq, i, 6, src_len);
|
||||
if (count == 1 && find_equals) { // EX: "{{a|\n=b=\n"
|
||||
// DWIM: This looks kind of like a name/value separator.
|
||||
// Let's let the equals handler have it and break the
|
||||
// potential heading. This is heuristic, but AFAICT the
|
||||
// methods for completely correct disambiguation are very
|
||||
// complex.
|
||||
}
|
||||
else if (count > 0) {
|
||||
Xomw_prepro_piece piece = new Xomw_prepro_piece(Byte_ascii.Nl_bry, Byte_ascii.Nl_bry, count, i, false);
|
||||
piece.Add_part(Bry_.Repeat(Byte_ascii.Eq, count));
|
||||
stack.Push(piece);
|
||||
accum = stack.Get_accum();
|
||||
Xomw_prepro_flags flags = stack.Get_flags();
|
||||
find_pipe = flags.Find_pipe;
|
||||
find_equals = flags.Find_eq;
|
||||
in_heading = flags.In_heading;
|
||||
i += count;
|
||||
}
|
||||
}
|
||||
else if (found == Found__line_end) {
|
||||
Xomw_prepro_piece piece = stack.top;
|
||||
// A heading must be open, otherwise \n wouldn't have been in the search list
|
||||
if (!Bry_.Eq(piece.open, Byte_ascii.Nl_bry)) throw Err_.new_wo_type("assertion:piece must start with \\n");
|
||||
Xomw_prepro_part part = piece.Get_current_part();
|
||||
|
||||
// Search back through the input to see if it has a proper close.
|
||||
// Do this using the reversed String since the other solutions
|
||||
// (end anchor, etc.) are inefficient.
|
||||
int ws_len = Php_str_.Strspn_bwd__space_or_tab(src, src_len - i, -1);
|
||||
int search_bgn = i - ws_len;
|
||||
|
||||
if (part.comment_end != -1 && search_bgn -1 == part.comment_end) {
|
||||
// Comment found at line end
|
||||
// Search for equals signs before the comment
|
||||
search_bgn = part.visual_end;
|
||||
search_bgn = Bry_find_.Find_bwd__while_space_or_tab(src, search_bgn, 0);
|
||||
search_bgn -= Php_str_.Strspn_bwd__space_or_tab(src, search_bgn, -1);
|
||||
}
|
||||
int count = piece.count;
|
||||
int eq_len = Php_str_.Strspn_bwd__byte(src, Byte_ascii.Eq, search_bgn, -1);
|
||||
|
||||
byte[] element = Bry_.Empty;
|
||||
if (eq_len > 0) {
|
||||
if (search_bgn - eq_len == piece.start_pos) {
|
||||
// This is just a single String of equals signs on its own line
|
||||
// Replicate the doHeadings behavior /={count}(.+)={count}/
|
||||
// First find out how many equals signs there really are (don't stop at 6)
|
||||
count = eq_len;
|
||||
if (count < 3) {
|
||||
count = 0;
|
||||
}
|
||||
else {
|
||||
count = (count - 1) / 2;
|
||||
if (count > 6) count = 6;
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (eq_len < count) count = eq_len; // PORTED: $count = min( $equalsLength, $count );
|
||||
}
|
||||
if (count > 0) {
|
||||
// Normal match, output <h>
|
||||
element = tmp_bfr.Add_str_a7("<h level=\"").Add_int_variable(count).Add_str_a7("\" i=\"").Add_int_variable(heading_index).Add_str_a7("\">").Add_bfr_and_preserve(accum).Add_str_a7("</h>").To_bry_and_clear();
|
||||
heading_index++;
|
||||
} else {
|
||||
// Single equals sign on its own line, count=0
|
||||
element = accum.To_bry();
|
||||
}
|
||||
}
|
||||
else {
|
||||
// No match, no <h>, just pass down the inner src
|
||||
element = accum.To_bry();
|
||||
}
|
||||
|
||||
// Unwind the stack
|
||||
stack.Pop();
|
||||
accum = stack.Get_accum();
|
||||
|
||||
Xomw_prepro_flags flags = stack.Get_flags();
|
||||
find_pipe = flags.Find_pipe;
|
||||
find_equals = flags.Find_eq;
|
||||
in_heading = flags.In_heading;
|
||||
|
||||
// Append the result to the enclosing accumulator
|
||||
accum.Add(element);
|
||||
// Note that we do NOT increment the input pointer.
|
||||
// This is because the closing linebreak could be the opening linebreak of
|
||||
// another heading. Infinite loops are avoided because the next iteration MUST
|
||||
// hit the heading open case above, which unconditionally increments the
|
||||
// input pointer.
|
||||
}
|
||||
else if (found == Found__open) {
|
||||
// count opening brace characters
|
||||
int count = Php_str_.Strspn_fwd__byte(src, cur_char[0], i, -1, src_len); // NOTE: don't know how MediaWiki will handle "-{"
|
||||
|
||||
// we need to add to stack only if opening brace count is enough for one of the rules
|
||||
if (count >= rule.min) {
|
||||
// Add it to the stack
|
||||
Xomw_prepro_piece piece = new Xomw_prepro_piece(cur_char, rule.end, count, -1, i > 0 && src[i - 1] == Byte_ascii.Nl);
|
||||
|
||||
stack.Push(piece);
|
||||
accum = stack.Get_accum();
|
||||
Xomw_prepro_flags flags = stack.Get_flags();
|
||||
find_pipe = flags.Find_pipe;
|
||||
find_equals = flags.Find_eq;
|
||||
in_heading = flags.In_heading;
|
||||
}
|
||||
else {
|
||||
// Add literal brace(s)
|
||||
for (int j = 0; j < count; j++)
|
||||
accum.Add_bry_escape_html(cur_char);
|
||||
}
|
||||
i += count;
|
||||
}
|
||||
else if (found == Found__close) {
|
||||
Xomw_prepro_piece piece = stack.top;
|
||||
// lets check if there are enough characters for closing brace
|
||||
int max_count = piece.count;
|
||||
int count = Php_str_.Strspn_fwd__byte(src, cur_char[0], i, max_count, src_len);
|
||||
|
||||
// check for maximum matching characters (if there are 5 closing characters, we will probably need only 3 - depending on the rules)
|
||||
rule = Get_rule(piece.open);
|
||||
int matching_count = -1;
|
||||
if (count > rule.max) {
|
||||
// The specified maximum exists in the callback array, unless the caller
|
||||
// has made an error
|
||||
matching_count = rule.max;
|
||||
}
|
||||
else {
|
||||
// Count is less than the maximum
|
||||
// Skip any gaps in the callback array to find the true largest match
|
||||
// Need to use array_key_exists not isset because the callback can be null
|
||||
matching_count = count;
|
||||
while (matching_count > 0 && !rule.Names_exist(matching_count)) {
|
||||
matching_count--;
|
||||
}
|
||||
}
|
||||
|
||||
if (matching_count <= 0) {
|
||||
// No matching element found in callback array
|
||||
// Output a literal closing brace and continue
|
||||
for (int j = 0; j < count; j++)
|
||||
accum.Add_bry_escape_html(cur_char);
|
||||
i += count;
|
||||
continue;
|
||||
}
|
||||
int name_type = rule.names[matching_count];
|
||||
byte[] element = null;
|
||||
if (name_type == Xomw_prepro_rule.Name__null) {
|
||||
// No element, just literal text
|
||||
tmp_bfr.Add(piece.Break_syntax(tmp_bfr, matching_count));
|
||||
element = tmp_bfr.Add(Bry_.Repeat_bry(rule.end, matching_count)).To_bry_and_clear();
|
||||
}
|
||||
else {
|
||||
// Create XML element
|
||||
// Note: $parts is already XML, does not need to be encoded further
|
||||
List_adp parts = piece.parts;
|
||||
byte[] title = ((Xomw_prepro_part)parts.Get_at(0)).bfr.To_bry_and_clear();
|
||||
parts.Del_at(0);
|
||||
|
||||
// The invocation is at the start of the line if lineStart is set in
|
||||
// the stack, and all opening brackets are used up.
|
||||
byte[] attr = null;
|
||||
if (max_count == matching_count && piece.line_start) { // RELIC:!empty( $piece->lineStart )
|
||||
attr = Bry_.new_a7(" lineStart=\"1\"");
|
||||
}
|
||||
else {
|
||||
attr = Bry_.Empty;
|
||||
}
|
||||
|
||||
byte[] name_bry = Xomw_prepro_rule.Name(name_type);
|
||||
tmp_bfr.Add_str_a7("<").Add(name_bry).Add(attr).Add_str_a7(">");
|
||||
tmp_bfr.Add_str_a7("<title>").Add(title).Add_str_a7("</title>");
|
||||
|
||||
int arg_idx = 1;
|
||||
int parts_len = parts.Len();
|
||||
for (int j = 0; j < parts_len; j++) {
|
||||
Xomw_prepro_part part = (Xomw_prepro_part)parts.Get_at(j);
|
||||
if (part.Eqpos != -1) {
|
||||
Bry_bfr part_bfr = part.bfr;
|
||||
byte[] part_bfr_bry = part_bfr.Bfr();
|
||||
tmp_bfr.Add_str_a7("<part><name>").Add_mid(part_bfr_bry, 0, part.Eqpos);
|
||||
tmp_bfr.Add_str_a7("</name>=<value>").Add_mid(part_bfr_bry, part.Eqpos + 1, part_bfr.Len());
|
||||
tmp_bfr.Add_str_a7("</value></part>");
|
||||
}
|
||||
else {
|
||||
tmp_bfr.Add_str_a7("<part><name index=\"").Add_int_variable(arg_idx).Add_str_a7("\" /><value>").Add(part.bfr.To_bry()).Add_str_a7("</value></part>");
|
||||
arg_idx++;
|
||||
}
|
||||
}
|
||||
element = tmp_bfr.Add_str_a7("</").Add(name_bry).Add_str_a7(">").To_bry_and_clear();
|
||||
}
|
||||
|
||||
// Advance input pointer
|
||||
i += matching_count;
|
||||
|
||||
// Unwind the stack
|
||||
stack.Pop();
|
||||
accum = stack.Get_accum();
|
||||
|
||||
// Re-add the old stack element if it still has unmatched opening characters remaining
|
||||
if (matching_count < piece.count) {
|
||||
piece.Parts__renew(); // PORTED: piece.parts = [ new PPDPart ];
|
||||
piece.count -= matching_count;
|
||||
|
||||
// do we still qualify for any callback with remaining count?
|
||||
int min = Get_rule(piece.open).min;
|
||||
if (piece.count >= min) {
|
||||
stack.Push(piece);
|
||||
accum = stack.Get_accum();
|
||||
}
|
||||
else {
|
||||
accum.Add(Bry_.Repeat_bry(piece.open, piece.count));
|
||||
}
|
||||
}
|
||||
|
||||
Xomw_prepro_flags flags = stack.Get_flags();
|
||||
find_pipe = flags.Find_pipe;
|
||||
find_equals = flags.Find_eq;
|
||||
in_heading = flags.In_heading;
|
||||
|
||||
// Add XML element to the enclosing accumulator
|
||||
accum.Add(element);
|
||||
}
|
||||
else if (found == Found__pipe) {
|
||||
find_equals = true; // shortcut for getFlags()
|
||||
stack.Add_part(Bry_.Empty);
|
||||
accum = stack.Get_accum();
|
||||
i++;
|
||||
}
|
||||
else if (found == Found__equals) {
|
||||
find_equals = false; // shortcut for getFlags()
|
||||
stack.Get_current_part().Eqpos = accum.Len();
|
||||
accum.Add_byte(Byte_ascii.Eq);
|
||||
i++;
|
||||
}
|
||||
}
|
||||
|
||||
// Output any remaining unclosed brackets
|
||||
Bry_bfr root_accum = stack.Get_root_accum();
|
||||
int stack_len = stack.stack.Len();
|
||||
for (int j = 0; j < stack_len; j++) {
|
||||
Xomw_prepro_piece piece = (Xomw_prepro_piece)stack.stack.Get_at(j);
|
||||
root_accum.Add(piece.Break_syntax(tmp_bfr, -1));
|
||||
}
|
||||
root_accum.Add_str_a7("</root>");
|
||||
return root_accum.To_bry_and_clear();
|
||||
}
|
||||
private Xomw_prepro_rule Get_rule(byte[] bry) {
|
||||
if (Bry_.Eq(bry, rule_curly.bgn)) return rule_curly;
|
||||
else if (Bry_.Eq(bry, rule_brack.bgn)) return rule_brack;
|
||||
else if (Bry_.Eq(bry, rule_langv.bgn)) return rule_langv;
|
||||
else throw Err_.new_unhandled(bry);
|
||||
}
|
||||
private static final int
|
||||
Found__line_bgn = 0
|
||||
, Found__line_end = 1
|
||||
, Found__pipe = 2
|
||||
, Found__equals = 3
|
||||
, Found__angle = 4
|
||||
, Found__close = 5
|
||||
, Found__open = 6
|
||||
;
|
||||
private static final Xomw_prepro_rule
|
||||
rule_curly = new Xomw_prepro_rule(Bry_.new_a7("{"), Bry_.new_a7("}") , 2, 3, new int[] {Xomw_prepro_rule.Name__invalid, Xomw_prepro_rule.Name__invalid, Xomw_prepro_rule.Name__tmpl, Xomw_prepro_rule.Name__targ})
|
||||
, rule_brack = new Xomw_prepro_rule(Bry_.new_a7("["), Bry_.new_a7("]") , 2, 2, new int[] {Xomw_prepro_rule.Name__invalid, Xomw_prepro_rule.Name__invalid, Xomw_prepro_rule.Name__null})
|
||||
, rule_langv = new Xomw_prepro_rule(Bry_.new_a7("-{"), Bry_.new_a7("}-"), 1, 1, new int[] {Xomw_prepro_rule.Name__invalid, Xomw_prepro_rule.Name__null})
|
||||
;
|
||||
private static final byte[]
|
||||
Bry__only_include_bgn = Bry_.new_a7("<onlyinclude>")
|
||||
, Bry__only_include_end = Bry_.new_a7("</onlyinclude>")
|
||||
, Bry__comment_bgn = Bry_.new_a7("<!--")
|
||||
, Bry__comment_end = Bry_.new_a7("-->")
|
||||
, Bry__escaped_lt = Bry_.new_a7("<")
|
||||
, Bry__end_lhs = Bry_.new_a7("</")
|
||||
;
|
||||
private static final int Len__only_include_end = Bry__only_include_end.length;
|
||||
private static final Btrie_slim_mgr cur_char_trie = Cur_char_trie__new();
|
||||
private static final Ordered_hash
|
||||
ignored_tags_y = Ordered_hash_.New_bry().Add_many_str("includeonly", "/includeonly")
|
||||
, ignored_tags_n = Ordered_hash_.New_bry().Add_many_str("noinclude", "/noinclude", "onlyinclude", "/onlyinclude");
|
||||
private static final Hash_adp_bry
|
||||
ignored_elements__y = Hash_adp_bry.cs().Add_many_str("noinclude")
|
||||
, ignored_elements__n = Hash_adp_bry.cs().Add_many_str("includeonly");
|
||||
private static Btrie_slim_mgr Cur_char_trie__new() {
|
||||
Btrie_slim_mgr rv = Btrie_slim_mgr.ci_a7();
|
||||
String[] ary = new String[] {"|", "=", "<", "\n", "{", "[", "-{", "}", "]"};
|
||||
for (String str : ary) {
|
||||
byte[] bry = Bry_.new_a7(str);
|
||||
rv.Add_obj(bry, new Xomw_prepro_curchar_itm(bry, bry[0]));
|
||||
}
|
||||
|
||||
// handle "}-" separately
|
||||
byte[] langv_end = Bry_.new_a7("}-");
|
||||
rv.Add_obj(langv_end, new Xomw_prepro_curchar_itm(langv_end, Byte_ascii.At));
|
||||
return rv;
|
||||
}
|
||||
}
|
||||
@@ -1,235 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.parsers.prepros; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
|
||||
import org.junit.*;
|
||||
public class Xomw_prepro_wkr__tst {
|
||||
// Fixture shared by the tests in this class; tests drive it through Test__parse(src, expected) and, where needed, Init__for_inclusion_.
private final Xomw_prepro_wkr__fxt fxt = new Xomw_prepro_wkr__fxt();
|
||||
@Test public void Text() {
|
||||
fxt.Test__parse("abc", "<root>abc</root>");
|
||||
}
|
||||
@Test public void Brack() {
|
||||
fxt.Test__parse("a[[b]]c", "<root>a[[b]]c</root>");
|
||||
}
|
||||
@Test public void Brack__one() { // COVERS: "Add literal brace(s)"
|
||||
fxt.Test__parse("a[b]c", "<root>a[b]c</root>");
|
||||
}
|
||||
@Test public void Brack__max() { // COVERS: "The specified maximum exists in the callback array, unless the caller"
|
||||
fxt.Test__parse("a[[[[[b]]]]]c", "<root>a[[[[[b]]]]]c</root>");
|
||||
}
|
||||
@Test public void Template() {
|
||||
fxt.Test__parse("a{{b}}c", "<root>a<template><title>b</title></template>c</root>");
|
||||
}
|
||||
@Test public void Template__args__idx() {
|
||||
fxt.Test__parse("a{{b|c|d}}e", "<root>a<template><title>b</title><part><name index=\"1\" /><value>c</value></part><part><name index=\"2\" /><value>d</value></part></template>e</root>");
|
||||
}
|
||||
@Test public void Template__args__key() {
|
||||
fxt.Test__parse("a{{b|c=d}}e", "<root>a<template><title>b</title><part><name>c</name>=<value>d</value></part></template>e</root>");
|
||||
}
|
||||
@Test public void Template__line_start() { // COVERS: "The invocation is at the start of the line if lineStart is set in"
|
||||
fxt.Test__parse(String_.Concat_lines_nl_skip_last
|
||||
( "a"
|
||||
, "{{b}}"
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "<root>a"
|
||||
, "<template lineStart=\"1\"><title>b</title></template></root>"
|
||||
));
|
||||
}
|
||||
@Test public void Template__max() { // COVERS: "do we still qualify for any callback with remaining count?"
|
||||
fxt.Test__parse("a{{{{{b}}}}}c", "<root>a<template><title><tplarg><title>b</title></tplarg></title></template>c</root>");
|
||||
}
|
||||
@Test public void Tplarg() {
|
||||
fxt.Test__parse("a{{{b}}}c", "<root>a<tplarg><title>b</title></tplarg>c</root>");
|
||||
}
|
||||
@Test public void Tplarg__dflt() {
|
||||
fxt.Test__parse("a{{{b|c}}}d", "<root>a<tplarg><title>b</title><part><name index=\"1\" /><value>c</value></part></tplarg>d</root>");
|
||||
}
|
||||
@Test public void Comment() {
|
||||
fxt.Test__parse("a<!--b-->c", "<root>a<comment><!--b--></comment>c</root>");
|
||||
}
|
||||
@Test public void Comment__dangling() {// COVERS: "Unclosed comment in input, runs to end"
|
||||
fxt.Test__parse("a<!--b", "<root>a<comment><!--b</comment></root>");
|
||||
}
|
||||
@Test public void Comment__ws() { // COVERS: "Search backwards for leading whitespace"
|
||||
fxt.Test__parse("a <!--b--> c", "<root>a <comment><!--b--></comment> c</root>"); // NOTE: space is outside comment
|
||||
}
|
||||
@Test public void Comment__many__ws() {// COVERS: "Dump all but the last comment to the accumulator"
|
||||
fxt.Test__parse("a <!--1--> <!--2--> z", "<root>a <comment><!--1--></comment> <comment><!--2--></comment> z</root>"); // NOTE: space is outside comment;
|
||||
}
|
||||
@Test public void Comment__nl__ws() { // COVERS: "Eat the line if possible"
|
||||
fxt.Test__parse(String_.Concat_lines_nl_skip_last
|
||||
( "a"
|
||||
, " <!--1--> "
|
||||
, " <!--2--> "
|
||||
, "z"
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "<root>a"
|
||||
, "<comment> <!--1--> " // NOTE: space is inside </comment> if flanked by nl;
|
||||
, "</comment><comment> <!--2--> "
|
||||
, "</comment>z</root>"
|
||||
));
|
||||
}
|
||||
@Test public void Ext() { // COVERS.ALSO: "Note that the attr element contains the whitespace between name and attribute,"
|
||||
fxt.Test__parse("a<pre id=\"1\">b</pre>c", "<root>a<ext><name>pre</name><attr> id="1"</attr><inner>b</inner><close></pre></close></ext>c</root>");
|
||||
}
|
||||
@Test public void Ext__inline() { // COVERS: "if ( $text[$tagEndPos - 1] == '/' ) {"
|
||||
fxt.Test__parse("a<pre/>b" , "<root>a<ext><name>pre</name><attr></attr></ext>b</root>");
|
||||
fxt.Test__parse("a<pre />b" , "<root>a<ext><name>pre</name><attr> </attr></ext>b</root>");
|
||||
}
|
||||
@Test public void Ext__end__pass__space() {// COVERS: "\s*" in `preg_match( "/<\/" . preg_quote( $name, '/' ) . "\s*>/i",`
|
||||
fxt.Test__parse("a<pre>b</pre >c", "<root>a<ext><name>pre</name><attr></attr><inner>b</inner><close></pre ></close></ext>c</root>");
|
||||
}
|
||||
@Test public void Ext__end__pass__name() { // COVERS: "\s*" in `preg_match( "/<\/" . preg_quote( $name, '/' ) . "\s*>/i",`
|
||||
fxt.Test__parse("a<pre>b</pro></pre>c", "<root>a<ext><name>pre</name><attr></attr><inner>b</pro></inner><close></pre></close></ext>c</root>");
|
||||
}
|
||||
@Test public void Ext__end__fail__angle() {// COVERS: "\s*" in `preg_match( "/<\/" . preg_quote( $name, '/' ) . "\s*>/i",`
|
||||
fxt.Test__parse("a<pre>b</pre c", "<root>a<pre>b</pre c</root>");
|
||||
}
|
||||
@Test public void Ext__dangling() { // COVERS: "Let it run out to the end of the text."
|
||||
fxt.Test__parse("a<pre>bc", "<root>a<pre>bc</root>");
|
||||
}
|
||||
@Test public void Ext__dangling__many() { // COVERS: "Cache results, otherwise we have O(N^2) performance for input like <foo><foo><foo>..."
|
||||
fxt.Test__parse("a<pre><pre><pre>bc", "<root>a<pre><pre><pre>bc</root>");
|
||||
}
|
||||
@Test public void Ext__unclosed() { // COVERS: "Infinite backtrack"
|
||||
fxt.Test__parse("a<pre bcd", "<root>a<pre bcd</root>");
|
||||
}
|
||||
@Test public void Ext__noinclude() { // COVERS: "<includeonly> and <noinclude> just become <ignore> tags"
|
||||
fxt.Init__for_inclusion_(Bool_.N);
|
||||
fxt.Test__parse("a<includeonly>b<noinclude>c</noinclude>d</includeonly>e", "<root>a<ignore><includeonly>b<noinclude>c</noinclude>d</includeonly></ignore>e</root>");
|
||||
}
|
||||
@Test public void Heading() {
|
||||
fxt.Test__parse(String_.Concat_lines_nl_skip_last
|
||||
( "a"
|
||||
, "== b1 =="
|
||||
, "z"
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "<root>a"
|
||||
, "<h level=\"2\" i=\"1\">== b1 ==</h>"
|
||||
, "z</root>"
|
||||
));
|
||||
}
|
||||
@Test public void Heading__eos__no_nl() {
|
||||
fxt.Test__parse(String_.Concat_lines_nl_skip_last
|
||||
( "a"
|
||||
, "== b1 =="
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "<root>a"
|
||||
, "<h level=\"2\" i=\"1\">== b1 ==</h></root>"
|
||||
));
|
||||
}
|
||||
@Test public void Heading__bos__implied_nl() { // COVERS: "Is this the start of a heading?"
|
||||
fxt.Test__parse(String_.Concat_lines_nl_skip_last
|
||||
( "== b1 =="
|
||||
, "z"
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "<root><h level=\"2\" i=\"1\">== b1 ==</h>"
|
||||
, "z</root>"
|
||||
));
|
||||
}
|
||||
@Test public void Heading__dwim__y() { // COVERS: "DWIM: This looks kind of like a name/value separator."
|
||||
fxt.Test__parse(String_.Concat_lines_nl_skip_last
|
||||
( "a{{b|"
|
||||
, "=c="
|
||||
, "}}d"
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "<root>a<template><title>b</title><part><name>"
|
||||
, "</name>=<value>c="
|
||||
, "</value></part></template>d</root>"
|
||||
));
|
||||
}
|
||||
@Test public void Heading__dwim__n() { // COVERS: "DWIM: This looks kind of like a name/value separator."
|
||||
fxt.Test__parse(String_.Concat_lines_nl_skip_last
|
||||
( "a{{b|"
|
||||
, "==c=="
|
||||
, "}}d"
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "<root>a<template><title>b</title><part><name index=\"1\" /><value>"
|
||||
, "<h level=\"2\" i=\"1\">==c==</h>"
|
||||
, "</value></part></template>d</root>"
|
||||
));
|
||||
}
|
||||
@Test public void Heading__comment() { // COVERS: "Comment found at line end"
|
||||
fxt.Test__parse(String_.Concat_lines_nl_skip_last
|
||||
( "a"
|
||||
, "==b== <!--c-->"
|
||||
, ""
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "<root>a"
|
||||
, "<h level=\"2\" i=\"1\">==b== <comment><!--c--></comment></h>"
|
||||
, "</root>"
|
||||
));
|
||||
}
|
||||
@Test public void Heading__consecutive__5() { // COVERS: "This is just a single String of equals signs on its own line"
|
||||
fxt.Test__parse(String_.Concat_lines_nl_skip_last
|
||||
( "a"
|
||||
, "====="
|
||||
, ""
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "<root>a"
|
||||
, "<h level=\"2\" i=\"1\">=====</h>"
|
||||
, "</root>"
|
||||
));
|
||||
}
|
||||
@Test public void Heading__consecutive__1() { // COVERS: "Single equals sign on its own line, count=0"
|
||||
fxt.Test__parse(String_.Concat_lines_nl_skip_last
|
||||
( "a"
|
||||
, "="
|
||||
, ""
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "<root>a"
|
||||
, "="
|
||||
, "</root>"
|
||||
));
|
||||
}
|
||||
@Test public void Heading__unclosed() { // COVERS: "No match, no <h>, just pass down the inner src"
|
||||
fxt.Test__parse(String_.Concat_lines_nl_skip_last
|
||||
( "a"
|
||||
, "===b"
|
||||
, ""
|
||||
), String_.Concat_lines_nl_skip_last
|
||||
( "<root>a"
|
||||
, "===b"
|
||||
, "</root>"
|
||||
));
|
||||
}
|
||||
@Test public void Inclusion__n() {
|
||||
fxt.Init__for_inclusion_(Bool_.N);
|
||||
fxt.Test__parse("a<onlyinclude>b</onlyinclude>c", "<root>a<ignore><onlyinclude></ignore>b<ignore></onlyinclude></ignore>c</root>");
|
||||
}
|
||||
@Test public void Inclusion__y() {
|
||||
fxt.Init__for_inclusion_(Bool_.Y);
|
||||
fxt.Test__parse("a<onlyinclude>b</onlyinclude>c", "<root><ignore>a<onlyinclude></ignore>b<ignore></onlyinclude>c</ignore></root>");
|
||||
}
|
||||
@Test public void Ignored__noinclude() { // COVERS: "Handle ignored tags"
|
||||
fxt.Init__for_inclusion_(Bool_.N);
|
||||
fxt.Test__parse("a<noinclude>b</noinclude>c", "<root>a<ignore><noinclude></ignore>b<ignore></noinclude></ignore>c</root>");
|
||||
}
|
||||
}
|
||||
class Xomw_prepro_wkr__fxt {
|
||||
private final Xomw_prepro_wkr wkr = new Xomw_prepro_wkr();
|
||||
private boolean for_inclusion = false;
|
||||
public Xomw_prepro_wkr__fxt() {
|
||||
wkr.Init_by_wiki("pre");
|
||||
}
|
||||
public void Init__for_inclusion_(boolean v) {for_inclusion = v;}
|
||||
public void Test__parse(String src_str, String expd) {
|
||||
byte[] src_bry = Bry_.new_u8(src_str);
|
||||
byte[] actl = wkr.Preprocess_to_xml(src_bry, for_inclusion);
|
||||
Tfds.Eq_str_lines(expd, String_.new_u8(actl), src_str);
|
||||
}
|
||||
}
|
||||
@@ -1,267 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.parsers.quotes; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
|
||||
import gplx.xowa.mws.utls.*;
|
||||
import gplx.xowa.parsers.htmls.*;
|
||||
import gplx.core.primitives.*;
|
||||
public class Xomw_quote_wkr {// THREAD.UNSAFE: caching for repeated calls
|
||||
private Bry_bfr tmp;
|
||||
private final Int_list apos_pos_ary = new Int_list(32);
|
||||
public Xomw_quote_wkr(Xomw_parser mgr) {
|
||||
this.tmp = mgr.Tmp();
|
||||
}
|
||||
public void Do_all_quotes(Xomw_parser_ctx pctx, Xomw_parser_bfr pbfr) {
|
||||
Bry_bfr src_bfr = pbfr.Src();
|
||||
byte[] src = src_bfr.Bfr();
|
||||
int src_bgn = 0;
|
||||
int src_end = src_bfr.Len();
|
||||
Bry_bfr bfr = pbfr.Trg();
|
||||
pbfr.Switch();
|
||||
|
||||
int cur = src_bgn;
|
||||
int line_bgn = cur;
|
||||
while (true) {
|
||||
int line_end = Bry_find_.Find_fwd(src, Byte_ascii.Nl, line_bgn, src_end);
|
||||
if (line_end == Bry_find_.Not_found) {
|
||||
line_end = src_end;
|
||||
}
|
||||
Do_quotes(bfr, Bool_.Y, src, line_bgn, line_end);
|
||||
if (line_end == src_end)
|
||||
break;
|
||||
else
|
||||
line_bgn = line_end + 1; // 1=\n.length
|
||||
}
|
||||
// Bry_split_.Split(src, src_bgn, src_end, Byte_ascii.Nl, Bool_.N, this); // PORTED.SPLIT: $lines = StringUtils::explode( "\n", $text );
|
||||
if (bfr.Match_end_byt(Byte_ascii.Nl))
|
||||
bfr.Del_by_1(); // REF.MW: $outtext = substr( $outtext, 0, -1 );
|
||||
apos_pos_ary.Clear();
|
||||
}
|
||||
public byte[] Do_quotes(Bry_bfr tmp, byte[] src) {
|
||||
boolean found = Do_quotes(tmp, Bool_.N, src, 0, src.length);
|
||||
return found ? tmp.To_bry_and_clear() : src;
|
||||
}
|
||||
private boolean Do_quotes(Bry_bfr bfr, boolean all_quotes_mode, byte[] src, int line_bgn, int line_end) {
|
||||
byte[][] arr = Php_preg_.Split(apos_pos_ary, src, line_bgn, line_end, Wtxt__apos, Bool_.Y); // PORTED.REGX: arr = preg_split("/(''+)/", text, -1, PREG_SPLIT_DELIM_CAPTURE);
|
||||
if (arr == null) {
|
||||
if (all_quotes_mode) {
|
||||
bfr.Add_mid(src, line_bgn, line_end).Add_byte_nl();
|
||||
}
|
||||
return false;
|
||||
}
|
||||
int arr_len = arr.length;
|
||||
|
||||
// First, do some preliminary work. This may shift some apostrophes from
|
||||
// being mark-up to being text. It also counts the number of occurrences
|
||||
// of bold and italics mark-ups.
|
||||
int num_bold = 0;
|
||||
int num_italics = 0;
|
||||
for (int i = 1; i < arr_len; i += 2) {
|
||||
int apos_len = arr[i].length;
|
||||
// If there are ever four apostrophes, assume the first is supposed to
|
||||
// be text, and the remaining three constitute mark-up for bold text.
|
||||
// (bug 13227: ''''foo'''' turns into ' ''' foo ' ''')
|
||||
if (apos_len == 4) {
|
||||
arr[i - 1] = Bry_.Add(arr[i - 1], Byte_ascii.Apos_bry);
|
||||
arr[i] = Bry_.new_a7("'''");
|
||||
apos_len = 3;
|
||||
}
|
||||
else if (apos_len > 5) {
|
||||
// If there are more than 5 apostrophes in a row, assume they're all
|
||||
// text except for the last 5.
|
||||
// (bug 13227: ''''''foo'''''' turns into ' ''''' foo ' ''''')
|
||||
arr[i - 1] = Bry_.Add(arr[i - 1], Bry_.Repeat(Byte_ascii.Apos, apos_len - 5));
|
||||
arr[i] = Bry_.new_a7("'''''");
|
||||
apos_len = 5;
|
||||
}
|
||||
// Count the number of occurrences of bold and italics mark-ups.
|
||||
if (apos_len == 2) {
|
||||
num_italics++;
|
||||
}
|
||||
else if (apos_len == 3) {
|
||||
num_bold++;
|
||||
}
|
||||
else if (apos_len == 5) {
|
||||
num_italics++;
|
||||
num_bold++;
|
||||
}
|
||||
}
|
||||
|
||||
// If there is an odd number of both bold and italics, it is likely
|
||||
// that one of the bold ones was meant to be an apostrophe followed
|
||||
// by italics. Which one we cannot know for certain, but it is more
|
||||
// likely to be one that has a single-letter word before it.
|
||||
// NOTE: this code primarily handles italicized possessives; EX: The ''[[Main Page]]'''s talk page.
|
||||
if ((num_bold % 2 == 1) && (num_italics % 2 == 1)) {
|
||||
int prv_ends_w_word_1char = -1;
|
||||
int prv_ends_w_word_nchar = -1;
|
||||
int prv_ends_w_space = -1;
|
||||
for (int i = 1; i < arr_len; i += 2) {
|
||||
if (arr[i].length == 3) {
|
||||
byte[] prv = arr[i - 1];
|
||||
byte prv__last_char = Php_str_.Substr_byte(prv, -1);
|
||||
byte prv__last_minus_1_char = Php_str_.Substr_byte(prv, -2, 1);
|
||||
if (prv__last_char == Byte_ascii.Space) { // NOTE: prv ends in space; EX: "''prv '''"
|
||||
if (prv_ends_w_space == -1) {
|
||||
prv_ends_w_space = i;
|
||||
}
|
||||
}
|
||||
else if (prv__last_minus_1_char == Byte_ascii.Space) { // NOTE: prv ends in 1-char word; EX: "''prv a'''"
|
||||
prv_ends_w_word_1char = i;
|
||||
// if $firstsingleletterword is set, we don't
|
||||
// look at the other options, so we can bail early.
|
||||
break;
|
||||
}
|
||||
else {
|
||||
if (prv_ends_w_word_nchar == -1) {
|
||||
prv_ends_w_word_nchar = i;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If there is a single-letter word, use it!
|
||||
if (prv_ends_w_word_1char > -1) {
|
||||
arr[prv_ends_w_word_1char] = Wtxt__apos;
|
||||
arr[prv_ends_w_word_1char - 1] = Bry_.Add(arr[prv_ends_w_word_1char - 1], Byte_ascii.Apos);
|
||||
}
|
||||
else if (prv_ends_w_word_nchar > -1) {
|
||||
// If not, but there's a multi-letter word, use that one.
|
||||
arr[prv_ends_w_word_nchar] = Wtxt__apos;
|
||||
arr[prv_ends_w_word_nchar - 1] = Bry_.Add(arr[prv_ends_w_word_nchar - 1], Byte_ascii.Apos);
|
||||
}
|
||||
else if (prv_ends_w_space > -1) {
|
||||
// ... otherwise use the first one that has neither.
|
||||
// (notice that it is possible for all three to be -1 if, for example,
|
||||
// there is only one pentuple-apostrophe in the line)
|
||||
arr[prv_ends_w_space] = Wtxt__apos;
|
||||
arr[prv_ends_w_space - 1] = Bry_.Add(arr[prv_ends_w_space - 1], Byte_ascii.Apos);
|
||||
}
|
||||
}
|
||||
|
||||
// Now let's actually convert our apostrophic mush to HTML!
|
||||
int state = State__empty;
|
||||
for (int j = 0; j < arr_len; j++) {
|
||||
if ((j % 2) == 0) {
|
||||
if (state == State__both) {
|
||||
tmp.Add(arr[j]);
|
||||
}
|
||||
else {
|
||||
bfr.Add(arr[j]);
|
||||
}
|
||||
}
|
||||
else {
|
||||
int apos_len = arr[j].length;
|
||||
if (apos_len == 2) {
|
||||
if (state == State__i) {
|
||||
bfr.Add_str_a7("</i>");
|
||||
state = State__empty;
|
||||
}
|
||||
else if (state == State__bi) {
|
||||
bfr.Add_str_a7("</i>");
|
||||
state = State__b;
|
||||
}
|
||||
else if (state == State__ib) {
|
||||
bfr.Add_str_a7("</b></i><b>");
|
||||
state = State__b;
|
||||
}
|
||||
else if (state == State__both) {
|
||||
bfr.Add_str_a7("<b><i>").Add_bfr_and_preserve(tmp).Add_str_a7("</i>");
|
||||
state = State__b;
|
||||
}
|
||||
else { // state can be 'b' or ''
|
||||
bfr.Add_str_a7("<i>");
|
||||
state = state == State__b ? State__bi : State__i;
|
||||
}
|
||||
}
|
||||
else if (apos_len == 3) {
|
||||
if (state == State__b) {
|
||||
bfr.Add_str_a7("</b>");
|
||||
state = State__empty;
|
||||
}
|
||||
else if (state == State__bi) {
|
||||
bfr.Add_str_a7("</i></b><i>");
|
||||
state = State__i;
|
||||
}
|
||||
else if (state == State__ib) {
|
||||
bfr.Add_str_a7("</b>");
|
||||
state = State__i;
|
||||
}
|
||||
else if (state == State__both) {
|
||||
bfr.Add_str_a7("<i><b>").Add_bfr_and_preserve(tmp).Add_str_a7("</b>");
|
||||
state = State__i;
|
||||
}
|
||||
else { // state can be 'i' or ''
|
||||
bfr.Add_str_a7("<b>");
|
||||
state = state == State__i ? State__ib : State__b;
|
||||
}
|
||||
}
|
||||
else if (apos_len == 5) {
|
||||
if (state == State__b) {
|
||||
bfr.Add_str_a7("</b><i>");
|
||||
state = State__i;
|
||||
}
|
||||
else if (state == State__i) {
|
||||
bfr.Add_str_a7("</i><b>");
|
||||
state = State__b;
|
||||
}
|
||||
else if (state == State__bi) {
|
||||
bfr.Add_str_a7("</i></b>");
|
||||
state = State__empty;
|
||||
}
|
||||
else if (state == State__ib) {
|
||||
bfr.Add_str_a7("</b></i>");
|
||||
state = State__empty;
|
||||
}
|
||||
else if (state == State__both) {
|
||||
bfr.Add_str_a7("<i><b>").Add_bfr_and_preserve(tmp).Add_str_a7("</b></i>");
|
||||
state = State__empty;
|
||||
}
|
||||
else { // (state == '')
|
||||
tmp.Clear();
|
||||
state = State__both;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// Now close all remaining tags. Notice that the order is important.
|
||||
if (state == State__b || state == State__ib) {
|
||||
bfr.Add_str_a7("</b>");
|
||||
}
|
||||
if (state == State__i || state == State__bi || state == State__ib) {
|
||||
bfr.Add_str_a7("</i>");
|
||||
}
|
||||
if (state == State__bi) {
|
||||
bfr.Add_str_a7("</b>");
|
||||
}
|
||||
// There might be lonely ''''', so make sure we have a buffer
|
||||
if (state == State__both && tmp.Len_gt_0()) {
|
||||
bfr.Add_str_a7("<b><i>").Add_bfr_and_clear(tmp).Add_str_a7("</i></b>");
|
||||
}
|
||||
bfr.Add_byte_nl();
|
||||
return true;
|
||||
}
|
||||
private static final int
|
||||
State__empty = 0
|
||||
, State__b = 1
|
||||
, State__i = 2
|
||||
, State__bi = 3
|
||||
, State__ib = 4
|
||||
, State__both = 5
|
||||
;
|
||||
private static final byte[] Wtxt__apos = Bry_.new_a7("''");
|
||||
}
|
||||
@@ -1,45 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.parsers.quotes; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
|
||||
import org.junit.*;
|
||||
public class Xomw_quote_wkr__tst {
|
||||
private final Xomw_quote_wkr__fxt fxt = new Xomw_quote_wkr__fxt();
|
||||
@Test public void Apos__0() {fxt.Test__parse("abc" , "abc");}
|
||||
@Test public void Apos__1() {fxt.Test__parse("a'b'c" , "a'b'c");}
|
||||
@Test public void Apos__2() {fxt.Test__parse("a''b''c" , "a<i>b</i>c");}
|
||||
@Test public void Apos__3() {fxt.Test__parse("a'''b'''c" , "a<b>b</b>c");}
|
||||
@Test public void Apos__4() {fxt.Test__parse("a''''b''''c" , "a'<b>b'</b>c");} // COVERS: "If there are ever four apostrophes"
|
||||
@Test public void Apos__5() {fxt.Test__parse("a'''''b'''''c" , "a<i><b>b</b></i>c");}
|
||||
@Test public void Apos__7() {fxt.Test__parse("a'''''''b'''''''c" , "a''<i><b>b''</b></i>c");} // COVERS: "If there are more than 5 apostrophes in a row"
|
||||
@Test public void Mix__single() {fxt.Test__parse("''a ''' ''b b''' ''cc'''" , "<i>a <b> </b></i><b>b b'<i> </i>cc</b>");} // COVERS: "If there is a single-letter word, use it!"
|
||||
@Test public void Mix__multi() {fxt.Test__parse("''a ''' ''b ''' ''cc'''" , "<i>a <b> </b></i><b>b </b> <i>cc'</i>");} // COVERS: "If not, but there's a multi-letter word, use that one."
|
||||
@Test public void Mix__space() {fxt.Test__parse("''a ''' ''b ''' ''c '''" , "<i>a '</i> <i>b <b> </b></i><b>c </b>");} // COVERS: "... otherwise use the first one that has neither."
|
||||
@Test public void Dangling__b() {fxt.Test__parse("a'''b" , "a<b>b</b>");} // COVERS: "if (state == State__b || state == State__ib)"
|
||||
@Test public void Dangling__i() {fxt.Test__parse("a''b" , "a<i>b</i>");} // COVERS: "if (state == State__i || state == State__bi || state == State__ib)"
|
||||
@Test public void Dangling__lone(){fxt.Test__parse("a'''''b" , "a<b><i>b</i></b>");} // COVERS: "There might be lonely ''''', so make sure we have a buffer"
|
||||
@Test public void Nl__text() {fxt.Test__parse("a\nb''c''d\n\ne" , "a\nb<i>c</i>d\n\ne");}
|
||||
}
|
||||
class Xomw_quote_wkr__fxt {
|
||||
private final Xomw_quote_wkr wkr = new Xomw_quote_wkr(new Xomw_parser());
|
||||
private final Xomw_parser_bfr pbfr = new Xomw_parser_bfr();
|
||||
public void Test__parse(String src_str, String expd) {
|
||||
byte[] src_bry = Bry_.new_u8(src_str);
|
||||
wkr.Do_all_quotes(new Xomw_parser_ctx(), pbfr.Init(src_bry));
|
||||
Tfds.Eq_str_lines(expd, pbfr.Rslt().To_str_and_clear(), src_str);
|
||||
}
|
||||
}
|
||||
@@ -1,292 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws.parsers.tables; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
|
||||
import gplx.xowa.mws.utls.*;
|
||||
import gplx.xowa.parsers.htmls.*;
|
||||
import gplx.xowa.mws.libs.*; import gplx.xowa.parsers.uniqs.*;
|
||||
public class Xomw_table_wkr implements gplx.core.brys.Bry_split_wkr {// THREAD.UNSAFE: caching for repeated calls
|
||||
private final Bry_bfr tmp;
|
||||
private Bry_bfr bfr;
|
||||
private final Xomw_sanitizer sanitizer; private final Xomw_strip_state strip_state;
|
||||
private final List_adp
|
||||
td_history = List_adp_.New() // Is currently a td tag open?
|
||||
, last_tag_history = List_adp_.New() // Save history of last lag activated (td, th or caption)
|
||||
, tr_history = List_adp_.New() // Is currently a tr tag open?
|
||||
, tr_attributes = List_adp_.New() // history of tr attributes
|
||||
, has_opened_tr = List_adp_.New() // Did this table open a <tr> element?
|
||||
;
|
||||
private int indent_level = 0; // indent level of the table
|
||||
private byte[] first_2 = new byte[2];
|
||||
public Xomw_table_wkr(Xomw_parser parser) {
|
||||
this.tmp = parser.Tmp();
|
||||
this.sanitizer = parser.Sanitizer();
|
||||
this.strip_state = parser.Strip_state();
|
||||
}
|
||||
public void Do_table_stuff(Xomw_parser_ctx pctx, Xomw_parser_bfr pbfr) {
|
||||
Bry_bfr src_bfr = pbfr.Src();
|
||||
byte[] src = src_bfr.Bfr();
|
||||
int src_bgn = 0;
|
||||
int src_end = src_bfr.Len();
|
||||
this.bfr = pbfr.Trg();
|
||||
pbfr.Switch();
|
||||
|
||||
indent_level = 0;
|
||||
|
||||
Bry_split_.Split(src, src_bgn, src_end, Byte_ascii.Nl, Bool_.N, this); // PORTED.SPLIT: $lines = StringUtils::explode("\n", $text);
|
||||
|
||||
// Closing open td, tr && table
|
||||
while (td_history.Len() > 0) {
|
||||
if (Php_ary_.Pop_bool_or_n(td_history)) {
|
||||
bfr.Add_str_a7("</td>\n");
|
||||
}
|
||||
if (Php_ary_.Pop_bool_or_n(tr_history)) {
|
||||
bfr.Add_str_a7("</tr>\n");
|
||||
}
|
||||
if (!Php_ary_.Pop_bool_or_n(has_opened_tr)) {
|
||||
bfr.Add_str_a7("<tr><td></td></tr>\n");
|
||||
}
|
||||
bfr.Add_str_a7("</table>\n");
|
||||
}
|
||||
|
||||
// Remove trailing line-ending (b/c)
|
||||
if (bfr.Get_at_last_or_nil_if_empty() == Byte_ascii.Nl) {
|
||||
bfr.Del_by_1();
|
||||
}
|
||||
|
||||
// special case: don't return empty table
|
||||
if ( bfr.Len() == Len__tb__empty
|
||||
&& Bry_.Eq(bfr.Bfr(), 0, Len__tb__empty, Html__tb__empty)) {
|
||||
bfr.Clear();
|
||||
return;
|
||||
}
|
||||
}
|
||||
public int Split(byte[] src, int itm_bgn, int itm_end) {
|
||||
byte[] out_line = Bry_.Mid(src, itm_bgn, itm_end); // MW: "$outLine"
|
||||
byte[] line = Bry_.Trim(out_line); // MW: "$line"
|
||||
|
||||
int line_len = line.length;
|
||||
if (line_len == 0) { // empty line, go to next line
|
||||
bfr.Add(out_line).Add_byte_nl();
|
||||
return Bry_split_.Rv__ok;
|
||||
}
|
||||
|
||||
byte first_char = line[0];
|
||||
first_2[0] = line[0];
|
||||
first_2[1] = line_len == 1 ? Byte_ascii.Null : line[1];
|
||||
|
||||
// PORTED: preg_match('/^(:*)\s*\{\|(.*)$/', $line, $matches)
|
||||
byte[] tblw_atrs = null;
|
||||
boolean tblw_bgn_found = false;
|
||||
int colons_end = Bry_find_.Find_fwd_while(src, 0, line_len, Byte_ascii.Colon);
|
||||
int tblw_bgn = Bry_find_.Find_fwd_while(line, colons_end, line_len, Byte_ascii.Space);
|
||||
int tblw_atrs_bgn = tblw_bgn + 2;
|
||||
if (Bry_.Eq(line, tblw_bgn, tblw_atrs_bgn, Wtxt__tb__bgn)) {
|
||||
tblw_bgn_found = true;
|
||||
tblw_atrs = (tblw_atrs_bgn == line_len) ? Bry_.Empty : Bry_.Mid(line, tblw_atrs_bgn, line_len);
|
||||
}
|
||||
if (tblw_bgn_found) {
|
||||
// First check if we are starting a new table
|
||||
indent_level = colons_end;
|
||||
|
||||
tblw_atrs = strip_state.Unstrip_both(tblw_atrs);
|
||||
|
||||
// PORTED: out_line = str_repeat('<dl><dd>', $indent_level) . "<table{atrs}>";
|
||||
for (int j = 0; j < indent_level; j++)
|
||||
tmp.Add(Html__dl__bgn);
|
||||
tmp.Add_str_a7("<table");
|
||||
sanitizer.Fix_tag_attributes(tmp, Name__table, tblw_atrs);
|
||||
tmp.Add_byte(Byte_ascii.Angle_end);
|
||||
out_line = tmp.To_bry_and_clear();
|
||||
td_history.Add(false);
|
||||
last_tag_history.Add(Bry_.Empty);
|
||||
tr_history.Add(false);
|
||||
tr_attributes.Add(Bry_.Empty);
|
||||
has_opened_tr.Add(false);
|
||||
}
|
||||
else if (td_history.Len() == 0) {
|
||||
// Don't do any of the following
|
||||
bfr.Add(out_line).Add_byte_nl();
|
||||
return Bry_split_.Rv__ok;
|
||||
}
|
||||
else if (Bry_.Eq(first_2, Wtxt__tb__end)) {
|
||||
// We are ending a table
|
||||
line = tmp.Add_str_a7("</table>").Add_mid(line, 2, line.length).To_bry_and_clear();
|
||||
byte[] last_tag = Php_ary_.Pop_bry_or_null(last_tag_history);
|
||||
|
||||
if (!Php_ary_.Pop_bool_or_n(has_opened_tr)) {
|
||||
line = tmp.Add_str_a7("<tr><td></td></tr>").Add(line).To_bry_and_clear();
|
||||
}
|
||||
|
||||
if (Php_ary_.Pop_bool_or_n(tr_history)) {
|
||||
line = tmp.Add_str_a7("</tr>").Add(line).To_bry_and_clear();
|
||||
}
|
||||
|
||||
if (Php_ary_.Pop_bool_or_n(td_history)) {
|
||||
line = tmp.Add_str_a7("</").Add(last_tag).Add_byte(Byte_ascii.Angle_end).Add(line).To_bry_and_clear();
|
||||
}
|
||||
Php_ary_.Pop_bry_or_null(tr_attributes);
|
||||
// PORTED:$outLine = $line . str_repeat( '</dd></dl>', $indent_level );
|
||||
tmp.Add(line);
|
||||
for (int j = 0; j < indent_level; j++)
|
||||
tmp.Add(Html__dl__end);
|
||||
out_line = tmp.To_bry_and_clear();
|
||||
}
|
||||
else if (Bry_.Eq(first_2, Wtxt__tr)) {
|
||||
// Now we have a table row
|
||||
line = Bry_.Mid(line, 2); // PORTED: $line = preg_replace('#^\|-+#', '', $line);
|
||||
|
||||
// Whats after the tag is now only attributes
|
||||
byte[] atrs = strip_state.Unstrip_both(line);
|
||||
sanitizer.Fix_tag_attributes(tmp, Name__tr, atrs);
|
||||
atrs = tmp.To_bry_and_clear();
|
||||
|
||||
Php_ary_.Pop_bry_or_null(tr_attributes);
|
||||
tr_attributes.Add(atrs);
|
||||
|
||||
line = Bry_.Empty;
|
||||
byte[] last_tag = Php_ary_.Pop_bry_or_null(last_tag_history);
|
||||
Php_ary_.Pop_bool_or_n(has_opened_tr);
|
||||
has_opened_tr.Add(true);
|
||||
|
||||
if (Php_ary_.Pop_bool_or_n(tr_history)) {
|
||||
line = Html__tr__end;
|
||||
}
|
||||
|
||||
if (Php_ary_.Pop_bool_or_n(td_history)) {
|
||||
line = tmp.Add_str_a7("</").Add(last_tag).Add_byte(Byte_ascii.Gt).Add(line).To_bry_and_clear();
|
||||
}
|
||||
|
||||
out_line = line;
|
||||
tr_history.Add(false);
|
||||
td_history.Add(false);
|
||||
last_tag_history.Add(Bry_.Empty);
|
||||
}
|
||||
else if ( first_char == Byte_ascii.Pipe
|
||||
|| first_char == Byte_ascii.Bang
|
||||
|| Bry_.Eq(first_2, Wtxt__caption)
|
||||
) {
|
||||
// This might be cell elements, td, th or captions
|
||||
if (Bry_.Eq(first_2, Wtxt__caption)) {
|
||||
first_char = Byte_ascii.Plus;
|
||||
line = Bry_.Mid(line, 2);
|
||||
} else {
|
||||
line = Bry_.Mid(line, 1);
|
||||
}
|
||||
|
||||
// Implies both are valid for table headings.
|
||||
if (first_char == Byte_ascii.Bang) {
|
||||
Xomw_string_utils.Replace_markup(line, 0, line.length, Wtxt__th2, Wtxt__td2); // $line = StringUtils::replaceMarkup('!!', '||', $line);
|
||||
}
|
||||
|
||||
// Split up multiple cells on the same line.
|
||||
// FIXME : This can result in improper nesting of tags processed
|
||||
// by earlier parser steps.
|
||||
byte[][] cells = Bry_split_.Split(line, Wtxt__td2);
|
||||
if (cells.length == 0) cells = Cells__empty; // handle "\n|\n" which should still generate "<tr><td></td></tr>", not ""; see TEST
|
||||
|
||||
out_line = Bry_.Empty;
|
||||
|
||||
byte[] previous = null;
|
||||
// Loop through each table cell
|
||||
int cells_len = cells.length;
|
||||
for (int j = 0; j < cells_len; j++) {
|
||||
byte[] cell = cells[j];
|
||||
previous = Bry_.Empty;
|
||||
if (first_char != Byte_ascii.Plus) {
|
||||
byte[] tr_after = Php_ary_.Pop_bry_or_null(tr_attributes);
|
||||
if (!Php_ary_.Pop_bool_or_n(tr_history)) {
|
||||
previous = tmp.Add_str_a7("<tr").Add(tr_after).Add_str_a7(">\n").To_bry_and_clear();
|
||||
}
|
||||
tr_history.Add(true);
|
||||
tr_attributes.Add(Bry_.Empty);
|
||||
Php_ary_.Pop_bool_or_n(has_opened_tr);
|
||||
has_opened_tr.Add(true);
|
||||
}
|
||||
|
||||
byte[] last_tag = Php_ary_.Pop_bry_or_null(last_tag_history);
|
||||
|
||||
if (Php_ary_.Pop_bool_or_n(td_history)) {
|
||||
previous = tmp.Add_str_a7("</").Add(last_tag).Add_str_a7(">\n").Add(previous).To_bry_and_clear();
|
||||
}
|
||||
|
||||
if (first_char == Byte_ascii.Pipe) {
|
||||
last_tag = Name__td;
|
||||
}
|
||||
else if (first_char == Byte_ascii.Bang) {
|
||||
last_tag = Name__th;
|
||||
}
|
||||
else if (first_char == Byte_ascii.Plus) {
|
||||
last_tag = Name__caption;
|
||||
}
|
||||
else {
|
||||
last_tag = Bry_.Empty;
|
||||
}
|
||||
|
||||
last_tag_history.Add(last_tag);
|
||||
|
||||
// A cell could contain both parameters and data
|
||||
byte[][] cell_data = Bry_split_.Split_w_max(cell, Byte_ascii.Pipe, 2);
|
||||
|
||||
// Bug 553: Note that a '|' inside an invalid link should not
|
||||
// be mistaken as delimiting cell parameters
|
||||
byte[] cell_data_0 = cell_data[0];
|
||||
byte[] cell_data_1 = cell_data[1];
|
||||
if (Bry_find_.Find_fwd(cell_data_0, Wtxt__lnki__bgn) != Bry_find_.Not_found) {
|
||||
cell = tmp.Add(previous).Add_byte(Byte_ascii.Angle_bgn).Add(last_tag).Add_byte(Byte_ascii.Angle_end).Add(cell).To_bry_and_clear();
|
||||
}
|
||||
else if (cell_data_1 == null) {
|
||||
cell = tmp.Add(previous).Add_byte(Byte_ascii.Angle_bgn).Add(last_tag).Add_byte(Byte_ascii.Angle_end).Add(cell_data_0).To_bry_and_clear();
|
||||
}
|
||||
else {
|
||||
byte[] atrs = strip_state.Unstrip_both(cell_data_0);
|
||||
tmp.Add(previous).Add_byte(Byte_ascii.Angle_bgn).Add(last_tag);
|
||||
sanitizer.Fix_tag_attributes(tmp, last_tag, atrs);
|
||||
tmp.Add_byte(Byte_ascii.Angle_end).Add(cell_data_1);
|
||||
cell = tmp.To_bry_and_clear();
|
||||
}
|
||||
|
||||
out_line = Bry_.Add(out_line, cell);
|
||||
td_history.Add(true);
|
||||
}
|
||||
}
|
||||
bfr.Add(out_line).Add_byte_nl();
|
||||
return Bry_split_.Rv__ok;
|
||||
}
|
||||
private static final byte[]
|
||||
Wtxt__tb__bgn = Bry_.new_a7("{|")
|
||||
, Wtxt__tb__end = Bry_.new_a7("|}")
|
||||
, Wtxt__tr = Bry_.new_a7("|-")
|
||||
, Wtxt__caption = Bry_.new_a7("|+")
|
||||
, Wtxt__th2 = Bry_.new_a7("!!")
|
||||
, Wtxt__td2 = Bry_.new_a7("||")
|
||||
, Wtxt__lnki__bgn = Bry_.new_a7("[[")
|
||||
|
||||
, Name__table = Bry_.new_a7("table")
|
||||
, Name__tr = Bry_.new_a7("tr")
|
||||
, Name__td = Bry_.new_a7("td")
|
||||
, Name__th = Bry_.new_a7("th")
|
||||
, Name__caption = Bry_.new_a7("caption")
|
||||
|
||||
, Html__tr__end = Bry_.new_a7("</tr>")
|
||||
, Html__dl__bgn = Bry_.new_a7("<dl><dd>")
|
||||
, Html__dl__end = Bry_.new_a7("</dd></dl>")
|
||||
, Html__tb__empty = Bry_.new_a7("<table>\n<tr><td></td></tr>\n</table>")
|
||||
;
|
||||
private static final int Len__tb__empty = Html__tb__empty.length;
|
||||
private static final byte[][] Cells__empty = new byte[][] {Bry_.Empty};
|
||||
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user