1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2026-03-02 03:49:30 +00:00

Xomw: Move Mw_parse classes into separate project

This commit is contained in:
gnosygnu
2017-02-08 17:38:39 -05:00
parent fdf6c49a05
commit 9a19be675e
117 changed files with 394 additions and 260 deletions

View File

@@ -141,7 +141,7 @@ public class Xoa_ttl { // PAGE:en.w:http://en.wikipedia.org/wiki/Help:Link; REF.
public byte[] Get_prefixed_db_key() {return Full_db();}
public boolean Has_fragment() {return anch_bgn != -1;}
public byte[] Get_fragment() {return Anch_txt();}
public byte[] Get_link_url(gplx.xowa.mws.htmls.Xomw_qry_mgr qry_mgr, boolean query2, boolean proto) {
public byte[] Get_link_url(Object qry_mgr, boolean query2, boolean proto) {
// if ( $this->isExternal() || $proto !== false ) {
// $ret = $this->getFullURL( $query, $query2, $proto );
// }

View File

@@ -15,8 +15,8 @@ GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.mws.parsers.headings; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
package gplx.xowa.mediawiki.includes.parsers.headingsOld; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*; import gplx.xowa.mediawiki.includes.parsers.*;
public interface Xomw_heading_cbk {
void On_hdr_seen(Xomw_parser_ctx pctx, Xomw_heading_wkr wkr);
void On_src_done(Xomw_parser_ctx pctx, Xomw_heading_wkr wkr);
void On_hdr_seen(Xomw_heading_wkr wkr);
void On_src_done(Xomw_heading_wkr wkr);
}

View File

@@ -15,10 +15,9 @@ GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.mws.parsers.headings; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
package gplx.xowa.mediawiki.includes.parsers.headingsOld; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*; import gplx.xowa.mediawiki.includes.*; import gplx.xowa.mediawiki.includes.parsers.*;
import gplx.core.btries.*; import gplx.xowa.langs.*;
public class Xomw_heading_wkr {
private Xomw_parser_ctx pctx;
private Xomw_heading_cbk cbk;
public byte[] Src() {return src;} private byte[] src;
public int Src_end() {return src_end;} private int src_end;
@@ -30,17 +29,8 @@ public class Xomw_heading_wkr {
public int Hdr_lhs_end() {return hdr_lhs_end;} private int hdr_lhs_end;
public int Hdr_rhs_bgn() {return hdr_rhs_bgn;} private int hdr_rhs_bgn;
public int Hdr_rhs_end() {return hdr_rhs_end;} private int hdr_rhs_end;
public void Do_headings(Xomw_parser_ctx pctx, Xomw_parser_bfr pbfr, Xomw_heading_cbk__html cbk) {
Bry_bfr src_bfr = pbfr.Src();
byte[] src_bry = src_bfr.Bfr();
int src_end = src_bfr.Len();
cbk.Bfr_(pbfr.Trg());
pbfr.Switch();
Parse(pctx, src_bry, 0, src_end, cbk);
}
public void Parse(Xomw_parser_ctx pctx, byte[] src, int src_bgn, int src_end, Xomw_heading_cbk cbk) { // REF.MW: /includes/parser/Parser.php|doHeadings
public void Parse(byte[] src, int src_bgn, int src_end, Xomw_heading_cbk cbk) { // REF.MW: /includes/parser/Parser.php|doHeadings
// init members
this.pctx = pctx;
this.src = src;
this.src_end = src_end;
this.cbk = cbk;
@@ -53,7 +43,7 @@ public class Xomw_heading_wkr {
// do loop
int pos = src_bgn;
this.txt_bgn = pos == Xomw_parser_ctx.Pos__bos ? 0 : pos;
this.txt_bgn = pos == -1 ? 0 : pos;
byte b = Byte_ascii.Nl;
while (true) {
int nxt = pos + 1;
@@ -70,7 +60,7 @@ public class Xomw_heading_wkr {
// EOS; add all text after last "==\n"
if (pos == src_end) {
cbk.On_src_done(pctx, this);
cbk.On_src_done(this);
break;
}
b = src[pos];
@@ -102,7 +92,7 @@ public class Xomw_heading_wkr {
this.hdr_num = hdr_lhs_len < hdr_rhs_len ? hdr_lhs_len : hdr_rhs_len;
cbk.On_hdr_seen(pctx, this);
cbk.On_hdr_seen(this);
return nl_rhs;
}
}

View File

@@ -1,33 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.mws; import gplx.*; import gplx.xowa.*;
/**
 * A magic word: its name, its case-sensitivity flag, and one
 * Xomw_MagicWordSynonym per raw synonym text.
 */
public class Xomw_MagicWord {
  public boolean case_match;
  public byte[] name;
  public Xomw_MagicWordSynonym[] synonyms;
  /**
   * @param name          magic word name; also passed to every synonym
   * @param case_match    case flag; also passed to every synonym
   * @param synonyms_ary  raw synonym texts, one entry per synonym
   */
  public Xomw_MagicWord(byte[] name, boolean case_match, byte[][] synonyms_ary) {
    this.name = name;
    this.case_match = case_match;
    // wrap each raw text in a synonym object, preserving the input order
    Xomw_MagicWordSynonym[] wrapped = new Xomw_MagicWordSynonym[synonyms_ary.length];
    int idx = 0;
    for (byte[] synonym_text : synonyms_ary) {
      wrapped[idx++] = new Xomw_MagicWordSynonym(name, case_match, synonym_text);
    }
    this.synonyms = wrapped;
  }
}

View File

@@ -1,376 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.mws; import gplx.*; import gplx.xowa.*;
import gplx.core.btries.*; import gplx.core.primitives.*;
public class Xomw_MagicWordArray {
private Btrie_slim_mgr fwd_trie;
private Btrie_bwd_mgr bwd_trie;
private final Btrie_rv trv = new Btrie_rv();
// private final Xomw_MagicWordMgr magic_word_mgr;
public final byte[][] names;
// /** @var array */
// private hash;
// private baseRegex;
// private regex;
public Xomw_MagicWordArray(Xomw_MagicWordMgr magic_word_mgr, byte[][] names) {
  // this.magic_word_mgr = magic_word_mgr;
  this.names = names;

  // Index every synonym of every named word in a trie, chosen by where "$1" sits in the synonym.
  // ASSUME: all magic words in a group have the same case sensitivity
  // (each trie is created lazily with the case flag of the first word that needs it)
  for (byte[] name : names) {
    Xomw_MagicWord word = magic_word_mgr.Get(name);
    if (word == null) continue;	// names missing from the mgr are skipped

    for (Xomw_MagicWordSynonym synonym : word.synonyms) {
      byte tid = synonym.arg1_tid;
      if (tid == Xomw_MagicWordSynonym.Arg1__nil || tid == Xomw_MagicWordSynonym.Arg1__end) {
        // no "$1", or "$1" at end: match forward from start of text
        if (fwd_trie == null) {
          fwd_trie = word.case_match ? Btrie_slim_mgr.cs() : Btrie_slim_mgr.ci_u8();
        }
        fwd_trie.Add_obj(synonym.text_wo_arg1, synonym);
      }
      else if (tid == Xomw_MagicWordSynonym.Arg1__bgn) {
        // "$1" at start: match backward from end of text
        if (bwd_trie == null) {
          bwd_trie = Btrie_bwd_mgr.c__(word.case_match);
        }
        bwd_trie.Add(synonym.text_wo_arg1, synonym);
      }
      else if (tid == Xomw_MagicWordSynonym.Arg1__mid || tid == Xomw_MagicWordSynonym.Arg1__mix) {
        // ignore if mid / mix; only log a warning
        Gfo_usr_dlg_.Instance.Warn_many("", "", "MagicWordArray: unsupported arg_1_tid: tid=~{0}", synonym.arg1_tid);
      }
    }
  }
}
// /**
// * Add a magic word by name
// *
// * @param String name
// */
// public function add(name) {
// this->names[] = name;
// this->hash = this->baseRegex = this->regex = null;
// }
//
// /**
// * Add a number of magic words by name
// *
// * @param array names
// */
// public function addArray(names) {
// this->names = array_merge(this->names, array_values(names));
// this->hash = this->baseRegex = this->regex = null;
// }
//
// /**
// * Get a 2-d hashtable for this array
// * @return array
// */
// public function getHash() {
// if (is_null(this->hash)) {
// global wgContLang;
// this->hash = [ 0 => [], 1 => [] ];
// foreach (this->names as name) {
// magic = MagicWord::get(name);
// case = intval(magic->isCaseSensitive());
// foreach (magic->getSynonyms() as syn) {
// if (!case) {
// syn = wgContLang->lc(syn);
// }
// this->hash[case][syn] = name;
// }
// }
// }
// return this->hash;
// }
//
// /**
// * Get the super regex
// * @return array
// */
// public function getBaseRegex() {
// if (is_null(this->baseRegex)) {
// this->baseRegex = [ 0 => '', 1 => '' ];
// foreach (this->names as name) {
// magic = MagicWord::get(name);
// case = intval(magic->isCaseSensitive());
// foreach (magic->getSynonyms() as i => syn) {
// // Group name must start with a non-digit in PCRE 8.34+
// it = strtr(i, '0123456789', 'abcdefghij');
// group = "(?P<{it}_{name}>" . preg_quote(syn, '/') . ')';
// if (this->baseRegex[case] === '') {
// this->baseRegex[case] = group;
// } else {
// this->baseRegex[case] .= '|' . group;
// }
// }
// }
// }
// return this->baseRegex;
// }
//
// /**
// * Get an unanchored regex that does not match parameters
// * @return array
// */
// public function getRegex() {
// if (is_null(this->regex)) {
// super = this->getBaseRegex();
// this->regex = [ '', '' ];
// if (this->baseRegex[0] !== '') {
// this->regex[0] = "/{super[0]}/iuS";
// }
// if (this->baseRegex[1] !== '') {
// this->regex[1] = "/{super[1]}/S";
// }
// }
// return this->regex;
// }
//
// /**
// * Get a regex for matching variables with parameters
// *
// * @return String
// */
// public function getVariableRegex() {
// return str_replace("\\1", "(.*?)", this->getRegex());
// }
//
// /**
// * Get a regex anchored to the start of the String that does not match parameters
// *
// * @return array
// */
// public function getRegexStart() {
// super = this->getBaseRegex();
// newRegex = [ '', '' ];
// if (super[0] !== '') {
// newRegex[0] = "/^(?:{super[0]})/iuS";
// }
// if (super[1] !== '') {
// newRegex[1] = "/^(?:{super[1]})/S";
// }
// return newRegex;
// }
//
// /**
// * Get an anchored regex for matching variables with parameters
// *
// * @return array
// */
// public function getVariableStartToEndRegex() {
// super = this->getBaseRegex();
// newRegex = [ '', '' ];
// if (super[0] !== '') {
// newRegex[0] = str_replace("\\1", "(.*?)", "/^(?:{super[0]})/iuS");
// }
// if (super[1] !== '') {
// newRegex[1] = str_replace("\\1", "(.*?)", "/^(?:{super[1]})/S");
// }
// return newRegex;
// }
//
// /**
// * @since 1.20
// * @return array
// */
// public function getNames() {
// return this->names;
// }
//
// /**
// * Parse a match array from preg_match
// * Returns array(magic word ID, parameter value)
// * If there is no parameter value, that element will be false.
// *
// * @param array m
// *
// * @throws MWException
// * @return array
// */
// public function parseMatch(m) {
// reset(m);
// while (list(key, value) = each(m)) {
// if (key === 0 || value === '') {
// continue;
// }
// parts = explode('_', key, 2);
// if (count(parts) != 2) {
// // This shouldn't happen
// // continue;
// throw new MWException(__METHOD__ . ': bad parameter name');
// }
// list(/* synIndex */, magicName) = parts;
// paramValue = next(m);
// return [ magicName, paramValue ];
// }
// // This shouldn't happen either
// throw new MWException(__METHOD__ . ': parameter not found');
// }
/**
 * Match some text, with parameter capture.
 * Writes the magic word name to rv[0] and the captured parameter to rv[1].
 * Both elements are set to null if there was no match.
 * A match with an empty parameter sets rv[1] to Bry_.Empty.
 *
 * @param rv  2-element output array: [0] = magic word name, [1] = parameter value
 * @param src text to match from start to end
 */
public void matchVariableStartToEnd(byte[][] rv, byte[] src) {
  int src_end = src.length;
  if (src_end == 0) {
    rv[0] = rv[1] = null;
    return;
  }

  byte[] name = null;
  int val_bgn = -1, val_end = -1;

  // check fwd; EX: "thumb=$1"
  if (fwd_trie != null) {
    Object o = fwd_trie.Match_at(trv, src, 0, src_end);
    if (o != null) {
      Xomw_MagicWordSynonym syn = ((Xomw_MagicWordSynonym)o);
      // if "nil", then must be full match; EX: "thumbx" does not match "thumb"
      // a partial "nil" hit is simply "no fwd match"; do NOT return here, b/c the bwd trie may still match
      // EX: "upright" + "$1px" registered; "upright123px" must still match "$1px"
      boolean partial_nil = syn.arg1_tid == Xomw_MagicWordSynonym.Arg1__nil
        && syn.text_wo_arg1.length != src_end;
      if (!partial_nil) {
        name = syn.magic_name;
        val_bgn = trv.Pos();	// read before the bwd match below reuses trv
        val_end = src_end;
      }
    }
  }

  // check bwd; EX: "$1px"; a bwd match overrides a fwd match
  if (bwd_trie != null) {
    Object o = bwd_trie.Match_at(trv, src, src_end - 1, -1);
    if (o != null) {
      Xomw_MagicWordSynonym syn = ((Xomw_MagicWordSynonym)o);
      name = syn.magic_name;
      val_bgn = 0;
      val_end = src_end - syn.text_wo_arg1.length;
    }
  }

  // no match in either trie: both elements are null, as documented
  if (name == null) {
    rv[0] = rv[1] = null;
    return;
  }

  rv[0] = name;
  rv[1] = val_end - val_bgn == 0 ? Bry_.Empty : Bry_.Mid(src, val_bgn, val_end);
}
// /**
// * Match some text, without parameter capture
// * Returns the magic word name, or false if there was no capture
// *
// * @param String text
// *
// * @return String|boolean False on failure
// */
// public function matchStartToEnd(text) {
// hash = this->getHash();
// if (isset(hash[1][text])) {
// return hash[1][text];
// }
// global wgContLang;
// lc = wgContLang->lc(text);
// if (isset(hash[0][lc])) {
// return hash[0][lc];
// }
// return false;
// }
//
// /**
// * Returns an associative array, ID => param value, for all items that match
// * Removes the matched items from the input String (passed by reference)
// *
// * @param String text
// *
// * @return array
// */
// public function matchAndRemove(&text) {
// found = [];
// regexes = this->getRegex();
// foreach (regexes as regex) {
// if (regex === '') {
// continue;
// }
// matches = [];
// res = preg_match_all(regex, text, matches, PREG_SET_ORDER);
// if (res === false) {
// LoggerFactory::getInstance('parser')->warning('preg_match_all returned false', [
// 'code' => preg_last_error(),
// 'regex' => regex,
// 'text' => text,
// ]);
// } elseif (res) {
// foreach (matches as m) {
// list(name, param) = this->parseMatch(m);
// found[name] = param;
// }
// }
// res = preg_replace(regex, '', text);
// if (res === null) {
// LoggerFactory::getInstance('parser')->warning('preg_replace returned null', [
// 'code' => preg_last_error(),
// 'regex' => regex,
// 'text' => text,
// ]);
// }
// text = res;
// }
// return found;
// }
//
// /**
// * Return the ID of the magic word at the start of text, and remove
// * the prefix from text.
// * Return false if no match found and text is not modified.
// * Does not match parameters.
// *
// * @param String text
// *
// * @return int|boolean False on failure
// */
// public function matchStartAndRemove(&text) {
// regexes = this->getRegexStart();
// foreach (regexes as regex) {
// if (regex === '') {
// continue;
// }
// if (preg_match(regex, text, m)) {
// list(id,) = this->parseMatch(m);
// if (strlen(m[0]) >= strlen(text)) {
// text = '';
// } else {
// text = substr(text, strlen(m[0]));
// }
// return id;
// }
// }
// return false;
// }
}

View File

@@ -1,64 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.mws; import gplx.*; import gplx.xowa.*;
import org.junit.*; import gplx.core.tests.*;
/** Tests for Xomw_MagicWordArray.matchVariableStartToEnd, driven through Xomw_MagicWordArray__fxt. */
public class Xomw_MagicWordArray__tst {
  private final Xomw_MagicWordArray__fxt fxt = new Xomw_MagicWordArray__fxt();

  // synonym without "$1": exact text matches with empty value; longer text does not match
  @Test public void Nil() {
    fxt.Init__word(Bool_.Y, "img_nil", "nil");
    fxt.Init__ary("img_nil");

    fxt.Test__matchVariableStartToEnd("nil", "img_nil", "");
    fxt.Test__matchVariableStartToEnd("nila", null, null);
  }

  // synonym text followed by "$1": value is whatever follows the text
  @Test public void Bgn() {
    fxt.Init__word(Bool_.Y, "img_bgn", "bgn$1");
    fxt.Init__ary("img_bgn");

    fxt.Test__matchVariableStartToEnd("bgna", "img_bgn", "a");
    fxt.Test__matchVariableStartToEnd("bgn", "img_bgn", "");
  }

  // "$1" followed by synonym text: value is whatever precedes the text
  @Test public void End() {
    fxt.Init__word(Bool_.Y, "img_end", "$1end");
    fxt.Init__ary("img_end");

    fxt.Test__matchVariableStartToEnd("aend", "img_end", "a");
    fxt.Test__matchVariableStartToEnd("end", "img_end", "");
  }

  // two words with several synonym shapes in one array
  @Test public void Smoke() {
    fxt.Init__word(Bool_.Y, "img_upright", "upright", "upright=$1", "upright $1");
    fxt.Init__word(Bool_.Y, "img_width", "$1px");
    fxt.Init__ary("img_upright", "img_width");

    fxt.Test__matchVariableStartToEnd("upright=123", "img_upright", "123");
    fxt.Test__matchVariableStartToEnd("123px", "img_width", "123");
  }
}
/** Fixture: registers magic words, builds a Xomw_MagicWordArray from them, and checks match results. */
class Xomw_MagicWordArray__fxt {
  private final Xomw_MagicWordMgr magic_word_mgr = new Xomw_MagicWordMgr();
  private Xomw_MagicWordArray magic_word_ary;

  /** Registers one magic word with the given case flag and synonyms. */
  public void Init__word(boolean cs, String word, String... synonyms) {
    byte[] word_bry = Bry_.new_u8(word);
    byte[][] synonyms_bry = Bry_.Ary(synonyms);
    magic_word_mgr.Add(word_bry, cs, synonyms_bry);
  }

  /** Builds the array under test from the named words; call after Init__word. */
  public void Init__ary(String... words) {
    byte[][] names = Bry_.Ary(words);
    magic_word_ary = new Xomw_MagicWordArray(magic_word_mgr, names);
  }

  /** Matches src and compares the resulting name / value against the expected ones. */
  public void Test__matchVariableStartToEnd(String src, String expd_name, String expd_val) {
    byte[][] actl = new byte[2][];
    byte[] src_bry = Bry_.new_u8(src);
    magic_word_ary.matchVariableStartToEnd(actl, src_bry);
    Gftest.Eq__str(expd_name, actl[0], expd_name);
    Gftest.Eq__str(expd_val , actl[1], expd_val);
  }
}

View File

@@ -1,28 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.mws; import gplx.*; import gplx.xowa.*;
public class Xomw_MagicWordMgr {
private final Hash_adp_bry hash = Hash_adp_bry.cs();
public void Add(byte[] name, boolean cs, byte[]... synonyms) {
Xomw_MagicWord mw = new Xomw_MagicWord(name, cs, synonyms);
hash.Add(name, mw);
}
public Xomw_MagicWord Get(byte[] name) {
return (Xomw_MagicWord)hash.Get_by(name);
}
}

View File

@@ -1,91 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.mws; import gplx.*; import gplx.xowa.*;
/**
 * One synonym of a magic word, classified by where the "$1" placeholder
 * appears in its text (see the Arg1__* constants).
 */
public class Xomw_MagicWordSynonym {
  public final byte[] magic_name;     // name of the owning magic word
  public final boolean case_match;    // case flag of the owning magic word
  public final byte[] text;           // raw synonym text, including any "$1"
  public final byte[] text_wo_arg1;   // text with a leading / trailing "$1" removed; same as text otherwise
  public final byte arg1_tid;         // one of the Arg1__* constants
  public Xomw_MagicWordSynonym(byte[] magic_name, boolean case_match, byte[] text) {
    this.magic_name = magic_name;
    this.case_match = case_match;
    this.text = text;
    this.arg1_tid = Get_arg1_tid(text);
    switch (arg1_tid) {
      case Arg1__bgn:
        // strip leading "$1"
        text_wo_arg1 = Bry_.Mid(text, 2);
        break;
      case Arg1__end:
        // strip trailing "$1"
        text_wo_arg1 = Bry_.Mid(text, 0, text.length - 2);
        break;
      default:
        text_wo_arg1 = text;
        break;
    }
  }
  /**
   * Scans src for "$1" and classifies it.
   * No "$1" = nil; exactly one = bgn / end / mid by position; more than one = mix.
   */
  private static byte Get_arg1_tid(byte[] src) {
    int len = src.length;
    byte rv = Arg1__nil;
    int cur = 0;
    while (cur < len) {
      // "$" followed by "1"?
      int nxt_pos = cur + 1;
      boolean arg1_found = src[cur] == Byte_ascii.Dollar
        && nxt_pos < len
        && src[nxt_pos] == Byte_ascii.Num_1;
      if (!arg1_found) {
        cur += 1;
        continue;
      }

      // "$1" matched
      if (rv != Arg1__nil) {
        // any 2nd "$1" is mix, whatever the 1st was
        // (previously "$1a$1b" stayed bgn and its remaining "$1" was treated as literal text)
        rv = Arg1__mix;
      }
      else if (cur == 0) {
        rv = Arg1__bgn;
      }
      else if (cur == len - 2) {
        rv = Arg1__end;
      }
      else {
        rv = Arg1__mid;
      }
      cur += 2;
    }
    return rv;
  }
  public static final byte
    Arg1__nil = 0 // EX: "thumb"
  , Arg1__bgn = 1 // EX: "$1px"
  , Arg1__end = 2 // EX: "thumb=$1"
  , Arg1__mid = 3 // EX: "a$1b"
  , Arg1__mix = 4 // EX: "a$1b$1c"
  ;
}

View File

@@ -1,22 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.mws; import gplx.*; import gplx.xowa.*;
/** Message stub: both accessors currently return null. */
public class Xomw_Message {
  /** @return always null in this implementation */
  public byte[] text() {
    return null;
  }

  /** @return always null in this implementation */
  public byte[] escaped() {
    return null;
  }
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -1,27 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.mws; import gplx.*; import gplx.xowa.*;
/** Mutable holder for a (link, text) pair; reused across calls via Init. */
public class Xomw_linker__normalize_subpage_link {
  public byte[] link;
  public byte[] text;

  /**
   * Overwrites both fields with the given values.
   *
   * @return this instance, to allow chaining
   */
  public Xomw_linker__normalize_subpage_link Init(byte[] link, byte[] text) {
    this.text = text;
    this.link = link;
    return this;
  }
}

View File

@@ -1,43 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.mws; import gplx.*; import gplx.xowa.*;
import org.junit.*; import gplx.core.tests.*;
/**
 * Tests for Xomw_linker.normalizeSubpageLink.
 * Argument order of each call: page title, link, text, expected link, expected text.
 */
public class Xomw_linker__normalize_subpage_link__tst {
  private final Xomw_linker__normalize_subpage_link__fxt fxt = new Xomw_linker__normalize_subpage_link__fxt();

  @Test public void None() {
    fxt.Test__normalize_subpage_link("A/B/C", "Z", "", "Z", "");
  }
  @Test public void Hash() {
    fxt.Test__normalize_subpage_link("A/B/C", "/Y#Z", "", "A/B/C/Y#Z", "/Y#Z");
  }
  @Test public void Slash__basic() {
    fxt.Test__normalize_subpage_link("A/B/C", "/Z", "", "A/B/C/Z", "/Z");
  }
  @Test public void Slash__slash() {
    fxt.Test__normalize_subpage_link("A/B/C", "/Z/", "", "A/B/C/Z", "Z");
  }
  @Test public void Dot2__empty() {
    fxt.Test__normalize_subpage_link("A/B/C", "../", "", "A/B", "");
  }
  @Test public void Dot2__many() {
    fxt.Test__normalize_subpage_link("A/B/C", "../../Z", "z1", "A/Z", "z1");
  }
  @Test public void Dot2__trailing() {
    fxt.Test__normalize_subpage_link("A/B/C", "../../Z/", "", "A/Z", "Z");
  }
}
/** Fixture: owns a linker, an edit wiki for title parsing, and a reusable result holder. */
class Xomw_linker__normalize_subpage_link__fxt {
  private final Xomw_linker mgr = new Xomw_linker(new gplx.xowa.mws.linkers.Xomw_link_renderer(new Xomw_sanitizer()));
  private final Xowe_wiki wiki;
  private final Xomw_linker__normalize_subpage_link normalize_subpage_link = new Xomw_linker__normalize_subpage_link();

  public Xomw_linker__normalize_subpage_link__fxt() {
    Xoae_app app = Xoa_app_fxt.Make__app__edit();
    this.wiki = Xoa_app_fxt.Make__wiki__edit(app);
  }

  /** Normalizes link / text relative to the page title, then compares the holder's link and text to the expected strings. */
  public void Test__normalize_subpage_link(String page_title_str, String link, String text, String expd_link, String expd_text) {
    mgr.normalizeSubpageLink
    ( normalize_subpage_link
    , wiki.Ttl_parse(Bry_.new_u8(page_title_str))
    , Bry_.new_u8(link)
    , Bry_.new_u8(text)
    );
    String actl_link = String_.new_u8(normalize_subpage_link.link);
    Gftest.Eq__str(expd_link, actl_link);
    String actl_text = String_.new_u8(normalize_subpage_link.text);
    Gftest.Eq__str(expd_text, actl_text);
  }
}

View File

@@ -1,39 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.mws; import gplx.*; import gplx.xowa.*;
import org.junit.*; import gplx.core.tests.*; import gplx.core.btries.*; import gplx.xowa.mws.parsers.*;
/**
 * Tests for Xomw_linker.splitTrail.
 * Argument order of each call: trail, expected inside, expected remaining trail.
 */
public class Xomw_linker__split_trail__tst {
  private final Xomw_linker__split_trail__fxt fxt = new Xomw_linker__split_trail__fxt();

  @Test public void Basic() {
    fxt.Test__split_trail("abc def", "abc", " def");
  }
  @Test public void None() {
    fxt.Test__split_trail(" abc", null, " abc");
  }
}
/** Fixture: a linker initialized with a trie holding the single-letter entries "a" through "f". */
class Xomw_linker__split_trail__fxt {
  private final Xomw_linker linker = new Xomw_linker(new gplx.xowa.mws.linkers.Xomw_link_renderer(new Xomw_sanitizer()));
  private final Btrie_slim_mgr trie = Btrie_slim_mgr.cs();

  public Xomw_linker__split_trail__fxt() {
    // register each letter under itself, then hand the trie to the linker
    String[] letters = new String[] {"a", "b", "c", "d", "e", "f"};
    int letters_len = letters.length;
    for (int i = 0; i < letters_len; i++) {
      String letter = letters[i];
      trie.Add_str_str(letter, letter);
    }
    linker.Init_by_wiki(new Xomw_parser_env(), trie);
  }

  /** Splits the trail and compares element 0 (inside) and element 1 (trail) to the expected strings. */
  public void Test__split_trail(String trail_str, String expd_inside, String expd_trail) {
    byte[] trail_bry = Bry_.new_u8(trail_str);
    byte[][] actl = linker.splitTrail(trail_bry);
    String actl_inside = String_.new_u8(actl[0]);
    Gftest.Eq__str(expd_inside, actl_inside);
    String actl_trail = String_.new_u8(actl[1]);
    Gftest.Eq__str(expd_trail , actl_trail);
  }
}

View File

@@ -1,22 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.mws; import gplx.*; import gplx.xowa.*;
/** Lookup of Xomw_Message by String key. NOTE: nothing in this class adds entries to the hash. */
public class Xomw_message_mgr {
  private final Hash_adp hash = Hash_adp_.New();

  /** Returns whatever the hash holds for key, cast to Xomw_Message. */
  public Xomw_Message Get_by_str(String key) {
    Object found = hash.Get_by(key);
    return (Xomw_Message)found;
  }
}

View File

@@ -1,921 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.mws; import gplx.*; import gplx.xowa.*;
import gplx.core.brys.*; import gplx.core.btries.*; import gplx.core.encoders.*; import gplx.core.primitives.*; import gplx.langs.htmls.entitys.*;
import gplx.xowa.parsers.htmls.*;
import gplx.langs.htmls.*; import gplx.xowa.mws.htmls.*; import gplx.xowa.mws.parsers.*; import gplx.xowa.mws.utls.*;
public class Xomw_sanitizer {
private final Mwh_doc_wkr__atr_bldr atr_bldr = new Mwh_doc_wkr__atr_bldr();
private final Mwh_atr_parser atr_parser = new Mwh_atr_parser();
private final Xomw_regex_escape_invalid regex_clean_url = new Xomw_regex_escape_invalid();
private final Xomw_regex_find_domain regex_find_domain = new Xomw_regex_find_domain();
private final Xomw_regex_ipv6_brack regex_ipv6_brack = new Xomw_regex_ipv6_brack();
private final Bry_tmp tmp_host = new Bry_tmp();
private final Bry_bfr tmp_bfr = Bry_bfr_.New();
private final Btrie_rv trv = new Btrie_rv();
private final Xomw_regex_url_char_cbk__normalize normalize_cbk;
private final Xomw_regex_url_char_cbk__decode decode_cbk;
private static Xomw_regex_url_char regex_url_char;
private static Btrie_slim_mgr invalid_idn_trie;
public Xomw_sanitizer() {
  this.normalize_cbk = new Xomw_regex_url_char_cbk__normalize(this);
  this.decode_cbk = new Xomw_regex_url_char_cbk__decode(this);

  // one-time init of static structs; regex_url_char doubles as the "already initialized" flag
  if (regex_url_char == null) {
    synchronized (Type_adp_.ClassOf_obj(this)) {
      // re-check under the lock; a thread that waited for the lock must not redo the init
      if (regex_url_char == null) {
        // build into a local; the static flag is only assigned after all other statics are set (see end of block)
        // NOTE(review): assumes Html_entities_new() does not read the static regex_url_char; confirm
        Xomw_regex_url_char new_regex_url_char = new Xomw_regex_url_char();

        // Characters that will be ignored in IDNs.
        // https://tools.ietf.org/html/rfc3454#section-3.1
        // $strip = "/
        //	 \\s|		  // general whitespace
        //	 \xc2\xad|	 // 00ad SOFT HYPHEN
        //	 \xe1\xa0\x86| // 1806 MONGOLIAN TODO SOFT HYPHEN
        //	 \xe2\x80\x8b| // 200b ZERO WIDTH SPACE
        //	 \xe2\x81\xa0| // 2060 WORD JOINER
        //	 \xef\xbb\xbf| // feff ZERO WIDTH NO-BREAK SPACE
        //	 \xcd\x8f|	 // 034f COMBINING GRAPHEME JOINER
        //	 \xe1\xa0\x8b| // 180b MONGOLIAN FREE VARIATION SELECTOR ONE
        //	 \xe1\xa0\x8c| // 180c MONGOLIAN FREE VARIATION SELECTOR TWO
        //	 \xe1\xa0\x8d| // 180d MONGOLIAN FREE VARIATION SELECTOR THREE
        //	 \xe2\x80\x8c| // 200c ZERO WIDTH NON-JOINER
        //	 \xe2\x80\x8d| // 200d ZERO WIDTH JOINER
        //	 [\xef\xb8\x80-\xef\xb8\x8f] // fe00-fe0f VARIATION SELECTOR-1-16
        //	 /xuD";
        // XO.MW.REGEX:http://php.net/manual/en/reference.pcre.pattern.modifiers.php
        //   /x : ignore embedded ws
        //   /u : enabled pcre utf8
        //   /D : $ matches EOS, not NL
        invalid_idn_trie = Btrie_slim_mgr.cs()
          .Add_many_bry(new Xomw_regex_parser().Add_ary
          ( "\\s"
          , "\\xc2\\xad"      // 00ad SOFT HYPHEN
          , "\\xe1\\xa0\\x86" // 1806 MONGOLIAN TODO SOFT HYPHEN
          , "\\xe2\\x80\\x8b" // 200b ZERO WIDTH SPACE
          , "\\xe2\\x81\\xa0" // 2060 WORD JOINER
          , "\\xef\\xbb\\xbf" // feff ZERO WIDTH NO-BREAK SPACE
          , "\\xcd\\x8f"      // 034f COMBINING GRAPHEME JOINER
          , "\\xe1\\xa0\\x8b" // 180b MONGOLIAN FREE VARIATION SELECTOR ONE
          , "\\xe1\\xa0\\x8c" // 180c MONGOLIAN FREE VARIATION SELECTOR TWO
          , "\\xe1\\xa0\\x8d" // 180d MONGOLIAN FREE VARIATION SELECTOR THREE
          , "\\xe2\\x80\\x8c" // 200c ZERO WIDTH NON-JOINER
          , "\\xe2\\x80\\x8d" // 200d ZERO WIDTH JOINER
          )
          .Add_rng
          ( "\\xef\\xb8\\x80", "\\xef\\xb8\\x8f" // fe00-fe0f VARIATION SELECTOR-1-16
          )
          .Rslt());

        // assert static structs
        if (html_entities == null) {
          synchronized (Type_adp_.ClassOf_obj(this)) {
            html_entities = Html_entities_new();
          }
        }

        // set the flag last, so a thread that sees it non-null outside the lock does not find the other statics still unset
        // NOTE(review): the field is not declared volatile, so the unlocked read above has no formal visibility guarantee; consider volatile or a holder class
        regex_url_char = new_regex_url_char;
      }
    }
  }
}
// Merge two sets of HTML attributes. Conflicting items in the second set
// will override those in the first, except for 'class' attributes which
// will be combined (if they're both strings).
// XO.MW: XO does src += trg; MW does rv = src + trg;
// Folds every attribute of `trg` into `src` (src is modified in place).
// * A "class" attribute present in both sets is combined: "<src classes> <trg classes>".
// * Any other attribute from `trg` is added to `src`, or overrides the entry with the same key.
public void Merge_attributes(Xomw_atr_mgr src, Xomw_atr_mgr trg) {
	int incoming_len = trg.Len();
	for (int idx = 0; idx < incoming_len; idx++) {
		Xomw_atr_itm incoming = trg.Get_at(idx);
		byte[] cls_key = Gfh_atr_.Bry__class;

		// only a "class" attribute that already exists in src is combined
		Xomw_atr_itm existing = null;
		if (Bry_.Eq(incoming.Key_bry(), cls_key))
			existing = src.Get_by_or_null(cls_key);

		// everything else is a plain add / override
		if (existing == null) {
			src.Add_or_set(incoming);
			continue;
		}

		// both values are non-null, so a new byte[] has to be built: normalized src + " " + normalized trg
		Merge_atrs_combine(tmp_bfr, existing.Val(), Byte_ascii.Space);
		tmp_bfr.Add_byte_space();
		Merge_atrs_combine(tmp_bfr, incoming.Val(), Byte_ascii.Space);
		existing.Val_(tmp_bfr.To_bry_and_clear());
	}
}
// Appends `src` to `trg` with separator runs normalized:
// * a run of `sep` at the start or end of `src` is dropped;
// * a run of `sep` between other bytes is collapsed to a single `sep`.
private void Merge_atrs_combine(Bry_bfr trg, byte[] src, byte sep) {
	int len = src.length;
	int pos = 0;
	while (pos < len) {
		byte cur = src[pos];
		// ordinary byte: copy as-is
		if (cur != sep) {
			trg.Add_byte(cur);
			pos++;
			continue;
		}
		// separator: measure the whole run; EX: "a   b"
		int run_bgn = pos;
		int run_end = Bry_find_.Find_fwd_while(src, pos, len, sep);
		// run reaches EOS; nothing left to copy; EX: "a  "
		if (run_end == len)
			break;
		pos = run_end;
		// run at BOS is dropped; any other run is emitted as one separator; EX: "  a"
		if (run_bgn != 0)
			trg.Add_byte(cur);
	}
}
// Cleans a url for use in an external link (port of MW Sanitizer::cleanUrl).
// Steps, in order:
//   1. decode character references in the whole url;
//   2. percent-encode the bytes matched by regex_clean_url (see XO.MW.REGEX note below);
//   3. if the url splits into protocol / host / rest: strip the IDN-ignorable sequences from the host,
//      un-escape a bracketed IPv6 host, then return protocol + host + rest.
// If the url does not split (regex_find_domain fails), the result of steps 1-2 is returned.
// Uses the shared instance fields tmp_bfr / tmp_host / trv as scratch state.
public byte[] Clean_url(byte[] url) {
// Normalize any HTML entities in input. They will be
// re-escaped by makeExternalLink().
url = Decode_char_references(null, Bool_.Y, url, 0, url.length);
// Escape any control characters introduced by the above step
// XO.MW.REGEX: $url = preg_replace_callback('/[\][<>"\\x00-\\x20\\x7F\|]/', [ __CLASS__, 'cleanUrlCallback' ], $url);
// '[]<>"' | '00 -> 32' | 127
// Escape only writes to tmp_bfr when something was escaped; otherwise url is kept as-is
if (regex_clean_url.Escape(tmp_bfr, url, 0, url.length))
url = tmp_bfr.To_bry_and_clear();
// XO.MW.REGEX: if (preg_match('!^([^:]+:)(//[^/]+)?(.*)$!iD', $url, $matches))
if (regex_find_domain.Match(url, 0, url.length)) {
// Characters that will be ignored in IDNs.
// https://tools.ietf.org/html/rfc3454#section-3.1
// Strip them before further processing so blacklists and such work.
// tmp_host is pointed at the host slice of url; matches of invalid_idn_trie are replaced with Bry_.Empty
Php_preg_.Replace(tmp_host.Init(url, regex_find_domain.host_bgn, regex_find_domain.host_end), tmp_bfr, invalid_idn_trie, trv, Bry_.Empty);
// IPv6 host names are bracketed with []. Url-decode these.
// if (substr_compare("//%5B", $host, 0, 5) === 0 &&
// preg_match('!^//%5B([0-9A-Fa-f:.]+)%5D((:\d+)?)$!', $host, $matches)
// XO.MW.REGEX:
// !^//%5B([0-9A-Fa-f:.]+)%5D((:\d+)?)$!
// "//%5B" + ("hex-dec" | [:.]) + "%5D" + numbers
// EX: [ABCD]:80:12
if (regex_ipv6_brack.Match(tmp_host.src, tmp_host.src_bgn, tmp_host.src_end)) {
// rebuild the host as "//[" + address + "]" + port segments
tmp_bfr.Add_str_a7("//[").Add_mid(tmp_host.src, regex_ipv6_brack.host_bgn, regex_ipv6_brack.host_end)
.Add_byte(Byte_ascii.Brack_end).Add_mid(tmp_host.src, regex_ipv6_brack.segs_bgn, regex_ipv6_brack.segs_end);
tmp_host.Set_by_bfr(tmp_bfr);
}
// @todo FIXME: Validate hostnames here
// reassemble: protocol + (possibly rewritten) host + rest
tmp_bfr.Add_mid(url, regex_find_domain.prot_bgn, regex_find_domain.prot_end);
tmp_host.Add_to_bfr(tmp_bfr);
tmp_bfr.Add_mid(url, regex_find_domain.rest_bgn, regex_find_domain.rest_end);
return tmp_bfr.To_bry_and_clear();
}
else {
return url;
}
}
// Parses the raw attribute text `atrs` and writes each attribute to `bfr` as ` key="val"`.
// The key is html-escaped; the value is written as-is (see TODO below).
// `tag_name` is not read by this method.
public void Fix_tag_attributes(Bry_bfr bfr, byte[] tag_name, byte[] atrs) {
	// parse into the shared builder
	atr_bldr.Atrs__clear();
	atr_parser.Parse(atr_bldr, -1, -1, atrs, 0, atrs.length);

	// PORTED: Sanitizer.php|safeEncodeTagAttributes
	//   $encAttribute = htmlspecialchars($attribute);
	//   $encValue = Sanitizer::safeEncodeAttribute($value);
	//   $attribs[] = "$encAttribute=\"$encValue\"";
	int atr_count = atr_bldr.Atrs__len();
	int atr_idx = 0;
	while (atr_idx < atr_count) {
		Mwh_atr_itm atr = atr_bldr.Atrs__get_at(atr_idx);
		atr_idx++;
		// leading space per attribute: "return count($attribs) ? ' ' . implode(' ', $attribs) : '';"
		bfr.Add_byte_space();
		bfr.Add_bry_escape_html(atr.Key_bry(), atr.Key_bgn(), atr.Key_end());
		bfr.Add_byte_eq();
		bfr.Add_byte_quote();
		bfr.Add(atr.Val_as_bry()).Add_byte_quote(); // TODO.XO:Sanitizer::encode
	}
}
// Normalizes character references for a parser buffer: reads from pbfr.Src() and writes to pbfr.Trg().
// The source array, its length and the target buffer are all captured BEFORE pbfr.Switch() is called;
// keep that order when editing (what Switch does to the buffers is not visible here).
// NOTE(review): lone_bfr is passed as N, so when the text has no "&" nothing is appended to the target
// buffer by the callee -- confirm this is what the pbfr convention expects.
public void Normalize_char_references(Xomw_parser_bfr pbfr) {
// XO.PBFR
Bry_bfr src_bfr = pbfr.Src();
// backing array of the source buffer; only [0, src_end) is the text to process
byte[] src = src_bfr.Bfr();
int src_bgn = 0;
int src_end = src_bfr.Len();
Bry_bfr bfr = pbfr.Trg();
pbfr.Switch();
Normalize_char_references(bfr, Bool_.N, src, src_bgn, src_end);
}
// Normalizes character references in src[src_bgn, src_end) by running the shared scanner with the
// "normalize" callback (named entity -> stored html form; valid dec / hex ref -> canonical ref; other "&" -> "&amp;").
// bfr may be null: the scanner then allocates its own buffer and returns the result as a byte[].
// See Xomw_regex_url_char.Replace_by_cbk for the exact return / lone_bfr behavior.
public byte[] Normalize_char_references(Bry_bfr bfr, boolean lone_bfr, byte[] src, int src_bgn, int src_end) {
return regex_url_char.Replace_by_cbk(bfr, lone_bfr, src, src_bgn, src_end, normalize_cbk);
}
// Decodes character references in src[src_bgn, src_end) by running the shared scanner with the
// "decode" callback (references are replaced by the encoded bytes of their codepoint; other "&" are kept).
// bfr may be null: the scanner then allocates its own buffer and returns the result as a byte[].
// See Xomw_regex_url_char.Replace_by_cbk for the exact return / lone_bfr behavior.
public byte[] Decode_char_references(Bry_bfr bfr, boolean lone_bfr, byte[] src, int src_bgn, int src_end) {
return regex_url_char.Replace_by_cbk(bfr, lone_bfr, src, src_bgn, src_end, decode_cbk);
}
// Returns true if `codepoint` may appear as a character reference.
// Allowed: tab, LF, 0x20-0x7e, 0xa0-0xd7ff, 0xe000-0xfffd, 0x10000-0x10ffff.
public boolean Validate_codepoint(int codepoint) {
	// U+000C is valid in HTML5 but not allowed in XML.
	// U+000D is valid in XML but not allowed in HTML5.
	// U+007F - U+009F are disallowed in HTML5 (control characters).
	if (codepoint == 0x09 || codepoint == 0x0a) return true;	// tab, line-feed
	if (codepoint < 0x20) return false;							// other C0 controls, and negatives
	if (codepoint <= 0x7e) return true;							// printable ASCII
	if (codepoint < 0xa0) return false;							// DEL and C1 controls
	if (codepoint <= 0xd7ff) return true;						// up to the surrogate block
	if (codepoint < 0xe000) return false;						// surrogates
	if (codepoint <= 0xfffd) return true;						// rest of BMP, excluding fffe / ffff
	return codepoint >= 0x10000 && codepoint <= 0x10ffff;		// supplementary planes
}
// Encode an attribute value for HTML output.
// XO.MW:SYNC:1.29; DATE:2017-02-03
// Writes `text` to `bfr` encoded for use as an HTML attribute value.
// * xml-special characters are escaped by bfr.Add_bry_escape_xml;
// * tab / LF / CR are written as numeric references ("&#9;" / "&#10;" / "&#13;").
// MW equivalent:
//   $encValue = htmlspecialchars($text, ENT_QUOTES);
//   $encValue = strtr($encValue, ["\n" => '&#10;', "\r" => '&#13;', "\t" => '&#9;']);
public static void Encode_attribute(Bry_bfr bfr, byte[] text) {
	// Whitespace is normalized during attribute decoding,
	// so if we've been passed non-spaces we must encode them
	// ahead of time or they won't be preserved.
	int text_len = text.length;
	int seg_bgn = 0; // start of the pending run that contains no tab / LF / CR
	for (int i = 0; i < text_len; i++) {
		String char_ref;
		switch (text[i]) {
			case '\n': char_ref = "&#10;"; break;
			case '\r': char_ref = "&#13;"; break;
			case '\t': char_ref = "&#9;"; break;
			default: continue; // ordinary byte; stays in the pending run
		}
		// flush the pending run (xml-escaped), then the whitespace reference
		bfr.Add_bry_escape_xml(text, seg_bgn, i);
		bfr.Add_str_a7(char_ref);
		seg_bgn = i + 1;
	}
	// flush the tail; for text without tab / LF / CR this is the only call and covers the whole text
	bfr.Add_bry_escape_xml(text, seg_bgn, text_len);
}
public static Hash_adp_bry html_entities;
// Builds the entity-name -> Xomw_html_ent lookup used by the normalize / decode callbacks.
// Registration order matters: entries are added with "first one wins" semantics (see Html_entities_set),
// so the alias and core entries (lt / gt / amp / quot) registered first keep their literal html form,
// and the later numeric registrations of the same names are ignored.
// Every other name maps to the numeric reference "&#<code>;".
private static Hash_adp_bry Html_entities_new() {
// scratch buffer reused to build each "&#<code>;" string
Bry_bfr tmp = Bry_bfr_.New();
Hash_adp_bry rv = Hash_adp_bry.cs();
// MediaWiki-specific aliases for &rlm; (both map to code 8207)
Html_entities_set(rv, Xomw_html_ent.Type__alias, 8207, "רלמ", "&rlm;");
Html_entities_set(rv, Xomw_html_ent.Type__alias, 8207, "رلم", "&rlm;");
// core entities: kept in named form
Html_entities_set(rv, Xomw_html_ent.Type__char, 60, "lt", "&lt;");
Html_entities_set(rv, Xomw_html_ent.Type__char, 62, "gt", "&gt;");
Html_entities_set(rv, Xomw_html_ent.Type__char, 38, "amp", "&amp;");
Html_entities_set(rv, Xomw_html_ent.Type__char, 34, "quot", "&quot;");
// List of all named character entities defined in HTML 4.01
// https://www.w3.org/TR/html4/sgml/entities.html
// As well as &apos; which is only defined starting in XHTML1.
Html_entities_set(rv, tmp, "Aacute" , 193);
Html_entities_set(rv, tmp, "aacute" , 225);
Html_entities_set(rv, tmp, "Acirc" , 194);
Html_entities_set(rv, tmp, "acirc" , 226);
Html_entities_set(rv, tmp, "acute" , 180);
Html_entities_set(rv, tmp, "AElig" , 198);
Html_entities_set(rv, tmp, "aelig" , 230);
Html_entities_set(rv, tmp, "Agrave" , 192);
Html_entities_set(rv, tmp, "agrave" , 224);
Html_entities_set(rv, tmp, "alefsym" , 8501);
Html_entities_set(rv, tmp, "Alpha" , 913);
Html_entities_set(rv, tmp, "alpha" , 945);
Html_entities_set(rv, tmp, "amp" , 38); // XO: identical to Type__char entry; note that Type__char should be evaluated first
Html_entities_set(rv, tmp, "and" , 8743);
Html_entities_set(rv, tmp, "ang" , 8736);
Html_entities_set(rv, tmp, "apos" , 39); // New in XHTML & HTML 5; avoid in output for compatibility with IE.
Html_entities_set(rv, tmp, "Aring" , 197);
Html_entities_set(rv, tmp, "aring" , 229);
Html_entities_set(rv, tmp, "asymp" , 8776);
Html_entities_set(rv, tmp, "Atilde" , 195);
Html_entities_set(rv, tmp, "atilde" , 227);
Html_entities_set(rv, tmp, "Auml" , 196);
Html_entities_set(rv, tmp, "auml" , 228);
Html_entities_set(rv, tmp, "bdquo" , 8222);
Html_entities_set(rv, tmp, "Beta" , 914);
Html_entities_set(rv, tmp, "beta" , 946);
Html_entities_set(rv, tmp, "brvbar" , 166);
Html_entities_set(rv, tmp, "bull" , 8226);
Html_entities_set(rv, tmp, "cap" , 8745);
Html_entities_set(rv, tmp, "Ccedil" , 199);
Html_entities_set(rv, tmp, "ccedil" , 231);
Html_entities_set(rv, tmp, "cedil" , 184);
Html_entities_set(rv, tmp, "cent" , 162);
Html_entities_set(rv, tmp, "Chi" , 935);
Html_entities_set(rv, tmp, "chi" , 967);
Html_entities_set(rv, tmp, "circ" , 710);
Html_entities_set(rv, tmp, "clubs" , 9827);
Html_entities_set(rv, tmp, "cong" , 8773);
Html_entities_set(rv, tmp, "copy" , 169);
Html_entities_set(rv, tmp, "crarr" , 8629);
Html_entities_set(rv, tmp, "cup" , 8746);
Html_entities_set(rv, tmp, "curren" , 164);
Html_entities_set(rv, tmp, "dagger" , 8224);
Html_entities_set(rv, tmp, "Dagger" , 8225);
Html_entities_set(rv, tmp, "darr" , 8595);
Html_entities_set(rv, tmp, "dArr" , 8659);
Html_entities_set(rv, tmp, "deg" , 176);
Html_entities_set(rv, tmp, "Delta" , 916);
Html_entities_set(rv, tmp, "delta" , 948);
Html_entities_set(rv, tmp, "diams" , 9830);
Html_entities_set(rv, tmp, "divide" , 247);
Html_entities_set(rv, tmp, "Eacute" , 201);
Html_entities_set(rv, tmp, "eacute" , 233);
Html_entities_set(rv, tmp, "Ecirc" , 202);
Html_entities_set(rv, tmp, "ecirc" , 234);
Html_entities_set(rv, tmp, "Egrave" , 200);
Html_entities_set(rv, tmp, "egrave" , 232);
Html_entities_set(rv, tmp, "empty" , 8709);
Html_entities_set(rv, tmp, "emsp" , 8195);
Html_entities_set(rv, tmp, "ensp" , 8194);
Html_entities_set(rv, tmp, "Epsilon" , 917);
Html_entities_set(rv, tmp, "epsilon" , 949);
Html_entities_set(rv, tmp, "equiv" , 8801);
Html_entities_set(rv, tmp, "Eta" , 919);
Html_entities_set(rv, tmp, "eta" , 951);
Html_entities_set(rv, tmp, "ETH" , 208);
Html_entities_set(rv, tmp, "eth" , 240);
Html_entities_set(rv, tmp, "Euml" , 203);
Html_entities_set(rv, tmp, "euml" , 235);
Html_entities_set(rv, tmp, "euro" , 8364);
Html_entities_set(rv, tmp, "exist" , 8707);
Html_entities_set(rv, tmp, "fnof" , 402);
Html_entities_set(rv, tmp, "forall" , 8704);
Html_entities_set(rv, tmp, "frac12" , 189);
Html_entities_set(rv, tmp, "frac14" , 188);
Html_entities_set(rv, tmp, "frac34" , 190);
Html_entities_set(rv, tmp, "frasl" , 8260);
Html_entities_set(rv, tmp, "Gamma" , 915);
Html_entities_set(rv, tmp, "gamma" , 947);
Html_entities_set(rv, tmp, "ge" , 8805);
Html_entities_set(rv, tmp, "gt" , 62);
Html_entities_set(rv, tmp, "harr" , 8596);
Html_entities_set(rv, tmp, "hArr" , 8660);
Html_entities_set(rv, tmp, "hearts" , 9829);
Html_entities_set(rv, tmp, "hellip" , 8230);
Html_entities_set(rv, tmp, "Iacute" , 205);
Html_entities_set(rv, tmp, "iacute" , 237);
Html_entities_set(rv, tmp, "Icirc" , 206);
Html_entities_set(rv, tmp, "icirc" , 238);
Html_entities_set(rv, tmp, "iexcl" , 161);
Html_entities_set(rv, tmp, "Igrave" , 204);
Html_entities_set(rv, tmp, "igrave" , 236);
Html_entities_set(rv, tmp, "image" , 8465);
Html_entities_set(rv, tmp, "infin" , 8734);
Html_entities_set(rv, tmp, "int" , 8747);
Html_entities_set(rv, tmp, "Iota" , 921);
Html_entities_set(rv, tmp, "iota" , 953);
Html_entities_set(rv, tmp, "iquest" , 191);
Html_entities_set(rv, tmp, "isin" , 8712);
Html_entities_set(rv, tmp, "Iuml" , 207);
Html_entities_set(rv, tmp, "iuml" , 239);
Html_entities_set(rv, tmp, "Kappa" , 922);
Html_entities_set(rv, tmp, "kappa" , 954);
Html_entities_set(rv, tmp, "Lambda" , 923);
Html_entities_set(rv, tmp, "lambda" , 955);
Html_entities_set(rv, tmp, "lang" , 9001);
Html_entities_set(rv, tmp, "laquo" , 171);
Html_entities_set(rv, tmp, "larr" , 8592);
Html_entities_set(rv, tmp, "lArr" , 8656);
Html_entities_set(rv, tmp, "lceil" , 8968);
Html_entities_set(rv, tmp, "ldquo" , 8220);
Html_entities_set(rv, tmp, "le" , 8804);
Html_entities_set(rv, tmp, "lf" + "loor" , 8970);
Html_entities_set(rv, tmp, "lowast" , 8727);
Html_entities_set(rv, tmp, "loz" , 9674);
Html_entities_set(rv, tmp, "lrm" , 8206);
Html_entities_set(rv, tmp, "lsaquo" , 8249);
Html_entities_set(rv, tmp, "lsquo" , 8216);
Html_entities_set(rv, tmp, "lt" , 60);
Html_entities_set(rv, tmp, "macr" , 175);
Html_entities_set(rv, tmp, "mdash" , 8212);
Html_entities_set(rv, tmp, "micro" , 181);
Html_entities_set(rv, tmp, "middot" , 183);
Html_entities_set(rv, tmp, "minus" , 8722);
Html_entities_set(rv, tmp, "Mu" , 924);
Html_entities_set(rv, tmp, "mu" , 956);
Html_entities_set(rv, tmp, "nabla" , 8711);
Html_entities_set(rv, tmp, "nbsp" , 160);
Html_entities_set(rv, tmp, "ndash" , 8211);
Html_entities_set(rv, tmp, "ne" , 8800);
Html_entities_set(rv, tmp, "ni" , 8715);
Html_entities_set(rv, tmp, "not" , 172);
Html_entities_set(rv, tmp, "notin" , 8713);
Html_entities_set(rv, tmp, "nsub" , 8836);
Html_entities_set(rv, tmp, "Ntilde" , 209);
Html_entities_set(rv, tmp, "ntilde" , 241);
Html_entities_set(rv, tmp, "Nu" , 925);
Html_entities_set(rv, tmp, "nu" , 957);
Html_entities_set(rv, tmp, "Oacute" , 211);
Html_entities_set(rv, tmp, "oacute" , 243);
Html_entities_set(rv, tmp, "Ocirc" , 212);
Html_entities_set(rv, tmp, "ocirc" , 244);
Html_entities_set(rv, tmp, "OElig" , 338);
Html_entities_set(rv, tmp, "oelig" , 339);
Html_entities_set(rv, tmp, "Ograve" , 210);
Html_entities_set(rv, tmp, "ograve" , 242);
Html_entities_set(rv, tmp, "oline" , 8254);
Html_entities_set(rv, tmp, "Omega" , 937);
Html_entities_set(rv, tmp, "omega" , 969);
Html_entities_set(rv, tmp, "Omicron" , 927);
Html_entities_set(rv, tmp, "omicron" , 959);
Html_entities_set(rv, tmp, "oplus" , 8853);
Html_entities_set(rv, tmp, "or" , 8744);
Html_entities_set(rv, tmp, "ordf" , 170);
Html_entities_set(rv, tmp, "ordm" , 186);
Html_entities_set(rv, tmp, "Oslash" , 216);
Html_entities_set(rv, tmp, "oslash" , 248);
Html_entities_set(rv, tmp, "Otilde" , 213);
Html_entities_set(rv, tmp, "otilde" , 245);
Html_entities_set(rv, tmp, "otimes" , 8855);
Html_entities_set(rv, tmp, "Ouml" , 214);
Html_entities_set(rv, tmp, "ouml" , 246);
Html_entities_set(rv, tmp, "para" , 182);
Html_entities_set(rv, tmp, "part" , 8706);
Html_entities_set(rv, tmp, "permil" , 8240);
Html_entities_set(rv, tmp, "perp" , 8869);
Html_entities_set(rv, tmp, "Phi" , 934);
Html_entities_set(rv, tmp, "phi" , 966);
Html_entities_set(rv, tmp, "Pi" , 928);
Html_entities_set(rv, tmp, "pi" , 960);
Html_entities_set(rv, tmp, "piv" , 982);
Html_entities_set(rv, tmp, "plusmn" , 177);
Html_entities_set(rv, tmp, "pound" , 163);
Html_entities_set(rv, tmp, "prime" , 8242);
Html_entities_set(rv, tmp, "Prime" , 8243);
Html_entities_set(rv, tmp, "prod" , 8719);
Html_entities_set(rv, tmp, "prop" , 8733);
Html_entities_set(rv, tmp, "Psi" , 936);
Html_entities_set(rv, tmp, "psi" , 968);
Html_entities_set(rv, tmp, "quot" , 34);
Html_entities_set(rv, tmp, "radic" , 8730);
Html_entities_set(rv, tmp, "rang" , 9002);
Html_entities_set(rv, tmp, "raquo" , 187);
Html_entities_set(rv, tmp, "rarr" , 8594);
Html_entities_set(rv, tmp, "rArr" , 8658);
Html_entities_set(rv, tmp, "rceil" , 8969);
Html_entities_set(rv, tmp, "rdquo" , 8221);
Html_entities_set(rv, tmp, "real" , 8476);
Html_entities_set(rv, tmp, "reg" , 174);
Html_entities_set(rv, tmp, "rfloor" , 8971);
Html_entities_set(rv, tmp, "Rho" , 929);
Html_entities_set(rv, tmp, "rho" , 961);
Html_entities_set(rv, tmp, "rlm" , 8207);
Html_entities_set(rv, tmp, "rsaquo" , 8250);
Html_entities_set(rv, tmp, "rsquo" , 8217);
Html_entities_set(rv, tmp, "sbquo" , 8218);
Html_entities_set(rv, tmp, "Scaron" , 352);
Html_entities_set(rv, tmp, "scaron" , 353);
Html_entities_set(rv, tmp, "sdot" , 8901);
Html_entities_set(rv, tmp, "sect" , 167);
Html_entities_set(rv, tmp, "shy" , 173);
Html_entities_set(rv, tmp, "Sigma" , 931);
Html_entities_set(rv, tmp, "sigma" , 963);
Html_entities_set(rv, tmp, "sigmaf" , 962);
Html_entities_set(rv, tmp, "sim" , 8764);
Html_entities_set(rv, tmp, "spades" , 9824);
Html_entities_set(rv, tmp, "sub" , 8834);
Html_entities_set(rv, tmp, "sube" , 8838);
Html_entities_set(rv, tmp, "sum" , 8721);
Html_entities_set(rv, tmp, "sup" , 8835);
Html_entities_set(rv, tmp, "sup1" , 185);
Html_entities_set(rv, tmp, "sup2" , 178);
Html_entities_set(rv, tmp, "sup3" , 179);
Html_entities_set(rv, tmp, "supe" , 8839);
Html_entities_set(rv, tmp, "szlig" , 223);
Html_entities_set(rv, tmp, "Tau" , 932);
Html_entities_set(rv, tmp, "tau" , 964);
Html_entities_set(rv, tmp, "there4" , 8756);
Html_entities_set(rv, tmp, "Theta" , 920);
Html_entities_set(rv, tmp, "theta" , 952);
Html_entities_set(rv, tmp, "thetasym" , 977);
Html_entities_set(rv, tmp, "thinsp" , 8201);
Html_entities_set(rv, tmp, "THORN" , 222);
Html_entities_set(rv, tmp, "thorn" , 254);
Html_entities_set(rv, tmp, "tilde" , 732);
Html_entities_set(rv, tmp, "times" , 215);
Html_entities_set(rv, tmp, "trade" , 8482);
Html_entities_set(rv, tmp, "Uacute" , 218);
Html_entities_set(rv, tmp, "uacute" , 250);
Html_entities_set(rv, tmp, "uarr" , 8593);
Html_entities_set(rv, tmp, "uArr" , 8657);
Html_entities_set(rv, tmp, "Ucirc" , 219);
Html_entities_set(rv, tmp, "ucirc" , 251);
Html_entities_set(rv, tmp, "Ugrave" , 217);
Html_entities_set(rv, tmp, "ugrave" , 249);
Html_entities_set(rv, tmp, "uml" , 168);
Html_entities_set(rv, tmp, "upsih" , 978);
Html_entities_set(rv, tmp, "Upsilon" , 933);
Html_entities_set(rv, tmp, "upsilon" , 965);
Html_entities_set(rv, tmp, "Uuml" , 220);
Html_entities_set(rv, tmp, "uuml" , 252);
Html_entities_set(rv, tmp, "weierp" , 8472);
Html_entities_set(rv, tmp, "Xi" , 926);
Html_entities_set(rv, tmp, "xi" , 958);
Html_entities_set(rv, tmp, "Yacute" , 221);
Html_entities_set(rv, tmp, "yacute" , 253);
Html_entities_set(rv, tmp, "yen" , 165);
Html_entities_set(rv, tmp, "Yuml" , 376);
Html_entities_set(rv, tmp, "yuml" , 255);
Html_entities_set(rv, tmp, "Zeta" , 918);
Html_entities_set(rv, tmp, "zeta" , 950);
Html_entities_set(rv, tmp, "zwj" , 8205);
Html_entities_set(rv, tmp, "zwnj" , 8204);
return rv;
}
// Registers `name_str` as a Type__entity whose html form is the numeric reference "&#<code>;".
// `tmp` is a scratch buffer; it is cleared by To_bry_and_clear.
private static void Html_entities_set(Hash_adp_bry rv, Bry_bfr tmp, String name_str, int code) {
	Html_entities_set
	( rv, Xomw_html_ent.Type__entity, code, name_str
	, tmp.Add_str_a7("&#").Add_int_variable(code).Add_byte_semic().To_bry_and_clear()
	);
}
// Registers `name_str` with an explicit html form given as a String (converted with Bry_.new_u8).
private static void Html_entities_set(Hash_adp_bry rv, byte type, int code, String name_str, String html_str) {
	byte[] html_bry = Bry_.new_u8(html_str);
	Html_entities_set(rv, type, code, name_str, html_bry);
}
// Registers `name_str` -> new Xomw_html_ent(type, code, name, html).
// If the name is already registered the existing entry is kept.
private static void Html_entities_set(Hash_adp_bry rv, byte type, int code, String name_str, byte[] html_bry) {
	byte[] key = Bry_.new_u8(name_str);
	Xomw_html_ent ent = new Xomw_html_ent(type, code, key, html_bry);
	// Add_dupe needed b/c "lt" and co. are added early; ignore subsequent call
	rv.Add_if_dupe_use_1st(key, ent);
}
}
// Immutable record for one named html entity.
class Xomw_html_ent {
	// kinds of entity; stored in `type`
	public static final byte
	  Type__null   = 0
	, Type__alias  = 1
	, Type__char   = 2
	, Type__entity = 3
	;

	public final byte type;		// one of the Type__* constants
	public final int code;		// codepoint of the entity
	public final byte[] name;	// entity name, without "&" and ";"
	public final byte[] html;	// html text emitted for the entity

	public Xomw_html_ent(byte type, int code, byte[] name, byte[] html) {
		this.html = html;
		this.name = name;
		this.code = code;
		this.type = type;
	}
}
// Hand-written matcher that splits a url into protocol / host / rest.
// After Match returns true:
//   [prot_bgn, prot_end) is the protocol including the colon;       EX: "https:"
//   [host_bgn, host_end) is the host including the leading "//";    EX: "//a.org"
//   [rest_bgn, rest_end) is everything after the host (may be empty); EX: "/bcd"
// After Match returns false the fields hold partial values and must not be used.
class Xomw_regex_find_domain {
	public int prot_bgn;
	public int prot_end;
	public int host_bgn;
	public int host_end;
	public int rest_bgn;
	public int rest_end;
	// Returns true if src[src_bgn, src_end) is "protocol:" + "//host" + rest, and sets the fields above.
	// NOTE: in the MW regex the host group is optional; this matcher returns false when "//host" is absent.
	public boolean Match(byte[] src, int src_bgn, int src_end) {
		// Validate hostname portion
		// XO.MW.REGEX: if (preg_match('!^([^:]+:)(//[^/]+)?(.*)$!iD', $url, $matches)) {
		// ([^:]+:)(//[^/]+)?(.*)
		// "protocol" + "host" + "rest"
		// "protocol" -> ([^:]+:)   EX: "https:"  anything not-colon up to colon
		// "host"     -> (//[^/]+)? EX: "//abc/"  anything not-slash up to slash
		// "rest"     -> (.*)       EX: "rest"
		// /i : case-insensitive
		// /D : $ matches EOS, not NL

		// find prot; EX: "https:"; prot_end is positioned after the colon
		prot_bgn = src_bgn;
		prot_end = Bry_find_.Move_fwd(src, Byte_ascii.Colon, prot_bgn, src_end);
		// exit if not found
		if (prot_end == Bry_find_.Not_found) return false;
		// exit if nothing precedes the colon; "[^:]+" requires at least one char; EX: "://a.org"
		if (prot_end - prot_bgn < 2) return false;

		// find host: EX: "//a.org"
		host_bgn = prot_end;
		int double_slash_end = host_bgn + 2;
		// exit if eos
		if (double_slash_end >= src_end) return false;
		// exit if not "//"
		if (	src[host_bgn    ] != Byte_ascii.Slash
			||	src[host_bgn + 1] != Byte_ascii.Slash
			) return false;
		host_end = Bry_find_.Find_fwd(src, Byte_ascii.Slash, double_slash_end, src_end);
		// no slash after host; host runs to eos and rest is empty; EX: "https://a.org"
		if (host_end == Bry_find_.Not_found)
			host_end = src_end;
		// exit if only "//"; EX: "https:///a.org/b"
		if (host_end - host_bgn == 2) return false;

		// set rest
		rest_bgn = host_end;
		rest_end = src_end;
		return true;
	}
}
// Hand-written equivalent of the character class used by MW cleanUrl:
//   [\][<>"\\x00-\\x20\\x7F\|]
class Xomw_regex_escape_invalid {
	// Scans src[src_bgn, src_end) and url-encodes every byte in the class above.
	// Returns true if at least one byte was encoded; only then does `bfr` hold the rewritten text.
	// Returns false with `bfr` untouched when nothing needed encoding.
	public boolean Escape(Bry_bfr bfr, byte[] src, int src_bgn, int src_end) {
		boolean escaped_any = false;
		int pending_bgn = src_bgn;	// start of the run of clean bytes not yet copied to bfr
		for (int pos = src_bgn; pos != src_end; pos++) {
			if (!Is_invalid_byte(src[pos])) continue;
			// copy the clean run, then the encoded byte
			bfr.Add_mid(src, pending_bgn, pos);
			gplx.langs.htmls.encoders.Gfo_url_encoder_.Php_urlencode.Encode(bfr, src, pos, pos + 1);
			escaped_any = true;
			pending_bgn = pos + 1;
		}
		// copy the trailing clean run, but only if bfr is in use
		if (escaped_any)
			bfr.Add_mid(src, pending_bgn, src_end);
		return escaped_any;
	}
	// true for '[', ']', '<', '>', '"', '|', DEL and the bytes 0 - 32
	private static boolean Is_invalid_byte(byte b) {
		switch (b) {
			case Byte_ascii.Brack_bgn:
			case Byte_ascii.Brack_end:
			case Byte_ascii.Angle_bgn:
			case Byte_ascii.Angle_end:
			case Byte_ascii.Quote:
			case Byte_ascii.Pipe:
			case Byte_ascii.Delete:
				return true;
			default:
				return b >= 0 && b <= 32;
		}
	}
}
// Hand-written matcher for a url-encoded, bracketed IPv6 host: "//%5B" + address + "%5D" + optional ":digits" segments.
// After Match returns true:
//   [host_bgn, host_end) is the address between "%5B" and "%5D";
//   [segs_bgn, segs_end) is everything after "%5D" (zero or more ":digits" segments; may be empty).
// The fields are written while scanning, so after a false result they hold partial values.
class Xomw_regex_ipv6_brack {
public int host_bgn;
public int host_end;
public int segs_bgn;
public int segs_end;
private final byte[]
Bry__host_bgn = Bry_.new_a7("//%5B")
, Bry__host_end = Bry_.new_a7("%5D")
;
// Returns true if src[src_bgn, src_end) matches the pattern described on the class.
// NOTE: the MW regex allows at most one ":digits" group; this matcher accepts any number of them.
public boolean Match(byte[] src, int src_bgn, int src_end) {
// preg_match('!^//%5B([0-9A-Fa-f:.]+)%5D((:\d+)?)$!', $host, $matches)
// XO.MW.REGEX:
// !^//%5B([0-9A-Fa-f:.]+)%5D((:\d+)?)$!
// "//%5B" + ("hex-dec" | [:.]) + "%5D" + numbers
// EX: [ABCD]:80:12
host_bgn = src_bgn + Bry__host_bgn.length;
// exit if no match for "//%5B"
if (!Bry_.Match(src, src_bgn, host_bgn, Bry__host_bgn)) return false;
// skip all [0-9A-Fa-f:.]
host_end = host_bgn;
while (true) {
// exit if eos
if (host_end == src_end) return false;
boolean done = false;
byte b = src[host_end];
switch (b) {
case Byte_ascii.Num_0: case Byte_ascii.Num_1: case Byte_ascii.Num_2: case Byte_ascii.Num_3: case Byte_ascii.Num_4:
case Byte_ascii.Num_5: case Byte_ascii.Num_6: case Byte_ascii.Num_7: case Byte_ascii.Num_8: case Byte_ascii.Num_9:
case Byte_ascii.Ltr_A: case Byte_ascii.Ltr_B: case Byte_ascii.Ltr_C: case Byte_ascii.Ltr_D: case Byte_ascii.Ltr_E: case Byte_ascii.Ltr_F:
case Byte_ascii.Ltr_a: case Byte_ascii.Ltr_b: case Byte_ascii.Ltr_c: case Byte_ascii.Ltr_d: case Byte_ascii.Ltr_e: case Byte_ascii.Ltr_f:
case Byte_ascii.Colon:
case Byte_ascii.Dot:
host_end++;
break;
case Byte_ascii.Percent:
// matches "%5D"
// segs_bgn is set to the position after a would-be "%5D", before that match is confirmed
segs_bgn = host_end + Bry__host_end.length;
if ( Bry_.Match(src, host_end, segs_bgn, Bry__host_end)
&& host_end - host_bgn > 0) // host can't be 0-len; EX: "//%5B%5D"
done = true;
// exit if no match
else {
return false;
}
break;
// exit if no match
default: {
return false;
}
}
if (done) break;
}
// skip all (:\d+)
segs_end = segs_bgn;
while (true) {
// stop if eos
if (segs_end == src_end) return true;
// check if ":"
if (src[segs_end] == Byte_ascii.Colon) {
int num_bgn = segs_end + 1;
int num_end = Bry_find_.Find_fwd_while_num(src, num_bgn, src_end);
// exit if no nums found; EX:"[ABC]:80:"
if (num_end == num_bgn) {
return false;
}
segs_end = num_end;
}
// exit if seg doesn't start with ":"
else {
return false;
}
}
}
}
// Callbacks invoked by Xomw_regex_url_char.Replace_by_cbk for each "&" it finds.
// Each callback writes its replacement to `bfr`; `name` is the reference body without "&", "#", "x" or ";".
// For When_dec / When_hex a false return makes the scanner treat the "&" as a lone ampersand (it calls When_amp).
// The scanner ignores the return values of When_ent and When_amp.
interface Xomw_regex_url_char_cbk {
// named entity; EX: "&alpha;" -> name is "alpha"
boolean When_ent(Bry_bfr bfr, byte[] name);
// decimal reference; EX: "&#09;" -> name is "09"
boolean When_dec(Bry_bfr bfr, byte[] name);
// hex reference; EX: "&#xFF;" -> name is "FF"
boolean When_hex(Bry_bfr bfr, byte[] name);
// "&" that does not start a well-formed reference
boolean When_amp(Bry_bfr bfr);
}
// Callback set that rewrites character references into a normalized, still-escaped form.
class Xomw_regex_url_char_cbk__normalize implements Xomw_regex_url_char_cbk {
	private final Xomw_sanitizer sanitizer;
	public Xomw_regex_url_char_cbk__normalize(Xomw_sanitizer sanitizer) {
		this.sanitizer = sanitizer;
	}
	// XO.MW:normalizeEntity
	// If the named entity is defined in the HTML 4.0/XHTML 1.0 DTD,
	// return the equivalent numeric entity reference (except for the core &lt;
	// &gt; &amp; &quot;). If the entity is a MediaWiki-specific alias, returns
	// the HTML equivalent. Otherwise, returns HTML-escaped text of
	// pseudo-entity source (eg &amp;foo;)
	public boolean When_ent(Bry_bfr bfr, byte[] name) {
		Xomw_html_ent known = (Xomw_html_ent)Xomw_sanitizer.html_entities.Get_by_bry(name);
		if (known != null) {
			bfr.Add(known.html);
			return true;
		}
		// unknown name: emit the escaped pseudo-entity
		bfr.Add_str_a7("&amp;").Add(name).Add_byte_semic();
		return false;
	}
	// XO.MW:decCharReference
	// Writes "&#<decimal>;" and returns true when the codepoint is valid; else writes nothing and returns false.
	public boolean When_dec(Bry_bfr bfr, byte[] name) {
		int codepoint = Bry_.To_int_or(name, -1);
		if (!sanitizer.Validate_codepoint(codepoint))
			return false;
		bfr.Add_str_a7("&#").Add_int_variable(codepoint).Add_byte_semic();
		return true;
	}
	// XO.MW:hexCharReference
	// Writes "&#x<hex>;" and returns true when the codepoint is valid; else writes nothing and returns false.
	public boolean When_hex(Bry_bfr bfr, byte[] name) {
		int codepoint = Hex_utl_.Parse_or(name, -1);
		if (!sanitizer.Validate_codepoint(codepoint))
			return false;
		bfr.Add_str_a7("&#x");
		Hex_utl_.Write_bfr(bfr, Bool_.Y, codepoint); // sprintf('&#x%x;', $point)
		bfr.Add_byte_semic();
		return true;
	}
	// transform "&" to "&amp;"
	public boolean When_amp(Bry_bfr bfr) {
		bfr.Add(Gfh_entity_.Amp_bry);
		return true;
	}
}
// Callback set that replaces character references with the characters they stand for.
// Every callback returns true, so the scanner never falls back to When_amp after When_dec / When_hex.
class Xomw_regex_url_char_cbk__decode implements Xomw_regex_url_char_cbk {
	private final Xomw_sanitizer sanitizer;
	public Xomw_regex_url_char_cbk__decode(Xomw_sanitizer sanitizer) {
		this.sanitizer = sanitizer;
	}
	public boolean When_ent(Bry_bfr bfr, byte[] name) {// XO.MW:decodeEntity
		// If the named entity is defined in the HTML 4.0/XHTML 1.0 DTD,
		// return the UTF-8 encoding of that character. Otherwise, returns
		// pseudo-entity source (eg "&foo;")
		Object o = Xomw_sanitizer.html_entities.Get_by_bry(name);
		if (o == null) {
			bfr.Add_byte(Byte_ascii.Amp).Add(name).Add_byte_semic();
		}
		else {
			Xomw_html_ent entity = (Xomw_html_ent)o;
			bfr.Add(gplx.core.intls.Utf16_.Encode_int_to_bry(entity.code));
		}
		return true;
	}
	// Decimal reference. An unparseable value maps to -1, which fails validation and so yields the
	// replacement character; this is the same parse call the normalize callback uses.
	public boolean When_dec(Bry_bfr bfr, byte[] name) {
		return Decode_char(bfr, Bry_.To_int_or(name, -1));
	}
	// Hex reference. An unparseable value maps to -1 and so yields the replacement character.
	public boolean When_hex(Bry_bfr bfr, byte[] name) {
		return Decode_char(bfr, gplx.core.encoders.Hex_utl_.Parse_or(name, 0, name.length, -1));
	}
	// A lone "&" is kept as-is.
	public boolean When_amp(Bry_bfr bfr) {
		bfr.Add_byte(Byte_ascii.Amp);
		return true;
	}
	private boolean Decode_char(Bry_bfr bfr, int point) {// XO.MW:decodeChar
		// Return UTF-8 String for a codepoint if that is a valid
		// character reference, otherwise U+FFFD REPLACEMENT CHARACTER.
		if (sanitizer.Validate_codepoint(point)) {
			bfr.Add(gplx.core.intls.Utf16_.Encode_int_to_bry(point));
		}
		else {
			bfr.Add(Utf8_replacement_char);
		}
		return true;
	}
	// U+FFFD encoded as UTF-8 is the three bytes EF BF BD; MW: const UTF8_REPLACEMENT = "\xef\xbf\xbd"
	private static final byte[] Utf8_replacement_char = new byte[] {(byte)0xEF, (byte)0xBF, (byte)0xBD};
}
// Hand-written scanner equivalent to MW's CHAR_REFS_REGEX. It finds each "&" and classifies what follows as a
// named entity, a decimal reference, a hex reference or a lone ampersand, then delegates the replacement
// to a Xomw_regex_url_char_cbk.
class Xomw_regex_url_char {
	// Regular expression to match various types of character references in
	// Sanitizer::normalizeCharReferences and Sanitizer::decodeCharReferences
	// static final CHAR_REFS_REGEX =
	// '/&([A-Za-z0-9\x80-\xff]+);
	// |&\#([0-9]+);
	// |&\#[xX]([0-9A-Fa-f]+);
	// |(&)/x';
	public Xomw_regex_url_char() {
		// assert static structs
		if (Normalize__dec == null) {
			synchronized (Xomw_sanitizer.class) {
				// hex digits only: [0-9A-Fa-f]
				Normalize__hex = Bool_ary_bldr.New_u8()
					.Set_rng(Byte_ascii.Num_0, Byte_ascii.Num_9)
					.Set_rng(Byte_ascii.Ltr_A, Byte_ascii.Ltr_F)
					.Set_rng(Byte_ascii.Ltr_a, Byte_ascii.Ltr_f)
					.To_ary();
				// entity names: [A-Za-z0-9\x80-\xff]
				Normalize__ent = Bool_ary_bldr.New_u8()
					.Set_rng(Byte_ascii.Num_0, Byte_ascii.Num_9)
					.Set_rng(Byte_ascii.Ltr_A, Byte_ascii.Ltr_Z)
					.Set_rng(Byte_ascii.Ltr_a, Byte_ascii.Ltr_z)
					.Set_rng(128, 255)
					.To_ary();
				// decimal digits: [0-9]; assigned last b/c Normalize__dec is the field tested by the null-check above
				Normalize__dec = Bool_ary_bldr.New_u8().Set_rng(Byte_ascii.Num_0, Byte_ascii.Num_9).To_ary();
			}
		}
	}
	// Scans src[src_bgn, src_end) and replaces every "&..." via `cbk`.
	// bfr == null ("called by bry"):
	//   * a buffer is created on the first "&"; the rewritten text is returned;
	//   * with no "&", src itself is returned when the range is the whole array, else a copy of the range.
	// bfr != null:
	//   * with at least one "&", the rewritten text is appended to bfr and Bry_.Empty is returned;
	//   * with no "&", null is returned, and the range is appended to bfr only if lone_bfr is true.
	public byte[] Replace_by_cbk(Bry_bfr bfr, boolean lone_bfr, byte[] src, int src_bgn, int src_end, Xomw_regex_url_char_cbk cbk) {
		// XO.BRY_BFR
		boolean dirty = false;
		int cur = src_bgn;
		boolean called_by_bry = bfr == null;

		while (true) {
			// search for "&"; bounded by src_end b/c src may be a larger backing array with unrelated bytes after src_end
			int find_bgn = Bry_find_.Find_fwd(src, Byte_ascii.Amp, cur, src_end);
			if (find_bgn == Bry_find_.Not_found) {	// "&" not found; exit
				if (dirty)
					bfr.Add_mid(src, cur, src_end);
				break;
			}
			int ent_bgn = find_bgn + 1;	// +1 to skip &

			// get regex; (a) dec (&#09;); (b) hex (&#xFF;); (c) entity (&alpha;);
			boolean[] regex = null;
			// check for #;
			if (ent_bgn < src_end && src[ent_bgn] == Byte_ascii.Hash) {
				ent_bgn++;
				if (ent_bgn < src_end) {
					byte nxt = src[ent_bgn];
					// check for x
					if (nxt == Byte_ascii.Ltr_X || nxt == Byte_ascii.Ltr_x) {
						ent_bgn++;
						regex = Normalize__hex;
					}
				}
				if (regex == null)
					regex = Normalize__dec;
			}
			else {
				regex = Normalize__ent;
			}

			// keep looping until invalid regex
			int ent_end = ent_bgn;
			int b = Byte_ascii.Null;
			for (int i = ent_bgn; i < src_end; i++) {
				b = src[i] & 0xFF; // PATCH.JAVA:need to convert to unsigned byte
				if (regex[b])
					ent_end++;
				else
					break;
			}

			// mark dirty; can optimize later by checking if "&lt;" already exists
			dirty = true;
			if (bfr == null) bfr = Bry_bfr_.New();
			bfr.Add_mid(src, cur, find_bgn); // add everything before &

			// invalid <- regex ended, but not at semic; or body is empty ("+" in the regex needs 1+ chars); EX: "&#;"
			if (b != Byte_ascii.Semic || ent_end == ent_bgn) {
				cbk.When_amp(bfr);
				cur = find_bgn + 1; // position after "&"
				continue;
			}

			// do normalization
			byte[] name = Bry_.Mid(src, ent_bgn, ent_end);
			boolean ret = false;
			if (regex == Normalize__ent) {
				// the callback always writes something for an entity, so its return value is not consulted
				cbk.When_ent(bfr, name);
				ret = true;
			}
			else if (regex == Normalize__dec) {
				ret = cbk.When_dec(bfr, name);
			}
			else if (regex == Normalize__hex) {
				ret = cbk.When_hex(bfr, name);
			}
			// callback rejected the reference: treat "&" as a lone ampersand and rescan what follows it
			if (!ret) {
				cbk.When_amp(bfr);
				cur = find_bgn + 1; // position after "&"
				continue;
			}

			cur = ent_end + 1; // +1 to position after ";"
		}

		// XO.BRY_BFR
		if (dirty) {
			if (called_by_bry)
				return bfr.To_bry_and_clear();
			else
				return Bry_.Empty;
		}
		else {
			if (called_by_bry) {
				if (src_bgn == 0 && src_end == src.length)
					return src;
				else
					return Bry_.Mid(src, src_bgn, src_end);
			}
			else {
				if (lone_bfr)
					bfr.Add_mid(src, src_bgn, src_end);
				return null;
			}
		}
	}
	// lookup tables indexed by unsigned byte value: true if the byte may appear in that kind of reference body
	private static boolean[] Normalize__dec, Normalize__hex, Normalize__ent;
}

View File

@@ -1,168 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.mws; import gplx.*; import gplx.xowa.*;
import org.junit.*; import gplx.core.tests.*; import gplx.core.btries.*; import gplx.xowa.mws.htmls.*;
public class Xomw_sanitizer__tst {
private final Xomw_sanitizer__fxt fxt = new Xomw_sanitizer__fxt();
@Test public void Normalize__text() {fxt.Test__normalize_char_references("abc" , "abc");}
@Test public void Normalize__dec() {fxt.Test__normalize_char_references("&#08;" , "&amp;#08;");}
@Test public void Normalize__dec__invalid() {fxt.Test__normalize_char_references("&#09;" , "&#9;");}
@Test public void Normalize__hex() {fxt.Test__normalize_char_references("&#xFF;" , "&#xff;");}
@Test public void Normalize__entity() {fxt.Test__normalize_char_references("&alpha;" , "&#945;");}
@Test public void Normalize__entity__lt() {fxt.Test__normalize_char_references("&lt;" , "&lt;");}
@Test public void Normalize__entity__alias() {fxt.Test__normalize_char_references("&רלמ;" , "&rlm;");}
@Test public void Normalize__amp() {fxt.Test__normalize_char_references("a&b" , "a&amp;b");}
@Test public void Normalize__invalid() {fxt.Test__normalize_char_references("&(invalid);" , "&amp;(invalid);");}
@Test public void Normalize__many() {
fxt.Test__normalize_char_references
( "a &#09; b &alpha; c &#xFF; d &(invalid); e"
, "a &#9; b &#945; c &#xff; d &amp;(invalid); e"
);
}
@Test public void Regex__domain() {
Xomw_regex_find_domain regex_domain = new Xomw_regex_find_domain();
// normal
fxt.Test__regex_domain_y(regex_domain, "https://a.org/bcd", "https:", "//a.org", "/bcd");
// trailing slash
fxt.Test__regex_domain_y(regex_domain, "https://a.org/", "https:", "//a.org", "/");
// domain only
fxt.Test__regex_domain_y(regex_domain, "https://a.org", "https:", "//a.org", "");
// colon not found
fxt.Test__regex_domain_n(regex_domain, "https//a.org/bcd");
// host_bgn.eos
fxt.Test__regex_domain_n(regex_domain, "https:");
// host_bgn.//
fxt.Test__regex_domain_n(regex_domain, "https:a//");
// host_bgn.///
fxt.Test__regex_domain_n(regex_domain, "https:///a.org/b");
}
@Test public void Regex__clean_url() {
Xomw_regex_escape_invalid regex = new Xomw_regex_escape_invalid();
// noop
fxt.Test__regex_escape_invalid(regex, "https://a.org/bcd", Bool_.N, "");
// symbols
fxt.Test__regex_escape_invalid(regex, "[]<>\"|", Bool_.Y, "%5B%5D%3C%3E%22%7C%7F");
// range: 00 - 32
fxt.Test__regex_escape_invalid(regex, "\t\n ", Bool_.Y, "%09%0A+");
}
@Test public void Regex__ipv6_brack() {
Xomw_regex_ipv6_brack regex = new Xomw_regex_ipv6_brack();
// basic
fxt.Test__regex_ipv6_brack(regex, Bool_.Y, "//%5B0a.1b:12%5D:123");
// port: none
fxt.Test__regex_ipv6_brack(regex, Bool_.Y, "//%5Ba%5D");
// port: multiple
fxt.Test__regex_ipv6_brack(regex, Bool_.Y, "//%5Ba%5D:1:2:3");
// "//%5B" missing
fxt.Test__regex_ipv6_brack(regex, Bool_.N, "abc");
// ipv6: invalid
fxt.Test__regex_ipv6_brack(regex, Bool_.N, "//%5Ba!%5D:1");
// ipv6: 0-len
fxt.Test__regex_ipv6_brack(regex, Bool_.N, "//%5B%5D:1");
// port: invalid
fxt.Test__regex_ipv6_brack(regex, Bool_.N, "//%5Ba%5D:a");
// port: 0-len
fxt.Test__regex_ipv6_brack(regex, Bool_.N, "//%5Ba%5D:");
}
@Test public void Decode() {
// dec
fxt.Test__decode_char_references("&#33;" , "!");
// hex
fxt.Test__decode_char_references("&#x23;" , "#");
// entity
fxt.Test__decode_char_references("&alpha;" , "α");
// entity:lt
fxt.Test__decode_char_references("&lt;" , "<");
// entity:rlm
fxt.Test__decode_char_references("&רלמ;" , "");
// entity:invalid
fxt.Test__decode_char_references("&invalid;" , "&invalid;");
// amp
fxt.Test__decode_char_references("a&b" , "a&b");
}
@Test public void Clean_url() {
// entity
fxt.Test__clean_url("http://a.org/b&amp;c" , "http://a.org/b&c");
// entity: escape
fxt.Test__clean_url("http://a.org/b&quot;c" , "http://a.org/b%22c");
// domain=n; make sure &quot; is changed, but not soft-hyphen
fxt.Test__clean_url("a&quot;­z" , "a%22­z");
// host: invalid idn
fxt.Test__clean_url("http://a᠆b.org/c᠆d" , "http://ab.org/c᠆d");
// ipv6_brack
fxt.Test__clean_url("http://[0a.1b:12]:123/cd" , "http://[0a.1b:12]:123/cd");
}
@Test public void Merge_atrs() {
Xomw_atr_mgr src_atrs = new Xomw_atr_mgr();
Xomw_atr_mgr trg_atrs = new Xomw_atr_mgr();
Xomw_atr_mgr expd_atrs = new Xomw_atr_mgr();
String cls = "class";
// basic: k1 + k2
fxt.Test__merge_attributes(src_atrs.Clear().Add_many("k1", "v1"), trg_atrs.Clear().Add_many("k2", "v2"), expd_atrs.Clear().Add_many("k1", "v1", "k2", "v2"));
// overwrite: k1 + k1
fxt.Test__merge_attributes(src_atrs.Clear().Add_many("k1", "v1"), trg_atrs.Clear().Add_many("k1", "v1a"), expd_atrs.Clear().Add_many("k1", "v1a"));
// cls: many
fxt.Test__merge_attributes(src_atrs.Clear().Add_many(cls, "v1 v2"), trg_atrs.Clear().Add_many(cls, "v3 v4"), expd_atrs.Clear().Add_many(cls, "v1 v2 v3 v4"));
// cls: src.empty
fxt.Test__merge_attributes(src_atrs.Clear(), trg_atrs.Clear().Add_many(cls, "v1"), expd_atrs.Clear().Add_many(cls, "v1"));
// cls: ws
fxt.Test__merge_attributes(src_atrs.Clear().Add_many(cls, " v1 v2 "), trg_atrs.Clear().Add_many(cls, " v3 v4 "), expd_atrs.Clear().Add_many(cls, "v1 v2 v3 v4"));
}
}
// Test fixture for Xomw_sanitizer__tst: turns String arguments into byte[] input, runs the code under test,
// and asserts the outcome via Gftest. One sanitizer and one scratch buffer are shared by all helpers.
class Xomw_sanitizer__fxt {
	private final Xomw_sanitizer sanitizer = new Xomw_sanitizer();
	private final Bry_bfr tmp = Bry_bfr_.New(); // scratch buffer; drained by each helper that writes to it

	// Runs Normalize_char_references over the whole input and compares the buffer content to expd.
	public void Test__normalize_char_references(String src_str, String expd) {
		byte[] raw = Bry_.new_u8(src_str);
		int raw_len = raw.length;
		sanitizer.Normalize_char_references(tmp, Bool_.Y, raw, 0, raw_len);
		String actl = tmp.To_str_and_clear();
		Gftest.Eq__str(expd, actl);
	}

	// Asserts that the domain regex matches, then checks the protocol / host / rest spans it reported.
	public void Test__regex_domain_y(Xomw_regex_find_domain regex_domain, String src_str, String expd_prot, String expd_host, String expd_rest) {
		byte[] raw = Bry_.new_u8(src_str);
		boolean matched = regex_domain.Match(raw, 0, raw.length);
		Gftest.Eq__bool(true, matched, src_str);

		byte[] actl_prot = Bry_.Mid(raw, regex_domain.prot_bgn, regex_domain.prot_end);
		Gftest.Eq__str(expd_prot, actl_prot);
		byte[] actl_host = Bry_.Mid(raw, regex_domain.host_bgn, regex_domain.host_end);
		Gftest.Eq__str(expd_host, actl_host);
		byte[] actl_rest = Bry_.Mid(raw, regex_domain.rest_bgn, regex_domain.rest_end);
		Gftest.Eq__str(expd_rest, actl_rest);
	}

	// Asserts that the domain regex does NOT match the input.
	public void Test__regex_domain_n(Xomw_regex_find_domain regex_domain, String src_str) {
		byte[] raw = Bry_.new_u8(src_str);
		boolean matched = regex_domain.Match(raw, 0, raw.length);
		Gftest.Eq__bool(false, matched, src_str);
	}

	// Checks both the boolean returned by Escape and whatever Escape wrote to the scratch buffer.
	public void Test__regex_escape_invalid(Xomw_regex_escape_invalid regex, String src_str, boolean expd_rslt, String expd_str) {
		byte[] raw = Bry_.new_u8(src_str);
		boolean actl_rslt = regex.Escape(tmp, raw, 0, raw.length);
		Gftest.Eq__bool(expd_rslt, actl_rslt);
		byte[] actl_bry = tmp.To_bry_and_clear();
		Gftest.Eq__str(expd_str, actl_bry);
	}

	// Checks only whether the ipv6-bracket regex matches.
	public void Test__regex_ipv6_brack(Xomw_regex_ipv6_brack regex, boolean expd_rslt, String src_str) {
		byte[] raw = Bry_.new_u8(src_str);
		boolean actl_rslt = regex.Match(raw, 0, raw.length);
		Gftest.Eq__bool(expd_rslt, actl_rslt);
	}

	// Runs Decode_char_references over the whole input and compares the buffer content to expd.
	public void Test__decode_char_references(String src_str, String expd) {
		byte[] raw = Bry_.new_u8(src_str);
		int raw_len = raw.length;
		sanitizer.Decode_char_references(tmp, Bool_.Y, raw, 0, raw_len);
		String actl = tmp.To_str_and_clear();
		Gftest.Eq__str(expd, actl);
	}

	// Compares the result of Clean_url to expd.
	public void Test__clean_url(String src_str, String expd) {
		byte[] raw = Bry_.new_u8(src_str);
		Gftest.Eq__str(expd, sanitizer.Clean_url(raw));
	}

	// Merges trg into src (src is the manager that is inspected afterwards) and compares src to expd, line by line.
	public void Test__merge_attributes(Xomw_atr_mgr src, Xomw_atr_mgr trg, Xomw_atr_mgr expd) {
		sanitizer.Merge_attributes(src, trg);
		// serialize expd first, then src: both use (and drain) the same scratch buffer
		String expd_lines = expd.To_str(tmp);
		String actl_lines = src.To_str(tmp);
		Gftest.Eq__ary__lines(expd_lines, actl_lines, "merge_atrs");
	}
}

View File

@@ -1,85 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.mws; import gplx.*; import gplx.xowa.*;
// Port of selected static helpers from MediaWiki's Xml class. All methods append to the caller's Bry_bfr.
// Attributes are passed as a flat List_adp of byte[]: key at even indexes, value at the following odd index.
public class Xomw_xml {
	// Format an XML element with given attributes and, optionally, text content.
	// Element and attribute names are assumed to be ready for literal inclusion.
	// Strings are assumed to not contain XML-illegal characters; special
	// characters (<, >, &) are escaped but illegals are not touched.
	// ARGS:
	//   attribs         : flat key / value list; may be null or empty (nothing is written for either)
	//   contents        : null writes only the open tag; zero-length writes " />" when allow_short_tag is true
	//   allow_short_tag : when false, zero-length contents produce "<elem></elem>"
	// XO.MW:SYNC:1.29; DATE:2017-02-03
	public static void Element(Bry_bfr bfr, byte[] element, List_adp attribs, byte[] contents, boolean allow_short_tag) {
		bfr.Add_byte(Byte_ascii.Angle_bgn).Add(element);
		Expand_attributes(bfr, attribs); // no-op for null / empty attribs
		if (contents == null) {
			bfr.Add_byte(Byte_ascii.Angle_end);
		}
		// compare by length, not by reference: MW tests `$contents === ''`, and callers may pass
		// a zero-length array that is not the Bry_.Empty instance
		else if (allow_short_tag && contents.length == 0) {
			bfr.Add_str_a7(" />");
		}
		else {
			bfr.Add_byte(Byte_ascii.Angle_end);
			bfr.Add_bry_escape_html(contents);
			Close_element(bfr, element);
		}
	}
	// Given an array of ('attributename' => 'value'), it generates the code
	// to set the XML attributes : attributename="value".
	// The values are passed to Sanitizer::encodeAttribute.
	// Writes nothing if attribs is null or empty (MW returns null / '' in that case).
	// @param attribs flat list: key at index i, value at index i + 1
	// XO.MW:SYNC:1.29; DATE:2017-02-03
	public static void Expand_attributes(Bry_bfr bfr, List_adp attribs) {
		if (attribs == null) return; // MW: expandAttributes(null) yields no output
		int attribs_len = attribs.Len();
		for (int i = 0; i < attribs_len; i += 2) {
			// XO.MW: $out .= " {$name}=\"" . Sanitizer::encodeAttribute( $val ) . '"';
			bfr.Add_byte_space();
			bfr.Add((byte[])attribs.Get_at(i));
			bfr.Add_byte_eq().Add_byte_quote();
			Xomw_sanitizer.Encode_attribute(bfr, (byte[])attribs.Get_at(i + 1));
			bfr.Add_byte_quote();
		}
	}
	// This opens an XML element: "<" element attributes ">"
	// XO.MW:SYNC:1.29; DATE:2017-02-03
	public static void Open_element(Bry_bfr bfr, byte[] element, List_adp attribs) {
		bfr.Add_byte(Byte_ascii.Angle_bgn).Add(element);
		Expand_attributes(bfr, attribs);
		bfr.Add_byte(Byte_ascii.Angle_end);
	}
	// Shortcut to close an XML element: "</" element ">"
	// XO.MW:SYNC:1.29; DATE:2017-02-03
	public static void Close_element(Bry_bfr bfr, byte[] element) {
		bfr.Add_byte(Byte_ascii.Angle_bgn).Add_byte(Byte_ascii.Slash).Add(element).Add_byte(Byte_ascii.Angle_end);
	}
	// Same as Xml::element(), but does not escape contents. Handy when the
	// content you have is already valid xml.
	// XO.MW:SYNC:1.29; DATE:2017-02-03
	public static void Tags(Bry_bfr bfr, byte[] element, List_adp attribs, byte[] contents) {
		Open_element(bfr, element, attribs);
		bfr.Add(contents);
		Close_element(bfr, element);
	}
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -1,21 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.mws.filerepo.file; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.filerepo.*;
// Strategy for resolving a page title to a file object.
public interface Xomw_file_finder {
	// Looks up the file for the given title.
	// NOTE(review): the implementations in this package can return null (noop always does); callers should presumably null-check.
	Xomw_File Find_file(Xoa_ttl ttl);
}

View File

@@ -1,32 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.mws.filerepo.file; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.filerepo.*;
import gplx.xowa.mws.parsers.*;
// In-memory Xomw_file_finder: files are registered with Add and served back by Find_file.
public class Xomw_file_finder__mock implements Xomw_file_finder {
	private final Xomw_parser_env env;
	private final Hash_adp hash = Hash_adp_.New(); // key: title String; val: Xomw_File

	public Xomw_file_finder__mock(Xomw_parser_env env) {
		this.env = env;
	}

	// Forgets all registered files.
	public void Clear() {
		hash.Clear();
	}

	// Returns whatever was registered under the title's Page_db_as_str() key.
	public Xomw_File Find_file(Xoa_ttl ttl) {
		String key = ttl.Page_db_as_str();
		Object found = hash.Get_by(key);
		return (Xomw_File)found;
	}

	// Registers a new Xomw_LocalFile under the given title.
	public void Add(String title, Xomw_FileRepo repo, int w, int h, byte[] mime) {
		byte[] title_bry = Bry_.new_u8(title);
		Xomw_LocalFile local_file = new Xomw_LocalFile(env, title_bry, repo, w, h, mime);
		hash.Add_if_dupe_use_nth(title, local_file);
	}
}

View File

@@ -1,21 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.mws.filerepo.file; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.filerepo.*;
// Xomw_file_finder that never finds anything.
public class Xomw_file_finder__noop implements Xomw_file_finder {
	// Always returns null, whatever the title.
	public Xomw_File Find_file(Xoa_ttl ttl) {
		return null;
	}
}

View File

@@ -1,29 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.mws.htmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
// One attribute: an int key, a byte[] key and a byte[] value. Only the value can be changed after construction.
public class Xomw_atr_itm {
	private int key_int;
	private byte[] key_bry;
	private byte[] val;

	public Xomw_atr_itm(int key_int, byte[] key, byte[] val) {
		this.key_int = key_int;
		this.key_bry = key;
		this.val = val;
	}

	// int form of the key; other classes in this package pass -1 when they build an item from a byte[] key only
	public int Key_int() {
		return key_int;
	}

	// byte[] form of the key
	public byte[] Key_bry() {
		return key_bry;
	}

	// current value
	public byte[] Val() {
		return val;
	}

	// replaces the value
	public void Val_(byte[] v) {
		this.val = v;
	}
}

View File

@@ -1,72 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.mws.htmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
// Collection of Xomw_atr_itm, backed by an Ordered_hash keyed on the item's byte[] key.
public class Xomw_atr_mgr {
	private final Ordered_hash hash = Ordered_hash_.New_bry();

	// number of attributes
	public int Len() {
		return hash.Len();
	}

	// attribute at position i
	public Xomw_atr_itm Get_at(int i) {
		Object itm = hash.Get_at(i);
		return (Xomw_atr_itm)itm;
	}

	// attribute with key k, as returned by the underlying hash
	public Xomw_atr_itm Get_by_or_null(byte[] k) {
		Object itm = hash.Get_by(k);
		return (Xomw_atr_itm)itm;
	}

	// removes all attributes; returns this for chaining
	public Xomw_atr_mgr Clear() {
		hash.Clear();
		return this;
	}

	// removes the attribute with the given key
	public void Del(byte[] key) {
		hash.Del(key);
	}

	// adds the item under its byte[] key
	public void Add(Xomw_atr_itm itm) {
		hash.Add(itm.Key_bry(), itm);
	}

	// wraps key / val in a new item (int key = -1) and adds it; returns this for chaining
	public Xomw_atr_mgr Add(byte[] key, byte[] val) {
		Xomw_atr_itm itm = new Xomw_atr_itm(-1, key, val);
		this.Add(itm);
		return this;
	}

	// adds src if its key is unknown; otherwise copies src's value onto the existing item
	public void Add_or_set(Xomw_atr_itm src) {
		Xomw_atr_itm existing = (Xomw_atr_itm)hash.Get_by(src.Key_bry());
		if (existing != null) {
			existing.Val_(src.Val());
			return;
		}
		this.Add(src);
	}

	// sets the value for key, creating the attribute if needed
	public void Set(byte[] key, byte[] val) {
		Get_by_or_make(key).Val_(val);
	}

	// returns the attribute for k; if none exists, adds one with a null value (int key = -1) and returns that
	public Xomw_atr_itm Get_by_or_make(byte[] k) {
		Xomw_atr_itm found = (Xomw_atr_itm)hash.Get_by(k);
		if (found != null) return found;
		Xomw_atr_itm made = new Xomw_atr_itm(-1, k, null);
		Add(made);
		return made;
	}

	// value of the attribute for k, or null if there is no such attribute
	public byte[] Get_val_or_null(byte[] k) {
		Xomw_atr_itm found = (Xomw_atr_itm)hash.Get_by(k);
		if (found == null) return null;
		return found.Val();
	}

	// TEST: adds attributes from alternating key, value Strings; returns this for chaining
	public Xomw_atr_mgr Add_many(String... kvs) {
		int kvs_len = kvs.length;
		int idx = 0;
		while (idx < kvs_len) {
			byte[] key = Bry_.new_u8(kvs[idx]);
			byte[] val = Bry_.new_u8(kvs[idx + 1]);
			Add(key, val);
			idx += 2;
		}
		return this;
	}

	// TEST: writes one "key=val\n" line per attribute to tmp, then returns tmp's content and clears it
	public String To_str(Bry_bfr tmp) {
		int len = this.Len();
		for (int i = 0; i < len; i++) {
			Xomw_atr_itm itm = this.Get_at(i);
			tmp.Add(itm.Key_bry());
			tmp.Add_byte_eq();
			tmp.Add(itm.Val());
			tmp.Add_byte_nl();
		}
		return tmp.To_str_and_clear();
	}
}

View File

@@ -1,26 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.mws.htmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
// Holder for an html element name.
public class Xomw_html_elem {
	private final byte[] name; // EX: "a", "div", "img"

	public Xomw_html_elem(byte[] name) {
		this.name = name;
	}

	// element name, exactly as passed to the constructor
	public byte[] Name() {
		return name;
	}
	// disabled code, kept as found:
	// private static final Hash_adp_bry void_elements = Hash_adp_bry.cs().Add_many_str("area", "super", "br", "col", "embed", "hr", "img", "input", "keygen", "link", "meta", "param", "source", "track", "wbr");
}

View File

@@ -1,267 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.mws.htmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
import gplx.core.btries.*;
import gplx.xowa.mws.utls.*;
// Port of parts of MediaWiki's Html class (rawElement / openElement / expandAttributes / closeElement).
// Instances hold scratch objects (tmp, trv) that are reused across calls.
public class Xomw_html_utl {
	private final Bry_bfr tmp = Bry_bfr_.New();
	private final Btrie_rv trv = new Btrie_rv();
	// Writes "<element attribs>contents</element>" to bfr; for void elements writes "<element attribs/>" and ignores contents.
	// contents is written as-is (not escaped).
	public void Raw_element(Bry_bfr bfr, byte[] element, Xomw_atr_mgr attribs, byte[] contents) {
		Bry_.Lcase__all(element); // XO:lcase element
		Open_element__lcased(bfr, element, attribs);
		if (void_elements.Has(element)) {
			// replace the ">" just written by Open_element__lcased with "/>"
			bfr.Del_by_1().Add(Bry__elem__lhs__inl);
		}
		else {
			bfr.Add(contents);
			Close_element__lcased(bfr, element);
		}
	}
	// Writes the open tag. May modify attribs: drops an invalid "type" on <input>, defaults "type" to "submit" on <button>.
	private void Open_element__lcased(Bry_bfr bfr, byte[] element, Xomw_atr_mgr attribs) {
		// This is not required in HTML5, but let's do it anyway, for
		// consistency and better compression.
		// $element = strtolower($element); // XO:handled by callers
		// Remove invalid input types
		if (Bry_.Eq(element, Tag__input)) {
			// PORTED.HEADER:valid_input_types
			byte[] type_atr_val = attribs.Get_val_or_null(Atr__type);
			if (type_atr_val != null && !valid_input_types.Has(type_atr_val)) {
				attribs.Del(Atr__type);
			}
		}
		// According to standard the default type for <button> elements is "submit".
		// Depending on compatibility mode IE might use "button", instead.
		// We enforce the standard "submit".
		if (Bry_.Eq(element, Tag__button) && attribs.Get_val_or_null(Atr__type) == null) {
			attribs.Set(Atr__type, Val__type__submit);
		}
		bfr.Add_byte(Byte_ascii.Angle_bgn).Add(element);
		Expand_attributes(bfr, attribs); // TODO.XO:self::dropDefaults($element, $attribs)
		bfr.Add_byte(Byte_ascii.Angle_end);
	}
	// Writes each attribute as ` key="val"` (val entity-encoded), or as ` key=""` when the key is a boolean attribute.
	// Attributes with a null value are skipped. Keys are lower-cased.
	public void Expand_attributes(Bry_bfr bfr, Xomw_atr_mgr atrs) {
		int len = atrs.Len();
		for (int i = 0; i < len; i++) {
			Xomw_atr_itm atr = (Xomw_atr_itm)atrs.Get_at(i);
			byte[] key = atr.Key_bry();
			byte[] val = atr.Val();
			// Support intuitive [ 'checked' => true/false ] form
			if (val == null) { // TESTME
				continue;
			}
			// For boolean attributes, support [ 'foo' ] instead of
			// requiring [ 'foo' => 'meaningless' ].
			// MW: if ( is_int( $key ) && in_array( strtolower( $value ), self::$boolAttribs ) ) $key = $value;
			if (atr.Key_int() != -1 && bool_attribs.Has(val)) {
				key = val;
			}
			// Not technically required in HTML5 but we'd like consistency
			// and better compression anyway.
			key = Bry_.Xcase__build__all(tmp, Bool_.N, key);
			// PORTED.HEADER:$spaceSeparatedListAttributes
			// Specific features for attributes that allow a list of space-separated values
			if (space_separated_list_attributes.Has(key)) {
				// Apply some normalization and remove duplicates
				// Convert into correct array. Array can contain space-separated
				// values. Implode/explode to get those into the main array as well.
				// if (is_array($value)) {
				// If input wasn't an array, we can skip this step
				// $newValue = [];
				// foreach ($value as $k => $v) {
				// if (is_string($v)) {
				// String values should be normal `array('foo')`
				// Just append them
				// if (!isset($value[$v])) {
				// As a special case don't set 'foo' if a
				// separate 'foo' => true/false exists in the array
				// keys should be authoritative
				// $newValue[] = $v;
				// }
				// }
				// elseif ($v) {
				// If the value is truthy but not a String this is likely
				// an [ 'foo' => true ], falsy values don't add strings
				// $newValue[] = $k;
				// }
				// }
				// $value = implode(' ', $newValue);
				// }
				// $value = explode(' ', $value);
				// Normalize spacing by fixing up cases where people used
				// more than 1 space and/or a trailing/leading space
				// $value = array_diff($value, [ '', ' ' ]);
				// Remove duplicates and create the String
				// $value = implode(' ', array_unique($value));
			}
			// DELETE
			// elseif (is_array($value)) {
			// throw new MWException("HTML attribute $key can not contain a list of values");
			// }
			// MW: if ( in_array( $key, self::$boolAttribs ) ); the test is on the KEY.
			// Testing the value instead would turn e.g. type="hidden" into type="".
			if (bool_attribs.Has(key)) {
				bfr.Add_byte_space().Add(key).Add(Bry__atr__val__empty); // $ret .= " $key=\"\"";
			}
			else {
				// PORTED.HEADER:atr_val_encodings
				val = Php_str_.Strtr(val, atr_val_encodings, tmp, trv);
				bfr.Add_byte_space().Add(key).Add(Bry__atr__val__quote).Add(val).Add_byte_quote();
			}
		}
	}
	// Writes the close tag.
	private void Close_element__lcased(Bry_bfr bfr, byte[] element) {
		bfr.Add(Bry__elem__rhs__bgn).Add(element).Add_byte(Byte_ascii.Angle_end); // EX: "</", element, ">";
	}
	private static final byte[]
	  Bry__elem__lhs__inl = Bry_.new_a7("/>")
	, Bry__elem__rhs__bgn = Bry_.new_a7("</")
	, Bry__atr__val__quote = Bry_.new_a7("=\"")
	, Bry__atr__val__empty = Bry_.new_a7("=\"\"")
	, Tag__input = Bry_.new_a7("input")
	, Tag__button = Bry_.new_a7("button")
	, Atr__type = Bry_.new_a7("type")
	, Val__type__submit = Bry_.new_a7("submit")
	;
	// List of void elements from HTML5, section 8.1.2 as of 2016-09-19
	private static final Hash_adp_bry void_elements = Hash_adp_bry.cs().Add_many_str
	(
	"area",
	"base", // XO: was "super" (keyword-conversion artifact); the HTML5 void element is <base>
	"br",
	"col",
	"embed",
	"hr",
	"img",
	"input",
	"keygen",
	"link",
	"meta",
	"param",
	"source",
	"track",
	"wbr"
	);
	// Boolean attributes, which may have the value omitted entirely. Manually
	// collected from the HTML5 spec as of 2011-08-12.
	private static final Hash_adp_bry bool_attribs = Hash_adp_bry.ci_a7().Add_many_str(
	"async",
	"autofocus",
	"autoplay",
	"checked",
	"controls",
	"default",
	"defer",
	"disabled",
	"formnovalidate",
	"hidden",
	"ismap",
	// "itemscope", //XO:duplicate; added below
	"loop",
	"multiple",
	"muted",
	"novalidate",
	"open",
	"pubdate",
	"readonly", // XO: was "final " (keyword-conversion artifact); the HTML5 boolean attribute is readonly
	"required",
	"reversed",
	"scoped",
	"seamless",
	"selected",
	"truespeed",
	"typemustmatch",
	// HTML5 Microdata
	"itemscope"
	);
	private static final Btrie_slim_mgr atr_val_encodings = Btrie_slim_mgr.cs()
	// Apparently we need to entity-encode \n, \r, \t, although the
	// spec doesn't mention that. Since we're doing strtr() anyway,
	// we may as well not call htmlspecialchars().
	// @todo FIXME: Verify that we actually need to
	// escape \n\r\t here, and explain why, exactly.
	// We could call Sanitizer::encodeAttribute() for this, but we
	// don't because we're stubborn and like our marginal savings on
	// byte size from not having to encode unnecessary quotes.
	// The only difference between this transform and the one by
	// Sanitizer::encodeAttribute() is ' is not encoded.
	.Add_str_str("&" , "&amp;")
	.Add_str_str("\"" , "&quot;")
	.Add_str_str(">" , "&gt;")
	// '<' allegedly allowed per spec
	// but breaks some tools if not escaped.
	.Add_str_str("<" , "&lt;")
	.Add_str_str("\n" , "&#10;")
	.Add_str_str("\r" , "&#13;")
	.Add_str_str("\t" , "&#9;");
	// https://www.w3.org/TR/html401/index/attributes.html ("space-separated")
	// https://www.w3.org/TR/html5/index.html#attributes-1 ("space-separated")
	private static final Hash_adp_bry space_separated_list_attributes = Hash_adp_bry.ci_a7().Add_many_str(
	"class", // html4, html5
	"accesskey", // as of html5, multiple space-separated values allowed
	// html4-spec doesn't document rel= as space-separated
	// but has been used like that and is now documented as such
	// in the html5-spec.
	"rel"
	);
	private static final Hash_adp_bry valid_input_types = Hash_adp_bry.ci_a7().Add_many_str(
	// Remove invalid input types
	"hidden",
	"text",
	"password",
	"checkbox",
	"radio",
	"file",
	"submit",
	"image",
	"reset",
	"button",
	// HTML input types
	"datetime",
	"datetime-local",
	"date",
	"month",
	"time",
	"week",
	"number",
	"range",
	"email",
	"url",
	"search",
	"tel",
	"color"
	);
}

View File

@@ -1,39 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.mws.htmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
import org.junit.*; import gplx.core.tests.*;
// Tests for Xomw_html_utl.Expand_attributes.
public class Xomw_html_utl__expand_attributes__tst {
	private final Xomw_html_utl__expand_attributes__fxt fxt = new Xomw_html_utl__expand_attributes__fxt();

	// a single key / value pair is written as ` key="val"`
	@Test public void Basic() {
		fxt.Test__expand_attributes(" a=\"b\"", "a", "b");
	}
}
// Fixture for Xomw_html_utl__expand_attributes__tst.
class Xomw_html_utl__expand_attributes__fxt {
	private final Xomw_html_utl utl = new Xomw_html_utl();
	private final Bry_bfr bfr = Bry_bfr_.New();

	// Builds an attribute manager from alternating key, value Strings (each item gets int key -1),
	// expands it into the buffer and compares the buffer content to expd.
	public void Test__expand_attributes(String expd, String... kvs) {
		Xomw_atr_mgr atrs = new Xomw_atr_mgr();
		int kvs_len = kvs.length;
		int idx = 0;
		while (idx < kvs_len) {
			byte[] key = Bry_.new_a7(kvs[idx]);
			byte[] val = Bry_.new_a7(kvs[idx + 1]);
			atrs.Add(new Xomw_atr_itm(-1, key, val));
			idx += 2;
		}
		utl.Expand_attributes(bfr, atrs);
		String actl = bfr.To_str_and_clear();
		Gftest.Eq__str(expd, actl);
	}
}

View File

@@ -1,24 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.mws.htmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
// Plain bag of option fields; all are public and mutable.
// NOTE(review): presumably mirrors an options array from the MediaWiki code being ported -- confirm against callers.
public class Xomw_opt_mgr {
	public boolean known;       // defaults to false
	public boolean broken;      // defaults to false
	public boolean no_classes;  // defaults to false
	public byte[] time;         // defaults to null
}

View File

@@ -1,27 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.mws.htmls; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
// Plain bag of query fields; both are public and mutable.
public class Xomw_qry_mgr {
	public byte[] action;
	public int redlink;

	// Resets the fields (action = null, redlink = -1) and returns this for chaining.
	// Note that a freshly constructed instance has redlink == 0 until Clear is called.
	public Xomw_qry_mgr Clear() {
		this.action = null;
		this.redlink = -1;
		return this;
	}
}

View File

@@ -1,125 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.mws.libs; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
import gplx.core.btries.*;
// Port of selected helpers from MediaWiki's StringUtils (/includes/libs/StringUtils.php).
public class Xomw_string_utils {
	// token ids stored in delimiter_explode_trie
	private static final byte Delimiter_explode__sep = 0;
	private static final byte Delimiter_explode__bgn = 1;
	private static final byte Delimiter_explode__end = 2;
	private static final Btrie_slim_mgr delimiter_explode_trie = Btrie_slim_mgr.cs()
		.Add_str_byte("|" , Delimiter_explode__sep)
		.Add_str_byte("-{", Delimiter_explode__bgn)
		.Add_str_byte("}-", Delimiter_explode__end)
		;

	// Explode a String, but ignore any instances of the separator inside
	// the given start and end delimiters, which may nest.
	// The delimiters are literal strings, not regular expressions.
	// XO.MW: hard-coding (a) nested=true; (b) bgn="-{" end="}-" sep="|"
	// XO.MW:SYNC:1.29; DATE:2017-02-03
	// @param tmp scratch list; pieces are collected in it and it is cleared when the result array is built
	// @param trv trie match state; read for the position after each matched token
	// @param src subject to explode
	// @return the pieces; the text after the last top-level separator is always added, even when empty
	public static byte[][] Delimiter_explode(List_adp tmp, Btrie_rv trv, byte[] src) {
		int src_end = src.length;
		int nesting = 0;   // number of currently open "-{"
		int piece_bgn = 0; // start of the piece being collected
		int pos = 0;
		while (pos != src_end) {
			Object match = delimiter_explode_trie.Match_at(trv, src, pos, src_end);
			if (match == null) { // regular char
				pos++;
				continue;
			}
			byte tid = ((gplx.core.primitives.Byte_obj_val)match).Val();
			if (tid == Delimiter_explode__sep) {
				// a separator only splits when it is outside all delimiters
				if (nesting == 0) {
					tmp.Add(Bry_.Mid(src, piece_bgn, pos));
					piece_bgn = pos + 1;
				}
			}
			else if (tid == Delimiter_explode__bgn) {
				nesting++;
			}
			else if (tid == Delimiter_explode__end) {
				nesting--;
			}
			pos = trv.Pos(); // resume after the matched token
		}
		// eos: add rest
		tmp.Add(Bry_.Mid(src, piece_bgn, src_end));
		return (byte[][])tmp.To_ary_and_clear(byte[].class);
	}

	// More or less "markup-safe" str_replace()
	// Ignores any instances of find inside `<...>`
	// REF:/includes/libs/StringUtils.php|replaceMarkup
	// PORTED: MW does this with a "\x00" placeholder and several str_replace / regex passes; here it is done
	// in one pass, IN PLACE on src, which is why find and repl must be the same length; EX: "!!" -> "||"
	// @throws (via Err_.new_wo_type) when find and repl differ in length
	public static void Replace_markup(byte[] src, int src_bgn, int src_end, byte[] find, byte[] repl) {
		int find_len = find.length;
		if (find_len != repl.length) throw Err_.new_wo_type("find and repl should be same length");
		byte find_0 = find[0];
		boolean outside_tag = true; // false between "<" and the next ">"
		int pos = src_bgn;
		while (pos < src_end) {
			byte b = src[pos];
			boolean replace_here
				=  b == find_0
				&& Bry_.Match(src, pos + 1, pos + find_len, find, 1, find_len)
				&& outside_tag;
			if (replace_here) {
				Bry_.Set(src, pos, pos + find_len, repl);
			}
			else if (b == Byte_ascii.Angle_bgn) {
				outside_tag = false;
			}
			else if (b == Byte_ascii.Angle_end) {
				outside_tag = true;
			}
			pos++;
		}
	}
}

View File

@@ -1,60 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.mws.libs; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
import org.junit.*; import gplx.core.tests.*;
/** Unit tests for Xomw_string_utils.Delimiter_explode and Xomw_string_utils.Replace_markup. */
public class Xomw_string_utils__tst {
    private final Xomw_string_utils__fxt tstr = new Xomw_string_utils__fxt();

    @Test public void Delimiter_explode() {
        // plain separators
        tstr.Test__delimiter_explode("a|b|c", "a", "b", "c");

        // leading, doubled and trailing separators yield empty pieces
        tstr.Test__delimiter_explode("|a||c|", "", "a", "", "c", "");

        // one level of "-{ }-": the inner "|" does not split
        tstr.Test__delimiter_explode("a|-{b|c}-|d", "a", "-{b|c}-", "d");

        // nested "-{ }-": still one piece
        tstr.Test__delimiter_explode("a|-{b-{c|d}-e}-|f", "a", "-{b-{c|d}-e}-", "f");
    }

    @Test public void Replace_markup() {
        // single occurrence
        tstr.Test__replace_markup("a!!b", "!!", "||", "a||b");

        // no occurrence: unchanged
        tstr.Test__replace_markup("abcd", "!!", "||", "abcd");

        // occurrence at end of string
        tstr.Test__replace_markup("a!!", "!!", "||", "a||");

        // occurrence inside <...> is left alone
        tstr.Test__replace_markup("a!!b<!!>!!c", "!!", "||", "a||b<!!>||c");

        // asym_lhs: extra "<" inside the tag
        tstr.Test__replace_markup("a!!b<!!<!!>!!c", "!!", "||", "a||b<!!<!!>||c");

        // asym_rhs: extra ">" after the tag
        // NOTE: should probably be "!!>!!>", but unmatched ">" are escaped to "&gt;"
        tstr.Test__replace_markup("a!!b<!!>!!>!!c", "!!", "||", "a||b<!!>||>||c");
    }
}
/** Test fixture: converts String arguments to bytes, calls Xomw_string_utils and compares the result. */
class Xomw_string_utils__fxt {
    /** Explodes src_str and checks that the pieces equal expd, in order. */
    public void Test__delimiter_explode(String src_str, String... expd) {
        byte[] src_bry = Bry_.new_u8(src_str);
        byte[][] actl = Xomw_string_utils.Delimiter_explode
            ( List_adp_.New()
            , new gplx.core.btries.Btrie_rv()
            , src_bry
            );
        Gftest.Eq__ary(expd, actl, "src=~{0}", src_str);
    }

    /** Runs the in-place replacement over all of src_str and checks the resulting bytes against expd. */
    public void Test__replace_markup(String src_str, String find, String repl, String expd) {
        byte[] actl = Bry_.new_u8(src_str); // replaced in place, so this doubles as the result
        byte[] find_bry = Bry_.new_a7(find);
        byte[] repl_bry = Bry_.new_a7(repl);
        Xomw_string_utils.Replace_markup(actl, 0, actl.length, find_bry, repl_bry);
        Gftest.Eq__str(expd, actl);
    }
}

View File

@@ -1,213 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.mws.linkers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
import gplx.langs.htmls.*;
import gplx.xowa.mws.htmls.*;
/* TODO.XO
* P7: $html = HtmlArmor::getHtml($text);
* P3: Get_link_url [alternate urls? EX: mw/wiki/index.php/title?]
* P2: titleFormatter->getPrefixedText [depends on redlinks]
* P1: Get_link_classes [depends on redlinks]
*/
/**
 * XO port of MediaWiki's LinkRenderer: writes the {@code <a>} element for a title into a buffer.
 *
 * The instance keeps one reusable attribute list ({@code attribs}) and one scratch list
 * ({@code tmp_merge_deleted}); both are cleared and refilled on each call.
 */
public class Xomw_link_renderer {
    private boolean expand_urls = false;
    private final Xomw_html_utl html_utl = new Xomw_html_utl();
    private final Xomw_atr_mgr attribs = new Xomw_atr_mgr();
    private final List_adp tmp_merge_deleted = List_adp_.New();
    private final Xomw_sanitizer sanitizer;
    public Xomw_link_renderer(Xomw_sanitizer sanitizer) {
        this.sanitizer = sanitizer;
    }

    /**
     * Writes a link to target, choosing the "known" or "broken" form from target.Is_known().
     * The classes argument is accepted but not used by this method.
     */
    // XO.MW:SYNC:1.29; DATE:2017-01-31
    public void Make_link(Bry_bfr bfr, Xoa_ttl target, byte[] text, byte[] classes, Xomw_atr_mgr extra_atrs, Xomw_qry_mgr query) {
        if (target.Is_known()) {
            this.Make_known_link(bfr, target, text, extra_atrs, query);
        } else {
            this.Make_broken_link(bfr, target, text, extra_atrs, query);
        }
    }

    /**
     * Writes a link whose CSS classes are already known.
     *
     * If you have already looked up the proper CSS classes using LinkRenderer::getLinkClasses()
     * or some other method, use this to avoid looking it up again.
     *
     * @param text link text; when null, the text is derived from the target (see Get_link_text)
     * @param classes value for the class attribute; skipped when zero-length
     */
    // XO.MW:SYNC:1.29; DATE:2017-01-31
    public void Make_preloaded_link(Bry_bfr bfr, Xoa_ttl target, byte[] text, byte[] classes, Xomw_atr_mgr extra_atrs, Xomw_qry_mgr query) {
        // XO.MW.HOOK: $this->runBeginHook --> 'HtmlPageLinkRendererBegin', 'LinkBegin'
        target = Normalize_target(target);
        byte[] url = Get_link_url(target, query);
        attribs.Clear();
        attribs.Add(Gfh_atr_.Bry__href, url); // XO.MW: add url 1st; MW does attribs["url", url] + attribs + extra_attribs
        if (classes.length > 0) // XO.MW:do not bother adding if empty
            attribs.Add(Gfh_atr_.Bry__class, classes);
        byte[] prefixed_text = target.Get_prefixed_text();
        // compare by length, not identity: an empty array need not be the Bry_.Empty instance
        if (!Is_empty(prefixed_text)) {
            attribs.Add(Gfh_atr_.Bry__title, prefixed_text);
        }

        Merge_attribs(attribs, extra_atrs);

        if (text == null) {
            text = this.Get_link_text(target);
        }

        Build_a_element(bfr, target, text, attribs, true);
    }

    /**
     * Writes a link to a page that is known to exist. Builds the class list ("extiw" for
     * external targets, plus whatever Get_link_classes returns) and delegates to Make_preloaded_link.
     */
    // XO.MW:SYNC:1.29; DATE:2017-01-31
    public void Make_known_link(Bry_bfr bfr, Xoa_ttl target, byte[] text, Xomw_atr_mgr extra_atrs, Xomw_qry_mgr query) {
        byte[] classes = Bry_.Empty;
        if (target.Is_external()) {
            classes = Bry__classes__extiw;
        }
        byte[] colour = Get_link_classes(target);
        if (!Is_empty(colour)) {
            // space-join only when there is already a class; avoids a leading " " in the attribute
            classes = classes.length == 0
                ? colour
                : Bry_.Add(classes, Byte_ascii.Space_bry, colour);
        }

        Make_preloaded_link(bfr, target, text, classes, extra_atrs, query);
    }

    /**
     * Writes a "red link" to a page that does not exist: class "new", and -- unless an action is
     * already set or the target is in the Special namespace -- action=edit&redlink=1 set on query.
     * NOTE: this mutates the caller's query object.
     */
    // XO.MW:SYNC:1.29; DATE:2017-01-31
    public void Make_broken_link(Bry_bfr bfr, Xoa_ttl target, byte[] text, Xomw_atr_mgr extra_atrs, Xomw_qry_mgr query) {
        // XO.MW.HOOK: Run legacy hook

        // We don't want to include fragments for broken links, because they
        // generally make no sense.
        if (target.Has_fragment()) {
            target = target.Create_fragment_target();
        }
        target = Normalize_target(target);

        if (query.action == null && target.Ns().Id() != gplx.xowa.wikis.nss.Xow_ns_.Tid__special) {
            query.action = Bry_.new_a7("edit");
            query.redlink = 1;
        }

        byte[] url = Get_link_url(target, query);
        attribs.Clear();
        attribs.Add(Gfh_atr_.Bry__href, url); // $attribs = ['href' => $url,] + $this->mergeAttribs($attribs, $extraAttribs);
        attribs.Add(Gfh_atr_.Bry__class, Bry_.new_a7("new"));
        Merge_attribs(attribs, extra_atrs);

        // $prefixedText = $this->titleFormatter->getPrefixedText($target);
        // if ($prefixedText !== '') {
        //     // This ends up in parser cache!
        //     $attribs['title'] = wfMessage('red-link-title', $prefixedText)
        //         ->inContentLanguage()
        //         ->text();
        // }

        if (text == null) {
            text = Get_link_text(target);
        }

        Build_a_element(bfr, target, text, attribs, false);
    }

    /** Writes {@code <a ...attribs>text</a>} to bfr; text is emitted as raw html. target and is_known are currently unused. */
    // XO.MW:SYNC:1.29; DATE:2017-01-31
    private void Build_a_element(Bry_bfr bfr, Xoa_ttl target, byte[] text, Xomw_atr_mgr attribs, boolean is_known) {
        // XO.MW.HOOK:HtmlPageLinkRendererEnd
        byte[] html = text;
        // $html = HtmlArmor::getHtml($text);
        // XO.MW.HOOK:LinkEnd
        html_utl.Raw_element(bfr, Gfh_tag_.Bry__a, attribs, html);
    }

    /** Default link text: the prefixed title, or the fragment when the target has no title text but does have a fragment. */
    // XO.MW:SYNC:1.29; DATE:2017-01-31
    private byte[] Get_link_text(Xoa_ttl target) {
        byte[] prefixed_text = target.Get_prefixed_text();
        // If the target is just a fragment, with no title, we return the fragment
        // text. Otherwise, we return the title text itself.
        if (Is_empty(prefixed_text) && target.Has_fragment()) {
            return target.Get_fragment();
        }
        return prefixed_text;
    }

    /** Resolves the href for target by delegating to Xoa_ttl.Get_link_url with this renderer's expand_urls setting. */
    private byte[] Get_link_url(Xoa_ttl target, Xomw_qry_mgr query) {
        // TODO: Use a LinkTargetResolver service instead of Title
        // if ($this->forceArticlePath) {
        //     $realQuery = $query;
        //     $query = [];
        // }
        // else {
        //     $realQuery = [];
        // }
        byte[] url = target.Get_link_url(query, false, expand_urls);

        // if ($this->forceArticlePath && $realQuery) {
        //     $url = wfAppendQuery($url, $realQuery);
        // }
        return url;
    }

    /** Passes target through Xomw_linker.normaliseSpecialPage. */
    // XO.MW:SYNC:1.29; DATE:2017-01-31
    private Xoa_ttl Normalize_target(Xoa_ttl target) {
        return Xomw_linker.normaliseSpecialPage(target);
    }

    /**
     * Merges the two attribute lists via the sanitizer, then removes every attribute of trg whose
     * value is null (XO's stand-in for MW's "false suppresses the attribute").
     * NOTE(review): the null sweep runs over trg only; whether src can also end up holding null
     * values depends on Xomw_sanitizer.Merge_attributes -- confirm.
     */
    // XO.MW:SYNC:1.29; DATE:2017-02-01
    private void Merge_attribs(Xomw_atr_mgr src, Xomw_atr_mgr trg) {
        // XO.MW: ignore; src is always non-null and empty; if trg exists, it will be merged below
        // if (!$attribs) {return $defaults;}

        // Merge the custom attribs with the default ones, and iterate
        // over that, deleting all "false" attributes.
        sanitizer.Merge_attributes(src, trg);

        // XO.MW:MW removes "false" values; XO removes "null" values
        // pass 1: collect; deleting while iterating by index would skip items
        int trg_len = trg.Len();
        for (int i = 0; i < trg_len; i++) {
            Xomw_atr_itm trg_atr = trg.Get_at(i);
            // A false value suppresses the attribute
            if (trg_atr.Val() == null) {
                tmp_merge_deleted.Add(trg_atr);
            }
        }

        // pass 2: delete exactly the collected items (read from the scratch list, not from trg)
        int deleted_len = tmp_merge_deleted.Len();
        if (deleted_len > 0) {
            for (int i = 0; i < deleted_len; i++) {
                Xomw_atr_itm atr = (Xomw_atr_itm)tmp_merge_deleted.Get_at(i);
                trg.Del(atr.Key_bry());
            }
            tmp_merge_deleted.Clear();
        }
    }

    /** Extra CSS class for a known link ("mw-redirect", "stub" in MW). Not ported yet: always returns Bry_.Empty. */
    public byte[] Get_link_classes(Xoa_ttl target) {
        // Make sure the target is in the cache
        // $id = $this->linkCache->addLinkObj($target);
        // if ($id == 0) {
        //     // Doesn't exist
        //     return '';
        // }

        // if ($this->linkCache->getGoodLinkFieldObj($target, 'redirect')) {
            // Page is a redirect
        //     return 'mw-redirect';
        // }
        // elseif ($this->stubThreshold > 0 && MWNamespace::isContent($target->getNamespace())
        //     && $this->linkCache->getGoodLinkFieldObj($target, 'length') < $this->stubThreshold
        // ) {
            // Page is a stub
        //     return 'stub';
        // }
        return Bry_.Empty;
    }

    /** True for null or zero-length arrays; used instead of identity comparison against Bry_.Empty. */
    private static boolean Is_empty(byte[] v) {
        return v == null || v.length == 0;
    }

    private static final byte[] Bry__classes__extiw = Bry_.new_a7("extiw");
}

View File

@@ -1,35 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.mws.linkers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
// import org.junit.*;
// public class Xomw_link_renderer__tst {
// private final Xomw_link_renderer__fxt fxt = new Xomw_link_renderer__fxt();
/*
Make_broken_link
target.Has_fragment()
*/
// }
// class Xomw_link_renderer__fxt {
// private final Xomw_link_renderer wkr = new Xomw_link_renderer(new Xomw_parser());
// public void Test__parse(String src_str, String expd) {
// byte[] src_bry = Bry_.new_u8(src_str);
// wkr.Replace_external_links(new Xomw_parser_ctx(), pbfr.Init(src_bry));
// if (apos) expd = gplx.langs.htmls.Gfh_utl.Replace_apos(expd);
// Tfds.Eq_str_lines(expd, pbfr.Rslt().To_str_and_clear(), src_str);
// }
// }

View File

@@ -1,304 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.mws.media; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
import gplx.xowa.mws.filerepo.file.*; import gplx.xowa.mws.parsers.lnkis.*;
import gplx.xowa.mws.utls.*;
/* XO.TODO:
* validateThumbParams
*/
// MEMORY:only one instance per wiki
/**
 * XO port of MediaWiki's ImageHandler: shared width / height handling for image media handlers.
 * Registers the img_width parameter and normalises the requested thumbnail size against the
 * source image's size.
 */
public abstract class Xomw_ImageHandler extends Xomw_MediaHandler {
    private final Xomw_param_map paramMap = new Xomw_param_map();
    public Xomw_ImageHandler(byte[] key) {super(key);
        paramMap.Add(Xomw_param_itm.Mw__img_width, Xomw_param_map.Type__handler, Xomw_param_itm.Name_bry__width);
    }

    /**
     * @param file file to check
     * @return true when both file.getWidth() and file.getHeight() are PHP-truthy per Php_utl_.istrue
     */
    @Override public boolean canRender(Xomw_File file) {
        return (Php_utl_.istrue(file.getWidth()) && Php_utl_.istrue(file.getHeight()));
    }

    /** @return the parameter map built in the constructor (img_width => width) */
    @Override public Xomw_param_map getParamMap() {
        // XO.MW: defined above: "return [ 'img_width' => 'width' ];"
        return paramMap;
    }

    /**
     * Accepts only the width and height parameters, and only with a positive value.
     *
     * @param name_uid parameter id; compared against Xomw_param_itm.Name__width / Name__height
     * @param val_bry  raw value; not inspected here
     * @param val_int  value as an int
     * @return true for width / height with val_int > 0; false for anything else
     */
    @Override public boolean validateParam(int name_uid, byte[] val_bry, int val_int) {
        boolean is_size_param = name_uid == Xomw_param_itm.Name__width || name_uid == Xomw_param_itm.Name__height;
        return is_size_param && val_int > 0;
    }

    /**
     * Builds the size part of a thumbnail name: the width followed by Xomw_lnki_wkr.Bry__px.
     * physicalWidth is preferred over width.
     *
     * @throws Err when neither physicalWidth nor width is set (whatever Err_.new_wo_type produces)
     */
    @Override public byte[] makeParamString(Xomw_params_handler handlerParams) {
        int width = 0;
        if (Php_utl_.isset(handlerParams.physicalWidth)) {
            width = handlerParams.physicalWidth;
        }
        else if (Php_utl_.isset(handlerParams.width)) {
            width = handlerParams.width;
        }
        else {
            throw Err_.new_wo_type("No width specified to makeParamString");
        }

        // Removed for ProofreadPage
        // width = intval(width);
        return Bry_.Add(Int_.To_bry(width), Xomw_lnki_wkr.Bry__px);
    }

//    public Xomw_param_map parseParamString(byte[] src) {
//        int len = src.length;
//        // XO.MW.REGEX: if (preg_match('/^(\d+)px/', str, m)) {
//        if (    len > 0                        // at least one char
//            &&    Byte_ascii.Is_num(src[0]))    // 1st char is numeric
//        {
//            pos = Bry_find_.Find_fwd_while_num(src, 1, len); // skip numeric
//            if (Bry_.Match(src, pos, len, Xomw_lnki_wkr.Bry__px)) { // matches "px"
//                Xomw_params_handler rv = new Xomw_params_handler();
//                rv.width = Bry_.To_int_or(src, 0, pos, Php_utl_.Null_int);
//                return rv;
//            }
//        }
//        return null;
//    }

//    function getScriptParams(paramsVar) {
//        return [ 'width' => paramsVar['width'] ];
//    }

    /**
     * Fills in / corrects width, height, physicalWidth, physicalHeight and page on handlerParams
     * so that the requested box keeps the source image's aspect ratio.
     *
     * @param image         source file; supplies the mime type and per-page width / height
     * @param handlerParams requested parameters; modified in place
     * @return false when width is not set or validateThumbParams rejects the result; else true
     */
    @Override public boolean normaliseParams(Xomw_File image, Xomw_params_handler handlerParams) {
        byte[] mimeType = image.getMimeType();

        if (!Php_utl_.isset(handlerParams.width)) {
            return false;
        }

        if (!Php_utl_.isset(handlerParams.page)) {
            handlerParams.page = 1;
        }
        else {
//            handlerParams.page = intval(handlerParams.page);
//            if (handlerParams.page > image.pageCount()) {
//                handlerParams.page = image.pageCount();
//            }
//
//            if (handlerParams.page < 1) {
//                handlerParams.page = 1;
//            }
        }

        int srcWidth = image.getWidth(handlerParams.page);
        int srcHeight = image.getHeight(handlerParams.page);

        if (Php_utl_.isset(handlerParams.height) && handlerParams.height != -1) {
            // Height & width were both set
            // compare in long: int products overflow for large requested sizes (EX: 999999px on a 30000px tall image)
            long requested_w_x_src_h = (long)handlerParams.width  * srcHeight;
            long requested_h_x_src_w = (long)handlerParams.height * srcWidth;
            if (requested_w_x_src_h > requested_h_x_src_w) {
                // Height is the relative smaller dimension, so scale width accordingly
                handlerParams.width = fitBoxWidth(srcWidth, srcHeight, handlerParams.height);

                if (handlerParams.width == 0) {
                    // Very small image, so we need to rely on client side scaling :(
                    handlerParams.width = 1;
                }

                handlerParams.physicalWidth = handlerParams.width;
            } else {
                // Height was crap, unset it so that it will be calculated later
                handlerParams.height = Php_utl_.Null_int;
            }
        }

        if (!Php_utl_.isset(handlerParams.physicalWidth)) {
            // Passed all validations, so set the physicalWidth
            handlerParams.physicalWidth = handlerParams.width;
        }

        // Because thumbs are only referred to by width, the height always needs
        // to be scaled by the width to keep the thumbnail sizes consistent,
        // even if it was set inside the if block above
        handlerParams.physicalHeight = Xomw_File.scaleHeight(srcWidth, srcHeight,
            handlerParams.physicalWidth);

        // Set the height if it was not validated in the if block higher up
        if (!Php_utl_.isset(handlerParams.height) || handlerParams.height == -1) {
            handlerParams.height = handlerParams.physicalHeight;
        }

        if (!this.validateThumbParams(handlerParams, srcWidth, srcHeight, mimeType)
        ) {
            return false;
        }

        return true;
    }

    /**
     * Validate thumbnail parameters and fill in the correct height
     *
     * Reads handlerParams.physicalWidth as the destination width; on success writes the scaled
     * height (at least 1) to handlerParams.height and handlerParams.physicalHeight.
     *
     * @param handlerParams parameters holding the destination width (input) and heights (output)
     * @param srcWidth      Width of the source image
     * @param srcHeight     Height of the source image
     * @param mimeType      Unused
     * @return boolean False to indicate that an error should be returned to the user.
     */
    // XO.MW.NOTE: MW passes w and h by ref, but only changes h; XO will pass handlerParams directly
    private boolean validateThumbParams(Xomw_params_handler handlerParams, int srcWidth, int srcHeight, byte[] mimeType) {
        int width = handlerParams.physicalWidth;
        // width = intval(width);

        // Sanity check width
        if (width <= 0) {
            // report the offending value, not the literal word "width"
            Gfo_usr_dlg_.Instance.Warn_many("", "", "validateThumbParams: Invalid destination width: ~{0}", width);

            return false;
        }
        if (srcWidth <= 0) {
            Gfo_usr_dlg_.Instance.Warn_many("", "", "validateThumbParams: Invalid source width: ~{0}", srcWidth);

            return false;
        }

        int height = Xomw_File.scaleHeight(srcWidth, srcHeight, width);
        if (height == 0) {
            // Force height to be at least 1 pixel
            height = 1;
        }
        // NOTE(review): MW appears to hand physicalWidth / physicalHeight to this method by
        // reference, so overwriting the logical height may be an XO deviation -- confirm.
        // It is kept for compatibility with existing callers.
        handlerParams.height = height;
        // same scaleHeight inputs as in normaliseParams, so this only differs when the 1-pixel floor applied
        handlerParams.physicalHeight = height;

        return true;
    }

//    /**
//    * @param File image
//    * @param String script
//    * @param array paramsVar
//    * @return boolean|MediaTransformOutput
//    */
//    function getScriptedTransform(image, script, paramsVar) {
//        if (!this.normaliseParams(image, paramsVar)) {
//            return false;
//        }
//        url = wfAppendQuery(script, this.getScriptParams(paramsVar));
//
//        if (image.mustRender() || paramsVar['width'] < image.getWidth()) {
//            return new ThumbnailImage(image, url, false, paramsVar);
//        }
//    }
//
//    function getImageSize(image, path) {
//        MediaWiki\suppressWarnings();
//        gis = getimagesize(path);
//        MediaWiki\restoreWarnings();
//
//        return gis;
//    }
//
//    /**
//    * Function that returns the number of pixels to be thumbnailed.
//    * Intended for animated GIFs to multiply by the number of frames.
//    *
//    * If the file doesn't support a notion of "area" return 0.
//    *
//    * @param File image
//    * @return int
//    */
//    function getImageArea(image) {
//        return image.getWidth() * image.getHeight();
//    }
//
//    /**
//    * @param File file
//    * @return String
//    */
//    function getShortDesc(file) {
//        global wgLang;
//        nbytes = htmlspecialchars(wgLang.formatSize(file.getSize()));
//        widthheight = wfMessage('widthheight')
//            .numParams(file.getWidth(), file.getHeight()).escaped();
//
//        return "widthheight (nbytes)";
//    }
//
//    /**
//    * @param File file
//    * @return String
//    */
//    function getLongDesc(file) {
//        global wgLang;
//        pages = file.pageCount();
//        size = htmlspecialchars(wgLang.formatSize(file.getSize()));
//        if (pages === false || pages <= 1) {
//            msg = wfMessage('file-info-size').numParams(file.getWidth(),
//                file.getHeight()).paramsVar(size,
//                    '<span class="mime-type">' . file.getMimeType() . '</span>').parse();
//        } else {
//            msg = wfMessage('file-info-size-pages').numParams(file.getWidth(),
//                file.getHeight()).paramsVar(size,
//                    '<span class="mime-type">' . file.getMimeType() . '</span>').numParams(pages).parse();
//        }
//
//        return msg;
//    }
//
//    /**
//    * @param File file
//    * @return String
//    */
//    function getDimensionsString(file) {
//        pages = file.pageCount();
//        if (pages > 1) {
//            return wfMessage('widthheightpage')
//                .numParams(file.getWidth(), file.getHeight(), pages).text();
//        } else {
//            return wfMessage('widthheight')
//                .numParams(file.getWidth(), file.getHeight()).text();
//        }
//    }
//
//    public function sanitizeParamsForBucketing(paramsVar) {
//        paramsVar = parent::sanitizeParamsForBucketing(paramsVar);
//
//        // We unset the height parameters in order to let normaliseParams recalculate them
//        // Otherwise there might be a height discrepancy
//        if (isset(paramsVar['height'])) {
//            unset(paramsVar['height']);
//        }
//
//        if (isset(paramsVar['physicalHeight'])) {
//            unset(paramsVar['physicalHeight']);
//        }
//
//        return paramsVar;
//    }
}

View File

@@ -1,63 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.mws.media; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
import org.junit.*; import gplx.core.tests.*;
import gplx.xowa.mws.utls.*;
import gplx.xowa.mws.parsers.*; import gplx.xowa.mws.parsers.lnkis.*;
import gplx.xowa.mws.filerepo.*; import gplx.xowa.mws.filerepo.file.*;
/** Unit tests for Xomw_ImageHandler.normaliseParams, run against a 400x200 png. */
public class Xomw_ImageHandler__tst {
    private final Xomw_ImageHandler__fxt tstr = new Xomw_ImageHandler__fxt();

    @Before public void init() {
        // every test starts from the same 400x200 source image
        tstr.Init__file("A.png", 400, 200);
    }

    @Test public void normaliseParams() {
        // widthOnly; "Because thumbs are only referred to by width, the height always needs"
        Xomw_params_handler rqst = tstr.Make__handlerParams(200);
        Xomw_params_handler expd = tstr.Make__handlerParams(200, 100, 200, 100);
        tstr.Test__normaliseParams(rqst, expd);
    }
}
/** Test fixture: owns a handler plus one source file, and builds / compares Xomw_params_handler values. */
class Xomw_ImageHandler__fxt {
    private final Xomw_ImageHandler img_handler;
    private final Xomw_FileRepo file_repo = new Xomw_FileRepo(Bry_.new_a7("/orig"), Bry_.new_a7("/thumb"));
    private final Xomw_parser_env parser_env = new Xomw_parser_env();
    private Xomw_File src_file;

    public Xomw_ImageHandler__fxt() {
        this.img_handler = new Xomw_TransformationalImageHandler(Bry_.new_a7("test_handler"));
    }

    /** Params with only the width set; the other three fields are Php_utl_.Null_int. */
    public Xomw_params_handler Make__handlerParams(int w) {
        int unset = Php_utl_.Null_int;
        return Make__handlerParams(w, unset, unset, unset);
    }

    /** Params with all four size fields given explicitly. */
    public Xomw_params_handler Make__handlerParams(int w, int h, int phys_w, int phys_h) {
        Xomw_params_handler prms = new Xomw_params_handler();
        prms.width          = w;
        prms.height         = h;
        prms.physicalWidth  = phys_w;
        prms.physicalHeight = phys_h;
        return prms;
    }

    /** Replaces the source file with a png of the given title and size. */
    public void Init__file(String title, int w, int h) {
        byte[] title_bry = Bry_.new_u8(title);
        this.src_file = new Xomw_LocalFile(parser_env, title_bry, file_repo, w, h, Xomw_MediaHandlerFactory.Mime__image__png);
    }

    /** Normalises prms in place (the boolean result is ignored) and compares its four size fields with expd. */
    public void Test__normaliseParams(Xomw_params_handler prms, Xomw_params_handler expd) {
        // exec
        img_handler.normaliseParams(src_file, prms);

        // test
        Gftest.Eq__int(expd.width, prms.width);
        Gftest.Eq__int(expd.height, prms.height);
        Gftest.Eq__int(expd.physicalWidth, prms.physicalWidth);
        Gftest.Eq__int(expd.physicalHeight, prms.physicalHeight);
    }
}

View File

@@ -1,868 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.mws.media; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
import gplx.xowa.mws.filerepo.file.*;
import gplx.xowa.mws.parsers.lnkis.*;
public abstract class Xomw_MediaHandler {
// flag value that getTransform hands to doTransform
private static final int TRANSFORM_LATER = 1;

// identifier given at construction; exposed through Key()
private byte[] key;

public Xomw_MediaHandler(byte[] key) {
    this.key = key;
}

/** @return the key passed to the constructor */
public byte[] Key() {
    return key;
}
// static final METADATA_GOOD = true;
// static final METADATA_BAD = false;
// static final METADATA_COMPATIBLE = 2; // for old but backwards compatible.
// /**
// * Max length of error logged by logErrorForExternalProcess()
// */
// static final MAX_ERR_LOG_SIZE = 65535;
//
// /**
// * Get a MediaHandler for a given MIME type from the instance cache
// *
// * @param String $type
// * @return MediaHandler|boolean
// */
// static function getHandler($type) {
// return MediaWikiServices::getInstance()
// ->getMediaHandlerFactory()->getHandler($type);
// }
/**
 * Get an associative array mapping magic word IDs to parameter names.
 * Will be used by the parser to identify parameters.
 *
 * @return the handler's parameter map (XO: a Xomw_param_map instead of a PHP array)
 */
public abstract Xomw_param_map getParamMap();
/**
 * Validate a thumbnail parameter at parse time.
 * Return true to accept the parameter, and false to reject it.
 * If you return false, the parser will do something quiet and forgiving.
 *
 * XO: MW's ($name, $value) pair is passed as an int id plus two forms of the value.
 *
 * @param name_uid int id of the parameter name (EX: Xomw_param_itm.Name__width)
 * @param val_bry  the value as bytes
 * @param val_int  the value as an int -- NOTE(review): presumably val_bry parsed by the caller; confirm
 * @return true to accept the parameter, false to reject it
 */
public abstract boolean validateParam(int name_uid, byte[] val_bry, int val_int);
/**
 * Merge a parameter array into a String appropriate for inclusion in filenames
 *
 * @param handlerParams parameters that have been through normaliseParams
 * @return the parameter String as bytes
 */
public abstract byte[] makeParamString(Xomw_params_handler handlerParams);
// /**
// * Parse a param String made with makeParamString back into an array
// *
// * @param String $str The parameter String without file name (e.g. 122px)
// * @return array|boolean Array of parameters or false on failure.
// */
// abstract public function parseParamString($str);
/**
 * Changes the parameter array as necessary, ready for transformation.
 * Should be idempotent.
 *
 * @param image         the file the parameters apply to
 * @param handlerParams parameters to normalise; implementations modify this object in place
 * @return false if the parameters are unacceptable and the transform should fail
 */
public abstract boolean normaliseParams(Xomw_File image, Xomw_params_handler handlerParams);
// /**
// * Get an image size array like that returned by getimagesize(), or false if it
// * can't be determined.
// *
// * This function is used for determining the width, height and bitdepth directly
// * from an image. The results are stored in the database in the img_width,
// * img_height, img_bits fields.
// *
// * @note If this is a multipage file, return the width and height of the
// * first page.
// *
// * @param File|FSFile $image The image Object, or false if there isn't one.
// * Warning, FSFile::getPropsFromPath might pass an FSFile instead of File (!)
// * @param String $path The filename
// * @return array|boolean Follow the format of PHP getimagesize() @gplx.Internal protected function.
// * See https://secure.php.net/getimagesize. MediaWiki will only ever use the
// * first two array keys (the width and height), and the 'bits' associative
// * key. All other array keys are ignored. Returning a 'bits' key is optional
// * as not all formats have a notion of "bitdepth". Returns false on failure.
// */
// abstract function getImageSize($image, $path);
//
// /**
// * Get handler-specific metadata which will be saved in the img_metadata field.
// *
// * @param File|FSFile $image The image Object, or false if there isn't one.
// * Warning, FSFile::getPropsFromPath might pass an FSFile instead of File (!)
// * @param String $path The filename
// * @return String A String of metadata in php serialized form (Run through serialize())
// */
// function getMetadata($image, $path) {
// return '';
// }
//
// /**
// * Get metadata version.
// *
// * This is not used for validating metadata, this is used for the api when returning
// * metadata, since api content formats should stay the same over time, and so things
// * using ForeignApiRepo can keep backwards compatibility
// *
// * All core media handlers share a common version number, and extensions can
// * use the GetMetadataVersion hook to append to the array (they should append a unique
// * String so not to get confusing). If there was a media handler named 'foo' with metadata
// * version 3 it might add to the end of the array the element 'foo=3'. if the core metadata
// * version is 2, the end version String would look like '2;foo=3'.
// *
// * @return String Version String
// */
// static function getMetadataVersion() {
// $version = [ '2' ]; // core metadata version
// Hooks::run('GetMetadataVersion', [ &$version ]);
//
// return implode(';', $version);
// }
//
// /**
// * Convert metadata version.
// *
// * By default just returns $metadata, but can be used to allow
// * media handlers to convert between metadata versions.
// *
// * @param String|array $metadata Metadata array (serialized if String)
// * @param int $version Target version
// * @return array Serialized metadata in specified version, or $metadata on fail.
// */
// function convertMetadataVersion($metadata, $version = 1) {
// if (!is_array($metadata)) {
//
// // unserialize to keep return parameter consistent.
// MediaWiki\suppressWarnings();
// $ret = unserialize($metadata);
// MediaWiki\restoreWarnings();
//
// return $ret;
// }
//
// return $metadata;
// }
//
// /**
// * Get a String describing the type of metadata, for display purposes.
// *
// * @note This method is currently unused.
// * @param File $image
// * @return String
// */
// function getMetadataType($image) {
// return false;
// }
//
// /**
// * Check if the metadata String is valid for this handler.
// * If it returns MediaHandler::METADATA_BAD (or false), Image
// * will reload the metadata from the file and update the database.
// * MediaHandler::METADATA_GOOD for if the metadata is a-ok,
// * MediaHandler::METADATA_COMPATIBLE if metadata is old but backwards
// * compatible (which may or may not trigger a metadata reload).
// *
// * @note Returning self::METADATA_BAD will trigger a metadata reload from
// * file on page view. Always returning this from a broken file, or suddenly
// * triggering as bad metadata for a large number of files can cause
// * performance problems.
// * @param File $image
// * @param String $metadata The metadata in serialized form
// * @return boolean
// */
// function isMetadataValid($image, $metadata) {
// return self::METADATA_GOOD;
// }
//
// /**
// * Get an array of standard (FormatMetadata type) metadata values.
// *
// * The returned data is largely the same as that from getMetadata(),
// * but formatted in a standard, stable, handler-independent way.
// * The idea being that some values like ImageDescription or Artist
// * are universal and should be retrievable in a handler generic way.
// *
// * The specific properties are the type of properties that can be
// * handled by the FormatMetadata class. These values are exposed to the
// * user via the filemetadata parser function.
// *
// * Details of the response format of this function can be found at
// * https://www.mediawiki.org/wiki/Manual:File_metadata_handling
// * tl/dr: the response is an associative array of
// * properties keyed by name, but the value can be complex. You probably
// * want to call one of the FormatMetadata::flatten* functions on the
// * property values before using them, or call
// * FormatMetadata::getFormattedData() on the full response array, which
// * transforms all values into prettified, human-readable text.
// *
// * Subclasses overriding this function must return a value which is a
// * valid API response fragment (all associative array keys are valid
// * XML tagnames).
// *
// * Note, if the file simply has no metadata, but the handler supports
// * this interface, it should return an empty array, not false.
// *
// * @param File $file
// * @return array|boolean False if interface not supported
// * @since 1.23
// */
// public function getCommonMetaArray(File $file) {
// return false;
// }
//
// /**
// * Get a MediaTransformOutput Object representing an alternate of the transformed
// * output which will call an intermediary thumbnail assist script.
// *
// * Used when the repository has a thumbnailScriptUrl option configured.
// *
// * Return false to fall back to the regular getTransform().
// * @param File $image
// * @param String $script
// * @param array $paramsVar
// * @return boolean|ThumbnailImage
// */
// function getScriptedTransform($image, $script, $paramsVar) {
// return false;
// }
/**
* Get a MediaTransformOutput Object representing the transformed output. Does not
* actually do the transform.
*
* @param File $image The image Object
* @param String $dstPath Filesystem destination path
* @param String $dstUrl Destination URL to use in output HTML
* @param array $paramsVar Arbitrary set of parameters validated by $this->validateParam()
* @return MediaTransformOutput
*/
public Xomw_MediaTransformOutput getTransform(Xomw_File image, byte[] dstPath, byte[] dstUrl, Xomw_params_handler handlerParams) {
	// Same as doTransform, but always passes the TRANSFORM_LATER flag
	Xomw_MediaTransformOutput deferred = this.doTransform(image, dstPath, dstUrl, handlerParams, TRANSFORM_LATER);
	return deferred;
}
/**
* Get a MediaTransformOutput Object representing the transformed output. Does the
* transform unless $flags contains self::TRANSFORM_LATER.
*
* @param File $image The image Object
* @param String $dstPath Filesystem destination path
* @param String $dstUrl Destination URL to use in output HTML
* @param array $paramsVar Arbitrary set of parameters validated by $this->validateParam()
* Note: These parameters have *not* gone through $this->normaliseParams()
* @param int $flags A bitfield, may contain self::TRANSFORM_LATER
* @return MediaTransformOutput
*/
public Xomw_MediaTransformOutput doTransform(Xomw_File image, byte[] dstPath, byte[] dstUrl, Xomw_params_handler handlerParams) {
	// Convenience overload: no flag bits set
	final int noFlags = 0;
	return doTransform(image, dstPath, dstUrl, handlerParams, noFlags);
}
// Implemented by concrete handlers; flags is a bitfield that may contain TRANSFORM_LATER
public abstract Xomw_MediaTransformOutput doTransform(Xomw_File image, byte[] dstPath, byte[] dstUrl, Xomw_params_handler handlerParams, int flags);
// /**
// * Get the thumbnail extension and MIME type for a given source MIME type
// *
// * @param String $ext Extension of original file
// * @param String $mime MIME type of original file
// * @param array $paramsVar Handler specific rendering parameters
// * @return array Thumbnail extension and MIME type
// */
// function getThumbType($ext, $mime, $paramsVar = null) {
// $magic = MimeMagic::singleton();
// if (!$ext || $magic->isMatchingExtension($ext, $mime) === false) {
// // The extension is not valid for this MIME type and we do
// // recognize the MIME type
// $extensions = $magic->getExtensionsForType($mime);
// if ($extensions) {
// return [ strtok($extensions, ' '), $mime ];
// }
// }
//
// // The extension is correct (true) or the MIME type is unknown to
// // MediaWiki (null)
// return [ $ext, $mime ];
// }
//
// /**
// * Get useful response headers for GET/HEAD requests for a file with the given metadata
// *
// * @param mixed $metadata Result of the getMetadata() function of this handler for a file
// * @return array
// */
// public function getStreamHeaders($metadata) {
// return [];
// }
/**
* True if the handled types can be transformed
*
* @param File $file
* @return boolean
*/
// Base implementation: always reports the file as renderable; marked virtual so handlers can override
@gplx.Virtual public boolean canRender(Xomw_File file) {return true;}
/**
* True if handled types cannot be displayed directly in a browser
* but can be rendered
*
* @param File $file
* @return boolean
*/
// Base implementation: never forces rendering (always false); the file argument is not inspected
public boolean mustRender(Xomw_File file) {return false;}
// /**
// * True if the type has multi-page capabilities
// *
// * @param File $file
// * @return boolean
// */
// public function isMultiPage($file) {
// return false;
// }
//
// /**
// * Page count for a multi-page document, false if unsupported or unknown
// *
// * @param File $file
// * @return boolean
// */
// function pageCount(File $file) {
// return false;
// }
//
// /**
// * The material is vectorized and thus scaling is lossless
// *
// * @param File $file
// * @return boolean
// */
// function isVectorized($file) {
// return false;
// }
//
// /**
// * The material is an image, and is animated.
// * In particular, video material need not return true.
// * @note Before 1.20, this was a method of ImageHandler only
// *
// * @param File $file
// * @return boolean
// */
// function isAnimatedImage($file) {
// return false;
// }
//
// /**
// * If the material is animated, we can animate the thumbnail
// * @since 1.20
// *
// * @param File $file
// * @return boolean If material is not animated, handler may return any value.
// */
// function canAnimateThumbnail($file) {
// return true;
// }
//
// /**
// * False if the handler is disabled for all files
// * @return boolean
// */
// function isEnabled() {
// return true;
// }
//
// /**
// * Get an associative array of page dimensions
// * Currently "width" and "height" are understood, but this might be
// * expanded in the future.
// * Returns false if unknown.
// *
// * It is expected that handlers for paged media (e.g. DjVuHandler)
// * will override this method so that it gives the correct results
// * for each specific page of the file, using the $page argument.
// *
// * @note For non-paged media, use getImageSize.
// *
// * @param File $image
// * @param int $page What page to get dimensions of
// * @return array|boolean
// */
// function getPageDimensions(File $image, $page) {
// $gis = $this->getImageSize($image, $image->getLocalRefPath());
// if ($gis) {
// return [
// 'width' => $gis[0],
// 'height' => $gis[1]
// ];
// } else {
// return false;
// }
// }
//
// /**
// * Generic getter for text layer.
// * Currently overloaded by PDF and DjVu handlers
// * @param File $image
// * @param int $page Page number to get information for
// * @return boolean|String Page text or false when no text found or if
// * unsupported.
// */
// function getPageText(File $image, $page) {
// return false;
// }
//
// /**
// * Get the text of the entire document.
// * @param File $file
// * @return boolean|String The text of the document or false if unsupported.
// */
// public function getEntireText(File $file) {
// $numPages = $file->pageCount();
// if (!$numPages) {
// // Not a multipage document
// return $this->getPageText($file, 1);
// }
// $document = '';
// for ($i = 1; $i <= $numPages; $i++) {
// $curPage = $this->getPageText($file, $i);
// if (is_string($curPage)) {
// $document .= $curPage . "\n";
// }
// }
// if ($document !== '') {
// return $document;
// }
// return false;
// }
//
// /**
// * Get an array structure that looks like this:
// *
// * [
// * 'visible' => [
// * 'Human-readable name' => 'Human readable value',
// * ...
// * ],
// * 'collapsed' => [
// * 'Human-readable name' => 'Human readable value',
// * ...
// * ]
// * ]
// * The UI will format this into a table where the visible fields are always
// * visible, and the collapsed fields are optionally visible.
// *
// * The function should return false if there is no metadata to display.
// */
//
// /**
// * @todo FIXME: This interface is not very flexible. The media handler
// * should generate HTML instead. It can do all the formatting according
// * to some standard. That makes it possible to do things like visual
// * indication of grouped and chained streams in ogg container files.
// * @param File $image
// * @param boolean|IContextSource $context Context to use (optional)
// * @return array|boolean
// */
// function formatMetadata($image, $context = false) {
// return false;
// }
//
// /** sorts the visible/invisible field.
// * Split off from ImageHandler::formatMetadata, as used by more than
// * one type of handler.
// *
// * This is used by the media handlers that use the FormatMetadata class
// *
// * @param array $metadataArray Metadata array
// * @param boolean|IContextSource $context Context to use (optional)
// * @return array Array for use displaying metadata.
// */
// function formatMetadataHelper($metadataArray, $context = false) {
// $result = [
// 'visible' => [],
// 'collapsed' => []
// ];
//
// $formatted = FormatMetadata::getFormattedData($metadataArray, $context);
// // Sort fields into visible and collapsed
// $visibleFields = $this->visibleMetadataFields();
// foreach ($formatted as $name => $value) {
// $tag = strtolower($name);
// self::addMeta($result,
// in_array($tag, $visibleFields) ? 'visible' : 'collapsed',
// 'exif',
// $tag,
// $value
// );
// }
//
// return $result;
// }
//
// /**
// * Get a list of metadata items which should be displayed when
// * the metadata table is collapsed.
// *
// * @return array Array of strings
// */
// protected function visibleMetadataFields() {
// return FormatMetadata::getVisibleFields();
// }
//
// /**
// * This is used to generate an array element for each metadata value
// * That array is then used to generate the table of metadata values
// * on the image page
// *
// * @param array &$array An array containing elements for each type of visibility
// * and each of those elements being an array of metadata items. This function adds
// * a value to that array.
// * @param String $visibility ('visible' or 'collapsed') if this value is hidden
// * by default.
// * @param String $type Type of metadata tag (currently always 'exif')
// * @param String $id The name of the metadata tag (like 'artist' for example).
// * its name in the table displayed is the message "$type-$id" (Ex exif-artist).
// * @param String $value Thingy goes into a wikitext table; it used to be escaped but
// * that was incompatible with previous practise of customized display
// * with wikitext formatting via messages such as 'exif-model-value'.
// * So the escaping is taken back out, but generally this seems a confusing
// * interface.
// * @param boolean|String $param Value to pass to the message for the name of the field
// * as $1. Currently this parameter doesn't seem to ever be used.
// *
// * Note, everything here is passed through the parser later on (!)
// */
// protected static function addMeta(&$array, $visibility, $type, $id, $value, $param = false) {
// $msg = wfMessage("$type-$id", $param);
// if ($msg->exists()) {
// $name = $msg->text();
// } else {
// // This is for future compatibility when using instant commons.
// // So as to not display as ugly a name if a new metadata
// // property is defined that we don't know about
// // (not a major issue since such a property would be collapsed
// // by default).
// wfDebug(__METHOD__ . ' Unknown metadata name: ' . $id . "\n");
// $name = wfEscapeWikiText($id);
// }
// $array[$visibility][] = [
// 'id' => "$type-$id",
// 'name' => $name,
// 'value' => $value
// ];
// }
//
// /**
// * Short description. Shown on Special:Search results.
// *
// * @param File $file
// * @return String
// */
// function getShortDesc($file) {
// return self::getGeneralShortDesc($file);
// }
//
// /**
// * Long description. Shown under image on image description page surrounded by ().
// *
// * @param File $file
// * @return String
// */
// function getLongDesc($file) {
// return self::getGeneralLongDesc($file);
// }
//
// /**
// * Used instead of getShortDesc if there is no handler registered for file.
// *
// * @param File $file
// * @return String
// */
// static function getGeneralShortDesc($file) {
// global $wgLang;
//
// return htmlspecialchars($wgLang->formatSize($file->getSize()));
// }
//
// /**
// * Used instead of getLongDesc if there is no handler registered for file.
// *
// * @param File $file
// * @return String
// */
// static function getGeneralLongDesc($file) {
// return wfMessage('file-info')->sizeParams($file->getSize())
// ->paramsVar('<span class="mime-type">' . $file->getMimeType() . '</span>')->parse();
// }
/**
* Calculate the largest thumbnail width for a given original file size
* such that the thumbnail's height is at most $maxHeight.
* @param int $boxWidth Width of the thumbnail box.
* @param int $boxHeight Height of the thumbnail box.
* @param int $maxHeight Maximum height expected for the thumbnail.
* @return int
*/
public static int fitBoxWidth(int boxWidth, int boxHeight, int maxHeight) {
	// The previous int / int divisions threw ArithmeticException for a zero dimension.
	// Keep failing fast the same way instead of letting Infinity / NaN flow through
	// the floating-point math below.
	if (boxWidth == 0 || boxHeight == 0) {
		throw new ArithmeticException("/ by zero");
	}
	// PHP's "/" is floating-point division. Promote to double BEFORE dividing so the
	// fractional part survives; int / int would truncate and turn Ceil / Floor into no-ops.
	// Promoting first also keeps boxWidth * maxHeight from overflowing int.
	double idealWidth = (double)boxWidth * maxHeight / boxHeight;
	int roundedUp = Math_.Ceil_as_int(idealWidth);
	// If the rounded-up width would scale to a height above maxHeight, round down instead
	if (Math_.Round((double)roundedUp * boxHeight / boxWidth, 0) > maxHeight) {
		return Math_.Floor_as_int(idealWidth);
	} else {
		return roundedUp;
	}
}
// /**
// * Shown in file history box on image description page.
// *
// * @param File $file
// * @return String Dimensions
// */
// function getDimensionsString($file) {
// return '';
// }
//
// /**
// * Modify the parser Object post-transform.
// *
// * This is often used to do $parser->addOutputHook(),
// * in order to add some javascript to render a viewer.
// * See TimedMediaHandler or OggHandler for an example.
// *
// * @param Parser $parser
// * @param File $file
// */
// function parserTransformHook($parser, $file) {
// }
//
// /**
// * File validation hook called on upload.
// *
// * If the file at the given local path is not valid, or its MIME type does not
// * match the handler class, a Status Object should be returned containing
// * relevant errors.
// *
// * @param String $fileName The local path to the file.
// * @return Status
// */
// function verifyUpload($fileName) {
// return Status::newGood();
// }
//
// /**
// * Check for zero-sized thumbnails. These can be generated when
// * no disk space is available or some other error occurs
// *
// * @param String $dstPath The location of the suspect file
// * @param int $retval Return value of some shell process, file will be deleted if this is non-zero
// * @return boolean True if removed, false otherwise
// */
// function removeBadFile($dstPath, $retval = 0) {
// if (file_exists($dstPath)) {
// $thumbstat = stat($dstPath);
// if ($thumbstat['size'] == 0 || $retval != 0) {
// $result = unlink($dstPath);
//
// if ($result) {
// wfDebugLog('thumbnail',
// sprintf('Removing bad %d-byte thumbnail "%s". unlink() succeeded',
// $thumbstat['size'], $dstPath));
// } else {
// wfDebugLog('thumbnail',
// sprintf('Removing bad %d-byte thumbnail "%s". unlink() failed',
// $thumbstat['size'], $dstPath));
// }
//
// return true;
// }
// }
//
// return false;
// }
//
// /**
// * Remove files from the purge list.
// *
// * This is used by some video handlers to prevent ?action=purge
// * from removing a transcoded video, which is expensive to
// * regenerate.
// *
// * @see LocalFile::purgeThumbnails
// *
// * @param array $files
// * @param array $options Purge options. Currently will always be
// * an array with a single key 'forThumbRefresh' set to true.
// */
// public function filterThumbnailPurgeList(&$files, $options) {
// // Do nothing
// }
//
// /**
// * True if the handler can rotate the media
// * @since 1.24 non-static. From 1.21-1.23 was static
// * @return boolean
// */
// public function canRotate() {
// return false;
// }
//
// /**
// * On supporting image formats, try to read out the low-level orientation
// * of the file and return the angle that the file needs to be rotated to
// * be viewed.
// *
// * This information is only useful when manipulating the original file;
// * the width and height we normally work with is logical, and will match
// * any produced output views.
// *
// * For files we don't know, we return 0.
// *
// * @param File $file
// * @return int 0, 90, 180 or 270
// */
// public function getRotation($file) {
// return 0;
// }
//
// /**
// * Log an error that occurred in an external process
// *
// * Moved from BitmapHandler to MediaHandler with MediaWiki 1.23
// *
// * @since 1.23
// * @param int $retval
// * @param String $err Error reported by command. Anything longer than
// * MediaHandler::MAX_ERR_LOG_SIZE is stripped off.
// * @param String $cmd
// */
// protected function logErrorForExternalProcess($retval, $err, $cmd) {
// # Keep error output limited (bug 57985)
// $errMessage = trim(substr($err, 0, self::MAX_ERR_LOG_SIZE));
//
// wfDebugLog('thumbnail',
// sprintf('thumbnail failed on %s: error %d "%s" from "%s"',
// wfHostname(), $retval, $errMessage, $cmd));
// }
//
// /**
// * Get list of languages file can be viewed in.
// *
// * @param File $file
// * @return String[] Array of language codes, or empty array if unsupported.
// * @since 1.23
// */
// public function getAvailableLanguages(File $file) {
// return [];
// }
//
// /**
// * On file types that support renderings in multiple languages,
// * which language is used by default if unspecified.
// *
// * If getAvailableLanguages returns a non-empty array, this must return
// * a valid language code. Otherwise can return null if files of this
// * type do not support alternative language renderings.
// *
// * @param File $file
// * @return String|null Language code or null if multi-language not supported for filetype.
// * @since 1.23
// */
// public function getDefaultRenderLanguage(File $file) {
// return null;
// }
//
// /**
// * If its an audio file, return the length of the file. Otherwise 0.
// *
// * File::getLength() existed for a long time, but was calling a method
// * that only existed in some subclasses of this class (The TMH ones).
// *
// * @param File $file
// * @return float length in seconds
// * @since 1.23
// */
// public function getLength($file) {
// return 0.0;
// }
//
// /**
// * True if creating thumbnails from the file is large or otherwise resource-intensive.
// * @param File $file
// * @return boolean
// */
// public function isExpensiveToThumbnail($file) {
// return false;
// }
//
// /**
// * Returns whether or not this handler supports the chained generation of thumbnails according
// * to buckets
// * @return boolean
// * @since 1.24
// */
// public function supportsBucketing() {
// return false;
// }
//
// /**
// * Returns a normalised paramsVar array for which parameters have been cleaned up for bucketing
// * purposes
// * @param array $paramsVar
// * @return array
// */
// public function sanitizeParamsForBucketing($paramsVar) {
// return $paramsVar;
// }
//
// /**
// * Gets configuration for the file warning message. Return value of
// * the following structure:
// * [
// * // Required, module with messages loaded for the client
// * 'module' => 'example.filewarning.messages',
// * // Required, array of names of messages
// * 'messages' => [
// * // Required, main warning message
// * 'main' => 'example-filewarning-main',
// * // Optional, header for warning dialog
// * 'header' => 'example-filewarning-header',
// * // Optional, footer for warning dialog
// * 'footer' => 'example-filewarning-footer',
// * // Optional, text for more-information link (see below)
// * 'info' => 'example-filewarning-info',
// * ],
// * // Optional, link for more information
// * 'link' => 'http://example.com',
// * ]
// *
// * Returns null if no warning is necessary.
// * @param File $file
// * @return array|null
// */
// public function getWarningConfig($file) {
// return null;
// }
}

View File

@@ -1,63 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.mws.media; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
// XO.MW:MW has registry and instance cache; XO only has instance
// XO.MW:SYNC:1.29; DATE:2017-02-05
// Maps a MIME type to the media handler instance registered for it.
// Only 'image/png' is registered at present; the other MW core entries are kept as commented reference.
public class Xomw_MediaHandlerFactory {
	// MIME type (byte[]) -> handler instance
	// NOTE(review): Hash_adp_bry.cs() presumably builds a case-sensitive byte[]-keyed hash -- confirm against Hash_adp_bry
	private final Hash_adp_bry handlers = Hash_adp_bry.cs();
	// XO.MW:SYNC:1.29; DATE:2017-02-05
	public Xomw_MediaHandlerFactory() {
		// Default, MediaWiki core media handlers
//			'image/jpeg' => JpegHandler::class,
		handlers.Add(Mime__image__png, new Xomw_TransformationalImageHandler(Mime__image__png)); // PngHandler
//			'image/gif' => GIFHandler::class,
//			'image/tiff' => TiffHandler::class,
//			'image/webp' => WebPHandler::class,
//			'image/x-ms-bmp' => BmpHandler::class,
//			'image/x-bmp' => BmpHandler::class,
//			'image/x-xcf' => XCFHandler::class,
//			'image/svg+xml' => SvgHandler::class, // official
//			'image/svg' => SvgHandler::class, // compat
//			'image/vnd.djvu' => DjVuHandler::class, // official
//			'image/x.djvu' => DjVuHandler::class, // compat
//			'image/x-djvu' => DjVuHandler::class, // compat
	}
	// XO.MW:SYNC:1.29; DATE:2017-02-05
	// Looks up the handler registered under the given MIME type and returns the lookup result cast to Xomw_MediaHandler.
	// NOTE(review): presumably null for an unregistered type -- confirm against Hash_adp_bry.Get_by
	public Xomw_MediaHandler getHandler(byte[] type) {
		return (Xomw_MediaHandler)handlers.Get_by(type);
	}
	// MIME-type keys; declared final so the shared constants cannot be reassigned by accident
	public static final byte[]
	  Mime__image__jpeg          = Bry_.new_a7("image/jpeg")
	, Mime__image__png           = Bry_.new_a7("image/png")
	, Mime__image__gif           = Bry_.new_a7("image/gif")
	, Mime__image__tiff          = Bry_.new_a7("image/tiff")
	, Mime__image__webp          = Bry_.new_a7("image/webp")
	, Mime__image__x_ms_bmp      = Bry_.new_a7("image/x-ms-bmp")
	, Mime__image__x_bmp         = Bry_.new_a7("image/x-bmp")
	, Mime__image__x_xcf         = Bry_.new_a7("image/x-xcf")
	, Mime__image__svg_xml       = Bry_.new_a7("image/svg+xml")
	, Mime__image__svg           = Bry_.new_a7("image/svg")
	, Mime__image__vnd_djvu      = Bry_.new_a7("image/vnd.djvu")
	, Mime__image__x_djvu_dot    = Bry_.new_a7("image/x.djvu")
	, Mime__image__x_djvu_dash   = Bry_.new_a7("image/x-djvu")
	;
}

View File

@@ -1,281 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.mws.media; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
import gplx.langs.htmls.*;
import gplx.xowa.mws.utls.*;
import gplx.xowa.mws.parsers.lnkis.*;
import gplx.xowa.mws.filerepo.file.*;
// Base class for the result of a media transform: holds the source file, the thumb URL and the output box size.
// Subclasses supply the HTML rendering via toHtml.
public abstract class Xomw_MediaTransformOutput {
	private static final byte[] Bry__class__image = Bry_.new_a7("image");
//	/** @var array Associative array mapping optional supplementary image files
//	* from pixel density (eg 1.5 or 2) to additional URLs.
//	*/
//	public $responsiveUrls = [];
	/** @var File */
	private final Xomw_File file;
	/** @var int Image width */
	protected final int width;
	/** @var int Image height */
	protected final int height;
	/** @var String URL path to the thumb */
	protected final byte[] url;
//	/** @var boolean|String */
//	protected $page;
//
//	/** @var boolean|String Filesystem path to the thumb */
//	protected $path;
//
//	/** @var boolean|String Language code, false if not set */
//	protected $lang;
//
//	/** @var boolean|String Permanent storage path */
//	protected $storagePath = false;

	// XO: path is accepted but not stored; the corresponding $path member is not ported (see commented fields above)
	public Xomw_MediaTransformOutput(Xomw_File file, byte[] url, byte[] path, int width, int height) {
		this.width = width;
		this.height = height;
		this.url = url;
		this.file = file;
	}
	/**
	* @return int Width of the output box
	*/
	public int getWidth() {return width;}
	/**
	* @return int Height of the output box
	*/
	public int getHeight() {return height;}
//	/**
//	* @return File
//	*/
//	public function getFile() {
//		return $this->file;
//	}
//
//	/**
//	* Get the final extension of the thumbnail.
//	* Returns false for scripted transformations.
//	* @return String|boolean
//	*/
//	public function getExtension() {
//		return $this->path ? FileBackend::extensionFromPath( $this->path ) : false;
//	}
//
//	/**
//	* @return String|boolean The thumbnail URL
//	*/
//	public function getUrl() {
//		return $this->url;
//	}
//
//	/**
//	* @return String|boolean The permanent thumbnail storage path
//	*/
//	public function getStoragePath() {
//		return $this->storagePath;
//	}
//
//	/**
//	* @param String $storagePath The permanent storage path
//	* @return void
//	*/
//	public function setStoragePath( $storagePath ) {
//		$this->storagePath = $storagePath;
//		if ( $this->path === false ) {
//			$this->path = $storagePath;
//		}
//	}
	/**
	* Fetch HTML for this transform output
	*
	* @param array $options Associative array of options. Boolean options
	*     should be indicated with a value of true for true, and false or
	*     absent for false.
	*
	*     alt          Alternate text or caption
	*     desc-link    Boolean, show a description link
	*     file-link    Boolean, show a file download link
	*     custom-url-link    Custom URL to link to
	*     custom-title-link  Custom Title Object to link to
	*     valign       vertical-align property, if the output is an inline element
	*     img-class    Class applied to the "<img>" tag, if there is such a tag
	*
	* For images, desc-link and file-link are implemented as a click-through. For
	* sounds and videos, they may be displayed in other ways.
	*
	* XO: declared void with buffer arguments, whereas the MW method returns a String;
	* presumably the HTML is written to bfr, with tmp as scratch space -- confirm in subclasses.
	*/
	public abstract void toHtml(Bry_bfr bfr, Bry_bfr tmp, Xomw_params_mto options);
//	/**
//	* This will be overridden to return true in error classes
//	* @return boolean
//	*/
//	public function isError() {
//		return false;
//	}
//
//	/**
//	* Check if an output thumbnail file actually exists.
//	*
//	* This will return false if there was an error, the
//	* thumbnail is to be handled client-side only, or if
//	* transformation was deferred via TRANSFORM_LATER.
//	* This file may exist as a new file in /tmp, a file
//	* in permanent storage, or even refer to the original.
//	*
//	* @return boolean
//	*/
//	public function hasFile() {
//		// If TRANSFORM_LATER, $this->path will be false.
//		// Note: a null path means "use the source file".
//		return ( !$this->isError() && ( $this->path || $this->path === null ) );
//	}
//
//	/**
//	* Check if the output thumbnail is the same as the source.
//	* This can occur if the requested width was bigger than the source.
//	*
//	* @return boolean
//	*/
//	public function fileIsSource() {
//		return ( !$this->isError() && $this->path === null );
//	}
//
//	/**
//	* Get the path of a file system copy of the thumbnail.
//	* Callers should never write to this path.
//	*
//	* @return String|boolean Returns false if there isn't one
//	*/
//	public function getLocalCopyPath() {
//		if ( $this->isError() ) {
//			return false;
//		} elseif ( $this->path === null ) {
//			return $this->file->getLocalRefPath(); // assume thumb was not scaled
//		} elseif ( FileBackend::isStoragePath( $this->path ) ) {
//			$be = $this->file->getRepo()->getBackend();
//			// The temp file will be process cached by FileBackend
//			$fsFile = $be->getLocalReference( [ 'src' => $this->path ] );
//
//			return $fsFile ? $fsFile->getPath() : false;
//		} else {
//			return $this->path; // may return false
//		}
//	}
//
//	/**
//	* Stream the file if there were no errors
//	*
//	* @param array $headers Additional HTTP headers to send on success
//	* @return Status
//	* @since 1.27
//	*/
//	public function streamFileWithStatus( $headers = [] ) {
//		if ( !$this->path ) {
//			return Status::newFatal( 'backend-fail-stream', '<no path>' );
//		} elseif ( FileBackend::isStoragePath( $this->path ) ) {
//			$be = $this->file->getRepo()->getBackend();
//			return $be->streamFile( [ 'src' => $this->path, 'headers' => $headers ] );
//		} else { // FS-file
//			$success = StreamFile::stream( $this->getLocalCopyPath(), $headers );
//			return $success ? Status::newGood() : Status::newFatal( 'backend-fail-stream', $this->path );
//		}
//	}
//
//	/**
//	* Stream the file if there were no errors
//	*
//	* @deprecated since 1.26, use streamFileWithStatus
//	* @param array $headers Additional HTTP headers to send on success
//	* @return boolean Success
//	*/
//	public function streamFile( $headers = [] ) {
//		$this->streamFileWithStatus( $headers )->isOK();
//	}
//
//	/**
//	* Wrap some XHTML text in an anchor tag with the given attributes
//	*
//	* @param array $linkAttribs
//	* @param String $contents
//	* @return String
//	*/
//	protected function linkWrap( $linkAttribs, $contents ) {
//		if ( $linkAttribs ) {
//			return Xml::tags( 'a', $linkAttribs, $contents );
//		} else {
//			return $contents;
//		}
//	}
	/**
	* Fills attribs with the attributes of the description link: href, class="image" and, when given, title.
	* Any previous content of attribs is cleared first.
	*
	* @param List_adp attribs Output list; cleared, then filled with key / value pairs
	* @param byte[] title Value for the title attribute; null to omit the attribute
	* @param List_adp prms Query parameters to add; XO: currently unused (query handling is not ported, see PHP below)
	*/
	public void getDescLinkAttribs(List_adp attribs, byte[] title, List_adp prms) {
//		if ( is_array( prms ) ) {
//			$query = prms;
//		} else {
//			$query = [];
//		}
//		if ( $this->page && $this->page !== 1 ) {
//			$query['page'] = $this->page;
//		}
//		if ( $this->lang ) {
//			$query['lang'] = $this->lang;
//		}
//
//		if ( is_string( prms ) && prms !== '' ) {
//			$query = prms . '&' . wfArrayToCgi( $query );
//		}
		attribs.Clear();
		// 'href' => $this->file->getTitle()->getLocalURL( $query ),
		attribs.Add_many(Gfh_atr_.Bry__href, file.getTitle());
		attribs.Add_many(Gfh_atr_.Bry__class, Bry__class__image);
		if (title == null) return;	// no title: href and class only
		attribs.Add_many(Gfh_atr_.Bry__title, title);
	}
	// Wrap some XHTML text in an anchor tag with the given attributes
	// XO.MW:SYNC:1.29; DATE:2017-02-03
	// NOTE: MW's linkWrap tests truthiness of $linkAttribs; here only a null list skips the anchor
	protected void Link_wrap(Bry_bfr bfr, List_adp link_attribs, byte[] contents) {
		if (link_attribs == null) {
			// no attributes: emit the contents unwrapped
			bfr.Add(contents);
			return;
		}
		Xomw_xml.Tags(bfr, Gfh_tag_.Bry__a, link_attribs, contents);
	}
}

View File

@@ -1,214 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.mws.media; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
import gplx.langs.htmls.*;
import gplx.xowa.mws.utls.*;
import gplx.xowa.mws.parsers.lnkis.*;
import gplx.xowa.mws.filerepo.file.*;
// Media transform output for images
public class Xomw_ThumbnailImage extends Xomw_MediaTransformOutput { private final List_adp attribs = List_adp_.New(), link_attribs = List_adp_.New();
public Xomw_ThumbnailImage(Xomw_File file, byte[] url, byte[] path, int w, int h) {
	// Explicit-size overload: forwards every argument unchanged to the base constructor
	super(file, url, path, w, h);
}
/**
* Get a thumbnail Object from a file and parameters.
* If path is set to null, the output file is treated as a source copy.
* If path is set to false, no output file will be created.
* parameters should include, as a minimum, (file) 'width' and 'height'.
* It may also include a 'page' parameter for multipage files.
*
* @param File file
* @param String url URL path to the thumb
* @param String|boolean path Filesystem path to the thumb
* @param array parameters Associative array of parameters
*/
public Xomw_ThumbnailImage(Xomw_File file, byte[] url, byte[] path, Xomw_params_handler parameters) {super(file, url, path, parameters.width, parameters.height);
// XO: width and height are read from parameters and handed to the base constructor.
// The MW logic below (page / lang defaults, legacy positional-argument format, rounding
// of width / height) is kept for reference only and is not executed.
// defaults = [
// 'page' => false,
// 'lang' => false
// ];
//
// if (is_array(parameters)) {
// actualParams = parameters + defaults;
// } else {
// // Using old format, should convert. Later a warning could be added here.
// numArgs = func_num_args();
// actualParams = [
// 'width' => path,
// 'height' => parameters,
// 'page' => (numArgs > 5) ? func_get_arg(5) : false
// ] + defaults;
// path = (numArgs > 4) ? func_get_arg(4) : false;
// }
// this->file = file;
// this->url = url;
// this->path = path;
// These should be integers when they get here.
// If not, there's a bug somewhere. But let's at
// least produce valid HTML code regardless.
// this->width = round(actualParams['width']);
// this->height = round(actualParams['height']);
// this->page = actualParams['page'];
// this->lang = actualParams['lang'];
}
/**
* Return HTML <img ... /> tag for the thumbnail, will include
* width and height attributes and a blank alt text (as required).
*
* @param array options Associative array of options. Boolean options
* should be indicated with a value of true for true, and false or
* absent for false.
*
* alt HTML alt attribute
* title HTML title attribute
* desc-link Boolean, show a description link
* file-link Boolean, show a file download link
* valign vertical-align property, if the output is an inline element
* img-class Class applied to the \<img\> tag, if there is such a tag
* desc-query String, description link query prms
* @Override width Override width attribute. Should generally not set
* @Override height Override height attribute. Should generally not set
* no-dimensions Boolean, skip width and height attributes (useful if
* set in CSS)
* custom-url-link Custom URL to link to
* custom-title-link Custom Title Object to link to
* custom target-link Value of the target attribute, for custom-target-link
* parser-extlink-* Attributes added by parser for external links:
* parser-extlink-rel: add rel="nofollow"
* parser-extlink-target: link target, but overridden by custom-target-link
*
* For images, desc-link and file-link are implemented as a click-through. For
* sounds and videos, they may be displayed in other ways.
*
* @throws MWException
* @return String
*/
// Return HTML <img ... /> tag for the thumbnail, will include
// width and height attributes and a blank alt text (as required).
//
// @param array options Associative array of options. Boolean options
// should be indicated with a value of true for true, and false or
// absent for false.
//
// alt HTML alt attribute
// title HTML title attribute
// desc-link Boolean, show a description link
// file-link Boolean, show a file download link
// valign vertical-align property, if the output is an inline element
// img-class Class applied to the \<img\> tag, if there is such a tag
// desc-query String, description link query prms
// override-width Override width attribute. Should generally not set
// override-height Override height attribute. Should generally not set
// no-dimensions Boolean, skip width and height attributes (useful if
// set in CSS)
// custom-url-link Custom URL to link to
// custom-title-link Custom Title Object to link to
// custom target-link Value of the target attribute, for custom-target-link
// parser-extlink-* Attributes added by parser for external links:
// parser-extlink-rel: add rel="nofollow"
// parser-extlink-target: link target, but overridden by custom-target-link
//
// For images, desc-link and file-link are implemented as a click-through. For
// sounds and videos, they may be displayed in other ways.
// XO.MW:SYNC:1.29; DATE:2017-02-03
@Override public void toHtml(Bry_bfr bfr, Bry_bfr tmp, Xomw_params_mto options) {
byte[] alt = options.alt;
// byte[] query = options.desc_query;
attribs.Clear();
attribs.Add_many(Gfh_atr_.Bry__alt, alt);
attribs.Add_many(Gfh_atr_.Bry__src, url);
boolean link_attribs_is_null = false;
if (!Php_utl_.empty(options.custom_url_link)) {
link_attribs.Clear();
link_attribs.Add_many(Gfh_atr_.Bry__href, options.custom_url_link);
if (!Php_utl_.empty(options.title)) {
link_attribs.Add_many(Gfh_atr_.Bry__title, options.title);
}
if (Php_utl_.empty(options.custom_target_link)) {
link_attribs.Add_many(Gfh_atr_.Bry__target, options.custom_target_link);
}
else if (Php_utl_.empty(options.parser_extlink_target)) {
link_attribs.Add_many(Gfh_atr_.Bry__target, options.parser_extlink_target);
}
if (Php_utl_.empty(options.parser_extlink_rel)) {
link_attribs.Add_many(Gfh_atr_.Bry__rel, options.parser_extlink_rel);
}
}
else if (!Php_utl_.empty(options.custom_title_link)) {
// byte[] title = options.custom_title_link;
// link_attribs.Clear();
// link_attribs.Add_many(Gfh_atr_.Bry__href, title.Get_link_url());
// byte[] options_title = options.title;
// link_attribs.Add_many(Gfh_atr_.Bry__title, Php_utl_.empty(options_title) ? title.Get_full_text() : options_title);
}
else if (!Php_utl_.empty(options.desc_link)) {
// link_attribs = this.getDescLinkAttribs(
// empty(options['title']) ? null : options['title'],
// $query
// );
link_attribs.Clear();
this.getDescLinkAttribs(link_attribs,
Php_utl_.empty(options.title) ? null : options.title,
null);
}
else if (!Php_utl_.empty(options.file_link)) {
// link_attribs.Clear();
// link_attribs.Add_many(Gfh_atr_.Bry__href, file.Get_url());
}
else {
link_attribs_is_null = true;
if (!Php_utl_.empty(options.title)) {
attribs.Add_many(Gfh_atr_.Bry__title, options.title);
}
}
if (!Php_utl_.empty(options.no_dimensions)) {
attribs.Add_many(Gfh_atr_.Bry__width, Int_.To_bry(width));
attribs.Add_many(Gfh_atr_.Bry__height, Int_.To_bry(height));
}
if (!Php_utl_.empty(options.valign)) {
attribs.Add_many(Gfh_atr_.Bry__style, Bry_.Add(Bry__vertical_align, options.valign));
}
if (!Php_utl_.empty(options.img_cls)) {
attribs.Add_many(Gfh_atr_.Bry__class, options.img_cls);
}
if (Php_utl_.isset(options.override_height)) {
attribs.Add_many(Gfh_atr_.Bry__class, options.override_height);
}
if (Php_utl_.isset(options.override_width)) {
attribs.Add_many(Gfh_atr_.Bry__width, options.override_height);
}
// Additional densities for responsive images, if specified.
// If any of these urls is the same as src url, it'll be excluded.
// $responsiveUrls = array_diff(this.responsiveUrls, [ this.url ]);
// if (!Php_utl_.empty($responsiveUrls)) {
// $attribs['srcset'] = Html::srcSet($responsiveUrls);
// }
// XO.MW.HOOK:ThumbnailBeforeProduceHTML
Xomw_xml.Element(tmp, Gfh_tag_.Bry__img, attribs, Bry_.Empty, Bool_.Y);
Link_wrap(bfr, link_attribs_is_null ? null : link_attribs, tmp.To_bry_and_clear());
}
private static final byte[] Bry__vertical_align = Bry_.new_a7("vertical-align: ");
}

View File

@@ -1,611 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.mws.media; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
import gplx.xowa.mws.filerepo.file.*;
import gplx.xowa.mws.parsers.lnkis.*;
public class Xomw_TransformationalImageHandler extends Xomw_ImageHandler {
	// Hands the handler key unchanged to the Xomw_ImageHandler constructor.
	public Xomw_TransformationalImageHandler(byte[] key) {
		super(key);
	}
/**
* @param File image
* @param array paramsVar Transform parameters. Entries with the keys 'width'
* and 'height' are the respective screen width and height, while the keys
* 'physicalWidth' and 'physicalHeight' indicate the thumbnail dimensions.
* @return boolean
*/
@Override public boolean normaliseParams(Xomw_File image, Xomw_params_handler prms) {
	// The parent handler gets first say; if it rejects the parameters, so do we.
	boolean parentAccepted = super.normaliseParams(image, prms);
	if (!parentAccepted) {
		return false;
	}

	// Dimensions of the source for the requested page, before any rotation
	int fullWidth = image.getWidth(prms.page);
	int fullHeight = image.getHeight(prms.page);

	// Never produce a thumbnail larger than the source: clamp both physical dimensions to the source size
	if (prms.physicalWidth >= fullWidth) {
		prms.physicalWidth = fullWidth;
		prms.physicalHeight = fullHeight;

		// When no render is needed, return straight away.
		// No further checks are implemented after this block, so the fall-through path also returns true.
		boolean renderNeeded = image.mustRender();
		if (!renderNeeded) {
			return true;
		}
	}

	return true;
}
// /**
// * Extracts the width/height if the image will be scaled before rotating
// *
// * This will match the physical size/aspect ratio of the original image
// * prior to application of the rotation -- so for a portrait image that's
// * stored as raw landscape with 90-degree rotation, the resulting size
// * will be wider than it is tall.
// *
// * @param array paramsVar Parameters as returned by normaliseParams
// * @param int rotation The rotation angle that will be applied
// * @return array (width, height) array
// */
// public function extractPreRotationDimensions(paramsVar, rotation) {
// if (rotation == 90 || rotation == 270) {
// // We'll resize before rotation, so swap the dimensions again
// width = paramsVar['physicalHeight'];
// height = paramsVar['physicalWidth'];
// } else {
// width = paramsVar['physicalWidth'];
// height = paramsVar['physicalHeight'];
// }
//
// return [ width, height ];
// }
//
/**
* Create a thumbnail.
*
* This sets up various parameters, and then calls a helper method
* based on this.getScalerType in order to scale the image.
*
* @param File image
* @param String dstPath
* @param String dstUrl
* @param array paramsVar
* @param int flags
* @return MediaTransformError|ThumbnailImage|TransformParameterError
*/
@Override public Xomw_MediaTransformOutput doTransform(Xomw_File image, byte[] dstPath, byte[] dstUrl, Xomw_params_handler prms, int flags) {
	// Partial port of the MediaWiki method: the commented-out PHP is kept as a reference for the parts not ported.
	// The live code fills a scaler-parameter object, returns a client-scaled thumbnail when the requested
	// physical size equals the source size (and no render is required), and otherwise always returns a
	// Xomw_ThumbnailImage for dstUrl. The flags argument is not read in this port.

//	if (!this.normaliseParams(image, paramsVar)) {
//		return new TransformParameterError(paramsVar);
//	}
//
	// Create a parameter array to pass to the scaler
	Xomw_params_scalar scalerParams = new Xomw_params_scalar();

	// Destination of the thumbnail
	scalerParams.dstPath = dstPath;
	scalerParams.dstUrl = dstUrl;

	// The size to which the image will be resized
	scalerParams.physicalWidth = prms.physicalWidth;
	scalerParams.physicalHeight = prms.physicalHeight;
//	'physicalDimensions' => "{paramsVar['physicalWidth']}x{paramsVar['physicalHeight']}",

	// The size of the image on the page
	scalerParams.clientWidth = prms.width;
	scalerParams.clientHeight = prms.height;

	// Comment as will be added to the Exif of the thumbnail
//	'comment' => isset(paramsVar['descriptionUrl'])
//		? "File source: {paramsVar['descriptionUrl']}"
//		: '',

	// Properties of the original image
	scalerParams.srcWidth = image.getWidth();
	scalerParams.srcHeight = image.getHeight();
	scalerParams.mimeType = image.getMimeType();
//	'interlace' => isset(paramsVar['interlace']) ? paramsVar['interlace'] : false,

//	if (isset(paramsVar['quality']) && paramsVar['quality'] === 'low') {
//		scalerParams['quality'] = 30;
//	}

	// For subclasses that might be paged.
//	if (image.isMultipage() && isset(paramsVar['page'])) {
//		scalerParams['page'] = intval(paramsVar['page']);
//	}

	// Determine scaler type
//	scaler = this.getScalerType(dstPath);
//
//	if (is_array(scaler)) {
//		scalerName = get_class(scaler[0]);
//	} else {
//		scalerName = scaler;
//	}
//
//	wfDebug(__METHOD__ . ": creating {scalerParams['physicalDimensions']} " .
//		"thumbnail at dstPath using scaler scalerName\n");

	// normaliseParams (or the user) wants us to return the unscaled image
	// when the target size is exactly the source size and nothing forces a render.
	boolean renderNeeded = image.mustRender();
	boolean sameAsSource
		=  scalerParams.physicalWidth == scalerParams.srcWidth
		&& scalerParams.physicalHeight == scalerParams.srcHeight
//		&& !isset(scalerParams['quality'])
		;
	if (!renderNeeded && sameAsSource) {
//		wfDebug(__METHOD__ . ": returning unscaled image\n");
		return this.getClientScalingThumbnailImage(image, scalerParams);
	}

//	if (scaler == 'client') {
//		// Client-side image scaling, use the source URL
//		// Using the destination URL in a TRANSFORM_LATER request would be incorrect
//		return this.getClientScalingThumbnailImage(image, scalerParams);
//	}
//
//	if (image.isTransformedLocally() && !this.isImageAreaOkForThumbnaling(image, paramsVar)) {
//		global wgMaxImageArea;
//		return new TransformTooBigImageAreaError(paramsVar, wgMaxImageArea);
//	}
//
//	if (flags & self::TRANSFORM_LATER) {
//		wfDebug(__METHOD__ . ": Transforming later per flags.\n");
//		newParams = [
//			'width' => scalerParams['clientWidth'],
//			'height' => scalerParams['clientHeight']
//		];
//		if (isset(paramsVar['quality'])) {
//			newParams['quality'] = paramsVar['quality'];
//		}
//		if (isset(paramsVar['page']) && paramsVar['page']) {
//			newParams['page'] = paramsVar['page'];
//		}
//		return new Xomw_ThumbnailImage(image, dstUrl, null, newParams);

	// The port takes the "transform later" outcome unconditionally and passes prms itself instead of newParams.
	return new Xomw_ThumbnailImage(image, dstUrl, null, prms);
//	}
//
//	// Try to make a target path for the thumbnail
//	if (!wfMkdirParents(dirname(dstPath), null, __METHOD__)) {
//		wfDebug(__METHOD__ . ": Unable to create thumbnail destination " .
//			"directory, falling back to client scaling\n");
//
//		return this.getClientScalingThumbnailImage(image, scalerParams);
//	}
//
//	// Transform functions and binaries need a FS source file
//	thumbnailSource = this.getThumbnailSource(image, paramsVar);
//
//	// If the source isn't the original, disable EXIF rotation because it's already been applied
//	if (scalerParams['srcWidth'] != thumbnailSource['width']
//		|| scalerParams['srcHeight'] != thumbnailSource['height']) {
//		scalerParams['disableRotation'] = true;
//	}
//
//	scalerParams['srcPath'] = thumbnailSource['path'];
//	scalerParams['srcWidth'] = thumbnailSource['width'];
//	scalerParams['srcHeight'] = thumbnailSource['height'];
//
//	if (scalerParams['srcPath'] === false) { // Failed to get local copy
//		wfDebugLog('thumbnail',
//			sprintf('Thumbnail failed on %s: could not get local copy of "%s"',
//				wfHostname(), image.getName()));
//
//		return new MediaTransformError('thumbnail_error',
//			scalerParams['clientWidth'], scalerParams['clientHeight'],
//			wfMessage('filemissing')
//		);
//	}
//
//	// Try a hook. Called "Bitmap" for historical reasons.
//	/** @var mto MediaTransformOutput */
//	mto = null;
//	Hooks::run('BitmapHandlerTransform', [ this, image, &scalerParams, &mto ]);
//	if (!is_null(mto)) {
//		wfDebug(__METHOD__ . ": Hook to BitmapHandlerTransform created an mto\n");
//		scaler = 'hookaborted';
//	}
//
//	// scaler will return a MediaTransformError on failure, or false on success.
//	// If the scaler is successful, it will have created a thumbnail at the destination
//	// path.
//	if (is_array(scaler) && is_callable(scaler)) {
//		// Allow subclasses to specify their own rendering methods.
//		err = call_user_func(scaler, image, scalerParams);
//	} else {
//		switch (scaler) {
//			case 'hookaborted':
//				// Handled by the hook above
//				err = mto.isError() ? mto : false;
//				break;
//			case 'im':
//				err = this.transformImageMagick(image, scalerParams);
//				break;
//			case 'custom':
//				err = this.transformCustom(image, scalerParams);
//				break;
//			case 'imext':
//				err = this.transformImageMagickExt(image, scalerParams);
//				break;
//			case 'gd':
//			default:
//				err = this.transformGd(image, scalerParams);
//				break;
//		}
//	}
//
//	// Remove the file if a zero-byte thumbnail was created, or if there was an error
//	removed = this.removeBadFile(dstPath, (boolean)err);
//	if (err) {
//		// transform returned MediaTransformError
//		return err;
//	} elseif (removed) {
//		// Thumbnail was zero-byte and had to be removed
//		return new MediaTransformError('thumbnail_error',
//			scalerParams['clientWidth'], scalerParams['clientHeight'],
//			wfMessage('unknown-error')
//		);
//	} elseif (mto) {
//		return mto;
//	} else {
//		newParams = [
//			'width' => scalerParams['clientWidth'],
//			'height' => scalerParams['clientHeight']
//		];
//		if (isset(paramsVar['quality'])) {
//			newParams['quality'] = paramsVar['quality'];
//		}
//		if (isset(paramsVar['page']) && paramsVar['page']) {
//			newParams['page'] = paramsVar['page'];
//		}
//		return new ThumbnailImage(image, dstUrl, dstPath, newParams);
//	}
//	return null;
}
// /**
// * Get the source file for the transform
// *
// * @param File file
// * @param array paramsVar
// * @return array Array with keys width, height and path.
// */
// protected function getThumbnailSource(file, paramsVar) {
// return file.getThumbnailSource(paramsVar);
// }
//
// /**
// * Returns what sort of scaler type should be used.
// *
// * Values can be one of client, im, custom, gd, imext, or an array
// * of Object, method-name to call that specific method.
// *
// * If specifying a custom scaler command with [ Obj, method ],
// * the method in question should take 2 parameters, a File Object,
// * and a scalerParams array with various options (See doTransform
// * for what is in scalerParams). On error it should return a
// * MediaTransformError Object. On success it should return false,
// * and simply make sure the thumbnail file is located at
// * scalerParams['dstPath'].
// *
// * If there is a problem with the output path, it returns "client"
// * to do client side scaling.
// *
// * @param String dstPath
// * @param boolean checkDstPath Check that dstPath is valid
// * @return String|Callable One of client, im, custom, gd, imext, or a Callable array.
// */
// abstract protected function getScalerType(dstPath, checkDstPath = true);
/**
* Get a ThumbnailImage that represents an image that will be scaled
* client side
*
* @param File image File associated with this thumbnail
* @param array scalerParams Array with scaler paramsVar
* @return ThumbnailImage
*
* @todo FIXME: No rotation support
*/
private Xomw_ThumbnailImage getClientScalingThumbnailImage(Xomw_File image, Xomw_params_scalar scalerParams) {
	// Only the on-page (client) dimensions are carried over from the scaler parameters.
	Xomw_params_handler clientPrms = new Xomw_params_handler();
	clientPrms.height = scalerParams.clientHeight;
	clientPrms.width = scalerParams.clientWidth;

	// The thumbnail points at the image's own URL and is given no path (null).
	return new Xomw_ThumbnailImage(image, image.getUrl(), null, clientPrms);
}
// /**
// * Transform an image using ImageMagick
// *
// * This is a stub method. The real method is in BitmapHandler.
// *
// * @param File image File associated with this thumbnail
// * @param array paramsVar Array with scaler paramsVar
// *
// * @return MediaTransformError Error Object if error occurred, false (=no error) otherwise
// */
// protected function transformImageMagick(image, paramsVar) {
// return this.getMediaTransformError(paramsVar, "Unimplemented");
// }
//
// /**
// * Transform an image using the Imagick PHP extension
// *
// * This is a stub method. The real method is in BitmapHandler.
// *
// * @param File image File associated with this thumbnail
// * @param array paramsVar Array with scaler paramsVar
// *
// * @return MediaTransformError Error Object if error occurred, false (=no error) otherwise
// */
// protected function transformImageMagickExt(image, paramsVar) {
// return this.getMediaTransformError(paramsVar, "Unimplemented");
// }
//
// /**
// * Transform an image using a custom command
// *
// * This is a stub method. The real method is in BitmapHandler.
// *
// * @param File image File associated with this thumbnail
// * @param array paramsVar Array with scaler paramsVar
// *
// * @return MediaTransformError Error Object if error occurred, false (=no error) otherwise
// */
// protected function transformCustom(image, paramsVar) {
// return this.getMediaTransformError(paramsVar, "Unimplemented");
// }
//
// /**
// * Get a MediaTransformError with error 'thumbnail_error'
// *
// * @param array paramsVar Parameter array as passed to the transform* functions
// * @param String errMsg Error message
// * @return MediaTransformError
// */
// public function getMediaTransformError(paramsVar, errMsg) {
// return new MediaTransformError('thumbnail_error', paramsVar['clientWidth'],
// paramsVar['clientHeight'], errMsg);
// }
//
// /**
// * Transform an image using the built in GD library
// *
// * This is a stub method. The real method is in BitmapHandler.
// *
// * @param File image File associated with this thumbnail
// * @param array paramsVar Array with scaler paramsVar
// *
// * @return MediaTransformError Error Object if error occurred, false (=no error) otherwise
// */
// protected function transformGd(image, paramsVar) {
// return this.getMediaTransformError(paramsVar, "Unimplemented");
// }
//
// /**
// * Escape a String for ImageMagick's property input (e.g. -set -comment)
// * See InterpretImageProperties() in magick/property.c
// * @param String s
// * @return String
// */
// function escapeMagickProperty(s) {
// // Double the backslashes
// s = str_replace('\\', '\\\\', s);
// // Double the percents
// s = str_replace('%', '%%', s);
// // Escape initial - or @
// if (strlen(s) > 0 && (s[0] === '-' || s[0] === '@')) {
// s = '\\' . s;
// }
//
// return s;
// }
//
// /**
// * Escape a String for ImageMagick's input filenames. See ExpandFilenames()
// * and GetPathComponent() in magick/utility.c.
// *
// * This won't work with an initial ~ or @, so input files should be prefixed
// * with the directory name.
// *
// * Glob character unescaping is broken in ImageMagick before 6.6.1-5, but
// * it's broken in a way that doesn't involve trying to convert every file
// * in a directory, so we're better off escaping and waiting for the bugfix
// * to filter down to users.
// *
// * @param String path The file path
// * @param boolean|String scene The scene specification, or false if there is none
// * @throws MWException
// * @return String
// */
// function escapeMagickInput(path, scene = false) {
// // Die on initial metacharacters (caller should prepend path)
// firstChar = substr(path, 0, 1);
// if (firstChar === '~' || firstChar === '@') {
// throw new MWException(__METHOD__ . ': cannot escape this path name');
// }
//
// // Escape glob chars
// path = preg_replace('/[*?\[\]{}]/', '\\\\\0', path);
//
// return this.escapeMagickPath(path, scene);
// }
//
// /**
// * Escape a String for ImageMagick's output filename. See
// * InterpretImageFilename() in magick/image.c.
// * @param String path The file path
// * @param boolean|String scene The scene specification, or false if there is none
// * @return String
// */
// function escapeMagickOutput(path, scene = false) {
// path = str_replace('%', '%%', path);
//
// return this.escapeMagickPath(path, scene);
// }
//
// /**
// * Armour a String against ImageMagick's GetPathComponent(). This is a
// * helper function for escapeMagickInput() and escapeMagickOutput().
// *
// * @param String path The file path
// * @param boolean|String scene The scene specification, or false if there is none
// * @throws MWException
// * @return String
// */
// protected function escapeMagickPath(path, scene = false) {
// // Die on format specifiers (other than drive letters). The regex is
// // meant to match all the formats you get from "convert -list format"
// if (preg_match('/^([a-zA-Z0-9-]+):/', path, m)) {
// if (wfIsWindows() && is_dir(m[0])) {
// // OK, it's a drive letter
// // ImageMagick has a similar exception, see IsMagickConflict()
// } else {
// throw new MWException(__METHOD__ . ': unexpected colon character in path name');
// }
// }
//
// // If there are square brackets, add a do-nothing scene specification
// // to force a literal interpretation
// if (scene === false) {
// if (strpos(path, '[') !== false) {
// path .= '[0--1]';
// }
// } else {
// path .= "[scene]";
// }
//
// return path;
// }
//
// /**
// * Retrieve the version of the installed ImageMagick
// * You can use PHPs version_compare() to use this value
// * Value is cached for one hour.
// * @return String|boolean Representing the IM version; false on error
// */
// protected function getMagickVersion() {
// cache = MediaWikiServices::getInstance().getLocalServerObjectCache();
// method = __METHOD__;
// return cache.getWithSetCallback(
// 'imagemagick-version',
// cache::TTL_HOUR,
// function () use (method) {
// global wgImageMagickConvertCommand;
//
// cmd = wfEscapeShellArg(wgImageMagickConvertCommand) . ' -version';
// wfDebug(method . ": Running convert -version\n");
// retval = '';
// return = wfShellExec(cmd, retval);
// x = preg_match(
// '/Version: ImageMagick ([0-9]*\.[0-9]*\.[0-9]*)/', return, matches
// );
// if (x != 1) {
// wfDebug(method . ": ImageMagick version check failed\n");
// return false;
// }
//
// return matches[1];
// }
// );
// }
//
// /**
// * Returns whether the current scaler supports rotation.
// *
// * @since 1.24 No longer static
// * @return boolean
// */
// public function canRotate() {
// return false;
// }
//
// /**
// * Should we automatically rotate an image based on exif
// *
// * @since 1.24 No longer static
// * @see wgEnableAutoRotation
// * @return boolean Whether auto rotation is enabled
// */
// public function autoRotateEnabled() {
// return false;
// }
//
// /**
// * Rotate a thumbnail.
// *
// * This is a stub. See BitmapHandler::rotate.
// *
// * @param File file
// * @param array paramsVar Rotate parameters.
// * 'rotation' clockwise rotation in degrees, allowed are multiples of 90
// * @since 1.24 Is non-static. From 1.21 it was static
// * @return boolean|MediaTransformError
// */
// public function rotate(file, paramsVar) {
// return new MediaTransformError('thumbnail_error', 0, 0,
// get_class(this) . ' rotation not implemented');
// }
//
// /**
// * Returns whether the file needs to be rendered. Returns true if the
// * file requires rotation and we are able to rotate it.
// *
// * @param File file
// * @return boolean
// */
// public function mustRender(file) {
// return this.canRotate() && this.getRotation(file) != 0;
// }
//
// /**
// * Check if the file is smaller than the maximum image area for thumbnailing.
// *
// * Runs the 'BitmapHandlerCheckImageArea' hook.
// *
// * @param File file
// * @param array paramsVar
// * @return boolean
// * @since 1.25
// */
// public function isImageAreaOkForThumbnaling(file, &paramsVar) {
// global wgMaxImageArea;
//
// // For historical reasons, hook starts with BitmapHandler
// checkImageAreaHookResult = null;
// Hooks::run(
// 'BitmapHandlerCheckImageArea',
// [ file, &paramsVar, &checkImageAreaHookResult ]
// );
//
// if (!is_null(checkImageAreaHookResult)) {
// // was set by hook, so return that value
// return (boolean)checkImageAreaHookResult;
// }
//
// srcWidth = file.getWidth(paramsVar['page']);
// srcHeight = file.getHeight(paramsVar['page']);
//
// if (srcWidth * srcHeight > wgMaxImageArea
// && !(file.getMimeType() == 'image/jpeg'
// && this.getScalerType(false, false) == 'im')
// ) {
// // Only ImageMagick can efficiently downsize jpg images without loading
// // the entire file in memory
// return false;
// }
// return true;
// }
}

View File

@@ -1,584 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.mws.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
import gplx.core.btries.*;
import gplx.langs.htmls.*;
import gplx.xowa.mws.utls.*;
public class Xomw_block_level_pass {
private final Bry_bfr tmp = Bry_bfr_.New();
private final Btrie_rv trv = new Btrie_rv();
private boolean in_pre, dt_open;
private int last_section;
private byte[] find_colon_no_links__before, find_colon_no_links__after;
public void Do_block_levels(Xomw_parser_ctx pctx, Xomw_parser_bfr pbfr, boolean line_start) {
// XO.PBFR
Bry_bfr src_bfr = pbfr.Src();
byte[] src = src_bfr.Bfr();
int src_bgn = 0;
int src_end = src_bfr.Len();
Bry_bfr bfr = pbfr.Trg();
pbfr.Switch();
// XO.STATIC
if (block_chars_ary == null) {
synchronized (Type_adp_.ClassOf_obj(this)) {
block_chars_ary = Block_chars_ary__new();
open_match_trie = Btrie_slim_mgr.ci_a7().Add_many_str
("<table", "<h1", "<h2", "<h3", "<h4", "<h5", "<h6", "<pre", "<tr", "<p", "<ul", "<ol", "<dl", "<li", "</tr", "</td", "</th");
close_match_trie = Btrie_slim_mgr.ci_a7().Add_many_str
( "</table", "</h1", "</h2", "</h3", "</h4", "</h5", "</h6", "<td", "<th", "<blockquote", "</blockquote", "<div", "</div", "<hr"
, "</pre", "</p", "</mw:", Xomw_strip_state.Str__marker_bgn + "-pre", "</li", "</ul", "</ol", "</dl", "<center", "</center");
blockquote_trie = Btrie_slim_mgr.ci_a7().Add_many_str("<blockquote", "</blockquote");
pre_trie = Btrie_slim_mgr.ci_a7().Add_str_int("<pre", Pre__bgn).Add_str_int("</pre", Pre__end);
}
}
// Parsing through the text line by line. The main thing
// happening here is handling of block-level elements p, pre,
// and making lists from lines starting with * # : etc.
byte[] last_prefix = Bry_.Empty;
bfr.Clear();
this.dt_open = false;
boolean in_block_elem = false;
int prefix_len = 0;
byte para_stack = Para_stack__none;
boolean in_blockquote = false;
this.in_pre = false;
this.last_section = Last_section__none;
byte[] prefix2 = null;
// PORTED.SPLIT: $textLines = StringUtils::explode("\n", $text);
int line_bgn = src_bgn;
while (line_bgn < src_end) {
int line_end = Bry_find_.Find_fwd(src, Byte_ascii.Nl, line_bgn);
if (line_end == Bry_find_.Not_found)
line_end = src_end;
// Fix up line_start
if (!line_start) {
bfr.Add_mid(src, line_bgn, line_end);
line_start = true;
continue;
}
// * = ul
// # = ol
// ; = dt
// : = dd
int last_prefix_len = last_prefix.length;
// PORTED: pre_close_match = preg_match('/<\\/pre/i', $oLine); pre_open_match = preg_match('/<pre/i', $oLine);
int pre_cur = line_bgn;
boolean pre_close_match = false;
boolean pre_open_match = false;
while (true) {
if (pre_cur >= line_end)
break;
Object o = pre_trie.Match_at(trv, src, pre_cur, line_end);
if (o == null)
pre_cur++;
else {
int pre_tid = (int)o;
if (pre_tid == Pre__bgn)
pre_open_match = true;
else if (pre_tid == Pre__end)
pre_close_match = true;
pre_cur = trv.Pos();
}
}
byte[] prefix = null, t = null;
// If not in a <pre> element, scan for and figure out what prefixes are there.
if (!in_pre) {
// Multiple prefixes may abut each other for nested lists.
prefix_len = Php_str_.Strspn_fwd__ary(src, block_chars_ary, line_bgn, line_end, line_end); // strspn($oLine, '*#:;');
prefix = Php_str_.Substr(src, line_bgn, prefix_len);
// eh?
// ; and : are both from definition-lists, so they're equivalent
// for the purposes of determining whether or not we need to open/close
// elements.
// substr( $inputLine, $prefixLength );
prefix2 = Bry_.Replace(prefix, Byte_ascii.Semic, Byte_ascii.Colon);
t = Bry_.Mid(src, line_bgn + prefix_len, line_end);
in_pre = pre_open_match;
}
else {
// Don't interpret any other prefixes in preformatted text
prefix_len = 0;
prefix = prefix2 = Bry_.Empty;
t = Bry_.Mid(src, line_bgn, line_end);
}
// List generation
byte[] term = null, t2 = null;
int common_prefix_len = -1;
if (prefix_len > 0 && Bry_.Eq(last_prefix, prefix2)) {
// Same as the last item, so no need to deal with nesting or opening stuff
bfr.Add(Next_item(Php_str_.Substr_byte(prefix, -1)));
para_stack = Para_stack__none;
if (prefix_len > 0 && prefix[prefix_len - 1] == Byte_ascii.Semic) {
// The one nasty exception: definition lists work like this:
// ; title : definition text
// So we check for : in the remainder text to split up the
// title and definition, without b0rking links.
term = t2 = Bry_.Empty;
if (Find_colon_no_links(t, term, t2) != Bry_find_.Not_found) {
term = find_colon_no_links__before;
t2 = find_colon_no_links__after;
t = t2;
bfr.Add(term).Add(Next_item(Byte_ascii.Colon));
}
}
}
else if (prefix_len > 0 || last_prefix_len > 0) {
// We need to open or close prefixes, or both.
// Either open or close a level...
common_prefix_len = Get_common(prefix, last_prefix);
para_stack = Para_stack__none;
// Close all the prefixes which aren't shared.
while (common_prefix_len < last_prefix_len) {
bfr.Add(Close_list(last_prefix[last_prefix_len - 1]));
last_prefix_len--;
}
// Continue the current prefix if appropriate.
if (prefix_len <= common_prefix_len && common_prefix_len > 0) {
bfr.Add(Next_item(prefix[common_prefix_len - 1]));
}
// Open prefixes where appropriate.
if (Bry_.Len_gt_0(last_prefix) && prefix_len > common_prefix_len) {
bfr.Add_byte_nl();
}
while (prefix_len > common_prefix_len) {
byte c = Php_str_.Substr_byte(prefix, common_prefix_len, 1);
bfr.Add(Open_list(c));
if (c == Byte_ascii.Semic) {
// @todo FIXME: This is dupe of code above
if (Find_colon_no_links(t, term, t2) != Bry_find_.Not_found) {
term = find_colon_no_links__before;
t2 = find_colon_no_links__after;
t = t2;
bfr.Add(term).Add(Next_item(Byte_ascii.Colon));
}
}
++common_prefix_len;
}
if (prefix_len == 0 && Bry_.Len_gt_0(last_prefix)) {
bfr.Add_byte_nl();
}
last_prefix = prefix2;
}
// If we have no prefixes, go to paragraph mode.
if (0 == prefix_len) {
// No prefix (not in list)--go to paragraph mode
// XXX: use a stack for nestable elements like span, table and div
int t_len = t.length;
boolean open_match = Php_preg_.Match(open_match_trie, trv, t, 0, t_len) != null;
boolean close_match = Php_preg_.Match(close_match_trie, trv, t, 0, t_len) != null;
if (open_match || close_match) {
para_stack = Para_stack__none;
// @todo bug 5718: paragraph closed
bfr.Add(Close_paragraph());
if (pre_open_match && !pre_close_match) {
in_pre = true;
}
int bq_offset = 0;
// PORTED:preg_match('/<(\\/?)blockquote[\s>]/i', t, $bqMatch, PREG_OFFSET_CAPTURE, $bq_offset)
while (true) {
Object o = Php_preg_.Match(blockquote_trie, trv, t, bq_offset, t_len);
if (o == null) { // no more blockquotes found; exit
break;
}
else {
byte[] bq_bry = (byte[])o;
in_blockquote = bq_bry[1] != Byte_ascii.Slash; // is this a close tag?
bq_offset = trv.Pos();
}
}
in_block_elem = !close_match;
}
else if (!in_block_elem && !in_pre) {
if ( Php_str_.Substr_byte(t, 0) == Byte_ascii.Space
&& (last_section == Last_section__pre || Bry_.Trim(t) != Bry_.Empty)
&& !in_blockquote
) {
// pre
if (last_section != Last_section__pre) {
para_stack = Para_stack__none;
bfr.Add(Close_paragraph()).Add(Gfh_tag_.Pre_lhs);
last_section = Last_section__pre;
}
t = Bry_.Mid(t, 1);
}
else {
// paragraph
if (Bry_.Trim(t) == Bry_.Empty) {
if (para_stack != Para_stack__none) {
Para_stack_bfr(bfr, para_stack);
bfr.Add_str_a7("<br />");
para_stack = Para_stack__none;
last_section = Last_section__para;
}
else {
if (last_section != Last_section__para) {
bfr.Add(Close_paragraph());
last_section = Last_section__none;
para_stack = Para_stack__bgn;
}
else {
para_stack = Para_stack__mid;
}
}
}
else {
if (para_stack != Para_stack__none) {
Para_stack_bfr(bfr, para_stack);
para_stack = Para_stack__none;
last_section = Last_section__para;
}
else if (last_section != Last_section__para) {
bfr.Add(Close_paragraph()).Add(Gfh_tag_.P_lhs);
this.last_section = Last_section__para;
}
}
}
}
}
// somewhere above we forget to get out of pre block (bug 785)
if (pre_close_match && in_pre) {
in_pre = false;
}
if (para_stack == Para_stack__none) {
bfr.Add(t);
if (prefix_len == 0) {
bfr.Add_byte_nl();
}
}
line_bgn = line_end + 1;
}
while (prefix_len > 0) {
bfr.Add(Close_list(prefix2[prefix_len - 1]));
prefix_len--;
if (prefix_len > 0) {
bfr.Add_byte_nl();
}
}
if (last_section != Last_section__none) {
bfr.Add(last_section == Last_section__para ? Gfh_tag_.P_rhs : Gfh_tag_.Pre_rhs);
last_section = Last_section__none;
}
}
// Emits the close tag (plus a trailing newline) for whichever <p> or <pre>
// section is currently open and resets the section state. When nothing is
// open, an empty byte array is returned; the state is reset either way.
public byte[] Close_paragraph() {
	byte[] closing_tag = Bry_.Empty;
	boolean section_is_open = last_section != Last_section__none;
	if (section_is_open) {
		if (last_section == Last_section__para)
			tmp.Add(Gfh_tag_.P_rhs);
		else
			tmp.Add(Gfh_tag_.Pre_rhs);
		tmp.Add_byte_nl();
		closing_tag = tmp.To_bry_and_clear();
	}
	// reset unconditionally, even when no tag was emitted
	last_section = Last_section__none;
	in_pre = false;
	return closing_tag;
}
// Counts how many leading bytes st1 and st2 share, i.e. the length of
// their common prefix. Returns 0 when either array is empty or the first
// bytes already differ.
private int Get_common(byte[] st1, byte[] st2) {
	int limit = Math.min(st1.length, st2.length);
	int matched = 0;
	while (matched < limit && st1[matched] == st2[matched]) {
		matched++;
	}
	return matched;
}
// Open the list item element identified by the prefix character.
// Any paragraph / pre section that is still open is closed first and its
// close tag is prepended to the returned markup.
//   '*' -> <ul><li>
//   '#' -> <ol><li>
//   ':' -> <dl><dd>
//   ';' -> <dl><dt>  (also flags dt_open so the matching close emits </dt>)
// Any other character yields the "<!-- ERR 1 -->" marker (without the
// paragraph close tag).
private byte[] Open_list(byte c) {
	byte[] result = Close_paragraph();
	if (c == Byte_ascii.Star)
		result = tmp.Add(result).Add_str_a7("<ul><li>").To_bry_and_clear();
	else if (c == Byte_ascii.Hash)
		result = tmp.Add(result).Add_str_a7("<ol><li>").To_bry_and_clear();
	else if (c == Byte_ascii.Colon)	// FIX: previously re-tested Hash, so ':' was unreachable and fell through to ERR 1
		result = tmp.Add(result).Add_str_a7("<dl><dd>").To_bry_and_clear();
	else if (c == Byte_ascii.Semic) {
		result = tmp.Add(result).Add_str_a7("<dl><dt>").To_bry_and_clear();
		dt_open = true;
	}
	else
		result = tmp.Add_str_a7("<!-- ERR 1 -->").To_bry_and_clear();
	return result;
}
// Ends the list item that is currently open and starts the next one of the
// kind identified by the prefix character. For definition lists the close
// tag depends on whether a <dt> is open (dt_open), and dt_open is updated to
// reflect the item that was just opened. Unknown characters yield the
// "<!-- ERR 2 -->" marker.
private byte[] Next_item(byte c) {
	boolean is_plain_list = c == Byte_ascii.Star || c == Byte_ascii.Hash;
	if (is_plain_list)
		return tmp.Add_str_a7("</li>\n<li>").To_bry_and_clear();

	boolean is_term = c == Byte_ascii.Semic;
	if (!is_term && c != Byte_ascii.Colon)
		return tmp.Add_str_a7("<!-- ERR 2 -->").To_bry_and_clear();

	// definition list: close whichever of <dt> / <dd> is open
	byte[] close = tmp.Add_str_a7(dt_open ? "</dt>\n" : "</dd>\n").To_bry_and_clear();
	dt_open = is_term;
	return tmp.Add(close).Add_str_a7(is_term ? "<dt>" : "<dd>").To_bry_and_clear();
}
// Produces the markup that closes both the current item and its enclosing
// list for the given prefix character. For ':' the item close tag is </dt>
// when a term is still open (dt_open, which is then cleared), otherwise
// </dd>. Any other character yields the "<!-- ERR 3 -->" marker.
private byte[] Close_list(byte c) {
	if (c == Byte_ascii.Star)
		return Bry_.new_a7("</li></ul>");
	if (c == Byte_ascii.Hash)
		return Bry_.new_a7("</li></ol>");
	if (c != Byte_ascii.Colon)
		return Bry_.new_a7("<!-- ERR 3 -->");

	boolean closing_term = dt_open;
	dt_open = false;
	return Bry_.new_a7(closing_term ? "</dt></dl>" : "</dd></dl>");
}
// Split up a String on ':', ignoring any occurrences inside tags
// to prevent illegal overlapping.
//
// On success the text before / after the colon is stored in the fields
// find_colon_no_links__before / find_colon_no_links__after and a value other
// than Bry_find_.Not_found is returned. The `before` and `after` parameters
// are not written to; they only mirror the by-reference signature of the PHP
// original. Returns Bry_find_.Not_found when no usable colon exists.
//
// FIX: every state that waits for the END of a tag ('>') used to compare
// against Byte_ascii.Angle_bgn ('<'), so tags never terminated and the
// nesting level was never tracked. Those comparisons now use
// Byte_ascii.Angle_end.
private int Find_colon_no_links(byte[] str, byte[] before, byte[] after) {
	int len = str.length;
	int colon_pos = Php_str_.Strpos(str, Byte_ascii.Colon, 0, len);
	if (colon_pos == Bry_find_.Not_found) {
		// Nothing to find!
		return Bry_find_.Not_found;
	}
	int lt_pos = Php_str_.Strpos(str, Byte_ascii.Angle_bgn, 0, len);
	if (lt_pos == Bry_find_.Not_found || lt_pos > colon_pos) {
		// Easy; no tag nesting to worry about
		find_colon_no_links__before = Php_str_.Substr(str, 0, colon_pos);
		find_colon_no_links__after = Php_str_.Substr(str, colon_pos + 1);
		return colon_pos;
	}
	// Ugly state machine to walk through avoiding tags.
	int state = COLON_STATE_TEXT;
	int level = 0;	// nesting depth of currently open tags
	for (int i = 0; i < len; i++) {
		byte c = str[i];
		switch (state) {
			case COLON_STATE_TEXT:
				switch (c) {
					case Byte_ascii.Angle_bgn:
						// Could be either a <start> tag or an </end> tag
						state = COLON_STATE_TAGSTART;
						break;
					case Byte_ascii.Colon:
						if (level == 0) {
							// We found it!
							find_colon_no_links__before = Php_str_.Substr(str, 0, i);
							find_colon_no_links__after = Php_str_.Substr(str, i + 1);
							return i;
						}
						// Embedded in a tag; don't break it.
						break;
					default:
						// Skip ahead looking for something interesting
						colon_pos = Php_str_.Strpos(str, Byte_ascii.Colon, i, len);
						if (colon_pos == Bry_find_.Not_found) {
							// Nothing else interesting
							return Bry_find_.Not_found;
						}
						lt_pos = Php_str_.Strpos(str, Byte_ascii.Angle_bgn, i, len);
						if (level == 0) {
							if (lt_pos == Bry_find_.Not_found || colon_pos < lt_pos) {
								// We found it!
								find_colon_no_links__before = Php_str_.Substr(str, 0, colon_pos);
								find_colon_no_links__after = Php_str_.Substr(str, colon_pos + 1);
								// NOTE: returns the scan position, not colon_pos; the callers visible in this file only compare the result against Not_found
								return i;
							}
						}
						if (lt_pos == Bry_find_.Not_found) {
							// Nothing else interesting to find; abort!
							// We're nested, but there's no close tags left. Abort!
							i = len; // break 2
							break;
						}
						// Skip ahead to next tag start
						i = lt_pos;
						state = COLON_STATE_TAGSTART;
						break;
				}
				break;
			case COLON_STATE_TAG:
				// In a <tag>
				switch (c) {
					case Byte_ascii.Angle_end:
						// end of an opening tag: one level deeper
						level++;
						state = COLON_STATE_TEXT;
						break;
					case Byte_ascii.Slash:
						// Slash may be followed by >?
						state = COLON_STATE_TAGSLASH;
						break;
					default:
						// ignore
						break;
				}
				break;
			case COLON_STATE_TAGSTART:
				switch (c) {
					case Byte_ascii.Slash:
						state = COLON_STATE_CLOSETAG;
						break;
					case Byte_ascii.Bang:
						state = COLON_STATE_COMMENT;
						break;
					case Byte_ascii.Angle_end:
						// Illegal early close? This shouldn't happen D:
						state = COLON_STATE_TEXT;
						break;
					default:
						state = COLON_STATE_TAG;
						break;
				}
				break;
			case COLON_STATE_CLOSETAG:
				// In a </tag>
				if (c == Byte_ascii.Angle_end) {
					level--;
					if (level < 0) {
						Gfo_usr_dlg_.Instance.Warn_many("", "", "Invalid input; too many close tags");
						return Bry_find_.Not_found;
					}
					state = COLON_STATE_TEXT;
				}
				break;
			case COLON_STATE_TAGSLASH:
				if (c == Byte_ascii.Angle_end) {
					// Yes, a self-closed tag <blah/>
					state = COLON_STATE_TEXT;
				}
				else {
					// Probably we're jumping the gun, and this is an attribute
					state = COLON_STATE_TAG;
				}
				break;
			case COLON_STATE_COMMENT:
				if (c == Byte_ascii.Dash) {
					state = COLON_STATE_COMMENTDASH;
				}
				break;
			case COLON_STATE_COMMENTDASH:
				if (c == Byte_ascii.Dash) {
					state = COLON_STATE_COMMENTDASHDASH;
				}
				else {
					state = COLON_STATE_COMMENT;
				}
				break;
			case COLON_STATE_COMMENTDASHDASH:
				if (c == Byte_ascii.Angle_end) {
					// "-->" seen: comment is finished
					state = COLON_STATE_TEXT;
				}
				else {
					state = COLON_STATE_COMMENT;
				}
				break;
			default:
				throw Err_.new_wo_type("State machine error");
		}
	}
	if (level > 0) {
		Gfo_usr_dlg_.Instance.Warn_many("", "", "Invalid input; not enough close tags (level ~{0}, state ~{1})", level, state);
		return Bry_find_.Not_found;
	}
	return Bry_find_.Not_found;
}
// States of the tag-skipping state machine in Find_colon_no_links.
private static final int
COLON_STATE_TEXT = 0
, COLON_STATE_TAG = 1
, COLON_STATE_TAGSTART = 2
, COLON_STATE_CLOSETAG = 3
, COLON_STATE_TAGSLASH = 4
, COLON_STATE_COMMENT = 5
, COLON_STATE_COMMENTDASH = 6
, COLON_STATE_COMMENTDASHDASH = 7
;
// Which section (if any) is currently open; the trailing comments give the
// tag name each value stands for.
private static final byte
Last_section__none = 0 // ''
, Last_section__para = 1 // p
, Last_section__pre = 2 // pre
;
// Pending paragraph markup; Para_stack_bfr writes the markup for the
// non-"none" values.
private static final byte
Para_stack__none = 0 // false
, Para_stack__bgn = 1 // <p>
, Para_stack__mid = 2 // </p><p>
;
// NOTE(review): Pre__bgn / Pre__end / pre_trie are not referenced in the part
// of the class visible here; presumably they drive <pre> open / close
// matching -- confirm against the start of the class.
private static final int Pre__bgn = 0, Pre__end = 1;
private static Btrie_slim_mgr pre_trie;
// Lookup table (indexed by byte value) of the list-prefix characters; see Block_chars_ary__new.
private static boolean[] block_chars_ary;
// Builds a 256-slot lookup table, indexed by byte value, whose only true
// entries are the four block-prefix characters: * # : ;
private static boolean[] Block_chars_ary__new() {
	boolean[] lookup = new boolean[256];
	byte[] prefix_chars = new byte[] {Byte_ascii.Star, Byte_ascii.Hash, Byte_ascii.Colon, Byte_ascii.Semic};
	for (byte prefix_char : prefix_chars) {
		lookup[prefix_char] = true;
	}
	return lookup;
}
private static Btrie_slim_mgr open_match_trie, close_match_trie, blockquote_trie;
// Writes the pending paragraph markup for the given Para_stack__* id to bfr:
// "<p>" for Para_stack__bgn, "</p><p>" for Para_stack__mid. Any other id is
// treated as a programming error and throws.
private static void Para_stack_bfr(Bry_bfr bfr, int id) {
	if (id == Para_stack__bgn) {
		bfr.Add_str_a7("<p>");
	}
	else if (id == Para_stack__mid) {
		bfr.Add_str_a7("</p><p>");
	}
	else {
		throw Err_.new_unhandled_default(id);
	}
}
}

View File

@@ -1,42 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.mws.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
import org.junit.*; import gplx.core.tests.*;
import gplx.xowa.mws.linkers.*;
// Unit tests for Xomw_block_level_pass.
public class Xomw_block_level_pass__tst {
	private final Xomw_block_level_pass__fxt fxt = new Xomw_block_level_pass__fxt();

	// A single line of plain text is expected to come back wrapped in <p>...</p>.
	@Test public void Basic() {
		String src = String_.Concat_lines_nl_skip_last("a");
		String expd = String_.Concat_lines_nl_skip_last("<p>a", "</p>");
		fxt.Test__do_block_levels(src, expd);
	}
}
// Test fixture: runs Do_block_levels over a source string and compares the
// resulting buffer with the expected markup.
class Xomw_block_level_pass__fxt {
	private final Xomw_block_level_pass block_level_pass = new Xomw_block_level_pass();
	private final Xomw_parser_ctx pctx = new Xomw_parser_ctx();
	private final Xomw_parser_bfr pbfr = new Xomw_parser_bfr();
	private boolean apos = true;

	public void Test__do_block_levels(String src, String expd) {
		// while apos is set, the expected text is passed through Replace_apos before comparing
		String expd_html = apos ? gplx.langs.htmls.Gfh_utl.Replace_apos(expd) : expd;
		byte[] src_bry = Bry_.new_u8(src);
		block_level_pass.Do_block_levels(pctx, pbfr.Init(src_bry), true);
		String actl_html = pbfr.Rslt().To_str_and_clear();
		Gftest.Eq__str(expd_html, actl_html);
	}
}

View File

@@ -1,251 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.mws.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
import gplx.langs.htmls.*;
import gplx.xowa.mws.*;
import gplx.xowa.mws.htmls.*;
import gplx.xowa.mws.linkers.*;
// Registers internal links found during parsing under numeric keys, leaving a
// "<!--LINK key-->" placeholder in the text, and later replaces every
// placeholder with the anchor produced by the link renderer.
public class Xomw_link_holders {
	private final Xomw_link_renderer link_renderer;
	private final Bry_bfr tmp;
	private int link_id = 0; // MOVED:Parser.php
	private final Xomw_link_holder_list internals = new Xomw_link_holder_list();
	private final Xomw_atr_mgr extra_atrs = new Xomw_atr_mgr();
	private final Xomw_qry_mgr query = new Xomw_qry_mgr();
	public Xomw_link_holders(Xomw_link_renderer link_renderer, Bry_bfr tmp) {
		this.link_renderer = link_renderer;
		this.tmp = tmp;
	}
	// Forgets all registered links and restarts key numbering at 0.
	public void Clear() {
		internals.Clear();
		link_id = 0;
	}
	// Registers a link to `nt` and writes its placeholder followed by `trail`
	// to bfr. A null title writes an error comment followed by the raw
	// prefix / text / trail instead. NOTE: the `query` parameter shadows the
	// Xomw_qry_mgr field of the same name inside this method.
	public void Make_holder(Bry_bfr bfr, Xoa_ttl nt, byte[] text, byte[][] query, byte[] trail, byte[] prefix) {
		if (nt == null) {
			// Fail gracefully
			bfr.Add_str_a7("<!-- ERROR -->").Add(prefix).Add(text).Add(trail);
		}
		else {
			// Separate the link trail from the rest of the link
			// list( $inside, $trail ) = Linker::splitTrail( $trail );
			byte[] inside = Bry_.Empty;
			Xomw_link_holder_item entry = new Xomw_link_holder_item(nt, tmp.Add_bry_many(prefix, text, inside).To_bry_and_clear(), query);
			boolean is_external = false; // $nt->isExternal()
			if (is_external) {
				// Use a globally unique ID to keep the objects mergable
				// $key = $this->parent->nextLinkID();
				// $this->interwikis[$key] = $entry;
				// $retVal = "<!--IWLINK $key-->{$trail}";
			}
			else {
				int key = link_id++;
				internals.Add(key, entry);
				bfr.Add(Bry__link__bgn).Add_int_variable(key).Add(Gfh_tag_.Comm_end).Add(trail); // "<!--LINK $ns:$key-->{$trail}";
			}
		}
	}
	// Test helper: registers a link under the next key without writing a placeholder.
	public void Test__add(Xoa_ttl ttl, byte[] capt) {
		int key = link_id++;
		Xomw_link_holder_item item = new Xomw_link_holder_item(ttl, capt, Bry_.Ary_empty);
		internals.Add(key, item);
	}
	// Replaces all link placeholders in pbfr; pctx is currently unused.
	public void Replace(Xomw_parser_ctx pctx, Xomw_parser_bfr pbfr) {
		this.Replace_internal(pbfr);
		// $this->replaceInterwiki( $text );
	}
	// Scans the source buffer for "<!--LINK key-->" placeholders and writes the
	// text to the target buffer with each placeholder replaced by its rendered
	// link. Does nothing when no links were registered.
	private void Replace_internal(Xomw_parser_bfr pbfr) {
		if (internals.Len() == 0)
			return;
		// $colours = [];
		// $linkCache = LinkCache::singleton();
		// $output = $this->parent->getOutput();
		// $linkRenderer = $this->parent->getLinkRenderer();
		// $linkcolour_ids = [];
		// SKIP:Replace_internals does db lookup to identify redlinks;
		// Construct search and replace arrays
		Bry_bfr src_bfr = pbfr.Src();
		byte[] src = src_bfr.Bfr();
		int src_bgn = 0;
		int src_end = src_bfr.Len();
		Bry_bfr bfr = pbfr.Trg();
		pbfr.Switch();
		int cur = src_bgn;	// where the next placeholder search starts
		int prv = src_bgn;	// start of source text not yet copied to bfr
		while (true) {
			int link_bgn = Bry_find_.Find_fwd(src, Bry__link__bgn, cur, src_end);
			if (link_bgn == Bry_find_.Not_found) {
				bfr.Add_mid(src, prv, src_end);
				break;
			}
			int key_bgn = link_bgn + Bry__link__bgn.length;
			int key_end = Bry_find_.Find_fwd_while_num(src, key_bgn, src_end);
			int link_key = Bry_.To_int_or(src, key_bgn, key_end, -1);
			// FIX: a placeholder with a missing or unknown key used to index the
			// holder list out of range (or dereference a null item). Keys are
			// handed out sequentially from 0, so anything outside [0, Len) was
			// never registered.
			Xomw_link_holder_item item = (link_key >= 0 && link_key < internals.Len()) ? internals.Get_by(link_key) : null;
			if (item == null) {
				// leave the unrecognized placeholder as literal text; prv is untouched so it gets copied with the following text
				cur = key_bgn;
				continue;
			}
			// $pdbk = $entry['pdbk'];
			// $title = $entry['title'];
			// $query = isset( $entry['query'] ) ? $entry['query'] : [];
			// $key = "$ns:$index";
			// $searchkey = "<!--LINK $key-->";
			// $displayText = $entry['text'];
			// if ( isset( $entry['selflink'] ) ) {
			// $replacePairs[$searchkey] = Linker::makeSelfLinkObj( $title, $displayText, $query );
			// continue;
			// }
			// if ( $displayText === '' ) {
			// $displayText = null;
			// } else {
			// $displayText = new HtmlArmor( $displayText );
			// }
			// if ( !isset( $colours[$pdbk] ) ) {
			// $colours[$pdbk] = 'new';
			// }
			// $attribs = [];
			// if ( $colours[$pdbk] == 'new' ) {
			// $linkCache->addBadLinkObj( $title );
			// $output->addLink( $title, 0 );
			// $link = $linkRenderer->makeBrokenLink(
			// $title, $displayText, $attribs, $query
			// );
			// } else {
			// $link = $linkRenderer->makePreloadedLink(
			// $title, $displayText, $colours[$pdbk], $attribs, $query
			// );
			// }
			bfr.Add_mid(src, prv, link_bgn);
			link_renderer.Make_preloaded_link(bfr, item.Title(), item.Text(), Bry_.Empty, extra_atrs, query.Clear());
			cur = key_end + Gfh_tag_.Comm_end_len;
			prv = cur;
		}
	}
	// private void Replace_internal__db() {
	// // Generate query
	// $lb = new LinkBatch();
	// $lb->setCaller( __METHOD__ );
	//
	// foreach ( $this->internals as $ns => $entries ) {
	// foreach ( $entries as $entry ) {
	// /** @var Title $title */
	// $title = $entry['title'];
	// $pdbk = $entry['pdbk'];
	//
	// # Skip invalid entries.
	// # Result will be ugly, but prevents crash.
	// if ( is_null( $title ) ) {
	// continue;
	// }
	//
	// # Check if it's a static known link, e.g. interwiki
	// if ( $title->isAlwaysKnown() ) {
	// $colours[$pdbk] = '';
	// } elseif ( $ns == NS_SPECIAL ) {
	// $colours[$pdbk] = 'new';
	// } else {
	// $id = $linkCache->getGoodLinkID( $pdbk );
	// if ( $id != 0 ) {
	// $colours[$pdbk] = $linkRenderer->getLinkClasses( $title );
	// $output->addLink( $title, $id );
	// $linkcolour_ids[$id] = $pdbk;
	// } elseif ( $linkCache->isBadLink( $pdbk ) ) {
	// $colours[$pdbk] = 'new';
	// } else {
	// # Not in the link cache, add it to the query
	// $lb->addObj( $title );
	// }
	// }
	// }
	// }
	// if ( !$lb->isEmpty() ) {
	// $fields = array_merge(
	// LinkCache::getSelectFields(),
	// [ 'page_namespace', 'page_title' ]
	// );
	//
	// $res = $dbr->select(
	// 'page',
	// $fields,
	// $lb->constructSet( 'page', $dbr ),
	// __METHOD__
	// );
	//
	// # Fetch data and form into an associative array
	// # non-existent = broken
	// foreach ( $res as $s ) {
	// $title = Title::makeTitle( $s->page_namespace, $s->page_title );
	// $pdbk = $title->getPrefixedDBkey();
	// $linkCache->addGoodLinkObjFromRow( $title, $s );
	// $output->addLink( $title, $s->page_id );
	// $colours[$pdbk] = $linkRenderer->getLinkClasses( $title );
	// // add id to the extension todolist
	// $linkcolour_ids[$s->page_id] = $pdbk;
	// }
	// unset( $res );
	// }
	// if ( count( $linkcolour_ids ) ) {
	// // pass an array of page_ids to an extension
	// Hooks::run( 'GetLinkColours', [ $linkcolour_ids, &$colours ] );
	// }
	//
	// # Do a second query for different language variants of links and categories
	// if ( $wgContLang->hasVariants() ) {
	// $this->doVariants( $colours );
	// }
	// }
	private static final byte[] Bry__link__bgn = Bry_.new_a7("<!--LINK ");
}
// Growable array of link-holder items addressed directly by their integer key.
class Xomw_link_holder_list {
	private static final int Ary_max__init = 128;	// initial (and post-Clear) capacity
	private int ary_len = 0, ary_max = Ary_max__init;
	private Xomw_link_holder_item[] ary = new Xomw_link_holder_item[Ary_max__init];
	// Number of items added since construction or the last Clear.
	public int Len() {return ary_len;}
	// Removes all items and returns the storage to its initial capacity.
	public void Clear() {
		ary_len = 0;
		if (ary_max > Ary_max__init) {
			ary = new Xomw_link_holder_item[Ary_max__init];
			// FIX: ary_max used to stay at the old, larger size after the array
			// was shrunk, so a later Add with a key in [128, old max) wrote past
			// the end of the new array.
			ary_max = Ary_max__init;
		}
		else {
			// FIX: drop stale items so Get_by cannot return an entry from before the Clear
			for (int i = 0; i < ary_max; i++)
				ary[i] = null;
		}
	}
	// Stores item under key, growing the storage as needed.
	public void Add(int key, Xomw_link_holder_item item) {
		if (key >= ary_max) {
			// FIX: keep doubling until key fits; a single doubling was not enough for keys >= 2 * ary_max
			int new_max = ary_max;
			while (key >= new_max)
				new_max *= 2;
			ary = (Xomw_link_holder_item[])Array_.Resize(ary, new_max);
			ary_max = new_max;
		}
		ary[key] = item;
		ary_len++;
	}
	// Returns the item stored under key, or null when nothing is stored there
	// (including keys outside the current storage range).
	public Xomw_link_holder_item Get_by(int key) {return key >= 0 && key < ary_max ? ary[key] : null;}
}
// Immutable record of one pending internal link: its title, display text and query arguments.
class Xomw_link_holder_item {
	private final Xoa_ttl title;
	private final byte[] text;
	private final byte[][] query;

	public Xomw_link_holder_item(Xoa_ttl title, byte[] text, byte[][] query) {
		this.title = title;
		this.text = text;
		this.query = query;
	}

	public Xoa_ttl Title() {
		return title;
	}
	public byte[] Text() {
		return text;
	}
	// Prefixed db key, obtained from the title on each call.
	public byte[] Pdbk() {
		return title.Get_prefixed_db_key();
	}
	public byte[][] Query() {
		return query;
	}
}

View File

@@ -1,45 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.mws.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
import org.junit.*; import gplx.core.tests.*;
import gplx.xowa.mws.linkers.*;
// Unit tests for Xomw_link_holders.
public class Xomw_link_holders__tst {
	private final Xomw_link_holders__fxt fxt = new Xomw_link_holders__fxt();

	// One registered link: its placeholder is swapped for the rendered anchor and the surrounding text is kept.
	@Test public void Replace__basic() {
		fxt.Init__add("A", "a");
		String src = "a <!--LINK 0--> b";
		String expd = "a <a href='/wiki/A' title='A'>a</a> b";
		fxt.Test__replace(src, expd);
	}
}
// Test fixture: owns a Xomw_link_holders plus a test wiki for parsing titles.
class Xomw_link_holders__fxt {
	private final Xomw_link_holders holders = new Xomw_link_holders(new Xomw_link_renderer(new Xomw_sanitizer()), Bry_bfr_.New());
	private final Xomw_parser_bfr pbfr = new Xomw_parser_bfr();
	private final Xowe_wiki wiki;
	private boolean apos = true;

	public Xomw_link_holders__fxt() {
		this.wiki = Xoa_app_fxt.Make__wiki__edit(Xoa_app_fxt.Make__app__edit());
	}

	// Registers a link to the page named `ttl` with caption `capt`.
	public void Init__add(String ttl, String capt) {
		Xoa_ttl parsed_ttl = wiki.Ttl_parse(Bry_.new_u8(ttl));
		byte[] capt_bry = Bry_.new_u8(capt);
		holders.Test__add(parsed_ttl, capt_bry);
	}

	// Runs placeholder replacement over `src` and compares the output with `expd`.
	public void Test__replace(String src, String expd) {
		// while apos is set, the expected text is passed through Replace_apos before comparing
		String expd_html = apos ? gplx.langs.htmls.Gfh_utl.Replace_apos(expd) : expd;
		holders.Replace(new Xomw_parser_ctx(), pbfr.Init(Bry_.new_u8(src)));
		String actl_html = pbfr.Rslt().To_str_and_clear();
		Gftest.Eq__str(expd_html, actl_html);
	}
}

View File

@@ -1,27 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.mws.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
// Identifiers for the kind of output a parse produces.
public class Xomw_output_type {
	public static final byte Tid__html = 1;			// like parse()
	public static final byte Tid__wiki = 2;			// like preSaveTransform()
	public static final byte Tid__preprocess = 3;	// like preprocess()
	public static final byte Tid__msg = 3;			// same numeric value as Tid__preprocess
	public static final byte Tid__plain = 4;		// like extractSections() - portions of the original are returned unchanged.
}

View File

@@ -1,299 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.mws.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
import gplx.core.btries.*; import gplx.core.net.*;
import gplx.xowa.mws.parsers.prepros.*; import gplx.xowa.mws.parsers.headings.*;
import gplx.xowa.mws.parsers.quotes.*; import gplx.xowa.mws.parsers.tables.*; import gplx.xowa.mws.parsers.hrs.*; import gplx.xowa.mws.parsers.nbsps.*;
import gplx.xowa.mws.parsers.lnkes.*; import gplx.xowa.mws.parsers.lnkis.*; import gplx.xowa.mws.parsers.magiclinks.*; import gplx.xowa.mws.parsers.doubleunders.*;
import gplx.xowa.mws.utls.*; import gplx.xowa.mws.linkers.*;
import gplx.xowa.mws.htmls.*;
public class Xomw_parser {
// Per-parser context; Init_by_page passes the page title to it.
private final Xomw_parser_ctx pctx = new Xomw_parser_ctx();
// Workers for the individual parsing passes. Those declared without an
// initializer are created in the constructor.
private final Xomw_table_wkr table_wkr;
private final Xomw_hr_wkr hr_wkr = new Xomw_hr_wkr();
private final Xomw_lnke_wkr lnke_wkr;
private final Xomw_nbsp_wkr nbsp_wkr = new Xomw_nbsp_wkr();
private final Xomw_block_level_pass block_wkr = new Xomw_block_level_pass();
private final Xomw_heading_wkr heading_wkr = new Xomw_heading_wkr();
private final Xomw_magiclinks_wkr magiclinks_wkr;
private final Xomw_doubleunder_wkr doubleunder_wkr = new Xomw_doubleunder_wkr();
private final Xomw_link_renderer link_renderer;
private final Xomw_link_holders holders;
private final Xomw_heading_cbk__html heading_wkr_cbk;
// Trie of URL protocols; Armor_links matches against it.
private final Btrie_slim_mgr protocols_trie;
private final Xomw_doubleunder_data doubleunder_data = new Xomw_doubleunder_data();
// Static regex helpers, created by the first constructor call.
private static Xomw_regex_space regex_space;
private static Xomw_regex_boundary regex_boundary;
private static Xomw_regex_url regex_url;
// Reusable result holder for trie matches (see Armor_links).
private final Btrie_rv trv = new Btrie_rv();
// Sequence number embedded in the markers built by Insert_strip_item.
private int marker_index = 0;
// private final Xomw_prepro_wkr prepro_wkr = new Xomw_prepro_wkr();
// Accessors, each followed on the same line by the field it exposes.
public Xomw_parser_env Env() {return env;} private final Xomw_parser_env env = new Xomw_parser_env();
public Xomw_parser_options Options() {return options;} private final Xomw_parser_options options = new Xomw_parser_options();
public Xomw_strip_state Strip_state() {return strip_state;} private final Xomw_strip_state strip_state = new Xomw_strip_state();
public Xomw_sanitizer Sanitizer() {return sanitizer;} private final Xomw_sanitizer sanitizer = new Xomw_sanitizer();
public Xomw_linker Linker() {return linker;} private final Xomw_linker linker;
public Bry_bfr Tmp() {return tmp;} private final Bry_bfr tmp = Bry_bfr_.New();
public Xomw_quote_wkr Quote_wkr() {return quote_wkr;} private final Xomw_quote_wkr quote_wkr;
public Xomw_lnki_wkr Lnki_wkr() {return lnki_wkr;} private final Xomw_lnki_wkr lnki_wkr;
// Always false: the field is final and initialized to false.
public boolean Output_type__wiki() {return output_type__wiki;} private final boolean output_type__wiki = false;
// Creates the parser and wires up its workers.
// The static regex helpers and the rel-attribute constants are created by the
// first constructor call that finds regex_space unset.
// NOTE(review): the null check is outside the synchronized block and is not
// repeated inside it, so two threads constructing parsers at the same time
// may both run the initialization -- confirm whether that matters.
public Xomw_parser() {
if (regex_space == null) {
synchronized (Type_adp_.ClassOf_obj(this)) {
regex_space = new Xomw_regex_space();
regex_boundary = new Xomw_regex_boundary(regex_space);
regex_url = new Xomw_regex_url(regex_space);
Atr__rel = Bry_.new_a7("rel");
Get_external_link_rel = Bry_.new_a7("nofollow");
}
}
// Shared collaborators are created first; several workers below receive
// them (or `this`) as constructor arguments.
this.link_renderer = new Xomw_link_renderer(sanitizer);
this.linker = new Xomw_linker(link_renderer);
this.protocols_trie = Xomw_parser.Protocols__dflt();
this.holders = new Xomw_link_holders(link_renderer, tmp);
this.table_wkr = new Xomw_table_wkr(this);
this.quote_wkr = new Xomw_quote_wkr(this);
this.lnke_wkr = new Xomw_lnke_wkr(this);
this.lnki_wkr = new Xomw_lnki_wkr(this, holders, link_renderer, protocols_trie);
this.heading_wkr_cbk = new Xomw_heading_cbk__html();
this.magiclinks_wkr = new Xomw_magiclinks_wkr(this, sanitizer, linker, regex_boundary, regex_url);
}
// Passes wiki-level data (the language's link-trail trie, the language itself,
// the wiki) and shared parser state to the workers that take it.
public void Init_by_wiki(Xowe_wiki wiki) {
linker.Init_by_wiki(env, wiki.Lang().Lnki_trail_mgr().Trie());
lnke_wkr.Init_by_wiki(protocols_trie, regex_url, regex_space);
lnki_wkr.Init_by_wiki(env, wiki);
doubleunder_wkr.Init_by_wiki(doubleunder_data, wiki.Lang());
magiclinks_wkr.Init_by_wiki();
}
// Passes the title of the page about to be parsed to the parser context.
public void Init_by_page(Xoa_ttl ttl) {
pctx.Init_by_page(ttl);
}
// Loads `text` into pbfr and runs the passes below over it, in this order:
// tables, horizontal rules, double underscores, headings, internal links,
// quotes, external links, removal of the noparse marker, magic links.
// The commented-out PHP shows steps that are not run here.
public void Internal_parse(Xomw_parser_bfr pbfr, byte[] text) {
pbfr.Init(text);
// $origText = text;
// MW.HOOK:ParserBeforeInternalParse
// if ($frame) {
// use frame depth to infer how include/noinclude tags should be handled
// depth=0 means this is the top-level document; otherwise it's an included document
// boolean for_inclusion = false;
// if (!$frame->depth) {
// $flag = 0;
// } else {
// $flag = Parser::PTD_FOR_INCLUSION;
// }
// text = prepro_wkr.Preprocess_to_xml(text, for_inclusion);
// text = $frame->expand($dom);
// } else {
// // if $frame is not provided, then use old-style replaceVariables
// text = $this->replaceVariables(text);
// }
// MW.HOOK:InternalParseBeforeSanitize
// text = Sanitizer::removeHTMLtags(
// text,
// [ &$this, 'attributeStripCallback' ],
// false,
// array_keys($this->mTransparentTagHooks),
// [],
// [ &$this, 'addTrackingCategory' ]
// );
// MW.HOOK:InternalParseBeforeLinks
// Tables need to come after variable replacement for things to work
// properly; putting them before other transformations should keep
// exciting things like link expansions from showing up in surprising
// places.
table_wkr.Do_table_stuff(pctx, pbfr);
hr_wkr.Replace_hrs(pctx, pbfr);
doubleunder_wkr.Do_double_underscore(pctx, pbfr); // DONE: DATE:2017-01-27
heading_wkr.Do_headings(pctx, pbfr, heading_wkr_cbk);
lnki_wkr.Replace_internal_links(pctx, pbfr);
quote_wkr.Do_all_quotes(pctx, pbfr);
lnke_wkr.Replace_external_links(pctx, pbfr);
// replaceInternalLinks may sometimes leave behind
// absolute URLs, which have to be masked to hide them from replaceExternalLinks
Xomw_parser_bfr_.Replace(pbfr, Bry__marker__noparse, Bry_.Empty);
magiclinks_wkr.Do_magic_links(pctx, pbfr);
// $text = $this->formatHeadings($text, $origText, $isMain);
}
// Second parsing stage. Runs, in order: general unstrip, nbsp handling,
// block levels, link-holder replacement, nowiki unstrip, a second general
// unstrip, and character-reference normalization.
// `line_start` is passed through to Do_block_levels; `is_main` is not read
// anywhere in this method. The commented-out PHP shows steps that are not
// run here.
public void Internal_parse_half_parsed(Xomw_parser_bfr pbfr, boolean is_main, boolean line_start) {
strip_state.Unstrip_general(pbfr);
// MW.HOOK:ParserAfterUnstrip
// Clean up special characters, only run once, next-to-last before doBlockLevels
nbsp_wkr.Do_nbsp(pctx, pbfr);
block_wkr.Do_block_levels(pctx, pbfr, line_start);
lnki_wkr.Replace_link_holders(pctx, pbfr);
// The input doesn't get language converted if
// a) It's disabled
// b) Content isn't converted
// c) It's a conversion table
// d) it is an interface message (which is in the user language)
// if ( !( $this->mOptions->getDisableContentConversion()
// || isset( $this->mDoubleUnderscores['nocontentconvert'] ) )
// ) {
// if ( !$this->mOptions->getInterfaceMessage() ) {
// // The position of the convert() call should not be changed. it
// // assumes that the links are all replaced and the only thing left
// // is the <nowiki> mark.
// $text = $this->getConverterLanguage()->convert( $text );
// }
// }
strip_state.Unstrip_nowiki(pbfr);
// MW.HOOK:ParserBeforeTidy
// $text = $this->replaceTransparentTags( $text );
strip_state.Unstrip_general(pbfr);
sanitizer.Normalize_char_references(pbfr);
// if ( MWTidy::isEnabled() ) {
// if ( $this->mOptions->getTidy() ) {
// $text = MWTidy::tidy( $text );
// }
// }
// else {
// attempt to sanitize at least some nesting problems
// (T4702 and quite a few others)
// $tidyregs = [
// // ''Something [http://www.cool.com cool''] -->
// // <i>Something</i><a href="http://www.cool.com"..><i>cool></i></a>
// '/(<([bi])>)(<([bi])>)?([^<]*)(<\/?a[^<]*>)([^<]*)(<\/\\4>)?(<\/\\2>)/' =>
// '\\1\\3\\5\\8\\9\\6\\1\\3\\7\\8\\9',
// // fix up an anchor inside another anchor, only
// // at least for a single single nested link (T5695)
// '/(<a[^>]+>)([^<]*)(<a[^>]+>[^<]*)<\/a>(.*)<\/a>/' =>
// '\\1\\2</a>\\3</a>\\1\\4</a>',
// // fix div inside inline elements- doBlockLevels won't wrap a line which
// // contains a div, so fix it up here; replace
// // div with escaped text
// '/(<([aib]) [^>]+>)([^<]*)(<div([^>]*)>)(.*)(<\/div>)([^<]*)(<\/\\2>)/' =>
// '\\1\\3&lt;div\\5&gt;\\6&lt;/div&gt;\\8\\9',
// // remove empty italic or bold tag pairs, some
// // introduced by rules above
// '/<([bi])><\/\\1>/' => '',
// ];
// $text = preg_replace(
// array_keys( $tidyregs ),
// array_values( $tidyregs ),
// $text );
// }
// MW.HOOK:ParserAfterTidy
}
/**
 * Prefixes every URL protocol found in src[src_bgn..src_end) with the strip-state marker prefix + "NOPARSE"
 * so that later passes do not treat the protocol as the start of a free external link.
 *
 * Two calling modes:
 *   - trg == null ("called by bry"): returns the armored bytes; if nothing matched, returns src itself
 *     (when the range covers all of src) or a copy of the requested range.
 *   - trg != null: appends the (armored or unchanged) range to trg and returns null.
 *
 * FIX: the text between the previous match and the current match is now copied to the output
 *      (it was previously dropped), and in byte-array mode the buffer is allocated once instead of
 *      being re-created on every match (which discarded all earlier output).
 */
public byte[] Armor_links(Bry_bfr trg, byte[] src, int src_bgn, int src_end) {
  // PORTED:preg_replace( '/\b((?i)' . $this->mUrlProtocols . ')/', self::MARKER_PREFIX . "NOPARSE$1", $text )
  int cur = src_bgn;
  int prv = cur;                      // start of text not yet copied to trg
  boolean dirty = false;              // true once at least one protocol has been armored
  boolean called_by_bry = trg == null;
  while (true) {
    // exit if EOS
    if (cur == src_end) {
      // if dirty, add rest of String
      if (dirty)
        trg.Add_mid(src, prv, src_end);
      break;
    }
    // check if cur matches protocol
    Object protocol_obj = protocols_trie.Match_at(trv, src, cur, src_end);
    // no match; continue
    if (protocol_obj == null) {
      cur++;
    }
    // match; add to bfr
    else {
      dirty = true;
      byte[] protocol_bry = (byte[])protocol_obj;
      // allocate the result buffer only once; re-creating it per match would lose prior output
      if (trg == null) trg = Bry_bfr_.New();
      // copy the plain text that sits between the previous match and this protocol
      trg.Add_mid(src, prv, cur);
      trg.Add_bry_many(Xomw_strip_state.Bry__marker__bgn, Bry__noparse, protocol_bry);
      cur += protocol_bry.length;
      prv = cur;
    }
  }
  if (called_by_bry) {
    if (dirty)
      return trg.To_bry_and_clear();
    // nothing armored: avoid a copy when the range is the whole array
    if (src_bgn == 0 && src_end == src.length)
      return src;
    return Bry_.Mid(src, src_bgn, src_end);
  }
  // buffer mode: caller reads the result from trg; unchanged text must still be appended
  if (!dirty)
    trg.Add_mid(src, src_bgn, src_end);
  return null;
}
/**
 * Registers text with the strip state under a freshly generated marker and returns that marker.
 * The marker is built as: marker-prefix + "-item-" + current marker_index + marker-suffix.
 * marker_index is advanced by one on every call.
 */
public byte[] Insert_strip_item(byte[] text) {
  final int item_idx = marker_index;
  // assemble the marker in the shared tmp buffer
  tmp.Add_bry_many(Xomw_strip_state.Bry__marker__bgn, Bry__strip_state_item);
  tmp.Add_int_variable(item_idx);
  tmp.Add(Xomw_strip_state.Bry__marker__end);
  byte[] rv = tmp.To_bry_and_clear();
  // reserve the next index before handing the marker to the strip state
  marker_index = item_idx + 1;
  strip_state.Add_general(rv, text);
  return rv;
}
/**
 * Resets atrs and fills it with the attributes for an external link.
 * Only "rel" is emitted; its value is read from the Get_external_link_rel field.
 * Returns the same atrs instance that was passed in.
 */
public Xomw_atr_mgr Get_external_link_attribs(Xomw_atr_mgr atrs) {
  atrs.Clear();
  // XO.MW.UNSUPPORTED: XO will assume target is blank; MW will set target of "_blank", "_self", etc. depending on global opt
  // $target = $this->mOptions->getExternalLinkTarget();
  atrs.Add(Atr__rel, Get_external_link_rel);
  return atrs;
}
// XO.MW.UNSUPPORTED: XO will always assume "nofollow"; MW will return "nofollow" if (a) ns is in ns-exception list or (b) domain is in domain-exception list;
// if ($wgNoFollowLinks && !in_array($ns, $wgNoFollowNsExceptions) && !wfMatchesDomainList($url, $wgNoFollowDomainExceptions)
// Value used for the "rel" attribute by Get_external_link_attribs; no initializer here, so it is assigned outside this view.
public byte[] Get_external_link_rel;
// Key passed to atrs.Add in Get_external_link_attribs; declared without an initializer here — NOTE(review): presumably set during init elsewhere; confirm.
private static byte[] Atr__rel;
// Bry__strip_state_item: infix of markers built by Insert_strip_item; Bry__noparse: text written after the marker prefix by Armor_links.
private static final byte[] Bry__strip_state_item = Bry_.new_a7("-item-"), Bry__noparse = Bry_.new_a7("NOPARSE");
// Marker prefix concatenated with "NOPARSE"; not referenced by any method visible in this section.
private static final byte[] Bry__marker__noparse = Bry_.Add(Xomw_strip_state.Bry__marker__bgn, Bry__noparse);
/**
 * Builds the default protocol trie: one entry per item of Gfo_protocol_itm.Ary(), plus the
 * protocol-relative prefix "//". Each entry maps the protocol bytes to themselves.
 */
public static Btrie_slim_mgr Protocols__dflt() {
  Btrie_slim_mgr trie = Btrie_slim_mgr.ci_a7();
  Gfo_protocol_itm[] itms = Gfo_protocol_itm.Ary();
  int itms_len = itms.length;
  for (int i = 0; i < itms_len; i++) {
    byte[] protocol = itms[i].Text_bry(); // EX: "https://"
    trie.Add_obj(protocol, protocol);
  }
  // REF.MW: "$this->mUrlProtocols = wfUrlProtocols();"; "wfUrlProtocols( $includeProtocolRelative = true )"
  byte[] relative = Bry_.new_a7("//");
  trie.Add_obj(relative, relative);
  return trie;
}
}

View File

@@ -1,77 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.mws.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
import org.junit.*;
public class Xomw_parser__tst {
  private final Xomw_parser__fxt fxt = new Xomw_parser__fxt();
  // End-to-end check: one wikitext sample covering headings, paragraphs, hr, table, italics, TOC,
  // external / internal / free links and nbsp handling is parsed and compared against expected html.
  @Test public void Basic() {
    String wikitext = String_.Concat_lines_nl_skip_last
    ( "== heading_1 =="
    , "para_1"
    , "== heading_2 =="
    , "para_2"
    , "-----"
    , "{|"
    , "|-"
    , "|a"
    , "|}"
    , "''italics''"
    , "__TOC__"
    , "[https://a.org b]"
    , "[[A|abc]]"
    , "https://c.org"
    , "a »b« &#160;!important c"
    );
    String html = String_.Concat_lines_nl_skip_last
    ( "<h2> heading_1 </h2>"
    , "<p>para_1"
    , "</p>"
    , "<h2> heading_2 </h2>"
    , "<p>para_2"
    , "</p>"
    , "<hr />"
    , "<table>"
    , ""
    , "<tr>"
    , "<td>a"
    , "</td></tr></table>"
    , "<p><i>italics</i>"
    , "<!--MWTOC-->"
    , "<a rel=\"nofollow\" class=\"external text\" href=\"https://a.org\">b</a>"
    , "<a href=\"/wiki/A\" title=\"A\">abc</a>"
    , "<a rel=\"nofollow\" class=\"external free\" href=\"https://c.org\">https://c.org</a>"
    , "a&#160;»b«&#160; !important c"
    , "</p>"
    );
    fxt.Test__parse(wikitext, html);
  }
}
// Test fixture: owns one parser and one parser buffer, both initialized against an edit-mode test wiki.
class Xomw_parser__fxt {
  private final Xomw_parser mgr = new Xomw_parser();
  private final Xomw_parser_bfr pbfr = new Xomw_parser_bfr();
  public Xomw_parser__fxt() {
    Xoae_app test_app = Xoa_app_fxt.Make__app__edit();
    Xowe_wiki test_wiki = Xoa_app_fxt.Make__wiki__edit(test_app);
    mgr.Init_by_wiki(test_wiki);
    mgr.Init_by_page(test_wiki.Ttl_parse(Bry_.new_a7("Page_1")));
  }
  // Runs both parse phases over src_str and compares the resulting html to expd line by line.
  public void Test__parse(String src_str, String expd) {
    mgr.Internal_parse(pbfr, Bry_.new_u8(src_str));
    mgr.Internal_parse_half_parsed(pbfr, true, true);
    String actl = pbfr.Rslt().To_str_and_clear();
    Tfds.Eq_str_lines(expd, actl, src_str);
  }
}

View File

@@ -1,48 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.mws.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
public class Xomw_parser_bfr { // manages 2 bfrs to eliminate multiple calls to new memory allocations ("return bfr.To_bry_and_clear()")
  private final Bry_bfr bfr_1 = Bry_bfr_.New(), bfr_2 = Bry_bfr_.New();
  // src starts as bfr_1 and trg as bfr_2; Switch() exchanges the two roles
  private Bry_bfr src = bfr_1, trg = bfr_2;
  // buffer holding the current text (input of the next pass)
  public Bry_bfr Src() {return src;}
  // buffer a pass writes its output to
  public Bry_bfr Trg() {return trg;}
  // final result; same buffer as Src()
  public Bry_bfr Rslt() {return src;}
  // Sizes both buffers for text, empties them, and loads text into src. Returns this for chaining.
  public Xomw_parser_bfr Init(byte[] text) {
    // resize each bfr once by guessing that html_len = text_len * 2
    int html_len = text.length * 2;
    // target: resize and empty
    trg.Resize(html_len);
    trg.Clear();
    // source: resize, empty, then load the text
    src.Resize(html_len);
    src.Clear();
    src.Add(text);
    return this;
  }
  // Makes the current trg the new src, and reuses the old src (emptied) as the new trg.
  public void Switch() {
    Bry_bfr old_src = src;
    src = trg;
    trg = old_src;
    trg.Clear();
  }
}

View File

@@ -1,69 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.mws.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
// Helpers that apply text operations to a Xomw_parser_bfr (src -> trg, then Switch).
public class Xomw_parser_bfr_ {
  /**
   * Replaces every occurrence of find in pbfr.Src() with repl.
   * Output is written to pbfr.Trg() and the buffers are switched only when at least one
   * occurrence was replaced; otherwise pbfr is left untouched.
   */
  public static void Replace(Xomw_parser_bfr pbfr, byte[] find, byte[] repl) {
    // XO.PBFR
    Bry_bfr src_bfr = pbfr.Src();
    byte[] src = src_bfr.Bfr();
    int src_bgn = 0;
    int src_end = src_bfr.Len();
    Bry_bfr bfr = pbfr.Trg();
    // non-null result means trg now holds the replaced text
    if (Replace(bfr, Bool_.N, src, src_bgn, src_end, find, repl) != null)
      pbfr.Switch();
  }
  /**
   * Core replace over src[src_bgn..src_end).
   *
   * Return value:
   *   - bfr == null ("called by bry"): the resulting bytes (src itself / a copy of the range if nothing matched).
   *   - bfr != null and something matched: Bry_.Empty (result is in bfr).
   *   - bfr != null and nothing matched: null; the unchanged range is appended to bfr only when lone_bfr is true.
   *
   * FIX: repl is now actually written for each match (previously matches were only deleted);
   *      the cursor advances to find_bgn + find.length (previously "cur += find.length", which
   *      ignored the match position); matches extending past src_end are ignored (src may be a
   *      buffer's backing array with bytes beyond its length); the buffer is allocated once in
   *      byte-array mode; an empty find is treated as "no match" instead of looping.
   */
  private static byte[] Replace(Bry_bfr bfr, boolean lone_bfr, byte[] src, int src_bgn, int src_end, byte[] find, byte[] repl) {
    boolean dirty = false;
    int cur = src_bgn;
    boolean called_by_bry = bfr == null;
    int find_len = find.length;
    while (true) {
      // an empty needle can never advance the cursor; treat as not found
      int find_bgn = find_len == 0 ? Bry_find_.Not_found : Bry_find_.Find_fwd(src, find, cur);
      // stop when nothing is found or when the match lies (partly) beyond the valid range
      if (find_bgn == Bry_find_.Not_found || find_bgn + find_len > src_end) {
        if (dirty)
          bfr.Add_mid(src, cur, src_end);
        break;
      }
      // allocate once; re-creating the buffer per match would discard earlier output
      if (bfr == null) bfr = Bry_bfr_.New();
      bfr.Add_mid(src, cur, find_bgn);
      bfr.Add(repl);
      cur = find_bgn + find_len;
      dirty = true;
    }
    if (dirty) {
      if (called_by_bry)
        return bfr.To_bry_and_clear();
      else
        return Bry_.Empty;
    }
    else {
      if (called_by_bry) {
        if (src_bgn == 0 && src_end == src.length)
          return src;
        else
          return Bry_.Mid(src, src_bgn, src_end);
      }
      else {
        if (lone_bfr)
          bfr.Add_mid(src, src_bgn, src_end);
        return null;
      }
    }
  }
}

View File

@@ -1,32 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.mws.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
import gplx.xowa.mws.parsers.lnkis.*;
// Per-page parser context: current page title plus reusable scratch objects exposed as public fields.
public class Xomw_parser_ctx {
  private Xoa_ttl page_title;
  // title of the page being parsed; set by Init_by_page
  public Xoa_ttl Page_title() {
    return page_title;
  }
  // scratch objects; each is created once per context instance
  public Xomw_image_params Lnki_wkr__make_image__img_params = new Xomw_image_params();
  public byte[][] Lnki_wkr__make_image__match_magic_word = new byte[2][];
  public int[] Lnki_wkr__make_image__img_size = new int[2];
  public Xomw_params_mto Linker__makeImageLink__prms = new Xomw_params_mto();
  public void Init_by_page(Xoa_ttl page_title) {
    this.page_title = page_title;
  }
  public static final int Pos__bos = -1;
}

View File

@@ -1,34 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.mws.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
import gplx.xowa.mws.filerepo.file.*; import gplx.xowa.mws.media.*;
// Parser environment: default values for language / user / global settings, plus shared manager objects.
public class Xomw_parser_env {
  public byte[] Lang__align_end = Bry_.new_a7("right");
  public int User__default__thumbsize = 220;
  public int Global__wgSVGMaxSize = 5120;
  public double Global__wgThumbUpright = .75d;
  public int[] Global__wgThumbLimits = new int[] {120, 150, 180, 200, 250, 300};
  private final Xomw_MagicWordMgr magic_word_mgr = new Xomw_MagicWordMgr();
  private final Xomw_message_mgr message_mgr = new Xomw_message_mgr();
  // replaceable via File_finder_; starts as the no-op implementation
  private Xomw_file_finder file_finder = new Xomw_file_finder__noop();
  private final Xomw_MediaHandlerFactory mediaHandlerFactory = new Xomw_MediaHandlerFactory();
  public Xomw_MagicWordMgr Magic_word_mgr() {
    return magic_word_mgr;
  }
  public Xomw_message_mgr Message_mgr() {
    return message_mgr;
  }
  public Xomw_file_finder File_finder() {
    return file_finder;
  }
  public Xomw_MediaHandlerFactory MediaHandlerFactory() {
    return mediaHandlerFactory;
  }
  // fluent setter: swaps the file finder and returns this env
  public Xomw_parser_env File_finder_(Xomw_file_finder v) {
    this.file_finder = v;
    return this;
  }
}

View File

@@ -1,933 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.mws.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
public class Xomw_parser_options {
// Creates options with the thumb size preset to 220.
public Xomw_parser_options() {
  mThumbSize = 220;
}
// /**
// * Interlanguage links are removed and returned in an array
// */
// private $mInterwikiMagic;
//
// /**
// * Allow external images inline?
// */
// private $mAllowExternalImages;
//
// /**
// * If not, any exception?
// */
// private $mAllowExternalImagesFrom;
//
// /**
// * If not or it doesn't match, should we check an on-wiki whitelist?
// */
// private $mEnableImageWhitelist;
//
// /**
// * Date format index
// */
// private $mDateFormat = null;
//
// /**
// * Create "edit section" links?
// */
// private $mEditSection = true;
//
// /**
// * Allow inclusion of special pages?
// */
// private $mAllowSpecialInclusion;
//
// /**
// * Use tidy to cleanup output HTML?
// */
// private $mTidy = false;
//
// /**
// * Which lang to call for PLURAL and GRAMMAR
// */
// private $mInterfaceMessage = false;
//
// /**
// * Overrides $mInterfaceMessage with arbitrary language
// */
// private $mTargetLanguage = null;
//
// /**
// * Maximum size of template expansions, in bytes
// */
// private $mMaxIncludeSize;
//
// /**
// * Maximum number of nodes touched by PPFrame::expand()
// */
// private $mMaxPPNodeCount;
//
// /**
// * Maximum number of nodes generated by Preprocessor::preprocessToObj()
// */
// private $mMaxGeneratedPPNodeCount;
//
// /**
// * Maximum recursion depth in PPFrame::expand()
// */
// private $mMaxPPExpandDepth;
//
// /**
// * Maximum recursion depth for templates within templates
// */
// private $mMaxTemplateDepth;
//
// /**
// * Maximum number of calls per parse to expensive parser functions
// */
// private $mExpensiveParserFunctionLimit;
//
// /**
// * Remove HTML comments. ONLY APPLIES TO PREPROCESS OPERATIONS
// */
// private $mRemoveComments = true;
//
// /**
// * @var callable Callback for current revision fetching; first argument to call_user_func().
// */
// private $mCurrentRevisionCallback =
// [ 'Parser', 'statelessFetchRevision' ];
//
// /**
// * @var callable Callback for template fetching; first argument to call_user_func().
// */
// private $mTemplateCallback =
// [ 'Parser', 'statelessFetchTemplate' ];
//
// /**
// * @var callable|null Callback to generate a guess for {{REVISIONID}}
// */
// private $mSpeculativeRevIdCallback;
//
// /**
// * Enable limit report in an HTML comment on output
// */
// private $mEnableLimitReport = false;
//
// /**
// * Timestamp used for {{CURRENTDAY}} etc.
// */
// private $mTimestamp;
//
// /**
// * Target attribute for external links
// */
// private $mExternalLinkTarget;
//
// /**
// * Clean up signature texts?
// * @see Parser::cleanSig
// */
// private $mCleanSignatures;
//
// /**
// * Transform wiki markup when saving the page?
// */
// private $mPreSaveTransform = true;
//
// /**
// * Whether content conversion should be disabled
// */
// private $mDisableContentConversion;
//
// /**
// * Whether title conversion should be disabled
// */
// private $mDisableTitleConversion;
//
// /**
// * Automatically number headings?
// */
// private $mNumberHeadings;
/**
* Thumb size preferred by the user.
*/
private int mThumbSize;
// /**
// * Maximum article size of an article to be marked as "stub"
// */
// private $mStubThreshold;
//
// /**
// * Language Object of the User language.
// */
// private $mUserLang;
//
// /**
// * @var User
// * Stored user Object
// */
// private $mUser;
//
// /**
// * Parsing the page for a "preview" operation?
// */
// private $mIsPreview = false;
//
// /**
// * Parsing the page for a "preview" operation on a single section?
// */
// private $mIsSectionPreview = false;
//
// /**
// * Parsing the printable version of the page?
// */
// private $mIsPrintable = false;
//
// /**
// * Extra key that should be present in the caching key.
// */
// private $mExtraKey = '';
//
// /**
// * Are magic ISBN links enabled?
// */
// private $mMagicISBNLinks = true;
//
// /**
// * Are magic PMID links enabled?
// */
// private $mMagicPMIDLinks = true;
//
// /**
// * Are magic RFC links enabled?
// */
// private $mMagicRFCLinks = true;
//
// /**
// * Function to be called when an option is accessed.
// */
// private $onAccessCallback = null;
//
// /**
// * If the page being parsed is a redirect, this should hold the redirect
// * target.
// * @var Title|null
// */
// private $redirectTarget = null;
//
// public function getInterwikiMagic() {
// return this.mInterwikiMagic;
// }
//
// public function getAllowExternalImages() {
// return this.mAllowExternalImages;
// }
//
// public function getAllowExternalImagesFrom() {
// return this.mAllowExternalImagesFrom;
// }
//
// public function getEnableImageWhitelist() {
// return this.mEnableImageWhitelist;
// }
//
// public function getEditSection() {
// return this.mEditSection;
// }
//
// public function getNumberHeadings() {
// this.optionUsed( 'numberheadings' );
//
// return this.mNumberHeadings;
// }
//
// public function getAllowSpecialInclusion() {
// return this.mAllowSpecialInclusion;
// }
//
// public function getTidy() {
// return this.mTidy;
// }
//
// public function getInterfaceMessage() {
// return this.mInterfaceMessage;
// }
//
// public function getTargetLanguage() {
// return this.mTargetLanguage;
// }
//
// public function getMaxIncludeSize() {
// return this.mMaxIncludeSize;
// }
//
// public function getMaxPPNodeCount() {
// return this.mMaxPPNodeCount;
// }
//
// public function getMaxGeneratedPPNodeCount() {
// return this.mMaxGeneratedPPNodeCount;
// }
//
// public function getMaxPPExpandDepth() {
// return this.mMaxPPExpandDepth;
// }
//
// public function getMaxTemplateDepth() {
// return this.mMaxTemplateDepth;
// }
//
// /* @since 1.20 */
// public function getExpensiveParserFunctionLimit() {
// return this.mExpensiveParserFunctionLimit;
// }
//
// public function getRemoveComments() {
// return this.mRemoveComments;
// }
//
// /* @since 1.24 */
// public function getCurrentRevisionCallback() {
// return this.mCurrentRevisionCallback;
// }
//
// public function getTemplateCallback() {
// return this.mTemplateCallback;
// }
//
// /** @since 1.28 */
// public function getSpeculativeRevIdCallback() {
// return this.mSpeculativeRevIdCallback;
// }
//
// public function getEnableLimitReport() {
// return this.mEnableLimitReport;
// }
//
// public function getCleanSignatures() {
// return this.mCleanSignatures;
// }
//
// public function getExternalLinkTarget() {
// return this.mExternalLinkTarget;
// }
//
// public function getDisableContentConversion() {
// return this.mDisableContentConversion;
// }
//
// public function getDisableTitleConversion() {
// return this.mDisableTitleConversion;
// }
// Returns the stored thumb size. The MW original also records the option as used; that call is not ported.
public int getThumbSize() {
  // this.optionUsed( 'thumbsize' );
  return mThumbSize;
}
// public function getStubThreshold() {
// this.optionUsed( 'stubthreshold' );
//
// return this.mStubThreshold;
// }
//
// public function getIsPreview() {
// return this.mIsPreview;
// }
//
// public function getIsSectionPreview() {
// return this.mIsSectionPreview;
// }
//
// public function getIsPrintable() {
// this.optionUsed( 'printable' );
//
// return this.mIsPrintable;
// }
//
// public function getUser() {
// return this.mUser;
// }
//
// public function getPreSaveTransform() {
// return this.mPreSaveTransform;
// }
//
// public function getDateFormat() {
// this.optionUsed( 'dateformat' );
// if ( !isset( this.mDateFormat ) ) {
// this.mDateFormat = this.mUser->getDatePreference();
// }
// return this.mDateFormat;
// }
//
// public function getTimestamp() {
// if ( !isset( this.mTimestamp ) ) {
// this.mTimestamp = wfTimestampNow();
// }
// return this.mTimestamp;
// }
//
// /**
// * Get the user language used by the parser for this page and split the parser cache.
// *
// * @warning: Calling this causes the parser cache to be fragmented by user language!
// * To avoid cache fragmentation, output should not depend on the user language.
// * Use Parser::getFunctionLang() or Parser::getTargetLanguage() instead!
// *
// * @note This function will trigger a cache fragmentation by recording the
// * 'userlang' option, see optionUsed(). This is done to avoid cache pollution
// * when the page is rendered based on the language of the user.
// *
// * @note When saving, this will return the default language instead of the user's.
// * {{int: }} uses this which used to produce inconsistent link tables (bug 14404).
// *
// * @return Language
// * @since 1.19
// */
// public function getUserLangObj() {
// this.optionUsed( 'userlang' );
// return this.mUserLang;
// }
//
// /**
// * Same as getUserLangObj() but returns a String instead.
// *
// * @warning: Calling this causes the parser cache to be fragmented by user language!
// * To avoid cache fragmentation, output should not depend on the user language.
// * Use Parser::getFunctionLang() or Parser::getTargetLanguage() instead!
// *
// * @see getUserLangObj()
// *
// * @return String Language code
// * @since 1.17
// */
// public function getUserLang() {
// return this.getUserLangObj()->getCode();
// }
//
// /**
// * @since 1.28
// * @return boolean
// */
// public function getMagicISBNLinks() {
// return this.mMagicISBNLinks;
// }
//
// /**
// * @since 1.28
// * @return boolean
// */
// public function getMagicPMIDLinks() {
// return this.mMagicPMIDLinks;
// }
// /**
// * @since 1.28
// * @return boolean
// */
// public function getMagicRFCLinks() {
// return this.mMagicRFCLinks;
// }
// public function setInterwikiMagic( $x ) {
// return wfSetVar( this.mInterwikiMagic, $x );
// }
//
// public function setAllowExternalImages( $x ) {
// return wfSetVar( this.mAllowExternalImages, $x );
// }
//
// public function setAllowExternalImagesFrom( $x ) {
// return wfSetVar( this.mAllowExternalImagesFrom, $x );
// }
//
// public function setEnableImageWhitelist( $x ) {
// return wfSetVar( this.mEnableImageWhitelist, $x );
// }
//
// public function setDateFormat( $x ) {
// return wfSetVar( this.mDateFormat, $x );
// }
//
// public function setEditSection( $x ) {
// return wfSetVar( this.mEditSection, $x );
// }
//
// public function setNumberHeadings( $x ) {
// return wfSetVar( this.mNumberHeadings, $x );
// }
//
// public function setAllowSpecialInclusion( $x ) {
// return wfSetVar( this.mAllowSpecialInclusion, $x );
// }
//
// public function setTidy( $x ) {
// return wfSetVar( this.mTidy, $x );
// }
//
// public function setInterfaceMessage( $x ) {
// return wfSetVar( this.mInterfaceMessage, $x );
// }
//
// public function setTargetLanguage( $x ) {
// return wfSetVar( this.mTargetLanguage, $x, true );
// }
//
// public function setMaxIncludeSize( $x ) {
// return wfSetVar( this.mMaxIncludeSize, $x );
// }
//
// public function setMaxPPNodeCount( $x ) {
// return wfSetVar( this.mMaxPPNodeCount, $x );
// }
//
// public function setMaxGeneratedPPNodeCount( $x ) {
// return wfSetVar( this.mMaxGeneratedPPNodeCount, $x );
// }
//
// public function setMaxTemplateDepth( $x ) {
// return wfSetVar( this.mMaxTemplateDepth, $x );
// }
//
// /* @since 1.20 */
// public function setExpensiveParserFunctionLimit( $x ) {
// return wfSetVar( this.mExpensiveParserFunctionLimit, $x );
// }
//
// public function setRemoveComments( $x ) {
// return wfSetVar( this.mRemoveComments, $x );
// }
//
// /* @since 1.24 */
// public function setCurrentRevisionCallback( $x ) {
// return wfSetVar( this.mCurrentRevisionCallback, $x );
// }
//
// /** @since 1.28 */
// public function setSpeculativeRevIdCallback( $x ) {
// return wfSetVar( this.mSpeculativeRevIdCallback, $x );
// }
//
// public function setTemplateCallback( $x ) {
// return wfSetVar( this.mTemplateCallback, $x );
// }
//
// public function enableLimitReport( $x = true ) {
// return wfSetVar( this.mEnableLimitReport, $x );
// }
//
// public function setTimestamp( $x ) {
// return wfSetVar( this.mTimestamp, $x );
// }
//
// public function setCleanSignatures( $x ) {
// return wfSetVar( this.mCleanSignatures, $x );
// }
//
// public function setExternalLinkTarget( $x ) {
// return wfSetVar( this.mExternalLinkTarget, $x );
// }
//
// public function disableContentConversion( $x = true ) {
// return wfSetVar( this.mDisableContentConversion, $x );
// }
//
// public function disableTitleConversion( $x = true ) {
// return wfSetVar( this.mDisableTitleConversion, $x );
// }
//
// public function setUserLang( $x ) {
// if ( is_string( $x ) ) {
// $x = Language::factory( $x );
// }
//
// return wfSetVar( this.mUserLang, $x );
// }
//
// public function setThumbSize( $x ) {
// return wfSetVar( this.mThumbSize, $x );
// }
//
// public function setStubThreshold( $x ) {
// return wfSetVar( this.mStubThreshold, $x );
// }
//
// public function setPreSaveTransform( $x ) {
// return wfSetVar( this.mPreSaveTransform, $x );
// }
//
// public function setIsPreview( $x ) {
// return wfSetVar( this.mIsPreview, $x );
// }
//
// public function setIsSectionPreview( $x ) {
// return wfSetVar( this.mIsSectionPreview, $x );
// }
//
// public function setIsPrintable( $x ) {
// return wfSetVar( this.mIsPrintable, $x );
// }
//
// /**
// * Set the redirect target.
// *
// * Note that setting or changing this does not *make* the page a redirect
// * or change its target, it merely records the information for reference
// * during the parse.
// *
// * @since 1.24
// * @param Title|null $title
// */
// function setRedirectTarget( $title ) {
// this.redirectTarget = $title;
// }
//
// /**
// * Get the previously-set redirect target.
// *
// * @since 1.24
// * @return Title|null
// */
// function getRedirectTarget() {
// return this.redirectTarget;
// }
//
// /**
// * Extra key that should be present in the parser cache key.
// * @param String $key
// */
// public function addExtraKey( $key ) {
// this.mExtraKey .= '!' . $key;
// }
//
// /**
// * Constructor
// * @param User $user
// * @param Language $lang
// */
// public function __construct( $user = null, $lang = null ) {
// if ( $user === null ) {
// global $wgUser;
// if ( $wgUser === null ) {
// $user = new User;
// } else {
// $user = $wgUser;
// }
// }
// if ( $lang === null ) {
// global $wgLang;
// if ( !StubObject::isRealObject( $wgLang ) ) {
// $wgLang->_unstub();
// }
// $lang = $wgLang;
// }
// this.initialiseFromUser( $user, $lang );
// }
//
// /**
// * Get a ParserOptions Object for an anonymous user
// * @since 1.27
// * @return ParserOptions
// */
// public static function newFromAnon() {
// global $wgContLang;
// return new ParserOptions( new User, $wgContLang );
// }
//
// /**
// * Get a ParserOptions Object from a given user.
// * Language will be taken from $wgLang.
// *
// * @param User $user
// * @return ParserOptions
// */
// public static function newFromUser( $user ) {
// return new ParserOptions( $user );
// }
//
// /**
// * Get a ParserOptions Object from a given user and language
// *
// * @param User $user
// * @param Language $lang
// * @return ParserOptions
// */
// public static function newFromUserAndLang( User $user, Language $lang ) {
// return new ParserOptions( $user, $lang );
// }
//
// /**
// * Get a ParserOptions Object from a IContextSource Object
// *
// * @param IContextSource $context
// * @return ParserOptions
// */
// public static function newFromContext( IContextSource $context ) {
// return new ParserOptions( $context->getUser(), $context->getLanguage() );
// }
//
// /**
// * Get user options
// *
// * @param User $user
// * @param Language $lang
// */
// private function initialiseFromUser( $user, $lang ) {
// global $wgInterwikiMagic, $wgAllowExternalImages,
// $wgAllowExternalImagesFrom, $wgEnableImageWhitelist, $wgAllowSpecialInclusion,
// $wgMaxArticleSize, $wgMaxPPNodeCount, $wgMaxTemplateDepth, $wgMaxPPExpandDepth,
// $wgCleanSignatures, $wgExternalLinkTarget, $wgExpensiveParserFunctionLimit,
// $wgMaxGeneratedPPNodeCount, $wgDisableLangConversion, $wgDisableTitleConversion,
// $wgEnableMagicLinks;
//
// // *UPDATE* ParserOptions::matches() if any of this changes as needed
// this.mInterwikiMagic = $wgInterwikiMagic;
// this.mAllowExternalImages = $wgAllowExternalImages;
// this.mAllowExternalImagesFrom = $wgAllowExternalImagesFrom;
// this.mEnableImageWhitelist = $wgEnableImageWhitelist;
// this.mAllowSpecialInclusion = $wgAllowSpecialInclusion;
// this.mMaxIncludeSize = $wgMaxArticleSize * 1024;
// this.mMaxPPNodeCount = $wgMaxPPNodeCount;
// this.mMaxGeneratedPPNodeCount = $wgMaxGeneratedPPNodeCount;
// this.mMaxPPExpandDepth = $wgMaxPPExpandDepth;
// this.mMaxTemplateDepth = $wgMaxTemplateDepth;
// this.mExpensiveParserFunctionLimit = $wgExpensiveParserFunctionLimit;
// this.mCleanSignatures = $wgCleanSignatures;
// this.mExternalLinkTarget = $wgExternalLinkTarget;
// this.mDisableContentConversion = $wgDisableLangConversion;
// this.mDisableTitleConversion = $wgDisableLangConversion || $wgDisableTitleConversion;
// this.mMagicISBNLinks = $wgEnableMagicLinks['ISBN'];
// this.mMagicPMIDLinks = $wgEnableMagicLinks['PMID'];
// this.mMagicRFCLinks = $wgEnableMagicLinks['RFC'];
//
// this.mUser = $user;
// this.mNumberHeadings = $user->getOption( 'numberheadings' );
// this.mThumbSize = $user->getOption( 'thumbsize' );
// this.mStubThreshold = $user->getStubThreshold();
// this.mUserLang = $lang;
// }
//
// /**
// * Check if these options match that of another options set
// *
// * This ignores report limit settings that only affect HTML comments
// *
// * @param ParserOptions $other
// * @return boolean
// * @since 1.25
// */
// public function matches( ParserOptions $other ) {
// $fields = array_keys( get_class_vars( __CLASS__ ) );
// $fields = array_diff( $fields, [
// 'mEnableLimitReport', // only effects HTML comments
// 'onAccessCallback', // only used for ParserOutput option tracking
// ] );
// foreach ( $fields as $field ) {
// if ( !is_object( this.$field ) && this.$field !== $other->$field ) {
// return false;
// }
// }
// // Check the Object and lazy-loaded options
// return (
// this.mUserLang->equals( $other->mUserLang ) &&
// this.getDateFormat() === $other->getDateFormat()
// );
// }
//
// /**
// * Registers a callback for tracking which ParserOptions which are used.
// * This is a private API with the parser.
// * @param callable $callback
// */
// public function registerWatcher( $callback ) {
// this.onAccessCallback = $callback;
// }
//
// /**
// * Called when an option is accessed.
// * Calls the watcher that was set using registerWatcher().
// * Typically, the watcher callback is ParserOutput::registerOption().
// * The information registered that way will be used by ParserCache::save().
// *
// * @param String $optionName Name of the option
// */
// public function optionUsed( $optionName ) {
// if ( this.onAccessCallback ) {
// call_user_func( this.onAccessCallback, $optionName );
// }
// }
//
// /**
// * Returns the full array of options that would have been used by
// * in 1.16.
// * Used to get the old parser cache entries when available.
// * @return array
// */
// public static function legacyOptions() {
// return [
// 'stubthreshold',
// 'numberheadings',
// 'userlang',
// 'thumbsize',
// 'editsection',
// 'printable'
// ];
// }
//
// /**
// * Generate a hash String with the values set on these ParserOptions
// * for the keys given in the array.
// * This will be used as part of the hash key for the parser cache,
// * so users sharing the options with vary for the same page share
// * the same cached data safely.
// *
// * Extensions which require it should install 'PageRenderingHash' hook,
// * which will give them a chance to modify this key based on their own
// * settings.
// *
// * @since 1.17
// * @param array $forOptions
// * @param Title $title Used to get the content language of the page (since r97636)
// * @return String Page rendering hash
// */
// public function optionsHash( $forOptions, $title = null ) {
// global $wgRenderHashAppend;
//
// // FIXME: Once the cache key is reorganized this argument
// // can be dropped. It was used when the math extension was
// // part of core.
// $confstr = '*';
//
// // Space assigned for the stubthreshold but unused
// // since it disables the parser cache, its value will always
// // be 0 when this function is called by parsercache.
// if ( in_array( 'stubthreshold', $forOptions ) ) {
// $confstr .= '!' . this.mStubThreshold;
// } else {
// $confstr .= '!*';
// }
//
// if ( in_array( 'dateformat', $forOptions ) ) {
// $confstr .= '!' . this.getDateFormat();
// }
//
// if ( in_array( 'numberheadings', $forOptions ) ) {
// $confstr .= '!' . ( this.mNumberHeadings ? '1' : '' );
// } else {
// $confstr .= '!*';
// }
//
// if ( in_array( 'userlang', $forOptions ) ) {
// $confstr .= '!' . this.mUserLang->getCode();
// } else {
// $confstr .= '!*';
// }
//
// if ( in_array( 'thumbsize', $forOptions ) ) {
// $confstr .= '!' . this.mThumbSize;
// } else {
// $confstr .= '!*';
// }
//
// // add in language specific options, if any
// // @todo FIXME: This is just a way of retrieving the url/user preferred variant
// if ( !is_null( $title ) ) {
// $confstr .= $title->getPageLanguage()->getExtraHashOptions();
// } else {
// global $wgContLang;
// $confstr .= $wgContLang->getExtraHashOptions();
// }
//
// $confstr .= $wgRenderHashAppend;
//
// // @note: as of Feb 2015, core never sets the editsection flag, since it uses
// // <mw:editsection> tags to inject editsections on the fly. However, extensions
// // may be using it by calling ParserOption::optionUsed resp. ParserOutput::registerOption
// // directly. At least Wikibase does at this point in time.
// if ( !in_array( 'editsection', $forOptions ) ) {
// $confstr .= '!*';
// } elseif ( !this.mEditSection ) {
// $confstr .= '!edit=0';
// }
//
// if ( this.mIsPrintable && in_array( 'printable', $forOptions ) ) {
// $confstr .= '!printable=1';
// }
//
// if ( this.mExtraKey != '' ) {
// $confstr .= this.mExtraKey;
// }
//
// // Give a chance for extensions to modify the hash, if they have
// // extra options or other effects on the parser cache.
// Hooks::run( 'PageRenderingHash', [ &$confstr, this.getUser(), &$forOptions ] );
//
// // Make it a valid memcached key fragment
// $confstr = str_replace( ' ', '_', $confstr );
//
// return $confstr;
// }
//
// /**
// * Sets a hook to force that a page exists, and sets a current revision callback to return
// * a revision with custom content when the current revision of the page is requested.
// *
// * @since 1.25
// * @param Title $title
// * @param Content $content
// * @param User $user The user that the fake revision is attributed to
// * @return ScopedCallback to unset the hook
// */
// public function setupFakeRevision( $title, $content, $user ) {
// $oldCallback = this.setCurrentRevisionCallback(
// function (
// $titleToCheck, $parser = false ) use ( $title, $content, $user, &$oldCallback
// ) {
// if ( $titleToCheck->equals( $title ) ) {
// return new Revision( [
// 'page' => $title->getArticleID(),
// 'user_text' => $user->getName(),
// 'user' => $user->getId(),
// 'parent_id' => $title->getLatestRevID(),
// 'title' => $title,
// 'content' => $content
// ] );
// } else {
// return call_user_func( $oldCallback, $titleToCheck, $parser );
// }
// }
// );
//
// global $wgHooks;
// $wgHooks['TitleExists'][] =
// function ( $titleToCheck, &$exists ) use ( $title ) {
// if ( $titleToCheck->equals( $title ) ) {
// $exists = true;
// }
// };
// end( $wgHooks['TitleExists'] );
// $key = key( $wgHooks['TitleExists'] );
// LinkCache::singleton()->clearBadLink( $title->getPrefixedDBkey() );
// return new ScopedCallback( function () use ( $title, $key ) {
// global $wgHooks;
// unset( $wgHooks['TitleExists'][$key] );
// LinkCache::singleton()->clearLink( $title );
// } );
// }
}

View File

@@ -1,45 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.mws.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
import gplx.core.btries.*;
public class Xomw_regex_ {
  /**
   * Scans forward from src_bgn for as long as the trie reports a match at the current position.
   * @return the first position where the trie does not match, or the position reached once the scan is at / past src_end
   */
  public static int Find_fwd_while(Btrie_slim_mgr trie, Btrie_rv trv, byte[] src, int src_bgn, int src_end) {
    return Scan_fwd(trie, trv, src, src_bgn, src_end, true);
  }
  /**
   * Scans forward from src_bgn for as long as the trie does NOT report a match at the current position.
   * @return the first position where the trie matches, or the position reached once the scan is at / past src_end
   */
  public static int Find_fwd_until(Btrie_slim_mgr trie, Btrie_rv trv, byte[] src, int src_bgn, int src_end) {
    return Scan_fwd(trie, trv, src, src_bgn, src_end, false);
  }
  // shared loop: keeps advancing while "trie matched" equals advance_on_match
  private static int Scan_fwd(Btrie_slim_mgr trie, Btrie_rv trv, byte[] src, int src_bgn, int src_end, boolean advance_on_match) {
    int pos = src_bgn;
    while (pos < src_end) {
      byte lead = src[pos];
      boolean matched = trie.Match_at_w_b0(trv, lead, src, pos, src_end) != null;
      if (matched != advance_on_match) break;
      // step by the length reported for the lead byte; NOTE: pos may end up beyond src_end if the last char is truncated
      pos += gplx.core.intls.Utf8_.Len_of_char_by_1st_byte(lead);
    }
    return pos;
  }
}

View File

@@ -1,39 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.mws.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
import gplx.core.btries.*;
public class Xomw_regex_boundary { // THREAD.SAFE: trv is only for consistent interface
  private final Btrie_slim_mgr trie = Btrie_slim_mgr.cs();
  private final Btrie_rv trv = new Btrie_rv();
  /** Registers every sequence from space.Ws() and space.Zs() as a boundary char. */
  public Xomw_regex_boundary(Xomw_regex_space space) {
    // naive implementation of is_boundary; only the ws / Zs sequences are registered here
    Add_all(space.Ws());
    Add_all(space.Zs());
  }
  /**
   * Checks whether the char that ends just before pos is a registered boundary char.
   * @return true when pos is 0 (beginning of src) or when the previous char is in the trie
   */
  public boolean Is_boundary_prv(byte[] src, int pos) {
    if (pos == 0) return true; // BOS is true
    // locate the first byte of the char ending at pos
    int prv_bgn = gplx.core.intls.Utf8_.Get_pos0_of_char_bwd(src, pos - 1);
    byte prv_lead = src[prv_bgn];
    return trie.Match_at_w_b0(trv, prv_lead, src, prv_bgn, pos) != null;
  }
  // value is just a marker; only presence in the trie is checked
  private void Add_all(byte[][] seqs) {
    int len = seqs.length;
    for (int i = 0; i < len; i++)
      trie.Add_bry_byte(seqs[i], Byte_.Zero);
  }
}

View File

@@ -1,101 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.mws.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
/**
 * Compiles a small subset of regex character-class syntax into raw byte sequences.
 * Supported escapes: "\s" (a single space byte) and "\xHH" (one byte given as two hex digits).
 * Results accumulate across Add_ary / Add_rng calls and are read with Rslt().
 */
public class Xomw_regex_parser {
  private Bry_bfr tmp; // lazily created scratch buffer shared by all compile calls
  public byte[][] Rslt() {return rslt;} private byte[][] rslt;
  /** Compiles each item and appends the compiled sequences to the result. */
  public Xomw_regex_parser Add_ary(String... ary) {return Set_or_add(Parse_ary(ary));}
  private byte[][] Parse_ary(String... ary) {
    if (tmp == null) tmp = Bry_bfr_.New();
    int ary_len = ary.length;
    byte[][] rv = new byte[ary_len][];
    for (int i = 0; i < ary_len; i++) {
      rv[i] = Compile_itm(tmp, Bry_.new_u8(ary[i]));
    }
    return rv;
  }
  /** Compiles bgn and end, then appends one sequence per code point in the inclusive range bgn..end. */
  public Xomw_regex_parser Add_rng(String bgn, String end) {return Set_or_add(Parse_rng(bgn, end));}
  private byte[][] Parse_rng(String bgn, String end) {
    if (tmp == null) tmp = Bry_bfr_.New();
    byte[] bgn_bry = Compile_itm(tmp, Bry_.new_u8(bgn));
    int bgn_val = gplx.core.intls.Utf16_.Decode_to_int(bgn_bry, 0);
    byte[] end_bry = Compile_itm(tmp, Bry_.new_u8(end));
    int end_val = gplx.core.intls.Utf16_.Decode_to_int(end_bry, 0);
    // fail with a descriptive error instead of a NegativeArraySizeException below
    if (end_val < bgn_val) throw Err_.new_wo_type("regex range failed: end is less than bgn", "bgn", bgn, "end", end);
    int rv_len = end_val - bgn_val + 1;
    byte[][] rv = new byte[rv_len][];
    for (int i = 0; i < rv_len; i++) {
      rv[i] = gplx.core.intls.Utf16_.Encode_int_to_bry(i + bgn_val);
    }
    return rv;
  }
  private Xomw_regex_parser Set_or_add(byte[][] val) {
    rslt = rslt == null ? val : Bry_.Ary_add(rslt, val);
    return this;
  }
  /**
   * Compiles one item. Returns src itself when it contains no escapes; otherwise returns a new array
   * with every escape replaced by the byte it denotes and all literal bytes kept in place.
   */
  private static byte[] Compile_itm(Bry_bfr tmp, byte[] src) {
    int src_end = src.length;
    int cur = 0;
    int prv = cur; // start of the literal run that has not yet been copied to tmp
    boolean dirty = false;
    while (true) {
      // eos
      if (cur == src_end) {
        if (dirty)
          tmp.Add_mid(src, prv, src_end);
        break;
      }
      // look at byte
      byte b = src[cur];
      switch (b) { // escape
        case Byte_ascii.Backslash:
          int nxt = cur + 1;
          if (nxt >= src_end) throw Err_.new_wo_type("regex escape failed: no more chars left", "src", src, "pos", nxt);
          byte nxt_byte = src[nxt];
          switch (nxt_byte) {
            case Byte_ascii.Ltr_s: // \s -> " "
              // flush pending literal bytes, then emit the space; previously the whole item was replaced by " "
              tmp.Add_mid(src, prv, cur);
              tmp.Add(Byte_ascii.Space_bry);
              dirty = true;
              cur = nxt + 1;
              prv = cur;
              break;
            case Byte_ascii.Ltr_x: // \x -> one byte of a utf8 sequence in hex; EX: "\xc2\xa0" -> new byte[] {194, 160}
              // read next two bytes
              nxt++;
              if (nxt + 2 > src_end) throw Err_.new_wo_type("utf8 escape failed: no more chars left", "src", src, "pos", nxt);
              int hex_val = gplx.core.encoders.Hex_utl_.Parse_or(src, nxt, nxt + 2, -1);
              // -1 is the "or" value passed above; do not let it silently turn into byte 0xFF
              if (hex_val == -1) throw Err_.new_wo_type("utf8 escape failed: invalid hex digits", "src", src, "pos", nxt);
              // flush pending literal bytes so text before the escape is not lost
              tmp.Add_mid(src, prv, cur);
              tmp.Add_byte((byte)hex_val);
              dirty = true;
              cur = nxt + 2;
              prv = cur;
              break;
            default:
              throw Err_.new_wo_type("regex escape failed: unknown char", "src", src, "pos", nxt);
          }
          break;
        default: // handles ascii only
          // NOTE(review): byte is signed in Java, so this check never fires and non-ascii bytes pass through as literals;
          // left unchanged because callers may already rely on passing utf8 literals
          if (b > 127)
            throw Err_.new_wo_type("regex compiled failed: unknown char", "src", src, "pos", cur);
          cur++;
          break;
      }
    }
    // set item
    return dirty ? tmp.To_bry_and_clear() : src;
  }
}

View File

@@ -1,42 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.mws.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
import org.junit.*; import gplx.core.tests.*;
/** Unit tests for Xomw_regex_parser: "\s" / "\xHH" escapes and code-point ranges. */
public class Xomw_regex_parser__tst {
  private final Xomw_regex_parser__fxt fxt = new Xomw_regex_parser__fxt();
  @Test public void Ary__space() {
    fxt.Test__parse_ary(String_.Ary("\\s"), String_.Ary(" "));
  }
  @Test public void Ary__utf8() {
    // C2 A7 is U+00A7 (section sign); E0 B9 90 is U+0E50 (thai digit zero), written as an escape so it survives re-encoding
    fxt.Test__parse_ary(String_.Ary("\\xc2\\xa7", "\\xe0\\xb9\\x90"), String_.Ary("§", "\u0e50"));
  }
  @Test public void Rng__ascii() {
    fxt.Test__parse_rng("a", "c", String_.Ary("a", "b", "c"));
  }
}
/** Test fixture: feeds input to a Xomw_regex_parser and compares the accumulated result as strings. */
class Xomw_regex_parser__fxt {
  private final Xomw_regex_parser parser = new Xomw_regex_parser();
  /** Compiles ary and checks the parser result against expd. */
  public void Test__parse_ary(String[] ary, String[] expd) {
    parser.Add_ary(ary);
    Gftest.Eq__ary(expd, String_.Ary(parser.Rslt()));
  }
  /** Compiles the inclusive range bgn..end and checks the parser result against expd. */
  public void Test__parse_rng(String bgn, String end, String[] expd) {
    // use the caller's bounds; they were previously ignored in favor of hard-coded "a" and "c"
    parser.Add_rng(bgn, end);
    Gftest.Eq__ary(expd, String_.Ary(parser.Rslt()));
  }
}

View File

@@ -1,64 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.mws.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
import gplx.core.btries.*;
/**
 * Holds the byte sequences for ascii whitespace (Ws) and the Unicode "Zs" space separators,
 * plus a trie over both sets for forward scanning.
 */
public class Xomw_regex_space {
  private final Btrie_slim_mgr trie = Btrie_slim_mgr.cs();
  public Xomw_regex_space() {
    byte[] space = Bry_.New_by_ints(32);
    ws = new byte[][]
    { space
    , Bry_.New_by_ints(9)              // tab
    , Bry_.New_by_ints(10)             // line feed
    , Bry_.New_by_ints(13)             // carriage return
    };
    // Zs; REF:http://www.fileformat.info/info/unicode/category/Zs/list.htm
    zs = new byte[][]
    { space                            // U+0020
    , Bry_.New_by_ints(194, 160)       // U+00A0
    , Bry_.New_by_ints(225, 154, 128)  // U+1680
    , Bry_.New_by_ints(226, 128, 128)  // U+2000; was missing from the list
    , Bry_.New_by_ints(226, 128, 129)  // U+2001
    , Bry_.New_by_ints(226, 128, 130)  // U+2002
    , Bry_.New_by_ints(226, 128, 131)  // U+2003
    , Bry_.New_by_ints(226, 128, 132)  // U+2004
    , Bry_.New_by_ints(226, 128, 133)  // U+2005
    , Bry_.New_by_ints(226, 128, 134)  // U+2006
    , Bry_.New_by_ints(226, 128, 135)  // U+2007
    , Bry_.New_by_ints(226, 128, 136)  // U+2008
    , Bry_.New_by_ints(226, 128, 137)  // U+2009
    , Bry_.New_by_ints(226, 128, 138)  // U+200A
    , Bry_.New_by_ints(226, 128, 175)  // U+202F
    , Bry_.New_by_ints(226, 129, 159)  // U+205F
    , Bry_.New_by_ints(227, 128, 128)  // U+3000
    };
    // register both sets; the value is only a marker
    byte[][] ary = ws;
    for (byte[] bry : ary) {
      trie.Add_bry_byte(bry, Byte_.Zero);
    }
    ary = zs;
    for (byte[] bry : ary) {
      trie.Add_bry_byte(bry, Byte_.Zero);
    }
  }
  /** Ascii whitespace sequences: space, tab, line feed, carriage return. */
  public byte[][] Ws() {return ws;} private byte[][] ws;
  /** UTF-8 sequences of the Unicode Zs (space separator) chars. */
  public byte[][] Zs() {return zs;} private byte[][] zs;
  /** Delegates to Xomw_regex_.Find_fwd_while using this class's trie of Ws + Zs sequences. */
  public int Find_fwd_while(Btrie_rv trv, byte[] src, int src_bgn, int src_end) {
    return Xomw_regex_.Find_fwd_while(trie, trv, src, src_bgn, src_end);
  }
}

View File

@@ -1,40 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.mws.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
import gplx.core.btries.*;
/** Trie of the bytes / sequences that are excluded by the url char class described in the constructor. */
public class Xomw_regex_url {
  private final Btrie_slim_mgr trie = Btrie_slim_mgr.cs();
  public Xomw_regex_url(Xomw_regex_space regex_space) {
    // [^][<>"\\x00-\\x20\\x7F\|]
    // REGEX:[^][<>"\\x00-\\x20\\x7F\p{Zs}]; NOTE: val is just a marker
    trie.Add_str_byte__many(Byte_.Zero, "[", "]", "<", ">", "\"");
    // x00 - x20
    for (int code = 0; code <= 32; code++) {
      byte[] single = new byte[] {(byte)code};
      trie.Add_bry_byte(single, Byte_.Zero);
    }
    // x7F
    trie.Add_bry_byte(Bry_.New_by_ints(127), Byte_.Zero);
    // Zs sequences supplied by regex_space
    byte[][] zs_ary = regex_space.Zs();
    int zs_len = zs_ary.length;
    for (int i = 0; i < zs_len; i++) {
      trie.Add_bry_byte(zs_ary[i], Byte_.Zero);
    }
  }
  /** Delegates to Xomw_regex_.Find_fwd_until with this class's trie (note: "until", despite this method's name). */
  public int Find_fwd_while(Btrie_rv trv, byte[] src, int src_bgn, int src_end) {
    int rv = Xomw_regex_.Find_fwd_until(trie, trv, src, src_bgn, src_end);
    return rv;
  }
}

View File

@@ -1,139 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.mws.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
import gplx.core.btries.*;
/**
 * Stores marker -> replacement items in two categories (general, nowiki) and substitutes markers
 * found in text with their stored values, recursing into the stored values.
 */
public class Xomw_strip_state { // REF.MW:/parser/StripState.php
  private final Btrie_slim_mgr trie = Btrie_slim_mgr.cs();
  private final Btrie_rv trv = new Btrie_rv();
  private final Bry_bfr tmp_1 = Bry_bfr_.New();
  private final Bry_bfr tmp_2 = Bry_bfr_.New();
  private boolean tmp_2_used = false; // true while tmp_2 is in use by an outer recursion level
  private int general_len, nowiki_len; // number of items added per category
  /** Removes all items and resets the per-category counts. */
  public void Clear() {
    trie.Clear();
    general_len = nowiki_len = 0;
    tmp_2_used = false;
  }
  public void Add_general(byte[] marker, byte[] val) {Add_item(Tid__general, marker, val);}
  public void Add_nowiki (byte[] marker, byte[] val) {Add_item(Tid__nowiki, marker, val);}
  /** Registers val under marker; any tid other than Tid__general is counted as nowiki. */
  public void Add_item(byte tid, byte[] marker, byte[] val) {
    trie.Add_obj(marker, new Xomw_strip_item(tid, marker, val));
    if (tid == Tid__general)
      general_len++;
    else
      nowiki_len++;
  }
  public byte[] Unstrip_general(byte[] text) {return Unstrip(Tid__general, text);}
  public byte[] Unstrip_nowiki (byte[] text) {return Unstrip(Tid__nowiki , text);}
  public byte[] Unstrip_both   (byte[] text) {return Unstrip(Tid__both   , text);}
  /** Returns text with markers of the requested type(s) replaced; returns text itself when nothing was replaced. */
  public byte[] Unstrip(byte tid, byte[] text) {
    boolean dirty = Unstrip(tid, tmp_1, text, 0, text.length);
    return dirty ? tmp_1.To_bry_and_clear() : text;
  }
  public void Unstrip_general(Xomw_parser_bfr pbfr) {Unstrip(Tid__general, pbfr);}
  public void Unstrip_nowiki (Xomw_parser_bfr pbfr) {Unstrip(Tid__nowiki , pbfr);}
  public void Unstrip_both   (Xomw_parser_bfr pbfr) {Unstrip(Tid__both   , pbfr);}
  private boolean Unstrip(byte tid, Xomw_parser_bfr pbfr) {
    // XO.PBFR
    Bry_bfr src_bfr = pbfr.Src();
    byte[] src = src_bfr.Bfr();
    boolean dirty = Unstrip(tid, pbfr.Trg(), src, 0, src_bfr.Len());
    if (dirty)
      pbfr.Switch();
    return dirty;
  }
  /**
   * Core scan: copies src[src_bgn..src_end) to trg with matching markers replaced.
   * Nothing is written to trg unless at least one marker was replaced.
   * @return true if at least one marker was replaced
   */
  private boolean Unstrip(byte tid, Bry_bfr trg, byte[] src, int src_bgn, int src_end) {
    // exit early if no items for any requested type
    // NOTE: each requested type is checked on its own, so Tid__both still scans when only nowiki items exist
    boolean has_general = (tid & Tid__general) == Tid__general && general_len > 0;
    boolean has_nowiki  = (tid & Tid__nowiki ) == Tid__nowiki  && nowiki_len  > 0;
    if (!has_general && !has_nowiki)
      return false;
    int cur = src_bgn;
    int prv = cur; // start of the text that has not yet been copied to trg
    boolean dirty = false;
    // loop over each src char
    while (true) {
      // EOS: exit
      if (cur == src_end) {
        if (dirty) // add remainder if dirty
          trg.Add_mid(src, prv, src_end);
        break;
      }
      // check if current pos matches strip state
      Object o = trie.Match_at(trv, src, cur, src_end);
      if (o != null) { // match
        Xomw_strip_item item = (Xomw_strip_item)o;
        byte item_tid = item.Tid();
        if ((tid & item_tid) == item_tid) { // check if types match
          // get bfr for recursion; tmp_2 serves the first nesting level, deeper levels get a fresh bfr
          Bry_bfr nested_bfr = null;
          boolean tmp_2_release = false;
          if (tmp_2_used) {
            nested_bfr = Bry_bfr_.New();
          }
          else {
            nested_bfr = tmp_2;
            tmp_2_used = true;
            tmp_2_release = true;
          }
          // recurse: the stored value may itself contain markers
          byte[] item_val = item.Val();
          if (Unstrip(tid, nested_bfr, item_val, 0, item_val.length))
            item_val = nested_bfr.To_bry_and_clear();
          if (tmp_2_release)
            tmp_2_used = false;
          // add to trg
          trg.Add_mid(src, prv, cur);
          trg.Add(item_val);
          // update vars
          dirty = true;
          cur += item.Key().length;
          prv = cur;
          continue;
        }
      }
      cur++;
    }
    return dirty;
  }
  public static final String Str__marker_bgn = "\u007f'\"`UNIQ-";
  public static final byte[]
    Bry__marker__bgn = Bry_.new_a7(Str__marker_bgn)
  , Bry__marker__end = Bry_.new_a7("-QINU`\"'\u007f")
  ;
  // bit flags; Tid__both == Tid__general | Tid__nowiki
  public static final byte Tid__general = 1, Tid__nowiki = 2, Tid__both = 3;
}
/** Immutable entry of the strip state: the marker (key), its replacement (val) and the item type (tid). */
class Xomw_strip_item {
  private final byte tid;
  private final byte[] key;
  private final byte[] val;
  public Xomw_strip_item(byte tid, byte[] key, byte[] val) {
    this.val = val;
    this.key = key;
    this.tid = tid;
  }
  /** Type flag passed at construction. */
  public byte Tid() {
    return tid;
  }
  /** Marker bytes; the same array instance that was passed in. */
  public byte[] Key() {
    return key;
  }
  /** Replacement bytes; the same array instance that was passed in. */
  public byte[] Val() {
    return val;
  }
}

View File

@@ -1,44 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.mws.parsers; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*;
import org.junit.*; import gplx.core.tests.*;
/** Unit tests for Xomw_strip_state: type filtering and recursive replacement of markers. */
public class Xomw_strip_state__tst {
  private final Xomw_strip_state__fxt fxt = new Xomw_strip_state__fxt();
  @Test public void Basic() {
    final String marker_1 = "\u007f'\"`UNIQ-key-1-QINU`\"'\u007f";
    final String src = "a " + marker_1 + " b";
    final String expd = "a val-1 b";
    fxt.Init__add(Xomw_strip_state.Tid__general, marker_1, "val-1");
    // item is general, so a nowiki-only unstrip leaves src as is
    fxt.Test__nostrip(Xomw_strip_state.Tid__nowiki, src);
    fxt.Test__unstrip(Xomw_strip_state.Tid__general, src, expd);
    fxt.Test__unstrip(Xomw_strip_state.Tid__both, src, expd);
  }
  @Test public void Recurse() {
    final String marker_1 = "\u007f'\"`UNIQ-key-1-QINU`\"'\u007f";
    final String marker_2 = "\u007f'\"`UNIQ-key-2-QINU`\"'\u007f";
    fxt.Init__add(Xomw_strip_state.Tid__general, marker_1, "val-1");
    // value of key-2 is the marker of key-1
    fxt.Init__add(Xomw_strip_state.Tid__general, marker_2, marker_1);
    fxt.Test__unstrip(Xomw_strip_state.Tid__general, "a " + marker_2 + " b", "a val-1 b");
  }
}
/** Test fixture wrapping one Xomw_strip_state; converts between String and byte[] for the tests. */
class Xomw_strip_state__fxt {
  private final Xomw_strip_state strip_state = new Xomw_strip_state();
  /** Registers val under marker with the given type. */
  public void Init__add(byte tid, String marker, String val) {
    byte[] marker_bry = Bry_.new_u8(marker);
    byte[] val_bry = Bry_.new_u8(val);
    strip_state.Add_item(tid, marker_bry, val_bry);
  }
  /** Asserts that unstripping src with tid leaves it unchanged. */
  public void Test__nostrip(byte tid, String src) {
    Test__unstrip(tid, src, src);
  }
  /** Asserts that unstripping src with tid produces expd. */
  public void Test__unstrip(byte tid, String src, String expd) {
    byte[] src_bry = Bry_.new_u8(src);
    String actl = String_.new_u8(strip_state.Unstrip(tid, src_bry));
    Gftest.Eq__str(expd, actl);
  }
}

View File

@@ -1,56 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.mws.parsers.doubleunders; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
/** Plain flag holder for the double-underscore keywords found while parsing one page. */
public class Xomw_doubleunder_data {
  // XO.MW: MW stores these as mDoubleUnderscores in Parser
  public boolean toc;
  public boolean no_toc;
  public boolean force_toc;
  public boolean no_gallery;
  public boolean force_gallery;
  public boolean no_title_convert;
  public boolean no_content_convert;
  public boolean no_edit_section;
  public boolean new_section_link;
  public boolean static_redirect;
  public boolean hidden_cat;
  public boolean index;
  public boolean no_index;
  // XO.MW: MW stores these as member variables in Parser
  public boolean show_toc;
  public boolean force_toc_position;
  /** Sets every flag back to false. */
  public void Reset() {
    // keyword flags
    toc = false;
    no_toc = false;
    force_toc = false;
    no_gallery = false;
    force_gallery = false;
    no_title_convert = false;
    no_content_convert = false;
    no_edit_section = false;
    new_section_link = false;
    static_redirect = false;
    hidden_cat = false;
    index = false;
    no_index = false;
    // derived toc state
    show_toc = false;
    force_toc_position = false;
  }
}

View File

@@ -1,148 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.mws.parsers.doubleunders; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
import gplx.core.btries.*;
import gplx.xowa.langs.*; import gplx.xowa.langs.kwds.*;
/**
 * Finds double-underscore keywords (EX: __TOC__, __NOTOC__) in the source buffer, removes them from
 * the output and records which ones were seen in a Xomw_doubleunder_data.
 */
public class Xomw_doubleunder_wkr {
  private final Btrie_slim_mgr trie = Btrie_slim_mgr.ci_u8();
  private final Btrie_rv trv = new Btrie_rv();
  private Xomw_doubleunder_data data;
  /** Stores data as the flag holder and registers the language's keyword text for each supported keyword group. */
  public void Init_by_wiki(Xomw_doubleunder_data data, Xol_lang_itm lang) {
    this.data = data;
    Reg(trie, lang.Kwd_mgr()
    , Xol_kwd_grp_.Id_notoc
    , Xol_kwd_grp_.Id_nogallery
    , Xol_kwd_grp_.Id_forcetoc
    , Xol_kwd_grp_.Id_toc
    , Xol_kwd_grp_.Id_noeditsection
    , Xol_kwd_grp_.Id_newsectionlink
    , Xol_kwd_grp_.Id_hiddencat
    , Xol_kwd_grp_.Id_index
    , Xol_kwd_grp_.Id_noindex
    , Xol_kwd_grp_.Id_staticredirect
    , Xol_kwd_grp_.Id_notitleconvert
    , Xol_kwd_grp_.Id_nocontentconvert
    );
  }
  /**
   * Scans pbfr's source, resets and fills the data flags, and writes the text without keywords to pbfr's target.
   * The buffers are only switched if at least one keyword was removed.
   */
  public void Do_double_underscore(Xomw_parser_ctx pctx, Xomw_parser_bfr pbfr) {
    // XO.PBFR
    Bry_bfr src_bfr = pbfr.Src();
    byte[] src = src_bfr.Bfr();
    int src_bgn = 0;
    int src_end = src_bfr.Len();
    Bry_bfr bfr = pbfr.Trg();
    data.Reset();
    // XO.MW: MW does TOC before others; XO does it at the same time
    // Now match and remove the rest of them
    // XO.MW.BGN: $this->mDoubleUnderscores = $mwa->matchAndRemove( $text );
    int cur = src_bgn;
    int prv = cur; // start of the text that has not yet been copied to bfr
    boolean dirty = false;
    while (true) {
      // reached end; stop
      // NOTE: ">=" b/c cur can step past src_end when the last char is a truncated multi-byte sequence
      if (cur >= src_end) {
        if (dirty) {
          bfr.Add_mid(src, prv, src_end);
        }
        break;
      }
      // no match; keep searching
      byte b = src[cur];
      Object o = trie.Match_at_w_b0(trv, b, src, cur, src_end);
      if (o == null) {
        cur += gplx.core.intls.Utf8_.Len_of_char_by_1st_byte(b);
        continue;
      }
      // if cs, ensure exact-match (trie is case-insensitive)
      int kwd_end = trv.Pos();
      Xomw_doubleunder_itm itm = (Xomw_doubleunder_itm)o;
      if (itm.case_match && !Bry_.Match(src, cur, kwd_end, itm.val)) {
        cur = kwd_end;
        continue;
      }
      // match; replace __KWD__ with "" (or "<!--MWTOC-->" if __TOC__)
      dirty = true;
      bfr.Add_mid(src, prv, cur);
      switch (itm.tid) {
        case Xol_kwd_grp_.Id_toc:
          // The position of __TOC__ needs to be recorded
          // show_toc is false after Reset and set to true below, so it is still false only for the first __TOC__
          boolean first_toc = !data.show_toc;
          data.toc = true;
          data.show_toc = true;
          data.force_toc_position = true;
          if (first_toc) { // Set a placeholder. At the end we'll fill it in with the TOC.
            bfr.Add_str_a7("<!--MWTOC-->");
          }
          else { // Only keep the first one. XO.MW:ignore by not adding anything to bfr
          }
          break;
        // XO.MW: MW adds boolean to hash_table; XO uses boolean props; note that "remove" is done by not adding to bfr
        case Xol_kwd_grp_.Id_notoc:            data.no_toc = true; break;
        case Xol_kwd_grp_.Id_nogallery:        data.no_gallery = true; break;
        case Xol_kwd_grp_.Id_forcetoc:         data.force_toc = true; break;
        case Xol_kwd_grp_.Id_noeditsection:    data.no_edit_section = true; break;
        case Xol_kwd_grp_.Id_newsectionlink:   data.new_section_link = true; break;
        case Xol_kwd_grp_.Id_hiddencat:        data.hidden_cat = true; break;
        case Xol_kwd_grp_.Id_index:            data.index = true; break;
        case Xol_kwd_grp_.Id_noindex:          data.no_index = true; break;
        case Xol_kwd_grp_.Id_staticredirect:   data.static_redirect = true; break;
        case Xol_kwd_grp_.Id_notitleconvert:   data.no_title_convert = true; break;
        case Xol_kwd_grp_.Id_nocontentconvert: data.no_content_convert = true; break;
        default: throw Err_.new_unhandled_default(itm.tid);
      }
      cur = kwd_end;
      prv = cur;
    }
    // XO.MW.END: $this->mDoubleUnderscores = $mwa->matchAndRemove( $text );
    // __NOTOC__ hides the toc unless a __TOC__ fixed its position
    if (data.no_toc && !data.force_toc_position) {
      data.show_toc = false;
    }
    // XO.MW.EDIT: hidden_cat, index, noindex are used to add to tracking category
    if (dirty)
      pbfr.Switch();
  }
  // adds every keyword text of each group id to the trie, keyed by the text, with an itm describing it
  private static void Reg(Btrie_slim_mgr trie, Xol_kwd_mgr mgr, int... ids) {
    for (int id : ids) {
      Xol_kwd_grp grp = mgr.Get_or_new(id);
      Xol_kwd_itm[] itms = grp.Itms();
      for (Xol_kwd_itm itm : itms) {
        byte[] val = itm.Val();
        trie.Add_obj(val, new Xomw_doubleunder_itm(id, grp.Case_match(), val));
      }
    }
  }
}
/** Trie payload for one keyword text: its keyword group id, whether it must match case, and the text itself. */
class Xomw_doubleunder_itm {
  public Xomw_doubleunder_itm(int tid, boolean case_match, byte[] val) {
    this.val = val;
    this.case_match = case_match;
    this.tid = tid;
  }
  // keyword group id
  public int tid;
  // if true, the matched source text must equal val exactly
  public boolean case_match;
  // keyword text
  public byte[] val;
}

View File

@@ -1,52 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.mws.parsers.doubleunders; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
import org.junit.*; import gplx.core.tests.*;
/**
 * Unit tests for Xomw_doubleunder_wkr.
 * NOTE: a removed keyword leaves the spaces on both sides of it, so expected strings have two spaces at that position.
 */
public class Xomw_doubleunder_wkr__tst {
  private final Xomw_doubleunder_wkr__fxt fxt = new Xomw_doubleunder_wkr__fxt();
  @Test public void No_match()    {fxt.Test__parse("a b c"                   , "a b c");}
  @Test public void Force_toc()   {fxt.Test__parse("a __FORCETOC__ b"        , "a  b").Test__prop_y(fxt.data.force_toc);}
  @Test public void Toc()         {fxt.Test__parse("a __TOC__ b __TOC__ c"   , "a <!--MWTOC--> b  c").Test__prop_y(fxt.data.toc, fxt.data.show_toc, fxt.data.force_toc_position);}
  @Test public void Notoc_only()  {fxt.Test__parse("a __NOTOC__ b"           , "a  b").Test__prop_y(fxt.data.no_toc).Test__prop_n(fxt.data.show_toc);} // show_toc is false
  @Test public void Notoc_w_toc() {fxt.Test__parse("a __TOC__ b __NOTOC__ c" , "a <!--MWTOC--> b  c").Test__prop_y(fxt.data.toc, fxt.data.show_toc, fxt.data.force_toc_position);} // show_toc is true
  @Test public void Case_match()  {fxt.Test__parse("a __index__ b"           , "a __index__ b");}
}
/** Test fixture: runs Xomw_doubleunder_wkr over a string and exposes the resulting flags through data. */
class Xomw_doubleunder_wkr__fxt {
  private final Xomw_parser_ctx pctx = new Xomw_parser_ctx();
  private final Xomw_parser_bfr pbfr = new Xomw_parser_bfr();
  private final Xomw_doubleunder_wkr wkr = new Xomw_doubleunder_wkr();
  public Xomw_doubleunder_data data = new Xomw_doubleunder_data();
  public Xomw_doubleunder_wkr__fxt() {
    // init the wkr with the lang of a test wiki
    Xowe_wiki wiki = Xoa_app_fxt.Make__wiki__edit(Xoa_app_fxt.Make__app__edit());
    wkr.Init_by_wiki(data, wiki.Lang());
  }
  /** Parses src_str and asserts that the resulting text equals expd. */
  public Xomw_doubleunder_wkr__fxt Test__parse(String src_str, String expd) {
    wkr.Do_double_underscore(pctx, pbfr.Init(Bry_.new_u8(src_str)));
    String actl = pbfr.Rslt().To_str_and_clear();
    Gftest.Eq__str(expd, actl, src_str);
    return this;
  }
  /** Asserts that every given flag value is true. */
  public Xomw_doubleunder_wkr__fxt Test__prop_y(boolean... ary) {
    return Test__prop(Bool_.Y, ary);
  }
  /** Asserts that every given flag value is false. */
  public Xomw_doubleunder_wkr__fxt Test__prop_n(boolean... ary) {
    return Test__prop(Bool_.N, ary);
  }
  private Xomw_doubleunder_wkr__fxt Test__prop(boolean expd, boolean... ary) {
    int len = ary.length;
    for (int i = 0; i < len; i++) {
      Gftest.Eq__bool(expd, ary[i]);
    }
    return this;
  }
}

View File

@@ -1,52 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.mws.parsers.headings; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
/**
 * Heading callback that renders headings as HTML ("<h2>...</h2>") into a caller-supplied buffer.
 * The buffer must be set through Bfr_() before the callback methods are invoked.
 */
public class Xomw_heading_cbk__html implements Xomw_heading_cbk {
  private static final byte[] Tag__lhs = Bry_.new_a7("<h");
  private static final byte[] Tag__rhs = Bry_.new_a7("</h");
  private Bry_bfr bfr;

  /** Returns the output buffer. */
  public Bry_bfr Bfr() {
    return bfr;
  }

  /** Sets the output buffer; returns this callback for chaining. */
  public Xomw_heading_cbk__html Bfr_(Bry_bfr bfr) {
    this.bfr = bfr;
    return this;
  }

  /**
   * Called when the worker has found a heading.
   * Writes the pending text before the heading, a newline (unless the heading is at BOS),
   * and then the heading wrapped in an "h" tag whose level is wkr.Hdr_num().
   */
  public void On_hdr_seen(Xomw_parser_ctx pctx, Xomw_heading_wkr wkr) {
    byte[] src = wkr.Src();
    int hdr_bgn = wkr.Hdr_bgn();
    int txt_bgn = wkr.Txt_bgn();

    // flush text between txt_bgn and hdr_bgn; EX: "abc\n==A==\n"; "\n==" seen -> add "abc"
    if (txt_bgn < hdr_bgn) {
      bfr.Add_mid(src, txt_bgn, hdr_bgn);
    }

    // the "\n" that introduced the heading is re-added, except when the heading starts the source
    if (hdr_bgn != Xomw_parser_ctx.Pos__bos) {
      bfr.Add_byte_nl();
    }

    // <hN>inner</hN>
    int hdr_num = wkr.Hdr_num();
    Add_tag(Tag__lhs, hdr_num);
    bfr.Add_mid(wkr.Src(), wkr.Hdr_lhs_end(), wkr.Hdr_rhs_bgn());
    Add_tag(Tag__rhs, hdr_num);
  }

  /** Called once the source is exhausted; writes whatever text remains after the last heading. */
  public void On_src_done(Xomw_parser_ctx pctx, Xomw_heading_wkr wkr) {
    byte[] src = wkr.Src();
    int txt_bgn = wkr.Txt_bgn();
    int src_end = wkr.Src_end();
    if (txt_bgn == src_end) return; // PERF: nothing left to add when a heading ends exactly at EOS
    bfr.Add_mid(src, txt_bgn, src_end);
  }

  // writes tag prefix + single-digit heading level + ">"; EX: "<h" + 2 + ">"
  private void Add_tag(byte[] tag_prefix, int hdr_num) {
    bfr.Add(tag_prefix).Add_int_digits(1, hdr_num).Add(Byte_ascii.Angle_end_bry);
  }
}

View File

@@ -1,41 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.mws.parsers.headings; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
import org.junit.*;
/** JUnit cases for Xomw_heading_wkr, rendered through the HTML heading callback. */
public class Xomw_heading_wkr__tst {
  private final Xomw_heading_wkr__fxt fxt = new Xomw_heading_wkr__fxt();

  @Test public void Basic() {
    // heading is the entire source
    fxt.Test__parse("==A==", "<h2>A</h2>");
    // heading surrounded by text
    fxt.Test__parse("abc\n==A==\ndef", "abc\n<h2>A</h2>\ndef");
    // no heading: single line, then multiple lines
    fxt.Test__parse("abc", "abc");
    fxt.Test__parse("abc\ndef", "abc\ndef");
    // dangling "==" at end of source is expected to yield an empty h1
    fxt.Test__parse("abc\n==", "abc\n<h1></h1>");
  }
}
/** Test fixture that runs Xomw_heading_wkr with an HTML callback and compares the rendered text. */
class Xomw_heading_wkr__fxt {
  private final Xomw_heading_wkr wkr = new Xomw_heading_wkr();
  private final Xomw_heading_cbk__html cbk = new Xomw_heading_cbk__html().Bfr_(Bry_bfr_.New());
  private final Xomw_parser_ctx pctx = new Xomw_parser_ctx();

  /**
   * Parses src_str and asserts that the callback's buffer holds expd.
   * The source string is passed as the assertion message.
   */
  public void Test__parse(String src_str, String expd) {
    byte[] src_bry = Bry_.new_u8(src_str);
    int src_len = src_bry.length;
    // NOTE(review): the -1 start looks like the "beginning of source" marker (cf. Xomw_parser_ctx.Pos__bos) -- confirm
    wkr.Parse(pctx, src_bry, -1, src_len, cbk);
    String actl = cbk.Bfr().To_str_and_clear();
    Tfds.Eq_str_lines(expd, actl, src_str);
  }
}

View File

@@ -1,81 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.mws.parsers.hrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
import gplx.xowa.mws.utls.*;
/**
 * Replaces wikitext horizontal rules (five or more dashes at the start of the text or of a line)
 * with "<hr />". Reads from pbfr.Src() and, only when at least one rule was replaced,
 * writes the full result to pbfr.Trg() and calls pbfr.Switch().
 */
public class Xomw_hr_wkr {// THREAD.UNSAFE: caching for repeated calls
  private static final byte[]
    Bry__wtxt__hr__mid = Bry_.new_a7("\n-----")
  , Bry__wtxt__hr__bos = Bry_.new_a7("-----")
  , Bry__html__hr      = Bry_.new_a7("<hr />")
  ;
  private static final int
    Len__wtxt__hr__mid = Bry__wtxt__hr__mid.length
  , Len__wtxt__hr__bos = Bry__wtxt__hr__bos.length
  ;
  private Bry_bfr bfr; // target buffer for the current Replace_hrs call; shared with Replace_hr

  /**
   * Scans the source buffer for horizontal rules and rewrites them.
   * @param pctx parser context (not used by this worker)
   * @param pbfr source / target buffer pair; switched only if something was replaced
   */
  public void Replace_hrs(Xomw_parser_ctx pctx, Xomw_parser_bfr pbfr) { // REF.MW: text = preg_replace('/(^|\n)-----*/', '\\1<hr />', text);
    // XO.PBFR
    Bry_bfr src_bfr = pbfr.Src();
    byte[] src = src_bfr.Bfr();
    int src_bgn = 0;
    int src_end = src_bfr.Len();
    this.bfr = pbfr.Trg();
    boolean dirty = false;

    // do separate check for "-----" at start of String;
    // FIX: the byte[] and its logical length (src_end) are tracked separately, so require at least
    // 5 logical bytes before comparing; otherwise a source shorter than "-----" could match on
    // bytes that lie past src_end
    int cur = 0;
    if (   src_end >= Len__wtxt__hr__bos
      &&   Bry_.Eq(src, 0, Len__wtxt__hr__bos, Bry__wtxt__hr__bos)) {
      cur = Replace_hr(Bool_.N, src, src_bgn, src_end, 0, Len__wtxt__hr__bos);
      dirty = true;
    }

    // loop
    while (true) {
      // find next "\n-----"
      int find_bgn = Bry_find_.Find_fwd(src, Bry__wtxt__hr__mid, cur, src_end);

      // nothing found; copy the tail (only needed if the target already holds output) and exit
      if (find_bgn == Bry_find_.Not_found) {
        if (dirty) {
          bfr.Add_mid(src, cur, src_end);
        }
        break;
      }

      // something found
      cur = Replace_hr(Bool_.Y, src, cur, src_end, find_bgn, Len__wtxt__hr__mid);
      dirty = true;
    }

    // untouched input stays in the source buffer; only publish the target when it was written to
    if (dirty)
      pbfr.Switch();
  }

  /**
   * Writes one "<hr />" to the target buffer and returns the position after the rule.
   * @param mid      true when the rule follows a "\n": the text before it and the "\n" are written first
   * @param src      source bytes
   * @param cur      start of the not-yet-copied text
   * @param src_end  logical end of src
   * @param find_bgn position where the rule token starts
   * @param tkn_len  length of the matched token ("-----" or "\n-----")
   * @return position of the first byte after the rule, including any extra trailing dashes
   */
  private int Replace_hr(boolean mid, byte[] src, int cur, int src_end, int find_bgn, int tkn_len) {
    // something found; add to bfr
    if (mid) {
      bfr.Add_mid(src, cur, find_bgn); // add everything before "\n-----"
      bfr.Add_byte_nl();
    }
    bfr.Add(Bry__html__hr);

    // skip the token, then gobble up trailing "-"; the "*" in "-----*" from the regex above
    int rule_end = find_bgn + tkn_len;
    return Bry_find_.Find_fwd_while(src, rule_end, src_end, Byte_ascii.Dash);
  }
}

View File

@@ -1,36 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.mws.parsers.hrs; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
import org.junit.*;
/** JUnit cases for Xomw_hr_wkr: "-----" (five or more dashes) at line start becomes "<hr />". */
public class Xomw_hr_wkr__tst {
  private final Xomw_hr_wkr__fxt fxt = new Xomw_hr_wkr__fxt();

  // exactly five dashes after a newline
  @Test public void Basic() {
    fxt.Test__parse("a\n-----b", "a\n<hr />b");
  }

  // additional dashes are absorbed into the same rule
  @Test public void Extend() {
    fxt.Test__parse("a\n------b", "a\n<hr />b");
  }

  // four dashes are not a rule
  @Test public void Not_found() {
    fxt.Test__parse("a\n----b", "a\n----b");
  }

  // rule at the very start of the source
  @Test public void Bos() {
    fxt.Test__parse("-----a", "<hr />a");
  }

  // rule at the start of the source followed by one after a newline
  @Test public void Bos_and_mid() {
    fxt.Test__parse("-----a\n-----b", "<hr />a\n<hr />b");
  }
}
/** Test fixture that feeds a string through Xomw_hr_wkr and compares the buffer's result. */
class Xomw_hr_wkr__fxt {
  private final Xomw_parser_bfr pbfr = new Xomw_parser_bfr();
  private final Xomw_hr_wkr wkr = new Xomw_hr_wkr();

  /**
   * Runs Replace_hrs on src_str with a fresh parser context and asserts the result equals expd.
   * The source string is passed as the assertion message.
   */
  public void Test__parse(String src_str, String expd) {
    Xomw_parser_ctx pctx = new Xomw_parser_ctx();
    wkr.Replace_hrs(pctx, pbfr.Init(Bry_.new_u8(src_str)));
    String actl = pbfr.Rslt().To_str_and_clear();
    Tfds.Eq_str_lines(expd, actl, src_str);
  }
}

View File

@@ -1,233 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.mws.parsers.lnkes; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
import gplx.core.btries.*; import gplx.core.primitives.*;
import gplx.xowa.mws.utls.*;
import gplx.xowa.mws.htmls.*;
/* TODO.XO
* P3: $langObj->formatNum( ++$this->mAutonumber );
* P2: $this->getConverterLanguage()->markNoConversion( $text );
*/
/**
 * Converts bracketed external links (EX: "[https://a.org text]") into anchor tags.
 * Port of MediaWiki's Parser::replaceExternalLinks; the bracketed-link regex is hand-expanded
 * into a scan over the source bytes.
 *
 * Usage: construct with the owning parser, call Init_by_wiki() once, then call
 * Replace_external_links() per parse.
 */
public class Xomw_lnke_wkr {// THREAD.UNSAFE: caching for repeated calls
  private final Bry_bfr tmp;
  private Btrie_slim_mgr protocol_trie; private final Btrie_rv trv = new Btrie_rv();
  private int autonumber;
  private final Xomw_parser parser;
  private final Xomw_linker linker;
  private final Xomw_sanitizer sanitizer;
  private final Xomw_atr_mgr attribs = new Xomw_atr_mgr();
  private Xomw_regex_url regex_url;
  private Xomw_regex_space regex_space;

  /**
   * Takes shared helpers (temp buffer, linker, sanitizer) from the parser and lazily builds the
   * static lookup tables on first construction.
   */
  public Xomw_lnke_wkr(Xomw_parser parser) {
    this.parser = parser;
    this.tmp = parser.Tmp();
    this.linker = parser.Linker();
    this.sanitizer = parser.Sanitizer();

    if (angle_entities_trie == null) {
      synchronized (Type_adp_.ClassOf_obj(this)) {
        // FIX: re-check under the lock so a second constructor does not rebuild the tables, and
        // assign the guard field (angle_entities_trie) last so it is never non-null while the
        // other statics are still unset / half-filled
        if (angle_entities_trie == null) {
          Link_type__free = Bry_.new_a7("free");
          Link_type__text = Bry_.new_a7("text");
          Link_type__autonumber = Bry_.new_a7("autonumber");

          // REGEX:([^\]\\x00-\\x08\\x0a-\\x1F]*?); NOTE: val is key.length
          Btrie_slim_mgr invalid_trie = Btrie_slim_mgr.cs();
          New__trie_itm__by_len(invalid_trie, Byte_ascii.Brack_end);
          for (int i = 0; i <= 8; i++) { // x00-x08
            New__trie_itm__by_len(invalid_trie, i);
          }
          for (int i = 10; i <= 31; i++) { // x0a-x1F
            New__trie_itm__by_len(invalid_trie, i);
          }
          invalid_text_chars_trie = invalid_trie;

          angle_entities_trie = Btrie_slim_mgr.cs().Add_many_str("&lt;", "&gt;");
        }
      }
    }
  }

  /**
   * Supplies the wiki-specific matchers.
   * @param protocol_trie trie of accepted URL protocols (EX: "https://")
   * @param regex_url     matcher for url characters
   * @param regex_space   matcher for space characters
   */
  public void Init_by_wiki(Btrie_slim_mgr protocol_trie, Xomw_regex_url regex_url, Xomw_regex_space regex_space) {
    this.protocol_trie = protocol_trie;
    this.regex_url = regex_url;
    this.regex_space = regex_space;
  }

  /**
   * Rewrites every well-formed "[protocol + url + optional text]" in the source buffer as an
   * external link; anything else is copied through unchanged. Output goes to the buffer that
   * was pbfr.Trg() on entry. The autonumber counter restarts at 1 on every call.
   * @param pctx parser context (not used by this worker)
   * @param pbfr source / target buffer pair; switched at the start of the call
   */
  // XO.MW:SYNC:1.29; DATE:2017-02-01
  public void Replace_external_links(Xomw_parser_ctx pctx, Xomw_parser_bfr pbfr) {
    // XO.PBFR; NOTE: source bytes and length are captured before Switch(); keep this order
    Bry_bfr src_bfr = pbfr.Src();
    byte[] src = src_bfr.Bfr();
    int src_bgn = 0;
    int src_end = src_bfr.Len();
    Bry_bfr bfr = pbfr.Trg();
    pbfr.Switch();

    int cur = src_bgn;
    this.autonumber = 1;

    // find regex
    int prv = 0; // start of source not yet copied to bfr; equals cur at the top of every iteration
    while (true) {
      // PORTED.BGN: $bits = preg_split( $this->mExtLinkBracketedRegex, $text, -1, PREG_SPLIT_DELIM_CAPTURE );
      // $this->mExtLinkBracketedRegex = '/\[(((?i)' . $this->mUrlProtocols . ')' .
      //   self::EXT_LINK_ADDR .
      //   self::EXT_LINK_URL_CLASS . '*)\p{Zs}*([^\]\\x00-\\x08\\x0a-\\x1F]*?)\]/Su';
      //
      // REGEX: "[" + "protocol" + "url-char"* + "space"* + "text"* + "]";
      //   protocol  -> ((?i)' . $this->mUrlProtocols . ')     -> "http://", "HTTps://"
      //   url-char* -> (EXT_LINK_ADDR . EXT_LINK_URL_CLASS*)  -> "255.255.255.255", "a.b.c"; NOTE: "http:///" is valid
      //   space*    -> \p{Zs}*
      //   text      -> ([^\]\\x00-\\x08\\x0a-\\x1F]*?)        -> "abcd"
      // NOTE: /S=extra analysis of pattern /u = unicode support; REF.MW:http://php.net/manual/en/reference.pcre.pattern.modifiers.php

      // Simplified expression to match an IPv4 or IPv6 address, or
      // at least one character of a host name (embeds EXT_LINK_URL_CLASS)
      // static final EXT_LINK_ADDR = '(?:[0-9.]+|\\[(?i:[0-9a-f:.]+)\\]|[^][<>"\\x00-\\x20\\x7F\p{Zs}])';
      //
      // REGEX: "IPv4" | "IPv6" | "url-char"
      //   IPv4     -> [0-9.]+                         -> "255."
      //   IPv6     -> \\[(?i:[0-9a-f:.]+)\\]          -> "2001:"
      //   url-char -> [^][<>"\\x00-\\x20\\x7F\p{Zs}]  -> "abcde"

      // Constants needed for external link processing
      // Everything except bracket, space, or control characters
      // \p{Zs} is unicode 'separator, space' category. It covers the space 0x20
      // as well as U+3000 is IDEOGRAPHIC SPACE for T21052
      // static final EXT_LINK_URL_CLASS = '[^][<>"\\x00-\\x20\\x7F\p{Zs}]';
      //
      // REGEX: NOT [ "symbols" | "control" | "whitespace" ]
      //   symbols    -> ^][<>"
      //   control    -> \\x00-\\x20\\x7F
      //   whitespace -> \p{Zs}

      // search for "["
      int lnke_bgn = Bry_find_.Find_fwd(src, Byte_ascii.Brack_bgn, cur, src_end);
      if (lnke_bgn == Bry_find_.Not_found) {
        bfr.Add_mid(src, cur, src_end);
        break; // no more "["; stop
      }

      // check for protocol; EX: "https://"
      cur = lnke_bgn + 1;
      int url_bgn = cur;
      Object protocol_bry = protocol_trie.Match_at(trv, src, cur, src_end);
      if (protocol_bry == null) {
        bfr.Add_mid(src, prv, cur);
        prv = cur;
        continue;// unknown protocol; ignore "["
      }
      cur += ((byte[])protocol_bry).length;

      // check for one-or-more url chars; [^][<>"\\x00-\\x20\\x7F\p{Zs}]
      int domain_bgn = cur;
      cur = regex_url.Find_fwd_while(trv, src, domain_bgn, src_end);
      if (cur - domain_bgn == 0) {
        bfr.Add_mid(src, prv, cur);
        prv = cur;
        continue; // no chars found; invalid; EX: "[https://"abcde"]"
      }
      int url_end = cur;

      // skip ws
      cur = regex_space.Find_fwd_while(trv, src, cur, src_end);

      // get text (if any)
      // FIX: bound the scan by src_end; an unterminated link (EX: "[https://a.org b") used to
      // index past the logical end of the source
      int text_bgn = -1, text_end = -1;
      while (cur < src_end) {
        byte b = src[cur];
        Object invalid_text_char = invalid_text_chars_trie.Match_at_w_b0(trv, b, src, cur, src_end);
        if (invalid_text_char != null) break;
        if (text_bgn == -1) text_bgn = cur;
        cur += gplx.core.intls.Utf8_.Len_of_char_by_1st_byte(b);
        if (cur > src_end) cur = src_end; // truncated multi-byte char at end of source
        text_end = cur;
      }

      // check for "]"; reaching src_end means the link was never closed
      if (cur >= src_end || src[cur] != Byte_ascii.Brack_end) {
        bfr.Add_mid(src, prv, cur);
        prv = cur;
        continue;
      }
      cur++;
      // PORTED.END: $bits = preg_split( $this->mExtLinkBracketedRegex, $text, -1, PREG_SPLIT_DELIM_CAPTURE );

      // The characters '<' and '>' (which were escaped by
      // removeHTMLtags()) should not be included in
      // URLs, per RFC 2396.
      if (Php_preg_.Match(angle_entities_trie, trv, src, url_bgn, url_end) != null) {
        int angle_bgn = trv.Match_bgn;
        // FIX: when the link had no text, text_end was still -1, which produced an invalid
        // range below; the remainder of the url becomes the text, so it ends at the old url_end
        if (text_end == -1) text_end = url_end;
        text_bgn = angle_bgn;
        url_end = angle_bgn;
      }

      // If the link text is an image URL, replace it with an <img> tag
      // This happened by accident in the original parser, but some people used it extensively
      // XO.MW.UNSUPPORTED.NON-WMF: not supporting images from freefrom url; (EX: "http://a.org/image.png" -> "<img>"); haven't seen this used on WMF wikis
      // $img = $this->maybeMakeExternalImage( $text );
      // if ($img !== false) $text = $img;
      //
      // XO.MW.SKIP: See "Have link text"
      // $dtrail = '';

      // Set linktype for CSS - if URL==text, link is essentially free
      boolean text_missing = text_bgn == -1;
      byte[] link_type = text_missing ? Link_type__free : Link_type__text;

      byte[] text;
      // No link text, e.g. [http://domain.tld/some.link]
      if (text_missing) {
        // Autonumber; EX: "[123]"
        // FIX: the generated "[n]" was left in the shared tmp buffer and never used as the link
        // text; take it out of tmp (which also leaves tmp empty for its next user)
        tmp.Add_byte(Byte_ascii.Brack_bgn);
        tmp.Add_int_variable(autonumber++); // TODO.XO:$langObj->formatNum( ++$this->mAutonumber );
        tmp.Add_byte(Byte_ascii.Brack_end);
        text = tmp.To_bry_and_clear();
        link_type = Link_type__autonumber;
      }
      else {
        // XO.MW.SKIP: skipped b/c MW splits $trail into $dtrail and $trail but does no extra logic with variables; just concatenates later; "$this->getExternalLinkAttribs( $url ) ) . $dtrail . $trail;"
        // Have link text, e.g. [http://domain.tld/some.link text]s
        // Check for trail
        // list( $dtrail, $trail ) = Linker::splitTrail( $trail );
        text = Bry_.Mid(src, text_bgn, text_end);
      }

      // TODO.XO:
      // $text = $this->getConverterLanguage()->markNoConversion( $text );

      byte[] url = Bry_.Mid(src, url_bgn, url_end);
      url = sanitizer.Clean_url(url);

      // copy everything between the previous position and this link's "["
      bfr.Add_mid(src, prv, lnke_bgn);
      prv = cur;

      // Use the encoded URL
      // This means that users can paste URLs directly into the text
      // Funny characters like ö aren't valid in URLs anyway
      // This was changed in August 2004
      linker.makeExternalLink(bfr, url, text, Bool_.N, link_type, parser.Get_external_link_attribs(attribs), Bry_.Empty);

      // XO.MW.UNSUPPORTED.HOOK: registers link for processing by other extensions?
      // Register link in the output Object.
      // Replace unnecessary URL escape codes with the referenced character
      // This prevents spammers from hiding links from the filters
      // $pasteurized = self::normalizeLinkUrl( $url );
      // $this->mOutput->addExternalLink( $pasteurized );
    }
  }

  // CSS link-type names; built lazily by the first constructor call
  private static byte[] Link_type__free, Link_type__text, Link_type__autonumber;
  // "&lt;" / "&gt;"; also acts as the "statics are initialized" guard, so it is assigned last
  private static Btrie_slim_mgr angle_entities_trie;
  // bytes that end the link text: "]" and control chars x00-x08, x0a-x1F
  private static Btrie_slim_mgr invalid_text_chars_trie;

  // adds a key made of the given byte values; the stored value is the key's length
  private static void New__trie_itm__by_len(Btrie_slim_mgr mgr, int... ary) {
    mgr.Add_obj(Bry_.New_by_ints(ary), new Int_obj_val(ary.length));
  }
}

View File

@@ -1,71 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.mws.parsers.lnkes; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
import org.junit.*;
/**
 * JUnit cases for Xomw_lnke_wkr (bracketed external links).
 * Expected HTML is written with apostrophes; the fixture converts them before comparing.
 */
public class Xomw_lnke_wkr__tst {
  private final Xomw_lnke_wkr__fxt fxt = new Xomw_lnke_wkr__fxt();

  @Test public void Basic() {
    fxt.Test__parse("[https://a.org b]", "<a rel='nofollow' class='external text' href='https://a.org'>b</a>");
  }

  // unknown protocol: source is left as-is
  @Test public void Invaild__protocol() {
    fxt.Test__parse("[httpz:a.org]", "[httpz:a.org]");
  }

  // known scheme but missing "//": source is left as-is
  @Test public void Invaild__protocol_slash() {
    fxt.Test__parse("[https:a.org]", "[https:a.org]");
  }

  // protocol with no url chars after it
  @Test public void Invaild__urlchars__0() {
    fxt.Test__parse("[https://]", "[https://]");
  }

  // protocol followed by a character that is not a url char
  @Test public void Invaild__urlchars__bad() {
    fxt.Test__parse("[https://\"]", "[https://\"]");
  }

  // several links separated by plain lines
  @Test public void Many() {
    String src = String_.Concat_lines_nl_apos_skip_last
    ( "a"
    , "[https://b.org c]"
    , "d"
    , "[https://e.org f]"
    , "g"
    );
    String expd = String_.Concat_lines_nl_apos_skip_last
    ( "a"
    , "<a rel='nofollow' class='external text' href='https://b.org'>c</a>"
    , "d"
    , "<a rel='nofollow' class='external text' href='https://e.org'>f</a>"
    , "g"
    );
    fxt.Test__parse(src, expd);
  }

  // protocol-relative url
  @Test public void Protocol_rel() {
    fxt.Test__parse("[//a.org b]", "<a rel='nofollow' class='external text' href='//a.org'>b</a>");
  }

  // "&lt;" / "&gt;" end the url; the remainder moves into the link text
  @Test public void Url_should_not_has_angle_entities() {
    fxt.Test__parse("[https://a.org/b&lt;c z]", "<a rel='nofollow' class='external text' href='https://a.org/b'>&lt;c z</a>");
    fxt.Test__parse("[https://a.org/b&gt;c z]", "<a rel='nofollow' class='external text' href='https://a.org/b'>&gt;c z</a>");
  }

  // checks for noop via "Have link text"
  @Test public void Link_trail() {
    fxt.Test__parse("[https://a.org b]xyz", "<a rel='nofollow' class='external text' href='https://a.org'>b</a>xyz");
    fxt.Test__parse("[https://a.org b]x!z", "<a rel='nofollow' class='external text' href='https://a.org'>b</a>x!z");
  }

  // NOTE: the input has a soft hyphen (U+00AD) between "&quot;" and "b"; it is spelled as an
  // escape here because the raw character is invisible in most editors
  @Test public void Clean_url() {
    fxt.Test__parse("[https://a&quot;\u00ADb c]", "<a rel='nofollow' class='external text' href='https://a%22b'>c</a>");
  }
}
/** Test fixture that runs Xomw_lnke_wkr with the default protocols and compares the rendered text. */
class Xomw_lnke_wkr__fxt {
  private final Xomw_lnke_wkr wkr = new Xomw_lnke_wkr(new Xomw_parser());
  private final Xomw_parser_bfr pbfr = new Xomw_parser_bfr();
  private boolean apos = true; // when true, expected text is passed through Gfh_utl.Replace_apos before comparing

  /** Wires the worker with the default protocol trie and shared space / url matchers. */
  public Xomw_lnke_wkr__fxt() {
    Xomw_regex_space regex_space = new Xomw_regex_space();
    Xomw_regex_url regex_url = new Xomw_regex_url(regex_space);
    wkr.Init_by_wiki(Xomw_parser.Protocols__dflt(), regex_url, regex_space);
  }

  /**
   * Parses src_str for external links and asserts the result equals expd.
   * The source string is passed as the assertion message.
   */
  public void Test__parse(String src_str, String expd) {
    wkr.Replace_external_links(new Xomw_parser_ctx(), pbfr.Init(Bry_.new_u8(src_str)));
    String expd_html = apos ? gplx.langs.htmls.Gfh_utl.Replace_apos(expd) : expd;
    String actl_html = pbfr.Rslt().To_str_and_clear();
    Tfds.Eq_str_lines(expd_html, actl_html, src_str);
  }
}

View File

@@ -1,22 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.mws.parsers.lnkis; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
/**
 * Plain holder pairing a parameter map with a magic-word array.
 * Both members start out null and are assigned by whoever builds the instance.
 */
public class Xomw_image_params {
  /** Parameter map; null until assigned. */
  public Xomw_param_map paramMap;
  /** Magic-word array; null until assigned. */
  public Xomw_MagicWordArray mwArray;
}

View File

@@ -1,858 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.mws.parsers.lnkis; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
import gplx.core.btries.*; import gplx.core.primitives.*;
import gplx.xowa.mws.utls.*;
import gplx.xowa.wikis.nss.*; import gplx.xowa.wikis.xwikis.*;
import gplx.xowa.mws.parsers.*; import gplx.xowa.mws.parsers.quotes.*;
import gplx.xowa.mws.htmls.*; import gplx.xowa.mws.linkers.*;
import gplx.xowa.mws.libs.*;
import gplx.xowa.mws.media.*; import gplx.xowa.mws.filerepo.file.*;
import gplx.xowa.parsers.uniqs.*;
/* TODO.XO
* P7: multi-line links; // look at the next 'line' to see if we can close it there
* P7: interwiki
* P7: [[File:]]
* P7: [[Category:]]
* P6: [[Media:]]
* P4: handle "]]]"; "If we get a ] at the beginning of $m[3]"
* P4: handle "[[http://a.org]]"
* P3: $langObj->formatNum( ++$this->mAutonumber );
* P2: $this->getConverterLanguage()->markNoConversion( $text );
* P1: link_prefix; EX: b[[A]]; [not enabled on enwiki]
*/
public class Xomw_lnki_wkr {// THREAD.UNSAFE: caching for repeated calls
private final Xomw_link_holders holders;
private final Xomw_linker linker;
private final Xomw_link_renderer link_renderer;
// private final Btrie_slim_mgr protocols_trie;
private final Xomw_quote_wkr quote_wkr;
private final Xomw_strip_state strip_state;
private Xomw_parser_env env;
private Xow_wiki wiki;
private Xoa_ttl page_title;
private final Xomw_linker__normalize_subpage_link normalize_subpage_link = new Xomw_linker__normalize_subpage_link();
private final Bry_bfr tmp;
private final Xomw_parser parser;
private final Xomw_atr_mgr extra_atrs = new Xomw_atr_mgr();
private final Xomw_qry_mgr query = new Xomw_qry_mgr();
private final Btrie_rv trv = new Btrie_rv();
private final List_adp tmp_list = List_adp_.New();
private final Hash_adp mImageParams = Hash_adp_bry.cs();
private final Hash_adp mImageParamsMagicArray = Hash_adp_bry.cs();
/**
 * Stores the collaborators passed in by the caller and takes the shared helpers
 * (linker, quote worker, temp buffer, strip state) from the parser.
 * NOTE: protocols_trie is accepted but currently not stored; see the commented-out assignment.
 */
public Xomw_lnki_wkr(Xomw_parser parser, Xomw_link_holders holders, Xomw_link_renderer link_renderer, Btrie_slim_mgr protocols_trie) {
  // supplied directly
  this.parser = parser;
  this.holders = holders;
  this.link_renderer = link_renderer;
  // this.protocols_trie = protocols_trie;

  // obtained from the parser
  this.tmp = parser.Tmp();
  this.linker = parser.Linker();
  this.quote_wkr = parser.Quote_wkr();
  this.strip_state = parser.Strip_state();
}
/**
 * Binds the worker to a parser environment and wiki, and builds the shared table of
 * title characters allowed inside "[[...]]" the first time it is called.
 */
public void Init_by_wiki(Xomw_parser_env env, Xow_wiki wiki) {
  this.env = env;
  this.wiki = wiki;

  // table is shared; build it only once
  if (title_chars_for_lnki != null) return;

  // start from a copy of the generally valid title chars, then also allow "#" and "%"
  title_chars_for_lnki = (boolean[])Array_.Clone(Xomw_ttl_utl.Title_chars_valid());
  title_chars_for_lnki[Byte_ascii.Hash] = true;
  // the % is needed to support urlencoded titles as well
  title_chars_for_lnki[Byte_ascii.Percent] = true;
}
/** Resets per-parse state by clearing the link holders. */
public void Clear_state() {
  this.holders.Clear();
}
/**
 * Buffer-pair entry point: reads the wikitext from pbfr.Src(), writes the result to the buffer
 * that was pbfr.Trg() on entry, and records the current page title from the context.
 * Delegates the actual work to the (pctx, bfr, src, src_bgn, src_end) overload.
 */
public void Replace_internal_links(Xomw_parser_ctx pctx, Xomw_parser_bfr pbfr) {
  // XO.PBFR; NOTE: the source bytes and length are captured before Switch(); keep this order
  Bry_bfr rdr_bfr = pbfr.Src();
  byte[] rdr_bry = rdr_bfr.Bfr();
  int rdr_len = rdr_bfr.Len();
  Bry_bfr wtr_bfr = pbfr.Trg();
  pbfr.Switch();

  this.page_title = pctx.Page_title();

  // scan the whole source: from position 0 up to the captured length
  Replace_internal_links(pctx, wtr_bfr, rdr_bry, 0, rdr_len);
}
// XO.MW:SYNC:1.29; DATE:2017-02-02
public void Replace_internal_links(Xomw_parser_ctx pctx, Bry_bfr bfr, byte[] src, int src_bgn, int src_end) {
// XO.MW: regex for tc move to header; e1 and e1_img moved to code
// the % is needed to support urlencoded titles as well
// XO.MW.BGN: split the entire text String on occurrences of [[
int cur = src_bgn;
int prv = cur;
while (true) {
int lnki_bgn = Bry_find_.Find_fwd(src, Bry__wtxt__lnki__bgn, cur, src_end); // $a = StringUtils::explode('[[', ' ' . $s);
if (lnki_bgn == Bry_find_.Not_found) { // no more "[["; stop loop
bfr.Add_mid(src, cur, src_end);
break;
}
cur = lnki_bgn + 2; // 2="[[".length
// XO.MW.IGNORE: handles strange split logic of adding space to String; "$s = substr($s, 1);"
// TODO.XO:link_prefix; EX: b[[A]]
// $useLinkPrefixExtension = $this->getTargetLanguage()->linkPrefixExtension();
// $e2 = null;
// if ($useLinkPrefixExtension) {
// // Match the end of a line for a word that's not followed by whitespace,
// // e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
// global $wgContLang;
// $charset = $wgContLang->linkPrefixCharset();
// $e2 = "/^((?>.*[^$charset]|))(.+)$/sDu";
// }
// IGNORE: throw new MWException(__METHOD__ . ": \$this->mTitle is null\n");
// $nottalk = !$this->mTitle->isTalkPage();
// TODO.XO:link_prefix
byte[] prefix = Bry_.Empty;
//if ($useLinkPrefixExtension) {
// $m = [];
// if (preg_match($e2, $s, $m)) {
// $first_prefix = $m[2];
// } else {
// $first_prefix = false;
// }
//} else {
// $prefix = '';
//}
// TODO.XO:link_prefix; EX: b[[A]]
//if ($useLinkPrefixExtension) {
// if (preg_match($e2, $s, $m)) {
// $prefix = $m[2];
// $s = $m[1];
// } else {
// $prefix = '';
// }
// // first link
// if ($first_prefix) {
// $prefix = $first_prefix;
// $first_prefix = false;
// }
//}
// PORTED.BGN: if (preg_match($e1, $line, $m)) && else if (preg_match($e1_img, $line, $m))
// NOTE: both e1 and e1_img are effectively the same; e1_img allows nested "[["; EX: "[[A|b[[c]]d]]" will stop at "[[A|b"
int ttl_bgn = cur;
int ttl_end = Xomw_ttl_utl.Find_fwd_while_title(src, cur, src_end, title_chars_for_lnki);
cur = ttl_end;
int capt_bgn = -1, capt_end = -1;
int nxt_lnki = -1;
boolean might_be_img = false;
if (ttl_end > ttl_bgn) { // at least one valid title-char found; check for "|" or "]]" EX: "[[a"
byte nxt_byte = src[ttl_end];
if (nxt_byte == Byte_ascii.Pipe) { // handles lnki with capt ([[A|a]])and lnki with file ([[File:A.png|b|c|d]])
cur = ttl_end + 1;
// find next "[["
nxt_lnki = Bry_find_.Find_fwd(src, Bry__wtxt__lnki__bgn, cur, src_end);
if (nxt_lnki == Bry_find_.Not_found)
nxt_lnki = src_end;
// find end "]]"
capt_bgn = cur;
capt_end = Bry_find_.Find_fwd(src, Bry__wtxt__lnki__end, cur, nxt_lnki);
if (capt_end == Bry_find_.Not_found) {
capt_end = nxt_lnki;
cur = nxt_lnki;
might_be_img = true;
}
else {
cur = capt_end + Bry__wtxt__lnki__end.length;
}
}
else if (Bry_.Match(src, ttl_end, ttl_end + 2, Bry__wtxt__lnki__end)) { // handles simple lnki; EX: [[A]]
cur = ttl_end + 2;
}
else {
ttl_end = -1;
}
}
else
ttl_end = -1;
if (ttl_end == -1) { // either (a) no valid title-chars ("[[<") or (b) title char, but has stray "]" ("[[a]b]]")
// Invalid form; output directly
bfr.Add_mid(src, prv, lnki_bgn + 2);
bfr.Add_mid(src, cur, ttl_bgn);
prv = cur = ttl_bgn;
continue;
}
// PORTED.END: if (preg_match($e1, $line, $m)) && else if (preg_match($e1_img, $line, $m))
byte[] text = Bry_.Mid(src, capt_bgn, capt_end);
byte[] trail = Bry_.Empty;
if (!might_be_img) {
// TODO.XO:
// If we get a ] at the beginning of $m[3] that means we have a link that's something like:
// [[Image:Foo.jpg|[http://example.com desc]]] <- having three ] in a row fucks up,
// the real problem is with the $e1 regex
// See T1500.
// Still some problems for cases where the ] is meant to be outside punctuation,
// and no image is in sight. See T4095.
// if ($text !== ''
// && substr($m[3], 0, 1) === ']'
// && strpos($text, '[') !== false
// ) {
// $text .= ']'; // so that replaceExternalLinks($text) works later
// $m[3] = substr($m[3], 1);
// }
// fix up urlencoded title texts
// if (strpos($m[1], '%') !== false) {
// // Should anchors '#' also be rejected?
// $m[1] = str_replace([ '<', '>' ], [ '&lt;', '&gt;' ], rawurldecode($m[1]));
// }
// $trail = $m[3];
}
else {
// Invalid, but might be an image with a link in its caption
// $text = $m[2];
// if (strpos($m[1], '%') !== false) {
// $m[1] = str_replace([ '<', '>' ], [ '&lt;', '&gt;' ], rawurldecode($m[1]));
// }
// $trail = "";
}
byte[] orig_link = Bry_.Mid(src, ttl_bgn, ttl_end);
// TODO.XO: handle "[[http://a.org]]"
// Don't allow @gplx.Internal protected links to pages containing
// PROTO: where PROTO is a valid URL protocol; these
// should be external links.
// if (preg_match('/^(?i:' . $this->mUrlProtocols . ')/', $origLink)) {
// $s .= $prefix . '[[' . $line;
// continue;
// }
byte[] link = orig_link;
boolean no_force = orig_link[0] != Byte_ascii.Colon;
if (!no_force) {
// Strip off leading ':'
link = Bry_.Mid(link, 1);
}
Xoa_ttl nt = wiki.Ttl_parse(link);
// Make subpage if necessary
boolean subpages_enabled = nt.Ns().Subpages_enabled();
if (subpages_enabled) {
Maybe_do_subpage_link(normalize_subpage_link, orig_link, text);
link = normalize_subpage_link.link;
text = normalize_subpage_link.text;
nt = wiki.Ttl_parse(link);
}
// IGNORE: handled in rewrite above
// else {
// link = orig_link;
// }
byte[] unstrip = strip_state.Unstrip_nowiki(link);
if (!Bry_.Eq(unstrip, link))
nt = wiki.Ttl_parse(unstrip);
if (nt == null) {
bfr.Add_mid(src, prv, lnki_bgn + 2); // $s .= $prefix . '[[' . $line;
prv = cur = lnki_bgn + 2;
continue;
}
Xow_ns ns = nt.Ns();
Xow_xwiki_itm iw = nt.Wik_itm();
if (might_be_img) { // if this is actually an invalid link
if (ns.Id_is_file() && no_force) { // but might be an image
boolean found = false;
// while (true) {
// // look at the next 'line' to see if we can close it there
// a->next();
// next_line = a->current();
// if (next_line === false || next_line === null) {
// break;
// }
// m = explode(']]', next_line, 3);
// if (count(m) == 3) {
// // the first ]] closes the inner link, the second the image
// found = true;
// text .= "[[{m[0]}]]{m[1]}";
// trail = m[2];
// break;
// } else if (count(m) == 2) {
// // if there's exactly one ]] that's fine, we'll keep looking
// text .= "[[{m[0]}]]{m[1]}";
// } else {
// // if next_line is invalid too, we need look no further
// text .= '[[' . next_line;
// break;
// }
// }
if (!found) {
// we couldn't find the end of this imageLink, so output it raw
// but don't ignore what might be perfectly normal links in the text we've examined
Bry_bfr nested = wiki.Utl__bfr_mkr().Get_b128();
this.Replace_internal_links(pctx, nested, text, 0, text.length);
nested.Mkr_rls();
bfr.Add(prefix).Add(Bry__wtxt__lnki__bgn).Add(link).Add_byte_pipe().Add(text); // s .= "{prefix}[[link|text";
// note: no trail, because without an end, there *is* no trail
continue;
}
}
else { // it's not an image, so output it raw
bfr.Add(prefix).Add(Bry__wtxt__lnki__bgn).Add(link).Add_byte_pipe().Add(text); // s .= "{prefix}[[link|text";
// note: no trail, because without an end, there *is* no trail
continue;
}
}
boolean was_blank = text.length == 0;
if (was_blank) {
text = link;
}
else {
// T6598 madness. Handle the quotes only if they come from the alternate part
// [[Lista d''e paise d''o munno]] -> <a href="...">Lista d''e paise d''o munno</a>
// [[Criticism of Harry Potter|Criticism of ''Harry Potter'']]
// -> <a href="Criticism of Harry Potter">Criticism of <i>Harry Potter</i></a>
text = quote_wkr.Do_quotes(tmp, text);
}
// Link not escaped by : , create the various objects
// if (no_force && !nt->wasLocalInterwiki()) {
// Interwikis
// if (
// iw && this->mOptions->getInterwikiMagic() && nottalk && (
// Language::fetchLanguageName(iw, null, 'mw') ||
// in_array(iw, wgExtraInterlanguageLinkPrefixes)
// )
// ) {
// T26502: filter duplicates
// if (!isset(this->mLangLinkLanguages[iw])) {
// this->mLangLinkLanguages[iw] = true;
// this->mOutput->addLanguageLink(nt->getFullText());
// }
//
// s = rtrim(s . prefix);
// s .= trim(trail, "\n") == '' ? '': prefix . trail;
// continue;
// }
//
if (ns.Id_is_file()) {
// boolean is_good_image = !wfIsBadImage(nt->getDBkey(), this->mTitle)
boolean is_good_image = true;
if (is_good_image) {
if (was_blank) {
// if no parameters were passed, text
// becomes something like "File:Foo.png",
// which we don't want to pass on to the
// image generator
text = Bry_.Empty;
}
else {
// recursively parse links inside the image caption
// actually, this will parse them in any other parameters, too,
// but it might be hard to fix that, and it doesn't matter ATM
// text = this->replaceExternalLinks(text);
// holders->merge(this->replaceInternalLinks2(text));
}
// cloak any absolute URLs inside the image markup, so replaceExternalLinks() won't touch them
bfr.Add(prefix);
// Armor_links(Make_image(bfr, nt, text, holders))
this.makeImage(pctx, bfr, nt, text, holders);
bfr.Add(trail);
continue;
}
}
else if (ns.Id_is_ctg()) {
bfr.Trim_end_ws(); // s = rtrim(s . "\n"); // T2087
if (was_blank) {
// sortkey = this->getDefaultSort();
}
else {
// sortkey = text;
}
// sortkey = Sanitizer::decodeCharReferences(sortkey);
// sortkey = str_replace("\n", '', sortkey);
// sortkey = this->getConverterLanguage()->convertCategoryKey(sortkey);
// this->mOutput->addCategory(nt->getDBkey(), sortkey);
//
// Strip the whitespace Category links produce, see T2087
// s .= trim(prefix . trail, "\n") == '' ? '' : prefix . trail;
continue;
}
// }
// Self-link checking. For some languages, variants of the title are checked in
// LinkHolderArray::doVariants() to allow batching the existence checks necessary
// for linking to a different variant.
if (!ns.Id_is_special() && nt.Eq_full_db(page_title) && !nt.Has_fragment()) {
bfr.Add(prefix);
linker.makeSelfLinkObj(bfr, nt, text, Bry_.Empty, trail, Bry_.Empty);
continue;
}
// NS_MEDIA is a pseudo-namespace for linking directly to a file
// @todo FIXME: Should do batch file existence checks, see comment below
if (ns.Id_is_media()) {
// Give extensions a chance to select the file revision for us
// options = [];
// desc_query = false;
// MW.HOOK:BeforeParserFetchFileAndTitle
// Fetch and register the file (file title may be different via hooks)
// list(file, nt) = this->fetchFileAndTitle(nt, options);
// Cloak with NOPARSE to avoid replacement in replaceExternalLinks
// s .= prefix . this->armorLinks(
// Linker::makeMediaLinkFile(nt, file, text)) . trail;
// continue;
}
// Some titles, such as valid special pages or files in foreign repos, should
// be shown as bluelinks even though they're not included in the page table
// @todo FIXME: isAlwaysKnown() can be expensive for file links; we should really do
// batch file existence checks for NS_FILE and NS_MEDIA
bfr.Add_mid(src, prv, lnki_bgn);
prv = cur;
if (iw == null && nt.Is_always_known()) {
// this->mOutput->addLink(nt);
Make_known_link_holder(bfr, nt, text, trail, prefix);
}
else {
// Links will be added to the output link list after checking
holders.Make_holder(bfr, nt, text, Bry_.Ary_empty, trail, prefix);
}
}
}
/**
 * Renders an image link into bfr; XO port of MediaWiki's Parser::makeImage.
 *
 * Steps performed by the code below:
 *  1. split options_at_link into parts
 *  2. look up the file and its media handler, and fetch the matching param map / magic-word array
 *  3. match every part against the magic words; validated values are stored in the param map,
 *     and any part that does not validate becomes the caption (the last such part wins)
 *  4. fill in the alt-text fallbacks
 *  5. delegate HTML generation to linker.makeImageLink
 *
 * Many MediaWiki branches are not ported yet; they are kept as commented-out PHP for reference.
 *
 * @param pctx            parser context; supplies the reusable scratch objects used here
 *                        (Lnki_wkr__make_image__img_params, ...__match_magic_word, ...__img_size)
 * @param bfr             buffer handed to linker.makeImageLink
 * @param title           title of the file being linked
 * @param options_at_link option text of the form "options|alt text"
 * @param holders         link holders; only referenced by the commented-out PHP at present
 */
public void makeImage(Xomw_parser_ctx pctx, Bry_bfr bfr, Xoa_ttl title, byte[] options_at_link, Xomw_link_holders holders) {
// Check if the options text is of the form "options|alt text"
// Options are:
// * thumbnail make a thumbnail with enlarge-icon and caption, alignment depends on lang
// * left no resizing, just left align. label is used for alt= only
// * right same, but right aligned
// * none same, but not aligned
// * ___px scale to ___ pixels width, no aligning. e.g. use in taxobox
// * center center the image
// * frame Keep original image size, no magnify-button.
// * framed Same as "frame"
// * frameless like 'thumb' but without a frame. Keeps user preferences for width
// * upright reduce width for upright images, rounded to full __0 px
// * border draw a 1px border around the image
// * alt Text for HTML alt attribute (defaults to empty)
// * class Set a class for img node
// * link Set the target of the image link. Can be external, interwiki, or local
// vertical-align values (no % or length right now):
// * baseline
// * sub
// * super
// * top
// * text-top
// * middle
// * bottom
// * text-bottom
// Protect LanguageConverter markup when splitting into parts
byte[][] parts = Xomw_string_utils.Delimiter_explode(tmp_list, trv, options_at_link);
// Give extensions a chance to select the file revision for us
// $options = [];
byte[] desc_query = null;
// XO.MW.HOOK:BeforeParserFetchFileAndTitle
// Fetch and register the file (file title may be different via hooks)
// list($file, $title) = $this->fetchFileAndTitle($title, $options);
Xomw_File file = fetchFileAndTitle(title, null);
// Get parameter map
// handler stays null when no file was found
Xomw_MediaHandler handler = file == null ? null : file.getHandler();
Xomw_image_params tmp_img_params = pctx.Lnki_wkr__make_image__img_params;
this.getImageParams(tmp_img_params, handler);
Xomw_param_map paramMap = tmp_img_params.paramMap;
Xomw_MagicWordArray mwArray = tmp_img_params.mwArray;
// XO.MW.UNSUPPORTED.TrackingCategory: if (!$file) $this->addTrackingCategory('broken-file-category');
// Process the input parameters
byte[] caption = Bry_.Empty;
// XO.MW: $params = [ 'frame' => [], 'handler' => [], 'horizAlign' => [], 'vertAlign' => [] ];
// the param map is shared between calls, so its frame / handler params are cleared before use
Xomw_params_frame frameParams = paramMap.Frame.Clear();
Xomw_params_handler handlerParams = paramMap.Handler.Clear();
// Xomw_params_horizAlign horizAlignParams = paramMap.HorizAlign.Clear();
// Xomw_params_vertAlign vertAlignParams = paramMap.VertAlign.Clear();
boolean seen_format = false;
int parts_len = parts.length;
for (int i = 0; i < parts_len; i++) {
byte[] part = parts[i];
part = Bry_.Trim(part);
// tmp_match_word is a reusable 2-item array: [0] receives the magic-word name, [1] the value
byte[][] tmp_match_word = pctx.Lnki_wkr__make_image__match_magic_word;
mwArray.matchVariableStartToEnd(tmp_match_word, part);
byte[] magic_name = tmp_match_word[0];
byte[] val = tmp_match_word[1];
boolean validated = false;
Xomw_param_itm param_item = paramMap.Get_by(magic_name);
if (param_item != null) {
int typeUid = param_item.type_uid;
int paramNameUid = param_item.name_uid;
// Special case; width and height come in one variable together
if (typeUid == Xomw_param_map.Type__handler && paramNameUid == Xomw_param_itm.Name__width) {
// tmp_img_size is a reusable 2-item array: [0] = width, [1] = height
int[] tmp_img_size = pctx.Lnki_wkr__make_image__img_size;
this.parseWidthParam(tmp_img_size, val);
int parsedW = tmp_img_size[0];
int parsedH = tmp_img_size[1];
if (parsedW != 0) {
if (handler.validateParam(Xomw_param_itm.Name__width, null, parsedW)) {
paramMap.Set(typeUid, Xomw_param_itm.Name__width, null, parsedW);
validated = true;
}
}
if (parsedH != 0) {
if (handler.validateParam(Xomw_param_itm.Name__height, null, parsedH)) {
paramMap.Set(typeUid, Xomw_param_itm.Name__height, null, parsedH);
validated = true;
}
}
// else no validation -- T15436
}
else {
if (typeUid == Xomw_param_map.Type__handler) {
// Validate handler parameter
// validated = $handler->validateParam($paramName, $value);
}
else {
// Validate internal parameters
switch (paramNameUid) {
case Xomw_param_itm.Name__manual_thumb:
case Xomw_param_itm.Name__alt:
case Xomw_param_itm.Name__class:
// @todo FIXME: Possibly check validity here for
// manualthumb? downstream behavior seems odd with
// missing manual thumbs.
validated = true;
// $value = $this->stripAltText($value, $holders);
break;
case Xomw_param_itm.Name__link:
// XO: the MW link checks below are not ported; every link value is currently accepted
// $chars = self::EXT_LINK_URL_CLASS;
// $addr = self::EXT_LINK_ADDR;
// $prots = $this->mUrlProtocols;
// if ($value === '') {
// $paramName = 'no-link';
// $value = true;
validated = true;
// }
// else if (preg_match("/^((?i)$prots)/", $value)) {
// if (preg_match("/^((?i)$prots)$addr$chars*$/u", $value, $m)) {
// $paramName = 'link-url';
// $this->mOutput->addExternalLink($value);
// if ($this->mOptions->getExternalLinkTarget()) {
// $params[$type]['link-target'] = $this->mOptions->getExternalLinkTarget();
// }
validated = true;
// }
// } else {
// $linkTitle = Title::newFromText($value);
// if ($linkTitle) {
// $paramName = 'link-title';
// $value = $linkTitle;
// $this->mOutput->addLink($linkTitle);
validated = true;
// }
// }
break;
case Xomw_param_itm.Name__frameless:
case Xomw_param_itm.Name__framed:
case Xomw_param_itm.Name__thumbnail:
// use first appearing option, discard others.
validated = !seen_format;
seen_format = true;
break;
default:
// Most other things appear to be empty or numeric...
validated = (val == null || Php_utl_.isnumeric(Bry_.Trim(val)));
break;
}
}
if (validated) {
paramMap.Set(typeUid, paramNameUid, val, -1);
}
}
}
// any part that did not validate is treated as the caption; a later one overwrites an earlier one
if (!validated) {
caption = part;
}
}
// Process alignment parameters
// XO: the alignment values are looked up but not yet copied to frameParams (assignments are commented out)
Xomw_param_itm tmp = paramMap.Get_by(Xomw_param_map.Type__horizAlign);
if (tmp != null) {
// frameParams.align = tmp.val;
}
tmp = paramMap.Get_by(Xomw_param_map.Type__vertAlign);
if (tmp != null) {
// frameParams.valign = tmp.val;
}
frameParams.caption = caption;
boolean image_is_framed
= frameParams.frame != null
|| frameParams.framed != null
|| frameParams.thumbnail != null
|| frameParams.manualthumb != null
;
// Will the image be presented in a frame, with the caption below?
// In the old days, [[Image:Foo|text...]] would set alt text. Later it
// came to also set the caption, ordinary text after the image -- which
// makes no sense, because that just repeats the text multiple times in
// screen readers. It *also* came to set the title attribute.
// Now that we have an alt attribute, we should not set the alt text to
// equal the caption: that's worse than useless, it just repeats the
// text. This is the framed/thumbnail case. If there's no caption, we
// use the unnamed parameter for alt text as well, just for the time be-
// ing, if the unnamed param is set and the alt param is not.
// For the future, we need to figure out if we want to tweak this more,
// e.g., introducing a title= parameter for the title; ignoring the un-
// named parameter entirely for images without a caption; adding an ex-
// plicit caption= parameter and preserving the old magic unnamed para-
// meter for BC; ...
if (image_is_framed) { // Framed image
// NOTE: "caption == Bry_.Empty" is a reference comparison (not a content comparison)
if (caption == Bry_.Empty && frameParams.alt == null) {
// No caption or alt text, add the filename as the alt text so
// that screen readers at least get some description of the image
frameParams.alt = title.Get_text();
}
// Do not set $params['frame']['title'] because tooltips don't make sense
// for framed images
}
else { // Inline image
if (frameParams.alt == null) {
// No alt text, use the "caption" for the alt text
// NOTE: reference comparison, as above
if (caption != Bry_.Empty) {
// frameParams.alt = $this->stripAltText(caption, $holders);
}
else {
// No caption, fall back to using the filename for the
// alt text
frameParams.alt = title.Get_text();
}
}
// Use the "caption" for the tooltip text
// frameParams.title = $this->stripAltText(caption, $holders);
}
// MW.HOOK:ParserMakeImageParams
// Linker does the rest
// byte[] time = options.time;
Object time = null;
linker.makeImageLink(bfr, pctx, parser, title, file, frameParams, handlerParams, time, desc_query, parser.Options().getThumbSize());
// Give the handler a chance to modify the parser Object
// if (handler != null) {
// $handler->parserTransformHook($this, $file);
// }
}
// protected function stripAltText( $caption, $holders ) {
// // Strip bad stuff out of the title (tooltip). We can't just use
// // replaceLinkHoldersText() here, because if this function is called
// // from replaceInternalLinks2(), mLinkHolders won't be up-to-date.
// if ( $holders ) {
// $tooltip = $holders->replaceText( $caption );
// } else {
// $tooltip = $this->replaceLinkHoldersText( $caption );
// }
//
// // make sure there are no placeholders in thumbnail attributes
// // that are later expanded to html- so expand them now and
// // remove the tags
// $tooltip = $this->mStripState->unstripBoth( $tooltip );
// $tooltip = Sanitizer::stripAllTags( $tooltip );
//
// return $tooltip;
// }
private static Xomw_param_list[] internalParamNames;
private static Xomw_param_map internalParamMap;
/**
 * Fills rv with the param map and the magic-word array to use for the given media handler.
 * Both objects are cached per handler key (Bry_.Empty when handler is null); on a cache miss
 * they are built from the static internal param list plus the handler's own param map.
 * @param rv      receives paramMap and mwArray
 * @param handler media handler of the file; may be null
 */
private void getImageParams(Xomw_image_params rv, Xomw_MediaHandler handler) {
	byte[] handlerClass = (handler != null) ? handler.Key() : Bry_.Empty;
	rv.paramMap = (Xomw_param_map)mImageParams.Get_by(handlerClass);

	// cache hit: the magic-word array was stored under the same key
	if (rv.paramMap != null) {
		rv.mwArray = (Xomw_MagicWordArray)mImageParamsMagicArray.Get_by(handlerClass);
		return;
	}

	// NOTE: lazy-init; code below can be inefficient
	// Initialise static lists
	if (internalParamNames == null) {
		internalParamNames = new Xomw_param_list[]
		{ Xomw_param_list.New(Xomw_param_map.Type__horizAlign, "horizAlign", "left", "right", "center", "none")
		, Xomw_param_list.New(Xomw_param_map.Type__vertAlign , "vertAlign", "baseline", "sub", "super", "top", "text-top", "middle", "bottom", "text-bottom")
		, Xomw_param_list.New(Xomw_param_map.Type__frame     , "frame", "thumbnail", "manual_thumb", "framed", "frameless", "upright", "border", "link", "alt", "class")
		};
		internalParamMap = new Xomw_param_map();
		byte[] magic_prefix = Bry_.new_a7("img_");
		int lists_len = internalParamNames.length;
		for (int i = 0; i < lists_len; i++) {
			Xomw_param_list list = internalParamNames[i];
			byte[][] names = list.names;
			int names_len = names.length;
			for (int j = 0; j < names_len; j++) {
				byte[] name = names[j];
				// magic word is "img_" + name, with dashes changed to underscores
				byte[] magic_name = Bry_.Add(magic_prefix, Bry_.Replace(name, Byte_ascii.Dash, Byte_ascii.Underline));
				internalParamMap.Add(magic_name, list.type_uid, name);
			}
		}
	}

	// Add handler params
	Xomw_param_map new_map = internalParamMap.Clone();
	if (handler != null) {
		Xomw_param_map handler_map = handler.getParamMap();
		int handler_map_len = handler_map.Len();
		for (int i = 0; i < handler_map_len; i++) {
			Xomw_param_itm handler_itm = (Xomw_param_itm)handler_map.Get_at(i);
			new_map.Add(handler_itm.magic, handler_itm.type_uid, handler_itm.name);
		}
	}

	// register both objects in their caches and hand them back through rv
	this.mImageParams.Add(handlerClass, new_map);
	rv.paramMap = new_map;
	Xomw_MagicWordArray new_mw_array = new Xomw_MagicWordArray(env.Magic_word_mgr(), new_map.Keys());
	this.mImageParamsMagicArray.Add(handlerClass, new_mw_array);
	rv.mwArray = new_mw_array;
}
// Parses the width param of an image link, such as "300px" or "200x300px".
// Results are written to img_size: [0] = width, [1] = height.
// Both items are first set to Php_utl_.Null_int and stay that way when src is the Bry_.Empty instance;
// otherwise each item is the parsed number, or 0 when that number could not be parsed.
// XO.MW.NOTE: for MW, "" -> null, null while "AxB" -> 0x0
public void parseWidthParam(int[] img_size, byte[] src) {
	img_size[0] = img_size[1] = Php_utl_.Null_int;
	// NOTE: reference comparison against the Bry_.Empty instance, not a length check
	if (src == Bry_.Empty) return;

	// (T15500) In both cases (width/height and width only),
	// permit trailing "px" for backward compatibility.
	// XO: "px" is optional; if exists at end, ignore it
	int end = src.length;
	if (Bry_.Has_at_end(src, Bry__px))
		end -= 2;

	// XO.MW: if ( preg_match( '/^([0-9]*)x([0-9]*)\s*(?:px)?\s*$/', $value, $m ) ) {
	// width: the run of digits at the start of src
	int width_end = Bry_find_.Find_fwd_while_num(src, 0, end);

	// height: the run of digits after an "x" that directly follows the width; -1 / -1 when there is no "x"
	int height_bgn = -1, height_end = -1;
	boolean x_follows_width = width_end < end && src[width_end] == Byte_ascii.Ltr_x;
	if (x_follows_width) {
		height_bgn = width_end + 1;
		height_end = Bry_find_.Find_fwd_while_num(src, height_bgn, end);
	}

	img_size[0] = Bry_.To_int_or(src, 0, width_end, 0);
	img_size[1] = Bry_.To_int_or(src, height_bgn, height_end, 0);
}
public static final byte[] Bry__px = Bry_.new_a7("px");
/**
 * Fetches the file for a title; XO port of MediaWiki's Parser::fetchFileAndTitle.
 * The MediaWiki steps that register the file on the parser output and update the title
 * are not ported; they are kept below as commented-out PHP. Currently this only delegates
 * to fetchFileNoRegister.
 * @param title   title of the file
 * @param options options passed through to fetchFileNoRegister
 * @return whatever fetchFileNoRegister returns for the title
 */
public Xomw_File fetchFileAndTitle(Xoa_ttl title, Hash_adp options) {
	// XO.MW: unported registration logic
	//$time = $file ? $file->getTimestamp() : false;
	//$sha1 = $file ? $file->getSha1() : false;
	//# Register the file as a dependency...
	//$this->mOutput->addImage( $title->getDBkey(), $time, $sha1 );
	//if ( $file && !$title->equals( $file->getTitle() ) ) {
	//	# Update fetched file title
	//	$title = $file->getTitle();
	//	$this->mOutput->addImage( $title->getDBkey(), $time, $sha1 );
	//}
	return fetchFileNoRegister(title, options);
}
/**
 * Helper for fetchFileAndTitle: looks the file up through the environment's file finder.
 *
 * Also useful if a file must be fetched but not used yet,
 * for example to get the file's handler.
 *
 * @param title   title of the file
 * @param options not read by the ported code; the MediaWiki "broken" / "sha1" branches are still commented out
 * @return result of env.File_finder().Find_file(title)
 */
private Xomw_File fetchFileNoRegister(Xoa_ttl title, Hash_adp options) {
	// XO.MW: unported option handling
	// if ( isset( $options['broken'] ) ) {
	//	file = false; // broken thumbnail forced by hook
	// } elseif ( isset( $options['sha1'] ) ) { // get by (sha1,timestamp)
	//	file = RepoGroup::singleton()->findFileFromKey( $options['sha1'], $options );
	// } else { // get by (name,timestamp)
	// }
	return env.File_finder().Find_file(title); // $options
}
// Normalizes a possible subpage link by delegating to linker.normalizeSubpageLink with the current page title.
// The normalized values are written to rv.
public void Maybe_do_subpage_link(
	  Xomw_linker__normalize_subpage_link rv
	, byte[] target
	, byte[] text
	) {
	linker.normalizeSubpageLink(rv, page_title, target, text);
}
// Replaces the link-holder placeholders in pbfr by delegating to the link holders collected by this worker.
public void Replace_link_holders(
	  Xomw_parser_ctx pctx
	, Xomw_parser_bfr pbfr
	) {
	holders.Replace(pctx, pbfr);
}
/**
 * Writes a link for a title that is known to exist, followed by the part of the trail that stays outside the link.
 * @param bfr    output buffer; NOTE: its whole content is taken out and passed through parser.Armor_links
 * @param nt     target title
 * @param text   link text; when it is the Bry_.Empty instance, the escaped prefixed title text is used instead
 * @param trail  text following the link; split by linker.splitTrail into an "inside" part and a remaining part
 * @param prefix text placed before the link text
 */
public void Make_known_link_holder(Bry_bfr bfr, Xoa_ttl nt, byte[] text, byte[] trail, byte[] prefix) {
	// splitTrail: [0] is placed inside the link text; [1] is appended after the link
	byte[][] trail_parts = linker.splitTrail(trail);
	byte[] inside_link = trail_parts[0];
	byte[] after_link = trail_parts[1];

	// NOTE: reference comparison against the Bry_.Empty instance
	byte[] caption = (text == Bry_.Empty)
		? Bry_.Escape_html(nt.Get_prefixed_text())
		: text;

	// PORTED:new HtmlArmor( "$prefix$text$inside" )
	tmp.Add_bry_escape_html(prefix);
	tmp.Add_bry_escape_html(caption);
	tmp.Add_bry_escape_html(inside_link);

	link_renderer.Make_known_link(bfr, nt, tmp.To_bry_and_clear(), extra_atrs, query);

	// pull the buffer content back out so that it can be armored, then re-add it with the rest of the trail
	byte[] rendered = bfr.To_bry_and_clear();
	parser.Armor_links(bfr, rendered, 0, rendered.length);
	bfr.Add(after_link);
}
private static boolean[] title_chars_for_lnki;
private static final byte[] Bry__wtxt__lnki__bgn = Bry_.new_a7("[["), Bry__wtxt__lnki__end = Bry_.new_a7("]]");
// $e1 = "/^([{$tc}]+)(?:\\|(.+?))?]](.*)\$/sD";
//
// REGEX: "title-char"(1+) + "pipe and text"(0-1) + "]]"(1) + "other chars up to next [["
// title-char -> ([{$tc}]+)
// pipe and text -> (?:\\|(.+?))?
// ]] -> ]]
// other chars... -> (.*)
// $e1_img = "/^([{$tc}]+)\\|(.*)\$/sD";
//
// REGEX: "title-char"(1+) + "pipe"(1) + "other chars up to next [["
// title-char -> ([{$tc}]+)
// pipe -> \\|
// other chars... -> (.*)
}

View File

@@ -1,122 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.mws.parsers.lnkis; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
import org.junit.*; import gplx.core.tests.*;
import gplx.xowa.mws.filerepo.*; import gplx.xowa.mws.filerepo.file.*;
import gplx.xowa.mws.media.*;
/** Tests for file links ([[File:...]]) handled by Xomw_lnki_wkr: html generation and width-param parsing. */
public class Xomw_lnki_wkr__file__tst {
	private final Xomw_lnki_wkr__fxt fxt = new Xomw_lnki_wkr__fxt();

	// every test starts with cleared worker state and a 300x200 file named "A.png"
	@Before public void init() {
		fxt.Clear();
		fxt.Init__file("A.png", 300, 200);
	}

	// no options: links to the original file
	@Test public void Plain() {
		String wtxt = "[[File:A.png]]";
		String html = "<a href='A.png' class='image'><img alt='A.png' src='/orig/7/70/A.png' /></a>";
		fxt.Test__to_html(wtxt, html);
	}

	// "thumb" option: thumbnail wrapped in the thumb divs
	@Test public void Thumb() {
		String wtxt = "[[File:A.png|thumb]]";
		String html = "<div class='thumb tright'><div class='thumbinner' style='width:222px;'><a href='A.png' class='image'><img alt='A.png' src='/thumb/7/70/A.png/220px-A.png' class='thumbimage' /></a>  <div class='thumbcaption'><div class='magnify'><a href='' class='internal'></a></div></div></div></div>";
		fxt.Test__to_html(wtxt, html);
	}

	// "WxHpx" option: scaled thumbnail
	@Test public void Size() {
		String wtxt = "[[File:A.png|123x456px]]";
		String html = "<a href='A.png' class='image'><img alt='A.png' src='/thumb/7/70/A.png/123px-A.png' /></a>";
		fxt.Test__to_html(wtxt, html);
	}

	@Test public void fitBoxWidth() {
		// COMMENT:"Height is the relative smaller dimension, so scale width accordingly"
		// consider file of 200,100 (2:1)
		// EX_1: view is 120,40 (3:1)
		// - dimensions are either (a) 120,80 or (b) 80,40
		// - use (b) 80,40
		// EX_2: view is 120,80 (1.5:1)
		// - dimensions are either (a) 120,60 or (b) 160,80
		// - use (a) 120,60
		fxt.Init__file("A.png", 200, 100);
		fxt.Test__to_html__has("[[File:A.png|120x40px]]", "/80px-A.png");
		fxt.Test__to_html__has("[[File:A.png|120x80px]]", "/120px-A.png");
	}

	@Test public void Test__parseWidthParam() {
		int[] size = new int[2];
		fxt.Test__parseWidthParam(size, "12x34px", 12, 34);	// WxHpx
		fxt.Test__parseWidthParam(size, "12x34"  , 12, 34);	// WxH
		fxt.Test__parseWidthParam(size, "12px"   , 12,  0);	// Wpx
		fxt.Test__parseWidthParam(size, "12"     , 12,  0);	// W
		fxt.Test__parseWidthParam(size, "12x"    , 12,  0);	// 12x
		fxt.Test__parseWidthParam(size, "x34"    ,  0, 34);	// x34
	}
}
/**
 * Test fixture for Xomw_lnki_wkr: builds a parser with a mock file finder and the
 * "img_thumbnail" / "img_width" magic words, and offers assertion helpers.
 * Expected html is written with apostrophes; it is passed through Gfh_utl.Replace_apos before comparison.
 */
class Xomw_lnki_wkr__fxt {
	private final Xomw_lnki_wkr wkr;
	private final Xomw_parser_ctx pctx;
	private final Xomw_parser_bfr pbfr = new Xomw_parser_bfr();
	private final Xomw_file_finder__mock file_finder;
	private final Xomw_FileRepo repo = new Xomw_FileRepo(Bry_.new_a7("/orig"), Bry_.new_a7("/thumb"));
	private boolean apos = true;

	public Xomw_lnki_wkr__fxt() {
		Xoae_app app = Xoa_app_fxt.Make__app__edit();
		Xowe_wiki wiki = Xoa_app_fxt.Make__wiki__edit(app);
		Xomw_parser parser = new Xomw_parser();
		this.wkr = parser.Lnki_wkr();

		// env
		this.file_finder = new Xomw_file_finder__mock(parser.Env());
		parser.Env().File_finder_(file_finder);
		parser.Env().Magic_word_mgr().Add(Bry_.new_u8("img_thumbnail"), Bool_.Y, Bry_.Ary("thumb"));
		parser.Env().Magic_word_mgr().Add(Bry_.new_u8("img_width"), Bool_.Y, Bry_.Ary("$1px"));
		parser.Init_by_wiki(wiki);

		// ctx
		this.pctx = new Xomw_parser_ctx();
		this.pctx.Init_by_page(wiki.Ttl_parse(Bry_.new_a7("Page_1")));
	}

	// resets the state of the worker under test
	public void Clear() {
		wkr.Clear_state();
	}

	// registers a png file of the given size with the mock file finder
	public void Init__file(String title, int w, int h) {
		file_finder.Add(title, repo, w, h, Xomw_MediaHandlerFactory.Mime__image__png);
	}

	// runs only Replace_internal_links and compares the buffer result with expd
	public void Test__parse(String src_str, String expd) {
		byte[] src_bry = Bry_.new_u8(src_str);
		wkr.Replace_internal_links(pctx, pbfr.Init(src_bry));
		String actl = pbfr.Rslt().To_str_and_clear();
		Gftest.Eq__ary__lines(Normalize_expd(expd), actl, src_str);
	}

	// runs link replacement plus holder replacement and compares the full html with expd
	public void Test__to_html(String src_str, String expd) {
		String expd_html = Normalize_expd(expd);
		Gftest.Eq__ary__lines(expd_html, Exec__to_html(src_str), src_str);
	}

	// same as Test__to_html, but only checks that the html contains expd
	public void Test__to_html__has(String src_str, String expd) {
		String expd_html = Normalize_expd(expd);
		Gftest.Eq__bool_y(String_.Has(Exec__to_html(src_str), expd_html));
	}

	// checks the width / height that parseWidthParam writes to img_size
	public void Test__parseWidthParam(int[] img_size, String src_str, int expd_w, int expd_h) {
		wkr.parseWidthParam(img_size, Bry_.new_u8(src_str));
		Gftest.Eq__int(expd_w, img_size[0], "w");
		Gftest.Eq__int(expd_h, img_size[1], "h");
	}

	private String Exec__to_html(String src_str) {
		byte[] src_bry = Bry_.new_u8(src_str);
		wkr.Replace_internal_links(pctx, pbfr.Init(src_bry));
		wkr.Replace_link_holders(pctx, pbfr);
		return pbfr.Rslt().To_str_and_clear();
	}

	// applies Gfh_utl.Replace_apos to the expected text when apos is enabled
	private String Normalize_expd(String expd) {
		return apos ? gplx.langs.htmls.Gfh_utl.Replace_apos(expd) : expd;
	}
}

View File

@@ -1,29 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.mws.parsers.lnkis; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
import org.junit.*; import gplx.xowa.mws.filerepo.*; import gplx.xowa.mws.filerepo.file.*;
/** Tests for plain text links ([[A]], [[A|a]]) handled by Xomw_lnki_wkr. */
public class Xomw_lnki_wkr__text__tst {
	private final Xomw_lnki_wkr__fxt fxt = new Xomw_lnki_wkr__fxt();

	@Before public void init() {
		fxt.Clear();
	}

	// parse only: a valid link is replaced by a link-holder placeholder
	@Test public void Text() {
		fxt.Test__parse("a [[A]] z", "a <!--LINK 0--> z");
	}
	@Test public void Capt() {
		fxt.Test__parse("a [[A|a]] z", "a <!--LINK 0--> z");
	}

	// parse only: a link with invalid title chars is left untouched
	@Test public void Invalid__char() {
		fxt.Test__parse("a [[<A>]] z", "a [[<A>]] z");
	}

	// full html: a link to the current page ("Page_1" in the fixture) becomes a selflink
	@Test public void Html__self() {
		fxt.Test__to_html("[[Page_1]]", "<strong class='selflink'>Page_1</strong>");
	}
	@Test public void Html__text() {
		fxt.Test__to_html("[[A]]", "<a href='/wiki/A' title='A'>A</a>");
	}
	@Test public void Html__capt() {
		fxt.Test__to_html("[[A|a]]", "<a href='/wiki/A' title='A'>a</a>");
	}
}

View File

@@ -1,57 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.mws.parsers.lnkis; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
/**
 * One image-link parameter: the magic word that identifies it, the type (group) it belongs to,
 * its name, and an int uid derived from the name for use in switch statements.
 */
public class Xomw_param_itm {
	public final byte[] magic;     // magic-word key
	public final int type_uid;     // type (group) uid assigned by the creator of the item
	public final byte[] name;      // param name
	public final int name_uid;     // one of the Name__* constants, or -1 when the name has no uid

	public Xomw_param_itm(byte[] magic, int type_uid, byte[] name) {
		this.magic = magic;
		this.type_uid = type_uid;
		this.name = name;
		this.name_uid = name_uids.Get_as_int_or(name, -1);
	}

	public static final int
	  Name__width        = 0
	, Name__height       = 1
	, Name__manual_thumb = 2
	, Name__alt          = 3
	, Name__class        = 4
	, Name__link         = 5
	, Name__frameless    = 6
	, Name__framed       = 7
	, Name__thumbnail    = 8
	;

	// name -> uid lookup used by the constructor; every Name__* constant has exactly one entry
	private static final Hash_adp_bry name_uids = Hash_adp_bry.cs()
	.Add_str_int("width"        , Name__width)
	.Add_str_int("height"       , Name__height)	// was missing: "height" resolved to -1 although Name__height exists
	.Add_str_int("manual_thumb" , Name__manual_thumb)
	.Add_str_int("alt"          , Name__alt)
	.Add_str_int("class"        , Name__class)
	.Add_str_int("link"         , Name__link)
	.Add_str_int("frameless"    , Name__frameless)
	.Add_str_int("framed"       , Name__framed)
	.Add_str_int("thumbnail"    , Name__thumbnail)
	;

	public static final byte[]
	  Mw__img_width = Bry_.new_a7("img_width")
	;
	public static final byte[]
	  Name_bry__width = Bry_.new_a7("width")
	;
}

View File

@@ -1,77 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.mws.parsers.lnkis; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
/**
 * Ordered map of image-link params keyed by magic word, plus the per-type value holders
 * (Frame, Handler, HorizAlign, VertAlign) that receive the parsed values.
 */
public class Xomw_param_map {
	public static final int Type__horizAlign = 0, Type__vertAlign = 1, Type__frame = 2, Type__handler = 3;

	private final Ordered_hash hash = Ordered_hash_.New_bry();
	public final Xomw_params_frame Frame = new Xomw_params_frame();
	public final Xomw_params_handler Handler = new Xomw_params_handler();
	public final Xomw_params_horizAlign HorizAlign = new Xomw_params_horizAlign();
	public final Xomw_params_vertAlign VertAlign = new Xomw_params_vertAlign();

	public int Len() {
		return hash.Len();
	}
	public Xomw_param_itm Get_at(int i) {
		return (Xomw_param_itm)hash.Get_at(i);
	}
	// looks an item up by its magic-word key
	public Xomw_param_itm Get_by(byte[] name) {
		return (Xomw_param_itm)hash.Get_by(name);
	}
	// lookup by type is not implemented: always returns null
	public Xomw_param_itm Get_by(int name_type) {
		return null;
	}
	// stores a parsed value in the holder for the given type; only the frame and handler types are handled
	public void Set(int type, int paramNameUid, byte[] paramBry, int paramInt) {
		if (type == Type__frame) {
			Frame.Set(paramNameUid, paramBry, paramInt);
		}
		else if (type == Type__handler) {
			Handler.Set(paramNameUid, paramBry, paramInt);
		}
	}
	// returns the magic-word keys of all items, in insertion order
	public byte[][] Keys() {
		int count = hash.Len();
		byte[][] keys = new byte[count][];
		for (int idx = 0; idx < count; idx++) {
			Xomw_param_itm itm = (Xomw_param_itm)hash.Get_at(idx);
			keys[idx] = itm.magic;
		}
		return keys;
	}
	public void Add(byte[] magic, int type_uid, byte[] name) {
		hash.Add(magic, new Xomw_param_itm(magic, type_uid, name));
	}
	// creates a new map with the same items; Frame / Handler of the new map receive this map's Frame / Handler via Copy_to
	public Xomw_param_map Clone() {
		Xomw_param_map copy = new Xomw_param_map();
		int count = hash.Len();
		for (int idx = 0; idx < count; idx++) {
			Xomw_param_itm itm = (Xomw_param_itm)hash.Get_at(idx);
			copy.Add(itm.magic, itm.type_uid, itm.name);
		}
		copy.Frame.Copy_to(this.Frame);
		copy.Handler.Copy_to(this.Handler);
		return copy;
	}
}
/** A named group of param names that share one type uid. */
class Xomw_param_list {
	public int type_uid;
	public byte[] type;
	public byte[][] names;

	// factory: converts the type and the names from String to byte[]
	public static Xomw_param_list New(int type_uid, String type, String... names) {
		Xomw_param_list list = new Xomw_param_list();
		list.type_uid = type_uid;
		list.type     = Bry_.new_u8(type);
		list.names    = Bry_.Ary(names);
		return list;
	}
}

View File

@@ -1,85 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.mws.parsers.lnkis; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
import gplx.xowa.mws.utls.*;
/**
 * Holder for the "frame" params of an image link (alignment, caption, alt text, link data, ...).
 * An instance is reused between images: call Clear() before filling it for the next image.
 */
public class Xomw_params_frame {
	public byte[] align = null;
	public byte[] valign = null;
	public byte[] caption = null;
	public byte[] frame = null;
	public byte[] framed = null;
	public byte[] frameless = null;
	public byte[] thumbnail = null;
	public byte[] manualthumb = null;
	public byte[] alt = null;
	public byte[] title = null;
	public byte[] cls = null;
	public byte[] img_cls = null;
	public byte[] link_title = null;
	public byte[] link_url = null;
	public byte[] link_target = null;
	public byte[] no_link = null;
	public byte[] border = null;
	public byte[] custom_url_link = null;
	public byte[] custom_target_link = null;
	public boolean desc_link = false;
	public byte[] desc_query = null;
	public double upright;

	// Stores a parsed value by param uid. Only Name__thumbnail is handled; other uids are ignored. val_int is not used.
	public void Set(int uid, byte[] val_bry, int val_int) {
		switch (uid) {
			case Xomw_param_itm.Name__thumbnail: thumbnail = val_bry; break;
		}
	}

	// Resets every field to its "null" value and returns this, so that the instance can be reused.
	public Xomw_params_frame Clear() {
		desc_link = false;
		upright = Php_utl_.Null_double;
		// FIX: "border" was missing from this list, so its value survived Clear() and leaked into the next use
		align = valign = caption = frame = framed = frameless
		= thumbnail = manualthumb = alt = title = cls = img_cls
		= link_title = link_url = link_target = no_link = border
		= custom_url_link = custom_target_link = desc_query
		= Php_utl_.Null_bry;
		return this;
	}

	// NOTE: despite the name, this copies the values FROM src INTO this instance.
	public void Copy_to(Xomw_params_frame src) {
		this.desc_link = src.desc_link;
		this.upright = src.upright;
		this.align = src.align;
		this.valign = src.valign;
		this.caption = src.caption;
		this.frame = src.frame;
		this.framed = src.framed;
		this.frameless = src.frameless;
		this.thumbnail = src.thumbnail;
		this.manualthumb = src.manualthumb;
		this.alt = src.alt;
		this.title = src.title;
		this.cls = src.cls;
		this.img_cls = src.img_cls;
		this.link_title = src.link_title;
		this.link_url = src.link_url;
		this.link_target = src.link_target;
		this.no_link = src.no_link;
		this.border = src.border;
		this.custom_url_link = src.custom_url_link;
		this.custom_target_link = src.custom_target_link;
		this.desc_query = src.desc_query;
	}

	// Joins two class names with a space; returns rhs alone when lhs is empty.
	public static byte[] Cls_add(byte[] lhs, byte[] rhs) {
		return Bry_.Len_eq_0(lhs) ? rhs : Bry_.Add(lhs, Byte_ascii.Space_bry, rhs);
	}
}

View File

@@ -1,45 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.mws.parsers.lnkis; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
import gplx.xowa.mws.utls.*;
/** Mutable holder for the media-handler parameters of an image link (requested and physical dimensions, page). */
public class Xomw_params_handler {
	public int width;
	public int height;
	public int page;
	public int physicalWidth;
	public int physicalHeight;
	/**
	 * Resets all five fields to Php_utl_.Null_int.
	 *
	 * @return this, for chaining
	 */
	public Xomw_params_handler Clear() {
		final int unset = Php_utl_.Null_int;
		this.physicalHeight = unset;
		this.physicalWidth = unset;
		this.page = unset;
		this.height = unset;
		this.width = unset;
		return this;
	}
	/**
	 * Copies every field FROM src INTO this instance (note the direction, despite the name).
	 *
	 * @param src instance to read values from
	 */
	public void Copy_to(Xomw_params_handler src) {
		width = src.width;
		height = src.height;
		page = src.page;
		physicalWidth = src.physicalWidth;
		physicalHeight = src.physicalHeight;
	}
	/**
	 * Assigns a parsed parameter by its uid.
	 *
	 * @param uid     parameter id; only Name__width and Name__height are accepted
	 * @param val_bry value as bytes; unused here
	 * @param val_int value as int
	 * @throws the error built by Err_.new_unhandled_default for any other uid
	 */
	public void Set(int uid, byte[] val_bry, int val_int) {
		if (uid == Xomw_param_itm.Name__width) {
			width = val_int;
		}
		else if (uid == Xomw_param_itm.Name__height) {
			height = val_int;
		}
		else {
			throw Err_.new_unhandled_default(uid);
		}
	}
}

View File

@@ -1,23 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.mws.parsers.lnkis; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
/** Placeholder for horizontal-alignment parameters; it currently carries no state. */
public class Xomw_params_horizAlign {
	/** No-op reset, kept so this type can be cleared like the other Xomw_params_* holders; returns this for chaining. */
	public Xomw_params_horizAlign Clear() {return this;}
}

View File

@@ -1,44 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.mws.parsers.lnkis; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
/**
 * Mutable bag of parameters for rendering a media-transform output (alt / title / link overrides, etc).
 * Instances are reusable: call {@link #Clear} before populating for a new item.
 */
public class Xomw_params_mto {
	public boolean desc_link;
	public byte[] alt = null;
	public byte[] title = null;
	public byte[] img_cls = null;
	public byte[] file_link = null;
	public byte[] valign = null;
	public byte[] desc_query = null;
	public byte[] override_width = null;
	public byte[] override_height = null;
	public byte[] no_dimensions = null;
	public byte[] custom_url_link = null;
	public byte[] custom_title_link = null;
	public byte[] custom_target_link = null;
	public byte[] parser_extlink_rel = null;
	public byte[] parser_extlink_target = null;
	/**
	 * Resets every field to its initial value (false / null).
	 *
	 * @return this, for chaining
	 */
	public Xomw_params_mto Clear() {
		desc_link = false;
		// FIX: "img_cls" and "custom_target_link" were missing from this chain, so their values leaked across reuse of the same instance
		alt = title = img_cls = file_link = valign
		= desc_query = override_width = override_height = no_dimensions
		= custom_url_link = custom_title_link = custom_target_link
		= parser_extlink_rel = parser_extlink_target
		= null;
		return this;
	}
}

View File

@@ -1,36 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.mws.parsers.lnkis; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
import gplx.xowa.mws.utls.*;
/** Plain data holder for image-scaling parameters (physical / client / source sizes, paths, etc). */
public class Xomw_params_scalar {
	public int physicalWidth;
	public int physicalHeight;
	public byte[] physicalDimensions;
	public int clientWidth;
	public int clientHeight;
	public byte[] comment;
	public int srcWidth;
	public int srcHeight;
	public byte[] mimeType;
	public byte[] dstPath;
	public byte[] dstUrl;
	public byte[] interlace;
	/** Starts every int dimension at Php_utl_.Null_int; the byte[] fields keep their default of null. */
	public Xomw_params_scalar() {
		final int unset = Php_utl_.Null_int;
		this.srcHeight = unset;
		this.srcWidth = unset;
		this.clientHeight = unset;
		this.clientWidth = unset;
		this.physicalHeight = unset;
		this.physicalWidth = unset;
	}
}

View File

@@ -1,23 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.mws.parsers.lnkis; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
/** Placeholder for vertical-alignment parameters; it currently carries no state. */
public class Xomw_params_vertAlign {
	/** No-op reset, kept so this type can be cleared like the other Xomw_params_* holders; returns this for chaining. */
	public Xomw_params_vertAlign Clear() {return this;}
}

View File

@@ -1,395 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.mws.parsers.magiclinks; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
import gplx.core.primitives.*; import gplx.core.btries.*; import gplx.core.net.*;
import gplx.xowa.mws.utls.*; import gplx.xowa.mws.htmls.*;
import gplx.langs.regxs.*;
// TODO.XO: this->getConverterLanguage()->markNoConversion($url, true),
/**
 * Port of MediaWiki's Parser::doMagicLinks / Parser::makeFreeExternalLink.
 * Scans the source for free-standing URLs (EX: "https://a.org") and wraps them in external-link anchors,
 * while skipping over existing "<a>...</a>" blocks and over the inside of any other HTML tag.
 */
public class Xomw_magiclinks_wkr {
	private final Btrie_slim_mgr regex_trie = Btrie_slim_mgr.ci_a7(); // NOTE: must be ci to handle protocols; EX: "https:" and "HTTPS:"
	private final Btrie_rv trv = new Btrie_rv();
	// FIX: these two statics used to be assigned lazily in the constructor, guarded only by "Tag__anch__rhs == null";
	// since Tag__anch__rhs was assigned first, another thread could pass the guard and still see a null regex_link_interrupt.
	// Initializing them as static finals lets JVM class initialization publish both safely.
	private static final byte[] Tag__anch__rhs = Bry_.new_a7("</a>");
	private static final Xomw_regex_link_interrupt regex_link_interrupt = new Xomw_regex_link_interrupt();
	private boolean[] url_separators;
	private final Xomw_parser parser;
	private final Xomw_regex_boundary regex_boundary;
	private final Xomw_regex_url regex_url;
	private final Xomw_sanitizer sanitizer;
	private final Xomw_linker linker;
	private final Xomw_atr_mgr atrs = new Xomw_atr_mgr();
	private byte[] page_title;
	private static final byte Regex__anch = 1, Regex__elem = 2, Regex__free = 3;
	/**
	 * Stores the collaborators and builds the per-instance table of trailing-punctuation separators.
	 * Init_by_wiki must be called afterwards to register the hooks.
	 */
	public Xomw_magiclinks_wkr(Xomw_parser parser, Xomw_sanitizer sanitizer, Xomw_linker linker, Xomw_regex_boundary regex_boundary, Xomw_regex_url regex_url) {
		this.parser = parser;
		this.sanitizer = sanitizer;
		this.linker = linker;
		this.regex_boundary = regex_boundary;
		this.regex_url = regex_url;

		// ',;\.:!?'
		url_separators = Bool_ary_bldr.New_u8()
			.Set_many(Byte_ascii.Comma,Byte_ascii.Semic, Byte_ascii.Dot, Byte_ascii.Colon, Byte_ascii.Bang, Byte_ascii.Question)
			.To_ary();
	}
	/** Registers the hooks that start a candidate match: "<a", "<" and every protocol returned by Gfo_protocol_itm.Ary(). */
	public void Init_by_wiki() {
		regex_trie.Add_str_byte("<a", Regex__anch);
		regex_trie.Add_str_byte("<" , Regex__elem);

		Gfo_protocol_itm[] protocol_ary = Gfo_protocol_itm.Ary();
		int protocol_len = protocol_ary.length;
		for (int i = 0; i < protocol_len; i++) {
			Gfo_protocol_itm itm = protocol_ary[i];
			regex_trie.Add_bry_byte(itm.Text_bry(), Regex__free);
		}
	}

	// Replace special strings like "ISBN xxx" and "RFC xxx" with
	// magic external links.
	/**
	 * Reads from pbfr.Src(), writes the linkified text to pbfr.Trg(), then calls pbfr.Switch().
	 *
	 * @param pctx supplies the current page title, which is passed on to the linker
	 * @param pbfr source / target buffer pair
	 */
	public void Do_magic_links(Xomw_parser_ctx pctx, Xomw_parser_bfr pbfr) {
		// XO.PBFR
		Bry_bfr src_bfr = pbfr.Src();
		byte[] src = src_bfr.Bfr();
		int src_bgn = 0;
		int src_end = src_bfr.Len();
		Bry_bfr bfr = pbfr.Trg();

		int cur = src_bgn;
		int prv = cur;
		// NOTE(review): dirty starts as true, so the full text is always copied to Trg and Switch always runs; kept as-is -- confirm whether "false" was intended
		boolean dirty = true;

		// PORTED.REGEX: handle below
		// XO.MW.UNSUPPORTED.OBSOLETE: not handling RFC|PMID|ISBN b/c of upcoming obsolescence: https://www.mediawiki.org/wiki/Requests_for_comment/Future_of_magic_links
		//'!(?:                               // Start cases
		//	(<a[ \t\r\n>].*?</a>) |           // m[1]: Skip link text
		//	(<.*?>) |                         // m[2]: Skip stuff inside
		//	                                  //       HTML elements' . "
		//	(\b(?i:$prots)($addr$urlChar*)) | // m[3]: Free external links
		//	                                  // m[4]: Post-protocol path
		//	\b(?:RFC|PMID) $spaces            // m[5]: RFC or PMID, capture number
		//		([0-9]+)\b |
		//	\bISBN $spaces (                  // m[6]: ISBN, capture number
		//		(?: 97[89] $spdash?)?         //  optional 13-digit ISBN prefix
		//		(?: [0-9]  $spdash?){9}       //  9 digits with opt. delimiters
		//		[0-9Xx]                       //  check digit
		//	)\b
		while (true) {
			if (cur == src_end) {
				if (dirty)
					bfr.Add_mid(src, prv, src_end);
				break;
			}

			byte b = src[cur];
			Object o = regex_trie.Match_at_w_b0(trv, b, src, cur, src_end);
			// current byte doesn't look like magiclink; continue;
			if (o == null) {
				cur++;
				continue;
			}

			// looks like magiclink; do additional processing
			byte regex_tid = ((Byte_obj_val)o).Val();
			int hook_bgn = cur;
			int hook_end = trv.Pos();
			int tmp_pos = hook_end;
			boolean regex_valid = true;
			switch (regex_tid) {
				case Regex__anch: // (<a[ \t\r\n>].*?</a>) |           // m[1]: Skip link text
					tmp_pos = Skip_anch(src, hook_end, src_end);
					// FIX: if "<a" does not start a complete anchor (EX: "<abbr title='https://a.org'>"), the MW regex falls through to m[2];
					// previously the code advanced by 1 byte and then linkified urls inside the tag's attributes
					if (tmp_pos == Bry_find_.Not_found)
						tmp_pos = Skip_elem(src, hook_end, src_end);
					if (tmp_pos == Bry_find_.Not_found)
						regex_valid = false;
					else
						cur = tmp_pos;
					break;
				case Regex__elem: // (<.*?>) |                         // m[2]: Skip stuff inside
					tmp_pos = Skip_elem(src, hook_end, src_end);
					// > not found -> invalid
					if (tmp_pos == Bry_find_.Not_found)
						regex_valid = false;
					// > found -> valid; set cur to after ">"
					else
						cur = tmp_pos;
					break;
				case Regex__free:
					// make sure that protocol starts at word bound; EX: "ahttp://a.org" should be invalid
					if (regex_boundary.Is_boundary_prv(src, hook_bgn)) {
						// skip forward until invalid url char
						tmp_pos = regex_url.Find_fwd_while(trv, src, tmp_pos, src_end);
						// no url chars found -> invalid
						if (tmp_pos == hook_end) {
							regex_valid = false;
						}
						// url chars found -> valid; set cur to 1st invalid url-char;
						else {
							cur = tmp_pos;
						}
					}
					else
						regex_valid = false;
					break;
			}

			// regex is invalid; advance by 1 and continue;
			if (!regex_valid) {
				cur++;
			}
			// regex is valid
			else {
				// handle free
				if (regex_tid == Regex__free) {
					this.page_title = pctx.Page_title().Full_db();
					dirty = true;
					bfr.Add_mid(src, prv, hook_bgn);
					byte[] url = Bry_.Mid(src, hook_bgn, cur);
					int num_post_proto = cur - hook_end; // get length of url without proto; EX: "http://a.org" should be 5 ("a.org")
					this.Make_free_external_link(bfr, url, num_post_proto);
					prv = cur;
				}
				// "<a " and "<" just need to be ignored; note that they already update cur so noop
				else {
				}
			}
		}
		if (dirty) {
			pbfr.Switch();
		}
	}
	// Returns the position just after "</a>", or Bry_find_.Not_found if pos (the byte after "<a") does not continue a complete anchor.
	private static int Skip_anch(byte[] src, int pos, int src_end) {
		if (pos >= src_end) return Bry_find_.Not_found;
		// find "[ \t\r\n>]" after "<a"; i.e.: don't match "<ab" or "<ac", etc..
		switch (src[pos]) {
			case Byte_ascii.Space:
			case Byte_ascii.Tab:
			case Byte_ascii.Cr:
			case Byte_ascii.Nl:
			case Byte_ascii.Angle_end:	// FIX: ">" is part of the MW character class but was not accepted, so "<a>" was not seen as an anchor
				break;
			default:
				return Bry_find_.Not_found;
		}
		// find </a>
		int anch_end = Bry_find_.Find_fwd(src, Tag__anch__rhs, pos + 1, src_end);
		return anch_end == Bry_find_.Not_found ? Bry_find_.Not_found : anch_end + Tag__anch__rhs.length;
	}
	// Returns the position just after the next ">", or Bry_find_.Not_found if there is none in [pos, src_end).
	private static int Skip_elem(byte[] src, int pos, int src_end) {
		int gt_pos = Bry_find_.Find_fwd(src, Byte_ascii.Angle_end, pos, src_end);
		return gt_pos == Bry_find_.Not_found ? Bry_find_.Not_found : gt_pos + 1;
	}

	// Make a free external link, given a user-supplied URL
	/**
	 * Splits trailing punctuation / interrupting entities off url, then writes either an external-link anchor plus the trail,
	 * or (when nothing but the protocol would remain) the original text unchanged.
	 *
	 * @param bfr            output buffer
	 * @param url            full matched url, including protocol
	 * @param num_post_proto number of bytes in url after the protocol
	 */
	public void Make_free_external_link(Bry_bfr bfr, byte[] url, int num_post_proto) {
		byte[] trail = Bry_.Empty;

		// The characters '<' and '>' (which were escaped by
		// removeHTMLtags()) should not be included in
		// URLs, per RFC 2396.
		// Make &nbsp; terminate a URL as well (bug T84937)
		int separator_bgn = regex_link_interrupt.Find(trv, url, 0, url.length);
		if (separator_bgn != Bry_find_.Not_found) {
			trail = Bry_.Mid(url, separator_bgn);
			url = Bry_.Mid(url, 0, separator_bgn);
		}

		// Move trailing punctuation to $trail
		int url_len = url.length;
		// If there is no left bracket, then consider right brackets fair game too
		// XO.MW: if (strpos($url, '(') === false) {$sep .= ')';}
		url_separators[Byte_ascii.Paren_end] = Bry_find_.Find_fwd(url, Byte_ascii.Paren_bgn, 0, url_len) == Bry_find_.Not_found;

		int num_sep_chars = Php_str_.Strspn_bwd__ary(url, url_separators, url_len, -1);

		// Don't break a trailing HTML entity by moving the ; into $trail
		// This is in hot code, so use substr_compare to avoid having to
		// create a new String Object for the comparison
		// XO.MW.NOTE: ignore semic if part of entity; EX: "http://a.org&apos;!."
		if (num_sep_chars > 0 && Php_str_.Substr_byte(url, -num_sep_chars) == Byte_ascii.Semic) {
			// more optimization: instead of running preg_match with a $
			// anchor, which can be slow, do the match on the reversed
			// String starting at the desired offset.
			// un-reversed regexp is: /&([a-z]+|#x[\da-f]+|#\d+)$/i
			// if (preg_match('/\G([a-z]+|[\da-f]+x#|\d+#)&/i', $urlRev, $m2, 0, num_sep_chars)) {
			if (Xomw_regex_html_entity.Match_bwd(url, url_len - num_sep_chars, 0)) {
				num_sep_chars--;
			}
		}

		if (num_sep_chars > 0) {
			trail = Bry_.Add(Php_str_.Substr(url, -num_sep_chars), trail);
			url = Php_str_.Substr(url, 0, -num_sep_chars);
		}

		// Verify that we still have a real URL after trail removal, and
		// not just lone protocol
		if (trail.length >= num_post_proto) {
			bfr.Add_bry_many(url, trail);
			return;
		}

		url = sanitizer.Clean_url(url);

		// XO.MW.UNSUPPORTED.NON-WMF: not supporting images from freefrom url; (EX: "http://a.org/image.png" -> "<img>"); haven't seen this used on WMF wikis
		// Is this an external image?
		byte[] text = null; // $this->maybeMakeExternalImage($url);
		if (text == null) {
			// Not an image, make a link
			linker.makeExternalLink(bfr, url
				, url	// $this->getConverterLanguage()->markNoConversion($url, true),
				, true, Bry_.new_a7("free")
				, parser.Get_external_link_attribs(atrs)
				, page_title);

			// XO.MW.UNSUPPORTED.HOOK: registers link for processing by other extensions?
			// Register it in the output Object...
			// Replace unnecessary URL escape codes with their equivalent characters
			// $pasteurized = self::normalizeLinkUrl($url);
			// $this->mOutput->addExternalLink($pasteurized);
		}
		bfr.Add(trail);
	}
}
class Xomw_regex_html_entity {
// if (preg_match('/\G([a-z]+|[\da-f]+x#|\d+#)&/i', $urlRev, $m2, 0, num_sep_chars)) {
// REGEX: (letters | hex + "#" | dec + "x#") + "&"
// \G means "stop if matching breaks"; so, using a reversed example, "http://&#amp;&#!lt;" will not match "&#amp;" b/c "&#!lt;" breaks match
// http://www.php.net/manual/en/regexp.reference.escape.php
// http://stackoverflow.com/questions/14897949/what-is-the-use-of-g-anchor-in-regex
public static boolean Match_bwd(byte[] src, int src_bgn, int src_end) {
int cur = src_bgn - 1;
int numbers = 0;
int letters = 0;
while (cur >= src_end) {
int b_bgn = gplx.core.intls.Utf8_.Get_pos0_of_char_bwd(src, cur);
switch (src[b_bgn]) {
case Byte_ascii.Ltr_A: case Byte_ascii.Ltr_B: case Byte_ascii.Ltr_C: case Byte_ascii.Ltr_D: case Byte_ascii.Ltr_E:
case Byte_ascii.Ltr_F: case Byte_ascii.Ltr_G: case Byte_ascii.Ltr_H: case Byte_ascii.Ltr_I: case Byte_ascii.Ltr_J:
case Byte_ascii.Ltr_K: case Byte_ascii.Ltr_L: case Byte_ascii.Ltr_M: case Byte_ascii.Ltr_N: case Byte_ascii.Ltr_O:
case Byte_ascii.Ltr_P: case Byte_ascii.Ltr_Q: case Byte_ascii.Ltr_R: case Byte_ascii.Ltr_S: case Byte_ascii.Ltr_T:
case Byte_ascii.Ltr_U: case Byte_ascii.Ltr_V: case Byte_ascii.Ltr_W: case Byte_ascii.Ltr_X: case Byte_ascii.Ltr_Y: case Byte_ascii.Ltr_Z:
case Byte_ascii.Ltr_a: case Byte_ascii.Ltr_b: case Byte_ascii.Ltr_c: case Byte_ascii.Ltr_d: case Byte_ascii.Ltr_e:
case Byte_ascii.Ltr_f: case Byte_ascii.Ltr_g: case Byte_ascii.Ltr_h: case Byte_ascii.Ltr_i: case Byte_ascii.Ltr_j:
case Byte_ascii.Ltr_k: case Byte_ascii.Ltr_l: case Byte_ascii.Ltr_m: case Byte_ascii.Ltr_n: case Byte_ascii.Ltr_o:
case Byte_ascii.Ltr_p: case Byte_ascii.Ltr_q: case Byte_ascii.Ltr_r: case Byte_ascii.Ltr_s: case Byte_ascii.Ltr_t:
case Byte_ascii.Ltr_u: case Byte_ascii.Ltr_v: case Byte_ascii.Ltr_w: case Byte_ascii.Ltr_x: case Byte_ascii.Ltr_y: case Byte_ascii.Ltr_z:
letters++;
break;
case Byte_ascii.Num_0: case Byte_ascii.Num_1: case Byte_ascii.Num_2: case Byte_ascii.Num_3: case Byte_ascii.Num_4:
case Byte_ascii.Num_5: case Byte_ascii.Num_6: case Byte_ascii.Num_7: case Byte_ascii.Num_8: case Byte_ascii.Num_9:
numbers++;
break;
case Byte_ascii.Hash:
// next must be &; EX: "&#" and "&#x"
int prv = cur - 1;
if (prv >= src_end && src[prv] == Byte_ascii.Amp) {
// if hex, num | ltr is fine
byte hex_byte = src[cur + 1];
if (hex_byte == Byte_ascii.Ltr_X || hex_byte == Byte_ascii.Ltr_x) {
return numbers > 0 || letters > 1; // 1 to ignore "x"
}
// if dec, no letters allowed
else {
return numbers > 0 && letters == 0;
}
}
return false;
case Byte_ascii.Amp:
// if entity, no numbers
return letters > 0 && numbers == 0;
default:
return false;
}
cur--;
}
return false;
}
}
/** Finds the first entity that terminates a free url: "&lt;", "&gt;", "&nbsp;" or their hex / dec numeric forms. */
class Xomw_regex_link_interrupt {
	private static final byte Bgn__ent__lt = 0, Bgn__ent__gt = 1, Bgn__ent__nbsp = 2, Bgn__hex = 3, Bgn__dec = 4;
	private static final byte End__hex__lt = 0, End__hex__gt = 1, End__hex__nbsp = 2, End__dec__lt = 3, End__dec__gt = 4, End__dec__nbsp = 5;
	private final Btrie_slim_mgr bgn_trie = Btrie_slim_mgr.cs();
	private final Btrie_slim_mgr end_trie = Btrie_slim_mgr.ci_a7();
	/** Registers the entity prefixes in bgn_trie and the numeric tails in end_trie. */
	public Xomw_regex_link_interrupt() {
		// MW.REGEX: &(lt|gt|nbsp|#x0*(3[CcEe]|[Aa]0)|#0*(60|62|160));
		// named entities are complete matches by themselves
		bgn_trie.Add_str_byte("&lt;", Bgn__ent__lt);
		bgn_trie.Add_str_byte("&gt;", Bgn__ent__gt);
		bgn_trie.Add_str_byte("&nbsp;", Bgn__ent__nbsp);
		// numeric prefixes; the tail is checked against end_trie
		bgn_trie.Add_str_byte("&#x", Bgn__hex); // 3C | 3E | A0
		bgn_trie.Add_str_byte("&#", Bgn__dec);  // 60 | 62 | 160

		// hex tails
		end_trie.Add_str_byte("3c;", End__hex__lt);
		end_trie.Add_str_byte("3e;", End__hex__gt);
		end_trie.Add_str_byte("a0;", End__hex__nbsp);
		// dec tails
		end_trie.Add_str_byte("60;", End__dec__lt);
		end_trie.Add_str_byte("62;", End__dec__gt);
		end_trie.Add_str_byte("160;", End__dec__nbsp);
	}
	/**
	 * Scans src from src_bgn up to (but not including) src_end for the first interrupting entity.
	 *
	 * @param trv     reusable trie-result holder; its Pos() is read after a prefix match
	 * @param src     bytes to scan
	 * @param src_bgn first position to inspect
	 * @param src_end exclusive end of the scan
	 * @return position of the "&" that starts the entity, or Bry_find_.Not_found
	 */
	public int Find(Btrie_rv trv, byte[] src, int src_bgn, int src_end) {
		int pos = src_bgn;
		while (pos < src_end) {
			byte b = src[pos];
			Object bgn_obj = bgn_trie.Match_at_w_b0(trv, b, src, pos, src_end);
			if (bgn_obj != null) {
				byte bgn_tid = ((Byte_obj_val)bgn_obj).Val();
				int tail_bgn = trv.Pos();

				// named entity: nothing more to check
				if (bgn_tid == Bgn__ent__lt || bgn_tid == Bgn__ent__gt || bgn_tid == Bgn__ent__nbsp)
					return pos;

				boolean is_hex = bgn_tid == Bgn__hex;
				if (is_hex || bgn_tid == Bgn__dec) {
					// skip the "0*" padding, then match rest of sequence; EX: "3c;", "60;" etc.
					tail_bgn = Bry_find_.Find_fwd_while(src, tail_bgn, src_end, Byte_ascii.Num_0);
					Object end_obj = end_trie.Match_at(trv, src, tail_bgn, src_end);
					if (end_obj != null) {
						// make sure that hex-dec matches; EX: "&#x60;" and "&#3c;" are invalid
						byte end_tid = ((Byte_obj_val)end_obj).Val();
						boolean tail_ok = is_hex
							? Int_.Between(end_tid, End__hex__lt, End__hex__nbsp)
							: Int_.Between(end_tid, End__dec__lt, End__dec__nbsp);
						if (tail_ok)
							return pos;
					}
				}
			}
			// no interrupt at this position; step over the current char
			pos += gplx.core.intls.Utf8_.Len_of_char_by_1st_byte(b);
		}
		return Bry_find_.Not_found;
	}
}

View File

@@ -1,91 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.mws.parsers.magiclinks; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
import org.junit.*;
/** JUnit cases for Xomw_magiclinks_wkr: free-url detection, tag skipping, interrupting entities and trailing separators. */
public class Xomw_magiclinks_wkr__tst {
	private final Xomw_magiclinks_wkr__fxt fxt = new Xomw_magiclinks_wkr__fxt();
	// builds the expected anchor for a free link whose href and caption are both "url"
	private static String Lnk(String url) {
		return "<a rel='nofollow' class='external free' href='" + url + "'>" + url + "</a>";
	}
	@Test public void Basic() {
		fxt.Test__parse("a https://b.org z", "a " + Lnk("https://b.org") + " z");
	}
	@Test public void Invalid() {
		// "_" before the protocol means no word boundary, so no link
		String src = "a _https://b.org z";
		fxt.Test__parse(src, src);
	}
	@Test public void Tag__anch() {
		// url inside an existing anchor is left alone
		String src = "a <a title=\"https://b.org\">b</a> z";
		fxt.Test__parse(src, src);
	}
	@Test public void Tag__misc() {
		// url inside any other tag is left alone
		String src = "a <div title=\"https://b.org\">b</div> z";
		fxt.Test__parse(src, src);
	}
	@Test public void Interrupt() {
		String lnk = Lnk("https://b.org");
		// ent
		fxt.Test__parse("a https://b.org&lt;z", "a " + lnk + "&lt;z");
		// hex
		fxt.Test__parse("a https://b.org&#x3c;z", "a " + lnk + "&#x3c;z");
		// dec
		fxt.Test__parse("a https://b.org&#60;z", "a " + lnk + "&#60;z");
		// num_post_proto rule
		fxt.Test__parse("a https://&lt; z", "a https://&lt; z");
	}
	@Test public void Interrupt__hex_dec() {// implementation specific test for mixed hex / dec
		// dec-hex
		fxt.Test__parse("a https://b.org&#3c;z", "a " + Lnk("https://b.org&amp;#3c;z"));
	}
	@Test public void Separator() {
		String lnk = Lnk("https://b.org");
		// basic; ,;.:!?
		fxt.Test__parse("a https://b.org,;.:!? z", "a " + lnk + ",;.:!? z");
		// ")" excluded
		fxt.Test__parse("a https://b.org).:!? z", "a " + lnk + ").:!? z");
		// ")" included b/c "(" exists
		fxt.Test__parse("a https://b.org().:!? z", "a " + Lnk("https://b.org()") + ".:!? z");
		// ";" excluded
		fxt.Test__parse("a https://b.org;.:!? z", "a " + lnk + ";.:!? z");
		// ";" included b/c of ent
		fxt.Test__parse("a https://b.org&abc;.:!? z", "a " + Lnk("https://b.org&amp;abc;") + ".:!? z");
		// ";" included b/c of hex; note that Clean_url changes "&#xB1;" to "±"
		fxt.Test__parse("a https://b.org&#xB1;.:!? z", "a " + Lnk("https://b.org±") + ".:!? z");
		// ";" included b/c of dec; note that Clean_url changes "&#123;" to "{"
		fxt.Test__parse("a https://b.org&#123;.:!? z", "a " + Lnk("https://b.org{") + ".:!? z");
		// ";" excluded b/c of invalid.ent
		fxt.Test__parse("a https://b.org&a1b;.:!? z", "a " + Lnk("https://b.org&amp;a1b") + ";.:!? z");
		// ";" excluded b/c of invalid.hex
		fxt.Test__parse("a https://b.org&#x;.:!? z", "a " + Lnk("https://b.org&amp;#x") + ";.:!? z");
		// ";" excluded b/c of invalid.dec
		fxt.Test__parse("a https://b.org&#a;.:!? z", "a " + Lnk("https://b.org&amp;#a") + ";.:!? z");
		// num_post_proto rule
		fxt.Test__parse("a https://.:!? z", "a https://.:!? z");
	}
	@Test public void Clean_url() {
		// basic
		fxt.Test__parse("http://a᠆b.org/c᠆d", Lnk("http://ab.org/c᠆d"));
	}
}
/** Test fixture: wires a Xomw_magiclinks_wkr to a test wiki and compares its output against expected html. */
class Xomw_magiclinks_wkr__fxt {
	private final Xomw_magiclinks_wkr wkr;
	private final Xomw_parser_ctx pctx = new Xomw_parser_ctx();
	private final Xomw_parser_bfr pbfr = new Xomw_parser_bfr();
	/** Creates the test app / wiki, sets the current page to "Page_1" and initializes the worker. */
	public Xomw_magiclinks_wkr__fxt() {
		Xoae_app app = Xoa_app_fxt.Make__app__edit();
		Xowe_wiki wiki = Xoa_app_fxt.Make__wiki__edit(app);
		Xomw_regex_space space = new Xomw_regex_space();
		byte[] page_ttl = Bry_.new_a7("Page_1");
		pctx.Init_by_page(wiki.Ttl_parse(page_ttl));

		Xomw_parser parser = new Xomw_parser();
		Xomw_regex_boundary boundary = new Xomw_regex_boundary(space);
		Xomw_regex_url url = new Xomw_regex_url(space);
		this.wkr = new Xomw_magiclinks_wkr(parser, parser.Sanitizer(), parser.Linker(), boundary, url);
		wkr.Init_by_wiki();
	}
	/** Same as the 3-arg overload with apos = Bool_.Y. */
	public void Test__parse(String src_str, String expd) {
		Test__parse(Bool_.Y, src_str, expd);
	}
	/**
	 * Runs Do_magic_links on src_str and asserts the result.
	 *
	 * @param apos    when true, expd is first passed through Gfh_utl.Replace_apos
	 * @param src_str wikitext input; also used as the assertion message
	 * @param expd    expected output
	 */
	public void Test__parse(boolean apos, String src_str, String expd) {
		pbfr.Init(Bry_.new_u8(src_str));
		wkr.Do_magic_links(pctx, pbfr);
		String expd_html = apos ? gplx.langs.htmls.Gfh_utl.Replace_apos(expd) : expd;
		String actl_html = pbfr.Rslt().To_str_and_clear();
		Tfds.Eq_str_lines(expd_html, actl_html, src_str);
	}
}

View File

@@ -1,134 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.mws.parsers.nbsps; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
import gplx.core.btries.*;
/**
 * Port of the "$fixtags" step of MediaWiki's parser: inserts "&#160;" for French-style spacing around punctuation
 * and guillemets, and undoes it for the CSS "!important" keyword.
 */
public class Xomw_nbsp_wkr {
	private final Btrie_rv trv = new Btrie_rv();
	/**
	 * Reads from pbfr.Src(), writes the converted text to pbfr.Trg(), then calls pbfr.Switch().
	 *
	 * @param pctx parser context; not used by this worker
	 * @param pbfr source / target buffer pair
	 */
	public void Do_nbsp(Xomw_parser_ctx pctx, Xomw_parser_bfr pbfr) {
		// PORTED:
		// Clean up special characters, only run once, next-to-last before doBlockLevels
		// $fixtags = [
		// 	// French spaces, last one Guillemet-left
		// 	// only if there is something before the space
		// 	'/(.) (?=\\?|:|;|!|%|\\302\\273)/' => '\\1&#160;',
		// 	// french spaces, Guillemet-right
		// 	'/(\\302\\253) /' => '\\1&#160;',
		// 	'/&#160;(!\s*important)/' => ' \\1', // Beware of CSS magic word !important, T13874.
		// ];
		// $text = preg_replace( array_keys( $fixtags ), array_values( $fixtags ), $text );
		// NOTE(review): MW applies the three patterns sequentially, so " !important" is first converted and then restored;
		//   this single pass converts " !" without restoring it. Also, "(.)" (something before the space) is not checked. Confirm whether either matters.

		// XO.PBFR
		Bry_bfr src_bfr = pbfr.Src();
		byte[] src = src_bfr.Bfr();
		int src_bgn = 0;
		int src_end = src_bfr.Len();
		Bry_bfr bfr = pbfr.Trg();

		int cur = src_bgn;
		int prv = cur;
		// NOTE(review): dirty starts as true, so the full text is always copied to Trg and Switch always runs; kept as-is -- confirm whether "false" was intended
		boolean dirty = true;
		// search forward for...
		// "\s" before ? : ; ! % 302,273; EX: "a :"
		// "\s" after 302,253
		// "&#160;!\s*important"
		while (true) {
			if (cur == src_end) {
				if (dirty)
					bfr.Add_mid(src, prv, src_end);
				break;
			}

			Object o = trie.Match_at(trv, src, cur, src_end);
			if (o == null) {
				cur++;
				continue;
			}

			Xomw_nbsp_itm itm = (Xomw_nbsp_itm)o;

			// '/&#160;(!\s*important)/' => ' \\1'
			byte itm_tid = itm.Tid();
			if (itm_tid == Tid__important) {
				int space_bgn = cur + itm.Key().length;
				int space_end = Bry_find_.Find_fwd_while(src, space_bgn, src_end, Byte_ascii.Space);
				int important_end = space_end + Bry__important.length;
				// the range check keeps the comparison inside the logical end of the buffer
				if (important_end > src_end || !Bry_.Match(src, space_end, important_end, Bry__important)) {
					// FIX: advance before retrying; the original "continue" left cur unchanged, so the same "&#160;!" matched forever (infinite loop)
					cur++;
					continue;
				}
			}

			dirty = true;
			bfr.Add_mid(src, prv, cur);
			switch (itm_tid) {
				case Tid__space_lhs:
					bfr.Add_bry_many(Bry__nbsp, itm.Val());
					break;
				case Tid__space_rhs:
					bfr.Add_bry_many(itm.Val(), Bry__nbsp);
					break;
				case Tid__important:
					bfr.Add(Bry__important__repl);
					break;
			}
			cur += itm.Key().length;
			prv = cur;
		}
		if (dirty)
			pbfr.Switch();
	}
	private static final byte Tid__space_lhs = 0, Tid__space_rhs = 1, Tid__important = 2;
	// FIX: the trie used to be assigned to the static field first and populated afterwards inside "if (trie == null) synchronized",
	// so a concurrent caller could see a non-null but partially filled trie; it is now fully built during class initialization
	private static final Btrie_slim_mgr trie = Trie__new();
	// Builds the lookup of every hook handled by Do_nbsp.
	private static Btrie_slim_mgr Trie__new() {
		Btrie_slim_mgr rv = Btrie_slim_mgr.cs();
		Trie__add(rv, Tid__space_lhs, " ?");
		Trie__add(rv, Tid__space_lhs, " :");
		Trie__add(rv, Tid__space_lhs, " ;");
		Trie__add(rv, Tid__space_lhs, " !");
		Trie__add(rv, Tid__space_lhs, " %");	// FIX: "%" is listed in the MW regex but was missing here
		Trie__add(rv, Tid__space_lhs, " »");
		Trie__add(rv, Tid__space_rhs, "« ");
		Trie__add(rv, Tid__important, "&#160;!");
		return rv;
	}
	// Registers key_str; the stored val is the key without its space (lhs: leading; rhs: trailing) or the whole key (important).
	private static void Trie__add(Btrie_slim_mgr trie, byte tid, String key_str) {
		byte[] key_bry = Bry_.new_u8(key_str);
		byte[] val_bry = null;
		switch (tid) {
			case Tid__space_lhs:
				val_bry = Bry_.Mid(key_bry, 1);
				break;
			case Tid__space_rhs:
				val_bry = Bry_.Mid(key_bry, 0, key_bry.length - 1);
				break;
			case Tid__important:
				val_bry = key_bry;
				break;
		}
		Xomw_nbsp_itm itm = new Xomw_nbsp_itm(tid, key_bry, val_bry);
		trie.Add_obj(key_bry, itm);
	}
	private static final byte[] Bry__nbsp = Bry_.new_a7("&#160;"), Bry__important = Bry_.new_a7("important"), Bry__important__repl = Bry_.new_a7(" !");
}
/** Immutable trie payload for Xomw_nbsp_wkr: the hook type, the matched key and the bytes to re-emit. */
class Xomw_nbsp_itm {
	private final byte tid;
	private final byte[] key;
	private final byte[] val;
	public Xomw_nbsp_itm(byte tid, byte[] key, byte[] val) {
		this.val = val;
		this.key = key;
		this.tid = tid;
	}
	/** Hook type, as passed to the constructor. */
	public byte Tid() {
		return tid;
	}
	/** Full key that was registered in the trie. */
	public byte[] Key() {
		return key;
	}
	/** Bytes to write back alongside the nbsp. */
	public byte[] Val() {
		return val;
	}
}

View File

@@ -1,40 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.mws.parsers.nbsps; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
import org.junit.*;
// Tests for Xomw_nbsp_wkr.Do_nbsp; each case passes raw source text and the expected result to the fixture.
public class Xomw_nbsp_wkr__tst {
    private final Xomw_nbsp_wkr__fxt fxt = new Xomw_nbsp_wkr__fxt();
    // no trigger sequence: text is expected unchanged
    @Test public void Noop() {
        fxt.Test__parse("abc"                        , "abc");
    }
    // space before ":" is expected to become &#160;
    @Test public void Space_lhs__colon() {
        fxt.Test__parse("a :b c"                     , "a&#160;:b c");
    }
    // space before a closing guillemet is expected to become &#160;
    @Test public void Space_lhs__laquo() {
        fxt.Test__parse("a »b c"                     , "a&#160;»b c");
    }
    // space after an opening guillemet is expected to become &#160;
    @Test public void Space_rhs() {
        fxt.Test__parse("a« b c"                     , "a«&#160;b c");
    }
    // "&#160;!" followed by "important" is expected to lose the &#160;
    @Test public void Important() {
        fxt.Test__parse("a &#160;! important b"      , "a ! important b");
    }
}
// Test fixture: runs Xomw_nbsp_wkr.Do_nbsp over a source string and compares the buffer result to the expectation.
class Xomw_nbsp_wkr__fxt {
    private final Xomw_nbsp_wkr wkr = new Xomw_nbsp_wkr();
    private final Xomw_parser_ctx pctx = new Xomw_parser_ctx();
    private final Xomw_parser_bfr pbfr = new Xomw_parser_bfr();
    // when true, the expected string is passed through Gfh_utl.Replace_apos before comparing
    private boolean apos = true;
    // src_str : raw input text
    // expd    : expected output (rewritten by Replace_apos when apos is set)
    public void Test__parse(String src_str, String expd) {
        // load input and run the worker
        pbfr.Init(Bry_.new_u8(src_str));
        wkr.Do_nbsp(pctx, pbfr);
        // normalize the expectation, then compare; src_str is passed as the third argument to Eq_str_lines
        String expd_str = apos ? gplx.langs.htmls.Gfh_utl.Replace_apos(expd) : expd;
        Tfds.Eq_str_lines(expd_str, pbfr.Rslt().To_str_and_clear(), src_str);
    }
}

View File

@@ -1,23 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.mws.parsers.prepros; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
// Placeholder frame object; expansion is not implemented yet.
public class Xomw_frame_itm {
    // Stub: ignores ttl and always returns null.
    public byte[] Expand(byte[] ttl) {return null;}
}

View File

@@ -1,564 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.mws.parsers.prepros; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
// public class Xomw_frame_wkr { // THREAD.UNSAFE: caching for repeated calls
// private final Xomw_parser parser;
// public Xomw_frame_wkr(Xomw_parser parser) {
// this.parser = parser;
// }
// \\ Replace magic variables, templates, and template arguments
// \\ with the appropriate text. Templates are substituted recursively,
// \\ taking care to avoid infinite loops.
// \\
// \\ Note that the substitution depends on value of $mOutputType:
// \\ self::OT_WIKI: only {{subst:}} templates
// \\ self::OT_PREPROCESS: templates but not extension tags
// \\ self::OT_HTML: all templates and extension tags
// \\
// \\ @param String $text The text to transform
// \\ @param boolean|PPFrame $frame Object describing the arguments passed to the
// \\ template. Arguments may also be provided as an associative array, as
// \\ was the usual case before MW1.12. Providing arguments this way may be
// \\ useful for extensions wishing to perform variable replacement
// \\ explicitly.
// \\ @param boolean $argsOnly Only do argument (triple-brace) expansion, not
// \\ double-brace expansion.
// \\ @return String
// public function replaceVariables($text, $frame = false, $argsOnly = false) {
// // Is there any text? Also, Prevent too big inclusions!
// $textSize = strlen($text);
// if ($textSize < 1 || $textSize > $this->mOptions->getMaxIncludeSize()) {
// return $text;
// }
//
// if ($frame == false) {
// $frame = $this->getPreprocessor()->newFrame();
// } elseif (!($frame instanceof PPFrame)) {
// wfDebug(__METHOD__ . " called using plain parameters instead of "
// . "a PPFrame instance. Creating custom frame.\n");
// $frame = $this->getPreprocessor()->newCustomFrame($frame);
// }
//
// $dom = $this->preprocessToDom($text);
// $flags = $argsOnly ? PPFrame::NO_TEMPLATES : 0;
// $text = $frame->expand($dom, $flags);
//
// return $text;
// }
//
// \\ Clean up argument array - refactored in 1.9 so parserfunctions can use it, too.
// public static function createAssocArgs($args) {
// $assocArgs = [];
// $index = 1;
// foreach ($args as $arg) {
// $eqpos = strpos($arg, '=');
// if ($eqpos == false) {
// $assocArgs[$index++] = $arg;
// } else {
// $name = trim(substr($arg, 0, $eqpos));
// $value = trim(substr($arg, $eqpos + 1));
// if ($value == false) {
// $value = '';
// }
// if ($name != false) {
// $assocArgs[$name] = $value;
// }
// }
// }
//
// return $assocArgs;
// }
// \\ Return the text of a template, after recursively
// \\ replacing any variables or templates within the template.
// \\
// \\ @param array $piece The parts of the template
// \\ $piece['title']: the title, i.e. the part before the |
// \\ $piece['parts']: the parameter array
// \\ $piece['lineStart']: whether the brace was at the start of a line
// \\ @param PPFrame $frame The current frame, contains template arguments
// \\ @throws Exception
// \\ @return String The text of the template
// public void Brace_substitution(Xomw_prepro_node__template piece, Xomw_frame_itm frame) {
// // Flags
//
// // $text has been filled
// boolean found = false;
// // wiki markup in $text should be escaped
// boolean nowiki = false;
// // $text is HTML, armour it against wikitext transformation
// boolean is_html = false;
// // Force interwiki transclusion to be done in raw mode not rendered
// boolean force_raw_interwiki = false;
// // $text is a DOM node needing expansion in a child frame
// boolean is_child_obj = false;
// // $text is a DOM node needing expansion in the current frame
// boolean is_local_obj = false;
//
// // Title Object, where $text came from
// byte[] title = null;
//
// // $part1 is the bit before the first |, and must contain only title characters.
// // Various prefixes will be stripped from it later.
// byte[] title_with_spaces = frame.Expand(piece.Title());
// byte[] part1 = Bry_.Trim(title_with_spaces);
// byte[] title_text = null;
//
// // Original title text preserved for various purposes
// byte[] originalTitle = part1;
//
// // $args is a list of argument nodes, starting from index 0, not including $part1
// // @todo FIXME: If piece['parts'] is null then the call to getLength()
// // below won't work b/c this $args isn't an Object
// Xomw_prepro_node__part[] args = (null == piece.Parts()) ? null : piece.Parts();
//
// byte[] profile_section = null; // profile templates
//
// Tfds.Write(nowiki, is_html, force_raw_interwiki, is_child_obj, is_local_obj, title, title_text, profile_section);
// // SUBST
// if (!found) {
// String subst_match = null; // $this->mSubstWords->matchStartAndRemove($part1);
// boolean literal = false;
//
// // Possibilities for substMatch: "subst", "safesubst" or FALSE
// // Decide whether to expand template or keep wikitext as-is.
// if (parser.Output_type__wiki()) {
// if (subst_match == null) {
// literal = true; // literal when in PST with no prefix
// }
// else {
// literal = false; // expand when in PST with subst: or safesubst:
// }
// }
// else {
// if (subst_match == "subst") {
// literal = true; // literal when not in PST with plain subst:
// }
// else {
// literal = false; // expand when not in PST with safesubst: or no prefix
// }
// }
// if (literal) {
//// $text = $frame->virtualBracketedImplode('{{', '|', '}}', title_with_spaces, $args);
// is_local_obj = true;
// found = true;
// }
// }
//
// // Variables
// if (!found && args.length == 0) {
//// $id = $this->mVariables->matchStartToEnd($part1);
//// if ($id != false) {
//// $text = $this->getVariableValue($id, $frame);
//// if (MagicWord::getCacheTTL($id) > -1) {
//// $this->mOutput->updateCacheExpiry(MagicWord::getCacheTTL($id));
//// }
// found = true;
//// }
// }
//
// // MSG, MSGNW and RAW
// if (!found) {
// // Check for MSGNW:
//// $mwMsgnw = MagicWord::get('msgnw');
//// if ($mwMsgnw->matchStartAndRemove($part1)) {
// nowiki = true;
//// }
//// else {
// // Remove obsolete MSG:
//// $mwMsg = MagicWord::get('msg');
//// $mwMsg->matchStartAndRemove($part1);
//// }
//
// // Check for RAW:
//// $mwRaw = MagicWord::get('raw');
//// if ($mwRaw->matchStartAndRemove($part1)) {
//// force_raw_interwiki = true;
//// }
// }
// Parser functions
// if (!found) {
// $colonPos = strpos($part1, ':');
// if ($colonPos != false) {
// $func = substr($part1, 0, $colonPos);
// $funcArgs = [ trim(substr($part1, $colonPos + 1)) ];
// $argsLength = $args->getLength();
// for ($i = 0; $i < $argsLength; $i++) {
// $funcArgs[] = $args->item($i);
// }
// try {
// $result = $this->callParserFunction($frame, $func, $funcArgs);
// } catch (Exception $ex) {
// throw $ex;
// }
// The interface for parser functions allows for extracting
// flags into the local scope. Extract any forwarded flags
// here.
// extract($result);
// }
// }
// Finish mangling title and then check for loops.
// Set title to a Title Object and $title_text to the PDBK
// if (!found) {
// $ns = NS_TEMPLATE;
// Split the title into page and subpage
// $subpage = '';
// $relative = $this->maybeDoSubpageLink($part1, $subpage);
// if ($part1 != $relative) {
// $part1 = $relative;
// $ns = $this->mTitle->getNamespace();
// }
// title = Title::newFromText($part1, $ns);
// if (title) {
// $title_text = title->getPrefixedText();
// // Check for language variants if the template is not found
// if ($this->getConverterLanguage()->hasVariants() && title->getArticleID() == 0) {
// $this->getConverterLanguage()->findVariantLink($part1, title, true);
// }
// // Do recursion depth check
// $limit = $this->mOptions->getMaxTemplateDepth();
// if ($frame->depth >= $limit) {
// found = true;
// $text = '<span class="error">'
// . wfMessage('parser-template-recursion-depth-warning')
// ->numParams($limit)->inContentLanguage()->text()
// . '</span>';
// }
// }
// }
// Load from database
// if (!found && title) {
// $profile_section = $this->mProfiler->scopedProfileIn(title->getPrefixedDBkey());
// if (!title->isExternal()) {
// if (title->isSpecialPage()
// && $this->mOptions->getAllowSpecialInclusion()
// && $this->ot['html']
// ) {
// $specialPage = SpecialPageFactory::getPage(title->getDBkey());
// // Pass the template arguments as URL parameters.
// // "uselang" will have no effect since the Language Object
// // is forced to the one defined in ParserOptions.
// $pageArgs = [];
// $argsLength = $args->getLength();
// for ($i = 0; $i < $argsLength; $i++) {
// $bits = $args->item($i)->splitArg();
// if (strval($bits['index']) == '') {
// $name = trim($frame->expand($bits['name'], PPFrame::STRIP_COMMENTS));
// $value = trim($frame->expand($bits['value']));
// $pageArgs[$name] = $value;
// }
// }
//
// // Create a new context to execute the special page
// $context = new RequestContext;
// $context->setTitle(title);
// $context->setRequest(new FauxRequest($pageArgs));
// if ($specialPage && $specialPage->maxIncludeCacheTime() == 0) {
// $context->setUser($this->getUser());
// } else {
// // If this page is cached, then we better not be per user.
// $context->setUser(User::newFromName('127.0.0.1', false));
// }
// $context->setLanguage($this->mOptions->getUserLangObj());
// $ret = SpecialPageFactory::capturePath(
// title, $context, $this->getLinkRenderer());
// if ($ret) {
// $text = $context->getOutput()->getHTML();
// $this->mOutput->addOutputPageMetadata($context->getOutput());
// found = true;
// is_html = true;
// if ($specialPage && $specialPage->maxIncludeCacheTime() != false) {
// $this->mOutput->updateRuntimeAdaptiveExpiry(
// $specialPage->maxIncludeCacheTime()
// );
// }
// }
// } elseif (MWNamespace::isNonincludable(title->getNamespace())) {
// found = false; // access denied
// wfDebug(__METHOD__ . ": template inclusion denied for " .
// title->getPrefixedDBkey() . "\n");
// } else {
// list($text, title) = $this->getTemplateDom(title);
// if ($text != false) {
// found = true;
// is_child_obj = true;
// }
// }
//
// // If the title is valid but undisplayable, make a link to it
// if (!found && ($this->ot['html'] || $this->ot['pre'])) {
// $text = "[[:$title_text]]";
// found = true;
// }
// } elseif (title->isTrans()) {
// // Interwiki transclusion
// if ($this->ot['html'] && !force_raw_interwiki) {
// $text = $this->interwikiTransclude(title, 'render');
// is_html = true;
// } else {
// $text = $this->interwikiTransclude(title, 'raw');
// // Preprocess it like a template
// $text = $this->preprocessToDom($text, self::PTD_FOR_INCLUSION);
// is_child_obj = true;
// }
// found = true;
// }
//
// // Do infinite loop check
// // This has to be done after redirect resolution to avoid infinite loops via redirects
// if (!$frame->loopCheck(title)) {
// found = true;
// $text = '<span class="error">'
// . wfMessage('parser-template-loop-warning', $title_text)->inContentLanguage()->text()
// . '</span>';
// wfDebug(__METHOD__ . ": template loop broken at '$title_text'\n");
// }
// }
// If we haven't found text to substitute by now, we're done
// Recover the source wikitext and return it
// if (!found) {
// $text = $frame->virtualBracketedImplode('{{', '|', '}}', title_with_spaces, $args);
// if ($profile_section) {
// $this->mProfiler->scopedProfileOut($profile_section);
// }
// return [ 'Object' => $text ];
// }
// Expand DOM-style return values in a child frame
// if (is_child_obj) {
// // Clean up argument array
// $newFrame = $frame->newChild($args, title);
//
// if (nowiki) {
// $text = $newFrame->expand($text, PPFrame::RECOVER_ORIG);
// } elseif ($title_text != false && $newFrame->isEmpty()) {
// // Expansion is eligible for the empty-frame cache
// $text = $newFrame->cachedExpand($title_text, $text);
// } else {
// // Uncached expansion
// $text = $newFrame->expand($text);
// }
// }
// if (is_local_obj && nowiki) {
// $text = $frame->expand($text, PPFrame::RECOVER_ORIG);
// is_local_obj = false;
// }
// if ($profile_section) {
// $this->mProfiler->scopedProfileOut($profile_section);
// }
// Replace raw HTML by a placeholder
// if (is_html) {
// $text = $this->insertStripItem($text);
// } elseif (nowiki && ($this->ot['html'] || $this->ot['pre'])) {
// // Escape nowiki-style return values
// $text = wfEscapeWikiText($text);
// } elseif (is_string($text)
// && !$piece['lineStart']
// && preg_match('/^(?:{\\||:|;|#|\*)/', $text)
// ) {
// // T2529: if the template begins with a table or block-level
// // element, it should be treated as beginning a new line.
// // This behavior is somewhat controversial.
// $text = "\n" . $text;
// }
// if (is_string($text) && !$this->incrementIncludeSize('post-expand', strlen($text))) {
// // Error, oversize inclusion
// if ($title_text != false) {
// // Make a working, properly escaped link if possible (T25588)
// $text = "[[:$title_text]]";
// } else {
// // This will probably not be a working link, but at least it may
// // provide some hint of where the problem is
// preg_replace('/^:/', '', $originalTitle);
// $text = "[[:$originalTitle]]";
// }
// $text .= $this->insertStripItem('<!-- WARNING: template omitted, '
// . 'post-expand include size too large -->');
// $this->limitationWarn('post-expand-template-inclusion');
// }
//
// if (is_local_obj) {
// $ret = [ 'Object' => $text ];
// } else {
// $ret = [ 'text' => $text ];
// }
// return $ret;
// }
// \\ Triple brace replacement -- used for template arguments
// public function argSubstitution($piece, $frame) {
//
// $error = false;
// $parts = $piece['parts'];
// $nameWithSpaces = $frame->expand($piece['title']);
// $argName = trim($nameWithSpaces);
// $Object = false;
// $text = $frame->getArgument($argName);
// if ($text == false && $parts->getLength() > 0
// && ($this->ot['html']
// || $this->ot['pre']
// || ($this->ot['wiki'] && $frame->isTemplate())
// )
// ) {
// // No match in frame, use the supplied default
// $Object = $parts->item(0)->getChildren();
// }
// if (!$this->incrementIncludeSize('arg', strlen($text))) {
// $error = '<!-- WARNING: argument omitted, expansion size too large -->';
// $this->limitationWarn('post-expand-template-argument');
// }
//
// if ($text == false && $Object == false) {
// // No match anywhere
// $Object = $frame->virtualBracketedImplode('{{{', '|', '}}}', $nameWithSpaces, $parts);
// }
// if ($error != false) {
// $text .= $error;
// }
// if ($Object != false) {
// $ret = [ 'Object' => $Object ];
// } else {
// $ret = [ 'text' => $text ];
// }
//
// return $ret;
// }
//
// /**
// \\ Return the text to be used for a given extension tag.
// \\ This is the ghost of strip().
// \\
// \\ @param array $params Associative array of parameters:
// \\ name PPNode for the tag name
// \\ attr PPNode for unparsed text where tag attributes are thought to be
// \\ attributes Optional associative array of parsed attributes
// \\ inner Contents of extension element
// \\ noClose Original text did not have a close tag
// \\ @param PPFrame $frame
// \\
// \\ @throws MWException
// \\ @return String
// \\/
// public function extensionSubstitution($params, $frame) {
// static $errorStr = '<span class="error">';
// static $errorLen = 20;
//
// $name = $frame->expand($params['name']);
// if (substr($name, 0, $errorLen) == $errorStr) {
// // Probably expansion depth or node count exceeded. Just punt the
// // error up.
// return $name;
// }
//
// $attrText = !isset($params['attr']) ? null : $frame->expand($params['attr']);
// if (substr($attrText, 0, $errorLen) == $errorStr) {
// // See above
// return $attrText;
// }
//
// // We can't safely check if the expansion for $content resulted in an
// // error, because the content could happen to be the error String
// // (T149622).
// $content = !isset($params['inner']) ? null : $frame->expand($params['inner']);
//
// $marker = self::MARKER_PREFIX . "-$name-"
// . sprintf('%08X', $this->mMarkerIndex++) . self::MARKER_SUFFIX;
//
// $isFunctionTag = isset($this->mFunctionTagHooks[strtolower($name)]) &&
// ($this->ot['html'] || $this->ot['pre']);
// if ($isFunctionTag) {
// $markerType = 'none';
// } else {
// $markerType = 'general';
// }
// if ($this->ot['html'] || $isFunctionTag) {
// $name = strtolower($name);
// $attributes = Sanitizer::decodeTagAttributes($attrText);
// if (isset($params['attributes'])) {
// $attributes = $attributes + $params['attributes'];
// }
//
// if (isset($this->mTagHooks[$name])) {
// // Workaround for PHP bug 35229 and similar
// if (!is_callable($this->mTagHooks[$name])) {
// throw new MWException("Tag hook for $name is not callable\n");
// }
// $output = call_user_func_array($this->mTagHooks[$name],
// [ $content, $attributes, $this, $frame ]);
// } elseif (isset($this->mFunctionTagHooks[$name])) {
// list($callback,) = $this->mFunctionTagHooks[$name];
// if (!is_callable($callback)) {
// throw new MWException("Tag hook for $name is not callable\n");
// }
//
// $output = call_user_func_array($callback, [ &$this, $frame, $content, $attributes ]);
// } else {
// $output = '<span class="error">Invalid tag extension name: ' .
// htmlspecialchars($name) . '</span>';
// }
//
// if (is_array($output)) {
// // Extract flags to local scope (to override $markerType)
// $flags = $output;
// $output = $flags[0];
// unset($flags[0]);
// extract($flags);
// }
// } else {
// if (is_null($attrText)) {
// $attrText = '';
// }
// if (isset($params['attributes'])) {
// foreach ($params['attributes'] as $attrName => $attrValue) {
// $attrText .= ' ' . htmlspecialchars($attrName) . '="' .
// htmlspecialchars($attrValue) . '"';
// }
// }
// if ($content == null) {
// $output = "<$name$attrText/>";
// } else {
// $close = is_null($params['close']) ? '' : $frame->expand($params['close']);
// if (substr($close, 0, $errorLen) == $errorStr) {
// // See above
// return $close;
// }
// $output = "<$name$attrText>$content$close";
// }
// }
//
// if ($markerType == 'none') {
// return $output;
// } elseif ($markerType == 'nowiki') {
// $this->mStripState->addNoWiki($marker, $output);
// } elseif ($markerType == 'general') {
// $this->mStripState->addGeneral($marker, $output);
// } else {
// throw new MWException(__METHOD__ . ': invalid marker type');
// }
// return $marker;
// }
// }

View File

@@ -1,98 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.mws.parsers.prepros; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
// Node of the preprocessor tree: holds an ordered list of child nodes and can serialize itself as XML.
public interface Xomw_prepro_node {
// number of child nodes
int Subs__len();
// child node at index i
Xomw_prepro_node Subs__get_at(int i);
// appends a child node
void Subs__add(Xomw_prepro_node sub);
// writes this node's XML form to bfr
void To_xml(Bry_bfr bfr);
}
// Plain-text node: serialized as its raw bytes with no surrounding tag.
class Xomw_prepro_node__text extends Xomw_prepro_node__base {
    protected final byte[] bry;
    public Xomw_prepro_node__text(byte[] bry) {
        this.bry = bry;
    }
    // raw bytes of the text
    public byte[] Bry() {
        return bry;
    }
    // writes the raw bytes as-is
    @Override public void To_xml(Bry_bfr bfr) {
        bfr.Add(bry);
    }
}
// Comment node: serialized as <comment>...</comment> around the stored bytes.
class Xomw_prepro_node__comment extends Xomw_prepro_node__base {
    protected final byte[] bry;
    public Xomw_prepro_node__comment(byte[] bry) {
        this.bry = bry;
    }
    // bytes of the comment, as passed to the constructor
    public byte[] Bry() {
        return bry;
    }
    // writes <comment>, the stored bytes, then </comment>
    @Override public void To_xml(Bry_bfr bfr) {
        bfr.Add_str_a7("<comment>").Add(bry).Add_str_a7("</comment>");
    }
}
// Extension-tag node: holds the tag name, attribute text, inner content and close text.
class Xomw_prepro_node__ext extends Xomw_prepro_node__base {
    private final byte[] name;
    private final byte[] attr;
    private final byte[] inner;
    private final byte[] close;
    public Xomw_prepro_node__ext(byte[] name, byte[] attr, byte[] inner, byte[] close) {
        this.name = name;
        this.attr = attr;
        this.inner = inner;
        this.close = close;
    }
    public byte[] Name()  {return name;}
    public byte[] Attr()  {return attr;}
    public byte[] Inner() {return inner;}
    public byte[] Close() {return close;}
    // writes <ext> containing <name>, <atr>, <inner> and <close> children, in that order
    @Override public void To_xml(Bry_bfr bfr) {
        bfr.Add_str_a7("<ext>");
        Write_elem(bfr, "<name>" , name , "</name>");
        Write_elem(bfr, "<atr>"  , attr , "</atr>");	// NOTE: tag is "atr", not "attr"
        Write_elem(bfr, "<inner>", inner, "</inner>");
        Write_elem(bfr, "<close>", close, "</close>");
        bfr.Add_str_a7("</ext>");
    }
    // writes open tag, body bytes, close tag
    private static void Write_elem(Bry_bfr bfr, String tag_bgn, byte[] body, String tag_end) {
        bfr.Add_str_a7(tag_bgn);
        bfr.Add(body);
        bfr.Add_str_a7(tag_end);
    }
}
// Heading node: holds the heading level, the heading's index, and the heading text.
class Xomw_prepro_node__heading extends Xomw_prepro_node__base {
    public Xomw_prepro_node__heading(int heading_index, int title_index, byte[] text) {
        this.heading_index = heading_index;
        this.title_index = title_index;
        this.text = text;
    }
    // value written to the "level" attribute
    public int Heading_index() {return heading_index;} private final int heading_index;
    // value written to the "i" attribute
    public int Title_index() {return title_index;} private final int title_index;
    // bytes written between <h ...> and </h>
    public byte[] Text() {return text;} private final byte[] text;
    // writes <h level="{heading_index}" i="{title_index}">{text}</h>
    @Override public void To_xml(Bry_bfr bfr) {
        // FIX: was "<h " followed by " level=", which emitted two spaces ("<h  level=");
        // now one space, matching how <template lineStart=...> is written
        bfr.Add_str_a7("<h");
        bfr.Add_str_a7(" level=\"").Add_int_variable(heading_index);
        bfr.Add_str_a7("\" i=\"").Add_int_variable(title_index);
        bfr.Add_str_a7("\">");
        bfr.Add(text);
        bfr.Add_str_a7("</h>");
    }
}
// Template-argument node ({{{...}}}): a title plus zero or more parts.
class Xomw_prepro_node__tplarg extends Xomw_prepro_node__base {
    // title : bytes written inside <title>
    // parts : parts written after the title; may be null (treated as no parts)
    public Xomw_prepro_node__tplarg(byte[] title, Xomw_prepro_node__part[] parts) {
        this.title = title; this.parts = parts;
    }
    public byte[] Title() {return title;} private final byte[] title;
    // returns the array passed to the constructor (possibly null)
    public Xomw_prepro_node__part[] Parts() {return parts;} private final Xomw_prepro_node__part[] parts;
    // writes <tplarg><title>{title}</title>{each part's XML}</tplarg>
    @Override public void To_xml(Bry_bfr bfr) {
        bfr.Add_str_a7("<tplarg>");
        bfr.Add_str_a7("<title>").Add(title);
        bfr.Add_str_a7("</title>");
        // FIX: guard against null parts; iterating a null array would throw NullPointerException
        if (parts != null) {
            for (Xomw_prepro_node__part part : parts)
                part.To_xml(bfr);
        }
        bfr.Add_str_a7("</tplarg>");
    }
}

View File

@@ -1,28 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.mws.parsers.prepros; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
// Base node: implements child-list handling; subclasses supply To_xml.
public abstract class Xomw_prepro_node__base implements Xomw_prepro_node {
    // child list; stays null until the first Subs__add
    private List_adp subs;
    // number of children; 0 when no child was ever added
    public int Subs__len() {
        if (subs == null) return 0;
        return subs.Len();
    }
    // child at index i; null when no child was ever added
    public Xomw_prepro_node Subs__get_at(int i) {
        if (subs == null) return null;
        return (Xomw_prepro_node)subs.Get_at(i);
    }
    // appends a child, creating the list on first use
    public void Subs__add(Xomw_prepro_node sub) {
        if (subs == null) {
            subs = List_adp_.New();
        }
        subs.Add(sub);
    }
    public abstract void To_xml(Bry_bfr bfr);
}

View File

@@ -1,45 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.mws.parsers.prepros; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
// One part of a template / template-argument: either positional (idx > 0) or named (key), plus a value.
public class Xomw_prepro_node__part extends Xomw_prepro_node__base {
    private final int idx;
    private final byte[] key;
    private final byte[] val;
    public Xomw_prepro_node__part(int idx, byte[] key, byte[] val) {
        this.idx = idx;
        this.key = key;
        this.val = val;
    }
    // positional index; values > 0 select the positional form in To_xml
    public int Idx() {return idx;}
    // name bytes; only written by To_xml when idx <= 0
    public byte[] Key() {return key;}
    // value bytes
    public byte[] Val() {return val;}
    // writes either
    //   <part><name index="{idx}" /><value>{val}</value></part>      when idx > 0
    //   <part><name>{key}</name>=<value>{val}</value></part>         otherwise
    @Override public void To_xml(Bry_bfr bfr) {
        bfr.Add_str_a7("<part>").Add_str_a7("<name");
        boolean positional = idx > 0;
        if (positional)
            bfr.Add_str_a7(" index=\"").Add_int_variable(idx).Add_str_a7("\" />");
        else
            bfr.Add_str_a7(">").Add(key).Add_str_a7("</name>").Add_str_a7("=");
        bfr.Add_str_a7("<value>").Add(val).Add_str_a7("</value>").Add_str_a7("</part>");
    }
}

View File

@@ -1,36 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.mws.parsers.prepros; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
// Template node ({{...}}): a title, zero or more parts, and a line-start marker.
public class Xomw_prepro_node__template extends Xomw_prepro_node__base {
    // title      : bytes written inside <title>
    // parts      : parts written after the title; may be null (treated as no parts)
    // line_start : written as the lineStart attribute when > 0
    public Xomw_prepro_node__template(byte[] title, Xomw_prepro_node__part[] parts, int line_start) {
        this.title = title; this.parts = parts; this.line_start = line_start;
    }
    public byte[] Title() {return title;} private final byte[] title;
    // returns the array passed to the constructor (possibly null)
    public Xomw_prepro_node__part[] Parts() {return parts;} private final Xomw_prepro_node__part[] parts;
    public int Line_start() {return line_start;} private final int line_start;
    // writes <template[ lineStart="{line_start}"]><title>{title}</title>{each part's XML}</template>
    @Override public void To_xml(Bry_bfr bfr) {
        bfr.Add_str_a7("<template");
        // attribute is omitted entirely when line_start <= 0
        if (line_start > 0) bfr.Add_str_a7(" lineStart=\"").Add_int_variable(line_start).Add_byte_quote();
        bfr.Add_byte(Byte_ascii.Angle_end);
        bfr.Add_str_a7("<title>").Add(title);
        bfr.Add_str_a7("</title>");
        // FIX: guard against null parts; iterating a null array would throw NullPointerException
        if (parts != null) {
            for (Xomw_prepro_node__part part : parts)
                part.To_xml(bfr);
        }
        bfr.Add_str_a7("</template>");
    }
}

View File

@@ -1,66 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.mws.parsers.prepros; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
// Delimiter rule: begin / end byte sequences, min / max counts, and a per-index table of Name__* ids.
class Xomw_prepro_rule {
    public static final int Name__invalid = -1, Name__null = 0, Name__tmpl = 1, Name__targ = 2;
    private static final byte[] Name__tmpl_bry = Bry_.new_a7("template"), Name__targ_bry = Bry_.new_a7("tplarg");
    public final byte[] bgn;
    public final byte[] end;
    public final int min;
    public final int max;
    public final int[] names;
    public Xomw_prepro_rule(byte[] bgn, byte[] end, int min, int max, int[] names) {
        this.bgn = bgn;
        this.end = end;
        this.min = min;
        this.max = max;
        this.names = names;
    }
    // true when idx is below names.length and the entry is not Name__invalid
    public boolean Names_exist(int idx) {
        if (idx >= names.length) return false;
        return names[idx] != Name__invalid;
    }
    // maps a Name__* id to its element name; null for Name__invalid, Name__null and any unknown id
    public static byte[] Name(int type) {
        if (type == Name__tmpl) return Name__tmpl_bry;
        if (type == Name__targ) return Name__targ_bry;
        return null;
    }
}
// Element descriptor: a type id, the element name, and the precomputed "</" + name bytes.
class Xomw_prepro_elem {
    public static final int Type__comment = 0, Type__other = 1;
    private static final byte[] Bry__tag_end = Bry_.new_a7("</");
    public final int type;
    public final byte[] name;
    // "</" followed by name, built once in the constructor; the closing ">" is not included
    public final byte[] tag_end_lhs;
    public Xomw_prepro_elem(int type, byte[] name) {
        this.type = type;
        this.name = name;
        byte[] end_lhs = Bry_.Add(Bry__tag_end, name);
        this.tag_end_lhs = end_lhs;
    }
}
/**
 * Value stored in the current-character trie: the matched byte sequence and
 * a one-byte code identifying which kind of token it is.
 */
class Xomw_prepro_curchar_itm {
  public byte[] bry;  // matched sequence
  public byte type;   // token code for the sequence

  public Xomw_prepro_curchar_itm(byte[] bry, byte type) {
    this.type = type;
    this.bry = bry;
  }
}

View File

@@ -1,170 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.mws.parsers.prepros; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
/**
 * Stack of currently open preprocessor pieces (templates, links, headings).
 *
 * The stack exposes a "current accumulator": the buffer of the current part
 * of the top piece, or the root buffer when no piece is open.
 */
class Xomw_prepro_stack {
  public List_adp stack = List_adp_.New();
  public Xomw_prepro_piece top;  // top-most open piece; null when the stack is empty
  private Bry_bfr root_accum = Bry_bfr_.New(), accum;
  private final Xomw_prepro_flags flags = new Xomw_prepro_flags();  // reused by Get_flags()

  public Xomw_prepro_stack() {
    accum = root_accum;
  }

  /**
   * Resets the stack to its freshly-constructed state.
   *
   * FIX: previously only the buffer `accum` happened to point at was cleared
   * and `accum` was never re-pointed at the root buffer. If the previous run
   * ended with pieces still on the stack, `accum` kept referencing a discarded
   * piece's buffer, so the next run wrote its output into that stale buffer
   * instead of the root accumulator.
   */
  public void Clear() {
    stack.Clear();
    root_accum.Clear();
    accum = root_accum;
    top = null;
  }

  /** Number of open pieces. */
  public int Count() {return stack.Len();}
  /** Buffer that output should currently be appended to. */
  public Bry_bfr Get_accum() {return accum;}
  /** Buffer used when no piece is open. */
  public Bry_bfr Get_root_accum() {return root_accum;}

  /** Current part of the top piece, or null when the stack is empty. */
  public Xomw_prepro_part Get_current_part() {
    return top == null ? null : top.Get_current_part();
  }

  /** Pushes `item` and makes its current part the accumulator. */
  public void Push(Xomw_prepro_piece item) {
    stack.Add(item);
    this.top = item;
    accum = top.Get_accum();
  }

  /**
   * Removes and returns the top piece; the accumulator falls back to the new
   * top piece, or to the root buffer when the stack becomes empty.
   * Throws when the stack is already empty.
   */
  public Xomw_prepro_piece Pop() {
    int len = stack.Len();
    if (len == 0) {
      throw Err_.new_wo_type("Xomw_prepro_stack: no elements remaining");
    }
    Xomw_prepro_piece rv = (Xomw_prepro_piece)stack.Get_at(len - 1);
    stack.Del_at(len - 1);
    len--;
    if (len > 0) {
      this.top = (Xomw_prepro_piece)stack.Get_at(len - 1);
      accum = top.Get_accum();
    } else {
      this.top = null;
      this.accum = root_accum;
    }
    return rv;
  }

  /** Starts a new part on the top piece (which must exist) and switches the accumulator to it. */
  public void Add_part(byte[] bry) {
    top.Add_part(bry);
    accum = top.Get_accum();
  }

  /**
   * Returns the scan flags for the current state: all false when nothing is
   * open, otherwise whatever the top piece reports. The same flags instance
   * is returned on every call.
   */
  public Xomw_prepro_flags Get_flags() {
    if (stack.Len() == 0) {
      flags.Find_eq = false;
      flags.Find_pipe = false;
      flags.In_heading = false;
    }
    else {
      top.Set_flags(flags);
    }
    return flags;
  }
}
/** Mutable holder for the three scan flags reported by the preprocessor stack. */
class Xomw_prepro_flags {
  public boolean In_heading = false;  // top piece was opened by a newline
  public boolean Find_eq = false;     // "=" should be treated as a name/value separator
  public boolean Find_pipe = false;   // "|" should be treated as a part separator
}
/**
 * One open construct on the preprocessor stack: its opening / closing
 * sequences, how many opening characters were seen, and the list of parts
 * accumulated so far. The last part is always the "current" part.
 */
class Xomw_prepro_piece {
  public static final byte[] Brack_bgn_bry = Bry_.new_a7("[");

  public final byte[] open;         // Opening character (\n for heading)
  public final byte[] close;        // Matching closing char;
  public int count;                 // Number of opening characters found (number of "=" for heading)
  public final boolean line_start;  // True if the open char appeared at the start of the input line; Not set for headings.
  public final int start_pos;
  public List_adp parts = List_adp_.New();

  /** Creates the piece with a single empty part. */
  public Xomw_prepro_piece(byte[] open, byte[] close, int count, int start_pos, boolean line_start) {
    this.open = open;
    this.close = close;
    this.count = count;
    this.start_pos = start_pos;
    this.line_start = line_start;
    parts.Add(new Xomw_prepro_part(Bry_.Empty));
  }

  /** Discards all parts and starts again with a single empty part. */
  public void Parts__renew() {
    parts.Clear();
    this.Add_part(Bry_.Empty);
  }

  /** Returns the last part in the list. */
  public Xomw_prepro_part Get_current_part() {
    return (Xomw_prepro_part)parts.Get_at(parts.Len() - 1);
  }

  /** Returns the buffer of the current part. */
  public Bry_bfr Get_accum() {
    return Get_current_part().bfr;
  }

  /** Appends a new part whose buffer starts with `bry`; it becomes the current part. */
  public void Add_part(byte[] bry) {
    parts.Add(new Xomw_prepro_part(bry));
  }

  /**
   * Writes the scan flags for this piece into `flags`.
   *
   * Pipes are only significant for pieces that are neither headings nor
   * "[" links. Equals signs are only significant once at least one pipe has
   * been seen AND the current part does not have an "=" yet.
   */
  public void Set_flags(Xomw_prepro_flags flags) {
    boolean open_is_nl = Bry_.Eq(open, Byte_ascii.Nl_bry);
    boolean find_pipe = !open_is_nl && !Bry_.Eq(open, Brack_bgn_bry);
    flags.Find_pipe = find_pipe;
    // FIX: was `Eqpos != -1`, the inverse of MediaWiki's `!isset( ...->eqpos )`.
    // With the inverted test, "=" detection was switched on only for parts that
    // already had an "=", and switched off for parts that still needed one.
    flags.Find_eq = find_pipe && parts.Len() > 1 && Get_current_part().Eqpos == -1;
    flags.In_heading = open_is_nl;
  }

  /**
   * Get the output String that would result if the close is not found.
   *
   * @param tmp_bfr scratch buffer; left empty on return
   * @param opening_count number of opening sequences to emit, or -1 to use `count`
   */
  public byte[] Break_syntax(Bry_bfr tmp_bfr, int opening_count) {
    if (Bry_.Eq(open, Byte_ascii.Nl_bry)) {
      // FIX: was parts[0]. The constructor always creates an empty first part, so
      // a heading whose "=" run was added via Add_part keeps all of its text in
      // the last part; reading part 0 returned an empty array and dropped it.
      return Get_current_part().bfr.To_bry();
    }
    if (opening_count == -1) {
      opening_count = count;
    }
    tmp_bfr.Add(Bry_.Repeat_bry(open, opening_count));
    // concat parts with "|"
    int len = parts.Len();
    for (int i = 0; i < len; i++) {
      if (i > 0) {
        tmp_bfr.Add_byte_pipe();
      }
      Xomw_prepro_part part = (Xomw_prepro_part)parts.Get_at(i);
      tmp_bfr.Add(part.bfr.To_bry());
    }
    return tmp_bfr.To_bry_and_clear();
  }
}
/**
 * One part of a piece: an output buffer plus three positions that start
 * out as -1 ("not set").
 */
class Xomw_prepro_part {
  public final Bry_bfr bfr = Bry_bfr_.New();  // accumulated output of this part
  public int Eqpos = -1;        // offset of "=" inside bfr, or -1
  public int comment_end = -1;  // end position of the last comment seen, or -1
  public int visual_end = -1;   // position recorded when a comment is seen, or -1

  /** Creates the part and seeds its buffer with `bry`. */
  public Xomw_prepro_part(byte[] bry) {
    this.bfr.Add(bry);
  }
}

View File

@@ -1,789 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.mws.parsers.prepros; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
import gplx.core.btries.*;
import gplx.xowa.mws.utls.*;
public class Xomw_prepro_wkr { // THREAD.UNSAFE: caching for repeated calls
// scratch buffer for building byte arrays; emptied again by each To_bry_and_clear() call
private final Bry_bfr tmp_bfr = Bry_bfr_.New();
// reused list of int[] {bgn, end} ranges for a run of adjacent comments
private final List_adp comments_list = List_adp_.New();
// element tries, filled by Init_by_wiki; "__y" is used when for_inclusion is true, "__n" when it is false
private final Btrie_slim_mgr elements_trie__y = Btrie_slim_mgr.ci_a7(), elements_trie__n = Btrie_slim_mgr.ci_a7();
// tags that are allowed to run to the end of the source when no closing tag is found
private final Hash_adp_bry xmlish_allow_missing_end_tag = Hash_adp_bry.cs().Add_many_str("includeonly", "noinclude", "onlyinclude");
// per-run cache of tag names for which the closing-tag search already failed
private final Hash_adp_bry no_more_closing_tag = Hash_adp_bry.cs();
// stack of open pieces; cleared at the start of every Preprocess_to_xml call
private final Xomw_prepro_stack stack = new Xomw_prepro_stack();
// result holder handed to Btrie_slim_mgr.Match_at
private final Btrie_rv trv = new Btrie_rv();
// current accumulator; re-pointed at stack.Get_accum() whenever the stack changes
private Bry_bfr accum = Bry_bfr_.New();
/**
 * Rebuilds both element tries from the given extension tag names.
 * The not-for-inclusion trie additionally gets "includeonly"; the
 * for-inclusion trie additionally gets "noinclude".
 */
public void Init_by_wiki(String... xmlish_elems_ary) {
  // the two tries are independent of each other
  Elements_trie__init_by_wiki(elements_trie__n, ignored_tags_n, xmlish_elems_ary, "includeonly");
  Elements_trie__init_by_wiki(elements_trie__y, ignored_tags_y, xmlish_elems_ary, "noinclude");
}
/**
 * Empties `trie` and registers, in this order: the comment hook "!--",
 * every name in `strip_list_ary`, `xmlish_elem`, and every entry of
 * `ignored_tags`.
 */
private void Elements_trie__init_by_wiki(Btrie_slim_mgr trie, Ordered_hash ignored_tags, String[] strip_list_ary, String xmlish_elem) {
  trie.Clear();
  // "!--" is the only hook registered as a comment
  Elements_trie__add(trie, Bool_.Y, "!--", "comment");
  // PORTED: $xmlishElements = parser->getStripList();
  int strip_list_len = strip_list_ary.length;
  for (int idx = 0; idx < strip_list_len; idx++) {
    String tag = strip_list_ary[idx];
    Elements_trie__add(trie, Bool_.N, tag, tag);
  }
  // PORTED: "$xmlishElements[] = 'noinclude';" or "$xmlishElements[] = 'includeonly';"
  Elements_trie__add(trie, Bool_.N, xmlish_elem, xmlish_elem);
  // PORTED: $xmlishRegex = implode( '|', array_merge( $xmlishElements, $ignoredTags ) );
  for (int idx = 0, len = ignored_tags.Count(); idx < len; idx++) {
    String tag = String_.new_u8((byte[])ignored_tags.Get_at(idx));
    Elements_trie__add(trie, Bool_.N, tag, tag);
  }
}
/** Registers `hook` in `trie`, mapped to a new element descriptor carrying `name`. */
private static void Elements_trie__add(Btrie_slim_mgr trie, boolean type_is_comment, String hook, String name) {
  int elem_type = Xomw_prepro_elem.Type__other;
  if (type_is_comment) {
    elem_type = Xomw_prepro_elem.Type__comment;
  }
  Xomw_prepro_elem elem = new Xomw_prepro_elem(elem_type, Bry_.new_a7(name));
  trie.Add_obj(hook, elem);
}
/**
 * Scans wikitext and returns XML-like markup wrapped in "<root>" ... "</root>".
 *
 * Literal text is HTML-escaped into the current accumulator. Recognized constructs are
 * emitted as elements: "<ignore>", "<comment>", "<ext>" (with "<name>", "<attr>", optional
 * "<inner>" and "<close>"), "<h level=.. i=..>", and "<template>" / "<tplarg>" (with
 * "<title>" and "<part>" children). Pieces still open at the end of the input are
 * written back as literal text.
 *
 * The element tries consulted here are filled by Init_by_wiki. The method reuses
 * instance-level buffers and the instance-level stack, so one instance must not be used
 * by two callers at the same time.
 *
 * @param src wikitext bytes to scan
 * @param for_inclusion selects which tags / elements are ignored; when true and src contains
 *        both "<onlyinclude>" and "</onlyinclude>", text outside those tags is wrapped in "<ignore>"
 * @return the bytes of the root accumulator, which is cleared by the call
 */
public byte[] Preprocess_to_xml(byte[] src, boolean for_inclusion) {
// RELIC.PROC_VAR: forInclusion = $flags & Parser::PTD_FOR_INCLUSION;
// RELIC.INIT_BY_WIKI: $xmlishElements = parser->getStripList();
// RELIC.CLASS_VAR: $xmlishAllowMissingEndTag = [ 'includeonly', 'noinclude', 'onlyinclude' ];
boolean enable_only_include = false;
// PORTED: rewritten so that all add / del is done in INIT_BY_WIKI
Ordered_hash ignored_tags;
Hash_adp ignored_elements;
Btrie_slim_mgr elements_trie;
if (for_inclusion) {
ignored_tags = ignored_tags_y; // RELIC: $ignoredTags = [ 'includeonly', '/includeonly' ];
ignored_elements = ignored_elements__y; // RELIC: $ignoredElements = [ 'noinclude' ];
// RELIC.INIT_BY_WIKI: $xmlishElements[] = 'noinclude';
if ( Bry_.Has(src, Bry__only_include_bgn)
&& Bry_.Has(src, Bry__only_include_end)) {
enable_only_include = true;
}
elements_trie = elements_trie__y;
}
else {
ignored_tags = ignored_tags_n; // $ignoredTags = [ 'noinclude', '/noinclude', 'onlyinclude', '/onlyinclude' ];
ignored_elements = ignored_elements__n; // $ignoredElements = [ 'includeonly' ];
// RELIC.INIT_BY_WIKI: $xmlishElements[] = 'includeonly';
elements_trie = elements_trie__n;
}
// RELIC.INIT_BY_WIKI: $xmlishRegex = implode( '|', array_merge( $xmlishElements, $ignoredTags ) );
// RELIC.REGEX
// Use "A" modifier (anchored) instead of "^", because ^ doesn't work with an offset
// $elementsRegex = "~($xmlishRegex)(?:\s|\/>|>)|(!--)~iA";
stack.Clear();
// RELIC.REGEX:
// $searchBase = "[{<\n"; # }
// RELIC.BRY_FIND
// For fast reverse searches
// $revText = strrev( $text );
// $lengthText = strlen( $text );
// Input pointer, starts out pointing to a pseudo-newline before the start
int i = 0;
// Current accumulator
accum = stack.Get_accum();
accum.Add_str_a7("<root>");
// True to find equals signs in arguments
boolean find_equals = false;
// True to take notice of pipe characters
boolean find_pipe = false;
int heading_index = 1;
// True if $i is inside a possible heading
boolean in_heading = false;
// True if there are no more greater-than (>) signs right of $i
boolean no_more_gt = false;
// Map of tag name => true if there are no more closing tags of given type right of $i
no_more_closing_tag.Clear();
// True to ignore all input up to the next <onlyinclude>
boolean find_only_include = enable_only_include;
// Do a line-start run without outputting an LF character
boolean fake_line_start = true;
// XOWA: init
int src_len = src.length;
int found = -1;
byte[] cur_char = Bry_.Empty;
byte[] cur_closing = Bry_.Empty;
byte[] inner = null;
Xomw_prepro_rule rule = null;
while (true) {
if (find_only_include) {
// Ignore all input up to the next <onlyinclude>
int start_pos = Bry_find_.Find_fwd(src, Bry__only_include_bgn, i, src_len);
if (start_pos == Bry_find_.Not_found) {
// Ignored section runs to the end
accum.Add_str_a7("<ignore>").Add_bry_escape_html(src, i, src_len).Add_str_a7("</ignore>");
break;
}
int tag_end_pos = start_pos + Bry__only_include_bgn.length; // past-the-end
accum.Add_str_a7("<ignore>").Add_bry_escape_html(src, i, tag_end_pos).Add_str_a7("</ignore>");
i = tag_end_pos;
find_only_include = false;
}
if (fake_line_start) {
found = Found__line_bgn;
cur_char = Bry_.Empty;
}
else {
// Find next opening brace, closing brace or pipe
// RELIC.REGEX: $search = $searchBase;
if (stack.top == null) {
cur_closing = Bry_.Empty;
}
else {
cur_closing = stack.top.close;
// RELIC.REGEX: $search .= $currentClosing;
}
if (find_pipe) {
// RELIC.REGEX: $search .= '|';
}
if (find_equals) {
// First equals will be for the template
// RELIC.REGEX: $search .= '=';
}
// Output literal section, advance input counter
// PORTED: "$literalLength = strcspn(src, $search, i)"; NOTE: no trie b/c of frequent changes to $search
int literal_len = 0;
boolean loop_stop = false;
// loop chars until search_char is found
for (int j = i; j < src_len; j++) {
byte b = src[j];
switch (b) { // handle '$searchBase = "[{<\n";'
case Byte_ascii.Brack_bgn:
case Byte_ascii.Curly_bgn:
case Byte_ascii.Angle_bgn:
case Byte_ascii.Nl:
loop_stop = true;
break;
case Byte_ascii.Pipe: // handle "find_pipe"
if (find_pipe) loop_stop = true;
break;
case Byte_ascii.Eq: // handle "find_equals"
if (find_equals) loop_stop = true;
break;
default: // handle "cur_closing"; specified by piece.close and rule.close, so "\n", "}", "]" and "}-"
if (cur_closing != Bry_.Empty) {
byte cur_closing_0 = cur_closing[0];
if (b == cur_closing_0) {
if (cur_closing.length == 1) { // handle "\n", "}", "]"
loop_stop = true;
}
else {// handle "}-"
int nxt_idx = j + 1;
if (nxt_idx < src_len && src[nxt_idx] == Byte_ascii.Dash)
loop_stop = true;
}
}
}
break;
}
if (loop_stop)
break;
else
literal_len++;
}
if (literal_len > 0) {
accum.Add_bry_escape_html(src, i, i + literal_len);
i += literal_len;
}
if (i >= src_len) {
if (Bry_.Eq(cur_closing, Byte_ascii.Nl_bry)) {
// Do a past-the-end run to finish off the heading
cur_char = Bry_.Empty;
found = Found__line_end;
}
else {
// All done
break;
}
}
else {
// PORTED: "if ( $curChar == '|' ) {", etc..
Xomw_prepro_curchar_itm cur_char_itm = (Xomw_prepro_curchar_itm)cur_char_trie.Match_at(trv, src, i, src_len);
if (cur_char_itm != null) {
cur_char = cur_char_itm.bry;
switch (cur_char_itm.type) {
case Byte_ascii.Pipe: found = Found__pipe; break;
case Byte_ascii.Eq: found = Found__equals; break;
case Byte_ascii.Angle_bgn: found = Found__angle; break;
case Byte_ascii.Nl: found = in_heading ? Found__line_end : Found__line_bgn; break;
// PORTED: "elseif ( $curChar == $currentClosing )"
case Byte_ascii.Curly_end: found = Found__close; break;
case Byte_ascii.Brack_end: found = Found__close; break;
case Byte_ascii.At: found = Found__close; break; // NOTE: At is type for "}-"
// PORTED: "elseif ( isset( $this->rules[$curChar] ) )"
case Byte_ascii.Curly_bgn: {found = Found__open; rule = rule_curly; break;}
case Byte_ascii.Brack_bgn: {found = Found__open; rule = rule_brack; break;}
case Byte_ascii.Dash: {found = Found__open; rule = rule_langv; break;}
}
}
else {
i++;
continue;
}
}
}
if (found == Found__angle) {
// Handle </onlyinclude>
if ( enable_only_include
&& Bry_.Eq(src, i, i + Len__only_include_end, Bry__only_include_end)) {
find_only_include = true;
continue;
}
// Determine element name
// PORTED: $elementsRegex = "~($xmlishRegex)(?:\s|\/>|>)|(!--)~iA"; EX: "(pre|ref)(?:\s|\/>|>)|(!--)
Xomw_prepro_elem element = (Xomw_prepro_elem)elements_trie.Match_at(trv, src, i + 1, src_len);
if (element == null) {
// Element name missing or not listed
accum.Add(Bry__escaped_lt);
i++;
continue;
}
// Handle comments
if (element.type == Xomw_prepro_elem.Type__comment) {
// To avoid leaving blank lines, when a sequence of
// space-separated comments is both preceded and followed by
// a newline (ignoring spaces), then
// trim leading and trailing spaces and the trailing newline.
// Find the end
int end_pos = Bry_find_.Find_fwd(src, Bry__comment_end, i + 4, src_len);
if (end_pos == Bry_find_.Not_found) {
// Unclosed comment in input, runs to end
accum.Add_str_a7("<comment>").Add_bry_escape_html(src, i, src_len).Add_str_a7("</comment>");
i = src_len;
}
else {
// Search backwards for leading whitespace
int ws_bgn = i > 0 ? i - Php_str_.Strspn_bwd__space_or_tab(src, i, -1) : 0;
// Search forwards for trailing whitespace
// $wsEnd will be the position of the last space (or the '>' if there's none)
int ws_end = end_pos + 2 + Php_str_.Strspn_fwd__space_or_tab(src, end_pos + 3, -1, src_len);
// Keep looking forward as long as we're finding more
// comments.
comments_list.Clear();
comments_list.Add(new int[] {ws_bgn, ws_end});
while (ws_end + 5 < src_len && Bry_.Eq(src, ws_end + 1, ws_end + 5, Bry__comment_bgn)) {
int cur_char_pos = Bry_find_.Find_fwd(src, Bry__comment_end, ws_end + 4);
if (cur_char_pos == Bry_find_.Not_found) {
break;
}
cur_char_pos = cur_char_pos + 2 + Php_str_.Strspn_fwd__space_or_tab(src, cur_char_pos + 3, -1, src_len);
comments_list.Add(new int[] {ws_end + 1, cur_char_pos});
ws_end = cur_char_pos;
}
// Eat the line if possible
// TODO: This could theoretically be done if $wsStart == 0, i.e. for comments at
// the overall start. That's not how Sanitizer::removeHTMLcomments() did it, but
// it's a possible beneficial b/c break.
int bgn_pos = -1;
if ( ws_bgn > 0
&& Bry_.Eq(src, ws_bgn - 1, ws_bgn , Byte_ascii.Nl_bry)
&& Bry_.Eq(src, ws_end + 1, ws_end + 2, Byte_ascii.Nl_bry)
) {
// Remove leading whitespace from the end of the accumulator
// Sanity check first though
int ws_len = i - ws_bgn;
int accum_len = accum.Len();
if ( ws_len > 0
&& Php_str_.Strspn_fwd__space_or_tab(accum.Bfr(), accum_len - ws_len, -1, accum_len) == ws_len) {
accum.Del_by(ws_len);
}
// Dump all but the last comment to the accumulator
int comments_list_len = comments_list.Len();
for (int j = 0; j < comments_list_len; j++) {
int[] com = (int[])comments_list.Get_at(j);
bgn_pos = com[0];
end_pos = com[1] + 1;
if (j == comments_list_len - 1) {
break;
}
inner = Bry_.Mid(src, bgn_pos, end_pos);
accum.Add_str_a7("<comment>").Add_bry_escape_html(inner).Add_str_a7("</comment>");
}
// Do a line-start run next time to look for headings after the comment
fake_line_start = true;
}
else {
// No line to eat, just take the comment itself
bgn_pos = i;
end_pos += 2;
}
if (stack.top != null) {
Xomw_prepro_part part = stack.top.Get_current_part();
if (!(part.comment_end != -1 && part.comment_end == ws_bgn - 1)) {
part.visual_end = ws_bgn;
}
// Else comments abutting, no change in visual end
part.comment_end = end_pos;
}
i = end_pos + 1;
inner = Bry_.Mid(src, bgn_pos, end_pos + 1);
accum.Add_str_a7("<comment>").Add_bry_escape_html(inner).Add_str_a7("</comment>");
}
continue;
}
byte[] name = element.name;
// RELIC.BTRIE_CI: $lowerName = strtolower( $name );
int atr_bgn = i + name.length + 1;
// Find end of tag
int tag_end_pos = no_more_gt ? Bry_find_.Not_found : Bry_find_.Find_fwd(src, Byte_ascii.Angle_end, atr_bgn);
if (tag_end_pos == Bry_find_.Not_found) {
// Infinite backtrack
// Disable tag search to prevent worst-case O(N^2) performance
no_more_gt = true;
accum.Add(Bry__escaped_lt);
i++;
continue;
}
// Handle ignored tags
if (ignored_tags.Has(name)) {
accum.Add_str_a7("<ignore>").Add_bry_escape_html(src, i, tag_end_pos + 1).Add_str_a7("</ignore>");
i = tag_end_pos + 1;
continue;
}
int tag_bgn_pos = i;
int atr_end = -1;
byte[] close = null;
if (src[tag_end_pos - 1] == Byte_ascii.Slash) {
atr_end = tag_end_pos - 1;
inner = null;
i = tag_end_pos + 1;
close = Bry_.Empty;
}
else {
atr_end = tag_end_pos;
// Find closing tag
// PORTED: `preg_match( "/<\/" . preg_quote( $name, '/' ) . "\s*>/i",`
boolean elem_end_found = false;
int elem_end_lhs = -1, elem_end_rhs = -1;
int elem_end_cur = tag_end_pos + 1;
while (true) {
// search for "</"
elem_end_lhs = Bry_find_.Find_fwd(src, Bry__end_lhs, elem_end_cur, src_len);
if (elem_end_lhs == Bry_find_.Not_found) {
break;
}
// verify $name
elem_end_cur = elem_end_lhs + 2; // 2="</"
int elem_end_tmp = elem_end_cur + name.length;
if (!Bry_.Eq_ci_a7(name, src, elem_end_cur, elem_end_tmp)) {
continue;
}
// verify "\s*>"
elem_end_cur = elem_end_tmp;
elem_end_cur = Bry_find_.Find_fwd_while(src, elem_end_cur, src_len, Byte_ascii.Space);
if (elem_end_cur == src_len) { // just "\s", but no ">"
break;
}
if (src[elem_end_cur] == Byte_ascii.Gt) {
elem_end_rhs = elem_end_cur + 1;
elem_end_found = true;
break;
}
}
if ( !no_more_closing_tag.Has(name)
&& elem_end_found) {
inner = Bry_.Mid(src, tag_end_pos + 1, elem_end_lhs);
i = elem_end_rhs;
tmp_bfr.Add_str_a7("<close>").Add_bry_escape_html(src, elem_end_lhs, elem_end_rhs).Add_str_a7("</close>");
close = tmp_bfr.To_bry_and_clear();
}
else {
// No end tag
if (xmlish_allow_missing_end_tag.Has(name)) {
// Let it run out to the end of the src.
inner = Bry_.Mid(src, tag_end_pos + 1);
i = src_len;
close = Bry_.Empty;
}
else {
// Don't match the tag, treat opening tag as literal and resume parsing.
i = tag_end_pos + 1;
accum.Add_bry_escape_html(src, tag_bgn_pos, tag_end_pos + 1);
// Cache results, otherwise we have O(N^2) performance for input like <foo><foo><foo>...
no_more_closing_tag.Add_if_dupe_use_nth(name, name);
continue;
}
}
}
// <includeonly> and <noinclude> just become <ignore> tags
if (ignored_elements.Has(name)) {
accum.Add_str_a7("<ignore>").Add_bry_escape_html(src, tag_bgn_pos, i).Add_str_a7("</ignore>");
continue;
}
accum.Add_str_a7("<ext>");
// PORTED:
// if ( $attrEnd <= $attrStart ) {
// $attr = '';
// } else {
// $attr = substr( $text, $attrStart, $attrEnd - $attrStart );
// }
accum.Add_str_a7("<name>").Add(name).Add_str_a7("</name>");
// Note that the attr element contains the whitespace between name and attribute,
// this is necessary for precise reconstruction during pre-save transform.
accum.Add_str_a7("<attr>");
if (atr_end > atr_bgn)
accum.Add_bry_escape_html(src, atr_bgn, atr_end);
accum.Add_str_a7("</attr>");
if (inner != null) {
accum.Add_str_a7("<inner>").Add_bry_escape_html(inner).Add_str_a7("</inner>");
}
accum.Add(close).Add_str_a7("</ext>");
}
else if (found == Found__line_bgn) {
// Is this the start of a heading?
// Line break belongs before the heading element in any case
if (fake_line_start) {
fake_line_start = false;
} else {
accum.Add(cur_char);
i++;
}
int count = Php_str_.Strspn_fwd__byte(src, Byte_ascii.Eq, i, 6, src_len);
if (count == 1 && find_equals) { // EX: "{{a|\n=b=\n"
// DWIM: This looks kind of like a name/value separator.
// Let's let the equals handler have it and break the
// potential heading. This is heuristic, but AFAICT the
// methods for completely correct disambiguation are very
// complex.
}
else if (count > 0) {
Xomw_prepro_piece piece = new Xomw_prepro_piece(Byte_ascii.Nl_bry, Byte_ascii.Nl_bry, count, i, false);
// NOTE: the piece constructor already added one empty part, so after this call the heading
// piece holds two parts and the "=" run plus the heading text accumulate in the second one
piece.Add_part(Bry_.Repeat(Byte_ascii.Eq, count));
stack.Push(piece);
accum = stack.Get_accum();
Xomw_prepro_flags flags = stack.Get_flags();
find_pipe = flags.Find_pipe;
find_equals = flags.Find_eq;
in_heading = flags.In_heading;
i += count;
}
}
else if (found == Found__line_end) {
Xomw_prepro_piece piece = stack.top;
// A heading must be open, otherwise \n wouldn't have been in the search list
if (!Bry_.Eq(piece.open, Byte_ascii.Nl_bry)) throw Err_.new_wo_type("assertion:piece must start with \\n");
Xomw_prepro_part part = piece.Get_current_part();
// Search back through the input to see if it has a proper close.
// Do this using the reversed String since the other solutions
// (end anchor, etc.) are inefficient.
// NOTE(review): this call passes "src_len - i" as the position, whereas the other Strspn_bwd__*
// calls in this method pass a forward position ("i", "search_bgn"); confirm which convention
// Php_str_.Strspn_bwd__space_or_tab expects
int ws_len = Php_str_.Strspn_bwd__space_or_tab(src, src_len - i, -1);
int search_bgn = i - ws_len;
if (part.comment_end != -1 && search_bgn -1 == part.comment_end) {
// Comment found at line end
// Search for equals signs before the comment
search_bgn = part.visual_end;
search_bgn = Bry_find_.Find_bwd__while_space_or_tab(src, search_bgn, 0);
search_bgn -= Php_str_.Strspn_bwd__space_or_tab(src, search_bgn, -1);
}
int count = piece.count;
int eq_len = Php_str_.Strspn_bwd__byte(src, Byte_ascii.Eq, search_bgn, -1);
byte[] element = Bry_.Empty;
if (eq_len > 0) {
if (search_bgn - eq_len == piece.start_pos) {
// This is just a single String of equals signs on its own line
// Replicate the doHeadings behavior /={count}(.+)={count}/
// First find out how many equals signs there really are (don't stop at 6)
count = eq_len;
if (count < 3) {
count = 0;
}
else {
count = (count - 1) / 2;
if (count > 6) count = 6;
}
}
else {
if (eq_len < count) count = eq_len; // PORTED: $count = min( $equalsLength, $count );
}
if (count > 0) {
// Normal match, output <h>
element = tmp_bfr.Add_str_a7("<h level=\"").Add_int_variable(count).Add_str_a7("\" i=\"").Add_int_variable(heading_index).Add_str_a7("\">").Add_bfr_and_preserve(accum).Add_str_a7("</h>").To_bry_and_clear();
heading_index++;
} else {
// Single equals sign on its own line, count=0
element = accum.To_bry();
}
}
else {
// No match, no <h>, just pass down the inner src
element = accum.To_bry();
}
// Unwind the stack
stack.Pop();
accum = stack.Get_accum();
Xomw_prepro_flags flags = stack.Get_flags();
find_pipe = flags.Find_pipe;
find_equals = flags.Find_eq;
in_heading = flags.In_heading;
// Append the result to the enclosing accumulator
accum.Add(element);
// Note that we do NOT increment the input pointer.
// This is because the closing linebreak could be the opening linebreak of
// another heading. Infinite loops are avoided because the next iteration MUST
// hit the heading open case above, which unconditionally increments the
// input pointer.
}
else if (found == Found__open) {
// count opening brace characters
int count = Php_str_.Strspn_fwd__byte(src, cur_char[0], i, -1, src_len); // NOTE: don't know how MediaWiki will handle "-{"
// we need to add to stack only if opening brace count is enough for one of the rules
if (count >= rule.min) {
// Add it to the stack
Xomw_prepro_piece piece = new Xomw_prepro_piece(cur_char, rule.end, count, -1, i > 0 && src[i - 1] == Byte_ascii.Nl);
stack.Push(piece);
accum = stack.Get_accum();
Xomw_prepro_flags flags = stack.Get_flags();
find_pipe = flags.Find_pipe;
find_equals = flags.Find_eq;
in_heading = flags.In_heading;
}
else {
// Add literal brace(s)
for (int j = 0; j < count; j++)
accum.Add_bry_escape_html(cur_char);
}
i += count;
}
else if (found == Found__close) {
Xomw_prepro_piece piece = stack.top;
// lets check if there are enough characters for closing brace
int max_count = piece.count;
int count = Php_str_.Strspn_fwd__byte(src, cur_char[0], i, max_count, src_len);
// check for maximum matching characters (if there are 5 closing characters, we will probably need only 3 - depending on the rules)
rule = Get_rule(piece.open);
int matching_count = -1;
if (count > rule.max) {
// The specified maximum exists in the callback array, unless the caller
// has made an error
matching_count = rule.max;
}
else {
// Count is less than the maximum
// Skip any gaps in the callback array to find the true largest match
// Need to use array_key_exists not isset because the callback can be null
matching_count = count;
while (matching_count > 0 && !rule.Names_exist(matching_count)) {
matching_count--;
}
}
if (matching_count <= 0) {
// No matching element found in callback array
// Output a literal closing brace and continue
for (int j = 0; j < count; j++)
accum.Add_bry_escape_html(cur_char);
i += count;
continue;
}
int name_type = rule.names[matching_count];
byte[] element = null;
if (name_type == Xomw_prepro_rule.Name__null) {
// No element, just literal text
tmp_bfr.Add(piece.Break_syntax(tmp_bfr, matching_count));
element = tmp_bfr.Add(Bry_.Repeat_bry(rule.end, matching_count)).To_bry_and_clear();
}
else {
// Create XML element
// Note: $parts is already XML, does not need to be encoded further
List_adp parts = piece.parts;
byte[] title = ((Xomw_prepro_part)parts.Get_at(0)).bfr.To_bry_and_clear();
parts.Del_at(0);
// The invocation is at the start of the line if lineStart is set in
// the stack, and all opening brackets are used up.
byte[] attr = null;
if (max_count == matching_count && piece.line_start) { // RELIC:!empty( $piece->lineStart )
attr = Bry_.new_a7(" lineStart=\"1\"");
}
else {
attr = Bry_.Empty;
}
byte[] name_bry = Xomw_prepro_rule.Name(name_type);
tmp_bfr.Add_str_a7("<").Add(name_bry).Add(attr).Add_str_a7(">");
tmp_bfr.Add_str_a7("<title>").Add(title).Add_str_a7("</title>");
int arg_idx = 1;
int parts_len = parts.Len();
for (int j = 0; j < parts_len; j++) {
Xomw_prepro_part part = (Xomw_prepro_part)parts.Get_at(j);
if (part.Eqpos != -1) {
Bry_bfr part_bfr = part.bfr;
byte[] part_bfr_bry = part_bfr.Bfr();
tmp_bfr.Add_str_a7("<part><name>").Add_mid(part_bfr_bry, 0, part.Eqpos);
tmp_bfr.Add_str_a7("</name>=<value>").Add_mid(part_bfr_bry, part.Eqpos + 1, part_bfr.Len());
tmp_bfr.Add_str_a7("</value></part>");
}
else {
tmp_bfr.Add_str_a7("<part><name index=\"").Add_int_variable(arg_idx).Add_str_a7("\" /><value>").Add(part.bfr.To_bry()).Add_str_a7("</value></part>");
arg_idx++;
}
}
element = tmp_bfr.Add_str_a7("</").Add(name_bry).Add_str_a7(">").To_bry_and_clear();
}
// Advance input pointer
i += matching_count;
// Unwind the stack
stack.Pop();
accum = stack.Get_accum();
// Re-add the old stack element if it still has unmatched opening characters remaining
if (matching_count < piece.count) {
piece.Parts__renew(); // PORTED: piece.parts = [ new PPDPart ];
piece.count -= matching_count;
// do we still qualify for any callback with remaining count?
int min = Get_rule(piece.open).min;
if (piece.count >= min) {
stack.Push(piece);
accum = stack.Get_accum();
}
else {
accum.Add(Bry_.Repeat_bry(piece.open, piece.count));
}
}
Xomw_prepro_flags flags = stack.Get_flags();
find_pipe = flags.Find_pipe;
find_equals = flags.Find_eq;
in_heading = flags.In_heading;
// Add XML element to the enclosing accumulator
accum.Add(element);
}
else if (found == Found__pipe) {
find_equals = true; // shortcut for getFlags()
stack.Add_part(Bry_.Empty);
accum = stack.Get_accum();
i++;
}
else if (found == Found__equals) {
find_equals = false; // shortcut for getFlags()
stack.Get_current_part().Eqpos = accum.Len();
accum.Add_byte(Byte_ascii.Eq);
i++;
}
}
// Output any remaining unclosed brackets
Bry_bfr root_accum = stack.Get_root_accum();
int stack_len = stack.stack.Len();
for (int j = 0; j < stack_len; j++) {
Xomw_prepro_piece piece = (Xomw_prepro_piece)stack.stack.Get_at(j);
root_accum.Add(piece.Break_syntax(tmp_bfr, -1));
}
root_accum.Add_str_a7("</root>");
return root_accum.To_bry_and_clear();
}
/**
 * Returns the rule whose opening sequence equals `bry`; rules are checked
 * in the order curly, brack, langv. Throws when none matches.
 */
private Xomw_prepro_rule Get_rule(byte[] bry) {
  Xomw_prepro_rule[] candidates = new Xomw_prepro_rule[] {rule_curly, rule_brack, rule_langv};
  for (Xomw_prepro_rule candidate : candidates) {
    if (Bry_.Eq(bry, candidate.bgn)) {
      return candidate;
    }
  }
  throw Err_.new_unhandled(bry);
}
// codes assigned to "found" in Preprocess_to_xml; each selects one branch of the main loop
private static final int
Found__line_bgn = 0
, Found__line_end = 1
, Found__pipe = 2
, Found__equals = 3
, Found__angle = 4
, Found__close = 5
, Found__open = 6
;
// bracket rules: (open, close, min count, max count, names); names is indexed by the number of
// matched brackets and holds the node kind for that count (Name__invalid = no entry for that count)
private static final Xomw_prepro_rule
rule_curly = new Xomw_prepro_rule(Bry_.new_a7("{"), Bry_.new_a7("}") , 2, 3, new int[] {Xomw_prepro_rule.Name__invalid, Xomw_prepro_rule.Name__invalid, Xomw_prepro_rule.Name__tmpl, Xomw_prepro_rule.Name__targ})
, rule_brack = new Xomw_prepro_rule(Bry_.new_a7("["), Bry_.new_a7("]") , 2, 2, new int[] {Xomw_prepro_rule.Name__invalid, Xomw_prepro_rule.Name__invalid, Xomw_prepro_rule.Name__null})
, rule_langv = new Xomw_prepro_rule(Bry_.new_a7("-{"), Bry_.new_a7("}-"), 1, 1, new int[] {Xomw_prepro_rule.Name__invalid, Xomw_prepro_rule.Name__null})
;
// literal byte sequences searched for or emitted by Preprocess_to_xml
private static final byte[]
Bry__only_include_bgn = Bry_.new_a7("<onlyinclude>")
, Bry__only_include_end = Bry_.new_a7("</onlyinclude>")
, Bry__comment_bgn = Bry_.new_a7("<!--")
, Bry__comment_end = Bry_.new_a7("-->")
, Bry__escaped_lt = Bry_.new_a7("&lt;")
, Bry__end_lhs = Bry_.new_a7("</")
;
private static final int Len__only_include_end = Bry__only_include_end.length;
// maps the token at the current input position to a Xomw_prepro_curchar_itm; see Cur_char_trie__new
private static final Btrie_slim_mgr cur_char_trie = Cur_char_trie__new();
// tags wrapped in <ignore> as soon as they are seen; "_y" is used when for_inclusion is true, "_n" when false
private static final Ordered_hash
ignored_tags_y = Ordered_hash_.New_bry().Add_many_str("includeonly", "/includeonly")
, ignored_tags_n = Ordered_hash_.New_bry().Add_many_str("noinclude", "/noinclude", "onlyinclude", "/onlyinclude");
// elements whose whole span (open tag through close tag) is wrapped in <ignore>; same "_y" / "_n" split
private static final Hash_adp_bry
ignored_elements__y = Hash_adp_bry.cs().Add_many_str("noinclude")
, ignored_elements__n = Hash_adp_bry.cs().Add_many_str("includeonly");
/**
 * Builds the trie that classifies the token at the current input position.
 * Every hook is typed by its own first byte, except "}-", which is typed
 * as Byte_ascii.At so it can be told apart from a plain "}".
 */
private static Btrie_slim_mgr Cur_char_trie__new() {
  Btrie_slim_mgr trie = Btrie_slim_mgr.ci_a7();
  String[] hooks = new String[] {"|", "=", "<", "\n", "{", "[", "-{", "}", "]"};
  int hooks_len = hooks.length;
  for (int idx = 0; idx < hooks_len; idx++) {
    byte[] hook = Bry_.new_a7(hooks[idx]);
    Xomw_prepro_curchar_itm itm = new Xomw_prepro_curchar_itm(hook, hook[0]);
    trie.Add_obj(hook, itm);
  }
  // handle "}-" separately
  byte[] langv_end = Bry_.new_a7("}-");
  Xomw_prepro_curchar_itm langv_itm = new Xomw_prepro_curchar_itm(langv_end, Byte_ascii.At);
  trie.Add_obj(langv_end, langv_itm);
  return trie;
}
}

View File

@@ -1,235 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.mws.parsers.prepros; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
import org.junit.*;
/**
 * Tests for Xomw_prepro_wkr: each case passes wikitext to the fixture and compares
 * the XML produced by Preprocess_to_xml against the expected string.
 * "COVERS" comments quote the upstream comment / code line that the case exercises.
 */
public class Xomw_prepro_wkr__tst {
	private final Xomw_prepro_wkr__fxt fxt = new Xomw_prepro_wkr__fxt();

	@Test public void Text() {
		String src = "abc";
		String expd = "<root>abc</root>";
		fxt.Test__parse(src, expd);
	}
	@Test public void Brack() {
		String src = "a[[b]]c";
		String expd = "<root>a[[b]]c</root>";
		fxt.Test__parse(src, expd);
	}
	// COVERS: "Add literal brace(s)"
	@Test public void Brack__one() {
		String src = "a[b]c";
		String expd = "<root>a[b]c</root>";
		fxt.Test__parse(src, expd);
	}
	// COVERS: "The specified maximum exists in the callback array, unless the caller"
	@Test public void Brack__max() {
		String src = "a[[[[[b]]]]]c";
		String expd = "<root>a[[[[[b]]]]]c</root>";
		fxt.Test__parse(src, expd);
	}
	@Test public void Template() {
		String src = "a{{b}}c";
		String expd = "<root>a<template><title>b</title></template>c</root>";
		fxt.Test__parse(src, expd);
	}
	@Test public void Template__args__idx() {
		String src = "a{{b|c|d}}e";
		String expd = "<root>a<template><title>b</title><part><name index=\"1\" /><value>c</value></part><part><name index=\"2\" /><value>d</value></part></template>e</root>";
		fxt.Test__parse(src, expd);
	}
	@Test public void Template__args__key() {
		String src = "a{{b|c=d}}e";
		String expd = "<root>a<template><title>b</title><part><name>c</name>=<value>d</value></part></template>e</root>";
		fxt.Test__parse(src, expd);
	}
	// COVERS: "The invocation is at the start of the line if lineStart is set in"
	@Test public void Template__line_start() {
		String src = String_.Concat_lines_nl_skip_last
		( "a"
		, "{{b}}"
		);
		String expd = String_.Concat_lines_nl_skip_last
		( "<root>a"
		, "<template lineStart=\"1\"><title>b</title></template></root>"
		);
		fxt.Test__parse(src, expd);
	}
	// COVERS: "do we still qualify for any callback with remaining count?"
	@Test public void Template__max() {
		String src = "a{{{{{b}}}}}c";
		String expd = "<root>a<template><title><tplarg><title>b</title></tplarg></title></template>c</root>";
		fxt.Test__parse(src, expd);
	}
	@Test public void Tplarg() {
		String src = "a{{{b}}}c";
		String expd = "<root>a<tplarg><title>b</title></tplarg>c</root>";
		fxt.Test__parse(src, expd);
	}
	@Test public void Tplarg__dflt() {
		String src = "a{{{b|c}}}d";
		String expd = "<root>a<tplarg><title>b</title><part><name index=\"1\" /><value>c</value></part></tplarg>d</root>";
		fxt.Test__parse(src, expd);
	}
	@Test public void Comment() {
		String src = "a<!--b-->c";
		String expd = "<root>a<comment>&lt;!--b--&gt;</comment>c</root>";
		fxt.Test__parse(src, expd);
	}
	// COVERS: "Unclosed comment in input, runs to end"
	@Test public void Comment__dangling() {
		String src = "a<!--b";
		String expd = "<root>a<comment>&lt;!--b</comment></root>";
		fxt.Test__parse(src, expd);
	}
	// COVERS: "Search backwards for leading whitespace"
	@Test public void Comment__ws() {
		String src = "a <!--b--> c";
		String expd = "<root>a <comment>&lt;!--b--&gt;</comment> c</root>"; // NOTE: space is outside comment
		fxt.Test__parse(src, expd);
	}
	// COVERS: "Dump all but the last comment to the accumulator"
	@Test public void Comment__many__ws() {
		String src = "a <!--1--> <!--2--> z";
		String expd = "<root>a <comment>&lt;!--1--&gt;</comment> <comment>&lt;!--2--&gt;</comment> z</root>"; // NOTE: space is outside comment;
		fxt.Test__parse(src, expd);
	}
	// COVERS: "Eat the line if possible"
	@Test public void Comment__nl__ws() {
		String src = String_.Concat_lines_nl_skip_last
		( "a"
		, " <!--1--> "
		, " <!--2--> "
		, "z"
		);
		String expd = String_.Concat_lines_nl_skip_last
		( "<root>a"
		, "<comment> &lt;!--1--&gt; " // NOTE: space is inside </comment> if flanked by nl;
		, "</comment><comment> &lt;!--2--&gt; "
		, "</comment>z</root>"
		);
		fxt.Test__parse(src, expd);
	}
	// COVERS.ALSO: "Note that the attr element contains the whitespace between name and attribute,"
	@Test public void Ext() {
		String src = "a<pre id=\"1\">b</pre>c";
		String expd = "<root>a<ext><name>pre</name><attr> id=&quot;1&quot;</attr><inner>b</inner><close>&lt;/pre&gt;</close></ext>c</root>";
		fxt.Test__parse(src, expd);
	}
	// COVERS: "if ( $text[$tagEndPos - 1] == '/' ) {"
	@Test public void Ext__inline() {
		String src_no_space = "a<pre/>b";
		String expd_no_space = "<root>a<ext><name>pre</name><attr></attr></ext>b</root>";
		fxt.Test__parse(src_no_space, expd_no_space);

		String src_space = "a<pre />b";
		String expd_space = "<root>a<ext><name>pre</name><attr> </attr></ext>b</root>";
		fxt.Test__parse(src_space, expd_space);
	}
	// COVERS: "\s*" in `preg_match( "/<\/" . preg_quote( $name, '/' ) . "\s*>/i",`
	@Test public void Ext__end__pass__space() {
		String src = "a<pre>b</pre >c";
		String expd = "<root>a<ext><name>pre</name><attr></attr><inner>b</inner><close>&lt;/pre &gt;</close></ext>c</root>";
		fxt.Test__parse(src, expd);
	}
	// COVERS: "\s*" in `preg_match( "/<\/" . preg_quote( $name, '/' ) . "\s*>/i",`
	@Test public void Ext__end__pass__name() {
		String src = "a<pre>b</pro></pre>c";
		String expd = "<root>a<ext><name>pre</name><attr></attr><inner>b&lt;/pro&gt;</inner><close>&lt;/pre&gt;</close></ext>c</root>";
		fxt.Test__parse(src, expd);
	}
	// COVERS: "\s*" in `preg_match( "/<\/" . preg_quote( $name, '/' ) . "\s*>/i",`
	@Test public void Ext__end__fail__angle() {
		String src = "a<pre>b</pre c";
		String expd = "<root>a&lt;pre&gt;b&lt;/pre c</root>";
		fxt.Test__parse(src, expd);
	}
	// COVERS: "Let it run out to the end of the text."
	@Test public void Ext__dangling() {
		String src = "a<pre>bc";
		String expd = "<root>a&lt;pre&gt;bc</root>";
		fxt.Test__parse(src, expd);
	}
	// COVERS: "Cache results, otherwise we have O(N^2) performance for input like <foo><foo><foo>..."
	@Test public void Ext__dangling__many() {
		String src = "a<pre><pre><pre>bc";
		String expd = "<root>a&lt;pre&gt;&lt;pre&gt;&lt;pre&gt;bc</root>";
		fxt.Test__parse(src, expd);
	}
	// COVERS: "Infinite backtrack"
	@Test public void Ext__unclosed() {
		String src = "a<pre bcd";
		String expd = "<root>a&lt;pre bcd</root>";
		fxt.Test__parse(src, expd);
	}
	// COVERS: "<includeonly> and <noinclude> just become <ignore> tags"
	@Test public void Ext__noinclude() {
		fxt.Init__for_inclusion_(Bool_.N);
		String src = "a<includeonly>b<noinclude>c</noinclude>d</includeonly>e";
		String expd = "<root>a<ignore>&lt;includeonly&gt;b&lt;noinclude&gt;c&lt;/noinclude&gt;d&lt;/includeonly&gt;</ignore>e</root>";
		fxt.Test__parse(src, expd);
	}
	@Test public void Heading() {
		String src = String_.Concat_lines_nl_skip_last
		( "a"
		, "== b1 =="
		, "z"
		);
		String expd = String_.Concat_lines_nl_skip_last
		( "<root>a"
		, "<h level=\"2\" i=\"1\">== b1 ==</h>"
		, "z</root>"
		);
		fxt.Test__parse(src, expd);
	}
	@Test public void Heading__eos__no_nl() {
		String src = String_.Concat_lines_nl_skip_last
		( "a"
		, "== b1 =="
		);
		String expd = String_.Concat_lines_nl_skip_last
		( "<root>a"
		, "<h level=\"2\" i=\"1\">== b1 ==</h></root>"
		);
		fxt.Test__parse(src, expd);
	}
	// COVERS: "Is this the start of a heading?"
	@Test public void Heading__bos__implied_nl() {
		String src = String_.Concat_lines_nl_skip_last
		( "== b1 =="
		, "z"
		);
		String expd = String_.Concat_lines_nl_skip_last
		( "<root><h level=\"2\" i=\"1\">== b1 ==</h>"
		, "z</root>"
		);
		fxt.Test__parse(src, expd);
	}
	// COVERS: "DWIM: This looks kind of like a name/value separator."
	@Test public void Heading__dwim__y() {
		String src = String_.Concat_lines_nl_skip_last
		( "a{{b|"
		, "=c="
		, "}}d"
		);
		String expd = String_.Concat_lines_nl_skip_last
		( "<root>a<template><title>b</title><part><name>"
		, "</name>=<value>c="
		, "</value></part></template>d</root>"
		);
		fxt.Test__parse(src, expd);
	}
	// COVERS: "DWIM: This looks kind of like a name/value separator."
	@Test public void Heading__dwim__n() {
		String src = String_.Concat_lines_nl_skip_last
		( "a{{b|"
		, "==c=="
		, "}}d"
		);
		String expd = String_.Concat_lines_nl_skip_last
		( "<root>a<template><title>b</title><part><name index=\"1\" /><value>"
		, "<h level=\"2\" i=\"1\">==c==</h>"
		, "</value></part></template>d</root>"
		);
		fxt.Test__parse(src, expd);
	}
	// COVERS: "Comment found at line end"
	@Test public void Heading__comment() {
		String src = String_.Concat_lines_nl_skip_last
		( "a"
		, "==b== <!--c-->"
		, ""
		);
		String expd = String_.Concat_lines_nl_skip_last
		( "<root>a"
		, "<h level=\"2\" i=\"1\">==b== <comment>&lt;!--c--&gt;</comment></h>"
		, "</root>"
		);
		fxt.Test__parse(src, expd);
	}
	// COVERS: "This is just a single String of equals signs on its own line"
	@Test public void Heading__consecutive__5() {
		String src = String_.Concat_lines_nl_skip_last
		( "a"
		, "====="
		, ""
		);
		String expd = String_.Concat_lines_nl_skip_last
		( "<root>a"
		, "<h level=\"2\" i=\"1\">=====</h>"
		, "</root>"
		);
		fxt.Test__parse(src, expd);
	}
	// COVERS: "Single equals sign on its own line, count=0"
	@Test public void Heading__consecutive__1() {
		String src = String_.Concat_lines_nl_skip_last
		( "a"
		, "="
		, ""
		);
		String expd = String_.Concat_lines_nl_skip_last
		( "<root>a"
		, "="
		, "</root>"
		);
		fxt.Test__parse(src, expd);
	}
	// COVERS: "No match, no <h>, just pass down the inner src"
	@Test public void Heading__unclosed() {
		String src = String_.Concat_lines_nl_skip_last
		( "a"
		, "===b"
		, ""
		);
		String expd = String_.Concat_lines_nl_skip_last
		( "<root>a"
		, "===b"
		, "</root>"
		);
		fxt.Test__parse(src, expd);
	}
	@Test public void Inclusion__n() {
		fxt.Init__for_inclusion_(Bool_.N);
		String src = "a<onlyinclude>b</onlyinclude>c";
		String expd = "<root>a<ignore>&lt;onlyinclude&gt;</ignore>b<ignore>&lt;/onlyinclude&gt;</ignore>c</root>";
		fxt.Test__parse(src, expd);
	}
	@Test public void Inclusion__y() {
		fxt.Init__for_inclusion_(Bool_.Y);
		String src = "a<onlyinclude>b</onlyinclude>c";
		String expd = "<root><ignore>a&lt;onlyinclude&gt;</ignore>b<ignore>&lt;/onlyinclude&gt;c</ignore></root>";
		fxt.Test__parse(src, expd);
	}
	// COVERS: "Handle ignored tags"
	@Test public void Ignored__noinclude() {
		fxt.Init__for_inclusion_(Bool_.N);
		String src = "a<noinclude>b</noinclude>c";
		String expd = "<root>a<ignore>&lt;noinclude&gt;</ignore>b<ignore>&lt;/noinclude&gt;</ignore>c</root>";
		fxt.Test__parse(src, expd);
	}
}
/** Test fixture: owns one Xomw_prepro_wkr (initialized with "pre") and checks its XML output. */
class Xomw_prepro_wkr__fxt {
	private final Xomw_prepro_wkr wkr;
	private boolean for_inclusion;
	public Xomw_prepro_wkr__fxt() {
		this.wkr = new Xomw_prepro_wkr();
		this.for_inclusion = false;
		this.wkr.Init_by_wiki("pre");
	}
	/** Sets the for_inclusion flag passed to Preprocess_to_xml by later Test__parse calls. */
	public void Init__for_inclusion_(boolean v) {
		this.for_inclusion = v;
	}
	/** Preprocesses src_str and asserts the resulting XML equals expd; src_str doubles as the failure message. */
	public void Test__parse(String src_str, String expd) {
		byte[] xml = wkr.Preprocess_to_xml(Bry_.new_u8(src_str), for_inclusion);
		String actl = String_.new_u8(xml);
		Tfds.Eq_str_lines(expd, actl, src_str);
	}
}

View File

@@ -1,267 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.mws.parsers.quotes; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
import gplx.xowa.mws.utls.*;
import gplx.xowa.parsers.htmls.*;
import gplx.core.primitives.*;
/**
 * Converts wikitext apostrophe runs ('' / ''' / ''''') into &lt;i&gt; / &lt;b&gt; HTML tags.
 * The REF.MW / PORTED comments below quote the MediaWiki PHP code this class was ported from.
 */
public class Xomw_quote_wkr {// THREAD.UNSAFE: caching for repeated calls
	// buffer for text that follows an opening ''''' until the next apostrophe run decides the tag order
	private Bry_bfr tmp;
	// scratch list handed to Php_preg_.Split on every line
	private final Int_list apos_pos_ary = new Int_list(32);
	public Xomw_quote_wkr(Xomw_parser mgr) {
		this.tmp = mgr.Tmp();
	}
	/**
	 * Processes every "\n"-delimited line of pbfr.Src() and writes the result to pbfr.Trg(),
	 * then switches the parser buffer. Lines are re-joined with "\n"; the trailing "\n" is removed.
	 */
	public void Do_all_quotes(Xomw_parser_ctx pctx, Xomw_parser_bfr pbfr) {
		Bry_bfr src_bfr = pbfr.Src();
		byte[] src = src_bfr.Bfr();
		int src_end = src_bfr.Len();
		Bry_bfr bfr = pbfr.Trg();
		pbfr.Switch();
		// PORTED.SPLIT: $lines = StringUtils::explode( "\n", $text );
		int line_bgn = 0;
		while (true) {
			int line_end = Bry_find_.Find_fwd(src, Byte_ascii.Nl, line_bgn, src_end);
			if (line_end == Bry_find_.Not_found) {
				line_end = src_end;
			}
			Do_quotes(bfr, Bool_.Y, src, line_bgn, line_end);
			if (line_end == src_end)
				break;
			else
				line_bgn = line_end + 1; // 1=\n.length
		}
		if (bfr.Match_end_byt(Byte_ascii.Nl))
			bfr.Del_by_1(); // REF.MW: $outtext = substr( $outtext, 0, -1 );
		apos_pos_ary.Clear();
	}
	/**
	 * Processes a single piece of text.
	 * Returns src itself when no apostrophe run is found; otherwise returns the converted bytes
	 * (without a trailing newline) and clears the passed-in buffer.
	 * NOTE(review): the private worker also writes to the tmp field taken from the parser; if callers
	 * pass that same buffer here, output and pending-''''' text would share one buffer -- confirm against callers.
	 */
	public byte[] Do_quotes(Bry_bfr tmp, byte[] src) {
		boolean found = Do_quotes(tmp, Bool_.N, src, 0, src.length);
		return found ? tmp.To_bry_and_clear() : src;
	}
	/**
	 * Converts the apostrophe runs of src[line_bgn..line_end) and appends the result to bfr.
	 * In all_quotes_mode the line is always written and followed by "\n"; otherwise nothing is
	 * written for lines without apostrophe runs and no "\n" is appended.
	 * Returns true if at least one apostrophe run was found.
	 */
	private boolean Do_quotes(Bry_bfr bfr, boolean all_quotes_mode, byte[] src, int line_bgn, int line_end) {
		// arr alternates text (even index) and apostrophe runs (odd index)
		byte[][] arr = Php_preg_.Split(apos_pos_ary, src, line_bgn, line_end, Wtxt__apos, Bool_.Y); // PORTED.REGX: arr = preg_split("/(''+)/", text, -1, PREG_SPLIT_DELIM_CAPTURE);
		if (arr == null) {
			if (all_quotes_mode) {
				bfr.Add_mid(src, line_bgn, line_end).Add_byte_nl();
			}
			return false;
		}
		int arr_len = arr.length;
		// First, do some preliminary work. This may shift some apostrophes from
		// being mark-up to being text. It also counts the number of occurrences
		// of bold and italics mark-ups.
		int num_bold = 0;
		int num_italics = 0;
		for (int i = 1; i < arr_len; i += 2) {
			int apos_len = arr[i].length;
			// If there are ever four apostrophes, assume the first is supposed to
			// be text, and the remaining three constitute mark-up for bold text.
			// (bug 13227: ''''foo'''' turns into ' ''' foo ' ''')
			if (apos_len == 4) {
				arr[i - 1] = Bry_.Add(arr[i - 1], Byte_ascii.Apos_bry);
				arr[i] = Bry_.new_a7("'''");
				apos_len = 3;
			}
			else if (apos_len > 5) {
				// If there are more than 5 apostrophes in a row, assume they're all
				// text except for the last 5.
				// (bug 13227: ''''''foo'''''' turns into ' ''''' foo ' ''''')
				arr[i - 1] = Bry_.Add(arr[i - 1], Bry_.Repeat(Byte_ascii.Apos, apos_len - 5));
				arr[i] = Bry_.new_a7("'''''");
				apos_len = 5;
			}
			// Count the number of occurrences of bold and italics mark-ups.
			if (apos_len == 2) {
				num_italics++;
			}
			else if (apos_len == 3) {
				num_bold++;
			}
			else if (apos_len == 5) {
				num_italics++;
				num_bold++;
			}
		}
		// If there is an odd number of both bold and italics, it is likely
		// that one of the bold ones was meant to be an apostrophe followed
		// by italics. Which one we cannot know for certain, but it is more
		// likely to be one that has a single-letter word before it.
		// NOTE: this code primarily handles italicized possessives; EX: The ''[[Main Page]]'''s talk page.
		if ((num_bold % 2 == 1) && (num_italics % 2 == 1)) {
			int prv_ends_w_word_1char = -1;
			int prv_ends_w_word_nchar = -1;
			int prv_ends_w_space = -1;
			for (int i = 1; i < arr_len; i += 2) {
				if (arr[i].length == 3) {
					byte[] prv = arr[i - 1];
					byte prv__last_char = Php_str_.Substr_byte(prv, -1);
					byte prv__last_minus_1_char = Php_str_.Substr_byte(prv, -2, 1);
					if (prv__last_char == Byte_ascii.Space) { // NOTE: prv ends in space; EX: "''prv '''"
						if (prv_ends_w_space == -1) {
							prv_ends_w_space = i;
						}
					}
					else if (prv__last_minus_1_char == Byte_ascii.Space) { // NOTE: prv ends in 1-char word; EX: "''prv a'''"
						prv_ends_w_word_1char = i;
						// if $firstsingleletterword is set, we don't
						// look at the other options, so we can bail early.
						break;
					}
					else {
						if (prv_ends_w_word_nchar == -1) {
							prv_ends_w_word_nchar = i;
						}
					}
				}
			}
			// If there is a single-letter word, use it!
			if (prv_ends_w_word_1char > -1) {
				arr[prv_ends_w_word_1char] = Wtxt__apos;
				arr[prv_ends_w_word_1char - 1] = Bry_.Add(arr[prv_ends_w_word_1char - 1], Byte_ascii.Apos);
			}
			else if (prv_ends_w_word_nchar > -1) {
				// If not, but there's a multi-letter word, use that one.
				arr[prv_ends_w_word_nchar] = Wtxt__apos;
				arr[prv_ends_w_word_nchar - 1] = Bry_.Add(arr[prv_ends_w_word_nchar - 1], Byte_ascii.Apos);
			}
			else if (prv_ends_w_space > -1) {
				// ... otherwise use the first one that has neither.
				// (notice that it is possible for all three to be -1 if, for example,
				// there is only one pentuple-apostrophe in the line)
				arr[prv_ends_w_space] = Wtxt__apos;
				arr[prv_ends_w_space - 1] = Bry_.Add(arr[prv_ends_w_space - 1], Byte_ascii.Apos);
			}
		}
		// Now let's actually convert our apostrophic mush to HTML!
		int state = State__empty;
		for (int j = 0; j < arr_len; j++) {
			if ((j % 2) == 0) {
				// text segment: held back in tmp while the ''''' tag order is still undecided
				if (state == State__both) {
					tmp.Add(arr[j]);
				}
				else {
					bfr.Add(arr[j]);
				}
			}
			else {
				int apos_len = arr[j].length;
				if (apos_len == 2) {
					if (state == State__i) {
						bfr.Add_str_a7("</i>");
						state = State__empty;
					}
					else if (state == State__bi) {
						bfr.Add_str_a7("</i>");
						state = State__b;
					}
					else if (state == State__ib) {
						bfr.Add_str_a7("</b></i><b>");
						state = State__b;
					}
					else if (state == State__both) {
						bfr.Add_str_a7("<b><i>").Add_bfr_and_preserve(tmp).Add_str_a7("</i>");
						state = State__b;
					}
					else { // state can be 'b' or ''
						bfr.Add_str_a7("<i>");
						state = state == State__b ? State__bi : State__i;
					}
				}
				else if (apos_len == 3) {
					if (state == State__b) {
						bfr.Add_str_a7("</b>");
						state = State__empty;
					}
					else if (state == State__bi) {
						bfr.Add_str_a7("</i></b><i>");
						state = State__i;
					}
					else if (state == State__ib) {
						bfr.Add_str_a7("</b>");
						state = State__i;
					}
					else if (state == State__both) {
						bfr.Add_str_a7("<i><b>").Add_bfr_and_preserve(tmp).Add_str_a7("</b>");
						state = State__i;
					}
					else { // state can be 'i' or ''
						bfr.Add_str_a7("<b>");
						state = state == State__i ? State__ib : State__b;
					}
				}
				else if (apos_len == 5) {
					if (state == State__b) {
						bfr.Add_str_a7("</b><i>");
						state = State__i;
					}
					else if (state == State__i) {
						bfr.Add_str_a7("</i><b>");
						state = State__b;
					}
					else if (state == State__bi) {
						bfr.Add_str_a7("</i></b>");
						state = State__empty;
					}
					else if (state == State__ib) {
						bfr.Add_str_a7("</b></i>");
						state = State__empty;
					}
					else if (state == State__both) {
						bfr.Add_str_a7("<i><b>").Add_bfr_and_preserve(tmp).Add_str_a7("</b></i>");
						state = State__empty;
					}
					else { // (state == '')
						tmp.Clear();
						state = State__both;
					}
				}
			}
		}
		// Now close all remaining tags. Notice that the order is important.
		if (state == State__b || state == State__ib) {
			bfr.Add_str_a7("</b>");
		}
		if (state == State__i || state == State__bi || state == State__ib) {
			bfr.Add_str_a7("</i>");
		}
		if (state == State__bi) {
			bfr.Add_str_a7("</b>");
		}
		// There might be lonely ''''', so make sure we have a buffer
		if (state == State__both && tmp.Len_gt_0()) {
			bfr.Add_str_a7("<b><i>").Add_bfr_and_clear(tmp).Add_str_a7("</i></b>");
		}
		// only the line-by-line mode re-joins lines with "\n"; the single-text mode must return
		// the converted text as is, matching the "no apostrophes" path above
		if (all_quotes_mode) {
			bfr.Add_byte_nl();
		}
		return true;
	}
	// tag states; names give the order in which the tags were opened
	private static final int
	  State__empty = 0 // no tag open
	, State__b = 1     // <b> open
	, State__i = 2     // <i> open
	, State__bi = 3    // <b> opened, then <i>
	, State__ib = 4    // <i> opened, then <b>
	, State__both = 5  // ''''' seen; tag order not decided yet
	;
	private static final byte[] Wtxt__apos = Bry_.new_a7("''");
}

View File

@@ -1,45 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.mws.parsers.quotes; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
import org.junit.*;
/**
 * Tests for Xomw_quote_wkr: each case runs wikitext through Do_all_quotes (via the fixture)
 * and compares the HTML against the expected string.
 */
public class Xomw_quote_wkr__tst {
	private final Xomw_quote_wkr__fxt fxt = new Xomw_quote_wkr__fxt();

	@Test public void Apos__0() {
		fxt.Test__parse("abc", "abc");
	}
	@Test public void Apos__1() {
		fxt.Test__parse("a'b'c", "a'b'c");
	}
	@Test public void Apos__2() {
		fxt.Test__parse("a''b''c", "a<i>b</i>c");
	}
	@Test public void Apos__3() {
		fxt.Test__parse("a'''b'''c", "a<b>b</b>c");
	}
	// COVERS: "If there are ever four apostrophes"
	@Test public void Apos__4() {
		fxt.Test__parse("a''''b''''c", "a'<b>b'</b>c");
	}
	@Test public void Apos__5() {
		fxt.Test__parse("a'''''b'''''c", "a<i><b>b</b></i>c");
	}
	// COVERS: "If there are more than 5 apostrophes in a row"
	@Test public void Apos__7() {
		fxt.Test__parse("a'''''''b'''''''c", "a''<i><b>b''</b></i>c");
	}
	// COVERS: "If there is a single-letter word, use it!"
	@Test public void Mix__single() {
		fxt.Test__parse("''a ''' ''b b''' ''cc'''", "<i>a <b> </b></i><b>b b'<i> </i>cc</b>");
	}
	// COVERS: "If not, but there's a multi-letter word, use that one."
	@Test public void Mix__multi() {
		fxt.Test__parse("''a ''' ''b ''' ''cc'''", "<i>a <b> </b></i><b>b </b> <i>cc'</i>");
	}
	// COVERS: "... otherwise use the first one that has neither."
	@Test public void Mix__space() {
		fxt.Test__parse("''a ''' ''b ''' ''c '''", "<i>a '</i> <i>b <b> </b></i><b>c </b>");
	}
	// COVERS: "if (state == State__b || state == State__ib)"
	@Test public void Dangling__b() {
		fxt.Test__parse("a'''b", "a<b>b</b>");
	}
	// COVERS: "if (state == State__i || state == State__bi || state == State__ib)"
	@Test public void Dangling__i() {
		fxt.Test__parse("a''b", "a<i>b</i>");
	}
	// COVERS: "There might be lonely ''''', so make sure we have a buffer"
	@Test public void Dangling__lone() {
		fxt.Test__parse("a'''''b", "a<b><i>b</i></b>");
	}
	@Test public void Nl__text() {
		fxt.Test__parse("a\nb''c''d\n\ne", "a\nb<i>c</i>d\n\ne");
	}
}
/** Test fixture: owns one Xomw_quote_wkr plus a reusable parser buffer and checks Do_all_quotes output. */
class Xomw_quote_wkr__fxt {
	private final Xomw_quote_wkr wkr;
	private final Xomw_parser_bfr pbfr;
	public Xomw_quote_wkr__fxt() {
		this.wkr = new Xomw_quote_wkr(new Xomw_parser());
		this.pbfr = new Xomw_parser_bfr();
	}
	/** Runs Do_all_quotes over src_str and asserts the result equals expd; src_str doubles as the failure message. */
	public void Test__parse(String src_str, String expd) {
		byte[] src_bry = Bry_.new_u8(src_str);
		Xomw_parser_ctx pctx = new Xomw_parser_ctx();
		wkr.Do_all_quotes(pctx, pbfr.Init(src_bry));
		String actl = pbfr.Rslt().To_str_and_clear();
		Tfds.Eq_str_lines(expd, actl, src_str);
	}
}

View File

@@ -1,292 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa.mws.parsers.tables; import gplx.*; import gplx.xowa.*; import gplx.xowa.mws.*; import gplx.xowa.mws.parsers.*;
import gplx.xowa.mws.utls.*;
import gplx.xowa.parsers.htmls.*;
import gplx.xowa.mws.libs.*; import gplx.xowa.parsers.uniqs.*;
/**
 * Converts wikitext table markup ({| |- | ! |+ |}) into HTML table tags, one line at a time.
 * The MW / PORTED comments below quote the MediaWiki PHP code this class was ported from.
 * Do_table_stuff drives the conversion; Split is the per-line callback invoked by Bry_split_.Split.
 */
public class Xomw_table_wkr implements gplx.core.brys.Bry_split_wkr {// THREAD.UNSAFE: caching for repeated calls
	private final Bry_bfr tmp;
	private Bry_bfr bfr; // output buffer of the current Do_table_stuff call
	private final Xomw_sanitizer sanitizer; private final Xomw_strip_state strip_state;
	// per-table stacks: every open table owns exactly one entry in each list
	private final List_adp
	  td_history       = List_adp_.New() // Is currently a td tag open?
	, last_tag_history = List_adp_.New() // Save history of last tag activated (td, th or caption)
	, tr_history       = List_adp_.New() // Is currently a tr tag open?
	, tr_attributes    = List_adp_.New() // history of tr attributes
	, has_opened_tr    = List_adp_.New() // Did this table open a <tr> element?
	;
	private int indent_level = 0; // indent level of the table
	private byte[] first_2 = new byte[2]; // reused holder for the first two bytes of the current line
	public Xomw_table_wkr(Xomw_parser parser) {
		this.tmp = parser.Tmp();
		this.sanitizer = parser.Sanitizer();
		this.strip_state = parser.Strip_state();
	}
	/**
	 * Converts all table markup in pbfr.Src(), writes the HTML to pbfr.Trg() and switches the parser buffer.
	 * Tables still open at the end of the text are closed; a result consisting only of an empty table is discarded.
	 */
	public void Do_table_stuff(Xomw_parser_ctx pctx, Xomw_parser_bfr pbfr) {
		Bry_bfr src_bfr = pbfr.Src();
		byte[] src = src_bfr.Bfr();
		int src_bgn = 0;
		int src_end = src_bfr.Len();
		this.bfr = pbfr.Trg();
		pbfr.Switch();
		indent_level = 0;
		Bry_split_.Split(src, src_bgn, src_end, Byte_ascii.Nl, Bool_.N, this); // PORTED.SPLIT: $lines = StringUtils::explode("\n", $text);
		// Closing open td, tr && table
		while (td_history.Len() > 0) {
			if (Php_ary_.Pop_bool_or_n(td_history)) {
				bfr.Add_str_a7("</td>\n");
			}
			if (Php_ary_.Pop_bool_or_n(tr_history)) {
				bfr.Add_str_a7("</tr>\n");
			}
			if (!Php_ary_.Pop_bool_or_n(has_opened_tr)) {
				bfr.Add_str_a7("<tr><td></td></tr>\n");
			}
			// the stacks are instance fields (MW uses fresh local arrays per call), so also drop this
			// table's entries from the two remaining stacks; otherwise an unclosed table would leave
			// stale entries behind for the next call
			Php_ary_.Pop_bry_or_null(last_tag_history);
			Php_ary_.Pop_bry_or_null(tr_attributes);
			bfr.Add_str_a7("</table>\n");
		}
		// Remove trailing line-ending (b/c)
		if (bfr.Get_at_last_or_nil_if_empty() == Byte_ascii.Nl) {
			bfr.Del_by_1();
		}
		// special case: don't return empty table
		if (   bfr.Len() == Len__tb__empty
			&& Bry_.Eq(bfr.Bfr(), 0, Len__tb__empty, Html__tb__empty)) {
			bfr.Clear();
			return;
		}
	}
	/**
	 * Bry_split_wkr callback: converts the single line src[itm_bgn..itm_end) and appends it, plus "\n", to bfr.
	 * Always returns Bry_split_.Rv__ok.
	 */
	public int Split(byte[] src, int itm_bgn, int itm_end) {
		byte[] out_line = Bry_.Mid(src, itm_bgn, itm_end); // MW: "$outLine"
		byte[] line = Bry_.Trim(out_line);                 // MW: "$line"
		int line_len = line.length;
		if (line_len == 0) { // empty line, go to next line
			bfr.Add(out_line).Add_byte_nl();
			return Bry_split_.Rv__ok;
		}
		byte first_char = line[0];
		first_2[0] = line[0];
		first_2[1] = line_len == 1 ? Byte_ascii.Null : line[1];
		// PORTED: preg_match('/^(:*)\s*\{\|(.*)$/', $line, $matches)
		byte[] tblw_atrs = null;
		boolean tblw_bgn_found = false;
		// count the leading colons of the current (trimmed) line; positions below are all relative to line, not src
		int colons_end = Bry_find_.Find_fwd_while(line, 0, line_len, Byte_ascii.Colon);
		int tblw_bgn = Bry_find_.Find_fwd_while(line, colons_end, line_len, Byte_ascii.Space);
		int tblw_atrs_bgn = tblw_bgn + 2;
		if (   tblw_atrs_bgn <= line_len // "{|" must fit inside the line
			&& Bry_.Eq(line, tblw_bgn, tblw_atrs_bgn, Wtxt__tb__bgn)) {
			tblw_bgn_found = true;
			tblw_atrs = (tblw_atrs_bgn == line_len) ? Bry_.Empty : Bry_.Mid(line, tblw_atrs_bgn, line_len);
		}
		if (tblw_bgn_found) {
			// First check if we are starting a new table
			indent_level = colons_end;
			tblw_atrs = strip_state.Unstrip_both(tblw_atrs);
			// PORTED: out_line = str_repeat('<dl><dd>', $indent_level) . "<table{atrs}>";
			for (int j = 0; j < indent_level; j++)
				tmp.Add(Html__dl__bgn);
			tmp.Add_str_a7("<table");
			sanitizer.Fix_tag_attributes(tmp, Name__table, tblw_atrs);
			tmp.Add_byte(Byte_ascii.Angle_end);
			out_line = tmp.To_bry_and_clear();
			td_history.Add(false);
			last_tag_history.Add(Bry_.Empty);
			tr_history.Add(false);
			tr_attributes.Add(Bry_.Empty);
			has_opened_tr.Add(false);
		}
		else if (td_history.Len() == 0) {
			// Don't do any of the following
			bfr.Add(out_line).Add_byte_nl();
			return Bry_split_.Rv__ok;
		}
		else if (Bry_.Eq(first_2, Wtxt__tb__end)) {
			// We are ending a table
			line = tmp.Add_str_a7("</table>").Add_mid(line, 2, line.length).To_bry_and_clear();
			byte[] last_tag = Php_ary_.Pop_bry_or_null(last_tag_history);
			if (!Php_ary_.Pop_bool_or_n(has_opened_tr)) {
				line = tmp.Add_str_a7("<tr><td></td></tr>").Add(line).To_bry_and_clear();
			}
			if (Php_ary_.Pop_bool_or_n(tr_history)) {
				line = tmp.Add_str_a7("</tr>").Add(line).To_bry_and_clear();
			}
			if (Php_ary_.Pop_bool_or_n(td_history)) {
				line = tmp.Add_str_a7("</").Add(last_tag).Add_byte(Byte_ascii.Angle_end).Add(line).To_bry_and_clear();
			}
			Php_ary_.Pop_bry_or_null(tr_attributes);
			// PORTED:$outLine = $line . str_repeat( '</dd></dl>', $indent_level );
			tmp.Add(line);
			for (int j = 0; j < indent_level; j++)
				tmp.Add(Html__dl__end);
			out_line = tmp.To_bry_and_clear();
		}
		else if (Bry_.Eq(first_2, Wtxt__tr)) {
			// Now we have a table row
			// PORTED: $line = preg_replace('#^\|-+#', '', $line);
			// "-+" removes every dash after the pipe (EX: "|----"), not just the first one
			int atrs_bgn = Bry_find_.Find_fwd_while(line, 2, line_len, Byte__dash);
			line = (atrs_bgn == line_len) ? Bry_.Empty : Bry_.Mid(line, atrs_bgn, line_len);
			// Whats after the tag is now only attributes
			byte[] atrs = strip_state.Unstrip_both(line);
			sanitizer.Fix_tag_attributes(tmp, Name__tr, atrs);
			atrs = tmp.To_bry_and_clear();
			Php_ary_.Pop_bry_or_null(tr_attributes);
			tr_attributes.Add(atrs);
			line = Bry_.Empty;
			byte[] last_tag = Php_ary_.Pop_bry_or_null(last_tag_history);
			Php_ary_.Pop_bool_or_n(has_opened_tr);
			has_opened_tr.Add(true);
			if (Php_ary_.Pop_bool_or_n(tr_history)) {
				line = Html__tr__end;
			}
			if (Php_ary_.Pop_bool_or_n(td_history)) {
				line = tmp.Add_str_a7("</").Add(last_tag).Add_byte(Byte_ascii.Gt).Add(line).To_bry_and_clear();
			}
			out_line = line;
			tr_history.Add(false);
			td_history.Add(false);
			last_tag_history.Add(Bry_.Empty);
		}
		else if (  first_char == Byte_ascii.Pipe
				|| first_char == Byte_ascii.Bang
				|| Bry_.Eq(first_2, Wtxt__caption)
				) {
			// This might be cell elements, td, th or captions
			if (Bry_.Eq(first_2, Wtxt__caption)) {
				first_char = Byte_ascii.Plus;
				line = Bry_.Mid(line, 2);
			} else {
				line = Bry_.Mid(line, 1);
			}
			// Implies both are valid for table headings.
			if (first_char == Byte_ascii.Bang) {
				// NOTE(review): the return value is not used, so this presumably rewrites line in place -- confirm against Xomw_string_utils
				Xomw_string_utils.Replace_markup(line, 0, line.length, Wtxt__th2, Wtxt__td2); // $line = StringUtils::replaceMarkup('!!', '||', $line);
			}
			// Split up multiple cells on the same line.
			// FIXME : This can result in improper nesting of tags processed
			// by earlier parser steps.
			byte[][] cells = Bry_split_.Split(line, Wtxt__td2);
			if (cells.length == 0) cells = Cells__empty; // handle "\n|\n" which should still generate "<tr><td></td></tr>", not ""; see TEST
			out_line = Bry_.Empty;
			byte[] previous = null;
			// Loop through each table cell
			int cells_len = cells.length;
			for (int j = 0; j < cells_len; j++) {
				byte[] cell = cells[j];
				previous = Bry_.Empty;
				if (first_char != Byte_ascii.Plus) {
					byte[] tr_after = Php_ary_.Pop_bry_or_null(tr_attributes);
					if (!Php_ary_.Pop_bool_or_n(tr_history)) {
						previous = tmp.Add_str_a7("<tr").Add(tr_after).Add_str_a7(">\n").To_bry_and_clear();
					}
					tr_history.Add(true);
					tr_attributes.Add(Bry_.Empty);
					Php_ary_.Pop_bool_or_n(has_opened_tr);
					has_opened_tr.Add(true);
				}
				byte[] last_tag = Php_ary_.Pop_bry_or_null(last_tag_history);
				if (Php_ary_.Pop_bool_or_n(td_history)) {
					previous = tmp.Add_str_a7("</").Add(last_tag).Add_str_a7(">\n").Add(previous).To_bry_and_clear();
				}
				if (first_char == Byte_ascii.Pipe) {
					last_tag = Name__td;
				}
				else if (first_char == Byte_ascii.Bang) {
					last_tag = Name__th;
				}
				else if (first_char == Byte_ascii.Plus) {
					last_tag = Name__caption;
				}
				else {
					last_tag = Bry_.Empty;
				}
				last_tag_history.Add(last_tag);
				// A cell could contain both parameters and data
				byte[][] cell_data = Bry_split_.Split_w_max(cell, Byte_ascii.Pipe, 2);
				// Bug 553: Note that a '|' inside an invalid link should not
				// be mistaken as delimiting cell parameters
				byte[] cell_data_0 = cell_data[0];
				byte[] cell_data_1 = cell_data[1]; // null when the cell has no '|' (checked below)
				if (Bry_find_.Find_fwd(cell_data_0, Wtxt__lnki__bgn) != Bry_find_.Not_found) {
					cell = tmp.Add(previous).Add_byte(Byte_ascii.Angle_bgn).Add(last_tag).Add_byte(Byte_ascii.Angle_end).Add(cell).To_bry_and_clear();
				}
				else if (cell_data_1 == null) {
					cell = tmp.Add(previous).Add_byte(Byte_ascii.Angle_bgn).Add(last_tag).Add_byte(Byte_ascii.Angle_end).Add(cell_data_0).To_bry_and_clear();
				}
				else {
					byte[] atrs = strip_state.Unstrip_both(cell_data_0);
					tmp.Add(previous).Add_byte(Byte_ascii.Angle_bgn).Add(last_tag);
					sanitizer.Fix_tag_attributes(tmp, last_tag, atrs);
					tmp.Add_byte(Byte_ascii.Angle_end).Add(cell_data_1);
					cell = tmp.To_bry_and_clear();
				}
				out_line = Bry_.Add(out_line, cell);
				td_history.Add(true);
			}
		}
		bfr.Add(out_line).Add_byte_nl();
		return Bry_split_.Rv__ok;
	}
	private static final byte Byte__dash = (byte)'-';
	private static final byte[]
	  Wtxt__tb__bgn   = Bry_.new_a7("{|")
	, Wtxt__tb__end   = Bry_.new_a7("|}")
	, Wtxt__tr        = Bry_.new_a7("|-")
	, Wtxt__caption   = Bry_.new_a7("|+")
	, Wtxt__th2       = Bry_.new_a7("!!")
	, Wtxt__td2       = Bry_.new_a7("||")
	, Wtxt__lnki__bgn = Bry_.new_a7("[[")
	, Name__table     = Bry_.new_a7("table")
	, Name__tr        = Bry_.new_a7("tr")
	, Name__td        = Bry_.new_a7("td")
	, Name__th        = Bry_.new_a7("th")
	, Name__caption   = Bry_.new_a7("caption")
	, Html__tr__end   = Bry_.new_a7("</tr>")
	, Html__dl__bgn   = Bry_.new_a7("<dl><dd>")
	, Html__dl__end   = Bry_.new_a7("</dd></dl>")
	, Html__tb__empty = Bry_.new_a7("<table>\n<tr><td></td></tr>\n</table>")
	;
	private static final int Len__tb__empty = Html__tb__empty.length;
	private static final byte[][] Cells__empty = new byte[][] {Bry_.Empty};
}

Some files were not shown because too many files have changed in this diff Show More