mirror of https://github.com/gnosygnu/xowa
parent
352238a9c5
commit
8011f9e979
@ -0,0 +1,33 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws; import gplx.*; import gplx.xowa.*;
|
||||
public class Xomw_MagicWord {
|
||||
public boolean case_match;
|
||||
public byte[] name;
|
||||
public Xomw_MagicWordSynonym[] synonyms;
|
||||
public Xomw_MagicWord(byte[] name, boolean case_match, byte[][] synonyms_ary) {
|
||||
this.name = name;
|
||||
this.case_match = case_match;
|
||||
|
||||
int synonyms_len = synonyms_ary.length;
|
||||
this.synonyms = new Xomw_MagicWordSynonym[synonyms_len];
|
||||
for (int i = 0; i < synonyms_len; i++) {
|
||||
synonyms[i] = new Xomw_MagicWordSynonym(name, case_match, synonyms_ary[i]);
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,365 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws; import gplx.*; import gplx.xowa.*;
|
||||
import gplx.core.btries.*; import gplx.core.primitives.*;
|
||||
public class Xomw_MagicWordArray {
|
||||
// holds synonyms classified as Arg1__end (EX: "thumb=$1"); stays null until the first such synonym is seen
private Btrie_slim_mgr fwd_trie;
// holds synonyms classified as Arg1__bgn (EX: "$1px"); stays null until the first such synonym is seen
private Btrie_bwd_mgr bwd_trie;
// result holder handed to every Match_at call
private final Btrie_rv trv = new Btrie_rv();
//	private final Xomw_MagicWordMgr magic_word_mgr;
public final byte[][] names;

//	/** @var array */
//	private hash;

//	private baseRegex;

//	private regex;

/**
 * Looks up every name in the manager and registers the synonyms of each
 * word that is found. Names for which the manager returns null are skipped.
 *
 * @param magic_word_mgr source of Xomw_MagicWord instances; only used during construction
 * @param names          magic word names in this group; stored as-is in {@link #names}
 */
public Xomw_MagicWordArray(Xomw_MagicWordMgr magic_word_mgr, byte[][] names) {
	// this.magic_word_mgr = magic_word_mgr;
	this.names = names;

	// ASSUME: all magic words in a group have the same case sensitivity
	int names_len = names.length;
	for (int i = 0; i < names_len; i++) {
		Xomw_MagicWord word = magic_word_mgr.Get(names[i]);
		if (word == null) {
			continue;
		}
		for (Xomw_MagicWordSynonym synonym : word.synonyms) {
			Register_synonym(word, synonym);
		}
	}
}

/**
 * Files one synonym according to its arg1_tid: Arg1__end goes to the forward trie,
 * Arg1__bgn goes to the backward trie, Arg1__mid / Arg1__mix are logged as unsupported,
 * and anything else (including Arg1__nil) is ignored.
 * Each trie is created on first use; its case mode comes from the word that triggers the creation.
 */
private void Register_synonym(Xomw_MagicWord word, Xomw_MagicWordSynonym synonym) {
	byte tid = synonym.arg1_tid;
	if (tid == Xomw_MagicWordSynonym.Arg1__end) {
		if (fwd_trie == null) {
			fwd_trie = word.case_match ? Btrie_slim_mgr.cs() : Btrie_slim_mgr.ci_u8();
		}
		fwd_trie.Add_obj(synonym.text, synonym);
	}
	else if (tid == Xomw_MagicWordSynonym.Arg1__bgn) {
		if (bwd_trie == null) {
			bwd_trie = Btrie_bwd_mgr.c__(word.case_match);
		}
		bwd_trie.Add(synonym.text, synonym);
	}
	else if (tid == Xomw_MagicWordSynonym.Arg1__mid || tid == Xomw_MagicWordSynonym.Arg1__mix) {
		// ignore if mid / mix
		Gfo_usr_dlg_.Instance.Warn_many("", "", "MagicWordArray: unsupported arg_1_tid: tid=~{0}", synonym.arg1_tid);
	}
}
|
||||
|
||||
// /**
|
||||
// * Add a magic word by name
|
||||
// *
|
||||
// * @param String name
|
||||
// */
|
||||
// public function add(name) {
|
||||
// this->names[] = name;
|
||||
// this->hash = this->baseRegex = this->regex = null;
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Add a number of magic words by name
|
||||
// *
|
||||
// * @param array names
|
||||
// */
|
||||
// public function addArray(names) {
|
||||
// this->names = array_merge(this->names, array_values(names));
|
||||
// this->hash = this->baseRegex = this->regex = null;
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Get a 2-d hashtable for this array
|
||||
// * @return array
|
||||
// */
|
||||
// public function getHash() {
|
||||
// if (is_null(this->hash)) {
|
||||
// global wgContLang;
|
||||
// this->hash = [ 0 => [], 1 => [] ];
|
||||
// foreach (this->names as name) {
|
||||
// magic = MagicWord::get(name);
|
||||
// case = intval(magic->isCaseSensitive());
|
||||
// foreach (magic->getSynonyms() as syn) {
|
||||
// if (!case) {
|
||||
// syn = wgContLang->lc(syn);
|
||||
// }
|
||||
// this->hash[case][syn] = name;
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// return this->hash;
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Get the super regex
|
||||
// * @return array
|
||||
// */
|
||||
// public function getBaseRegex() {
|
||||
// if (is_null(this->baseRegex)) {
|
||||
// this->baseRegex = [ 0 => '', 1 => '' ];
|
||||
// foreach (this->names as name) {
|
||||
// magic = MagicWord::get(name);
|
||||
// case = intval(magic->isCaseSensitive());
|
||||
// foreach (magic->getSynonyms() as i => syn) {
|
||||
// // Group name must start with a non-digit in PCRE 8.34+
|
||||
// it = strtr(i, '0123456789', 'abcdefghij');
|
||||
// group = "(?P<{it}_{name}>" . preg_quote(syn, '/') . ')';
|
||||
// if (this->baseRegex[case] === '') {
|
||||
// this->baseRegex[case] = group;
|
||||
// } else {
|
||||
// this->baseRegex[case] .= '|' . group;
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// return this->baseRegex;
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Get an unanchored regex that does not match parameters
|
||||
// * @return array
|
||||
// */
|
||||
// public function getRegex() {
|
||||
// if (is_null(this->regex)) {
|
||||
// super = this->getBaseRegex();
|
||||
// this->regex = [ '', '' ];
|
||||
// if (this->baseRegex[0] !== '') {
|
||||
// this->regex[0] = "/{super[0]}/iuS";
|
||||
// }
|
||||
// if (this->baseRegex[1] !== '') {
|
||||
// this->regex[1] = "/{super[1]}/S";
|
||||
// }
|
||||
// }
|
||||
// return this->regex;
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Get a regex for matching variables with parameters
|
||||
// *
|
||||
// * @return String
|
||||
// */
|
||||
// public function getVariableRegex() {
|
||||
// return str_replace("\\1", "(.*?)", this->getRegex());
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Get a regex anchored to the start of the String that does not match parameters
|
||||
// *
|
||||
// * @return array
|
||||
// */
|
||||
// public function getRegexStart() {
|
||||
// super = this->getBaseRegex();
|
||||
// newRegex = [ '', '' ];
|
||||
// if (super[0] !== '') {
|
||||
// newRegex[0] = "/^(?:{super[0]})/iuS";
|
||||
// }
|
||||
// if (super[1] !== '') {
|
||||
// newRegex[1] = "/^(?:{super[1]})/S";
|
||||
// }
|
||||
// return newRegex;
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Get an anchored regex for matching variables with parameters
|
||||
// *
|
||||
// * @return array
|
||||
// */
|
||||
// public function getVariableStartToEndRegex() {
|
||||
// super = this->getBaseRegex();
|
||||
// newRegex = [ '', '' ];
|
||||
// if (super[0] !== '') {
|
||||
// newRegex[0] = str_replace("\\1", "(.*?)", "/^(?:{super[0]})/iuS");
|
||||
// }
|
||||
// if (super[1] !== '') {
|
||||
// newRegex[1] = str_replace("\\1", "(.*?)", "/^(?:{super[1]})/S");
|
||||
// }
|
||||
// return newRegex;
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * @since 1.20
|
||||
// * @return array
|
||||
// */
|
||||
// public function getNames() {
|
||||
// return this->names;
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Parse a match array from preg_match
|
||||
// * Returns array(magic word ID, parameter value)
|
||||
// * If there is no parameter value, that element will be false.
|
||||
// *
|
||||
// * @param array m
|
||||
// *
|
||||
// * @throws MWException
|
||||
// * @return array
|
||||
// */
|
||||
// public function parseMatch(m) {
|
||||
// reset(m);
|
||||
// while (list(key, value) = each(m)) {
|
||||
// if (key === 0 || value === '') {
|
||||
// continue;
|
||||
// }
|
||||
// parts = explode('_', key, 2);
|
||||
// if (count(parts) != 2) {
|
||||
// // This shouldn't happen
|
||||
// // continue;
|
||||
// throw new MWException(__METHOD__ . ': bad parameter name');
|
||||
// }
|
||||
// list(/* synIndex */, magicName) = parts;
|
||||
// paramValue = next(m);
|
||||
// return [ magicName, paramValue ];
|
||||
// }
|
||||
// // This shouldn't happen either
|
||||
// throw new MWException(__METHOD__ . ': parameter not found');
|
||||
// }
|
||||
|
||||
/**
|
||||
* Match some text, with parameter capture
|
||||
* Returns an array with the magic word name in the first element and the
|
||||
* parameter in the second element.
|
||||
* Both elements are false if there was no match.
|
||||
*
|
||||
* @param String text
|
||||
*
|
||||
* @return array
|
||||
*/
|
||||
public byte[] matchVariableStartToEnd(byte[] src) {
|
||||
int src_end = src.length;
|
||||
|
||||
// check fwd; EX: "thumb=$1"
|
||||
if (fwd_trie != null) {
|
||||
Object o = fwd_trie.Match_at(trv, src, 0, src_end);
|
||||
if (o != null) {
|
||||
return ((Xomw_MagicWordSynonym)o).magic_name;
|
||||
}
|
||||
}
|
||||
|
||||
// check bwd; EX: "$1px"
|
||||
if (bwd_trie != null) {
|
||||
Object o = bwd_trie.Match_at(trv, src, src_end, -1);
|
||||
if (o != null) {
|
||||
return ((Xomw_MagicWordSynonym)o).magic_name;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
// regexes = this->getVariableStartToEndRegex();
|
||||
// foreach (regexes as regex) {
|
||||
// if (regex !== '') {
|
||||
// m = [];
|
||||
// if (preg_match(regex, text, m)) {
|
||||
// return this->parseMatch(m);
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// return [ false, false ];
|
||||
}
|
||||
|
||||
// /**
|
||||
// * Match some text, without parameter capture
|
||||
// * Returns the magic word name, or false if there was no capture
|
||||
// *
|
||||
// * @param String text
|
||||
// *
|
||||
// * @return String|boolean False on failure
|
||||
// */
|
||||
// public function matchStartToEnd(text) {
|
||||
// hash = this->getHash();
|
||||
// if (isset(hash[1][text])) {
|
||||
// return hash[1][text];
|
||||
// }
|
||||
// global wgContLang;
|
||||
// lc = wgContLang->lc(text);
|
||||
// if (isset(hash[0][lc])) {
|
||||
// return hash[0][lc];
|
||||
// }
|
||||
// return false;
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Returns an associative array, ID => param value, for all items that match
|
||||
// * Removes the matched items from the input String (passed by reference)
|
||||
// *
|
||||
// * @param String text
|
||||
// *
|
||||
// * @return array
|
||||
// */
|
||||
// public function matchAndRemove(&text) {
|
||||
// found = [];
|
||||
// regexes = this->getRegex();
|
||||
// foreach (regexes as regex) {
|
||||
// if (regex === '') {
|
||||
// continue;
|
||||
// }
|
||||
// matches = [];
|
||||
// res = preg_match_all(regex, text, matches, PREG_SET_ORDER);
|
||||
// if (res === false) {
|
||||
// LoggerFactory::getInstance('parser')->warning('preg_match_all returned false', [
|
||||
// 'code' => preg_last_error(),
|
||||
// 'regex' => regex,
|
||||
// 'text' => text,
|
||||
// ]);
|
||||
// } elseif (res) {
|
||||
// foreach (matches as m) {
|
||||
// list(name, param) = this->parseMatch(m);
|
||||
// found[name] = param;
|
||||
// }
|
||||
// }
|
||||
// res = preg_replace(regex, '', text);
|
||||
// if (res === null) {
|
||||
// LoggerFactory::getInstance('parser')->warning('preg_replace returned null', [
|
||||
// 'code' => preg_last_error(),
|
||||
// 'regex' => regex,
|
||||
// 'text' => text,
|
||||
// ]);
|
||||
// }
|
||||
// text = res;
|
||||
// }
|
||||
// return found;
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Return the ID of the magic word at the start of text, and remove
|
||||
// * the prefix from text.
|
||||
// * Return false if no match found and text is not modified.
|
||||
// * Does not match parameters.
|
||||
// *
|
||||
// * @param String text
|
||||
// *
|
||||
// * @return int|boolean False on failure
|
||||
// */
|
||||
// public function matchStartAndRemove(&text) {
|
||||
// regexes = this->getRegexStart();
|
||||
// foreach (regexes as regex) {
|
||||
// if (regex === '') {
|
||||
// continue;
|
||||
// }
|
||||
// if (preg_match(regex, text, m)) {
|
||||
// list(id,) = this->parseMatch(m);
|
||||
// if (strlen(m[0]) >= strlen(text)) {
|
||||
// text = '';
|
||||
// } else {
|
||||
// text = substr(text, strlen(m[0]));
|
||||
// }
|
||||
// return id;
|
||||
// }
|
||||
// }
|
||||
// return false;
|
||||
// }
|
||||
}
|
@ -0,0 +1,23 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws; import gplx.*; import gplx.xowa.*;
|
||||
public class Xomw_MagicWordMgr {
|
||||
public Xomw_MagicWord Get(byte[] name) {
|
||||
return null;
|
||||
}
|
||||
}
|
@ -0,0 +1,79 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.xowa.mws; import gplx.*; import gplx.xowa.*;
|
||||
public class Xomw_MagicWordSynonym {
|
||||
public final byte[] magic_name;
|
||||
public final boolean case_match;
|
||||
public final byte[] text;
|
||||
public final byte arg1_tid;
|
||||
public Xomw_MagicWordSynonym(byte[] magic_name, boolean case_match, byte[] text) {
|
||||
this.magic_name = magic_name;
|
||||
this.case_match = case_match;
|
||||
this.text = text;
|
||||
this.arg1_tid = Get_arg1_tid(text);
|
||||
}
|
||||
|
||||
private static byte Get_arg1_tid(byte[] src) {
|
||||
int len = src.length;
|
||||
byte rv = Arg1__nil;
|
||||
int cur = 0;
|
||||
while (true) {
|
||||
if (cur == len) break;
|
||||
byte b = src[cur];
|
||||
// "$" matched
|
||||
if (b == Byte_ascii.Dollar) {
|
||||
// "1" matched?
|
||||
int nxt_pos = cur + 1;
|
||||
if (nxt_pos < len && src[nxt_pos] == Byte_ascii.Num_1) {
|
||||
// "$1" matched
|
||||
if (cur == 0) {
|
||||
rv = Arg1__bgn;
|
||||
}
|
||||
else if (cur == len - 2) {
|
||||
rv = rv == Arg1__nil ? Arg1__end : Arg1__mix;
|
||||
}
|
||||
else {
|
||||
if (rv == Arg1__nil)
|
||||
rv = Arg1__mid;
|
||||
else if (rv == Arg1__mid)
|
||||
rv = Arg1__mix;
|
||||
}
|
||||
cur += 3;
|
||||
continue;
|
||||
}
|
||||
else {
|
||||
cur += 2;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
else {
|
||||
cur += 1;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
|
||||
public static final byte
|
||||
Arg1__nil = 0 // EX: "thumb"
|
||||
, Arg1__bgn = 1 // EX: "$1px"
|
||||
, Arg1__end = 2 // EX: "thumb=$1"
|
||||
, Arg1__mid = 3 // EX: "a$1b"
|
||||
, Arg1__mix = 4 // EX: "a$1b$cc"
|
||||
;
|
||||
}
|
@ -1 +1 @@
|
||||
version_updater description 2
|
||||
v4.1.0.1702
|
||||
|
Loading…
Reference in new issue