mirror of
https://github.com/gnosygnu/xowa.git
synced 2026-03-02 03:49:30 +00:00
v2.7.3.3
This commit is contained in:
65
100_core/src/gplx/core/regxs/Regx_adp.java
Normal file
65
100_core/src/gplx/core/regxs/Regx_adp.java
Normal file
@@ -0,0 +1,65 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.core.regxs; import gplx.*; import gplx.core.*;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
public class Regx_adp {
|
||||
@gplx.Internal protected Regx_adp(String regx) {Pattern_(regx);}
|
||||
public String Pattern() {return pattern;} public Regx_adp Pattern_(String val) {pattern = val; Under_sync(); return this;} private String pattern;
|
||||
public boolean Pattern_is_invalid() {return pattern_is_invalid;} private boolean pattern_is_invalid = false;
|
||||
public Regx_match[] Match_all(String text, int bgn) {
|
||||
int idx = bgn;
|
||||
List_adp rv = List_adp_.new_();
|
||||
int len = String_.Len(text);
|
||||
while (idx <= len) { // NOTE: must be <= not < else "a?" will return null instead of ""; PAGE:en.d:民; DATE:2015-01-30
|
||||
Regx_match match = this.Match(text, idx);
|
||||
if (match.Rslt_none()) break;
|
||||
rv.Add(match);
|
||||
int find_bgn = match.Find_bgn();
|
||||
int find_len = match.Find_len();
|
||||
idx = find_len == 0 // find_bgn == find_end
|
||||
? find_bgn + 1 // add 1 to resume search from next char; DATE:2014-09-02
|
||||
: find_bgn + find_len // otherwise search after find_end
|
||||
;
|
||||
}
|
||||
return (Regx_match[])rv.To_ary(Regx_match.class);
|
||||
}
|
||||
private Pattern under;
|
||||
void Under_sync() {
|
||||
try {under = Pattern.compile(pattern, Pattern.DOTALL | Pattern.UNICODE_CHARACTER_CLASS);} // JRE.7:UNICODE_CHARACTER_CLASS; added during %w fix for en.w:A#; DATE:2015-06-10
|
||||
catch (Exception e) { // NOTE: if invalid, then default to empty pattern (which should return nothing); EX:d:〆る generates [^]; DATE:2013-10-20
|
||||
pattern_is_invalid = true;
|
||||
under = Pattern.compile("", Pattern.DOTALL | Pattern.UNICODE_CHARACTER_CLASS);
|
||||
}
|
||||
}
|
||||
public Regx_match Match(String input, int bgn) {
|
||||
Matcher match = under.matcher(input);
|
||||
boolean success = match.find(bgn);
|
||||
int match_bgn = success ? match.start() : String_.Find_none;
|
||||
int match_end = success ? match.end() : String_.Find_none;
|
||||
Regx_group[] ary = Regx_group.Ary_empty;
|
||||
int groups_len = match.groupCount();
|
||||
if (success && groups_len > 0) {
|
||||
ary = new Regx_group[groups_len];
|
||||
for (int i = 0; i < groups_len; i++)
|
||||
ary[i] = new Regx_group(true, match.start(i + 1), match.end(i + 1), match.group(i + 1));
|
||||
}
|
||||
return new Regx_match(success, match_bgn, match_end, ary);
|
||||
}
|
||||
public String ReplaceAll(String input, String replace) {return under.matcher(input).replaceAll(replace);}
|
||||
}
|
||||
43
100_core/src/gplx/core/regxs/Regx_adp_.java
Normal file
43
100_core/src/gplx/core/regxs/Regx_adp_.java
Normal file
@@ -0,0 +1,43 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.core.regxs; import gplx.*; import gplx.core.*;
|
||||
public class Regx_adp_ {
|
||||
public static Regx_adp new_(String pattern) {return new Regx_adp(pattern);}
|
||||
public static List_adp Find_all(String input, String find) {
|
||||
Regx_adp regx = Regx_adp_.new_(find);
|
||||
int idx = 0;
|
||||
List_adp rv = List_adp_.new_();
|
||||
while (true) {
|
||||
Regx_match match = regx.Match(input, idx);
|
||||
if (match.Rslt_none()) break;
|
||||
rv.Add(match);
|
||||
int findBgn = match.Find_bgn();
|
||||
idx = findBgn + match.Find_len();
|
||||
if (idx > String_.Len(input)) break;
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
public static String Replace(String raw, String regx_str, String replace) {
|
||||
Regx_adp regx = Regx_adp_.new_(regx_str);
|
||||
return regx.ReplaceAll(raw, replace);
|
||||
}
|
||||
public static boolean Match(String input, String pattern) {
|
||||
Regx_adp rv = new Regx_adp(pattern);
|
||||
return rv.Match(input, 0).Rslt();
|
||||
}
|
||||
}
|
||||
93
100_core/src/gplx/core/regxs/Regx_adp__tst.java
Normal file
93
100_core/src/gplx/core/regxs/Regx_adp__tst.java
Normal file
@@ -0,0 +1,93 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.core.regxs; import gplx.*; import gplx.core.*;
|
||||
import org.junit.*;
|
||||
public class Regx_adp__tst implements TfdsEqListItmStr {
|
||||
@Test public void Match() {
|
||||
tst_Match("a", "a", true); // basic
|
||||
tst_Match("a", "b", false); // matchNot
|
||||
tst_Match("a", "ab", true); // matchPart
|
||||
tst_Match("a\\+b", "a+b", true); // matchEscape
|
||||
tst_Match("[^a]", "b", true); // charSet_negate
|
||||
} void tst_Match(String find, String input, boolean expd) {Tfds.Eq(expd, Regx_adp_.Match(input, find));}
|
||||
@Test public void Match_all() {
|
||||
tst_Match_all("#REDIRECT [[Template:Error]]", "^\\p{Nd}*", 1); // handle match = true but len = 0; DATE:2013-04-11
|
||||
tst_Match_all("a", "$", 1); // $ should match once, not zero; DATE:2014-09-02
|
||||
} void tst_Match_all(String input, String regx, int expd) {Tfds.Eq(expd, Regx_adp_.new_(regx).Match_all(input, 0).length);}
|
||||
@Test public void Replace() {
|
||||
tst_Replace("ab", "a", "b", "bb"); // basic
|
||||
tst_Replace("ab", "c", "b", "ab"); // replaceNot
|
||||
tst_Replace("aba", "a", "b", "bbb"); // replaceMultiple
|
||||
} void tst_Replace(String input, String find, String replace, String expd) {Tfds.Eq(expd, Regx_adp_.Replace(input, find, replace));}
|
||||
@Test public void Match_WholeWord() {
|
||||
tst_WholeWord("a", "ab a", true); // pass a
|
||||
tst_WholeWord("a", "ab c", false); // fail ab
|
||||
tst_WholeWord("a", "a_", false); // fail a_
|
||||
tst_WholeWord("[a]", "a [a] c", true); // pass [a]
|
||||
tst_WholeWord("[a]", "a[a]c", false); // fail a[a]c
|
||||
} void tst_WholeWord(String regx, String text, boolean expd) {Tfds.Eq(expd, Regx_adp_.Match(text, Regx_bldr.WholeWord(regx)));}
|
||||
@Test public void Match_As() {
|
||||
tst_Regx("public static [A-Za-z0-9_]+ as_\\(Object obj\\)", "public static Obj1 as_(Object obj) {return obj instanceof Obj1 ? (Obj1)obj : null;}", true);
|
||||
tst_Regx("public static [A-Za-z0-9_]+ as_\\(Object obj\\)", "public static boolean Asterisk(Object obj) {}", false);
|
||||
} void tst_Regx(String regx, String text, boolean expd) {Tfds.Eq(expd, Regx_adp_.Match(text, regx));}
|
||||
@Test public void Find() {
|
||||
tst_Matches("b", "a b c b a", match_(2, 1), match_(6, 1));
|
||||
tst_Matches("d", "a b c b a");
|
||||
tst_Matches("b", "a b c b a b b", matches_(2, 6, 10, 12)); // BUGFIX: multiple entries did not work b/c of += instead of +
|
||||
}
|
||||
@Test public void Groups() {
|
||||
tst_Groups("abc def ghi dz", "(d\\p{L}+)", "def", "dz");
|
||||
}
|
||||
Regx_match[] matches_(int... bgnAry) {
|
||||
int aryLen = Array_.Len(bgnAry);
|
||||
Regx_match[] rv = new Regx_match[aryLen];
|
||||
for (int i = 0; i < aryLen; i++)
|
||||
rv[i] = match_(bgnAry[i]);
|
||||
return rv;
|
||||
}
|
||||
Regx_match match_(int bgn) {return match_(bgn, Int_.MinValue);}
|
||||
Regx_match match_(int bgn, int len) {return new Regx_match(true, bgn, bgn + len, Regx_group.Ary_empty);}
|
||||
void tst_Matches(String find, String input, Regx_match... expd) {
|
||||
List_adp expdList = Array_.XtoList(expd);
|
||||
List_adp actlList = Regx_adp_.Find_all(input, find);
|
||||
Tfds.Eq_list(expdList, actlList, this);
|
||||
}
|
||||
void tst_Groups(String text, String regx, String... expd) {
|
||||
Regx_adp regx_mgr = Regx_adp_.new_(regx);
|
||||
Regx_match[] rslts = regx_mgr.Match_all(text, 0);
|
||||
Tfds.Eq_ary_str(expd, To_ary(rslts));
|
||||
}
|
||||
String[] To_ary(Regx_match[] ary) {
|
||||
List_adp rv = List_adp_.new_();
|
||||
int len = ary.length;
|
||||
for (int i = 0; i < len; i++) {
|
||||
Regx_match itm = ary[i];
|
||||
int cap_len = itm.Groups().length;
|
||||
for (int j = 0; j < cap_len; j++) {
|
||||
rv.Add(itm.Groups()[j].Val());
|
||||
}
|
||||
}
|
||||
return rv.To_str_ary();
|
||||
}
|
||||
public String XtoStr(Object curObj, Object expdObj) {
|
||||
Regx_match cur = (Regx_match)curObj, expd = (Regx_match)expdObj;
|
||||
String rv = "bgn=" + cur.Find_bgn();
|
||||
if (expd != null && expd.Find_len() != Int_.MinValue) rv += " len=" + cur.Find_len();
|
||||
return rv;
|
||||
}
|
||||
}
|
||||
62
100_core/src/gplx/core/regxs/Regx_bldr.java
Normal file
62
100_core/src/gplx/core/regxs/Regx_bldr.java
Normal file
@@ -0,0 +1,62 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.core.regxs; import gplx.*; import gplx.core.*;
|
||||
import gplx.core.strings.*;
|
||||
public class Regx_bldr {
|
||||
public static String Includes(String characters) {return String_.Concat_any(Regx_bldr.Tkn_CharSetBegin, characters, Regx_bldr.Tkn_CharSetEnd);}
|
||||
public static String Excludes(String characters) {return String_.Concat_any(Regx_bldr.Tkn_CharSetBegin, Regx_bldr.Tkn_Not, characters, Regx_bldr.Tkn_CharSetEnd);}
|
||||
public static String WholeWord(String word) {return String_.Concat_any("(?<![A-Za-z0-9_])", EscapeAll(word), "(?![A-Za-z0-9_])");}
|
||||
public static String EscapeAll(String text) {
|
||||
String_bldr sb = String_bldr_.new_();
|
||||
int len = String_.Len(text);
|
||||
for (int i = 0; i < len; i++) {
|
||||
char c = String_.CharAt(text, i);
|
||||
if (RegxChar_chk(c))
|
||||
sb.Add(Regx_bldr.Tkn_Escape);
|
||||
sb.Add(c);
|
||||
}
|
||||
return sb.XtoStr();
|
||||
}
|
||||
public static boolean RegxChar_chk(char c) {
|
||||
return
|
||||
( c == Regx_bldr.Tkn_Escape || c == Regx_bldr.Tkn_Or
|
||||
|| c == Regx_bldr.Tkn_LineBegin || c == Regx_bldr.Tkn_LineEnd
|
||||
|| c == Regx_bldr.Tkn_GroupBegin || c == Regx_bldr.Tkn_GroupEnd
|
||||
|| c == Regx_bldr.Tkn_RepBegin || c == Regx_bldr.Tkn_RepEnd
|
||||
|| c == Regx_bldr.Tkn_Wild_0Plus || c == Regx_bldr.Tkn_Wild_1Plus || c == Regx_bldr.Tkn_Wild_0or1
|
||||
|| c == Regx_bldr.Tkn_CharSetBegin || c == Regx_bldr.Tkn_CharSetEnd
|
||||
);
|
||||
}
|
||||
public static final char
|
||||
Tkn_LineBegin = '^'
|
||||
, Tkn_LineEnd = '$'
|
||||
, Tkn_AnyChar = '.' // except newline
|
||||
, Tkn_Wild_0Plus = '*'
|
||||
, Tkn_Wild_1Plus = '+'
|
||||
, Tkn_Wild_0or1 = '?'
|
||||
, Tkn_CharSetBegin = '['
|
||||
, Tkn_CharSetEnd = ']'
|
||||
, Tkn_GroupBegin = '('
|
||||
, Tkn_GroupEnd = ')'
|
||||
, Tkn_RepBegin = '{'
|
||||
, Tkn_RepEnd = '}'
|
||||
, Tkn_Not = '^'
|
||||
, Tkn_Or = '|'
|
||||
, Tkn_Escape = '\\'
|
||||
;
|
||||
}
|
||||
26
100_core/src/gplx/core/regxs/Regx_group.java
Normal file
26
100_core/src/gplx/core/regxs/Regx_group.java
Normal file
@@ -0,0 +1,26 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.core.regxs; import gplx.*; import gplx.core.*;
|
||||
/**
 * Value holder for one capture group of a regex match: a success flag, begin / end offsets
 * and the captured text.
 */
public class Regx_group {
	/** Shared zero-length array for matches that have no groups. */
	public static final Regx_group[] Ary_empty = {};

	private boolean rslt;
	int bgn;
	int end;
	private String val;

	/**
	 * @param rslt success flag for the group
	 * @param bgn  begin offset of the group
	 * @param end  end offset of the group
	 * @param val  captured text
	 */
	public Regx_group(boolean rslt, int bgn, int end, String val) {
		this.val = val;
		this.end = end;
		this.bgn = bgn;
		this.rslt = rslt;
	}

	/** Returns the success flag given at construction. */
	public boolean Rslt() {
		return this.rslt;
	}

	/** Returns the begin offset given at construction. */
	public int Bgn() {
		return this.bgn;
	}

	/** Returns the end offset given at construction. */
	public int End() {
		return this.end;
	}

	/** Returns the captured text given at construction. */
	public String Val() {
		return this.val;
	}
}
|
||||
28
100_core/src/gplx/core/regxs/Regx_match.java
Normal file
28
100_core/src/gplx/core/regxs/Regx_match.java
Normal file
@@ -0,0 +1,28 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012 gnosygnu@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as
|
||||
published by the Free Software Foundation, either version 3 of the
|
||||
License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package gplx.core.regxs; import gplx.*; import gplx.core.*;
|
||||
public class Regx_match {
|
||||
public Regx_match(boolean rslt, int find_bgn, int find_end, Regx_group[] groups) {this.rslt = rslt; this.find_bgn = find_bgn; this.find_end = find_end; this.groups = groups;}
|
||||
public boolean Rslt() {return rslt;} private boolean rslt;
|
||||
public boolean Rslt_none() {return !rslt;} // NOTE: was "|| find_end - find_bgn == 0"; DATE:2013-04-11; DATE:2014-09-02
|
||||
public int Find_bgn() {return find_bgn;} int find_bgn;
|
||||
public int Find_end() {return find_end;} int find_end;
|
||||
public int Find_len() {return find_end - find_bgn;}
|
||||
public Regx_group[] Groups() {return groups;} Regx_group[] groups = Regx_group.Ary_empty;
|
||||
public static final Regx_match[] Ary_empty = new Regx_match[0];
|
||||
}
|
||||
Reference in New Issue
Block a user