mirror of https://github.com/gnosygnu/xowa
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
89 lines
4.2 KiB
89 lines
4.2 KiB
/*
|
|
XOWA: the XOWA Offline Wiki Application
|
|
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
|
|
|
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
|
or alternatively under the terms of the Apache License Version 2.0.
|
|
|
|
You may use XOWA according to either of these licenses as is most appropriate
|
|
for your project on a case-by-case basis.
|
|
|
|
The terms of each license can be found in the source code repository:
|
|
|
|
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
|
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
|
*/
|
|
package gplx.langs.regxs; import gplx.*; import gplx.langs.*;
|
|
import java.util.regex.Matcher;
|
|
import java.util.regex.Pattern;
|
|
public class Regx_adp {
|
|
public Regx_adp(String regx, int flags) {
|
|
this.flags = flags;
|
|
Pattern_(regx);
|
|
}
|
|
public String Pattern() {return pattern;} public Regx_adp Pattern_(String val) {pattern = val; Under_sync(); return this;} private String pattern;
|
|
public boolean Pattern_is_invalid() {return pattern_is_invalid;} private boolean pattern_is_invalid = false;
|
|
public Exception Pattern_is_invalid_exception() {return pattern_is_invalid_exception;} private Exception pattern_is_invalid_exception = null;
|
|
public Regx_match[] Match_all(String text, int bgn) {
|
|
int idx = bgn;
|
|
List_adp rv = List_adp_.New();
|
|
int len = String_.Len(text);
|
|
while (idx <= len) { // NOTE: must be <= not < else "a?" will return null instead of ""; PAGE:en.d:民; DATE:2015-01-30
|
|
Regx_match match = this.Match(text, idx);
|
|
if (match.Rslt_none()) break;
|
|
rv.Add(match);
|
|
int find_bgn = match.Find_bgn();
|
|
int find_len = match.Find_len();
|
|
idx = find_len == 0 // find_bgn == find_end
|
|
? find_bgn + 1 // add 1 to resume search from next char; DATE:2014-09-02
|
|
: find_bgn + find_len // otherwise search after find_end
|
|
;
|
|
}
|
|
return (Regx_match[])rv.To_ary(Regx_match.class);
|
|
}
|
|
private int flags = FLAG__DOTALL | FLAG__UNICODE_CHARACTER_CLASS;// JRE.7:UNICODE_CHARACTER_CLASS; added during %w fix for en.w:A#; DATE:2015-06-10
|
|
private Pattern under;
|
|
public Pattern Under() {return under;}
|
|
private void Under_sync() {
|
|
try {under = Pattern.compile(pattern, flags);}
|
|
catch (Exception e) { // NOTE: if invalid, then default to empty pattern (which should return nothing); EX:d:〆る generates [^]; DATE:2013-10-20
|
|
pattern_is_invalid = true;
|
|
pattern_is_invalid_exception = e;
|
|
under = Pattern.compile("", flags);
|
|
}
|
|
}
|
|
public Regx_match Match(String input, int bgn) {
|
|
Matcher match = under.matcher(input);
|
|
boolean success = match.find(bgn);
|
|
int match_bgn = success ? match.start() : String_.Find_none;
|
|
int match_end = success ? match.end() : String_.Find_none;
|
|
Regx_group[] ary = Regx_group.Ary_empty;
|
|
int groups_len = match.groupCount();
|
|
if (success && groups_len > 0) {
|
|
// NOTE: by convention, there are n groups, but groups.count is n - 1 and groups[0] is entire match (not 1st group); see TEST: DATE:2019-12-28
|
|
groups_len++;
|
|
ary = new Regx_group[groups_len];
|
|
for (int i = 0; i < groups_len; i++) {
|
|
int match_start = match.start(i);
|
|
ary[i] = new Regx_group(match_start != -1, match_start, match.end(i), match.group(i));
|
|
}
|
|
}
|
|
return new Regx_match(success, match_bgn, match_end, ary);
|
|
}
|
|
public String ReplaceAll(String input, String replace) {return under.matcher(input).replaceAll(replace);}
|
|
// https://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html
|
|
public static final int
|
|
FLAG__NONE = 0
|
|
, FLAG__UNIX_LINES = Pattern.UNIX_LINES
|
|
, FLAG__CASE_INSENSITIVE = Pattern.CASE_INSENSITIVE
|
|
, FLAG__COMMENTS = Pattern.COMMENTS
|
|
, FLAG__MULTILINE = Pattern.MULTILINE
|
|
, FLAG__LITERAL = Pattern.LITERAL
|
|
, FLAG__DOTALL = Pattern.DOTALL
|
|
, FLAG__UNICODE_CASE = Pattern.UNICODE_CASE
|
|
, FLAG__CANON_EQ = Pattern.CANON_EQ
|
|
, FLAG__UNICODE_CHARACTER_CLASS = Pattern.UNICODE_CHARACTER_CLASS
|
|
;
|
|
public static final int FLAG__DEFAULT = FLAG__DOTALL | FLAG__UNICODE_CHARACTER_CLASS;// JRE.7:UNICODE_CHARACTER_CLASS; added during %w fix for en.w:A#; DATE:2015-06-10
|
|
}
|