diff --git a/100_core/src_150_text/gplx/texts/RegxAdp.java b/100_core/src/gplx/core/regxs/Regx_adp.java similarity index 76% rename from 100_core/src_150_text/gplx/texts/RegxAdp.java rename to 100_core/src/gplx/core/regxs/Regx_adp.java index a2e881ab9..05e43d252 100644 --- a/100_core/src_150_text/gplx/texts/RegxAdp.java +++ b/100_core/src/gplx/core/regxs/Regx_adp.java @@ -15,40 +15,19 @@ GNU Affero General Public License for more details. You should have received a copy of the GNU Affero General Public License along with this program. If not, see . */ -package gplx.texts; import gplx.*; +package gplx.core.regxs; import gplx.*; import gplx.core.*; import java.util.regex.Matcher; import java.util.regex.Pattern; -public class RegxAdp { - void Under_sync() { - try {under = Pattern.compile(pattern, Pattern.DOTALL | Pattern.UNICODE_CHARACTER_CLASS);} // JRE.7:UNICODE_CHARACTER_CLASS; added during %w fix for en.w:A#; DATE:2015-06-10 - catch (Exception e) { // NOTE: if invalid, then default to empty pattern (which should return nothing); EX:d:〆る generates [^]; DATE:2013-10-20 - pattern_is_invalid = true; - under = Pattern.compile("", Pattern.DOTALL | Pattern.UNICODE_CHARACTER_CLASS); - } - } private Pattern under; - public RegxMatch Match(String input, int bgn) { - Matcher match = under.matcher(input); - boolean success = match.find(bgn); - int match_bgn = success ? match.start() : String_.Find_none; - int match_end = success ? match.end() : String_.Find_none; - RegxGroup[] ary = RegxGroup.Ary_empty; - int groups_len = match.groupCount(); - if (success && groups_len > 0) { - ary = new RegxGroup[groups_len]; - for (int i = 0; i < groups_len; i++) - ary[i] = new RegxGroup(true, match.start(i + 1), match.end(i + 1), match.group(i + 1)); - } - return new RegxMatch(success, match_bgn, match_end, ary); - } - public String ReplaceAll(String input, String replace) {return under.matcher(input).replaceAll(replace);} - public String Pattern() {return pattern;} public RegxAdp Pattern_(String val) {pattern = val; Under_sync(); return this;} private String pattern; +public class Regx_adp { + @gplx.Internal protected Regx_adp(String regx) {Pattern_(regx);} + public String Pattern() {return pattern;} public Regx_adp Pattern_(String val) {pattern = val; Under_sync(); return this;} private String pattern; public boolean Pattern_is_invalid() {return pattern_is_invalid;} private boolean pattern_is_invalid = false; - public RegxMatch[] Match_all(String text, int bgn) { + public Regx_match[] Match_all(String text, int bgn) { int idx = bgn; List_adp rv = List_adp_.new_(); int len = String_.Len(text); while (idx <= len) { // NOTE: must be <= not < else "a?" will return null instead of ""; PAGE:en.d:民; DATE:2015-01-30 - RegxMatch match = this.Match(text, idx); + Regx_match match = this.Match(text, idx); if (match.Rslt_none()) break; rv.Add(match); int find_bgn = match.Find_bgn(); @@ -58,7 +37,29 @@ public class RegxAdp { : find_bgn + find_len // otherwise search after find_end ; } - return (RegxMatch[])rv.To_ary(RegxMatch.class); + return (Regx_match[])rv.To_ary(Regx_match.class); + } + private Pattern under; + void Under_sync() { + try {under = Pattern.compile(pattern, Pattern.DOTALL | Pattern.UNICODE_CHARACTER_CLASS);} // JRE.7:UNICODE_CHARACTER_CLASS; added during %w fix for en.w:A#; DATE:2015-06-10 + catch (Exception e) { // NOTE: if invalid, then default to empty pattern (which should return nothing); EX:d:〆る generates [^]; DATE:2013-10-20 + pattern_is_invalid = true; + under = Pattern.compile("", Pattern.DOTALL | Pattern.UNICODE_CHARACTER_CLASS); + } + } + public Regx_match Match(String input, int bgn) { + Matcher match = under.matcher(input); + boolean success = match.find(bgn); + int match_bgn = success ? match.start() : String_.Find_none; + int match_end = success ? match.end() : String_.Find_none; + Regx_group[] ary = Regx_group.Ary_empty; + int groups_len = match.groupCount(); + if (success && groups_len > 0) { + ary = new Regx_group[groups_len]; + for (int i = 0; i < groups_len; i++) + ary[i] = new Regx_group(true, match.start(i + 1), match.end(i + 1), match.group(i + 1)); + } + return new Regx_match(success, match_bgn, match_end, ary); + } + public String ReplaceAll(String input, String replace) {return under.matcher(input).replaceAll(replace);} } - @gplx.Internal protected RegxAdp(String regx) {Pattern_(regx);} -} diff --git a/100_core/src_150_text/gplx/texts/RegxAdp_mpo_find.java b/100_core/src/gplx/core/regxs/Regx_adp_.java similarity index 60% rename from 100_core/src_150_text/gplx/texts/RegxAdp_mpo_find.java rename to 100_core/src/gplx/core/regxs/Regx_adp_.java index a4d83413f..02c1b1130 100644 --- a/100_core/src_150_text/gplx/texts/RegxAdp_mpo_find.java +++ b/100_core/src/gplx/core/regxs/Regx_adp_.java @@ -15,16 +15,15 @@ GNU Affero General Public License for more details. You should have received a copy of the GNU Affero General Public License along with this program. If not, see . */ -package gplx.texts; import gplx.*; -public class RegxAdp_mpo_find { - public String Input() {return input;} public RegxAdp_mpo_find Input_(String val) {input = val; return this;} private String input; - public String Find() {return find;} public RegxAdp_mpo_find Find_(String val) {find = val; return this;} private String find; - public List_adp Exec_asList() { - RegxAdp regx = RegxAdp_.new_(find); +package gplx.core.regxs; import gplx.*; import gplx.core.*; +public class Regx_adp_ { + public static Regx_adp new_(String pattern) {return new Regx_adp(pattern);} + public static List_adp Find_all(String input, String find) { + Regx_adp regx = Regx_adp_.new_(find); int idx = 0; List_adp rv = List_adp_.new_(); while (true) { - RegxMatch match = regx.Match(input, idx); + Regx_match match = regx.Match(input, idx); if (match.Rslt_none()) break; rv.Add(match); int findBgn = match.Find_bgn(); @@ -33,4 +32,12 @@ public class RegxAdp_mpo_find { } return rv; } + public static String Replace(String raw, String regx_str, String replace) { + Regx_adp regx = Regx_adp_.new_(regx_str); + return regx.ReplaceAll(raw, replace); + } + public static boolean Match(String input, String pattern) { + Regx_adp rv = new Regx_adp(pattern); + return rv.Match(input, 0).Rslt(); + } } diff --git a/100_core/src_150_text/gplx/texts/RegxAdp__tst.java b/100_core/src/gplx/core/regxs/Regx_adp__tst.java similarity index 76% rename from 100_core/src_150_text/gplx/texts/RegxAdp__tst.java rename to 100_core/src/gplx/core/regxs/Regx_adp__tst.java index e1e930ac5..352f91da3 100644 --- a/100_core/src_150_text/gplx/texts/RegxAdp__tst.java +++ b/100_core/src/gplx/core/regxs/Regx_adp__tst.java @@ -15,36 +15,36 @@ GNU Affero General Public License for more details. You should have received a copy of the GNU Affero General Public License along with this program. If not, see . */ -package gplx.texts; import gplx.*; +package gplx.core.regxs; import gplx.*; import gplx.core.*; import org.junit.*; -public class RegxAdp__tst implements TfdsEqListItmStr { +public class Regx_adp__tst implements TfdsEqListItmStr { @Test public void Match() { tst_Match("a", "a", true); // basic tst_Match("a", "b", false); // matchNot tst_Match("a", "ab", true); // matchPart tst_Match("a\\+b", "a+b", true); // matchEscape tst_Match("[^a]", "b", true); // charSet_negate - } void tst_Match(String find, String input, boolean expd) {Tfds.Eq(expd, RegxAdp_.Match(input, find));} + } void tst_Match(String find, String input, boolean expd) {Tfds.Eq(expd, Regx_adp_.Match(input, find));} @Test public void Match_all() { tst_Match_all("#REDIRECT [[Template:Error]]", "^\\p{Nd}*", 1); // handle match = true but len = 0; DATE:2013-04-11 tst_Match_all("a", "$", 1); // $ should match once, not zero; DATE:2014-09-02 - } void tst_Match_all(String input, String regx, int expd) {Tfds.Eq(expd, RegxAdp_.new_(regx).Match_all(input, 0).length);} + } void tst_Match_all(String input, String regx, int expd) {Tfds.Eq(expd, Regx_adp_.new_(regx).Match_all(input, 0).length);} @Test public void Replace() { tst_Replace("ab", "a", "b", "bb"); // basic tst_Replace("ab", "c", "b", "ab"); // replaceNot tst_Replace("aba", "a", "b", "bbb"); // replaceMultiple - } void tst_Replace(String input, String find, String replace, String expd) {Tfds.Eq(expd, RegxAdp_.Replace(input, find, replace));} + } void tst_Replace(String input, String find, String replace, String expd) {Tfds.Eq(expd, Regx_adp_.Replace(input, find, replace));} @Test public void Match_WholeWord() { tst_WholeWord("a", "ab a", true); // pass a tst_WholeWord("a", "ab c", false); // fail ab tst_WholeWord("a", "a_", false); // fail a_ tst_WholeWord("[a]", "a [a] c", true); // pass [a] tst_WholeWord("[a]", "a[a]c", false); // fail a[a]c - } void tst_WholeWord(String regx, String text, boolean expd) {Tfds.Eq(expd, RegxAdp_.Match(text, RegxBldr.WholeWord(regx)));} + } void tst_WholeWord(String regx, String text, boolean expd) {Tfds.Eq(expd, Regx_adp_.Match(text, Regx_bldr.WholeWord(regx)));} @Test public void Match_As() { tst_Regx("public static [A-Za-z0-9_]+ as_\\(Object obj\\)", "public static Obj1 as_(Object obj) {return obj instanceof Obj1 ? (Obj1)obj : null;}", true); tst_Regx("public static [A-Za-z0-9_]+ as_\\(Object obj\\)", "public static boolean Asterisk(Object obj) {}", false); - } void tst_Regx(String regx, String text, boolean expd) {Tfds.Eq(expd, RegxAdp_.Match(text, regx));} + } void tst_Regx(String regx, String text, boolean expd) {Tfds.Eq(expd, Regx_adp_.Match(text, regx));} @Test public void Find() { tst_Matches("b", "a b c b a", match_(2, 1), match_(6, 1)); tst_Matches("d", "a b c b a"); @@ -53,30 +53,30 @@ public class RegxAdp__tst implements TfdsEqListItmStr { @Test public void Groups() { tst_Groups("abc def ghi dz", "(d\\p{L}+)", "def", "dz"); } - RegxMatch[] matches_(int... bgnAry) { + Regx_match[] matches_(int... bgnAry) { int aryLen = Array_.Len(bgnAry); - RegxMatch[] rv = new RegxMatch[aryLen]; + Regx_match[] rv = new Regx_match[aryLen]; for (int i = 0; i < aryLen; i++) rv[i] = match_(bgnAry[i]); return rv; } - RegxMatch match_(int bgn) {return match_(bgn, Int_.MinValue);} - RegxMatch match_(int bgn, int len) {return new RegxMatch(true, bgn, bgn + len, RegxGroup.Ary_empty);} - void tst_Matches(String find, String input, RegxMatch... expd) { + Regx_match match_(int bgn) {return match_(bgn, Int_.MinValue);} + Regx_match match_(int bgn, int len) {return new Regx_match(true, bgn, bgn + len, Regx_group.Ary_empty);} + void tst_Matches(String find, String input, Regx_match... expd) { List_adp expdList = Array_.XtoList(expd); - List_adp actlList = RegxAdp_.Find_args(input, find).Exec_asList(); + List_adp actlList = Regx_adp_.Find_all(input, find); Tfds.Eq_list(expdList, actlList, this); } void tst_Groups(String text, String regx, String... expd) { - RegxAdp regx_mgr = RegxAdp_.new_(regx); - RegxMatch[] rslts = regx_mgr.Match_all(text, 0); + Regx_adp regx_mgr = Regx_adp_.new_(regx); + Regx_match[] rslts = regx_mgr.Match_all(text, 0); Tfds.Eq_ary_str(expd, To_ary(rslts)); } - String[] To_ary(RegxMatch[] ary) { + String[] To_ary(Regx_match[] ary) { List_adp rv = List_adp_.new_(); int len = ary.length; for (int i = 0; i < len; i++) { - RegxMatch itm = ary[i]; + Regx_match itm = ary[i]; int cap_len = itm.Groups().length; for (int j = 0; j < cap_len; j++) { rv.Add(itm.Groups()[j].Val()); @@ -85,7 +85,7 @@ public class RegxAdp__tst implements TfdsEqListItmStr { return rv.To_str_ary(); } public String XtoStr(Object curObj, Object expdObj) { - RegxMatch cur = (RegxMatch)curObj, expd = (RegxMatch)expdObj; + Regx_match cur = (Regx_match)curObj, expd = (Regx_match)expdObj; String rv = "bgn=" + cur.Find_bgn(); if (expd != null && expd.Find_len() != Int_.MinValue) rv += " len=" + cur.Find_len(); return rv; diff --git a/100_core/src_150_text/gplx/texts/RegxBldr.java b/100_core/src/gplx/core/regxs/Regx_bldr.java similarity index 69% rename from 100_core/src_150_text/gplx/texts/RegxBldr.java rename to 100_core/src/gplx/core/regxs/Regx_bldr.java index f4489ad43..cae6f4c69 100644 --- a/100_core/src_150_text/gplx/texts/RegxBldr.java +++ b/100_core/src/gplx/core/regxs/Regx_bldr.java @@ -15,11 +15,11 @@ GNU Affero General Public License for more details. You should have received a copy of the GNU Affero General Public License along with this program. If not, see . */ -package gplx.texts; import gplx.*; +package gplx.core.regxs; import gplx.*; import gplx.core.*; import gplx.core.strings.*; -public class RegxBldr { - public static String Includes(String characters) {return String_.Concat_any(RegxBldr.Tkn_CharSetBegin, characters, RegxBldr.Tkn_CharSetEnd);} - public static String Excludes(String characters) {return String_.Concat_any(RegxBldr.Tkn_CharSetBegin, RegxBldr.Tkn_Not, characters, RegxBldr.Tkn_CharSetEnd);} +public class Regx_bldr { + public static String Includes(String characters) {return String_.Concat_any(Regx_bldr.Tkn_CharSetBegin, characters, Regx_bldr.Tkn_CharSetEnd);} + public static String Excludes(String characters) {return String_.Concat_any(Regx_bldr.Tkn_CharSetBegin, Regx_bldr.Tkn_Not, characters, Regx_bldr.Tkn_CharSetEnd);} public static String WholeWord(String word) {return String_.Concat_any("(?. */ -package gplx.texts; import gplx.*; -public class RegxGroup { - public RegxGroup(boolean rslt, int bgn, int end, String val) {this.rslt = rslt; this.bgn = bgn; this.end = end; this.val = val;} +package gplx.core.regxs; import gplx.*; import gplx.core.*; +public class Regx_group { + public Regx_group(boolean rslt, int bgn, int end, String val) {this.rslt = rslt; this.bgn = bgn; this.end = end; this.val = val;} public boolean Rslt() {return rslt;} private boolean rslt; public int Bgn() {return bgn;} int bgn; public int End() {return end;} int end; public String Val() {return val;} private String val; - public static final RegxGroup[] Ary_empty = new RegxGroup[0]; + public static final Regx_group[] Ary_empty = new Regx_group[0]; } diff --git a/100_core/src_150_text/gplx/texts/RegxMatch.java b/100_core/src/gplx/core/regxs/Regx_match.java similarity index 71% rename from 100_core/src_150_text/gplx/texts/RegxMatch.java rename to 100_core/src/gplx/core/regxs/Regx_match.java index 011b67885..b780ec042 100644 --- a/100_core/src_150_text/gplx/texts/RegxMatch.java +++ b/100_core/src/gplx/core/regxs/Regx_match.java @@ -15,14 +15,14 @@ GNU Affero General Public License for more details. You should have received a copy of the GNU Affero General Public License along with this program. If not, see . */ -package gplx.texts; import gplx.*; -public class RegxMatch { - public RegxMatch(boolean rslt, int find_bgn, int find_end, RegxGroup[] groups) {this.rslt = rslt; this.find_bgn = find_bgn; this.find_end = find_end; this.groups = groups;} +package gplx.core.regxs; import gplx.*; import gplx.core.*; +public class Regx_match { + public Regx_match(boolean rslt, int find_bgn, int find_end, Regx_group[] groups) {this.rslt = rslt; this.find_bgn = find_bgn; this.find_end = find_end; this.groups = groups;} public boolean Rslt() {return rslt;} private boolean rslt; public boolean Rslt_none() {return !rslt;} // NOTE: was "|| find_end - find_bgn == 0"; DATE:2013-04-11; DATE:2014-09-02 public int Find_bgn() {return find_bgn;} int find_bgn; public int Find_end() {return find_end;} int find_end; public int Find_len() {return find_end - find_bgn;} - public RegxGroup[] Groups() {return groups;} RegxGroup[] groups = RegxGroup.Ary_empty; - public static final RegxMatch[] Ary_empty = new RegxMatch[0]; + public Regx_group[] Groups() {return groups;} Regx_group[] groups = Regx_group.Ary_empty; + public static final Regx_match[] Ary_empty = new Regx_match[0]; } diff --git a/100_core/src_150_text/gplx/texts/RegxAdp_.java b/100_core/src_150_text/gplx/texts/RegxAdp_.java deleted file mode 100644 index e09000507..000000000 --- a/100_core/src_150_text/gplx/texts/RegxAdp_.java +++ /dev/null @@ -1,28 +0,0 @@ -/* -XOWA: the XOWA Offline Wiki Application -Copyright (C) 2012 gnosygnu@gmail.com - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU Affero General Public License as -published by the Free Software Foundation, either version 3 of the -License, or (at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU Affero General Public License for more details. - -You should have received a copy of the GNU Affero General Public License -along with this program. If not, see . -*/ -package gplx.texts; import gplx.*; -public class RegxAdp_ { - public static RegxAdp_mpo_find Find_args(String input, String find) {return new RegxAdp_mpo_find().Input_(input).Find_(find);} - public static RegxAdp_mpo_replace Replace_args(String input, String find, String replace) {return new RegxAdp_mpo_replace().Input_(input).Find_(find).Replace_(replace);} - public static RegxAdp new_(String pattern) {return new RegxAdp(pattern);} - public static String Replace(String raw, String regx, String replace) {return Replace_args(raw, regx, replace).Exec_asStr();} - public static boolean Match(String input, String pattern) { - RegxAdp rv = new RegxAdp(pattern); - return rv.Match(input, 0).Rslt(); - } -} diff --git a/100_core/src_150_text/gplx/texts/RegxAdp_mpo_replace.java b/100_core/src_150_text/gplx/texts/RegxAdp_mpo_replace.java deleted file mode 100644 index 9165092e7..000000000 --- a/100_core/src_150_text/gplx/texts/RegxAdp_mpo_replace.java +++ /dev/null @@ -1,27 +0,0 @@ -/* -XOWA: the XOWA Offline Wiki Application -Copyright (C) 2012 gnosygnu@gmail.com - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU Affero General Public License as -published by the Free Software Foundation, either version 3 of the -License, or (at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU Affero General Public License for more details. - -You should have received a copy of the GNU Affero General Public License -along with this program. If not, see . -*/ -package gplx.texts; import gplx.*; -public class RegxAdp_mpo_replace { - public String Input() {return input;} public RegxAdp_mpo_replace Input_(String val) {input = val; return this;} private String input; - public String Find() {return find;} public RegxAdp_mpo_replace Find_(String val) {find = val; return this;} private String find; - public String Replace() {return replace;} public RegxAdp_mpo_replace Replace_(String val) {replace = val; return this;} private String replace; - public String Exec_asStr() { - RegxAdp regx = RegxAdp_.new_(find); - return regx.ReplaceAll(input, replace); - } -} diff --git a/100_core/src_150_text/gplx/texts/RegxPatn_cls_ioMatch.java b/100_core/src_150_text/gplx/texts/RegxPatn_cls_ioMatch.java index f156a3e9f..12221618b 100644 --- a/100_core/src_150_text/gplx/texts/RegxPatn_cls_ioMatch.java +++ b/100_core/src_150_text/gplx/texts/RegxPatn_cls_ioMatch.java @@ -16,12 +16,13 @@ You should have received a copy of the GNU Affero General Public License along with this program. If not, see . */ package gplx.texts; import gplx.*; +import gplx.core.regxs.*; public class RegxPatn_cls_ioMatch { public String Raw() {return raw;} private String raw; public boolean CaseSensitive() {return caseSensitive;} private boolean caseSensitive; public boolean Matches(String text) { text = String_.CaseNormalize(caseSensitive, text); - return RegxAdp_.Match(text, compiled);} // WNT-centric: Io_mgr paths are case-insensitive; + return Regx_adp_.Match(text, compiled);} // WNT-centric: Io_mgr paths are case-insensitive; @Override public String toString() {return raw;} String compiled; diff --git a/100_core/src_150_text/gplx/texts/RegxPatn_cls_ioMatch_.java b/100_core/src_150_text/gplx/texts/RegxPatn_cls_ioMatch_.java index 5bbbb6428..10f124fa8 100644 --- a/100_core/src_150_text/gplx/texts/RegxPatn_cls_ioMatch_.java +++ b/100_core/src_150_text/gplx/texts/RegxPatn_cls_ioMatch_.java @@ -16,7 +16,7 @@ You should have received a copy of the GNU Affero General Public License along with this program. If not, see . */ package gplx.texts; import gplx.*; -import gplx.core.strings.*; +import gplx.core.strings.*; import gplx.core.regxs.*; public class RegxPatn_cls_ioMatch_ { public static final String Wildcard = "*"; public static final String OrDelimiter = "|"; @@ -32,22 +32,22 @@ public class RegxPatn_cls_ioMatch_ { if (raw == ImpossiblePath) return ImpossiblePath; String_bldr sb = String_bldr_.new_(); - sb.Add(RegxBldr.Tkn_LineBegin); // Char_LineBegin for exact match (else "LIKE a" would match "abc") + sb.Add(Regx_bldr.Tkn_LineBegin); // Char_LineBegin for exact match (else "LIKE a" would match "abc") int rawLen = String_.Len(raw); for (int i = 0; i < rawLen; i++) { char c = String_.CharAt(raw, i); if (c == '\\') sb.Add("\\\\"); else if (c == '*') - sb.Add(".").Add(RegxBldr.Tkn_Wild_0Plus); + sb.Add(".").Add(Regx_bldr.Tkn_Wild_0Plus); else if (c == '|') - sb.Add(RegxBldr.Tkn_LineEnd).Add("|").Add(RegxBldr.Tkn_LineBegin); // each term must be bracketed by lineBgn/lineEnd; ex: A|B -> ^A$|^B$ + sb.Add(Regx_bldr.Tkn_LineEnd).Add("|").Add(Regx_bldr.Tkn_LineBegin); // each term must be bracketed by lineBgn/lineEnd; ex: A|B -> ^A$|^B$ else sb.Add(c); } - sb.Add(RegxBldr.Tkn_LineEnd); + sb.Add(Regx_bldr.Tkn_LineEnd); return sb.XtoStr(); } public static final String InvalidCharacters = "|*?\"<>"; // : / \ are omitted b/c they will cause full paths to fail - public static final String ValidCharacters = RegxBldr.Excludes(InvalidCharacters); + public static final String ValidCharacters = Regx_bldr.Excludes(InvalidCharacters); } diff --git a/100_core/src_150_text/gplx/texts/RegxPatn_cls_like.java b/100_core/src_150_text/gplx/texts/RegxPatn_cls_like.java index ee6580630..b50f7a734 100644 --- a/100_core/src_150_text/gplx/texts/RegxPatn_cls_like.java +++ b/100_core/src_150_text/gplx/texts/RegxPatn_cls_like.java @@ -16,10 +16,11 @@ You should have received a copy of the GNU Affero General Public License along with this program. If not, see . */ package gplx.texts; import gplx.*; +import gplx.core.regxs.*; public class RegxPatn_cls_like { public char Escape() {return escape;} char escape; public static final char EscapeDefault = '|'; public String Raw() {return raw;} private String raw; - public boolean Matches(String text) {return RegxAdp_.Match(text, compiled);} + public boolean Matches(String text) {return Regx_adp_.Match(text, compiled);} @Override public String toString() {return String_.Format("LIKE {0} ESCAPE {1} -> {2}", raw, escape, compiled);} String compiled; diff --git a/100_core/src_150_text/gplx/texts/RegxPatn_cls_like_.java b/100_core/src_150_text/gplx/texts/RegxPatn_cls_like_.java index e0a23f7fb..cb7c05c9a 100644 --- a/100_core/src_150_text/gplx/texts/RegxPatn_cls_like_.java +++ b/100_core/src_150_text/gplx/texts/RegxPatn_cls_like_.java @@ -16,7 +16,7 @@ You should have received a copy of the GNU Affero General Public License along with this program. If not, see . */ package gplx.texts; import gplx.*; -import gplx.core.strings.*; +import gplx.core.strings.*; import gplx.core.regxs.*; public class RegxPatn_cls_like_ { public static RegxPatn_cls_like parse_(String regxRaw, char escape) { String regx = Compile(regxRaw, escape); @@ -26,7 +26,7 @@ public class RegxPatn_cls_like_ { char Wildcard = '%', AnyChar = '_'; boolean insideCharSet = false; String_bldr sb = String_bldr_.new_(); - sb.Add(RegxBldr.Tkn_LineBegin); + sb.Add(Regx_bldr.Tkn_LineBegin); int rawLen = String_.Len(raw); for (int i = 0; i < rawLen; i++) { char c = String_.CharAt(raw, i); @@ -36,28 +36,28 @@ public class RegxPatn_cls_like_ { else throw Err_.new_wo_type("escape cannot be last char", "raw", raw, "escape", escape, "i", i); } else if (c == Wildcard) { // % -> .* - sb.Add(RegxBldr.Tkn_AnyChar).Add(RegxBldr.Tkn_Wild_0Plus); + sb.Add(Regx_bldr.Tkn_AnyChar).Add(Regx_bldr.Tkn_Wild_0Plus); } else if (c == AnyChar) // _ -> . - sb.Add(RegxBldr.Tkn_AnyChar); - else if (c == RegxBldr.Tkn_CharSetBegin) { // toggle insideCharSet for ^ + sb.Add(Regx_bldr.Tkn_AnyChar); + else if (c == Regx_bldr.Tkn_CharSetBegin) { // toggle insideCharSet for ^ insideCharSet = true; sb.Add(c); } - else if (c == RegxBldr.Tkn_CharSetEnd) { // toggle insideCharSet for ^ + else if (c == Regx_bldr.Tkn_CharSetEnd) { // toggle insideCharSet for ^ insideCharSet = false; sb.Add(c); } - else if (c == RegxBldr.Tkn_Not && insideCharSet) { // ^ is used for Not in CharSet, but also used for LineStart; do not escape if insideCharSet + else if (c == Regx_bldr.Tkn_Not && insideCharSet) { // ^ is used for Not in CharSet, but also used for LineStart; do not escape if insideCharSet insideCharSet = false; sb.Add(c); } - else if (RegxBldr.RegxChar_chk(c)) - sb.Add(RegxBldr.Tkn_Escape).Add(c); + else if (Regx_bldr.RegxChar_chk(c)) + sb.Add(Regx_bldr.Tkn_Escape).Add(c); else // regular text sb.Add(c); } - sb.Add(RegxBldr.Tkn_LineEnd); + sb.Add(Regx_bldr.Tkn_LineEnd); return sb.XtoStr(); } } diff --git a/100_core/src_150_text/gplx/texts/RegxPatn_cls_like_tst.java b/100_core/src_150_text/gplx/texts/RegxPatn_cls_like_tst.java index c82f0c872..791b45cb0 100644 --- a/100_core/src_150_text/gplx/texts/RegxPatn_cls_like_tst.java +++ b/100_core/src_150_text/gplx/texts/RegxPatn_cls_like_tst.java @@ -16,7 +16,7 @@ You should have received a copy of the GNU Affero General Public License along with this program. If not, see . */ package gplx.texts; import gplx.*; -import org.junit.*; +import org.junit.*; import gplx.core.regxs.*; public class RegxPatn_cls_like_tst { @Test public void Basic() { tst_Match("abcd", "abcd", true); // basic; pass @@ -60,17 +60,17 @@ public class RegxPatn_cls_like_tst { tst_Match("a~b", "a~b", '~', false); // escape char; fail tst_Match("a~b", "a~~b", '~', true); // escape char; pass } - @Test public void Chars() { // Escape RegxBldr; ex: LIKE 'a{' -> a\{ - tst_EscapeRegxChar(RegxBldr.Tkn_Escape); // \ - tst_EscapeRegxChar(RegxBldr.Tkn_GroupBegin); // [ - tst_EscapeRegxChar(RegxBldr.Tkn_GroupEnd); // ] - tst_EscapeRegxChar(RegxBldr.Tkn_LineBegin); // ^ - tst_EscapeRegxChar(RegxBldr.Tkn_LineEnd); // $ - tst_EscapeRegxChar(RegxBldr.Tkn_RepBegin); // { - tst_EscapeRegxChar(RegxBldr.Tkn_RepEnd); // } - tst_EscapeRegxChar(RegxBldr.Tkn_Wild_0or1); // ? - tst_EscapeRegxChar(RegxBldr.Tkn_Wild_0Plus); // * - tst_EscapeRegxChar(RegxBldr.Tkn_Wild_1Plus); // + + @Test public void Chars() { // Escape Regx_bldr; ex: LIKE 'a{' -> a\{ + tst_EscapeRegxChar(Regx_bldr.Tkn_Escape); // \ + tst_EscapeRegxChar(Regx_bldr.Tkn_GroupBegin); // [ + tst_EscapeRegxChar(Regx_bldr.Tkn_GroupEnd); // ] + tst_EscapeRegxChar(Regx_bldr.Tkn_LineBegin); // ^ + tst_EscapeRegxChar(Regx_bldr.Tkn_LineEnd); // $ + tst_EscapeRegxChar(Regx_bldr.Tkn_RepBegin); // { + tst_EscapeRegxChar(Regx_bldr.Tkn_RepEnd); // } + tst_EscapeRegxChar(Regx_bldr.Tkn_Wild_0or1); // ? + tst_EscapeRegxChar(Regx_bldr.Tkn_Wild_0Plus); // * + tst_EscapeRegxChar(Regx_bldr.Tkn_Wild_1Plus); // + } void tst_Match(String raw, String regx, boolean expd) {tst_Match(raw, regx, RegxPatn_cls_like.EscapeDefault, expd);} void tst_Match(String raw, String regx, char escape, boolean expd) { diff --git a/100_core/src_800_tst/gplx/Tfds.java b/100_core/src_800_tst/gplx/Tfds.java index c7ac2323c..c1786c6ea 100644 --- a/100_core/src_800_tst/gplx/Tfds.java +++ b/100_core/src_800_tst/gplx/Tfds.java @@ -19,6 +19,12 @@ package gplx; import gplx.core.strings.*; import gplx.core.consoles.*; public class Tfds { // URL:doc/gplx.tfds/Tfds.txt public static boolean SkipDb = false; + public static void Eq_bool (boolean expd , boolean actl, String fmt, Object... args) {Eq_str(Bool_.Xto_str_lower(expd), Bool_.Xto_str_lower(actl), fmt, args);} + public static void Eq_str (byte[] expd, String actl, String fmt, Object... args) {Eq_str(String_.new_u8(expd), actl, fmt, args);} + public static void Eq_str (byte[] expd, byte[] actl, String fmt, Object... args) {Eq_str(String_.new_u8(expd), String_.new_u8(actl), fmt, args);} + public static void Eq_str (String expd, byte[] actl, String fmt, Object... args) {Eq_str(expd, String_.new_u8(actl), fmt, args);} + public static void Eq_str (String expd, String actl, String fmt, Object... args) {Eq_wkr(expd, actl, true, String_.Format(fmt, args));} + public static void Eq(Object expd, Object actl) {Eq_wkr(expd, actl, true, EmptyStr);} public static void Eq_able(EqAble expd, EqAble actl) {Eq_able_wkr(expd, actl, true, EmptyStr);} public static void Eq_able(EqAble expd, EqAble actl, String fmt, Object... args) {Eq_able_wkr(expd, actl, true, String_.Format(fmt, args));} @@ -29,10 +35,11 @@ public class Tfds { // URL:doc/gplx.tfds/Tfds.txt public static void Eq_date(DateAdp expd, DateAdp actl) {Eq_wkr(expd.XtoStr_gplx(), actl.XtoStr_gplx(), true, EmptyStr);} public static void Eq_date(DateAdp expd, DateAdp actl, String fmt, Object... args){Eq_wkr(expd.XtoStr_gplx(), actl.XtoStr_gplx(), true, String_.Format(fmt, args));} public static void Eq_url(Io_url expd, Io_url actl) {Eq_wkr(expd.Raw(), actl.Raw(), true, EmptyStr);} + public static void Eq_str(String expd, byte[] actl) {Eq_wkr(expd, String_.new_u8(actl), true, EmptyStr);} public static void Eq_bry(String expd, byte[] actl) {Eq_wkr(expd, String_.new_u8(actl), true, EmptyStr);} public static void Eq_bry(byte[] expd, byte[] actl) {Eq_wkr(String_.new_u8(expd), String_.new_u8(actl), true, EmptyStr);} - public static void Eq_str(XtoStrAble expd, XtoStrAble actl, String msg) {Eq_wkr(expd.XtoStr(), actl.XtoStr(), true, msg);} - public static void Eq_str(XtoStrAble expd, XtoStrAble actl) {Eq_wkr(expd.XtoStr(), actl.XtoStr(), true, String_.Empty);} + public static void Eq_str_intf(XtoStrAble expd, XtoStrAble actl, String msg) {Eq_wkr(expd.XtoStr(), actl.XtoStr(), true, msg);} + public static void Eq_str_intf(XtoStrAble expd, XtoStrAble actl) {Eq_wkr(expd.XtoStr(), actl.XtoStr(), true, String_.Empty);} public static void Eq_str_lines(String lhs, String rhs) {Eq_str_lines(lhs, rhs, EmptyStr);} public static void Eq_str_lines(String lhs, String rhs, String note) { if (lhs == null && rhs == null) return; // true diff --git a/400_xowa/src/gplx/xowa/Xoa_app_.java b/400_xowa/src/gplx/xowa/Xoa_app_.java index a52b7744d..b9d5d9d5c 100644 --- a/400_xowa/src/gplx/xowa/Xoa_app_.java +++ b/400_xowa/src/gplx/xowa/Xoa_app_.java @@ -26,7 +26,7 @@ public class Xoa_app_ { boot_mgr.Run(args); } public static final String Name = "xowa"; - public static final String Version = "2.7.3.2"; + public static final String Version = "2.7.3.3"; public static String Build_date = "2012-12-30 00:00:00"; public static String Op_sys; public static String User_agent = ""; diff --git a/400_xowa/src/gplx/xowa/html/Xoh_page_wtr_wkr.java b/400_xowa/src/gplx/xowa/html/Xoh_page_wtr_wkr.java index c16cd2d91..e9ad11571 100644 --- a/400_xowa/src/gplx/xowa/html/Xoh_page_wtr_wkr.java +++ b/400_xowa/src/gplx/xowa/html/Xoh_page_wtr_wkr.java @@ -117,13 +117,15 @@ public class Xoh_page_wtr_wkr implements Bry_fmtr_arg { bfr.Add(gplx.xowa.apps.Xoa_gfs_php_mgr.Xto_php(tmp_bfr, Bool_.N, data_raw)); return; } - Xol_vnt_mgr vnt_mgr = wiki.Lang().Vnt_mgr(); if (ns_id == Xow_ns_.Id_file) // if [[File]], add boilerplate header app.Ns_file_page_mgr().Bld_html(wiki, ctx, page, bfr, page.Ttl(), wiki.Cfg_file_page(), page.File_queue()); gplx.xowa.html.tidy.Xoh_tidy_mgr tidy_mgr = app.Html_mgr().Tidy_mgr(); boolean tidy_enabled = tidy_mgr.Enabled(); Bry_bfr hdom_bfr = tidy_enabled ? app.Utl__bfr_mkr().Get_m001() : bfr; // if tidy, then write to tidy_bfr; note that bfr already has and written to it, so this can't be passed to tidy; DATE:2014-06-11 wiki.Html_mgr().Html_wtr().Write_all(hdom_bfr, page.Wikie().Ctx(), hctx, page.Root().Data_mid(), page.Root()); +// Xol_vnt_mgr vnt_mgr = wiki.Lang().Vnt_mgr(); +// if (vnt_mgr.Enabled()) // VNT +// hdom_bfr.Add(vnt_mgr.Convert_text(wiki, hdom_bfr.Xto_bry_and_clear())); if (tidy_enabled) { tidy_mgr.Run_tidy_html(page, hdom_bfr); bfr.Add_bfr_and_clear(hdom_bfr); @@ -143,8 +145,6 @@ public class Xoh_page_wtr_wkr implements Bry_fmtr_arg { else wiki.Html_mgr().Ctg_mgr().Bld(bfr, page, ctgs_len); } - if (vnt_mgr.Enabled()) // VNT - bfr.Add(vnt_mgr.Convert_text(wiki, bfr.Xto_bry_and_clear())); } private void Write_body_pre(Bry_bfr bfr, Xoae_app app, Xowe_wiki wiki, byte[] data_raw, Bry_bfr tmp_bfr) { Xoh_html_wtr_escaper.Escape(app.Parser_amp_mgr(), tmp_bfr, data_raw, 0, data_raw.length, false, false); diff --git a/400_xowa/src/gplx/xowa/wikis/Xoa_wiki_regy.java b/400_xowa/src/gplx/xowa/wikis/Xoa_wiki_regy.java index ee7b4d544..6fc56e952 100644 --- a/400_xowa/src/gplx/xowa/wikis/Xoa_wiki_regy.java +++ b/400_xowa/src/gplx/xowa/wikis/Xoa_wiki_regy.java @@ -39,7 +39,7 @@ public class Xoa_wiki_regy { } init_needed = true; } - public static void Make_wiki_dir(Xoae_app app, String domain_str) { // TEST: fake wiki_dir for Parse_from_url_bar; DATE:2014-02-16 + public static void Make_wiki_dir(Xoa_app app, String domain_str) { // TEST: fake wiki_dir for Parse_from_url_bar; DATE:2014-02-16 Io_url wiki_dir = app.Fsys_mgr().Wiki_dir(); Io_mgr.I.CreateDir(wiki_dir.GenSubDir(domain_str)); } diff --git a/400_xowa/src/gplx/xowa/xtns/scribunto/libs/Scrib_lib_ustring.java b/400_xowa/src/gplx/xowa/xtns/scribunto/libs/Scrib_lib_ustring.java index 82f57c1f4..463f261b6 100644 --- a/400_xowa/src/gplx/xowa/xtns/scribunto/libs/Scrib_lib_ustring.java +++ b/400_xowa/src/gplx/xowa/xtns/scribunto/libs/Scrib_lib_ustring.java @@ -16,7 +16,7 @@ You should have received a copy of the GNU Affero General Public License along with this program. If not, see . */ package gplx.xowa.xtns.scribunto.libs; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.scribunto.*; -import gplx.texts.*; import gplx.intl.*; +import gplx.core.regxs.*; import gplx.intl.*; public class Scrib_lib_ustring implements Scrib_lib { private final String_surrogate_utl surrogate_utl = new String_surrogate_utl(); public Scrib_lib_ustring(Scrib_core core) {this.core = core; gsub_mgr = new Scrib_lib_ustring_gsub_mgr(core, regx_converter);} private Scrib_core core; Scrib_lib_ustring_gsub_mgr gsub_mgr; @@ -69,12 +69,12 @@ public class Scrib_lib_ustring implements Scrib_lib { ; } regx = regx_converter.Parse(Bry_.new_u8(regx), Scrib_regx_converter.Anchor_G); - RegxAdp regx_adp = Scrib_lib_ustring.RegxAdp_new_(core.Ctx(), regx); - RegxMatch[] regx_rslts = regx_adp.Match_all(text_str, bgn_codepoint_idx); // NOTE: MW calculates an offset to handle mb strings. however, java's regex always takes offset in chars (not bytes like PHP preg_match); DATE:2014-03-04 + Regx_adp regx_adp = Scrib_lib_ustring.RegxAdp_new_(core.Ctx(), regx); + Regx_match[] regx_rslts = regx_adp.Match_all(text_str, bgn_codepoint_idx); // NOTE: MW calculates an offset to handle mb strings. however, java's regex always takes offset in chars (not bytes like PHP preg_match); DATE:2014-03-04 int len = regx_rslts.length; if (len == 0) return rslt.Init_ary_empty(); List_adp tmp_list = List_adp_.new_(); - RegxMatch match = regx_rslts[0]; // NOTE: take only 1st result; DATE:2014-08-27 + Regx_match match = regx_rslts[0]; // NOTE: take only 1st result; DATE:2014-08-27 int match_find_bgn_codepoint = match.Find_bgn(); // NOTE: java regex returns results in codepoint; PAGE:zh.w:南北鐵路 (越南) DATE:2014-08-27 int match_find_bgn_adj = -surrogate_utl.Count_surrogates__codepoint_idx1(text_bry, text_bry_len, bgn_byte_pos, match_find_bgn_codepoint - bgn_codepoint_idx); // NOTE: convert from java regex codepoint to lua / php char_idx; PAGE:zh.w:南北鐵路 (越南) DATE:2014-08-27 tmp_list.Add(match_find_bgn_codepoint + match_find_bgn_adj + -bgn_adj + Scrib_lib_ustring.Base1); @@ -100,13 +100,13 @@ public class Scrib_lib_ustring implements Scrib_lib { String regx = regx_converter.Parse(args.Cast_bry_or_null(1), Scrib_regx_converter.Anchor_G); int bgn = args.Cast_int_or(2, 1); bgn = Bgn_adjust(text, bgn); - RegxAdp regx_adp = Scrib_lib_ustring.RegxAdp_new_(core.Ctx(), regx); - RegxMatch[] regx_rslts = regx_adp.Match_all(text, bgn); + Regx_adp regx_adp = Scrib_lib_ustring.RegxAdp_new_(core.Ctx(), regx); + Regx_match[] regx_rslts = regx_adp.Match_all(text, bgn); int len = regx_rslts.length; if (len == 0) return rslt.Init_null(); // return null if no matches found; EX:w:Mount_Gambier_(volcano); DATE:2014-04-02; confirmed with en.d:民; DATE:2015-01-30 List_adp tmp_list = List_adp_.new_(); for (int i = 0; i < len; i++) { - RegxMatch match = regx_rslts[i]; + Regx_match match = regx_rslts[i]; AddCapturesFromMatch(tmp_list, match, text, regx_converter.Capt_ary(), true); } return rslt.Init_many_list(tmp_list); @@ -123,22 +123,22 @@ public class Scrib_lib_ustring implements Scrib_lib { String regx = args.Pull_str(1); KeyVal[] capt = args.Cast_kv_ary_or_null(2); int pos = args.Pull_int(3); - RegxAdp regx_adp = Scrib_lib_ustring.RegxAdp_new_(core.Ctx(), regx); - RegxMatch[] regx_rslts = regx_adp.Match_all(text, pos); + Regx_adp regx_adp = Scrib_lib_ustring.RegxAdp_new_(core.Ctx(), regx); + Regx_match[] regx_rslts = regx_adp.Match_all(text, pos); int len = regx_rslts.length; if (len == 0) return rslt.Init_many_objs(pos, KeyVal_.Ary_empty); - RegxMatch match = regx_rslts[0]; // NOTE: take only 1st result + Regx_match match = regx_rslts[0]; // NOTE: take only 1st result List_adp tmp_list = List_adp_.new_(); AddCapturesFromMatch(tmp_list, match, text, capt, true); // NOTE: was incorrectly set as false; DATE:2014-04-23 return rslt.Init_many_objs(match.Find_end(), Scrib_kv_utl_.base1_list_(tmp_list)); } - private void AddCapturesFromMatch(List_adp tmp_list, RegxMatch rslt, String text, KeyVal[] capts, boolean op_is_match) {// NOTE: this matches behavior in UstringLibrary.php!addCapturesFromMatch - RegxGroup[] grps = rslt.Groups(); + private void AddCapturesFromMatch(List_adp tmp_list, Regx_match rslt, String text, KeyVal[] capts, boolean op_is_match) {// NOTE: this matches behavior in UstringLibrary.php!addCapturesFromMatch + Regx_group[] grps = rslt.Groups(); int grps_len = grps.length; int capts_len = capts == null ? 0 : capts.length; if (grps_len > 0) { for (int j = 0; j < grps_len; j++) { - RegxGroup grp = grps[j]; + Regx_group grp = grps[j]; if ( j < capts_len // bounds check b/c null can be passed && Bool_.cast_(capts[j].Val()) // check if true; indicates that group is "()" or "anypos" see regex converter; DATE:2014-04-23 ) @@ -151,8 +151,8 @@ public class Scrib_lib_ustring implements Scrib_lib { && tmp_list.Count() == 0) // only add match once; EX: "aaaa", "a" will have four matches; get 1st; DATE:2014-04-02 tmp_list.Add(String_.Mid(text, rslt.Find_bgn(), rslt.Find_end())); } - public static RegxAdp RegxAdp_new_(Xop_ctx ctx, String regx) { - RegxAdp rv = RegxAdp_.new_(regx); + public static Regx_adp RegxAdp_new_(Xop_ctx ctx, String regx) { + Regx_adp rv = Regx_adp_.new_(regx); if (rv.Pattern_is_invalid()) { ctx.App().Usr_dlg().Warn_many("", "", "regx is invalid: regx=~{0} page=~{1}", regx, String_.new_u8(ctx.Cur_page().Ttl().Page_db())); } @@ -212,8 +212,8 @@ class Scrib_lib_ustring_gsub_mgr { else throw Err_.new_unhandled(ClassAdp_.NameOf_type(repl_type)); } private String Exec_repl(byte repl_tid, byte[] repl_bry, String text, String regx, int limit) { - RegxAdp regx_mgr = Scrib_lib_ustring.RegxAdp_new_(core.Ctx(), regx); - RegxMatch[] rslts = regx_mgr.Match_all(text, 0); + Regx_adp regx_mgr = Scrib_lib_ustring.RegxAdp_new_(core.Ctx(), regx); + Regx_match[] rslts = regx_mgr.Match_all(text, 0); if ( rslts.length == 0 // PHP: If matches are found, the new subject will be returned, otherwise subject will be returned unchanged.; http://php.net/manual/en/function.preg-replace-callback.php || regx_mgr.Pattern_is_invalid() // NOTE: invalid patterns should return self; EX:[^]; DATE:2014-09-02 ) return text; @@ -222,7 +222,7 @@ class Scrib_lib_ustring_gsub_mgr { int pos = 0; for (int i = 0; i < len; i++) { if (limit > -1 && repl_count == limit) break; - RegxMatch rslt = rslts[i]; + Regx_match rslt = rslts[i]; tmp_bfr.Add_str(String_.Mid(text, pos, rslt.Find_bgn())); // NOTE: regx returns char pos (not bry); must add as String, not bry; DATE:2013-07-17 Exec_repl_itm(tmp_bfr, repl_tid, repl_bry, text, rslt); pos = rslt.Find_end(); @@ -233,7 +233,7 @@ class Scrib_lib_ustring_gsub_mgr { tmp_bfr.Add_str(String_.Mid(text, pos, text_len)); // NOTE: regx returns char pos (not bry); must add as String, not bry; DATE:2013-07-17 return tmp_bfr.Xto_str_and_clear(); } - private void Exec_repl_itm(Bry_bfr tmp_bfr, byte repl_tid, byte[] repl_bry, String text, RegxMatch match) { + private void Exec_repl_itm(Bry_bfr tmp_bfr, byte repl_tid, byte[] repl_bry, String text, Regx_match match) { switch (repl_tid) { case Repl_tid_string: int len = repl_bry.length; @@ -251,7 +251,7 @@ class Scrib_lib_ustring_gsub_mgr { case Byte_ascii.Num_5: case Byte_ascii.Num_6: case Byte_ascii.Num_7: case Byte_ascii.Num_8: case Byte_ascii.Num_9: int idx = b - Byte_ascii.Num_0 - List_adp_.Base1; if (idx < match.Groups().length) { // retrieve numbered capture; TODO: support more than 9 captures - RegxGroup grp = match.Groups()[idx]; + Regx_group grp = match.Groups()[idx]; tmp_bfr.Add_str(String_.Mid(text, grp.Bgn(), grp.End())); // NOTE: grp.Bgn() / .End() is for String pos (bry pos will fail for utf8 strings) } else { @@ -278,13 +278,13 @@ class Scrib_lib_ustring_gsub_mgr { break; case Repl_tid_table: { int match_bgn = -1, match_end = -1; - RegxGroup[] grps = match.Groups(); + Regx_group[] grps = match.Groups(); if (grps.length == 0) { match_bgn = match.Find_bgn(); match_end = match.Find_end(); } else { // group exists, take first one (logic matches Scribunto); PAGE:en.w:Bannered_routes_of_U.S._Route_60; DATE:2014-08-15 - RegxGroup grp = grps[0]; + Regx_group grp = grps[0]; match_bgn = grp.Bgn(); match_end = grp.End(); } @@ -298,7 +298,7 @@ class Scrib_lib_ustring_gsub_mgr { } case Repl_tid_luacbk: { KeyVal[] luacbk_args = null; - RegxGroup[] grps = match.Groups(); + Regx_group[] grps = match.Groups(); int grps_len = grps.length; if (grps_len == 0) { // no match; use original String String find_str = String_.Mid(text, match.Find_bgn(), match.Find_end()); @@ -307,7 +307,7 @@ class Scrib_lib_ustring_gsub_mgr { else { // match; build ary of matches; (see UStringLibrary.php) luacbk_args = new KeyVal[grps_len]; for (int i = 0; i < grps_len; i++) { - RegxGroup grp = grps[i]; + Regx_group grp = grps[i]; String find_str = String_.Mid(text, grp.Bgn(), grp.End()); luacbk_args[i] = KeyVal_.int_(i + Scrib_core.Base_1, find_str); } diff --git a/400_xowa/src/gplx/xowa/xtns/scribunto/libs/Scrib_lib_ustring__lib_tst.java b/400_xowa/src/gplx/xowa/xtns/scribunto/libs/Scrib_lib_ustring__lib_tst.java index 67776ceb1..b390b390b 100644 --- a/400_xowa/src/gplx/xowa/xtns/scribunto/libs/Scrib_lib_ustring__lib_tst.java +++ b/400_xowa/src/gplx/xowa/xtns/scribunto/libs/Scrib_lib_ustring__lib_tst.java @@ -16,7 +16,7 @@ You should have received a copy of the GNU Affero General Public License along with this program. If not, see . */ package gplx.xowa.xtns.scribunto.libs; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.scribunto.*; -import org.junit.*; +import org.junit.*; import gplx.core.regxs.*; public class Scrib_lib_ustring__lib_tst { @Before public void init() { fxt.Clear_for_lib(); @@ -116,11 +116,19 @@ public class Scrib_lib_ustring__lib_tst { , " 1=2" )); } - @Test public void Gsub_frontier_pattern() { // PURPOSE: handle frontier pattern; EX:"%f[%a]"; NOTE:test will fail if run in 1.6 environment; DATE:2015-07-20 +// @Test public void Gsub_frontier_pattern() { // PURPOSE: handle frontier pattern; EX:"%f[%a]"; DATE:2015-07-21 // fxt.Init_cbk(Scrib_core.Key_mw_interface, fxt.Core().Lib_ustring(), Scrib_lib_ustring.Invk_gsub); -// //Exec_gsub_regx("THE QUICK brOWN FOx JUMPS", "%f[%a]%u+%f[%A]", 1, "", "THE;1;QUICK;2;JUMPS;3;"); -// Exec_gsub_regx("thE QUICK brOWN FOx JUMPS", "%f[%a]%u+%f[%A]", 1, "", "THE;1;QUICK;2;JUMPS;3;"); - } +// Exec_gsub_regx("a b c", "%f[%W]", 5, "()", "a() b() c();3"); +// Exec_gsub_regx("abC DEF gHI JKm NOP", "%f[%a]%u+%f[%A]", Int_.MaxValue, "()", "abC () gHI JKm ();2"); // based on http://lua-users.org/wiki/FrontierPattern +// } +// @Test public void Gsub_frontier_pattern_utl() {// PURPOSE: standalone test for \0 logic in frontier pattern; note that verified against PHP: echo(preg_match( "/[\w]/us", "\0" )); DATE:2015-07-21 +// Tfds.Eq(Bool_.N, Regx_adp_.Match("\0", "a")); // \0 not matched by a +// Tfds.Eq(Bool_.N, Regx_adp_.Match("\0", "0")); // \0 not matched by numeric 0 +// Tfds.Eq(Bool_.N, Regx_adp_.Match("\0", "[\\w]")); // \0 not matched by word_char +// Tfds.Eq(Bool_.Y, Regx_adp_.Match("\0", "[\\W]")); // \0 matched by !word_char +// Tfds.Eq(Bool_.Y, Regx_adp_.Match("\0", "[\\x]")); // \0 matched by any_char +// Tfds.Eq(Bool_.Y, Regx_adp_.Match("\0", "[\\X]")); // \0 matched by !any_char +// } // @Test public void Match_viwiktionary() { // fxt.Init_cbk(Scrib_core.Key_mw_interface, fxt.Core().Lib_ustring(), Scrib_lib_ustring.Invk_match); // Exec_match("tr" , "()(r)", 1, ";"); // should return all matches diff --git a/400_xowa/src/gplx/xowa/xtns/scribunto/libs/Scrib_regx_converter.java b/400_xowa/src/gplx/xowa/xtns/scribunto/libs/Scrib_regx_converter.java index f17a0d058..2a6f65444 100644 --- a/400_xowa/src/gplx/xowa/xtns/scribunto/libs/Scrib_regx_converter.java +++ b/400_xowa/src/gplx/xowa/xtns/scribunto/libs/Scrib_regx_converter.java @@ -16,6 +16,7 @@ You should have received a copy of the GNU Affero General Public License along with this program. If not, see . */ package gplx.xowa.xtns.scribunto.libs; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.scribunto.*; +import gplx.core.regxs.*; public class Scrib_regx_converter { private List_adp capt_list = List_adp_.new_(), grps_parens = List_adp_.new_(); private List_adp grps_open = List_adp_.new_(); public Scrib_regx_converter() {Init();} @@ -94,20 +95,21 @@ public class Scrib_regx_converter { } } break; -// case Byte_ascii.Ltr_f: { // EX: "%f[%a]" +// case Byte_ascii.Ltr_f: { // EX: lua frontier pattern; "%f[%a]"; DATE:2015-07-21 // ++i; // if (i + 1 >= len || src[i] != Byte_ascii.Brack_bgn) throw Err_.new_("scribunto", "missing '[' after %f in pattern at pattern character $ii"); +// +// // %f always followed by bracketed term; convert lua bracketed term to regex // Bry_bfr tmp_bfr = Xoa_app_.Utl__bfr_mkr().Get_b128(); // i = bracketedCharSetToRegex(tmp_bfr, src, i, len); // byte[] bracketed_regx = tmp_bfr.To_bry_and_rls(); -// int j = 1; -// bfr.Add_str_a7("(?. */ package gplx.xowa.xtns.scribunto.libs; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; import gplx.xowa.xtns.scribunto.*; -import org.junit.*; -import gplx.texts.*; +import org.junit.*; import gplx.core.regxs.*; public class Scrib_regx_converter_tst { @Before public void init() {fxt.Clear();} private Scrib_regx_converter_fxt fxt = new Scrib_regx_converter_fxt(); @Test public void Basic() {fxt.Test_parse("abc012ABC" , "abc012ABC");} @@ -61,7 +60,7 @@ class Scrib_regx_converter_fxt { } public void Test_replace(String text, String find, String replace, String expd) { String regex_str = under.Parse(Bry_.new_u8(find), Scrib_regx_converter.Anchor_G); - String actl = RegxAdp_.Replace(text, regex_str, replace); + String actl = Regx_adp_.Replace(text, regex_str, replace); Tfds.Eq(expd, actl); } } \ No newline at end of file diff --git a/400_xowa/src_240_install/gplx/xowa/Xob_dump_file.java b/400_xowa/src_240_install/gplx/xowa/Xob_dump_file.java index 91f4e3840..192966d93 100644 --- a/400_xowa/src_240_install/gplx/xowa/Xob_dump_file.java +++ b/400_xowa/src_240_install/gplx/xowa/Xob_dump_file.java @@ -84,11 +84,13 @@ public class Xob_dump_file { } private boolean Connect_exec(IoEngine_xrg_downloadFil args, String cur_file_url) { boolean rv = args.Src_last_modified_query_(true).Exec_meta(cur_file_url); - Xoa_app_.Usr_dlg().Note_many("", "", "wmf.dump:connect log; url=~{0} result=~{1} fil_len=~{2} file_modified=~{3} server_url=~{4} dump_date=~{5}", cur_file_url, rv, args.Src_content_length(), args.Src_last_modified() == null ? "<>" : args.Src_last_modified().XtoStr_fmt_yyyy_MM_dd_HH_mm_ss(), server_url, dump_date); + long tmp_file_len = args.Src_content_length(); + DateAdp tmp_file_modified = args.Src_last_modified(); + Xoa_app_.Usr_dlg().Note_many("", "", "wmf.dump:connect rslts; url=~{0} result=~{1} fil_len=~{2} file_modified=~{3} server_url=~{4} dump_date=~{5}", cur_file_url, rv, tmp_file_len, tmp_file_modified == null ? "<>" : tmp_file_modified.XtoStr_fmt_yyyy_MM_dd_HH_mm_ss(), server_url, dump_date); if (rv) { - file_len = args.Src_content_length(); - file_modified = args.Src_last_modified(); - if (file_modified.Timestamp_unix() <= 0) return false; // dump is available, but invalid (stamped with old date) + if (tmp_file_modified != null && tmp_file_modified.Year() <= 1970) return false; // url has invalid file; note that dumps.wikimedia.org currently returns back an HTML page with "404 not found"; rather than try to download and parse this (since content may change), use the date_modified which always appears to be UnixTime 0; DATE:2015-07-21 + file_len = tmp_file_len; + file_modified = tmp_file_modified; } return rv; } diff --git a/400_xowa/src_240_install/gplx/xowa/Xob_dump_file_.java b/400_xowa/src_240_install/gplx/xowa/Xob_dump_file_.java index 898eda089..34c92e60a 100644 --- a/400_xowa/src_240_install/gplx/xowa/Xob_dump_file_.java +++ b/400_xowa/src_240_install/gplx/xowa/Xob_dump_file_.java @@ -34,7 +34,7 @@ public class Xob_dump_file_ { || String_.Eq(dump_server, Xob_dump_file_.Server_masaryk) ) ){ - Xoa_app_.Usr_dlg().Note_many("", "", "wmf.dump:connect log; server_url=~{0} dump_date=~{1}", dump_server, dump_date); + Xoa_app_.Usr_dlg().Note_many("", "", "wmf.dump:dump date; server_url=~{0} dump_date=~{1}", dump_server, dump_date); Xoi_mirror_parser mirror_parser = new Xoi_mirror_parser(); String dump_wiki_url = dump_server + String_.new_a7(rv.Wiki_alias()) + "/"; byte[] dump_url_wiki_html = gplx.ios.IoEngine_xrg_downloadFil.new_("", Io_url_.Empty).Exec_as_bry(dump_wiki_url); if (Bry_.Len_eq_0(dump_url_wiki_html)) return; diff --git a/400_xowa/src_310_url/gplx/xowa/Xoa_url_parser.java b/400_xowa/src_310_url/gplx/xowa/Xoa_url_parser.java index 2ada3aa66..5f856c1f0 100644 --- a/400_xowa/src_310_url/gplx/xowa/Xoa_url_parser.java +++ b/400_xowa/src_310_url/gplx/xowa/Xoa_url_parser.java @@ -20,7 +20,8 @@ import gplx.core.primitives.*; import gplx.xowa.langs.*; import gplx.xowa.wikis.*; import gplx.xowa.wikis.xwikis.*; import gplx.xowa.net.*; import gplx.xowa.files.*; import gplx.xowa.html.hrefs.*; public class Xoa_url_parser { - private final Url_encoder encoder = Url_encoder.new_html_href_mw_().Itms_raw_same_many(Byte_ascii.Underline); private final Bry_bfr tmp_bfr = Bry_bfr.reset_(255); + private final Url_encoder encoder = Url_encoder.new_html_href_mw_().Itms_raw_same_many(Byte_ascii.Underline); + private final Bry_bfr tmp_bfr = Bry_bfr.reset_(255); public Gfo_url_parser Url_parser() {return url_parser;} private Gfo_url_parser url_parser = new Gfo_url_parser(); private Gfo_url gfo_url = new Gfo_url(); public String Build_str(Xoa_url url) { // transform to "canonical" form that fits url box for both XOWA and Mozilla Firefox tmp_bfr.Add(url.Wiki_bry()); // add wiki; EX: "en.wikipedia.org" diff --git a/400_xowa/src_310_url/gplx/xowa/Xoa_url_parser_basic_tst.java b/400_xowa/src_310_url/gplx/xowa/Xoa_url_parser_basic_tst.java deleted file mode 100644 index b6169778f..000000000 --- a/400_xowa/src_310_url/gplx/xowa/Xoa_url_parser_basic_tst.java +++ /dev/null @@ -1,172 +0,0 @@ -/* -XOWA: the XOWA Offline Wiki Application -Copyright (C) 2012 gnosygnu@gmail.com - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU Affero General Public License as -published by the Free Software Foundation, either version 3 of the -License, or (at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU Affero General Public License for more details. - -You should have received a copy of the GNU Affero General Public License -along with this program. If not, see . -*/ -package gplx.xowa; import gplx.*; -import org.junit.*; import gplx.xowa.wikis.xwikis.*; -public class Xoa_url_parser_basic_tst { - @Before public void init() {fxt.Reset();} private Xoa_url_parser_chkr fxt = new Xoa_url_parser_chkr(); - @Test public void Basic() { - fxt.Expd_wiki("en.wikipedia.org").Expd_page("A").Test_parse_w_wiki("en.wikipedia.org/wiki/A"); - } - @Test public void Abrv() { // deprecate; no longer needed with shortcuts - fxt.Expd_wiki("en.wikipedia.org").Expd_page("A").Test_parse_w_wiki("en.wikipedia.org/A"); - } - @Test public void Commons() { // PURPOSE: "C" was being picked up as an xwiki to commons; PAGE:no.b:C/Variabler; DATE:2014-10-14 - fxt.Init_xwiki("c", "commons.wikimedia.org"); // add alias of "C" - fxt.Expd_wiki("en.wikipedia.org").Expd_page("C/D").Test_parse_w_wiki("C/D"); // should use default wiki of enwiki, not commons; also, page should be "C/D", not "D" - } - @Test public void Http_basic() { - fxt.Expd_wiki("en.wikipedia.org").Expd_page("A").Test_parse_w_wiki("http://en.wikipedia.org/wiki/A"); - } - @Test public void Relative() { - fxt.Expd_wiki("en.wikipedia.org").Expd_page("A").Test_parse_w_wiki("//en.wikipedia.org/wiki/A"); - } - @Test public void Name() { - fxt.Expd_wiki("en.wikipedia.org").Expd_page("A").Test_parse_w_wiki("A"); - } - @Test public void Sub_1() { - fxt.Expd_wiki("en.wikipedia.org").Expd_page("A/b").Test_parse_w_wiki("A/b"); - } - @Test public void Sub_2() { - fxt.Expd_wiki("en.wikipedia.org").Expd_page("A/b/c").Test_parse_w_wiki("A/b/c"); - } - @Test public void Sub_3() { - fxt.Expd_wiki("en.wikipedia.org").Expd_page("A/b").Test_parse_w_wiki("en.wikipedia.org/wiki/A/b"); - } - @Test public void Ns_category() { - fxt.Expd_wiki("en.wikipedia.org").Expd_page("Category:A").Test_parse_w_wiki("Category:A"); - } - @Test public void Ns_file() { - fxt.Expd_wiki("en.wikipedia.org").Expd_page("File:A").Test_parse_w_wiki("File:A"); - } - @Test public void Anchor() { - fxt.Expd_wiki("en.wikipedia.org").Expd_page("A").Expd_anchor("b").Test_parse_w_wiki("A#b"); - } - @Test public void Upload() { - fxt.App().Usere().Wiki().Xwiki_mgr().Add_full("commons.wikimedia.org", "commons.wikimedia.org"); - fxt.Reset().Expd_wiki("commons.wikimedia.org").Expd_page("File:C.svg").Test_parse_w_wiki("http://upload.wikimedia.org/wikipedia/commons/a/ab/C.svg"); - fxt.Reset().Expd_wiki("commons.wikimedia.org").Expd_page("File:A.png").Test_parse_w_wiki("http://upload.wikimedia.org/wikipedia/commons/thumb/7/70/A.png/220px-A.png"); - } - @Test public void Parse_lang() { - Xow_xwiki_mgr xwiki_mgr = fxt.Wiki_en_w().Xwiki_mgr(); - xwiki_mgr.Add_full(Bry_.new_a7("fr"), Bry_.new_a7("fr.wikipedia.org"), Bry_.new_a7("http://fr.wikipedia.org/~{0}")); - fxt.Expd_wiki("fr.wikipedia.org").Expd_page("A").Test_parse_w_wiki("http://en.wikipedia.org/wiki/fr:A"); - } - @Test public void Alias_wiki() { - Xow_xwiki_mgr xwiki_mgr = fxt.Wiki_en_w().Xwiki_mgr(); - xwiki_mgr.Add_full(Bry_.new_a7("s"), Bry_.new_a7("en.wikisource.org")); - fxt.Expd_wiki("en.wikisource.org").Expd_page("A/b/c").Test_parse_w_wiki("s:A/b/c"); - } - @Test public void Xwiki_no_segs() { // PURPOSE: handle xwiki without full url; EX: "commons:Commons:Media_of_the_day"; DATE:2014-02-19 - Xow_xwiki_mgr xwiki_mgr = fxt.Wiki_en_w().Xwiki_mgr(); - xwiki_mgr.Add_full(Bry_.new_a7("s"), Bry_.new_a7("en.wikisource.org")); - fxt.Expd_wiki("en.wikisource.org").Expd_page("Project:A").Test_parse_w_wiki("s:Project:A"); - } - @Test public void Domain_only() { - fxt.App().Usere().Wiki().Xwiki_mgr().Add_full("fr.wikipedia.org", "fr.wikipedia.org"); - fxt.Expd_wiki("fr.wikipedia.org").Expd_page("").Test_parse_w_wiki("fr.wikipedia.org"); - } - @Test public void Domain_and_wiki() { - fxt.App().Usere().Wiki().Xwiki_mgr().Add_full("fr.wikipedia.org", "fr.wikipedia.org"); - fxt.Expd_wiki("fr.wikipedia.org").Expd_page("").Test_parse_w_wiki("fr.wikipedia.org/wiki"); - } - @Test public void Domain_and_wiki_w_http() { - fxt.App().Usere().Wiki().Xwiki_mgr().Add_full("fr.wikipedia.org", "fr.wikipedia.org"); - fxt.Expd_wiki("fr.wikipedia.org").Expd_page("").Test_parse_w_wiki("http://fr.wikipedia.org/wiki"); - } - @Test public void Redirect() { - fxt.Expd_wiki("en.wikipedia.org").Expd_page("A").Test_parse_w_wiki("A?redirect=no"); - } - @Test public void Namespace_in_different_wiki() { // PURPOSE.fix: namespaced titles would default to default_wiki instead of current_wiki - fxt.Expd_wiki("en.wikisource.org").Expd_page("Category:A").Test_parse_w_wiki(fxt.Wiki_en_s(), "Category:A"); - } - @Test public void Action_is_edit() { - fxt.Expd_wiki("en.wikipedia.org").Expd_page("A").Expd_action_is_edit_y().Test_parse_w_wiki("A?action=edit"); - } - @Test public void Assert_state_cleared() { // PURPOSE.fix: action_is_edit (et. al.) was not being cleared on parse even though Xoa_url reused; DATE:20121231 - byte[] raw = Bry_.new_a7("A?action=edit"); - Xoa_url url = Xoa_url_parser.Parse_url(fxt.App(), fxt.Wiki_en_w(), raw, 0, raw.length, false); - Tfds.Eq(true, url.Action_is_edit()); - raw = Bry_.new_a7("B"); - Xoa_url_parser.Parse_url(url, fxt.App(), fxt.Wiki_en_w(), raw, 0, raw.length, false); - Tfds.Eq(false, url.Action_is_edit()); - } - @Test public void Query_arg() { // PURPOSE.fix: query args were not printing out - byte[] raw = Bry_.new_a7("en.wikipedia.org/wiki/Special:Search/Earth?fulltext=yes"); - Xoa_url url = Xoa_url_parser.Parse_url(fxt.App(), fxt.Wiki_en_w(), raw, 0, raw.length, false); - Xoa_url_parser parser = new Xoa_url_parser(); - Tfds.Eq("en.wikipedia.org/wiki/Special:Search/Earth?fulltext=yes", parser.Build_str(url)); - } - @Test public void Anchor_with_slash() { // PURPOSE: A/b#c/d was not parsing correctly - fxt.Expd_page("A/b").Expd_anchor("c.2Fd").Test_parse_w_wiki("A/b#c/d"); - } - @Test public void Slash() { - fxt.Reset().Expd_wiki("en.wikipedia.org").Expd_page("/A").Test_parse_w_wiki("en.wikipedia.org/wiki//A"); - fxt.Reset().Expd_wiki("en.wikipedia.org").Expd_page("A//b").Test_parse_w_wiki("en.wikipedia.org/wiki/A//b"); - fxt.Reset().Expd_wiki("en.wikipedia.org").Expd_page("//A").Test_parse_w_wiki("en.wikipedia.org/wiki///A"); - } - @Test public void Question_is_page() { - fxt.Expd_wiki("en.wikipedia.org").Expd_page("A?B").Expd_anchor(null).Test_parse_w_wiki("A?B"); - } - @Test public void Question_is_anchor() { - fxt.Expd_wiki("en.wikipedia.org").Expd_page("A").Expd_anchor("b.3Fc").Test_parse_w_wiki("A#b?c"); - } -} -class Xoa_url_parser_chkr implements Tst_chkr { - public Xoa_url_parser_chkr Reset() { - if (app == null) { - app = Xoa_app_fxt.app_(); - wiki_en_w = Xoa_app_fxt.wiki_(app, "en.wikipedia.org"); - wiki_en_s = Xoa_app_fxt.wiki_(app, "en.wikisource.org"); - app.Usere().Wiki().Xwiki_mgr().Add_full("en.wikipedia.org", "en.wikipedia.org"); - app.Usere().Wiki().Xwiki_mgr().Add_full("en.wikisource.org", "en.wikisource.org"); - } - expd_wiki_str = expd_page = expd_anchor = null; - expd_anchor_is_edit = Bool_.__byte; - return this; - } - public Xoae_app App() {return app;} private Xoae_app app; - public Xowe_wiki Wiki_en_w() {return wiki_en_w;} private Xowe_wiki wiki_en_w; - public Xowe_wiki Wiki_en_s() {return wiki_en_s;} private Xowe_wiki wiki_en_s; - public Class TypeOf() {return Xoa_url.class;} - public Xoa_url_parser_chkr Expd_wiki(String v) {this.expd_wiki_str = v; return this;} private String expd_wiki_str; - public Xoa_url_parser_chkr Expd_page(String v) {this.expd_page = v; return this;} private String expd_page; - public Xoa_url_parser_chkr Expd_anchor(String v) {this.expd_anchor = v; return this;} private String expd_anchor; - public Xoa_url_parser_chkr Expd_action_is_edit_y() {this.expd_anchor_is_edit = Bool_.Y_byte; return this;} private byte expd_anchor_is_edit = Bool_.__byte; - public Xoa_url_parser_chkr Expd_action_is_edit_n() {this.expd_anchor_is_edit = Bool_.N_byte; return this;} - public void Init_xwiki(String alias, String domain) {app.Usere().Wiki().Xwiki_mgr().Add_full(alias, domain);} - public int Chk(Tst_mgr mgr, String path, Object actl_obj) { - Xoa_url actl = (Xoa_url)actl_obj; - int rv = 0; - rv += mgr.Tst_val(expd_wiki_str == null, path, "wiki", expd_wiki_str, String_.new_u8(actl.Wiki_bry())); - rv += mgr.Tst_val(expd_page == null, path, "page", expd_page, String_.new_u8(actl.Page_bry())); - rv += mgr.Tst_val(expd_anchor == null, path, "anchor", expd_anchor, String_.new_u8(actl.Anchor_bry())); - rv += mgr.Tst_val(expd_anchor_is_edit == Bool_.__byte, path, "anchor_is_edit", expd_anchor_is_edit == Bool_.Y_byte, actl.Action_is_edit()); - return rv; - } - public Xoa_url_parser_chkr Test_parse_from_url_bar(String raw, String expd) { - Xoa_url actl_url = Xoa_url_parser.Parse_from_url_bar(app, wiki_en_w, raw); - Tfds.Eq(expd, actl_url.Xto_full_str()); - return this; - } - public void Test_parse_w_wiki(String raw) {Test_parse_w_wiki(wiki_en_w, raw);} - public void Test_parse_w_wiki(Xowe_wiki w, String raw) { - Xoa_url url = Xoa_url_parser.Parse_url(app, w, raw); - Tst_mgr tst_mgr = new Tst_mgr(); - tst_mgr.Tst_obj(this, url); - } -} diff --git a/400_xowa/src_310_url/gplx/xowa/Xoa_url_parser_mw_links_tst.java b/400_xowa/src_310_url/gplx/xowa/Xoa_url_parser_mw_links_tst.java deleted file mode 100644 index f7a45b909..000000000 --- a/400_xowa/src_310_url/gplx/xowa/Xoa_url_parser_mw_links_tst.java +++ /dev/null @@ -1,25 +0,0 @@ -/* -XOWA: the XOWA Offline Wiki Application -Copyright (C) 2012 gnosygnu@gmail.com - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU Affero General Public License as -published by the Free Software Foundation, either version 3 of the -License, or (at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU Affero General Public License for more details. - -You should have received a copy of the GNU Affero General Public License -along with this program. If not, see . -*/ -package gplx.xowa; import gplx.*; -import org.junit.*; -public class Xoa_url_parser_mw_links_tst { - @Before public void init() {fxt.Reset();} private Xoa_url_parser_chkr fxt = new Xoa_url_parser_chkr(); - @Test public void Title_remove_w() { // PURPOSE: fix /w/ showing up as seg; DATE:2014-05-30 - fxt.Expd_page("A").Expd_wiki("en.wikipedia.org").Test_parse_w_wiki("http://en.wikipedia.org/w/index.php?title=A"); - } -} diff --git a/400_xowa/src_310_url/gplx/xowa/Xoa_url_parser_tst.java b/400_xowa/src_310_url/gplx/xowa/Xoa_url_parser_tst.java new file mode 100644 index 000000000..4be78e4d0 --- /dev/null +++ b/400_xowa/src_310_url/gplx/xowa/Xoa_url_parser_tst.java @@ -0,0 +1,170 @@ +/* +XOWA: the XOWA Offline Wiki Application +Copyright (C) 2012 gnosygnu@gmail.com + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as +published by the Free Software Foundation, either version 3 of the +License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see . +*/ +package gplx.xowa; import gplx.*; +import org.junit.*; import gplx.xowa.wikis.xwikis.*; +public class Xoa_url_parser_tst { + private final Xoa_url_parser_fxt fxt = new Xoa_url_parser_fxt(); + @Test public void Basic() { + fxt.Run_parse("en.wikipedia.org/wiki/A").Chk_wiki("en.wikipedia.org").Chk_page("A"); + } + @Test public void No_wiki() { // PURPOSE: no "/wiki/" + fxt.Run_parse("en.wikipedia.org/A").Chk_wiki("en.wikipedia.org").Chk_page("A"); + } + @Test public void Commons() { // PURPOSE: "C" was being picked up as an xwiki to commons; PAGE:no.b:C/Variabler; DATE:2014-10-14 + fxt.Prep_add_xwiki_to_user("c", "commons.wikimedia.org"); // add alias of "c" + fxt.Run_parse("C/D").Chk_wiki("en.wikipedia.org").Chk_page("C/D"); // should use current wiki (enwiki), not xwiki to commons; also, page should be "C/D", not "D" + } + @Test public void Http_basic() { + fxt.Run_parse("http://en.wikipedia.org/wiki/A").Chk_wiki("en.wikipedia.org").Chk_page("A"); + } + @Test public void Relative() { + fxt.Run_parse("//en.wikipedia.org/wiki/A").Chk_wiki("en.wikipedia.org").Chk_page("A"); + } + @Test public void Name() { + fxt.Run_parse("A").Chk_wiki("en.wikipedia.org").Chk_page("A"); + } + @Test public void Sub_1() { + fxt.Run_parse("A/b").Chk_wiki("en.wikipedia.org").Chk_page("A/b"); + } + @Test public void Sub_2() { + fxt.Run_parse("A/b/c").Chk_wiki("en.wikipedia.org").Chk_page("A/b/c"); + } + @Test public void Sub_3() { + fxt.Run_parse("en.wikipedia.org/wiki/A/b").Chk_wiki("en.wikipedia.org").Chk_page("A/b"); + } + @Test public void Ns_category() { + fxt.Run_parse("Category:A").Chk_wiki("en.wikipedia.org").Chk_page("Category:A"); + } + @Test public void Ns_file() { + fxt.Run_parse("File:A").Chk_wiki("en.wikipedia.org").Chk_page("File:A"); + } + @Test public void Anchor() { + fxt.Run_parse("A#b").Chk_wiki("en.wikipedia.org").Chk_page("A").Chk_anchor("b"); + } + @Test public void Upload() { + fxt.Prep_add_xwiki_to_user("commons.wikimedia.org"); // NOTE: need to add xwiki to be able to resolve "/commons/" + fxt.Run_parse("http://upload.wikimedia.org/wikipedia/commons/a/ab/C.svg").Chk_wiki("commons.wikimedia.org").Chk_page("File:C.svg"); + fxt.Run_parse("http://upload.wikimedia.org/wikipedia/commons/thumb/7/70/A.png/220px-A.png").Chk_wiki("commons.wikimedia.org").Chk_page("File:A.png"); + } + @Test public void Parse_lang() { + fxt.Prep_add_xwiki_to_wiki("fr", "fr.wikipedia.org", "http://fr.wikipedia.org/~{0}"); + fxt.Run_parse("http://en.wikipedia.org/wiki/fr:A").Chk_wiki("fr.wikipedia.org").Chk_page("A"); + } + @Test public void Alias_wiki() { + fxt.Prep_add_xwiki_to_wiki("s", "en.wikisource.org"); + fxt.Run_parse("s:A/b/c").Chk_wiki("en.wikisource.org").Chk_page("A/b/c"); + } + @Test public void Xwiki_no_segs() { // PURPOSE: handle xwiki without full url; EX: "commons:Commons:Media_of_the_day"; DATE:2014-02-19 + fxt.Prep_add_xwiki_to_wiki("s", "en.wikisource.org"); + fxt.Run_parse("s:Project:A").Chk_wiki("en.wikisource.org").Chk_page("Project:A"); + } + @Test public void Domain_only() { + fxt.Prep_add_xwiki_to_user("fr.wikipedia.org"); + fxt.Run_parse("fr.wikipedia.org").Chk_wiki("fr.wikipedia.org").Chk_page(""); + } + @Test public void Domain_and_wiki() { + fxt.Prep_add_xwiki_to_user("fr.wikipedia.org"); + fxt.Run_parse("fr.wikipedia.org/wiki").Chk_wiki("fr.wikipedia.org").Chk_page(""); + } + @Test public void Domain_and_wiki_w_http() { + fxt.Prep_add_xwiki_to_user("fr.wikipedia.org"); + fxt.Run_parse("http://fr.wikipedia.org/wiki").Chk_wiki("fr.wikipedia.org").Chk_page(""); + } + @Test public void Redirect() { + fxt.Run_parse("A?redirect=no").Chk_wiki("en.wikipedia.org").Chk_page("A"); + } + @Test public void Namespace_in_different_wiki() { // PURPOSE.fix: namespaced titles would default to default_wiki instead of current_wiki + Xowe_wiki en_s = fxt.Prep_create_wiki("en.wikisource.org"); + fxt.Run_parse(en_s, "Category:A").Chk_wiki("en.wikisource.org").Chk_page("Category:A"); + } + @Test public void Action_is_edit() { + fxt.Run_parse("A?action=edit").Chk_wiki("en.wikipedia.org").Chk_page("A").Chk_action_is_edit_y(); + } + @Test public void Assert_state_cleared() { // PURPOSE.fix: action_is_edit (et. al.) was not being cleared on parse even though Xoa_url reused; DATE:20121231 + fxt.Run_parse("A?action=edit") .Chk_action_is_edit_y(); + fxt.Run_parse_reuse("B") .Chk_action_is_edit_n(); + } + @Test public void Query_arg() { // PURPOSE.fix: query args were not printing out + fxt.Run_parse("en.wikipedia.org/wiki/Special:Search/Earth?fulltext=yes").Chk_build_str_is_same(); + } + @Test public void Anchor_with_slash() { // PURPOSE: A/b#c/d was not parsing correctly + fxt.Run_parse("A/b#c/d").Chk_page("A/b").Chk_anchor("c.2Fd"); + } + @Test public void Slash() { + fxt.Run_parse("en.wikipedia.org/wiki//A").Chk_wiki("en.wikipedia.org").Chk_page("/A"); + fxt.Run_parse("en.wikipedia.org/wiki/A//b").Chk_wiki("en.wikipedia.org").Chk_page("A//b"); + fxt.Run_parse("en.wikipedia.org/wiki///A").Chk_wiki("en.wikipedia.org").Chk_page("//A"); + } + @Test public void Question_is_page() { + fxt.Run_parse("A?B").Chk_wiki("en.wikipedia.org").Chk_page("A?B").Chk_anchor(null); + } + @Test public void Question_is_anchor() { + fxt.Run_parse("A#b?c").Chk_wiki("en.wikipedia.org").Chk_page("A").Chk_anchor("b.3Fc"); + } + @Test public void Title_remove_w() { // PURPOSE: fix /w/ showing up as seg; DATE:2014-05-30 + fxt.Run_parse("http://en.wikipedia.org/w/index.php?title=A").Chk_wiki("en.wikipedia.org").Chk_page("A"); + } +} +class Xoa_url_parser_fxt { + private final Xoae_app app; private final Xowe_wiki cur_wiki; + private Xoa_url actl_url; + public Xoa_url_parser_fxt() { + this.app = Xoa_app_fxt.app_(); + this.cur_wiki = Prep_create_wiki("en.wikipedia.org"); + } + public Xowe_wiki Prep_create_wiki(String domain) { + Xowe_wiki rv = Xoa_app_fxt.wiki_(app, domain); + Prep_add_xwiki_to_user(domain); + return rv; + } + public Xoa_url_parser_fxt Prep_add_xwiki_to_wiki(String alias, String domain) {return Prep_xwiki(cur_wiki, alias, domain, null);} + public Xoa_url_parser_fxt Prep_add_xwiki_to_wiki(String alias, String domain, String fmt) {return Prep_xwiki(cur_wiki, alias, domain, fmt);} + public Xoa_url_parser_fxt Prep_add_xwiki_to_user(String domain) {return Prep_xwiki(app.Usere().Wiki(), domain, domain, null);} + public Xoa_url_parser_fxt Prep_add_xwiki_to_user(String alias, String domain) {return Prep_xwiki(app.Usere().Wiki(), alias, domain, null);} + public Xoa_url_parser_fxt Prep_add_xwiki_to_user(String alias, String domain, String fmt) {return Prep_xwiki(app.Usere().Wiki(), alias, domain, fmt);} + private Xoa_url_parser_fxt Prep_xwiki(Xow_wiki wiki, String alias, String domain, String fmt) { + wiki.Xwiki_mgr().Add_full(Bry_.new_u8(alias), Bry_.new_u8(domain), Bry_.new_u8_safe(fmt)); + return this; + } + public Xoa_url_parser_fxt Run_parse(String actl_str) {return Run_parse(cur_wiki, actl_str);} + public Xoa_url_parser_fxt Run_parse(Xow_wiki wiki, String actl_str) { + this.actl_url = Xoa_url_parser.Parse_url(app, wiki, actl_str); + return this; + } + public Xoa_url_parser_fxt Run_parse_reuse(String actl_str) { + byte[] actl_bry = Bry_.new_u8(actl_str); + Xoa_url_parser.Parse_url(actl_url, app, cur_wiki, actl_bry, 0, actl_bry.length, false); + return this; + } + public Xoa_url_parser_fxt Run_parse_from_url_bar(String raw) { + this.actl_url = Xoa_url_parser.Parse_from_url_bar(app, cur_wiki, raw); + return this; + } + public Xoa_url_parser_fxt Chk_wiki(String v) {Tfds.Eq_str(v, actl_url.Wiki_bry() , "wiki"); return this;} + public Xoa_url_parser_fxt Chk_page(String v) {Tfds.Eq_str(v, actl_url.Page_bry() , "page"); return this;} + public Xoa_url_parser_fxt Chk_anchor(String v) {Tfds.Eq_str(v, actl_url.Anchor_bry(), "anch"); return this;} + public Xoa_url_parser_fxt Chk_action_is_edit_y() {return Chk_action_is_edit_(Bool_.Y);} + public Xoa_url_parser_fxt Chk_action_is_edit_n() {return Chk_action_is_edit_(Bool_.N);} + private Xoa_url_parser_fxt Chk_action_is_edit_(boolean v) {Tfds.Eq_bool(v, actl_url.Action_is_edit(), "action_is_edit"); return this;} + public Xoa_url_parser_fxt Chk_to_str(String v) {Tfds.Eq_str(v, actl_url.Xto_full_str(), "Xto_full_str"); return this;} + public Xoa_url_parser_fxt Chk_build_str_is_same() { + Xoa_url_parser parser = new Xoa_url_parser(); + Tfds.Eq_str(actl_url.Raw(), parser.Build_str(actl_url), "build_str"); + return this; + } +} diff --git a/400_xowa/src_310_url/gplx/xowa/Xoa_url_parser_url_bar_tst.java b/400_xowa/src_310_url/gplx/xowa/Xoa_url_parser_url_bar_tst.java index 4e3c888e9..e2c74af1b 100644 --- a/400_xowa/src_310_url/gplx/xowa/Xoa_url_parser_url_bar_tst.java +++ b/400_xowa/src_310_url/gplx/xowa/Xoa_url_parser_url_bar_tst.java @@ -18,52 +18,40 @@ along with this program. If not, see . package gplx.xowa; import gplx.*; import org.junit.*; public class Xoa_url_parser_url_bar_tst { - @Before public void init() {fxt.Reset();} private Xoa_url_parser_chkr fxt = new Xoa_url_parser_chkr(); + private final Xoa_url_parser_fxt fxt = new Xoa_url_parser_fxt(); @Test public void Basic() { - fxt.Test_parse_from_url_bar("Page_1" , "en.wikipedia.org/wiki/Page_1"); // basic + fxt.Run_parse_from_url_bar("Page_1").Chk_to_str("en.wikipedia.org/wiki/Page_1"); // basic } @Test public void Lang() { - fxt.App().Usere().Wiki().Xwiki_mgr().Add_full("uk", "uk.wikipedia.org"); - fxt.Test_parse_from_url_bar("uk" , "en.wikipedia.org/wiki/uk"); // lang-like page (uk=Ukraine) should not try to open wiki; DATE:2014-02-07 + fxt.Prep_add_xwiki_to_user("uk", "uk.wikipedia.org"); + fxt.Run_parse_from_url_bar("uk").Chk_to_str("en.wikipedia.org/wiki/uk"); // lang-like page (uk=Ukraine) should not try to open wiki; DATE:2014-02-07 } @Test public void Lang_like() { - fxt.App().Usere().Wiki().Xwiki_mgr().Add_full(Bry_.new_a7("uk"), Bry_.new_a7("uk.wikipedia.org"), Bry_.new_a7("http://~{1}.wikipedia.org")); // NOTE: fmt needed for Type_is_lang - fxt.Test_parse_from_url_bar("uk/A" , "en.wikipedia.org/wiki/uk/A"); // uk/A should not try be interpreted as wiki="uk" page="A"; DATE:2014-04-26 + fxt.Prep_add_xwiki_to_user("uk", "uk.wikipedia.org", "http://~{1}.wikipedia.org"); // NOTE: fmt needed for Type_is_lang + fxt.Run_parse_from_url_bar("uk/A").Chk_to_str("en.wikipedia.org/wiki/uk/A"); // uk/A should not try be interpreted as wiki="uk" page="A"; DATE:2014-04-26 } @Test public void Macro() { - fxt.App().Usere().Wiki().Xwiki_mgr().Add_full("fr.wikisource.org", "fr.wikisource.org"); - fxt.Test_parse_from_url_bar("fr.s:Auteur:Shakespeare" , "fr.wikisource.org/wiki/Auteur:Shakespeare"); // url_macros + fxt.Prep_add_xwiki_to_user("fr.wikisource.org"); + fxt.Run_parse_from_url_bar("fr.s:Auteur:Shakespeare").Chk_to_str("fr.wikisource.org/wiki/Auteur:Shakespeare"); // url_macros } - @Test public void Home() { - Init_db(fxt.App().Usere().Wiki()); - fxt.Test_parse_from_url_bar("home" , "en.wikipedia.org/wiki/home"); // home should go to current wiki's home; DATE:2014-02-09 - fxt.Test_parse_from_url_bar("home/wiki/Main_Page" , "home/wiki/Main_Page"); // home Main_Page should go to home; DATE:2014-02-09 + @Test public void Main_page__home() { + fxt.Run_parse_from_url_bar("home").Chk_to_str("en.wikipedia.org/wiki/home"); // home should go to current wiki's home; DATE:2014-02-09 + fxt.Run_parse_from_url_bar("home/wiki/Main_Page").Chk_to_str("home/wiki/Main_Page"); // home Main_Page should go to home; DATE:2014-02-09 } - @Test public void Custom() { -// fxt.App().Usere().Wiki().Xwiki_mgr().Add_full("zh.wikipedia.org", "zh.wikipedia.org"); -// gplx.xowa.wikis.Xoa_wiki_regy.Make_wiki_dir(fxt.App(), "zh.wikipedia.org"); -// fxt.App().Wiki_mgr().Get_by_key_or_make(Bry_.new_a7("zh.wikipedia.org")).Props().Main_page_(Bry_.new_a7("Zh_Main_Page")); - fxt.App().Usere().Wiki().Xwiki_mgr().Add_full("zh.wikipedia.org", "zh.wikipedia.org"); - Xowe_wiki zh_wiki = fxt.App().Wiki_mgr().Get_by_key_or_make(Bry_.new_a7("zh.wikipedia.org")); - Init_db(zh_wiki); - gplx.xowa.wikis.Xoa_wiki_regy.Make_wiki_dir(fxt.App(), "zh.wikipedia.org"); + @Test public void Main_page__zhw() { + Xowe_wiki zh_wiki = fxt.Prep_create_wiki("zh.wikipedia.org"); + gplx.xowa.wikis.Xoa_wiki_regy.Make_wiki_dir(zh_wiki.App(), "zh.wikipedia.org"); // HACK: needed for to_url_bar zh_wiki.Props().Main_page_(Bry_.new_a7("Zh_Main_Page")); - fxt.Test_parse_from_url_bar("zh.w:" , "zh.wikipedia.org/wiki/Zh_Main_Page"); - fxt.Test_parse_from_url_bar("zh.w:Main_Page" , "zh.wikipedia.org/wiki/Main_Page"); + fxt.Run_parse_from_url_bar("zh.w:").Chk_to_str("zh.wikipedia.org/wiki/Zh_Main_Page"); + fxt.Run_parse_from_url_bar("zh.w:Main_Page").Chk_to_str("zh.wikipedia.org/wiki/Main_Page"); } @Test public void Mobile() { // PURPOSE: handle mobile links; DATE:2014-05-03 - fxt.Test_parse_from_url_bar("en.m.wikipedia.org/wiki/A" , "en.wikipedia.org/wiki/A"); // basic - fxt.Test_parse_from_url_bar("en.M.wikipedia.org/wiki/A" , "en.wikipedia.org/wiki/A"); // upper - fxt.Test_parse_from_url_bar("A" , "en.wikipedia.org/wiki/A"); // bounds-check: 0 - fxt.Test_parse_from_url_bar("A." , "en.wikipedia.org/wiki/A."); // bounds-check: 1 - fxt.Test_parse_from_url_bar("A.b" , "en.wikipedia.org/wiki/A.b"); // bounds-check: 2 - fxt.Test_parse_from_url_bar("A.b.m." , "en.wikipedia.org/wiki/A.b.m."); // false-match - fxt.Test_parse_from_url_bar("en.x.wikipedia.org/wiki/A" , "en.wikipedia.org/wiki/en.x.wikipedia.org/A"); // fail - } - public static void Init_db(Xowe_wiki wiki) { - Xoa_test_.Db_init(true, Xoa_test_.Url_root()); - wiki.Ns_mgr().Init_w_defaults(); - Xowe_wiki_bldr.Create(wiki, 1, "dump.xml"); - wiki.Data__core_mgr().Db__core().Tbl__ns().Insert(wiki.Ns_mgr()); + fxt.Run_parse_from_url_bar("en.m.wikipedia.org/wiki/A" ).Chk_to_str("en.wikipedia.org/wiki/A"); // basic + fxt.Run_parse_from_url_bar("en.M.wikipedia.org/wiki/A" ).Chk_to_str("en.wikipedia.org/wiki/A"); // upper + fxt.Run_parse_from_url_bar("A" ).Chk_to_str("en.wikipedia.org/wiki/A"); // bounds-check: 0 + fxt.Run_parse_from_url_bar("A." ).Chk_to_str("en.wikipedia.org/wiki/A."); // bounds-check: 1 + fxt.Run_parse_from_url_bar("A.b" ).Chk_to_str("en.wikipedia.org/wiki/A.b"); // bounds-check: 2 + fxt.Run_parse_from_url_bar("A.b.m." ).Chk_to_str("en.wikipedia.org/wiki/A.b.m."); // false-match + fxt.Run_parse_from_url_bar("en.x.wikipedia.org/wiki/A" ).Chk_to_str("en.wikipedia.org/wiki/en.x.wikipedia.org/A"); // fail } } diff --git a/tst/400_xowa/root/wiki/en.wikipedia.org/en.wikipedia.org-file-user.xowa b/tst/400_xowa/root/wiki/en.wikipedia.org/en.wikipedia.org-file-user.xowa new file mode 100644 index 000000000..1a9bf3f57 Binary files /dev/null and b/tst/400_xowa/root/wiki/en.wikipedia.org/en.wikipedia.org-file-user.xowa differ diff --git a/tst/400_xowa/root/wiki/en.wikipedia.org/en.wikipedia.org-file.xowa b/tst/400_xowa/root/wiki/en.wikipedia.org/en.wikipedia.org-file.xowa new file mode 100644 index 000000000..5546c15b8 Binary files /dev/null and b/tst/400_xowa/root/wiki/en.wikipedia.org/en.wikipedia.org-file.xowa differ diff --git a/tst/400_xowa/root/wiki/en.wikipedia.org/en.wikipedia.org-text.xowa b/tst/400_xowa/root/wiki/en.wikipedia.org/en.wikipedia.org-text.xowa index 47ec10fa3..5987a021a 100644 Binary files a/tst/400_xowa/root/wiki/en.wikipedia.org/en.wikipedia.org-text.xowa and b/tst/400_xowa/root/wiki/en.wikipedia.org/en.wikipedia.org-text.xowa differ