mirror of
https://github.com/gnosygnu/xowa.git
synced 2025-06-13 12:54:14 +00:00
177 lines
9.0 KiB
Java
177 lines
9.0 KiB
Java
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com

XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.

You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.

The terms of each license can be found in the source code repository:

GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
|
||
package gplx.xowa.mediawiki.includes; import gplx.*; import gplx.xowa.*; import gplx.xowa.mediawiki.*;
|
||
import org.junit.*; import gplx.core.tests.*; import gplx.core.btries.*; import gplx.xowa.mediawiki.includes.xohtml.*;
|
||
public class XomwSanitizerTest {
|
||
private final XomwSanitizerFxt fxt = new XomwSanitizerFxt();
|
||
@Test public void Normalize__text() {fxt.Test__normalize_char_references("abc" , "abc");}
|
||
@Test public void Normalize__dec() {fxt.Test__normalize_char_references("" , "");}
|
||
@Test public void Normalize__dec__invalid() {fxt.Test__normalize_char_references("	" , "	");}
|
||
@Test public void Normalize__hex() {fxt.Test__normalize_char_references("ÿ" , "ÿ");}
|
||
@Test public void Normalize__entity() {fxt.Test__normalize_char_references("α" , "α");}
|
||
@Test public void Normalize__entity__lt() {fxt.Test__normalize_char_references("<" , "<");}
|
||
@Test public void Normalize__entity__alias() {fxt.Test__normalize_char_references("&רלמ;" , "‏");}
|
||
@Test public void Normalize__amp() {fxt.Test__normalize_char_references("a&b" , "a&b");}
|
||
@Test public void Normalize__invalid() {fxt.Test__normalize_char_references("&(invalid);" , "&(invalid);");}
|
||
@Test public void Normalize__many() {
|
||
fxt.Test__normalize_char_references
|
||
( "a 	 b α c ÿ d &(invalid); e"
|
||
, "a 	 b α c ÿ d &(invalid); e"
|
||
);
|
||
}
|
||
@Test public void Regex__domain() {
|
||
Xomw_regex_find_domain regex_domain = new Xomw_regex_find_domain();
|
||
// normal
|
||
fxt.Test__regex_domain_y(regex_domain, "https://a.org/bcd", "https:", "//a.org", "/bcd");
|
||
// trailing backslash
|
||
fxt.Test__regex_domain_y(regex_domain, "https://a.org/", "https:", "//a.org", "/");
|
||
// domain only
|
||
fxt.Test__regex_domain_y(regex_domain, "https://a.org", "https:", "//a.org", "");
|
||
// colon not found
|
||
fxt.Test__regex_domain_n(regex_domain, "https//a.org/bcd");
|
||
// host_bgn.eos
|
||
fxt.Test__regex_domain_n(regex_domain, "https:");
|
||
// host_bgn.//
|
||
fxt.Test__regex_domain_n(regex_domain, "https:a//");
|
||
// host_bgn.///
|
||
fxt.Test__regex_domain_n(regex_domain, "https:///a.org/b");
|
||
}
|
||
@Test public void Regex__clean_url() {
|
||
Xomw_regex_escape_invalid regex = new Xomw_regex_escape_invalid();
|
||
// noop
|
||
fxt.Test__regex_escape_invalid(regex, "https://a.org/bcd", Bool_.N, "");
|
||
// symbols
|
||
fxt.Test__regex_escape_invalid(regex, "[]<>\"|", Bool_.Y, "%5B%5D%3C%3E%22%7C%7F");
|
||
// range: 00 - 32
|
||
fxt.Test__regex_escape_invalid(regex, "\t\n ", Bool_.Y, "%09%0A+");
|
||
}
|
||
@Test public void Regex__ipv6_brack() {
|
||
Xomw_regex_ipv6_brack regex = new Xomw_regex_ipv6_brack();
|
||
// basic
|
||
fxt.Test__regex_ipv6_brack(regex, Bool_.Y, "//%5B0a.1b:12%5D:123");
|
||
// port: none
|
||
fxt.Test__regex_ipv6_brack(regex, Bool_.Y, "//%5Ba%5D");
|
||
// port: multiple
|
||
fxt.Test__regex_ipv6_brack(regex, Bool_.Y, "//%5Ba%5D:1:2:3");
|
||
// "//%5B" missing
|
||
fxt.Test__regex_ipv6_brack(regex, Bool_.N, "abc");
|
||
// ipv6: invalid
|
||
fxt.Test__regex_ipv6_brack(regex, Bool_.N, "//%5Ba!%5D:1");
|
||
// ipv6: 0-len
|
||
fxt.Test__regex_ipv6_brack(regex, Bool_.N, "//%5B%5D:1");
|
||
// port: invalid
|
||
fxt.Test__regex_ipv6_brack(regex, Bool_.N, "//%5Ba%5D:a");
|
||
// port: 0-len
|
||
fxt.Test__regex_ipv6_brack(regex, Bool_.N, "//%5Ba%5D:");
|
||
}
|
||
@Test public void Decode() {
|
||
// dec
|
||
fxt.Test__decode_char_references("!" , "!");
|
||
// hex
|
||
fxt.Test__decode_char_references("#" , "#");
|
||
// entity
|
||
fxt.Test__decode_char_references("α" , "α");
|
||
// entity:lt
|
||
fxt.Test__decode_char_references("<" , "<");
|
||
// entity:rlm
|
||
fxt.Test__decode_char_references("&רלמ;" , "");
|
||
// entity:invalid
|
||
fxt.Test__decode_char_references("&invalid;" , "&invalid;");
|
||
// amp
|
||
fxt.Test__decode_char_references("a&b" , "a&b");
|
||
}
|
||
@Test public void Clean_url() {
|
||
// entity
|
||
fxt.Test__clean_url("http://a.org/b&c" , "http://a.org/b&c");
|
||
// entity: escape
|
||
fxt.Test__clean_url("http://a.org/b"c" , "http://a.org/b%22c");
|
||
// domain=n; make sure " is changed, but not soft-hyphen
|
||
fxt.Test__clean_url("a"z" , "a%22z");
|
||
// host: invalid idn
|
||
fxt.Test__clean_url("http://a᠆b.org/c᠆d" , "http://ab.org/c᠆d");
|
||
// ipv6_brack
|
||
fxt.Test__clean_url("http://[0a.1b:12]:123/cd" , "http://[0a.1b:12]:123/cd");
|
||
}
|
||
@Test public void Merge_atrs() {
|
||
Xomw_atr_mgr src_atrs = new Xomw_atr_mgr();
|
||
Xomw_atr_mgr trg_atrs = new Xomw_atr_mgr();
|
||
Xomw_atr_mgr expd_atrs = new Xomw_atr_mgr();
|
||
String cls = "class";
|
||
// basic: k1 + k2
|
||
fxt.Test__merge_attributes(src_atrs.Clear().Add_many("k1", "v1"), trg_atrs.Clear().Add_many("k2", "v2"), expd_atrs.Clear().Add_many("k1", "v1", "k2", "v2"));
|
||
// overwrite: k1 + k1
|
||
fxt.Test__merge_attributes(src_atrs.Clear().Add_many("k1", "v1"), trg_atrs.Clear().Add_many("k1", "v1a"), expd_atrs.Clear().Add_many("k1", "v1a"));
|
||
// cls: many
|
||
fxt.Test__merge_attributes(src_atrs.Clear().Add_many(cls, "v1 v2"), trg_atrs.Clear().Add_many(cls, "v3 v4"), expd_atrs.Clear().Add_many(cls, "v1 v2 v3 v4"));
|
||
// cls: src.empty
|
||
fxt.Test__merge_attributes(src_atrs.Clear(), trg_atrs.Clear().Add_many(cls, "v1"), expd_atrs.Clear().Add_many(cls, "v1"));
|
||
// cls: ws
|
||
fxt.Test__merge_attributes(src_atrs.Clear().Add_many(cls, " v1 v2 "), trg_atrs.Clear().Add_many(cls, " v3 v4 "), expd_atrs.Clear().Add_many(cls, "v1 v2 v3 v4"));
|
||
}
|
||
|
||
@Test public void normalizeWhitespace() {
|
||
fxt.Test_normalizeWhitespace("a\r\nb", "a b");
|
||
fxt.Test_normalizeWhitespace("a\rb", "a b");
|
||
fxt.Test_normalizeWhitespace("a\nb", "a b");
|
||
fxt.Test_normalizeWhitespace("a\tb", "a b");
|
||
}
|
||
}
|
||
class XomwSanitizerFxt {
|
||
private final XomwSanitizer sanitizer = new XomwSanitizer();
|
||
private final Bry_bfr tmp = Bry_bfr_.New();
|
||
public void Test__normalize_char_references(String src_str, String expd) {
|
||
byte[] src_bry = Bry_.new_u8(src_str);
|
||
sanitizer.normalizeCharReferences(tmp, Bool_.Y, src_bry, 0, src_bry.length);
|
||
Gftest.Eq__str(expd, tmp.To_str_and_clear());
|
||
}
|
||
public void Test__regex_domain_y(Xomw_regex_find_domain regex_domain, String src_str, String expd_prot, String expd_host, String expd_rest) {
|
||
byte[] src_bry = Bry_.new_u8(src_str);
|
||
Gftest.Eq__bool(true, regex_domain.Match(src_bry, 0, src_bry.length), src_str);
|
||
Gftest.Eq__str(expd_prot, Bry_.Mid(src_bry, regex_domain.prot_bgn, regex_domain.prot_end));
|
||
Gftest.Eq__str(expd_host, Bry_.Mid(src_bry, regex_domain.host_bgn, regex_domain.host_end));
|
||
Gftest.Eq__str(expd_rest, Bry_.Mid(src_bry, regex_domain.rest_bgn, regex_domain.rest_end));
|
||
}
|
||
public void Test__regex_domain_n(Xomw_regex_find_domain regex_domain, String src_str) {
|
||
byte[] src_bry = Bry_.new_u8(src_str);
|
||
Gftest.Eq__bool(false, regex_domain.Match(src_bry, 0, src_bry.length), src_str);
|
||
}
|
||
public void Test__regex_escape_invalid(Xomw_regex_escape_invalid regex, String src_str, boolean expd_rslt, String expd_str) {
|
||
byte[] src_bry = Bry_.new_u8(src_str);
|
||
Gftest.Eq__bool(expd_rslt, regex.Escape(tmp, src_bry, 0, src_bry.length));
|
||
Gftest.Eq__str(expd_str, tmp.To_bry_and_clear());
|
||
}
|
||
public void Test__regex_ipv6_brack(Xomw_regex_ipv6_brack regex, boolean expd_rslt, String src_str) {
|
||
byte[] src_bry = Bry_.new_u8(src_str);
|
||
Gftest.Eq__bool(expd_rslt, regex.Match(src_bry, 0, src_bry.length));
|
||
}
|
||
public void Test__decode_char_references(String src_str, String expd) {
|
||
byte[] src_bry = Bry_.new_u8(src_str);
|
||
sanitizer.decodeCharReferences(tmp, Bool_.Y, src_bry, 0, src_bry.length);
|
||
Gftest.Eq__str(expd, tmp.To_str_and_clear());
|
||
}
|
||
public void Test__clean_url(String src_str, String expd) {
|
||
byte[] src_bry = Bry_.new_u8(src_str);
|
||
Gftest.Eq__str(expd, sanitizer.cleanUrl(src_bry));
|
||
}
|
||
public void Test__merge_attributes(Xomw_atr_mgr src, Xomw_atr_mgr trg, Xomw_atr_mgr expd) {
|
||
sanitizer.mergeAttributes(src, trg);
|
||
Gftest.Eq__ary__lines(expd.To_str(tmp), src.To_str(tmp), "merge_atrs");
|
||
}
|
||
public void Test_normalizeWhitespace(String src_str, String expd) {
|
||
Gftest.Eq__str(expd, sanitizer.normalizeWhitespace(Bry_.new_u8(src_str)), "merge_atrs");
|
||
}
|
||
}
|