Make ISEMAIL and ISURL more flexible for longer TLDs (#834)

Allow TLDs of up to 24 characters (previously 6) in ISEMAIL and ISURL
This commit is contained in:
Vincent Viers 2024-01-31 19:58:50 +01:00 committed by GitHub
parent 36ade2bfd0
commit 6ff4f43b07
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 14 additions and 6 deletions

View File

@ -23,7 +23,7 @@ export const UP_TRIANGLE = '\u25B2';
export const DOWN_TRIANGLE = '\u25BC';
const EMAIL_RE = new RegExp("^\\w[\\w%+/='-]*(\\.[\\w%+/='-]+)*@([A-Za-z0-9]([A-Za-z0-9-]*[A-Za-z" +
"0-9])?\\.)+[A-Za-z]{2,6}$", "u");
"0-9])?\\.)+[A-Za-z]{2,24}$", "u");
// Returns whether str starts with prefix. (Note that this implementation avoids creating a new
// string, and only checks a single location.)

View File

@ -257,7 +257,8 @@ _email_regexp = re.compile(
([A-Za-z0-9] # Each part of hostname must start with alphanumeric
([A-Za-z0-9-]*[A-Za-z0-9])?\. # May have dashes inside, but end in alphanumeric
)+
[A-Za-z]{2,6}$ # Restrict top-level domain to length {2,6}. Google seems
[A-Za-z]{2,24}$ # Restrict top-level domain to length {2,24} (theoretically,
# the max length is 63 bytes as per RFC 1034). Google seems
# to use a whitelist for TLDs longer than 2 characters.
""", re.UNICODE | re.VERBOSE)
@ -289,7 +290,8 @@ def ISEMAIL(value):
>>> ISEMAIL("john@aol...com")
False
More tests:
More tests: Google Sheets Grist
------------- -----
>>> ISEMAIL("Abc@example.com") # True, True
True
>>> ISEMAIL("Abc.123@example.com") # True, True
@ -314,6 +316,10 @@ def ISEMAIL(value):
True
>>> ISEMAIL("Bob_O'Reilly+tag@example.com") # False, True
True
>>> ISEMAIL("marie@isola.corsica") # False, True
True
>>> ISEMAIL("fabio@disapproved.solutions") # False, True
True
>>> ISEMAIL(u"фыва@mail.ru") # False, True
True
>>> ISEMAIL("my@baddash.-.com") # True, False
@ -324,8 +330,6 @@ def ISEMAIL(value):
False
>>> ISEMAIL("john@-.com") # True, False
False
>>> ISEMAIL("fabio@disapproved.solutions") # False, False
False
>>> ISEMAIL("!def!xyz%abc@example.com") # False, False
False
>>> ISEMAIL("!#$%&'*+-/=?^_`.{|}~@example.com") # False, False
@ -391,7 +395,8 @@ _url_regexp = re.compile(
([A-Za-z0-9] # Each part of hostname must start with alphanumeric
([A-Za-z0-9-]*[A-Za-z0-9])?\. # May have dashes inside, but end in alphanumeric
)+
[A-Za-z]{2,6} # Restrict top-level domain to length {2,6}. Google seems
[A-Za-z]{2,24} # Restrict top-level domain to length {2,24} (theoretically,
# the max length is 63 bytes as per RFC 1034). Google seems
# to use a whitelist for TLDs longer than 2 characters.
([/?][-\w!#$%&'()*+,./:;=?@~]*)?$ # Notably, this excludes <, >, and ".
""", re.VERBOSE)
@ -437,6 +442,8 @@ def ISURL(value):
True
>>> ISURL("http://foo.com/!#$%25&'()*+,-./=?@_~")
True
>>> ISURL("http://collectivite.isla.corsica")
True
>>> ISURL("http://../")
False
>>> ISURL("http://??/")

View File

@ -247,6 +247,7 @@ describe('gutil', function() {
assert.isTrue(gutil.isEmail('email@subdomain.do-main.com'));
assert.isTrue(gutil.isEmail('firstname+lastname@domain.com'));
assert.isTrue(gutil.isEmail('email@domain.co.jp'));
assert.isTrue(gutil.isEmail('marie@isola.corsica'));
assert.isFalse(gutil.isEmail('plainaddress'));
assert.isFalse(gutil.isEmail('@domain.com'));