Skip to content

Commit

Permalink
fix(input): fix URL validation being too strict
Browse files Browse the repository at this point in the history
Background:
Prior to ffb6b2f, there was a bug in `URL_REGEXP`, trying to match the hostname as `\S+` (meaning
any non-space character). This resulted in never actually validating the structure of the URL (e.g.
segments such as port, path, query, fragment).
Then ffb6b2f and subsequently e4bb838 fixed that bug, but revealed `URL_REGEXP`'s "strictness" wrt
certain parts of the URL.

Since browsers are too lenient when it comes to URL validation, this commit relaxes the "strictness"
of `URL_REGEXP`, focusing more on the general structure, than on the specific characters allowed in
each segment.

Note 1: `URL_REGEXP` still seems to be stricter than browsers in some cases.
Note 2: Browsers don't always agree on what is a valid URL and what isn't.

Fixes angular#13528
  • Loading branch information
gkalpak committed Dec 15, 2015
1 parent 5ef3af3 commit d1971a7
Show file tree
Hide file tree
Showing 2 changed files with 121 additions and 17 deletions.
13 changes: 12 additions & 1 deletion src/ng/directive/input.js
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,18 @@
// Regex code is obtained from SO: https://stackoverflow.com/questions/3143070/javascript-regex-iso-datetime#answer-3143231
var ISO_DATE_REGEXP = /\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d:[0-5]\d\.\d+([+-][0-2]\d:[0-5]\d|Z)/;
// See valid URLs in RFC3987 (http://tools.ietf.org/html/rfc3987)
var URL_REGEXP = /^[A-Za-z][A-Za-z\d.+-]*:\/*(?:\w+(?::\w+)?@)?[^\s/]+(?::\d+)?(?:\/[\w#!:.?+=&%@\-/[\]$'()*,;~]*)?$/;
// Note: We are being more lenient, because browsers are too.
// 1. Scheme
// 2. Slashes
// 3. Username
// 4. Password
// 5. Hostname
// 6. Port
// 7. Path
// 8. Query
// 9. Fragment
// 1111111111111111 222 333333 44444 555555555555555555555555 666 77777777 8888888 999
var URL_REGEXP = /^[a-z][a-z\d.+-]*:\/*(?:[^:@]+(?::[^@]+)?@)?(?:[^\s:/?#]+|\[[a-f\d:]+\])(?::\d+)?(?:\/[^?#]*)?(?:\?[^#]*)?(?:#.*)?$/i;
var EMAIL_REGEXP = /^[a-z0-9!#$%&'*+\/=?^_`{|}~.-]+@[a-z0-9]([a-z0-9-]*[a-z0-9])?(\.[a-z0-9]([a-z0-9-]*[a-z0-9])?)*$/i;
var NUMBER_REGEXP = /^\s*(\-|\+)?(\d+|(\d*(\.\d*)))([eE][+-]?\d+)?\s*$/;
var DATE_REGEXP = /^(\d{4})-(\d{2})-(\d{2})$/;
Expand Down
125 changes: 109 additions & 16 deletions test/ng/directive/inputSpec.js
Original file line number Diff line number Diff line change
Expand Up @@ -2535,22 +2535,115 @@ describe('input', function() {


describe('URL_REGEXP', function() {
/* global URL_REGEXP: false */
it('should validate url', function() {
// See valid URLs in RFC3987 (http://tools.ietf.org/html/rfc3987)
expect(URL_REGEXP.test('http://server:123/path')).toBe(true);
expect(URL_REGEXP.test('https://server:123/path')).toBe(true);
expect(URL_REGEXP.test('file:///home/user')).toBe(true);
expect(URL_REGEXP.test('mailto:[email protected]?subject=Foo')).toBe(true);
expect(URL_REGEXP.test('r2-d2.c3-p0://localhost/foo')).toBe(true);
expect(URL_REGEXP.test('abc:/foo')).toBe(true);
expect(URL_REGEXP.test('http://example.com/path;path')).toBe(true);
expect(URL_REGEXP.test('http://example.com/[]$\'()*,~)')).toBe(true);
expect(URL_REGEXP.test('http:')).toBe(false);
expect(URL_REGEXP.test('[email protected]')).toBe(false);
expect(URL_REGEXP.test('a_B.c')).toBe(false);
expect(URL_REGEXP.test('0scheme://example.com')).toBe(false);
expect(URL_REGEXP.test('http://example.com:9999/``')).toBe(false);
// See valid URLs in RFC3987 (http://tools.ietf.org/html/rfc3987)
// Note: We are being more lenient, because browsers are too.
var urls = [
['scheme://hostname', true],
['scheme://username:[email protected]:7678/pa/t.h?q=u&e=r&y#fragment', true],

// Validating `scheme`
['://example.com', false],
['0scheme://example.com', false],
['.scheme://example.com', false],
['+scheme://example.com', false],
['-scheme://example.com', false],
['_scheme://example.com', false],
['scheme0://example.com', true],
['scheme.://example.com', true],
['scheme+://example.com', true],
['scheme-://example.com', true],
['scheme_://example.com', false],

// Vaidating `:` and `/` after `scheme`
['scheme//example.com', false],
['scheme:example.com', true],
['scheme:/example.com', true],
['scheme:///example.com', true],

// Validating `username` and `password`
['scheme://@example.com', true],
['scheme://[email protected]', true],
['scheme://[email protected]', true],
['scheme://u#s$e%r^n&a*m;[email protected]', true],
['scheme://:[email protected]', true],
['scheme://username:[email protected]', true],
['scheme://username:pass:[email protected]', true],
['scheme://username:[email protected]', true],
['scheme://username:p#a$s%s^w&o*r;[email protected]', true],

// Validating `hostname`
['scheme:', false], // Chrome, FF: true
['scheme://', false], // Chrome, FF: true
['scheme:// example.com:', false], // Chrome, FF: true
['scheme://example com:', false], // Chrome, FF: true
['scheme://:', false], // Chrome, FF: true
['scheme://?', false], // Chrome, FF: true
['scheme://#', false], // Chrome, FF: true
['scheme://username:password@:', false], // Chrome, FF: true
['scheme://username:password@/', false], // Chrome, FF: true
['scheme://username:password@?', false], // Chrome, FF: true
['scheme://username:password@#', false], // Chrome, FF: true
['scheme://host.name', true],
['scheme://123.456.789.10', true],
['scheme://[1234:0000:0000:5678:9abc:0000:0000:def]', true],
['scheme://[1234:0000:0000:5678:9abc:0000:0000:def]:7678', true],
['scheme://[1234:0:0:5678:9abc:0:0:def]', true],
['scheme://[1234::5678:9abc::def]', true],
['scheme://~`!@$%^&*-_=+|\\;\'",.()[]{}<>', true],

// Validating `port`
['scheme://example.com/no-port', true],
['scheme://example.com:7678', true],
['scheme://example.com:76T8', false], // Chrome, FF: true
['scheme://example.com:port', false], // Chrome, FF: true

// Validating `path`
['scheme://example.com/', true],
['scheme://example.com/path', true],
['scheme://example.com/path/~`!@$%^&*-_=+|\\;:\'",./()[]{}<>', true],

// Validating `query`
['scheme://example.com?query', true],
['scheme://example.com/?query', true],
['scheme://example.com/path?query', true],
['scheme://example.com/path?~`!@$%^&*-_=+|\\;:\'",.?/()[]{}<>', true],

// Validating `fragment`
['scheme://example.com#fragment', true],
['scheme://example.com/#fragment', true],
['scheme://example.com/path#fragment', true],
['scheme://example.com/path/#fragment', true],
['scheme://example.com/path?query#fragment', true],
['scheme://example.com/path?query#~`!@#$%^&*-_=+|\\;:\'",.?/()[]{}<>', true],

// Validating miscellaneous
['scheme://☺.✪.⌘.➡/䨹', true],
['scheme://مثال.إختبار', true],
['scheme://例子.测试', true],
['scheme://उदाहरण.परीक्षा', true],

// Legacy tests
['http://server:123/path', true],
['https://server:123/path', true],
['file:///home/user', true],
['mailto:[email protected]?subject=Foo', true],
['r2-d2.c3-p0://localhost/foo', true],
['abc:/foo', true],
['http://example.com/path;path', true],
['http://example.com/[]$\'()*,~)', true],
['http:', false], // FF: true
['[email protected]', false],
['a_B.c', false],
['0scheme://example.com', false],
['http://example.com:9999/``', true]
];

they('should validate url: $prop', urls, function(item) {
var url = item[0];
var valid = item[1];

/* global URL_REGEXP: false */
expect(URL_REGEXP.test(url)).toBe(valid);
});
});
});
Expand Down

2 comments on commit d1971a7

@Narretz
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since I am reviewing commit messages ... ;)

Since browsers are too lenient when it comes to URL validation

Do you mean "browsers are lenient, too"?

@gkalpak
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I actually meant something like "since it makes sense to follow browsers closely and browsers are too lenient...". :)
I'll update that too.

Please sign in to comment.