-
Notifications
You must be signed in to change notification settings - Fork 56
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Adds support for native URL parsing if node API is unavailable #149
base: master
Are you sure you want to change the base?
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,20 +1,5 @@ | ||
|
||
var NODE_URL; | ||
try { | ||
NODE_URL = require('url'); | ||
} catch(e) { | ||
// no-op - we have a fallback in parseURL | ||
} | ||
var isValid = require('./is-valid.js'); | ||
|
||
function parseURL(urlString) { | ||
if (NODE_URL) { | ||
return NODE_URL.parse(urlString, null, true); | ||
} | ||
return new URL(urlString); | ||
} | ||
|
||
|
||
/** | ||
* Utility to cleanup the base host value. Also removes url fragments. | ||
* | ||
|
@@ -29,7 +14,9 @@ function parseURL(urlString) { | |
*/ | ||
|
||
// scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) | ||
var hasPrefixRE = /^(([a-z][a-z0-9+.-]*)?:)?\/\//; | ||
var hasPrefixRE = /^(([a-z][a-z0-9+.-]+)?:)\/\//; | ||
var ipLikeRE = /(\d+\.){3}\d+/i; | ||
var ipv6LikeRE = /(([A-Fa-f0-9]{1,4}::?){1,7}[A-Fa-f0-9]{1,4}|::1)/i; | ||
|
||
|
||
/** | ||
|
@@ -89,30 +76,45 @@ module.exports = function extractHostname(value) { | |
url = '' + url; | ||
} | ||
|
||
var needsTrimming = checkTrimmingNeeded(url); | ||
if (needsTrimming) { | ||
url = url.trim(); | ||
} | ||
|
||
var needsLowerCase = checkLowerCaseNeeded(url); | ||
if (needsLowerCase) { | ||
url = url.toLowerCase(); | ||
} | ||
|
||
var v6Match = url.match(ipv6LikeRE); | ||
if (v6Match) { | ||
return v6Match[1]; | ||
} | ||
|
||
var needsTrimming = checkTrimmingNeeded(url); | ||
if (needsTrimming) { | ||
url = url.trim(); | ||
} | ||
|
||
// Try again after `url` has been transformed to lowercase and trimmed. | ||
if ((needsLowerCase || needsTrimming) && isValid(url)) { | ||
return trimTrailingDots(url); | ||
} | ||
|
||
// Proceed with heavier url parsing to extract the hostname. | ||
if (!hasPrefixRE.test(url)) { | ||
url = '//' + url; | ||
url = 'https://' + url; | ||
Comment on lines
-97
to
+101
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. WHATWG URL does not support scheme-less URLs so I had to add a scheme to get it to parse. |
||
} | ||
|
||
var parts = parseURL(url); | ||
|
||
if (parts.hostname) { | ||
return trimTrailingDots(parts.hostname); | ||
try { | ||
var parts = new URL(url); | ||
|
||
if (parts.hostname) { | ||
// WHATWG URL parses any integer sequence as an IP, whereas the legacy | ||
// node URL module would not. Preserve behavior where non-ip-like strings | ||
// will not result in valid hostnames. | ||
if (ipLikeRE.test(parts.hostname) && !ipLikeRE.test(value)) { | ||
return value; | ||
} | ||
Comment on lines
+111
to
+113
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. WHATWG URL will parse the string |
||
return trimTrailingDots(parts.hostname); | ||
} | ||
} catch (e) { | ||
// Invalid URL | ||
} | ||
|
||
return null; | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -291,7 +291,7 @@ describe('tld.js', function () { | |
}); | ||
|
||
it('should return the initial value if it is not a valid hostname', function(){ | ||
expect(tld.extractHostname(42)).to.equal('42'); | ||
expect(tld.extractHostname(42)).to.equal(42); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. As mentioned above - "return the initial value" feels like it should return an integer, not a string. But this is a change in behavior. |
||
}); | ||
|
||
it('should return www.nytimes.com even with an URL as a parameter', function(){ | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This diff is a little funky, but I just moved the lowercase check to before the ipv6 check since the later IP checks expect a lowercased string.