From 1cbf000227dcdd2f4297a9b8056cee8fb2f14cfd Mon Sep 17 00:00:00 2001 From: stoically Date: Fri, 16 Mar 2018 16:18:16 +0100 Subject: [PATCH] Change same domain logic to respect second+ level domains Part of #92 --- package.json | 1 + src/.eslintrc | 1 + src/background.js | 10 +- src/background/lib.js | 832 +++++++++++++++++++++++++++++++++++++++++- 4 files changed, 834 insertions(+), 10 deletions(-) diff --git a/package.json b/package.json index 446841d4..29a10f0e 100644 --- a/package.json +++ b/package.json @@ -26,6 +26,7 @@ "glob-to-regexp": "^0.4.0", "jquery": "^3.3.1", "p-queue": "^2.3.0", + "psl": "^1.1.25", "semantic-ui": "^2.3.0", "semver-compare": "^1.0.0" }, diff --git a/src/.eslintrc b/src/.eslintrc index 819a8f1f..40a993e0 100644 --- a/src/.eslintrc +++ b/src/.eslintrc @@ -7,6 +7,7 @@ "tmp": false, "debug": false, "lib": false, + "psl": false, "log": false, "delay": false, "globToRegexp": false, diff --git a/src/background.js b/src/background.js index d9c15ce6..691770e4 100644 --- a/src/background.js +++ b/src/background.js @@ -376,15 +376,7 @@ class TemporaryContainers { sameDomain(origin, target) { - const splittedTarget = target.split('.'); - const checkHostname = '.' + (splittedTarget.splice(-2).join('.')); - const dottedOrigin = '.' + origin; - if (target.length > 1 && - (dottedOrigin.endsWith(checkHostname) || - checkHostname.endsWith(dottedOrigin))) { - return true; - } - return false; + return psl.parse(origin).domain === psl.parse(target).domain; } diff --git a/src/background/lib.js b/src/background/lib.js index 637d47fd..3f8cdd94 100644 --- a/src/background/lib.js +++ b/src/background/lib.js @@ -364,7 +364,837 @@ window.lib.versionCompare = () => { return versionCompare; }; +window.lib.psl = () => { + // https://github.com/wrangr/psl + + // The MIT License (MIT) + // + // Copyright (c) 2017 Lupo Montero lupomontero@gmail.com + // + // Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + // + // The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + // + // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + (function(f){if(typeof exports==="object"&&typeof module!=="undefined"){module.exports=f()}else if(typeof define==="function"&&define.amd){define([],f)}else{var g;if(typeof window!=="undefined"){g=window}else if(typeof global!=="undefined"){g=global}else if(typeof self!=="undefined"){g=self}else{g=this}g.psl = f()}})(function(){var define,module,exports;return (function e(t,n,r){function s(o,u){if(!n[o]){if(!t[o]){var a=typeof require=="function"&&require;if(!u&&a)return a(o,!0);if(i)return i(o,!0);var f=new Error("Cannot find module '"+o+"'");throw f.code="MODULE_NOT_FOUND",f}var l=n[o]={exports:{}};t[o][0].call(l.exports,function(e){var n=t[o][1][e];return s(n?n:e)},l,l.exports,e,t,n,r)}return n[o].exports}var i=typeof require=="function"&&require;for(var o=0;o= punySuffix.length) { + // return memo; + // } + //} + return rule; + }, null); + }; + + + // + // Error codes and messages. + // + exports.errorCodes = { + DOMAIN_TOO_SHORT: 'Domain name too short.', + DOMAIN_TOO_LONG: 'Domain name too long. It should be no more than 255 chars.', + LABEL_STARTS_WITH_DASH: 'Domain name label can not start with a dash.', + LABEL_ENDS_WITH_DASH: 'Domain name label can not end with a dash.', + LABEL_TOO_LONG: 'Domain name label should be at most 63 chars long.', + LABEL_TOO_SHORT: 'Domain name label should be at least 1 character long.', + LABEL_INVALID_CHARS: 'Domain name label can only contain alphanumeric characters or dashes.' + }; + + + // + // Validate domain name and throw if not valid. + // + // From wikipedia: + // + // Hostnames are composed of series of labels concatenated with dots, as are all + // domain names. Each label must be between 1 and 63 characters long, and the + // entire hostname (including the delimiting dots) has a maximum of 255 chars. + // + // Allowed chars: + // + // * `a-z` + // * `0-9` + // * `-` but not as a starting or ending character + // * `.` as a separator for the textual portions of a domain name + // + // * http://en.wikipedia.org/wiki/Domain_name + // * http://en.wikipedia.org/wiki/Hostname + // + internals.validate = function (input) { + + // Before we can validate we need to take care of IDNs with unicode chars. + var ascii = Punycode.toASCII(input); + + if (ascii.length < 1) { + return 'DOMAIN_TOO_SHORT'; + } + if (ascii.length > 255) { + return 'DOMAIN_TOO_LONG'; + } + + // Check each part's length and allowed chars. + var labels = ascii.split('.'); + var label; + + for (var i = 0; i < labels.length; ++i) { + label = labels[i]; + if (!label.length) { + return 'LABEL_TOO_SHORT'; + } + if (label.length > 63) { + return 'LABEL_TOO_LONG'; + } + if (label.charAt(0) === '-') { + return 'LABEL_STARTS_WITH_DASH'; + } + if (label.charAt(label.length - 1) === '-') { + return 'LABEL_ENDS_WITH_DASH'; + } + if (!/^[a-z0-9\-]+$/.test(label)) { + return 'LABEL_INVALID_CHARS'; + } + } + }; + + + // + // Public API + // + + + // + // Parse domain. + // + exports.parse = function (input) { + + if (typeof input !== 'string') { + throw new TypeError('Domain name must be a string.'); + } + + // Force domain to lowercase. + var domain = input.slice(0).toLowerCase(); + + // Handle FQDN. + // TODO: Simply remove trailing dot? + if (domain.charAt(domain.length - 1) === '.') { + domain = domain.slice(0, domain.length - 1); + } + + // Validate and sanitise input. + var error = internals.validate(domain); + if (error) { + return { + input: input, + error: { + message: exports.errorCodes[error], + code: error + } + }; + } + + var parsed = { + input: input, + tld: null, + sld: null, + domain: null, + subdomain: null, + listed: false + }; + + var domainParts = domain.split('.'); + + // Non-Internet TLD + if (domainParts[domainParts.length - 1] === 'local') { + return parsed; + } + + var handlePunycode = function () { + + if (!/xn--/.test(domain)) { + return parsed; + } + if (parsed.domain) { + parsed.domain = Punycode.toASCII(parsed.domain); + } + if (parsed.subdomain) { + parsed.subdomain = Punycode.toASCII(parsed.subdomain); + } + return parsed; + }; + + var rule = internals.findRule(domain); + + // Unlisted tld. + if (!rule) { + if (domainParts.length < 2) { + return parsed; + } + parsed.tld = domainParts.pop(); + parsed.sld = domainParts.pop(); + parsed.domain = [parsed.sld, parsed.tld].join('.'); + if (domainParts.length) { + parsed.subdomain = domainParts.pop(); + } + return handlePunycode(); + } + + // At this point we know the public suffix is listed. + parsed.listed = true; + + var tldParts = rule.suffix.split('.'); + var privateParts = domainParts.slice(0, domainParts.length - tldParts.length); + + if (rule.exception) { + privateParts.push(tldParts.shift()); + } + + parsed.tld = tldParts.join('.'); + + if (!privateParts.length) { + return handlePunycode(); + } + + if (rule.wildcard) { + tldParts.unshift(privateParts.pop()); + parsed.tld = tldParts.join('.'); + } + + if (!privateParts.length) { + return handlePunycode(); + } + + parsed.sld = privateParts.pop(); + parsed.domain = [parsed.sld, parsed.tld].join('.'); + + if (privateParts.length) { + parsed.subdomain = privateParts.join('.'); + } + + return handlePunycode(); + }; + + + // + // Get domain. + // + exports.get = function (domain) { + + if (!domain) { + return null; + } + return exports.parse(domain).domain || null; + }; + + + // + // Check whether domain belongs to a known public suffix. + // + exports.isValid = function (domain) { + + var parsed = exports.parse(domain); + return Boolean(parsed.domain && parsed.listed); + }; + + },{"./data/rules.json":1,"punycode":3}],3:[function(require,module,exports){ + (function (global){ + /*! https://mths.be/punycode v1.4.1 by @mathias */ + ;(function(root) { + + /** Detect free variables */ + var freeExports = typeof exports == 'object' && exports && + !exports.nodeType && exports; + var freeModule = typeof module == 'object' && module && + !module.nodeType && module; + var freeGlobal = typeof global == 'object' && global; + if ( + freeGlobal.global === freeGlobal || + freeGlobal.window === freeGlobal || + freeGlobal.self === freeGlobal + ) { + root = freeGlobal; + } + + /** + * The `punycode` object. + * @name punycode + * @type Object + */ + var punycode, + + /** Highest positive signed 32-bit float value */ + maxInt = 2147483647, // aka. 0x7FFFFFFF or 2^31-1 + + /** Bootstring parameters */ + base = 36, + tMin = 1, + tMax = 26, + skew = 38, + damp = 700, + initialBias = 72, + initialN = 128, // 0x80 + delimiter = '-', // '\x2D' + + /** Regular expressions */ + regexPunycode = /^xn--/, + regexNonASCII = /[^\x20-\x7E]/, // unprintable ASCII chars + non-ASCII chars + regexSeparators = /[\x2E\u3002\uFF0E\uFF61]/g, // RFC 3490 separators + + /** Error messages */ + errors = { + 'overflow': 'Overflow: input needs wider integers to process', + 'not-basic': 'Illegal input >= 0x80 (not a basic code point)', + 'invalid-input': 'Invalid input' + }, + + /** Convenience shortcuts */ + baseMinusTMin = base - tMin, + floor = Math.floor, + stringFromCharCode = String.fromCharCode, + + /** Temporary variable */ + key; + + /*--------------------------------------------------------------------------*/ + + /** + * A generic error utility function. + * @private + * @param {String} type The error type. + * @returns {Error} Throws a `RangeError` with the applicable error message. + */ + function error(type) { + throw new RangeError(errors[type]); + } + + /** + * A generic `Array#map` utility function. + * @private + * @param {Array} array The array to iterate over. + * @param {Function} callback The function that gets called for every array + * item. + * @returns {Array} A new array of values returned by the callback function. + */ + function map(array, fn) { + var length = array.length; + var result = []; + while (length--) { + result[length] = fn(array[length]); + } + return result; + } + + /** + * A simple `Array#map`-like wrapper to work with domain name strings or email + * addresses. + * @private + * @param {String} domain The domain name or email address. + * @param {Function} callback The function that gets called for every + * character. + * @returns {Array} A new string of characters returned by the callback + * function. + */ + function mapDomain(string, fn) { + var parts = string.split('@'); + var result = ''; + if (parts.length > 1) { + // In email addresses, only the domain name should be punycoded. Leave + // the local part (i.e. everything up to `@`) intact. + result = parts[0] + '@'; + string = parts[1]; + } + // Avoid `split(regex)` for IE8 compatibility. See #17. + string = string.replace(regexSeparators, '\x2E'); + var labels = string.split('.'); + var encoded = map(labels, fn).join('.'); + return result + encoded; + } + + /** + * Creates an array containing the numeric code points of each Unicode + * character in the string. While JavaScript uses UCS-2 internally, + * this function will convert a pair of surrogate halves (each of which + * UCS-2 exposes as separate characters) into a single code point, + * matching UTF-16. + * @see `punycode.ucs2.encode` + * @see + * @memberOf punycode.ucs2 + * @name decode + * @param {String} string The Unicode input string (UCS-2). + * @returns {Array} The new array of code points. + */ + function ucs2decode(string) { + var output = [], + counter = 0, + length = string.length, + value, + extra; + while (counter < length) { + value = string.charCodeAt(counter++); + if (value >= 0xD800 && value <= 0xDBFF && counter < length) { + // high surrogate, and there is a next character + extra = string.charCodeAt(counter++); + if ((extra & 0xFC00) == 0xDC00) { // low surrogate + output.push(((value & 0x3FF) << 10) + (extra & 0x3FF) + 0x10000); + } else { + // unmatched surrogate; only append this code unit, in case the next + // code unit is the high surrogate of a surrogate pair + output.push(value); + counter--; + } + } else { + output.push(value); + } + } + return output; + } + + /** + * Creates a string based on an array of numeric code points. + * @see `punycode.ucs2.decode` + * @memberOf punycode.ucs2 + * @name encode + * @param {Array} codePoints The array of numeric code points. + * @returns {String} The new Unicode string (UCS-2). + */ + function ucs2encode(array) { + return map(array, function(value) { + var output = ''; + if (value > 0xFFFF) { + value -= 0x10000; + output += stringFromCharCode(value >>> 10 & 0x3FF | 0xD800); + value = 0xDC00 | value & 0x3FF; + } + output += stringFromCharCode(value); + return output; + }).join(''); + } + + /** + * Converts a basic code point into a digit/integer. + * @see `digitToBasic()` + * @private + * @param {Number} codePoint The basic numeric code point value. + * @returns {Number} The numeric value of a basic code point (for use in + * representing integers) in the range `0` to `base - 1`, or `base` if + * the code point does not represent a value. + */ + function basicToDigit(codePoint) { + if (codePoint - 48 < 10) { + return codePoint - 22; + } + if (codePoint - 65 < 26) { + return codePoint - 65; + } + if (codePoint - 97 < 26) { + return codePoint - 97; + } + return base; + } + + /** + * Converts a digit/integer into a basic code point. + * @see `basicToDigit()` + * @private + * @param {Number} digit The numeric value of a basic code point. + * @returns {Number} The basic code point whose value (when used for + * representing integers) is `digit`, which needs to be in the range + * `0` to `base - 1`. If `flag` is non-zero, the uppercase form is + * used; else, the lowercase form is used. The behavior is undefined + * if `flag` is non-zero and `digit` has no uppercase form. + */ + function digitToBasic(digit, flag) { + // 0..25 map to ASCII a..z or A..Z + // 26..35 map to ASCII 0..9 + return digit + 22 + 75 * (digit < 26) - ((flag != 0) << 5); + } + + /** + * Bias adaptation function as per section 3.4 of RFC 3492. + * https://tools.ietf.org/html/rfc3492#section-3.4 + * @private + */ + function adapt(delta, numPoints, firstTime) { + var k = 0; + delta = firstTime ? floor(delta / damp) : delta >> 1; + delta += floor(delta / numPoints); + for (/* no initialization */; delta > baseMinusTMin * tMax >> 1; k += base) { + delta = floor(delta / baseMinusTMin); + } + return floor(k + (baseMinusTMin + 1) * delta / (delta + skew)); + } + + /** + * Converts a Punycode string of ASCII-only symbols to a string of Unicode + * symbols. + * @memberOf punycode + * @param {String} input The Punycode string of ASCII-only symbols. + * @returns {String} The resulting string of Unicode symbols. + */ + function decode(input) { + // Don't use UCS-2 + var output = [], + inputLength = input.length, + out, + i = 0, + n = initialN, + bias = initialBias, + basic, + j, + index, + oldi, + w, + k, + digit, + t, + /** Cached calculation results */ + baseMinusT; + + // Handle the basic code points: let `basic` be the number of input code + // points before the last delimiter, or `0` if there is none, then copy + // the first basic code points to the output. + + basic = input.lastIndexOf(delimiter); + if (basic < 0) { + basic = 0; + } + + for (j = 0; j < basic; ++j) { + // if it's not a basic code point + if (input.charCodeAt(j) >= 0x80) { + error('not-basic'); + } + output.push(input.charCodeAt(j)); + } + + // Main decoding loop: start just after the last delimiter if any basic code + // points were copied; start at the beginning otherwise. + + for (index = basic > 0 ? basic + 1 : 0; index < inputLength; /* no final expression */) { + + // `index` is the index of the next character to be consumed. + // Decode a generalized variable-length integer into `delta`, + // which gets added to `i`. The overflow checking is easier + // if we increase `i` as we go, then subtract off its starting + // value at the end to obtain `delta`. + for (oldi = i, w = 1, k = base; /* no condition */; k += base) { + + if (index >= inputLength) { + error('invalid-input'); + } + + digit = basicToDigit(input.charCodeAt(index++)); + + if (digit >= base || digit > floor((maxInt - i) / w)) { + error('overflow'); + } + + i += digit * w; + t = k <= bias ? tMin : (k >= bias + tMax ? tMax : k - bias); + + if (digit < t) { + break; + } + + baseMinusT = base - t; + if (w > floor(maxInt / baseMinusT)) { + error('overflow'); + } + + w *= baseMinusT; + + } + + out = output.length + 1; + bias = adapt(i - oldi, out, oldi == 0); + + // `i` was supposed to wrap around from `out` to `0`, + // incrementing `n` each time, so we'll fix that now: + if (floor(i / out) > maxInt - n) { + error('overflow'); + } + + n += floor(i / out); + i %= out; + + // Insert `n` at position `i` of the output + output.splice(i++, 0, n); + + } + + return ucs2encode(output); + } + + /** + * Converts a string of Unicode symbols (e.g. a domain name label) to a + * Punycode string of ASCII-only symbols. + * @memberOf punycode + * @param {String} input The string of Unicode symbols. + * @returns {String} The resulting Punycode string of ASCII-only symbols. + */ + function encode(input) { + var n, + delta, + handledCPCount, + basicLength, + bias, + j, + m, + q, + k, + t, + currentValue, + output = [], + /** `inputLength` will hold the number of code points in `input`. */ + inputLength, + /** Cached calculation results */ + handledCPCountPlusOne, + baseMinusT, + qMinusT; + + // Convert the input in UCS-2 to Unicode + input = ucs2decode(input); + + // Cache the length + inputLength = input.length; + + // Initialize the state + n = initialN; + delta = 0; + bias = initialBias; + + // Handle the basic code points + for (j = 0; j < inputLength; ++j) { + currentValue = input[j]; + if (currentValue < 0x80) { + output.push(stringFromCharCode(currentValue)); + } + } + + handledCPCount = basicLength = output.length; + + // `handledCPCount` is the number of code points that have been handled; + // `basicLength` is the number of basic code points. + + // Finish the basic string - if it is not empty - with a delimiter + if (basicLength) { + output.push(delimiter); + } + + // Main encoding loop: + while (handledCPCount < inputLength) { + + // All non-basic code points < n have been handled already. Find the next + // larger one: + for (m = maxInt, j = 0; j < inputLength; ++j) { + currentValue = input[j]; + if (currentValue >= n && currentValue < m) { + m = currentValue; + } + } + + // Increase `delta` enough to advance the decoder's state to , + // but guard against overflow + handledCPCountPlusOne = handledCPCount + 1; + if (m - n > floor((maxInt - delta) / handledCPCountPlusOne)) { + error('overflow'); + } + + delta += (m - n) * handledCPCountPlusOne; + n = m; + + for (j = 0; j < inputLength; ++j) { + currentValue = input[j]; + + if (currentValue < n && ++delta > maxInt) { + error('overflow'); + } + + if (currentValue == n) { + // Represent delta as a generalized variable-length integer + for (q = delta, k = base; /* no condition */; k += base) { + t = k <= bias ? tMin : (k >= bias + tMax ? tMax : k - bias); + if (q < t) { + break; + } + qMinusT = q - t; + baseMinusT = base - t; + output.push( + stringFromCharCode(digitToBasic(t + qMinusT % baseMinusT, 0)) + ); + q = floor(qMinusT / baseMinusT); + } + + output.push(stringFromCharCode(digitToBasic(q, 0))); + bias = adapt(delta, handledCPCountPlusOne, handledCPCount == basicLength); + delta = 0; + ++handledCPCount; + } + } + + ++delta; + ++n; + + } + return output.join(''); + } + + /** + * Converts a Punycode string representing a domain name or an email address + * to Unicode. Only the Punycoded parts of the input will be converted, i.e. + * it doesn't matter if you call it on a string that has already been + * converted to Unicode. + * @memberOf punycode + * @param {String} input The Punycoded domain name or email address to + * convert to Unicode. + * @returns {String} The Unicode representation of the given Punycode + * string. + */ + function toUnicode(input) { + return mapDomain(input, function(string) { + return regexPunycode.test(string) + ? decode(string.slice(4).toLowerCase()) + : string; + }); + } + + /** + * Converts a Unicode string representing a domain name or an email address to + * Punycode. Only the non-ASCII parts of the domain name will be converted, + * i.e. it doesn't matter if you call it with a domain that's already in + * ASCII. + * @memberOf punycode + * @param {String} input The domain name or email address to convert, as a + * Unicode string. + * @returns {String} The Punycode representation of the given domain name or + * email address. + */ + function toASCII(input) { + return mapDomain(input, function(string) { + return regexNonASCII.test(string) + ? 'xn--' + encode(string) + : string; + }); + } + + /*--------------------------------------------------------------------------*/ + + /** Define the public API */ + punycode = { + /** + * A string representing the current Punycode.js version number. + * @memberOf punycode + * @type String + */ + 'version': '1.4.1', + /** + * An object of methods to convert from JavaScript's internal character + * representation (UCS-2) to Unicode code points, and back. + * @see + * @memberOf punycode + * @type Object + */ + 'ucs2': { + 'decode': ucs2decode, + 'encode': ucs2encode + }, + 'decode': decode, + 'encode': encode, + 'toASCII': toASCII, + 'toUnicode': toUnicode + }; + + /** Expose `punycode` */ + // Some AMD build optimizers, like r.js, check for specific condition patterns + // like the following: + if ( + typeof define == 'function' && + typeof define.amd == 'object' && + define.amd + ) { + define('punycode', function() { + return punycode; + }); + } else if (freeExports && freeModule) { + if (module.exports == freeExports) { + // in Node.js, io.js, or RingoJS v0.8.0+ + freeModule.exports = punycode; + } else { + // in Narwhal or RingoJS v0.7.0- + for (key in punycode) { + punycode.hasOwnProperty(key) && (freeExports[key] = punycode[key]); + } + } + } else { + // in Rhino or a web browser + root.punycode = punycode; + } + + }(this)); + + }).call(this,typeof global !== "undefined" ? global : typeof self !== "undefined" ? self : typeof window !== "undefined" ? window : {}) + },{}]},{},[2])(2) + }); + + return self.psl; +} + window.delay = window.lib.delay(); window.globToRegexp = window.lib.globToRegexp(); window.versionCompare = window.lib.versionCompare(); -window.PQueue = window.lib.PQueue(); \ No newline at end of file +window.PQueue = window.lib.PQueue(); +window.lib.psl(); \ No newline at end of file