From 2479a4562fc3c2ca7f362dd3e57daad5225376e7 Mon Sep 17 00:00:00 2001 From: Gregory Jacobs Date: Mon, 17 Nov 2014 01:10:48 -0500 Subject: [PATCH] Match URI scheme names that are greater than 9 characters, and allow for digits, +, ., and - in the scheme name. (Ex: 'chrome-extension:') --- dist/Autolinker.js | 61 ++++++---- dist/Autolinker.min.js | 4 +- package.json | 2 +- src/Autolinker.js | 5 +- src/MatchValidator.js | 27 +++-- src/match/Url.js | 27 +++-- tests/AutolinkerSpec.js | 252 +++++++++++++++++++++++++++++++--------- 7 files changed, 274 insertions(+), 104 deletions(-) diff --git a/dist/Autolinker.js b/dist/Autolinker.js index 458a5bc1..4f0f29a6 100644 --- a/dist/Autolinker.js +++ b/dist/Autolinker.js @@ -16,7 +16,7 @@ /*! * Autolinker.js - * 0.13.1 + * 0.14.0 * * Copyright(c) 2014 Gregory Jacobs * MIT Licensed. http://www.opensource.org/licenses/mit-license.php @@ -243,7 +243,7 @@ emailRegex = /(?:[\-;:&=\+\$,\w\.]+@)/, // something@ for email addresses (a.k.a. local-part) - protocolRegex = /(?:[A-Za-z]{3,9}:(?![A-Za-z]{3,9}:\/\/)(?:\/\/)?)/, // match protocol, allow in format "http://" or "mailto:". However, do not match the first part of something like 'link:http://www.google.com' (i.e. don't match "link:") + protocolRegex = /(?:[A-Za-z][-.+A-Za-z0-9]+:(?![A-Za-z][-.+A-Za-z0-9]+:\/\/)(?!\d+\/?)(?:\/\/)?)/, // match protocol, allow in format "http://" or "mailto:". However, do not match the first part of something like 'link:http://www.google.com' (i.e. don't match "link:"). Also, make sure we don't interpret 'google.com:8000' as if 'google.com' was a protocol here (i.e. ignore a trailing port number in this regex) wwwRegex = /(?:www\.)/, // starting with 'www.' domainNameRegex = /[A-Za-z0-9\.\-]*[A-Za-z0-9\-]/, // anything looking at all like a domain, non-unicode domains, not ending in a period tldRegex = /\.(?:international|construction|contractors|enterprises|photography|productions|foundation|immobilien|industries|management|properties|technology|christmas|community|directory|education|equipment|institute|marketing|solutions|vacations|bargains|boutique|builders|catering|cleaning|clothing|computer|democrat|diamonds|graphics|holdings|lighting|partners|plumbing|supplies|training|ventures|academy|careers|company|cruises|domains|exposed|flights|florist|gallery|guitars|holiday|kitchen|neustar|okinawa|recipes|rentals|reviews|shiksha|singles|support|systems|agency|berlin|camera|center|coffee|condos|dating|estate|events|expert|futbol|kaufen|luxury|maison|monash|museum|nagoya|photos|repair|report|social|supply|tattoo|tienda|travel|viajes|villas|vision|voting|voyage|actor|build|cards|cheap|codes|dance|email|glass|house|mango|ninja|parts|photo|shoes|solar|today|tokyo|tools|watch|works|aero|arpa|asia|best|bike|blue|buzz|camp|club|cool|coop|farm|fish|gift|guru|info|jobs|kiwi|kred|land|limo|link|menu|mobi|moda|name|pics|pink|post|qpon|rich|ruhr|sexy|tips|vote|voto|wang|wien|wiki|zone|bar|bid|biz|cab|cat|ceo|com|edu|gov|int|kim|mil|net|onl|org|pro|pub|red|tel|uno|wed|xxx|xyz|ac|ad|ae|af|ag|ai|al|am|an|ao|aq|ar|as|at|au|aw|ax|az|ba|bb|bd|be|bf|bg|bh|bi|bj|bm|bn|bo|br|bs|bt|bv|bw|by|bz|ca|cc|cd|cf|cg|ch|ci|ck|cl|cm|cn|co|cr|cu|cv|cw|cx|cy|cz|de|dj|dk|dm|do|dz|ec|ee|eg|er|es|et|eu|fi|fj|fk|fm|fo|fr|ga|gb|gd|ge|gf|gg|gh|gi|gl|gm|gn|gp|gq|gr|gs|gt|gu|gw|gy|hk|hm|hn|hr|ht|hu|id|ie|il|im|in|io|iq|ir|is|it|je|jm|jo|jp|ke|kg|kh|ki|km|kn|kp|kr|kw|ky|kz|la|lb|lc|li|lk|lr|ls|lt|lu|lv|ly|ma|mc|md|me|mg|mh|mk|ml|mm|mn|mo|mp|mq|mr|ms|mt|mu|mv|mw|mx|my|mz|na|nc|ne|nf|ng|ni|nl|no|np|nr|nu|nz|om|pa|pe|pf|pg|ph|pk|pl|pm|pn|pr|ps|pt|pw|py|qa|re|ro|rs|ru|rw|sa|sb|sc|sd|se|sg|sh|si|sj|sk|sl|sm|sn|so|sr|st|su|sv|sx|sy|sz|tc|td|tf|tg|th|tj|tk|tl|tm|tn|to|tp|tr|tt|tv|tw|tz|ua|ug|uk|us|uy|uz|va|vc|ve|vg|vi|vn|vu|wf|ws|ye|yt|za|zm|zw)\b/, // match our known top level domains (TLDs) @@ -563,7 +563,8 @@ match = new Autolinker.match.Url( { matchedText : matchStr, url : matchStr, - protocolRelativeMatch : protocolRelativeMatch, + protocolUrlMatch : !!protocolUrlMatch, + protocolRelativeMatch : !!protocolRelativeMatch, stripPrefix : this.stripPrefix } ); } @@ -1318,7 +1319,7 @@ * @private * @property {RegExp} hasFullProtocolRegex */ - hasFullProtocolRegex : /^[A-Za-z]{3,9}:\/\//, + hasFullProtocolRegex : /^[A-Za-z][-.+A-Za-z0-9]+:\/\//, /** * Regex to test for a protocol prefix, such as 'mailto:' @@ -1326,7 +1327,7 @@ * @private * @property {RegExp} hasProtocolPrefixRegex */ - hasProtocolPrefixRegex : /^[A-Za-z]{3,9}:/, + hasProtocolPrefixRegex : /^[A-Za-z][-.+A-Za-z0-9]+:/, /** * Regex to determine if at least one word char exists after the protocol (i.e. after the ':') @@ -1334,7 +1335,7 @@ * @private * @property {RegExp} hasWordCharAfterProtocolRegex */ - hasWordCharAfterProtocolRegex : /:.*?[A-Za-z]/, + hasWordCharAfterProtocolRegex : /:[^\s]*?[A-Za-z]/, /** @@ -1360,9 +1361,9 @@ */ isValidMatch : function( urlMatch, protocolUrlMatch, protocolRelativeMatch ) { if( - this.urlMatchDoesNotHaveProtocolOrDot( urlMatch, protocolUrlMatch ) || // At least one period ('.') must exist in the URL match for us to consider it an actual URL, *unless* it was a full protocol match (like 'http://localhost') - this.urlMatchDoesNotHaveAtLeastOneWordChar( urlMatch ) || // At least one letter character must exist in the domain name after a protocol match. Ex: skip over something like "git:1.0" - this.isInvalidProtocolRelativeMatch( protocolRelativeMatch ) // A protocol-relative match which has a word character in front of it (so we can skip something like "abc//google.com") + this.urlMatchDoesNotHaveProtocolOrDot( urlMatch, protocolUrlMatch ) || // At least one period ('.') must exist in the URL match for us to consider it an actual URL, *unless* it was a full protocol match (like 'http://localhost') + this.urlMatchDoesNotHaveAtLeastOneWordChar( urlMatch, protocolUrlMatch ) || // At least one letter character must exist in the domain name after a protocol match. Ex: skip over something like "git:1.0" + this.isInvalidProtocolRelativeMatch( protocolRelativeMatch ) // A protocol-relative match which has a word character in front of it (so we can skip something like "abc//google.com") ) { return false; } @@ -1388,7 +1389,7 @@ * match. */ urlMatchDoesNotHaveProtocolOrDot : function( urlMatch, protocolUrlMatch ) { - return ( urlMatch && ( !protocolUrlMatch || !this.hasFullProtocolRegex.test( protocolUrlMatch ) ) && urlMatch.indexOf( '.' ) === -1 ); + return ( !!urlMatch && ( !protocolUrlMatch || !this.hasFullProtocolRegex.test( protocolUrlMatch ) ) && urlMatch.indexOf( '.' ) === -1 ); }, @@ -1400,11 +1401,18 @@ * * @private * @param {String} urlMatch The matched URL, if there was one. Will be an empty string if the match is not a URL match. + * @param {String} protocolUrlMatch The match URL string for a protocol match. Ex: 'http://yahoo.com'. This is used to + * know whether or not we have a protocol in the URL string, in order to check for a word character after the protocol + * separator (':'). * @return {Boolean} `true` if the URL match does not have at least one word character in it after the protocol, `false` * otherwise. */ - urlMatchDoesNotHaveAtLeastOneWordChar : function( urlMatch ) { - return ( urlMatch && this.hasProtocolPrefixRegex.test( urlMatch ) && !this.hasWordCharAfterProtocolRegex.test( urlMatch ) ); + urlMatchDoesNotHaveAtLeastOneWordChar : function( urlMatch, protocolUrlMatch ) { + if( urlMatch && protocolUrlMatch ) { + return !this.hasWordCharAfterProtocolRegex.test( urlMatch ); + } else { + return false; + } }, @@ -1420,7 +1428,7 @@ * @return {Boolean} `true` if it is an invalid protocol-relative match, `false` otherwise. */ isInvalidProtocolRelativeMatch : function( protocolRelativeMatch ) { - return ( protocolRelativeMatch && this.invalidProtocolRelMatchRegex.test( protocolRelativeMatch ) ); + return ( !!protocolRelativeMatch && this.invalidProtocolRelMatchRegex.test( protocolRelativeMatch ) ); } } ); @@ -1785,6 +1793,13 @@ * The url that was matched. */ + /** + * @cfg {Boolean} protocolUrlMatch (required) + * + * `true` if the URL is a match which already has a protocol (i.e. 'http://'), `false` if the match was from a 'www' or + * known TLD match. + */ + /** * @cfg {Boolean} protocolRelativeMatch (required) * @@ -1816,13 +1831,13 @@ protocolRelativeRegex : /^\/\//, /** - * @protected - * @property {RegExp} checkForProtocolRegex + * @private + * @property {Boolean} protocolPrepended * - * A regular expression used to check if the {@link #url} is missing a protocol (in which case, 'http://' - * will be added). + * Will be set to `true` if the 'http://' protocol has been prepended to the {@link #url} (because the + * {@link #url} did not have a protocol) */ - checkForProtocolRegex: /^[A-Za-z]{3,9}:/, + protocolPrepended : false, /** @@ -1836,17 +1851,19 @@ /** - * Returns the url that was matched, assuming the protocol to be 'http://' if the match - * was missing a protocol. + * Returns the url that was matched, assuming the protocol to be 'http://' if the original + * match was missing a protocol. * * @return {String} */ getUrl : function() { var url = this.url; - // if the url string doesn't begin with a protocol, assume http:// - if( !this.protocolRelativeMatch && !this.checkForProtocolRegex.test( url ) ) { + // if the url string doesn't begin with a protocol, assume 'http://' + if( !this.protocolRelativeMatch && !this.protocolUrlMatch && !this.protocolPrepended ) { url = this.url = 'http://' + url; + + this.protocolPrepended = true; } return url; diff --git a/dist/Autolinker.min.js b/dist/Autolinker.min.js index 13164d77..8316104a 100644 --- a/dist/Autolinker.min.js +++ b/dist/Autolinker.min.js @@ -1,10 +1,10 @@ /*! * Autolinker.js - * 0.13.1 + * 0.14.0 * * Copyright(c) 2014 Gregory Jacobs * MIT Licensed. http://www.opensource.org/licenses/mit-license.php * * https://github.com/gregjacobs/Autolinker.js */ -!function(a,b){"function"==typeof define&&define.amd?define([],function(){return a.returnExportsGlobal=b()}):"object"==typeof exports?module.exports=b():a.Autolinker=b()}(this,function(){var a=function(b){a.Util.assign(this,b),this.matchValidator=new a.MatchValidator};return a.prototype={constructor:a,urls:!0,email:!0,twitter:!0,newWindow:!0,stripPrefix:!0,className:"",htmlCharacterEntitiesRegex:/( | |<|<|>|>)/gi,matcherRegex:function(){var a=/(^|[^\w])@(\w{1,15})/,b=/(?:[\-;:&=\+\$,\w\.]+@)/,c=/(?:[A-Za-z]{3,9}:(?![A-Za-z]{3,9}:\/\/)(?:\/\/)?)/,d=/(?:www\.)/,e=/[A-Za-z0-9\.\-]*[A-Za-z0-9\-]/,f=/\.(?:international|construction|contractors|enterprises|photography|productions|foundation|immobilien|industries|management|properties|technology|christmas|community|directory|education|equipment|institute|marketing|solutions|vacations|bargains|boutique|builders|catering|cleaning|clothing|computer|democrat|diamonds|graphics|holdings|lighting|partners|plumbing|supplies|training|ventures|academy|careers|company|cruises|domains|exposed|flights|florist|gallery|guitars|holiday|kitchen|neustar|okinawa|recipes|rentals|reviews|shiksha|singles|support|systems|agency|berlin|camera|center|coffee|condos|dating|estate|events|expert|futbol|kaufen|luxury|maison|monash|museum|nagoya|photos|repair|report|social|supply|tattoo|tienda|travel|viajes|villas|vision|voting|voyage|actor|build|cards|cheap|codes|dance|email|glass|house|mango|ninja|parts|photo|shoes|solar|today|tokyo|tools|watch|works|aero|arpa|asia|best|bike|blue|buzz|camp|club|cool|coop|farm|fish|gift|guru|info|jobs|kiwi|kred|land|limo|link|menu|mobi|moda|name|pics|pink|post|qpon|rich|ruhr|sexy|tips|vote|voto|wang|wien|wiki|zone|bar|bid|biz|cab|cat|ceo|com|edu|gov|int|kim|mil|net|onl|org|pro|pub|red|tel|uno|wed|xxx|xyz|ac|ad|ae|af|ag|ai|al|am|an|ao|aq|ar|as|at|au|aw|ax|az|ba|bb|bd|be|bf|bg|bh|bi|bj|bm|bn|bo|br|bs|bt|bv|bw|by|bz|ca|cc|cd|cf|cg|ch|ci|ck|cl|cm|cn|co|cr|cu|cv|cw|cx|cy|cz|de|dj|dk|dm|do|dz|ec|ee|eg|er|es|et|eu|fi|fj|fk|fm|fo|fr|ga|gb|gd|ge|gf|gg|gh|gi|gl|gm|gn|gp|gq|gr|gs|gt|gu|gw|gy|hk|hm|hn|hr|ht|hu|id|ie|il|im|in|io|iq|ir|is|it|je|jm|jo|jp|ke|kg|kh|ki|km|kn|kp|kr|kw|ky|kz|la|lb|lc|li|lk|lr|ls|lt|lu|lv|ly|ma|mc|md|me|mg|mh|mk|ml|mm|mn|mo|mp|mq|mr|ms|mt|mu|mv|mw|mx|my|mz|na|nc|ne|nf|ng|ni|nl|no|np|nr|nu|nz|om|pa|pe|pf|pg|ph|pk|pl|pm|pn|pr|ps|pt|pw|py|qa|re|ro|rs|ru|rw|sa|sb|sc|sd|se|sg|sh|si|sj|sk|sl|sm|sn|so|sr|st|su|sv|sx|sy|sz|tc|td|tf|tg|th|tj|tk|tl|tm|tn|to|tp|tr|tt|tv|tw|tz|ua|ug|uk|us|uy|uz|va|vc|ve|vg|vi|vn|vu|wf|ws|ye|yt|za|zm|zw)\b/,g=/[\-A-Za-z0-9+&@#\/%=~_()|'$*\[\]?!:,.;]*[\-A-Za-z0-9+&@#\/%=~_()|'$*\[\]]/;return new RegExp(["(",a.source,")","|","(",b.source,e.source,f.source,")","|","(","(?:","(",c.source,e.source,")","|","(?:","(.?//)?",d.source,e.source,")","|","(?:","(.?//)?",e.source,f.source,")",")","(?:"+g.source+")?",")"].join(""),"gi")}(),charBeforeProtocolRelMatchRegex:/^(.)?\/\//,link:function(b){var c=this,d=this.getHtmlParser(),e=this.htmlCharacterEntitiesRegex,f=0,g=[];return d.parse(b,{processHtmlNode:function(a,b,c){"a"===b&&(c?f=Math.max(f-1,0):f++),g.push(a)},processTextNode:function(b){if(0===f)for(var d=a.Util.splitAndCapture(b,e),h=0,i=d.length;i>h;h++){var j=d[h],k=c.processTextNode(j);g.push(k)}else g.push(b)}}),g.join("")},getHtmlParser:function(){var b=this.htmlParser;return b||(b=this.htmlParser=new a.HtmlParser),b},getTagBuilder:function(){var b=this.tagBuilder;return b||(b=this.tagBuilder=new a.AnchorTagBuilder({newWindow:this.newWindow,truncate:this.truncate,className:this.className})),b},processTextNode:function(a){var b=this;return a.replace(this.matcherRegex,function(a,c,d,e,f,g,h,i,j){var k=b.processCandidateMatch(a,c,d,e,f,g,h,i,j);if(k){var l=b.createMatchReturnVal(k.match,k.matchStr);return k.prefixStr+l+k.suffixStr}return a})},processCandidateMatch:function(b,c,d,e,f,g,h,i,j){var k,l=i||j,m="",n="";if(c&&!this.twitter||f&&!this.email||g&&!this.urls||!this.matchValidator.isValidMatch(g,h,l))return null;if(this.matchHasUnbalancedClosingParen(b)&&(b=b.substr(0,b.length-1),n=")"),f)k=new a.match.Email({matchedText:b,email:f});else if(c)d&&(m=d,b=b.slice(1)),k=new a.match.Twitter({matchedText:b,twitterHandle:e});else{if(l){var o=l.match(this.charBeforeProtocolRelMatchRegex)[1]||"";o&&(m=o,b=b.slice(1))}k=new a.match.Url({matchedText:b,url:b,protocolRelativeMatch:l,stripPrefix:this.stripPrefix})}return{prefixStr:m,suffixStr:n,matchStr:b,match:k}},matchHasUnbalancedClosingParen:function(a){var b=a.charAt(a.length-1);if(")"===b){var c=a.match(/\(/g),d=a.match(/\)/g),e=c&&c.length||0,f=d&&d.length||0;if(f>e)return!0}return!1},createMatchReturnVal:function(b,c){var d;if(this.replaceFn&&(d=this.replaceFn.call(this,this,b)),"string"==typeof d)return d;if(d===!1)return c;if(d instanceof a.HtmlTag)return d.toString();var e=this.getTagBuilder(),f=e.build(b);return f.toString()}},a.link=function(b,c){var d=new a(c);return d.link(b)},a.match={},a.Util={abstractMethod:function(){throw"abstract"},assign:function(a,b){for(var c in b)b.hasOwnProperty(c)&&(a[c]=b[c]);return a},extend:function(b,c){var d=b.prototype,e=function(){};e.prototype=d;var f;f=c.hasOwnProperty("constructor")?c.constructor:function(){d.constructor.apply(this,arguments)};var g=f.prototype=new e;return g.constructor=f,g.superclass=d,delete c.constructor,a.Util.assign(g,c),f},ellipsis:function(a,b,c){return a.length>b&&(c=null==c?"..":c,a=a.substring(0,b-c.length)+c),a},indexOf:function(a,b){if(Array.prototype.indexOf)return a.indexOf(b);for(var c=0,d=a.length;d>c;c++)if(a[c]===b)return c;return-1},splitAndCapture:function(a,b){if(!b.global)throw new Error("`splitRegex` must have the 'g' flag set");for(var c,d=[],e=0;c=b.exec(a);)d.push(a.substring(e,c.index)),d.push(c[0]),e=c.index+c[0].length;return d.push(a.substring(e)),d}},a.HtmlParser=a.Util.extend(Object,{htmlRegex:function(){var a=/[0-9a-zA-Z:]+/,b=/[^\s\0"'>\/=\x01-\x1F\x7F]+/,c=/(?:".*?"|'.*?'|[^'"=<>`\s]+)/,d=b.source+"(?:\\s*=\\s*"+c.source+")?";return new RegExp(["<(?:!|(/))?","("+a.source+")","(?:","\\s+","(?:",d,"|",c.source+")",")*","\\s*/?",">"].join(""),"g")}(),parse:function(a,b){b=b||{};for(var c,d=b.processHtmlNode||function(){},e=b.processTextNode||function(){},f=this.htmlRegex,g=0;null!==(c=f.exec(a));){var h=c[0],i=c[2],j=!!c[1],k=a.substring(g,c.index);k&&e(k),d(h,i,j),g=c.index+h.length}if(g",this.getInnerHtml(),""].join("")},buildAttrsStr:function(){if(!this.attrs)return"";var a=this.getAttrs(),b=[];for(var c in a)a.hasOwnProperty(c)&&b.push(c+'="'+a[c]+'"');return b.join(" ")}}),a.MatchValidator=a.Util.extend(Object,{invalidProtocolRelMatchRegex:/^[\w]\/\//,hasFullProtocolRegex:/^[A-Za-z]{3,9}:\/\//,hasProtocolPrefixRegex:/^[A-Za-z]{3,9}:/,hasWordCharAfterProtocolRegex:/:.*?[A-Za-z]/,isValidMatch:function(a,b,c){return this.urlMatchDoesNotHaveProtocolOrDot(a,b)||this.urlMatchDoesNotHaveAtLeastOneWordChar(a)||this.isInvalidProtocolRelativeMatch(c)?!1:!0},urlMatchDoesNotHaveProtocolOrDot:function(a,b){return a&&(!b||!this.hasFullProtocolRegex.test(b))&&-1===a.indexOf(".")},urlMatchDoesNotHaveAtLeastOneWordChar:function(a){return a&&this.hasProtocolPrefixRegex.test(a)&&!this.hasWordCharAfterProtocolRegex.test(a)},isInvalidProtocolRelativeMatch:function(a){return a&&this.invalidProtocolRelMatchRegex.test(a)}}),a.AnchorTagBuilder=a.Util.extend(Object,{constructor:function(b){a.Util.assign(this,b)},build:function(b){var c=new a.HtmlTag({tagName:"a",attrs:this.createAttrs(b.getType(),b.getAnchorHref()),innerHtml:this.processAnchorText(b.getAnchorText())});return c},createAttrs:function(a,b){var c={href:b},d=this.createCssClass(a);return d&&(c["class"]=d),this.newWindow&&(c.target="_blank"),c},createCssClass:function(a){var b=this.className;return b?b+" "+b+"-"+a:""},processAnchorText:function(a){return a=this.doTruncate(a)},doTruncate:function(b){return a.Util.ellipsis(b,this.truncate||Number.POSITIVE_INFINITY)}}),a.match.Match=a.Util.extend(Object,{constructor:function(b){a.Util.assign(this,b)},getType:a.Util.abstractMethod,getMatchedText:function(){return this.matchedText},getAnchorHref:a.Util.abstractMethod,getAnchorText:a.Util.abstractMethod}),a.match.Email=a.Util.extend(a.match.Match,{getType:function(){return"email"},getEmail:function(){return this.email},getAnchorHref:function(){return"mailto:"+this.email},getAnchorText:function(){return this.email}}),a.match.Twitter=a.Util.extend(a.match.Match,{getType:function(){return"twitter"},getTwitterHandle:function(){return this.twitterHandle},getAnchorHref:function(){return"https://twitter.com/"+this.twitterHandle},getAnchorText:function(){return"@"+this.twitterHandle}}),a.match.Url=a.Util.extend(a.match.Match,{urlPrefixRegex:/^(https?:\/\/)?(www\.)?/i,protocolRelativeRegex:/^\/\//,checkForProtocolRegex:/^[A-Za-z]{3,9}:/,getType:function(){return"url"},getUrl:function(){var a=this.url;return this.protocolRelativeMatch||this.checkForProtocolRegex.test(a)||(a=this.url="http://"+a),a},getAnchorHref:function(){var a=this.getUrl();return a.replace(/&/g,"&")},getAnchorText:function(){var a=this.getUrl();return this.protocolRelativeMatch&&(a=this.stripProtocolRelativePrefix(a)),this.stripPrefix&&(a=this.stripUrlPrefix(a)),a=this.removeTrailingSlash(a)},stripUrlPrefix:function(a){return a.replace(this.urlPrefixRegex,"")},stripProtocolRelativePrefix:function(a){return a.replace(this.protocolRelativeRegex,"")},removeTrailingSlash:function(a){return"/"===a.charAt(a.length-1)&&(a=a.slice(0,-1)),a}}),a}); \ No newline at end of file +!function(a,b){"function"==typeof define&&define.amd?define([],function(){return a.returnExportsGlobal=b()}):"object"==typeof exports?module.exports=b():a.Autolinker=b()}(this,function(){var a=function(b){a.Util.assign(this,b),this.matchValidator=new a.MatchValidator};return a.prototype={constructor:a,urls:!0,email:!0,twitter:!0,newWindow:!0,stripPrefix:!0,className:"",htmlCharacterEntitiesRegex:/( | |<|<|>|>)/gi,matcherRegex:function(){var a=/(^|[^\w])@(\w{1,15})/,b=/(?:[\-;:&=\+\$,\w\.]+@)/,c=/(?:[A-Za-z][-.+A-Za-z0-9]+:(?![A-Za-z][-.+A-Za-z0-9]+:\/\/)(?!\d+\/?)(?:\/\/)?)/,d=/(?:www\.)/,e=/[A-Za-z0-9\.\-]*[A-Za-z0-9\-]/,f=/\.(?:international|construction|contractors|enterprises|photography|productions|foundation|immobilien|industries|management|properties|technology|christmas|community|directory|education|equipment|institute|marketing|solutions|vacations|bargains|boutique|builders|catering|cleaning|clothing|computer|democrat|diamonds|graphics|holdings|lighting|partners|plumbing|supplies|training|ventures|academy|careers|company|cruises|domains|exposed|flights|florist|gallery|guitars|holiday|kitchen|neustar|okinawa|recipes|rentals|reviews|shiksha|singles|support|systems|agency|berlin|camera|center|coffee|condos|dating|estate|events|expert|futbol|kaufen|luxury|maison|monash|museum|nagoya|photos|repair|report|social|supply|tattoo|tienda|travel|viajes|villas|vision|voting|voyage|actor|build|cards|cheap|codes|dance|email|glass|house|mango|ninja|parts|photo|shoes|solar|today|tokyo|tools|watch|works|aero|arpa|asia|best|bike|blue|buzz|camp|club|cool|coop|farm|fish|gift|guru|info|jobs|kiwi|kred|land|limo|link|menu|mobi|moda|name|pics|pink|post|qpon|rich|ruhr|sexy|tips|vote|voto|wang|wien|wiki|zone|bar|bid|biz|cab|cat|ceo|com|edu|gov|int|kim|mil|net|onl|org|pro|pub|red|tel|uno|wed|xxx|xyz|ac|ad|ae|af|ag|ai|al|am|an|ao|aq|ar|as|at|au|aw|ax|az|ba|bb|bd|be|bf|bg|bh|bi|bj|bm|bn|bo|br|bs|bt|bv|bw|by|bz|ca|cc|cd|cf|cg|ch|ci|ck|cl|cm|cn|co|cr|cu|cv|cw|cx|cy|cz|de|dj|dk|dm|do|dz|ec|ee|eg|er|es|et|eu|fi|fj|fk|fm|fo|fr|ga|gb|gd|ge|gf|gg|gh|gi|gl|gm|gn|gp|gq|gr|gs|gt|gu|gw|gy|hk|hm|hn|hr|ht|hu|id|ie|il|im|in|io|iq|ir|is|it|je|jm|jo|jp|ke|kg|kh|ki|km|kn|kp|kr|kw|ky|kz|la|lb|lc|li|lk|lr|ls|lt|lu|lv|ly|ma|mc|md|me|mg|mh|mk|ml|mm|mn|mo|mp|mq|mr|ms|mt|mu|mv|mw|mx|my|mz|na|nc|ne|nf|ng|ni|nl|no|np|nr|nu|nz|om|pa|pe|pf|pg|ph|pk|pl|pm|pn|pr|ps|pt|pw|py|qa|re|ro|rs|ru|rw|sa|sb|sc|sd|se|sg|sh|si|sj|sk|sl|sm|sn|so|sr|st|su|sv|sx|sy|sz|tc|td|tf|tg|th|tj|tk|tl|tm|tn|to|tp|tr|tt|tv|tw|tz|ua|ug|uk|us|uy|uz|va|vc|ve|vg|vi|vn|vu|wf|ws|ye|yt|za|zm|zw)\b/,g=/[\-A-Za-z0-9+&@#\/%=~_()|'$*\[\]?!:,.;]*[\-A-Za-z0-9+&@#\/%=~_()|'$*\[\]]/;return new RegExp(["(",a.source,")","|","(",b.source,e.source,f.source,")","|","(","(?:","(",c.source,e.source,")","|","(?:","(.?//)?",d.source,e.source,")","|","(?:","(.?//)?",e.source,f.source,")",")","(?:"+g.source+")?",")"].join(""),"gi")}(),charBeforeProtocolRelMatchRegex:/^(.)?\/\//,link:function(b){var c=this,d=this.getHtmlParser(),e=this.htmlCharacterEntitiesRegex,f=0,g=[];return d.parse(b,{processHtmlNode:function(a,b,c){"a"===b&&(c?f=Math.max(f-1,0):f++),g.push(a)},processTextNode:function(b){if(0===f)for(var d=a.Util.splitAndCapture(b,e),h=0,i=d.length;i>h;h++){var j=d[h],k=c.processTextNode(j);g.push(k)}else g.push(b)}}),g.join("")},getHtmlParser:function(){var b=this.htmlParser;return b||(b=this.htmlParser=new a.HtmlParser),b},getTagBuilder:function(){var b=this.tagBuilder;return b||(b=this.tagBuilder=new a.AnchorTagBuilder({newWindow:this.newWindow,truncate:this.truncate,className:this.className})),b},processTextNode:function(a){var b=this;return a.replace(this.matcherRegex,function(a,c,d,e,f,g,h,i,j){var k=b.processCandidateMatch(a,c,d,e,f,g,h,i,j);if(k){var l=b.createMatchReturnVal(k.match,k.matchStr);return k.prefixStr+l+k.suffixStr}return a})},processCandidateMatch:function(b,c,d,e,f,g,h,i,j){var k,l=i||j,m="",n="";if(c&&!this.twitter||f&&!this.email||g&&!this.urls||!this.matchValidator.isValidMatch(g,h,l))return null;if(this.matchHasUnbalancedClosingParen(b)&&(b=b.substr(0,b.length-1),n=")"),f)k=new a.match.Email({matchedText:b,email:f});else if(c)d&&(m=d,b=b.slice(1)),k=new a.match.Twitter({matchedText:b,twitterHandle:e});else{if(l){var o=l.match(this.charBeforeProtocolRelMatchRegex)[1]||"";o&&(m=o,b=b.slice(1))}k=new a.match.Url({matchedText:b,url:b,protocolUrlMatch:!!h,protocolRelativeMatch:!!l,stripPrefix:this.stripPrefix})}return{prefixStr:m,suffixStr:n,matchStr:b,match:k}},matchHasUnbalancedClosingParen:function(a){var b=a.charAt(a.length-1);if(")"===b){var c=a.match(/\(/g),d=a.match(/\)/g),e=c&&c.length||0,f=d&&d.length||0;if(f>e)return!0}return!1},createMatchReturnVal:function(b,c){var d;if(this.replaceFn&&(d=this.replaceFn.call(this,this,b)),"string"==typeof d)return d;if(d===!1)return c;if(d instanceof a.HtmlTag)return d.toString();var e=this.getTagBuilder(),f=e.build(b);return f.toString()}},a.link=function(b,c){var d=new a(c);return d.link(b)},a.match={},a.Util={abstractMethod:function(){throw"abstract"},assign:function(a,b){for(var c in b)b.hasOwnProperty(c)&&(a[c]=b[c]);return a},extend:function(b,c){var d=b.prototype,e=function(){};e.prototype=d;var f;f=c.hasOwnProperty("constructor")?c.constructor:function(){d.constructor.apply(this,arguments)};var g=f.prototype=new e;return g.constructor=f,g.superclass=d,delete c.constructor,a.Util.assign(g,c),f},ellipsis:function(a,b,c){return a.length>b&&(c=null==c?"..":c,a=a.substring(0,b-c.length)+c),a},indexOf:function(a,b){if(Array.prototype.indexOf)return a.indexOf(b);for(var c=0,d=a.length;d>c;c++)if(a[c]===b)return c;return-1},splitAndCapture:function(a,b){if(!b.global)throw new Error("`splitRegex` must have the 'g' flag set");for(var c,d=[],e=0;c=b.exec(a);)d.push(a.substring(e,c.index)),d.push(c[0]),e=c.index+c[0].length;return d.push(a.substring(e)),d}},a.HtmlParser=a.Util.extend(Object,{htmlRegex:function(){var a=/[0-9a-zA-Z:]+/,b=/[^\s\0"'>\/=\x01-\x1F\x7F]+/,c=/(?:".*?"|'.*?'|[^'"=<>`\s]+)/,d=b.source+"(?:\\s*=\\s*"+c.source+")?";return new RegExp(["<(?:!|(/))?","("+a.source+")","(?:","\\s+","(?:",d,"|",c.source+")",")*","\\s*/?",">"].join(""),"g")}(),parse:function(a,b){b=b||{};for(var c,d=b.processHtmlNode||function(){},e=b.processTextNode||function(){},f=this.htmlRegex,g=0;null!==(c=f.exec(a));){var h=c[0],i=c[2],j=!!c[1],k=a.substring(g,c.index);k&&e(k),d(h,i,j),g=c.index+h.length}if(g",this.getInnerHtml(),""].join("")},buildAttrsStr:function(){if(!this.attrs)return"";var a=this.getAttrs(),b=[];for(var c in a)a.hasOwnProperty(c)&&b.push(c+'="'+a[c]+'"');return b.join(" ")}}),a.MatchValidator=a.Util.extend(Object,{invalidProtocolRelMatchRegex:/^[\w]\/\//,hasFullProtocolRegex:/^[A-Za-z][-.+A-Za-z0-9]+:\/\//,hasProtocolPrefixRegex:/^[A-Za-z][-.+A-Za-z0-9]+:/,hasWordCharAfterProtocolRegex:/:[^\s]*?[A-Za-z]/,isValidMatch:function(a,b,c){return this.urlMatchDoesNotHaveProtocolOrDot(a,b)||this.urlMatchDoesNotHaveAtLeastOneWordChar(a,b)||this.isInvalidProtocolRelativeMatch(c)?!1:!0},urlMatchDoesNotHaveProtocolOrDot:function(a,b){return!(!a||b&&this.hasFullProtocolRegex.test(b)||-1!==a.indexOf("."))},urlMatchDoesNotHaveAtLeastOneWordChar:function(a,b){return a&&b?!this.hasWordCharAfterProtocolRegex.test(a):!1},isInvalidProtocolRelativeMatch:function(a){return!!a&&this.invalidProtocolRelMatchRegex.test(a)}}),a.AnchorTagBuilder=a.Util.extend(Object,{constructor:function(b){a.Util.assign(this,b)},build:function(b){var c=new a.HtmlTag({tagName:"a",attrs:this.createAttrs(b.getType(),b.getAnchorHref()),innerHtml:this.processAnchorText(b.getAnchorText())});return c},createAttrs:function(a,b){var c={href:b},d=this.createCssClass(a);return d&&(c["class"]=d),this.newWindow&&(c.target="_blank"),c},createCssClass:function(a){var b=this.className;return b?b+" "+b+"-"+a:""},processAnchorText:function(a){return a=this.doTruncate(a)},doTruncate:function(b){return a.Util.ellipsis(b,this.truncate||Number.POSITIVE_INFINITY)}}),a.match.Match=a.Util.extend(Object,{constructor:function(b){a.Util.assign(this,b)},getType:a.Util.abstractMethod,getMatchedText:function(){return this.matchedText},getAnchorHref:a.Util.abstractMethod,getAnchorText:a.Util.abstractMethod}),a.match.Email=a.Util.extend(a.match.Match,{getType:function(){return"email"},getEmail:function(){return this.email},getAnchorHref:function(){return"mailto:"+this.email},getAnchorText:function(){return this.email}}),a.match.Twitter=a.Util.extend(a.match.Match,{getType:function(){return"twitter"},getTwitterHandle:function(){return this.twitterHandle},getAnchorHref:function(){return"https://twitter.com/"+this.twitterHandle},getAnchorText:function(){return"@"+this.twitterHandle}}),a.match.Url=a.Util.extend(a.match.Match,{urlPrefixRegex:/^(https?:\/\/)?(www\.)?/i,protocolRelativeRegex:/^\/\//,protocolPrepended:!1,getType:function(){return"url"},getUrl:function(){var a=this.url;return this.protocolRelativeMatch||this.protocolUrlMatch||this.protocolPrepended||(a=this.url="http://"+a,this.protocolPrepended=!0),a},getAnchorHref:function(){var a=this.getUrl();return a.replace(/&/g,"&")},getAnchorText:function(){var a=this.getUrl();return this.protocolRelativeMatch&&(a=this.stripProtocolRelativePrefix(a)),this.stripPrefix&&(a=this.stripUrlPrefix(a)),a=this.removeTrailingSlash(a)},stripUrlPrefix:function(a){return a.replace(this.urlPrefixRegex,"")},stripProtocolRelativePrefix:function(a){return a.replace(this.protocolRelativeRegex,"")},removeTrailingSlash:function(a){return"/"===a.charAt(a.length-1)&&(a=a.slice(0,-1)),a}}),a}); \ No newline at end of file diff --git a/package.json b/package.json index 2fa152a8..d133ba27 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "autolinker", - "version": "0.13.1", + "version": "0.14.0", "description": "Utility to automatically link the URLs, email addresses, and Twitter handles in a given block of text/HTML", "main": "dist/Autolinker.js", "directories": { diff --git a/src/Autolinker.js b/src/Autolinker.js index 35584b62..d79bd73a 100644 --- a/src/Autolinker.js +++ b/src/Autolinker.js @@ -218,7 +218,7 @@ Autolinker.prototype = { emailRegex = /(?:[\-;:&=\+\$,\w\.]+@)/, // something@ for email addresses (a.k.a. local-part) - protocolRegex = /(?:[A-Za-z]{3,9}:(?![A-Za-z]{3,9}:\/\/)(?:\/\/)?)/, // match protocol, allow in format "http://" or "mailto:". However, do not match the first part of something like 'link:http://www.google.com' (i.e. don't match "link:") + protocolRegex = /(?:[A-Za-z][-.+A-Za-z0-9]+:(?![A-Za-z][-.+A-Za-z0-9]+:\/\/)(?!\d+\/?)(?:\/\/)?)/, // match protocol, allow in format "http://" or "mailto:". However, do not match the first part of something like 'link:http://www.google.com' (i.e. don't match "link:"). Also, make sure we don't interpret 'google.com:8000' as if 'google.com' was a protocol here (i.e. ignore a trailing port number in this regex) wwwRegex = /(?:www\.)/, // starting with 'www.' domainNameRegex = /[A-Za-z0-9\.\-]*[A-Za-z0-9\-]/, // anything looking at all like a domain, non-unicode domains, not ending in a period tldRegex = /\.(?:international|construction|contractors|enterprises|photography|productions|foundation|immobilien|industries|management|properties|technology|christmas|community|directory|education|equipment|institute|marketing|solutions|vacations|bargains|boutique|builders|catering|cleaning|clothing|computer|democrat|diamonds|graphics|holdings|lighting|partners|plumbing|supplies|training|ventures|academy|careers|company|cruises|domains|exposed|flights|florist|gallery|guitars|holiday|kitchen|neustar|okinawa|recipes|rentals|reviews|shiksha|singles|support|systems|agency|berlin|camera|center|coffee|condos|dating|estate|events|expert|futbol|kaufen|luxury|maison|monash|museum|nagoya|photos|repair|report|social|supply|tattoo|tienda|travel|viajes|villas|vision|voting|voyage|actor|build|cards|cheap|codes|dance|email|glass|house|mango|ninja|parts|photo|shoes|solar|today|tokyo|tools|watch|works|aero|arpa|asia|best|bike|blue|buzz|camp|club|cool|coop|farm|fish|gift|guru|info|jobs|kiwi|kred|land|limo|link|menu|mobi|moda|name|pics|pink|post|qpon|rich|ruhr|sexy|tips|vote|voto|wang|wien|wiki|zone|bar|bid|biz|cab|cat|ceo|com|edu|gov|int|kim|mil|net|onl|org|pro|pub|red|tel|uno|wed|xxx|xyz|ac|ad|ae|af|ag|ai|al|am|an|ao|aq|ar|as|at|au|aw|ax|az|ba|bb|bd|be|bf|bg|bh|bi|bj|bm|bn|bo|br|bs|bt|bv|bw|by|bz|ca|cc|cd|cf|cg|ch|ci|ck|cl|cm|cn|co|cr|cu|cv|cw|cx|cy|cz|de|dj|dk|dm|do|dz|ec|ee|eg|er|es|et|eu|fi|fj|fk|fm|fo|fr|ga|gb|gd|ge|gf|gg|gh|gi|gl|gm|gn|gp|gq|gr|gs|gt|gu|gw|gy|hk|hm|hn|hr|ht|hu|id|ie|il|im|in|io|iq|ir|is|it|je|jm|jo|jp|ke|kg|kh|ki|km|kn|kp|kr|kw|ky|kz|la|lb|lc|li|lk|lr|ls|lt|lu|lv|ly|ma|mc|md|me|mg|mh|mk|ml|mm|mn|mo|mp|mq|mr|ms|mt|mu|mv|mw|mx|my|mz|na|nc|ne|nf|ng|ni|nl|no|np|nr|nu|nz|om|pa|pe|pf|pg|ph|pk|pl|pm|pn|pr|ps|pt|pw|py|qa|re|ro|rs|ru|rw|sa|sb|sc|sd|se|sg|sh|si|sj|sk|sl|sm|sn|so|sr|st|su|sv|sx|sy|sz|tc|td|tf|tg|th|tj|tk|tl|tm|tn|to|tp|tr|tt|tv|tw|tz|ua|ug|uk|us|uy|uz|va|vc|ve|vg|vi|vn|vu|wf|ws|ye|yt|za|zm|zw)\b/, // match our known top level domains (TLDs) @@ -538,7 +538,8 @@ Autolinker.prototype = { match = new Autolinker.match.Url( { matchedText : matchStr, url : matchStr, - protocolRelativeMatch : protocolRelativeMatch, + protocolUrlMatch : !!protocolUrlMatch, + protocolRelativeMatch : !!protocolRelativeMatch, stripPrefix : this.stripPrefix } ); } diff --git a/src/MatchValidator.js b/src/MatchValidator.js index bfd24ef6..ea877813 100644 --- a/src/MatchValidator.js +++ b/src/MatchValidator.js @@ -34,7 +34,7 @@ Autolinker.MatchValidator = Autolinker.Util.extend( Object, { * @private * @property {RegExp} hasFullProtocolRegex */ - hasFullProtocolRegex : /^[A-Za-z]{3,9}:\/\//, + hasFullProtocolRegex : /^[A-Za-z][-.+A-Za-z0-9]+:\/\//, /** * Regex to test for a protocol prefix, such as 'mailto:' @@ -42,7 +42,7 @@ Autolinker.MatchValidator = Autolinker.Util.extend( Object, { * @private * @property {RegExp} hasProtocolPrefixRegex */ - hasProtocolPrefixRegex : /^[A-Za-z]{3,9}:/, + hasProtocolPrefixRegex : /^[A-Za-z][-.+A-Za-z0-9]+:/, /** * Regex to determine if at least one word char exists after the protocol (i.e. after the ':') @@ -50,7 +50,7 @@ Autolinker.MatchValidator = Autolinker.Util.extend( Object, { * @private * @property {RegExp} hasWordCharAfterProtocolRegex */ - hasWordCharAfterProtocolRegex : /:.*?[A-Za-z]/, + hasWordCharAfterProtocolRegex : /:[^\s]*?[A-Za-z]/, /** @@ -76,9 +76,9 @@ Autolinker.MatchValidator = Autolinker.Util.extend( Object, { */ isValidMatch : function( urlMatch, protocolUrlMatch, protocolRelativeMatch ) { if( - this.urlMatchDoesNotHaveProtocolOrDot( urlMatch, protocolUrlMatch ) || // At least one period ('.') must exist in the URL match for us to consider it an actual URL, *unless* it was a full protocol match (like 'http://localhost') - this.urlMatchDoesNotHaveAtLeastOneWordChar( urlMatch ) || // At least one letter character must exist in the domain name after a protocol match. Ex: skip over something like "git:1.0" - this.isInvalidProtocolRelativeMatch( protocolRelativeMatch ) // A protocol-relative match which has a word character in front of it (so we can skip something like "abc//google.com") + this.urlMatchDoesNotHaveProtocolOrDot( urlMatch, protocolUrlMatch ) || // At least one period ('.') must exist in the URL match for us to consider it an actual URL, *unless* it was a full protocol match (like 'http://localhost') + this.urlMatchDoesNotHaveAtLeastOneWordChar( urlMatch, protocolUrlMatch ) || // At least one letter character must exist in the domain name after a protocol match. Ex: skip over something like "git:1.0" + this.isInvalidProtocolRelativeMatch( protocolRelativeMatch ) // A protocol-relative match which has a word character in front of it (so we can skip something like "abc//google.com") ) { return false; } @@ -104,7 +104,7 @@ Autolinker.MatchValidator = Autolinker.Util.extend( Object, { * match. */ urlMatchDoesNotHaveProtocolOrDot : function( urlMatch, protocolUrlMatch ) { - return ( urlMatch && ( !protocolUrlMatch || !this.hasFullProtocolRegex.test( protocolUrlMatch ) ) && urlMatch.indexOf( '.' ) === -1 ); + return ( !!urlMatch && ( !protocolUrlMatch || !this.hasFullProtocolRegex.test( protocolUrlMatch ) ) && urlMatch.indexOf( '.' ) === -1 ); }, @@ -116,11 +116,18 @@ Autolinker.MatchValidator = Autolinker.Util.extend( Object, { * * @private * @param {String} urlMatch The matched URL, if there was one. Will be an empty string if the match is not a URL match. + * @param {String} protocolUrlMatch The match URL string for a protocol match. Ex: 'http://yahoo.com'. This is used to + * know whether or not we have a protocol in the URL string, in order to check for a word character after the protocol + * separator (':'). * @return {Boolean} `true` if the URL match does not have at least one word character in it after the protocol, `false` * otherwise. */ - urlMatchDoesNotHaveAtLeastOneWordChar : function( urlMatch ) { - return ( urlMatch && this.hasProtocolPrefixRegex.test( urlMatch ) && !this.hasWordCharAfterProtocolRegex.test( urlMatch ) ); + urlMatchDoesNotHaveAtLeastOneWordChar : function( urlMatch, protocolUrlMatch ) { + if( urlMatch && protocolUrlMatch ) { + return !this.hasWordCharAfterProtocolRegex.test( urlMatch ); + } else { + return false; + } }, @@ -136,7 +143,7 @@ Autolinker.MatchValidator = Autolinker.Util.extend( Object, { * @return {Boolean} `true` if it is an invalid protocol-relative match, `false` otherwise. */ isInvalidProtocolRelativeMatch : function( protocolRelativeMatch ) { - return ( protocolRelativeMatch && this.invalidProtocolRelMatchRegex.test( protocolRelativeMatch ) ); + return ( !!protocolRelativeMatch && this.invalidProtocolRelMatchRegex.test( protocolRelativeMatch ) ); } } ); \ No newline at end of file diff --git a/src/match/Url.js b/src/match/Url.js index 1b8696c4..509cd688 100644 --- a/src/match/Url.js +++ b/src/match/Url.js @@ -15,6 +15,13 @@ Autolinker.match.Url = Autolinker.Util.extend( Autolinker.match.Match, { * The url that was matched. */ + /** + * @cfg {Boolean} protocolUrlMatch (required) + * + * `true` if the URL is a match which already has a protocol (i.e. 'http://'), `false` if the match was from a 'www' or + * known TLD match. + */ + /** * @cfg {Boolean} protocolRelativeMatch (required) * @@ -46,13 +53,13 @@ Autolinker.match.Url = Autolinker.Util.extend( Autolinker.match.Match, { protocolRelativeRegex : /^\/\//, /** - * @protected - * @property {RegExp} checkForProtocolRegex + * @private + * @property {Boolean} protocolPrepended * - * A regular expression used to check if the {@link #url} is missing a protocol (in which case, 'http://' - * will be added). + * Will be set to `true` if the 'http://' protocol has been prepended to the {@link #url} (because the + * {@link #url} did not have a protocol) */ - checkForProtocolRegex: /^[A-Za-z]{3,9}:/, + protocolPrepended : false, /** @@ -66,17 +73,19 @@ Autolinker.match.Url = Autolinker.Util.extend( Autolinker.match.Match, { /** - * Returns the url that was matched, assuming the protocol to be 'http://' if the match - * was missing a protocol. + * Returns the url that was matched, assuming the protocol to be 'http://' if the original + * match was missing a protocol. * * @return {String} */ getUrl : function() { var url = this.url; - // if the url string doesn't begin with a protocol, assume http:// - if( !this.protocolRelativeMatch && !this.checkForProtocolRegex.test( url ) ) { + // if the url string doesn't begin with a protocol, assume 'http://' + if( !this.protocolRelativeMatch && !this.protocolUrlMatch && !this.protocolPrepended ) { url = this.url = 'http://' + url; + + this.protocolPrepended = true; } return url; diff --git a/tests/AutolinkerSpec.js b/tests/AutolinkerSpec.js index 0238da9b..fa12982f 100644 --- a/tests/AutolinkerSpec.js +++ b/tests/AutolinkerSpec.js @@ -35,6 +35,36 @@ describe( "Autolinker", function() { var result = autolinker.link( "Joe went to http://localhost today" ); expect( result ).toBe( 'Joe went to localhost today' ); } ); + + + it( "should automatically link localhost URLs when there is a protocol and port", function() { + var result = autolinker.link( "Joe went to http://localhost:8000 today" ); + expect( result ).toBe( 'Joe went to localhost:8000 today' ); + } ); + + + it( "should automatically link localhost URLs when there is a protocol, port, and path", function() { + var result = autolinker.link( "Joe went to http://localhost:8000/abc today" ); + expect( result ).toBe( 'Joe went to localhost:8000/abc today' ); + } ); + + + it( "should automatically link localhost URLs when there is a protocol, port, and query string", function() { + var result = autolinker.link( "Joe went to http://localhost:8000?abc today" ); + expect( result ).toBe( 'Joe went to localhost:8000?abc today' ); + } ); + + + it( "should automatically link localhost URLs when there is a protocol, port, and hash", function() { + var result = autolinker.link( "Joe went to http://localhost:8000#abc today" ); + expect( result ).toBe( 'Joe went to localhost:8000#abc today' ); + } ); + + + it( "should not include the '?' char if it is at the end of the URL", function() { + var result = autolinker.link( "Joe went to http://localhost:8000? today" ); + expect( result ).toBe( 'Joe went to localhost:8000? today' ); + } ); it( "should automatically link URLs in the form of http://www.yahoo.com (i.e. protocol and 'www' prefix)", function() { @@ -121,65 +151,123 @@ describe( "Autolinker", function() { } ); - it( "should NOT automatically link strings of the form 'git:d' (using the heuristic that the domain name does not have a '.' in it)", function() { - var result = autolinker.link( 'Something like git:d should not be linked as a URL' ); - expect( result ).toBe( 'Something like git:d should not be linked as a URL' ); - } ); - - - it( "should NOT automatically link strings of the form 'git:domain' (using the heuristic that the domain name does not have a '.' in it)", function() { - var result = autolinker.link( 'Something like git:domain should not be linked as a URL' ); - expect( result ).toBe( 'Something like git:domain should not be linked as a URL' ); - } ); - - - it( "should automatically link strings of the form 'git:domain.com', interpreting this as a protocol and domain name", function() { - var result = autolinker.link( 'Something like git:domain.com should be linked as a URL' ); - expect( result ).toBe( 'Something like git:domain.com should be linked as a URL' ); - } ); - - - it( "should NOT automatically link a string in the form of 'version:1.0'", function() { - var result = autolinker.link( 'version:1.0' ); - expect( result ).toBe( 'version:1.0' ); - } ); - - - it( "should NOT automatically link these 'abc:def' style strings", function() { - var strings = [ - 'BEGIN:VCALENDAR', - 'VERSION:1.0', - 'BEGIN:VEVENT', - 'DTSTART:20140401T090000', - 'DTEND:20140401T100000', - 'SUMMARY:Some thing to do', - 'LOCATION:', - 'DESCRIPTION:Just call this guy yeah! Testings', - 'PRIORITY:3', - 'END:VEVENT', - 'END:VCALENDAR', - 'START:123:SOMETHING' - ]; - var i, len = strings.length, str; + describe( "protocol linking", function() { + + it( "should NOT automatically link strings of the form 'git:d' (using the heuristic that the domain name does not have a '.' in it)", function() { + var result = autolinker.link( 'Something like git:d should not be linked as a URL' ); + expect( result ).toBe( 'Something like git:d should not be linked as a URL' ); + } ); - // Test with just the strings themselves. - for( i = 0; i < len; i++ ) { - str = strings[ i ]; - expect( autolinker.link( str ) ).toBe( str ); // none should be autolinked - } - // Test with the strings surrounded by other text - for( i = 0; i < len; i++ ) { - str = strings[ i ]; - expect( autolinker.link( 'test ' + str + ' test' ) ).toBe( 'test ' + str + ' test' ); // none should be autolinked - } - } ); - - it( "should NOT include preceding ':' introductions without a space", function() { - var result = autolinker.link( 'the link:http://example.com/' ); - expect( result ).toBe( 'the link:example.com' ); + it( "should NOT automatically link strings of the form 'git:domain' (using the heuristic that the domain name does not have a '.' in it)", function() { + var result = autolinker.link( 'Something like git:domain should not be linked as a URL' ); + expect( result ).toBe( 'Something like git:domain should not be linked as a URL' ); + } ); + + + it( "should automatically link strings of the form 'git:domain.com', interpreting this as a protocol and domain name", function() { + var result = autolinker.link( 'Something like git:domain.com should be linked as a URL' ); + expect( result ).toBe( 'Something like git:domain.com should be linked as a URL' ); + } ); + + + it( "should NOT automatically link a string in the form of 'version:1.0'", function() { + var result = autolinker.link( 'version:1.0' ); + expect( result ).toBe( 'version:1.0' ); + } ); + + + it( "should NOT automatically link these 'abc:def' style strings", function() { + var strings = [ + 'BEGIN:VCALENDAR', + 'VERSION:1.0', + 'BEGIN:VEVENT', + 'DTSTART:20140401T090000', + 'DTEND:20140401T100000', + 'SUMMARY:Some thing to do', + 'LOCATION:', + 'DESCRIPTION:Just call this guy yeah! Testings', + 'PRIORITY:3', + 'END:VEVENT', + 'END:VCALENDAR', + 'START:123', + 'START:123:SOMETHING' + ]; + var i, len = strings.length, str; + + // Test with just the strings themselves. + for( i = 0; i < len; i++ ) { + str = strings[ i ]; + expect( autolinker.link( str ) ).toBe( str ); // none should be autolinked + } + + // Test with the strings surrounded by other text + for( i = 0; i < len; i++ ) { + str = strings[ i ]; + expect( autolinker.link( 'test ' + str + ' test' ) ).toBe( 'test ' + str + ' test' ); // none should be autolinked + } + } ); + + + it( "should NOT include preceding ':' introductions without a space", function() { + var result = autolinker.link( 'the link:http://example.com/' ); + expect( result ).toBe( 'the link:example.com' ); + } ); + + + it( "should autolink protocols with at least two characters", function() { + var result = autolinker.link( 'link this: gg://example.com/' ); + expect( result ).toBe( 'link this: gg://example.com' ); + } ); + + + it( "should autolink protocols with more than 9 characters (as was the previous upper bound, but it seems protocols may be longer)", function() { + var result = autolinker.link( 'link this: opaquelocktoken://example' ); + expect( result ).toBe( 'link this: opaquelocktoken://example' ); + } ); + + + it( "should NOT autolink a protocol with only one character", function() { + var result = autolinker.link( 'do not link this: a://example' ); + expect( result ).toBe( 'do not link this: a://example' ); + } ); + + + it( "should autolink protocols with digits, dashes, dots, and plus signs in their names", function() { + var result1 = autolinker.link( 'link this: a1://example' ); + expect( result1 ).toBe( 'link this: a1://example' ); + + var result2 = autolinker.link( 'link this: view-source://example' ); + expect( result2 ).toBe( 'link this: view-source://example' ); + + var result3 = autolinker.link( 'link this: iris.xpc://example' ); + expect( result3 ).toBe( 'link this: iris.xpc://example' ); + + var result4 = autolinker.link( 'link this: test+protocol://example' ); + expect( result4 ).toBe( 'link this: test+protocol://example' ); + + // Test all allowed non-alpha chars + var result5 = autolinker.link( 'link this: test+proto-col.123://example' ); + expect( result5 ).toBe( 'link this: test+proto-col.123://example' ); + } ); + + + it( "should NOT autolink protocols that start with a digit, dash, plus sign, or dot, as per http://tools.ietf.org/html/rfc3986#section-3.1", function() { + var result = autolinker.link( 'do not link this: 1a://example' ); + expect( result ).toBe( 'do not link this: 1a://example' ); + + var result2 = autolinker.link( 'do not link this: -a://example' ); + expect( result2 ).toBe( 'do not link this: -a://example' ); + + var result3 = autolinker.link( 'do not link this: +a://example' ); + expect( result3 ).toBe( 'do not link this: +a://example' ); + + var result4 = autolinker.link( 'do not link this: .a://example' ); + expect( result4 ).toBe( 'do not link this: .a://example' ); + } ); + } ); - + } ); @@ -203,11 +291,35 @@ describe( "Autolinker", function() { } ); + it( "should automatically link URLs in the form of 'www.yahoo.com:8000/abc' (with a port number and path)", function() { + var result = autolinker.link( "Joe went to www.yahoo.com:8000/abc today" ); + expect( result ).toBe( 'Joe went to yahoo.com:8000/abc today' ); + } ); + + + it( "should automatically link URLs in the form of 'www.yahoo.com:8000?abc' (with a port number and query string)", function() { + var result = autolinker.link( "Joe went to www.yahoo.com:8000?abc today" ); + expect( result ).toBe( 'Joe went to yahoo.com:8000?abc today' ); + } ); + + + it( "should automatically link URLs in the form of 'www.yahoo.com:8000#abc' (with a port number and hash)", function() { + var result = autolinker.link( "Joe went to www.yahoo.com:8000#abc today" ); + expect( result ).toBe( 'Joe went to yahoo.com:8000#abc today' ); + } ); + + it( "should automatically link capitalized URLs", function() { var result = autolinker.link( "Joe went to WWW.YAHOO.COM today" ); expect( result ).toBe( 'Joe went to YAHOO.COM today' ); } ); + + it( "should not include the '?' char if it is at the end of the URL", function() { + var result = autolinker.link( "Joe went to www.yahoo.com? today" ); + expect( result ).toBe( 'Joe went to yahoo.com? today' ); + } ); + } ); @@ -243,17 +355,41 @@ describe( "Autolinker", function() { } ); - it( "should automatically link URLs in the form of 'www.yahoo.com:8000' (with a port number)", function() { + it( "should automatically link URLs in the form of 'yahoo.com:8000' (with a port number)", function() { var result = autolinker.link( "Joe went to yahoo.com:8000 today" ); expect( result ).toBe( 'Joe went to yahoo.com:8000 today' ); } ); + it( "should automatically link URLs in the form of 'yahoo.com:8000/abc' (with a port number and path)", function() { + var result = autolinker.link( "Joe went to yahoo.com:8000/abc today" ); + expect( result ).toBe( 'Joe went to yahoo.com:8000/abc today' ); + } ); + + + it( "should automatically link URLs in the form of 'yahoo.com:8000?abc' (with a port number and query string)", function() { + var result = autolinker.link( "Joe went to yahoo.com:8000?abc today" ); + expect( result ).toBe( 'Joe went to yahoo.com:8000?abc today' ); + } ); + + + it( "should automatically link URLs in the form of 'yahoo.com:8000#abc' (with a port number and hash)", function() { + var result = autolinker.link( "Joe went to yahoo.com:8000#abc today" ); + expect( result ).toBe( 'Joe went to yahoo.com:8000#abc today' ); + } ); + + it( "should automatically link capitalized URLs", function() { var result = autolinker.link( "Joe went to YAHOO.COM." ); expect( result ).toBe( 'Joe went to YAHOO.COM.' ); } ); + + it( "should not include the '?' char if it is at the end of the URL", function() { + var result = autolinker.link( "Joe went to yahoo.com? today" ); + expect( result ).toBe( 'Joe went to yahoo.com? today' ); + } ); + } ); @@ -898,7 +1034,7 @@ describe( "Autolinker", function() { } ); } ); - + describe( "`truncate` option", function() {