From 41d5938883a3d06ffe8a88a51efd8d1896f7d747 Mon Sep 17 00:00:00 2001 From: Misko Hevery Date: Tue, 23 Nov 2010 20:10:05 -0800 Subject: [PATCH] Fixed sanitization * explicitly require full URLs (ftp|https?://...) * list the URI attributes * remove a lot of unneeded attributes --- src/sanitizer.js | 115 ++++++++++++++++++++---------------------- test/sanitizerSpec.js | 49 +++++++++++++----- 2 files changed, 90 insertions(+), 74 deletions(-) diff --git a/src/sanitizer.js b/src/sanitizer.js index 94c367068d1b..66631a9087aa 100644 --- a/src/sanitizer.js +++ b/src/sanitizer.js @@ -21,7 +21,9 @@ var START_TAG_REGEXP = /^<\s*([\w:]+)((?:\s+\w+(?:\s*=\s*(?:(?:"[^"]*")|(?:'[^'] BEGIN_TAG_REGEXP = /^/g, - CDATA_REGEXP = //g; + CDATA_REGEXP = //g, + URI_REGEXP = /^((ftp|https?):\/\/|mailto:|#)/, + NON_ALPHANUMERIC_REGEXP = /([^\#-~| |!])/g; // Match everything outside of normal chars and " (quote character) // Empty Elements - HTML 4.01 var emptyElements = makeMap("area,base,basefont,br,col,hr,img,input,isindex,link,param"); @@ -33,24 +35,24 @@ var blockElements = makeMap("address,blockquote,button,center,dd,del,dir,div,dl, // Inline Elements - HTML 4.01 var inlineElements = makeMap("a,abbr,acronym,b,basefont,bdo,big,br,button,cite,code,del,dfn,em,font,i,img,"+ "input,ins,kbd,label,map,q,s,samp,select,small,span,strike,strong,sub,sup,textarea,tt,u,var"); - // Elements that you can, intentionally, leave open // (and which close themselves) var closeSelfElements = makeMap("colgroup,dd,dt,li,options,p,td,tfoot,th,thead,tr"); - -// Attributes that have their values filled in disabled="disabled" -var fillAttrs = makeMap("checked,compact,declare,defer,disabled,ismap,multiple,nohref,noresize,noshade,nowrap,readonly,selected"); - // Special Elements (can contain anything) var specialElements = makeMap("script,style"); - var validElements = extend({}, emptyElements, blockElements, inlineElements, closeSelfElements); -var validAttrs = extend({}, fillAttrs, makeMap( - 'abbr,align,alink,alt,archive,axis,background,bgcolor,border,cellpadding,cellspacing,cite,class,classid,clear,code,codebase,'+ - 'codetype,color,cols,colspan,content,coords,data,dir,face,for,headers,height,href,hreflang,hspace,id,label,lang,language,'+ - 'link,longdesc,marginheight,marginwidth,maxlength,media,method,name,nowrap,profile,prompt,rel,rev,rows,rowspan,rules,scheme,'+ - 'scope,scrolling,shape,size,span,src,standby,start,summary,tabindex,target,text,title,type,usemap,valign,value,valuetype,'+ - 'vlink,vspace,width')); + +//see: http://www.w3.org/TR/html4/index/attributes.html +//Attributes that have their values filled in disabled="disabled" +var fillAttrs = makeMap("checked,compact,declare,defer,disabled,ismap,multiple,nohref,noresize,noshade,nowrap,readonly,selected"); +//Attributes that have href and hence need to be sanitized +var uriAttrs = makeMap("background,href,longdesc,src,usemap"); +var validAttrs = extend({}, fillAttrs, uriAttrs, makeMap( + 'abbr,align,alt,axis,bgcolor,border,cellpadding,cellspacing,class,clear,'+ + 'color,cols,colspan,coords,dir,face,for,headers,height,hreflang,hspace,id,'+ + 'label,lang,language,maxlength,method,name,prompt,rel,rev,rows,rowspan,rules,'+ + 'scope,scrolling,shape,size,span,start,summary,tabindex,target,title,type,'+ + 'valign,value,vspace,width')); /** * @example @@ -64,7 +66,7 @@ var validAttrs = extend({}, fillAttrs, makeMap( * @param {string} html string * @param {object} handler */ -var htmlParser = function( html, handler ) { +function htmlParser( html, handler ) { var index, chars, match, stack = [], last = html; stack.last = function(){ return stack[ stack.length - 1 ]; }; @@ -112,8 +114,7 @@ var htmlParser = function( html, handler ) { var text = index < 0 ? html : html.substring( 0, index ); html = index < 0 ? "" : html.substring( index ); - if ( handler.chars ) - handler.chars( text ); + handler.chars( decodeEntities(text) ); } } else { @@ -122,8 +123,7 @@ var htmlParser = function( html, handler ) { replace(COMMENT_REGEXP, "$1"). replace(CDATA_REGEXP, "$1"); - if ( handler.chars ) - handler.chars( text ); + handler.chars( decodeEntities(text) ); return ""; }); @@ -157,21 +157,18 @@ var htmlParser = function( html, handler ) { if ( !unary ) stack.push( tagName ); - if ( handler.start ) { - var attrs = {}; + var attrs = {}; - rest.replace(ATTR_REGEXP, function(match, name) { - var value = arguments[2] ? arguments[2] : - arguments[3] ? arguments[3] : - arguments[4] ? arguments[4] : - fillAttrs[name] ? name : ""; + rest.replace(ATTR_REGEXP, function(match, name) { + var value = arguments[2] ? arguments[2] : + arguments[3] ? arguments[3] : + arguments[4] ? arguments[4] : + fillAttrs[name] ? name : ""; - attrs[name] = value; //value.replace(/(^|[^\\])"/g, '$1\\\"') //" - }); + attrs[name] = decodeEntities(value); //value.replace(/(^|[^\\])"/g, '$1\\\"') //" + }); - if ( handler.start ) - handler.start( tagName, attrs, unary ); - } + handler.start( tagName, attrs, unary ); } function parseEndTag( tag, tagName ) { @@ -186,8 +183,7 @@ var htmlParser = function( html, handler ) { if ( pos >= 0 ) { // Close all the open elements, up the stack for ( i = stack.length - 1; i >= pos; i-- ) - if ( handler.end ) - handler.end( stack[ i ] ); + handler.end( stack[ i ] ); // Remove the open elements from the stack stack.length = pos; @@ -206,28 +202,32 @@ function makeMap(str){ return obj; } -/* - * For attack vectors see: http://ha.ckers.org/xss.html +/** + * decodes all entities into regular string + * @param value + * @returns */ -var JAVASCRIPT_URL = /^javascript:/i, - NBSP_REGEXP = / /gim, - HEX_ENTITY_REGEXP = /&#x([\da-f]*);?/igm, - DEC_ENTITY_REGEXP = /&#(\d+);?/igm, - CHAR_REGEXP = /[\w:]/gm, - HEX_DECODE = function(match, code){return fromCharCode(parseInt(code,16));}, - DEC_DECODE = function(match, code){return fromCharCode(code);}; +var hiddenPre=document.createElement("pre"); +function decodeEntities(value) { + hiddenPre.innerHTML=value.replace(//g, '>'); } /** @@ -253,14 +253,12 @@ function htmlSanitizeWriter(buf){ out('<'); out(tag); foreach(attrs, function(value, key){ - if (validAttrs[lowercase(key)] && !isJavaScriptUrl(value)) { + var lkey=lowercase(key); + if (validAttrs[lkey] && (uriAttrs[lkey]!==true || value.match(URI_REGEXP))) { out(' '); out(key); out('="'); - out(value. - replace(//g, '>'). - replace(/\"/g,'"')); + out(encodeEntities(value)); out('"'); } }); @@ -280,10 +278,7 @@ function htmlSanitizeWriter(buf){ }, chars: function(chars){ if (!ignore) { - out(chars. - replace(/&(\w+[&;\W])?/g, function(match, entity){return entity?match:'&';}). - replace(//g, '>')); + out(encodeEntities(chars)); } } }; diff --git a/test/sanitizerSpec.js b/test/sanitizerSpec.js index 4e1ff355fab7..88da693d06c1 100644 --- a/test/sanitizerSpec.js +++ b/test/sanitizerSpec.js @@ -6,7 +6,7 @@ describe('HTML', function(){ it('should echo html', function(){ expectHTML('helloworld.'). - toEqual('helloworld.'); + toEqual('helloworld.'); }); it('should remove script', function(){ @@ -49,9 +49,15 @@ describe('HTML', function(){ expectHTML('abc').toEqual('abc'); }); + it('should handle entities', function(){ + var everything = '
' + + '!@#$%^&*()_+-={}[]:";\'<>?,./`~ ħ
'; + expectHTML(everything).toEqual(everything); + }); + it('should handle improper html', function(){ - expectHTML('< div id="" alt=abc href=\'"\' >text< /div>'). - toEqual('
text
'); + expectHTML('< div id="" alt=abc dir=\'"\' >text< /div>'). + toEqual('
text
'); }); it('should handle improper html2', function(){ @@ -81,19 +87,14 @@ describe('HTML', function(){ expect(html).toEqual('a<div>&</div>c'); }); - it('should not double escape entities', function(){ - writer.chars(' ><'); - expect(html).toEqual(' ><'); - }); - it('should escape IE script', function(){ - writer.chars('&{}'); - expect(html).toEqual('&{}'); + writer.chars('&<>{}'); + expect(html).toEqual('&<>{}'); }); it('should escape attributes', function(){ - writer.start('div', {id:'\"\'<>'}); - expect(html).toEqual('
'); + writer.start('div', {id:'!@#$%^&*()_+-={}[]:";\'<>?,./`~ \n\0\r\u0127'}); + expect(html).toEqual('
'); }); it('should ignore missformed elements', function(){ @@ -105,12 +106,32 @@ describe('HTML', function(){ writer.start('div', {unknown:""}); expect(html).toEqual('
'); }); + + describe('isUri', function(){ + + function isUri(value) { + return value.match(URI_REGEXP); + } + + it('should be URI', function(){ + expect(isUri('http://abc')).toBeTruthy(); + expect(isUri('https://abc')).toBeTruthy(); + expect(isUri('ftp://abc')).toBeTruthy(); + expect(isUri('mailto:me@example.com')).toBeTruthy(); + expect(isUri('#anchor')).toBeTruthy(); + }); + + it('should not be UIR', function(){ + expect(isUri('')).toBeFalsy(); + expect(isUri('javascript:alert')).toBeFalsy(); + }); + }); describe('javascript URL attribute', function(){ beforeEach(function(){ this.addMatchers({ toBeValidUrl: function(){ - return !isJavaScriptUrl(this.actual); + return URI_REGEXP.exec(this.actual); } }); }); @@ -118,7 +139,7 @@ describe('HTML', function(){ it('should ignore javascript:', function(){ expect('JavaScript:abc').not.toBeValidUrl(); expect(' \n Java\n Script:abc').not.toBeValidUrl(); - expect('JavaScript/my.js').toBeValidUrl(); + expect('http://JavaScript/my.js').toBeValidUrl(); }); it('should ignore dec encoded javascript:', function(){