-
Notifications
You must be signed in to change notification settings - Fork 238
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Fix for <!DOCTYPE> tag HTML parsing, which could cause the regex engine to freeze at 100% CPU for certain inputs. Add fix for proper handling of <A> tags (with capitalized tag name) as well.
- Loading branch information
1 parent
3bedddc
commit 6335974
Showing 6 changed files with 162 additions and 52 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -16,7 +16,7 @@ | |
|
||
/*! | ||
* Autolinker.js | ||
* 0.14.1 | ||
* 0.15.0 | ||
* | ||
* Copyright(c) 2014 Gregory Jacobs <[email protected]> | ||
* MIT Licensed. http://www.opensource.org/licenses/mit-license.php | ||
|
@@ -850,34 +850,53 @@ | |
* | ||
* Capturing groups: | ||
* | ||
* 1. If it is an end tag, this group will have the '/'. | ||
* 2. The tag name. | ||
* 1. The "!DOCTYPE" tag name, if a tag is a <!DOCTYPE> tag. | ||
* 2. If it is an end tag, this group will have the '/'. | ||
* 3. The tag name for all tags (other than the <!DOCTYPE> tag) | ||
*/ | ||
htmlRegex : (function() { | ||
var tagNameRegex = /[0-9a-zA-Z:]+/, | ||
var tagNameRegex = /[0-9a-zA-Z][0-9a-zA-Z:]*/, | ||
attrNameRegex = /[^\s\0"'>\/=\x01-\x1F\x7F]+/, // the unicode range accounts for excluding control chars, and the delete char | ||
attrValueRegex = /(?:".*?"|'.*?'|[^'"=<>`\s]+)/, // double quoted, single quoted, or unquoted attribute values | ||
nameEqualsValueRegex = attrNameRegex.source + '(?:\\s*=\\s*' + attrValueRegex.source + ')?'; // optional '=[value]' | ||
|
||
return new RegExp( [ | ||
'<(?:!|(/))?', // Beginning of a tag. Either '<' for a start tag, '</' for an end tag, or <! for the <!DOCTYPE ...> tag. The slash or an empty string is Capturing Group 1. | ||
// for <!DOCTYPE> tag. Ex: <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">) | ||
'(?:', | ||
'<(!DOCTYPE)', // *** Capturing Group 1 - If it's a doctype tag | ||
|
||
// Zero or more attributes following the tag name | ||
'(?:', | ||
'\\s+', // one or more whitespace chars before an attribute | ||
|
||
// Either: | ||
// A. attr="value", or | ||
// B. "value" alone (To cover example doctype tag: <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">) | ||
'(?:', nameEqualsValueRegex, '|', attrValueRegex.source + ')', | ||
')*', | ||
'>', | ||
')', | ||
|
||
'|', | ||
|
||
// The tag name (Capturing Group 2) | ||
'(' + tagNameRegex.source + ')', | ||
// All other HTML tags (i.e. tags that are not <!DOCTYPE>) | ||
'(?:', | ||
'<(/)?', // Beginning of a tag. Either '<' for a start tag, or '</' for an end tag. | ||
// *** Capturing Group 2: The slash or an empty string. Slash ('/') for end tag, empty string for start or self-closing tag. | ||
|
||
// Zero or more attributes following the tag name | ||
'(?:', | ||
'\\s+', // one or more whitespace chars before an attribute | ||
// *** Capturing Group 3 - The tag name | ||
'(' + tagNameRegex.source + ')', | ||
|
||
// Either: | ||
// A. tag="value", or | ||
// B. "value" alone (for <!DOCTYPE> tag. Ex: <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">) | ||
'(?:', nameEqualsValueRegex, '|', attrValueRegex.source + ')', | ||
')*', | ||
// Zero or more attributes following the tag name | ||
'(?:', | ||
'\\s+', // one or more whitespace chars before an attribute | ||
nameEqualsValueRegex, // attr="value" (with optional ="value" part) | ||
')*', | ||
|
||
'\\s*/?', // any trailing spaces and optional '/' before the closing '>' | ||
'>' | ||
].join( "" ), 'g' ); | ||
'\\s*/?', // any trailing spaces and optional '/' before the closing '>' | ||
'>', | ||
')' | ||
].join( "" ), 'gi' ); | ||
} )(), | ||
|
||
|
||
|
@@ -911,15 +930,15 @@ | |
// wrapping the URLs in anchor tags | ||
while( ( currentResult = htmlRegex.exec( html ) ) !== null ) { | ||
var tagText = currentResult[ 0 ], | ||
tagName = currentResult[ 2 ], | ||
isClosingTag = !!currentResult[ 1 ], | ||
tagName = currentResult[ 1 ] || currentResult[ 3 ], // The <!DOCTYPE> tag (ex: "!DOCTYPE"), or another tag (ex: "a") | ||
isClosingTag = !!currentResult[ 2 ], | ||
inBetweenTagsText = html.substring( lastIndex, currentResult.index ); | ||
|
||
if( inBetweenTagsText ) { | ||
processTextNodeVisitor( inBetweenTagsText ); | ||
} | ||
|
||
processHtmlNodeVisitor( tagText, tagName, isClosingTag ); | ||
processHtmlNodeVisitor( tagText, tagName.toLowerCase(), isClosingTag ); | ||
|
||
lastIndex = currentResult.index + tagText.length; | ||
} | ||
|
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters