-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #120 from ecosia/mw-refactore-regex-to-allow-attributes
Refactor sanitizeHTML method to support attributes
- Loading branch information
Showing
5 changed files
with
74 additions
and
31 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,37 +1,36 @@ | ||
// Strips all tags | ||
const removeAllTagsRegex = /<\/?[^>]+(>|$)/g; | ||
export const removeAllTags = (input) => (input.replace(removeAllTagsRegex, '')); | ||
|
||
/** | ||
* sanitizeHTML strips html tags in the given string | ||
* if allowedTags is empty, all tags are stripped | ||
* @param {*} htmlString the HTML string | ||
* @param {*} allowedTags array of tags that are not stripped | ||
*/ | ||
// eslint-disable-next-line import/prefer-default-export | ||
export const sanitizeHTML = (htmlString, allowedTags = []) => { | ||
// Add an optional white space to the allowed tags | ||
const allowedTagsWhiteSpaced = allowedTags.map((tag) => `${tag}\\s*`); | ||
|
||
// Remove tag attributes | ||
// The solution for this was found on: | ||
// https://stackoverflow.com/questions/4885891/regex-for-removing-all-attributes-from-a-paragraph | ||
const htmlWithoutAttributes = htmlString.replace(/<(\w+)(.|[\r\n])*?>/g, '<$1>'); | ||
export const sanitizeHTML = (htmlString, allowedTags = [], allowedAttributes = []) => { | ||
if (!htmlString) { | ||
return ''; | ||
} | ||
|
||
const expression = (allowedTags.length > 0) ? | ||
// Regex explanation | ||
// Note: \ needs to be escaped in the final expression | ||
// '<' Match the starting tag | ||
// '(' Create a matching group | ||
// '?!' Use negative lookahead | ||
// we only want to match the tags that are not in the allowedTags array | ||
// '\s*?' Optional match of any white space character before optional / | ||
// '\/?' Matches / zero to one time for the closing tag | ||
// '\s*?' Optional match of any white space character after optional / | ||
// '(${allowedTagsWhiteSpaced.join('|')})>' matching group of the allowed tags, each followed by optional white space | ||
// ')' close the matching group of negative lookahead | ||
// '\w*[^<>]*' matches any word that isn't in the excluded group | ||
// '>' Match the closing '>' of the tag | ||
`<(?!\\s*\\/?\\s*(${allowedTagsWhiteSpaced.join('|')})>)\\w*[^<>]*>` : | ||
// Strips all tags | ||
'<(\\/?\\w*)\\w*[^<>]*>'; | ||
if (allowedTags.length === 0) { | ||
return removeAllTags(htmlString); | ||
} | ||
|
||
const regExp = new RegExp(expression, 'gm'); | ||
return htmlWithoutAttributes.replace(regExp, ''); | ||
return htmlString.replace(/<(\/*)(\w+)([^>]*)>/g, (match, closing, tagName, attrs) => { | ||
if (allowedTags.includes(tagName)) { | ||
// If the tag is allowed, we'll retain only allowed attributes. | ||
if (closing) { | ||
// If it's a closing tag, simply return it as is. | ||
return `</${tagName}>`; | ||
} | ||
// Otherwise, reconstruct the opening tag with only allowed attributes. | ||
const allowedAttrs = attrs.split(/\s+/) | ||
.filter((attr) => allowedAttributes.includes(attr.split('=')[0])) | ||
.join(' '); | ||
return `<${tagName}${allowedAttrs ? ` ${allowedAttrs}` : ''}>`; | ||
} | ||
// If the tag is not allowed, strip it completely. | ||
return ''; | ||
}); | ||
}; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters