Merge pull request #120 from ecosia/mw-refactore-regex-to-allow-attributes

Refactor sanitizeHTML method to support attributes
ecosia · Oct 17, 2023 · 25a9074 · 25a9074
2 parents e66374f + 735469a
commit 25a9074
Show file tree

Hide file tree

Showing 5 changed files with 74 additions and 31 deletions.
diff --git a/README.md b/README.md
@@ -98,6 +98,18 @@ It is also possible to provide custom allowed tags directly to the directive tag
 </template>
 ```
 
+#### allowedAttributes
+
+Array of strings. Default: []
+
+Customize the tag attributes that are allowed to be rendered:
+
+```js
+Vue.use(VueSafeHTML, {
+  allowedTags: ['a'],
+  allowedAttributes: ['title', 'class', 'href'],
+});
+```
 ### Nuxt
 
 `vue-safe-html` is written as a Vue plugin so you can easily use it inside Nuxt by following [the Nuxt documentation](https://nuxtjs.org/docs/2.x/directory-structure/plugins#vue-plugins).

diff --git a/src/directive.js b/src/directive.js
@@ -14,15 +14,17 @@ const defaultTags = [
   'sub',
   'sup',
 ];
+
 const areTagsValid = (tags) => (
   Array.isArray(tags) &&
   tags.every((tag) => typeof tag === 'string')
 );
 
 export { defaultTags as allowedTags };
 
-export default (tags) => {
+export default (tags, attributes) => {
   const initialTags = areTagsValid(tags) ? tags : defaultTags;
+
   return (el, binding) => {
     let finalTags = initialTags;
 
@@ -33,7 +35,7 @@ export default (tags) => {
       }
     }
 
-    const sanitized = sanitizeHTML(binding.value, finalTags);
+    const sanitized = sanitizeHTML(binding.value, finalTags, attributes);
 
     if (typeof el.innerHTML === 'string') {
       // we're client-side and `el` is an HTMLElement

diff --git a/src/index.js b/src/index.js
@@ -7,6 +7,6 @@ export {
 
 export default {
   install: (Vue, options = {}) => {
-    Vue.directive('safe-html', createDirective(options.allowedTags));
+    Vue.directive('safe-html', createDirective(options.allowedTags, options.allowedAttributes));
   },
 };
diff --git a/src/utils.js b/src/utils.js
@@ -1,37 +1,36 @@
+// Strips all tags
+const removeAllTagsRegex = /<\/?[^>]+(>|$)/g;
+export const removeAllTags = (input) => (input.replace(removeAllTagsRegex, ''));
+
 /**
  * sanitizeHTML strips html tags in the given string
  * if allowedTags is empty, all tags are stripped
  * @param {*} htmlString  the HTML strings
  * @param {*} allowedTags array of tags that are not stripped
  */
-// eslint-disable-next-line import/prefer-default-export
-export const sanitizeHTML = (htmlString, allowedTags = []) => {
-  // Add an optional white space to the allowed tags
-  const allowedTagsWhiteSpaced = allowedTags.map((tag) => `${tag}\\s*`);
-
-  // Remove tag attributes
-  // The solution for this was found on:
-  // https://stackoverflow.com/questions/4885891/regex-for-removing-all-attributes-from-a-paragraph
-  const htmlWithoutAttributes = htmlString.replace(/<(\w+)(.|[\r\n])*?>/g, '<$1>');
+export const sanitizeHTML = (htmlString, allowedTags = [], allowedAttributes = []) => {
+  if (!htmlString) {
+    return '';
+  }
 
-  const expression = (allowedTags.length > 0) ?
-    // Regex explanation
-    // Note: \ needs to be escaped in the final expression
-    // '<' Match the starting tag
-    // '(' Create a matching group
-    // '?!' Use negative lookup
-    //      we only want to match the tags that are not in the allowedTags array
-    // '\s*?' Optional match of any white space charater before optional /
-    // '\/?' Matches / zero to one time for the closing tag
-    // '\s*?' Optional match of any white space charater after optional /
-    // '(${allowedTags.join('|')})>' matching group of the allowed tags
-    // ')' close the matching group of negative lookup
-    // '\w*[^<>]*' matches any word that isn't in the excluded group
-    // '>' Match closing tagq
-    `<(?!\\s*\\/?\\s*(${allowedTagsWhiteSpaced.join('|')})>)\\w*[^<>]*>` :
-    // Strips all tags
-    '<(\\/?\\w*)\\w*[^<>]*>';
+  if (allowedTags.length === 0) {
+    return removeAllTags(htmlString);
+  }
 
-  const regExp = new RegExp(expression, 'gm');
-  return htmlWithoutAttributes.replace(regExp, '');
+  return htmlString.replace(/<(\/*)(\w+)([^>]*)>/g, (match, closing, tagName, attrs) => {
+    if (allowedTags.includes(tagName)) {
+      // If the tag is allowed, we'll retain only allowed attributes.
+      if (closing) {
+        // If it's a closing tag, simply return it as is.
+        return `</${tagName}>`;
+      }
+      // Otherwise, reconstruct the opening tag with only allowed attributes.
+      const allowedAttrs = attrs.split(/\s+/)
+        .filter((attr) => allowedAttributes.includes(attr.split('=')[0]))
+        .join(' ');
+      return `<${tagName}${allowedAttrs ? ` ${allowedAttrs}` : ''}>`;
+    }
+    // If the tag is not allowed, strip it completely.
+    return '';
+  });
 };
diff --git a/src/utils.test.js b/src/utils.test.js
@@ -49,5 +49,35 @@ describe('Utils', () => {
       const expected = '<p>Test1</p> Test2';
       expect(utils.sanitizeHTML(given, allowedTags)).toBe(expected);
     });
+
+    it('Keeps allowed attributes', () => {
+      const allowedTags = ['p', 'strong'];
+      const allowedAttributes = ['title'];
+      const given = '<p data-test="test" title="test2">Test1</p> <strong data-test=\'test2\'>Test2</strong>';
+      const expected = '<p title="test2">Test1</p> <strong>Test2</strong>';
+      expect(utils.sanitizeHTML(given, allowedTags, allowedAttributes)).toBe(expected);
+    });
+
+    it('Ignores incomplete tag', () => {
+      const allowedTags = ['p', 'strong'];
+      const allowedAttributes = ['data-lazy'];
+      const given = '<p data-lazy="test">Test1</p> <adsfjgsa>with invalid tag </';
+      const expected = '<p data-lazy="test">Test1</p> with invalid tag </';
+      expect(utils.sanitizeHTML(given, allowedTags, allowedAttributes)).toBe(expected);
+    });
+
+    it('Does not error on null input', () => {
+      const allowedTags = [];
+      const given = null;
+      const expected = '';
+      expect(utils.sanitizeHTML(given, allowedTags)).toBe(expected);
+    });
+
+    it('Does not crash on undefined input', () => {
+      const allowedTags = [];
+      const given = undefined;
+      const expected = '';
+      expect(utils.sanitizeHTML(given, allowedTags)).toBe(expected);
+    });
   });
 });