diff --git a/CHANGELOG.md b/CHANGELOG.md index 3f41e34354..241dd251f7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -66,10 +66,11 @@ Notes: web developers are advised to use [`~` (tilde range)](https://github.com/ - Added code viewer dialog with syntax highlighting, in PR [#5335](https://github.com/microsoft/BotFramework-WebChat/pull/5335), by [@OEvgeny](https://github.com/OEvgeny) - Added copy button to code blocks, in PR [#5334](https://github.com/microsoft/BotFramework-WebChat/pull/5334), by [@compulim](https://github.com/compulim) - Added copy button to view code dialog, in PR [#5336](https://github.com/microsoft/BotFramework-WebChat/pull/5336), by [@compulim](https://github.com/compulim) -- Added HTML content transformer middleware, in PR [#5338](https://github.com/microsoft/BotFramework-WebChat/pull/5338), by [@compulim](https://github.com/compulim) +- Added HTML content transformer middleware, in PR [#5338](https://github.com/microsoft/BotFramework-WebChat/pull/5338) and [#5347](https://github.com/microsoft/BotFramework-WebChat/pull/5347), by [@compulim](https://github.com/compulim) - HTML content transformer is used by `useRenderMarkdown` to transform the result from `renderMarkdown` - HTML sanitizer is moved from `renderMarkdown` into HTML content transformer for better coverage - Copy button is added to fenced code blocks (`<pre><code>`)
+   -  Configure HTML sanitizer via `request.allowedTags`
 
 ### Changed
 
diff --git a/__tests__/html2/markdown/customElement.html b/__tests__/html2/markdown/customElement.html
new file mode 100644
index 0000000000..276b7c6031
--- /dev/null
+++ b/__tests__/html2/markdown/customElement.html
@@ -0,0 +1,58 @@
+
+
+  
+    
+    
+    
+    
+  
+
+  
+    
+ + + diff --git a/__tests__/html2/markdown/customElement.html.snap-1.png b/__tests__/html2/markdown/customElement.html.snap-1.png new file mode 100644 index 0000000000..5993a15f02 Binary files /dev/null and b/__tests__/html2/markdown/customElement.html.snap-1.png differ diff --git a/packages/bundle/src/markdown/middleware/createCodeBlockCopyButtonMiddleware.ts b/packages/bundle/src/markdown/middleware/createCodeBlockCopyButtonMiddleware.ts index b8989fca83..41888f50ed 100644 --- a/packages/bundle/src/markdown/middleware/createCodeBlockCopyButtonMiddleware.ts +++ b/packages/bundle/src/markdown/middleware/createCodeBlockCopyButtonMiddleware.ts @@ -7,6 +7,16 @@ export default function createCodeBlockCopyButtonMiddleware(): HTMLContentTransf next( Object.freeze({ ...request, + allowedTags: Object.freeze( + new Map(request.allowedTags).set( + request.codeBlockCopyButtonTagName, + Object.freeze({ + attributes: Object.freeze( + new Set(['class', 'data-alt-copy', 'data-alt-copied', 'data-testid', 'data-value']) + ) + }) + ) + ), documentFragment: codeBlockCopyButtonDocumentMod(request.documentFragment, { codeBlockCopyButtonAltCopied: request.codeBlockCopyButtonAltCopied, codeBlockCopyButtonAltCopy: request.codeBlockCopyButtonAltCopy, diff --git a/packages/bundle/src/markdown/middleware/createSanitizeMiddleware.ts b/packages/bundle/src/markdown/middleware/createSanitizeMiddleware.ts index e4bd6987bf..2e7a5b6e7b 100644 --- a/packages/bundle/src/markdown/middleware/createSanitizeMiddleware.ts +++ b/packages/bundle/src/markdown/middleware/createSanitizeMiddleware.ts @@ -1,3 +1,4 @@ +import { type HTMLContentTransformMiddleware } from 'botframework-webchat-component'; import { parseDocumentFragmentFromString, serializeDocumentFragmentIntoString @@ -5,104 +6,27 @@ import { import sanitizeHTML from 'sanitize-html'; const BASE_SANITIZE_HTML_OPTIONS = Object.freeze({ - allowedAttributes: { - a: ['aria-label', 'class', 'href', 'name', 'rel', 'target'], - button: ['aria-label', 
'class', 'type', 'value'], - img: ['alt', 'aria-label', 'class', 'src', 'title'], - pre: ['class'], - span: ['aria-label'] - }, allowedSchemes: ['data', 'http', 'https', 'ftp', 'mailto', 'sip', 'tel'], - allowedTags: [ - 'a', - 'b', - 'blockquote', - 'br', - 'button', - 'caption', - 'code', - 'del', - 'div', - 'em', - 'h1', - 'h2', - 'h3', - 'h4', - 'h5', - 'h6', - 'hr', - 'i', - 'img', - 'ins', - 'li', - 'nl', - 'ol', - 'p', - 'pre', - 's', - 'span', - 'strike', - 'strong', - 'table', - 'tbody', - 'td', - 'tfoot', - 'th', - 'thead', - 'tr', - 'ul', - - // Followings are for MathML elements, from https://developer.mozilla.org/en-US/docs/Web/MathML. - 'annotation-xml', - 'annotation', - 'math', - 'merror', - 'mfrac', - 'mi', - 'mmultiscripts', - 'mn', - 'mo', - 'mover', - 'mpadded', - 'mphantom', - 'mprescripts', - 'mroot', - 'mrow', - 'ms', - 'mspace', - 'msqrt', - 'mstyle', - 'msub', - 'msubsup', - 'msup', - 'mtable', - 'mtd', - 'mtext', - 'mtr', - 'munder', - 'munderover', - 'semantics' - ], // Bug of https://github.com/apostrophecms/sanitize-html/issues/633. // They should not remove `alt=""` even though it is empty. 
nonBooleanAttributes: [] }); -export default function createSanitizeMiddleware() { +export default function createSanitizeMiddleware(): HTMLContentTransformMiddleware { return () => () => request => { - const { codeBlockCopyButtonTagName, documentFragment } = request; - const sanitizeHTMLOptions = { - ...BASE_SANITIZE_HTML_OPTIONS, - allowedAttributes: { - ...BASE_SANITIZE_HTML_OPTIONS.allowedAttributes, - [codeBlockCopyButtonTagName]: ['class', 'data-alt-copy', 'data-alt-copied', 'data-testid', 'data-value'] - }, - allowedTags: [...BASE_SANITIZE_HTML_OPTIONS.allowedTags, codeBlockCopyButtonTagName] - }; + const { documentFragment } = request; const htmlAfterBetterLink = serializeDocumentFragmentIntoString(documentFragment); - const htmlAfterSanitization = sanitizeHTML(htmlAfterBetterLink, sanitizeHTMLOptions); + const htmlAfterSanitization = sanitizeHTML(htmlAfterBetterLink, { + ...BASE_SANITIZE_HTML_OPTIONS, + allowedAttributes: Object.fromEntries( + Array.from(request.allowedTags.entries()).map( + ([tag, { attributes }]) => [tag, Array.from(attributes)] satisfies [string, string[]] + ) + ) satisfies Record, + allowedTags: Array.from(request.allowedTags.keys() satisfies Iterator) satisfies string[] + }); return parseDocumentFragmentFromString(htmlAfterSanitization); }; diff --git a/packages/component/src/providers/HTMLContentTransformCOR/private/HTMLContentTransformContext.ts b/packages/component/src/providers/HTMLContentTransformCOR/private/HTMLContentTransformContext.ts index a0603e1aad..ddffab2248 100644 --- a/packages/component/src/providers/HTMLContentTransformCOR/private/HTMLContentTransformContext.ts +++ b/packages/component/src/providers/HTMLContentTransformCOR/private/HTMLContentTransformContext.ts @@ -1,6 +1,13 @@ import { createContext } from 'react'; export type HTMLContentTransformRequest = Readonly<{ + allowedTags: ReadonlyMap< + string, + Readonly<{ + // TODO: Ultimately, we could allowlist a cherry-picked instance of element, but not all elements 
sharing the same tag name. + attributes: ReadonlySet; + }> + >; codeBlockCopyButtonAltCopied: string; codeBlockCopyButtonAltCopy: string; codeBlockCopyButtonClassName: string; diff --git a/packages/component/src/providers/HTMLContentTransformCOR/useTransformHTMLContent.ts b/packages/component/src/providers/HTMLContentTransformCOR/useTransformHTMLContent.ts index 3a55cdbc6f..149db157fb 100644 --- a/packages/component/src/providers/HTMLContentTransformCOR/useTransformHTMLContent.ts +++ b/packages/component/src/providers/HTMLContentTransformCOR/useTransformHTMLContent.ts @@ -7,6 +7,89 @@ import useHTMLContentTransformContext from './private/useHTMLContentTransformCon const { useLocalizer } = hooks; +const DEFAULT_ALLOWED_TAGS: ReadonlyMap }>> = Object.freeze( + new Map( + ( + [ + ['a', ['aria-label', 'class', 'href', 'name', 'rel', 'target']], + ['b', []], + ['blockquote', []], + ['br', []], + ['button', ['aria-label', 'class', 'type', 'value']], + ['caption', []], + ['code', []], + ['del', []], + ['div', []], + ['em', []], + ['h1', []], + ['h2', []], + ['h3', []], + ['h4', []], + ['h5', []], + ['h6', []], + ['hr', []], + ['i', []], + ['img', ['alt', 'aria-label', 'class', 'src', 'title']], + ['ins', []], + ['li', []], + ['nl', []], + ['ol', []], + ['p', []], + ['pre', ['class']], + ['s', []], + ['span', ['aria-label']], + ['strike', []], + ['strong', []], + ['table', []], + ['tbody', []], + ['td', []], + ['tfoot', []], + ['th', []], + ['thead', []], + ['tr', []], + ['ul', []], + + // Followings are for MathML elements, from https://developer.mozilla.org/en-US/docs/Web/MathML. 
+ ['annotation-xml', []], + ['annotation', []], + ['math', []], + ['merror', []], + ['mfrac', []], + ['mi', []], + ['mmultiscripts', []], + ['mn', []], + ['mo', []], + ['mover', []], + ['mpadded', []], + ['mphantom', []], + ['mprescripts', []], + ['mroot', []], + ['mrow', []], + ['ms', []], + ['mspace', []], + ['msqrt', []], + ['mstyle', []], + ['msub', []], + ['msubsup', []], + ['msup', []], + ['mtable', []], + ['mtd', []], + ['mtext', []], + ['mtr', []], + ['munder', []], + ['munderover', []], + ['semantics', []] + ] satisfies [string, string[]][] + ).map( + ([tag, attributes]) => + [tag, Object.freeze({ attributes: Object.freeze(new Set(attributes)) })] satisfies [ + string, + Readonly<{ attributes: ReadonlySet }> + ] + ) + ) +); + export default function useTransformHTMLContent(): (documentFragment: DocumentFragment) => DocumentFragment { const [{ codeBlockCopyButton: codeBlockCopyButtonClassName }] = useStyleSet(); const [codeBlockCopyButtonTagName] = useCodeBlockCopyButtonTagName(); @@ -20,6 +103,7 @@ export default function useTransformHTMLContent(): (documentFragment: DocumentFr return useCallback( documentFragment => transform({ + allowedTags: DEFAULT_ALLOWED_TAGS, codeBlockCopyButtonAltCopied, codeBlockCopyButtonAltCopy, codeBlockCopyButtonClassName,