diff --git a/CHANGELOG.md b/CHANGELOG.md index 750283365d..7bf3d02a53 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -59,6 +59,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. - Updated verbiage from "transcript" to "chat history" - Fixed overlapping hit zone causing clicking on bottom edge of message bubble may focus on the next activity instead - Fixed typings of `useFocus` and `useLocalizer` +- Fixes [#3165](https://github.com/microsoft/BotFramework-WebChat/issues/3165) and [#4094](https://github.com/microsoft/BotFramework-WebChat/issues/4094). Allowlist `aria-label` for links in Markdown and skip unrecognized attributes or invalid curly brackets, by [@compulim](https://github.com/compulim), in PR [#4095](https://github.com/microsoft/BotFramework-WebChat/pull/4095) ### Changed diff --git a/__tests__/__image_snapshots__/html/markdown-attributes-curly-brackets-js-markdown-should-process-valid-aria-label-attributes-set-through-curly-brackets-1-snap.png b/__tests__/__image_snapshots__/html/markdown-attributes-curly-brackets-js-markdown-should-process-valid-aria-label-attributes-set-through-curly-brackets-1-snap.png new file mode 100644 index 0000000000..6d540dd9b5 Binary files /dev/null and b/__tests__/__image_snapshots__/html/markdown-attributes-curly-brackets-js-markdown-should-process-valid-aria-label-attributes-set-through-curly-brackets-1-snap.png differ diff --git a/__tests__/html/markdown.attributes.curlyBrackets.html b/__tests__/html/markdown.attributes.curlyBrackets.html new file mode 100644 index 0000000000..82dcc3b612 --- /dev/null +++ b/__tests__/html/markdown.attributes.curlyBrackets.html @@ -0,0 +1,190 @@ + + +
+ + + + + + + + + + diff --git a/__tests__/html/markdown.attributes.curlyBrackets.js b/__tests__/html/markdown.attributes.curlyBrackets.js new file mode 100644 index 0000000000..069972f007 --- /dev/null +++ b/__tests__/html/markdown.attributes.curlyBrackets.js @@ -0,0 +1,5 @@ +/** @jest-environment ./packages/test/harness/src/host/jest/WebDriverEnvironment.js */ + +describe('Markdown', () => { + test('should process valid aria-label attributes set through curly brackets', () => runHTML('markdown.attributes.curlyBrackets.html')); +}); diff --git a/packages/bundle/src/renderMarkdown.ts b/packages/bundle/src/renderMarkdown.ts index d73bf83552..dc9ee27955 100644 --- a/packages/bundle/src/renderMarkdown.ts +++ b/packages/bundle/src/renderMarkdown.ts @@ -57,6 +57,16 @@ const TRANSPARENT_GIF = ' // This is used for parsing Markdown for external links. const internalMarkdownIt = new MarkdownIt(); +const MARKDOWN_ATTRS_LEFT_DELIMITER = '⟬'; +// Make sure the delimiter is free from any RegExp characters, such as *, ?, etc. +// eslint-disable-next-line security/detect-non-literal-regexp +const MARKDOWN_ATTRS_LEFT_DELIMITER_PATTERN = new RegExp(MARKDOWN_ATTRS_LEFT_DELIMITER, 'gu'); + +const MARKDOWN_ATTRS_RIGHT_DELIMITER = '⟭'; +// Make sure the delimiter is free from any RegExp characters, such as *, ?, etc. +// eslint-disable-next-line security/detect-non-literal-regexp +const MARKDOWN_ATTRS_RIGHT_DELIMITER_PATTERN = new RegExp(MARKDOWN_ATTRS_RIGHT_DELIMITER, 'gu'); + export default function render( markdown: string, { markdownRespectCRLF }: { markdownRespectCRLF: boolean }, @@ -66,14 +76,35 @@ export default function render( markdown = markdown.replace(/\n\r|\r\n/gu, carriageReturn => (carriageReturn === '\n\r' ? '\r\n' : '\n\r')); } - const html = new MarkdownIt({ + // Related to #3165. + // We only support attributes "aria-label" and should leave other attributes as-is. + // However, `markdown-it-attrs` remove unrecognized attributes, such as {hello}. + // Before passing to `markdown-it-attrs`, we will convert known attributes from {aria-label="..."} into ⟬aria-label="..."⟭ (using white tortoise shell brackets). + // Then, we ask `markdown-it-attrs` to only process the new brackets, so it should only try to process things that we allowlisted. + // Lastly, we revert tortoise shell brackets back to curly brackets, for unprocessed attributes. + markdown = markdown + .replace(/\{\s*aria-label()\s*\}/giu, `${MARKDOWN_ATTRS_LEFT_DELIMITER}aria-label${MARKDOWN_ATTRS_RIGHT_DELIMITER}`) + .replace( + /\{\s*aria-label=("[^"]*"|[^\s}]*)\s*\}/giu, + (_, valueInsideQuotes) => + `${MARKDOWN_ATTRS_LEFT_DELIMITER}aria-label=${valueInsideQuotes}${MARKDOWN_ATTRS_RIGHT_DELIMITER}` + ); + + let html = new MarkdownIt({ breaks: false, html: false, linkify: true, typographer: true, xhtmlOut: true }) - .use(markdownItAttrs) + .use(markdownItAttrs, { + // `markdown-it-attrs` is added for accessibility and allow bot developers to specify `aria-label`. + // We are allowlisting `aria-label` only as it is allowlisted in `sanitize-html`. + // Other `aria-*` will be sanitized even we allowlisted here. + allowedAttributes: ['aria-label'], + leftDelimiter: MARKDOWN_ATTRS_LEFT_DELIMITER, + rightDelimiter: MARKDOWN_ATTRS_RIGHT_DELIMITER + }) .use(iterator, 'url_new_win', 'link_open', (tokens, index) => { const token = tokens[+index]; @@ -97,6 +128,10 @@ export default function render( }) .render(markdown); + // Restore attributes not processed by `markdown-it-attrs`. + // TODO: [P2] #2511 After we fixed our polyfill story, we should use "String.prototype.replaceAll" instead of RegExp for replace all occurrences. + html = html.replace(MARKDOWN_ATTRS_LEFT_DELIMITER_PATTERN, '{').replace(MARKDOWN_ATTRS_RIGHT_DELIMITER_PATTERN, '}'); + // The signature from "sanitize-html" module is not correct. // @ts-ignore return sanitizeHTML(html, SANITIZE_HTML_OPTIONS);