Skip to content

Commit

Permalink
Markdown: whitelist aria-label for curly brackets (#4095)
Browse files Browse the repository at this point in the history
* Markdown: allowlist aria-label attribute

* Add snapshot

* Update entry

* Fix tests
  • Loading branch information
compulim authored Mar 2, 2022
1 parent ae89df6 commit 5f46f23
Show file tree
Hide file tree
Showing 5 changed files with 233 additions and 2 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
- Updated verbiage from "transcript" to "chat history"
- Fixed overlapping hit zone causing clicking on bottom edge of message bubble may focus on the next activity instead
- Fixed typings of `useFocus` and `useLocalizer`
- Fixes [#3165](https://github.com/microsoft/BotFramework-WebChat/issues/3165) and [#4094](https://github.com/microsoft/BotFramework-WebChat/issues/4094). Allowlist `aria-label` for links in Markdown and skip unrecognized attributes or invalid curly brackets, by [@compulim](https://github.com/compulim), in PR [#4095](https://github.com/microsoft/BotFramework-WebChat/pull/4095)

### Changed

Expand Down
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
190 changes: 190 additions & 0 deletions __tests__/html/markdown.attributes.curlyBrackets.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,190 @@
<!DOCTYPE html>
<html lang="en-US">
<head>
<link href="/assets/index.css" rel="stylesheet" type="text/css" />
<script crossorigin="anonymous" src="/test-harness.js"></script>
<script crossorigin="anonymous" src="/test-page-object.js"></script>
<script crossorigin="anonymous" src="/__dist__/webchat-es5.js"></script>
</head>
<body>
<div id="webchat"></div>
<script>
run(async function () {
await host.windowSize(undefined, 1440, document.getElementById('webchat'));

expect.extend({
toHaveAttributes(received, attributes) {
for (const [name, value] of Object.entries(attributes)) {
if (!value) {
expect(received.hasAttribute(name)).toBeTruthy();
} else {
expect(received.getAttribute(name)).toBe(value);
}
}

return { pass: true };
}
});

// GIVEN: Message with some HTML attributes in its Markdown.
WebChat.renderWebChat(
{
directLine: testHelpers.createDirectLineWithTranscript([
{
from: {
id: 'bot',
role: 'bot'
},
id: '00000',
text: [
'## Regressions',
'The followings are to protect against regressions:',
'- Hello {World}',
'- Hello {1}',
'## Supported',
'The followings are supported Markdown attributes:',
'- [Link](https://bing.com/){aria-label="This is a label"} should return `<a aria-label="This is a label">`',
'- [Link](https://bing.com/){aria-label=Hello} should return `<a aria-label="Hello">`',
'- [Link](https://bing.com/){aria-label=} should return `<a aria-label>`',
'- [Link](https://bing.com/){aria-label} should return `<a aria-label>`',
'- [Link](https://bing.com/){ aria-label=" This is a label with many whitespaces " } should return `<a aria-label=" This is a label with many whitespaces ">`',
'- [Link](https://bing.com/){aria-label=a"b"c} should return `<a aria-label="a"b"c">`',
'## Not recognized',
'The followings are not recognized as Markdown attributes and should left untouched:',
'- [Link](https://bing.com/){aria-label=This is ignored} should left untouched',
'- [Link](https://bing.com/){aria-label ="This is a label with whitespace before equal sign"} should left untouched',
'- [Link](https://bing.com/){.ignored} should left untouched',
'- [Link](https://bing.com/){onload="javascript:void()"} should left untouched',
'## Other cases',
'### Not processed by `markdown-it-attrs`',
'- {aria-label="123"} should left untouched',
'- {aria-label=} should left untouched',
'- {aria-label} should left untouched',
'### Processed by `markdown-it-attrs`',
'- Hello, *World*{aria-label="Emphasized"}!'
].join('\n\n'),
textFormat: 'markdown',
timestamp: '2000-01-23T12:34:56.12345Z',
type: 'message'
}
]),
store: testHelpers.createStore()
},
document.getElementById('webchat')
);

// WHEN: Rendered.
await pageConditions.uiConnected();
await pageConditions.minNumActivitiesShown(1);

const { children } = document.querySelector('.webchat__bubble__content .markdown');

const [
regressionItems,
supportedItems,
notRecognizedItems,
otherCasesNotProcessedItems,
otherCasesProcessedItems
] = [].map.call(document.querySelectorAll('.webchat__bubble__content .markdown ul'), list => [
...list.querySelectorAll('li')
]);

// THEN: Hello {World} and Hello {1} should be kept as-is.
expect(regressionItems.shift()).toHaveProperty('innerText', 'Hello {World}');
expect(regressionItems.shift()).toHaveProperty('innerText', 'Hello {1}');

const anchors = [].map.call(supportedItems, supportItem => supportItem.querySelector('a'));

// THEN: Valid curly brackets with only "aria-label" defined should be processed. The brackets should be removed from the text.

// [Link](https://bing.com/){aria-label="This is a label"} should return `<a aria-label="This is a label">
expect(supportedItems.shift()).toHaveProperty(
'innerText',
'Link should return <a aria-label="This is a label">'
);
expect(anchors.shift()).toHaveAttributes({ 'aria-label': 'This is a label' });

// [Link](https://bing.com/){aria-label=Hello} should return `<a aria-label="Hello">`
expect(supportedItems.shift()).toHaveProperty('innerText', 'Link should return <a aria-label="Hello">');
expect(anchors.shift()).toHaveAttributes({ 'aria-label': 'Hello' });

// [Link](https://bing.com/){aria-label=} should return `<a aria-label>`
expect(supportedItems.shift()).toHaveProperty('innerText', 'Link should return <a aria-label>');
expect(anchors.shift()).toHaveAttributes({ 'aria-label': 0 });

// [Link](https://bing.com/){aria-label} should return `<a aria-label>`
expect(supportedItems.shift()).toHaveProperty('innerText', 'Link should return <a aria-label>');
expect(anchors.shift()).toHaveAttributes({ 'aria-label': 0 });

// [Link](https://bing.com/){ aria-label=" This is a label with many whitespaces " } should return `<a aria-label=" This is a label with many whitespaces ">`
// Spaces before `aria-label` or after the last quote, is supported by `markdown-it-attrs`.
expect(supportedItems.shift()).toHaveProperty(
'innerText',
'Link should return <a aria-label=" This is a label with many whitespaces ">'
);
expect(anchors.shift()).toHaveAttributes({ 'aria-label': ' This is a label with many whitespaces ' });

// [Link](https://bing.com/){aria-label=a"b"c} should return `<a aria-label="a"b"c">`
// `aria-label=a"b"c` is supported by `markdown-it-attrs`, will turn into `aria-label="a&quot;b&quot;c"`.
expect(supportedItems.shift()).toHaveProperty('innerText', 'Link should return <a aria-label="a"b"c">');
expect(anchors.shift()).toHaveAttributes({ 'aria-label': 'a"b"c' });

// THEN: Invalid or unrecognized curly brackets should left untouched.
// In Web Chat, we only allow one and only `aria-label` attribute to be set in the attribute.
// If other attributes are detected, it is not in a valid form and is ignored.

// [Link](https://bing.com/){aria-label=This is ignored} should left untouched
// Although "aria-label=This" is recognized, "is ignored" is not valid. The whole bracelet is ignored.
expect(notRecognizedItems.shift()).toHaveProperty(
'innerText',
'Link{aria-label=This is ignored} should left untouched'
);

// [Link](https://bing.com/){aria-label ="This is a label with whitespace before equal sign"} should left untouched
// A space between or after the equal sign (=) is not valid in `markdown-it-attrs`.
expect(notRecognizedItems.shift()).toHaveProperty(
'innerText',
'Link{aria-label =“This is a label with whitespace before equal sign”} should left untouched'
);
expect(notRecognizedItems.shift()).toHaveProperty('innerText', 'Link{.ignored} should left untouched');

// [Link](https://bing.com/){.ignored} should left untouched
// Class is not supported in Web Chat.
expect(notRecognizedItems[0].querySelector('a').hasAttribute('onload')).toBe(false);

// [Link](https://bing.com/){onload="javascript:void()"} should left untouched
// "onload" is not a recognized attribute.
expect(notRecognizedItems.shift()).toHaveProperty(
'innerText',
'Link{onload=“javascript:void()”} should left untouched'
);

// THEN: Curly brackets not processed by `markdown-it-attrs` should left untouched.

// {aria-label="123"} should left untouched
// Although the bracelet is valid, it is not processed by `markdown-it-attrs`.
expect(otherCasesNotProcessedItems.shift()).toHaveProperty(
'innerText',
'{aria-label=“123”} should left untouched'
);

// {aria-label=} should left untouched
// Although the bracelet is valid, it is not processed by `markdown-it-attrs`.
expect(otherCasesNotProcessedItems.shift()).toHaveProperty('innerText', '{aria-label=} should left untouched');

// {aria-label} should left untouched
// Although the bracelet is valid, it is not processed by `markdown-it-attrs`.
expect(otherCasesNotProcessedItems.shift()).toHaveProperty('innerText', '{aria-label} should left untouched');

// THEN: Curly brackets processed by `markdown-it-attrs` should be removed.

// Hello, *World*{aria-label="Emphasized"}!
// Although the bracelet is processed, sanitize rules in Web Chat do not allow setting attribute to emphasis (or anything other than <a>).
expect(otherCasesProcessedItems[0].querySelector('em')).toHaveProperty('outerHTML', '<em>World</em>');
expect(otherCasesProcessedItems.shift()).toHaveProperty('innerText', 'Hello, World!');

await host.snapshot();
});
</script>
</body>
</html>
5 changes: 5 additions & 0 deletions __tests__/html/markdown.attributes.curlyBrackets.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
/** @jest-environment ./packages/test/harness/src/host/jest/WebDriverEnvironment.js */

describe('Markdown', () => {
test('should process valid aria-label attributes set through curly brackets', () => runHTML('markdown.attributes.curlyBrackets.html'));
});
39 changes: 37 additions & 2 deletions packages/bundle/src/renderMarkdown.ts
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,16 @@ const TRANSPARENT_GIF = '
// This is used for parsing Markdown for external links.
const internalMarkdownIt = new MarkdownIt();

const MARKDOWN_ATTRS_LEFT_DELIMITER = '⟬';
// Make sure the delimiter is free from any RegExp characters, such as *, ?, etc.
// eslint-disable-next-line security/detect-non-literal-regexp
const MARKDOWN_ATTRS_LEFT_DELIMITER_PATTERN = new RegExp(MARKDOWN_ATTRS_LEFT_DELIMITER, 'gu');

const MARKDOWN_ATTRS_RIGHT_DELIMITER = '⟭';
// Make sure the delimiter is free from any RegExp characters, such as *, ?, etc.
// eslint-disable-next-line security/detect-non-literal-regexp
const MARKDOWN_ATTRS_RIGHT_DELIMITER_PATTERN = new RegExp(MARKDOWN_ATTRS_RIGHT_DELIMITER, 'gu');

export default function render(
markdown: string,
{ markdownRespectCRLF }: { markdownRespectCRLF: boolean },
Expand All @@ -66,14 +76,35 @@ export default function render(
markdown = markdown.replace(/\n\r|\r\n/gu, carriageReturn => (carriageReturn === '\n\r' ? '\r\n' : '\n\r'));
}

const html = new MarkdownIt({
// Related to #3165.
// We only support attributes "aria-label" and should leave other attributes as-is.
// However, `markdown-it-attrs` remove unrecognized attributes, such as {hello}.
// Before passing to `markdown-it-attrs`, we will convert known attributes from {aria-label="..."} into ⟬aria-label="..."⟭ (using white tortoise shell brackets).
// Then, we ask `markdown-it-attrs` to only process the new brackets, so it should only try to process things that we allowlisted.
// Lastly, we revert tortoise shell brackets back to curly brackets, for unprocessed attributes.
markdown = markdown
.replace(/\{\s*aria-label()\s*\}/giu, `${MARKDOWN_ATTRS_LEFT_DELIMITER}aria-label${MARKDOWN_ATTRS_RIGHT_DELIMITER}`)
.replace(
/\{\s*aria-label=("[^"]*"|[^\s}]*)\s*\}/giu,
(_, valueInsideQuotes) =>
`${MARKDOWN_ATTRS_LEFT_DELIMITER}aria-label=${valueInsideQuotes}${MARKDOWN_ATTRS_RIGHT_DELIMITER}`
);

let html = new MarkdownIt({
breaks: false,
html: false,
linkify: true,
typographer: true,
xhtmlOut: true
})
.use(markdownItAttrs)
.use(markdownItAttrs, {
// `markdown-it-attrs` is added for accessibility and allow bot developers to specify `aria-label`.
// We are allowlisting `aria-label` only as it is allowlisted in `sanitize-html`.
// Other `aria-*` will be sanitized even we allowlisted here.
allowedAttributes: ['aria-label'],
leftDelimiter: MARKDOWN_ATTRS_LEFT_DELIMITER,
rightDelimiter: MARKDOWN_ATTRS_RIGHT_DELIMITER
})
.use(iterator, 'url_new_win', 'link_open', (tokens, index) => {
const token = tokens[+index];

Expand All @@ -97,6 +128,10 @@ export default function render(
})
.render(markdown);

// Restore attributes not processed by `markdown-it-attrs`.
// TODO: [P2] #2511 After we fixed our polyfill story, we should use "String.prototype.replaceAll" instead of RegExp for replace all occurrences.
html = html.replace(MARKDOWN_ATTRS_LEFT_DELIMITER_PATTERN, '{').replace(MARKDOWN_ATTRS_RIGHT_DELIMITER_PATTERN, '}');

// The signature from "sanitize-html" module is not correct.
// @ts-ignore
return sanitizeHTML(html, SANITIZE_HTML_OPTIONS);
Expand Down

0 comments on commit 5f46f23

Please sign in to comment.