Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix escape codeblock #2230

Merged
merged 6 commits into from
Aug 9, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 0 additions & 7 deletions browser/components/MarkdownPreview.js
Original file line number Diff line number Diff line change
Expand Up @@ -444,13 +444,6 @@ export default class MarkdownPreview extends React.Component {
let { value, codeBlockTheme } = this.props

this.refs.root.contentWindow.document.body.setAttribute('data-theme', theme)

const codeBlocks = value.match(/(```)(.|[\n])*?(```)/g)
if (codeBlocks !== null) {
codeBlocks.forEach((codeBlock) => {
value = value.replace(codeBlock, htmlTextHelper.encodeEntities(codeBlock))
})
}
const renderedHTML = this.markdown.render(value)
attachmentManagement.migrateAttachments(renderedHTML, storagePath, noteKey)
this.refs.root.contentWindow.document.body.innerHTML = attachmentManagement.fixLocalURLS(renderedHTML, storagePath)
Expand Down
17 changes: 14 additions & 3 deletions browser/lib/markdown-it-sanitize-html.js
Original file line number Diff line number Diff line change
@@ -1,23 +1,34 @@
'use strict'

import sanitizeHtml from 'sanitize-html'
import { escapeHtmlCharacters } from './utils'

module.exports = function sanitizePlugin (md, options) {
options = options || {}

md.core.ruler.after('linkify', 'sanitize_inline', state => {
for (let tokenIdx = 0; tokenIdx < state.tokens.length; tokenIdx++) {
if (state.tokens[tokenIdx].type === 'html_block') {
state.tokens[tokenIdx].content = sanitizeHtml(state.tokens[tokenIdx].content, options)
state.tokens[tokenIdx].content = sanitizeHtml(
state.tokens[tokenIdx].content,
options
)
}
if (state.tokens[tokenIdx].type === 'fence') {
state.tokens[tokenIdx].content = state.tokens[tokenIdx].content.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;').replace(/"/g, '&quot;')
// escapeHtmlCharacters has better performance
state.tokens[tokenIdx].content = escapeHtmlCharacters(
state.tokens[tokenIdx].content,
{ skipSingleQuote: true }
)
}
if (state.tokens[tokenIdx].type === 'inline') {
const inlineTokens = state.tokens[tokenIdx].children
for (let childIdx = 0; childIdx < inlineTokens.length; childIdx++) {
if (inlineTokens[childIdx].type === 'html_inline') {
inlineTokens[childIdx].content = sanitizeHtml(inlineTokens[childIdx].content, options)
inlineTokens[childIdx].content = sanitizeHtml(
inlineTokens[childIdx].content,
options
)
}
}
}
Expand Down
63 changes: 56 additions & 7 deletions browser/lib/utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,20 +6,31 @@ export function lastFindInArray (array, callback) {
}
}

export function escapeHtmlCharacters (html, opt = { detectCodeBlock: false }) {
export function escapeHtmlCharacters (
html,
opt = { detectCodeBlock: false, skipSingleQuote: false }
) {
const matchHtmlRegExp = /["'&<>]/g
const matchCodeBlockRegExp = /```/g
const escapes = ['&quot;', '&amp;', '&#39;', '&lt;', '&gt;']
let match = null
const replaceAt = (str, index, replace) =>
str.substr(0, index) +
replace +
str.substr(index + replace.length - (replace.length - 1))

// detecting code block
while ((match = matchHtmlRegExp.exec(html)) != null) {
while ((match = matchHtmlRegExp.exec(html)) !== null) {
const current = { char: match[0], index: match.index }
const codeBlockIndexs = []
let openCodeBlock = null
// if the detectCodeBlock option is activated then this function should skip
// characters that need to be escaped but are located inside a code block
if (opt.detectCodeBlock) {
// position of the nearest line start
// The first type of code block is lines that start with 4 spaces
// Here we look for the \n character located before the character that
// needs to be escaped, i.e. the beginning of the line that contains that
// character; then we check whether 4 spaces follow that \n character
// (the line starts with 4 spaces)
let previousLineEnd = current.index - 1
while (html[previousLineEnd] !== '\n' && previousLineEnd !== -1) {
previousLineEnd--
Expand All @@ -31,16 +42,54 @@ export function escapeHtmlCharacters (html, opt = { detectCodeBlock: false }) {
html[previousLineEnd + 3] === ' ' &&
html[previousLineEnd + 4] === ' '
) {
// so skip it
// skip the current character
continue
}
// The second type of code block is lines that wrapped in ```
// We will get the position of each ```
// then push it into an array
// then the array returned will be like this:
// [startCodeblock, endCodeBlock, startCodeBlock, endCodeBlock]
while ((openCodeBlock = matchCodeBlockRegExp.exec(html)) !== null) {
codeBlockIndexs.push(openCodeBlock.index)
}
let shouldSkipChar = false
// we loop through the array of positions
// we skip 2 element as the i index position is the position of ``` that
// open the codeblock and the i + 1 is the position of the ``` that close
// the code block
for (let i = 0; i < codeBlockIndexs.length; i += 2) {
// codeBlockIndexs[i] is the position of the first ` of the ``` that opens
// the code block, so we add 2 to get the position of the last ` of that
// opening ```; the current character must be located after it.
// codeBlockIndexs[i + 1] is the position of the closing ```, so the
// current character must be located before it.
if (
current.index > codeBlockIndexs[i] + 2 &&
current.index < codeBlockIndexs[i + 1]
) {
// skip it
shouldSkipChar = true
break
}
}
if (shouldSkipChar) {
// skip the current character
continue
}
}
// otherwise, escape it !!!
if (current.char === '&') {
// when escaping the & character we have to be careful, as the & could be part
// of an already escaped character: &quot; would become &amp;quot;
let nextStr = ''
let nextIndex = current.index
let escapedStr = false
// maximum length of an escape string is 5. For example ('&quot;')
// maximum length of an escaped string is 5. For example ('&quot;')
// we take the next 5 character of the next string if it is one of the string:
// ['&quot;', '&amp;', '&#39;', '&lt;', '&gt;'] then we will not escape the & character
// as it is a part of the escaped string and should not be escaped
while (nextStr.length <= 5) {
nextStr += html[nextIndex]
nextIndex++
Expand All @@ -55,7 +104,7 @@ export function escapeHtmlCharacters (html, opt = { detectCodeBlock: false }) {
}
} else if (current.char === '"') {
html = replaceAt(html, current.index, '&quot;')
} else if (current.char === "'") {
} else if (current.char === "'" && !opt.skipSingleQuote) {
html = replaceAt(html, current.index, '&#39;')
} else if (current.char === '<') {
html = replaceAt(html, current.index, '&lt;')
Expand Down
35 changes: 0 additions & 35 deletions package-lock.json

This file was deleted.

27 changes: 26 additions & 1 deletion tests/lib/escapeHtmlCharacters-test.js
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,38 @@ test('escapeHtmlCharacters should NOT skip code block if that option is NOT enab
t.is(actual, expected)
})

test('escapeHtmlCharacters should NOT escape & character if it\'s a part of an escaped character', t => {
test("escapeHtmlCharacters should NOT escape & character if it's a part of an escaped character", t => {
const input = 'Do not escape &amp; or &quot; but do escape &'
const expected = 'Do not escape &amp; or &quot; but do escape &amp;'
const actual = escapeHtmlCharacters(input)
t.is(actual, expected)
})

// With detectCodeBlock enabled, characters inside ``` fences must be left
// untouched while the plain text between the two fenced blocks is escaped.
test('escapeHtmlCharacters should skip char if in code block', t => {
  const fencedBlock = ['```', '<dontescapeme>', '```']
  // Wraps the given lines between two identical fenced blocks, with a leading
  // and a trailing newline, mirroring a multi-line note body.
  const buildNote = middleLines =>
    ['', ...fencedBlock, ...middleLines, ...fencedBlock, ''].join('\n')

  const input = buildNote(['das<das>dasd', 'dasdasdasd'])
  const expected = buildNote(['das&lt;das&gt;dasd', 'dasdasdasd'])

  t.is(escapeHtmlCharacters(input, { detectCodeBlock: true }), expected)
})

test('escapeHtmlCharacters should return the correct result', t => {
const input = '& < > " \''
const expected = '&amp; &lt; &gt; &quot; &#39;'
Expand Down