diff --git a/package.json b/package.json index 1c7941d22..964dc8471 100644 --- a/package.json +++ b/package.json @@ -33,6 +33,8 @@ "plugin:prettier/recommended" ], "rules": { + "unicorn/no-hex-escape": "off", + "no-control-regex": "off", "import/no-unassigned-import": "off", "unicorn/filename-case": "off", "no-await-in-loop": "off", diff --git a/packages/saber-plugin-feed/lib/index.js b/packages/saber-plugin-feed/lib/index.js index 8e01d242d..8953fa06a 100644 --- a/packages/saber-plugin-feed/lib/index.js +++ b/packages/saber-plugin-feed/lib/index.js @@ -1,6 +1,6 @@ const path = require('path') const { Feed } = require('feed') -const { getFeedPath, resolveURL } = require('./utils') +const { getFeedPath, resolveURL, removeXMLInvalidChars } = require('./utils') const ID = 'generate-feed' @@ -69,7 +69,7 @@ exports.apply = (api, options = {}) => { // Strip HTML tags in excerpt and use it as description (a.k.a. summary) description: page.excerpt && page.excerpt.replace(/<(?:.|\n)*?>/gm, ''), - content, + content: removeXMLInvalidChars(content), date: page.updatedAt, published: page.createdAt }) diff --git a/packages/saber-plugin-feed/lib/utils.js b/packages/saber-plugin-feed/lib/utils.js index fed56222e..1ffb0f3e4 100644 --- a/packages/saber-plugin-feed/lib/utils.js +++ b/packages/saber-plugin-feed/lib/utils.js @@ -26,3 +26,34 @@ exports.getFeedPath = (feedPath, defaultPath) => { exports.resolveURL = (base, pathname) => { return new URL(pathname, base).href } + +/** + * Removes XML-invalid characters from a string. + * reference: https://www.ryadel.com/en/javascript-remove-xml-invalid-chars-characters-string-utf8-unicode-regex/ + * @param {string} string - a string potentially containing XML-invalid characters, such as non-UTF8 characters, STX, EOX and so on. + * @param {boolean} removeDiscouragedChars - a string potentially containing XML-invalid characters, such as non-UTF8 characters, STX, EOX and so on. + * @return : a sanitized string without all the XML-invalid characters. + */ +exports.removeXMLInvalidChars = (string, removeDiscouragedChars = true) => { + // remove everything forbidden by XML 1.0 specifications, plus the unicode replacement character U+FFFD + let regex = /((?:[\0-\x08\x0B\f\x0E-\x1F\uFFFD\uFFFE\uFFFF]|[\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?:[^\uD800-\uDBFF]|^)[\uDC00-\uDFFF]))/g + string = string.replace(regex, '') + + if (removeDiscouragedChars) { + // remove everything not suggested by XML 1.0 specifications + regex = new RegExp( + '([\\x7F-\\x84]|[\\x86-\\x9F]|[\\uFDD0-\\uFDEF]|(?:\\uD83F[\\uDFFE\\uDFFF])|(?:\\uD87F[\\uDF' + + 'FE\\uDFFF])|(?:\\uD8BF[\\uDFFE\\uDFFF])|(?:\\uD8FF[\\uDFFE\\uDFFF])|(?:\\uD93F[\\uDFFE\\uD' + + 'FFF])|(?:\\uD97F[\\uDFFE\\uDFFF])|(?:\\uD9BF[\\uDFFE\\uDFFF])|(?:\\uD9FF[\\uDFFE\\uDFFF])' + + '|(?:\\uDA3F[\\uDFFE\\uDFFF])|(?:\\uDA7F[\\uDFFE\\uDFFF])|(?:\\uDABF[\\uDFFE\\uDFFF])|(?:\\' + + 'uDAFF[\\uDFFE\\uDFFF])|(?:\\uDB3F[\\uDFFE\\uDFFF])|(?:\\uDB7F[\\uDFFE\\uDFFF])|(?:\\uDBBF' + + '[\\uDFFE\\uDFFF])|(?:\\uDBFF[\\uDFFE\\uDFFF])(?:[\\0-\\t\\x0B\\f\\x0E-\\u2027\\u202A-\\uD7FF\\' + + 'uE000-\\uFFFF]|[\\uD800-\\uDBFF][\\uDC00-\\uDFFF]|[\\uD800-\\uDBFF](?![\\uDC00-\\uDFFF])|' + + '(?:[^\\uD800-\\uDBFF]|^)[\\uDC00-\\uDFFF]))', + 'g' + ) + string = string.replace(regex, '') + } + + return string +}