From 6465043fd908ad600396c4834f351f51e17e99c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20B=C3=B6hm?= <188768+fb55@users.noreply.github.com> Date: Sat, 19 Mar 2022 12:19:11 +0000 Subject: [PATCH] Revert "refactor: Remove deprecated `normalizeWhitespace` option (#614)" This reverts commit e15248bc82bba7de16abd81c784f7eb8970a5a2b. --- readme.md | 71 +++++++++++++++++++ src/__fixtures__/16-normalize_whitespace.json | 47 ++++++++++++ src/index.ts | 27 ++++++- 3 files changed, 144 insertions(+), 1 deletion(-) create mode 100644 src/__fixtures__/16-normalize_whitespace.json diff --git a/readme.md b/readme.md index 82f64960..781eb380 100644 --- a/readme.md +++ b/readme.md @@ -76,6 +76,77 @@ When the parser is used in a non-streaming fashion, `endIndex` is an integer indicating the position of the end of the node in the document. The default value is `false`. +## Option: `normalizeWhitespace` _(deprecated)_ + +Replace all whitespace with single spaces. +The default value is `false`. + +**Note:** Enabling this might break your markup. + +For the following examples, this HTML will be used: + +```html +
this is the text
+``` + +### Example: `normalizeWhitespace: true` + +```javascript +[ + { + type: "tag", + name: "font", + children: [ + { + data: " ", + type: "text", + }, + { + type: "tag", + name: "br", + }, + { + data: "this is the text ", + type: "text", + }, + { + type: "tag", + name: "font", + }, + ], + }, +]; +``` + +### Example: `normalizeWhitespace: false` + +```javascript +[ + { + type: "tag", + name: "font", + children: [ + { + data: "\n\t", + type: "text", + }, + { + type: "tag", + name: "br", + }, + { + data: "this is the text\n", + type: "text", + }, + { + type: "tag", + name: "font", + }, + ], + }, +]; +``` + --- License: BSD-2-Clause diff --git a/src/__fixtures__/16-normalize_whitespace.json b/src/__fixtures__/16-normalize_whitespace.json new file mode 100644 index 00000000..a9d79b70 --- /dev/null +++ b/src/__fixtures__/16-normalize_whitespace.json @@ -0,0 +1,47 @@ +{ + "name": "Normalize whitespace", + "options": { + "normalizeWhitespace": true + }, + "html": "Line one\n
\t \r\n\f
\nline two
x
", + "expected": [ + { + "data": "Line one ", + "type": "text" + }, + { + "type": "tag", + "name": "br", + "attribs": {} + }, + { + "data": " ", + "type": "text" + }, + { + "type": "tag", + "name": "br", + "attribs": {} + }, + { + "data": " line two", + "type": "text" + }, + { + "type": "tag", + "name": "font", + "attribs": {}, + "children": [ + { + "type": "tag", + "name": "br", + "attribs": {} + }, + { + "data": " x ", + "type": "text" + } + ] + } + ] +} diff --git a/src/index.ts b/src/index.ts index 3dd38bda..ec86896d 100644 --- a/src/index.ts +++ b/src/index.ts @@ -12,6 +12,8 @@ import { export * from "./node"; +const reWhitespace = /\s+/g; + export interface DomHandlerOptions { /** * Add a `startIndex` property to nodes. @@ -31,6 +33,16 @@ export interface DomHandlerOptions { */ withEndIndices?: boolean; + /** + * Replace all whitespace with single spaces. + * + * **Note:** Enabling this might break your markup. + * + * @default false + * @deprecated + */ + normalizeWhitespace?: boolean; + /** * Treat the markup as XML. * @@ -41,6 +53,7 @@ export interface DomHandlerOptions { // Default options const defaultOpts: DomHandlerOptions = { + normalizeWhitespace: false, withStartIndices: false, withEndIndices: false, xmlMode: false, @@ -153,14 +166,26 @@ export class DomHandler { } public ontext(data: string): void { + const { normalizeWhitespace } = this.options; const { lastNode } = this; if (lastNode && lastNode.type === ElementType.Text) { - lastNode.data += data; + if (normalizeWhitespace) { + lastNode.data = (lastNode.data + data).replace( + reWhitespace, + " " + ); + } else { + lastNode.data += data; + } if (this.options.withEndIndices) { lastNode.endIndex = this.parser!.endIndex; } } else { + if (normalizeWhitespace) { + data = data.replace(reWhitespace, " "); + } + const node = new Text(data); this.addNode(node); this.lastNode = node;