From 6465043fd908ad600396c4834f351f51e17e99c1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20B=C3=B6hm?= <188768+fb55@users.noreply.github.com>
Date: Sat, 19 Mar 2022 12:19:11 +0000
Subject: [PATCH] Revert "refactor: Remove deprecated `normalizeWhitespace`
option (#614)"
This reverts commit e15248bc82bba7de16abd81c784f7eb8970a5a2b.
---
readme.md | 71 +++++++++++++++++++
src/__fixtures__/16-normalize_whitespace.json | 47 ++++++++++++
src/index.ts | 27 ++++++-
3 files changed, 144 insertions(+), 1 deletion(-)
create mode 100644 src/__fixtures__/16-normalize_whitespace.json
diff --git a/readme.md b/readme.md
index 82f64960..781eb380 100644
--- a/readme.md
+++ b/readme.md
@@ -76,6 +76,77 @@ When the parser is used in a non-streaming fashion, `endIndex` is an integer
indicating the position of the end of the node in the document.
The default value is `false`.
+## Option: `normalizeWhitespace` _(deprecated)_
+
+Replace all whitespace with single spaces.
+The default value is `false`.
+
+**Note:** Enabling this might break your markup.
+
+For the following examples, this HTML will be used:
+
+```html
+<font>
+	<br>this is the text
+<font>
+```
+
+### Example: `normalizeWhitespace: true`
+
+```javascript
+[
+ {
+ type: "tag",
+ name: "font",
+ children: [
+ {
+ data: " ",
+ type: "text",
+ },
+ {
+ type: "tag",
+ name: "br",
+ },
+ {
+ data: "this is the text ",
+ type: "text",
+ },
+ {
+ type: "tag",
+ name: "font",
+ },
+ ],
+ },
+];
+```
+
+### Example: `normalizeWhitespace: false`
+
+```javascript
+[
+ {
+ type: "tag",
+ name: "font",
+ children: [
+ {
+ data: "\n\t",
+ type: "text",
+ },
+ {
+ type: "tag",
+ name: "br",
+ },
+ {
+ data: "this is the text\n",
+ type: "text",
+ },
+ {
+ type: "tag",
+ name: "font",
+ },
+ ],
+ },
+];
+```
+
---
License: BSD-2-Clause
diff --git a/src/__fixtures__/16-normalize_whitespace.json b/src/__fixtures__/16-normalize_whitespace.json
new file mode 100644
index 00000000..a9d79b70
--- /dev/null
+++ b/src/__fixtures__/16-normalize_whitespace.json
@@ -0,0 +1,47 @@
+{
+ "name": "Normalize whitespace",
+ "options": {
+ "normalizeWhitespace": true
+ },
+ "html": "Line one\n<br>\t \r\n\f <br>\nline two<font><br> x </font>",
+ "expected": [
+ {
+ "data": "Line one ",
+ "type": "text"
+ },
+ {
+ "type": "tag",
+ "name": "br",
+ "attribs": {}
+ },
+ {
+ "data": " ",
+ "type": "text"
+ },
+ {
+ "type": "tag",
+ "name": "br",
+ "attribs": {}
+ },
+ {
+ "data": " line two",
+ "type": "text"
+ },
+ {
+ "type": "tag",
+ "name": "font",
+ "attribs": {},
+ "children": [
+ {
+ "type": "tag",
+ "name": "br",
+ "attribs": {}
+ },
+ {
+ "data": " x ",
+ "type": "text"
+ }
+ ]
+ }
+ ]
+}
diff --git a/src/index.ts b/src/index.ts
index 3dd38bda..ec86896d 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -12,6 +12,8 @@ import {
export * from "./node";
+const reWhitespace = /\s+/g;
+
export interface DomHandlerOptions {
/**
* Add a `startIndex` property to nodes.
@@ -31,6 +33,16 @@ export interface DomHandlerOptions {
*/
withEndIndices?: boolean;
+ /**
+ * Replace all whitespace with single spaces.
+ *
+ * **Note:** Enabling this might break your markup.
+ *
+ * @default false
+ * @deprecated
+ */
+ normalizeWhitespace?: boolean;
+
/**
* Treat the markup as XML.
*
@@ -41,6 +53,7 @@ export interface DomHandlerOptions {
// Default options
const defaultOpts: DomHandlerOptions = {
+ normalizeWhitespace: false,
withStartIndices: false,
withEndIndices: false,
xmlMode: false,
@@ -153,14 +166,26 @@ export class DomHandler {
}
public ontext(data: string): void {
+ const { normalizeWhitespace } = this.options;
const { lastNode } = this;
if (lastNode && lastNode.type === ElementType.Text) {
- lastNode.data += data;
+ if (normalizeWhitespace) {
+ lastNode.data = (lastNode.data + data).replace(
+ reWhitespace,
+ " "
+ );
+ } else {
+ lastNode.data += data;
+ }
if (this.options.withEndIndices) {
lastNode.endIndex = this.parser!.endIndex;
}
} else {
+ if (normalizeWhitespace) {
+ data = data.replace(reWhitespace, " ");
+ }
+
const node = new Text(data);
this.addNode(node);
this.lastNode = node;