Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve support for pasting from Google Docs and Microsoft Word #3989

Merged
merged 1 commit into from
Feb 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

- **Clipboard** Convert newlines between inline elements to a space.
- **Clipboard** Avoid generating unsupported formats on paste.
- **Clipboard** Improve support for pasting from Google Docs and Microsoft Word.
- **Syntax** Support highlight.js v10 and v11.

# 2.0.0-beta.2
Expand Down
8 changes: 7 additions & 1 deletion packages/quill/src/modules/clipboard.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import { DirectionAttribute, DirectionStyle } from '../formats/direction';
import { FontStyle } from '../formats/font';
import { SizeStyle } from '../formats/size';
import { deleteRange } from './keyboard';
import normalizeExternalHTML from './normalizeExternalHTML';

const debug = logger('quill:clipboard');

Expand Down Expand Up @@ -118,8 +119,13 @@ class Clipboard extends Module<ClipboardOptions> {
return delta;
}

convertHTML(html: string) {
/**
 * Normalization hook that `convertHTML` calls on the freshly parsed
 * document before the body is handed to the matchers.
 *
 * This implementation delegates to `normalizeExternalHTML`, which edits
 * `doc` in place. The method is `protected`, so a subclass can override it.
 *
 * @param doc - The document produced by parsing the pasted HTML.
 */
protected normalizeHTML(doc: Document) {
normalizeExternalHTML(doc);
}

protected convertHTML(html: string) {
const doc = new DOMParser().parseFromString(html, 'text/html');
this.normalizeHTML(doc);
const container = doc.body;
const nodeMatches = new WeakMap();
const [elementMatchers, textMatchers] = this.prepareMatching(
Expand Down
14 changes: 14 additions & 0 deletions packages/quill/src/modules/normalizeExternalHTML/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
import googleDocs from './normalizers/googleDocs';
import msWord from './normalizers/msWord';

// Source-specific normalizers, applied in array order (Microsoft Word first,
// then Google Docs). Each one receives the parsed document.
const NORMALIZERS = [msWord, googleDocs];

/**
 * Runs every registered normalizer, in order, against a parsed HTML document.
 * The document is modified in place; nothing is returned.
 * A document without a root element is left untouched.
 *
 * @param doc - The parsed document to normalize.
 */
const normalizeExternalHTML = (doc: Document) => {
  if (!doc.documentElement) {
    return;
  }
  for (const applyNormalizer of NORMALIZERS) {
    applyNormalizer(doc);
  }
};

export default normalizeExternalHTML;
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
// Matches a `font-weight: normal` declaration inside a style attribute value
// (any amount of whitespace is allowed after the colon).
const normalWeightRegexp = /font-weight:\s*normal/;
// Tag names that count as block elements when deciding whether a `<br>`
// between two elements is redundant.
const blockTagNames = ['P', 'OL', 'UL'];

/**
 * Tells whether `element` is one of the tags listed in `blockTagNames`.
 *
 * @param element - The element to test, or `null` when there is none.
 * @returns `true`/`false` for an element; the falsy input itself (`null`)
 *   when no element was given.
 */
const isBlockElement = (element: Element | null) => {
  if (!element) {
    return element;
  }
  return blockTagNames.includes(element.tagName);
};

/**
 * Removes every `<br>` whose previous and next element siblings are both
 * block elements (as judged by `isBlockElement`).
 *
 * @param doc - The parsed document; modified in place.
 */
const normalizeEmptyLines = (doc: Document) => {
  const lineBreaks = Array.from(doc.querySelectorAll('br'));

  // Pick all candidates first, then detach them, so the selection is made
  // against the untouched tree.
  const redundant = lineBreaks.filter((lineBreak) => {
    const before = lineBreak.previousElementSibling;
    const after = lineBreak.nextElementSibling;
    return isBlockElement(before) && isBlockElement(after);
  });

  for (const lineBreak of redundant) {
    const parent = lineBreak.parentNode;
    if (parent) {
      parent.removeChild(lineBreak);
    }
  }
};

/**
 * Unwraps every `<b>` whose inline style declares `font-weight: normal`:
 * the element is replaced by its own child nodes.
 *
 * @param doc - The parsed document; modified in place.
 */
const normalizeFontWeight = (doc: Document) => {
  const styledBolds = Array.from(
    doc.querySelectorAll('b[style*="font-weight"]'),
  );
  const notActuallyBold = styledBolds.filter((bold) => {
    const inlineStyle = bold.getAttribute('style');
    return inlineStyle?.match(normalWeightRegexp);
  });

  for (const bold of notActuallyBold) {
    // Move the children into a fragment, then swap the fragment in for the
    // wrapper so the children take its place.
    const contents = doc.createDocumentFragment();
    contents.append(...Array.from(bold.childNodes));
    bold.parentNode?.replaceChild(contents, bold);
  }
};

/**
 * Google Docs normalizer. Acts only when the document contains an element
 * whose id starts with `docs-internal-guid-`; otherwise it is a no-op.
 * When it acts, it first unwraps non-bold `<b>` wrappers and then removes
 * redundant `<br>` elements.
 *
 * @param doc - The parsed document; modified in place.
 */
export default function normalize(doc: Document) {
  const marker = doc.querySelector('[id^="docs-internal-guid-"]');
  if (!marker) {
    return;
  }
  normalizeFontWeight(doc);
  normalizeEmptyLines(doc);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
import _ from 'lodash';

// Matches a style whose `mso-list` declaration contains "ignore"
// (case-insensitive), e.g. `mso-list: Ignore`.
const ignoreRegexp = /\bmso-list:[^;]*ignore/i;
// Captures the digits after "l" in an `mso-list` declaration,
// e.g. "0" in `mso-list: l0 level1`.
const idRegexp = /\bmso-list:[^;]*\bl(\d+)/i;
// Captures the digits after "level" in an `mso-list` declaration,
// e.g. "1" in `mso-list: l0 level1`.
const indentRegexp = /\bmso-list:[^;]*\blevel(\d+)/i;

/**
 * Reads list information from an element's inline `mso-list` style.
 *
 * @param element - The element whose `style` attribute is inspected.
 * @param html - Markup that is searched for the matching
 *   `@list l<id>:level<indent> { … mso-level-number-format: … }` rule.
 * @returns `null` when the style carries no list id; otherwise the id, the
 *   indent level (1 when none is given), the list type (`'bullet'` only when
 *   the matching rule's number format is exactly `bullet`, else `'ordered'`)
 *   and the element itself.
 */
const parseListItem = (element: Element, html: string) => {
  const inlineStyle = element.getAttribute('style') ?? '';

  const idMatch = idRegexp.exec(inlineStyle);
  if (idMatch === null) {
    return null;
  }
  const id = Number(idMatch[1]);

  const indentMatch = indentRegexp.exec(inlineStyle);
  const indent = indentMatch === null ? 1 : Number(indentMatch[1]);

  // The number format lives in a separate `@list` rule keyed by id and level.
  const typeRegexp = new RegExp(
    `@list l${id}:level${indent}\\s*\\{[^\\}]*mso-level-number-format:\\s*([\\w-]+)`,
    'i',
  );
  const numberFormat = typeRegexp.exec(html)?.[1];
  const type = numberFormat === 'bullet' ? 'bullet' : 'ordered';

  return { id, indent, type, element };
};

// List items are represented as `p` tags with styles like `mso-list: l0 level1` where:
// 1. "0" in "l0" means the list item id;
// 2. "1" in "level1" means the indent level, starting from 1.
//
// This function rewrites `doc` in place:
// - elements whose style marks them as `mso-list: … ignore` are removed;
// - each run of adjacent sibling list items sharing the same id is replaced
//   by a single `<ul>` whose `<li>` children carry `data-list` (the list
//   type) and, for indent levels above 1, a `ql-indent-<level - 1>` class.
const normalizeListItem = (doc: Document) => {
  const msoList = Array.from(doc.querySelectorAll('[style*=mso-list]'));
  const [ignored, others] = _.partition(msoList, (node) =>
    (node.getAttribute('style') || '').match(ignoreRegexp),
  );

  // Each list item contains a marker wrapped with "mso-list: Ignore".
  ignored.forEach((node) => node.parentNode?.removeChild(node));

  // The list type is not defined inline with the tag, instead, it's in the
  // style tag so we need to pass the html as a string.
  const html = doc.documentElement.innerHTML;
  const listItems = others
    .map((element) => parseListItem(element, html))
    .filter((parsed) => parsed);

  while (listItems.length) {
    const childListItems = [];

    let current = listItems.shift();
    // Group continuous items into the same group (aka "ul")
    while (current) {
      childListItems.push(current);
      current =
        listItems.length &&
        listItems[0]?.element === current.element.nextElementSibling &&
        // Different id means the next item doesn't belong to this group.
        listItems[0].id === current.id
          ? listItems.shift()
          : null;
    }

    // Create the replacement nodes from the document being normalized rather
    // than the global `document`, so the new nodes belong to the same
    // document as the elements they replace and the function does not rely
    // on an ambient global.
    const ul = doc.createElement('ul');
    childListItems.forEach((listItem) => {
      const li = doc.createElement('li');
      li.setAttribute('data-list', listItem.type);
      if (listItem.indent > 1) {
        li.setAttribute('class', `ql-indent-${listItem.indent - 1}`);
      }
      li.innerHTML = listItem.element.innerHTML;
      ul.appendChild(li);
    });

    // The first paragraph of the group is swapped for the new list; the
    // remaining paragraphs of the group (its siblings) are then removed.
    const element = childListItems[0]?.element;
    const { parentNode } = element ?? {};
    if (element) {
      parentNode?.replaceChild(ul, element);
    }
    childListItems.slice(1).forEach(({ element: e }) => {
      parentNode?.removeChild(e);
    });
  }
};

/**
 * Microsoft Word normalizer. Acts only when the root element's `xmlns:w`
 * attribute equals the Word namespace URN; otherwise it is a no-op.
 * When it acts, it converts Word's list paragraphs via `normalizeListItem`.
 *
 * @param doc - The parsed document; modified in place.
 */
export default function normalize(doc: Document) {
  const wordNamespace = doc.documentElement.getAttribute('xmlns:w');
  if (wordNamespace !== 'urn:schemas-microsoft-com:office:word') {
    return;
  }
  normalizeListItem(doc);
}
13 changes: 13 additions & 0 deletions packages/quill/test/unit/modules/clipboard.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -528,5 +528,18 @@ describe('Clipboard', () => {
});
expect(delta).toEqual(new Delta().insert(''));
});

// Paste fixture shaped like Google Docs clipboard HTML: the whole content is
// wrapped in `<b style="font-weight:normal;" id="docs-internal-guid-…">`, and
// a `<br />` sits between the first paragraph and the list.
// Expected result: only the span with `font-weight:700` ends up bold, no
// blank line appears between the first "text" line and the list, and the
// nested `<ol>` item becomes an ordered item with indent 1.
test('Google Docs', () => {
const html = `<meta charset='utf-8'><meta charset="utf-8"><b style="font-weight:normal;" id="docs-internal-guid-6f072e08-7fff-e641-0fbc-7fe2846294a4"><p dir="ltr" style="line-height:1.38;margin-top:0pt;margin-bottom:0pt;"><span style="font-size:11pt;font-family:Arial,sans-serif;color:#000000;background-color:transparent;font-weight:400;font-style:normal;font-variant:normal;text-decoration:none;vertical-align:baseline;white-space:pre;white-space:pre-wrap;">text</span></p><br /><ol style="margin-top:0;margin-bottom:0;padding-inline-start:48px;"><li dir="ltr" style="list-style-type:decimal;font-size:11pt;font-family:Arial,sans-serif;color:#000000;background-color:transparent;font-weight:400;font-style:normal;font-variant:normal;text-decoration:none;vertical-align:baseline;white-space:pre;" aria-level="1"><p dir="ltr" style="line-height:1.38;margin-top:0pt;margin-bottom:0pt;" role="presentation"><span style="font-size:11pt;font-family:Arial,sans-serif;color:#000000;background-color:transparent;font-weight:400;font-style:normal;font-variant:normal;text-decoration:none;vertical-align:baseline;white-space:pre;white-space:pre-wrap;">i1</span></p></li><li dir="ltr" style="list-style-type:decimal;font-size:11pt;font-family:Arial,sans-serif;color:#000000;background-color:transparent;font-weight:400;font-style:normal;font-variant:normal;text-decoration:none;vertical-align:baseline;white-space:pre;" aria-level="1"><p dir="ltr" style="line-height:1.38;margin-top:0pt;margin-bottom:0pt;" role="presentation"><span style="font-size:11pt;font-family:Arial,sans-serif;color:#000000;background-color:transparent;font-weight:400;font-style:normal;font-variant:normal;text-decoration:none;vertical-align:baseline;white-space:pre;white-space:pre-wrap;">i2</span></p></li><ol style="margin-top:0;margin-bottom:0;padding-inline-start:48px;"><li dir="ltr" 
style="list-style-type:lower-alpha;font-size:11pt;font-family:Arial,sans-serif;color:#000000;background-color:transparent;font-weight:400;font-style:normal;font-variant:normal;text-decoration:none;vertical-align:baseline;white-space:pre;" aria-level="2"><p dir="ltr" style="line-height:1.38;margin-top:0pt;margin-bottom:0pt;" role="presentation"><span style="font-size:11pt;font-family:Arial,sans-serif;color:#000000;background-color:transparent;font-weight:400;font-style:normal;font-variant:normal;text-decoration:none;vertical-align:baseline;white-space:pre;white-space:pre-wrap;">i3</span></p></li></ol></ol><p dir="ltr" style="line-height:1.38;margin-top:0pt;margin-bottom:0pt;"><span style="font-size:11pt;font-family:Arial,sans-serif;color:#000000;background-color:transparent;font-weight:700;font-style:normal;font-variant:normal;text-decoration:none;vertical-align:baseline;white-space:pre;white-space:pre-wrap;">text</span></p></b><br class="Apple-interchange-newline">`;
const delta = createClipboard().convert({ html });
expect(delta).toEqual(
new Delta()
.insert('text\n')
.insert('i1\ni2\n', { list: 'ordered' })
.insert('i3\n', { list: 'ordered', indent: 1 })
.insert('text', { bold: true })
.insert('\n'),
);
});
});
});
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import { describe, expect, test } from 'vitest';
import normalize from '../../../../../src/modules/normalizeExternalHTML/normalizers/googleDocs';

// Unit tests for the Google Docs normalizer, run directly against a parsed
// document.
describe('Google Docs', () => {
// The outer `<b>` declares `font-weight: normal` and carries the
// `docs-internal-guid-` id, so it should be unwrapped and its children
// hoisted. The plain nested `<b>` and the `<b style="font-weight: bold;">`
// must be kept as they are.
test('remove unnecessary b tags', () => {
const html = `
<b
style="font-weight: normal;"
id="docs-internal-guid-9f51ddb9-7fff-7da1-2cd6-e966f9297902"
>
<span>Item 1</span><b>Item 2</b>
</b>
<b
style="font-weight: bold;"
>Item 3</b>
`;

const doc = new DOMParser().parseFromString(html, 'text/html');
// The normalizer edits `doc` in place; the assertion inspects the body afterwards.
normalize(doc);
expect(doc.body.children).toMatchInlineSnapshot(`
HTMLCollection [
<span>
Item 1
</span>,
<b>
Item 2
</b>,
<b
style="font-weight: bold;"
>
Item 3
</b>,
]
`);
});
});
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
import { describe, expect, test } from 'vitest';
import normalize from '../../../../../src/modules/normalizeExternalHTML/normalizers/msWord';

// Unit tests for the Microsoft Word normalizer, run directly against a parsed
// document.
describe('Microsoft Word', () => {
// What the snapshot pins down:
// - adjacent paragraphs sharing a list id (l0) are merged into one `<ul>`;
// - the `mso-list: Ignore` marker span ("1. ") is dropped;
// - `l0:level3` has a `bullet` number format, so that item is a bullet;
// - items with no matching `@list` rule (l0:level1, l1:level4) and the
//   `alpha` format (l2:level1) come out as ordered;
// - indent level N above 1 becomes the class `ql-indent-<N - 1>`;
// - a different list id (l1) or an intervening plain paragraph starts a new `<ul>`.
test('keep the list style', () => {
const html = `
<html xmlns:w="urn:schemas-microsoft-com:office:word">
<style>
@list l0:level3 { mso-level-number-format:bullet; }
@list l2:level1 { mso-level-number-format:alpha; }
</style>
<body>
<p style="mso-list: l0 level1 lfo1"><span style="mso-list: Ignore;">1. </span>item 1</p>
<p style="mso-list: l0 level3 lfo1">item 2</p>
<p style="mso-list: l1 level4 lfo1">item 3 in another list</p>
<p>Plain paragraph</p>
<p style="mso-list: l2 level1 lfo1">the last item</p>
</body>
</html>
`;

const doc = new DOMParser().parseFromString(html, 'text/html');
// The normalizer edits `doc` in place; the assertion inspects the body afterwards.
normalize(doc);
expect(doc.body.children).toMatchInlineSnapshot(`
HTMLCollection [
<ul>
<li
data-list="ordered"
>
item 1
</li>
<li
class="ql-indent-2"
data-list="bullet"
>
item 2
</li>
</ul>,
<ul>
<li
class="ql-indent-3"
data-list="ordered"
>
item 3 in another list
</li>
</ul>,
<p>
Plain paragraph
</p>,
<ul>
<li
data-list="ordered"
>
the last item
</li>
</ul>,
]
`);
});
});
Loading