-
Notifications
You must be signed in to change notification settings - Fork 18
Support for pasting flat lists #7
Changes from 23 commits
6a2f58c
3138aa7
8d3f491
9069ff0
dc40c3d
b71c6c6
a64bcdf
17b8fa8
13eccae
681da17
10b2890
054f704
ff66a56
3d105fc
3bee569
8595645
4e5f298
db5cc8e
9a28f5b
f702773
c88136b
4107f66
a2b9ddf
1154c56
0173707
8954a84
4408c1b
ffec498
84a87ef
7389dfe
1671ea0
9ffb255
c374a26
9811665
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,196 @@ | ||
/** | ||
* @license Copyright (c) 2003-2018, CKSource - Frederico Knabben. All rights reserved. | ||
* For licensing, see LICENSE.md. | ||
*/ | ||
|
||
/** | ||
* @module pastefromoffice/filters/list | ||
*/ | ||
|
||
import Element from '@ckeditor/ckeditor5-engine/src/view/element'; | ||
import Matcher from '@ckeditor/ckeditor5-engine/src/view/matcher'; | ||
import Position from '@ckeditor/ckeditor5-engine/src/view/position'; | ||
import TreeWalker from '@ckeditor/ckeditor5-engine/src/view/treewalker'; | ||
import UpcastWriter from '@ckeditor/ckeditor5-engine/src/view/upcastwriter'; | ||
|
||
/** | ||
* Transforms Word specific list-like elements to the semantic HTML lists. | ||
* | ||
* Lists in Word are represented by block elements with special attributes like: | ||
* | ||
* <p class=MsoListParagraphCxSpFirst style='mso-list:l1 level1 lfo1'>...</p> // Paragraph based list. | ||
* <h1 style='mso-list:l0 level1 lfo1'>...</h1> // Heading 1 based list. | ||
* | ||
* @param {module:engine/view/node~Node|module:engine/view/documentfragment~DocumentFragment} bodyView The view | ||
* structure which to transform. | ||
* @returns {module:engine/view/node~Node|module:engine/view/documentfragment~DocumentFragment} The view | ||
* structure instance with list-like elements transformed into semantic lists. | ||
*/ | ||
export function paragraphsToLists( bodyView, stylesString ) { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Our naming convention requires the use of a verb in a function/method name. |
||
const firstChild = bodyView.getChild( 0 ); | ||
|
||
if ( firstChild ) { | ||
const listNodes = findAllListNodes( Position.createBefore( firstChild ) ); | ||
createLists( listNodes, stylesString ); | ||
} | ||
|
||
return bodyView; | ||
} | ||
|
||
// Writer used for View elements manipulation. | ||
const writer = new UpcastWriter(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'd prefer if we didn't create any instances on module level. That's negligible at this scale but, in general, it increases memory consumption and initial loading time (all these instances are created on first script execution). So far, we avoided creating these instances and it'd be good to continue this. |
||
|
||
// Matcher for finding list-like elements: paragraphs (`p`) and headings (`h1`, `h2`, ...)
// which carry any `mso-list` style.
//
// The alternation is wrapped in a group so that both anchors apply to each branch. Without the group
// the pattern reads as "`^p` OR `h\d+$`" and also accepts element names such as `pre`.
const listMatcher = new Matcher( {
	name: /^(?:p|h\d+)$/,
	styles: {
		'mso-list': /.*/
	}
} );

// Matcher for finding `span` elements holding lists numbering/bullets.
const listBulletMatcher = new Matcher( {
	name: 'span',
	styles: {
		'mso-list': 'Ignore'
	}
} );
|
||
// Finds all list-like nodes starting from a given position. | ||
// | ||
// @param {module:engine/src/view/position~Position} startPosition Position from which to start looking. | ||
// @returns {Array.<Object>} Array of found list items. Each item is an object containing: | ||
// | ||
// * {module:engine/src/view/element~Element} element List-like element. | ||
// * {Number} id List item id parsed from `mso-list` style (see `getListItemData()` function). | ||
// * {Number} order List item creation order parsed from `mso-list` style (see `getListItemData()` function). | ||
// * {Number} indent List item indentation level parsed from `mso-list` style (see `getListItemData()` function). | ||
function findAllListNodes( startPosition ) { | ||
const treeWalker = new TreeWalker( { startPosition, ignoreElementEnd: true } ); | ||
|
||
// Find all list nodes. | ||
const listNodes = []; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Blank line missing (yep, there's a ticket to improve ESLint's config but there was some kind of a problem). |
||
for ( const value of treeWalker ) { | ||
if ( value.type === 'elementStart' && listMatcher.match( value.item ) ) { | ||
const itemData = getListItemData( value.item ); | ||
|
||
listNodes.push( { | ||
element: value.item, | ||
id: itemData.id, | ||
order: itemData.order, | ||
indent: itemData.indent | ||
} ); | ||
} | ||
} | ||
|
||
return listNodes; | ||
} | ||
|
||
// Transforms given list-like nodes into semantic lists. As the function operates on a provided | ||
// {module:engine/src/view/element~Element elements}, it will modify the view structure to which those list elements belong. | ||
// | ||
// @param {Array.<Object>} listItems Array containing list items data. Usually it is the output of `findAllListNodes()` function. | ||
// @param {String} styles CSS styles which may contain additional data about lists format. | ||
function createLists( listItems, styles ) {
	// Nothing to transform.
	if ( !listItems.length ) {
		return;
	}

	let activeList = null;
	let lastItem = null;

	for ( const item of listItems ) {
		const itemElement = item.element;
		const startsNewList = !lastItem || lastItem.id !== item.id;

		if ( startsNewList ) {
			// A different list id than the previous item means a new list container
			// has to be created in the place of the current list-like element.
			const { type } = findListType( item, styles );
			activeList = new Element( type );

			const container = itemElement.parent;
			writer.insertChild( container.getChildIndex( itemElement ), activeList, container );
		}

		// Strip the numbering/bullet span first, then move the element into the list
		// and turn it into a list item.
		removeBulletElement( itemElement );
		writer.appendChild( itemElement, activeList );
		writer.rename( 'li', itemElement );

		lastItem = item;
	}
}
|
||
// Extracts list information from Word specific list style like: | ||
// | ||
// `style="mso-list:l1 level1 lfo1"` | ||
// | ||
// where: | ||
// | ||
// * `l1` is a list id (all elements with the same id belong to the same list), | ||
// * `level1` is a list item indentation level, | ||
// * `lfo1` is a list insertion order in a document. | ||
// | ||
// @param {module:engine/view/element~Element} element List-like element from which data is extracted. | ||
// @returns {Object} result | ||
// @returns {Number} result.id List id. | ||
// @returns {Number} result.order List creation order. | ||
// @returns {Number} result.indent List indentation level. | ||
function getListItemData( element ) {
	const data = {};
	const listStyle = element.getStyle( 'mso-list' );

	if ( listStyle ) {
		const idMatch = listStyle.match( /(^|\s+)l(\d+)/i );
		const orderMatch = listStyle.match( /\s*lfo(\d+)/i );
		const indentMatch = listStyle.match( /\s*level(\d+)/i );

		// `String#match()` returns `null` when a part is missing (e.g. for a `mso-list` value which
		// does not follow the `lX levelY lfoZ` shape). Fill the data only when all three parts were
		// found, otherwise return the same empty object as for an element without `mso-list` style.
		if ( idMatch && orderMatch && indentMatch ) {
			// Explicit radix so the digits are always read as decimal.
			data.id = parseInt( idMatch[ 2 ], 10 );
			data.order = parseInt( orderMatch[ 1 ], 10 );
			data.indent = parseInt( indentMatch[ 1 ], 10 );
		}
	}

	return data;
}
|
||
// Checks list item style based on a provided CSS. | ||
// | ||
// List item style is extracted from CSS stylesheet. Each list with its specific style attribute value (`mso-list:l1 level1 lfo1`) | ||
// has its dedicated properties in a CSS stylesheet defined with a selector like: | ||
// | ||
// @list l1:level1 { ... } | ||
// | ||
// It contains `mso-level-number-format` property which defines list numbering/bullet style. If this property | ||
// is not defined it means default `decimal` numbering. | ||
// | ||
// Here CSS string representation is used as `mso-level-number-format` is invalid CSS property which gets removed during parsing. | ||
// | ||
// @param {Object} listItem List item for which list style will be searched for. | ||
// @param {String} styles CSS stylesheet. | ||
// @returns {Object} result | ||
// @returns {String} result.type Type of the list, could be `ul` or `ol`. | ||
// @returns {String} result.style List style like `decimal`, `lower-roman`, etc. It is passed directly from Word stylesheet | ||
// so may be not compatible with CSS `list-style-type` accepted values. | ||
function findListType( listItem, styles ) {
	// Captures the declarations block (without the closing `}`) of the `@list lX:levelY` rule
	// which describes the given list item. Only the first occurrence is needed, so no `g` flag.
	const listStyleRegexp = new RegExp( `@list l${ listItem.id }:level${ listItem.indent }\\s*({[^}]*)`, 'i' );

	// The value ends at the next `;` or at the end of the captured block. The terminating `;` is not
	// required, so the property is also found when it is the last declaration and has no semicolon.
	const listStyleTypeRegex = /mso-level-number-format:([^;]*)/i;

	const listStyleMatch = listStyleRegexp.exec( styles );

	let listStyleType = 'decimal'; // Decimal is default one.

	if ( listStyleMatch && listStyleMatch[ 1 ] ) {
		const listStyleTypeMatch = listStyleTypeRegex.exec( listStyleMatch[ 1 ] );

		if ( listStyleTypeMatch && listStyleTypeMatch[ 1 ] ) {
			listStyleType = listStyleTypeMatch[ 1 ].trim();
		}
	}

	return {
		// Only `bullet` and `image` formats produce unordered lists, every other format is a numbering.
		type: listStyleType !== 'bullet' && listStyleType !== 'image' ? 'ol' : 'ul',
		style: listStyleType
	};
}
|
||
// Removes span with a numbering/bullet from the given list element. | ||
// | ||
// @param {module:engine/view/element~Element} listElement | ||
function removeBulletElement( listElement ) {
	const firstChild = listElement.getChild( 0 );

	// An empty list-like element has no bullet to strip and no child on which the walker start
	// position could be anchored, so there is nothing to do.
	if ( !firstChild ) {
		return;
	}

	// NOTE(review): no `boundaries` are passed to the walker, so the walk may continue past
	// `listElement` - confirm this is intended.
	const treeWalker = new TreeWalker( { startPosition: Position.createBefore( firstChild ), ignoreElementEnd: true } );

	for ( const value of treeWalker ) {
		if ( value.type === 'elementStart' && listBulletMatcher.match( value.item ) ) {
			writer.remove( value.item );
		}
	}
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,88 @@ | ||
/** | ||
* @license Copyright (c) 2003-2018, CKSource - Frederico Knabben. All rights reserved. | ||
* For licensing, see LICENSE.md. | ||
*/ | ||
|
||
/** | ||
* @module pastefromoffice/filters/utils | ||
*/ | ||
|
||
/* globals DOMParser */ | ||
|
||
import DomConverter from '@ckeditor/ckeditor5-engine/src/view/domconverter'; | ||
import { NBSP_FILLER } from '@ckeditor/ckeditor5-engine/src/view/filler'; | ||
|
||
const domParser = new DOMParser(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Also, to be created when needed. |
||
const domConverter = new DomConverter( { blockFiller: NBSP_FILLER } ); | ||
|
||
/** | ||
* Parses provided HTML extracting contents of `body` and `style` tags. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
* | ||
* @param {String} htmlString HTML string to be parsed. | ||
* @returns {Object} result | ||
* @returns {module:engine/view/documentfragment~DocumentFragment} result.body Parsed body | ||
* content as a traversable structure. | ||
* @returns {String} result.bodyString Entire body content as a string. | ||
* @returns {Array.<CSSStyleSheet>} result.styles Array of native `CSSStyleSheet` objects, each representing | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Did you try to build docs? JSDoc may complain about this. We'd need to add it to the known types there. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
* separate `style` tag from the source HTML. | ||
* @returns {String} result.stylesString All `style` tags contents combined in the order of occurrence into one string. | ||
*/ | ||
export function parseHtml( htmlString ) {
	const parsedDocument = domParser.parseFromString( htmlString, 'text/html' );

	// Serialize the body before building the view - the conversion below moves
	// the body's child nodes out of the parsed document.
	const bodyString = parsedDocument.body.innerHTML;
	const body = documentToView( parsedDocument );

	// Stylesheets are read from the very same parsed document.
	const { styles, stylesString } = extractStyles( parsedDocument );

	return { body, bodyString, styles, stylesString };
}
|
||
// Transforms native `Document` object into {@link module:engine/view/documentfragment~DocumentFragment}. | ||
// | ||
// @param {Document} htmlDocument Native `Document` object to be transformed. | ||
// @returns {module:engine/view/documentfragment~DocumentFragment} | ||
function documentToView( htmlDocument ) {
	const container = htmlDocument.createDocumentFragment();
	const bodyChildren = htmlDocument.body.childNodes;

	// Keep moving the current first body child into the fragment until the body has no children left.
	while ( bodyChildren.length > 0 ) {
		const firstNode = bodyChildren[ 0 ];

		container.appendChild( firstNode );
	}

	const viewFragment = domConverter.domToView( container );

	return viewFragment;
}
|
||
// Extracts both `CSSStyleSheet` and string representation from all `style` elements available in a provided `htmlDocument`. | ||
// | ||
// @param {Document} htmlDocument Native `Document` object from which styles will be extracted. | ||
// @returns {Object} result | ||
// @returns {Array.<CSSStyleSheet>} result.styles Array of native `CSSStyleSheet` objects, each representing | ||
// separate `style` tag from the source object. | ||
// @returns {String} result.stylesString All `style` tags contents combined in the order of occurrence as one string. | ||
function extractStyles( htmlDocument ) { | ||
const styles = []; | ||
const stylesString = []; | ||
|
||
for ( const el of htmlDocument.all ) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not sure how I get to this, not using |
||
if ( el.tagName.toLowerCase() === 'style' && el.sheet && el.sheet.rules && el.sheet.rules.length ) { | ||
styles.push( el.sheet ); | ||
stylesString.push( el.innerHTML ); | ||
} | ||
} | ||
|
||
return { | ||
styles, | ||
stylesString: stylesString.join( ' ' ) | ||
}; | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
/** | ||
* @license Copyright (c) 2003-2018, CKSource - Frederico Knabben. All rights reserved. | ||
* For licensing, see LICENSE.md. | ||
*/ | ||
|
||
/** | ||
* @module pastefromoffice/pastefromoffice | ||
*/ | ||
|
||
import Plugin from '@ckeditor/ckeditor5-core/src/plugin'; | ||
import Clipboard from '@ckeditor/ckeditor5-clipboard/src/clipboard'; | ||
|
||
import { parseHtml } from './filters/utils'; | ||
import { paragraphsToLists } from './filters/list'; | ||
|
||
/** | ||
* This plugin handles content pasted from Word and transforms it (if necessary) | ||
* to format suitable for editor {@link module:engine/model/model~Model}. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 👍 |
||
* | ||
* @extends module:core/plugin~Plugin | ||
*/ | ||
export default class PasteFromOffice extends Plugin { | ||
/** | ||
* @inheritDoc | ||
*/ | ||
static get pluginName() { | ||
return 'PasteFromOffice'; | ||
} | ||
|
||
/** | ||
* @inheritDoc | ||
*/ | ||
init() { | ||
const editor = this.editor; | ||
|
||
this.listenTo( editor.plugins.get( Clipboard ), 'inputTransformation', ( evt, data ) => { | ||
const html = data.dataTransfer.getData( 'text/html' ); | ||
|
||
if ( isWordInput( html ) ) { | ||
data.content = this._normalizeWordInput( html ); | ||
} | ||
}, { priority: 'high' } ); | ||
} | ||
|
||
/** | ||
* Normalizes input pasted from Word to format suitable for editor {@link module:engine/model/model~Model}. | ||
* | ||
* **Notice**: this function was exposed mainly for testing purposes and should not be called directly. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. s/Notice/Note |
||
* | ||
* @protected | ||
* @param {String} input Word input. | ||
* @returns {module:engine/view/documentfragment~DocumentFragment} Normalized input. | ||
*/ | ||
_normalizeWordInput( input ) { | ||
const { body, stylesString } = parseHtml( input ); | ||
const normalizedInput = paragraphsToLists( body, stylesString ); | ||
|
||
return normalizedInput; | ||
} | ||
} | ||
|
||
// Checks if given HTML string is a result of pasting content from Word. | ||
// | ||
// @param {String} html HTML string to test. | ||
// @returns {Boolean} True if given HTML string is a Word HTML. | ||
function isWordInput( html ) {
	// No HTML in the clipboard means no Word content.
	if ( !html ) {
		return false;
	}

	// Word marks its output with a `<meta name=Generator content="Microsoft Word NN">` tag.
	// Only the presence of the tag matters, so a non-global pattern with `test()` is used - it stops
	// at the first occurrence and does not build an array of all matches.
	return /<meta\s*name="?generator"?\s*content="?microsoft\s*word\s*\d+"?\/?>/i.test( html );
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
PS. I forgot to add – you don't need to import the tree walker. You can use
Range#getWalker()
or, if you don't need to set any walker params, simply iterate over the range.