Skip to content
This repository has been archived by the owner on Jun 26, 2020. It is now read-only.

Support for pasting flat lists #7

Merged
merged 34 commits into from
Oct 25, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
6a2f58c
Tests: lists integration tests.
f1ames Aug 13, 2018
3138aa7
Tests: Added PFW plugin to manual test.
f1ames Aug 21, 2018
8d3f491
Tests: Updated lists integration tests.
f1ames Aug 21, 2018
9069ff0
Paste from Word plugin added with flat lists support.
f1ames Aug 21, 2018
dc40c3d
Removed 'stringifyView' filter wrapper.
f1ames Aug 21, 2018
b71c6c6
Tests: common filters unit tests.
f1ames Aug 21, 2018
a64bcdf
Tests: list filter unit tests.
f1ames Aug 21, 2018
17b8fa8
Tests: general plugin tests.
f1ames Aug 22, 2018
13eccae
The 'bodyToView' filter return type adjustments.
f1ames Aug 22, 2018
681da17
Tests: updated tests structure.
f1ames Aug 22, 2018
10b2890
Tests: Moved integration tests to different directory.
f1ames Aug 22, 2018
054f704
Tests: Lists normalization unit tests.
f1ames Aug 22, 2018
ff66a56
Tests: list integration tests now validates input of 'insertContent()…
f1ames Aug 23, 2018
3d105fc
Improved docs.
f1ames Aug 23, 2018
3bee569
Merge branch 'master' into t/5
f1ames Aug 27, 2018
8595645
Adjustments to new 'Paste from Office' name.
f1ames Aug 28, 2018
4e5f298
Common filters combined as one 'parseHtml()' function.
f1ames Aug 28, 2018
db5cc8e
Tests: adjusted tests to new filters structure.
f1ames Aug 28, 2018
9a28f5b
Docs rewording.
f1ames Aug 28, 2018
f702773
'UpcastWriter' calls adjusted.
f1ames Aug 31, 2018
c88136b
Missing dev dependencies added.
f1ames Aug 31, 2018
4107f66
Transform content on 'inputTransformation' event instead of 'clipboar…
f1ames Sep 17, 2018
a2b9ddf
Tests: Adjusted tests to 'inputTransformation' event.
f1ames Sep 17, 2018
1154c56
Code and docs adjustments.
f1ames Sep 24, 2018
0173707
Tests: skip 4 failing unit test.
f1ames Sep 24, 2018
8954a84
Use 'cssRules' instead of 'rules' when processing styles.
f1ames Sep 24, 2018
4408c1b
Fix for 'TypeError: Object doesn't support property or method Symbol.…
f1ames Sep 24, 2018
ffec498
List filter refactoring.
f1ames Sep 25, 2018
84a87ef
Wording. [skip ci]
f1ames Sep 25, 2018
7389dfe
Tests: Unit test for empty style tag handling. Bring CC back to 100%.
f1ames Sep 25, 2018
1671ea0
Get rid of TreeWalker.
f1ames Sep 25, 2018
9ffb255
Other: Updated dev deps versions.
f1ames Oct 18, 2018
c374a26
Various improvements.
Reinmar Oct 25, 2018
9811665
Updated dependencies.
Reinmar Oct 25, 2018
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 14 additions & 11 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -12,17 +12,20 @@
],
"dependencies": {},
"devDependencies": {
"@ckeditor/ckeditor5-basic-styles": "^10.0.2",
"@ckeditor/ckeditor5-clipboard": "^10.0.2",
"@ckeditor/ckeditor5-core": "^11.0.0",
"@ckeditor/ckeditor5-editor-classic": "^11.0.0",
"@ckeditor/ckeditor5-engine": "^10.2.0",
"@ckeditor/ckeditor5-enter": "^10.1.1",
"@ckeditor/ckeditor5-heading": "^10.0.2",
"@ckeditor/ckeditor5-paragraph": "^10.0.2",
"@ckeditor/ckeditor5-table": "^10.1.0",
"eslint": "^4.15.0",
"eslint-config-ckeditor5": "^1.0.7",
"@ckeditor/ckeditor5-basic-styles": "^10.0.3",
"@ckeditor/ckeditor5-clipboard": "^10.0.3",
"@ckeditor/ckeditor5-core": "^11.0.1",
"@ckeditor/ckeditor5-editor-classic": "^11.0.1",
"@ckeditor/ckeditor5-engine": "^11.0.0",
"@ckeditor/ckeditor5-enter": "^10.1.2",
"@ckeditor/ckeditor5-heading": "^10.1.0",
"@ckeditor/ckeditor5-link": "^10.0.4",
"@ckeditor/ckeditor5-list": "^11.0.2",
"@ckeditor/ckeditor5-paragraph": "^10.0.3",
"@ckeditor/ckeditor5-table": "^11.0.0",
"@ckeditor/ckeditor5-utils": "^11.0.0",
"eslint": "^5.5.0",
"eslint-config-ckeditor5": "^1.0.8",
"husky": "^0.14.3",
"lint-staged": "^7.0.0"
},
Expand Down
216 changes: 216 additions & 0 deletions src/filters/list.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,216 @@
/**
* @license Copyright (c) 2003-2018, CKSource - Frederico Knabben. All rights reserved.
* For licensing, see LICENSE.md.
*/

/**
* @module paste-from-office/filters/list
*/

import Element from '@ckeditor/ckeditor5-engine/src/view/element';
import Matcher from '@ckeditor/ckeditor5-engine/src/view/matcher';
import Range from '@ckeditor/ckeditor5-engine/src/view/range';
import UpcastWriter from '@ckeditor/ckeditor5-engine/src/view/upcastwriter';

/**
 * Converts Word specific list-like block elements into semantic HTML lists.
 *
 * Word does not output real lists. It marks ordinary block elements with the `mso-list` style instead, for example:
 *
 *		<p class=MsoListParagraphCxSpFirst style='mso-list:l1 level1 lfo1'>...</p> // Paragraph based list.
 *		<h1 style='mso-list:l0 level1 lfo1'>...</h1> // Heading 1 based list.
 *
 * The passed document fragment is modified in place and nothing is returned.
 *
 * @param {module:engine/view/documentfragment~DocumentFragment} documentFragment The view structure to transform.
 * @param {String} stylesString Styles from which the styling of list-like elements is extracted.
 */
export function transformListItemLikeElementsIntoLists( documentFragment, stylesString ) {
	if ( !documentFragment.childCount ) {
		return;
	}

	const items = findAllItemLikeElements( documentFragment );

	if ( !items.length ) {
		return;
	}

	const writer = new UpcastWriter();
	let targetList = null;

	for ( let index = 0; index < items.length; index++ ) {
		const item = items[ index ];

		// A list is opened for the very first item and whenever the list id differs from the previous item's id.
		if ( !targetList || isNewListNeeded( items[ index - 1 ], item ) ) {
			const style = detectListStyle( item, stylesString );

			targetList = insertNewEmptyList( style, item.element, writer );
		}

		// The original block element becomes an `li` and is moved into the current list.
		writer.appendChild( transformElementIntoListItem( item.element, writer ), targetList );
	}
}

// Finds all list-like elements in a given document fragment.
//
// An element is treated as list-like when it is a paragraph (`p`) or a heading (`h1`, `h2`, ...)
// and it has the `mso-list` style set.
//
// @param {module:engine/view/documentfragment~DocumentFragment} documentFragment Document fragment
// in which to look for list-like nodes.
// @returns {Array.<Object>} Array of found list-like items, in the order in which they were encountered.
// Each item is an object containing:
//
//		* {module:engine/src/view/element~Element} element List-like element.
//		* {Number} id List item id parsed from `mso-list` style (see `getListItemData()` function).
//		* {Number} order List item creation order parsed from `mso-list` style (see `getListItemData()` function).
//		* {Number} indent List item indentation level parsed from `mso-list` style (see `getListItemData()` function).
function findAllItemLikeElements( documentFragment ) {
	const range = Range.createIn( documentFragment );

	// Matcher for finding list-like elements.
	//
	// The alternation is wrapped in a group so that both anchors apply to every alternative. Without the group
	// the pattern means "starts with `p`" OR "ends with `h<digits>`" and also accepts names such as `pre`.
	const itemLikeElementsMatcher = new Matcher( {
		name: /^(?:p|h\d+)$/,
		styles: {
			'mso-list': /.*/
		}
	} );

	const itemLikeElements = [];

	for ( const value of range ) {
		if ( value.type === 'elementStart' && itemLikeElementsMatcher.match( value.item ) ) {
			const itemData = getListItemData( value.item );

			itemLikeElements.push( {
				element: value.item,
				id: itemData.id,
				order: itemData.order,
				indent: itemData.indent
			} );
		}
	}

	return itemLikeElements;
}

// Extracts list item style from the provided CSS.
//
// List item style is extracted from a CSS stylesheet. Each list with its specific style attribute
// value (`mso-list:l1 level1 lfo1`) has its dedicated properties in the stylesheet, defined with a selector like:
//
//		@list l1:level1 { ... }
//
// It contains the `mso-level-number-format` property which defines list numbering/bullet style. If this property
// is not defined (or its value is empty) the default `decimal` numbering is assumed.
//
// The string representation of the CSS is used here because `mso-level-number-format` is an invalid CSS property
// and will be removed during CSS parsing.
//
// @param {Object} listLikeItem List-like item for which list style will be searched for. Usually
// a result of `findAllItemLikeElements()` function. Only its `id` and `indent` properties are read.
// @param {String} stylesString CSS stylesheet.
// @returns {Object} result
// @returns {String} result.type List type, could be `ul` or `ol`.
// @returns {String} result.style List style, for example: `decimal`, `lower-roman`, etc. It is extracted
// directly from Word stylesheet without further processing and may be not compatible
// with CSS `list-style-type` property accepted values.
function detectListStyle( listLikeItem, stylesString ) {
	// Both patterns are used exactly once, so the stateful `g` flag is not needed.
	const listStyleRegexp = new RegExp( `@list l${ listLikeItem.id }:level${ listLikeItem.indent }\\s*({[^}]*)`, 'i' );

	// The value ends at the next `;` or at the end of the rule body. The trailing semicolon is optional
	// because the last declaration of a CSS rule does not have to be terminated.
	const listStyleTypeRegex = /mso-level-number-format:([^;]*)/i;

	const listStyleMatch = listStyleRegexp.exec( stylesString );

	let listStyleType = 'decimal'; // Decimal is default one.

	if ( listStyleMatch && listStyleMatch[ 1 ] ) {
		const listStyleTypeMatch = listStyleTypeRegex.exec( listStyleMatch[ 1 ] );

		if ( listStyleTypeMatch ) {
			const declaredType = listStyleTypeMatch[ 1 ].trim();

			// A whitespace-only value carries no information, so the default is kept.
			if ( declaredType ) {
				listStyleType = declaredType;
			}
		}
	}

	return {
		type: listStyleType !== 'bullet' && listStyleType !== 'image' ? 'ol' : 'ul',
		style: listStyleType
	};
}

// Creates an empty list element of the given type and inserts it into the parent of the specified element,
// at the index currently occupied by that element (i.e. directly before it).
//
// @param {Object} listStyle List style object which determines the type of the newly created list.
// Usually a result of `detectListStyle()` function. Only its `type` property is read.
// @param {module:engine/view/element~Element} element Element before which the list is inserted.
// @param {module:engine/view/upcastwriter~UpcastWriter} writer
// @returns {module:engine/view/element~Element} Newly created list element.
function insertNewEmptyList( listStyle, element, writer ) {
	const emptyList = new Element( listStyle.type );
	const parent = element.parent;

	writer.insertChild( parent.getChildIndex( element ), emptyList, parent );

	return emptyList;
}

// Turns the given element into a semantic list item (`li`). The function works directly on the provided
// {module:engine/src/view/element~Element element}, so the view structure to which it belongs gets modified.
//
// The numbering/bullet span is removed from the element first, then the element is renamed.
//
// @param {module:engine/view/element~Element} element Element which will be transformed into list item.
// @param {module:engine/view/upcastwriter~UpcastWriter} writer
// @returns {module:engine/view/element~Element} New element to which the given one was transformed. It is
// inserted in place of the old element (the reference to the old element is lost due to renaming).
function transformElementIntoListItem( element, writer ) {
	removeBulletElement( element, writer );

	const listItem = writer.rename( 'li', element );

	return listItem;
}

// Extracts list item information from Word specific list-like element style:
//
//		`style="mso-list:l1 level1 lfo1"`
//
// where:
//
//		* `l1` is a list id (all elements with the same id belong to the same list),
//		* `level1` is a list item indentation level,
//		* `lfo1` is a list insertion order in a document.
//
// If the element has no `mso-list` style, or the style value does not contain all three tokens
// (for example `mso-list:none`), an empty object is returned instead of throwing.
//
// @param {module:engine/view/element~Element} element Element from which style data is extracted.
// @returns {Object} result
// @returns {Number} result.id Parent list id.
// @returns {Number} result.order List item creation order.
// @returns {Number} result.indent List item indentation level.
function getListItemData( element ) {
	const data = {};
	const listStyle = element.getStyle( 'mso-list' );

	if ( !listStyle ) {
		return data;
	}

	const idMatch = listStyle.match( /(^|\s+)l(\d+)/i );
	const orderMatch = listStyle.match( /\s*lfo(\d+)/i );
	const indentMatch = listStyle.match( /\s*level(\d+)/i );

	// `String#match()` returns `null` when a token is missing, so all matches are verified before indexing.
	if ( idMatch && orderMatch && indentMatch ) {
		data.id = parseInt( idMatch[ 2 ], 10 );
		data.order = parseInt( orderMatch[ 1 ], 10 );
		data.indent = parseInt( indentMatch[ 1 ], 10 );
	}

	return data;
}

// Removes `span` elements holding the list numbering/bullet from inside a given element.
// Such spans are recognized by the `mso-list:Ignore` style.
//
// @param {module:engine/view/element~Element} element
// @param {module:engine/view/upcastwriter~UpcastWriter} writer
function removeBulletElement( element, writer ) {
	const isBulletSpan = new Matcher( {
		name: 'span',
		styles: {
			'mso-list': 'Ignore'
		}
	} );

	for ( const step of Range.createIn( element ) ) {
		// Only opening positions of elements are of interest.
		if ( step.type !== 'elementStart' ) {
			continue;
		}

		if ( isBulletSpan.match( step.item ) ) {
			writer.remove( step.item );
		}
	}
}

// Checks whether the current list-like item should start a new list, which is the case
// when its list id differs from the id of the previous item.
//
// @param {Object} previousItem Previous list-like item. Only its `id` property is read.
// @param {Object} currentItem Current list-like item. Only its `id` property is read.
// @returns {Boolean} `true` if the ids differ, `false` otherwise.
function isNewListNeeded( previousItem, currentItem ) {
	const belongsToSameList = previousItem.id === currentItem.id;

	return !belongsToSameList;
}
89 changes: 89 additions & 0 deletions src/filters/utils.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
/**
* @license Copyright (c) 2003-2018, CKSource - Frederico Knabben. All rights reserved.
* For licensing, see LICENSE.md.
*/

/**
* @module paste-from-office/filters/utils
*/

/* globals DOMParser */

import DomConverter from '@ckeditor/ckeditor5-engine/src/view/domconverter';
import { NBSP_FILLER } from '@ckeditor/ckeditor5-engine/src/view/filler';

/**
* Parses provided HTML extracting contents of `<body>` and `<style>` tags.
*
* @param {String} htmlString HTML string to be parsed.
* @returns {Object} result
* @returns {module:engine/view/documentfragment~DocumentFragment} result.body Parsed body
* content as a traversable structure.
* @returns {String} result.bodyString Entire body content as a string.
* @returns {Array.<CSSStyleSheet>} result.styles Array of native `CSSStyleSheet` objects, each representing
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Did you try to build docs? JSDoc may complain about this. We'd need to add it to the known types there.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems to be working fine. No errors during docs build.
image

* separate `style` tag from the source HTML.
* @returns {String} result.stylesString All `style` tags contents combined in the order of occurrence into one string.
*/
export function parseHtml( htmlString ) {
	// Let the native parser build a `Document` object out of the HTML string.
	const parsedDocument = new DOMParser().parseFromString( htmlString, 'text/html' );

	// The serialized body has to be read before the conversion to view, as the conversion modifies the source document.
	const bodyString = parsedDocument.body.innerHTML;

	// Convert the content of `document.body` to view.
	const body = documentToView( parsedDocument );

	// Collect the stylesheets, both as native objects and as one combined string.
	const { styles, stylesString } = extractStyles( parsedDocument );

	return {
		body,
		bodyString,
		styles,
		stylesString
	};
}

// Converts the content of the `<body>` of a native `Document` object into
// {@link module:engine/view/documentfragment~DocumentFragment}. All body child nodes are moved out of
// the document into a native document fragment, so the passed document is modified.
//
// @param {Document} htmlDocument Native `Document` object to be transformed.
// @returns {module:engine/view/documentfragment~DocumentFragment}
function documentToView( htmlDocument ) {
	const converter = new DomConverter( { blockFiller: NBSP_FILLER } );
	const domFragment = htmlDocument.createDocumentFragment();
	const bodyChildren = htmlDocument.body.childNodes;

	// Appending a node to the fragment takes it out of the body, so the first
	// remaining child is moved on each pass until none are left.
	while ( bodyChildren.length ) {
		const firstChild = bodyChildren[ 0 ];

		domFragment.appendChild( firstChild );
	}

	return converter.domToView( domFragment );
}

// Collects the stylesheets of all `style` elements found in the provided `htmlDocument`, both as native
// `CSSStyleSheet` objects and as text. A `style` element is skipped when it has no sheet or its sheet has no CSS rules.
//
// @param {Document} htmlDocument Native `Document` object from which styles will be extracted.
// @returns {Object} result
// @returns {Array.<CSSStyleSheet>} result.styles Array of native `CSSStyleSheet` objects, each representing
// a separate `style` tag from the source object.
// @returns {String} result.stylesString Contents of all (not skipped) `style` tags joined with a single space,
// in the order of occurrence.
function extractStyles( htmlDocument ) {
	const hasRules = tag => tag.sheet && tag.sheet.cssRules && tag.sheet.cssRules.length;
	const usableTags = Array.from( htmlDocument.getElementsByTagName( 'style' ) ).filter( hasRules );

	return {
		styles: usableTags.map( tag => tag.sheet ),
		stylesString: usableTags.map( tag => tag.innerHTML ).join( ' ' )
	};
}
Loading