Skip to content
This repository has been archived by the owner on Jun 26, 2020. It is now read-only.

Support for pasting images #28

Merged
merged 29 commits into from
Nov 27, 2018
Merged
Show file tree
Hide file tree
Changes from 28 commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
450a5ef
Use RTF data to replace images local paths with their base64 represen…
f1ames Oct 19, 2018
db862fe
Tests: Pasting images from Word, initial tests.
f1ames Oct 19, 2018
c6e3cfd
Tests: Pasting images from Word - more complex tests added.
f1ames Oct 24, 2018
74a141a
Image pasting filter refactoring.
f1ames Oct 24, 2018
7eedf1c
Tests: Image pasting model content integration tests.
f1ames Oct 24, 2018
f90efa2
Tests: Updated fixtures with data generated from latest MS Word.
f1ames Oct 26, 2018
a2c233b
Tests: Added browser specific fixtures.
f1ames Oct 26, 2018
60c4baa
Tests: Reorganized image test directory.
f1ames Oct 26, 2018
56267fa
Support for pasting images from Word in Safari added.
f1ames Oct 29, 2018
14ae502
Tests: Pasting images in Safari normalization tests.
f1ames Oct 29, 2018
83cb4f8
Tests: Fixtures for Safari image integration tests added.
f1ames Nov 2, 2018
67e0e2f
Tests: Integration tests for pasting images in Safari added.
f1ames Nov 2, 2018
5bbbd31
Docs: Rewording.
f1ames Nov 5, 2018
a598a6b
Merge branch 't/12' into t/21
f1ames Nov 5, 2018
eadaf03
Adjustments to recent API changes. Use 'ViewWriter' consistently acro…
f1ames Nov 5, 2018
73215d9
Tests: Added missing 'image' package to deps.
f1ames Nov 5, 2018
1b553ba
Tests: Skip failing Chrome and Firefox test on Edge.
f1ames Nov 5, 2018
3dddb02
Tests: Make blob fixtures names consistent.
f1ames Nov 5, 2018
006735b
Merge branch 'master' into t/21
f1ames Nov 6, 2018
f16286a
Tests: Refactoring and 100% cc.
f1ames Nov 6, 2018
c052f00
Tests: Imports fix.
f1ames Nov 6, 2018
06ac2f9
Merge branch 'master' into t/21
f1ames Nov 6, 2018
19cdcdc
Merge branch 'master' into t/21
f1ames Nov 14, 2018
f4c2d2f
Pasted image blobs should be handled in 'UploadImage' plugin.
f1ames Nov 20, 2018
0f1ea89
Merge branch 'master' into t/21
Reinmar Nov 23, 2018
bb6e5d7
Enabled CS in the manual sample.
Reinmar Nov 23, 2018
5827a97
Updated dev deps.
f1ames Nov 26, 2018
c33342d
Docs: Mention issues fixed by space normalization. [skip ci]
f1ames Nov 26, 2018
97ba106
Refactoring.
f1ames Nov 27, 2018
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,12 @@
},
"devDependencies": {
"@ckeditor/ckeditor5-basic-styles": "^10.0.3",
"@ckeditor/ckeditor5-cloud-services": "^10.1.0",
"@ckeditor/ckeditor5-easy-image": "^10.0.3",
"@ckeditor/ckeditor5-editor-classic": "^11.0.1",
"@ckeditor/ckeditor5-enter": "^10.1.2",
"@ckeditor/ckeditor5-heading": "^10.1.0",
"@ckeditor/ckeditor5-image": "^11.0.0",
"@ckeditor/ckeditor5-link": "^10.0.4",
"@ckeditor/ckeditor5-list": "^11.0.2",
"@ckeditor/ckeditor5-paragraph": "^10.0.3",
Expand Down
206 changes: 206 additions & 0 deletions src/filters/image.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,206 @@
/**
* @license Copyright (c) 2003-2018, CKSource - Frederico Knabben. All rights reserved.
* For licensing, see LICENSE.md.
*/

/**
* @module paste-from-office/filters/image
*/

import ViewMatcher from '@ckeditor/ckeditor5-engine/src/view/matcher';
import UpcastWriter from '@ckeditor/ckeditor5-engine/src/view/upcastwriter';

import { convertHexToBase64 } from './utils';

/**
 * Normalizes images in the pasted content. `<img>` elements standing for Word shapes are removed together with
 * all shape (`<v:*>`) elements. Every remaining `<img>` whose source points to a local `file://` resource gets its
 * `src` replaced with an inline base64 representation built from the RTF data.
 *
 * @param {module:engine/view/documentfragment~DocumentFragment} documentFragment Document fragment in which images are transformed.
 * @param {String} rtfData The RTF data from which the image representations are taken.
 */
export function replaceImagesSourceWithBase64( documentFragment, rtfData ) {
	if ( documentFragment.childCount ) {
		const writer = new UpcastWriter();

		// Shape ids have to be collected before the shape elements themselves are removed.
		const idsOfShapes = findAllShapesIds( documentFragment, writer );

		removeAllImgElementsRepresentingShapes( idsOfShapes, documentFragment, writer );
		removeAllShapeElements( documentFragment, writer );

		const localImages = findAllImageElementsWithLocalSource( documentFragment, writer );

		if ( localImages.length > 0 ) {
			const hexSources = extractImageDataFromRtf( rtfData );

			replaceImagesFileSourceWithInlineRepresentation( localImages, hexSources, writer );
		}
	}
}

// Collects ids of shape elements (`<v:*>...</v:*>`) which are considered Word shapes, that is elements
// which have the `o:gfxdata` attribute and are not placed directly after a `<v:shapetype>` sibling.
//
// @param {module:engine/view/documentfragment~DocumentFragment} documentFragment Document fragment
// which is scanned for shape elements.
// @param {module:engine/view/upcastwriter~UpcastWriter} writer
// @returns {Array.<String>} Array of shape ids.
function findAllShapesIds( documentFragment, writer ) {
	const range = writer.createRangeIn( documentFragment );
	const shapeMatcher = new ViewMatcher( { name: /v:(.+)/ } );
	const ids = [];

	for ( const { item } of range ) {
		// Only `<v:*>` elements carrying the `o:gfxdata` attribute are candidates.
		if ( !shapeMatcher.match( item ) || !item.getAttribute( 'o:gfxdata' ) ) {
			continue;
		}

		const previous = item.previousSibling;
		const previousName = previous && previous.name || null;

		// A candidate placed right after `<v:shapetype>` is not counted as a Word shape.
		if ( previousName !== 'v:shapetype' ) {
			ids.push( item.getAttribute( 'id' ) );
		}
	}

	return ids;
}

// Removes every `<img>` element which stands for a Word shape instead of a regular image. An `<img>` is removed
// when all ids listed in its `v:shapes` attribute are known shape ids, or when it has no `src` attribute value.
//
// @param {Array.<String>} shapesIds Shape ids against which the `v:shapes` attribute of `<img>` elements is checked.
// @param {module:engine/view/documentfragment~DocumentFragment} documentFragment Document fragment from which to remove `<img>` elements.
// @param {module:engine/view/upcastwriter~UpcastWriter} writer
function removeAllImgElementsRepresentingShapes( shapesIds, documentFragment, writer ) {
	const range = writer.createRangeIn( documentFragment );
	const imgMatcher = new ViewMatcher( { name: 'img' } );
	const toRemove = [];

	for ( const { item } of range ) {
		if ( !imgMatcher.match( item ) ) {
			continue;
		}

		const shapesAttribute = item.getAttribute( 'v:shapes' );
		const referencedShapes = shapesAttribute ? shapesAttribute.split( ' ' ) : [];
		const representsShapesOnly = referencedShapes.length > 0 &&
			referencedShapes.every( id => shapesIds.indexOf( id ) > -1 );

		// Shapes may also have an empty source when content is pasted in some browsers (Safari).
		if ( representsShapesOnly || !item.getAttribute( 'src' ) ) {
			toRemove.push( item );
		}
	}

	// Elements are removed only after the traversal has finished.
	toRemove.forEach( img => {
		writer.remove( img );
	} );
}

// Strips all shape elements (`<v:*>...</v:*>`) from the given document fragment so that they do not end up
// in the output structure.
//
// @param {module:engine/view/documentfragment~DocumentFragment} documentFragment Document fragment from which to remove shape elements.
// @param {module:engine/view/upcastwriter~UpcastWriter} writer
function removeAllShapeElements( documentFragment, writer ) {
	const range = writer.createRangeIn( documentFragment );
	const shapeMatcher = new ViewMatcher( { name: /v:(.+)/ } );

	// Take a snapshot of the matching items first; the tree is modified only once the traversal is complete.
	const shapeElements = Array.from( range, value => value.item )
		.filter( item => shapeMatcher.match( item ) );

	shapeElements.forEach( element => {
		writer.remove( element );
	} );
}

// Finds all `<img>` elements in a given document fragment which have source pointing to local `file://` resource.
// `<img>` elements which have no `src` attribute value are skipped.
//
// @param {module:engine/view/documentfragment~DocumentFragment} documentFragment Document fragment in which to look for `<img>` elements.
// @param {module:engine/view/upcastwriter~UpcastWriter} writer
// @returns {Array.<module:engine/view/element~Element>} Array of found `<img>` elements with `file://` source,
// listed in the order in which the range traversal visits them.
function findAllImageElementsWithLocalSource( documentFragment, writer ) {
	const range = writer.createRangeIn( documentFragment );

	const imageElementsMatcher = new ViewMatcher( {
		name: 'img'
	} );

	const imgs = [];

	for ( const value of range ) {
		if ( !imageElementsMatcher.match( value.item ) ) {
			continue;
		}

		const src = value.item.getAttribute( 'src' );

		// The `src` attribute may be missing, so make sure there is a value before inspecting it.
		if ( src && src.indexOf( 'file://' ) === 0 ) {
			imgs.push( value.item );
		}
	}

	return imgs;
}

// Extracts all images HEX representations from a given RTF data. Only pictures marked as PNG (`\pngblip`)
// or JPEG (`\jpegblip`) are returned, pictures in any other format are skipped.
//
// @param {String} rtfData The RTF data from which to extract images HEX representation.
// @returns {Array.<Object>} Array of found HEX representations (empty if `rtfData` is empty or contains no
// matching pictures). Each array item is an object containing:
//
//		* {String} hex Image representation in HEX format.
//		* {String} type Type of image, `image/png` or `image/jpeg`.
function extractImageDataFromRtf( rtfData ) {
	if ( !rtfData ) {
		return [];
	}

	// Matches the picture group opening together with its properties, up to the place where the HEX data starts.
	const regexPictureHeader = /{\\pict[\s\S]+?\\bliptag-?\d+(\\blipupi-?\d+)?({\\\*\\blipuid\s?[\da-fA-F]+)?[\s}]*?/;
	// Matches the whole picture group: the header followed by HEX data (possibly split by whitespace) and closing `}`.
	const regexPicture = new RegExp( '(?:(' + regexPictureHeader.source + '))([\\da-fA-F\\s]+)\\}', 'g' );
	// `String#match()` returns `null` when there are no matches.
	const images = rtfData.match( regexPicture ) || [];
	const result = [];

	for ( const image of images ) {
		let imageType = false;

		if ( image.indexOf( '\\pngblip' ) !== -1 ) {
			imageType = 'image/png';
		} else if ( image.indexOf( '\\jpegblip' ) !== -1 ) {
			imageType = 'image/jpeg';
		}

		if ( imageType ) {
			result.push( {
				// Drop the header and then everything which is not a HEX digit (whitespace, braces).
				hex: image.replace( regexPictureHeader, '' ).replace( /[^\da-fA-F]/g, '' ),
				type: imageType
			} );
		}
	}

	return result;
}

// Sets the `src` attribute of each given image to a base64 data URI built from the HEX source found at the same
// position in `imagesHexSources`. When both arrays differ in length, nothing is changed.
//
// @param {Array.<module:engine/view/element~Element>} imageElements Array of image elements which will have its source replaced.
// @param {Array.<Object>} imagesHexSources Array of images hex sources (usually the result of `extractImageDataFromRtf()` function).
// The array should be the same length as `imageElements` parameter.
// @param {module:engine/view/upcastwriter~UpcastWriter} writer
function replaceImagesFileSourceWithInlineRepresentation( imageElements, imagesHexSources, writer ) {
	// Elements and HEX sources are paired by their order, which is possible only when there is the same number of both.
	if ( imageElements.length !== imagesHexSources.length ) {
		return;
	}

	imageElements.forEach( ( imageElement, index ) => {
		const { type, hex } = imagesHexSources[ index ];
		const base64 = convertHexToBase64( hex );

		writer.setAttribute( 'src', `data:${ type };base64,${ base64 }`, imageElement );
	} );
}
25 changes: 11 additions & 14 deletions src/filters/list.js
Original file line number Diff line number Diff line change
Expand Up @@ -21,21 +21,19 @@ import UpcastWriter from '@ckeditor/ckeditor5-engine/src/view/upcastwriter';
*
* @param {module:engine/view/documentfragment~DocumentFragment} documentFragment The view structure which to transform.
* @param {String} stylesString Styles from which list-like elements styling will be extracted.
* @param {module:engine/view/view~View} view
*/
export function transformListItemLikeElementsIntoLists( documentFragment, stylesString, view ) {
export function transformListItemLikeElementsIntoLists( documentFragment, stylesString ) {
if ( !documentFragment.childCount ) {
return;
}

const itemLikeElements = findAllItemLikeElements( documentFragment, view );
const writer = new UpcastWriter();
const itemLikeElements = findAllItemLikeElements( documentFragment, writer );

if ( !itemLikeElements.length ) {
return;
}

const writer = new UpcastWriter();

let currentList = null;

itemLikeElements.forEach( ( itemLikeElement, i ) => {
Expand All @@ -45,7 +43,7 @@ export function transformListItemLikeElementsIntoLists( documentFragment, styles
currentList = insertNewEmptyList( listStyle, itemLikeElement.element, writer );
}

const listItem = transformElementIntoListItem( itemLikeElement.element, writer, view );
const listItem = transformElementIntoListItem( itemLikeElement.element, writer );

writer.appendChild( listItem, currentList );
} );
Expand All @@ -55,15 +53,15 @@ export function transformListItemLikeElementsIntoLists( documentFragment, styles
//
// @param {module:engine/view/documentfragment~DocumentFragment} documentFragment Document fragment
// in which to look for list-like nodes.
// @param {module:engine/view/view~View} view
// @param {module:engine/view/upcastwriter~UpcastWriter} writer
// @returns {Array.<Object>} Array of found list-like items. Each item is an object containing:
//
// * {module:engine/src/view/element~Element} element List-like element.
// * {Number} id List item id parsed from `mso-list` style (see `getListItemData()` function).
// * {Number} order List item creation order parsed from `mso-list` style (see `getListItemData()` function).
// * {Number} indent List item indentation level parsed from `mso-list` style (see `getListItemData()` function).
function findAllItemLikeElements( documentFragment, view ) {
const range = view.createRangeIn( documentFragment );
function findAllItemLikeElements( documentFragment, writer ) {
const range = writer.createRangeIn( documentFragment );

// Matcher for finding list-like elements.
const itemLikeElementsMatcher = new Matcher( {
Expand Down Expand Up @@ -156,8 +154,8 @@ function insertNewEmptyList( listStyle, element, writer ) {
// @param {module:engine/view/upcastwriter~UpcastWriter} writer
// @returns {module:engine/view/element~Element} New element to which the given one was transformed. It is
// inserted in place of the old element (the reference to the old element is lost due to renaming).
function transformElementIntoListItem( element, writer, view ) {
removeBulletElement( element, writer, view );
function transformElementIntoListItem( element, writer ) {
removeBulletElement( element, writer );

return writer.rename( 'li', element );
}
Expand Down Expand Up @@ -194,8 +192,7 @@ function getListItemData( element ) {
//
// @param {module:engine/view/element~Element} element
// @param {module:engine/view/upcastwriter~UpcastWriter} writer
// @param {module:engine/view/view~View} view
function removeBulletElement( element, writer, view ) {
function removeBulletElement( element, writer ) {
// Matcher for finding `span` elements holding lists numbering/bullets.
const bulletMatcher = new Matcher( {
name: 'span',
Expand All @@ -204,7 +201,7 @@ function removeBulletElement( element, writer, view ) {
}
} );

const range = view.createRangeIn( element );
const range = writer.createRangeIn( element );

for ( const value of range ) {
if ( value.type === 'elementStart' && bulletMatcher.match( value.item ) ) {
Expand Down
3 changes: 3 additions & 0 deletions src/filters/parse.js
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@ import { normalizeSpacing, normalizeSpacerunSpans } from './space';
export function parseHtml( htmlString ) {
const domParser = new DOMParser();

// Remove Word specific "if comments" so content inside is not omitted by the parser.
htmlString = htmlString.replace( /<!--\[if gte vml 1]>/g, '' );

const normalizedHtml = normalizeSpacing( cleanContentAfterBody( htmlString ) );

// Parse htmlString as native Document object.
Expand Down
4 changes: 3 additions & 1 deletion src/filters/space.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,16 @@
* Replaces the last space preceding an element's closing tag with `&nbsp;`. Such an operation prevents spaces from being removed
* during further DOM/View processing (see especially {@link module:engine/view/domconverter~DomConverter#_processDataFromDomText}).
* This method also takes into account Word specific `<o:p></o:p>` empty tags.
* Additionally multiline sequences of spaces and new lines between tags are removed (see #39 and #40).
*
* @param {String} htmlString HTML string in which spacing should be normalized.
* @returns {String} Input HTML with spaces normalized.
*/
export function normalizeSpacing( htmlString ) {
return normalizeSafariSpaceSpans( normalizeSafariSpaceSpans( htmlString ) ) // Run normalization two times to cover nested spans.
.replace( / <\//g, '\u00A0</' )
.replace( / <o:p><\/o:p>/g, '\u00A0<o:p></o:p>' );
.replace( / <o:p><\/o:p>/g, '\u00A0<o:p></o:p>' )
.replace( />(\s*(\r\n?|\n)\s*)+</g, '><' );
}

/**
Expand Down
22 changes: 22 additions & 0 deletions src/filters/utils.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
/**
* @license Copyright (c) 2003-2018, CKSource - Frederico Knabben. All rights reserved.
* For licensing, see LICENSE.md.
*/

/**
* @module paste-from-office/filters/utils
*/

/* globals btoa */

/**
 * Converts given HEX string to base64 representation. The input is read in two-character chunks, each chunk
 * being treated as one byte. An input without any complete chunk (e.g. an empty string) gives an empty string.
 *
 * @param {String} hexString The HEX string to be converted.
 * @returns {String} Base64 representation of a given HEX string.
 */
export function convertHexToBase64( hexString ) {
	// `String#match()` returns `null` when nothing matches, so fall back to an empty list instead of throwing.
	const bytePairs = hexString.match( /\w{2}/g ) || [];

	return btoa( bytePairs.map( pair => {
		return String.fromCharCode( parseInt( pair, 16 ) );
	} ).join( '' ) );
}
10 changes: 7 additions & 3 deletions src/pastefromoffice.js
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import Clipboard from '@ckeditor/ckeditor5-clipboard/src/clipboard';

import { parseHtml } from './filters/parse';
import { transformListItemLikeElementsIntoLists } from './filters/list';
import { replaceImagesSourceWithBase64 } from './filters/image';

/**
* The Paste from Office plugin.
Expand Down Expand Up @@ -41,7 +42,7 @@ export default class PasteFromOffice extends Plugin {
const html = data.dataTransfer.getData( 'text/html' );

if ( isWordInput( html ) ) {
data.content = this._normalizeWordInput( html );
data.content = this._normalizeWordInput( html, data.dataTransfer );
}
}, { priority: 'high' } );
}
Expand All @@ -53,11 +54,14 @@ export default class PasteFromOffice extends Plugin {
*
* @protected
* @param {String} input Word input.
* @param {module:clipboard/datatransfer~DataTransfer} dataTransfer Data transfer instance.
* @returns {module:engine/view/documentfragment~DocumentFragment} Normalized input.
*/
_normalizeWordInput( input ) {
_normalizeWordInput( input, dataTransfer ) {
const { body, stylesString } = parseHtml( input );
transformListItemLikeElementsIntoLists( body, stylesString, this.editor.editing.view );

transformListItemLikeElementsIntoLists( body, stylesString );
replaceImagesSourceWithBase64( body, dataTransfer.getData( 'text/rtf' ) );

return body;
}
Expand Down
Binary file not shown.
Loading