diff --git a/package.json b/package.json index a6c6396..308d7c9 100644 --- a/package.json +++ b/package.json @@ -16,9 +16,12 @@ }, "devDependencies": { "@ckeditor/ckeditor5-basic-styles": "^10.0.3", + "@ckeditor/ckeditor5-cloud-services": "^10.1.0", + "@ckeditor/ckeditor5-easy-image": "^10.0.3", "@ckeditor/ckeditor5-editor-classic": "^11.0.1", "@ckeditor/ckeditor5-enter": "^10.1.2", "@ckeditor/ckeditor5-heading": "^10.1.0", + "@ckeditor/ckeditor5-image": "^11.0.0", "@ckeditor/ckeditor5-link": "^10.0.4", "@ckeditor/ckeditor5-list": "^11.0.2", "@ckeditor/ckeditor5-paragraph": "^10.0.3", diff --git a/src/filters/image.js b/src/filters/image.js new file mode 100644 index 0000000..5b114c3 --- /dev/null +++ b/src/filters/image.js @@ -0,0 +1,219 @@ +/** + * @license Copyright (c) 2003-2018, CKSource - Frederico Knabben. All rights reserved. + * For licensing, see LICENSE.md. + */ + +/** + * @module paste-from-office/filters/image + */ + +/* globals btoa */ + +import ViewMatcher from '@ckeditor/ckeditor5-engine/src/view/matcher'; +import UpcastWriter from '@ckeditor/ckeditor5-engine/src/view/upcastwriter'; + +/** + * Replaces source attribute of all `` elements representing regular + * images (not the Word shapes) with inlined base64 image representation extracted from RTF or Blob data. + * + * @param {module:engine/view/documentfragment~DocumentFragment} documentFragment Document fragment on which transform images. + * @param {String} rtfData The RTF data from which images representation will be used. + */ +export function replaceImagesSourceWithBase64( documentFragment, rtfData ) { + if ( !documentFragment.childCount ) { + return; + } + + const upcastWriter = new UpcastWriter(); + const shapesIds = findAllShapesIds( documentFragment, upcastWriter ); + + removeAllImgElementsRepresentingShapes( shapesIds, documentFragment, upcastWriter ); + removeAllShapeElements( documentFragment, upcastWriter ); + + const images = findAllImageElementsWithLocalSource( documentFragment, upcastWriter ); + + if ( images.length ) { + replaceImagesFileSourceWithInlineRepresentation( images, extractImageDataFromRtf( rtfData ), upcastWriter ); + } +} + +/** + * Converts given HEX string to base64 representation. + * + * @protected + * @param {String} hexString The HEX string to be converted. + * @returns {String} Base64 representation of a given HEX string. + */ +export function _convertHexToBase64( hexString ) { + return btoa( hexString.match( /\w{2}/g ).map( char => { + return String.fromCharCode( parseInt( char, 16 ) ); + } ).join( '' ) ); +} + +// Finds all shapes (`...`) ids. Shapes can represent images (canvas) +// or Word shapes (which does not have RTF or Blob representation). +// +// @param {module:engine/view/documentfragment~DocumentFragment} documentFragment Document fragment +// from which to extract shape ids. +// @param {module:engine/view/upcastwriter~UpcastWriter} writer +// @returns {Array.} Array of shape ids. +function findAllShapesIds( documentFragment, writer ) { + const range = writer.createRangeIn( documentFragment ); + + const shapeElementsMatcher = new ViewMatcher( { + name: /v:(.+)/ + } ); + + const shapesIds = []; + + for ( const value of range ) { + const el = value.item; + const prevSiblingName = el.previousSibling && el.previousSibling.name || null; + + // If shape element have 'o:gfxdata' attribute and is not directly before `` element it means it represent Word shape. + if ( shapeElementsMatcher.match( el ) && el.getAttribute( 'o:gfxdata' ) && prevSiblingName !== 'v:shapetype' ) { + shapesIds.push( value.item.getAttribute( 'id' ) ); + } + } + + return shapesIds; +} + +// Removes all `` elements which represents Word shapes and not regular images. +// +// @param {Array.} shapesIds Shape ids which will be checked against `` elements. +// @param {module:engine/view/documentfragment~DocumentFragment} documentFragment Document fragment from which to remove `` elements. +// @param {module:engine/view/upcastwriter~UpcastWriter} writer +function removeAllImgElementsRepresentingShapes( shapesIds, documentFragment, writer ) { + const range = writer.createRangeIn( documentFragment ); + + const imageElementsMatcher = new ViewMatcher( { + name: 'img' + } ); + + const imgs = []; + + for ( const value of range ) { + if ( imageElementsMatcher.match( value.item ) ) { + const el = value.item; + const shapes = el.getAttribute( 'v:shapes' ) ? el.getAttribute( 'v:shapes' ).split( ' ' ) : []; + + if ( shapes.length && shapes.every( shape => shapesIds.indexOf( shape ) > -1 ) ) { + imgs.push( el ); + // Shapes may also have empty source while content is paste in some browsers (Safari). + } else if ( !el.getAttribute( 'src' ) ) { + imgs.push( el ); + } + } + } + + for ( const img of imgs ) { + writer.remove( img ); + } +} + +// Removes all shape elements (`...`) so they do not pollute the output structure. +// +// @param {module:engine/view/documentfragment~DocumentFragment} documentFragment Document fragment from which to remove shape elements. +// @param {module:engine/view/upcastwriter~UpcastWriter} writer +function removeAllShapeElements( documentFragment, writer ) { + const range = writer.createRangeIn( documentFragment ); + + const shapeElementsMatcher = new ViewMatcher( { + name: /v:(.+)/ + } ); + + const shapes = []; + + for ( const value of range ) { + if ( shapeElementsMatcher.match( value.item ) ) { + shapes.push( value.item ); + } + } + + for ( const shape of shapes ) { + writer.remove( shape ); + } +} + +// Finds all `` elements in a given document fragment which have source pointing to local `file://` resource. +// +// @param {module:engine/view/documentfragment~DocumentFragment} documentFragment Document fragment in which to look for `` elements. +// @param {module:engine/view/upcastwriter~UpcastWriter} writer +// @returns {Object} result All found images grouped by source type. +// @returns {Array.} result.file Array of found `` elements with `file://` source. +// @returns {Array.} result.blob Array of found `` elements with `blob:` source. +function findAllImageElementsWithLocalSource( documentFragment, writer ) { + const range = writer.createRangeIn( documentFragment ); + + const imageElementsMatcher = new ViewMatcher( { + name: 'img' + } ); + + const imgs = []; + + for ( const value of range ) { + if ( imageElementsMatcher.match( value.item ) ) { + if ( value.item.getAttribute( 'src' ).startsWith( 'file://' ) ) { + imgs.push( value.item ); + } + } + } + + return imgs; +} + +// Extracts all images HEX representations from a given RTF data. +// +// @param {String} rtfData The RTF data from which to extract images HEX representation. +// @returns {Array.} Array of found HEX representations. Each array item is an object containing: +// +// * {String} hex Image representation in HEX format. +// * {string} type Type of image, `image/png` or `image/jpeg`. +function extractImageDataFromRtf( rtfData ) { + if ( !rtfData ) { + return []; + } + + const regexPictureHeader = /{\\pict[\s\S]+?\\bliptag-?\d+(\\blipupi-?\d+)?({\\\*\\blipuid\s?[\da-fA-F]+)?[\s}]*?/; + const regexPicture = new RegExp( '(?:(' + regexPictureHeader.source + '))([\\da-fA-F\\s]+)\\}', 'g' ); + const images = rtfData.match( regexPicture ); + const result = []; + + if ( images ) { + for ( const image of images ) { + let imageType = false; + + if ( image.includes( '\\pngblip' ) ) { + imageType = 'image/png'; + } else if ( image.includes( '\\jpegblip' ) ) { + imageType = 'image/jpeg'; + } + + if ( imageType ) { + result.push( { + hex: image.replace( regexPictureHeader, '' ).replace( /[^\da-fA-F]/g, '' ), + type: imageType + } ); + } + } + } + + return result; +} + +// Replaces `src` attribute value of all given images with the corresponding base64 image representation. +// +// @param {Array.} imageElements Array of image elements which will have its source replaced. +// @param {Array.} imagesHexSources Array of images hex sources (usually the result of `extractImageDataFromRtf()` function). +// The array should be the same length as `imageElements` parameter. +// @param {module:engine/view/upcastwriter~UpcastWriter} writer +function replaceImagesFileSourceWithInlineRepresentation( imageElements, imagesHexSources, writer ) { + // Assume there is an equal amount of image elements and images HEX sources so they can be matched accordingly based on existing order. + if ( imageElements.length === imagesHexSources.length ) { + for ( let i = 0; i < imageElements.length; i++ ) { + const newSrc = `data:${ imagesHexSources[ i ].type };base64,${ _convertHexToBase64( imagesHexSources[ i ].hex ) }`; + writer.setAttribute( 'src', newSrc, imageElements[ i ] ); + } + } +} diff --git a/src/filters/list.js b/src/filters/list.js index 36865f1..c4fcba4 100644 --- a/src/filters/list.js +++ b/src/filters/list.js @@ -21,21 +21,19 @@ import UpcastWriter from '@ckeditor/ckeditor5-engine/src/view/upcastwriter'; * * @param {module:engine/view/documentfragment~DocumentFragment} documentFragment The view structure which to transform. * @param {String} stylesString Styles from which list-like elements styling will be extracted. - * @param {module:engine/view/view~View} view */ -export function transformListItemLikeElementsIntoLists( documentFragment, stylesString, view ) { +export function transformListItemLikeElementsIntoLists( documentFragment, stylesString ) { if ( !documentFragment.childCount ) { return; } - const itemLikeElements = findAllItemLikeElements( documentFragment, view ); + const writer = new UpcastWriter(); + const itemLikeElements = findAllItemLikeElements( documentFragment, writer ); if ( !itemLikeElements.length ) { return; } - const writer = new UpcastWriter(); - let currentList = null; itemLikeElements.forEach( ( itemLikeElement, i ) => { @@ -45,7 +43,7 @@ export function transformListItemLikeElementsIntoLists( documentFragment, styles currentList = insertNewEmptyList( listStyle, itemLikeElement.element, writer ); } - const listItem = transformElementIntoListItem( itemLikeElement.element, writer, view ); + const listItem = transformElementIntoListItem( itemLikeElement.element, writer ); writer.appendChild( listItem, currentList ); } ); @@ -55,15 +53,15 @@ export function transformListItemLikeElementsIntoLists( documentFragment, styles // // @param {module:engine/view/documentfragment~DocumentFragment} documentFragment Document fragment // in which to look for list-like nodes. -// @param {module:engine/view/view~View} view +// @param {module:engine/view/upcastwriter~UpcastWriter} writer // @returns {Array.} Array of found list-like items. Each item is an object containing: // // * {module:engine/src/view/element~Element} element List-like element. // * {Number} id List item id parsed from `mso-list` style (see `getListItemData()` function). // * {Number} order List item creation order parsed from `mso-list` style (see `getListItemData()` function). // * {Number} indent List item indentation level parsed from `mso-list` style (see `getListItemData()` function). -function findAllItemLikeElements( documentFragment, view ) { - const range = view.createRangeIn( documentFragment ); +function findAllItemLikeElements( documentFragment, writer ) { + const range = writer.createRangeIn( documentFragment ); // Matcher for finding list-like elements. const itemLikeElementsMatcher = new Matcher( { @@ -156,8 +154,8 @@ function insertNewEmptyList( listStyle, element, writer ) { // @param {module:engine/view/upcastwriter~UpcastWriter} writer // @returns {module:engine/view/element~Element} New element to which the given one was transformed. It is // inserted in place of the old element (the reference to the old element is lost due to renaming). -function transformElementIntoListItem( element, writer, view ) { - removeBulletElement( element, writer, view ); +function transformElementIntoListItem( element, writer ) { + removeBulletElement( element, writer ); return writer.rename( 'li', element ); } @@ -194,8 +192,7 @@ function getListItemData( element ) { // // @param {module:engine/view/element~Element} element // @param {module:engine/view/upcastwriter~UpcastWriter} writer -// @param {module:engine/view/view~View} view -function removeBulletElement( element, writer, view ) { +function removeBulletElement( element, writer ) { // Matcher for finding `span` elements holding lists numbering/bullets. const bulletMatcher = new Matcher( { name: 'span', @@ -204,7 +201,7 @@ function removeBulletElement( element, writer, view ) { } } ); - const range = view.createRangeIn( element ); + const range = writer.createRangeIn( element ); for ( const value of range ) { if ( value.type === 'elementStart' && bulletMatcher.match( value.item ) ) { diff --git a/src/filters/parse.js b/src/filters/parse.js index 014095f..4105322 100644 --- a/src/filters/parse.js +++ b/src/filters/parse.js @@ -29,6 +29,9 @@ import { normalizeSpacing, normalizeSpacerunSpans } from './space'; export function parseHtml( htmlString ) { const domParser = new DOMParser(); + // Remove Word specific "if comments" so content inside is not omitted by the parser. + htmlString = htmlString.replace( / + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +