From 01505fd0b0b9b176c637ebb7767e372548503847 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9rgio=20Gomes?= Date: Thu, 5 Sep 2024 16:12:50 +0100 Subject: [PATCH] Deduplicate uploads and media filenames --- .../post-publish-panel/maybe-upload-media.js | 68 ++++-- .../post-publish-panel/media-util.js | 120 ++++++++++ .../post-publish-panel/test/media-util.js | 221 ++++++++++++++++++ 3 files changed, 393 insertions(+), 16 deletions(-) create mode 100644 packages/editor/src/components/post-publish-panel/media-util.js create mode 100644 packages/editor/src/components/post-publish-panel/test/media-util.js diff --git a/packages/editor/src/components/post-publish-panel/maybe-upload-media.js b/packages/editor/src/components/post-publish-panel/maybe-upload-media.js index fd4707a03d4218..e69f55265ce12d 100644 --- a/packages/editor/src/components/post-publish-panel/maybe-upload-media.js +++ b/packages/editor/src/components/post-publish-panel/maybe-upload-media.js @@ -14,6 +14,11 @@ import { store as blockEditorStore } from '@wordpress/block-editor'; import { useState } from '@wordpress/element'; import { isBlobURL } from '@wordpress/blob'; +/** + * Internal dependencies + */ +import { fetchMedia } from './media-util'; + function flattenBlocks( blocks ) { const result = []; @@ -25,6 +30,10 @@ function flattenBlocks( blocks ) { return result; } +// Determine whether a block has external media. +// Different blocks use different attribute names (and potentially +// different logic as well) in determining whether the media is +// present, and whether it's external. function hasExternalMedia( block ) { if ( block.name === 'core/image' || block.name === 'core/cover' ) { return block.attributes.url && ! block.attributes.id; @@ -35,6 +44,9 @@ function hasExternalMedia( block ) { } } +// Retrieve media info from a block. +// Different blocks use different attribute names, so we need this +// function to normalize things into a consistent naming scheme. function getMediaInfo( block ) { if ( block.name === 'core/image' || block.name === 'core/cover' ) { const { url, alt, id } = block.attributes; @@ -47,6 +59,7 @@ function getMediaInfo( block ) { } } +// Image component to represent a single image in the upload dialog. function Image( { clientId, alt, url } ) { const { selectBlock } = useDispatch( blockEditorStore ); return ( @@ -80,7 +93,7 @@ function Image( { clientId, alt, url } ) { ); } -export default function PostFormatPanel() { +export default function MaybeUploadMediaPanel() { const [ isUploading, setIsUploading ] = useState( false ); const [ isAnimating, setIsAnimating ] = useState( false ); const [ hadUploadError, setHadUploadError ] = useState( false ); @@ -91,6 +104,8 @@ export default function PostFormatPanel() { } ), [] ); + + // Get a list of blocks with external media. const blocksWithExternalMedia = flattenBlocks( editorBlocks ).filter( ( block ) => hasExternalMedia( block ) ); @@ -107,6 +122,9 @@ export default function PostFormatPanel() { , ]; + // Update an individual block to point to newly-added library media. + // Different blocks use different attribute names, so we need this + // function to ensure we modify the correct attributes for each type. function updateBlockWithUploadedMedia( block, media ) { if ( block.name === 'core/image' || block.name === 'core/cover' ) { return updateBlockAttributes( block.clientId, { @@ -123,16 +141,26 @@ export default function PostFormatPanel() { } } + // Handle fetching and uploading all external media in the post. function uploadImages() { setIsUploading( true ); setHadUploadError( false ); - Promise.all( + + // Multiple blocks can be using the same URL, so we + // should ensure we only fetch and upload each of them once. + const mediaUrls = new Set( blocksWithExternalMedia.map( ( block ) => { const { url } = getMediaInfo( block ); - return window - .fetch( url.includes( '?' ) ? url : url + '?' ) - .then( ( response ) => response.blob() ) - .then( ( blob ) => + return url; + } ) + ); + + // Create an upload promise for each URL, that we can wait for in all + // blocks that make use of that media. + const uploadPromises = Object.fromEntries( + fetchMedia( [ ...mediaUrls ] ).map( ( { url, blobPromise } ) => { + const uploadPromise = blobPromise.then( + ( blob ) => new Promise( ( resolve, reject ) => { mediaUpload( { filesList: [ blob ], @@ -141,22 +169,30 @@ export default function PostFormatPanel() { return; } - updateBlockWithUploadedMedia( - block, - media - ); - resolve(); + resolve( media ); }, onError() { - setHadUploadError( true ); reject(); }, } ); - } ).then( () => setIsAnimating( true ) ) + } ) + ); + + return [ url, uploadPromise ]; + } ) + ); + + // Wait for all blocks to be updated with library media. + Promise.all( + blocksWithExternalMedia.map( ( block ) => { + const { url } = getMediaInfo( block ); + + return uploadPromises[ url ] + .then( ( media ) => + updateBlockWithUploadedMedia( block, media ) ) - .catch( () => { - setHadUploadError( true ); - } ); + .then( () => setIsAnimating( true ) ) + .catch( () => setHadUploadError( true ) ); } ) ).finally( () => { setIsUploading( false ); diff --git a/packages/editor/src/components/post-publish-panel/media-util.js b/packages/editor/src/components/post-publish-panel/media-util.js new file mode 100644 index 00000000000000..073872fdb80c56 --- /dev/null +++ b/packages/editor/src/components/post-publish-panel/media-util.js @@ -0,0 +1,120 @@ +/** + * External dependencies + */ +import { v4 as uuid } from 'uuid'; + +/** + * WordPress dependencies + */ +import { getFilename } from '@wordpress/url'; + +// Generate a list of unique filenames given a list of URLs. +// We want all basenames to be unique, since sometimes the extension +// doesn't reflect the mime type, and may end up getting changed by +// the server, on upload. +export function generateUniqueFilenames( urls ) { + const basenames = new Set(); + + return urls.map( ( url ) => { + // We prefer to match the remote filename, if possible. + const filename = getFilename( url ); + let basename = ''; + let extension = ''; + + if ( filename ) { + const parts = filename.split( '.' ); + if ( parts.length > 1 ) { + // Assume the last part is the extension. + extension = parts.pop(); + } + basename = parts.join( '.' ); + } + + if ( ! basename ) { + // It looks like we don't have a basename, so let's use a UUID. + basename = uuid(); + } + + if ( basenames.has( basename ) ) { + // Append a UUID to deduplicate the basename. + // The server will try to deduplicate on its own if we don't do this, + // but it may run into a race condition + // (see https://github.com/WordPress/gutenberg/issues/64899). + // Deduplicating the filenames before uploading is safer. + basename = `${ basename }-${ uuid() }`; + } + + basenames.add( basename ); + + return { url, basename, extension }; + } ); +} + +const mimeTypeToExtension = { + 'image/apng': 'apng', + 'image/avif': 'avif', + 'image/bmp': 'bmp', + 'image/gif': 'gif', + 'image/vnd.microsoft.icon': 'ico', + 'image/jpeg': 'jpg', + 'image/jxl': 'jxl', + 'image/png': 'png', + 'image/svg+xml': 'svg', + 'image/tiff': 'tiff', + 'image/webp': 'webp', + 'video/x-msvideo': 'avi', + 'video/mp4': 'mp4', + 'video/mpeg': 'mpeg', + 'video/ogg': 'ogg', + 'video/mp2t': 'ts', + 'video/webm': 'webm', + 'video/3gpp': '3gp', + 'video/3gpp2': '3g2', +}; + +// Get the file extension to use for a given mime type. +export function getExtensionFromMimeType( mime ) { + mime = ( mime ?? '' ).toLowerCase(); + + let extension = mimeTypeToExtension[ mime ]; + + if ( ! extension ) { + // We don't know which extension to use, so we need to fall back to + // something safe. The server should replace it with an appropriate + // extension for the mime type. + if ( mime.startsWith( 'image/' ) ) { + extension = 'png'; + } + if ( mime.startsWith( 'video/' ) ) { + extension = 'mp4'; + } + } + + // If all else fails, try an empty extension. + // The server will probably reject the upload, but there isn't much + // else we can do. + return extension || ''; +} + +// Returns an array of { url, blobPromise } objects, where the promise +// points to a fetched blob. Blobs will have unique filenames. +export function fetchMedia( urls ) { + return generateUniqueFilenames( urls ).map( + ( { url, basename, extension } ) => { + const blobPromise = window + .fetch( url.includes( '?' ) ? url : url + '?' ) + .then( ( response ) => response.blob() ) + .then( ( blob ) => { + // Not all remote filenames have an extension, but we need to + // provide one, or the server is likely to reject the upload. + if ( ! extension ) { + extension = getExtensionFromMimeType( blob.type ); + } + blob.name = `${ basename }.${ extension }`; + return blob; + } ); + + return { url, blobPromise }; + } + ); +} diff --git a/packages/editor/src/components/post-publish-panel/test/media-util.js b/packages/editor/src/components/post-publish-panel/test/media-util.js new file mode 100644 index 00000000000000..395b1b4ed05f49 --- /dev/null +++ b/packages/editor/src/components/post-publish-panel/test/media-util.js @@ -0,0 +1,221 @@ +/** + * Internal dependencies + */ +import { + generateUniqueFilenames, + getExtensionFromMimeType, +} from '../media-util'; + +describe( 'generateUniqueFilenames', () => { + it( 'should prefer the original filenames', () => { + const urls = [ + 'https://example.com/images/image1.jpg', + 'https://example.com/images/image2.jpg', + 'https://example.com/images/image3.jpg', + 'https://example.com/images/image4.jpg', + ]; + + expect( generateUniqueFilenames( urls ) ).toEqual( [ + { + url: 'https://example.com/images/image1.jpg', + basename: 'image1', + extension: 'jpg', + }, + { + url: 'https://example.com/images/image2.jpg', + basename: 'image2', + extension: 'jpg', + }, + { + url: 'https://example.com/images/image3.jpg', + basename: 'image3', + extension: 'jpg', + }, + { + url: 'https://example.com/images/image4.jpg', + basename: 'image4', + extension: 'jpg', + }, + ] ); + } ); + + it( 'should handle filenames with no extensions', () => { + const urls = [ + 'https://example.com/images/image1', + 'https://example.com/images/image2', + 'https://example.com/images/image3', + 'https://example.com/images/image4', + ]; + + expect( generateUniqueFilenames( urls ) ).toEqual( [ + { + url: 'https://example.com/images/image1', + basename: 'image1', + extension: '', + }, + { + url: 'https://example.com/images/image2', + basename: 'image2', + extension: '', + }, + { + url: 'https://example.com/images/image3', + basename: 'image3', + extension: '', + }, + { + url: 'https://example.com/images/image4', + basename: 'image4', + extension: '', + }, + ] ); + } ); + + it( 'should handle query parameters correctly', () => { + const urls = [ + 'https://example.com/images/image1.jpg?a=notafile.npg', + 'https://example.com/images/image2.jpg?a=notafile.npg', + 'https://example.com/images/image3.jpg?a=notafile.npg', + 'https://example.com/images/image4.jpg?a=notafile.npg', + ]; + + expect( generateUniqueFilenames( urls ) ).toEqual( [ + { + url: 'https://example.com/images/image1.jpg?a=notafile.npg', + basename: 'image1', + extension: 'jpg', + }, + { + url: 'https://example.com/images/image2.jpg?a=notafile.npg', + basename: 'image2', + extension: 'jpg', + }, + { + url: 'https://example.com/images/image3.jpg?a=notafile.npg', + basename: 'image3', + extension: 'jpg', + }, + { + url: 'https://example.com/images/image4.jpg?a=notafile.npg', + basename: 'image4', + extension: 'jpg', + }, + ] ); + } ); + + it( 'should deduplicate identical filenames', () => { + const urls = [ + 'https://example.com/image1/image.jpg', + 'https://example.com/image2/image.jpg', + 'https://example.com/image3/image.jpg', + 'https://example.com/image4/image.jpg', + ]; + + const results = generateUniqueFilenames( urls ); + expect( results.length ).toBe( urls.length ); + + for ( const result of results ) { + expect( result.extension ).toBe( 'jpg' ); + } + + const basenames = new Set( + results.map( ( result ) => result.basename ) + ); + + expect( basenames.size ).toBe( results.length ); + } ); + + it( 'should deduplicate identical basenames', () => { + const urls = [ + 'https://example.com/images/image.jpg', + 'https://example.com/images/image.png', + 'https://example.com/images/image.webp', + 'https://example.com/images/image.avif', + ]; + + const results = generateUniqueFilenames( urls ); + expect( results.length ).toBe( urls.length ); + + expect( results[ 0 ].extension ).toBe( 'jpg' ); + expect( results[ 1 ].extension ).toBe( 'png' ); + expect( results[ 2 ].extension ).toBe( 'webp' ); + expect( results[ 3 ].extension ).toBe( 'avif' ); + + const basenames = new Set( + results.map( ( result ) => result.basename ) + ); + + expect( basenames.size ).toBe( results.length ); + } ); + + it( 'should deduplicate filenames without extensions', () => { + const urls = [ + 'https://example.com/image1/image', + 'https://example.com/image2/image', + 'https://example.com/image3/image', + 'https://example.com/image4/image', + ]; + + const results = generateUniqueFilenames( urls ); + expect( results.length ).toBe( urls.length ); + + for ( const result of results ) { + expect( result.extension ).toBe( '' ); + } + + const basenames = new Set( + results.map( ( result ) => result.basename ) + ); + + expect( basenames.size ).toBe( results.length ); + } ); + + it( 'should deduplicate paths with no filename', () => { + const urls = [ + 'https://example.com/image1/dir/', + 'https://example.com/image2/dir/', + 'https://example.com/image3/dir/', + 'https://example.com/image4/dir/', + ]; + + const results = generateUniqueFilenames( urls ); + expect( results.length ).toBe( urls.length ); + + for ( const result of results ) { + expect( result.extension ).toBe( '' ); + } + + const basenames = new Set( + results.map( ( result ) => result.basename ) + ); + + expect( basenames.size ).toBe( results.length ); + } ); +} ); + +describe( 'getExtensionFromMimeType', () => { + it( 'should use the correct extension for common mime types on the web', () => { + expect( getExtensionFromMimeType( 'image/png' ) ).toBe( 'png' ); + expect( getExtensionFromMimeType( 'image/jpeg' ) ).toBe( 'jpg' ); + expect( getExtensionFromMimeType( 'image/webp' ) ).toBe( 'webp' ); + expect( getExtensionFromMimeType( 'image/gif' ) ).toBe( 'gif' ); + expect( getExtensionFromMimeType( 'image/avif' ) ).toBe( 'avif' ); + expect( getExtensionFromMimeType( 'image/jxl' ) ).toBe( 'jxl' ); + expect( getExtensionFromMimeType( 'image/svg+xml' ) ).toBe( 'svg' ); + expect( getExtensionFromMimeType( 'video/mp4' ) ).toBe( 'mp4' ); + expect( getExtensionFromMimeType( 'video/mpeg' ) ).toBe( 'mpeg' ); + expect( getExtensionFromMimeType( 'video/webm' ) ).toBe( 'webm' ); + } ); + + it( 'should use a fallback extension for unknown image/video mime types', () => { + expect( getExtensionFromMimeType( 'image/fake' ) ).toBe( 'png' ); + expect( getExtensionFromMimeType( 'video/fake' ) ).toBe( 'mp4' ); + } ); + + it( 'should return an empty extension for non image/video mime types', () => { + expect( getExtensionFromMimeType( 'application/json' ) ).toBe( '' ); + expect( getExtensionFromMimeType( '' ) ).toBe( '' ); + expect( getExtensionFromMimeType( null ) ).toBe( '' ); + expect( getExtensionFromMimeType( undefined ) ).toBe( '' ); + } ); +} );