diff --git a/.changeset/eight-squids-collect.md b/.changeset/eight-squids-collect.md new file mode 100644 index 000000000000..8ea8054e5cd7 --- /dev/null +++ b/.changeset/eight-squids-collect.md @@ -0,0 +1,5 @@ +--- +"miniflare": patch +--- + +fix: reflect file changes when using dev with workers + assets diff --git a/packages/miniflare/src/plugins/assets/index.ts b/packages/miniflare/src/plugins/assets/index.ts index f9db7ac28646..e3a868f67bce 100644 --- a/packages/miniflare/src/plugins/assets/index.ts +++ b/packages/miniflare/src/plugins/assets/index.ts @@ -57,13 +57,17 @@ export const ASSETS_PLUGIN: Plugin = { if (!options.assets) { return []; } - const assetsReverseMap = await createReverseMap(options.assets?.path); const storageServiceName = `${ASSETS_PLUGIN_NAME}:storage`; const storageService: Service = { name: storageServiceName, disk: { path: options.assets.path, writable: true }, }; + + const { encodedAssetManifest, assetsReverseMap } = await buildAssetManifest( + options.assets.path + ); + const namespaceService: Service = { name: ASSETS_KV_SERVICE_NAME, worker: { @@ -82,12 +86,12 @@ export const ASSETS_PLUGIN: Plugin = { }, { name: "ASSETS_REVERSE_MAP", - json: assetsReverseMap, + json: JSON.stringify(assetsReverseMap), }, ], }, }; - const assetsManifest = await buildAssetsManifest(options.assets.path); + const assetService: Service = { name: ASSETS_SERVICE_NAME, worker: { @@ -105,7 +109,7 @@ export const ASSETS_PLUGIN: Plugin = { }, { name: "ASSETS_MANIFEST", - data: assetsManifest, + data: encodedAssetManifest, }, { name: "CONFIG", @@ -146,30 +150,45 @@ export const ASSETS_PLUGIN: Plugin = { }, }; -// -- ASSET MANIFEST -- -// -// 1. Traverse the asset directory to create an asset manifest. -// (In prod the manifest contains a pathHash and a contentHash. The -// contentHash is used for uploading and as the keys for the KV namespace -// where the assets are stored. Uploading is irrelevant in dev, so for -// performance reasons, the pathHash is reused for the "contentHash".) -// -// 2. Sort and binary encode the asset manifest -// This is available to asset service worker as a binding. +/** + * The Asset Manifest and Asset Reverse Map are used to map a request path to an asset. + * 1. Hash path of request + * 2. Use this path hash to find the manifest entry + * 3. Get content hash from manifest entry + * 4a. In prod, use content hash to get asset from KV + * 4b. In dev, we fake out the KV store and use the file system instead. + * Use content hash to get file path from asset reverse map. + */ -export const buildAssetsManifest = async (dir: string) => { - const manifest = await walk(dir); +export const buildAssetManifest = async (dir: string) => { + const { manifest, assetsReverseMap } = await walk(dir); const sortedAssetManifest = sortManifest(manifest); const encodedAssetManifest = encodeManifest(sortedAssetManifest); - return encodedAssetManifest; + return { encodedAssetManifest, assetsReverseMap }; +}; + +export type ManifestEntry = { + pathHash: Uint8Array; + contentHash: Uint8Array; }; +export type AssetReverseMap = { + [pathHash: string]: { filePath: string; contentType: string }; +}; + +/** + * Traverses the asset directory to create an asset manifest and asset reverse map. + * These are available to the Asset Worker as a binding. + * NB: This runs every time the dev server restarts. + */ const walk = async (dir: string) => { const files = await fs.readdir(dir, { recursive: true }); - const manifest: Uint8Array[] = []; + const manifest: ManifestEntry[] = []; + const assetsReverseMap: AssetReverseMap = {}; let counter = 0; await Promise.all( files.map(async (file) => { + /** absolute file path */ const filepath = path.join(dir, file); const relativeFilepath = path.relative(dir, filepath); const filestat = await fs.stat(filepath); @@ -196,9 +215,43 @@ const walk = async (dir: string) => { ); } - manifest.push( - await hashPath(encodeFilePath(relativeFilepath, path.sep)) - ); + /* + * Each manifest entry is a series of bytes that store data in a [header, + * pathHash, contentHash] format, where: + * - `header` is a fixed 20 bytes reserved but currently unused + * - `pathHash` is the hashed file path + * - `contentHash` is the hashed file content in prod + * + * The `contentHash` of a file is determined by reading the contents of + * the file, and applying a hash function on the read data. In local + * development, performing this operation for each asset file would + * become very expensive very quickly, as it would have to be performed + * every time a dev server reload is trigerred. In watch mode, depending + * on the user's setup, this could potentially be on every file change. + * + * To avoid this from becoming a performance bottleneck, we're doing + * things a bit differently for dev, and implementing the `contentHash` + * as a hash function of the file path and the modified timestamp. + * (`hash(filePath + modifiedTime)`). + * This way a file's corresponding 'contentHash' will always update + * if the file changes, and `wrangler dev` will serve the updated asset + * files instead of incorrectly returning 304s. + */ + + const [pathHash, contentHash] = await Promise.all([ + hashPath(encodeFilePath(relativeFilepath, path.sep)), + hashPath( + encodeFilePath(filepath, path.sep) + filestat.mtimeMs.toString() + ), + ]); + manifest.push({ + pathHash, + contentHash, + }); + assetsReverseMap[bytesToHex(contentHash)] = { + filePath: relativeFilepath, + contentType: getContentType(filepath), + }; counter++; } }) @@ -210,87 +263,49 @@ const walk = async (dir: string) => { `Ensure your assets directory contains a maximum of ${MAX_ASSET_COUNT.toLocaleString()} files, and that you have specified your assets directory correctly.` ); } - return manifest; + return { manifest, assetsReverseMap }; }; + // sorts ascending by path hash -const sortManifest = (manifest: Uint8Array[]) => { +const sortManifest = (manifest: ManifestEntry[]) => { return manifest.sort(comparisonFn); }; -const comparisonFn = (a: Uint8Array, b: Uint8Array) => { - // i don't see why this would ever be the case - if (a.length < b.length) { +const comparisonFn = (a: ManifestEntry, b: ManifestEntry) => { + if (a.pathHash.length < b.pathHash.length) { return -1; } - if (a.length > b.length) { + if (a.pathHash.length > b.pathHash.length) { return 1; } - for (const [i, v] of a.entries()) { - if (v < b[i]) { + for (const [i, v] of a.pathHash.entries()) { + if (v < b.pathHash[i]) { return -1; } - if (v > b[i]) { + if (v > b.pathHash[i]) { return 1; } } return 1; }; -const encodeManifest = (manifest: Uint8Array[]) => { +const encodeManifest = (manifest: ManifestEntry[]) => { const assetManifestBytes = new Uint8Array( HEADER_SIZE + manifest.length * ENTRY_SIZE ); + for (const [i, entry] of manifest.entries()) { const entryOffset = HEADER_SIZE + i * ENTRY_SIZE; - // NB: PATH_HASH_OFFSET = 0 - // set the path hash: - assetManifestBytes.set(entry, entryOffset + PATH_HASH_OFFSET); - // set the content hash, which happens to be the same as the path hash in dev: - assetManifestBytes.set(entry, entryOffset + CONTENT_HASH_OFFSET); + assetManifestBytes.set(entry.pathHash, entryOffset + PATH_HASH_OFFSET); + // content hash here in dev is hash(path + last modified time of file) + assetManifestBytes.set( + entry.contentHash, + entryOffset + CONTENT_HASH_OFFSET + ); } return assetManifestBytes; }; -// -- ASSET REVERSE MAP -- -// -// In prod, the contentHash is used as the key for the KV store that holds the assets. -// Asset Worker will hash the path of an incoming request, look for that pathHash in -// the stored manifest, and get the corresponding contentHash to use as the KV key. -// In dev, we fake out this KV store and just get the assets from disk. However we still need -// to map a given "contentHash" to the filePath. This is what the ASSET REVERSE MAP is for. -// This is available to the ASSETS_KV_NAMESPACE service (assets-kv.worker.ts) as a binding. - -type AssetReverseMap = { - [pathHash: string]: { filePath: string; contentType: string }; -}; - -// TODO: This walk should be pulled into a common place, shared by wrangler and miniflare -const createReverseMap = async (dir: string) => { - const files = await fs.readdir(dir, { recursive: true }); - const assetsReverseMap: AssetReverseMap = {}; - await Promise.all( - files.map(async (file) => { - const filepath = path.join(dir, file); - const relativeFilepath = path.relative(dir, filepath); - const filestat = await fs.stat(filepath); - - if (filestat.isSymbolicLink() || filestat.isDirectory()) { - return; - } else { - const pathHash = bytesToHex( - await hashPath(encodeFilePath(relativeFilepath, path.sep)) - ); - - assetsReverseMap[pathHash] = { - filePath: relativeFilepath, - contentType: getContentType(filepath), - }; - } - }) - ); - return JSON.stringify(assetsReverseMap); -}; - const bytesToHex = (buffer: ArrayBufferLike) => { return [...new Uint8Array(buffer)] .map((b) => b.toString(16).padStart(2, "0")) diff --git a/packages/wrangler/e2e/dev.test.ts b/packages/wrangler/e2e/dev.test.ts index bcc0c5fc29cd..8be6aeae280a 100644 --- a/packages/wrangler/e2e/dev.test.ts +++ b/packages/wrangler/e2e/dev.test.ts @@ -7,6 +7,7 @@ import { fetch } from "undici"; import { afterEach, beforeEach, describe, expect, it } from "vitest"; import { WranglerE2ETestHelper } from "./helpers/e2e-wrangler-test"; import { fetchText } from "./helpers/fetch-text"; +import { fetchWithETag } from "./helpers/fetch-with-etag"; import { generateResourceName } from "./helpers/generate-resource-name"; import { retry } from "./helpers/retry"; import { seed as baseSeed, makeRoot } from "./helpers/setup"; @@ -901,8 +902,12 @@ describe("watch mode", () => { const worker = helper.runLongLived(cmd); const { url } = await worker.waitForReady(); - let text = await fetchText(`${url}/index.html`); - expect(text).toBe("

Hello Workers + Assets

"); + let { response, cachedETags } = await fetchWithETag( + `${url}/index.html`, + {} + ); + const originalETag = response.headers.get("etag"); + expect(await response.text()).toBe("

Hello Workers + Assets

"); await helper.seed({ "public/index.html": dedent` @@ -910,9 +915,17 @@ describe("watch mode", () => { }); await worker.waitForReload(); - - text = await fetchText(`${url}/index.html`); - expect(text).toBe("

Hello Updated Workers + Assets

"); + ({ response, cachedETags } = await retry( + (s) => s.response.status !== 200, + async () => { + return await fetchWithETag(`${url}/index.html`, cachedETags); + } + )); + expect(await response.text()).toBe( + "

Hello Updated Workers + Assets

" + ); + // expect a new eTag back because the content for this path has changed + expect(response.headers.get("etag")).not.toBe(originalETag); }); it(`supports adding new assets during dev session`, async () => { @@ -931,8 +944,12 @@ describe("watch mode", () => { const worker = helper.runLongLived(cmd); const { url } = await worker.waitForReady(); - let text = await fetchText(`${url}/index.html`); - expect(text).toBe("

Hello Workers + Assets

"); + let { response, cachedETags } = await fetchWithETag( + `${url}/index.html`, + {} + ); + + expect(await response.text()).toBe("

Hello Workers + Assets

"); await helper.seed({ "public/about.html": dedent`About Workers + Assets`, @@ -943,20 +960,26 @@ describe("watch mode", () => { // re-calculating the asset manifest / reverse assets map might not be // done at this point, so retry until they are available - const response = await retry( - (s) => s.status !== 200, + ({ response, cachedETags } = await retry( + (s) => s.response.status !== 200, async () => { - const r = await fetch(`${url}/about.html`); - return { text: await r.text(), status: r.status }; + return await fetchWithETag(`${url}/about.html`, cachedETags); } - ); - expect(response.text).toBe("About Workers + Assets"); - - text = await fetchText(`${url}/workers/index.html`); - expect(text).toBe("Cloudflare Workers!"); - - text = await fetchText(`${url}/index.html`); - expect(text).toBe("

Hello Workers + Assets

"); + )); + expect(await response.text()).toBe("About Workers + Assets"); + + ({ response, cachedETags } = await fetchWithETag( + `${url}/workers/index.html`, + cachedETags + )); + expect(await response.text()).toBe("Cloudflare Workers!"); + + // expect 304 for the original asset as the content has not changed + ({ response, cachedETags } = await fetchWithETag( + `${url}/index.html`, + cachedETags + )); + expect(response.status).toBe(304); }); it(`supports removing existing assets during dev session`, async () => { @@ -977,15 +1000,22 @@ describe("watch mode", () => { const worker = helper.runLongLived(cmd); const { url } = await worker.waitForReady(); - - let text = await fetchText(`${url}/index.html`); - expect(text).toBe("

Hello Workers + Assets

"); - - text = await fetchText(`${url}/about.html`); - expect(text).toBe("About Workers + Assets"); - - text = await fetchText(`${url}/workers/index.html`); - expect(text).toBe("Cloudflare Workers!"); + let { response, cachedETags } = await fetchWithETag( + `${url}/index.html`, + {} + ); + expect(await response.text()).toBe("

Hello Workers + Assets

"); + + ({ response, cachedETags } = await fetchWithETag( + `${url}/about.html`, + cachedETags + )); + expect(await response.text()).toBe("About Workers + Assets"); + ({ response, cachedETags } = await fetchWithETag( + `${url}/workers/index.html`, + cachedETags + )); + expect(await response.text()).toBe("Cloudflare Workers!"); await helper.removeFiles(["public/index.html"]); @@ -993,13 +1023,12 @@ describe("watch mode", () => { // re-calculating the asset manifest / reverse assets map might not be // done at this point, so retry until they are available - const response = await retry( - (s) => s.status !== 404, + ({ response, cachedETags } = await retry( + (s) => s.response.status !== 404, async () => { - const r = await fetch(`${url}/index.html`); - return { text: await r.text(), status: r.status }; + return await fetchWithETag(`${url}/index.html`, cachedETags); } - ); + )); expect(response.status).toBe(404); }); @@ -1015,17 +1044,21 @@ describe("watch mode", () => { `, "public/index.html": dedent`

Hello Workers + Assets

`, + }); + await helper.seed({ "public2/index.html": dedent`

Hola Workers + Assets

`, "public2/about/index.html": dedent`

Read more about Workers + Assets

`, }); - const worker = helper.runLongLived(cmd); const { url } = await worker.waitForReady(); - let text = await fetchText(`${url}/index.html`); - expect(text).toBe("

Hello Workers + Assets

"); + let { response, cachedETags } = await fetchWithETag( + `${url}/index.html`, + {} + ); + expect(await response.text()).toBe("

Hello Workers + Assets

"); await helper.seed({ "wrangler.toml": dedent` @@ -1039,17 +1072,20 @@ describe("watch mode", () => { await worker.waitForReload(); - const response = await retry( - (s) => s.text !== "

Hola Workers + Assets

", + ({ response, cachedETags } = await retry( + (s) => s.response.status !== 200, async () => { - const r = await fetch(`${url}/index.html`); - return { text: await r.text(), status: r.status }; + return await fetchWithETag(`${url}/index.html`, cachedETags); } + )); + expect(await response.text()).toBe("

Hola Workers + Assets

"); + ({ response, cachedETags } = await fetchWithETag( + `${url}/about/index.html`, + {} + )); + expect(await response.text()).toBe( + "

Read more about Workers + Assets

" ); - expect(response.text).toBe("

Hola Workers + Assets

"); - - text = await fetchText(`${url}/about/index.html`); - expect(text).toBe("

Read more about Workers + Assets

"); }); } ); @@ -1074,11 +1110,20 @@ describe("watch mode", () => { const worker = helper.runLongLived(cmd); const { url } = await worker.waitForReady(); - let text = await fetchText(`${url}/index.html`); - expect(text).toBe("

Hello Workers + Assets

"); - - text = await fetchText(`${url}/about.html`); - expect(text).toBe("

Read more about Workers + Assets

"); + let { response, cachedETags } = await fetchWithETag( + `${url}/index.html`, + {} + ); + const originalETag = response.headers.get("etag"); + expect(await response.text()).toBe("

Hello Workers + Assets

"); + + ({ response, cachedETags } = await fetchWithETag( + `${url}/about.html`, + cachedETags + )); + expect(await response.text()).toBe( + "

Read more about Workers + Assets

" + ); // change + add await helper.seed({ @@ -1092,20 +1137,29 @@ describe("watch mode", () => { // re-calculating the asset manifest / reverse assets map might not be // done at this point, so retry until they are available - let response = await retry( - (s) => s.status !== 200, + ({ response, cachedETags } = await retry( + (s) => s.response.status !== 200, async () => { - const r = await fetch(`${url}/hello.html`); - return { text: await r.text(), status: r.status }; + return await fetchWithETag(`${url}/hello.html`, cachedETags); } + )); + expect(await response.text()).toBe("

Hya Workers!

"); + + ({ response, cachedETags } = await fetchWithETag( + `${url}/index.html`, + cachedETags + )); + expect(await response.text()).toBe( + "

Hello Updated Workers + Assets

" ); - expect(response.text).toBe("

Hya Workers!

"); - - text = await fetchText(`${url}/index.html`); - expect(text).toBe("

Hello Updated Workers + Assets

"); + expect(response.headers.get("etag")).not.toBe(originalETag); - text = await fetchText(`${url}/about.html`); - expect(text).toBe("

Read more about Workers + Assets

"); + // unchanged -> expect 304 + ({ response, cachedETags } = await fetchWithETag( + `${url}/about.html`, + cachedETags + )); + expect(response.status).toBe(304); // remove await helper.removeFiles(["dist/about.html"]); @@ -1114,13 +1168,12 @@ describe("watch mode", () => { // re-calculating the asset manifest / reverse assets map might not be // done at this point, so retry until they are available - response = await retry( - (s) => s.status !== 404, + ({ response, cachedETags } = await retry( + (s) => s.response.status !== 404, async () => { - const r = await fetch(`${url}/about.html`); - return { text: await r.text(), status: r.status }; + return await fetchWithETag(`${url}/about.html`, cachedETags); } - ); + )); expect(response.status).toBe(404); }); }); diff --git a/packages/wrangler/e2e/helpers/fetch-with-etag.ts b/packages/wrangler/e2e/helpers/fetch-with-etag.ts new file mode 100644 index 000000000000..ad0489478a6e --- /dev/null +++ b/packages/wrangler/e2e/helpers/fetch-with-etag.ts @@ -0,0 +1,12 @@ +import { fetch } from "undici"; + +export const fetchWithETag = async ( + url: string, + cachedETags: Record +) => { + const response = await fetch(url, { + headers: { "if-none-match": cachedETags[url] ?? "" }, + }); + cachedETags[url] = response.headers.get("etag") ?? ""; + return { response, cachedETags }; +}; diff --git a/packages/wrangler/src/experimental-assets.ts b/packages/wrangler/src/experimental-assets.ts index c821535e3b69..7c0ab8734508 100644 --- a/packages/wrangler/src/experimental-assets.ts +++ b/packages/wrangler/src/experimental-assets.ts @@ -51,7 +51,7 @@ export const syncExperimentalAssets = async ( // 1. generate asset manifest logger.info("🌀 Building list of assets..."); - const manifest = await buildAssetsManifest(assetDirectory); + const manifest = await buildAssetManifest(assetDirectory); // 2. fetch buckets w/ hashes logger.info("🌀 Starting asset upload..."); @@ -218,7 +218,7 @@ export const syncExperimentalAssets = async ( return completionJwt; }; -export const buildAssetsManifest = async (dir: string) => { +export const buildAssetManifest = async (dir: string) => { const files = await readdir(dir, { recursive: true }); const manifest: AssetManifest = {}; let counter = 0; diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 91fd34e0e215..59494e9ac876 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -6,6 +6,12 @@ settings: catalogs: default: + '@vitest/runner': + specifier: ~2.1.1 + version: 2.1.1 + '@vitest/snapshot': + specifier: ~2.1.1 + version: 2.1.1 vitest: specifier: ~2.1.1 version: 2.1.1