From 0609ecf62480aac0b7e6d01d82229fb740dc6086 Mon Sep 17 00:00:00 2001 From: Simeon Widdis Date: Wed, 9 Oct 2024 09:26:20 -0700 Subject: [PATCH] Switch OSD Optimizer to rely on file hashes instead of `mtime`s (#8472) * Hotswap getMtimes to compute sha1 hashes Signed-off-by: Simeon Widdis * Rename all the things Signed-off-by: Simeon Widdis * Changeset file for PR #8472 created/updated --------- Signed-off-by: Simeon Widdis Co-authored-by: opensearch-changeset-bot[bot] <154024398+opensearch-changeset-bot[bot]@users.noreply.github.com> --- changelogs/fragments/8472.yml | 2 ++ .../osd-optimizer/src/common/bundle.test.ts | 2 +- packages/osd-optimizer/src/common/bundle.ts | 8 ++--- .../integration_tests/bundle_cache.test.ts | 22 ++++++------ packages/osd-optimizer/src/node/cache.ts | 16 ++++----- .../src/node/node_auto_tranpilation.ts | 6 ++-- .../src/optimizer/bundle_cache.ts | 6 ++-- .../src/optimizer/cache_keys.test.ts | 12 +++---- .../osd-optimizer/src/optimizer/cache_keys.ts | 14 ++++---- ...{get_mtimes.test.ts => get_hashes.test.ts} | 26 +++++++++----- .../{get_mtimes.ts => get_hashes.ts} | 17 ++++++---- .../osd-optimizer/src/worker/run_compilers.ts | 34 ++++++++----------- 12 files changed, 87 insertions(+), 78 deletions(-) create mode 100644 changelogs/fragments/8472.yml rename packages/osd-optimizer/src/optimizer/{get_mtimes.test.ts => get_hashes.test.ts} (69%) rename packages/osd-optimizer/src/optimizer/{get_mtimes.ts => get_hashes.ts} (72%) diff --git a/changelogs/fragments/8472.yml b/changelogs/fragments/8472.yml new file mode 100644 index 000000000000..ec7f1d92b11e --- /dev/null +++ b/changelogs/fragments/8472.yml @@ -0,0 +1,2 @@ +infra: +- Switch OSD build cache to rely on file hashes instead of `mtime`s ([#8472](https://github.com/opensearch-project/OpenSearch-Dashboards/pull/8472)) \ No newline at end of file diff --git a/packages/osd-optimizer/src/common/bundle.test.ts b/packages/osd-optimizer/src/common/bundle.test.ts index ceb7eff4b1db..fab8043e8799 100644 --- a/packages/osd-optimizer/src/common/bundle.test.ts +++ b/packages/osd-optimizer/src/common/bundle.test.ts @@ -55,7 +55,7 @@ it('creates cache keys', () => { ) ).toMatchInlineSnapshot(` Object { - "mtimes": Object { + "hashes": Object { "/foo/bar/a": 123, "/foo/bar/c": 789, }, diff --git a/packages/osd-optimizer/src/common/bundle.ts b/packages/osd-optimizer/src/common/bundle.ts index fbc03c55b03a..2f5932726668 100644 --- a/packages/osd-optimizer/src/common/bundle.ts +++ b/packages/osd-optimizer/src/common/bundle.ts @@ -103,13 +103,13 @@ export class Bundle { /** * Calculate the cache key for this bundle based from current - * mtime values. + * hash values. */ - createCacheKey(files: string[], mtimes: Map): unknown { + createCacheKey(files: string[], hashes: Map): unknown { return { spec: this.toSpec(), - mtimes: entriesToObject( - files.map((p) => [p, mtimes.get(p)] as const).sort(ascending((e) => e[0])) + hashes: entriesToObject( + files.map((p) => [p, hashes.get(p)] as const).sort(ascending((e) => e[0])) ), }; } diff --git a/packages/osd-optimizer/src/integration_tests/bundle_cache.test.ts b/packages/osd-optimizer/src/integration_tests/bundle_cache.test.ts index 83a503a21e7e..21abccca17e1 100644 --- a/packages/osd-optimizer/src/integration_tests/bundle_cache.test.ts +++ b/packages/osd-optimizer/src/integration_tests/bundle_cache.test.ts @@ -34,7 +34,7 @@ import cpy from 'cpy'; import del from 'del'; import { createAbsolutePathSerializer } from '@osd/dev-utils'; -import { getMtimes } from '../optimizer/get_mtimes'; +import { getHashes } from '../optimizer/get_hashes'; import { OptimizerConfig } from '../optimizer/optimizer_config'; import { allValuesFrom, Bundle } from '../common'; import { getBundleCacheEvent$ } from '../optimizer/bundle_cache'; @@ -77,8 +77,8 @@ it('emits "bundle cached" event when everything is updated', async () => { Path.resolve(MOCK_REPO_DIR, 'plugins/foo/public/index.ts'), Path.resolve(MOCK_REPO_DIR, 'plugins/foo/public/lib.ts'), ]; - const mtimes = await getMtimes(files); - const cacheKey = bundle.createCacheKey(files, mtimes); + const hashes = await getHashes(files); + const cacheKey = bundle.createCacheKey(files, hashes); bundle.cache.set({ cacheKey, @@ -116,8 +116,8 @@ it('emits "bundle not cached" event when cacheKey is up to date but caching is d Path.resolve(MOCK_REPO_DIR, 'plugins/foo/public/index.ts'), Path.resolve(MOCK_REPO_DIR, 'plugins/foo/public/lib.ts'), ]; - const mtimes = await getMtimes(files); - const cacheKey = bundle.createCacheKey(files, mtimes); + const hashes = await getHashes(files); + const cacheKey = bundle.createCacheKey(files, hashes); bundle.cache.set({ cacheKey, @@ -155,8 +155,8 @@ it('emits "bundle not cached" event when optimizerCacheKey is missing', async () Path.resolve(MOCK_REPO_DIR, 'plugins/foo/public/index.ts'), Path.resolve(MOCK_REPO_DIR, 'plugins/foo/public/lib.ts'), ]; - const mtimes = await getMtimes(files); - const cacheKey = bundle.createCacheKey(files, mtimes); + const hashes = await getHashes(files); + const cacheKey = bundle.createCacheKey(files, hashes); bundle.cache.set({ cacheKey, @@ -194,8 +194,8 @@ it('emits "bundle not cached" event when optimizerCacheKey is outdated, includes Path.resolve(MOCK_REPO_DIR, 'plugins/foo/public/index.ts'), Path.resolve(MOCK_REPO_DIR, 'plugins/foo/public/lib.ts'), ]; - const mtimes = await getMtimes(files); - const cacheKey = bundle.createCacheKey(files, mtimes); + const hashes = await getHashes(files); + const cacheKey = bundle.createCacheKey(files, hashes); bundle.cache.set({ cacheKey, @@ -238,8 +238,8 @@ it('emits "bundle not cached" event when bundleRefExportIds is outdated, include Path.resolve(MOCK_REPO_DIR, 'plugins/foo/public/index.ts'), Path.resolve(MOCK_REPO_DIR, 'plugins/foo/public/lib.ts'), ]; - const mtimes = await getMtimes(files); - const cacheKey = bundle.createCacheKey(files, mtimes); + const hashes = await getHashes(files); + const cacheKey = bundle.createCacheKey(files, hashes); bundle.cache.set({ cacheKey, diff --git a/packages/osd-optimizer/src/node/cache.ts b/packages/osd-optimizer/src/node/cache.ts index a81c656267bf..c176a38b932f 100644 --- a/packages/osd-optimizer/src/node/cache.ts +++ b/packages/osd-optimizer/src/node/cache.ts @@ -47,7 +47,7 @@ const dbName = (db: LMDB.Database) => export class Cache { private readonly codes: LMDB.RootDatabase; private readonly atimes: LMDB.Database; - private readonly mtimes: LMDB.Database; + private readonly hashes: LMDB.Database; private readonly sourceMaps: LMDB.Database; private readonly pathRoots: string[]; private readonly prefix: string; @@ -82,8 +82,8 @@ export class Cache { encoding: 'string', }); - this.mtimes = this.codes.openDB('mtimes', { - name: 'mtimes', + this.hashes = this.codes.openDB('hashes', { + name: 'hashes', encoding: 'string', }); @@ -106,8 +106,8 @@ export class Cache { } } - getMtime(path: string) { - return this.safeGet(this.mtimes, this.getKey(path)); + getFileHash(path: string) { + return this.safeGet(this.hashes, this.getKey(path)); } getCode(path: string) { @@ -131,12 +131,12 @@ export class Cache { } } - async update(path: string, file: { mtime: string; code: string; map: any }) { + async update(path: string, file: { filehash: string; code: string; map: any }) { const key = this.getKey(path); await Promise.all([ this.safePut(this.atimes, key, GLOBAL_ATIME), - this.safePut(this.mtimes, key, file.mtime), + this.safePut(this.hashes, key, file.filehash), this.safePut(this.codes, key, file.code), this.safePut(this.sourceMaps, key, JSON.stringify(file.map)), ]); @@ -223,7 +223,7 @@ export class Cache { // if a future version starts returning independent promises so // this is just for some future-proofing promises.add(this.atimes.remove(k)); - promises.add(this.mtimes.remove(k)); + promises.add(this.hashes.remove(k)); promises.add(this.codes.remove(k)); promises.add(this.sourceMaps.remove(k)); } diff --git a/packages/osd-optimizer/src/node/node_auto_tranpilation.ts b/packages/osd-optimizer/src/node/node_auto_tranpilation.ts index 87fcde1ce9d3..8beeaca3e144 100644 --- a/packages/osd-optimizer/src/node/node_auto_tranpilation.ts +++ b/packages/osd-optimizer/src/node/node_auto_tranpilation.ts @@ -105,8 +105,8 @@ function determineCachePrefix() { function compile(cache: Cache, source: string, path: string) { try { - const mtime = `${Fs.statSync(path).mtimeMs}`; - if (cache.getMtime(path) === mtime) { + const filehash = Crypto.createHash('sha1').update(Fs.readFileSync(path)).digest('base64'); + if (cache.getFileHash(path) === filehash) { const code = cache.getCode(path); if (code) { // code *should* always be defined, but if it isn't for some reason rebuild it @@ -122,7 +122,7 @@ function compile(cache: Cache, source: string, path: string) { } cache.update(path, { - mtime, + filehash, map: result.map, code: result.code, }); diff --git a/packages/osd-optimizer/src/optimizer/bundle_cache.ts b/packages/osd-optimizer/src/optimizer/bundle_cache.ts index 4545eb8072c0..6976606e1c4b 100644 --- a/packages/osd-optimizer/src/optimizer/bundle_cache.ts +++ b/packages/osd-optimizer/src/optimizer/bundle_cache.ts @@ -34,7 +34,7 @@ import { mergeAll } from 'rxjs/operators'; import { Bundle, BundleRefs } from '../common'; import { OptimizerConfig } from './optimizer_config'; -import { getMtimes } from './get_mtimes'; +import { getHashes } from './get_hashes'; import { diffCacheKey } from './cache_keys'; export type BundleCacheEvent = BundleNotCachedEvent | BundleCachedEvent; @@ -136,7 +136,7 @@ export function getBundleCacheEvent$( eligibleBundles.push(bundle); } - const mtimes = await getMtimes( + const hashes = await getHashes( new Set( eligibleBundles.reduce( (acc: string[], bundle) => [...acc, ...(bundle.cache.getReferencedFiles() || [])], @@ -148,7 +148,7 @@ export function getBundleCacheEvent$( for (const bundle of eligibleBundles) { const diff = diffCacheKey( bundle.cache.getCacheKey(), - bundle.createCacheKey(bundle.cache.getReferencedFiles() || [], mtimes) + bundle.createCacheKey(bundle.cache.getReferencedFiles() || [], hashes) ); if (diff) { diff --git a/packages/osd-optimizer/src/optimizer/cache_keys.test.ts b/packages/osd-optimizer/src/optimizer/cache_keys.test.ts index 95cd4b4b1d32..84d87678fad8 100644 --- a/packages/osd-optimizer/src/optimizer/cache_keys.test.ts +++ b/packages/osd-optimizer/src/optimizer/cache_keys.test.ts @@ -45,8 +45,8 @@ jest.mock('./get_changes.ts', () => ({ ]), })); -jest.mock('./get_mtimes.ts', () => ({ - getMtimes: async (paths: string[]) => new Map(paths.map((path) => [path, 12345])), +jest.mock('./get_hashes.ts', () => ({ + getHashes: async (paths: string[]) => new Map(paths.map((path) => [path, ''])), })); jest.mock('execa'); @@ -88,11 +88,11 @@ describe('getOptimizerCacheKey()', () => { "deletedPaths": Array [ "/foo/bar/c", ], - "lastCommit": "", - "modifiedTimes": Object { - "/foo/bar/a": 12345, - "/foo/bar/b": 12345, + "fileHashes": Object { + "/foo/bar/a": "", + "/foo/bar/b": "", }, + "lastCommit": "", "workerConfig": Object { "browserslistEnv": "dev", "dist": false, diff --git a/packages/osd-optimizer/src/optimizer/cache_keys.ts b/packages/osd-optimizer/src/optimizer/cache_keys.ts index 16efdea4888b..d809052dec6b 100644 --- a/packages/osd-optimizer/src/optimizer/cache_keys.ts +++ b/packages/osd-optimizer/src/optimizer/cache_keys.ts @@ -40,7 +40,7 @@ import { diff } from 'jest-diff'; import jsonStable from 'json-stable-stringify'; import { ascending, CacheableWorkerConfig } from '../common'; -import { getMtimes } from './get_mtimes'; +import { getHashes } from './get_hashes'; import { getChanges } from './get_changes'; import { OptimizerConfig } from './optimizer_config'; @@ -138,7 +138,7 @@ export interface OptimizerCacheKey { readonly bootstrap: string | undefined; readonly workerConfig: CacheableWorkerConfig; readonly deletedPaths: string[]; - readonly modifiedTimes: Record; + readonly fileHashes: Record; } async function getLastCommit() { @@ -181,14 +181,14 @@ export async function getOptimizerCacheKey(config: OptimizerConfig) { lastCommit, bootstrap, deletedPaths, - modifiedTimes: {} as Record, + fileHashes: {} as Record, workerConfig: config.getCacheableWorkerConfig(), }; - const mtimes = await getMtimes(modifiedPaths); - for (const [path, mtime] of Array.from(mtimes.entries()).sort(ascending((e) => e[0]))) { - if (typeof mtime === 'number') { - cacheKeys.modifiedTimes[path] = mtime; + const hashes = await getHashes(modifiedPaths); + for (const [path, filehash] of Array.from(hashes.entries()).sort(ascending((e) => e[0]))) { + if (typeof filehash === 'string') { + cacheKeys.fileHashes[path] = filehash; } } diff --git a/packages/osd-optimizer/src/optimizer/get_mtimes.test.ts b/packages/osd-optimizer/src/optimizer/get_hashes.test.ts similarity index 69% rename from packages/osd-optimizer/src/optimizer/get_mtimes.test.ts rename to packages/osd-optimizer/src/optimizer/get_hashes.test.ts index d305ca59a1a0..1713d9de5ad7 100644 --- a/packages/osd-optimizer/src/optimizer/get_mtimes.test.ts +++ b/packages/osd-optimizer/src/optimizer/get_hashes.test.ts @@ -30,28 +30,36 @@ jest.mock('fs'); -import { getMtimes } from './get_mtimes'; +import { getHashes } from './get_hashes'; -const { stat }: { stat: jest.Mock } = jest.requireMock('fs'); +const { stat, readFile }: { stat: jest.Mock; readFile: jest.Mock } = jest.requireMock('fs'); -it('returns mtimes Map', async () => { +it('returns hashes Map', async () => { stat.mockImplementation((path, cb) => { if (path.includes('missing')) { const error = new Error('file not found'); (error as any).code = 'ENOENT'; cb(error); } else { - cb(null, { - mtimeMs: 1234, - }); + cb(null, {}); } }); - await expect(getMtimes(['/foo/bar', '/foo/missing', '/foo/baz', '/foo/bar'])).resolves + readFile.mockImplementation((path, cb) => { + if (path.includes('missing')) { + const error = new Error('file not found'); + (error as any).code = 'ENOENT'; + cb(error); + } else { + cb(null, `Content of ${path}`); + } + }); + + await expect(getHashes(['/foo/bar', '/foo/missing', '/foo/baz', '/foo/bar'])).resolves .toMatchInlineSnapshot(` Map { - "/foo/bar" => 1234, - "/foo/baz" => 1234, + "/foo/bar" => "OwCtruddjWkB6ROdbLRM0NnWOhs=", + "/foo/baz" => "mb6SFQi4VuH8jbwW3h6YoolklXc=", } `); }); diff --git a/packages/osd-optimizer/src/optimizer/get_mtimes.ts b/packages/osd-optimizer/src/optimizer/get_hashes.ts similarity index 72% rename from packages/osd-optimizer/src/optimizer/get_mtimes.ts rename to packages/osd-optimizer/src/optimizer/get_hashes.ts index fdb9d58c9d8d..b83128d54249 100644 --- a/packages/osd-optimizer/src/optimizer/get_mtimes.ts +++ b/packages/osd-optimizer/src/optimizer/get_hashes.ts @@ -32,23 +32,28 @@ import Fs from 'fs'; import * as Rx from 'rxjs'; import { mergeMap, map, catchError } from 'rxjs/operators'; +import Crypto from 'crypto'; import { allValuesFrom } from '../common'; -const stat$ = Rx.bindNodeCallback(Fs.stat); +// const stat$ = Rx.bindNodeCallback(Fs.stat); +const readFile$ = Rx.bindNodeCallback(Fs.readFile); /** - * get mtimes of referenced paths concurrently, limit concurrency to 100 + * Get content hashes of referenced paths concurrently, with at most 100 concurrent files */ -export async function getMtimes(paths: Iterable) { +export async function getHashes(paths: Iterable): Promise> { return new Map( await allValuesFrom( Rx.from(paths).pipe( - // map paths to [path, mtimeMs] entries with concurrency of + // map paths to [path, sha1Hash] entries with concurrency of // 100 at a time, ignoring missing paths mergeMap( (path) => - stat$(path).pipe( - map((stat) => [path, stat.mtimeMs] as const), + readFile$(path).pipe( + map( + (buffer) => + [path, Crypto.createHash('sha1').update(buffer).digest('base64')] as const + ), catchError((error: any) => error?.code === 'ENOENT' ? Rx.EMPTY : Rx.throwError(error) ) diff --git a/packages/osd-optimizer/src/worker/run_compilers.ts b/packages/osd-optimizer/src/worker/run_compilers.ts index 4cd55b5ad07c..03745eb4911e 100644 --- a/packages/osd-optimizer/src/worker/run_compilers.ts +++ b/packages/osd-optimizer/src/worker/run_compilers.ts @@ -58,6 +58,7 @@ import { isConcatenatedModule, getModulePath, } from './webpack_helpers'; +import { getHashes } from '../optimizer/get_hashes'; const PLUGIN_NAME = '@osd/optimizer'; @@ -178,28 +179,21 @@ const observeCompiler = ( } const files = Array.from(referencedFiles).sort(ascending((p) => p)); - const mtimes = new Map( - files.map((path): [string, number | undefined] => { - try { - return [path, compiler.inputFileSystem.statSync(path)?.mtimeMs]; - } catch (error) { - if (error?.code === 'ENOENT') { - return [path, undefined]; - } - throw error; - } + getHashes(files) + .then((hashes) => { + bundle.cache.set({ + bundleRefExportIds, + optimizerCacheKey: workerConfig.optimizerCacheKey, + cacheKey: bundle.createCacheKey(files, hashes), + moduleCount, + workUnits, + files, + }); }) - ); - - bundle.cache.set({ - bundleRefExportIds, - optimizerCacheKey: workerConfig.optimizerCacheKey, - cacheKey: bundle.createCacheKey(files, mtimes), - moduleCount, - workUnits, - files, - }); + .catch((_err) => { + // If cache fails to write, it's alright to ignore and reattempt next build + }); return compilerMsgs.compilerSuccess({ moduleCount,