From 37038463ed7a33411ade8aaca6510a3c5be56ac3 Mon Sep 17 00:00:00 2001 From: Jonathan Cammisuli Date: Wed, 26 Apr 2023 13:36:20 -0400 Subject: [PATCH] feat(core): remove git file hasher implementation (#16517) --- packages/nx/src/hasher/file-hasher.ts | 3 - .../nx/src/hasher/git-based-file-hasher.ts | 47 ----- packages/nx/src/hasher/git-hasher.spec.ts | 199 ------------------ packages/nx/src/hasher/git-hasher.ts | 150 ------------- .../src/utils/get-hashing-implementation.ts | 18 +- 5 files changed, 1 insertion(+), 416 deletions(-) delete mode 100644 packages/nx/src/hasher/git-based-file-hasher.ts delete mode 100644 packages/nx/src/hasher/git-hasher.spec.ts delete mode 100644 packages/nx/src/hasher/git-hasher.ts diff --git a/packages/nx/src/hasher/file-hasher.ts b/packages/nx/src/hasher/file-hasher.ts index d46dbe5bb5681..1221c2fbd6b0b 100644 --- a/packages/nx/src/hasher/file-hasher.ts +++ b/packages/nx/src/hasher/file-hasher.ts @@ -1,4 +1,3 @@ -import { GitBasedFileHasher } from './git-based-file-hasher'; import { NodeBasedFileHasher } from './node-based-file-hasher'; import { FileHasherBase } from './file-hasher-base'; import { NativeFileHasher } from './native-file-hasher'; @@ -11,8 +10,6 @@ function createFileHasher(): FileHasherBase { switch (getHashingImplementation()) { case HasherImplementation.Native: return new NativeFileHasher(); - case HasherImplementation.Git: - return new GitBasedFileHasher(); case HasherImplementation.Node: return new NodeBasedFileHasher(); } diff --git a/packages/nx/src/hasher/git-based-file-hasher.ts b/packages/nx/src/hasher/git-based-file-hasher.ts deleted file mode 100644 index f296e5c110e23..0000000000000 --- a/packages/nx/src/hasher/git-based-file-hasher.ts +++ /dev/null @@ -1,47 +0,0 @@ -import { workspaceRoot } from '../utils/workspace-root'; -import { performance } from 'perf_hooks'; -import { getFileHashes, getGitHashForFiles } from './git-hasher'; -import { existsSync, readFileSync } from 'fs'; -import { FileHasherBase } from './file-hasher-base'; -import ignore from 'ignore'; - -export class GitBasedFileHasher extends FileHasherBase { - /** - * For the project graph daemon server use-case we can potentially skip expensive work - * by leveraging knowledge of the uncommitted and untracked files, so the init() method - * returns a Map containing this data. - */ - async init() { - performance.mark('init hashing:start'); - this.clear(); - - const gitResult = await getFileHashes(workspaceRoot); - const ignore = getIgnoredGlobs(); - gitResult.allFiles.forEach((hash, filename) => { - if (!ignore.ignores(filename)) { - this.fileHashes.set(filename, hash); - } - }); - this.isInitialized = true; - performance.mark('init hashing:end'); - performance.measure( - 'init hashing', - 'init hashing:start', - 'init hashing:end' - ); - } - - async hashFiles(files: string[]) { - return (await getGitHashForFiles(files, workspaceRoot)).hashes; - } -} - -function getIgnoredGlobs() { - if (existsSync(`${workspaceRoot}/.nxignore`)) { - const ig = ignore(); - ig.add(readFileSync(`${workspaceRoot}/.nxignore`, 'utf-8')); - return ig; - } else { - return { ignores: (file: string) => false }; - } -} diff --git a/packages/nx/src/hasher/git-hasher.spec.ts b/packages/nx/src/hasher/git-hasher.spec.ts deleted file mode 100644 index c98af65b2afae..0000000000000 --- a/packages/nx/src/hasher/git-hasher.spec.ts +++ /dev/null @@ -1,199 +0,0 @@ -import { execSync } from 'child_process'; -import { mkdirSync, removeSync } from 'fs-extra'; -import { dirSync } from 'tmp'; -import { getFileHashes, getGitHashForBatch } from './git-hasher'; - -describe('git-hasher', () => { - let dir: string; - const warnSpy = jest.spyOn(console, 'warn'); - - beforeEach(() => { - dir = dirSync().name; - run(`git init`); - run(`git config user.email "test@test.com"`); - run(`git config user.name "test"`); - run(`git config commit.gpgsign false`); - - warnSpy.mockClear(); - }); - - afterEach(() => { - expect(console.warn).not.toHaveBeenCalled(); - removeSync(dir); - }); - - it('should work', async () => { - run(`echo AAA > a.txt`); - run(`git add .`); - run(`git commit -am init`); - const hashes = (await getFileHashes(dir)).allFiles; - expect([...hashes.keys()]).toEqual([`a.txt`]); - expect(hashes.get(`a.txt`)).toBeDefined(); - - // should handle additions - run(`echo BBB > b.txt`); - expect([...(await getFileHashes(dir)).allFiles.keys()]).toEqual([ - `a.txt`, - `b.txt`, - ]); - - run(`git add .`); - expect([...(await getFileHashes(dir)).allFiles.keys()]).toEqual([ - `a.txt`, - `b.txt`, - ]); - - run(`git commit -am second`); - expect([...(await getFileHashes(dir)).allFiles.keys()]).toEqual([ - `a.txt`, - `b.txt`, - ]); - - // should handle removals - // removal unstaged - run(`rm b.txt`); - expect([...(await getFileHashes(dir)).allFiles.keys()]).toEqual([`a.txt`]); - - // removal staged - run(`git add .`); - expect([...(await getFileHashes(dir)).allFiles.keys()]).toEqual([`a.txt`]); - - // removed committed - run(`git commit -am third`); - expect([...(await getFileHashes(dir)).allFiles.keys()]).toEqual([`a.txt`]); - - // should handle moves - run(`mv a.txt newa.txt`); - expect([...(await getFileHashes(dir)).allFiles.keys()]).toEqual([ - `newa.txt`, - ]); - - run(`git add .`); - expect([...(await getFileHashes(dir)).allFiles.keys()]).toEqual([ - `newa.txt`, - ]); - - run(`echo AAAA > a.txt`); - expect([...(await getFileHashes(dir)).allFiles.keys()]).toEqual([ - `newa.txt`, - `a.txt`, - ]); - - run(`git add .`); - expect([...(await getFileHashes(dir)).allFiles.keys()]).toEqual([ - `a.txt`, - `newa.txt`, - ]); - }); - - it('should handle spaces in filenames', async () => { - run(`echo AAA > "a b".txt`); - run(`git add .`); - run(`git commit -am init`); - run(`touch "x y z.txt"`); // unstaged - expect([...(await getFileHashes(dir)).allFiles.keys()]).toEqual([ - `a b.txt`, - `x y z.txt`, - ]); - run(`git add .`); - expect([...(await getFileHashes(dir)).allFiles.keys()]).toEqual([ - `a b.txt`, - `x y z.txt`, - ]); - run(`mv "a b.txt" "a b moved.txt"`); - expect([...(await getFileHashes(dir)).allFiles.keys()]).toEqual([ - `x y z.txt`, - `a b moved.txt`, - ]); - run(`git add .`); - expect([...(await getFileHashes(dir)).allFiles.keys()]).toEqual([ - `a b moved.txt`, - `x y z.txt`, - ]); - run(`rm "x y z.txt"`); - expect([...(await getFileHashes(dir)).allFiles.keys()]).toEqual([ - `a b moved.txt`, - ]); - }); - - it('should handle renames and modifications', async () => { - run(`echo AAA > "a".txt`); - run(`git add .`); - run(`git commit -am init`); - run(`mv a.txt moda.txt`); - run(`git add .`); - run(`echo modified >> moda.txt`); - expect([...(await getFileHashes(dir)).allFiles.keys()]).toEqual([ - `moda.txt`, - ]); - }); - - it('should handle special characters in filenames', async () => { - run(`echo AAA > "a-ū".txt`); - run(`echo BBB > "b-ū".txt`); - run(`git add .`); - run(`git commit -am init`); - expect([...(await getFileHashes(dir)).allFiles.keys()]).toEqual([ - `a-ū.txt`, - `b-ū.txt`, - ]); - - run(`mv a-ū.txt moda-ū.txt`); - run(`git add .`); - run(`echo modified >> moda-ū.txt`); - expect([...(await getFileHashes(dir)).allFiles.keys()]).toEqual([ - `b-ū.txt`, - `moda-ū.txt`, - ]); - - run(`rm "moda-ū.txt"`); - expect([...(await getFileHashes(dir)).allFiles.keys()]).toEqual([ - `b-ū.txt`, - ]); - }); - - it('should work with sub-directories', async () => { - const subDir = `${dir}/sub`; - mkdirSync(subDir); - run(`echo AAA > a.txt`); - run(`echo BBB > sub/b.txt`); - run(`git add --all`); - run(`git commit -am init`); - expect([...(await getFileHashes(subDir)).allFiles.keys()]).toEqual([ - `b.txt`, - ]); - - run(`echo CCC > sub/c.txt`); - expect([...(await getFileHashes(subDir)).allFiles.keys()]).toEqual([ - `b.txt`, - `c.txt`, - ]); - }); - - function run(command: string) { - return execSync(command, { cwd: dir, stdio: ['pipe', 'pipe', 'pipe'] }); - } - - it('should hash two simple files', async () => { - const files = ['a.txt', 'b.txt']; - run(`echo AAA > a.txt`); - run(`echo BBB > b.txt`); - const hashes = await getGitHashForBatch(files, dir); - expect([...hashes.keys()]).toEqual(files); - }); - - it('should fail when file deleted', async () => { - const files = ['a.txt', 'b.txt']; - run(`echo AAA > a.txt`); - try { - const hashes = await getGitHashForBatch(files, dir); - expect(false).toBeTruthy(); - } catch (err: any) { - expect(err instanceof Error).toBeTruthy(); - const error = err as Error; - expect(error.message).toMatch( - /Passed 2 file paths to Git to hash, but received 1 hashes\.\n *fatal:.*b\.txt.*No such file or directory\n/ - ); - } - }); -}); diff --git a/packages/nx/src/hasher/git-hasher.ts b/packages/nx/src/hasher/git-hasher.ts deleted file mode 100644 index 1b4a5c8d80095..0000000000000 --- a/packages/nx/src/hasher/git-hasher.ts +++ /dev/null @@ -1,150 +0,0 @@ -import { spawn } from 'child_process'; -import { chunkify } from '../utils/chunkify'; -import { fileExists } from '../utils/fileutils'; -import { joinPathFragments } from '../utils/path'; - -export async function getGitHashForFiles( - potentialFilesToHash: string[], - path: string -): Promise<{ hashes: Map; deleted: string[] }> { - const { filesToHash, deleted } = getActualFilesToHash( - potentialFilesToHash, - path - ); - - const res: Map = new Map(); - const promises: Promise>[] = chunkify(filesToHash).map( - (files) => getGitHashForBatch(files, path) - ); - // Merge batch results into final result set - const batchResults = await Promise.all(promises); - for (const batch of batchResults) { - batch.forEach((v, k) => res.set(k, v)); - } - return { hashes: res, deleted }; -} - -export async function getGitHashForBatch(filesToHash: string[], path) { - const res: Map = new Map(); - const { stdout: hashStdout, stderr: hashStderr } = await spawnProcess( - 'git', - ['hash-object', ...filesToHash], - path - ); - const hashes: string[] = hashStdout.split('\n').filter((s) => !!s); - if (hashes.length !== filesToHash.length) { - throw new Error( - `Passed ${filesToHash.length} file paths to Git to hash, but received ${hashes.length} hashes.\n${hashStderr}` - ); - } - for (let i = 0; i < hashes.length; i++) { - const hash: string = hashes[i]; - const filePath: string = filesToHash[i]; - res.set(filePath, hash); - } - return res; -} - -function getActualFilesToHash( - potentialFilesToHash: string[], - path: string -): { filesToHash: string[]; deleted: string[] } { - const filesToHash = []; - const deleted = []; - for (const file of potentialFilesToHash) { - if (fileExists(joinPathFragments(path, file))) { - filesToHash.push(file); - } else { - deleted.push(file); - } - } - return { filesToHash, deleted }; -} - -async function spawnProcess( - command: string, - args: string[], - cwd: string -): Promise<{ code: number; stdout: string; stderr: string }> { - const cp = spawn(command, args, { - windowsHide: true, - detached: true, - shell: false, - cwd, - }); - let stdout = ''; - let stderr = ''; - cp.stdout.on('data', (data) => { - stdout += data; - }); - cp.stderr.on('data', (data) => { - stderr += data; - }); - return new Promise((resolve) => { - cp.on('close', (code) => { - resolve({ code, stdout, stderr }); - }); - }); -} - -async function getStagedFiles(path: string) { - const { stdout: staged } = await spawnProcess( - 'git', - ['ls-files', '-s', '-z', '--exclude-standard', '.'], - path - ); - const res = new Map(); - for (const line of staged.split('\0')) { - if (!line) { - continue; - } - const [_, hash, __, ...fileParts] = line.split(/\s/); - const fileName = fileParts.join(' '); - res.set(fileName, hash); - } - return res; -} - -async function getUnstagedFiles(path: string) { - const { stdout: unstaged } = await spawnProcess( - 'git', - ['ls-files', '-m', '-z', '--exclude-standard', '.'], - path - ); - const lines = unstaged.split('\0').filter((f) => !!f); - return getGitHashForFiles(lines, path); -} - -async function getUntrackedFiles(path: string) { - const { stdout: untracked } = await spawnProcess( - 'git', - ['ls-files', '--other', '-z', '--exclude-standard', '.'], - path - ); - const lines = untracked.split('\0').filter((f) => !!f); - return getGitHashForFiles(lines, path); -} - -export async function getFileHashes(path: string): Promise<{ - allFiles: Map; -}> { - const [staged, unstaged, untracked] = await Promise.all([ - getStagedFiles(path), - getUnstagedFiles(path), - getUntrackedFiles(path), - ]); - - unstaged.hashes.forEach((hash: string, filename: string) => { - staged.set(filename, hash); - }); - - unstaged.deleted.forEach((filename) => { - staged.delete(filename); - }); - - untracked.hashes.forEach((hash: string, filename: string) => { - staged.set(filename, hash); - }); - - return { allFiles: staged }; -} diff --git a/packages/nx/src/utils/get-hashing-implementation.ts b/packages/nx/src/utils/get-hashing-implementation.ts index a833c74371948..188b015660023 100644 --- a/packages/nx/src/utils/get-hashing-implementation.ts +++ b/packages/nx/src/utils/get-hashing-implementation.ts @@ -1,13 +1,7 @@ import { NativeFileHasher } from '../hasher/native-file-hasher'; -import { workspaceRoot } from './workspace-root'; - -import { execSync } from 'child_process'; -import { existsSync } from 'fs'; -import { join } from 'path'; export enum HasherImplementation { Native = 'Native', - Git = 'Git', Node = 'Node', } @@ -21,17 +15,7 @@ export function getHashingImplementation() { return HasherImplementation.Native; } - execSync('git rev-parse --is-inside-work-tree', { - stdio: 'ignore', - windowsHide: true, - }); - - // we don't use git based hasher when the repo uses git submodules - if (!existsSync(join(workspaceRoot, '.git', 'modules'))) { - return HasherImplementation.Git; - } else { - return HasherImplementation.Node; - } + return HasherImplementation.Node; } catch { return HasherImplementation.Node; }