From 87510fd6444c74c19b1f854f548a91afbb9d9025 Mon Sep 17 00:00:00 2001
From: Herko Lategan
Date: Wed, 8 Feb 2023 13:10:40 +0000
Subject: [PATCH] roachprod: guard calls to SetupSSH

This change ensures that calls to `SetupSSH` do not run concurrently
across processes or threads. Overlapping calls are not safe and can lead
to invalid SSH configurations. The scenario takes place when multiple
clusters are created simultaneously from the same or multiple processes.

Resolves: #90092

Release note: None
---
 pkg/roachprod/roachprod.go | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/pkg/roachprod/roachprod.go b/pkg/roachprod/roachprod.go
index fb90139e2a52..f8b6072685a1 100644
--- a/pkg/roachprod/roachprod.go
+++ b/pkg/roachprod/roachprod.go
@@ -224,8 +224,9 @@ func CachedClusters(l *logger.Logger, fn func(clusterName string, numVMs int)) {
 	}
 }
 
-// acquireFilesystemLock acquires a filesystem lock so that two concurrent
-// synchronizations of roachprod state don't clobber each other.
+// acquireFilesystemLock acquires a filesystem lock in order that concurrent
+// operations or roachprod processes that access shared system resources do
+// not conflict.
 func acquireFilesystemLock() (unlockFn func(), _ error) {
 	lockFile := os.ExpandEnv("$HOME/.roachprod/LOCK")
 	f, err := os.Create(lockFile)
@@ -577,6 +578,11 @@ func SetupSSH(ctx context.Context, l *logger.Logger, clusterName string) error {
 
 	// Configure SSH for machines in the zones we operate on.
 	if err := vm.ProvidersSequential(providers, func(p vm.Provider) error {
+		unlock, lockErr := acquireFilesystemLock()
+		if lockErr != nil {
+			return lockErr
+		}
+		defer unlock()
 		return p.ConfigSSH(zones[p.Name()])
 	}); err != nil {
 		return err