From e2339d05acb4adb84225dac0e4f4b7c8149ef0df Mon Sep 17 00:00:00 2001 From: Kimmo Lehto Date: Wed, 4 May 2022 17:57:30 +0300 Subject: [PATCH 1/5] Only allow one k0sctl to run simultaneously per host Cant use configurer before detect OS Flock Wait a while No flock? Take a risk Seal window --- cmd/apply.go | 18 ++++++++++-- configurer/linux.go | 29 +++++++++++++++++++ phase/prepare_hosts.go | 28 ++++++++++++++++++ .../v1beta1/cluster/host.go | 1 + 4 files changed, 74 insertions(+), 2 deletions(-) diff --git a/cmd/apply.go b/cmd/apply.go index cfa1fd9d..482cfc86 100644 --- a/cmd/apply.go +++ b/cmd/apply.go @@ -80,14 +80,28 @@ var applyCommand = &cli.Command{ analytics.Client.Publish("apply-start", map[string]interface{}{}) - if err := manager.Run(); err != nil { + var result error + + defer func() { + // Handle panics and failed applies by running the disconnect phase + if err := recover(); err != nil || result != nil { + p := &phase.Disconnect{} + _ = p.Prepare(manager.Config) + _ = p.Run() + if err != nil { + panic(err) + } + } + }() + + if result = manager.Run(); result != nil { analytics.Client.Publish("apply-failure", map[string]interface{}{"clusterID": manager.Config.Spec.K0s.Metadata.ClusterID}) if lf, err := LogFile(); err == nil { if ln, ok := lf.(interface{ Name() string }); ok { log.Errorf("apply failed - log file saved to %s", ln.Name()) } } - return err + return result } analytics.Client.Publish("apply-success", map[string]interface{}{"duration": time.Since(start), "clusterID": manager.Config.Spec.K0s.Metadata.ClusterID}) diff --git a/configurer/linux.go b/configurer/linux.go index 1a4f83d2..9085a936 100644 --- a/configurer/linux.go +++ b/configurer/linux.go @@ -6,11 +6,13 @@ import ( "regexp" "strconv" "strings" + "time" "github.com/alessio/shellescape" "github.com/k0sproject/rig/exec" "github.com/k0sproject/rig/os" "github.com/k0sproject/version" + log "github.com/sirupsen/logrus" ) // Static Constants Interface for overriding by distro-specific structs @@ -87,6 +89,33 @@ func (l Linux) K0sJoinTokenPath() string { return "/etc/k0s/k0stoken" } +// TryLock tries to obtain an exclusive lock on the host to avoid multiple instances accessing it at once +func (l Linux) TryLock(h os.Host) error { + if err := h.Exec("command -v flock"); err != nil { + log.Warnf("%s: host does not have the 'flock' command which is used to ensure only one instance of k0sctl operates on it at a time", h) + return nil + } + + sshpid, err := h.ExecOutput("ps --no-headers -eo ppid -fp $$") + if err != nil { + return err + } + + errCh := make(chan error) + go func() { + errCh <- h.Execf(`flock -n /tmp/k0sctl.lock -c "while ps -p %s > /dev/null; do sleep 1; done;"`, sshpid, exec.Sudo(h)) + }() + + select { + case err := <-errCh: + log.Debugf("%s: lock failed: %s", h, err) + return fmt.Errorf("failed to obtain an exclusive lock on the host: %w", err) + case <-time.After(time.Second * 5): + log.Debugf("%s: acquired a lock", h) + return nil + } +} + // TempFile returns a temp file path func (l Linux) TempFile(h os.Host) (string, error) { return h.ExecOutput("mktemp") diff --git a/phase/prepare_hosts.go b/phase/prepare_hosts.go index d36d8b29..ba42fab7 100644 --- a/phase/prepare_hosts.go +++ b/phase/prepare_hosts.go @@ -1,8 +1,11 @@ package phase import ( + "errors" "strings" + "time" + retry "github.com/avast/retry-go" "github.com/k0sproject/k0sctl/pkg/apis/k0sctl.k0sproject.io/v1beta1/cluster" "github.com/k0sproject/rig/os" log "github.com/sirupsen/logrus" @@ -34,6 +37,31 @@ func (p *PrepareHosts) prepareHost(h *cluster.Host) error { } } + err := retry.Do( + func() error { + return h.Configurer.TryLock(h) + }, + retry.OnRetry( + func(n uint, err error) { + log.Errorf("%s: attempt %d of %d.. trying to obtain a lock on host: %s", h, n+1, retries, err.Error()) + }, + ), + retry.RetryIf( + func(err error) bool { + return !strings.Contains(err.Error(), "host does not have") + }, + ), + retry.DelayType(retry.CombineDelay(retry.FixedDelay, retry.RandomDelay)), + retry.MaxJitter(time.Second*2), + retry.Delay(time.Second*3), + retry.Attempts(5), + retry.LastErrorOnly(true), + ) + + if err != nil && !strings.Contains(err.Error(), "host does not have") { + return errors.New("another k0sctl instance is currently operating on the node") + } + if len(h.Environment) > 0 { log.Infof("%s: updating environment", h) if err := h.Configurer.UpdateEnvironment(h, h.Environment); err != nil { diff --git a/pkg/apis/k0sctl.k0sproject.io/v1beta1/cluster/host.go b/pkg/apis/k0sctl.k0sproject.io/v1beta1/cluster/host.go index 12ffbcfd..ceb269bc 100644 --- a/pkg/apis/k0sctl.k0sproject.io/v1beta1/cluster/host.go +++ b/pkg/apis/k0sctl.k0sproject.io/v1beta1/cluster/host.go @@ -117,6 +117,7 @@ type configurer interface { CleanupServiceEnvironment(os.Host, string) error Stat(os.Host, string, ...exec.Option) (*os.FileInfo, error) Touch(os.Host, string, time.Time, ...exec.Option) error + TryLock(os.Host) error } // HostMetadata resolved metadata for host From dfe32d09d26803196a0ec8a2bad27a9618d7f758 Mon Sep 17 00:00:00 2001 From: Kimmo Lehto Date: Tue, 24 May 2022 13:45:42 +0300 Subject: [PATCH 2/5] Complete redo --- cmd/apply.go | 3 + cmd/backup.go | 4 + cmd/reset.go | 3 + configurer/linux.go | 58 +++++---- phase/lock.go | 121 ++++++++++++++++++ phase/prepare_hosts.go | 33 +---- phase/unlock.go | 34 +++++ .../v1beta1/cluster/host.go | 3 +- 8 files changed, 203 insertions(+), 56 deletions(-) create mode 100644 phase/lock.go create mode 100644 phase/unlock.go diff --git a/cmd/apply.go b/cmd/apply.go index 482cfc86..69f8bc71 100644 --- a/cmd/apply.go +++ b/cmd/apply.go @@ -48,10 +48,12 @@ var applyCommand = &cli.Command{ phase.NoWait = ctx.Bool("no-wait") manager := phase.Manager{Config: ctx.Context.Value(ctxConfigKey{}).(*v1beta1.Cluster)} + lockPhase := &phase.Lock{} manager.AddPhase( &phase.Connect{}, &phase.DetectOS{}, + lockPhase, &phase.PrepareHosts{}, &phase.GatherFacts{}, &phase.DownloadBinaries{}, @@ -75,6 +77,7 @@ var applyCommand = &cli.Command{ NoDrain: ctx.Bool("no-drain"), }, &phase.RunHooks{Stage: "after", Action: "apply"}, + &phase.Unlock{Cancel: lockPhase.Cancel}, &phase.Disconnect{}, ) diff --git a/cmd/backup.go b/cmd/backup.go index d56487ac..28b27745 100644 --- a/cmd/backup.go +++ b/cmd/backup.go @@ -28,14 +28,18 @@ var backupCommand = &cli.Command{ start := time.Now() manager := phase.Manager{Config: ctx.Context.Value(ctxConfigKey{}).(*v1beta1.Cluster)} + lockPhase := &phase.Lock{} + manager.AddPhase( &phase.Connect{}, &phase.DetectOS{}, + lockPhase, &phase.GatherFacts{}, &phase.GatherK0sFacts{}, &phase.RunHooks{Stage: "before", Action: "backup"}, &phase.Backup{}, &phase.RunHooks{Stage: "after", Action: "backup"}, + &phase.Unlock{Cancel: lockPhase.Cancel}, &phase.Disconnect{}, ) diff --git a/cmd/reset.go b/cmd/reset.go index f0f7f68b..565f809e 100644 --- a/cmd/reset.go +++ b/cmd/reset.go @@ -52,14 +52,17 @@ var resetCommand = &cli.Command{ manager := phase.Manager{Config: ctx.Context.Value(ctxConfigKey{}).(*v1beta1.Cluster)} + lockPhase := &phase.Lock{} manager.AddPhase( &phase.Connect{}, &phase.DetectOS{}, + lockPhase, &phase.PrepareHosts{}, &phase.GatherK0sFacts{}, &phase.RunHooks{Stage: "before", Action: "reset"}, &phase.Reset{}, &phase.RunHooks{Stage: "after", Action: "reset"}, + &phase.Unlock{Cancel: lockPhase.Cancel}, &phase.Disconnect{}, ) diff --git a/configurer/linux.go b/configurer/linux.go index 9085a936..13a35a5a 100644 --- a/configurer/linux.go +++ b/configurer/linux.go @@ -6,13 +6,11 @@ import ( "regexp" "strconv" "strings" - "time" "github.com/alessio/shellescape" "github.com/k0sproject/rig/exec" "github.com/k0sproject/rig/os" "github.com/k0sproject/version" - log "github.com/sirupsen/logrus" ) // Static Constants Interface for overriding by distro-specific structs @@ -21,6 +19,7 @@ type PathFuncs interface { K0sConfigPath() string K0sJoinTokenPath() string KubeconfigPath() string + K0sctlLockFilePath() string } // Linux is a base module for various linux OS support packages @@ -89,31 +88,9 @@ func (l Linux) K0sJoinTokenPath() string { return "/etc/k0s/k0stoken" } -// TryLock tries to obtain an exclusive lock on the host to avoid multiple instances accessing it at once -func (l Linux) TryLock(h os.Host) error { - if err := h.Exec("command -v flock"); err != nil { - log.Warnf("%s: host does not have the 'flock' command which is used to ensure only one instance of k0sctl operates on it at a time", h) - return nil - } - - sshpid, err := h.ExecOutput("ps --no-headers -eo ppid -fp $$") - if err != nil { - return err - } - - errCh := make(chan error) - go func() { - errCh <- h.Execf(`flock -n /tmp/k0sctl.lock -c "while ps -p %s > /dev/null; do sleep 1; done;"`, sshpid, exec.Sudo(h)) - }() - - select { - case err := <-errCh: - log.Debugf("%s: lock failed: %s", h, err) - return fmt.Errorf("failed to obtain an exclusive lock on the host: %w", err) - case <-time.After(time.Second * 5): - log.Debugf("%s: acquired a lock", h) - return nil - } +// K0sctlLockFilePath returns a path to a lock file +func (l Linux) K0sctlLockFilePath() string { + return "/run/lock/k0sctl" } // TempFile returns a temp file path @@ -235,3 +212,30 @@ func (l Linux) PrivateAddress(h os.Host, iface, publicip string) (string, error) return "", fmt.Errorf("not found") } + +// UpsertFile creates a file in path with content only if the file does not exist already +func (l Linux) UpsertFile(h os.Host, path, content string) error { + tmpf, err := l.TempFile(h) + if err != nil { + return err + } + if err := h.Execf(`cat > "%s"`, tmpf, exec.Stdin(content), exec.Sudo(h)); err != nil { + return err + } + + defer func() { + _ = h.Execf(`rm -f "%s"`, tmpf, exec.Sudo(h)) + }() + + // mv -n is atomic + if err := h.Execf(`mv -n "%s" "%s"`, tmpf, path, exec.Sudo(h)); err != nil { + return fmt.Errorf("upsert failed: %w", err) + } + + // if original tempfile still exists, error out + if h.Execf(`test -f "%s"`, tmpf) == nil { + return fmt.Errorf("upsert failed") + } + + return nil +} diff --git a/phase/lock.go b/phase/lock.go new file mode 100644 index 00000000..0f2935c7 --- /dev/null +++ b/phase/lock.go @@ -0,0 +1,121 @@ +package phase + +import ( + "context" + "fmt" + gos "os" + "sync" + "time" + + retry "github.com/avast/retry-go" + "github.com/k0sproject/k0sctl/analytics" + "github.com/k0sproject/k0sctl/pkg/apis/k0sctl.k0sproject.io/v1beta1" + "github.com/k0sproject/k0sctl/pkg/apis/k0sctl.k0sproject.io/v1beta1/cluster" + "github.com/k0sproject/rig/exec" + log "github.com/sirupsen/logrus" +) + +// Lock acquires an exclusive k0sctl lock on hosts +type Lock struct { + GenericPhase + cfs []func() + instanceID string + m sync.Mutex +} + +// Prepare the phase +func (p *Lock) Prepare(c *v1beta1.Cluster) error { + p.Config = c + mid, _ := analytics.MachineID() + p.instanceID = fmt.Sprintf("%s-%d", mid, gos.Getpid()) + return nil +} + +// Title for the phase +func (p *Lock) Title() string { + return "Acquire exclusive host lock" +} + +func (p *Lock) Cancel() { + p.m.Lock() + defer p.m.Unlock() + for _, f := range p.cfs { + f() + } +} + +// Run the phase +func (p *Lock) Run() error { + if err := p.Config.Spec.Hosts.ParallelEach(p.startLock); err != nil { + return err + } + return p.Config.Spec.Hosts.ParallelEach(p.startTicker) +} + +func (p *Lock) startTicker(h *cluster.Host) error { + lfp := h.Configurer.K0sctlLockFilePath() + ticker := time.NewTicker(10 * time.Second) + ctx, cancel := context.WithCancel(context.Background()) + p.m.Lock() + p.cfs = append(p.cfs, cancel) + p.m.Unlock() + + go func() { + log.Debugf("%s: started periodic update of lock file %s timestamp", h, lfp) + for { + select { + case <-ticker.C: + if err := h.Configurer.Touch(h, h.Configurer.K0sctlLockFilePath(), time.Now(), exec.Sudo(h)); err != nil { + log.Debugf("%s: failed to touch lock file: %s", h, err) + } + case <-ctx.Done(): + _ = h.Configurer.DeleteFile(h, lfp) + return + } + } + }() + + return nil +} + +func (p *Lock) startLock(h *cluster.Host) error { + return retry.Do( + func() error { + return p.tryLock(h) + }, + retry.OnRetry( + func(n uint, err error) { + log.Errorf("%s: attempt %d of %d.. trying to obtain a lock on host: %s", h, n+1, retries, err.Error()) + }, + ), + retry.DelayType(retry.CombineDelay(retry.FixedDelay, retry.RandomDelay)), + retry.MaxJitter(time.Second*2), + retry.Delay(time.Second*3), + retry.Attempts(5), + retry.LastErrorOnly(true), + ) +} + +func (p *Lock) tryLock(h *cluster.Host) error { + lfp := h.Configurer.K0sctlLockFilePath() + + if err := h.Configurer.UpsertFile(h, lfp, p.instanceID); err != nil { + stat, err := h.Configurer.Stat(h, lfp, exec.Sudo(h)) + if err != nil { + return fmt.Errorf("lock file disappeared: %w", err) + } + content, err := h.Configurer.ReadFile(h, lfp) + if err != nil { + return fmt.Errorf("failed to read lock file: %w", err) + } + if content != p.instanceID { + if time.Since(stat.ModTime()) < 20*time.Second { + return fmt.Errorf("another instance of k0sctl is currently operating on the host") + } + _ = h.Configurer.DeleteFile(h, lfp) + return fmt.Errorf("removed existing expired lock file") + } + } + + return nil +} diff --git a/phase/prepare_hosts.go b/phase/prepare_hosts.go index ba42fab7..c9e1fad6 100644 --- a/phase/prepare_hosts.go +++ b/phase/prepare_hosts.go @@ -1,11 +1,8 @@ package phase import ( - "errors" "strings" - "time" - retry "github.com/avast/retry-go" "github.com/k0sproject/k0sctl/pkg/apis/k0sctl.k0sproject.io/v1beta1/cluster" "github.com/k0sproject/rig/os" log "github.com/sirupsen/logrus" @@ -14,6 +11,7 @@ import ( // PrepareHosts installs required packages and so on on the hosts. type PrepareHosts struct { GenericPhase + cancel func() } // Title for the phase @@ -30,6 +28,10 @@ type prepare interface { Prepare(os.Host) error } +func (p *PrepareHosts) CleanUp() { + p.cancel() +} + func (p *PrepareHosts) prepareHost(h *cluster.Host) error { if c, ok := h.Configurer.(prepare); ok { if err := c.Prepare(h); err != nil { @@ -37,31 +39,6 @@ func (p *PrepareHosts) prepareHost(h *cluster.Host) error { } } - err := retry.Do( - func() error { - return h.Configurer.TryLock(h) - }, - retry.OnRetry( - func(n uint, err error) { - log.Errorf("%s: attempt %d of %d.. trying to obtain a lock on host: %s", h, n+1, retries, err.Error()) - }, - ), - retry.RetryIf( - func(err error) bool { - return !strings.Contains(err.Error(), "host does not have") - }, - ), - retry.DelayType(retry.CombineDelay(retry.FixedDelay, retry.RandomDelay)), - retry.MaxJitter(time.Second*2), - retry.Delay(time.Second*3), - retry.Attempts(5), - retry.LastErrorOnly(true), - ) - - if err != nil && !strings.Contains(err.Error(), "host does not have") { - return errors.New("another k0sctl instance is currently operating on the node") - } - if len(h.Environment) > 0 { log.Infof("%s: updating environment", h) if err := h.Configurer.UpdateEnvironment(h, h.Environment); err != nil { diff --git a/phase/unlock.go b/phase/unlock.go new file mode 100644 index 00000000..c6efa85f --- /dev/null +++ b/phase/unlock.go @@ -0,0 +1,34 @@ +package phase + +import ( + "github.com/k0sproject/k0sctl/pkg/apis/k0sctl.k0sproject.io/v1beta1" + log "github.com/sirupsen/logrus" +) + +// Unlock acquires an exclusive k0sctl lock on hosts +type Unlock struct { + GenericPhase + Cancel func() +} + +// Prepare the phase +func (p *Unlock) Prepare(c *v1beta1.Cluster) error { + p.Config = c + if p.Cancel == nil { + p.Cancel = func() { + log.Fatalf("cancel function not defined") + } + } + return nil +} + +// Title for the phase +func (p *Unlock) Title() string { + return "Release exclusive host lock" +} + +// Run the phase +func (p *Unlock) Run() error { + p.Cancel() + return nil +} diff --git a/pkg/apis/k0sctl.k0sproject.io/v1beta1/cluster/host.go b/pkg/apis/k0sctl.k0sproject.io/v1beta1/cluster/host.go index ceb269bc..b75b810f 100644 --- a/pkg/apis/k0sctl.k0sproject.io/v1beta1/cluster/host.go +++ b/pkg/apis/k0sctl.k0sproject.io/v1beta1/cluster/host.go @@ -117,7 +117,8 @@ type configurer interface { CleanupServiceEnvironment(os.Host, string) error Stat(os.Host, string, ...exec.Option) (*os.FileInfo, error) Touch(os.Host, string, time.Time, ...exec.Option) error - TryLock(os.Host) error + K0sctlLockFilePath() string + UpsertFile(os.Host, string, string) error } // HostMetadata resolved metadata for host From d694b8f4e359b62186f5bfa1f1b998f96354bdf6 Mon Sep 17 00:00:00 2001 From: Kimmo Lehto Date: Tue, 24 May 2022 14:19:21 +0300 Subject: [PATCH 3/5] Cleanup --- cmd/apply.go | 12 ------------ phase/lock.go | 13 ++++++++++--- 2 files changed, 10 insertions(+), 15 deletions(-) diff --git a/cmd/apply.go b/cmd/apply.go index 69f8bc71..fcf3a7a8 100644 --- a/cmd/apply.go +++ b/cmd/apply.go @@ -85,18 +85,6 @@ var applyCommand = &cli.Command{ var result error - defer func() { - // Handle panics and failed applies by running the disconnect phase - if err := recover(); err != nil || result != nil { - p := &phase.Disconnect{} - _ = p.Prepare(manager.Config) - _ = p.Run() - if err != nil { - panic(err) - } - } - }() - if result = manager.Run(); result != nil { analytics.Client.Publish("apply-failure", map[string]interface{}{"clusterID": manager.Config.Spec.K0s.Metadata.ClusterID}) if lf, err := LogFile(); err == nil { diff --git a/phase/lock.go b/phase/lock.go index 0f2935c7..d27bcdd8 100644 --- a/phase/lock.go +++ b/phase/lock.go @@ -21,6 +21,7 @@ type Lock struct { cfs []func() instanceID string m sync.Mutex + wg sync.WaitGroup } // Prepare the phase @@ -42,6 +43,7 @@ func (p *Lock) Cancel() { for _, f := range p.cfs { f() } + p.wg.Wait() } // Run the phase @@ -53,6 +55,7 @@ func (p *Lock) Run() error { } func (p *Lock) startTicker(h *cluster.Host) error { + p.wg.Add(1) lfp := h.Configurer.K0sctlLockFilePath() ticker := time.NewTicker(10 * time.Second) ctx, cancel := context.WithCancel(context.Background()) @@ -66,10 +69,14 @@ func (p *Lock) startTicker(h *cluster.Host) error { select { case <-ticker.C: if err := h.Configurer.Touch(h, h.Configurer.K0sctlLockFilePath(), time.Now(), exec.Sudo(h)); err != nil { - log.Debugf("%s: failed to touch lock file: %s", h, err) + log.Warnf("%s: failed to touch lock file: %s", h, err) } case <-ctx.Done(): - _ = h.Configurer.DeleteFile(h, lfp) + log.Debugf("%s: stopped lock cycle, removing file") + if err := h.Configurer.DeleteFile(h, lfp); err != nil { + log.Warnf("%s: failed to remove host lock file: %s", err) + } + p.wg.Done() return } } @@ -109,7 +116,7 @@ func (p *Lock) tryLock(h *cluster.Host) error { return fmt.Errorf("failed to read lock file: %w", err) } if content != p.instanceID { - if time.Since(stat.ModTime()) < 20*time.Second { + if time.Since(stat.ModTime()) < 30*time.Second { return fmt.Errorf("another instance of k0sctl is currently operating on the host") } _ = h.Configurer.DeleteFile(h, lfp) From 0794f5f034296e67766d1ac7a8a34f1340756e8b Mon Sep 17 00:00:00 2001 From: Kimmo Lehto Date: Tue, 24 May 2022 14:32:49 +0300 Subject: [PATCH 4/5] Lint --- phase/lock.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/phase/lock.go b/phase/lock.go index d27bcdd8..c6cd1a9e 100644 --- a/phase/lock.go +++ b/phase/lock.go @@ -72,9 +72,9 @@ func (p *Lock) startTicker(h *cluster.Host) error { log.Warnf("%s: failed to touch lock file: %s", h, err) } case <-ctx.Done(): - log.Debugf("%s: stopped lock cycle, removing file") + log.Debugf("%s: stopped lock cycle, removing file", h) if err := h.Configurer.DeleteFile(h, lfp); err != nil { - log.Warnf("%s: failed to remove host lock file: %s", err) + log.Warnf("%s: failed to remove host lock file: %s", h, err) } p.wg.Done() return From c945802ad472e3fdc5b15ddf6cd351fefb2b270d Mon Sep 17 00:00:00 2001 From: Kimmo Lehto Date: Wed, 25 May 2022 09:38:44 +0300 Subject: [PATCH 5/5] Fall back to /tmp/k0sctl.lock if /run/lock does not exist --- configurer/linux.go | 9 ++++++--- phase/lock.go | 6 +++--- pkg/apis/k0sctl.k0sproject.io/v1beta1/cluster/host.go | 2 +- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/configurer/linux.go b/configurer/linux.go index 13a35a5a..fe689086 100644 --- a/configurer/linux.go +++ b/configurer/linux.go @@ -19,7 +19,6 @@ type PathFuncs interface { K0sConfigPath() string K0sJoinTokenPath() string KubeconfigPath() string - K0sctlLockFilePath() string } // Linux is a base module for various linux OS support packages @@ -89,8 +88,12 @@ func (l Linux) K0sJoinTokenPath() string { } // K0sctlLockFilePath returns a path to a lock file -func (l Linux) K0sctlLockFilePath() string { - return "/run/lock/k0sctl" +func (l Linux) K0sctlLockFilePath(h os.Host) string { + if h.Exec("test -d /run/lock", exec.Sudo(h)) == nil { + return "/run/lock/k0sctl" + } + + return "/tmp/k0sctl.lock" } // TempFile returns a temp file path diff --git a/phase/lock.go b/phase/lock.go index c6cd1a9e..f8646e7a 100644 --- a/phase/lock.go +++ b/phase/lock.go @@ -56,7 +56,7 @@ func (p *Lock) Run() error { func (p *Lock) startTicker(h *cluster.Host) error { p.wg.Add(1) - lfp := h.Configurer.K0sctlLockFilePath() + lfp := h.Configurer.K0sctlLockFilePath(h) ticker := time.NewTicker(10 * time.Second) ctx, cancel := context.WithCancel(context.Background()) p.m.Lock() @@ -68,7 +68,7 @@ func (p *Lock) startTicker(h *cluster.Host) error { for { select { case <-ticker.C: - if err := h.Configurer.Touch(h, h.Configurer.K0sctlLockFilePath(), time.Now(), exec.Sudo(h)); err != nil { + if err := h.Configurer.Touch(h, lfp, time.Now(), exec.Sudo(h)); err != nil { log.Warnf("%s: failed to touch lock file: %s", h, err) } case <-ctx.Done(): @@ -104,7 +104,7 @@ func (p *Lock) startLock(h *cluster.Host) error { } func (p *Lock) tryLock(h *cluster.Host) error { - lfp := h.Configurer.K0sctlLockFilePath() + lfp := h.Configurer.K0sctlLockFilePath(h) if err := h.Configurer.UpsertFile(h, lfp, p.instanceID); err != nil { stat, err := h.Configurer.Stat(h, lfp, exec.Sudo(h)) diff --git a/pkg/apis/k0sctl.k0sproject.io/v1beta1/cluster/host.go b/pkg/apis/k0sctl.k0sproject.io/v1beta1/cluster/host.go index b75b810f..6d608e9c 100644 --- a/pkg/apis/k0sctl.k0sproject.io/v1beta1/cluster/host.go +++ b/pkg/apis/k0sctl.k0sproject.io/v1beta1/cluster/host.go @@ -117,7 +117,7 @@ type configurer interface { CleanupServiceEnvironment(os.Host, string) error Stat(os.Host, string, ...exec.Option) (*os.FileInfo, error) Touch(os.Host, string, time.Time, ...exec.Option) error - K0sctlLockFilePath() string + K0sctlLockFilePath(os.Host) string UpsertFile(os.Host, string, string) error }