From 1be41b35b133088472410d254be546adeea9fa56 Mon Sep 17 00:00:00 2001 From: Andrey Golev Date: Thu, 5 Mar 2020 12:29:36 +0200 Subject: [PATCH 1/3] Graceful shutdown. Exiting cluster, then waiting for all ongoing jobs to finish before dying. --- cmd/agent.go | 19 ++++++++++++++----- dkron/agent.go | 10 ++++++++++ 2 files changed, 24 insertions(+), 5 deletions(-) diff --git a/cmd/agent.go b/cmd/agent.go index 3e2a3a234..3cf15ef47 100644 --- a/cmd/agent.go +++ b/cmd/agent.go @@ -9,7 +9,6 @@ import ( "time" "github.com/distribworks/dkron/v2/dkron" - "github.com/hashicorp/go-plugin" log "github.com/sirupsen/logrus" "github.com/spf13/cobra" "github.com/spf13/viper" @@ -20,7 +19,7 @@ var agent *dkron.Agent const ( // gracefulTimeout controls how long we wait before forcefully terminating - gracefulTimeout = 3 * time.Second + gracefulTimeout = 3 * time.Hour ) // agentCmd represents the agent command @@ -110,18 +109,28 @@ WAIT: } // Attempt a graceful leave - gracefulCh := make(chan struct{}) log.Info("agent: Gracefully shutting down agent...") go func() { - plugin.CleanupClients() if err := agent.Stop(); err != nil { fmt.Printf("Error: %s", err) log.Error(fmt.Sprintf("Error: %s", err)) return } - close(gracefulCh) }() + gracefulCh := make(chan struct{}) + + for { + log.Info("Waiting for jobs to finish...") + if agent.GetRunningJobs() < 1 { + log.Info("No jobs left. Exiting.") + break + } + time.Sleep(1 * time.Second) + } + + close(gracefulCh) + // Wait for leave or another signal select { case <-signalCh: diff --git a/dkron/agent.go b/dkron/agent.go index 05c94b432..6958774ff 100644 --- a/dkron/agent.go +++ b/dkron/agent.go @@ -881,3 +881,13 @@ func (a *Agent) applySetJob(job *proto.Job) error { func (a *Agent) RaftApply(cmd []byte) raft.ApplyFuture { return a.raft.Apply(cmd, raftTimeout) } + +// GetRunningJobs returns amount of active jobs +func (a *Agent) GetRunningJobs() int { + job := 0 + runningExecutions.Range(func(k, v interface{}) bool { + job = job + 1 + return true + }) + return job +} From dd763d7685c04b291140f9b8f2b0edaf5274c95e Mon Sep 17 00:00:00 2001 From: Andrey Golev Date: Thu, 5 Mar 2020 16:07:53 +0200 Subject: [PATCH 2/3] plugin cleanup returned back --- cmd/agent.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cmd/agent.go b/cmd/agent.go index 3cf15ef47..c85394471 100644 --- a/cmd/agent.go +++ b/cmd/agent.go @@ -9,6 +9,7 @@ import ( "time" "github.com/distribworks/dkron/v2/dkron" + "github.com/hashicorp/go-plugin" log "github.com/sirupsen/logrus" "github.com/spf13/cobra" "github.com/spf13/viper" @@ -129,6 +130,8 @@ WAIT: time.Sleep(1 * time.Second) } + plugin.CleanupClients() + close(gracefulCh) // Wait for leave or another signal From 810178e951d75daefd6e88ec4bb2dca19ef76ae4 Mon Sep 17 00:00:00 2001 From: Andrey Golev Date: Fri, 6 Mar 2020 14:40:33 +0200 Subject: [PATCH 3/3] gofmt applied --- cmd/agent.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cmd/agent.go b/cmd/agent.go index c85394471..ccb2ae5dc 100644 --- a/cmd/agent.go +++ b/cmd/agent.go @@ -9,7 +9,7 @@ import ( "time" "github.com/distribworks/dkron/v2/dkron" - "github.com/hashicorp/go-plugin" + "github.com/hashicorp/go-plugin" log "github.com/sirupsen/logrus" "github.com/spf13/cobra" "github.com/spf13/viper" @@ -130,7 +130,7 @@ WAIT: time.Sleep(1 * time.Second) } - plugin.CleanupClients() + plugin.CleanupClients() close(gracefulCh)