From 6a6182bb89480117d4be3df1255c10b23f99647d Mon Sep 17 00:00:00 2001 From: Berger Eugene Date: Sat, 9 Nov 2024 17:31:26 +0200 Subject: [PATCH] fix #258: don't run stopped process while waiting for restart --- src/api/pc_api.go | 4 +--- src/app/process.go | 19 +++++++++++++----- src/app/system_test.go | 40 +++++++++++++++++++++++++++++++++++++ src/health/health_checks.go | 12 +++++------ 4 files changed, 61 insertions(+), 14 deletions(-) diff --git a/src/api/pc_api.go b/src/api/pc_api.go index cc72f67..08ce268 100644 --- a/src/api/pc_api.go +++ b/src/api/pc_api.go @@ -315,10 +315,8 @@ func (api *PcApi) GetProcessPorts(c *gin.Context) { // @Success 200 {object} map[string]string "Stopped Server" // @Router /project/stop [post] func (api *PcApi) ShutDownProject(c *gin.Context) { - go func() { - _ = api.project.ShutDownProject() - }() c.JSON(http.StatusOK, gin.H{"status": "stopped"}) + _ = api.project.ShutDownProject() } // @Schemes diff --git a/src/app/process.go b/src/app/process.go index 907a4cc..57bc98e 100644 --- a/src/app/process.go +++ b/src/app/process.go @@ -117,6 +117,7 @@ func (p *Process) run() int { } p.onProcessStart() +loop: for { err := p.setStateAndRun(p.getStartingStateName(), p.getProcessStarter()) if err != nil { @@ -163,7 +164,7 @@ func (p *Process) run() int { select { case <-p.procRunCtx.Done(): log.Debug().Str("process", p.getName()).Msg("process stopped while waiting to restart") - break + break loop case <-time.After(p.getBackoff()): p.handleInfo("\n") continue @@ -400,18 +401,26 @@ func (p *Process) stopProcess(cancelReadinessFuncs bool) error { return p.doConfiguredStop(p.procConf.ShutDownParams) } err := p.command.Stop(p.procConf.ShutDownParams.Signal, p.procConf.ShutDownParams.ParentOnly) - if p.procConf.ShutDownParams.ShutDownTimeout == UndefinedShutdownTimeoutSec { - return err + if err != nil { + log.Error().Err(err).Msgf("terminating %s failed", p.getName()) } + if p.procConf.ShutDownParams.ShutDownTimeout != UndefinedShutdownTimeoutSec { + return p.forceKillOnTimeout() + } + return err +} + +func (p *Process) forceKillOnTimeout() error { p.mtxStopFn.Lock() p.waitForStoppedCtx, p.waitForStoppedFn = context.WithTimeout(context.Background(), time.Duration(p.procConf.ShutDownParams.ShutDownTimeout)*time.Second) p.mtxStopFn.Unlock() <-p.waitForStoppedCtx.Done() - err = p.waitForStoppedCtx.Err() + err := p.waitForStoppedCtx.Err() switch { case errors.Is(err, context.Canceled): return nil case errors.Is(err, context.DeadlineExceeded): + log.Debug().Msgf("process failed to shut down within %d seconds, sending %d", p.procConf.ShutDownParams.ShutDownTimeout, syscall.SIGKILL) return p.command.Stop(int(syscall.SIGKILL), p.procConf.ShutDownParams.ParentOnly) default: log.Error().Err(err).Msgf("terminating %s with timeout %d failed", p.getName(), p.procConf.ShutDownParams.ShutDownTimeout) @@ -556,7 +565,7 @@ func (p *Process) getResourceUsage() (int64, float64) { } proc, err := puproc.NewProcess(int32(p.procState.Pid)) if err != nil { - log.Err(err).Msgf("Could not find process") + log.Err(err).Msgf("Could not find pid %d with name %s", p.procState.Pid, p.getName()) return -1, -1 } meminfo, err := proc.MemoryInfo() diff --git a/src/app/system_test.go b/src/app/system_test.go index cecf3a9..f12c757 100644 --- a/src/app/system_test.go +++ b/src/app/system_test.go @@ -969,3 +969,43 @@ func TestSystem_TestProcShutDownWithConfiguredTimeOut(t *testing.T) { }) } + +func TestSystem_TestRestartingProcessShutDown(t *testing.T) { + proc1 := "proc1" + shell := command.DefaultShellConfig() + p, err := NewProjectRunner(&ProjectOpts{ + project: &types.Project{ + ShellConfig: shell, + Processes: map[string]types.ProcessConfig{ + proc1: { + Name: proc1, + ReplicaName: proc1, + Executable: shell.ShellCommand, + Args: []string{shell.ShellArgument, "sleep 0.2"}, + RestartPolicy: types.RestartPolicyConfig{ + Restart: types.RestartPolicyAlways, + BackoffSeconds: 1, + }, + }, + }, + }, + }) + if err != nil { + t.Fatal(err.Error()) + } + go func() { + err := p.Run() + if err != nil { + t.Errorf("Failed to run project: %v", err) + } + }() + time.Sleep(300 * time.Millisecond) + proc := p.getRunningProcess(proc1) + assertProcessStatus(t, proc, proc1, types.ProcessStateRestarting) + err = p.StopProcess(proc1) + if err != nil { + t.Fatalf("Failed to stop process: %v", err) + } + time.Sleep(100 * time.Millisecond) + assertProcessStatus(t, proc, proc1, types.ProcessStateCompleted) +} diff --git a/src/health/health_checks.go b/src/health/health_checks.go index a587fc5..cbdca66 100644 --- a/src/health/health_checks.go +++ b/src/health/health_checks.go @@ -3,6 +3,7 @@ package health import ( "errors" "fmt" + "sync/atomic" "time" "github.com/InVisionApp/go-health/v2" @@ -19,7 +20,7 @@ type Prober struct { name string onCheckEndFunc func(bool, bool, string) hc *health.Health - stopped bool + stopped atomic.Bool } func New(name string, probe Probe, onCheckEnd func(bool, bool, string)) (*Prober, error) { @@ -29,7 +30,6 @@ func New(name string, probe Probe, onCheckEnd func(bool, bool, string)) (*Prober name: name, onCheckEndFunc: onCheckEnd, hc: health.New(), - stopped: false, } p.hc.DisableLogging() if probe.Exec != nil { @@ -51,9 +51,9 @@ func New(name string, probe Probe, onCheckEnd func(bool, bool, string)) (*Prober func (p *Prober) Start() { go func() { - p.stopped = false + p.stopped.Store(false) time.Sleep(time.Duration(p.probe.InitialDelay) * time.Second) - if p.stopped { + if p.stopped.Load() { return } err := p.hc.Start() @@ -68,7 +68,7 @@ func (p *Prober) Start() { func (p *Prober) Stop() { if p.hc != nil { _ = p.hc.Stop() - p.stopped = true + p.stopped.Store(true) } } @@ -81,7 +81,7 @@ func (p *Prober) healthCheckCompleted(state *health.State) { if state.Status == OK { ok = true } - if p.stopped { + if p.stopped.Load() { return } p.onCheckEndFunc(ok, fatal, state.Err)