Skip to content

Commit

Permalink
Get rid of process startup/shutdown timing errors
Browse files (browse the repository at this point in the history)
  • Loading branch information
evan-bradley committed Jun 13, 2023
1 parent 1db1af9 commit 67ec60c
Showing 1 changed file with 22 additions and 2 deletions.
24 changes: 22 additions & 2 deletions cmd/opampsupervisor/supervisor/supervisor.go
Original file line number | Diff line number | Diff line change
Expand Up @@ -92,6 +92,11 @@ type Supervisor struct {

// The OpAMP client to connect to the OpAMP Server.
opampClient client.OpAMPClient

shuttingDown bool

agentHasStarted bool
agentStartHealthCheckAttempts int
}

func NewSupervisor(logger *zap.Logger, configFile string) (*Supervisor, error) {
Expand Down Expand Up @@ -486,6 +491,9 @@ func (s *Supervisor) startAgent() {

return
}

s.agentHasStarted = false
s.agentStartHealthCheckAttempts = 0
s.startedAt = time.Now()
s.startHealthCheckTicker()

Expand Down Expand Up @@ -525,9 +533,15 @@ func (s *Supervisor) healthCheck() {

if err != nil {
health.Healthy = false
health.LastError = err.Error()
s.logger.Error("Agent is not healthy", zap.Error(err))
if !s.agentHasStarted && s.agentStartHealthCheckAttempts < 10 {
health.LastError = "Agent is starting"
s.agentStartHealthCheckAttempts += 1
} else {
health.LastError = err.Error()
s.logger.Error("Agent is not healthy", zap.Error(err))
}
} else {
s.agentHasStarted = true
health.Healthy = true
s.logger.Debug("Agent is healthy.")
}
Expand Down Expand Up @@ -558,6 +572,10 @@ func (s *Supervisor) runAgentProcess() {
s.startAgent()

case <-s.commander.Done():
if s.shuttingDown {
break
}

s.logger.Debug("Agent process exited unexpectedly. Will restart in a bit...", zap.Int("pid", s.commander.Pid()), zap.Int("exit_code", s.commander.ExitCode()))
errMsg := fmt.Sprintf(
"Agent process PID=%d exited unexpectedly, exit code=%d. Will restart in a bit...",
Expand Down Expand Up @@ -613,13 +631,15 @@ func (s *Supervisor) writeEffectiveConfigToFile(cfg string, filePath string) {

func (s *Supervisor) Shutdown() {
s.logger.Debug("Supervisor shutting down...")
s.shuttingDown = true
if s.commander != nil {
err := s.commander.Stop(context.Background())

if err != nil {
s.logger.Error("Could not stop agent process", zap.Error(err))
}
}

if s.opampClient != nil {
err := s.opampClient.SetHealth(
&protobufs.AgentHealth{
Expand Down

0 comments on commit 67ec60c

Please sign in to comment.