diff --git a/agent/app/healthcheck.go b/agent/app/healthcheck.go
index 9d5ba5063a2..0ff731a20c0 100644
--- a/agent/app/healthcheck.go
+++ b/agent/app/healthcheck.go
@@ -15,16 +15,31 @@ package app
 
 import (
 	"net/http"
+	"time"
 
 	"github.com/aws/amazon-ecs-agent/agent/sighandlers/exitcodes"
 	"github.com/cihub/seelog"
 )
 
-// runHealthcheck runs the Agent's healthcheck
-func runHealthcheck() int {
-	_, err := http.Get("http://localhost:51678/v1/metadata")
+// runHealthcheck runs the Agent's healthcheck by issuing a GET request to url,
+// giving up after timeout. It returns exitcodes.ExitSuccess when any HTTP
+// response is received (regardless of status code) and exitcodes.ExitError
+// when the request cannot be built or fails, e.g. on timeout.
+func runHealthcheck(url string, timeout time.Duration) int {
+	client := &http.Client{
+		Timeout: timeout,
+	}
+	r, err := http.NewRequest("GET", url, nil)
+	if err != nil {
+		seelog.Errorf("error creating healthcheck request: %v", err)
+		return exitcodes.ExitError
+	}
+	resp, err := client.Do(r)
 	if err != nil {
-		seelog.Warnf("Health check failed with error: %v", err)
+		seelog.Errorf("health check [GET %s] failed with error: %v", url, err)
 		return exitcodes.ExitError
 	}
+	// Only reachability matters here, so the payload is never read; the body
+	// still has to be closed to release the underlying connection.
+	resp.Body.Close()
 	return exitcodes.ExitSuccess
diff --git a/agent/app/healthcheck_test.go b/agent/app/healthcheck_test.go
new file mode 100644
index 00000000000..2fd489d0307
--- /dev/null
+++ b/agent/app/healthcheck_test.go
@@ -0,0 +1,64 @@
+// Copyright Amazon.com Inc. or its affiliates. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License"). You may
+// not use this file except in compliance with the License. A copy of the
+// License is located at
+//
+//	http://aws.amazon.com/apache2.0/
+//
+// or in the "license" file accompanying this file. This file is distributed
+// on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+// express or implied. See the License for the specific language governing
+// permissions and limitations under the License.
+
+package app
+
+import (
+	"fmt"
+	"net/http"
+	"net/http/httptest"
+	"testing"
+	"time"
+
+	"github.com/stretchr/testify/require"
+)
+
+func TestHealthcheck_Sunny(t *testing.T) {
+	ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		fmt.Fprintln(w, "Hello, client")
+	}))
+	defer ts.Close()
+
+	rc := runHealthcheck(ts.URL, time.Second*2)
+	require.Equal(t, 0, rc)
+}
+
+func TestHealthcheck_InvalidURL2(t *testing.T) {
+	// leading space in url is invalid
+	rc := runHealthcheck(" http://foobar", time.Second*2)
+	require.Equal(t, 1, rc)
+}
+
+func TestHealthcheck_Timeout(t *testing.T) {
+	sema := make(chan int)
+	ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		<-sema
+	}))
+	defer ts.Close()
+
+	rc := runHealthcheck(ts.URL, time.Second*2)
+	require.Equal(t, 1, rc)
+	close(sema)
+}
+
+// we actually pass the healthcheck in the event of a non-200 http status code.
+func TestHealthcheck_404(t *testing.T) {
+	ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.WriteHeader(http.StatusNotFound)
+		w.Write([]byte("https://http.cat/404"))
+	}))
+	defer ts.Close()
+
+	rc := runHealthcheck(ts.URL+"/foobar", time.Second*2)
+	require.Equal(t, 0, rc)
+}
diff --git a/agent/app/run.go b/agent/app/run.go
index 8ba5f579e2f..134ba43c556 100644
--- a/agent/app/run.go
+++ b/agent/app/run.go
@@ -15,6 +15,7 @@ package app
 
 import (
 	"context"
+	"time"
 
 	"github.com/aws/amazon-ecs-agent/agent/app/args"
 	"github.com/aws/amazon-ecs-agent/agent/logger"
@@ -39,7 +40,11 @@ func Run(arguments []string) int {
 	} else if *parsedArgs.Version {
 		return version.PrintVersion()
 	} else if *parsedArgs.Healthcheck {
-		return runHealthcheck()
+		// Timeout is purposely set to shorter than the default docker healthcheck
+		// timeout of 30s. This is so that we can catch any http timeout and log the
+		// issue within agent logs.
+		// see https://docs.docker.com/engine/reference/builder/#healthcheck
+		return runHealthcheck("http://localhost:51678/v1/metadata", time.Second*25)
 	}
 
 	logger.SetLevel(*parsedArgs.LogLevel)
diff --git a/agent/tcs/handler/handler.go b/agent/tcs/handler/handler.go
index d367a24f1f8..b2351ced7cc 100644
--- a/agent/tcs/handler/handler.go
+++ b/agent/tcs/handler/handler.go
@@ -87,6 +87,11 @@ func StartSession(params *TelemetrySessionParams, statsEngine stats.Engine) erro
 			seelog.Errorf("Error: lost websocket connection with ECS Telemetry service (TCS): %v", tcsError)
 			params.time().Sleep(backoff.Duration())
 		}
+		select {
+		case <-params.Ctx.Done():
+			return nil
+		default:
+		}
 	}
 }
 