Skip to content

Commit

Permalink
Timeout and unit tests for agent healthcheck (#2437)
Browse files Browse the repository at this point in the history
* Timeout and unit tests for agent healthcheck

* integ test cleanup wait duration and expand terminal reason log msg

* add 'type' to DriverOpts
  • Loading branch information
sparrc authored Apr 27, 2020
1 parent 55a4769 commit 4369eec
Show file tree
Hide file tree
Showing 6 changed files with 89 additions and 5 deletions.
15 changes: 12 additions & 3 deletions agent/app/healthcheck.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,25 @@ package app

import (
"net/http"
"time"

"github.com/aws/amazon-ecs-agent/agent/sighandlers/exitcodes"
"github.com/cihub/seelog"
)

// runHealthcheck runs the Agent's healthcheck
func runHealthcheck() int {
_, err := http.Get("http://localhost:51678/v1/metadata")
func runHealthcheck(url string, timeout time.Duration) int {
client := &http.Client{
Timeout: timeout,
}
r, err := http.NewRequest("GET", url, nil)
if err != nil {
seelog.Errorf("error creating healthcheck request: %v", err)
return exitcodes.ExitError
}
_, err = client.Do(r)
if err != nil {
seelog.Warnf("Health check failed with error: %v", err)
seelog.Errorf("health check [GET %s] failed with error: %v", url, err)
return exitcodes.ExitError
}
return exitcodes.ExitSuccess
Expand Down
64 changes: 64 additions & 0 deletions agent/app/healthcheck_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
// Copyright Amazon.com Inc. or its affiliates. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"). You may
// not use this file except in compliance with the License. A copy of the
// License is located at
//
// http://aws.amazon.com/apache2.0/
//
// or in the "license" file accompanying this file. This file is distributed
// on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
// express or implied. See the License for the specific language governing
// permissions and limitations under the License.

package app

import (
"fmt"
"net/http"
"net/http/httptest"
"testing"
"time"

"github.com/stretchr/testify/require"
)

func TestHealthcheck_Sunny(t *testing.T) {
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
fmt.Fprintln(w, "Hello, client")
}))
defer ts.Close()

rc := runHealthcheck(ts.URL, time.Second*2)
require.Equal(t, 0, rc)
}

func TestHealthcheck_InvalidURL2(t *testing.T) {
// leading space in url is invalid
rc := runHealthcheck(" http://foobar", time.Second*2)
require.Equal(t, 1, rc)
}

func TestHealthcheck_Timeout(t *testing.T) {
sema := make(chan int)
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
<-sema
}))
defer ts.Close()

rc := runHealthcheck(ts.URL, time.Second*2)
require.Equal(t, 1, rc)
close(sema)
}

// we actually pass the healthcheck in the event of a non-200 http status code.
func TestHealthcheck_404(t *testing.T) {
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusNotFound)
w.Write([]byte("https://http.cat/404"))
}))
defer ts.Close()

rc := runHealthcheck(ts.URL+"/foobar", time.Second*2)
require.Equal(t, 0, rc)
}
7 changes: 6 additions & 1 deletion agent/app/run.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ package app

import (
"context"
"time"

"github.com/aws/amazon-ecs-agent/agent/app/args"
"github.com/aws/amazon-ecs-agent/agent/logger"
Expand All @@ -39,7 +40,11 @@ func Run(arguments []string) int {
} else if *parsedArgs.Version {
return version.PrintVersion()
} else if *parsedArgs.Healthcheck {
return runHealthcheck()
// Timeout is purposely set to shorter than the default docker healthcheck
// timeout of 30s. This is so that we can catch any http timeout and log the
// issue within agent logs.
// see https://docs.docker.com/engine/reference/builder/#healthcheck
return runHealthcheck("http://localhost:51678/v1/metadata", time.Second*25)
}

logger.SetLevel(*parsedArgs.LogLevel)
Expand Down
1 change: 1 addition & 0 deletions agent/engine/engine_unix_integ_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,7 @@ func createVolumeTask(scope, arn, volume string, autoprovision bool) (*apitask.T
DriverOpts: map[string]string{
"device": tmpDirectory,
"o": "bind",
"type": "tmpfs",
},
}
if scope == "shared" {
Expand Down
2 changes: 1 addition & 1 deletion agent/taskresource/volume/dockervolume.go
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ func (vol *VolumeResource) GetTerminalReason() string {

func (vol *VolumeResource) setTerminalReason(reason string) {
vol.terminalReasonOnce.Do(func() {
seelog.Infof("Volume Resource [%s]: setting terminal reason for volume resource", vol.Name)
seelog.Infof("Volume Resource [%s]: setting terminal reason for volume resource, reason: %s", vol.Name, reason)
vol.terminalReason = reason
})
}
Expand Down
5 changes: 5 additions & 0 deletions agent/tcs/handler/handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,11 @@ func StartSession(params *TelemetrySessionParams, statsEngine stats.Engine) erro
seelog.Errorf("Error: lost websocket connection with ECS Telemetry service (TCS): %v", tcsError)
params.time().Sleep(backoff.Duration())
}
select {
case <-params.Ctx.Done():
return nil
default:
}
}
}

Expand Down

0 comments on commit 4369eec

Please sign in to comment.