From ec53c9ded82560b40c54b56989ee7c84be8ebcb5 Mon Sep 17 00:00:00 2001 From: Ciara Stacke Date: Thu, 8 Jun 2023 14:58:57 +0100 Subject: [PATCH 1/2] Add timeout and retry logic for the NGINX pid file --- internal/nginx/runtime/manager.go | 36 +++++++++++++++++++------- internal/nginx/runtime/manager_test.go | 29 ++++++++++++++++++++- 2 files changed, 54 insertions(+), 11 deletions(-) diff --git a/internal/nginx/runtime/manager.go b/internal/nginx/runtime/manager.go index 54fcc64ba5..4ef3c76a7c 100644 --- a/internal/nginx/runtime/manager.go +++ b/internal/nginx/runtime/manager.go @@ -3,6 +3,7 @@ package runtime import ( "context" "fmt" + "io/fs" "os" "strconv" "strings" @@ -10,9 +11,15 @@ import ( "time" ) -const pidFile = "/etc/nginx/nginx.pid" +const ( + pidFile = "/etc/nginx/nginx.pid" + pidFileTimeout = 5 * time.Second +) -type readFileFunc func(string) ([]byte, error) +type ( + readFileFunc func(string) ([]byte, error) + checkFileFunc func(string) (fs.FileInfo, error) +) //go:generate go run github.com/maxbrunsfeld/counterfeiter/v6 . Manager @@ -31,13 +38,8 @@ func NewManagerImpl() *ManagerImpl { } func (m *ManagerImpl) Reload(ctx context.Context) error { - // FIXME(pleshakov): Before reload attempt, make sure NGINX is running. - // If the gateway container starts before NGINX container (which is possible), - // then it is possible that a reload can be attempted when NGINX is not running yet. - // Make sure to prevent this case, so we don't get an error. - // We find the main NGINX PID on every reload because it will change if the NGINX container is restarted. - pid, err := findMainProcess(os.ReadFile) + pid, err := findMainProcess(os.Stat, os.ReadFile, pidFileTimeout) if err != nil { return fmt.Errorf("failed to find NGINX main process: %w", err) } @@ -65,8 +67,22 @@ func (m *ManagerImpl) Reload(ctx context.Context) error { return nil } -func findMainProcess(readFile readFileFunc) (int, error) { - content, err := readFile(pidFile) +func findMainProcess(checkFile checkFileFunc, readFile readFileFunc, timeout time.Duration) (int, error) { + startTime := time.Now() + deadline := startTime.Add(timeout) + + fileCheck := func() (content []byte, err error) { + for time.Now().Before(deadline) { + _, err := checkFile(pidFile) + if err == nil { + return readFile(pidFile) + } + time.Sleep(1 * time.Second) + } + return nil, fmt.Errorf("timeout waiting for pid file to appear") + } + + content, err := fileCheck() if err != nil { return 0, err } diff --git a/internal/nginx/runtime/manager_test.go b/internal/nginx/runtime/manager_test.go index 8d716f8d9d..9e04d89196 100644 --- a/internal/nginx/runtime/manager_test.go +++ b/internal/nginx/runtime/manager_test.go @@ -2,7 +2,9 @@ package runtime import ( "errors" + "io/fs" "testing" + "time" ) func TestFindMainProcess(t *testing.T) { @@ -18,40 +20,65 @@ func TestFindMainProcess(t *testing.T) { return nil, errors.New("error") } + checkFileFuncGen := func(content fs.FileInfo) checkFileFunc { + return func(name string) (fs.FileInfo, error) { + if name != pidFile { + return nil, errors.New("error") + } + return content, nil + } + } + checkFileError := func(string) (fs.FileInfo, error) { + return nil, errors.New("error") + } + var testFileInfo fs.FileInfo + tests := []struct { readFile readFileFunc + checkFile checkFileFunc msg string expected int expectError bool }{ { readFile: readFileFuncGen([]byte("1\n")), + checkFile: checkFileFuncGen(testFileInfo), expected: 1, expectError: false, msg: "normal case", }, { readFile: readFileFuncGen([]byte("")), + checkFile: checkFileFuncGen(testFileInfo), expected: 0, expectError: true, msg: "empty file content", }, { readFile: readFileFuncGen([]byte("not a number")), + checkFile: checkFileFuncGen(testFileInfo), expected: 0, expectError: true, msg: "bad file content", }, { readFile: readFileError, + checkFile: checkFileFuncGen(testFileInfo), expected: 0, expectError: true, msg: "cannot read file", }, + { + readFile: readFileFuncGen([]byte("1\n")), + checkFile: checkFileError, + expected: 0, + expectError: true, + msg: "cannot file pid file", + }, } for _, test := range tests { - result, err := findMainProcess(test.readFile) + result, err := findMainProcess(test.checkFile, test.readFile, 1*time.Microsecond) if result != test.expected { t.Errorf("findMainProcess() returned %d but expected %d for case %q", result, test.expected, test.msg) From a864dc1fdffe230f3701f91659d045bf6dcf702c Mon Sep 17 00:00:00 2001 From: Ciara Stacke <18287516+ciarams87@users.noreply.github.com> Date: Thu, 8 Jun 2023 16:39:38 +0100 Subject: [PATCH 2/2] Add timeout and retry logic for the NGINX pid file --- internal/nginx/runtime/manager.go | 39 ++++++++++++++++++-------- internal/nginx/runtime/manager_test.go | 22 +++++++++++++-- 2 files changed, 47 insertions(+), 14 deletions(-) diff --git a/internal/nginx/runtime/manager.go b/internal/nginx/runtime/manager.go index 4ef3c76a7c..4c4cddda49 100644 --- a/internal/nginx/runtime/manager.go +++ b/internal/nginx/runtime/manager.go @@ -2,6 +2,7 @@ package runtime import ( "context" + "errors" "fmt" "io/fs" "os" @@ -9,6 +10,8 @@ import ( "strings" "syscall" "time" + + "k8s.io/apimachinery/pkg/util/wait" ) const ( @@ -39,7 +42,7 @@ func NewManagerImpl() *ManagerImpl { func (m *ManagerImpl) Reload(ctx context.Context) error { // We find the main NGINX PID on every reload because it will change if the NGINX container is restarted. - pid, err := findMainProcess(os.Stat, os.ReadFile, pidFileTimeout) + pid, err := findMainProcess(ctx, os.Stat, os.ReadFile, pidFileTimeout) if err != nil { return fmt.Errorf("failed to find NGINX main process: %w", err) } @@ -67,22 +70,34 @@ func (m *ManagerImpl) Reload(ctx context.Context) error { return nil } -func findMainProcess(checkFile checkFileFunc, readFile readFileFunc, timeout time.Duration) (int, error) { - startTime := time.Now() - deadline := startTime.Add(timeout) - - fileCheck := func() (content []byte, err error) { - for time.Now().Before(deadline) { +func findMainProcess( + ctx context.Context, + checkFile checkFileFunc, + readFile readFileFunc, + timeout time.Duration, +) (int, error) { + ctx, cancel := context.WithTimeout(ctx, timeout) + defer cancel() + + err := wait.PollUntilContextCancel( + ctx, + 1*time.Second, + true, /* poll immediately */ + func(ctx context.Context) (bool, error) { _, err := checkFile(pidFile) if err == nil { - return readFile(pidFile) + return true, nil + } + if !errors.Is(err, fs.ErrNotExist) { + return false, err } - time.Sleep(1 * time.Second) - } - return nil, fmt.Errorf("timeout waiting for pid file to appear") + return false, nil + }) + if err != nil { + return 0, err } - content, err := fileCheck() + content, err := readFile(pidFile) if err != nil { return 0, err } diff --git a/internal/nginx/runtime/manager_test.go b/internal/nginx/runtime/manager_test.go index 9e04d89196..c5ff89b041 100644 --- a/internal/nginx/runtime/manager_test.go +++ b/internal/nginx/runtime/manager_test.go @@ -1,6 +1,7 @@ package runtime import ( + "context" "errors" "io/fs" "testing" @@ -32,8 +33,12 @@ func TestFindMainProcess(t *testing.T) { return nil, errors.New("error") } var testFileInfo fs.FileInfo + ctx := context.Background() + cancellingCtx, cancel := context.WithCancel(ctx) + time.AfterFunc(1*time.Millisecond, cancel) tests := []struct { + ctx context.Context readFile readFileFunc checkFile checkFileFunc msg string @@ -41,6 +46,7 @@ func TestFindMainProcess(t *testing.T) { expectError bool }{ { + ctx: ctx, readFile: readFileFuncGen([]byte("1\n")), checkFile: checkFileFuncGen(testFileInfo), expected: 1, @@ -48,6 +54,7 @@ func TestFindMainProcess(t *testing.T) { msg: "normal case", }, { + ctx: ctx, readFile: readFileFuncGen([]byte("")), checkFile: checkFileFuncGen(testFileInfo), expected: 0, @@ -55,6 +62,7 @@ func TestFindMainProcess(t *testing.T) { msg: "empty file content", }, { + ctx: ctx, readFile: readFileFuncGen([]byte("not a number")), checkFile: checkFileFuncGen(testFileInfo), expected: 0, @@ -62,6 +70,7 @@ func TestFindMainProcess(t *testing.T) { msg: "bad file content", }, { + ctx: ctx, readFile: readFileError, checkFile: checkFileFuncGen(testFileInfo), expected: 0, @@ -69,16 +78,25 @@ func TestFindMainProcess(t *testing.T) { msg: "cannot read file", }, { + ctx: ctx, readFile: readFileFuncGen([]byte("1\n")), checkFile: checkFileError, expected: 0, expectError: true, - msg: "cannot file pid file", + msg: "cannot find pid file", + }, + { + ctx: cancellingCtx, + readFile: readFileFuncGen([]byte("1\n")), + checkFile: checkFileError, + expected: 0, + expectError: true, + msg: "context canceled", }, } for _, test := range tests { - result, err := findMainProcess(test.checkFile, test.readFile, 1*time.Microsecond) + result, err := findMainProcess(test.ctx, test.checkFile, test.readFile, 2*time.Millisecond) if result != test.expected { t.Errorf("findMainProcess() returned %d but expected %d for case %q", result, test.expected, test.msg)