Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Automate Graceful Recovery NFR #1832

Merged
merged 36 commits into from
May 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
175036b
Add https traffic support for automated tests
bjee19 Mar 29, 2024
f0f4156
Add NGF and NGINX container restart tests
bjee19 Apr 16, 2024
8150af1
Add restart count to check container restart
bjee19 Apr 17, 2024
e6b90f5
Move restart count checks to helper function
bjee19 Apr 17, 2024
a31141a
Add polling wait functions to replace sleep workarounds
bjee19 Apr 18, 2024
b3cf4a5
Add polling wait for leader election lease to change and refactored e…
bjee19 Apr 19, 2024
48b9cc1
Refactor container restart functions
bjee19 Apr 19, 2024
e530198
Refactor container restart command to use job
bjee19 Apr 25, 2024
1eeafd2
Refactor waitForRoutesToBeReady to use parentrefs instead of status
bjee19 Apr 25, 2024
801a187
Revert runAsNonRoot to true
bjee19 Apr 25, 2024
8134a94
Change test to allow system suite to deploy NGF
bjee19 Apr 25, 2024
2df9b80
Add propagation policy when deleting Job
bjee19 Apr 25, 2024
3f4d37c
Refactor multiple functions into restartContainer function
bjee19 Apr 26, 2024
24eec08
Refactor test cases using shared runTest function
bjee19 Apr 26, 2024
8ae6c60
Change runTest name to runRecoveryTest
bjee19 Apr 26, 2024
f47df82
Add nolint:gosec to satisfy CodeQL
bjee19 Apr 29, 2024
020a0b8
Remove accidentally added files
bjee19 Apr 29, 2024
1f33460
Add placeholder values in node debugger job
bjee19 Apr 29, 2024
851de6b
Add check for job spec container count
bjee19 Apr 29, 2024
cf6bd61
Remove regex from pgrep commands
bjee19 Apr 29, 2024
afb3af9
Remove leader lease checking from nginx container restart
bjee19 Apr 29, 2024
1ad5271
Add ngfPodName to BeforeAll
bjee19 Apr 29, 2024
7eaad5b
Fix argument refactor mistake
bjee19 Apr 29, 2024
d513eeb
Add ContainerRestartTimeout
bjee19 Apr 30, 2024
b4c7628
Move batchv1 scheme to system suite test
bjee19 Apr 30, 2024
bbd7448
Refactor constants to local variables
bjee19 Apr 30, 2024
4544fb5
Add additional comments
bjee19 Apr 30, 2024
1c3ecee
Refactor error checking
bjee19 Apr 30, 2024
6508425
Remove sinceSeconds when checking container logs for errors
bjee19 Apr 30, 2024
cefead3
Refactor waiting functions using gomega Eventually assertions
bjee19 May 1, 2024
587b237
Add Skip of failing test
bjee19 May 1, 2024
dc07841
Refactored Eventually functions to return errors instead of booleans
bjee19 May 1, 2024
d311b9d
Add check for lease holder identity being empty
bjee19 May 1, 2024
dbba349
Add filter of error messages in nginx logs to allow expected errors
bjee19 May 2, 2024
b5dc80e
Add ubuntu image version
bjee19 May 3, 2024
e326e96
Add teardown and setup of NGF and check NGF pod length is one
bjee19 May 3, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion tests/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ GINKGO_FLAGS=
NGF_VERSION=
CI=false
TELEMETRY_ENDPOINT=
TELEMETRY_ENDPOINT_INSECURE=
TELEMETRY_ENDPOINT_INSECURE=false

ifneq ($(GINKGO_LABEL),)
override GINKGO_FLAGS += --label-filter "$(GINKGO_LABEL)"
Expand Down
14 changes: 13 additions & 1 deletion tests/framework/request.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package framework
import (
"bytes"
"context"
"crypto/tls"
"fmt"
"net"
"net/http"
Expand Down Expand Up @@ -34,7 +35,18 @@ func Get(url, address string, timeout time.Duration) (int, string, error) {
return 0, "", err
}

resp, err := http.DefaultClient.Do(req)
var resp *http.Response
if strings.HasPrefix(url, "https") {
customTransport := http.DefaultTransport.(*http.Transport).Clone()
// Our examples send https requests with curl and turn off certificate
// verification; we do the same here for test traffic.
customTransport.TLSClientConfig = &tls.Config{InsecureSkipVerify: true} //nolint:gosec // for https test traffic
Dismissed Show dismissed Hide dismissed
client := &http.Client{Transport: customTransport}
resp, err = client.Do(req)
} else {
resp, err = http.DefaultClient.Do(req)
}

if err != nil {
return 0, "", err
}
Expand Down
12 changes: 6 additions & 6 deletions tests/framework/resourcemanager.go
Original file line number Diff line number Diff line change
Expand Up @@ -124,12 +124,12 @@ func (rm *ResourceManager) ApplyFromFiles(files []string, namespace string) erro
}

// Delete deletes Kubernetes resources defined as Go objects.
func (rm *ResourceManager) Delete(resources []client.Object) error {
func (rm *ResourceManager) Delete(resources []client.Object, opts ...client.DeleteOption) error {
for _, resource := range resources {
ctx, cancel := context.WithTimeout(context.Background(), rm.TimeoutConfig.DeleteTimeout)
defer cancel()

if err := rm.K8sClient.Delete(ctx, resource); err != nil && !apierrors.IsNotFound(err) {
if err := rm.K8sClient.Delete(ctx, resource, opts...); err != nil && !apierrors.IsNotFound(err) {
return fmt.Errorf("error deleting resource: %w", err)
}
}
Expand Down Expand Up @@ -159,7 +159,7 @@ func (rm *ResourceManager) readAndHandleObjects(
files []string,
) error {
for _, file := range files {
data, err := rm.getFileContents(file)
data, err := rm.GetFileContents(file)
if err != nil {
return err
}
Expand Down Expand Up @@ -187,9 +187,9 @@ func (rm *ResourceManager) readAndHandleObjects(
return nil
}

// getFileContents takes a string that can either be a local file
// GetFileContents takes a string that can either be a local file
// path or an https:// URL to YAML manifests and provides the contents.
func (rm *ResourceManager) getFileContents(file string) (*bytes.Buffer, error) {
func (rm *ResourceManager) GetFileContents(file string) (*bytes.Buffer, error) {
if strings.HasPrefix(file, "http://") {
return nil, fmt.Errorf("data can't be retrieved from %s: http is not supported, use https", file)
} else if strings.HasPrefix(file, "https://") {
Expand Down Expand Up @@ -314,7 +314,7 @@ func (rm *ResourceManager) waitForRoutesToBeReady(ctx context.Context, namespace

var numParents, readyCount int
for _, route := range routeList.Items {
numParents += len(route.Status.Parents)
numParents += len(route.Spec.ParentRefs)
for _, parent := range route.Status.Parents {
for _, cond := range parent.Conditions {
if cond.Type == string(v1.RouteConditionAccepted) && cond.Status == metav1.ConditionTrue {
Expand Down
18 changes: 13 additions & 5 deletions tests/framework/timeout.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,23 @@ type TimeoutConfig struct {

// RequestTimeout represents the maximum time for making an HTTP Request with the roundtripper.
RequestTimeout time.Duration

// ContainerRestartTimeout represents the maximum time for a Kubernetes Container to restart.
ContainerRestartTimeout time.Duration

// GetLeaderLeaseTimeout represents the maximum time for NGF to retrieve the leader lease.
GetLeaderLeaseTimeout time.Duration
}

// DefaultTimeoutConfig populates a TimeoutConfig with the default values.
func DefaultTimeoutConfig() TimeoutConfig {
return TimeoutConfig{
CreateTimeout: 60 * time.Second,
DeleteTimeout: 10 * time.Second,
GetTimeout: 10 * time.Second,
ManifestFetchTimeout: 10 * time.Second,
RequestTimeout: 10 * time.Second,
CreateTimeout: 60 * time.Second,
DeleteTimeout: 10 * time.Second,
GetTimeout: 10 * time.Second,
ManifestFetchTimeout: 10 * time.Second,
RequestTimeout: 10 * time.Second,
ContainerRestartTimeout: 10 * time.Second,
GetLeaderLeaseTimeout: 60 * time.Second,
}
}
Loading
Loading