From fab4c07ea9fb0f124a5abe3dd7fcfffc23f2a1b3 Mon Sep 17 00:00:00 2001 From: Giri Kuncoro Date: Thu, 28 Feb 2019 02:52:51 +0800 Subject: [PATCH] Add Kubernetes cluster validation (#679) * Implement validate kube cluster objects * Add validate kube cluster in validate cluster command * Pass namespace and add more conditions to fail validate pods * Add header to validate pods file * Refactor validate pods to write test easier * Add unit test for validate pods * Remove cluster name from validate pods in validate command handler * Update bazel file for cluster validation * Refactor to collect pod failures and fail at the end * Remove unnecessary prints and pass kube-system with metav1 * Print all pod failures when found * Cleanup and remove unnecessary test * Add test to cover looping functionality with N pods * Add validate components into the validation workflow --- cmd/clusterctl/cmd/validate_cluster.go | 5 +- cmd/clusterctl/validation/BUILD.bazel | 8 +- cmd/clusterctl/validation/validate_pods.go | 146 ++++++++++ .../validation/validate_pods_test.go | 253 ++++++++++++++++++ 4 files changed, 410 insertions(+), 2 deletions(-) create mode 100644 cmd/clusterctl/validation/validate_pods.go create mode 100644 cmd/clusterctl/validation/validate_pods_test.go diff --git a/cmd/clusterctl/cmd/validate_cluster.go b/cmd/clusterctl/cmd/validate_cluster.go index 5910ca132aa6..10b5358a4375 100644 --- a/cmd/clusterctl/cmd/validate_cluster.go +++ b/cmd/clusterctl/cmd/validate_cluster.go @@ -23,6 +23,7 @@ import ( "github.com/pkg/errors" "github.com/spf13/cobra" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" tcmd "k8s.io/client-go/tools/clientcmd" "sigs.k8s.io/cluster-api/cmd/clusterctl/validation" "sigs.k8s.io/cluster-api/pkg/apis" @@ -77,7 +78,9 @@ func RunValidateCluster() error { if err := validation.ValidateClusterAPIObjects(context.TODO(), os.Stdout, c, vco.KubeconfigOverrides.Context.Cluster, vco.KubeconfigOverrides.Context.Namespace); err != nil { return err } + if err := 
validation.ValidatePods(context.TODO(), os.Stdout, c, metav1.NamespaceSystem); err != nil { + return err + } - // TODO(wangzhen127): Also validate the cluster in addition to the cluster API objects. https://github.com/kubernetes-sigs/cluster-api/issues/168 return nil } diff --git a/cmd/clusterctl/validation/BUILD.bazel b/cmd/clusterctl/validation/BUILD.bazel index 35f501c6a6eb..6077c3a80da5 100644 --- a/cmd/clusterctl/validation/BUILD.bazel +++ b/cmd/clusterctl/validation/BUILD.bazel @@ -2,7 +2,10 @@ load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") go_library( name = "go_default_library", - srcs = ["validate_cluster_api_objects.go"], + srcs = [ + "validate_cluster_api_objects.go", + "validate_pods.go", + ], importpath = "sigs.k8s.io/cluster-api/cmd/clusterctl/validation", visibility = ["//visibility:public"], deps = [ @@ -10,6 +13,7 @@ go_library( "//pkg/apis/cluster/v1alpha1:go_default_library", "//pkg/controller/noderefutil:go_default_library", "//vendor/github.com/pkg/errors:go_default_library", + "//vendor/golang.org/x/net/context:go_default_library", "//vendor/k8s.io/api/core/v1:go_default_library", "//vendor/k8s.io/apimachinery/pkg/types:go_default_library", "//vendor/sigs.k8s.io/controller-runtime/pkg/client:go_default_library", @@ -21,6 +25,7 @@ go_test( srcs = [ "validate_cluster_api_objects_suite_test.go", "validate_cluster_api_objects_test.go", + "validate_pods_test.go", ], data = glob(["testdata/**"]), embed = [":go_default_library"], @@ -29,6 +34,7 @@ go_test( "//pkg/apis/cluster/common:go_default_library", "//pkg/apis/cluster/v1alpha1:go_default_library", "//pkg/apis/cluster/v1alpha1/testutil:go_default_library", + "//vendor/golang.org/x/net/context:go_default_library", "//vendor/k8s.io/api/core/v1:go_default_library", "//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library", "//vendor/k8s.io/apimachinery/pkg/types:go_default_library", diff --git a/cmd/clusterctl/validation/validate_pods.go 
b/cmd/clusterctl/validation/validate_pods.go new file mode 100644 index 000000000000..5bf5ec5f9701 --- /dev/null +++ b/cmd/clusterctl/validation/validate_pods.go @@ -0,0 +1,146 @@ +/* +Copyright 2019 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package validation + +import ( + "context" + "fmt" + "io" + "strings" + + corev1 "k8s.io/api/core/v1" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +type validationError struct { + name string + message string +} + +func ValidatePods(ctx context.Context, w io.Writer, c client.Client, namespace string) error { + fmt.Fprintf(w, "Validating pods in namespace %q\n", namespace) + + pods, err := getPods(ctx, c, namespace) + if err != nil { + return err + } + if err := validatePods(w, pods, namespace); err != nil { + return err + } + + components, err := getComponents(ctx, c) + if err != nil { + return err + } + return validateComponents(w, components) +} + +func getPods(ctx context.Context, c client.Client, namespace string) (*corev1.PodList, error) { + pods := &corev1.PodList{} + if err := c.List(ctx, client.InNamespace(namespace), pods); err != nil { + return nil, fmt.Errorf("failed to get pods in namespace %q: %v", namespace, err) + } + return pods, nil +} + +func validatePods(w io.Writer, pods *corev1.PodList, namespace string) error { + if len(pods.Items) == 0 { + fmt.Fprintf(w, "FAIL\n") + fmt.Fprintf(w, "\tpods in namespace %q not exist.\n", namespace) + return fmt.Errorf("pods in namespace %q not exist", 
namespace) + } + + var failures []*validationError + for _, pod := range pods.Items { + if pod.Status.Phase == corev1.PodSucceeded { + continue + } + + if pod.Status.Phase == corev1.PodPending || + pod.Status.Phase == corev1.PodFailed || + pod.Status.Phase == corev1.PodUnknown { + failures = append(failures, &validationError{ + name: fmt.Sprintf("%q/%q", pod.Namespace, pod.Name), + message: fmt.Sprintf("Pod %q in namespace %q is %s.", pod.Name, pod.Namespace, pod.Status.Phase), + }) + continue + } + + var notready []string + for _, container := range pod.Status.ContainerStatuses { + if !container.Ready { + notready = append(notready, container.Name) + } + } + if len(notready) != 0 { + failures = append(failures, &validationError{ + name: fmt.Sprintf("%q/%q", pod.Namespace, pod.Name), + message: fmt.Sprintf("Pod %q in namespace %q is not ready (%s).", pod.Name, pod.Namespace, strings.Join(notready, ",")), + }) + } + } + + if len(failures) != 0 { + fmt.Fprintf(w, "FAIL\n") + for _, failure := range failures { + fmt.Fprintf(w, "\t[%v]: %s\n", failure.name, failure.message) + } + return fmt.Errorf("pod failures in namespace %q found", namespace) + } + + fmt.Fprintf(w, "PASS\n") + return nil +} + +func getComponents(ctx context.Context, c client.Client) (*corev1.ComponentStatusList, error) { + components := &corev1.ComponentStatusList{} + if err := c.List(ctx, &client.ListOptions{}, components); err != nil { + return nil, err + } + return components, nil +} + +func validateComponents(w io.Writer, components *corev1.ComponentStatusList) error { + if len(components.Items) == 0 { + fmt.Fprintf(w, "FAIL\n") + fmt.Fprintf(w, "\tcomponents not exist.\n") + return fmt.Errorf("components not exist") + } + + var failures []*validationError + for _, component := range components.Items { + for _, condition := range component.Conditions { + if condition.Status != corev1.ConditionTrue { + failures = append(failures, &validationError{ + name: fmt.Sprintf("%q", component.Name), + 
message: fmt.Sprintf("Component %q is not healthy", component.Name), + }) + } + } + } + + if len(failures) != 0 { + fmt.Fprintf(w, "FAIL\n") + for _, failure := range failures { + fmt.Fprintf(w, "\t[%v]: %s\n", failure.name, failure.message) + } + return fmt.Errorf("component failures found") + } + + fmt.Fprintf(w, "PASS\n") + return nil +} diff --git a/cmd/clusterctl/validation/validate_pods_test.go b/cmd/clusterctl/validation/validate_pods_test.go new file mode 100644 index 000000000000..7c734777f2c9 --- /dev/null +++ b/cmd/clusterctl/validation/validate_pods_test.go @@ -0,0 +1,253 @@ +/* +Copyright 2019 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package validation + +import ( + "bytes" + "testing" + + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +func podWithStatus(podName, namespace string, podPhase corev1.PodPhase, containerReadyStatus bool) corev1.Pod { + return corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: podName, + Namespace: namespace, + }, + Spec: corev1.PodSpec{}, + Status: corev1.PodStatus{ + Phase: podPhase, + ContainerStatuses: []corev1.ContainerStatus{ + { + Ready: containerReadyStatus, + }, + }, + }, + } +} + +func TestValidatePodsWithNoPod(t *testing.T) { + pods := &corev1.PodList{Items: []corev1.Pod{}} + + var b bytes.Buffer + if err := validatePods(&b, pods, "test-namespace"); err == nil { + t.Errorf("Expected error but didn't get one") + } +} + +func TestValidatePodsWithOnePod(t *testing.T) { + var testcases = []struct { + name string + podPhase corev1.PodPhase + containerReadyStatus bool + expectErr bool + }{ + { + name: "Pods include terminating pod", + podPhase: corev1.PodSucceeded, + containerReadyStatus: false, + expectErr: false, + }, + { + name: "Pods include pending pod", + podPhase: corev1.PodPending, + containerReadyStatus: false, + expectErr: true, + }, + { + name: "Pods include failed pod", + podPhase: corev1.PodFailed, + containerReadyStatus: false, + expectErr: true, + }, + { + name: "Pods include unknown pod", + podPhase: corev1.PodUnknown, + containerReadyStatus: false, + expectErr: true, + }, + { + name: "Pods include pod with non-ready container", + podPhase: corev1.PodRunning, + containerReadyStatus: false, + expectErr: true, + }, + { + name: "Pods are all ready", + podPhase: corev1.PodRunning, + containerReadyStatus: true, + expectErr: false, + }, + } + + for _, testcase := range testcases { + t.Run(testcase.name, func(t *testing.T) { + pods := &corev1.PodList{ + Items: []corev1.Pod{ + podWithStatus("test-pod", "test-namespace", testcase.podPhase, testcase.containerReadyStatus), + }, + } + + var b bytes.Buffer + err := 
validatePods(&b, pods, "test-namespace") + if testcase.expectErr && err == nil { + t.Errorf("Expect to get error, but got no returned error: %v", b.String()) + } + if !testcase.expectErr && err != nil { + t.Errorf("Expect to get no error, but got returned error: %v: %v", err, b.String()) + } + }) + } +} + +func TestValidatePodsWithNPods(t *testing.T) { + var testcases = []struct { + name string + pods []corev1.Pod + + expectErr bool + }{ + { + name: "Pods start with failed pod", + pods: []corev1.Pod{ + podWithStatus("test-pod-1", "test-namespace", corev1.PodFailed, false), + podWithStatus("test-pod-2", "test-namespace", corev1.PodRunning, true), + }, + expectErr: true, + }, + { + name: "Pods end with failed pod", + pods: []corev1.Pod{ + podWithStatus("test-pod-1", "test-namespace", corev1.PodRunning, true), + podWithStatus("test-pod-2", "test-namespace", corev1.PodFailed, false), + }, + expectErr: true, + }, + { + name: "Pods include pod with non-ready container", + pods: []corev1.Pod{ + podWithStatus("test-pod-1", "test-namespace", corev1.PodRunning, false), + podWithStatus("test-pod-2", "test-namespace", corev1.PodRunning, true), + }, + expectErr: true, + }, + { + name: "Pods are all failing", + pods: []corev1.Pod{ + podWithStatus("test-pod-1", "test-namespace", corev1.PodFailed, false), + podWithStatus("test-pod-2", "test-namespace", corev1.PodFailed, false), + }, + expectErr: true, + }, + { + name: "Pods are all ready", + pods: []corev1.Pod{ + podWithStatus("test-pod-1", "test-namespace", corev1.PodRunning, true), + podWithStatus("test-pod-2", "test-namespace", corev1.PodRunning, true), + }, + expectErr: false, + }, + } + + for _, testcase := range testcases { + t.Run(testcase.name, func(t *testing.T) { + pods := &corev1.PodList{ + Items: testcase.pods, + } + + var b bytes.Buffer + err := validatePods(&b, pods, "test-namespace") + if testcase.expectErr && err == nil { + t.Errorf("Expect to get error, but got no returned error: %v", b.String()) + } + if 
!testcase.expectErr && err != nil { + t.Errorf("Expect to get no error, but got returned error: %v: %v", err, b.String()) + } + }) + } +} + +func componentConditionWithStatus(status corev1.ConditionStatus) corev1.ComponentCondition { + return corev1.ComponentCondition{ + Status: status, + } +} + +func componentStatusListWithCondition(componentConditions []corev1.ComponentCondition) *corev1.ComponentStatusList { + return &corev1.ComponentStatusList{ + Items: []corev1.ComponentStatus{ + { + Conditions: componentConditions, + }, + }, + } +} + +func TestValidateComponentsWithNoComponent(t *testing.T) { + components := &corev1.ComponentStatusList{Items: []corev1.ComponentStatus{}} + + var b bytes.Buffer + if err := validateComponents(&b, components); err == nil { + t.Errorf("Expected error but didn't get one") + } +} + +func TestValidateComponents(t *testing.T) { + var testcases = []struct { + name string + conditionStatus corev1.ConditionStatus + + expectErr bool + }{ + { + name: "Components include unknown status", + conditionStatus: corev1.ConditionUnknown, + expectErr: true, + }, + { + name: "Components include not ready status", + conditionStatus: corev1.ConditionFalse, + expectErr: true, + }, + { + name: "Components are all ready", + conditionStatus: corev1.ConditionTrue, + expectErr: false, + }, + } + + for _, testcase := range testcases { + t.Run(testcase.name, func(t *testing.T) { + components := componentStatusListWithCondition( + []corev1.ComponentCondition{ + componentConditionWithStatus(testcase.conditionStatus), + }, + ) + + var b bytes.Buffer + err := validateComponents(&b, components) + if testcase.expectErr && err == nil { + t.Errorf("Expect to get error, but got no returned error: %v", b.String()) + } + if !testcase.expectErr && err != nil { + t.Errorf("Expect to get no error, but got returned error: %v: %v", err, b.String()) + } + }) + } +}