diff --git a/pkg/antctl/raw/check/cluster/command.go b/pkg/antctl/raw/check/cluster/command.go index 18715ea562d..a08e5aa1ccb 100644 --- a/pkg/antctl/raw/check/cluster/command.go +++ b/pkg/antctl/raw/check/cluster/command.go @@ -30,17 +30,18 @@ import ( "k8s.io/utils/ptr" "antrea.io/antrea/pkg/antctl/raw/check" - "antrea.io/antrea/pkg/version" ) func Command() *cobra.Command { + o := newOptions() command := &cobra.Command{ Use: "cluster", Short: "Runs pre installation checks", RunE: func(cmd *cobra.Command, args []string) error { - return Run() + return Run(o) }, } + command.Flags().StringVar(&o.testImage, "test-image", o.testImage, "Container image override for the cluster checker") return command } @@ -50,6 +51,17 @@ const ( podReadyTimeout = 1 * time.Minute ) +type options struct { + // Container image for the cluster checker. + testImage string +} + +func newOptions() *options { + return &options{ + testImage: check.DefaultTestImage, + } +} + type uncertainError struct { reason string } @@ -78,15 +90,17 @@ type testContext struct { clusterName string namespace string testPod *corev1.Pod + // Container image for the cluster checker. + testImage string } -func Run() error { +func Run(o *options) error { client, config, clusterName, err := check.NewClient() if err != nil { return fmt.Errorf("unable to create Kubernetes client: %s", err) } ctx := context.Background() - testContext := NewTestContext(client, config, clusterName) + testContext := NewTestContext(client, config, clusterName, o.testImage) if err := testContext.setup(ctx); err != nil { return err } @@ -122,7 +136,7 @@ func (t *testContext) setup(ctx context.Context) error { } deployment := check.NewDeployment(check.DeploymentParameters{ Name: deploymentName, - Image: getAntreaAgentImage(), + Image: t.testImage, Replicas: 1, Command: []string{"bash", "-c"}, Args: []string{"trap 'exit 0' SIGTERM; sleep infinity & pid=$!; wait $pid"}, @@ -197,19 +211,13 @@ func (t *testContext) setup(ctx context.Context) error { return nil } -func getAntreaAgentImage() string { - if version.ReleaseStatus == "released" { - return fmt.Sprintf("antrea/antrea-agent-ubuntu:%s", version.Version) - } - return "antrea/antrea-agent-ubuntu:latest" -} - -func NewTestContext(client kubernetes.Interface, config *rest.Config, clusterName string) *testContext { +func NewTestContext(client kubernetes.Interface, config *rest.Config, clusterName, testImage string) *testContext { return &testContext{ client: client, config: config, clusterName: clusterName, namespace: check.GenerateRandomNamespace(testNamespacePrefix), + testImage: testImage, } } diff --git a/pkg/antctl/raw/check/constants.go b/pkg/antctl/raw/check/constants.go new file mode 100644 index 00000000000..222889ad622 --- /dev/null +++ b/pkg/antctl/raw/check/constants.go @@ -0,0 +1,19 @@ +// Copyright 2024 Antrea Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package check + +const ( + DefaultTestImage = "antrea/toolbox:1.4-0" +) diff --git a/pkg/antctl/raw/check/installation/command.go b/pkg/antctl/raw/check/installation/command.go index d6120fbea9e..ef194994d56 100644 --- a/pkg/antctl/raw/check/installation/command.go +++ b/pkg/antctl/raw/check/installation/command.go @@ -18,7 +18,6 @@ import ( "context" "errors" "fmt" - "net" "os" "regexp" "strings" @@ -46,17 +45,21 @@ func Command() *cobra.Command { } command.Flags().StringVarP(&o.antreaNamespace, "namespace", "n", o.antreaNamespace, "Configure Namespace in which Antrea is running") command.Flags().StringVar(&o.runFilter, "run", o.runFilter, "Run only the tests that match the provided regex") + command.Flags().StringVar(&o.testImage, "test-image", o.testImage, "Container image override for the installation checker") return command } type options struct { antreaNamespace string runFilter string + // Container image for the installation checker. + testImage string } func newOptions() *options { return &options{ antreaNamespace: "kube-system", + testImage: check.DefaultTestImage, } } @@ -68,7 +71,6 @@ const ( kindEchoName = "echo" kindClientName = "client" agentDaemonSetName = "antrea-agent" - deploymentImage = "registry.k8s.io/e2e-test-images/agnhost:2.40" podReadyTimeout = 1 * time.Minute ) @@ -109,6 +111,8 @@ type testContext struct { namespace string // A nil regex indicates that all the tests should be run. runFilterRegex *regexp.Regexp + // Container image for the installation checker. + testImage string } type testStats struct { @@ -143,7 +147,7 @@ func Run(o *options) error { return fmt.Errorf("unable to create Kubernetes client: %w", err) } ctx := context.Background() - testContext := NewTestContext(client, config, clusterName, o.antreaNamespace, runFilterRegex) + testContext := NewTestContext(client, config, clusterName, o.antreaNamespace, runFilterRegex, o.testImage) if err := testContext.setup(ctx); err != nil { return err } @@ -157,9 +161,12 @@ func Run(o *options) error { return nil } -func agnhostConnectCommand(ip string, port string) []string { - hostPort := net.JoinHostPort(ip, port) - return []string{"/agnhost", "connect", hostPort, "--timeout=3s"} +func tcpProbeCommand(ip string, port int) []string { + return []string{"nc", ip, fmt.Sprint(port), "--wait=3s", "-vz"} +} + +func tcpServerCommand(port int) []string { + return []string{"nc", "-l", fmt.Sprint(port), "-k"} } func newService(name string, selector map[string]string, port int) *corev1.Service { @@ -184,6 +191,7 @@ func NewTestContext( clusterName string, antreaNamespace string, runFilterRegex *regexp.Regexp, + testImage string, ) *testContext { return &testContext{ client: client, @@ -192,6 +200,7 @@ func NewTestContext( antreaNamespace: antreaNamespace, namespace: check.GenerateRandomNamespace(testNamespacePrefix), runFilterRegex: runFilterRegex, + testImage: testImage, } } @@ -223,8 +232,8 @@ func (t *testContext) setup(ctx context.Context) error { Name: echoSameNodeDeploymentName, Role: kindEchoName, Port: 80, - Image: deploymentImage, - Command: []string{"/agnhost", "netexec", "--http-port=80"}, + Image: t.testImage, + Command: tcpServerCommand(80), Affinity: &corev1.Affinity{ PodAffinity: &corev1.PodAffinity{ RequiredDuringSchedulingIgnoredDuringExecution: []corev1.PodAffinityTerm{ @@ -254,8 +263,7 @@ func (t *testContext) setup(ctx context.Context) error { clientDeployment := check.NewDeployment(check.DeploymentParameters{ Name: clientDeploymentName, Role: kindClientName, - Image: deploymentImage, - Command: []string{"/agnhost", "pause"}, + Image: t.testImage, Port: 80, Tolerations: commonToleration, Labels: map[string]string{"app": "antrea", "component": "installation-checker", "name": clientDeploymentName}, @@ -268,8 +276,8 @@ func (t *testContext) setup(ctx context.Context) error { Name: echoOtherNodeDeploymentName, Role: kindEchoName, Port: 80, - Image: deploymentImage, - Command: []string{"/agnhost", "netexec", "--http-port=80"}, + Image: t.testImage, + Command: tcpServerCommand(80), Affinity: &corev1.Affinity{ PodAntiAffinity: &corev1.PodAntiAffinity{ RequiredDuringSchedulingIgnoredDuringExecution: []corev1.PodAffinityTerm{ @@ -357,14 +365,14 @@ func (t *testContext) runTests(ctx context.Context) testStats { return stats } -func (t *testContext) runAgnhostConnect(ctx context.Context, clientPodName string, container string, target string, targetPort int) error { - cmd := agnhostConnectCommand(target, fmt.Sprint(targetPort)) +func (t *testContext) tcpProbe(ctx context.Context, clientPodName string, container string, target string, targetPort int) error { + cmd := tcpProbeCommand(target, targetPort) _, stderr, err := check.ExecInPod(ctx, t.client, t.config, t.namespace, clientPodName, container, cmd) if err != nil { // We log the contents of stderr here for troubleshooting purposes. - t.Log("/agnhost command '%s' failed: %v", strings.Join(cmd, " "), err) + t.Log("tcp probe command '%s' failed: %v", strings.Join(cmd, " "), err) if stderr != "" { - t.Log("/agnhost stderr: %s", strings.TrimSpace(stderr)) + t.Log("tcp probe stderr: %s", strings.TrimSpace(stderr)) } } return err diff --git a/pkg/antctl/raw/check/installation/command_test.go b/pkg/antctl/raw/check/installation/command_test.go index 3d9a65e871f..ed56c945a2a 100644 --- a/pkg/antctl/raw/check/installation/command_test.go +++ b/pkg/antctl/raw/check/installation/command_test.go @@ -21,6 +21,8 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + + "antrea.io/antrea/pkg/antctl/raw/check" ) func overrideTestsRegistry(t *testing.T, registry map[string]Test) { @@ -103,7 +105,7 @@ func TestRun(t *testing.T) { overrideTestsRegistry(t, tc.registry) runFilterRegex, err := compileRunFilter(tc.runFilter) require.NoError(t, err) - testContext := NewTestContext(nil, nil, "test-cluster", "kube-system", runFilterRegex) + testContext := NewTestContext(nil, nil, "test-cluster", "kube-system", runFilterRegex, check.DefaultTestImage) stats := testContext.runTests(ctx) assert.Equal(t, tc.expectedStats, stats) }) diff --git a/pkg/antctl/raw/check/installation/test_denyall.go b/pkg/antctl/raw/check/installation/test_denyall.go index c2d83eee002..251332bc949 100644 --- a/pkg/antctl/raw/check/installation/test_denyall.go +++ b/pkg/antctl/raw/check/installation/test_denyall.go @@ -88,7 +88,7 @@ func (t *DenyAllConnectivityTest) Run(ctx context.Context, testContext *testCont for _, clientPod := range testContext.clientPods { for _, service := range services { for _, clusterIP := range service.Spec.ClusterIPs { - if err := testContext.runAgnhostConnect(ctx, clientPod.Name, "", clusterIP, 80); err != nil { + if err := testContext.tcpProbe(ctx, clientPod.Name, "", clusterIP, 80); err != nil { testContext.Log("NetworkPolicy is working as expected: Pod %s cannot connect to Service %s (%s)", clientPod.Name, service.Name, clusterIP) } else { return fmt.Errorf("NetworkPolicy is not working as expected: Pod %s connected to Service %s (%s) when it should not", clientPod.Name, service.Name, clusterIP) diff --git a/pkg/antctl/raw/check/installation/test_podtointernet.go b/pkg/antctl/raw/check/installation/test_podtointernet.go index c5be54b252a..fa65db26c6d 100644 --- a/pkg/antctl/raw/check/installation/test_podtointernet.go +++ b/pkg/antctl/raw/check/installation/test_podtointernet.go @@ -29,7 +29,7 @@ func (t *PodToInternetConnectivityTest) Run(ctx context.Context, testContext *te for _, clientPod := range testContext.clientPods { srcPod := testContext.namespace + "/" + clientPod.Name testContext.Log("Validating connectivity from Pod %s to the world (google.com)...", srcPod) - if err := testContext.runAgnhostConnect(ctx, clientPod.Name, "", "google.com", 80); err != nil { + if err := testContext.tcpProbe(ctx, clientPod.Name, "", "google.com", 80); err != nil { return fmt.Errorf("Pod %s was not able to connect to google.com: %w", srcPod, err) } testContext.Log("Pod %s was able to connect to google.com", srcPod) diff --git a/pkg/antctl/raw/check/installation/test_podtopodinternode.go b/pkg/antctl/raw/check/installation/test_podtopodinternode.go index feb72d15c06..e278b5c88d2 100644 --- a/pkg/antctl/raw/check/installation/test_podtopodinternode.go +++ b/pkg/antctl/raw/check/installation/test_podtopodinternode.go @@ -35,7 +35,7 @@ func (t *PodToPodInterNodeConnectivityTest) Run(ctx context.Context, testContext for _, podIP := range testContext.echoOtherNodePod.Status.PodIPs { echoIP := podIP.IP testContext.Log("Validating from Pod %s to Pod %s at IP %s...", srcPod, dstPod, echoIP) - if err := testContext.runAgnhostConnect(ctx, clientPod.Name, "", echoIP, 80); err != nil { + if err := testContext.tcpProbe(ctx, clientPod.Name, "", echoIP, 80); err != nil { return fmt.Errorf("client Pod %s was not able to communicate with echo Pod %s (%s): %w", clientPod.Name, testContext.echoOtherNodePod.Name, echoIP, err) } testContext.Log("client Pod %s was able to communicate with echo Pod %s (%s)", clientPod.Name, testContext.echoOtherNodePod.Name, echoIP) diff --git a/pkg/antctl/raw/check/installation/test_podtopodintranode.go b/pkg/antctl/raw/check/installation/test_podtopodintranode.go index 0762e542b89..b4bbc2396ec 100644 --- a/pkg/antctl/raw/check/installation/test_podtopodintranode.go +++ b/pkg/antctl/raw/check/installation/test_podtopodintranode.go @@ -32,7 +32,7 @@ func (t *PodToPodIntraNodeConnectivityTest) Run(ctx context.Context, testContext for _, podIP := range testContext.echoSameNodePod.Status.PodIPs { echoIP := podIP.IP testContext.Log("Validating from Pod %s to Pod %s at IP %s...", srcPod, dstPod, echoIP) - if err := testContext.runAgnhostConnect(ctx, clientPod.Name, "", echoIP, 80); err != nil { + if err := testContext.tcpProbe(ctx, clientPod.Name, "", echoIP, 80); err != nil { return fmt.Errorf("client Pod %s was not able to communicate with echo Pod %s (%s): %w", clientPod.Name, testContext.echoSameNodePod.Name, echoIP, err) } testContext.Log("client Pod %s was able to communicate with echo Pod %s (%s)", clientPod.Name, testContext.echoSameNodePod.Name, echoIP) diff --git a/pkg/antctl/raw/check/installation/test_podtoservice.go b/pkg/antctl/raw/check/installation/test_podtoservice.go index 28331e0fc00..1d45af42386 100644 --- a/pkg/antctl/raw/check/installation/test_podtoservice.go +++ b/pkg/antctl/raw/check/installation/test_podtoservice.go @@ -53,7 +53,7 @@ func (t *PodToServiceConnectivityTest) Run(ctx context.Context, testContext *tes for _, clusterIP := range service.Spec.ClusterIPs { // Service is realized asynchronously, retry a few times. if err := wait.PollUntilContextTimeout(ctx, 500*time.Millisecond, 2*time.Second, true, func(ctx context.Context) (bool, error) { - if err := testContext.runAgnhostConnect(ctx, clientPod.Name, "", clusterIP, 80); err != nil { + if err := testContext.tcpProbe(ctx, clientPod.Name, "", clusterIP, 80); err != nil { testContext.Log("Client Pod %s was not able to communicate with Service %s (%s): %v, retrying...", clientPod.Name, service.Name, clusterIP, err) return false, nil }