From 709c526a54d1a157b6dc68b08e97ec5041bb9396 Mon Sep 17 00:00:00 2001 From: Sam Day Date: Wed, 5 Apr 2023 09:06:15 +0200 Subject: [PATCH 01/14] feat: new dev/test environment --- .dockerignore | 6 + .github/workflows/test_e2e.yml | 64 +- .gitignore | 2 + .gitlab-ci.yml | 4 +- README.md | 4 +- e2etests/.gitignore | 4 - e2etests/setup.go | 648 ------------------- e2etests/templates/cilium.yml | 776 ----------------------- e2etests/templates/cloudinit_k3s.txt.tpl | 35 - e2etests/templates/cloudinit_k8s.txt.tpl | 61 -- e2etests/testing.go | 616 ------------------ hack/Dockerfile | 7 +- hack/dev-down.sh | 37 ++ hack/dev-up.sh | 157 +++++ hack/k3s-registries.yaml | 3 + hack/kustomization.yaml | 17 - hack/registry-port-forward.sh | 16 + skaffold.yaml | 14 +- {e2etests => tests/e2e}/e2e_test.go | 79 +-- tests/e2e/testing.go | 362 +++++++++++ 20 files changed, 666 insertions(+), 2246 deletions(-) create mode 100644 .dockerignore delete mode 100644 e2etests/.gitignore delete mode 100644 e2etests/setup.go delete mode 100644 e2etests/templates/cilium.yml delete mode 100644 e2etests/templates/cloudinit_k3s.txt.tpl delete mode 100644 e2etests/templates/cloudinit_k8s.txt.tpl delete mode 100644 e2etests/testing.go create mode 100755 hack/dev-down.sh create mode 100755 hack/dev-up.sh create mode 100644 hack/k3s-registries.yaml delete mode 100644 hack/kustomization.yaml create mode 100755 hack/registry-port-forward.sh rename {e2etests => tests/e2e}/e2e_test.go (73%) create mode 100644 tests/e2e/testing.go diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 000000000..017b030d5 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,6 @@ +* +!hcloud/ +!internal/ +!go.mod +!go.sum +!main.go diff --git a/.github/workflows/test_e2e.yml b/.github/workflows/test_e2e.yml index a6b6db125..09fdd9269 100644 --- a/.github/workflows/test_e2e.yml +++ b/.github/workflows/test_e2e.yml @@ -1,46 +1,11 @@ name: Run e2e tests on: [ pull_request ] jobs: - k8s: - runs-on: ubuntu-latest - strategy: - matrix: - k8s: [ 1.24.10, 1.25.6, 1.26.1 ] - fail-fast: false - name: k8s ${{ matrix.k8s }} - steps: - - uses: actions/setup-go@v4 - with: - go-version: '1.19' - - uses: actions/checkout@master - - name: HCLOUD_TOKEN - env: - HCLOUD_TOKEN: ${{ secrets.HCLOUD_TOKEN }} - TTS_TOKEN: ${{ secrets.TTS_TOKEN }} - run: | - set -ueo pipefail - if [[ "${HCLOUD_TOKEN:-}" != "" ]]; then - echo "HCLOUD_TOKEN=$HCLOUD_TOKEN" >> "$GITHUB_ENV" - elif [[ "${TTS_TOKEN:-}" != "" ]]; then - token="$(./scripts/get-token.sh)" - echo "::add-mask::$token" - echo "HCLOUD_TOKEN=$token" >> "$GITHUB_ENV" - else - echo "::error ::Couldn't determine HCLOUD_TOKEN. Check that repository secrets are setup correctly." - exit 1 - fi - - name: Run tests - env: - K8S_VERSION: k8s-${{ matrix.k8s }} - USE_NETWORKS: yes - run: | - go test $(go list ./... | grep e2etests) -v -timeout 60m - ./scripts/delete-token.sh $HCLOUD_TOKEN k3s: runs-on: ubuntu-latest strategy: matrix: - k3s: [ v1.24.10+k3s1, v1.25.6+k3s1, v1.26.1+k3s1 ] + k3s: [ v1.24, v1.25, v1.26 ] fail-fast: false name: k3s ${{ matrix.k3s }} steps: @@ -64,10 +29,31 @@ jobs: echo "::error ::Couldn't determine HCLOUD_TOKEN. Check that repository secrets are setup correctly." 
            exit 1
          fi
+
+      - uses: 3bit/setup-hcloud@v2
+      - uses: yokawasa/action-setup-kube-tools@v0.9.2
+        with:
+          setup-tools: |
+            helm
+            kubectl
+            skaffold
+          helm: v3.11.2
+          kubectl: v1.26.3
+          skaffold: v2.3.0
+
       - name: Run tests
         env:
-          K8S_VERSION: k3s-${{ matrix.k3s }}
+          K3S_CHANNEL: ${{ matrix.k3s }}
+          SCOPE: gha-${{ github.run_id }}-${{ github.run_attempt }}-${{ matrix.k3s }}
+          SSH_KEYS: sam.day,julian.toelle
           USE_NETWORKS: yes
         run: |
-          go test $(go list ./... | grep e2etests) -v -timeout 60m
-          ./scripts/delete-token.sh $HCLOUD_TOKEN
+          curl -sLS https://get.k3sup.dev | sh
+
+          trap "cat hack/.reg*; hack/dev-down.sh; ./scripts/delete-token.sh $HCLOUD_TOKEN" EXIT
+          source <(hack/dev-up.sh)
+
+          skaffold build --tag="e2e-${GITHUB_RUN_ID}-${GITHUB_RUN_NUMBER}"
+          tag=$(skaffold build --tag="e2e-${GITHUB_RUN_ID}-${GITHUB_RUN_NUMBER}" --quiet --output="{{ (index .Builds 0).Tag }}")
+          skaffold deploy --images=hetznercloud/hcloud-cloud-controller-manager=$tag
+          go test ./tests/e2e -v -timeout 60m
diff --git a/.gitignore b/.gitignore
index fbbfeea9e..898979d12 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,3 +5,5 @@ deploy/gen/
 .coverage.out
 .envrc
 hcloud-cloud-controller-manager
+*.tgz
+hack/.*
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 36eac7a8a..32a6dc4f2 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -35,7 +35,7 @@ test:unit:
   variables:
     NODE_NAME: "test"
   script:
-    - go test $(go list ./... | grep -v e2etests) -v
+    - go test $(go list ./... | grep -v e2e) -v
   tags:
     - hc-bladerunner
@@ -92,7 +92,7 @@ e2e:
     - docker login $CI_REGISTRY --username=$CI_REGISTRY_USER --password=$CI_REGISTRY_PASSWORD
     - docker pull $CCM_IMAGE_NAME
   script:
-    - go test $(go list ./... | grep e2etests) -v -timeout 60m
+    - go test $(go list ./... | grep e2e) -v -timeout 60m
   tags:
     - hc-bladerunner-build
diff --git a/README.md b/README.md
index cad3e4d8b..7039b0269 100644
--- a/README.md
+++ b/README.md
@@ -191,7 +191,7 @@ release.
 To run unit tests locally, execute
 
 ```sh
-go test $(go list ./... | grep -v e2etests) -v
+go test $(go list ./... | grep -v e2e) -v
 ```
 
 Check that your go version is up to date, tests might fail if it is not.
@@ -232,7 +232,7 @@ export KEEP_SERVER_ON_FAILURE=yes # Keep the test server after a test failure.
 2. Run the tests
 
 ```bash
-go test $(go list ./... | grep e2etests) -v -timeout 60m
+go test $(go list ./... | grep e2e) -v -timeout 60m
 ```
 
 The tests will now run and cleanup themselves afterwards.
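The new CI job above also doubles as a recipe for running the suite locally. The sketch below only chains together pieces that appear in this patch (hack/dev-up.sh, hack/dev-down.sh, the skaffold build/deploy invocations, and the relocated tests/e2e package); the local tag name, the SCOPE default, and the assumption that hack/dev-up.sh prints shell exports for the new cluster (which is why the workflow `source`s it) are illustrative rather than guaranteed by the patch.

```bash
#!/usr/bin/env bash
# Rough local equivalent of the "Run tests" step above (see assumptions in the note).
# Requires HCLOUD_TOKEN to be exported, plus hcloud, kubectl, helm, skaffold and k3sup.
set -ueo pipefail

# Optional: namespace the created cloud resources, as the CI job does via SCOPE/SSH_KEYS.
export SCOPE="${SCOPE:-local-$(whoami)}"

# Always tear the scoped environment down again, even if a step fails.
trap "hack/dev-down.sh" EXIT

# dev-up.sh is expected to emit shell exports (e.g. a kubeconfig path) for the new cluster.
source <(hack/dev-up.sh)

# Build the controller image and deploy the manifests with the freshly built tag.
tag=$(skaffold build --tag="local-e2e" --quiet --output="{{ (index .Builds 0).Tag }}")
skaffold deploy --images=hetznercloud/hcloud-cloud-controller-manager="$tag"

# Run the relocated end-to-end suite.
go test ./tests/e2e -v -timeout 60m
```

hack/dev-up.sh, hack/dev-down.sh, and hack/k3s-registries.yaml are all introduced by this commit (see the file list at the top of the patch), so the sketch only works on a checkout that includes it.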
Sometimes it might happen that you need to clean up the diff --git a/e2etests/.gitignore b/e2etests/.gitignore deleted file mode 100644 index 108479e84..000000000 --- a/e2etests/.gitignore +++ /dev/null @@ -1,4 +0,0 @@ -ci-hcloud-ccm.tar -ssh_key -kubeconfig -join.txt diff --git a/e2etests/setup.go b/e2etests/setup.go deleted file mode 100644 index 8f4a295ad..000000000 --- a/e2etests/setup.go +++ /dev/null @@ -1,648 +0,0 @@ -package e2etests - -import ( - "bytes" - "context" - "crypto/rand" - "crypto/rsa" - "crypto/x509" - "encoding/pem" - "fmt" - "html/template" - "io" - "net" - "os" - "os/exec" - "regexp" - "strings" - "sync" - "time" - - "golang.org/x/crypto/ssh" - - "github.com/hetznercloud/hcloud-go/hcloud" -) - -type K8sDistribution string - -const ( - K8sDistributionK8s K8sDistribution = "k8s" - K8sDistributionK3s K8sDistribution = "k3s" - - imageName = "hetznercloud/hcloud-cloud-controller-manager" -) - -var ( - imageRegexp = regexp.MustCompilePOSIX(fmt.Sprintf("%s\\:.*$", imageName)) - instanceType = "cpx21" -) - -type hcloudK8sSetup struct { - Hcloud *hcloud.Client - HcloudToken string - K8sVersion string - K8sDistribution K8sDistribution - TestIdentifier string - ImageName string - KeepOnFailure bool - ClusterNode *hcloud.Server - ExtServer *hcloud.Server - UseNetworks bool - privKey string - sshKey *hcloud.SSHKey - network *hcloud.Network - clusterJoinCMD string - WorkerNodes []*hcloud.Server - testLabels map[string]string -} - -type cloudInitTmpl struct { - K8sVersion string - HcloudToken string - HcloudNetwork string - IsClusterServer bool - JoinCMD string - UseFlannel bool -} - -// PrepareTestEnv setups a test environment for the Cloud Controller Manager -// This includes the creation of a Network, SSH Key and Server. -// The server will be created with a Cloud Init UserData -// The template can be found under e2etests/templates/cloudinit_.ixt.tpl. 
-func (s *hcloudK8sSetup) PrepareTestEnv(ctx context.Context, additionalSSHKeys []*hcloud.SSHKey) (string, error) { - const op = "hcloudK8sSetup/PrepareTestEnv" - - s.testLabels = map[string]string{"K8sDistribution": string(s.K8sDistribution), "K8sVersion": strings.ReplaceAll(s.K8sVersion, "+", ""), "test": s.TestIdentifier} - err := s.getSSHKey(ctx) - if err != nil { - return "", fmt.Errorf("%s getSSHKey: %s", op, err) - } - - err = s.getNetwork(ctx) - if err != nil { - return "", fmt.Errorf("%s getNetwork: %s", op, err) - } - userData, err := s.getCloudInitConfig(true) - if err != nil { - fmt.Printf("[cluster-node] %s getCloudInitConfig: %s", op, err) - return "", err - } - srv, err := s.createServer(ctx, "cluster-node", instanceType, additionalSSHKeys, userData) - if err != nil { - return "", fmt.Errorf("%s: create cluster node: %v", op, err) - } - s.ClusterNode = srv - s.waitUntilSSHable(srv) - err = s.waitForCloudInit(srv) - if err != nil { - return "", err - } - - joinCmd, err := s.getJoinCmd() - if err != nil { - return "", err - } - s.clusterJoinCMD = joinCmd - - err = s.transferDockerImage(s.ClusterNode) - if err != nil { - return "", fmt.Errorf("%s: %s", op, err) - } - - fmt.Printf("[%s] %s: Load Image:\n", s.ClusterNode.Name, op) - err = RunCommandOnServer(s.privKey, s.ClusterNode, "ctr -n=k8s.io image import ci-hcloud-ccm.tar") - if err != nil { - return "", fmt.Errorf("%s: Load image %s", op, err) - } - kubeconfigPath, err := s.PrepareK8s() - if err != nil { - return "", fmt.Errorf("%s: %s", op, err) - } - - var workers = 1 // Change this value if you want to have more workers for the test - var wg sync.WaitGroup - for worker := 1; worker <= workers; worker++ { - wg.Add(1) - go s.createClusterWorker(ctx, additionalSSHKeys, &wg, worker) - } - wg.Wait() - - srv, err = s.createServer(ctx, "ext-server", instanceType, additionalSSHKeys, "") - if err != nil { - return "", fmt.Errorf("%s: create ext server: %v", op, err) - } - s.ExtServer = srv - s.waitUntilSSHable(srv) - - return kubeconfigPath, nil -} - -func (s *hcloudK8sSetup) createClusterWorker(ctx context.Context, additionalSSHKeys []*hcloud.SSHKey, wg *sync.WaitGroup, worker int) { - const op = "hcloudK8sSetup/createClusterWorker" - defer wg.Done() - - workerName := fmt.Sprintf("cluster-worker-%d", worker) - fmt.Printf("[%s] %s Create worker node:\n", workerName, op) - - userData, err := s.getCloudInitConfig(false) - if err != nil { - fmt.Printf("[%s] %s getCloudInitConfig: %s", workerName, op, err) - return - } - srv, err := s.createServer(ctx, workerName, instanceType, additionalSSHKeys, userData) - if err != nil { - fmt.Printf("[%s] %s createServer: %s", workerName, op, err) - return - } - s.WorkerNodes = append(s.WorkerNodes, srv) - - s.waitUntilSSHable(srv) - - err = s.waitForCloudInit(srv) - if err != nil { - fmt.Printf("[%s] %s: wait for cloud init on worker: %v", srv.Name, op, err) - return - } - - err = s.transferDockerImage(srv) - if err != nil { - fmt.Printf("[%s] %s: transfer image on worker: %v", srv.Name, op, err) - return - } - - fmt.Printf("[%s] %s Load Image\n", srv.Name, op) - err = RunCommandOnServer(s.privKey, srv, "ctr -n=k8s.io image import ci-hcloud-ccm.tar") - if err != nil { - fmt.Printf("[%s] %s: load image on worker: %v", srv.Name, op, err) - return - } -} - -// waitForCloudInit waits on cloud init on the server. -// when cloud init is ready we can assume that the server -// and the plain k8s installation is ready. 
-func (s *hcloudK8sSetup) getJoinCmd() (string, error) { - const op = "hcloudK8sSetup/getJoinCmd" - fmt.Printf("[%s] %s: Download join cmd\n", s.ClusterNode.Name, op) - if s.K8sDistribution == K8sDistributionK8s { - err := scp("ssh_key", fmt.Sprintf("root@%s:/root/join.txt", s.ClusterNode.PublicNet.IPv4.IP.String()), "join.txt") - if err != nil { - return "", fmt.Errorf("[%s] %s download join cmd: %s", s.ClusterNode.Name, op, err) - } - cmd, err := os.ReadFile("join.txt") - if err != nil { - return "", fmt.Errorf("[%s] %s reading join cmd file: %s", s.ClusterNode.Name, op, err) - } - - return string(cmd), nil - } - err := scp("ssh_key", fmt.Sprintf("root@%s:/var/lib/rancher/k3s/server/node-token", s.ClusterNode.PublicNet.IPv4.IP.String()), "join.txt") - if err != nil { - return "", fmt.Errorf("[%s] %s download join cmd: %s", s.ClusterNode.Name, op, err) - } - token, err := os.ReadFile("join.txt") - if err != nil { - return "", fmt.Errorf("[%s] %s read join cmd: %s", s.ClusterNode.Name, op, err) - } - return fmt.Sprintf("K3S_URL=https://%s:6443 K3S_TOKEN=%s", s.ClusterNode.PublicNet.IPv4.IP.String(), token), nil -} - -func (s *hcloudK8sSetup) waitUntilSSHable(server *hcloud.Server) { - const op = "hcloudK8sSetup/PrepareTestEnv" - fmt.Printf("[%s] %s: Waiting for server to be sshable:\n", server.Name, op) - for { - conn, err := net.Dial("tcp", fmt.Sprintf("%s:22", server.PublicNet.IPv4.IP.String())) - if err != nil { - time.Sleep(1 * time.Second) - continue - } - _ = conn.Close() - fmt.Printf("[%s] %s: SSH Connection successful\n", server.Name, op) - break - } -} - -func (s *hcloudK8sSetup) createServer(ctx context.Context, name, typ string, additionalSSHKeys []*hcloud.SSHKey, userData string) (*hcloud.Server, error) { - const op = "e2etest/createServer" - - sshKeys := []*hcloud.SSHKey{s.sshKey} - sshKeys = append(sshKeys, additionalSSHKeys...) - - res, _, err := s.Hcloud.Server.Create(ctx, hcloud.ServerCreateOpts{ - Name: fmt.Sprintf("srv-%s-%s", name, s.TestIdentifier), - ServerType: &hcloud.ServerType{Name: typ}, - Image: &hcloud.Image{Name: "ubuntu-20.04"}, - SSHKeys: sshKeys, - UserData: userData, - Labels: s.testLabels, - Networks: []*hcloud.Network{s.network}, - }) - if err != nil { - return nil, fmt.Errorf("%s Hcloud.Server.Create: %s", op, err) - } - - _, errCh := s.Hcloud.Action.WatchProgress(ctx, res.Action) - if err := <-errCh; err != nil { - return nil, fmt.Errorf("%s WatchProgress Action %s: %s", op, res.Action.Command, err) - } - - for _, nextAction := range res.NextActions { - _, errCh := s.Hcloud.Action.WatchProgress(ctx, nextAction) - if err := <-errCh; err != nil { - return nil, fmt.Errorf("%s WatchProgress NextAction %s: %s", op, nextAction.Command, err) - } - } - srv, _, err := s.Hcloud.Server.GetByID(ctx, res.Server.ID) - if err != nil { - return nil, fmt.Errorf("%s Hcloud.Server.GetByID: %s", op, err) - } - return srv, nil -} - -// PrepareK8s patches an existing kubernetes cluster with a CNI and the correct -// Cloud Controller Manager version from this test run. 
-func (s *hcloudK8sSetup) PrepareK8s() (string, error) { - const op = "hcloudK8sSetup/PrepareK8s" - - if s.UseNetworks { - err := s.deployCilium() - if err != nil { - return "", fmt.Errorf("%s: %s", op, err) - } - } - if s.K8sDistribution != K8sDistributionK3s && !s.UseNetworks { - err := s.deployFlannel() - if err != nil { - return "", fmt.Errorf("%s: %s", op, err) - } - } - - err := s.prepareCCMDeploymentFile(s.UseNetworks) - if err != nil { - return "", fmt.Errorf("%s: %s", op, err) - } - - fmt.Printf("[%s] %s: Apply ccm deployment\n", s.ClusterNode.Name, op) - err = RunCommandOnServer(s.privKey, s.ClusterNode, "KUBECONFIG=/root/.kube/config kubectl apply -f ccm.yml") - if err != nil { - return "", fmt.Errorf("%s Deploy ccm: %s", op, err) - } - - fmt.Printf("[%s] %s: Download kubeconfig\n", s.ClusterNode.Name, op) - - err = scp("ssh_key", fmt.Sprintf("root@%s:/root/.kube/config", s.ClusterNode.PublicNet.IPv4.IP.String()), "kubeconfig") - if err != nil { - return "", fmt.Errorf("%s download kubeconfig: %s", op, err) - } - - fmt.Printf("[%s] %s: Ensure correct server is set\n", s.ClusterNode.Name, op) - kubeconfigBefore, err := os.ReadFile("kubeconfig") - if err != nil { - return "", fmt.Errorf("%s reading kubeconfig: %s", op, err) - } - kubeconfigAfterwards := strings.ReplaceAll(string(kubeconfigBefore), "127.0.0.1", s.ClusterNode.PublicNet.IPv4.IP.String()) - err = os.WriteFile("kubeconfig", []byte(kubeconfigAfterwards), 0) - if err != nil { - return "", fmt.Errorf("%s writing kubeconfig: %s", op, err) - } - return "kubeconfig", nil -} - -func scp(identityFile, src, dest string) error { - const op = "e2etests/scp" - - err := runCmd( - "/usr/bin/scp", - []string{ - "-F", "/dev/null", // ignore $HOME/.ssh/config - "-i", identityFile, - "-o", "IdentitiesOnly=yes", // only use the identities passed on the command line - "-o", "UserKnownHostsFile=/dev/null", - "-o", "StrictHostKeyChecking=no", - src, - dest, - }, - nil, - ) - if err != nil { - return fmt.Errorf("%s: %v", op, err) - } - return nil -} - -func runCmd(name string, argv []string, env []string) error { - cmd := exec.Command(name, argv...) - if os.Getenv("TEST_DEBUG_MODE") != "" { - cmd.Stdout = os.Stdout - cmd.Stderr = os.Stderr - } - if env != nil { - cmd.Env = append(os.Environ(), env...) - } - if err := cmd.Run(); err != nil { - return fmt.Errorf("run cmd: %s %s: %v", name, strings.Join(argv, " "), err) - } - return nil -} - -// prepareCCMDeploymentFile patches the Cloud Controller Deployment file -// It replaces the used image and the pull policy to always use the local image -// from this test run. 
-func (s *hcloudK8sSetup) prepareCCMDeploymentFile(networks bool) error { - const op = "hcloudK8sSetup/prepareCCMDeploymentFile" - fmt.Printf("%s: Read master deployment file\n", op) - var deploymentFilePath = "../deploy/ccm.yaml" - if networks { - deploymentFilePath = "../deploy/ccm-networks.yaml" - } - deploymentFile, err := os.ReadFile(deploymentFilePath) - if err != nil { - return fmt.Errorf("%s: read ccm deployment file %s: %v", op, deploymentFilePath, err) - } - - fmt.Printf("%s: Prepare deployment file and transfer it\n", op) - deploymentFile = imageRegexp.ReplaceAll(deploymentFile, []byte(s.ImageName)) - deploymentFile = []byte(strings.ReplaceAll(string(deploymentFile), " imagePullPolicy: Always", " imagePullPolicy: IfNotPresent")) - - err = RunCommandOnServer(s.privKey, s.ClusterNode, fmt.Sprintf("echo '%s' >> ccm.yml", deploymentFile)) - if err != nil { - return fmt.Errorf("%s: Prepare deployment file and transfer it: %s", op, err) - } - return nil -} - -// deployFlannel deploys Flannel as CNI. Flannel is used for all tests where -// we don't use Hetzner Cloud Networks. -func (s *hcloudK8sSetup) deployFlannel() error { - const op = "hcloudK8sSetup/deployFlannel" - fmt.Printf("%s: apply flannel deployment\n", op) - err := RunCommandOnServer(s.privKey, s.ClusterNode, "KUBECONFIG=/root/.kube/config kubectl apply -f https://raw.githubusercontent.com/coreos/flannel/master/Documentation/kube-flannel.yml") - if err != nil { - return fmt.Errorf("%s: apply flannel deployment: %s", op, err) - } - fmt.Printf("%s: patch flannel deployment\n", op) - err = RunCommandOnServer(s.privKey, s.ClusterNode, "KUBECONFIG=/root/.kube/config kubectl -n kube-flannel patch ds kube-flannel-ds --type json -p '[{\"op\":\"add\",\"path\":\"/spec/template/spec/tolerations/-\",\"value\":{\"key\":\"node.cloudprovider.kubernetes.io/uninitialized\",\"value\":\"true\",\"effect\":\"NoSchedule\"}}]'") - if err != nil { - return fmt.Errorf("%s: patch flannel deployment: %s", op, err) - } - return nil -} - -// deployCilium deploys Cilium as CNI. Cilium is used for all tests where -// we use Hetzner Cloud Networks as Cilium is one of the only CNIs -// that support Cloud Controllers as source for advertising routes. -func (s *hcloudK8sSetup) deployCilium() error { - const op = "hcloudK8sSetup/deployCilium" - - deploymentFile, err := os.ReadFile("templates/cilium.yml") - if err != nil { - return fmt.Errorf("%s: read cilium deployment file %s: %v", op, "templates/cilium.yml", err) - } - err = RunCommandOnServer(s.privKey, s.ClusterNode, fmt.Sprintf("cat < cilium.yml\n%s\nEOF", deploymentFile)) - if err != nil { - return fmt.Errorf("%s: Transfer cilium deployment: %s", op, err) - } - - fmt.Printf("%s: apply cilium deployment\n", op) - err = RunCommandOnServer(s.privKey, s.ClusterNode, "KUBECONFIG=/root/.kube/config kubectl apply -f cilium.yml") - if err != nil { - return fmt.Errorf("%s: apply cilium deployment: %s", op, err) - } - - return nil -} - -// transferDockerImage transfers the local build docker image tar via SCP. 
-func (s *hcloudK8sSetup) transferDockerImage(server *hcloud.Server) error { - const op = "hcloudK8sSetup/transferDockerImage" - fmt.Printf("[%s] %s: Transfer docker image\n", server.Name, op) - err := WithSSHSession(s.privKey, server.PublicNet.IPv4.IP.String(), func(session *ssh.Session) error { - file, err := os.Open("ci-hcloud-ccm.tar") - if err != nil { - return fmt.Errorf("%s read ci-hcloud-ccm.tar: %s", op, err) - } - defer file.Close() - stat, err := file.Stat() - if err != nil { - return fmt.Errorf("%s file.Stat: %s", op, err) - } - wg := sync.WaitGroup{} - wg.Add(1) - - go func() { - hostIn, _ := session.StdinPipe() - defer hostIn.Close() - fmt.Fprintf(hostIn, "C0664 %d %s\n", stat.Size(), "ci-hcloud-ccm.tar") - io.Copy(hostIn, file) - fmt.Fprint(hostIn, "\x00") - wg.Done() - }() - - err = session.Run("/usr/bin/scp -t /root") - if err != nil { - return fmt.Errorf("%s copy via scp: %s", op, err) - } - wg.Wait() - return err - }) - return err -} - -// waitForCloudInit waits on cloud init on the server. -// when cloud init is ready we can assume that the server -// and the plain k8s installation is ready. -func (s *hcloudK8sSetup) waitForCloudInit(server *hcloud.Server) error { - const op = "hcloudK8sSetup/PrepareTestEnv" - fmt.Printf("[%s] %s: Wait for cloud-init\n", server.Name, op) - err := RunCommandOnServer(s.privKey, server, "cloud-init status --wait > /dev/null") - if err != nil { - return fmt.Errorf("%s: Wait for cloud-init: %s", op, err) - } - return nil -} - -// TearDown deletes all created resources within the Hetzner Cloud -// there is no need to "shutdown" the k8s cluster before -// so we just delete all created resources. -func (s *hcloudK8sSetup) TearDown(testFailed bool) error { - const op = "hcloudK8sSetup/TearDown" - - if s.KeepOnFailure && testFailed { - fmt.Println("Skipping tear-down for further analysis.") - fmt.Println("Please clean-up afterwards ;-)") - return nil - } - - ctx := context.Background() - - _, _, err := s.Hcloud.Server.DeleteWithResult(ctx, s.ClusterNode) - if err != nil { - return fmt.Errorf("%s Hcloud.Server.Delete: %s", op, err) - } - s.ClusterNode = nil - - for _, wn := range s.WorkerNodes { - _, _, err := s.Hcloud.Server.DeleteWithResult(ctx, wn) - if err != nil { - return fmt.Errorf("[%s] %s Hcloud.Server.Delete: %s", wn.Name, op, err) - } - } - - _, _, err = s.Hcloud.Server.DeleteWithResult(ctx, s.ExtServer) - if err != nil { - return fmt.Errorf("%s Hcloud.Server.Delete: %s", op, err) - } - s.ExtServer = nil - - _, err = s.Hcloud.SSHKey.Delete(ctx, s.sshKey) - if err != nil { - return fmt.Errorf("%s Hcloud.SSHKey.Delete: %s", err, err) - } - s.sshKey = nil - _, err = s.Hcloud.Network.Delete(ctx, s.network) - if err != nil { - return fmt.Errorf("%s Hcloud.Network.Delete: %s", err, err) - } - s.network = nil - return nil -} - -// getCloudInitConfig returns the generated cloud init configuration. 
-func (s *hcloudK8sSetup) getCloudInitConfig(isClusterServer bool) (string, error) { - const op = "hcloudK8sSetup/getCloudInitConfig" - - data := cloudInitTmpl{ - K8sVersion: s.K8sVersion, - HcloudToken: s.HcloudToken, - HcloudNetwork: s.network.Name, - IsClusterServer: isClusterServer, - JoinCMD: s.clusterJoinCMD, - UseFlannel: s.K8sDistribution == K8sDistributionK3s && !s.UseNetworks, - } - str, err := os.ReadFile(fmt.Sprintf("templates/cloudinit_%s.txt.tpl", s.K8sDistribution)) - if err != nil { - return "", fmt.Errorf("%s: read template file %s: %v", "templates/cloudinit.txt.tpl", op, err) - } - tmpl, err := template.New("cloud_init").Parse(string(str)) - if err != nil { - return "", fmt.Errorf("%s: parsing template file %s: %v", "templates/cloudinit.txt.tpl", op, err) - } - var buf bytes.Buffer - if err := tmpl.Execute(&buf, data); err != nil { - return "", fmt.Errorf("%s: execute template: %v", op, err) - } - return buf.String(), nil -} - -// getSSHKey create and get the Hetzner Cloud SSH Key for the test. -func (s *hcloudK8sSetup) getSSHKey(ctx context.Context) error { - const op = "hcloudK8sSetup/getSSHKey" - pubKey, privKey, err := makeSSHKeyPair() - if err != nil { - return err - } - sshKey, _, err := s.Hcloud.SSHKey.Create(ctx, hcloud.SSHKeyCreateOpts{ - Name: fmt.Sprintf("s-%s", s.TestIdentifier), - PublicKey: pubKey, - Labels: s.testLabels, - }) - if err != nil { - return fmt.Errorf("%s: creating ssh key: %v", op, err) - } - s.privKey = privKey - s.sshKey = sshKey - err = os.WriteFile("ssh_key", []byte(s.privKey), 0600) - if err != nil { - return fmt.Errorf("%s: writing ssh key private key: %v", op, err) - } - return nil -} - -// getNetwork create a Hetzner Cloud Network for this test. -func (s *hcloudK8sSetup) getNetwork(ctx context.Context) error { - const op = "hcloudK8sSetup/getNetwork" - _, ipRange, _ := net.ParseCIDR("10.0.0.0/8") - _, subnetRange, _ := net.ParseCIDR("10.0.0.0/16") - network, _, err := s.Hcloud.Network.Create(ctx, hcloud.NetworkCreateOpts{ - Name: fmt.Sprintf("nw-%s", s.TestIdentifier), - IPRange: ipRange, - Labels: s.testLabels, - }) - if err != nil { - return fmt.Errorf("%s: creating network: %v", op, err) - } - _, _, err = s.Hcloud.Network.AddSubnet(ctx, network, hcloud.NetworkAddSubnetOpts{ - Subnet: hcloud.NetworkSubnet{ - Type: hcloud.NetworkSubnetTypeCloud, - IPRange: subnetRange, - NetworkZone: hcloud.NetworkZoneEUCentral, - }, - }) - if err != nil { - return fmt.Errorf("%s: creating subnet: %v", op, err) - } - s.network = network - return nil -} - -// makeSSHKeyPair generate a SSH key pair. 
-func makeSSHKeyPair() (string, string, error) { - privateKey, err := rsa.GenerateKey(rand.Reader, 1024) - if err != nil { - return "", "", err - } - - // generate and write private key as PEM - var privKeyBuf strings.Builder - - privateKeyPEM := &pem.Block{Type: "RSA PRIVATE KEY", Bytes: x509.MarshalPKCS1PrivateKey(privateKey)} - if err := pem.Encode(&privKeyBuf, privateKeyPEM); err != nil { - return "", "", err - } - - // generate and write public key - pub, err := ssh.NewPublicKey(&privateKey.PublicKey) - if err != nil { - return "", "", err - } - - var pubKeyBuf strings.Builder - pubKeyBuf.Write(ssh.MarshalAuthorizedKey(pub)) - - return pubKeyBuf.String(), privKeyBuf.String(), nil -} - -func RunCommandOnServer(privKey string, server *hcloud.Server, command string) error { - return WithSSHSession(privKey, server.PublicNet.IPv4.IP.String(), func(session *ssh.Session) error { - if ok := os.Getenv("TEST_DEBUG_MODE"); ok != "" { - session.Stdout = os.Stdout - session.Stderr = os.Stderr - } - return session.Run(command) - }) -} - -func WithSSHSession(privKey string, host string, fn func(*ssh.Session) error) error { - signer, err := ssh.ParsePrivateKey([]byte(privKey)) - if err != nil { - return err - } - - client, err := ssh.Dial("tcp", net.JoinHostPort(host, "22"), &ssh.ClientConfig{ - User: "root", - Auth: []ssh.AuthMethod{ssh.PublicKeys(signer)}, - HostKeyCallback: ssh.InsecureIgnoreHostKey(), - Timeout: 1 * time.Second, - }) - if err != nil { - return err - } - - session, err := client.NewSession() - if err != nil { - return err - } - defer session.Close() - - return fn(session) -} diff --git a/e2etests/templates/cilium.yml b/e2etests/templates/cilium.yml deleted file mode 100644 index 4c63204e6..000000000 --- a/e2etests/templates/cilium.yml +++ /dev/null @@ -1,776 +0,0 @@ ---- -# Source: cilium/templates/cilium-agent-serviceaccount.yaml -apiVersion: v1 -kind: ServiceAccount -metadata: - name: "cilium" - namespace: kube-system ---- -# Source: cilium/templates/cilium-operator-serviceaccount.yaml -apiVersion: v1 -kind: ServiceAccount -metadata: - name: "cilium-operator" - namespace: kube-system ---- -# Source: cilium/templates/cilium-configmap.yaml -apiVersion: v1 -kind: ConfigMap -metadata: - name: cilium-config - namespace: kube-system -data: - - # Identity allocation mode selects how identities are shared between cilium - # nodes by setting how they are stored. The options are "crd" or "kvstore". - # - "crd" stores identities in kubernetes as CRDs (custom resource definition). - # These can be queried with: - # kubectl get ciliumid - # - "kvstore" stores identities in a kvstore, etcd or consul, that is - # configured below. Cilium versions before 1.6 supported only the kvstore - # backend. Upgrades from these older cilium versions should continue using - # the kvstore by commenting out the identity-allocation-mode below, or - # setting it to "kvstore". - identity-allocation-mode: crd - cilium-endpoint-gc-interval: "5m0s" - - # If you want to run cilium in debug mode change this value to true - debug: "false" - # The agent can be put into the following three policy enforcement modes - # default, always and never. - # https://docs.cilium.io/en/latest/policy/intro/#policy-enforcement-modes - enable-policy: "default" - - # Enable IPv4 addressing. If enabled, all endpoints are allocated an IPv4 - # address. - enable-ipv4: "true" - - # Enable IPv6 addressing. If enabled, all endpoints are allocated an IPv6 - # address. 
- enable-ipv6: "false" - # Users who wish to specify their own custom CNI configuration file must set - # custom-cni-conf to "true", otherwise Cilium may overwrite the configuration. - custom-cni-conf: "false" - enable-bpf-clock-probe: "true" - # If you want cilium monitor to aggregate tracing for packets, set this level - # to "low", "medium", or "maximum". The higher the level, the less packets - # that will be seen in monitor output. - monitor-aggregation: medium - - # The monitor aggregation interval governs the typical time between monitor - # notification events for each allowed connection. - # - # Only effective when monitor aggregation is set to "medium" or higher. - monitor-aggregation-interval: 5s - - # The monitor aggregation flags determine which TCP flags which, upon the - # first observation, cause monitor notifications to be generated. - # - # Only effective when monitor aggregation is set to "medium" or higher. - monitor-aggregation-flags: all - # Specifies the ratio (0.0-1.0) of total system memory to use for dynamic - # sizing of the TCP CT, non-TCP CT, NAT and policy BPF maps. - bpf-map-dynamic-size-ratio: "0.0025" - # bpf-policy-map-max specifies the maximum number of entries in endpoint - # policy map (per endpoint) - bpf-policy-map-max: "16384" - # bpf-lb-map-max specifies the maximum number of entries in bpf lb service, - # backend and affinity maps. - bpf-lb-map-max: "65536" - # bpf-lb-bypass-fib-lookup instructs Cilium to enable the FIB lookup bypass - # optimization for nodeport reverse NAT handling. - bpf-lb-external-clusterip: "false" - - # Pre-allocation of map entries allows per-packet latency to be reduced, at - # the expense of up-front memory allocation for the entries in the maps. The - # default value below will minimize memory usage in the default installation; - # users who are sensitive to latency may consider setting this to "true". - # - # This option was introduced in Cilium 1.4. Cilium 1.3 and earlier ignore - # this option and behave as though it is set to "true". - # - # If this value is modified, then during the next Cilium startup the restore - # of existing endpoints and tracking of ongoing connections may be disrupted. - # As a result, reply packets may be dropped and the load-balancing decisions - # for established connections may change. - # - # If this option is set to "false" during an upgrade from 1.3 or earlier to - # 1.4 or later, then it may cause one-time disruptions during the upgrade. - preallocate-bpf-maps: "false" - - # Regular expression matching compatible Istio sidecar istio-proxy - # container image names - sidecar-istio-proxy-image: "cilium/istio_proxy" - - # Name of the cluster. Only relevant when building a mesh of clusters. - cluster-name: default - # Unique ID of the cluster. Must be unique across all conneted clusters and - # in the range of 1 and 255. Only relevant when building a mesh of clusters. 
- cluster-id: "" - - # Encapsulation mode for communication between nodes - # Possible values: - # - disabled - # - vxlan (default) - # - geneve - tunnel: disabled - # Enables L7 proxy for L7 policy enforcement and visibility - enable-l7-proxy: "true" - - enable-ipv4-masquerade: "true" - enable-ipv6-masquerade: "true" - enable-bpf-masquerade: "true" - - enable-xt-socket-fallback: "true" - install-iptables-rules: "true" - install-no-conntrack-iptables-rules: "false" - - auto-direct-node-routes: "false" - enable-bandwidth-manager: "false" - enable-local-redirect-policy: "false" - - native-routing-cidr: "10.0.0.0/8" - kube-proxy-replacement: "probe" - enable-health-check-nodeport: "true" - node-port-bind-protection: "true" - enable-auto-protect-node-port-range: "true" - enable-session-affinity: "true" - enable-l2-neigh-discovery: "true" - enable-endpoint-health-checking: "true" - enable-health-checking: "true" - enable-well-known-identities: "false" - enable-remote-node-identity: "true" - operator-api-serve-addr: "127.0.0.1:9234" - # Enable Hubble gRPC service. - enable-hubble: "true" - # UNIX domain socket for Hubble server to listen to. - hubble-socket-path: "/var/run/cilium/hubble.sock" - # An additional address for Hubble server to listen to (e.g. ":4244"). - hubble-listen-address: ":4244" - hubble-disable-tls: "false" - hubble-tls-cert-file: /var/lib/cilium/tls/hubble/server.crt - hubble-tls-key-file: /var/lib/cilium/tls/hubble/server.key - hubble-tls-client-ca-files: /var/lib/cilium/tls/hubble/client-ca.crt - ipam: "kubernetes" - k8s-require-ipv4-pod-cidr: "true" - k8s-require-ipv6-pod-cidr: "false" - # cluster-pool-ipv4-cidr: "10.0.0.0/8" - # cluster-pool-ipv4-mask-size: "8" - disable-cnp-status-updates: "true" - cgroup-root: "/run/cilium/cgroupv2" ---- -# Source: cilium/templates/cilium-agent-clusterrole.yaml -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRole -metadata: - name: cilium -rules: -- apiGroups: - - networking.k8s.io - resources: - - networkpolicies - verbs: - - get - - list - - watch -- apiGroups: - - discovery.k8s.io - resources: - - endpointslices - verbs: - - get - - list - - watch -- apiGroups: - - "" - resources: - - namespaces - - services - - nodes - - endpoints - verbs: - - get - - list - - watch -- apiGroups: - - "" - resources: - - pods - - pods/finalizers - verbs: - - get - - list - - watch - - update - - delete -- apiGroups: - - "" - resources: - - nodes - verbs: - - get - - list - - watch - - update -- apiGroups: - - "" - resources: - - nodes - - nodes/status - verbs: - - patch -- apiGroups: - - apiextensions.k8s.io - resources: - - customresourcedefinitions - verbs: - # Deprecated for removal in v1.10 - - create - - list - - watch - - update - - # This is used when validating policies in preflight. This will need to stay - # until we figure out how to avoid "get" inside the preflight, and then - # should be removed ideally. 
- - get -- apiGroups: - - cilium.io - resources: - - ciliumnetworkpolicies - - ciliumnetworkpolicies/status - - ciliumnetworkpolicies/finalizers - - ciliumclusterwidenetworkpolicies - - ciliumclusterwidenetworkpolicies/status - - ciliumclusterwidenetworkpolicies/finalizers - - ciliumendpoints - - ciliumendpoints/status - - ciliumendpoints/finalizers - - ciliumnodes - - ciliumnodes/status - - ciliumnodes/finalizers - - ciliumidentities - - ciliumidentities/finalizers - - ciliumlocalredirectpolicies - - ciliumlocalredirectpolicies/status - - ciliumlocalredirectpolicies/finalizers - - ciliumegressnatpolicies - verbs: - - '*' ---- -# Source: cilium/templates/cilium-operator-clusterrole.yaml -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRole -metadata: - name: cilium-operator -rules: -- apiGroups: - - "" - resources: - # to automatically delete [core|kube]dns pods so that are starting to being - # managed by Cilium - - pods - verbs: - - get - - list - - watch - - delete -- apiGroups: - - discovery.k8s.io - resources: - - endpointslices - verbs: - - get - - list - - watch -- apiGroups: - - "" - resources: - - services - verbs: - - get - - list - - watch -- apiGroups: - - "" - resources: - # to perform LB IP allocation for BGP - - services/status - verbs: - - update -- apiGroups: - - "" - resources: - # to perform the translation of a CNP that contains `ToGroup` to its endpoints - - services - - endpoints - # to check apiserver connectivity - - namespaces - verbs: - - get - - list - - watch -- apiGroups: - - cilium.io - resources: - - ciliumnetworkpolicies - - ciliumnetworkpolicies/status - - ciliumnetworkpolicies/finalizers - - ciliumclusterwidenetworkpolicies - - ciliumclusterwidenetworkpolicies/status - - ciliumclusterwidenetworkpolicies/finalizers - - ciliumendpoints - - ciliumendpoints/status - - ciliumendpoints/finalizers - - ciliumnodes - - ciliumnodes/status - - ciliumnodes/finalizers - - ciliumidentities - - ciliumidentities/status - - ciliumidentities/finalizers - - ciliumlocalredirectpolicies - - ciliumlocalredirectpolicies/status - - ciliumlocalredirectpolicies/finalizers - verbs: - - '*' -- apiGroups: - - apiextensions.k8s.io - resources: - - customresourcedefinitions - verbs: - - create - - get - - list - - update - - watch -# For cilium-operator running in HA mode. -# -# Cilium operator running in HA mode requires the use of ResourceLock for Leader Election -# between multiple running instances. -# The preferred way of doing this is to use LeasesResourceLock as edits to Leases are less -# common and fewer objects in the cluster watch "all Leases". 
-- apiGroups: - - coordination.k8s.io - resources: - - leases - verbs: - - create - - get - - update ---- -# Source: cilium/templates/cilium-agent-clusterrolebinding.yaml -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRoleBinding -metadata: - name: cilium -roleRef: - apiGroup: rbac.authorization.k8s.io - kind: ClusterRole - name: cilium -subjects: -- kind: ServiceAccount - name: "cilium" - namespace: kube-system ---- -# Source: cilium/templates/cilium-operator-clusterrolebinding.yaml -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRoleBinding -metadata: - name: cilium-operator -roleRef: - apiGroup: rbac.authorization.k8s.io - kind: ClusterRole - name: cilium-operator -subjects: -- kind: ServiceAccount - name: "cilium-operator" - namespace: kube-system ---- -# Source: cilium/templates/cilium-agent-daemonset.yaml -apiVersion: apps/v1 -kind: DaemonSet -metadata: - labels: - k8s-app: cilium - name: cilium - namespace: kube-system -spec: - selector: - matchLabels: - k8s-app: cilium - updateStrategy: - rollingUpdate: - maxUnavailable: 2 - type: RollingUpdate - template: - metadata: - annotations: - # This annotation plus the CriticalAddonsOnly toleration makes - # cilium to be a critical pod in the cluster, which ensures cilium - # gets priority scheduling. - # https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/ - scheduler.alpha.kubernetes.io/critical-pod: "" - labels: - k8s-app: cilium - spec: - affinity: - nodeAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - nodeSelectorTerms: - - matchExpressions: - - key: kubernetes.io/os - operator: In - values: - - linux - - matchExpressions: - - key: beta.kubernetes.io/os - operator: In - values: - - linux - podAntiAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - - labelSelector: - matchExpressions: - - key: k8s-app - operator: In - values: - - cilium - topologyKey: kubernetes.io/hostname - containers: - - args: - - --config-dir=/tmp/cilium/config-map - command: - - cilium-agent - startupProbe: - httpGet: - host: '127.0.0.1' - path: /healthz - port: 9876 - scheme: HTTP - httpHeaders: - - name: "brief" - value: "true" - failureThreshold: 105 - periodSeconds: 2 - successThreshold: 1 - livenessProbe: - httpGet: - host: '127.0.0.1' - path: /healthz - port: 9876 - scheme: HTTP - httpHeaders: - - name: "brief" - value: "true" - failureThreshold: 10 - periodSeconds: 30 - successThreshold: 1 - timeoutSeconds: 5 - readinessProbe: - httpGet: - host: '127.0.0.1' - path: /healthz - port: 9876 - scheme: HTTP - httpHeaders: - - name: "brief" - value: "true" - failureThreshold: 3 - periodSeconds: 30 - successThreshold: 1 - timeoutSeconds: 5 - env: - - name: K8S_NODE_NAME - valueFrom: - fieldRef: - apiVersion: v1 - fieldPath: spec.nodeName - - name: CILIUM_K8S_NAMESPACE - valueFrom: - fieldRef: - apiVersion: v1 - fieldPath: metadata.namespace - - name: CILIUM_CLUSTERMESH_CONFIG - value: /var/lib/cilium/clustermesh/ - - name: CILIUM_CNI_CHAINING_MODE - valueFrom: - configMapKeyRef: - key: cni-chaining-mode - name: cilium-config - optional: true - - name: CILIUM_CUSTOM_CNI_CONF - valueFrom: - configMapKeyRef: - key: custom-cni-conf - name: cilium-config - optional: true - image: "quay.io/cilium/cilium:v1.10.5@sha256:0612218e28288db360c63677c09fafa2d17edda4f13867bcabf87056046b33bb" - imagePullPolicy: IfNotPresent - lifecycle: - postStart: - exec: - command: - - "/cni-install.sh" - - "--enable-debug=false" - - "--cni-exclusive=true" - preStop: - exec: - command: - - /cni-uninstall.sh - 
name: cilium-agent - securityContext: - capabilities: - add: - - NET_ADMIN - - SYS_MODULE - privileged: true - volumeMounts: - - mountPath: /sys/fs/bpf - name: bpf-maps - - mountPath: /var/run/cilium - name: cilium-run - - mountPath: /host/opt/cni/bin - name: cni-path - - mountPath: /host/etc/cni/net.d - name: etc-cni-netd - - mountPath: /var/lib/cilium/clustermesh - name: clustermesh-secrets - readOnly: true - - mountPath: /tmp/cilium/config-map - name: cilium-config-path - readOnly: true - # Needed to be able to load kernel modules - - mountPath: /lib/modules - name: lib-modules - readOnly: true - - mountPath: /run/xtables.lock - name: xtables-lock - - mountPath: /var/lib/cilium/tls/hubble - name: hubble-tls - readOnly: true - hostNetwork: true - initContainers: - # Required to mount cgroup2 filesystem on the underlying Kubernetes node. - # We use nsenter command with host's cgroup and mount namespaces enabled. - - name: mount-cgroup - env: - - name: CGROUP_ROOT - value: /run/cilium/cgroupv2 - - name: BIN_PATH - value: /opt/cni/bin - command: - - sh - - -c - # The statically linked Go program binary is invoked to avoid any - # dependency on utilities like sh and mount that can be missing on certain - # distros installed on the underlying host. Copy the binary to the - # same directory where we install cilium cni plugin so that exec permissions - # are available. - - 'cp /usr/bin/cilium-mount /hostbin/cilium-mount && nsenter --cgroup=/hostproc/1/ns/cgroup --mount=/hostproc/1/ns/mnt "${BIN_PATH}/cilium-mount" $CGROUP_ROOT; rm /hostbin/cilium-mount' - image: "quay.io/cilium/cilium:v1.10.5@sha256:0612218e28288db360c63677c09fafa2d17edda4f13867bcabf87056046b33bb" - imagePullPolicy: IfNotPresent - volumeMounts: - - mountPath: /hostproc - name: hostproc - - mountPath: /hostbin - name: cni-path - securityContext: - privileged: true - - command: - - /init-container.sh - env: - - name: CILIUM_ALL_STATE - valueFrom: - configMapKeyRef: - key: clean-cilium-state - name: cilium-config - optional: true - - name: CILIUM_BPF_STATE - valueFrom: - configMapKeyRef: - key: clean-cilium-bpf-state - name: cilium-config - optional: true - image: "quay.io/cilium/cilium:v1.10.5@sha256:0612218e28288db360c63677c09fafa2d17edda4f13867bcabf87056046b33bb" - imagePullPolicy: IfNotPresent - name: clean-cilium-state - securityContext: - capabilities: - add: - - NET_ADMIN - privileged: true - volumeMounts: - - mountPath: /sys/fs/bpf - name: bpf-maps - # Required to mount cgroup filesystem from the host to cilium agent pod - - mountPath: /run/cilium/cgroupv2 - name: cilium-cgroup - mountPropagation: HostToContainer - - mountPath: /var/run/cilium - name: cilium-run - resources: - requests: - cpu: 100m - memory: 100Mi - restartPolicy: Always - priorityClassName: system-node-critical - serviceAccount: "cilium" - serviceAccountName: "cilium" - terminationGracePeriodSeconds: 1 - tolerations: - - operator: Exists - volumes: - # To keep state between restarts / upgrades - - hostPath: - path: /var/run/cilium - type: DirectoryOrCreate - name: cilium-run - # To keep state between restarts / upgrades for bpf maps - - hostPath: - path: /sys/fs/bpf - type: DirectoryOrCreate - name: bpf-maps - # To mount cgroup2 filesystem on the host - - hostPath: - path: /proc - type: Directory - name: hostproc - # To keep state between restarts / upgrades for cgroup2 filesystem - - hostPath: - path: /run/cilium/cgroupv2 - type: DirectoryOrCreate - name: cilium-cgroup - # To install cilium cni plugin in the host - - hostPath: - path: /opt/cni/bin - 
type: DirectoryOrCreate - name: cni-path - # To install cilium cni configuration in the host - - hostPath: - path: /etc/cni/net.d - type: DirectoryOrCreate - name: etc-cni-netd - # To be able to load kernel modules - - hostPath: - path: /lib/modules - name: lib-modules - # To access iptables concurrently with other processes (e.g. kube-proxy) - - hostPath: - path: /run/xtables.lock - type: FileOrCreate - name: xtables-lock - # To read the clustermesh configuration - - name: clustermesh-secrets - secret: - defaultMode: 420 - optional: true - secretName: cilium-clustermesh - # To read the configuration from the config map - - configMap: - name: cilium-config - name: cilium-config-path - - name: hubble-tls - projected: - sources: - - secret: - name: hubble-server-certs - items: - - key: ca.crt - path: client-ca.crt - - key: tls.crt - path: server.crt - - key: tls.key - path: server.key - optional: true ---- -# Source: cilium/templates/cilium-operator-deployment.yaml -apiVersion: apps/v1 -kind: Deployment -metadata: - labels: - io.cilium/app: operator - name: cilium-operator - name: cilium-operator - namespace: kube-system -spec: - # See docs on ServerCapabilities.LeasesResourceLock in file pkg/k8s/version/version.go - # for more details. - replicas: 2 - selector: - matchLabels: - io.cilium/app: operator - name: cilium-operator - strategy: - rollingUpdate: - maxSurge: 1 - maxUnavailable: 1 - type: RollingUpdate - template: - metadata: - annotations: - labels: - io.cilium/app: operator - name: cilium-operator - spec: - # In HA mode, cilium-operator pods must not be scheduled on the same - # node as they will clash with each other. - affinity: - podAntiAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - - labelSelector: - matchExpressions: - - key: io.cilium/app - operator: In - values: - - operator - topologyKey: kubernetes.io/hostname - containers: - - args: - - --config-dir=/tmp/cilium/config-map - command: - - cilium-operator-generic - env: - - name: K8S_NODE_NAME - valueFrom: - fieldRef: - apiVersion: v1 - fieldPath: spec.nodeName - - name: CILIUM_K8S_NAMESPACE - valueFrom: - fieldRef: - apiVersion: v1 - fieldPath: metadata.namespace - - name: CILIUM_DEBUG - valueFrom: - configMapKeyRef: - key: debug - name: cilium-config - optional: true - image: "quay.io/cilium/operator-generic:v1.10.5@sha256:2d2f730f219d489ff0702923bf24c0002cd93eb4b47ba344375566202f56d972" - imagePullPolicy: IfNotPresent - name: cilium-operator - livenessProbe: - httpGet: - host: '127.0.0.1' - path: /healthz - port: 9234 - scheme: HTTP - initialDelaySeconds: 60 - periodSeconds: 10 - timeoutSeconds: 3 - volumeMounts: - - mountPath: /tmp/cilium/config-map - name: cilium-config-path - readOnly: true - hostNetwork: true - restartPolicy: Always - priorityClassName: system-cluster-critical - serviceAccount: "cilium-operator" - serviceAccountName: "cilium-operator" - tolerations: - - operator: Exists - volumes: - # To read the configuration from the config map - - configMap: - name: cilium-config - name: cilium-config-path diff --git a/e2etests/templates/cloudinit_k3s.txt.tpl b/e2etests/templates/cloudinit_k3s.txt.tpl deleted file mode 100644 index 73f839f57..000000000 --- a/e2etests/templates/cloudinit_k3s.txt.tpl +++ /dev/null @@ -1,35 +0,0 @@ -#cloud-config -write_files: -- content: | - net.bridge.bridge-nf-call-ip6tables = 1 - net.bridge.bridge-nf-call-iptables = 1 - path: /etc/sysctl.d/k8s.conf -- content: | - alias k="kubectl" - alias ksy="kubectl -n kube-system" - alias kgp="kubectl get pods" - alias 
kgs="kubectl get services" - export HCLOUD_TOKEN={{.HcloudToken}} - path: /root/.bashrc -runcmd: -- sysctl --system -- apt install -y apt-transport-https curl -- export INSTALL_K3S_VERSION={{.K8sVersion}} -# Download and install latest hcloud cli release for easier debugging on host -- curl -s https://api.github.com/repos/hetznercloud/cli/releases/latest | grep browser_download_url | grep linux-amd64 | cut -d '"' -f 4 | wget -qi - -- tar xvzf hcloud-linux-amd64.tar.gz && cp hcloud /usr/bin/hcloud && chmod +x /usr/bin/hcloud -{{if .IsClusterServer}} -- curl -sfL https://get.k3s.io | sh -s - --disable servicelb --disable traefik --disable-cloud-controller --kubelet-arg="cloud-provider=external" --disable metrics-server {{if not .UseFlannel }}--flannel-backend=none{{ end }} -- mkdir -p /opt/cni/bin -- ln -s /var/lib/rancher/k3s/data/current/bin/loopback /opt/cni/bin/loopback # Workaround for https://github.com/k3s-io/k3s/issues/219 -- ln -s /var/lib/rancher/k3s/data/current/bin/bridge /opt/cni/bin/bridge # Workaround for https://github.com/k3s-io/k3s/issues/219 -- ln -s /var/lib/rancher/k3s/data/current/bin/host-local /opt/cni/bin/host-local # Workaround for https://github.com/k3s-io/k3s/issues/219 -- ln -s /var/lib/rancher/k3s/data/current/bin/portmap /opt/cni/bin/portmap # Workaround for https://github.com/k3s-io/k3s/issues/219 -- mkdir -p /root/.kube -- cp -i /etc/rancher/k3s/k3s.yaml /root/.kube/config -- until KUBECONFIG=/root/.kube/config kubectl get node; do sleep 2;done -- KUBECONFIG=/root/.kube/config kubectl -n kube-system create secret generic hcloud --from-literal=token={{.HcloudToken}} --from-literal=network={{.HcloudNetwork}} -{{else}} -- curl -sfL https://get.k3s.io | {{.JoinCMD}} sh -s - --kubelet-arg="cloud-provider=external" -- sleep 10 # to get the joining work -{{end}} diff --git a/e2etests/templates/cloudinit_k8s.txt.tpl b/e2etests/templates/cloudinit_k8s.txt.tpl deleted file mode 100644 index 7382c63e6..000000000 --- a/e2etests/templates/cloudinit_k8s.txt.tpl +++ /dev/null @@ -1,61 +0,0 @@ -#cloud-config -write_files: -- content: | - overlay - br_netfilter - path: /etc/modules-load.d/containerd.conf -- content: | - net.bridge.bridge-nf-call-ip6tables = 1 - net.bridge.bridge-nf-call-iptables = 1 - net.ipv4.ip_forward = 1 - path: /etc/sysctl.d/k8s.conf -- content: | - apiVersion: kubeadm.k8s.io/v1beta2 - kind: ClusterConfiguration - kubernetesVersion: v{{.K8sVersion}} - networking: - podSubnet: "10.244.0.0/16" - path: /tmp/kubeadm-config.yaml -- content: | - [Service] - Environment="KUBELET_EXTRA_ARGS=--cloud-provider=external" - path: /etc/systemd/system/kubelet.service.d/20-hcloud.conf -- content: | - alias k="kubectl" - alias ksy="kubectl -n kube-system" - alias kgp="kubectl get pods" - alias kgs="kubectl get services" - alias cilog="cat /var/log/cloud-init-output.log" - export HCLOUD_TOKEN={{.HcloudToken}} - path: /root/.bashrc -runcmd: -- export HOME=/root -- modprobe overlay -- modprobe br_netfilter -- sysctl --system -- apt install -y apt-transport-https curl -- curl -s https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add - -- echo "deb https://apt.kubernetes.io/ kubernetes-xenial main" > /etc/apt/sources.list.d/kubernetes.list -- curl -fsSL https://download.docker.com/linux/ubuntu/gpg | gpg --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg -- echo "deb [arch=amd64 signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" | tee /etc/apt/sources.list.d/docker.list > /dev/null 
-- apt update -- apt install -y kubectl={{.K8sVersion}}-00 kubeadm={{.K8sVersion}}-00 kubelet={{.K8sVersion}}-00 containerd.io -- systemctl daemon-reload -- mkdir -p /etc/containerd -- containerd config default | tee /etc/containerd/config.toml -- systemctl restart containerd -- systemctl restart kubelet -# Download and install latest hcloud cli release for easier debugging on host -- curl -s https://api.github.com/repos/hetznercloud/cli/releases/latest | grep browser_download_url | grep linux-amd64 | cut -d '"' -f 4 | wget -qi - -- tar xvzf hcloud-linux-amd64.tar.gz && cp hcloud /usr/bin/hcloud && chmod +x /usr/bin/hcloud -{{if .IsClusterServer}} -- kubeadm init --config /tmp/kubeadm-config.yaml -- mkdir -p /root/.kube -- cp -i /etc/kubernetes/admin.conf /root/.kube/config -- until KUBECONFIG=/root/.kube/config kubectl get node; do sleep 2;done -- KUBECONFIG=/root/.kube/config kubectl -n kube-system create secret generic hcloud --from-literal=token={{.HcloudToken}} --from-literal=network={{.HcloudNetwork}} -- kubeadm token create --print-join-command >> /root/join.txt -{{else}} -- {{.JoinCMD}} -- sleep 10 # to get the joining work -{{end}} diff --git a/e2etests/testing.go b/e2etests/testing.go deleted file mode 100644 index 47483a4fc..000000000 --- a/e2etests/testing.go +++ /dev/null @@ -1,616 +0,0 @@ -package e2etests - -import ( - "context" - "crypto/tls" - "fmt" - "math/rand" - "net" - "net/http" - "os" - "strings" - "sync" - "testing" - "time" - - corev1 "k8s.io/api/core/v1" - k8serrors "k8s.io/apimachinery/pkg/api/errors" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/util/intstr" - "k8s.io/apimachinery/pkg/util/wait" - "k8s.io/client-go/kubernetes" - "k8s.io/client-go/tools/clientcmd" - - "github.com/hetznercloud/hcloud-cloud-controller-manager/internal/testsupport" - "github.com/hetznercloud/hcloud-go/hcloud" -) - -var rng *rand.Rand - -func init() { - rng = rand.New(rand.NewSource(time.Now().UnixNano())) -} - -type TestCluster struct { - KeepOnFailure bool - useNetworks bool - setup *hcloudK8sSetup - k8sClient *kubernetes.Clientset - started bool - certificates []*hcloud.Certificate - - mu sync.Mutex -} - -func (tc *TestCluster) initialize() error { - const op = "e2tests/TestCluster.initialize" - - if tc.started { - return nil - } - - fmt.Printf("%s: Starting CCM Testsuite\n", op) - - networksSupport := os.Getenv("USE_NETWORKS") - if networksSupport == "yes" { - tc.useNetworks = true - } - isUsingGithubActions := os.Getenv("GITHUB_ACTIONS") - isUsingGitlabCI := os.Getenv("CI_JOB_ID") - testIdentifier := "" - if isUsingGithubActions == "true" { - testIdentifier = fmt.Sprintf("gh-%s-%d", os.Getenv("GITHUB_RUN_ID"), rng.Int()) - fmt.Printf("%s: Running in Github Action\n", op) - } - if isUsingGitlabCI != "" { - testIdentifier = fmt.Sprintf("gl-%s", isUsingGitlabCI) - fmt.Printf("%s: Running in Gitlab CI\n", op) - } - if testIdentifier == "" { - testIdentifier = fmt.Sprintf("local-%d", rng.Int()) - fmt.Printf("%s: Running local\n", op) - } - - k8sVersion := os.Getenv("K8S_VERSION") - if k8sVersion == "" { - k8sVersion = "k8s-1.18.9" - } - - k8sVersionsDetails := strings.Split(k8sVersion, "-") - if len(k8sVersionsDetails) != 2 { - return fmt.Errorf("%s: invalid k8s version: %v should be format -", op, k8sVersion) - } - - token := os.Getenv("HCLOUD_TOKEN") - if len(token) != 64 { - return fmt.Errorf("%s: No valid HCLOUD_TOKEN found", op) - } - tc.KeepOnFailure = os.Getenv("KEEP_SERVER_ON_FAILURE") == "yes" - - var additionalSSHKeys []*hcloud.SSHKey - - opts 
:= []hcloud.ClientOption{ - hcloud.WithToken(token), - hcloud.WithApplication("hcloud-ccm-testsuite", "1.0"), - } - hcloudClient := hcloud.NewClient(opts...) - additionalSSHKeysIDOrName := os.Getenv("USE_SSH_KEYS") - if additionalSSHKeysIDOrName != "" { - idsOrNames := strings.Split(additionalSSHKeysIDOrName, ",") - for _, idOrName := range idsOrNames { - additionalSSHKey, _, err := hcloudClient.SSHKey.Get(context.Background(), idOrName) - if err != nil { - return fmt.Errorf("%s: %s", op, err) - } - additionalSSHKeys = append(additionalSSHKeys, additionalSSHKey) - } - } - - fmt.Printf("%s: Test against %s\n", op, k8sVersion) - - imageName := os.Getenv("CCM_IMAGE_NAME") - buildImage := false - if imageName == "" { - imageName = fmt.Sprintf("hcloud-ccm:ci_%s", testIdentifier) - buildImage = true - } - if buildImage { - fmt.Printf("%s: Building ccm image\n", op) - - err := runCmd( - "go", - []string{"build", "-o", "../hcloud-cloud-controller-manager", "../."}, - []string{"CGO_ENABLED=0", "GOOS=linux", "GOARCH=amd64"}, - ) - if err != nil { - return fmt.Errorf("%s: %v", op, err) - } - - if err := runCmd("docker", []string{"build", "-t", imageName, "../"}, nil); err != nil { - return fmt.Errorf("%s: %v", op, err) - } - } - - fmt.Printf("%s: Saving ccm image to disk\n", op) - if err := runCmd("docker", []string{"save", "--output", "ci-hcloud-ccm.tar", imageName}, nil); err != nil { - return fmt.Errorf("%s: %v", op, err) - } - - tc.setup = &hcloudK8sSetup{ - Hcloud: hcloudClient, - K8sDistribution: K8sDistribution(k8sVersionsDetails[0]), - K8sVersion: k8sVersionsDetails[1], - TestIdentifier: testIdentifier, - ImageName: imageName, - HcloudToken: token, - KeepOnFailure: tc.KeepOnFailure, - UseNetworks: tc.useNetworks, - } - fmt.Printf("%s: Setting up test env\n", op) - - kubeconfigPath, err := tc.setup.PrepareTestEnv(context.Background(), additionalSSHKeys) - if err != nil { - return fmt.Errorf("%s: %s", op, err) - } - - config, err := clientcmd.BuildConfigFromFlags("", kubeconfigPath) - if err != nil { - return fmt.Errorf("%s: clientcmd.BuildConfigFromFlags: %s", op, err) - } - - tc.k8sClient, err = kubernetes.NewForConfig(config) - if err != nil { - return fmt.Errorf("%s: kubernetes.NewForConfig: %s", op, err) - } - - tc.started = true - return nil -} - -func (tc *TestCluster) Start() error { - const op = "e2etests/TestCluster.Start" - - tc.mu.Lock() - defer tc.mu.Unlock() - - if err := tc.initialize(); err != nil { - return fmt.Errorf("%s: %v", op, err) - } - if err := tc.ensureNodesReady(); err != nil { - return fmt.Errorf("%s: %v", op, err) - } - if err := tc.ensurePodsReady(); err != nil { - return fmt.Errorf("%s: %v", op, err) - } - return nil -} - -func (tc *TestCluster) Stop(testFailed bool) error { - const op = "e2etests/TestCluster.Stop" - - tc.mu.Lock() - defer tc.mu.Unlock() - - if !tc.started { - return nil - } - - for _, c := range tc.certificates { - if _, err := tc.setup.Hcloud.Certificate.Delete(context.Background(), c); err != nil { - fmt.Printf("%s: delete certificate %d: %v", op, c.ID, err) - } - } - - if err := tc.setup.TearDown(testFailed); err != nil { - fmt.Printf("%s: Tear Down: %s", op, err) - } - return nil -} - -func (tc *TestCluster) ensureNodesReady() error { - const op = "e2etests/ensureNodesReady" - - err := wait.Poll(1*time.Second, 5*time.Minute, func() (bool, error) { - var totalNodes = len(tc.setup.WorkerNodes) + 1 // Number Worker Nodes + 1 Cluster Node - var readyNodes int - nodes, err := tc.k8sClient.CoreV1().Nodes().List(context.Background(), 
metav1.ListOptions{}) - if err != nil { - return false, err - } - for _, node := range nodes.Items { - for _, cond := range node.Status.Conditions { - if cond.Type == corev1.NodeReady && cond.Status == corev1.ConditionTrue { - readyNodes++ - } - } - } - pendingNodes := totalNodes - readyNodes - fmt.Printf("Waiting for %d/%d nodes\n", pendingNodes, totalNodes) - return pendingNodes == 0, err - }) - - if err != nil { - return fmt.Errorf("%s: %s", op, err) - } - return nil -} - -func (tc *TestCluster) ensurePodsReady() error { - const op = "e2etests/ensurePodsReady" - - err := wait.Poll(1*time.Second, 10*time.Minute, func() (bool, error) { - pods, err := tc.k8sClient.CoreV1().Pods("kube-system").List(context.Background(), metav1.ListOptions{}) - if err != nil { - return false, err - } - totalPods := len(pods.Items) - - var readyPods int - for _, pod := range pods.Items { - for _, cond := range pod.Status.Conditions { - if cond.Type == corev1.PodReady && cond.Status == corev1.ConditionTrue { - readyPods++ - } - } - } - - pendingPods := totalPods - readyPods - fmt.Printf("Waiting for %d/%d pods\n", pendingPods, totalPods) - return pendingPods == 0, err - }) - - if err != nil { - return fmt.Errorf("%s: %s", op, err) - } - return nil -} - -// CreateTLSCertificate creates a TLS certificate used for testing and posts it -// to the Hetzner Cloud backend. -// -// The baseName of the certificate gets a random number suffix attached. -// baseName and suffix are separated by a single "-" character. -func (tc *TestCluster) CreateTLSCertificate(t *testing.T, baseName string) *hcloud.Certificate { - const op = "e2etests/TestCluster.CreateTLSCertificate" - - rndInt := rng.Int() - name := fmt.Sprintf("%s-%d", baseName, rndInt) - - p := testsupport.NewTLSPair(t, fmt.Sprintf("www.example%d.com", rndInt)) - opts := hcloud.CertificateCreateOpts{ - Name: name, - Certificate: p.Cert, - PrivateKey: p.Key, - } - cert, _, err := tc.setup.Hcloud.Certificate.Create(context.Background(), opts) - if err != nil { - t.Fatalf("%s: %s: %v", op, name, err) - } - if cert == nil { - t.Fatalf("%s: no certificate created", op) - } - - tc.mu.Lock() - defer tc.mu.Unlock() - tc.certificates = append(tc.certificates, cert) - - return cert -} - -type lbTestHelper struct { - podName string - port int - K8sClient *kubernetes.Clientset - KeepOnFailure bool - t *testing.T -} - -// DeployTestPod deploys a basic nginx pod within the k8s cluster -// and waits until it is "ready". 
-func (l *lbTestHelper) DeployTestPod() *corev1.Pod { - const op = "lbTestHelper/DeployTestPod" - - podName := fmt.Sprintf("pod-%s", l.podName) - testPod := corev1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: podName, - Labels: map[string]string{ - "app": podName, - }, - }, - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - { - Name: "nginx-hello-world", - Image: "nginxdemos/hello:plain-text", - Ports: []corev1.ContainerPort{ - { - ContainerPort: 80, - Name: "http", - }, - }, - }, - }, - }, - } - - pod, err := l.K8sClient.CoreV1().Pods(corev1.NamespaceDefault).Create(context.Background(), &testPod, metav1.CreateOptions{}) - if err != nil { - l.t.Fatalf("%s: could not create test pod: %s", op, err) - } - err = wait.Poll(1*time.Second, 1*time.Minute, func() (done bool, err error) { - p, err := l.K8sClient.CoreV1().Pods(corev1.NamespaceDefault).Get(context.Background(), podName, metav1.GetOptions{}) - if err != nil { - return false, err - } - for _, condition := range p.Status.Conditions { - if condition.Type == corev1.PodReady && condition.Status == corev1.ConditionTrue { - return true, nil - } - } - pod = p - return false, nil - }) - if err != nil { - l.t.Fatalf("%s: pod %s did not come up after 1 minute: %s", op, podName, err) - } - return pod -} - -// ServiceDefinition returns a service definition for a Hetzner Cloud Load Balancer (k8s service). -func (l *lbTestHelper) ServiceDefinition(pod *corev1.Pod, annotations map[string]string) *corev1.Service { - port := l.port - if port == 0 { - port = 80 - } - - return &corev1.Service{ - ObjectMeta: metav1.ObjectMeta{ - Name: fmt.Sprintf("svc-%s", l.podName), - Annotations: annotations, - }, - Spec: corev1.ServiceSpec{ - Selector: map[string]string{ - "app": pod.Name, - }, - Type: corev1.ServiceTypeLoadBalancer, - Ports: []corev1.ServicePort{ - { - Port: int32(port), - TargetPort: intstr.FromInt(80), - Name: "http", - }, - }, - ExternalTrafficPolicy: corev1.ServiceExternalTrafficPolicyTypeLocal, - }, - } -} - -// CreateService creates a k8s service based on the given service definition -// and waits until it is "ready". -func (l *lbTestHelper) CreateService(lbSvc *corev1.Service) (*corev1.Service, error) { - const op = "lbTestHelper/CreateService" - _, err := l.K8sClient.CoreV1().Services(corev1.NamespaceDefault).Create(context.Background(), lbSvc, metav1.CreateOptions{}) - if err != nil { - return nil, fmt.Errorf("%s: could not create service: %s", op, err) - } - - err = wait.Poll(1*time.Second, 5*time.Minute, func() (done bool, err error) { - svc, err := l.K8sClient.CoreV1().Services(corev1.NamespaceDefault).Get(context.Background(), lbSvc.Name, metav1.GetOptions{}) - if err != nil { - return false, err - } - ingressIPs := svc.Status.LoadBalancer.Ingress - if len(ingressIPs) > 0 { - lbSvc = svc - return true, nil - } - return false, nil - }) - if err != nil { - return nil, fmt.Errorf("%s: test service (load balancer) did not come up after 5 minute: %s", op, err) - } - return lbSvc, nil -} - -// TearDown deletes the created pod and service. 
-func (l *lbTestHelper) TearDown() { - const op = "lbTestHelper/TearDown" - - if l.KeepOnFailure && l.t.Failed() { - return - } - - svcName := fmt.Sprintf("svc-%s", l.podName) - err := l.K8sClient.CoreV1().Services(corev1.NamespaceDefault).Delete(context.Background(), svcName, metav1.DeleteOptions{}) - if err != nil && !k8serrors.IsNotFound(err) { - l.t.Errorf("%s: deleting test svc failed: %s", op, err) - } - - err = wait.Poll(1*time.Second, 3*time.Minute, func() (done bool, err error) { - _, err = l.K8sClient.CoreV1().Services(corev1.NamespaceDefault).Get(context.Background(), svcName, metav1.GetOptions{}) - if err != nil { - if k8serrors.IsNotFound(err) { - return true, nil - } - return false, err - } - return false, nil - }) - if err != nil { - l.t.Errorf("%s: test service was not removed after 3 minutes: %s", op, err) - } - - podName := fmt.Sprintf("pod-%s", l.podName) - err = l.K8sClient.CoreV1().Pods(corev1.NamespaceDefault).Delete(context.Background(), podName, metav1.DeleteOptions{}) - if err != nil && !k8serrors.IsNotFound(err) { - l.t.Errorf("%s: deleting test pod failed: %s", op, err) - } - err = wait.Poll(1*time.Second, 3*time.Minute, func() (done bool, err error) { - _, err = l.K8sClient.CoreV1().Pods(corev1.NamespaceDefault).Get(context.Background(), podName, metav1.GetOptions{}) - if err != nil { - if k8serrors.IsNotFound(err) { - return true, nil - } - return false, err - } - return false, nil - }) - if err != nil { - l.t.Errorf("%s: test pod not removed after 3 minutes: %s", op, err) - } -} - -type nwTestHelper struct { - podName string - K8sClient *kubernetes.Clientset - privateKey string - t *testing.T -} - -// DeployTestPod deploys a basic nginx pod within the k8s cluster -// and waits until it is "ready". -func (n *nwTestHelper) DeployTestPod() *corev1.Pod { - const op = "nwTestHelper/DeployTestPod" - podName := fmt.Sprintf("pod-%s", n.podName) - testPod := corev1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: podName, - Labels: map[string]string{ - "app": podName, - }, - }, - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - { - Name: "nginx-hello-world", - Image: "nginxdemos/hello:plain-text", - Ports: []corev1.ContainerPort{ - { - ContainerPort: 80, - Name: "http", - }, - }, - }, - }, - }, - } - - pod, err := n.K8sClient.CoreV1().Pods(corev1.NamespaceDefault).Create(context.Background(), &testPod, metav1.CreateOptions{}) - if err != nil { - n.t.Fatalf("%s: could not create test pod: %s", op, err) - } - err = wait.Poll(1*time.Second, 1*time.Minute, func() (done bool, err error) { - p, err := n.K8sClient.CoreV1().Pods(corev1.NamespaceDefault).Get(context.Background(), podName, metav1.GetOptions{}) - if err != nil { - return false, err - } - for _, condition := range p.Status.Conditions { - if condition.Type == corev1.PodReady && condition.Status == corev1.ConditionTrue { - return true, nil - } - } - pod = p - return false, nil - }) - if err != nil { - n.t.Fatalf("%s: pod %s did not come up after 1 minute: %s", op, podName, err) - } - pod, err = n.K8sClient.CoreV1().Pods(corev1.NamespaceDefault).Get(context.Background(), podName, metav1.GetOptions{}) - if err != nil { - n.t.Fatalf("%s: could not create test pod: %s", op, err) - } - return pod -} - -// TearDown deletes the created pod. 
-func (n *nwTestHelper) TearDown() { - const op = "nwTestHelper/TearDown" - podName := fmt.Sprintf("pod-%s", n.podName) - err := n.K8sClient.CoreV1().Pods(corev1.NamespaceDefault).Delete(context.Background(), podName, metav1.DeleteOptions{}) - if err != nil && !k8serrors.IsNotFound(err) { - n.t.Errorf("%s: deleting test pod failed: %s", op, err) - } - err = wait.Poll(1*time.Second, 1*time.Minute, func() (done bool, err error) { - _, err = n.K8sClient.CoreV1().Pods(corev1.NamespaceDefault).Get(context.Background(), podName, metav1.GetOptions{}) - if err != nil { - if k8serrors.IsNotFound(err) { - return true, nil - } - return false, err - } - return false, nil - }) - if err != nil { - n.t.Errorf("%s: test pod not removed after 1 minute: %s", op, err) - } -} - -// WaitForHTTPAvailable tries to connect to the given IP via http -// It tries it for 2 minutes, if after two minutes the connection -// wasn't successful and it wasn't a HTTP 200 response it will fail. -func WaitForHTTPAvailable(t *testing.T, ingressIP string, useHTTPS bool) { - const op = "e2etests/WaitForHTTPAvailable" - - client := &http.Client{ - Timeout: 1 * time.Second, - Transport: &http.Transport{ - TLSClientConfig: &tls.Config{ - InsecureSkipVerify: true, // nolint - }, - }, - } - proto := "http" - if useHTTPS { - proto = "https" - } - - err := wait.Poll(1*time.Second, 2*time.Minute, func() (bool, error) { - resp, err := client.Get(fmt.Sprintf("%s://%s", proto, ingressIP)) - if err != nil { - return false, nil - } - defer resp.Body.Close() - switch resp.StatusCode { - case http.StatusOK: - // Success - return true, nil - case http.StatusServiceUnavailable: - // Health checks are still evaluating - return false, nil - default: - return false, fmt.Errorf("%s: got HTTP Code %d instead of 200", op, resp.StatusCode) - } - }) - if err != nil { - t.Errorf("%s: not available via client.Get: %s", op, err) - } -} - -// WaitForHTTPOnServer tries to connect to the given IP using curl. -// -// It tries it for 2 minutes, if after two minutes the connection wasn't -// successful or it was not a HTTP 200 response it will fail. -func WaitForHTTPOnServer(t *testing.T, srv *hcloud.Server, privateKey, tgtIP string, useHTTPS bool) { - const op = "e2etests/WaitForHTTPOnServer" - - proto := "http" - if useHTTPS { - proto = "https" - } - cmd := fmt.Sprintf("curl -k %s://%s", proto, tgtIP) - if net.ParseIP(tgtIP).To4() == nil { - // Assume its a IPv6 address - cmd = fmt.Sprintf("curl -6 -kg %s://[%s]", proto, tgtIP) - } - - err := wait.Poll(1*time.Second, 2*time.Minute, func() (bool, error) { - if err := RunCommandOnServer(privateKey, srv, cmd); err != nil { - return false, nil - } - return true, nil - }) - if err != nil { - t.Errorf("%s: not available via %q: %s", op, cmd, err) - } -} diff --git a/hack/Dockerfile b/hack/Dockerfile index d47434e00..c7107b6a1 100644 --- a/hack/Dockerfile +++ b/hack/Dockerfile @@ -1,15 +1,14 @@ FROM golang:1.20 as builder WORKDIR /hccm -ADD ../go.mod go.sum /hccm/ +ADD go.mod go.sum /hccm/ RUN go mod download -ADD .. /hccm/ +ADD . 
/hccm/ RUN ls -al # `skaffold debug` sets SKAFFOLD_GO_GCFLAGS to disable compiler optimizations ARG SKAFFOLD_GO_GCFLAGS RUN CGO_ENABLED=0 go build -gcflags="${SKAFFOLD_GO_GCFLAGS}" -o hcloud-cloud-controller-manager.bin github.com/hetznercloud/hcloud-cloud-controller-manager - FROM alpine:3.17 RUN apk add --no-cache ca-certificates bash COPY --from=builder /hccm/hcloud-cloud-controller-manager.bin /bin/hcloud-cloud-controller-manager -ENTRYPOINT ["/bin/hcloud-cloud-controller-manager"] \ No newline at end of file +ENTRYPOINT ["/bin/hcloud-cloud-controller-manager"] diff --git a/hack/dev-down.sh b/hack/dev-down.sh new file mode 100755 index 000000000..9a7ade0a1 --- /dev/null +++ b/hack/dev-down.sh @@ -0,0 +1,37 @@ +#!/usr/bin/env bash +set -ue -o pipefail +SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" + +scope="${SCOPE:-dev}" +scope=${scope//[^a-zA-Z0-9_]/-} +scope_name=hccm-${scope} +label="managedby=hack" + +if [[ "${ALL:-}" == "" ]]; then + label="$label,scope=$scope_name" + rm -f $SCRIPT_DIR/.ssh-$scope $SCRIPT_DIR/.kubeconfig-$scope +else + rm -f $SCRIPT_DIR/.ssh* $SCRIPT_DIR/.kubeconfig* +fi + +for instance in $(hcloud server list -o noheader -o columns=id -l $label); do + ( + hcloud server delete $instance + ) & +done + + +for key in $(hcloud ssh-key list -o noheader -o columns=name -l $label); do + ( + hcloud ssh-key delete $key + ) & +done + + +for key in $(hcloud network list -o noheader -o columns=name -l $label); do + ( + hcloud network delete $key + ) & +done + +wait diff --git a/hack/dev-up.sh b/hack/dev-up.sh new file mode 100755 index 000000000..1c716b87e --- /dev/null +++ b/hack/dev-up.sh @@ -0,0 +1,157 @@ +#!/usr/bin/env bash +set -ueo pipefail +SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" + +if [[ -n "${DEBUG:-}" ]]; then set -x; fi + +# Redirect all stdout to stderr. +{ + if ! hcloud version >/dev/null; then echo "ERROR: 'hcloud' CLI not found, please install it and make it available on your \$PATH"; exit 1; fi + if ! k3sup version >/dev/null; then echo "ERROR: 'k3sup' not found, please install it and make it available on your \$PATH"; exit 1; fi + if [[ "${HCLOUD_TOKEN:-}" == "" ]]; then echo "ERROR: please set \$HCLOUD_TOKEN"; exit 1; fi + + # We run a lot of subshells below for speed. If any encounter an error, we shut down the whole process group, pronto. + function error() { + echo "Onoes, something went wrong! :( The output above might have some clues." + kill 0 + } + + trap error ERR + + image_name=${IMAGE_NAME:-ubuntu-20.04} + instance_count=${INSTANCES:-1} + instance_type=${INSTANCE_TYPE:-cpx11} + location=${LOCATION:-fsn1} + network_zone=${NETWORK_ZONE:-eu-central} + ssh_keys=${SSH_KEYS:-} + channel=${K3S_CHANNEL:-stable} + network_cidr=${NETWORK_CIDR:-10.0.0.0/8} + subnet_cidr=${SUBNET_CIDR:-10.0.0.0/24} + cluster_cidr=${CLUSTER_CIDR:-10.244.0.0/16} + scope="${SCOPE:-dev}" + scope=${scope//[^a-zA-Z0-9_]/-} + scope_name=hccm-${scope} + label="managedby=hack,scope=$scope_name" + ssh_private_key="$SCRIPT_DIR/.ssh-$scope" + k3s_opts=${K3S_OPTS:-"--kubelet-arg cloud-provider=external --disable=traefik --disable=servicelb --flannel-backend=none --disable=local-storage"} + k3s_server_opts=${K3S_SERVER_OPTS:-"--disable-cloud-controller --cluster-cidr ${cluster_cidr}"} + + export KUBECONFIG="$SCRIPT_DIR/.kubeconfig-$scope" + + ssh_command="ssh -i $ssh_private_key -o StrictHostKeyChecking=off -o BatchMode=yes -o ConnectTimeout=5" + + # Generate SSH keys and upload publkey to Hetzner Cloud. 
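+  # (This and the other provisioning blocks below run as backgrounded subshells for speed;
+  #  each one re-installs the ERR trap so that a failure anywhere calls error() above and
+  #  kills the whole process group. The trailing "wait" calls join them again.)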
+ ( trap error ERR + [[ ! -f $ssh_private_key ]] && ssh-keygen -t ed25519 -f $ssh_private_key -C '' -N '' + [[ ! -f $ssh_private_key.pub ]] && ssh-keygen -y -f $ssh_private_key > $ssh_private_key.pub + if ! hcloud ssh-key describe $scope_name >/dev/null 2>&1; then + hcloud ssh-key create --label $label --name $scope_name --public-key-from-file $ssh_private_key.pub + fi + ) & + + # Create Network + ( trap error ERR + if ! hcloud network describe $scope_name >/dev/null 2>&1; then + hcloud network create --label $label --ip-range $network_cidr --name $scope_name + hcloud network add-subnet --network-zone $network_zone --type cloud --ip-range $subnet_cidr $scope_name + fi + ) & + + + for num in $(seq $instance_count); do + # Create server and initialize Kubernetes on it with k3sup. + ( trap error ERR + + server_name="$scope_name-$num" + + # Maybe cluster is already up and node is already there. + if kubectl get node $server_name >/dev/null 2>&1; then + exit 0 + fi + + ip=$(hcloud server ip $server_name 2>/dev/null || true) + + if [[ -z "${ip:-}" ]]; then + # Wait for SSH key + until hcloud ssh-key describe $scope_name >/dev/null 2>&1; do sleep 1; done + until hcloud network describe $scope_name >/dev/null 2>&1; do sleep 1; done + + createcmd="hcloud server create --image $image_name --label $label --location $location --name $server_name --ssh-key=$scope_name --type $instance_type --network $scope_name" + for key in $ssh_keys; do + createcmd+=" --ssh-key $key" + done + $createcmd + ip=$(hcloud server ip $server_name) + fi + + # Wait for SSH. + until [ "$($ssh_command root@$ip echo ok 2>/dev/null)" = "ok" ]; do + sleep 1 + done + + $ssh_command root@$ip 'mkdir -p /etc/rancher/k3s && cat > /etc/rancher/k3s/registries.yaml' < $SCRIPT_DIR/k3s-registries.yaml + + if [[ "$num" == "1" ]]; then + # First node is control plane. + k3sup install --print-config=false --ip $ip --k3s-channel $channel --k3s-extra-args "${k3s_server_opts} ${k3s_opts}" --local-path $KUBECONFIG --ssh-key $ssh_private_key + else + # All subsequent nodes are initialized as workers. + + # Can't go any further until control plane has bootstrapped a bit though. + until $ssh_command root@$(hcloud server ip $scope_name-1 || true) stat /etc/rancher/node/password >/dev/null 2>&1; do + sleep 1 + done + + k3sup join --server-ip $(hcloud server ip $scope_name-1) --ip $ip --k3s-channel $channel --k3s-extra-args "${k3s_opts}" --ssh-key $ssh_private_key + fi + ) & + + # Wait for this node to show up in the cluster. + ( trap error ERR; set +x + until kubectl wait --for=condition=Ready node/$scope_name-$num >/dev/null 2>&1; do sleep 1; done + echo $scope_name-$num is up and in cluster + ) & + done + + ( trap error ERR + # Control plane init tasks. + # This is running in parallel with the server init, above. + + # Wait for control plane to look alive. + until kubectl get nodes >/dev/null 2>&1; do sleep 1; done; + + # Deploy private registry. + ( trap error ERR + if ! helm status -n kube-system registry >/dev/null 2>&1; then + helm install registry docker-registry \ + --repo=https://helm.twun.io \ + -n kube-system \ + --version 2.2.2 \ + --set service.clusterIP=10.43.0.2 \ + --set 'tolerations[0].key=node.cloudprovider.kubernetes.io/uninitialized' \ + --set 'tolerations[0].operator=Exists' + fi + ) & + + # Install Cilium. + ( trap error ERR + if ! 
kubectl get -n kube-system ds/cilium >/dev/null 2>&1; then + helm install cilium cilium --repo https://helm.cilium.io/ -n kube-system --version 1.13.1 --set tunnel=disabled --set ipv4NativeRoutingCIDR=$cluster_cidr + fi) & + + # Create HCLOUD_TOKEN Secret for hcloud-cloud-controller-manager. + ( trap error ERR + if ! kubectl -n kube-system get secret hcloud >/dev/null 2>&1; then + kubectl -n kube-system create secret generic hcloud --from-literal="token=$HCLOUD_TOKEN" --from-literal="network=$scope_name" + fi) & + wait + ) & + wait + echo "Success - cluster fully initialized and ready, why not see for yourself?" + echo '$ kubectl get nodes' + kubectl get nodes +} >&2 + +echo "export KUBECONFIG=$KUBECONFIG" +$SCRIPT_DIR/registry-port-forward.sh +echo "export SKAFFOLD_DEFAULT_REPO=localhost:30666" diff --git a/hack/k3s-registries.yaml b/hack/k3s-registries.yaml new file mode 100644 index 000000000..8c808b121 --- /dev/null +++ b/hack/k3s-registries.yaml @@ -0,0 +1,3 @@ +mirrors: + localhost:30666: + endpoint: ["http://10.43.0.2:5000"] diff --git a/hack/kustomization.yaml b/hack/kustomization.yaml deleted file mode 100644 index c3d6e7e51..000000000 --- a/hack/kustomization.yaml +++ /dev/null @@ -1,17 +0,0 @@ -apiVersion: kustomize.config.k8s.io/v1beta1 -kind: Kustomization -resources: - - ../deploy -patches: - - target: - kind: Deployment - name: hcloud-cloud-controller-manager - patch: |- - - op: add - path: /spec/template/spec/containers/0/env/- - value: - name: LOG_LEVEL - value: info - - op: replace - path: /spec/template/spec/containers/0/env/1/valueFrom/secretKeyRef/name - value: hcloud \ No newline at end of file diff --git a/hack/registry-port-forward.sh b/hack/registry-port-forward.sh new file mode 100755 index 000000000..082079d24 --- /dev/null +++ b/hack/registry-port-forward.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env bash +set -ue -o pipefail +SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" + +{ +until kubectl -n kube-system --timeout=30s rollout status deployment/registry-docker-registry >/dev/null 2>&1; do sleep 1; done +old_pid=$(cat $SCRIPT_DIR/.reg-pf 2>/dev/null || true) +if [[ -n "$old_pid" ]]; then + echo "killing old port-forward with PID $old_pid" + kill $old_pid || true +fi + +nohup kubectl port-forward -n kube-system svc/registry-docker-registry 30666:5000 >$SCRIPT_DIR/.reg-pf.out 2>$SCRIPT_DIR/.reg-pf.err & +} >&2 + +echo $! 
> $SCRIPT_DIR/.reg-pf diff --git a/skaffold.yaml b/skaffold.yaml index 803082933..a44783951 100644 --- a/skaffold.yaml +++ b/skaffold.yaml @@ -1,4 +1,4 @@ -apiVersion: skaffold/v2beta19 +apiVersion: skaffold/v4beta3 kind: Config metadata: name: cloud-controller-manager @@ -11,6 +11,12 @@ build: - hetznercloud/hcloud-cloud-controller-manager:buildcache local: useBuildkit: true -deploy: - kustomize: - paths: [hack/] \ No newline at end of file + insecureRegistries: + - localhost:30666 +manifests: + helm: + releases: + - name: hccm + chartPath: chart + setValues: + networking.enabled: true diff --git a/e2etests/e2e_test.go b/tests/e2e/e2e_test.go similarity index 73% rename from e2etests/e2e_test.go rename to tests/e2e/e2e_test.go index e3d493fbe..48630158d 100644 --- a/e2etests/e2e_test.go +++ b/tests/e2e/e2e_test.go @@ -1,4 +1,4 @@ -package e2etests +package e2e import ( "context" @@ -52,7 +52,10 @@ func TestCloudControllerManagerPodIsPresent(t *testing.T) { }) t.Run("pod with app=hcloud-cloud-controller-manager is present in kube-system", func(t *testing.T) { - pods, err := testCluster.k8sClient.CoreV1().Pods("kube-system").List(context.Background(), metav1.ListOptions{LabelSelector: "app=hcloud-cloud-controller-manager"}) + pods, err := testCluster.k8sClient.CoreV1().Pods("kube-system"). + List(context.Background(), metav1.ListOptions{ + LabelSelector: "app.kubernetes.io/name=hcloud-cloud-controller-manager", + }) assert.NoError(t, err) if len(pods.Items) == 0 { @@ -62,15 +65,20 @@ func TestCloudControllerManagerPodIsPresent(t *testing.T) { } func TestCloudControllerManagerSetCorrectNodeLabelsAndIPAddresses(t *testing.T) { - node, err := testCluster.k8sClient.CoreV1().Nodes().Get(context.Background(), testCluster.setup.ClusterNode.Name, metav1.GetOptions{}) + node, err := testCluster.k8sClient.CoreV1().Nodes().Get(context.Background(), testCluster.scope+"-1", metav1.GetOptions{}) assert.NoError(t, err) + server, _, err := testCluster.hcloud.Server.Get(context.TODO(), testCluster.scope+"-1") + if err != nil { + return + } + labels := node.Labels expectedLabels := map[string]string{ - "node.kubernetes.io/instance-type": testCluster.setup.ClusterNode.ServerType.Name, - "topology.kubernetes.io/region": testCluster.setup.ClusterNode.Datacenter.Location.Name, - "topology.kubernetes.io/zone": testCluster.setup.ClusterNode.Datacenter.Name, - "kubernetes.io/hostname": testCluster.setup.ClusterNode.Name, + "node.kubernetes.io/instance-type": server.ServerType.Name, + "topology.kubernetes.io/region": server.Datacenter.Location.Name, + "topology.kubernetes.io/zone": server.Datacenter.Name, + "kubernetes.io/hostname": server.Name, "kubernetes.io/os": "linux", "kubernetes.io/arch": "amd64", } @@ -82,19 +90,18 @@ func TestCloudControllerManagerSetCorrectNodeLabelsAndIPAddresses(t *testing.T) for _, address := range node.Status.Addresses { if address.Type == corev1.NodeExternalIP { - expectedIP := testCluster.setup.ClusterNode.PublicNet.IPv4.IP.String() + expectedIP := server.PublicNet.IPv4.IP.String() if expectedIP != address.Address { t.Errorf("Got %s as NodeExternalIP but expected %s", address.Address, expectedIP) } } } - if testCluster.useNetworks { - for _, address := range node.Status.Addresses { - if address.Type == corev1.NodeInternalIP { - expectedIP := testCluster.setup.ClusterNode.PrivateNet[0].IP.String() - if expectedIP != address.Address { - t.Errorf("Got %s as NodeInternalIP but expected %s", address.Address, expectedIP) - } + + for _, address := range node.Status.Addresses { + if 
address.Type == corev1.NodeInternalIP { + expectedIP := server.PrivateNet[0].IP.String() + if expectedIP != address.Address { + t.Errorf("Got %s as NodeInternalIP but expected %s", address.Address, expectedIP) } } } @@ -114,11 +121,9 @@ func TestCloudControllerManagerLoadBalancersMinimalSetup(t *testing.T) { t.Fatalf("deploying test svc: %s", err) } - ingressIP := lbSvc.Status.LoadBalancer.Ingress[0].IP // Index 0 is always the public IP of the LB - WaitForHTTPAvailable(t, ingressIP, false) - for _, ing := range lbSvc.Status.LoadBalancer.Ingress { - WaitForHTTPOnServer(t, testCluster.setup.ExtServer, testCluster.setup.privKey, ing.IP, false) + fmt.Println(ing.IP) + WaitForHTTPAvailable(t, ing.IP, false) } lbTest.TearDown() @@ -150,9 +155,10 @@ func TestCloudControllerManagerLoadBalancersHTTPS(t *testing.T) { ingressIP := lbSvc.Status.LoadBalancer.Ingress[0].IP // Index 0 is always the public IP of the LB WaitForHTTPAvailable(t, ingressIP, true) - for _, ing := range lbSvc.Status.LoadBalancer.Ingress { - WaitForHTTPOnServer(t, testCluster.setup.ExtServer, testCluster.setup.privKey, ing.IP, true) - } + // TODO: + //for _, ing := range lbSvc.Status.LoadBalancer.Ingress { + // WaitForHTTPOnServer(t, testCluster.setup.ExtServer, testCluster.setup.privKey, ing.IP, true) + //} lbTest.TearDown() } @@ -181,7 +187,7 @@ func TestCloudControllerManagerLoadBalancersHTTPSWithManagedCertificate(t *testi if err != nil { t.Fatalf("deploying test svc: %s", err) } - certs, err := testCluster.setup.Hcloud.Certificate.AllWithOpts(context.Background(), hcloud.CertificateListOpts{ + certs, err := testCluster.hcloud.Certificate.AllWithOpts(context.Background(), hcloud.CertificateListOpts{ ListOpts: hcloud.ListOpts{ LabelSelector: fmt.Sprintf("%s=%s", hcops.LabelServiceUID, lbSvc.ObjectMeta.UID), }, @@ -190,15 +196,11 @@ func TestCloudControllerManagerLoadBalancersHTTPSWithManagedCertificate(t *testi assert.Len(t, certs, 1) lbTest.TearDown() - _, err = testCluster.setup.Hcloud.Certificate.Delete(context.Background(), certs[0]) + _, err = testCluster.hcloud.Certificate.Delete(context.Background(), certs[0]) assert.NoError(t, err) } func TestCloudControllerManagerLoadBalancersWithPrivateNetwork(t *testing.T) { - if testCluster.useNetworks == false { - t.Skipf("Private Networks test is disabled") - } - lbTest := lbTestHelper{t: t, K8sClient: testCluster.k8sClient, podName: "loadbalancer-private-network"} pod := lbTest.DeployTestPod() @@ -220,15 +222,16 @@ func TestCloudControllerManagerLoadBalancersWithPrivateNetwork(t *testing.T) { } func TestCloudControllerManagerNetworksPodIPsAreAccessible(t *testing.T) { - if testCluster.useNetworks == false { - t.Skipf("Private Networks test is disabled") - } - - nwTest := nwTestHelper{t: t, K8sClient: testCluster.k8sClient, privateKey: testCluster.setup.privKey, podName: "network-routes-accessible"} - - pod := nwTest.DeployTestPod() + node, err := testCluster.k8sClient.CoreV1().Nodes().Get(context.Background(), testCluster.scope+"-1", metav1.GetOptions{}) - WaitForHTTPOnServer(t, testCluster.setup.ExtServer, testCluster.setup.privKey, pod.Status.PodIP, false) - - nwTest.TearDown() + network, _, err := testCluster.hcloud.Network.Get(context.TODO(), testCluster.scope) + if err != nil { + t.Error(err) + } + for _, subnet := range network.Subnets { + if subnet.IPRange.String() == node.Spec.PodCIDR { + return + } + } + t.Fatal("did not find expected route table entry") } diff --git a/tests/e2e/testing.go b/tests/e2e/testing.go new file mode 100644 index 000000000..c3cbbd6c9 --- 
/dev/null +++ b/tests/e2e/testing.go @@ -0,0 +1,362 @@ +package e2e + +import ( + "context" + "crypto/tls" + "fmt" + "math/rand" + "net/http" + "os" + "regexp" + "sync" + "testing" + "time" + + "github.com/hetznercloud/hcloud-cloud-controller-manager/internal/testsupport" + "github.com/hetznercloud/hcloud-go/hcloud" + corev1 "k8s.io/api/core/v1" + k8serrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/intstr" + "k8s.io/apimachinery/pkg/util/wait" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/tools/clientcmd" +) + +var rng *rand.Rand +var scopeButcher = regexp.MustCompile(`[^a-zA-Z0-9_]`) + +func init() { + rng = rand.New(rand.NewSource(time.Now().UnixNano())) +} + +type TestCluster struct { + KeepOnFailure bool + useNetworks bool + hcloud *hcloud.Client + k8sClient *kubernetes.Clientset + certificates []*hcloud.Certificate + scope string + mu sync.Mutex +} + +func (tc *TestCluster) initialize() error { + const op = "e2tests/TestCluster.initialize" + + fmt.Printf("%s: Starting CCM Testsuite\n", op) + + tc.scope = os.Getenv("SCOPE") + if tc.scope == "" { + tc.scope = "dev" + } + tc.scope = scopeButcher.ReplaceAllString(tc.scope, "-") + tc.scope = "hccm-" + tc.scope + + networksSupport := os.Getenv("USE_NETWORKS") + if networksSupport == "yes" { + tc.useNetworks = true + } + + token := os.Getenv("HCLOUD_TOKEN") + if len(token) != 64 { + return fmt.Errorf("%s: No valid HCLOUD_TOKEN found", op) + } + tc.KeepOnFailure = os.Getenv("KEEP_SERVER_ON_FAILURE") == "yes" + + opts := []hcloud.ClientOption{ + hcloud.WithToken(token), + hcloud.WithApplication("hcloud-ccm-testsuite", "1.0"), + } + hcloudClient := hcloud.NewClient(opts...) + tc.hcloud = hcloudClient + + fmt.Printf("%s: Setting up test env\n", op) + + loadingRules := clientcmd.NewDefaultClientConfigLoadingRules() + configOverrides := &clientcmd.ConfigOverrides{} + + kubeConfig := clientcmd.NewNonInteractiveDeferredLoadingClientConfig(loadingRules, configOverrides) + clientConfig, err := kubeConfig.ClientConfig() + if err != nil { + return fmt.Errorf("%s: kubeConfig.ClientConfig: %s", op, err) + } + + tc.k8sClient, err = kubernetes.NewForConfig(clientConfig) + if err != nil { + return fmt.Errorf("%s: kubernetes.NewForConfig: %s", op, err) + } + + return nil +} + +func (tc *TestCluster) Start() error { + const op = "e2e/TestCluster.Start" + + tc.mu.Lock() + defer tc.mu.Unlock() + + if err := tc.initialize(); err != nil { + return fmt.Errorf("%s: %v", op, err) + } + return nil +} + +func (tc *TestCluster) Stop(testFailed bool) error { + const op = "e2e/TestCluster.Stop" + + tc.mu.Lock() + defer tc.mu.Unlock() + + for _, c := range tc.certificates { + if _, err := tc.hcloud.Certificate.Delete(context.Background(), c); err != nil { + fmt.Printf("%s: delete certificate %d: %v", op, c.ID, err) + } + } + + return nil +} + +// CreateTLSCertificate creates a TLS certificate used for testing and posts it +// to the Hetzner Cloud backend. +// +// The baseName of the certificate gets a random number suffix attached. +// baseName and suffix are separated by a single "-" character. 
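+// Created certificates are tracked on the TestCluster and are deleted again in Stop().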
+func (tc *TestCluster) CreateTLSCertificate(t *testing.T, baseName string) *hcloud.Certificate { + const op = "e2e/TestCluster.CreateTLSCertificate" + + rndInt := rng.Int() + name := fmt.Sprintf("%s-%d", baseName, rndInt) + + p := testsupport.NewTLSPair(t, fmt.Sprintf("www.example%d.com", rndInt)) + opts := hcloud.CertificateCreateOpts{ + Name: name, + Certificate: p.Cert, + PrivateKey: p.Key, + } + cert, _, err := tc.hcloud.Certificate.Create(context.Background(), opts) + if err != nil { + t.Fatalf("%s: %s: %v", op, name, err) + } + if cert == nil { + t.Fatalf("%s: no certificate created", op) + } + + tc.mu.Lock() + defer tc.mu.Unlock() + tc.certificates = append(tc.certificates, cert) + + return cert +} + +type lbTestHelper struct { + podName string + port int + K8sClient *kubernetes.Clientset + KeepOnFailure bool + t *testing.T +} + +// DeployTestPod deploys a basic nginx pod within the k8s cluster +// and waits until it is "ready" +func (l *lbTestHelper) DeployTestPod() *corev1.Pod { + const op = "lbTestHelper/DeployTestPod" + + podName := fmt.Sprintf("pod-%s", l.podName) + testPod := corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: podName, + Labels: map[string]string{ + "app": podName, + }, + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "nginx-hello-world", + Image: "nginxdemos/hello:plain-text", + Ports: []corev1.ContainerPort{ + { + ContainerPort: 80, + Name: "http", + }, + }, + }, + }, + }, + } + + pod, err := l.K8sClient.CoreV1().Pods(corev1.NamespaceDefault).Create(context.Background(), &testPod, metav1.CreateOptions{}) + if err != nil { + l.t.Fatalf("%s: could not create test pod: %s", op, err) + } + err = wait.Poll(1*time.Second, 1*time.Minute, func() (done bool, err error) { + p, err := l.K8sClient.CoreV1().Pods(corev1.NamespaceDefault).Get(context.Background(), podName, metav1.GetOptions{}) + if err != nil { + return false, err + } + for _, condition := range p.Status.Conditions { + if condition.Type == corev1.PodReady && condition.Status == corev1.ConditionTrue { + return true, nil + } + } + pod = p + return false, nil + }) + if err != nil { + l.t.Fatalf("%s: pod %s did not come up after 1 minute: %s", op, podName, err) + } + return pod +} + +// ServiceDefinition returns a service definition for a Hetzner Cloud Load Balancer (k8s service) +func (l *lbTestHelper) ServiceDefinition(pod *corev1.Pod, annotations map[string]string) *corev1.Service { + port := l.port + if port == 0 { + port = 80 + } + + return &corev1.Service{ + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf("svc-%s", l.podName), + Annotations: annotations, + }, + Spec: corev1.ServiceSpec{ + Selector: map[string]string{ + "app": pod.Name, + }, + Type: corev1.ServiceTypeLoadBalancer, + Ports: []corev1.ServicePort{ + { + Port: int32(port), + TargetPort: intstr.FromInt(80), + Name: "http", + }, + }, + ExternalTrafficPolicy: corev1.ServiceExternalTrafficPolicyTypeLocal, + }, + } +} + +// CreateService creates a k8s service based on the given service definition +// and waits until it is "ready" +func (l *lbTestHelper) CreateService(lbSvc *corev1.Service) (*corev1.Service, error) { + const op = "lbTestHelper/CreateService" + + // Default is 15s interval, 10s timeout, 3 retries => 45 seconds until up + // With these changes it should be 1 seconds until up + // lbSvc.Annotations[string(annotation.LBSvcHealthCheckInterval)] = "1s" + // lbSvc.Annotations[string(annotation.LBSvcHealthCheckTimeout)] = "2s" + // lbSvc.Annotations[string(annotation.LBSvcHealthCheckRetries)] = "1" + // 
lbSvc.Annotations[string(annotation.LBSvcHealthCheckProtocol)] = "tcp" + + _, err := l.K8sClient.CoreV1().Services(corev1.NamespaceDefault).Create(context.Background(), lbSvc, metav1.CreateOptions{}) + if err != nil { + return nil, fmt.Errorf("%s: could not create service: %s", op, err) + } + + err = wait.Poll(1*time.Second, 5*time.Minute, func() (done bool, err error) { + svc, err := l.K8sClient.CoreV1().Services(corev1.NamespaceDefault).Get(context.Background(), lbSvc.Name, metav1.GetOptions{}) + if err != nil { + return false, err + } + ingressIPs := svc.Status.LoadBalancer.Ingress + if len(ingressIPs) > 0 { + lbSvc = svc + return true, nil + } + return false, nil + }) + if err != nil { + return nil, fmt.Errorf("%s: test service (load balancer) did not come up after 5 minute: %s", op, err) + } + return lbSvc, nil +} + +// TearDown deletes the created pod and service +func (l *lbTestHelper) TearDown() { + const op = "lbTestHelper/TearDown" + + if l.KeepOnFailure && l.t.Failed() { + return + } + + svcName := fmt.Sprintf("svc-%s", l.podName) + err := l.K8sClient.CoreV1().Services(corev1.NamespaceDefault).Delete(context.Background(), svcName, metav1.DeleteOptions{}) + if err != nil && !k8serrors.IsNotFound(err) { + l.t.Errorf("%s: deleting test svc failed: %s", op, err) + } + + err = wait.Poll(1*time.Second, 3*time.Minute, func() (done bool, err error) { + _, err = l.K8sClient.CoreV1().Services(corev1.NamespaceDefault).Get(context.Background(), svcName, metav1.GetOptions{}) + if err != nil { + if k8serrors.IsNotFound(err) { + return true, nil + } + return false, err + } + return false, nil + }) + if err != nil { + l.t.Errorf("%s: test service was not removed after 3 minutes: %s", op, err) + } + + podName := fmt.Sprintf("pod-%s", l.podName) + err = l.K8sClient.CoreV1().Pods(corev1.NamespaceDefault).Delete(context.Background(), podName, metav1.DeleteOptions{}) + if err != nil && !k8serrors.IsNotFound(err) { + l.t.Errorf("%s: deleting test pod failed: %s", op, err) + } + err = wait.Poll(1*time.Second, 3*time.Minute, func() (done bool, err error) { + _, err = l.K8sClient.CoreV1().Pods(corev1.NamespaceDefault).Get(context.Background(), podName, metav1.GetOptions{}) + if err != nil { + if k8serrors.IsNotFound(err) { + return true, nil + } + return false, err + } + return false, nil + }) + if err != nil { + l.t.Errorf("%s: test pod not removed after 3 minutes: %s", op, err) + } +} + +// WaitForHTTPAvailable tries to connect to the given IP via http +// It tries it for 2 minutes, if after two minutes the connection +// wasn't successful and it wasn't a HTTP 200 response it will fail +func WaitForHTTPAvailable(t *testing.T, ingressIP string, useHTTPS bool) { + const op = "e2e/WaitForHTTPAvailable" + + client := &http.Client{ + Timeout: 1 * time.Second, + Transport: &http.Transport{ + TLSClientConfig: &tls.Config{ + InsecureSkipVerify: true, // nolint + }, + }, + } + proto := "http" + if useHTTPS { + proto = "https" + } + + err := wait.Poll(1*time.Second, 2*time.Minute, func() (bool, error) { + resp, err := client.Get(fmt.Sprintf("%s://%s", proto, ingressIP)) + if err != nil { + return false, nil + } + defer resp.Body.Close() + switch resp.StatusCode { + case http.StatusOK: + // Success + return true, nil + case http.StatusServiceUnavailable: + // Health checks are still evaluating + return false, nil + default: + return false, fmt.Errorf("%s: got HTTP Code %d instead of 200", op, resp.StatusCode) + } + }) + if err != nil { + t.Errorf("%s: not available via client.Get: %s", op, err) + } +} From 
a8bdd8bfa2dd0dce7b2d6c1c71c9e92accabe350 Mon Sep 17 00:00:00 2001 From: Sam Day Date: Thu, 6 Apr 2023 09:02:32 +0200 Subject: [PATCH 02/14] ci: remove debug cruft --- .github/workflows/test_e2e.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/test_e2e.yml b/.github/workflows/test_e2e.yml index 09fdd9269..fb9686802 100644 --- a/.github/workflows/test_e2e.yml +++ b/.github/workflows/test_e2e.yml @@ -45,12 +45,11 @@ jobs: env: K3S_CHANNEL: ${{ matrix.k3s }} SCOPE: gha-${{ github.run_id }}-${{ github.run_attempt }}-${{ matrix.k3s }} - SSH_KEYS: sam.day,julian.toelle USE_NETWORKS: yes run: | curl -sLS https://get.k3sup.dev | sh - trap "cat hack/.reg*; hack/dev-down.sh; ./scripts/delete-token.sh $HCLOUD_TOKEN" EXIT + trap "hack/dev-down.sh; ./scripts/delete-token.sh $HCLOUD_TOKEN" EXIT source <(hack/dev-up.sh) skaffold build --tag="e2e-${GITHUB_RUN_ID}-${GITHUB_RUN_NUMBER}" From dbb2a579629d4986e414bd44cebc94ebd43184fb Mon Sep 17 00:00:00 2001 From: Sam Day Date: Thu, 6 Apr 2023 09:15:54 +0200 Subject: [PATCH 03/14] ci: fix unit test workflow --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b21c4fc4f..cf932430e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -13,7 +13,7 @@ jobs: - name: Run tests run: | go vet ./... - go test $(go list ./... | grep -v e2etests) + go test $(go list ./... | grep -v e2e) lint: name: Lint From bc8ab0db7f227ea76670b82696ee2ad50c7f7936 Mon Sep 17 00:00:00 2001 From: Sam Day Date: Thu, 6 Apr 2023 09:25:46 +0200 Subject: [PATCH 04/14] lint fixes, e2e test cleanup --- tests/e2e/e2e_test.go | 7 +++--- tests/e2e/testing.go | 58 +++++++------------------------------------ 2 files changed, 13 insertions(+), 52 deletions(-) diff --git a/tests/e2e/e2e_test.go b/tests/e2e/e2e_test.go index 48630158d..cdd6a0017 100644 --- a/tests/e2e/e2e_test.go +++ b/tests/e2e/e2e_test.go @@ -27,7 +27,7 @@ func TestMain(m *testing.M) { rc := m.Run() - if err := testCluster.Stop(rc > 0); err != nil { + if err := testCluster.Stop(); err != nil { fmt.Printf("%v\n", err) os.Exit(1) } @@ -134,7 +134,6 @@ func TestCloudControllerManagerLoadBalancersHTTPS(t *testing.T) { lbTest := lbTestHelper{ t: t, K8sClient: testCluster.k8sClient, - KeepOnFailure: testCluster.KeepOnFailure, podName: "loadbalancer-https", port: 443, } @@ -168,7 +167,6 @@ func TestCloudControllerManagerLoadBalancersHTTPSWithManagedCertificate(t *testi lbTest := lbTestHelper{ t: t, K8sClient: testCluster.k8sClient, - KeepOnFailure: testCluster.KeepOnFailure, podName: "loadbalancer-https", port: 443, } @@ -223,6 +221,9 @@ func TestCloudControllerManagerLoadBalancersWithPrivateNetwork(t *testing.T) { func TestCloudControllerManagerNetworksPodIPsAreAccessible(t *testing.T) { node, err := testCluster.k8sClient.CoreV1().Nodes().Get(context.Background(), testCluster.scope+"-1", metav1.GetOptions{}) + if err != nil { + t.Error(err) + } network, _, err := testCluster.hcloud.Network.Get(context.TODO(), testCluster.scope) if err != nil { diff --git a/tests/e2e/testing.go b/tests/e2e/testing.go index c3cbbd6c9..b3f4a46f2 100644 --- a/tests/e2e/testing.go +++ b/tests/e2e/testing.go @@ -8,7 +8,6 @@ import ( "net/http" "os" "regexp" - "sync" "testing" "time" @@ -31,20 +30,13 @@ func init() { } type TestCluster struct { - KeepOnFailure bool - useNetworks bool hcloud *hcloud.Client k8sClient *kubernetes.Clientset certificates []*hcloud.Certificate scope string - mu sync.Mutex } -func 
(tc *TestCluster) initialize() error { - const op = "e2tests/TestCluster.initialize" - - fmt.Printf("%s: Starting CCM Testsuite\n", op) - +func (tc *TestCluster) Start() error { tc.scope = os.Getenv("SCOPE") if tc.scope == "" { tc.scope = "dev" @@ -52,16 +44,10 @@ func (tc *TestCluster) initialize() error { tc.scope = scopeButcher.ReplaceAllString(tc.scope, "-") tc.scope = "hccm-" + tc.scope - networksSupport := os.Getenv("USE_NETWORKS") - if networksSupport == "yes" { - tc.useNetworks = true - } - token := os.Getenv("HCLOUD_TOKEN") if len(token) != 64 { - return fmt.Errorf("%s: No valid HCLOUD_TOKEN found", op) + return fmt.Errorf("no valid HCLOUD_TOKEN found") } - tc.KeepOnFailure = os.Getenv("KEEP_SERVER_ON_FAILURE") == "yes" opts := []hcloud.ClientOption{ hcloud.WithToken(token), @@ -70,46 +56,27 @@ func (tc *TestCluster) initialize() error { hcloudClient := hcloud.NewClient(opts...) tc.hcloud = hcloudClient - fmt.Printf("%s: Setting up test env\n", op) - loadingRules := clientcmd.NewDefaultClientConfigLoadingRules() configOverrides := &clientcmd.ConfigOverrides{} kubeConfig := clientcmd.NewNonInteractiveDeferredLoadingClientConfig(loadingRules, configOverrides) clientConfig, err := kubeConfig.ClientConfig() if err != nil { - return fmt.Errorf("%s: kubeConfig.ClientConfig: %s", op, err) + return fmt.Errorf("kubeConfig.ClientConfig: %s", err) } tc.k8sClient, err = kubernetes.NewForConfig(clientConfig) if err != nil { - return fmt.Errorf("%s: kubernetes.NewForConfig: %s", op, err) + return fmt.Errorf("kubernetes.NewForConfig: %s", err) } return nil } -func (tc *TestCluster) Start() error { - const op = "e2e/TestCluster.Start" - - tc.mu.Lock() - defer tc.mu.Unlock() - - if err := tc.initialize(); err != nil { - return fmt.Errorf("%s: %v", op, err) - } - return nil -} - -func (tc *TestCluster) Stop(testFailed bool) error { - const op = "e2e/TestCluster.Stop" - - tc.mu.Lock() - defer tc.mu.Unlock() - +func (tc *TestCluster) Stop() error { for _, c := range tc.certificates { if _, err := tc.hcloud.Certificate.Delete(context.Background(), c); err != nil { - fmt.Printf("%s: delete certificate %d: %v", op, c.ID, err) + fmt.Printf("delete certificate %d failed: %v", c.ID, err) } } @@ -141,8 +108,6 @@ func (tc *TestCluster) CreateTLSCertificate(t *testing.T, baseName string) *hclo t.Fatalf("%s: no certificate created", op) } - tc.mu.Lock() - defer tc.mu.Unlock() tc.certificates = append(tc.certificates, cert) return cert @@ -152,12 +117,11 @@ type lbTestHelper struct { podName string port int K8sClient *kubernetes.Clientset - KeepOnFailure bool t *testing.T } // DeployTestPod deploys a basic nginx pod within the k8s cluster -// and waits until it is "ready" +// and waits until it is "ready". func (l *lbTestHelper) DeployTestPod() *corev1.Pod { const op = "lbTestHelper/DeployTestPod" @@ -238,7 +202,7 @@ func (l *lbTestHelper) ServiceDefinition(pod *corev1.Pod, annotations map[string } // CreateService creates a k8s service based on the given service definition -// and waits until it is "ready" +// and waits until it is "ready". func (l *lbTestHelper) CreateService(lbSvc *corev1.Service) (*corev1.Service, error) { const op = "lbTestHelper/CreateService" @@ -272,14 +236,10 @@ func (l *lbTestHelper) CreateService(lbSvc *corev1.Service) (*corev1.Service, er return lbSvc, nil } -// TearDown deletes the created pod and service +// TearDown deletes the created pod and service. 
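+// Cleanup failures are reported via t.Errorf and therefore do not abort the test.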
func (l *lbTestHelper) TearDown() { const op = "lbTestHelper/TearDown" - if l.KeepOnFailure && l.t.Failed() { - return - } - svcName := fmt.Sprintf("svc-%s", l.podName) err := l.K8sClient.CoreV1().Services(corev1.NamespaceDefault).Delete(context.Background(), svcName, metav1.DeleteOptions{}) if err != nil && !k8serrors.IsNotFound(err) { From 86da48c60f279de58cbe3b715a5ed7f280d5cd6b Mon Sep 17 00:00:00 2001 From: Sam Day Date: Thu, 6 Apr 2023 12:27:30 +0200 Subject: [PATCH 05/14] make e2e tests work more --- hack/dev-up.sh | 2 ++ tests/e2e/e2e_test.go | 37 ++++++++++++++------------- tests/e2e/testing.go | 58 +++++++++++++++++++++++++++++-------------- 3 files changed, 62 insertions(+), 35 deletions(-) diff --git a/hack/dev-up.sh b/hack/dev-up.sh index 1c716b87e..86c018555 100755 --- a/hack/dev-up.sh +++ b/hack/dev-up.sh @@ -36,6 +36,8 @@ if [[ -n "${DEBUG:-}" ]]; then set -x; fi k3s_opts=${K3S_OPTS:-"--kubelet-arg cloud-provider=external --disable=traefik --disable=servicelb --flannel-backend=none --disable=local-storage"} k3s_server_opts=${K3S_SERVER_OPTS:-"--disable-cloud-controller --cluster-cidr ${cluster_cidr}"} + echo "$HCLOUD_TOKEN" > "$SCRIPT_DIR/.token-$scope" + export KUBECONFIG="$SCRIPT_DIR/.kubeconfig-$scope" ssh_command="ssh -i $ssh_private_key -o StrictHostKeyChecking=off -o BatchMode=yes -o ConnectTimeout=5" diff --git a/tests/e2e/e2e_test.go b/tests/e2e/e2e_test.go index cdd6a0017..64e60e191 100644 --- a/tests/e2e/e2e_test.go +++ b/tests/e2e/e2e_test.go @@ -5,6 +5,7 @@ import ( "fmt" "math/rand" "os" + "strconv" "strings" "testing" @@ -65,10 +66,10 @@ func TestCloudControllerManagerPodIsPresent(t *testing.T) { } func TestCloudControllerManagerSetCorrectNodeLabelsAndIPAddresses(t *testing.T) { - node, err := testCluster.k8sClient.CoreV1().Nodes().Get(context.Background(), testCluster.scope+"-1", metav1.GetOptions{}) + node, err := testCluster.k8sClient.CoreV1().Nodes().Get(context.Background(), "hccm-"+testCluster.scope+"-1", metav1.GetOptions{}) assert.NoError(t, err) - server, _, err := testCluster.hcloud.Server.Get(context.TODO(), testCluster.scope+"-1") + server, _, err := testCluster.hcloud.Server.Get(context.TODO(), "hccm-"+testCluster.scope+"-1") if err != nil { return } @@ -108,7 +109,12 @@ func TestCloudControllerManagerSetCorrectNodeLabelsAndIPAddresses(t *testing.T) } func TestCloudControllerManagerLoadBalancersMinimalSetup(t *testing.T) { - lbTest := lbTestHelper{t: t, K8sClient: testCluster.k8sClient, podName: "loadbalancer-minimal"} + lbTest := lbTestHelper{ + t: t, + K8sClient: testCluster.k8sClient, + podName: "loadbalancer-minimal", + namespace: "hccm-test-" + strconv.Itoa(rand.Int()), + } pod := lbTest.DeployTestPod() @@ -121,10 +127,7 @@ func TestCloudControllerManagerLoadBalancersMinimalSetup(t *testing.T) { t.Fatalf("deploying test svc: %s", err) } - for _, ing := range lbSvc.Status.LoadBalancer.Ingress { - fmt.Println(ing.IP) - WaitForHTTPAvailable(t, ing.IP, false) - } + WaitForHTTPAvailable(t, lbSvc.Status.LoadBalancer.Ingress[0].IP, false) lbTest.TearDown() } @@ -132,10 +135,10 @@ func TestCloudControllerManagerLoadBalancersMinimalSetup(t *testing.T) { func TestCloudControllerManagerLoadBalancersHTTPS(t *testing.T) { cert := testCluster.CreateTLSCertificate(t, "loadbalancer-https") lbTest := lbTestHelper{ - t: t, - K8sClient: testCluster.k8sClient, - podName: "loadbalancer-https", - port: 443, + t: t, + K8sClient: testCluster.k8sClient, + podName: "loadbalancer-https", + port: 443, } pod := lbTest.DeployTestPod() @@ -165,10 +168,10 @@ func 
TestCloudControllerManagerLoadBalancersHTTPS(t *testing.T) { func TestCloudControllerManagerLoadBalancersHTTPSWithManagedCertificate(t *testing.T) { domainName := fmt.Sprintf("%d-ccm-test.hc-certs.de", rand.Int()) lbTest := lbTestHelper{ - t: t, - K8sClient: testCluster.k8sClient, - podName: "loadbalancer-https", - port: 443, + t: t, + K8sClient: testCluster.k8sClient, + podName: "loadbalancer-https", + port: 443, } pod := lbTest.DeployTestPod() @@ -220,12 +223,12 @@ func TestCloudControllerManagerLoadBalancersWithPrivateNetwork(t *testing.T) { } func TestCloudControllerManagerNetworksPodIPsAreAccessible(t *testing.T) { - node, err := testCluster.k8sClient.CoreV1().Nodes().Get(context.Background(), testCluster.scope+"-1", metav1.GetOptions{}) + node, err := testCluster.k8sClient.CoreV1().Nodes().Get(context.Background(), "hccm-"+testCluster.scope+"-1", metav1.GetOptions{}) if err != nil { t.Error(err) } - network, _, err := testCluster.hcloud.Network.Get(context.TODO(), testCluster.scope) + network, _, err := testCluster.hcloud.Network.Get(context.TODO(), "hccm-"+testCluster.scope) if err != nil { t.Error(err) } diff --git a/tests/e2e/testing.go b/tests/e2e/testing.go index b3f4a46f2..a62daed5d 100644 --- a/tests/e2e/testing.go +++ b/tests/e2e/testing.go @@ -30,10 +30,10 @@ func init() { } type TestCluster struct { - hcloud *hcloud.Client - k8sClient *kubernetes.Clientset - certificates []*hcloud.Certificate - scope string + hcloud *hcloud.Client + k8sClient *kubernetes.Clientset + certificates []*hcloud.Certificate + scope string } func (tc *TestCluster) Start() error { @@ -42,10 +42,17 @@ func (tc *TestCluster) Start() error { tc.scope = "dev" } tc.scope = scopeButcher.ReplaceAllString(tc.scope, "-") - tc.scope = "hccm-" + tc.scope token := os.Getenv("HCLOUD_TOKEN") - if len(token) != 64 { + if token == "" { + buf, err := os.ReadFile(fmt.Sprintf("../../hack/.token-%s", tc.scope)) + if err != nil { + return err + } + token = string(buf) + } + + if token == "" { return fmt.Errorf("no valid HCLOUD_TOKEN found") } @@ -56,6 +63,11 @@ func (tc *TestCluster) Start() error { hcloudClient := hcloud.NewClient(opts...) 
tc.hcloud = hcloudClient + err := os.Setenv("KUBECONFIG", "../../hack/.kubeconfig-" + tc.scope) + if err != nil { + return err + } + loadingRules := clientcmd.NewDefaultClientConfigLoadingRules() configOverrides := &clientcmd.ConfigOverrides{} @@ -114,10 +126,11 @@ func (tc *TestCluster) CreateTLSCertificate(t *testing.T, baseName string) *hclo } type lbTestHelper struct { - podName string - port int - K8sClient *kubernetes.Clientset - t *testing.T + podName string + port int + K8sClient *kubernetes.Clientset + t *testing.T + namespace string } // DeployTestPod deploys a basic nginx pod within the k8s cluster @@ -125,6 +138,15 @@ type lbTestHelper struct { func (l *lbTestHelper) DeployTestPod() *corev1.Pod { const op = "lbTestHelper/DeployTestPod" + _, err := l.K8sClient.CoreV1().Namespaces().Create(context.Background(), &corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: l.namespace, + }, + }, metav1.CreateOptions{}) + if err != nil && !k8serrors.IsAlreadyExists(err) { + panic(err) + } + podName := fmt.Sprintf("pod-%s", l.podName) testPod := corev1.Pod{ ObjectMeta: metav1.ObjectMeta{ @@ -149,12 +171,12 @@ func (l *lbTestHelper) DeployTestPod() *corev1.Pod { }, } - pod, err := l.K8sClient.CoreV1().Pods(corev1.NamespaceDefault).Create(context.Background(), &testPod, metav1.CreateOptions{}) + pod, err := l.K8sClient.CoreV1().Pods(l.namespace).Create(context.Background(), &testPod, metav1.CreateOptions{}) if err != nil { l.t.Fatalf("%s: could not create test pod: %s", op, err) } err = wait.Poll(1*time.Second, 1*time.Minute, func() (done bool, err error) { - p, err := l.K8sClient.CoreV1().Pods(corev1.NamespaceDefault).Get(context.Background(), podName, metav1.GetOptions{}) + p, err := l.K8sClient.CoreV1().Pods(l.namespace).Get(context.Background(), podName, metav1.GetOptions{}) if err != nil { return false, err } @@ -213,13 +235,13 @@ func (l *lbTestHelper) CreateService(lbSvc *corev1.Service) (*corev1.Service, er // lbSvc.Annotations[string(annotation.LBSvcHealthCheckRetries)] = "1" // lbSvc.Annotations[string(annotation.LBSvcHealthCheckProtocol)] = "tcp" - _, err := l.K8sClient.CoreV1().Services(corev1.NamespaceDefault).Create(context.Background(), lbSvc, metav1.CreateOptions{}) + _, err := l.K8sClient.CoreV1().Services(l.namespace).Create(context.Background(), lbSvc, metav1.CreateOptions{}) if err != nil { return nil, fmt.Errorf("%s: could not create service: %s", op, err) } err = wait.Poll(1*time.Second, 5*time.Minute, func() (done bool, err error) { - svc, err := l.K8sClient.CoreV1().Services(corev1.NamespaceDefault).Get(context.Background(), lbSvc.Name, metav1.GetOptions{}) + svc, err := l.K8sClient.CoreV1().Services(l.namespace).Get(context.Background(), lbSvc.Name, metav1.GetOptions{}) if err != nil { return false, err } @@ -241,13 +263,13 @@ func (l *lbTestHelper) TearDown() { const op = "lbTestHelper/TearDown" svcName := fmt.Sprintf("svc-%s", l.podName) - err := l.K8sClient.CoreV1().Services(corev1.NamespaceDefault).Delete(context.Background(), svcName, metav1.DeleteOptions{}) + err := l.K8sClient.CoreV1().Services(l.namespace).Delete(context.Background(), svcName, metav1.DeleteOptions{}) if err != nil && !k8serrors.IsNotFound(err) { l.t.Errorf("%s: deleting test svc failed: %s", op, err) } err = wait.Poll(1*time.Second, 3*time.Minute, func() (done bool, err error) { - _, err = l.K8sClient.CoreV1().Services(corev1.NamespaceDefault).Get(context.Background(), svcName, metav1.GetOptions{}) + _, err = l.K8sClient.CoreV1().Services(l.namespace).Get(context.Background(), svcName, 
metav1.GetOptions{}) if err != nil { if k8serrors.IsNotFound(err) { return true, nil @@ -261,12 +283,12 @@ func (l *lbTestHelper) TearDown() { } podName := fmt.Sprintf("pod-%s", l.podName) - err = l.K8sClient.CoreV1().Pods(corev1.NamespaceDefault).Delete(context.Background(), podName, metav1.DeleteOptions{}) + err = l.K8sClient.CoreV1().Pods(l.namespace).Delete(context.Background(), podName, metav1.DeleteOptions{}) if err != nil && !k8serrors.IsNotFound(err) { l.t.Errorf("%s: deleting test pod failed: %s", op, err) } err = wait.Poll(1*time.Second, 3*time.Minute, func() (done bool, err error) { - _, err = l.K8sClient.CoreV1().Pods(corev1.NamespaceDefault).Get(context.Background(), podName, metav1.GetOptions{}) + _, err = l.K8sClient.CoreV1().Pods(l.namespace).Get(context.Background(), podName, metav1.GetOptions{}) if err != nil { if k8serrors.IsNotFound(err) { return true, nil From 658b39f466a9a71123feed1ff5427bf7ad17d56e Mon Sep 17 00:00:00 2001 From: samcday Date: Thu, 6 Apr 2023 13:23:40 +0200 Subject: [PATCH 06/14] Update hack/dev-up.sh MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Julian Tölle --- hack/dev-up.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hack/dev-up.sh b/hack/dev-up.sh index 86c018555..a37935cba 100755 --- a/hack/dev-up.sh +++ b/hack/dev-up.sh @@ -137,7 +137,7 @@ if [[ -n "${DEBUG:-}" ]]; then set -x; fi # Install Cilium. ( trap error ERR - if ! kubectl get -n kube-system ds/cilium >/dev/null 2>&1; then + if ! helm status -n kube-system cilium >/dev/null 2>&1; then helm install cilium cilium --repo https://helm.cilium.io/ -n kube-system --version 1.13.1 --set tunnel=disabled --set ipv4NativeRoutingCIDR=$cluster_cidr fi) & From 0d60dda316d005d6aeadf83ad44caea595d1fff9 Mon Sep 17 00:00:00 2001 From: samcday Date: Thu, 6 Apr 2023 13:24:13 +0200 Subject: [PATCH 07/14] Update hack/dev-up.sh MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Julian Tölle --- hack/dev-up.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/hack/dev-up.sh b/hack/dev-up.sh index a37935cba..034b1ffa0 100755 --- a/hack/dev-up.sh +++ b/hack/dev-up.sh @@ -8,6 +8,7 @@ if [[ -n "${DEBUG:-}" ]]; then set -x; fi { if ! hcloud version >/dev/null; then echo "ERROR: 'hcloud' CLI not found, please install it and make it available on your \$PATH"; exit 1; fi if ! k3sup version >/dev/null; then echo "ERROR: 'k3sup' not found, please install it and make it available on your \$PATH"; exit 1; fi + if ! helm version >/dev/null; then echo "ERROR: 'helm' not found, please install it and make it available on your \$PATH"; exit 1; fi if [[ "${HCLOUD_TOKEN:-}" == "" ]]; then echo "ERROR: please set \$HCLOUD_TOKEN"; exit 1; fi # We run a lot of subshells below for speed. If any encounter an error, we shut down the whole process group, pronto. 
From ae8e419849012f57ba9becf33f84c6e32b889883 Mon Sep 17 00:00:00 2001 From: samcday Date: Thu, 6 Apr 2023 13:25:31 +0200 Subject: [PATCH 08/14] Update hack/dev-up.sh MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Julian Tölle --- hack/dev-up.sh | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/hack/dev-up.sh b/hack/dev-up.sh index 034b1ffa0..54be66705 100755 --- a/hack/dev-up.sh +++ b/hack/dev-up.sh @@ -94,9 +94,12 @@ if [[ -n "${DEBUG:-}" ]]; then set -x; fi $ssh_command root@$ip 'mkdir -p /etc/rancher/k3s && cat > /etc/rancher/k3s/registries.yaml' < $SCRIPT_DIR/k3s-registries.yaml + private_ip=$(hcloud server describe $server_name -o format="{{ (index .PrivateNet 0).IP }}") + k3s_node_ip_opts="--node-external-ip ${ip} --node-ip ${private_ip}" + if [[ "$num" == "1" ]]; then # First node is control plane. - k3sup install --print-config=false --ip $ip --k3s-channel $channel --k3s-extra-args "${k3s_server_opts} ${k3s_opts}" --local-path $KUBECONFIG --ssh-key $ssh_private_key + k3sup install --print-config=false --ip $ip --k3s-channel $channel --k3s-extra-args "${k3s_server_opts} ${k3s_opts} ${k3s_node_ip_opts}" --local-path $KUBECONFIG --ssh-key $ssh_private_key else # All subsequent nodes are initialized as workers. @@ -105,7 +108,7 @@ if [[ -n "${DEBUG:-}" ]]; then set -x; fi sleep 1 done - k3sup join --server-ip $(hcloud server ip $scope_name-1) --ip $ip --k3s-channel $channel --k3s-extra-args "${k3s_opts}" --ssh-key $ssh_private_key + k3sup join --server-ip $(hcloud server ip $scope_name-1) --ip $ip --k3s-channel $channel --k3s-extra-args "${k3s_opts} ${k3s_node_ip_opts}" --ssh-key $ssh_private_key fi ) & From 05f6b3873de0d72305e3153c6f9f84b74c6df7b2 Mon Sep 17 00:00:00 2001 From: Sam Day Date: Thu, 6 Apr 2023 13:27:21 +0200 Subject: [PATCH 09/14] ongoing e2e test cleanup --- .github/workflows/test_e2e.yml | 1 - hack/dev-up.sh | 2 +- tests/e2e/e2e_test.go | 20 +++++++++-- tests/e2e/testing.go | 61 ++++++++++------------------------ 4 files changed, 36 insertions(+), 48 deletions(-) diff --git a/.github/workflows/test_e2e.yml b/.github/workflows/test_e2e.yml index fb9686802..08672919e 100644 --- a/.github/workflows/test_e2e.yml +++ b/.github/workflows/test_e2e.yml @@ -45,7 +45,6 @@ jobs: env: K3S_CHANNEL: ${{ matrix.k3s }} SCOPE: gha-${{ github.run_id }}-${{ github.run_attempt }}-${{ matrix.k3s }} - USE_NETWORKS: yes run: | curl -sLS https://get.k3sup.dev | sh diff --git a/hack/dev-up.sh b/hack/dev-up.sh index 54be66705..3910ea647 100755 --- a/hack/dev-up.sh +++ b/hack/dev-up.sh @@ -37,7 +37,7 @@ if [[ -n "${DEBUG:-}" ]]; then set -x; fi k3s_opts=${K3S_OPTS:-"--kubelet-arg cloud-provider=external --disable=traefik --disable=servicelb --flannel-backend=none --disable=local-storage"} k3s_server_opts=${K3S_SERVER_OPTS:-"--disable-cloud-controller --cluster-cidr ${cluster_cidr}"} - echo "$HCLOUD_TOKEN" > "$SCRIPT_DIR/.token-$scope" + echo -n "$HCLOUD_TOKEN" > "$SCRIPT_DIR/.token-$scope" export KUBECONFIG="$SCRIPT_DIR/.kubeconfig-$scope" diff --git a/tests/e2e/e2e_test.go b/tests/e2e/e2e_test.go index 64e60e191..97406decd 100644 --- a/tests/e2e/e2e_test.go +++ b/tests/e2e/e2e_test.go @@ -5,7 +5,6 @@ import ( "fmt" "math/rand" "os" - "strconv" "strings" "testing" @@ -36,6 +35,8 @@ func TestMain(m *testing.M) { } func TestCloudControllerManagerPodIsPresent(t *testing.T) { + t.Parallel() + t.Run("hcloud-cloud-controller-manager pod is present in kube-system", func(t *testing.T) { pods, err := 
testCluster.k8sClient.CoreV1().Pods("kube-system").List(context.Background(), metav1.ListOptions{}) assert.NoError(t, err) @@ -66,6 +67,8 @@ func TestCloudControllerManagerPodIsPresent(t *testing.T) { } func TestCloudControllerManagerSetCorrectNodeLabelsAndIPAddresses(t *testing.T) { + t.Parallel() + node, err := testCluster.k8sClient.CoreV1().Nodes().Get(context.Background(), "hccm-"+testCluster.scope+"-1", metav1.GetOptions{}) assert.NoError(t, err) @@ -109,11 +112,12 @@ func TestCloudControllerManagerSetCorrectNodeLabelsAndIPAddresses(t *testing.T) } func TestCloudControllerManagerLoadBalancersMinimalSetup(t *testing.T) { + t.Parallel() + lbTest := lbTestHelper{ t: t, K8sClient: testCluster.k8sClient, podName: "loadbalancer-minimal", - namespace: "hccm-test-" + strconv.Itoa(rand.Int()), } pod := lbTest.DeployTestPod() @@ -133,6 +137,9 @@ func TestCloudControllerManagerLoadBalancersMinimalSetup(t *testing.T) { } func TestCloudControllerManagerLoadBalancersHTTPS(t *testing.T) { + t.Parallel() + + cert := testCluster.CreateTLSCertificate(t, "loadbalancer-https") lbTest := lbTestHelper{ t: t, @@ -166,6 +173,9 @@ func TestCloudControllerManagerLoadBalancersHTTPS(t *testing.T) { } func TestCloudControllerManagerLoadBalancersHTTPSWithManagedCertificate(t *testing.T) { + t.Parallel() + + domainName := fmt.Sprintf("%d-ccm-test.hc-certs.de", rand.Int()) lbTest := lbTestHelper{ t: t, @@ -202,6 +212,8 @@ func TestCloudControllerManagerLoadBalancersHTTPSWithManagedCertificate(t *testi } func TestCloudControllerManagerLoadBalancersWithPrivateNetwork(t *testing.T) { + t.Parallel() + lbTest := lbTestHelper{t: t, K8sClient: testCluster.k8sClient, podName: "loadbalancer-private-network"} pod := lbTest.DeployTestPod() @@ -223,6 +235,8 @@ func TestCloudControllerManagerLoadBalancersWithPrivateNetwork(t *testing.T) { } func TestCloudControllerManagerNetworksPodIPsAreAccessible(t *testing.T) { + t.Parallel() + node, err := testCluster.k8sClient.CoreV1().Nodes().Get(context.Background(), "hccm-"+testCluster.scope+"-1", metav1.GetOptions{}) if err != nil { t.Error(err) @@ -230,7 +244,7 @@ func TestCloudControllerManagerNetworksPodIPsAreAccessible(t *testing.T) { network, _, err := testCluster.hcloud.Network.Get(context.TODO(), "hccm-"+testCluster.scope) if err != nil { - t.Error(err) + t.Fatal(err) } for _, subnet := range network.Subnets { if subnet.IPRange.String() == node.Spec.PodCIDR { diff --git a/tests/e2e/testing.go b/tests/e2e/testing.go index a62daed5d..8665e0f31 100644 --- a/tests/e2e/testing.go +++ b/tests/e2e/testing.go @@ -8,6 +8,7 @@ import ( "net/http" "os" "regexp" + "strconv" "testing" "time" @@ -63,7 +64,7 @@ func (tc *TestCluster) Start() error { hcloudClient := hcloud.NewClient(opts...) tc.hcloud = hcloudClient - err := os.Setenv("KUBECONFIG", "../../hack/.kubeconfig-" + tc.scope) + err := os.Setenv("KUBECONFIG", "../../hack/.kubeconfig-"+tc.scope) if err != nil { return err } @@ -101,7 +102,6 @@ func (tc *TestCluster) Stop() error { // The baseName of the certificate gets a random number suffix attached. // baseName and suffix are separated by a single "-" character. 
func (tc *TestCluster) CreateTLSCertificate(t *testing.T, baseName string) *hcloud.Certificate { - const op = "e2e/TestCluster.CreateTLSCertificate" rndInt := rng.Int() name := fmt.Sprintf("%s-%d", baseName, rndInt) @@ -114,10 +114,10 @@ func (tc *TestCluster) CreateTLSCertificate(t *testing.T, baseName string) *hclo } cert, _, err := tc.hcloud.Certificate.Create(context.Background(), opts) if err != nil { - t.Fatalf("%s: %s: %v", op, name, err) + t.Fatalf("%s: %v", name, err) } if cert == nil { - t.Fatalf("%s: no certificate created", op) + t.Fatalf("no certificate created") } tc.certificates = append(tc.certificates, cert) @@ -136,8 +136,9 @@ type lbTestHelper struct { // DeployTestPod deploys a basic nginx pod within the k8s cluster // and waits until it is "ready". func (l *lbTestHelper) DeployTestPod() *corev1.Pod { - const op = "lbTestHelper/DeployTestPod" - + if l.namespace == "" { + l.namespace = "hccm-test-" + strconv.Itoa(rand.Int()) + } _, err := l.K8sClient.CoreV1().Namespaces().Create(context.Background(), &corev1.Namespace{ ObjectMeta: metav1.ObjectMeta{ Name: l.namespace, @@ -173,7 +174,7 @@ func (l *lbTestHelper) DeployTestPod() *corev1.Pod { pod, err := l.K8sClient.CoreV1().Pods(l.namespace).Create(context.Background(), &testPod, metav1.CreateOptions{}) if err != nil { - l.t.Fatalf("%s: could not create test pod: %s", op, err) + l.t.Fatalf("could not create test pod: %s", err) } err = wait.Poll(1*time.Second, 1*time.Minute, func() (done bool, err error) { p, err := l.K8sClient.CoreV1().Pods(l.namespace).Get(context.Background(), podName, metav1.GetOptions{}) @@ -189,7 +190,7 @@ func (l *lbTestHelper) DeployTestPod() *corev1.Pod { return false, nil }) if err != nil { - l.t.Fatalf("%s: pod %s did not come up after 1 minute: %s", op, podName, err) + l.t.Fatalf("pod %s did not come up after 1 minute: %s", podName, err) } return pod } @@ -226,7 +227,6 @@ func (l *lbTestHelper) ServiceDefinition(pod *corev1.Pod, annotations map[string // CreateService creates a k8s service based on the given service definition // and waits until it is "ready". func (l *lbTestHelper) CreateService(lbSvc *corev1.Service) (*corev1.Service, error) { - const op = "lbTestHelper/CreateService" // Default is 15s interval, 10s timeout, 3 retries => 45 seconds until up // With these changes it should be 1 seconds until up @@ -237,7 +237,7 @@ func (l *lbTestHelper) CreateService(lbSvc *corev1.Service) (*corev1.Service, er _, err := l.K8sClient.CoreV1().Services(l.namespace).Create(context.Background(), lbSvc, metav1.CreateOptions{}) if err != nil { - return nil, fmt.Errorf("%s: could not create service: %s", op, err) + return nil, fmt.Errorf("could not create service: %s", err) } err = wait.Poll(1*time.Second, 5*time.Minute, func() (done bool, err error) { @@ -253,52 +253,28 @@ func (l *lbTestHelper) CreateService(lbSvc *corev1.Service) (*corev1.Service, er return false, nil }) if err != nil { - return nil, fmt.Errorf("%s: test service (load balancer) did not come up after 5 minute: %s", op, err) + return nil, fmt.Errorf("test service (load balancer) did not come up after 5 minute: %s", err) } return lbSvc, nil } // TearDown deletes the created pod and service. 
func (l *lbTestHelper) TearDown() { - const op = "lbTestHelper/TearDown" - svcName := fmt.Sprintf("svc-%s", l.podName) err := l.K8sClient.CoreV1().Services(l.namespace).Delete(context.Background(), svcName, metav1.DeleteOptions{}) if err != nil && !k8serrors.IsNotFound(err) { - l.t.Errorf("%s: deleting test svc failed: %s", op, err) - } - - err = wait.Poll(1*time.Second, 3*time.Minute, func() (done bool, err error) { - _, err = l.K8sClient.CoreV1().Services(l.namespace).Get(context.Background(), svcName, metav1.GetOptions{}) - if err != nil { - if k8serrors.IsNotFound(err) { - return true, nil - } - return false, err - } - return false, nil - }) - if err != nil { - l.t.Errorf("%s: test service was not removed after 3 minutes: %s", op, err) + l.t.Errorf("deleting test svc failed: %s", err) } - podName := fmt.Sprintf("pod-%s", l.podName) - err = l.K8sClient.CoreV1().Pods(l.namespace).Delete(context.Background(), podName, metav1.DeleteOptions{}) - if err != nil && !k8serrors.IsNotFound(err) { - l.t.Errorf("%s: deleting test pod failed: %s", op, err) - } err = wait.Poll(1*time.Second, 3*time.Minute, func() (done bool, err error) { - _, err = l.K8sClient.CoreV1().Pods(l.namespace).Get(context.Background(), podName, metav1.GetOptions{}) - if err != nil { - if k8serrors.IsNotFound(err) { - return true, nil - } + err = l.K8sClient.CoreV1().Namespaces().Delete(context.Background(), l.namespace, metav1.DeleteOptions{}) + if err != nil && !k8serrors.IsNotFound(err) { return false, err } - return false, nil + return true, nil }) if err != nil { - l.t.Errorf("%s: test pod not removed after 3 minutes: %s", op, err) + panic(err) } } @@ -306,7 +282,6 @@ func (l *lbTestHelper) TearDown() { // It tries it for 2 minutes, if after two minutes the connection // wasn't successful and it wasn't a HTTP 200 response it will fail func WaitForHTTPAvailable(t *testing.T, ingressIP string, useHTTPS bool) { - const op = "e2e/WaitForHTTPAvailable" client := &http.Client{ Timeout: 1 * time.Second, @@ -335,10 +310,10 @@ func WaitForHTTPAvailable(t *testing.T, ingressIP string, useHTTPS bool) { // Health checks are still evaluating return false, nil default: - return false, fmt.Errorf("%s: got HTTP Code %d instead of 200", op, resp.StatusCode) + return false, fmt.Errorf("got HTTP Code %d instead of 200", resp.StatusCode) } }) if err != nil { - t.Errorf("%s: not available via client.Get: %s", op, err) + t.Errorf("not available via client.Get: %s", err) } } From f2917d87d54f4c3feab304c42425ddb3d4dc8d68 Mon Sep 17 00:00:00 2001 From: Sam Day Date: Thu, 6 Apr 2023 13:37:53 +0200 Subject: [PATCH 10/14] lint, e2e --- tests/e2e/e2e_test.go | 13 ++----------- tests/e2e/testing.go | 10 ++-------- 2 files changed, 4 insertions(+), 19 deletions(-) diff --git a/tests/e2e/e2e_test.go b/tests/e2e/e2e_test.go index 97406decd..ffa8aa4d4 100644 --- a/tests/e2e/e2e_test.go +++ b/tests/e2e/e2e_test.go @@ -139,7 +139,6 @@ func TestCloudControllerManagerLoadBalancersMinimalSetup(t *testing.T) { func TestCloudControllerManagerLoadBalancersHTTPS(t *testing.T) { t.Parallel() - cert := testCluster.CreateTLSCertificate(t, "loadbalancer-https") lbTest := lbTestHelper{ t: t, @@ -161,13 +160,7 @@ func TestCloudControllerManagerLoadBalancersHTTPS(t *testing.T) { t.Fatalf("deploying test svc: %s", err) } - ingressIP := lbSvc.Status.LoadBalancer.Ingress[0].IP // Index 0 is always the public IP of the LB - WaitForHTTPAvailable(t, ingressIP, true) - - // TODO: - //for _, ing := range lbSvc.Status.LoadBalancer.Ingress { - // WaitForHTTPOnServer(t, 
testCluster.setup.ExtServer, testCluster.setup.privKey, ing.IP, true) - //} + WaitForHTTPAvailable(t, lbSvc.Status.LoadBalancer.Ingress[0].IP, true) lbTest.TearDown() } @@ -175,7 +168,6 @@ func TestCloudControllerManagerLoadBalancersHTTPS(t *testing.T) { func TestCloudControllerManagerLoadBalancersHTTPSWithManagedCertificate(t *testing.T) { t.Parallel() - domainName := fmt.Sprintf("%d-ccm-test.hc-certs.de", rand.Int()) lbTest := lbTestHelper{ t: t, @@ -228,8 +220,7 @@ func TestCloudControllerManagerLoadBalancersWithPrivateNetwork(t *testing.T) { t.Fatalf("deploying test svc: %s", err) } - ingressIP := lbSvc.Status.LoadBalancer.Ingress[0].IP // Index 0 is always the public IP of the LB - WaitForHTTPAvailable(t, ingressIP, false) + WaitForHTTPAvailable(t, lbSvc.Status.LoadBalancer.Ingress[0].IP, false) lbTest.TearDown() } diff --git a/tests/e2e/testing.go b/tests/e2e/testing.go index 8665e0f31..66841a21e 100644 --- a/tests/e2e/testing.go +++ b/tests/e2e/testing.go @@ -260,14 +260,8 @@ func (l *lbTestHelper) CreateService(lbSvc *corev1.Service) (*corev1.Service, er // TearDown deletes the created pod and service. func (l *lbTestHelper) TearDown() { - svcName := fmt.Sprintf("svc-%s", l.podName) - err := l.K8sClient.CoreV1().Services(l.namespace).Delete(context.Background(), svcName, metav1.DeleteOptions{}) - if err != nil && !k8serrors.IsNotFound(err) { - l.t.Errorf("deleting test svc failed: %s", err) - } - - err = wait.Poll(1*time.Second, 3*time.Minute, func() (done bool, err error) { - err = l.K8sClient.CoreV1().Namespaces().Delete(context.Background(), l.namespace, metav1.DeleteOptions{}) + err := wait.Poll(1*time.Second, 3*time.Minute, func() (bool, error) { + err := l.K8sClient.CoreV1().Namespaces().Delete(context.Background(), l.namespace, metav1.DeleteOptions{}) if err != nil && !k8serrors.IsNotFound(err) { return false, err } From 3b135262680da4c07804c8c3094c047c12f79dec Mon Sep 17 00:00:00 2001 From: Sam Day Date: Thu, 6 Apr 2023 13:44:27 +0200 Subject: [PATCH 11/14] more lint --- tests/e2e/testing.go | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/tests/e2e/testing.go b/tests/e2e/testing.go index 66841a21e..2edf55f89 100644 --- a/tests/e2e/testing.go +++ b/tests/e2e/testing.go @@ -102,7 +102,6 @@ func (tc *TestCluster) Stop() error { // The baseName of the certificate gets a random number suffix attached. // baseName and suffix are separated by a single "-" character. func (tc *TestCluster) CreateTLSCertificate(t *testing.T, baseName string) *hcloud.Certificate { - rndInt := rng.Int() name := fmt.Sprintf("%s-%d", baseName, rndInt) @@ -195,7 +194,7 @@ func (l *lbTestHelper) DeployTestPod() *corev1.Pod { return pod } -// ServiceDefinition returns a service definition for a Hetzner Cloud Load Balancer (k8s service) +// ServiceDefinition returns a service definition for a Hetzner Cloud Load Balancer (k8s service). func (l *lbTestHelper) ServiceDefinition(pod *corev1.Pod, annotations map[string]string) *corev1.Service { port := l.port if port == 0 { @@ -227,7 +226,6 @@ func (l *lbTestHelper) ServiceDefinition(pod *corev1.Pod, annotations map[string // CreateService creates a k8s service based on the given service definition // and waits until it is "ready". 
func (l *lbTestHelper) CreateService(lbSvc *corev1.Service) (*corev1.Service, error) { - // Default is 15s interval, 10s timeout, 3 retries => 45 seconds until up // With these changes it should be 1 seconds until up // lbSvc.Annotations[string(annotation.LBSvcHealthCheckInterval)] = "1s" @@ -274,9 +272,8 @@ func (l *lbTestHelper) TearDown() { // WaitForHTTPAvailable tries to connect to the given IP via http // It tries it for 2 minutes, if after two minutes the connection -// wasn't successful and it wasn't a HTTP 200 response it will fail +// wasn't successful and it wasn't a HTTP 200 response it will fail. func WaitForHTTPAvailable(t *testing.T, ingressIP string, useHTTPS bool) { - client := &http.Client{ Timeout: 1 * time.Second, Transport: &http.Transport{ From b302eefadb2354215ebbac88cbe459ad42bf959d Mon Sep 17 00:00:00 2001 From: Sam Day Date: Thu, 6 Apr 2023 15:05:14 +0200 Subject: [PATCH 12/14] e2e passing --- hack/dev-up.sh | 5 ++++- tests/e2e/e2e_test.go | 34 +++++++++++++++++++++++----------- tests/e2e/testing.go | 4 ++-- 3 files changed, 29 insertions(+), 14 deletions(-) diff --git a/hack/dev-up.sh b/hack/dev-up.sh index 3910ea647..8b99873ba 100755 --- a/hack/dev-up.sh +++ b/hack/dev-up.sh @@ -142,7 +142,10 @@ if [[ -n "${DEBUG:-}" ]]; then set -x; fi # Install Cilium. ( trap error ERR if ! helm status -n kube-system cilium >/dev/null 2>&1; then - helm install cilium cilium --repo https://helm.cilium.io/ -n kube-system --version 1.13.1 --set tunnel=disabled --set ipv4NativeRoutingCIDR=$cluster_cidr + helm install cilium cilium --repo https://helm.cilium.io/ -n kube-system --version 1.13.1 \ + --set tunnel=disabled \ + --set ipv4NativeRoutingCIDR=$cluster_cidr \ + --set ipam.mode=kubernetes fi) & # Create HCLOUD_TOKEN Secret for hcloud-cloud-controller-manager. 
diff --git a/tests/e2e/e2e_test.go b/tests/e2e/e2e_test.go index ffa8aa4d4..603079300 100644 --- a/tests/e2e/e2e_test.go +++ b/tests/e2e/e2e_test.go @@ -3,10 +3,12 @@ package e2e import ( "context" "fmt" + "k8s.io/apimachinery/pkg/util/wait" "math/rand" "os" "strings" "testing" + "time" "github.com/stretchr/testify/assert" corev1 "k8s.io/api/core/v1" @@ -228,19 +230,29 @@ func TestCloudControllerManagerLoadBalancersWithPrivateNetwork(t *testing.T) { func TestCloudControllerManagerNetworksPodIPsAreAccessible(t *testing.T) { t.Parallel() - node, err := testCluster.k8sClient.CoreV1().Nodes().Get(context.Background(), "hccm-"+testCluster.scope+"-1", metav1.GetOptions{}) - if err != nil { - t.Error(err) - } + err := wait.Poll(1*time.Second, 2*time.Minute, func() (bool, error) { + node, err := testCluster.k8sClient.CoreV1().Nodes().Get(context.Background(), "hccm-"+testCluster.scope+"-1", metav1.GetOptions{}) + if err != nil { + return false, err + } - network, _, err := testCluster.hcloud.Network.Get(context.TODO(), "hccm-"+testCluster.scope) + network, _, err := testCluster.hcloud.Network.Get(context.TODO(), "hccm-"+testCluster.scope) + if err != nil { + return false, err + } + for _, route := range network.Routes { + if route.Destination.String() == node.Spec.PodCIDR { + for _, a := range node.Status.Addresses { + if a.Type == corev1.NodeInternalIP { + assert.Equal(t, a.Address, route.Gateway.String()) + } + } + return true, nil + } + } + return false, nil + }) if err != nil { t.Fatal(err) } - for _, subnet := range network.Subnets { - if subnet.IPRange.String() == node.Spec.PodCIDR { - return - } - } - t.Fatal("did not find expected route table entry") } diff --git a/tests/e2e/testing.go b/tests/e2e/testing.go index 2edf55f89..7128a18fc 100644 --- a/tests/e2e/testing.go +++ b/tests/e2e/testing.go @@ -263,7 +263,7 @@ func (l *lbTestHelper) TearDown() { if err != nil && !k8serrors.IsNotFound(err) { return false, err } - return true, nil + return k8serrors.IsNotFound(err), nil }) if err != nil { panic(err) @@ -305,6 +305,6 @@ func WaitForHTTPAvailable(t *testing.T, ingressIP string, useHTTPS bool) { } }) if err != nil { - t.Errorf("not available via client.Get: %s", err) + t.Errorf("%s not available: %s", ingressIP, err) } } From c6b13e62ac097b530328ceb4031f3baac5b4ae98 Mon Sep 17 00:00:00 2001 From: Sam Day Date: Thu, 6 Apr 2023 15:07:35 +0200 Subject: [PATCH 13/14] lint --- tests/e2e/e2e_test.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/e2e/e2e_test.go b/tests/e2e/e2e_test.go index 603079300..5b66e7e5c 100644 --- a/tests/e2e/e2e_test.go +++ b/tests/e2e/e2e_test.go @@ -3,13 +3,14 @@ package e2e import ( "context" "fmt" - "k8s.io/apimachinery/pkg/util/wait" "math/rand" "os" "strings" "testing" "time" + "k8s.io/apimachinery/pkg/util/wait" + "github.com/stretchr/testify/assert" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" From ebefea280f4fbe11bd7984428e2855385f037e9d Mon Sep 17 00:00:00 2001 From: Sam Day Date: Thu, 6 Apr 2023 15:15:19 +0200 Subject: [PATCH 14/14] gci --- scripts/e2etest-local.sh | 43 ---------------------------------------- tests/e2e/e2e_test.go | 3 +-- tests/e2e/testing.go | 5 +++-- 3 files changed, 4 insertions(+), 47 deletions(-) delete mode 100755 scripts/e2etest-local.sh diff --git a/scripts/e2etest-local.sh b/scripts/e2etest-local.sh deleted file mode 100755 index 7a05cdca7..000000000 --- a/scripts/e2etest-local.sh +++ /dev/null @@ -1,43 +0,0 @@ -#!/usr/bin/env bash - -set -e - -function test_k8s_version() { - if 
[[ -z "$1" ]]; then - echo "Usage: $0 " - return 1 - fi - - export K8S_VERSION="$1" - - echo "Testing $K8S_VERSION without network support" - export USE_NETWORKS="no" - if ! go test -count=1 -v -timeout 60m ./e2etests; then - return 2 - fi - - echo - echo - echo "Testing $K8S_VERSION with network support" - export USE_NETWORKS="yes" - if ! go test -count=1 -v -timeout 60m ./e2etests; then - return 2 - fi -} - -if [[ -z "$HCLOUD_TOKEN" ]]; then - echo "HCLOUD_TOKEN not set! Aborting tests." - exit 1 -fi - -K8S_VERSIONS=( - "k8s-1.20.12" - "k3s-v1.20.12+k3s1" - "k8s-1.21.6" - "k3s-v1.21.6+k3s1" - "k8s-1.22.3" - "k3s-v1.22.3+k3s1" -) -for v in "${K8S_VERSIONS[@]}"; do - test_k8s_version "$v" -done diff --git a/tests/e2e/e2e_test.go b/tests/e2e/e2e_test.go index 5b66e7e5c..aece4b404 100644 --- a/tests/e2e/e2e_test.go +++ b/tests/e2e/e2e_test.go @@ -9,11 +9,10 @@ import ( "testing" "time" - "k8s.io/apimachinery/pkg/util/wait" - "github.com/stretchr/testify/assert" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/wait" "github.com/hetznercloud/hcloud-cloud-controller-manager/internal/annotation" "github.com/hetznercloud/hcloud-cloud-controller-manager/internal/hcops" diff --git a/tests/e2e/testing.go b/tests/e2e/testing.go index 7128a18fc..9787fd162 100644 --- a/tests/e2e/testing.go +++ b/tests/e2e/testing.go @@ -12,8 +12,6 @@ import ( "testing" "time" - "github.com/hetznercloud/hcloud-cloud-controller-manager/internal/testsupport" - "github.com/hetznercloud/hcloud-go/hcloud" corev1 "k8s.io/api/core/v1" k8serrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -21,6 +19,9 @@ import ( "k8s.io/apimachinery/pkg/util/wait" "k8s.io/client-go/kubernetes" "k8s.io/client-go/tools/clientcmd" + + "github.com/hetznercloud/hcloud-cloud-controller-manager/internal/testsupport" + "github.com/hetznercloud/hcloud-go/hcloud" ) var rng *rand.Rand