From fd93df9a90e0b3b8dd413c72e55fc2377adef38e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Julian=20T=C3=B6lle?=
Date: Thu, 6 Apr 2023 12:50:18 +0200
Subject: [PATCH] feat: new dev/test environment

This PR simplifies and unifies the process of bringing up a hcloud k8s
environment that is suitable for development/testing purposes. It builds
off the work originally started in hetznercloud/csi-driver#226.

The central script is `hack/dev-up.sh`. The script requires a
`HCLOUD_TOKEN`, and will build a k8s cluster of one or more nodes using
the `k3sup` + `hcloud` CLI tools. It's typically quite fast: a cold
execution of the script should take less than a minute before a fully
operational cluster is ready for use. The `dev-down.sh` script will
delete all resources created by `dev-up.sh`. (A minimal usage sketch for
reviewers is appended after the diff.)

One of the primary objectives in this work was to simplify the e2e
testing process. Skaffold is plumbed into the e2e test pipelines on
GitHub. This way, the process of building and deploying a test build of
csi-driver to a cluster is unified for development and test.

Once this work has landed and stabilized here, we expect to package it
up a bit further and use it in csi-driver and anywhere else we operate
Kubernetes integrations that need automated testing.

Co-authored-by: Sam Day
---
 .dockerignore                            |  25 +-
 .github/workflows/test_e2e.yml           |  44 +-
 .gitignore                               |   3 +
 e2etests/.gitignore                      |   5 +-
 e2etests/e2e_test.go                     |  48 --
 e2etests/run-e2e-tests.sh                |  30 +
 e2etests/setup.go                        | 564 ------------------
 e2etests/templates/cloudinit_k8s.txt.tpl |  66 --
 .../1.23.yml => testdriver-1.23.yaml}    |   0
 e2etests/testing.go                      | 242 --------
 hack/dev-down.sh                         |  37 ++
 hack/dev-up.sh                           | 173 ++++++
 hack/k3s-registries.yaml                 |   3 +
 hack/registry-port-forward.sh            |  16 +
 script/e2etest-local.sh                  |  28 -
 skaffold.yaml                            |   8 +-
 16 files changed, 310 insertions(+), 982 deletions(-)
 create mode 100644 .gitignore
 delete mode 100644 e2etests/e2e_test.go
 create mode 100755 e2etests/run-e2e-tests.sh
 delete mode 100644 e2etests/setup.go
 delete mode 100644 e2etests/templates/cloudinit_k8s.txt.tpl
 rename e2etests/{templates/testdrivers/1.23.yml => testdriver-1.23.yaml} (100%)
 delete mode 100644 e2etests/testing.go
 create mode 100755 hack/dev-down.sh
 create mode 100755 hack/dev-up.sh
 create mode 100644 hack/k3s-registries.yaml
 create mode 100755 hack/registry-port-forward.sh
 delete mode 100755 script/e2etest-local.sh

diff --git a/.dockerignore b/.dockerignore
index 41ae2653..9b08426b 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -1,15 +1,10 @@
-.dockerignore
-Dockerfile
-.git/
-.github/
-.gitlab-ci.yml
-.idea/
-CHANGES.md
-LICENSE
-README.md
-e2etests/
-deploy/
-mock/
-script/
-**/*_test.go
-kustomization.yaml
+*
+!api/
+!app/
+!cmd/
+!csi/
+!driver/
+!metrics/
+!volumes/
+!go.mod
+!go.sum

diff --git a/.github/workflows/test_e2e.yml b/.github/workflows/test_e2e.yml
index c6750960..2e6ca588 100644
--- a/.github/workflows/test_e2e.yml
+++ b/.github/workflows/test_e2e.yml
@@ -9,8 +9,14 @@ jobs:
       # causes all other currently running jobs to abort and all need to be restarted.
       fail-fast: false
       matrix:
-        k8s: [ k8s-1.23.15, k8s-1.24.9, k8s-1.25.5, k8s-1.26.0 ]
-    name: k8s ${{ matrix.k8s }}
+        include:
+          - k3s: v1.24
+            k8s-test: v1.24.12
+          - k3s: v1.25
+            k8s-test: v1.25.8
+          - k3s: v1.26
+            k8s-test: v1.26.3
+    name: k3s ${{ matrix.k3s }}
     steps:
       - uses: actions/setup-go@v2
         with:
            echo "::error ::Couldn't determine HCLOUD_TOKEN. Check that repository secrets are setup correctly."
            exit 1
          fi
-      - uses: actions/cache@v2
+
+      - uses: 3bit/setup-hcloud@v2
+      - uses: yokawasa/action-setup-kube-tools@v0.9.2
         with:
-          path: |
-            ~/go/pkg/mod
-            ~/.cache/go-build
-          key: ${{ runner.os }}-go-${{ hashFiles('**/go.sum') }}
-          restore-keys: |
-            ${{ runner.os }}-go-
+          setup-tools: |
+            helm
+            kubectl
+            skaffold
+          helm: v3.11.2
+          kubectl: v1.26.3
+          skaffold: v2.3.0
+
       - name: Run tests
         env:
-          K8S_VERSION: ${{ matrix.k8s }}
+          K3S_CHANNEL: ${{ matrix.k3s }}
+          K8S_TEST_VERSION: ${{ matrix.k8s-test }}
+          SCOPE: gha-${{ github.run_id }}-${{ github.run_attempt }}-${{ matrix.k3s }}
         run: |
-          go test $(go list ./... | grep e2etests) -v -timeout 60m
-          ./script/delete-token.sh $HCLOUD_TOKEN
+          curl -sLS https://get.k3sup.dev | sh
+
+          trap "hack/dev-down.sh; ./script/delete-token.sh $HCLOUD_TOKEN" EXIT
+          source <(hack/dev-up.sh)
+
+          skaffold build --tag="e2e-${GITHUB_RUN_ID}-${GITHUB_RUN_NUMBER}"
+          tag=$(skaffold build --tag="e2e-${GITHUB_RUN_ID}-${GITHUB_RUN_NUMBER}" --quiet --output="{{ (index .Builds 0).Tag }}")
+          skaffold deploy --images=hetznercloud/hcloud-csi-driver=$tag
+
+          e2etests/run-e2e-tests.sh

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 00000000..ffe7c94e
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+# rootfs for docker plugin
+deploy/docker-swarm/pkg/plugin
+hack/.*

diff --git a/e2etests/.gitignore b/e2etests/.gitignore
index 8dfc4ceb..a459cf81 100644
--- a/e2etests/.gitignore
+++ b/e2etests/.gitignore
@@ -1,4 +1 @@
-ci-hcloud-csi-driver.tar
-ssh_key
-kubeconfig
-join.txt
+test-binaries/
\ No newline at end of file

diff --git a/e2etests/e2e_test.go b/e2etests/e2e_test.go
deleted file mode 100644
index e61cf99f..00000000
--- a/e2etests/e2e_test.go
+++ /dev/null
@@ -1,48 +0,0 @@
-package e2etests
-
-import (
-	"fmt"
-	"os"
-	"testing"
-)
-
-var testCluster TestCluster
-
-func TestMain(m *testing.M) {
-	if err := testCluster.Start(); err != nil {
-		fmt.Printf("%v\n", err)
-		os.Exit(1)
-	}
-
-	rc := m.Run()
-
-	if err := testCluster.Stop(rc > 0); err != nil {
-		fmt.Printf("%v\n", err)
-		os.Exit(1)
-	}
-	os.Exit(rc)
-}
-
-func TestOfficialTestsuite(t *testing.T) {
-	// The e2e tests are a bit flaky, and at the moment in ~1/3 of the runs a test fails, causing the whole pipeline to
-	// fail. As the e2e tests take 15-20 minutes each, this is quite annoying. By setting -flakeAttempts=2, the pipeline
-	// will immediately retry any failed tests.
-	t.Run("parallel tests", func(t *testing.T) {
-		err := RunCommandVisibleOnServer(testCluster.setup.privKey, testCluster.setup.MainNode, "KUBECONFIG=/root/.kube/config ./ginkgo -nodes=6 -flakeAttempts=2 -v -focus='External.Storage' -skip='\\[Feature:|\\[Disruptive\\]|\\[Serial\\]' ./e2e.test -- -storage.testdriver=test-driver.yml")
-		if err != nil {
-			t.Error(err)
-		}
-	})
-	t.Run("serial tests", func(t *testing.T) {
-		// Tests tagged as "Feature:SELinuxMountReadWriteOncePod" were added in
-		// Kubernetes v1.26, and fail for us because we do not support the
-		// SINGLE_NODE_MULTI_WRITER Capability (equivalent to ReadWriteOncePod
-		// Volume Access Mode in Kubernetes).
-		// This feature is being tracked in https://github.com/hetznercloud/csi-driver/issues/327
-		// and we should add the tests once we have implemented the capability.
-		err := RunCommandVisibleOnServer(testCluster.setup.privKey, testCluster.setup.MainNode, "KUBECONFIG=/root/.kube/config ./ginkgo -flakeAttempts=2 -v -focus='External.Storage.*(\\[Feature:|\\[Serial\\])' -skip='\\[Feature:SELinuxMountReadWriteOncePod\\]' ./e2e.test -- -storage.testdriver=test-driver.yml")
-		if err != nil {
-			t.Error(err)
-		}
-	})
-}

diff --git a/e2etests/run-e2e-tests.sh b/e2etests/run-e2e-tests.sh
new file mode 100755
index 00000000..ff4f66a8
--- /dev/null
+++ b/e2etests/run-e2e-tests.sh
@@ -0,0 +1,30 @@
+#!/usr/bin/env bash
+set -uex -o pipefail
+SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
+
+k8s_test_version="${K8S_TEST_VERSION:-v1.26.3}"
+
+mkdir -p "${SCRIPT_DIR}/test-binaries"
+# TODO: Read linux-amd64 from env
+curl --location "https://dl.k8s.io/${k8s_test_version}/kubernetes-test-linux-amd64.tar.gz" | \
+  tar --strip-components=3 -C "${SCRIPT_DIR}/test-binaries" -zxf - kubernetes/test/bin/e2e.test kubernetes/test/bin/ginkgo
+
+ginkgo="${SCRIPT_DIR}/test-binaries/ginkgo"
+ginkgo_flags="-v --flakeAttempts=2"
+
+e2e="${SCRIPT_DIR}/test-binaries/e2e.test"
+e2e_flags="-storage.testdriver=${SCRIPT_DIR}/testdriver-1.23.yaml"
+
+echo "Executing parallel tests"
+${ginkgo} ${ginkgo_flags} \
+  -nodes=6 \
+  -focus='External.Storage' \
+  -skip='\[Feature:|\[Disruptive\]|\[Serial\]' \
+  "${e2e}" -- ${e2e_flags}
+
+echo "Executing serial tests"
+${ginkgo} ${ginkgo_flags} \
+  -focus='External.Storage.*(\[Feature:|\[Serial\])' \
+  -skip='\[Feature:SELinuxMountReadWriteOncePod\]' \
+  "${e2e}" -- ${e2e_flags}
+

diff --git a/e2etests/setup.go b/e2etests/setup.go
deleted file mode 100644
index 24ac4553..00000000
--- a/e2etests/setup.go
+++ /dev/null
@@ -1,564 +0,0 @@
-package e2etests
-
-import (
-	"bytes"
-	"context"
-	"crypto/rand"
-	"crypto/rsa"
-	"crypto/x509"
-	"encoding/pem"
-	"fmt"
-	"html/template"
-	"io"
-	"io/ioutil"
-	"net"
-	"os"
-	"os/exec"
-	"strings"
-	"sync"
-	"time"
-
-	"github.com/hetznercloud/hcloud-go/hcloud"
-	"golang.org/x/crypto/ssh"
-)
-
-type K8sDistribution string
-
-const (
-	K8sDistributionK8s K8sDistribution = "k8s"
-	K8sDistributionK3s K8sDistribution = "k3s"
-	TestDriverFilePath = "templates/testdrivers/1.23.yml"
-)
-
-var instanceType = "cpx21"
-
-type hcloudK8sSetup struct {
-	Hcloud          *hcloud.Client
-	HcloudToken     string
-	K8sVersion      string
-	K8sDistribution K8sDistribution
-	TestIdentifier  string
-	ImageName       string
-	KeepOnFailure   bool
-	MainNode        *hcloud.Server
-	WorkerNodes     []*hcloud.Server
-	privKey         string
-	sshKey          *hcloud.SSHKey
-	clusterJoinCMD  string
-	testLabels      map[string]string
-}
-
-type cloudInitTmpl struct {
-	K8sVersion      string
-	HcloudToken     string
-	IsClusterServer bool
-	JoinCMD         string
-}
-
-// PrepareTestEnv setups a test environment for the CSI Driver
-// This includes the creation of a SSH Key, a "Cluster Node" and a defined amount of Worker nodes
-// The servers will be created with a Cloud Init UserData
-// The template can be found under e2etests/templates/cloudinit_<distribution>.txt.tpl
-func (s *hcloudK8sSetup) PrepareTestEnv(ctx context.Context, additionalSSHKeys []*hcloud.SSHKey) error {
-	const op = "hcloudK8sSetup/PrepareTestEnv"
-
-	s.testLabels = map[string]string{"K8sDistribution": string(s.K8sDistribution), "K8sVersion": strings.ReplaceAll(s.K8sVersion, "+", ""), "test": s.TestIdentifier}
-	err := s.getSSHKey(ctx)
-	if err != nil {
-		return fmt.Errorf("%s getSSHKey: %s", op, err)
-	}
-
-	srv, err := s.createClusterServer(ctx, "cluster-node", instanceType, additionalSSHKeys)
-	if err != nil {
-		return fmt.Errorf("%s: create cluster node: %v", op, err)
fmt.Errorf("%s: create cluster node: %v", op, err) - } - s.MainNode = srv - - s.waitUntilSSHable(s.MainNode) - - err = s.waitForCloudInit(s.MainNode) - if err != nil { - return err - } - - joinCmd, err := s.getJoinCmd() - if err != nil { - return err - } - s.clusterJoinCMD = joinCmd - - err = s.transferDockerImage(s.MainNode) - if err != nil { - return fmt.Errorf("%s: %s", op, err) - } - - fmt.Printf("[cluster-node] %s Load Image:\n", op) - transferCmd := "ctr -n=k8s.io image import ci-hcloud-csi-driver.tar" - err = RunCommandOnServer(s.privKey, s.MainNode, transferCmd) - if err != nil { - return fmt.Errorf("%s: Load image %s", op, err) - } - - var workers = 3 // Change this value if you want to have more workers for the test - var wg sync.WaitGroup - errs := make(chan error, workers) - for worker := 1; worker <= workers; worker++ { - wg.Add(1) - go func(worker int) { - err = s.createClusterWorker(ctx, additionalSSHKeys, &wg, worker) - if err != nil { - errs <- err - } - }(worker) - } - wg.Wait() - close(errs) - - // Return first error that happened - err = <-errs - if err != nil { - return err - } - - return nil -} - -func (s *hcloudK8sSetup) createClusterWorker(ctx context.Context, additionalSSHKeys []*hcloud.SSHKey, wg *sync.WaitGroup, worker int) error { - const op = "hcloudK8sSetup/createClusterWorker" - defer wg.Done() - - workerName := fmt.Sprintf("cluster-worker-%d", worker) - fmt.Printf("[%s] %s Create worker node:\n", workerName, op) - - userData, err := s.getCloudInitConfig(false) - if err != nil { - return fmt.Errorf("[%s] %s getCloudInitConfig: %w", workerName, op, err) - } - srv, err := s.createServer(ctx, workerName, instanceType, additionalSSHKeys, err, userData) - if err != nil { - return fmt.Errorf("[%s] %s createServer: %w", workerName, op, err) - } - s.WorkerNodes = append(s.WorkerNodes, srv) - - s.waitUntilSSHable(srv) - - err = s.waitForCloudInit(srv) - if err != nil { - return fmt.Errorf("[%s] %s: wait for cloud init on worker: %w", srv.Name, op, err) - } - - err = s.transferDockerImage(srv) - if err != nil { - return fmt.Errorf("[%s] %s: transfer image on worker: %w", srv.Name, op, err) - } - - fmt.Printf("[%s] %s Load Image\n", srv.Name, op) - - transferCmd := "ctr -n=k8s.io image import ci-hcloud-csi-driver.tar" - - err = RunCommandOnServer(s.privKey, srv, transferCmd) - if err != nil { - return fmt.Errorf("[%s] %s: load image on worker: %w", srv.Name, op, err) - } - - return nil -} - -func (s *hcloudK8sSetup) waitUntilSSHable(server *hcloud.Server) { - const op = "hcloudK8sSetup/PrepareTestEnv" - fmt.Printf("[%s] %s: Waiting for server to be sshable:\n", server.Name, op) - for { - conn, err := net.Dial("tcp", fmt.Sprintf("%s:22", server.PublicNet.IPv4.IP.String())) - if err != nil { - time.Sleep(1 * time.Second) - continue - } - _ = conn.Close() - fmt.Printf("[%s] %s: SSH Connection successful\n", server.Name, op) - break - } -} - -func (s *hcloudK8sSetup) createClusterServer(ctx context.Context, name, typ string, additionalSSHKeys []*hcloud.SSHKey) (*hcloud.Server, error) { - const op = "e2etest/createClusterServer" - - userData, err := s.getCloudInitConfig(true) - if err != nil { - return nil, fmt.Errorf("%s getCloudInitConfig: %s", op, err) - } - srv, err := s.createServer(ctx, name, typ, additionalSSHKeys, err, userData) - if err != nil { - return nil, fmt.Errorf("%s createServer: %s", op, err) - } - return srv, nil -} - -func (s *hcloudK8sSetup) createServer(ctx context.Context, name string, typ string, additionalSSHKeys []*hcloud.SSHKey, err error, userData 
-	const op = "e2etest/createServer"
-	sshKeys := []*hcloud.SSHKey{s.sshKey}
-	for _, additionalSSHKey := range additionalSSHKeys {
-		sshKeys = append(sshKeys, additionalSSHKey)
-	}
-
-	res, _, err := s.Hcloud.Server.Create(ctx, hcloud.ServerCreateOpts{
-		Name:       fmt.Sprintf("%s-%s", name, s.TestIdentifier),
-		ServerType: &hcloud.ServerType{Name: typ},
-		Image:      &hcloud.Image{Name: "ubuntu-20.04"},
-		SSHKeys:    sshKeys,
-		UserData:   userData,
-		Labels:     s.testLabels,
-	})
-	if err != nil {
-		return nil, fmt.Errorf("%s Hcloud.Server.Create: %s", op, err)
-	}
-
-	_, errCh := s.Hcloud.Action.WatchProgress(ctx, res.Action)
-	if err := <-errCh; err != nil {
-		return nil, fmt.Errorf("%s WatchProgress Action %s: %s", op, res.Action.Command, err)
-	}
-
-	for _, nextAction := range res.NextActions {
-		_, errCh := s.Hcloud.Action.WatchProgress(ctx, nextAction)
-		if err := <-errCh; err != nil {
-			return nil, fmt.Errorf("%s WatchProgress NextAction %s: %s", op, nextAction.Command, err)
-		}
-	}
-	srv, _, err := s.Hcloud.Server.GetByID(ctx, res.Server.ID)
-	if err != nil {
-		return nil, fmt.Errorf("%s Hcloud.Server.GetByID: %s", op, err)
-	}
-	return srv, nil
-}
-
-// PrepareK8s patches an existing kubernetes cluster with the correct
-// CSI Driver version from this test run.
-// This should only run on the cluster main node
-func (s *hcloudK8sSetup) PrepareK8s() (string, error) {
-	const op = "hcloudK8sSetup/PrepareK8s"
-
-	err := s.prepareCSIDriverDeploymentFile()
-	if err != nil {
-		return "", fmt.Errorf("%s: %s", op, err)
-	}
-
-	fmt.Printf("[%s] %s: Apply csi-driver deployment\n", s.MainNode.Name, op)
-	err = RunCommandOnServer(s.privKey, s.MainNode, "KUBECONFIG=/root/.kube/config kubectl apply -f csi-driver.yml")
-	if err != nil {
-		return "", fmt.Errorf("%s Deploy csi: %s", op, err)
-	}
-
-	patch := `{"spec":{"template":{"spec":{"containers":[{"name":"hcloud-csi-driver","env":[{"name":"LOG_LEVEL","value":"debug"}]}]}}}}`
-	fmt.Printf("[%s] %s: Patch deployment for debug logging\n", s.MainNode.Name, op)
-	err = RunCommandOnServer(s.privKey, s.MainNode, fmt.Sprintf("KUBECONFIG=/root/.kube/config kubectl patch deployment hcloud-csi-controller -n kube-system --patch '%s'", patch))
-	if err != nil {
-		return "", fmt.Errorf("%s Patch Deployment: %s", op, err)
-	}
-	err = RunCommandOnServer(s.privKey, s.MainNode, fmt.Sprintf("KUBECONFIG=/root/.kube/config kubectl patch daemonset hcloud-csi-node -n kube-system --patch '%s'", patch))
-	if err != nil {
-		return "", fmt.Errorf("%s Patch DaemonSet: %s", op, err)
-	}
-
-	fmt.Printf("[%s] %s: Ensure Server is not labeled as master\n", s.MainNode.Name, op)
-	err = RunCommandOnServer(s.privKey, s.MainNode, "KUBECONFIG=/root/.kube/config kubectl label nodes --all node-role.kubernetes.io/master-")
-	if err != nil {
-		return "", fmt.Errorf("%s Ensure Server is not labeled as master: %s", op, err)
-	}
-
-	fmt.Printf("[%s] %s: Read test-driver.yml configuration file\n", s.MainNode.Name, op)
-	testDriverFile, err := ioutil.ReadFile(TestDriverFilePath)
-	if err != nil {
-		return "", fmt.Errorf("%s read testdriverfile file: %s %v", op, TestDriverFilePath, err)
-	}
-
-	fmt.Printf("[%s] %s: Transfer test-driver.yml configuration file\n", s.MainNode.Name, op)
-	err = RunCommandOnServer(s.privKey, s.MainNode, fmt.Sprintf("echo '%s' >> test-driver.yml", testDriverFile))
-	if err != nil {
-		return "", fmt.Errorf("%s send testdriverfile file: %s %v", op, TestDriverFilePath, err)
-	}
-	fmt.Printf("[%s] %s: Download kubeconfig\n", s.MainNode.Name, op)
-	err = scp("ssh_key", fmt.Sprintf("root@%s:/root/.kube/config", s.MainNode.PublicNet.IPv4.IP.String()), "kubeconfig")
err = scp("ssh_key", fmt.Sprintf("root@%s:/root/.kube/config", s.MainNode.PublicNet.IPv4.IP.String()), "kubeconfig") - if err != nil { - return "", fmt.Errorf("%s download kubeconfig: %s", op, err) - } - - fmt.Printf("[%s] %s: Ensure correct server is set\n", s.MainNode.Name, op) - kubeconfigBefore, err := ioutil.ReadFile("kubeconfig") - if err != nil { - return "", fmt.Errorf("%s reading kubeconfig: %s", op, err) - } - kubeconfigAfterwards := strings.Replace(string(kubeconfigBefore), "127.0.0.1", s.MainNode.PublicNet.IPv4.IP.String(), -1) - err = ioutil.WriteFile("kubeconfig", []byte(kubeconfigAfterwards), 0) - if err != nil { - return "", fmt.Errorf("%s writing kubeconfig: %s", op, err) - } - return "kubeconfig", nil -} - -func scp(identityFile, src, dest string) error { - const op = "e2etests/scp" - - err := runCmd( - "/usr/bin/env", - []string{ - "scp", - "-F", "/dev/null", // ignore $HOME/.ssh/config - "-i", identityFile, - "-o", "IdentitiesOnly=yes", // only use the identities passed on the command line - "-o", "UserKnownHostsFile=/dev/null", - "-o", "StrictHostKeyChecking=no", - src, - dest, - }, - nil, - ) - if err != nil { - return fmt.Errorf("%s: %v", op, err) - } - return nil -} - -func runCmd(name string, argv []string, env []string) error { - cmd := exec.Command(name, argv...) - if os.Getenv("TEST_DEBUG_MODE") != "" { - cmd.Stdout = os.Stdout - cmd.Stderr = os.Stderr - } - if env != nil { - cmd.Env = append(os.Environ(), env...) - } - if err := cmd.Run(); err != nil { - return fmt.Errorf("run cmd: %s %s: %v", name, strings.Join(argv, " "), err) - } - return nil -} - -// prepareCSIDriverDeploymentFile patches the Cloud Controller Deployment file -// It replaces the used image and the pull policy to always use the local image -// from this test run -func (s *hcloudK8sSetup) prepareCSIDriverDeploymentFile() error { - const op = "hcloudK8sSetup/prepareCSIDriverDeploymentFile" - fmt.Printf("[%s] %s: Read master deployment file\n", s.MainNode.Name, op) - deploymentFile, err := ioutil.ReadFile("../deploy/kubernetes/hcloud-csi.yml") - if err != nil { - return fmt.Errorf("%s: read csi driver deployment file %s: %v", op, "../deploy/kubernetes/hcloud-csi.yml", err) - } - - fmt.Printf("[%s] %s: Prepare deployment file and transfer it\n", s.MainNode.Name, op) - deploymentFile = []byte(strings.ReplaceAll(string(deploymentFile), "hetznercloud/hcloud-csi-driver:latest", fmt.Sprintf("hcloud-csi:ci_%s", s.TestIdentifier))) - deploymentFile = []byte(strings.ReplaceAll(string(deploymentFile), " imagePullPolicy: Always", " imagePullPolicy: IfNotPresent")) - - err = RunCommandOnServer(s.privKey, s.MainNode, fmt.Sprintf("echo '%s' >> csi-driver.yml", deploymentFile)) - if err != nil { - return fmt.Errorf("%s: Prepare deployment file and transfer it: %s", op, err) - } - return nil -} - -// transferDockerImage transfers the local build docker image tar via SCP -func (s *hcloudK8sSetup) transferDockerImage(server *hcloud.Server) error { - const op = "hcloudK8sSetup/transferDockerImage" - fmt.Printf("[%s] %s: Transfer docker image\n", server.Name, op) - err := WithSSHSession(s.privKey, server.PublicNet.IPv4.IP.String(), func(session *ssh.Session) error { - file, err := os.Open("ci-hcloud-csi-driver.tar") - if err != nil { - return fmt.Errorf("%s read ci-hcloud-ccm.tar: %s", op, err) - } - defer file.Close() - stat, err := file.Stat() - if err != nil { - return fmt.Errorf("%s file.Stat: %s", op, err) - } - wg := sync.WaitGroup{} - wg.Add(1) - - go func() { - hostIn, _ := session.StdinPipe() - defer 
-			fmt.Fprintf(hostIn, "C0664 %d %s\n", stat.Size(), "ci-hcloud-csi-driver.tar")
-			io.Copy(hostIn, file)
-			fmt.Fprint(hostIn, "\x00")
-			wg.Done()
-		}()
-
-		err = session.Run("/usr/bin/env scp -t /root")
-		if err != nil {
-			return fmt.Errorf("%s copy via scp: %s", op, err)
-		}
-		wg.Wait()
-		return err
-	})
-	return err
-}
-
-// waitForCloudInit waits on cloud init on the server.
-// when cloud init is ready we can assume that the server
-// and the plain k8s installation is ready
-func (s *hcloudK8sSetup) waitForCloudInit(server *hcloud.Server) error {
-	const op = "hcloudK8sSetup/PrepareTestEnv"
-	fmt.Printf("[%s] %s: Wait for cloud-init\n", server.Name, op)
-	err := RunCommandOnServer(s.privKey, server, fmt.Sprintf("cloud-init status --wait > /dev/null"))
-	if err != nil {
-		return fmt.Errorf("[%s] %s: Wait for cloud-init: %s", server.Name, op, err)
-	}
-	return nil
-}
-
-// waitForCloudInit waits on cloud init on the server.
-// when cloud init is ready we can assume that the server
-// and the plain k8s installation is ready
-func (s *hcloudK8sSetup) getJoinCmd() (string, error) {
-	const op = "hcloudK8sSetup/getJoinCmd"
-	fmt.Printf("[%s] %s: Download join cmd\n", s.MainNode.Name, op)
-	err := scp("ssh_key", fmt.Sprintf("root@%s:/root/join.txt", s.MainNode.PublicNet.IPv4.IP.String()), "join.txt")
-	if err != nil {
-		return "", fmt.Errorf("[%s] %s download join cmd: %s", s.MainNode.Name, op, err)
-	}
-	cmd, err := ioutil.ReadFile("join.txt")
-	if err != nil {
-		return "", fmt.Errorf("[%s] %s reading join cmd file: %s", s.MainNode.Name, op, err)
-	}
-	return string(cmd), nil
-}
-
-// TearDown deletes all created resources within the Hetzner Cloud
-// there is no need to "shutdown" the k8s cluster before
-// so we just delete all created resources
-func (s *hcloudK8sSetup) TearDown(testFailed bool) error {
-	const op = "hcloudK8sSetup/TearDown"
-
-	if s.KeepOnFailure && testFailed {
-		fmt.Println("Skipping tear-down for further analysis.")
-		fmt.Println("Please clean-up afterwards ;-)")
-		return nil
-	}
-
-	ctx := context.Background()
-	for _, wn := range s.WorkerNodes {
-		_, err := s.Hcloud.Server.Delete(ctx, wn)
-		if err != nil {
-			return fmt.Errorf("[%s] %s Hcloud.Server.Delete: %s", wn.Name, op, err)
-		}
-	}
-	_, err := s.Hcloud.Server.Delete(ctx, s.MainNode)
-	if err != nil {
-		return fmt.Errorf("[cluster-node] %s Hcloud.Server.Delete: %s", op, err)
-	}
-	s.MainNode = nil
-
-	_, err = s.Hcloud.SSHKey.Delete(ctx, s.sshKey)
-	if err != nil {
-		return fmt.Errorf("%s Hcloud.SSHKey.Delete: %s", err, err)
-	}
-	s.sshKey = nil
-	return nil
-}
-
-// getCloudInitConfig returns the generated cloud init configuration
-func (s *hcloudK8sSetup) getCloudInitConfig(isClusterServer bool) (string, error) {
-	const op = "hcloudK8sSetup/getCloudInitConfig"
-
-	str, err := ioutil.ReadFile(fmt.Sprintf("templates/cloudinit_%s.txt.tpl", s.K8sDistribution))
-	if err != nil {
-		return "", fmt.Errorf("%s: read template file %s: %v", "templates/cloudinit.txt.tpl", op, err)
-	}
-	tmpl, err := template.New("cloud_init").Parse(string(str))
-	if err != nil {
-		return "", fmt.Errorf("%s: parsing template file %s: %v", "templates/cloudinit.txt.tpl", op, err)
-	}
-	var buf bytes.Buffer
-	if err := tmpl.Execute(&buf, cloudInitTmpl{K8sVersion: s.K8sVersion, HcloudToken: s.HcloudToken, IsClusterServer: isClusterServer, JoinCMD: s.clusterJoinCMD}); err != nil {
-		return "", fmt.Errorf("%s: execute template: %v", op, err)
-	}
-	return buf.String(), nil
-}
-
-// getSSHKey create and get the Hetzner Cloud SSH Key for the test
-func (s *hcloudK8sSetup) getSSHKey(ctx context.Context) error {
-	const op = "hcloudK8sSetup/getSSHKey"
-	pubKey, privKey, err := makeSSHKeyPair()
-	if err != nil {
-		return err
-	}
-
-	sshKey, _, err := s.Hcloud.SSHKey.Create(ctx, hcloud.SSHKeyCreateOpts{
-		Name:      fmt.Sprintf("s-%s", s.TestIdentifier),
-		PublicKey: pubKey,
-		Labels:    s.testLabels,
-	})
-	if err != nil {
-		return fmt.Errorf("%s: creating ssh key: %v", op, err)
-	}
-	s.privKey = privKey
-	s.sshKey = sshKey
-	err = ioutil.WriteFile("ssh_key", []byte(s.privKey), 0600)
-	if err != nil {
-		return fmt.Errorf("%s: writing ssh key private key: %v", op, err)
-	}
-	return nil
-}
-
-// makeSSHKeyPair generate a SSH key pair
-func makeSSHKeyPair() (string, string, error) {
-	privateKey, err := rsa.GenerateKey(rand.Reader, 1024)
-	if err != nil {
-		return "", "", err
-	}
-
-	// generate and write private key as PEM
-	var privKeyBuf strings.Builder
-
-	privateKeyPEM := &pem.Block{Type: "RSA PRIVATE KEY", Bytes: x509.MarshalPKCS1PrivateKey(privateKey)}
-	if err := pem.Encode(&privKeyBuf, privateKeyPEM); err != nil {
-		return "", "", err
-	}
-
-	// generate and write public key
-	pub, err := ssh.NewPublicKey(&privateKey.PublicKey)
-	if err != nil {
-		return "", "", err
-	}
-
-	var pubKeyBuf strings.Builder
-	pubKeyBuf.Write(ssh.MarshalAuthorizedKey(pub))
-
-	return pubKeyBuf.String(), privKeyBuf.String(), nil
-}
-func RunCommandOnServer(privKey string, server *hcloud.Server, command string) error {
-	return WithSSHSession(privKey, server.PublicNet.IPv4.IP.String(), func(session *ssh.Session) error {
-		if ok := os.Getenv("TEST_DEBUG_MODE"); ok != "" {
-			session.Stdout = os.Stdout
-		}
-		return session.Run(command)
-	})
-}
-func RunCommandVisibleOnServer(privKey string, server *hcloud.Server, command string) error {
-	return WithSSHSession(privKey, server.PublicNet.IPv4.IP.String(), func(session *ssh.Session) error {
-		session.Stdout = os.Stdout
-		return session.Run(command)
-	})
-}
-
-func WithSSHSession(privKey string, host string, fn func(*ssh.Session) error) error {
-	signer, err := ssh.ParsePrivateKey([]byte(privKey))
-	if err != nil {
-		return err
-	}
-
-	client, err := ssh.Dial("tcp", net.JoinHostPort(host, "22"), &ssh.ClientConfig{
-		User:            "root",
-		Auth:            []ssh.AuthMethod{ssh.PublicKeys(signer)},
-		HostKeyCallback: ssh.InsecureIgnoreHostKey(),
-		Timeout:         1 * time.Second,
-	})
-	if err != nil {
-		return err
-	}
-
-	session, err := client.NewSession()
-	if err != nil {
-		return err
-	}
-	defer session.Close()
-
-	return fn(session)
-}

diff --git a/e2etests/templates/cloudinit_k8s.txt.tpl b/e2etests/templates/cloudinit_k8s.txt.tpl
deleted file mode 100644
index 48924fa0..00000000
--- a/e2etests/templates/cloudinit_k8s.txt.tpl
+++ /dev/null
@@ -1,66 +0,0 @@
-#cloud-config
-write_files:
-- content: |
-    overlay
-    br_netfilter
-  path: /etc/modules-load.d/containerd.conf
-- content: |
-    net.bridge.bridge-nf-call-ip6tables = 1
-    net.bridge.bridge-nf-call-iptables = 1
-    net.ipv4.ip_forward = 1
-  path: /etc/sysctl.d/k8s.conf
-- content: |
-    apiVersion: kubeadm.k8s.io/v1beta2
-    kind: ClusterConfiguration
-    kubernetesVersion: v{{.K8sVersion}}
-    networking:
-      podSubnet: "10.244.0.0/16"
-  path: /tmp/kubeadm-config.yaml
-- content: |
-    [Service]
-    Environment="KUBELET_EXTRA_ARGS=--cloud-provider=external"
-  path: /etc/systemd/system/kubelet.service.d/20-hcloud.conf
-- content: |
-    alias k="kubectl"
-    alias ksy="kubectl -n kube-system"
-    alias kgp="kubectl get pods"
-    alias kgs="kubectl get services"
-    alias cilog="cat /var/log/cloud-init-output.log"
/var/log/cloud-init-output.log" - export HCLOUD_TOKEN={{.HcloudToken}} - path: /root/.bashrc -runcmd: -- export HOME=/root -- modprobe overlay -- modprobe br_netfilter -- sysctl --system -- apt install -y apt-transport-https curl -- curl -s https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add - -- echo "deb https://apt.kubernetes.io/ kubernetes-xenial main" > /etc/apt/sources.list.d/kubernetes.list -- curl -fsSL https://download.docker.com/linux/ubuntu/gpg | gpg --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg -- echo "deb [arch=amd64 signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" | tee /etc/apt/sources.list.d/docker.list > /dev/null -- apt update -- apt install -y kubectl={{.K8sVersion}}-00 kubeadm={{.K8sVersion}}-00 kubelet={{.K8sVersion}}-00 containerd.io -- systemctl daemon-reload -- mkdir -p /etc/containerd -- containerd config default | tee /etc/containerd/config.toml -- systemctl restart containerd -- systemctl restart kubelet -# Download and install latest hcloud cli release for easier debugging on host -- curl -s https://api.github.com/repos/hetznercloud/cli/releases/latest | grep browser_download_url | grep linux-amd64 | cut -d '"' -f 4 | wget -qi - -- tar xvzf hcloud-linux-amd64.tar.gz && cp hcloud /usr/bin/hcloud && chmod +x /usr/bin/hcloud -{{if .IsClusterServer}} -- kubeadm init --config /tmp/kubeadm-config.yaml -- mkdir -p /root/.kube -- cp -i /etc/kubernetes/admin.conf /root/.kube/config -- until KUBECONFIG=/root/.kube/config kubectl get node; do sleep 2;done -- KUBECONFIG=/root/.kube/config kubectl apply -f https://raw.githubusercontent.com/coreos/flannel/master/Documentation/kube-flannel.yml -- KUBECONFIG=/root/.kube/config kubectl -n kube-system patch ds kube-flannel-ds --type json -p '[{"op":"add","path":"/spec/template/spec/tolerations/-","value":{"key":"node.cloudprovider.kubernetes.io/uninitialized","value":"true","effect":"NoSchedule"}}]' -- KUBECONFIG=/root/.kube/config kubectl -n kube-system create secret generic hcloud --from-literal=token={{.HcloudToken}} -- KUBECONFIG=/root/.kube/config kubectl apply -f https://raw.githubusercontent.com/hetznercloud/hcloud-cloud-controller-manager/master/deploy/ccm.yaml -- cd /root/ && curl -s --location https://dl.k8s.io/v{{.K8sVersion}}/kubernetes-test-linux-amd64.tar.gz | tar --strip-components=3 -zxf - kubernetes/test/bin/e2e.test kubernetes/test/bin/ginkgo -- KUBECONFIG=/root/.kube/config kubectl taint nodes --all node-role.kubernetes.io/master- -- kubeadm token create --print-join-command >> /root/join.txt -{{else}} -- {{.JoinCMD}} -- sleep 10 # to get the joining work -{{end}} diff --git a/e2etests/templates/testdrivers/1.23.yml b/e2etests/testdriver-1.23.yaml similarity index 100% rename from e2etests/templates/testdrivers/1.23.yml rename to e2etests/testdriver-1.23.yaml diff --git a/e2etests/testing.go b/e2etests/testing.go deleted file mode 100644 index 0c381910..00000000 --- a/e2etests/testing.go +++ /dev/null @@ -1,242 +0,0 @@ -package e2etests - -import ( - "context" - "fmt" - "math/rand" - "os" - "strings" - "sync" - "time" - - "k8s.io/client-go/tools/clientcmd" - - "github.com/hetznercloud/csi-driver/integrationtests" - "github.com/hetznercloud/hcloud-go/hcloud" - corev1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/util/wait" - "k8s.io/client-go/kubernetes" -) - -var rng *rand.Rand - -func init() { - rng = rand.New(rand.NewSource(time.Now().UnixNano())) -} - 
-type TestCluster struct {
-	KeepOnFailure bool
-	setup         *hcloudK8sSetup
-	k8sClient     *kubernetes.Clientset
-	started       bool
-
-	mu sync.Mutex
-}
-
-func (tc *TestCluster) initialize() error {
-	const op = "e2tests/TestCluster.initialize"
-
-	if tc.started {
-		return nil
-	}
-
-	fmt.Printf("%s: Starting Testsuite\n", op)
-
-	isUsingGithubActions := os.Getenv("GITHUB_ACTIONS")
-	isUsingGitlabCI := os.Getenv("CI_JOB_ID")
-	testIdentifier := ""
-	if isUsingGithubActions == "true" {
-		testIdentifier = fmt.Sprintf("gh-%s-%d", os.Getenv("GITHUB_RUN_ID"), rng.Int())
-		fmt.Printf("%s: Running in Github Action\n", op)
-	}
-	if isUsingGitlabCI != "" {
-		testIdentifier = fmt.Sprintf("gl-%s", isUsingGitlabCI)
-		fmt.Printf("%s: Running in Gitlab CI\n", op)
-	}
-	if testIdentifier == "" {
-		testIdentifier = fmt.Sprintf("local-%d", rng.Int())
-		fmt.Printf("%s: Running local\n", op)
-	}
-
-	k8sVersion := os.Getenv("K8S_VERSION")
-	if k8sVersion == "" {
-		k8sVersion = "k8s-1.18.9"
-	}
-
-	k8sVersionsDetails := strings.Split(k8sVersion, "-")
-	if len(k8sVersionsDetails) != 2 {
-		return fmt.Errorf("%s: invalid k8s version: %v should be format <distribution>-<version>", op, k8sVersion)
-	}
-
-	token := os.Getenv("HCLOUD_TOKEN")
-	if len(token) != 64 {
-		return fmt.Errorf("%s: No valid HCLOUD_TOKEN found", op)
-	}
-	tc.KeepOnFailure = os.Getenv("KEEP_SERVER_ON_FAILURE") == "yes"
-
-	var additionalSSHKeys []*hcloud.SSHKey
-
-	opts := []hcloud.ClientOption{
-		hcloud.WithToken(token),
-		hcloud.WithApplication("hcloud-ccm-testsuite", "1.0"),
-	}
-	hcloudClient := hcloud.NewClient(opts...)
-	additionalSSHKeysIDOrName := os.Getenv("USE_SSH_KEYS")
-	if additionalSSHKeysIDOrName != "" {
-		idsOrNames := strings.Split(additionalSSHKeysIDOrName, ",")
-		for _, idOrName := range idsOrNames {
-			additionalSSHKey, _, err := hcloudClient.SSHKey.Get(context.Background(), idOrName)
-			if err != nil {
-				return fmt.Errorf("%s: %s", op, err)
-			}
-			additionalSSHKeys = append(additionalSSHKeys, additionalSSHKey)
-		}
-	}
-
-	fmt.Printf("%s: Test against %s\n", op, k8sVersion)
-
-	imageName := os.Getenv("CSI_IMAGE_NAME")
-	buildImage := false
-	if imageName == "" {
-		imageName = fmt.Sprintf("hcloud-csi:ci_%s", testIdentifier)
-		buildImage = true
-	}
-	if buildImage {
-		fmt.Printf("%s: Building image\n", op)
-		if _, err := integrationtests.DockerBuild(imageName, "../"); err != nil {
-			return fmt.Errorf("%s: %v", op, err)
-		}
-	}
-
-	fmt.Printf("%s: Saving image to disk\n", op)
-	if _, err := integrationtests.DockerSave(imageName, "ci-hcloud-csi-driver.tar"); err != nil {
-		return fmt.Errorf("%s: %v", op, err)
-	}
-
-	tc.setup = &hcloudK8sSetup{
-		Hcloud:          hcloudClient,
-		K8sDistribution: K8sDistribution(k8sVersionsDetails[0]),
-		K8sVersion:      k8sVersionsDetails[1],
-		TestIdentifier:  testIdentifier,
-		ImageName:       imageName,
-		HcloudToken:     token,
-		KeepOnFailure:   tc.KeepOnFailure,
-	}
-	fmt.Printf("%s: Setting up test env\n", op)
-
-	err := tc.setup.PrepareTestEnv(context.Background(), additionalSSHKeys)
-	if err != nil {
-		return fmt.Errorf("%s: %s", op, err)
-	}
-
-	kubeconfigPath, err := tc.setup.PrepareK8s()
-	if err != nil {
-		return fmt.Errorf("%s: %s", op, err)
-	}
-
-	config, err := clientcmd.BuildConfigFromFlags("", kubeconfigPath)
-	if err != nil {
-		return fmt.Errorf("%s: clientcmd.BuildConfigFromFlags: %s", op, err)
-	}
-
-	tc.k8sClient, err = kubernetes.NewForConfig(config)
-	if err != nil {
-		return fmt.Errorf("%s: kubernetes.NewForConfig: %s", op, err)
-	}
-
-	tc.started = true
-	return nil
-}
-
-func (tc *TestCluster) Start() error {
-	const op = "e2etests/TestCluster.Start"
"e2etests/TestCluster.Start" - - tc.mu.Lock() - defer tc.mu.Unlock() - - if err := tc.initialize(); err != nil { - return fmt.Errorf("%s: %v", op, err) - } - if err := tc.ensureNodesReady(); err != nil { - return fmt.Errorf("%s: %v", op, err) - } - if err := tc.ensurePodsReady(); err != nil { - return fmt.Errorf("%s: %v", op, err) - } - return nil -} - -func (tc *TestCluster) Stop(testFailed bool) error { - const op = "e2etests/TestCluster.Stop" - - tc.mu.Lock() - defer tc.mu.Unlock() - - if !tc.started { - return nil - } - - if err := tc.setup.TearDown(testFailed); err != nil { - fmt.Printf("%s: Tear Down: %s", op, err) - } - return nil -} - -func (tc *TestCluster) ensureNodesReady() error { - const op = "e2etests/ensureNodesReady" - - err := wait.Poll(1*time.Second, 5*time.Minute, func() (bool, error) { - var totalNodes = len(tc.setup.WorkerNodes) + 1 // Number Worker Nodes + 1 Cluster Node - var readyNodes int - nodes, err := tc.k8sClient.CoreV1().Nodes().List(context.Background(), metav1.ListOptions{}) - if err != nil { - return false, err - } - for _, node := range nodes.Items { - for _, cond := range node.Status.Conditions { - if cond.Type == corev1.NodeReady && cond.Status == corev1.ConditionTrue { - readyNodes++ - } - } - } - pendingNodes := totalNodes - readyNodes - fmt.Printf("Waiting for %d/%d nodes\n", pendingNodes, totalNodes) - return pendingNodes == 0, err - }) - - if err != nil { - return fmt.Errorf("%s: %s", op, err) - } - return nil -} - -func (tc *TestCluster) ensurePodsReady() error { - const op = "e2etests/ensurePodsReady" - - err := wait.Poll(1*time.Second, 10*time.Minute, func() (bool, error) { - pods, err := tc.k8sClient.CoreV1().Pods("kube-system").List(context.Background(), metav1.ListOptions{}) - if err != nil { - return false, err - } - totalPods := len(pods.Items) - - var readyPods int - for _, pod := range pods.Items { - for _, cond := range pod.Status.Conditions { - if cond.Type == corev1.PodReady && cond.Status == corev1.ConditionTrue { - readyPods++ - } - } - } - - pendingPods := totalPods - readyPods - fmt.Printf("Waiting for %d/%d pods\n", pendingPods, totalPods) - return pendingPods == 0, err - }) - - if err != nil { - return fmt.Errorf("%s: %s", op, err) - } - return nil -} diff --git a/hack/dev-down.sh b/hack/dev-down.sh new file mode 100755 index 00000000..bf3097fb --- /dev/null +++ b/hack/dev-down.sh @@ -0,0 +1,37 @@ +#!/usr/bin/env bash +set -ue -o pipefail +SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" + +scope="${SCOPE:-dev}" +scope=${scope//[^a-zA-Z0-9_]/-} +scope_name=csi-${scope} +label="managedby=hack" + +if [[ "${ALL:-}" == "" ]]; then + label="$label,scope=$scope_name" + rm -f $SCRIPT_DIR/.ssh-$scope $SCRIPT_DIR/.kubeconfig-$scope +else + rm -f $SCRIPT_DIR/.ssh* $SCRIPT_DIR/.kubeconfig* +fi + +for instance in $(hcloud server list -o noheader -o columns=id -l $label); do + ( + hcloud server delete $instance + ) & +done + + +for key in $(hcloud ssh-key list -o noheader -o columns=name -l $label); do + ( + hcloud ssh-key delete $key + ) & +done + + +for key in $(hcloud network list -o noheader -o columns=name -l $label); do + ( + hcloud network delete $key + ) & +done + +wait \ No newline at end of file diff --git a/hack/dev-up.sh b/hack/dev-up.sh new file mode 100755 index 00000000..f5223767 --- /dev/null +++ b/hack/dev-up.sh @@ -0,0 +1,173 @@ +#!/usr/bin/env bash +set -ueo pipefail +SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" + +if [[ -n "${DEBUG:-}" ]]; then set -x; fi + +# 
+{
+  if ! hcloud version >/dev/null; then echo "ERROR: 'hcloud' CLI not found, please install it and make it available on your \$PATH"; exit 1; fi
+  if ! k3sup version >/dev/null; then echo "ERROR: 'k3sup' not found, please install it and make it available on your \$PATH"; exit 1; fi
+  if ! helm version >/dev/null; then echo "ERROR: 'helm' not found, please install it and make it available on your \$PATH"; exit 1; fi
+  if [[ "${HCLOUD_TOKEN:-}" == "" ]]; then echo "ERROR: please set \$HCLOUD_TOKEN"; exit 1; fi
+
+  # We run a lot of subshells below for speed. If any encounter an error, we shut down the whole process group, pronto.
+  function error() {
+    echo "Onoes, something went wrong! :( The output above might have some clues."
+    kill 0
+  }
+
+  trap error ERR
+
+  image_name=${IMAGE_NAME:-ubuntu-20.04}
+  instance_count=${INSTANCES:-3}
+  instance_type=${INSTANCE_TYPE:-cpx11}
+  location=${LOCATION:-fsn1}
+  network_zone=${NETWORK_ZONE:-eu-central}
+  ssh_keys=${SSH_KEYS:-}
+  channel=${K3S_CHANNEL:-stable}
+  network_cidr=${NETWORK_CIDR:-10.0.0.0/8}
+  subnet_cidr=${SUBNET_CIDR:-10.0.0.0/24}
+  cluster_cidr=${CLUSTER_CIDR:-10.244.0.0/16}
+  scope="${SCOPE:-dev}"
+  scope=${scope//[^a-zA-Z0-9_]/-}
+  scope_name=csi-${scope}
+  label="managedby=hack,scope=$scope_name"
+  ssh_private_key="$SCRIPT_DIR/.ssh-$scope"
+  k3s_opts=${K3S_OPTS:-"--kubelet-arg cloud-provider=external"}
+  k3s_server_opts=${K3S_SERVER_OPTS:-"--disable-cloud-controller --disable=traefik --disable=servicelb --disable=local-storage --flannel-backend=none --cluster-cidr ${cluster_cidr}"}
+
+  echo -n "$HCLOUD_TOKEN" > "$SCRIPT_DIR/.token-$scope"
+
+  export KUBECONFIG="$SCRIPT_DIR/.kubeconfig-$scope"
+
+  ssh_command="ssh -i $ssh_private_key -o StrictHostKeyChecking=off -o BatchMode=yes -o ConnectTimeout=5"
+
+  # Generate SSH keys and upload public key to Hetzner Cloud.
+  ( trap error ERR
+    [[ ! -f $ssh_private_key ]] && ssh-keygen -t ed25519 -f $ssh_private_key -C '' -N ''
+    [[ ! -f $ssh_private_key.pub ]] && ssh-keygen -y -f $ssh_private_key > $ssh_private_key.pub
+    if ! hcloud ssh-key describe $scope_name >/dev/null 2>&1; then
+      hcloud ssh-key create --label $label --name $scope_name --public-key-from-file $ssh_private_key.pub
+    fi
+  ) &
+
+  # Create Network
+  ( trap error ERR
+    if ! hcloud network describe $scope_name >/dev/null 2>&1; then
+      hcloud network create --label $label --ip-range $network_cidr --name $scope_name
+      hcloud network add-subnet --network-zone $network_zone --type cloud --ip-range $subnet_cidr $scope_name
+    fi
+  ) &
+
+
+  for num in $(seq $instance_count); do
+    # Create server and initialize Kubernetes on it with k3sup.
+    ( trap error ERR
+
+      server_name="$scope_name-$num"
+
+      # Maybe cluster is already up and node is already there.
+      if kubectl get node $server_name >/dev/null 2>&1; then
+        exit 0
+      fi
+
+      ip=$(hcloud server ip $server_name 2>/dev/null || true)
+
+      if [[ -z "${ip:-}" ]]; then
+        # Wait for SSH key
+        until hcloud ssh-key describe $scope_name >/dev/null 2>&1; do sleep 1; done
+        until hcloud network describe $scope_name >/dev/null 2>&1; do sleep 1; done
+
+        createcmd="hcloud server create --image $image_name --label $label --location $location --name $server_name --ssh-key=$scope_name --type $instance_type --network $scope_name"
+        for key in $ssh_keys; do
+          createcmd+=" --ssh-key $key"
+        done
+        $createcmd
+        ip=$(hcloud server ip $server_name)
+      fi
+
+      # Wait for SSH.
+ until [ "$($ssh_command root@$ip echo ok 2>/dev/null)" = "ok" ]; do + sleep 1 + done + + $ssh_command root@$ip 'mkdir -p /etc/rancher/k3s && cat > /etc/rancher/k3s/registries.yaml' < $SCRIPT_DIR/k3s-registries.yaml + + private_ip=$(hcloud server describe $server_name -o format="{{ (index .PrivateNet 0).IP }}") + k3s_node_ip_opts="--node-external-ip ${ip} --node-ip ${private_ip}" + + if [[ "$num" == "1" ]]; then + # First node is control plane. + k3sup install --print-config=false --ip $ip --k3s-channel $channel --k3s-extra-args "${k3s_server_opts} ${k3s_opts} ${k3s_node_ip_opts}" --local-path $KUBECONFIG --ssh-key $ssh_private_key + else + # All subsequent nodes are initialized as workers. + + # Can't go any further until control plane has bootstrapped a bit though. + until $ssh_command root@$(hcloud server ip $scope_name-1 || true) stat /etc/rancher/node/password >/dev/null 2>&1; do + sleep 1 + done + + k3sup join --server-ip $(hcloud server ip $scope_name-1) --ip $ip --k3s-channel $channel --k3s-extra-args "${k3s_opts} ${k3s_node_ip_opts}" --ssh-key $ssh_private_key + fi + ) & + + # Wait for this node to show up in the cluster. + ( trap error ERR; set +x + until kubectl wait --for=condition=Ready node/$scope_name-$num >/dev/null 2>&1; do sleep 1; done + echo $scope_name-$num is up and in cluster + ) & + done + + ( trap error ERR + # Control plane init tasks. + # This is running in parallel with the server init, above. + + # Wait for control plane to look alive. + until kubectl get nodes >/dev/null 2>&1; do sleep 1; done; + + # Deploy private registry. + ( trap error ERR + if ! helm status -n kube-system registry >/dev/null 2>&1; then + helm install registry docker-registry \ + --repo=https://helm.twun.io \ + -n kube-system \ + --version 2.2.2 \ + --set service.clusterIP=10.43.0.2 \ + --set 'tolerations[0].key=node.cloudprovider.kubernetes.io/uninitialized' \ + --set 'tolerations[0].operator=Exists' + fi + ) & + + # Install Cilium. + ( trap error ERR + if ! helm status -n kube-system cilium >/dev/null 2>&1; then + helm install cilium cilium --repo https://helm.cilium.io/ -n kube-system --version 1.13.1 \ + --set tunnel=disabled \ + --set ipv4NativeRoutingCIDR=$cluster_cidr \ + --set ipam.mode=kubernetes + fi) & + + # Create HCLOUD_TOKEN Secret for hcloud-cloud-controller-manager. + ( trap error ERR + if ! kubectl -n kube-system get secret hcloud >/dev/null 2>&1; then + kubectl -n kube-system create secret generic hcloud --from-literal="token=$HCLOUD_TOKEN" --from-literal="network=$scope_name" + fi) & + wait + + # Install hcloud-cloud-controller-manager + ( trap error ERR + if ! helm status -n kube-system hccm >/dev/null 2>&1; then + helm install hccm hcloud-cloud-controller-manager --repo https://charts.hetzner.cloud/ -n kube-system --version 1.14.2 --set networking.enabled=true + fi) & + wait + ) & + wait + echo "Success - cluster fully initialized and ready, why not see for yourself?" 
+  echo '$ kubectl get nodes'
+  kubectl get nodes
+} >&2
+
+echo "export KUBECONFIG=$KUBECONFIG"
+$SCRIPT_DIR/registry-port-forward.sh
+echo "export SKAFFOLD_DEFAULT_REPO=localhost:30666"

diff --git a/hack/k3s-registries.yaml b/hack/k3s-registries.yaml
new file mode 100644
index 00000000..8c808b12
--- /dev/null
+++ b/hack/k3s-registries.yaml
@@ -0,0 +1,3 @@
+mirrors:
+  localhost:30666:
+    endpoint: ["http://10.43.0.2:5000"]

diff --git a/hack/registry-port-forward.sh b/hack/registry-port-forward.sh
new file mode 100755
index 00000000..082079d2
--- /dev/null
+++ b/hack/registry-port-forward.sh
@@ -0,0 +1,16 @@
+#!/usr/bin/env bash
+set -ue -o pipefail
+SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
+
+{
+until kubectl -n kube-system --timeout=30s rollout status deployment/registry-docker-registry >/dev/null 2>&1; do sleep 1; done
+old_pid=$(cat $SCRIPT_DIR/.reg-pf 2>/dev/null || true)
+if [[ -n "$old_pid" ]]; then
+  echo "killing old port-forward with PID $old_pid"
+  kill $old_pid || true
+fi
+
+nohup kubectl port-forward -n kube-system svc/registry-docker-registry 30666:5000 >$SCRIPT_DIR/.reg-pf.out 2>$SCRIPT_DIR/.reg-pf.err &
+} >&2
+
+echo $! > $SCRIPT_DIR/.reg-pf

diff --git a/script/e2etest-local.sh b/script/e2etest-local.sh
deleted file mode 100755
index 03e78604..00000000
--- a/script/e2etest-local.sh
+++ /dev/null
@@ -1,28 +0,0 @@
-#!/usr/bin/env bash
-
-set -e
-
-function test_k8s_version() {
-  if [[ -z "$1" ]]; then
-    echo "Usage: $0 <k8s version>"
-    return 1
-  fi
-
-  export K8S_VERSION="$1"
-
-  echo "Testing $K8S_VERSION"
-  if ! go test -count=1 -v -timeout 60m ./e2etests; then
-    return 2
-  fi
-
-}
-
-if [[ -z "$HCLOUD_TOKEN" ]]; then
-  echo "HCLOUD_TOKEN not set! Aborting tests."
-  exit 1
-fi
-
-K8S_VERSIONS=("k8s-1.19.10" "k8s-1.20.6" "k8s-1.21.0")
-for v in "${K8S_VERSIONS[@]}"; do
-  test_k8s_version "$v"
-done

diff --git a/skaffold.yaml b/skaffold.yaml
index b6b9b487..cc283bfc 100644
--- a/skaffold.yaml
+++ b/skaffold.yaml
@@ -1,4 +1,4 @@
-apiVersion: skaffold/v2beta19
+apiVersion: skaffold/v4beta3
 kind: Config
 metadata:
   name: csi-driver
@@ -11,6 +11,8 @@ build:
       - hetznercloud/hcloud-csi-driver:buildcache
   local:
     useBuildkit: true
-deploy:
+  insecureRegistries:
+    - localhost:30666
+manifests:
   kustomize:
-    paths: [hack/]
\ No newline at end of file
+    paths: [hack/]
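---

Reviewer note: a minimal usage sketch of the scripts this patch adds, run
from the repository root. It mirrors what the GitHub workflow above does;
the commands are illustrative rather than a prescribed interface, and
assume hcloud, k3sup, helm, kubectl and skaffold are on $PATH:

    export HCLOUD_TOKEN=...       # project API token
    source <(hack/dev-up.sh)      # boot the cluster; exports KUBECONFIG and SKAFFOLD_DEFAULT_REPO
    skaffold run                  # build the csi-driver image and deploy it into the dev cluster
    e2etests/run-e2e-tests.sh     # fetch the upstream e2e.test/ginkgo binaries and run the storage suite
    hack/dev-down.sh              # delete all resources created by dev-up.sh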