From d03357967d73d6b11ff6a798a679b0812222a119 Mon Sep 17 00:00:00 2001 From: Thomas Guettler Date: Fri, 9 Aug 2024 11:14:16 +0200 Subject: [PATCH 01/15] :seedling: WipeDisk annotation Introduced annotation "wipedisk.hetznerbaremetalhost.infrastructure.cluster.x-k8s.io" Set it to a space seperated list of WWNs or "all". The given disks will be erased with `wipefs -af` before provisioning. Once wipefs was executed the annotation gets removed. --- api/v1beta1/conditions_const.go | 2 + api/v1beta1/hetznerbaremetalhost_types.go | 4 ++ .../baremetal/client/mocks/ssh/Client.go | 49 +++++++++++++++++++ .../baremetal/client/ssh/ssh_client.go | 37 +++++++++++++- pkg/services/baremetal/host/host.go | 42 ++++++++++++++++ 5 files changed, 133 insertions(+), 1 deletion(-) diff --git a/api/v1beta1/conditions_const.go b/api/v1beta1/conditions_const.go index bcc054364..ff7f9e3bd 100644 --- a/api/v1beta1/conditions_const.go +++ b/api/v1beta1/conditions_const.go @@ -183,6 +183,8 @@ const ( ServerNotFoundReason = "ServerNotFound" // LinuxOnOtherDiskFoundReason indicates that the server can't be provisioned on the given WWN, since the reboot would fail. LinuxOnOtherDiskFoundReason = "LinuxOnOtherDiskFound" + // WipeDiskFailedReason indicates that erasing the disks before provisioning failed. + WipeDiskFailedReason = "WipeDiskFailed" // SSHToRescueSystemFailedReason indicates that the rescue system can't be reached via ssh. SSHToRescueSystemFailedReason = "SSHToRescueSystemFailed" // RebootTimedOutReason indicates that the reboot timed out. diff --git a/api/v1beta1/hetznerbaremetalhost_types.go b/api/v1beta1/hetznerbaremetalhost_types.go index 47be9682a..d1a3884af 100644 --- a/api/v1beta1/hetznerbaremetalhost_types.go +++ b/api/v1beta1/hetznerbaremetalhost_types.go @@ -37,6 +37,10 @@ const ( // HostAnnotation is the key for an annotation that should go on a HetznerBareMetalMachine to // reference what HetznerBareMetalHost it corresponds to. HostAnnotation = "infrastructure.cluster.x-k8s.io/HetznerBareMetalHost" + + // WipeDiskAnnotation indicates which Disks (WWNs) to erase before provisioning + // The value is a list of WWNS or "all". + WipeDiskAnnotation = "wipedisk.hetznerbaremetalhost.infrastructure.cluster.x-k8s.io" ) // RootDeviceHints holds the hints for specifying the storage location diff --git a/pkg/services/baremetal/client/mocks/ssh/Client.go b/pkg/services/baremetal/client/mocks/ssh/Client.go index 213951ced..8b259baa4 100644 --- a/pkg/services/baremetal/client/mocks/ssh/Client.go +++ b/pkg/services/baremetal/client/mocks/ssh/Client.go @@ -3,6 +3,8 @@ package mocks import ( + context "context" + mock "github.com/stretchr/testify/mock" sshclient "github.com/syself/cluster-api-provider-hetzner/pkg/services/baremetal/client/ssh" ) @@ -1216,6 +1218,53 @@ func (_c *Client_UntarTGZ_Call) RunAndReturn(run func() sshclient.Output) *Clien return _c } +// WipeDisks provides a mock function with given fields: ctx, sliceOfWwns +func (_m *Client) WipeDisks(ctx context.Context, sliceOfWwns []string) error { + ret := _m.Called(ctx, sliceOfWwns) + + if len(ret) == 0 { + panic("no return value specified for WipeDisks") + } + + var r0 error + if rf, ok := ret.Get(0).(func(context.Context, []string) error); ok { + r0 = rf(ctx, sliceOfWwns) + } else { + r0 = ret.Error(0) + } + + return r0 +} + +// Client_WipeDisks_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'WipeDisks' +type Client_WipeDisks_Call struct { + *mock.Call +} + +// WipeDisks is a helper method to define mock.On call +// - ctx context.Context +// - sliceOfWwns []string +func (_e *Client_Expecter) WipeDisks(ctx interface{}, sliceOfWwns interface{}) *Client_WipeDisks_Call { + return &Client_WipeDisks_Call{Call: _e.mock.On("WipeDisks", ctx, sliceOfWwns)} +} + +func (_c *Client_WipeDisks_Call) Run(run func(ctx context.Context, sliceOfWwns []string)) *Client_WipeDisks_Call { + _c.Call.Run(func(args mock.Arguments) { + run(args[0].(context.Context), args[1].([]string)) + }) + return _c +} + +func (_c *Client_WipeDisks_Call) Return(_a0 error) *Client_WipeDisks_Call { + _c.Call.Return(_a0) + return _c +} + +func (_c *Client_WipeDisks_Call) RunAndReturn(run func(context.Context, []string) error) *Client_WipeDisks_Call { + _c.Call.Return(run) + return _c +} + // NewClient creates a new instance of Client. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations. // The first argument is typically a *testing.T value. func NewClient(t interface { diff --git a/pkg/services/baremetal/client/ssh/ssh_client.go b/pkg/services/baremetal/client/ssh/ssh_client.go index 4cca01dc9..9bd977e69 100644 --- a/pkg/services/baremetal/client/ssh/ssh_client.go +++ b/pkg/services/baremetal/client/ssh/ssh_client.go @@ -20,15 +20,20 @@ package sshclient import ( "bufio" "bytes" - _ "embed" + "context" "encoding/base64" "errors" "fmt" "os" "regexp" + "slices" "strings" "time" + ctrl "sigs.k8s.io/controller-runtime" + + _ "embed" + "golang.org/x/crypto/ssh" ) @@ -39,6 +44,9 @@ const ( //go:embed detect-linux-on-another-disk.sh var detectLinuxOnAnotherDiskShellScript string +//go:embed wipe-disks.sh +var wipeDisksShellScript string + var downloadFromOciShellScript = `#!/bin/bash # Copyright 2023 The Kubernetes Authors. @@ -253,6 +261,10 @@ type Client interface { ResetKubeadm() Output UntarTGZ() Output DetectLinuxOnAnotherDisk(sliceOfWwns []string) Output + + // Erase filesystem, raid and partition-table signatures. + // String "all" will wipe all disks. + WipeDisks(ctx context.Context, sliceOfWwns []string) error } // Factory is the interface for creating new Client objects. @@ -552,6 +564,29 @@ EOF_VIA_SSH `, strings.Join(sliceOfWwns, " "), detectLinuxOnAnotherDiskShellScript)) } +func (c *sshClient) WipeDisks(ctx context.Context, sliceOfWwns []string) error { + log := ctrl.LoggerFrom(ctx) + if len(sliceOfWwns) == 0 { + return nil + } + if slices.Contains(sliceOfWwns, "all") { + out := c.runSSH("lsblk --nodeps --noheadings -o WWN | sort -u") + if out.Err != nil { + return fmt.Errorf("failed to WWN of all disks: %w", out.Err) + } + log.Info("WipeDisks: 'all' was given. Found these WWNs", "WWNs", sliceOfWwns) + sliceOfWwns = strings.Fields(out.StdOut) + } + out := c.runSSH(fmt.Sprintf(`cat <<'EOF_VIA_SSH' | bash -s -- %s +%s +EOF_VIA_SSH +`, strings.Join(sliceOfWwns, " "), wipeDisksShellScript)) + if out.Err != nil { + return fmt.Errorf("WipeDisks for %+v failed: %s. %s: %w", sliceOfWwns, out.StdOut, out.StdErr, out.Err) + } + return nil +} + func (c *sshClient) UntarTGZ() Output { // read tgz from container image. fileName := "/installimage.tgz" diff --git a/pkg/services/baremetal/host/host.go b/pkg/services/baremetal/host/host.go index d31b20b6c..e6823bf96 100644 --- a/pkg/services/baremetal/host/host.go +++ b/pkg/services/baremetal/host/host.go @@ -1095,6 +1095,48 @@ func (s *Service) actionImageInstalling(ctx context.Context) actionResult { } func (s *Service) actionImageInstallingStartBackgroundProcess(ctx context.Context, sshClient sshclient.Client) actionResult { + // Call WipeDisks if the corresponding annotation is set. + sliceOfWwns := strings.Fields(s.scope.HetznerBareMetalHost.Annotations[infrav1.WipeDiskAnnotation]) + if len(sliceOfWwns) > 0 { + err := sshClient.WipeDisks(ctx, sliceOfWwns) + if err != nil { + var exitErr *ssh.ExitError + if errors.As(err, &exitErr) && exitErr.ExitStatus() > 0 { + // The script was executed, but an error occurred. + // Do not retry. This needs manual intervention. + msg := fmt.Sprintf("WipeDisks failed (permanent error): %s", + err.Error()) + conditions.MarkFalse( + s.scope.HetznerBareMetalHost, + infrav1.ProvisionSucceededCondition, + infrav1.WipeDiskFailedReason, + clusterv1.ConditionSeverityError, + msg, + ) + record.Warn(s.scope.HetznerBareMetalHost, infrav1.WipeDiskFailedReason, msg) + s.scope.HetznerBareMetalHost.SetError(infrav1.PermanentError, msg) + return actionStop{} + } + // some other error happened. It is likely that the ssh connection failed. + msg := fmt.Sprintf("WipeDisks failed (Will retry): %s", + err.Error()) + conditions.MarkFalse( + s.scope.HetznerBareMetalHost, + infrav1.ProvisionSucceededCondition, + infrav1.SSHToRescueSystemFailedReason, + clusterv1.ConditionSeverityInfo, + msg, + ) + record.Event(s.scope.HetznerBareMetalHost, infrav1.SSHToRescueSystemFailedReason, msg) + return actionContinue{ + delay: 10 * time.Second, + } + } + delete(s.scope.HetznerBareMetalHost.Annotations, infrav1.WipeDiskAnnotation) + record.Eventf(s.scope.HetznerBareMetalHost, "WipeDiskDone", "WipeDisks (%v) was done. Annotation %q was removed", + sliceOfWwns, infrav1.WipeDiskAnnotation) + } + // If there is a Linux OS on an other disk, then the reboot after the provisioning // will likely fail, because the machine boots into the other operating system. // We want detect that early, and not start the provisioning process. From d980cd5217dcc33fadf9c9c633b5b8e9c4f4f90b Mon Sep 17 00:00:00 2001 From: Thomas Guettler Date: Fri, 9 Aug 2024 11:21:28 +0200 Subject: [PATCH 02/15] .. addes shell script. --- .../baremetal/client/ssh/wipe-disks.sh | 49 +++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 pkg/services/baremetal/client/ssh/wipe-disks.sh diff --git a/pkg/services/baremetal/client/ssh/wipe-disks.sh b/pkg/services/baremetal/client/ssh/wipe-disks.sh new file mode 100644 index 000000000..cea3717ff --- /dev/null +++ b/pkg/services/baremetal/client/ssh/wipe-disks.sh @@ -0,0 +1,49 @@ +#!/bin/bash + +# Copyright 2024 The Kubernetes Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +trap 'echo "ERROR: A command has failed. Exiting the script. Line was ($0:$LINENO): $(sed -n "${LINENO}p" "$0")"; exit 3' ERR +set -Eeuo pipefail + +function usage() { + echo "$0 wwn1 [wwn2 ...]" + echo " Wipe all filesystem, raid or partition-table signaturesfrom the specified disks." + echo " ATTENTION! THIS DELETES ALL DATA ON THE GIVEN DISK!" + echo "Existing WWNs:" + lsblk -oNAME,WWN | grep -vi loop || true +} + +if [ $# -eq 0 ]; then + echo "Error: No WWN was provided." + echo + usage + exit 3 +fi + +# Iterate over all input arguments +for wwn in "$@"; do + if ! lsblk -l -oWWN | grep -qFx "${wwn}"; then + echo "$wwn is not a WWN of this machine" + echo + usage + exit 3 + fi + device=$(lsblk -oNAME,WWN,TYPE | grep disk | grep "$wwn" | cut -d' ' -f1) + if [ -z "$device" ]; then + echo "Failed to find device for WWN $wwn" + exit 3 + fi + wipefs -af "$device" +done From 49369a04b769daa4b05a1379ce0aa71ca3f684e2 Mon Sep 17 00:00:00 2001 From: Thomas Guettler Date: Fri, 9 Aug 2024 11:27:35 +0200 Subject: [PATCH 03/15] .. use singular everywhere (wipeDisk not wipeDisks) --- .../baremetal/client/mocks/ssh/Client.go | 22 +++++++++---------- .../baremetal/client/ssh/ssh_client.go | 20 ++++++++--------- .../ssh/{wipe-disks.sh => wipe-disk.sh} | 0 pkg/services/baremetal/host/host.go | 10 ++++----- 4 files changed, 25 insertions(+), 27 deletions(-) rename pkg/services/baremetal/client/ssh/{wipe-disks.sh => wipe-disk.sh} (100%) diff --git a/pkg/services/baremetal/client/mocks/ssh/Client.go b/pkg/services/baremetal/client/mocks/ssh/Client.go index 8b259baa4..2ffed6d67 100644 --- a/pkg/services/baremetal/client/mocks/ssh/Client.go +++ b/pkg/services/baremetal/client/mocks/ssh/Client.go @@ -1218,12 +1218,12 @@ func (_c *Client_UntarTGZ_Call) RunAndReturn(run func() sshclient.Output) *Clien return _c } -// WipeDisks provides a mock function with given fields: ctx, sliceOfWwns -func (_m *Client) WipeDisks(ctx context.Context, sliceOfWwns []string) error { +// WipeDisk provides a mock function with given fields: ctx, sliceOfWwns +func (_m *Client) WipeDisk(ctx context.Context, sliceOfWwns []string) error { ret := _m.Called(ctx, sliceOfWwns) if len(ret) == 0 { - panic("no return value specified for WipeDisks") + panic("no return value specified for WipeDisk") } var r0 error @@ -1236,31 +1236,31 @@ func (_m *Client) WipeDisks(ctx context.Context, sliceOfWwns []string) error { return r0 } -// Client_WipeDisks_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'WipeDisks' -type Client_WipeDisks_Call struct { +// Client_WipeDisk_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'WipeDisk' +type Client_WipeDisk_Call struct { *mock.Call } -// WipeDisks is a helper method to define mock.On call +// WipeDisk is a helper method to define mock.On call // - ctx context.Context // - sliceOfWwns []string -func (_e *Client_Expecter) WipeDisks(ctx interface{}, sliceOfWwns interface{}) *Client_WipeDisks_Call { - return &Client_WipeDisks_Call{Call: _e.mock.On("WipeDisks", ctx, sliceOfWwns)} +func (_e *Client_Expecter) WipeDisk(ctx interface{}, sliceOfWwns interface{}) *Client_WipeDisk_Call { + return &Client_WipeDisk_Call{Call: _e.mock.On("WipeDisk", ctx, sliceOfWwns)} } -func (_c *Client_WipeDisks_Call) Run(run func(ctx context.Context, sliceOfWwns []string)) *Client_WipeDisks_Call { +func (_c *Client_WipeDisk_Call) Run(run func(ctx context.Context, sliceOfWwns []string)) *Client_WipeDisk_Call { _c.Call.Run(func(args mock.Arguments) { run(args[0].(context.Context), args[1].([]string)) }) return _c } -func (_c *Client_WipeDisks_Call) Return(_a0 error) *Client_WipeDisks_Call { +func (_c *Client_WipeDisk_Call) Return(_a0 error) *Client_WipeDisk_Call { _c.Call.Return(_a0) return _c } -func (_c *Client_WipeDisks_Call) RunAndReturn(run func(context.Context, []string) error) *Client_WipeDisks_Call { +func (_c *Client_WipeDisk_Call) RunAndReturn(run func(context.Context, []string) error) *Client_WipeDisk_Call { _c.Call.Return(run) return _c } diff --git a/pkg/services/baremetal/client/ssh/ssh_client.go b/pkg/services/baremetal/client/ssh/ssh_client.go index 9bd977e69..39c59367b 100644 --- a/pkg/services/baremetal/client/ssh/ssh_client.go +++ b/pkg/services/baremetal/client/ssh/ssh_client.go @@ -21,6 +21,7 @@ import ( "bufio" "bytes" "context" + _ "embed" "encoding/base64" "errors" "fmt" @@ -30,11 +31,8 @@ import ( "strings" "time" - ctrl "sigs.k8s.io/controller-runtime" - - _ "embed" - "golang.org/x/crypto/ssh" + ctrl "sigs.k8s.io/controller-runtime" ) const ( @@ -44,8 +42,8 @@ const ( //go:embed detect-linux-on-another-disk.sh var detectLinuxOnAnotherDiskShellScript string -//go:embed wipe-disks.sh -var wipeDisksShellScript string +//go:embed wipe-disk.sh +var wipeDiskShellScript string var downloadFromOciShellScript = `#!/bin/bash @@ -264,7 +262,7 @@ type Client interface { // Erase filesystem, raid and partition-table signatures. // String "all" will wipe all disks. - WipeDisks(ctx context.Context, sliceOfWwns []string) error + WipeDisk(ctx context.Context, sliceOfWwns []string) error } // Factory is the interface for creating new Client objects. @@ -564,7 +562,7 @@ EOF_VIA_SSH `, strings.Join(sliceOfWwns, " "), detectLinuxOnAnotherDiskShellScript)) } -func (c *sshClient) WipeDisks(ctx context.Context, sliceOfWwns []string) error { +func (c *sshClient) WipeDisk(ctx context.Context, sliceOfWwns []string) error { log := ctrl.LoggerFrom(ctx) if len(sliceOfWwns) == 0 { return nil @@ -574,15 +572,15 @@ func (c *sshClient) WipeDisks(ctx context.Context, sliceOfWwns []string) error { if out.Err != nil { return fmt.Errorf("failed to WWN of all disks: %w", out.Err) } - log.Info("WipeDisks: 'all' was given. Found these WWNs", "WWNs", sliceOfWwns) + log.Info("WipeDisk: 'all' was given. Found these WWNs", "WWNs", sliceOfWwns) sliceOfWwns = strings.Fields(out.StdOut) } out := c.runSSH(fmt.Sprintf(`cat <<'EOF_VIA_SSH' | bash -s -- %s %s EOF_VIA_SSH -`, strings.Join(sliceOfWwns, " "), wipeDisksShellScript)) +`, strings.Join(sliceOfWwns, " "), wipeDiskShellScript)) if out.Err != nil { - return fmt.Errorf("WipeDisks for %+v failed: %s. %s: %w", sliceOfWwns, out.StdOut, out.StdErr, out.Err) + return fmt.Errorf("WipeDisk for %+v failed: %s. %s: %w", sliceOfWwns, out.StdOut, out.StdErr, out.Err) } return nil } diff --git a/pkg/services/baremetal/client/ssh/wipe-disks.sh b/pkg/services/baremetal/client/ssh/wipe-disk.sh similarity index 100% rename from pkg/services/baremetal/client/ssh/wipe-disks.sh rename to pkg/services/baremetal/client/ssh/wipe-disk.sh diff --git a/pkg/services/baremetal/host/host.go b/pkg/services/baremetal/host/host.go index e6823bf96..033b8a2b8 100644 --- a/pkg/services/baremetal/host/host.go +++ b/pkg/services/baremetal/host/host.go @@ -1095,16 +1095,16 @@ func (s *Service) actionImageInstalling(ctx context.Context) actionResult { } func (s *Service) actionImageInstallingStartBackgroundProcess(ctx context.Context, sshClient sshclient.Client) actionResult { - // Call WipeDisks if the corresponding annotation is set. + // Call WipeDisk if the corresponding annotation is set. sliceOfWwns := strings.Fields(s.scope.HetznerBareMetalHost.Annotations[infrav1.WipeDiskAnnotation]) if len(sliceOfWwns) > 0 { - err := sshClient.WipeDisks(ctx, sliceOfWwns) + err := sshClient.WipeDisk(ctx, sliceOfWwns) if err != nil { var exitErr *ssh.ExitError if errors.As(err, &exitErr) && exitErr.ExitStatus() > 0 { // The script was executed, but an error occurred. // Do not retry. This needs manual intervention. - msg := fmt.Sprintf("WipeDisks failed (permanent error): %s", + msg := fmt.Sprintf("WipeDisk failed (permanent error): %s", err.Error()) conditions.MarkFalse( s.scope.HetznerBareMetalHost, @@ -1118,7 +1118,7 @@ func (s *Service) actionImageInstallingStartBackgroundProcess(ctx context.Contex return actionStop{} } // some other error happened. It is likely that the ssh connection failed. - msg := fmt.Sprintf("WipeDisks failed (Will retry): %s", + msg := fmt.Sprintf("WipeDisk failed (Will retry): %s", err.Error()) conditions.MarkFalse( s.scope.HetznerBareMetalHost, @@ -1133,7 +1133,7 @@ func (s *Service) actionImageInstallingStartBackgroundProcess(ctx context.Contex } } delete(s.scope.HetznerBareMetalHost.Annotations, infrav1.WipeDiskAnnotation) - record.Eventf(s.scope.HetznerBareMetalHost, "WipeDiskDone", "WipeDisks (%v) was done. Annotation %q was removed", + record.Eventf(s.scope.HetznerBareMetalHost, "WipeDiskDone", "WipeDisk (%v) was done. Annotation %q was removed", sliceOfWwns, infrav1.WipeDiskAnnotation) } From 81903309825e96bcdcf574250c35c615228f57dc Mon Sep 17 00:00:00 2001 From: Thomas Guettler Date: Fri, 9 Aug 2024 11:36:53 +0200 Subject: [PATCH 04/15] increate timeout, hoping to decrease flakiness --- controllers/hcloudmachine_controller_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/controllers/hcloudmachine_controller_test.go b/controllers/hcloudmachine_controller_test.go index 4542cfee5..2f2335c0b 100644 --- a/controllers/hcloudmachine_controller_test.go +++ b/controllers/hcloudmachine_controller_test.go @@ -377,7 +377,7 @@ var _ = Describe("HCloudMachineReconciler", func() { } return len(servers) == 0 - }, timeout, interval).Should(BeTrue()) + }, 2*timeout, interval).Should(BeTrue()) By("checking that bootstrap condition is not ready") From a729dace9cb13839cd4ed586bdf9225744919f60 Mon Sep 17 00:00:00 2001 From: Thomas Guettler Date: Fri, 9 Aug 2024 11:58:55 +0200 Subject: [PATCH 05/15] add output of wipefs to the event. --- .../baremetal/client/mocks/ssh/Client.go | 26 +++++++++++++------ .../baremetal/client/ssh/ssh_client.go | 12 ++++----- .../baremetal/client/ssh/wipe-disk.sh | 2 +- pkg/services/baremetal/host/host.go | 6 ++--- 4 files changed, 28 insertions(+), 18 deletions(-) diff --git a/pkg/services/baremetal/client/mocks/ssh/Client.go b/pkg/services/baremetal/client/mocks/ssh/Client.go index 2ffed6d67..9ea2354aa 100644 --- a/pkg/services/baremetal/client/mocks/ssh/Client.go +++ b/pkg/services/baremetal/client/mocks/ssh/Client.go @@ -1219,21 +1219,31 @@ func (_c *Client_UntarTGZ_Call) RunAndReturn(run func() sshclient.Output) *Clien } // WipeDisk provides a mock function with given fields: ctx, sliceOfWwns -func (_m *Client) WipeDisk(ctx context.Context, sliceOfWwns []string) error { +func (_m *Client) WipeDisk(ctx context.Context, sliceOfWwns []string) (string, error) { ret := _m.Called(ctx, sliceOfWwns) if len(ret) == 0 { panic("no return value specified for WipeDisk") } - var r0 error - if rf, ok := ret.Get(0).(func(context.Context, []string) error); ok { + var r0 string + var r1 error + if rf, ok := ret.Get(0).(func(context.Context, []string) (string, error)); ok { + return rf(ctx, sliceOfWwns) + } + if rf, ok := ret.Get(0).(func(context.Context, []string) string); ok { r0 = rf(ctx, sliceOfWwns) } else { - r0 = ret.Error(0) + r0 = ret.Get(0).(string) } - return r0 + if rf, ok := ret.Get(1).(func(context.Context, []string) error); ok { + r1 = rf(ctx, sliceOfWwns) + } else { + r1 = ret.Error(1) + } + + return r0, r1 } // Client_WipeDisk_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'WipeDisk' @@ -1255,12 +1265,12 @@ func (_c *Client_WipeDisk_Call) Run(run func(ctx context.Context, sliceOfWwns [] return _c } -func (_c *Client_WipeDisk_Call) Return(_a0 error) *Client_WipeDisk_Call { - _c.Call.Return(_a0) +func (_c *Client_WipeDisk_Call) Return(_a0 string, _a1 error) *Client_WipeDisk_Call { + _c.Call.Return(_a0, _a1) return _c } -func (_c *Client_WipeDisk_Call) RunAndReturn(run func(context.Context, []string) error) *Client_WipeDisk_Call { +func (_c *Client_WipeDisk_Call) RunAndReturn(run func(context.Context, []string) (string, error)) *Client_WipeDisk_Call { _c.Call.Return(run) return _c } diff --git a/pkg/services/baremetal/client/ssh/ssh_client.go b/pkg/services/baremetal/client/ssh/ssh_client.go index 39c59367b..d67a044ca 100644 --- a/pkg/services/baremetal/client/ssh/ssh_client.go +++ b/pkg/services/baremetal/client/ssh/ssh_client.go @@ -262,7 +262,7 @@ type Client interface { // Erase filesystem, raid and partition-table signatures. // String "all" will wipe all disks. - WipeDisk(ctx context.Context, sliceOfWwns []string) error + WipeDisk(ctx context.Context, sliceOfWwns []string) (string, error) } // Factory is the interface for creating new Client objects. @@ -562,15 +562,15 @@ EOF_VIA_SSH `, strings.Join(sliceOfWwns, " "), detectLinuxOnAnotherDiskShellScript)) } -func (c *sshClient) WipeDisk(ctx context.Context, sliceOfWwns []string) error { +func (c *sshClient) WipeDisk(ctx context.Context, sliceOfWwns []string) (string, error) { log := ctrl.LoggerFrom(ctx) if len(sliceOfWwns) == 0 { - return nil + return "", nil } if slices.Contains(sliceOfWwns, "all") { out := c.runSSH("lsblk --nodeps --noheadings -o WWN | sort -u") if out.Err != nil { - return fmt.Errorf("failed to WWN of all disks: %w", out.Err) + return "", fmt.Errorf("failed to WWN of all disks: %w", out.Err) } log.Info("WipeDisk: 'all' was given. Found these WWNs", "WWNs", sliceOfWwns) sliceOfWwns = strings.Fields(out.StdOut) @@ -580,9 +580,9 @@ func (c *sshClient) WipeDisk(ctx context.Context, sliceOfWwns []string) error { EOF_VIA_SSH `, strings.Join(sliceOfWwns, " "), wipeDiskShellScript)) if out.Err != nil { - return fmt.Errorf("WipeDisk for %+v failed: %s. %s: %w", sliceOfWwns, out.StdOut, out.StdErr, out.Err) + return "", fmt.Errorf("WipeDisk for %+v failed: %s. %s: %w", sliceOfWwns, out.StdOut, out.StdErr, out.Err) } - return nil + return out.String(), nil } func (c *sshClient) UntarTGZ() Output { diff --git a/pkg/services/baremetal/client/ssh/wipe-disk.sh b/pkg/services/baremetal/client/ssh/wipe-disk.sh index cea3717ff..36d55a79a 100644 --- a/pkg/services/baremetal/client/ssh/wipe-disk.sh +++ b/pkg/services/baremetal/client/ssh/wipe-disk.sh @@ -45,5 +45,5 @@ for wwn in "$@"; do echo "Failed to find device for WWN $wwn" exit 3 fi - wipefs -af "$device" + wipefs -af "/dev/$device" done diff --git a/pkg/services/baremetal/host/host.go b/pkg/services/baremetal/host/host.go index 033b8a2b8..780fb769b 100644 --- a/pkg/services/baremetal/host/host.go +++ b/pkg/services/baremetal/host/host.go @@ -1098,7 +1098,7 @@ func (s *Service) actionImageInstallingStartBackgroundProcess(ctx context.Contex // Call WipeDisk if the corresponding annotation is set. sliceOfWwns := strings.Fields(s.scope.HetznerBareMetalHost.Annotations[infrav1.WipeDiskAnnotation]) if len(sliceOfWwns) > 0 { - err := sshClient.WipeDisk(ctx, sliceOfWwns) + output, err := sshClient.WipeDisk(ctx, sliceOfWwns) if err != nil { var exitErr *ssh.ExitError if errors.As(err, &exitErr) && exitErr.ExitStatus() > 0 { @@ -1133,8 +1133,8 @@ func (s *Service) actionImageInstallingStartBackgroundProcess(ctx context.Contex } } delete(s.scope.HetznerBareMetalHost.Annotations, infrav1.WipeDiskAnnotation) - record.Eventf(s.scope.HetznerBareMetalHost, "WipeDiskDone", "WipeDisk (%v) was done. Annotation %q was removed", - sliceOfWwns, infrav1.WipeDiskAnnotation) + record.Eventf(s.scope.HetznerBareMetalHost, "WipeDiskDone", "WipeDisk (%v) was done. Annotation %q was removed.\n%s", + sliceOfWwns, infrav1.WipeDiskAnnotation, output) } // If there is a Linux OS on an other disk, then the reboot after the provisioning From 8825109fe46ff640f3a4a12f54cc9298d90d5e06 Mon Sep 17 00:00:00 2001 From: Thomas Guettler Date: Fri, 9 Aug 2024 12:00:45 +0200 Subject: [PATCH 06/15] better info msg. --- pkg/services/baremetal/client/ssh/wipe-disk.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/pkg/services/baremetal/client/ssh/wipe-disk.sh b/pkg/services/baremetal/client/ssh/wipe-disk.sh index 36d55a79a..6495e2bc6 100644 --- a/pkg/services/baremetal/client/ssh/wipe-disk.sh +++ b/pkg/services/baremetal/client/ssh/wipe-disk.sh @@ -45,5 +45,6 @@ for wwn in "$@"; do echo "Failed to find device for WWN $wwn" exit 3 fi + echo "INFO: Calling wipfs for $wwn (/dev/$device)" wipefs -af "/dev/$device" done From 656c8d3a2075f4f72f45d01650416148f459a7ba Mon Sep 17 00:00:00 2001 From: Thomas Guettler Date: Fri, 9 Aug 2024 13:31:57 +0200 Subject: [PATCH 07/15] .. check if wwn is valid. --- pkg/services/baremetal/client/ssh/ssh_client.go | 11 +++++++++++ pkg/services/baremetal/host/host.go | 5 ++--- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/pkg/services/baremetal/client/ssh/ssh_client.go b/pkg/services/baremetal/client/ssh/ssh_client.go index d67a044ca..7f04d4bc5 100644 --- a/pkg/services/baremetal/client/ssh/ssh_client.go +++ b/pkg/services/baremetal/client/ssh/ssh_client.go @@ -562,6 +562,9 @@ EOF_VIA_SSH `, strings.Join(sliceOfWwns, " "), detectLinuxOnAnotherDiskShellScript)) } +// I found no details about the format. +var isValidWWNRegex = regexp.MustCompile(`^[0-9a-zA-Z.-]{5,64}$`) + func (c *sshClient) WipeDisk(ctx context.Context, sliceOfWwns []string) (string, error) { log := ctrl.LoggerFrom(ctx) if len(sliceOfWwns) == 0 { @@ -574,6 +577,14 @@ func (c *sshClient) WipeDisk(ctx context.Context, sliceOfWwns []string) (string, } log.Info("WipeDisk: 'all' was given. Found these WWNs", "WWNs", sliceOfWwns) sliceOfWwns = strings.Fields(out.StdOut) + } else { + for _, wwn := range sliceOfWwns { + // validate WWN. + // It is unlikely, but somehow could use this wwn: `"; do-nasty-things-here` + if !isValidWWNRegex.MatchString(wwn) { + return "", fmt.Errorf("WWN %q does not match regex %q", wwn, isValidWWNRegex.String()) + } + } } out := c.runSSH(fmt.Sprintf(`cat <<'EOF_VIA_SSH' | bash -s -- %s %s diff --git a/pkg/services/baremetal/host/host.go b/pkg/services/baremetal/host/host.go index 780fb769b..9d251a616 100644 --- a/pkg/services/baremetal/host/host.go +++ b/pkg/services/baremetal/host/host.go @@ -1100,8 +1100,7 @@ func (s *Service) actionImageInstallingStartBackgroundProcess(ctx context.Contex if len(sliceOfWwns) > 0 { output, err := sshClient.WipeDisk(ctx, sliceOfWwns) if err != nil { - var exitErr *ssh.ExitError - if errors.As(err, &exitErr) && exitErr.ExitStatus() > 0 { + if errors.Is(err, &ssh.ExitError{}) { // The script was executed, but an error occurred. // Do not retry. This needs manual intervention. msg := fmt.Sprintf("WipeDisk failed (permanent error): %s", @@ -1133,7 +1132,7 @@ func (s *Service) actionImageInstallingStartBackgroundProcess(ctx context.Contex } } delete(s.scope.HetznerBareMetalHost.Annotations, infrav1.WipeDiskAnnotation) - record.Eventf(s.scope.HetznerBareMetalHost, "WipeDiskDone", "WipeDisk (%v) was done. Annotation %q was removed.\n%s", + record.Eventf(s.scope.HetznerBareMetalHost, "WipeDiskDone", "WipeDisk %v was done. Annotation %q was removed.\n%s", sliceOfWwns, infrav1.WipeDiskAnnotation, output) } From d522002cd86a43da498a41d7e5780dbe7fb891fa Mon Sep 17 00:00:00 2001 From: Thomas Guettler Date: Fri, 9 Aug 2024 13:42:27 +0200 Subject: [PATCH 08/15] validate WWN, so that no code-injection can be done. --- pkg/services/baremetal/client/ssh/ssh_client.go | 7 +++++-- pkg/services/baremetal/host/host.go | 2 +- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/pkg/services/baremetal/client/ssh/ssh_client.go b/pkg/services/baremetal/client/ssh/ssh_client.go index 7f04d4bc5..8bdd94c61 100644 --- a/pkg/services/baremetal/client/ssh/ssh_client.go +++ b/pkg/services/baremetal/client/ssh/ssh_client.go @@ -563,7 +563,10 @@ EOF_VIA_SSH } // I found no details about the format. -var isValidWWNRegex = regexp.MustCompile(`^[0-9a-zA-Z.-]{5,64}$`) +var ( + isValidWWNRegex = regexp.MustCompile(`^[0-9a-zA-Z.-]{5,64}$`) + ErrInvalidWWN = fmt.Errorf("WWN does not match regex %q", isValidWWNRegex.String()) +) func (c *sshClient) WipeDisk(ctx context.Context, sliceOfWwns []string) (string, error) { log := ctrl.LoggerFrom(ctx) @@ -582,7 +585,7 @@ func (c *sshClient) WipeDisk(ctx context.Context, sliceOfWwns []string) (string, // validate WWN. // It is unlikely, but somehow could use this wwn: `"; do-nasty-things-here` if !isValidWWNRegex.MatchString(wwn) { - return "", fmt.Errorf("WWN %q does not match regex %q", wwn, isValidWWNRegex.String()) + return "", fmt.Errorf("WWN %q is invalid. %w", wwn, ErrInvalidWWN) } } } diff --git a/pkg/services/baremetal/host/host.go b/pkg/services/baremetal/host/host.go index 9d251a616..29c66d977 100644 --- a/pkg/services/baremetal/host/host.go +++ b/pkg/services/baremetal/host/host.go @@ -1100,7 +1100,7 @@ func (s *Service) actionImageInstallingStartBackgroundProcess(ctx context.Contex if len(sliceOfWwns) > 0 { output, err := sshClient.WipeDisk(ctx, sliceOfWwns) if err != nil { - if errors.Is(err, &ssh.ExitError{}) { + if errors.Is(err, &ssh.ExitError{}) || errors.Is(err, sshclient.ErrInvalidWWN) { // The script was executed, but an error occurred. // Do not retry. This needs manual intervention. msg := fmt.Sprintf("WipeDisk failed (permanent error): %s", From 8990cf318f5a79edb04b7e833e158b3e44a5213c Mon Sep 17 00:00:00 2001 From: Thomas Guettler Date: Fri, 9 Aug 2024 14:02:18 +0200 Subject: [PATCH 09/15] fixed bug: errors.Is() does not work for ssh.ExitError. For error structs, you need to use errors.As(). --- pkg/services/baremetal/host/host.go | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pkg/services/baremetal/host/host.go b/pkg/services/baremetal/host/host.go index 29c66d977..c7ff7a591 100644 --- a/pkg/services/baremetal/host/host.go +++ b/pkg/services/baremetal/host/host.go @@ -1100,7 +1100,8 @@ func (s *Service) actionImageInstallingStartBackgroundProcess(ctx context.Contex if len(sliceOfWwns) > 0 { output, err := sshClient.WipeDisk(ctx, sliceOfWwns) if err != nil { - if errors.Is(err, &ssh.ExitError{}) || errors.Is(err, sshclient.ErrInvalidWWN) { + var exitErr *ssh.ExitError + if errors.As(err, &exitErr) || errors.Is(err, sshclient.ErrInvalidWWN) { // The script was executed, but an error occurred. // Do not retry. This needs manual intervention. msg := fmt.Sprintf("WipeDisk failed (permanent error): %s", @@ -1122,11 +1123,11 @@ func (s *Service) actionImageInstallingStartBackgroundProcess(ctx context.Contex conditions.MarkFalse( s.scope.HetznerBareMetalHost, infrav1.ProvisionSucceededCondition, - infrav1.SSHToRescueSystemFailedReason, - clusterv1.ConditionSeverityInfo, + infrav1.WipeDiskFailedReason, + clusterv1.ConditionSeverityWarning, msg, ) - record.Event(s.scope.HetznerBareMetalHost, infrav1.SSHToRescueSystemFailedReason, msg) + record.Warn(s.scope.HetznerBareMetalHost, infrav1.WipeDiskFailedReason, msg) return actionContinue{ delay: 10 * time.Second, } From e57357c0648c4ac1f92ef9769d5e4c1b9ff69e81 Mon Sep 17 00:00:00 2001 From: Thomas Guettler Date: Fri, 9 Aug 2024 14:39:30 +0200 Subject: [PATCH 10/15] ... bash strict fails if not executed in a script We might got during `trap`: sed: can't read bash: No such file or directory --- pkg/services/baremetal/client/ssh/ssh_client.go | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pkg/services/baremetal/client/ssh/ssh_client.go b/pkg/services/baremetal/client/ssh/ssh_client.go index 8bdd94c61..897fad644 100644 --- a/pkg/services/baremetal/client/ssh/ssh_client.go +++ b/pkg/services/baremetal/client/ssh/ssh_client.go @@ -556,9 +556,11 @@ func (c *sshClient) ResetKubeadm() Output { } func (c *sshClient) DetectLinuxOnAnotherDisk(sliceOfWwns []string) Output { - return c.runSSH(fmt.Sprintf(`cat <<'EOF_VIA_SSH' | bash -s -- %s + return c.runSSH(fmt.Sprintf(`cat >/root/detect-linux-on-another-disk.sh <<'EOF_VIA_SSH' %s EOF_VIA_SSH +chmod a+rx /root/detect-linux-on-another-disk.sh +/root/detect-linux-on-another-disk.sh %s `, strings.Join(sliceOfWwns, " "), detectLinuxOnAnotherDiskShellScript)) } @@ -589,9 +591,11 @@ func (c *sshClient) WipeDisk(ctx context.Context, sliceOfWwns []string) (string, } } } - out := c.runSSH(fmt.Sprintf(`cat <<'EOF_VIA_SSH' | bash -s -- %s + out := c.runSSH(fmt.Sprintf(`cat >/root/wipe-disk.sh <<'EOF_VIA_SSH' %s EOF_VIA_SSH +chmod a+rx /root/wipe-disk.sh +/root/wipe-disk.sh %s `, strings.Join(sliceOfWwns, " "), wipeDiskShellScript)) if out.Err != nil { return "", fmt.Errorf("WipeDisk for %+v failed: %s. %s: %w", sliceOfWwns, out.StdOut, out.StdErr, out.Err) From 5e4b1591a336e2aaaa42f58570a25bc2d84025ab Mon Sep 17 00:00:00 2001 From: Thomas Guettler Date: Fri, 9 Aug 2024 14:44:33 +0200 Subject: [PATCH 11/15] ... typo --- pkg/services/baremetal/client/ssh/ssh_client.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/services/baremetal/client/ssh/ssh_client.go b/pkg/services/baremetal/client/ssh/ssh_client.go index 897fad644..5162bb6a0 100644 --- a/pkg/services/baremetal/client/ssh/ssh_client.go +++ b/pkg/services/baremetal/client/ssh/ssh_client.go @@ -561,7 +561,7 @@ func (c *sshClient) DetectLinuxOnAnotherDisk(sliceOfWwns []string) Output { EOF_VIA_SSH chmod a+rx /root/detect-linux-on-another-disk.sh /root/detect-linux-on-another-disk.sh %s -`, strings.Join(sliceOfWwns, " "), detectLinuxOnAnotherDiskShellScript)) +`, detectLinuxOnAnotherDiskShellScript, strings.Join(sliceOfWwns, " "))) } // I found no details about the format. @@ -596,7 +596,7 @@ func (c *sshClient) WipeDisk(ctx context.Context, sliceOfWwns []string) (string, EOF_VIA_SSH chmod a+rx /root/wipe-disk.sh /root/wipe-disk.sh %s -`, strings.Join(sliceOfWwns, " "), wipeDiskShellScript)) +`, wipeDiskShellScript, strings.Join(sliceOfWwns, " "))) if out.Err != nil { return "", fmt.Errorf("WipeDisk for %+v failed: %s. %s: %w", sliceOfWwns, out.StdOut, out.StdErr, out.Err) } From 3adfcfc80e9773b2c1cf661d1787bc4f4e311944 Mon Sep 17 00:00:00 2001 From: Thomas Guettler Date: Fri, 9 Aug 2024 14:58:28 +0200 Subject: [PATCH 12/15] colon is a valic char in a wwn. --- pkg/services/baremetal/client/ssh/ssh_client.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pkg/services/baremetal/client/ssh/ssh_client.go b/pkg/services/baremetal/client/ssh/ssh_client.go index 5162bb6a0..8f38427db 100644 --- a/pkg/services/baremetal/client/ssh/ssh_client.go +++ b/pkg/services/baremetal/client/ssh/ssh_client.go @@ -564,9 +564,10 @@ chmod a+rx /root/detect-linux-on-another-disk.sh `, detectLinuxOnAnotherDiskShellScript, strings.Join(sliceOfWwns, " "))) } -// I found no details about the format. var ( - isValidWWNRegex = regexp.MustCompile(`^[0-9a-zA-Z.-]{5,64}$`) + // I found no details about the format. I found these examples + // 10:00:00:05:1e:7a:7a:00 eui.00253885910c8cec 0x500a07511bb48b25 + isValidWWNRegex = regexp.MustCompile(`^[0-9a-zA-Z.:-]{5,64}$`) ErrInvalidWWN = fmt.Errorf("WWN does not match regex %q", isValidWWNRegex.String()) ) From c8d317ae753575a45913e3f49a3c4cd472625c04 Mon Sep 17 00:00:00 2001 From: Thomas Guettler Date: Fri, 9 Aug 2024 15:00:13 +0200 Subject: [PATCH 13/15] typo --- pkg/services/baremetal/client/ssh/ssh_client.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/services/baremetal/client/ssh/ssh_client.go b/pkg/services/baremetal/client/ssh/ssh_client.go index 8f38427db..728e6e030 100644 --- a/pkg/services/baremetal/client/ssh/ssh_client.go +++ b/pkg/services/baremetal/client/ssh/ssh_client.go @@ -579,7 +579,7 @@ func (c *sshClient) WipeDisk(ctx context.Context, sliceOfWwns []string) (string, if slices.Contains(sliceOfWwns, "all") { out := c.runSSH("lsblk --nodeps --noheadings -o WWN | sort -u") if out.Err != nil { - return "", fmt.Errorf("failed to WWN of all disks: %w", out.Err) + return "", fmt.Errorf("failed to find WWNs of all disks: %w", out.Err) } log.Info("WipeDisk: 'all' was given. Found these WWNs", "WWNs", sliceOfWwns) sliceOfWwns = strings.Fields(out.StdOut) From 6ee94a3c3959d45ca467b8ef9e29621cb3de0188 Mon Sep 17 00:00:00 2001 From: Thomas Guettler Date: Fri, 9 Aug 2024 15:03:23 +0200 Subject: [PATCH 14/15] typo --- pkg/services/baremetal/client/ssh/ssh_client.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/services/baremetal/client/ssh/ssh_client.go b/pkg/services/baremetal/client/ssh/ssh_client.go index 728e6e030..35abcd77e 100644 --- a/pkg/services/baremetal/client/ssh/ssh_client.go +++ b/pkg/services/baremetal/client/ssh/ssh_client.go @@ -586,7 +586,7 @@ func (c *sshClient) WipeDisk(ctx context.Context, sliceOfWwns []string) (string, } else { for _, wwn := range sliceOfWwns { // validate WWN. - // It is unlikely, but somehow could use this wwn: `"; do-nasty-things-here` + // It is unlikely, but someone could use this wwn: `"; do-nasty-things-here` if !isValidWWNRegex.MatchString(wwn) { return "", fmt.Errorf("WWN %q is invalid. %w", wwn, ErrInvalidWWN) } From 9c266323b91226d20afc04248def18d2c7325453 Mon Sep 17 00:00:00 2001 From: Thomas Guettler Date: Fri, 9 Aug 2024 15:19:29 +0200 Subject: [PATCH 15/15] make linter happy (add comment to ErrInvalidWWN) --- pkg/services/baremetal/client/ssh/ssh_client.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pkg/services/baremetal/client/ssh/ssh_client.go b/pkg/services/baremetal/client/ssh/ssh_client.go index 35abcd77e..cc10bc5f8 100644 --- a/pkg/services/baremetal/client/ssh/ssh_client.go +++ b/pkg/services/baremetal/client/ssh/ssh_client.go @@ -568,7 +568,9 @@ var ( // I found no details about the format. I found these examples // 10:00:00:05:1e:7a:7a:00 eui.00253885910c8cec 0x500a07511bb48b25 isValidWWNRegex = regexp.MustCompile(`^[0-9a-zA-Z.:-]{5,64}$`) - ErrInvalidWWN = fmt.Errorf("WWN does not match regex %q", isValidWWNRegex.String()) + + // ErrInvalidWWN indicates that a WWN has an invalid syntax. + ErrInvalidWWN = fmt.Errorf("WWN does not match regex %q", isValidWWNRegex.String()) ) func (c *sshClient) WipeDisk(ctx context.Context, sliceOfWwns []string) (string, error) {