Skip to content

Commit

Permalink
🌱 Several small changes to make debugging bare-metal provisioning easier
Browse files Browse the repository at this point in the history
  • Loading branch information
guettli committed Mar 8, 2024
1 parent 03ed605 commit f63b41f
Show file tree
Hide file tree
Showing 15 changed files with 3,898 additions and 14 deletions.
18 changes: 14 additions & 4 deletions api/v1beta1/hetznerbaremetalhost_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,11 +57,19 @@ type RootDeviceHints struct {

// IsValid reports whether the root device hints are valid, i.e. whether
// IsValidWithMessage returns an empty string.
func (rdh *RootDeviceHints) IsValid() bool {
if rdh.WWN == "" && len(rdh.Raid.WWN) == 0 ||
rdh.WWN != "" && len(rdh.Raid.WWN) > 0 {
return false
return rdh.IsValidWithMessage() == ""
}

// IsValidWithMessage validates the root device hints and returns a
// human-readable description of the problem, or an empty string if the hints
// are valid. The hints are valid when exactly one of WWN and Raid.WWN is set:
// setting neither or both is reported as an error.
func (rdh *RootDeviceHints) IsValidWithMessage() string {
if rdh.WWN == "" && len(rdh.Raid.WWN) == 0 {
return "rootDeviceHint.wwn and rootDeviceHint.raid.wwn are empty. Please specify one or the other."
}
if rdh.WWN != "" && len(rdh.Raid.WWN) > 0 {
return "WWN specified twice (rootDeviceHint.wwn and rootDeviceHint.raid.wwn). Please specify only one or the other."
}
return true
return ""
}

// ListOfWWN gives the list of WWNs - no matter if it's in WWN or Raid.
Expand Down Expand Up @@ -111,6 +119,8 @@ const (
const (
// ErrorMessageMissingRootDeviceHints specifies the error message when no root device hints are specified.
ErrorMessageMissingRootDeviceHints string = "no root device hints specified"
// ErrorMessageInvalidRootDeviceHints specifies the error message when invalid root device hints are specified.
ErrorMessageInvalidRootDeviceHints string = "invalid root device hints specified"
// ErrorMessageMissingHetznerSecret specifies the error message when no Hetzner secret was found.
ErrorMessageMissingHetznerSecret string = "could not find HetznerSecret"
// ErrorMessageMissingRescueSSHSecret specifies the error message when no RescueSSH secret was found.
Expand Down
21 changes: 21 additions & 0 deletions api/v1beta1/hetznerbaremetalmachine_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package v1beta1
import (
"errors"
"fmt"
"net/url"
"strings"

metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
Expand Down Expand Up @@ -194,6 +195,26 @@ func (image Image) GetDetails() (imagePath string, needsDownload bool, errorMess
return imagePath, needsDownload, errorMessage
}

// String returns a human-readable representation of the image in the form
// "Name (location)", or just the location when Name is empty.
//
// The location is the URL with any password redacted. If no URL is set (or
// the redacted URL renders as an empty string), Path is used instead.
//
// If the URL cannot be parsed, the fixed placeholder "invalid URL" is used as
// location. The error returned by url.Parse is deliberately not printed: it is
// a *url.Error whose message embeds the raw URL, including the password.
func (image Image) String() string {
	cleanURL := ""
	if image.URL != "" {
		u, err := url.Parse(image.URL)
		if err != nil {
			// Never echo err here, it would leak the credentials this method promises to hide.
			cleanURL = "invalid URL"
		} else {
			cleanURL = u.Redacted()
		}
	}
	if cleanURL == "" {
		cleanURL = image.Path
	}
	if image.Name == "" {
		return cleanURL
	}
	return fmt.Sprintf("%s (%s)", image.Name, cleanURL)
}

// Partition defines the additional Partitions to be created.
type Partition struct {
// Mount defines the mount path for this filesystem.
Expand Down
50 changes: 50 additions & 0 deletions api/v1beta1/hetznerbaremetalmachine_types_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,11 @@ package v1beta1

import (
"errors"
"testing"

. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
"github.com/stretchr/testify/require"
capierrors "sigs.k8s.io/cluster-api/errors"
)

Expand Down Expand Up @@ -235,3 +237,51 @@ var _ = Describe("Test HasHostAnnotation", func() {
}),
)
})

// Test_Image_String verifies the string representation of Image: the
// "Name (location)" format, the fallback from URL to Path, and that a password
// embedded in the URL is replaced by "xxxxx".
func Test_Image_String(t *testing.T) {
	for _, row := range []struct {
		image    Image
		expected string
	}{
		{
			Image{
				URL:  "",
				Name: "",
				Path: "",
			},
			"",
		},
		{
			// URL with credentials: the password must not show up in the output.
			Image{
				URL:  "https://user:pass@example.com/images/Ubuntu-2204-jammy-amd64-custom.tar.gz",
				Name: "Ubuntu-2204",
				Path: "",
			},
			"Ubuntu-2204 (https://user:xxxxx@example.com/images/Ubuntu-2204-jammy-amd64-custom.tar.gz)",
		},
		{
			Image{
				URL:  "https://example.com/foo.tgz",
				Name: "foo",
				Path: "",
			},
			"foo (https://example.com/foo.tgz)",
		},
		{
			Image{
				URL:  "https://example.com/nameless.tgz",
				Path: "",
			},
			"https://example.com/nameless.tgz",
		},
		{
			Image{
				Name: "nfs",
				Path: "/root/.oldroot/nfs/install/../images/Ubuntu-2004-focal-64-minimal-hwe.tar.gz",
			},
			"nfs (/root/.oldroot/nfs/install/../images/Ubuntu-2004-focal-64-minimal-hwe.tar.gz)",
		},
	} {
		require.Equal(t, row.expected, row.image.String())
	}
}
2 changes: 1 addition & 1 deletion controllers/hetznerbaremetalhost_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ func (r *HetznerBareMetalHostReconciler) Reconcile(ctx context.Context, req ctrl
Name: bmHost.Spec.Status.HetznerClusterRef,
}
if err := r.Client.Get(ctx, hetznerClusterName, hetznerCluster); err != nil {
return reconcile.Result{}, errors.New("HetznerCluster not found")
return reconcile.Result{}, fmt.Errorf("failed to get HetznerCluster: %w", err)
}

log = log.WithValues("HetznerCluster", klog.KObj(hetznerCluster))
Expand Down
11 changes: 11 additions & 0 deletions hack/filter-caph-controller-manager-logs.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,9 @@
'"Starting reconciling cluster"',
'"Completed function"',
'"Adding request."',
'Update to resource only changes insignificant fields',
'"approved csr"',
'"Registering webhook"',
]

def main():
Expand Down Expand Up @@ -66,9 +69,17 @@ def handle_line(line):
t = data.pop('time', '')
t = re.sub(r'^.*T(.+)*\..+$', r'\1', t) # '2023-04-17T12:12:53.423Z

# truncate overly long string values (entries are shortened, not skipped)
for key, value in list(data.items()):
if not isinstance(value, str):
continue
if len(value) > 1_000:
data[key] = value[:1_000] + "...cut..."

level = data.pop('level', '').ljust(5)
file = data.pop('file', '')
message = data.pop('message', '')

if not data:
data=''

Expand Down
18 changes: 12 additions & 6 deletions pkg/services/baremetal/client/ssh/detect-linux-on-another-disk.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,16 @@
# See the License for the specific language governing permissions and
# limitations under the License.

# lsblk from util-linux 2.34 (Ubuntu 20.04) does not know column PARTTYPENAME

set -euo pipefail

trap 'echo "Warning: A command has failed. Exiting the script. Line was ($0:$LINENO): $(sed -n "${LINENO}p" "$0")"; exit 3' ERR

function usage() {
echo "$0 wwn1 [wwn2 ...]"
echo " Check if there is a Linux partition, but skip all WWNs given as arguments"
echo " Background: If we provision a disk, then there must not be a Linux OS on an other partition"
echo " Background: If we provision a disk, then there must not be a Linux OS on another partition"
echo " otherwise it is likely that the old OS gets booted, and not the new OS."
echo " Exit 0: If there is no Linux installation found."
echo " Exit 1: There is a Linux on a different disk.".
Expand All @@ -46,25 +48,29 @@ for wwn in "$@"; do
fi
done
fail=0
while read name wwn type parttype; do

lines=$(lsblk -r -oNAME,WWN,TYPE)

while read name wwn type; do
if [[ " $* " == *" $wwn "* ]]; then
#echo "ok: skipping $name $wwn, since it was an argument to the script."
continue
fi
# List the top-level directory of the partition, sorted, as one space-separated line.
# The braces are required: a pipeline binds tighter than "||", so without them only
# the (empty) output of "true" would be piped into tr/sort and the listing stays unsorted,
# while the regex checks below rely on sorted order (boot/ before etc/, initrd before vmlinuz).
root_directory_content=$( { grub-fstest "/dev/$name" ls / 2>/dev/null || true; } | tr ' ' '\n' | sort | tr '\n' ' ')
if [[ $root_directory_content =~ .*boot/.*etc/.* ]]; then
echo "FAIL: $name $wwn partitionType=$parttype looks like a Linux root partition on another disk."
echo "FAIL: $name $wwn looks like a Linux root partition on another disk."
fail=1
continue
fi
if [[ $root_directory_content =~ .*initrd.*vmlinuz.* ]]; then
echo "FAIL: $name $wwn partitionType=$parttype looks like a Linux /boot partition on another disk."
echo "FAIL: $name $wwn looks like a Linux /boot partition on another disk."
fail=1
continue
fi
#echo "ok: $name $wwn $parttype, does not look like root Linux partition."
done < <(lsblk -r -oNAME,WWN,TYPE,PARTTYPENAME | grep -v NAME | grep -i part)
done < <(echo "$lines" | grep -v NAME | grep -i part)
if [ $fail -eq 1 ]; then
exit 1
fi
echo "Looks good. No Linux root partition on other devices"
echo "Looks good. No Linux root partition on another devices"

24 changes: 21 additions & 3 deletions pkg/services/baremetal/host/host.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ import (

"github.com/syself/hrobot-go/models"
"golang.org/x/crypto/ssh"
corev1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
Expand Down Expand Up @@ -545,17 +546,27 @@ func (s *Service) actionRegistering() actionResult {
s.scope.HetznerBareMetalHost.Spec.Status.HardwareDetails = &hardwareDetails
}

if s.scope.HetznerBareMetalHost.Spec.RootDeviceHints == nil ||
!s.scope.HetznerBareMetalHost.Spec.RootDeviceHints.IsValid() {
if s.scope.HetznerBareMetalHost.Spec.RootDeviceHints == nil {
conditions.MarkFalse(
s.scope.HetznerBareMetalHost,
infrav1.RootDeviceHintsValidatedCondition,
infrav1.ValidationFailedReason,
clusterv1.ConditionSeverityError,
"validation failed - check specified rootDeviceHints",
infrav1.ErrorMessageMissingRootDeviceHints,
)
return s.recordActionFailure(infrav1.RegistrationError, infrav1.ErrorMessageMissingRootDeviceHints)
}
errMsg := s.scope.HetznerBareMetalHost.Spec.RootDeviceHints.IsValidWithMessage()
if errMsg != "" {
conditions.MarkFalse(
s.scope.HetznerBareMetalHost,
infrav1.RootDeviceHintsValidatedCondition,
infrav1.ValidationFailedReason,
clusterv1.ConditionSeverityError,
errMsg,
)
return s.recordActionFailure(infrav1.RegistrationError, errMsg)
}

if err := validateRootDevices(s.scope.HetznerBareMetalHost.Spec.RootDeviceHints, s.scope.HetznerBareMetalHost.Spec.Status.HardwareDetails.Storage); err != nil {
conditions.MarkFalse(
Expand Down Expand Up @@ -992,6 +1003,8 @@ func (s *Service) actionImageInstalling() actionResult {
}
}

record.Event(s.scope.HetznerBareMetalHost, "InstallImagePreflightCheckSuccessful", "Rescue system reachable, disks look good.")

autoSetupInput, actionRes := s.createAutoSetupInput(sshClient)
if actionRes != nil {
return actionRes
Expand All @@ -1017,11 +1030,16 @@ func (s *Service) actionImageInstalling() actionResult {
}
}

record.Event(s.scope.HetznerBareMetalHost, "InstallingMachineImageStarted",
s.scope.HetznerBareMetalHost.Spec.Status.InstallImage.Image.String())

out = sshClient.UntarTGZ()
if out.Err != nil {
record.Warnf(s.scope.HetznerBareMetalHost, "UntarInstallimageTgzFailed", "err: %s, stderr: %s", out.Err.Error(), out.StdErr)
return actionError{err: fmt.Errorf("UntarInstallimageTgzFailed: %w", out.Err)}
}
record.Event(s.scope.HetznerBareMetalHost, "ExecuteInstallImageStarted",
s.scope.HetznerBareMetalHost.Spec.Status.InstallImage.Image.String())

// Execute install image
out = sshClient.ExecuteInstallImage(postInstallScript != "")
Expand Down
29 changes: 29 additions & 0 deletions vendor/github.com/stretchr/testify/require/doc.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

16 changes: 16 additions & 0 deletions vendor/github.com/stretchr/testify/require/forward_requirements.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit f63b41f

Please sign in to comment.