Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

🌱 Several small changes to make debugging bare-metal provisioning easier #1196

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 14 additions & 4 deletions api/v1beta1/hetznerbaremetalhost_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,11 +57,19 @@ type RootDeviceHints struct {

// IsValid checks whether rootDeviceHint is valid.
func (rdh *RootDeviceHints) IsValid() bool {
if rdh.WWN == "" && len(rdh.Raid.WWN) == 0 ||
rdh.WWN != "" && len(rdh.Raid.WWN) > 0 {
return false
return rdh.IsValidWithMessage() == ""
}

// IsValidWithMessage checks whether rootDeviceHint is valid.
// If valid, an empty string gets returned.
func (rdh *RootDeviceHints) IsValidWithMessage() string {
if rdh.WWN == "" && len(rdh.Raid.WWN) == 0 {
return "rootDeviceHint.wwn and rootDeviceHint.raid.wwn are empty. Please specify one or the other."
}
if rdh.WWN != "" && len(rdh.Raid.WWN) > 0 {
return "WWN specified twice (rootDeviceHint.wwn and rootDeviceHint.raid.wwn). Please specify only one or the other."
}
return true
return ""
}

// ListOfWWN gives the list of WWNs - no matter if it's in WWN or Raid.
Expand Down Expand Up @@ -111,6 +119,8 @@ const (
const (
// ErrorMessageMissingRootDeviceHints specifies the error message when no root device hints are specified.
ErrorMessageMissingRootDeviceHints string = "no root device hints specified"
// ErrorMessageInvalidRootDeviceHints specifies the error message when invalid root device hints are specified.
ErrorMessageInvalidRootDeviceHints string = "invalid root device hints specified"
// ErrorMessageMissingHetznerSecret specifies the error message when no Hetzner secret was found.
ErrorMessageMissingHetznerSecret string = "could not find HetznerSecret"
// ErrorMessageMissingRescueSSHSecret specifies the error message when no RescueSSH secret was found.
Expand Down
21 changes: 21 additions & 0 deletions api/v1beta1/hetznerbaremetalmachine_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package v1beta1
import (
"errors"
"fmt"
"net/url"
"strings"

metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
Expand Down Expand Up @@ -194,6 +195,26 @@ func (image Image) GetDetails() (imagePath string, needsDownload bool, errorMess
return imagePath, needsDownload, errorMessage
}

// String returns a human-readable representation of the image, intended for
// logs and events. The format is "<name> (<location>)", or just "<location>"
// when the image has no name. The location is the image URL with its password
// redacted; when no URL is set (or it renders empty), Path is used instead.
//
// If the URL cannot be parsed, a fixed placeholder is used as the location so
// that no part of the raw URL (which may contain credentials) is exposed.
func (image Image) String() string {
	cleanURL := ""
	if image.URL != "" {
		u, err := url.Parse(image.URL)
		if err != nil {
			// Deliberately not err.Error(): the *url.Error returned by
			// url.Parse embeds the raw, unredacted URL in its message, which
			// would leak the password this method is supposed to hide.
			cleanURL = "<invalid URL>"
		} else {
			cleanURL = u.Redacted()
		}
	}
	if cleanURL == "" {
		cleanURL = image.Path
	}
	if image.Name == "" {
		return cleanURL
	}
	return fmt.Sprintf("%s (%s)", image.Name, cleanURL)
}

// Partition defines the additional Partitions to be created.
type Partition struct {
// Mount defines the mount path for this filesystem.
Expand Down
50 changes: 50 additions & 0 deletions api/v1beta1/hetznerbaremetalmachine_types_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,11 @@ package v1beta1

import (
"errors"
"testing"

. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
"github.com/stretchr/testify/require"
capierrors "sigs.k8s.io/cluster-api/errors"
)

Expand Down Expand Up @@ -235,3 +237,51 @@ var _ = Describe("Test HasHostAnnotation", func() {
}),
)
})

// Test_Image_String checks the string representation of Image: an empty image
// yields an empty string, a password embedded in the URL is redacted, the
// name is omitted when empty, and Path is used when no URL is set.
func Test_Image_String(t *testing.T) {
	for _, row := range []struct {
		image    Image
		expected string
	}{
		{
			Image{
				URL:  "",
				Name: "",
				Path: "",
			},
			"",
		},
		{
			// URL with credentials: the password must be replaced by "xxxxx".
			Image{
				URL:  "https://user:password@example.com/images/Ubuntu-2204-jammy-amd64-custom.tar.gz",
				Name: "Ubuntu-2204",
				Path: "",
			},
			"Ubuntu-2204 (https://user:xxxxx@example.com/images/Ubuntu-2204-jammy-amd64-custom.tar.gz)",
		},
		{
			Image{
				URL:  "https://example.com/foo.tgz",
				Name: "foo",
				Path: "",
			},
			"foo (https://example.com/foo.tgz)",
		},
		{
			Image{
				URL:  "https://example.com/nameless.tgz",
				Path: "",
			},
			"https://example.com/nameless.tgz",
		},
		{
			Image{
				Name: "nfs",
				Path: "/root/.oldroot/nfs/install/../images/Ubuntu-2004-focal-64-minimal-hwe.tar.gz",
			},
			"nfs (/root/.oldroot/nfs/install/../images/Ubuntu-2004-focal-64-minimal-hwe.tar.gz)",
		},
	} {
		require.Equal(t, row.expected, row.image.String())
	}
}
2 changes: 1 addition & 1 deletion controllers/hetznerbaremetalhost_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ func (r *HetznerBareMetalHostReconciler) Reconcile(ctx context.Context, req ctrl
Name: bmHost.Spec.Status.HetznerClusterRef,
}
if err := r.Client.Get(ctx, hetznerClusterName, hetznerCluster); err != nil {
return reconcile.Result{}, errors.New("HetznerCluster not found")
return reconcile.Result{}, fmt.Errorf("failed to get HetznerCluster: %w", err)
}

log = log.WithValues("HetznerCluster", klog.KObj(hetznerCluster))
Expand Down
11 changes: 11 additions & 0 deletions hack/filter-caph-controller-manager-logs.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,9 @@
'"Starting reconciling cluster"',
'"Completed function"',
'"Adding request."',
'Update to resource only changes insignificant fields',
'"approved csr"',
'"Registering webhook"',
]

def main():
Expand Down Expand Up @@ -66,9 +69,17 @@ def handle_line(line):
t = data.pop('time', '')
t = re.sub(r'^.*T(.+)*\..+$', r'\1', t) # '2023-04-17T12:12:53.423Z

# skip too long entries
for key, value in list(data.items()):
if not isinstance(value, str):
continue
if len(value) > 1_000:
data[key] = value[:1_000] + "...cut..."

level = data.pop('level', '').ljust(5)
file = data.pop('file', '')
message = data.pop('message', '')

if not data:
data=''

Expand Down
18 changes: 12 additions & 6 deletions pkg/services/baremetal/client/ssh/detect-linux-on-another-disk.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,16 @@
# See the License for the specific language governing permissions and
# limitations under the License.

# lsblk from util-linux 2.34 (Ubuntu 20.04) does not know column PARTTYPENAME

set -euo pipefail

trap 'echo "Warning: A command has failed. Exiting the script. Line was ($0:$LINENO): $(sed -n "${LINENO}p" "$0")"; exit 3' ERR

function usage() {
echo "$0 wwn1 [wwn2 ...]"
echo " Check if there is a Linux partition, but skip all WWNs given as arguments"
echo " Background: If we provision a disk, then there must not be a Linux OS on an other partition"
echo " Background: If we provision a disk, then there must not be a Linux OS on another partition"
echo " otherwise it is likely that the old OS gets booted, and not the new OS."
echo " Exit 0: If there is no Linux installation found."
echo " Exit 1: There is a Linux on a different disk.".
Expand All @@ -46,25 +48,29 @@ for wwn in "$@"; do
fi
done
fail=0
while read name wwn type parttype; do

lines=$(lsblk -r -oNAME,WWN,TYPE)

while read name wwn type; do
if [[ " $* " == *" $wwn "* ]]; then
#echo "ok: skipping $name $wwn, since it was an argument to the script."
continue
fi
root_directory_content=$(grub-fstest /dev/$name ls / 2>/dev/null || true | tr ' ' '\n' | sort | tr '\n' ' ')
if [[ $root_directory_content =~ .*boot/.*etc/.* ]]; then
echo "FAIL: $name $wwn partitionType=$parttype looks like a Linux root partition on another disk."
echo "FAIL: $name $wwn looks like a Linux root partition on another disk."
fail=1
continue
fi
if [[ $root_directory_content =~ .*initrd.*vmlinuz.* ]]; then
echo "FAIL: $name $wwn partitionType=$parttype looks like a Linux /boot partition on another disk."
echo "FAIL: $name $wwn looks like a Linux /boot partition on another disk."
fail=1
continue
fi
#echo "ok: $name $wwn $parttype, does not look like root Linux partition."
done < <(lsblk -r -oNAME,WWN,TYPE,PARTTYPENAME | grep -v NAME | grep -i part)
done < <(echo "$lines" | grep -v NAME | grep -i part)
if [ $fail -eq 1 ]; then
exit 1
fi
echo "Looks good. No Linux root partition on other devices"
echo "Looks good. No Linux root partition on another devices"

23 changes: 20 additions & 3 deletions pkg/services/baremetal/host/host.go
Original file line number Diff line number Diff line change
Expand Up @@ -545,17 +545,27 @@ func (s *Service) actionRegistering() actionResult {
s.scope.HetznerBareMetalHost.Spec.Status.HardwareDetails = &hardwareDetails
}

if s.scope.HetznerBareMetalHost.Spec.RootDeviceHints == nil ||
!s.scope.HetznerBareMetalHost.Spec.RootDeviceHints.IsValid() {
if s.scope.HetznerBareMetalHost.Spec.RootDeviceHints == nil {
conditions.MarkFalse(
s.scope.HetznerBareMetalHost,
infrav1.RootDeviceHintsValidatedCondition,
infrav1.ValidationFailedReason,
clusterv1.ConditionSeverityError,
"validation failed - check specified rootDeviceHints",
infrav1.ErrorMessageMissingRootDeviceHints,
)
return s.recordActionFailure(infrav1.RegistrationError, infrav1.ErrorMessageMissingRootDeviceHints)
}
errMsg := s.scope.HetznerBareMetalHost.Spec.RootDeviceHints.IsValidWithMessage()
if errMsg != "" {
conditions.MarkFalse(
s.scope.HetznerBareMetalHost,
infrav1.RootDeviceHintsValidatedCondition,
infrav1.ValidationFailedReason,
clusterv1.ConditionSeverityError,
errMsg,
)
return s.recordActionFailure(infrav1.RegistrationError, errMsg)
}

if err := validateRootDevices(s.scope.HetznerBareMetalHost.Spec.RootDeviceHints, s.scope.HetznerBareMetalHost.Spec.Status.HardwareDetails.Storage); err != nil {
conditions.MarkFalse(
Expand Down Expand Up @@ -992,6 +1002,8 @@ func (s *Service) actionImageInstalling() actionResult {
}
}

record.Event(s.scope.HetznerBareMetalHost, "InstallImagePreflightCheckSuccessful", "Rescue system reachable, disks look good.")

autoSetupInput, actionRes := s.createAutoSetupInput(sshClient)
if actionRes != nil {
return actionRes
Expand All @@ -1017,11 +1029,16 @@ func (s *Service) actionImageInstalling() actionResult {
}
}

record.Event(s.scope.HetznerBareMetalHost, "InstallingMachineImageStarted",
s.scope.HetznerBareMetalHost.Spec.Status.InstallImage.Image.String())

out = sshClient.UntarTGZ()
if out.Err != nil {
record.Warnf(s.scope.HetznerBareMetalHost, "UntarInstallimageTgzFailed", "err: %s, stderr: %s", out.Err.Error(), out.StdErr)
return actionError{err: fmt.Errorf("UntarInstallimageTgzFailed: %w", out.Err)}
}
record.Event(s.scope.HetznerBareMetalHost, "ExecuteInstallImageStarted",
s.scope.HetznerBareMetalHost.Spec.Status.InstallImage.Image.String())

// Execute install image
out = sshClient.ExecuteInstallImage(postInstallScript != "")
Expand Down
29 changes: 29 additions & 0 deletions vendor/github.com/stretchr/testify/require/doc.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

16 changes: 16 additions & 0 deletions vendor/github.com/stretchr/testify/require/forward_requirements.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading
Loading