Skip to content

Commit

Permalink
fix: rework the 'metal-iso' config acquisition
Browse files Browse the repository at this point in the history
Fixes #9538

Re-do the implementation by using the volume management primitives, so
that we can avoid/skip old code. This should fix all issues related to
the partition/whole disk.

Fix issues in the volume management (exposed, as we haven't used it this
way before).

Build a test case in `talosctl cluster create` to inject machine config
via `metal-iso`.

Signed-off-by: Andrey Smirnov <[email protected]>
  • Loading branch information
smira committed Oct 24, 2024
1 parent 1993afc commit d393938
Show file tree
Hide file tree
Showing 18 changed files with 323 additions and 137 deletions.
9 changes: 8 additions & 1 deletion .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# THIS FILE WAS AUTOMATICALLY GENERATED, PLEASE DO NOT EDIT.
#
# Generated on 2024-10-18T16:27:22Z by kres 34e72ac.
# Generated on 2024-10-23T17:20:56Z by kres 6d3cad4.

name: default
concurrency:
Expand Down Expand Up @@ -2283,6 +2283,13 @@ jobs:
WITH_NETWORK_CHAOS: "yes"
run: |
sudo -E make e2e-qemu
- name: e2e-metal-iso
env:
IMAGE_REGISTRY: registry.dev.siderolabs.io
SHORT_INTEGRATION_TEST: "yes"
WITH_CONFIG_INJECTION_METHOD: metal-iso
run: |
sudo -E make e2e-qemu
- name: save artifacts
if: always()
uses: actions/upload-artifact@v4
Expand Down
9 changes: 8 additions & 1 deletion .github/workflows/integration-misc-3-cron.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# THIS FILE WAS AUTOMATICALLY GENERATED, PLEASE DO NOT EDIT.
#
# Generated on 2024-09-09T13:58:35Z by kres 8be5fa7.
# Generated on 2024-10-23T17:20:56Z by kres 6d3cad4.

name: integration-misc-3-cron
concurrency:
Expand Down Expand Up @@ -85,6 +85,13 @@ jobs:
WITH_NETWORK_CHAOS: "yes"
run: |
sudo -E make e2e-qemu
- name: e2e-metal-iso
env:
IMAGE_REGISTRY: registry.dev.siderolabs.io
SHORT_INTEGRATION_TEST: "yes"
WITH_CONFIG_INJECTION_METHOD: metal-iso
run: |
sudo -E make e2e-qemu
- name: save artifacts
if: always()
uses: actions/upload-artifact@v4
Expand Down
7 changes: 7 additions & 0 deletions .kres.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -869,6 +869,13 @@ spec:
SHORT_INTEGRATION_TEST: yes
WITH_NETWORK_CHAOS: yes
IMAGE_REGISTRY: registry.dev.siderolabs.io
- name: e2e-metal-iso
command: e2e-qemu
withSudo: true
environment:
SHORT_INTEGRATION_TEST: yes
WITH_CONFIG_INJECTION_METHOD: "metal-iso"
IMAGE_REGISTRY: registry.dev.siderolabs.io
- name: save-talos-logs
conditions:
- always
Expand Down
3 changes: 3 additions & 0 deletions api/resource/definitions/enums/enums.proto
Original file line number Diff line number Diff line change
Expand Up @@ -372,6 +372,9 @@ enum BlockEncryptionProviderType {
enum BlockFilesystemType {
FILESYSTEM_TYPE_NONE = 0;
FILESYSTEM_TYPE_XFS = 1;
FILESYSTEM_TYPE_VFAT = 2;
FILESYSTEM_TYPE_EXT4 = 3;
FILESYSTEM_TYPE_ISO9660 = 4;
}

// BlockVolumePhase describes volume phase.
Expand Down
58 changes: 37 additions & 21 deletions cmd/talosctl/cmd/mgmt/cluster/create.go
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,7 @@ var (
withSiderolinkAgent agentFlag
withJSONLogs bool
debugShellEnabled bool
configInjectionMethodFlag string
)

// createCmd represents the cluster up command.
Expand Down Expand Up @@ -865,6 +866,17 @@ func create(ctx context.Context) error {
// Add talosconfig to provision options, so we'll have it to parse there
provisionOptions = append(provisionOptions, provision.WithTalosConfig(configBundle.TalosConfig()))

// Translate the --config-injection-method flag value into the provisioner enum.
// An empty value, "default" and "http" all select HTTP-based config injection.
var configInjectionMethod provision.ConfigInjectionMethod

switch configInjectionMethodFlag {
case "", "default", "http":
	configInjectionMethod = provision.ConfigInjectionMethodHTTP
case "metal-iso":
	configInjectionMethod = provision.ConfigInjectionMethodMetalISO
default:
	// report the rejected flag value: configInjectionMethod has not been assigned on this path
	return fmt.Errorf("unknown config injection method %q", configInjectionMethodFlag)
}

// Create the controlplane nodes.
for i := range controlplanes {
var cfg config.Provider
Expand All @@ -882,16 +894,17 @@ func create(ctx context.Context) error {
}

nodeReq := provision.NodeRequest{
Name: nodeName(clusterName, "controlplane", i+1, nodeUUID),
Type: machine.TypeControlPlane,
IPs: nodeIPs,
Memory: controlPlaneMemory,
NanoCPUs: controlPlaneNanoCPUs,
Disks: disks,
SkipInjectingConfig: skipInjectingConfig,
BadRTC: badRTC,
ExtraKernelArgs: extraKernelArgs,
UUID: pointer.To(nodeUUID),
Name: nodeName(clusterName, "controlplane", i+1, nodeUUID),
Type: machine.TypeControlPlane,
IPs: nodeIPs,
Memory: controlPlaneMemory,
NanoCPUs: controlPlaneNanoCPUs,
Disks: disks,
SkipInjectingConfig: skipInjectingConfig,
ConfigInjectionMethod: configInjectionMethod,
BadRTC: badRTC,
ExtraKernelArgs: extraKernelArgs,
UUID: pointer.To(nodeUUID),
}

if withInitNode && i == 0 {
Expand All @@ -909,6 +922,7 @@ func create(ctx context.Context) error {
}

nodeReq.Config = cfg

request.Nodes = append(request.Nodes, nodeReq)
}

Expand Down Expand Up @@ -956,17 +970,18 @@ func create(ctx context.Context) error {

request.Nodes = append(request.Nodes,
provision.NodeRequest{
Name: nodeName(clusterName, "worker", i, nodeUUID),
Type: machine.TypeWorker,
IPs: nodeIPs,
Memory: workerMemory,
NanoCPUs: workerNanoCPUs,
Disks: disks,
Config: cfg,
SkipInjectingConfig: skipInjectingConfig,
BadRTC: badRTC,
ExtraKernelArgs: extraKernelArgs,
UUID: pointer.To(nodeUUID),
Name: nodeName(clusterName, "worker", i, nodeUUID),
Type: machine.TypeWorker,
IPs: nodeIPs,
Memory: workerMemory,
NanoCPUs: workerNanoCPUs,
Disks: disks,
Config: cfg,
ConfigInjectionMethod: configInjectionMethod,
SkipInjectingConfig: skipInjectingConfig,
BadRTC: badRTC,
ExtraKernelArgs: extraKernelArgs,
UUID: pointer.To(nodeUUID),
})
}

Expand Down Expand Up @@ -1312,6 +1327,7 @@ func init() {
createCmd.Flags().BoolVar(&withUUIDHostnames, "with-uuid-hostnames", false, "use machine UUIDs as default hostnames (QEMU only)")
createCmd.Flags().Var(&withSiderolinkAgent, "with-siderolink", "enables the use of siderolink agent as configuration apply mechanism. `true` or `wireguard` enables the agent, `tunnel` enables the agent with grpc tunneling") //nolint:lll
createCmd.Flags().BoolVar(&withJSONLogs, "with-json-logs", false, "enable JSON logs receiver and configure Talos to send logs there")
createCmd.Flags().StringVar(&configInjectionMethodFlag, "config-injection-method", "", "a method to inject machine config: default is HTTP server, 'metal-iso' to mount an ISO (QEMU only)")

createCmd.MarkFlagsMutuallyExclusive(inputDirFlag, nodeInstallImageFlag)
createCmd.MarkFlagsMutuallyExclusive(inputDirFlag, configDebugFlag)
Expand Down
8 changes: 8 additions & 0 deletions hack/test/e2e-qemu.sh
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,14 @@ EOF
;;
esac

# Append --config-injection-method=<value> to QEMU_FLAGS when
# WITH_CONFIG_INJECTION_METHOD is set to anything other than "default";
# an unset/empty variable falls back to "default" and adds no flag.
case "${WITH_CONFIG_INJECTION_METHOD:-default}" in
default)
;;
*)
QEMU_FLAGS+=("--config-injection-method=${WITH_CONFIG_INJECTION_METHOD}")
;;
esac

function create_cluster {
build_registry_mirrors

Expand Down
5 changes: 4 additions & 1 deletion internal/app/machined/pkg/controllers/block/discovery.go
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,10 @@ func (ctrl *DiscoveryController) rescan(ctx context.Context, r controller.Runtim
dv.TypedSpec().Type = device.TypedSpec().Type
dv.TypedSpec().DevicePath = device.TypedSpec().DevicePath
dv.TypedSpec().Parent = device.TypedSpec().Parent
dv.TypedSpec().ParentDevPath = filepath.Join("/dev", device.TypedSpec().Parent)

if device.TypedSpec().Parent != "" {
dv.TypedSpec().ParentDevPath = filepath.Join("/dev", device.TypedSpec().Parent)
}

dv.TypedSpec().SetSize(info.Size)
dv.TypedSpec().SectorSize = info.SectorSize
Expand Down
75 changes: 53 additions & 22 deletions internal/app/machined/pkg/runtime/v1alpha1/platform/metal/metal.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,31 +7,33 @@ package metal

import (
"context"
stderrors "errors"
"fmt"
"log"
"os"
"path/filepath"
"time"

"github.com/cosi-project/runtime/pkg/resource"
"github.com/cosi-project/runtime/pkg/safe"
"github.com/cosi-project/runtime/pkg/state"
"github.com/siderolabs/gen/channel"
"github.com/siderolabs/go-blockdevice/blockdevice/filesystem"
"github.com/siderolabs/go-blockdevice/blockdevice/probe"
"github.com/siderolabs/go-pointer"
"github.com/siderolabs/go-procfs/procfs"
"github.com/siderolabs/go-retry/retry"
"golang.org/x/sys/unix"
"gopkg.in/yaml.v3"

"github.com/siderolabs/talos/internal/app/machined/pkg/runtime"
"github.com/siderolabs/talos/internal/app/machined/pkg/runtime/v1alpha1/platform/errors"
"github.com/siderolabs/talos/internal/app/machined/pkg/runtime/v1alpha1/platform/internal/netutils"
"github.com/siderolabs/talos/internal/app/machined/pkg/runtime/v1alpha1/platform/metal/oauth2"
"github.com/siderolabs/talos/internal/app/machined/pkg/runtime/v1alpha1/platform/metal/url"
"github.com/siderolabs/talos/internal/pkg/mount/v2"
"github.com/siderolabs/talos/pkg/download"
"github.com/siderolabs/talos/pkg/machinery/cel"
"github.com/siderolabs/talos/pkg/machinery/cel/celenv"
"github.com/siderolabs/talos/pkg/machinery/constants"
"github.com/siderolabs/talos/pkg/machinery/meta"
"github.com/siderolabs/talos/pkg/machinery/resources/block"
"github.com/siderolabs/talos/pkg/machinery/resources/hardware"
runtimeres "github.com/siderolabs/talos/pkg/machinery/resources/runtime"
)
Expand Down Expand Up @@ -119,40 +121,69 @@ func (m *Metal) Mode() runtime.Mode {
return runtime.ModeMetal
}

// metalISOMatch builds the CEL volume locator expression which matches a volume
// whose filesystem label or partition label equals constants.MetalConfigISOLabel.
//
// The parse result is passed through cel.MustExpression, so no error is returned
// to the caller (presumably a parse failure panics, per the Must* convention).
func metalISOMatch() cel.Expression {
	isoLabel := constants.MetalConfigISOLabel
	locator := fmt.Sprintf("volume.label == '%s' || volume.partition_label == '%s'", isoLabel, isoLabel)

	expr, err := cel.ParseBooleanExpression(locator, celenv.VolumeLocator())

	return cel.MustExpression(expr, err)
}

func readConfigFromISO(ctx context.Context, r state.State) ([]byte, error) {
if err := netutils.WaitForDevicesReady(ctx, r); err != nil {
return nil, fmt.Errorf("failed to wait for devices: %w", err)
volumeID := "platform/metal/config"

// create a volume which matches the expected filesystem label
vc := block.NewVolumeConfig(block.NamespaceName, volumeID)
vc.Metadata().Labels().Set(block.PlatformLabel, "")
vc.TypedSpec().Type = block.VolumeTypePartition
vc.TypedSpec().Locator = block.LocatorSpec{
Match: metalISOMatch(),
}

dev, err := probe.GetDevWithFileSystemLabel(constants.MetalConfigISOLabel)
if err != nil {
return nil, fmt.Errorf("failed to find %s iso: %w", constants.MetalConfigISOLabel, err)
vc.TypedSpec().Mount = block.MountSpec{
TargetPath: mnt,
}

//nolint:errcheck
defer dev.Close()
// conflict errors are tolerated (presumably the volume config was already created earlier)
if err := r.Create(ctx, vc); err != nil && !state.IsConflictError(err) {
	return nil, fmt.Errorf("error creating metal config ISO volume configuration: %w", err)
}

sb, err := filesystem.Probe(dev.Device().Name())
// wait for the volume to be either ready or missing (includes waiting for devices to be ready)
volumeStatus, err := safe.StateWatchFor[*block.VolumeStatus](ctx,
r,
block.NewVolumeStatus(vc.Metadata().Namespace(), vc.Metadata().ID()).Metadata(),
state.WithEventTypes(state.Created, state.Updated),
state.WithCondition(func(r resource.Resource) (bool, error) {
phase := r.(*block.VolumeStatus).TypedSpec().Phase

return phase == block.VolumePhaseReady || phase == block.VolumePhaseMissing, nil
}),
)
if err != nil {
return nil, err
return nil, fmt.Errorf("failed to watch for volume status: %w", err)
}

if sb == nil {
return nil, stderrors.New("error while substituting filesystem type")
if volumeStatus.TypedSpec().Phase == block.VolumePhaseMissing {
return nil, fmt.Errorf("failed to find volume with machine configuration %s", vc.TypedSpec().Locator.Match)
}

if err = unix.Mount(dev.Device().Name(), mnt, sb.Type(), unix.MS_RDONLY, ""); err != nil {
return nil, fmt.Errorf("failed to mount iso: %w", err)
// mount the volume, unmount when done
unmounter, err := mount.NewPoint(volumeStatus.TypedSpec().MountLocation, vc.TypedSpec().Mount.TargetPath, volumeStatus.TypedSpec().Filesystem.String(), mount.WithReadonly()).Mount()
if err != nil {
return nil, fmt.Errorf("failed to mount volume: %w", err)
}

defer unmounter() //nolint:errcheck

b, err := os.ReadFile(filepath.Join(mnt, filepath.Base(constants.ConfigPath)))
if err != nil {
return nil, fmt.Errorf("read config: %s", err.Error())
return nil, fmt.Errorf("read config: %w", err)
}

if err = unix.Unmount(mnt, 0); err != nil {
return nil, fmt.Errorf("failed to unmount: %w", err)
}
log.Printf("read machine config from volume: %s (filesystem %q, UUID %q, size %s)",
volumeStatus.TypedSpec().Location,
volumeStatus.TypedSpec().Filesystem,
volumeStatus.TypedSpec().UUID,
volumeStatus.TypedSpec().PrettySize,
)

return b, nil
}
Expand Down
Loading

0 comments on commit d393938

Please sign in to comment.