From 559065dfde9ff93b72be7e8390ce01f284c1b89a Mon Sep 17 00:00:00 2001 From: Amit Barve Date: Tue, 22 Oct 2024 06:49:05 -0700 Subject: [PATCH] Use Block CIM layers for container RootFS This commit adds the ability to parse block CIM layer mounts and to mount the merged block CIMs to be used as a rootfs for a container. Signed-off-by: Amit Barve --- internal/layers/helpers.go | 7 +- internal/layers/wcow_mount.go | 166 +++++++++++++++++++++++--------- internal/layers/wcow_parse.go | 91 ++++++++++++++++- internal/resources/resources.go | 24 +++++ internal/wclayer/cim/mount.go | 89 ++++++++++++++++- 5 files changed, 323 insertions(+), 54 deletions(-) diff --git a/internal/layers/helpers.go b/internal/layers/helpers.go index 2a67a7fb1b..2c9f66a876 100644 --- a/internal/layers/helpers.go +++ b/internal/layers/helpers.go @@ -75,8 +75,11 @@ const ( // parent layer CIMs parentLayerCimPathsFlag = "parentCimPaths=" - LegacyMountType string = "windows-layer" - CimFSMountType string = "CimFS" + LegacyMountType string = "windows-layer" + ForkedCIMMountType string = "CimFS" + BlockCIMMountType string = "BlockCIM" + BlockCIMTypeFlag string = "blockCIMType=" + mergedCIMPathFlag string = "mergedCIMPath=" ) // getOptionAsArray finds if there is an option which has the given prefix and if such an diff --git a/internal/layers/wcow_mount.go b/internal/layers/wcow_mount.go index fa8a5c0777..9df9f199eb 100644 --- a/internal/layers/wcow_mount.go +++ b/internal/layers/wcow_mount.go @@ -12,12 +12,14 @@ import ( "github.com/pkg/errors" "github.com/sirupsen/logrus" + "go.opencensus.io/trace" "golang.org/x/sys/windows" "github.com/Microsoft/hcsshim/computestorage" hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" "github.com/Microsoft/hcsshim/internal/hcserror" "github.com/Microsoft/hcsshim/internal/log" + "github.com/Microsoft/hcsshim/internal/oc" "github.com/Microsoft/hcsshim/internal/resources" "github.com/Microsoft/hcsshim/internal/uvm" 
"github.com/Microsoft/hcsshim/internal/uvm/scsi" @@ -37,6 +39,11 @@ func MountWCOWLayers(ctx context.Context, containerID string, vm *uvm.UtilityVM, return mountProcessIsolatedForkedCimLayers(ctx, containerID, l) } return nil, nil, fmt.Errorf("hyperv isolated containers aren't supported with forked cim layers") + case *wcowBlockCIMLayers: + if vm == nil { + return mountProcessIsolatedBlockCIMLayers(ctx, containerID, l) + } + return nil, nil, fmt.Errorf("hyperv isolated containers aren't supported with block cim layers") default: return nil, nil, fmt.Errorf("invalid layer type %T", wl) } @@ -171,53 +178,43 @@ func mountProcessIsolatedWCIFSLayers(ctx context.Context, l *wcowWCIFSLayers) (_ }, nil } -// wcowHostForkedCIMLayerCloser is used to cleanup forked CIM layers mounted on the host for process isolated -// containers -type wcowHostForkedCIMLayerCloser struct { - scratchLayerData - containerID string -} - -func (l *wcowHostForkedCIMLayerCloser) Release(ctx context.Context) error { - mountPath, err := wclayer.GetLayerMountPath(ctx, l.scratchLayerPath) - if err != nil { - return err - } - - if err = computestorage.DetachOverlayFilter(ctx, mountPath, hcsschema.UnionFS); err != nil { - return err - } - - if err = cimlayer.CleanupContainerMounts(l.containerID); err != nil { - return err - } - return wclayer.DeactivateLayer(ctx, l.scratchLayerPath) -} +// Handles the common processing for mounting all 3 types of cimfs layers. This involves +// mounting the scratch, attaching the filter and preparing the return values. +// `volume` is the path to the volume at which read only layer CIMs are mounted. 
+func mountProcessIsolatedCimLayersCommon(ctx context.Context, containerID string, volume string, s *scratchLayerData) (_ *MountedWCOWLayers, _ resources.ResourceCloser, err error) { + ctx, span := oc.StartSpan(ctx, "mountProcessIsolatedCimLayersCommon") + defer func() { + oc.SetSpanStatus(span, err) + span.End() + }() + span.AddAttributes( + trace.StringAttribute("scratch path", s.scratchLayerPath), + trace.StringAttribute("mounted CIM volume", volume)) -func mountProcessIsolatedForkedCimLayers(ctx context.Context, containerID string, l *wcowForkedCIMLayers) (_ *MountedWCOWLayers, _ resources.ResourceCloser, err error) { - if err = wclayer.ActivateLayer(ctx, l.scratchLayerPath); err != nil { - return nil, nil, err - } + rcl := &resources.ResourceCloserList{} defer func() { if err != nil { - _ = wclayer.DeactivateLayer(ctx, l.scratchLayerPath) + if rErr := rcl.Release(ctx); rErr != nil { + log.G(ctx).WithError(err).Warnf("mount process isolated cim layers common, undo failed with: %s", rErr) + } } }() - mountPath, err := wclayer.GetLayerMountPath(ctx, l.scratchLayerPath) - if err != nil { + if err = wclayer.ActivateLayer(ctx, s.scratchLayerPath); err != nil { return nil, nil, err } + rcl.AddFunc(func(uCtx context.Context) error { + return wclayer.DeactivateLayer(uCtx, s.scratchLayerPath) + }) - volume, err := cimlayer.MountForkedCimLayer(ctx, l.layers[0].cimPath, containerID) + mountPath, err := wclayer.GetLayerMountPath(ctx, s.scratchLayerPath) if err != nil { - return nil, nil, fmt.Errorf("mount layer cim: %w", err) + return nil, nil, err } - defer func() { - if err != nil { - _ = cimlayer.UnmountCimLayer(ctx, volume) - } - }() + log.G(ctx).WithFields(logrus.Fields{ + "scratch": s.scratchLayerPath, + "mounted path": mountPath, + }).Debug("scratch activated") layerID, err := cimlayer.LayerID(volume) if err != nil { @@ -239,22 +236,97 @@ func mountProcessIsolatedForkedCimLayers(ctx context.Context, containerID string if err = computestorage.AttachOverlayFilter(ctx, 
mountPath, layerData); err != nil { return nil, nil, err } + rcl.AddFunc(func(uCtx context.Context) error { + return computestorage.DetachOverlayFilter(uCtx, mountPath, hcsschema.UnionFS) + }) + + log.G(ctx).WithField("layer data", layerData).Debug("unionFS filter attached") + + return &MountedWCOWLayers{ + RootFS: mountPath, + MountedLayerPaths: []MountedWCOWLayer{{ + LayerID: layerID, + MountedPath: volume, + }}, + }, rcl, nil +} + +func mountProcessIsolatedForkedCimLayers(ctx context.Context, containerID string, l *wcowForkedCIMLayers) (_ *MountedWCOWLayers, _ resources.ResourceCloser, err error) { + ctx, span := oc.StartSpan(ctx, "mountProcessIsolatedForkedCimLayers") + defer func() { + oc.SetSpanStatus(span, err) + span.End() + }() + + rcl := &resources.ResourceCloserList{} + defer func() { + if err != nil { + if rErr := rcl.Release(ctx); rErr != nil { + log.G(ctx).WithError(err).Warnf("mount process isolated forked CIM layers, undo failed with: %s", rErr) + } + } + }() + + volume, err := cimlayer.MountForkedCimLayer(ctx, l.layers[0].cimPath, containerID) + if err != nil { + return nil, nil, fmt.Errorf("mount forked layer cim: %w", err) + } + rcl.AddFunc(func(uCtx context.Context) error { + return cimlayer.UnmountCimLayer(uCtx, volume) + }) + + mountedLayers, closer, err := mountProcessIsolatedCimLayersCommon(ctx, containerID, volume, &l.scratchLayerData) + if err != nil { + return nil, nil, err + } + return mountedLayers, rcl.Add(closer), nil +} + +func mountProcessIsolatedBlockCIMLayers(ctx context.Context, containerID string, l *wcowBlockCIMLayers) (_ *MountedWCOWLayers, _ resources.ResourceCloser, err error) { + ctx, span := oc.StartSpan(ctx, "mountProcessIsolatedBlockCIMLayers") + defer func() { + oc.SetSpanStatus(span, err) + span.End() + }() + + var volume string + + rcl := &resources.ResourceCloserList{} defer func() { if err != nil { - _ = computestorage.DetachOverlayFilter(ctx, mountPath, hcsschema.UnionFS) + if rErr := rcl.Release(ctx); rErr != nil 
{ + log.G(ctx).WithError(err).Warnf("mount process isolated forked CIM layers, undo failed with: %s", rErr) + } } }() - return &MountedWCOWLayers{ - RootFS: mountPath, - MountedLayerPaths: []MountedWCOWLayer{{ - LayerID: layerID, - MountedPath: volume, - }}, - }, &wcowHostForkedCIMLayerCloser{ - containerID: containerID, - scratchLayerData: l.scratchLayerData, - }, nil + log.G(ctx).WithFields(logrus.Fields{ + "scratch": l.scratchLayerPath, + "merged layer": l.mergedLayer, + "parent layers": l.parentLayers, + }).Debug("mounting process isolated block CIM layers") + + if len(l.parentLayers) > 1 { + volume, err = cimlayer.MergeMountBlockCIMLayer(ctx, l.mergedLayer, l.parentLayers, containerID) + } else { + volume, err = cimlayer.MountBlockCIMLayer(ctx, l.parentLayers[0], containerID) + } + if err != nil { + return nil, nil, fmt.Errorf("mount block CIM layers: %w", err) + } + rcl.AddFunc(func(uCtx context.Context) error { + return cimlayer.UnmountCimLayer(uCtx, volume) + }) + + log.G(ctx).WithField("volume", volume).Debug("mounted blockCIM layers for process isolated container") + + mountedLayers, layerCloser, err := mountProcessIsolatedCimLayersCommon(ctx, containerID, volume, &l.scratchLayerData) + if err != nil { + return nil, nil, fmt.Errorf("failed mount CIM layers common: %w", err) + } + rcl.Add(layerCloser) + + return mountedLayers, rcl, nil } type wcowIsolatedWCIFSLayerCloser struct { diff --git a/internal/layers/wcow_parse.go b/internal/layers/wcow_parse.go index 541766358c..18a18f93e1 100644 --- a/internal/layers/wcow_parse.go +++ b/internal/layers/wcow_parse.go @@ -5,15 +5,18 @@ package layers import ( "context" + "encoding/json" "fmt" "os" "path/filepath" + "strings" "github.com/containerd/containerd/api/types" "github.com/Microsoft/hcsshim/internal/copyfile" "github.com/Microsoft/hcsshim/internal/uvm" "github.com/Microsoft/hcsshim/internal/uvmfolder" + "github.com/Microsoft/hcsshim/pkg/cimfs" ) // WCOW image layers is a tagging interface that all WCOW 
layers MUST implement. This is @@ -67,6 +70,17 @@ type wcowForkedCIMLayers struct { layers []forkedCIMLayer } +// Represents CIM layers where each layer is stored in a block device or in a single file +// and multiple such layer CIMs are merged before mounting them. Currently can only be +// used for process isolated containers. +type wcowBlockCIMLayers struct { + scratchLayerData + // parent layers in order [layerN (top-most), layerN-1,..layer0 (base)] + parentLayers []*cimfs.BlockCIM + // a merged layer is prepared by combining all parent layers + mergedLayer *cimfs.BlockCIM +} + func parseForkedCimMount(m *types.Mount) (*wcowForkedCIMLayers, error) { parentLayerPaths, err := getOptionAsArray(m, parentLayerPathsFlag) if err != nil { @@ -94,8 +108,77 @@ func parseForkedCimMount(m *types.Mount) (*wcowForkedCIMLayers, error) { }, nil } -// ParseWCOWLayers parses the layers provided by containerd into the format understood by hcsshim and prepares -// them for mounting. +// TODO(ambarve): The code to parse a mount type should be in a separate package/module +// somewhere and then should be consumed by both hcsshim & containerd from there. 
+func parseBlockCIMMount(m *types.Mount) (*wcowBlockCIMLayers, error) { + var ( + parentPaths []string + layerType cimfs.BlockCIMType + mergedCIMPath string + ) + + for _, option := range m.Options { + if val, ok := strings.CutPrefix(option, parentLayerCimPathsFlag); ok { + err := json.Unmarshal([]byte(val), &parentPaths) + if err != nil { + return nil, err + } + } else if val, ok = strings.CutPrefix(option, BlockCIMTypeFlag); ok { + if val == "device" { + layerType = cimfs.BlockCIMTypeDevice + } else if val == "file" { + layerType = cimfs.BlockCIMTypeSingleFile + } else { + return nil, fmt.Errorf("invalid block CIM type `%s`", val) + } + } else if val, ok = strings.CutPrefix(option, mergedCIMPathFlag); ok { + mergedCIMPath = val + } + } + + if len(parentPaths) == 0 { + return nil, fmt.Errorf("need at least 1 parent layer") + } + if layerType == cimfs.BlockCIMTypeNone { + return nil, fmt.Errorf("BlockCIM type not provided") + } + if mergedCIMPath == "" && len(parentPaths) > 1 { + return nil, fmt.Errorf("merged CIM path not provided") + } + + var ( + parentLayers []*cimfs.BlockCIM + mergedLayer *cimfs.BlockCIM + ) + + if len(parentPaths) > 1 { + // for single parent layers merge won't be done + mergedLayer = &cimfs.BlockCIM{ + Type: layerType, + BlockPath: filepath.Dir(mergedCIMPath), + CimName: filepath.Base(mergedCIMPath), + } + } + + for _, p := range parentPaths { + parentLayers = append(parentLayers, &cimfs.BlockCIM{ + Type: layerType, + BlockPath: filepath.Dir(p), + CimName: filepath.Base(p), + }) + } + + return &wcowBlockCIMLayers{ + scratchLayerData: scratchLayerData{ + scratchLayerPath: m.Source, + }, + parentLayers: parentLayers, + mergedLayer: mergedLayer, + }, nil +} + +// ParseWCOWLayers parses the layers provided by containerd into the format understood by +// hcsshim and prepares them for mounting. 
func ParseWCOWLayers(rootfs []*types.Mount, layerFolders []string) (WCOWLayers, error) { if err := validateRootfsAndLayers(rootfs, layerFolders); err != nil { return nil, err @@ -123,8 +206,10 @@ func ParseWCOWLayers(rootfs []*types.Mount, layerFolders []string) (WCOWLayers, }, layerPaths: parentLayers, }, nil - case CimFSMountType: + case ForkedCIMMountType: return parseForkedCimMount(m) + case BlockCIMMountType: + return parseBlockCIMMount(m) default: return nil, fmt.Errorf("invalid windows mount type: '%s'", m.Type) } diff --git a/internal/resources/resources.go b/internal/resources/resources.go index cc08b3a566..a111506f62 100644 --- a/internal/resources/resources.go +++ b/internal/resources/resources.go @@ -168,3 +168,27 @@ func ReleaseResources(ctx context.Context, r *Resources, vm *uvm.UtilityVM, all } return nil } + +type ResourceCloserList struct { + closers []ResourceCloser +} + +func (l *ResourceCloserList) Add(rOp ResourceCloser) *ResourceCloserList { + l.closers = append(l.closers, rOp) + return l +} + +func (l *ResourceCloserList) AddFunc(rOp ResourceCloserFunc) *ResourceCloserList { + l.closers = append(l.closers, rOp) + return l +} + +func (l *ResourceCloserList) Release(ctx context.Context) error { + // MUST release in the reverse order + for i := len(l.closers) - 1; i >= 0; i-- { + if oErr := l.closers[i].Release(ctx); oErr != nil { + return oErr + } + } + return nil +} diff --git a/internal/wclayer/cim/mount.go b/internal/wclayer/cim/mount.go index f3ddc2260b..56d0d0ac7d 100644 --- a/internal/wclayer/cim/mount.go +++ b/internal/wclayer/cim/mount.go @@ -6,11 +6,15 @@ import ( "context" "fmt" "os" + "path/filepath" "strings" "github.com/Microsoft/go-winio/pkg/guid" - hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" + "github.com/Microsoft/hcsshim/internal/log" + "github.com/Microsoft/hcsshim/internal/oc" cimfs "github.com/Microsoft/hcsshim/pkg/cimfs" + "github.com/sirupsen/logrus" + "go.opencensus.io/trace" ) var cimMountNamespace 
guid.GUID = guid.GUID{Data1: 0x6827367b, Data2: 0xc388, Data3: 0x4e9b, Data4: [8]byte{0x96, 0x1c, 0x6d, 0x2c, 0x93, 0x6c}} @@ -25,13 +29,88 @@ func MountForkedCimLayer(ctx context.Context, cimPath, containerID string) (stri return "", fmt.Errorf("generated cim mount GUID: %w", err) } - vol, err := cimfs.Mount(cimPath, volumeGUID, hcsschema.CimMountFlagCacheFiles) + vol, err := cimfs.Mount(cimPath, volumeGUID, 0) if err != nil { return "", err } return vol, nil } +// MountBlockCIMLayer mounts the given block cim and returns the mount +// location of that cim. The containerID is used to generate the volumeID for the volume +// at which this CIM is mounted. containerID is used so that if the shim process crashes +// for any reason, the mounted cim can be correctly cleaned up during `shim delete` call. +func MountBlockCIMLayer(ctx context.Context, layer *cimfs.BlockCIM, containerID string) (_ string, err error) { + ctx, span := oc.StartSpan(ctx, "MountBlockCIMLayer") + defer func() { + oc.SetSpanStatus(span, err) + span.End() + }() + span.AddAttributes( + trace.StringAttribute("layer", layer.String())) + + var mountFlags uint32 + switch layer.Type { + case cimfs.BlockCIMTypeDevice: + mountFlags |= cimfs.CimMountBlockDeviceCim + case cimfs.BlockCIMTypeSingleFile: + mountFlags |= cimfs.CimMountSingleFileCim + default: + return "", fmt.Errorf("invalid BlockCIMType for merged layer: %w", os.ErrInvalid) + } + + volumeGUID, err := guid.NewV5(cimMountNamespace, []byte(containerID)) + if err != nil { + return "", fmt.Errorf("generated cim mount GUID: %w", err) + } + + cimPath := filepath.Join(layer.BlockPath, layer.CimName) + + log.G(ctx).WithFields(logrus.Fields{ + "flags": mountFlags, + "volume": volumeGUID.String(), + }).Debug("mounting block layer CIM") + + vol, err := cimfs.Mount(cimPath, volumeGUID, mountFlags) + if err != nil { + return "", err + } + return vol, nil +} + +// MergeMountBlockCIMLayer mounts the given merged block cim and returns the mount +// location of 
that cim. The containerID is used to generate the volumeID for the volume +// at which this CIM is mounted. containerID is used so that if the shim process crashes +// for any reason, the mounted cim can be correctly cleaned up during `shim delete` call. +// parentLayers MUST be in the base to topmost order. I.e., the base layer should be at index 0 +// and the immediate parent MUST be at the last index. NOTE(review): wcowBlockCIMLayers documents its parentLayers as top-most first and they are passed here unchanged; confirm which order is intended. +func MergeMountBlockCIMLayer(ctx context.Context, mergedLayer *cimfs.BlockCIM, parentLayers []*cimfs.BlockCIM, containerID string) (_ string, err error) { + _, span := oc.StartSpan(ctx, "MergeMountBlockCIMLayer") + defer func() { + oc.SetSpanStatus(span, err) + span.End() + }() + span.AddAttributes( + trace.StringAttribute("merged layer", mergedLayer.String()), + trace.StringAttribute("parent layers", fmt.Sprintf("%v", parentLayers))) + + var mountFlags uint32 + switch mergedLayer.Type { + case cimfs.BlockCIMTypeDevice: + mountFlags |= cimfs.CimMountBlockDeviceCim + case cimfs.BlockCIMTypeSingleFile: + mountFlags |= cimfs.CimMountSingleFileCim + default: + return "", fmt.Errorf("invalid BlockCIMType for merged layer: %w", os.ErrInvalid) + } + + volumeGUID, err := guid.NewV5(cimMountNamespace, []byte(containerID)) + if err != nil { + return "", fmt.Errorf("generated cim mount GUID: %w", err) + } + return cimfs.MountMergedBlockCIMs(mergedLayer, parentLayers, mountFlags, volumeGUID) +} + // Unmounts the cim mounted at the given volume func UnmountCimLayer(ctx context.Context, volume string) error { return cimfs.Unmount(volume) @@ -44,6 +123,12 @@ func CleanupContainerMounts(containerID string) error { } volPath := fmt.Sprintf("\\\\?\\Volume{%s}\\", volumeGUID.String()) + + log.L.WithFields(logrus.Fields{ + "volume": volPath, + "containerID": containerID, + }).Debug("cleanup container CIM mounts") + if _, err := os.Stat(volPath); err == nil { err = cimfs.Unmount(volPath) if err != nil {