From 57337afec0ce77eb7bf0185c33303aa62aa01a75 Mon Sep 17 00:00:00 2001 From: Ivan Shvedunov Date: Thu, 27 Sep 2018 21:41:23 +0300 Subject: [PATCH] Add devicemapper GC for persistent rootfs Upon GC at the Virtlet startup, the virtual block devices that are created by Virtlet are recognized by name and the sector 0 of underlying block device (Virtlet magic header). If they're not related to any active domain, they're removed (note that this doesn't mean the data on the device is affected in any way). --- pkg/blockdev/blockdev.go | 245 ++++++++++++++++++ pkg/blockdev/blockdev_test.go | 186 +++++++++++++ pkg/blockdev/fake/blockdev.go | 121 +++++++++ ...ainDefinitions__persistent_rootfs.out.yaml | 8 +- ...estPersistentRootVolume__symlinks.out.yaml | 4 +- pkg/libvirttools/gc.go | 33 +++ pkg/libvirttools/gc_test.go | 85 ++++-- .../persistentroot_volumesource.go | 132 +--------- .../persistentroot_volumesource_test.go | 116 ++++----- pkg/utils/fake/command.go | 16 +- 10 files changed, 724 insertions(+), 222 deletions(-) create mode 100644 pkg/blockdev/blockdev.go create mode 100644 pkg/blockdev/blockdev_test.go create mode 100644 pkg/blockdev/fake/blockdev.go diff --git a/pkg/blockdev/blockdev.go b/pkg/blockdev/blockdev.go new file mode 100644 index 000000000..732905c4f --- /dev/null +++ b/pkg/blockdev/blockdev.go @@ -0,0 +1,245 @@ +/* +Copyright 2018 Mirantis + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package blockdev + +import ( + "crypto/sha256" + "encoding/binary" + "fmt" + "io/ioutil" + "os" + "path/filepath" + "strconv" + "strings" + + "github.com/Mirantis/virtlet/pkg/utils" + "github.com/golang/glog" +) + +const ( + // VirtletLogicalDevicePrefix denotes the required prefix for + // the virtual block devices created by Virtlet. + VirtletLogicalDevicePrefix = "virtlet-dm-" + virtletRootfsMagic = 0x263dbe52ba576702 + virtletRootfsMetadataVersion = 1 + sectorSize = 512 + devnameUeventVar = "DEVNAME=" +) + +type virtletRootfsHeader struct { + Magic uint64 + MetadataVersion uint16 + ImageHash [sha256.Size]byte +} + +// LogicalDeviceHandler makes it possible to store metadata in the +// first sector of a block device, making the rest of the device +// available as another logical device managed by the device mapper. +type LogicalDeviceHandler struct { + commander utils.Commander + devPath string + sysfsPath string +} + +// NewLogicalDeviceHandler creates a new LogicalDeviceHandler using +// the specified commander and paths that should be used in place of +// /dev and /sys directories (empty string to use /dev and /sys, +// respectively) +func NewLogicalDeviceHandler(commander utils.Commander, devPath, sysfsPath string) *LogicalDeviceHandler { + if devPath == "" { + devPath = "/dev" + } + if sysfsPath == "" { + sysfsPath = "/sys" + } + return &LogicalDeviceHandler{commander, devPath, sysfsPath} +} + +// EnsureDevHeaderMatches returns true if the specified block device +// has proper Virtlet header that matches the specified image hash +func (ldh *LogicalDeviceHandler) EnsureDevHeaderMatches(devPath string, imageHash [sha256.Size]byte) (bool, error) { + f, err := os.OpenFile(devPath, os.O_RDWR|os.O_SYNC, 0) + if err != nil { + return false, fmt.Errorf("open %q: %v", devPath, err) + } + defer func() { + if f != nil { + f.Close() + } + }() + + var hdr virtletRootfsHeader + if err := binary.Read(f, binary.BigEndian, &hdr); err != nil { + return false, fmt.Errorf("reading rootfs header: %v", err) + } + + headerMatch := true + switch { + case hdr.Magic != virtletRootfsMagic || hdr.ImageHash != imageHash: + headerMatch = false + if _, err := f.Seek(0, os.SEEK_SET); err != nil { + return false, fmt.Errorf("seek: %v", err) + } + if err := binary.Write(f, binary.BigEndian, virtletRootfsHeader{ + Magic: virtletRootfsMagic, + MetadataVersion: virtletRootfsMetadataVersion, + ImageHash: imageHash, + }); err != nil { + return false, fmt.Errorf("writing rootfs header: %v", err) + } + case hdr.MetadataVersion != virtletRootfsMetadataVersion: + // NOTE: we should handle earlier metadata versions + // after we introduce new ones. But we can't handle + // future metadata versions and any non-matching + // metadata versions are future ones currently, so we + // don't want to lose any data here. + return false, fmt.Errorf("unsupported virtlet root device metadata version %v", hdr.MetadataVersion) + } + + if err := f.Close(); err != nil { + return false, fmt.Errorf("error closing rootfs device: %v", err) + } + f = nil + return headerMatch, nil +} + +// blockDevSizeInSectors returns the size of the block device in sectors +func (ldh *LogicalDeviceHandler) blockDevSizeInSectors(devPath string) (uint64, error) { + // NOTE: this is also doable via ioctl but this way it's + // shorter (no need for fake non-linux impl, extra interface, + // extra fake impl for it). Some links that may help if we + // decide to use the ioctl later on: + // https://github.com/karelzak/util-linux/blob/master/disk-utils/blockdev.c + // https://github.com/aicodix/smr/blob/24aa589f378827a69a07d220f114c169693dacec/smr.go#L29 + out, err := ldh.commander.Command("blockdev", "--getsz", devPath).Run(nil) + if err != nil { + return 0, err + } + nSectors, err := strconv.ParseUint(strings.TrimSpace(string(out)), 10, 64) + if err != nil { + return 0, fmt.Errorf("bad size value returned by blockdev: %q: %v", out, err) + } + return nSectors, nil +} + +// Map maps the device sectors starting from 1 to a new virtual block +// device. dmName specifies the name of the new device. +func (ldh *LogicalDeviceHandler) Map(devPath, dmName string, imageSize uint64) error { + if !strings.HasPrefix(dmName, VirtletLogicalDevicePrefix) { + return fmt.Errorf("bad logical device name %q: must have prefix %q", dmName, VirtletLogicalDevicePrefix) + } + + nSectors, err := ldh.blockDevSizeInSectors(devPath) + if err != nil { + return err + } + + // sector 0 is reserved for the Virtlet metadata + minSectors := (imageSize+sectorSize-1)/sectorSize + 1 + if nSectors < minSectors { + return fmt.Errorf("block device too small for the image: need at least %d bytes (%d sectors) but got %d bytes (%d sectors)", + minSectors*sectorSize, + minSectors, + nSectors*sectorSize, + nSectors) + } + + hostPath, err := filepath.EvalSymlinks(devPath) + if err != nil { + return err + } + + dmTable := fmt.Sprintf("0 %d linear %s 1\n", nSectors-1, hostPath) + _, err = ldh.commander.Command("dmsetup", "create", dmName).Run([]byte(dmTable)) + return err +} + +// Unmap unmaps the virtual block device +func (ldh *LogicalDeviceHandler) Unmap(dmName string) error { + _, err := ldh.commander.Command("dmsetup", "remove", dmName).Run(nil) + return err +} + +// ListVirtletLogicalDevices returns a list of logical devices managed +// by Virtlet +func (ldh *LogicalDeviceHandler) ListVirtletLogicalDevices() ([]string, error) { + table, err := ldh.commander.Command("dmsetup", "table").Run(nil) + if err != nil { + return nil, fmt.Errorf("dmsetup table: %v", err) + } + var r []string + for _, l := range strings.Split(string(table), "\n") { + if l == "" { + continue + } + fields := strings.Fields(l) + if len(fields) != 6 || fields[3] != "linear" { + continue + } + virtDevName := fields[0] + if strings.HasSuffix(virtDevName, ":") { + virtDevName = virtDevName[:len(virtDevName)-1] + } + + devID := fields[4] + ueventFile := filepath.Join(ldh.sysfsPath, "dev/block", devID, "uevent") + ueventContent, err := ioutil.ReadFile(ueventFile) + if err != nil { + glog.Warningf("error reading %q: %v", ueventFile, err) + continue + } + devName := "" + for _, ul := range strings.Split(string(ueventContent), "\n") { + ul = strings.TrimSpace(ul) + if strings.HasPrefix(ul, devnameUeventVar) { + devName = ul[len(devnameUeventVar):] + break + } + } + if devName == "" { + glog.Warningf("bad uevent file %q: no DEVNAME", ueventFile) + continue + } + + isVbd, err := ldh.deviceHasVirtletHeader(devName) + if err != nil { + glog.Warningf("checking device file %q: %v", devName, err) + continue + } + + if isVbd { + r = append(r, virtDevName) + } + } + + return r, nil +} + +func (ldh *LogicalDeviceHandler) deviceHasVirtletHeader(devName string) (bool, error) { + f, err := os.Open(filepath.Join(ldh.devPath, devName)) + if err != nil { + return false, err + } + defer f.Close() + + var hdr virtletRootfsHeader + if err := binary.Read(f, binary.BigEndian, &hdr); err != nil { + return false, err + } + + return hdr.Magic == virtletRootfsMagic, nil +} diff --git a/pkg/blockdev/blockdev_test.go b/pkg/blockdev/blockdev_test.go new file mode 100644 index 000000000..379c40f9f --- /dev/null +++ b/pkg/blockdev/blockdev_test.go @@ -0,0 +1,186 @@ +/* +Copyright 2018 Mirantis + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package blockdev + +import ( + "crypto/sha256" + "os" + "path/filepath" + "reflect" + "testing" + + fake "github.com/Mirantis/virtlet/pkg/blockdev/fake" + "github.com/Mirantis/virtlet/pkg/utils" + fakeutils "github.com/Mirantis/virtlet/pkg/utils/fake" + testutils "github.com/Mirantis/virtlet/pkg/utils/testing" +) + +func TestDevHeader(t *testing.T) { + for _, tc := range []struct { + name string + content []string + dmPath string + fileSize uint64 + imageWrittenAgain bool + errors [2]string + }{ + { + name: "image unchanged", + content: []string{"image1", "image1"}, + fileSize: 8704, // just added a sector + }, + { + name: "image change", + content: []string{"image1", "image2"}, + fileSize: 16384, + imageWrittenAgain: true, + }, + { + name: "first image too big", + content: []string{"image1"}, + fileSize: 4096, + errors: [2]string{ + "too small", + "", + }, + }, + { + name: "second image too big", + content: []string{"image1", "image2"}, + fileSize: 8704, + errors: [2]string{ + "", + "too small", + }, + }, + } { + t.Run(tc.name, func(t *testing.T) { + fake.WithFakeRootDev(t, tc.fileSize, func(devPath, devDir string) { + for n, content := range tc.content { + if content == "" { + continue + } + cmd := fakeutils.NewCommander(nil, nil) + + ldh := NewLogicalDeviceHandler(cmd, "", "") + headerExpectedToMatch := n > 0 && tc.content[n-1] == content + imageHash := sha256.Sum256([]byte(content)) + headerMatches, err := ldh.EnsureDevHeaderMatches(devPath, imageHash) + if err != nil { + t.Fatalf("EnsureDevHeaderMatches: %v", err) + } + + switch { + case headerMatches == headerExpectedToMatch: + // ok + case headerMatches: + t.Errorf("[%d] the header is expected to match but didn't", n) + case !headerMatches: + t.Errorf("[%d] the header is not expected to match but did", n) + } + } + }) + }) + } +} + +func TestCreateRemoveVirtualBlockDevice(t *testing.T) { + fake.WithFakeRootDev(t, 0, func(devPath, devDir string) { + rec := testutils.NewToplevelRecorder() + cmd := fakeutils.NewCommander(rec, []fakeutils.CmdSpec{ + { + Match: "blockdev --getsz", + Stdout: "4", + }, + { + Match: "dmsetup create", + }, + { + Match: "dmsetup remove", + }, + }) + cmd.ReplaceTempPath("__dev__", "/dev") + symlinkPath := filepath.Join(devDir, "rootdevlink") + if err := os.Symlink(devPath, symlinkPath); err != nil { + t.Fatalf("Symlink(): %v", err) + } + + ldh := NewLogicalDeviceHandler(cmd, "", "") + if err := ldh.Map(symlinkPath, "virtlet-dm-foobar", 1024); err != nil { + t.Fatalf("Map(): %v", err) + } + if err := ldh.Unmap("virtlet-dm-foobar"); err != nil { + t.Fatalf("Unmap(): %v", err) + } + + expectedRecs := []*testutils.Record{ + { + Name: "CMD", + Value: map[string]string{ + "cmd": "blockdev --getsz /dev/rootdevlink", + "stdout": "4", + }, + }, + { + Name: "CMD", + Value: map[string]string{ + "cmd": "dmsetup create virtlet-dm-foobar", + "stdin": "0 3 linear /dev/rootdev 1\n", + }, + }, + { + Name: "CMD", + Value: map[string]string{ + "cmd": "dmsetup remove virtlet-dm-foobar", + }, + }, + } + if !reflect.DeepEqual(expectedRecs, rec.Content()) { + t.Errorf("bad commands recorded:\n%s\ninstead of\n%s", utils.ToJSON(rec.Content()), utils.ToJSON(expectedRecs)) + } + }) +} + +func TestIsVirtletBlockDevice(t *testing.T) { + fake.WithFakeRootDevsAndSysfs(t, func(devPaths []string, table, devDir, sysfsDir string) { + cmd := fakeutils.NewCommander(nil, []fakeutils.CmdSpec{ + { + Match: "^dmsetup table$", + Stdout: table, + }, + }) + ldh := NewLogicalDeviceHandler(cmd, devDir, sysfsDir) + for _, devPath := range devPaths { + if _, err := ldh.EnsureDevHeaderMatches(devPath, sha256.Sum256([]byte("foobar"))); err != nil { + t.Fatalf("EnsureDevHeaderMatches(): %v", err) + } + } + + devs, err := ldh.ListVirtletLogicalDevices() + if err != nil { + t.Fatalf("ListVirtletLogicalDevices(): %v", err) + } + + expectedDevs := []string{ + "virtlet-dm-5edfe2ad-9852-439b-bbfb-3fe8b7c72906", + "virtlet-dm-9a322047-1f0d-4395-8e43-6e1b310ce6f3", + } + if !reflect.DeepEqual(devs, expectedDevs) { + t.Errorf("bad Virtlet block device list: %s instead of %s", utils.ToJSONUnindented(devs), utils.ToJSONUnindented(expectedDevs)) + } + }) +} diff --git a/pkg/blockdev/fake/blockdev.go b/pkg/blockdev/fake/blockdev.go new file mode 100644 index 000000000..f13f5d37b --- /dev/null +++ b/pkg/blockdev/fake/blockdev.go @@ -0,0 +1,121 @@ +/* +Copyright 2018 Mirantis + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package fake + +import ( + "io/ioutil" + "os" + "path/filepath" + "testing" +) + +const dmsetupTable = `vg1-home_rimage_1: 0 209715200 linear 8:1 10240 +vg1-home_rimage_0: 0 209715200 linear 8:17 29304832 +vg1-home: 0 209715200 raid raid1 3 0 region_size 1024 2 253:1 253:2 253:3 253:4 +virtlet-dm-5edfe2ad-9852-439b-bbfb-3fe8b7c72906: 0 8191999 linear 252:0 1 +virtlet-dm-92ed2bdc-4b47-43ce-b0ba-ff1c06a2652d: 0 8191999 linear 252:1 1 +virtlet-dm-9a322047-1f0d-4395-8e43-6e1b310ce6f3: 0 8191999 linear 252:2 1 +vg1-home_rmeta_1: 0 8192 linear 8:1 2048 +vg1-home_rmeta_0: 0 8192 linear 8:17 29296640 +vg1-swap: 0 29294592 linear 8:1 908404736 +vg1-root: 0 29294592 linear 8:17 2048 +vg1-var: 0 1397358592 striped 2 128 8:1 209725440 8:17 239020032 +` + +var ueventFiles = map[string]string{ + "252:0": `MAJOR=252 +MINOR=0 +DEVNAME=rootdev +DEVTYPE=disk +`, + "252:1": `MAJOR=252 +MINOR=0 +DEVNAME=nonrootdev +DEVTYPE=disk +`, + "252:2": `MAJOR=252 +MINOR=0 +DEVNAME=rootdev1 +DEVTYPE=disk +`, + "8:1": `MAJOR=8 +MINOR=1 +DEVNAME=swapdev +DEVTYPE=disk +`, +} + +// WithFakeRootDevs calls the specified function passing it the paths to +// the fake block devices and their containing directory. +func WithFakeRootDevs(t *testing.T, size uint64, names []string, toCall func(devPaths []string, devDir string)) { + tmpDir, err := ioutil.TempDir("", "fake-blockdev") + if err != nil { + t.Fatalf("TempDir(): %v", err) + } + defer os.RemoveAll(tmpDir) + fakeDevDir := filepath.Join(tmpDir, "__dev__") + if err := os.Mkdir(fakeDevDir, 0777); err != nil { + t.Fatalf("Mkdir(): %v", err) + } + + var devPaths []string + for _, name := range names { + devPath := filepath.Join(fakeDevDir, name) + if err := ioutil.WriteFile(devPath, make([]byte, size), 0666); err != nil { + t.Fatalf("WriteFile(): %v", err) + } + devPaths = append(devPaths, devPath) + + } + toCall(devPaths, fakeDevDir) +} + +// WithFakeRootDev calls the specified function passing it the path to +// the fake block device and its containing directory. +func WithFakeRootDev(t *testing.T, size uint64, toCall func(devPath, devDir string)) { + WithFakeRootDevs(t, size, []string{"rootdev"}, func(devPaths []string, devDir string) { + toCall(devPaths[0], devDir) + }) +} + +// WithFakeRootDevsAndSysfs calls the specified function passing it +// the paths to the fake block devices and their containing directory, +// as well as the path to fake sysfs containing uevent entries for the +// fake devices. +func WithFakeRootDevsAndSysfs(t *testing.T, toCall func(devPaths []string, table, devDir, sysfsDir string)) { + WithFakeRootDevs(t, 2048, []string{"rootdev", "rootdev1"}, func(devPaths []string, devDir string) { + // write dummy headerless file as swapdev and nonrootdev + for _, devName := range []string{"swapdev", "nonrootdev"} { + if err := ioutil.WriteFile(filepath.Join(devDir, devName), make([]byte, 1024), 0666); err != nil { + t.Fatalf("WriteFile(): %v", err) + } + } + + sysfsDir := filepath.Join(devDir, "sys") + for id, content := range ueventFiles { + devInfoDir := filepath.Join(sysfsDir, "dev/block", id) + if err := os.MkdirAll(devInfoDir, 0777); err != nil { + t.Fatalf("MkdirAll(): %v", err) + } + if err := ioutil.WriteFile(filepath.Join(devInfoDir, "uevent"), []byte(content), 0666); err != nil { + t.Fatalf("WriteFile(): %v", err) + } + } + + toCall(devPaths, dmsetupTable, devDir, sysfsDir) + }) +} diff --git a/pkg/libvirttools/TestDomainDefinitions__persistent_rootfs.out.yaml b/pkg/libvirttools/TestDomainDefinitions__persistent_rootfs.out.yaml index d16bfc007..d4c9d1bc9 100755 --- a/pkg/libvirttools/TestDomainDefinitions__persistent_rootfs.out.yaml +++ b/pkg/libvirttools/TestDomainDefinitions__persistent_rootfs.out.yaml @@ -6,12 +6,12 @@ stdout: "1000" - name: CMD value: - cmd: dmsetup create virtlet-dm-69eec606-0493-5825-73a4-c5e0c0236155 + cmd: dmsetup create virtlet-dm-231700d5-c9a6-5a49-738d-99a954c51550 stdin: | 0 999 linear /fakedev/69eec606-0493-5825-73a4-c5e0c0236155/volumeDevices/kubernetes.io~local-volume/root 1 - name: CMD value: - cmd: qemu-img convert -O raw /fake/volume/path /dev/mapper/virtlet-dm-69eec606-0493-5825-73a4-c5e0c0236155 + cmd: qemu-img convert -O raw /fake/volume/path /dev/mapper/virtlet-dm-231700d5-c9a6-5a49-738d-99a954c51550 - name: 'domain conn: DefineDomain' value: |- @@ -38,7 +38,7 @@ /vmwrapper - +
@@ -82,4 +82,4 @@ - name: 'domain conn: virtlet-231700d5-c9a6-container1: Undefine' - name: CMD value: - cmd: dmsetup remove virtlet-dm-69eec606-0493-5825-73a4-c5e0c0236155 + cmd: dmsetup remove virtlet-dm-231700d5-c9a6-5a49-738d-99a954c51550 diff --git a/pkg/libvirttools/TestPersistentRootVolume__symlinks.out.yaml b/pkg/libvirttools/TestPersistentRootVolume__symlinks.out.yaml index 054a43f45..7b70d3c90 100755 --- a/pkg/libvirttools/TestPersistentRootVolume__symlinks.out.yaml +++ b/pkg/libvirttools/TestPersistentRootVolume__symlinks.out.yaml @@ -3,7 +3,7 @@ value: persistent/image1 - name: CMD value: - cmd: blockdev --getsz /dev/rootdev + cmd: blockdev --getsz /dev/rootdevlink stdout: "17" - name: CMD value: @@ -29,7 +29,7 @@ value: persistent/image1 - name: CMD value: - cmd: blockdev --getsz /dev/rootdev + cmd: blockdev --getsz /dev/rootdevlink stdout: "17" - name: CMD value: diff --git a/pkg/libvirttools/gc.go b/pkg/libvirttools/gc.go index df5d36a24..30f4fbfbe 100644 --- a/pkg/libvirttools/gc.go +++ b/pkg/libvirttools/gc.go @@ -21,6 +21,8 @@ import ( "os" "path/filepath" "strings" + + "github.com/Mirantis/virtlet/pkg/blockdev" ) const ( @@ -43,6 +45,7 @@ func (v *VirtualizationTool) GarbageCollect() (allErrors []error) { allErrors = append(allErrors, v.removeOrphanRootVolumes(ids)...) allErrors = append(allErrors, v.removeOrphanQcow2Volumes(ids)...) allErrors = append(allErrors, v.removeOrphanConfigImages(ids, configIsoDir)...) + allErrors = append(allErrors, v.removeOrphanVirtualBlockDevices(ids, "", "")...) return } @@ -263,3 +266,33 @@ func (v *VirtualizationTool) removeOrphanConfigImages(ids []string, directory st return allErrors } + +func (v *VirtualizationTool) removeOrphanVirtualBlockDevices(ids []string, devPath, sysfsPath string) []error { + idsInUse := make(map[string]bool) + for _, id := range ids { + idsInUse[id] = true + } + ldh := blockdev.NewLogicalDeviceHandler(v.Commander(), devPath, sysfsPath) + dmNames, err := ldh.ListVirtletLogicalDevices() + if err != nil { + return []error{err} + } + + var allErrors []error + for _, dmName := range dmNames { + if !strings.HasPrefix(dmName, blockdev.VirtletLogicalDevicePrefix) { + panic("bad dmname " + dmName) + } + id := dmName[len(blockdev.VirtletLogicalDevicePrefix):] + if idsInUse[id] { + continue + } + if err := ldh.Unmap(dmName); err != nil { + allErrors = append( + allErrors, + fmt.Errorf("error unmapping %q: %v", dmName, err)) + } + } + + return allErrors +} diff --git a/pkg/libvirttools/gc_test.go b/pkg/libvirttools/gc_test.go index d2a93fc84..ce96b9a79 100644 --- a/pkg/libvirttools/gc_test.go +++ b/pkg/libvirttools/gc_test.go @@ -17,6 +17,7 @@ limitations under the License. package libvirttools import ( + "crypto/sha256" "io/ioutil" "os" "path/filepath" @@ -24,12 +25,15 @@ import ( libvirtxml "github.com/libvirt/libvirt-go-xml" + blockdev "github.com/Mirantis/virtlet/pkg/blockdev" + fakeblockdev "github.com/Mirantis/virtlet/pkg/blockdev/fake" + fakeutils "github.com/Mirantis/virtlet/pkg/utils/fake" testutils "github.com/Mirantis/virtlet/pkg/utils/testing" "github.com/Mirantis/virtlet/tests/gm" ) var ( - randomUUIDs = [...]string{ + testUUIDs = [...]string{ "5edfe2ad-9852-439b-bbfb-3fe8b7c72906", "8a6163c3-e4ee-488f-836a-d2abe92d0744", "13f51f8d-0f4e-4538-9db0-413380ff9c84", @@ -40,12 +44,12 @@ func TestDomainCleanup(t *testing.T) { ct := newContainerTester(t, testutils.NewToplevelRecorder(), nil) defer ct.teardown() - for _, uuid := range randomUUIDs { + for _, uuid := range testUUIDs { if _, err := ct.domainConn.DefineDomain(&libvirtxml.Domain{ Name: "virtlet-" + uuid[:13] + "-container1", UUID: uuid, }); err != nil { - t.Fatalf("Cannot define new fake domain: %v", err) + t.Fatalf("Cannot define the fake domain: %v", err) } } if _, err := ct.domainConn.DefineDomain(&libvirtxml.Domain{ @@ -60,9 +64,8 @@ func TestDomainCleanup(t *testing.T) { } // this should remove all domains (including other than virlet defined) - // with an exception of the last listed in randomUUIDs slice - errors := ct.virtTool.removeOrphanDomains(randomUUIDs[2:]) - if errors != nil { + // with an exception of the last listed in testUUIDs slice + if errors := ct.virtTool.removeOrphanDomains(testUUIDs[2:]); len(errors) != 0 { t.Errorf("removeOrphanDomains returned errors: %v", errors) } @@ -82,7 +85,7 @@ func TestRootVolumesCleanup(t *testing.T) { t.Fatalf("StoragePool(): %v", err) } - for _, uuid := range randomUUIDs { + for _, uuid := range testUUIDs { if _, err := pool.CreateStorageVol(&libvirtxml.StorageVolume{ Name: "root for " + uuid, Target: &libvirtxml.StorageVolumeTarget{Path: "/some/path/virtlet_root_" + uuid}, @@ -102,9 +105,8 @@ func TestRootVolumesCleanup(t *testing.T) { } // this should remove only root volumes corresponding to the two first - // elements of randomUUIDs slice, keeping others - errors := ct.virtTool.removeOrphanRootVolumes(randomUUIDs[2:]) - if errors != nil { + // elements of testUUIDs slice, keeping others + if errors := ct.virtTool.removeOrphanRootVolumes(testUUIDs[2:]); len(errors) != 0 { t.Errorf("removeOrphanRootVolumes returned errors: %v", errors) } @@ -124,7 +126,7 @@ func TestQcow2VolumesCleanup(t *testing.T) { t.Fatalf("StoragePool(): %v", err) } - for _, uuid := range randomUUIDs { + for _, uuid := range testUUIDs { if _, err := pool.CreateStorageVol(&libvirtxml.StorageVolume{ Name: "qcow flexvolume for " + uuid, Target: &libvirtxml.StorageVolumeTarget{Path: "/some/path/virtlet-" + uuid}, @@ -144,9 +146,8 @@ func TestQcow2VolumesCleanup(t *testing.T) { } // this should remove only ephemeral qcow2 volumes corresponding to - // the two first elements of randomUUIDs slice, keeping others - errors := ct.virtTool.removeOrphanQcow2Volumes(randomUUIDs[2:]) - if errors != nil { + // the two first elements of testUUIDs slice, keeping others + if errors := ct.virtTool.removeOrphanQcow2Volumes(testUUIDs[2:]); len(errors) != 0 { t.Errorf("removeOrphanRootVolumes returned errors: %v", errors) } @@ -167,7 +168,7 @@ func TestConfigISOsCleanup(t *testing.T) { } defer os.RemoveAll(directory) - for _, uuid := range randomUUIDs { + for _, uuid := range testUUIDs { fname := filepath.Join(directory, "config-"+uuid+".iso") if file, err := os.Create(fname); err != nil { t.Fatalf("Cannot create fake iso with name %q: %v", fname, err) @@ -191,9 +192,8 @@ func TestConfigISOsCleanup(t *testing.T) { } // this should remove only config iso file corresponding to the first - // element of randomUUIDs slice, keeping other files - errors := ct.virtTool.removeOrphanConfigImages(randomUUIDs[1:], directory) - if errors != nil { + // element of testUUIDs slice, keeping other files + if errors := ct.virtTool.removeOrphanConfigImages(testUUIDs[1:], directory); len(errors) != 0 { t.Errorf("removeOrphanConfigImages returned errors: %v", errors) } @@ -207,7 +207,7 @@ func TestConfigISOsCleanup(t *testing.T) { t.Fatalf("Expected removeOrphanConfigImages to remove single file, but it removed %d files", len(diff)) } - expectedPath := filepath.Join(directory, "config-"+randomUUIDs[0]+".iso") + expectedPath := filepath.Join(directory, "config-"+testUUIDs[0]+".iso") if diff[0] != expectedPath { t.Fatalf("Expected removeOrphanConfigImages to remove only %q file, but it also removed: %q", expectedPath, diff[0]) } @@ -215,6 +215,53 @@ func TestConfigISOsCleanup(t *testing.T) { // no gm validation, because we are testing only file operations in this test } +func TestDeviceMapperCleanup(t *testing.T) { + fakeblockdev.WithFakeRootDevsAndSysfs(t, func(devPaths []string, table, devDir, sysfsDir string) { + dmRemoveCmd := "dmsetup remove virtlet-dm-9a322047-1f0d-4395-8e43-6e1b310ce6f3" + ct := newContainerTester(t, testutils.NewToplevelRecorder(), []fakeutils.CmdSpec{ + { + Match: "^dmsetup table$", + Stdout: table, + }, + { + Match: "^" + dmRemoveCmd + "$", + }, + }) + defer ct.teardown() + + ldh := blockdev.NewLogicalDeviceHandler(ct.virtTool.commander, devDir, sysfsDir) + for _, devPath := range devPaths { + if _, err := ldh.EnsureDevHeaderMatches(devPath, sha256.Sum256([]byte("foobar"))); err != nil { + t.Fatalf("EnsureDevHeaderMatches(): %v", err) + } + } + + for _, uuid := range testUUIDs { + if _, err := ct.domainConn.DefineDomain(&libvirtxml.Domain{ + Name: "virtlet-" + uuid[:13] + "-container1", + UUID: uuid, + }); err != nil { + t.Fatalf("Cannot define the fake domain: %v", err) + } + } + + if errors := ct.virtTool.removeOrphanVirtualBlockDevices(testUUIDs[:], devDir, sysfsDir); len(errors) != 0 { + t.Errorf("removeOrphanDomains returned errors: %v", errors) + } + + n := 0 + for _, r := range ct.rec.Content() { + if r.Name == "CMD" && r.Value.(map[string]string)["cmd"] == dmRemoveCmd { + n++ + } + } + if n != 1 { + t.Errorf("dmsetup remove for the orphaned volume is expected to be called exactly 1 time, but was called %d times", n) + } + }) + // no gm validation b/c we just verify 'dmsetup remove' command above +} + // https://stackoverflow.com/a/45428032 // difference returns the elements in a that aren't in b func difference(a, b []string) []string { diff --git a/pkg/libvirttools/persistentroot_volumesource.go b/pkg/libvirttools/persistentroot_volumesource.go index 214139a2b..f91c64497 100644 --- a/pkg/libvirttools/persistentroot_volumesource.go +++ b/pkg/libvirttools/persistentroot_volumesource.go @@ -18,32 +18,16 @@ package libvirttools import ( "crypto/sha256" - "encoding/binary" "encoding/hex" "fmt" - "os" - "path/filepath" - "strconv" - "strings" libvirtxml "github.com/libvirt/libvirt-go-xml" digest "github.com/opencontainers/go-digest" + "github.com/Mirantis/virtlet/pkg/blockdev" "github.com/Mirantis/virtlet/pkg/metadata/types" ) -const ( - virtletRootfsMagic = 0x263dbe52ba576702 - virtletRootfsMetadataVersion = 1 - sectorSize = 512 -) - -type virtletRootfsHeader struct { - Magic uint64 - MetadataVersion uint16 - ImageHash [sha256.Size]byte -} - // persistentRootVolume represents a root volume that can survive the // deletion of its pod type persistentRootVolume struct { @@ -53,116 +37,22 @@ type persistentRootVolume struct { var _ VMVolume = &persistentRootVolume{} +func (v *persistentRootVolume) devHandler() *blockdev.LogicalDeviceHandler { + return blockdev.NewLogicalDeviceHandler(v.owner.Commander(), "", "") +} + func (v *persistentRootVolume) UUID() string { return v.dev.UUID() } func (v *persistentRootVolume) dmName() string { - return "virtlet-dm-" + v.config.PodSandboxID + return "virtlet-dm-" + v.config.DomainUUID } func (v *persistentRootVolume) dmPath() string { return "/dev/mapper/" + v.dmName() } -func (v *persistentRootVolume) ensureDevHeaderMatches(imageHash [sha256.Size]byte) (bool, error) { - f, err := os.OpenFile(v.dev.HostPath, os.O_RDWR|os.O_SYNC, 0) - if err != nil { - return false, fmt.Errorf("open %q: %v", v.dev.HostPath, err) - } - defer func() { - if f != nil { - f.Close() - } - }() - - var hdr virtletRootfsHeader - if err := binary.Read(f, binary.BigEndian, &hdr); err != nil { - return false, fmt.Errorf("reading rootfs header: %v", err) - } - - headerMatch := true - switch { - case hdr.Magic != virtletRootfsMagic || hdr.ImageHash != imageHash: - headerMatch = false - if _, err := f.Seek(0, os.SEEK_SET); err != nil { - return false, fmt.Errorf("seek: %v", err) - } - if err := binary.Write(f, binary.BigEndian, virtletRootfsHeader{ - Magic: virtletRootfsMagic, - MetadataVersion: virtletRootfsMetadataVersion, - ImageHash: imageHash, - }); err != nil { - return false, fmt.Errorf("writing rootfs header: %v", err) - } - case hdr.MetadataVersion != virtletRootfsMetadataVersion: - // NOTE: we should handle earlier metadata versions - // after we introduce new ones. But we can't handle - // future metadata versions and any non-matching - // metadata versions are future ones currently, so we - // don't want to lose any data here. - return false, fmt.Errorf("unsupported virtlet root device metadata version %v", hdr.MetadataVersion) - } - - if err := f.Close(); err != nil { - return false, fmt.Errorf("error closing rootfs device: %v", err) - } - f = nil - return headerMatch, nil -} - -func (v *persistentRootVolume) blockDevSizeInSectors() (uint64, error) { - // NOTE: this is also doable via ioctl but this way it's - // shorter (no need for fake non-linux impl, extra interface, - // extra fake impl for it). Some links that may help if we - // decide to use the ioctl later on: - // https://github.com/karelzak/util-linux/blob/master/disk-utils/blockdev.c - // https://github.com/aicodix/smr/blob/24aa589f378827a69a07d220f114c169693dacec/smr.go#L29 - out, err := v.owner.Commander().Command("blockdev", "--getsz", v.dev.HostPath).Run(nil) - if err != nil { - return 0, err - } - nSectors, err := strconv.ParseUint(strings.TrimSpace(string(out)), 10, 64) - if err != nil { - return 0, fmt.Errorf("bad size value returned by blockdev: %q: %v", out, err) - } - return nSectors, nil -} - -func (v *persistentRootVolume) dmCmd(cmd []string, stdin string) error { - dmCmd := v.owner.Commander().Command(cmd[0], cmd[1:]...) - var stdinBytes []byte - if stdin != "" { - stdinBytes = []byte(stdin) - } - _, err := dmCmd.Run(stdinBytes) - return err -} - -func (v *persistentRootVolume) dmSetup(imageSize uint64) error { - nSectors, err := v.blockDevSizeInSectors() - if err != nil { - return err - } - // sector 0 is reserved for the Virtlet metadata - minSectors := (imageSize+sectorSize-1)/sectorSize + 1 - if nSectors < minSectors { - return fmt.Errorf("block device too small for the image: need at least %d bytes (%d sectors) but got %d bytes (%d sectors)", - minSectors*sectorSize, - minSectors, - nSectors*sectorSize, - nSectors) - } - hostPath, err := filepath.EvalSymlinks(v.dev.HostPath) - if err != nil { - return err - } - dmTable := fmt.Sprintf("0 %d linear %s 1\n", nSectors-1, hostPath) - dmCmd := v.owner.Commander().Command("dmsetup", "create", v.dmName()) - _, err = dmCmd.Run([]byte(dmTable)) - return err -} - func (v *persistentRootVolume) copyImageToDev(imagePath string) error { _, err := v.owner.Commander().Command("qemu-img", "convert", "-O", "raw", imagePath, v.dmPath()).Run(nil) return err @@ -187,10 +77,11 @@ func (v *persistentRootVolume) Setup() (*libvirtxml.DomainDisk, *libvirtxml.Doma var hash [sha256.Size]byte copy(hash[:], imageHash) - headerMatches, err := v.ensureDevHeaderMatches(hash) + ldh := v.devHandler() + headerMatches, err := ldh.EnsureDevHeaderMatches(v.dev.HostPath, hash) if err == nil { - err = v.dmSetup(imageSize) + err = ldh.Map(v.dev.HostPath, v.dmName(), imageSize) } if err == nil && !headerMatches { @@ -203,12 +94,11 @@ func (v *persistentRootVolume) Setup() (*libvirtxml.DomainDisk, *libvirtxml.Doma return &libvirtxml.DomainDisk{ Device: "disk", - Source: &libvirtxml.DomainDiskSource{Block: &libvirtxml.DomainDiskSourceBlock{Dev: v.dmPath()}}, //hostPath}}, + Source: &libvirtxml.DomainDiskSource{Block: &libvirtxml.DomainDiskSourceBlock{Dev: v.dmPath()}}, Driver: &libvirtxml.DomainDiskDriver{Name: "qemu", Type: "raw"}, }, nil, nil } func (v *persistentRootVolume) Teardown() error { - _, err := v.owner.Commander().Command("dmsetup", "remove", v.dmName()).Run(nil) - return err + return v.devHandler().Unmap(v.dmName()) } diff --git a/pkg/libvirttools/persistentroot_volumesource_test.go b/pkg/libvirttools/persistentroot_volumesource_test.go index df48a61ff..5a408fc65 100644 --- a/pkg/libvirttools/persistentroot_volumesource_test.go +++ b/pkg/libvirttools/persistentroot_volumesource_test.go @@ -17,7 +17,6 @@ limitations under the License. package libvirttools import ( - "io/ioutil" "os" "path/filepath" "strconv" @@ -27,6 +26,7 @@ import ( "github.com/Mirantis/virtlet/tests/gm" digest "github.com/opencontainers/go-digest" + fakeblockdev "github.com/Mirantis/virtlet/pkg/blockdev/fake" "github.com/Mirantis/virtlet/pkg/metadata/types" fakeutils "github.com/Mirantis/virtlet/pkg/utils/fake" testutils "github.com/Mirantis/virtlet/pkg/utils/testing" @@ -98,75 +98,55 @@ func TestPersistentRootVolume(t *testing.T) { }, } { t.Run(tc.name, func(t *testing.T) { - rec := testutils.NewToplevelRecorder() - im := newFakeImageManager(rec.Child("image"), fakeImages...) - if tc.fileSize%512 != 0 { - t.Fatalf("block device size must be a multiple of 512") - } - - tmpDir, err := ioutil.TempDir("", "fake-persistent-rootfs") - if err != nil { - t.Fatalf("TempDir(): %v", err) - } - defer os.RemoveAll(tmpDir) - fakeDevDir := filepath.Join(tmpDir, "__dev__") - if err := os.Mkdir(fakeDevDir, 0777); err != nil { - t.Fatalf("Mkdir(): %v", err) - } - - devPath := filepath.Join(fakeDevDir, "rootdev") - devFile, err := os.Create(devPath) - if err != nil { - t.Fatalf("Create(): %v", err) - } - if _, err := devFile.Write(make([]byte, tc.fileSize)); err != nil { - devFile.Close() - t.Fatalf("Write(): %v", err) - } - if err := devFile.Close(); err != nil { - t.Fatalf("devFile.Close()") - } - devPathToUse := devPath - if tc.useSymlink { - devPathToUse := filepath.Join(fakeDevDir, "rootdevlink") - if err := os.Symlink(devPath, devPathToUse); err != nil { - t.Fatalf("Symlink(): %v", err) + fakeblockdev.WithFakeRootDev(t, tc.fileSize, func(devPath, devDir string) { + rec := testutils.NewToplevelRecorder() + im := newFakeImageManager(rec.Child("image"), fakeImages...) + if tc.fileSize%512 != 0 { + t.Fatalf("block device size must be a multiple of 512") } - } - for n, imageName := range []string{tc.imageName, tc.secondImageName} { - if imageName == "" { - continue + devPathToUse := devPath + if tc.useSymlink { + devPathToUse = filepath.Join(devDir, "rootdevlink") + if err := os.Symlink(devPath, devPathToUse); err != nil { + t.Fatalf("Symlink(): %v", err) + } } - cmdSpecs := []fakeutils.CmdSpec{ - { - Match: "blockdev --getsz", - Stdout: strconv.Itoa(int(tc.fileSize / 512)), - }, - { - Match: "dmsetup create", - }, - { - Match: "dmsetup remove", - }, - } - if n == 0 || tc.imageWrittenAgain { - // qemu-img convert is used to write the image to the block device. - // It should only be called if the image changes. - cmdSpecs = append(cmdSpecs, fakeutils.CmdSpec{ - Match: "qemu-img convert", - }) - } - cmd := fakeutils.NewCommander(rec, cmdSpecs) - cmd.ReplaceTempPath("__dev__", "/dev") - owner := newFakeVolumeOwner(nil, im, cmd) - rootVol := getPersistentRootVolume(t, imageName, devPathToUse, owner) - verifyRootVolumeSetup(t, rec, rootVol, tc.errors[n]) - if tc.errors[n] == "" { - verifyRootVolumeTeardown(t, rec, rootVol) + + for n, imageName := range []string{tc.imageName, tc.secondImageName} { + if imageName == "" { + continue + } + cmdSpecs := []fakeutils.CmdSpec{ + { + Match: "blockdev --getsz", + Stdout: strconv.Itoa(int(tc.fileSize / 512)), + }, + { + Match: "dmsetup create", + }, + { + Match: "dmsetup remove", + }, + } + if n == 0 || tc.imageWrittenAgain { + // qemu-img convert is used to write the image to the block device. + // It should only be called if the image changes. + cmdSpecs = append(cmdSpecs, fakeutils.CmdSpec{ + Match: "qemu-img convert", + }) + } + cmd := fakeutils.NewCommander(rec, cmdSpecs) + cmd.ReplaceTempPath("__dev__", "/dev") + owner := newFakeVolumeOwner(nil, im, cmd) + rootVol := getPersistentRootVolume(t, imageName, devPathToUse, owner) + verifyRootVolumeSetup(t, rec, rootVol, tc.errors[n]) + if tc.errors[n] == "" { + verifyRootVolumeTeardown(t, rec, rootVol) + } } - } - gm.Verify(t, gm.NewYamlVerifier(rec.Content())) + gm.Verify(t, gm.NewYamlVerifier(rec.Content())) + }) }) } } @@ -223,8 +203,8 @@ func verifyRootVolumeTeardown(t *testing.T, rec testutils.Recorder, rootVol *per func getPersistentRootVolume(t *testing.T, imageName, devHostPath string, owner volumeOwner) *persistentRootVolume { volumes, err := GetRootVolume( &types.VMConfig{ - PodSandboxID: testUUID, - Image: imageName, + DomainUUID: testUUID, + Image: imageName, VolumeDevices: []types.VMVolumeDevice{ { DevicePath: "/", diff --git a/pkg/utils/fake/command.go b/pkg/utils/fake/command.go index 12468eecf..d5f145da1 100644 --- a/pkg/utils/fake/command.go +++ b/pkg/utils/fake/command.go @@ -53,20 +53,20 @@ func (c *fakeCommand) Run(stdin []byte) ([]byte, error) { r := map[string]string{ "cmd": fullCmd, } - defer c.rec.Rec("CMD", r) + if c.rec != nil { + defer c.rec.Rec("CMD", r) + } for _, spec := range c.commander.specs { matched, err := regexp.MatchString(spec.Match, fullCmd) if err != nil { return nil, fmt.Errorf("failed to match regexp %q: %v", spec.Match, err) } if matched { - if c.rec != nil { - if stdin != nil { - r["stdin"] = c.subst(string(stdin)) - } - if spec.Stdout != "" { - r["stdout"] = spec.Stdout - } + if stdin != nil { + r["stdin"] = c.subst(string(stdin)) + } + if spec.Stdout != "" { + r["stdout"] = spec.Stdout } return []byte(spec.Stdout), nil }