diff --git a/.github/workflows/smoke.yml b/.github/workflows/smoke.yml index e06054f81b8..6ddfd0fd4e4 100644 --- a/.github/workflows/smoke.yml +++ b/.github/workflows/smoke.yml @@ -285,3 +285,26 @@ jobs: run: | export PERFORMANCE_TEST_MODE=${{ matrix.mode }} sudo -E make smoke-performance + + failover-test: + runs-on: ubuntu-latest + needs: [contrib-build, nydus-build] + steps: + - name: Checkout + uses: actions/checkout@v3 + - name: Download Nydus + uses: actions/download-artifact@v3 + with: + name: nydus-artifact + path: target/release + - name: Download Nydusify + uses: actions/download-artifact@v3 + with: + name: nydusify-artifact + path: contrib/nydusify/cmd + - name: Prepare Nydus Container Environment + run: | + sudo bash misc/failover/prepare.sh + - name: Failover Test + run: | + sudo -E make smoke-failover diff --git a/Makefile b/Makefile index 07e6c48a273..22a9ceebf4b 100644 --- a/Makefile +++ b/Makefile @@ -133,6 +133,9 @@ smoke-performance: smoke-benchmark: make -C smoke test-benchmark +smoke-failover: + make -C smoke test-failover + smoke: release smoke-only contrib-build: nydusify ctr-remote nydus-overlayfs diff --git a/misc/failover/prepare.sh b/misc/failover/prepare.sh new file mode 100755 index 00000000000..cf8f5081024 --- /dev/null +++ b/misc/failover/prepare.sh @@ -0,0 +1,23 @@ +#!/bin/bash + +readonly SNAPSHOTTER_VERSION=0.13.3 +readonly NERDCTL_VERSION=1.7.0 +readonly CNI_PLUGINS_VERSION=1.3.0 + +# setup nerdctl and nydusd env +sudo install -D -m 755 contrib/nydusify/cmd/nydusify /usr/local/bin +sudo install -D -m 755 target/release/nydusd target/release/nydus-image /usr/local/bin +wget https://github.com/containerd/nydus-snapshotter/releases/download/v$SNAPSHOTTER_VERSION/nydus-snapshotter-v$SNAPSHOTTER_VERSION-x86_64.tgz +tar zxvf nydus-snapshotter-v$SNAPSHOTTER_VERSION-x86_64.tgz +sudo install -D -m 755 nydus-snapshotter/containerd-nydus-grpc /usr/local/bin +sudo wget 
https://github.com/containerd/nerdctl/releases/download/v$NERDCTL_VERSION/nerdctl-$NERDCTL_VERSION-linux-amd64.tar.gz +sudo tar -xzvf nerdctl-$NERDCTL_VERSION-linux-amd64.tar.gz -C /usr/local/bin +sudo mkdir -p /opt/cni/bin +sudo wget https://github.com/containernetworking/plugins/releases/download/v$CNI_PLUGINS_VERSION/cni-plugins-linux-amd64-v$CNI_PLUGINS_VERSION.tgz +sudo tar -xzvf cni-plugins-linux-amd64-v$CNI_PLUGINS_VERSION.tgz -C /opt/cni/bin +sudo install -D misc/performance/containerd_config.toml /etc/containerd/config.toml +sudo systemctl restart containerd +sudo install -D misc/performance/nydusd_config.json /etc/nydus/nydusd-config.fusedev.json +sudo install -D misc/failover/snapshotter_config.toml /etc/nydus/config.toml +sudo install -D misc/performance/nydus-snapshotter.service /etc/systemd/system/nydus-snapshotter.service +sudo systemctl start nydus-snapshotter diff --git a/misc/failover/snapshotter_config.toml b/misc/failover/snapshotter_config.toml new file mode 100644 index 00000000000..1138d652d87 --- /dev/null +++ b/misc/failover/snapshotter_config.toml @@ -0,0 +1,132 @@ +version = 1 +# Snapshotter's own home directory where it stores and creates necessary resources +root = "/var/lib/containerd-nydus" +# The snapshotter's GRPC server socket, containerd will connect to plugin on this socket +address = "/run/containerd-nydus/containerd-nydus-grpc.sock" +daemon_mode = "dedicated" +# Whether snapshotter should try to clean up resources when it is closed +cleanup_on_close = false + +[system] +# Snapshotter's debug and trace HTTP server interface +enable = true +# Unix domain socket path where system controller is listening on +address = "/run/containerd-nydus/system.sock" + +[system.debug] +# Snapshotter can profile the CPU utilization of each nydusd daemon when it is being started. +# This option specifies the profile duration when nydusd is downloading and uncompressing data.
+daemon_cpu_profile_duration_secs = 5 +# Enable by assigning an address, empty indicates pprof server is disabled +pprof_address = "" + +[daemon] +# Specify a configuration file for nydusd +nydusd_config = "/etc/nydus/nydusd-config.fusedev.json" +nydusd_path = "/usr/local/bin/nydusd" +nydusimage_path = "/usr/local/bin/nydus-image" +# fusedev or fscache +fs_driver = "fusedev" +# How to process when daemon dies: "none", "restart" or "failover" +recover_policy = "failover" +# Nydusd worker thread number to handle FUSE or fscache requests, [0-1024]. +# Setting to 0 will use the default configuration of nydusd. +threads_number = 4 +# Log rotation size for nydusd, in unit MB(megabytes) +log_rotation_size = 100 + +[cgroup] +# Whether to use separate cgroup for nydusd. +enable = true +# The memory limit for nydusd cgroup, which contains all nydusd processes. +# Percentage is supported as well, please ensure it ends with "%". +# The default unit is bytes. Acceptable values include "209715200", "200MiB", "200Mi" and "10%". +memory_limit = "" + +[log] +# Print logs to stdout rather than logging files +log_to_stdout = false +# Snapshotter's log level +level = "info" +log_rotation_compress = true +log_rotation_local_time = true +# Max number of days to retain logs +log_rotation_max_age = 7 +log_rotation_max_backups = 5 +# In unit MB(megabytes) +log_rotation_max_size = 100 + +[metrics] +# Enable by assigning an address, empty indicates metrics server is disabled +address = ":9110" + +[remote] +convert_vpc_registry = false + +[remote.mirrors_config] +# Snapshotter will overwrite daemon's mirrors configuration +# if the values loaded from this directory are not null before starting a daemon. +# Set to "" or an empty directory to disable it.
+#dir = "/etc/nydus/certs.d" + +[remote.auth] +# Fetch the private registry auth by listening to K8s API server +enable_kubeconfig_keychain = false +# synchronize `kubernetes.io/dockerconfigjson` secret from kubernetes API server with specified kubeconfig (default `$KUBECONFIG` or `~/.kube/config`) +kubeconfig_path = "" +# Fetch the private registry auth as CRI image service proxy +enable_cri_keychain = false +# the target image service when using image proxy +#image_service_address = "/run/containerd/containerd.sock" + +[snapshot] +# Let containerd use nydus-overlayfs mount helper +enable_nydus_overlayfs = false +# Insert Kata Virtual Volume option to `Mount.Options` +enable_kata_volume = false +# Whether to remove resources when a snapshot is removed +sync_remove = false + +[cache_manager] +# Disable or enable recyclebin +disable = false +# How long to keep deleted files in recyclebin +gc_period = "24h" +# Directory to host cached files +cache_dir = "" + +[image] +public_key_file = "" +validate_signature = false + +# The configurations for features that are not production ready +[experimental] +# Whether to enable stargz support +enable_stargz = false +# Whether to enable referrers support +# The option enables trying to fetch the Nydus image associated with the OCI image and run it. +# Also see https://github.com/opencontainers/distribution-spec/blob/main/spec.md#listing-referrers +enable_referrer_detect = false +# Whether to enable authentication support +# The option enables nydus snapshot to provide backend information to nydusd. +enable_backend_source = false +[experimental.tarfs] +# Whether to enable nydus tarfs mode.
Tarfs is supported by: +# - The EROFS filesystem driver since Linux 6.4 +# - Nydus Image Service release v2.3 +enable_tarfs = false +# Mount rafs on host by loopdev and EROFS +mount_tarfs_on_host = false +# Only enable nydus tarfs mode for images with `tarfs hint` label when true +tarfs_hint = false +# Maximum of concurrence to converting OCIv1 images to tarfs, 0 means default +max_concurrent_proc = 0 +# Mode to export tarfs images: +# - "none" or "": do not export tarfs +# - "layer_verity_only": only generate disk verity information for a layer blob +# - "image_verity_only": only generate disk verity information for all blobs of an image +# - "layer_block": generate a raw block disk image with tarfs for a layer +# - "image_block": generate a raw block disk image with tarfs for an image +# - "layer_block_with_verity": generate a raw block disk image with tarfs for a layer with dm-verity info +# - "image_block_with_verity": generate a raw block disk image with tarfs for an image with dm-verity info +export_mode = "" diff --git a/smoke/Makefile b/smoke/Makefile index a83a56ca95f..697c486fbd3 100644 --- a/smoke/Makefile +++ b/smoke/Makefile @@ -45,3 +45,9 @@ test-benchmark: build # make test TESTS=TestCompatibility test-compatibility: build make test TESTS=TestCompatibility + +# SNAPSHOTTER_SYSTEM_SOCK=/run/containerd-nydus/system.sock +# SNAPSHOTTER=nydus +# FAILOVER_TEST_IMAGE=wordpress +test-failover: build + FAILOVER_TEST=true sudo -E ./smoke.test -test.v -test.timeout 10m -test.parallel=1 -test.run=TestFailover diff --git a/smoke/tests/failover_test.go b/smoke/tests/failover_test.go new file mode 100644 index 00000000000..ad2c8db85fb --- /dev/null +++ b/smoke/tests/failover_test.go @@ -0,0 +1,80 @@ +// Copyright 2023 Nydus Developers. All rights reserved. 
+// +// SPDX-License-Identifier: Apache-2.0 + +package tests + +import ( + "fmt" + "net/http" + "os" + "testing" + "time" + + "github.com/dragonflyoss/nydus/smoke/tests/tool" + "github.com/dragonflyoss/nydus/smoke/tests/tool/test" + "github.com/google/uuid" +) + +// Environment Requirement: Containerd, nerdctl >= 0.22, nydus-snapshotter, nydusd. +// Prepare: setup nydus for containerd, reference: https://github.com/dragonflyoss/nydus/blob/master/docs/containerd-env-setup.md. + +type FailoverTestSuit struct { + t *testing.T +} + +func (f *FailoverTestSuit) TestFailover(t *testing.T) { + // prepare the basic constants + snapshotter := os.Getenv("SNAPSHOTTER") + if snapshotter == "" { + snapshotter = defaultSnapshotter + } + sourceImage := os.Getenv("FAILOVER_TEST_IMAGE") + if sourceImage == "" { + sourceImage = "wordpress" + } + snapshotterSystemSock := os.Getenv("SNAPSHOTTER_SYSTEM_SOCK") + if snapshotterSystemSock == "" { + snapshotterSystemSock = defaultSnapshotterSystemSock + } + + ctx := tool.DefaultContext(t) + + // prepare and convert image + sourceImage = tool.PrepareImage(t, sourceImage) + imageName := fmt.Sprintf("%s:nydus", sourceImage) + tool.ConvertImage(t, ctx, sourceImage, imageName) + + containerName := uuid.NewString() + tool.RunContainerSimple(t, imageName, snapshotter, containerName, false) + defer tool.ClearContainer(t, imageName, snapshotter, containerName) + + snapshotterCli := tool.NewSnapshotterClient(snapshotterSystemSock) + daemons, err := snapshotterCli.GetNydusDaemonInfos() + if err != nil { + t.Fatalf("Failed to get nydus daemon infos: %s", err.Error()) + } + + // kill the nydus daemons + for _, daemon := range daemons { + killCmd := fmt.Sprintf("kill -9 %d", daemon.Pid) + tool.Run(t, killCmd) + } + + // wait for the nydus daemons recover + time.Sleep(5 * time.Second) + + // check the container by requesting its wait url + runArgs := tool.GetRunArgs(t, imageName) + resp, err := http.Get(runArgs.WaitURL) + if err != nil || 
!(resp.StatusCode >= 200 && resp.StatusCode < 300) { + t.Fatal("Failed to access the wait url of the recoverd container") + } +} + +func TestFailover(t *testing.T) { + if v, ok := os.LookupEnv("FAILOVER_TEST"); !ok || v != "true" { + t.Skip("skipping failover test") + } + test.Run(t, &FailoverTestSuit{t: t}) +} diff --git a/smoke/tests/main_test.go b/smoke/tests/main_test.go index 695bc5af9a7..e93598802a0 100644 --- a/smoke/tests/main_test.go +++ b/smoke/tests/main_test.go @@ -11,6 +11,11 @@ import ( "github.com/dragonflyoss/nydus/smoke/tests/tool" ) +const ( + defaultSnapshotter = "nydus" + defaultSnapshotterSystemSock = "/run/containerd-nydus/system.sock" +) + func TestMain(m *testing.M) { registryPort := os.Getenv("REGISTRY_PORT") if registryPort == "" { diff --git a/smoke/tests/performance_test.go b/smoke/tests/performance_test.go index 06fcc9c19ec..ce28ed3c2d6 100644 --- a/smoke/tests/performance_test.go +++ b/smoke/tests/performance_test.go @@ -62,28 +62,10 @@ func (p *PerformanceTestSuite) prepareTestImage(t *testing.T, ctx *tool.Context, if p.testImage != "" { return } - - ctx.PrepareWorkDir(t) - defer ctx.Destroy(t) source := tool.PrepareImage(t, image) - - // Prepare options target := fmt.Sprintf("%s-nydus-%s", source, uuid.NewString()) - fsVersion := fmt.Sprintf("--fs-version %s", ctx.Build.FSVersion) - logLevel := "--log-level warn" - if ctx.Binary.NydusifyOnlySupportV5 { - fsVersion = "" - logLevel = "" - } - enableOCIRef := "" - if ctx.Build.OCIRef { - enableOCIRef = "--oci-ref" - } - // Convert image - convertCmd := fmt.Sprintf("%s %s convert --source %s --target %s --nydus-image %s --work-dir %s %s %s", - ctx.Binary.Nydusify, logLevel, source, target, ctx.Binary.Builder, ctx.Env.WorkDir, fsVersion, enableOCIRef) - tool.RunWithoutOutput(t, convertCmd) + tool.ConvertImage(t, ctx, source, target) p.testImage = target } diff --git a/smoke/tests/tool/container.go b/smoke/tests/tool/container.go index 72394743d0e..97b9ad170e6 100644 --- 
a/smoke/tests/tool/container.go +++ b/smoke/tests/tool/container.go @@ -98,6 +98,18 @@ func SupportContainerImage(image string) bool { return existsInURLWait || existsInCmdStdout } +// GetRunArgs return the RunArgs for the image +func GetRunArgs(t *testing.T, image string) *RunArgs { + repo := ImageRepo(t, image) + if args, ok := urlWait[repo]; ok { + return &args + } + if args, ok := cmdStdout[repo]; ok { + return &args + } + return nil +} + // runURLWaitContainer run container util getting http response from WaitUrl func runURLWaitContainer(t *testing.T, image string, snapshotter string, containerName string, runArgs RunArgs) { cmd := fmt.Sprintf("sudo nerdctl --insecure-registry --snapshotter %s run -d --net=host", snapshotter) @@ -140,7 +152,7 @@ func RunContainerWithBaseline(t *testing.T, image string, containerName string, args, ok := urlWait[ImageRepo(t, image)] if ok { runURLWaitContainer(t, image, "nydus", containerName, args) - defer clearContainer(t, image, "nydus", containerName) + defer ClearContainer(t, image, "nydus", containerName) } else { t.Fatalf(fmt.Sprintf("%s is not in URL_WAIT", image)) } @@ -164,10 +176,10 @@ func RunContainer(t *testing.T, image string, snapshotter string, containerName args, ok := urlWait[ImageRepo(t, image)] if ok { runURLWaitContainer(t, image, snapshotter, containerName, args) - defer clearContainer(t, image, snapshotter, containerName) + defer ClearContainer(t, image, snapshotter, containerName) } else if args, ok := cmdStdout[ImageRepo(t, image)]; ok { runCmdStdoutContainer(t, image, snapshotter, containerName, args) - defer clearContainer(t, image, snapshotter, containerName) + defer ClearContainer(t, image, snapshotter, containerName) } containerMetic.E2ETime = time.Since(startTime) @@ -183,8 +195,24 @@ func RunContainer(t *testing.T, image string, snapshotter string, containerName return &containerMetic } +// RunContainerSimple just runs a container simply +func RunContainerSimple(t *testing.T, image, snapshotter, 
containerName string, autoClear bool) { + args, ok := urlWait[ImageRepo(t, image)] + if ok { + runURLWaitContainer(t, image, snapshotter, containerName, args) + if autoClear { + defer ClearContainer(t, image, snapshotter, containerName) + } + } else if args, ok := cmdStdout[ImageRepo(t, image)]; ok { + runCmdStdoutContainer(t, image, snapshotter, containerName, args) + if autoClear { + defer ClearContainer(t, image, snapshotter, containerName) + } + } +} + // ClearContainer clear container by containerName -func clearContainer(t *testing.T, image string, snapshotter, containerName string) { +func ClearContainer(t *testing.T, image string, snapshotter, containerName string) { RunWithoutOutput(t, fmt.Sprintf("sudo nerdctl --snapshotter %s rm -f %s", snapshotter, containerName)) RunWithoutOutput(t, fmt.Sprintf("sudo nerdctl --snapshotter %s image rm %s", snapshotter, image)) } diff --git a/smoke/tests/tool/image.go b/smoke/tests/tool/image.go index fa2d14bb464..ecb3a6404d8 100644 --- a/smoke/tests/tool/image.go +++ b/smoke/tests/tool/image.go @@ -34,3 +34,26 @@ func PrepareImage(t *testing.T, source string) string { Run(t, fmt.Sprintf("docker push %s", target)) return target } + +// ConvertImage converts source image to nydus image +func ConvertImage(t *testing.T, ctx *Context, source, target string) { + ctx.PrepareWorkDir(t) + defer ctx.Destroy(t) + + // Prepare options + fsVersion := fmt.Sprintf("--fs-version %s", ctx.Build.FSVersion) + logLevel := "--log-level warn" + if ctx.Binary.NydusifyOnlySupportV5 { + fsVersion = "" + logLevel = "" + } + enableOCIRef := "" + if ctx.Build.OCIRef { + enableOCIRef = "--oci-ref" + } + + // Convert image + convertCmd := fmt.Sprintf("%s %s convert --source %s --target %s --nydus-image %s --work-dir %s %s %s", + ctx.Binary.Nydusify, logLevel, source, target, ctx.Binary.Builder, ctx.Env.WorkDir, fsVersion, enableOCIRef) + Run(t, convertCmd) +} diff --git a/smoke/tests/tool/snapshotter.go b/smoke/tests/tool/snapshotter.go new file 
// SnapshotterClient communicates with nydus-snapshotter via
// the system controller endpoint unix socket of nydus-snapshotter.
type SnapshotterClient struct {
	client *http.Client
}

// DaemonInfoFromSnapshotter is one entry of the JSON array decoded by
// GetNydusDaemonInfos.
type DaemonInfoFromSnapshotter struct {
	ID                    string  `json:"id"`
	Pid                   int     `json:"pid"`
	APISock               string  `json:"api_socket"`
	SupervisorPath        string  `json:"supervisor_path"`
	Reference             int     `json:"reference"`
	HostMountpoint        string  `json:"mountpoint"`
	StartupCPUUtilization float64 `json:"startup_cpu_utilization"`
	MemoryRSS             float64 `json:"memory_rss_kb"`
	ReadData              float32 `json:"read_data_kb"`

	Instances map[string]rafsInstanceInfo `json:"instances"`
}

// rafsInstanceInfo is the value type of DaemonInfoFromSnapshotter.Instances.
type rafsInstanceInfo struct {
	SnapshotID  string `json:"snapshot_id"`
	SnapshotDir string `json:"snapshot_dir"`
	Mountpoint  string `json:"mountpoint"`
	ImageID     string `json:"image_id"`
}

// NewSnapshotterClient returns a client whose HTTP requests are all sent over
// the unix socket at sock, whatever host the request URL names.
func NewSnapshotterClient(sock string) *SnapshotterClient {
	dialer := &net.Dialer{
		Timeout:   5 * time.Second,
		KeepAlive: 5 * time.Second,
	}
	transport := &http.Transport{
		MaxIdleConns:          10,
		IdleConnTimeout:       10 * time.Second,
		ExpectContinueTimeout: 1 * time.Second,
		// The network and address derived from the URL are ignored on
		// purpose: every connection goes to the unix socket.
		DialContext: func(ctx context.Context, _, _ string) (net.Conn, error) {
			return dialer.DialContext(ctx, "unix", sock)
		},
	}

	return &SnapshotterClient{
		client: &http.Client{
			Timeout:   30 * time.Second,
			Transport: transport,
		},
	}
}

// GetNydusDaemonInfos requests /api/v1/daemons and decodes the JSON array in
// the response body.
//
// It returns an error when the request fails, when the response status is not
// 2xx, or when the body is not valid JSON for []*DaemonInfoFromSnapshotter.
func (cli *SnapshotterClient) GetNydusDaemonInfos() ([]*DaemonInfoFromSnapshotter, error) {
	// The host part is a placeholder; the transport decides where to connect.
	resp, err := cli.client.Get("http://unix/api/v1/daemons")
	if err != nil {
		return nil, fmt.Errorf("requesting daemon infos: %w", err)
	}
	defer resp.Body.Close()

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("reading daemon infos response: %w", err)
	}

	// An error reply must not be mistaken for a (possibly empty) daemon list.
	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
		return nil, fmt.Errorf("unexpected status %d from snapshotter: %s", resp.StatusCode, body)
	}

	var infos []*DaemonInfoFromSnapshotter
	if err = json.Unmarshal(body, &infos); err != nil {
		return nil, fmt.Errorf("decoding daemon infos: %w", err)
	}

	return infos, nil
}