diff --git a/CHANGELOG.md b/CHANGELOG.md index 4814e62..62e241c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,10 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. - A new CLI flag `--debug` causes `conjur-preflight` to log more verbose information about the execution of the application and its checks. [conjurinc/conjur-preflight#19](https://github.com/conjurinc/conjur-preflight/pull/19) +- `conjur-preflight` now includes disk related checks for read, write, and sync + latency, as well as read and write operations per second (IOPs). These require + `fio` and `libaio` to be present as a prerequesite for these checks. + [conjurinc/conjur-preflight#19](https://github.com/conjurinc/conjur-preflight/pull/19) ### Fixed - Previously, the application version was not properly embedded in the final diff --git a/ci/integration/Dockerfile.rhel.all-dependencies b/ci/integration/Dockerfile.rhel.all-dependencies index e62c874..d9f34d5 100644 --- a/ci/integration/Dockerfile.rhel.all-dependencies +++ b/ci/integration/Dockerfile.rhel.all-dependencies @@ -1,3 +1,46 @@ FROM redhat/ubi8 RUN yum install -y podman + +# Install FIO from source, since it's not available from the +# UBI package repositories unless also running on a registered +# RHEL host. +RUN yum install -y \ + wget \ + gcc \ + make && \ + # + # Build libaio for iops tests + # + wget \ + -O libaio.tar.gz \ + https://pagure.io/libaio/archive/libaio-0.3.111/libaio-libaio-0.3.111.tar.gz && \ + mkdir libaio && \ + tar \ + --strip-components=1 \ + --directory=libaio \ + -xvf libaio.tar.gz && \ + cd libaio && \ + make install && \ + ldconfig && \ + cd .. && \ + rm libaio.tar.gz && \ + # + # Build fio + # + wget \ + -O fio.tar.gz \ + https://github.com/axboe/fio/archive/refs/tags/fio-3.33.tar.gz && \ + mkdir fio && \ + tar \ + --strip-components=1 \ + --directory=fio \ + -xvf fio.tar.gz && \ + cd fio && \ + ./configure && \ + make && \ + make install && \ + cd .. && \ + rm -rf fio.tar.gz && \ + yum remove -y wget make gcc + diff --git a/ci/integration/Dockerfile.ubuntu.all-dependencies b/ci/integration/Dockerfile.ubuntu.all-dependencies index 64c354f..66c538f 100644 --- a/ci/integration/Dockerfile.ubuntu.all-dependencies +++ b/ci/integration/Dockerfile.ubuntu.all-dependencies @@ -22,3 +22,6 @@ RUN apt-get update && \ docker-ce \ docker-ce-cli \ containerd.io + +RUN apt-get update && \ + apt-get install -y fio diff --git a/pkg/checks/disk/fio.go b/pkg/checks/disk/fio.go new file mode 100644 index 0000000..89a4a37 --- /dev/null +++ b/pkg/checks/disk/fio.go @@ -0,0 +1,117 @@ +package disk + +import ( + "bytes" + "encoding/json" + "errors" + "fmt" + "os" + "os/exec" + "strings" +) + +// FioResult is a data structure that represents the JSON +// output returned from running `fio`. +type FioResult struct { + Version string `json:"fio version"` + Jobs []FioJobResult `json:"jobs"` +} + +// FioJobResult represents the results from an individual fio job. A FioResult +// may include multiple job results. +type FioJobResult struct { + Sync FioJobModeResult `json:"sync"` + Read FioJobModeResult `json:"read"` + Write FioJobModeResult `json:"write"` +} + +// FioJobModeResult represents the measurements for a given test mode +// (e.g. read, write). Not all modes provide all values. The populated +// values depend on the fio job parameters. +type FioJobModeResult struct { + Iops float64 `json:"iops"` + IopsMin int64 `json:"iops_min"` + IopsMax int64 `json:"iops_max"` + IopsMean float64 `json:"iops_mean"` + IopsStddev float64 `json:"iops_stddev"` + + LatNs FioResultStats `json:"lat_ns"` +} + +// FioResultStats represents the statistical measurements provided by fio. +type FioResultStats struct { + Min int64 `json:"min"` + Max int64 `json:"max"` + Mean float64 `json:"mean"` + StdDev float64 `json:"stddev"` + N int64 `json:"N"` + Percentile FioPercentile `json:"percentile"` +} + +// FioPercentile provides a simple interface to return particular statistical +// percentiles from the fio stats results. +type FioPercentile struct { + NinetyNinth int64 `json:"99.000000"` +} + +const fioExecutable = "fio" + +func runFio( + jobName string, + args []string, +) (*FioResult, error) { + + fioPath, err := exec.LookPath(fioExecutable) + + if err != nil { + return nil, err + } + + // Create test directory and defer its removal to make sure it's not left + // behind. + err = os.MkdirAll(jobName, os.ModePerm) + if err != nil { + return nil, err + } + defer os.RemoveAll(jobName) + + cmd := exec.Command( + fioPath, + args..., + ) + + var outBuffer, errBuffer bytes.Buffer + cmd.Stdout = &outBuffer + cmd.Stderr = &errBuffer + + err = cmd.Run() // and wait + if err != nil { + return nil, errors.New( + strings.ReplaceAll( + strings.TrimSpace( + errBuffer.String(), + ), + "\n", + ", ", + ), + ) + } + + fioResult := FioResult{} + + jsonBytes := outBuffer.Bytes() + + // Attempt to write the full fio result to a file + outputFilename := fmt.Sprintf("%s.json", jobName) + err = os.WriteFile(outputFilename, jsonBytes, 0644) + if err != nil { + fmt.Fprintf(os.Stderr, "Failed to write %s: %s", outputFilename, err) + } + + err = json.Unmarshal(jsonBytes, &fioResult) + if err != nil { + return nil, err + } + + return &fioResult, nil +} diff --git a/pkg/checks/disk/iops.go b/pkg/checks/disk/iops.go new file mode 100644 index 0000000..ae5649b --- /dev/null +++ b/pkg/checks/disk/iops.go @@ -0,0 +1,120 @@ +package disk + +import ( + "fmt" + "os" + + "github.com/conjurinc/conjur-preflight/pkg/framework" +) + +// IopsCheck is a pre-flight check to report the read and write IOPs for the +// directory in which `conjur-preflight` is run. +type IopsCheck struct { +} + +// Run executes the IopsCheck by running `fio` and processing its output +func (*IopsCheck) Run() <-chan []framework.CheckResult { + future := make(chan []framework.CheckResult) + + go func() { + + fioResult, err := runFioIopsTest() + + if err != nil { + future <- []framework.CheckResult{ + { + Title: "FIO IOPs", + Status: framework.STATUS_ERROR, + Value: "N/A", + Message: err.Error(), + }, + } + + return + } + + future <- []framework.CheckResult{ + fioReadIopsResult(fioResult), + fioWriteIopsResult(fioResult), + } + }() // async + + return future +} + +func fioReadIopsResult(fioResult *FioResult) framework.CheckResult { + + // 50 iops min from https://etcd.io/docs/v3.3/op-guide/hardware/ + status := framework.STATUS_INFO + if fioResult.Jobs[0].Read.Iops < 50 { + status = framework.STATUS_WARN + } + + // Format title + path, _ := os.Getwd() + titleStr := fmt.Sprintf("FIO - read iops (%s)", path) + + // Format value + valueStr := fmt.Sprintf( + "%0.2f (Min: %d, Max: %d, StdDev: %0.2f)", + fioResult.Jobs[0].Read.Iops, + fioResult.Jobs[0].Read.IopsMin, + fioResult.Jobs[0].Read.IopsMax, + fioResult.Jobs[0].Read.IopsStddev, + ) + + return framework.CheckResult{ + Title: titleStr, + Status: status, + Value: valueStr, + } +} + +func fioWriteIopsResult(fioResult *FioResult) framework.CheckResult { + + // 50 iops min from https://etcd.io/docs/v3.3/op-guide/hardware/ + status := framework.STATUS_INFO + if fioResult.Jobs[0].Write.Iops < 50 { + status = framework.STATUS_WARN + } + + // Format title + path, _ := os.Getwd() + titleStr := fmt.Sprintf("FIO - write iops (%s)", path) + + // Format value + valueStr := fmt.Sprintf( + "%0.2f (Min: %d, Max: %d, StdDev: %0.2f)", + fioResult.Jobs[0].Write.Iops, + fioResult.Jobs[0].Write.IopsMin, + fioResult.Jobs[0].Write.IopsMax, + fioResult.Jobs[0].Write.IopsStddev, + ) + + return framework.CheckResult{ + Title: titleStr, + Status: status, + Value: valueStr, + } +} + +func runFioIopsTest() (*FioResult, error) { + return runFio( + "conjur-fio-iops", + []string{ + "--filename=conjur-fio-iops/data", + "--size=100MB", + "--direct=1", + "--rw=randrw", + "--bs=4k", + "--ioengine=libaio", + "--iodepth=256", + "--runtime=10", + "--numjobs=4", + "--time_based", + "--group_reporting", + "--output-format=json", + "--name=conjur-fio-iops", + }, + ) +} diff --git a/pkg/checks/disk/latency.go b/pkg/checks/disk/latency.go new file mode 100644 index 0000000..4d65b01 --- /dev/null +++ b/pkg/checks/disk/latency.go @@ -0,0 +1,120 @@ +package disk + +import ( + "fmt" + "os" + + "github.com/conjurinc/conjur-preflight/pkg/framework" +) + +// LatencyCheck is a pre-flight check to report the read, write, and sync +// latency for the directory in which `conjur-preflight` is run. +type LatencyCheck struct { +} + +// Run executes the LatencyCheck by running `fio` and processing its output +func (*LatencyCheck) Run() <-chan []framework.CheckResult { + future := make(chan []framework.CheckResult) + + go func() { + + fioResult, err := runFioLatencyTest() + + if err != nil { + future <- []framework.CheckResult{ + { + Title: "FIO Latency", + Status: framework.STATUS_ERROR, + Value: "N/A", + Message: err.Error(), + }, + } + + return + } + + future <- []framework.CheckResult{ + fioReadLatencyResult(fioResult), + fioWriteLatencyResult(fioResult), + fioSyncLatencyResult(fioResult), + } + }() // async + + return future +} + +func fioReadLatencyResult(fioResult *FioResult) framework.CheckResult { + // Convert the nanosecond result to milliseconds for readability + latMs := float64(fioResult.Jobs[0].Read.LatNs.Percentile.NinetyNinth) / 1e6 + + latMsStr := fmt.Sprintf("%0.2f ms", latMs) + + status := framework.STATUS_INFO + if latMs > 10.0 { + status = framework.STATUS_WARN + } + + path, _ := os.Getwd() + + return framework.CheckResult{ + Title: fmt.Sprintf("FIO - read latency (99%%, %s)", path), + Status: status, + Value: latMsStr, + } +} + +func fioWriteLatencyResult(fioResult *FioResult) framework.CheckResult { + // Convert the nanosecond result to milliseconds for readability + latMs := float64(fioResult.Jobs[0].Write.LatNs.Percentile.NinetyNinth) / 1e6 + + latMsStr := fmt.Sprintf("%0.2f ms", latMs) + + status := framework.STATUS_INFO + if latMs > 10.0 { + status = framework.STATUS_WARN + } + + path, _ := os.Getwd() + + return framework.CheckResult{ + Title: fmt.Sprintf("FIO - write latency (99%%, %s)", path), + Status: status, + Value: latMsStr, + } +} + +func fioSyncLatencyResult(fioResult *FioResult) framework.CheckResult { + // Convert the nanosecond result to milliseconds for readability + latMs := float64(fioResult.Jobs[0].Sync.LatNs.Percentile.NinetyNinth) / 1e6 + + latMsStr := fmt.Sprintf("%0.2f ms", latMs) + + status := framework.STATUS_INFO + if latMs > 10.0 { + status = framework.STATUS_WARN + } + + path, _ := os.Getwd() + + return framework.CheckResult{ + Title: fmt.Sprintf("FIO - sync latency (99%%, %s)", path), + Status: status, + Value: latMsStr, + } +} + +func runFioLatencyTest() (*FioResult, error) { + return runFio( + "conjur-fio-latency", + []string{ + "--rw=readwrite", + "--ioengine=sync", + "--fdatasync=1", + "--directory=conjur-fio-latency", + "--size=22m", + "--bs=2300", + "--name=conjur-fio-latency", + "--output-format=json", + }, + ) +} diff --git a/pkg/checks/disk.go b/pkg/checks/disk/space.go similarity index 78% rename from pkg/checks/disk.go rename to pkg/checks/disk/space.go index bc3187d..4c5585d 100644 --- a/pkg/checks/disk.go +++ b/pkg/checks/disk/space.go @@ -1,4 +1,4 @@ -package checks +package disk import ( "fmt" @@ -8,10 +8,13 @@ import ( "github.com/shirou/gopsutil/v3/disk" ) -type DiskSpace struct { +// SpaceCheck reports on the available partitions and devices on the current +// machine, as well as their available disk space. +type SpaceCheck struct { } -func (diskSpace *DiskSpace) Run() <-chan []framework.CheckResult { +// Run executes the disk checks and returns their results +func (*SpaceCheck) Run() <-chan []framework.CheckResult { future := make(chan []framework.CheckResult) go func() { diff --git a/pkg/checks/disk_test.go b/pkg/checks/disk/space_test.go similarity index 86% rename from pkg/checks/disk_test.go rename to pkg/checks/disk/space_test.go index 1de69da..6815fd8 100644 --- a/pkg/checks/disk_test.go +++ b/pkg/checks/disk/space_test.go @@ -1,16 +1,16 @@ -package checks_test +package disk_test import ( "regexp" "testing" - "github.com/conjurinc/conjur-preflight/pkg/checks" + "github.com/conjurinc/conjur-preflight/pkg/checks/disk" "github.com/conjurinc/conjur-preflight/pkg/framework" "github.com/stretchr/testify/assert" ) func TestDiskRun(t *testing.T) { - testCheck := &checks.DiskSpace{} + testCheck := &disk.DiskSpace{} resultChan := testCheck.Run() results := <-resultChan diff --git a/pkg/report/default.go b/pkg/report/default.go index 5e45537..e538079 100644 --- a/pkg/report/default.go +++ b/pkg/report/default.go @@ -2,6 +2,7 @@ package report import ( "github.com/conjurinc/conjur-preflight/pkg/checks" + "github.com/conjurinc/conjur-preflight/pkg/checks/disk" "github.com/conjurinc/conjur-preflight/pkg/framework" ) @@ -16,12 +17,12 @@ func NewDefaultReport() framework.Report { &checks.Cpu{}, }, }, - // TODO - // - IOPS { Title: "Disk", Checks: []framework.Check{ - &checks.DiskSpace{}, + &disk.DiskSpace{}, + &disk.DiskIops{}, + &disk.DiskLatency{}, }, }, {