diff --git a/README.md b/README.md index d6c3e7fd4c7b0..b37dd855f10b7 100644 --- a/README.md +++ b/README.md @@ -258,6 +258,7 @@ For documentation on the latest development code see the [documentation index][d * [mailchimp](./plugins/inputs/mailchimp) * [marklogic](./plugins/inputs/marklogic) * [mcrouter](./plugins/inputs/mcrouter) +* [mdstat](./plugins/inputs/mdstat) * [memcached](./plugins/inputs/memcached) * [mem](./plugins/inputs/mem) * [mesos](./plugins/inputs/mesos) diff --git a/plugins/inputs/all/all.go b/plugins/inputs/all/all.go index aa273a4aa7fb5..d8b42b08782a0 100644 --- a/plugins/inputs/all/all.go +++ b/plugins/inputs/all/all.go @@ -100,6 +100,7 @@ import ( _ "github.com/influxdata/telegraf/plugins/inputs/mailchimp" _ "github.com/influxdata/telegraf/plugins/inputs/marklogic" _ "github.com/influxdata/telegraf/plugins/inputs/mcrouter" + _ "github.com/influxdata/telegraf/plugins/inputs/mdstat" _ "github.com/influxdata/telegraf/plugins/inputs/mem" _ "github.com/influxdata/telegraf/plugins/inputs/memcached" _ "github.com/influxdata/telegraf/plugins/inputs/mesos" diff --git a/plugins/inputs/mdstat/README.md b/plugins/inputs/mdstat/README.md new file mode 100644 index 0000000000000..6180833b69ade --- /dev/null +++ b/plugins/inputs/mdstat/README.md @@ -0,0 +1,49 @@ +# mdstat Input Plugin + +The mdstat plugin gathers statistics about any Linux MD RAID arrays configured on the host +by reading /proc/mdstat. For a full list of available fields see the +/proc/mdstat section of the [proc man page](http://man7.org/linux/man-pages/man5/proc.5.html). +For a better idea of what each field represents, see the +[mdstat man page](https://raid.wiki.kernel.org/index.php/Mdstat). + +Stat collection based on Prometheus' mdstat collection library at https://github.com/prometheus/procfs/blob/master/mdstat.go + + +### Configuration: + +```toml +# Get kernel statistics from /proc/mdstat +[[inputs.mdstat]] + ## Sets file path + ## If not specified, then default is /proc/mdstat + # file_name = "/proc/mdstat" +``` + +### Measurements & Fields: + +- mdstat + - BlocksSynced (if the array is rebuilding/checking, this is the count of blocks that have been scanned) + - BlocksSyncedFinishTime (the expected finish time of the rebuild scan, listed in minutes remaining) + - BlocksSyncedPct (the percentage of the rebuild scan left) + - BlocksSyncedSpeed (the current speed the rebuild is running at, listed in K/sec) + - BlocksTotal (the total count of blocks in the array) + - DisksActive (the number of disks that are currently considered healthy in the array) + - DisksFailed (the current count of failed disks in the array) + - DisksSpare (the current count of "spare" disks in the array) + - DisksTotal (total count of disks in the array) + +### Tags: + +- mdstat + - ActivityState (`active` or `inactive`) + - Devices (comma separated list of devices that make up the array) + - Name (name of the array) + +### Example Output: + +``` +$ telegraf --config ~/ws/telegraf.conf --input-filter mdstat --test +* Plugin: mdstat, Collection 1 +> mdstat,ActivityState=active,Devices=sdm1\,sdn1,Name=md1 BlocksSynced=231299072i,BlocksSyncedFinishTime=0,BlocksSyncedPct=0,BlocksSyncedSpeed=0,BlocksTotal=231299072i,DisksActive=2i,DisksFailed=0i,DisksSpare=0i,DisksTotal=2i,DisksDown=0i 1617814276000000000 +> mdstat,ActivityState=active,Devices=sdm5\,sdn5,Name=md2 BlocksSynced=2996224i,BlocksSyncedFinishTime=0,BlocksSyncedPct=0,BlocksSyncedSpeed=0,BlocksTotal=2996224i,DisksActive=2i,DisksFailed=0i,DisksSpare=0i,DisksTotal=2i,DisksDown=0i 1617814276000000000 +``` diff --git a/plugins/inputs/mdstat/mdstat.go b/plugins/inputs/mdstat/mdstat.go new file mode 100644 index 0000000000000..0f18379c4c092 --- /dev/null +++ b/plugins/inputs/mdstat/mdstat.go @@ -0,0 +1,313 @@ +// +build linux + +// Copyright 2018 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Code has been changed since initial import. + +package mdstat + +import ( + "fmt" + "io/ioutil" + "os" + "regexp" + "sort" + "strconv" + "strings" + + "github.com/influxdata/telegraf" + "github.com/influxdata/telegraf/plugins/inputs" +) + +const ( + defaultHostProc = "/proc" + envProc = "HOST_PROC" +) + +var ( + statusLineRE = regexp.MustCompile(`(\d+) blocks .*\[(\d+)/(\d+)\] \[([U_]+)\]`) + recoveryLineBlocksRE = regexp.MustCompile(`\((\d+)/\d+\)`) + recoveryLinePctRE = regexp.MustCompile(`= (.+)%`) + recoveryLineFinishRE = regexp.MustCompile(`finish=(.+)min`) + recoveryLineSpeedRE = regexp.MustCompile(`speed=(.+)[A-Z]`) + componentDeviceRE = regexp.MustCompile(`(.*)\[\d+\]`) +) + +type statusLine struct { + active int64 + total int64 + size int64 + down int64 +} + +type recoveryLine struct { + syncedBlocks int64 + pct float64 + finish float64 + speed float64 +} + +type MdstatConf struct { + FileName string `toml:"file_name"` +} + +func (k *MdstatConf) Description() string { + return "Get md array statistics from /proc/mdstat" +} + +var mdSampleConfig = ` + ## Sets file path + ## If not specified, then default is /proc/mdstat + # file_name = "/proc/mdstat" +` + +func (k *MdstatConf) SampleConfig() string { + return mdSampleConfig +} + +func evalStatusLine(deviceLine, statusLineStr string) (statusLine, error) { + sizeFields := strings.Fields(statusLineStr) + if len(sizeFields) < 1 { + return statusLine{active: 0, total: 0, down: 0, size: 0}, + fmt.Errorf("statusLine empty? %q", statusLineStr) + } + sizeStr := sizeFields[0] + size, err := strconv.ParseInt(sizeStr, 10, 64) + if err != nil { + return statusLine{active: 0, total: 0, down: 0, size: 0}, + fmt.Errorf("unexpected statusLine %q: %w", statusLineStr, err) + } + + if strings.Contains(deviceLine, "raid0") || strings.Contains(deviceLine, "linear") { + // In the device deviceLine, only disks have a number associated with them in []. + total := int64(strings.Count(deviceLine, "[")) + return statusLine{active: total, total: total, down: 0, size: size}, nil + } + + if strings.Contains(deviceLine, "inactive") { + return statusLine{active: 0, total: 0, down: 0, size: size}, nil + } + + matches := statusLineRE.FindStringSubmatch(statusLineStr) + if len(matches) != 5 { + return statusLine{active: 0, total: 0, down: 0, size: size}, + fmt.Errorf("couldn't find all the substring matches: %s", statusLineStr) + } + total, err := strconv.ParseInt(matches[2], 10, 64) + if err != nil { + return statusLine{active: 0, total: 0, down: 0, size: size}, + fmt.Errorf("unexpected statusLine %q: %w", statusLineStr, err) + } + active, err := strconv.ParseInt(matches[3], 10, 64) + if err != nil { + return statusLine{active: 0, total: total, down: 0, size: size}, + fmt.Errorf("unexpected statusLine %q: %w", statusLineStr, err) + } + down := int64(strings.Count(matches[4], "_")) + + return statusLine{active: active, total: total, size: size, down: down}, nil +} + +func evalRecoveryLine(recoveryLineStr string) (recoveryLine, error) { + // Get count of completed vs. total blocks + matches := recoveryLineBlocksRE.FindStringSubmatch(recoveryLineStr) + if len(matches) != 2 { + return recoveryLine{syncedBlocks: 0, pct: 0, finish: 0, speed: 0}, + fmt.Errorf("unexpected recoveryLine matching syncedBlocks: %s", recoveryLineStr) + } + syncedBlocks, err := strconv.ParseInt(matches[1], 10, 64) + if err != nil { + return recoveryLine{syncedBlocks: 0, pct: 0, finish: 0, speed: 0}, + fmt.Errorf("error parsing int from recoveryLine %q: %w", recoveryLineStr, err) + } + + // Get percentage complete + matches = recoveryLinePctRE.FindStringSubmatch(recoveryLineStr) + if len(matches) != 2 { + return recoveryLine{syncedBlocks: syncedBlocks, pct: 0, finish: 0, speed: 0}, + fmt.Errorf("unexpected recoveryLine matching percentage: %s", recoveryLineStr) + } + pct, err := strconv.ParseFloat(matches[1], 64) + if err != nil { + return recoveryLine{syncedBlocks: syncedBlocks, pct: 0, finish: 0, speed: 0}, + fmt.Errorf("error parsing float from recoveryLine %q: %w", recoveryLineStr, err) + } + + // Get time expected left to complete + matches = recoveryLineFinishRE.FindStringSubmatch(recoveryLineStr) + if len(matches) != 2 { + return recoveryLine{syncedBlocks: syncedBlocks, pct: pct, finish: 0, speed: 0}, + fmt.Errorf("unexpected recoveryLine matching est. finish time: %s", recoveryLineStr) + } + finish, err := strconv.ParseFloat(matches[1], 64) + if err != nil { + return recoveryLine{syncedBlocks: syncedBlocks, pct: pct, finish: 0, speed: 0}, + fmt.Errorf("error parsing float from recoveryLine %q: %w", recoveryLineStr, err) + } + + // Get recovery speed + matches = recoveryLineSpeedRE.FindStringSubmatch(recoveryLineStr) + if len(matches) != 2 { + return recoveryLine{syncedBlocks: syncedBlocks, pct: pct, finish: finish, speed: 0}, + fmt.Errorf("unexpected recoveryLine matching speed: %s", recoveryLineStr) + } + speed, err := strconv.ParseFloat(matches[1], 64) + if err != nil { + return recoveryLine{syncedBlocks: syncedBlocks, pct: pct, finish: finish, speed: 0}, + fmt.Errorf("error parsing float from recoveryLine %q: %w", recoveryLineStr, err) + } + return recoveryLine{syncedBlocks: syncedBlocks, pct: pct, finish: finish, speed: speed}, nil +} + +func evalComponentDevices(deviceFields []string) string { + mdComponentDevices := make([]string, 0) + if len(deviceFields) > 3 { + for _, field := range deviceFields[4:] { + match := componentDeviceRE.FindStringSubmatch(field) + if match == nil { + continue + } + mdComponentDevices = append(mdComponentDevices, match[1]) + } + } + + // Ensure no churn on tag ordering change + sort.Strings(mdComponentDevices) + return strings.Join(mdComponentDevices, ",") +} + +func (k *MdstatConf) Gather(acc telegraf.Accumulator) error { + data, err := k.getProcMdstat() + if err != nil { + return err + } + lines := strings.Split(string(data), "\n") + // empty file should return nothing + if len(lines) < 3 { + return nil + } + for i, line := range lines { + if strings.TrimSpace(line) == "" || line[0] == ' ' || strings.HasPrefix(line, "Personalities") || strings.HasPrefix(line, "unused") { + continue + } + deviceFields := strings.Fields(line) + if len(deviceFields) < 3 || len(lines) <= i+3 { + return fmt.Errorf("not enough fields in mdline (expected at least 3): %s", line) + } + mdName := deviceFields[0] // mdx + state := deviceFields[2] // active or inactive + + /* + Failed disks have the suffix (F) & Spare disks have the suffix (S). + Failed disks may also not be marked separately... + */ + fail := int64(strings.Count(line, "(F)")) + spare := int64(strings.Count(line, "(S)")) + + sts, err := evalStatusLine(lines[i], lines[i+1]) + if err != nil { + return fmt.Errorf("error parsing md device lines: %w", err) + } + + syncLineIdx := i + 2 + if strings.Contains(lines[i+2], "bitmap") { // skip bitmap line + syncLineIdx++ + } + + var rcvry recoveryLine + // If device is syncing at the moment, get the number of currently + // synced bytes, otherwise that number equals the size of the device. + rcvry.syncedBlocks = sts.size + recovering := strings.Contains(lines[syncLineIdx], "recovery") + resyncing := strings.Contains(lines[syncLineIdx], "resync") + checking := strings.Contains(lines[syncLineIdx], "check") + + // Append recovery and resyncing state info. + if recovering || resyncing || checking { + if recovering { + state = "recovering" + } else if checking { + state = "checking" + } else { + state = "resyncing" + } + + // Handle case when resync=PENDING or resync=DELAYED. + if strings.Contains(lines[syncLineIdx], "PENDING") || strings.Contains(lines[syncLineIdx], "DELAYED") { + rcvry.syncedBlocks = 0 + } else { + var err error + rcvry, err = evalRecoveryLine(lines[syncLineIdx]) + if err != nil { + return fmt.Errorf("error parsing sync line in md device %q: %w", mdName, err) + } + } + } + fields := map[string]interface{}{ + "DisksActive": sts.active, + "DisksFailed": fail, + "DisksSpare": spare, + "DisksTotal": sts.total, + "DisksDown": sts.down, + "BlocksTotal": sts.size, + "BlocksSynced": rcvry.syncedBlocks, + "BlocksSyncedPct": rcvry.pct, + "BlocksSyncedFinishTime": rcvry.finish, + "BlocksSyncedSpeed": rcvry.speed, + } + tags := map[string]string{ + "Name": mdName, + "ActivityState": state, + "Devices": evalComponentDevices(deviceFields), + } + acc.AddFields("mdstat", fields, tags) + } + + return nil +} + +func (k *MdstatConf) getProcMdstat() ([]byte, error) { + var mdStatFile string + if k.FileName == "" { + mdStatFile = proc(envProc, defaultHostProc) + "/mdstat" + } else { + mdStatFile = k.FileName + } + if _, err := os.Stat(mdStatFile); os.IsNotExist(err) { + return nil, fmt.Errorf("mdstat: %s does not exist", mdStatFile) + } else if err != nil { + return nil, err + } + + data, err := ioutil.ReadFile(mdStatFile) + if err != nil { + return nil, err + } + + return data, nil +} + +func init() { + inputs.Add("mdstat", func() telegraf.Input { return &MdstatConf{} }) +} + +// proc can be used to read file paths from env +func proc(env, path string) string { + // try to read full file path + if p := os.Getenv(env); p != "" { + return p + } + // return default path + return path +} diff --git a/plugins/inputs/mdstat/mdstat_notlinux.go b/plugins/inputs/mdstat/mdstat_notlinux.go new file mode 100644 index 0000000000000..f0fe87e66ba91 --- /dev/null +++ b/plugins/inputs/mdstat/mdstat_notlinux.go @@ -0,0 +1,3 @@ +// +build !linux + +package mdstat diff --git a/plugins/inputs/mdstat/mdstat_test.go b/plugins/inputs/mdstat/mdstat_test.go new file mode 100644 index 0000000000000..030ac2cb55f6f --- /dev/null +++ b/plugins/inputs/mdstat/mdstat_test.go @@ -0,0 +1,148 @@ +// +build linux + +package mdstat + +import ( + "io/ioutil" + "os" + "testing" + + "github.com/influxdata/telegraf/testutil" + "github.com/stretchr/testify/assert" +) + +func TestFullMdstatProcFile(t *testing.T) { + filename := makeFakeMDStatFile([]byte(mdStatFileFull)) + defer os.Remove(filename) + k := MdstatConf{ + FileName: filename, + } + acc := testutil.Accumulator{} + err := k.Gather(&acc) + assert.NoError(t, err) + + fields := map[string]interface{}{ + "BlocksSynced": int64(10620027200), + "BlocksSyncedFinishTime": float64(101.6), + "BlocksSyncedPct": float64(94.3), + "BlocksSyncedSpeed": float64(103517), + "BlocksTotal": int64(11251451904), + "DisksActive": int64(12), + "DisksFailed": int64(0), + "DisksSpare": int64(0), + "DisksTotal": int64(12), + "DisksDown": int64(0), + } + acc.AssertContainsFields(t, "mdstat", fields) +} + +func TestFailedDiskMdStatProcFile1(t *testing.T) { + filename := makeFakeMDStatFile([]byte(mdStatFileFailedDisk)) + defer os.Remove(filename) + + k := MdstatConf{ + FileName: filename, + } + + acc := testutil.Accumulator{} + err := k.Gather(&acc) + assert.NoError(t, err) + + fields := map[string]interface{}{ + "BlocksSynced": int64(5860144128), + "BlocksSyncedFinishTime": float64(0), + "BlocksSyncedPct": float64(0), + "BlocksSyncedSpeed": float64(0), + "BlocksTotal": int64(5860144128), + "DisksActive": int64(3), + "DisksFailed": int64(0), + "DisksSpare": int64(0), + "DisksTotal": int64(4), + "DisksDown": int64(1), + } + acc.AssertContainsFields(t, "mdstat", fields) +} + +func TestEmptyMdStatProcFile1(t *testing.T) { + filename := makeFakeMDStatFile([]byte(mdStatFileEmpty)) + defer os.Remove(filename) + + k := MdstatConf{ + FileName: filename, + } + + acc := testutil.Accumulator{} + err := k.Gather(&acc) + assert.NoError(t, err) +} + +func TestInvalidMdStatProcFile1(t *testing.T) { + filename := makeFakeMDStatFile([]byte(mdStatFileInvalid)) + defer os.Remove(filename) + + k := MdstatConf{ + FileName: filename, + } + + acc := testutil.Accumulator{} + err := k.Gather(&acc) + assert.Error(t, err) +} + +const mdStatFileFull = ` +Personalities : [raid1] [raid10] [linear] [multipath] [raid0] [raid6] [raid5] [raid4] +md2 : active raid10 sde[2] sdl[9] sdf[3] sdk[8] sdh[5] sdd[1] sdg[4] sdn[11] sdm[10] sdj[7] sdc[0] sdi[6] + 11251451904 blocks super 1.2 512K chunks 2 near-copies [12/12] [UUUUUUUUUUUU] + [==================>..] check = 94.3% (10620027200/11251451904) finish=101.6min speed=103517K/sec + bitmap: 35/84 pages [140KB], 65536KB chunk + +md1 : active raid1 sdb2[2] sda2[0] + 5909504 blocks super 1.2 [2/2] [UU] + +md0 : active raid1 sdb1[2] sda1[0] + 244005888 blocks super 1.2 [2/2] [UU] + bitmap: 1/2 pages [4KB], 65536KB chunk + +unused devices: +` + +const mdStatFileFailedDisk = ` +Personalities : [linear] [multipath] [raid0] [raid1] [raid6] [raid5] [raid4] [raid10] +md0 : active raid5 sdd1[3] sdb1[1] sda1[0] + 5860144128 blocks super 1.2 level 5, 64k chunk, algorithm 2 [4/3] [UUU_] + bitmap: 8/15 pages [32KB], 65536KB chunk + +unused devices: +` + +const mdStatFileEmpty = ` +Personalities : +unused devices: +` + +const mdStatFileInvalid = ` +Personalities : + +mdf1: testman actve + +md0 : active raid1 sdb1[2] sda1[0] + 244005888 blocks super 1.2 [2/2] [UU] + bitmap: 1/2 pages [4KB], 65536KB chunk + +unused devices: +` + +func makeFakeMDStatFile(content []byte) (filename string) { + fileobj, err := ioutil.TempFile("", "mdstat") + if err != nil { + panic(err) + } + + if _, err = fileobj.Write(content); err != nil { + panic(err) + } + if err := fileobj.Close(); err != nil { + panic(err) + } + return fileobj.Name() +}