From c636ae4bfb4263a8098a7daba4eae6a3750427dc Mon Sep 17 00:00:00 2001 From: Pamela Mei <126221706+PamelaMei-SAP@users.noreply.github.com> Date: Sun, 18 Feb 2024 19:04:30 +0800 Subject: [PATCH] filesystem: surface device errors (#2923) filesystem: surface filesystem device error Fixes: #2918 --------- Signed-off-by: Pamela Mei i540369 Signed-off-by: Vitaly Zhuravlev --- collector/filesystem_common.go | 18 +++++++++--------- collector/filesystem_linux.go | 11 +++++++---- 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/collector/filesystem_common.go b/collector/filesystem_common.go index f5dde59ab7..f5d5135241 100644 --- a/collector/filesystem_common.go +++ b/collector/filesystem_common.go @@ -60,7 +60,7 @@ var ( "Regexp of filesystem types to ignore for filesystem collector.", ).Hidden().String() - filesystemLabelNames = []string{"device", "mountpoint", "fstype"} + filesystemLabelNames = []string{"device", "mountpoint", "fstype", "device_error"} ) type filesystemCollector struct { @@ -73,7 +73,7 @@ type filesystemCollector struct { } type filesystemLabels struct { - device, mountPoint, fsType, options string + device, mountPoint, fsType, options, deviceError string } type filesystemStats struct { @@ -184,11 +184,11 @@ func (c *filesystemCollector) Update(ch chan<- prometheus.Metric) error { ch <- prometheus.MustNewConstMetric( c.deviceErrorDesc, prometheus.GaugeValue, - s.deviceError, s.labels.device, s.labels.mountPoint, s.labels.fsType, + s.deviceError, s.labels.device, s.labels.mountPoint, s.labels.fsType, s.labels.deviceError, ) ch <- prometheus.MustNewConstMetric( c.roDesc, prometheus.GaugeValue, - s.ro, s.labels.device, s.labels.mountPoint, s.labels.fsType, + s.ro, s.labels.device, s.labels.mountPoint, s.labels.fsType, s.labels.deviceError, ) if s.deviceError > 0 { @@ -197,23 +197,23 @@ func (c *filesystemCollector) Update(ch chan<- prometheus.Metric) error { ch <- prometheus.MustNewConstMetric( c.sizeDesc, prometheus.GaugeValue, - s.size, s.labels.device, s.labels.mountPoint, s.labels.fsType, + s.size, s.labels.device, s.labels.mountPoint, s.labels.fsType, s.labels.deviceError, ) ch <- prometheus.MustNewConstMetric( c.freeDesc, prometheus.GaugeValue, - s.free, s.labels.device, s.labels.mountPoint, s.labels.fsType, + s.free, s.labels.device, s.labels.mountPoint, s.labels.fsType, s.labels.deviceError, ) ch <- prometheus.MustNewConstMetric( c.availDesc, prometheus.GaugeValue, - s.avail, s.labels.device, s.labels.mountPoint, s.labels.fsType, + s.avail, s.labels.device, s.labels.mountPoint, s.labels.fsType, s.labels.deviceError, ) ch <- prometheus.MustNewConstMetric( c.filesDesc, prometheus.GaugeValue, - s.files, s.labels.device, s.labels.mountPoint, s.labels.fsType, + s.files, s.labels.device, s.labels.mountPoint, s.labels.fsType, s.labels.deviceError, ) ch <- prometheus.MustNewConstMetric( c.filesFreeDesc, prometheus.GaugeValue, - s.filesFree, s.labels.device, s.labels.mountPoint, s.labels.fsType, + s.filesFree, s.labels.device, s.labels.mountPoint, s.labels.fsType, s.labels.deviceError, ) } return nil diff --git a/collector/filesystem_linux.go b/collector/filesystem_linux.go index 2251cc4b8f..23e5359d11 100644 --- a/collector/filesystem_linux.go +++ b/collector/filesystem_linux.go @@ -85,6 +85,7 @@ func (c *filesystemCollector) GetStats() ([]filesystemStats, error) { stuckMountsMtx.Lock() if _, ok := stuckMounts[labels.mountPoint]; ok { + labels.deviceError = "mountpoint timeout" stats = append(stats, filesystemStats{ labels: labels, deviceError: 1, @@ -125,6 +126,7 @@ func (c *filesystemCollector) processStat(labels filesystemLabels) filesystemSta close(success) if err != nil { + labels.deviceError = err.Error() level.Debug(c.logger).Log("msg", "Error on statfs() system call", "rootfs", rootfsFilePath(labels.mountPoint), "err", err) return filesystemStats{ labels: labels, @@ -211,10 +213,11 @@ func parseFilesystemLabels(r io.Reader) ([]filesystemLabels, error) { parts[1] = strings.Replace(parts[1], "\\011", "\t", -1) filesystems = append(filesystems, filesystemLabels{ - device: parts[0], - mountPoint: rootfsStripPrefix(parts[1]), - fsType: parts[2], - options: parts[3], + device: parts[0], + mountPoint: rootfsStripPrefix(parts[1]), + fsType: parts[2], + options: parts[3], + deviceError: "", }) }