From a8c9a0e54a75977d82abc014c9b86c28b8abc4fe Mon Sep 17 00:00:00 2001 From: Navid Yaghoobi Date: Sat, 16 Mar 2024 10:52:14 +1100 Subject: [PATCH] enhance all metrics with the same fields as for podman_<...>_info metric Signed-off-by: Navid Yaghoobi --- README.md | 6 +- cmd/root.go | 20 ++- collector/container.go | 287 ++++++++++++++++++++----------- collector/image.go | 71 +++++--- collector/pod.go | 84 ++++++--- collector/utils.go | 4 +- collector/volume.go | 21 ++- exporter/exporter.go | 11 +- test/e2e/e2e_suite_test.go | 1 + test/e2e_em/container_test.go | 65 +++++++ test/e2e_em/e2e_em_suite_test.go | 72 ++++++++ test/e2e_em/image_test.go | 38 ++++ test/e2e_em/pod_test.go | 43 +++++ test/e2e_em/volume_test.go | 32 ++++ 14 files changed, 606 insertions(+), 149 deletions(-) create mode 100644 test/e2e_em/container_test.go create mode 100644 test/e2e_em/e2e_em_suite_test.go create mode 100644 test/e2e_em/image_test.go create mode 100644 test/e2e_em/pod_test.go create mode 100644 test/e2e_em/volume_test.go diff --git a/README.md b/README.md index db280867..0e1e41e5 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,7 @@ # prometheus-podman-exporter -[![PkgGoDev](https://pkg.go.dev/badge/github.com/containers/prometheus-podman-exporter)](https://pkg.go.dev/github.com/containers/prometheus-podman-exporter) +![License](https://img.shields.io/badge/License-Apache_2.0-blue.svg) +![GitHub release (latest SemVer)](https://img.shields.io/github/v/release/containers/prometheus-podman-exporter) [![Go Report](https://img.shields.io/badge/go%20report-A%2B-brightgreen.svg)](https://goreportcard.com/report/github.com/containers/prometheus-podman-exporter) ![Go](https://github.com/containers/prometheus-podman-exporter/workflows/Go/badge.svg) @@ -24,8 +25,9 @@ Usage: prometheus-podman-exporter [flags] Flags: - -t, --collector.cache_duration int Duration (seconds) to retrieve container, size and refresh the cache (default 3600) + -t, --collector.cache_duration int Duration (seconds) 
to retrieve container, size and refresh the cache. (default 3600) -a, --collector.enable-all Enable all collectors by default. + --collector.enhance-metrics enhance all metrics with the same field as for their podman_<...>_info metrics. -i, --collector.image Enable image collector. -n, --collector.network Enable network collector. -o, --collector.pod Enable pod collector. diff --git a/cmd/root.go b/cmd/root.go index 71cdb2d4..62abfb6d 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -67,36 +67,54 @@ func Execute() { func init() { rootCmd.Flags().BoolP("debug", "d", false, "Set log level to debug.") + rootCmd.Flags().BoolP("version", "", false, "Print version and exit.") + rootCmd.Flags().StringP("web.config.file", "", "", "[EXPERIMENTAL] Path to configuration file that can enable TLS or authentication.") + rootCmd.Flags().StringP("web.listen-address", "l", ":9882", "Address on which to expose metrics and web interface.") + rootCmd.Flags().StringP("web.telemetry-path", "p", "/metrics", "Path under which to expose metrics.") + rootCmd.Flags().BoolP("web.disable-exporter-metrics", "e", false, "Exclude metrics about the exporter itself (promhttp_*, process_*, go_*).") + rootCmd.Flags().IntP("web.max-requests", "m", maxRequest, "Maximum number of parallel scrape requests. 
Use 0 to disable") + rootCmd.Flags().BoolP("collector.enable-all", "a", false, "Enable all collectors by default.") + rootCmd.Flags().BoolP("collector.image", "i", false, "Enable image collector.") + rootCmd.Flags().BoolP("collector.pod", "o", false, "Enable pod collector.") + rootCmd.Flags().BoolP("collector.volume", "v", false, "Enable volume collector.") + rootCmd.Flags().BoolP("collector.network", "n", false, "Enable network collector.") + rootCmd.Flags().BoolP("collector.system", "s", false, "Enable system collector.") + rootCmd.Flags().BoolP("collector.store_labels", "b", false, "Convert pod/container/image labels on prometheus metrics for each pod/container/image.") + rootCmd.Flags().StringP("collector.whitelisted_labels", "w", "", "Comma separated list of pod/container/image labels to be converted\n"+ "to labels on prometheus metrics for each pod/container/image.\n"+ "collector.store_labels must be set to false for this to take effect.") + rootCmd.Flags().Int64P("collector.cache_duration", "t", cacheDuration, - "Duration (seconds) to retrieve container, size and refresh the cache") + "Duration (seconds) to retrieve container, size and refresh the cache.") + + rootCmd.Flags().BoolP("collector.enhance-metrics", "", false, + "enhance all metrics with the same field as for their podman_<...>_info metrics.") } diff --git a/collector/container.go b/collector/container.go index 2634dd05..3114399c 100644 --- a/collector/container.go +++ b/collector/container.go @@ -28,6 +28,11 @@ type containerCollector struct { logger log.Logger } +type containerDescLabels struct { + labels []string + labelsValue []string +} + func init() { registerCollector("container", defaultEnabled, NewContainerStatsCollector) } @@ -39,124 +44,55 @@ func NewContainerStatsCollector(logger log.Logger) (Collector, error) { nil, prometheus.GaugeValue, }, state: typedDesc{ - prometheus.NewDesc( - prometheus.BuildFQName(namespace, "container", "state"), - //nolint:lll - "Container current state 
(-1=unknown,0=created,1=initialized,2=running,3=stopped,4=paused,5=exited,6=removing,7=stopping).", - []string{"id", "pod_id", "pod_name"}, nil, - ), prometheus.GaugeValue, + nil, prometheus.GaugeValue, }, health: typedDesc{ - prometheus.NewDesc( - prometheus.BuildFQName(namespace, "container", "health"), - "Container current health (-1=unknown,0=healthy,1=unhealthy,2=starting).", - []string{"id", "pod_id", "pod_name"}, nil, - ), prometheus.GaugeValue, + nil, prometheus.GaugeValue, }, created: typedDesc{ - prometheus.NewDesc( - prometheus.BuildFQName(namespace, "container", "created_seconds"), - "Container creation time in unixtime.", - []string{"id", "pod_id", "pod_name"}, nil, - ), prometheus.GaugeValue, + nil, prometheus.GaugeValue, }, started: typedDesc{ - prometheus.NewDesc( - prometheus.BuildFQName(namespace, "container", "started_seconds"), - "Container started time in unixtime.", - []string{"id", "pod_id", "pod_name"}, nil, - ), prometheus.GaugeValue, + nil, prometheus.GaugeValue, }, exited: typedDesc{ - prometheus.NewDesc( - prometheus.BuildFQName(namespace, "container", "exited_seconds"), - "Container exited time in unixtime.", - []string{"id", "pod_id", "pod_name"}, nil, - ), prometheus.GaugeValue, + nil, prometheus.GaugeValue, }, exitCode: typedDesc{ - prometheus.NewDesc( - prometheus.BuildFQName(namespace, "container", "exit_code"), - "Container exit code, if the container has not exited or restarted then the exit code will be 0.", - []string{"id", "pod_id", "pod_name"}, nil, - ), prometheus.GaugeValue, + nil, prometheus.GaugeValue, }, pids: typedDesc{ - prometheus.NewDesc( - prometheus.BuildFQName(namespace, "container", "pids"), - "Container pid number.", - []string{"id", "pod_id", "pod_name"}, nil, - ), prometheus.GaugeValue, + nil, prometheus.GaugeValue, }, cpu: typedDesc{ - prometheus.NewDesc( - prometheus.BuildFQName(namespace, "container", "cpu_seconds_total"), - "total CPU time spent for container in seconds.", - []string{"id", "pod_id", 
"pod_name"}, nil, - ), prometheus.CounterValue, + nil, prometheus.CounterValue, }, cpuSystem: typedDesc{ - prometheus.NewDesc( - prometheus.BuildFQName(namespace, "container", "cpu_system_seconds_total"), - "total system CPU time spent for container in seconds.", - []string{"id", "pod_id", "pod_name"}, nil, - ), prometheus.CounterValue, + nil, prometheus.CounterValue, }, memUsage: typedDesc{ - prometheus.NewDesc( - prometheus.BuildFQName(namespace, "container", "mem_usage_bytes"), - "Container memory usage.", - []string{"id", "pod_id", "pod_name"}, nil, - ), prometheus.GaugeValue, + nil, prometheus.GaugeValue, }, memLimit: typedDesc{ - prometheus.NewDesc( - prometheus.BuildFQName(namespace, "container", "mem_limit_bytes"), - "Container memory limit.", - []string{"id", "pod_id", "pod_name"}, nil, - ), prometheus.GaugeValue, + nil, prometheus.GaugeValue, }, netInput: typedDesc{ - prometheus.NewDesc( - prometheus.BuildFQName(namespace, "container", "net_input_total"), - "Container network input in bytes.", - []string{"id", "pod_id", "pod_name"}, nil, - ), prometheus.CounterValue, + nil, prometheus.CounterValue, }, netOutput: typedDesc{ - prometheus.NewDesc( - prometheus.BuildFQName(namespace, "container", "net_output_total"), - "Container network output in bytes.", - []string{"id", "pod_id", "pod_name"}, nil, - ), prometheus.CounterValue, + nil, prometheus.CounterValue, }, blockInput: typedDesc{ - prometheus.NewDesc( - prometheus.BuildFQName(namespace, "container", "block_input_total"), - "Container block input in bytes.", - []string{"id", "pod_id", "pod_name"}, nil, - ), prometheus.CounterValue, + nil, prometheus.CounterValue, }, blockOutput: typedDesc{ - prometheus.NewDesc( - prometheus.BuildFQName(namespace, "container", "block_output_total"), - "Container block output in bytes.", - []string{"id", "pod_id", "pod_name"}, nil, - ), prometheus.CounterValue, + nil, prometheus.CounterValue, }, rwSize: typedDesc{ - prometheus.NewDesc( - prometheus.BuildFQName(namespace, 
"container", "rw_size_bytes"), - "Container top read-write layer size in bytes.", - []string{"id", "pod_id", "pod_name"}, nil, - ), prometheus.GaugeValue, + nil, prometheus.GaugeValue, }, rootFsSize: typedDesc{ - prometheus.NewDesc( - prometheus.BuildFQName(namespace, "container", "rootfs_size_bytes"), - "Container root filesystem size in bytes.", - []string{"id", "pod_id", "pod_name"}, nil, - ), prometheus.GaugeValue, + nil, prometheus.GaugeValue, }, logger: logger, }, nil @@ -164,6 +100,8 @@ func NewContainerStatsCollector(logger log.Logger) (Collector, error) { // Update reads and exposes container stats. func (c *containerCollector) Update(ch chan<- prometheus.Metric) error { + defaultContainersLabel := []string{"id", "pod_id", "pod_name"} + reports, err := pdcs.Containers() if err != nil { return err @@ -175,11 +113,169 @@ func (c *containerCollector) Update(ch chan<- prometheus.Metric) error { } for _, rep := range reports { - infoMetric, infoValues := c.getContainerInfoDesc(rep) - c.info.desc = infoMetric + cntLabelsInfo := c.getContainerDescLabel(rep) + + if enhanceAllMetrics { + defaultContainersLabel = cntLabelsInfo.labels + } + + infoDesc := prometheus.NewDesc( + prometheus.BuildFQName(namespace, "container", "info"), + "Container information.", + cntLabelsInfo.labels, nil, + ) + + stateDesc := prometheus.NewDesc( + prometheus.BuildFQName(namespace, "container", "state"), + //nolint:lll + "Container current state (-1=unknown,0=created,1=initialized,2=running,3=stopped,4=paused,5=exited,6=removing,7=stopping).", + defaultContainersLabel, nil, + ) + + healthDesc := prometheus.NewDesc( + prometheus.BuildFQName(namespace, "container", "health"), + "Container current health (-1=unknown,0=healthy,1=unhealthy,2=starting).", + defaultContainersLabel, nil, + ) + + createdDesc := prometheus.NewDesc( + prometheus.BuildFQName(namespace, "container", "created_seconds"), + "Container creation time in unixtime.", + defaultContainersLabel, nil, + ) + + startedDesc := 
prometheus.NewDesc( + prometheus.BuildFQName(namespace, "container", "started_seconds"), + "Container started time in unixtime.", + defaultContainersLabel, nil, + ) + + exitedDesc := prometheus.NewDesc( + prometheus.BuildFQName(namespace, "container", "exited_seconds"), + "Container exited time in unixtime.", + defaultContainersLabel, nil, + ) + + exitedCodeDesc := prometheus.NewDesc( + prometheus.BuildFQName(namespace, "container", "exit_code"), + "Container exit code, if the container has not exited or restarted then the exit code will be 0.", + defaultContainersLabel, nil, + ) + + pidsDesc := prometheus.NewDesc( + prometheus.BuildFQName(namespace, "container", "pids"), + "Container pid number.", + defaultContainersLabel, nil, + ) + + cpuDesc := prometheus.NewDesc( + prometheus.BuildFQName(namespace, "container", "cpu_seconds_total"), + "total CPU time spent for container in seconds.", + defaultContainersLabel, nil, + ) + + cpuSystemDesc := prometheus.NewDesc( + prometheus.BuildFQName(namespace, "container", "cpu_system_seconds_total"), + "total system CPU time spent for container in seconds.", + defaultContainersLabel, nil, + ) + + memUsageDesc := prometheus.NewDesc( + prometheus.BuildFQName(namespace, "container", "mem_usage_bytes"), + "Container memory usage.", + defaultContainersLabel, nil, + ) + + memLimitDesc := prometheus.NewDesc( + prometheus.BuildFQName(namespace, "container", "mem_limit_bytes"), + "Container memory limit.", + defaultContainersLabel, nil, + ) + + netInputDesc := prometheus.NewDesc( + prometheus.BuildFQName(namespace, "container", "net_input_total"), + "Container network input in bytes.", + defaultContainersLabel, nil, + ) + + netOutputDesc := prometheus.NewDesc( + prometheus.BuildFQName(namespace, "container", "net_output_total"), + "Container network output in bytes.", + defaultContainersLabel, nil, + ) + + blockInputDesc := prometheus.NewDesc( + prometheus.BuildFQName(namespace, "container", "block_input_total"), + "Container block 
input in bytes.", + defaultContainersLabel, nil, + ) + + blockOutputDesc := prometheus.NewDesc( + prometheus.BuildFQName(namespace, "container", "block_output_total"), + "Container block output in bytes.", + defaultContainersLabel, nil, + ) + + rwSizeDesc := prometheus.NewDesc( + prometheus.BuildFQName(namespace, "container", "rw_size_bytes"), + "Container top read-write layer size in bytes.", + defaultContainersLabel, nil, + ) + + rootFsSizeDesc := prometheus.NewDesc( + prometheus.BuildFQName(namespace, "container", "rootfs_size_bytes"), + "Container root filesystem size in bytes.", + defaultContainersLabel, nil, + ) + + c.info.desc = infoDesc + c.state.desc = stateDesc + c.health.desc = healthDesc + c.created.desc = createdDesc + c.started.desc = startedDesc + c.exited.desc = exitedDesc + c.exitCode.desc = exitedCodeDesc + c.pids.desc = pidsDesc + c.cpu.desc = cpuDesc + c.cpuSystem.desc = cpuSystemDesc + c.memUsage.desc = memUsageDesc + c.memLimit.desc = memLimitDesc + c.netInput.desc = netInputDesc + c.netOutput.desc = netOutputDesc + c.blockInput.desc = blockInputDesc + c.blockOutput.desc = blockOutputDesc + c.rwSize.desc = rwSizeDesc + c.rootFsSize.desc = rootFsSizeDesc + cntStat := getContainerStat(rep.ID, statReports) - ch <- c.info.mustNewConstMetric(1, infoValues...) + ch <- c.info.mustNewConstMetric(1, cntLabelsInfo.labelsValue...) + + if enhanceAllMetrics { + ch <- c.state.mustNewConstMetric(float64(rep.State), cntLabelsInfo.labelsValue...) + ch <- c.health.mustNewConstMetric(float64(rep.Health), cntLabelsInfo.labelsValue...) + ch <- c.created.mustNewConstMetric(float64(rep.Created), cntLabelsInfo.labelsValue...) + ch <- c.started.mustNewConstMetric(float64(rep.Started), cntLabelsInfo.labelsValue...) + ch <- c.exited.mustNewConstMetric(float64(rep.Exited), cntLabelsInfo.labelsValue...) + ch <- c.exitCode.mustNewConstMetric(float64(rep.ExitCode), cntLabelsInfo.labelsValue...) 
+ ch <- c.rwSize.mustNewConstMetric(float64(rep.RwSize), cntLabelsInfo.labelsValue...) + ch <- c.rootFsSize.mustNewConstMetric(float64(rep.RootFsSize), cntLabelsInfo.labelsValue...) + + if cntStat != nil { + ch <- c.pids.mustNewConstMetric(float64(cntStat.PIDs), cntLabelsInfo.labelsValue...) + ch <- c.cpu.mustNewConstMetric(cntStat.CPU, cntLabelsInfo.labelsValue...) + ch <- c.cpuSystem.mustNewConstMetric(cntStat.CPUSystem, cntLabelsInfo.labelsValue...) + ch <- c.memUsage.mustNewConstMetric(float64(cntStat.MemUsage), cntLabelsInfo.labelsValue...) + ch <- c.memLimit.mustNewConstMetric(float64(cntStat.MemLimit), cntLabelsInfo.labelsValue...) + ch <- c.netInput.mustNewConstMetric(float64(cntStat.NetInput), cntLabelsInfo.labelsValue...) + ch <- c.netOutput.mustNewConstMetric(float64(cntStat.NetOutput), cntLabelsInfo.labelsValue...) + ch <- c.blockInput.mustNewConstMetric(float64(cntStat.BlockInput), cntLabelsInfo.labelsValue...) + ch <- c.blockOutput.mustNewConstMetric(float64(cntStat.BlockOutput), cntLabelsInfo.labelsValue...) + } + + continue + } + ch <- c.state.mustNewConstMetric(float64(rep.State), rep.ID, rep.PodID, rep.PodName) ch <- c.health.mustNewConstMetric(float64(rep.Health), rep.ID, rep.PodID, rep.PodName) ch <- c.created.mustNewConstMetric(float64(rep.Created), rep.ID, rep.PodID, rep.PodName) @@ -205,7 +301,7 @@ func (c *containerCollector) Update(ch chan<- prometheus.Metric) error { return nil } -func (c *containerCollector) getContainerInfoDesc(rep pdcs.Container) (*prometheus.Desc, []string) { +func (c *containerCollector) getContainerDescLabel(rep pdcs.Container) *containerDescLabels { containerLabels := []string{"id", "name", "image", "ports", "pod_id", "pod_name"} containerLabelsValue := []string{rep.ID, rep.Name, rep.Image, rep.Ports, rep.PodID, rep.PodName} @@ -214,13 +310,12 @@ func (c *containerCollector) getContainerInfoDesc(rep pdcs.Container) (*promethe containerLabels = append(containerLabels, extraLabels...) 
containerLabelsValue = append(containerLabelsValue, extraValues...) - infoDesc := prometheus.NewDesc( - prometheus.BuildFQName(namespace, "container", "info"), - "Container information.", - containerLabels, nil, - ) + cntDescLabels := containerDescLabels{ + labels: containerLabels, + labelsValue: containerLabelsValue, + } - return infoDesc, containerLabelsValue + return &cntDescLabels } func (c *containerCollector) getExtraLabelsAndValues( diff --git a/collector/image.go b/collector/image.go index 7f7debec..6b62c0ee 100644 --- a/collector/image.go +++ b/collector/image.go @@ -13,6 +13,11 @@ type imageCollector struct { logger log.Logger } +type imageDescLabels struct { + labels []string + labelsValue []string +} + func init() { registerCollector("image", defaultDisabled, NewImageStatsCollector) } @@ -24,18 +29,10 @@ func NewImageStatsCollector(logger log.Logger) (Collector, error) { nil, prometheus.GaugeValue, }, size: typedDesc{ - prometheus.NewDesc( - prometheus.BuildFQName(namespace, "image", "size"), - "Image size", - []string{"id", "repository", "tag"}, nil, - ), prometheus.GaugeValue, + nil, prometheus.GaugeValue, }, created: typedDesc{ - prometheus.NewDesc( - prometheus.BuildFQName(namespace, "image", "created_seconds"), - "Image creation time in unixtime.", - []string{"id", "repository", "tag"}, nil, - ), prometheus.GaugeValue, + nil, prometheus.GaugeValue, }, logger: logger, }, nil @@ -43,15 +40,50 @@ func NewImageStatsCollector(logger log.Logger) (Collector, error) { // Update reads and exposes images stats. func (c *imageCollector) Update(ch chan<- prometheus.Metric) error { + defaultImageLabels := []string{"id", "repository", "tag"} + reports, err := pdcs.Images() if err != nil { return err } for _, rep := range reports { - infoMetric, infoValues := c.getImageInfoDesc(rep) - c.info.desc = infoMetric - ch <- c.info.mustNewConstMetric(1, infoValues...) 
+ imageLabelsInfo := c.getImageDescLabels(rep) + + if enhanceAllMetrics { + defaultImageLabels = imageLabelsInfo.labels + } + + infoDesc := prometheus.NewDesc( + prometheus.BuildFQName(namespace, "image", "info"), + "Image information.", + imageLabelsInfo.labels, nil, + ) + + sizeDesc := prometheus.NewDesc( + prometheus.BuildFQName(namespace, "image", "size"), + "Image size.", + defaultImageLabels, nil, + ) + + createdDesc := prometheus.NewDesc( + prometheus.BuildFQName(namespace, "image", "created_seconds"), + "Image creation time in unixtime.", + defaultImageLabels, nil, + ) + + c.info.desc = infoDesc + c.created.desc = createdDesc + c.size.desc = sizeDesc + + ch <- c.info.mustNewConstMetric(1, imageLabelsInfo.labelsValue...) + + if enhanceAllMetrics { + ch <- c.size.mustNewConstMetric(float64(rep.Size), imageLabelsInfo.labelsValue...) + ch <- c.created.mustNewConstMetric(float64(rep.Created), imageLabelsInfo.labelsValue...) + + continue + } ch <- c.size.mustNewConstMetric(float64(rep.Size), rep.ID, rep.Repository, rep.Tag) ch <- c.created.mustNewConstMetric(float64(rep.Created), rep.ID, rep.Repository, rep.Tag) @@ -60,7 +92,7 @@ func (c *imageCollector) Update(ch chan<- prometheus.Metric) error { return nil } -func (c *imageCollector) getImageInfoDesc(rep pdcs.Image) (*prometheus.Desc, []string) { +func (c *imageCollector) getImageDescLabels(rep pdcs.Image) *imageDescLabels { imageLabels := []string{"id", "parent_id", "repository", "tag", "digest"} imageLabelsValue := []string{rep.ID, rep.ParentID, rep.Repository, rep.Tag, rep.Digest} @@ -69,13 +101,12 @@ func (c *imageCollector) getImageInfoDesc(rep pdcs.Image) (*prometheus.Desc, []s imageLabels = append(imageLabels, extraLabels...) imageLabelsValue = append(imageLabelsValue, extraValues...) 
- infoDesc := prometheus.NewDesc( - prometheus.BuildFQName(namespace, "image", "info"), - "Image information.", - imageLabels, nil, - ) + imgDescLabels := imageDescLabels{ + labels: imageLabels, + labelsValue: imageLabelsValue, + } - return infoDesc, imageLabelsValue + return &imgDescLabels } func (c *imageCollector) getExtraLabelsAndValues(collectorLabels []string, rep pdcs.Image) ([]string, []string) { diff --git a/collector/pod.go b/collector/pod.go index 81a885b0..58a15674 100644 --- a/collector/pod.go +++ b/collector/pod.go @@ -14,6 +14,11 @@ type podCollector struct { logger log.Logger } +type podDescLabels struct { + labels []string + labelsValue []string +} + func init() { registerCollector("pod", defaultDisabled, NewPodStatsCollector) } @@ -25,25 +30,13 @@ func NewPodStatsCollector(logger log.Logger) (Collector, error) { nil, prometheus.GaugeValue, }, state: typedDesc{ - prometheus.NewDesc( - prometheus.BuildFQName(namespace, "pod", "state"), - "Pods current state current state (-1=unknown,0=created,1=error,2=exited,3=paused,4=running,5=degraded,6=stopped).", - []string{"id"}, nil, - ), prometheus.GaugeValue, + nil, prometheus.GaugeValue, }, numOfContainers: typedDesc{ - prometheus.NewDesc( - prometheus.BuildFQName(namespace, "pod", "containers"), - "Number of containers in a pod.", - []string{"id"}, nil, - ), prometheus.GaugeValue, + nil, prometheus.GaugeValue, }, created: typedDesc{ - prometheus.NewDesc( - prometheus.BuildFQName(namespace, "pod", "created_seconds"), - "Pods creation time in unixtime.", - []string{"id"}, nil, - ), prometheus.GaugeValue, + nil, prometheus.GaugeValue, }, logger: logger, }, nil @@ -51,16 +44,58 @@ func NewPodStatsCollector(logger log.Logger) (Collector, error) { // Update reads and exposes pod stats. 
func (c *podCollector) Update(ch chan<- prometheus.Metric) error { + defaultPodLabels := []string{"id"} + reports, err := pdcs.Pods() if err != nil { return err } for _, rep := range reports { - infoMetric, infoValues := c.getPodInfoDesc(rep) - c.info.desc = infoMetric + podLabelsInfo := c.getPodDescLabels(rep) + + if enhanceAllMetrics { + defaultPodLabels = podLabelsInfo.labels + } + + infoDesc := prometheus.NewDesc( + prometheus.BuildFQName(namespace, "pod", "info"), + "Pod information", + podLabelsInfo.labels, nil, + ) + + stateDesc := prometheus.NewDesc( + prometheus.BuildFQName(namespace, "pod", "state"), + "Pods current state current state (-1=unknown,0=created,1=error,2=exited,3=paused,4=running,5=degraded,6=stopped).", + defaultPodLabels, nil) + + numOfCntDesc := prometheus.NewDesc( + prometheus.BuildFQName(namespace, "pod", "containers"), + "Number of containers in a pod.", + defaultPodLabels, nil, + ) + + createdDesc := prometheus.NewDesc( + prometheus.BuildFQName(namespace, "pod", "created_seconds"), + "Pods creation time in unixtime.", + defaultPodLabels, nil, + ) + + c.info.desc = infoDesc + c.state.desc = stateDesc + c.numOfContainers.desc = numOfCntDesc + c.created.desc = createdDesc + + ch <- c.info.mustNewConstMetric(1, podLabelsInfo.labelsValue...) + + if enhanceAllMetrics { + ch <- c.state.mustNewConstMetric(float64(rep.State), podLabelsInfo.labelsValue...) + ch <- c.numOfContainers.mustNewConstMetric(float64(rep.NumOfContainers), podLabelsInfo.labelsValue...) + ch <- c.created.mustNewConstMetric(float64(rep.Created), podLabelsInfo.labelsValue...) + + continue + } - ch <- c.info.mustNewConstMetric(1, infoValues...) 
ch <- c.state.mustNewConstMetric(float64(rep.State), rep.ID) ch <- c.numOfContainers.mustNewConstMetric(float64(rep.NumOfContainers), rep.ID) ch <- c.created.mustNewConstMetric(float64(rep.Created), rep.ID) @@ -69,7 +104,7 @@ func (c *podCollector) Update(ch chan<- prometheus.Metric) error { return nil } -func (c *podCollector) getPodInfoDesc(rep pdcs.Pod) (*prometheus.Desc, []string) { +func (c *podCollector) getPodDescLabels(rep pdcs.Pod) *podDescLabels { podLabels := []string{"id", "name", "infra_id"} podLabelsValue := []string{rep.ID, rep.Name, rep.InfraID} @@ -78,13 +113,12 @@ func (c *podCollector) getPodInfoDesc(rep pdcs.Pod) (*prometheus.Desc, []string) podLabels = append(podLabels, extraLabels...) podLabelsValue = append(podLabelsValue, extraValues...) - infoDesc := prometheus.NewDesc( - prometheus.BuildFQName(namespace, "pod", "info"), - "Pod information", - podLabels, nil, - ) + pDescLabels := podDescLabels{ + labels: podLabels, + labelsValue: podLabelsValue, + } - return infoDesc, podLabelsValue + return &pDescLabels } func (c *podCollector) getExtraLabelsAndValues(collectorLabels []string, rep pdcs.Pod) ([]string, []string) { diff --git a/collector/utils.go b/collector/utils.go index 13283b7f..37ea6f1f 100644 --- a/collector/utils.go +++ b/collector/utils.go @@ -9,15 +9,17 @@ import ( var ( collectorSync sync.Once storeLabels bool + enhanceAllMetrics bool whitelistedLabels []string invalidNameCharRE = regexp.MustCompile(`[^a-zA-Z0-9_]`) ) // RegisterVariableLabels sets storeLabels or whiteListed labels to be converted to metrics. 
-func RegisterVariableLabels(storeLabel bool, whiteListed string) { +func RegisterVariableLabels(storeLabel bool, whiteListed string, enhanceMetrics bool) { collectorSync.Do(func() { storeLabels = storeLabel whitelistedLabels = strings.Split(whiteListed, ",") + enhanceAllMetrics = enhanceMetrics }) } diff --git a/collector/volume.go b/collector/volume.go index 2bfcdc52..2f34ad11 100644 --- a/collector/volume.go +++ b/collector/volume.go @@ -12,25 +12,33 @@ type volumeCollector struct { logger log.Logger } +var volumeDefaultLAbels = []string{"name", "driver", "mount_point"} + func init() { registerCollector("volume", defaultDisabled, NewVolumeStatsCollector) } // NewVolumeStatsCollector returns a Collector exposing volume stats information. func NewVolumeStatsCollector(logger log.Logger) (Collector, error) { + createdLabels := []string{"name"} + + if enhanceAllMetrics { + createdLabels = volumeDefaultLAbels + } + return &volumeCollector{ info: typedDesc{ prometheus.NewDesc( prometheus.BuildFQName(namespace, "volume", "info"), "Volume information.", - []string{"name", "driver", "mount_point"}, nil, + volumeDefaultLAbels, nil, ), prometheus.GaugeValue, }, created: typedDesc{ prometheus.NewDesc( prometheus.BuildFQName(namespace, "volume", "created_seconds"), "Volume creation time in unixtime.", - []string{"name"}, nil, + createdLabels, nil, ), prometheus.GaugeValue, }, logger: logger, @@ -45,8 +53,15 @@ func (c *volumeCollector) Update(ch chan<- prometheus.Metric) error { } for _, rep := range reports { - ch <- c.created.mustNewConstMetric(float64(rep.Created), rep.Name) ch <- c.info.mustNewConstMetric(1, rep.Name, rep.Driver, rep.MountPoint) + + if enhanceAllMetrics { + ch <- c.created.mustNewConstMetric(float64(rep.Created), rep.Name, rep.Driver, rep.MountPoint) + + continue + } + + ch <- c.created.mustNewConstMetric(float64(rep.Created), rep.Name) } return nil diff --git a/exporter/exporter.go b/exporter/exporter.go index 28c840c6..95e51891 100644 --- 
a/exporter/exporter.go +++ b/exporter/exporter.go @@ -35,6 +35,7 @@ type exporterOptions struct { enableVolumes bool enableNetworks bool enableSystem bool + enhanceMetrics bool } // Start starts prometheus exporter. @@ -65,6 +66,8 @@ func Start(cmd *cobra.Command, _ []string) error { } level.Info(logger).Log("msg", "Starting podman-prometheus-exporter", "version", version.Info()) + level.Info(logger).Log("msg", "metrics", "enhanced", cmdOptions.enhanceMetrics) + http.Handle( cmdOptions.webTelemetryPath, newHandler(cmdOptions.webDisableExporterMetrics, cmdOptions.webMaxRequests, logger), @@ -113,7 +116,7 @@ func Start(cmd *cobra.Command, _ []string) error { func setEnabledCollectors(opts *exporterOptions) error { enList := []string{"container"} - collector.RegisterVariableLabels(opts.storeLabels, opts.whiteListedLabels) + collector.RegisterVariableLabels(opts.storeLabels, opts.whiteListedLabels, opts.enhanceMetrics) if opts.enableAll { enList = append(enList, "pod") @@ -239,6 +242,11 @@ func parseOptions(cmd *cobra.Command) (*exporterOptions, error) { //nolint:cyclo return nil, errMinCacheDurtion } + enhanceMetrics, err := cmd.Flags().GetBool("collector.enhance-metrics") + if err != nil { + return nil, err + } + return &exporterOptions{ debug: debug, webListen: webListen, @@ -255,5 +263,6 @@ func parseOptions(cmd *cobra.Command) (*exporterOptions, error) { //nolint:cyclo enableNetworks: enableNetworks, enableSystem: enableSystem, cacheDuration: cacheDuration, + enhanceMetrics: enhanceMetrics, }, nil } diff --git a/test/e2e/e2e_suite_test.go b/test/e2e/e2e_suite_test.go index 782fbb1d..0a68b4ba 100644 --- a/test/e2e/e2e_suite_test.go +++ b/test/e2e/e2e_suite_test.go @@ -48,6 +48,7 @@ var _ = BeforeSuite(func() { rootCmd.Flags().BoolP("collector.store_labels", "b", false, "") rootCmd.Flags().StringP("collector.whitelisted_labels", "w", "", "") rootCmd.Flags().Int64P("collector.cache_duration", "t", cacheDuration, "") + 
rootCmd.Flags().BoolP("collector.enhance-metrics", "", false, "") go func() { err := exporter.Start(rootCmd, nil) diff --git a/test/e2e_em/container_test.go b/test/e2e_em/container_test.go new file mode 100644 index 00000000..1b2fa39e --- /dev/null +++ b/test/e2e_em/container_test.go @@ -0,0 +1,65 @@ +package e2e_em_test + +import ( + "encoding/json" + "fmt" + "os/exec" + + "github.com/containers/podman/v4/pkg/domain/entities" + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +var _ = Describe("Container", func() { + It("container metrics", func() { + testCnt01PodName := "exp_e2e_test_cnt01_pod01" + testCnt01Name := "exp_e2e_test_cnt01" + testBusyBoxImage := "quay.io/quay/busybox:latest" + + var ( + cnt01Inpect []entities.ContainerInspectReport + cnt01Pod01Inspect entities.PodInspectReport + ) + + cnt01InspectOutput, err := exec.Command("podman", "container", "inspect", testCnt01Name).Output() + Expect(err).To(BeNil()) + err = json.Unmarshal(cnt01InspectOutput, &cnt01Inpect) + Expect(err).To(BeNil()) + + pod01InspectOutput, err := exec.Command("podman", "pod", "inspect", testCnt01PodName).Output() + Expect(err).To(BeNil()) + err = json.Unmarshal(pod01InspectOutput, &cnt01Pod01Inspect) + Expect(err).To(BeNil()) + + response := queryEndPoint() + + expectedCnt01Info := fmt.Sprintf("podman_container_info{id=\"%s\",image=\"%s\",name=\"%s\",pod_id=\"%s\",pod_name=\"%s\",ports=\"\"}", + cnt01Inpect[0].ID[0:12], testBusyBoxImage, testCnt01Name, cnt01Pod01Inspect.ID[0:12], cnt01Pod01Inspect.Name) + + expectedCnt01State := fmt.Sprintf("podman_container_state{id=\"%s\",image=\"%s\",name=\"%s\",pod_id=\"%s\",pod_name=\"%s\",ports=\"\"}", + cnt01Inpect[0].ID[0:12], testBusyBoxImage, testCnt01Name, cnt01Pod01Inspect.ID[0:12], cnt01Pod01Inspect.Name) + + expectedCnt01Created := fmt.Sprintf("podman_container_created_seconds{id=\"%s\",image=\"%s\",name=\"%s\",pod_id=\"%s\",pod_name=\"%s\",ports=\"\"}", + cnt01Inpect[0].ID[0:12], testBusyBoxImage, testCnt01Name, 
cnt01Pod01Inspect.ID[0:12], cnt01Pod01Inspect.Name) + + expectedCnt01ExitedSeconds := fmt.Sprintf("podman_container_exited_seconds{id=\"%s\",image=\"%s\",name=\"%s\",pod_id=\"%s\",pod_name=\"%s\",ports=\"\"}", + cnt01Inpect[0].ID[0:12], testBusyBoxImage, testCnt01Name, cnt01Pod01Inspect.ID[0:12], cnt01Pod01Inspect.Name) + + expectedCnt01ExitedCode := fmt.Sprintf("podman_container_exit_code{id=\"%s\",image=\"%s\",name=\"%s\",pod_id=\"%s\",pod_name=\"%s\",ports=\"\"}", + cnt01Inpect[0].ID[0:12], testBusyBoxImage, testCnt01Name, cnt01Pod01Inspect.ID[0:12], cnt01Pod01Inspect.Name) + + expectedCnt01RwSize := fmt.Sprintf("podman_container_rw_size_bytes{id=\"%s\",image=\"%s\",name=\"%s\",pod_id=\"%s\",pod_name=\"%s\",ports=\"\"}", + cnt01Inpect[0].ID[0:12], testBusyBoxImage, testCnt01Name, cnt01Pod01Inspect.ID[0:12], cnt01Pod01Inspect.Name) + + expectedCnt01RootFsSize := fmt.Sprintf("podman_container_rootfs_size_bytes{id=\"%s\",image=\"%s\",name=\"%s\",pod_id=\"%s\",pod_name=\"%s\",ports=\"\"}", + cnt01Inpect[0].ID[0:12], testBusyBoxImage, testCnt01Name, cnt01Pod01Inspect.ID[0:12], cnt01Pod01Inspect.Name) + + Expect(response).Should(ContainElement(ContainSubstring(expectedCnt01Info))) + Expect(response).Should(ContainElement(ContainSubstring(expectedCnt01State))) + Expect(response).Should(ContainElement(ContainSubstring(expectedCnt01Created))) + Expect(response).Should(ContainElement(ContainSubstring(expectedCnt01ExitedSeconds))) + Expect(response).Should(ContainElement(ContainSubstring(expectedCnt01ExitedCode))) + Expect(response).Should(ContainElement(ContainSubstring(expectedCnt01RwSize))) + Expect(response).Should(ContainElement(ContainSubstring(expectedCnt01RootFsSize))) + }) +}) diff --git a/test/e2e_em/e2e_em_suite_test.go b/test/e2e_em/e2e_em_suite_test.go new file mode 100644 index 00000000..85b787d2 --- /dev/null +++ b/test/e2e_em/e2e_em_suite_test.go @@ -0,0 +1,72 @@ +package e2e_em_test + +import ( + "io" + "net/http" + "strings" + "testing" + "time" + + 
"github.com/containers/prometheus-podman-exporter/exporter" + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + "github.com/spf13/cobra" +) + +var ( + endpointURL = "http://127.0.0.1:9882/metrics" + cacheDuration int64 = 3600 +) + +func TestE2eEm(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "E2E EnhanceMetrics Suite") +} + +var _ = BeforeSuite(func() { + var rootCmd = &cobra.Command{ + Use: "", + Short: "", + Long: ``, + } + + rootCmd.Flags().BoolP("debug", "d", false, "") + rootCmd.Flags().BoolP("version", "", false, "") + rootCmd.Flags().StringP("web.config.file", "", "", "") + rootCmd.Flags().StringP("web.listen-address", "l", ":9882", "") + rootCmd.Flags().StringP("web.telemetry-path", "p", "/metrics", "") + rootCmd.Flags().BoolP("web.disable-exporter-metrics", "e", false, "") + rootCmd.Flags().IntP("web.max-requests", "m", 10, "") + rootCmd.Flags().BoolP("collector.enable-all", "a", true, "") + rootCmd.Flags().BoolP("collector.image", "i", false, "") + rootCmd.Flags().BoolP("collector.pod", "o", false, "") + rootCmd.Flags().BoolP("collector.volume", "v", false, "") + rootCmd.Flags().BoolP("collector.network", "n", false, "") + rootCmd.Flags().BoolP("collector.system", "s", false, "") + rootCmd.Flags().BoolP("collector.store_labels", "b", false, "") + rootCmd.Flags().StringP("collector.whitelisted_labels", "w", "", "") + rootCmd.Flags().Int64P("collector.cache_duration", "t", cacheDuration, "") + rootCmd.Flags().BoolP("collector.enhance-metrics", "", true, "") + + go func() { + err := exporter.Start(rootCmd, nil) + Expect(err).To(BeNil()) + }() + + time.Sleep(10 * time.Second) +}) + +func queryEndPoint() []string { + req, err := http.NewRequest("GET", endpointURL, nil) + Expect(err).To(BeNil()) + + res, err := http.DefaultClient.Do(req) + Expect(err).To(BeNil()) + + defer res.Body.Close() + + body, err := io.ReadAll(res.Body) + Expect(err).To(BeNil()) + + return strings.Split(string(body), "\n") +} diff --git a/test/e2e_em/image_test.go 
b/test/e2e_em/image_test.go
new file mode 100644
index 00000000..e14380d4
--- /dev/null
+++ b/test/e2e_em/image_test.go
@@ -0,0 +1,38 @@
+package e2e_em_test
+
+import (
+	"encoding/json"
+	"fmt"
+	"os/exec"
+
+	"github.com/containers/podman/v4/pkg/domain/entities"
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+)
+
+// Image checks that the size, created and info image metrics all carry the
+// digest, id, parent_id, repository and tag labels of the busybox test image.
+var _ = Describe("Image", func() {
+	It("image metrics", func() {
+		const repository = "quay.io/quay/busybox"
+
+		rawInspect, err := exec.Command("podman", "image", "inspect", repository).Output()
+		Expect(err).To(BeNil())
+
+		var images []entities.ImageInspectReport
+		err = json.Unmarshal(rawInspect, &images)
+		Expect(err).To(BeNil())
+
+		response := queryEndPoint()
+		digest, id := images[0].Digest.String(), images[0].ID[0:12]
+
+		for _, format := range []string{
+			"podman_image_size{digest=\"%s\",id=\"%s\",parent_id=\"\",repository=\"%s\",tag=\"latest\"}",
+			"podman_image_created_seconds{digest=\"%s\",id=\"%s\",parent_id=\"\",repository=\"%s\",tag=\"latest\"}",
+			"podman_image_info{digest=\"%s\",id=\"%s\",parent_id=\"\",repository=\"%s\",tag=\"latest\"}",
+		} {
+			expected := fmt.Sprintf(format, digest, id, repository)
+			Expect(response).Should(ContainElement(ContainSubstring(expected)))
+		}
+	})
+})
diff --git a/test/e2e_em/pod_test.go b/test/e2e_em/pod_test.go
new file mode 100644
index 00000000..b6913d95
--- /dev/null
+++ b/test/e2e_em/pod_test.go
@@ -0,0 +1,43 @@
+package e2e_em_test
+
+import (
+	"encoding/json"
+	"fmt"
+	"os/exec"
+
+	"github.com/containers/podman/v4/pkg/domain/entities"
+	. "github.com/onsi/ginkgo/v2"
+	. 
"github.com/onsi/gomega"
+)
+
+// Pod checks that the info, state, created and containers pod metrics all
+// carry the id, infra_id and name labels of the test pod.
+var _ = Describe("Pod", func() {
+	It("pod metrics", func() {
+		const podName = "exp_e2e_test_pod01"
+
+		rawInspect, err := exec.Command("podman", "pod", "inspect", podName).Output()
+		Expect(err).To(BeNil())
+
+		var pod entities.PodInspectReport
+		err = json.Unmarshal(rawInspect, &pod)
+		Expect(err).To(BeNil())
+
+		response := queryEndPoint()
+
+		// IDs appear in the labels truncated to their first 12 characters.
+		id, infraID := pod.ID[0:12], pod.InfraContainerID[0:12]
+
+		formats := []string{
+			"podman_pod_info{id=\"%s\",infra_id=\"%s\",name=\"%s\"}",
+			"podman_pod_state{id=\"%s\",infra_id=\"%s\",name=\"%s\"}",
+			"podman_pod_created_seconds{id=\"%s\",infra_id=\"%s\",name=\"%s\"}",
+			"podman_pod_containers{id=\"%s\",infra_id=\"%s\",name=\"%s\"}",
+		}
+
+		for _, format := range formats {
+			expected := fmt.Sprintf(format, id, infraID, podName)
+			Expect(response).Should(ContainElement(ContainSubstring(expected)))
+		}
+	})
+})
diff --git a/test/e2e_em/volume_test.go b/test/e2e_em/volume_test.go
new file mode 100644
index 00000000..50a6cbf6
--- /dev/null
+++ b/test/e2e_em/volume_test.go
@@ -0,0 +1,32 @@
+package e2e_em_test
+
+import (
+	"encoding/json"
+	"fmt"
+	"os/exec"
+
+	"github.com/containers/podman/v4/pkg/domain/entities"
+	. "github.com/onsi/ginkgo/v2"
+	. 
"github.com/onsi/gomega" +) + +var _ = Describe("Volume", func() { + It("volume metrics", func() { + testVolName := "exp_e2e_test_vol01" + + volInspectOutput, err := exec.Command("podman", "volume", "inspect", testVolName).Output() + Expect(err).To(BeNil()) + + var volInspect []entities.VolumeInspectReport + + err = json.Unmarshal(volInspectOutput, &volInspect) + Expect(err).To(BeNil()) + + response := queryEndPoint() + expectedVolCreated := fmt.Sprintf("podman_volume_created_seconds{driver=\"%s\",mount_point=\"%s\",name=\"%s\"}", volInspect[0].Driver, volInspect[0].Mountpoint, testVolName) + expectedVolInfo := fmt.Sprintf("podman_volume_info{driver=\"%s\",mount_point=\"%s\",name=\"%s\"}", volInspect[0].Driver, volInspect[0].Mountpoint, testVolName) + + Expect(response).Should(ContainElement(ContainSubstring(expectedVolCreated))) + Expect(response).Should(ContainElement(ContainSubstring(expectedVolInfo))) + }) +})