From 7432c7d37288b4ad48affd180833895d4df9319e Mon Sep 17 00:00:00 2001 From: Spencer McKee Date: Tue, 16 Apr 2024 12:22:34 -0700 Subject: [PATCH] adds infiniband metrics plugin --- docs/metrics/plugins/infiniband.md | 48 ++++++ pkg/metrics/metrics.go | 18 +++ pkg/metrics/types.go | 5 + pkg/plugin/infiniband/Makefile | 11 ++ pkg/plugin/infiniband/embedded_filesystem.go | 36 +++++ pkg/plugin/infiniband/infiniband_linux.go | 80 ++++++++++ .../infiniband/infiniband_linux_test.go | 77 +++++++++ .../infiniband/infiniband_mock_generated.go | 10 ++ .../infiniband/infiniband_stats_linux.go | 149 ++++++++++++++++++ .../infiniband/infiniband_stats_linux_test.go | 145 +++++++++++++++++ pkg/plugin/infiniband/types_linux.go | 31 ++++ .../linuxutil/linuxutil_mock_generated.go | 10 +- pkg/plugin/registry/registry_linux.go | 2 + pkg/utils/attr_utils.go | 1 + pkg/utils/metric_names.go | 2 + site/sidebars.js | 1 + test/plugin/infiniband/main_linux.go | 49 ++++++ 17 files changed, 673 insertions(+), 2 deletions(-) create mode 100644 docs/metrics/plugins/infiniband.md create mode 100644 pkg/plugin/infiniband/Makefile create mode 100644 pkg/plugin/infiniband/embedded_filesystem.go create mode 100644 pkg/plugin/infiniband/infiniband_linux.go create mode 100644 pkg/plugin/infiniband/infiniband_linux_test.go create mode 100644 pkg/plugin/infiniband/infiniband_mock_generated.go create mode 100644 pkg/plugin/infiniband/infiniband_stats_linux.go create mode 100644 pkg/plugin/infiniband/infiniband_stats_linux_test.go create mode 100644 pkg/plugin/infiniband/types_linux.go create mode 100644 test/plugin/infiniband/main_linux.go diff --git a/docs/metrics/plugins/infiniband.md b/docs/metrics/plugins/infiniband.md new file mode 100644 index 0000000000..aaaa6ed230 --- /dev/null +++ b/docs/metrics/plugins/infiniband.md @@ -0,0 +1,48 @@ +# `infiniband` (Linux) + +Gathers Nvidia Infiniband port counters and debug status parameters from /sys/class/infiniband and /sys/class/net (respectively). 
+ +## Metrics + +Infiniband Port Counter Statistics + +Infiniband Status Parameter Statistics + +## Architecture + +The plugin uses the following data sources: + +1. `/sys/class/infiniband` +2. `/sys/class/net` + +### Code Locations + +- Plugin code interfacing with the Infiniband driver: *pkg/plugin/infiniband/* + +## Label Values for Infiniband Port Counters + +Below is a running list of all statistics for Infiniband port counters + +- `excessive_buffer_overrun_errors` +- `link_downed` +- `link_error_recovery` +- `local_link_integrity_errors` +- `port_rcv_constraint_errors` +- `port_rcv_data` +- `port_rcv_errors` +- `port_rcv_packets` +- `port_rcv_remote_physical_errors` +- `port_rcv_switch_replay_errors` +- `port_xmit_constraint_errors` +- `port_xmit_data` +- `port_xmit_discards` +- `port_xmit_packets` +- `symbol_error` +- `VL15_dropped` + +## Label Values for Infiniband Debug Status Parameters + +Below is a running list of all statistics for Infiniband debug status parameters + +- `lro_timeout` +- `link_down_reason` diff --git a/pkg/metrics/metrics.go b/pkg/metrics/metrics.go index 44e19f8619..6437cf952f 100644 --- a/pkg/metrics/metrics.go +++ b/pkg/metrics/metrics.go @@ -147,6 +147,24 @@ func InitializeMetrics() { utils.DNSLabels..., ) + // InfiniBand Metrics + InfinibandCounterStats = exporter.CreatePrometheusGaugeVecForMetric( + exporter.DefaultRegistry, + utils.InfinibandCounterStatsName, + infinibandCounterStatsDescription, + utils.StatName, + utils.Device, + utils.Port, + ) + + InfinibandStatusParams = exporter.CreatePrometheusGaugeVecForMetric( + exporter.DefaultRegistry, + utils.InfinibandStatusParamsName, + infinibandStatusParamsDescription, + utils.StatName, + utils.InterfaceName, + ) + isInitialized = true metricsLogger.Info("Metrics initialized") } diff --git a/pkg/metrics/types.go b/pkg/metrics/types.go index bc08d559a9..8fd1eaa5cd 100644 --- a/pkg/metrics/types.go +++ b/pkg/metrics/types.go @@ -38,6 +38,8 @@ const ( 
nodeApiServerHandshakeLatencyDesc = "Histogram depicting latency of the TCP handshake between nodes and Kubernetes API server measured in milliseconds" dnsRequestCounterDescription = "DNS requests by statistics" dnsResponseCounterDescription = "DNS responses by statistics" + infinibandCounterStatsDescription = "InfiniBand Counter Statistics" + infinibandStatusParamsDescription = "InfiniBand Status Parameters" // Control plane metrics pluginManagerFailedToReconcileCounterDescription = "Number of times the plugin manager failed to reconcile the plugins" @@ -86,6 +88,9 @@ var ( // DNS Metrics. DNSRequestCounter ICounterVec DNSResponseCounter ICounterVec + + InfinibandCounterStats IGaugeVec + InfinibandStatusParams IGaugeVec ) func ToPrometheusType(metric interface{}) prometheus.Collector { diff --git a/pkg/plugin/infiniband/Makefile b/pkg/plugin/infiniband/Makefile new file mode 100644 index 0000000000..134f59a69c --- /dev/null +++ b/pkg/plugin/infiniband/Makefile @@ -0,0 +1,11 @@ +REPO_ROOT = $(shell git rev-parse --show-toplevel) +TOOLS_BIN_DIR = $(REPO_ROOT)/hack/tools/bin +MOCKGEN = $(TOOLS_BIN_DIR)/mockgen + +.PHONY: generate + +generate: $(MOCKGEN) ## Generate mock clients + $(MOCKGEN) -source=$(REPO_ROOT)/pkg/plugin/infiniband/types_linux.go -copyright_file=$(REPO_ROOT)/pkg/lib/ignore_headers.txt -package=infiniband > infiniband_mock_generated.go + +$(MOCKGEN): + @make -C $(REPO_ROOT) $(MOCKGEN) diff --git a/pkg/plugin/infiniband/embedded_filesystem.go b/pkg/plugin/infiniband/embedded_filesystem.go new file mode 100644 index 0000000000..8f07f73a1c --- /dev/null +++ b/pkg/plugin/infiniband/embedded_filesystem.go @@ -0,0 +1,36 @@ +package infiniband + +import "testing/fstest" + +var embeddedFs = fstest.MapFS{ + "infiniband/mlx5_ib0/ports/1/counters/excessive_buffer_overrun_errors": &fstest.MapFile{ + Data: []byte("1"), + }, + "infiniband/mlx5_ib0/ports/1/counters/VL15_dropped": &fstest.MapFile{ + Data: []byte("1"), + }, + 
"infiniband/mlx5_ib0/ports/2/counters/excessive_buffer_overrun_errors": &fstest.MapFile{ + Data: []byte("1"), + }, + "infiniband/mlx5_ib0/ports/2/counters/VL15_dropped": &fstest.MapFile{ + Data: []byte("1"), + }, + "infiniband/mlx5_an0/ports/1/counters/excessive_buffer_overrun_errors": &fstest.MapFile{ + Data: []byte("1"), + }, + "infiniband/mlx5_an0/ports/1/counters/VL15_dropped": &fstest.MapFile{ + Data: []byte("1"), + }, + "net/ib0/debug/link_down_reason": &fstest.MapFile{ + Data: []byte("1"), + }, + "net/ib0/debug/lro_timeout": &fstest.MapFile{ + Data: []byte("1"), + }, + "net/docker0/debug/link_down_reason": &fstest.MapFile{ + Data: []byte("1"), + }, + "net/docker0/debug/lro_timeout": &fstest.MapFile{ + Data: []byte("1"), + }, +} diff --git a/pkg/plugin/infiniband/infiniband_linux.go b/pkg/plugin/infiniband/infiniband_linux.go new file mode 100644 index 0000000000..e4b6c8eff0 --- /dev/null +++ b/pkg/plugin/infiniband/infiniband_linux.go @@ -0,0 +1,80 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +// Package infiniband contains the Retina infiniband plugin. It gathers infiniband statistics and debug status parameters. +package infiniband + +import ( + "context" + "time" + + hubblev1 "github.com/cilium/cilium/pkg/hubble/api/v1" + kcfg "github.com/microsoft/retina/pkg/config" + "github.com/microsoft/retina/pkg/log" + "github.com/microsoft/retina/pkg/plugin/api" + "go.uber.org/zap" +) + +// New creates a infiniband plugin. 
+func New(cfg *kcfg.Config) api.Plugin { + return &infiniband{ + cfg: cfg, + l: log.Logger().Named(string(Name)), + } +} + +func (ib *infiniband) Name() string { + return string(Name) +} + +func (ib *infiniband) Generate(ctx context.Context) error { //nolint //implementing iface + return nil +} + +func (ib *infiniband) Compile(ctx context.Context) error { //nolint // implementing iface + return nil +} + +func (ib *infiniband) Init() error { + ib.l.Info("Initializing infiniband plugin...") + return nil +} + +func (ib *infiniband) Start(ctx context.Context) error { + ib.isRunning = true + return ib.run(ctx) +} + +func (ib *infiniband) SetupChannel(ch chan *hubblev1.Event) error { // nolint // impl. iface + ib.l.Warn("Plugin does not support SetupChannel", zap.String("plugin", string(Name))) + return nil +} + +func (ib *infiniband) run(ctx context.Context) error { + ib.l.Info("Running infiniband plugin...") + ticker := time.NewTicker(ib.cfg.MetricsInterval) + defer ticker.Stop() + + for { + select { + case <-ctx.Done(): + ib.l.Info("Context is done, infiniband will stop running") + return nil + case <-ticker.C: + infinibandReader := NewInfinibandReader() + err := infinibandReader.readAndUpdate() + if err != nil { + ib.l.Error("Reading infiniband stats failed", zap.Error(err)) + } + } + } +} + +func (ib *infiniband) Stop() error { + if !ib.isRunning { + return nil + } + ib.l.Info("Stopping infiniband plugin...") + ib.isRunning = false + return nil +} diff --git a/pkg/plugin/infiniband/infiniband_linux_test.go b/pkg/plugin/infiniband/infiniband_linux_test.go new file mode 100644 index 0000000000..e22b94b29c --- /dev/null +++ b/pkg/plugin/infiniband/infiniband_linux_test.go @@ -0,0 +1,77 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. 
+ +//go:build unit +// +build unit + +package infiniband + +import ( + "context" + "testing" + "time" + + kcfg "github.com/microsoft/retina/pkg/config" + + "github.com/microsoft/retina/pkg/log" + "github.com/stretchr/testify/require" + "golang.org/x/sync/errgroup" +) + +var ( + cfgPodLevelEnabled = &kcfg.Config{ + MetricsInterval: 1 * time.Second, + EnablePodLevel: true, + } + cfgPodLevelDisabled = &kcfg.Config{ + MetricsInterval: 1 * time.Second, + EnablePodLevel: false, + } +) + +func TestStop(t *testing.T) { + log.SetupZapLogger(log.GetDefaultLogOpts()) + p := &infiniband{ + cfg: cfgPodLevelEnabled, + l: log.Logger().Named(string(Name)), + } + err := p.Stop() + if err != nil { + t.Fatalf("Expected no error") + } + if p.isRunning { + t.Fatalf("Expected isRunning to be false") + } + + p.isRunning = true + err = p.Stop() + if err != nil { + t.Fatalf("Expected no error") + } + if p.isRunning { + t.Fatalf("Expected isRunning to be false") + } +} + +func TestShutdown(t *testing.T) { + log.SetupZapLogger(log.GetDefaultLogOpts()) + p := &infiniband{ + cfg: &kcfg.Config{ + MetricsInterval: 100 * time.Second, + EnablePodLevel: true, + }, + l: log.Logger().Named(string(Name)), + } + + ctx, cancel := context.WithCancel(context.Background()) + g, errctx := errgroup.WithContext(ctx) + + g.Go(func() error { + return p.Start(errctx) + }) + + time.Sleep(1 * time.Second) + cancel() + err := g.Wait() + require.NoError(t, err) +} diff --git a/pkg/plugin/infiniband/infiniband_mock_generated.go b/pkg/plugin/infiniband/infiniband_mock_generated.go new file mode 100644 index 0000000000..85ab8725a5 --- /dev/null +++ b/pkg/plugin/infiniband/infiniband_mock_generated.go @@ -0,0 +1,10 @@ +// Code generated by MockGen. DO NOT EDIT. +// Source: types_linux.go +// +// Generated by this command: +// +// mockgen -source=types_linux.go -destination=infiniband_mock_generated.go -package=infiniband +// + +// Package infiniband is a generated GoMock package. 
+package infiniband
diff --git a/pkg/plugin/infiniband/infiniband_stats_linux.go b/pkg/plugin/infiniband/infiniband_stats_linux.go
new file mode 100644
index 0000000000..9cfc5b56a2
--- /dev/null
+++ b/pkg/plugin/infiniband/infiniband_stats_linux.go
@@ -0,0 +1,149 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT license.
+package infiniband
+
+import (
+	"io/fs"
+	"os"
+	"path/filepath"
+	"strconv"
+	"strings"
+
+	"github.com/microsoft/retina/pkg/log"
+	"github.com/microsoft/retina/pkg/metrics"
+	"go.uber.org/zap"
+)
+
+const (
+	pathInfiniband            = "/sys/class/infiniband"
+	pathDebugStatusParameters = "/sys/class/net"
+
+	InfinibandDevicePrefix = "mlx5_ib" // device-name prefix selected under pathInfiniband
+	InfinibandIfacePrefix  = "ib"      // interface-name prefix selected under pathDebugStatusParameters
+)
+
+// NewInfinibandReader returns a reader with empty counter and status-parameter maps.
+func NewInfinibandReader() *InfinibandReader {
+	return &InfinibandReader{
+		l:                log.Logger().Named("InfinibandReader"),
+		counterStats:     make(map[CounterStat]uint64),
+		statusParamStats: make(map[StatusParam]uint64),
+	}
+}
+
+// InfinibandReader holds the values most recently read by its read* methods.
+type InfinibandReader struct { // nolint // clearer naming
+	l                *log.ZapLogger
+	counterStats     map[CounterStat]uint64
+	statusParamStats map[StatusParam]uint64
+}
+
+// readAndUpdate reads both sysfs trees, publishes the results, and returns the first read error.
+func (ir *InfinibandReader) readAndUpdate() error {
+	// io/fs names must be unrooted, so each tree is walked from "." under its own DirFS root.
+	ibFS := os.DirFS(pathInfiniband)
+	counterStatsErr := ir.readCounterStats(ibFS, ".")
+
+	netFS := os.DirFS(pathDebugStatusParameters)
+	statusParamStatsErr := ir.readStatusParamStats(netFS, ".")
+
+	ir.updateMetrics()
+	ir.l.Debug("Done reading and updating stats")
+
+	if counterStatsErr != nil {
+		return counterStatsErr
+	}
+	return statusParamStatsErr
+}
+
+// readCounterStats collects path/<device>/ports/<port>/counters/* for devices prefixed InfinibandDevicePrefix.
+func (ir *InfinibandReader) readCounterStats(fsys fs.FS, path string) error {
+	devices, err := fs.ReadDir(fsys, path)
+	if err != nil {
+		ir.l.Error("error reading dir:", zap.Error(err))
+		return err // nolint std. fmt.
+	}
+	for _, device := range devices {
+		if !strings.HasPrefix(device.Name(), InfinibandDevicePrefix) {
+			continue
+		}
+		portsPath := filepath.Join(path, device.Name(), "ports")
+		ports, err := fs.ReadDir(fsys, portsPath)
+		if err != nil {
+			ir.l.Error("error reading dir:", zap.Error(err))
+			continue
+		}
+		for _, port := range ports {
+			countersPath := filepath.Join(portsPath, port.Name(), "counters")
+			counters, err := fs.ReadDir(fsys, countersPath)
+			if err != nil {
+				ir.l.Error("error reading dir:", zap.Error(err))
+				continue
+			}
+			for _, counter := range counters {
+				counterPath := filepath.Join(countersPath, counter.Name())
+				val, err := fs.ReadFile(fsys, counterPath)
+				if err != nil {
+					ir.l.Error("Error while reading infiniband file: \n", zap.Error(err))
+					continue
+				}
+				num, err := strconv.ParseUint(strings.TrimSpace(string(val)), 10, 64)
+				if err != nil {
+					ir.l.Error("error parsing string:", zap.Error(err))
+					continue // nolint std. fmt.
+				}
+				ir.counterStats[CounterStat{Name: counter.Name(), Device: device.Name(), Port: port.Name()}] = num
+			}
+		}
+	}
+	return nil
+}
+
+// readStatusParamStats collects path/<iface>/debug/* for interfaces prefixed InfinibandIfacePrefix.
+func (ir *InfinibandReader) readStatusParamStats(fsys fs.FS, path string) error {
+	ifaces, err := fs.ReadDir(fsys, path)
+	if err != nil {
+		ir.l.Error("error reading dir:", zap.Error(err))
+		return err // nolint std. fmt.
+	}
+	ir.statusParamStats = make(map[StatusParam]uint64)
+	for _, iface := range ifaces {
+		if !strings.HasPrefix(iface.Name(), InfinibandIfacePrefix) {
+			continue
+		}
+		statusParamsPath := filepath.Join(path, iface.Name(), "debug")
+		statusParams, err := fs.ReadDir(fsys, statusParamsPath)
+		if err != nil {
+			ir.l.Error("error reading dir:", zap.Error(err))
+			continue
+		}
+		for _, statusParam := range statusParams {
+			statusParamPath := filepath.Join(statusParamsPath, statusParam.Name())
+			val, err := fs.ReadFile(fsys, statusParamPath)
+			if err != nil {
+				ir.l.Error("Error while reading infiniband path file: \n", zap.Error(err))
+				continue
+			}
+			num, err := strconv.ParseUint(strings.TrimSpace(string(val)), 10, 64) // values may end in a newline
+			if err != nil {
+				ir.l.Error("error parsing string:", zap.Error(err))
+				continue // skip the bad file, as readCounterStats does, instead of dropping the rest
+			}
+			ir.statusParamStats[StatusParam{Name: statusParam.Name(), Iface: iface.Name()}] = num
+		}
+	}
+	return nil
+}
+
+// updateMetrics sets the InfinibandCounterStats and InfinibandStatusParams gauges from the collected maps.
+func (ir *InfinibandReader) updateMetrics() {
+	// Adding counter stats
+	for counter, val := range ir.counterStats {
+		metrics.InfinibandCounterStats.WithLabelValues(counter.Name, counter.Device, counter.Port).Set(float64(val))
+	}
+
+	// Adding status params
+	for statusParam, val := range ir.statusParamStats {
+		metrics.InfinibandStatusParams.WithLabelValues(statusParam.Name, statusParam.Iface).Set(float64(val))
+	}
+}
diff --git a/pkg/plugin/infiniband/infiniband_stats_linux_test.go b/pkg/plugin/infiniband/infiniband_stats_linux_test.go
new file mode 100644
index 0000000000..5fd102e8d0
--- /dev/null
+++ b/pkg/plugin/infiniband/infiniband_stats_linux_test.go
@@ -0,0 +1,145 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT license.
+package infiniband + +import ( + "testing" + + "github.com/microsoft/retina/pkg/log" + "github.com/microsoft/retina/pkg/metrics" + "github.com/prometheus/client_golang/prometheus" + "github.com/stretchr/testify/assert" + gomock "go.uber.org/mock/gomock" +) + +var ( + MockGaugeVec *metrics.MockIGaugeVec + MockCounterVec *metrics.MockICounterVec +) + +func TestNewInfinibandReader(t *testing.T) { + log.SetupZapLogger(log.GetDefaultLogOpts()) + ctrl := gomock.NewController(t) + defer ctrl.Finish() + + nr := NewInfinibandReader() + assert.NotNil(t, nr) +} + +func InitalizeMetricsForTesting(ctrl *gomock.Controller) { + metricsLogger := log.Logger().Named("metrics") + metricsLogger.Info("Initializing metrics for testing") + + MockGaugeVec = metrics.NewMockIGaugeVec(ctrl) + metrics.InfinibandCounterStats = MockGaugeVec //nolint:typecheck // no type check + metrics.InfinibandStatusParams = MockGaugeVec +} + +//nolint:testifylint // not making linter changes to preserve exact behavior +func TestReadCounterStats(t *testing.T) { + log.SetupZapLogger(log.GetDefaultLogOpts()) + tests := []struct { + name string + filePath string + result *CounterStat + wantErr bool + }{ + { + name: "test correct", + filePath: "infiniband", + wantErr: false, + }, + { + name: "test error", + filePath: "infiniband-error", + wantErr: true, + }, + } + + for _, tt := range tests { + tt := tt + t.Run(tt.name, func(t *testing.T) { + ctrl := gomock.NewController(t) + defer ctrl.Finish() + + nr := NewInfinibandReader() + InitalizeMetricsForTesting(ctrl) + + testmetric := prometheus.NewGauge(prometheus.GaugeOpts{ + Name: "testmetric", + Help: "testmetric", + }) + + MockGaugeVec.EXPECT().WithLabelValues(gomock.Any()).Return(testmetric).AnyTimes() + + assert.NotNil(t, nr) + err := nr.readCounterStats(embeddedFs, tt.filePath) + if tt.wantErr { + assert.NotNil(t, err, "Expected error but got nil") + } else { + assert.Nil(t, err, "Expected nil but got err") + assert.NotNil(t, nr.counterStats, "Expected data 
got nil") + for _, val := range nr.counterStats { + assert.Equal(t, val, uint64(1)) + } + assert.Equal(t, 4, len(nr.counterStats), "Read values are not equal to expected") + nr.updateMetrics() + } + }) + } +} + +func TestReadStatusParamStats(t *testing.T) { + log.SetupZapLogger(log.GetDefaultLogOpts()) + tests := []struct { + name string + filePath string + result *StatusParam + wantErr bool + }{ + { + name: "test correct", + filePath: "net", + wantErr: false, + }, + { + name: "test error", + filePath: "net-error", + wantErr: true, + }, + } + + for _, tt := range tests { + tt := tt + t.Run(tt.name, func(t *testing.T) { + ctrl := gomock.NewController(t) + defer ctrl.Finish() + + nr := NewInfinibandReader() + assert.NotNil(t, nr) + + InitalizeMetricsForTesting(ctrl) + + testmetric := prometheus.NewGauge(prometheus.GaugeOpts{ + Name: "testmetric", + Help: "testmetric", + }) + + MockGaugeVec.EXPECT().WithLabelValues(gomock.Any()).Return(testmetric).AnyTimes() + + err := nr.readStatusParamStats(embeddedFs, tt.filePath) + if tt.wantErr { + assert.NotNil(t, err, "Expected error but got nil") // nolint std. fmt. + } else { + assert.Nil(t, err, "Expected nil but got err") // nolint std. fmt. + assert.NotNil(t, nr.statusParamStats, "Expected data got nil") + for _, val := range nr.statusParamStats { + assert.Equal(t, uint64(1), val) + } + assert.Equal(t, 2, len(nr.statusParamStats), "Read values are not equal to expected") // nolint // no issue + + nr.updateMetrics() + } + }) + } +} diff --git a/pkg/plugin/infiniband/types_linux.go b/pkg/plugin/infiniband/types_linux.go new file mode 100644 index 0000000000..51fe2f78c9 --- /dev/null +++ b/pkg/plugin/infiniband/types_linux.go @@ -0,0 +1,31 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. 
+package infiniband + +import ( + kcfg "github.com/microsoft/retina/pkg/config" + "github.com/microsoft/retina/pkg/log" + "github.com/microsoft/retina/pkg/plugin/api" +) + +const ( + Name api.PluginName = "infiniband" +) + +//go:generate go run go.uber.org/mock/mockgen@v0.4.0 -source=types_linux.go -destination=infiniband_mock_generated.go -package=infiniband +type infiniband struct { + cfg *kcfg.Config + l *log.ZapLogger + isRunning bool +} + +type CounterStat struct { + Name string + Device string + Port string +} + +type StatusParam struct { + Name string + Iface string +} diff --git a/pkg/plugin/linuxutil/linuxutil_mock_generated.go b/pkg/plugin/linuxutil/linuxutil_mock_generated.go index 225be69fe1..c94c26921c 100644 --- a/pkg/plugin/linuxutil/linuxutil_mock_generated.go +++ b/pkg/plugin/linuxutil/linuxutil_mock_generated.go @@ -1,9 +1,15 @@ +// autogenerated +// +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. +// + // Code generated by MockGen. DO NOT EDIT. -// Source: types_linux.go +// Source: /home/spencermckee/retina/pkg/plugin/linuxutil/types_linux.go // // Generated by this command: // -// mockgen -source=types_linux.go -destination=linuxutil_mock_generated.go -package=linuxutil +// mockgen -source=/home/spencermckee/retina/pkg/plugin/linuxutil/types_linux.go -copyright_file=/home/spencermckee/retina/pkg/lib/ignore_headers.txt -package=linuxutil // // Package linuxutil is a generated GoMock package. 
diff --git a/pkg/plugin/registry/registry_linux.go b/pkg/plugin/registry/registry_linux.go index 6918a6fe35..cbe7ed5f33 100644 --- a/pkg/plugin/registry/registry_linux.go +++ b/pkg/plugin/registry/registry_linux.go @@ -11,6 +11,7 @@ import ( "github.com/microsoft/retina/pkg/plugin/dns" "github.com/microsoft/retina/pkg/plugin/dropreason" "github.com/microsoft/retina/pkg/plugin/linuxutil" + "github.com/microsoft/retina/pkg/plugin/infiniband" "github.com/microsoft/retina/pkg/plugin/mockplugin" "github.com/microsoft/retina/pkg/plugin/packetforward" "github.com/microsoft/retina/pkg/plugin/packetparser" @@ -26,6 +27,7 @@ func RegisterPlugins() { PluginHandler[dropreason.Name] = dropreason.New PluginHandler[packetforward.Name] = packetforward.New PluginHandler[linuxutil.Name] = linuxutil.New + PluginHandler[infiniband.Name] = infiniband.New PluginHandler[packetparser.Name] = packetparser.New PluginHandler[dns.Name] = dns.New PluginHandler[tcpretrans.Name] = tcpretrans.New diff --git a/pkg/utils/attr_utils.go b/pkg/utils/attr_utils.go index c0c47e9574..4776d42e76 100644 --- a/pkg/utils/attr_utils.go +++ b/pkg/utils/attr_utils.go @@ -48,6 +48,7 @@ var ( Endpoint = "endpoint" AclRule = "aclrule" Active = "ACTIVE" + Device = "device" // TCP Connection Statistic Names ResetCount = "ResetCount" diff --git a/pkg/utils/metric_names.go b/pkg/utils/metric_names.go index b672dd2087..3db7daaa3e 100644 --- a/pkg/utils/metric_names.go +++ b/pkg/utils/metric_names.go @@ -29,6 +29,8 @@ const ( NodeApiServerLatencyName = "node_apiserver_latency" NodeApiServerTcpHandshakeLatencyName = "node_apiserver_handshake_latency" NoResponseFromApiServerName = "node_apiserver_no_response" + InfinibandCounterStatsName = "infiniband_counter_stats" + InfinibandStatusParamsName = "infiniband_status_params" // Common Gauges across os distributions NodeConnectivityStatusName = "node_connectivity_status" diff --git a/site/sidebars.js b/site/sidebars.js index d75b6b45da..f5f40cbbe9 100644 --- 
a/site/sidebars.js +++ b/site/sidebars.js @@ -48,6 +48,7 @@ const sidebars = { 'metrics/plugins/packetforward', 'metrics/plugins/dropreason', 'metrics/plugins/linuxutil', + 'metrics/plugins/infiniband', 'metrics/plugins/dns', 'metrics/plugins/hnsstats', 'metrics/plugins/packetparser',
diff --git a/test/plugin/infiniband/main_linux.go b/test/plugin/infiniband/main_linux.go
new file mode 100644
index 0000000000..c97bfdc0e5
--- /dev/null
+++ b/test/plugin/infiniband/main_linux.go
@@ -0,0 +1,49 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT license.
+package main
+
+import (
+	"context"
+	"os"
+	"os/signal"
+	"syscall"
+	"time"
+
+	kcfg "github.com/microsoft/retina/pkg/config"
+	"github.com/microsoft/retina/pkg/log"
+	"github.com/microsoft/retina/pkg/metrics"
+	"github.com/microsoft/retina/pkg/plugin/infiniband"
+	"go.uber.org/zap"
+)
+
+// main runs the infiniband plugin with a one-second interval until the
+// process receives SIGINT or SIGTERM, then stops the plugin.
+func main() {
+	log.SetupZapLogger(log.GetDefaultLogOpts())
+	l := log.Logger().Named("test-infiniband")
+
+	metrics.InitializeMetrics()
+
+	cfg := &kcfg.Config{
+		MetricsInterval: 1 * time.Second,
+		EnablePodLevel:  true,
+	}
+	tt := infiniband.New(cfg)
+	if err := tt.Init(); err != nil {
+		l.Error("Init failed", zap.Error(err))
+		return
+	}
+
+	// Start blocks until ctx is cancelled, so cancellation is tied to a signal.
+	ctx, stop := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGTERM)
+	defer stop()
+	defer func() {
+		if err := tt.Stop(); err != nil {
+			l.Error("stop failed", zap.Error(err))
+		}
+	}()
+	l.Info("starting infiniband plugin")
+	if err := tt.Start(ctx); err != nil {
+		l.Error("start failed", zap.Error(err))
+	}
+}