Skip to content

Commit

Permalink
Renaming and other review feedback
Browse files Browse the repository at this point in the history
  • Loading branch information
ciarams87 committed Sep 19, 2023
1 parent a34977a commit 46fc72f
Show file tree
Hide file tree
Showing 4 changed files with 48 additions and 45 deletions.
2 changes: 1 addition & 1 deletion docs/monitoring.md
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ NGINX Kubernetes Gateway exports the following metrics:
- nginx_reload_errors_total. Number of unsuccessful NGINX reloads.
- nginx_stale_config. 1 means NKG failed to configure NGINX with the latest version of the configuration, which means
NGINX is running with a stale version.
- nginx_last_reload_milliseconds. Duration in milliseconds of NGINX reloads.
- nginx_last_reload_milliseconds. Duration in milliseconds of NGINX reloads (histogram).
- These metrics have the namespace `nginx_kubernetes_gateway`, and include the label `class` which is set to the
Gateway class of NKG. For example, `nginx_kubernetes_gateway_nginx_reloads_total{class="nginx"}`.

Expand Down
33 changes: 18 additions & 15 deletions internal/mode/static/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -130,13 +130,9 @@ func StartManager(cfg config.Config) error {
return fmt.Errorf("NGINX is not running: %w", err)
}

var mgrCollector ngxruntime.ManagerCollector
mgrCollector = nkgmetrics.NewManagerFakeCollector()
if cfg.MetricsConfig.Enabled {
mgrCollector, err = configureNginxMetrics(cfg.GatewayClassName)
if err != nil {
return err
}
mgrCollector, err := createAndRegisterMetricsCollectors(cfg.MetricsConfig.Enabled, cfg.GatewayClassName)
if err != nil {
return fmt.Errorf("cannot create and register metrics collectors: %w", err)
}

statusUpdater := status.NewUpdater(status.UpdaterConfig{
Expand Down Expand Up @@ -356,19 +352,26 @@ func setInitialConfig(
return updateControlPlane(&config, logger, eventRecorder, configName, logLevelSetter)
}

func configureNginxMetrics(gatewayClassName string) (*nkgmetrics.ManagerMetricsCollector, error) {
constLabels := map[string]string{"class": gatewayClassName}
// createAndRegisterMetricsCollectors creates the NGINX status and NGINX runtime manager collectors, registers them,
// and returns the runtime manager collector to be used in the nginxRuntimeMgr.
func createAndRegisterMetricsCollectors(metricsEnabled bool, gwClassName string) (ngxruntime.ManagerCollector, error) {
if !metricsEnabled {
// return a no-op collector to avoid nil pointer errors when metrics are disabled
return nkgmetrics.NewManagerNoopCollector(), nil
}
constLabels := map[string]string{"class": gwClassName}

ngxCollector, err := nkgmetrics.NewNginxMetricsCollector(constLabels)
if err != nil {
return nil, fmt.Errorf("cannot get NGINX metrics: %w", err)
return nil, fmt.Errorf("cannot create NGINX status metrics collector: %w", err)
}
mgrCollector := nkgmetrics.NewManagerMetricsCollector(constLabels)
if err = metrics.Registry.Register(mgrCollector); err != nil {
return nil, fmt.Errorf("failed to register manager metrics collector: %w", err)
if err := metrics.Registry.Register(ngxCollector); err != nil {
return nil, fmt.Errorf("failed to register NGINX status metrics collector: %w", err)
}

if err = metrics.Registry.Register(ngxCollector); err != nil {
return nil, fmt.Errorf("failed to register NGINX metrics collector: %w", err)
mgrCollector := nkgmetrics.NewManagerMetricsCollector(constLabels)
if err := metrics.Registry.Register(mgrCollector); err != nil {
return nil, fmt.Errorf("failed to register NGINX manager runtime metrics collector: %w", err)
}

return mgrCollector, nil
Expand Down
32 changes: 16 additions & 16 deletions internal/mode/static/metrics/collector.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,21 +47,21 @@ func NewManagerMetricsCollector(constLabels map[string]string) *ManagerMetricsCo
Namespace: metricsNamespace,
Help: "Duration in milliseconds of NGINX reloads",
ConstLabels: constLabels,
Buckets: []float64{100.0, 200.0, 300.0, 400.0, 500.0},
Buckets: []float64{500, 1000, 5000, 10000, 30000},
},
),
}
return nc
}

// IncNginxReloadCount increments the counter of successful NGINX reloads and sets the stale config status to false.
func (mc *ManagerMetricsCollector) IncNginxReloadCount() {
func (mc *ManagerMetricsCollector) IncReloadCount() {
mc.reloadsTotal.Inc()
mc.updateConfigStaleStatus(false)
}

// IncNginxReloadErrors increments the counter of NGINX reload errors and sets the stale config status to true.
func (mc *ManagerMetricsCollector) IncNginxReloadErrors() {
func (mc *ManagerMetricsCollector) IncReloadErrors() {
mc.reloadsError.Inc()
mc.updateConfigStaleStatus(true)
}
Expand All @@ -75,8 +75,8 @@ func (mc *ManagerMetricsCollector) updateConfigStaleStatus(stale bool) {
mc.configStale.Set(status)
}

// UpdateLastReloadTime updates the last NGINX reload time.
func (mc *ManagerMetricsCollector) UpdateLastReloadTime(duration time.Duration) {
// ObserveLastReloadTime adds the last NGINX reload time to the histogram.
func (mc *ManagerMetricsCollector) ObserveLastReloadTime(duration time.Duration) {
mc.reloadsDuration.Observe(float64(duration / time.Millisecond))
}

Expand All @@ -96,20 +96,20 @@ func (mc *ManagerMetricsCollector) Collect(ch chan<- prometheus.Metric) {
mc.reloadsDuration.Collect(ch)
}

// ManagerFakeCollector is a fake collector that will implement ManagerCollector interface.
// ManagerNoopCollector is a no-op collector that will implement ManagerCollector interface.
// Used to initialize the ManagerCollector when metrics are disabled to avoid nil pointer errors.
type ManagerFakeCollector struct{}
type ManagerNoopCollector struct{}

// NewManagerFakeCollector creates a fake collector that implements ManagerCollector interface.
func NewManagerFakeCollector() *ManagerFakeCollector {
return &ManagerFakeCollector{}
// NewManagerNoopCollector creates a no-op collector that implements ManagerCollector interface.
func NewManagerNoopCollector() *ManagerNoopCollector {
return &ManagerNoopCollector{}
}

// IncNginxReloadCount implements a fake IncNginxReloadCount.
func (mc *ManagerFakeCollector) IncNginxReloadCount() {}
// IncReloadCount implements a no-op IncReloadCount.
func (mc *ManagerNoopCollector) IncReloadCount() {}

// IncNginxReloadErrors implements a fake IncNginxReloadErrors.
func (mc *ManagerFakeCollector) IncNginxReloadErrors() {}
// IncReloadErrors implements a no-op IncReloadErrors.
func (mc *ManagerNoopCollector) IncReloadErrors() {}

// UpdateLastReloadTime implements a fake UpdateLastReloadTime.
func (mc *ManagerFakeCollector) UpdateLastReloadTime(_ time.Duration) {}
// ObserveLastReloadTime implements a no-op ObserveLastReloadTime.
func (mc *ManagerNoopCollector) ObserveLastReloadTime(_ time.Duration) {}
26 changes: 13 additions & 13 deletions internal/mode/static/nginx/runtime/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,27 +43,27 @@ type Manager interface {

// ManagerCollector is an interface for the metrics of the NGINX runtime manager.
type ManagerCollector interface {
IncNginxReloadCount()
IncNginxReloadErrors()
UpdateLastReloadTime(ms time.Duration)
IncReloadCount()
IncReloadErrors()
ObserveLastReloadTime(ms time.Duration)
}

// ManagerImpl implements Manager.
type ManagerImpl struct {
verifyClient *verifyClient
metricsCollector ManagerCollector
managerCollector ManagerCollector
}

// NewManagerImpl creates a new ManagerImpl.
func NewManagerImpl(mgrCollector ManagerCollector) *ManagerImpl {
func NewManagerImpl(managerCollector ManagerCollector) *ManagerImpl {
return &ManagerImpl{
verifyClient: newVerifyClient(nginxReloadTimeout),
metricsCollector: mgrCollector,
managerCollector: managerCollector,
}
}

func (m *ManagerImpl) Reload(ctx context.Context, configVersion int) error {
t1 := time.Now()
start := time.Now()
// We find the main NGINX PID on every reload because it will change if the NGINX container is restarted.
pid, err := findMainProcess(ctx, os.Stat, os.ReadFile, pidFileTimeout)
if err != nil {
Expand All @@ -79,7 +79,7 @@ func (m *ManagerImpl) Reload(ctx context.Context, configVersion int) error {
// send HUP signal to the NGINX main process reload configuration
// See https://nginx.org/en/docs/control.html
if err := syscall.Kill(pid, syscall.SIGHUP); err != nil {
m.metricsCollector.IncNginxReloadErrors()
m.managerCollector.IncReloadErrors()
return fmt.Errorf("failed to send the HUP signal to NGINX main: %w", err)
}

Expand All @@ -90,18 +90,18 @@ func (m *ManagerImpl) Reload(ctx context.Context, configVersion int) error {
os.ReadFile,
childProcsTimeout,
); err != nil {
m.metricsCollector.IncNginxReloadErrors()
m.managerCollector.IncReloadErrors()
return fmt.Errorf(noNewWorkersErrFmt, configVersion, err)
}

if err = m.verifyClient.waitForCorrectVersion(ctx, configVersion); err != nil {
m.metricsCollector.IncNginxReloadErrors()
m.managerCollector.IncReloadErrors()
return err
}
m.metricsCollector.IncNginxReloadCount()
m.managerCollector.IncReloadCount()

t2 := time.Now()
m.metricsCollector.UpdateLastReloadTime(t2.Sub(t1))
finish := time.Now()
m.managerCollector.ObserveLastReloadTime(finish.Sub(start))
return nil
}

Expand Down

0 comments on commit 46fc72f

Please sign in to comment.