Skip to content

Commit

Permalink
Added cleanup if service errors on Start (#6351)
Browse files Browse the repository at this point in the history
* Added cleanup if service errors on Start

Signed-off-by: Corbin Phelps <[email protected]>

* Added issue to changelog

Signed-off-by: Corbin Phelps <[email protected]>

* Added a test to verify the reusablility of telemetryInitializer in services

Signed-off-by: Corbin Phelps <[email protected]>

* Moved telemetryInitializer shutdown out of service shutdown

Signed-off-by: Corbin Phelps <[email protected]>

* Fixed comment

Signed-off-by: Corbin Phelps <[email protected]>

* Bundled service shutdown and telemetry into a helper function in the collector

Signed-off-by: Corbin Phelps <[email protected]>

* PR Fixups

Signed-off-by: Corbin Phelps <[email protected]>

* Update service/collector.go

Co-authored-by: Bogdan Drutu <[email protected]>

Signed-off-by: Corbin Phelps <[email protected]>
Co-authored-by: Bogdan Drutu <[email protected]>
  • Loading branch information
cpheps and Bogdan Drutu authored Oct 26, 2022
1 parent 36d142f commit 67bdf67
Show file tree
Hide file tree
Showing 4 changed files with 100 additions and 5 deletions.
16 changes: 16 additions & 0 deletions .chloggen/service-cleanup-on-start-error.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
change_type: bug_fix

# The name of the component, or a single word describing the area of concern, (e.g. otlpreceiver)
component: collector

# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
note: Fixed collector service not cleaning up if it failed during Start

# One or more tracking issues or pull requests related to the change
issues: [6352]

# (Optional) One or more lines of additional information to render under the primary note.
# These lines will be padded with 2 spaces and then inserted directly into the document.
# Use pipe (|) for multiline entries.
subtext:
21 changes: 16 additions & 5 deletions service/collector.go
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ func (col *Collector) setupConfigurationComponents(ctx context.Context) error {
}

if err = col.service.Start(ctx); err != nil {
return err
return multierr.Append(err, col.shutdownServiceAndTelemetry(ctx))
}
col.setCollectorState(Running)
return nil
Expand Down Expand Up @@ -219,17 +219,28 @@ func (col *Collector) shutdown(ctx context.Context) error {
errs = multierr.Append(errs, fmt.Errorf("failed to shutdown config provider: %w", err))
}

errs = multierr.Append(errs, col.shutdownServiceAndTelemetry(ctx))

col.setCollectorState(Closed)

return errs
}

// shutdownServiceAndTelemetry bundles shutting down the service and telemetryInitializer.
// Returned error will be in multierr form and wrapped.
func (col *Collector) shutdownServiceAndTelemetry(ctx context.Context) error {
var errs error

// shutdown service
if err := col.service.Shutdown(ctx); err != nil {
errs = multierr.Append(errs, fmt.Errorf("failed to shutdown service: %w", err))
errs = multierr.Append(errs, fmt.Errorf("failed to shutdown service after error: %w", err))
}

// TODO: Move this as part of the service shutdown.
// shutdown telemetryInitializer
if err := col.service.telemetryInitializer.shutdown(); err != nil {
errs = multierr.Append(errs, fmt.Errorf("failed to shutdown collector telemetry: %w", err))
}

col.setCollectorState(Closed)

return errs
}

Expand Down
1 change: 1 addition & 0 deletions service/service.go
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ func newService(set *settings) (*service, error) {
return srv, nil
}

// Start starts the extensions and pipelines. If Start fails Shutdown should be called to ensure a clean state.
func (srv *service) Start(ctx context.Context) error {
srv.telemetrySettings.Logger.Info("Starting "+srv.buildInfo.Command+"...",
zap.String("Version", srv.buildInfo.Version),
Expand Down
67 changes: 67 additions & 0 deletions service/service_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ package service

import (
"context"
"fmt"
"net/http"
"path/filepath"
"testing"

Expand Down Expand Up @@ -137,6 +139,71 @@ func TestServiceTelemetryCleanupOnError(t *testing.T) {

}

// TestServiceTelemetryReusable tests that a single telemetryInitializer can be reused in multiple services
func TestServiceTelemetryReusable(t *testing.T) {
factories, err := componenttest.NopFactories()
require.NoError(t, err)

// Read valid yaml config from file
validProvider, err := NewConfigProvider(newDefaultConfigProviderSettings([]string{filepath.Join("testdata", "otelcol-nop.yaml")}))
require.NoError(t, err)
validCfg, err := validProvider.Get(context.Background(), factories)
require.NoError(t, err)

// Create a service
telemetry := newColTelemetry(featuregate.NewRegistry())
// For safety ensure everything is cleaned up
t.Cleanup(func() {
assert.NoError(t, telemetry.shutdown())
})

srvOne, err := newService(&settings{
BuildInfo: component.NewDefaultBuildInfo(),
Factories: factories,
Config: validCfg,
telemetry: telemetry,
})
require.NoError(t, err)

// URL of the telemetry service metrics endpoint
telemetryURL := fmt.Sprintf("http://%s/metrics", telemetry.server.Addr)

// Start the service
require.NoError(t, srvOne.Start(context.Background()))

// check telemetry server to ensure we get a response
var resp *http.Response

// #nosec G107
resp, err = http.Get(telemetryURL)
assert.NoError(t, err)
assert.Equal(t, http.StatusOK, resp.StatusCode)

// Shutdown the service
require.NoError(t, srvOne.Shutdown(context.Background()))

// Create a new service with the same telemetry
srvTwo, err := newService(&settings{
BuildInfo: component.NewDefaultBuildInfo(),
Factories: factories,
Config: validCfg,
telemetry: telemetry,
})
require.NoError(t, err)

// Start the new service
require.NoError(t, srvTwo.Start(context.Background()))

// check telemetry server to ensure we get a response
// #nosec G107
resp, err = http.Get(telemetryURL)
assert.NoError(t, err)
assert.Equal(t, http.StatusOK, resp.StatusCode)

// Shutdown the new service
require.NoError(t, srvTwo.Shutdown(context.Background()))
}

func createExampleService(t *testing.T, factories component.Factories) *service {
// Read yaml config from file
prov, err := NewConfigProvider(newDefaultConfigProviderSettings([]string{filepath.Join("testdata", "otelcol-nop.yaml")}))
Expand Down

0 comments on commit 67bdf67

Please sign in to comment.