diff --git a/.chloggen/windows-service-respects-telemetry-config.yaml b/.chloggen/windows-service-respects-telemetry-config.yaml new file mode 100644 index 00000000000..312765f92d2 --- /dev/null +++ b/.chloggen/windows-service-respects-telemetry-config.yaml @@ -0,0 +1,25 @@ +# Use this changelog template to create an entry for release notes. + +# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix' +change_type: 'bug_fix' + +# The name of the component, or a single word describing the area of concern, (e.g. otlpreceiver) +component: otelcol + +# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). +note: Respect telemetry configuration when running as a Windows service + +# One or more tracking issues or pull requests related to the change +issues: [5300] + +# (Optional) One or more lines of additional information to render under the primary note. +# These lines will be padded with 2 spaces and then inserted directly into the document. +# Use pipe (|) for multiline entries. +subtext: + +# Optional: The change log or logs in which this entry should be included. +# e.g. '[user]' or '[user, api]' +# Include 'user' if the change is relevant to end users. +# Include 'api' if there is a change to a library API. +# Default: '[user]' +change_logs: [user] diff --git a/.github/workflows/build-and-test-windows.yaml b/.github/workflows/build-and-test-windows.yaml index c15649edeb7..8f70d45ae82 100644 --- a/.github/workflows/build-and-test-windows.yaml +++ b/.github/workflows/build-and-test-windows.yaml @@ -61,15 +61,16 @@ jobs: - name: Install otelcorecol as a service run: | - New-Service -Name "otelcorecol" -BinaryPathName "${PWD}\bin\otelcorecol_windows_amd64 --config ${PWD}\examples\local\otel-config.yaml" + New-Service -Name "otelcorecol" -StartupType "Manual" -BinaryPathName "${PWD}\bin\otelcorecol_windows_amd64 --config ${PWD}\examples\local\otel-config.yaml" eventcreate.exe /t information /id 1 /l application /d "Creating event provider for 'otelcorecol'" /so otelcorecol - name: Test otelcorecol service + working-directory: ${{ github.workspace }}/otelcol run: | - (Start-Service otelcorecol -PassThru).WaitForStatus('Running', '00:00:30') - (Stop-Service otelcorecol -PassThru).WaitForStatus('Stopped', '00:00:30') + go test -timeout 90s -run ^TestCollectorAsService$ -v -tags=win32service - name: Remove otelcorecol service + if: always() run: | Remove-Service otelcorecol Remove-Item HKLM:\SYSTEM\CurrentControlSet\Services\EventLog\Application\otelcorecol diff --git a/otelcol/collector.go b/otelcol/collector.go index 3fd5cb2ca31..a1500f5b81c 100644 --- a/otelcol/collector.go +++ b/otelcol/collector.go @@ -99,8 +99,9 @@ type Collector struct { configProvider ConfigProvider - service *service.Service - state *atomic.Int32 + serviceConfig *service.Config + service *service.Service + state *atomic.Int32 // shutdownChan is used to terminate the collector. shutdownChan chan struct{} @@ -182,6 +183,7 @@ func (col *Collector) setupConfigurationComponents(ctx context.Context) error { return fmt.Errorf("invalid configuration: %w", err) } + col.serviceConfig = &cfg.Service col.service, err = service.New(ctx, service.Settings{ BuildInfo: col.set.BuildInfo, CollectorConf: conf, diff --git a/otelcol/collector_windows.go b/otelcol/collector_windows.go index f443d930b17..cc0a3611628 100644 --- a/otelcol/collector_windows.go +++ b/otelcol/collector_windows.go @@ -18,6 +18,7 @@ import ( "golang.org/x/sys/windows/svc/eventlog" "go.opentelemetry.io/collector/featuregate" + "go.opentelemetry.io/collector/service" ) type windowsService struct { @@ -76,11 +77,6 @@ func (s *windowsService) Execute(args []string, requests <-chan svc.ChangeReques } func (s *windowsService) start(elog *eventlog.Log, colErrorChannel chan error) error { - // Append to new slice instead of the already existing s.settings.LoggingOptions slice to not change that. - s.settings.LoggingOptions = append( - []zap.Option{zap.WrapCore(withWindowsCore(elog))}, - s.settings.LoggingOptions..., - ) // Parse all the flags manually. if err := s.flags.Parse(os.Args[1:]); err != nil { return err @@ -96,6 +92,18 @@ func (s *windowsService) start(elog *eventlog.Log, colErrorChannel chan error) e return err } + // The logging options need to be in place before the collector Run method is called + // since the telemetry creates the logger at the time of the Run method call. + // However, the zap.WrapCore function needs to read the serviceConfig to determine + // if the Windows Event Log should be used, however, the serviceConfig is also + // only read at the time of the Run method call. To work around this, we pass the + // serviceConfig as a pointer to the logging options, and then read its value + // when the zap.Logger is created by the telemetry. + s.col.set.LoggingOptions = append( + s.col.set.LoggingOptions, + zap.WrapCore(withWindowsCore(elog, &s.col.serviceConfig)), + ) + // col.Run blocks until receiving a SIGTERM signal, so needs to be started // asynchronously, but it will exit early if an error occurs on startup go func() { @@ -192,8 +200,18 @@ func (w windowsEventLogCore) Sync() error { return w.core.Sync() } -func withWindowsCore(elog *eventlog.Log) func(zapcore.Core) zapcore.Core { +func withWindowsCore(elog *eventlog.Log, serviceConfig **service.Config) func(zapcore.Core) zapcore.Core { return func(core zapcore.Core) zapcore.Core { + if serviceConfig != nil { + for _, output := range (*serviceConfig).Telemetry.Logs.OutputPaths { + if output != "stdout" && output != "stderr" { + // A log file was specified in the configuration, so we should not use the Windows Event Log + return core + } + } + } + + // Use the Windows Event Log encoderConfig := zap.NewProductionEncoderConfig() encoderConfig.LineEnding = "\r\n" return windowsEventLogCore{core, elog, zapcore.NewConsoleEncoder(encoderConfig)} diff --git a/otelcol/collector_windows_service_test.go b/otelcol/collector_windows_service_test.go new file mode 100644 index 00000000000..999fa87bbbc --- /dev/null +++ b/otelcol/collector_windows_service_test.go @@ -0,0 +1,185 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +//go:build windows && win32service + +package otelcol + +import ( + "encoding/xml" + "fmt" + "os" + "os/exec" + "path/filepath" + "testing" + "time" + + "github.com/stretchr/testify/require" + "golang.org/x/sys/windows/svc" + "golang.org/x/sys/windows/svc/mgr" +) + +const ( + collectorServiceName = "otelcorecol" +) + +// Test the collector as a Windows service. +// The test assumes that the service and respective event source are already created. +// The test also must be executed with administrative privileges. +func TestCollectorAsService(t *testing.T) { + collector_executable, err := filepath.Abs(filepath.Join("..", "bin", "otelcorecol_windows_amd64")) + require.NoError(t, err) + _, err = os.Stat(collector_executable) + require.NoError(t, err) + + scm, err := mgr.Connect() + require.NoError(t, err) + defer scm.Disconnect() + + service, err := scm.OpenService(collectorServiceName) + require.NoError(t, err) + defer service.Close() + + tests := []struct { + name string + configFile string + expectStartFailure bool + customSetup func(*testing.T) + customValidation func(*testing.T) + }{ + { + name: "Default", + configFile: filepath.Join("..", "examples", "local", "otel-config.yaml"), + }, + { + name: "ConfigFileNotFound", + configFile: filepath.Join(".", "non", "existent", "otel-config.yaml"), + expectStartFailure: true, + }, + { + name: "LogToFile", + configFile: filepath.Join(".", "testdata", "otel-log-to-file.yaml"), + customSetup: func(t *testing.T) { + // Create the folder and clean the log file if it exists + programDataPath := os.Getenv("ProgramData") + logsPath := filepath.Join(programDataPath, "OpenTelemetry", "Collector", "Logs") + err := os.MkdirAll(logsPath, os.ModePerm) + require.NoError(t, err) + + logFilePath := filepath.Join(logsPath, "otelcol.log") + err = os.Remove(logFilePath) + if err != nil && !os.IsNotExist(err) { + require.NoError(t, err) + } + }, + customValidation: func(t *testing.T) { + // Check that the log file was created + programDataPath := os.Getenv("ProgramData") + logsPath := filepath.Join(programDataPath, "OpenTelemetry", "Collector", "Logs") + logFilePath := filepath.Join(logsPath, "otelcol.log") + fileinfo, err := os.Stat(logFilePath) + require.NoError(t, err) + require.NotEmpty(t, fileinfo.Size()) + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + serviceConfig, err := service.Config() + require.NoError(t, err) + + // Setup the command line to launch the collector as a service + fullConfigPath, err := filepath.Abs(tt.configFile) + require.NoError(t, err) + + serviceConfig.BinaryPathName = fmt.Sprintf("\"%s\" --config \"%s\"", collector_executable, fullConfigPath) + err = service.UpdateConfig(serviceConfig) + require.NoError(t, err) + + if tt.customSetup != nil { + tt.customSetup(t) + } + + startTime := time.Now() + + err = service.Start() + require.NoError(t, err) + + expectedState := svc.Running + if tt.expectStartFailure { + expectedState = svc.Stopped + } else { + defer func() { + _, err = service.Control(svc.Stop) + require.NoError(t, err) + + require.Eventually(t, func() bool { + status, _ := service.Query() + return status.State == svc.Stopped + }, 10*time.Second, 500*time.Millisecond) + }() + } + + // Wait for the service to reach the expected state + require.Eventually(t, func() bool { + status, _ := service.Query() + return status.State == expectedState + }, 10*time.Second, 500*time.Millisecond) + + if tt.customValidation != nil { + tt.customValidation(t) + } else { + // Read the events from the otelcorecol source and check that they were emitted after the service + // command started. This is a simple validation that the messages are being logged on the + // Windows event log. + cmd := exec.Command("wevtutil.exe", "qe", "Application", "/c:1", "/rd:true", "/f:RenderedXml", "/q:*[System[Provider[@Name='otelcorecol']]]") + out, err := cmd.CombinedOutput() + require.NoError(t, err) + + var e Event + require.NoError(t, xml.Unmarshal([]byte(out), &e)) + + eventTime, err := time.Parse("2006-01-02T15:04:05.9999999Z07:00", e.System.TimeCreated.SystemTime) + require.NoError(t, err) + + require.True(t, eventTime.After(startTime.In(time.UTC))) + } + }) + } +} + +// Helper types to read the XML events from the event log using wevtutil +type Event struct { + XMLName xml.Name `xml:"Event"` + System System `xml:"System"` + Data string `xml:"EventData>Data"` +} + +type System struct { + Provider Provider `xml:"Provider"` + EventID int `xml:"EventID"` + Version int `xml:"Version"` + Level int `xml:"Level"` + Task int `xml:"Task"` + Opcode int `xml:"Opcode"` + Keywords string `xml:"Keywords"` + TimeCreated TimeCreated `xml:"TimeCreated"` + EventRecordID int `xml:"EventRecordID"` + Execution Execution `xml:"Execution"` + Channel string `xml:"Channel"` + Computer string `xml:"Computer"` +} + +type Provider struct { + Name string `xml:"Name,attr"` +} + +type TimeCreated struct { + SystemTime string `xml:"SystemTime,attr"` +} + +type Execution struct { + ProcessID string `xml:"ProcessID,attr"` + ThreadID string `xml:"ThreadID,attr"` +} diff --git a/otelcol/testdata/otel-log-to-file.yaml b/otelcol/testdata/otel-log-to-file.yaml new file mode 100644 index 00000000000..e618aca22fc --- /dev/null +++ b/otelcol/testdata/otel-log-to-file.yaml @@ -0,0 +1,30 @@ +extensions: + zpages: + +receivers: + otlp: + protocols: + grpc: + http: + +exporters: + debug: + verbosity: detailed + +service: + telemetry: + logs: + level: warn + output_paths: + # The folder need to be created prior to starting the collector + - ${ProgramData}\OpenTelemetry\Collector\Logs\otelcol.log + + pipelines: + traces: + receivers: [otlp] + exporters: [debug] + metrics: + receivers: [otlp] + exporters: [debug] + + extensions: [zpages]