-
Notifications
You must be signed in to change notification settings - Fork 1.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add file watch to support config reload on file change #4454
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||
---|---|---|---|---|
|
@@ -18,12 +18,17 @@ import ( | |||
"context" | ||||
"errors" | ||||
"fmt" | ||||
"os" | ||||
"time" | ||||
|
||||
"go.opentelemetry.io/collector/config" | ||||
) | ||||
|
||||
var errNoOp = errors.New("no change") | ||||
|
||||
type fileMapProvider struct { | ||||
fileName string | ||||
watching bool | ||||
} | ||||
|
||||
// NewFile returns a new Provider that reads the configuration from the given file. | ||||
|
@@ -33,7 +38,7 @@ func NewFile(fileName string) Provider { | |||
} | ||||
} | ||||
|
||||
func (fmp *fileMapProvider) Retrieve(_ context.Context, _ func(*ChangeEvent)) (Retrieved, error) { | ||||
func (fmp *fileMapProvider) Retrieve(ctx context.Context, onChange func(*ChangeEvent)) (Retrieved, error) { | ||||
if fmp.fileName == "" { | ||||
return nil, errors.New("config file not specified") | ||||
} | ||||
|
@@ -43,9 +48,65 @@ func (fmp *fileMapProvider) Retrieve(_ context.Context, _ func(*ChangeEvent)) (R | |||
return nil, fmt.Errorf("error loading config file %q: %w", fmp.fileName, err) | ||||
} | ||||
|
||||
// Ensure that watches are only created once as Retrieve is called each time that `onChange()` is invoked. | ||||
if !fmp.watching { | ||||
watchFile(ctx, fmp.fileName, onChange) | ||||
fmp.watching = true | ||||
} | ||||
|
||||
return &simpleRetrieved{confMap: cp}, nil | ||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think this will work as expected by the
|
||||
} | ||||
|
||||
func (*fileMapProvider) Shutdown(context.Context) error { | ||||
func (*fileMapProvider) Shutdown(_ context.Context) error { | ||||
return nil | ||||
} | ||||
|
||||
// watchFile sets up a watch on a filename to detect changes and calls onChange() with a suitable ChangeEvent. | ||||
// The watch is cancelled when the context is done. | ||||
func watchFile(ctx context.Context, filename string, onChange func(*ChangeEvent)) { | ||||
var ( | ||||
lastfi os.FileInfo | ||||
) | ||||
|
||||
check := func() error { | ||||
currfi, err := os.Stat(filename) | ||||
modified := false | ||||
if err != nil { | ||||
if os.IsNotExist(err) && lastfi != nil { | ||||
return errNoOp | ||||
} | ||||
return err | ||||
} | ||||
if lastfi != nil && (currfi.Size() != lastfi.Size() || !currfi.ModTime().Equal(lastfi.ModTime())) { | ||||
modified = true | ||||
} | ||||
|
||||
lastfi = currfi | ||||
if modified { | ||||
return nil | ||||
} | ||||
|
||||
return errNoOp | ||||
} | ||||
|
||||
// Check the file every second and if it's been updated then initiate a config reload. | ||||
go func() { | ||||
ticker := time.NewTicker(time.Second) | ||||
defer ticker.Stop() | ||||
for { | ||||
select { | ||||
case <-ctx.Done(): | ||||
return | ||||
case <-ticker.C: | ||||
// If check returns a valid event, exit the loop. A new watch will be placed on the next Retrieve() | ||||
err := check() | ||||
if err == nil || err != errNoOp { | ||||
time.Sleep(time.Second * 2) | ||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nit: this will result in reading the file 2 seconds after the first modification. A slightly better approach is to wait 2 seconds after the last modification. The difference is small but may be visible if we have small writers. It is probably fine for now. |
||||
onChange(&ChangeEvent{ | ||||
Error: err, | ||||
}) | ||||
} | ||||
} | ||||
} | ||||
}() | ||||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
// Copyright The OpenTelemetry Authors | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
package configmapprovider | ||
|
||
import ( | ||
"context" | ||
"io/ioutil" | ||
"os" | ||
"sync/atomic" | ||
"testing" | ||
"time" | ||
|
||
"github.com/stretchr/testify/require" | ||
) | ||
|
||
func TestWatchFile(t *testing.T) { | ||
// Create a temporary file. | ||
file, err := ioutil.TempFile("", "file_watcher_test") | ||
require.NoError(t, err) | ||
defer func() { | ||
file.Close() | ||
os.Remove(file.Name()) | ||
}() | ||
|
||
received := atomic.Value{} | ||
received.Store(false) | ||
|
||
// Write some initial content. | ||
_, err = file.WriteString("hello") | ||
require.NoError(t, err) | ||
require.NoError(t, file.Sync()) | ||
|
||
// Setup the watch. | ||
ctx, cancel := context.WithCancel(context.Background()) | ||
defer cancel() | ||
watchFile(ctx, file.Name(), testingOnChange(t, &received)) | ||
time.Sleep(time.Second * 2) | ||
|
||
// Update the file and verify we see the updated content. | ||
_, err = file.WriteString(" world") | ||
require.NoError(t, err) | ||
require.NoError(t, file.Sync()) | ||
|
||
require.Eventually(t, func() bool { | ||
return received.Load().(bool) | ||
}, time.Second*10, time.Second) | ||
|
||
// Cancel the context. | ||
cancel() | ||
} | ||
|
||
func TestWatchFile_ReloadError(t *testing.T) { | ||
// Create then delete a temporary file so we have a filename that we know can't be opened. | ||
file, err := ioutil.TempFile("", "file_watcher_test") | ||
require.NoError(t, err) | ||
_ = file.Close() | ||
require.NoError(t, os.Remove(file.Name())) | ||
|
||
ctx, cancel := context.WithCancel(context.Background()) | ||
defer cancel() | ||
watchFile(ctx, file.Name(), func(event *ChangeEvent) {}) | ||
} | ||
|
||
func testingOnChange(t *testing.T, r *atomic.Value) func(c *ChangeEvent) { | ||
return func(c *ChangeEvent) { | ||
require.Nil(t, c.Error) | ||
r.Store(true) | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -24,36 +24,54 @@ import ( | |
) | ||
|
||
type configWatcher struct { | ||
cfg *config.Config | ||
ret configmapprovider.Retrieved | ||
ctx context.Context | ||
set CollectorSettings | ||
watcher chan error | ||
ret configmapprovider.Retrieved | ||
} | ||
|
||
func newConfigWatcher(ctx context.Context, set CollectorSettings) (*configWatcher, error) { | ||
cm := &configWatcher{watcher: make(chan error, 1)} | ||
func newConfigWatcher(ctx context.Context, set CollectorSettings) *configWatcher { | ||
cm := &configWatcher{ | ||
ctx: ctx, | ||
watcher: make(chan error, 1), | ||
set: set, | ||
} | ||
|
||
return cm | ||
} | ||
|
||
ret, err := set.ConfigMapProvider.Retrieve(ctx, cm.onChange) | ||
func (cm *configWatcher) get() (*config.Config, error) { | ||
// Ensure that a previously existing Retrieved is closed out properly. | ||
if cm.ret != nil { | ||
err := cm.ret.Close(cm.ctx) | ||
if err != nil { | ||
return nil, fmt.Errorf("cannot close previously retrieved config: %w", err) | ||
} | ||
} | ||
|
||
var ( | ||
cfg *config.Config | ||
err error | ||
) | ||
cm.ret, err = cm.set.ConfigMapProvider.Retrieve(cm.ctx, cm.onChange) | ||
if err != nil { | ||
return nil, fmt.Errorf("cannot retrieve the configuration: %w", err) | ||
} | ||
|
||
var cfg *config.Config | ||
m, err := ret.Get(ctx) | ||
m, err := cm.ret.Get(cm.ctx) | ||
if err != nil { | ||
return nil, fmt.Errorf("cannot get the configuration: %w", err) | ||
} | ||
if cfg, err = set.ConfigUnmarshaler.Unmarshal(m, set.Factories); err != nil { | ||
|
||
if cfg, err = cm.set.ConfigUnmarshaler.Unmarshal(m, cm.set.Factories); err != nil { | ||
return nil, fmt.Errorf("cannot unmarshal the configuration: %w", err) | ||
} | ||
|
||
if err = cfg.Validate(); err != nil { | ||
return nil, fmt.Errorf("invalid configuration: %w", err) | ||
} | ||
|
||
cm.cfg = cfg | ||
cm.ret = ret | ||
|
||
return cm, nil | ||
return cfg, nil | ||
} | ||
|
||
func (cm *configWatcher) onChange(event *configmapprovider.ChangeEvent) { | ||
|
@@ -62,7 +80,6 @@ func (cm *configWatcher) onChange(event *configmapprovider.ChangeEvent) { | |
} | ||
} | ||
|
||
func (cm *configWatcher) close(ctx context.Context) error { | ||
func (cm *configWatcher) close() { | ||
close(cm.watcher) | ||
return cm.ret.Close(ctx) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why is this deleted? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. i have modified the code flow in a way that the config watcher is only created once in the lifecycle of the collector as compared to how it was originally implemented where a config watcher is created per change in the config file. once that change was made, it didnt make sense to close the retrieved and pass the error down. i moved that logic into a There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
The new rearranged code is harder to follow and understand. Please refactor it to clearly show that the code follows lifecycle described in the
It was more visible before this change, admittedly it was not ideal but was better than what we have now. Now it is even harder to see that we are actually following the required lifecycle. All the current loop in runAndWaitForShutdownEvent shows is a watch, followed by |
||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What's the purpose of this? It does't seem to be set anywhere.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
fixed this one. this is to ensure that we create a watch only once as
Retrieve
is called each time theonChange
is invoked during a file change.