Allow filebeat to only run once #2456
Changes from 1 commit
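For orientation: this commit registers a new once flag via Go's standard flag package (so presumably invoked as -once on the command line) and, when set, shuts filebeat down after a single complete scan instead of waiting indefinitely for a shutdown signal. Below is a minimal, self-contained sketch of that control flow only; it is not the beats code, and every name in it is a stand-in:

```go
package main

import (
	"flag"
	"fmt"
	"os"
	"os/signal"
	"time"
)

var once = flag.Bool("once", false, "Run only once until all work is done")

func main() {
	flag.Parse()

	// Stand-in for "all harvesters reached EOF": closed when the single scan finishes.
	finished := make(chan struct{})
	go func() {
		time.Sleep(100 * time.Millisecond)
		close(finished)
	}()

	// Stand-in for filebeat's done channel, signalled on CTRL-C.
	sigc := make(chan os.Signal, 1)
	signal.Notify(sigc, os.Interrupt)

	if *once {
		// Run-once mode: finish when the scan completes OR when interrupted.
		select {
		case <-finished:
			fmt.Println("All data collection completed. Shutting down.")
		case <-sigc:
			fmt.Println("Interrupted. Shutting down.")
		}
		return
	}

	// Normal mode: run until interrupted.
	<-sigc
	fmt.Println("Interrupted. Shutting down.")
}
```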
@@ -1,6 +1,7 @@
 package beater
 
 import (
+	"flag"
 	"fmt"
 	"sync"
 
@@ -15,11 +16,12 @@ import (
 	"github.com/elastic/beats/filebeat/spooler"
 )
 
+var once = flag.Bool("once", false, "Run filebeat only once until all harvesters reach EOF")
+
 // Filebeat is a beater object. Contains all objects needed to run the beat
 type Filebeat struct {
-	config  *cfg.Config
-	sigWait *signalWait
-	done    chan struct{}
+	config *cfg.Config
+	done   chan struct{}
 }
 
 // New creates a new Filebeat pointer instance.
@@ -33,9 +35,8 @@ func New(b *beat.Beat, rawConfig *common.Config) (beat.Beater, error) {
 	}
 
 	fb := &Filebeat{
-		done:    make(chan struct{}),
-		sigWait: newSignalWait(),
-		config:  &config,
+		done:   make(chan struct{}),
+		config: &config,
 	}
 	return fb, nil
 }
@@ -45,13 +46,12 @@ func (fb *Filebeat) Run(b *beat.Beat) error {
 	var err error
 	config := fb.config
 
-	var wgEvents *sync.WaitGroup // count active events for waiting on shutdown
-	var finishedLogger publisher.SuccessLogger
+	waitFinished := newSignalWait()
+	waitEvents := newSignalWait()
 
-	if fb.config.ShutdownTimeout > 0 {
-		wgEvents = &sync.WaitGroup{}
-		finishedLogger = newFinishedLogger(wgEvents)
-	}
+	// count active events for waiting on shutdown
+	wgEvents := &sync.WaitGroup{}
+	finishedLogger := newFinishedLogger(wgEvents)
 
 	// Setup registrar to persist state
 	registrar, err := registrar.New(config.RegistryFile, finishedLogger)
@@ -60,13 +60,14 @@ func (fb *Filebeat) Run(b *beat.Beat) error {
 		return err
 	}
 
-	// Channel from harvesters to spooler
-	successLogger := newRegistrarLogger(registrar)
+	// Make sure all events that were published in
+	registrarChannel := newRegistrarLogger(registrar)
 
 	// Channel from spooler to harvester
 	publisherChan := newPublisherChannel()
 
 	// Publishes event to output
-	publisher := publisher.New(config.PublishAsync,
-		publisherChan.ch, successLogger, b.Publisher)
+	publisher := publisher.New(config.PublishAsync, publisherChan.ch, registrarChannel, b.Publisher)
 
 	// Init and Start spooler: Harvesters dump events into the spooler.
 	spooler, err := spooler.New(config, publisherChan)
@@ -75,9 +76,7 @@ func (fb *Filebeat) Run(b *beat.Beat) error {
 		return err
 	}
 
-	crawler, err := crawler.New(
-		newSpoolerOutlet(fb.done, spooler, wgEvents),
-		config.Prospectors)
+	crawler, err := crawler.New(newSpoolerOutlet(fb.done, spooler, wgEvents), config.Prospectors)
 	if err != nil {
 		logp.Err("Could not init crawler: %v", err)
 		return err
@@ -98,30 +97,45 @@ func (fb *Filebeat) Run(b *beat.Beat) error {
 	// Start publisher
 	publisher.Start()
 	// Stopping publisher (might potentially drop items)
-	defer publisher.Stop()
-	defer successLogger.Close()
+	defer func() {
+		// Closes first the registrar logger to make sure not more events arrive at the registrar
+		// registrarChannel must be closed first to potentially unblock (pretty unlikely) the publisher
+		registrarChannel.Close()
+		publisher.Stop()
+	}()
 
 	// Starting spooler
 	spooler.Start()
 
 	// Stopping spooler will flush items
 	defer func() {
-		// With harvesters being stopped, optionally wait for all enqueued events being
-		// published and written by registrar before continuing shutdown.
-		fb.sigWait.Wait()
+		// Wait for all events to be processed or timeout
+		waitEvents.Wait()
 
-		// continue shutdown
+		// Closes publisher so no further events can be sent
 		publisherChan.Close()
+		// Stopping spooler
 		spooler.Stop()
 	}()
 
-	err = crawler.Start(registrar.GetStates())
+	err = crawler.Start(registrar.GetStates(), *once)
 	if err != nil {
 		return err
 	}
-	// Blocks progressing. As soon as channel is closed, all defer statements come into play
-	<-fb.done
+
+	// If run once, add crawler completion check as alternative to done signal
+	if *once {
+		runOnce := func() {
+			logp.Info("Running filebeat once. Waiting for completion ...")
+			crawler.WaitForCompletion()
+			logp.Info("All data collection completed. Shutting down.")
+		}
+		waitFinished.Add(runOnce)
+	}
+
+	// Add done channel to wait for shutdown signal
+	waitFinished.AddChan(fb.done)
+	waitFinished.Wait()
 
 	// Stop crawler -> stop prospectors -> stop harvesters
 	// Note: waiting for crawlers to stop here in order to install wgEvents.Wait
@@ -130,14 +144,18 @@ func (fb *Filebeat) Run(b *beat.Beat) error {
 	crawler.Stop()
 
 	timeout := fb.config.ShutdownTimeout
-	if timeout > 0 {
-		logp.Info("Shutdown output timer started. Waiting for max %v.", timeout)
-
-		// Wait for either timeout or all events having been ACKed by outputs.
-		fb.sigWait.Add(withLog(wgEvents.Wait,
+	// Checks if on shutdown it should wait for all events to be published
+	waitPublished := fb.config.ShutdownTimeout > 0 || *once
+	if waitPublished {
+		// Wait for registrar to finish writing registry
+		waitEvents.Add(withLog(wgEvents.Wait,
 			"Continue shutdown: All enqueued events being published."))
-		fb.sigWait.Add(withLog(waitDuration(timeout),
-			"Continue shutdown: Time out waiting for events being published."))
+		// Wait for either timeout or all events having been ACKed by outputs.
+		if fb.config.ShutdownTimeout > 0 {
+			logp.Info("Shutdown output timer started. Waiting for max %v.", timeout)
+			waitEvents.Add(withLog(waitDuration(timeout),
+				"Continue shutdown: Time out waiting for events being published."))
+		}
Reviewer: Add: […] This will ensure CTRL-C will be handled immediately/properly if run-once without shutdown_timeout is configured. If […]

Author: Good point. Added.
 	}
 
 	return nil
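The CTRL-C exchange above works because waitFinished combines two wake-up sources: the run-once completion callback and the fb.done channel. The signalWait helper itself is not part of this diff; the sketch below is an assumed minimal equivalent (illustrative only, the real implementation in beats may differ) just to make Add/AddChan/Wait concrete:

```go
package main

import "fmt"

// Assumed minimal equivalent of the signalWait helper used in the diff above
// (Add, AddChan, Wait); the real implementation in beats may differ.
type signalWait struct {
	signals chan struct{}
}

func newSignalWait() *signalWait {
	return &signalWait{signals: make(chan struct{}, 1)}
}

// Add runs fn in the background and signals once it returns.
func (s *signalWait) Add(fn func()) {
	go func() {
		fn()
		select {
		case s.signals <- struct{}{}:
		default: // a signal is already pending; Wait only needs one
		}
	}()
}

// AddChan signals once c is closed.
func (s *signalWait) AddChan(c <-chan struct{}) {
	s.Add(func() { <-c })
}

// Wait blocks until any registered source fires.
func (s *signalWait) Wait() { <-s.signals }

func main() {
	done := make(chan struct{}) // e.g. fb.done, closed on CTRL-C
	w := newSignalWait()
	w.Add(func() { fmt.Println("run-once scan finished") }) // e.g. crawler.WaitForCompletion wrapped in runOnce
	w.AddChan(done)                                         // a shutdown signal still unblocks Wait
	w.Wait()                                                // returns as soon as either source fires
}
```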
@@ -29,6 +29,7 @@ type Prospector struct {
 	done      chan struct{}
 	states    *file.States
 	wg        sync.WaitGroup
+	channelWg sync.WaitGroup // Separate waitgroup for channels as not stopped on completion
Reviewer: I don't understand the necessity for channelWg. Why not reuse wg?

Author: channelWg is needed because, when run only once, the prospector does the Wait() for the first run to be completed. When the prospector Run returns, the channels are not shut down yet and shouldn't be, because events are still being processed. This only happens when the prospector is closed with done. I see that a similar pattern could apply here that we did with the other channels, but we can do this in a follow-up PR.

Reviewer: TBH I still don't fully understand why this is required. But resource management just became somewhat more complex :( What bothers me is adding many async/sync code blocks (utils) trying to manage something, adding more and more complexity.

Author: Yes, complexity currently increases, which is not good. I still hope that, as soon as we have the feature working as we want, we can reduce the complexity again through refactoring.
 
 	harvesterCounter uint64
 }
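To make the channelWg discussion above concrete, here is a small, illustrative sketch of the pattern (not the actual prospector code, and heavily simplified): the event-forwarding goroutine must outlive a run-once scan because harvesters may still be publishing, so it gets its own WaitGroup that only Stop waits on, while wg covers the long-running Run loop:

```go
package main

import (
	"fmt"
	"sync"
)

type prospector struct {
	done      chan struct{}
	wg        sync.WaitGroup // tracks the long-running Run loop (skipped in run-once mode)
	channelWg sync.WaitGroup // tracks the event-forwarding goroutine
	events    chan string
}

func (p *prospector) Run(once bool) {
	if !once {
		p.wg.Add(1)
		defer p.wg.Done()
	}

	// Forward events until Stop closes done. This must keep running even after a
	// run-once scan returns, because harvesters may still be publishing.
	p.channelWg.Add(1)
	go func() {
		defer p.channelWg.Done()
		for {
			select {
			case <-p.done:
				return
			case e := <-p.events:
				fmt.Println("forwarding:", e)
			}
		}
	}()

	p.events <- "event from initial scan"
	if once {
		// Run returns here, but the forwarding goroutine above stays alive.
		return
	}
	<-p.done
}

func (p *prospector) Stop() {
	close(p.done)
	p.channelWg.Wait() // wait for the forwarding goroutine separately
	p.wg.Wait()        // wait for the Run loop (a no-op in run-once mode)
}

func main() {
	p := &prospector{done: make(chan struct{}), events: make(chan string, 1)}
	p.Run(true) // run-once: Run returns after the initial scan
	p.Stop()
}
```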
@@ -50,6 +51,7 @@ func NewProspector(cfg *common.Config, states file.States, outlet Outlet) (*Pros
 		done:      make(chan struct{}),
 		states:    states.Copy(),
 		wg:        sync.WaitGroup{},
+		channelWg: sync.WaitGroup{},
 	}
 
 	if err := cfg.Unpack(&prospector.config); err != nil {
@@ -101,16 +103,22 @@ func (p *Prospector) Init() error {
 }
 
 // Starts scanning through all the file paths and fetch the related files. Start a harvester for each file
-func (p *Prospector) Run() {
+func (p *Prospector) Run(once bool) {
 
 	logp.Info("Starting prospector of type: %v", p.config.InputType)
-	p.wg.Add(2)
-	defer p.wg.Done()
 
+	// This waitgroup is not needed if run only once
+	// Waitgroup has to be added here to prevent panic in case Stop is called immediately afterwards
+	if !once {
+		// Add waitgroup to make sure prospectors finished
+		p.wg.Add(1)
+		defer p.wg.Done()
+	}
 	// Open channel to receive events from harvester and forward them to spooler
 	// Here potential filtering can happen
+	p.channelWg.Add(1)
 	go func() {
-		defer p.wg.Done()
+		defer p.channelWg.Done()
 		for {
 			select {
 			case <-p.done:
@@ -128,6 +136,13 @@
 	// Initial prospector run
 	p.prospectorer.Run()
 
+	// Shuts down after the first complete scan of all prospectors
+	// As all harvesters are part of the prospector waitgroup, this waits for the closing of all harvesters
+	if once {
+		p.wg.Wait()
+		return
+	}
+
 	for {
 		select {
 		case <-p.done:
@@ -162,6 +177,7 @@ func (p *Prospector) updateState(event *input.Event) error {
 func (p *Prospector) Stop() {
 	logp.Info("Stopping Prospector")
 	close(p.done)
+	p.channelWg.Wait()
 	p.wg.Wait()
 }
Reviewer: maybe add comment: registrarChannel must be closed first to potentially unblock (pretty unlikely) the publisher.

Author: Added.
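As a hypothetical illustration of why that close ordering matters (assumed structure only; the real registrar logger in beats may differ in detail): the logger's publish path can block on a channel send, so Close signals a done channel that unblocks any send still pending from the publisher's ACK handling:

```go
package main

import "fmt"

// Assumed shape of the registrar logger, for illustration only; the real
// newRegistrarLogger in beats may differ in detail.
type registrarLogger struct {
	done chan struct{}
	ch   chan string
}

func newIllustrativeRegistrarLogger() *registrarLogger {
	return &registrarLogger{done: make(chan struct{}), ch: make(chan string)}
}

// Published forwards an ACKed event towards the registrar, unless shutdown has begun.
func (l *registrarLogger) Published(event string) bool {
	select {
	case l.ch <- event:
		return true
	case <-l.done:
		// Shutting down: drop instead of blocking the publisher forever.
		return false
	}
}

// Close stops accepting events and unblocks any publisher goroutine stuck in Published.
func (l *registrarLogger) Close() { close(l.done) }

func main() {
	l := newIllustrativeRegistrarLogger()
	l.Close()                            // registrarChannel.Close() in the deferred shutdown above
	fmt.Println(l.Published("late-ack")) // prints false: a late ACK no longer blocks
}
```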