Skip to content

Commit

Permalink
feat: add the ability to log queries killed by query-timeout (#23978)…
Browse files Browse the repository at this point in the history
… (#24048)

* feat: add the ability to log queries killed by `query-timeout`

* chore: update example config

* chore: improve logging details
  • Loading branch information
jeffreyssmith2nd authored Jan 20, 2023
1 parent 7c53a4c commit d228760
Show file tree
Hide file tree
Showing 4 changed files with 22 additions and 2 deletions.
7 changes: 5 additions & 2 deletions cmd/influxd/run/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,7 @@ func NewServer(c *Config, buildInfo *BuildInfo) (*Server, error) {
s.QueryExecutor.TaskManager.QueryTimeout = time.Duration(c.Coordinator.QueryTimeout)
s.QueryExecutor.TaskManager.LogQueriesAfter = time.Duration(c.Coordinator.LogQueriesAfter)
s.QueryExecutor.TaskManager.MaxConcurrentQueries = c.Coordinator.MaxConcurrentQueries
s.QueryExecutor.TaskManager.LogTimedoutQueries = c.Coordinator.LogTimedOutQueries

// Initialize the monitor
s.Monitor.Version = s.buildInfo.Version
Expand Down Expand Up @@ -465,8 +466,10 @@ func (s *Server) Open() error {
s.TSDBStore.WithLogger(s.Logger)
if s.config.Data.QueryLogEnabled {
s.QueryExecutor.WithLogger(s.Logger)
} else if s.config.Coordinator.LogQueriesAfter > 0 {
// Log long-running queries even if not logging all queries
} else if s.config.Coordinator.LogQueriesAfter > 0 || s.config.Coordinator.LogTimedOutQueries {
// If we need to do any logging, add a logger.
// The TaskManager properly handles both of the above configs
// so it only logs as is appropriate.
s.QueryExecutor.TaskManager.Logger = s.Logger
}
s.PointsWriter.WithLogger(s.Logger)
Expand Down
3 changes: 3 additions & 0 deletions coordinator/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ type Config struct {
MaxConcurrentQueries int `toml:"max-concurrent-queries"`
QueryTimeout toml.Duration `toml:"query-timeout"`
LogQueriesAfter toml.Duration `toml:"log-queries-after"`
LogTimedOutQueries bool `toml:"log-timedout-queries"`
MaxSelectPointN int `toml:"max-select-point"`
MaxSelectSeriesN int `toml:"max-select-series"`
MaxSelectBucketsN int `toml:"max-select-buckets"`
Expand All @@ -48,6 +49,7 @@ func NewConfig() Config {
MaxSelectPointN: DefaultMaxSelectPointN,
MaxSelectSeriesN: DefaultMaxSelectSeriesN,
TerminationQueryLog: false,
LogTimedOutQueries: false,
}
}

Expand All @@ -58,6 +60,7 @@ func (c Config) Diagnostics() (*diagnostics.Diagnostics, error) {
"max-concurrent-queries": c.MaxConcurrentQueries,
"query-timeout": c.QueryTimeout,
"log-queries-after": c.LogQueriesAfter,
"log-timedout-queries": c.LogTimedOutQueries,
"max-select-point": c.MaxSelectPointN,
"max-select-series": c.MaxSelectSeriesN,
"max-select-buckets": c.MaxSelectBucketsN,
Expand Down
3 changes: 3 additions & 0 deletions etc/config.sample.toml
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,9 @@
# discover slow or resource intensive queries. Setting the value to 0 disables the slow query logging.
# log-queries-after = "0s"

# Enables the logging of queries that are killed as a result of exceeding `query-timeout`
# log-timedout-queries = false

# The maximum number of points a SELECT can process. A value of 0 will make
# the maximum point count unlimited. This will only be checked every second so queries will not
# be aborted immediately when hitting the limit.
Expand Down
11 changes: 11 additions & 0 deletions query/task_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,9 @@ type TaskManager struct {
// If zero, slow queries will never be logged.
LogQueriesAfter time.Duration

// If true, queries that are killed due to `query-timeout` will be logged.
LogTimedoutQueries bool

// Maximum number of concurrent queries.
MaxConcurrentQueries int

Expand Down Expand Up @@ -315,6 +318,14 @@ func (t *TaskManager) waitForQuery(qid uint64, interrupt <-chan struct{}, closin

t.queryError(qid, err)
case <-timerCh:
if t.LogTimedoutQueries {
t.Logger.Warn(
"query killed for exceeding timeout limit",
zap.String("query", t.queries[qid].query),
zap.String("database", t.queries[qid].database),
zap.String("timeout", prettyTime(t.QueryTimeout).String()),
)
}
t.queryError(qid, ErrQueryTimeoutLimitExceeded)
case <-interrupt:
// Query was manually closed so exit the select.
Expand Down

0 comments on commit d228760

Please sign in to comment.