From 7ae1161c8d5b1b7efb22fe14dba968a49d943ac8 Mon Sep 17 00:00:00 2001 From: Jason Ertel Date: Wed, 26 May 2021 12:14:08 -0400 Subject: [PATCH 1/2] Add optional configuration values for scheduler max_threads and misfire_grace_time --- docs/source/elastalert.rst | 4 ++++ elastalert/elastalert.py | 11 ++++++++++- elastalert/schema.yaml | 2 ++ 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/docs/source/elastalert.rst b/docs/source/elastalert.rst index bdd1071e..f5884b97 100755 --- a/docs/source/elastalert.rst +++ b/docs/source/elastalert.rst @@ -164,6 +164,8 @@ it ran the query for a given rule, and periodically query from that time until t this field is a nested unit of time, such as ``minutes: 5``. This is how time is defined in every ElastAlert configuration. +``misfire_grace_time``: If the rule scheduler is running behind, due to large numbers of rules or long-running rules, this grace time setting allows a rule to still be executed, provided its next scheduled run time is no more than this grace period, in seconds, overdue. The default is 5 seconds. + ``writeback_index``: The index on ``es_host`` to use. ``max_query_size``: The maximum number of documents that will be downloaded from Elasticsearch in a single query. The @@ -174,6 +176,8 @@ using the size of ``max_query_size`` through the set amount of pages, when ``max ``max_scrolling_count``: The maximum amount of pages to scroll through. The default is ``0``, which means the scrolling has no limit. For example if this value is set to ``5`` and the ``max_query_size`` is set to ``10000`` then ``50000`` documents will be downloaded at most. +``max_threads``: The maximum number of concurrent threads available to process scheduled rules. Large numbers of long-running rules may require this value be increased, though this could overload the Elasticsearch cluster if too many complex queries are running concurrently. Default is 10. 
+ ``scroll_keepalive``: The maximum time (formatted in `Time Units `_) the scrolling context should be kept alive. Avoid using high values as it abuses resources in Elasticsearch, but be mindful to allow sufficient time to finish processing all the results. ``max_aggregation``: The maximum number of alerts to aggregate together. If a rule has ``aggregation`` set, all diff --git a/elastalert/elastalert.py b/elastalert/elastalert.py index d9a4a4e7..e30f991c 100755 --- a/elastalert/elastalert.py +++ b/elastalert/elastalert.py @@ -22,6 +22,7 @@ import dateutil.tz import pytz from apscheduler.schedulers.background import BackgroundScheduler +from apscheduler.executors.pool import ThreadPoolExecutor from croniter import croniter from elasticsearch.exceptions import ConnectionError from elasticsearch.exceptions import ElasticsearchException @@ -171,7 +172,15 @@ def __init__(self, args): self.thread_data.alerts_sent = 0 self.thread_data.num_hits = 0 self.thread_data.num_dupes = 0 - self.scheduler = BackgroundScheduler() + executors = { + 'default': ThreadPoolExecutor(max_workers=self.conf.get('max_threads', 10)), + } + job_defaults = { + 'misfire_grace_time': self.conf.get('misfire_grace_time', 5), + 'coalesce': True, + 'max_instances': 1 + } + self.scheduler = BackgroundScheduler(executors=executors, job_defaults=job_defaults) self.string_multi_field_name = self.conf.get('string_multi_field_name', False) self.statsd_instance_tag = self.conf.get('statsd_instance_tag', '') self.statsd_host = self.conf.get('statsd_host', '') diff --git a/elastalert/schema.yaml b/elastalert/schema.yaml index 33aee274..db2f5248 100644 --- a/elastalert/schema.yaml +++ b/elastalert/schema.yaml @@ -198,6 +198,8 @@ properties: query_delay: *timeframe max_query_size: {type: integer} max_scrolling: {type: integer} + max_threads: {type: integer} + misfire_grace_time: {type: integer} owner: {type: string} priority: {type: integer} From 0cf1075a9ec5df5a45fb07f190621b4831645212 Mon Sep 17 00:00:00 2001 
From: Jason Ertel Date: Wed, 26 May 2021 12:16:06 -0400 Subject: [PATCH 2/2] Add changelog entry --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cd53aa35..cf3e0ed1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,7 +15,7 @@ - None ## New features -- None +- Expose rule scheduler properties as configurable settings - [#192](https://github.com/jertel/elastalert2/pull/192) - @jertel ## Other changes - Speed up unit tests by adding default parallelism - [164](https://github.com/jertel/elastalert2/pull/164) - @ferozsalam