From 7ae1161c8d5b1b7efb22fe14dba968a49d943ac8 Mon Sep 17 00:00:00 2001
From: Jason Ertel <jason.ertel@securityonionsolutions.com>
Date: Wed, 26 May 2021 12:14:08 -0400
Subject: [PATCH 1/2] Add optional configuration values for scheduler
 max_threads and misfire_grace_time

---
 docs/source/elastalert.rst |  4 ++++
 elastalert/elastalert.py   | 11 ++++++++++-
 elastalert/schema.yaml     |  2 ++
 3 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/docs/source/elastalert.rst b/docs/source/elastalert.rst
index bdd1071e..f5884b97 100755
--- a/docs/source/elastalert.rst
+++ b/docs/source/elastalert.rst
@@ -164,6 +164,8 @@ it ran the query for a given rule, and periodically query from that time until t
 this field is a nested unit of time, such as ``minutes: 5``. This is how time is defined in every ElastAlert
 configuration.
 
+``misfire_grace_time``: If the rule scheduler is running behind, due to large numbers of rules or long-running rules, this grace time setting allows a rule to still be executed, provided its next scheduled run time is no more than this grace period, in seconds, overdue. The default is 5 seconds.
+
 ``writeback_index``: The index on ``es_host`` to use.
 
 ``max_query_size``: The maximum number of documents that will be downloaded from Elasticsearch in a single query. The
@@ -174,6 +176,8 @@ using the size of ``max_query_size`` through the set amount of pages, when ``max
 ``max_scrolling_count``: The maximum amount of pages to scroll through. The default is ``0``, which means the scrolling has no limit.
 For example if this value is set to ``5`` and the ``max_query_size`` is set to ``10000`` then ``50000`` documents will be downloaded at most.
 
+``max_threads``: The maximum number of concurrent threads available to process scheduled rules. Large numbers of long-running rules may require this value be increased, though this could overload the Elasticsearch cluster if too many complex queries are running concurrently. Default is 10.
+
 ``scroll_keepalive``: The maximum time (formatted in `Time Units <https://www.elastic.co/guide/en/elasticsearch/reference/current/common-options.html#time-units>`_) the scrolling context should be kept alive. Avoid using high values as it abuses resources in Elasticsearch, but be mindful to allow sufficient time to finish processing all the results.
 
 ``max_aggregation``: The maximum number of alerts to aggregate together. If a rule has ``aggregation`` set, all
diff --git a/elastalert/elastalert.py b/elastalert/elastalert.py
index d9a4a4e7..e30f991c 100755
--- a/elastalert/elastalert.py
+++ b/elastalert/elastalert.py
@@ -22,6 +22,7 @@
 import dateutil.tz
 import pytz
 from apscheduler.schedulers.background import BackgroundScheduler
+from apscheduler.executors.pool import ThreadPoolExecutor
 from croniter import croniter
 from elasticsearch.exceptions import ConnectionError
 from elasticsearch.exceptions import ElasticsearchException
@@ -171,7 +172,15 @@ def __init__(self, args):
         self.thread_data.alerts_sent = 0
         self.thread_data.num_hits = 0
         self.thread_data.num_dupes = 0
-        self.scheduler = BackgroundScheduler()
+        executors = {
+            'default': ThreadPoolExecutor(max_workers=self.conf.get('max_threads', 10)),
+        }
+        job_defaults = {
+            'misfire_grace_time': self.conf.get('misfire_grace_time', 5),
+            'coalesce': True,
+            'max_instances': 1
+        }
+        self.scheduler = BackgroundScheduler(executors=executors, job_defaults=job_defaults)
         self.string_multi_field_name = self.conf.get('string_multi_field_name', False)
         self.statsd_instance_tag = self.conf.get('statsd_instance_tag', '')
         self.statsd_host = self.conf.get('statsd_host', '')
diff --git a/elastalert/schema.yaml b/elastalert/schema.yaml
index 33aee274..db2f5248 100644
--- a/elastalert/schema.yaml
+++ b/elastalert/schema.yaml
@@ -198,6 +198,8 @@ properties:
   query_delay: *timeframe
   max_query_size: {type: integer}
   max_scrolling: {type: integer}
+  max_threads: {type: integer}
+  misfire_grace_time: {type: integer}
 
   owner: {type: string}
   priority: {type: integer}

From 0cf1075a9ec5df5a45fb07f190621b4831645212 Mon Sep 17 00:00:00 2001
From: Jason Ertel <jason.ertel@securityonionsolutions.com>
Date: Wed, 26 May 2021 12:16:06 -0400
Subject: [PATCH 2/2] Add changelog entry

---
 CHANGELOG.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index cd53aa35..cf3e0ed1 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -15,7 +15,7 @@
 - None
 
 ## New features
-- None
+- Expose rule scheduler properties as configurable settings - [#192](https://github.com/jertel/elastalert2/pull/192) - @jertel
 
 ## Other changes
 - Speed up unit tests by adding default parallelism - [164](https://github.com/jertel/elastalert2/pull/164) - @ferozsalam