From 89b4a74ff8e0c893b9f58f240af9681d8fc97231 Mon Sep 17 00:00:00 2001 From: Quentin Long Date: Wed, 6 May 2015 11:28:40 -0700 Subject: [PATCH 1/2] Added docs for terms_size, upped the default, and fixed top_count_number --- docs/source/ruletypes.rst | 10 ++++++++-- elastalert/elastalert.py | 8 +++++--- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/docs/source/ruletypes.rst b/docs/source/ruletypes.rst index f13de43fc..d0af7952b 100644 --- a/docs/source/ruletypes.rst +++ b/docs/source/ruletypes.rst @@ -251,7 +251,10 @@ of tens of thousands or more. ``doc_type`` must be set to use this. ``doc_type``: Specify the ``_type`` of document to search for. This must be present if ``use_count_query`` or ``use_terms_query`` is set. ``use_terms_query``: If true, ElastAlert will make an aggregation query against Elasticsearch to get counts of documents matching -each unique value of ``query_key``. This be used with ``query_key`` and ``doc_type``. +each unique value of ``query_key``. This be used with ``query_key`` and ``doc_type``. This will only return a maximum of ``terms_size``, +default 50, unique terms. + +``terms_size``: When used with ``use_terms_query``, this is the maximum number of terms returned per query. Default is 50. ``query_key``: The number of events is remembered separately for each unique ``query_key`` field. If this option is set, the field must be present for all events. @@ -374,7 +377,10 @@ of tens of thousands or more. ``doc_type`` must be set to use this. ``doc_type``: Specify the ``_type`` of document to search for. This must be present if ``use_count_query`` or ``use_terms_query`` is set. ``use_terms_query``: If true, ElastAlert will make an aggregation query against Elasticsearch to get counts of documents matching -each unique value of ``query_key``. This be used with ``query_key``. ``doc_type`` must be set to use this. +each unique value of ``query_key``. This be used with ``query_key`` and ``doc_type``. 
This will only return a maximum of ``terms_size``, +default 50, unique terms. + +``terms_size``: When used with ``use_terms_query``, this is the maximum number of terms returned per query. Default is 50. Flatline ~~~~~~~~ diff --git a/elastalert/elastalert.py b/elastalert/elastalert.py index 1e1e1f06e..2ffe9b23a 100644 --- a/elastalert/elastalert.py +++ b/elastalert/elastalert.py @@ -247,7 +247,7 @@ def get_hits_count(self, rule, starttime, endtime, index): logging.info("Queried rule %s from %s to %s: %s hits" % (rule['name'], pretty_ts(starttime, lt), pretty_ts(endtime, lt), res['count'])) return {endtime: res['count']} - def get_hits_terms(self, rule, starttime, endtime, index, key, qk=None): + def get_hits_terms(self, rule, starttime, endtime, index, key, qk=None, size=None): rule_filter = copy.copy(rule['filter']) if qk: filter_key = rule['query_key'] @@ -255,7 +255,9 @@ def get_hits_terms(self, rule, starttime, endtime, index, key, qk=None): filter_key += '.raw' rule_filter.extend([{'term': {filter_key: qk}}]) base_query = self.get_query(rule_filter, starttime, endtime, timestamp_field=rule['timestamp_field'], sort=False) - query = self.get_terms_query(base_query, rule.get('terms_size', 5), key) + if size is None: + size = rule.get('terms_size', 50) + query = self.get_terms_query(base_query, size, key) try: res = self.current_es.search(index=index, doc_type=rule['doc_type'], body=query, search_type='count', ignore_unavailable=True) @@ -1033,7 +1035,7 @@ def get_top_counts(self, rule, starttime, endtime, keys, number=5, qk=None): all_counts = {} for key in keys: index = self.get_index(rule, starttime, endtime) - buckets = self.get_hits_terms(rule, starttime, endtime, index, key, qk).values()[0] + buckets = self.get_hits_terms(rule, starttime, endtime, index, key, qk, number).values()[0] # get_hits_terms adds to num_hits, but we don't want to count these self.num_hits -= len(buckets) terms = {} From 4ae1aafdd6bba1be0104f709fcb62000ae552b9d Mon Sep 17 00:00:00 
2001 From: Quentin Long Date: Wed, 6 May 2015 14:30:29 -0700 Subject: [PATCH 2/2] Fixed typo in use_terms_query docs --- docs/source/ruletypes.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/ruletypes.rst b/docs/source/ruletypes.rst index d0af7952b..e37b49d7c 100644 --- a/docs/source/ruletypes.rst +++ b/docs/source/ruletypes.rst @@ -251,7 +251,7 @@ of tens of thousands or more. ``doc_type`` must be set to use this. ``doc_type``: Specify the ``_type`` of document to search for. This must be present if ``use_count_query`` or ``use_terms_query`` is set. ``use_terms_query``: If true, ElastAlert will make an aggregation query against Elasticsearch to get counts of documents matching -each unique value of ``query_key``. This be used with ``query_key`` and ``doc_type``. This will only return a maximum of ``terms_size``, +each unique value of ``query_key``. This must be used with ``query_key`` and ``doc_type``. This will only return a maximum of ``terms_size``, default 50, unique terms. ``terms_size``: When used with ``use_terms_query``, this is the maximum number of terms returned per query. Default is 50. @@ -377,7 +377,7 @@ of tens of thousands or more. ``doc_type`` must be set to use this. ``doc_type``: Specify the ``_type`` of document to search for. This must be present if ``use_count_query`` or ``use_terms_query`` is set. ``use_terms_query``: If true, ElastAlert will make an aggregation query against Elasticsearch to get counts of documents matching -each unique value of ``query_key``. This be used with ``query_key`` and ``doc_type``. This will only return a maximum of ``terms_size``, +each unique value of ``query_key``. This must be used with ``query_key`` and ``doc_type``. This will only return a maximum of ``terms_size``, default 50, unique terms. ``terms_size``: When used with ``use_terms_query``, this is the maximum number of terms returned per query. Default is 50.