diff --git a/.github/ISSUE_TEMPLATE b/.github/ISSUE_TEMPLATE new file mode 100644 index 00000000..81f621e0 --- /dev/null +++ b/.github/ISSUE_TEMPLATE @@ -0,0 +1,3 @@ +PLEASE READ: The majority of topics are better suited for the Discussions forum. You can access this area by clicking the Discussions link above. Please first search the discussions area for keywords that could be associated with the problem you are experiencing. If you do not see an existing discussion, please open a new discussion and include sufficient details for someone in the community to help you. + +If you are confident you have discovered a legitimate issue, attach logs and reproduction steps to this issue. Failure to provide sufficient information will likely cause this issue to go stale and eventually be deleted. \ No newline at end of file diff --git a/.github/workflows/master_build_test.yml b/.github/workflows/master_build_test.yml new file mode 100644 index 00000000..8ab9d01a --- /dev/null +++ b/.github/workflows/master_build_test.yml @@ -0,0 +1,28 @@ +name: master_build_test + +# Controls when the action will run. +on: + # Triggers the workflow on push or pull request events, but only for the master branch + push: + branches: [ master ] + pull_request: + branches: [ master ] + + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + +# A workflow run is made up of one or more jobs that can run sequentially or in parallel +jobs: + # This workflow contains a single job called "build" + build: + # The type of runner that the job will run on + runs-on: ubuntu-latest + + # Steps represent a sequence of tasks that will be executed as part of the job + steps: + # Checks out your repository under $GITHUB_WORKSPACE, so your job can access it + - uses: actions/checkout@v2 + + # Runs a single command using the runner's shell + - name: Build and run tests + run: make test-docker diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml new file mode 100644 index 00000000..0b003bac --- /dev/null +++ b/.github/workflows/python-publish.yml @@ -0,0 +1,31 @@ +# This workflow will upload a Python Package using Twine when a release is created +# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries + +name: upload_python_package + +on: + release: + types: [created] + +jobs: + deploy: + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v2 + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: '3.x' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install setuptools wheel twine + - name: Build and publish + env: + TWINE_USERNAME: __token__ + TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }} + run: | + python setup.py sdist bdist_wheel + twine upload dist/* diff --git a/.github/workflows/upload_chart.yml b/.github/workflows/upload_chart.yml new file mode 100644 index 00000000..7cba508b --- /dev/null +++ b/.github/workflows/upload_chart.yml @@ -0,0 +1,19 @@ +name: upload_chart + +on: + release: + types: [created] + +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Helm push chart + env: + HELM_REPO_ACCESS_TOKEN: ${{ secrets.HELM_REPO_ACCESS_TOKEN }} + run: | + wget https://get.helm.sh/helm-v3.2.1-linux-amd64.tar.gz + tar xf helm-v3.2.1-linux-amd64.tar.gz + linux-amd64/helm plugin install https://github.com/chartmuseum/helm-push + linux-amd64/helm push 
chart/elastalert2 https://charts.banzaicloud.io/gh/Codesim-LLC diff --git a/.secrets.baseline b/.secrets.baseline deleted file mode 100644 index b4405a48..00000000 --- a/.secrets.baseline +++ /dev/null @@ -1,27 +0,0 @@ -{ - "exclude_regex": ".*tests/.*|.*yelp/testing/.*|\\.pre-commit-config\\.yaml", - "generated_at": "2018-07-06T22:54:22Z", - "plugins_used": [ - { - "base64_limit": 4.5, - "name": "Base64HighEntropyString" - }, - { - "hex_limit": 3, - "name": "HexHighEntropyString" - }, - { - "name": "PrivateKeyDetector" - } - ], - "results": { - ".travis.yml": [ - { - "hashed_secret": "4f7a1ea04dafcbfee994ee1d08857b8aaedf8065", - "line_number": 14, - "type": "Base64 High Entropy String" - } - ] - }, - "version": "0.9.1" -} diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 569bf12d..00000000 --- a/.travis.yml +++ /dev/null @@ -1,42 +0,0 @@ -language: python -python: -- '3.6' -env: -- TOXENV=docs -- TOXENV=py36 -install: -- pip install tox -- > - if [[ -n "${ES_VERSION}" ]] ; then - wget https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-${ES_VERSION}.tar.gz - mkdir elasticsearch-${ES_VERSION} && tar -xzf elasticsearch-${ES_VERSION}.tar.gz -C elasticsearch-${ES_VERSION} --strip-components=1 - ./elasticsearch-${ES_VERSION}/bin/elasticsearch & - fi -script: -- > - if [[ -n "${ES_VERSION}" ]] ; then - wget -q --waitretry=1 --retry-connrefused --tries=30 -O - http://127.0.0.1:9200 - make test-elasticsearch - else - make test - fi -jobs: - include: - - stage: 'Elasticsearch test' - env: TOXENV=py36 ES_VERSION=7.0.0-linux-x86_64 - - env: TOXENV=py36 ES_VERSION=6.6.2 - - env: TOXENV=py36 ES_VERSION=6.3.2 - - env: TOXENV=py36 ES_VERSION=6.2.4 - - env: TOXENV=py36 ES_VERSION=6.0.1 - - env: TOXENV=py36 ES_VERSION=5.6.16 - -deploy: - provider: pypi - user: yelplabs - password: - secure: TpSTlFu89tciZzboIfitHhU5NhAB1L1/rI35eQTXstiqzYg2mweOuip+MPNx9AlX3Swg7MhaFYnSUvRqPljuoLjLD0EQ7BHLVSBFl92ukkAMTeKvM6LbB9HnGOwzmAvTR5coegk8IHiegudODWvnhIj4hp7/0EA+gVX7E55kEAw= - on: - tags: true - distributions: sdist bdist_wheel - repo: Yelp/elastalert - branch: master diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 00000000..9b6924ce --- /dev/null +++ b/Dockerfile @@ -0,0 +1,36 @@ +FROM python:alpine as builder + +LABEL description="Elastalert 2 suitable for Kubernetes and Helm" +LABEL maintainer="Jason Ertel (jertel at codesim.com)" + +RUN apk --update upgrade && \ + apk add git && \ + rm -rf /var/cache/apk/* + +RUN mkdir -p /opt/elastalert && \ + git clone https://github.com/jertel/elastalert2 /tmp/elastalert && \ + cd /tmp/elastalert && \ + pip install setuptools wheel && \ + python setup.py sdist bdist_wheel + +FROM python:alpine + +COPY --from=builder /tmp/elastalert/dist/*.tar.gz /tmp/ + +RUN apk --update upgrade && \ + apk add gcc libffi-dev musl-dev python3-dev openssl-dev tzdata libmagic cargo && \ + pip install /tmp/*.tar.gz && \ + apk del gcc libffi-dev musl-dev python3-dev openssl-dev cargo && \ + rm -rf /var/cache/apk/* + +RUN mkdir -p /opt/elastalert && \ + echo "#!/bin/sh" >> /opt/elastalert/run.sh && \ + echo "set -e" >> /opt/elastalert/run.sh && \ + echo "elastalert-create-index --config /opt/config/elastalert_config.yaml" >> /opt/elastalert/run.sh && \ + echo "elastalert --config /opt/config/elastalert_config.yaml \"\$@\"" >> /opt/elastalert/run.sh && \ + chmod +x /opt/elastalert/run.sh + +ENV TZ "UTC" + +WORKDIR /opt/elastalert +ENTRYPOINT ["/opt/elastalert/run.sh"] diff --git a/Dockerfile-test b/Dockerfile-test index 3c153e64..2c04ca85 100644 --- 
a/Dockerfile-test +++ b/Dockerfile-test @@ -1,7 +1,9 @@ FROM ubuntu:latest -RUN apt-get update && apt-get upgrade -y -RUN apt-get -y install build-essential python3.6 python3.6-dev python3-pip libssl-dev git +RUN apt update && apt upgrade -y +RUN apt install software-properties-common -y +RUN add-apt-repository ppa:deadsnakes/ppa +RUN apt -y install build-essential python3.9 python3.9-dev python3-pip libssl-dev git WORKDIR /home/elastalert diff --git a/README.md b/README.md index 99acc02e..463b238b 100644 --- a/README.md +++ b/README.md @@ -1,323 +1,53 @@ -Recent changes: As of Elastalert 0.2.0, you must use Python 3.6. Python 2 will not longer be supported. +# Elastalert 2 -[![Build Status](https://travis-ci.org/Yelp/elastalert.svg)](https://travis-ci.org/Yelp/elastalert) -[![Join the chat at https://gitter.im/Yelp/elastalert](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/Yelp/elastalert?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) +Elastalert 2 is the supported fork of [Elastalert][0], which had been maintained by the Yelp team +but became mostly stale after the Yelp team ceased using Elastalert. -## ElastAlert - [Read the Docs](http://elastalert.readthedocs.org). -### Easy & Flexible Alerting With Elasticsearch - -ElastAlert is a simple framework for alerting on anomalies, spikes, or other patterns of interest from data in Elasticsearch. - -ElastAlert works with all versions of Elasticsearch. - -At Yelp, we use Elasticsearch, Logstash and Kibana for managing our ever increasing amount of data and logs. -Kibana is great for visualizing and querying data, but we quickly realized that it needed a companion tool for alerting -on inconsistencies in our data. Out of this need, ElastAlert was created. - -If you have data being written into Elasticsearch in near real time and want to be alerted when that data matches certain patterns, ElastAlert is the tool for you. If you can see it in Kibana, ElastAlert can alert on it. - -## Overview - -We designed ElastAlert to be reliable, highly modular, and easy to set up and configure. - -It works by combining Elasticsearch with two types of components, rule types and alerts. -Elasticsearch is periodically queried and the data is passed to the rule type, which determines when -a match is found. When a match occurs, it is given to one or more alerts, which take action based on the match. - -This is configured by a set of rules, each of which defines a query, a rule type, and a set of alerts. 
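For instance, a minimal rule file (a sketch; the index pattern, threshold, and webhook URL are placeholder values) ties those three pieces together:

```yaml
# Sketch of a minimal rule: a query (filter), a rule type (frequency),
# and an alert (slack). All values below are placeholders.
name: example-error-frequency
type: frequency                # rule type: fire on event volume
index: logstash-*              # indices to query
num_events: 50                 # match when 50 events...
timeframe:
  hours: 1                     # ...occur within one hour
filter:                        # the Elasticsearch query
- query:
    query_string:
      query: "level: ERROR"
alert:                         # where matches are delivered
- slack
slack_webhook_url: "https://hooks.slack.com/services/T000/B000/XXXX"
```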
- -Several rule types with common monitoring paradigms are included with ElastAlert: - -- Match where there are at least X events in Y time" (``frequency`` type) -- Match when the rate of events increases or decreases" (``spike`` type) -- Match when there are less than X events in Y time" (``flatline`` type) -- Match when a certain field matches a blacklist/whitelist" (``blacklist`` and ``whitelist`` type) -- Match on any event matching a given filter" (``any`` type) -- Match when a field has two different values within some time" (``change`` type) -- Match when a never before seen term appears in a field" (``new_term`` type) -- Match when the number of unique values for a field is above or below a threshold (``cardinality`` type) - -Currently, we have built-in support for the following alert types: - -- Email -- JIRA -- OpsGenie -- Commands -- HipChat -- MS Teams -- Slack -- Telegram -- GoogleChat -- AWS SNS -- VictorOps -- PagerDuty -- PagerTree -- Exotel -- Twilio -- Gitter -- Line Notify -- Zabbix - -Additional rule types and alerts can be easily imported or written. - -In addition to this basic usage, there are many other features that make alerts more useful: - -- Alerts link to Kibana dashboards -- Aggregate counts for arbitrary fields -- Combine alerts into periodic reports -- Separate alerts by using a unique key field -- Intercept and enhance match data - -To get started, check out `Running ElastAlert For The First Time` in the [documentation](http://elastalert.readthedocs.org). - -## Running ElastAlert -You can either install the latest released version of ElastAlert using pip: - -```pip install elastalert``` - -or you can clone the ElastAlert repository for the most recent changes: - -```git clone https://github.com/Yelp/elastalert.git``` - -Install the module: - -```pip install "setuptools>=11.3"``` - -```python setup.py install``` - -The following invocation can be used to run ElastAlert after installing - -``$ elastalert [--debug] [--verbose] [--start ] [--end ] [--rule ] [--config ]`` - -``--debug`` will print additional information to the screen as well as suppresses alerts and instead prints the alert body. Not compatible with `--verbose`. - -``--verbose`` will print additional information without suppressing alerts. Not compatible with `--debug.` - -``--start`` will begin querying at the given timestamp. By default, ElastAlert will begin querying from the present. -Timestamp format is ``YYYY-MM-DDTHH-MM-SS[-/+HH:MM]`` (Note the T between date and hour). -Eg: ``--start 2014-09-26T12:00:00`` (UTC) or ``--start 2014-10-01T07:30:00-05:00`` - -``--end`` will cause ElastAlert to stop querying at the given timestamp. By default, ElastAlert will continue -to query indefinitely. - -``--rule`` will allow you to run only one rule. It must still be in the rules folder. -Eg: ``--rule this_rule.yaml`` - -``--config`` allows you to specify the location of the configuration. By default, it is will look for config.yaml in the current directory. - -## Third Party Tools And Extras -### Kibana plugin -![img](https://raw.githubusercontent.com/bitsensor/elastalert-kibana-plugin/master/showcase.gif) -Available at the [ElastAlert Kibana plugin repository](https://github.com/bitsensor/elastalert-kibana-plugin). - -### Docker -A [Dockerized version](https://github.com/bitsensor/elastalert) of ElastAlert including a REST api is build from `master` to `bitsensor/elastalert:latest`. 
- -```bash -git clone https://github.com/bitsensor/elastalert.git; cd elastalert -docker run -d -p 3030:3030 \ - -v `pwd`/config/elastalert.yaml:/opt/elastalert/config.yaml \ - -v `pwd`/config/config.json:/opt/elastalert-server/config/config.json \ - -v `pwd`/rules:/opt/elastalert/rules \ - -v `pwd`/rule_templates:/opt/elastalert/rule_templates \ - --net="host" \ - --name elastalert bitsensor/elastalert:latest -``` +Elastalert 2 is backwards compatible with the original Elastalert rules. ## Documentation -Read the documentation at [Read the Docs](http://elastalert.readthedocs.org). - -To build a html version of the docs locally - -``` -pip install sphinx_rtd_theme sphinx -cd docs -make html -``` - -View in browser at build/html/index.html - -## Configuration - -See config.yaml.example for details on configuration. - -## Example rules - -Examples of different types of rules can be found in example_rules/. - -- ``example_spike.yaml`` is an example of the "spike" rule type, which allows you to alert when the rate of events, averaged over a time period, -increases by a given factor. This example will send an email alert when there are 3 times more events matching a filter occurring within the -last 2 hours than the number of events in the previous 2 hours. - -- ``example_frequency.yaml`` is an example of the "frequency" rule type, which will alert when there are a given number of events occuring -within a time period. This example will send an email when 50 documents matching a given filter occur within a 4 hour timeframe. - -- ``example_change.yaml`` is an example of the "change" rule type, which will alert when a certain field in two documents changes. In this example, -the alert email is sent when two documents with the same 'username' field but a different value of the 'country_name' field occur within 24 hours -of each other. - -- ``example_new_term.yaml`` is an example of the "new term" rule type, which alerts when a new value appears in a field or fields. In this example, -an email is sent when a new value of ("username", "computer") is encountered in example login logs. - -## Frequently Asked Questions - -### My rule is not getting any hits? - -So you've managed to set up ElastAlert, write a rule, and run it, but nothing happens, or it says ``0 query hits``. First of all, we recommend using the command ``elastalert-test-rule rule.yaml`` to debug. It will show you how many documents match your filters for the last 24 hours (or more, see ``--help``), and then shows you if any alerts would have fired. If you have a filter in your rule, remove it and try again. This will show you if the index is correct and that you have at least some documents. If you have a filter in Kibana and want to recreate it in ElastAlert, you probably want to use a query string. Your filter will look like - -``` -filter: -- query: - query_string: - query: "foo: bar AND baz: abc*" -``` -If you receive an error that Elasticsearch is unable to parse it, it's likely the YAML is not spaced correctly, and the filter is not in the right format. If you are using other types of filters, like ``term``, a common pitfall is not realizing that you may need to use the analyzed token. This is the default if you are using Logstash. For example, - -``` -filter: -- term: - foo: "Test Document" -``` - -will not match even if the original value for ``foo`` was exactly "Test Document". Instead, you want to use ``foo.raw``. 
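As a sketch, the working version of the filter above would reference the non-analyzed sub-field instead (on Elasticsearch 5+ with default mappings this sub-field is usually named ``foo.keyword`` rather than ``foo.raw``):

```yaml
# Sketch: match the exact value via the non-analyzed sub-field.
filter:
- term:
    foo.raw: "Test Document"
```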
If you are still having trouble troubleshooting why your documents do not match, try running ElastAlert with ``--es_debug_trace /path/to/file.log``. This will log the queries made to Elasticsearch in full so that you can see exactly what is happening. - -### I got hits, why didn't I get an alert? - -If you got logs that had ``X query hits, 0 matches, 0 alerts sent``, it depends on the ``type`` why you didn't get any alerts. If ``type: any``, a match will occur for every hit. If you are using ``type: frequency``, ``num_events`` must occur within ``timeframe`` of each other for a match to occur. Different rules apply for different rule types. +Documentation, including an FAQ, for Elastalert 2 can be found on [Read the Docs][3]. This is the place to start if you're not familiar with Elastalert at all. -If you see ``X matches, 0 alerts sent``, this may occur for several reasons. If you set ``aggregation``, the alert will not be sent until after that time has elapsed. If you have gotten an alert for this same rule before, that rule may be silenced for a period of time. The default is one minute between alerts. If a rule is silenced, you will see ``Ignoring match for silenced rule`` in the logs. +The full list of platforms that Elastalert can fire alerts into can be found [in the documentation][4]. -If you see ``X alerts sent`` but didn't get any alert, it's probably related to the alert configuration. If you are using the ``--debug`` flag, you will not receive any alerts. Instead, the alert text will be written to the console. Use ``--verbose`` to achieve the same affects without preventing alerts. If you are using email alert, make sure you have it configured for an SMTP server. By default, it will connect to localhost on port 25. It will also use the word "elastalert" as the "From:" address. Some SMTP servers will reject this because it does not have a domain while others will add their own domain automatically. See the email section in the documentation for how to configure this. -### Why did I only get one alert when I expected to get several? - -There is a setting called ``realert`` which is the minimum time between two alerts for the same rule. Any alert that occurs within this time will simply be dropped. The default value for this is one minute. If you want to receive an alert for every single match, even if they occur right after each other, use - -``` -realert: - minutes: 0 -``` +## Contributing -You can of course set it higher as well. +PRs are welcome, but must include tests when possible. PRs will not be merged if they do not pass +the automated CI workflows. -### How can I prevent duplicate alerts? +The current status of the CI workflow: -By setting ``realert``, you will prevent the same rule from alerting twice in an amount of time. - -``` -realert: - days: 1 -``` +![CI Workflow](https://github.com/jertel/elastalert/workflows/master_build_test/badge.svg) -You can also prevent duplicates based on a certain field by using ``query_key``. For example, to prevent multiple alerts for the same user, you might use - -``` -realert: - hours: 8 -query_key: user -``` +Note that this will also affect the way many rule types work. If you are using ``type: frequency`` for example, ``num_events`` for a single value of ``query_key`` must occur before an alert will be sent. You can also use a compound of multiple fields for this key. 
For example, if you only wanted to receieve an alert once for a specific error and hostname, you could use - -``` -query_key: [error, hostname] -``` - -Internally, this works by creating a new field for each document called ``field1,field2`` with a value of ``value1,value2`` and using that as the ``query_key``. - -The data for when an alert will fire again is stored in Elasticsearch in the ``elastalert_status`` index, with a ``_type`` of ``silence`` and also cached in memory. - -### How can I change what's in the alert? - -You can use the field ``alert_text`` to add custom text to an alert. By setting ``alert_text_type: alert_text_only``, it will be the entirety of the alert. You can also add different fields from the alert by using Python style string formatting and ``alert_text_args``. For example - -``` -alert_text: "Something happened with {0} at {1}" -alert_text_type: alert_text_only -alert_text_args: ["username", "@timestamp"] -``` - -You can also limit the alert to only containing certain fields from the document by using ``include``. - -``` -include: ["ip_address", "hostname", "status"] -``` +## Docker -### My alert only contains data for one event, how can I see more? +If you're interested in a pre-built Docker image, check out the [elastalert2][2] project on Docker Hub. -If you are using ``type: frequency``, you can set the option ``attach_related: true`` and every document will be included in the alert. An alternative, which works for every type, is ``top_count_keys``. This will show the top counts for each value for certain fields. For example, if you have +A properly configured elastalert_config.yaml file must be mounted into the container during startup. Use the [example file][1] provided as a template, and once saved locally to a file such as `/tmp/elastalert.yaml`, run the container as follows: +```bash +docker run -d -v /tmp/elastalert.yaml:/opt/config/elastalert_config.yaml jertel/elastalert2 ``` -top_count_keys: ["ip_address", "status"] -``` - -and 10 documents matched your alert, it may contain something like - -``` -ip_address: -127.0.0.1: 7 -10.0.0.1: 2 -192.168.0.1: 1 - -status: -200: 9 -500: 1 -``` - -### How can I make the alert come at a certain time? - -The ``aggregation`` feature will take every alert that has occured over a period of time and send them together in one alert. You can use cron style syntax to send all alerts that have occured since the last once by using - -``` -aggregation: - schedule: '2 4 * * mon,fri' -``` - -### I have lots of documents and it's really slow, how can I speed it up? - -There are several ways to potentially speed up queries. If you are using ``index: logstash-*``, Elasticsearch will query all shards, even if they do not possibly contain data with the correct timestamp. Instead, you can use Python time format strings and set ``use_strftime_index`` - -``` -index: logstash-%Y.%m -use_strftime_index: true -``` - -Another thing you could change is ``buffer_time``. By default, ElastAlert will query large overlapping windows in order to ensure that it does not miss any events, even if they are indexed in real time. In config.yaml, you can adjust ``buffer_time`` to a smaller number to only query the most recent few minutes. +To build the image locally, install Docker and then run the following command: ``` -buffer_time: - minutes: 5 +docker build . -t elastalert ``` +By default, ElastAlert will download every document in full before processing them. 
Instead, you can have ElastAlert simply get a count of the number of documents that have occured in between each query. To do this, set ``use_count_query: true``. This cannot be used if you use ``query_key``, because ElastAlert will not know the contents of each documents, just the total number of them. This also reduces the precision of alerts, because all events that occur between each query will be rounded to a single timestamp. - -If you are using ``query_key`` (a single key, not multiple keys) you can use ``use_terms_query``. This will make ElastAlert perform a terms aggregation to get the counts for each value of a certain field. Both ``use_terms_query`` and ``use_count_query`` also require ``doc_type`` to be set to the ``_type`` of the documents. They may not be compatible with all rule types. - -### Can I perform aggregations? - -The only aggregation supported currently is a terms aggregation, by setting ``use_terms_query``. - -### I'm not using @timestamp, what do I do? - -You can use ``timestamp_field`` to change which field ElastAlert will use as the timestamp. You can use ``timestamp_type`` to change it between ISO 8601 and unix timestamps. You must have some kind of timestamp for ElastAlert to work. If your events are not in real time, you can use ``query_delay`` and ``buffer_time`` to adjust when ElastAlert will look for documents. - -### I'm using flatline but I don't see any alerts - -When using ``type: flatline``, ElastAlert must see at least one document before it will alert you that it has stopped seeing them. - -### How can I get a "resolve" event? - -ElastAlert does not currently support stateful alerts or resolve events. - -### Can I set a warning threshold? +## Kubernetes -Currently, the only way to set a warning threshold is by creating a second rule with a lower threshold. +See the Helm chart [README.md](chart/elastalert2/README.md) for information on installing this application into an existing Kubernetes cluster. ## License -ElastAlert is licensed under the Apache License, Version 2.0: http://www.apache.org/licenses/LICENSE-2.0 +Elastalert is licensed under the [Apache License, Version 2.0][5]. -### Read the documentation at [Read the Docs](http://elastalert.readthedocs.org). +[0]: https://github.com/yelp/elastalert +[1]: https://github.com/jertel/elastalert2/blob/master/config.yaml.example +[2]: https://hub.docker.com/r/jertel/elastalert2 +[3]: https://elastalert2.readthedocs.io/ +[4]: https://elastalert2.readthedocs.io/en/latest/ruletypes.html#alerts +[5]: http://www.apache.org/licenses/LICENSE-2.0 -### Questions? Drop by #elastalert on Freenode IRC. diff --git a/RELEASE.md b/RELEASE.md new file mode 100644 index 00000000..fe975661 --- /dev/null +++ b/RELEASE.md @@ -0,0 +1,36 @@ +# Releases + +As Elastalert 2 is a community-maintained project, releases will typically contain unrelated contributions without a common theme. It's up to the maintainers to determine when the project is ready for a release; however, if you are looking to use a newly merged feature that hasn't yet been released, feel free to open a [discussion][5] and let us know. + +Maintainers, when creating a new release, follow the procedure below: + +1. Determine an appropriate new version number in the format _a.b.c_, using the following guidelines: + - The major version (a) should not change. + - The minor version (b) should be incremented if a new feature has been added or if a bug fix will have a significant user impact. Reset the patch version to zero if the minor version is incremented. 
- The patch version (c) should be incremented when low-impact bugs are fixed or security vulnerabilities are patched. +2. Ensure the following are updated _before_ publishing/tagging the new release: + - [setup.py](setup.py): Match the version to the new release version + - [Chart.yaml](chart/elastalert2/Chart.yaml): Match chart version and the app version to the new release version (typically keep them in sync) + - [values.yaml](chart/elastalert2/values.yaml): Match the default image version to the new release version. + - [README.md](chart/elastalert2/README.md): Match the default image version to the new release version. +3. Double-check that the latest Docker image was built successfully. +4. Create a [new][1] release. + - The title (and tag) of the release will be the same value as the new version determined in step 1. + - The description of the release will contain a bulleted list of all merged pull requests, in the following format: + `- change description #PR - @committer` + Ex: + `- Added new Foobar alerts #12345 - @jertel` + - Check the box to 'Create a discussion for this release'. + - Save the draft. +5. Verify that artifacts have been published: + - Python pip package was [published][3] successfully. + - Helm chart has been [published][4] successfully. + - Docker Hub image was [tagged][2] successfully. +6. Wait at least a few hours for community feedback in case someone notices a problem with the upcoming release. +7. Publish the release. + +[1]: https://github.com/jertel/elastalert2/releases/new +[2]: https://hub.docker.com/r/jertel/elastalert2/builds +[3]: https://github.com/jertel/elastalert2/actions/workflows/python-publish.yml +[4]: https://github.com/jertel/elastalert2/actions/workflows/upload_chart.yml +[5]: https://github.com/jertel/elastalert2/discussions \ No newline at end of file diff --git a/changelog.md b/changelog.md deleted file mode 100644 index 975d6855..00000000 --- a/changelog.md +++ /dev/null @@ -1,383 +0,0 @@ -# Change Log - -# v0.2.4 - -### Added -- Added back customFields support for The Hive - -# v0.2.3 - -### Added -- Added back TheHive alerter without TheHive4py library - -# v0.2.2 - -### Added -- Integration with Kibana Discover app -- Added ability to specify opsgenie alert details - -### Fixed -- Fix some encoding issues with command alerter -- Better error messages for missing config file -- Fixed an issue with run_every not applying per-rule -- Fixed an issue with rules not being removed -- Fixed an issue with top count keys and nested query keys -- Various documentation fixes -- Fixed an issue with not being able to use spike aggregation - -### Removed -- Remove The Hive alerter - -# v0.2.1 - -### Fixed -- Fixed an AttributeError introduced in 0.2.0 - -# v0.2.0 - -- Switched to Python 3 - -### Added -- Add rule loader class for customized rule loading -- Added thread based rules and limit_execution -- Run_every can now be customized per rule - -### Fixed -- Various small fixes - -# v0.1.39 - -### Added -- Added spike alerts for metric aggregations -- Allow SSL connections for Stomp -- Allow limits on alert text length -- Add optional min doc count for terms queries -- Add ability to index into arrays for alert_text_args, etc - -### Fixed -- Fixed bug involving --config flag with create-index -- Fixed some settings not being inherited from the config properly -- Some fixes for Hive alerter -- Close SMTP connections properly -- Fix timestamps in Pagerduty v2 payload -- Fixed an bug causing aggregated alerts to mix up - -# 
v0.1.38 - -### Added -- Added PagerTree alerter -- Added Line alerter -- Added more customizable logging -- Added new logic in test-rule to detemine the default timeframe - -### Fixed -- Fixed an issue causing buffer_time to sometimes be ignored - -# v0.1.37 - -### Added -- Added more options for Opsgenie alerter -- Added more pagerduty options -- Added ability to add metadata to elastalert logs - -### Fixed -- Fixed some documentation to be more clear -- Stop requiring doc_type for metric aggregations -- No longer puts quotes around regex terms in blacklists or whitelists - -# v0.1.36 - -### Added -- Added a prefix "metric_" to the key used for metric aggregations to avoid possible conflicts -- Added option to skip Alerta certificate validation - -### Fixed -- Fixed a typo in the documentation for spike rule - -# v0.1.35 - -### Fixed -- Fixed an issue preventing new term rule from working with terms query - -# v0.1.34 - -### Added -- Added prefix/suffix support for summary table -- Added support for ignoring SSL validation in Slack -- More visible exceptions during query parse failures - -### Fixed -- Fixed top_count_keys when using compound query_key -- Fixed num_hits sometimes being reported too low -- Fixed an issue with setting ES_USERNAME via env -- Fixed an issue when using test script with custom timestamps -- Fixed a unicode error when using Telegram -- Fixed an issue with jsonschema version conflict -- Fixed an issue with nested timestamps in cardinality type - -# v0.1.33 - -### Added -- Added ability to pipe alert text to a command -- Add --start and --end support for elastalert-test-rule -- Added ability to turn blacklist/whitelist files into queries for better performance -- Allow setting of OpsGenie priority -- Add ability to query the adjacent index if timestamp_field not used for index timestamping -- Add support for pagerduty v2 -- Add option to turn off .raw/.keyword field postfixing in new term rule -- Added --use-downloaded feature for elastalert-test-rule - -### Fixed -- Fixed a bug that caused num_hits in matches to sometimes be erroneously small -- Fixed an issue with HTTP Post alerter that could cause it to hang indefinitely -- Fixed some issues with string formatting for various alerters -- Fixed a couple of incorrect parts of the documentation - -# v0.1.32 - -### Added -- Add support for setting ES url prefix via environment var -- Add support for using native Slack fields in alerts - -### Fixed -- Fixed a bug that would could scrolling queries to sometimes terminate early - -# v0.1.31 - -### Added -- Added ability to add start date to new term rule - -### Fixed -- Fixed a bug in create_index which would try to delete a nonexistent index -- Apply filters to new term rule all terms query -- Support Elasticsearch 6 for new term rule -- Fixed is_enabled not working on rule changes - - -# v0.1.30 - -### Added -- Alerta alerter -- Added support for transitioning JIRA issues -- Option to recreate index in elastalert-create-index - -### Fixed -- Update jira_ custom fields before each alert if they were modified -- Use json instead of simplejson -- Allow for relative path for smtp_auth_file -- Fixed some grammar issues -- Better code formatting of index mappings -- Better formatting and size limit for HipChat HTML -- Fixed gif link in readme for kibana plugin -- Fixed elastalert-test-rule with Elasticsearch > 4 -- Added documentation for is_enabled option - -## v0.1.29 - -### Added -- Added a feature forget_keys to prevent realerting when using flatline with query_key -- 
Added a new alert_text_type, aggregation_summary_only - -### Fixed -- Fixed incorrect documentation about es_conn_timeout default - -## v0.1.28 - -### Added -- Added support for Stride formatting of simple HTML tags -- Added support for custom titles in Opsgenie alerts -- Added a denominator to percentage match based alerts - -### Fixed -- Fixed a bug with Stomp alerter connections -- Removed escaping of some characaters in Slack messages - -## v0.1.27 - -# Added -- Added support for a value other than <MISSING VALUE> in formatted alerts - -### Fixed -- Fixed a failed creation of elastalert indicies when using Elasticsearch 6 -- Truncate Telegram alerts to avoid API errors - -## v0.1.26 - -### Added -- Added support for Elasticsearch 6 -- Added support for mentions in Hipchat - -### Fixed -- Fixed an issue where a nested field lookup would crash if one of the intermediate fields was null - -## v0.1.25 - -### Fixed -- Fixed a bug causing new term rule to break unless you passed a start time -- Add a slight clarification on the localhost:9200 reported in es_debug_trace - -## v0.1.24 - -### Fixed -- Pinned pytest -- create-index reads index name from config.yaml -- top_count_keys now works for context on a flatline rule type -- Fixed JIRA behavior for issues with statuses that have spaces in the name - -## v0.1.22 - -### Added -- Added Stride alerter -- Allow custom string formatters for aggregation percentage -- Added a field to disable rules from config -- Added support for subaggregations for the metric rule type - -### Fixed -- Fixed a bug causing create-index to fail if missing config.yaml -- Fixed a bug when using ES5 with query_key and top_count_keys -- Allow enhancements to set and clear arbitrary JIRA fields -- Fixed a bug causing timestamps to be formatted in scientific notation -- Stop attempting to initialize alerters in debug mode -- Changed default alert ordering so that JIRA tickets end up in other alerts -- Fixed a bug when using Stomp alerter with complex query_key -- Fixed a bug preventing hipchat room ID from being an integer -- Fixed a bug causing duplicate alerts when using spike with alert_on_new_data -- Minor fixes to summary table formatting -- Fixed elastalert-test-rule when using new term rule type - -## v0.1.21 - -### Fixed -- Fixed an incomplete bug fix for preventing duplicate enhancement runs - -## v0.1.20 - -### Added -- Added support for client TLS keys - -### Fixed -- Fixed the formatting of summary tables in Slack -- Fixed ES_USE_SSL env variable -- Fixed the unique value count printed by new_term rule type -- Jira alerter no longer uses the non-existent json code formatter - -## v0.1.19 - -### Added -- Added support for populating JIRA fields via fields in the match -- Added support for using a TLS certificate file for SMTP connections -- Allow a custom suffix for non-analyzed Elasticsearch fields, like ".raw" or ".keyword" -- Added match_time to Elastalert alert documents in Elasticsearch - -### Fixed -- Fixed an error in the documentation for rule importing -- Prevent enhancements from re-running on retried alerts -- Fixed a bug when using custom timestamp formats and new term rule -- Lowered jira_bump_after_inactivity default to 0 days - -## v0.1.18 - -### Added -- Added a new alerter "post" based on "simple" which makes POSTS JSON to HTTP endpoints -- Added an option jira_bump_after_inacitivty to prevent ElastAlert commenting on active JIRA tickets - -### Removed -- Removed "simple" alerter, replaced by "post" - -## v0.1.17 - -### Added -- Added a --patience flag to 
allow Elastalert to wait for Elasticsearch to become available -- Allow custom PagerDuty alert titles via alert_subject - -## v0.1.16 - -### Fixed -- Fixed a bug where JIRA titles might not use query_key values -- Fixed a bug where flatline alerts don't respect query_key for realert -- Fixed a typo "twilio_accout_sid" - -### Added -- Added support for env variables in kibana4 dashboard links -- Added ca_certs option for custom CA support - -## v0.1.15 - -### Fixed -- Fixed a bug where Elastalert would crash on connection error during startup -- Fixed some typos in documentation -- Fixed a bug in metric bucket offset calculation -- Fixed a TypeError in Service Now alerter - -### Added -- Added support for compound compare key in change rules -- Added support for absolute paths in rule config imports -- Added Microsoft Teams alerter -- Added support for markdown in Slack alerts -- Added error codes to test script -- Added support for lists in email_from_field - - -## v0.1.14 - 2017-05-11 - -### Fixed -- Twilio alerter uses the from number appropriately -- Fixed a TypeError in SNS alerter -- Some changes to requirements.txt and setup.py -- Fixed a TypeError in new term rule - -### Added -- Set a custom pagerduty incident key -- Preserve traceback in most exceptions - -## v0.1.12 - 2017-04-21 - -### Fixed -- Fixed a bug causing filters to be ignored when using Elasticsearch 5 - - -## v0.1.11 - 2017-04-19 - -### Fixed -- Fixed an issue that would cause filters starting with "query" to sometimes throw errors in ES5 -- Fixed a bug with multiple versions of ES on different rules -- Fixed a possible KeyError when using use_terms_query with ES5 - -## v0.1.10 - 2017-04-17 - -### Fixed -- Fixed an AttributeError occuring with older versions of Elasticsearch library -- Made example rules more consistent and with unique names -- Fixed an error caused by a typo when es_username is used - -## v0.1.9 - 2017-04-14 - -### Added -- Added a changelog -- Added metric aggregation rule type -- Added percentage match rule type -- Added default doc style and improved the instructions -- Rule names will default to the filename -- Added import keyword in rules to include sections from other files -- Added email_from_field option to derive the recipient from a field in the match -- Added simple HTTP alerter -- Added Exotel SMS alerter -- Added a readme link to third party Kibana plugin -- Added option to use env variables to configure some settings -- Added duplicate hits count in log line - -### Fixed -- Fixed a bug in change rule where a boolean false would be ignored -- Clarify documentation on format of alert_text_args and alert_text_kw -- Fixed a bug preventing new silence stashes from being loaded after a rule has previous alerted -- Changed the default es_host in elastalert-test-rule to localhost -- Fixed a bug preventing ES <5.0 formatted queries working in elastalert-test-rule -- Fixed top_count_keys adding .raw on ES >5.0, uses .keyword instead -- Fixed a bug causing compound aggregation keys not to work -- Better error reporting for the Jira alerter -- AWS request signing now refreshes credentials, uses boto3 -- Support multiple ES versions on different rules -- Added documentation for percentage match rule type - -### Removed -- Removed a feature that would disable writeback_es on errors, causing various issues diff --git a/chart/elastalert2/.helmignore b/chart/elastalert2/.helmignore new file mode 100644 index 00000000..f0c13194 --- /dev/null +++ b/chart/elastalert2/.helmignore @@ -0,0 +1,21 @@ +# 
Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*~ +# Various IDEs +.project +.idea/ +*.tmproj diff --git a/chart/elastalert2/Chart.yaml b/chart/elastalert2/Chart.yaml new file mode 100644 index 00000000..23f24baf --- /dev/null +++ b/chart/elastalert2/Chart.yaml @@ -0,0 +1,12 @@ +apiVersion: v1 +description: Automated rule-based alerting for Elasticsearch +name: elastalert2 +version: 2.0.3 +appVersion: 2.0.3 +home: https://github.com/jertel/elastalert2 +sources: +- https://github.com/jertel/elastalert2 +maintainers: + - name: jertel + email: jertel@codesim.com +engine: gotpl diff --git a/chart/elastalert2/README.md b/chart/elastalert2/README.md new file mode 100644 index 00000000..86132ff5 --- /dev/null +++ b/chart/elastalert2/README.md @@ -0,0 +1,90 @@ + +# Elastalert 2 Helm Chart for Kubernetes + +An Elastalert 2 Helm chart is available in the jertel Helm repository, and can be installed into an existing Kubernetes cluster by following the instructions below. + +## Installing the Chart + +Add the jertel repository to your Helm configuration: + +```console +helm repo add jertel https://helm.jertel.com +``` + +Next, install the chart with a release name, such as _elastalert2_: + +```console +helm install elastalert2 jertel/elastalert2 +``` + +The command deploys Elastalert 2 on the Kubernetes cluster in the default configuration. The [configuration](#configuration) section lists the parameters that can be configured during installation. + +See the comment in the default `values.yaml` for specifying a `writebackIndex` for ES 5.x. + +If necessary, open Dev Tools in Kibana and send the request below to avoid errors like `RequestError: TransportError(400, u'search_phase_execution_exception', u'No mapping found for [alert_time] in order to sort on')` + +``` +PUT /elastalert/_mapping/elastalert +{ + "properties": { + "alert_time": {"type": "date"} + } +} +``` + +## Uninstalling the Chart + +To uninstall/delete the Elastalert 2 deployment: + +```console +helm uninstall elastalert2 +``` + +The command removes all the Kubernetes components associated with the chart and deletes the release. 
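Chart behavior is controlled through Helm values; the available parameters are listed in the next section. As a sketch (the Elasticsearch host and the rule shown here are hypothetical), a custom values file passed via `helm install elastalert2 jertel/elastalert2 -f custom-values.yaml` might look like:

```yaml
# custom-values.yaml -- a sketch only; see the parameter table below.
elasticsearch:
  host: elasticsearch-master   # hypothetical in-cluster ES service name
  port: 9200
writebackIndex: elastalert
rules:
  nginx_errors: |-
    ---
    name: Nginx 5xx spike      # hypothetical example rule
    type: frequency
    index: nginx-*
    num_events: 10
    timeframe:
      minutes: 5
    alert:
    - "slack"
    slack_webhook_url: dummy
```

Any parameter in the table below can be overridden the same way.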
## Configuration + +| Parameter | Description | Default | |----------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------|---------------------------------| | `image.repository` | docker image | jertel/elastalert2 | | `image.tag` | docker image tag | 2.0.3 | | `image.pullPolicy` | image pull policy | IfNotPresent | | `podAnnotations` | Annotations to be added to pods | {} | | `command` | command override for container | `NULL` | | `args` | args override for container | `NULL` | | `replicaCount` | number of replicas to run | 1 | | `elasticsearch.host` | elasticsearch endpoint to use | elasticsearch | | `elasticsearch.port` | elasticsearch port to use | 9200 | | `elasticsearch.useSsl` | whether or not to connect to es_host using SSL | False | | `elasticsearch.username` | Username for ES with basic auth | `NULL` | | `elasticsearch.password` | Password for ES with basic auth | `NULL` | | `elasticsearch.credentialsSecret` | Specifies an existing secret to be used for the ES username/password auth | `NULL` | | `elasticsearch.credentialsSecretUsernameKey` | The key in elasticsearch.credentialsSecret that stores the ES username | `NULL` | | `elasticsearch.credentialsSecretPasswordKey` | The key in elasticsearch.credentialsSecret that stores the ES password | `NULL` | | `elasticsearch.verifyCerts` | whether or not to verify TLS certificates | True | | `elasticsearch.clientCert` | path to a PEM certificate to use as the client certificate | /certs/client.pem | | `elasticsearch.clientKey` | path to a private key file to use as the client key | /certs/client-key.pem | | `elasticsearch.caCerts` | path to a CA cert bundle to use to verify SSL connections | /certs/ca.pem | | `elasticsearch.certsVolumes` | certs volumes, required to mount ssl certificates when elasticsearch has tls enabled | `NULL` | | `elasticsearch.certsVolumeMounts` | mount certs volumes, required to mount ssl certificates when elasticsearch has tls enabled | `NULL` | | `extraConfigOptions` | Additional options to propagate to all rules, cannot be `alert`, `type`, `name` or `index` | `{}` | | `secretConfigName` | name of the secret which holds the Elastalert config. **Note:** this will completely overwrite the generated config | `NULL` | | `secretRulesName` | name of the secret which holds the Elastalert rules. **Note:** this will overwrite the generated rules | `NULL` | | `secretRulesList` | a list of rules to enable from the secret | [] | | `optEnv` | Additional pod environment variable definitions | [] | | `extraVolumes` | Additional volume definitions | [] | | `extraVolumeMounts` | Additional volumeMount definitions | [] | | `serviceAccount.create` | Specifies whether a service account should be created. | `true` | | `serviceAccount.name` | Service account to be used. 
If not set and `serviceAccount.create` is `true`, a name is generated using the fullname template | | +| `serviceAccount.annotations` | ServiceAccount annotations | | +| `podSecurityPolicy.create` | Create pod security policy resources | `false` | +| `resources` | Container resource requests and limits | {} | +| `rules` | Rule and alert configuration for Elastalert | {} example shown in values.yaml | +| `runIntervalMins` | Default interval between alert checks, in minutes | 1 | +| `realertIntervalMins` | Time between alarms for same rule, in minutes | `NULL` | +| `alertRetryLimitMins` | Time to retry failed alert deliveries, in minutes | 2880 (2 days) | +| `bufferTimeMins` | Default rule buffer time, in minutes | 15 | +| `writebackIndex` | Name or prefix of elastalert index(es) | elastalert | +| `nodeSelector` | Node selector for deployment | {} | +| `affinity` | Affinity specifications for the deployed pod(s) | {} | +| `tolerations` | Tolerations for deployment | [] | diff --git a/chart/elastalert2/templates/NOTES.txt b/chart/elastalert2/templates/NOTES.txt new file mode 100644 index 00000000..7b1c2985 --- /dev/null +++ b/chart/elastalert2/templates/NOTES.txt @@ -0,0 +1 @@ +1. Elastalert is now running against: {{ .Values.elasticsearch.host }}:{{ .Values.elasticsearch.port }} \ No newline at end of file diff --git a/chart/elastalert2/templates/_helpers.tpl b/chart/elastalert2/templates/_helpers.tpl new file mode 100644 index 00000000..2fbdad46 --- /dev/null +++ b/chart/elastalert2/templates/_helpers.tpl @@ -0,0 +1,36 @@ +{{/* vim: set filetype=mustache: */}} +{{/* +Expand the name of the chart. +*/}} +{{- define "elastalert.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "elastalert.fullname" -}} +{{- if .Values.fullnameOverride -}} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- $name := default .Chart.Name .Values.nameOverride -}} +{{- if contains $name .Release.Name -}} +{{- .Release.Name | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}} +{{- end -}} +{{- end -}} +{{- end -}} + +{{/* +Create the name of the service account to use +*/}} +{{- define "elastalert.serviceAccountName" -}} +{{- if .Values.serviceAccount.create -}} + {{ default (include "elastalert.fullname" .) .Values.serviceAccount.name }} +{{- else -}} + {{ default "default" .Values.serviceAccount.name }} +{{- end -}} +{{- end -}} diff --git a/chart/elastalert2/templates/deployment.yaml b/chart/elastalert2/templates/deployment.yaml new file mode 100644 index 00000000..0d696a12 --- /dev/null +++ b/chart/elastalert2/templates/deployment.yaml @@ -0,0 +1,124 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ template "elastalert.fullname" . }} + labels: + app: {{ template "elastalert.name" . }} + chart: "{{ .Chart.Name }}-{{ .Chart.Version }}" + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} +spec: + selector: + matchLabels: + app: {{ template "elastalert.name" . }} + release: {{ .Release.Name }} + replicas: {{ .Values.replicaCount }} + revisionHistoryLimit: {{ .Values.revisionHistoryLimit }} + template: + metadata: + annotations: + checksum/config: {{ include (print $.Template.BasePath "/config.yaml") . 
| sha256sum }} + checksum/rules: {{ include (print $.Template.BasePath "/rules.yaml") . | sha256sum }} +{{- if .Values.podAnnotations }} +{{ toYaml .Values.podAnnotations | indent 8 }} +{{- end }} + labels: + name: {{ template "elastalert.fullname" . }}-elastalert + app: {{ template "elastalert.name" . }} + release: {{ .Release.Name }} + spec: + serviceAccountName: {{ include "elastalert.serviceAccountName" . }} + containers: + - name: elastalert + image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} +{{- if .Values.command }} + command: +{{ toYaml .Values.command | indent 10 }} +{{- end }} +{{- if .Values.args }} + args: +{{ toYaml .Values.args | indent 10 }} +{{- end }} + volumeMounts: + - name: config + mountPath: '/opt/config' + - name: rules + mountPath: '/opt/rules' +{{- if .Values.elasticsearch.certsVolumeMounts }} +{{ toYaml .Values.elasticsearch.certsVolumeMounts | indent 10 }} +{{- end }} +{{- if .Values.extraVolumeMounts }} +{{ toYaml .Values.extraVolumeMounts | indent 10 }} +{{- end }} + resources: +{{ toYaml .Values.resources | indent 12 }} + env: +{{- if .Values.elasticsearch.credentialsSecret }} +{{- if .Values.elasticsearch.credentialsSecretUsernameKey }} + - name: ES_USERNAME + valueFrom: + secretKeyRef: + name: {{ .Values.elasticsearch.credentialsSecret }} + key: {{ .Values.elasticsearch.credentialsSecretUsernameKey }} +{{- end }} +{{- if .Values.elasticsearch.credentialsSecretPasswordKey }} + - name: ES_PASSWORD + valueFrom: + secretKeyRef: + name: {{ .Values.elasticsearch.credentialsSecret }} + key: {{ .Values.elasticsearch.credentialsSecretPasswordKey }} +{{- end }} +{{- end }} +{{- if .Values.optEnv }} +{{ .Values.optEnv | toYaml | indent 10}} +{{- end }} + restartPolicy: Always +{{- if .Values.tolerations }} + tolerations: +{{ toYaml .Values.tolerations | indent 8 }} +{{- end }} +{{- if .Values.nodeSelector }} + nodeSelector: +{{ toYaml .Values.nodeSelector | indent 8 }} +{{- end }} +{{- if .Values.affinity }} + affinity: +{{ toYaml .Values.affinity | indent 8 }} +{{- end }} + volumes: + - name: rules +{{- if .Values.secretRulesName }} + secret: + secretName: {{ .Values.secretRulesName }} + items: +{{- range $key := .Values.secretRulesList }} + - key: {{ $key }} + path: {{ $key}}.yaml +{{- end }} +{{- else }} + configMap: + name: {{ template "elastalert.fullname" . }}-rules + items: +{{- range $key, $value := .Values.rules }} + - key: {{ $key }} + path: {{ $key}}.yaml +{{- end }} +{{- end }} + - name: config +{{- if .Values.secretConfigName }} + secret: + secretName: {{ .Values.secretConfigName }} +{{- else }} + configMap: + name: {{ template "elastalert.fullname" . }}-config +{{- end }} + items: + - key: elastalert_config + path: elastalert_config.yaml +{{- if .Values.elasticsearch.certsVolumes }} +{{ toYaml .Values.elasticsearch.certsVolumes | indent 8 }} +{{- end }} +{{- if .Values.extraVolumes }} +{{ toYaml .Values.extraVolumes | indent 8 }} +{{- end }} diff --git a/chart/elastalert2/templates/podsecuritypolicy.yaml b/chart/elastalert2/templates/podsecuritypolicy.yaml new file mode 100644 index 00000000..e3777203 --- /dev/null +++ b/chart/elastalert2/templates/podsecuritypolicy.yaml @@ -0,0 +1,39 @@ +{{- if .Values.podSecurityPolicy.create }} +apiVersion: policy/v1beta1 +kind: PodSecurityPolicy +metadata: + name: {{ template "elastalert.fullname" . }} + labels: + app: {{ template "elastalert.name" . 
}} + chart: "{{ .Chart.Name }}-{{ .Chart.Version }}" + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} +spec: + # Prevents running in privileged mode + privileged: false + # Required to prevent escalations to root. + allowPrivilegeEscalation: false + volumes: + - configMap + - secret + hostNetwork: false + hostIPC: false + hostPID: false + runAsUser: + rule: RunAsAny + seLinux: + rule: RunAsAny + supplementalGroups: + rule: MustRunAs + ranges: + # Forbid adding the root group. + - min: 1 + max: 65535 + fsGroup: + rule: MustRunAs + ranges: + # Forbid adding the root group. + - min: 1 + max: 65535 + readOnlyRootFilesystem: false +{{- end }} diff --git a/chart/elastalert2/templates/role.yaml b/chart/elastalert2/templates/role.yaml new file mode 100644 index 00000000..93b9cadd --- /dev/null +++ b/chart/elastalert2/templates/role.yaml @@ -0,0 +1,20 @@ +{{- if .Values.podSecurityPolicy.create }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: {{ template "elastalert.fullname" . }} + labels: + app: {{ template "elastalert.name" . }} + chart: "{{ .Chart.Name }}-{{ .Chart.Version }}" + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} +rules: + - apiGroups: + - policy + resources: + - podsecuritypolicies + resourceNames: + - {{ template "elastalert.fullname" . }} + verbs: + - use +{{- end -}} diff --git a/chart/elastalert2/templates/rolebinding.yaml b/chart/elastalert2/templates/rolebinding.yaml new file mode 100644 index 00000000..67a69d1f --- /dev/null +++ b/chart/elastalert2/templates/rolebinding.yaml @@ -0,0 +1,18 @@ +{{- if .Values.podSecurityPolicy.create }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: {{ template "elastalert.fullname" . }} + labels: + app: {{ template "elastalert.name" . }} + chart: "{{ .Chart.Name }}-{{ .Chart.Version }}" + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: {{ template "elastalert.fullname" . }} +subjects: + - kind: ServiceAccount + name: {{ template "elastalert.serviceAccountName" . }} +{{- end -}} diff --git a/chart/elastalert2/templates/rules.yaml b/chart/elastalert2/templates/rules.yaml new file mode 100644 index 00000000..1e4afd45 --- /dev/null +++ b/chart/elastalert2/templates/rules.yaml @@ -0,0 +1,14 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ template "elastalert.fullname" . }}-rules + labels: + app: {{ template "elastalert.name" . }} + chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} +data: +{{- range $key, $value := .Values.rules }} +{{ $key | indent 2}}: |- +{{ $value | indent 4}} +{{- end }} diff --git a/chart/elastalert2/templates/serviceaccount.yaml b/chart/elastalert2/templates/serviceaccount.yaml new file mode 100644 index 00000000..dc1e08c5 --- /dev/null +++ b/chart/elastalert2/templates/serviceaccount.yaml @@ -0,0 +1,15 @@ +{{- if .Values.serviceAccount.create -}} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "elastalert.serviceAccountName" . }} + labels: + app: {{ template "elastalert.name" . }} + chart: "{{ .Chart.Name }}-{{ .Chart.Version }}" + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} + {{- with .Values.serviceAccount.annotations }} + annotations: + {{- toYaml . 
| nindent 4 }} + {{- end }} +{{- end -}} diff --git a/chart/elastalert2/values.yaml b/chart/elastalert2/values.yaml new file mode 100644 index 00000000..d54f54bd --- /dev/null +++ b/chart/elastalert2/values.yaml @@ -0,0 +1,228 @@ +# number of replicas to run +replicaCount: 1 + +# number of helm release revisions to retain +revisionHistoryLimit: 5 + +# Default interval between alert checks against the elasticsearch datasource, in minutes +runIntervalMins: 1 + +# Default rule buffer duration, in minutes +bufferTimeMins: 15 + +# Amount of time to retry and deliver failed alerts (1440 minutes per day) +alertRetryLimitMins: 2880 + +# Default time before realerting, in minutes +realertIntervalMins: "" + +# For ES 5: The name of the index which stores elastalert 2 statuses, typically elastalert_status +# For ES 6: The prefix of the names of indices which store elastalert 2 statuses, typically elastalert +# +writebackIndex: elastalert + +image: + # docker image + repository: jertel/elastalert2 + # docker image tag + tag: 2.0.3 + pullPolicy: IfNotPresent + +resources: {} + +# Annotations to be added to pods +podAnnotations: {} + +elasticsearch: + # elasticsearch endpoint e.g. (svc.namespace||svc) + host: elasticsearch + # elasticsearch port + port: 9200 + # whether or not to connect to es_host using TLS + useSsl: "False" + # Username if authenticating to ES with basic auth + username: "" + # Password if authenticating to ES with basic auth + password: "" + # Specifies an existing secret to be used for the ES username/password + credentialsSecret: "" + # The key in elasticsearch.credentialsSecret that stores the ES username + credentialsSecretUsernameKey: "" + # The key in elasticsearch.credentialsSecret that stores the ES password + credentialsSecretPasswordKey: "" + # whether or not to verify TLS certificates + verifyCerts: "True" + # Enable certificate based authentication + # path to a PEM certificate to use as the client certificate + # clientCert: "/certs/client.pem" + # path to a private key file to use as the client key + # clientKey: "/certs/client-key.pem" + # path to a CA cert bundle to use to verify SSL connections + # caCerts: "/certs/ca.pem" + # # certs volumes, required to mount ssl certificates when elasticsearch has tls enabled + # certsVolumes: + # - name: es-certs + # secret: + # defaultMode: 420 + # secretName: es-certs + # # mount certs volumes, required to mount ssl certificates when elasticsearch has tls enabled + # certsVolumeMounts: + # - name: es-certs + # mountPath: /certs + # readOnly: true + +# Optional env variables for the pod +optEnv: [] + +extraConfigOptions: {} + # # Options to propagate to all rules, e.g. 
+
+# Optional env variables for the pod
+optEnv: []
+
+extraConfigOptions: {}
+  # # Options to propagate to all rules, e.g. a common slack_webhook_url or kibana_url
+  # # Please note that these options will not work for required_locals, which MUST
+  # # be set at the rule level; these are: ['alert', 'type', 'name', 'index']
+  # generate_kibana_link: true
+  # kibana_url: https://kibana.yourdomain.com
+  # slack_webhook_url: dummy
+
+# To load Elastalert 2 config via secret, uncomment the line below
+# secretConfigName: elastalert-config-secret
+
+# Example of a secret config
+
+#apiVersion: v1
+#kind: Secret
+#metadata:
+#  name: elastalert-config-secret
+#type: Opaque
+#stringData:
+#  elastalert_config: |-
+#    rules_folder: /opt/rules
+#    scan_subdirectories: false
+#    run_every:
+#      minutes: 1
+#    buffer_time:
+#      minutes: 15
+#    es_host: elasticsearch
+#    es_port: 9200
+#    writeback_index: elastalert
+#    use_ssl: False
+#    verify_certs: True
+#    alert_time_limit:
+#      minutes: 2880
+#    slack_webhook_url: https://hooks.slack.com/services/xxxx
+#    slack_channel_override: '#alerts'
+
+
+# To load Elastalert 2 rules via secret, uncomment the line below
+#secretRulesName: elastalert-rules-secret
+
+# Additionally, you must specify which rules to load from the secret
+#secretRulesList: [ "rule_1", "rule_2" ]
+
+# Example of secret rules
+
+#apiVersion: v1
+#kind: Secret
+#metadata:
+#  name: elastalert-rules-secret
+#  namespace: elastic-system
+#type: Opaque
+#stringData:
+#  rule_1: |-
+#    name: Rule 1
+#    type: frequency
+#    index: index1-*
+#    num_events: 3
+#    timeframe:
+#      minutes: 1
+#    alert:
+#    - "slack"
+#  rule_2: |-
+#    name: Rule 2
+#    type: frequency
+#    index: index2-*
+#    num_events: 5
+#    timeframe:
+#      minutes: 10
+#    alert:
+#    - "slack"
+
+# Command and args override for container e.g. (https://kubernetes.io/docs/tasks/inject-data-application/define-command-argument-container/)
+# command: ["YOUR_CUSTOM_COMMAND"]
+# args: ["YOUR", "CUSTOM", "ARGS"]
+
+# rule configurations e.g. (http://elastalert2.readthedocs.io/en/latest/)
+rules: {}
+  # deadman_slack: |-
+  #   ---
+  #   name: Deadman Switch Slack
+  #   type: frequency
+  #   index: containers-*
+  #   num_events: 3
+  #   timeframe:
+  #     minutes: 3
+  #   filter:
+  #   - term:
+  #       message: "deadmanslack"
+  #   alert:
+  #   - "slack"
+  #   slack:
+  #   slack_webhook_url: dummy
+  # deadman_pagerduty: |-
+  #   ---
+  #   name: Deadman Switch PagerDuty
+  #   type: frequency
+  #   index: containers-*
+  #   num_events: 3
+  #   timeframe:
+  #     minutes: 3
+  #   filter:
+  #   - term:
+  #       message: "deadmanpd"
+  #   alert:
+  #   - "pagerduty"
+  #   pagerduty:
+  #   pagerduty_service_key: dummy
+  #   pagerduty_client_name: Elastalert Deadman Switch
+
+serviceAccount:
+  # Specifies whether a service account should be created
+  create: true
+  # Annotations to add to the service account
+  annotations: {}
+  # The name of the service account to use.
+  # If not set and create is true, a name is generated using the fullname template
+  name:
+
+# Enable pod security policy
+# https://kubernetes.io/docs/concepts/policy/pod-security-policy/
+podSecurityPolicy:
+  create: false
+
+# Support using node selectors and tolerations
+# nodeSelector:
+#   "node-role.kubernetes.io/infra_worker": "true"
+nodeSelector: {}
+
+# Specify node affinity or anti-affinity specifications
+affinity: {}
+
+# tolerations:
+#   - key: "node_role"
+#     operator: "Equal"
+#     value: "infra_worker"
+#     effect: "NoSchedule"
+tolerations: []
+
+extraVolumes: []
+  # - name: smtp-auth
+  #   secret:
+  #     secretName: elastalert-smtp-auth
+  #     items:
+  #       - key: smtp_auth.yaml
+  #         path: smtp_auth.yaml
+  #         mode: 0400
+
+extraVolumeMounts: []
+  # - name: smtp-auth
+  #   mountPath: /opt/config-smtp/smtp_auth.yaml
+  #   subPath: smtp_auth.yaml
+  #   readOnly: true
diff --git a/config.yaml.example b/config.yaml.example
index 958e40a8..973a3e9d 100644
--- a/config.yaml.example
+++ b/config.yaml.example
@@ -54,7 +54,6 @@ es_port: 9200
 
 # Use SSL authentication with client certificates client_cert must be
 # a pem file containing both cert and key for client
-#verify_certs: True
 #ca_certs: /path/to/cacert.pem
 #client_cert: /path/to/client_cert.pem
 #client_key: /path/to/client_key.key
@@ -84,38 +83,38 @@ alert_time_limit:
 #   logline:
 #     format: '%(asctime)s %(levelname)+8s %(name)+20s %(message)s'
 #
-#  handlers:
-#    console:
-#      class: logging.StreamHandler
-#      formatter: logline
-#      level: DEBUG
-#      stream: ext://sys.stderr
+# handlers:
+#   console:
+#     class: logging.StreamHandler
+#     formatter: logline
+#     level: DEBUG
+#     stream: ext://sys.stderr
 #
-#  file:
-#    class : logging.FileHandler
-#    formatter: logline
-#    level: DEBUG
-#    filename: elastalert.log
+# file:
+#   class : logging.FileHandler
+#   formatter: logline
+#   level: DEBUG
+#   filename: elastalert.log
 #
-#  loggers:
-#    elastalert:
-#      level: WARN
-#      handlers: []
-#      propagate: true
+# loggers:
+#   elastalert:
+#     level: WARN
+#     handlers: []
+#     propagate: true
 #
-#    elasticsearch:
-#      level: WARN
-#      handlers: []
-#      propagate: true
+#   elasticsearch:
+#     level: WARN
+#     handlers: []
+#     propagate: true
 #
-#    elasticsearch.trace:
-#      level: WARN
-#      handlers: []
-#      propagate: true
+#   elasticsearch.trace:
+#     level: WARN
+#     handlers: []
+#     propagate: true
 #
-#    '': # root logger
-#      level: WARN
-#      handlers:
-#        - console
-#        - file
-#      propagate: false
+#   '': # root logger
+#     level: WARN
+#     handlers:
+#       - console
+#       - file
+#     propagate: false
diff --git a/docs/README.md b/docs/README.md
new file mode 100644
index 00000000..eab45524
--- /dev/null
+++ b/docs/README.md
@@ -0,0 +1,12 @@
+# Documentation
+
+You can read this documentation at [Read The Docs][0].
+
+To build a local version of these docs, run the following from within the `/docs` directory:
+
+```
+pip install m2r2 sphinx_rtd_theme sphinx
+make html
+```
+
+You can then view the generated HTML within the `build/` folder.
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 80a76ed1..4a7ac542 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -19,13 +19,13 @@
 # -- General configuration -----------------------------------------------------
 
 # Add any Sphinx extension module names here, as strings. They can be extensions
 # coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
-extensions = []
+extensions = ["m2r2"]
 
 # Add any paths that contain templates here, relative to this directory.
 templates_path = ['_templates']
 
 # The suffix of source filenames.
-source_suffix = '.rst'
+source_suffix = ['.rst', '.md']
 
 # The encoding of source files.
 # source_encoding = 'utf-8'
@@ -62,6 +62,7 @@
 # List of directories, relative to source directory, that shouldn't be searched
 # for source files.
 exclude_trees = []
+exclude_patterns = ['recipes/*.md']
 
 # The reST default role (used for this markup: `text`) to use for all documents.
 # default_role = None
diff --git a/docs/source/elastalert.rst b/docs/source/elastalert.rst
index b1008c3c..6b5bfe31 100755
--- a/docs/source/elastalert.rst
+++ b/docs/source/elastalert.rst
@@ -35,14 +35,29 @@ Currently, we have support built in for these alert types:
 - Email
 - JIRA
 - OpsGenie
-- SNS
-- HipChat
+- AWS SNS
+- MS Teams
 - Slack
+- Mattermost
 - Telegram
 - GoogleChat
+- PagerDuty
+- PagerTree
+- Exotel
+- Twilio
+- Splunk On-Call (Formerly VictorOps)
+- Gitter
+- ServiceNow
 - Debug
 - Stomp
+- Alerta
+- HTTP POST
+- Line Notify
 - TheHive
+- Zabbix
+- Discord
+- Dingtalk
+- Chatwork
 
 Additional rule types and alerts can be easily imported or written. (See :ref:`Writing rule types ` and :ref:`Writing alerts `)
@@ -203,6 +218,10 @@
 ``skip_invalid``: If ``True``, skip invalid files instead of exiting.
 
+``jinja_root_name``: When using a Jinja template, specify the name of the root field in the template. The default is ``_data``.
+
+``jinja_template_path``: When using a Jinja template, specify the filesystem path to the template. This overrides the default behaviour of using ``alert_text`` as the template.
+
 Logging
 -------
diff --git a/docs/source/elasticsearch_security_privileges.rst b/docs/source/elasticsearch_security_privileges.rst
new file mode 100644
index 00000000..d74497f7
--- /dev/null
+++ b/docs/source/elasticsearch_security_privileges.rst
@@ -0,0 +1,35 @@
+Elasticsearch Security Privileges
+*********************************
+
+While ElastAlert will just work out-of-the-box against an unsecured Elasticsearch, on a secured Elasticsearch it will need a user with a certain set of permissions that allow it to read documents, check the cluster status, etc.
+
+SearchGuard Permissions
+=======================
+
+The permissions in Elasticsearch are specific to the plugin being used for RBAC. However, the permissions mentioned here can be mapped easily to plugins other than SearchGuard.
+
+Details about SearchGuard Action Groups: https://docs.search-guard.com/latest/action-groups
+
+
+Writeback Permissions
+---------------------
+
+For the global config (which writes to the writeback index), you would need to grant all permissions on the writeback indices.
+In addition, some permissions related to Cluster Monitor Access are required.
+
+``Cluster Permissions``: CLUSTER_MONITOR, indices:data/read/scroll*
+
+``Index Permissions`` (Over Writeback Indices): INDICES_ALL
+
+
+Per Rule Permissions
+--------------------
+
+For per rule Elasticsearch config, you would need at least the read permissions on the index you want to query.
+
+Detailed SearchGuard Permissions:
+
+``Cluster Permissions``: CLUSTER_COMPOSITE_OPS_RO
+
+``Index Permissions`` (Over the index the rule is querying on): READ, indices:data/read/scroll*
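+
+As a concrete illustration, a SearchGuard roles entry granting the per rule
+permissions above might look roughly like this. This is only a sketch: the role
+name and index pattern are placeholders, and the exact schema varies between
+SearchGuard versions, so consult the SearchGuard documentation for your release::
+
+    sg_elastalert_reader:
+      cluster_permissions:
+        - CLUSTER_COMPOSITE_OPS_RO
+      index_permissions:
+        - index_patterns:
+            - "logstash-*"
+          allowed_actions:
+            - READ
+            - "indices:data/read/scroll*"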
diff --git a/docs/source/index.rst b/docs/source/index.rst
index 4219bf13..17b15631 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -15,12 +15,14 @@ Contents:
     running_elastalert
     ruletypes
     elastalert_status
+    elasticsearch_security_privileges
     recipes/adding_rules
     recipes/adding_alerts
     recipes/writing_filters
     recipes/adding_enhancements
     recipes/adding_loaders
     recipes/signing_requests
+    recipes/faq
 
 Indices and Tables
 ==================
diff --git a/docs/source/recipes/faq-md.md b/docs/source/recipes/faq-md.md
new file mode 100644
index 00000000..15f87fba
--- /dev/null
+++ b/docs/source/recipes/faq-md.md
@@ -0,0 +1,246 @@
+My rule is not getting any hits?
+==========
+
+So you've managed to set up ElastAlert, write a rule, and run it, but nothing happens, or it says
+``0 query hits``. First of all, we recommend using the command ``elastalert-test-rule rule.yaml`` to
+debug. It will show you how many documents match your filters for the last 24 hours (or more, see
+``--help``), and then shows you if any alerts would have fired. If you have a filter in your rule,
+remove it and try again. This will show you if the index is correct and that you have at least some
+documents. If you have a filter in Kibana and want to recreate it in ElastAlert, you probably want
+to use a query string. Your filter will look like
+
+```
+filter:
+- query:
+    query_string:
+      query: "foo: bar AND baz: abc*"
+```
+
+If you receive an error that Elasticsearch is unable to parse it, it's likely the YAML is not spaced
+correctly, and the filter is not in the right format. If you are using other types of filters, like
+``term``, a common pitfall is not realizing that you may need to use the analyzed token. This is the
+default if you are using Logstash. For example,
+
+```
+filter:
+- term:
+    foo: "Test Document"
+```
+
+will not match even if the original value for ``foo`` was exactly "Test Document". Instead, you want
+to use ``foo.raw``. If you are still having trouble troubleshooting why your documents do not match,
+try running ElastAlert with ``--es_debug_trace /path/to/file.log``. This will log the queries made
+to Elasticsearch in full so that you can see exactly what is happening.
+
+I got hits, why didn't I get an alert?
+==========
+
+If you got logs that had ``X query hits, 0 matches, 0 alerts sent``, the reason you didn't get any
+alerts depends on the rule ``type``. If ``type: any``, a match will occur for every hit. If you are
+using ``type: frequency``, ``num_events`` must occur within ``timeframe`` of each other for a match
+to occur. Different rules apply for different rule types.
+
+If you see ``X matches, 0 alerts sent``, this may occur for several reasons. If you set
+``aggregation``, the alert will not be sent until after that time has elapsed. If you have gotten an
+alert for this same rule before, that rule may be silenced for a period of time. The default is one
+minute between alerts. If a rule is silenced, you will see ``Ignoring match for silenced rule`` in
+the logs.
+
+If you see ``X alerts sent`` but didn't get any alert, it's probably related to the alert
+configuration. If you are using the ``--debug`` flag, you will not receive any alerts. Instead, the
+alert text will be written to the console. Use ``--verbose`` to achieve the same effects without
+preventing alerts. If you are using the email alert, make sure you have it configured for an SMTP
+server. By default, it will connect to localhost on port 25. It will also use the word "elastalert"
+as the "From:" address. Some SMTP servers will reject this because it does not have a domain while
+others will add their own domain automatically. See the email section in the documentation for how
+to configure this.
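+For example, a sketch of the relevant email options (all values here are
+placeholders):
+
+```
+alert:
+- email
+email: ["ops@example.com"]
+smtp_host: smtp.example.com
+smtp_port: 25
+from_addr: elastalert@example.com
+```
+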
+Why did I only get one alert when I expected to get several?
+==========
+
+There is a setting called ``realert`` which is the minimum time between two alerts for the same
+rule. Any alert that occurs within this time will simply be dropped. The default value for this is
+one minute. If you want to receive an alert for every single match, even if they occur right after
+each other, use
+
+```
+realert:
+  minutes: 0
+```
+
+You can of course set it higher as well.
+
+How can I prevent duplicate alerts?
+==========
+
+By setting ``realert``, you will prevent the same rule from alerting twice in an amount of time.
+
+```
+realert:
+  days: 1
+```
+
+You can also prevent duplicates based on a certain field by using ``query_key``. For example, to
+prevent multiple alerts for the same user, you might use
+
+```
+realert:
+  hours: 8
+query_key: user
+```
+
+Note that this will also affect the way many rule types work. If you are using ``type: frequency``
+for example, ``num_events`` for a single value of ``query_key`` must occur before an alert will be
+sent. You can also use a compound of multiple fields for this key. For example, if you only wanted
+to receive an alert once for a specific error and hostname, you could use
+
+```
+query_key: [error, hostname]
+```
+
+Internally, this works by creating a new field for each document called ``field1,field2`` with a
+value of ``value1,value2`` and using that as the ``query_key``.
+
+The data for when an alert will fire again is stored in Elasticsearch in the ``elastalert_status``
+index, with a ``_type`` of ``silence`` and also cached in memory.
+
+How can I change what's in the alert?
+==========
+
+You can use the field ``alert_text`` to add custom text to an alert. By setting ``alert_text_type:
+alert_text_only`` or ``alert_text_type: alert_text_jinja``, it will be the entirety of the alert.
+You can also add different fields from the alert:
+
+With ``alert_text_type: alert_text_jinja`` by using a [Jinja2](https://pypi.org/project/Jinja2/)
+template.
+
+```
+alert_text_type: alert_text_jinja
+
+alert_text: |
+  Alert triggered! *({{num_hits}} Matches!)*
+  Something happened with {{username}} ({{email}})
+  {{description|truncate}}
+
+```
+
+- Top fields are accessible via `{{field_name}}` or `{{_data['field_name']}}`, `_data` is useful
+  when accessing *fields with dots in their keys*, as Jinja treats a dot as a nested field.
+- If `_data` conflicts with your top level data, use ``jinja_root_name`` to change its name.
+
+With ``alert_text_type: alert_text_only`` by using Python style string formatting and
+``alert_text_args``. For example
+
+```
+alert_text: "Something happened with {0} at {1}"
+alert_text_type: alert_text_only
+alert_text_args: ["username", "@timestamp"]
+```
+
+You can also limit the alert to only containing certain fields from the document by using
+``include``.
+
+```
+include: ["ip_address", "hostname", "status"]
+```
+
+My alert only contains data for one event, how can I see more?
+==========
+
+If you are using ``type: frequency``, you can set the option ``attach_related: true`` and every
+document will be included in the alert.
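+For example, a minimal sketch of a frequency rule using this option (the index
+and values here are placeholders):
+
+```
+name: Many failed logins
+type: frequency
+index: auth-*
+num_events: 10
+timeframe:
+  minutes: 5
+attach_related: true
+alert:
+- email
+```
+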
+An alternative, which works for every type, is ``top_count_keys``. This will show the top counts
+for each value for certain fields. For example, if you have
+
+```
+top_count_keys: ["ip_address", "status"]
+```
+
+and 10 documents matched your alert, it may contain something like
+
+```
+ip_address:
+127.0.0.1: 7
+10.0.0.1: 2
+192.168.0.1: 1
+
+status:
+200: 9
+500: 1
+```
+
+How can I make the alert come at a certain time?
+==========
+
+The ``aggregation`` feature will take every alert that has occurred over a period of time and send
+them together in one alert. You can use cron style syntax to send all alerts that have occurred
+since the last one by using
+
+```
+aggregation:
+  schedule: '2 4 * * mon,fri'
+```
+
+I have lots of documents and it's really slow, how can I speed it up?
+==========
+
+There are several ways to potentially speed up queries. If you are using ``index: logstash-*``,
+Elasticsearch will query all shards, even if they cannot possibly contain data with the correct
+timestamp. Instead, you can use Python time format strings and set ``use_strftime_index``
+
+```
+index: logstash-%Y.%m
+use_strftime_index: true
+```
+
+Another thing you could change is ``buffer_time``. By default, ElastAlert will query large
+overlapping windows in order to ensure that it does not miss any events, even if they are indexed in
+real time. In config.yaml, you can adjust ``buffer_time`` to a smaller number to only query the most
+recent few minutes.
+
+```
+buffer_time:
+  minutes: 5
+```
+
+By default, ElastAlert will download every document in full before processing them. Instead, you can
+have ElastAlert simply get a count of the number of documents that have occurred in between each
+query. To do this, set ``use_count_query: true``. This cannot be used if you use ``query_key``,
+because ElastAlert will not know the contents of each document, just the total number of them. This
+also reduces the precision of alerts, because all events that occur between each query will be
+rounded to a single timestamp.
+
+If you are using ``query_key`` (a single key, not multiple keys) you can use ``use_terms_query``.
+This will make ElastAlert perform a terms aggregation to get the counts for each value of a certain
+field. Both ``use_terms_query`` and ``use_count_query`` also require ``doc_type`` to be set to the
+``_type`` of the documents. They may not be compatible with all rule types.
+
+Can I perform aggregations?
+==========
+
+The only aggregation supported currently is a terms aggregation, by setting ``use_terms_query``.
+
+I'm not using @timestamp, what do I do?
+==========
+
+You can use ``timestamp_field`` to change which field ElastAlert will use as the timestamp. You can
+use ``timestamp_type`` to change it between ISO 8601 and unix timestamps. You must have some kind of
+timestamp for ElastAlert to work. If your events are not in real time, you can use ``query_delay``
+and ``buffer_time`` to adjust when ElastAlert will look for documents.
+
+I'm using flatline but I don't see any alerts
+==========
+
+When using ``type: flatline``, ElastAlert must see at least one document before it will alert you
+that it has stopped seeing them.
+
+How can I get a "resolve" event?
+==========
+
+ElastAlert does not currently support stateful alerts or resolve events.
+
+Can I set a warning threshold?
+==========
+
+Currently, the only way to set a warning threshold is by creating a second rule with a lower
+threshold.
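+For example, a sketch of the two-rule approach (names, indices and thresholds
+are placeholders):
+
+```
+# warning.yaml
+name: Error rate warning
+type: frequency
+index: app-*
+num_events: 50
+timeframe:
+  minutes: 10
+
+# critical.yaml - same filter, higher threshold
+name: Error rate critical
+type: frequency
+index: app-*
+num_events: 200
+timeframe:
+  minutes: 10
+```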
+
diff --git a/docs/source/recipes/faq.rst b/docs/source/recipes/faq.rst
new file mode 100644
index 00000000..34a632f6
--- /dev/null
+++ b/docs/source/recipes/faq.rst
@@ -0,0 +1,6 @@
+.. _faq:
+
+Frequently Asked Questions
+--------------------------
+
+.. mdinclude:: faq-md.md
diff --git a/docs/source/ruletypes.rst b/docs/source/ruletypes.rst
index 2e26b0ca..cb4d0196 100644
--- a/docs/source/ruletypes.rst
+++ b/docs/source/ruletypes.rst
@@ -106,6 +108,8 @@ Rule Configuration Cheat Sheet
 +--------------------------------------------------------------+ |
 | ``scan_entire_timeframe`` (bool, default False)              | |
 +--------------------------------------------------------------+ |
+| ``query_timezone`` (string, default empty string)            | |
++--------------------------------------------------------------+ |
 | ``import`` (string)                                          | |
 |                                                              | |
 | IGNORED IF ``use_count_query`` or ``use_terms_query`` is true| |
@@ -144,7 +146,7 @@ Rule Configuration Cheat Sheet
 | ``ignore_null`` (boolean, no default)              |        |           | Req       | Req    |           |       |          |        |           |
 +----------------------------------------------------+--------+-----------+-----------+--------+-----------+-------+----------+--------+-----------+
-| ``query_key`` (string, no default)                 | Opt    |           |           | Req    | Opt       | Opt   | Opt      | Req    | Opt       |
+| ``query_key`` (string or list, no default)         | Opt    |           |           | Req    | Opt       | Opt   | Opt      | Req    | Opt       |
 +----------------------------------------------------+--------+-----------+-----------+--------+-----------+-------+----------+--------+-----------+
 | ``aggregation_key`` (string, no default)           | Opt    |           |           |        |           |       |          |        |           |
 +----------------------------------------------------+--------+-----------+-----------+--------+-----------+-------+----------+--------+-----------+
@@ -164,7 +166,7 @@ Rule Configuration Cheat Sheet
 |                                                    |        |           |           |        |           |       |          |        |           |
 |``doc_type`` (string, no default)                   |        |           |           |        |           |       |          |        |           |
 |                                                    |        |           |           |        |           |       |          |        |           |
-|``query_key`` (string, no default)                  |        |           |           |        |           |       |          |        |           |
+|``query_key`` (string or list, no default)          |        |           |           |        |           |       |          |        |           |
 |                                                    |        |           |           |        |           |       |          |        |           |
 |``terms_size`` (int, default 50)                    |        |           |           |        |           |       |          |        |           |
 +----------------------------------------------------+--------+-----------+-----------+--------+-----------+-------+----------+--------+-----------+
@@ -568,9 +570,9 @@ The currently supported versions of Kibana Discover are:
 
 - `5.6`
 - `6.0`, `6.1`, `6.2`, `6.3`, `6.4`, `6.5`, `6.6`, `6.7`, `6.8`
-- `7.0`, `7.1`, `7.2`, `7.3`
+- `7.0`, `7.1`, `7.2`, `7.3`, `7.4`, `7.5`, `7.6`, `7.7`, `7.8`, `7.9`, `7.10`, `7.11`, `7.12`
 
-``kibana_discover_version: '7.3'``
+``kibana_discover_version: '7.12'``
 
 kibana_discover_index_pattern_id
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -710,6 +712,13 @@ scan the same range again, triggering duplicate alerts.
 
 Some rules and alerts require additional options, which also go in the top level of the rule configuration file.
 
+query_timezone
+^^^^^^^^^^^^^^
+
+``query_timezone``: Converts the UTC start and end times of queries to the specified time zone.
+If not set, the start and end times of the query will be in UTC. (Optional, string, default empty string)
+
+Example value: ``query_timezone: "Europe/Istanbul"``
 
 .. _testing :
 
@@ -953,7 +962,7 @@ Optional:
 
 ``field_value``: When set, uses the value of the field in the document and not the number of matching documents.
 This is useful to monitor for example a temperature sensor and raise an alarm if the temperature grows too fast.
 Note that the means of the field on the reference and current windows are used to determine if the ``spike_height`` value is reached.
-Note also that the threshold parameters are ignored in this smode.
+Note also that the threshold parameters are ignored in this mode.
 
 ``threshold_ref``: The minimum number of events that must exist in the reference window for an alert to
 trigger. For example, if
@@ -1376,9 +1385,30 @@ There are several ways to format the body text of the various types of events. I
 
     top_counts = top_counts_header, LF, top_counts_value
     field_values = Field, ": ", Value
 
-Similarly to ``alert_subject``, ``alert_text`` can be further formatted using standard Python formatting syntax.
+Similarly to ``alert_subject``, ``alert_text`` can be further formatted using Jinja2 templates or standard Python formatting syntax.
+
+1. Jinja Template
+
+By setting ``alert_text_type: alert_text_jinja`` you can use Jinja2 templates in ``alert_text``. ::
+
+    alert_text_type: alert_text_jinja
+
+    alert_text: |
+      Alert triggered! *({{num_hits}} Matches!)*
+      Something happened with {{username}} ({{email}})
+      {{description|truncate}}
+
+Top fields are accessible via `{{field_name}}` or `{{_data['field_name']}}`, `_data` is useful when accessing *fields with dots in their keys*, as Jinja treats a dot as a nested field.
+If `_data` conflicts with your top level data, use ``jinja_root_name`` to change its name.
+
+2. Standard Python Formatting Syntax
+
 The field names whose values will be used as the arguments can be passed with ``alert_text_args`` or ``alert_text_kw``.
-You may also refer to any top-level rule property in the ``alert_subject_args``, ``alert_text_args``, ``alert_missing_value``, and ``alert_text_kw fields``. However, if the matched document has a key with the same name, that will take preference over the rule property.
+You may also refer to any top-level rule property in the ``alert_subject_args``, ``alert_text_args``, ``alert_missing_value``, and ``alert_text_kw`` fields. However, if the matched document has a key with the same name, that will take preference over the rule property. ::
+
+    alert_text: "Something happened with {0} at {1}"
+    alert_text_type: alert_text_only
+    alert_text_args: ["username", "@timestamp"]
 
 By default::
 
@@ -1398,6 +1428,14 @@ With ``alert_text_type: alert_text_only``::
 
     alert_text
 
+
+With ``alert_text_type: alert_text_jinja``::
+
+    body                 = rule_name
+
+                           alert_text
+
+
 With ``alert_text_type: exclude_fields``::
 
     body                 = rule_name
@@ -1466,6 +1504,18 @@ Example usage using new-style format::
 
     command: ["/bin/send_alert", "--username", "{match[username]}"]
 
+Datadog
+~~~~~~~
+
+This alert will create a `Datadog Event <https://docs.datadoghq.com/events/>`_. Events are limited to 4000 characters. If an event is sent with
+a message longer than 4000 characters, only its first 4000 characters will be displayed.
+
+This alert requires two additional options:
+
+``datadog_api_key``: `Datadog API key <https://docs.datadoghq.com/account_management/api-app-keys/#api-keys>`_
+
+``datadog_app_key``: `Datadog application key <https://docs.datadoghq.com/account_management/api-app-keys/#application-keys>`_
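+
+A minimal usage sketch, following the pattern of the other alerters (both keys
+are placeholders)::
+
+    alert:
+      - datadog
+    datadog_api_key: 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx'
+    datadog_app_key: 'yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy'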
 
 Email
 ~~~~~
 
@@ -1516,7 +1566,7 @@ by the smtp server.
 
 ``bcc``: This adds the BCC emails to the list of recipients but does not show up in the email message. By default, this is left empty.
 
 ``email_format``: If set to ``html``, the email's MIME type will be set to HTML, and HTML content should correctly render. If you use this,
-you need to put your own HTML into ``alert_text`` and use ``alert_text_type: alert_text_only``.
+you need to put your own HTML into ``alert_text`` and use ``alert_text_type: alert_text_jinja`` or ``alert_text_type: alert_text_only``.
 
 Jira
 ~~~~
 
@@ -1543,6 +1593,8 @@ For an example JIRA account file, see ``example_rules/jira_acct.yaml``. The acco
 
 Optional:
 
+``jira_assignee``: Assigns an issue to a user.
+
 ``jira_component``: The name of the component or components to set the ticket to. This can be a single string or a list of strings. This is provided for backwards compatibility and will eventually be deprecated. It is preferable to use the plural ``jira_components`` instead.
 
 ``jira_components``: The name of the component or components to set the ticket to. This can be a single string or a list of strings.
 
@@ -1635,7 +1687,7 @@ OpsGenie alerter will create an alert which can be used to notify Operations peo
 integration must be created in order to acquire the necessary ``opsgenie_key`` rule variable. Currently the OpsGenieAlerter only creates an alert, however it could be extended to update or close existing alerts.
 
-It is necessary for the user to create an OpsGenie Rest HTTPS API `integration page `_ in order to create alerts.
+It is necessary for the user to create an OpsGenie Rest HTTPS API `integration page `_ in order to create alerts.
 
 The OpsGenie alert requires one option:
 
@@ -1644,10 +1696,10 @@ The OpsGenie alert requires one option:
 Optional:
 
 ``opsgenie_account``: The OpsGenie account to integrate with.
-
+``opsgenie_addr``: The OpsGenie URL to connect to, default is ``https://api.opsgenie.com/v2/alerts``
 ``opsgenie_recipients``: A list of OpsGenie recipients who will be notified by the alert.
 ``opsgenie_recipients_args``: Map of arguments used to format opsgenie_recipients.
-``opsgenie_default_recipients``: List of default recipients to notify when the formatting of opsgenie_recipients is unsuccessful.
+``opsgenie_default_receipients``: List of default recipients to notify when the formatting of opsgenie_recipients is unsuccessful.
 ``opsgenie_teams``: A list of OpsGenie teams to notify (useful for schedules with escalation).
 ``opsgenie_teams_args``: Map of arguments used to format opsgenie_teams (useful for assigning the alerts to teams based on some data)
 ``opsgenie_default_teams``: List of default teams to notify when the formatting of opsgenie_teams is unsuccessful.
@@ -1665,6 +1717,8 @@ Optional:
 
 ``opsgenie_details``: Map of custom key/value pairs to include in the alert's details. The value can be sourced from either fields in the first match, environment variables, or a constant value.
 
+``opsgenie_proxy``: By default ElastAlert will not use a network proxy to send notifications to OpsGenie. Set this option using ``hostname:port`` if you need to use a proxy.
+
 Example usage::
 
     opsgenie_details:
@@ -1672,83 +1726,55 @@ Example usage::
       Author: 'Bob Smith'          # constant value
       Environment: '$VAR'          # environment variable
       Message: { field: message }  # field in the first match
 
-SNS
-~~~
+AWS SNS
+~~~~~~~
 
-The SNS alerter will send an SNS notification. The body of the notification is formatted the same as with other alerters.
-The SNS alerter uses boto3 and can use credentials in the rule yaml, in a standard AWS credential and config files, or
+The AWS SNS alerter will send an AWS SNS notification. The body of the notification is formatted the same as with other alerters.
+The AWS SNS alerter uses boto3 and can use credentials in the rule yaml, in standard AWS credential and config files, or
 via environment variables. See http://docs.aws.amazon.com/cli/latest/userguide/cli-chap-getting-started.html for details.
 
-SNS requires one option:
+AWS SNS requires one option:
 
 ``sns_topic_arn``: The SNS topic's ARN. For example, ``arn:aws:sns:us-east-1:123456789:somesnstopic``
 
 Optional:
 
-``aws_access_key``: An access key to connect to SNS with.
-
-``aws_secret_key``: The secret key associated with the access key.
-
-``aws_region``: The AWS region in which the SNS resource is located. Default is us-east-1
-
-``profile``: The AWS profile to use. If none specified, the default will be used.
-
-HipChat
-~~~~~~~
-
-HipChat alerter will send a notification to a predefined HipChat room. The body of the notification is formatted the same as with other alerters.
-
-The alerter requires the following two options:
-
-``hipchat_auth_token``: The randomly generated notification token created by HipChat. Go to https://XXXXX.hipchat.com/account/api and use
-'Create new token' section, choosing 'Send notification' in Scopes list.
-
-``hipchat_room_id``: The id associated with the HipChat room you want to send the alert to. Go to https://XXXXX.hipchat.com/rooms and choose
-the room you want to post to. The room ID will be the numeric part of the URL.
-
-``hipchat_msg_color``: The color of the message background that is sent to HipChat. May be set to green, yellow or red. Default is red.
-
-``hipchat_domain``: The custom domain in case you have HipChat own server deployment. Default is api.hipchat.com.
-
-``hipchat_ignore_ssl_errors``: Ignore TLS errors (self-signed certificates, etc.). Default is false.
+``sns_aws_access_key_id``: An access key to connect to SNS with.
 
-``hipchat_proxy``: By default ElastAlert will not use a network proxy to send notifications to HipChat. Set this option using ``hostname:port`` if you need to use a proxy.
+``sns_aws_secret_access_key``: The secret key associated with the access key.
 
-``hipchat_notify``: When set to true, triggers a hipchat bell as if it were a user. Default is true.
+``sns_aws_region``: The AWS region in which the SNS resource is located. Default is us-east-1
 
-``hipchat_from``: When humans report to hipchat, a timestamp appears next to their name. For bots, the name is the name of the token. The from, instead of a timestamp, defaults to empty unless set, which you can do here. This is optional.
+``sns_aws_profile``: The AWS profile to use. If none specified, the default will be used.
 
-``hipchat_message_format``: Determines how the message is treated by HipChat and rendered inside HipChat applications
-html - Message is rendered as HTML and receives no special treatment. Must be valid HTML and entities must be escaped (e.g.: '&amp;' instead of '&'). May contain basic tags: a, b, i, strong, em, br, img, pre, code, lists, tables.
-text - Message is treated just like a message sent by a user. Can include @mentions, emoticons, pastes, and auto-detected URLs (Twitter, YouTube, images, etc).
-Valid values: html, text.
-Defaults to 'html'.
+Example usage when not using aws_profile::
 
-``hipchat_mentions``: When using a ``html`` message format, it's not possible to mentions specific users using the ``@user`` syntax.
-In that case, you can set ``hipchat_mentions`` to a list of users which will be first mentioned using a single text message, then the normal ElastAlert message will be sent to Hipchat.
-If set, it will mention the users, no matter if the original message format is set to HTML or text.
-Valid values: list of strings.
-Defaults to ``[]``.
-
-
-Stride
-~~~~~~~
-
-Stride alerter will send a notification to a predefined Stride room. The body of the notification is formatted the same as with other alerters.
-Simple HTML such as and tags will be parsed into a format that Stride can consume.
-
-The alerter requires the following two options:
+    alert:
+      - sns
+    sns_topic_arn: 'arn:aws:sns:us-east-1:123456789:somesnstopic'
+    sns_aws_access_key_id: 'XXXXXXXXXXXXXXXXXX'
+    sns_aws_secret_access_key: 'YYYYYYYYYYYYYYYYYYYY'
+    sns_aws_region: 'us-east-1' # You must nest aws_region within your alert configuration so it is not used to sign AWS requests.
 
-``stride_access_token``: The randomly generated notification token created by Stride.
+Example usage when using aws_profile::
 
-``stride_cloud_id``: The site_id associated with the Stride site you want to send the alert to.
+    # Create ~/.aws/credentials
 
-``stride_conversation_id``: The conversation_id associated with the Stride conversation you want to send the alert to.
+    [default]
+    aws_access_key_id = xxxxxxxxxxxxxxxxxxxx
+    aws_secret_access_key = yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy
 
-``stride_ignore_ssl_errors``: Ignore TLS errors (self-signed certificates, etc.). Default is false.
+    # Create ~/.aws/config
 
-``stride_proxy``: By default ElastAlert will not use a network proxy to send notifications to Stride. Set this option using ``hostname:port`` if you need to use a proxy.
+    [default]
+    region = us-east-1
+
+    # alert rule setting
+    alert:
+      - sns
+    sns_topic_arn: 'arn:aws:sns:us-east-1:123456789:somesnstopic'
+    sns_aws_profile: 'default'
 
 MS Teams
 ~~~~~~~~
@@ -1791,19 +1817,25 @@ Optional:
 ElastAlert rule. Any Apple emoji can be used, see http://emojipedia.org/apple/ . If slack_icon_url_override parameter is provided, emoji is ignored.
 
 ``slack_icon_url_override``: By default ElastAlert will use the :ghost: emoji when posting to the channel. You can provide icon_url to use custom image.
-Provide absolute address of the pciture, for example: http://some.address.com/image.jpg .
+Provide absolute address of the picture.
 
 ``slack_msg_color``: By default the alert will be posted with the 'danger' color. You can also use 'good' or 'warning' colors.
 
+``slack_parse_override``: By default the ``parse`` mode of the notification message is 'none'. You can also use 'full'.
+
+``slack_text_string``: Notification text you want to add to the message.
+
 ``slack_proxy``: By default ElastAlert will not use a network proxy to send notifications to Slack. Set this option using ``hostname:port`` if you need to use a proxy.
 
 ``slack_alert_fields``: You can add additional fields to your slack alerts using this field. Specify the title using `title` and a value for the field using `value`. Additionally you can specify whether or not this field should be a `short` field using `short: true`.
 
+``slack_ignore_ssl_errors``: By default ElastAlert will verify the SSL certificate. Set this option to False if you want to ignore SSL errors.
+
 ``slack_title``: Sets a title for the message, this shows up as a blue text at the start of the message
 
 ``slack_title_link``: You can add a link in your Slack notification by setting this to a valid URL. Requires slack_title to be set.
 
-``slack_timeout``: You can specify a timeout value, in seconds, for making communicating with Slac. The default is 10. If a timeout occurs, the alert will be retried next time elastalert cycles.
+``slack_timeout``: You can specify a timeout value, in seconds, for communicating with Slack. The default is 10. If a timeout occurs, the alert will be retried next time elastalert cycles.
 
 ``slack_attach_kibana_discover_url``: Enables the attachment of the ``kibana_discover_url`` to the slack notification. The config ``generate_kibana_discover_url`` must also be ``True`` in order to generate the url. Defaults to ``False``.
 
 ``slack_kibana_discover_title``: The title of the Kibana Discover url attachment. Defaults to ``Discover in Kibana``.
 
+``slack_ca_certs``: path to a CA cert bundle to use to verify SSL connections.
+
 Mattermost
 ~~~~~~~~~~
 
@@ -1831,7 +1865,7 @@ Optional:
 
 ``mattermost_channel_override``: Incoming webhooks have a default channel, but it can be overridden. A public channel can be specified "#other-channel", and a Direct Message with "@username".
 
 ``mattermost_icon_url_override``: By default ElastAlert will use the default webhook icon when posting to the channel. You can provide icon_url to use custom image.
-Provide absolute address of the picture (for example: http://some.address.com/image.jpg) or Base64 data url.
+Provide absolute address of the picture or Base64 data url.
 
 ``mattermost_msg_pretext``: You can set the message attachment pretext using this option.
 
@@ -1847,7 +1881,7 @@ Telegram alerter will send a notification to a predefined Telegram username or c
 
 The alerter requires the following two options:
 
-``telegram_bot_token``: The token is a string along the lines of ``110201543:AAHdqTcvCH1vGWJxfSeofSAs0K5PALDsaw`` that will be required to authorize the bot and send requests to the Bot API. You can learn about obtaining tokens and generating new ones in this document https://core.telegram.org/bots#botfather
+``telegram_bot_token``: The token is a string along the lines of ``110201543:AAHdqTcvCH1vGWJxfSeofSAs0K5PALDsaw`` that will be required to authorize the bot and send requests to the Bot API. You can learn about obtaining tokens and generating new ones in this document https://core.telegram.org/bots#6-botfather
 
 ``telegram_room_id``: Unique identifier for the target chat or username of the target channel using telegram chat_id (in the format "-xxxxxxxx")
 
@@ -1857,6 +1891,10 @@ Optional:
 
 ``telegram_proxy``: By default ElastAlert will not use a network proxy to send notifications to Telegram. Set this option using ``hostname:port`` if you need to use a proxy.
 
+``telegram_proxy_login``: The Telegram proxy auth username.
+
+``telegram_proxy_pass``: The Telegram proxy auth password.
+
 GoogleChat
 ~~~~~~~~~~
 GoogleChat alerter will send a notification to a predefined GoogleChat channel. The body of the notification is formatted the same as with other alerters.
@@ -1908,7 +1946,7 @@ V2 API Options (Optional):
 
 These options are specific to the PagerDuty V2 API
 
-See https://v2.developer.pagerduty.com/docs/send-an-event-events-api-v2
+See https://developer.pagerduty.com/docs/events-api-v2/trigger-events/
 
 ``pagerduty_api_version``: Defaults to `v1`. Set to `v2` to enable the PagerDuty V2 Event API.
 
@@ -1930,6 +1968,11 @@ See https://v2.developer.pagerduty.com/docs/send-an-event-events-api-v2
 
 ``pagerduty_v2_payload_source_args``: If set, and ``pagerduty_v2_payload_source`` is a formattable string, Elastalert will format the source based on the provided array of fields from the rule or match.
 
+``pagerduty_v2_payload_custom_details``: Map of keys:values to use as the content of the custom_details payload. For example, ``ip: clientip`` will map the value of the ``clientip`` field of the Elasticsearch document to a JSON key named ``ip``.
+
+``pagerduty_v2_payload_include_all_info``: If True, this will include the entire Elasticsearch document as a custom detail field called "information" in the PagerDuty alert.
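+
+For example, a sketch mapping two match fields into custom details (the field
+names are placeholders)::
+
+    pagerduty_v2_payload_custom_details:
+      ip: clientip
+      status: http_status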
+
 PagerTree
 ~~~~~~~~~
 
@@ -1939,6 +1982,8 @@ The alerter requires the following options:
 
 ``pagertree_integration_url``: URL generated by PagerTree for the integration.
 
+``pagertree_proxy``: By default ElastAlert will not use a network proxy to send notifications to PagerTree. Set this option using hostname:port if you need to use a proxy.
+
 Exotel
 ~~~~~~
 
@@ -1950,7 +1995,7 @@ The alerter requires the following option:
 
 ``exotel_auth_token``: Auth token associated with your Exotel account.
 
-If you don't know how to find your accound sid and auth token, refer - http://support.exotel.in/support/solutions/articles/3000023019-how-to-find-my-exotel-token-and-exotel-sid-
+If you don't know how to find your account sid and auth token, refer to https://support.exotel.com/support/solutions/articles/3000023019-how-to-find-my-exotel-token-and-exotel-sid
 
 ``exotel_to_number``: The phone number where you would like to send the notification.
 
@@ -1977,26 +2022,26 @@ The alerter requires the following option:
 
 ``twilio_from_number``: Your twilio phone number from which the message will be sent.
 
-VictorOps
-~~~~~~~~~
+Splunk On-Call (Formerly VictorOps)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-VictorOps alerter will trigger an incident to a predefined VictorOps routing key. The body of the notification is formatted the same as with other alerters.
+Splunk On-Call (Formerly VictorOps) alerter will trigger an incident to a predefined Splunk On-Call (Formerly VictorOps) routing key. The body of the notification is formatted the same as with other alerters.
 
 The alerter requires the following options:
 
 ``victorops_api_key``: API key generated under the 'REST Endpoint' in the Integrations settings.
 
-``victorops_routing_key``: VictorOps routing key to route the alert to.
+``victorops_routing_key``: Splunk On-Call (Formerly VictorOps) routing key to route the alert to.
 
-``victorops_message_type``: VictorOps field to specify severity level. Must be one of the following: INFO, WARNING, ACKNOWLEDGEMENT, CRITICAL, RECOVERY
+``victorops_message_type``: Splunk On-Call (Formerly VictorOps) field to specify severity level. Must be one of the following: INFO, WARNING, ACKNOWLEDGEMENT, CRITICAL, RECOVERY
 
 Optional:
 
-``victorops_entity_id``: The identity of the incident used by VictorOps to correlate incidents throughout the alert lifecycle. If not defined, VictorOps will assign a random string to each alert.
+``victorops_entity_id``: The identity of the incident used by Splunk On-Call (Formerly VictorOps) to correlate incidents throughout the alert lifecycle. If not defined, Splunk On-Call (Formerly VictorOps) will assign a random string to each alert.
 
 ``victorops_entity_display_name``: Human-readable name of alerting entity to summarize incidents without affecting the life-cycle workflow.
 
-``victorops_proxy``: By default ElastAlert will not use a network proxy to send notifications to VictorOps. Set this option using ``hostname:port`` if you need to use a proxy.
+``victorops_proxy``: By default ElastAlert will not use a network proxy to send notifications to Splunk On-Call (Formerly VictorOps). Set this option using ``hostname:port`` if you need to use a proxy.
 
 Gitter
 ~~~~~~
 
@@ -2021,7 +2066,7 @@ The ServiceNow alerter will create a new Incident in ServiceNow. The body of the
 
 The alerter requires the following options:
 
-``servicenow_rest_url``: The ServiceNow RestApi url, this will look like https://instancename.service-now.com/api/now/v1/table/incident
+``servicenow_rest_url``: The ServiceNow RestApi url, this will look like https://developer.servicenow.com/dev.do#!/reference/api/orlando/rest/c_TableAPI#r_TableAPI-POST
 
 ``username``: The ServiceNow Username to access the api.
 
@@ -2058,12 +2103,20 @@ Stomp
 This alert type will use the STOMP protocol in order to push a message to a broker like ActiveMQ or RabbitMQ. The message body is a JSON string containing the alert details.
 The default values will work with a pristine ActiveMQ installation.
 
-Optional:
+The alerter requires the following options:
 
 ``stomp_hostname``: The STOMP host to use, defaults to localhost.
+
 ``stomp_hostport``: The STOMP port to use, defaults to 61613.
+
 ``stomp_login``: The STOMP login to use, defaults to admin.
+
 ``stomp_password``: The STOMP password to use, defaults to admin.
+
+Optional:
+
+``stomp_ssl``: Connect to the STOMP host using TLS, defaults to False.
+
 ``stomp_destination``: The STOMP destination to use, defaults to /queue/ALERT
 
 The stomp_destination field depends on the broker; the /queue/ALERT example is the nomenclature used by ActiveMQ. Each broker has its own logic.
 
@@ -2072,7 +2125,7 @@ Alerta
 ~~~~~~
 
 Alerta alerter will post an alert in the Alerta server instance through the alert API endpoint.
-See http://alerta.readthedocs.io/en/latest/api/alert.html for more details on the Alerta JSON format.
+See https://docs.alerta.io/en/latest/api/alert.html for more details on the Alerta JSON format.
 
 For Alerta 5.0
 
@@ -2088,6 +2141,8 @@ Optional:
 
 ``alerta_use_match_timestamp``: If true, it will use the timestamp of the first match as the ``createTime`` of the alert. otherwise, the current server time is used.
 
+``alerta_api_skip_ssl``: Defaults to False.
+
 ``alert_missing_value``: Text to replace any match field not found when formatting strings. Defaults to ````.
 
 The following options dictate the values of the API JSON payload:
@@ -2179,6 +2234,20 @@ Example usage::
 
     http_post_headers:
       authorization: Basic 123dr3234
 
+Squadcast
+~~~~~~~~~
+
+Alerts can be sent to Squadcast using the `http post` method described above and Squadcast will process it and send Phone, SMS, Email and Push notifications to the relevant person(s) and let them take actions.
+
+Configuration variables in rules YAML file::
+
+    alert: post
+    http_post_url: <squadcast-endpoint-url>
+    http_post_static_payload:
+      Title: <incident-title>
+    http_post_all_values: true
+
+For more details, you can refer to the `Squadcast documentation `_.
 
 Alerter
 ~~~~~~~
 
@@ -2216,6 +2285,8 @@ Optional:
 
 ``hive_proxies``: Proxy configuration.
 
+``hive_verify``: Whether or not to enable SSL certificate validation. Defaults to False.
+
 ``hive_observable_data_mapping``: If needed, matched data fields can be mapped to TheHive observable types using python string formatting.
 
 Example usage::
 
@@ -2257,4 +2328,108 @@ Required:
 ``zbx_sender_host``: The address where zabbix server is running.
 ``zbx_sender_port``: The port where zabbix server is listening.
 ``zbx_host``: This field sets up the host in zabbix that receives the value sent by Elastalert.
-``zbx_item``: This field setup the item in the host that receives the value sent by Elastalert.
+``zbx_key``: This field sets up the key in the host that receives the value sent by Elastalert.
+
+
+Discord
+~~~~~~~
+
+Discord will send a notification to a Discord application. The body of the notification is formatted the same as with other alerters.
+
+Required:
+
+``discord_webhook_url``: The webhook URL.
+
+Optional:
+
+``discord_emoji_title``: By default ElastAlert will use the ``:warning:`` emoji when posting to the channel. You can use a different emoji per ElastAlert rule. Any Apple emoji can be used, see http://emojipedia.org/apple/ . If the discord_embed_icon_url parameter is provided, the emoji is ignored.
+
+``discord_proxy``: By default ElastAlert will not use a network proxy to send notifications to Discord. Set this option using hostname:port if you need to use a proxy.
+
+``discord_proxy_login``: The Discord proxy auth username.
+
+``discord_proxy_password``: The Discord proxy auth password.
+
+``discord_embed_color``: embed color. By default ``0xffffff``.
+
+``discord_embed_footer``: embed footer.
+
+``discord_embed_icon_url``: You can provide icon_url to use a custom image. Provide the absolute address of the picture.
+
+Dingtalk
+~~~~~~~~
+
+Dingtalk will send a notification to a Dingtalk application. The body of the notification is formatted the same as with other alerters.
+
+Required:
+
+``dingtalk_access_token``: Dingtalk access token.
+
+``dingtalk_msgtype``: Dingtalk message type: ``text``, ``markdown``, ``single_action_card``, or ``action_card``.
+
+dingtalk_msgtype single_action_card Required:
+
+``dingtalk_single_title``: The title of a single button.
+
+``dingtalk_single_url``: Jump link for a single button.
+
+dingtalk_msgtype action_card Required:
+
+``dingtalk_btns``: Buttons.
+
+dingtalk_msgtype action_card Optional:
+
+``dingtalk_btn_orientation``: "0": Buttons are arranged vertically. "1": Buttons are arranged horizontally.
+
+Example msgtype : text::
+
+    alert:
+      - dingtalk
+    dingtalk_access_token: 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx'
+    dingtalk_msgtype: 'text'
+
+
+Example msgtype : markdown::
+
+    alert:
+      - dingtalk
+    dingtalk_access_token: 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx'
+    dingtalk_msgtype: 'markdown'
+
+
+Example msgtype : single_action_card::
+
+    alert:
+      - dingtalk
+    dingtalk_access_token: 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx'
+    dingtalk_msgtype: 'single_action_card'
+    dingtalk_single_title: 'test3'
+    dingtalk_single_url: 'https://xxxx.xxx'
+
+
+Example msgtype : action_card::
+
+    alert:
+      - dingtalk
+    dingtalk_access_token: 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx'
+    dingtalk_msgtype: 'action_card'
+    dingtalk_btn_orientation: '0'
+    dingtalk_btns: [{'title': 'a', 'actionURL': 'https://xxxx1.xxx'}, {'title': 'b', 'actionURL': 'https://xxxx2.xxx'}]
+
+Chatwork
+~~~~~~~~
+
+Chatwork will send a notification to a Chatwork application. The body of the notification is formatted the same as with other alerters.
+
+Required:
+
+``chatwork_apikey``: ChatWork API KEY.
+
+``chatwork_room_id``: The ID of the room you are talking to in Chatwork. The room ID is the number after "rid" at the end of the room's URL in the browser.
+ +Example usage:: + + alert: + - chatwork + chatwork_apikey: 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx' + chatwork_room_id: 'xxxxxxxxx' diff --git a/docs/source/running_elastalert.rst b/docs/source/running_elastalert.rst index 8c87d875..1fc6eebe 100644 --- a/docs/source/running_elastalert.rst +++ b/docs/source/running_elastalert.rst @@ -10,7 +10,10 @@ Requirements - ISO8601 or Unix timestamped data - Python 3.6 - pip, see requirements.txt -- Packages on Ubuntu 14.x: python-pip python-dev libffi-dev libssl-dev +- Packages on Ubuntu 18.x: build-essential python3-pip python3.6 python3.6-dev libffi-dev libssl-dev +- Packages on Ubuntu 20.x: build-essential python3-pip python3.6 python3.6-dev libffi-dev libssl-dev +- Packages on CentOS 7.x: 'Development Tools' python3-pip python36 python3-devel python3-libs python3-setuptools libffi-devel openssl-devel +- Packages on CentOS 8.x: 'Development Tools' python3-pip python36 python3-devel python3-setuptools python3-libs libffi-devel openssl-devel Downloading and Configuring --------------------------- diff --git a/elastalert/alerts.py b/elastalert/alerts.py index d3fa7518..db398844 100644 --- a/elastalert/alerts.py +++ b/elastalert/alerts.py @@ -2,7 +2,6 @@ import copy import datetime import json -import logging import os import re import subprocess @@ -11,8 +10,9 @@ import uuid import warnings from email.mime.text import MIMEText +from email.mime.multipart import MIMEMultipart +from email.mime.image import MIMEImage from email.utils import formatdate -from html.parser import HTMLParser from smtplib import SMTP from smtplib import SMTP_SSL from smtplib import SMTPAuthenticationError @@ -63,7 +63,13 @@ def _ensure_new_line(self): def _add_custom_alert_text(self): missing = self.rule.get('alert_missing_value', '') alert_text = str(self.rule.get('alert_text', '')) - if 'alert_text_args' in self.rule: + if 'alert_text_jinja' == self.rule.get('alert_text_type'): + # Top fields are accessible via `{{field_name}}` or `{{jinja_root_name['field_name']}}` + # `jinja_root_name` dict is useful when accessing *fields with dots in their keys*, + # as Jinja treat dot as a nested field. + alert_text = self.rule.get("jinja_template").render(**self.match, + **{self.rule['jinja_root_name']: self.match}) + elif 'alert_text_args' in self.rule: alert_text_args = self.rule.get('alert_text_args') alert_text_values = [lookup_es_key(self.match, arg) for arg in alert_text_args] @@ -142,7 +148,7 @@ def __str__(self): self._add_custom_alert_text() self._ensure_new_line() - if self.rule.get('alert_text_type') != 'alert_text_only': + if self.rule.get('alert_text_type') != 'alert_text_only' and self.rule.get('alert_text_type') != 'alert_text_jinja': self._add_rule_text() self._ensure_new_line() if self.rule.get('top_count_keys'): @@ -371,7 +377,6 @@ def alert(self, matches): conn = stomp.Connection([(self.stomp_hostname, self.stomp_hostport)], use_ssl=self.stomp_ssl) - conn.start() conn.connect(self.stomp_login, self.stomp_password) # Ensures that the CONNECTED frame is received otherwise, the disconnect call will fail. 
time.sleep(1)
@@ -406,6 +411,8 @@ class EmailAlerter(Alerter):
 
     def __init__(self, *args):
         super(EmailAlerter, self).__init__(*args)
+        self.assets_dir = self.rule.get('assets_dir', '/tmp')
+        self.images_dictionary = dict(zip(self.rule.get('email_image_keys', []), self.rule.get('email_image_values', [])))
         self.smtp_host = self.rule.get('smtp_host', 'localhost')
         self.smtp_ssl = self.rule.get('smtp_ssl', False)
         self.from_addr = self.rule.get('from_addr', 'ElastAlert')
@@ -450,7 +457,17 @@ def alert(self, matches):
         if 'email_add_domain' in self.rule:
             to_addr = [name + self.rule['email_add_domain'] for name in to_addr]
         if self.rule.get('email_format') == 'html':
-            email_msg = MIMEText(body, 'html', _charset='UTF-8')
+            # Use a multipart container so inline images can be attached alongside the HTML body
+            email_msg = MIMEMultipart()
+            msgText = MIMEText(body, 'html', _charset='UTF-8')
+            email_msg.attach(msgText)
+
+            # Attach each configured image and reference it by Content-ID, so the
+            # HTML body can embed it via a cid: URL
+            for image_key in self.images_dictionary:
+                with open(os.path.join(self.assets_dir, self.images_dictionary[image_key]), 'rb') as fp:
+                    img = MIMEImage(fp.read())
+                img.add_header('Content-ID', '<{}>'.format(image_key))
+                email_msg.attach(img)
         else:
             email_msg = MIMEText(body, _charset='UTF-8')
         email_msg['Subject'] = self.create_title(matches)
@@ -586,7 +603,7 @@ def __init__(self, rule):
                 msg = '%s Both have common statuses of (%s). As such, no tickets will ever be found.' % (
                     msg, ','.join(intersection))
                 msg += ' This should be simplified to use only one or the other.'
-                logging.warning(msg)
+                elastalert_logger.warning(msg)
 
         self.reset_jira_args()
 
@@ -606,7 +623,7 @@ def set_priority(self):
             if self.priority is not None and self.client is not None:
                 self.jira_args['priority'] = {'id': self.priority_ids[self.priority]}
         except KeyError:
-            logging.error("Priority %s not found. Valid priorities are %s" % (self.priority, list(self.priority_ids.keys())))
+            elastalert_logger.error("Priority %s not found.
Valid priorities are %s" % (self.priority, list(self.priority_ids.keys()))) def reset_jira_args(self): self.jira_args = {'project': {'key': self.project}, @@ -749,7 +766,7 @@ def find_existing_ticket(self, matches): try: issues = self.client.search_issues(jql) except JIRAError as e: - logging.exception("Error while searching for JIRA ticket using jql '%s': %s" % (jql, e)) + elastalert_logger.exception("Error while searching for JIRA ticket using jql '%s': %s" % (jql, e)) return None if len(issues): @@ -792,19 +809,19 @@ def alert(self, matches): try: self.comment_on_ticket(ticket, match) except JIRAError as e: - logging.exception("Error while commenting on ticket %s: %s" % (ticket, e)) + elastalert_logger.exception("Error while commenting on ticket %s: %s" % (ticket, e)) if self.labels: for label in self.labels: try: ticket.fields.labels.append(label) except JIRAError as e: - logging.exception("Error while appending labels to ticket %s: %s" % (ticket, e)) + elastalert_logger.exception("Error while appending labels to ticket %s: %s" % (ticket, e)) if self.transition: elastalert_logger.info('Transitioning existing ticket %s' % (ticket.key)) try: self.transition_ticket(ticket) except JIRAError as e: - logging.exception("Error while transitioning ticket %s: %s" % (ticket, e)) + elastalert_logger.exception("Error while transitioning ticket %s: %s" % (ticket, e)) if self.pipeline is not None: self.pipeline['jira_ticket'] = ticket @@ -868,7 +885,9 @@ def create_default_title(self, matches, for_search=False): if for_search: return title - title += ' - %s' % (pretty_ts(matches[0][self.rule['timestamp_field']], self.rule.get('use_local_time'))) + timestamp = matches[0].get(self.rule['timestamp_field']) + if timestamp: + title += ' - %s' % (pretty_ts(timestamp, self.rule.get('use_local_time'))) # Add count for spikes count = matches[0].get('spike_count') @@ -893,13 +912,9 @@ def __init__(self, *args): if isinstance(self.rule['command'], str): self.shell = True if '%' in self.rule['command']: - logging.warning('Warning! You could be vulnerable to shell injection!') + elastalert_logger.warning('Warning! 
You could be vulnerable to shell injection!') self.rule['command'] = [self.rule['command']] - self.new_style_string_format = False - if 'new_style_string_format' in self.rule and self.rule['new_style_string_format']: - self.new_style_string_format = True - def alert(self, matches): # Format the command and arguments try: @@ -935,11 +950,11 @@ class SnsAlerter(Alerter): def __init__(self, *args): super(SnsAlerter, self).__init__(*args) self.sns_topic_arn = self.rule.get('sns_topic_arn', '') - self.aws_access_key_id = self.rule.get('aws_access_key_id') - self.aws_secret_access_key = self.rule.get('aws_secret_access_key') - self.aws_region = self.rule.get('aws_region', 'us-east-1') + self.sns_aws_access_key_id = self.rule.get('sns_aws_access_key_id') + self.sns_aws_secret_access_key = self.rule.get('sns_aws_secret_access_key') + self.sns_aws_region = self.rule.get('sns_aws_region', 'us-east-1') self.profile = self.rule.get('boto_profile', None) # Deprecated - self.profile = self.rule.get('aws_profile', None) + self.profile = self.rule.get('sns_aws_profile', None) def create_default_title(self, matches): subject = 'ElastAlert: %s' % (self.rule['name']) @@ -948,12 +963,15 @@ def alert(self, matches): body = self.create_alert_body(matches) - session = boto3.Session( - aws_access_key_id=self.aws_access_key_id, - aws_secret_access_key=self.aws_secret_access_key, - region_name=self.aws_region, - profile_name=self.profile - ) + if self.profile is None: + session = boto3.Session( + aws_access_key_id=self.sns_aws_access_key_id, + aws_secret_access_key=self.sns_aws_secret_access_key, + region_name=self.sns_aws_region + ) + else: + session = boto3.Session(profile_name=self.profile) + sns_client = session.client('sns') sns_client.publish( TopicArn=self.sns_topic_arn, @@ -963,92 +981,6 @@ def alert(self, matches): elastalert_logger.info("Sent sns notification to %s" % (self.sns_topic_arn)) -class HipChatAlerter(Alerter): - """ Creates a HipChat room notification for each alert """ - required_options = frozenset(['hipchat_auth_token', 'hipchat_room_id']) - - def __init__(self, rule): - super(HipChatAlerter, self).__init__(rule) - self.hipchat_msg_color = self.rule.get('hipchat_msg_color', 'red') - self.hipchat_message_format = self.rule.get('hipchat_message_format', 'html') - self.hipchat_auth_token = self.rule['hipchat_auth_token'] - self.hipchat_room_id = self.rule['hipchat_room_id'] - self.hipchat_domain = self.rule.get('hipchat_domain', 'api.hipchat.com') - self.hipchat_ignore_ssl_errors = self.rule.get('hipchat_ignore_ssl_errors', False) - self.hipchat_notify = self.rule.get('hipchat_notify', True) - self.hipchat_from = self.rule.get('hipchat_from', '') - self.url = 'https://%s/v2/room/%s/notification?auth_token=%s' % ( - self.hipchat_domain, self.hipchat_room_id, self.hipchat_auth_token) - self.hipchat_proxy = self.rule.get('hipchat_proxy', None) - - def create_alert_body(self, matches): - body = super(HipChatAlerter, self).create_alert_body(matches) - - # HipChat sends 400 bad request on messages longer than 10000 characters - if self.hipchat_message_format == 'html': - # Use appropriate line ending for text/html - br = '<br/>'
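The SnsAlerter above now treats static credentials and a named profile as mutually exclusive. A minimal rule sketch, assuming static sns_aws_* keys (the ARN and key values are placeholders):

alert: sns
sns_topic_arn: "arn:aws:sns:us-east-1:123456789012:elastalert"  # placeholder
sns_aws_region: "us-east-1"
sns_aws_access_key_id: "AKIA..."                                # placeholder
sns_aws_secret_access_key: "..."                                # placeholder
# or, instead of static keys:
# sns_aws_profile: "elastalert"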
- body = body.replace('\n', br) - - truncated_message = '<br/> 
...(truncated)' - truncate_to = 10000 - len(truncated_message) - else: - truncated_message = '..(truncated)' - truncate_to = 10000 - len(truncated_message) - - if (len(body) > 9999): - body = body[:truncate_to] + truncated_message - - return body - - def alert(self, matches): - body = self.create_alert_body(matches) - - # Post to HipChat - headers = {'content-type': 'application/json'} - # set https proxy, if it was provided - proxies = {'https': self.hipchat_proxy} if self.hipchat_proxy else None - payload = { - 'color': self.hipchat_msg_color, - 'message': body, - 'message_format': self.hipchat_message_format, - 'notify': self.hipchat_notify, - 'from': self.hipchat_from - } - - try: - if self.hipchat_ignore_ssl_errors: - requests.packages.urllib3.disable_warnings() - - if self.rule.get('hipchat_mentions', []): - ping_users = self.rule.get('hipchat_mentions', []) - ping_msg = payload.copy() - ping_msg['message'] = "ping {}".format( - ", ".join("@{}".format(user) for user in ping_users) - ) - ping_msg['message_format'] = "text" - - response = requests.post( - self.url, - data=json.dumps(ping_msg, cls=DateTimeEncoder), - headers=headers, - verify=not self.hipchat_ignore_ssl_errors, - proxies=proxies) - - response = requests.post(self.url, data=json.dumps(payload, cls=DateTimeEncoder), headers=headers, - verify=not self.hipchat_ignore_ssl_errors, - proxies=proxies) - warnings.resetwarnings() - response.raise_for_status() - except RequestException as e: - raise EAException("Error posting to HipChat: %s" % e) - elastalert_logger.info("Alert sent to HipChat room %s" % self.hipchat_room_id) - - def get_info(self): - return {'type': 'hipchat', - 'hipchat_room_id': self.hipchat_room_id} - - class MsTeamsAlerter(Alerter): """ Creates a Microsoft Teams Conversation Message for each alert """ required_options = frozenset(['ms_teams_webhook_url', 'ms_teams_alert_summary']) @@ -1205,7 +1137,7 @@ def alert(self, matches): if self.slack_ca_certs: verify = self.slack_ca_certs else: - verify = self.slack_ignore_ssl_errors + verify = not self.slack_ignore_ssl_errors if self.slack_ignore_ssl_errors: requests.packages.urllib3.disable_warnings() payload['channel'] = channel_override @@ -1357,6 +1289,8 @@ def __init__(self, rule): self.pagerduty_v2_payload_severity = self.rule.get('pagerduty_v2_payload_severity', 'critical') self.pagerduty_v2_payload_source = self.rule.get('pagerduty_v2_payload_source', 'ElastAlert') self.pagerduty_v2_payload_source_args = self.rule.get('pagerduty_v2_payload_source_args', None) + self.pagerduty_v2_payload_custom_details = self.rule.get('pagerduty_v2_payload_custom_details', {}) + self.pagerduty_v2_payload_include_all_info = self.rule.get('pagerduty_v2_payload_include_all_info', True) if self.pagerduty_api_version == 'v2': self.url = 'https://events.pagerduty.com/v2/enqueue' @@ -1369,6 +1303,13 @@ def alert(self, matches): # post to pagerduty headers = {'content-type': 'application/json'} if self.pagerduty_api_version == 'v2': + + custom_details_payload = {'information': body} if self.pagerduty_v2_payload_include_all_info else {} + if self.pagerduty_v2_payload_custom_details: + for match in matches: + for custom_details_key, es_key in list(self.pagerduty_v2_payload_custom_details.items()): + custom_details_payload[custom_details_key] = lookup_es_key(match, es_key) + payload = { 'routing_key': self.pagerduty_service_key, 'event_action': self.pagerduty_event_type, @@ -1389,9 +1330,7 @@ def alert(self, matches): self.pagerduty_v2_payload_source_args, matches), 'summary': 
self.create_title(matches), - 'custom_details': { - 'information': body, - }, + 'custom_details': custom_details_payload, }, } match_timestamp = lookup_es_key(matches[0], self.rule.get('timestamp_field', '@timestamp')) @@ -1917,7 +1856,7 @@ def get_json_payload(self, match): alerta_payload_dict = { 'resource': resolve_string(self.resource, match, self.missing_text), - 'severity': self.severity, + 'severity': resolve_string(self.severity, match), 'timeout': self.timeout, 'createTime': createTime, 'type': self.type, @@ -1956,6 +1895,8 @@ def __init__(self, rule): self.post_static_payload = self.rule.get('http_post_static_payload', {}) self.post_all_values = self.rule.get('http_post_all_values', not self.post_payload) self.post_http_headers = self.rule.get('http_post_headers', {}) + self.post_ca_certs = self.rule.get('http_post_ca_certs') + self.post_ignore_ssl_errors = self.rule.get('http_post_ignore_ssl_errors', False) self.timeout = self.rule.get('http_post_timeout', 10) def alert(self, matches): @@ -1969,12 +1910,18 @@ def alert(self, matches): "Content-Type": "application/json", "Accept": "application/json;charset=utf-8" } + if self.post_ca_certs: + verify = self.post_ca_certs + else: + verify = not self.post_ignore_ssl_errors + headers.update(self.post_http_headers) proxies = {'https': self.post_proxy} if self.post_proxy else None for url in self.post_url: try: response = requests.post(url, data=json.dumps(payload, cls=DateTimeEncoder), - headers=headers, proxies=proxies, timeout=self.timeout) + headers=headers, proxies=proxies, timeout=self.timeout, + verify=verify) response.raise_for_status() except RequestException as e: raise EAException("Error posting HTTP Post alert: %s" % e) @@ -1985,99 +1932,6 @@ def get_info(self): 'http_post_webhook_url': self.post_url} -class StrideHTMLParser(HTMLParser): - """Parse html into stride's fabric structure""" - - def __init__(self): - """ - Define a couple markup place holders. 
- """ - self.content = [] - self.mark = None - HTMLParser.__init__(self) - - def handle_starttag(self, tag, attrs): - """Identify and verify starting tag is fabric compatible.""" - if tag == 'b' or tag == 'strong': - self.mark = dict(type='strong') - if tag == 'u': - self.mark = dict(type='underline') - if tag == 'a': - self.mark = dict(type='link', attrs=dict(attrs)) - - def handle_endtag(self, tag): - """Clear mark on endtag.""" - self.mark = None - - def handle_data(self, data): - """Construct data node for our data.""" - node = dict(type='text', text=data) - if self.mark: - node['marks'] = [self.mark] - self.content.append(node) - - -class StrideAlerter(Alerter): - """ Creates a Stride conversation message for each alert """ - - required_options = frozenset( - ['stride_access_token', 'stride_cloud_id', 'stride_conversation_id']) - - def __init__(self, rule): - super(StrideAlerter, self).__init__(rule) - - self.stride_access_token = self.rule['stride_access_token'] - self.stride_cloud_id = self.rule['stride_cloud_id'] - self.stride_conversation_id = self.rule['stride_conversation_id'] - self.stride_ignore_ssl_errors = self.rule.get('stride_ignore_ssl_errors', False) - self.stride_proxy = self.rule.get('stride_proxy', None) - self.url = 'https://api.atlassian.com/site/%s/conversation/%s/message' % ( - self.stride_cloud_id, self.stride_conversation_id) - - def alert(self, matches): - body = self.create_alert_body(matches).strip() - - # parse body with StrideHTMLParser - parser = StrideHTMLParser() - parser.feed(body) - - # Post to Stride - headers = { - 'content-type': 'application/json', - 'Authorization': 'Bearer {}'.format(self.stride_access_token) - } - - # set https proxy, if it was provided - proxies = {'https': self.stride_proxy} if self.stride_proxy else None - - # build stride json payload - # https://developer.atlassian.com/cloud/stride/apis/document/structure/ - payload = {'body': {'version': 1, 'type': "doc", 'content': [ - {'type': "panel", 'attrs': {'panelType': "warning"}, 'content': [ - {'type': 'paragraph', 'content': parser.content} - ]} - ]}} - - try: - if self.stride_ignore_ssl_errors: - requests.packages.urllib3.disable_warnings() - response = requests.post( - self.url, data=json.dumps(payload, cls=DateTimeEncoder), - headers=headers, verify=not self.stride_ignore_ssl_errors, - proxies=proxies) - warnings.resetwarnings() - response.raise_for_status() - except RequestException as e: - raise EAException("Error posting to Stride: %s" % e) - elastalert_logger.info( - "Alert sent to Stride conversation %s" % self.stride_conversation_id) - - def get_info(self): - return {'type': 'stride', - 'stride_cloud_id': self.stride_cloud_id, - 'stride_converstation_id': self.stride_converstation_id} - - class LineNotifyAlerter(Alerter): """ Created a Line Notify for each alert """ required_option = frozenset(["linenotify_access_token"]) @@ -2138,11 +1992,12 @@ def alert(self, matches): alert_config = { 'artifacts': artifacts, - 'sourceRef': str(uuid.uuid4())[0:6], - 'customFields': {}, 'caseTemplate': None, + 'customFields': {}, + 'date': int(time.time()) * 1000, + 'description': self.create_alert_body(matches), + 'sourceRef': str(uuid.uuid4())[0:6], 'title': '{rule[index]}_{rule[name]}'.format(**context), - 'date': int(time.time()) * 1000 } alert_config.update(self.rule.get('hive_alert_config', {})) custom_fields = {} @@ -2154,7 +2009,10 @@ def alert(self, matches): n += 1 custom_fields[cf_key] = cf elif isinstance(alert_config_value, str): - alert_config[alert_config_field] = 
alert_config_value.format(**context) + alert_value = alert_config_value.format(**context) + if alert_config_field in ['severity', 'tlp']: + alert_value = int(alert_value) + alert_config[alert_config_field] = alert_value elif isinstance(alert_config_value, (list, tuple)): formatted_list = [] for element in alert_config_value: @@ -2182,3 +2040,225 @@ def get_info(self): 'type': 'hivealerter', 'hive_host': self.rule.get('hive_connection', {}).get('hive_host', '') } + + +class DiscordAlerter(Alerter): + + required_options = frozenset(['discord_webhook_url']) + + def __init__(self, rule): + super(DiscordAlerter, self).__init__(rule) + self.discord_webhook_url = self.rule['discord_webhook_url'] + self.discord_emoji_title = self.rule.get('discord_emoji_title', ':warning:') + self.discord_proxy = self.rule.get('discord_proxy', None) + self.discord_proxy_login = self.rule.get('discord_proxy_login', None) + self.discord_proxy_password = self.rule.get('discord_proxy_password', None) + self.discord_embed_color = self.rule.get('discord_embed_color', 0xffffff) + self.discord_embed_footer = self.rule.get('discord_embed_footer', None) + self.discord_embed_icon_url = self.rule.get('discord_embed_icon_url', None) + + def alert(self, matches): + body = '' + title = u'%s' % (self.create_title(matches)) + for match in matches: + body += str(BasicMatchString(self.rule, match)) + if len(matches) > 1: + body += '\n----------------------------------------\n' + if len(body) > 2047: + body = body[0:1950] + '\n *message was cropped according to discord embed description limits!* ' + + body += '```' + + proxies = {'https': self.discord_proxy} if self.discord_proxy else None + auth = HTTPProxyAuth(self.discord_proxy_login, self.discord_proxy_password) if self.discord_proxy_login else None + headers = {"Content-Type": "application/json"} + + data = {} + data["content"] = "%s %s %s" % (self.discord_emoji_title, title, self.discord_emoji_title) + data["embeds"] = [] + embed = {} + embed["description"] = "%s" % (body) + embed["color"] = (self.discord_embed_color) + + if self.discord_embed_footer: + embed["footer"] = {} + embed["footer"]["text"] = self.discord_embed_footer + embed["footer"]["icon_url"] = self.discord_embed_icon_url + + data["embeds"].append(embed) + + try: + response = requests.post(self.discord_webhook_url, data=json.dumps(data), headers=headers, proxies=proxies, auth=auth) + warnings.resetwarnings() + response.raise_for_status() + except RequestException as e: + raise EAException("Error posting to Discord: %s. Details: %s" % (e, "" if e.response is None else e.response.text))
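A minimal rule sketch for the DiscordAlerter above; the webhook and icon URLs are placeholders, and discord_embed_color accepts an integer (hex literals work):

alert: discord
discord_webhook_url: "https://discord.com/api/webhooks/000000/xxxx"  # placeholder
discord_emoji_title: ":warning:"
discord_embed_color: 0xE24D42
discord_embed_footer: "ElastAlert 2"
discord_embed_icon_url: "https://example.com/icon.png"               # placeholder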
Details: %s" % (e, "" if e.response is None else e.response.text)) + + elastalert_logger.info( + "Alert sent to the webhook %s" % self.discord_webhook_url) + + def get_info(self): + return {'type': 'discord', + 'discord_webhook_url': self.discord_webhook_url} + + +class DingTalkAlerter(Alerter): + """ Creates a DingTalk room message for each alert """ + required_options = frozenset(['dingtalk_access_token', 'dingtalk_msgtype']) + + def __init__(self, rule): + super(DingTalkAlerter, self).__init__(rule) + self.dingtalk_access_token = self.rule.get('dingtalk_access_token') + self.dingtalk_webhook_url = 'https://oapi.dingtalk.com/robot/send?access_token=%s' % (self.dingtalk_access_token) + self.dingtalk_msgtype = self.rule.get('dingtalk_msgtype') + self.dingtalk_single_title = self.rule.get('dingtalk_single_title', 'elastalert') + self.dingtalk_single_url = self.rule.get('dingtalk_single_url', '') + self.dingtalk_btn_orientation = self.rule.get('dingtalk_btn_orientation', '') + self.dingtalk_btns = self.rule.get('dingtalk_btns', []) + self.dingtalk_proxy = self.rule.get('dingtalk_proxy', None) + self.dingtalk_proxy_login = self.rule.get('dingtalk_proxy_login', None) + self.dingtalk_proxy_password = self.rule.get('dingtalk_proxy_pass', None) + + def format_body(self, body): + return body.encode('utf8') + + def alert(self, matches): + title = self.create_title(matches) + body = self.create_alert_body(matches) + + proxies = {'https': self.dingtalk_proxy} if self.dingtalk_proxy else None + auth = HTTPProxyAuth(self.dingtalk_proxy_login, self.dingtalk_proxy_password) if self.dingtalk_proxy_login else None + headers = { + 'Content-Type': 'application/json', + 'Accept': 'application/json;charset=utf-8' + } + + if self.dingtalk_msgtype == 'text': + # text + payload = { + 'msgtype': self.dingtalk_msgtype, + 'text': { + 'content': body + } + } + elif self.dingtalk_msgtype == 'markdown': + # markdown + payload = { + 'msgtype': self.dingtalk_msgtype, + 'markdown': { + 'title': title, + 'text': body + } + } + elif self.dingtalk_msgtype == 'single_action_card': + # singleActionCard + payload = { + 'msgtype': 'actionCard', + 'actionCard': { + 'title': title, + 'text': body, + 'singleTitle': self.dingtalk_single_title, + 'singleURL': self.dingtalk_single_url + } + } + elif self.dingtalk_msgtype == 'action_card': + # actionCard + payload = { + 'msgtype': 'actionCard', + 'actionCard': { + 'title': title, + 'text': body + } + } + if self.dingtalk_btn_orientation != '': + payload['actionCard']['btnOrientation'] = self.dingtalk_btn_orientation + if self.dingtalk_btns: + payload['actionCard']['btns'] = self.dingtalk_btns + + try: + response = requests.post(self.dingtalk_webhook_url, data=json.dumps(payload, + cls=DateTimeEncoder), headers=headers, proxies=proxies, auth=auth) + warnings.resetwarnings() + response.raise_for_status() + except RequestException as e: + raise EAException("Error posting to dingtalk: %s" % e) + + elastalert_logger.info("Trigger sent to dingtalk") + + def get_info(self): + return { + "type": "dingtalk", + "dingtalk_webhook_url": self.dingtalk_webhook_url + } + + +class ChatworkAlerter(Alerter): + """ Creates a Chatwork room message for each alert """ + required_options = frozenset(['chatwork_apikey', 'chatwork_room_id']) + + def __init__(self, rule): + super(ChatworkAlerter, self).__init__(rule) + self.chatwork_apikey = self.rule.get('chatwork_apikey') + self.chatwork_room_id = self.rule.get('chatwork_room_id') + self.url = 'https://api.chatwork.com/v2/rooms/%s/messages' % 
(self.chatwork_room_id) + self.chatwork_proxy = self.rule.get('chatwork_proxy', None) + self.chatwork_proxy_login = self.rule.get('chatwork_proxy_login', None) + self.chatwork_proxy_pass = self.rule.get('chatwork_proxy_pass', None) + + def alert(self, matches): + body = self.create_alert_body(matches) + + headers = {'X-ChatWorkToken': self.chatwork_apikey} + # set https proxy, if it was provided + proxies = {'https': self.chatwork_proxy} if self.chatwork_proxy else None + auth = HTTPProxyAuth(self.chatwork_proxy_login, self.chatwork_proxy_pass) if self.chatwork_proxy_login else None + params = {'body': body} + + try: + response = requests.post(self.url, params=params, headers=headers, proxies=proxies, auth=auth) + warnings.resetwarnings() + response.raise_for_status() + except RequestException as e: + raise EAException("Error posting to Chatwork: %s. Details: %s" % (e, "" if e.response is None else e.response.text)) + + elastalert_logger.info( + "Alert sent to Chatwork room %s" % self.chatwork_room_id) + + def get_info(self): + return { + "type": "chatwork", + "chatwork_room_id": self.chatwork_room_id + } + + +class DatadogAlerter(Alerter): + ''' Creates a Datadog Event for each alert ''' + required_options = frozenset(['datadog_api_key', 'datadog_app_key']) + + def __init__(self, rule): + super(DatadogAlerter, self).__init__(rule) + self.dd_api_key = self.rule.get('datadog_api_key', None) + self.dd_app_key = self.rule.get('datadog_app_key', None) + + def alert(self, matches): + url = 'https://api.datadoghq.com/api/v1/events' + headers = { + 'Content-Type': 'application/json', + 'DD-API-KEY': self.dd_api_key, + 'DD-APPLICATION-KEY': self.dd_app_key + } + payload = { + 'title': self.create_title(matches), + 'text': self.create_alert_body(matches) + } + try: + response = requests.post(url, data=json.dumps(payload, cls=DateTimeEncoder), headers=headers) + response.raise_for_status() + except RequestException as e: + raise EAException('Error posting event to Datadog: %s' % e) + elastalert_logger.info('Alert sent to Datadog') + + def get_info(self): + return {'type': 'datadog'} diff --git a/elastalert/elastalert.py b/elastalert/elastalert.py index f1e1d6b7..9a47b191 100755 --- a/elastalert/elastalert.py +++ b/elastalert/elastalert.py @@ -27,6 +27,7 @@ from elasticsearch.exceptions import ElasticsearchException from elasticsearch.exceptions import NotFoundError from elasticsearch.exceptions import TransportError +from .prometheus_wrapper import PrometheusWrapper from . import kibana from .alerts import DebugAlerter @@ -55,6 +56,7 @@ from .util import ts_now from .util import ts_to_dt from .util import unix_to_dt +from .util import ts_utc_to_tz class ElastAlerter(object): @@ -85,6 +87,11 @@ def parse_args(self, args): parser.add_argument('--rule', dest='rule', help='Run only a specific rule (by filename, must still be in rules folder)') parser.add_argument('--silence', dest='silence', help='Silence rule for a time period. Must be used with --rule. Usage: ' '--silence =, eg. --silence hours=2') + parser.add_argument( + "--silence_qk_value", + dest="silence_qk_value", + help="Silence the rule only for this specific query key value.", + ) parser.add_argument('--start', dest='start', help='YYYY-MM-DDTHH:MM:SS Start querying from this timestamp. ' 'Use "NOW" to start from current time. (Default: present)') parser.add_argument('--end', dest='end', help='YYYY-MM-DDTHH:MM:SS Query to this timestamp. (Default: present)')
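The two webhook-style alerters above need only their credentials and, for Chatwork, a room id. A minimal sketch that fans one rule out to both (all tokens are placeholders):

alert:
  - chatwork
  - datadog
chatwork_apikey: "xxxxxxxx"     # placeholder
chatwork_room_id: "1234567"     # placeholder
datadog_api_key: "xxxxxxxx"     # placeholder
datadog_app_key: "xxxxxxxx"     # placeholder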
@@ -107,6 +114,7 @@ def parse_args(self, args): dest='es_debug_trace', help='Enable logging from Elasticsearch queries as curl command. Queries will be logged to file. Note that ' 'this will incorrectly display localhost:9200 as the host/port') + parser.add_argument('--prometheus_port', type=int, dest='prometheus_port', help='Enables Prometheus metrics on specified port.') self.args = parser.parse_args(args) def __init__(self, args): @@ -161,6 +169,7 @@ def __init__(self, args): self.starttime = self.args.start self.disabled_rules = [] self.replace_dots_in_field_names = self.conf.get('replace_dots_in_field_names', False) + self.thread_data.alerts_sent = 0 self.thread_data.num_hits = 0 self.thread_data.num_dupes = 0 self.scheduler = BackgroundScheduler() @@ -172,13 +181,17 @@ def __init__(self, args): else: self.statsd = None self.add_metadata_alert = self.conf.get('add_metadata_alert', False) + self.prometheus_port = self.args.prometheus_port self.show_disabled_rules = self.conf.get('show_disabled_rules', True) self.writeback_es = elasticsearch_client(self.conf) remove = [] for rule in self.rules: - if not self.init_rule(rule): + if 'is_enabled' in rule and not rule['is_enabled']: + self.disabled_rules.append(rule) + remove.append(rule) + elif not self.init_rule(rule): remove.append(rule) list(map(self.rules.remove, remove)) @@ -409,7 +422,7 @@ def get_hits(self, rule, starttime, endtime, index, scroll=False): # Different versions of ES have this formatted in different ways. Fallback to str-ing the whole thing raise ElasticsearchException(str(res['_shards']['failures'])) - logging.debug(str(res)) + elastalert_logger.debug(str(res)) except ElasticsearchException as e: # Elasticsearch sometimes gives us GIGANTIC error messages # (so big that they will fill the entire terminal buffer) @@ -628,6 +641,11 @@ def run_query(self, rule, start=None, end=None, scroll=False): if end is None: end = ts_now() + if rule.get('query_timezone') != "": + elastalert_logger.info("Query start and end time converting UTC to query_timezone : {}".format(rule.get('query_timezone'))) + start = ts_utc_to_tz(start, rule.get('query_timezone')) + end = ts_utc_to_tz(end, rule.get('query_timezone')) + # Reset hit counter and query rule_inst = rule['type'] rule['scrolling_cycle'] = rule.get('scrolling_cycle', 0) + 1 @@ -852,7 +870,7 @@ def enhance_filter(self, rule): filters.append(query_str_filter) else: filters.append({'query': query_str_filter}) - logging.debug("Enhanced filter with {} terms: {}".format(listname, str(query_str_filter))) + elastalert_logger.debug("Enhanced filter with {} terms: {}".format(listname, str(query_str_filter))) def run_rule(self, rule, endtime, starttime=None): """ Run a rule for a given time period, including querying and alerting on results. @@ -879,15 +897,16 @@ def run_rule(self, rule, endtime, starttime=None): rule['original_starttime'] = rule['starttime'] rule['scrolling_cycle'] = 0 + self.thread_data.num_hits = 0 + self.thread_data.num_dupes = 0 + self.thread_data.cumulative_hits = 0 + # Don't run if starttime was set to the future if ts_now() <= rule['starttime']: - logging.warning("Attempted to use query start time in the future (%s), sleeping instead" % (starttime)) + elastalert_logger.warning("Attempted to use query start time in the future (%s), sleeping instead" % (starttime)) return 0 # Run the rule. 
If querying over a large time period, split it up into segments - self.thread_data.num_hits = 0 - self.thread_data.num_dupes = 0 - self.thread_data.cumulative_hits = 0 segment_size = self.get_segment_size(rule) tmp_endtime = rule['starttime'] @@ -976,7 +995,7 @@ def run_rule(self, rule, endtime, starttime=None): def init_rule(self, new_rule, new=True): ''' Copies some necessary non-config state from an exiting rule to a new rule. ''' - if not new: + if not new and self.scheduler.get_job(job_id=new_rule['name']): self.scheduler.remove_job(job_id=new_rule['name']) try: @@ -1090,12 +1109,21 @@ def load_rule_changes(self): try: new_rule = self.rules_loader.load_configuration(rule_file, self.conf) if not new_rule: - logging.error('Invalid rule file skipped: %s' % rule_file) + elastalert_logger.error('Invalid rule file skipped: %s' % rule_file) continue if 'is_enabled' in new_rule and not new_rule['is_enabled']: elastalert_logger.info('Rule file %s is now disabled.' % (rule_file)) # Remove this rule if it's been disabled self.rules = [rule for rule in self.rules if rule['rule_file'] != rule_file] + # Stop job if is running + if self.scheduler.get_job(job_id=new_rule['name']): + self.scheduler.remove_job(job_id=new_rule['name']) + # Append to disabled_rule + for disabled_rule in self.disabled_rules: + if disabled_rule['name'] == new_rule['name']: + break + else: + self.disabled_rules.append(new_rule) continue except EAException as e: message = 'Could not load rule %s: %s' % (rule_file, e) @@ -1114,7 +1142,6 @@ def load_rule_changes(self): # Re-enable if rule had been disabled for disabled_rule in self.disabled_rules: if disabled_rule['name'] == new_rule['name']: - self.rules.append(disabled_rule) self.disabled_rules.remove(disabled_rule) break @@ -1130,7 +1157,7 @@ def load_rule_changes(self): try: new_rule = self.rules_loader.load_configuration(rule_file, self.conf) if not new_rule: - logging.error('Invalid rule file skipped: %s' % rule_file) + elastalert_logger.error('Invalid rule file skipped: %s' % rule_file) continue if 'is_enabled' in new_rule and not new_rule['is_enabled']: continue @@ -1213,12 +1240,12 @@ def wait_until_responsive(self, timeout, clock=timeit.default_timer): time.sleep(1.0) if self.writeback_es.ping(): - logging.error( + elastalert_logger.error( 'Writeback alias "%s" does not exist, did you run `elastalert-create-index`?', self.writeback_alias, ) else: - logging.error( + elastalert_logger.error( 'Could not reach ElasticSearch at "%s:%d".', self.conf['es_host'], self.conf['es_port'], @@ -1312,7 +1339,7 @@ def handle_rule_execution(self, rule): # We were processing for longer than our refresh interval # This can happen if --start was specified with a large time period # or if we are running too slow to process events in real time. - logging.warning( + elastalert_logger.warning( "Querying from %s to %s took longer than %s!" 
% ( old_starttime, pretty_ts(endtime, rule.get('use_local_time')), @@ -1645,7 +1672,7 @@ def writeback(self, doc_type, body, rule=None, match_body=None): res = self.writeback_es.index(index=index, doc_type=doc_type, body=body) return res except ElasticsearchException as e: - logging.exception("Error writing alert info to Elasticsearch: %s" % (e)) + elastalert_logger.exception("Error writing alert info to Elasticsearch: %s" % (e)) def find_recent_pending_alerts(self, time_limit): """ Queries writeback_es to find alerts that did not send @@ -1673,7 +1700,7 @@ def find_recent_pending_alerts(self, time_limit): if res['hits']['hits']: return res['hits']['hits'] except ElasticsearchException as e: - logging.exception("Error finding recent pending alerts: %s %s" % (e, query)) + elastalert_logger.exception("Error finding recent pending alerts: %s %s" % (e, query)) return [] def send_pending_alerts(self): @@ -1751,7 +1778,7 @@ def get_aggregated_matches(self, _id): """ Removes and returns all matches from writeback_es that have aggregate_id == _id """ # XXX if there are more than self.max_aggregation matches, you have big alerts and we will leave entries in ES. - query = {'query': {'query_string': {'query': 'aggregate_id:%s' % (_id)}}, 'sort': {'@timestamp': 'asc'}} + query = {'query': {'query_string': {'query': 'aggregate_id:"%s"' % (_id)}}, 'sort': {'@timestamp': 'asc'}} matches = [] try: if self.writeback_es.is_atleastsixtwo(): @@ -1873,25 +1900,28 @@ def add_aggregated_alert(self, match, rule): def silence(self, silence_cache_key=None): """ Silence an alert for a period of time. --silence and --rule must be passed as args. """ if self.debug: - logging.error('--silence not compatible with --debug') + elastalert_logger.error('--silence not compatible with --debug') exit(1) if not self.args.rule: - logging.error('--silence must be used with --rule') + elastalert_logger.error('--silence must be used with --rule') exit(1) # With --rule, self.rules will only contain that specific rule if not silence_cache_key: - silence_cache_key = self.rules[0]['name'] + "._silence" + if self.args.silence_qk_value: + silence_cache_key = self.rules[0]['name'] + "." + self.args.silence_qk_value + else: + silence_cache_key = self.rules[0]['name'] + "._silence" try: silence_ts = parse_deadline(self.args.silence) except (ValueError, TypeError): - logging.error('%s is not a valid time period' % (self.args.silence)) + elastalert_logger.error('%s is not a valid time period' % (self.args.silence)) exit(1) if not self.set_realert(silence_cache_key, silence_ts, 0): - logging.error('Failed to save silence command to Elasticsearch') + elastalert_logger.error('Failed to save silence command to Elasticsearch') exit(1) elastalert_logger.info('Success. %s will be silenced until %s' % (silence_cache_key, silence_ts)) @@ -1952,7 +1982,7 @@ def is_silenced(self, rule_name): def handle_error(self, message, data=None): ''' Logs message at error level and writes message, data and traceback to Elasticsearch. ''' - logging.error(message) + elastalert_logger.error(message) body = {'message': message} tb = traceback.format_exc() body['traceback'] = tb.strip().split('\n') @@ -1962,7 +1992,7 @@ def handle_error(self, message, data=None): def handle_uncaught_exception(self, exception, rule): """ Disables a rule and sends a notification. 
""" - logging.error(traceback.format_exc()) + elastalert_logger.error(traceback.format_exc()) self.handle_error('Uncaught exception running rule %s: %s' % (rule['name'], exception), {'rule': rule['name']}) if self.disable_rules_on_error: self.rules = [running_rule for running_rule in self.rules if running_rule['name'] != rule['name']] @@ -2076,6 +2106,11 @@ def main(args=None): if not args: args = sys.argv[1:] client = ElastAlerter(args) + + if client.prometheus_port and not client.debug: + p = PrometheusWrapper(client) + p.start() + if not client.args.silence: client.start() diff --git a/elastalert/es_mappings/6/elastalert.json b/elastalert/es_mappings/6/elastalert.json index 645a6776..2cc97bcf 100644 --- a/elastalert/es_mappings/6/elastalert.json +++ b/elastalert/es_mappings/6/elastalert.json @@ -29,6 +29,7 @@ "format": "dateOptionalTime" }, "match_body": { + "enabled": "false", "type": "object" }, "aggregate_id": { diff --git a/elastalert/kibana_discover.py b/elastalert/kibana_discover.py index 7e4dbb5d..58e3476f 100644 --- a/elastalert/kibana_discover.py +++ b/elastalert/kibana_discover.py @@ -8,20 +8,21 @@ import urllib.parse from .util import EAException +from .util import elastalert_logger from .util import lookup_es_key from .util import ts_add kibana_default_timedelta = datetime.timedelta(minutes=10) kibana5_kibana6_versions = frozenset(['5.6', '6.0', '6.1', '6.2', '6.3', '6.4', '6.5', '6.6', '6.7', '6.8']) -kibana7_versions = frozenset(['7.0', '7.1', '7.2', '7.3']) +kibana7_versions = frozenset(['7.0', '7.1', '7.2', '7.3', '7.4', '7.5', '7.6', '7.7', '7.8', '7.9', '7.10', '7.11', '7.12']) def generate_kibana_discover_url(rule, match): ''' Creates a link for a kibana discover app. ''' discover_app_url = rule.get('kibana_discover_app_url') if not discover_app_url: - logging.warning( + elastalert_logger.warning( 'Missing kibana_discover_app_url for rule %s' % ( rule.get('name', '') ) @@ -30,7 +31,7 @@ def generate_kibana_discover_url(rule, match): kibana_version = rule.get('kibana_discover_version') if not kibana_version: - logging.warning( + elastalert_logger.warning( 'Missing kibana_discover_version for rule %s' % ( rule.get('name', '') ) @@ -39,7 +40,7 @@ def generate_kibana_discover_url(rule, match): index = rule.get('kibana_discover_index_pattern_id') if not index: - logging.warning( + elastalert_logger.warning( 'Missing kibana_discover_index_pattern_id for rule %s' % ( rule.get('name', '') ) @@ -70,7 +71,7 @@ def generate_kibana_discover_url(rule, match): appState = kibana_discover_app_state(index, columns, filters, query_keys, match) else: - logging.warning( + elastalert_logger.warning( 'Unknown kibana discover application version %s for rule %s' % ( kibana_version, rule.get('name', '') diff --git a/elastalert/loaders.py b/elastalert/loaders.py index 24a1cca6..0cbd0d26 100644 --- a/elastalert/loaders.py +++ b/elastalert/loaders.py @@ -2,13 +2,15 @@ import copy import datetime import hashlib -import logging import os import sys import jsonschema import yaml import yaml.scanner +from jinja2 import Template +from jinja2 import Environment +from jinja2 import FileSystemLoader from staticconf.loader import yaml_loader from . 
@@ -20,11 +22,13 @@ from .util import dt_to_unix from .util import dt_to_unixms from .util import EAException +from .util import elastalert_logger from .util import get_module from .util import ts_to_dt from .util import ts_to_dt_with_format from .util import unix_to_dt from .util import unixms_to_dt +from .zabbix import ZabbixAlerter class RulesLoader(object): @@ -62,8 +66,6 @@ class RulesLoader(object): 'debug': alerts.DebugAlerter, 'command': alerts.CommandAlerter, 'sns': alerts.SnsAlerter, - 'hipchat': alerts.HipChatAlerter, - 'stride': alerts.StrideAlerter, 'ms_teams': alerts.MsTeamsAlerter, 'slack': alerts.SlackAlerter, 'mattermost': alerts.MattermostAlerter, @@ -77,7 +79,14 @@ class RulesLoader(object): 'servicenow': alerts.ServiceNowAlerter, 'alerta': alerts.AlertaAlerter, 'post': alerts.HTTPPostAlerter, - 'hivealerter': alerts.HiveAlerter + 'pagertree': alerts.PagerTreeAlerter, + 'linenotify': alerts.LineNotifyAlerter, + 'hivealerter': alerts.HiveAlerter, + 'zabbix': ZabbixAlerter, + 'discord': alerts.DiscordAlerter, + 'dingtalk': alerts.DingTalkAlerter, + 'chatwork': alerts.ChatworkAlerter, + 'datadog': alerts.DatadogAlerter } # A partial ordering of alert types. Relative order will be preserved in the resulting alerts list @@ -89,6 +98,8 @@ class RulesLoader(object): base_config = {} + jinja_environment = Environment(loader=FileSystemLoader("")) + def __init__(self, conf): # schema for rule yaml self.rule_schema = jsonschema.Draft7Validator( @@ -115,10 +126,7 @@ def load(self, conf, args=None): rule = self.load_configuration(rule_file, conf, args) # A rule failed to load, don't try to process it if not rule: - logging.error('Invalid rule file skipped: %s' % rule_file) - continue - # By setting "is_enabled: False" in rule file, a rule is easily disabled - if 'is_enabled' in rule and not rule['is_enabled']: + elastalert_logger.error('Invalid rule file skipped: %s' % rule_file) continue if rule['name'] in names: raise EAException('Duplicate rule named %s' % (rule['name'])) @@ -282,6 +290,8 @@ def load_options(self, rule, conf, filename, args=None): rule.setdefault('_source_enabled', True) rule.setdefault('use_local_time', True) rule.setdefault('description', "") + rule.setdefault('jinja_root_name', "_data") + rule.setdefault('query_timezone', "") # Set timestamp_type conversion function, used when generating queries and processing hits rule['timestamp_type'] = rule['timestamp_type'].strip().lower() @@ -318,13 +328,6 @@ def _dt_to_ts_with_format(dt): rule.setdefault('client_cert', conf.get('client_cert')) rule.setdefault('client_key', conf.get('client_key')) - # Set HipChat options from global config - rule.setdefault('hipchat_msg_color', 'red') - rule.setdefault('hipchat_domain', 'api.hipchat.com') - rule.setdefault('hipchat_notify', True) - rule.setdefault('hipchat_from', '') - rule.setdefault('hipchat_ignore_ssl_errors', False) - # Make sure we have required options if self.required_locals - frozenset(list(rule.keys())): raise EAException('Missing required option(s): %s' % (', '.join(self.required_locals - frozenset(list(rule.keys()))))) @@ -396,14 +399,22 @@ def _dt_to_ts_with_format(dt): if rule.get('use_strftime_index'): for token in ['%y', '%M', '%D']: if token in rule.get('index'): - logging.warning('Did you mean to use %s in the index? ' 'The index will be formatted like %s' % (token, datetime.datetime.now().strftime( rule.get('index')))) + elastalert_logger.warning('Did you mean to use %s in the index? ' 'The index will be formatted like %s' % (token, datetime.datetime.now().strftime( rule.get('index'))))
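The Jinja options compiled just below can be driven from a rule file. A minimal sketch, assuming match fields are exposed under the default jinja_root_name of _data (the field name is hypothetical):

alert_text_type: alert_text_jinja
alert_text: "Host {{ _data['hostname'] }} triggered this rule"
# or render a template file instead of the inline alert_text:
# jinja_template_path: "/opt/elastalert/templates/alert.j2"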
if rule.get('scan_entire_timeframe') and not rule.get('timeframe'): raise EAException('scan_entire_timeframe can only be used if there is a timeframe specified') + # Compile Jinja Template + if rule.get('alert_text_type') == 'alert_text_jinja': + jinja_template_path = rule.get('jinja_template_path') + if jinja_template_path: + rule["jinja_template"] = self.jinja_environment.get_or_select_template(jinja_template_path) + else: + rule["jinja_template"] = Template(str(rule.get('alert_text', ''))) + def load_modules(self, rule, args=None): """ Loads things that could be modules. Enhancements, alerts and rule type. """ # Set match enhancements @@ -488,7 +499,7 @@ def adjust_deprecated_values(rule): rule['http_post_proxy'] = rule['simple_proxy'] if 'simple_webhook_url' in rule: rule['http_post_url'] = rule['simple_webhook_url'] - logging.warning( + elastalert_logger.warning( '"simple" alerter has been renamed "post" and comptability may be removed in a future release.') diff --git a/elastalert/opsgenie.py b/elastalert/opsgenie.py index bcdaf2d0..8db52d89 100644 --- a/elastalert/opsgenie.py +++ b/elastalert/opsgenie.py @@ -1,6 +1,5 @@ # -*- coding: utf-8 -*- import json -import logging import os.path import requests @@ -46,11 +45,11 @@ def _parse_responders(self, responders, responder_args, matches, default_respond try: formated_responders.append(responder.format(**responders_values)) except KeyError as error: - logging.warn("OpsGenieAlerter: Cannot create responder for OpsGenie Alert. Key not foud: %s. " % (error)) + elastalert_logger.warning("OpsGenieAlerter: Cannot create responder for OpsGenie Alert. Key not found: %s. " % (error)) if not formated_responders: - logging.warn("OpsGenieAlerter: no responders can be formed. Trying the default responder ") + elastalert_logger.warning("OpsGenieAlerter: no responders can be formed. Trying the default responder ") if not default_responders: - logging.warn("OpsGenieAlerter: default responder not set. Falling back") + elastalert_logger.warning("OpsGenieAlerter: default responder not set. Falling back") formated_responders = responders else: formated_responders = default_responders @@ -90,7 +89,7 @@ def alert(self, matches): post['tags'] = self.tags if self.priority and self.priority not in ('P1', 'P2', 'P3', 'P4', 'P5'): - logging.warn("Priority level does not appear to be specified correctly. \ + elastalert_logger.warning("Priority level does not appear to be specified correctly. 
\ Please make sure to set it to a value between P1 and P5") else: post['priority'] = self.priority @@ -102,7 +101,7 @@ def alert(self, matches): if details: post['details'] = details - logging.debug(json.dumps(post)) + elastalert_logger.debug(json.dumps(post)) headers = { 'Content-Type': 'application/json', @@ -114,12 +113,12 @@ try: r = requests.post(self.to_addr, json=post, headers=headers, proxies=proxies) - logging.debug('request response: {0}'.format(r)) + elastalert_logger.debug('request response: {0}'.format(r)) if r.status_code != 202: elastalert_logger.info("Error response from {0} \n " "API Response: {1}".format(self.to_addr, r)) r.raise_for_status() - logging.info("Alert sent to OpsGenie") + elastalert_logger.info("Alert sent to OpsGenie") except Exception as err: raise EAException("Error sending alert: {0}".format(err)) diff --git a/elastalert/prometheus_wrapper.py b/elastalert/prometheus_wrapper.py new file mode 100644 index 00000000..d94a3520 --- /dev/null +++ b/elastalert/prometheus_wrapper.py @@ -0,0 +1,55 @@ +import prometheus_client + + +class PrometheusWrapper: + """ Exposes ElastAlert metrics on a Prometheus metrics endpoint. Wraps ElastAlerter run_rule and writeback to collect metrics. """ + + def __init__(self, client): + self.prometheus_port = client.prometheus_port + self.run_rule = client.run_rule + self.writeback = client.writeback + + client.run_rule = self.metrics_run_rule + client.writeback = self.metrics_writeback + + # initialize prometheus metrics to be exposed + self.prom_scrapes = prometheus_client.Counter('elastalert_scrapes', 'Number of scrapes for rule', ['rule_name']) + self.prom_hits = prometheus_client.Counter('elastalert_hits', 'Number of hits for rule', ['rule_name']) + self.prom_matches = prometheus_client.Counter('elastalert_matches', 'Number of matches for rule', ['rule_name']) + self.prom_time_taken = prometheus_client.Counter('elastalert_time_taken', 'Time taken to evaluate rule', ['rule_name']) + self.prom_alerts_sent = prometheus_client.Counter('elastalert_alerts_sent', 'Number of alerts sent for rule', ['rule_name']) + self.prom_alerts_not_sent = prometheus_client.Counter('elastalert_alerts_not_sent', 'Number of alerts not sent', ['rule_name']) + self.prom_errors = prometheus_client.Counter('elastalert_errors', 'Number of errors for rule') + self.prom_alerts_silenced = prometheus_client.Counter('elastalert_alerts_silenced', 'Number of silenced alerts', ['rule_name']) + + def start(self): + prometheus_client.start_http_server(self.prometheus_port) + + def metrics_run_rule(self, rule, endtime, starttime=None): + """ Increment counter every time rule is run """ + try: + self.prom_scrapes.labels(rule['name']).inc() + finally: + return self.run_rule(rule, endtime, starttime) + + def metrics_writeback(self, doc_type, body): + """ Update various prometheus metrics according to the doc_type """ + + res = self.writeback(doc_type, body) + try: + if doc_type == 'elastalert_status': + self.prom_hits.labels(body['rule_name']).inc(int(body['hits'])) + self.prom_matches.labels(body['rule_name']).inc(int(body['matches'])) + self.prom_time_taken.labels(body['rule_name']).inc(float(body['time_taken'])) + elif doc_type == 'elastalert': + if body['alert_sent']: + self.prom_alerts_sent.labels(body['rule_name']).inc() + else: + self.prom_alerts_not_sent.labels(body['rule_name']).inc() + elif doc_type == 'elastalert_error': + self.prom_errors.inc() + elif doc_type == 'silence': 
self.prom_alerts_silenced.labels(body['rule_name']).inc() + finally: + return res diff --git a/elastalert/ruletypes.py b/elastalert/ruletypes.py index 2f1d2f82..42fcc95b 100644 --- a/elastalert/ruletypes.py +++ b/elastalert/ruletypes.py @@ -3,7 +3,7 @@ import datetime import sys -from blist import sortedlist +from sortedcontainers import SortedKeyList as sortedlist from .util import add_raw_postfix from .util import dt_to_ts @@ -318,12 +318,14 @@ def append(self, event): This will also pop the oldest events and call onRemoved on them until the window size is less than timeframe. """ self.data.add(event) - self.running_count += event[1] + if event and event[1]: + self.running_count += event[1] while self.duration() >= self.timeframe: oldest = self.data[0] self.data.remove(oldest) - self.running_count -= oldest[1] + if oldest and oldest[1]: + self.running_count -= oldest[1] self.onRemoved and self.onRemoved(oldest) def duration(self): @@ -351,6 +353,20 @@ def mean(self): else: return None + def min(self): + """ The minimum of the value_field in the window. """ + if len(self.data) > 0: + return min([x[1] for x in self.data]) + else: + return None + + def max(self): + """ The maximum of the value_field in the window. """ + if len(self.data) > 0: + return max([x[1] for x in self.data]) + else: + return None + def __iter__(self): return iter(self.data) @@ -363,7 +379,8 @@ def append_middle(self, event): # Append left if ts is earlier than first event if self.get_ts(self.data[0]) > ts: self.data.appendleft(event) - self.running_count += event[1] + if event and event[1]: + self.running_count += event[1] return # Rotate window until we can insert event @@ -374,7 +391,8 @@ def append_middle(self, event): # This should never happen return self.data.append(event) - self.running_count += event[1] + if event and event[1]: + self.running_count += event[1] self.data.rotate(-rotation) @@ -422,17 +440,33 @@ def add_data(self, data): if qk is None: qk = 'other' if self.field_value is not None: - count = lookup_es_key(event, self.field_value) - if count is not None: - try: - count = int(count) - except ValueError: - elastalert_logger.warn('{} is not a number: {}'.format(self.field_value, count)) - else: - self.handle_event(event, count, qk) + if self.field_value in event: + count = lookup_es_key(event, self.field_value) + if count is not None: + try: + count = int(count) + except ValueError: + elastalert_logger.warning('{} is not a number: {}'.format(self.field_value, count)) + else: + self.handle_event(event, count, qk) else: self.handle_event(event, 1, qk) + def get_spike_values(self, qk): + """ + extending ref/cur value retrieval logic for spike aggregations + """ + spike_check_type = self.rules.get('metric_agg_type') + if spike_check_type in [None, 'sum', 'value_count']: + # default count logic is appropriate in all these cases + return self.ref_windows[qk].count(), self.cur_windows[qk].count() + elif spike_check_type == 'avg': + return self.ref_windows[qk].mean(), self.cur_windows[qk].mean() + elif spike_check_type == 'min': + return self.ref_windows[qk].min(), self.cur_windows[qk].min() + elif spike_check_type == 'max': + return self.ref_windows[qk].max(), self.cur_windows[qk].max() + def clear_windows(self, qk, event): # Reset the state and prevent alerts until windows filled again self.ref_windows[qk].clear() @@ -470,7 +504,8 @@ def handle_event(self, event, count, qk='all'): self.add_match(match, qk) self.clear_windows(qk, match) else: - if self.find_matches(self.ref_windows[qk].count(), 
self.cur_windows[qk].count()): + ref, cur = self.get_spike_values(qk) + if self.find_matches(ref, cur): # skip over placeholder events which have count=0 for match, count in self.cur_windows[qk].data: if count: @@ -482,8 +517,7 @@ def handle_event(self, event, count, qk='all'): def add_match(self, match, qk): extra_info = {} if self.field_value is None: - spike_count = self.cur_windows[qk].count() - reference_count = self.ref_windows[qk].count() + reference_count, spike_count = self.get_spike_values(qk) else: spike_count = self.cur_windows[qk].mean() reference_count = self.ref_windows[qk].mean() @@ -674,7 +708,7 @@ def get_all_terms(self, args): time_filter = {self.rules['timestamp_field']: {'lt': self.rules['dt_to_ts'](tmp_end), 'gte': self.rules['dt_to_ts'](tmp_start)}} query_template['filter'] = {'bool': {'must': [{'range': time_filter}]}} - query = {'aggs': {'filtered': query_template}} + query = {'aggs': {'filtered': query_template}, 'size': 0} if 'filter' in self.rules: for item in self.rules['filter']: @@ -1026,6 +1060,7 @@ class MetricAggregationRule(BaseAggregationRule): """ A rule that matches when there is a low number of events given a timeframe. """ required_options = frozenset(['metric_agg_key', 'metric_agg_type']) allowed_aggregations = frozenset(['min', 'max', 'avg', 'sum', 'cardinality', 'value_count']) + allowed_percent_aggregations = frozenset(['percentiles']) def __init__(self, *args): super(MetricAggregationRule, self).__init__(*args) @@ -1035,8 +1070,10 @@ def __init__(self, *args): self.metric_key = 'metric_' + self.rules['metric_agg_key'] + '_' + self.rules['metric_agg_type'] - if not self.rules['metric_agg_type'] in self.allowed_aggregations: + if not self.rules['metric_agg_type'] in self.allowed_aggregations.union(self.allowed_percent_aggregations): raise EAException("metric_agg_type must be one of %s" % (str(self.allowed_aggregations))) + if self.rules['metric_agg_type'] in self.allowed_percent_aggregations and self.rules['percentile_range'] is None: + raise EAException("percentile_range must be specified for percentiles aggregation") self.rules['aggregation_query_element'] = self.generate_aggregation_query() @@ -1051,14 +1088,20 @@ def get_match_str(self, match): return message def generate_aggregation_query(self): - return {self.metric_key: {self.rules['metric_agg_type']: {'field': self.rules['metric_agg_key']}}} + query = {self.metric_key: {self.rules['metric_agg_type']: {'field': self.rules['metric_agg_key']}}} + if self.rules['metric_agg_type'] in self.allowed_percent_aggregations: + query[self.metric_key][self.rules['metric_agg_type']]['percents'] = [self.rules['percentile_range']] + return query def check_matches(self, timestamp, query_key, aggregation_data): if "compound_query_key" in self.rules: self.check_matches_recursive(timestamp, query_key, aggregation_data, self.rules['compound_query_key'], dict()) else: - metric_val = aggregation_data[self.metric_key]['value'] + if self.rules['metric_agg_type'] in self.allowed_percent_aggregations: + metric_val = list(aggregation_data[self.metric_key]['values'].values())[0] + else: + metric_val = aggregation_data[self.metric_key]['value'] if self.crossed_thresholds(metric_val): match = {self.rules['timestamp_field']: timestamp, self.metric_key: metric_val} @@ -1079,18 +1122,20 @@ def check_matches_recursive(self, timestamp, query_key, aggregation_data, compou result, compound_keys[1:], match_data) - else: - metric_val = aggregation_data[self.metric_key]['value'] - if self.crossed_thresholds(metric_val): - 
match_data[self.rules['timestamp_field']] = timestamp - match_data[self.metric_key] = metric_val - - # add compound key to payload to allow alerts to trigger for every unique occurence - compound_value = [match_data[key] for key in self.rules['compound_query_key']] - match_data[self.rules['query_key']] = ",".join([str(value) for value in compound_value]) + if 'interval_aggs' in aggregation_data: + metric_val_arr = [term[self.metric_key]['value'] for term in aggregation_data['interval_aggs']['buckets']] + else: + metric_val_arr = [aggregation_data[self.metric_key]['value']] + for metric_val in metric_val_arr: + if self.crossed_thresholds(metric_val): + match_data[self.rules['timestamp_field']] = timestamp + match_data[self.metric_key] = metric_val - self.add_match(match_data) + # add compound key to payload to allow alerts to trigger for every unique occurrence + compound_value = [match_data[key] for key in self.rules['compound_query_key']] + match_data[self.rules['query_key']] = ",".join([str(value) for value in compound_value]) + self.add_match(match_data) def crossed_thresholds(self, metric_value): if metric_value is None: @@ -1106,6 +1151,7 @@ class SpikeMetricAggregationRule(BaseAggregationRule, SpikeRule): """ A rule that matches when there is a spike in an aggregated event compared to its reference point """ required_options = frozenset(['metric_agg_key', 'metric_agg_type', 'spike_height', 'spike_type']) allowed_aggregations = frozenset(['min', 'max', 'avg', 'sum', 'cardinality', 'value_count']) + allowed_percent_aggregations = frozenset(['percentiles']) def __init__(self, *args): # We inherit everything from BaseAggregation and Spike, overwrite only what we need in functions below @@ -1113,8 +1159,11 @@ def __init__(self, *args): # MetricAgg alert things self.metric_key = 'metric_' + self.rules['metric_agg_key'] + '_' + self.rules['metric_agg_type'] - if not self.rules['metric_agg_type'] in self.allowed_aggregations: + + if not self.rules['metric_agg_type'] in self.allowed_aggregations.union(self.allowed_percent_aggregations): raise EAException("metric_agg_type must be one of %s" % (str(self.allowed_aggregations))) + if self.rules['metric_agg_type'] in self.allowed_percent_aggregations and self.rules['percentile_range'] is None: + raise EAException("percentile_range must be specified for percentiles aggregation") # Disabling bucket intervals (doesn't make sense in context of spike to split up your time period) if self.rules.get('bucket_interval'): @@ -1126,7 +1175,10 @@ def generate_aggregation_query(self): """Lifted from MetricAggregationRule, added support for scripted fields""" if self.rules.get('metric_agg_script'): return {self.metric_key: {self.rules['metric_agg_type']: self.rules['metric_agg_script']}} - return {self.metric_key: {self.rules['metric_agg_type']: {'field': self.rules['metric_agg_key']}}} + query = {self.metric_key: {self.rules['metric_agg_type']: {'field': self.rules['metric_agg_key']}}} + if self.rules['metric_agg_type'] in self.allowed_percent_aggregations: + query[self.metric_key][self.rules['metric_agg_type']]['percents'] = [self.rules['percentile_range']] + return query def add_aggregation_data(self, payload): """ @@ -1140,7 +1192,10 @@ def add_aggregation_data(self, payload): else: # no time / term split, just focus on the agg event = {self.ts_field: timestamp} - agg_value = payload_data[self.metric_key]['value'] + if self.rules['metric_agg_type'] in self.allowed_percent_aggregations: + agg_value = list(payload_data[self.metric_key]['values'].values())[0] + 
else: + agg_value = payload_data[self.metric_key]['value'] self.handle_event(event, agg_value, 'all') return @@ -1160,7 +1215,10 @@ def unwrap_term_buckets(self, timestamp, term_buckets, qk=[]): continue qk_str = ','.join(qk) - agg_value = term_data[self.metric_key]['value'] + if self.rules['metric_agg_type'] in self.allowed_percent_aggregations: + agg_value = list(term_data[self.metric_key]['values'].values())[0] + else: + agg_value = term_data[self.metric_key]['value'] event = {self.ts_field: timestamp, self.rules['query_key']: qk_str} # pass to SpikeRule's tracker diff --git a/elastalert/schema.yaml b/elastalert/schema.yaml index c9f76521..ce23645a 100644 --- a/elastalert/schema.yaml +++ b/elastalert/schema.yaml @@ -110,7 +110,7 @@ oneOf: type: {enum: [spike_aggregation]} spike_height: {type: number} spike_type: {enum: ["up", "down", "both"]} - metric_agg_type: {enum: ["min", "max", "avg", "sum", "cardinality", "value_count"]} + metric_agg_type: {enum: ["min", "max", "avg", "sum", "cardinality", "value_count", "percentiles"]} timeframe: *timeframe use_count_query: {type: boolean} doc_type: {type: string} @@ -120,6 +120,7 @@ oneOf: threshold_ref: {type: number} threshold_cur: {type: number} min_doc_count: {type: integer} + percentile_range: {type: integer} - title: Flatline required: [threshold, timeframe] @@ -153,8 +154,9 @@ oneOf: required: [metric_agg_key,metric_agg_type] properties: type: {enum: [metric_aggregation]} - metric_agg_type: {enum: ["min", "max", "avg", "sum", "cardinality", "value_count"]} + metric_agg_type: {enum: ["min", "max", "avg", "sum", "cardinality", "value_count", "percentiles"]} #timeframe: *timeframe + percentile_range: {type: integer} - title: Percentage Match required: [match_bucket_filter] @@ -222,7 +224,7 @@ properties: ### Kibana Discover App Link generate_kibana_discover_url: {type: boolean} kibana_discover_app_url: {type: string, format: uri} - kibana_discover_version: {type: string, enum: ['7.3', '7.2', '7.1', '7.0', '6.8', '6.7', '6.6', '6.5', '6.4', '6.3', '6.2', '6.1', '6.0', '5.6']} + kibana_discover_version: {type: string, enum: ['7.12', '7.11', '7.10', '7.9', '7.8', '7.7', '7.6', '7.5', '7.4', '7.3', '7.2', '7.1', '7.0', '6.8', '6.7', '6.6', '6.5', '6.4', '6.3', '6.2', '6.1', '6.0', '5.6']} kibana_discover_index_pattern_id: {type: string, minLength: 1} kibana_discover_columns: {type: array, items: {type: string, minLength: 1}, minItems: 1} kibana_discover_from_timedelta: *timedelta @@ -232,7 +234,7 @@ properties: alert_text: {type: string} # Python format string alert_text_args: {type: array, items: {type: string}} alert_text_kw: {type: object} - alert_text_type: {enum: [alert_text_only, exclude_fields, aggregation_summary_only]} + alert_text_type: {enum: [alert_text_only, alert_text_jinja, exclude_fields, aggregation_summary_only]} alert_missing_value: {type: string} timestamp_field: {type: string} field: {} @@ -266,21 +268,6 @@ properties: jira_max_age: {type: number} jira_watchers: *arrayOfString - ### HipChat - hipchat_auth_token: {type: string} - hipchat_room_id: {type: [string, integer]} - hipchat_domain: {type: string} - hipchat_ignore_ssl_errors: {type: boolean} - hipchat_notify: {type: boolean} - hipchat_from: {type: string} - hipchat_mentions: {type: array, items: {type: string}} - - ### Stride - stride_access_token: {type: string} - stride_cloud_id: {type: string} - stride_conversation_id: {type: string} - stride_ignore_ssl_errors: {type: boolean} - ### Slack slack_webhook_url: *arrayOfString slack_username_override: {type: string}
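The percentiles aggregation added to the schema above pairs metric_agg_type with percentile_range. A minimal p95 sketch, assuming the usual metric_aggregation threshold options and a hypothetical latency field:

type: metric_aggregation
metric_agg_key: "response_time"
metric_agg_type: "percentiles"
percentile_range: 95
max_threshold: 2000   # alert when the 95th percentile exceeds 2000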
-361,24 +348,20 @@ properties: ### Alerta alerta_api_url: {type: string} alerta_api_key: {type: string} - alerta_severity: {enum: [unknown, security, debug, informational, ok, normal, cleared, indeterminate, warning, minor, major, critical]} + alerta_severity: {type: string} alerta_resource: {type: string} # Python format string alerta_environment: {type: string} # Python format string alerta_origin: {type: string} # Python format string alerta_group: {type: string} # Python format string alerta_service: {type: array, items: {type: string}} # Python format string - alerta_service: {type: array, items: {type: string}} # Python format string alerta_correlate: {type: array, items: {type: string}} # Python format string alerta_tags: {type: array, items: {type: string}} # Python format string alerta_event: {type: string} # Python format string - alerta_customer: {type: string} alerta_text: {type: string} # Python format string alerta_type: {type: string} alerta_value: {type: string} # Python format string alerta_attributes_keys: {type: array, items: {type: string}} alerta_attributes_values: {type: array, items: {type: string}} # Python format string - alerta_new_style_string_format: {type: boolean} - ### Simple simple_webhook_url: *arrayOfString @@ -391,4 +374,18 @@ properties: zbx_sender_host: {type: string} zbx_sender_port: {type: integer} zbx_host: {type: string} - zbx_item: {type: string} + zbx_key: {type: string} + + ### Discord + discord_webhook_url: {type: string} + + ### Dingtalk + dingtalk_access_token: {type: string} + dingtalk_msgtype: {type: string} + dingtalk_single_title: {type: string} + dingtalk_single_url: {type: string} + dingtalk_btn_orientation: {type: string} + + ### Chatwork + chatwork_apikey: {type: string} + chatwork_room_id: {type: string} diff --git a/elastalert/test_rule.py b/elastalert/test_rule.py index 06100aa0..af1eaa49 100644 --- a/elastalert/test_rule.py +++ b/elastalert/test_rule.py @@ -83,7 +83,7 @@ def test_file(self, conf, args): # Get one document for schema try: - res = es_client.search(index, size=1, body=query, ignore_unavailable=True) + res = es_client.search(index=index, size=1, body=query, ignore_unavailable=True) except Exception as e: print("Error running your filter:", file=sys.stderr) print(repr(e)[:2048], file=sys.stderr) @@ -109,7 +109,7 @@ def test_file(self, conf, args): five=conf['five'] ) try: - res = es_client.count(index, doc_type=doc_type, body=count_query, ignore_unavailable=True) + res = es_client.count(index=index, doc_type=doc_type, body=count_query, ignore_unavailable=True) except Exception as e: print("Error querying Elasticsearch:", file=sys.stderr) print(repr(e)[:2048], file=sys.stderr) @@ -153,7 +153,7 @@ def test_file(self, conf, args): # Download up to max_query_size (defaults to 10,000) documents to save if (args.save or args.formatted_output) and not args.count: try: - res = es_client.search(index, size=args.max_query_size, body=query, ignore_unavailable=True) + res = es_client.search(index=index, size=args.max_query_size, body=query, ignore_unavailable=True) except Exception as e: print("Error running your filter:", file=sys.stderr) print(repr(e)[:2048], file=sys.stderr) diff --git a/elastalert/util.py b/elastalert/util.py index bbb0600f..658c6bf4 100644 --- a/elastalert/util.py +++ b/elastalert/util.py @@ -152,7 +152,7 @@ def ts_to_dt(timestamp): def dt_to_ts(dt): if not isinstance(dt, datetime.datetime): - logging.warning('Expected datetime, got %s' % (type(dt))) + elastalert_logger.warning('Expected datetime, got %s' %
(type(dt))) return dt ts = dt.isoformat() # Round microseconds to milliseconds @@ -176,7 +176,7 @@ def ts_to_dt_with_format(timestamp, ts_format): def dt_to_ts_with_format(dt, ts_format): if not isinstance(dt, datetime.datetime): - logging.warning('Expected datetime, got %s' % (type(dt))) + elastalert_logger.warning('Expected datetime, got %s' % (type(dt))) return dt ts = dt.strftime(ts_format) return ts @@ -186,6 +186,11 @@ def ts_now(): return datetime.datetime.utcnow().replace(tzinfo=dateutil.tz.tzutc()) +def ts_utc_to_tz(ts, tz_name): + """Convert a UTC timestamp to the time zone given by tz_name.""" + return ts.astimezone(dateutil.tz.gettz(tz_name)) + + def inc_ts(timestamp, milliseconds=1): """Increment a timestamp by milliseconds.""" dt = ts_to_dt(timestamp) @@ -202,7 +207,7 @@ def pretty_ts(timestamp, tz=True): dt = ts_to_dt(timestamp) if tz: dt = dt.astimezone(dateutil.tz.tzlocal()) - return dt.strftime('%Y-%m-%d %H:%M %Z') + return dt.strftime('%Y-%m-%d %H:%M %z') def ts_add(ts, td): @@ -361,7 +366,7 @@ def build_es_conn_config(conf): # Deprecated if 'boto_profile' in conf: - logging.warning('Found deprecated "boto_profile", use "profile" instead!') + elastalert_logger.warning('Found deprecated "boto_profile", use "profile" instead!') parsed_conf['profile'] = conf['boto_profile'] if 'profile' in conf: diff --git a/elastalert/zabbix.py b/elastalert/zabbix.py index e3f13aa0..e2b5f1ed 100644 --- a/elastalert/zabbix.py +++ b/elastalert/zabbix.py @@ -1,46 +1,47 @@ -from alerts import Alerter # , BasicMatchString -import logging -from pyzabbix.api import ZabbixAPI -from pyzabbix import ZabbixSender, ZabbixMetric from datetime import datetime +from pyzabbix import ZabbixSender, ZabbixMetric, ZabbixAPI + +from .alerts import Alerter +from .util import elastalert_logger, EAException + class ZabbixClient(ZabbixAPI): - def __init__(self, url='http://localhost', use_authenticate=False, user='Admin', password='zabbix', sender_host='localhost', - sender_port=10051): + def __init__(self, url='http://localhost', use_authenticate=False, user='Admin', password='zabbix', + sender_host='localhost', sender_port=10051): self.url = url self.use_authenticate = use_authenticate self.sender_host = sender_host self.sender_port = sender_port self.metrics_chunk_size = 200 self.aggregated_metrics = [] - self.logger = logging.getLogger(self.__class__.__name__) - super(ZabbixClient, self).__init__(url=self.url, use_authenticate=self.use_authenticate, user=user, password=password) + + super(ZabbixClient, self).__init__(url=self.url, + use_authenticate=self.use_authenticate, + user=user, + password=password) def send_metric(self, hostname, key, data): zm = ZabbixMetric(hostname, key, data) if self.send_aggregated_metrics: - self.aggregated_metrics.append(zm) if len(self.aggregated_metrics) > self.metrics_chunk_size: - self.logger.info("Sending: %s metrics" % (len(self.aggregated_metrics))) + elastalert_logger.info("Sending: %s metrics" % (len(self.aggregated_metrics))) try: - ZabbixSender(zabbix_server=self.sender_host, zabbix_port=self.sender_port).send(self.aggregated_metrics) + ZabbixSender(zabbix_server=self.sender_host, zabbix_port=self.sender_port) \ + .send(self.aggregated_metrics) self.aggregated_metrics = [] except Exception as e: - self.logger.exception(e) - pass + elastalert_logger.exception(e) else: try: - ZabbixSender(zabbix_server=self.sender_host, zabbix_port=self.sender_port).send(zm) + ZabbixSender(zabbix_server=self.sender_host, zabbix_port=self.sender_port).send([zm]) except Exception as e: - self.logger.exception(e) -
pass + elastalert_logger.exception(e) class ZabbixAlerter(Alerter): - # By setting required_options to a set of strings # You can ensure that the rule config file specifies all # of the options. Otherwise, ElastAlert will throw an exception @@ -54,6 +55,9 @@ def __init__(self, *args): self.zbx_sender_port = self.rule.get('zbx_sender_port', 10051) self.zbx_host = self.rule.get('zbx_host') self.zbx_key = self.rule.get('zbx_key') + self.timestamp_field = self.rule.get('timestamp_field', '@timestamp') + self.timestamp_type = self.rule.get('timestamp_type', 'iso') + self.timestamp_strptime = self.rule.get('timestamp_strptime', '%Y-%m-%dT%H:%M:%S.%fZ') # Alert is called def alert(self, matches): @@ -63,10 +67,26 @@ def alert(self, matches): # the aggregation option set zm = [] for match in matches: - ts_epoch = int(datetime.strptime(match['@timestamp'], "%Y-%m-%dT%H:%M:%S.%fZ").strftime('%s')) - zm.append(ZabbixMetric(host=self.zbx_host, key=self.zbx_key, value=1, clock=ts_epoch)) + if ':' not in match[self.timestamp_field] or '-' not in match[self.timestamp_field]: + ts_epoch = int(match[self.timestamp_field]) + else: + try: + ts_epoch = int(datetime.strptime(match[self.timestamp_field], self.timestamp_strptime) + .strftime('%s')) + except ValueError: + ts_epoch = int(datetime.strptime(match[self.timestamp_field], '%Y-%m-%dT%H:%M:%SZ') + .strftime('%s')) + zm.append(ZabbixMetric(host=self.zbx_host, key=self.zbx_key, value='1', clock=ts_epoch)) - ZabbixSender(zabbix_server=self.zbx_sender_host, zabbix_port=self.zbx_sender_port).send(zm) + try: + response = ZabbixSender(zabbix_server=self.zbx_sender_host, zabbix_port=self.zbx_sender_port).send(zm) + if response.failed: + elastalert_logger.warning("Missing zabbix host '%s' or host's item '%s', alert will be discarded" + % (self.zbx_host, self.zbx_key)) + else: + elastalert_logger.info("Alert sent to Zabbix") + except Exception as e: + raise EAException("Error sending alert to Zabbix: %s" % e) # get_info is called after an alert is sent to get data that is written back # to Elasticsearch in the field "alert_info" diff --git a/example_rules/exemple_discord_any.yaml b/example_rules/exemple_discord_any.yaml new file mode 100644 index 00000000..e19b4a4b --- /dev/null +++ b/example_rules/exemple_discord_any.yaml @@ -0,0 +1,40 @@ +# This example will alert on every match that occurred during the run interval configured in your config file. +# Every match is sent to Discord as a separate alert. If there are 3 matches, the alerter will send 3 alerts to your Discord channel. + +name: "Example Discord webhook alert" +type: any +index: your_indice_%Y-%m-%d +use_strftime_index: true + +# Example query +filter: +- query: + query_string: + query: "id: 2501 OR id: 5503" + +realert: + minutes: 0 + +# Only the log content is formatted as a code block (note the ``` at the end of alert_text). This avoids formatting the entire description section as code and improves readability. +include: ["timestamp","name","computer"] +alert_text: "Alerts at {0} on the computer {1}.\n```" +alert_text_args: ["timestamp","computer"] + +# Needed +alert: +- discord +discord_webhook_url: "Your discord webhook url" + +# ----- Optional Section ----- + +discord_proxy: "proxy_address" + +# Must be quoted ("") and must be a valid emoji supported by Discord. +discord_emoji_title: ":lock:" + +# Must be a hexadecimal value, as in the example below. +discord_embed_color: 0xE24D42 + +# This content will be displayed at the very end of your embed message.
If you omit either of these two lines, the footer will not be added. +discord_embed_footer: "Message sent by ElastAlert from your computer" +discord_embed_icon_url: "https://humancoders-formations.s3.amazonaws.com/uploads/course/logo/38/thumb_bigger_formation-elasticsearch.png" \ No newline at end of file diff --git a/example_rules/ssh.yaml b/example_rules/ssh.yaml index 7af89078..a7147217 100644 --- a/example_rules/ssh.yaml +++ b/example_rules/ssh.yaml @@ -1,5 +1,5 @@ # Rule name, must be unique - name: SSH abuse (ElastAlert 3.0.1) - 2 +name: SSH abuse (ElastAlert 3.0.1) - 2 # Alert on x events in y seconds type: frequency diff --git a/requirements-dev.txt b/requirements-dev.txt index 558761d9..5ceb8762 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,9 +1,11 @@ -r requirements.txt -coverage==4.5.4 +coverage==5.5 flake8 +m2r2 +pluggy>=0.12.0 pre-commit -pylint<1.4 -pytest<3.3.0 +pylint<2.8 +pytest<3.7.0 setuptools sphinx_rtd_theme -tox<2.0 +tox==3.23.0 diff --git a/requirements.txt b/requirements.txt index 54978e02..d495170d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,23 +1,26 @@ -apscheduler>=3.3.0 +apscheduler>=3.3.0,<4.0 aws-requests-auth>=0.3.0 -blist>=1.3.6 +sortedcontainers>=2.2.2 boto3>=1.4.4 cffi>=1.11.5 configparser>=3.5.0 croniter>=0.3.16 -elasticsearch>=7.0.0 +elasticsearch>=7.0.0,<8.0.0 envparse>=0.2.0 exotel>=0.1.3 -jira>=1.0.10,<1.0.15 +Jinja2==2.11.3 +jira>=2.0.0 jsonschema>=3.0.2 mock>=2.0.0 prison>=0.1.2 -py-zabbix==1.1.3 +prometheus_client>=0.10.1 +py-zabbix>=1.1.3 PyStaticConfiguration>=0.10.3 python-dateutil>=2.6.0,<2.7.0 PyYAML>=5.1 -requests>=2.0.0 +requests>=2.10.0 stomp.py>=4.1.17 texttable>=0.8.8 -twilio==6.0.0 statsd-tags==3.2.1.post1 +twilio>=6.0.0,<6.1 +tzlocal<3.0 diff --git a/setup.py b/setup.py index 35d1eb34..2436ed79 100644 --- a/setup.py +++ b/setup.py @@ -7,15 +7,13 @@ base_dir = os.path.dirname(__file__) setup( - name='elastalert', - version='0.2.4', - description='Runs custom filters on Elasticsearch and alerts on matches', - author='Quentin Long', - author_email='qlo@yelp.com', + name='elastalert2', + version='2.0.3', + description='Automated rule-based alerting for Elasticsearch', setup_requires='setuptools', - license='Copyright 2014 Yelp', + license='Apache 2.0', classifiers=[ - 'Programming Language :: Python :: 3.6', + 'Programming Language :: Python :: 3.9', 'License :: OSI Approved :: Apache Software License', 'Operating System :: OS Independent', ], @@ -27,27 +25,31 @@ packages=find_packages(), package_data={'elastalert': ['schema.yaml', 'es_mappings/**/*.json']}, install_requires=[ - 'apscheduler>=3.3.0', + 'apscheduler>=3.3.0,<4.0', 'aws-requests-auth>=0.3.0', - 'blist>=1.3.6', + 'sortedcontainers>=2.2.2', 'boto3>=1.4.4', 'configparser>=3.5.0', 'croniter>=0.3.16', - 'elasticsearch==7.0.0', + 'elasticsearch>=7.0.0,<8.0.0', 'envparse>=0.2.0', 'exotel>=0.1.3', 'jira>=2.0.0', + 'Jinja2==2.11.3', 'jsonschema>=3.0.2', 'mock>=2.0.0', 'prison>=0.1.2', + 'prometheus_client>=0.10.1', + 'py-zabbix>=1.1.3', 'PyStaticConfiguration>=0.10.3', 'python-dateutil>=2.6.0,<2.7.0', - 'PyYAML>=3.12', + 'PyYAML>=5.1', 'requests>=2.10.0', 'stomp.py>=4.1.17', 'texttable>=0.8.8', 'twilio>=6.0.0,<6.1', 'cffi>=1.11.5', - 'statsd-tags==3.2.1.post1' + 'statsd-tags==3.2.1.post1', + 'tzlocal<3.0' ] ) diff --git a/tests/alerts_test.py b/tests/alerts_test.py index 5cd61ae7..4f56ad1a 100644 --- a/tests/alerts_test.py +++ b/tests/alerts_test.py @@ -12,15 +12,14 @@ from elastalert.alerts import Alerter from elastalert.alerts import
BasicMatchString from elastalert.alerts import CommandAlerter +from elastalert.alerts import DatadogAlerter from elastalert.alerts import EmailAlerter -from elastalert.alerts import HipChatAlerter from elastalert.alerts import HTTPPostAlerter from elastalert.alerts import JiraAlerter from elastalert.alerts import JiraFormattedMatchString from elastalert.alerts import MsTeamsAlerter from elastalert.alerts import PagerDutyAlerter from elastalert.alerts import SlackAlerter -from elastalert.alerts import StrideAlerter from elastalert.loaders import FileRulesLoader from elastalert.opsgenie import OpsGenieAlerter from elastalert.util import ts_add @@ -1072,32 +1071,6 @@ def test_command(): alert.alert([match]) assert mock_popen.called_with('/bin/test/foo.sh', stdin=subprocess.PIPE, shell=True) - # Test command as string with formatted arg (new-style string format) - rule = {'command': '/bin/test/ --arg {match[somefield]}', 'new_style_string_format': True} - alert = CommandAlerter(rule) - with mock.patch("elastalert.alerts.subprocess.Popen") as mock_popen: - alert.alert([match]) - assert mock_popen.called_with('/bin/test --arg foobarbaz', stdin=subprocess.PIPE, shell=False) - - rule = {'command': '/bin/test/ --arg {match[nested][field]}', 'new_style_string_format': True} - alert = CommandAlerter(rule) - with mock.patch("elastalert.alerts.subprocess.Popen") as mock_popen: - alert.alert([match]) - assert mock_popen.called_with('/bin/test --arg 1', stdin=subprocess.PIPE, shell=False) - - # Test command as string without formatted arg (new-style string format) - rule = {'command': '/bin/test/foo.sh', 'new_style_string_format': True} - alert = CommandAlerter(rule) - with mock.patch("elastalert.alerts.subprocess.Popen") as mock_popen: - alert.alert([match]) - assert mock_popen.called_with('/bin/test/foo.sh', stdin=subprocess.PIPE, shell=True) - - rule = {'command': '/bin/test/foo.sh {{bar}}', 'new_style_string_format': True} - alert = CommandAlerter(rule) - with mock.patch("elastalert.alerts.subprocess.Popen") as mock_popen: - alert.alert([match]) - assert mock_popen.called_with('/bin/test/foo.sh {bar}', stdin=subprocess.PIPE, shell=True) - # Test command with pipe_match_json rule = {'command': ['/bin/test/', '--arg', '%(somefield)s'], 'pipe_match_json': True} @@ -1242,7 +1215,7 @@ def test_slack_uses_custom_title(): data=mock.ANY, headers={'content-type': 'application/json'}, proxies=None, - verify=False, + verify=True, timeout=10 ) assert expected_data == json.loads(mock_post_request.call_args_list[0][1]['data']) @@ -1288,7 +1261,7 @@ def test_slack_uses_custom_timeout(): data=mock.ANY, headers={'content-type': 'application/json'}, proxies=None, - verify=False, + verify=True, timeout=20 ) assert expected_data == json.loads(mock_post_request.call_args_list[0][1]['data']) @@ -1332,7 +1305,7 @@ def test_slack_uses_rule_name_when_custom_title_is_not_provided(): data=mock.ANY, headers={'content-type': 'application/json'}, proxies=None, - verify=False, + verify=True, timeout=10 ) assert expected_data == json.loads(mock_post_request.call_args_list[0][1]['data']) @@ -1377,7 +1350,7 @@ def test_slack_uses_custom_slack_channel(): data=mock.ANY, headers={'content-type': 'application/json'}, proxies=None, - verify=False, + verify=True, timeout=10 ) assert expected_data == json.loads(mock_post_request.call_args_list[0][1]['data']) @@ -1438,7 +1411,7 @@ def test_slack_uses_list_of_custom_slack_channel(): data=mock.ANY, headers={'content-type': 'application/json'}, proxies=None, - verify=False, + verify=True, 
timeout=10 ) assert expected_data1 == json.loads(mock_post_request.call_args_list[0][1]['data']) @@ -1489,7 +1462,7 @@ def test_slack_attach_kibana_discover_url_when_generated(): data=mock.ANY, headers={'content-type': 'application/json'}, proxies=None, - verify=False, + verify=True, timeout=10 ) actual_data = json.loads(mock_post_request.call_args_list[0][1]['data']) @@ -1534,7 +1507,7 @@ def test_slack_attach_kibana_discover_url_when_not_generated(): data=mock.ANY, headers={'content-type': 'application/json'}, proxies=None, - verify=False, + verify=True, timeout=10 ) actual_data = json.loads(mock_post_request.call_args_list[0][1]['data']) @@ -1586,7 +1559,7 @@ def test_slack_kibana_discover_title(): data=mock.ANY, headers={'content-type': 'application/json'}, proxies=None, - verify=False, + verify=True, timeout=10 ) actual_data = json.loads(mock_post_request.call_args_list[0][1]['data']) @@ -1638,13 +1611,58 @@ def test_slack_kibana_discover_color(): data=mock.ANY, headers={'content-type': 'application/json'}, proxies=None, - verify=False, + verify=True, timeout=10 ) actual_data = json.loads(mock_post_request.call_args_list[0][1]['data']) assert expected_data == actual_data +def test_slack_ignore_ssl_errors(): + rule = { + 'name': 'Test Rule', + 'type': 'any', + 'slack_webhook_url': 'http://please.dontgohere.slack', + 'slack_ignore_ssl_errors': True, + 'alert': [] + } + rules_loader = FileRulesLoader({}) + rules_loader.load_modules(rule) + alert = SlackAlerter(rule) + match = { + '@timestamp': '2016-01-01T00:00:00' + } + with mock.patch('requests.post') as mock_post_request: + alert.alert([match]) + + mock_post_request.assert_called_once_with( + rule['slack_webhook_url'], + data=mock.ANY, + headers={'content-type': 'application/json'}, + proxies=None, + verify=False, + timeout=10 + ) + + expected_data = { + 'username': 'elastalert', + 'channel': '', + 'icon_emoji': ':ghost:', + 'attachments': [ + { + 'color': 'danger', + 'title': 'Test Rule', + 'text': BasicMatchString(rule, match).__str__(), + 'mrkdwn_in': ['text', 'pretext'], + 'fields': [] + } + ], + 'text': '', + 'parse': 'none' + } + assert expected_data == json.loads(mock_post_request.call_args_list[0][1]['data']) + + def test_http_alerter_with_payload(): rule = { 'name': 'Test HTTP Post Alerter With Payload', @@ -1672,7 +1690,8 @@ def test_http_alerter_with_payload(): data=mock.ANY, headers={'Content-Type': 'application/json', 'Accept': 'application/json;charset=utf-8'}, proxies=None, - timeout=10 + timeout=10, + verify=True ) assert expected_data == json.loads(mock_post_request.call_args_list[0][1]['data']) @@ -1707,7 +1726,8 @@ def test_http_alerter_with_payload_all_values(): data=mock.ANY, headers={'Content-Type': 'application/json', 'Accept': 'application/json;charset=utf-8'}, proxies=None, - timeout=10 + timeout=10, + verify=True ) assert expected_data == json.loads(mock_post_request.call_args_list[0][1]['data']) @@ -1739,7 +1759,8 @@ def test_http_alerter_without_payload(): data=mock.ANY, headers={'Content-Type': 'application/json', 'Accept': 'application/json;charset=utf-8'}, proxies=None, - timeout=10 + timeout=10, + verify=True ) assert expected_data == json.loads(mock_post_request.call_args_list[0][1]['data']) @@ -2086,340 +2107,6 @@ def test_resolving_rule_references(ea): assert 'the_owner' == alert.rule['nested_dict']['nested_owner'] -def test_stride_plain_text(): - rule = { - 'name': 'Test Rule', - 'type': 'any', - 'stride_access_token': 'token', - 'stride_cloud_id': 'cloud_id', - 'stride_conversation_id': 
'conversation_id', - 'alert_subject': 'Cool subject', - 'alert': [] - } - rules_loader = FileRulesLoader({}) - rules_loader.load_modules(rule) - alert = StrideAlerter(rule) - match = { - '@timestamp': '2016-01-01T00:00:00', - 'somefield': 'foobarbaz' - } - with mock.patch('requests.post') as mock_post_request: - alert.alert([match]) - - body = "{0}\n\n@timestamp: {1}\nsomefield: {2}".format( - rule['name'], match['@timestamp'], match['somefield'] - ) - expected_data = {'body': {'version': 1, 'type': "doc", 'content': [ - {'type': "panel", 'attrs': {'panelType': "warning"}, 'content': [ - {'type': 'paragraph', 'content': [ - {'type': 'text', 'text': body} - ]} - ]} - ]}} - - mock_post_request.assert_called_once_with( - alert.url, - data=mock.ANY, - headers={ - 'content-type': 'application/json', - 'Authorization': 'Bearer {}'.format(rule['stride_access_token'])}, - verify=True, - proxies=None - ) - assert expected_data == json.loads( - mock_post_request.call_args_list[0][1]['data']) - - -def test_stride_underline_text(): - rule = { - 'name': 'Test Rule', - 'type': 'any', - 'stride_access_token': 'token', - 'stride_cloud_id': 'cloud_id', - 'stride_conversation_id': 'conversation_id', - 'alert_subject': 'Cool subject', - 'alert_text': 'Underline Text', - 'alert_text_type': 'alert_text_only', - 'alert': [] - } - rules_loader = FileRulesLoader({}) - rules_loader.load_modules(rule) - alert = StrideAlerter(rule) - match = { - '@timestamp': '2016-01-01T00:00:00', - 'somefield': 'foobarbaz' - } - with mock.patch('requests.post') as mock_post_request: - alert.alert([match]) - - body = "Underline Text" - expected_data = {'body': {'version': 1, 'type': "doc", 'content': [ - {'type': "panel", 'attrs': {'panelType': "warning"}, 'content': [ - {'type': 'paragraph', 'content': [ - {'type': 'text', 'text': body, 'marks': [ - {'type': 'underline'} - ]} - ]} - ]} - ]}} - - mock_post_request.assert_called_once_with( - alert.url, - data=mock.ANY, - headers={ - 'content-type': 'application/json', - 'Authorization': 'Bearer {}'.format(rule['stride_access_token'])}, - verify=True, - proxies=None - ) - assert expected_data == json.loads( - mock_post_request.call_args_list[0][1]['data']) - - -def test_stride_bold_text(): - rule = { - 'name': 'Test Rule', - 'type': 'any', - 'stride_access_token': 'token', - 'stride_cloud_id': 'cloud_id', - 'stride_conversation_id': 'conversation_id', - 'alert_subject': 'Cool subject', - 'alert_text': 'Bold Text', - 'alert_text_type': 'alert_text_only', - 'alert': [] - } - rules_loader = FileRulesLoader({}) - rules_loader.load_modules(rule) - alert = StrideAlerter(rule) - match = { - '@timestamp': '2016-01-01T00:00:00', - 'somefield': 'foobarbaz' - } - with mock.patch('requests.post') as mock_post_request: - alert.alert([match]) - - body = "Bold Text" - expected_data = {'body': {'version': 1, 'type': "doc", 'content': [ - {'type': "panel", 'attrs': {'panelType': "warning"}, 'content': [ - {'type': 'paragraph', 'content': [ - {'type': 'text', 'text': body, 'marks': [ - {'type': 'strong'} - ]} - ]} - ]} - ]}} - - mock_post_request.assert_called_once_with( - alert.url, - data=mock.ANY, - headers={ - 'content-type': 'application/json', - 'Authorization': 'Bearer {}'.format(rule['stride_access_token'])}, - verify=True, - proxies=None - ) - assert expected_data == json.loads( - mock_post_request.call_args_list[0][1]['data']) - - -def test_stride_strong_text(): - rule = { - 'name': 'Test Rule', - 'type': 'any', - 'stride_access_token': 'token', - 'stride_cloud_id': 'cloud_id', - 
'stride_conversation_id': 'conversation_id', - 'alert_subject': 'Cool subject', - 'alert_text': 'Bold Text', - 'alert_text_type': 'alert_text_only', - 'alert': [] - } - rules_loader = FileRulesLoader({}) - rules_loader.load_modules(rule) - alert = StrideAlerter(rule) - match = { - '@timestamp': '2016-01-01T00:00:00', - 'somefield': 'foobarbaz' - } - with mock.patch('requests.post') as mock_post_request: - alert.alert([match]) - - body = "Bold Text" - expected_data = {'body': {'version': 1, 'type': "doc", 'content': [ - {'type': "panel", 'attrs': {'panelType': "warning"}, 'content': [ - {'type': 'paragraph', 'content': [ - {'type': 'text', 'text': body, 'marks': [ - {'type': 'strong'} - ]} - ]} - ]} - ]}} - - mock_post_request.assert_called_once_with( - alert.url, - data=mock.ANY, - headers={ - 'content-type': 'application/json', - 'Authorization': 'Bearer {}'.format(rule['stride_access_token'])}, - verify=True, - proxies=None - ) - assert expected_data == json.loads( - mock_post_request.call_args_list[0][1]['data']) - - -def test_stride_hyperlink(): - rule = { - 'name': 'Test Rule', - 'type': 'any', - 'stride_access_token': 'token', - 'stride_cloud_id': 'cloud_id', - 'stride_conversation_id': 'conversation_id', - 'alert_subject': 'Cool subject', - 'alert_text': '
Link', - 'alert_text_type': 'alert_text_only', - 'alert': [] - } - rules_loader = FileRulesLoader({}) - rules_loader.load_modules(rule) - alert = StrideAlerter(rule) - match = { - '@timestamp': '2016-01-01T00:00:00', - 'somefield': 'foobarbaz' - } - with mock.patch('requests.post') as mock_post_request: - alert.alert([match]) - - body = "Link" - expected_data = {'body': {'version': 1, 'type': "doc", 'content': [ - {'type': "panel", 'attrs': {'panelType': "warning"}, 'content': [ - {'type': 'paragraph', 'content': [ - {'type': 'text', 'text': body, 'marks': [ - {'type': 'link', 'attrs': {'href': 'http://stride.com'}} - ]} - ]} - ]} - ]}} - - mock_post_request.assert_called_once_with( - alert.url, - data=mock.ANY, - headers={ - 'content-type': 'application/json', - 'Authorization': 'Bearer {}'.format(rule['stride_access_token'])}, - verify=True, - proxies=None - ) - assert expected_data == json.loads( - mock_post_request.call_args_list[0][1]['data']) - - -def test_stride_html(): - rule = { - 'name': 'Test Rule', - 'type': 'any', - 'stride_access_token': 'token', - 'stride_cloud_id': 'cloud_id', - 'stride_conversation_id': 'conversation_id', - 'alert_subject': 'Cool subject', - 'alert_text': 'Alert: we found something. Link', - 'alert_text_type': 'alert_text_only', - 'alert': [] - } - rules_loader = FileRulesLoader({}) - rules_loader.load_modules(rule) - alert = StrideAlerter(rule) - match = { - '@timestamp': '2016-01-01T00:00:00', - 'somefield': 'foobarbaz' - } - with mock.patch('requests.post') as mock_post_request: - alert.alert([match]) - - expected_data = {'body': {'version': 1, 'type': "doc", 'content': [ - {'type': "panel", 'attrs': {'panelType': "warning"}, 'content': [ - {'type': 'paragraph', 'content': [ - {'type': 'text', 'text': 'Alert', 'marks': [ - {'type': 'strong'} - ]}, - {'type': 'text', 'text': ': we found something. 
'}, - {'type': 'text', 'text': 'Link', 'marks': [ - {'type': 'link', 'attrs': {'href': 'http://stride.com'}} - ]} - ]} - ]} - ]}} - - mock_post_request.assert_called_once_with( - alert.url, - data=mock.ANY, - headers={ - 'content-type': 'application/json', - 'Authorization': 'Bearer {}'.format(rule['stride_access_token'])}, - verify=True, - proxies=None - ) - assert expected_data == json.loads( - mock_post_request.call_args_list[0][1]['data']) - - -def test_hipchat_body_size_limit_text(): - rule = { - 'name': 'Test Rule', - 'type': 'any', - 'hipchat_auth_token': 'token', - 'hipchat_room_id': 'room_id', - 'hipchat_message_format': 'text', - 'alert_subject': 'Cool subject', - 'alert_text': 'Alert: we found something.\n\n{message}', - 'alert_text_type': 'alert_text_only', - 'alert': [], - 'alert_text_kw': { - '@timestamp': 'time', - 'message': 'message', - }, - } - rules_loader = FileRulesLoader({}) - rules_loader.load_modules(rule) - alert = HipChatAlerter(rule) - match = { - '@timestamp': '2018-01-01T00:00:00', - 'message': 'foo bar\n' * 5000, - } - body = alert.create_alert_body([match]) - - assert len(body) <= 10000 - - -def test_hipchat_body_size_limit_html(): - rule = { - 'name': 'Test Rule', - 'type': 'any', - 'hipchat_auth_token': 'token', - 'hipchat_room_id': 'room_id', - 'hipchat_message_format': 'html', - 'alert_subject': 'Cool subject', - 'alert_text': 'Alert: we found something.\n\n{message}', - 'alert_text_type': 'alert_text_only', - 'alert': [], - 'alert_text_kw': { - '@timestamp': 'time', - 'message': 'message', - }, - } - rules_loader = FileRulesLoader({}) - rules_loader.load_modules(rule) - alert = HipChatAlerter(rule) - match = { - '@timestamp': '2018-01-01T00:00:00', - 'message': 'foo bar\n' * 5000, - } - - body = alert.create_alert_body([match]) - - assert len(body) <= 10000 - - def test_alerta_no_auth(ea): rule = { 'name': 'Test Alerta rule!', @@ -2534,7 +2221,6 @@ def test_alerta_new_style(ea): 'alerta_severity': "debug", 'alerta_text': "Probe {hostname} is UP at {logdate} GMT", 'alerta_value': "UP", - 'alerta_new_style_string_format': True, 'type': 'any', 'alerta_use_match_timestamp': True, 'alert': 'alerta' @@ -2611,3 +2297,39 @@ def test_alert_subject_size_limit_with_args(ea): alert = Alerter(rule) alertSubject = alert.create_custom_title([{'test_term': 'test_value', '@timestamp': '2014-10-31T00:00:00'}]) assert 6 == len(alertSubject) + + +def test_datadog_alerter(): + rule = { + 'name': 'Test Datadog Event Alerter', + 'type': 'any', + 'datadog_api_key': 'test-api-key', + 'datadog_app_key': 'test-app-key', + 'alert': [], + 'alert_subject': 'Test Datadog Event Alert' + } + rules_loader = FileRulesLoader({}) + rules_loader.load_modules(rule) + alert = DatadogAlerter(rule) + match = { + '@timestamp': '2021-01-01T00:00:00', + 'name': 'datadog-test-name' + } + with mock.patch('requests.post') as mock_post_request: + alert.alert([match]) + + expected_data = { + 'title': rule['alert_subject'], + 'text': "Test Datadog Event Alerter\n\n@timestamp: 2021-01-01T00:00:00\nname: datadog-test-name\n" + } + mock_post_request.assert_called_once_with( + "https://api.datadoghq.com/api/v1/events", + data=mock.ANY, + headers={ + 'Content-Type': 'application/json', + 'DD-API-KEY': rule['datadog_api_key'], + 'DD-APPLICATION-KEY': rule['datadog_app_key'] + } + ) + actual_data = json.loads(mock_post_request.call_args_list[0][1]['data']) + assert expected_data == actual_data diff --git a/tests/base_test.py b/tests/base_test.py index 92dc35f7..b86498b1 100644 --- a/tests/base_test.py +++ 
b/tests/base_test.py @@ -427,8 +427,8 @@ def test_agg_matchtime(ea): call4 = ea.writeback_es.deprecated_search.call_args_list[10][1]['body'] assert 'alert_time' in call2['filter']['range'] - assert call3['query']['query_string']['query'] == 'aggregate_id:ABCD' - assert call4['query']['query_string']['query'] == 'aggregate_id:CDEF' + assert call3['query']['query_string']['query'] == 'aggregate_id:"ABCD"' + assert call4['query']['query_string']['query'] == 'aggregate_id:"CDEF"' assert ea.writeback_es.deprecated_search.call_args_list[9][1]['size'] == 1337 @@ -596,8 +596,8 @@ def test_agg_with_aggregation_key(ea): call4 = ea.writeback_es.deprecated_search.call_args_list[10][1]['body'] assert 'alert_time' in call2['filter']['range'] - assert call3['query']['query_string']['query'] == 'aggregate_id:ABCD' - assert call4['query']['query_string']['query'] == 'aggregate_id:CDEF' + assert call3['query']['query_string']['query'] == 'aggregate_id:"ABCD"' + assert call4['query']['query_string']['query'] == 'aggregate_id:"CDEF"' assert ea.writeback_es.deprecated_search.call_args_list[9][1]['size'] == 1337 diff --git a/tests/kibana_discover_test.py b/tests/kibana_discover_test.py index f06fe4e0..0e796e48 100644 --- a/tests/kibana_discover_test.py +++ b/tests/kibana_discover_test.py @@ -38,7 +38,7 @@ def test_generate_kibana_discover_url_with_kibana_5x_and_6x(kibana_version): assert url == expectedUrl -@pytest.mark.parametrize("kibana_version", ['7.0', '7.1', '7.2', '7.3']) +@pytest.mark.parametrize("kibana_version", ['7.0', '7.1', '7.2', '7.3', '7.4', '7.5', '7.6', '7.7', '7.8', '7.9', '7.10', '7.11', '7.12']) def test_generate_kibana_discover_url_with_kibana_7x(kibana_version): url = generate_kibana_discover_url( rule={ @@ -171,7 +171,7 @@ def test_generate_kibana_discover_url_with_from_timedelta(): url = generate_kibana_discover_url( rule={ 'kibana_discover_app_url': 'http://kibana:5601/#/discover', - 'kibana_discover_version': '7.3', + 'kibana_discover_version': '7.12', 'kibana_discover_index_pattern_id': 'd6cabfb6-aaef-44ea-89c5-600e9a76991a', 'kibana_discover_from_timedelta': timedelta(hours=1), 'timestamp_field': 'timestamp' @@ -204,7 +204,7 @@ def test_generate_kibana_discover_url_with_from_timedelta_and_timeframe(): url = generate_kibana_discover_url( rule={ 'kibana_discover_app_url': 'http://kibana:5601/#/discover', - 'kibana_discover_version': '7.3', + 'kibana_discover_version': '7.12', 'kibana_discover_index_pattern_id': 'd6cabfb6-aaef-44ea-89c5-600e9a76991a', 'kibana_discover_from_timedelta': timedelta(hours=1), 'timeframe': timedelta(minutes=20), @@ -238,7 +238,7 @@ def test_generate_kibana_discover_url_with_to_timedelta(): url = generate_kibana_discover_url( rule={ 'kibana_discover_app_url': 'http://kibana:5601/#/discover', - 'kibana_discover_version': '7.3', + 'kibana_discover_version': '7.12', 'kibana_discover_index_pattern_id': 'd6cabfb6-aaef-44ea-89c5-600e9a76991a', 'kibana_discover_to_timedelta': timedelta(hours=1), 'timestamp_field': 'timestamp' @@ -271,7 +271,7 @@ def test_generate_kibana_discover_url_with_to_timedelta_and_timeframe(): url = generate_kibana_discover_url( rule={ 'kibana_discover_app_url': 'http://kibana:5601/#/discover', - 'kibana_discover_version': '7.3', + 'kibana_discover_version': '7.12', 'kibana_discover_index_pattern_id': 'd6cabfb6-aaef-44ea-89c5-600e9a76991a', 'kibana_discover_to_timedelta': timedelta(hours=1), 'timeframe': timedelta(minutes=20), @@ -305,7 +305,7 @@ def test_generate_kibana_discover_url_with_timeframe(): url = generate_kibana_discover_url( 
rule={ 'kibana_discover_app_url': 'http://kibana:5601/#/discover', - 'kibana_discover_version': '7.3', + 'kibana_discover_version': '7.12', 'kibana_discover_index_pattern_id': 'd6cabfb6-aaef-44ea-89c5-600e9a76991a', 'timeframe': timedelta(minutes=20), 'timestamp_field': 'timestamp' diff --git a/tests/rules_test.py b/tests/rules_test.py index 1954b5d5..5f6421d8 100644 --- a/tests/rules_test.py +++ b/tests/rules_test.py @@ -1184,6 +1184,41 @@ def test_metric_aggregation_complex_query_key(): assert rule.matches[1]['sub_qk'] == 'sub_qk_val2' +def test_metric_aggregation_complex_query_key_bucket_interval(): + rules = {'buffer_time': datetime.timedelta(minutes=5), + 'timestamp_field': '@timestamp', + 'metric_agg_type': 'avg', + 'metric_agg_key': 'cpu_pct', + 'bucket_interval': {'minutes': 1}, + 'bucket_interval_timedelta': datetime.timedelta(minutes=1), + 'compound_query_key': ['qk', 'sub_qk'], + 'query_key': 'qk,sub_qk', + 'max_threshold': 0.8} + + # Quoted from https://elastalert.readthedocs.io/en/latest/ruletypes.html#metric-aggregation + # bucket_interval: If present this will divide the metric calculation window into bucket_interval sized segments. + # The metric value will be calculated and evaluated against the threshold(s) for each segment. + interval_aggs = {"interval_aggs": {"buckets": [ + {"metric_cpu_pct_avg": {"value": 0.91}, "key": "1617156690000"}, + {"metric_cpu_pct_avg": {"value": 0.89}, "key": "1617156750000"}, + {"metric_cpu_pct_avg": {"value": 0.78}, "key": "1617156810000"}, + {"metric_cpu_pct_avg": {"value": 0.85}, "key": "1617156870000"}, + {"metric_cpu_pct_avg": {"value": 0.86}, "key": "1617156930000"}, + ]}, "key": "sub_qk_val1"} + + query = {"bucket_aggs": {"buckets": [ + interval_aggs + ]}, "key": "qk_val"} + + rule = MetricAggregationRule(rules) + rule.check_matches(datetime.datetime.now(), 'qk_val', query) + assert len(rule.matches) == 4 + assert rule.matches[0]['qk'] == 'qk_val' + assert rule.matches[1]['qk'] == 'qk_val' + assert rule.matches[0]['sub_qk'] == 'sub_qk_val1' + assert rule.matches[1]['sub_qk'] == 'sub_qk_val1' + + def test_percentage_match(): rules = {'match_bucket_filter': {'term': 'term_val'}, 'buffer_time': datetime.timedelta(minutes=5), diff --git a/tox.ini b/tox.ini index 71099e17..47e62caa 100644 --- a/tox.ini +++ b/tox.ini @@ -1,6 +1,6 @@ [tox] project = elastalert -envlist = py36,docs +envlist = py39,docs [testenv] deps = -rrequirements-dev.txt @@ -25,6 +25,6 @@ norecursedirs = .* virtualenv_run docs build venv env [testenv:docs] deps = {[testenv]deps} - sphinx==1.6.6 + sphinx==3.5.4 changedir = docs commands = sphinx-build -b html -d build/doctrees -W source build/html
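Taken together, the ruletypes.py and schema.yaml changes above enable percentile-based aggregations for the metric_aggregation and spike_aggregation rule types. A minimal rule sketch exercising the new percentiles support might look like the following; the index pattern, field name, and thresholds are illustrative assumptions only, not values taken from this change set.

# Hypothetical rule exercising the new percentiles aggregation support.
# The index, metric_agg_key, and thresholds below are placeholder assumptions.
name: "Example p95 spike"
type: spike_aggregation
index: metrics-*                 # assumed index pattern
timeframe:
  minutes: 10
metric_agg_key: response_time    # assumed numeric field
metric_agg_type: percentiles     # newly allowed by this change
percentile_range: 95             # required whenever metric_agg_type is percentiles
spike_height: 2
spike_type: up
alert:
- debug

With a configuration like this, generate_aggregation_query() emits a percentiles aggregation with percents: [95], and add_aggregation_data() reads the single entry of the returned values dict as the metric passed to SpikeRule's tracker.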