diff --git a/Dockerfile b/Dockerfile
index 1e8bd955..f308b85f 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -22,6 +22,6 @@ RUN apt-get update && \
 ADD . /app
 WORKDIR /app
 RUN pip install -U setuptools
-RUN pip install ."[api, datadog, dynatrace, prometheus, elasticsearch, splunk, pubsub, cloud_monitoring, cloud_service_monitoring, cloud_storage, bigquery, cloudevent, dev]"
+RUN pip install ."[api, datadog, dynatrace, prometheus, elasticsearch, opensearch, splunk, pubsub, cloud_monitoring, cloud_service_monitoring, cloud_storage, bigquery, cloudevent, dev]"
 ENTRYPOINT [ "slo-generator" ]
 CMD ["-v"]

diff --git a/Makefile b/Makefile
index 34ac94d1..dd87f7f7 100644
--- a/Makefile
+++ b/Makefile
@@ -58,7 +58,7 @@ develop: install
 	pre-commit install

 install: clean
-	$(PIP) install -e ."[api, datadog, prometheus, elasticsearch, splunk, pubsub, cloud_monitoring, bigquery, dev]"
+	$(PIP) install -e ."[api, datadog, prometheus, elasticsearch, opensearch, splunk, pubsub, cloud_monitoring, bigquery, dev]"

 uninstall: clean
 	$(PIP) freeze --exclude-editable | xargs $(PIP) uninstall -y
@@ -102,7 +102,7 @@ bandit:
 safety:
 	safety check

-integration: int_cm int_csm int_custom int_dd int_dt int_es int_prom int_sp
+integration: int_cm int_csm int_custom int_dd int_dt int_es int_prom int_sp int_os

 int_cm:
 	slo-generator compute -f samples/cloud_monitoring -c samples/config.yaml
@@ -128,6 +128,9 @@ int_prom:
 int_sp:
 	slo-generator compute -f samples/splunk -c samples/config.yaml

+int_os:
+	slo-generator compute -f samples/opensearch -c samples/config.yaml
+
 # Run API locally
 run_api:
 	slo-generator api --target=run_compute --signature-type=http -c samples/config.yaml

diff --git a/docs/providers/opensearch.md b/docs/providers/opensearch.md
new file mode 100644
index 00000000..d220e2d1
--- /dev/null
+++ b/docs/providers/opensearch.md
@@ -0,0 +1,97 @@
# OpenSearch

## Backend

Using the `OpenSearch` backend class, you can query any metrics available in OpenSearch to create an SLO.

```yaml
backends:
  open_search:
    url: ${OPENSEARCH_URL}
```

Note that `url` can be either a single string (when connecting to a single node) or a list of strings (when connecting to multiple nodes):

```yaml
backends:
  open_search:
    url: https://localhost:9200
```

```yaml
backends:
  open_search:
    url:
      - https://localhost:9200
      - https://localhost:9201
```

The following method is available to compute SLOs with the `open_search` backend:

* `good_bad_ratio`: used to compute the ratio between two metrics:
  * **Good events**, i.e. events we consider 'good' from the user perspective.
  * **Bad or valid events**, i.e. events we consider either 'bad' from the user perspective, or 'valid' for the computation of the SLO.

This method is often used for availability SLOs, but can be used for other purposes as well (see examples).

**SLO example:**

```yaml
  backend: open_search
  method: good_bad_ratio
  service_level_indicator:
    index: my-index
    date_field: '@timestamp'
    query_good:
      must:
        range:
          api-response-time:
            lt: 350
    query_bad:
      must:
        range:
          api-response-time:
            gte: 350
```

Additional info:

* `date_field`: has to be a valid OpenSearch `date` field.

**→ [Full SLO config](../../samples/opensearch/slo_opensearch_latency_sli.yaml)**

You can also use the `query_valid` field, which identifies all valid events, instead of the `query_bad` field, which identifies bad events, as shown below.
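
For illustration, here is a hypothetical SLI using `query_valid`: good events are `200` responses, valid events are all documents carrying a `status` field (a sketch; the `status` field and the `exists` clause are placeholders for your own index mapping):

```yaml
  backend: open_search
  method: good_bad_ratio
  service_level_indicator:
    index: my-index
    date_field: '@timestamp'
    query_good:
      must:
        term:
          status: 200
    query_valid:
      must:
        exists:
          field: status
```

With this configuration, the backend computes bad events as valid events minus good events.
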
The query clauses entered in the `query_good`, `query_bad` or `query_valid` fields will be combined (using the `bool` operator) into a larger query that filters results on the `window` specified in your Error Budget Policy steps.

The full OpenSearch query body for the `query_bad` above will therefore look like:

```json
{
  "query": {
    "bool": {
      "must": {
        "range": {
          "api-response-time": {
            "gte": 350
          }
        }
      },
      "filter": {
        "range": {
          "@timestamp": {
            "gte": "now-3600s/s",
            "lt": "now/s"
          }
        }
      }
    }
  },
  "track_total_hits": true
}
```

### Examples

Complete SLO samples using the `open_search` backend are available in [samples/opensearch](../../samples/opensearch). Check them out!

diff --git a/samples/.env.sample b/samples/.env.sample
index 75475e4c..4d9ce95e 100644
--- a/samples/.env.sample
+++ b/samples/.env.sample
@@ -21,3 +21,4 @@ export DYNATRACE_API_TOKEN=
 export BIGQUERY_PROJECT_ID=
 export BIGQUERY_DATASET_ID=
 export BIGQUERY_TABLE_ID=
+export OPENSEARCH_URL=

diff --git a/samples/config.yaml b/samples/config.yaml
index deabe06a..f31796b9 100644
--- a/samples/config.yaml
+++ b/samples/config.yaml
@@ -24,6 +24,8 @@ backends:
     port: ${SPLUNK_PORT}
     user: ${SPLUNK_USER}
     password: ${SPLUNK_PWD}
+  open_search:
+    url: ${OPENSEARCH_URL}

 exporters:
   cloudevent:

diff --git a/samples/opensearch/slo_opensearch_availability_sli.yaml b/samples/opensearch/slo_opensearch_availability_sli.yaml
new file mode 100644
index 00000000..83fe7556
--- /dev/null
+++ b/samples/opensearch/slo_opensearch_availability_sli.yaml
@@ -0,0 +1,25 @@
apiVersion: sre.google.com/v2
kind: ServiceLevelObjective
metadata:
  name: open-search-availability
  labels:
    service_name: opensearch
    feature_name: opensearch-availability
    slo_name: availability
spec:
  description: 99% of requests return a 200 status
  backend: open_search
  method: good_bad_ratio
  exporters: []
  service_level_indicator:
    index: "gravitee-request-*"
    date_field: '@timestamp'
    query_good:
      must:
        term:
          status: 200
    query_bad:
      must_not:
        term:
          status: 200
  goal: 0.99

diff --git a/samples/opensearch/slo_opensearch_latency_sli.yaml b/samples/opensearch/slo_opensearch_latency_sli.yaml
new file mode 100644
index 00000000..708df776
--- /dev/null
+++ b/samples/opensearch/slo_opensearch_latency_sli.yaml
@@ -0,0 +1,27 @@
apiVersion: sre.google.com/v2
kind: ServiceLevelObjective
metadata:
  name: open-search-latency
  labels:
    service_name: opensearch
    feature_name: opensearch-latency
    slo_name: latency
spec:
  description: 99% of requests complete in under 350 ms
  backend: open_search
  method: good_bad_ratio
  exporters: []
  service_level_indicator:
    index: "gravitee-request-*"
    date_field: '@timestamp'
    query_good:
      must:
        range:
          api-response-time:
            lt: 350
    query_bad:
      must:
        range:
          api-response-time:
            gte: 350
  goal: 0.99

diff --git a/setup.cfg b/setup.cfg
index c2bbf6ff..8ab9fc25 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -91,6 +91,8 @@ cloud_storage =
     google-cloud-storage
 elasticsearch =
     elasticsearch
+opensearch =
+    opensearch-py
 splunk =
     splunk-sdk
 pubsub =

diff --git a/slo_generator/backends/open_search.py b/slo_generator/backends/open_search.py
new file mode 100644
index 00000000..fe2c662a
--- /dev/null
+++ b/slo_generator/backends/open_search.py
@@ -0,0 +1,144 @@
"""
`open_search.py`
OpenSearch backend implementation.
+""" + +import copy +import logging + +from opensearchpy import OpenSearch + +from slo_generator.constants import NO_DATA + +LOGGER = logging.getLogger(__name__) + + +# pylint: disable=duplicate-code +class OpenSearchBackend: + """Backend for querying metrics from OpenSearch. + + Args: + client(opensearch.OpenSearch): Existing OS client. + os_config(dict): OS client configuration. + """ + + def __init__(self, client=None, **os_config): + self.client = client + if self.client is None: + conf = copy.deepcopy(os_config) + url = conf.pop("url", None) + basic_auth = conf.pop("basic_auth", None) + api_key = conf.pop("api_key", None) + if url: + conf["hosts"] = url + if basic_auth: + conf["basic_auth"] = (basic_auth["username"], basic_auth["password"]) + if api_key: + conf["api_key"] = (api_key["id"], api_key["value"]) + + self.client = OpenSearch(**conf) + + # pylint: disable=unused-argument + def good_bad_ratio(self, timestamp, window, slo_config): + """Query two timeseries, one containing 'good' events, one containing + 'bad' events. + + Args: + timestamp(int): UNIX timestamp. + window(int): Window size (in seconds). + slo_config(dict): SLO configuration. + spec: + method: "good_bad_ratio" + service_level_indicator: + query_good(str): the search query to look for good events + query_bad(str): the search query to look for ba events + query_valid(str): the search query to look for valid events + + Returns: + tuple: good_event_count, bad_event_count + """ + measurement = slo_config["spec"]["service_level_indicator"] + index = measurement["index"] + query_good = measurement["query_good"] + query_bad = measurement.get("query_bad") + query_valid = measurement.get("query_valid") + date_field = measurement.get("date_field") + + good = OS.build_query(query_good, window, date_field) + bad = OS.build_query(query_bad, window, date_field) + valid = OS.build_query(query_valid, window, date_field) + + good_events_count = OS.count(self.query(index, good)) + + if query_bad is not None: + bad_events_count = OS.count(self.query(index, bad)) + elif query_valid is not None: + bad_events_count = OS.count(self.query(index, valid)) - good_events_count + else: + raise ValueError("`filter_bad` or `filter_valid` is required.") + + return good_events_count, bad_events_count + + def query(self, index, body): + """Query Opensearch server. + + Args: + index(str): Index to query. + body(dict): Query body. + + Returns: + dict: Response. + """ + return self.client.search(index=index, body=body) + + @staticmethod + def count(response): + """Count event in opensearch response. + + Args: + response(dict): Opensearch query response. + + Returns: + int: Event count. + """ + try: + return response["hits"]["total"]["value"] + except KeyError as exception: + LOGGER.warning("Couldn't find any values in timeseries response") + LOGGER.debug(exception, exc_info=True) + return NO_DATA + + @staticmethod + def build_query(query, window, date_field): + """Build Opensearch query. + + Add window to existing query. + Replace window for different error budget steps on-the-fly. + + Args: + query(dict): Existing query body. + window(int): Window in seconds. + date_field(str): Field to filter time on + + Returns: + dict: Query body with range clause added. 
+ """ + if query is None: + return None + body = {"query": {"bool": query}, "track_total_hits": True} + range_query = { + f"{date_field}": { + "gte": f"now-{window}s/s", + "lt": "now/s", + } + } + + if "filter" in body["query"]["bool"]: + body["query"]["bool"]["filter"]["range"] = range_query + else: + body["query"]["bool"]["filter"] = {"range": range_query} + + return body + + +OS = OpenSearchBackend diff --git a/tests/unit/backends/test_opensearch.py b/tests/unit/backends/test_opensearch.py new file mode 100644 index 00000000..4e5b3640 --- /dev/null +++ b/tests/unit/backends/test_opensearch.py @@ -0,0 +1,122 @@ +import unittest + +from slo_generator.backends.open_search import OpenSearchBackend + + +class TestOpenSearchBackend(unittest.TestCase): + assert 1 == 1 + + def test_build_query_with_empty_query(self): + assert OpenSearchBackend.build_query(None, 3600, "date") is None + + def test_build_query_with_simple_query_and_no_filter(self): + query: dict = { + "must": { + "term": { + "status": "200", + }, + }, + } + + enriched_query = { + "query": { + "bool": { + "must": { + "term": { + "status": "200", + }, + }, + "filter": { + "range": { + "date": { + "gte": "now-3600s/s", + "lt": "now/s", + }, + }, + }, + }, + }, + "track_total_hits": True, + } + + assert OpenSearchBackend.build_query(query, 3600, "date") == enriched_query + + def test_build_query_with_simple_query_and_simple_filter(self): + query: dict = { + "must": { + "term": { + "status": "200", + }, + }, + "filter": { + "term": { + "type": "get", + }, + }, + } + + enriched_query = { + "query": { + "bool": { + "must": { + "term": { + "status": "200", + }, + }, + "filter": { + "term": { + "type": "get", + }, + "range": { + "date": { + "gte": "now-3600s/s", + "lt": "now/s", + }, + }, + }, + }, + }, + "track_total_hits": True, + } + + assert OpenSearchBackend.build_query(query, 3600, "date") == enriched_query + + def test_build_query_with_simple_query_and_existing_filter_with_range(self): + query: dict = { + "must": { + "term": { + "status": "200", + }, + }, + "filter": { + "range": { + "date": { + "gte": "2023-08-28", + "lt": "2023-08-29", + }, + }, + }, + } + + enriched_query: dict = { + "query": { + "bool": { + "must": { + "term": { + "status": "200", + }, + }, + "filter": { + "range": { + "date": { + "gte": "now-3600s/s", + "lt": "now/s", + }, + }, + }, + }, + }, + "track_total_hits": True, + } + + assert OpenSearchBackend.build_query(query, 3600, "date") == enriched_query diff --git a/tests/unit/fixtures/os_generic_response.json b/tests/unit/fixtures/os_generic_response.json new file mode 100644 index 00000000..a56e1eeb --- /dev/null +++ b/tests/unit/fixtures/os_generic_response.json @@ -0,0 +1,7 @@ +{ + "hits": { + "total": { + "value": 120 + } + } +} diff --git a/tests/unit/test_compute.py b/tests/unit/test_compute.py index 7a305f70..0f36dc6d 100644 --- a/tests/unit/test_compute.py +++ b/tests/unit/test_compute.py @@ -19,6 +19,7 @@ from elasticsearch import Elasticsearch from google.auth._default import _CLOUD_SDK_CREDENTIALS_WARNING from mock import MagicMock, patch +from opensearchpy import OpenSearch from prometheus_http_client import Prometheus from splunklib import client as Splunk from splunklib.client import Jobs @@ -40,6 +41,7 @@ mock_dt, mock_dt_errors, mock_es, + mock_os_client, mock_prom, mock_sd, mock_splunk_oneshot, @@ -57,6 +59,7 @@ SLO_CONFIGS_DD = load_slo_samples("datadog", CTX) SLO_CONFIGS_DT = load_slo_samples("dynatrace", CTX) SLO_CONFIGS_SPLUNK = load_slo_samples("splunk", CTX) +SLO_CONFIGS_OS = 
 SLO_REPORT = load_fixture("slo_report_v2.json")
 SLO_REPORT_V1 = load_fixture("slo_report_v1.json")
 EXPORTERS = load_fixture("exporters.yaml", CTX)
@@ -145,6 +148,12 @@ def test_compute_dynatrace(self, mock):
         with self.subTest(config=config):
             compute(config, CONFIG)

+    @patch.object(OpenSearch, "search", mock_os_client)
+    def test_compute_opensearch(self):
+        for config in SLO_CONFIGS_OS:
+            with self.subTest(config=config):
+                compute(config, CONFIG)
+
     @patch(PUBSUB_MOCKS[0])
     @patch(PUBSUB_MOCKS[1])
     @patch(PUBSUB_MOCKS[2])

diff --git a/tests/unit/test_stubs.py b/tests/unit/test_stubs.py
index 8e4493f6..3bfa4918 100644
--- a/tests/unit/test_stubs.py
+++ b/tests/unit/test_stubs.py
@@ -60,6 +60,7 @@
     "SPLUNK_PORT": "8089",
     "SPLUNK_USER": "fake",
     "SPLUNK_PWD": "fake",
+    "OPENSEARCH_URL": "http://localhost:9201",
 }


@@ -269,6 +270,11 @@ def mock_splunk_oneshot(search):
     return load_fixture("splunk_generic_response.json")


+def mock_os_client(self, index, body):
+    """Mock OpenSearch query response."""
+    return load_fixture("os_generic_response.json")
+
+
 class dotdict(dict):
     """dot.notation access to dictionary attributes"""
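
For reviewers who want to exercise the new backend outside the test suite, here is a minimal sketch built from the code above. It assumes a reachable OpenSearch node at `https://localhost:9200` and the index layout used by the samples; it is not part of the change itself:

```python
from slo_generator.backends.open_search import OpenSearchBackend

# Sketch: assumes a live OpenSearch node and an index matching the samples.
backend = OpenSearchBackend(url="https://localhost:9200")

slo_config = {
    "spec": {
        "service_level_indicator": {
            "index": "gravitee-request-*",
            "date_field": "@timestamp",
            "query_good": {"must": {"range": {"api-response-time": {"lt": 350}}}},
            "query_bad": {"must": {"range": {"api-response-time": {"gte": 350}}}},
        }
    }
}

# Count good/bad events over the last hour (timestamp is unused by this method).
good, bad = backend.good_bad_ratio(timestamp=None, window=3600, slo_config=slo_config)
print(f"good={good} bad={bad}")
```
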