-
Notifications
You must be signed in to change notification settings - Fork 1.4k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Collect postgres statement samples & execution plans
**What does this PR do?** Adds a new feature to "Deep Database Monitoring", enabling collection of statement samples and execution plans. Follow-up to #7852. **How does it work?*** If enabled, a python thread is launched during a regular check run: * collects statement samples at the configured rate limit (default 1 collection per second) * maintains its own `psycopg2` connection to avoid clashing transactions/state with the main thread connection * shuts down if it detects that the main check has not run for two collection intervals * collects execution plans through a postgres function that the user must install into each database being monitored (if we wanted the agent to collect execution plans directly by running `EXPLAIN` then it would need full write permission to all tables) During one "collection" we do the following: 1. read out all new statements from `pg_stat_activity` 1. try to collect an execution plan for each statement 1. submit events directly to the new database monitoring event intake **Rate limiting** There are several different rate limits to keep load on the database to a minimum and to avoid reingesting duplicate events: * `collections_per_second`: limits how often collections are done (each collection is a query to `pg_stat_activity`) * `explained_statements_cache`: limits how often we attempt to collect an execution plan for a given normalized query * `seen_samples_cache`: limits how often we ingest statement samples for the same normalized query and execution plan **Configuration** We're adding a new `statement_samples` postgres instance config section. Here is the full set of available configuration showing the default settings: ```yaml statement_samples: enabled: false collections_per_second: 1 explain_function: 'datadog.explain_statement' explained_statements_cache_maxsize: 5000 explained_statements_per_hour_per_query: 60 seen_samples_cache_maxsize: 10000 samples_per_hour_per_query: 15 ```
- Loading branch information
Showing
14 changed files
with
786 additions
and
29 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
134 changes: 134 additions & 0 deletions
134
datadog_checks_base/datadog_checks/base/utils/db/statement_samples.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,134 @@ | ||
import datetime | ||
import decimal | ||
import itertools | ||
import json | ||
import logging | ||
|
||
import requests | ||
from requests.adapters import HTTPAdapter, Retry | ||
|
||
try: | ||
import datadog_agent | ||
|
||
using_stub_datadog_agent = False | ||
except ImportError: | ||
from ....stubs import datadog_agent | ||
|
||
using_stub_datadog_agent = True | ||
|
||
# Module logger. getLogger(__file__) keyed the logger by the module's file
# path; __name__ is the conventional key and yields a stable dotted name
# that follows the package hierarchy.
logger = logging.getLogger(__name__)
|
||
|
||
class EventEncoder(json.JSONEncoder):
    """JSON encoder that additionally serializes Decimal and date/datetime values.

    Decimals are emitted as floats; dates and datetimes as ISO-8601 strings.
    Anything else is deferred to the base encoder (which raises TypeError).
    """

    def default(self, o):
        if isinstance(o, (datetime.date, datetime.datetime)):
            return o.isoformat()
        if isinstance(o, decimal.Decimal):
            return float(o)
        return super(EventEncoder, self).default(o)
|
||
|
||
def _chunks(items, n): | ||
it = iter(items) | ||
while True: | ||
chunk = tuple(itertools.islice(it, n)) | ||
if not chunk: | ||
return | ||
yield chunk | ||
|
||
|
||
def _new_api_session(api_key):
    """Return a requests.Session preconfigured for the event intake.

    The session retries POSTs over HTTPS (2 attempts each for connect, read,
    redirect and status errors) and sends the given API key on every request.

    :param api_key: value for the DD-API-KEY header
    :return: requests.Session
    """
    http = requests.Session()
    # urllib3 1.26 renamed Retry's `method_whitelist` to `allowed_methods`;
    # the old name was removed in urllib3 2.0 and raises TypeError there.
    try:
        retry = Retry(connect=2, read=2, redirect=2, status=2, allowed_methods=['POST'])
    except TypeError:
        retry = Retry(connect=2, read=2, redirect=2, status=2, method_whitelist=['POST'])
    http.mount("https://", HTTPAdapter(max_retries=retry))
    http.headers.update({'DD-API-KEY': api_key})
    return http
|
||
|
||
def _event_intake_url(host): | ||
if host.endswith("."): | ||
host = host[:-1] | ||
if not host.startswith("https://"): | ||
host = "https://" + host | ||
return host + "/v1/input" | ||
|
||
|
||
# Default host for the database monitoring event intake; a deployment can
# override it via the `database_monitoring.dd_url` agent config key.
default_dbm_url = "dbquery-http-intake.logs.datadoghq.com"
|
||
|
||
def _load_event_endpoints_from_config(config_prefix, default_url):
    """
    Build the list of event-intake endpoints from agent configuration.

    The primary endpoint uses ``<config_prefix>.dd_url`` (falling back to
    ``default_url``) together with the agent's main ``api_key``. Entries under
    ``<config_prefix>.additional_endpoints`` are appended; entries missing
    ``api_key`` or ``host`` are skipped with a warning. A fresh requests
    session is created for every endpoint on each call (no caching here;
    callers are expected to hold on to the result).

    :param config_prefix: agent config section to read, e.g. "database_monitoring"
    :param default_url: host used when ``<config_prefix>.dd_url`` is unset
    :return: list of (requests.Session, url) tuples
    """
    main_url = _event_intake_url(datadog_agent.get_config('{}.dd_url'.format(config_prefix)) or default_url)
    endpoints = [(_new_api_session(datadog_agent.get_config('api_key')), main_url)]
    logger.debug("initializing event endpoints from %s. url=%s", config_prefix, main_url)

    for additional_endpoint in datadog_agent.get_config('{}.additional_endpoints'.format(config_prefix)) or []:
        api_key, host = additional_endpoint.get('api_key'), additional_endpoint.get('host')
        missing_keys = [k for k, v in [('api_key', api_key), ('host', host)] if not v]
        if missing_keys:
            logger.warning(
                "invalid event endpoint found in %s.additional_endpoints. missing required keys %s",
                config_prefix,
                ', '.join(missing_keys),
            )
            continue
        extra_url = _event_intake_url(host)
        endpoints.append((_new_api_session(api_key), extra_url))
        logger.debug("initializing additional event endpoint from %s. url=%s", config_prefix, extra_url)

    return endpoints
|
||
|
||
class StatementSamplesClient:
    """Submits statement sample events to the configured intake endpoints."""

    def __init__(self):
        # One (session, url) pair per configured endpoint; sessions are reused
        # across submissions.
        self._endpoints = _load_event_endpoints_from_config("database_monitoring", default_dbm_url)

    def submit_events(self, events):
        """
        Submit the statement sample events to the event intake
        :return: submitted_count, failed_count
        """
        submitted_count, failed_count = 0, 0
        for chunk in _chunks(events, 100):
            # Serialize once per chunk; the payload is identical for every endpoint.
            payload = json.dumps(chunk, cls=EventEncoder)
            for http, url in self._endpoints:
                try:
                    response = http.request(
                        'post',
                        url,
                        data=payload,
                        timeout=5,
                        headers={'Content-Type': 'application/json'},
                    )
                    response.raise_for_status()
                except requests.HTTPError as e:
                    logger.warning("Failed to submit statement samples to %s: %s", url, e)
                    failed_count += len(chunk)
                except Exception:
                    # Best-effort submission: log and keep going with other endpoints.
                    logger.exception("Failed to submit statement samples to %s", url)
                    failed_count += len(chunk)
                else:
                    logger.debug("Submitted %s statement samples to %s", len(chunk), url)
                    submitted_count += len(chunk)
        return submitted_count, failed_count
|
||
|
||
class StubStatementSamplesClient:
    """In-memory stand-in for StatementSamplesClient (used when the real
    datadog_agent module is unavailable)."""

    def __init__(self):
        # Every submitted event is recorded here for later inspection.
        self._events = []

    def submit_events(self, events):
        """Record the events locally; reports every event as submitted."""
        batch = list(events)
        self._events += batch
        return len(batch), 0
|
||
|
||
# Module-level singleton; falls back to the in-memory stub when the real
# `datadog_agent` module could not be imported at the top of this file.
statement_samples_client = StubStatementSamplesClient() if using_stub_datadog_agent else StatementSamplesClient()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.