improve sqlserver debug instrumentation
Update all internal method invocation debug instrumentation to use the new decorator added in #10809.
djova committed Dec 8, 2021
1 parent c7abbe2 commit 221740e
Showing 3 changed files with 53 additions and 62 deletions.
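
The decorator referenced in the commit message is imported from datadog_checks.base.utils.db.utils. As a rough sketch only — the metric names, wrapper behavior, and helper calls below are assumptions inferred from the call sites in this diff, not the actual #10809 implementation — such a tracking decorator might look like:

import functools
import time


def dbm_tracked_method(integration, parent_check_attr, track_result_length=False):
    # Hypothetical sketch: wrap a DBMAsyncJob method, time it, and submit
    # debug metrics through the parent check named by parent_check_attr.
    def decorator(function):
        @functools.wraps(function)
        def wrapper(self, *args, **kwargs):
            check = getattr(self, parent_check_attr)  # e.g. self.check
            start_time = time.time()
            result = function(self, *args, **kwargs)
            elapsed_ms = (time.time() - start_time) * 1000
            metric_base = "dd.{}.operation.{}".format(integration, function.__name__)
            check.histogram(metric_base + ".time", elapsed_ms, **check.debug_stats_kwargs())
            if track_result_length and result is not None:
                check.gauge(metric_base + ".result_length", len(result), **check.debug_stats_kwargs())
            return result

        return wrapper

    return decorator

This is why each decorated method below drops its hand-rolled start_time/elapsed_ms bookkeeping: the timing moves into one shared wrapper.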
19 changes: 6 additions & 13 deletions sqlserver/datadog_checks/sqlserver/activity.py
@@ -5,7 +5,7 @@
from datadog_checks.base import is_affirmative
from datadog_checks.base.utils.common import to_native_string
from datadog_checks.base.utils.db.sql import compute_sql_signature
from datadog_checks.base.utils.db.utils import DBMAsyncJob, default_json_event_encoding
from datadog_checks.base.utils.db.utils import DBMAsyncJob, dbm_tracked_method, default_json_event_encoding
from datadog_checks.base.utils.serialization import json

try:
@@ -117,6 +117,7 @@ def _close_db_conn(self):
def run_job(self):
self.collect_activity()

@dbm_tracked_method("sqlserver", parent_check_attr="check")
def _get_active_connections(self, cursor):
self.log.debug("collecting sql server current connections")
self.log.debug("Running query [%s]", CONNECTIONS_QUERY)
@@ -127,6 +128,7 @@ def _get_active_connections(self, cursor):
self.log.debug("loaded sql server current connections len(rows)=%s", len(rows))
return rows

@dbm_tracked_method("sqlserver", parent_check_attr="check", track_result_length=True)
def _get_activity(self, cursor):
self.log.debug("collecting sql server activity")
self.log.debug("Running query [%s]", ACTIVITY_QUERY)
@@ -187,12 +189,13 @@ def _create_activity_event(self, active_sessions, active_connections):
def _truncate_activity_rows(self, rows, max_bytes):
pass

@dbm_tracked_method("sqlserver", parent_check_attr="check")
def collect_activity(self):
"""
Collects all current activity for the SQLServer instance.
:return:
"""
start_time = time.time()

# re-use the check's conn module, but set extra_key=dbm-activity- to ensure we get our own
# raw connection. adodbapi and pyodbc modules are thread safe, but connections are not.
with self.check.connection.open_managed_default_connection(key_prefix=self._conn_key_prefix):
@@ -204,18 +207,8 @@ def collect_activity(self):
payload = json.dumps(event, default=default_json_event_encoding)
self._check.database_monitoring_query_activity(payload)

elapsed_ms = (time.time() - start_time) * 1000
self.check.histogram(
"dd.sqlserver.activity.collect_activity.time",
elapsed_ms,
tags=self.check.debug_tags(),
hostname=self.check.resolved_hostname,
raw=True,
)
self.check.histogram(
"dd.sqlserver.activity.collect_activity.payload_size",
len(payload),
tags=self.check.debug_tags(),
hostname=self.check.resolved_hostname,
raw=True,
**self.check.debug_stats_kwargs()
)
9 changes: 8 additions & 1 deletion sqlserver/datadog_checks/sqlserver/sqlserver.py
@@ -10,7 +10,6 @@

import six
from cachetools import TTLCache

from datadog_checks.base import AgentCheck, ConfigurationError
from datadog_checks.base.config import is_affirmative
from datadog_checks.base.utils.db import QueryManager
@@ -177,6 +176,14 @@ def load_static_information(self):
def debug_tags(self):
return self.tags + ['agent_hostname:{}'.format(self.agent_hostname)]

def debug_stats_kwargs(self, tags=None):
tags = tags if tags else []
return {
"tags": self.debug_tags() + tags,
"hostname": self.resolved_hostname,
"raw": True,
}

@property
def agent_hostname(self):
# type: () -> str
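
The new debug_stats_kwargs helper above collapses the tags/hostname/raw keyword arguments that every debug metric call previously repeated. A minimal runnable demonstration — the check class here is a stand-in with made-up tag values, not the real SQLServer check:

class FakeCheck(object):
    tags = ["db:master"]
    resolved_hostname = "sqlserver-host"

    def debug_tags(self):
        return self.tags + ["agent_hostname:agent-01"]

    def debug_stats_kwargs(self, tags=None):
        tags = tags if tags else []
        return {
            "tags": self.debug_tags() + tags,
            "hostname": self.resolved_hostname,
            "raw": True,
        }


check = FakeCheck()
# Every debug metric call now unpacks one shared kwargs dict instead of
# repeating tags=/hostname=/raw= at each call site:
print(check.debug_stats_kwargs(tags=["error:obfuscate-query-ValueError"]))
# {'tags': ['db:master', 'agent_hostname:agent-01', 'error:obfuscate-query-ValueError'],
#  'hostname': 'sqlserver-host', 'raw': True}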
87 changes: 39 additions & 48 deletions sqlserver/datadog_checks/sqlserver/statements.py
@@ -9,7 +9,8 @@
from datadog_checks.base.utils.common import ensure_unicode, to_native_string
from datadog_checks.base.utils.db.sql import compute_sql_signature
from datadog_checks.base.utils.db.statement_metrics import StatementMetrics
from datadog_checks.base.utils.db.utils import DBMAsyncJob, RateLimitingTTLCache, default_json_event_encoding
from datadog_checks.base.utils.db.utils import DBMAsyncJob, dbm_tracked_method, RateLimitingTTLCache, \
default_json_event_encoding
from datadog_checks.base.utils.serialization import json

try:
@@ -174,6 +175,7 @@ def _get_statement_metrics_query_cached(self, cursor):
)
return self._statement_metrics_query

@dbm_tracked_method("sqlserver", parent_check_attr="check", track_result_length=True)
def _load_raw_query_metrics_rows(self, cursor):
self.log.debug("collecting sql server statement metrics")
statement_metrics_query = self._get_statement_metrics_query_cached(cursor)
@@ -193,6 +195,11 @@ def _normalize_queries(self, rows):
except Exception as e:
# obfuscation errors are relatively common so only log them during debugging
self.log.debug("Failed to obfuscate query: %s", e)
self.check.count(
"dd.sqlserver.statements.error",
1,
**self.check.debug_stats_kwargs(tags=["error:obfuscate-query-{}".format(type(e))]),
)
continue
row['text'] = obfuscated_statement
row['query_signature'] = compute_sql_signature(obfuscated_statement)
@@ -228,72 +235,49 @@ def _to_metrics_payload(self, rows):
'ddagentversion': datadog_agent.get_version(),
}

@dbm_tracked_method("sqlserver", parent_check_attr="check")
def collect_statement_metrics_and_plans(self):
"""
Collects statement metrics and plans.
:return:
"""
start_time = time.time()
plans_submitted = 0
try:
# re-use the check's conn module, but set extra_key=dbm- to ensure we get our own
# raw connection. adodbapi and pyodbc modules are thread safe, but connections are not.
with self.check.connection.open_managed_default_connection(key_prefix=self._conn_key_prefix):
with self.check.connection.get_managed_cursor(key_prefix=self._conn_key_prefix) as cursor:
rows = self._collect_metrics_rows(cursor)
if not rows:
return
for event in self._rows_to_fqt_events(rows):
self.check.database_monitoring_query_sample(
json.dumps(event, default=default_json_event_encoding)
)
payload = self._to_metrics_payload(rows)
self.check.database_monitoring_query_metrics(
json.dumps(payload, default=default_json_event_encoding)

# re-use the check's conn module, but set extra_key=dbm- to ensure we get our own
# raw connection. adodbapi and pyodbc modules are thread safe, but connections are not.
with self.check.connection.open_managed_default_connection(key_prefix=self._conn_key_prefix):
with self.check.connection.get_managed_cursor(key_prefix=self._conn_key_prefix) as cursor:
rows = self._collect_metrics_rows(cursor)
if not rows:
return
for event in self._rows_to_fqt_events(rows):
self.check.database_monitoring_query_sample(
json.dumps(event, default=default_json_event_encoding)
)
for event in self._collect_plans(rows, cursor):
self.check.database_monitoring_query_sample(
json.dumps(event, default=default_json_event_encoding)
)
plans_submitted += 1
except Exception:
self.log.exception('Unable to collect statement metrics due to an error')
self.check.count(
"dd.sqlserver.statements.error",
1,
tags=self.check.debug_tags(),
hostname=self.check.resolved_hostname,
)
return []
payload = self._to_metrics_payload(rows)
self.check.database_monitoring_query_metrics(
json.dumps(payload, default=default_json_event_encoding)
)
for event in self._collect_plans(rows, cursor):
self.check.database_monitoring_query_sample(
json.dumps(event, default=default_json_event_encoding)
)
plans_submitted += 1

elapsed_ms = (time.time() - start_time) * 1000
self.check.histogram(
"dd.sqlserver.statements.collect_statement_metrics_and_plans.time",
elapsed_ms,
tags=self.check.debug_tags(),
hostname=self.check.resolved_hostname,
raw=True,
)
self.check.count(
"dd.sqlserver.statements.plans_submitted.count",
plans_submitted,
tags=self.check.tags + self.check.debug_tags(),
hostname=self.check.resolved_hostname,
raw=True,
**self.check.debug_stats_kwargs(),
)
self.check.gauge(
"dd.sqlserver.statements.seen_plans_cache.len",
len(self._seen_plans_ratelimiter),
tags=self.check.debug_tags(),
hostname=self.check.resolved_hostname,
raw=True,
**self.check.debug_stats_kwargs(),
)
self.check.gauge(
"dd.sqlserver.statements.fqt_cache.len",
len(self._full_statement_text_cache),
tags=self.check.debug_tags(),
hostname=self.check.resolved_hostname,
raw=True,
**self.check.debug_stats_kwargs(),
)

def _rows_to_fqt_events(self, rows):
@@ -325,6 +309,7 @@ def _rows_to_fqt_events(self, rows):
def run_job(self):
self.collect_statement_metrics_and_plans()

@dbm_tracked_method("sqlserver", parent_check_attr="check")
def _load_plan(self, query_hash, query_plan_hash, cursor):
self.log.debug("collecting plan. query_hash=%s query_plan_hash=%s", query_hash, query_plan_hash)
self.log.debug("Running query [%s] %s", PLAN_LOOKUP_QUERY, (query_hash, query_plan_hash))
@@ -335,6 +320,7 @@ def _load_plan(self, query_hash, query_plan_hash, cursor):
return None
return result[0][0]

@dbm_tracked_method("sqlserver", parent_check_attr="check")
def _collect_plans(self, rows, cursor):
for row in rows:
plan_key = (row['query_signature'], row['query_hash'], row['query_plan_hash'])
@@ -353,6 +339,11 @@ def _collect_plans(self, rows, cursor):
e,
)
collection_errors = [{'code': "obfuscate_xml_plan_error", 'message': str(e)}]
self.check.count(
"dd.sqlserver.statements.error",
1,
**self.check.debug_stats_kwargs(tags=["error:obfuscate-xml-plan-{}".format(type(e))]),
)

tags = self.check.tags + ["db:{}".format(row['database_name'])]
yield {
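
One detail of the new error counters worth noting: formatting type(e) directly produces the full class repr rather than the bare exception name, so the emitted tag values look like the sketch below (type(e).__name__ would give the shorter, more conventional form):

try:
    raise ValueError("boom")
except Exception as e:
    # Tag value as produced by the code in this diff:
    print("error:obfuscate-query-{}".format(type(e)))
    # -> error:obfuscate-query-<class 'ValueError'>
    # Tag value if the bare class name were used instead:
    print("error:obfuscate-query-{}".format(type(e).__name__))
    # -> error:obfuscate-query-ValueError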
