Skip to content

Commit

Permalink
Add decorator for tracking execution statistics of check methods (#10809
Browse files Browse the repository at this point in the history
)

* dbm add tracked method decorator

Adds a new decorator `dbm_tracked_method` to be used for tracking the performance of methods in a standardized way. This will significantly reduce measurement boilerplate across the DBM integrations and make the measurements less error prone. It's important to track the performance and error rates of various parts of the DBM integrations in order to better troubleshoot customer issues.

We already have a bunch of internal debug metrics tracking the execution time and error rate of various operations in DBM. For example, `dd.postgres.collect_statement_samples.time` tracks the execution time of the `_collect_statement_samples` method for postgres. All of these manual measurements will be updated to use this new measurement decorator instead.

* move to new tracking module

* revert

* add option to disable

* standardize naming

* logging fix

* style

* fix py2
  • Loading branch information
djova authored Dec 8, 2021
1 parent 34f8dee commit 4395d55
Show file tree
Hide file tree
Showing 2 changed files with 188 additions and 0 deletions.
91 changes: 91 additions & 0 deletions datadog_checks_base/datadog_checks/base/utils/tracking.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
# -*- coding: utf-8 -*-
# (C) Datadog, Inc. 2021-present
# All rights reserved
# Licensed under a 3-clause BSD style license (see LICENSE)

import os

from datadog_checks.base.utils.time import get_timestamp

required_attrs = [
'name',
'log',
'count',
'gauge',
'histogram',
]


def tracked_method(agent_check_getter=None, track_result_length=False):
"""
Decorates an agent check method to provide debug metrics and logging for troubleshooting.
Tracks execution time, errors, and result length.
The function being decorated must be a method on a class that receives the self pointer. This cannot decorate
plain functions.
If the check has a `debug_stats_kwargs` function then that function is called to get a set of kwargs to pass to
the statsd methods (i.e. histogram, count, gauge, etc). This is useful when specific tags need to be added to
these debug metrics in a standardized way.
Set the environment variable DD_DISABLE_TRACKED_METHOD=true to disable tracking.
All metrics produced include the check name in the prefix (i.e. "dd.sqlserver." if the check's name is "sqlserver")
:param agent_check_getter: a function that gets the agent check from the class. The function must receive only a
single parameter, `self`, and it must return a reference to the agent check. If the function is not provided then
`self` must refer to the agent check.
:param track_result_length: if true, the length of the result is tracked
:return: a decorator
"""

def decorator(function):
def wrapper(self, *args, **kwargs):
if os.getenv('DD_DISABLE_TRACKED_METHOD') == "true":
return function(self, *args, **kwargs)

start_time = get_timestamp()

try:
check = agent_check_getter(self) if agent_check_getter else self
except Exception:
print("[{}] invalid tracked_method. failed to get check reference.".format(function.__name__))
return function(self, *args, **kwargs)

for attr in required_attrs:
if not hasattr(check, attr):
print(
"[{}] invalid check reference. Missing required attribute {}.".format(function.__name__, attr)
)
return function(self, *args, **kwargs)

check_name = check.name

stats_kwargs = {}
if hasattr(check, 'debug_stats_kwargs'):
stats_kwargs = dict(check.debug_stats_kwargs())

stats_kwargs['tags'] = stats_kwargs.get('tags', []) + ["operation:{}".format(function.__name__)]

try:
result = function(self, *args, **kwargs)

elapsed_ms = (get_timestamp() - start_time) * 1000
check.histogram("dd.{}.operation.time".format(check_name), elapsed_ms, **stats_kwargs)

check.log.debug("[%s.%s] operation completed in %s ms", check_name, function.__name__, elapsed_ms)

if track_result_length and result is not None:
check.log.debug("[%s.%s] received result length %s", check_name, function.__name__, len(result))
check.gauge("dd.{}.operation.result.length".format(check_name), len(result), **stats_kwargs)

return result
except Exception as e:
check.log.exception("operation %s error", function.__name__)
stats_kwargs['tags'] += ["error:{}".format(type(e))]
check.count("dd.{}.operation.error".format(check_name), 1, **stats_kwargs)
raise

return wrapper

return decorator
97 changes: 97 additions & 0 deletions datadog_checks_base/tests/base/utils/test_tracking.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
# -*- coding: utf-8 -*-
# (C) Datadog, Inc. 2021-present
# All rights reserved
# Licensed under a 3-clause BSD style license (see LICENSE)

import os

import pytest

from datadog_checks.base import AgentCheck
from datadog_checks.base.utils.tracking import tracked_method


def agent_check_getter(self):
return self.check


class HelloCheck(AgentCheck):
def __init__(self, debug_stats_kwargs):
self._debug_stats_kwargs = debug_stats_kwargs
super(HelloCheck, self).__init__(name="hello")

def debug_stats_kwargs(self):
return self._debug_stats_kwargs


EXPECTED_RESULT = 5


class MyException(Exception):
pass


class TestJob:
def __init__(self, check):
self.check = check

def run_job(self):
result = self.do_work()
self.do_work_return_list()
try:
self.test_tracked_exception()
except Exception:
pass

return result

@tracked_method(agent_check_getter=agent_check_getter)
def do_work(self):
return EXPECTED_RESULT

@tracked_method(agent_check_getter=agent_check_getter, track_result_length=True)
def do_work_return_list(self):
return list(range(5))

@tracked_method(agent_check_getter=agent_check_getter)
def test_tracked_exception(self):
raise MyException("oops")


@pytest.mark.parametrize(
"debug_stats_kwargs",
[
{},
{
"tags": ["hey:there"],
"hostname": "tiberius",
},
],
)
@pytest.mark.parametrize("disable_tracking", [True, False])
def test_tracked_method(aggregator, debug_stats_kwargs, disable_tracking):
os.environ['DD_DISABLE_TRACKED_METHOD'] = str(disable_tracking).lower()
check = HelloCheck(debug_stats_kwargs) if debug_stats_kwargs else AgentCheck(name="hello")
job = TestJob(check)
result = job.run_job()
assert result == EXPECTED_RESULT

tags = debug_stats_kwargs.pop('tags', [])
hostname = debug_stats_kwargs.pop('hostname', None)

if disable_tracking:
for m in ["dd.hello.operation.time", "dd.hello.operation.result.length", "dd.hello.operation.error"]:
assert not aggregator.metrics(m), "when tracking is disabled these metrics should not be recorded"
else:
aggregator.assert_metric("dd.hello.operation.time", hostname=hostname, tags=tags + ["operation:do_work"])
aggregator.assert_metric(
"dd.hello.operation.time", hostname=hostname, tags=tags + ["operation:do_work_return_list"]
)
aggregator.assert_metric(
"dd.hello.operation.result.length", hostname=hostname, tags=tags + ["operation:do_work_return_list"]
)
aggregator.assert_metric(
"dd.hello.operation.error",
hostname=hostname,
tags=tags + ["operation:test_tracked_exception", "error:<class 'test_tracking.MyException'>"],
)

0 comments on commit 4395d55

Please sign in to comment.