Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add decorator for tracking execution statistics of check methods #10809

Merged
merged 8 commits into from
Dec 8, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
91 changes: 91 additions & 0 deletions datadog_checks_base/datadog_checks/base/utils/tracking.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
# -*- coding: utf-8 -*-
# (C) Datadog, Inc. 2021-present
# All rights reserved
# Licensed under a 3-clause BSD style license (see LICENSE)

import os

from datadog_checks.base.utils.time import get_timestamp

required_attrs = [
'name',
'log',
'count',
'gauge',
'histogram',
]


def tracked_method(agent_check_getter=None, track_result_length=False):
"""
Decorates an agent check method to provide debug metrics and logging for troubleshooting.
Tracks execution time, errors, and result length.

The function being decorated must be a method on a class that receives the self pointer. This cannot decorate
plain functions.

If the check has a `debug_stats_kwargs` function then that function is called to get a set of kwargs to pass to
the statsd methods (i.e. histogram, count, gauge, etc). This is useful when specific tags need to be added to
these debug metrics in a standardized way.

Set the environment variable DD_DISABLE_TRACKED_METHOD=true to disable tracking.

All metrics produced include the check name in the prefix (i.e. "dd.sqlserver." if the check's name is "sqlserver")

:param agent_check_getter: a function that gets the agent check from the class. The function must receive only a
single parameter, `self`, and it must return a reference to the agent check. If the function is not provided then
`self` must refer to the agent check.
:param track_result_length: if true, the length of the result is tracked
:return: a decorator
"""

def decorator(function):
def wrapper(self, *args, **kwargs):
if os.getenv('DD_DISABLE_TRACKED_METHOD') == "true":
return function(self, *args, **kwargs)

start_time = get_timestamp()

try:
check = agent_check_getter(self) if agent_check_getter else self
except Exception:
print("[{}] invalid tracked_method. failed to get check reference.".format(function.__name__))
return function(self, *args, **kwargs)

for attr in required_attrs:
if not hasattr(check, attr):
print(
"[{}] invalid check reference. Missing required attribute {}.".format(function.__name__, attr)
)
return function(self, *args, **kwargs)

check_name = check.name

stats_kwargs = {}
if hasattr(check, 'debug_stats_kwargs'):
stats_kwargs = dict(check.debug_stats_kwargs())

stats_kwargs['tags'] = stats_kwargs.get('tags', []) + ["operation:{}".format(function.__name__)]

try:
result = function(self, *args, **kwargs)

elapsed_ms = (get_timestamp() - start_time) * 1000
check.histogram("dd.{}.operation.time".format(check_name), elapsed_ms, **stats_kwargs)

check.log.debug("[%s.%s] operation completed in %s ms", check_name, function.__name__, elapsed_ms)

if track_result_length and result is not None:
check.log.debug("[%s.%s] received result length %s", check_name, function.__name__, len(result))
check.gauge("dd.{}.operation.result.length".format(check_name), len(result), **stats_kwargs)

return result
except Exception as e:
check.log.exception("operation %s error", function.__name__)
stats_kwargs['tags'] += ["error:{}".format(type(e))]
check.count("dd.{}.operation.error".format(check_name), 1, **stats_kwargs)
raise

return wrapper

return decorator
97 changes: 97 additions & 0 deletions datadog_checks_base/tests/base/utils/test_tracking.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
# -*- coding: utf-8 -*-
# (C) Datadog, Inc. 2021-present
# All rights reserved
# Licensed under a 3-clause BSD style license (see LICENSE)

import os

import pytest

from datadog_checks.base import AgentCheck
from datadog_checks.base.utils.tracking import tracked_method


def agent_check_getter(self):
return self.check


class HelloCheck(AgentCheck):
def __init__(self, debug_stats_kwargs):
self._debug_stats_kwargs = debug_stats_kwargs
super(HelloCheck, self).__init__(name="hello")

def debug_stats_kwargs(self):
return self._debug_stats_kwargs


EXPECTED_RESULT = 5


class MyException(Exception):
pass


class TestJob:
def __init__(self, check):
self.check = check

def run_job(self):
result = self.do_work()
self.do_work_return_list()
try:
self.test_tracked_exception()
except Exception:
pass

return result

@tracked_method(agent_check_getter=agent_check_getter)
def do_work(self):
return EXPECTED_RESULT

@tracked_method(agent_check_getter=agent_check_getter, track_result_length=True)
def do_work_return_list(self):
return list(range(5))

@tracked_method(agent_check_getter=agent_check_getter)
def test_tracked_exception(self):
raise MyException("oops")


@pytest.mark.parametrize(
"debug_stats_kwargs",
[
{},
{
"tags": ["hey:there"],
"hostname": "tiberius",
},
],
)
@pytest.mark.parametrize("disable_tracking", [True, False])
def test_tracked_method(aggregator, debug_stats_kwargs, disable_tracking):
os.environ['DD_DISABLE_TRACKED_METHOD'] = str(disable_tracking).lower()
check = HelloCheck(debug_stats_kwargs) if debug_stats_kwargs else AgentCheck(name="hello")
job = TestJob(check)
result = job.run_job()
assert result == EXPECTED_RESULT

tags = debug_stats_kwargs.pop('tags', [])
hostname = debug_stats_kwargs.pop('hostname', None)

if disable_tracking:
for m in ["dd.hello.operation.time", "dd.hello.operation.result.length", "dd.hello.operation.error"]:
assert not aggregator.metrics(m), "when tracking is disabled these metrics should not be recorded"
else:
aggregator.assert_metric("dd.hello.operation.time", hostname=hostname, tags=tags + ["operation:do_work"])
aggregator.assert_metric(
"dd.hello.operation.time", hostname=hostname, tags=tags + ["operation:do_work_return_list"]
)
aggregator.assert_metric(
"dd.hello.operation.result.length", hostname=hostname, tags=tags + ["operation:do_work_return_list"]
)
aggregator.assert_metric(
"dd.hello.operation.error",
hostname=hostname,
tags=tags + ["operation:test_tracked_exception", "error:<class 'test_tracking.MyException'>"],
)