From 4395d55277ca8548df4f386014f366dc32860660 Mon Sep 17 00:00:00 2001 From: Dusan Jovanovic Date: Wed, 8 Dec 2021 15:03:17 -0500 Subject: [PATCH] Add decorator for tracking execution statistics of check methods (#10809) * dbm add tracked method decorator Adds a new decorator `dbm_tracked_method` to be used for tracking the performance of methods in a standardized way. This will significantly reduce measurement boilerplate across the DBM integrations and make the measurements less error prone. It's important to track the performance and error rates of various parts of the DBM integrations in order to better troubleshoot customer issues. We already have a bunch of internal debug metrics tracking the execution time and error rate of various operations in DBM. For example, `dd.postgres.collect_statement_samples.time` tracks the execution time of the `_collect_statement_samples` method for postgres. All of these manual measurements will be updated to use this new measurement decorator instead. * move to new tracking module * revert * add option to disable * standardize naming * logging fix * style * fix py2 --- .../datadog_checks/base/utils/tracking.py | 91 +++++++++++++++++ .../tests/base/utils/test_tracking.py | 97 +++++++++++++++++++ 2 files changed, 188 insertions(+) create mode 100644 datadog_checks_base/datadog_checks/base/utils/tracking.py create mode 100644 datadog_checks_base/tests/base/utils/test_tracking.py diff --git a/datadog_checks_base/datadog_checks/base/utils/tracking.py b/datadog_checks_base/datadog_checks/base/utils/tracking.py new file mode 100644 index 0000000000000..69c1f98c851ac --- /dev/null +++ b/datadog_checks_base/datadog_checks/base/utils/tracking.py @@ -0,0 +1,91 @@ +# -*- coding: utf-8 -*- +# (C) Datadog, Inc. 2021-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) + +import os + +from datadog_checks.base.utils.time import get_timestamp + +required_attrs = [ + 'name', + 'log', + 'count', + 'gauge', + 'histogram', +] + + +def tracked_method(agent_check_getter=None, track_result_length=False): + """ + Decorates an agent check method to provide debug metrics and logging for troubleshooting. + Tracks execution time, errors, and result length. + + The function being decorated must be a method on a class that receives the self pointer. This cannot decorate + plain functions. + + If the check has a `debug_stats_kwargs` function then that function is called to get a set of kwargs to pass to + the statsd methods (i.e. histogram, count, gauge, etc). This is useful when specific tags need to be added to + these debug metrics in a standardized way. + + Set the environment variable DD_DISABLE_TRACKED_METHOD=true to disable tracking. + + All metrics produced include the check name in the prefix (i.e. "dd.sqlserver." if the check's name is "sqlserver") + + :param agent_check_getter: a function that gets the agent check from the class. The function must receive only a + single parameter, `self`, and it must return a reference to the agent check. If the function is not provided then + `self` must refer to the agent check. + :param track_result_length: if true, the length of the result is tracked + :return: a decorator + """ + + def decorator(function): + def wrapper(self, *args, **kwargs): + if os.getenv('DD_DISABLE_TRACKED_METHOD') == "true": + return function(self, *args, **kwargs) + + start_time = get_timestamp() + + try: + check = agent_check_getter(self) if agent_check_getter else self + except Exception: + print("[{}] invalid tracked_method. failed to get check reference.".format(function.__name__)) + return function(self, *args, **kwargs) + + for attr in required_attrs: + if not hasattr(check, attr): + print( + "[{}] invalid check reference. Missing required attribute {}.".format(function.__name__, attr) + ) + return function(self, *args, **kwargs) + + check_name = check.name + + stats_kwargs = {} + if hasattr(check, 'debug_stats_kwargs'): + stats_kwargs = dict(check.debug_stats_kwargs()) + + stats_kwargs['tags'] = stats_kwargs.get('tags', []) + ["operation:{}".format(function.__name__)] + + try: + result = function(self, *args, **kwargs) + + elapsed_ms = (get_timestamp() - start_time) * 1000 + check.histogram("dd.{}.operation.time".format(check_name), elapsed_ms, **stats_kwargs) + + check.log.debug("[%s.%s] operation completed in %s ms", check_name, function.__name__, elapsed_ms) + + if track_result_length and result is not None: + check.log.debug("[%s.%s] received result length %s", check_name, function.__name__, len(result)) + check.gauge("dd.{}.operation.result.length".format(check_name), len(result), **stats_kwargs) + + return result + except Exception as e: + check.log.exception("operation %s error", function.__name__) + stats_kwargs['tags'] += ["error:{}".format(type(e))] + check.count("dd.{}.operation.error".format(check_name), 1, **stats_kwargs) + raise + + return wrapper + + return decorator diff --git a/datadog_checks_base/tests/base/utils/test_tracking.py b/datadog_checks_base/tests/base/utils/test_tracking.py new file mode 100644 index 0000000000000..06525d6ef286f --- /dev/null +++ b/datadog_checks_base/tests/base/utils/test_tracking.py @@ -0,0 +1,97 @@ +# -*- coding: utf-8 -*- +# (C) Datadog, Inc. 2021-present +# All rights reserved +# Licensed under a 3-clause BSD style license (see LICENSE) + +import os + +import pytest + +from datadog_checks.base import AgentCheck +from datadog_checks.base.utils.tracking import tracked_method + + +def agent_check_getter(self): + return self.check + + +class HelloCheck(AgentCheck): + def __init__(self, debug_stats_kwargs): + self._debug_stats_kwargs = debug_stats_kwargs + super(HelloCheck, self).__init__(name="hello") + + def debug_stats_kwargs(self): + return self._debug_stats_kwargs + + +EXPECTED_RESULT = 5 + + +class MyException(Exception): + pass + + +class TestJob: + def __init__(self, check): + self.check = check + + def run_job(self): + result = self.do_work() + self.do_work_return_list() + try: + self.test_tracked_exception() + except Exception: + pass + + return result + + @tracked_method(agent_check_getter=agent_check_getter) + def do_work(self): + return EXPECTED_RESULT + + @tracked_method(agent_check_getter=agent_check_getter, track_result_length=True) + def do_work_return_list(self): + return list(range(5)) + + @tracked_method(agent_check_getter=agent_check_getter) + def test_tracked_exception(self): + raise MyException("oops") + + +@pytest.mark.parametrize( + "debug_stats_kwargs", + [ + {}, + { + "tags": ["hey:there"], + "hostname": "tiberius", + }, + ], +) +@pytest.mark.parametrize("disable_tracking", [True, False]) +def test_tracked_method(aggregator, debug_stats_kwargs, disable_tracking): + os.environ['DD_DISABLE_TRACKED_METHOD'] = str(disable_tracking).lower() + check = HelloCheck(debug_stats_kwargs) if debug_stats_kwargs else AgentCheck(name="hello") + job = TestJob(check) + result = job.run_job() + assert result == EXPECTED_RESULT + + tags = debug_stats_kwargs.pop('tags', []) + hostname = debug_stats_kwargs.pop('hostname', None) + + if disable_tracking: + for m in ["dd.hello.operation.time", "dd.hello.operation.result.length", "dd.hello.operation.error"]: + assert not aggregator.metrics(m), "when tracking is disabled these metrics should not be recorded" + else: + aggregator.assert_metric("dd.hello.operation.time", hostname=hostname, tags=tags + ["operation:do_work"]) + aggregator.assert_metric( + "dd.hello.operation.time", hostname=hostname, tags=tags + ["operation:do_work_return_list"] + ) + aggregator.assert_metric( + "dd.hello.operation.result.length", hostname=hostname, tags=tags + ["operation:do_work_return_list"] + ) + aggregator.assert_metric( + "dd.hello.operation.error", + hostname=hostname, + tags=tags + ["operation:test_tracked_exception", "error:"], + )