diff --git a/kubernetes_state/datadog_checks/kubernetes_state/kubernetes_state.py b/kubernetes_state/datadog_checks/kubernetes_state/kubernetes_state.py
index 412dac8e86c47..552c844678cf1 100644
--- a/kubernetes_state/datadog_checks/kubernetes_state/kubernetes_state.py
+++ b/kubernetes_state/datadog_checks/kubernetes_state/kubernetes_state.py
@@ -46,6 +46,8 @@ def get_clustername():
     'image': 'image_name',
 }
 
+JOB_NAME_PATTERN = r"(-\d{4,10}$)"
+
 
 class KubernetesState(OpenMetricsBaseCheck):
     """
@@ -146,6 +148,9 @@ def __init__(self, name, init_config, instances):
         self.job_succeeded_count = defaultdict(int)
         self.job_failed_count = defaultdict(int)
 
+        # Precompiled regex for the "-<4 to 10 digits>" job name suffix; stripping it yields the cronjob name
+        self._job_name_re = re.compile(JOB_NAME_PATTERN)
+
     def check(self, instance):
         endpoint = instance.get('kube_state_url')
 
@@ -561,12 +566,23 @@ def _label_to_tags(self, name, labels, scraper_config, tag_name=None):
             tags += self._build_tags(tag_name or name, value, scraper_config)
         return tags
 
+    def _get_job_tags(self, lname, lvalue, scraper_config):
+        """
+        Returns the job label tags built from the trimmed name, plus kube_cronjob tags if a suffix was trimmed.
+        """
+        trimmed_job, was_trimmed = self._trim_job_tag(lvalue)
+        tags = self._build_tags(lname, trimmed_job, scraper_config)
+        if was_trimmed:
+            tags += self._build_tags('kube_cronjob', trimmed_job, scraper_config)
+        return tags
+
     def _trim_job_tag(self, name):
         """
         Trims suffix of job names if they match -(\\d{4,10}$)
+        Returns a (trimmed name, was trimmed) tuple; the boolean is True only when a suffix was removed.
         """
-        pattern = r"(-\d{4,10}$)"
-        return re.sub(pattern, '', name)
+        trimmed = self._job_name_re.sub('', name)
+        return trimmed, trimmed != name
 
     def _extract_job_timestamp(self, name):
         """
@@ -672,8 +688,7 @@ def kube_job_complete(self, metric, scraper_config):
             tags = []
             for label_name, label_value in iteritems(sample[self.SAMPLE_LABELS]):
                 if label_name == 'job' or label_name == 'job_name':
-                    trimmed_job = self._trim_job_tag(label_value)
-                    tags += self._build_tags(label_name, trimmed_job, scraper_config)
+                    tags += self._get_job_tags(label_name, label_value, scraper_config)
                 else:
                     tags += self._build_tags(label_name, label_value, scraper_config)
             self.service_check(service_check_name, self.OK, tags=tags + scraper_config['custom_tags'])
@@ -684,8 +699,7 @@ def kube_job_failed(self, metric, scraper_config):
             tags = []
             for label_name, label_value in iteritems(sample[self.SAMPLE_LABELS]):
                 if label_name == 'job' or label_name == 'job_name':
-                    trimmed_job = self._trim_job_tag(label_value)
-                    tags += self._build_tags(label_name, trimmed_job, scraper_config)
+                    tags += self._get_job_tags(label_name, label_value, scraper_config)
                 else:
                     tags += self._build_tags(label_name, label_value, scraper_config)
             self.service_check(service_check_name, self.CRITICAL, tags=tags + scraper_config['custom_tags'])
@@ -696,9 +710,8 @@ def kube_job_status_failed(self, metric, scraper_config):
             tags = [] + scraper_config['custom_tags']
             for label_name, label_value in iteritems(sample[self.SAMPLE_LABELS]):
                 if label_name == 'job' or label_name == 'job_name':
-                    trimmed_job = self._trim_job_tag(label_value)
+                    tags += self._get_job_tags(label_name, label_value, scraper_config)
                     job_ts = self._extract_job_timestamp(label_value)
-                    tags += self._build_tags(label_name, trimmed_job, scraper_config)
                 else:
                     tags += self._build_tags(label_name, label_value, scraper_config)
             if job_ts is not None:  # if there is a timestamp, this is a Cron Job
@@ -714,9 +727,8 @@ def kube_job_status_succeeded(self, metric, scraper_config):
             tags = [] + scraper_config['custom_tags']
             for label_name, label_value in iteritems(sample[self.SAMPLE_LABELS]):
                 if label_name == 'job' or label_name == 'job_name':
-                    trimmed_job = self._trim_job_tag(label_value)
+                    tags += self._get_job_tags(label_name, label_value, scraper_config)
                     job_ts = self._extract_job_timestamp(label_value)
-                    tags += self._build_tags(label_name, trimmed_job, scraper_config)
                 else:
                     tags += self._build_tags(label_name, label_value, scraper_config)
             if job_ts is not None:  # if there is a timestamp, this is a Cron Job
diff --git a/kubernetes_state/tests/test_kubernetes_state.py b/kubernetes_state/tests/test_kubernetes_state.py
index d9cccc44a6f92..66d9e3d1c5324 100644
--- a/kubernetes_state/tests/test_kubernetes_state.py
+++ b/kubernetes_state/tests/test_kubernetes_state.py
@@ -638,6 +638,7 @@ def test_join_standard_tags_labels(aggregator, instance, check_with_join_standar
         tags=[
             'job_name:curl-cron-job',
             'kube_job:curl-cron-job',
+            'kube_cronjob:curl-cron-job',
             'kube_namespace:default',
             'namespace:default',
             'optional:tag1',
@@ -726,12 +727,26 @@ def test_job_counts(aggregator, instance):
     # Test cron jobs
     aggregator.assert_metric(
         NAMESPACE + '.job.failed',
-        tags=['namespace:default', 'kube_namespace:default', 'kube_job:hello', 'job:hello', 'optional:tag1'],
+        tags=[
+            'namespace:default',
+            'kube_namespace:default',
+            'kube_job:hello',
+            'kube_cronjob:hello',
+            'job:hello',
+            'optional:tag1',
+        ],
         value=0,
     )
     aggregator.assert_metric(
         NAMESPACE + '.job.succeeded',
-        tags=['namespace:default', 'kube_namespace:default', 'kube_job:hello', 'job:hello', 'optional:tag1'],
+        tags=[
+            'namespace:default',
+            'kube_namespace:default',
+            'kube_job:hello',
+            'kube_cronjob:hello',
+            'job:hello',
+            'optional:tag1',
+        ],
         value=3,
     )
 
@@ -753,12 +768,26 @@ def test_job_counts(aggregator, instance):
     # Test cron jobs
     aggregator.assert_metric(
         NAMESPACE + '.job.failed',
-        tags=['namespace:default', 'kube_namespace:default', 'kube_job:hello', 'job:hello', 'optional:tag1'],
+        tags=[
+            'namespace:default',
+            'kube_namespace:default',
+            'kube_job:hello',
+            'kube_cronjob:hello',
+            'job:hello',
+            'optional:tag1',
+        ],
         value=0,
     )
     aggregator.assert_metric(
         NAMESPACE + '.job.succeeded',
-        tags=['namespace:default', 'kube_namespace:default', 'kube_job:hello', 'job:hello', 'optional:tag1'],
+        tags=[
+            'namespace:default',
+            'kube_namespace:default',
+            'kube_job:hello',
+            'kube_cronjob:hello',
+            'job:hello',
+            'optional:tag1',
+        ],
         value=3,
     )
 
@@ -792,12 +821,26 @@ def test_job_counts(aggregator, instance):
     check.check(instance)
     aggregator.assert_metric(
         NAMESPACE + '.job.failed',
-        tags=['namespace:default', 'kube_namespace:default', 'job:hello', 'kube_job:hello', 'optional:tag1'],
+        tags=[
+            'namespace:default',
+            'kube_namespace:default',
+            'job:hello',
+            'kube_job:hello',
+            'kube_cronjob:hello',
+            'optional:tag1',
+        ],
         value=1,
     )
     aggregator.assert_metric(
         NAMESPACE + '.job.succeeded',
-        tags=['namespace:default', 'kube_namespace:default', 'job:hello', 'kube_job:hello', 'optional:tag1'],
+        tags=[
+            'namespace:default',
+            'kube_namespace:default',
+            'job:hello',
+            'kube_job:hello',
+            'kube_cronjob:hello',
+            'optional:tag1',
+        ],
         value=4,
     )
 
@@ -831,7 +874,14 @@ def test_job_counts(aggregator, instance):
     check.check(instance)
     aggregator.assert_metric(
         NAMESPACE + '.job.succeeded',
-        tags=['namespace:default', 'kube_namespace:default', 'job:hello', 'kube_job:hello', 'optional:tag1'],
+        tags=[
+            'namespace:default',
+            'kube_namespace:default',
+            'job:hello',
+            'kube_job:hello',
+            'kube_cronjob:hello',
+            'optional:tag1',
+        ],
         value=5,
     )
 