Skip to content

Commit

Permalink
Use the new RequestsWrapper for connecting to services (#4059)
Browse files Browse the repository at this point in the history
  • Loading branch information
ofek authored Jul 8, 2019
1 parent 05bd88d commit 902c214
Show file tree
Hide file tree
Showing 6 changed files with 176 additions and 89 deletions.
159 changes: 139 additions & 20 deletions yarn/datadog_checks/yarn/data/conf.yaml.example
Original file line number Diff line number Diff line change
@@ -1,5 +1,32 @@
init_config:

## @param proxy - object - optional
## Set HTTP or HTTPS proxies for all instances. Use the `no_proxy` list
## to specify hosts that must bypass proxies.
##
## The SOCKS protocol is also supported like so:
##
## socks5://user:pass@host:port
##
## Using the scheme `socks5` causes the DNS resolution to happen on the
## client, rather than on the proxy server. This is in line with `curl`,
## which uses the scheme to decide whether to do the DNS resolution on
## the client or proxy. If you want to resolve the domains on the proxy
## server, use `socks5h` as the scheme.
#
# proxy:
# http: http://<PROXY_SERVER_FOR_HTTP>:<PORT>
# https: https://<PROXY_SERVER_FOR_HTTPS>:<PORT>
# no_proxy:
# - <HOSTNAME_1>
# - <HOSTNAME_2>

## @param skip_proxy - boolean - optional - default: false
## If set to true, this makes the check bypass any proxy
## settings enabled and attempt to reach services directly.
#
# skip_proxy: false

instances:

## @param resourcemanager_uri - string - required
Expand Down Expand Up @@ -32,15 +59,6 @@ instances:
# <TAG_KEY1>: <YARN_KEY>
# <TAG_KEY2>: <YARN_KEY>

## @param tags - list of key:value elements - optional
## List of tags to attach to every metric, event, and service check emitted by this Integration.
##
## Learn more about tagging: https://docs.datadoghq.com/tagging/
#
# tags:
# - <KEY_1>:<VALUE_1>
# - <KEY_2>:<VALUE_2>

## @param collect_app_metrics - boolean - optional - default: true
## Set this parameter to false to remove yarn.app metrics from metric collection.
#
Expand All @@ -56,22 +74,65 @@ instances:
# - <QUEUE_NAME_1>
# - <QUEUE_NAME_2>

## @param tags - list of key:value elements - optional
## List of tags to attach to every metric, event, and service check emitted by this Integration.
##
## Learn more about tagging: https://docs.datadoghq.com/tagging/
#
# tags:
# - <KEY_1>:<VALUE_1>
# - <KEY_2>:<VALUE_2>

## @param proxy - object - optional
## This overrides the `proxy` setting in `init_config`.
##
## Set HTTP or HTTPS proxies. Use the `no_proxy` list
## to specify hosts that must bypass proxies.
##
## The SOCKS protocol is also supported like so:
##
## socks5://user:pass@host:port
##
## Using the scheme `socks5` causes the DNS resolution to happen on the
## client, rather than on the proxy server. This is in line with `curl`,
## which uses the scheme to decide whether to do the DNS resolution on
## the client or proxy. If you want to resolve the domains on the proxy
## server, use `socks5h` as the scheme.
#
# proxy:
# http: http://<PROXY_SERVER_FOR_HTTP>:<PORT>
# https: https://<PROXY_SERVER_FOR_HTTPS>:<PORT>
# no_proxy:
# - <HOSTNAME_1>
# - <HOSTNAME_2>

## @param skip_proxy - boolean - optional - default: false
## This overrides the `skip_proxy` setting in `init_config`.
##
## If set to true, this makes the check bypass any proxy
## settings enabled and attempt to reach services directly.
#
# skip_proxy: false

## @param username - string - optional
## If your service uses basic HTTP authentication, specify a username to be used in the check.
## The username to use if services are behind basic auth.
#
# username: <USERNAME>

## @param password - string - optional
## If your service uses basic HTTP authentication, specify a password to be used in the check.
## @param ntlm_domain - string - optional
## If your services uses NTLM authentication, you can optionally
## specify a domain that will be used in the check. For NTLM Auth,
## append the username to domain, not as the `username` parameter.
## Example: example_ntlm_domain\example_username
#
# password: <PASSWORD>
# ntlm_domain: <DOMAIN>

## @param ssl_verify - boolean - optional - default: true
## Whether to enable SSL certificate verification for HTTP requests.
## @param password - string - optional
## The password to use if services are behind basic or NTLM auth.
#
# ssl_verify: true
# password: <PASSWORD>

## @param kerberos - string - optional - default: disabled
## @param kerberos_auth - string - optional - default: disabled
## If your service uses Kerberos authentication, you can specify the Kerberos
## strategy to use between:
## * required
Expand All @@ -80,7 +141,7 @@ instances:
##
## See https://github.com/requests/requests-kerberos#mutual-authentication
#
# kerberos: disabled
# kerberos_auth: disabled

## @param kerberos_delegate - boolean - optional - default: false
## Set to true to enable kerberos delegation of credentials to a server that requests delegation.
Expand All @@ -89,8 +150,8 @@ instances:
# kerberos_delegate: false

## @param kerberos_force_initiate - boolean - optional - default: false
## Set to true to preemptively initiate the Kerberos GSS exchange and present a Kerberos ticket on the
## initial request (and all subsequent).
## Set to true to preemptively initiate the Kerberos GSS exchange and present a Kerberos ticket on the initial
## request (and all subsequent).
## See https://github.com/requests/requests-kerberos#preemptive-authentication
#
# kerberos_force_initiate: false
Expand All @@ -112,3 +173,61 @@ instances:
## Set the path to your Kerberos key tab file.
#
# kerberos_keytab: <KEYTAB_FILE_PATH>

## @param tls_verify - boolean - optional - default: true
## Instructs the check to validate the TLS certificate of services.
#
# tls_verify: true

## @param tls_ignore_warning - boolean - optional - default: false
## If you disable `tls_verify` you will receive security warnings in logs.
## Disable those by setting `tls_ignore_warning` to true.
#
# tls_ignore_warning: false

## @param tls_cert - string - optional
## The path to a single file in PEM format containing a certificate as well as any
## number of CA certificates needed to establish the certificate’s authenticity for
## use when connecting to services. It may also contain an unencrypted private key to use.
#
# tls_cert: <CERT_PATH>

## @param tls_private_key - string - optional
## The unencrypted private key to use for `tls_cert` when connecting to services. This is
## required if `tls_cert` is set and it does not already contain a private key.
#
# tls_private_key: <PRIVATE_KEY_PATH>

## @param tls_ca_cert - string - optional
## The path to a file of concatenated CA certificates in PEM format or a directory
## containing several CA certificates in PEM format. If a directory, the directory
## must have been processed using the c_rehash utility supplied with OpenSSL. See:
## https://www.openssl.org/docs/manmaster/man3/SSL_CTX_load_verify_locations.html
#
# tls_ca_cert: <CA_CERT_PATH>

## @param headers - list of key:value elements - optional
## The headers parameter allows you to send specific headers with every request.
## This is useful for explicitly specifying the host header or adding headers for
## authorization purposes.
##
## This overrides any default headers.
#
# headers:
# Host: alternative.host.example.com
# X-Auth-Token: <AUTH_TOKEN>

## @param timeout - integer - optional - default: 10
## The timeout for connecting to services.
#
# timeout: 10

## @param log_requests - boolean - optional - default: false
## Whether or not to debug log the HTTP(S) requests made, including the method and URL.
#
# log_requests: false

## @param persist_connections - boolean - optional - default: false
## Whether or not to persist cookies and use connection pooling for increased performance.
#
# persist_connections: false
74 changes: 16 additions & 58 deletions yarn/datadog_checks/yarn/yarn.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,12 @@
# (C) Datadog, Inc. 2018
# All rights reserved
# Licensed under a 3-clause BSD style license (see LICENSE)
import os

import requests
import requests_kerberos
from requests.exceptions import ConnectionError, HTTPError, InvalidURL, SSLError, Timeout
from six import iteritems
from six.moves.urllib.parse import urljoin, urlsplit, urlunsplit

from datadog_checks.base import AgentCheck, is_affirmative

KERBEROS_STRATEGIES = {
'required': requests_kerberos.REQUIRED,
'optional': requests_kerberos.OPTIONAL,
'disabled': requests_kerberos.DISABLED,
}


# Default settings
DEFAULT_RM_URI = 'http://localhost:8088'
DEFAULT_TIMEOUT = 5
Expand Down Expand Up @@ -151,6 +140,8 @@ class YarnCheck(AgentCheck):
Extract statistics from YARN's ResourceManger REST API
"""

HTTP_CONFIG_REMAPPER = {'ssl_verify': {'name': 'tls_verify'}}

_ALLOWED_APPLICATION_TAGS = ['applicationTags', 'applicationType', 'name', 'queue', 'user']

def check(self, instance):
Expand Down Expand Up @@ -189,17 +180,17 @@ def check(self, instance):
tags.append('cluster_name:{}'.format(cluster_name))

# Get metrics from the Resource Manager
self._yarn_cluster_metrics(rm_address, instance, tags)
self._yarn_cluster_metrics(rm_address, tags)
if is_affirmative(instance.get('collect_app_metrics', DEFAULT_COLLECT_APP_METRICS)):
self._yarn_app_metrics(rm_address, instance, app_tags, tags)
self._yarn_node_metrics(rm_address, instance, tags)
self._yarn_scheduler_metrics(rm_address, instance, tags, queue_blacklist)
self._yarn_app_metrics(rm_address, app_tags, tags)
self._yarn_node_metrics(rm_address, tags)
self._yarn_scheduler_metrics(rm_address, tags, queue_blacklist)

def _yarn_cluster_metrics(self, rm_address, instance, addl_tags):
def _yarn_cluster_metrics(self, rm_address, addl_tags):
"""
Get metrics related to YARN cluster
"""
metrics_json = self._rest_request_to_json(rm_address, instance, YARN_CLUSTER_METRICS_PATH, addl_tags)
metrics_json = self._rest_request_to_json(rm_address, YARN_CLUSTER_METRICS_PATH, addl_tags)

if metrics_json:

Expand All @@ -208,13 +199,11 @@ def _yarn_cluster_metrics(self, rm_address, instance, addl_tags):
if yarn_metrics is not None:
self._set_yarn_metrics_from_json(addl_tags, yarn_metrics, YARN_CLUSTER_METRICS)

def _yarn_app_metrics(self, rm_address, instance, app_tags, addl_tags):
def _yarn_app_metrics(self, rm_address, app_tags, addl_tags):
"""
Get metrics for running applications
"""
metrics_json = self._rest_request_to_json(
rm_address, instance, YARN_APPS_PATH, addl_tags, states=YARN_APPLICATION_STATES
)
metrics_json = self._rest_request_to_json(rm_address, YARN_APPS_PATH, addl_tags, states=YARN_APPLICATION_STATES)

if metrics_json and metrics_json['apps'] is not None and metrics_json['apps']['app'] is not None:

Expand All @@ -234,11 +223,11 @@ def _yarn_app_metrics(self, rm_address, instance, app_tags, addl_tags):
self._set_yarn_metrics_from_json(tags, app_json, DEPRECATED_YARN_APP_METRICS)
self._set_yarn_metrics_from_json(tags, app_json, YARN_APP_METRICS)

def _yarn_node_metrics(self, rm_address, instance, addl_tags):
def _yarn_node_metrics(self, rm_address, addl_tags):
"""
Get metrics related to YARN nodes
"""
metrics_json = self._rest_request_to_json(rm_address, instance, YARN_NODES_PATH, addl_tags)
metrics_json = self._rest_request_to_json(rm_address, YARN_NODES_PATH, addl_tags)

if metrics_json and metrics_json['nodes'] is not None and metrics_json['nodes']['node'] is not None:

Expand All @@ -250,11 +239,11 @@ def _yarn_node_metrics(self, rm_address, instance, addl_tags):

self._set_yarn_metrics_from_json(tags, node_json, YARN_NODE_METRICS)

def _yarn_scheduler_metrics(self, rm_address, instance, addl_tags, queue_blacklist):
def _yarn_scheduler_metrics(self, rm_address, addl_tags, queue_blacklist):
"""
Get metrics from YARN scheduler
"""
metrics_json = self._rest_request_to_json(rm_address, instance, YARN_SCHEDULER_PATH, addl_tags)
metrics_json = self._rest_request_to_json(rm_address, YARN_SCHEDULER_PATH, addl_tags)

try:
metrics_json = metrics_json['scheduler']['schedulerInfo']
Expand Down Expand Up @@ -331,7 +320,7 @@ def _set_metric(self, metric_name, metric_type, value, tags=None, device_name=No
else:
self.log.error('Metric type "{}" unknown'.format(metric_type))

def _rest_request_to_json(self, url, instance, object_path, tags, *args, **kwargs):
def _rest_request_to_json(self, url, object_path, tags, *args, **kwargs):
"""
Query the given URL and return the JSON response
"""
Expand All @@ -346,33 +335,6 @@ def _rest_request_to_json(self, url, instance, object_path, tags, *args, **kwarg
for directory in args:
url = self._join_url_dir(url, directory)

auth = None

# Authenticate our connection to JMX endpoint if required
kerberos = instance.get('kerberos')
username = instance.get('username')
password = instance.get('password')
if username is not None and password is not None:
auth = (username, password)
elif kerberos is not None:
if kerberos not in KERBEROS_STRATEGIES:
raise Exception('Invalid Kerberos strategy `{}`'.format(kerberos))

auth = requests_kerberos.HTTPKerberosAuth(
mutual_authentication=KERBEROS_STRATEGIES[kerberos],
delegate=is_affirmative(instance.get('kerberos_delegate', False)),
force_preemptive=is_affirmative(instance.get('kerberos_force_initiate', False)),
hostname_override=instance.get('kerberos_hostname'),
principal=instance.get('kerberos_principal'),
)

ssl_verify = is_affirmative(instance.get('ssl_verify', True))

old_keytab_path = None
if 'kerberos_keytab' in instance:
old_keytab_path = os.getenv('KRB5_CLIENT_KTNAME')
os.environ['KRB5_CLIENT_KTNAME'] = instance['kerberos_keytab']

self.log.debug('Attempting to connect to "{}"'.format(url))

# Add kwargs as arguments
Expand All @@ -381,7 +343,7 @@ def _rest_request_to_json(self, url, instance, object_path, tags, *args, **kwarg
url = urljoin(url, '?' + query)

try:
response = requests.get(url, auth=auth, verify=ssl_verify, timeout=self.default_integration_http_timeout)
response = self.http.get(url)
response.raise_for_status()
response_json = response.json()

Expand Down Expand Up @@ -417,10 +379,6 @@ def _rest_request_to_json(self, url, instance, object_path, tags, *args, **kwarg

return response_json

finally:
if old_keytab_path is not None:
os.environ['KRB5_CLIENT_KTNAME'] = old_keytab_path

def _join_url_dir(self, url, *args):
"""
Join a URL with multiple directories
Expand Down
1 change: 0 additions & 1 deletion yarn/requirements.in
Original file line number Diff line number Diff line change
@@ -1 +0,0 @@
requests-kerberos==0.12.0
2 changes: 1 addition & 1 deletion yarn/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def dd_environment():

@pytest.fixture
def check():
return YarnCheck('yarn', {}, {})
return lambda instance: YarnCheck('yarn', {}, [instance])


@pytest.fixture
Expand Down
1 change: 1 addition & 0 deletions yarn/tests/test_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

@pytest.mark.usefixtures("dd_environment")
def test_check(aggregator, check, instance):
check = check(instance)
check.check(instance)

for metric in common.EXPECTED_METRICS:
Expand Down
Loading

0 comments on commit 902c214

Please sign in to comment.