diff --git a/mapreduce/assets/configuration/spec.yaml b/mapreduce/assets/configuration/spec.yaml new file mode 100644 index 0000000000000..990a38cc1e0da --- /dev/null +++ b/mapreduce/assets/configuration/spec.yaml @@ -0,0 +1,94 @@ +name: MapReduce +files: + - name: mapreduce.yaml + options: + - template: init_config + options: + - name: general_counters + description: | + `general_counters` are job agnostic metrics that create a metric for each specified counter + Create an object with the following layout: + + - counter_group_name: '' + counters: + - counter_name: 'MAP_INPUT_RECORDS' + - counter_name: 'MAP_OUTPUT_RECORDS' + - counter_name: 'REDUCE_INPUT_RECORDS' + - counter_name: 'REDUCE_OUTPUT_RECORDS' + + For more information on counters visit the MapReduce documentation page: + https://hadoop.apache.org/docs/current/hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapredAppMasterRest.html#Job_Counters_API /noqa + value: + example: + - counter_group_name: 'org.apache.hadoop.mapreduce.FileSystemCounter' + counters: + - counter_name: 'HDFS_BYTES_READ' + type: array + items: + type: object + + - name: job_specific_counters + description: | + `job_specific_counters` are metrics that are specific to a particular job. + Create an object with the following layout: + + - job_name: + metrics: + - counter_group_name: + counters: + - counter_name: + + For more information on counters visit the MapReduce documentation page: + https://hadoop.apache.org/docs/current/hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapredAppMasterRest.html#Job_Counters_API /noqa + value: + example: + - job_name: '' + metrics: + - counter_group_name: 'org.apache.hadoop.mapreduce.FileSystemCounter' + counters: + - counter_name: 'FILE_BYTES_WRITTEN' + - counter_name: 'HDFS_BYTES_WRITTEN' + - counter_group_name: 'org.apache.hadoop.mapreduce.FileSystemCounter' + counters: + - counter_name: 'HDFS_BYTES_READ' + type: array + items: + type: object + + - template: init_config/http + - template: init_config/default + - template: instances + options: + - name: resourcemanager_uri + description: | + The MapReduce check retrieves metrics from YARN's ResourceManager. This + check must be run from the Master Node and the ResourceManager URI must + be specified below. The ResourceManager URI is composed of the + ResourceManager's hostname and port. + + The ResourceManager hostname can be found in the yarn-site.xml conf file + under the property yarn.resourcemanager.address + + The ResourceManager port can be found in the yarn-site.xml conf file under + the property yarn.resourcemanager.webapp.address + required: true + value: + example: http://localhost:8088 + type: string + - name: cluster_name + description: A friendly name for the cluster. + required: true + value: + example: "" + type: string + - template: instances/http + - template: instances/default + + - template: logs + example: + - type: file + path: /usr/local/hadoop/logs/hadoop-root-namenode-localhost.log + source: mapreduce + - type: file + path: /usr/local/hadoop/logs/hadoop-root-datanode-localhost.log + source: mapreduce diff --git a/mapreduce/datadog_checks/mapreduce/data/conf.yaml.example b/mapreduce/datadog_checks/mapreduce/data/conf.yaml.example index 24f5943d54b81..27dc30ef16ce5 100644 --- a/mapreduce/datadog_checks/mapreduce/data/conf.yaml.example +++ b/mapreduce/datadog_checks/mapreduce/data/conf.yaml.example @@ -1,7 +1,8 @@ +## All options defined here are available to all instances.
+# init_config: - ## @param general_counters - list of objects - optional - ## + ## @param general_counters - list of mappings - optional ## `general_counters` are job agnostic metrics that create a metric for each specified counter ## Create a an object with the following layout: ## @@ -13,14 +14,14 @@ init_config: ## - counter_name: 'REDUCE_OUTPUT_RECORDS' ## ## For more information on counters visit the MapReduce documentation page: - ## https://hadoop.apache.org/docs/current/hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapredAppMasterRest.html#Job_Counters_API #noqa + ## https://hadoop.apache.org/docs/current/hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapredAppMasterRest.html#Job_Counters_API # # general_counters: - # - counter_group_name: 'org.apache.hadoop.mapreduce.FileSystemCounter' + # - counter_group_name: org.apache.hadoop.mapreduce.FileSystemCounter # counters: - # - counter_name: 'HDFS_BYTES_READ' + # - counter_name: HDFS_BYTES_READ - ## @param job_specific_counters - list of objects - optional + ## @param job_specific_counters - list of mappings - optional ## `job_specific_counters` are metrics that are specific to a particular job. ## Create an object with the following layout: ## @@ -31,26 +32,26 @@ init_config: ## - counter_name: ## ## For more information on counters visit the MapReduce documentation page: - ## https://hadoop.apache.org/docs/current/hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapredAppMasterRest.html#Job_Counters_API #noqa + ## https://hadoop.apache.org/docs/current/hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapredAppMasterRest.html#Job_Counters_API # # job_specific_counters: - # - job_name: '' + # - job_name: # metrics: - # - counter_group_name: 'org.apache.hadoop.mapreduce.FileSystemCounter' - # counters: - # - counter_name: 'FILE_BYTES_WRITTEN' - # - counter_name: 'HDFS_BYTES_WRITTEN' - # - counter_group_name: 'org.apache.hadoop.mapreduce.FileSystemCounter' - # counters: - # - counter_name: 'HDFS_BYTES_READ' - - ## @param proxy - object - optional + # - counter_group_name: org.apache.hadoop.mapreduce.FileSystemCounter + # counters: + # - counter_name: FILE_BYTES_WRITTEN + # - counter_name: HDFS_BYTES_WRITTEN + # - counter_group_name: org.apache.hadoop.mapreduce.FileSystemCounter + # counters: + # - counter_name: HDFS_BYTES_READ + + ## @param proxy - mapping - optional ## Set HTTP or HTTPS proxies for all instances. Use the `no_proxy` list ## to specify hosts that must bypass proxies. ## ## The SOCKS protocol is also supported like so: ## - ## socks5://user:pass@host:port + ## socks5://user:pass@host:port ## ## Using the scheme `socks5` causes the DNS resolution to happen on the ## client, rather than on the proxy server. This is in line with `curl`, @@ -62,8 +63,8 @@ init_config: # http: http://: # https: https://: # no_proxy: - # - - # - + # - + # - ## @param skip_proxy - boolean - optional - default: false ## If set to `true`, this makes the check bypass any proxy @@ -71,10 +72,24 @@ init_config: # # skip_proxy: false + ## @param timeout - number - optional - default: 10 + ## The timeout for connecting to services. + # + # timeout: 10 + + ## @param service - string - optional + ## Attach the tag `service:` to every metric, event, and service check emitted by this integration. + ## + ## Additionally, this sets the default `service` for every log source. + # + # service: + +## Every instance is scheduled independent of the others. 
+# instances: ## @param resourcemanager_uri - string - required ## The MapReduce check retrieves metrics from YARN's ResourceManager. This ## check must be run from the Master Node and the ResourceManager URI must ## be specified below. The ResourceManager URI is composed of the ## ResourceManager's hostname and port. @@ -88,33 +103,19 @@ instances: - resourcemanager_uri: http://localhost:8088 ## @param cluster_name - string - required - ## A Required friendly name for the cluster. + ## A friendly name for the cluster. # cluster_name: - ## @param collect_task_metrics - boolean - optional - default: false - ## Set to true to collect histograms on the elapsed time of map and reduce tasks. - # - # collect_task_metrics: false - - ## @param tags - list of key:value elements - optional - ## List of tags to attach to every metric, event and service check emitted by this integration. - ## - ## Learn more about tagging: https://docs.datadoghq.com/tagging/ - # - # tags: - # - : - # - : - - ## @param proxy - object - optional + ## @param proxy - mapping - optional ## This overrides the `proxy` setting in `init_config`. ## - ## Set HTTP or HTTPS proxies. Use the `no_proxy` list + ## Set HTTP or HTTPS proxies for all instances. Use the `no_proxy` list ## to specify hosts that must bypass proxies. ## - ## The SOCKS protocol is also supported like so: + ## The SOCKS protocol is also supported, for example: ## - ## socks5://user:pass@host:port + ## socks5://user:pass@host:port ## ## Using the scheme `socks5` causes the DNS resolution to happen on the ## client, rather than on the proxy server. This is in line with `curl`, @@ -126,8 +127,8 @@ instances: # http: http://: # https: https://: # no_proxy: - # - - # - + # - + # - ## @param skip_proxy - boolean - optional - default: false ## This overrides the `skip_proxy` setting in `init_config`. @@ -137,88 +138,148 @@ instances: # # skip_proxy: false - ## @param auth_type - string - optional - ## The type of authentication to use. - ## The available values are "basic" and "digest". + ## @param auth_type - string - optional - default: basic + ## The type of authentication to use. The available types (and related options) are: + ## + ## - basic + ## |__ username + ## |__ password + ## - digest + ## |__ username + ## |__ password + ## - ntlm + ## |__ ntlm_domain + ## |__ password + ## - kerberos + ## |__ kerberos_auth + ## |__ kerberos_cache + ## |__ kerberos_delegate + ## |__ kerberos_force_initiate + ## |__ kerberos_hostname + ## |__ kerberos_keytab + ## |__ kerberos_principal + ## - aws + ## |__ aws_region + ## |__ aws_host + ## |__ aws_service + ## + ## The `aws` auth type relies on boto3 to automatically gather AWS credentials, for example: from `.aws/credentials`. + ## Details: https://boto3.amazonaws.com/v1/documentation/api/latest/guide/configuration.html#configuring-credentials # - # auth_type: + # auth_type: basic ## @param username - string - optional - ## The username to use if services are behind basic auth. + ## The username to use if services are behind basic or digest auth. # # username: - ## @param ntlm_domain - string - optional - ## If your services uses NTLM authentication, you can - ## specify a domain that is used in the check. For NTLM Auth, - ## append the username to domain, not as the `username` parameter. - ## Example: \ - # - # ntlm_domain: - ## @param password - string - optional ## The password to use if services are behind basic or NTLM auth.
# # password: + ## @param ntlm_domain - string - optional + ## If your services use NTLM authentication, specify + ## the domain used in the check. For NTLM Auth, append + ## the username to domain, not as the `username` parameter. + # + # ntlm_domain: \ + ## @param kerberos_auth - string - optional - default: disabled - ## If your service uses Kerberos authentication, you can specify the Kerberos + ## If your services use Kerberos authentication, you can specify the Kerberos ## strategy to use between: - ## * required - ## * optional - ## * disabled + ## + ## - required + ## - optional + ## - disabled ## ## See https://github.com/requests/requests-kerberos#mutual-authentication # # kerberos_auth: disabled + ## @param kerberos_cache - string - optional + ## Sets the KRB5CCNAME environment variable. + ## It should point to a credential cache with a valid TGT. + # + # kerberos_cache: + ## @param kerberos_delegate - boolean - optional - default: false - ## Set to `true` to enable kerberos delegation of credentials to a server that requests delegation. + ## Set to `true` to enable Kerberos delegation of credentials to a server that requests delegation. + ## ## See https://github.com/requests/requests-kerberos#delegation # # kerberos_delegate: false ## @param kerberos_force_initiate - boolean - optional - default: false - ## Set to `true` to preemptively initiate the Kerberos GSS exchange and present a Kerberos ticket on the initial - ## request (and all subsequent). + ## Set to `true` to preemptively initiate the Kerberos GSS exchange and + ## present a Kerberos ticket on the initial request (and all subsequent). + ## ## See https://github.com/requests/requests-kerberos#preemptive-authentication # # kerberos_force_initiate: false ## @param kerberos_hostname - string - optional - ## Override the hostname used for the Kerberos GSS exchange if its DNS name doesn't match its kerberos - ## hostname (eg, behind a content switch or load balancer). + ## Override the hostname used for the Kerberos GSS exchange if its DNS name doesn't + ## match its Kerberos hostname, for example: behind a content switch or load balancer. + ## ## See https://github.com/requests/requests-kerberos#hostname-override # - # kerberos_hostname: null + # kerberos_hostname: ## @param kerberos_principal - string - optional - ## Set an explicit principal, to force Kerberos to look for a matching credential cache for the named user. + ## Set an explicit principal, to force Kerberos to look for a + ## matching credential cache for the named user. + ## ## See https://github.com/requests/requests-kerberos#explicit-principal # - # kerberos_principal: null + # kerberos_principal: ## @param kerberos_keytab - string - optional ## Set the path to your Kerberos key tab file. # # kerberos_keytab: - ## @param kerberos_cache - string - optional - ## Sets the KRB5CCNAME environment variable. - ## It should point to a credential cache with a valid TGT. + ## @param aws_region - string - optional + ## If your services require AWS Signature Version 4 signing, set the region. + ## + ## See https://docs.aws.amazon.com/general/latest/gr/signature-version-4.html + # + # aws_region: + + ## @param aws_host - string - optional + ## If your services require AWS Signature Version 4 signing, set the host. + ## + ## Note: This setting is not necessary for official integrations. 
+ ## + ## See https://docs.aws.amazon.com/general/latest/gr/signature-version-4.html # - # kerberos_cache: + # aws_host: + + ## @param aws_service - string - optional + ## If your services require AWS Signature Version 4 signing, set the service code. For a list + ## of available service codes, see https://docs.aws.amazon.com/general/latest/gr/rande.html + ## + ## Note: This setting is not necessary for official integrations. + ## + ## See https://docs.aws.amazon.com/general/latest/gr/signature-version-4.html + # + # aws_service: ## @param tls_verify - boolean - optional - default: true ## Instructs the check to validate the TLS certificate of services. # # tls_verify: true + ## @param tls_use_host_header - boolean - optional - default: false + ## If a `Host` header is set, this enables its use for SNI (matching against the TLS certificate CN or SAN). + # + # tls_use_host_header: false + ## @param tls_ignore_warning - boolean - optional - default: false ## If `tls_verify` is disabled, security warnings are logged by the check. ## Disable those by setting `tls_ignore_warning` to true. ## - ## Note: `tls_ignore_warning` set to true is currently only reliable if used by one instance of one integration. + ## Note: `tls_ignore_warning` set to true is currently only reliable if used by one instance of one integration. ## If enabled for multiple instances, spurious warnings might still appear even if `tls_ignore_warning` is set ## to true. # @@ -226,7 +287,7 @@ instances: ## @param tls_cert - string - optional ## The path to a single file in PEM format containing a certificate as well as any - ## number of CA certificates needed to establish the certificate’s authenticity for + ## number of CA certificates needed to establish the certificate's authenticity for ## use when connecting to services. It may also contain an unencrypted private key to use. # # tls_cert: @@ -245,7 +306,7 @@ instances: # # tls_ca_cert: - ## @param headers - list of key:value elements - optional + ## @param headers - mapping - optional ## The headers parameter allows you to send specific headers with every request. ## You can use it for explicitly specifying the host header or adding headers for ## authorization purposes. @@ -256,11 +317,30 @@ instances: # Host: # X-Auth-Token: - ## @param timeout - integer - optional - default: 10 - ## The timeout for connecting to services. + ## @param extra_headers - mapping - optional + ## Additional headers to send with every request. + # + # extra_headers: + # Host: + # X-Auth-Token: + + ## @param timeout - number - optional - default: 10 + ## The timeout for accessing services. + ## + ## This overrides the `timeout` setting in `init_config`. # # timeout: 10 + ## @param connect_timeout - number - optional + ## The connect timeout for accessing services. Defaults to `timeout`. + # + # connect_timeout: + + ## @param read_timeout - number - optional + ## The read timeout for accessing services. Defaults to `timeout`. + # + # read_timeout: + ## @param log_requests - boolean - optional - default: false ## Whether or not to debug log the HTTP(S) requests made, including the method and URL. # @@ -270,3 +350,53 @@ instances: ## Whether or not to persist cookies and use connection pooling for increased performance. # # persist_connections: false + + ## @param tags - list of strings - optional + ## A list of tags to attach to every metric and service check emitted by this instance. 
+ ## + ## Learn more about tagging at https://docs.datadoghq.com/tagging + # + # tags: + # - : + # - : + + ## @param service - string - optional + ## Attach the tag `service:` to every metric, event, and service check emitted by this integration. + ## + ## Overrides any `service` defined in the `init_config` section. + # + # service: + + ## @param min_collection_interval - number - optional - default: 15 + ## This changes the collection interval of the check. For more information, see: + ## https://docs.datadoghq.com/developers/write_agent_check/#collection-interval + # + # min_collection_interval: 15 + + ## @param empty_default_hostname - boolean - optional - default: false + ## This forces the check to send metrics with no hostname. + ## + ## This is useful for cluster-level checks. + # + # empty_default_hostname: false + +## Log Section +## +## type - required - Type of log input source (tcp / udp / file / windows_event) +## port / path / channel_path - required - Set port if type is tcp or udp. +## Set path if type is file. +## Set channel_path if type is windows_event. +## source - required - Attribute that defines which Integration sent the logs. +## service - optional - The name of the service that generates the log. +## Overrides any `service` defined in the `init_config` section. +## tags - optional - Add tags to the collected logs. +## +## Discover Datadog log collection: https://docs.datadoghq.com/logs/log_collection/ +# +# logs: +# - type: file +# path: /usr/local/hadoop/logs/hadoop-root-namenode-localhost.log +# source: mapreduce +# - type: file +# path: /usr/local/hadoop/logs/hadoop-root-datanode-localhost.log +# source: mapreduce diff --git a/mapreduce/manifest.json b/mapreduce/manifest.json index bb1e3bed2c924..5d57c02e3fdb2 100644 --- a/mapreduce/manifest.json +++ b/mapreduce/manifest.json @@ -23,9 +23,12 @@ "type": "check", "integration_id": "mapreduce", "assets": { - "monitors": {}, + "configuration": { + "spec": "assets/configuration/spec.yaml" + }, "dashboards": {}, "service_checks": "assets/service_checks.json", - "logs": {} + "logs": {}, + "monitors": {} } }
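
For reviewers, a minimal sketch of how a user might fill in `conf.d/mapreduce.d/conf.yaml` once this spec ships. The option names, counter group/name values, default `http://localhost:8088` URI, and log path come from the spec above; the cluster name and tag values are illustrative placeholders, not defaults shipped with the integration.

```yaml
init_config:
  ## Job-agnostic counters collected for every job.
  general_counters:
    - counter_group_name: 'org.apache.hadoop.mapreduce.FileSystemCounter'
      counters:
        - counter_name: 'HDFS_BYTES_READ'

instances:
  - resourcemanager_uri: http://localhost:8088  # yarn.resourcemanager.webapp.address from yarn-site.xml
    cluster_name: hadoop_cluster                # placeholder cluster name
    tags:
      - env:sandbox                             # placeholder tag

## Log collection must also be enabled in the Agent's main configuration.
logs:
  - type: file
    path: /usr/local/hadoop/logs/hadoop-root-namenode-localhost.log
    source: mapreduce
```

The `init_config/http`, `init_config/default`, `instances/http`, and `instances/default` templates referenced in the spec expand into the shared auth, proxy, TLS, timeout, tagging, and scheduling options visible in the regenerated `conf.yaml.example` above, so those options do not need to be redefined per integration.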