diff --git a/METRICS_CATALOG.md b/METRICS_CATALOG.md index 1070e40..c88f209 100644 --- a/METRICS_CATALOG.md +++ b/METRICS_CATALOG.md @@ -140,3 +140,11 @@ All the available metrics | kbdi_up | [1-0] (OK-KO) | Keedio Big Data Insights Status | None | +### Yarn Module Metrics +* kbdi_yarn_application_allocated_vcores +* kbdi_yarn_application_allocated_mb +* kbdi_yarn_application_runningContainers +* kbdi_yarn_available_vcores +* kbdi_yarn_total_pending_containers +* kbdi_yarn_yarn_gc_time_across_nodemanagers +* kbdi_yarn_yarn_total_apps_running \ No newline at end of file diff --git a/README.md b/README.md index 7d513ed..15eb29c 100644 --- a/README.md +++ b/README.md @@ -26,6 +26,7 @@ This exporter scrape the metrics by independent modules (Scrapers). This modules * **Status:** Scrapes the metrics about the current status of the Clusters, services, roles and hosts * **Hosts:** Scrapes the metrics about the Hosts: CPU usage, RAM, SWAP, Agent stats and more useful metrics * **HDFS:** Scrapes the metrics about HDFS: Capacity, blocks stats, file stats, Namenode properties and Snapshots. +* **YARN:** Scrapes the metrics about YARN: Capacity, running applications and garbage-collection (GC) time. 
* **Impala:** Scrapes the metrics about Impala: Catalog, usage stats, queries stats, state-store info … diff --git a/collector/yarn_module.go b/collector/yarn_module.go index e49bad1..b345bab 100644 --- a/collector/yarn_module.go +++ b/collector/yarn_module.go @@ -3,11 +3,18 @@ package collector import ( // Go Default libraries "context" + "crypto/tls" + "net/http" + "strconv" "strings" "sync" // Own libraries "fmt" + "keedio/cloudera_exporter/cloudera_swagger_client/clusters_resource" + "keedio/cloudera_exporter/cloudera_swagger_client/services_resource" + "keedio/cloudera_exporter/cloudera_swagger_client/yarn_applications_resource" + "keedio/cloudera_exporter/collector/cloudera_client" jp "keedio/cloudera_exporter/json_parser" log "keedio/cloudera_exporter/logger" @@ -214,6 +221,15 @@ func (ScrapeYARNMetrics) Scrape(ctx context.Context, config *Collector_connectio }(element) } wg.Wait() + port, _ := strconv.Atoi(config.Port) + http.DefaultTransport.(*http.Transport).TLSClientConfig = &tls.Config{InsecureSkipVerify: true} + client, err := cloudera_client.NewClouderaClient(config.User, config.Passwd, config.Host, port, Config.Api_request_type, config.Api_version) + if err != nil { + return err + } + if err := scrape_yarn_application_status(ctx, client, ch); err != nil { + return err + } // Execute the generic funtion for creation of metrics with the pairs (QUERY, PROM:DESCRIPTOR) // for i := 0; i < len(yarn_query_variable_relationship); i++ { // if create_yarn_metric(ctx, *config, yarn_query_variable_relationship[i].Query, yarn_query_variable_relationship[i].Metric_struct, ch) {
@@ -226,5 +242,74 @@ func (ScrapeYARNMetrics) Scrape(ctx context.Context, config *Collector_connectio
 	return nil
 }
 
+// Descriptors for the per-application YARN metrics. Every descriptor shares the
+// label set in yarnApplicationMetricTags, so label values must always be
+// supplied in that order.
+//
+// NOTE(review): "runningContainers" breaks the snake_case used by the sibling
+// metric names; it is kept because METRICS_CATALOG.md documents it under that name.
+var (
+	yarnApplicationMetricTags = []string{"clusterName", "serviceName", "applicationID", "user", "pool", "job_name"}
+
+	yarnApplicationAllocateVcoresMetrics = prometheus.NewDesc(
+		prometheus.BuildFQName(namespace, "yarn_application", "allocated_vcores"),
+		"Yarn application allocated VCores",
+		yarnApplicationMetricTags, nil)
+	yarnApplicationAllocateMBMetrics = prometheus.NewDesc(
+		prometheus.BuildFQName(namespace, "yarn_application", "allocated_mb"),
+		"Yarn application allocated Memory",
+		yarnApplicationMetricTags, nil)
+	yarnApplicationRunningContainersMetrics = prometheus.NewDesc(
+		prometheus.BuildFQName(namespace, "yarn_application", "runningContainers"),
+		"Yarn application allocated containers count",
+		yarnApplicationMetricTags, nil)
+)
+
+// scrape_yarn_application_status publishes the resources held by running YARN
+// applications. It lists every cluster, then every YARN service of each
+// cluster, and sends three gauges (allocated vcores, allocated MB, running
+// containers) on ch for each application whose state is "RUNNING".
+//
+// The first API call that fails aborts the scrape; its error is returned
+// wrapped with the cluster/service it belongs to. Metrics sent before the
+// failure stay on ch.
+func scrape_yarn_application_status(ctx context.Context, client *cloudera_client.ClouderaClient, ch chan<- prometheus.Metric) error {
+	clustersRes, err := client.ClouderaManagerApi.ClustersResource.ReadClusters(
+		&clusters_resource.ReadClustersParams{Context: ctx}, *client.HttpBasicAuth)
+	if err != nil {
+		return fmt.Errorf("reading clusters: %w", err)
+	}
+
+	for _, cluster := range clustersRes.Payload.Items {
+		servicesRes, err := client.ClouderaManagerApi.ServicesResource.ReadServices(
+			&services_resource.ReadServicesParams{Context: ctx, ClusterName: cluster.Name}, *client.HttpBasicAuth)
+		if err != nil {
+			return fmt.Errorf("reading services of cluster %q: %w", cluster.Name, err)
+		}
+
+		// Walk every YARN service of the cluster rather than only the first
+		// match, so that no service is silently left out of the scrape.
+		for _, yarnService := range filter_service_by_type(servicesRes.Payload.Items, "YARN") {
+			appsRes, err := client.ClouderaManagerApi.YarnApplicationsResource.GetYarnApplications(
+				&yarn_applications_resource.GetYarnApplicationsParams{ClusterName: cluster.Name, ServiceName: yarnService.Name, Context: ctx}, *client.HttpBasicAuth)
+			if err != nil {
+				return fmt.Errorf("reading YARN applications of %q/%q: %w", cluster.Name, yarnService.Name, err)
+			}
+
+			for _, app := range appsRes.Payload.Applications {
+				if app.State != "RUNNING" {
+					continue
+				}
+				// Same order as yarnApplicationMetricTags.
+				labels := []string{cluster.Name, yarnService.Name, app.ApplicationID, app.User, app.Pool, app.Name}
+				ch <- prometheus.MustNewConstMetric(yarnApplicationAllocateVcoresMetrics, prometheus.GaugeValue, float64(app.AllocatedVCores), labels...)
+				ch <- prometheus.MustNewConstMetric(yarnApplicationAllocateMBMetrics, prometheus.GaugeValue, float64(app.AllocatedMB), labels...)
+				ch <- prometheus.MustNewConstMetric(yarnApplicationRunningContainersMetrics, prometheus.GaugeValue, float64(app.RunningContainers), labels...)
+			}
+		}
+	}
+	return nil
+}
+
 // check interface
 var _ Scraper = ScrapeYARNMetrics{}