diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 0000000..3978a0f
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,2 @@
+.git
+.gitignore
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..485dee6
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+.idea
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..4848a2c
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,12 @@
+FROM golang:alpine AS build-env
+COPY ./ /opt/workdir/
+WORKDIR /opt/workdir/
+RUN GOOS=linux GOARCH=amd64 go build -o aws_batch_exporter cmd/aws_batch_exporter.go
+
+FROM alpine:latest
+RUN addgroup -S exporterg && adduser -S exporter -G exporterg
+USER exporter
+COPY --from=build-env /opt/workdir/aws_batch_exporter /opt/aws_batch_exporter
+EXPOSE 8080
+
+ENTRYPOINT ["/opt/aws_batch_exporter"]
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..c2e245c
--- /dev/null
+++ b/README.md
@@ -0,0 +1,37 @@
+## aws_batch_exporter
+AWS Batch metrics exporter.
+
+### How to run
+#### local
+```bash
+go build -o aws_batch_exporter cmd/aws_batch_exporter.go
+# required
+export REGION= #your region
+./aws_batch_exporter
+```
+
+#### docker
+```bash
+docker build . -t aws_batch_exporter
+docker run -d -p 8080:8080 -e REGION= aws_batch_exporter
+```
+
+### Requirements
+AWS credentials must be configured in advance, either as an access key and secret key or through an IAM role.
+
+### Environment Variables
+|variable|description|default|required|
+|---|---|---|---|
+|SERVER_ADDR|Address (host and port) on which the exporter listens|`:8080`|`false`|
+|REGION|Target's AWS region|`none`|`true`|
+
+### Metrics
+|metric|labels|
+|---|---|
+|aws_batch_submitted_job|`region`, `id`, `queue`, `name`|
+|aws_batch_pending_job|`region`, `id`, `queue`, `name`|
+|aws_batch_runnable_job|`region`, `id`, `queue`, `name`|
+|aws_batch_starting_job|`region`, `id`, `queue`, `name`|
+|aws_batch_running_job|`region`, `id`, `queue`, `name`|
+|aws_batch_failed_job|`region`, `id`, `queue`, `name`|
+|aws_batch_succeeded_job|`region`, `id`, `queue`, `name`|
\ No newline at end of file
diff --git a/cmd/aws_batch_exporter.go b/cmd/aws_batch_exporter.go
new file mode 100644
index 0000000..6d3bbdb
--- /dev/null
+++ b/cmd/aws_batch_exporter.go
@@ -0,0 +1,9 @@
+// Command aws_batch_exporter runs the AWS Batch Prometheus exporter.
+package main
+
+import (
+	// Aliased explicitly because the package name differs from the last
+	// element of its import path.
+	awsBatchExporter "github.com/atr0phy/aws_batch_exporter"
+)
+
+// main delegates to the exporter package, which reads its configuration
+// from the environment.
+func main() {
+	awsBatchExporter.Run()
+}
\ No newline at end of file
diff --git a/collector.go b/collector.go
new file mode 100644
index 0000000..a913ab2
--- /dev/null
+++ b/collector.go
@@ -0,0 +1,156 @@
+// Package awsBatchExporter implements a Prometheus exporter for AWS Batch
+// job states.
+package awsBatchExporter
+
+import (
+	"context"
+	"log"
+	"sync"
+	"time"
+
+	"github.com/aws/aws-sdk-go/aws"
+	"github.com/aws/aws-sdk-go/aws/session"
+	"github.com/aws/aws-sdk-go/service/batch"
+	"github.com/aws/aws-sdk-go/service/batch/batchiface"
+	"github.com/prometheus/client_golang/prometheus"
+)
+
+// Collector is a prometheus.Collector that reports AWS Batch jobs, grouped
+// by job status, for every job queue returned by the Batch API.
+type Collector struct {
+	// client is the Batch API used to list job queues and jobs.
+	client batchiface.BatchAPI
+	// region is the value of the "region" label attached to every metric.
+	region string
+	// timeout is the duration assigned by New from the package default.
+	timeout time.Duration
+}
+
+const (
+	// namespace is the prefix of every exported metric name.
+	namespace = "aws_batch"
+	// timeout is the default duration stored in a Collector by New.
+	timeout = 10 * time.Second
+)
+
+var (
+	// jobStatus lists every job status that is queried and exported.
+	jobStatus = []string{
+		batch.JobStatusSubmitted,
+		batch.JobStatusPending,
+		batch.JobStatusRunnable,
+		batch.JobStatusStarting,
+		batch.JobStatusRunning,
+		batch.JobStatusFailed,
+		batch.JobStatusSucceeded,
+	}
+
+	// One descriptor per job status follows; each metric carries the
+	// region, id, queue and name labels.
+	jobSubmitted = prometheus.NewDesc(
+		prometheus.BuildFQName(namespace, "", "submitted_job"),
+		"Job in the queue that are in the SUBMITTED state",
[]string{"region", "id", "queue", "name"}, nil,
+	)
+
+	jobPending = prometheus.NewDesc(
+		prometheus.BuildFQName(namespace, "", "pending_job"),
+		"Job in the queue that are in the PENDING state",
+		[]string{"region", "id", "queue", "name"}, nil,
+	)
+
+	jobRunnable = prometheus.NewDesc(
+		prometheus.BuildFQName(namespace, "", "runnable_job"),
+		"Job in the queue that are in the RUNNABLE state",
+		[]string{"region", "id", "queue", "name"}, nil,
+	)
+
+	jobStarting = prometheus.NewDesc(
+		prometheus.BuildFQName(namespace, "", "starting_job"),
+		"Job in the queue that are in the STARTING state",
+		[]string{"region", "id", "queue", "name"}, nil,
+	)
+
+	jobRunning = prometheus.NewDesc(
+		prometheus.BuildFQName(namespace, "", "running_job"),
+		"Job in the queue that are in the RUNNING state",
+		[]string{"region", "id", "queue", "name"}, nil,
+	)
+
+	jobFailed = prometheus.NewDesc(
+		prometheus.BuildFQName(namespace, "", "failed_job"),
+		"Job in the queue that are in the FAILED state",
+		[]string{"region", "id", "queue", "name"}, nil,
+	)
+
+	jobSucceeded = prometheus.NewDesc(
+		prometheus.BuildFQName(namespace, "", "succeeded_job"),
+		"Job in the queue that are in the SUCCEEDED state",
+		[]string{"region", "id", "queue", "name"}, nil,
+	)
+
+	// jobDescMap maps a Batch job status to the descriptor of the metric
+	// emitted for jobs in that status.
+	jobDescMap = map[string]*prometheus.Desc{
+		batch.JobStatusSubmitted: jobSubmitted,
+		batch.JobStatusPending:   jobPending,
+		batch.JobStatusRunnable:  jobRunnable,
+		batch.JobStatusStarting:  jobStarting,
+		batch.JobStatusRunning:   jobRunning,
+		batch.JobStatusFailed:    jobFailed,
+		batch.JobStatusSucceeded: jobSucceeded,
+	}
+)
+
+// JobResult is a flattened view of a single AWS Batch job: its ID, the queue
+// it was listed from, its name and its status.
+type JobResult struct {
+	id     string
+	queue  string
+	name   string
+	status string
+}
+
+// New creates a Collector whose Batch client targets the given AWS region.
+// The same region is attached to every exported metric as the "region"
+// label. It returns an error when the AWS session cannot be created.
+func New(region string) (*Collector, error) {
+	s, err := session.NewSession(&aws.Config{Region: aws.String(region)})
+	if err != nil {
+		return nil, err
+	}
+
+	return &Collector{
+		client: batch.New(s),
+		// Use the caller's region rather than a fixed one, so the label
+		// matches the region the client actually queries.
+		region:  region,
+		timeout: timeout,
+	}, nil
+}
+
+// Describe implements prometheus.Collector by sending the descriptor of every
+// job-status metric to ch.
+func (*Collector) Describe(ch chan<- *prometheus.Desc) 
{
+	ch <- jobSubmitted
+	ch <- jobPending
+	ch <- jobRunnable
+	ch <- jobStarting
+	ch <- jobRunning
+	ch <- jobFailed
+	ch <- jobSucceeded
+}
+
+// Collect implements prometheus.Collector. It lists the job queues, then
+// lists the jobs of every queue concurrently (one goroutine per queue) and
+// sends one gauge per job to ch. Errors are logged and the affected queue or
+// status is skipped, so a partial failure still yields the remaining metrics.
+func (c *Collector) Collect(ch chan<- prometheus.Metric) {
+	// Bound the whole scrape so a stalled AWS call cannot block it forever.
+	// A Collector built without a timeout falls back to the package default.
+	limit := c.timeout
+	if limit <= 0 {
+		limit = timeout
+	}
+	ctx, cancel := context.WithTimeout(context.Background(), limit)
+	defer cancel()
+
+	queues, err := c.client.DescribeJobQueuesWithContext(ctx, &batch.DescribeJobQueuesInput{})
+	if err != nil {
+		log.Printf("Error collecting metrics: %v\n", err)
+		return
+	}
+
+	var wg sync.WaitGroup
+	for _, q := range queues.JobQueues {
+		wg.Add(1)
+		// The queue name is passed as an argument so each goroutine works
+		// on its own value regardless of loop-variable semantics.
+		go func(queueName *string) {
+			defer wg.Done()
+			c.collectJobDetailStatus(ch, c.listQueueJobs(ctx, queueName))
+		}(q.JobQueueName)
+	}
+	wg.Wait()
+}
+
+// listQueueJobs returns the jobs of the given queue for every status in
+// jobStatus, following NextToken so that all result pages are included.
+func (c *Collector) listQueueJobs(ctx context.Context, queueName *string) []JobResult {
+	var results []JobResult
+	for _, status := range jobStatus {
+		input := &batch.ListJobsInput{JobQueue: queueName, JobStatus: aws.String(status)}
+		for {
+			out, err := c.client.ListJobsWithContext(ctx, input)
+			if err != nil {
+				log.Printf("Error collecting job status metrics: %v\n", err)
+				if ctx.Err() != nil {
+					// The scrape deadline has passed; further calls would fail too.
+					return results
+				}
+				break
+			}
+			for _, j := range out.JobSummaryList {
+				// aws.StringValue maps nil pointers to "" instead of panicking.
+				results = append(results, JobResult{
+					id:     aws.StringValue(j.JobId),
+					queue:  aws.StringValue(queueName),
+					name:   aws.StringValue(j.JobName),
+					status: aws.StringValue(j.Status),
+				})
+			}
+			if aws.StringValue(out.NextToken) == "" {
+				break
+			}
+			input.NextToken = out.NextToken
+		}
+	}
+	return results
+}
+
+// collectJobDetailStatus sends one gauge with value 1 to ch for every job in
+// results, using the descriptor registered for the job's status.
+func (c *Collector) collectJobDetailStatus(ch chan<- prometheus.Metric, results []JobResult) {
+	for _, r := range results {
+		desc, ok := jobDescMap[r.status]
+		if !ok {
+			// A nil descriptor would make MustNewConstMetric panic.
+			log.Printf("Skipping job %s with unexpected status %q\n", r.id, r.status)
+			continue
+		}
+		ch <- prometheus.MustNewConstMetric(desc, prometheus.GaugeValue, 1, c.region, r.id, r.queue, r.name)
+	}
+}
diff --git a/go.mod b/go.mod
new file mode 100644
index 0000000..6a756c5
--- /dev/null
+++ b/go.mod
@@ -0,0 +1,9 @@
+module github.com/atr0phy/aws_batch_exporter
+
+go 1.14
+
+require (
+	github.com/aws/aws-sdk-go v1.33.1
+	github.com/prometheus/client_golang v1.7.1
+	github.com/prometheus/common v0.10.0
+)
diff --git a/main.go b/main.go
new file mode 100644
index 0000000..e51f139
--- /dev/null
+++ b/main.go
@@ -0,0 +1,33 @@
+package awsBatchExporter
+
+import (
+	"log"
+	"net/http"
+	"os"
+
+	"github.com/prometheus/client_golang/prometheus"
+	"github.com/prometheus/client_golang/prometheus/promhttp"
+)
+
+// getenv returns the value of the environment variable key, or fallback when
+// that value is empty.
+func getenv(key, fallback string) string {
+	value := 
os.Getenv(key)
+	if value != "" {
+		return value
+	}
+	return fallback
+}
+
+// Run starts the exporter. It reads REGION (mandatory) and SERVER_ADDR
+// (defaults to ":8080") from the environment, registers a Collector for that
+// region with the default Prometheus registry and serves it on /metrics.
+// Run only returns by terminating the process: a missing region, a collector
+// construction error or an HTTP server error is fatal.
+func Run() {
+	region := os.Getenv("REGION")
+	if region == "" {
+		log.Fatalln("region is required value. exit an application.")
+	}
+
+	collector, err := New(region)
+	if err != nil {
+		log.Fatalln(err)
+	}
+	prometheus.MustRegister(collector)
+
+	http.Handle("/metrics", promhttp.Handler())
+	listenAddr := getenv("SERVER_ADDR", ":8080")
+	log.Fatal(http.ListenAndServe(listenAddr, nil))
+}
\ No newline at end of file