Skip to content

Commit

Permalink
Merge pull request #59 from janboll/add-metrics-cache
Browse files Browse the repository at this point in the history
Implementing metrics caching
  • Loading branch information
janboll authored Sep 9, 2022
2 parents 19c8a98 + 2f4b682 commit c50aaa2
Show file tree
Hide file tree
Showing 11 changed files with 516 additions and 305 deletions.
10 changes: 10 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,8 @@ vpc:
- "us-east-1"
- "eu-central-1"
timeout: 30s
interval: 300s
cache_ttl: 500s
ec2:
enabled: true
regions:
Expand All @@ -84,6 +86,7 @@ route53:
enabled: true
region: "us-east-1"
timeout: 60s
interval: 90s
```
Some exporters might expose different configuration values, see the example files for possible keys.
Expand All @@ -96,6 +99,13 @@ tweak this behavior.
- `LOGS_METRICS_WORKERS`: Number of workers to request log metrics in parallel (default=10)
- `LOGS_METRICS_TTL`: Cache TTL for rds logs related metrics (default=300)


Defaults:
- interval: 15 seconds
- cache_ttl: 35 seconds
- timeout: 10 seconds


To view all available command-line flags, run `./aws-resource-exporter -h`.

## License
Expand Down
6 changes: 1 addition & 5 deletions aws-resource-exporter-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,12 @@ vpc:
- "us-east-1"
- "eu-central-1"
- "eu-west-1"
timeout: 30s
route53:
enabled: true
region: "us-east-1"
timeout: 300s
interval: 300s
ec2:
enabled: true
regions:
- "us-east-1"
- "eu-central-1"
- "us-west-1"
timeout: 30s
- "us-west-1"
48 changes: 32 additions & 16 deletions ec2.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"sync"
"time"

"github.com/app-sre/aws-resource-exporter/pkg"
"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/aws/session"
"github.com/aws/aws-sdk-go/service/ec2"
Expand All @@ -24,35 +25,51 @@ var TransitGatewaysUsage *prometheus.Desc = prometheus.NewDesc(prometheus.BuildF

type EC2Exporter struct {
sessions []*session.Session
cache pkg.MetricsCache

logger log.Logger
timeout time.Duration
logger log.Logger
timeout time.Duration
interval time.Duration
}

func NewEC2Exporter(sessions []*session.Session, logger log.Logger, timeout time.Duration) *EC2Exporter {
func NewEC2Exporter(sessions []*session.Session, logger log.Logger, config EC2Config) *EC2Exporter {

level.Info(logger).Log("msg", "Initializing EC2 exporter")
return &EC2Exporter{
sessions: sessions,
cache: *pkg.NewMetricsCache(*config.CacheTTL),

logger: logger,
timeout: timeout,
logger: logger,
timeout: *config.Timeout,
interval: *config.Interval,
}
}

func (e *EC2Exporter) Collect(ch chan<- prometheus.Metric) {
ctx, ctxCancel := context.WithTimeout(context.Background(), e.timeout)
defer ctxCancel()
wg := &sync.WaitGroup{}
wg.Add(len(e.sessions))
for _, m := range e.cache.GetAllMetrics() {
ch <- m
}
}

for _, sess := range e.sessions {
go collectInRegion(sess, e.logger, wg, ch, ctx)
func (e *EC2Exporter) CollectLoop() {
for {
ctx, ctxCancel := context.WithTimeout(context.Background(), e.timeout)
defer ctxCancel()
wg := &sync.WaitGroup{}
wg.Add(len(e.sessions))

for _, sess := range e.sessions {
go e.collectInRegion(sess, e.logger, wg, ctx)
}
wg.Wait()

level.Info(e.logger).Log("msg", "EC2 metrics Updated")

time.Sleep(e.interval)
}
wg.Wait()
}

func collectInRegion(sess *session.Session, logger log.Logger, wg *sync.WaitGroup, ch chan<- prometheus.Metric, ctx context.Context) {
func (e *EC2Exporter) collectInRegion(sess *session.Session, logger log.Logger, wg *sync.WaitGroup, ctx context.Context) {
defer wg.Done()
ec2Svc := ec2.New(sess)
serviceQuotaSvc := servicequotas.New(sess)
Expand All @@ -71,9 +88,8 @@ func collectInRegion(sess *session.Session, logger log.Logger, wg *sync.WaitGrou
return
}

ch <- prometheus.MustNewConstMetric(TransitGatewaysUsage, prometheus.GaugeValue, float64(len(gateways)), *sess.Config.Region)
ch <- prometheus.MustNewConstMetric(TransitGatewaysQuota, prometheus.GaugeValue, quota, *sess.Config.Region)

e.cache.AddMetric(prometheus.MustNewConstMetric(TransitGatewaysUsage, prometheus.GaugeValue, float64(len(gateways)), *sess.Config.Region))
e.cache.AddMetric(prometheus.MustNewConstMetric(TransitGatewaysQuota, prometheus.GaugeValue, quota, *sess.Config.Region))
}

func (e *EC2Exporter) Describe(ch chan<- *prometheus.Desc) {
Expand Down
4 changes: 4 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,17 @@ require (
github.com/alecthomas/units v0.0.0-20190717042225-c3de453c63f4 // indirect
github.com/beorn7/perks v1.0.1 // indirect
github.com/cespare/xxhash/v2 v2.1.1 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/go-logfmt/logfmt v0.4.0 // indirect
github.com/golang/protobuf v1.3.3 // indirect
github.com/jmespath/go-jmespath v0.0.0-20180206201540-c2b33e8439af // indirect
github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515 // indirect
github.com/matttproud/golang_protobuf_extensions v1.0.1 // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/prometheus/client_model v0.2.0 // indirect
github.com/prometheus/procfs v0.0.8 // indirect
github.com/stretchr/testify v1.8.0 // indirect
golang.org/x/sys v0.0.0-20200212091648-12a6c2dcc1e4 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
)
7 changes: 7 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -79,10 +79,14 @@ github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPx
github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJyk=
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.8.0 h1:pSgiaMZlXftHpm5L7V1+rVB+AZJydKsMxsQBIJw4PKk=
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
Expand Down Expand Up @@ -111,3 +115,6 @@ gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.2.5/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.2.8 h1:obN1ZagJSUGI0Ek/LBmuj4SNLPfIny3KsKFopxRdj10=
gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
71 changes: 59 additions & 12 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,9 @@ func main() {
}

type BaseConfig struct {
Enabled bool `yaml:"enabled"`
Enabled bool `yaml:"enabled"`
Interval *time.Duration `yaml:"interval"`
CacheTTL *time.Duration `yaml:"cache_ttl"`
}

type RDSConfig struct {
Expand All @@ -53,21 +55,20 @@ type RDSConfig struct {

type VPCConfig struct {
BaseConfig `yaml:"base,inline"`
Timeout time.Duration `yaml:"timeout"`
Regions []string `yaml:"regions"`
Timeout *time.Duration `yaml:"timeout"`
Regions []string `yaml:"regions"`
}

type Route53Config struct {
BaseConfig `yaml:"base,inline"`
Interval time.Duration `yaml:"interval"`
Timeout time.Duration `yaml:"timeout"`
Region string `yaml:"region"` // Use only a single Region for now, as the current metric is global
Timeout *time.Duration `yaml:"timeout"`
Region string `yaml:"region"` // Use only a single Region for now, as the current metric is global
}

type EC2Config struct {
BaseConfig `yaml:"base,inline"`
Timeout time.Duration `yaml:"timeout"`
Regions []string `yaml:"regions"`
Timeout *time.Duration `yaml:"timeout"`
Regions []string `yaml:"regions"`
}

type Config struct {
Expand All @@ -77,6 +78,10 @@ type Config struct {
EC2Config EC2Config `yaml:"ec2"`
}

func durationPtr(duration time.Duration) *time.Duration {
return &duration
}

func loadExporterConfiguration(logger log.Logger, configFile string) (*Config, error) {
var config Config
file, err := ioutil.ReadFile(configFile)
Expand All @@ -85,6 +90,42 @@ func loadExporterConfiguration(logger log.Logger, configFile string) (*Config, e
return nil, errors.New("Could not load configuration file: " + configFile)
}
yaml.Unmarshal(file, &config)

if config.RdsConfig.CacheTTL == nil {
config.RdsConfig.CacheTTL = durationPtr(35 * time.Second)
}
if config.VpcConfig.CacheTTL == nil {
config.VpcConfig.CacheTTL = durationPtr(35 * time.Second)
}
if config.Route53Config.CacheTTL == nil {
config.Route53Config.CacheTTL = durationPtr(35 * time.Second)
}
if config.EC2Config.CacheTTL == nil {
config.EC2Config.CacheTTL = durationPtr(35 * time.Second)
}

if config.RdsConfig.Interval == nil {
config.RdsConfig.Interval = durationPtr(15 * time.Second)
}
if config.VpcConfig.Interval == nil {
config.VpcConfig.Interval = durationPtr(15 * time.Second)
}
if config.Route53Config.Interval == nil {
config.Route53Config.Interval = durationPtr(15 * time.Second)
}
if config.EC2Config.Interval == nil {
config.EC2Config.Interval = durationPtr(15 * time.Second)
}

if config.VpcConfig.Timeout == nil {
config.VpcConfig.Timeout = durationPtr(10 * time.Second)
}
if config.Route53Config.Timeout == nil {
config.Route53Config.Timeout = durationPtr(10 * time.Second)
}
if config.EC2Config.Timeout == nil {
config.EC2Config.Timeout = durationPtr(10 * time.Second)
}
return &config, nil
}

Expand All @@ -106,7 +147,9 @@ func setupCollectors(logger log.Logger, configFile string, creds *credentials.Cr
sess := session.Must(session.NewSession(config))
vpcSessions = append(vpcSessions, sess)
}
collectors = append(collectors, NewVPCExporter(vpcSessions, logger, config.VpcConfig.Timeout))
vpcExporter := NewVPCExporter(vpcSessions, logger, config.VpcConfig)
collectors = append(collectors, vpcExporter)
go vpcExporter.CollectLoop()
}
level.Info(logger).Log("msg", "Will RDS metrics be gathered?", "rds-enabled", config.RdsConfig.Enabled)
var rdsSessions []*session.Session
Expand All @@ -116,7 +159,9 @@ func setupCollectors(logger log.Logger, configFile string, creds *credentials.Cr
sess := session.Must(session.NewSession(config))
rdsSessions = append(rdsSessions, sess)
}
collectors = append(collectors, NewRDSExporter(rdsSessions, logger))
rdsExporter := NewRDSExporter(rdsSessions, logger, config.RdsConfig)
collectors = append(collectors, rdsExporter)
go rdsExporter.CollectLoop()
}
level.Info(logger).Log("msg", "Will EC2 metrics be gathered?", "ec2-enabled", config.EC2Config.Enabled)
var ec2Sessions []*session.Session
Expand All @@ -126,13 +171,15 @@ func setupCollectors(logger log.Logger, configFile string, creds *credentials.Cr
sess := session.Must(session.NewSession(config))
ec2Sessions = append(ec2Sessions, sess)
}
collectors = append(collectors, NewEC2Exporter(ec2Sessions, logger, config.EC2Config.Timeout))
ec2Exporter := NewEC2Exporter(ec2Sessions, logger, config.EC2Config)
collectors = append(collectors, ec2Exporter)
go ec2Exporter.CollectLoop()
}
level.Info(logger).Log("msg", "Will Route53 metrics be gathered?", "route53-enabled", config.Route53Config.Enabled)
if config.Route53Config.Enabled {
awsConfig := aws.NewConfig().WithCredentials(creds).WithRegion(config.Route53Config.Region)
sess := session.Must(session.NewSession(awsConfig))
r53Exporter := NewRoute53Exporter(sess, logger, config.Route53Config.Interval, config.Route53Config.Timeout)
r53Exporter := NewRoute53Exporter(sess, logger, config.Route53Config)
collectors = append(collectors, r53Exporter)
go r53Exporter.CollectLoop()
}
Expand Down
68 changes: 68 additions & 0 deletions pkg/cache.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
package pkg

import (
"crypto/sha256"
"fmt"
"sync"
"time"

"github.com/prometheus/client_golang/prometheus"
dto "github.com/prometheus/client_model/go"
)

type MetricsCache struct {
cacheMutex *sync.Mutex
entries map[string]cacheEntry
ttl time.Duration
}

func NewMetricsCache(ttl time.Duration) *MetricsCache {
return &MetricsCache{
cacheMutex: &sync.Mutex{},
entries: map[string]cacheEntry{},
ttl: ttl,
}
}

func getMetricHash(metric prometheus.Metric) string {
var dto dto.Metric
metric.Write(&dto)
labelString := metric.Desc().String()

for _, labelPair := range dto.GetLabel() {
labelString = fmt.Sprintf("%s,%s,%s", labelString, labelPair.GetName(), labelPair.GetValue())
}

checksum := sha256.Sum256([]byte(labelString))
return fmt.Sprintf("%x", checksum[:])
}

// AddMetric adds a metric to the cache
func (mc *MetricsCache) AddMetric(metric prometheus.Metric) {
mc.cacheMutex.Lock()
mc.entries[getMetricHash(metric)] = cacheEntry{
creation: time.Now(),
metric: metric,
}
mc.cacheMutex.Unlock()
}

// GetAllMetrics Iterates over all cached metrics and discards expired ones.
func (mc *MetricsCache) GetAllMetrics() []prometheus.Metric {
mc.cacheMutex.Lock()
returnArr := make([]prometheus.Metric, 0)
for k, v := range mc.entries {
if time.Since(v.creation).Seconds() > mc.ttl.Seconds() {
delete(mc.entries, k)
} else {
returnArr = append(returnArr, v.metric)
}
}
mc.cacheMutex.Unlock()
return returnArr
}

type cacheEntry struct {
creation time.Time
metric prometheus.Metric
}
Loading

0 comments on commit c50aaa2

Please sign in to comment.