Skip to content

Commit

Permalink
feat: add more metric to detect failures (#18)
Browse files Browse the repository at this point in the history
  • Loading branch information
rootsami authored Feb 15, 2023
1 parent bc94f18 commit 49d4ce1
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 1 deletion.
14 changes: 14 additions & 0 deletions pkg/metric/metric.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ type Metrics struct {
ChangeResources *prometheus.GaugeVec
AddResources *prometheus.GaugeVec
DestroyResources *prometheus.GaugeVec
PlanFailure *prometheus.GaugeVec
GitPullFailure *prometheus.GaugeVec
}

func NewMetrics(reg prometheus.Registerer) *Metrics {
Expand All @@ -29,10 +31,22 @@ func NewMetrics(reg prometheus.Registerer) *Metrics {
Name: "plan_destroy_resources",
Help: "Number of resources to be destroyed based on tf plan",
}, []string{"stack"}),
PlanFailure: prometheus.NewGaugeVec(prometheus.GaugeOpts{
Namespace: "terradrift",
Name: "plan_failure",
Help: "Status of the last scan of a stack",
}, []string{"stack"}),
GitPullFailure: prometheus.NewGaugeVec(prometheus.GaugeOpts{
Namespace: "terradrift",
Name: "git_pull_failure",
Help: "Status of the last git pull",
}, []string{}),
}
reg.MustRegister(m.AddResources)
reg.MustRegister(m.ChangeResources)
reg.MustRegister(m.DestroyResources)
reg.MustRegister(m.PlanFailure)
reg.MustRegister(m.GitPullFailure)

return m
}
11 changes: 10 additions & 1 deletion pkg/server/handlers.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ func (s Server) scanHandler(c *gin.Context) {
name := c.Query("stack")
planResp, err := tfstack.StackScan(name, s.Workdir, s.ConfigPath, s.ExtraBackendVars)

// Reset the plan failure metric
promMetrics.PlanFailure.With(prometheus.Labels{"stack": name}).Set(0)

if err == nil {

// Record metrics for drifts in resources
Expand All @@ -35,11 +38,13 @@ func (s Server) scanHandler(c *gin.Context) {
c.JSON(404, errorMessage)
} else if strings.Contains(errorMessage, "error acquiring the state lock") {

promMetrics.PlanFailure.With(prometheus.Labels{"stack": name}).Set(1)
// When a terraform plan is already in progress, terraform locks the state until it has finished.
c.JSON(502, "Another plan is in-progress for the requested stack, please try again in few minutes.")

} else {

promMetrics.PlanFailure.With(prometheus.Labels{"stack": name}).Set(1)
c.JSON(500, errorMessage)
}
}
Expand All @@ -48,9 +53,13 @@ func (s Server) scanHandler(c *gin.Context) {
// gitHandler is a handler function for git sync endpoint
func (s Server) gitHandler(c *gin.Context) {

// Reset the git failure metric
promMetrics.GitPullFailure.With(prometheus.Labels{}).Set(0)

status, err := git.GitPull(s.Workdir, s.GitToken, s.GitTimeout)
if err != nil {
c.JSON(500, err)
promMetrics.GitPullFailure.With(prometheus.Labels{}).Set(1)
c.JSON(500, error.Error(err))
} else {
c.JSON(200, status)
}
Expand Down

0 comments on commit 49d4ce1

Please sign in to comment.