Skip to content

Commit

Permalink
[ORCA-782] Add some more metrics and logic to delete 30 days old pull…
Browse files Browse the repository at this point in the history
…s. (#75)
  • Loading branch information
nishkrishnan authored and msarvar committed Sep 27, 2021
1 parent 1e79880 commit 867d20e
Show file tree
Hide file tree
Showing 7 changed files with 96 additions and 32 deletions.
9 changes: 5 additions & 4 deletions server/controllers/events/events_controller_e2e_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1063,10 +1063,11 @@ func setupE2E(t *testing.T, repoDir string) (events_controllers.VCSEventsControl
TestingMode: true,
CommandRunner: commandRunner,
PullCleaner: &events.PullClosedExecutor{
Locker: lockingClient,
VCSClient: e2eVCSClient,
WorkingDir: workingDir,
DB: boltdb,
Locker: lockingClient,
VCSClient: e2eVCSClient,
WorkingDir: workingDir,
DB: boltdb,
PullClosedTemplate: &events.PullClosedEventTemplate{},
},
Logger: logger,
Parser: eventParser,
Expand Down
2 changes: 2 additions & 0 deletions server/events/event_parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -514,6 +514,7 @@ func (e *EventParser) ParseGithubPull(pull *github.PullRequest) (pullModel model

pullState := models.ClosedPullState
closedAt := pull.GetClosedAt()
updatedAt := pull.GetUpdatedAt()
if pull.GetState() == "open" {
pullState = models.OpenPullState
}
Expand All @@ -528,6 +529,7 @@ func (e *EventParser) ParseGithubPull(pull *github.PullRequest) (pullModel model
BaseRepo: baseRepo,
BaseBranch: baseBranch,
ClosedAt: closedAt,
UpdatedAt: updatedAt,
}
return
}
Expand Down
2 changes: 2 additions & 0 deletions server/events/models/models.go
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,8 @@ type PullRequest struct {
State PullRequestState
// BaseRepo is the repository that the pull request will be merged into.
BaseRepo Repo
// UpdatedAt is the time the PR was last updated.
UpdatedAt time.Time
// ClosedAt is the time the PR was closed. This is nil if the PR is open
ClosedAt time.Time
}
Expand Down
24 changes: 18 additions & 6 deletions server/events/pull_closed_executor.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ package events
import (
"bytes"
"fmt"
"io"
"sort"
"strings"
"text/template"
Expand All @@ -42,18 +43,29 @@ type PullCleaner interface {
// PullClosedExecutor executes the tasks required to clean up a closed pull
// request.
type PullClosedExecutor struct {
Locker locking.Locker
VCSClient vcs.Client
WorkingDir WorkingDir
Logger logging.SimpleLogging
DB *db.BoltDB
Locker locking.Locker
VCSClient vcs.Client
WorkingDir WorkingDir
Logger logging.SimpleLogging
DB *db.BoltDB
PullClosedTemplate PullCleanupTemplate
}

type templatedProject struct {
RepoRelDir string
Workspaces string
}

type PullCleanupTemplate interface {
Execute(wr io.Writer, data interface{}) error
}

type PullClosedEventTemplate struct{}

func (t *PullClosedEventTemplate) Execute(wr io.Writer, data interface{}) error {
return pullClosedTemplate.Execute(wr, data)
}

var pullClosedTemplate = template.Must(template.New("").Parse(
"Locks and plans deleted for the projects and workspaces modified in this pull request:\n" +
"{{ range . }}\n" +
Expand Down Expand Up @@ -85,7 +97,7 @@ func (p *PullClosedExecutor) CleanUpPull(repo models.Repo, pull models.PullReque

templateData := p.buildTemplateData(locks)
var buf bytes.Buffer
if err = pullClosedTemplate.Execute(&buf, templateData); err != nil {
if err = p.PullClosedTemplate.Execute(&buf, templateData); err != nil {
return errors.Wrap(err, "rendering template for comment")
}
return p.VCSClient.CreateComment(repo, pull.Num, buf.String(), "")
Expand Down
26 changes: 15 additions & 11 deletions server/events/pull_closed_executor_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,8 @@ func TestCleanUpPullWorkspaceErr(t *testing.T) {
RegisterMockTestingT(t)
w := mocks.NewMockWorkingDir()
pce := events.PullClosedExecutor{
WorkingDir: w,
WorkingDir: w,
PullClosedTemplate: &events.PullClosedEventTemplate{},
}
err := errors.New("err")
When(w.Delete(fixtures.GithubRepo, fixtures.Pull)).ThenReturn(err)
Expand All @@ -49,8 +50,9 @@ func TestCleanUpPullUnlockErr(t *testing.T) {
w := mocks.NewMockWorkingDir()
l := lockmocks.NewMockLocker()
pce := events.PullClosedExecutor{
Locker: l,
WorkingDir: w,
Locker: l,
WorkingDir: w,
PullClosedTemplate: &events.PullClosedEventTemplate{},
}
err := errors.New("err")
When(l.UnlockByPull(fixtures.GithubRepo.FullName, fixtures.Pull.Num)).ThenReturn(nil, err)
Expand All @@ -69,10 +71,11 @@ func TestCleanUpPullNoLocks(t *testing.T) {
db, err := db.New(tmp)
Ok(t, err)
pce := events.PullClosedExecutor{
Locker: l,
VCSClient: cp,
WorkingDir: w,
DB: db,
Locker: l,
VCSClient: cp,
WorkingDir: w,
DB: db,
PullClosedTemplate: &events.PullClosedEventTemplate{},
}
When(l.UnlockByPull(fixtures.GithubRepo.FullName, fixtures.Pull.Num)).ThenReturn(nil, nil)
err = pce.CleanUpPull(fixtures.GithubRepo, fixtures.Pull)
Expand Down Expand Up @@ -155,10 +158,11 @@ func TestCleanUpPullComments(t *testing.T) {
db, err := db.New(tmp)
Ok(t, err)
pce := events.PullClosedExecutor{
Locker: l,
VCSClient: cp,
WorkingDir: w,
DB: db,
Locker: l,
VCSClient: cp,
WorkingDir: w,
DB: db,
PullClosedTemplate: &events.PullClosedEventTemplate{},
}
t.Log("testing: " + c.Description)
When(l.UnlockByPull(fixtures.GithubRepo.FullName, fixtures.Pull.Num)).ThenReturn(c.Locks, nil)
Expand Down
44 changes: 38 additions & 6 deletions server/scheduled_executor_service.go
Original file line number Diff line number Diff line change
@@ -1,14 +1,17 @@
package server

import (
"io"
"os"
"os/signal"
"strconv"
"syscall"
"text/template"
"time"

stats "github.com/lyft/gostats"
"github.com/runatlantis/atlantis/server/events"
"github.com/runatlantis/atlantis/server/events/metrics"
"github.com/runatlantis/atlantis/server/events/models"
"github.com/runatlantis/atlantis/server/logging"
)
Expand All @@ -24,11 +27,13 @@ func NewScheduledExecutorService(
workingDirIterator events.WorkDirIterator,
statsScope stats.Scope,
log logging.SimpleLogging,
pullCleaner events.PullCleaner,
) *ScheduledExecutorService {
garbageCollector := &GarbageCollector{
workingDirIterator: workingDirIterator,
stats: statsScope.Scope("scheduled.garbagecollector"),
log: log,
cleaner: pullCleaner,
}

garbageCollectorCron := CronDefinition{
Expand Down Expand Up @@ -76,20 +81,36 @@ type Job interface {
Run()
}

var gcStaleClosedPullTemplate = template.Must(template.New("").Parse(
"Pull Request has been closed for 30 days. Atlantis GC has deleted the locks and plans for the following projects and workspaces:\n" +
"{{ range . }}\n" +
"- dir: `{{ .RepoRelDir }}` {{ .Workspaces }}{{ end }}"))

type GCStalePullTemplate struct{}

func (t *GCStalePullTemplate) Execute(wr io.Writer, data interface{}) error {
return gcStaleClosedPullTemplate.Execute(wr, data)
}

type GarbageCollector struct {
workingDirIterator events.WorkDirIterator
stats stats.Scope
log logging.SimpleLogging
cleaner events.PullCleaner
}

func (g *GarbageCollector) Run() {
errCounter := g.stats.NewCounter(metrics.ExecutionErrorMetric)

pulls, err := g.workingDirIterator.ListCurrentWorkingDirPulls()

if err != nil {
g.log.Err("error listing pulls %s", err)
errCounter.Inc()
}

openPullsCounter := g.stats.NewCounter("pulls.open")
updatedthirtyDaysAgoOpenPullsCounter := g.stats.NewCounter("pulls.open.updated.thirtydaysago")
closedPullsCounter := g.stats.NewCounter("pulls.closed")
thirtyDaysAgoClosedPullsCounter := g.stats.NewCounter("pulls.closed.thirtydaysago")
fiveMinutesAgoClosedPullsCounter := g.stats.NewCounter("pulls.closed.fiveminutesago")
Expand All @@ -101,29 +122,40 @@ func (g *GarbageCollector) Run() {

for _, pull := range pulls {
logger := g.log.With(fmtLogSrc(pull.BaseRepo, pull.Num)...)

if pull.State == models.OpenPullState {
logger.Debug("pull #%d is open", pull.Num)
openPullsCounter.Inc()

if pull.UpdatedAt.Before(thirtyDaysAgo) {
updatedthirtyDaysAgoOpenPullsCounter.Inc()

logger.Warn("Pull hasn't been updated for more than 30 days.")
}
continue
}

// assume only other state is closed
closedPullsCounter.Inc()

logger.Debug("pull #%d is closed but data still on disk", pull.Num)

// TODO: update this to actually go ahead and delete things
if pull.ClosedAt.Before(thirtyDaysAgo) {
thirtyDaysAgoClosedPullsCounter.Inc()

logger.Info("Pull closed for more than 30 days but data still on disk")
logger.Warn("Pull closed for more than 30 days but data still on disk")

err := g.cleaner.CleanUpPull(pull.BaseRepo, pull)

if err != nil {
logger.Err("Error cleaning up 30 days old closed pulls %s", err)
errCounter.Inc()
return
}
}

// This will allow us to catch leaks as soon as they happen (hopefully)
if pull.ClosedAt.Before(fiveMinutesAgo) {
fiveMinutesAgoClosedPullsCounter.Inc()

logger.Info("Pull closed for more than 5 minutes but data still on disk")
logger.Warn("Pull closed for more than 5 minutes but data still on disk")
}
}
}
Expand Down
21 changes: 16 additions & 5 deletions server/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -401,11 +401,12 @@ func NewServer(userConfig UserConfig, config Config) (*Server, error) {
statsScope,
logger,
&events.PullClosedExecutor{
VCSClient: vcsClient,
Locker: lockingClient,
WorkingDir: workingDir,
Logger: logger,
DB: boltdb,
VCSClient: vcsClient,
Locker: lockingClient,
WorkingDir: workingDir,
Logger: logger,
DB: boltdb,
PullClosedTemplate: &events.PullClosedEventTemplate{},
},
)
eventParser := &events.EventParser{
Expand Down Expand Up @@ -685,6 +686,16 @@ func NewServer(userConfig UserConfig, config Config) (*Server, error) {
),
statsScope,
logger,
&events.PullClosedExecutor{
VCSClient: vcsClient,
Locker: lockingClient,
WorkingDir: workingDir,
Logger: logger,
DB: boltdb,

// using a specific template to signal that this is from an async process
PullClosedTemplate: &GCStalePullTemplate{},
},
)

return &Server{
Expand Down

0 comments on commit 867d20e

Please sign in to comment.