Skip to content

Commit

Permalink
Activity log crash (#64)
Browse files Browse the repository at this point in the history
* Log that a server has crashed in the activity logs
* Don't log if a crash is too frequent
* Give an IP so it does not fail to get pushed to the panel
* Don't panic, just log it
  • Loading branch information
QuintenQVD0 authored Jan 23, 2025
1 parent 54f8e25 commit 9f1dbd4
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 0 deletions.
3 changes: 3 additions & 0 deletions config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,9 @@ type SystemConfiguration struct {

CrashDetection CrashDetection `yaml:"crash_detection"`

// The number of console output lines to include in the activity log entry when a server crashes
CrashActivityLogLines int `default:"2" yaml:"crash_detection_activity_lines"`

Backups Backups `yaml:"backups"`

Transfers Transfers `yaml:"transfers"`
Expand Down
2 changes: 2 additions & 0 deletions server/activity.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ const (
ActivitySftpRename = models.Event("server:sftp.rename")
ActivitySftpDelete = models.Event("server:sftp.delete")
ActivityFileUploaded = models.Event("server:file.uploaded")
ActivityServerCrashed = models.Event("server:crashed")

)

// RequestActivity is a wrapper around a LoggedEvent that is able to track additional request
Expand Down
16 changes: 16 additions & 0 deletions server/crash.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,12 @@ import (
"time"

"emperror.dev/errors"
"github.com/apex/log"

"github.com/pelican-dev/wings/config"
"github.com/pelican-dev/wings/environment"
"github.com/pelican-dev/wings/internal/models"

)

type CrashHandler struct {
Expand Down Expand Up @@ -69,6 +72,12 @@ func (s *Server) handleServerCrash() error {
return nil
}

// Get the last lines from the output before the crash so we can log it
logs, err := s.Environment.Readlog(config.Get().System.CrashActivityLogLines)
if err != nil {
log.WithField("server_id", s.ID()).Warn("Faild to get the last lines out of the console for the activity logs")
}

s.PublishConsoleOutputFromDaemon("---------- Detected server process in a crashed state! ----------")
s.PublishConsoleOutputFromDaemon(fmt.Sprintf("Exit code: %d", exitCode))
s.PublishConsoleOutputFromDaemon(fmt.Sprintf("Out of memory: %t", oomKilled))
Expand All @@ -85,6 +94,13 @@ func (s *Server) handleServerCrash() error {
return &crashTooFrequent{}
}

// Log that the server has crashed
s.SaveActivity(s.NewRequestActivity("", "127.0.0.1"), ActivityServerCrashed, models.ActivityMeta{
"exit_code": exitCode,
"oomkilled": oomKilled,
"logs": logs,
})

s.crasher.SetLastCrash(time.Now())

return errors.Wrap(s.HandlePowerAction(PowerActionStart), "failed to start server after crash detection")
Expand Down

0 comments on commit 9f1dbd4

Please sign in to comment.