diff --git a/pkg/repository/restic/repository.go b/pkg/repository/restic/repository.go index 392caf28429..663de0d950b 100644 --- a/pkg/repository/restic/repository.go +++ b/pkg/repository/restic/repository.go @@ -18,6 +18,7 @@ package restic import ( "os" + "os/exec" "time" "github.com/pkg/errors" @@ -120,6 +121,17 @@ func (r *RepositoryService) exec(cmd *restic.Command, bsl *velerov1api.BackupSto "stderr": stderr, }).Debugf("Ran restic command") if err != nil { + if exitError, ok := err.(*exec.ExitError); ok { + r.log.Errorf("Restic command fail with ExitCode: %d. Process ID is %d, Exit error is: %s", exitError.ExitCode(), exitError.Pid(), exitError.String()) + // In Go's os/exec package, an ExitCode of -1 means the process was terminated by a signal. + // This usually means it was killed by the cgroup OOM killer, so log a warning to notify the user. + // https://github.com/golang/go/blob/master/src/os/exec_posix.go#L128-L136 + if exitError.ExitCode() == -1 { + r.log.Warnf("The ExitCode is -1, which means the process is terminated by signal. Usually this is caused by CGroup kill due to out of memory. Please check whether this is such information in the work nodes' dmesg log.") + } + } else { + r.log.Error("Error cannot be convert to ExitError format") + } return errors.Wrapf(err, "error running command=%s, stdout=%s, stderr=%s", cmd.String(), stdout, stderr) }