From d7f1ea4fbd5ccdbf4327f855e461bca9caa89507 Mon Sep 17 00:00:00 2001
From: Xun Jiang
Date: Fri, 30 Jun 2023 17:53:27 +0800
Subject: [PATCH] Add exit code log and possible memory shortage warning log for Restic command failure.

Signed-off-by: Xun Jiang
---
 changelogs/unreleased/6459-blackpiglet |  1 +
 pkg/repository/restic/repository.go    |  2 +-
 pkg/restic/exec_commands.go            |  4 +++-
 pkg/util/exec/exec.go                  | 37 +++++++++++++++++++++++++++++++++++++
 4 files changed, 42 insertions(+), 2 deletions(-)
 create mode 100644 changelogs/unreleased/6459-blackpiglet

diff --git a/changelogs/unreleased/6459-blackpiglet b/changelogs/unreleased/6459-blackpiglet
new file mode 100644
index 0000000000..26e0e38566
--- /dev/null
+++ b/changelogs/unreleased/6459-blackpiglet
@@ -0,0 +1 @@
+Add exit code log and possible memory shortage warning log for Restic command failure.
\ No newline at end of file
diff --git a/pkg/repository/restic/repository.go b/pkg/repository/restic/repository.go
index 392caf2842..3c15e0b376 100644
--- a/pkg/repository/restic/repository.go
+++ b/pkg/repository/restic/repository.go
@@ -112,7 +112,7 @@ func (r *RepositoryService) exec(cmd *restic.Command, bsl *velerov1api.BackupSto
 		cmd.ExtraFlags = append(cmd.ExtraFlags, skipTLSRet)
 	}
 
-	stdout, stderr, err := veleroexec.RunCommand(cmd.Cmd())
+	stdout, stderr, err := veleroexec.RunCommandWithLog(cmd.Cmd(), r.log)
 	r.log.WithFields(logrus.Fields{
 		"repository": cmd.RepoName(),
 		"command":    cmd.String(),
diff --git a/pkg/restic/exec_commands.go b/pkg/restic/exec_commands.go
index 0cbc428024..94c17c04a4 100644
--- a/pkg/restic/exec_commands.go
+++ b/pkg/restic/exec_commands.go
@@ -86,6 +86,7 @@ func RunBackup(backupCmd *Command, log logrus.FieldLogger, updater uploader.Prog
 
 	err := cmd.Start()
 	if err != nil {
+		exec.LogErrorAsExitCode(err, log)
 		return stdoutBuf.String(), stderrBuf.String(), err
 	}
 
@@ -119,6 +120,7 @@ func RunBackup(backupCmd *Command, log logrus.FieldLogger, updater uploader.Prog
 
 	err = cmd.Wait()
 	if err != nil {
+		exec.LogErrorAsExitCode(err, log)
 		return stdoutBuf.String(), stderrBuf.String(), err
 	}
 	quit <- struct{}{}
@@ -229,7 +231,7 @@ func RunRestore(restoreCmd *Command, log logrus.FieldLogger, updater uploader.Pr
 		}
 	}()
 
-	stdout, stderr, err := exec.RunCommand(restoreCmd.Cmd())
+	stdout, stderr, err := exec.RunCommandWithLog(restoreCmd.Cmd(), log)
 	quit <- struct{}{}
 
 	// update progress to 100%
diff --git a/pkg/util/exec/exec.go b/pkg/util/exec/exec.go
index 84bffb257f..109118d582 100644
--- a/pkg/util/exec/exec.go
+++ b/pkg/util/exec/exec.go
@@ -22,6 +22,7 @@ import (
 	"os/exec"
 
 	"github.com/pkg/errors"
+	"github.com/sirupsen/logrus"
 )
 
 // RunCommand runs a command and returns its stdout, stderr, and its returned
@@ -52,3 +53,39 @@ func RunCommand(cmd *exec.Cmd) (string, string, error) {
 
 	return stdout, stderr, runErr
 }
+
+// RunCommandWithLog runs cmd through RunCommand and then passes the returned
+// error to LogErrorAsExitCode so that a failure is recorded on log. The stdout,
+// stderr and error from RunCommand are returned unchanged.
+func RunCommandWithLog(cmd *exec.Cmd, log logrus.FieldLogger) (string, string, error) {
+	stdout, stderr, err := RunCommand(cmd)
+	LogErrorAsExitCode(err, log)
+	return stdout, stderr, err
+}
+
+// LogErrorAsExitCode logs the exit details carried by err. A nil err logs
+// nothing. When err is, or wraps, an *exec.ExitError, the exit code, process
+// ID and exit status are logged at error level; any other error is logged at
+// info level.
+func LogErrorAsExitCode(err error, log logrus.FieldLogger) {
+	if err == nil {
+		return
+	}
+
+	// errors.As also matches an ExitError that a caller has wrapped, which a
+	// plain type assertion would miss.
+	var exitError *exec.ExitError
+	if !errors.As(err, &exitError) {
+		log.WithError(err).Info("Error cannot be convert to ExitError format.")
+		return
+	}
+
+	log.Errorf("Restic command fail with ExitCode: %d. Process ID is %d, Exit error is: %s", exitError.ExitCode(), exitError.Pid(), exitError.String())
+	// Golang's os/exec reports an ExitCode of -1 when the process was killed
+	// by a signal. Usually this is caused by the cgroup OOM killer, so log a
+	// warning to notify the user.
+	// https://github.com/golang/go/blob/master/src/os/exec_posix.go#L128-L136
+	if exitError.ExitCode() == -1 {
+		log.Warn("The ExitCode is -1, which means the process is terminated by signal. Usually this is caused by CGroup kill due to out of memory. Please check whether there is such information in the work nodes' dmesg log.")
+	}
+}