Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

MGMT-4966 Display a better message when media disconnection failure o… #197

Merged
merged 1 commit into from
May 31, 2021
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
146 changes: 125 additions & 21 deletions src/commands/step_processor.go
Original file line number Diff line number Diff line change
@@ -1,9 +1,15 @@
package commands

import (
"fmt"
"io"
"net/http"
"os"
"strings"
"time"

"github.com/pkg/errors"

"github.com/go-openapi/swag"

log "github.com/sirupsen/logrus"
Expand All @@ -20,6 +26,14 @@ import (

type HandlerType func(string, ...string) (stdout string, stderr string, exitCode int)

type errorCode int

// TODO - add an ErrorCode Enum to the swagger
const (
Undetected errorCode = 999
michaellevy101 marked this conversation as resolved.
Show resolved Hide resolved
MediaDisconnected errorCode = 256
)

type stepSession struct {
session.InventorySession
}
Expand All @@ -33,36 +47,30 @@ func newSession() *stepSession {
return &ret
}

func (s *stepSession) sendStepReply(stepType models.StepType, stepID, output, errStr string, exitCode int) {
if exitCode == 0 && alreadyExistsInService(stepType, output) {
s.Logger().Infof("Result for %s already exists in assisted service", string(stepType))
func (s *stepSession) sendStepReply(reply models.StepReply) {
if reply.ExitCode == 0 && alreadyExistsInService(reply.StepType, reply.Output) {
s.Logger().Infof("Result for %s already exists in assisted service", string(reply.StepType))
return
}
logFunc := s.Logger().Infof
if exitCode != 0 {
if reply.ExitCode != 0 {
logFunc = s.Logger().Warnf
}

if stepType == models.StepTypeFreeNetworkAddresses {
if reply.StepType == models.StepTypeFreeNetworkAddresses {
// the free-addresses step's output spams the log too much
logFunc("Sending step <%s> reply error <%s> exit-code <%d>", stepID, errStr, exitCode)
logFunc("Sending step <%s> reply error <%s> exit-code <%d>", reply.StepID, reply.Error, reply.ExitCode)
} else {
logFunc("Sending step <%s> reply output <%s> error <%s> exit-code <%d>", stepID, output, errStr, exitCode)
logFunc("Sending step <%s> reply output <%s> error <%s> exit-code <%d>", reply.StepID, reply.Output, reply.Error, reply.ExitCode)
}

params := installer.PostStepReplyParams{
HostID: strfmt.UUID(config.GlobalAgentConfig.HostID),
ClusterID: strfmt.UUID(config.GlobalAgentConfig.ClusterID),
DiscoveryAgentVersion: &config.GlobalAgentConfig.AgentVersion,
Reply: &reply,
}
reply := models.StepReply{
StepType: stepType,
StepID: stepID,
ExitCode: int64(exitCode),
Output: output,
Error: errStr,
}
params.Reply = &reply

_, err := s.Client().Installer.PostStepReply(s.Context(), &params)
if err != nil {
switch errValue := err.(type) {
Expand All @@ -75,12 +83,22 @@ func (s *stepSession) sendStepReply(stepType models.StepType, stepID, output, er
default:
s.Logger().WithError(err).Warn("Error posting step reply")
}
} else if exitCode == 0 {
storeInCache(stepType, output)
} else if reply.ExitCode == 0 {
storeInCache(reply.StepType, reply.Output)
}
}

func (s *stepSession) handleSingleStep(stepType models.StepType, stepID string, command string, args []string, handler HandlerType) {
func (s *stepSession) createStepReply(stepType models.StepType, stepID string, output string, errStr string, exitCode int) models.StepReply {
return models.StepReply{
StepType: stepType,
StepID: stepID,
ExitCode: int64(exitCode),
Output: output,
Error: errStr,
}
}

func (s *stepSession) handleSingleStep(stepType models.StepType, stepID string, command string, args []string, handler HandlerType) models.StepReply {
s.Logger().Infof("Executing step: <%s>, command: <%s>, args: <%v>", stepID, command, args)
stdout, stderr, exitCode := handler(command, args...)
if exitCode != 0 {
Expand All @@ -92,21 +110,107 @@ stderr:
%v
`, exitCode, stepID, command, args, stdout, stderr)
}
s.sendStepReply(stepType, stepID, stdout, stderr, exitCode)

return s.createStepReply(stepType, stepID, stdout, stderr, exitCode)
}

func (s *stepSession) handleSteps(steps *models.Steps) {
for _, step := range steps.Instructions {
if step.Command == "" {
errStr := "Missing command"
s.Logger().Warn(errStr)
s.sendStepReply(step.StepType, step.StepID, "", errStr, -1)
s.sendStepReply(s.createStepReply(step.StepType, step.StepID, "", errStr, -1))
continue
}
go s.handleSingleStep(step.StepType, step.StepID, step.Command, step.Args, util.ExecutePrivileged)

go func(step *models.Step) {
if code, err := s.diagnoseSystem(); code != Undetected {
log.Errorf("System issue detected before running step: <%s>, command: <%s>, args: <%v>: %s - stopping the execution", step.StepID, step.Command, step.Args, err.Error())
s.sendStepReply(s.createStepReply(step.StepType, step.StepID, "", err.Error(), int(code)))
return
michaellevy101 marked this conversation as resolved.
Show resolved Hide resolved
}

reply := s.handleSingleStep(step.StepType, step.StepID, step.Command, step.Args, util.ExecutePrivileged)

if reply.ExitCode != 0 {
if code, err := s.diagnoseSystem(); code != Undetected {
log.Errorf("System issue detected after running step: <%s>, command: <%s>, args: <%v>: %s", step.StepID, step.Command, step.Args, err.Error())
reply.ExitCode = int64(code)
}
}

s.sendStepReply(reply)
}(step)
}
}

// diagnoseSystem runs quick validations that need to need to occur before step and after a failure.
// This is in order to detect and report known problems otherwise manifest as confusing error messages or stuck the whole system in the steps themselves.
// One common example of that is virtual media disconnection.
func (s *stepSession) diagnoseSystem() (errorCode, error) {
source, err := s.getMountpointSourceDeviceFile()

if err != nil {
log.Warn(err)
return Undetected, nil
}

if source == "" {
return Undetected, nil
}

file, err := os.Open(source)
if err != nil {
return MediaDisconnected, errors.Wrap(err, "cannot access the media (ISO) - media was likely disconnected")
}

defer file.Close()

_, err = io.ReadFull(file, make([]byte, 2, 2))
if err != nil {
return MediaDisconnected, errors.Wrap(err, "cannot read from the media (ISO) - media was likely disconnected")
}

return Undetected, nil
}

func (s *stepSession) getMountpointSourceDeviceFile() (string, error) {
michaellevy101 marked this conversation as resolved.
Show resolved Hide resolved
mediaPath := "/run/media/iso"
michaellevy101 marked this conversation as resolved.
Show resolved Hide resolved

// Media disconnection issue occurs only for a full-ISO installation
// mostly when serving iso via virtual media via sub optimal networking conditions.
// The minimal-ISO loaded very early and stay in memory. We don't need to read them from the ISO once they're loaded
// The media path exists only for the full-ISO so we can just eliminate this check.
if _, err := os.Stat(mediaPath); err != nil {
return "", nil
}

stdout, stderr, exitCode := util.ExecutePrivileged("findmnt", "--raw", "--noheadings", "--output", "SOURCE,TARGET", "--target", mediaPath)

errorMessage := "failed to validate media disconnection - continuing"

if exitCode != 0 {
return "", errors.Errorf("%s: %s", errorMessage, stderr)
}

if stdout == "" {
return "", errors.Errorf("%s: cannot find ISO mountpoint source", errorMessage)
}

fields := strings.Fields(stdout)

if fields[1] != mediaPath {
return "", fmt.Errorf("%s: media mounted to %s instead of directly to %s", errorMessage, fields[1], mediaPath)
}

source := fields[0]
if source == "" || !strings.HasPrefix(source, "/dev") {
michaellevy101 marked this conversation as resolved.
Show resolved Hide resolved
return "", fmt.Errorf("%s: the mount source isn't a device file %s", errorMessage, source)
}

return source, nil
}

func (s *stepSession) processSingleSession() (int64, string) {
params := installer.GetNextStepsParams{
HostID: strfmt.UUID(config.GlobalAgentConfig.HostID),
Expand Down