Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve debug-ability of e2e tests by uploading logs of failed jobs #1898

Merged
merged 7 commits into from
Jan 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions azure-pipelines.yml
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,7 @@ jobs:
AZCOPY_E2E_CLIENT_SECRET: $(AZCOPY_SPA_CLIENT_SECRET)
AZCOPY_E2E_CLASSIC_ACCOUNT_NAME: $(AZCOPY_E2E_CLASSIC_ACCOUNT_NAME)
AZCOPY_E2E_CLASSIC_ACCOUNT_KEY: $(AZCOPY_E2E_CLASSIC_ACCOUNT_KEY)
AZCOPY_E2E_LOG_OUTPUT: '$(System.DefaultWorkingDirectory)/logs'
AZCOPY_E2E_OAUTH_MANAGED_DISK_CONFIG: $(AZCOPY_E2E_OAUTH_MANAGED_DISK_CONFIG)
AZCOPY_E2E_STD_MANAGED_DISK_CONFIG: $(AZCOPY_E2E_STD_MANAGED_DISK_CONFIG)
CPK_ENCRYPTION_KEY: $(CPK_ENCRYPTION_KEY)
Expand All @@ -162,6 +163,7 @@ jobs:
AZCOPY_E2E_CLIENT_SECRET: $(AZCOPY_SPA_CLIENT_SECRET)
AZCOPY_E2E_CLASSIC_ACCOUNT_NAME: $(AZCOPY_E2E_CLASSIC_ACCOUNT_NAME)
AZCOPY_E2E_CLASSIC_ACCOUNT_KEY: $(AZCOPY_E2E_CLASSIC_ACCOUNT_KEY)
AZCOPY_E2E_LOG_OUTPUT: '$(System.DefaultWorkingDirectory)/logs'
AZCOPY_E2E_OAUTH_MANAGED_DISK_CONFIG: $(AZCOPY_E2E_OAUTH_MANAGED_DISK_CONFIG)
AZCOPY_E2E_STD_MANAGED_DISK_CONFIG: $(AZCOPY_E2E_STD_MANAGED_DISK_CONFIG)
CPK_ENCRYPTION_KEY: $(CPK_ENCRYPTION_KEY)
Expand All @@ -187,13 +189,21 @@ jobs:
AZCOPY_E2E_CLIENT_SECRET: $(AZCOPY_SPA_CLIENT_SECRET)
AZCOPY_E2E_CLASSIC_ACCOUNT_NAME: $(AZCOPY_E2E_CLASSIC_ACCOUNT_NAME)
AZCOPY_E2E_CLASSIC_ACCOUNT_KEY: $(AZCOPY_E2E_CLASSIC_ACCOUNT_KEY)
AZCOPY_E2E_LOG_OUTPUT: '$(System.DefaultWorkingDirectory)/logs'
AZCOPY_E2E_OAUTH_MANAGED_DISK_CONFIG: $(AZCOPY_E2E_OAUTH_MANAGED_DISK_CONFIG)
AZCOPY_E2E_STD_MANAGED_DISK_CONFIG: $(AZCOPY_E2E_STD_MANAGED_DISK_CONFIG)
CPK_ENCRYPTION_KEY: $(CPK_ENCRYPTION_KEY)
CPK_ENCRYPTION_KEY_SHA256: $(CPK_ENCRYPTION_KEY_SHA256)
displayName: 'E2E Test MacOs AMD64'
condition: eq(variables.type, 'mac-os')

- task: PublishBuildArtifacts@1
displayName: 'Publish logs'
condition: succeededOrFailed()
inputs:
pathToPublish: '$(System.DefaultWorkingDirectory)/logs'
artifactName: logs

- job: Test_On_Ubuntu
variables:
isMutexSet: 'false'
Expand Down
5 changes: 5 additions & 0 deletions common/fe-ste-models.go
Original file line number Diff line number Diff line change
Expand Up @@ -670,6 +670,11 @@ var ETransferStatus = TransferStatus(0)

type TransferStatus int32 // Must be 32-bit for atomic operations; negative #s represent a specific failure code

func (t TransferStatus) StatusLocked() bool { // Is an overwrite necessary to change tx status?
// Any kind of failure, or success is considered "locked in".
return t <= ETransferStatus.Failed() || t == ETransferStatus.Success()
}

// Transfer is ready to transfer and not started transferring yet
func (TransferStatus) NotStarted() TransferStatus { return TransferStatus(0) }

Expand Down
56 changes: 45 additions & 11 deletions e2etest/declarativeScenario.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,31 @@ type scenarioState struct {
func (s *scenario) Run() {
defer s.cleanup()

// setup runner
azcopyDir, err := os.MkdirTemp("", "")
if err != nil {
s.a.Error(err.Error())
return
}
azcopyRan := false
defer func() {
if os.Getenv("AZCOPY_E2E_LOG_OUTPUT") == "" {
s.a.Assert(os.RemoveAll(azcopyDir), equals(), nil)
return // no need, just delete logdir
}

err := os.MkdirAll(os.Getenv("AZCOPY_E2E_LOG_OUTPUT"), os.ModePerm|os.ModeDir)
if err != nil {
s.a.Assert(err, equals(), nil)
return
}
if azcopyRan && s.a.Failed() {
s.uploadLogs(azcopyDir)
s.a.(*testingAsserter).t.Log("uploaded logs for job " + s.state.result.jobID.String() + " as an artifact")
}
}()

// setup scenario
// First, validate the accounts make sense for the source/dests
if s.srcAccountType.IsBlobOnly() {
s.a.Assert(s.fromTo.From(), equals(), common.ELocation.Blob())
Expand Down Expand Up @@ -97,29 +122,30 @@ func (s *scenario) Run() {
}

// execute
s.runAzCopy()
azcopyRan = true
s.runAzCopy(azcopyDir)
if s.a.Failed() {
return // execution failed. No point in running validation
}

// resume if needed
if s.needResume {
tx, err := s.state.result.GetTransferList(common.ETransferStatus.Cancelled())
tx, err := s.state.result.GetTransferList(common.ETransferStatus.Cancelled(), azcopyDir)
s.a.AssertNoErr(err, "Failed to get transfer list for Cancelled")
s.a.Assert(len(tx), equals(), len(s.p.debugSkipFiles), "Job cancel didn't completely work")

if !s.runHook(s.hs.beforeResumeHook) {
return
}

s.resumeAzCopy()
s.resumeAzCopy(azcopyDir)
}
if s.a.Failed() {
return // resume failed. No point in running validation
}

// check
s.validateTransferStates()
s.validateTransferStates(azcopyDir)
if s.a.Failed() {
return // no point in doing more validation
}
Expand All @@ -138,6 +164,13 @@ func (s *scenario) Run() {
s.runHook(s.hs.afterValidation)
}

func (s *scenario) uploadLogs(logDir string) {
if s.state.result == nil || os.Getenv("AZCOPY_E2E_LOG_OUTPUT") == "" {
return // nothing to upload
}
s.a.Assert(os.Rename(logDir, filepath.Join(os.Getenv("AZCOPY_E2E_LOG_OUTPUT"), s.state.result.jobID.String())), equals(), nil)
}

func (s *scenario) runHook(h hookFunc) bool {
if h == nil {
return true // nothing to do. So "successful"
Expand Down Expand Up @@ -195,7 +228,7 @@ func (s *scenario) assignSourceAndDest() {
s.state.dest = createTestResource(s.fromTo.To(), false)
}

func (s *scenario) runAzCopy() {
func (s *scenario) runAzCopy(logDirectory string) {
s.chToStdin = make(chan string) // unubuffered seems the most predictable for our usages
defer close(s.chToStdin)

Expand Down Expand Up @@ -223,9 +256,9 @@ func (s *scenario) runAzCopy() {
result, wasClean, err := r.ExecuteAzCopyCommand(
s.operation,
s.state.source.getParam(s.stripTopDir, needsSAS(s.credTypes[0]), tf.objectTarget),
s.state.dest.getParam(false, needsSAS(s.credTypes[1]), common.IffString(tf.destTarget != "", tf.destTarget, tf.objectTarget)),
s.credTypes[0].IsAzureOAuth() || s.credTypes[1].IsAzureOAuth(), // needsOAuth
afterStart, s.chToStdin)
s.state.dest.getParam(false, needsSAS(s.credTypes[1]), common.IffString(tf.destTarget != "", tf.destTarget, tf.objectTarget)),
s.credTypes[0] == common.ECredentialType.OAuthToken() || s.credTypes[1] == common.ECredentialType.OAuthToken(), // needsOAuth
afterStart, s.chToStdin, logDirectory)

if !wasClean {
s.a.AssertNoErr(err, "running AzCopy")
Expand All @@ -243,7 +276,7 @@ func (s *scenario) runAzCopy() {
s.state.result = &result
}

func (s *scenario) resumeAzCopy() {
func (s *scenario) resumeAzCopy(logDir string) {
s.chToStdin = make(chan string) // unubuffered seems the most predictable for our usages
defer close(s.chToStdin)

Expand Down Expand Up @@ -274,6 +307,7 @@ func (s *scenario) resumeAzCopy() {
false,
afterStart,
s.chToStdin,
logDir,
)

if !wasClean {
Expand All @@ -295,7 +329,7 @@ func (s *scenario) validateRemove() {
}
}
}
func (s *scenario) validateTransferStates() {
func (s *scenario) validateTransferStates(azcopyDir string) {
if s.operation == eOperation.Remove() {
s.validateRemove()
return
Expand All @@ -318,7 +352,7 @@ func (s *scenario) validateTransferStates() {
// Is that OK? (Not sure what to do if it's not, because azcopy jobs show, apparently doesn't offer us a way to get the skipped list)
} {
expectedTransfers := s.fs.getForStatus(statusToTest, expectFolders, expectRootFolder)
actualTransfers, err := s.state.result.GetTransferList(statusToTest)
actualTransfers, err := s.state.result.GetTransferList(statusToTest, azcopyDir)
s.a.AssertNoErr(err)

Validator{}.ValidateCopyTransfersAreScheduled(s.a, isSrcEncoded, isDstEncoded, srcRoot, dstRoot, expectedTransfers, actualTransfers, statusToTest, s.FromTo(), s.srcAccountType, s.destAccountType)
Expand Down
22 changes: 17 additions & 5 deletions e2etest/runner.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (
"fmt"
"os"
"os/exec"
"path/filepath"
"strconv"
"strings"

Expand Down Expand Up @@ -184,7 +185,7 @@ func (t *TestRunner) execDebuggableWithOutput(name string, args []string, env []
return stdout.Bytes(), runErr
}

func (t *TestRunner) ExecuteAzCopyCommand(operation Operation, src, dst string, needsOAuth bool, afterStart func() string, chToStdin <-chan string) (CopyOrSyncCommandResult, bool, error) {
func (t *TestRunner) ExecuteAzCopyCommand(operation Operation, src, dst string, needsOAuth bool, afterStart func() string, chToStdin <-chan string, logDir string) (CopyOrSyncCommandResult, bool, error) {
capLen := func(b []byte) []byte {
if len(b) < 1024 {
return b
Expand Down Expand Up @@ -234,6 +235,11 @@ func (t *TestRunner) ExecuteAzCopyCommand(operation Operation, src, dst string,
}
}

if logDir != "" {
env = append(env, "AZCOPY_LOG_LOCATION="+logDir)
env = append(env, "AZCOPY_JOB_PLAN_LOCATION="+filepath.Join(logDir, "plans"))
}

out, err := t.execDebuggableWithOutput(GlobalInputManager{}.GetExecutablePath(), args, env, afterStart, chToStdin)

wasClean := true
Expand Down Expand Up @@ -268,9 +274,15 @@ func (t *TestRunner) SetTransferStatusFlag(value string) {
t.flags["with-status"] = value
}

func (t *TestRunner) ExecuteJobsShowCommand(jobID common.JobID) (JobsShowCommandResult, error) {
func (t *TestRunner) ExecuteJobsShowCommand(jobID common.JobID, azcopyDir string) (JobsShowCommandResult, error) {
args := append([]string{"jobs", "show", jobID.String()}, t.computeArgs()...)
out, err := exec.Command(GlobalInputManager{}.GetExecutablePath(), args...).Output()
cmd := exec.Command(GlobalInputManager{}.GetExecutablePath(), args...)

if azcopyDir != "" {
cmd.Env = append(cmd.Env, "AZCOPY_JOB_PLAN_LOCATION="+filepath.Join(azcopyDir, "plans"))
}

out, err := cmd.Output()
if err != nil {
return JobsShowCommandResult{}, err
}
Expand Down Expand Up @@ -307,12 +319,12 @@ func newCopyOrSyncCommandResult(rawOutput string) (CopyOrSyncCommandResult, bool
return CopyOrSyncCommandResult{jobID: jobSummary.JobID, finalStatus: jobSummary}, true
}

func (c *CopyOrSyncCommandResult) GetTransferList(status common.TransferStatus) ([]common.TransferDetail, error) {
func (c *CopyOrSyncCommandResult) GetTransferList(status common.TransferStatus, azcopyDir string) ([]common.TransferDetail, error) {
runner := newTestRunner()
runner.SetTransferStatusFlag(status.String())

// invoke AzCopy to get the status from the plan files
result, err := runner.ExecuteJobsShowCommand(c.jobID)
result, err := runner.ExecuteJobsShowCommand(c.jobID, azcopyDir)
if err != nil {
return make([]common.TransferDetail, 0), err
}
Expand Down
5 changes: 3 additions & 2 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,12 +60,13 @@ func main() {
// the user can optionally put the plan files somewhere else
if azcopyJobPlanFolder == "" {
// make the app path folder ".azcopy" first so we can make a plans folder in it
if err := os.Mkdir(azcopyAppPathFolder, os.ModeDir); err != nil && !os.IsExist(err) {
if err := os.MkdirAll(azcopyAppPathFolder, os.ModeDir); err != nil && !os.IsExist(err) {
common.PanicIfErr(err)
}
azcopyJobPlanFolder = path.Join(azcopyAppPathFolder, "plans")
}
if err := os.Mkdir(azcopyJobPlanFolder, os.ModeDir|os.ModePerm); err != nil && !os.IsExist(err) {

if err := os.MkdirAll(azcopyJobPlanFolder, os.ModeDir|os.ModePerm); err != nil && !os.IsExist(err) {
log.Fatalf("Problem making .azcopy directory. Try setting AZCOPY_PLAN_FILE_LOCATION env variable. %v", err)
}

Expand Down
5 changes: 2 additions & 3 deletions ste/JobPartPlan.go
Original file line number Diff line number Diff line change
Expand Up @@ -409,9 +409,8 @@ func (jppt *JobPartPlanTransfer) SetTransferStatus(status common.TransferStatus,
if !overWrite {
common.AtomicMorphInt32((*int32)(&jppt.atomicTransferStatus),
func(startVal int32) (val int32, morphResult interface{}) {
// start value < 0 means that transfer status is already a failed value.
// If current transfer status has already failed value, then it will not be changed.
return common.Iffint32(startVal < 0, startVal, int32(status)), nil
// If current transfer status has some completed value, then it will not be changed.
return common.Iffint32(common.TransferStatus(startVal).StatusLocked(), startVal, int32(status)), nil
})
} else {
(&jppt.atomicTransferStatus).AtomicStore(status)
Expand Down