diff --git a/client/alloc_runner_test.go b/client/alloc_runner_test.go index 9bdfcf9e83d..6ee1f033949 100644 --- a/client/alloc_runner_test.go +++ b/client/alloc_runner_test.go @@ -25,22 +25,24 @@ func (m *MockAllocStateUpdater) Update(alloc *structs.Allocation) { m.Allocs = append(m.Allocs, alloc) } -func testAllocRunner(restarts bool) (*MockAllocStateUpdater, *AllocRunner) { +func testAllocRunnerFromAlloc(alloc *structs.Allocation, restarts bool) (*MockAllocStateUpdater, *AllocRunner) { logger := testLogger() conf := config.DefaultConfig() conf.StateDir = os.TempDir() conf.AllocDir = os.TempDir() upd := &MockAllocStateUpdater{} - alloc := mock.Alloc() if !restarts { *alloc.Job.LookupTaskGroup(alloc.TaskGroup).RestartPolicy = structs.RestartPolicy{Attempts: 0} alloc.Job.Type = structs.JobTypeBatch } - ar := NewAllocRunner(logger, conf, upd.Update, alloc) return upd, ar } +func testAllocRunner(restarts bool) (*MockAllocStateUpdater, *AllocRunner) { + return testAllocRunnerFromAlloc(mock.Alloc(), restarts) +} + func TestAllocRunner_SimpleRun(t *testing.T) { ctestutil.ExecCompatible(t) upd, ar := testAllocRunner(false) @@ -61,6 +63,59 @@ func TestAllocRunner_SimpleRun(t *testing.T) { }) } +// TestAllocRuner_RetryArtifact ensures that if one task in a task group is +// retrying fetching an artifact, other tasks in the the group should be able +// to proceed. +func TestAllocRunner_RetryArtifact(t *testing.T) { + ctestutil.ExecCompatible(t) + + alloc := mock.Alloc() + alloc.Job.Type = structs.JobTypeBatch + alloc.Job.TaskGroups[0].RestartPolicy.Attempts = 1 + alloc.Job.TaskGroups[0].RestartPolicy.Delay = time.Duration(4*testutil.TestMultiplier()) * time.Second + + // Create a new task with a bad artifact + badtask := alloc.Job.TaskGroups[0].Tasks[0].Copy() + badtask.Name = "bad" + badtask.Artifacts = []*structs.TaskArtifact{ + {GetterSource: "http://127.1.1.111:12315/foo/bar/baz"}, + } + + alloc.Job.TaskGroups[0].Tasks = append(alloc.Job.TaskGroups[0].Tasks, badtask) + upd, ar := testAllocRunnerFromAlloc(alloc, true) + go ar.Run() + defer ar.Destroy() + + testutil.WaitForResult(func() (bool, error) { + if upd.Count < 6 { + return false, fmt.Errorf("Not enough updates") + } + last := upd.Allocs[upd.Count-1] + + // web task should have completed successfully while bad task + // retries artififact fetching + webstate := last.TaskStates["web"] + if webstate.State != structs.TaskStateDead { + return false, fmt.Errorf("expected web to be dead but found %q", last.TaskStates["web"].State) + } + if !webstate.Successful() { + return false, fmt.Errorf("expected web to have exited successfully") + } + + // bad task should have failed + badstate := last.TaskStates["bad"] + if badstate.State != structs.TaskStateDead { + return false, fmt.Errorf("expected bad to be dead but found %q", last.TaskStates["web"].State) + } + if !badstate.Failed() { + return false, fmt.Errorf("expected bad to have failed") + } + return true, nil + }, func(err error) { + t.Fatalf("err: %v", err) + }) +} + func TestAllocRunner_TerminalUpdate_Destroy(t *testing.T) { ctestutil.ExecCompatible(t) upd, ar := testAllocRunner(false) diff --git a/client/task_runner.go b/client/task_runner.go index 0544a1eae94..76b19f28cfc 100644 --- a/client/task_runner.go +++ b/client/task_runner.go @@ -318,7 +318,7 @@ func (r *TaskRunner) run() { for _, artifact := range r.task.Artifacts { if err := getter.GetArtifact(r.taskEnv, artifact, taskDir); err != nil { - r.setState(structs.TaskStateDead, + r.setState(structs.TaskStatePending, structs.NewTaskEvent(structs.TaskArtifactDownloadFailed).SetDownloadError(err)) r.restartTracker.SetStartError(dstructs.NewRecoverableError(err, true)) goto RESTART