Skip to content

Commit

Permalink
ephemeral disk: migrate should imply sticky (#16826)
Browse files Browse the repository at this point in the history
The `ephemeral_disk` block's `migrate` field allows for best-effort migration of
the ephemeral disk data to new nodes. The documentation says the `migrate` field
is only respected if `sticky=true`, but in fact if client ACLs are not set the
data is migrated even if `sticky=false`.

The existing behavior when client ACLs are disabled has existed since the early
implementation, so "fixing" that case now would silently break backwards
compatibility. Additionally, having `migrate` not imply `sticky` seems
nonsensical: it suggests that if we place on a new node we migrate the data but
if we place on the same node, we throw the data away!

Update so that `migrate=true` implies `sticky=true` as follows:

* The failure mode when client ACLs are enabled comes from the server not passing
  along a migration token. Update the server so that the server provides a
  migration token whenever `migrate=true` and not just when `sticky=true` too.
* Update the scheduler so that `migrate` implies `sticky`.
* Update the client so that we check for `migrate || sticky` where appropriate.
* Refactor the E2E tests to move them off the old framework and make the intention
  of the test more clear.
  • Loading branch information
tgross authored Apr 7, 2023
1 parent bca03b6 commit b8a472d
Show file tree
Hide file tree
Showing 9 changed files with 161 additions and 175 deletions.
3 changes: 3 additions & 0 deletions .changelog/16826.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:improvement
ephemeral disk: migrate=true now implies sticky=true
```
2 changes: 1 addition & 1 deletion client/allocwatcher/alloc_watcher.go
Original file line number Diff line number Diff line change
Expand Up @@ -98,8 +98,8 @@ func newMigratorForAlloc(c Config, tg *structs.TaskGroup, watchedAllocID string,
logger := c.Logger.Named("alloc_migrator").With("alloc_id", c.Alloc.ID).With("previous_alloc", watchedAllocID)

tasks := tg.Tasks
sticky := tg.EphemeralDisk != nil && tg.EphemeralDisk.Sticky
migrate := tg.EphemeralDisk != nil && tg.EphemeralDisk.Migrate
sticky := tg.EphemeralDisk != nil && (tg.EphemeralDisk.Sticky || migrate)

if m != nil {
// Local Allocation because there's an alloc runner
Expand Down
18 changes: 1 addition & 17 deletions e2e/nodedrain/input/drain_migrate.nomad
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
job "drain_migrate" {
datacenters = ["dc1", "dc2"]

constraint {
attribute = "${attr.kernel.name}"
Expand All @@ -19,22 +18,7 @@ job "drain_migrate" {
config {
image = "busybox:1"
command = "/bin/sh"
args = ["local/test.sh"]
}

template {
data = <<EOT
#!/bin/sh
if [ ! -f /alloc/data/{{ env "NOMAD_JOB_NAME" }} ]; then
echo writing {{ env "NOMAD_ALLOC_ID" }} to /alloc/data/{{ env "NOMAD_JOB_NAME" }}
echo {{ env "NOMAD_ALLOC_ID" }} > /alloc/data/{{ env "NOMAD_JOB_NAME" }}
else
echo /alloc/data/{{ env "NOMAD_JOB_NAME" }} already exists
fi
sleep 3600
EOT

destination = "local/test.sh"
args = ["-c", "echo \"data from $NOMAD_ALLOC_ID\" >> /alloc/data/migrate.txt && sleep 120"]
}

resources {
Expand Down
67 changes: 66 additions & 1 deletion e2e/nodedrain/node_drain_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package nodedrain
import (
"fmt"
"os"
"strings"
"testing"
"time"

Expand All @@ -24,6 +25,7 @@ func TestNodeDrain(t *testing.T) {
e2eutil.WaitForNodesReady(t, nomadClient, 2) // needs at least 2 to test migration

t.Run("IgnoreSystem", testIgnoreSystem)
t.Run("EphemeralMigrate", testEphemeralMigrate)
t.Run("KeepIneligible", testKeepIneligible)
}

Expand Down Expand Up @@ -78,6 +80,69 @@ func testIgnoreSystem(t *testing.T) {
}
}

// testEphemeralMigrate tests that ephermeral_disk migrations work as expected
// even during a node drain.
func testEphemeralMigrate(t *testing.T) {

t.Cleanup(cleanupDrainState(t))

nomadClient := e2eutil.NomadClient(t)
jobID := "drain-migrate-" + uuid.Short()

allocs := registerAndWaitForRunning(t, nomadClient, jobID, "./input/drain_migrate.nomad", 1)
t.Cleanup(cleanupJobState(t, jobID))
oldAllocID := allocs[0].ID
oldNodeID := allocs[0].NodeID

// make sure the allocation has written its ID to disk so we have something to migrate
must.Wait(t, wait.InitialSuccess(
wait.ErrorFunc(func() error {
got, err := e2eutil.Command("nomad", "alloc", "fs", oldAllocID,
"alloc/data/migrate.txt")
if err != nil {
return fmt.Errorf("did not expect error reading alloc fs: %v", err)
}
if !strings.Contains(got, oldAllocID) {
return fmt.Errorf("expected data to be written for alloc %q", oldAllocID)
}
return nil
}),
wait.Timeout(10*time.Second),
wait.Gap(500*time.Millisecond),
))

out, err := e2eutil.Command("nomad", "node", "drain", "-enable", "-yes", "-detach", oldNodeID)
must.NoError(t, err, must.Sprintf("expected no error when marking node for drain: %v", out))

newAllocs := waitForAllocDrain(t, nomadClient, jobID, oldAllocID, oldNodeID)
must.Len(t, 1, newAllocs, must.Sprint("expected 1 new alloc"))
newAllocID := newAllocs[0].ID
newNodeID := newAllocs[0].NodeID

// although migrate=true implies sticky=true, the drained node is ineligible
// for scheduling so the alloc should have been migrated
must.NotEq(t, oldNodeID, newNodeID, must.Sprint("new alloc was placed on draining node"))

// once the new allocation is running, it should quickly have the right data
must.Wait(t, wait.InitialSuccess(
wait.ErrorFunc(func() error {
got, err := e2eutil.Command("nomad", "alloc", "fs", newAllocID,
"alloc/data/migrate.txt")
if err != nil {
return fmt.Errorf("did not expect error reading alloc fs: %v", err)
}
if !strings.Contains(got, oldAllocID) || !strings.Contains(got, newAllocID) {
return fmt.Errorf(
"expected data to be migrated from alloc=%s on node=%s to alloc=%s on node=%s but got:\n%q",
oldAllocID[:8], oldNodeID[:8], newAllocID[:8], newNodeID[:8], got)
}
return nil
}),
wait.Timeout(10*time.Second),
wait.Gap(500*time.Millisecond),
))
}

// testKeepIneligible tests that nodes can be kept ineligible for scheduling after
// disabling drain.
func testKeepIneligible(t *testing.T) {
Expand Down Expand Up @@ -172,7 +237,7 @@ func waitForAllocDrain(t *testing.T, nomadClient *api.Client, jobID, oldAllocID,
}
}
t.Logf("alloc has drained from node=%s after %v",
oldNodeID, time.Now().Sub(start))
oldNodeID[:8], time.Now().Sub(start))
return nil
}),
wait.Timeout(120*time.Second),
Expand Down
68 changes: 0 additions & 68 deletions e2e/nodedrain/nodedrain.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ package nodedrain
import (
"fmt"
"os"
"strings"
"time"

e2e "github.com/hashicorp/nomad/e2e/e2eutil"
Expand Down Expand Up @@ -95,73 +94,6 @@ func waitForNodeDrain(nodeID string, comparison func([]map[string]string) bool,
return err
}

// TestNodeDrainEphemeralMigrate tests that ephermeral_disk migrations work as
// expected even during a node drain.
func (tc *NodeDrainE2ETest) TestNodeDrainEphemeralMigrate(f *framework.F) {
jobID := "test-node-drain-" + uuid.Generate()[0:8]
f.NoError(e2e.Register(jobID, "nodedrain/input/drain_migrate.nomad"))
tc.jobIDs = append(tc.jobIDs, jobID)

expected := []string{"running"}
f.NoError(e2e.WaitForAllocStatusExpected(jobID, ns, expected), "job should be running")

allocs, err := e2e.AllocsForJob(jobID, ns)
f.NoError(err, "could not get allocs for job")
f.Len(allocs, 1, "could not get allocs for job")
oldAllocID := allocs[0]["ID"]

nodes, err := nodesForJob(jobID)
f.NoError(err, "could not get nodes for job")
f.Len(nodes, 1, "could not get nodes for job")
nodeID := nodes[0]

out, err := e2e.Command("nomad", "node", "drain", "-enable", "-yes", "-detach", nodeID)
f.NoError(err, fmt.Sprintf("'nomad node drain' failed: %v\n%v", err, out))
tc.nodeIDs = append(tc.nodeIDs, nodeID)

f.NoError(waitForNodeDrain(nodeID,
func(got []map[string]string) bool {
for _, alloc := range got {
if alloc["ID"] == oldAllocID && alloc["Status"] == "complete" {
return true
}
}
return false
}, &e2e.WaitConfig{Interval: time.Millisecond * 100, Retries: 500},
), "node did not drain")

// wait for the allocation to be migrated
expected = []string{"running", "complete"}
f.NoError(e2e.WaitForAllocStatusExpected(jobID, ns, expected), "job should be running")

allocs, err = e2e.AllocsForJob(jobID, ns)
f.NoError(err, "could not get allocations for job")

// the task writes its alloc ID to a file if it hasn't been previously
// written, so find the contents of the migrated file and make sure they
// match the old allocation, not the running one
var got string
var fsErr error
testutil.WaitForResultRetries(10, func() (bool, error) {
time.Sleep(time.Millisecond * 100)
for _, alloc := range allocs {
if alloc["Status"] == "running" && alloc["Node ID"] != nodeID && alloc["ID"] != oldAllocID {
got, fsErr = e2e.Command("nomad", "alloc", "fs",
alloc["ID"], fmt.Sprintf("alloc/data/%s", jobID))
if err != nil {
return false, err
}
return true, nil
}
}
return false, fmt.Errorf("missing expected allocation")
}, func(e error) {
fsErr = e
})
f.NoError(fsErr, "could not get allocation data")
f.Equal(oldAllocID, strings.TrimSpace(got), "node drained but migration failed")
}

// TestNodeDrainDeadline tests the enforcement of the node drain deadline so
// that allocations are terminated even if they haven't gracefully exited.
func (tc *NodeDrainE2ETest) TestNodeDrainDeadline(f *framework.F) {
Expand Down
9 changes: 2 additions & 7 deletions nomad/structs/structs.go
Original file line number Diff line number Diff line change
Expand Up @@ -10634,13 +10634,8 @@ func (a *Allocation) ShouldMigrate() bool {
return false
}

// We won't migrate any data is the user hasn't enabled migration or the
// disk is not marked as sticky
if !tg.EphemeralDisk.Migrate || !tg.EphemeralDisk.Sticky {
return false
}

return true
// We won't migrate any data if the user hasn't enabled migration
return tg.EphemeralDisk.Migrate
}

// SetEventDisplayMessages populates the display message if its not already set,
Expand Down
Loading

0 comments on commit b8a472d

Please sign in to comment.