From e0d6ede41d1f4e6513656875672b297467199a7a Mon Sep 17 00:00:00 2001 From: Gabi Date: Thu, 11 Apr 2024 13:49:56 -0500 Subject: [PATCH] fix exhausted node metrics reporting in preemption (#20346) --- .changelog/20346.txt | 3 ++ scheduler/rank.go | 2 + scheduler/rank_test.go | 110 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 115 insertions(+) create mode 100644 .changelog/20346.txt diff --git a/.changelog/20346.txt b/.changelog/20346.txt new file mode 100644 index 000000000000..486a634f618b --- /dev/null +++ b/.changelog/20346.txt @@ -0,0 +1,3 @@ +```release-note:improvement +scheduler: Record exhausted node metrics for devices when preemption fails to find an allocation to evict +``` diff --git a/scheduler/rank.go b/scheduler/rank.go index a5fab803f0cf..dd7d1ac3bea8 100644 --- a/scheduler/rank.go +++ b/scheduler/rank.go @@ -444,6 +444,7 @@ OUTER: if devicePreemptions == nil { iter.ctx.Logger().Named("binpack").Debug("preemption not possible", "requested_device", req) + iter.ctx.Metrics().ExhaustedNode(option.Node, fmt.Sprintf("devices: %s", err)) netIdx.Release() continue OUTER } @@ -460,6 +461,7 @@ OUTER: offer, sumAffinities, err = devAllocator.AssignDevice(req) if offer == nil { iter.ctx.Logger().Named("binpack").Debug("unexpected error, unable to create device offer after considering preemption", "error", err) + iter.ctx.Metrics().ExhaustedNode(option.Node, fmt.Sprintf("devices: %s", err)) continue OUTER } } diff --git a/scheduler/rank_test.go b/scheduler/rank_test.go index a196ea8f38d6..6a810fdc79ba 100644 --- a/scheduler/rank_test.go +++ b/scheduler/rank_test.go @@ -1965,6 +1965,116 @@ func TestBinPackIterator_Devices(t *testing.T) { } } +// Tests that bin packing iterator fails due to overprovisioning of devices +// This test has devices at task level +func TestBinPackIterator_Device_Failure_With_Eviction(t *testing.T) { + _, ctx := testContext(t) + nodes := []*RankedNode{ + { + Node: &structs.Node{ + NodeResources: &structs.NodeResources{ + 
Processors: processorResources4096, + Cpu: legacyCpuResources4096, + Memory: structs.NodeMemoryResources{ + MemoryMB: 4096, + }, + Networks: []*structs.NetworkResource{}, + Devices: []*structs.NodeDeviceResource{ + { + Vendor: "nvidia", + Type: "gpu", + Instances: []*structs.NodeDevice{ + { + ID: "1", + Healthy: true, + HealthDescription: "healthy", + Locality: &structs.NodeDeviceLocality{}, + }, + }, + Name: "SOME-GPU", + }, + }, + }, + ReservedResources: &structs.NodeReservedResources{ + Cpu: structs.NodeReservedCpuResources{ + CpuShares: 1024, + }, + Memory: structs.NodeReservedMemoryResources{ + MemoryMB: 1024, + }, + }, + }, + }, + } + + // Add a planned alloc that takes up a gpu + plan := ctx.Plan() + plan.NodeAllocation[nodes[0].Node.ID] = []*structs.Allocation{ + { + AllocatedResources: &structs.AllocatedResources{ + Tasks: map[string]*structs.AllocatedTaskResources{ + "web": { + Cpu: structs.AllocatedCpuResources{ + CpuShares: 2048, + }, + Memory: structs.AllocatedMemoryResources{ + MemoryMB: 2048, + }, + Networks: []*structs.NetworkResource{}, + Devices: []*structs.AllocatedDeviceResource{ + { + Vendor: "nvidia", + Type: "gpu", + Name: "SOME-GPU", + DeviceIDs: []string{"1"}, + }, + }, + }, + }, + Shared: structs.AllocatedSharedResources{}, + }, + }, + } + static := NewStaticRankIterator(ctx, nodes) + + // Create a task group with gpu device specified + taskGroup := &structs.TaskGroup{ + EphemeralDisk: &structs.EphemeralDisk{}, + Tasks: []*structs.Task{ + { + Name: "web", + Resources: &structs.Resources{ + CPU: 1024, + MemoryMB: 1024, + Networks: []*structs.NetworkResource{}, + Devices: structs.ResourceDevices{ + { + Name: "nvidia/gpu", + Count: 1, + }, + }, + }, + }, + }, + Networks: []*structs.NetworkResource{}, + } + + binp := NewBinPackIterator(ctx, static, true, 0) + binp.SetTaskGroup(taskGroup) + binp.SetSchedulerConfiguration(testSchedulerConfig) + + scoreNorm := NewScoreNormalizationIterator(ctx, binp) + + out := collectRanked(scoreNorm) + require 
:= require.New(t) + + // We expect a placement failure because we need 1 GPU device + // and the node's only GPU is already held by the planned alloc + + require.Len(out, 0) + require.Equal(1, ctx.metrics.DimensionExhausted["devices: no devices match request"]) +} + func TestJobAntiAffinity_PlannedAlloc(t *testing.T) { _, ctx := testContext(t) nodes := []*RankedNode{