From 281c31e6e576a6648617b4ba8a4d3b3befd6ae2a Mon Sep 17 00:00:00 2001 From: Aaron Lehmann Date: Mon, 13 Mar 2017 13:54:43 -0700 Subject: [PATCH] scheduler: Fix accounting when task ends up in multiple groups Tasks are split between task groups based on common specs. This allows nodes to only be ranked once per group, not once per task. This logic doesn't work correctly because maps are marshalled in a random order. Currently, the same task can end up in multiple groups (say, if it's updated multiple times, and the marshalling ends up being different). Add a check to make sure we don't try to schedule the same task twice within the same batch, which can cause resource accounting to be wrong. Note this doesn't fix the brokenness of task spec deduplication based on marshalling the protobuf. This is a fix for the symptom that can be backported, and I'm going to replace the marshalling stuff in a different PR. Signed-off-by: Aaron Lehmann --- manager/scheduler/scheduler.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/manager/scheduler/scheduler.go b/manager/scheduler/scheduler.go index 55420a2d42..e79c0f0532 100644 --- a/manager/scheduler/scheduler.go +++ b/manager/scheduler/scheduler.go @@ -602,6 +602,12 @@ func (s *Scheduler) scheduleNTasksOnNodes(ctx context.Context, n int, taskGroup nodeIter := 0 nodeCount := len(nodes) for taskID, t := range taskGroup { + // Skip tasks which were already scheduled because they ended + // up in two groups at once. + if _, exists := schedulingDecisions[taskID]; exists { + continue + } + node := &nodes[nodeIter%nodeCount] log.G(ctx).WithField("task.id", t.ID).Debugf("assigning to node %s", node.ID)