Skip to content

Commit

Permalink
add reproducer to hashicorp/nomad#14850
Browse files Browse the repository at this point in the history
  • Loading branch information
Pavel Valodzka committed Oct 17, 2022
1 parent 41c677b commit 749f4bf
Show file tree
Hide file tree
Showing 3 changed files with 86 additions and 11 deletions.
43 changes: 43 additions & 0 deletions detect.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
#!/usr/bin/ruby

require 'open-uri'
require 'net/http'
require 'json'

# Base URL (including the trailing slash) of the HTTP API that `request` queries.
# Frozen so the shared constant cannot be mutated in place by accident.
ORIGIN = 'http://localhost:4646/'.freeze

# Fetches `<ORIGIN>v1/<cmd>` over HTTP and returns the decoded JSON body.
#
# @param cmd [String] API path below `v1/`, e.g. "job/fail/allocations"
# @return [Object] whatever JSON document the endpoint returned, parsed
def request(cmd)
  endpoint = "#{ORIGIN}v1/#{cmd}"
  payload = URI.open(endpoint).read
  JSON.parse(payload)
end

# Reports running allocations that share the same name within the same job
# version, and terminates the process with exit status 1 if any are found.
#
# Only allocations whose 'ClientStatus' is 'running' are considered; every
# other status is ignored. Collisions are printed grouped by job version and
# then by allocation name, each in order of first appearance in `allocs`.
#
# @param allocs [Array<Hash>] allocation records; the keys read are 'ID',
#   'JobVersion', 'Name' and 'ClientStatus'
# @return [nil] when no collision is found (otherwise the process exits)
def detect!(allocs)
  running = allocs.select { |alloc| alloc['ClientStatus'] == 'running' }

  has_collisions = false
  running.group_by { |alloc| alloc['JobVersion'] }.each do |version, same_version|
    same_version.group_by { |alloc| alloc['Name'] }.each do |name, same_name|
      # A single running allocation per (version, name) is the healthy case.
      next if same_name.size == 1

      ids = same_name.map { |alloc| alloc['ID'] }
      puts "Collision detected: #{version} #{name}: #{ids.join(', ')}"
      has_collisions = true
    end
  end

  # Non-zero status lets a calling script stop as soon as a collision appears.
  exit 1 if has_collisions
end

# Check the allocations of the job named "fail" and exit non-zero on collisions.
job_id = 'fail'
allocations = request("job/#{job_id}/allocations")
detect!(allocations)

11 changes: 11 additions & 0 deletions reproduce_14850.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#!/bin/bash -x

# It is unclear whether this matters, but the cluster that shows the issue
# runs with the spread scheduler, so switch the scheduler to it first.
curl --request PUT localhost:4646/v1/operator/scheduler/configuration --data '{"SchedulerAlgorithm":"spread"}'

# Resubmit the job up to 500 times, each with a different `redeploy` value,
# and stop at the first run for which detect.rb exits non-zero.
for i in {1..500}
do
  nomad job run -var redeploy=$i test.nomad
  if ! ruby detect.rb
  then
    exit 1
  fi
  sleep 5
done
43 changes: 32 additions & 11 deletions test.nomad
Original file line number Diff line number Diff line change
@@ -1,11 +1,16 @@
# Input variable supplied on each submission via `-var redeploy=...`; the job
# copies it into its `meta` block as `redeploy`.
variable "redeploy" {
type = string
}

job "fail" {
datacenters = ["toronto"]

constraint {
attribute = "${attr.unique.consul.name}"
operator = "regexp"
value = "^(nomad-a-3)$"
}

# constraint {
# attribute = "${attr.unique.consul.name}"
# operator = "regexp"
# value = "^(nomad-a-3)$"
# }

update {
healthy_deadline = "30s"
Expand All @@ -14,16 +19,30 @@ job "fail" {
}

meta {
redeploy = 1
redeploy = "${var.redeploy}"
}

group "fail-failed" {
count = "1"
count = "50"

update {
max_parallel = 50
}

network {
port "http" {
to = 8080
}
port "test1" { }
port "test2" { }
port "test3" { }
port "test4" { }
port "test5" { }
port "test6" { }
port "test7" { }
port "test8" { }
port "test9" { }
port "test0" { }
}
service {
port = "http"
Expand All @@ -48,15 +67,17 @@ job "fail" {
ports = ["http"]
}
resources {
cores = 1
#cores = 1
memory = 64
cpu = 64
}

env {
# unhealthy config
HEALTHY_FOR = 60
UNHEALTHY_FOR = -1
#HEALTHY_FOR = 60
#UNHEALTHY_FOR = -1
# healthy config
# HEALTHY_FOR = -1
HEALTHY_FOR = -1
}
}
}
Expand Down

0 comments on commit 749f4bf

Please sign in to comment.