From a856698a32554f33cc575b85025c709657dd9ae4 Mon Sep 17 00:00:00 2001 From: Seth Hoenig Date: Mon, 10 Oct 2022 14:34:40 -0500 Subject: [PATCH] servicedisco: implicit constraint for nomad v1.4 when using nsd checks This PR adds a jobspec mutator to constrain jobs making use of checks in the nomad service provider to nomad clients of at least v1.4.0. Before, in a mixed client version cluster it was possible to submit an NSD job making use of checks and for that job to land on an older, incompatible client node. Closes #14862 --- .changelog/14868.txt | 3 ++ nomad/job_endpoint_hooks.go | 36 +++++++++++++----- nomad/job_endpoint_hooks_test.go | 4 +- nomad/structs/job.go | 49 +++++++++++++++--------- nomad/structs/job_test.go | 65 +++++++++++++++++++++++++------- 5 files changed, 114 insertions(+), 43 deletions(-) create mode 100644 .changelog/14868.txt diff --git a/.changelog/14868.txt b/.changelog/14868.txt new file mode 100644 index 00000000000..89e3ae693d2 --- /dev/null +++ b/.changelog/14868.txt @@ -0,0 +1,3 @@ +```release-note:bug +servicedisco: Fixed a bug where job using checks could land on incompatible client +``` diff --git a/nomad/job_endpoint_hooks.go b/nomad/job_endpoint_hooks.go index 3abadbd2606..1fe5dbe9f1f 100644 --- a/nomad/job_endpoint_hooks.go +++ b/nomad/job_endpoint_hooks.go @@ -3,23 +3,23 @@ package nomad import ( "fmt" - multierror "github.com/hashicorp/go-multierror" + "github.com/hashicorp/go-multierror" "github.com/hashicorp/nomad/helper" "github.com/hashicorp/nomad/nomad/structs" ) const ( - // vaultConstraintLTarget is the lefthand side of the Vault constraint - // injected when Vault policies are used. If an existing constraint - // with this target exists it overrides the injected constraint. - vaultConstraintLTarget = "${attr.vault.version}" + attrVaultVersion = `${attr.vault.version}` + attrConsulVersion = `${attr.consul.version}` + attrNomadVersion = `${attr.nomad.version}` + attrNomadServiceDisco = `${attr.nomad.service_discovery}` ) var ( // vaultConstraint is the implicit constraint added to jobs requesting a // Vault token vaultConstraint = &structs.Constraint{ - LTarget: vaultConstraintLTarget, + LTarget: attrVaultVersion, RTarget: ">= 0.6.1", Operand: structs.ConstraintSemver, } @@ -29,7 +29,7 @@ var ( // Consul version is pinned to a minimum of that which introduced the // namespace feature. consulServiceDiscoveryConstraint = &structs.Constraint{ - LTarget: "${attr.consul.version}", + LTarget: attrConsulVersion, RTarget: ">= 1.7.0", Operand: structs.ConstraintSemver, } @@ -40,10 +40,21 @@ var ( // we need to ensure task groups are placed where they can run // successfully. nativeServiceDiscoveryConstraint = &structs.Constraint{ - LTarget: "${attr.nomad.service_discovery}", + LTarget: attrNomadServiceDisco, RTarget: "true", Operand: "=", } + + // nativeServiceDiscoveryChecksConstraint is the constraint injected into task + // groups that utilize Nomad's native service discovery checks feature. This + // is needed, as operators can have versions of Nomad pre-v1.4 mixed into a + // cluster with v1.4 servers, causing jobs to be placed on incompatible + // clients. + nativeServiceDiscoveryChecksConstraint = &structs.Constraint{ + LTarget: attrNomadVersion, + RTarget: ">= 1.4.0", + Operand: structs.ConstraintSemver, + } ) type admissionController interface { @@ -149,7 +160,7 @@ func (jobImpliedConstraints) Mutate(j *structs.Job) (*structs.Job, []error, erro // Hot path if len(signals) == 0 && len(vaultBlocks) == 0 && - len(nativeServiceDisco) == 0 && len(consulServiceDisco) == 0 { + nativeServiceDisco.Empty() && len(consulServiceDisco) == 0 { return j, nil, nil } @@ -173,10 +184,15 @@ func (jobImpliedConstraints) Mutate(j *structs.Job) (*structs.Job, []error, erro } // If the task group utilises Nomad service discovery, run the mutator. - if ok := nativeServiceDisco[tg.Name]; ok { + if nativeServiceDisco.Basic.Contains(tg.Name) { mutateConstraint(constraintMatcherFull, tg, nativeServiceDiscoveryConstraint) } + // If the task group utilizes NSD checks, run the mutator. + if nativeServiceDisco.Checks.Contains(tg.Name) { + mutateConstraint(constraintMatcherFull, tg, nativeServiceDiscoveryChecksConstraint) + } + // If the task group utilises Consul service discovery, run the mutator. if ok := consulServiceDisco[tg.Name]; ok { mutateConstraint(constraintMatcherLeft, tg, consulServiceDiscoveryConstraint) diff --git a/nomad/job_endpoint_hooks_test.go b/nomad/job_endpoint_hooks_test.go index 9d84fcad916..67ee2971d0b 100644 --- a/nomad/job_endpoint_hooks_test.go +++ b/nomad/job_endpoint_hooks_test.go @@ -203,7 +203,7 @@ func Test_jobImpliedConstraints_Mutate(t *testing.T) { }, Constraints: []*structs.Constraint{ { - LTarget: vaultConstraintLTarget, + LTarget: attrVaultVersion, RTarget: ">= 1.0.0", Operand: structs.ConstraintSemver, }, @@ -224,7 +224,7 @@ func Test_jobImpliedConstraints_Mutate(t *testing.T) { }, Constraints: []*structs.Constraint{ { - LTarget: vaultConstraintLTarget, + LTarget: attrVaultVersion, RTarget: ">= 1.0.0", Operand: structs.ConstraintSemver, }, diff --git a/nomad/structs/job.go b/nomad/structs/job.go index 1ab6373531a..e1950d48cb7 100644 --- a/nomad/structs/job.go +++ b/nomad/structs/job.go @@ -1,5 +1,9 @@ package structs +import ( + "github.com/hashicorp/go-set" +) + const ( // JobServiceRegistrationsRPCMethod is the RPC method for listing all // service registrations assigned to a specific namespaced job. @@ -23,42 +27,53 @@ type JobServiceRegistrationsResponse struct { QueryMeta } +// NativeServiceDiscoveryUsage tracks which groups make use of the nomad service +// discovery provider, and also which of those groups make use of checks. This +// information will be used to configure implicit constraints on the job. +type NativeServiceDiscoveryUsage struct { + Basic *set.Set[string] // implies v1.3.0 + ${attr.nomad.service_discovery} + Checks *set.Set[string] // implies v1.4.0 +} + +// Empty returns true if no groups are using native service discovery. +func (u *NativeServiceDiscoveryUsage) Empty() bool { + return u.Basic.Size() == 0 && u.Checks.Size() == 0 +} + // RequiredNativeServiceDiscovery identifies which task groups, if any, within // the job are utilising Nomad native service discovery. -func (j *Job) RequiredNativeServiceDiscovery() map[string]bool { - groups := make(map[string]bool) +func (j *Job) RequiredNativeServiceDiscovery() *NativeServiceDiscoveryUsage { + basic := set.New[string](10) + checks := set.New[string](10) for _, tg := range j.TaskGroups { - // It is possible for services using the Nomad provider to be - // configured at the task group level, so check here first. - if requiresNativeServiceDiscovery(tg.Services) { - groups[tg.Name] = true - continue - } + // configured at the group level, so check here first. + requiresNativeServiceDiscovery(tg.Name, tg.Services, basic, checks) // Iterate the tasks within the task group to check the services // configured at this more traditional level. for _, task := range tg.Tasks { - if requiresNativeServiceDiscovery(task.Services) { - groups[tg.Name] = true - continue - } + requiresNativeServiceDiscovery(tg.Name, task.Services, basic, checks) } } - - return groups + return &NativeServiceDiscoveryUsage{ + Basic: basic, + Checks: checks, + } } // requiresNativeServiceDiscovery identifies whether any of the services passed // to the function are utilising Nomad native service discovery. -func requiresNativeServiceDiscovery(services []*Service) bool { +func requiresNativeServiceDiscovery(group string, services []*Service, basic, checks *set.Set[string]) { for _, tgService := range services { if tgService.Provider == ServiceProviderNomad { - return true + basic.Insert(group) + if len(tgService.Checks) > 0 { + checks.Insert(group) + } } } - return false } // RequiredConsulServiceDiscovery identifies which task groups, if any, within diff --git a/nomad/structs/job_test.go b/nomad/structs/job_test.go index fc32584fd29..3d14f5324c8 100644 --- a/nomad/structs/job_test.go +++ b/nomad/structs/job_test.go @@ -3,6 +3,8 @@ package structs import ( "testing" + "github.com/hashicorp/go-set" + "github.com/shoenig/test/must" "github.com/stretchr/testify/require" ) @@ -13,11 +15,13 @@ func TestServiceRegistrationsRequest_StaleReadSupport(t *testing.T) { func TestJob_RequiresNativeServiceDiscovery(t *testing.T) { testCases := []struct { - inputJob *Job - expectedOutput map[string]bool - name string + name string + inputJob *Job + expBasic []string + expChecks []string }{ { + name: "multiple group services with Nomad provider", inputJob: &Job{ TaskGroups: []*TaskGroup{ { @@ -36,10 +40,40 @@ func TestJob_RequiresNativeServiceDiscovery(t *testing.T) { }, }, }, - expectedOutput: map[string]bool{"group1": true, "group2": true}, - name: "multiple group services with Nomad provider", + expBasic: []string{"group1", "group2"}, + expChecks: nil, + }, + { + name: "multiple group services with Nomad provider with checks", + inputJob: &Job{ + TaskGroups: []*TaskGroup{ + { + Name: "group1", + Services: []*Service{ + {Provider: "nomad", Checks: []*ServiceCheck{{Name: "c1"}}}, + {Provider: "nomad"}, + }, + }, + { + Name: "group2", + Services: []*Service{ + {Provider: "nomad"}, + }, + }, + { + Name: "group3", + Services: []*Service{ + {Provider: "nomad"}, + {Provider: "nomad", Checks: []*ServiceCheck{{Name: "c2"}}}, + }, + }, + }, + }, + expBasic: []string{"group1", "group2", "group3"}, + expChecks: []string{"group1", "group3"}, }, { + name: "multiple task services with Nomad provider", inputJob: &Job{ TaskGroups: []*TaskGroup{ { @@ -71,17 +105,18 @@ func TestJob_RequiresNativeServiceDiscovery(t *testing.T) { { Services: []*Service{ {Provider: "nomad"}, - {Provider: "nomad"}, + {Provider: "nomad", Checks: []*ServiceCheck{{Name: "c1"}}}, }, }, }, }, }, }, - expectedOutput: map[string]bool{"group1": true, "group2": true}, - name: "multiple task services with Nomad provider", + expBasic: []string{"group1", "group2"}, + expChecks: []string{"group2"}, }, { + name: "multiple group services with Consul provider", inputJob: &Job{ TaskGroups: []*TaskGroup{ { @@ -100,10 +135,11 @@ func TestJob_RequiresNativeServiceDiscovery(t *testing.T) { }, }, }, - expectedOutput: map[string]bool{}, - name: "multiple group services with Consul provider", + expBasic: nil, + expChecks: nil, }, { + name: "multiple task services with Consul provider", inputJob: &Job{ TaskGroups: []*TaskGroup{ { @@ -142,15 +178,16 @@ func TestJob_RequiresNativeServiceDiscovery(t *testing.T) { }, }, }, - expectedOutput: map[string]bool{}, - name: "multiple task services with Consul provider", + expBasic: nil, + expChecks: nil, }, } for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { - actualOutput := tc.inputJob.RequiredNativeServiceDiscovery() - require.Equal(t, tc.expectedOutput, actualOutput) + nsdUsage := tc.inputJob.RequiredNativeServiceDiscovery() + must.Equal(t, set.From(tc.expBasic), nsdUsage.Basic) + must.Equal(t, set.From(tc.expChecks), nsdUsage.Checks) }) } }