Skip to content

Commit

Permalink
Merge pull request #68 from hashicorp/autopilot-ent
Browse files Browse the repository at this point in the history
Autopilot enterprise features
  • Loading branch information
kyhavlov authored Jan 31, 2018
2 parents 306dd90 + fa541c9 commit dfbc075
Show file tree
Hide file tree
Showing 12 changed files with 605 additions and 92,928 deletions.
51 changes: 51 additions & 0 deletions nomad/autopilot_ent.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
// +build ent

package nomad

import (
"fmt"

"github.com/hashicorp/consul/agent/consul/autopilot"
"github.com/hashicorp/consul/agent/consul/autopilot_ent"
"github.com/hashicorp/raft"
)

// AdvancedAutopilotDelegate defines a policy for promoting non-voting servers in a way
// that maintains an odd-numbered voter count while respecting configured redundancy
// zones, servers marked non-voter, and any upgrade migrations to perform.
type AdvancedAutopilotDelegate struct {
AutopilotDelegate

promoter *autopilot_ent.AdvancedPromoter
}

func (d *AdvancedAutopilotDelegate) PromoteNonVoters(conf *autopilot.Config, health autopilot.OperatorHealthReply) ([]raft.Server, error) {
minRaftProtocol, err := d.server.autopilot.MinRaftProtocol()
if err != nil {
return nil, fmt.Errorf("error getting server raft protocol versions: %s", err)
}

// If we don't meet the minimum version for non-voter features, bail early
if minRaftProtocol < 3 {
return nil, nil
}

return d.promoter.PromoteNonVoters(conf, health)
}

// getNodeMeta tries to fetch a node's metadata
func (s *Server) getNodeMeta(serverID raft.ServerID) (map[string]string, error) {
meta := make(map[string]string)
for _, member := range s.Members() {
ok, parts := isNomadServer(member)
if !ok || raft.ServerID(parts.ID) != serverID {
continue
}

meta[AutopilotRZTag] = member.Tags[AutopilotRZTag]
meta[AutopilotVersionTag] = member.Tags[AutopilotVersionTag]
break
}

return meta, nil
}
273 changes: 273 additions & 0 deletions nomad/autopilot_ent_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,273 @@
package nomad

import (
"testing"
"time"

"github.com/hashicorp/consul/agent/consul/autopilot"
"github.com/hashicorp/consul/testutil/retry"
"github.com/hashicorp/nomad/testutil"
"github.com/hashicorp/raft"
"github.com/stretchr/testify/assert"
)

func TestAdvancedAutopilot_DesignateNonVoter(t *testing.T) {
assert := assert.New(t)
s1 := testServer(t, func(c *Config) {
c.RaftConfig.ProtocolVersion = 3
})
defer s1.Shutdown()

s2 := testServer(t, func(c *Config) {
c.DevDisableBootstrap = true
c.RaftConfig.ProtocolVersion = 3
})
defer s2.Shutdown()

s3 := testServer(t, func(c *Config) {
c.DevDisableBootstrap = true
c.NonVoter = true
c.RaftConfig.ProtocolVersion = 3
})
defer s3.Shutdown()

testutil.WaitForLeader(t, s1.RPC)

testJoin(t, s1, s2, s3)
retry.Run(t, func(r *retry.R) { r.Check(wantRaft([]*Server{s1, s2, s3})) })

// Wait twice the server stabilization threshold to give the server
// time to be promoted
time.Sleep(2 * s1.config.AutopilotConfig.ServerStabilizationTime)

future := s1.raft.GetConfiguration()
assert.Nil(future.Error())

servers := future.Configuration().Servers

// s2 should be a voter
if !autopilot.IsPotentialVoter(servers[1].Suffrage) || servers[1].ID != s2.config.RaftConfig.LocalID {
t.Fatalf("bad: %v", servers)
}

// s3 should remain a non-voter
if autopilot.IsPotentialVoter(servers[2].Suffrage) || servers[2].ID != s3.config.RaftConfig.LocalID {
t.Fatalf("bad: %v", servers)
}
}

func TestAdvancedAutopilot_RedundancyZone(t *testing.T) {
assert := assert.New(t)
s1 := testServer(t, func(c *Config) {
c.RaftConfig.ProtocolVersion = 3
c.AutopilotConfig.EnableRedundancyZones = true
c.RedundancyZone = "east"
})
defer s1.Shutdown()

s2 := testServer(t, func(c *Config) {
c.DevDisableBootstrap = true
c.RaftConfig.ProtocolVersion = 3
c.RedundancyZone = "west"
})
defer s2.Shutdown()

s3 := testServer(t, func(c *Config) {
c.DevDisableBootstrap = true
c.RaftConfig.ProtocolVersion = 3
c.RedundancyZone = "west-2"
})
defer s3.Shutdown()

testutil.WaitForLeader(t, s1.RPC)

testJoin(t, s1, s2, s3)
retry.Run(t, func(r *retry.R) { r.Check(wantRaft([]*Server{s1, s2, s3})) })

// Wait past the stabilization time to give the servers a chance to be promoted
time.Sleep(2*s1.config.AutopilotConfig.ServerStabilizationTime + s1.config.AutopilotInterval)

// Now s2 and s3 should be voters
{
future := s1.raft.GetConfiguration()
assert.Nil(future.Error())
servers := future.Configuration().Servers
assert.Equal(3, len(servers))
// s2 and s3 should be voters now
assert.Equal(raft.Voter, servers[1].Suffrage)
assert.Equal(raft.Voter, servers[2].Suffrage)
}

// Join s4
s4 := testServer(t, func(c *Config) {
c.DevDisableBootstrap = true
c.RaftConfig.ProtocolVersion = 3
c.RedundancyZone = "west-2"
})
defer s4.Shutdown()
testJoin(t, s1, s4)
time.Sleep(2*s1.config.AutopilotConfig.ServerStabilizationTime + s1.config.AutopilotInterval)

// s4 should not be a voter yet
{
future := s1.raft.GetConfiguration()
assert.Nil(future.Error())
servers := future.Configuration().Servers
assert.Equal(raft.Nonvoter, servers[3].Suffrage)
}

s3.Shutdown()

// s4 should be a voter now, s3 should be removed
retry.Run(t, func(r *retry.R) {
r.Check(wantRaft([]*Server{s1, s2, s4}))
future := s1.raft.GetConfiguration()
if err := future.Error(); err != nil {
r.Fatal(err)
}
servers := future.Configuration().Servers
for _, s := range servers {
if s.Suffrage != raft.Voter {
r.Fatalf("bad: %v", servers)
}
}
})
}

func TestAdvancedAutopilot_UpgradeMigration(t *testing.T) {
s1 := testServer(t, func(c *Config) {
c.RaftConfig.ProtocolVersion = 3
c.Build = "0.8.0"
})
defer s1.Shutdown()

s2 := testServer(t, func(c *Config) {
c.DevDisableBootstrap = true
c.RaftConfig.ProtocolVersion = 3
c.Build = "0.8.1"
})
defer s2.Shutdown()

testutil.WaitForLeader(t, s1.RPC)
testJoin(t, s1, s2)

// Wait for the migration to complete
retry.Run(t, func(r *retry.R) {
future := s1.raft.GetConfiguration()
if err := future.Error(); err != nil {
r.Fatal(err)
}
for _, s := range future.Configuration().Servers {
switch s.ID {
case raft.ServerID(s1.config.NodeID):
if got, want := s.Suffrage, raft.Nonvoter; got != want {
r.Fatalf("got %v want %v", got, want)
}

case raft.ServerID(s2.config.NodeID):
if got, want := s.Suffrage, raft.Voter; got != want {
r.Fatalf("got %v want %v", got, want)
}

default:
r.Fatalf("unexpected server %s", s.ID)
}
}

if !s2.IsLeader() {
r.Fatal("server 2 should be the leader")
}
})
}

func TestAdvancedAutopilot_CustomUpgradeMigration(t *testing.T) {
s1 := testServer(t, func(c *Config) {
c.RaftConfig.ProtocolVersion = 3
c.AutopilotConfig.EnableCustomUpgrades = true
c.UpgradeVersion = "0.0.1"
})
defer s1.Shutdown()

s2 := testServer(t, func(c *Config) {
c.DevDisableBootstrap = true
c.RaftConfig.ProtocolVersion = 3
c.UpgradeVersion = "0.0.2"
})
defer s2.Shutdown()

testutil.WaitForLeader(t, s1.RPC)
testJoin(t, s1, s2)

// Wait for the migration to complete
retry.Run(t, func(r *retry.R) {
future := s1.raft.GetConfiguration()
if err := future.Error(); err != nil {
r.Fatal(err)
}
for _, s := range future.Configuration().Servers {
switch s.ID {
case raft.ServerID(s1.config.NodeID):
if got, want := s.Suffrage, raft.Nonvoter; got != want {
r.Fatalf("got %v want %v", got, want)
}

case raft.ServerID(s2.config.NodeID):
if got, want := s.Suffrage, raft.Voter; got != want {
r.Fatalf("got %v want %v", got, want)
}

default:
r.Fatalf("unexpected server %s", s.ID)
}
}

if !s2.IsLeader() {
r.Fatal("server 2 should be the leader")
}
})
}

func TestAdvancedAutopilot_DisableUpgradeMigration(t *testing.T) {
s1 := testServer(t, func(c *Config) {
c.RaftConfig.ProtocolVersion = 3
c.AutopilotConfig.DisableUpgradeMigration = true
c.Build = "0.8.0"
})
defer s1.Shutdown()

testutil.WaitForLeader(t, s1.RPC)

s2 := testServer(t, func(c *Config) {
c.DevDisableBootstrap = true
c.RaftConfig.ProtocolVersion = 3
c.Build = "0.8.0"
})
defer s2.Shutdown()

s3 := testServer(t, func(c *Config) {
c.DevDisableBootstrap = true
c.RaftConfig.ProtocolVersion = 3
c.Build = "0.8.1"
})
defer s3.Shutdown()

testJoin(t, s1, s2, s3)

// Wait for both servers to be added as voters
retry.Run(t, func(r *retry.R) {
future := s1.raft.GetConfiguration()
if err := future.Error(); err != nil {
r.Fatal(err)
}

for _, s := range future.Configuration().Servers {
if got, want := s.Suffrage, raft.Voter; got != want {
r.Fatalf("got %v want %v", got, want)
}
}

if !s1.IsLeader() {
r.Fatal("server 1 should be leader")
}
})
}
14 changes: 13 additions & 1 deletion nomad/server_setup_ent.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,11 @@

package nomad

import "github.com/hashicorp/sentinel/sentinel"
import (
"github.com/hashicorp/consul/agent/consul/autopilot"
"github.com/hashicorp/consul/agent/consul/autopilot_ent"
"github.com/hashicorp/sentinel/sentinel"
)

type EnterpriseState struct {
// sentinel is a shared instance of the policy engine
Expand Down Expand Up @@ -30,6 +34,14 @@ func (s *Server) setupEnterprise(config *Config) error {

// Create the Sentinel instance based on the configuration
s.sentinel = sentinel.New(sentConf)

// Set up the enterprise version of autopilot
apDelegate := &AdvancedAutopilotDelegate{
AutopilotDelegate: AutopilotDelegate{server: s},
}
apDelegate.promoter = autopilot_ent.NewAdvancedPromoter(s.logger, apDelegate, s.getNodeMeta)
s.autopilot = autopilot.NewAutopilot(s.logger, apDelegate, config.AutopilotInterval, config.ServerHealthInterval)

return nil
}

Expand Down
8 changes: 8 additions & 0 deletions nomad/server_setup_pro.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,17 @@

package nomad

import (
"github.com/hashicorp/consul/agent/consul/autopilot"
)

type EnterpriseState struct{}

func (s *Server) setupEnterprise(config *Config) error {
// Set up the OSS version of autopilot
apDelegate := &AutopilotDelegate{s}
s.autopilot = autopilot.NewAutopilot(s.logger, apDelegate, config.AutopilotInterval, config.ServerHealthInterval)

return nil
}

Expand Down
1 change: 1 addition & 0 deletions nomad/server_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ func testServer(t *testing.T, cb func(*Config)) *Server {

// Tighten the autopilot timing
config.AutopilotConfig.ServerStabilizationTime = 100 * time.Millisecond
config.AutopilotConfig.LastContactThreshold = 200 * time.Millisecond
config.ServerHealthInterval = 50 * time.Millisecond
config.AutopilotInterval = 100 * time.Millisecond

Expand Down
Loading

0 comments on commit dfbc075

Please sign in to comment.