Skip to content

Commit

Permalink
Merge pull request #4179 from hashicorp/f-retry-join
Browse files Browse the repository at this point in the history
Cloud Auto Join
  • Loading branch information
chelseakomlo authored May 8, 2018
2 parents 11933ea + 321b077 commit e809eb5
Show file tree
Hide file tree
Showing 795 changed files with 403,472 additions and 85 deletions.
41 changes: 9 additions & 32 deletions command/agent/command.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
"github.com/armon/go-metrics/prometheus"
"github.com/hashicorp/consul/lib"
checkpoint "github.com/hashicorp/go-checkpoint"
discover "github.com/hashicorp/go-discover"
gsyslog "github.com/hashicorp/go-syslog"
"github.com/hashicorp/logutils"
flaghelper "github.com/hashicorp/nomad/helper/flag-helpers"
Expand Down Expand Up @@ -545,7 +546,14 @@ func (c *Command) Run(args []string) int {

// Start retry join process
c.retryJoinErrCh = make(chan struct{})
go c.retryJoin(config)

joiner := retryJoiner{
join: c.agent.server.Join,
discover: &discover.Discover{},
errCh: c.retryJoinErrCh,
logger: c.agent.logger,
}
go joiner.RetryJoin(config)

// Wait for exit
return c.handleSignals()
Expand Down Expand Up @@ -835,37 +843,6 @@ func (c *Command) startupJoin(config *Config) error {
return nil
}

// retryJoin is used to handle retrying a join until it succeeds or all retries
// are exhausted.
func (c *Command) retryJoin(config *Config) {
if len(config.Server.RetryJoin) == 0 || !config.Server.Enabled {
return
}

logger := c.agent.logger
logger.Printf("[INFO] agent: Joining cluster...")

attempt := 0
for {
n, err := c.agent.server.Join(config.Server.RetryJoin)
if err == nil {
logger.Printf("[INFO] agent: Join completed. Synced with %d initial agents", n)
return
}

attempt++
if config.Server.RetryMaxAttempts > 0 && attempt > config.Server.RetryMaxAttempts {
logger.Printf("[ERR] agent: max join retry exhausted, exiting")
close(c.retryJoinErrCh)
return
}

logger.Printf("[WARN] agent: Join failed: %v, retrying in %v", err,
config.Server.RetryInterval)
time.Sleep(config.Server.retryInterval)
}
}

func (c *Command) Synopsis() string {
return "Runs a Nomad agent"
}
Expand Down
53 changes: 0 additions & 53 deletions command/agent/command_test.go
Original file line number Diff line number Diff line change
@@ -1,13 +1,11 @@
package agent

import (
"fmt"
"io/ioutil"
"os"
"strings"
"testing"

"github.com/hashicorp/nomad/testutil"
"github.com/hashicorp/nomad/version"
"github.com/mitchellh/cli"
)
Expand Down Expand Up @@ -78,54 +76,3 @@ func TestCommand_Args(t *testing.T) {
}
}
}

// TODO Why is this failing
func TestRetryJoin(t *testing.T) {
t.Parallel()
agent := NewTestAgent(t, t.Name(), nil)
defer agent.Shutdown()

doneCh := make(chan struct{})
shutdownCh := make(chan struct{})

defer func() {
close(shutdownCh)
<-doneCh
}()

cmd := &Command{
Version: version.GetVersion(),
ShutdownCh: shutdownCh,
Ui: &cli.BasicUi{
Reader: os.Stdin,
Writer: os.Stdout,
ErrorWriter: os.Stderr,
},
}

serfAddr := agent.Config.normalizedAddrs.Serf

args := []string{
"-dev",
"-node", "foo",
"-retry-join", serfAddr,
"-retry-interval", "1s",
}

go func() {
if code := cmd.Run(args); code != 0 {
t.Logf("bad: %d", code)
}
close(doneCh)
}()

testutil.WaitForResult(func() (bool, error) {
mem := agent.server.Members()
if len(mem) != 2 {
return false, fmt.Errorf("bad :%#v", mem)
}
return true, nil
}, func(err error) {
t.Fatalf(err.Error())
})
}
95 changes: 95 additions & 0 deletions command/agent/retry_join.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
package agent

import (
"log"
"strings"
"time"
)

// DiscoverInterface is an interface for the Discover type in the go-discover
// library. Using an interface allows for ease of testing.
type DiscoverInterface interface {
// Addrs discovers ip addresses of nodes that match the given filter
// criteria.
// The config string must have the format 'provider=xxx key=val key=val ...'
// where the keys and values are provider specific. The values are URL
// encoded.
Addrs(string, *log.Logger) ([]string, error)

// Help describes the format of the configuration string for address
// discovery and the various provider specific options.
Help() string

// Names returns the names of the configured providers.
Names() []string
}

// retryJoiner is used to handle retrying a join until it succeeds or all of
// its tries are exhausted.
type retryJoiner struct {
// join adds the specified servers to the serf cluster
join func([]string) (int, error)

// discover is of type Discover, where this is either the go-discover
// implementation or a mock used for testing
discover DiscoverInterface

// errCh is used to communicate with the agent when the max retry attempt
// limit has been reached
errCh chan struct{}

// logger is the agent logger.
logger *log.Logger
}

// retryJoin is used to handle retrying a join until it succeeds or all retries
// are exhausted.
func (r *retryJoiner) RetryJoin(config *Config) {
if len(config.Server.RetryJoin) == 0 || !config.Server.Enabled {
return
}

attempt := 0

addrsToJoin := strings.Join(config.Server.RetryJoin, " ")
r.logger.Printf("[INFO] agent: Joining cluster... %s", addrsToJoin)

for {
var addrs []string
var err error

for _, addr := range config.Server.RetryJoin {
switch {
case strings.HasPrefix(addr, "provider="):
servers, err := r.discover.Addrs(addr, r.logger)
if err != nil {
r.logger.Printf("[ERR] agent: Join error %s", err)
} else {
addrs = append(addrs, servers...)
}
default:
addrs = append(addrs, addr)
}
}

if len(addrs) > 0 {
n, err := r.join(addrs)
if err == nil {
r.logger.Printf("[INFO] agent: Join completed. Synced with %d initial agents", n)
}
}

attempt++
if config.Server.RetryMaxAttempts > 0 && attempt > config.Server.RetryMaxAttempts {
r.logger.Printf("[ERR] agent: max join retry exhausted, exiting")
close(r.errCh)
return
}

if err != nil {
r.logger.Printf("[WARN] agent: Join failed: %v, retrying in %v", err,
config.Server.RetryInterval)
}
time.Sleep(config.Server.retryInterval)
}
}
Loading

0 comments on commit e809eb5

Please sign in to comment.