Skip to content

Commit

Permalink
Replace cluster periodics with watchers
Browse files Browse the repository at this point in the history
Stop periodically sending locks and certificate authorities to leaf clusters. Instead,
rely on the watcher system to deliver resources to leaf clusters only when changes
occur.
Fixes #8817
  • Loading branch information
rosstimothy committed Dec 31, 2021
1 parent f141d83 commit 0ad9b7b
Show file tree
Hide file tree
Showing 5 changed files with 361 additions and 122 deletions.
47 changes: 32 additions & 15 deletions integration/integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3711,10 +3711,10 @@ func testRotateRollback(t *testing.T, s *integrationTestSuite) {
// testRotateTrustedClusters tests CA rotation support for trusted clusters
func testRotateTrustedClusters(t *testing.T, suite *integrationTestSuite) {
tr := utils.NewTracer(utils.ThisFunction()).Start()
defer tr.Stop()
t.Cleanup(func() { tr.Stop() })

ctx, cancel := context.WithCancel(context.Background())
defer cancel()
t.Cleanup(cancel)

clusterMain := "rotate-main"
clusterAux := "rotate-aux"
Expand Down Expand Up @@ -3773,7 +3773,7 @@ func testRotateTrustedClusters(t *testing.T, suite *integrationTestSuite) {
require.NoError(t, err)
err = aux.Process.GetAuthServer().UpsertRole(ctx, role)
require.NoError(t, err)
trustedClusterToken := "trusted-clsuter-token"
trustedClusterToken := "trusted-cluster-token"
err = svc.GetAuthServer().UpsertToken(ctx,
services.MustCreateProvisionToken(trustedClusterToken, []types.SystemRole{types.RoleTrustedCluster}, time.Time{}))
require.NoError(t, err)
Expand All @@ -3789,7 +3789,7 @@ func testRotateTrustedClusters(t *testing.T, suite *integrationTestSuite) {
tryCreateTrustedCluster(t, aux.Process.GetAuthServer(), trustedCluster)
waitForTunnelConnections(t, svc.GetAuthServer(), aux.Secrets.SiteName, 1)

// capture credentials before has reload started to simulate old client
// capture credentials before reload has started to simulate old client
initialCreds, err := GenerateUserCreds(UserCredsRequest{
Process: svc,
Username: suite.me.Username,
Expand Down Expand Up @@ -3818,24 +3818,40 @@ func testRotateTrustedClusters(t *testing.T, suite *integrationTestSuite) {
})
require.NoError(t, err)

// wait until service phase update to be broadcasted (init phase does not trigger reload)
// wait for the service phase update to be broadcast (init phase does not trigger reload)
err = waitForProcessEvent(svc, service.TeleportPhaseChangeEvent, 10*time.Second)
require.NoError(t, err)

// waitForPhase waits until aux cluster detects the rotation
waitForPhase := func(phase string) error {
ctx, cancel := context.WithTimeout(context.Background(), tconf.PollingPeriod*10)
defer cancel()

watcher, err := services.NewCertAuthorityWatcher(ctx, services.CertAuthorityWatcherConfig{
ResourceWatcherConfig: services.ResourceWatcherConfig{
Component: teleport.ComponentProxy,
Clock: tconf.Clock,
Client: aux.GetSiteAPI(clusterAux),
},
})
if err != nil {
return err
}
var lastPhase string
for i := 0; i < 10; i++ {
ca, err := aux.Process.GetAuthServer().GetCertAuthority(types.CertAuthID{
Type: types.HostCA,
DomainName: clusterMain,
}, false)
require.NoError(t, err)
if ca.GetRotation().Phase == phase {
return nil
select {
case <-ctx.Done():
return trace.CompareFailed("failed to converge to phase %q, last phase %q", phase, lastPhase)
case cas := <-watcher.CertAuthorityC:
for _, ca := range cas {
if ca.GetClusterName() == clusterMain &&
ca.GetType() == types.HostCA &&
ca.GetRotation().Phase == phase {
return nil
}
lastPhase = ca.GetRotation().Phase
}
}
lastPhase = ca.GetRotation().Phase
time.Sleep(tconf.PollingPeriod / 2)
}
return trace.CompareFailed("failed to converge to phase %q, last phase %q", phase, lastPhase)
}
Expand Down Expand Up @@ -3916,7 +3932,7 @@ func testRotateTrustedClusters(t *testing.T, suite *integrationTestSuite) {
// shut down the service
cancel()
// close the service without waiting for the connections to drain
svc.Close()
require.NoError(t, svc.Close())

select {
case err := <-runErrCh:
Expand Down Expand Up @@ -4082,6 +4098,7 @@ func (s *integrationTestSuite) rotationConfig(disableWebService bool) *service.C
tconf.PollingPeriod = 500 * time.Millisecond
tconf.ClientTimeout = time.Second
tconf.ShutdownTimeout = 2 * tconf.ClientTimeout
tconf.MaxRetryPeriod = time.Second
return tconf
}

Expand Down
Loading

0 comments on commit 0ad9b7b

Please sign in to comment.