Skip to content

Commit

Permalink
Replace cluster periodics with watchers (#9609) (#9998)
Browse files Browse the repository at this point in the history
* Replace cluster periodics with watchers
Stop periodically sending locks and certificate authorities to leaf clusters. Instead,
rely on the watcher system to deliver resources to leaf clusters only when changes
occur.

Fixes #8817

(cherry picked from commit 8932ed4)
  • Loading branch information
rosstimothy authored Feb 1, 2022
1 parent ed3e7e9 commit 6cbe24c
Show file tree
Hide file tree
Showing 7 changed files with 539 additions and 135 deletions.
50 changes: 35 additions & 15 deletions integration/integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3711,10 +3711,10 @@ func testRotateRollback(t *testing.T, s *integrationTestSuite) {
// testRotateTrustedClusters tests CA rotation support for trusted clusters.
func testRotateTrustedClusters(t *testing.T, suite *integrationTestSuite) {
tr := utils.NewTracer(utils.ThisFunction()).Start()
defer tr.Stop()
t.Cleanup(func() { tr.Stop() })

ctx, cancel := context.WithCancel(context.Background())
defer cancel()
t.Cleanup(cancel)

clusterMain := "rotate-main"
clusterAux := "rotate-aux"
Expand Down Expand Up @@ -3773,7 +3773,7 @@ func testRotateTrustedClusters(t *testing.T, suite *integrationTestSuite) {
require.NoError(t, err)
err = aux.Process.GetAuthServer().UpsertRole(ctx, role)
require.NoError(t, err)
trustedClusterToken := "trusted-clsuter-token"
trustedClusterToken := "trusted-cluster-token"
err = svc.GetAuthServer().UpsertToken(ctx,
services.MustCreateProvisionToken(trustedClusterToken, []types.SystemRole{types.RoleTrustedCluster}, time.Time{}))
require.NoError(t, err)
Expand All @@ -3789,7 +3789,7 @@ func testRotateTrustedClusters(t *testing.T, suite *integrationTestSuite) {
tryCreateTrustedCluster(t, aux.Process.GetAuthServer(), trustedCluster)
waitForTunnelConnections(t, svc.GetAuthServer(), aux.Secrets.SiteName, 1)

// capture credentials before has reload started to simulate old client
// capture credentials before reload has started to simulate old client
initialCreds, err := GenerateUserCreds(UserCredsRequest{
Process: svc,
Username: suite.me.Username,
Expand Down Expand Up @@ -3818,24 +3818,43 @@ func testRotateTrustedClusters(t *testing.T, suite *integrationTestSuite) {
})
require.NoError(t, err)

// wait until service phase update to be broadcasted (init phase does not trigger reload)
// wait for the service phase update to be broadcast (init phase does not trigger reload)
err = waitForProcessEvent(svc, service.TeleportPhaseChangeEvent, 10*time.Second)
require.NoError(t, err)

// waitForPhase waits until aux cluster detects the rotation
waitForPhase := func(phase string) error {
ctx, cancel := context.WithTimeout(context.Background(), tconf.PollingPeriod*10)
defer cancel()

watcher, err := services.NewCertAuthorityWatcher(ctx, services.CertAuthorityWatcherConfig{
ResourceWatcherConfig: services.ResourceWatcherConfig{
Component: teleport.ComponentProxy,
Clock: tconf.Clock,
Client: aux.GetSiteAPI(clusterAux),
},
WatchHostCA: true,
})
if err != nil {
return err
}
defer watcher.Close()

var lastPhase string
for i := 0; i < 10; i++ {
ca, err := aux.Process.GetAuthServer().GetCertAuthority(types.CertAuthID{
Type: types.HostCA,
DomainName: clusterMain,
}, false)
require.NoError(t, err)
if ca.GetRotation().Phase == phase {
return nil
select {
case <-ctx.Done():
return trace.CompareFailed("failed to converge to phase %q, last phase %q", phase, lastPhase)
case cas := <-watcher.CertAuthorityC:
for _, ca := range cas {
if ca.GetClusterName() == clusterMain &&
ca.GetType() == types.HostCA &&
ca.GetRotation().Phase == phase {
return nil
}
lastPhase = ca.GetRotation().Phase
}
}
lastPhase = ca.GetRotation().Phase
time.Sleep(tconf.PollingPeriod / 2)
}
return trace.CompareFailed("failed to converge to phase %q, last phase %q", phase, lastPhase)
}
Expand Down Expand Up @@ -3916,7 +3935,7 @@ func testRotateTrustedClusters(t *testing.T, suite *integrationTestSuite) {
// shut down the service
cancel()
// close the service without waiting for the connections to drain
svc.Close()
require.NoError(t, svc.Close())

select {
case err := <-runErrCh:
Expand Down Expand Up @@ -4082,6 +4101,7 @@ func (s *integrationTestSuite) rotationConfig(disableWebService bool) *service.C
tconf.PollingPeriod = 500 * time.Millisecond
tconf.ClientTimeout = time.Second
tconf.ShutdownTimeout = 2 * tconf.ClientTimeout
tconf.MaxRetryPeriod = time.Second
return tconf
}

Expand Down
Loading

0 comments on commit 6cbe24c

Please sign in to comment.