From 4a2d3a3840f4e88b747b800f42472a805fc5cf45 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andrej=20Buko=C5=A1ek?= Date: Wed, 12 Feb 2020 13:23:09 +0100 Subject: [PATCH 1/8] node: Add automatic TLS certificate rotation support --- .buildkite/code.pipeline.yml | 2 +- .changelog/2098.feature.md | 6 + go/common/accessctl/accessctl_test.go | 6 +- go/common/grpc/grpc.go | 14 +- go/common/grpc/policy/policy_test.go | 4 +- go/common/grpc/proxy/proxy_test.go | 11 +- go/common/grpc/testing/ping.go | 8 +- go/common/identity/identity.go | 166 +++++++++++++++--- go/common/identity/identity_test.go | 30 +++- go/common/node/node.go | 14 +- go/ias/proxy/client/client.go | 8 +- go/oasis-node/cmd/common/grpc/grpc.go | 4 +- go/oasis-node/cmd/debug/byzantine/registry.go | 6 +- go/oasis-node/cmd/debug/byzantine/steps.go | 2 +- go/oasis-node/cmd/debug/byzantine/storage.go | 2 +- .../debug/txsource/workload/registration.go | 8 +- go/oasis-node/cmd/identity/identity.go | 2 +- go/oasis-node/cmd/node/node.go | 2 +- go/oasis-node/cmd/registry/node/node.go | 12 +- .../cmd/storage/benchmark/benchmark.go | 2 +- go/oasis-remote-signer/cmd/root.go | 15 +- go/oasis-test-runner/oasis/byzantine.go | 2 +- go/oasis-test-runner/oasis/compute.go | 2 +- go/oasis-test-runner/oasis/keymanager.go | 8 +- go/oasis-test-runner/oasis/oasis.go | 9 +- go/oasis-test-runner/oasis/seed.go | 2 +- go/oasis-test-runner/oasis/sentry.go | 2 +- go/oasis-test-runner/oasis/storage.go | 8 +- go/oasis-test-runner/oasis/validator.go | 8 +- .../scenario/e2e/registry_cli.go | 10 ++ go/registry/api/api.go | 52 ++++-- go/registry/tests/tester.go | 28 +-- go/runtime/committee/client.go | 6 +- go/sentry/client/client.go | 8 +- go/sentry/sentry.go | 2 +- .../mkvs/interop/cmd/protocol_server.go | 2 +- go/worker/common/committee/accessctl.go | 20 ++- go/worker/common/worker.go | 6 +- go/worker/registration/worker.go | 60 ++++++- go/worker/sentry/grpc/init.go | 17 +- go/worker/sentry/worker.go | 6 +- 41 files changed, 451 insertions(+), 131 deletions(-) create mode 100644 .changelog/2098.feature.md diff --git a/.buildkite/code.pipeline.yml b/.buildkite/code.pipeline.yml index 1702877f9a3..d730826452f 100644 --- a/.buildkite/code.pipeline.yml +++ b/.buildkite/code.pipeline.yml @@ -203,7 +203,7 @@ steps: ############### - label: E2E tests parallelism: 7 - timeout_in_minutes: 9 + timeout_in_minutes: 11 command: - .buildkite/scripts/download_e2e_test_artifacts.sh - .buildkite/scripts/test_e2e.sh diff --git a/.changelog/2098.feature.md b/.changelog/2098.feature.md new file mode 100644 index 00000000000..e2ab4bb7fb1 --- /dev/null +++ b/.changelog/2098.feature.md @@ -0,0 +1,6 @@ +node: Add automatic TLS certificate rotation support + +It is now possible to automatically rotate the node's TLS +certificates every N epochs by passing the command-line flag +`worker.registration.rotate_certs`. +Do not use this if you use sentry nodes or IAS proxies. diff --git a/go/common/accessctl/accessctl_test.go b/go/common/accessctl/accessctl_test.go index 084786a34ed..e270abe2ade 100644 --- a/go/common/accessctl/accessctl_test.go +++ b/go/common/accessctl/accessctl_test.go @@ -49,11 +49,11 @@ func TestSubjectFromCertificate(t *testing.T) { require.NoError(err, "Failed to create a temporary directory") defer os.RemoveAll(dataDir) - ident, err := identity.LoadOrGenerate(dataDir, memorySigner.NewFactory()) + ident, err := identity.LoadOrGenerate(dataDir, memorySigner.NewFactory(), false) require.NoError(err, "Failed to generate a new identity") - require.Len(ident.TLSCertificate.Certificate, 1, "The generated identity contains more than 1 certificate in the chain") + require.Len(ident.GetTLSCertificate().Certificate, 1, "The generated identity contains more than 1 certificate in the chain") - x509Cert, err := x509.ParseCertificate(ident.TLSCertificate.Certificate[0]) + x509Cert, err := x509.ParseCertificate(ident.GetTLSCertificate().Certificate[0]) require.NoError(err, "Failed to parse X.509 certificate from TLS certificate") sub := SubjectFromX509Certificate(x509Cert) diff --git a/go/common/grpc/grpc.go b/go/common/grpc/grpc.go index df7061fad86..749e599980d 100644 --- a/go/common/grpc/grpc.go +++ b/go/common/grpc/grpc.go @@ -23,6 +23,7 @@ import ( "google.golang.org/grpc/keepalive" "github.com/oasislabs/oasis-core/go/common/grpc/auth" + "github.com/oasislabs/oasis-core/go/common/identity" "github.com/oasislabs/oasis-core/go/common/logging" "github.com/oasislabs/oasis-core/go/common/service" ) @@ -298,8 +299,8 @@ type ServerConfig struct { // nolint: maligned Port uint16 // Path is the path for the local server. Leave nil to create a TCP server. Path string - // Certificate is the certificate used by the server. Should be nil for local servers. - Certificate *tls.Certificate + // Identity is the identity of the worker that's running the server. + Identity *identity.Identity // InstallWrapper specifies whether intercepting facilities should be enabled on this server, // to enable intercepting RPC calls with a wrapper. InstallWrapper bool @@ -473,11 +474,14 @@ func NewServer(config *ServerConfig) (*Server, error) { grpc.KeepaliveParams(serverKeepAliveParams), grpc.CustomCodec(&CBORCodec{}), } - if config.Certificate != nil { + if config.Identity != nil && config.Identity.GetTLSCertificate() != nil { tlsConfig := &tls.Config{ - Certificates: []tls.Certificate{*config.Certificate}, - ClientAuth: clientAuthType, + ClientAuth: clientAuthType, + GetCertificate: func(ch *tls.ClientHelloInfo) (*tls.Certificate, error) { + return config.Identity.GetTLSCertificate(), nil + }, } + sOpts = append(sOpts, grpc.Creds(credentials.NewTLS(tlsConfig))) } sOpts = append(sOpts, config.CustomOptions...) diff --git a/go/common/grpc/policy/policy_test.go b/go/common/grpc/policy/policy_test.go index 7e4fb740898..d0b79bf9bf8 100644 --- a/go/common/grpc/policy/policy_test.go +++ b/go/common/grpc/policy/policy_test.go @@ -20,6 +20,7 @@ import ( "github.com/oasislabs/oasis-core/go/common/grpc/policy" "github.com/oasislabs/oasis-core/go/common/grpc/policy/api" cmnTesting "github.com/oasislabs/oasis-core/go/common/grpc/testing" + "github.com/oasislabs/oasis-core/go/common/identity" ) var testNs = common.NewTestNamespaceFromSeed([]byte("oasis common grpc policy test ns"), 0) @@ -60,9 +61,10 @@ func TestAccessPolicy(t *testing.T) { serverConfig := &cmnGrpc.ServerConfig{ Name: host, Port: port, - Certificate: serverTLSCert, + Identity: &identity.Identity{}, CustomOptions: []grpc.ServerOption{grpc.CustomCodec(&cmnGrpc.CBORCodec{})}, } + serverConfig.Identity.SetTLSCertificate(serverTLSCert) grpcServer, err := cmnGrpc.NewServer(serverConfig) require.NoErrorf(err, "Failed to create a new gRPC server: %v", err) diff --git a/go/common/grpc/proxy/proxy_test.go b/go/common/grpc/proxy/proxy_test.go index 6bb3ccaae54..b2bf7798259 100644 --- a/go/common/grpc/proxy/proxy_test.go +++ b/go/common/grpc/proxy/proxy_test.go @@ -17,6 +17,7 @@ import ( commonGrpc "github.com/oasislabs/oasis-core/go/common/grpc" "github.com/oasislabs/oasis-core/go/common/grpc/auth" cmnTesting "github.com/oasislabs/oasis-core/go/common/grpc/testing" + "github.com/oasislabs/oasis-core/go/common/identity" ) const ( @@ -58,9 +59,10 @@ func TestGRPCProxy(t *testing.T) { serverConfig := &commonGrpc.ServerConfig{ Name: host, Port: port, - Certificate: serverTLSCert, + Identity: &identity.Identity{}, CustomOptions: []grpc.ServerOption{grpc.CustomCodec(&commonGrpc.CBORCodec{})}, } + serverConfig.Identity.SetTLSCertificate(serverTLSCert) grpcServer, err := commonGrpc.NewServer(serverConfig) require.NoErrorf(err, "Failed to create a new gRPC server: %v", err) @@ -84,14 +86,15 @@ func TestGRPCProxy(t *testing.T) { // Create a proxy gRPC server. proxyServerConfig := &commonGrpc.ServerConfig{ - Name: host, - Port: port + 1, - Certificate: serverTLSCert, + Name: host, + Port: port + 1, + Identity: &identity.Identity{}, CustomOptions: []grpc.ServerOption{ // All unknown requests will be proxied to the grpc server above. grpc.UnknownServiceHandler(Handler(conn)), }, } + proxyServerConfig.Identity.SetTLSCertificate(serverTLSCert) proxyGrpcServer, err := commonGrpc.NewServer(proxyServerConfig) require.NoErrorf(err, "Failed to create a proxy gRPC server: %v", err) diff --git a/go/common/grpc/testing/ping.go b/go/common/grpc/testing/ping.go index 4ad056d6e48..2c9994843dd 100644 --- a/go/common/grpc/testing/ping.go +++ b/go/common/grpc/testing/ping.go @@ -64,14 +64,14 @@ func CreateCertificate(t *testing.T) (*tls.Certificate, *x509.Certificate) { require.NoError(err, "Failed to create a temporary directory") defer os.RemoveAll(dataDir) - ident, err := identity.LoadOrGenerate(dataDir, memorySigner.NewFactory()) + ident, err := identity.LoadOrGenerate(dataDir, memorySigner.NewFactory(), false) require.NoError(err, "Failed to generate a new identity") - require.Len(ident.TLSCertificate.Certificate, 1, "The generated identity contains more than 1 TLS certificate in the chain") + require.Len(ident.GetTLSCertificate().Certificate, 1, "The generated identity contains more than 1 TLS certificate in the chain") - x509Cert, err := x509.ParseCertificate(ident.TLSCertificate.Certificate[0]) + x509Cert, err := x509.ParseCertificate(ident.GetTLSCertificate().Certificate[0]) require.NoError(err, "Failed to parse X.509 certificate from TLS certificate") - return ident.TLSCertificate, x509Cert + return ident.GetTLSCertificate(), x509Cert } // PingQuery is the PingServer query. diff --git a/go/common/identity/identity.go b/go/common/identity/identity.go index 977afbd7b8e..d60ba0679f0 100644 --- a/go/common/identity/identity.go +++ b/go/common/identity/identity.go @@ -6,10 +6,12 @@ import ( "crypto/rand" "crypto/tls" "path/filepath" + "sync" "github.com/oasislabs/oasis-core/go/common/crypto/signature" "github.com/oasislabs/oasis-core/go/common/crypto/signature/signers/memory" tlsCert "github.com/oasislabs/oasis-core/go/common/crypto/tls" + "github.com/oasislabs/oasis-core/go/common/errors" ) const ( @@ -30,31 +32,124 @@ const ( tlsCertFilename = "tls_identity_cert.pem" ) +// ErrCertificateRotationForbidden is returned by RotateCertificates if +// TLS certificate rotation is forbidden. This happens when rotation is +// enabled and an existing TLS certificate was successfully loaded +// (or a new one was generated and persisted to disk). +var ErrCertificateRotationForbidden = errors.New("identity", 1, "identity: TLS certificate rotation forbidden") + // Identity is a node identity. type Identity struct { + sync.RWMutex + // NodeSigner is a node identity key signer. NodeSigner signature.Signer // P2PSigner is a node P2P link key signer. P2PSigner signature.Signer // ConsensusSigner is a node consensus key signer. ConsensusSigner signature.Signer - // TLSSigner is a node TLS certificate signer. - TLSSigner signature.Signer - // TLSCertificate is a certificate that can be used for TLS. - TLSCertificate *tls.Certificate + // tlsSigner is a node TLS certificate signer. + tlsSigner signature.Signer + // tlsCertificate is a certificate that can be used for TLS. + tlsCertificate *tls.Certificate + // nextTLSCertificate is a certificate that can be used for TLS in the next rotation. + nextTLSCertificate *tls.Certificate + // DoNotRotateTLS flag is true if we mustn't rotate the TLS certificates. + DoNotRotateTLS bool +} + +// RotateCertificates rotates the identity's TLS certificates. +// This is called from worker/registration/worker.go every +// CfgRegistrationRotateCerts epochs (if it's non-zero). +func (i *Identity) RotateCertificates() error { + if i.DoNotRotateTLS { + // If we loaded an existing certificate or persisted a generated one + // to disk, certificate rotation must be disabled. + // This behaviour is required for sentry nodes to work. + return ErrCertificateRotationForbidden + } + + i.Lock() + defer i.Unlock() + + if i.tlsCertificate != nil { + // Use the prepared certificate. + if i.nextTLSCertificate != nil { + i.tlsCertificate = i.nextTLSCertificate + i.tlsSigner = memory.NewFromRuntime(i.tlsCertificate.PrivateKey.(ed25519.PrivateKey)) + } + + // Generate a new TLS certificate to be used in the next rotation. + var err error + i.nextTLSCertificate, err = tlsCert.Generate(CommonName) + if err != nil { + return err + } + } + + return nil +} + +// GetTLSSigner returns the current TLS signer. +func (i *Identity) GetTLSSigner() signature.Signer { + i.RLock() + defer i.RUnlock() + + return i.tlsSigner +} + +// SetTLSSigner sets the current TLS signer. +func (i *Identity) SetTLSSigner(s signature.Signer) { + i.Lock() + defer i.Unlock() + + i.tlsSigner = s +} + +// GetTLSCertificate returns the current TLS certificate. +func (i *Identity) GetTLSCertificate() *tls.Certificate { + i.RLock() + defer i.RUnlock() + + return i.tlsCertificate +} + +// SetTLSCertificate sets the current TLS certificate. +func (i *Identity) SetTLSCertificate(cert *tls.Certificate) { + i.Lock() + defer i.Unlock() + + i.tlsCertificate = cert +} + +// GetNextTLSCertificate returns the next TLS certificate. +func (i *Identity) GetNextTLSCertificate() *tls.Certificate { + i.RLock() + defer i.RUnlock() + + return i.nextTLSCertificate +} + +// SetNextTLSCertificate sets the next TLS certificate. +func (i *Identity) SetNextTLSCertificate(nextCert *tls.Certificate) { + i.Lock() + defer i.Unlock() + + i.nextTLSCertificate = nextCert } // Load loads an identity. func Load(dataDir string, signerFactory signature.SignerFactory) (*Identity, error) { - return doLoadOrGenerate(dataDir, signerFactory, false) + return doLoadOrGenerate(dataDir, signerFactory, false, false) } // LoadOrGenerate loads or generates an identity. -func LoadOrGenerate(dataDir string, signerFactory signature.SignerFactory) (*Identity, error) { - return doLoadOrGenerate(dataDir, signerFactory, true) +// If persistTLS is true, it saves the generated TLS certificates to disk. +func LoadOrGenerate(dataDir string, signerFactory signature.SignerFactory, persistTLS bool) (*Identity, error) { + return doLoadOrGenerate(dataDir, signerFactory, true, persistTLS) } -func doLoadOrGenerate(dataDir string, signerFactory signature.SignerFactory, shouldGenerate bool) (*Identity, error) { +func doLoadOrGenerate(dataDir string, signerFactory signature.SignerFactory, shouldGenerate bool, persistTLS bool) (*Identity, error) { var signers []signature.Signer for _, v := range []struct { role signature.SignerRole @@ -86,30 +181,51 @@ func doLoadOrGenerate(dataDir string, signerFactory signature.SignerFactory, sho signers = append(signers, signer) } - // TLS certificate. - // - // TODO: The key and cert could probably be made totally ephemeral, as long - // as the registry update takes effect immediately. var ( - cert *tls.Certificate - err error + nextCert *tls.Certificate + dnr bool ) + + // First, check if we can load the TLS certificate from disk. tlsCertPath, tlsKeyPath := TLSCertPaths(dataDir) - if shouldGenerate { - cert, err = tlsCert.LoadOrGenerate(tlsCertPath, tlsKeyPath, CommonName) + cert, err := tlsCert.Load(tlsCertPath, tlsKeyPath) + if err == nil { + // Load successful, ensure that we won't ever rotate the certificates. + dnr = true } else { - cert, err = tlsCert.Load(tlsCertPath, tlsKeyPath) - } - if err != nil { - return nil, err + // Freshly generate TLS certificates. + cert, err = tlsCert.Generate(CommonName) + if err != nil { + return nil, err + } + + if persistTLS { + // Save generated TLS certificate to disk. + err = tlsCert.Save(tlsCertPath, tlsKeyPath, cert) + if err != nil { + return nil, err + } + + // Disable TLS rotation if we're persisting TLS certificates. + dnr = true + } else { + // Not persisting TLS certificate to disk, generate a new + // certificate to be used in the next rotation. + nextCert, err = tlsCert.Generate(CommonName) + if err != nil { + return nil, err + } + } } return &Identity{ - NodeSigner: signers[0], - P2PSigner: signers[1], - ConsensusSigner: signers[2], - TLSSigner: memory.NewFromRuntime(cert.PrivateKey.(ed25519.PrivateKey)), - TLSCertificate: cert, + NodeSigner: signers[0], + P2PSigner: signers[1], + ConsensusSigner: signers[2], + tlsSigner: memory.NewFromRuntime(cert.PrivateKey.(ed25519.PrivateKey)), + tlsCertificate: cert, + nextTLSCertificate: nextCert, + DoNotRotateTLS: dnr, }, nil } diff --git a/go/common/identity/identity_test.go b/go/common/identity/identity_test.go index 6109f654117..9ffbf4f1152 100644 --- a/go/common/identity/identity_test.go +++ b/go/common/identity/identity_test.go @@ -20,16 +20,36 @@ func TestLoadOrGenerate(t *testing.T) { require.NoError(t, err, "NewFactory") // Generate a new identity. - identity, err := LoadOrGenerate(dataDir, factory) + identity, err := LoadOrGenerate(dataDir, factory, true) require.NoError(t, err, "LoadOrGenerate") // Load an existing identity. - identity2, err := LoadOrGenerate(dataDir, factory) + identity2, err := LoadOrGenerate(dataDir, factory, false) require.NoError(t, err, "LoadOrGenerate (2)") require.EqualValues(t, identity.NodeSigner, identity2.NodeSigner) require.EqualValues(t, identity.P2PSigner, identity2.P2PSigner) require.EqualValues(t, identity.ConsensusSigner, identity2.ConsensusSigner) - require.EqualValues(t, identity.TLSSigner, identity2.TLSSigner) - // TODO: Check that it always generates a fresh certificate once oasis-core#1541 is done. - require.EqualValues(t, identity.TLSCertificate, identity2.TLSCertificate) + require.EqualValues(t, identity.GetTLSSigner(), identity2.GetTLSSigner()) + require.EqualValues(t, identity.GetTLSCertificate(), identity2.GetTLSCertificate()) + + dataDir2, err := ioutil.TempDir("", "oasis-identity-test2_") + require.NoError(t, err, "create data dir (2)") + defer os.RemoveAll(dataDir2) + + // Generate a new identity again, this time without persisting TLS certs. + identity3, err := LoadOrGenerate(dataDir2, factory, false) + require.NoError(t, err, "LoadOrGenerate (3)") + + // Load it back. + identity4, err := LoadOrGenerate(dataDir2, factory, false) + require.NoError(t, err, "LoadOrGenerate (4)") + require.EqualValues(t, identity3.NodeSigner, identity4.NodeSigner) + require.EqualValues(t, identity3.P2PSigner, identity4.P2PSigner) + require.EqualValues(t, identity3.ConsensusSigner, identity4.ConsensusSigner) + require.NotEqual(t, identity.GetTLSSigner(), identity3.GetTLSSigner()) + require.NotEqual(t, identity2.GetTLSSigner(), identity3.GetTLSSigner()) + require.NotEqual(t, identity3.GetTLSSigner(), identity4.GetTLSSigner()) + require.NotEqual(t, identity.GetTLSCertificate(), identity3.GetTLSCertificate()) + require.NotEqual(t, identity2.GetTLSCertificate(), identity3.GetTLSCertificate()) + require.NotEqual(t, identity3.GetTLSCertificate(), identity4.GetTLSCertificate()) } diff --git a/go/common/node/node.go b/go/common/node/node.go index 5f81e5d83ad..9bd4ca08ded 100644 --- a/go/common/node/node.go +++ b/go/common/node/node.go @@ -174,17 +174,24 @@ type CommitteeInfo struct { // Certificate is the certificate for establishing TLS connections. Certificate []byte `json:"certificate"` + // NextCertificate is the certificate that will be used for establishing + // TLS connections after certificate rotation (if enabled). + NextCertificate []byte `json:"next_certificate,omitempty"` + // Addresses is the list of committee addresses at which the node can be reached. Addresses []CommitteeAddress `json:"addresses"` } // Equal compares vs another CommitteeInfo for equality. func (c *CommitteeInfo) Equal(other *CommitteeInfo) bool { - // XXX: Why is this top-level certificate even needed? if !bytes.Equal(c.Certificate, other.Certificate) { return false } + if !bytes.Equal(c.NextCertificate, other.NextCertificate) { + return false + } + if len(c.Addresses) != len(other.Addresses) { return false } @@ -202,6 +209,11 @@ func (c *CommitteeInfo) ParseCertificate() (*x509.Certificate, error) { return x509.ParseCertificate(c.Certificate) } +// ParseCertificate returns the parsed x509 next certificate. +func (c *CommitteeInfo) ParseNextCertificate() (*x509.Certificate, error) { + return x509.ParseCertificate(c.NextCertificate) +} + // P2PInfo contains information for connecting to this node via P2P transport. type P2PInfo struct { // ID is the unique identifier of the node on the P2P transport. diff --git a/go/ias/proxy/client/client.go b/go/ias/proxy/client/client.go index a741b32d490..3e23683a8e7 100644 --- a/go/ias/proxy/client/client.go +++ b/go/ias/proxy/client/client.go @@ -119,9 +119,11 @@ func New(identity *identity.Identity, proxyAddr, tlsCertFile string) (api.Endpoi certPool := x509.NewCertPool() certPool.AddCert(parsedCert) creds := credentials.NewTLS(&tls.Config{ - Certificates: []tls.Certificate{*identity.TLSCertificate}, - RootCAs: certPool, - ServerName: proxy.CommonName, + RootCAs: certPool, + ServerName: proxy.CommonName, + GetClientCertificate: func(cri *tls.CertificateRequestInfo) (*tls.Certificate, error) { + return identity.GetTLSCertificate(), nil + }, }) conn, err := cmnGrpc.Dial( diff --git a/go/oasis-node/cmd/common/grpc/grpc.go b/go/oasis-node/cmd/common/grpc/grpc.go index 041e2ee7f61..045c598d7fd 100644 --- a/go/oasis-node/cmd/common/grpc/grpc.go +++ b/go/oasis-node/cmd/common/grpc/grpc.go @@ -14,6 +14,7 @@ import ( "google.golang.org/grpc" cmnGrpc "github.com/oasislabs/oasis-core/go/common/grpc" + "github.com/oasislabs/oasis-core/go/common/identity" "github.com/oasislabs/oasis-core/go/common/logging" "github.com/oasislabs/oasis-core/go/oasis-node/cmd/common" ) @@ -50,9 +51,10 @@ func NewServerTCP(cert *tls.Certificate, installWrapper bool) (*cmnGrpc.Server, config := &cmnGrpc.ServerConfig{ Name: "internal", Port: uint16(viper.GetInt(CfgServerPort)), - Certificate: cert, + Identity: &identity.Identity{}, InstallWrapper: installWrapper, } + config.Identity.SetTLSCertificate(cert) return cmnGrpc.NewServer(config) } diff --git a/go/oasis-node/cmd/debug/byzantine/registry.go b/go/oasis-node/cmd/debug/byzantine/registry.go index ad433d8c76e..e810e6fdac4 100644 --- a/go/oasis-node/cmd/debug/byzantine/registry.go +++ b/go/oasis-node/cmd/debug/byzantine/registry.go @@ -37,7 +37,7 @@ func registryRegisterNode(svc service.TendermintService, id *identity.Identity, var committeeAddresses []node.CommitteeAddress for _, addr := range addresses { committeeAddresses = append(committeeAddresses, node.CommitteeAddress{ - Certificate: id.TLSCertificate.Certificate[0], + Certificate: id.GetTLSCertificate().Certificate[0], Address: addr, }) } @@ -47,7 +47,7 @@ func registryRegisterNode(svc service.TendermintService, id *identity.Identity, EntityID: entityID, Expiration: 1000, Committee: node.CommitteeInfo{ - Certificate: id.TLSCertificate.Certificate[0], + Certificate: id.GetTLSCertificate().Certificate[0], Addresses: committeeAddresses, }, P2P: node.P2PInfo{ @@ -68,7 +68,7 @@ func registryRegisterNode(svc service.TendermintService, id *identity.Identity, registrationSigner, id.P2PSigner, id.ConsensusSigner, - id.TLSSigner, + id.GetTLSSigner(), }, registry.RegisterGenesisNodeSignatureContext, nodeDesc, diff --git a/go/oasis-node/cmd/debug/byzantine/steps.go b/go/oasis-node/cmd/debug/byzantine/steps.go index 166b1351bab..1aa833061a5 100644 --- a/go/oasis-node/cmd/debug/byzantine/steps.go +++ b/go/oasis-node/cmd/debug/byzantine/steps.go @@ -42,7 +42,7 @@ func initDefaultIdentity(dataDir string) (*identity.Identity, error) { if err != nil { return nil, errors.Wrap(err, "identity NewFactory") } - id, err := identity.LoadOrGenerate(dataDir, signerFactory) + id, err := identity.LoadOrGenerate(dataDir, signerFactory, false) if err != nil { return nil, errors.Wrap(err, "identity LoadOrGenerate") } diff --git a/go/oasis-node/cmd/debug/byzantine/storage.go b/go/oasis-node/cmd/debug/byzantine/storage.go index b241e3cdebf..a70213806c1 100644 --- a/go/oasis-node/cmd/debug/byzantine/storage.go +++ b/go/oasis-node/cmd/debug/byzantine/storage.go @@ -69,7 +69,7 @@ func dialNode(node *node.Node, opts grpc.DialOption) (*grpc.ClientConn, func(), } func newHonestNodeStorage(id *identity.Identity, node *node.Node) (*honestNodeStorage, error) { - opts, err := dialOptionForNode([]tls.Certificate{*id.TLSCertificate}, node) + opts, err := dialOptionForNode([]tls.Certificate{*id.GetTLSCertificate()}, node) if err != nil { return nil, errors.Wrap(err, "storage client DialOptionForNode") } diff --git a/go/oasis-node/cmd/debug/txsource/workload/registration.go b/go/oasis-node/cmd/debug/txsource/workload/registration.go index 9bdddd7651a..848762db24a 100644 --- a/go/oasis-node/cmd/debug/txsource/workload/registration.go +++ b/go/oasis-node/cmd/debug/txsource/workload/registration.go @@ -99,10 +99,10 @@ func getNodeDesc(rng *rand.Rand, nodeIdentity *identity.Identity, entityID signa Expiration: 0, Roles: availableRoles[rng.Intn(len(availableRoles))], Committee: node.CommitteeInfo{ - Certificate: nodeIdentity.TLSCertificate.Certificate[0], + Certificate: nodeIdentity.GetTLSCertificate().Certificate[0], Addresses: []node.CommitteeAddress{ { - Certificate: nodeIdentity.TLSCertificate.Certificate[0], + Certificate: nodeIdentity.GetTLSCertificate().Certificate[0], Address: nodeAddr, }, }, @@ -136,7 +136,7 @@ func signNode(identity *identity.Identity, nodeDesc *node.Node) (*node.MultiSign identity.NodeSigner, identity.P2PSigner, identity.ConsensusSigner, - identity.TLSSigner, + identity.GetTLSSigner(), } sigNode, err := node.MultiSignNode(nodeSigners, registry.RegisterNodeSignatureContext, nodeDesc) @@ -213,7 +213,7 @@ func (r *registration) Run( // nolint: gocyclo if err != nil { return fmt.Errorf("failed to create a temporary directory: %w", err) } - ident, err := identity.LoadOrGenerate(dataDir, memorySigner.NewFactory()) + ident, err := identity.LoadOrGenerate(dataDir, memorySigner.NewFactory(), false) if err != nil { return fmt.Errorf("failed generating account node identity: %w", err) } diff --git a/go/oasis-node/cmd/identity/identity.go b/go/oasis-node/cmd/identity/identity.go index bb1d814b153..983a18fa359 100644 --- a/go/oasis-node/cmd/identity/identity.go +++ b/go/oasis-node/cmd/identity/identity.go @@ -50,7 +50,7 @@ func doNodeInit(cmd *cobra.Command, args []string) { ) os.Exit(1) } - if _, err = identity.LoadOrGenerate(dataDir, nodeSignerFactory); err != nil { + if _, err = identity.LoadOrGenerate(dataDir, nodeSignerFactory, true); err != nil { logger.Error("failed to load or generate node identity", "err", err, ) diff --git a/go/oasis-node/cmd/node/node.go b/go/oasis-node/cmd/node/node.go index 7fe3eb719d8..958d1449f94 100644 --- a/go/oasis-node/cmd/node/node.go +++ b/go/oasis-node/cmd/node/node.go @@ -526,7 +526,7 @@ func newNode(testNode bool) (*Node, error) { // nolint: gocyclo ) return nil, err } - node.Identity, err = identity.LoadOrGenerate(dataDir, signerFactory) + node.Identity, err = identity.LoadOrGenerate(dataDir, signerFactory, false) if err != nil { logger.Error("failed to load/generate identity", "err", err, diff --git a/go/oasis-node/cmd/registry/node/node.go b/go/oasis-node/cmd/registry/node/node.go index 0e45b03ba9c..fca3f77e7d8 100644 --- a/go/oasis-node/cmd/registry/node/node.go +++ b/go/oasis-node/cmd/registry/node/node.go @@ -162,7 +162,7 @@ func doInit(cmd *cobra.Command, args []string) { // nolint: gocyclo ) os.Exit(1) } - nodeIdentity, err := identity.LoadOrGenerate(dataDir, nodeSignerFactory) + nodeIdentity, err := identity.LoadOrGenerate(dataDir, nodeSignerFactory, false) if err != nil { logger.Error("failed to load or generate node identity", "err", err, @@ -170,12 +170,18 @@ func doInit(cmd *cobra.Command, args []string) { // nolint: gocyclo os.Exit(1) } + var nextCert []byte + if c := nodeIdentity.GetNextTLSCertificate(); c != nil { + nextCert = c.Certificate[0] + } + n := &node.Node{ ID: nodeIdentity.NodeSigner.Public(), EntityID: entityID, Expiration: viper.GetUint64(CfgExpiration), Committee: node.CommitteeInfo{ - Certificate: nodeIdentity.TLSCertificate.Certificate[0], + Certificate: nodeIdentity.GetTLSCertificate().Certificate[0], + NextCertificate: nextCert, }, P2P: node.P2PInfo{ ID: nodeIdentity.P2PSigner.Public(), @@ -269,7 +275,7 @@ func doInit(cmd *cobra.Command, args []string) { // nolint: gocyclo signers = append(signers, []signature.Signer{ nodeIdentity.P2PSigner, nodeIdentity.ConsensusSigner, - nodeIdentity.TLSSigner, + nodeIdentity.GetTLSSigner(), }...) signed, err := node.MultiSignNode(signers, registry.RegisterGenesisNodeSignatureContext, n) diff --git a/go/oasis-node/cmd/storage/benchmark/benchmark.go b/go/oasis-node/cmd/storage/benchmark/benchmark.go index 6ef33d200d6..d82a584762c 100644 --- a/go/oasis-node/cmd/storage/benchmark/benchmark.go +++ b/go/oasis-node/cmd/storage/benchmark/benchmark.go @@ -70,7 +70,7 @@ func doBenchmark(cmd *cobra.Command, args []string) { // nolint: gocyclo } // Create an identity. - ident, err := identity.LoadOrGenerate(dataDir, memorySigner.NewFactory()) + ident, err := identity.LoadOrGenerate(dataDir, memorySigner.NewFactory(), false) if err != nil { logger.Error("failed to generate a new identity", "err", err, diff --git a/go/oasis-remote-signer/cmd/root.go b/go/oasis-remote-signer/cmd/root.go index 9a3fd24ca1b..27c1e256236 100644 --- a/go/oasis-remote-signer/cmd/root.go +++ b/go/oasis-remote-signer/cmd/root.go @@ -18,6 +18,7 @@ import ( "github.com/oasislabs/oasis-core/go/common/crypto/tls" "github.com/oasislabs/oasis-core/go/common/grpc" "github.com/oasislabs/oasis-core/go/common/grpc/auth" + "github.com/oasislabs/oasis-core/go/common/identity" "github.com/oasislabs/oasis-core/go/common/logging" "github.com/oasislabs/oasis-core/go/common/version" cmdCommon "github.com/oasislabs/oasis-core/go/oasis-node/cmd/common" @@ -170,12 +171,14 @@ func runRoot(cmd *cobra.Command, args []string) error { peerCertAuth.AllowPeerCertificate(clientCert) // Initialize the gRPC server. - svr, err := grpc.NewServer(&grpc.ServerConfig{ - Name: "remote-signer", - Port: uint16(viper.GetInt(cmdGrpc.CfgServerPort)), - Certificate: cert, - AuthFunc: peerCertAuth.AuthFunc, - }) + svrCfg := &grpc.ServerConfig{ + Name: "remote-signer", + Port: uint16(viper.GetInt(cmdGrpc.CfgServerPort)), + Identity: &identity.Identity{}, + AuthFunc: peerCertAuth.AuthFunc, + } + svrCfg.Identity.SetTLSCertificate(cert) + svr, err := grpc.NewServer(svrCfg) if err != nil { logger.Error("failed to instantiate gRPC server", "err", err, diff --git a/go/oasis-test-runner/oasis/byzantine.go b/go/oasis-test-runner/oasis/byzantine.go index e88c77b5aa1..1dc3006cb13 100644 --- a/go/oasis-test-runner/oasis/byzantine.go +++ b/go/oasis-test-runner/oasis/byzantine.go @@ -83,7 +83,7 @@ func (net *Network) NewByzantine(cfg *ByzantineCfg) (*Byzantine, error) { } // Pre-provision the node identity so that we can update the entity. - publicKey, err := net.provisionNodeIdentity(byzantineDir, cfg.IdentitySeed) + publicKey, err := net.provisionNodeIdentity(byzantineDir, cfg.IdentitySeed, false) if err != nil { return nil, errors.Wrap(err, "oasis/byzantine: failed to provision node identity") } diff --git a/go/oasis-test-runner/oasis/compute.go b/go/oasis-test-runner/oasis/compute.go index 14877f2b3a6..db5d4175270 100644 --- a/go/oasis-test-runner/oasis/compute.go +++ b/go/oasis-test-runner/oasis/compute.go @@ -121,7 +121,7 @@ func (net *Network) NewCompute(cfg *ComputeCfg) (*Compute, error) { // Pre-provision the node identity so that we can update the entity. seed := fmt.Sprintf(computeIdentitySeedTemplate, len(net.computeWorkers)) - publicKey, err := net.provisionNodeIdentity(computeDir, seed) + publicKey, err := net.provisionNodeIdentity(computeDir, seed, false) if err != nil { return nil, errors.Wrap(err, "oasis/compute: failed to provision node identity") } diff --git a/go/oasis-test-runner/oasis/keymanager.go b/go/oasis-test-runner/oasis/keymanager.go index 7082fa0dc1f..c6859e607dd 100644 --- a/go/oasis-test-runner/oasis/keymanager.go +++ b/go/oasis-test-runner/oasis/keymanager.go @@ -238,10 +238,16 @@ func (net *Network) NewKeymanager(cfg *KeymanagerCfg) (*Keymanager, error) { return nil, errors.Wrap(err, "oasis/keymanager: failed to create keymanager subdir") } + // If we're using sentry nodes, we need to have a static TLS certificate. + var persistTLS bool + if len(cfg.SentryIndices) > 0 { + persistTLS = true + } + // Pre-provision the node identity so that we can update the entity. // TODO: Use proper key manager index when multiple key managers are supported. seed := fmt.Sprintf(keymanagerIdentitySeedTemplate, 0) - publicKey, err := net.provisionNodeIdentity(kmDir, seed) + publicKey, err := net.provisionNodeIdentity(kmDir, seed, persistTLS) if err != nil { return nil, errors.Wrap(err, "oasis/keymanager: failed to provision node identity") } diff --git a/go/oasis-test-runner/oasis/oasis.go b/go/oasis-test-runner/oasis/oasis.go index 5f06cfb4bb2..80b8096ca0f 100644 --- a/go/oasis-test-runner/oasis/oasis.go +++ b/go/oasis-test-runner/oasis/oasis.go @@ -28,6 +28,7 @@ import ( "github.com/oasislabs/oasis-core/go/oasis-node/cmd/genesis" "github.com/oasislabs/oasis-core/go/oasis-test-runner/env" "github.com/oasislabs/oasis-core/go/oasis-test-runner/log" + "github.com/oasislabs/oasis-core/go/worker/registration" ) const ( @@ -624,6 +625,10 @@ func (net *Network) startOasisNode( args = append(args, baseArgs...) args = append(args, extraArgs.vec...) + if len(net.sentries) == 0 && len(net.byzantine) == 0 && net.iasProxy == nil { + args = append(args, []string{"--" + registration.CfgRegistrationRotateCerts, "1"}...) + } + w, err := node.dir.NewLogWriter(logConsoleFile) if err != nil { return err @@ -746,7 +751,7 @@ func (net *Network) BasePath() string { return net.baseDir.String() } -func (net *Network) provisionNodeIdentity(dataDir *env.Dir, seed string) (signature.PublicKey, error) { +func (net *Network) provisionNodeIdentity(dataDir *env.Dir, seed string, persistTLS bool) (signature.PublicKey, error) { if net.cfg.DeterministicIdentities { if err := net.generateDeterministicNodeIdentity(dataDir, seed); err != nil { return signature.PublicKey{}, errors.Wrap(err, "oasis: failed to generate deterministic identity") @@ -757,7 +762,7 @@ func (net *Network) provisionNodeIdentity(dataDir *env.Dir, seed string) (signat if err != nil { return signature.PublicKey{}, errors.Wrap(err, "oasis: failed to create node file signer factory") } - nodeIdentity, err := identity.LoadOrGenerate(dataDir.String(), signerFactory) + nodeIdentity, err := identity.LoadOrGenerate(dataDir.String(), signerFactory, persistTLS) if err != nil { return signature.PublicKey{}, errors.Wrap(err, "oasis: failed to provision node identity") } diff --git a/go/oasis-test-runner/oasis/seed.go b/go/oasis-test-runner/oasis/seed.go index 653c775ee28..77d235aeb99 100644 --- a/go/oasis-test-runner/oasis/seed.go +++ b/go/oasis-test-runner/oasis/seed.go @@ -57,7 +57,7 @@ func (net *Network) newSeedNode() (*seedNode, error) { if err != nil { return nil, errors.Wrap(err, "oasis/seed: failed to create seed signer factory") } - seedIdentity, err := identity.LoadOrGenerate(seedDir.String(), signerFactory) + seedIdentity, err := identity.LoadOrGenerate(seedDir.String(), signerFactory, false) if err != nil { return nil, errors.Wrap(err, "oasis/seed: failed to provision seed identity") } diff --git a/go/oasis-test-runner/oasis/sentry.go b/go/oasis-test-runner/oasis/sentry.go index d8fbc7ca2f0..c5df0d6fbc7 100644 --- a/go/oasis-test-runner/oasis/sentry.go +++ b/go/oasis-test-runner/oasis/sentry.go @@ -112,7 +112,7 @@ func (net *Network) NewSentry(cfg *SentryCfg) (*Sentry, error) { ) return nil, fmt.Errorf("oasis/sentry: failed to create sentry file signer: %w", err) } - sentryIdentity, err := identity.LoadOrGenerate(sentryDir.String(), signerFactory) + sentryIdentity, err := identity.LoadOrGenerate(sentryDir.String(), signerFactory, true) if err != nil { net.logger.Error("failed to provision sentry identity", "err", err, diff --git a/go/oasis-test-runner/oasis/storage.go b/go/oasis-test-runner/oasis/storage.go index 02c58eff6bd..cfac467b2ea 100644 --- a/go/oasis-test-runner/oasis/storage.go +++ b/go/oasis-test-runner/oasis/storage.go @@ -141,9 +141,15 @@ func (net *Network) NewStorage(cfg *StorageCfg) (*Storage, error) { return nil, errors.Wrap(err, "oasis/storage: failed to create storage subdir") } + // If we're using sentry nodes, we need to have a static TLS certificate. + var persistTLS bool + if len(cfg.SentryIndices) > 0 { + persistTLS = true + } + // Pre-provision the node identity so that we can update the entity. seed := fmt.Sprintf(storageIdentitySeedTemplate, len(net.storageWorkers)) - publicKey, err := net.provisionNodeIdentity(storageDir, seed) + publicKey, err := net.provisionNodeIdentity(storageDir, seed, persistTLS) if err != nil { return nil, errors.Wrap(err, "oasis/storage: failed to provision node identity") } diff --git a/go/oasis-test-runner/oasis/validator.go b/go/oasis-test-runner/oasis/validator.go index 4fcbeed5eb8..3b16367aff8 100644 --- a/go/oasis-test-runner/oasis/validator.go +++ b/go/oasis-test-runner/oasis/validator.go @@ -157,10 +157,16 @@ func (net *Network) NewValidator(cfg *ValidatorCfg) (*Validator, error) { consensusAddrs = append(consensusAddrs, &consensusAddr) } + // If we're using sentry nodes, we need to have a static TLS certificate. + var persistTLS bool + if len(val.sentries) > 0 { + persistTLS = true + } + // Load node's identity, so that we can pass the validator's Tendermint // address to sentry node(s) to configure it as a private peer. seed := fmt.Sprintf(validatorIdentitySeedTemplate, len(net.validators)) - valPublicKey, err := net.provisionNodeIdentity(valDir, seed) + valPublicKey, err := net.provisionNodeIdentity(valDir, seed, persistTLS) if err != nil { return nil, errors.Wrap(err, "oasis/validator: failed to provision node identity") } diff --git a/go/oasis-test-runner/scenario/e2e/registry_cli.go b/go/oasis-test-runner/scenario/e2e/registry_cli.go index bc4f7828cd8..cd045c13696 100644 --- a/go/oasis-test-runner/scenario/e2e/registry_cli.go +++ b/go/oasis-test-runner/scenario/e2e/registry_cli.go @@ -496,6 +496,7 @@ func (r *registryCLIImpl) initNode(childEnv *env.Env, ent *entity.Entity, entDir // Replace our testNode fields with the generated one, so we can just marshal both nodes and compare the output afterwards. testNode.ID = n.ID testNode.Committee.Certificate = n.Committee.Certificate + testNode.Committee.NextCertificate = n.Committee.NextCertificate testNode.P2P.ID = n.P2P.ID testNode.Consensus.ID = n.Consensus.ID for idx := range testNode.Committee.Addresses { @@ -517,6 +518,15 @@ func (r *registryCLIImpl) initNode(childEnv *env.Env, ent *entity.Entity, entDir if err != nil { return nil, err } + + // TLS certificates are regenerated each time, so replace them with new ones. + testNode.Committee.Certificate = n.Committee.Certificate + testNode.Committee.NextCertificate = n.Committee.NextCertificate + for idx := range testNode.Committee.Addresses { + testNode.Committee.Addresses[idx].Certificate = n.Committee.Certificate + } + testNodeStr, _ = json.Marshal(testNode) + nStr, _ = json.Marshal(n) if !bytes.Equal(nStr, testNodeStr) { return nil, fmt.Errorf("second run test node mismatch! Original node: %s, imported node: %s", testNodeStr, nStr) diff --git a/go/registry/api/api.go b/go/registry/api/api.go index a4d4ca705fe..a48519ea475 100644 --- a/go/registry/api/api.go +++ b/go/registry/api/api.go @@ -5,6 +5,7 @@ import ( "bytes" "context" goEd25519 "crypto/ed25519" + "crypto/x509" "encoding/json" "fmt" "sort" @@ -572,18 +573,38 @@ func VerifyRegisterNodeArgs( // nolint: gocyclo ) return nil, nil, err } - certPub, err := verifyNodeCertificate(logger, &n) + + certPub, err := verifyNodeCertificate(logger, &n, false) if err != nil { return nil, nil, err } + if !sigNode.MultiSigned.IsSignedBy(certPub) { - logger.Error("RegisterNode: not signed by TLS certificate key", - "signed_node", sigNode, - "node", n, - ) - return nil, nil, fmt.Errorf("%w: registration not signed by TLS certificate key", ErrInvalidArgument) + if n.Committee.NextCertificate != nil { + nextCertPub, grr := verifyNodeCertificate(logger, &n, true) + if grr != nil { + return nil, nil, grr + } + + if !sigNode.MultiSigned.IsSignedBy(nextCertPub) { + logger.Error("RegisterNode: not signed by any TLS certificate key", + "signed_node", sigNode, + "node", n, + ) + return nil, nil, fmt.Errorf("%w: registration not signed by any TLS certificate key", ErrInvalidArgument) + } + + expectedSigners = append(expectedSigners, nextCertPub) + } else { + logger.Error("RegisterNode: not signed by TLS certificate key", + "signed_node", sigNode, + "node", n, + ) + return nil, nil, fmt.Errorf("%w: registration not signed by TLS certificate key", ErrInvalidArgument) + } + } else { + expectedSigners = append(expectedSigners, certPub) } - expectedSigners = append(expectedSigners, certPub) // Validate P2PInfo. if !n.P2P.ID.IsValid() { @@ -682,14 +703,23 @@ func VerifyRegisterNodeArgs( // nolint: gocyclo return &n, runtimes, nil } -func verifyNodeCertificate(logger *logging.Logger, node *node.Node) (signature.PublicKey, error) { - var certPub signature.PublicKey +func verifyNodeCertificate(logger *logging.Logger, node *node.Node, useNextCert bool) (signature.PublicKey, error) { + var ( + cert *x509.Certificate + certPub signature.PublicKey + err error + ) - cert, err := node.Committee.ParseCertificate() + if useNextCert { + cert, err = node.Committee.ParseNextCertificate() + } else { + cert, err = node.Committee.ParseCertificate() + } if err != nil { logger.Error("RegisterNode: failed to parse committee certificate", "err", err, "node", node, + "use_next_cert", useNextCert, ) return certPub, fmt.Errorf("%w: failed to parse committee certificate", ErrInvalidArgument) } @@ -698,6 +728,7 @@ func verifyNodeCertificate(logger *logging.Logger, node *node.Node) (signature.P if !ok { logger.Error("RegisterNode: incorrect committee certifiate signing algorithm", "node", node, + "use_next_cert", useNextCert, ) return certPub, fmt.Errorf("%w: incorrect committee certificate signing algorithm", ErrInvalidArgument) } @@ -707,6 +738,7 @@ func verifyNodeCertificate(logger *logging.Logger, node *node.Node) (signature.P logger.Error("RegisterNode: malformed committee certificate signing key", "err", err, "node", node, + "use_next_cert", useNextCert, ) return certPub, fmt.Errorf("%w: malformed committee certificate signing key", ErrInvalidArgument) } diff --git a/go/registry/tests/tester.go b/go/registry/tests/tester.go index 852110993a3..9069f383cd9 100644 --- a/go/registry/tests/tester.go +++ b/go/registry/tests/tester.go @@ -597,12 +597,12 @@ func randomIdentity(rng *drbg.Drbg) *identity.Identity { ConsensusSigner: mustGenerateSigner(), } - var err error - ident.TLSCertificate, err = tls.Generate(identity.CommonName) + cert, err := tls.Generate(identity.CommonName) if err != nil { panic(err) } - ident.TLSSigner = memorySigner.NewFromRuntime(ident.TLSCertificate.PrivateKey.(ed25519.PrivateKey)) + ident.SetTLSCertificate(cert) + ident.SetTLSSigner(memorySigner.NewFromRuntime(cert.PrivateKey.(ed25519.PrivateKey))) return ident } @@ -628,7 +628,7 @@ func (ent *TestEntity) NewTestNodes(nCompute int, nStorage int, idNonce []byte, ent.Signer, nodeIdentity.P2PSigner, nodeIdentity.ConsensusSigner, - nodeIdentity.TLSSigner, + nodeIdentity.GetTLSSigner(), } invalidIdentity := randomIdentity(rng) @@ -660,7 +660,7 @@ func (ent *TestEntity) NewTestNodes(nCompute int, nStorage int, idNonce []byte, nod.Node.P2P.Addresses = append(nod.Node.P2P.Addresses, addr) nod.Node.Consensus.ID = nodeIdentity.ConsensusSigner.Public() // Generate dummy TLS certificate. - nod.Node.Committee.Certificate = nodeIdentity.TLSCertificate.Certificate[0] + nod.Node.Committee.Certificate = nodeIdentity.GetTLSCertificate().Certificate[0] nod.Node.Committee.Addresses = []node.CommitteeAddress{ node.CommitteeAddress{ Certificate: nod.Node.Committee.Certificate, @@ -756,7 +756,7 @@ func (ent *TestEntity) NewTestNodes(nCompute int, nStorage int, idNonce []byte, nodeIdentity.NodeSigner, ent.Signer, nodeIdentity.ConsensusSigner, - nodeIdentity.TLSSigner, + nodeIdentity.GetTLSSigner(), }, api.RegisterNodeSignatureContext, &invNode6, @@ -821,7 +821,7 @@ func (ent *TestEntity) NewTestNodes(nCompute int, nStorage int, idNonce []byte, nodeIdentity.NodeSigner, ent.Signer, nodeIdentity.P2PSigner, - nodeIdentity.TLSSigner, + nodeIdentity.GetTLSSigner(), }, api.RegisterNodeSignatureContext, &invNode10, @@ -838,14 +838,14 @@ func (ent *TestEntity) NewTestNodes(nCompute int, nStorage int, idNonce []byte, invNode11 := *nod.Node invNode11.ID = invalidIdentity.NodeSigner.Public() invNode11.Consensus.ID = invalidIdentity.ConsensusSigner.Public() - invNode11.Committee.Certificate = invalidIdentity.TLSCertificate.Certificate[0] + invNode11.Committee.Certificate = invalidIdentity.GetTLSCertificate().Certificate[0] invalid11.signed, err = node.MultiSignNode( []signature.Signer{ invalidIdentity.NodeSigner, ent.Signer, invalidIdentity.ConsensusSigner, nodeIdentity.P2PSigner, - invalidIdentity.TLSSigner, + invalidIdentity.GetTLSSigner(), }, api.RegisterNodeSignatureContext, &invNode11, @@ -862,14 +862,14 @@ func (ent *TestEntity) NewTestNodes(nCompute int, nStorage int, idNonce []byte, invNode12 := *nod.Node invNode12.ID = invalidIdentity.NodeSigner.Public() invNode12.P2P.ID = invalidIdentity.ConsensusSigner.Public() - invNode12.Committee.Certificate = invalidIdentity.TLSCertificate.Certificate[0] + invNode12.Committee.Certificate = invalidIdentity.GetTLSCertificate().Certificate[0] invalid12.signed, err = node.MultiSignNode( []signature.Signer{ invalidIdentity.NodeSigner, ent.Signer, nodeIdentity.ConsensusSigner, invalidIdentity.P2PSigner, - invalidIdentity.TLSSigner, + invalidIdentity.GetTLSSigner(), }, api.RegisterNodeSignatureContext, &invNode12, @@ -893,7 +893,7 @@ func (ent *TestEntity) NewTestNodes(nCompute int, nStorage int, idNonce []byte, ent.Signer, invalidIdentity.ConsensusSigner, invalidIdentity.P2PSigner, - nodeIdentity.TLSSigner, + nodeIdentity.GetTLSSigner(), }, api.RegisterNodeSignatureContext, &invNode13, @@ -945,14 +945,14 @@ func (ent *TestEntity) NewTestNodes(nCompute int, nStorage int, idNonce []byte, } newNode.P2P.ID = invalidIdentity.P2PSigner.Public() newNode.Consensus.ID = invalidIdentity.ConsensusSigner.Public() - newNode.Committee.Certificate = invalidIdentity.TLSCertificate.Certificate[0] + newNode.Committee.Certificate = invalidIdentity.GetTLSCertificate().Certificate[0] invalid14.signed, err = node.MultiSignNode( []signature.Signer{ nodeIdentity.NodeSigner, ent.Signer, invalidIdentity.ConsensusSigner, invalidIdentity.P2PSigner, - invalidIdentity.TLSSigner, + invalidIdentity.GetTLSSigner(), }, api.RegisterNodeSignatureContext, newNode, diff --git a/go/runtime/committee/client.go b/go/runtime/committee/client.go index ffcce37b1f1..0df88421ab4 100644 --- a/go/runtime/committee/client.go +++ b/go/runtime/committee/client.go @@ -264,9 +264,11 @@ func (cc *committeeClient) updateConnectionLocked(n *node.Node) error { RootCAs: certPool, ServerName: identity.CommonName, } - if cc.clientIdentity != nil { + if cc.clientIdentity != nil && cc.clientIdentity.GetTLSCertificate() != nil { // Configure TLS client authentication if required. - tlsCfg.Certificates = []tls.Certificate{*cc.clientIdentity.TLSCertificate} + tlsCfg.GetClientCertificate = func(cri *tls.CertificateRequestInfo) (*tls.Certificate, error) { + return cc.clientIdentity.GetTLSCertificate(), nil + } } creds := credentials.NewTLS(&tlsCfg) diff --git a/go/sentry/client/client.go b/go/sentry/client/client.go index 3287b57ed01..9e153414052 100644 --- a/go/sentry/client/client.go +++ b/go/sentry/client/client.go @@ -45,9 +45,11 @@ func (c *Client) createConnection() error { certPool := x509.NewCertPool() certPool.AddCert(c.sentryCert) creds := credentials.NewTLS(&tls.Config{ - Certificates: []tls.Certificate{*c.nodeIdentity.TLSCertificate}, - RootCAs: certPool, - ServerName: identity.CommonName, + RootCAs: certPool, + ServerName: identity.CommonName, + GetClientCertificate: func(cri *tls.CertificateRequestInfo) (*tls.Certificate, error) { + return c.nodeIdentity.GetTLSCertificate(), nil + }, }) opts := grpc.WithTransportCredentials(creds) diff --git a/go/sentry/sentry.go b/go/sentry/sentry.go index 05041b2f01c..8ba7c101d92 100644 --- a/go/sentry/sentry.go +++ b/go/sentry/sentry.go @@ -40,7 +40,7 @@ func (b *backend) GetAddresses(ctx context.Context) (*api.SentryAddresses, error var committeeAddresses []node.CommitteeAddress for _, addr := range committeeAddrs { committeeAddresses = append(committeeAddresses, node.CommitteeAddress{ - Certificate: b.identity.TLSCertificate.Certificate[0], + Certificate: b.identity.GetTLSCertificate().Certificate[0], Address: addr, }) } diff --git a/go/storage/mkvs/interop/cmd/protocol_server.go b/go/storage/mkvs/interop/cmd/protocol_server.go index 515401f1f21..a34020540e2 100644 --- a/go/storage/mkvs/interop/cmd/protocol_server.go +++ b/go/storage/mkvs/interop/cmd/protocol_server.go @@ -45,7 +45,7 @@ func doProtoServer(cmd *cobra.Command, args []string) { genesisTestHelpers.SetTestChainContext() // Generate dummy identity. - ident, err := identity.LoadOrGenerate(dataDir, memorySigner.NewFactory()) + ident, err := identity.LoadOrGenerate(dataDir, memorySigner.NewFactory(), false) if err != nil { logger.Error("failed to generate identity", "err", err, diff --git a/go/worker/common/committee/accessctl.go b/go/worker/common/committee/accessctl.go index 6d090b87b30..0022a7d1a10 100644 --- a/go/worker/common/committee/accessctl.go +++ b/go/worker/common/committee/accessctl.go @@ -29,10 +29,20 @@ func (ap AccessPolicy) AddRulesForCommittee(policy *accessctl.Policy, committee continue } + // Allow the node to perform actions from the given access policy. subject := accessctl.SubjectFromDER(node.Committee.Certificate) for _, action := range ap.Actions { policy.Allow(subject, action) } + + // Make sure to also allow the node to perform actions after it has + // rotated its TLS certificates. + if node.Committee.NextCertificate != nil { + subject := accessctl.SubjectFromDER(node.Committee.NextCertificate) + for _, action := range ap.Actions { + policy.Allow(subject, action) + } + } } } @@ -58,7 +68,15 @@ func (ap AccessPolicy) AddRulesForNodeRoles( for _, action := range ap.Actions { policy.Allow(subject, action) } - } + // Make sure to also allow the node to perform actions after is has + // rotated its TLS certificates. + if n.Committee.NextCertificate != nil { + subject := accessctl.SubjectFromDER(n.Committee.NextCertificate) + for _, action := range ap.Actions { + policy.Allow(subject, action) + } + } + } } } diff --git a/go/worker/common/worker.go b/go/worker/common/worker.go index 3b0ae738412..b370d5f4f2e 100644 --- a/go/worker/common/worker.go +++ b/go/worker/common/worker.go @@ -267,9 +267,9 @@ func New( // Create externally-accessible gRPC server. serverConfig := &grpc.ServerConfig{ - Name: "external", - Port: cfg.ClientPort, - Certificate: identity.TLSCertificate, + Name: "external", + Port: cfg.ClientPort, + Identity: identity, } grpc, err := grpc.NewServer(serverConfig) if err != nil { diff --git a/go/worker/registration/worker.go b/go/worker/registration/worker.go index fc9c161fbec..e0949f42562 100644 --- a/go/worker/registration/worker.go +++ b/go/worker/registration/worker.go @@ -40,6 +40,9 @@ const ( // CfgRegistrationForceRegister overrides a previously saved deregistration // request. CfgRegistrationForceRegister = "worker.registration.force_register" + // CfgRegistrationRotateCerts sets the number of epochs that a node's TLS + // certificate should be valid for. + CfgRegistrationRotateCerts = "worker.registration.rotate_certs" ) var ( @@ -207,6 +210,38 @@ Loop: return case epoch = <-ch: // Epoch updated, check if we can submit a registration. + + // Check if we need to rotate the node's TLS certificate. + if !w.identity.DoNotRotateTLS { + // Per how many epochs should we do rotations? + rotateTLSCertsPer := epochtime.EpochTime(viper.GetUint64(CfgRegistrationRotateCerts)) + if rotateTLSCertsPer != 0 && epoch%rotateTLSCertsPer == 0 { + baseEpoch, err := w.epochtime.GetBaseEpoch(w.ctx) + if err != nil { + w.logger.Error("failed to get base epoch, node TLS certificate rotation failed", + "new_epoch", epoch, + "err", err, + ) + } else { + // Rotate node TLS certificates (but not on the + // first epoch). + // TODO: Make this time-based instead. + if epoch != baseEpoch { + err := w.identity.RotateCertificates() + if err != nil { + w.logger.Error("node TLS certificate rotation failed", + "new_epoch", epoch, + "err", err, + ) + } else { + w.logger.Info("node TLS certificates have been rotated", + "new_epoch", epoch, + ) + } + } + } + } + } case <-w.registerCh: // Notification that a role provider has been updated. } @@ -430,9 +465,17 @@ func (w *Worker) gatherCommitteeAddresses(sentryCommitteeAddrs []node.CommitteeA } for _, addr := range addrs { committeeAddresses = append(committeeAddresses, node.CommitteeAddress{ - Certificate: w.identity.TLSCertificate.Certificate[0], + Certificate: w.identity.GetTLSCertificate().Certificate[0], Address: addr, }) + // Make sure to also include the certificate that will be valid + // in the next epoch, so that the node remains reachable. + if nextCert := w.identity.GetNextTLSCertificate(); nextCert != nil { + committeeAddresses = append(committeeAddresses, node.CommitteeAddress{ + Certificate: nextCert.Certificate[0], + Address: addr, + }) + } } } @@ -469,13 +512,19 @@ func (w *Worker) registerNode(epoch epochtime.EpochTime, hook RegisterNodeHook) "epoch", epoch, ) + var nextCert []byte + if c := w.identity.GetNextTLSCertificate(); c != nil { + nextCert = c.Certificate[0] + } + identityPublic := w.identity.NodeSigner.Public() nodeDesc := node.Node{ ID: identityPublic, EntityID: w.entityID, Expiration: uint64(epoch) + 2, Committee: node.CommitteeInfo{ - Certificate: w.identity.TLSCertificate.Certificate[0], + Certificate: w.identity.GetTLSCertificate().Certificate[0], + NextCertificate: nextCert, }, P2P: node.P2PInfo{ ID: w.identity.P2PSigner.Public(), @@ -531,7 +580,7 @@ func (w *Worker) registerNode(epoch epochtime.EpochTime, hook RegisterNodeHook) w.registrationSigner, w.identity.P2PSigner, w.identity.ConsensusSigner, - w.identity.TLSSigner, + w.identity.GetTLSSigner(), } if !w.identity.NodeSigner.Public().Equal(w.registrationSigner.Public()) { // In the case where the registration signer is the entity signer @@ -748,6 +797,10 @@ func New( } } + if viper.GetUint64(CfgRegistrationRotateCerts) != 0 && identity.DoNotRotateTLS { + return nil, fmt.Errorf("node TLS certificate rotation must not be enabled if using pre-generated TLS certificates") + } + w := &Worker{ workerCommonCfg: workerCommonCfg, store: serviceStore, @@ -831,6 +884,7 @@ func init() { Flags.String(CfgRegistrationEntity, "", "entity to use as the node owner in registrations") Flags.String(CfgDebugRegistrationPrivateKey, "", "private key to use to sign node registrations") Flags.Bool(CfgRegistrationForceRegister, false, "override a previously saved deregistration request") + Flags.Uint64(CfgRegistrationRotateCerts, 0, "rotate node TLS certificates every N epochs (0 to disable)") _ = Flags.MarkHidden(CfgDebugRegistrationPrivateKey) _ = viper.BindPFlags(Flags) diff --git a/go/worker/sentry/grpc/init.go b/go/worker/sentry/grpc/init.go index c127e1bd711..f640764482f 100644 --- a/go/worker/sentry/grpc/init.go +++ b/go/worker/sentry/grpc/init.go @@ -61,6 +61,7 @@ func initConnection(ident *identity.Identity) (*upstreamConn, error) { if err != nil { return nil, fmt.Errorf("failed to parse address: %s: %w", addr, err) } + upstreamCerts, err := configparser.ParseCertificateFiles([]string{certFile}) if err != nil { return nil, fmt.Errorf("failed to parse certificate file %s: %w", certFile, err) @@ -71,9 +72,11 @@ func initConnection(ident *identity.Identity) (*upstreamConn, error) { certPool.AddCert(cert) } creds := credentials.NewTLS(&tlsPkg.Config{ - Certificates: []tlsPkg.Certificate{*ident.TLSCertificate}, - RootCAs: certPool, - ServerName: identity.CommonName, + RootCAs: certPool, + ServerName: identity.CommonName, + GetClientCertificate: func(cri *tlsPkg.CertificateRequestInfo) (*tlsPkg.Certificate, error) { + return ident.GetTLSCertificate(), nil + }, }) // Dial node @@ -131,10 +134,10 @@ func New(identity *identity.Identity) (*Worker, error) { // Create externally-accessible proxy gRPC server. serverConfig := &cmnGrpc.ServerConfig{ - Name: "sentry-grpc", - Port: uint16(viper.GetInt(CfgClientPort)), - Certificate: identity.TLSCertificate, - AuthFunc: g.authFunction(), + Name: "sentry-grpc", + Port: uint16(viper.GetInt(CfgClientPort)), + Identity: identity, + AuthFunc: g.authFunction(), CustomOptions: []grpc.ServerOption{ // All unknown requests will be proxied to the upstream grpc server. grpc.UnknownServiceHandler(proxy.Handler(upstreamConn.conn)), diff --git a/go/worker/sentry/worker.go b/go/worker/sentry/worker.go index d6b57ac510f..0f40a4f1241 100644 --- a/go/worker/sentry/worker.go +++ b/go/worker/sentry/worker.go @@ -120,9 +120,9 @@ func New(backend api.Backend, identity *identity.Identity) (*Worker, error) { if w.enabled { grpcServer, err := grpc.NewServer(&grpc.ServerConfig{ - Name: "sentry", - Port: uint16(viper.GetInt(CfgControlPort)), - Certificate: identity.TLSCertificate, + Name: "sentry", + Port: uint16(viper.GetInt(CfgControlPort)), + Identity: identity, }) if err != nil { return nil, fmt.Errorf("worker/sentry: failed to create a new gRPC server: %w", err) From b36c31c56629863294ae3bdc95b2a69521d44f73 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andrej=20Buko=C5=A1ek?= Date: Fri, 28 Feb 2020 08:47:30 +0100 Subject: [PATCH 2/8] worker/sentry: Handle upstream node TLS cert rotations --- go/oasis-node/cmd/node/node.go | 8 +- go/oasis-test-runner/oasis/args.go | 23 ++-- go/oasis-test-runner/oasis/sentry.go | 3 +- go/worker/registration/worker.go | 5 +- go/worker/sentry/grpc/init.go | 161 ++++++++++++++++++++++++--- go/worker/sentry/grpc/worker.go | 74 +++++++++++- go/worker/sentry/worker.go | 25 ++++- 7 files changed, 261 insertions(+), 38 deletions(-) diff --git a/go/oasis-node/cmd/node/node.go b/go/oasis-node/cmd/node/node.go index 958d1449f94..4560a9b2c5b 100644 --- a/go/oasis-node/cmd/node/node.go +++ b/go/oasis-node/cmd/node/node.go @@ -372,13 +372,13 @@ func (n *Node) startWorkers(logger *logging.Logger) error { return err } - // Start the sentry worker. - if err := n.SentryWorker.Start(); err != nil { + // Start the worker registration service. + if err := n.RegistrationWorker.Start(); err != nil { return err } - // Start the worker registration service. - if err := n.RegistrationWorker.Start(); err != nil { + // Start the sentry worker. + if err := n.SentryWorker.Start(); err != nil { return err } diff --git a/go/oasis-test-runner/oasis/args.go b/go/oasis-test-runner/oasis/args.go index a3d724dcfae..0872cfea814 100644 --- a/go/oasis-test-runner/oasis/args.go +++ b/go/oasis-test-runner/oasis/args.go @@ -307,10 +307,10 @@ func (args *argBuilder) grpcSentryUpstreamAddresses(addrs []string) *argBuilder return args } -func (args *argBuilder) grpcSentryUpstreamCertFiles(certFiles []string) *argBuilder { - for _, certFile := range certFiles { +func (args *argBuilder) grpcSentryUpstreamIDs(ids []string) *argBuilder { + for _, id := range ids { args.vec = append(args.vec, []string{ - "--" + workerGrpcSentry.CfgUpstreamCert, certFile, + "--" + workerGrpcSentry.CfgUpstreamID, id, }...) } return args @@ -374,31 +374,32 @@ func (args *argBuilder) addSentries(sentries []*Sentry) *argBuilder { } func (args *argBuilder) addValidatorsAsSentryUpstreams(validators []*Validator) *argBuilder { - var addrs []string + var addrs, ids []string for _, val := range validators { addrs = append(addrs, fmt.Sprintf("%s@127.0.0.1:%d", val.tmAddress, val.consensusPort)) + ids = append(ids, val.NodeID.String()) } - return args.tendermintSentryUpstreamAddress(addrs) + return args.tendermintSentryUpstreamAddress(addrs).grpcSentryUpstreamIDs(ids) } func (args *argBuilder) addSentryStorageWorkers(storageWorkers []*Storage) *argBuilder { - var addrs, certFiles, tmAddrs []string + var addrs, ids, tmAddrs []string for _, storageWorker := range storageWorkers { addrs = append(addrs, fmt.Sprintf("127.0.0.1:%d", storageWorker.clientPort)) - certFiles = append(certFiles, storageWorker.TLSCertPath()) + ids = append(ids, storageWorker.NodeID.String()) tmAddrs = append(tmAddrs, fmt.Sprintf("%s@127.0.0.1:%d", storageWorker.tmAddress, storageWorker.consensusPort)) } - return args.grpcSentryUpstreamAddresses(addrs).grpcSentryUpstreamCertFiles(certFiles).tendermintSentryUpstreamAddress(tmAddrs) + return args.grpcSentryUpstreamAddresses(addrs).grpcSentryUpstreamIDs(ids).tendermintSentryUpstreamAddress(tmAddrs) } func (args *argBuilder) addSentryKeymanagerWorkers(keymanagerWorkers []*Keymanager) *argBuilder { - var addrs, certFiles, tmAddrs []string + var addrs, ids, tmAddrs []string for _, keymanager := range keymanagerWorkers { addrs = append(addrs, fmt.Sprintf("127.0.0.1:%d", keymanager.workerClientPort)) - certFiles = append(certFiles, keymanager.TLSCertPath()) + ids = append(ids, keymanager.NodeID.String()) tmAddrs = append(tmAddrs, fmt.Sprintf("%s@127.0.0.1:%d", keymanager.tmAddress, keymanager.consensusPort)) } - return args.grpcSentryUpstreamAddresses(addrs).grpcSentryUpstreamCertFiles(certFiles).tendermintSentryUpstreamAddress(tmAddrs) + return args.grpcSentryUpstreamAddresses(addrs).grpcSentryUpstreamIDs(ids).tendermintSentryUpstreamAddress(tmAddrs) } func (args *argBuilder) appendSeedNodes(net *Network) *argBuilder { diff --git a/go/oasis-test-runner/oasis/sentry.go b/go/oasis-test-runner/oasis/sentry.go index c5df0d6fbc7..16d9b8d5b56 100644 --- a/go/oasis-test-runner/oasis/sentry.go +++ b/go/oasis-test-runner/oasis/sentry.go @@ -61,7 +61,8 @@ func (sentry *Sentry) startNode() error { workerSentryControlPort(sentry.controlPort). tendermintCoreListenAddress(sentry.consensusPort). appendNetwork(sentry.net). - appendSeedNodes(sentry.net) + appendSeedNodes(sentry.net). + internalSocketAddress(sentry.net.validators[0].SocketPath()) if len(validators) > 0 { args = args.addValidatorsAsSentryUpstreams(validators) diff --git a/go/worker/registration/worker.go b/go/worker/registration/worker.go index e0949f42562..03b840f2100 100644 --- a/go/worker/registration/worker.go +++ b/go/worker/registration/worker.go @@ -508,8 +508,10 @@ func (w *Worker) gatherCommitteeAddresses(sentryCommitteeAddrs []node.CommitteeA } func (w *Worker) registerNode(epoch epochtime.EpochTime, hook RegisterNodeHook) error { + identityPublic := w.identity.NodeSigner.Public() w.logger.Info("performing node (re-)registration", "epoch", epoch, + "node_id", identityPublic.String(), ) var nextCert []byte @@ -517,7 +519,6 @@ func (w *Worker) registerNode(epoch epochtime.EpochTime, hook RegisterNodeHook) nextCert = c.Certificate[0] } - identityPublic := w.identity.NodeSigner.Public() nodeDesc := node.Node{ ID: identityPublic, EntityID: w.entityID, @@ -631,7 +632,7 @@ func (w *Worker) querySentries() ([]node.ConsensusAddress, []node.CommitteeAddre // Query sentry node for addresses. sentryAddresses, err := client.GetAddresses(w.ctx) if err != nil { - w.logger.Warn("failed to obtain addressesfrom sentry node", + w.logger.Warn("failed to obtain addresses from sentry node", "err", err, "sentry_address", sentryAddr, ) diff --git a/go/worker/sentry/grpc/init.go b/go/worker/sentry/grpc/init.go index f640764482f..149158c633e 100644 --- a/go/worker/sentry/grpc/init.go +++ b/go/worker/sentry/grpc/init.go @@ -6,13 +6,16 @@ import ( tlsPkg "crypto/tls" "crypto/x509" "fmt" + "time" + "github.com/cenkalti/backoff/v4" flag "github.com/spf13/pflag" "github.com/spf13/viper" "google.golang.org/grpc" "google.golang.org/grpc/credentials" "google.golang.org/grpc/resolver" + "github.com/oasislabs/oasis-core/go/common/crypto/signature" cmnGrpc "github.com/oasislabs/oasis-core/go/common/grpc" "github.com/oasislabs/oasis-core/go/common/grpc/policy" "github.com/oasislabs/oasis-core/go/common/grpc/proxy" @@ -20,6 +23,9 @@ import ( "github.com/oasislabs/oasis-core/go/common/identity" "github.com/oasislabs/oasis-core/go/common/logging" "github.com/oasislabs/oasis-core/go/common/node" + genesis "github.com/oasislabs/oasis-core/go/genesis/file" + cmdGrpc "github.com/oasislabs/oasis-core/go/oasis-node/cmd/common/grpc" + registry "github.com/oasislabs/oasis-core/go/registry/api" "github.com/oasislabs/oasis-core/go/worker/common/configparser" ) @@ -29,14 +35,16 @@ const ( // CfgUpstreamAddress is the grpc address of the upstream node. CfgUpstreamAddress = "worker.sentry.grpc.upstream.address" - - // CfgUpstreamCert is the path to the certificate files of the upstream node. - CfgUpstreamCert = "worker.sentry.grpc.upstream.cert" + // CfgUpstreamID is the node ID of the upstream node. + CfgUpstreamID = "worker.sentry.grpc.upstream.id" // CfgClientAddresses are addresses on which the gRPC endpoint is reachable. CfgClientAddresses = "worker.sentry.grpc.client.address" // CfgClientPort is the sentry node's client port. CfgClientPort = "worker.sentry.grpc.client.port" + + maxRetries = 3 + retryInterval = 1 * time.Second ) // Flags has the configuration flags. @@ -51,25 +59,146 @@ func GetNodeAddresses() ([]node.Address, error) { return clientAddresses, nil } -func initConnection(ident *identity.Identity) (*upstreamConn, error) { +func initConnection(ctx context.Context, logger *logging.Logger, ident *identity.Identity) (*upstreamConn, error) { var err error addr := viper.GetString(CfgUpstreamAddress) - certFile := viper.GetString(CfgUpstreamCert) upstreamAddrs, err := configparser.ParseAddressList([]string{addr}) if err != nil { return nil, fmt.Errorf("failed to parse address: %s: %w", addr, err) } - upstreamCerts, err := configparser.ParseCertificateFiles([]string{certFile}) + upstreamNodeIDRaw := viper.GetString(CfgUpstreamID) + var upstreamNodeID signature.PublicKey + err = upstreamNodeID.UnmarshalText([]byte(upstreamNodeIDRaw)) if err != nil { - return nil, fmt.Errorf("failed to parse certificate file %s: %w", certFile, err) + return nil, fmt.Errorf("malformed upstream node ID: %s: %w", upstreamNodeIDRaw, err) + } + + logger.Info("upstream node ID is valid", + "upstream_node_id", upstreamNodeIDRaw, + ) + + // Get upstream node's certificates from registry. + regAddr := viper.GetString(cmdGrpc.CfgAddress) + regConn, err := cmnGrpc.Dial(regAddr, []grpc.DialOption{ + grpc.WithInsecure(), + grpc.WithDefaultCallOptions(grpc.WaitForReady(true)), + }...) + if err != nil { + return nil, fmt.Errorf("failed to create registry gRPC client: %w", err) + } + regClient := registry.NewRegistryClient(regConn) + + var upstreamNode *node.Node + var numRetries uint + + retryGetNode := func() error { + var grr error + upstreamNode, grr = regClient.GetNode(ctx, ®istry.IDQuery{ + Height: 0, + ID: upstreamNodeID, + }) + + if numRetries < maxRetries { + numRetries++ + return grr + } + return backoff.Permanent(grr) + } + retryGetNodeSchedule := backoff.NewConstantBackOff(retryInterval) + err = backoff.Retry(retryGetNode, backoff.WithContext(retryGetNodeSchedule, ctx)) + if err != nil { + // Failed to get node from registry, try the genesis doc instead. + logger.Warn("unable to get upstream node from registry, falling back to genesis", + "err", err, + ) + + genesisProvider, grr := genesis.DefaultFileProvider() + if grr != nil { + return nil, fmt.Errorf("failed to get upstream node from both registry and genesis, failure loading genesis file: %w", grr) + } + + genDoc, grr := genesisProvider.GetGenesisDocument() + if grr != nil { + return nil, fmt.Errorf("failed to get upstream node from both registry and genesis, failure getting genesis document: %w", grr) + } + + genNodes := genDoc.Registry.Nodes + upstreamNode = nil + for _, sn := range genNodes { + var n node.Node + if grr := sn.Open(registry.RegisterGenesisNodeSignatureContext, &n); grr != nil { + return nil, fmt.Errorf("failed to open signed node: %w", grr) + } + if n.ID.Equal(upstreamNodeID) { + upstreamNode = &n + break + } + } + } + + if upstreamNode == nil { + // Wait for the node to register. + regCh, regSub, regErr := regClient.WatchNodes(ctx) + if regErr != nil { + return nil, fmt.Errorf("unable to watch for nodes: %w", regErr) + } + defer regSub.Close() + logger.Info("waiting for upstream node to register...", + "upstream_node_id", upstreamNodeID.String(), + ) + for { + select { + case nodeEvent, ok := <-regCh: + if !ok { + return nil, fmt.Errorf("WatchNodes channel closed while waiting for upstream node to register") + } + + if nodeEvent.Node.ID.Equal(upstreamNodeID) { + // Our upstream node finally registered! + upstreamNode = nodeEvent.Node + break + } + case <-ctx.Done(): + return nil, fmt.Errorf("context aborted") + } + + if upstreamNode != nil { + break + } + } } + if upstreamNode == nil { + return nil, fmt.Errorf("upstream node not found in registry nor genesis document, nor did it register") + } + + upstreamCerts := [][]byte{} + if upstreamNode.Committee.Certificate != nil { + upstreamCerts = append(upstreamCerts, upstreamNode.Committee.Certificate) + } + if upstreamNode.Committee.NextCertificate != nil { + upstreamCerts = append(upstreamCerts, upstreamNode.Committee.NextCertificate) + } + if len(upstreamCerts) == 0 { + return nil, fmt.Errorf("upstream node has no defined TLS certificates") + } + + logger.Info("found certificates for upstream node", + "num_certs", len(upstreamCerts), + ) + certPool := x509.NewCertPool() for _, cert := range upstreamCerts { - certPool.AddCert(cert) + // Parse cert and add it to the pool. + parsedCert, grr := x509.ParseCertificate(cert) + if grr != nil { + // This should never happen. + return nil, fmt.Errorf("unable to parse certificate: %w", grr) + } + certPool.AddCert(parsedCert) } creds := credentials.NewTLS(&tlsPkg.Config{ RootCAs: certPool, @@ -98,8 +227,11 @@ func initConnection(ident *identity.Identity) (*upstreamConn, error) { manualResolver.UpdateState(resolverState) return &upstreamConn{ - conn: conn, - resolverCleanupCb: cleanupCb, + nodeID: upstreamNodeID, + certs: upstreamCerts, + conn: conn, + registryClientConn: regConn, + resolverCleanupCb: cleanupCb, }, nil } @@ -126,7 +258,7 @@ func New(identity *identity.Identity) (*Worker, error) { if g.enabled { logger.Info("Initializing gRPC sentry worker") - upstreamConn, err := initConnection(identity) + upstreamConn, err := initConnection(g.ctx, logger, identity) if err != nil { return nil, fmt.Errorf("gRPC sentry worker initializing upstream connection failure: %w", err) } @@ -143,11 +275,11 @@ func New(identity *identity.Identity) (*Worker, error) { grpc.UnknownServiceHandler(proxy.Handler(upstreamConn.conn)), }, } - grpc, err := cmnGrpc.NewServer(serverConfig) + grpcServer, err := cmnGrpc.NewServer(serverConfig) if err != nil { return nil, err } - g.grpc = grpc + g.grpc = grpcServer } return g, nil @@ -156,9 +288,10 @@ func New(identity *identity.Identity) (*Worker, error) { func init() { Flags.Bool(CfgEnabled, false, "Enable Sentry gRPC worker (NOTE: This should only be enabled on gRPC Sentry nodes.)") Flags.String(CfgUpstreamAddress, "", "Address of the upstream node") - Flags.String(CfgUpstreamCert, "", "Path to tls certificate of the upstream node") + Flags.String(CfgUpstreamID, "", "ID of the upstream node") Flags.StringSlice(CfgClientAddresses, []string{}, "Address/port(s) to use for client connections for accessing this node") Flags.Uint16(CfgClientPort, 9100, "Port to use for incoming gRPC client connections") _ = viper.BindPFlags(Flags) + Flags.AddFlagSet(cmdGrpc.ClientFlags) } diff --git a/go/worker/sentry/grpc/worker.go b/go/worker/sentry/grpc/worker.go index 85282062cb4..48bf7721319 100644 --- a/go/worker/sentry/grpc/worker.go +++ b/go/worker/sentry/grpc/worker.go @@ -1,6 +1,7 @@ package grpc import ( + "bytes" "context" "fmt" "sync" @@ -11,6 +12,7 @@ import ( "github.com/oasislabs/oasis-core/go/common/accessctl" "github.com/oasislabs/oasis-core/go/common/cbor" + "github.com/oasislabs/oasis-core/go/common/crypto/signature" cmnGrpc "github.com/oasislabs/oasis-core/go/common/grpc" "github.com/oasislabs/oasis-core/go/common/grpc/auth" "github.com/oasislabs/oasis-core/go/common/grpc/policy" @@ -18,12 +20,13 @@ import ( "github.com/oasislabs/oasis-core/go/common/identity" "github.com/oasislabs/oasis-core/go/common/logging" "github.com/oasislabs/oasis-core/go/common/service" + registry "github.com/oasislabs/oasis-core/go/registry/api" ) var _ service.BackgroundService = (*Worker)(nil) // Worker is a gRPC sentry node worker proxying gRPC requests to upstream node. -type Worker struct { +type Worker struct { // nolint: maligned sync.RWMutex enabled bool @@ -41,14 +44,28 @@ type Worker struct { // Per service policy checkers. grpcPolicyCheckers map[cmnGrpc.ServiceName]*policy.DynamicRuntimePolicyChecker + registryClient registry.Backend + *upstreamConn grpc *cmnGrpc.Server identity *identity.Identity + + // Set to true when quitting if the master worker shouldn't quit, + // but re-init due to changed TLS certificates on the upstream node. + AmQuittingBecauseTLSCertsHaveRotated bool } type upstreamConn struct { - conn *grpc.ClientConn + // ID of the upstream node. + nodeID signature.PublicKey + // TLS certificates for the upstream node. + certs [][]byte + // Client connection to the upstream node. + conn *grpc.ClientConn + // Registry client connection. + registryClientConn *grpc.ClientConn + // Cleanup callback for the manual resolver. resolverCleanupCb func() } @@ -148,10 +165,38 @@ func (g *Worker) updatePolicies(p policyAPI.ServicePolicies) { } } +// Returns true if we need to restart. +func (g *Worker) checkUpstreamNodeTLSCerts(nodeEvent *registry.NodeEvent) bool { + if !nodeEvent.IsRegistration { + return false + } + + // Check if it's our upstream node. + if !nodeEvent.Node.ID.Equal(g.nodeID) { + return false + } + + // XXX: Not sure if certificates are guaranteed to be sorted, + // so we do this slow lookup to be sure. + var numCertMatches uint + for _, cert1 := range g.certs { + for _, addr2 := range nodeEvent.Node.Committee.Addresses { + if bytes.Equal(cert1, addr2.Certificate) { + numCertMatches++ + } + } + } + + // If the number of matching certificates differs, they were rotated, + // so a reconnect is required. + return numCertMatches != uint(len(g.certs)) +} + func (g *Worker) worker() { defer close(g.quitCh) defer (g.cancelCtx)() + // Initialize policy watcher. g.policyWatcher = policyAPI.NewPolicyWatcherClient(g.conn) ch, sub, err := g.policyWatcher.WatchPolicies(g.ctx) if err != nil { @@ -162,11 +207,34 @@ func (g *Worker) worker() { } defer sub.Close() + // Initialize registry watcher. + g.registryClient = registry.NewRegistryClient(g.registryClientConn) + regCh, regSub, regErr := g.registryClient.WatchNodes(g.ctx) + if regErr != nil { + g.logger.Error("failed to watch registry nodes", + "err", regErr, + ) + return + } + defer regSub.Close() + + // Initialization complete. close(g.initCh) - // Watch Policies. + // Watch policies and registry. for { select { + case nodeEvent, ok := <-regCh: + if !ok { + g.logger.Error("WatchNodes stream closed") + return + } + + if g.checkUpstreamNodeTLSCerts(nodeEvent) { + // Upstream node TLS certificates changed, restart is needed. + g.AmQuittingBecauseTLSCertsHaveRotated = true + return + } case p, ok := <-ch: if !ok { g.logger.Error("WatchPolicies stream closed") diff --git a/go/worker/sentry/worker.go b/go/worker/sentry/worker.go index 0f40a4f1241..5eba2a1dedc 100644 --- a/go/worker/sentry/worker.go +++ b/go/worker/sentry/worker.go @@ -139,10 +139,29 @@ func New(backend api.Backend, identity *identity.Identity) (*Worker, error) { } w.grpcWorker = sentryGrpcWorker - // Stop in case of grpc/worker quitting. go func() { - <-w.grpcWorker.Quit() - w.Stop() + for { // nolint: S1000 + // Stop in case of grpc/worker quitting. + select { + case <-w.grpcWorker.Quit(): + if !w.grpcWorker.AmQuittingBecauseTLSCertsHaveRotated { + // Worker is actually quitting. + w.Stop() + return + } + + // Upstream TLS certs have rotated, reconnect. + sgw, err := workerGrpcSentry.New(identity) + if err != nil { + w.logger.Error("worker/sentry: failed to re-init worker after upstream TLS certificate rotation", + "err", err, + ) + w.Stop() + return + } + w.grpcWorker = sgw + } + } }() return w, nil From 2493d4e3eb28cd49233c954f41167182a795047e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andrej=20Buko=C5=A1ek?= Date: Thu, 5 Mar 2020 11:04:56 +0100 Subject: [PATCH 3/8] grpc/proxy: Dial upstream on-demand --- go/common/grpc/proxy/proxy.go | 34 +++++++++++++++++++++++------- go/common/grpc/proxy/proxy_test.go | 21 ++++++++++++------ go/worker/sentry/grpc/init.go | 16 +++++++++----- go/worker/sentry/grpc/worker.go | 14 ++++++++++++ 4 files changed, 65 insertions(+), 20 deletions(-) diff --git a/go/common/grpc/proxy/proxy.go b/go/common/grpc/proxy/proxy.go index 8a6d440c5f6..e497b4d6403 100644 --- a/go/common/grpc/proxy/proxy.go +++ b/go/common/grpc/proxy/proxy.go @@ -17,21 +17,29 @@ import ( "github.com/oasislabs/oasis-core/go/common/logging" ) -// Handler returns a grpc StreamHandler than can be used -// to proxy requests to provided client. -// XXX: potentially the connection should be established in this package, -// with some sensible defaults e.g. KeepAlive set. -// We might also want to establish a pool of connections to the upstream. -func Handler(conn *grpc.ClientConn) grpc.StreamHandler { +// Dialer should return a gRPC ClientConn that will be used +// to forward calls to. +type Dialer func(ctx context.Context) (*grpc.ClientConn, error) + +// Handler returns a gRPC StreamHandler than can be used +// to proxy requests to the client returned by the proxy dialer. +func Handler(dialer Dialer) grpc.StreamHandler { proxy := &proxy{ logger: logging.GetLogger("grpc/proxy"), - upstreamConn: conn, + dialer: dialer, + upstreamConn: nil, // Will be dialed on-demand. } return grpc.StreamHandler(proxy.handler) } type proxy struct { + // This is the dialer callback we use to make new connections to the + // upstream server if the connection drops, etc. + dialer Dialer + + // This is a cached client connection to the upstream server, so we + // don't have to re-dial it on every call. upstreamConn *grpc.ClientConn logger *logging.Logger @@ -66,6 +74,15 @@ func (p *proxy) handler(srv interface{}, stream grpc.ServerStream) error { // Pass subject header upstream. upstreamCtx = metadata.AppendToOutgoingContext(upstreamCtx, policy.ForwardedSubjectMD, sub) + // Dial upstream if necessary. + if p.upstreamConn == nil { + var grr error + p.upstreamConn, grr = p.dialer(stream.Context()) + if grr != nil { + return grr + } + } + upstreamStream, err := grpc.NewClientStream( upstreamCtx, desc, @@ -92,7 +109,7 @@ func (p *proxy) handler(srv interface{}, stream grpc.ServerStream) error { // can still be in progress. p.logger.Debug("downstream EOF") if err = upstreamStream.CloseSend(); err != nil { - p.logger.Error("failrue closing upstream stream", + p.logger.Error("failure closing upstream stream", "err", err, ) } @@ -154,6 +171,7 @@ func (p *proxy) proxyUpstream(downstream grpc.ServerStream, upstream grpc.Client func (p *proxy) proxyDownstream(upstream grpc.ClientStream, downstream grpc.ServerStream) <-chan error { errCh := make(chan error, 1) var headerSent bool + go func() { for { // Wait for stream msg (from upstream). diff --git a/go/common/grpc/proxy/proxy_test.go b/go/common/grpc/proxy/proxy_test.go index b2bf7798259..59a3f0677a1 100644 --- a/go/common/grpc/proxy/proxy_test.go +++ b/go/common/grpc/proxy/proxy_test.go @@ -74,15 +74,19 @@ func TestGRPCProxy(t *testing.T) { err = grpcServer.Start() require.NoErrorf(err, "Failed to start the gRPC server: %v", err) - // Connect to gRPC server. clientTLSCreds := credentials.NewTLS(&tls.Config{ Certificates: []tls.Certificate{*clientTLSCert}, RootCAs: serverCertPool, ServerName: "oasis-node", }) - address := fmt.Sprintf("%s:%d", host, port) - conn := connectToGrpcServer(ctx, t, address, clientTLSCreds) - defer conn.Close() + + // Create upstream dialer. + upstreamDialer := func(ctx context.Context) (*grpc.ClientConn, error) { + // Connect to gRPC server. + address := fmt.Sprintf("%s:%d", host, port) + conn := connectToGrpcServer(ctx, t, address, clientTLSCreds) + return conn, nil + } // Create a proxy gRPC server. proxyServerConfig := &commonGrpc.ServerConfig{ @@ -91,7 +95,7 @@ func TestGRPCProxy(t *testing.T) { Identity: &identity.Identity{}, CustomOptions: []grpc.ServerOption{ // All unknown requests will be proxied to the grpc server above. - grpc.UnknownServiceHandler(Handler(conn)), + grpc.UnknownServiceHandler(Handler(upstreamDialer)), }, } proxyServerConfig.Identity.SetTLSCertificate(serverTLSCert) @@ -102,12 +106,15 @@ func TestGRPCProxy(t *testing.T) { require.NoErrorf(err, "Failed to start the proxy gRPC server: %v", err) // Connect to the proxy grpc server. - address = fmt.Sprintf("%s:%d", host, port+1) + address := fmt.Sprintf("%s:%d", host, port+1) proxyConn := connectToGrpcServer(ctx, t, address, clientTLSCreds) defer proxyConn.Close() // Create a new ping client. - client := cmnTesting.NewPingClient(conn) + upstreamAddress := fmt.Sprintf("%s:%d", host, port) + upstreamConn := connectToGrpcServer(ctx, t, upstreamAddress, clientTLSCreds) + defer upstreamConn.Close() + client := cmnTesting.NewPingClient(upstreamConn) pingQuery := &cmnTesting.PingQuery{} // Test Ping. res, err := client.Ping(ctx, pingQuery) diff --git a/go/worker/sentry/grpc/init.go b/go/worker/sentry/grpc/init.go index 149158c633e..df145956e1a 100644 --- a/go/worker/sentry/grpc/init.go +++ b/go/worker/sentry/grpc/init.go @@ -258,11 +258,17 @@ func New(identity *identity.Identity) (*Worker, error) { if g.enabled { logger.Info("Initializing gRPC sentry worker") - upstreamConn, err := initConnection(g.ctx, logger, identity) - if err != nil { - return nil, fmt.Errorf("gRPC sentry worker initializing upstream connection failure: %w", err) + g.upstreamDialer = func(ctx context.Context) (*grpc.ClientConn, error) { + g.upstreamDialerMutex.Lock() + defer g.upstreamDialerMutex.Unlock() + + upstreamConn, err := initConnection(g.ctx, logger, identity) + if err != nil { + return nil, fmt.Errorf("gRPC sentry worker initializing upstream connection failure: %w", err) + } + g.upstreamConn = upstreamConn + return upstreamConn.conn, nil } - g.upstreamConn = upstreamConn // Create externally-accessible proxy gRPC server. serverConfig := &cmnGrpc.ServerConfig{ @@ -272,7 +278,7 @@ func New(identity *identity.Identity) (*Worker, error) { AuthFunc: g.authFunction(), CustomOptions: []grpc.ServerOption{ // All unknown requests will be proxied to the upstream grpc server. - grpc.UnknownServiceHandler(proxy.Handler(upstreamConn.conn)), + grpc.UnknownServiceHandler(proxy.Handler(g.upstreamDialer)), }, } grpcServer, err := cmnGrpc.NewServer(serverConfig) diff --git a/go/worker/sentry/grpc/worker.go b/go/worker/sentry/grpc/worker.go index 48bf7721319..8776013c70d 100644 --- a/go/worker/sentry/grpc/worker.go +++ b/go/worker/sentry/grpc/worker.go @@ -17,6 +17,7 @@ import ( "github.com/oasislabs/oasis-core/go/common/grpc/auth" "github.com/oasislabs/oasis-core/go/common/grpc/policy" policyAPI "github.com/oasislabs/oasis-core/go/common/grpc/policy/api" + grpcProxy "github.com/oasislabs/oasis-core/go/common/grpc/proxy" "github.com/oasislabs/oasis-core/go/common/identity" "github.com/oasislabs/oasis-core/go/common/logging" "github.com/oasislabs/oasis-core/go/common/service" @@ -48,6 +49,9 @@ type Worker struct { // nolint: maligned *upstreamConn + upstreamDialer grpcProxy.Dialer + upstreamDialerMutex sync.Mutex + grpc *cmnGrpc.Server identity *identity.Identity @@ -196,6 +200,16 @@ func (g *Worker) worker() { defer close(g.quitCh) defer (g.cancelCtx)() + if g.upstreamConn == nil { + _, err := g.upstreamDialer(g.ctx) + if err != nil { + g.logger.Error("failed to establish upstream connection", + "err", err, + ) + return + } + } + // Initialize policy watcher. g.policyWatcher = policyAPI.NewPolicyWatcherClient(g.conn) ch, sub, err := g.policyWatcher.WatchPolicies(g.ctx) From 6d00bf4fc9da3ef2004ea64fc68f29cdf2e42145 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andrej=20Buko=C5=A1ek?= Date: Wed, 18 Mar 2020 10:11:18 +0100 Subject: [PATCH 4/8] worker/sentry: Add commands to set/get upstream node TLS certs --- .buildkite/code.pipeline.yml | 2 +- go/oasis-test-runner/oasis/keymanager.go | 8 +- go/oasis-test-runner/oasis/oasis.go | 2 +- go/oasis-test-runner/oasis/storage.go | 8 +- go/oasis-test-runner/oasis/validator.go | 8 +- go/sentry/api/api.go | 14 +++ go/sentry/api/grpc.go | 76 ++++++++++++++ go/sentry/sentry.go | 36 +++++++ go/worker/registration/worker.go | 47 +++++++++ go/worker/sentry/grpc/init.go | 126 +++-------------------- go/worker/sentry/grpc/worker.go | 82 ++++----------- go/worker/sentry/worker.go | 27 +---- 12 files changed, 217 insertions(+), 219 deletions(-) diff --git a/.buildkite/code.pipeline.yml b/.buildkite/code.pipeline.yml index d730826452f..078d07551fc 100644 --- a/.buildkite/code.pipeline.yml +++ b/.buildkite/code.pipeline.yml @@ -203,7 +203,7 @@ steps: ############### - label: E2E tests parallelism: 7 - timeout_in_minutes: 11 + timeout_in_minutes: 12 command: - .buildkite/scripts/download_e2e_test_artifacts.sh - .buildkite/scripts/test_e2e.sh diff --git a/go/oasis-test-runner/oasis/keymanager.go b/go/oasis-test-runner/oasis/keymanager.go index c6859e607dd..bd85464c417 100644 --- a/go/oasis-test-runner/oasis/keymanager.go +++ b/go/oasis-test-runner/oasis/keymanager.go @@ -238,16 +238,10 @@ func (net *Network) NewKeymanager(cfg *KeymanagerCfg) (*Keymanager, error) { return nil, errors.Wrap(err, "oasis/keymanager: failed to create keymanager subdir") } - // If we're using sentry nodes, we need to have a static TLS certificate. - var persistTLS bool - if len(cfg.SentryIndices) > 0 { - persistTLS = true - } - // Pre-provision the node identity so that we can update the entity. // TODO: Use proper key manager index when multiple key managers are supported. seed := fmt.Sprintf(keymanagerIdentitySeedTemplate, 0) - publicKey, err := net.provisionNodeIdentity(kmDir, seed, persistTLS) + publicKey, err := net.provisionNodeIdentity(kmDir, seed, false) if err != nil { return nil, errors.Wrap(err, "oasis/keymanager: failed to provision node identity") } diff --git a/go/oasis-test-runner/oasis/oasis.go b/go/oasis-test-runner/oasis/oasis.go index 80b8096ca0f..60c4762f7c6 100644 --- a/go/oasis-test-runner/oasis/oasis.go +++ b/go/oasis-test-runner/oasis/oasis.go @@ -625,7 +625,7 @@ func (net *Network) startOasisNode( args = append(args, baseArgs...) args = append(args, extraArgs.vec...) - if len(net.sentries) == 0 && len(net.byzantine) == 0 && net.iasProxy == nil { + if !strings.HasPrefix(node.Name, "sentry-") && len(net.byzantine) == 0 && net.iasProxy == nil { args = append(args, []string{"--" + registration.CfgRegistrationRotateCerts, "1"}...) } diff --git a/go/oasis-test-runner/oasis/storage.go b/go/oasis-test-runner/oasis/storage.go index cfac467b2ea..52f87f713da 100644 --- a/go/oasis-test-runner/oasis/storage.go +++ b/go/oasis-test-runner/oasis/storage.go @@ -141,15 +141,9 @@ func (net *Network) NewStorage(cfg *StorageCfg) (*Storage, error) { return nil, errors.Wrap(err, "oasis/storage: failed to create storage subdir") } - // If we're using sentry nodes, we need to have a static TLS certificate. - var persistTLS bool - if len(cfg.SentryIndices) > 0 { - persistTLS = true - } - // Pre-provision the node identity so that we can update the entity. seed := fmt.Sprintf(storageIdentitySeedTemplate, len(net.storageWorkers)) - publicKey, err := net.provisionNodeIdentity(storageDir, seed, persistTLS) + publicKey, err := net.provisionNodeIdentity(storageDir, seed, false) if err != nil { return nil, errors.Wrap(err, "oasis/storage: failed to provision node identity") } diff --git a/go/oasis-test-runner/oasis/validator.go b/go/oasis-test-runner/oasis/validator.go index 3b16367aff8..c8f43ebaefb 100644 --- a/go/oasis-test-runner/oasis/validator.go +++ b/go/oasis-test-runner/oasis/validator.go @@ -157,16 +157,10 @@ func (net *Network) NewValidator(cfg *ValidatorCfg) (*Validator, error) { consensusAddrs = append(consensusAddrs, &consensusAddr) } - // If we're using sentry nodes, we need to have a static TLS certificate. - var persistTLS bool - if len(val.sentries) > 0 { - persistTLS = true - } - // Load node's identity, so that we can pass the validator's Tendermint // address to sentry node(s) to configure it as a private peer. seed := fmt.Sprintf(validatorIdentitySeedTemplate, len(net.validators)) - valPublicKey, err := net.provisionNodeIdentity(valDir, seed, persistTLS) + valPublicKey, err := net.provisionNodeIdentity(valDir, seed, false) if err != nil { return nil, errors.Wrap(err, "oasis/validator: failed to provision node identity") } diff --git a/go/sentry/api/api.go b/go/sentry/api/api.go index f153f7529f4..2d1385960d9 100644 --- a/go/sentry/api/api.go +++ b/go/sentry/api/api.go @@ -3,6 +3,7 @@ package api import ( "context" + "crypto/tls" "github.com/oasislabs/oasis-core/go/common/node" ) @@ -13,9 +14,22 @@ type SentryAddresses struct { Committee []node.CommitteeAddress } +// UpstreamTLSCertificates contains the upstream TLS certificates. +type UpstreamTLSCertificates struct { + Certificate *tls.Certificate + NextCertificate *tls.Certificate +} + // Backend is a sentry backend implementation. type Backend interface { // Get addresses returns the list of consensus and committee addresses of // the sentry node. GetAddresses(context.Context) (*SentryAddresses, error) + + // SetUpstreamTLSCertificates notifies the sentry node of the new + // TLS certificates used by its upstream node. + SetUpstreamTLSCertificates(context.Context, *tls.Certificate, *tls.Certificate) error + + // GetUpstreamTLSCertificates returns the TLS certificates of the sentry node's upstream node. + GetUpstreamTLSCertificates(context.Context) (*UpstreamTLSCertificates, error) } diff --git a/go/sentry/api/grpc.go b/go/sentry/api/grpc.go index ce0a3dcebbc..261d4c08f92 100644 --- a/go/sentry/api/grpc.go +++ b/go/sentry/api/grpc.go @@ -2,6 +2,7 @@ package api import ( "context" + "crypto/tls" "google.golang.org/grpc" @@ -15,6 +16,12 @@ var ( // methodGetAddresses is the GetAddresses method. methodGetAddresses = serviceName.NewMethod("GetAddresses", nil) + // methodSetUpstreamTLSCertificates is the SetUpstreamTLSCertificates method. + methodSetUpstreamTLSCertificates = serviceName.NewMethod("SetUpstreamTLSCertificates", UpstreamTLSCertificates{}) + + // methodGetUpstreamTLSCertificates is the GetUpstreamTLSCertificates method. + methodGetUpstreamTLSCertificates = serviceName.NewMethod("GetUpstreamTLSCertificates", nil) + // serviceDesc is the gRPC service descriptor. serviceDesc = grpc.ServiceDesc{ ServiceName: string(serviceName), @@ -24,6 +31,14 @@ var ( MethodName: methodGetAddresses.ShortName(), Handler: handlerGetAddresses, }, + { + MethodName: methodSetUpstreamTLSCertificates.ShortName(), + Handler: handlerSetUpstreamTLSCertificates, + }, + { + MethodName: methodGetUpstreamTLSCertificates.ShortName(), + Handler: handlerGetUpstreamTLSCertificates, + }, }, Streams: []grpc.StreamDesc{}, } @@ -48,6 +63,48 @@ func handlerGetAddresses( // nolint: golint return interceptor(ctx, nil, info, handler) } +func handlerSetUpstreamTLSCertificates( // nolint: golint + srv interface{}, + ctx context.Context, + dec func(interface{}) error, + interceptor grpc.UnaryServerInterceptor, +) (interface{}, error) { + var req UpstreamTLSCertificates + if err := dec(&req); err != nil { + return nil, err + } + if interceptor == nil { + return nil, srv.(Backend).SetUpstreamTLSCertificates(ctx, req.Certificate, req.NextCertificate) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: methodSetUpstreamTLSCertificates.FullName(), + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return nil, srv.(Backend).SetUpstreamTLSCertificates(ctx, req.(*UpstreamTLSCertificates).Certificate, req.(*UpstreamTLSCertificates).NextCertificate) + } + return interceptor(ctx, &req, info, handler) +} + +func handlerGetUpstreamTLSCertificates( // nolint: golint + srv interface{}, + ctx context.Context, + dec func(interface{}) error, + interceptor grpc.UnaryServerInterceptor, +) (interface{}, error) { + if interceptor == nil { + return srv.(Backend).GetUpstreamTLSCertificates(ctx) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: methodGetUpstreamTLSCertificates.FullName(), + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(Backend).GetUpstreamTLSCertificates(ctx) + } + return interceptor(ctx, nil, info, handler) +} + // RegisterService registers a new sentry service with the given gRPC server. func RegisterService(server *grpc.Server, service Backend) { server.RegisterService(&serviceDesc, service) @@ -65,6 +122,25 @@ func (c *sentryClient) GetAddresses(ctx context.Context) (*SentryAddresses, erro return &rsp, nil } +func (c *sentryClient) SetUpstreamTLSCertificates(ctx context.Context, cert *tls.Certificate, certNext *tls.Certificate) error { + req := &UpstreamTLSCertificates{ + Certificate: cert, + NextCertificate: certNext, + } + if err := c.conn.Invoke(ctx, methodSetUpstreamTLSCertificates.FullName(), req, nil); err != nil { + return err + } + return nil +} + +func (c *sentryClient) GetUpstreamTLSCertificates(ctx context.Context) (*UpstreamTLSCertificates, error) { + var rsp UpstreamTLSCertificates + if err := c.conn.Invoke(ctx, methodGetUpstreamTLSCertificates.FullName(), nil, &rsp); err != nil { + return nil, err + } + return &rsp, nil +} + // NewSentryClient creates a new gRPC sentry client service. func NewSentryClient(c *grpc.ClientConn) Backend { return &sentryClient{c} diff --git a/go/sentry/sentry.go b/go/sentry/sentry.go index 8ba7c101d92..75db22dc346 100644 --- a/go/sentry/sentry.go +++ b/go/sentry/sentry.go @@ -3,7 +3,9 @@ package sentry import ( "context" + "crypto/tls" "fmt" + "sync" "github.com/oasislabs/oasis-core/go/common/identity" "github.com/oasislabs/oasis-core/go/common/logging" @@ -16,10 +18,15 @@ import ( var _ api.Backend = (*backend)(nil) type backend struct { + sync.RWMutex + logger *logging.Logger consensus consensus.Backend identity *identity.Identity + + upstreamTLSCertificate *tls.Certificate + upstreamTLSCertificateNext *tls.Certificate } func (b *backend) GetAddresses(ctx context.Context) (*api.SentryAddresses, error) { @@ -38,11 +45,20 @@ func (b *backend) GetAddresses(ctx context.Context) (*api.SentryAddresses, error return nil, fmt.Errorf("sentry: error obtaining sentry worker addresses: %w", err) } var committeeAddresses []node.CommitteeAddress + for _, addr := range committeeAddrs { committeeAddresses = append(committeeAddresses, node.CommitteeAddress{ Certificate: b.identity.GetTLSCertificate().Certificate[0], Address: addr, }) + // Make sure to also include the certificate that will be valid + // in the next epoch, so that the node remains reachable. + if nextCert := b.identity.GetNextTLSCertificate(); nextCert != nil { + committeeAddresses = append(committeeAddresses, node.CommitteeAddress{ + Certificate: nextCert.Certificate[0], + Address: addr, + }) + } } return &api.SentryAddresses{ @@ -51,6 +67,26 @@ func (b *backend) GetAddresses(ctx context.Context) (*api.SentryAddresses, error }, nil } +func (b *backend) SetUpstreamTLSCertificates(ctx context.Context, cert *tls.Certificate, certNext *tls.Certificate) error { + b.Lock() + defer b.Unlock() + + b.upstreamTLSCertificate = cert + b.upstreamTLSCertificateNext = certNext + + return nil +} + +func (b *backend) GetUpstreamTLSCertificates(ctx context.Context) (*api.UpstreamTLSCertificates, error) { + b.RLock() + defer b.RUnlock() + + return &api.UpstreamTLSCertificates{ + Certificate: b.upstreamTLSCertificate, + NextCertificate: b.upstreamTLSCertificateNext, + }, nil +} + // New constructs a new sentry Backend instance. func New( consensusBackend consensus.Backend, diff --git a/go/worker/registration/worker.go b/go/worker/registration/worker.go index 03b840f2100..dfa0f1c8f68 100644 --- a/go/worker/registration/worker.go +++ b/go/worker/registration/worker.go @@ -6,6 +6,7 @@ import ( "fmt" "sync" "sync/atomic" + "time" "github.com/cenkalti/backoff/v4" "github.com/pkg/errors" @@ -146,6 +147,43 @@ func DebugForceAllowUnroutableAddresses() { } func (w *Worker) registrationLoop() { // nolint: gocyclo + // If we have any sentry nodes, let them know about our TLS certs. + sentryAddrs := w.sentryAddresses + sentryCerts := w.sentryCerts + if len(sentryAddrs) > 0 { + for i, sentryAddr := range sentryAddrs { + var numRetries uint + + pushCerts := func() error { + client, err := sentryClient.New(&sentryAddr, sentryCerts[i], w.identity) + if err != nil { + if numRetries < 60 { + numRetries++ + return err + } + return backoff.Permanent(err) + } + defer client.Close() + + err = client.SetUpstreamTLSCertificates(w.ctx, w.identity.GetTLSCertificate(), w.identity.GetNextTLSCertificate()) + if err != nil { + return err + } + + return nil + } + + sched := backoff.NewConstantBackOff(1 * time.Second) + err := backoff.Retry(pushCerts, backoff.WithContext(sched, w.ctx)) + if err != nil { + w.logger.Error("unable to push upstream TLS certificates to sentry node!", + "err", err, + "sentry_address", sentryAddr, + ) + } + } + } + // Delay node registration till after the consensus service has // finished initial synchronization if applicable. if w.consensus != nil { @@ -638,6 +676,15 @@ func (w *Worker) querySentries() ([]node.ConsensusAddress, []node.CommitteeAddre ) } + // Keep sentries updated with our latest TLS certificates. + err = client.SetUpstreamTLSCertificates(w.ctx, w.identity.GetTLSCertificate(), w.identity.GetNextTLSCertificate()) + if err != nil { + w.logger.Warn("failed to provide upstream TLS certificates to sentry node", + "err", err, + "sentry_address", sentryAddr, + ) + } + consensusAddrs = append(consensusAddrs, sentryAddresses.Consensus...) committeeAddrs = append(committeeAddrs, sentryAddresses.Committee...) } diff --git a/go/worker/sentry/grpc/init.go b/go/worker/sentry/grpc/init.go index df145956e1a..830c37c781e 100644 --- a/go/worker/sentry/grpc/init.go +++ b/go/worker/sentry/grpc/init.go @@ -6,9 +6,7 @@ import ( tlsPkg "crypto/tls" "crypto/x509" "fmt" - "time" - "github.com/cenkalti/backoff/v4" flag "github.com/spf13/pflag" "github.com/spf13/viper" "google.golang.org/grpc" @@ -23,9 +21,8 @@ import ( "github.com/oasislabs/oasis-core/go/common/identity" "github.com/oasislabs/oasis-core/go/common/logging" "github.com/oasislabs/oasis-core/go/common/node" - genesis "github.com/oasislabs/oasis-core/go/genesis/file" cmdGrpc "github.com/oasislabs/oasis-core/go/oasis-node/cmd/common/grpc" - registry "github.com/oasislabs/oasis-core/go/registry/api" + sentry "github.com/oasislabs/oasis-core/go/sentry/api" "github.com/oasislabs/oasis-core/go/worker/common/configparser" ) @@ -42,9 +39,6 @@ const ( CfgClientAddresses = "worker.sentry.grpc.client.address" // CfgClientPort is the sentry node's client port. CfgClientPort = "worker.sentry.grpc.client.port" - - maxRetries = 3 - retryInterval = 1 * time.Second ) // Flags has the configuration flags. @@ -59,7 +53,7 @@ func GetNodeAddresses() ([]node.Address, error) { return clientAddresses, nil } -func initConnection(ctx context.Context, logger *logging.Logger, ident *identity.Identity) (*upstreamConn, error) { +func initConnection(ctx context.Context, logger *logging.Logger, ident *identity.Identity, backend sentry.Backend) (*upstreamConn, error) { var err error addr := viper.GetString(CfgUpstreamAddress) @@ -80,107 +74,18 @@ func initConnection(ctx context.Context, logger *logging.Logger, ident *identity "upstream_node_id", upstreamNodeIDRaw, ) - // Get upstream node's certificates from registry. - regAddr := viper.GetString(cmdGrpc.CfgAddress) - regConn, err := cmnGrpc.Dial(regAddr, []grpc.DialOption{ - grpc.WithInsecure(), - grpc.WithDefaultCallOptions(grpc.WaitForReady(true)), - }...) + // Get upstream node's certificates. + certs, err := backend.GetUpstreamTLSCertificates(ctx) if err != nil { - return nil, fmt.Errorf("failed to create registry gRPC client: %w", err) - } - regClient := registry.NewRegistryClient(regConn) - - var upstreamNode *node.Node - var numRetries uint - - retryGetNode := func() error { - var grr error - upstreamNode, grr = regClient.GetNode(ctx, ®istry.IDQuery{ - Height: 0, - ID: upstreamNodeID, - }) - - if numRetries < maxRetries { - numRetries++ - return grr - } - return backoff.Permanent(grr) - } - retryGetNodeSchedule := backoff.NewConstantBackOff(retryInterval) - err = backoff.Retry(retryGetNode, backoff.WithContext(retryGetNodeSchedule, ctx)) - if err != nil { - // Failed to get node from registry, try the genesis doc instead. - logger.Warn("unable to get upstream node from registry, falling back to genesis", - "err", err, - ) - - genesisProvider, grr := genesis.DefaultFileProvider() - if grr != nil { - return nil, fmt.Errorf("failed to get upstream node from both registry and genesis, failure loading genesis file: %w", grr) - } - - genDoc, grr := genesisProvider.GetGenesisDocument() - if grr != nil { - return nil, fmt.Errorf("failed to get upstream node from both registry and genesis, failure getting genesis document: %w", grr) - } - - genNodes := genDoc.Registry.Nodes - upstreamNode = nil - for _, sn := range genNodes { - var n node.Node - if grr := sn.Open(registry.RegisterGenesisNodeSignatureContext, &n); grr != nil { - return nil, fmt.Errorf("failed to open signed node: %w", grr) - } - if n.ID.Equal(upstreamNodeID) { - upstreamNode = &n - break - } - } - } - - if upstreamNode == nil { - // Wait for the node to register. - regCh, regSub, regErr := regClient.WatchNodes(ctx) - if regErr != nil { - return nil, fmt.Errorf("unable to watch for nodes: %w", regErr) - } - defer regSub.Close() - logger.Info("waiting for upstream node to register...", - "upstream_node_id", upstreamNodeID.String(), - ) - for { - select { - case nodeEvent, ok := <-regCh: - if !ok { - return nil, fmt.Errorf("WatchNodes channel closed while waiting for upstream node to register") - } - - if nodeEvent.Node.ID.Equal(upstreamNodeID) { - // Our upstream node finally registered! - upstreamNode = nodeEvent.Node - break - } - case <-ctx.Done(): - return nil, fmt.Errorf("context aborted") - } - - if upstreamNode != nil { - break - } - } - } - - if upstreamNode == nil { - return nil, fmt.Errorf("upstream node not found in registry nor genesis document, nor did it register") + return nil, fmt.Errorf("failed to get upstream node's TLS certificates: %w", err) } upstreamCerts := [][]byte{} - if upstreamNode.Committee.Certificate != nil { - upstreamCerts = append(upstreamCerts, upstreamNode.Committee.Certificate) + if certs.Certificate != nil { + upstreamCerts = append(upstreamCerts, certs.Certificate.Certificate[0]) } - if upstreamNode.Committee.NextCertificate != nil { - upstreamCerts = append(upstreamCerts, upstreamNode.Committee.NextCertificate) + if certs.NextCertificate != nil { + upstreamCerts = append(upstreamCerts, certs.NextCertificate.Certificate[0]) } if len(upstreamCerts) == 0 { return nil, fmt.Errorf("upstream node has no defined TLS certificates") @@ -227,16 +132,15 @@ func initConnection(ctx context.Context, logger *logging.Logger, ident *identity manualResolver.UpdateState(resolverState) return &upstreamConn{ - nodeID: upstreamNodeID, - certs: upstreamCerts, - conn: conn, - registryClientConn: regConn, - resolverCleanupCb: cleanupCb, + nodeID: upstreamNodeID, + certs: upstreamCerts, + conn: conn, + resolverCleanupCb: cleanupCb, }, nil } // New creates a new sentry grpc worker. -func New(identity *identity.Identity) (*Worker, error) { +func New(backend sentry.Backend, identity *identity.Identity) (*Worker, error) { logger := logging.GetLogger("sentry/grpc/worker") enabled := viper.GetBool(CfgEnabled) @@ -262,7 +166,7 @@ func New(identity *identity.Identity) (*Worker, error) { g.upstreamDialerMutex.Lock() defer g.upstreamDialerMutex.Unlock() - upstreamConn, err := initConnection(g.ctx, logger, identity) + upstreamConn, err := initConnection(g.ctx, logger, identity, backend) if err != nil { return nil, fmt.Errorf("gRPC sentry worker initializing upstream connection failure: %w", err) } diff --git a/go/worker/sentry/grpc/worker.go b/go/worker/sentry/grpc/worker.go index 8776013c70d..00db78b7fd4 100644 --- a/go/worker/sentry/grpc/worker.go +++ b/go/worker/sentry/grpc/worker.go @@ -1,11 +1,12 @@ package grpc import ( - "bytes" "context" "fmt" "sync" + "time" + "github.com/cenkalti/backoff/v4" "google.golang.org/grpc" "google.golang.org/grpc/codes" "google.golang.org/grpc/status" @@ -21,7 +22,6 @@ import ( "github.com/oasislabs/oasis-core/go/common/identity" "github.com/oasislabs/oasis-core/go/common/logging" "github.com/oasislabs/oasis-core/go/common/service" - registry "github.com/oasislabs/oasis-core/go/registry/api" ) var _ service.BackgroundService = (*Worker)(nil) @@ -45,8 +45,6 @@ type Worker struct { // nolint: maligned // Per service policy checkers. grpcPolicyCheckers map[cmnGrpc.ServiceName]*policy.DynamicRuntimePolicyChecker - registryClient registry.Backend - *upstreamConn upstreamDialer grpcProxy.Dialer @@ -54,10 +52,6 @@ type Worker struct { // nolint: maligned grpc *cmnGrpc.Server identity *identity.Identity - - // Set to true when quitting if the master worker shouldn't quit, - // but re-init due to changed TLS certificates on the upstream node. - AmQuittingBecauseTLSCertsHaveRotated bool } type upstreamConn struct { @@ -67,8 +61,6 @@ type upstreamConn struct { certs [][]byte // Client connection to the upstream node. conn *grpc.ClientConn - // Registry client connection. - registryClientConn *grpc.ClientConn // Cleanup callback for the manual resolver. resolverCleanupCb func() } @@ -169,41 +161,29 @@ func (g *Worker) updatePolicies(p policyAPI.ServicePolicies) { } } -// Returns true if we need to restart. -func (g *Worker) checkUpstreamNodeTLSCerts(nodeEvent *registry.NodeEvent) bool { - if !nodeEvent.IsRegistration { - return false - } - - // Check if it's our upstream node. - if !nodeEvent.Node.ID.Equal(g.nodeID) { - return false - } - - // XXX: Not sure if certificates are guaranteed to be sorted, - // so we do this slow lookup to be sure. - var numCertMatches uint - for _, cert1 := range g.certs { - for _, addr2 := range nodeEvent.Node.Committee.Addresses { - if bytes.Equal(cert1, addr2.Certificate) { - numCertMatches++ - } - } - } - - // If the number of matching certificates differs, they were rotated, - // so a reconnect is required. - return numCertMatches != uint(len(g.certs)) -} - func (g *Worker) worker() { defer close(g.quitCh) defer (g.cancelCtx)() if g.upstreamConn == nil { - _, err := g.upstreamDialer(g.ctx) + var numRetries uint + + dialUpstream := func() error { + _, err := g.upstreamDialer(g.ctx) + if err != nil { + if numRetries < 60 { + numRetries++ + return err + } + return backoff.Permanent(err) + } + return nil + } + + sched := backoff.NewConstantBackOff(1 * time.Second) + err := backoff.Retry(dialUpstream, backoff.WithContext(sched, g.ctx)) if err != nil { - g.logger.Error("failed to establish upstream connection", + g.logger.Error("unable to dial upstream node", "err", err, ) return @@ -221,34 +201,12 @@ func (g *Worker) worker() { } defer sub.Close() - // Initialize registry watcher. - g.registryClient = registry.NewRegistryClient(g.registryClientConn) - regCh, regSub, regErr := g.registryClient.WatchNodes(g.ctx) - if regErr != nil { - g.logger.Error("failed to watch registry nodes", - "err", regErr, - ) - return - } - defer regSub.Close() - // Initialization complete. close(g.initCh) - // Watch policies and registry. + // Watch policies. for { select { - case nodeEvent, ok := <-regCh: - if !ok { - g.logger.Error("WatchNodes stream closed") - return - } - - if g.checkUpstreamNodeTLSCerts(nodeEvent) { - // Upstream node TLS certificates changed, restart is needed. - g.AmQuittingBecauseTLSCertsHaveRotated = true - return - } case p, ok := <-ch: if !ok { g.logger.Error("WatchPolicies stream closed") diff --git a/go/worker/sentry/worker.go b/go/worker/sentry/worker.go index 5eba2a1dedc..998b11805a5 100644 --- a/go/worker/sentry/worker.go +++ b/go/worker/sentry/worker.go @@ -133,35 +133,16 @@ func New(backend api.Backend, identity *identity.Identity) (*Worker, error) { } // Initialize the sentry grpc worker. - sentryGrpcWorker, err := workerGrpcSentry.New(identity) + sentryGrpcWorker, err := workerGrpcSentry.New(backend, identity) if err != nil { return nil, fmt.Errorf("worker/sentry: failed to create a new sentry grpc worker: %w", err) } w.grpcWorker = sentryGrpcWorker + // Stop in case of grpc/worker quitting. go func() { - for { // nolint: S1000 - // Stop in case of grpc/worker quitting. - select { - case <-w.grpcWorker.Quit(): - if !w.grpcWorker.AmQuittingBecauseTLSCertsHaveRotated { - // Worker is actually quitting. - w.Stop() - return - } - - // Upstream TLS certs have rotated, reconnect. - sgw, err := workerGrpcSentry.New(identity) - if err != nil { - w.logger.Error("worker/sentry: failed to re-init worker after upstream TLS certificate rotation", - "err", err, - ) - w.Stop() - return - } - w.grpcWorker = sgw - } - } + <-w.grpcWorker.Quit() + w.Stop() }() return w, nil From 979fcf42debfe4f069c461b4fb76129d356b7d8d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andrej=20Buko=C5=A1ek?= Date: Fri, 27 Mar 2020 13:01:39 +0100 Subject: [PATCH 5/8] ias: Re-dial registry if connection is dropped --- go/oasis-node/cmd/ias/auth_registry.go | 49 ++++++++++++++++++-------- go/oasis-test-runner/oasis/oasis.go | 2 +- 2 files changed, 36 insertions(+), 15 deletions(-) diff --git a/go/oasis-node/cmd/ias/auth_registry.go b/go/oasis-node/cmd/ias/auth_registry.go index 7da32714871..4132023ec5e 100644 --- a/go/oasis-node/cmd/ias/auth_registry.go +++ b/go/oasis-node/cmd/ias/auth_registry.go @@ -3,6 +3,7 @@ package ias import ( "context" "sync" + "time" "github.com/pkg/errors" "github.com/spf13/cobra" @@ -19,8 +20,7 @@ import ( type registryAuthenticator struct { logger *logging.Logger - conn *grpc.ClientConn - client registry.Backend + cmd *cobra.Command enclaves *enclaveStore @@ -53,15 +53,40 @@ func (auth *registryAuthenticator) VerifyEvidence(ctx context.Context, evidence return nil } -func (auth *registryAuthenticator) worker(ctx context.Context) { - defer auth.conn.Close() +func (auth *registryAuthenticator) dialRegistry(ctx context.Context) (*grpc.ClientConn, error) { + conn, err := cmdGrpc.NewClient(auth.cmd) + if err != nil { + return nil, errors.Wrap(err, "ias: failed to create gRPC client") + } + return conn, nil +} +func (auth *registryAuthenticator) worker(ctx context.Context) { waitRuntimes := viper.GetInt(cfgWaitRuntimes) if waitRuntimes <= 0 { close(auth.initCh) } - ch, sub, err := auth.client.WatchRuntimes(ctx) + var redialAttempts uint + +Redial: + redialAttempts++ + conn, err := auth.dialRegistry(ctx) + if err != nil { + auth.logger.Error("unable to connect to registry", + "err", err, + ) + if redialAttempts < 10 { + time.Sleep(2 * time.Second) + auth.logger.Info("attempting to re-dial registry") + goto Redial + } + panic("unable to connect to registry") + } + defer conn.Close() + client := registry.NewRegistryClient(conn) + + ch, sub, err := client.WatchRuntimes(ctx) if err != nil { auth.logger.Error("failed to start the WatchRuntimes stream", "err", err, @@ -70,13 +95,15 @@ func (auth *registryAuthenticator) worker(ctx context.Context) { } defer sub.Close() + redialAttempts = 0 + for { var runtime *registry.Runtime select { case runtime = <-ch: if runtime == nil { - auth.logger.Error("data source stream closed by peer") - panic("data source disappeared") + auth.logger.Warn("data source stream closed by peer, re-dialing") + goto Redial } case <-ctx.Done(): return @@ -100,15 +127,9 @@ func (auth *registryAuthenticator) worker(ctx context.Context) { } func newRegistryAuthenticator(ctx context.Context, cmd *cobra.Command) (iasProxy.Authenticator, error) { - conn, err := cmdGrpc.NewClient(cmd) - if err != nil { - return nil, errors.Wrap(err, "ias: failed to create gRPC client") - } - auth := ®istryAuthenticator{ logger: logging.GetLogger("cmd/ias/proxy/auth/registry"), - conn: conn, - client: registry.NewRegistryClient(conn), + cmd: cmd, enclaves: newEnclaveStore(), initCh: make(chan struct{}), } diff --git a/go/oasis-test-runner/oasis/oasis.go b/go/oasis-test-runner/oasis/oasis.go index 60c4762f7c6..d7d56449205 100644 --- a/go/oasis-test-runner/oasis/oasis.go +++ b/go/oasis-test-runner/oasis/oasis.go @@ -625,7 +625,7 @@ func (net *Network) startOasisNode( args = append(args, baseArgs...) args = append(args, extraArgs.vec...) - if !strings.HasPrefix(node.Name, "sentry-") && len(net.byzantine) == 0 && net.iasProxy == nil { + if !strings.HasPrefix(node.Name, "sentry-") && !strings.HasPrefix(node.Name, "ias-proxy") && len(net.byzantine) == 0 { args = append(args, []string{"--" + registration.CfgRegistrationRotateCerts, "1"}...) } From d2e417497243468f45107c111da41e238ca62887 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andrej=20Buko=C5=A1ek?= Date: Fri, 27 Mar 2020 15:02:34 +0100 Subject: [PATCH 6/8] sentry/api: Simplify {Set,Get}UpstreamTLSCertificates commands --- go/sentry/api/api.go | 11 ++--------- go/sentry/api/grpc.go | 23 +++++++++-------------- go/sentry/sentry.go | 16 +++++----------- go/worker/registration/worker.go | 19 +++++++++++++++++-- go/worker/sentry/grpc/init.go | 11 +---------- 5 files changed, 34 insertions(+), 46 deletions(-) diff --git a/go/sentry/api/api.go b/go/sentry/api/api.go index 2d1385960d9..adcb778ab14 100644 --- a/go/sentry/api/api.go +++ b/go/sentry/api/api.go @@ -3,7 +3,6 @@ package api import ( "context" - "crypto/tls" "github.com/oasislabs/oasis-core/go/common/node" ) @@ -14,12 +13,6 @@ type SentryAddresses struct { Committee []node.CommitteeAddress } -// UpstreamTLSCertificates contains the upstream TLS certificates. -type UpstreamTLSCertificates struct { - Certificate *tls.Certificate - NextCertificate *tls.Certificate -} - // Backend is a sentry backend implementation. type Backend interface { // Get addresses returns the list of consensus and committee addresses of @@ -28,8 +21,8 @@ type Backend interface { // SetUpstreamTLSCertificates notifies the sentry node of the new // TLS certificates used by its upstream node. - SetUpstreamTLSCertificates(context.Context, *tls.Certificate, *tls.Certificate) error + SetUpstreamTLSCertificates(context.Context, [][]byte) error // GetUpstreamTLSCertificates returns the TLS certificates of the sentry node's upstream node. - GetUpstreamTLSCertificates(context.Context) (*UpstreamTLSCertificates, error) + GetUpstreamTLSCertificates(context.Context) ([][]byte, error) } diff --git a/go/sentry/api/grpc.go b/go/sentry/api/grpc.go index 261d4c08f92..3ea2607703b 100644 --- a/go/sentry/api/grpc.go +++ b/go/sentry/api/grpc.go @@ -2,7 +2,6 @@ package api import ( "context" - "crypto/tls" "google.golang.org/grpc" @@ -17,7 +16,7 @@ var ( methodGetAddresses = serviceName.NewMethod("GetAddresses", nil) // methodSetUpstreamTLSCertificates is the SetUpstreamTLSCertificates method. - methodSetUpstreamTLSCertificates = serviceName.NewMethod("SetUpstreamTLSCertificates", UpstreamTLSCertificates{}) + methodSetUpstreamTLSCertificates = serviceName.NewMethod("SetUpstreamTLSCertificates", [][]byte{}) // methodGetUpstreamTLSCertificates is the GetUpstreamTLSCertificates method. methodGetUpstreamTLSCertificates = serviceName.NewMethod("GetUpstreamTLSCertificates", nil) @@ -69,19 +68,19 @@ func handlerSetUpstreamTLSCertificates( // nolint: golint dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor, ) (interface{}, error) { - var req UpstreamTLSCertificates + var req [][]byte if err := dec(&req); err != nil { return nil, err } if interceptor == nil { - return nil, srv.(Backend).SetUpstreamTLSCertificates(ctx, req.Certificate, req.NextCertificate) + return nil, srv.(Backend).SetUpstreamTLSCertificates(ctx, req) } info := &grpc.UnaryServerInfo{ Server: srv, FullMethod: methodSetUpstreamTLSCertificates.FullName(), } handler := func(ctx context.Context, req interface{}) (interface{}, error) { - return nil, srv.(Backend).SetUpstreamTLSCertificates(ctx, req.(*UpstreamTLSCertificates).Certificate, req.(*UpstreamTLSCertificates).NextCertificate) + return nil, srv.(Backend).SetUpstreamTLSCertificates(ctx, *req.(*[][]byte)) } return interceptor(ctx, &req, info, handler) } @@ -122,23 +121,19 @@ func (c *sentryClient) GetAddresses(ctx context.Context) (*SentryAddresses, erro return &rsp, nil } -func (c *sentryClient) SetUpstreamTLSCertificates(ctx context.Context, cert *tls.Certificate, certNext *tls.Certificate) error { - req := &UpstreamTLSCertificates{ - Certificate: cert, - NextCertificate: certNext, - } - if err := c.conn.Invoke(ctx, methodSetUpstreamTLSCertificates.FullName(), req, nil); err != nil { +func (c *sentryClient) SetUpstreamTLSCertificates(ctx context.Context, certs [][]byte) error { + if err := c.conn.Invoke(ctx, methodSetUpstreamTLSCertificates.FullName(), certs, nil); err != nil { return err } return nil } -func (c *sentryClient) GetUpstreamTLSCertificates(ctx context.Context) (*UpstreamTLSCertificates, error) { - var rsp UpstreamTLSCertificates +func (c *sentryClient) GetUpstreamTLSCertificates(ctx context.Context) ([][]byte, error) { + var rsp [][]byte if err := c.conn.Invoke(ctx, methodGetUpstreamTLSCertificates.FullName(), nil, &rsp); err != nil { return nil, err } - return &rsp, nil + return rsp, nil } // NewSentryClient creates a new gRPC sentry client service. diff --git a/go/sentry/sentry.go b/go/sentry/sentry.go index 75db22dc346..912ac59b09e 100644 --- a/go/sentry/sentry.go +++ b/go/sentry/sentry.go @@ -3,7 +3,6 @@ package sentry import ( "context" - "crypto/tls" "fmt" "sync" @@ -25,8 +24,7 @@ type backend struct { consensus consensus.Backend identity *identity.Identity - upstreamTLSCertificate *tls.Certificate - upstreamTLSCertificateNext *tls.Certificate + upstreamTLSCertificates [][]byte } func (b *backend) GetAddresses(ctx context.Context) (*api.SentryAddresses, error) { @@ -67,24 +65,20 @@ func (b *backend) GetAddresses(ctx context.Context) (*api.SentryAddresses, error }, nil } -func (b *backend) SetUpstreamTLSCertificates(ctx context.Context, cert *tls.Certificate, certNext *tls.Certificate) error { +func (b *backend) SetUpstreamTLSCertificates(ctx context.Context, certs [][]byte) error { b.Lock() defer b.Unlock() - b.upstreamTLSCertificate = cert - b.upstreamTLSCertificateNext = certNext + b.upstreamTLSCertificates = certs return nil } -func (b *backend) GetUpstreamTLSCertificates(ctx context.Context) (*api.UpstreamTLSCertificates, error) { +func (b *backend) GetUpstreamTLSCertificates(ctx context.Context) ([][]byte, error) { b.RLock() defer b.RUnlock() - return &api.UpstreamTLSCertificates{ - Certificate: b.upstreamTLSCertificate, - NextCertificate: b.upstreamTLSCertificateNext, - }, nil + return b.upstreamTLSCertificates, nil } // New constructs a new sentry Backend instance. diff --git a/go/worker/registration/worker.go b/go/worker/registration/worker.go index dfa0f1c8f68..09df7401594 100644 --- a/go/worker/registration/worker.go +++ b/go/worker/registration/worker.go @@ -165,7 +165,15 @@ func (w *Worker) registrationLoop() { // nolint: gocyclo } defer client.Close() - err = client.SetUpstreamTLSCertificates(w.ctx, w.identity.GetTLSCertificate(), w.identity.GetNextTLSCertificate()) + certs := [][]byte{} + if c := w.identity.GetTLSCertificate(); c != nil { + certs = append(certs, c.Certificate[0]) + } + if c := w.identity.GetNextTLSCertificate(); c != nil { + certs = append(certs, c.Certificate[0]) + } + + err = client.SetUpstreamTLSCertificates(w.ctx, certs) if err != nil { return err } @@ -677,7 +685,14 @@ func (w *Worker) querySentries() ([]node.ConsensusAddress, []node.CommitteeAddre } // Keep sentries updated with our latest TLS certificates. - err = client.SetUpstreamTLSCertificates(w.ctx, w.identity.GetTLSCertificate(), w.identity.GetNextTLSCertificate()) + certs := [][]byte{} + if c := w.identity.GetTLSCertificate(); c != nil { + certs = append(certs, c.Certificate[0]) + } + if c := w.identity.GetNextTLSCertificate(); c != nil { + certs = append(certs, c.Certificate[0]) + } + err = client.SetUpstreamTLSCertificates(w.ctx, certs) if err != nil { w.logger.Warn("failed to provide upstream TLS certificates to sentry node", "err", err, diff --git a/go/worker/sentry/grpc/init.go b/go/worker/sentry/grpc/init.go index 830c37c781e..0784c10f471 100644 --- a/go/worker/sentry/grpc/init.go +++ b/go/worker/sentry/grpc/init.go @@ -75,22 +75,13 @@ func initConnection(ctx context.Context, logger *logging.Logger, ident *identity ) // Get upstream node's certificates. - certs, err := backend.GetUpstreamTLSCertificates(ctx) + upstreamCerts, err := backend.GetUpstreamTLSCertificates(ctx) if err != nil { return nil, fmt.Errorf("failed to get upstream node's TLS certificates: %w", err) } - - upstreamCerts := [][]byte{} - if certs.Certificate != nil { - upstreamCerts = append(upstreamCerts, certs.Certificate.Certificate[0]) - } - if certs.NextCertificate != nil { - upstreamCerts = append(upstreamCerts, certs.NextCertificate.Certificate[0]) - } if len(upstreamCerts) == 0 { return nil, fmt.Errorf("upstream node has no defined TLS certificates") } - logger.Info("found certificates for upstream node", "num_certs", len(upstreamCerts), ) From 04bd6b3160ab2eb86b3d937549a98de38195e687 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andrej=20Buko=C5=A1ek?= Date: Mon, 30 Mar 2020 10:24:46 +0200 Subject: [PATCH 7/8] grpc/proxy: Redial if connection unexpectedly shuts down --- go/common/grpc/proxy/proxy.go | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/go/common/grpc/proxy/proxy.go b/go/common/grpc/proxy/proxy.go index e497b4d6403..b8ce38296bd 100644 --- a/go/common/grpc/proxy/proxy.go +++ b/go/common/grpc/proxy/proxy.go @@ -9,6 +9,7 @@ import ( "google.golang.org/grpc" "google.golang.org/grpc/codes" + "google.golang.org/grpc/connectivity" "google.golang.org/grpc/metadata" "google.golang.org/grpc/status" @@ -74,6 +75,12 @@ func (p *proxy) handler(srv interface{}, stream grpc.ServerStream) error { // Pass subject header upstream. upstreamCtx = metadata.AppendToOutgoingContext(upstreamCtx, policy.ForwardedSubjectMD, sub) + // Check if upstream connection was disconnected. + if p.upstreamConn != nil && p.upstreamConn.GetState() == connectivity.Shutdown { + // We need to redial if the connection was shut down. + p.upstreamConn = nil + } + // Dial upstream if necessary. if p.upstreamConn == nil { var grr error From ee4d9fc24770c42c68d05628278c378925aeb31b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andrej=20Buko=C5=A1ek?= Date: Fri, 3 Apr 2020 11:43:24 +0200 Subject: [PATCH 8/8] sentry/client: Use separate TLS certificate --- .buildkite/code.pipeline.yml | 2 +- .changelog/2098.breaking.md | 5 ++++ .changelog/2098.feature.md | 2 +- go/common/identity/identity.go | 53 +++++++++++++++++++++++++++------ go/sentry/client/client.go | 8 ++--- go/worker/sentry/grpc/worker.go | 10 ++----- 6 files changed, 56 insertions(+), 24 deletions(-) create mode 100644 .changelog/2098.breaking.md diff --git a/.buildkite/code.pipeline.yml b/.buildkite/code.pipeline.yml index 078d07551fc..1702877f9a3 100644 --- a/.buildkite/code.pipeline.yml +++ b/.buildkite/code.pipeline.yml @@ -203,7 +203,7 @@ steps: ############### - label: E2E tests parallelism: 7 - timeout_in_minutes: 12 + timeout_in_minutes: 9 command: - .buildkite/scripts/download_e2e_test_artifacts.sh - .buildkite/scripts/test_e2e.sh diff --git a/.changelog/2098.breaking.md b/.changelog/2098.breaking.md new file mode 100644 index 00000000000..37194935649 --- /dev/null +++ b/.changelog/2098.breaking.md @@ -0,0 +1,5 @@ +Sentry nodes no longer require TLS certificate file of the upstream node + +The `worker.sentry.grpc.upstream.cert` option has been removed. +Instead, use `worker.sentry.grpc.upstream.id` to specify the +upstream node's ID. diff --git a/.changelog/2098.feature.md b/.changelog/2098.feature.md index e2ab4bb7fb1..649e8bf3941 100644 --- a/.changelog/2098.feature.md +++ b/.changelog/2098.feature.md @@ -3,4 +3,4 @@ node: Add automatic TLS certificate rotation support It is now possible to automatically rotate the node's TLS certificates every N epochs by passing the command-line flag `worker.registration.rotate_certs`. -Do not use this if you use sentry nodes or IAS proxies. +Do not use this option on sentry nodes or IAS proxies. diff --git a/go/common/identity/identity.go b/go/common/identity/identity.go index d60ba0679f0..8a7c87730f2 100644 --- a/go/common/identity/identity.go +++ b/go/common/identity/identity.go @@ -30,6 +30,10 @@ const ( tlsKeyFilename = "tls_identity.pem" tlsCertFilename = "tls_identity_cert.pem" + + // These are used for the sentry client connection to the sentry node and are never rotated. + tlsSentryClientKeyFilename = "sentry_client_tls_identity.pem" + tlsSentryClientCertFilename = "sentry_client_tls_identity_cert.pem" ) // ErrCertificateRotationForbidden is returned by RotateCertificates if @@ -48,14 +52,17 @@ type Identity struct { P2PSigner signature.Signer // ConsensusSigner is a node consensus key signer. ConsensusSigner signature.Signer + // TLSSentryClientCertificate is the client certificate used for + // connecting to the sentry node's control connection. It is never rotated. + TLSSentryClientCertificate *tls.Certificate + // DoNotRotateTLS flag is true if we mustn't rotate the TLS certificates below. + DoNotRotateTLS bool // tlsSigner is a node TLS certificate signer. tlsSigner signature.Signer // tlsCertificate is a certificate that can be used for TLS. tlsCertificate *tls.Certificate // nextTLSCertificate is a certificate that can be used for TLS in the next rotation. nextTLSCertificate *tls.Certificate - // DoNotRotateTLS flag is true if we mustn't rotate the TLS certificates. - DoNotRotateTLS bool } // RotateCertificates rotates the identity's TLS certificates. @@ -218,14 +225,31 @@ func doLoadOrGenerate(dataDir string, signerFactory signature.SignerFactory, sho } } + // Load or generate the sentry client certificate for this node. + tlsSentryClientCertPath, tlsSentryClientKeyPath := TLSSentryClientCertPaths(dataDir) + sentryClientCert, err := tlsCert.Load(tlsSentryClientCertPath, tlsSentryClientKeyPath) + if err != nil { + // Load failed, generate fresh sentry client cert. + sentryClientCert, err = tlsCert.Generate(CommonName) + if err != nil { + return nil, err + } + // And save it to disk. + err = tlsCert.Save(tlsSentryClientCertPath, tlsSentryClientKeyPath, sentryClientCert) + if err != nil { + return nil, err + } + } + return &Identity{ - NodeSigner: signers[0], - P2PSigner: signers[1], - ConsensusSigner: signers[2], - tlsSigner: memory.NewFromRuntime(cert.PrivateKey.(ed25519.PrivateKey)), - tlsCertificate: cert, - nextTLSCertificate: nextCert, - DoNotRotateTLS: dnr, + NodeSigner: signers[0], + P2PSigner: signers[1], + ConsensusSigner: signers[2], + tlsSigner: memory.NewFromRuntime(cert.PrivateKey.(ed25519.PrivateKey)), + tlsCertificate: cert, + nextTLSCertificate: nextCert, + DoNotRotateTLS: dnr, + TLSSentryClientCertificate: sentryClientCert, }, nil } @@ -239,3 +263,14 @@ func TLSCertPaths(dataDir string) (string, string) { return tlsCertPath, tlsKeyPath } + +// TLSSentryClientCertPaths returns the sentry client TLS private key and +// certificate paths relative to the passed data directory. +func TLSSentryClientCertPaths(dataDir string) (string, string) { + var ( + tlsKeyPath = filepath.Join(dataDir, tlsSentryClientKeyFilename) + tlsCertPath = filepath.Join(dataDir, tlsSentryClientCertFilename) + ) + + return tlsCertPath, tlsKeyPath +} diff --git a/go/sentry/client/client.go b/go/sentry/client/client.go index 9e153414052..2a84baa53dd 100644 --- a/go/sentry/client/client.go +++ b/go/sentry/client/client.go @@ -45,11 +45,9 @@ func (c *Client) createConnection() error { certPool := x509.NewCertPool() certPool.AddCert(c.sentryCert) creds := credentials.NewTLS(&tls.Config{ - RootCAs: certPool, - ServerName: identity.CommonName, - GetClientCertificate: func(cri *tls.CertificateRequestInfo) (*tls.Certificate, error) { - return c.nodeIdentity.GetTLSCertificate(), nil - }, + RootCAs: certPool, + ServerName: identity.CommonName, + Certificates: []tls.Certificate{*c.nodeIdentity.TLSSentryClientCertificate}, }) opts := grpc.WithTransportCredentials(creds) diff --git a/go/worker/sentry/grpc/worker.go b/go/worker/sentry/grpc/worker.go index 00db78b7fd4..60edf2239cb 100644 --- a/go/worker/sentry/grpc/worker.go +++ b/go/worker/sentry/grpc/worker.go @@ -166,21 +166,15 @@ func (g *Worker) worker() { defer (g.cancelCtx)() if g.upstreamConn == nil { - var numRetries uint - dialUpstream := func() error { _, err := g.upstreamDialer(g.ctx) if err != nil { - if numRetries < 60 { - numRetries++ - return err - } - return backoff.Permanent(err) + return err } return nil } - sched := backoff.NewConstantBackOff(1 * time.Second) + sched := backoff.WithMaxRetries(backoff.NewConstantBackOff(1*time.Second), 60) err := backoff.Retry(dialUpstream, backoff.WithContext(sched, g.ctx)) if err != nil { g.logger.Error("unable to dial upstream node",