From 7da85ca0abbd2383abcc04efec764a3f7d5b11da Mon Sep 17 00:00:00 2001 From: Krasi Georgiev <8903888+krasi-georgiev@users.noreply.github.com> Date: Mon, 9 Dec 2019 13:45:37 +0200 Subject: [PATCH 1/4] rebased Signed-off-by: Krasi Georgiev <8903888+krasi-georgiev@users.noreply.github.com> --- CHANGELOG.md | 2 + cmd/thanos/flags.go | 6 +- cmd/thanos/query.go | 5 +- cmd/thanos/receive.go | 5 +- cmd/thanos/rule.go | 5 +- cmd/thanos/sidecar.go | 5 +- cmd/thanos/store.go | 5 +- pkg/server/grpc/grpc.go | 4 + pkg/server/grpc/option.go | 8 ++ pkg/tls/options.go | 104 ------------------- pkg/tls/tls.go | 213 ++++++++++++++++++++++++++++++++++++++ test/e2e/tls_test.go | 199 +++++++++++++++++++++++++++++++++++ 12 files changed, 451 insertions(+), 110 deletions(-) delete mode 100644 pkg/tls/options.go create mode 100644 pkg/tls/tls.go create mode 100644 test/e2e/tls_test.go diff --git a/CHANGELOG.md b/CHANGELOG.md index 3af82e0be0..3e14e99c6e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,6 +25,8 @@ We use *breaking* word for marking changes that are not backward compatible (rel - [#1758](https://github.com/thanos-io/thanos/pull/1758) Bucket: `thanos bucket web` now supports `--web.external-prefix` for proxying on a subpath. - [#1770](https://github.com/thanos-io/thanos/pull/1770) Bucket: Add `--web.prefix-header` flags to allow for bucket UI to be accessible behind a reverse proxy. - [#1668](https://github.com/thanos-io/thanos/pull/1668) Receiver: Added TLS options for both server and client remote write. +- [#1672](https://github.com/thanos-io/thanos/pull/1672) All components: Client and server certificates are auto reloaded when changed on disk. +- [#1672](https://github.com/thanos-io/thanos/pull/1672) Add a new `--grpc-server-max-connection-age` CLI option which controls how often to re-do the tls handshake and re-read the certificates. This in reality controls the keep alive so when the connection is closed it requires a new tls handshake. ### Fixed diff --git a/cmd/thanos/flags.go b/cmd/thanos/flags.go index b6d7776086..7c592f0474 100644 --- a/cmd/thanos/flags.go +++ b/cmd/thanos/flags.go @@ -3,6 +3,7 @@ package main import ( "fmt" "strings" + "time" "github.com/thanos-io/thanos/pkg/extflag" @@ -23,6 +24,7 @@ func regGRPCFlags(cmd *kingpin.CmdClause) ( grpcTLSSrvCert *string, grpcTLSSrvKey *string, grpcTLSSrvClientCA *string, + grpcMaxConnectionAge *time.Duration, ) { grpcBindAddr = cmd.Flag("grpc-address", "Listen ip:port address for gRPC endpoints (StoreAPI). Make sure this address is routable from other components."). Default("0.0.0.0:10901").String() @@ -31,12 +33,14 @@ func regGRPCFlags(cmd *kingpin.CmdClause) ( grpcTLSSrvCert = cmd.Flag("grpc-server-tls-cert", "TLS Certificate for gRPC server, leave blank to disable TLS").Default("").String() grpcTLSSrvKey = cmd.Flag("grpc-server-tls-key", "TLS Key for the gRPC server, leave blank to disable TLS").Default("").String() grpcTLSSrvClientCA = cmd.Flag("grpc-server-tls-client-ca", "TLS CA to verify clients against. If no client CA is specified, there is no client verification on server side. (tls.NoClientCert)").Default("").String() + grpcMaxConnectionAge = cmd.Flag("grpc-server-max-connection-age", "The grpc server max connection age. This controls how often to re-read the tls certificates and redo the TLS handshake ").Default("1m").Duration() return grpcBindAddr, grpcGracePeriod, grpcTLSSrvCert, grpcTLSSrvKey, - grpcTLSSrvClientCA + grpcTLSSrvClientCA, + grpcMaxConnectionAge } func regHTTPFlags(cmd *kingpin.CmdClause) (httpBindAddr *string, httpGracePeriod *model.Duration) { diff --git a/cmd/thanos/query.go b/cmd/thanos/query.go index 2dd9c17663..63ac9770ff 100644 --- a/cmd/thanos/query.go +++ b/cmd/thanos/query.go @@ -47,7 +47,7 @@ func registerQuery(m map[string]setupFunc, app *kingpin.Application) { cmd := app.Command(comp.String(), "query node exposing PromQL enabled Query API with data retrieved from multiple store nodes") httpBindAddr, httpGracePeriod := regHTTPFlags(cmd) - grpcBindAddr, grpcGracePeriod, grpcCert, grpcKey, grpcClientCA := regGRPCFlags(cmd) + grpcBindAddr, grpcGracePeriod, grpcCert, grpcKey, grpcClientCA, grpcMaxConnAge := regGRPCFlags(cmd) secure := cmd.Flag("grpc-client-tls-secure", "Use TLS when talking to the gRPC server").Default("false").Bool() cert := cmd.Flag("grpc-client-tls-cert", "TLS Certificates to use to identify this client to the server").Default("").String() @@ -137,6 +137,7 @@ func registerQuery(m map[string]setupFunc, app *kingpin.Application) { *grpcCert, *grpcKey, *grpcClientCA, + *grpcMaxConnAge, *secure, *cert, *key, @@ -219,6 +220,7 @@ func runQuery( grpcCert string, grpcKey string, grpcClientCA string, + grpcMaxConnAge time.Duration, secure bool, cert string, key string, @@ -408,6 +410,7 @@ func runQuery( grpcserver.WithListen(grpcBindAddr), grpcserver.WithGracePeriod(grpcGracePeriod), grpcserver.WithTLSConfig(tlsCfg), + grpcserver.WithMaxConnAge(grpcMaxConnAge), ) g.Add(func() error { diff --git a/cmd/thanos/receive.go b/cmd/thanos/receive.go index 09c3e8f317..d7aa7f946a 100644 --- a/cmd/thanos/receive.go +++ b/cmd/thanos/receive.go @@ -36,7 +36,7 @@ func registerReceive(m map[string]setupFunc, app *kingpin.Application) { cmd := app.Command(comp.String(), "Accept Prometheus remote write API requests and write to local tsdb (EXPERIMENTAL, this may change drastically without notice)") httpBindAddr, httpGracePeriod := regHTTPFlags(cmd) - grpcBindAddr, grpcGracePeriod, grpcCert, grpcKey, grpcClientCA := regGRPCFlags(cmd) + grpcBindAddr, grpcGracePeriod, grpcCert, grpcKey, grpcClientCA, grpcMaxConnAge := regGRPCFlags(cmd) rwAddress := cmd.Flag("remote-write.address", "Address to listen on for remote write requests."). Default("0.0.0.0:19291").String() @@ -109,6 +109,7 @@ func registerReceive(m map[string]setupFunc, app *kingpin.Application) { *grpcCert, *grpcKey, *grpcClientCA, + *grpcMaxConnAge, *httpBindAddr, time.Duration(*httpGracePeriod), *rwAddress, @@ -144,6 +145,7 @@ func runReceive( grpcCert string, grpcKey string, grpcClientCA string, + grpcMaxConnAge time.Duration, httpBindAddr string, httpGracePeriod time.Duration, rwAddress string, @@ -370,6 +372,7 @@ func runReceive( grpcserver.WithListen(grpcBindAddr), grpcserver.WithGracePeriod(grpcGracePeriod), grpcserver.WithTLSConfig(tlsCfg), + grpcserver.WithMaxConnAge(grpcMaxConnAge), ) startGRPC <- struct{}{} } diff --git a/cmd/thanos/rule.go b/cmd/thanos/rule.go index b3f3db72ef..24d8ef9be1 100644 --- a/cmd/thanos/rule.go +++ b/cmd/thanos/rule.go @@ -63,7 +63,7 @@ func registerRule(m map[string]setupFunc, app *kingpin.Application) { cmd := app.Command(comp.String(), "ruler evaluating Prometheus rules against given Query nodes, exposing Store API and storing old blocks in bucket") httpBindAddr, httpGracePeriod := regHTTPFlags(cmd) - grpcBindAddr, grpcGracePeriod, grpcCert, grpcKey, grpcClientCA := regGRPCFlags(cmd) + grpcBindAddr, grpcGracePeriod, grpcCert, grpcKey, grpcClientCA, grpcMaxConnAge := regGRPCFlags(cmd) labelStrs := cmd.Flag("label", "Labels to be applied to all generated metrics (repeated). Similar to external labels for Prometheus, used to identify ruler and its blocks as unique source."). PlaceHolder("=\"\"").Strings() @@ -162,6 +162,7 @@ func registerRule(m map[string]setupFunc, app *kingpin.Application) { *grpcCert, *grpcKey, *grpcClientCA, + *grpcMaxConnAge, *httpBindAddr, time.Duration(*httpGracePeriod), *webRoutePrefix, @@ -199,6 +200,7 @@ func runRule( grpcCert string, grpcKey string, grpcClientCA string, + grpcMaxConnAge time.Duration, httpBindAddr string, httpGracePeriod time.Duration, webRoutePrefix string, @@ -514,6 +516,7 @@ func runRule( grpcserver.WithListen(grpcBindAddr), grpcserver.WithGracePeriod(grpcGracePeriod), grpcserver.WithTLSConfig(tlsCfg), + grpcserver.WithMaxConnAge(grpcMaxConnAge), ) g.Add(func() error { diff --git a/cmd/thanos/sidecar.go b/cmd/thanos/sidecar.go index f4b319898c..0f2c7d879a 100644 --- a/cmd/thanos/sidecar.go +++ b/cmd/thanos/sidecar.go @@ -37,7 +37,7 @@ func registerSidecar(m map[string]setupFunc, app *kingpin.Application) { cmd := app.Command(component.Sidecar.String(), "sidecar for Prometheus server") httpBindAddr, httpGracePeriod := regHTTPFlags(cmd) - grpcBindAddr, grpcGracePeriod, grpcCert, grpcKey, grpcClientCA := regGRPCFlags(cmd) + grpcBindAddr, grpcGracePeriod, grpcCert, grpcKey, grpcClientCA, grpcMaxConnAge := regGRPCFlags(cmd) promURL := cmd.Flag("prometheus.url", "URL at which to reach Prometheus's API. For better performance use local network."). Default("http://localhost:9090").URL() @@ -82,6 +82,7 @@ func registerSidecar(m map[string]setupFunc, app *kingpin.Application) { *grpcCert, *grpcKey, *grpcClientCA, + *grpcMaxConnAge, *httpBindAddr, time.Duration(*httpGracePeriod), *promURL, @@ -106,6 +107,7 @@ func runSidecar( grpcCert string, grpcKey string, grpcClientCA string, + grpcMaxConnAge time.Duration, httpBindAddr string, httpGracePeriod time.Duration, promURL *url.URL, @@ -254,6 +256,7 @@ func runSidecar( grpcserver.WithListen(grpcBindAddr), grpcserver.WithGracePeriod(grpcGracePeriod), grpcserver.WithTLSConfig(tlsCfg), + grpcserver.WithMaxConnAge(grpcMaxConnAge), ) g.Add(func() error { statusProber.Ready() diff --git a/cmd/thanos/store.go b/cmd/thanos/store.go index 51f4723d39..fb11732308 100644 --- a/cmd/thanos/store.go +++ b/cmd/thanos/store.go @@ -31,7 +31,7 @@ func registerStore(m map[string]setupFunc, app *kingpin.Application) { cmd := app.Command(component.Store.String(), "store node giving access to blocks in a bucket provider. Now supported GCS, S3, Azure, Swift and Tencent COS.") httpBindAddr, httpGracePeriod := regHTTPFlags(cmd) - grpcBindAddr, grpcGracePeriod, grpcCert, grpcKey, grpcClientCA := regGRPCFlags(cmd) + grpcBindAddr, grpcGracePeriod, grpcCert, grpcKey, grpcClientCA, grpcMaxConnAge := regGRPCFlags(cmd) dataDir := cmd.Flag("data-dir", "Data directory in which to cache remote blocks."). Default("./data").String() @@ -84,6 +84,7 @@ func registerStore(m map[string]setupFunc, app *kingpin.Application) { *grpcCert, *grpcKey, *grpcClientCA, + *grpcMaxConnAge, *httpBindAddr, time.Duration(*httpGracePeriod), uint64(*indexCacheSize), @@ -117,6 +118,7 @@ func runStore( grpcCert string, grpcKey string, grpcClientCA string, + grpcMaxConnAge time.Duration, httpBindAddr string, httpGracePeriod time.Duration, indexCacheSizeBytes uint64, @@ -246,6 +248,7 @@ func runStore( grpcserver.WithListen(grpcBindAddr), grpcserver.WithGracePeriod(grpcGracePeriod), grpcserver.WithTLSConfig(tlsCfg), + grpcserver.WithMaxConnAge(grpcMaxConnAge), ) g.Add(func() error { diff --git a/pkg/server/grpc/grpc.go b/pkg/server/grpc/grpc.go index 21416b8fda..be65d9ec6a 100644 --- a/pkg/server/grpc/grpc.go +++ b/pkg/server/grpc/grpc.go @@ -20,6 +20,7 @@ import ( "google.golang.org/grpc" "google.golang.org/grpc/codes" "google.golang.org/grpc/credentials" + "google.golang.org/grpc/keepalive" "google.golang.org/grpc/status" ) @@ -74,6 +75,9 @@ func New(logger log.Logger, reg prometheus.Registerer, tracer opentracing.Tracer if options.tlsConfig != nil { grpcOpts = append(grpcOpts, grpc.Creds(credentials.NewTLS(options.tlsConfig))) } + if options.maxConnAge > 0 { + grpcOpts = append(grpcOpts, grpc.KeepaliveParams(keepalive.ServerParameters{MaxConnectionAge: options.maxConnAge})) + } s := grpc.NewServer(grpcOpts...) storepb.RegisterStoreServer(s, storeSrv) diff --git a/pkg/server/grpc/option.go b/pkg/server/grpc/option.go index 6c804e4f79..4a5482c52b 100644 --- a/pkg/server/grpc/option.go +++ b/pkg/server/grpc/option.go @@ -7,6 +7,7 @@ import ( type options struct { gracePeriod time.Duration + maxConnAge time.Duration listen string tlsConfig *tls.Config @@ -45,3 +46,10 @@ func WithTLSConfig(cfg *tls.Config) Option { o.tlsConfig = cfg }) } + +// WithMaxConnAge sets the maximum connection age for gRPC server. +func WithMaxConnAge(t time.Duration) Option { + return optionFunc(func(o *options) { + o.maxConnAge = t + }) +} diff --git a/pkg/tls/options.go b/pkg/tls/options.go deleted file mode 100644 index 58b6c56d36..0000000000 --- a/pkg/tls/options.go +++ /dev/null @@ -1,104 +0,0 @@ -package tls - -import ( - "crypto/tls" - "crypto/x509" - "io/ioutil" - - "github.com/go-kit/kit/log" - "github.com/go-kit/kit/log/level" - "github.com/pkg/errors" -) - -// NewServerConfig provides new server TLS configuration. -func NewServerConfig(logger log.Logger, cert, key, clientCA string) (*tls.Config, error) { - if key == "" && cert == "" { - if clientCA != "" { - return nil, errors.New("when a client CA is used a server key and certificate must also be provided") - } - - level.Info(logger).Log("msg", "disabled TLS, key and cert must be set to enable") - return nil, nil - } - - level.Info(logger).Log("msg", "enabling server side TLS") - - if key == "" || cert == "" { - return nil, errors.New("both server key and certificate must be provided") - } - - tlsCfg := &tls.Config{ - MinVersion: tls.VersionTLS12, - } - - tlsCert, err := tls.LoadX509KeyPair(cert, key) - if err != nil { - return nil, errors.Wrap(err, "server credentials") - } - - tlsCfg.Certificates = []tls.Certificate{tlsCert} - - if clientCA != "" { - caPEM, err := ioutil.ReadFile(clientCA) - if err != nil { - return nil, errors.Wrap(err, "reading client CA") - } - - certPool := x509.NewCertPool() - if !certPool.AppendCertsFromPEM(caPEM) { - return nil, errors.Wrap(err, "building client CA") - } - tlsCfg.ClientCAs = certPool - tlsCfg.ClientAuth = tls.RequireAndVerifyClientCert - - level.Info(logger).Log("msg", "server TLS client verification enabled") - } - - return tlsCfg, nil -} - -// NewClientConfig provides new client TLS configuration. -func NewClientConfig(logger log.Logger, cert, key, caCert, serverName string) (*tls.Config, error) { - var certPool *x509.CertPool - if caCert != "" { - caPEM, err := ioutil.ReadFile(caCert) - if err != nil { - return nil, errors.Wrap(err, "reading client CA") - } - - certPool = x509.NewCertPool() - if !certPool.AppendCertsFromPEM(caPEM) { - return nil, errors.Wrap(err, "building client CA") - } - level.Info(logger).Log("msg", "TLS client using provided certificate pool") - } else { - var err error - certPool, err = x509.SystemCertPool() - if err != nil { - return nil, errors.Wrap(err, "reading system certificate pool") - } - level.Info(logger).Log("msg", "TLS client using system certificate pool") - } - - tlsCfg := &tls.Config{ - RootCAs: certPool, - } - - if serverName != "" { - tlsCfg.ServerName = serverName - } - - if (key != "") != (cert != "") { - return nil, errors.New("both client key and certificate must be provided") - } - - if cert != "" { - cert, err := tls.LoadX509KeyPair(cert, key) - if err != nil { - return nil, errors.Wrap(err, "client credentials") - } - tlsCfg.Certificates = []tls.Certificate{cert} - level.Info(logger).Log("msg", "TLS client authentication enabled") - } - return tlsCfg, nil -} diff --git a/pkg/tls/tls.go b/pkg/tls/tls.go new file mode 100644 index 0000000000..3a9d5e26e1 --- /dev/null +++ b/pkg/tls/tls.go @@ -0,0 +1,213 @@ +package tls + +import ( + "crypto/tls" + "crypto/x509" + "io/ioutil" + "os" + "sync" + "time" + + "github.com/go-kit/kit/log" + + "github.com/go-kit/kit/log/level" + "github.com/pkg/errors" +) + +// NewServerConfig returns a new server tls config with auto reloading certificates. +// It reload the key and certificates when the local mod time of any file has changed. +func NewServerConfig(logger log.Logger, srvCertPath, srvKeyPath, cltCApath string) (*tls.Config, error) { + if srvCertPath == "" && srvKeyPath == "" { + if cltCApath != "" { + return nil, errors.New("when a client CA is used a server key and certificate must also be provided") + } + + level.Info(logger).Log("msg", "disabled TLS, key and cert must be set to enable") + return nil, nil + } + + level.Info(logger).Log("msg", "enabling server side TLS") + + if srvCertPath == "" || srvKeyPath == "" { + return nil, errors.New("both server key and certificate path must be provided") + } + + tlsCfg := &tls.Config{ + MinVersion: tls.VersionTLS12, + } + + mngr := &serverTLSManager{ + srvCertPath: srvCertPath, + srvKeyPath: srvKeyPath, + cltCApath: cltCApath, + cltTLSConfig: tlsCfg, + } + // TODO (krasi) use GetConfigForServer when add in the std. + // https://github.com/golang/go/issues/22836 + // With the current implementation the CA's can't be auto reloaded without a restart. + tlsCfg.GetCertificate = mngr.getCertificate + level.Info(logger).Log("msg", "enabled server side TLS") + + if cltCApath != "" { + tlsCfg.ClientAuth = tls.RequireAndVerifyClientCert + tlsCfg.GetConfigForClient = mngr.getConfigForClient + level.Info(logger).Log("msg", "server TLS client verification enabled") + } + + return tlsCfg, nil +} + +type serverTLSManager struct { + srvCertPath, + srvKeyPath, + cltCApath string + + mtxSrv sync.Mutex + srvCert *tls.Certificate + srvCertModTime time.Time + srvKeyModTime time.Time + + mtxClt sync.Mutex + cltTLSConfig *tls.Config + cltCAmodTime time.Time +} + +func (m *serverTLSManager) getCertificate(clientHello *tls.ClientHelloInfo) (*tls.Certificate, error) { + m.mtxSrv.Lock() + defer m.mtxSrv.Unlock() + + statCert, err := os.Stat(m.srvCertPath) + if err != nil { + return nil, err + } + statKey, err := os.Stat(m.srvKeyPath) + if err != nil { + return nil, err + } + + if m.srvCert == nil || !statCert.ModTime().Equal(m.srvCertModTime) || !statKey.ModTime().Equal(m.srvKeyModTime) { + cert, err := tls.LoadX509KeyPair(m.srvCertPath, m.srvKeyPath) + if err != nil { + return nil, errors.Wrap(err, "loading server cert and key") + } + m.srvCertModTime = statCert.ModTime() + m.srvKeyModTime = statKey.ModTime() + m.srvCert = &cert + } + + return m.srvCert, nil +} + +func (m *serverTLSManager) getConfigForClient(*tls.ClientHelloInfo) (*tls.Config, error) { + m.mtxClt.Lock() + defer m.mtxClt.Unlock() + + statCA, err := os.Stat(m.cltCApath) + if err != nil { + return nil, err + } + + if m.cltTLSConfig.ClientCAs == nil || !statCA.ModTime().Equal(m.cltCAmodTime) { + caPEM, err := ioutil.ReadFile(m.cltCApath) + if err != nil { + return nil, errors.Wrap(err, "reading client CA") + } + + m.cltTLSConfig.ClientCAs = x509.NewCertPool() + if !m.cltTLSConfig.ClientCAs.AppendCertsFromPEM(caPEM) { + return nil, errors.New("adding CA file to the client pool") + } + m.cltCAmodTime = statCA.ModTime() + } + + return m.cltTLSConfig, nil +} + +// NewClientConfig returns a new client tls config with auto reloading certificates. +// It reload the key and certificates when the local mod time of any file has changed. +func NewClientConfig(logger log.Logger, certPath, keyPath, caCertPath, serverName string) (*tls.Config, error) { + if (keyPath != "") != (certPath != "") { + return nil, errors.New("both client key and certificate must be provided") + } + + var certPool *x509.CertPool + if caCertPath != "" { + caPEM, err := ioutil.ReadFile(caCertPath) + if err != nil { + return nil, errors.Wrap(err, "reading client CA") + } + + certPool = x509.NewCertPool() + if !certPool.AppendCertsFromPEM(caPEM) { + return nil, errors.Wrap(err, "building client CA") + } + level.Info(logger).Log("msg", "TLS client using provided certificate pool") + } else { + var err error + certPool, err = x509.SystemCertPool() + if err != nil { + return nil, errors.Wrap(err, "reading system certificate pool") + } + level.Info(logger).Log("msg", "TLS client using system certificate pool") + } + + tlsCfg := &tls.Config{ + RootCAs: certPool, + } + + if serverName != "" { + tlsCfg.ServerName = serverName + } + + if (keyPath != "") != (certPath != "") { + return nil, errors.New("both client key and certificate must be provided or both should be empty") + } + + if certPath != "" { + mngr := &clientTLSManager{ + certPath: certPath, + keyPath: keyPath, + } + // TODO (krasi) implement CA rotation when added in the std. + // With the current implementation the CA's can't be auto reloaded without a restart. + tlsCfg.GetClientCertificate = mngr.getClientCertificate + level.Info(logger).Log("msg", "TLS client authentication enabled") + } + return tlsCfg, nil +} + +type clientTLSManager struct { + certPath, + keyPath string + + mtx sync.Mutex + cert *tls.Certificate + certModTime time.Time + keyModTime time.Time +} + +func (m *clientTLSManager) getClientCertificate(*tls.CertificateRequestInfo) (*tls.Certificate, error) { + m.mtx.Lock() + defer m.mtx.Unlock() + + statCert, err := os.Stat(m.certPath) + if err != nil { + return nil, err + } + statKey, err := os.Stat(m.keyPath) + if err != nil { + return nil, err + } + + if m.cert == nil || !statCert.ModTime().Equal(m.certModTime) || !statKey.ModTime().Equal(m.keyModTime) { + cert, err := tls.LoadX509KeyPair(m.certPath, m.keyPath) + if err != nil { + return nil, errors.Wrap(err, "loading server cert and key") + } + m.certModTime = statCert.ModTime() + m.keyModTime = statKey.ModTime() + m.cert = &cert + } + + return m.cert, nil +} diff --git a/test/e2e/tls_test.go b/test/e2e/tls_test.go new file mode 100644 index 0000000000..d9afe5b15e --- /dev/null +++ b/test/e2e/tls_test.go @@ -0,0 +1,199 @@ +package e2e_test + +import ( + "bytes" + "context" + "crypto/rand" + "crypto/rsa" + "crypto/x509" + "encoding/pem" + "fmt" + "io/ioutil" + "math/big" + "net" + "os" + "path/filepath" + "testing" + "time" + + "github.com/fortytw2/leaktest" + "github.com/go-kit/kit/log" + "github.com/thanos-io/thanos/pkg/testutil" + "google.golang.org/grpc" + "google.golang.org/grpc/credentials" + "google.golang.org/grpc/keepalive" + + thTLS "github.com/thanos-io/thanos/pkg/tls" + + pb "google.golang.org/grpc/examples/features/proto/echo" +) + +var serverName = "thanos" + +func TestGRPCServerCertAutoRotate(t *testing.T) { + defer leaktest.CheckTimeout(t, 10*time.Second)() + + logger := log.NewLogfmtLogger(os.Stderr) + expMessage := "hello world" + + tmpDirClt, err := ioutil.TempDir("", "test-tls-clt") + testutil.Ok(t, err) + defer func() { testutil.Ok(t, os.RemoveAll(tmpDirClt)) }() + caClt := filepath.Join(tmpDirClt, "ca") + certClt := filepath.Join(tmpDirClt, "cert") + keyClt := filepath.Join(tmpDirClt, "key") + + tmpDirSrv, err := ioutil.TempDir("", "test-tls-srv") + testutil.Ok(t, err) + defer func() { testutil.Ok(t, os.RemoveAll(tmpDirSrv)) }() + caSrv := filepath.Join(tmpDirSrv, "ca") + certSrv := filepath.Join(tmpDirSrv, "cert") + keySrv := filepath.Join(tmpDirSrv, "key") + + genCerts(t, certSrv, keySrv, caClt, serverName) + genCerts(t, certClt, keyClt, caSrv, serverName) + + configSrv, err := thTLS.NewServerConfig(logger, certSrv, keySrv, caSrv) + testutil.Ok(t, err) + + srv := grpc.NewServer(grpc.KeepaliveParams(keepalive.ServerParameters{MaxConnectionAge: 1 * time.Millisecond}), grpc.Creds(credentials.NewTLS(configSrv))) + + pb.RegisterEchoServer(srv, &ecServer{}) + p, err := testutil.FreePort() + testutil.Ok(t, err) + addr := fmt.Sprint("localhost:", p) + lis, err := net.Listen("tcp", addr) + testutil.Ok(t, err) + + go func() { + testutil.Ok(t, srv.Serve(lis)) + }() + defer func() { srv.Stop() }() + time.Sleep(50 * time.Millisecond) // Wait for the server to start. + + // Setup the connection and the client. + configClt, err := thTLS.NewClientConfig(logger, certClt, keyClt, caClt, serverName) + testutil.Ok(t, err) + conn, err := grpc.Dial(addr, grpc.WithConnectParams(grpc.ConnectParams{MinConnectTimeout: 1 * time.Second}), grpc.WithTransportCredentials(credentials.NewTLS(configClt))) + testutil.Ok(t, err) + defer func() { + testutil.Ok(t, conn.Close()) + }() + clt := pb.NewEchoClient(conn) + + // Check a good state. + resp, err := clt.UnaryEcho(context.Background(), &pb.EchoRequest{Message: expMessage}) + testutil.Ok(t, err) + testutil.Equals(t, expMessage, resp.Message) + + // Reload to a bad state. + genCerts(t, certSrv, keySrv, "", serverName) + genCerts(t, certClt, keyClt, "", serverName) + time.Sleep(10 * time.Millisecond) // Wait for the server MaxConnectionAge to expire. + _, err = clt.UnaryEcho(context.Background(), &pb.EchoRequest{Message: expMessage}) + checkAuthError(t, err) + + // Restore a good tls state. + genCerts(t, certSrv, keySrv, caClt, serverName) + genCerts(t, certClt, keyClt, caSrv, serverName) + time.Sleep(10 * time.Millisecond) // Wait for the server MaxConnectionAge to expire. + resp, err = clt.UnaryEcho(context.Background(), &pb.EchoRequest{Message: expMessage}) + testutil.Ok(t, err) + testutil.Equals(t, expMessage, resp.Message) + + // Rotate the client certs and check for bad state. + genCerts(t, certClt, keyClt, "", serverName) + time.Sleep(10 * time.Millisecond) // Wait for the server MaxConnectionAge to expire. + _, err = clt.UnaryEcho(context.Background(), &pb.EchoRequest{Message: expMessage}) + checkAuthError(t, err) +} + +func checkAuthError(t *testing.T, err error) { + testutil.NotOk(t, err) + // The first type assertion returns the rpc error. + err, _ = err.(x509.UnknownAuthorityError) + // The second type assertion returns the tls error. + err, isUnknownAuthorityError := err.(x509.UnknownAuthorityError) + testutil.Assert(t, isUnknownAuthorityError, "expected UnknownAuthorityError, but got:%v", err) +} + +var caRoot = &x509.Certificate{ + SerialNumber: big.NewInt(2019), + NotAfter: time.Now().AddDate(10, 0, 0), + IsCA: true, + ExtKeyUsage: []x509.ExtKeyUsage{x509.ExtKeyUsageClientAuth, x509.ExtKeyUsageServerAuth}, + KeyUsage: x509.KeyUsageDigitalSignature | x509.KeyUsageCertSign, + BasicConstraintsValid: true, +} + +var cert = &x509.Certificate{ + SerialNumber: big.NewInt(1658), + DNSNames: []string{serverName}, + NotAfter: time.Now().AddDate(10, 0, 0), + SubjectKeyId: []byte{1, 2, 3}, + ExtKeyUsage: []x509.ExtKeyUsage{x509.ExtKeyUsageClientAuth, x509.ExtKeyUsageServerAuth}, + KeyUsage: x509.KeyUsageDigitalSignature, +} + +// genCerts generates certificates and writes those to the provided paths. +// When the CA file already exists it is not overwritten and +// it is used to sign the certificates. +func genCerts(t *testing.T, certPath, privkeyPath, caPath, serverName string) { + caPrivKey, err := rsa.GenerateKey(rand.Reader, 1024) + testutil.Ok(t, err) + + if caPath != "" { + caSrvPriv := caPath + ".priv" + // When the CA private file exists don't overwrite it but + // use it to extract the private key to be used for signing the certificate. + if _, err := os.Stat(caSrvPriv); !os.IsNotExist(err) { + d, err := ioutil.ReadFile(caSrvPriv) + testutil.Ok(t, err) + caPrivKey, err = x509.ParsePKCS1PrivateKey(d) + testutil.Ok(t, err) + } else { + caBytes, err := x509.CreateCertificate(rand.Reader, caRoot, caRoot, &caPrivKey.PublicKey, caPrivKey) + testutil.Ok(t, err) + caPEM := pem.EncodeToMemory(&pem.Block{ + Type: "CERTIFICATE", + Bytes: caBytes, + }) + testutil.Ok(t, ioutil.WriteFile(caPath, caPEM, 0644)) + testutil.Ok(t, ioutil.WriteFile(caSrvPriv, x509.MarshalPKCS1PrivateKey(caPrivKey), 0644)) + } + } + + certPrivKey, err := rsa.GenerateKey(rand.Reader, 1024) + testutil.Ok(t, err) + + // Sign the cert with the CA private key. + certBytes, err := x509.CreateCertificate(rand.Reader, cert, caRoot, &certPrivKey.PublicKey, caPrivKey) + testutil.Ok(t, err) + + certPEM := new(bytes.Buffer) + testutil.Ok(t, pem.Encode(certPEM, &pem.Block{ + Type: "CERTIFICATE", + Bytes: certBytes, + })) + + if certPath != "" { + testutil.Ok(t, ioutil.WriteFile(certPath, certPEM.Bytes(), 0644)) + } + + certPrivKeyPEM := new(bytes.Buffer) + testutil.Ok(t, pem.Encode(certPrivKeyPEM, &pem.Block{ + Type: "RSA PRIVATE KEY", + Bytes: x509.MarshalPKCS1PrivateKey(certPrivKey), + })) + if privkeyPath != "" { + testutil.Ok(t, ioutil.WriteFile(privkeyPath, certPrivKeyPEM.Bytes(), 0644)) + } +} + +type ecServer struct { + pb.UnimplementedEchoServer +} + +func (s *ecServer) UnaryEcho(ctx context.Context, req *pb.EchoRequest) (*pb.EchoResponse, error) { + return &pb.EchoResponse{Message: req.Message}, nil +} From 998bffd4cdacc9bf7ee6f57c1b5f1e037a5006f4 Mon Sep 17 00:00:00 2001 From: Krasi Georgiev <8903888+krasi-georgiev@users.noreply.github.com> Date: Mon, 9 Dec 2019 14:02:02 +0200 Subject: [PATCH 2/4] regeareted the docs Signed-off-by: Krasi Georgiev <8903888+krasi-georgiev@users.noreply.github.com> --- docs/components/query.md | 4 ++++ docs/components/rule.md | 4 ++++ docs/components/sidecar.md | 4 ++++ docs/components/store.md | 4 ++++ 4 files changed, 16 insertions(+) diff --git a/docs/components/query.md b/docs/components/query.md index e566b4cb0d..37db0b25f4 100644 --- a/docs/components/query.md +++ b/docs/components/query.md @@ -277,6 +277,10 @@ Flags: TLS CA to verify clients against. If no client CA is specified, there is no client verification on server side. (tls.NoClientCert) + --grpc-server-max-connection-age=1m + The grpc server max connection age. This + controls how often to re-read the tls + certificates and redo the TLS handshake --grpc-client-tls-secure Use TLS when talking to the gRPC server --grpc-client-tls-cert="" TLS Certificates to use to identify this client to the server diff --git a/docs/components/rule.md b/docs/components/rule.md index 359c0572a4..0530e4de9a 100644 --- a/docs/components/rule.md +++ b/docs/components/rule.md @@ -184,6 +184,10 @@ Flags: TLS CA to verify clients against. If no client CA is specified, there is no client verification on server side. (tls.NoClientCert) + --grpc-server-max-connection-age=1m + The grpc server max connection age. This + controls how often to re-read the tls + certificates and redo the TLS handshake --label=="" ... Labels to be applied to all generated metrics (repeated). Similar to external labels for diff --git a/docs/components/sidecar.md b/docs/components/sidecar.md index 2d735061f3..c6e29be1a7 100644 --- a/docs/components/sidecar.md +++ b/docs/components/sidecar.md @@ -118,6 +118,10 @@ Flags: TLS CA to verify clients against. If no client CA is specified, there is no client verification on server side. (tls.NoClientCert) + --grpc-server-max-connection-age=1m + The grpc server max connection age. This + controls how often to re-read the tls + certificates and redo the TLS handshake --prometheus.url=http://localhost:9090 URL at which to reach Prometheus's API. For better performance use local network. diff --git a/docs/components/store.md b/docs/components/store.md index 3b2a7f5737..a6c8603125 100644 --- a/docs/components/store.md +++ b/docs/components/store.md @@ -67,6 +67,10 @@ Flags: TLS CA to verify clients against. If no client CA is specified, there is no client verification on server side. (tls.NoClientCert) + --grpc-server-max-connection-age=1m + The grpc server max connection age. This + controls how often to re-read the tls + certificates and redo the TLS handshake --data-dir="./data" Data directory in which to cache remote blocks. --index-cache-size=250MB Maximum size of items held in the index cache. --chunk-pool-size=2GB Maximum size of concurrently allocatable bytes From c14ff481cb859e001d4030c8e4bd0ca72eeda1e9 Mon Sep 17 00:00:00 2001 From: Krasi Georgiev <8903888+krasi-georgiev@users.noreply.github.com> Date: Mon, 9 Dec 2019 14:29:45 +0200 Subject: [PATCH 3/4] comments format Signed-off-by: Krasi Georgiev <8903888+krasi-georgiev@users.noreply.github.com> --- test/e2e/tls_test.go | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/test/e2e/tls_test.go b/test/e2e/tls_test.go index d9afe5b15e..10c8fed481 100644 --- a/test/e2e/tls_test.go +++ b/test/e2e/tls_test.go @@ -86,9 +86,8 @@ func TestGRPCServerCertAutoRotate(t *testing.T) { testutil.Ok(t, err) testutil.Equals(t, expMessage, resp.Message) - // Reload to a bad state. + // Reload the server certs and ensure that the tls handshake fails. genCerts(t, certSrv, keySrv, "", serverName) - genCerts(t, certClt, keyClt, "", serverName) time.Sleep(10 * time.Millisecond) // Wait for the server MaxConnectionAge to expire. _, err = clt.UnaryEcho(context.Background(), &pb.EchoRequest{Message: expMessage}) checkAuthError(t, err) @@ -101,7 +100,7 @@ func TestGRPCServerCertAutoRotate(t *testing.T) { testutil.Ok(t, err) testutil.Equals(t, expMessage, resp.Message) - // Rotate the client certs and check for bad state. + // Reload the client certs and ensure that the tls handshake fails. genCerts(t, certClt, keyClt, "", serverName) time.Sleep(10 * time.Millisecond) // Wait for the server MaxConnectionAge to expire. _, err = clt.UnaryEcho(context.Background(), &pb.EchoRequest{Message: expMessage}) From f0f6ac5d677a900a6542b1f45ef215bdf76ee157 Mon Sep 17 00:00:00 2001 From: Krasi Georgiev <8903888+krasi-georgiev@users.noreply.github.com> Date: Mon, 9 Dec 2019 15:29:14 +0200 Subject: [PATCH 4/4] nits Signed-off-by: Krasi Georgiev <8903888+krasi-georgiev@users.noreply.github.com> --- pkg/tls/tls.go | 9 ++---- test/e2e/tls_test.go | 77 +++++++++++++++++++++++--------------------- 2 files changed, 43 insertions(+), 43 deletions(-) diff --git a/pkg/tls/tls.go b/pkg/tls/tls.go index 3a9d5e26e1..ead509234f 100644 --- a/pkg/tls/tls.go +++ b/pkg/tls/tls.go @@ -15,7 +15,7 @@ import ( ) // NewServerConfig returns a new server tls config with auto reloading certificates. -// It reload the key and certificates when the local mod time of any file has changed. +// It reloads the key and certificates when the local mod time of any file has changed. func NewServerConfig(logger log.Logger, srvCertPath, srvKeyPath, cltCApath string) (*tls.Config, error) { if srvCertPath == "" && srvKeyPath == "" { if cltCApath != "" { @@ -124,7 +124,7 @@ func (m *serverTLSManager) getConfigForClient(*tls.ClientHelloInfo) (*tls.Config } // NewClientConfig returns a new client tls config with auto reloading certificates. -// It reload the key and certificates when the local mod time of any file has changed. +// It reloads the key and certificates when the local mod time of any file has changed. func NewClientConfig(logger log.Logger, certPath, keyPath, caCertPath, serverName string) (*tls.Config, error) { if (keyPath != "") != (certPath != "") { return nil, errors.New("both client key and certificate must be provided") @@ -159,10 +159,6 @@ func NewClientConfig(logger log.Logger, certPath, keyPath, caCertPath, serverNam tlsCfg.ServerName = serverName } - if (keyPath != "") != (certPath != "") { - return nil, errors.New("both client key and certificate must be provided or both should be empty") - } - if certPath != "" { mngr := &clientTLSManager{ certPath: certPath, @@ -173,6 +169,7 @@ func NewClientConfig(logger log.Logger, certPath, keyPath, caCertPath, serverNam tlsCfg.GetClientCertificate = mngr.getClientCertificate level.Info(logger).Log("msg", "TLS client authentication enabled") } + return tlsCfg, nil } diff --git a/test/e2e/tls_test.go b/test/e2e/tls_test.go index 10c8fed481..087aea6e2e 100644 --- a/test/e2e/tls_test.go +++ b/test/e2e/tls_test.go @@ -24,7 +24,6 @@ import ( "google.golang.org/grpc/keepalive" thTLS "github.com/thanos-io/thanos/pkg/tls" - pb "google.golang.org/grpc/examples/features/proto/echo" ) @@ -74,7 +73,7 @@ func TestGRPCServerCertAutoRotate(t *testing.T) { // Setup the connection and the client. configClt, err := thTLS.NewClientConfig(logger, certClt, keyClt, caClt, serverName) testutil.Ok(t, err) - conn, err := grpc.Dial(addr, grpc.WithConnectParams(grpc.ConnectParams{MinConnectTimeout: 1 * time.Second}), grpc.WithTransportCredentials(credentials.NewTLS(configClt))) + conn, err := grpc.Dial(addr, grpc.WithConnectParams(grpc.ConnectParams{MinConnectTimeout: 1 * time.Minute}), grpc.WithTransportCredentials(credentials.NewTLS(configClt))) testutil.Ok(t, err) defer func() { testutil.Ok(t, conn.Close()) @@ -88,21 +87,21 @@ func TestGRPCServerCertAutoRotate(t *testing.T) { // Reload the server certs and ensure that the tls handshake fails. genCerts(t, certSrv, keySrv, "", serverName) - time.Sleep(10 * time.Millisecond) // Wait for the server MaxConnectionAge to expire. + time.Sleep(50 * time.Millisecond) // Wait for the server MaxConnectionAge to expire. _, err = clt.UnaryEcho(context.Background(), &pb.EchoRequest{Message: expMessage}) checkAuthError(t, err) // Restore a good tls state. genCerts(t, certSrv, keySrv, caClt, serverName) genCerts(t, certClt, keyClt, caSrv, serverName) - time.Sleep(10 * time.Millisecond) // Wait for the server MaxConnectionAge to expire. + time.Sleep(50 * time.Millisecond) // Wait for the server MaxConnectionAge to expire. resp, err = clt.UnaryEcho(context.Background(), &pb.EchoRequest{Message: expMessage}) testutil.Ok(t, err) testutil.Equals(t, expMessage, resp.Message) // Reload the client certs and ensure that the tls handshake fails. genCerts(t, certClt, keyClt, "", serverName) - time.Sleep(10 * time.Millisecond) // Wait for the server MaxConnectionAge to expire. + time.Sleep(50 * time.Millisecond) // Wait for the server MaxConnectionAge to expire. _, err = clt.UnaryEcho(context.Background(), &pb.EchoRequest{Message: expMessage}) checkAuthError(t, err) } @@ -138,28 +137,22 @@ var cert = &x509.Certificate{ // When the CA file already exists it is not overwritten and // it is used to sign the certificates. func genCerts(t *testing.T, certPath, privkeyPath, caPath, serverName string) { - caPrivKey, err := rsa.GenerateKey(rand.Reader, 1024) - testutil.Ok(t, err) - - if caPath != "" { - caSrvPriv := caPath + ".priv" - // When the CA private file exists don't overwrite it but - // use it to extract the private key to be used for signing the certificate. - if _, err := os.Stat(caSrvPriv); !os.IsNotExist(err) { - d, err := ioutil.ReadFile(caSrvPriv) - testutil.Ok(t, err) - caPrivKey, err = x509.ParsePKCS1PrivateKey(d) - testutil.Ok(t, err) - } else { - caBytes, err := x509.CreateCertificate(rand.Reader, caRoot, caRoot, &caPrivKey.PublicKey, caPrivKey) - testutil.Ok(t, err) - caPEM := pem.EncodeToMemory(&pem.Block{ - Type: "CERTIFICATE", - Bytes: caBytes, - }) - testutil.Ok(t, ioutil.WriteFile(caPath, caPEM, 0644)) - testutil.Ok(t, ioutil.WriteFile(caSrvPriv, x509.MarshalPKCS1PrivateKey(caPrivKey), 0644)) - } + var ( + err error + caPrivKey *rsa.PrivateKey + caSrvPriv = caPath + ".priv" + ) + + // When the CA private file exists don't overwrite it but + // use it to extract the private key to be used for signing the certificate. + if _, err := os.Stat(caSrvPriv); !os.IsNotExist(err) { + d, err := ioutil.ReadFile(caSrvPriv) + testutil.Ok(t, err) + caPrivKey, err = x509.ParsePKCS1PrivateKey(d) + testutil.Ok(t, err) + } else { + caPrivKey, err = rsa.GenerateKey(rand.Reader, 1024) + testutil.Ok(t, err) } certPrivKey, err := rsa.GenerateKey(rand.Reader, 1024) @@ -169,22 +162,32 @@ func genCerts(t *testing.T, certPath, privkeyPath, caPath, serverName string) { certBytes, err := x509.CreateCertificate(rand.Reader, cert, caRoot, &certPrivKey.PublicKey, caPrivKey) testutil.Ok(t, err) - certPEM := new(bytes.Buffer) - testutil.Ok(t, pem.Encode(certPEM, &pem.Block{ - Type: "CERTIFICATE", - Bytes: certBytes, - })) + if caPath != "" { + caBytes, err := x509.CreateCertificate(rand.Reader, caRoot, caRoot, &caPrivKey.PublicKey, caPrivKey) + testutil.Ok(t, err) + caPEM := pem.EncodeToMemory(&pem.Block{ + Type: "CERTIFICATE", + Bytes: caBytes, + }) + testutil.Ok(t, ioutil.WriteFile(caPath, caPEM, 0644)) + testutil.Ok(t, ioutil.WriteFile(caSrvPriv, x509.MarshalPKCS1PrivateKey(caPrivKey), 0644)) + } if certPath != "" { + certPEM := new(bytes.Buffer) + testutil.Ok(t, pem.Encode(certPEM, &pem.Block{ + Type: "CERTIFICATE", + Bytes: certBytes, + })) testutil.Ok(t, ioutil.WriteFile(certPath, certPEM.Bytes(), 0644)) } - certPrivKeyPEM := new(bytes.Buffer) - testutil.Ok(t, pem.Encode(certPrivKeyPEM, &pem.Block{ - Type: "RSA PRIVATE KEY", - Bytes: x509.MarshalPKCS1PrivateKey(certPrivKey), - })) if privkeyPath != "" { + certPrivKeyPEM := new(bytes.Buffer) + testutil.Ok(t, pem.Encode(certPrivKeyPEM, &pem.Block{ + Type: "RSA PRIVATE KEY", + Bytes: x509.MarshalPKCS1PrivateKey(certPrivKey), + })) testutil.Ok(t, ioutil.WriteFile(privkeyPath, certPrivKeyPEM.Bytes(), 0644)) } }